-rw-r--r--  lib/Transforms/Scalar/SROA.cpp     | 119
-rw-r--r--  test/Transforms/SROA/basictest.ll  |  63
2 files changed, 161 insertions(+), 21 deletions(-)
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index 5a9247ea1b..e3182d319c 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -1723,6 +1723,54 @@ static bool isVectorPromotionViable(const TargetData &TD,
return true;
}
+/// \brief Test whether the given alloca partition can be promoted to an int.
+///
+/// This is a quick test to check whether we can rewrite a particular alloca
+/// partition (and its newly formed alloca) into an integer alloca suitable for
+/// promotion to an SSA value. We can only ensure this for a limited set of
+/// operations, and we don't want to do the rewrites unless we are confident
+/// that the result will be promotable, so we have an early test here.
+static bool isIntegerPromotionViable(const TargetData &TD,
+ Type *AllocaTy,
+ AllocaPartitioning &P,
+ AllocaPartitioning::const_use_iterator I,
+ AllocaPartitioning::const_use_iterator E) {
+ IntegerType *Ty = dyn_cast<IntegerType>(AllocaTy);
+ if (!Ty)
+ return false;
+
+ // Check the uses to ensure they are (likely) promotable integer uses. Also
+ // ensure that the alloca has a covering load or store. We don't want to
+ // promote because of some other unsplittable entry (which we may make
+ // splittable later) and then lose the ability to promote each element access.
+ bool WholeAllocaOp = false;
+ for (; I != E; ++I) {
+ if (LoadInst *LI = dyn_cast<LoadInst>(&*I->User)) {
+ if (LI->isVolatile() || !LI->getType()->isIntegerTy())
+ return false;
+ if (LI->getType() == Ty)
+ WholeAllocaOp = true;
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(&*I->User)) {
+ if (SI->isVolatile() || !SI->getValueOperand()->getType()->isIntegerTy())
+ return false;
+ if (SI->getValueOperand()->getType() == Ty)
+ WholeAllocaOp = true;
+ } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(&*I->User)) {
+ if (MI->isVolatile())
+ return false;
+ if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(&*I->User)) {
+ const AllocaPartitioning::MemTransferOffsets &MTO
+ = P.getMemTransferOffsets(*MTI);
+ if (!MTO.IsSplittable)
+ return false;
+ }
+ } else {
+ return false;
+ }
+ }
+ return WholeAllocaOp;
+}
+
namespace {
/// \brief Visitor to rewrite instructions using a partition of an alloca to
/// use a new alloca.
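
For context, the predicate above admits only partitions where every user is a non-volatile integer load or store (or a splittable memory transfer) and at least one access covers the whole alloca. A minimal, hypothetical IR sketch of a qualifying partition, in the spirit of the updated @test12 below and assuming the partition's new alloca ends up typed i24:

  %a = alloca [3 x i8]
  %p1 = getelementptr [3 x i8]* %a, i64 0, i32 1
  store i8 0, i8* %p1                 ; narrow, non-volatile integer store: allowed
  %ip = bitcast [3 x i8]* %a to i24*
  %whole = load i24* %ip              ; matches the i24 partition type, so WholeAllocaOp = true

Without the covering i24 load, WholeAllocaOp stays false and the partition falls back to the ordinary rewriting paths.
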
@@ -1754,6 +1802,12 @@ class AllocaPartitionRewriter : public InstVisitor<AllocaPartitionRewriter,
Type *ElementTy;
uint64_t ElementSize;
+ // This is a convenience and flag variable that will be null unless the new
+ // alloca has a promotion-targeted integer type due to passing
+ // isIntegerPromotionViable above. If it is non-null, the desired integer
+ // type will be stored here for easy access during rewriting.
+ IntegerType *IntPromotionTy;
+
// The offset of the partition user currently being rewritten.
uint64_t BeginOffset, EndOffset;
Instruction *OldPtr;
@@ -1770,7 +1824,7 @@ public:
OldAI(OldAI), NewAI(NewAI),
NewAllocaBeginOffset(NewBeginOffset),
NewAllocaEndOffset(NewEndOffset),
- VecTy(), ElementTy(), ElementSize(),
+ VecTy(), ElementTy(), ElementSize(), IntPromotionTy(),
BeginOffset(), EndOffset() {
}
@@ -1786,6 +1840,9 @@ public:
assert((VecTy->getScalarSizeInBits() % 8) == 0 &&
"Only multiple-of-8 sized vector elements are viable");
ElementSize = VecTy->getScalarSizeInBits() / 8;
+ } else if (isIntegerPromotionViable(TD, NewAI.getAllocatedType(),
+ P, I, E)) {
+ IntPromotionTy = cast<IntegerType>(NewAI.getAllocatedType());
}
bool CanSROA = true;
for (; I != E; ++I) {
@@ -1830,6 +1887,43 @@ private:
return IRB.getInt32(Index);
}
+ Value *extractInteger(IRBuilder<> &IRB, IntegerType *TargetTy,
+ uint64_t Offset) {
+ assert(IntPromotionTy && "Alloca is not an integer we can extract from");
+ Value *V = IRB.CreateLoad(&NewAI, getName(".load"));
+ assert(Offset >= NewAllocaBeginOffset && "Out of bounds offset");
+ uint64_t RelOffset = Offset - NewAllocaBeginOffset;
+ if (RelOffset)
+ V = IRB.CreateLShr(V, RelOffset*8, getName(".shift"));
+ if (TargetTy != IntPromotionTy) {
+ assert(TargetTy->getBitWidth() < IntPromotionTy->getBitWidth() &&
+ "Cannot extract to a larger integer!");
+ V = IRB.CreateTrunc(V, TargetTy, getName(".trunc"));
+ }
+ return V;
+ }
+
+ StoreInst *insertInteger(IRBuilder<> &IRB, Value *V, uint64_t Offset) {
+ IntegerType *Ty = cast<IntegerType>(V->getType());
+ if (Ty == IntPromotionTy)
+ return IRB.CreateStore(V, &NewAI);
+
+ assert(Ty->getBitWidth() < IntPromotionTy->getBitWidth() &&
+ "Cannot insert a larger integer!");
+ V = IRB.CreateZExt(V, IntPromotionTy, getName(".ext"));
+ assert(Offset >= NewAllocaBeginOffset && "Out of bounds offset");
+ uint64_t RelOffset = Offset - NewAllocaBeginOffset;
+ if (RelOffset)
+ V = IRB.CreateShl(V, RelOffset*8, getName(".shift"));
+
+ APInt Mask = ~Ty->getMask().zext(IntPromotionTy->getBitWidth())
+ .shl(RelOffset*8);
+ Value *Old = IRB.CreateAnd(IRB.CreateLoad(&NewAI, getName(".oldload")),
+ Mask, getName(".mask"));
+ return IRB.CreateStore(IRB.CreateOr(Old, V, getName(".insert")),
+ &NewAI);
+ }
+
void deleteIfTriviallyDead(Value *V) {
Instruction *I = cast<Instruction>(V);
if (isInstructionTriviallyDead(I))
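
As a rough sketch of the IR the two helpers above produce (value names here are illustrative, not what the pass emits), extracting an i8 from byte offset 1 of an i24-typed partition and then re-inserting it would look like:

  ; extractInteger(IRB, i8, Offset), RelOffset == 1
  %load  = load i24* %new.alloca            ; read the whole integer
  %shift = lshr i24 %load, 8                ; RelOffset * 8 bits
  %val   = trunc i24 %shift to i8           ; narrow to the target type

  ; insertInteger(IRB, i8 %v, Offset), RelOffset == 1
  %ext   = zext i8 %v to i24
  %shl   = shl i24 %ext, 8
  %old   = load i24* %new.alloca
  %mask  = and i24 %old, -65281             ; -65281 == 0xff00ff, clears bits 8..15
  %ins   = or i24 %mask, %shl
  store i24 %ins, i24* %new.alloca

Once the i24 alloca itself is promoted to an SSA value, only this shift and mask arithmetic remains, which is what the rewritten CHECK lines in basictest.ll below verify.
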
@@ -1865,6 +1959,16 @@ private:
return true;
}
+ bool rewriteIntegerLoad(IRBuilder<> &IRB, LoadInst &LI) {
+ assert(!LI.isVolatile());
+ Value *Result = extractInteger(IRB, cast<IntegerType>(LI.getType()),
+ BeginOffset);
+ LI.replaceAllUsesWith(Result);
+ Pass.DeadInsts.push_back(&LI);
+ DEBUG(dbgs() << " to: " << *Result << "\n");
+ return true;
+ }
+
bool visitLoadInst(LoadInst &LI) {
DEBUG(dbgs() << " original: " << LI << "\n");
Value *OldOp = LI.getOperand(0);
@@ -1873,6 +1977,8 @@ private:
if (VecTy)
return rewriteVectorizedLoadInst(IRB, LI, OldOp);
+ if (IntPromotionTy)
+ return rewriteIntegerLoad(IRB, LI);
Value *NewPtr = getAdjustedAllocaPtr(IRB,
LI.getPointerOperand()->getType());
@@ -1904,6 +2010,15 @@ private:
return true;
}
+ bool rewriteIntegerStore(IRBuilder<> &IRB, StoreInst &SI) {
+ assert(!SI.isVolatile());
+ StoreInst *Store = insertInteger(IRB, SI.getValueOperand(), BeginOffset);
+ Pass.DeadInsts.push_back(&SI);
+ (void)Store;
+ DEBUG(dbgs() << " to: " << *Store << "\n");
+ return true;
+ }
+
bool visitStoreInst(StoreInst &SI) {
DEBUG(dbgs() << " original: " << SI << "\n");
Value *OldOp = SI.getOperand(1);
@@ -1912,6 +2027,8 @@ private:
if (VecTy)
return rewriteVectorizedStoreInst(IRB, SI, OldOp);
+ if (IntPromotionTy)
+ return rewriteIntegerStore(IRB, SI);
Value *NewPtr = getAdjustedAllocaPtr(IRB,
SI.getPointerOperand()->getType());
diff --git a/test/Transforms/SROA/basictest.ll b/test/Transforms/SROA/basictest.ll
index be3ef64dc2..a61de05f45 100644
--- a/test/Transforms/SROA/basictest.ll
+++ b/test/Transforms/SROA/basictest.ll
@@ -553,30 +553,53 @@ bad:
ret i32 %Z2
}
-define i32 @test12() {
-; CHECK: @test12
-; CHECK: alloca i24
-;
-; FIXME: SROA should promote accesses to this into whole i24 operations instead
-; of i8 operations.
-; CHECK: store i8 0
-; CHECK: store i8 0
-; CHECK: store i8 0
+define i8 @test12() {
+; We fully promote these to the i24 load or store size, resulting in just masks
+; and other operations that instcombine will fold, but no alloca.
;
-; CHECK: load i24*
+; CHECK: @test12
entry:
%a = alloca [3 x i8]
- %b0ptr = getelementptr [3 x i8]* %a, i64 0, i32 0
- store i8 0, i8* %b0ptr
- %b1ptr = getelementptr [3 x i8]* %a, i64 0, i32 1
- store i8 0, i8* %b1ptr
- %b2ptr = getelementptr [3 x i8]* %a, i64 0, i32 2
- store i8 0, i8* %b2ptr
- %iptr = bitcast [3 x i8]* %a to i24*
- %i = load i24* %iptr
- %ret = zext i24 %i to i32
- ret i32 %ret
+ %b = alloca [3 x i8]
+; CHECK-NOT: alloca
+
+ %a0ptr = getelementptr [3 x i8]* %a, i64 0, i32 0
+ store i8 0, i8* %a0ptr
+ %a1ptr = getelementptr [3 x i8]* %a, i64 0, i32 1
+ store i8 0, i8* %a1ptr
+ %a2ptr = getelementptr [3 x i8]* %a, i64 0, i32 2
+ store i8 0, i8* %a2ptr
+ %aiptr = bitcast [3 x i8]* %a to i24*
+ %ai = load i24* %aiptr
+; CHECK-NOT: store
+; CHECK-NOT: load
+; CHECK: %[[mask0:.*]] = and i24 undef, -256
+; CHECK-NEXT: %[[mask1:.*]] = and i24 %[[mask0]], -65281
+; CHECK-NEXT: %[[mask2:.*]] = and i24 %[[mask1]], 65535
+
+ %biptr = bitcast [3 x i8]* %b to i24*
+ store i24 %ai, i24* %biptr
+ %b0ptr = getelementptr [3 x i8]* %b, i64 0, i32 0
+ %b0 = load i8* %b0ptr
+ %b1ptr = getelementptr [3 x i8]* %b, i64 0, i32 1
+ %b1 = load i8* %b1ptr
+ %b2ptr = getelementptr [3 x i8]* %b, i64 0, i32 2
+ %b2 = load i8* %b2ptr
+; CHECK-NOT: store
+; CHECK-NOT: load
+; CHECK: %[[trunc0:.*]] = trunc i24 %[[mask2]] to i8
+; CHECK-NEXT: %[[shift1:.*]] = lshr i24 %[[mask2]], 8
+; CHECK-NEXT: %[[trunc1:.*]] = trunc i24 %[[shift1]] to i8
+; CHECK-NEXT: %[[shift2:.*]] = lshr i24 %[[mask2]], 16
+; CHECK-NEXT: %[[trunc2:.*]] = trunc i24 %[[shift2]] to i8
+
+ %bsum0 = add i8 %b0, %b1
+ %bsum1 = add i8 %bsum0, %b2
+ ret i8 %bsum1
+; CHECK: %[[sum0:.*]] = add i8 %[[trunc0]], %[[trunc1]]
+; CHECK-NEXT: %[[sum1:.*]] = add i8 %[[sum0]], %[[trunc2]]
+; CHECK-NEXT: ret i8 %[[sum1]]
}
define i32 @test13() {