Teach SROA how to split whole-alloca integer loads and stores into

smaller integer loads and stores. The high-level motivation is that the frontend sometimes generates a single whole-alloca integer load or store during ABI lowering of splittable allocas. We need to be able to break this apart in order to see the underlying elements and properly promote them to SSA values. The hope is that this fixes some performance regressions on x86-32 with the new SROA pass. Unfortunately, this causes quite a bit of churn in the test cases, and bloats some IR that comes out. When we see an alloca that consists soley of bits and bytes being extracted and re-inserted, we now do some splitting first, before building widened integer "bucket of bits" representations. These are always well folded by instcombine however, so this shouldn't actually result in missed opportunities. If this splitting of all-integer allocas does cause problems (perhaps due to smaller SSA values going into the RA), we could potentially go to some extreme measures to only do this integer splitting trick when there are non-integer component accesses of an alloca, but discovering this is quite expensive: it adds yet another complete walk of the recursive use tree of the alloca. Either way, I will be watching build bots and LNT bots to see what fallout there is here. If anyone gets x86-32 numbers before & after this change, I would be very interested. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@166662 91177308-0d34-0410-b5e6-96231b3b80d8
author: Chandler Carruth <chandlerc@gmail.com> 2012-10-25 04:37:07 +0000
committer: Chandler Carruth <chandlerc@gmail.com> 2012-10-25 04:37:07 +0000
commit: a2b88163af30f59d12ae0172565f3406bdbf6c45 (patch)
tree: 2bca1e0574a0720c4ceba0c74b31231ddf27efd6 /lib/Transforms/Scalar/SROA.cpp
parent: 8dbac7b529cfb73bcd0ceef514e5c1d247cf3baa (diff)
download: llvm-a2b88163af30f59d12ae0172565f3406bdbf6c45.tar.gz
llvm-a2b88163af30f59d12ae0172565f3406bdbf6c45.tar.bz2
llvm-a2b88163af30f59d12ae0172565f3406bdbf6c45.tar.xz
1 files changed, 117 insertions, 4 deletions
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index 728ce997a9..af3a880cb9 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -582,7 +582,8 @@ private:
     P.Partitions.push_back(New);
   }
 
-  bool handleLoadOrStore(Type *Ty, Instruction &I, int64_t Offset) {
+  bool handleLoadOrStore(Type *Ty, Instruction &I, int64_t Offset,
+                         bool IsVolatile) {
     uint64_t Size = TD.getTypeStoreSize(Ty);
 
     // If this memory access can be shown to *statically* extend outside the
@@ -603,7 +604,14 @@ private:
       return true;
     }
 
-    insertUse(I, Offset, Size);
+    // We allow splitting of loads and stores where the type is an integer type
+    // and which cover the entire alloca. Such integer loads and stores
+    // often require decomposition into fine grained loads and stores.
+    bool IsSplittable = false;
+    if (IntegerType *ITy = dyn_cast<IntegerType>(Ty))
+      IsSplittable = !IsVolatile && ITy->getBitWidth() == AllocSize*8;
+
+    insertUse(I, Offset, Size, IsSplittable);
     return true;
   }
 
@@ -624,7 +632,7 @@ private:
   bool visitLoadInst(LoadInst &LI) {
     assert((!LI.isSimple() || LI.getType()->isSingleValueType()) &&
            "All simple FCA loads should have been pre-split");
-    return handleLoadOrStore(LI.getType(), LI, Offset);
+    return handleLoadOrStore(LI.getType(), LI, Offset, LI.isVolatile());
   }
 
   bool visitStoreInst(StoreInst &SI) {
@@ -634,7 +642,7 @@ private:
 
     assert((!SI.isSimple() || ValOp->getType()->isSingleValueType()) &&
            "All simple FCA stores should have been pre-split");
-    return handleLoadOrStore(ValOp->getType(), SI, Offset);
+    return handleLoadOrStore(ValOp->getType(), SI, Offset, SI.isVolatile());
   }
 
 
@@ -1173,6 +1181,21 @@ Type *AllocaPartitioning::getCommonType(iterator I) const {
       UserTy = LI->getType();
     } else if (StoreInst *SI = dyn_cast<StoreInst>(UI->U->getUser())) {
       UserTy = SI->getValueOperand()->getType();
+    } else {
+      return 0; // Bail if we have weird uses.
+    }
+
+    if (IntegerType *ITy = dyn_cast<IntegerType>(UserTy)) {
+      // If the type is larger than the partition, skip it. We only encounter
+      // this for split integer operations where we want to use the type of the
+      // entity causing the split.
+      if (ITy->getBitWidth() > (I->EndOffset - I->BeginOffset)*8)
+        continue;
+
+      // If we have found an integer type use covering the alloca, use that
+      // regardless of the other types, as integers are often used for a "bucket
+      // of bits" type.
+      return ITy;
     }
 
     if (Ty && Ty != UserTy)
@@ -2138,6 +2161,14 @@ static bool isIntegerWideningViable(const DataLayout &TD,
   if (SizeInBits != TD.getTypeStoreSizeInBits(AllocaTy))
     return false;
 
+  // We need to ensure that an integer type with the appropriate bitwidth can
+  // be converted to the alloca type, whatever that is. We don't want to force
+  // the alloca itself to have an integer type if there is a more suitable one.
+  Type *IntTy = Type::getIntNTy(AllocaTy->getContext(), SizeInBits);
+  if (!canConvertValue(TD, AllocaTy, IntTy) ||
+      !canConvertValue(TD, IntTy, AllocaTy))
+    return false;
+
   uint64_t Size = TD.getTypeStoreSize(AllocaTy);
 
   // Check the uses to ensure the uses are (likely) promoteable integer uses.
@@ -2461,6 +2492,50 @@ private:
 
     if (VecTy)
       return rewriteVectorizedLoadInst(IRB, LI, OldOp);
+
+    uint64_t Size = EndOffset - BeginOffset;
+    if (Size < TD.getTypeStoreSize(LI.getType())) {
+      assert(!LI.isVolatile());
+      assert(LI.getType()->isIntegerTy() &&
+             "Only integer type loads and stores are split");
+      assert(LI.getType()->getIntegerBitWidth() ==
+             TD.getTypeStoreSizeInBits(LI.getType()) &&
+             "Non-byte-multiple bit width");
+      assert(LI.getType()->getIntegerBitWidth() ==
+             TD.getTypeSizeInBits(OldAI.getAllocatedType()) &&
+             "Only alloca-wide loads can be split and recomposed");
+      IntegerType *NarrowTy = Type::getIntNTy(LI.getContext(), Size * 8);
+      bool IsConvertable = (BeginOffset - NewAllocaBeginOffset == 0) &&
+                           canConvertValue(TD, NewAllocaTy, NarrowTy);
+      Value *V;
+      // Move the insertion point just past the load so that we can refer to it.
+      IRB.SetInsertPoint(llvm::next(BasicBlock::iterator(&LI)));
+      if (IsConvertable)
+        V = convertValue(TD, IRB,
+                         IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
+                                               getName(".load")),
+                         NarrowTy);
+      else
+        V = IRB.CreateAlignedLoad(
+          getAdjustedAllocaPtr(IRB, NarrowTy->getPointerTo()),
+          getPartitionTypeAlign(NarrowTy), getName(".load"));
+      // Create a placeholder value with the same type as LI to use as the
+      // basis for the new value. This allows us to replace the uses of LI with
+      // the computed value, and then replace the placeholder with LI, leaving
+      // LI only used for this computation.
+      Value *Placeholder
+        = IRB.CreateLoad(UndefValue::get(LI.getType()->getPointerTo()));
+      V = insertInteger(TD, IRB, Placeholder, V, BeginOffset,
+                        getName(".insert"));
+      LI.replaceAllUsesWith(V);
+      Placeholder->replaceAllUsesWith(&LI);
+      cast<Instruction>(Placeholder)->eraseFromParent();
+      if (Pass.DeadSplitInsts.insert(&LI))
+        Pass.DeadInsts.push_back(&LI);
+      DEBUG(dbgs() << "          to: " << *V << "\n");
+      return IsConvertable;
+    }
+
     if (IntTy && LI.getType()->isIntegerTy())
       return rewriteIntegerLoad(IRB, LI);
 
@@ -2540,6 +2615,39 @@ private:
     if (VecTy)
       return rewriteVectorizedStoreInst(IRB, SI, OldOp);
     Type *ValueTy = SI.getValueOperand()->getType();
+
+    uint64_t Size = EndOffset - BeginOffset;
+    if (Size < TD.getTypeStoreSize(ValueTy)) {
+      assert(!SI.isVolatile());
+      assert(ValueTy->isIntegerTy() &&
+             "Only integer type loads and stores are split");
+      assert(ValueTy->getIntegerBitWidth() ==
+             TD.getTypeStoreSizeInBits(ValueTy) &&
+             "Non-byte-multiple bit width");
+      assert(ValueTy->getIntegerBitWidth() ==
+             TD.getTypeSizeInBits(OldAI.getAllocatedType()) &&
+             "Only alloca-wide stores can be split and recomposed");
+      IntegerType *NarrowTy = Type::getIntNTy(SI.getContext(), Size * 8);
+      Value *V = extractInteger(TD, IRB, SI.getValueOperand(), NarrowTy,
+                                BeginOffset, getName(".extract"));
+      StoreInst *NewSI;
+      bool IsConvertable = (BeginOffset - NewAllocaBeginOffset == 0) &&
+                           canConvertValue(TD, NarrowTy, NewAllocaTy);
+      if (IsConvertable)
+        NewSI = IRB.CreateAlignedStore(convertValue(TD, IRB, V, NewAllocaTy),
+                                       &NewAI, NewAI.getAlignment());
+      else
+        NewSI = IRB.CreateAlignedStore(
+          V, getAdjustedAllocaPtr(IRB, NarrowTy->getPointerTo()),
+          getPartitionTypeAlign(NarrowTy));
+      (void)NewSI;
+      if (Pass.DeadSplitInsts.insert(&SI))
+        Pass.DeadInsts.push_back(&SI);
+
+      DEBUG(dbgs() << "          to: " << *NewSI << "\n");
+      return IsConvertable;
+    }
+
     if (IntTy && ValueTy->isIntegerTy())
       return rewriteIntegerStore(IRB, SI);
 
@@ -3173,6 +3281,9 @@ static Type *getTypePartition(const DataLayout &TD, Type *Ty,
                               uint64_t Offset, uint64_t Size) {
   if (Offset == 0 && TD.getTypeAllocSize(Ty) == Size)
     return stripAggregateTypeWrapping(TD, Ty);
+  if (Offset > TD.getTypeAllocSize(Ty) ||
+      (TD.getTypeAllocSize(Ty) - Offset) < Size)
+    return 0;
 
   if (SequentialType *SeqTy = dyn_cast<SequentialType>(Ty)) {
     // We can't partition pointers...
@@ -3464,6 +3575,8 @@ void SROA::deleteDeadInstructions(SmallPtrSet<AllocaInst*, 4> &DeletedAllocas) {
     Instruction *I = DeadInsts.pop_back_val();
     DEBUG(dbgs() << "Deleting dead instruction: " << *I << "\n");
 
+    I->replaceAllUsesWith(UndefValue::get(I->getType()));
+
     for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI)
       if (Instruction *U = dyn_cast<Instruction>(*OI)) {
         // Zero out the operand and see if it becomes trivially dead.
author	Chandler Carruth <chandlerc@gmail.com>	2012-10-25 04:37:07 +0000
committer	Chandler Carruth <chandlerc@gmail.com>	2012-10-25 04:37:07 +0000
commit	a2b88163af30f59d12ae0172565f3406bdbf6c45 (patch)
tree	2bca1e0574a0720c4ceba0c74b31231ddf27efd6 /lib/Transforms/Scalar/SROA.cpp
parent	8dbac7b529cfb73bcd0ceef514e5c1d247cf3baa (diff)
download	llvm-a2b88163af30f59d12ae0172565f3406bdbf6c45.tar.gz llvm-a2b88163af30f59d12ae0172565f3406bdbf6c45.tar.bz2 llvm-a2b88163af30f59d12ae0172565f3406bdbf6c45.tar.xz