summaryrefslogtreecommitdiff
path: root/lib/Transforms/Scalar/SROA.cpp
diff options
context:
space:
mode:
authorChandler Carruth <chandlerc@gmail.com>2013-07-28 08:27:12 +0000
committerChandler Carruth <chandlerc@gmail.com>2013-07-28 08:27:12 +0000
commitcea60aff34ada256a77f5760863218a976786f45 (patch)
treed0f2e7810ecd6b791b1a17ab98400e91cee63111 /lib/Transforms/Scalar/SROA.cpp
parent6c3a95dab561ef9747876d14014e965f8688f55b (diff)
downloadllvm-cea60aff34ada256a77f5760863218a976786f45.tar.gz
llvm-cea60aff34ada256a77f5760863218a976786f45.tar.bz2
llvm-cea60aff34ada256a77f5760863218a976786f45.tar.xz
Now that mem2reg understands how to cope with a slightly wider set of
uses of an alloca, we can pre-compute promotability while analyzing an alloca for splitting in SROA. That lets us short-circuit the common case of a bunch of trivially promotable allocas. This cuts 20% to 30% off the run time of SROA for typical frontend-generated IR sequences I'm seeing. It gets the new SROA to within 20% of ScalarRepl for such code. My current benchmark for these numbers is PR15412, but it fits the general pattern of IR emitted by Clang so it should be widely applicable. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@187323 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Transforms/Scalar/SROA.cpp')
-rw-r--r--lib/Transforms/Scalar/SROA.cpp93
1 files changed, 81 insertions, 12 deletions
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index 1525caa868..ef4558878a 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -197,6 +197,18 @@ public:
/// \brief Construct the slices of a particular alloca.
AllocaSlices(const DataLayout &DL, AllocaInst &AI);
+ /// \brief Whether we determined during the trivial analysis of the alloca
+ /// that it was immediately promotable with mem2reg.
+ bool isAllocaPromotable() const { return IsAllocaPromotable; }
+
+ /// \brief A list of directly stored values when \c isAllocaPromotable is
+ /// true.
+ ///
+ /// The contents are undefined if the alloca is not trivially promotable.
+ /// This is used to detect other allocas which should be iterated on when
+ /// doing direct promotion.
+ ArrayRef<Value *> getStoredValues() const { return StoredValues; }
+
/// \brief Test whether a pointer to the allocation escapes our analysis.
///
/// If this is true, the slices are never fully built and should be
@@ -253,10 +265,20 @@ private:
class SliceBuilder;
friend class AllocaSlices::SliceBuilder;
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// \brief Handle to alloca instruction to simplify method interfaces.
AllocaInst &AI;
-#endif
+
+ /// \brief A flag indicating if the alloca is trivially promotable.
+ ///
+ /// While walking the alloca's uses we track when the uses exceed what
+ /// mem2reg can trivially handle. This essentially should match the logic in
+ /// \c isAllocaPromotable but re-using the existing walk of the pointer uses.
+ bool IsAllocaPromotable;
+
+ /// \brief Storage for stored values.
+ ///
+ /// Only used while the alloca is trivially promotable.
+ SmallVector<Value *, 8> StoredValues;
/// \brief The instruction responsible for this alloca not having a known set
/// of slices.
@@ -325,9 +347,9 @@ class AllocaSlices::SliceBuilder : public PtrUseVisitor<SliceBuilder> {
SmallPtrSet<Instruction *, 4> VisitedDeadInsts;
public:
- SliceBuilder(const DataLayout &DL, AllocaInst &AI, AllocaSlices &S)
+ SliceBuilder(const DataLayout &DL, AllocaSlices &S)
: PtrUseVisitor<SliceBuilder>(DL),
- AllocSize(DL.getTypeAllocSize(AI.getAllocatedType())), S(S) {}
+ AllocSize(DL.getTypeAllocSize(S.AI.getAllocatedType())), S(S) {}
private:
void markAsDead(Instruction &I) {
@@ -380,6 +402,15 @@ private:
if (GEPI.use_empty())
return markAsDead(GEPI);
+ // FIXME: mem2reg shouldn't care about the nature of the GEP, but instead
+ // the offsets of the loads. Until then, we short-circuit here for the
+ // promotable case.
+ if (GEPI.hasAllZeroIndices())
+ return Base::enqueueUsers(GEPI);
+
+ // Otherwise, there is something in the GEP, so we disable mem2reg and
+ // accumulate it.
+ S.IsAllocaPromotable = false;
return Base::visitGetElementPtrInst(GEPI);
}
@@ -396,6 +427,13 @@ private:
bool IsSplittable =
Ty->isIntegerTy() && !IsVolatile && Offset == 0 && Size >= AllocSize;
+ // mem2reg can only promote non-volatile loads and stores which exactly
+ // load the alloca (no offset and the right type).
+ if (IsVolatile || Offset != 0 || Ty != S.AI.getAllocatedType())
+ S.IsAllocaPromotable = false;
+ if (S.IsAllocaPromotable)
+ assert(Offset == 0);
+
insertUse(I, Offset, Size, IsSplittable);
}
@@ -436,6 +474,9 @@ private:
return markAsDead(SI);
}
+ if (S.IsAllocaPromotable)
+ S.StoredValues.push_back(ValOp);
+
assert((!SI.isSimple() || ValOp->getType()->isSingleValueType()) &&
"All simple FCA stores should have been pre-split");
handleLoadOrStore(ValOp->getType(), SI, Offset, Size, SI.isVolatile());
@@ -453,6 +494,8 @@ private:
if (!IsOffsetKnown)
return PI.setAborted(&II);
+ S.IsAllocaPromotable = false;
+
insertUse(II, Offset,
Length ? Length->getLimitedValue()
: AllocSize - Offset.getLimitedValue(),
@@ -469,6 +512,8 @@ private:
if (!IsOffsetKnown)
return PI.setAborted(&II);
+ S.IsAllocaPromotable = false;
+
uint64_t RawOffset = Offset.getLimitedValue();
uint64_t Size = Length ? Length->getLimitedValue()
: AllocSize - RawOffset;
@@ -529,6 +574,8 @@ private:
return;
}
+ S.IsAllocaPromotable = false;
+
Base::visitIntrinsicInst(II);
}
@@ -603,6 +650,8 @@ private:
return;
}
+ S.IsAllocaPromotable = false;
+
insertUse(PN, Offset, PHISize);
}
@@ -610,14 +659,18 @@ private:
if (SI.use_empty())
return markAsDead(SI);
if (Value *Result = foldSelectInst(SI)) {
- if (Result == *U)
+ if (Result == *U) {
// If the result of the constant fold will be the pointer, recurse
// through the select as if we had RAUW'ed it.
enqueueUsers(SI);
- else
+
+ // FIXME: mem2reg should support this pattern, but it doesn't.
+ S.IsAllocaPromotable = false;
+ } else {
// Otherwise the operand to the select is dead, and we can replace it
// with undef.
S.DeadOperands.push_back(U);
+ }
return;
}
@@ -644,6 +697,8 @@ private:
return;
}
+ S.IsAllocaPromotable = false;
+
insertUse(SI, Offset, SelectSize);
}
@@ -654,12 +709,8 @@ private:
};
AllocaSlices::AllocaSlices(const DataLayout &DL, AllocaInst &AI)
- :
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
- AI(AI),
-#endif
- PointerEscapingInstr(0) {
- SliceBuilder PB(DL, AI, *this);
+ : AI(AI), IsAllocaPromotable(true), PointerEscapingInstr(0) {
+ SliceBuilder PB(DL, *this);
SliceBuilder::PtrInfo PtrI = PB.visitPtr(AI);
if (PtrI.isEscaped() || PtrI.isAborted()) {
// FIXME: We should sink the escape vs. abort info into the caller nicely,
@@ -3315,6 +3366,24 @@ bool SROA::runOnAlloca(AllocaInst &AI) {
if (S.begin() == S.end())
return Changed;
+ // Trivially promotable, don't go through the splitting and rewriting.
+ if (S.isAllocaPromotable()) {
+ DEBUG(dbgs() << " Directly promoting alloca: " << AI << "\n");
+ PromotableAllocas.push_back(&AI);
+
+ // Walk through the stored values quickly here to handle directly
+ // promotable allocas that require iterating on other allocas.
+ ArrayRef<Value *> StoredValues = S.getStoredValues();
+ for (ArrayRef<Value *>::iterator SVI = StoredValues.begin(),
+ SVE = StoredValues.end();
+ SVI != SVE; ++SVI)
+ if ((*SVI)->getType()->isPointerTy())
+ if (AllocaInst *SAI =
+ dyn_cast<AllocaInst>((*SVI)->stripInBoundsOffsets()))
+ PostPromotionWorklist.insert(SAI);
+ return true;
+ }
+
Changed |= splitAlloca(AI, S);
DEBUG(dbgs() << " Speculating PHIs\n");