From cea60aff34ada256a77f5760863218a976786f45 Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Sun, 28 Jul 2013 08:27:12 +0000 Subject: Now that mem2reg understands how to cope with a slightly wider set of uses of an alloca, we can pre-compute promotability while analyzing an alloca for splitting in SROA. That lets us short-circuit the common case of a bunch of trivially promotable allocas. This cuts 20% to 30% off the run time of SROA for typical frontend-generated IR sequneces I'm seeing. It gets the new SROA to within 20% of ScalarRepl for such code. My current benchmark for these numbers is PR15412, but it fits the general pattern of IR emitted by Clang so it should be widely applicable. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@187323 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/SROA.cpp | 93 ++++++++++++++++++++++++++++++++++++------ 1 file changed, 81 insertions(+), 12 deletions(-) (limited to 'lib/Transforms/Scalar/SROA.cpp') diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp index 1525caa868..ef4558878a 100644 --- a/lib/Transforms/Scalar/SROA.cpp +++ b/lib/Transforms/Scalar/SROA.cpp @@ -197,6 +197,18 @@ public: /// \brief Construct the slices of a particular alloca. AllocaSlices(const DataLayout &DL, AllocaInst &AI); + /// \brief Whether we determined during the trivial analysis of the alloca + /// that it was immediately promotable with mem2reg. + bool isAllocaPromotable() const { return IsAllocaPromotable; } + + /// \brief A list of directly stored values when \c isAllocaPromotable is + /// true. + /// + /// The contents are undefined if the alloca is not trivially promotable. + /// This is used to detect other allocas which should be iterated on when + /// doing direct promotion. + ArrayRef getStoredValues() const { return StoredValues; } + /// \brief Test whether a pointer to the allocation escapes our analysis. /// /// If this is true, the slices are never fully built and should be @@ -253,10 +265,20 @@ private: class SliceBuilder; friend class AllocaSlices::SliceBuilder; -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) /// \brief Handle to alloca instruction to simplify method interfaces. AllocaInst &AI; -#endif + + /// \brief A flag indicating if the alloca is trivially promotable. + /// + /// While walking the alloca's uses we track when the uses exceed what + /// mem2reg can trivially handle. This essentially should match the logic in + /// \c isAllocaPromotable but re-using the existing walk of the pointer uses. + bool IsAllocaPromotable; + + /// \brief Storage for stored values. + /// + /// Only used while the alloca is trivially promotable. + SmallVector StoredValues; /// \brief The instruction responsible for this alloca not having a known set /// of slices. @@ -325,9 +347,9 @@ class AllocaSlices::SliceBuilder : public PtrUseVisitor { SmallPtrSet VisitedDeadInsts; public: - SliceBuilder(const DataLayout &DL, AllocaInst &AI, AllocaSlices &S) + SliceBuilder(const DataLayout &DL, AllocaSlices &S) : PtrUseVisitor(DL), - AllocSize(DL.getTypeAllocSize(AI.getAllocatedType())), S(S) {} + AllocSize(DL.getTypeAllocSize(S.AI.getAllocatedType())), S(S) {} private: void markAsDead(Instruction &I) { @@ -380,6 +402,15 @@ private: if (GEPI.use_empty()) return markAsDead(GEPI); + // FIXME: mem2reg shouldn't care about the nature of the GEP, but instead + // the offsets of the loads. Until then, we short-circuit here for the + // promotable case. + if (GEPI.hasAllZeroIndices()) + return Base::enqueueUsers(GEPI); + + // Otherwise, there is something in the GEP, so we disable mem2reg and + // accumulate it. + S.IsAllocaPromotable = false; return Base::visitGetElementPtrInst(GEPI); } @@ -396,6 +427,13 @@ private: bool IsSplittable = Ty->isIntegerTy() && !IsVolatile && Offset == 0 && Size >= AllocSize; + // mem2reg can only promote non-volatile loads and stores which exactly + // load the alloca (no offset and the right type). + if (IsVolatile || Offset != 0 || Ty != S.AI.getAllocatedType()) + S.IsAllocaPromotable = false; + if (S.IsAllocaPromotable) + assert(Offset == 0); + insertUse(I, Offset, Size, IsSplittable); } @@ -436,6 +474,9 @@ private: return markAsDead(SI); } + if (S.IsAllocaPromotable) + S.StoredValues.push_back(ValOp); + assert((!SI.isSimple() || ValOp->getType()->isSingleValueType()) && "All simple FCA stores should have been pre-split"); handleLoadOrStore(ValOp->getType(), SI, Offset, Size, SI.isVolatile()); @@ -453,6 +494,8 @@ private: if (!IsOffsetKnown) return PI.setAborted(&II); + S.IsAllocaPromotable = false; + insertUse(II, Offset, Length ? Length->getLimitedValue() : AllocSize - Offset.getLimitedValue(), @@ -469,6 +512,8 @@ private: if (!IsOffsetKnown) return PI.setAborted(&II); + S.IsAllocaPromotable = false; + uint64_t RawOffset = Offset.getLimitedValue(); uint64_t Size = Length ? Length->getLimitedValue() : AllocSize - RawOffset; @@ -529,6 +574,8 @@ private: return; } + S.IsAllocaPromotable = false; + Base::visitIntrinsicInst(II); } @@ -603,6 +650,8 @@ private: return; } + S.IsAllocaPromotable = false; + insertUse(PN, Offset, PHISize); } @@ -610,14 +659,18 @@ private: if (SI.use_empty()) return markAsDead(SI); if (Value *Result = foldSelectInst(SI)) { - if (Result == *U) + if (Result == *U) { // If the result of the constant fold will be the pointer, recurse // through the select as if we had RAUW'ed it. enqueueUsers(SI); - else + + // FIXME: mem2reg should support this pattern, but it doesn't. + S.IsAllocaPromotable = false; + } else { // Otherwise the operand to the select is dead, and we can replace it // with undef. S.DeadOperands.push_back(U); + } return; } @@ -644,6 +697,8 @@ private: return; } + S.IsAllocaPromotable = false; + insertUse(SI, Offset, SelectSize); } @@ -654,12 +709,8 @@ private: }; AllocaSlices::AllocaSlices(const DataLayout &DL, AllocaInst &AI) - : -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) - AI(AI), -#endif - PointerEscapingInstr(0) { - SliceBuilder PB(DL, AI, *this); + : AI(AI), IsAllocaPromotable(true), PointerEscapingInstr(0) { + SliceBuilder PB(DL, *this); SliceBuilder::PtrInfo PtrI = PB.visitPtr(AI); if (PtrI.isEscaped() || PtrI.isAborted()) { // FIXME: We should sink the escape vs. abort info into the caller nicely, @@ -3315,6 +3366,24 @@ bool SROA::runOnAlloca(AllocaInst &AI) { if (S.begin() == S.end()) return Changed; + // Trivially promotable, don't go through the splitting and rewriting. + if (S.isAllocaPromotable()) { + DEBUG(dbgs() << " Directly promoting alloca: " << AI << "\n"); + PromotableAllocas.push_back(&AI); + + // Walk through the stored values quickly here to handle directly + // promotable allocas that require iterating on other allocas. + ArrayRef StoredValues = S.getStoredValues(); + for (ArrayRef::iterator SVI = StoredValues.begin(), + SVE = StoredValues.end(); + SVI != SVE; ++SVI) + if ((*SVI)->getType()->isPointerTy()) + if (AllocaInst *SAI = + dyn_cast((*SVI)->stripInBoundsOffsets())) + PostPromotionWorklist.insert(SAI); + return true; + } + Changed |= splitAlloca(AI, S); DEBUG(dbgs() << " Speculating PHIs\n"); -- cgit v1.2.3