summaryrefslogtreecommitdiff
path: root/lib/Transforms/Scalar/SROA.cpp
diff options
context:
space:
mode:
authorChandler Carruth <chandlerc@gmail.com>2013-07-28 08:27:12 +0000
committerChandler Carruth <chandlerc@gmail.com>2013-07-28 08:27:12 +0000
commitcea60aff34ada256a77f5760863218a976786f45 (patch)
treed0f2e7810ecd6b791b1a17ab98400e91cee63111 /lib/Transforms/Scalar/SROA.cpp
parent6c3a95dab561ef9747876d14014e965f8688f55b (diff)
downloadllvm-cea60aff34ada256a77f5760863218a976786f45.tar.gz
llvm-cea60aff34ada256a77f5760863218a976786f45.tar.bz2
llvm-cea60aff34ada256a77f5760863218a976786f45.tar.xz
Now that mem2reg understands how to cope with a slightly wider set of
uses of an alloca, we can pre-compute promotability while analyzing an alloca for splitting in SROA. That lets us short-circuit the common case of a bunch of trivially promotable allocas. This cuts 20% to 30% off the run time of SROA for typical frontend-generated IR sequences I'm seeing. It gets the new SROA to within 20% of ScalarRepl for such code. My current benchmark for these numbers is PR15412, but it fits the general pattern of IR emitted by Clang so it should be widely applicable. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@187323 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Transforms/Scalar/SROA.cpp')
-rw-r--r--lib/Transforms/Scalar/SROA.cpp93
1 files changed, 81 insertions, 12 deletions
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index 1525caa868..ef4558878a 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -197,6 +197,18 @@ public:
/// \brief Construct the slices of a particular alloca.
AllocaSlices(const DataLayout &DL, AllocaInst &AI);
+ /// \brief Whether we determined during the trivial analysis of the alloca
+ /// that it was immediately promotable with mem2reg.
+ bool isAllocaPromotable() const { return IsAllocaPromotable; }
+
+ /// \brief A list of directly stored values when \c isAllocaPromotable is
+ /// true.
+ ///
+ /// The contents are undefined if the alloca is not trivially promotable.
+ /// This is used to detect other allocas which should be iterated on when
+ /// doing direct promotion.
+ ArrayRef<Value *> getStoredValues() const { return StoredValues; }
+
/// \brief Test whether a pointer to the allocation escapes our analysis.
///
/// If this is true, the slices are never fully built and should be
@@ -253,10 +265,20 @@ private:
class SliceBuilder;
friend class AllocaSlices::SliceBuilder;
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// \brief Handle to alloca instruction to simplify method interfaces.
AllocaInst &AI;
-#endif
+
+ /// \brief A flag indicating if the alloca is trivially promotable.
+ ///
+ /// While walking the alloca's uses we track when the uses exceed what
+ /// mem2reg can trivially handle. This essentially should match the logic in
+ /// \c isAllocaPromotable but re-using the existing walk of the pointer uses.
+ bool IsAllocaPromotable;
+
+ /// \brief Storage for stored values.
+ ///
+ /// Only used while the alloca is trivially promotable.
+ SmallVector<Value *, 8> StoredValues;
/// \brief The instruction responsible for this alloca not having a known set
/// of slices.
@@ -325,9 +347,9 @@ class AllocaSlices::SliceBuilder : public PtrUseVisitor<SliceBuilder> {
SmallPtrSet<Instruction *, 4> VisitedDeadInsts;
public:
- SliceBuilder(const DataLayout &DL, AllocaInst &AI, AllocaSlices &S)
+ SliceBuilder(const DataLayout &DL, AllocaSlices &S)
: PtrUseVisitor<SliceBuilder>(DL),
- AllocSize(DL.getTypeAllocSize(AI.getAllocatedType())), S(S) {}
+ AllocSize(DL.getTypeAllocSize(S.AI.getAllocatedType())), S(S) {}
private:
void markAsDead(Instruction &I) {
@@ -380,6 +402,15 @@ private:
if (GEPI.use_empty())
return markAsDead(GEPI);
+ // FIXME: mem2reg shouldn't care about the nature of the GEP, but instead
+ // the offsets of the loads. Until then, we short-circuit here for the
+ // promotable case.
+ if (GEPI.hasAllZeroIndices())
+ return Base::enqueueUsers(GEPI);
+
+ // Otherwise, there is something in the GEP, so we disable mem2reg and
+ // accumulate it.
+ S.IsAllocaPromotable = false;
return Base::visitGetElementPtrInst(GEPI);
}
@@ -396,6 +427,13 @@ private:
bool IsSplittable =
Ty->isIntegerTy() && !IsVolatile && Offset == 0 && Size >= AllocSize;
+ // mem2reg can only promote non-volatile loads and stores which exactly
+ // load the alloca (no offset and the right type).
+ if (IsVolatile || Offset != 0 || Ty != S.AI.getAllocatedType())
+ S.IsAllocaPromotable = false;
+ if (S.IsAllocaPromotable)
+ assert(Offset == 0);
+
insertUse(I, Offset, Size, IsSplittable);
}
@@ -436,6 +474,9 @@ private:
return markAsDead(SI);
}
+ if (S.IsAllocaPromotable)
+ S.StoredValues.push_back(ValOp);
+
assert((!SI.isSimple() || ValOp->getType()->isSingleValueType()) &&
"All simple FCA stores should have been pre-split");
handleLoadOrStore(ValOp->getType(), SI, Offset, Size, SI.isVolatile());
@@ -453,6 +494,8 @@ private:
if (!IsOffsetKnown)
return PI.setAborted(&II);
+ S.IsAllocaPromotable = false;
+
insertUse(II, Offset,
Length ? Length->getLimitedValue()
: AllocSize - Offset.getLimitedValue(),
@@ -469,6 +512,8 @@ private:
if (!IsOffsetKnown)
return PI.setAborted(&II);
+ S.IsAllocaPromotable = false;
+
uint64_t RawOffset = Offset.getLimitedValue();
uint64_t Size = Length ? Length->getLimitedValue()
: AllocSize - RawOffset;
@@ -529,6 +574,8 @@ private:
return;
}
+ S.IsAllocaPromotable = false;
+
Base::visitIntrinsicInst(II);
}
@@ -603,6 +650,8 @@ private:
return;
}
+ S.IsAllocaPromotable = false;
+
insertUse(PN, Offset, PHISize);
}
@@ -610,14 +659,18 @@ private:
if (SI.use_empty())
return markAsDead(SI);
if (Value *Result = foldSelectInst(SI)) {
- if (Result == *U)
+ if (Result == *U) {
// If the result of the constant fold will be the pointer, recurse
// through the select as if we had RAUW'ed it.
enqueueUsers(SI);
- else
+
+ // FIXME: mem2reg should support this pattern, but it doesn't.
+ S.IsAllocaPromotable = false;
+ } else {
// Otherwise the operand to the select is dead, and we can replace it
// with undef.
S.DeadOperands.push_back(U);
+ }
return;
}
@@ -644,6 +697,8 @@ private:
return;
}
+ S.IsAllocaPromotable = false;
+
insertUse(SI, Offset, SelectSize);
}
@@ -654,12 +709,8 @@ private:
};
AllocaSlices::AllocaSlices(const DataLayout &DL, AllocaInst &AI)
- :
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
- AI(AI),
-#endif
- PointerEscapingInstr(0) {
- SliceBuilder PB(DL, AI, *this);
+ : AI(AI), IsAllocaPromotable(true), PointerEscapingInstr(0) {
+ SliceBuilder PB(DL, *this);
SliceBuilder::PtrInfo PtrI = PB.visitPtr(AI);
if (PtrI.isEscaped() || PtrI.isAborted()) {
// FIXME: We should sink the escape vs. abort info into the caller nicely,
@@ -3315,6 +3366,24 @@ bool SROA::runOnAlloca(AllocaInst &AI) {
if (S.begin() == S.end())
return Changed;
+ // Trivially promotable, don't go through the splitting and rewriting.
+ if (S.isAllocaPromotable()) {
+ DEBUG(dbgs() << " Directly promoting alloca: " << AI << "\n");
+ PromotableAllocas.push_back(&AI);
+
+ // Walk through the stored values quickly here to handle directly
+ // promotable allocas that require iterating on other allocas.
+ ArrayRef<Value *> StoredValues = S.getStoredValues();
+ for (ArrayRef<Value *>::iterator SVI = StoredValues.begin(),
+ SVE = StoredValues.end();
+ SVI != SVE; ++SVI)
+ if ((*SVI)->getType()->isPointerTy())
+ if (AllocaInst *SAI =
+ dyn_cast<AllocaInst>((*SVI)->stripInBoundsOffsets()))
+ PostPromotionWorklist.insert(SAI);
+ return true;
+ }
+
Changed |= splitAlloca(AI, S);
DEBUG(dbgs() << " Speculating PHIs\n");