summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEvgeniy Stepanov <eugeni.stepanov@gmail.com>2014-06-25 07:54:58 +0000
committerEvgeniy Stepanov <eugeni.stepanov@gmail.com>2014-06-25 07:54:58 +0000
commit98726c311b0dc19076bf242973d92ca1858febe7 (patch)
tree4f05d831dd12a24b620b5d87772468891b8d8e99
parentaa2e057bc108c32529815afb8be4ef090acddd94 (diff)
downloadllvm-98726c311b0dc19076bf242973d92ca1858febe7.tar.gz
llvm-98726c311b0dc19076bf242973d92ca1858febe7.tar.bz2
llvm-98726c311b0dc19076bf242973d92ca1858febe7.tar.xz
[LICM] Don't create more than one copy of an instruction per loop exit block when sinking.
Fixes exponential compilation complexity in PR19835, caused by LICM::sink not handling the following pattern well: f = op g e = op f, g d = op e c = op d, e b = op c a = op b, c When an instruction with N uses is sunk, each of its operands gets N new uses (all of them - phi nodes). In the example above, if a had 1 use, c would have 2, e would have 4, and g would have 8. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211673 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Transforms/Scalar/LICM.cpp58
-rw-r--r--test/Transforms/LICM/extra-copies.ll29
2 files changed, 63 insertions, 24 deletions
diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp
index 0a8d16f49e..c16ffdbf64 100644
--- a/lib/Transforms/Scalar/LICM.cpp
+++ b/lib/Transforms/Scalar/LICM.cpp
@@ -550,6 +550,9 @@ void LICM::sink(Instruction &I) {
SmallPtrSet<BasicBlock *, 32> ExitBlockSet(ExitBlocks.begin(), ExitBlocks.end());
#endif
+ // Clones of this instruction. Don't create more than one per exit block!
+ SmallDenseMap<BasicBlock *, Instruction *, 32> SunkCopies;
+
// If this instruction is only used outside of the loop, then all users are
// PHI nodes in exit blocks due to LCSSA form. Just RAUW them with clones of
// the instruction.
@@ -561,30 +564,37 @@ void LICM::sink(Instruction &I) {
assert(ExitBlockSet.count(ExitBlock) &&
"The LCSSA PHI is not in an exit block!");
- Instruction *New = I.clone();
- ExitBlock->getInstList().insert(ExitBlock->getFirstInsertionPt(), New);
- if (!I.getName().empty())
- New->setName(I.getName() + ".le");
-
- // Build LCSSA PHI nodes for any in-loop operands. Note that this is
- // particularly cheap because we can rip off the PHI node that we're
- // replacing for the number and blocks of the predecessors.
- // OPT: If this shows up in a profile, we can instead finish sinking all
- // invariant instructions, and then walk their operands to re-establish
- // LCSSA. That will eliminate creating PHI nodes just to nuke them when
- // sinking bottom-up.
- for (User::op_iterator OI = New->op_begin(), OE = New->op_end(); OI != OE;
- ++OI)
- if (Instruction *OInst = dyn_cast<Instruction>(*OI))
- if (Loop *OLoop = LI->getLoopFor(OInst->getParent()))
- if (!OLoop->contains(PN)) {
- PHINode *OpPN = PHINode::Create(
- OInst->getType(), PN->getNumIncomingValues(),
- OInst->getName() + ".lcssa", ExitBlock->begin());
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
- OpPN->addIncoming(OInst, PN->getIncomingBlock(i));
- *OI = OpPN;
- }
+ Instruction *New;
+ auto It = SunkCopies.find(ExitBlock);
+ if (It != SunkCopies.end()) {
+ New = It->second;
+ } else {
+ New = I.clone();
+ SunkCopies[ExitBlock] = New;
+ ExitBlock->getInstList().insert(ExitBlock->getFirstInsertionPt(), New);
+ if (!I.getName().empty())
+ New->setName(I.getName() + ".le");
+
+ // Build LCSSA PHI nodes for any in-loop operands. Note that this is
+ // particularly cheap because we can rip off the PHI node that we're
+ // replacing for the number and blocks of the predecessors.
+ // OPT: If this shows up in a profile, we can instead finish sinking all
+ // invariant instructions, and then walk their operands to re-establish
+ // LCSSA. That will eliminate creating PHI nodes just to nuke them when
+ // sinking bottom-up.
+ for (User::op_iterator OI = New->op_begin(), OE = New->op_end(); OI != OE;
+ ++OI)
+ if (Instruction *OInst = dyn_cast<Instruction>(*OI))
+ if (Loop *OLoop = LI->getLoopFor(OInst->getParent()))
+ if (!OLoop->contains(PN)) {
+ PHINode *OpPN = PHINode::Create(
+ OInst->getType(), PN->getNumIncomingValues(),
+ OInst->getName() + ".lcssa", ExitBlock->begin());
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ OpPN->addIncoming(OInst, PN->getIncomingBlock(i));
+ *OI = OpPN;
+ }
+ }
PN->replaceAllUsesWith(New);
PN->eraseFromParent();
diff --git a/test/Transforms/LICM/extra-copies.ll b/test/Transforms/LICM/extra-copies.ll
new file mode 100644
index 0000000000..ef52f9f404
--- /dev/null
+++ b/test/Transforms/LICM/extra-copies.ll
@@ -0,0 +1,29 @@
+; RUN: opt < %s -licm -S | FileCheck %s
+; PR19835
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @f(i32 %x) {
+entry:
+ br label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %storemerge4 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %mul = mul nsw i32 %x, %x
+ %add2 = add nsw i32 %mul, %x
+ %mul3 = add nsw i32 %add2, %mul
+ %inc = add nsw i32 %storemerge4, 1
+ %cmp = icmp slt i32 %inc, 100
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body
+ %a9.0.lcssa = phi i32 [ %mul3, %for.body ]
+ ret i32 %a9.0.lcssa
+}
+
+; Test that there is exactly one copy of mul nsw i32 %x, %x in the exit block.
+; CHECK: define i32 @f(i32 [[X:%.*]])
+; CHECK: for.end:
+; CHECK-NOT: mul nsw i32 [[X]], [[X]]
+; CHECK: mul nsw i32 [[X]], [[X]]
+; CHECK-NOT: mul nsw i32 [[X]], [[X]]