summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--lib/Transforms/Scalar/LoopStrengthReduce.cpp131
-rw-r--r--test/Transforms/LoopStrengthReduce/2011-12-04-loserreg.ll96
2 files changed, 181 insertions, 46 deletions
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 349d04524f..7867d9fad3 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -634,6 +634,19 @@ static Type *getAccessType(const Instruction *Inst) {
return AccessTy;
}
+/// isExistingPhi - Return true if this AddRec is already a phi in its loop.
+static bool isExistingPhi(const SCEVAddRecExpr *AR, ScalarEvolution &SE) {
+ for (BasicBlock::iterator I = AR->getLoop()->getHeader()->begin();
+ PHINode *PN = dyn_cast<PHINode>(I); ++I) {
+ if (SE.isSCEVable(PN->getType()) &&
+ (SE.getEffectiveSCEVType(PN->getType()) ==
+ SE.getEffectiveSCEVType(AR->getType())) &&
+ SE.getSCEV(PN) == AR)
+ return true;
+ }
+ return false;
+}
+
/// DeleteTriviallyDeadInstructions - If any of the instructions is the
/// specified set are trivially dead, delete them and see if this makes any of
/// their operands subsequently dead.
@@ -703,7 +716,8 @@ public:
const DenseSet<const SCEV *> &VisitedRegs,
const Loop *L,
const SmallVectorImpl<int64_t> &Offsets,
- ScalarEvolution &SE, DominatorTree &DT);
+ ScalarEvolution &SE, DominatorTree &DT,
+ SmallPtrSet<const SCEV *, 16> *LoserRegs = 0);
void print(raw_ostream &OS) const;
void dump() const;
@@ -716,7 +730,8 @@ private:
void RatePrimaryRegister(const SCEV *Reg,
SmallPtrSet<const SCEV *, 16> &Regs,
const Loop *L,
- ScalarEvolution &SE, DominatorTree &DT);
+ ScalarEvolution &SE, DominatorTree &DT,
+ SmallPtrSet<const SCEV *, 16> *LoserRegs);
};
}
@@ -736,18 +751,13 @@ void Cost::RateRegister(const SCEV *Reg,
// on other loops, and cannot be expected to change sibling loops. If the
// AddRec exists, consider it's register free and leave it alone. Otherwise,
// do not consider this formula at all.
- // FIXME: why do we need to generate such fomulae?
else if (!EnableNested || L->contains(AR->getLoop()) ||
(!AR->getLoop()->contains(L) &&
DT.dominates(L->getHeader(), AR->getLoop()->getHeader()))) {
- for (BasicBlock::iterator I = AR->getLoop()->getHeader()->begin();
- PHINode *PN = dyn_cast<PHINode>(I); ++I) {
- if (SE.isSCEVable(PN->getType()) &&
- (SE.getEffectiveSCEVType(PN->getType()) ==
- SE.getEffectiveSCEVType(AR->getType())) &&
- SE.getSCEV(PN) == AR)
- return;
- }
+ if (isExistingPhi(AR, SE))
+ return;
+
+ // For !EnableNested, never rewrite IVs in other loops.
if (!EnableNested) {
Loose();
return;
@@ -789,13 +799,22 @@ void Cost::RateRegister(const SCEV *Reg,
}
/// RatePrimaryRegister - Record this register in the set. If we haven't seen it
-/// before, rate it.
+/// before, rate it. Optional LoserRegs provides a way to declare any formula
+/// that refers to one of those regs an instant loser.
void Cost::RatePrimaryRegister(const SCEV *Reg,
SmallPtrSet<const SCEV *, 16> &Regs,
const Loop *L,
- ScalarEvolution &SE, DominatorTree &DT) {
- if (Regs.insert(Reg))
+ ScalarEvolution &SE, DominatorTree &DT,
+ SmallPtrSet<const SCEV *, 16> *LoserRegs) {
+ if (LoserRegs && LoserRegs->count(Reg)) {
+ Loose();
+ return;
+ }
+ if (Regs.insert(Reg)) {
RateRegister(Reg, Regs, L, SE, DT);
+ if (isLoser())
+ LoserRegs->insert(Reg);
+ }
}
void Cost::RateFormula(const Formula &F,
@@ -803,14 +822,15 @@ void Cost::RateFormula(const Formula &F,
const DenseSet<const SCEV *> &VisitedRegs,
const Loop *L,
const SmallVectorImpl<int64_t> &Offsets,
- ScalarEvolution &SE, DominatorTree &DT) {
+ ScalarEvolution &SE, DominatorTree &DT,
+ SmallPtrSet<const SCEV *, 16> *LoserRegs) {
// Tally up the registers.
if (const SCEV *ScaledReg = F.ScaledReg) {
if (VisitedRegs.count(ScaledReg)) {
Loose();
return;
}
- RatePrimaryRegister(ScaledReg, Regs, L, SE, DT);
+ RatePrimaryRegister(ScaledReg, Regs, L, SE, DT, LoserRegs);
if (isLoser())
return;
}
@@ -821,7 +841,7 @@ void Cost::RateFormula(const Formula &F,
Loose();
return;
}
- RatePrimaryRegister(BaseReg, Regs, L, SE, DT);
+ RatePrimaryRegister(BaseReg, Regs, L, SE, DT, LoserRegs);
if (isLoser())
return;
}
@@ -1103,7 +1123,6 @@ bool LSRUse::InsertFormula(const Formula &F) {
Formulae.push_back(F);
// Record registers now being used by this use.
- if (F.ScaledReg) Regs.insert(F.ScaledReg);
Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
return true;
@@ -1114,7 +1133,6 @@ void LSRUse::DeleteFormula(Formula &F) {
if (&F != &Formulae.back())
std::swap(F, Formulae.back());
Formulae.pop_back();
- assert(!Formulae.empty() && "LSRUse has no formulae left!");
}
/// RecomputeRegs - Recompute the Regs field, and update RegUses.
@@ -2912,6 +2930,7 @@ LSRInstance::GenerateAllReuseFormulae() {
void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
DenseSet<const SCEV *> VisitedRegs;
SmallPtrSet<const SCEV *, 16> Regs;
+ SmallPtrSet<const SCEV *, 16> LoserRegs;
#ifndef NDEBUG
bool ChangedFormulae = false;
#endif
@@ -2931,46 +2950,66 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
FIdx != NumForms; ++FIdx) {
Formula &F = LU.Formulae[FIdx];
- SmallVector<const SCEV *, 2> Key;
- for (SmallVectorImpl<const SCEV *>::const_iterator J = F.BaseRegs.begin(),
- JE = F.BaseRegs.end(); J != JE; ++J) {
- const SCEV *Reg = *J;
- if (RegUses.isRegUsedByUsesOtherThan(Reg, LUIdx))
- Key.push_back(Reg);
+ // Some formulas are instant losers. For example, they may depend on
+ // nonexistent AddRecs from other loops. These need to be filtered
+ // immediately, otherwise heuristics could choose them over others leading
+ // to an unsatisfactory solution. Passing LoserRegs into RateFormula here
+ // avoids the need to recompute this information across formulae using the
+ // same bad AddRec. Passing LoserRegs is also essential unless we remove
+ // the corresponding bad register from the Regs set.
+ Cost CostF;
+ Regs.clear();
+ CostF.RateFormula(F, Regs, VisitedRegs, L, LU.Offsets, SE, DT,
+ &LoserRegs);
+ if (CostF.isLoser()) {
+ // During initial formula generation, undesirable formulae are generated
+ // by uses within other loops that have some non-trivial address mode or
+ // use the postinc form of the IV. LSR needs to provide these formulae
+ // as the basis of rediscovering the desired formula that uses an AddRec
+ // corresponding to the existing phi. Once all formulae have been
+ // generated, these initial losers may be pruned.
+ DEBUG(dbgs() << " Filtering loser "; F.print(dbgs());
+ dbgs() << "\n");
}
- if (F.ScaledReg &&
- RegUses.isRegUsedByUsesOtherThan(F.ScaledReg, LUIdx))
- Key.push_back(F.ScaledReg);
- // Unstable sort by host order ok, because this is only used for
- // uniquifying.
- std::sort(Key.begin(), Key.end());
-
- std::pair<BestFormulaeTy::const_iterator, bool> P =
- BestFormulae.insert(std::make_pair(Key, FIdx));
- if (!P.second) {
+ else {
+ SmallVector<const SCEV *, 2> Key;
+ for (SmallVectorImpl<const SCEV *>::const_iterator J = F.BaseRegs.begin(),
+ JE = F.BaseRegs.end(); J != JE; ++J) {
+ const SCEV *Reg = *J;
+ if (RegUses.isRegUsedByUsesOtherThan(Reg, LUIdx))
+ Key.push_back(Reg);
+ }
+ if (F.ScaledReg &&
+ RegUses.isRegUsedByUsesOtherThan(F.ScaledReg, LUIdx))
+ Key.push_back(F.ScaledReg);
+ // Unstable sort by host order ok, because this is only used for
+ // uniquifying.
+ std::sort(Key.begin(), Key.end());
+
+ std::pair<BestFormulaeTy::const_iterator, bool> P =
+ BestFormulae.insert(std::make_pair(Key, FIdx));
+ if (P.second)
+ continue;
+
Formula &Best = LU.Formulae[P.first->second];
- Cost CostF;
- CostF.RateFormula(F, Regs, VisitedRegs, L, LU.Offsets, SE, DT);
- Regs.clear();
Cost CostBest;
- CostBest.RateFormula(Best, Regs, VisitedRegs, L, LU.Offsets, SE, DT);
Regs.clear();
+ CostBest.RateFormula(Best, Regs, VisitedRegs, L, LU.Offsets, SE, DT);
if (CostF < CostBest)
std::swap(F, Best);
DEBUG(dbgs() << " Filtering out formula "; F.print(dbgs());
dbgs() << "\n"
" in favor of formula "; Best.print(dbgs());
dbgs() << '\n');
+ }
#ifndef NDEBUG
- ChangedFormulae = true;
+ ChangedFormulae = true;
#endif
- LU.DeleteFormula(F);
- --FIdx;
- --NumForms;
- Any = true;
- continue;
- }
+ LU.DeleteFormula(F);
+ --FIdx;
+ --NumForms;
+ Any = true;
}
// Now that we've filtered out some formulae, recompute the Regs set.
diff --git a/test/Transforms/LoopStrengthReduce/2011-12-04-loserreg.ll b/test/Transforms/LoopStrengthReduce/2011-12-04-loserreg.ll
new file mode 100644
index 0000000000..5108650962
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/2011-12-04-loserreg.ll
@@ -0,0 +1,96 @@
+; RUN: llc < %s | FileCheck %s
+;
+; Test LSR's ability to prune formulae that refer to nonexistant
+; AddRecs in other loops.
+;
+; Unable to reduce this case further because it requires LSR to exceed
+; ComplexityLimit.
+;
+; We really just want to ensure that LSR can process this loop without
+; finding an unsatisfactory solution and bailing out. I've added
+; dummyout, an obvious candidate for postinc replacement so we can
+; verify that LSR removes it.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-darwin"
+
+; CHECK: @test
+; CHECK: # %for.body{{$}}
+; dummyiv copy should be removed
+; CHECK-NOT: movq
+; CHECK: # %for.cond19.preheader
+; dummycnt should be removed
+; CHECK-NOT: incq
+; CHECK: # %for.body23{{$}}
+define i64 @test(i64 %count, float* nocapture %srcrow, i32* nocapture %destrow) nounwind uwtable ssp {
+entry:
+ %cmp34 = icmp eq i64 %count, 0
+ br i1 %cmp34, label %for.end29, label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %dummyiv = phi i64 [ %dummycnt, %for.body ], [ 0, %entry ]
+ %indvars.iv39 = phi i64 [ %indvars.iv.next40, %for.body ], [ 0, %entry ]
+ %dp.036 = phi i32* [ %add.ptr, %for.body ], [ %destrow, %entry ]
+ %p.035 = phi float* [ %incdec.ptr4, %for.body ], [ %srcrow, %entry ]
+ %incdec.ptr = getelementptr inbounds float* %p.035, i64 1
+ %0 = load float* %incdec.ptr, align 4
+ %incdec.ptr2 = getelementptr inbounds float* %p.035, i64 2
+ %1 = load float* %incdec.ptr2, align 4
+ %incdec.ptr3 = getelementptr inbounds float* %p.035, i64 3
+ %2 = load float* %incdec.ptr3, align 4
+ %incdec.ptr4 = getelementptr inbounds float* %p.035, i64 4
+ %3 = load float* %incdec.ptr4, align 4
+ %4 = load i32* %dp.036, align 4
+ %conv5 = fptoui float %0 to i32
+ %or = or i32 %4, %conv5
+ %arrayidx6 = getelementptr inbounds i32* %dp.036, i64 1
+ %5 = load i32* %arrayidx6, align 4
+ %conv7 = fptoui float %1 to i32
+ %or8 = or i32 %5, %conv7
+ %arrayidx9 = getelementptr inbounds i32* %dp.036, i64 2
+ %6 = load i32* %arrayidx9, align 4
+ %conv10 = fptoui float %2 to i32
+ %or11 = or i32 %6, %conv10
+ %arrayidx12 = getelementptr inbounds i32* %dp.036, i64 3
+ %7 = load i32* %arrayidx12, align 4
+ %conv13 = fptoui float %3 to i32
+ %or14 = or i32 %7, %conv13
+ store i32 %or, i32* %dp.036, align 4
+ store i32 %or8, i32* %arrayidx6, align 4
+ store i32 %or11, i32* %arrayidx9, align 4
+ store i32 %or14, i32* %arrayidx12, align 4
+ %add.ptr = getelementptr inbounds i32* %dp.036, i64 4
+ %indvars.iv.next40 = add i64 %indvars.iv39, 4
+ %dummycnt = add i64 %dummyiv, 1
+ %cmp = icmp ult i64 %indvars.iv.next40, %count
+ br i1 %cmp, label %for.body, label %for.cond19.preheader
+
+for.cond19.preheader: ; preds = %for.body
+ %dummyout = add i64 %dummyiv, 1
+ %rem = and i64 %count, 3
+ %cmp2130 = icmp eq i64 %rem, 0
+ br i1 %cmp2130, label %for.end29, label %for.body23.lr.ph
+
+for.body23.lr.ph: ; preds = %for.cond19.preheader
+ %8 = and i64 %count, 3
+ br label %for.body23
+
+for.body23: ; preds = %for.body23, %for.body23.lr.ph
+ %indvars.iv = phi i64 [ 0, %for.body23.lr.ph ], [ %indvars.iv.next, %for.body23 ]
+ %dp.132 = phi i32* [ %add.ptr, %for.body23.lr.ph ], [ %incdec.ptr28, %for.body23 ]
+ %p.131 = phi float* [ %incdec.ptr4, %for.body23.lr.ph ], [ %incdec.ptr24, %for.body23 ]
+ %incdec.ptr24 = getelementptr inbounds float* %p.131, i64 1
+ %9 = load float* %incdec.ptr24, align 4
+ %10 = load i32* %dp.132, align 4
+ %conv25 = fptoui float %9 to i32
+ %or26 = or i32 %10, %conv25
+ store i32 %or26, i32* %dp.132, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %incdec.ptr28 = getelementptr inbounds i32* %dp.132, i64 1
+ %exitcond = icmp eq i64 %indvars.iv.next, %8
+ br i1 %exitcond, label %for.end29, label %for.body23
+
+for.end29: ; preds = %entry, %for.body23, %for.cond19.preheader
+ %result = phi i64 [ 0, %entry ], [ %dummyout, %for.body23 ], [ %dummyout, %for.cond19.preheader ]
+ ret i64 %result
+}