summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/llvm/Analysis/ScalarEvolutionExpander.h2
-rw-r--r--lib/Analysis/ScalarEvolutionExpander.cpp38
-rw-r--r--lib/Transforms/Scalar/IndVarSimplify.cpp3
-rw-r--r--lib/Transforms/Scalar/LoopStrengthReduce.cpp21
-rw-r--r--test/Transforms/LoopStrengthReduce/lsr-expand-quadratic.ll42
5 files changed, 91 insertions, 15 deletions
diff --git a/include/llvm/Analysis/ScalarEvolutionExpander.h b/include/llvm/Analysis/ScalarEvolutionExpander.h
index 1e88bd5445..4433be000d 100644
--- a/include/llvm/Analysis/ScalarEvolutionExpander.h
+++ b/include/llvm/Analysis/ScalarEvolutionExpander.h
@@ -26,7 +26,7 @@ namespace llvm {
/// Return true if the given expression is safe to expand in the sense that
/// all materialized values are safe to speculate.
- bool isSafeToExpand(const SCEV *S);
+ bool isSafeToExpand(const SCEV *S, ScalarEvolution &SE);
/// SCEVExpander - This class uses information about analyze scalars to
/// rewrite expressions in canonical form.
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index f373a80681..86a557b55f 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -1233,6 +1233,7 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
// Re-apply any non-loop-dominating scale.
if (PostLoopScale) {
+ assert(S->isAffine() && "Can't linearly scale non-affine recurrences.");
Result = InsertNoopCastOfTo(Result, IntTy);
Result = Builder.CreateMul(Result,
expandCodeFor(PostLoopScale, IntTy));
@@ -1704,28 +1705,43 @@ namespace {
// Currently, we only allow division by a nonzero constant here. If this is
// inadequate, we could easily allow division by SCEVUnknown by using
// ValueTracking to check isKnownNonZero().
+//
+// We cannot generally expand recurrences unless the step dominates the loop
+// header. The expander handles the special case of affine recurrences by
+// scaling the recurrence outside the loop, but this technique isn't generally
+// applicable. Expanding a nested recurrence outside a loop requires computing
+// binomial coefficients. This could be done, but the recurrence has to be in a
+// perfectly reduced form, which can't be guaranteed.
struct SCEVFindUnsafe {
+ ScalarEvolution &SE;
bool IsUnsafe;
- SCEVFindUnsafe(): IsUnsafe(false) {}
+ SCEVFindUnsafe(ScalarEvolution &se): SE(se), IsUnsafe(false) {}
bool follow(const SCEV *S) {
- const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S);
- if (!D)
- return true;
- const SCEVConstant *SC = dyn_cast<SCEVConstant>(D->getRHS());
- if (SC && !SC->getValue()->isZero())
- return true;
- IsUnsafe = true;
- return false;
+ if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
+ const SCEVConstant *SC = dyn_cast<SCEVConstant>(D->getRHS());
+ if (!SC || SC->getValue()->isZero()) {
+ IsUnsafe = true;
+ return false;
+ }
+ }
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
+ const SCEV *Step = AR->getStepRecurrence(SE);
+ if (!AR->isAffine() && !SE.dominates(Step, AR->getLoop()->getHeader())) {
+ IsUnsafe = true;
+ return false;
+ }
+ }
+ return true;
}
bool isDone() const { return IsUnsafe; }
};
}
namespace llvm {
-bool isSafeToExpand(const SCEV *S) {
- SCEVFindUnsafe Search;
+bool isSafeToExpand(const SCEV *S, ScalarEvolution &SE) {
+ SCEVFindUnsafe Search(SE);
visitAll(S, Search);
return !Search.IsUnsafe;
}
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index cfd8db0f6c..235aaaa6f8 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -532,7 +532,8 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
// and varies predictably *inside* the loop. Evaluate the value it
// contains when the loop exits, if possible.
const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop());
- if (!SE->isLoopInvariant(ExitValue, L) || !isSafeToExpand(ExitValue))
+ if (!SE->isLoopInvariant(ExitValue, L) ||
+ !isSafeToExpand(ExitValue, *SE))
continue;
// Computing the value outside of the loop brings no benefit if :
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 14cb979392..eff5268c44 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -1170,6 +1170,13 @@ public:
/// may be used.
bool AllFixupsOutsideLoop;
+ /// RigidFormula is set to true to guarantee that this use will be associated
+ /// with a single formula--the one that initially matched. Some SCEV
+ /// expressions cannot be expanded. This allows LSR to consider the registers
+ /// used by those expressions without the need to expand them later after
+ /// changing the formula.
+ bool RigidFormula;
+
/// WidestFixupType - This records the widest use type for any fixup using
/// this LSRUse. FindUseWithSimilarFormula can't consider uses with different
/// max fixup widths to be equivalent, because the narrower one may be relying
@@ -1188,6 +1195,7 @@ public:
MinOffset(INT64_MAX),
MaxOffset(INT64_MIN),
AllFixupsOutsideLoop(true),
+ RigidFormula(false),
WidestFixupType(0) {}
bool HasFormulaWithSameRegs(const Formula &F) const;
@@ -1214,6 +1222,9 @@ bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const {
/// InsertFormula - If the given formula has not yet been inserted, add it to
/// the list, and return true. Return false otherwise.
bool LSRUse::InsertFormula(const Formula &F) {
+ if (!Formulae.empty() && RigidFormula)
+ return false;
+
SmallVector<const SCEV *, 4> Key = F.BaseRegs;
if (F.ScaledReg) Key.push_back(F.ScaledReg);
// Unstable sort by host order ok, because this is only used for uniquifying.
@@ -1433,7 +1444,7 @@ static unsigned getScalingFactorCost(const TargetTransformInfo &TTI,
}
case LSRUse::ICmpZero:
// ICmpZero BaseReg + -1*ScaleReg => ICmp BaseReg, ScaleReg.
- // Therefore, return 0 in case F.Scale == -1.
+ // Therefore, return 0 in case F.Scale == -1.
return F.Scale != -1;
case LSRUse::Basic:
@@ -2943,7 +2954,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
// x == y --> x - y == 0
const SCEV *N = SE.getSCEV(NV);
- if (SE.isLoopInvariant(N, L) && isSafeToExpand(N)) {
+ if (SE.isLoopInvariant(N, L) && isSafeToExpand(N, SE)) {
// S is normalized, so normalize N before folding it into S
// to keep the result normalized.
N = TransformForPostIncUse(Normalize, N, CI, 0,
@@ -2986,6 +2997,10 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
/// and loop-computable portions.
void
LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx) {
+ // Mark uses whose expressions cannot be expanded.
+ if (!isSafeToExpand(S, SE))
+ LU.RigidFormula = true;
+
Formula F;
F.InitialMatch(S, L, SE);
bool Inserted = InsertFormula(LU, LUIdx, F);
@@ -4353,6 +4368,8 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
SCEVExpander &Rewriter,
SmallVectorImpl<WeakVH> &DeadInsts) const {
const LSRUse &LU = Uses[LF.LUIdx];
+ if (LU.RigidFormula)
+ return LF.OperandValToReplace;
// Determine an input position which will be dominated by the operands and
// which will dominate the result.
diff --git a/test/Transforms/LoopStrengthReduce/lsr-expand-quadratic.ll b/test/Transforms/LoopStrengthReduce/lsr-expand-quadratic.ll
new file mode 100644
index 0000000000..255cf41a81
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/lsr-expand-quadratic.ll
@@ -0,0 +1,42 @@
+; RUN: opt -loop-reduce -S < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx"
+
+; PR15470: LSR miscompile. The test2 function should return '1'.
+;
+; SCEV expander cannot expand quadratic recurrences outside of the
+; loop. This recurrence depends on %sub.us, so can't be expanded.
+;
+; CHECK-LABEL: @test2
+; CHECK-LABEL: test2.loop:
+; CHECK: %lsr.iv = phi i32 [ %lsr.iv.next, %test2.loop ], [ -16777216, %entry ]
+; CHECK: %lsr.iv.next = add nsw i32 %lsr.iv, 16777216
+;
+; CHECK=LABEL: for.end:
+; CHECK: %sub.cond.us = sub nsw i32 %inc1115.us, %sub.us
+; CHECK: %sext.us = mul i32 %lsr.iv.next, %sub.cond.us
+; CHECK: %f = ashr i32 %sext.us, 24
+; CHECK: ret i32 %f
+define i32 @test2() {
+entry:
+ br label %test2.loop
+
+test2.loop:
+ %inc1115.us = phi i32 [ 0, %entry ], [ %inc11.us, %test2.loop ]
+ %inc11.us = add nsw i32 %inc1115.us, 1
+ %cmp.us = icmp slt i32 %inc11.us, 2
+ br i1 %cmp.us, label %test2.loop, label %for.end
+
+for.end:
+ %tobool.us = icmp eq i32 %inc1115.us, 0
+ %sub.us = select i1 %tobool.us, i32 0, i32 0
+ %mul.us = shl i32 %inc1115.us, 24
+ %sub.cond.us = sub nsw i32 %inc1115.us, %sub.us
+ %sext.us = mul i32 %mul.us, %sub.cond.us
+ %f = ashr i32 %sext.us, 24
+ br label %exit
+
+exit:
+ ret i32 %f
+}