summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/llvm/Analysis/ScalarEvolutionExpander.h4
-rw-r--r--lib/Analysis/ScalarEvolutionExpander.cpp41
-rw-r--r--lib/Transforms/Scalar/LoopStrengthReduce.cpp2
-rw-r--r--test/Transforms/LoopStrengthReduce/2012-07-13-ExpandUDiv.ll90
4 files changed, 136 insertions, 1 deletions
diff --git a/include/llvm/Analysis/ScalarEvolutionExpander.h b/include/llvm/Analysis/ScalarEvolutionExpander.h
index cbbe4295b0..3f8f149cb4 100644
--- a/include/llvm/Analysis/ScalarEvolutionExpander.h
+++ b/include/llvm/Analysis/ScalarEvolutionExpander.h
@@ -24,6 +24,10 @@
namespace llvm {
class TargetLowering;
+ /// Return true if the given expression is safe to expand in the sense that
+ /// all materialized values are safe to speculate.
+ bool isSafeToExpand(const SCEV *S);
+
/// SCEVExpander - This class uses information about analyze scalars to
/// rewrite expressions in canonical form.
///
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index e7fe672ad3..b77f8d6ddc 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -1700,3 +1700,44 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
}
return NumElim;
}
+
+namespace {
+// Search for a SCEV subexpression that is not safe to expand. Any expression
+// that may expand to a !isSafeToSpeculativelyExecute value is unsafe, namely
+// UDiv expressions. We don't know if the UDiv is derived from an IR divide
+// instruction, but the important thing is that we prove the denominator is
+// nonzero before expansion.
+//
+// IVUsers already checks that IV-derived expressions are safe. So this check is
+// only needed when the expression includes some subexpression that is not IV
+// derived.
+//
+// Currently, we only allow division by a nonzero constant here. If this is
+// inadequate, we could easily allow division by SCEVUnknown by using
+// ValueTracking to check isKnownNonZero().
+struct SCEVFindUnsafe {
+ bool IsUnsafe;
+
+ SCEVFindUnsafe(): IsUnsafe(false) {}
+
+ bool follow(const SCEV *S) {
+ const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S);
+ if (!D)
+ return true;
+ const SCEVConstant *SC = dyn_cast<SCEVConstant>(D->getRHS());
+ if (SC && !SC->getValue()->isZero())
+ return true;
+ IsUnsafe = true;
+ return false;
+ }
+ bool isDone() const { return IsUnsafe; }
+};
+}
+
+namespace llvm {
+bool isSafeToExpand(const SCEV *S) {
+ SCEVFindUnsafe Search;
+ visitAll(S, Search);
+ return !Search.IsUnsafe;
+}
+}
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 4ba969e675..c0cb13de73 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -2836,7 +2836,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
// x == y --> x - y == 0
const SCEV *N = SE.getSCEV(NV);
- if (SE.isLoopInvariant(N, L)) {
+ if (SE.isLoopInvariant(N, L) && isSafeToExpand(N)) {
// S is normalized, so normalize N before folding it into S
// to keep the result normalized.
N = TransformForPostIncUse(Normalize, N, CI, 0,
diff --git a/test/Transforms/LoopStrengthReduce/2012-07-13-ExpandUDiv.ll b/test/Transforms/LoopStrengthReduce/2012-07-13-ExpandUDiv.ll
new file mode 100644
index 0000000000..a1222083ab
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/2012-07-13-ExpandUDiv.ll
@@ -0,0 +1,90 @@
+; RUN: opt -loop-reduce -S < %s | FileCheck %s
+;
+; PR11356: likely wrong code bug
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-darwin"
+
+@g_66 = global [1 x i32] zeroinitializer, align 4
+@g_775 = global i32 0, align 4
+@g_752 = global i32 0, align 4
+@g_3 = global i32 0, align 4
+
+; Ensure that %div.i.i.us is not hoisted.
+; CHECK: @main
+; CHECK: for.body.i.i.us:
+; CHECK: %div.i.i.i.us
+; CHECK: %cmp5.i.i.us
+define i32 @main() nounwind uwtable ssp {
+entry:
+ %l_2 = alloca [1 x i32], align 4
+ %arrayidx = getelementptr inbounds [1 x i32]* %l_2, i64 0, i64 0
+ store i32 0, i32* %arrayidx, align 4, !tbaa !0
+ %tmp = load i32* @g_3, align 4, !tbaa !0
+ %idxprom = sext i32 %tmp to i64
+ %arrayidx1 = getelementptr inbounds [1 x i32]* %l_2, i64 0, i64 %idxprom
+ %tmp1 = load i32* %arrayidx1, align 4, !tbaa !0
+ %conv.i.i = and i32 %tmp1, 65535
+ %tobool.i.i.i = icmp ne i32 %tmp, 0
+ br label %codeRepl
+
+codeRepl.loopexit.us-lcssa: ; preds = %for.body.i.i, %codeRepl5
+ br label %codeRepl.loopexit
+
+codeRepl.loopexit: ; preds = %codeRepl.loopexit.us-lcssa.us, %codeRepl.loopexit.us-lcssa
+ br label %codeRepl
+
+codeRepl: ; preds = %codeRepl.loopexit, %entry
+ br i1 %tobool.i.i.i, label %codeRepl.split.us, label %codeRepl.codeRepl.split_crit_edge
+
+codeRepl.codeRepl.split_crit_edge: ; preds = %codeRepl
+ br label %codeRepl.split
+
+codeRepl.split.us: ; preds = %codeRepl
+ br label %for.cond.i.i.us
+
+for.cond.i.i.us: ; preds = %for.inc.i.i.us, %codeRepl.split.us
+ %tmp2 = phi i32 [ 0, %codeRepl.split.us ], [ %add.i.i.us, %for.inc.i.i.us ]
+ br label %codeRepl5.us
+
+for.inc.i.i.us: ; preds = %for.body.i.i.us
+ %add.i.i.us = add nsw i32 %tmp2, 1
+ store i32 %add.i.i.us, i32* @g_752, align 4, !tbaa !0
+ br label %for.cond.i.i.us
+
+for.body.i.i.us: ; preds = %codeRepl5.us
+ %div.i.i.i.us = udiv i32 1, %conv.i.i
+ %cmp5.i.i.us = icmp eq i32 %div.i.i.i.us, %tmp2
+ br i1 %cmp5.i.i.us, label %codeRepl.loopexit.us-lcssa.us, label %for.inc.i.i.us
+
+codeRepl5.us: ; preds = %for.cond.i.i.us
+ br i1 true, label %codeRepl.loopexit.us-lcssa.us, label %for.body.i.i.us
+
+codeRepl.loopexit.us-lcssa.us: ; preds = %codeRepl5.us, %for.body.i.i.us
+ br label %codeRepl.loopexit
+
+codeRepl.split: ; preds = %codeRepl.codeRepl.split_crit_edge
+ br label %for.cond.i.i
+
+for.cond.i.i: ; preds = %for.inc.i.i, %codeRepl.split
+ %tmp3 = phi i32 [ 0, %codeRepl.split ], [ %add.i.i, %for.inc.i.i ]
+ br label %codeRepl5
+
+codeRepl5: ; preds = %for.cond.i.i
+ br i1 true, label %codeRepl.loopexit.us-lcssa, label %for.body.i.i
+
+for.body.i.i: ; preds = %codeRepl5
+ %cmp5.i.i = icmp eq i32 0, %tmp3
+ br i1 %cmp5.i.i, label %codeRepl.loopexit.us-lcssa, label %for.inc.i.i
+
+for.inc.i.i: ; preds = %for.body.i.i
+ %add.i.i = add nsw i32 %tmp3, 1
+ store i32 %add.i.i, i32* @g_752, align 4, !tbaa !0
+ br label %for.cond.i.i
+
+func_4.exit: ; No predecessors!
+ ret i32 0
+}
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA", null}