diff options
-rw-r--r-- | lib/Analysis/ScalarEvolution.cpp | 29 | ||||
-rw-r--r-- | test/Transforms/IndVarSimplify/lftr-promote.ll | 38 |
2 files changed, 65 insertions, 2 deletions
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index 7c68f8920e..dcd6558d34 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -960,6 +960,22 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, return getAddRecExpr(getSignExtendExpr(Start, Ty), getSignExtendExpr(Step, Ty), L); + + // Similar to above, only this time treat the step value as unsigned. + // This covers loops that count up with an unsigned step. + const SCEV *UMul = + getMulExpr(CastedMaxBECount, + getTruncateOrZeroExtend(Step, Start->getType())); + Add = getAddExpr(Start, UMul); + OperandExtendedAdd = + getAddExpr(getZeroExtendExpr(Start, WideTy), + getMulExpr(getZeroExtendExpr(CastedMaxBECount, WideTy), + getZeroExtendExpr(Step, WideTy))); + if (getZeroExtendExpr(Add, WideTy) == OperandExtendedAdd) + // Return the expression with the addrec on the outside. + return getAddRecExpr(getSignExtendExpr(Start, Ty), + getZeroExtendExpr(Step, Ty), + L); } // If the backedge is guarded by a comparison with the pre-inc value @@ -4248,7 +4264,7 @@ bool ScalarEvolution::isKnownPredicate(ICmpInst::Predicate Pred, switch (Pred) { default: - assert(0 && "Unexpected ICmpInst::Predicate value!"); + llvm_unreachable("Unexpected ICmpInst::Predicate value!"); break; case ICmpInst::ICMP_SGT: Pred = ICmpInst::ICMP_SLT; @@ -4556,23 +4572,32 @@ ScalarEvolution::isNecessaryCondOperands(ICmpInst::Predicate Pred, const SCEV *FoundLHS, const SCEV *FoundRHS) { switch (Pred) { - default: break; + default: llvm_unreachable("Unexpected ICmpInst::Predicate value!"); + case ICmpInst::ICMP_EQ: + case ICmpInst::ICMP_NE: + if (HasSameValue(LHS, FoundLHS) && HasSameValue(RHS, FoundRHS)) + return true; + break; case ICmpInst::ICMP_SLT: + case ICmpInst::ICMP_SLE: if (isKnownPredicate(ICmpInst::ICMP_SLE, LHS, FoundLHS) && isKnownPredicate(ICmpInst::ICMP_SGE, RHS, FoundRHS)) return true; break; case ICmpInst::ICMP_SGT: + case ICmpInst::ICMP_SGE: if (isKnownPredicate(ICmpInst::ICMP_SGE, LHS, FoundLHS) && isKnownPredicate(ICmpInst::ICMP_SLE, RHS, FoundRHS)) return true; break; case ICmpInst::ICMP_ULT: + case ICmpInst::ICMP_ULE: if (isKnownPredicate(ICmpInst::ICMP_ULE, LHS, FoundLHS) && isKnownPredicate(ICmpInst::ICMP_UGE, RHS, FoundRHS)) return true; break; case ICmpInst::ICMP_UGT: + case ICmpInst::ICMP_UGE: if (isKnownPredicate(ICmpInst::ICMP_UGE, LHS, FoundLHS) && isKnownPredicate(ICmpInst::ICMP_ULE, RHS, FoundRHS)) return true; diff --git a/test/Transforms/IndVarSimplify/lftr-promote.ll b/test/Transforms/IndVarSimplify/lftr-promote.ll new file mode 100644 index 0000000000..b2cb770dd6 --- /dev/null +++ b/test/Transforms/IndVarSimplify/lftr-promote.ll @@ -0,0 +1,38 @@ +; RUN: llvm-as < %s | opt -indvars | llvm-dis | grep add | count 1 + +; Indvars should be able to compute the exit value of this loop +; without any additional arithmetic. The only add needed should +; be the canonical IV increment. + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" + +define void @foo(double* %p, i32 %n) nounwind { +entry: + %0 = icmp sgt i32 %n, 0 ; <i1> [#uses=1] + br i1 %0, label %bb.nph, label %return + +bb.nph: ; preds = %entry + br label %bb2 + +bb2: ; preds = %bb3, %bb.nph + %i.01 = phi i32 [ %7, %bb3 ], [ 0, %bb.nph ] ; <i32> [#uses=3] + %1 = sext i32 %i.01 to i64 ; <i64> [#uses=1] + %2 = getelementptr double* %p, i64 %1 ; <double*> [#uses=1] + %3 = load double* %2, align 8 ; <double> [#uses=1] + %4 = fmul double %3, 1.100000e+00 ; <double> [#uses=1] + %5 = sext i32 %i.01 to i64 ; <i64> [#uses=1] + %6 = getelementptr double* %p, i64 %5 ; <double*> [#uses=1] + store double %4, double* %6, align 8 + %7 = add i32 %i.01, 1 ; <i32> [#uses=2] + br label %bb3 + +bb3: ; preds = %bb2 + %8 = icmp slt i32 %7, %n ; <i1> [#uses=1] + br i1 %8, label %bb2, label %bb3.return_crit_edge + +bb3.return_crit_edge: ; preds = %bb3 + br label %return + +return: ; preds = %bb3.return_crit_edge, %entry + ret void +} |