summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDan Gohman <gohman@apple.com>2010-04-24 03:13:44 +0000
committerDan Gohman <gohman@apple.com>2010-04-24 03:13:44 +0000
commit1d367988e21bb1b4e8346877f8fa377dff194c29 (patch)
treecb0953ec9ece04f795994b7df064445324229413
parent9f93d30a26ae9928f886ef7271efeafcea2a00a6 (diff)
downloadllvm-1d367988e21bb1b4e8346877f8fa377dff194c29.tar.gz
llvm-1d367988e21bb1b4e8346877f8fa377dff194c29.tar.bz2
llvm-1d367988e21bb1b4e8346877f8fa377dff194c29.tar.xz
Generalize LSR's OptimizeMax to handle the new kinds of max expressions
that indvars may use, now that indvars is recognizing le and ge loops. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@102235 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Transforms/Scalar/LoopStrengthReduce.cpp53
-rw-r--r--test/CodeGen/X86/optimize-max-3.ll32
2 files changed, 75 insertions, 10 deletions
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index a09b3dc5f8..9fdbf4aa1b 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -1461,12 +1461,26 @@ ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) {
// Add one to the backedge-taken count to get the trip count.
const SCEV *IterationCount = SE.getAddExpr(BackedgeTakenCount, One);
-
- // Check for a max calculation that matches the pattern.
- if (!isa<SCEVSMaxExpr>(IterationCount) && !isa<SCEVUMaxExpr>(IterationCount))
+ if (IterationCount != SE.getSCEV(Sel)) return Cond;
+
+ // Check for a max calculation that matches the pattern. There's no check
+ // for ICMP_ULE here because the comparison would be with zero, which
+ // isn't interesting.
+ CmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE;
+ const SCEVNAryExpr *Max = 0;
+ if (const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(BackedgeTakenCount)) {
+ Pred = ICmpInst::ICMP_SLE;
+ Max = S;
+ } else if (const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(IterationCount)) {
+ Pred = ICmpInst::ICMP_SLT;
+ Max = S;
+ } else if (const SCEVUMaxExpr *U = dyn_cast<SCEVUMaxExpr>(IterationCount)) {
+ Pred = ICmpInst::ICMP_ULT;
+ Max = U;
+ } else {
+ // No match; bail.
return Cond;
- const SCEVNAryExpr *Max = cast<SCEVNAryExpr>(IterationCount);
- if (Max != SE.getSCEV(Sel)) return Cond;
+ }
// To handle a max with more than two operands, this optimization would
// require additional checking and setup.
@@ -1475,7 +1489,13 @@ ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) {
const SCEV *MaxLHS = Max->getOperand(0);
const SCEV *MaxRHS = Max->getOperand(1);
- if (!MaxLHS || MaxLHS != One) return Cond;
+
+ // ScalarEvolution canonicalizes constants to the left. For < and >, look
+ // for a comparison with 1. For <= and >=, a comparison with zero.
+ if (!MaxLHS ||
+ (ICmpInst::isTrueWhenEqual(Pred) ? !MaxLHS->isZero() : (MaxLHS != One)))
+ return Cond;
+
// Check the relevant induction variable for conformance to
// the pattern.
const SCEV *IV = SE.getSCEV(Cond->getOperand(0));
@@ -1491,16 +1511,29 @@ ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) {
// Check the right operand of the select, and remember it, as it will
// be used in the new comparison instruction.
Value *NewRHS = 0;
- if (SE.getSCEV(Sel->getOperand(1)) == MaxRHS)
+ if (ICmpInst::isTrueWhenEqual(Pred)) {
+ // Look for n+1, and grab n.
+ if (AddOperator *BO = dyn_cast<AddOperator>(Sel->getOperand(1)))
+ if (isa<ConstantInt>(BO->getOperand(1)) &&
+ cast<ConstantInt>(BO->getOperand(1))->isOne() &&
+ SE.getSCEV(BO->getOperand(0)) == MaxRHS)
+ NewRHS = BO->getOperand(0);
+ if (AddOperator *BO = dyn_cast<AddOperator>(Sel->getOperand(2)))
+ if (isa<ConstantInt>(BO->getOperand(1)) &&
+ cast<ConstantInt>(BO->getOperand(1))->isOne() &&
+ SE.getSCEV(BO->getOperand(0)) == MaxRHS)
+ NewRHS = BO->getOperand(0);
+ if (!NewRHS)
+ return Cond;
+ } else if (SE.getSCEV(Sel->getOperand(1)) == MaxRHS)
NewRHS = Sel->getOperand(1);
else if (SE.getSCEV(Sel->getOperand(2)) == MaxRHS)
NewRHS = Sel->getOperand(2);
- if (!NewRHS) return Cond;
+ else
+ llvm_unreachable("Max doesn't match expected pattern!");
// Determine the new comparison opcode. It may be signed or unsigned,
// and the original comparison may be either equality or inequality.
- CmpInst::Predicate Pred =
- isa<SCEVSMaxExpr>(Max) ? CmpInst::ICMP_SLT : CmpInst::ICMP_ULT;
if (Cond->getPredicate() == CmpInst::ICMP_EQ)
Pred = CmpInst::getInversePredicate(Pred);
diff --git a/test/CodeGen/X86/optimize-max-3.ll b/test/CodeGen/X86/optimize-max-3.ll
new file mode 100644
index 0000000000..bf8bfa28da
--- /dev/null
+++ b/test/CodeGen/X86/optimize-max-3.ll
@@ -0,0 +1,32 @@
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+
+; LSR's OptimizeMax should eliminate the select (max).
+
+; CHECK: foo:
+; CHECK-NOT: cmov
+; CHECK: jle
+
+define void @foo(i64 %n, double* nocapture %p) nounwind {
+entry:
+ %cmp6 = icmp slt i64 %n, 0 ; <i1> [#uses=1]
+ br i1 %cmp6, label %for.end, label %for.body.preheader
+
+for.body.preheader: ; preds = %entry
+ %tmp = icmp sgt i64 %n, 0 ; <i1> [#uses=1]
+ %n.op = add i64 %n, 1 ; <i64> [#uses=1]
+ %tmp1 = select i1 %tmp, i64 %n.op, i64 1 ; <i64> [#uses=1]
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %i = phi i64 [ %i.next, %for.body ], [ 0, %for.body.preheader ] ; <i64> [#uses=2]
+ %arrayidx = getelementptr double* %p, i64 %i ; <double*> [#uses=2]
+ %t4 = load double* %arrayidx ; <double> [#uses=1]
+ %mul = fmul double %t4, 2.200000e+00 ; <double> [#uses=1]
+ store double %mul, double* %arrayidx
+ %i.next = add nsw i64 %i, 1 ; <i64> [#uses=2]
+ %exitcond = icmp eq i64 %i.next, %tmp1 ; <i1> [#uses=1]
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}