diff options
author | Dan Gohman <gohman@apple.com> | 2010-04-24 03:13:44 +0000 |
---|---|---|
committer | Dan Gohman <gohman@apple.com> | 2010-04-24 03:13:44 +0000 |
commit | 1d367988e21bb1b4e8346877f8fa377dff194c29 (patch) | |
tree | cb0953ec9ece04f795994b7df064445324229413 | |
parent | 9f93d30a26ae9928f886ef7271efeafcea2a00a6 (diff) | |
download | llvm-1d367988e21bb1b4e8346877f8fa377dff194c29.tar.gz llvm-1d367988e21bb1b4e8346877f8fa377dff194c29.tar.bz2 llvm-1d367988e21bb1b4e8346877f8fa377dff194c29.tar.xz |
Generalize LSR's OptimizeMax to handle the new kinds of max expressions
that indvars may use, now that indvars is recognizing le and ge loops.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@102235 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Transforms/Scalar/LoopStrengthReduce.cpp | 53 | ||||
-rw-r--r-- | test/CodeGen/X86/optimize-max-3.ll | 32 |
2 files changed, 75 insertions, 10 deletions
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index a09b3dc5f8..9fdbf4aa1b 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -1461,12 +1461,26 @@ ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) { // Add one to the backedge-taken count to get the trip count. const SCEV *IterationCount = SE.getAddExpr(BackedgeTakenCount, One); - - // Check for a max calculation that matches the pattern. - if (!isa<SCEVSMaxExpr>(IterationCount) && !isa<SCEVUMaxExpr>(IterationCount)) + if (IterationCount != SE.getSCEV(Sel)) return Cond; + + // Check for a max calculation that matches the pattern. There's no check + // for ICMP_ULE here because the comparison would be with zero, which + // isn't interesting. + CmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE; + const SCEVNAryExpr *Max = 0; + if (const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(BackedgeTakenCount)) { + Pred = ICmpInst::ICMP_SLE; + Max = S; + } else if (const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(IterationCount)) { + Pred = ICmpInst::ICMP_SLT; + Max = S; + } else if (const SCEVUMaxExpr *U = dyn_cast<SCEVUMaxExpr>(IterationCount)) { + Pred = ICmpInst::ICMP_ULT; + Max = U; + } else { + // No match; bail. return Cond; - const SCEVNAryExpr *Max = cast<SCEVNAryExpr>(IterationCount); - if (Max != SE.getSCEV(Sel)) return Cond; + } // To handle a max with more than two operands, this optimization would // require additional checking and setup. @@ -1475,7 +1489,13 @@ ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) { const SCEV *MaxLHS = Max->getOperand(0); const SCEV *MaxRHS = Max->getOperand(1); - if (!MaxLHS || MaxLHS != One) return Cond; + + // ScalarEvolution canonicalizes constants to the left. For < and >, look + // for a comparison with 1. For <= and >=, a comparison with zero. + if (!MaxLHS || + (ICmpInst::isTrueWhenEqual(Pred) ? !MaxLHS->isZero() : (MaxLHS != One))) + return Cond; + // Check the relevant induction variable for conformance to // the pattern. const SCEV *IV = SE.getSCEV(Cond->getOperand(0)); @@ -1491,16 +1511,29 @@ ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) { // Check the right operand of the select, and remember it, as it will // be used in the new comparison instruction. Value *NewRHS = 0; - if (SE.getSCEV(Sel->getOperand(1)) == MaxRHS) + if (ICmpInst::isTrueWhenEqual(Pred)) { + // Look for n+1, and grab n. + if (AddOperator *BO = dyn_cast<AddOperator>(Sel->getOperand(1))) + if (isa<ConstantInt>(BO->getOperand(1)) && + cast<ConstantInt>(BO->getOperand(1))->isOne() && + SE.getSCEV(BO->getOperand(0)) == MaxRHS) + NewRHS = BO->getOperand(0); + if (AddOperator *BO = dyn_cast<AddOperator>(Sel->getOperand(2))) + if (isa<ConstantInt>(BO->getOperand(1)) && + cast<ConstantInt>(BO->getOperand(1))->isOne() && + SE.getSCEV(BO->getOperand(0)) == MaxRHS) + NewRHS = BO->getOperand(0); + if (!NewRHS) + return Cond; + } else if (SE.getSCEV(Sel->getOperand(1)) == MaxRHS) NewRHS = Sel->getOperand(1); else if (SE.getSCEV(Sel->getOperand(2)) == MaxRHS) NewRHS = Sel->getOperand(2); - if (!NewRHS) return Cond; + else + llvm_unreachable("Max doesn't match expected pattern!"); // Determine the new comparison opcode. It may be signed or unsigned, // and the original comparison may be either equality or inequality. - CmpInst::Predicate Pred = - isa<SCEVSMaxExpr>(Max) ? CmpInst::ICMP_SLT : CmpInst::ICMP_ULT; if (Cond->getPredicate() == CmpInst::ICMP_EQ) Pred = CmpInst::getInversePredicate(Pred); diff --git a/test/CodeGen/X86/optimize-max-3.ll b/test/CodeGen/X86/optimize-max-3.ll new file mode 100644 index 0000000000..bf8bfa28da --- /dev/null +++ b/test/CodeGen/X86/optimize-max-3.ll @@ -0,0 +1,32 @@ +; RUN: llc < %s -march=x86-64 | FileCheck %s + +; LSR's OptimizeMax should eliminate the select (max). + +; CHECK: foo: +; CHECK-NOT: cmov +; CHECK: jle + +define void @foo(i64 %n, double* nocapture %p) nounwind { +entry: + %cmp6 = icmp slt i64 %n, 0 ; <i1> [#uses=1] + br i1 %cmp6, label %for.end, label %for.body.preheader + +for.body.preheader: ; preds = %entry + %tmp = icmp sgt i64 %n, 0 ; <i1> [#uses=1] + %n.op = add i64 %n, 1 ; <i64> [#uses=1] + %tmp1 = select i1 %tmp, i64 %n.op, i64 1 ; <i64> [#uses=1] + br label %for.body + +for.body: ; preds = %for.body.preheader, %for.body + %i = phi i64 [ %i.next, %for.body ], [ 0, %for.body.preheader ] ; <i64> [#uses=2] + %arrayidx = getelementptr double* %p, i64 %i ; <double*> [#uses=2] + %t4 = load double* %arrayidx ; <double> [#uses=1] + %mul = fmul double %t4, 2.200000e+00 ; <double> [#uses=1] + store double %mul, double* %arrayidx + %i.next = add nsw i64 %i, 1 ; <i64> [#uses=2] + %exitcond = icmp eq i64 %i.next, %tmp1 ; <i1> [#uses=1] + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +} |