summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/llvm/Analysis/ScalarEvolution.h15
-rw-r--r--lib/Analysis/ScalarEvolution.cpp57
-rw-r--r--lib/Transforms/Scalar/LoopUnrollPass.cpp30
-rw-r--r--test/Transforms/LoopUnroll/scevunroll.ll172
4 files changed, 266 insertions, 8 deletions
diff --git a/include/llvm/Analysis/ScalarEvolution.h b/include/llvm/Analysis/ScalarEvolution.h
index c621bec86a..6e30b31771 100644
--- a/include/llvm/Analysis/ScalarEvolution.h
+++ b/include/llvm/Analysis/ScalarEvolution.h
@@ -507,7 +507,8 @@ namespace llvm {
/// FoundLHS, and FoundRHS is true.
bool isImpliedCondOperandsHelper(ICmpInst::Predicate Pred,
const SCEV *LHS, const SCEV *RHS,
- const SCEV *FoundLHS, const SCEV *FoundRHS);
+ const SCEV *FoundLHS,
+ const SCEV *FoundRHS);
/// getConstantEvolutionLoopExitValue - If we know that the specified Phi is
/// in the header of its containing loop, we know the loop executes a
@@ -710,6 +711,18 @@ namespace llvm {
bool isLoopBackedgeGuardedByCond(const Loop *L, ICmpInst::Predicate Pred,
const SCEV *LHS, const SCEV *RHS);
+ /// getSmallConstantTripCount - Returns the maximum trip count of this loop
+ /// as a normal unsigned value, if possible. Returns 0 if the trip count is
+ /// unknown or not constant.
+ unsigned getSmallConstantTripCount(Loop *L, BasicBlock *ExitBlock);
+
+ /// getSmallConstantTripMultiple - Returns the largest constant divisor of
+ /// the trip count of this loop as a normal unsigned value, if
+ /// possible. This means that the actual trip count is always a multiple of
+ /// the returned value (don't forget the trip count could very well be zero
+ /// as well!).
+ unsigned getSmallConstantTripMultiple(Loop *L, BasicBlock *ExitBlock);
+
// getExitCount - Get the expression for the number of loop iterations for
// which this loop is guaranteed not to exit via ExitingBlock. Otherwise
// return SCEVCouldNotCompute.
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index 487bec6a39..202e715aba 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -3830,6 +3830,63 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
// Iteration Count Computation Code
//
+/// getSmallConstantTripCount - Returns the maximum trip count of this loop as a
+/// normal unsigned value, if possible. Returns 0 if the trip count is unknown
+/// or not constant. Will also return 0 if the maximum trip count is very large
+/// (>= 2^32)
+unsigned ScalarEvolution::getSmallConstantTripCount(Loop *L,
+ BasicBlock *ExitBlock) {
+ const SCEVConstant *ExitCount =
+ dyn_cast<SCEVConstant>(getExitCount(L, ExitBlock));
+ if (!ExitCount)
+ return 0;
+
+ ConstantInt *ExitConst = ExitCount->getValue();
+
+ // Guard against huge trip counts.
+ if (ExitConst->getValue().getActiveBits() > 32)
+ return 0;
+
+ // In case of integer overflow, this returns 0, which is correct.
+ return ((unsigned)ExitConst->getZExtValue()) + 1;
+}
+
+/// getSmallConstantTripMultiple - Returns the largest constant divisor of the
+/// trip count of this loop as a normal unsigned value, if possible. This
+/// means that the actual trip count is always a multiple of the returned
+/// value (don't forget the trip count could very well be zero as well!).
+///
+/// Returns 1 if the trip count is unknown or not guaranteed to be the
+/// multiple of a constant (which is also the case if the trip count is simply
+/// constant, use getSmallConstantTripCount for that case), Will also return 1
+/// if the trip count is very large (>= 2^32).
+unsigned ScalarEvolution::getSmallConstantTripMultiple(Loop *L,
+ BasicBlock *ExitBlock) {
+ const SCEV *ExitCount = getExitCount(L, ExitBlock);
+ if (ExitCount == getCouldNotCompute())
+ return 1;
+
+ // Get the trip count from the BE count by adding 1.
+ const SCEV *TCMul = getAddExpr(ExitCount,
+ getConstant(ExitCount->getType(), 1));
+ // FIXME: SCEV distributes multiplication as V1*C1 + V2*C1. We could attempt
+ // to factor simple cases.
+ if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(TCMul))
+ TCMul = Mul->getOperand(0);
+
+ const SCEVConstant *MulC = dyn_cast<SCEVConstant>(TCMul);
+ if (!MulC)
+ return 1;
+
+ ConstantInt *Result = MulC->getValue();
+
+ // Guard against huge trip counts.
+ if (!Result || Result->getValue().getActiveBits() > 32)
+ return 1;
+
+ return (unsigned)Result->getZExtValue();
+}
+
// getExitCount - Get the expression for the number of loop iterations for which
// this loop is guaranteed not to exit via ExitintBlock. Otherwise return
// SCEVCouldNotCompute.
diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 94afff6813..dab3ac42ea 100644
--- a/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -39,6 +39,11 @@ UnrollAllowPartial("unroll-allow-partial", cl::init(false), cl::Hidden,
cl::desc("Allows loops to be partially unrolled until "
"-unroll-threshold loop size is reached."));
+// Temporary flag to be made default shortly.
+static cl::opt<bool>
+UnrollWithSCEV("unroll-scev", cl::init(false), cl::Hidden,
+ cl::desc("Use ScalarEvolution to analyze loop trip counts for unrolling"));
+
namespace {
class LoopUnroll : public LoopPass {
public:
@@ -121,6 +126,7 @@ static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls) {
bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
LoopInfo *LI = &getAnalysis<LoopInfo>();
+ ScalarEvolution *SE = &getAnalysis<ScalarEvolution>();
BasicBlock *Header = L->getHeader();
DEBUG(dbgs() << "Loop Unroll: F[" << Header->getParent()->getName()
@@ -136,14 +142,24 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
Header->getParent()->hasFnAttr(Attribute::OptimizeForSize))
Threshold = OptSizeUnrollThreshold;
- // Find trip count
- unsigned TripCount = L->getSmallConstantTripCount();
-
- // Find trip multiple if count is not available
+ // Find trip count and trip multiple if count is not available
+ unsigned TripCount = 0;
unsigned TripMultiple = 1;
- if (TripCount == 0)
- TripMultiple = L->getSmallConstantTripMultiple();
-
+ if (UnrollWithSCEV) {
+ // Find "latch trip count". UnrollLoop assumes that control cannot exit
+ // via the loop latch on any iteration prior to TripCount. The loop may exit
+ // early via an earlier branch.
+ BasicBlock *LatchBlock = L->getLoopLatch();
+ if (LatchBlock) {
+ TripCount = SE->getSmallConstantTripCount(L, LatchBlock);
+ TripMultiple = SE->getSmallConstantTripMultiple(L, LatchBlock);
+ }
+ }
+ else {
+ TripCount = L->getSmallConstantTripCount();
+ if (TripCount == 0)
+ TripMultiple = L->getSmallConstantTripMultiple();
+ }
// Automatically select an unroll count.
unsigned Count = CurrentCount;
if (Count == 0) {
diff --git a/test/Transforms/LoopUnroll/scevunroll.ll b/test/Transforms/LoopUnroll/scevunroll.ll
new file mode 100644
index 0000000000..0f5fbe4e9c
--- /dev/null
+++ b/test/Transforms/LoopUnroll/scevunroll.ll
@@ -0,0 +1,172 @@
+; RUN: opt < %s -S -indvars -loop-unroll -verify-loop-info -unroll-scev | FileCheck %s
+;
+; Unit tests for loop unrolling using ScalarEvolution to compute trip counts.
+;
+; Indvars is run first to generate an "old" SCEV result. Some unit
+; tests may check that SCEV is properly invalidated between passes.
+
+; Completely unroll loops without a canonical IV.
+;
+; CHECK: @sansCanonical
+; CHECK-NOT: phi
+; CHECK-NOT: icmp
+; CHECK: ret
+define i32 @sansCanonical(i32* %base) nounwind {
+entry:
+ br label %while.body
+
+while.body:
+ %iv = phi i64 [ 10, %entry ], [ %iv.next, %while.body ]
+ %sum = phi i32 [ 0, %entry ], [ %sum.next, %while.body ]
+ %iv.next = add i64 %iv, -1
+ %adr = getelementptr inbounds i32* %base, i64 %iv.next
+ %tmp = load i32* %adr, align 8
+ %sum.next = add i32 %sum, %tmp
+ %iv.narrow = trunc i64 %iv.next to i32
+ %cmp.i65 = icmp sgt i32 %iv.narrow, 0
+ br i1 %cmp.i65, label %while.body, label %exit
+
+exit:
+ ret i32 %sum
+}
+
+; SCEV unrolling properly handles loops with multiple exits. In this
+; case, the computed trip count based on a canonical IV is *not* for a
+; latch block. Canonical unrolling incorrectly unrolls it, but SCEV
+; unrolling does not.
+;
+; CHECK: @earlyLoopTest
+; CHECK: tail:
+; CHECK-NOT: br
+; CHECK: br i1 %cmp2, label %loop, label %exit2
+define i64 @earlyLoopTest(i64* %base) nounwind {
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %inc, %tail ]
+ %s = phi i64 [ 0, %entry ], [ %s.next, %tail ]
+ %adr = getelementptr i64* %base, i64 %iv
+ %val = load i64* %adr
+ %s.next = add i64 %s, %val
+ %inc = add i64 %iv, 1
+ %cmp = icmp ne i64 %inc, 4
+ br i1 %cmp, label %tail, label %exit1
+
+tail:
+ %cmp2 = icmp ne i64 %val, 0
+ br i1 %cmp2, label %loop, label %exit2
+
+exit1:
+ ret i64 %s
+
+exit2:
+ ret i64 %s.next
+}
+
+; SCEV properly unrolls multi-exit loops.
+;
+; CHECK: @multiExit
+; CHECK: getelementptr i32* %base, i64 10
+; CHECK-NEXT: load i32*
+; CHECK: br i1 false, label %l2.10, label %exit1
+; CHECK: l2.10:
+; CHECK-NOT: br
+; CHECK: ret i32
+define i32 @multiExit(i32* %base) nounwind {
+entry:
+ br label %l1
+l1:
+ %iv1 = phi i32 [ 0, %entry ], [ %inc1, %l2 ]
+ %iv2 = phi i32 [ 0, %entry ], [ %inc2, %l2 ]
+ %inc1 = add i32 %iv1, 1
+ %inc2 = add i32 %iv2, 1
+ %adr = getelementptr i32* %base, i32 %iv1
+ %val = load i32* %adr
+ %cmp1 = icmp slt i32 %iv1, 5
+ br i1 %cmp1, label %l2, label %exit1
+l2:
+ %cmp2 = icmp slt i32 %iv2, 10
+ br i1 %cmp2, label %l1, label %exit2
+exit1:
+ ret i32 1
+exit2:
+ ret i32 %val
+}
+
+
+; SCEV should not unroll a multi-exit loops unless the latch block has
+; a known trip count, regardless of the early exit trip counts. The
+; LoopUnroll utility uses this assumption to optimize the latch
+; block's branch.
+;
+; CHECK: @multiExit
+; CHECK: l3:
+; CHECK-NOT: br
+; CHECK: br i1 %cmp3, label %l1, label %exit3
+define i32 @multiExitIncomplete(i32* %base) nounwind {
+entry:
+ br label %l1
+l1:
+ %iv1 = phi i32 [ 0, %entry ], [ %inc1, %l3 ]
+ %iv2 = phi i32 [ 0, %entry ], [ %inc2, %l3 ]
+ %inc1 = add i32 %iv1, 1
+ %inc2 = add i32 %iv2, 1
+ %adr = getelementptr i32* %base, i32 %iv1
+ %val = load i32* %adr
+ %cmp1 = icmp slt i32 %iv1, 5
+ br i1 %cmp1, label %l2, label %exit1
+l2:
+ %cmp2 = icmp slt i32 %iv2, 10
+ br i1 %cmp2, label %l3, label %exit2
+l3:
+ %cmp3 = icmp ne i32 %val, 0
+ br i1 %cmp3, label %l1, label %exit3
+
+exit1:
+ ret i32 1
+exit2:
+ ret i32 2
+exit3:
+ ret i32 3
+}
+
+; When loop unroll merges a loop exit with one of its parent loop's
+; exits, SCEV must forget its ExitNotTaken info.
+;
+; CHECK: @nestedUnroll
+; CHECK-NOT: br i1
+; CHECK: for.body87:
+define void @nestedUnroll() nounwind {
+entry:
+ br label %for.inc
+
+for.inc:
+ br i1 false, label %for.inc, label %for.body38.preheader
+
+for.body38.preheader:
+ br label %for.body38
+
+for.body38:
+ %i.113 = phi i32 [ %inc76, %for.inc74 ], [ 0, %for.body38.preheader ]
+ %mul48 = mul nsw i32 %i.113, 6
+ br label %for.body43
+
+for.body43:
+ %j.011 = phi i32 [ 0, %for.body38 ], [ %inc72, %for.body43 ]
+ %add49 = add nsw i32 %j.011, %mul48
+ %sh_prom50 = zext i32 %add49 to i64
+ %inc72 = add nsw i32 %j.011, 1
+ br i1 false, label %for.body43, label %for.inc74
+
+for.inc74:
+ %inc76 = add nsw i32 %i.113, 1
+ br i1 false, label %for.body38, label %for.body87.preheader
+
+for.body87.preheader:
+ br label %for.body87
+
+for.body87:
+ br label %for.body87
+}
+