summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- lib/Transforms/Scalar/LoopRerollPass.cpp | 22
-rw-r--r-- test/Transforms/LoopReroll/basic.ll | 10
-rw-r--r-- test/Transforms/LoopReroll/nonconst_lb.ll | 152
-rw-r--r-- test/Transforms/LoopReroll/reduction.ll | 4
4 files changed, 171 insertions, 17 deletions
diff --git a/lib/Transforms/Scalar/LoopRerollPass.cpp b/lib/Transforms/Scalar/LoopRerollPass.cpp
index 335af81b95..643bc78f6e 100644
--- a/lib/Transforms/Scalar/LoopRerollPass.cpp
+++ b/lib/Transforms/Scalar/LoopRerollPass.cpp
@@ -1088,9 +1088,8 @@ bool LoopReroll::reroll(Instruction *IV, Loop *L, BasicBlock *Header,
L, SCEV::FlagAnyWrap));
{ // Limit the lifetime of SCEVExpander.
SCEVExpander Expander(*SE, "reroll");
- PHINode *NewIV =
- cast<PHINode>(Expander.expandCodeFor(H, IV->getType(),
- Header->begin()));
+ Value *NewIV = Expander.expandCodeFor(H, IV->getType(), Header->begin());
+
for (DenseSet<Instruction *>::iterator J = BaseUseSet.begin(),
JE = BaseUseSet.end(); J != JE; ++J)
(*J)->replaceUsesOfWith(IV, NewIV);
@@ -1101,20 +1100,23 @@ bool LoopReroll::reroll(Instruction *IV, Loop *L, BasicBlock *Header,
if (Inc == 1)
ICSCEV =
SE->getMulExpr(ICSCEV, SE->getConstant(ICSCEV->getType(), Scale));
- Value *IC;
- if (isa<SCEVConstant>(ICSCEV)) {
- IC = Expander.expandCodeFor(ICSCEV, NewIV->getType(), BI);
+ // Iteration count SCEV minus 1
+ const SCEV *ICMinus1SCEV =
+ SE->getMinusSCEV(ICSCEV, SE->getConstant(ICSCEV->getType(), 1));
+
+ Value *ICMinus1; // Iteration count minus 1
+ if (isa<SCEVConstant>(ICMinus1SCEV)) {
+ ICMinus1 = Expander.expandCodeFor(ICMinus1SCEV, NewIV->getType(), BI);
} else {
BasicBlock *Preheader = L->getLoopPreheader();
if (!Preheader)
Preheader = InsertPreheaderForLoop(L, this);
- IC = Expander.expandCodeFor(ICSCEV, NewIV->getType(),
- Preheader->getTerminator());
+ ICMinus1 = Expander.expandCodeFor(ICMinus1SCEV, NewIV->getType(),
+ Preheader->getTerminator());
}
- Value *NewIVNext = NewIV->getIncomingValueForBlock(Header);
- Value *Cond = new ICmpInst(BI, CmpInst::ICMP_EQ, NewIVNext, IC,
+ Value *Cond = new ICmpInst(BI, CmpInst::ICMP_EQ, NewIV, ICMinus1,
"exitcond");
BI->setCondition(Cond);
diff --git a/test/Transforms/LoopReroll/basic.ll b/test/Transforms/LoopReroll/basic.ll
index 314a14947e..3bd6d7ae02 100644
--- a/test/Transforms/LoopReroll/basic.ll
+++ b/test/Transforms/LoopReroll/basic.ll
@@ -33,7 +33,7 @@ for.body: ; preds = %for.body, %entry
; CHECK: %indvar = phi i32 [ %indvar.next, %for.body ], [ 0, %entry ]
; CHECK: %call = tail call i32 @foo(i32 %indvar) #1
; CHECK: %indvar.next = add i32 %indvar, 1
-; CHECK: %exitcond1 = icmp eq i32 %indvar.next, 498
+; CHECK: %exitcond1 = icmp eq i32 %indvar, 497
; CHECK: br i1 %exitcond1, label %for.end, label %for.body
; CHECK: ret
@@ -83,7 +83,7 @@ for.body: ; preds = %entry, %for.body
; CHECK: %arrayidx = getelementptr inbounds i32* %x, i64 %indvar
; CHECK: store i32 %call, i32* %arrayidx, align 4
; CHECK: %indvar.next = add i64 %indvar, 1
-; CHECK: %exitcond = icmp eq i64 %indvar.next, 1500
+; CHECK: %exitcond = icmp eq i64 %indvar, 1499
; CHECK: br i1 %exitcond, label %for.end, label %for.body
; CHECK: ret
@@ -131,7 +131,7 @@ for.body: ; preds = %for.body, %entry
; CHECK: %arrayidx = getelementptr inbounds i32* %x, i64 %indvars.iv
; CHECK: store i32 %call, i32* %arrayidx, align 4
; CHECK: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-; CHECK: %exitcond1 = icmp eq i64 %indvars.iv.next, 1500
+; CHECK: %exitcond1 = icmp eq i64 %indvars.iv, 1499
; CHECK: br i1 %exitcond1, label %for.end, label %for.body
; CHECK: ret
@@ -213,7 +213,7 @@ for.body: ; preds = %entry, %for.body
; CHECK: %add = fadd float %1, %mul
; CHECK: store float %add, float* %arrayidx2, align 4
; CHECK: %indvar.next = add i64 %indvar, 1
-; CHECK: %exitcond = icmp eq i64 %indvar.next, 3200
+; CHECK: %exitcond = icmp eq i64 %indvar, 3199
; CHECK: br i1 %exitcond, label %for.end, label %for.body
; CHECK: ret
@@ -313,7 +313,7 @@ for.body: ; preds = %entry, %for.body
; CHECK: %add = fadd float %2, %mul
; CHECK: store float %add, float* %arrayidx4, align 4
; CHECK: %indvar.next = add i64 %indvar, 1
-; CHECK: %exitcond = icmp eq i64 %indvar.next, 3200
+; CHECK: %exitcond = icmp eq i64 %indvar, 3199
; CHECK: br i1 %exitcond, label %for.end, label %for.body
; CHECK: ret
diff --git a/test/Transforms/LoopReroll/nonconst_lb.ll b/test/Transforms/LoopReroll/nonconst_lb.ll
new file mode 100644
index 0000000000..a45469bd23
--- /dev/null
+++ b/test/Transforms/LoopReroll/nonconst_lb.ll
@@ -0,0 +1,152 @@
+; RUN: opt < %s -loop-reroll -S | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "thumbv7-none-linux"
+
+;void foo(int *A, int *B, int m, int n) {
+; for (int i = m; i < n; i+=4) {
+; A[i+0] = B[i+0] * 4;
+; A[i+1] = B[i+1] * 4;
+; A[i+2] = B[i+2] * 4;
+; A[i+3] = B[i+3] * 4;
+; }
+;}
+define void @foo(i32* nocapture %A, i32* nocapture readonly %B, i32 %m, i32 %n) {
+entry:
+ %cmp34 = icmp slt i32 %m, %n
+ br i1 %cmp34, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %i.035 = phi i32 [ %add18, %for.body ], [ %m, %entry ]
+ %arrayidx = getelementptr inbounds i32* %B, i32 %i.035
+ %0 = load i32* %arrayidx, align 4
+ %mul = shl nsw i32 %0, 2
+ %arrayidx2 = getelementptr inbounds i32* %A, i32 %i.035
+ store i32 %mul, i32* %arrayidx2, align 4
+ %add3 = add nsw i32 %i.035, 1
+ %arrayidx4 = getelementptr inbounds i32* %B, i32 %add3
+ %1 = load i32* %arrayidx4, align 4
+ %mul5 = shl nsw i32 %1, 2
+ %arrayidx7 = getelementptr inbounds i32* %A, i32 %add3
+ store i32 %mul5, i32* %arrayidx7, align 4
+ %add8 = add nsw i32 %i.035, 2
+ %arrayidx9 = getelementptr inbounds i32* %B, i32 %add8
+ %2 = load i32* %arrayidx9, align 4
+ %mul10 = shl nsw i32 %2, 2
+ %arrayidx12 = getelementptr inbounds i32* %A, i32 %add8
+ store i32 %mul10, i32* %arrayidx12, align 4
+ %add13 = add nsw i32 %i.035, 3
+ %arrayidx14 = getelementptr inbounds i32* %B, i32 %add13
+ %3 = load i32* %arrayidx14, align 4
+ %mul15 = shl nsw i32 %3, 2
+ %arrayidx17 = getelementptr inbounds i32* %A, i32 %add13
+ store i32 %mul15, i32* %arrayidx17, align 4
+ %add18 = add nsw i32 %i.035, 4
+ %cmp = icmp slt i32 %add18, %n
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+; CHECK-LABEL: @foo
+; CHECK: for.body.preheader: ; preds = %entry
+; CHECK: %0 = add i32 %n, -1
+; CHECK: %1 = sub i32 %0, %m
+; CHECK: %2 = lshr i32 %1, 2
+; CHECK: %3 = mul i32 %2, 4
+; CHECK: %4 = add i32 %m, %3
+; CHECK: %5 = add i32 %4, 3
+; CHECK: br label %for.body
+
+; CHECK: for.body: ; preds = %for.body, %for.body.preheader
+; CHECK: %indvar = phi i32 [ %indvar.next, %for.body ], [ 0, %for.body.preheader ]
+; CHECK: %6 = add i32 %m, %indvar
+; CHECK: %arrayidx = getelementptr inbounds i32* %B, i32 %6
+; CHECK: %7 = load i32* %arrayidx, align 4
+; CHECK: %mul = shl nsw i32 %7, 2
+; CHECK: %arrayidx2 = getelementptr inbounds i32* %A, i32 %6
+; CHECK: store i32 %mul, i32* %arrayidx2, align 4
+; CHECK: %indvar.next = add i32 %indvar, 1
+; CHECK: %exitcond = icmp eq i32 %6, %5
+; CHECK: br i1 %exitcond, label %for.end, label %for.body
+
+;void daxpy_ur(int n,float da,float *dx,float *dy)
+; {
+; int m = n % 4;
+; for (int i = m; i < n; i = i + 4)
+; {
+; dy[i] = dy[i] + da*dx[i];
+; dy[i+1] = dy[i+1] + da*dx[i+1];
+; dy[i+2] = dy[i+2] + da*dx[i+2];
+; dy[i+3] = dy[i+3] + da*dx[i+3];
+; }
+; }
+define void @daxpy_ur(i32 %n, float %da, float* nocapture readonly %dx, float* nocapture %dy) {
+entry:
+ %rem = srem i32 %n, 4
+ %cmp55 = icmp slt i32 %rem, %n
+ br i1 %cmp55, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %i.056 = phi i32 [ %add27, %for.body ], [ %rem, %entry ]
+ %arrayidx = getelementptr inbounds float* %dy, i32 %i.056
+ %0 = load float* %arrayidx, align 4
+ %arrayidx1 = getelementptr inbounds float* %dx, i32 %i.056
+ %1 = load float* %arrayidx1, align 4
+ %mul = fmul float %1, %da
+ %add = fadd float %0, %mul
+ store float %add, float* %arrayidx, align 4
+ %add3 = add nsw i32 %i.056, 1
+ %arrayidx4 = getelementptr inbounds float* %dy, i32 %add3
+ %2 = load float* %arrayidx4, align 4
+ %arrayidx6 = getelementptr inbounds float* %dx, i32 %add3
+ %3 = load float* %arrayidx6, align 4
+ %mul7 = fmul float %3, %da
+ %add8 = fadd float %2, %mul7
+ store float %add8, float* %arrayidx4, align 4
+ %add11 = add nsw i32 %i.056, 2
+ %arrayidx12 = getelementptr inbounds float* %dy, i32 %add11
+ %4 = load float* %arrayidx12, align 4
+ %arrayidx14 = getelementptr inbounds float* %dx, i32 %add11
+ %5 = load float* %arrayidx14, align 4
+ %mul15 = fmul float %5, %da
+ %add16 = fadd float %4, %mul15
+ store float %add16, float* %arrayidx12, align 4
+ %add19 = add nsw i32 %i.056, 3
+ %arrayidx20 = getelementptr inbounds float* %dy, i32 %add19
+ %6 = load float* %arrayidx20, align 4
+ %arrayidx22 = getelementptr inbounds float* %dx, i32 %add19
+ %7 = load float* %arrayidx22, align 4
+ %mul23 = fmul float %7, %da
+ %add24 = fadd float %6, %mul23
+ store float %add24, float* %arrayidx20, align 4
+ %add27 = add nsw i32 %i.056, 4
+ %cmp = icmp slt i32 %add27, %n
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+; CHECK-LABEL: @daxpy_ur
+; CHECK: for.body.preheader:
+; CHECK: %0 = add i32 %n, -1
+; CHECK: %1 = sub i32 %0, %rem
+; CHECK: %2 = lshr i32 %1, 2
+; CHECK: %3 = mul i32 %2, 4
+; CHECK: %4 = add i32 %rem, %3
+; CHECK: %5 = add i32 %4, 3
+; CHECK: br label %for.body
+
+; CHECK: for.body:
+; CHECK: %indvar = phi i32 [ %indvar.next, %for.body ], [ 0, %for.body.preheader ]
+; CHECK: %6 = add i32 %rem, %indvar
+; CHECK: %arrayidx = getelementptr inbounds float* %dy, i32 %6
+; CHECK: %7 = load float* %arrayidx, align 4
+; CHECK: %arrayidx1 = getelementptr inbounds float* %dx, i32 %6
+; CHECK: %8 = load float* %arrayidx1, align 4
+; CHECK: %mul = fmul float %8, %da
+; CHECK: %add = fadd float %7, %mul
+; CHECK: store float %add, float* %arrayidx, align 4
+; CHECK: %indvar.next = add i32 %indvar, 1
+; CHECK: %exitcond = icmp eq i32 %6, %5
+; CHECK: br i1 %exitcond, label %for.end, label %for.body
diff --git a/test/Transforms/LoopReroll/reduction.ll b/test/Transforms/LoopReroll/reduction.ll
index aed7670b66..c9991c723e 100644
--- a/test/Transforms/LoopReroll/reduction.ll
+++ b/test/Transforms/LoopReroll/reduction.ll
@@ -38,7 +38,7 @@ for.body: ; preds = %entry, %for.body
; CHECK: %0 = load i32* %arrayidx, align 4
; CHECK: %add = add nsw i32 %0, %r.029
; CHECK: %indvar.next = add i64 %indvar, 1
-; CHECK: %exitcond = icmp eq i64 %indvar.next, 400
+; CHECK: %exitcond = icmp eq i64 %indvar, 399
; CHECK: br i1 %exitcond, label %for.end, label %for.body
; CHECK: ret
@@ -83,7 +83,7 @@ for.body: ; preds = %entry, %for.body
; CHECK: %0 = load float* %arrayidx, align 4
; CHECK: %add = fadd float %0, %r.029
; CHECK: %indvar.next = add i64 %indvar, 1
-; CHECK: %exitcond = icmp eq i64 %indvar.next, 400
+; CHECK: %exitcond = icmp eq i64 %indvar, 399
; CHECK: br i1 %exitcond, label %for.end, label %for.body
; CHECK: ret