summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--lib/Transforms/Vectorize/LoopVectorize.cpp25
-rw-r--r--test/Transforms/LoopVectorize/induction.ll34
2 files changed, 51 insertions, 8 deletions
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index 15d4c1c79d..79a6ecd15b 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1987,10 +1987,6 @@ void InnerLoopVectorizer::createEmptyLoop() {
Constant::getAllOnesValue(BackedgeCount->getType()),
"backedge.overflow", BypassBlock->getTerminator());
- // Count holds the overall loop count (N).
- Value *Count = Exp.expandCodeFor(ExitCount, ExitCount->getType(),
- BypassBlock->getTerminator());
-
// The loop index does not have to start at Zero. Find the original start
// value from the induction PHI node. If we don't have an induction variable
// then we know that it starts at zero.
@@ -2000,6 +1996,18 @@ void InnerLoopVectorizer::createEmptyLoop() {
IdxTy):
ConstantInt::get(IdxTy, 0);
+ // We need an instruction to anchor the overflow check on. StartIdx needs to
+ // be defined before the overflow check branch. Because the scalar preheader
+ // is going to merge the start index and so the overflow branch block needs to
+ // contain a definition of the start index.
+ Instruction *OverflowCheckAnchor = BinaryOperator::CreateAdd(
+ StartIdx, ConstantInt::get(IdxTy, 0), "overflow.check.anchor",
+ BypassBlock->getTerminator());
+
+ // Count holds the overall loop count (N).
+ Value *Count = Exp.expandCodeFor(ExitCount, ExitCount->getType(),
+ BypassBlock->getTerminator());
+
LoopBypassBlocks.push_back(BypassBlock);
// Split the single block loop into the two loop structure described above.
@@ -2068,17 +2076,18 @@ void InnerLoopVectorizer::createEmptyLoop() {
// Now, compare the new count to zero. If it is zero skip the vector loop and
// jump to the scalar loop.
- Value *Cmp = BypassBuilder.CreateICmpEQ(IdxEndRoundDown, StartIdx,
- "cmp.zero");
+ Value *Cmp =
+ BypassBuilder.CreateICmpEQ(IdxEndRoundDown, StartIdx, "cmp.zero");
BasicBlock *LastBypassBlock = BypassBlock;
// Generate code to check that the loops trip count that we computed by adding
// one to the backedge-taken count will not overflow.
{
- auto PastOverflowCheck = std::next(BasicBlock::iterator(CheckBCOverflow));
+ auto PastOverflowCheck =
+ std::next(BasicBlock::iterator(OverflowCheckAnchor));
BasicBlock *CheckBlock =
- LastBypassBlock->splitBasicBlock(PastOverflowCheck, "overflow.checked");
+ LastBypassBlock->splitBasicBlock(PastOverflowCheck, "overflow.checked");
if (ParentLoop)
ParentLoop->addBasicBlockToLoop(CheckBlock, LI->getBase());
LoopBypassBlocks.push_back(CheckBlock);
diff --git a/test/Transforms/LoopVectorize/induction.ll b/test/Transforms/LoopVectorize/induction.ll
index 3102ec59bd..7dabcb2ba0 100644
--- a/test/Transforms/LoopVectorize/induction.ll
+++ b/test/Transforms/LoopVectorize/induction.ll
@@ -135,3 +135,37 @@ define i32 @max_i32_backedgetaken() nounwind readnone ssp uwtable {
; <label>:5 ; preds = %1
ret i32 %2
}
+
+; When generating the overflow check we must sure that the induction start value
+; is defined before the branch to the scalar preheader.
+
+; CHECK-LABEL: testoverflowcheck
+; CHECK: entry
+; CHECK: %[[LOAD:.*]] = load i8
+; CHECK: %[[VAL:.*]] = zext i8 %[[LOAD]] to i32
+; CHECK: br
+
+; CHECK: scalar.ph
+; CHECK: phi i32 [ %{{.*}}, %middle.block ], [ %[[VAL]], %entry ]
+
+@e = global i8 1, align 1
+@d = common global i32 0, align 4
+@c = common global i32 0, align 4
+define i32 @testoverflowcheck() {
+entry:
+ %.pr.i = load i8* @e, align 1
+ %0 = load i32* @d, align 4
+ %c.promoted.i = load i32* @c, align 4
+ br label %cond.end.i
+
+cond.end.i:
+ %inc4.i = phi i8 [ %.pr.i, %entry ], [ %inc.i, %cond.end.i ]
+ %and3.i = phi i32 [ %c.promoted.i, %entry ], [ %and.i, %cond.end.i ]
+ %and.i = and i32 %0, %and3.i
+ %inc.i = add i8 %inc4.i, 1
+ %tobool.i = icmp eq i8 %inc.i, 0
+ br i1 %tobool.i, label %loopexit, label %cond.end.i
+
+loopexit:
+ ret i32 %and.i
+}