2 files changed, 51 insertions, 8 deletions
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index 15d4c1c79d..79a6ecd15b 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1987,10 +1987,6 @@ void InnerLoopVectorizer::createEmptyLoop() {
                       Constant::getAllOnesValue(BackedgeCount->getType()),
                       "backedge.overflow", BypassBlock->getTerminator());
 
-  // Count holds the overall loop count (N).
-  Value *Count = Exp.expandCodeFor(ExitCount, ExitCount->getType(),
-                                   BypassBlock->getTerminator());
-
   // The loop index does not have to start at Zero. Find the original start
   // value from the induction PHI node. If we don't have an induction variable
   // then we know that it starts at zero.
@@ -2000,6 +1996,18 @@ void InnerLoopVectorizer::createEmptyLoop() {
                        IdxTy):
     ConstantInt::get(IdxTy, 0);
 
+  // We need an instruction to anchor the overflow check on. StartIdx needs to
+  // be defined before the overflow check branch. Because the scalar preheader
+  // is going to merge the start index and so the overflow branch block needs to
+  // contain a definition of the start index.
+  Instruction *OverflowCheckAnchor = BinaryOperator::CreateAdd(
+      StartIdx, ConstantInt::get(IdxTy, 0), "overflow.check.anchor",
+      BypassBlock->getTerminator());
+
+  // Count holds the overall loop count (N).
+  Value *Count = Exp.expandCodeFor(ExitCount, ExitCount->getType(),
+                                   BypassBlock->getTerminator());
+
   LoopBypassBlocks.push_back(BypassBlock);
 
   // Split the single block loop into the two loop structure described above.
@@ -2068,17 +2076,18 @@ void InnerLoopVectorizer::createEmptyLoop() {
 
   // Now, compare the new count to zero. If it is zero skip the vector loop and
   // jump to the scalar loop.
-  Value *Cmp = BypassBuilder.CreateICmpEQ(IdxEndRoundDown, StartIdx,
-                                          "cmp.zero");
+  Value *Cmp =
+      BypassBuilder.CreateICmpEQ(IdxEndRoundDown, StartIdx, "cmp.zero");
 
   BasicBlock *LastBypassBlock = BypassBlock;
 
   // Generate code to check that the loops trip count that we computed by adding
   // one to the backedge-taken count will not overflow.
   {
-    auto PastOverflowCheck = std::next(BasicBlock::iterator(CheckBCOverflow));
+    auto PastOverflowCheck =
+        std::next(BasicBlock::iterator(OverflowCheckAnchor));
     BasicBlock *CheckBlock =
-        LastBypassBlock->splitBasicBlock(PastOverflowCheck, "overflow.checked");
+      LastBypassBlock->splitBasicBlock(PastOverflowCheck, "overflow.checked");
     if (ParentLoop)
       ParentLoop->addBasicBlockToLoop(CheckBlock, LI->getBase());
     LoopBypassBlocks.push_back(CheckBlock);
diff --git a/test/Transforms/LoopVectorize/induction.ll b/test/Transforms/LoopVectorize/induction.ll
index 3102ec59bd..7dabcb2ba0 100644
--- a/test/Transforms/LoopVectorize/induction.ll
+++ b/test/Transforms/LoopVectorize/induction.ll
@@ -135,3 +135,37 @@ define i32 @max_i32_backedgetaken() nounwind readnone ssp uwtable {
 ; <label>:5                                       ; preds = %1
   ret i32 %2
 }
+
+; When generating the overflow check we must sure that the induction start value
+; is defined before the branch to the scalar preheader.
+
+; CHECK-LABEL: testoverflowcheck
+; CHECK: entry
+; CHECK: %[[LOAD:.*]] = load i8
+; CHECK: %[[VAL:.*]] =  zext i8 %[[LOAD]] to i32
+; CHECK: br
+
+; CHECK: scalar.ph
+; CHECK: phi i32 [ %{{.*}}, %middle.block ], [ %[[VAL]], %entry ]
+
+@e = global i8 1, align 1
+@d = common global i32 0, align 4
+@c = common global i32 0, align 4
+define i32 @testoverflowcheck() {
+entry:
+  %.pr.i = load i8* @e, align 1
+  %0 = load i32* @d, align 4
+  %c.promoted.i = load i32* @c, align 4
+  br label %cond.end.i
+
+cond.end.i:
+  %inc4.i = phi i8 [ %.pr.i, %entry ], [ %inc.i, %cond.end.i ]
+  %and3.i = phi i32 [ %c.promoted.i, %entry ], [ %and.i, %cond.end.i ]
+  %and.i = and i32 %0, %and3.i
+  %inc.i = add i8 %inc4.i, 1
+  %tobool.i = icmp eq i8 %inc.i, 0
+  br i1 %tobool.i, label %loopexit, label %cond.end.i
+
+loopexit:
+  ret i32 %and.i
+}