diff options
-rw-r--r-- | lib/Transforms/Vectorize/LoopVectorize.cpp | 25 | ||||
-rw-r--r-- | test/Transforms/LoopVectorize/induction.ll | 34 |
2 files changed, 51 insertions, 8 deletions
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 15d4c1c79d..79a6ecd15b 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1987,10 +1987,6 @@ void InnerLoopVectorizer::createEmptyLoop() { Constant::getAllOnesValue(BackedgeCount->getType()), "backedge.overflow", BypassBlock->getTerminator()); - // Count holds the overall loop count (N). - Value *Count = Exp.expandCodeFor(ExitCount, ExitCount->getType(), - BypassBlock->getTerminator()); - // The loop index does not have to start at Zero. Find the original start // value from the induction PHI node. If we don't have an induction variable // then we know that it starts at zero. @@ -2000,6 +1996,18 @@ void InnerLoopVectorizer::createEmptyLoop() { IdxTy): ConstantInt::get(IdxTy, 0); + // We need an instruction to anchor the overflow check on. StartIdx needs to + // be defined before the overflow check branch. This is because the scalar preheader + // is going to merge the start index, so the overflow branch block needs to + // contain a definition of the start index. + Instruction *OverflowCheckAnchor = BinaryOperator::CreateAdd( + StartIdx, ConstantInt::get(IdxTy, 0), "overflow.check.anchor", + BypassBlock->getTerminator()); + + // Count holds the overall loop count (N). + Value *Count = Exp.expandCodeFor(ExitCount, ExitCount->getType(), + BypassBlock->getTerminator()); + LoopBypassBlocks.push_back(BypassBlock); // Split the single block loop into the two loop structure described above. @@ -2068,17 +2076,18 @@ void InnerLoopVectorizer::createEmptyLoop() { // Now, compare the new count to zero. If it is zero skip the vector loop and // jump to the scalar loop. 
- Value *Cmp = BypassBuilder.CreateICmpEQ(IdxEndRoundDown, StartIdx, - "cmp.zero"); + Value *Cmp = + BypassBuilder.CreateICmpEQ(IdxEndRoundDown, StartIdx, "cmp.zero"); BasicBlock *LastBypassBlock = BypassBlock; // Generate code to check that the loops trip count that we computed by adding // one to the backedge-taken count will not overflow. { - auto PastOverflowCheck = std::next(BasicBlock::iterator(CheckBCOverflow)); + auto PastOverflowCheck = + std::next(BasicBlock::iterator(OverflowCheckAnchor)); BasicBlock *CheckBlock = - LastBypassBlock->splitBasicBlock(PastOverflowCheck, "overflow.checked"); + LastBypassBlock->splitBasicBlock(PastOverflowCheck, "overflow.checked"); if (ParentLoop) ParentLoop->addBasicBlockToLoop(CheckBlock, LI->getBase()); LoopBypassBlocks.push_back(CheckBlock); diff --git a/test/Transforms/LoopVectorize/induction.ll b/test/Transforms/LoopVectorize/induction.ll index 3102ec59bd..7dabcb2ba0 100644 --- a/test/Transforms/LoopVectorize/induction.ll +++ b/test/Transforms/LoopVectorize/induction.ll @@ -135,3 +135,37 @@ define i32 @max_i32_backedgetaken() nounwind readnone ssp uwtable { ; <label>:5 ; preds = %1 ret i32 %2 } + +; When generating the overflow check we must make sure that the induction start value +; is defined before the branch to the scalar preheader. 
+ +; CHECK-LABEL: testoverflowcheck +; CHECK: entry +; CHECK: %[[LOAD:.*]] = load i8 +; CHECK: %[[VAL:.*]] = zext i8 %[[LOAD]] to i32 +; CHECK: br + +; CHECK: scalar.ph +; CHECK: phi i32 [ %{{.*}}, %middle.block ], [ %[[VAL]], %entry ] + +@e = global i8 1, align 1 +@d = common global i32 0, align 4 +@c = common global i32 0, align 4 +define i32 @testoverflowcheck() { +entry: + %.pr.i = load i8* @e, align 1 + %0 = load i32* @d, align 4 + %c.promoted.i = load i32* @c, align 4 + br label %cond.end.i + +cond.end.i: + %inc4.i = phi i8 [ %.pr.i, %entry ], [ %inc.i, %cond.end.i ] + %and3.i = phi i32 [ %c.promoted.i, %entry ], [ %and.i, %cond.end.i ] + %and.i = and i32 %0, %and3.i + %inc.i = add i8 %inc4.i, 1 + %tobool.i = icmp eq i8 %inc.i, 0 + br i1 %tobool.i, label %loopexit, label %cond.end.i + +loopexit: + ret i32 %and.i +} |