diff options
-rw-r--r-- | lib/Transforms/Vectorize/LoopVectorize.cpp | 43 | ||||
-rw-r--r-- | test/Transforms/LoopVectorize/no_outside_user.ll | 41 |
2 files changed, 71 insertions, 13 deletions
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index bc420bd6a9..08d372512d 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -2378,6 +2378,26 @@ bool LoopVectorizationLegality::canVectorize() { return true; } +/// \brief Check that the instruction has outside loop users and is not an +/// identified reduction variable. +static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst, + SmallPtrSet<Value *, 4> &Reductions) { + // Reduction instructions are allowed to have exit users. All other + // instructions must not have external users. + if (!Reductions.count(Inst)) + //Check that all of the users of the loop are inside the BB. + for (Value::use_iterator I = Inst->use_begin(), E = Inst->use_end(); + I != E; ++I) { + Instruction *U = cast<Instruction>(*I); + // This user may be a reduction exit value. + if (!TheLoop->contains(U)) { + DEBUG(dbgs() << "LV: Found an outside user for : "<< *U << "\n"); + return true; + } + } + return false; +} + bool LoopVectorizationLegality::canVectorizeInstrs() { BasicBlock *PreHeader = TheLoop->getLoopPreheader(); BasicBlock *Header = TheLoop->getHeader(); @@ -2416,8 +2436,13 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { // If this PHINode is not in the header block, then we know that we // can convert it to select during if-conversion. No need to check if // the PHIs in this block are induction or reduction variables. - if (*bb != Header) - continue; + if (*bb != Header) { + // Check that this instruction has no outside users or is an + // identified reduction value with an outside user. + if(!hasOutsideLoopUser(TheLoop, it, AllowedExit)) + continue; + return false; + } // We only allow if-converted PHIs with more than two incoming values. 
if (Phi->getNumIncomingValues() != 2) { @@ -2510,17 +2535,9 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { // Reduction instructions are allowed to have exit users. // All other instructions must not have external users. - if (!AllowedExit.count(it)) - //Check that all of the users of the loop are inside the BB. - for (Value::use_iterator I = it->use_begin(), E = it->use_end(); - I != E; ++I) { - Instruction *U = cast<Instruction>(*I); - // This user may be a reduction exit value. - if (!TheLoop->contains(U)) { - DEBUG(dbgs() << "LV: Found an outside user for : "<< *U << "\n"); - return false; - } - } + if (hasOutsideLoopUser(TheLoop, it, AllowedExit)) + return false; + } // next instr. } diff --git a/test/Transforms/LoopVectorize/no_outside_user.ll b/test/Transforms/LoopVectorize/no_outside_user.ll new file mode 100644 index 0000000000..6f0357c5e5 --- /dev/null +++ b/test/Transforms/LoopVectorize/no_outside_user.ll @@ -0,0 +1,41 @@ +; RUN: opt -S -loop-vectorize -force-vector-unroll=1 -force-vector-width=2 < %s | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128" + +@f = common global i32 0, align 4 +@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1 +@c = common global i32 0, align 4 +@a = common global i32 0, align 4 +@b = common global i32 0, align 4 +@e = common global i32 0, align 4 + +; We used to vectorize this loop. But it has a value that is used outside of the loop +; and is not a recognized reduction variable "tmp17". 
+ +; CHECK-NOT: <2 x i32> + +define i32 @main() { +bb: + %b.promoted = load i32* @b, align 4 + br label %.lr.ph.i + +.lr.ph.i: + %tmp8 = phi i32 [ %tmp18, %bb16 ], [ %b.promoted, %bb ] + %tmp2 = icmp sgt i32 %tmp8, 10 + br i1 %tmp2, label %bb16, label %bb10 + +bb10: + br label %bb16 + +bb16: + %tmp17 = phi i32 [ 0, %bb10 ], [ 1, %.lr.ph.i ] + %tmp18 = add nsw i32 %tmp8, 1 + %tmp19 = icmp slt i32 %tmp18, 4 + br i1 %tmp19, label %.lr.ph.i, label %f1.exit.loopexit + +f1.exit.loopexit: + %.lcssa = phi i32 [ %tmp17, %bb16 ] + ret i32 %.lcssa +} + + |