summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Tyler Nowicki <tnowicki@apple.com> 2014-06-25 17:50:15 +0000
committer Tyler Nowicki <tnowicki@apple.com> 2014-06-25 17:50:15 +0000
commit d5a8fa72bb41eedbfde7e8124414123c49a2351c (patch)
tree 19220adf400ad00c6817f0b226ef3b861ea62b40
parent cae1ea691d79ce786e9209cb5917036f42f13d7c (diff)
download llvm-d5a8fa72bb41eedbfde7e8124414123c49a2351c.tar.gz
llvm-d5a8fa72bb41eedbfde7e8124414123c49a2351c.tar.bz2
llvm-d5a8fa72bb41eedbfde7e8124414123c49a2351c.tar.xz
Add Rpass-missed and Rpass-analysis reports to the loop vectorizer. The remarks give the vector width of vectorized loops and a brief analysis of loops that fail to be vectorized. For example, an analysis will be generated for loops containing control flow that cannot be simplified to a select. The optimization remarks also give the debug location of expressions that cannot be vectorized, for example the location of an unvectorizable call.
Reviewed by: Arnold Schwaighofer
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211721 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- lib/Transforms/Vectorize/LoopVectorize.cpp 207
-rw-r--r-- test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll 160
-rw-r--r-- test/Transforms/LoopVectorize/X86/vectorization-remarks.ll 8
-rw-r--r-- test/Transforms/LoopVectorize/control-flow.ll 78
-rw-r--r-- test/Transforms/LoopVectorize/no_switch.ll 85
5 files changed, 511 insertions, 27 deletions
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index 27452825c7..cb8a41dbea 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -209,6 +209,29 @@ namespace {
class LoopVectorizationLegality;
class LoopVectorizationCostModel;
+/// Builder for one vectorization analysis message. Text is appended with operator<< after a fixed
+/// "loop not vectorized: " prefix; an optional Instruction can be attached for callers to query.
+class Report {
+ std::string Message; // Backing storage that Out writes into.
+ raw_string_ostream Out; // Stream bound to Message; declared after Message so Message is constructed first.
+ Instruction *Instr; // Instruction this message is about, or nullptr if none was supplied.
+
+public:
+ Report(Instruction *I = nullptr) : Out(Message), Instr(I) { // Also serves as the default constructor.
+ Out << "loop not vectorized: "; // Every report begins with this prefix.
+ }
+
+ template <typename A> Report &operator<<(const A &Value) { // Accepts anything Out can stream.
+ Out << Value;
+ return *this; // Returning *this allows chained appends.
+ }
+
+ Instruction *getInstr() { return Instr; } // May return nullptr.
+
+ std::string &str() { return Out.str(); } // Text accumulated so far, obtained through the stream.
+ operator Twine() { return Out.str(); } // Implicit conversion. NOTE(review): Twine presumably refers to the string rather than copying it - confirm lifetime at call sites.
+};
+
/// InnerLoopVectorizer vectorizes loops which contain only one basic
/// block to a specified vectorization factor (VF).
/// This class performs the widening of scalars into vectors, or multiple
@@ -515,10 +538,12 @@ public:
unsigned NumPredStores;
LoopVectorizationLegality(Loop *L, ScalarEvolution *SE, const DataLayout *DL,
- DominatorTree *DT, TargetLibraryInfo *TLI)
+ DominatorTree *DT, TargetLibraryInfo *TLI,
+ Function *F)
: NumLoads(0), NumStores(0), NumPredStores(0), TheLoop(L), SE(SE), DL(DL),
- DT(DT), TLI(TLI), Induction(nullptr), WidestIndTy(nullptr),
- HasFunNoNaNAttr(false), MaxSafeDepDistBytes(-1U) {}
+ DT(DT), TLI(TLI), TheFunction(F), Induction(nullptr),
+ WidestIndTy(nullptr), HasFunNoNaNAttr(false), MaxSafeDepDistBytes(-1U) {
+ }
/// This enum represents the kinds of reductions that we support.
enum ReductionKind {
@@ -747,6 +772,16 @@ private:
/// invariant.
void collectStridedAcccess(Value *LoadOrStoreInst);
+ /// Emit \p Message as an optimization-analysis remark for TheFunction, located at the instruction
+ /// attached to the message when there is one and at the start of TheLoop otherwise.
+ void emitAnalysis(Report &Message) {
+ DebugLoc DL = TheLoop->getStartLoc(); // Default location. Note: this local shadows the class's DL member inside this function.
+ if (Instruction *I = Message.getInstr())
+ DL = I->getDebugLoc(); // An attached instruction overrides the loop start location.
+ emitOptimizationRemarkAnalysis(TheFunction->getContext(), DEBUG_TYPE,
+ *TheFunction, DL, Message.str());
+ }
+
/// The loop that we evaluate.
Loop *TheLoop;
/// Scev analysis.
@@ -757,6 +792,8 @@ private:
DominatorTree *DT;
/// Target Library Info.
TargetLibraryInfo *TLI;
+ /// Parent function
+ Function *TheFunction;
// --- vectorization state --- //
@@ -942,6 +979,29 @@ public:
LoopID = NewLoopID;
}
+ std::string emitRemark() const { // Despite the name, this only builds and returns the remark text; it emits nothing itself.
+ Report R; // No instruction attached; the text starts with "loop not vectorized: ".
+ R << "vectorization ";
+ switch (Force) { // One phrase per ForceKind value.
+ case LoopVectorizeHints::FK_Disabled:
+ R << "is explicitly disabled";
+ break;
+ case LoopVectorizeHints::FK_Enabled:
+ R << "is explicitly enabled";
+ if (Width != 0 && Unroll != 0) // Width / interleave count are mentioned only when non-zero.
+ R << " with width " << Width << " and interleave count " << Unroll;
+ else if (Width != 0)
+ R << " with width " << Width;
+ else if (Unroll != 0)
+ R << " with interleave count " << Unroll;
+ break;
+ case LoopVectorizeHints::FK_Undefined:
+ R << "was not specified";
+ break;
+ }
+ return R.str(); // Returned by value, so the caller gets a copy that outlives R.
+ }
+
unsigned getWidth() const { return Width; }
unsigned getUnroll() const { return Unroll; }
enum ForceKind getForce() const { return Force; }
@@ -1125,18 +1185,37 @@ struct LoopVectorize : public FunctionPass {
: "?")) << " width=" << Hints.getWidth()
<< " unroll=" << Hints.getUnroll() << "\n");
+ // Function containing loop
+ Function *F = L->getHeader()->getParent();
+
+ // Looking at the diagnostic output is the only way to determine if a loop
+ // was vectorized (other than looking at the IR or machine code), so it
+ // is important to generate an optimization remark for each loop. Most of
+ // these messages are generated by emitOptimizationRemarkAnalysis. Remarks
+ // generated by emitOptimizationRemark and emitOptimizationRemarkMissed are
+ // less verbose; they report vectorized loops and unvectorized loops that may
+ // benefit from vectorization, respectively.
+
if (Hints.getForce() == LoopVectorizeHints::FK_Disabled) {
DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n");
+ emitOptimizationRemarkAnalysis(F->getContext(), DEBUG_TYPE, *F,
+ L->getStartLoc(), Hints.emitRemark());
return false;
}
if (!AlwaysVectorize && Hints.getForce() != LoopVectorizeHints::FK_Enabled) {
DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n");
+ emitOptimizationRemarkAnalysis(F->getContext(), DEBUG_TYPE, *F,
+ L->getStartLoc(), Hints.emitRemark());
return false;
}
if (Hints.getWidth() == 1 && Hints.getUnroll() == 1) {
DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n");
+ emitOptimizationRemarkAnalysis(
+ F->getContext(), DEBUG_TYPE, *F, L->getStartLoc(),
+ "loop not vectorized: vector width and interleave count are "
+ "explicitly set to 1");
return false;
}
@@ -1151,14 +1230,19 @@ struct LoopVectorize : public FunctionPass {
DEBUG(dbgs() << " But vectorizing was explicitly forced.\n");
else {
DEBUG(dbgs() << "\n");
+ emitOptimizationRemarkAnalysis(
+ F->getContext(), DEBUG_TYPE, *F, L->getStartLoc(),
+ "vectorization is not beneficial and is not explicitly forced");
return false;
}
}
// Check if it is legal to vectorize the loop.
- LoopVectorizationLegality LVL(L, SE, DL, DT, TLI);
+ LoopVectorizationLegality LVL(L, SE, DL, DT, TLI, F);
if (!LVL.canVectorize()) {
DEBUG(dbgs() << "LV: Not vectorizing: Cannot prove legality.\n");
+ emitOptimizationRemarkMissed(F->getContext(), DEBUG_TYPE, *F,
+ L->getStartLoc(), Hints.emitRemark());
return false;
}
@@ -1167,7 +1251,6 @@ struct LoopVectorize : public FunctionPass {
// Check the function attributes to find out if this function should be
// optimized for size.
- Function *F = L->getHeader()->getParent();
bool OptForSize = Hints.getForce() != LoopVectorizeHints::FK_Enabled &&
F->hasFnAttribute(Attribute::OptimizeForSize);
@@ -1190,6 +1273,11 @@ struct LoopVectorize : public FunctionPass {
if (F->hasFnAttribute(Attribute::NoImplicitFloat)) {
DEBUG(dbgs() << "LV: Can't vectorize when the NoImplicitFloat"
"attribute is used.\n");
+ emitOptimizationRemarkAnalysis(
+ F->getContext(), DEBUG_TYPE, *F, L->getStartLoc(),
+ "loop not vectorized due to NoImplicitFloat attribute");
+ emitOptimizationRemarkMissed(F->getContext(), DEBUG_TYPE, *F,
+ L->getStartLoc(), Hints.emitRemark());
return false;
}
@@ -1208,9 +1296,14 @@ struct LoopVectorize : public FunctionPass {
DEBUG(dbgs() << "LV: Unroll Factor is " << UF << '\n');
if (VF.Width == 1) {
- DEBUG(dbgs() << "LV: Vectorization is possible but not beneficial.\n");
- if (UF == 1)
+ DEBUG(dbgs() << "LV: Vectorization is possible but not beneficial\n");
+
+ if (UF == 1) {
+ emitOptimizationRemarkAnalysis(
+ F->getContext(), DEBUG_TYPE, *F, L->getStartLoc(),
+ "not beneficial to vectorize and user disabled interleaving");
return false;
+ }
DEBUG(dbgs() << "LV: Trying to at least unroll the loops.\n");
// Report the unrolling decision.
@@ -1220,6 +1313,7 @@ struct LoopVectorize : public FunctionPass {
" (vectorization not beneficial)"));
// We decided not to vectorize, but we may want to unroll.
+
InnerLoopUnroller Unroller(L, SE, LI, DT, DL, TLI, UF);
Unroller.vectorize(&LVL);
} else {
@@ -3213,8 +3307,10 @@ static bool canIfConvertPHINodes(BasicBlock *BB) {
}
bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
- if (!EnableIfConversion)
+ if (!EnableIfConversion) {
+ emitAnalysis(Report() << "if-conversion is disabled");
return false;
+ }
assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable");
@@ -3244,16 +3340,24 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
BasicBlock *BB = *BI;
// We don't support switch statements inside loops.
- if (!isa<BranchInst>(BB->getTerminator()))
+ if (!isa<BranchInst>(BB->getTerminator())) {
+ emitAnalysis(Report(BB->getTerminator())
+ << "loop contains a switch statement");
return false;
+ }
// We must be able to predicate all blocks that need to be predicated.
if (blockNeedsPredication(BB)) {
- if (!blockCanBePredicated(BB, SafePointes))
+ if (!blockCanBePredicated(BB, SafePointes)) {
+ emitAnalysis(Report(BB->getTerminator())
+ << "control flow cannot be substituted for a select");
return false;
- } else if (BB != Header && !canIfConvertPHINodes(BB))
+ }
+ } else if (BB != Header && !canIfConvertPHINodes(BB)) {
+ emitAnalysis(Report(BB->getTerminator())
+ << "control flow cannot be substituted for a select");
return false;
-
+ }
}
// We can if-convert this loop.
@@ -3263,20 +3367,31 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
bool LoopVectorizationLegality::canVectorize() {
// We must have a loop in canonical form. Loops with indirectbr in them cannot
// be canonicalized.
- if (!TheLoop->getLoopPreheader())
+ if (!TheLoop->getLoopPreheader()) {
+ emitAnalysis(
+ Report() << "loop control flow is not understood by vectorizer");
return false;
+ }
// We can only vectorize innermost loops.
- if (TheLoop->getSubLoopsVector().size())
+ if (TheLoop->getSubLoopsVector().size()) {
+ emitAnalysis(Report() << "loop is not the innermost loop");
return false;
+ }
// We must have a single backedge.
- if (TheLoop->getNumBackEdges() != 1)
+ if (TheLoop->getNumBackEdges() != 1) {
+ emitAnalysis(
+ Report() << "loop control flow is not understood by vectorizer");
return false;
+ }
// We must have a single exiting block.
- if (!TheLoop->getExitingBlock())
+ if (!TheLoop->getExitingBlock()) {
+ emitAnalysis(
+ Report() << "loop control flow is not understood by vectorizer");
return false;
+ }
// We need to have a loop header.
DEBUG(dbgs() << "LV: Found a loop: " <<
@@ -3292,6 +3407,7 @@ bool LoopVectorizationLegality::canVectorize() {
// ScalarEvolution needs to be able to find the exit count.
const SCEV *ExitCount = SE->getBackedgeTakenCount(TheLoop);
if (ExitCount == SE->getCouldNotCompute()) {
+ emitAnalysis(Report() << "could not determine number of loop iterations");
DEBUG(dbgs() << "LV: SCEV could not compute the loop exit count.\n");
return false;
}
@@ -3385,6 +3501,8 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
if (!PhiTy->isIntegerTy() &&
!PhiTy->isFloatingPointTy() &&
!PhiTy->isPointerTy()) {
+ emitAnalysis(Report(it)
+ << "loop control flow is not understood by vectorizer");
DEBUG(dbgs() << "LV: Found an non-int non-pointer PHI.\n");
return false;
}
@@ -3395,13 +3513,17 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
if (*bb != Header) {
// Check that this instruction has no outside users or is an
// identified reduction value with an outside user.
- if(!hasOutsideLoopUser(TheLoop, it, AllowedExit))
+ if (!hasOutsideLoopUser(TheLoop, it, AllowedExit))
continue;
+ emitAnalysis(Report(it) << "value that could not be identified as "
+ "reduction is used outside the loop");
return false;
}
// We only allow if-converted PHIs with exactly two incoming values.
if (Phi->getNumIncomingValues() != 2) {
+ emitAnalysis(Report(it)
+ << "control flow not understood by vectorizer");
DEBUG(dbgs() << "LV: Found an invalid PHI.\n");
return false;
}
@@ -3432,8 +3554,11 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
// Until we explicitly handle the case of an induction variable with
// an outside loop user we have to give up vectorizing this loop.
- if (hasOutsideLoopUser(TheLoop, it, AllowedExit))
+ if (hasOutsideLoopUser(TheLoop, it, AllowedExit)) {
+ emitAnalysis(Report(it) << "use of induction value outside of the "
+ "loop is not handled by vectorizer");
return false;
+ }
continue;
}
@@ -3476,6 +3601,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
continue;
}
+ emitAnalysis(Report(it) << "unvectorizable operation");
DEBUG(dbgs() << "LV: Found an unidentified PHI."<< *Phi <<"\n");
return false;
}// end of PHI handling
@@ -3484,6 +3610,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
// calls and we do handle certain intrinsic and libm functions.
CallInst *CI = dyn_cast<CallInst>(it);
if (CI && !getIntrinsicIDForCall(CI, TLI) && !isa<DbgInfoIntrinsic>(CI)) {
+ emitAnalysis(Report(it) << "call instruction cannot be vectorized");
DEBUG(dbgs() << "LV: Found a call site.\n");
return false;
}
@@ -3493,6 +3620,8 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
if (CI &&
hasVectorInstrinsicScalarOpd(getIntrinsicIDForCall(CI, TLI), 1)) {
if (!SE->isLoopInvariant(SE->getSCEV(CI->getOperand(1)), TheLoop)) {
+ emitAnalysis(Report(it)
+ << "intrinsic instruction cannot be vectorized");
DEBUG(dbgs() << "LV: Found unvectorizable intrinsic " << *CI << "\n");
return false;
}
@@ -3502,6 +3631,8 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
// Also, we can't vectorize extractelement instructions.
if ((!VectorType::isValidElementType(it->getType()) &&
!it->getType()->isVoidTy()) || isa<ExtractElementInst>(it)) {
+ emitAnalysis(Report(it)
+ << "instruction return type cannot be vectorized");
DEBUG(dbgs() << "LV: Found unvectorizable type.\n");
return false;
}
@@ -3509,8 +3640,10 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
// Check that the stored type is vectorizable.
if (StoreInst *ST = dyn_cast<StoreInst>(it)) {
Type *T = ST->getValueOperand()->getType();
- if (!VectorType::isValidElementType(T))
+ if (!VectorType::isValidElementType(T)) {
+ emitAnalysis(Report(ST) << "store instruction cannot be vectorized");
return false;
+ }
if (EnableMemAccessVersioning)
collectStridedAcccess(ST);
}
@@ -3521,8 +3654,10 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
// Reduction instructions are allowed to have exit users.
// All other instructions must not have external users.
- if (hasOutsideLoopUser(TheLoop, it, AllowedExit))
+ if (hasOutsideLoopUser(TheLoop, it, AllowedExit)) {
+ emitAnalysis(Report(it) << "value cannot be used outside the loop");
return false;
+ }
} // next instr.
@@ -3530,8 +3665,11 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
if (!Induction) {
DEBUG(dbgs() << "LV: Did not find one integer induction var.\n");
- if (Inductions.empty())
+ if (Inductions.empty()) {
+ emitAnalysis(Report()
+ << "loop induction variable could not be identified");
return false;
+ }
}
return true;
@@ -4438,8 +4576,9 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
continue;
LoadInst *Ld = dyn_cast<LoadInst>(it);
- if (!Ld) return false;
- if (!Ld->isSimple() && !IsAnnotatedParallel) {
+ if (!Ld || (!Ld->isSimple() && !IsAnnotatedParallel)) {
+ emitAnalysis(Report(Ld)
+ << "read with atomic ordering or volatile read");
DEBUG(dbgs() << "LV: Found a non-simple load.\n");
return false;
}
@@ -4452,8 +4591,13 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
// Save 'store' instructions. Abort if other instructions write to memory.
if (it->mayWriteToMemory()) {
StoreInst *St = dyn_cast<StoreInst>(it);
- if (!St) return false;
+ if (!St) {
+ emitAnalysis(Report(it) << "instruction cannot be vectorized");
+ return false;
+ }
if (!St->isSimple() && !IsAnnotatedParallel) {
+ emitAnalysis(Report(St)
+ << "write with atomic ordering or volatile write");
DEBUG(dbgs() << "LV: Found a non-simple store.\n");
return false;
}
@@ -4490,6 +4634,9 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
Value* Ptr = ST->getPointerOperand();
if (isUniform(Ptr)) {
+ emitAnalysis(
+ Report(ST)
+ << "write to a loop invariant address could not be vectorized");
DEBUG(dbgs() << "LV: We don't allow storing to uniform addresses\n");
return false;
}
@@ -4568,6 +4715,7 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
}
if (NeedRTCheck && !CanDoRT) {
+ emitAnalysis(Report() << "cannot identify array bounds");
DEBUG(dbgs() << "LV: We can't vectorize because we can't find " <<
"the array bounds.\n");
PtrRtCheck.reset();
@@ -4598,6 +4746,14 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
// Check that we did not collect too many pointers or find an unsizeable
// pointer.
if (!CanDoRT || NumComparisons > RuntimeMemoryCheckThreshold) {
+ if (!CanDoRT && NumComparisons > 0)
+ emitAnalysis(Report()
+ << "cannot check memory dependencies at runtime");
+ else
+ emitAnalysis(Report()
+ << NumComparisons << " exceeds limit of "
+ << RuntimeMemoryCheckThreshold
+ << " dependent memory operations checked at runtime");
DEBUG(dbgs() << "LV: Can't vectorize with memory checks\n");
PtrRtCheck.reset();
return false;
@@ -4607,6 +4763,9 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
}
}
+ if (!CanVecMem)
+ emitAnalysis(Report() << "unsafe dependent memory operations in loop");
+
DEBUG(dbgs() << "LV: We" << (NeedRTCheck ? "" : " don't") <<
" need a runtime memory check.\n");
diff --git a/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll b/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll
new file mode 100644
index 0000000000..6cdd29bda2
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll
@@ -0,0 +1,160 @@
+; RUN: opt < %s -loop-vectorize -S -pass-remarks-missed='loop-vectorize' -pass-remarks-analysis='loop-vectorize' 2>&1 | FileCheck %s
+
+; C/C++ code for tests
+; void test(int *A, int Length) {
+; #pragma clang loop vectorize(enable) interleave(enable)
+; for (int i = 0; i < Length; i++) {
+; A[i] = i;
+; if (A[i] > Length)
+; break;
+; }
+; }
+
+; void test_disabled(int *A, int Length) {
+; #pragma clang loop vectorize(disable) interleave(disable)
+; for (int i = 0; i < Length; i++)
+; A[i] = i;
+; }
+
+; void test_array_bounds(int *A, int *B, int Length) {
+; #pragma clang loop vectorize(enable)
+; for (int i = 0; i < Length; i++)
+; A[i] = A[B[i]];
+; }
+
+; File, line, and column should match those specified in the metadata
+; CHECK: remark: source.cpp:4:5: loop not vectorized: could not determine number of loop iterations
+; CHECK: remark: source.cpp:4:5: loop not vectorized: vectorization was not specified
+; CHECK: remark: source.cpp:13:5: loop not vectorized: vector width and interleave count are explicitly set to 1
+; CHECK: remark: source.cpp:19:5: loop not vectorized: cannot identify array bounds
+; CHECK: remark: source.cpp:19:5: loop not vectorized: vectorization is explicitly enabled
+
+; CHECK: _Z4testPii
+; CHECK-NOT: x i32>
+; CHECK: ret
+
+; CHECK: _Z13test_disabledPii
+; CHECK-NOT: x i32>
+; CHECK: ret
+
+; CHECK: _Z17test_array_boundsPiS_i
+; CHECK-NOT: x i32>
+; CHECK: ret
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+; Function Attrs: nounwind (attribute group #0 in this file contains only nounwind)
+define void @_Z4testPii(i32* nocapture %A, i32 %Length) #0 {
+entry:
+ %cmp10 = icmp sgt i32 %Length, 0, !dbg !12 ; guard: enter the loop only when Length > 0
+ br i1 %cmp10, label %for.body, label %for.end, !dbg !12, !llvm.loop !14 ; !14 is attached here only; the back-edge branch below carries no !llvm.loop (presumably why the expected remark says "was not specified")
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32* %A, i64 %indvars.iv, !dbg !16
+ %0 = trunc i64 %indvars.iv to i32, !dbg !16
+ store i32 %0, i32* %arrayidx, align 4, !dbg !16, !tbaa !18 ; A[i] = i
+ %cmp3 = icmp sle i32 %0, %Length, !dbg !22 ; inverse of the source's early-exit test (i > Length)
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !12
+ %1 = trunc i64 %indvars.iv.next to i32
+ %cmp = icmp slt i32 %1, %Length, !dbg !12 ; ordinary loop-bound test
+ %or.cond = and i1 %cmp3, %cmp, !dbg !22 ; both conditions must hold to keep iterating
+ br i1 %or.cond, label %for.body, label %for.end, !dbg !22
+
+for.end: ; preds = %for.body, %entry
+ ret void, !dbg !24
+}
+
+; Function Attrs: nounwind (attribute group #0 in this file contains only nounwind)
+define void @_Z13test_disabledPii(i32* nocapture %A, i32 %Length) #0 {
+entry:
+ %cmp4 = icmp sgt i32 %Length, 0, !dbg !25 ; guard: enter the loop only when Length > 0
+ br i1 %cmp4, label %for.body, label %for.end, !dbg !25, !llvm.loop !27
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32* %A, i64 %indvars.iv, !dbg !30
+ %0 = trunc i64 %indvars.iv to i32, !dbg !30
+ store i32 %0, i32* %arrayidx, align 4, !dbg !30, !tbaa !18 ; A[i] = i
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !25
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !25
+ %exitcond = icmp eq i32 %lftr.wideiv, %Length, !dbg !25
+ br i1 %exitcond, label %for.end, label %for.body, !dbg !25, !llvm.loop !27 ; !27 lists the unroll=1 (!28) and width=1 (!29) hints
+
+for.end: ; preds = %for.body, %entry
+ ret void, !dbg !31
+}
+
+; Function Attrs: nounwind (attribute group #0 in this file contains only nounwind)
+define void @_Z17test_array_boundsPiS_i(i32* nocapture %A, i32* nocapture readonly %B, i32 %Length) #0 {
+entry:
+ %cmp9 = icmp sgt i32 %Length, 0, !dbg !32 ; guard: enter the loop only when Length > 0
+ br i1 %cmp9, label %for.body.preheader, label %for.end, !dbg !32, !llvm.loop !34
+
+for.body.preheader: ; preds = %entry
+ br label %for.body, !dbg !35
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
+ %arrayidx = getelementptr inbounds i32* %B, i64 %indvars.iv, !dbg !35
+ %0 = load i32* %arrayidx, align 4, !dbg !35, !tbaa !18 ; B[i]
+ %idxprom1 = sext i32 %0 to i64, !dbg !35
+ %arrayidx2 = getelementptr inbounds i32* %A, i64 %idxprom1, !dbg !35 ; address of A[B[i]]: index comes from loaded data
+ %1 = load i32* %arrayidx2, align 4, !dbg !35, !tbaa !18 ; A[B[i]]
+ %arrayidx4 = getelementptr inbounds i32* %A, i64 %indvars.iv, !dbg !35
+ store i32 %1, i32* %arrayidx4, align 4, !dbg !35, !tbaa !18 ; A[i] = A[B[i]]
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !32
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !32
+ %exitcond = icmp eq i32 %lftr.wideiv, %Length, !dbg !32
+ br i1 %exitcond, label %for.end.loopexit, label %for.body, !dbg !32, !llvm.loop !34 ; !34 references the vectorize.enable hint (!15)
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ ret void, !dbg !36
+}
+
+attributes #0 = { nounwind }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!9, !10}
+!llvm.ident = !{!11}
+
+!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 2}
+!1 = metadata !{metadata !"source.cpp", metadata !"."}
+!2 = metadata !{}
+!3 = metadata !{metadata !4, metadata !7, metadata !8}
+!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"test", metadata !"test", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (i32*, i32)* @_Z4testPii, null, null, metadata !2, i32 1}
+!5 = metadata !{i32 786473, metadata !1}
+!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !2, i32 0, null, null, null}
+!7 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"test_disabled", metadata !"test_disabled", metadata !"", i32 10, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (i32*, i32)* @_Z13test_disabledPii, null, null, metadata !2, i32 10}
+!8 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"test_array_bounds", metadata !"test_array_bounds", metadata !"", i32 16, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (i32*, i32*, i32)* @_Z17test_array_boundsPiS_i, null, null, metadata !2, i32 16}
+!9 = metadata !{i32 2, metadata !"Dwarf Version", i32 2}
+!10 = metadata !{i32 2, metadata !"Debug Info Version", i32 1}
+!11 = metadata !{metadata !"clang version 3.5.0"}
+!12 = metadata !{i32 3, i32 8, metadata !13, null}
+!13 = metadata !{i32 786443, metadata !1, metadata !4, i32 3, i32 3, i32 0, i32 0}
+!14 = metadata !{metadata !14, metadata !15, metadata !15}
+!15 = metadata !{metadata !"llvm.loop.vectorize.enable", i1 true}
+!16 = metadata !{i32 4, i32 5, metadata !17, null}
+!17 = metadata !{i32 786443, metadata !1, metadata !13, i32 3, i32 36, i32 0, i32 1}
+!18 = metadata !{metadata !19, metadata !19, i64 0}
+!19 = metadata !{metadata !"int", metadata !20, i64 0}
+!20 = metadata !{metadata !"omnipotent char", metadata !21, i64 0}
+!21 = metadata !{metadata !"Simple C/C++ TBAA"}
+!22 = metadata !{i32 5, i32 9, metadata !23, null}
+!23 = metadata !{i32 786443, metadata !1, metadata !17, i32 5, i32 9, i32 0, i32 2}
+!24 = metadata !{i32 8, i32 1, metadata !4, null}
+!25 = metadata !{i32 12, i32 8, metadata !26, null}
+!26 = metadata !{i32 786443, metadata !1, metadata !7, i32 12, i32 3, i32 0, i32 3}
+!27 = metadata !{metadata !27, metadata !28, metadata !29}
+!28 = metadata !{metadata !"llvm.loop.vectorize.unroll", i32 1}
+!29 = metadata !{metadata !"llvm.loop.vectorize.width", i32 1}
+!30 = metadata !{i32 13, i32 5, metadata !26, null}
+!31 = metadata !{i32 14, i32 1, metadata !7, null}
+!32 = metadata !{i32 18, i32 8, metadata !33, null}
+!33 = metadata !{i32 786443, metadata !1, metadata !8, i32 18, i32 3, i32 0, i32 4}
+!34 = metadata !{metadata !34, metadata !15}
+!35 = metadata !{i32 19, i32 5, metadata !33, null}
+!36 = metadata !{i32 20, i32 1, metadata !8, null}
diff --git a/test/Transforms/LoopVectorize/X86/vectorization-remarks.ll b/test/Transforms/LoopVectorize/X86/vectorization-remarks.ll
index f1641762fd..f6834477ff 100644
--- a/test/Transforms/LoopVectorize/X86/vectorization-remarks.ll
+++ b/test/Transforms/LoopVectorize/X86/vectorization-remarks.ll
@@ -1,5 +1,6 @@
; RUN: opt < %s -loop-vectorize -mtriple=x86_64-unknown-linux -S -pass-remarks='loop-vectorize' 2>&1 | FileCheck -check-prefix=VECTORIZED %s
-; RUN: opt < %s -loop-vectorize -force-vector-width=1 -force-vector-unroll=4 -mtriple=x86_64-unknown-linux -S -pass-remarks='.*vectorize.*' 2>&1 | FileCheck -check-prefix=UNROLLED %s
+; RUN: opt < %s -loop-vectorize -force-vector-width=1 -force-vector-unroll=4 -mtriple=x86_64-unknown-linux -S -pass-remarks='loop-vectorize' 2>&1 | FileCheck -check-prefix=UNROLLED %s
+; RUN: opt < %s -loop-vectorize -force-vector-width=1 -force-vector-unroll=1 -mtriple=x86_64-unknown-linux -S -pass-remarks-analysis='loop-vectorize' 2>&1 | FileCheck -check-prefix=NONE %s
; This code has all the !dbg annotations needed to track source line information,
; but is missing the llvm.dbg.cu annotation. This prevents code generation from
@@ -8,8 +9,9 @@
; DEBUG-OUTPUT-NOT: .loc
; DEBUG-OUTPUT-NOT: {{.*}}.debug_info
-; VECTORIZED: remark: {{.*}}.c:17:8: vectorized loop (vectorization factor: 4, unrolling interleave factor: 1)
-; UNROLLED: remark: {{.*}}.c:17:8: unrolled with interleaving factor 4 (vectorization not beneficial)
+; VECTORIZED: remark: vectorization-remarks.c:17:8: vectorized loop (vectorization factor: 4, unrolling interleave factor: 1)
+; UNROLLED: remark: vectorization-remarks.c:17:8: unrolled with interleaving factor 4 (vectorization not beneficial)
+; NONE: remark: vectorization-remarks.c:17:8: loop not vectorized: vector width and interleave count are explicitly set to 1
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
diff --git a/test/Transforms/LoopVectorize/control-flow.ll b/test/Transforms/LoopVectorize/control-flow.ll
new file mode 100644
index 0000000000..e4ba77fa3d
--- /dev/null
+++ b/test/Transforms/LoopVectorize/control-flow.ll
@@ -0,0 +1,78 @@
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -S -pass-remarks-missed='loop-vectorize' -pass-remarks-analysis='loop-vectorize' 2>&1 | FileCheck %s
+
+; C/C++ code for control flow test
+; int test(int *A, int Length) {
+; for (int i = 0; i < Length; i++) {
+; if (A[i] > 10.0) goto end;
+; A[i] = 0;
+; }
+; end:
+; return 0;
+; }
+
+; CHECK: remark: source.cpp:5:9: loop not vectorized: loop control flow is not understood by vectorizer
+; CHECK: remark: source.cpp:5:9: loop not vectorized: vectorization was not specified
+
+; CHECK: _Z4testPii
+; CHECK-NOT: x i32>
+; CHECK: ret
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+; Function Attrs: nounwind (attribute group #0 in this file contains only nounwind)
+define i32 @_Z4testPii(i32* nocapture %A, i32 %Length) #0 {
+entry:
+ %cmp8 = icmp sgt i32 %Length, 0, !dbg !10 ; guard: enter the loop only when Length > 0
+ br i1 %cmp8, label %for.body.preheader, label %end, !dbg !10
+
+for.body.preheader: ; preds = %entry
+ br label %for.body, !dbg !12
+
+for.body: ; preds = %for.body.preheader, %if.else
+ %indvars.iv = phi i64 [ %indvars.iv.next, %if.else ], [ 0, %for.body.preheader ]
+ %arrayidx = getelementptr inbounds i32* %A, i64 %indvars.iv, !dbg !12
+ %0 = load i32* %arrayidx, align 4, !dbg !12, !tbaa !15 ; A[i]
+ %cmp1 = icmp sgt i32 %0, 10, !dbg !12
+ br i1 %cmp1, label %end.loopexit, label %if.else, !dbg !12 ; first loop exit: leave when A[i] > 10
+
+if.else: ; preds = %for.body
+ store i32 0, i32* %arrayidx, align 4, !dbg !19, !tbaa !15 ; A[i] = 0
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !10
+ %1 = trunc i64 %indvars.iv.next to i32, !dbg !10
+ %cmp = icmp slt i32 %1, %Length, !dbg !10
+ br i1 %cmp, label %for.body, label %end.loopexit, !dbg !10 ; second loop exit: the bound test, so the loop has two exiting blocks
+
+end.loopexit: ; preds = %if.else, %for.body
+ br label %end
+
+end: ; preds = %end.loopexit, %entry
+ ret i32 0, !dbg !20
+}
+
+attributes #0 = { nounwind }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!7, !8}
+!llvm.ident = !{!9}
+
+!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 2}
+!1 = metadata !{metadata !"source.cpp", metadata !"."}
+!2 = metadata !{}
+!3 = metadata !{metadata !4}
+!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"test", metadata !"test", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32*, i32)* @_Z4testPii, null, null, metadata !2, i32 2}
+!5 = metadata !{i32 786473, metadata !1}
+!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !2, i32 0, null, null, null}
+!7 = metadata !{i32 2, metadata !"Dwarf Version", i32 2}
+!8 = metadata !{i32 2, metadata !"Debug Info Version", i32 1}
+!9 = metadata !{metadata !"clang version 3.5.0"}
+!10 = metadata !{i32 3, i32 8, metadata !11, null}
+!11 = metadata !{i32 786443, metadata !1, metadata !4, i32 3, i32 3, i32 0, i32 0}
+!12 = metadata !{i32 5, i32 9, metadata !13, null}
+!13 = metadata !{i32 786443, metadata !1, metadata !14, i32 5, i32 9, i32 0, i32 2}
+!14 = metadata !{i32 786443, metadata !1, metadata !11, i32 4, i32 3, i32 0, i32 1}
+!15 = metadata !{metadata !16, metadata !16, i64 0}
+!16 = metadata !{metadata !"int", metadata !17, i64 0}
+!17 = metadata !{metadata !"omnipotent char", metadata !18, i64 0}
+!18 = metadata !{metadata !"Simple C/C++ TBAA"}
+!19 = metadata !{i32 8, i32 7, metadata !13, null}
+!20 = metadata !{i32 12, i32 3, metadata !4, null}
diff --git a/test/Transforms/LoopVectorize/no_switch.ll b/test/Transforms/LoopVectorize/no_switch.ll
new file mode 100644
index 0000000000..52b42850f4
--- /dev/null
+++ b/test/Transforms/LoopVectorize/no_switch.ll
@@ -0,0 +1,85 @@
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -S -pass-remarks-missed='loop-vectorize' -pass-remarks-analysis='loop-vectorize' 2>&1 | FileCheck %s
+
+; CHECK: remark: source.cpp:4:5: loop not vectorized: loop contains a switch statement
+; CHECK: remark: source.cpp:4:5: loop not vectorized: vectorization is explicitly enabled with width 4
+
+; CHECK: _Z11test_switchPii
+; CHECK-NOT: x i32>
+; CHECK: ret
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+; Function Attrs: nounwind (attribute group #0 in this file contains only nounwind)
+define void @_Z11test_switchPii(i32* nocapture %A, i32 %Length) #0 {
+entry:
+ %cmp18 = icmp sgt i32 %Length, 0, !dbg !10 ; guard: enter the loop only when Length > 0
+ br i1 %cmp18, label %for.body.preheader, label %for.end, !dbg !10, !llvm.loop !12
+
+for.body.preheader: ; preds = %entry
+ br label %for.body, !dbg !14
+
+for.body: ; preds = %for.body.preheader, %for.inc
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %for.body.preheader ]
+ %arrayidx = getelementptr inbounds i32* %A, i64 %indvars.iv, !dbg !14
+ %0 = load i32* %arrayidx, align 4, !dbg !14, !tbaa !16 ; A[i]
+ switch i32 %0, label %for.inc [
+ i32 0, label %sw.bb
+ i32 1, label %sw.bb3
+ ], !dbg !14 ; non-branch terminator inside the loop; its !dbg (!14) is line 4, column 5, matching the expected remark location
+
+sw.bb: ; preds = %for.body
+ %1 = trunc i64 %indvars.iv to i32, !dbg !20
+ %mul = shl nsw i32 %1, 1, !dbg !20 ; i * 2, expressed as a left shift by one
+ br label %for.inc, !dbg !22
+
+sw.bb3: ; preds = %for.body
+ %2 = trunc i64 %indvars.iv to i32, !dbg !23
+ store i32 %2, i32* %arrayidx, align 4, !dbg !23, !tbaa !16 ; A[i] = i
+ br label %for.inc, !dbg !23
+
+for.inc: ; preds = %sw.bb3, %for.body, %sw.bb
+ %storemerge = phi i32 [ %mul, %sw.bb ], [ 0, %for.body ], [ 0, %sw.bb3 ]
+ store i32 %storemerge, i32* %arrayidx, align 4, !dbg !20, !tbaa !16 ; writes the merged value back to A[i]
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !10
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !10
+ %exitcond = icmp eq i32 %lftr.wideiv, %Length, !dbg !10
+ br i1 %exitcond, label %for.end.loopexit, label %for.body, !dbg !10, !llvm.loop !12 ; !12 references the vectorize.enable hint (!13)
+
+for.end.loopexit: ; preds = %for.inc
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ ret void, !dbg !24
+}
+
+attributes #0 = { nounwind }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!7, !8}
+!llvm.ident = !{!9}
+
+!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 2}
+!1 = metadata !{metadata !"source.cpp", metadata !"."}
+!2 = metadata !{}
+!3 = metadata !{metadata !4}
+!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"test_switch", metadata !"test_switch", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (i32*, i32)* @_Z11test_switchPii, null, null, metadata !2, i32 1}
+!5 = metadata !{i32 786473, metadata !1}
+!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !2, i32 0, null, null, null}
+!7 = metadata !{i32 2, metadata !"Dwarf Version", i32 2}
+!8 = metadata !{i32 2, metadata !"Debug Info Version", i32 1}
+!9 = metadata !{metadata !"clang version 3.5.0"}
+!10 = metadata !{i32 3, i32 8, metadata !11, null}
+!11 = metadata !{i32 786443, metadata !1, metadata !4, i32 3, i32 3, i32 0, i32 0}
+!12 = metadata !{metadata !12, metadata !13, metadata !13}
+!13 = metadata !{metadata !"llvm.loop.vectorize.enable", i1 true}
+!14 = metadata !{i32 4, i32 5, metadata !15, null}
+!15 = metadata !{i32 786443, metadata !1, metadata !11, i32 3, i32 36, i32 0, i32 1}
+!16 = metadata !{metadata !17, metadata !17, i64 0}
+!17 = metadata !{metadata !"int", metadata !18, i64 0}
+!18 = metadata !{metadata !"omnipotent char", metadata !19, i64 0}
+!19 = metadata !{metadata !"Simple C/C++ TBAA"}
+!20 = metadata !{i32 6, i32 7, metadata !21, null}
+!21 = metadata !{i32 786443, metadata !1, metadata !15, i32 4, i32 18, i32 0, i32 2}
+!22 = metadata !{i32 7, i32 5, metadata !21, null}
+!23 = metadata !{i32 9, i32 7, metadata !21, null}
+!24 = metadata !{i32 14, i32 1, metadata !4, null}