author     Nadav Rotem <nrotem@apple.com>   2012-10-24 20:36:32 +0000
committer  Nadav Rotem <nrotem@apple.com>   2012-10-24 20:36:32 +0000
commit     50bec6f8c494957b00dd225ddf580d3e0b97b871 (patch)
tree       1ed8a65a91fd87f8f56cb65c40df55f8ead9114f
parent     d258eb3ec5cc5c9a28d3a8cd80241c9df24ce3a1 (diff)
download   llvm-50bec6f8c494957b00dd225ddf580d3e0b97b871.tar.gz
           llvm-50bec6f8c494957b00dd225ddf580d3e0b97b871.tar.bz2
           llvm-50bec6f8c494957b00dd225ddf580d3e0b97b871.tar.xz
LoopVectorizer: Add a basic cost model which uses the VTTI interface.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@166620 91177308-0d34-0410-b5e6-96231b3b80d8
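
The core of the new cost model is the width-selection loop in
LoopVectorizationCostModel::findBestVectorizationFactor(), shown in full in the
diff below. As a minimal standalone sketch of that logic (not the LLVM sources;
expectedCost() here is a made-up stand-in for the real per-instruction
VectorTargetTransformInfo queries), the selection works like this:

#include <cstdio>

// Stand-in for LoopVectorizationCostModel::expectedCost(VF). The real code
// sums a VTTI-derived cost for every instruction in the single loop block.
static unsigned expectedCost(unsigned VF) {
  return 8 + 3 * VF; // made-up numbers, for illustration only
}

static unsigned findBestVectorizationFactor(unsigned MaxVF = 4) {
  float Cost = (float)expectedCost(1); // the scalar cost is the baseline
  unsigned Width = 1;
  // Try every power-of-two width up to MaxVF. A vector loop of width i runs
  // 1/i as many iterations, so its cost is normalized by i before comparing.
  for (unsigned i = 2; i <= MaxVF; i *= 2) {
    float VectorCost = expectedCost(i) / (float)i;
    if (VectorCost < Cost) {
      Cost = VectorCost;
      Width = i;
    }
  }
  return Width; // a result of 1 means "vectorization is not profitable"
}

int main() {
  printf("best width = %u\n", findBestVectorizationFactor());
  return 0;
}

With the made-up costs above the search settles on a width of 4; with costs
that grow faster than the width it would return 1 and the pass would leave the
loop scalar.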
-rw-r--r--  lib/Transforms/Vectorize/LoopVectorize.cpp           | 303
-rw-r--r--  test/Transforms/LoopVectorize/2012-10-20-infloop.ll   |   2
-rw-r--r--  test/Transforms/LoopVectorize/cost-model.ll           |  40
-rw-r--r--  test/Transforms/LoopVectorize/cpp-new-array.ll        |   2
-rw-r--r--  test/Transforms/LoopVectorize/gcc-examples.ll         |   2
-rw-r--r--  test/Transforms/LoopVectorize/increment.ll            |   2
-rw-r--r--  test/Transforms/LoopVectorize/induction_plus.ll       |   2
-rw-r--r--  test/Transforms/LoopVectorize/non-const-n.ll          |   2
-rw-r--r--  test/Transforms/LoopVectorize/read-only.ll            |   2
-rw-r--r--  test/Transforms/LoopVectorize/reduction.ll            |   2
-rw-r--r--  test/Transforms/LoopVectorize/scalar-select.ll        |   2
11 files changed, 322 insertions(+), 39 deletions(-)
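
The hunks below also change the driver in LoopVectorize::runOnLoop(): legality
is now a plain yes/no question (LoopVectorizationLegality::canVectorize()), and
the width comes either from the renamed -force-vector-width flag or from the
cost model. A condensed sketch of that decision, with stand-in names where
noted (chooseVF() and its parameters are illustrative, not the actual LLVM
API):

#include <cstdio>

// Illustrative stand-in for the cost model's search; the real work happens in
// LoopVectorizationCostModel::findBestVectorizationFactor() in the diff below.
static unsigned costModelBestWidth() { return 4; }

// Condensed version of the decision the patched runOnLoop() makes.
// ForcedWidth mirrors -force-vector-width (0 means autoselect),
// LegalToVectorize mirrors canVectorize(), and HaveVTTI mirrors whether a
// VectorTargetTransformInfo is available from the target.
static unsigned chooseVF(bool LegalToVectorize, unsigned ForcedWidth,
                         bool HaveVTTI) {
  if (!LegalToVectorize)
    return 0;                  // not legal: do not vectorize at all
  if (ForcedWidth != 0)
    return ForcedWidth;        // the user forced a width on the command line
  if (!HaveVTTI)
    return 1;                  // no cost data: treated as "not beneficial"
  return costModelBestWidth(); // ask the cost model
}

int main() {
  printf("VF = %u\n", chooseVF(true, 0, true));
  return 0;
}

This is presumably also why every existing RUN line in the tests below now
passes -force-vector-width=4: it keeps those tests from depending on whatever
cost numbers a particular target reports.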
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index bead39225b..6f6685bde2 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -18,10 +18,13 @@
//
// This pass has three parts:
// 1. The main loop pass that drives the different parts.
-// 2. LoopVectorizationLegality - A helper class that checks for the legality
+// 2. LoopVectorizationLegality - A unit that checks for the legality
// of the vectorization.
-// 3. SingleBlockLoopVectorizer - A helper class that performs the actual
+// 3. SingleBlockLoopVectorizer - A unit that performs the actual
// widening of instructions.
+// 4. LoopVectorizationCostModel - A unit that checks for the profitability
+// of vectorization. It decides on the optimal vector width, which
+// can be one, if vectorization is not profitable.
//===----------------------------------------------------------------------===//
//
// The reduction-variable vectorization is based on the paper:
@@ -51,13 +54,14 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AliasSetTracker.h"
-#include "llvm/Transforms/Scalar.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/TargetTransformInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -67,13 +71,14 @@
using namespace llvm;
static cl::opt<unsigned>
-DefaultVectorizationFactor("default-loop-vectorize-width",
- cl::init(4), cl::Hidden,
- cl::desc("Set the default loop vectorization width"));
+VectorizationFactor("force-vector-width", cl::init(0), cl::Hidden,
+ cl::desc("Set the default vectorization width. Zero is autoselect."));
+
namespace {
-// Forward declaration.
+// Forward declarations.
class LoopVectorizationLegality;
+class LoopVectorizationCostModel;
/// SingleBlockLoopVectorizer vectorizes loops which contain only one basic
/// block to a specified vectorization factor (VF).
@@ -229,11 +234,10 @@ public:
/// of the reductions that were found in the loop.
typedef DenseMap<PHINode*, ReductionDescriptor> ReductionList;
- /// Returns the maximum vectorization factor that we *can* use to vectorize
- /// this loop. This does not mean that it is profitable to vectorize this
- /// loop, only that it is legal to do so. This may be a large number. We
- /// can vectorize to any SIMD width below this number.
- unsigned getLoopMaxVF();
+ /// Returns true if it is legal to vectorize this loop.
+ /// This does not mean that it is profitable to vectorize this
+ /// loop, only that it is legal to do so.
+ bool canVectorize();
/// Returns the Induction variable.
PHINode *getInduction() {return Induction;}
@@ -286,6 +290,49 @@ private:
SmallPtrSet<Value*, 4> AllowedExit;
};
+/// LoopVectorizationCostModel - estimates the expected speedups due to
+/// vectorization.
+/// In many cases vectorization is not profitable. This can happen for a
+/// number of reasons. In this class we mainly attempt to predict the
+/// expected speedups/slowdowns due to the supported instruction set.
+/// We use the VectorTargetTransformInfo to query the different backends
+/// for the cost of different operations.
+class LoopVectorizationCostModel {
+public:
+ /// C'tor.
+ LoopVectorizationCostModel(Loop *Lp, ScalarEvolution *Se, DataLayout *Dl,
+ LoopVectorizationLegality *Leg,
+ const VectorTargetTransformInfo *Vtti):
+ TheLoop(Lp), SE(Se), DL(Dl), Legal(Leg), VTTI(Vtti) { }
+
+ /// Returns the most profitable vectorization factor for the loop that is
+ /// smaller or equal to the VF argument. This method checks every power
+ /// of two up to VF.
+ unsigned findBestVectorizationFactor(unsigned VF = 4);
+
+private:
+ /// Returns the expected execution cost. The unit of the cost does
+ /// not matter because we use the 'cost' units to compare different
+ /// vector widths. The cost that is returned is *not* normalized by
+ /// the vectorization factor.
+ unsigned expectedCost(unsigned VF);
+
+ /// Returns the execution time cost of an instruction for a given vector
+ /// width. Vector width of one means scalar.
+ unsigned getInstructionCost(Instruction *I, unsigned VF);
+
+ /// The loop that we evaluate.
+ Loop *TheLoop;
+ /// Scev analysis.
+ ScalarEvolution *SE;
+ /// DataLayout analysis.
+ DataLayout *DL;
+ /// Vectorization legality.
+ LoopVectorizationLegality *Legal;
+ /// Vector target information.
+ const VectorTargetTransformInfo *VTTI;
+};
+
struct LoopVectorize : public LoopPass {
static char ID; // Pass identification, replacement for typeid
@@ -296,6 +343,7 @@ struct LoopVectorize : public LoopPass {
ScalarEvolution *SE;
DataLayout *DL;
LoopInfo *LI;
+ TargetTransformInfo *TTI;
virtual bool runOnLoop(Loop *L, LPPassManager &LPM) {
// We only vectorize innermost loops.
@@ -305,25 +353,42 @@ struct LoopVectorize : public LoopPass {
SE = &getAnalysis<ScalarEvolution>();
DL = getAnalysisIfAvailable<DataLayout>();
LI = &getAnalysis<LoopInfo>();
+ TTI = getAnalysisIfAvailable<TargetTransformInfo>();
DEBUG(dbgs() << "LV: Checking a loop in \"" <<
L->getHeader()->getParent()->getName() << "\"\n");
// Check if it is legal to vectorize the loop.
LoopVectorizationLegality LVL(L, SE, DL);
- unsigned MaxVF = LVL.getLoopMaxVF();
-
- // Check that we can vectorize this loop using the chosen vectorization
- // width.
- if (MaxVF < DefaultVectorizationFactor) {
- DEBUG(dbgs() << "LV: non-vectorizable MaxVF ("<< MaxVF << ").\n");
+ if (!LVL.canVectorize()) {
+ DEBUG(dbgs() << "LV: Not vectorizing.\n");
return false;
}
- DEBUG(dbgs() << "LV: Found a vectorizable loop ("<< MaxVF << ").\n");
+ // Select the preferred vectorization factor.
+ unsigned VF = 1;
+ if (VectorizationFactor == 0) {
+ const VectorTargetTransformInfo *VTTI = 0;
+ if (TTI)
+ VTTI = TTI->getVectorTargetTransformInfo();
+ // Use the cost model.
+ LoopVectorizationCostModel CM(L, SE, DL, &LVL, VTTI);
+ VF = CM.findBestVectorizationFactor();
+
+ if (VF == 1) {
+ DEBUG(dbgs() << "LV: Vectorization is possible but not beneficial.\n");
+ return false;
+ }
+
+ } else {
+ // Use the user command flag.
+ VF = VectorizationFactor;
+ }
+
+ DEBUG(dbgs() << "LV: Found a vectorizable loop ("<< VF << ").\n");
// If we decided that it is *legal* to vectorize the loop then do it.
- SingleBlockLoopVectorizer LB(L, SE, LI, &LPM, DefaultVectorizationFactor);
+ SingleBlockLoopVectorizer LB(L, SE, LI, &LPM, VF);
LB.vectorize(&LVL);
DEBUG(verifyFunction(*L->getHeader()->getParent()));
@@ -656,6 +721,13 @@ void SingleBlockLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal
void
SingleBlockLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
+ //===------------------------------------------------===//
+ //
+ // Notice: any optimization or new instruction that goes
+ // into the code below should also be implemented in
+ // the cost-model.
+ //
+ //===------------------------------------------------===//
typedef SmallVector<PHINode*, 4> PhiVector;
BasicBlock &BB = *OrigLoop->getHeader();
Constant *Zero = ConstantInt::get(
@@ -957,18 +1029,18 @@ void SingleBlockLoopVectorizer::cleanup() {
SE->forgetLoop(OrigLoop);
}
-unsigned LoopVectorizationLegality::getLoopMaxVF() {
+bool LoopVectorizationLegality::canVectorize() {
if (!TheLoop->getLoopPreheader()) {
assert(false && "No preheader!!");
DEBUG(dbgs() << "LV: Loop not normalized." << "\n");
- return 1;
+ return false;
}
// We can only vectorize single basic block loops.
unsigned NumBlocks = TheLoop->getNumBlocks();
if (NumBlocks != 1) {
DEBUG(dbgs() << "LV: Too many blocks:" << NumBlocks << "\n");
- return 1;
+ return false;
}
// We need to have a loop header.
@@ -978,22 +1050,22 @@ unsigned LoopVectorizationLegality::getLoopMaxVF() {
// Go over each instruction and look at memory deps.
if (!canVectorizeBlock(*BB)) {
DEBUG(dbgs() << "LV: Can't vectorize this loop header\n");
- return 1;
+ return false;
}
// ScalarEvolution needs to be able to find the exit count.
const SCEV *ExitCount = SE->getExitCount(TheLoop, BB);
if (ExitCount == SE->getCouldNotCompute()) {
DEBUG(dbgs() << "LV: SCEV could not compute the loop exit count.\n");
- return 1;
+ return false;
}
DEBUG(dbgs() << "LV: We can vectorize this loop!\n");
// Okay! We can vectorize. At this point we don't have any other mem analysis
- // which may limit our maximum vectorization factor, so just return the
- // maximum SIMD size.
- return DefaultVectorizationFactor;
+ // which may limit our maximum vectorization factor, so just return true with
+ // no restrictions.
+ return true;
}
bool LoopVectorizationLegality::canVectorizeBlock(BasicBlock &BB) {
@@ -1323,6 +1395,177 @@ bool LoopVectorizationLegality::isInductionVariable(PHINode *Phi) {
return true;
}
+unsigned
+LoopVectorizationCostModel::findBestVectorizationFactor(unsigned VF) {
+ if (!VTTI) {
+ DEBUG(dbgs() << "LV: No vector target information. Not vectorizing. \n");
+ return 1;
+ }
+
+ float Cost = expectedCost(1);
+ unsigned Width = 1;
+ DEBUG(dbgs() << "LV: Scalar loop costs: "<< (int)Cost << ".\n");
+ for (unsigned i=2; i <= VF; i*=2) {
+ // Notice that the vector loop needs to be executed fewer times, so
+ // we need to divide the cost of the vector loop by the width of
+ // the vector elements.
+ float VectorCost = expectedCost(i) / (float)i;
+ DEBUG(dbgs() << "LV: Vector loop of width "<< i << " costs: " <<
+ (int)VectorCost << ".\n");
+ if (VectorCost < Cost) {
+ Cost = VectorCost;
+ Width = i;
+ }
+ }
+
+ DEBUG(dbgs() << "LV: Selecting VF = : "<< Width << ".\n");
+ return Width;
+}
+
+unsigned LoopVectorizationCostModel::expectedCost(unsigned VF) {
+ // We can only estimate the cost of single basic block loops.
+ assert(1 == TheLoop->getNumBlocks() && "Too many blocks in loop");
+
+ BasicBlock *BB = TheLoop->getHeader();
+ unsigned Cost = 0;
+
+ // For each instruction in the old loop.
+ for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
+ Instruction *Inst = it;
+ Cost += getInstructionCost(Inst, VF);
+ }
+
+ // Return the raw cost of one iteration of this block. The caller,
+ // findBestVectorizationFactor, divides it by VF to account for the
+ // smaller vector trip count.
+ return Cost;
+}
+
+unsigned
+LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
+ assert(VTTI && "Invalid vector target transformation info");
+ switch (I->getOpcode()) {
+ case Instruction::Br: {
+ return VTTI->getInstrCost(I->getOpcode());
+ }
+ case Instruction::PHI:
+ // PHIs are handled the same as the binary instructions below.
+ case Instruction::Add:
+ case Instruction::FAdd:
+ case Instruction::Sub:
+ case Instruction::FSub:
+ case Instruction::Mul:
+ case Instruction::FMul:
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::FDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor: {
+ Type *VTy = VectorType::get(I->getType(), VF);
+ return VTTI->getInstrCost(I->getOpcode(), VTy);
+ }
+ case Instruction::Select: {
+ SelectInst *SI = cast<SelectInst>(I);
+ Type *VTy = VectorType::get(I->getType(), VF);
+ const SCEV *CondSCEV = SE->getSCEV(SI->getCondition());
+ bool ScalarCond = (SE->isLoopInvariant(CondSCEV, TheLoop));
+ Type *CondTy = SI->getCondition()->getType();
+ if (ScalarCond)
+ CondTy = VectorType::get(CondTy, VF);
+
+ return VTTI->getInstrCost(I->getOpcode(), VTy, CondTy);
+ }
+ case Instruction::ICmp:
+ case Instruction::FCmp: {
+ Type *VTy = VectorType::get(I->getOperand(0)->getType(), VF);
+ return VTTI->getInstrCost(I->getOpcode(), VTy);
+ }
+ case Instruction::Store: {
+ StoreInst *SI = cast<StoreInst>(I);
+ Type *VTy = VectorType::get(SI->getValueOperand()->getType(), VF);
+
+ // Scalarized stores.
+ if (!Legal->isConsecutiveGep(SI->getPointerOperand())) {
+ unsigned Cost = 0;
+ unsigned ExtCost = VTTI->getInstrCost(Instruction::ExtractElement, VTy);
+ // The cost of extracting from the vector value.
+ Cost += VF * ExtCost;
+ // The cost of the scalar stores.
+ Cost += VF * VTTI->getInstrCost(I->getOpcode(), VTy->getScalarType());
+ return Cost;
+ }
+
+ // Wide stores.
+ return VTTI->getMemoryOpCost(I->getOpcode(), VTy, SI->getAlignment(),
+ SI->getPointerAddressSpace());
+ }
+ case Instruction::Load: {
+ LoadInst *LI = cast<LoadInst>(I);
+ Type *VTy = VectorType::get(I->getType(), VF);
+
+ // Scalarized loads.
+ if (!Legal->isConsecutiveGep(LI->getPointerOperand())) {
+ unsigned Cost = 0;
+ unsigned InCost = VTTI->getInstrCost(Instruction::InsertElement, VTy);
+ // The cost of inserting the loaded value into the result vector.
+ Cost += VF * InCost;
+ // The cost of the scalar loads.
+ Cost += VF * VTTI->getInstrCost(I->getOpcode(), VTy->getScalarType());
+ return Cost;
+ }
+
+ // Wide loads.
+ return VTTI->getMemoryOpCost(I->getOpcode(), VTy, LI->getAlignment(),
+ LI->getPointerAddressSpace());
+ }
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::FPExt:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::SIToFP:
+ case Instruction::UIToFP:
+ case Instruction::Trunc:
+ case Instruction::FPTrunc:
+ case Instruction::BitCast: {
+ Type *SrcTy = VectorType::get(I->getOperand(0)->getType(), VF);
+ Type *DstTy = VectorType::get(I->getType(), VF);
+ return VTTI->getInstrCost(I->getOpcode(), DstTy, SrcTy);
+ }
+ default: {
+ // We are scalarizing the instruction. Return the cost of the scalar
+ // instruction, plus the cost of insert and extract into vector
+ // elements, times the vector width.
+ unsigned Cost = 0;
+ Type *Ty = I->getType();
+
+ if (!Ty->isVoidTy()) {
+ Type *VTy = VectorType::get(Ty, VF);
+ unsigned InsCost = VTTI->getInstrCost(Instruction::InsertElement, VTy);
+ unsigned ExtCost = VTTI->getInstrCost(Instruction::ExtractElement, VTy);
+ Cost += VF * (InsCost + ExtCost);
+ }
+
+ // We don't have any information on the scalar instruction, but the
+ // target might.
+ // TODO: This may be a target-specific intrinsic.
+ // Need to add an API for that.
+ Cost += VF * VTTI->getInstrCost(I->getOpcode(), Ty);
+
+ return Cost;
+ }
+ }// end of switch.
+}
+
+
} // namespace
char LoopVectorize::ID = 0;
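
The most interesting part of getInstructionCost() above is how it prices memory
operations: a load or store whose address is not a consecutive GEP has to be
scalarized, costing VF insert/extract operations plus VF scalar memory
operations, while a consecutive access is a single wide memory op. A small
numeric sketch of the store case (the costs are made up, not taken from any
real target):

#include <cstdio>

// Mirrors the "Scalarized stores" branch above: the vector value is taken
// apart with VF extractelement operations and written with VF scalar stores.
static unsigned scalarizedStoreCost(unsigned VF, unsigned ExtractCost,
                                    unsigned ScalarStoreCost) {
  return VF * ExtractCost + VF * ScalarStoreCost;
}

int main() {
  const unsigned VF = 4;
  // Hypothetical per-operation costs that a VectorTargetTransformInfo
  // implementation might report.
  const unsigned ExtractCost = 1, ScalarStoreCost = 1, WideStoreCost = 1;
  printf("scalarized: %u units, wide: %u unit(s)\n",
         scalarizedStoreCost(VF, ExtractCost, ScalarStoreCost), WideStoreCost);
  // With these numbers the scalarized form costs 8 units against 1 for the
  // wide store, which is why Legal->isConsecutiveGep() matters so much to the
  // profitability decision.
  return 0;
}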
diff --git a/test/Transforms/LoopVectorize/2012-10-20-infloop.ll b/test/Transforms/LoopVectorize/2012-10-20-infloop.ll
index 5caaffc8dd..0176c9a189 100644
--- a/test/Transforms/LoopVectorize/2012-10-20-infloop.ll
+++ b/test/Transforms/LoopVectorize/2012-10-20-infloop.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-vectorize -dce
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -dce
; Check that we don't fall into an infinite loop.
define void @test() nounwind {
diff --git a/test/Transforms/LoopVectorize/cost-model.ll b/test/Transforms/LoopVectorize/cost-model.ll
new file mode 100644
index 0000000000..20c710db46
--- /dev/null
+++ b/test/Transforms/LoopVectorize/cost-model.ll
@@ -0,0 +1,40 @@
+; RUN: opt < %s -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+@c = common global [2048 x i32] zeroinitializer, align 16
+@b = common global [2048 x i32] zeroinitializer, align 16
+@d = common global [2048 x i32] zeroinitializer, align 16
+@a = common global [2048 x i32] zeroinitializer, align 16
+
+; At this point the cost model is pretty bad and we are vectorizing the code below.
+; TODO: This code should not be vectorized on x86.
+;CHECK: cost_model_1
+;CHECK: <4 x i32>
+;CHECK: ret void
+define void @cost_model_1() nounwind uwtable noinline ssp {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %0 = shl nsw i64 %indvars.iv, 1
+ %arrayidx = getelementptr inbounds [2048 x i32]* @c, i64 0, i64 %0
+ %1 = load i32* %arrayidx, align 8
+ %idxprom1 = sext i32 %1 to i64
+ %arrayidx2 = getelementptr inbounds [2048 x i32]* @b, i64 0, i64 %idxprom1
+ %2 = load i32* %arrayidx2, align 4
+ %arrayidx4 = getelementptr inbounds [2048 x i32]* @d, i64 0, i64 %indvars.iv
+ %3 = load i32* %arrayidx4, align 4
+ %idxprom5 = sext i32 %3 to i64
+ %arrayidx6 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %idxprom5
+ store i32 %2, i32* %arrayidx6, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, 256
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}
diff --git a/test/Transforms/LoopVectorize/cpp-new-array.ll b/test/Transforms/LoopVectorize/cpp-new-array.ll
index 12a91b0791..26902eba9e 100644
--- a/test/Transforms/LoopVectorize/cpp-new-array.ll
+++ b/test/Transforms/LoopVectorize/cpp-new-array.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-vectorize -dce -instcombine -licm -S | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/gcc-examples.ll b/test/Transforms/LoopVectorize/gcc-examples.ll
index 6fb1792b2c..d8942ac861 100644
--- a/test/Transforms/LoopVectorize/gcc-examples.ll
+++ b/test/Transforms/LoopVectorize/gcc-examples.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-vectorize -dce -instcombine -licm -S | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/increment.ll b/test/Transforms/LoopVectorize/increment.ll
index e818d68562..069b7ea031 100644
--- a/test/Transforms/LoopVectorize/increment.ll
+++ b/test/Transforms/LoopVectorize/increment.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-vectorize -dce -instcombine -licm -S | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/induction_plus.ll b/test/Transforms/LoopVectorize/induction_plus.ll
index bd90113e52..b31bceb50d 100644
--- a/test/Transforms/LoopVectorize/induction_plus.ll
+++ b/test/Transforms/LoopVectorize/induction_plus.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-vectorize -instcombine -S | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -instcombine -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/non-const-n.ll b/test/Transforms/LoopVectorize/non-const-n.ll
index 04c5c84a4f..7727b0a2dc 100644
--- a/test/Transforms/LoopVectorize/non-const-n.ll
+++ b/test/Transforms/LoopVectorize/non-const-n.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-vectorize -dce -instcombine -licm -S | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/read-only.ll b/test/Transforms/LoopVectorize/read-only.ll
index 4095ea68ef..b4d1bac132 100644
--- a/test/Transforms/LoopVectorize/read-only.ll
+++ b/test/Transforms/LoopVectorize/read-only.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-vectorize -dce -instcombine -licm -S | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/reduction.ll b/test/Transforms/LoopVectorize/reduction.ll
index 3e871b229b..ca51c7b37d 100644
--- a/test/Transforms/LoopVectorize/reduction.ll
+++ b/test/Transforms/LoopVectorize/reduction.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-vectorize -dce -instcombine -licm -S | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/scalar-select.ll b/test/Transforms/LoopVectorize/scalar-select.ll
index 8d5b6fd8af..e537bde31b 100644
--- a/test/Transforms/LoopVectorize/scalar-select.ll
+++ b/test/Transforms/LoopVectorize/scalar-select.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-vectorize -dce -instcombine -licm -S | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"