summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- include/llvm/Transforms/Vectorize.h 2
-rw-r--r-- lib/Transforms/IPO/PassManagerBuilder.cpp 4
-rw-r--r-- lib/Transforms/Vectorize/LoopVectorize.cpp 17
-rw-r--r-- test/Transforms/LoopVectorize/X86/unroll-pm.ll 31
-rw-r--r-- test/Transforms/LoopVectorize/global_alias.ll 2
-rw-r--r-- tools/opt/opt.cpp 12
6 files changed, 55 insertions, 13 deletions
diff --git a/include/llvm/Transforms/Vectorize.h b/include/llvm/Transforms/Vectorize.h
index 8d0db16116..823c5fba74 100644
--- a/include/llvm/Transforms/Vectorize.h
+++ b/include/llvm/Transforms/Vectorize.h
@@ -114,7 +114,7 @@ createBBVectorizePass(const VectorizeConfig &C = VectorizeConfig());
//
// LoopVectorize - Create a loop vectorization pass.
//
-Pass *createLoopVectorizePass();
+Pass *createLoopVectorizePass(bool NoUnrolling = false);
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp
index 743dc42343..d4c0c2caea 100644
--- a/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -196,7 +196,7 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
MPM.add(createLoopDeletionPass()); // Delete dead loops
if (!LateVectorize && LoopVectorize)
- MPM.add(createLoopVectorizePass());
+ MPM.add(createLoopVectorizePass(DisableUnrollLoops));
if (!DisableUnrollLoops)
MPM.add(createLoopUnrollPass()); // Unroll small loops
@@ -250,7 +250,7 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
// Add the various vectorization passes and relevant cleanup passes for
// them since we are no longer in the middle of the main scalar pipeline.
if (LoopVectorize) {
- MPM.add(createLoopVectorizePass());
+ MPM.add(createLoopVectorizePass(DisableUnrollLoops));
if (!DisableUnrollLoops)
MPM.add(createLoopUnrollPass()); // Unroll small loops
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index 2ee1441e4b..0afc73e509 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -761,9 +761,9 @@ struct LoopVectorizeHints {
/// Vectorization unroll factor.
unsigned Unroll;
- LoopVectorizeHints(const Loop *L)
+ LoopVectorizeHints(const Loop *L, bool DisableUnrolling)
: Width(VectorizationFactor)
- , Unroll(VectorizationUnroll)
+ , Unroll(DisableUnrolling ? 1 : VectorizationUnroll)
, LoopID(L->getLoopID()) {
getHints(L);
// The command line options override any loop metadata except for when
@@ -772,6 +772,9 @@ struct LoopVectorizeHints {
Width = VectorizationFactor;
if (VectorizationUnroll.getNumOccurrences() > 0)
Unroll = VectorizationUnroll;
+
+ DEBUG(if (DisableUnrolling && Unroll == 1)
+ dbgs() << "LV: Unrolling disabled by the pass manager\n");
}
/// Return the loop vectorizer metadata prefix.
@@ -878,7 +881,8 @@ struct LoopVectorize : public LoopPass {
/// Pass identification, replacement for typeid
static char ID;
- explicit LoopVectorize() : LoopPass(ID) {
+ explicit LoopVectorize(bool NoUnrolling = false)
+ : LoopPass(ID), DisableUnrolling(NoUnrolling) {
initializeLoopVectorizePass(*PassRegistry::getPassRegistry());
}
@@ -888,6 +892,7 @@ struct LoopVectorize : public LoopPass {
TargetTransformInfo *TTI;
DominatorTree *DT;
TargetLibraryInfo *TLI;
+ bool DisableUnrolling;
virtual bool runOnLoop(Loop *L, LPPassManager &LPM) {
// We only vectorize innermost loops.
@@ -909,7 +914,7 @@ struct LoopVectorize : public LoopPass {
DEBUG(dbgs() << "LV: Checking a loop in \"" <<
L->getHeader()->getParent()->getName() << "\"\n");
- LoopVectorizeHints Hints(L);
+ LoopVectorizeHints Hints(L, DisableUnrolling);
if (Hints.Width == 1 && Hints.Unroll == 1) {
DEBUG(dbgs() << "LV: Not vectorizing.\n");
@@ -4786,8 +4791,8 @@ INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_END(LoopVectorize, LV_NAME, lv_name, false, false)
namespace llvm {
- Pass *createLoopVectorizePass() {
- return new LoopVectorize();
+ Pass *createLoopVectorizePass(bool NoUnrolling) {
+ return new LoopVectorize(NoUnrolling);
}
}
diff --git a/test/Transforms/LoopVectorize/X86/unroll-pm.ll b/test/Transforms/LoopVectorize/X86/unroll-pm.ll
new file mode 100644
index 0000000000..5064fec286
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/unroll-pm.ll
@@ -0,0 +1,31 @@
+; RUN: opt < %s -O2 -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx -force-vector-width=4 -S | FileCheck %s
+; RUN: opt < %s -O2 -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx -force-vector-width=4 -disable-loop-unrolling -S | FileCheck %s -check-prefix=CHECK-NOUNRL
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+;CHECK-LABEL: @bar(
+;CHECK: store <4 x i32>
+;CHECK: store <4 x i32>
+;CHECK: ret
+;CHECK-NOUNRL-LABEL: @bar(
+;CHECK-NOUNRL: store <4 x i32>
+;CHECK-NOUNRL-NOT: store <4 x i32>
+;CHECK-NOUNRL: ret
+define i32 @bar(i32* nocapture %A, i32 %n) nounwind uwtable ssp {
+ %1 = icmp sgt i32 %n, 0
+ br i1 %1, label %.lr.ph, label %._crit_edge
+
+.lr.ph: ; preds = %0, %.lr.ph
+ %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
+ %2 = getelementptr inbounds i32* %A, i64 %indvars.iv
+ %3 = load i32* %2, align 4
+ %4 = add nsw i32 %3, 6
+ store i32 %4, i32* %2, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %._crit_edge, label %.lr.ph
+
+._crit_edge: ; preds = %.lr.ph, %0
+ ret i32 undef
+}
diff --git a/test/Transforms/LoopVectorize/global_alias.ll b/test/Transforms/LoopVectorize/global_alias.ll
index ae72d3c608..4fd4c989de 100644
--- a/test/Transforms/LoopVectorize/global_alias.ll
+++ b/test/Transforms/LoopVectorize/global_alias.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -O3 -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+; RUN: opt < %s -O1 -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
diff --git a/tools/opt/opt.cpp b/tools/opt/opt.cpp
index ca82061786..691080aba3 100644
--- a/tools/opt/opt.cpp
+++ b/tools/opt/opt.cpp
@@ -136,6 +136,11 @@ UnitAtATime("funit-at-a-time",
cl::init(true));
static cl::opt<bool>
+DisableLoopUnrolling("disable-loop-unrolling",
+ cl::desc("Disable loop unrolling in all relevant passes"),
+ cl::init(false));
+
+static cl::opt<bool>
DisableSimplifyLibCalls("disable-simplify-libcalls",
cl::desc("Disable simplify-libcalls"));
@@ -447,12 +452,13 @@ static void AddOptimizationPasses(PassManagerBase &MPM,FunctionPassManager &FPM,
Builder.Inliner = createAlwaysInlinerPass();
}
Builder.DisableUnitAtATime = !UnitAtATime;
- Builder.DisableUnrollLoops = OptLevel == 0;
+ Builder.DisableUnrollLoops = (DisableLoopUnrolling.getNumOccurrences() > 0) ?
+ DisableLoopUnrolling : OptLevel == 0;
+ Builder.LoopVectorize = OptLevel > 1 && SizeLevel < 2;
+
Builder.populateFunctionPassManager(FPM);
Builder.populateModulePassManager(MPM);
-
- Builder.LoopVectorize = OptLevel > 1 && SizeLevel < 2;
}
static void AddStandardCompilePasses(PassManagerBase &PM) {