summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- include/llvm/Transforms/IPO/PassManagerBuilder.h 1
-rw-r--r-- include/llvm/Transforms/Vectorize.h 3
-rw-r--r-- lib/Transforms/IPO/PassManagerBuilder.cpp 36
-rw-r--r-- lib/Transforms/Vectorize/LoopVectorize.cpp 39
-rw-r--r-- test/Transforms/LoopVectorize/metadata-enable.ll 175
-rw-r--r-- tools/opt/opt.cpp 10
6 files changed, 228 insertions, 36 deletions
diff --git a/include/llvm/Transforms/IPO/PassManagerBuilder.h b/include/llvm/Transforms/IPO/PassManagerBuilder.h
index 27887749e9..42b6b27d0c 100644
--- a/include/llvm/Transforms/IPO/PassManagerBuilder.h
+++ b/include/llvm/Transforms/IPO/PassManagerBuilder.h
@@ -111,7 +111,6 @@ public:
bool BBVectorize;
bool SLPVectorize;
bool LoopVectorize;
- bool LateVectorize;
bool RerollLoops;
private:
diff --git a/include/llvm/Transforms/Vectorize.h b/include/llvm/Transforms/Vectorize.h
index 823c5fba74..e93b39a814 100644
--- a/include/llvm/Transforms/Vectorize.h
+++ b/include/llvm/Transforms/Vectorize.h
@@ -114,7 +114,8 @@ createBBVectorizePass(const VectorizeConfig &C = VectorizeConfig());
//
// LoopVectorize - Create a loop vectorization pass.
//
-Pass *createLoopVectorizePass(bool NoUnrolling = false);
+Pass *createLoopVectorizePass(bool NoUnrolling = false,
+ bool AlwaysVectorize = true);
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp
index 24c5018d54..cd46c79982 100644
--- a/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -33,11 +33,6 @@ RunLoopVectorization("vectorize-loops", cl::Hidden,
cl::desc("Run the Loop vectorization passes"));
static cl::opt<bool>
-LateVectorization("late-vectorize", cl::init(true), cl::Hidden,
- cl::desc("Run the vectorization pasess late in the pass "
- "pipeline (after the inliner)"));
-
-static cl::opt<bool>
RunSLPVectorization("vectorize-slp", cl::Hidden,
cl::desc("Run the SLP vectorization passes"));
@@ -68,7 +63,6 @@ PassManagerBuilder::PassManagerBuilder() {
BBVectorize = RunBBVectorization;
SLPVectorize = RunSLPVectorization;
LoopVectorize = RunLoopVectorization;
- LateVectorize = LateVectorization;
RerollLoops = RunLoopRerolling;
}
@@ -200,9 +194,6 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
MPM.add(createLoopIdiomPass()); // Recognize idioms like memset.
MPM.add(createLoopDeletionPass()); // Delete dead loops
- if (!LateVectorize && LoopVectorize)
- MPM.add(createLoopVectorizePass(DisableUnrollLoops));
-
if (!DisableUnrollLoops)
MPM.add(createLoopUnrollPass()); // Unroll small loops
addExtensionsToPM(EP_LoopOptimizerEnd, MPM);
@@ -243,21 +234,18 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
MPM.add(createInstructionCombiningPass()); // Clean up after everything.
- // As an experimental mode, run any vectorization passes in a separate
- // pipeline from the CGSCC pass manager that runs iteratively with the
- // inliner.
- if (LateVectorize && LoopVectorize) {
- // FIXME: This is a HACK! The inliner pass above implicitly creates a CGSCC
- // pass manager that we are specifically trying to avoid. To prevent this
- // we must insert a no-op module pass to reset the pass manager.
- MPM.add(createBarrierNoopPass());
-
- // Add the various vectorization passes and relevant cleanup passes for
- // them since we are no longer in the middle of the main scalar pipeline.
- MPM.add(createLoopVectorizePass(DisableUnrollLoops));
- MPM.add(createInstructionCombiningPass());
- MPM.add(createCFGSimplificationPass());
- }
+ // FIXME: This is a HACK! The inliner pass above implicitly creates a CGSCC
+ // pass manager that we are specifically trying to avoid. To prevent this
+ // we must insert a no-op module pass to reset the pass manager.
+ MPM.add(createBarrierNoopPass());
+ MPM.add(createLoopVectorizePass(DisableUnrollLoops, LoopVectorize));
+ // FIXME: Because of #pragma vectorize enable, the passes below are always
+ // inserted in the pipeline, even when the vectorizer doesn't run (e.g. when
+ // on -O1 and no #pragma is found). It would be good to have these two passes
+ // as function calls, so that we only run them when the vectorizer has
+ // changed the code.
+ MPM.add(createInstructionCombiningPass());
+ MPM.add(createCFGSimplificationPass());
if (!DisableUnitAtATime) {
// FIXME: We shouldn't bother with this anymore.
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index 9c3d29f659..45ddeaf933 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -763,10 +763,13 @@ struct LoopVectorizeHints {
unsigned Width;
/// Vectorization unroll factor.
unsigned Unroll;
+ /// Vectorization forced (-1 not selected, 0 force disabled, 1 force enabled)
+ int Force;
LoopVectorizeHints(const Loop *L, bool DisableUnrolling)
: Width(VectorizationFactor)
, Unroll(DisableUnrolling ? 1 : VectorizationUnroll)
+ , Force(-1)
, LoopID(L->getLoopID()) {
getHints(L);
// The command line options override any loop metadata except for when
@@ -877,6 +880,11 @@ private:
Unroll = Val;
else
DEBUG(dbgs() << "LV: ignoring invalid unroll hint metadata\n");
+ } else if (Hint == "enable") {
+ if (C->getBitWidth() == 1)
+ Force = Val;
+ else
+ DEBUG(dbgs() << "LV: ignoring invalid enable hint metadata\n");
} else {
DEBUG(dbgs() << "LV: ignoring unknown hint " << Hint << '\n');
}
@@ -888,8 +896,10 @@ struct LoopVectorize : public LoopPass {
/// Pass identification, replacement for typeid
static char ID;
- explicit LoopVectorize(bool NoUnrolling = false)
- : LoopPass(ID), DisableUnrolling(NoUnrolling) {
+ explicit LoopVectorize(bool NoUnrolling = false, bool AlwaysVectorize = true)
+ : LoopPass(ID),
+ DisableUnrolling(NoUnrolling),
+ AlwaysVectorize(AlwaysVectorize) {
initializeLoopVectorizePass(*PassRegistry::getPassRegistry());
}
@@ -900,6 +910,7 @@ struct LoopVectorize : public LoopPass {
DominatorTree *DT;
TargetLibraryInfo *TLI;
bool DisableUnrolling;
+ bool AlwaysVectorize;
virtual bool runOnLoop(Loop *L, LPPassManager &LPM) {
// We only vectorize innermost loops.
@@ -919,7 +930,7 @@ struct LoopVectorize : public LoopPass {
return false;
if (DL == NULL) {
- DEBUG(dbgs() << "LV: Not vectorizing because of missing data layout\n");
+ DEBUG(dbgs() << "LV: Not vectorizing: Missing data layout\n");
return false;
}
@@ -928,15 +939,25 @@ struct LoopVectorize : public LoopPass {
LoopVectorizeHints Hints(L, DisableUnrolling);
+ if (Hints.Force == 0) {
+ DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n");
+ return false;
+ }
+
+ if (!AlwaysVectorize && Hints.Force != 1) {
+ DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n");
+ return false;
+ }
+
if (Hints.Width == 1 && Hints.Unroll == 1) {
- DEBUG(dbgs() << "LV: Not vectorizing.\n");
+ DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n");
return false;
}
// Check if it is legal to vectorize the loop.
LoopVectorizationLegality LVL(L, SE, DL, DT, TLI);
if (!LVL.canVectorize()) {
- DEBUG(dbgs() << "LV: Not vectorizing.\n");
+ DEBUG(dbgs() << "LV: Not vectorizing: Cannot prove legality.\n");
return false;
}
@@ -949,7 +970,8 @@ struct LoopVectorize : public LoopPass {
Attribute::AttrKind SzAttr = Attribute::OptimizeForSize;
Attribute::AttrKind FlAttr = Attribute::NoImplicitFloat;
unsigned FnIndex = AttributeSet::FunctionIndex;
- bool OptForSize = F->getAttributes().hasAttribute(FnIndex, SzAttr);
+ bool OptForSize = Hints.Force != 1 &&
+ F->getAttributes().hasAttribute(FnIndex, SzAttr);
bool NoFloat = F->getAttributes().hasAttribute(FnIndex, FlAttr);
if (NoFloat) {
@@ -973,6 +995,7 @@ struct LoopVectorize : public LoopPass {
DEBUG(dbgs() << "LV: Vectorization is possible but not beneficial.\n");
if (UF == 1)
return false;
+ DEBUG(dbgs() << "LV: Trying to at least unroll the loops.\n");
// We decided not to vectorize, but we may want to unroll.
InnerLoopUnroller Unroller(L, SE, LI, DT, DL, TLI, UF);
Unroller.vectorize(&LVL);
@@ -5016,8 +5039,8 @@ INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_END(LoopVectorize, LV_NAME, lv_name, false, false)
namespace llvm {
- Pass *createLoopVectorizePass(bool NoUnrolling) {
- return new LoopVectorize(NoUnrolling);
+ Pass *createLoopVectorizePass(bool NoUnrolling, bool AlwaysVectorize) {
+ return new LoopVectorize(NoUnrolling, AlwaysVectorize);
}
}
diff --git a/test/Transforms/LoopVectorize/metadata-enable.ll b/test/Transforms/LoopVectorize/metadata-enable.ll
new file mode 100644
index 0000000000..fff3c0e808
--- /dev/null
+++ b/test/Transforms/LoopVectorize/metadata-enable.ll
@@ -0,0 +1,175 @@
+; RUN: opt < %s -O1 -S | FileCheck %s --check-prefix=O1
+; RUN: opt < %s -O2 -S | FileCheck %s --check-prefix=O2
+; RUN: opt < %s -O3 -S | FileCheck %s --check-prefix=O3
+; RUN: opt < %s -Os -S | FileCheck %s --check-prefix=Os
+; RUN: opt < %s -Oz -S | FileCheck %s --check-prefix=Oz
+; RUN: opt < %s -O1 -vectorize-loops -S | FileCheck %s --check-prefix=O1VEC
+; RUN: opt < %s -Oz -vectorize-loops -S | FileCheck %s --check-prefix=OzVEC
+; RUN: opt < %s -O1 -loop-vectorize -S | FileCheck %s --check-prefix=O1VEC2
+; RUN: opt < %s -Oz -loop-vectorize -S | FileCheck %s --check-prefix=OzVEC2
+; RUN: opt < %s -O3 -disable-loop-vectorization -S | FileCheck %s --check-prefix=O3DIS
+
+; This file tests the llvm.vectorizer.enable metadata forcing vectorization even
+; when optimization levels are too low, or when vectorization is disabled.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; O1-LABEL: @enabled(
+; O1: store <4 x i32>
+; O1: ret i32
+; O2-LABEL: @enabled(
+; O2: store <4 x i32>
+; O2: ret i32
+; O3-LABEL: @enabled(
+; O3: store <4 x i32>
+; O3: ret i32
+; Pragma always wins!
+; O3DIS-LABEL: @enabled(
+; O3DIS: store <4 x i32>
+; O3DIS: ret i32
+; Os-LABEL: @enabled(
+; Os: store <4 x i32>
+; Os: ret i32
+; Oz-LABEL: @enabled(
+; Oz: store <4 x i32>
+; Oz: ret i32
+; O1VEC-LABEL: @enabled(
+; O1VEC: store <4 x i32>
+; O1VEC: ret i32
+; OzVEC-LABEL: @enabled(
+; OzVEC: store <4 x i32>
+; OzVEC: ret i32
+; O1VEC2-LABEL: @enabled(
+; O1VEC2: store <4 x i32>
+; O1VEC2: ret i32
+; OzVEC2-LABEL: @enabled(
+; OzVEC2: store <4 x i32>
+; OzVEC2: ret i32
+
+define i32 @enabled(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32 %N) {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds i32* %b, i64 %indvars.iv
+ %0 = load i32* %arrayidx, align 4
+ %add = add nsw i32 %0, %N
+ %arrayidx2 = getelementptr inbounds i32* %a, i64 %indvars.iv
+ store i32 %add, i32* %arrayidx2, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, 32
+ br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0
+
+for.end: ; preds = %for.body
+ %1 = load i32* %a, align 4
+ ret i32 %1
+}
+
+; O1-LABEL: @nopragma(
+; O1-NOT: store <4 x i32>
+; O1: ret i32
+; O2-LABEL: @nopragma(
+; O2: store <4 x i32>
+; O2: ret i32
+; O3-LABEL: @nopragma(
+; O3: store <4 x i32>
+; O3: ret i32
+; O3DIS-LABEL: @nopragma(
+; O3DIS-NOT: store <4 x i32>
+; O3DIS: ret i32
+; Os-LABEL: @nopragma(
+; Os: store <4 x i32>
+; Os: ret i32
+; Oz-LABEL: @nopragma(
+; Oz-NOT: store <4 x i32>
+; Oz: ret i32
+; O1VEC-LABEL: @nopragma(
+; O1VEC: store <4 x i32>
+; O1VEC: ret i32
+; OzVEC-LABEL: @nopragma(
+; OzVEC: store <4 x i32>
+; OzVEC: ret i32
+; O1VEC2-LABEL: @nopragma(
+; O1VEC2: store <4 x i32>
+; O1VEC2: ret i32
+; OzVEC2-LABEL: @nopragma(
+; OzVEC2: store <4 x i32>
+; OzVEC2: ret i32
+
+define i32 @nopragma(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32 %N) {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds i32* %b, i64 %indvars.iv
+ %0 = load i32* %arrayidx, align 4
+ %add = add nsw i32 %0, %N
+ %arrayidx2 = getelementptr inbounds i32* %a, i64 %indvars.iv
+ store i32 %add, i32* %arrayidx2, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, 32
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ %1 = load i32* %a, align 4
+ ret i32 %1
+}
+
+; O1-LABEL: @disabled(
+; O1-NOT: store <4 x i32>
+; O1: ret i32
+; O2-LABEL: @disabled(
+; O2-NOT: store <4 x i32>
+; O2: ret i32
+; O3-LABEL: @disabled(
+; O3-NOT: store <4 x i32>
+; O3: ret i32
+; O3DIS-LABEL: @disabled(
+; O3DIS-NOT: store <4 x i32>
+; O3DIS: ret i32
+; Os-LABEL: @disabled(
+; Os-NOT: store <4 x i32>
+; Os: ret i32
+; Oz-LABEL: @disabled(
+; Oz-NOT: store <4 x i32>
+; Oz: ret i32
+; O1VEC-LABEL: @disabled(
+; O1VEC-NOT: store <4 x i32>
+; O1VEC: ret i32
+; OzVEC-LABEL: @disabled(
+; OzVEC-NOT: store <4 x i32>
+; OzVEC: ret i32
+; O1VEC2-LABEL: @disabled(
+; O1VEC2-NOT: store <4 x i32>
+; O1VEC2: ret i32
+; OzVEC2-LABEL: @disabled(
+; OzVEC2-NOT: store <4 x i32>
+; OzVEC2: ret i32
+
+define i32 @disabled(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32 %N) {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds i32* %b, i64 %indvars.iv
+ %0 = load i32* %arrayidx, align 4
+ %add = add nsw i32 %0, %N
+ %arrayidx2 = getelementptr inbounds i32* %a, i64 %indvars.iv
+ store i32 %add, i32* %arrayidx2, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, 32
+ br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !2
+
+for.end: ; preds = %for.body
+ %1 = load i32* %a, align 4
+ ret i32 %1
+}
+
+!0 = metadata !{metadata !0, metadata !1}
+!1 = metadata !{metadata !"llvm.vectorizer.enable", i1 1}
+!2 = metadata !{metadata !2, metadata !3}
+!3 = metadata !{metadata !"llvm.vectorizer.enable", i1 0}
\ No newline at end of file
diff --git a/tools/opt/opt.cpp b/tools/opt/opt.cpp
index dba16f72da..5e27c09cdd 100644
--- a/tools/opt/opt.cpp
+++ b/tools/opt/opt.cpp
@@ -471,8 +471,14 @@ static void AddOptimizationPasses(PassManagerBase &MPM,FunctionPassManager &FPM,
Builder.DisableUnrollLoops = (DisableLoopUnrolling.getNumOccurrences() > 0) ?
DisableLoopUnrolling : OptLevel == 0;
- Builder.LoopVectorize =
- DisableLoopVectorization ? false : OptLevel > 1 && SizeLevel < 2;
+ // This is final, unless there is a #pragma vectorize enable
+ if (DisableLoopVectorization)
+ Builder.LoopVectorize = false;
+ // If option wasn't forced via cmd line (-vectorize-loops, -loop-vectorize)
+ else if (!Builder.LoopVectorize)
+ Builder.LoopVectorize = OptLevel > 1 && SizeLevel < 2;
+
+ // When #pragma vectorize is on for SLP, do the same as above
Builder.SLPVectorize =
DisableSLPVectorization ? false : OptLevel > 1 && SizeLevel < 2;