summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorArnold Schwaighofer <aschwaighofer@apple.com>2013-05-15 01:44:30 +0000
committerArnold Schwaighofer <aschwaighofer@apple.com>2013-05-15 01:44:30 +0000
commit1386692ef64d3151da8986589eadf0c58aba5c50 (patch)
tree0a0d7b40736b61bca369b8658fa8a669659dc97c
parenta88d974ce274152d2f8f28660ba277906bde2384 (diff)
downloadllvm-1386692ef64d3151da8986589eadf0c58aba5c50.tar.gz
llvm-1386692ef64d3151da8986589eadf0c58aba5c50.tar.bz2
llvm-1386692ef64d3151da8986589eadf0c58aba5c50.tar.xz
LoopVectorize: Hoist conditional loads if possible
InstCombine can be uncooperative to vectorization and sink loads into conditional blocks. This prevents vectorization. Undo this optimization if there are unconditional memory accesses to the same addresses in the loop. radar://13815763 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@181860 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Transforms/Vectorize/LoopVectorize.cpp105
-rw-r--r--test/Transforms/LoopVectorize/hoist-loads.ll69
2 files changed, 171 insertions, 3 deletions
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index 0b29445dd0..a8e1f4579c 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -318,6 +318,93 @@ private:
ValueMap WidenMap;
};
+/// \brief Check if conditionally executed loads are hoistable.
+///
+/// This class has two functions. isHoistableLoad and canHoistAllLoads.
+/// isHoistableLoad should be called on all load instructions that are executed
+/// conditionally. After all conditional loads are processed, the client should
+/// call canHoistAllLoads to determine if all of the conditional execute loads
+/// have an unconditional memory access in the loop.
+class LoadHoisting {
+ typedef SmallPtrSet<Value *, 8> MemorySet;
+
+ Loop *TheLoop;
+ DominatorTree *DT;
+ MemorySet CondLoadAddrSet;
+
+public:
+ LoadHoisting(Loop *L, DominatorTree *D) : TheLoop(L), DT(D) {}
+
+ /// \brief Check if the instruction is a load with a identifiable address.
+ bool isHoistableLoad(Instruction *L);
+
+ /// \brief Check if all of the conditional loads are hoistable because there
+ /// exists an unconditional memory access to the same address in the loop.
+ bool canHoistAllLoads();
+};
+
+bool LoadHoisting::isHoistableLoad(Instruction *L) {
+ LoadInst *LI = dyn_cast<LoadInst>(L);
+ if (!LI)
+ return false;
+
+ CondLoadAddrSet.insert(LI->getPointerOperand());
+ return true;
+}
+
+static void addMemAccesses(BasicBlock *BB, SmallPtrSet<Value *, 8> &Set) {
+ for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE; ++BI) {
+ Instruction *I = &*BI;
+ Value *Addr = 0;
+
+ // Try a load.
+ LoadInst *LI = dyn_cast<LoadInst>(I);
+ if (LI) {
+ Addr = LI->getPointerOperand();
+ Set.insert(Addr);
+ continue;
+ }
+
+ // Try a store.
+ StoreInst *SI = dyn_cast<StoreInst>(I);
+ if (!SI)
+ continue;
+
+ Addr = SI->getPointerOperand();
+ Set.insert(Addr);
+ }
+}
+
+bool LoadHoisting::canHoistAllLoads() {
+ // No conditional loads.
+ if (CondLoadAddrSet.empty())
+ return true;
+
+ MemorySet UncondMemAccesses;
+ std::vector<BasicBlock*> &LoopBlocks = TheLoop->getBlocksVector();
+ BasicBlock *LoopLatch = TheLoop->getLoopLatch();
+
+ // Iterate over the unconditional blocks and collect memory access addresses.
+ for (unsigned i = 0, e = LoopBlocks.size(); i < e; ++i) {
+ BasicBlock *BB = LoopBlocks[i];
+
+ // Ignore conditional blocks.
+ if (BB != LoopLatch && !DT->dominates(BB, LoopLatch))
+ continue;
+
+ addMemAccesses(BB, UncondMemAccesses);
+ }
+
+ // And make sure there is a matching unconditional access for every
+ // conditional load.
+ for (MemorySet::iterator MI = CondLoadAddrSet.begin(),
+ ME = CondLoadAddrSet.end(); MI != ME; ++MI)
+ if (!UncondMemAccesses.count(*MI))
+ return false;
+
+ return true;
+}
+
/// LoopVectorizationLegality checks if it is legal to vectorize a loop, and
/// to what vectorization factor.
/// This class does not look at the profitability of vectorization, only the
@@ -337,7 +424,8 @@ public:
DominatorTree *DT, TargetTransformInfo* TTI,
AliasAnalysis *AA, TargetLibraryInfo *TLI)
: TheLoop(L), SE(SE), DL(DL), DT(DT), TTI(TTI), AA(AA), TLI(TLI),
- Induction(0), WidestIndTy(0), HasFunNoNaNAttr(false) {}
+ Induction(0), WidestIndTy(0), HasFunNoNaNAttr(false),
+ LoadSpeculation(L, DT) {}
/// This enum represents the kinds of reductions that we support.
enum ReductionKind {
@@ -598,6 +686,9 @@ private:
RuntimePointerCheck PtrRtCheck;
/// Can we assume the absence of NaNs.
bool HasFunNoNaNAttr;
+
+ /// Utility to determine whether loads can be speculated.
+ LoadHoisting LoadSpeculation;
};
/// LoopVectorizationCostModel - estimates the expected speedups due to
@@ -3259,8 +3350,12 @@ bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) {
bool LoopVectorizationLegality::blockCanBePredicated(BasicBlock *BB) {
for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
- // We don't predicate loads/stores at the moment.
- if (it->mayReadFromMemory() || it->mayWriteToMemory() || it->mayThrow())
+ // We might be able to hoist the load.
+ if (it->mayReadFromMemory() && !LoadSpeculation.isHoistableLoad(it))
+ return false;
+
+ // We predicate stores at the moment.
+ if (it->mayWriteToMemory() || it->mayThrow())
return false;
// The instructions below can trap.
@@ -3274,6 +3369,10 @@ bool LoopVectorizationLegality::blockCanBePredicated(BasicBlock *BB) {
}
}
+ // Check that we can actually speculate the hoistable loads.
+ if (!LoadSpeculation.canHoistAllLoads())
+ return false;
+
return true;
}
diff --git a/test/Transforms/LoopVectorize/hoist-loads.ll b/test/Transforms/LoopVectorize/hoist-loads.ll
new file mode 100644
index 0000000000..fad17350cd
--- /dev/null
+++ b/test/Transforms/LoopVectorize/hoist-loads.ll
@@ -0,0 +1,69 @@
+; RUN: opt -loop-vectorize -force-vector-width=2 -force-vector-unroll=1 -S < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+@A = common global [1024 x float] zeroinitializer, align 16
+@B = common global [1024 x float] zeroinitializer, align 16
+
+; Make sure we can vectorize in the presence of hoistable conditional loads.
+; CHECK: hoist_cond_load
+; CHECK: load <2 x float>
+
+define void @hoist_cond_load() {
+entry:
+ br label %for.body
+for.body:
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %if.end9 ]
+ %arrayidx = getelementptr inbounds [1024 x float]* @A, i64 0, i64 %indvars.iv
+ %arrayidx2 = getelementptr inbounds [1024 x float]* @B, i64 0, i64 %indvars.iv
+ %0 = load float* %arrayidx2, align 4
+ %cmp3 = fcmp oeq float %0, 0.000000e+00
+ br i1 %cmp3, label %if.end9, label %if.else
+
+if.else:
+ %1 = load float* %arrayidx, align 4
+ br label %if.end9
+
+if.end9:
+ %tmp.0 = phi float [ %1, %if.else ], [ 0.000000e+00, %for.body ]
+ store float %tmp.0, float* %arrayidx, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp ne i32 %lftr.wideiv, 1024
+ br i1 %exitcond, label %for.body, label %for.end
+
+for.end:
+ ret void
+}
+
+; However, we can't hoist loads whose address we have not seen unconditionally
+; accessed.
+; CHECK: dont_hoist_cond_load
+; CHECK-NOT: load <2 x float>
+
+define void @dont_hoist_cond_load() {
+entry:
+ br label %for.body
+for.body:
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %if.end9 ]
+ %arrayidx = getelementptr inbounds [1024 x float]* @A, i64 0, i64 %indvars.iv
+ %arrayidx2 = getelementptr inbounds [1024 x float]* @B, i64 0, i64 %indvars.iv
+ %0 = load float* %arrayidx2, align 4
+ %cmp3 = fcmp oeq float %0, 0.000000e+00
+ br i1 %cmp3, label %if.end9, label %if.else
+
+if.else:
+ %1 = load float* %arrayidx, align 4
+ br label %if.end9
+
+if.end9:
+ %tmp.0 = phi float [ %1, %if.else ], [ 0.000000e+00, %for.body ]
+ store float %tmp.0, float* %arrayidx2, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp ne i32 %lftr.wideiv, 1024
+ br i1 %exitcond, label %for.body, label %for.end
+
+for.end:
+ ret void
+}