diff options
author | Arnold Schwaighofer <aschwaighofer@apple.com> | 2013-10-12 18:56:27 +0000 |
---|---|---|
committer | Arnold Schwaighofer <aschwaighofer@apple.com> | 2013-10-12 18:56:27 +0000 |
commit | 24732c3363a9a442c14cf236c3de1086cdee6000 (patch) | |
tree | bb13ff4e0f982feedf44dc9915df59f86a6ba4a0 | |
parent | fe82a3e360fc850e7551f2f1f32b58e539182c68 (diff) | |
download | llvm-24732c3363a9a442c14cf236c3de1086cdee6000.tar.gz llvm-24732c3363a9a442c14cf236c3de1086cdee6000.tar.bz2 llvm-24732c3363a9a442c14cf236c3de1086cdee6000.tar.xz |
SLPVectorizer: Sort PHINodes based on their type
Before this patch we relied on the order of phi nodes when we looked for phi
nodes of the same type. This could prevent vectorization in cases where a phi
node of a different type sat in between phi nodes of the same type.
This is important for vectorization of an internal graphics kernel. On the test
suite plus the external tests, on x86_64 (and in a run on armv7s), it showed no
impact on either performance or compile time.
radar://15024459
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@192537 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Transforms/Vectorize/SLPVectorizer.cpp | 67 | ||||
-rw-r--r-- | test/Transforms/SLPVectorizer/X86/phi.ll | 36 |
2 files changed, 78 insertions, 25 deletions
diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index b5a303e17f..af1c0e7423 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -2366,42 +2366,63 @@ static bool findBuildVector(InsertElementInst *IE, return false; } +static bool PhiTypeSorterFunc(Value *V, Value *V2) { + return V->getType() < V2->getType(); +} + bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) { bool Changed = false; SmallVector<Value *, 4> Incoming; - SmallSet<Instruction *, 16> VisitedInstrs; + SmallSet<Value *, 16> VisitedInstrs; + + bool HaveVectorizedPhiNodes = true; + while (HaveVectorizedPhiNodes) { + HaveVectorizedPhiNodes = false; + + // Collect the incoming values from the PHIs. + Incoming.clear(); + for (BasicBlock::iterator instr = BB->begin(), ie = BB->end(); instr != ie; + ++instr) { + PHINode *P = dyn_cast<PHINode>(instr); + if (!P) + break; - // Collect the incoming values from the PHIs. - for (BasicBlock::iterator instr = BB->begin(), ie = BB->end(); instr != ie; - ++instr) { - PHINode *P = dyn_cast<PHINode>(instr); + if (!VisitedInstrs.count(P)) + Incoming.push_back(P); + } - if (!P) - break; + // Sort by type. + std::stable_sort(Incoming.begin(), Incoming.end(), PhiTypeSorterFunc); - // We may go through BB multiple times so skip the one we have checked. - if (!VisitedInstrs.insert(instr)) - continue; + // Try to vectorize elements base on their type. + for (SmallVector<Value *, 4>::iterator IncIt = Incoming.begin(), + E = Incoming.end(); + IncIt != E;) { - // Stop constructing the list when you reach a different type. - if (Incoming.size() && P->getType() != Incoming[0]->getType()) { - if (tryToVectorizeList(Incoming, R)) { - // We would like to start over since some instructions are deleted - // and the iterator may become invalid value. + // Look for the next elements with the same type. 
+ SmallVector<Value *, 4>::iterator SameTypeIt = IncIt; + while (SameTypeIt != E && + (*SameTypeIt)->getType() == (*IncIt)->getType()) { + VisitedInstrs.insert(*SameTypeIt); + ++SameTypeIt; + } + + // Try to vectorize them. + unsigned NumElts = (SameTypeIt - IncIt); + DEBUG(errs() << "SLP: Trying to vectorize starting at PHIs (" << NumElts << ")\n"); + if (NumElts > 1 && + tryToVectorizeList(ArrayRef<Value *>(IncIt, NumElts), R)) { + // Success start over because instructions might have been changed. + HaveVectorizedPhiNodes = true; Changed = true; - instr = BB->begin(); - ie = BB->end(); + break; } - Incoming.clear(); + // Start over at the next instruction of a differnt type (or the end). + IncIt = SameTypeIt; } - - Incoming.push_back(P); } - if (Incoming.size() > 1) - Changed |= tryToVectorizeList(Incoming, R); - VisitedInstrs.clear(); for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; it++) { diff --git a/test/Transforms/SLPVectorizer/X86/phi.ll b/test/Transforms/SLPVectorizer/X86/phi.ll index 9cc48910d8..964e0e4efe 100644 --- a/test/Transforms/SLPVectorizer/X86/phi.ll +++ b/test/Transforms/SLPVectorizer/X86/phi.ll @@ -135,14 +135,14 @@ entry: br label %for.body for.body: ; preds = %for.body, %entry - %5 = phi float [ %1, %entry ], [ %11, %for.body ] - %6 = phi float [ %0, %entry ], [ %9, %for.body ] %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] %P.056 = phi float [ %4, %entry ], [ %add26, %for.body ] %Y.055 = phi float [ %3, %entry ], [ %add21, %for.body ] %B.054 = phi float [ %2, %entry ], [ %add16, %for.body ] %G.053 = phi float [ %1, %entry ], [ %add11, %for.body ] %R.052 = phi float [ %0, %entry ], [ %add6, %for.body ] + %5 = phi float [ %1, %entry ], [ %11, %for.body ] + %6 = phi float [ %0, %entry ], [ %9, %for.body ] %mul = fmul float %6, 7.000000e+00 %add6 = fadd float %R.052, %mul %mul10 = fmul float %5, 8.000000e+00 @@ -174,6 +174,38 @@ for.end: ; preds = %for.body ret float %add31 } +; Make sure the order of 
phi nodes of different types does not prevent +; vectorization of same typed phi nodes. +; CHECK-LABEL: sort_phi_type +; CHECK: phi <4 x float> +; CHECK: fmul <4 x float> + +define float @sort_phi_type(float* nocapture readonly %A) { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %Y = phi float [ 1.000000e+01, %entry ], [ %mul10, %for.body ] + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %B = phi float [ 1.000000e+01, %entry ], [ %mul15, %for.body ] + %G = phi float [ 1.000000e+01, %entry ], [ %mul20, %for.body ] + %R = phi float [ 1.000000e+01, %entry ], [ %mul25, %for.body ] + %mul10 = fmul float %Y, 8.000000e+00 + %mul15 = fmul float %B, 9.000000e+00 + %mul20 = fmul float %R, 10.000000e+01 + %mul25 = fmul float %G, 11.100000e+01 + %indvars.iv.next = add nsw i64 %indvars.iv, 4 + %cmp = icmp slt i64 %indvars.iv.next, 128 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body + %add28 = fadd float 1.000000e+01, %mul10 + %add29 = fadd float %mul10, %mul15 + %add30 = fadd float %add29, %mul20 + %add31 = fadd float %add30, %mul25 + ret float %add31 +} + define void @test(x86_fp80* %i1, x86_fp80* %i2, x86_fp80* %o) { ; CHECK-LABEL: @test( ; |