diff options
author | Nadav Rotem <nrotem@apple.com> | 2013-06-20 17:41:45 +0000 |
---|---|---|
committer | Nadav Rotem <nrotem@apple.com> | 2013-06-20 17:41:45 +0000 |
commit | d69d9f20bc3acee0fc233853745c1de015b541f2 (patch) | |
tree | 79bbff6bc1c3000b7dc9dd421c0f9b88328fe8d5 /lib | |
parent | 63b8e299e47e9511e8bdb8a3a3c53674aa86813a (diff) | |
download | llvm-d69d9f20bc3acee0fc233853745c1de015b541f2.tar.gz llvm-d69d9f20bc3acee0fc233853745c1de015b541f2.tar.bz2 llvm-d69d9f20bc3acee0fc233853745c1de015b541f2.tar.xz |
SLPVectorization: Add a basic support for cross-basic block slp vectorization.
We collect gather sequences when we vectorize basic blocks. Gather sequences are excellent
hints for vectorization of other basic blocks.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184444 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Transforms/Vectorize/SLPVectorizer.cpp | 80 | ||||
-rw-r--r-- | lib/Transforms/Vectorize/VecUtils.cpp | 6 | ||||
-rw-r--r-- | lib/Transforms/Vectorize/VecUtils.h | 7 |
3 files changed, 80 insertions, 13 deletions
diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index 2b07fa2c01..91c0463324 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -99,7 +99,10 @@ struct SLPVectorizer : public FunctionPass { } // Try to hoist some of the scalarization code to the preheader. - if (BBChanged) hoistGatherSequence(LI, BB, R); + if (BBChanged) { + hoistGatherSequence(LI, BB, R); + Changed |= vectorizeUsingGatherHints(R.getGatherSeqInstructions()); + } Changed |= BBChanged; } @@ -130,8 +133,10 @@ private: /// \brief Try to vectorize a chain that starts at two arithmetic instrs. bool tryToVectorizePair(Value *A, Value *B, BoUpSLP &R); - /// \brief Try to vectorize a list of operands. - bool tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R); + /// \brief Try to vectorize a list of operands. If \p NeedExtracts is true + /// then we calculate the cost of extracting the scalars from the vector. + /// \returns true if a value was vectorized. + bool tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R, bool NeedExtracts); /// \brief Try to vectorize a chain that may start at the operands of \V; bool tryToVectorize(BinaryOperator *V, BoUpSLP &R); @@ -143,6 +148,13 @@ private: /// all of the sources are loop invariant. void hoistGatherSequence(LoopInfo *LI, BasicBlock *BB, BoUpSLP &R); + /// \brief Try to vectorize additional sequences in different basic blocks + /// based on values that we gathered in previous blocks. The list \p Gathers + /// holds the gather InsertElement instructions that were generated during + /// vectorization. + /// \returns True if some code was vectorized. + bool vectorizeUsingGatherHints(BoUpSLP::InstrList &Gathers); + /// \brief Scan the basic block and look for patterns that are likely to start /// a vectorization chain. bool vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R); @@ -179,10 +191,11 @@ unsigned SLPVectorizer::collectStores(BasicBlock *BB, BoUpSLP &R) { bool SLPVectorizer::tryToVectorizePair(Value *A, Value *B, BoUpSLP &R) { if (!A || !B) return false; Value *VL[] = { A, B }; - return tryToVectorizeList(VL, R); + return tryToVectorizeList(VL, R, true); } -bool SLPVectorizer::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R) { +bool SLPVectorizer::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R, + bool NeedExtracts) { if (VL.size() < 2) return false; @@ -204,7 +217,7 @@ bool SLPVectorizer::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R) { } int Cost = R.getTreeCost(VL); - int ExtrCost = R.getScalarizationCost(VL); + int ExtrCost = NeedExtracts ? R.getScalarizationCost(VL) : 0; DEBUG(dbgs()<<"SLP: Cost of pair:" << Cost << " Cost of extract:" << ExtrCost << ".\n"); if ((Cost+ExtrCost) >= -SLPCostThreshold) return false; @@ -307,7 +320,7 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) { } if (Incoming.size() > 1) - Changed |= tryToVectorizeList(Incoming, R); + Changed |= tryToVectorizeList(Incoming, R, true); } return Changed; @@ -329,6 +342,51 @@ bool SLPVectorizer::vectorizeStoreChains(BoUpSLP &R) { return Changed; } +bool SLPVectorizer::vectorizeUsingGatherHints(BoUpSLP::InstrList &Gathers) { + SmallVector<Value*, 4> Seq; + bool Changed = false; + for (int i = 0, e = Gathers.size(); i < e; ++i) { + InsertElementInst *IEI = dyn_cast_or_null<InsertElementInst>(Gathers[i]); + + if (IEI) { + if (Instruction *I = dyn_cast<Instruction>(IEI->getOperand(1))) + Seq.push_back(I); + } else { + + if (!Seq.size()) + continue; + + Instruction *I = cast<Instruction>(Seq[0]); + BasicBlock *BB = I->getParent(); + + DEBUG(dbgs()<<"SLP: Inspecting a gather list of size " << Seq.size() << + " in " << BB->getName() << ".\n"); + + // Check if the gathered values have multiple uses. If they only have one + // user then we know that the insert/extract pair will go away. + bool HasMultipleUsers = false; + for (int i=0; e = Seq.size(), i < e; ++i) { + if (!Seq[i]->hasOneUse()) { + HasMultipleUsers = true; + break; + } + } + + BoUpSLP BO(BB, SE, DL, TTI, AA, LI->getLoopFor(BB)); + + if (tryToVectorizeList(Seq, BO, HasMultipleUsers)) { + DEBUG(dbgs()<<"SLP: Vectorized a gather list of len " << Seq.size() << + " in " << BB->getName() << ".\n"); + Changed = true; + } + + Seq.clear(); + } + } + + return Changed; +} + void SLPVectorizer::hoistGatherSequence(LoopInfo *LI, BasicBlock *BB, BoUpSLP &R) { // Check if this block is inside a loop. @@ -344,12 +402,14 @@ void SLPVectorizer::hoistGatherSequence(LoopInfo *LI, BasicBlock *BB, // Mark the insertion point for the block. Instruction *Location = PreHeader->getTerminator(); - BoUpSLP::ValueList &Gathers = R.getGatherSeqInstructions(); - for (BoUpSLP::ValueList::iterator it = Gathers.begin(), e = Gathers.end(); + BoUpSLP::InstrList &Gathers = R.getGatherSeqInstructions(); + for (BoUpSLP::InstrList::iterator it = Gathers.begin(), e = Gathers.end(); it != e; ++it) { - InsertElementInst *Insert = dyn_cast<InsertElementInst>(*it); + InsertElementInst *Insert = dyn_cast_or_null<InsertElementInst>(*it); // The InsertElement sequence can be simplified into a constant. + // Also Ignore NULL pointers because they are only here to separate + // sequences. if (!Insert) continue; diff --git a/lib/Transforms/Vectorize/VecUtils.cpp b/lib/Transforms/Vectorize/VecUtils.cpp index e79f08a56d..88c457d117 100644 --- a/lib/Transforms/Vectorize/VecUtils.cpp +++ b/lib/Transforms/Vectorize/VecUtils.cpp @@ -731,9 +731,13 @@ Value *BoUpSLP::Scalarize(ArrayRef<Value *> VL, VectorType *Ty) { // Remember that this instruction is used as part of a 'gather' sequence. // The caller of the bottom-up slp vectorizer can try to hoist the sequence // if the users are outside of the basic block. - GatherInstructions.push_back(Vec); + if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(Vec)) + GatherInstructions.push_back(IEI); } + // Mark the end of the gather sequence. + GatherInstructions.push_back(0); + for (unsigned i = 0; i < Ty->getNumElements(); ++i) VectorizedValues[VL[i]] = Vec; diff --git a/lib/Transforms/Vectorize/VecUtils.h b/lib/Transforms/Vectorize/VecUtils.h index f76ae8413d..28a61e3c0d 100644 --- a/lib/Transforms/Vectorize/VecUtils.h +++ b/lib/Transforms/Vectorize/VecUtils.h @@ -34,6 +34,7 @@ class Loop; /// Bottom Up SLP vectorization utility class. struct BoUpSLP { typedef SmallVector<Value*, 8> ValueList; + typedef SmallVector<Instruction*, 16> InstrList; typedef SmallPtrSet<Value*, 16> ValueSet; typedef SmallVector<StoreInst*, 8> StoreList; static const int max_cost = 1<<20; @@ -78,7 +79,7 @@ struct BoUpSLP { /// \returns the list of new instructions that were added in order to collect /// scalars into vectors. This list can be used to further optimize the gather /// sequences. - ValueList &getGatherSeqInstructions() {return GatherInstructions; } + InstrList &getGatherSeqInstructions() {return GatherInstructions; } private: /// \brief This method contains the recursive part of getTreeCost. @@ -166,7 +167,9 @@ private: /// A list of instructions that are used when gathering scalars into vectors. /// In many cases these instructions can be hoisted outside of the BB. /// Iterating over this list is faster than calling LICM. - ValueList GatherInstructions; + /// Notice: We insert NULL ptrs to separate between the different gather + /// sequences. + InstrList GatherInstructions; /// Instruction builder to construct the vectorized tree. IRBuilder<> Builder; |