From 28a739b4dcd88fc55adc521d2017a831ca7286c9 Mon Sep 17 00:00:00 2001 From: Arnold Schwaighofer Date: Sun, 4 May 2014 17:10:15 +0000 Subject: SLPVectorizer: Bring back the insertelement patch (r205965) with fixes When can't assume a vectorized tree is rooted in an instruction. The IRBuilder could have constant folded it. When we rebuild the build_vector (the series of InsertElement instructions) use the last original InsertElement instruction. The vectorized tree root is guaranteed to be before it. Also, we can't assume that the n-th InsertElement inserts the n-th element into a vector. This reverts r207746 which reverted the revert of the revert of r205018 or so. Fixes the test case in PR19621. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@207939 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/SLPVectorizer.cpp | 101 ++++++++++++++++++++--------- 1 file changed, 71 insertions(+), 30 deletions(-) (limited to 'lib/Transforms/Vectorize') diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index d7ccb796d3..895826f1d8 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -31,6 +31,7 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" +#include "llvm/IR/NoFolder.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" #include "llvm/IR/Verifier.h" @@ -357,13 +358,13 @@ public: /// A negative number means that this is profitable. int getTreeCost(); - /// Construct a vectorizable tree that starts at \p Roots and is possibly - /// used by a reduction of \p RdxOps. - void buildTree(ArrayRef Roots, ValueSet *RdxOps = 0); + /// Construct a vectorizable tree that starts at \p Roots, ignoring users for + /// the purpose of scheduling and extraction in the \p UserIgnoreLst. + void buildTree(ArrayRef Roots, + ArrayRef UserIgnoreLst = None); /// Clear the internal data structures that are created by 'buildTree'. void deleteTree() { - RdxOps = 0; VectorizableTree.clear(); ScalarToTreeEntry.clear(); MustGather.clear(); @@ -526,8 +527,8 @@ private: return I.first->second; } - /// Reduction operators. - ValueSet *RdxOps; + /// List of users to ignore during scheduling and that don't need extracting. + ArrayRef UserIgnoreList; // Analysis and block reference. Function *F; @@ -542,9 +543,10 @@ private: IRBuilder<> Builder; }; -void BoUpSLP::buildTree(ArrayRef Roots, ValueSet *Rdx) { +void BoUpSLP::buildTree(ArrayRef Roots, + ArrayRef UserIgnoreLst) { deleteTree(); - RdxOps = Rdx; + UserIgnoreList = UserIgnoreLst; if (!getSameType(Roots)) return; buildTree_rec(Roots, 0); @@ -576,8 +578,9 @@ void BoUpSLP::buildTree(ArrayRef Roots, ValueSet *Rdx) { if (!UserInst) continue; - // Ignore uses that are part of the reduction. - if (Rdx && std::find(Rdx->begin(), Rdx->end(), UserInst) != Rdx->end()) + // Ignore users in the user ignore list. + if (std::find(UserIgnoreList.begin(), UserIgnoreList.end(), UserInst) != + UserIgnoreList.end()) continue; DEBUG(dbgs() << "SLP: Need to extract:" << *U << " from lane " << @@ -708,8 +711,9 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth) { continue; } - // This user is part of the reduction. - if (RdxOps && RdxOps->count(UI)) + // Ignore users in the user ignore list. + if (std::find(UserIgnoreList.begin(), UserIgnoreList.end(), UI) != + UserIgnoreList.end()) continue; // Make sure that we can schedule this unknown user. @@ -1747,8 +1751,9 @@ Value *BoUpSLP::vectorizeTree() { DEBUG(dbgs() << "SLP: \tvalidating user:" << *U << ".\n"); assert((ScalarToTreeEntry.count(U) || - // It is legal to replace the reduction users by undef. - (RdxOps && RdxOps->count(U))) && + // It is legal to replace users in the ignorelist by undef. + (std::find(UserIgnoreList.begin(), UserIgnoreList.end(), U) != + UserIgnoreList.end())) && "Replacing out-of-tree value with undef"); } #endif @@ -1954,8 +1959,11 @@ private: bool tryToVectorizePair(Value *A, Value *B, BoUpSLP &R); /// \brief Try to vectorize a list of operands. + /// \@param BuildVector A list of users to ignore for the purpose of + /// scheduling and that don't need extracting. /// \returns true if a value was vectorized. - bool tryToVectorizeList(ArrayRef VL, BoUpSLP &R); + bool tryToVectorizeList(ArrayRef VL, BoUpSLP &R, + ArrayRef BuildVector = None); /// \brief Try to vectorize a chain that may start at the operands of \V; bool tryToVectorize(BinaryOperator *V, BoUpSLP &R); @@ -2128,7 +2136,8 @@ bool SLPVectorizer::tryToVectorizePair(Value *A, Value *B, BoUpSLP &R) { return tryToVectorizeList(VL, R); } -bool SLPVectorizer::tryToVectorizeList(ArrayRef VL, BoUpSLP &R) { +bool SLPVectorizer::tryToVectorizeList(ArrayRef VL, BoUpSLP &R, + ArrayRef BuildVector) { if (VL.size() < 2) return false; @@ -2178,13 +2187,38 @@ bool SLPVectorizer::tryToVectorizeList(ArrayRef VL, BoUpSLP &R) { << "\n"); ArrayRef Ops = VL.slice(i, OpsWidth); - R.buildTree(Ops); + ArrayRef BuildVectorSlice; + if (!BuildVector.empty()) + BuildVectorSlice = BuildVector.slice(i, OpsWidth); + + R.buildTree(Ops, BuildVectorSlice); int Cost = R.getTreeCost(); if (Cost < -SLPCostThreshold) { DEBUG(dbgs() << "SLP: Vectorizing list at cost:" << Cost << ".\n"); - R.vectorizeTree(); - + Value *VectorizedRoot = R.vectorizeTree(); + + // Reconstruct the build vector by extracting the vectorized root. This + // way we handle the case where some elements of the vector are undefined. + // (return (inserelt <4 xi32> (insertelt undef (opd0) 0) (opd1) 2)) + if (!BuildVectorSlice.empty()) { + // The insert point is the last build vector instruction. The vectorized + // root will precede it. This guarantees that we get an instruction. The + // vectorized tree could have been constant folded. + Instruction *InsertAfter = cast(BuildVectorSlice.back()); + unsigned VecIdx = 0; + for (auto &V : BuildVectorSlice) { + IRBuilder Builder( + ++BasicBlock::iterator(InsertAfter)); + InsertElementInst *IE = cast(V); + Instruction *Extract = cast(Builder.CreateExtractElement( + VectorizedRoot, Builder.getInt32(VecIdx++))); + IE->setOperand(1, Extract); + IE->removeFromParent(); + IE->insertAfter(Extract); + InsertAfter = IE; + } + } // Move to the next bundle. i += VF - 1; Changed = true; @@ -2293,7 +2327,7 @@ static Value *createRdxShuffleMask(unsigned VecLen, unsigned NumEltsToRdx, /// *p = /// class HorizontalReduction { - SmallPtrSet ReductionOps; + SmallVector ReductionOps; SmallVector ReducedVals; BinaryOperator *ReductionRoot; @@ -2387,7 +2421,7 @@ public: // We need to be able to reassociate the adds. if (!TreeN->isAssociative()) return false; - ReductionOps.insert(TreeN); + ReductionOps.push_back(TreeN); } // Retract. Stack.pop_back(); @@ -2424,7 +2458,7 @@ public: for (; i < NumReducedVals - ReduxWidth + 1; i += ReduxWidth) { ArrayRef ValsToReduce(&ReducedVals[i], ReduxWidth); - V.buildTree(ValsToReduce, &ReductionOps); + V.buildTree(ValsToReduce, ReductionOps); // Estimate cost. int Cost = V.getTreeCost() + getReductionCost(TTI, ReducedVals[i]); @@ -2543,13 +2577,16 @@ private: /// /// Returns true if it matches /// -static bool findBuildVector(InsertElementInst *IE, - SmallVectorImpl &Ops) { - if (!isa(IE->getOperand(0))) +static bool findBuildVector(InsertElementInst *FirstInsertElem, + SmallVectorImpl &BuildVector, + SmallVectorImpl &BuildVectorOpds) { + if (!isa(FirstInsertElem->getOperand(0))) return false; + InsertElementInst *IE = FirstInsertElem; while (true) { - Ops.push_back(IE->getOperand(1)); + BuildVector.push_back(IE); + BuildVectorOpds.push_back(IE->getOperand(1)); if (IE->use_empty()) return false; @@ -2720,12 +2757,16 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) { } // Try to vectorize trees that start at insertelement instructions. - if (InsertElementInst *IE = dyn_cast(it)) { - SmallVector Ops; - if (!findBuildVector(IE, Ops)) + if (InsertElementInst *FirstInsertElem = dyn_cast(it)) { + SmallVector BuildVector; + SmallVector BuildVectorOpds; + if (!findBuildVector(FirstInsertElem, BuildVector, BuildVectorOpds)) continue; - if (tryToVectorizeList(Ops, R)) { + // Vectorize starting with the build vector operands ignoring the + // BuildVector instructions for the purpose of scheduling and user + // extraction. + if (tryToVectorizeList(BuildVectorOpds, R, BuildVector)) { Changed = true; it = BB->begin(); e = BB->end(); -- cgit v1.2.3