From 0c7ba3cef2d99bf15175303d5e2523fe898d009d Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Sun, 3 Nov 2013 12:27:52 +0000 Subject: SLPVectorizer: When CSEing generated gathers only scan blocks containing them. Instead of doing a RPO traversal of the whole function remember the blocks containing gathers (typically <= 2) and scan them in dominator-first order. The actual CSE is still quadratic, but I'm not confident that adding a scoped hash table here is worth it as we're only looking at the generated instructions and not arbitrary code. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@193956 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/SLPVectorizer.cpp | 57 +++++++++++++++++++----------- 1 file changed, 37 insertions(+), 20 deletions(-) (limited to 'lib/Transforms/Vectorize') diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index 9a2165329d..9082b9d75a 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -1620,9 +1620,22 @@ Value *BoUpSLP::vectorizeTree() { return VectorizableTree[0].VectorizedValue; } +class DTCmp { + const DominatorTree *DT; + +public: + DTCmp(const DominatorTree *DT) : DT(DT) {} + bool operator()(const BasicBlock *A, const BasicBlock *B) const { + return DT->dominates(A, B); + } +}; + void BoUpSLP::optimizeGatherSequence() { DEBUG(dbgs() << "SLP: Optimizing " << GatherSeq.size() << " gather sequences instructions.\n"); + // Keep a list of visited BBs to run CSE on. It is typically small. + SmallPtrSet VisitedBBs; + SmallVector CSEWorkList; // LICM InsertElementInst sequences. for (SetVector::iterator it = GatherSeq.begin(), e = GatherSeq.end(); it != e; ++it) { @@ -1631,6 +1644,9 @@ void BoUpSLP::optimizeGatherSequence() { if (!Insert) continue; + if (VisitedBBs.insert(Insert->getParent())) + CSEWorkList.push_back(Insert->getParent()); + // Check if this block is inside a loop. Loop *L = LI->getLoopFor(Insert->getParent()); if (!L) @@ -1655,45 +1671,46 @@ void BoUpSLP::optimizeGatherSequence() { Insert->moveBefore(PreHeader->getTerminator()); } + // Sort blocks by domination. This ensures we visit a block after all blocks + // dominating it are visited. + std::stable_sort(CSEWorkList.begin(), CSEWorkList.end(), DTCmp(DT)); + // Perform O(N^2) search over the gather sequences and merge identical // instructions. TODO: We can further optimize this scan if we split the // instructions into different buckets based on the insert lane. - SmallPtrSet Visited; - SmallVector ToRemove; - ReversePostOrderTraversal RPOT(F); - for (ReversePostOrderTraversal::rpo_iterator I = RPOT.begin(), - E = RPOT.end(); I != E; ++I) { + SmallVector Visited; + for (SmallVectorImpl::iterator I = CSEWorkList.begin(), + E = CSEWorkList.end(); + I != E; ++I) { + assert(I == CSEWorkList.begin() || !DT->dominates(*I, *llvm::prior(I)) && + "Worklist not sorted properly!"); BasicBlock *BB = *I; - // For all instructions in the function: - for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) { - Instruction *In = it; + // For all instructions in blocks containing gather sequences: + for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e;) { + Instruction *In = it++; if ((!isa(In) && !isa(In)) || !GatherSeq.count(In)) continue; // Check if we can replace this instruction with any of the // visited instructions. - for (SmallPtrSet::iterator v = Visited.begin(), - ve = Visited.end(); v != ve; ++v) { + for (SmallVectorImpl::iterator v = Visited.begin(), + ve = Visited.end(); + v != ve; ++v) { if (In->isIdenticalTo(*v) && DT->dominates((*v)->getParent(), In->getParent())) { In->replaceAllUsesWith(*v); - ToRemove.push_back(In); + In->eraseFromParent(); In = 0; break; } } - if (In) - Visited.insert(In); + if (In) { + assert(std::find(Visited.begin(), Visited.end(), In) == Visited.end()); + Visited.push_back(In); + } } } - - // Erase all of the instructions that we RAUWed. - for (SmallVectorImpl::iterator v = ToRemove.begin(), - ve = ToRemove.end(); v != ve; ++v) { - assert((*v)->getNumUses() == 0 && "Can't remove instructions with uses"); - (*v)->eraseFromParent(); - } } /// The SLPVectorizer Pass. -- cgit v1.2.3