diff options
author | Nadav Rotem <nrotem@apple.com> | 2013-05-10 22:59:33 +0000 |
---|---|---|
committer | Nadav Rotem <nrotem@apple.com> | 2013-05-10 22:59:33 +0000 |
commit | 9bba9f630005409f43bc3b0480e18fa9f8d43e69 (patch) | |
tree | 1dbc0e17fa72f098ec38d772e18c135578ce149d /lib/Transforms/Vectorize | |
parent | 09ec4b21648700f9d4ef5bc90d732f90f32c930c (diff) | |
download | llvm-9bba9f630005409f43bc3b0480e18fa9f8d43e69.tar.gz llvm-9bba9f630005409f43bc3b0480e18fa9f8d43e69.tar.bz2 llvm-9bba9f630005409f43bc3b0480e18fa9f8d43e69.tar.xz |
SLPVectorizer: Add support for trees with external users.
For example:
bar() {
int a = A[i];
int b = A[i+1];
B[i] = a;
B[i+1] = b;
foo(a); <--- a is used outside the vectorized expression.
}
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@181648 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Transforms/Vectorize')
-rw-r--r-- | lib/Transforms/Vectorize/VecUtils.cpp | 59 | ||||
-rw-r--r-- | lib/Transforms/Vectorize/VecUtils.h | 5 |
2 files changed, 55 insertions, 9 deletions
diff --git a/lib/Transforms/Vectorize/VecUtils.cpp b/lib/Transforms/Vectorize/VecUtils.cpp index 9b9436683b..55adf8a816 100644 --- a/lib/Transforms/Vectorize/VecUtils.cpp +++ b/lib/Transforms/Vectorize/VecUtils.cpp @@ -243,6 +243,10 @@ int BoUpSLP::getTreeCost(ArrayRef<Value *> VL) { LaneMap.clear(); MultiUserVals.clear(); MustScalarize.clear(); + MustExtract.clear(); + + // Find the location of the last root. + unsigned LastRootIndex = InstrIdx[GetLastInstr(VL, VL.size())]; // Scan the tree and find which value is used by which lane, and which values // must be scalarized. @@ -258,15 +262,31 @@ int BoUpSLP::getTreeCost(ArrayRef<Value *> VL) { for (Value::use_iterator I = (*it)->use_begin(), E = (*it)->use_end(); I != E; ++I) { if (LaneMap.find(*I) == LaneMap.end()) { - MustScalarize.insert(*it); - DEBUG(dbgs()<<"SLP: Adding " << **it << - " to MustScalarize because of an out of tree usage.\n"); - break; + DEBUG(dbgs()<<"SLP: Instr " << **it << " has multiple users.\n"); + + // We don't have an ordering problem if the user is not in this basic + // block. + Instruction *Inst = cast<Instruction>(*I); + if (Inst->getParent() == BB) { + // We don't have an ordering problem if the user is after the last + // root. + unsigned Idx = InstrIdx[Inst]; + if (Idx < LastRootIndex) { + MustScalarize.insert(*it); + DEBUG(dbgs()<<"SLP: Adding to MustScalarize " + "because of an unsafe out of tree usage.\n"); + break; + } + } + + DEBUG(dbgs()<<"SLP: Adding to MustExtract " + "because of a safe out of tree usage.\n"); + MustExtract.insert(*it); } if (Lane == -1) Lane = LaneMap[*I]; if (Lane != LaneMap[*I]) { MustScalarize.insert(*it); - DEBUG(dbgs()<<"Adding " << **it << + DEBUG(dbgs()<<"SLP: Adding " << **it << " to MustScalarize because multiple lane use it: " << Lane << " and " << LaneMap[*I] << ".\n"); break; @@ -456,6 +476,13 @@ int BoUpSLP::getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth) { } } + // Calculate the extract cost. + unsigned ExternalUserExtractCost = 0; + for (unsigned i = 0, e = VL.size(); i < e; ++i) + if (MustExtract.count(VL[i])) + ExternalUserExtractCost += + TTI->getVectorInstrCost(Instruction::ExtractElement, VecTy, i); + switch (Opcode) { case Instruction::ZExt: case Instruction::SExt: @@ -469,7 +496,7 @@ int BoUpSLP::getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth) { case Instruction::Trunc: case Instruction::FPTrunc: case Instruction::BitCast: { - int Cost = 0; + int Cost = ExternalUserExtractCost; ValueList Operands; Type *SrcTy = VL0->getOperand(0)->getType(); // Prepare the operand vector. @@ -510,7 +537,7 @@ int BoUpSLP::getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth) { case Instruction::And: case Instruction::Or: case Instruction::Xor: { - int Cost = 0; + int Cost = ExternalUserExtractCost; // Calculate the cost of all of the operands. for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) { ValueList Operands; @@ -540,7 +567,7 @@ int BoUpSLP::getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth) { int ScalarLdCost = VecTy->getNumElements() * TTI->getMemoryOpCost(Instruction::Load, ScalarTy, 1, 0); int VecLdCost = TTI->getMemoryOpCost(Instruction::Load, ScalarTy, 1, 0); - return VecLdCost - ScalarLdCost; + return VecLdCost - ScalarLdCost + ExternalUserExtractCost; } case Instruction::Store: { // We know that we can merge the stores. Calculate the cost. @@ -556,7 +583,7 @@ int BoUpSLP::getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth) { } int TotalCost = StoreCost + getTreeCost_rec(Operands, Depth + 1); - return TotalCost; + return TotalCost + ExternalUserExtractCost; } default: // Unable to vectorize unknown instructions. @@ -588,10 +615,24 @@ Value *BoUpSLP::Scalarize(ArrayRef<Value *> VL, VectorType *Ty) { Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL, int VF) { Value *V = vectorizeTree_rec(VL, VF); + + Instruction *LastInstr = GetLastInstr(VL, VL.size()); + IRBuilder<> Builder(LastInstr); + for (ValueSet::iterator it = MustExtract.begin(), e = MustExtract.end(); + it != e; ++it) { + Instruction *I = cast<Instruction>(*it); + Value *Vec = VectorizedValues[I]; + assert(LaneMap.count(I) && "Unable to find the lane for the external use"); + Value *Idx = Builder.getInt32(LaneMap[I]); + Value *Extract = Builder.CreateExtractElement(Vec, Idx); + I->replaceAllUsesWith(Extract); + } + // We moved some instructions around. We have to number them again // before we can do any analysis. numberInstructions(); MustScalarize.clear(); + MustExtract.clear(); return V; } diff --git a/lib/Transforms/Vectorize/VecUtils.h b/lib/Transforms/Vectorize/VecUtils.h index 5456c6c779..abb35840e9 100644 --- a/lib/Transforms/Vectorize/VecUtils.h +++ b/lib/Transforms/Vectorize/VecUtils.h @@ -127,6 +127,11 @@ private: /// NOTICE: The vectorization methods also use this set. ValueSet MustScalarize; + /// Contains values that have users outside of the vectorized graph. + /// We need to generate extract instructions for these values. + /// NOTICE: The vectorization methods also use this set. + ValueSet MustExtract; + /// Contains a list of values that are used outside the current tree. This /// set must be reset between runs. ValueSet MultiUserVals; |