summaryrefslogtreecommitdiff
path: root/lib/Transforms/Vectorize
diff options
context:
space:
mode:
authorNadav Rotem <nrotem@apple.com>2013-05-10 22:59:33 +0000
committerNadav Rotem <nrotem@apple.com>2013-05-10 22:59:33 +0000
commit9bba9f630005409f43bc3b0480e18fa9f8d43e69 (patch)
tree1dbc0e17fa72f098ec38d772e18c135578ce149d /lib/Transforms/Vectorize
parent09ec4b21648700f9d4ef5bc90d732f90f32c930c (diff)
downloadllvm-9bba9f630005409f43bc3b0480e18fa9f8d43e69.tar.gz
llvm-9bba9f630005409f43bc3b0480e18fa9f8d43e69.tar.bz2
llvm-9bba9f630005409f43bc3b0480e18fa9f8d43e69.tar.xz
SLPVectorizer: Add support for trees with external users.
For example: bar() { int a = A[i]; int b = A[i+1]; B[i] = a; B[i+1] = b; foo(a); <--- a is used outside the vectorized expression. } git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@181648 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Transforms/Vectorize')
-rw-r--r--lib/Transforms/Vectorize/VecUtils.cpp59
-rw-r--r--lib/Transforms/Vectorize/VecUtils.h5
2 files changed, 55 insertions, 9 deletions
diff --git a/lib/Transforms/Vectorize/VecUtils.cpp b/lib/Transforms/Vectorize/VecUtils.cpp
index 9b9436683b..55adf8a816 100644
--- a/lib/Transforms/Vectorize/VecUtils.cpp
+++ b/lib/Transforms/Vectorize/VecUtils.cpp
@@ -243,6 +243,10 @@ int BoUpSLP::getTreeCost(ArrayRef<Value *> VL) {
LaneMap.clear();
MultiUserVals.clear();
MustScalarize.clear();
+ MustExtract.clear();
+
+ // Find the location of the last root.
+ unsigned LastRootIndex = InstrIdx[GetLastInstr(VL, VL.size())];
// Scan the tree and find which value is used by which lane, and which values
// must be scalarized.
@@ -258,15 +262,31 @@ int BoUpSLP::getTreeCost(ArrayRef<Value *> VL) {
for (Value::use_iterator I = (*it)->use_begin(), E = (*it)->use_end();
I != E; ++I) {
if (LaneMap.find(*I) == LaneMap.end()) {
- MustScalarize.insert(*it);
- DEBUG(dbgs()<<"SLP: Adding " << **it <<
- " to MustScalarize because of an out of tree usage.\n");
- break;
+ DEBUG(dbgs()<<"SLP: Instr " << **it << " has multiple users.\n");
+
+ // We don't have an ordering problem if the user is not in this basic
+ // block.
+ Instruction *Inst = cast<Instruction>(*I);
+ if (Inst->getParent() == BB) {
+ // We don't have an ordering problem if the user is after the last
+ // root.
+ unsigned Idx = InstrIdx[Inst];
+ if (Idx < LastRootIndex) {
+ MustScalarize.insert(*it);
+ DEBUG(dbgs()<<"SLP: Adding to MustScalarize "
+ "because of an unsafe out of tree usage.\n");
+ break;
+ }
+ }
+
+ DEBUG(dbgs()<<"SLP: Adding to MustExtract "
+ "because of a safe out of tree usage.\n");
+ MustExtract.insert(*it);
}
if (Lane == -1) Lane = LaneMap[*I];
if (Lane != LaneMap[*I]) {
MustScalarize.insert(*it);
- DEBUG(dbgs()<<"Adding " << **it <<
+ DEBUG(dbgs()<<"SLP: Adding " << **it <<
" to MustScalarize because multiple lane use it: "
<< Lane << " and " << LaneMap[*I] << ".\n");
break;
@@ -456,6 +476,13 @@ int BoUpSLP::getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth) {
}
}
+ // Calculate the extract cost.
+ unsigned ExternalUserExtractCost = 0;
+ for (unsigned i = 0, e = VL.size(); i < e; ++i)
+ if (MustExtract.count(VL[i]))
+ ExternalUserExtractCost +=
+ TTI->getVectorInstrCost(Instruction::ExtractElement, VecTy, i);
+
switch (Opcode) {
case Instruction::ZExt:
case Instruction::SExt:
@@ -469,7 +496,7 @@ int BoUpSLP::getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth) {
case Instruction::Trunc:
case Instruction::FPTrunc:
case Instruction::BitCast: {
- int Cost = 0;
+ int Cost = ExternalUserExtractCost;
ValueList Operands;
Type *SrcTy = VL0->getOperand(0)->getType();
// Prepare the operand vector.
@@ -510,7 +537,7 @@ int BoUpSLP::getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth) {
case Instruction::And:
case Instruction::Or:
case Instruction::Xor: {
- int Cost = 0;
+ int Cost = ExternalUserExtractCost;
// Calculate the cost of all of the operands.
for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
ValueList Operands;
@@ -540,7 +567,7 @@ int BoUpSLP::getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth) {
int ScalarLdCost = VecTy->getNumElements() *
TTI->getMemoryOpCost(Instruction::Load, ScalarTy, 1, 0);
int VecLdCost = TTI->getMemoryOpCost(Instruction::Load, ScalarTy, 1, 0);
- return VecLdCost - ScalarLdCost;
+ return VecLdCost - ScalarLdCost + ExternalUserExtractCost;
}
case Instruction::Store: {
// We know that we can merge the stores. Calculate the cost.
@@ -556,7 +583,7 @@ int BoUpSLP::getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth) {
}
int TotalCost = StoreCost + getTreeCost_rec(Operands, Depth + 1);
- return TotalCost;
+ return TotalCost + ExternalUserExtractCost;
}
default:
// Unable to vectorize unknown instructions.
@@ -588,10 +615,24 @@ Value *BoUpSLP::Scalarize(ArrayRef<Value *> VL, VectorType *Ty) {
Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL, int VF) {
Value *V = vectorizeTree_rec(VL, VF);
+
+ Instruction *LastInstr = GetLastInstr(VL, VL.size());
+ IRBuilder<> Builder(LastInstr);
+ for (ValueSet::iterator it = MustExtract.begin(), e = MustExtract.end();
+ it != e; ++it) {
+ Instruction *I = cast<Instruction>(*it);
+ Value *Vec = VectorizedValues[I];
+ assert(LaneMap.count(I) && "Unable to find the lane for the external use");
+ Value *Idx = Builder.getInt32(LaneMap[I]);
+ Value *Extract = Builder.CreateExtractElement(Vec, Idx);
+ I->replaceAllUsesWith(Extract);
+ }
+
// We moved some instructions around. We have to number them again
// before we can do any analysis.
numberInstructions();
MustScalarize.clear();
+ MustExtract.clear();
return V;
}
diff --git a/lib/Transforms/Vectorize/VecUtils.h b/lib/Transforms/Vectorize/VecUtils.h
index 5456c6c779..abb35840e9 100644
--- a/lib/Transforms/Vectorize/VecUtils.h
+++ b/lib/Transforms/Vectorize/VecUtils.h
@@ -127,6 +127,11 @@ private:
/// NOTICE: The vectorization methods also use this set.
ValueSet MustScalarize;
+ /// Contains values that have users outside of the vectorized graph.
+ /// We need to generate extract instructions for these values.
+ /// NOTICE: The vectorization methods also use this set.
+ ValueSet MustExtract;
+
/// Contains a list of values that are used outside the current tree. This
/// set must be reset between runs.
ValueSet MultiUserVals;