summaryrefslogtreecommitdiff
path: root/lib/Transforms/Vectorize/VecUtils.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Transforms/Vectorize/VecUtils.cpp')
-rw-r--r--lib/Transforms/Vectorize/VecUtils.cpp59
1 files changed, 50 insertions, 9 deletions
diff --git a/lib/Transforms/Vectorize/VecUtils.cpp b/lib/Transforms/Vectorize/VecUtils.cpp
index 9b9436683b..55adf8a816 100644
--- a/lib/Transforms/Vectorize/VecUtils.cpp
+++ b/lib/Transforms/Vectorize/VecUtils.cpp
@@ -243,6 +243,10 @@ int BoUpSLP::getTreeCost(ArrayRef<Value *> VL) {
LaneMap.clear();
MultiUserVals.clear();
MustScalarize.clear();
+ MustExtract.clear();
+
+ // Find the location of the last root.
+ unsigned LastRootIndex = InstrIdx[GetLastInstr(VL, VL.size())];
// Scan the tree and find which value is used by which lane, and which values
// must be scalarized.
@@ -258,15 +262,31 @@ int BoUpSLP::getTreeCost(ArrayRef<Value *> VL) {
for (Value::use_iterator I = (*it)->use_begin(), E = (*it)->use_end();
I != E; ++I) {
if (LaneMap.find(*I) == LaneMap.end()) {
- MustScalarize.insert(*it);
- DEBUG(dbgs()<<"SLP: Adding " << **it <<
- " to MustScalarize because of an out of tree usage.\n");
- break;
+ DEBUG(dbgs()<<"SLP: Instr " << **it << " has multiple users.\n");
+
+ // We don't have an ordering problem if the user is not in this basic
+ // block.
+ Instruction *Inst = cast<Instruction>(*I);
+ if (Inst->getParent() == BB) {
+ // We don't have an ordering problem if the user is after the last
+ // root.
+ unsigned Idx = InstrIdx[Inst];
+ if (Idx < LastRootIndex) {
+ MustScalarize.insert(*it);
+ DEBUG(dbgs()<<"SLP: Adding to MustScalarize "
+ "because of an unsafe out of tree usage.\n");
+ break;
+ }
+ }
+
+ DEBUG(dbgs()<<"SLP: Adding to MustExtract "
+ "because of a safe out of tree usage.\n");
+ MustExtract.insert(*it);
}
if (Lane == -1) Lane = LaneMap[*I];
if (Lane != LaneMap[*I]) {
MustScalarize.insert(*it);
- DEBUG(dbgs()<<"Adding " << **it <<
+ DEBUG(dbgs()<<"SLP: Adding " << **it <<
" to MustScalarize because multiple lane use it: "
<< Lane << " and " << LaneMap[*I] << ".\n");
break;
@@ -456,6 +476,13 @@ int BoUpSLP::getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth) {
}
}
+ // Calculate the extract cost.
+ unsigned ExternalUserExtractCost = 0;
+ for (unsigned i = 0, e = VL.size(); i < e; ++i)
+ if (MustExtract.count(VL[i]))
+ ExternalUserExtractCost +=
+ TTI->getVectorInstrCost(Instruction::ExtractElement, VecTy, i);
+
switch (Opcode) {
case Instruction::ZExt:
case Instruction::SExt:
@@ -469,7 +496,7 @@ int BoUpSLP::getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth) {
case Instruction::Trunc:
case Instruction::FPTrunc:
case Instruction::BitCast: {
- int Cost = 0;
+ int Cost = ExternalUserExtractCost;
ValueList Operands;
Type *SrcTy = VL0->getOperand(0)->getType();
// Prepare the operand vector.
@@ -510,7 +537,7 @@ int BoUpSLP::getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth) {
case Instruction::And:
case Instruction::Or:
case Instruction::Xor: {
- int Cost = 0;
+ int Cost = ExternalUserExtractCost;
// Calculate the cost of all of the operands.
for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
ValueList Operands;
@@ -540,7 +567,7 @@ int BoUpSLP::getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth) {
int ScalarLdCost = VecTy->getNumElements() *
TTI->getMemoryOpCost(Instruction::Load, ScalarTy, 1, 0);
int VecLdCost = TTI->getMemoryOpCost(Instruction::Load, ScalarTy, 1, 0);
- return VecLdCost - ScalarLdCost;
+ return VecLdCost - ScalarLdCost + ExternalUserExtractCost;
}
case Instruction::Store: {
// We know that we can merge the stores. Calculate the cost.
@@ -556,7 +583,7 @@ int BoUpSLP::getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth) {
}
int TotalCost = StoreCost + getTreeCost_rec(Operands, Depth + 1);
- return TotalCost;
+ return TotalCost + ExternalUserExtractCost;
}
default:
// Unable to vectorize unknown instructions.
@@ -588,10 +615,24 @@ Value *BoUpSLP::Scalarize(ArrayRef<Value *> VL, VectorType *Ty) {
Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL, int VF) {
Value *V = vectorizeTree_rec(VL, VF);
+
+ Instruction *LastInstr = GetLastInstr(VL, VL.size());
+ IRBuilder<> Builder(LastInstr);
+ for (ValueSet::iterator it = MustExtract.begin(), e = MustExtract.end();
+ it != e; ++it) {
+ Instruction *I = cast<Instruction>(*it);
+ Value *Vec = VectorizedValues[I];
+ assert(LaneMap.count(I) && "Unable to find the lane for the external use");
+ Value *Idx = Builder.getInt32(LaneMap[I]);
+ Value *Extract = Builder.CreateExtractElement(Vec, Idx);
+ I->replaceAllUsesWith(Extract);
+ }
+
// We moved some instructions around. We have to number them again
// before we can do any analysis.
numberInstructions();
MustScalarize.clear();
+ MustExtract.clear();
return V;
}