summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
Diffstat (limited to 'lib')
-rw-r--r--lib/Target/TargetTransformImpl.cpp152
-rw-r--r--lib/Transforms/Vectorize/LoopVectorize.cpp61
2 files changed, 177 insertions, 36 deletions
diff --git a/lib/Target/TargetTransformImpl.cpp b/lib/Target/TargetTransformImpl.cpp
index 40184ed78d..d3ab105988 100644
--- a/lib/Target/TargetTransformImpl.cpp
+++ b/lib/Target/TargetTransformImpl.cpp
@@ -126,7 +126,7 @@ static int InstructionOpcodeToISD(unsigned Opcode) {
std::pair<unsigned, EVT>
VectorTargetTransformImpl::getTypeLegalizationCost(LLVMContext &C,
- EVT Ty) const {
+ EVT Ty) const {
unsigned Cost = 1;
// We keep legalizing the type until we find a legal kind. We assume that
// the only operation that costs anything is the split. After splitting
@@ -135,7 +135,7 @@ VectorTargetTransformImpl::getTypeLegalizationCost(LLVMContext &C,
TargetLowering::LegalizeKind LK = TLI->getTypeConversion(C, Ty);
if (LK.first == TargetLowering::TypeLegal)
- return std::make_pair(Cost, LK.second);
+ return std::make_pair(Cost, Ty);
if (LK.first == TargetLowering::TypeSplitVector)
Cost *= 2;
@@ -146,44 +146,144 @@ VectorTargetTransformImpl::getTypeLegalizationCost(LLVMContext &C,
}
unsigned
-VectorTargetTransformImpl::getInstrCost(unsigned Opcode, Type *Ty1,
- Type *Ty2) const {
+VectorTargetTransformImpl::getScalarizationOverhead(Type *Ty,
+ bool Insert,
+ bool Extract) const {
+ assert (Ty->isVectorTy() && "Can only scalarize vectors");
+ unsigned Cost = 0;
+
+ for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
+ if (Insert)
+ Cost += getVectorInstrCost(Instruction::InsertElement, Ty, i);
+ if (Extract)
+ Cost += getVectorInstrCost(Instruction::ExtractElement, Ty, i);
+ }
+
+ return Cost;
+}
+
+unsigned VectorTargetTransformImpl::getArithmeticInstrCost(unsigned Opcode,
+ Type *Ty) const {
// Check if any of the operands are vector operands.
int ISD = InstructionOpcodeToISD(Opcode);
+ assert(ISD && "Invalid opcode");
+
+ std::pair<unsigned, EVT> LT =
+ getTypeLegalizationCost(Ty->getContext(), TLI->getValueType(Ty));
+
+ if (!TLI->isOperationExpand(ISD, LT.second)) {
+ // The operation is legal. Assume it costs 1. Multiply
+ // by the type-legalization overhead.
+ return LT.first * 1;
+ }
+
+ // Else, assume that we need to scalarize this op.
+ if (Ty->isVectorTy()) {
+ unsigned Num = Ty->getVectorNumElements();
+ unsigned Cost = getArithmeticInstrCost(Opcode, Ty->getScalarType());
+ // return the cost of multiple scalar invocation plus the cost of inserting
+ // and extracting the values.
+ return getScalarizationOverhead(Ty, true, true) + Num * Cost;
+ }
+
+ // We don't know anything about this scalar instruction.
+ return 1;
+}
+
+unsigned VectorTargetTransformImpl::getBroadcastCost(Type *Tp) const {
+ return 1;
+}
+
+unsigned VectorTargetTransformImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
+ Type *Src) const {
+ assert(Src->isVectorTy() == Dst->isVectorTy() && "Invalid input types");
+ int ISD = InstructionOpcodeToISD(Opcode);
+ assert(ISD && "Invalid opcode");
- // If we don't have any information about this instruction assume it costs 1.
- if (ISD == 0)
- return 1;
+ std::pair<unsigned, EVT> SrcLT =
+ getTypeLegalizationCost(Src->getContext(), TLI->getValueType(Src));
+ std::pair<unsigned, EVT> DstLT =
+ getTypeLegalizationCost(Dst->getContext(), TLI->getValueType(Dst));
+
+ // If the cast is between same-sized registers, then the check is simple.
+ if (SrcLT.first == DstLT.first &&
+ SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) {
+ // Just check the op cost:
+ if (!TLI->isOperationExpand(ISD, DstLT.second)) {
+ // The operation is legal. Assume it costs 1. Multiply
+ // by the type-legalization overhead.
+ return SrcLT.first * 1;
+ }
+ }
+
+ // Otherwise, assume that the cast is scalarized.
+ if (Dst->isVectorTy()) {
+ unsigned Num = Dst->getVectorNumElements();
+ unsigned Cost = getCastInstrCost(Opcode, Src->getScalarType(),
+ Dst->getScalarType());
+ // return the cost of multiple scalar invocation plus the cost of inserting
+ // and extracting the values.
+ return getScalarizationOverhead(Dst, true, true) + Num * Cost;
+ }
+
+ // Unknown scalar opcode.
+ return 1;
+}
+
+unsigned VectorTargetTransformImpl::getCFInstrCost(unsigned Opcode) const {
+ return 1;
+}
+
+unsigned VectorTargetTransformImpl::getCmpSelInstrCost(unsigned Opcode,
+ Type *ValTy,
+ Type *CondTy) const {
+ int ISD = InstructionOpcodeToISD(Opcode);
+ assert(ISD && "Invalid opcode");
+
// Selects on vectors are actually vector selects.
if (ISD == ISD::SELECT) {
- assert(Ty2 && "Ty2 must hold the condition type");
- if (Ty2->isVectorTy())
- ISD = ISD::VSELECT;
+ assert(CondTy && "CondTy must exist");
+ if (CondTy->isVectorTy())
+ ISD = ISD::VSELECT;
}
- assert(Ty1 && "We need to have at least one type");
+ std::pair<unsigned, EVT> LT =
+ getTypeLegalizationCost(ValTy->getContext(), TLI->getValueType(ValTy));
- // From this stage we look at the legalized type.
- std::pair<unsigned, EVT> LT =
- getTypeLegalizationCost(Ty1->getContext(), TLI->getValueType(Ty1));
-
- if (TLI->isOperationLegalOrCustom(ISD, LT.second)) {
+ if (!TLI->isOperationExpand(ISD, LT.second)) {
// The operation is legal. Assume it costs 1. Multiply
// by the type-legalization overhead.
return LT.first * 1;
}
- unsigned NumElem =
- (LT.second.isVector() ? LT.second.getVectorNumElements() : 1);
+ // Otherwise, assume that the cast is scalarized.
+ if (ValTy->isVectorTy()) {
+ unsigned Num = ValTy->getVectorNumElements();
+ if (CondTy)
+ CondTy = CondTy->getScalarType();
+ unsigned Cost = getCmpSelInstrCost(Opcode, ValTy->getScalarType(),
+ CondTy);
+
+ // return the cost of multiple scalar invocation plus the cost of inserting
+ // and extracting the values.
+ return getScalarizationOverhead(ValTy, true, false) + Num * Cost;
+ }
- // We will probably scalarize this instruction. Assume that the cost is the
- // number of the vector elements.
- return LT.first * NumElem * 1;
+ // Unknown scalar opcode.
+ return 1;
+}
+
+/// Returns the expected cost of Vector Insert and Extract.
+unsigned VectorTargetTransformImpl::getVectorInstrCost(unsigned Opcode,
+ Type *Val,
+ unsigned Index) const {
+ return 1;
}
unsigned
-VectorTargetTransformImpl::getBroadcastCost(Type *Tp) const {
+VectorTargetTransformImpl::getInstrCost(unsigned Opcode, Type *Ty1,
+ Type *Ty2) const {
return 1;
}
@@ -191,17 +291,15 @@ unsigned
VectorTargetTransformImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
unsigned Alignment,
unsigned AddressSpace) const {
- // From this stage we look at the legalized type.
- std::pair<unsigned, EVT> LT =
+ std::pair<unsigned, EVT> LT =
getTypeLegalizationCost(Src->getContext(), TLI->getValueType(Src));
+
// Assume that all loads of legal types cost 1.
return LT.first;
}
unsigned
VectorTargetTransformImpl::getNumberOfParts(Type *Tp) const {
- std::pair<unsigned, EVT> LT =
- getTypeLegalizationCost(Tp->getContext(), TLI->getValueType(Tp));
- return LT.first;
+ return TLI->getNumRegisters(Tp->getContext(), TLI->getValueType(Tp));
}
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index e47baf8908..1773812da2 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -108,7 +108,7 @@ public:
createEmptyLoop(Legal);
/// Widen each instruction in the old loop to a new one in the new loop.
/// Use the Legality module to find the induction and reduction variables.
- vectorizeLoop(Legal);
+ vectorizeLoop(Legal);
// register the new loop.
cleanup();
}
@@ -254,6 +254,9 @@ public:
/// This check allows us to vectorize A[idx] into a wide load/store.
bool isConsecutiveGep(Value *Ptr);
+ /// Returns true if this instruction will remain scalar after vectorization.
+ bool isUniformAfterVectorization(Instruction* I) {return Uniforms.count(I);}
+
private:
/// Check if a single basic block loop is vectorizable.
/// At this point we know that this is a loop with a constant trip count
@@ -291,6 +294,9 @@ private:
/// Allowed outside users. This holds the reduction
/// vars which can be accessed from outside the loop.
SmallPtrSet<Value*, 4> AllowedExit;
+ /// This set holds the variables which are known to be uniform after
+ /// vectorization.
+ SmallPtrSet<Instruction*, 4> Uniforms;
};
/// LoopVectorizationCostModel - estimates the expected speedups due to
@@ -1177,9 +1183,40 @@ bool LoopVectorizationLegality::canVectorizeBlock(BasicBlock &BB) {
return false;
}
- // If the memory dependencies do not prevent us from
- // vectorizing, then vectorize.
- return canVectorizeMemory(BB);
+ // Don't vectorize if the memory dependencies do not allow vectorization.
+ if (!canVectorizeMemory(BB))
+ return false;
+
+ // We now know that the loop is vectorizable!
+ // Collect variables that will remain uniform after vectorization.
+ std::vector<Value*> Worklist;
+
+ // Start with the conditional branch and walk up the block.
+ Worklist.push_back(BB.getTerminator()->getOperand(0));
+
+ while (Worklist.size()) {
+ Instruction *I = dyn_cast<Instruction>(Worklist.back());
+ Worklist.pop_back();
+ // Look at instructions inside this block.
+ if (!I) continue;
+ if (I->getParent() != &BB) continue;
+
+ // Stop when reaching PHI nodes.
+ if (isa<PHINode>(I)) {
+ assert(I == Induction && "Found a uniform PHI that is not the induction");
+ break;
+ }
+
+ // This is a known uniform.
+ Uniforms.insert(I);
+
+ // Insert all operands.
+ for (int i=0, Op = I->getNumOperands(); i < Op; ++i) {
+ Worklist.push_back(I->getOperand(i));
+ }
+ }
+
+ return true;
}
bool LoopVectorizationLegality::canVectorizeMemory(BasicBlock &BB) {
@@ -1484,9 +1521,15 @@ unsigned
LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
assert(VTTI && "Invalid vector target transformation info");
+ // If we know that this instruction will remain uniform, check the cost of
+ // the scalar version.
+ if (Legal->isUniformAfterVectorization(I))
+ VF = 1;
+
Type *RetTy = I->getType();
Type *VectorTy = ToVectorTy(RetTy, VF);
+
// TODO: We need to estimate the cost of intrinsic calls.
switch (I->getOpcode()) {
case Instruction::GetElementPtr:
@@ -1495,7 +1538,7 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
// generate vector geps.
return 0;
case Instruction::Br: {
- return VTTI->getInstrCost(I->getOpcode());
+ return VTTI->getCFInstrCost(I->getOpcode());
}
case Instruction::PHI:
return 0;
@@ -1517,7 +1560,7 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
case Instruction::And:
case Instruction::Or:
case Instruction::Xor: {
- return VTTI->getInstrCost(I->getOpcode(), VectorTy);
+ return VTTI->getArithmeticInstrCost(I->getOpcode(), VectorTy);
}
case Instruction::Select: {
SelectInst *SI = cast<SelectInst>(I);
@@ -1527,13 +1570,13 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
if (ScalarCond)
CondTy = VectorType::get(CondTy, VF);
- return VTTI->getInstrCost(I->getOpcode(), VectorTy, CondTy);
+ return VTTI->getCmpSelInstrCost(I->getOpcode(), VectorTy, CondTy);
}
case Instruction::ICmp:
case Instruction::FCmp: {
Type *ValTy = I->getOperand(0)->getType();
VectorTy = ToVectorTy(ValTy, VF);
- return VTTI->getInstrCost(I->getOpcode(), VectorTy);
+ return VTTI->getCmpSelInstrCost(I->getOpcode(), VectorTy);
}
case Instruction::Store: {
StoreInst *SI = cast<StoreInst>(I);
@@ -1602,7 +1645,7 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
case Instruction::FPTrunc:
case Instruction::BitCast: {
Type *SrcVecTy = ToVectorTy(I->getOperand(0)->getType(), VF);
- return VTTI->getInstrCost(I->getOpcode(), VectorTy, SrcVecTy);
+ return VTTI->getCastInstrCost(I->getOpcode(), VectorTy, SrcVecTy);
}
default: {
// We are scalarizing the instruction. Return the cost of the scalar