summary | refs | log | tree | commit | diff
path: root/lib/Transforms/Vectorize
diff options
context:
space:
mode:
author: Nadav Rotem <nrotem@apple.com> 2012-10-25 21:03:48 +0000
committer: Nadav Rotem <nrotem@apple.com> 2012-10-25 21:03:48 +0000
commit: 3ef9dfa6858e25015c3e36b2f1a0ba5ebdea80d2 (patch)
tree: 52e3424c67ff8a48a6f501e150092d262b770a15 /lib/Transforms/Vectorize
parent: 6a020a71173a3ea7738a9df69982e85ddbfe0303 (diff)
download: llvm-3ef9dfa6858e25015c3e36b2f1a0ba5ebdea80d2.tar.gz
llvm-3ef9dfa6858e25015c3e36b2f1a0ba5ebdea80d2.tar.bz2
llvm-3ef9dfa6858e25015c3e36b2f1a0ba5ebdea80d2.tar.xz
LoopVectorize: Teach the cost model to query scalar costs as scalar types and not vectors of 1.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@166715 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Transforms/Vectorize')
-rw-r--r-- lib/Transforms/Vectorize/LoopVectorize.cpp | 102
1 files changed, 61 insertions, 41 deletions
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index 423c7a4911..e47baf8908 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -324,6 +324,11 @@ private:
/// width. Vector width of one means scalar.
unsigned getInstructionCost(Instruction *I, unsigned VF);
+ /// A helper function for converting Scalar types to vector types.
+ /// If the incoming type is void, we return void. If the VF is 1, we return
+ /// the scalar type.
+ static Type* ToVectorTy(Type *Scalar, unsigned VF);
+
/// The loop that we evaluate.
Loop *TheLoop;
/// Scev analysis.
@@ -1478,8 +1483,16 @@ unsigned LoopVectorizationCostModel::expectedCost(unsigned VF) {
unsigned
LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
assert(VTTI && "Invalid vector target transformation info");
+
+ Type *RetTy = I->getType();
+ Type *VectorTy = ToVectorTy(RetTy, VF);
+
+ // TODO: We need to estimate the cost of intrinsic calls.
switch (I->getOpcode()) {
case Instruction::GetElementPtr:
+ // We mark this instruction as zero-cost because scalar GEPs are usually
+ // lowered to the instruction addressing mode. At the moment we don't
+ // generate vector geps.
return 0;
case Instruction::Br: {
return VTTI->getInstrCost(I->getOpcode());
@@ -1504,74 +1517,76 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
case Instruction::And:
case Instruction::Or:
case Instruction::Xor: {
- Type *VTy = VectorType::get(I->getType(), VF);
- return VTTI->getInstrCost(I->getOpcode(), VTy);
+ return VTTI->getInstrCost(I->getOpcode(), VectorTy);
}
case Instruction::Select: {
SelectInst *SI = cast<SelectInst>(I);
- Type *VTy = VectorType::get(I->getType(), VF);
const SCEV *CondSCEV = SE->getSCEV(SI->getCondition());
bool ScalarCond = (SE->isLoopInvariant(CondSCEV, TheLoop));
Type *CondTy = SI->getCondition()->getType();
if (ScalarCond)
CondTy = VectorType::get(CondTy, VF);
- return VTTI->getInstrCost(I->getOpcode(), VTy, CondTy);
+ return VTTI->getInstrCost(I->getOpcode(), VectorTy, CondTy);
}
case Instruction::ICmp:
case Instruction::FCmp: {
- Type *VTy = VectorType::get(I->getOperand(0)->getType(), VF);
- return VTTI->getInstrCost(I->getOpcode(), VTy);
+ Type *ValTy = I->getOperand(0)->getType();
+ VectorTy = ToVectorTy(ValTy, VF);
+ return VTTI->getInstrCost(I->getOpcode(), VectorTy);
}
case Instruction::Store: {
StoreInst *SI = cast<StoreInst>(I);
- Type *VTy = VectorType::get(SI->getValueOperand()->getType(), VF);
+ Type *ValTy = SI->getValueOperand()->getType();
+ VectorTy = ToVectorTy(ValTy, VF);
+
+ if (VF == 1)
+ return VTTI->getMemoryOpCost(I->getOpcode(), ValTy,
+ SI->getAlignment(), SI->getPointerAddressSpace());
// Scalarized stores.
if (!Legal->isConsecutiveGep(SI->getPointerOperand())) {
unsigned Cost = 0;
- if (VF != 1) {
- unsigned ExtCost = VTTI->getInstrCost(Instruction::ExtractElement,
- VTy);
- // The cost of extracting from the value vector and pointer vector.
- Cost += VF * (ExtCost * 2);
- }
+ unsigned ExtCost = VTTI->getInstrCost(Instruction::ExtractElement,
+ ValTy);
+ // The cost of extracting from the value vector.
+ Cost += VF * (ExtCost);
// The cost of the scalar stores.
Cost += VF * VTTI->getMemoryOpCost(I->getOpcode(),
- VTy->getScalarType(),
+ ValTy->getScalarType(),
SI->getAlignment(),
SI->getPointerAddressSpace());
return Cost;
}
// Wide stores.
- return VTTI->getMemoryOpCost(I->getOpcode(), VTy, SI->getAlignment(),
+ return VTTI->getMemoryOpCost(I->getOpcode(), VectorTy, SI->getAlignment(),
SI->getPointerAddressSpace());
}
case Instruction::Load: {
LoadInst *LI = cast<LoadInst>(I);
- Type *VTy = VectorType::get(I->getType(), VF);
+
+ if (VF == 1)
+ return VTTI->getMemoryOpCost(I->getOpcode(), RetTy,
+ LI->getAlignment(),
+ LI->getPointerAddressSpace());
// Scalarized loads.
if (!Legal->isConsecutiveGep(LI->getPointerOperand())) {
unsigned Cost = 0;
- if (VF != 1) {
- unsigned InCost = VTTI->getInstrCost(Instruction::InsertElement, VTy);
- unsigned ExCost = VTTI->getInstrCost(Instruction::ExtractValue, VTy);
-
- // The cost of inserting the loaded value into the result vector, and
- // extracting from a vector of pointers.
- Cost += VF * (InCost + ExCost);
- }
+ unsigned InCost = VTTI->getInstrCost(Instruction::InsertElement, RetTy);
+ // The cost of inserting the loaded value into the result vector.
+ Cost += VF * (InCost);
// The cost of the scalar stores.
- Cost += VF * VTTI->getMemoryOpCost(I->getOpcode(), VTy->getScalarType(),
+ Cost += VF * VTTI->getMemoryOpCost(I->getOpcode(),
+ RetTy->getScalarType(),
LI->getAlignment(),
LI->getPointerAddressSpace());
return Cost;
}
// Wide loads.
- return VTTI->getMemoryOpCost(I->getOpcode(), VTy, LI->getAlignment(),
+ return VTTI->getMemoryOpCost(I->getOpcode(), VectorTy, LI->getAlignment(),
LI->getPointerAddressSpace());
}
case Instruction::ZExt:
@@ -1586,35 +1601,40 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
case Instruction::Trunc:
case Instruction::FPTrunc:
case Instruction::BitCast: {
- Type *SrcTy = VectorType::get(I->getOperand(0)->getType(), VF);
- Type *DstTy = VectorType::get(I->getType(), VF);
- return VTTI->getInstrCost(I->getOpcode(), DstTy, SrcTy);
+ Type *SrcVecTy = ToVectorTy(I->getOperand(0)->getType(), VF);
+ return VTTI->getInstrCost(I->getOpcode(), VectorTy, SrcVecTy);
}
default: {
// We are scalarizing the instruction. Return the cost of the scalar
// instruction, plus the cost of insert and extract into vector
// elements, times the vector width.
unsigned Cost = 0;
- Type *Ty = I->getType();
- if (!Ty->isVoidTy()) {
- Type *VTy = VectorType::get(Ty, VF);
- unsigned InsCost = VTTI->getInstrCost(Instruction::InsertElement, VTy);
- unsigned ExtCost = VTTI->getInstrCost(Instruction::ExtractElement, VTy);
- Cost += VF * (InsCost + ExtCost);
- }
+ bool IsVoid = RetTy->isVoidTy();
- /// We don't have any information on the scalar instruction, but maybe
- /// the target has.
- /// TODO: This may be a target-specific intrinsic.
- /// Need to add API for that.
- Cost += VF * VTTI->getInstrCost(I->getOpcode(), Ty);
+ unsigned InsCost = (IsVoid ? 0 :
+ VTTI->getInstrCost(Instruction::InsertElement,
+ VectorTy));
+ unsigned ExtCost = VTTI->getInstrCost(Instruction::ExtractElement,
+ VectorTy);
+
+ // The cost of inserting the results plus extracting each one of the
+ // operands.
+ Cost += VF * (InsCost + ExtCost * I->getNumOperands());
+
+ // The cost of executing VF copies of the scalar instruction.
+ Cost += VF * VTTI->getInstrCost(I->getOpcode(), RetTy);
return Cost;
}
}// end of switch.
}
+Type* LoopVectorizationCostModel::ToVectorTy(Type *Scalar, unsigned VF) {
+ if (Scalar->isVoidTy() || VF == 1)
+ return Scalar;
+ return VectorType::get(Scalar, VF);
+}
} // namespace