summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNadav Rotem <nrotem@apple.com>2012-10-24 23:47:38 +0000
committerNadav Rotem <nrotem@apple.com>2012-10-24 23:47:38 +0000
commit2652c50f74bc4a874c6a2e4b34ff2d52d479183f (patch)
tree82c8466e0c684e466cb5dec7360704269ed0f9b6
parentd95666c226e41218a07541aaa2cc1fba823c25e4 (diff)
downloadllvm-2652c50f74bc4a874c6a2e4b34ff2d52d479183f.tar.gz
llvm-2652c50f74bc4a874c6a2e4b34ff2d52d479183f.tar.bz2
llvm-2652c50f74bc4a874c6a2e4b34ff2d52d479183f.tar.xz
Implement a basic cost model for vector and scalar instructions.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@166642 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--include/llvm/Target/TargetLowering.h2
-rw-r--r--include/llvm/Target/TargetTransformImpl.h5
-rw-r--r--lib/Target/TargetTransformImpl.cpp129
-rw-r--r--lib/Transforms/IPO/PassManagerBuilder.cpp2
-rw-r--r--lib/Transforms/Vectorize/LoopVectorize.cpp49
-rw-r--r--test/Transforms/LoopVectorize/cost-model.ll4
6 files changed, 168 insertions, 23 deletions
diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h
index 0dbdd25140..19eb941635 100644
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@@ -1958,6 +1958,7 @@ private:
ValueTypeActionImpl ValueTypeActions;
+public:
LegalizeKind
getTypeConversion(LLVMContext &Context, EVT VT) const {
// If this is a simple type, use the ComputeRegisterProp mechanism.
@@ -2074,6 +2075,7 @@ private:
return LegalizeKind(TypeSplitVector, NVT);
}
+private:
std::vector<std::pair<EVT, const TargetRegisterClass*> > AvailableRegClasses;
/// TargetDAGCombineArray - Targets can specify ISD nodes that they would
diff --git a/include/llvm/Target/TargetTransformImpl.h b/include/llvm/Target/TargetTransformImpl.h
index ec39f9968e..25a7edeb01 100644
--- a/include/llvm/Target/TargetTransformImpl.h
+++ b/include/llvm/Target/TargetTransformImpl.h
@@ -16,6 +16,7 @@
#define LLVM_TARGET_TARGET_TRANSFORMATION_IMPL_H
#include "llvm/TargetTransformInfo.h"
+#include "llvm/CodeGen/ValueTypes.h"
namespace llvm {
@@ -51,6 +52,10 @@ class VectorTargetTransformImpl : public VectorTargetTransformInfo {
private:
const TargetLowering *TLI;
+ /// Estimate the cost of type-legalization and the legalized type.
+ std::pair<unsigned, EVT>
+ getTypeLegalizationCost(LLVMContext &C, EVT Ty) const;
+
public:
explicit VectorTargetTransformImpl(const TargetLowering *TL) : TLI(TL) {}
diff --git a/lib/Target/TargetTransformImpl.cpp b/lib/Target/TargetTransformImpl.cpp
index cee736bd71..382eecb766 100644
--- a/lib/Target/TargetTransformImpl.cpp
+++ b/lib/Target/TargetTransformImpl.cpp
@@ -9,6 +9,7 @@
#include "llvm/Target/TargetTransformImpl.h"
#include "llvm/Target/TargetLowering.h"
+#include <utility>
using namespace llvm;
@@ -53,11 +54,131 @@ unsigned ScalarTargetTransformImpl::getJumpBufSize() const {
// Calls used by the vectorizers.
//
//===----------------------------------------------------------------------===//
+int InstructionOpcodeToISD(unsigned Opcode) {
+ static const int OpToISDTbl[] = {
+ /*Instruction::Ret */ 0, // Opcode numbering start at #1.
+ /*Instruction::Br */ 0,
+ /*Instruction::Switch */ 0,
+ /*Instruction::IndirectBr */ 0,
+ /*Instruction::Invoke */ 0,
+ /*Instruction::Resume */ 0,
+ /*Instruction::Unreachable */ 0,
+ /*Instruction::Add */ ISD::ADD,
+ /*Instruction::FAdd */ ISD::FADD,
+ /*Instruction::Sub */ ISD::SUB,
+ /*Instruction::FSub */ ISD::FSUB,
+ /*Instruction::Mul */ ISD::MUL,
+ /*Instruction::FMul */ ISD::FMUL,
+ /*Instruction::UDiv */ ISD::UDIV,
+ /*Instruction::SDiv */ ISD::UDIV,
+ /*Instruction::FDiv */ ISD::FDIV,
+ /*Instruction::URem */ ISD::UREM,
+ /*Instruction::SRem */ ISD::SREM,
+ /*Instruction::FRem */ ISD::FREM,
+ /*Instruction::Shl */ ISD::SHL,
+ /*Instruction::LShr */ ISD::SRL,
+ /*Instruction::AShr */ ISD::SRA,
+ /*Instruction::And */ ISD::AND,
+ /*Instruction::Or */ ISD::OR,
+ /*Instruction::Xor */ ISD::XOR,
+ /*Instruction::Alloca */ 0,
+ /*Instruction::Load */ ISD::LOAD,
+ /*Instruction::Store */ ISD::STORE,
+ /*Instruction::GetElementPtr */ 0,
+ /*Instruction::Fence */ 0,
+ /*Instruction::AtomicCmpXchg */ 0,
+ /*Instruction::AtomicRMW */ 0,
+ /*Instruction::Trunc */ ISD::TRUNCATE,
+ /*Instruction::ZExt */ ISD::ZERO_EXTEND,
+ /*Instruction::SExt */ ISD::SEXTLOAD,
+ /*Instruction::FPToUI */ ISD::FP_TO_UINT,
+ /*Instruction::FPToSI */ ISD::FP_TO_SINT,
+ /*Instruction::UIToFP */ ISD::UINT_TO_FP,
+ /*Instruction::SIToFP */ ISD::SINT_TO_FP,
+ /*Instruction::FPTrunc */ ISD::FP_ROUND,
+ /*Instruction::FPExt */ ISD::FP_EXTEND,
+ /*Instruction::PtrToInt */ ISD::BITCAST,
+ /*Instruction::IntToPtr */ ISD::BITCAST,
+ /*Instruction::BitCast */ ISD::BITCAST,
+ /*Instruction::ICmp */ ISD::SETCC,
+ /*Instruction::FCmp */ ISD::SETCC,
+ /*Instruction::PHI */ 0,
+ /*Instruction::Call */ 0,
+ /*Instruction::Select */ ISD::SELECT,
+ /*Instruction::UserOp1 */ 0,
+ /*Instruction::UserOp2 */ 0,
+ /*Instruction::VAArg */ 0,
+ /*Instruction::ExtractElement*/ ISD::EXTRACT_VECTOR_ELT,
+ /*Instruction::InsertElement */ ISD::INSERT_VECTOR_ELT,
+ /*Instruction::ShuffleVector */ ISD::VECTOR_SHUFFLE,
+ /*Instruction::ExtractValue */ ISD::MERGE_VALUES,
+ /*Instruction::InsertValue */ ISD::MERGE_VALUES,
+ /*Instruction::LandingPad */ 0};
+
+ assert((Instruction::Ret == 1) && (Instruction::LandingPad == 58) &&
+ "Instruction order had changed");
+
+ // Opcode numbering starts at #1 but the table starts at #0, so we subtract
+ // one from the opcode number.
+ return OpToISDTbl[Opcode - 1];
+}
+
+std::pair<unsigned, EVT>
+VectorTargetTransformImpl::getTypeLegalizationCost(LLVMContext &C,
+ EVT Ty) const {
+ unsigned Cost = 1;
+ // We keep legalizing the type until we find a legal kind. We assume that
+ // the only operation that costs anything is the split. After splitting
+ // we need to handle two types.
+ while (true) {
+ TargetLowering::LegalizeKind LK = TLI->getTypeConversion(C, Ty);
+
+ if (LK.first == TargetLowering::TypeLegal)
+ return std::make_pair(Cost, LK.second);
+
+ if (LK.first == TargetLowering::TypeSplitVector)
+ Cost *= 2;
+
+ // Keep legalizing the type.
+ Ty = LK.second;
+ }
+}
unsigned
VectorTargetTransformImpl::getInstrCost(unsigned Opcode, Type *Ty1,
Type *Ty2) const {
- return 1;
+ // Check if any of the operands are vector operands.
+ int ISD = InstructionOpcodeToISD(Opcode);
+
+ // Selects on vectors are actually vector selects.
+ if (ISD == ISD::SELECT) {
+ assert(Ty2 && "Ty2 must hold the select type");
+ if (Ty2->isVectorTy())
+ ISD = ISD::VSELECT;
+ }
+
+ // If we don't have any information about this instruction assume it costs 1.
+ if (ISD == 0)
+ return 1;
+
+ assert(Ty1 && "We need to have at least one type");
+
+ // From this stage we look at the legalized type.
+ std::pair<unsigned, EVT> LT =
+ getTypeLegalizationCost(Ty1->getContext(), TLI->getValueType(Ty1));
+
+ if (TLI->isOperationLegalOrCustom(ISD, LT.second)) {
+ // The operation is legal. Assume it costs 1. Multiply
+ // by the type-legalization overhead.
+ return LT.first * 1;
+ }
+
+ unsigned NumElem =
+ (LT.second.isVector() ? LT.second.getVectorNumElements() : 1);
+
+ // We will probably scalarize this instruction. Assume that the cost is the
+ // number of the vector elements.
+ return LT.first * NumElem * 1;
}
unsigned
@@ -69,5 +190,9 @@ unsigned
VectorTargetTransformImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
unsigned Alignment,
unsigned AddressSpace) const {
- return 1;
+ // From this stage we look at the legalized type.
+ std::pair<unsigned, EVT> LT =
+ getTypeLegalizationCost(Src->getContext(), TLI->getValueType(Src));
+ // Assume that all loads of legal types cost 1.
+ return LT.first;
}
diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp
index 1d8f1e531a..1b196eab84 100644
--- a/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -185,7 +185,7 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
MPM.add(createLoopIdiomPass()); // Recognize idioms like memset.
MPM.add(createLoopDeletionPass()); // Delete dead loops
- if (Vectorize) {
+ if (Vectorize || true) {
MPM.add(createLoopVectorizePass());
MPM.add(createLICMPass());
}
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index 35f49e4f30..483b9fcbc3 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -300,10 +300,10 @@ private:
class LoopVectorizationCostModel {
public:
/// C'tor.
- LoopVectorizationCostModel(Loop *Lp, ScalarEvolution *Se, DataLayout *Dl,
+ LoopVectorizationCostModel(Loop *Lp, ScalarEvolution *Se,
LoopVectorizationLegality *Leg,
const VectorTargetTransformInfo *Vtti):
- TheLoop(Lp), SE(Se), DL(Dl), Legal(Leg), VTTI(Vtti) { }
+ TheLoop(Lp), SE(Se), Legal(Leg), VTTI(Vtti) { }
/// Returns the most profitable vectorization factor for the loop that is
/// smaller or equal to the VF argument. This method checks every power
@@ -325,8 +325,7 @@ private:
Loop *TheLoop;
/// Scev analysis.
ScalarEvolution *SE;
- /// DataLayout analysis.
- DataLayout *DL;
+
/// Vectorization legality.
LoopVectorizationLegality *Legal;
/// Vector target information.
@@ -372,7 +371,7 @@ struct LoopVectorize : public LoopPass {
if (TTI)
VTTI = TTI->getVectorTargetTransformInfo();
// Use the cost model.
- LoopVectorizationCostModel CM(L, SE, DL, &LVL, VTTI);
+ LoopVectorizationCostModel CM(L, SE, &LVL, VTTI);
VF = CM.findBestVectorizationFactor();
if (VF == 1) {
@@ -1432,11 +1431,12 @@ unsigned LoopVectorizationCostModel::expectedCost(unsigned VF) {
// For each instruction in the old loop.
for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
Instruction *Inst = it;
- Cost += getInstructionCost(Inst, VF);
+ unsigned C = getInstructionCost(Inst, VF);
+ Cost += C;
+ DEBUG(dbgs() << "LV: Found an estimated cost of "<< C <<" for VF "<< VF <<
+ " For instruction: "<< *Inst << "\n");
}
- // Return the cost divided by VF, because we will be executing
- // less iterations of the vector form.
return Cost;
}
@@ -1444,11 +1444,13 @@ unsigned
LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
assert(VTTI && "Invalid vector target transformation info");
switch (I->getOpcode()) {
+ case Instruction::GetElementPtr:
+ return 0;
case Instruction::Br: {
return VTTI->getInstrCost(I->getOpcode());
}
case Instruction::PHI:
- // PHIs are handled the same as the binary instructions below.
+ return 0;
case Instruction::Add:
case Instruction::FAdd:
case Instruction::Sub:
@@ -1493,11 +1495,17 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
// Scalarized stores.
if (!Legal->isConsecutiveGep(SI->getPointerOperand())) {
unsigned Cost = 0;
- unsigned ExtCost = VTTI->getInstrCost(Instruction::ExtractElement, VTy);
- // The cost of extracting from the vector value.
- Cost += VF * ExtCost;
+ if (VF != 1) {
+ unsigned ExtCost = VTTI->getInstrCost(Instruction::ExtractElement,
+ VTy);
+ // The cost of extracting from the value vector and pointer vector.
+ Cost += VF * (ExtCost * 2);
+ }
// The cost of the scalar stores.
- Cost += VF * VTTI->getInstrCost(I->getOpcode(), VTy->getScalarType());
+ Cost += VF * VTTI->getMemoryOpCost(I->getOpcode(),
+ VTy->getScalarType(),
+ SI->getAlignment(),
+ SI->getPointerAddressSpace());
return Cost;
}
@@ -1512,11 +1520,18 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
// Scalarized loads.
if (!Legal->isConsecutiveGep(LI->getPointerOperand())) {
unsigned Cost = 0;
- unsigned InCost = VTTI->getInstrCost(Instruction::InsertElement, VTy);
- // The cost of inserting the loaded value into the result vector.
- Cost += VF * InCost;
+ if (VF != 1) {
+ unsigned InCost = VTTI->getInstrCost(Instruction::InsertElement, VTy);
+ unsigned ExCost = VTTI->getInstrCost(Instruction::ExtractValue, VTy);
+
+ // The cost of inserting the loaded value into the result vector, and
+ // extracting from a vector of pointers.
+ Cost += VF * (InCost + ExCost);
+ }
// The cost of the scalar stores.
- Cost += VF * VTTI->getInstrCost(I->getOpcode(), VTy->getScalarType());
+ Cost += VF * VTTI->getMemoryOpCost(I->getOpcode(), VTy->getScalarType(),
+ LI->getAlignment(),
+ LI->getPointerAddressSpace());
return Cost;
}
diff --git a/test/Transforms/LoopVectorize/cost-model.ll b/test/Transforms/LoopVectorize/cost-model.ll
index 20c710db46..18abf2885e 100644
--- a/test/Transforms/LoopVectorize/cost-model.ll
+++ b/test/Transforms/LoopVectorize/cost-model.ll
@@ -8,10 +8,8 @@ target triple = "x86_64-apple-macosx10.8.0"
@d = common global [2048 x i32] zeroinitializer, align 16
@a = common global [2048 x i32] zeroinitializer, align 16
-; At this point the cost model is pretty bad and we are vectorizing the code below.
-; TODO: This code should not be vectorized on x86.
;CHECK: cost_model_1
-;CHECK: <4 x i32>
+;CHECK-NOT: <4 x i32>
;CHECK: ret void
define void @cost_model_1() nounwind uwtable noinline ssp {
entry: