From 2652c50f74bc4a874c6a2e4b34ff2d52d479183f Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Wed, 24 Oct 2012 23:47:38 +0000 Subject: Implement a basic cost model for vector and scalar instructions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@166642 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Target/TargetLowering.h | 2 + include/llvm/Target/TargetTransformImpl.h | 5 ++ lib/Target/TargetTransformImpl.cpp | 129 +++++++++++++++++++++++++++- lib/Transforms/IPO/PassManagerBuilder.cpp | 2 +- lib/Transforms/Vectorize/LoopVectorize.cpp | 49 +++++++---- test/Transforms/LoopVectorize/cost-model.ll | 4 +- 6 files changed, 168 insertions(+), 23 deletions(-) diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h index 0dbdd25140..19eb941635 100644 --- a/include/llvm/Target/TargetLowering.h +++ b/include/llvm/Target/TargetLowering.h @@ -1958,6 +1958,7 @@ private: ValueTypeActionImpl ValueTypeActions; +public: LegalizeKind getTypeConversion(LLVMContext &Context, EVT VT) const { // If this is a simple type, use the ComputeRegisterProp mechanism. @@ -2074,6 +2075,7 @@ private: return LegalizeKind(TypeSplitVector, NVT); } +private: std::vector > AvailableRegClasses; /// TargetDAGCombineArray - Targets can specify ISD nodes that they would diff --git a/include/llvm/Target/TargetTransformImpl.h b/include/llvm/Target/TargetTransformImpl.h index ec39f9968e..25a7edeb01 100644 --- a/include/llvm/Target/TargetTransformImpl.h +++ b/include/llvm/Target/TargetTransformImpl.h @@ -16,6 +16,7 @@ #define LLVM_TARGET_TARGET_TRANSFORMATION_IMPL_H #include "llvm/TargetTransformInfo.h" +#include "llvm/CodeGen/ValueTypes.h" namespace llvm { @@ -51,6 +52,10 @@ class VectorTargetTransformImpl : public VectorTargetTransformInfo { private: const TargetLowering *TLI; + /// Estimate the cost of type-legalization and the legalized type. + std::pair + getTypeLegalizationCost(LLVMContext &C, EVT Ty) const; + public: explicit VectorTargetTransformImpl(const TargetLowering *TL) : TLI(TL) {} diff --git a/lib/Target/TargetTransformImpl.cpp b/lib/Target/TargetTransformImpl.cpp index cee736bd71..382eecb766 100644 --- a/lib/Target/TargetTransformImpl.cpp +++ b/lib/Target/TargetTransformImpl.cpp @@ -9,6 +9,7 @@ #include "llvm/Target/TargetTransformImpl.h" #include "llvm/Target/TargetLowering.h" +#include using namespace llvm; @@ -53,11 +54,131 @@ unsigned ScalarTargetTransformImpl::getJumpBufSize() const { // Calls used by the vectorizers. // //===----------------------------------------------------------------------===// +int InstructionOpcodeToISD(unsigned Opcode) { + static const int OpToISDTbl[] = { + /*Instruction::Ret */ 0, // Opcode numbering start at #1. + /*Instruction::Br */ 0, + /*Instruction::Switch */ 0, + /*Instruction::IndirectBr */ 0, + /*Instruction::Invoke */ 0, + /*Instruction::Resume */ 0, + /*Instruction::Unreachable */ 0, + /*Instruction::Add */ ISD::ADD, + /*Instruction::FAdd */ ISD::FADD, + /*Instruction::Sub */ ISD::SUB, + /*Instruction::FSub */ ISD::FSUB, + /*Instruction::Mul */ ISD::MUL, + /*Instruction::FMul */ ISD::FMUL, + /*Instruction::UDiv */ ISD::UDIV, + /*Instruction::SDiv */ ISD::UDIV, + /*Instruction::FDiv */ ISD::FDIV, + /*Instruction::URem */ ISD::UREM, + /*Instruction::SRem */ ISD::SREM, + /*Instruction::FRem */ ISD::FREM, + /*Instruction::Shl */ ISD::SHL, + /*Instruction::LShr */ ISD::SRL, + /*Instruction::AShr */ ISD::SRA, + /*Instruction::And */ ISD::AND, + /*Instruction::Or */ ISD::OR, + /*Instruction::Xor */ ISD::XOR, + /*Instruction::Alloca */ 0, + /*Instruction::Load */ ISD::LOAD, + /*Instruction::Store */ ISD::STORE, + /*Instruction::GetElementPtr */ 0, + /*Instruction::Fence */ 0, + /*Instruction::AtomicCmpXchg */ 0, + /*Instruction::AtomicRMW */ 0, + /*Instruction::Trunc */ ISD::TRUNCATE, + /*Instruction::ZExt */ ISD::ZERO_EXTEND, + /*Instruction::SExt */ ISD::SEXTLOAD, + /*Instruction::FPToUI */ ISD::FP_TO_UINT, + /*Instruction::FPToSI */ ISD::FP_TO_SINT, + /*Instruction::UIToFP */ ISD::UINT_TO_FP, + /*Instruction::SIToFP */ ISD::SINT_TO_FP, + /*Instruction::FPTrunc */ ISD::FP_ROUND, + /*Instruction::FPExt */ ISD::FP_EXTEND, + /*Instruction::PtrToInt */ ISD::BITCAST, + /*Instruction::IntToPtr */ ISD::BITCAST, + /*Instruction::BitCast */ ISD::BITCAST, + /*Instruction::ICmp */ ISD::SETCC, + /*Instruction::FCmp */ ISD::SETCC, + /*Instruction::PHI */ 0, + /*Instruction::Call */ 0, + /*Instruction::Select */ ISD::SELECT, + /*Instruction::UserOp1 */ 0, + /*Instruction::UserOp2 */ 0, + /*Instruction::VAArg */ 0, + /*Instruction::ExtractElement*/ ISD::EXTRACT_VECTOR_ELT, + /*Instruction::InsertElement */ ISD::INSERT_VECTOR_ELT, + /*Instruction::ShuffleVector */ ISD::VECTOR_SHUFFLE, + /*Instruction::ExtractValue */ ISD::MERGE_VALUES, + /*Instruction::InsertValue */ ISD::MERGE_VALUES, + /*Instruction::LandingPad */ 0}; + + assert((Instruction::Ret == 1) && (Instruction::LandingPad == 58) && + "Instruction order had changed"); + + // Opcode numbering starts at #1 but the table starts at #0, so we subtract + // one from the opcode number. + return OpToISDTbl[Opcode - 1]; +} + +std::pair +VectorTargetTransformImpl::getTypeLegalizationCost(LLVMContext &C, + EVT Ty) const { + unsigned Cost = 1; + // We keep legalizing the type until we find a legal kind. We assume that + // the only operation that costs anything is the split. After splitting + // we need to handle two types. + while (true) { + TargetLowering::LegalizeKind LK = TLI->getTypeConversion(C, Ty); + + if (LK.first == TargetLowering::TypeLegal) + return std::make_pair(Cost, LK.second); + + if (LK.first == TargetLowering::TypeSplitVector) + Cost *= 2; + + // Keep legalizing the type. + Ty = LK.second; + } +} unsigned VectorTargetTransformImpl::getInstrCost(unsigned Opcode, Type *Ty1, Type *Ty2) const { - return 1; + // Check if any of the operands are vector operands. + int ISD = InstructionOpcodeToISD(Opcode); + + // Selects on vectors are actually vector selects. + if (ISD == ISD::SELECT) { + assert(Ty2 && "Ty2 must hold the select type"); + if (Ty2->isVectorTy()) + ISD = ISD::VSELECT; + } + + // If we don't have any information about this instruction assume it costs 1. + if (ISD == 0) + return 1; + + assert(Ty1 && "We need to have at least one type"); + + // From this stage we look at the legalized type. + std::pair LT = + getTypeLegalizationCost(Ty1->getContext(), TLI->getValueType(Ty1)); + + if (TLI->isOperationLegalOrCustom(ISD, LT.second)) { + // The operation is legal. Assume it costs 1. Multiply + // by the type-legalization overhead. + return LT.first * 1; + } + + unsigned NumElem = + (LT.second.isVector() ? LT.second.getVectorNumElements() : 1); + + // We will probably scalarize this instruction. Assume that the cost is the + // number of the vector elements. + return LT.first * NumElem * 1; } unsigned @@ -69,5 +190,9 @@ unsigned VectorTargetTransformImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace) const { - return 1; + // From this stage we look at the legalized type. + std::pair LT = + getTypeLegalizationCost(Src->getContext(), TLI->getValueType(Src)); + // Assume that all loads of legal types cost 1. + return LT.first; } diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp index 1d8f1e531a..1b196eab84 100644 --- a/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -185,7 +185,7 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) { MPM.add(createLoopIdiomPass()); // Recognize idioms like memset. MPM.add(createLoopDeletionPass()); // Delete dead loops - if (Vectorize) { + if (Vectorize || true) { MPM.add(createLoopVectorizePass()); MPM.add(createLICMPass()); } diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 35f49e4f30..483b9fcbc3 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -300,10 +300,10 @@ private: class LoopVectorizationCostModel { public: /// C'tor. - LoopVectorizationCostModel(Loop *Lp, ScalarEvolution *Se, DataLayout *Dl, + LoopVectorizationCostModel(Loop *Lp, ScalarEvolution *Se, LoopVectorizationLegality *Leg, const VectorTargetTransformInfo *Vtti): - TheLoop(Lp), SE(Se), DL(Dl), Legal(Leg), VTTI(Vtti) { } + TheLoop(Lp), SE(Se), Legal(Leg), VTTI(Vtti) { } /// Returns the most profitable vectorization factor for the loop that is /// smaller or equal to the VF argument. This method checks every power @@ -325,8 +325,7 @@ private: Loop *TheLoop; /// Scev analysis. ScalarEvolution *SE; - /// DataLayout analysis. - DataLayout *DL; + /// Vectorization legality. LoopVectorizationLegality *Legal; /// Vector target information. @@ -372,7 +371,7 @@ struct LoopVectorize : public LoopPass { if (TTI) VTTI = TTI->getVectorTargetTransformInfo(); // Use the cost model. - LoopVectorizationCostModel CM(L, SE, DL, &LVL, VTTI); + LoopVectorizationCostModel CM(L, SE, &LVL, VTTI); VF = CM.findBestVectorizationFactor(); if (VF == 1) { @@ -1432,11 +1431,12 @@ unsigned LoopVectorizationCostModel::expectedCost(unsigned VF) { // For each instruction in the old loop. for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) { Instruction *Inst = it; - Cost += getInstructionCost(Inst, VF); + unsigned C = getInstructionCost(Inst, VF); + Cost += C; + DEBUG(dbgs() << "LV: Found an estimated cost of "<< C <<" for VF "<< VF << + " For instruction: "<< *Inst << "\n"); } - // Return the cost divided by VF, because we will be executing - // less iterations of the vector form. return Cost; } @@ -1444,11 +1444,13 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) { assert(VTTI && "Invalid vector target transformation info"); switch (I->getOpcode()) { + case Instruction::GetElementPtr: + return 0; case Instruction::Br: { return VTTI->getInstrCost(I->getOpcode()); } case Instruction::PHI: - // PHIs are handled the same as the binary instructions below. + return 0; case Instruction::Add: case Instruction::FAdd: case Instruction::Sub: @@ -1493,11 +1495,17 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) { // Scalarized stores. if (!Legal->isConsecutiveGep(SI->getPointerOperand())) { unsigned Cost = 0; - unsigned ExtCost = VTTI->getInstrCost(Instruction::ExtractElement, VTy); - // The cost of extracting from the vector value. - Cost += VF * ExtCost; + if (VF != 1) { + unsigned ExtCost = VTTI->getInstrCost(Instruction::ExtractElement, + VTy); + // The cost of extracting from the value vector and pointer vector. + Cost += VF * (ExtCost * 2); + } // The cost of the scalar stores. - Cost += VF * VTTI->getInstrCost(I->getOpcode(), VTy->getScalarType()); + Cost += VF * VTTI->getMemoryOpCost(I->getOpcode(), + VTy->getScalarType(), + SI->getAlignment(), + SI->getPointerAddressSpace()); return Cost; } @@ -1512,11 +1520,18 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) { // Scalarized loads. if (!Legal->isConsecutiveGep(LI->getPointerOperand())) { unsigned Cost = 0; - unsigned InCost = VTTI->getInstrCost(Instruction::InsertElement, VTy); - // The cost of inserting the loaded value into the result vector. - Cost += VF * InCost; + if (VF != 1) { + unsigned InCost = VTTI->getInstrCost(Instruction::InsertElement, VTy); + unsigned ExCost = VTTI->getInstrCost(Instruction::ExtractValue, VTy); + + // The cost of inserting the loaded value into the result vector, and + // extracting from a vector of pointers. + Cost += VF * (InCost + ExCost); + } // The cost of the scalar stores. - Cost += VF * VTTI->getInstrCost(I->getOpcode(), VTy->getScalarType()); + Cost += VF * VTTI->getMemoryOpCost(I->getOpcode(), VTy->getScalarType(), + LI->getAlignment(), + LI->getPointerAddressSpace()); return Cost; } diff --git a/test/Transforms/LoopVectorize/cost-model.ll b/test/Transforms/LoopVectorize/cost-model.ll index 20c710db46..18abf2885e 100644 --- a/test/Transforms/LoopVectorize/cost-model.ll +++ b/test/Transforms/LoopVectorize/cost-model.ll @@ -8,10 +8,8 @@ target triple = "x86_64-apple-macosx10.8.0" @d = common global [2048 x i32] zeroinitializer, align 16 @a = common global [2048 x i32] zeroinitializer, align 16 -; At this point the cost model is pretty bad and we are vectorizing the code below. -; TODO: This code should not be vectorized on x86. ;CHECK: cost_model_1 -;CHECK: <4 x i32> +;CHECK-NOT: <4 x i32> ;CHECK: ret void define void @cost_model_1() nounwind uwtable noinline ssp { entry: -- cgit v1.2.3