diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Target/TargetTransformImpl.cpp | 152 | ||||
-rw-r--r-- | lib/Transforms/Vectorize/LoopVectorize.cpp | 61 |
2 files changed, 177 insertions, 36 deletions
diff --git a/lib/Target/TargetTransformImpl.cpp b/lib/Target/TargetTransformImpl.cpp index 40184ed78d..d3ab105988 100644 --- a/lib/Target/TargetTransformImpl.cpp +++ b/lib/Target/TargetTransformImpl.cpp @@ -126,7 +126,7 @@ static int InstructionOpcodeToISD(unsigned Opcode) { std::pair<unsigned, EVT> VectorTargetTransformImpl::getTypeLegalizationCost(LLVMContext &C, - EVT Ty) const { + EVT Ty) const { unsigned Cost = 1; // We keep legalizing the type until we find a legal kind. We assume that // the only operation that costs anything is the split. After splitting @@ -135,7 +135,7 @@ VectorTargetTransformImpl::getTypeLegalizationCost(LLVMContext &C, TargetLowering::LegalizeKind LK = TLI->getTypeConversion(C, Ty); if (LK.first == TargetLowering::TypeLegal) - return std::make_pair(Cost, LK.second); + return std::make_pair(Cost, Ty); if (LK.first == TargetLowering::TypeSplitVector) Cost *= 2; @@ -146,44 +146,144 @@ VectorTargetTransformImpl::getTypeLegalizationCost(LLVMContext &C, } unsigned -VectorTargetTransformImpl::getInstrCost(unsigned Opcode, Type *Ty1, - Type *Ty2) const { +VectorTargetTransformImpl::getScalarizationOverhead(Type *Ty, + bool Insert, + bool Extract) const { + assert (Ty->isVectorTy() && "Can only scalarize vectors"); + unsigned Cost = 0; + + for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) { + if (Insert) + Cost += getVectorInstrCost(Instruction::InsertElement, Ty, i); + if (Extract) + Cost += getVectorInstrCost(Instruction::ExtractElement, Ty, i); + } + + return Cost; +} + +unsigned VectorTargetTransformImpl::getArithmeticInstrCost(unsigned Opcode, + Type *Ty) const { // Check if any of the operands are vector operands. int ISD = InstructionOpcodeToISD(Opcode); + assert(ISD && "Invalid opcode"); + + std::pair<unsigned, EVT> LT = + getTypeLegalizationCost(Ty->getContext(), TLI->getValueType(Ty)); + + if (!TLI->isOperationExpand(ISD, LT.second)) { + // The operation is legal. Assume it costs 1. Multiply + // by the type-legalization overhead. + return LT.first * 1; + } + + // Else, assume that we need to scalarize this op. + if (Ty->isVectorTy()) { + unsigned Num = Ty->getVectorNumElements(); + unsigned Cost = getArithmeticInstrCost(Opcode, Ty->getScalarType()); + // return the cost of multiple scalar invocation plus the cost of inserting + // and extracting the values. + return getScalarizationOverhead(Ty, true, true) + Num * Cost; + } + + // We don't know anything about this scalar instruction. + return 1; +} + +unsigned VectorTargetTransformImpl::getBroadcastCost(Type *Tp) const { + return 1; +} + +unsigned VectorTargetTransformImpl::getCastInstrCost(unsigned Opcode, Type *Dst, + Type *Src) const { + assert(Src->isVectorTy() == Dst->isVectorTy() && "Invalid input types"); + int ISD = InstructionOpcodeToISD(Opcode); + assert(ISD && "Invalid opcode"); - // If we don't have any information about this instruction assume it costs 1. - if (ISD == 0) - return 1; + std::pair<unsigned, EVT> SrcLT = + getTypeLegalizationCost(Src->getContext(), TLI->getValueType(Src)); + std::pair<unsigned, EVT> DstLT = + getTypeLegalizationCost(Dst->getContext(), TLI->getValueType(Dst)); + + // If the cast is between same-sized registers, then the check is simple. + if (SrcLT.first == DstLT.first && + SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) { + // Just check the op cost: + if (!TLI->isOperationExpand(ISD, DstLT.second)) { + // The operation is legal. Assume it costs 1. Multiply + // by the type-legalization overhead. + return SrcLT.first * 1; + } + } + + // Otherwise, assume that the cast is scalarized. + if (Dst->isVectorTy()) { + unsigned Num = Dst->getVectorNumElements(); + unsigned Cost = getCastInstrCost(Opcode, Src->getScalarType(), + Dst->getScalarType()); + // return the cost of multiple scalar invocation plus the cost of inserting + // and extracting the values. + return getScalarizationOverhead(Dst, true, true) + Num * Cost; + } + + // Unknown scalar opcode. + return 1; +} + +unsigned VectorTargetTransformImpl::getCFInstrCost(unsigned Opcode) const { + return 1; +} + +unsigned VectorTargetTransformImpl::getCmpSelInstrCost(unsigned Opcode, + Type *ValTy, + Type *CondTy) const { + int ISD = InstructionOpcodeToISD(Opcode); + assert(ISD && "Invalid opcode"); + // Selects on vectors are actually vector selects. if (ISD == ISD::SELECT) { - assert(Ty2 && "Ty2 must hold the condition type"); - if (Ty2->isVectorTy()) - ISD = ISD::VSELECT; + assert(CondTy && "CondTy must exist"); + if (CondTy->isVectorTy()) + ISD = ISD::VSELECT; } - assert(Ty1 && "We need to have at least one type"); + std::pair<unsigned, EVT> LT = + getTypeLegalizationCost(ValTy->getContext(), TLI->getValueType(ValTy)); - // From this stage we look at the legalized type. - std::pair<unsigned, EVT> LT = - getTypeLegalizationCost(Ty1->getContext(), TLI->getValueType(Ty1)); - - if (TLI->isOperationLegalOrCustom(ISD, LT.second)) { + if (!TLI->isOperationExpand(ISD, LT.second)) { // The operation is legal. Assume it costs 1. Multiply // by the type-legalization overhead. return LT.first * 1; } - unsigned NumElem = - (LT.second.isVector() ? LT.second.getVectorNumElements() : 1); + // Otherwise, assume that the cast is scalarized. + if (ValTy->isVectorTy()) { + unsigned Num = ValTy->getVectorNumElements(); + if (CondTy) + CondTy = CondTy->getScalarType(); + unsigned Cost = getCmpSelInstrCost(Opcode, ValTy->getScalarType(), + CondTy); + + // return the cost of multiple scalar invocation plus the cost of inserting + // and extracting the values. + return getScalarizationOverhead(ValTy, true, false) + Num * Cost; + } - // We will probably scalarize this instruction. Assume that the cost is the - // number of the vector elements. - return LT.first * NumElem * 1; + // Unknown scalar opcode. + return 1; +} + +/// Returns the expected cost of Vector Insert and Extract. +unsigned VectorTargetTransformImpl::getVectorInstrCost(unsigned Opcode, + Type *Val, + unsigned Index) const { + return 1; } unsigned -VectorTargetTransformImpl::getBroadcastCost(Type *Tp) const { +VectorTargetTransformImpl::getInstrCost(unsigned Opcode, Type *Ty1, + Type *Ty2) const { return 1; } @@ -191,17 +291,15 @@ unsigned VectorTargetTransformImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace) const { - // From this stage we look at the legalized type. - std::pair<unsigned, EVT> LT = + std::pair<unsigned, EVT> LT = getTypeLegalizationCost(Src->getContext(), TLI->getValueType(Src)); + // Assume that all loads of legal types cost 1. return LT.first; } unsigned VectorTargetTransformImpl::getNumberOfParts(Type *Tp) const { - std::pair<unsigned, EVT> LT = - getTypeLegalizationCost(Tp->getContext(), TLI->getValueType(Tp)); - return LT.first; + return TLI->getNumRegisters(Tp->getContext(), TLI->getValueType(Tp)); } diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index e47baf8908..1773812da2 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -108,7 +108,7 @@ public: createEmptyLoop(Legal); /// Widen each instruction in the old loop to a new one in the new loop. /// Use the Legality module to find the induction and reduction variables. - vectorizeLoop(Legal); + vectorizeLoop(Legal); // register the new loop. cleanup(); } @@ -254,6 +254,9 @@ public: /// This check allows us to vectorize A[idx] into a wide load/store. bool isConsecutiveGep(Value *Ptr); + /// Returns true if this instruction will remain scalar after vectorization. + bool isUniformAfterVectorization(Instruction* I) {return Uniforms.count(I);} + private: /// Check if a single basic block loop is vectorizable. /// At this point we know that this is a loop with a constant trip count @@ -291,6 +294,9 @@ private: /// Allowed outside users. This holds the reduction /// vars which can be accessed from outside the loop. SmallPtrSet<Value*, 4> AllowedExit; + /// This set holds the variables which are known to be uniform after + /// vectorization. + SmallPtrSet<Instruction*, 4> Uniforms; }; /// LoopVectorizationCostModel - estimates the expected speedups due to @@ -1177,9 +1183,40 @@ bool LoopVectorizationLegality::canVectorizeBlock(BasicBlock &BB) { return false; } - // If the memory dependencies do not prevent us from - // vectorizing, then vectorize. - return canVectorizeMemory(BB); + // Don't vectorize if the memory dependencies do not allow vectorization. + if (!canVectorizeMemory(BB)) + return false; + + // We now know that the loop is vectorizable! + // Collect variables that will remain uniform after vectorization. + std::vector<Value*> Worklist; + + // Start with the conditional branch and walk up the block. + Worklist.push_back(BB.getTerminator()->getOperand(0)); + + while (Worklist.size()) { + Instruction *I = dyn_cast<Instruction>(Worklist.back()); + Worklist.pop_back(); + // Look at instructions inside this block. + if (!I) continue; + if (I->getParent() != &BB) continue; + + // Stop when reaching PHI nodes. + if (isa<PHINode>(I)) { + assert(I == Induction && "Found a uniform PHI that is not the induction"); + break; + } + + // This is a known uniform. + Uniforms.insert(I); + + // Insert all operands. + for (int i=0, Op = I->getNumOperands(); i < Op; ++i) { + Worklist.push_back(I->getOperand(i)); + } + } + + return true; } bool LoopVectorizationLegality::canVectorizeMemory(BasicBlock &BB) { @@ -1484,9 +1521,15 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) { assert(VTTI && "Invalid vector target transformation info"); + // If we know that this instruction will remain uniform, check the cost of + // the scalar version. + if (Legal->isUniformAfterVectorization(I)) + VF = 1; + Type *RetTy = I->getType(); Type *VectorTy = ToVectorTy(RetTy, VF); + // TODO: We need to estimate the cost of intrinsic calls. switch (I->getOpcode()) { case Instruction::GetElementPtr: @@ -1495,7 +1538,7 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) { // generate vector geps. return 0; case Instruction::Br: { - return VTTI->getInstrCost(I->getOpcode()); + return VTTI->getCFInstrCost(I->getOpcode()); } case Instruction::PHI: return 0; @@ -1517,7 +1560,7 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) { case Instruction::And: case Instruction::Or: case Instruction::Xor: { - return VTTI->getInstrCost(I->getOpcode(), VectorTy); + return VTTI->getArithmeticInstrCost(I->getOpcode(), VectorTy); } case Instruction::Select: { SelectInst *SI = cast<SelectInst>(I); @@ -1527,13 +1570,13 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) { if (ScalarCond) CondTy = VectorType::get(CondTy, VF); - return VTTI->getInstrCost(I->getOpcode(), VectorTy, CondTy); + return VTTI->getCmpSelInstrCost(I->getOpcode(), VectorTy, CondTy); } case Instruction::ICmp: case Instruction::FCmp: { Type *ValTy = I->getOperand(0)->getType(); VectorTy = ToVectorTy(ValTy, VF); - return VTTI->getInstrCost(I->getOpcode(), VectorTy); + return VTTI->getCmpSelInstrCost(I->getOpcode(), VectorTy); } case Instruction::Store: { StoreInst *SI = cast<StoreInst>(I); @@ -1602,7 +1645,7 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) { case Instruction::FPTrunc: case Instruction::BitCast: { Type *SrcVecTy = ToVectorTy(I->getOperand(0)->getType(), VF); - return VTTI->getInstrCost(I->getOpcode(), VectorTy, SrcVecTy); + return VTTI->getCastInstrCost(I->getOpcode(), VectorTy, SrcVecTy); } default: { // We are scalarizing the instruction. Return the cost of the scalar |