Refactor the VectorTargetTransformInfo interface.

Add getCostXXX calls for different families of opcodes, such as casts, arithmetic, cmp, etc. Port the LoopVectorizer to the new API. The LoopVectorizer now finds instructions which will remain uniform after vectorization. It uses this information when calculating the cost of these instructions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@166836 91177308-0d34-0410-b5e6-96231b3b80d8
author: Nadav Rotem <nrotem@apple.com> 2012-10-26 23:49:28 +0000
committer: Nadav Rotem <nrotem@apple.com> 2012-10-26 23:49:28 +0000
commit: a5a3a61c5fdcee972791d4e08441ba6edf131b88 (patch)
tree: 2f84237b706b2a0f4fda6d4e37672863ebc3e4e2 /lib
parent: 17f42e02a10bd4d43e4ba904c640224de2c48f51 (diff)
download: llvm-a5a3a61c5fdcee972791d4e08441ba6edf131b88.tar.gz
llvm-a5a3a61c5fdcee972791d4e08441ba6edf131b88.tar.bz2
llvm-a5a3a61c5fdcee972791d4e08441ba6edf131b88.tar.xz
2 files changed, 177 insertions, 36 deletions
diff --git a/lib/Target/TargetTransformImpl.cpp b/lib/Target/TargetTransformImpl.cpp
index 40184ed78d..d3ab105988 100644
--- a/lib/Target/TargetTransformImpl.cpp
+++ b/lib/Target/TargetTransformImpl.cpp
@@ -126,7 +126,7 @@ static int InstructionOpcodeToISD(unsigned Opcode) {
 
 std::pair<unsigned, EVT>
 VectorTargetTransformImpl::getTypeLegalizationCost(LLVMContext &C,
-                                                         EVT Ty) const {
+                                                   EVT Ty) const {
   unsigned Cost = 1;
   // We keep legalizing the type until we find a legal kind. We assume that
   // the only operation that costs anything is the split. After splitting
@@ -135,7 +135,7 @@ VectorTargetTransformImpl::getTypeLegalizationCost(LLVMContext &C,
     TargetLowering::LegalizeKind LK = TLI->getTypeConversion(C, Ty);
 
     if (LK.first == TargetLowering::TypeLegal)
-      return std::make_pair(Cost, LK.second);
+      return std::make_pair(Cost, Ty);
 
     if (LK.first == TargetLowering::TypeSplitVector)
       Cost *= 2;
@@ -146,44 +146,144 @@ VectorTargetTransformImpl::getTypeLegalizationCost(LLVMContext &C,
 }
 
 unsigned
-VectorTargetTransformImpl::getInstrCost(unsigned Opcode, Type *Ty1,
-                                        Type *Ty2) const {
+VectorTargetTransformImpl::getScalarizationOverhead(Type *Ty,
+                                                    bool Insert,
+                                                    bool Extract) const {
+  assert (Ty->isVectorTy() && "Can only scalarize vectors");
+   unsigned Cost = 0;
+
+  for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
+    if (Insert)
+      Cost += getVectorInstrCost(Instruction::InsertElement, Ty, i);
+    if (Extract)
+      Cost += getVectorInstrCost(Instruction::ExtractElement, Ty, i);
+  }
+
+  return Cost;
+}
+
+unsigned VectorTargetTransformImpl::getArithmeticInstrCost(unsigned Opcode,
+                                                           Type *Ty) const {
   // Check if any of the operands are vector operands.
   int ISD = InstructionOpcodeToISD(Opcode);
+  assert(ISD && "Invalid opcode");
+
+  std::pair<unsigned, EVT> LT =
+  getTypeLegalizationCost(Ty->getContext(), TLI->getValueType(Ty));
+
+  if (!TLI->isOperationExpand(ISD, LT.second)) {
+    // The operation is legal. Assume it costs 1. Multiply
+    // by the type-legalization overhead.
+    return LT.first * 1;
+  }
+
+  // Else, assume that we need to scalarize this op.
+  if (Ty->isVectorTy()) {
+    unsigned Num = Ty->getVectorNumElements();
+    unsigned Cost = getArithmeticInstrCost(Opcode, Ty->getScalarType());
+    // return the cost of multiple scalar invocation plus the cost of inserting
+    // and extracting the values.
+    return getScalarizationOverhead(Ty, true, true) + Num * Cost;
+  }
+
+  // We don't know anything about this scalar instruction.
+  return 1;
+}
+
+unsigned VectorTargetTransformImpl::getBroadcastCost(Type *Tp) const {
+  return 1;
+}
+
+unsigned VectorTargetTransformImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
+                                  Type *Src) const {
+  assert(Src->isVectorTy() == Dst->isVectorTy() && "Invalid input types");
+  int ISD = InstructionOpcodeToISD(Opcode);
+  assert(ISD && "Invalid opcode");
 
-  // If we don't have any information about this instruction assume it costs 1.
-  if (ISD == 0)
-    return 1;
+  std::pair<unsigned, EVT> SrcLT =
+  getTypeLegalizationCost(Src->getContext(), TLI->getValueType(Src));
 
+  std::pair<unsigned, EVT> DstLT =
+  getTypeLegalizationCost(Dst->getContext(), TLI->getValueType(Dst));
+
+  // If the cast is between same-sized registers, then the check is simple.
+  if (SrcLT.first == DstLT.first &&
+      SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) {
+    // Just check the op cost:
+    if (!TLI->isOperationExpand(ISD, DstLT.second)) {
+      // The operation is legal. Assume it costs 1. Multiply
+      // by the type-legalization overhead.
+      return SrcLT.first * 1;
+    }
+  }
+
+  // Otherwise, assume that the cast is scalarized.
+  if (Dst->isVectorTy()) {
+    unsigned Num = Dst->getVectorNumElements();
+    unsigned Cost = getCastInstrCost(Opcode, Src->getScalarType(),
+                                     Dst->getScalarType());
+    // return the cost of multiple scalar invocation plus the cost of inserting
+    // and extracting the values.
+    return getScalarizationOverhead(Dst, true, true) + Num * Cost;
+  }
+
+  // Unknown scalar opcode.
+  return 1;
+}
+
+unsigned VectorTargetTransformImpl::getCFInstrCost(unsigned Opcode) const {
+  return 1;
+}
+
+unsigned VectorTargetTransformImpl::getCmpSelInstrCost(unsigned Opcode,
+                                                       Type *ValTy,
+                                                       Type *CondTy) const {
+  int ISD = InstructionOpcodeToISD(Opcode);
+  assert(ISD && "Invalid opcode");
+  
   // Selects on vectors are actually vector selects.
   if (ISD == ISD::SELECT) {
-    assert(Ty2 && "Ty2 must hold the condition type");
-    if (Ty2->isVectorTy())
-    ISD = ISD::VSELECT;
+    assert(CondTy && "CondTy must exist");
+    if (CondTy->isVectorTy())
+      ISD = ISD::VSELECT;
   }
 
-  assert(Ty1 && "We need to have at least one type");
+  std::pair<unsigned, EVT> LT =
+  getTypeLegalizationCost(ValTy->getContext(), TLI->getValueType(ValTy));
 
-  // From this stage we look at the legalized type.
-  std::pair<unsigned, EVT>  LT =
-  getTypeLegalizationCost(Ty1->getContext(), TLI->getValueType(Ty1));
-
-  if (TLI->isOperationLegalOrCustom(ISD, LT.second)) {
+  if (!TLI->isOperationExpand(ISD, LT.second)) {
     // The operation is legal. Assume it costs 1. Multiply
     // by the type-legalization overhead.
     return LT.first * 1;
   }
 
-  unsigned NumElem =
-    (LT.second.isVector() ? LT.second.getVectorNumElements() : 1);
+  // Otherwise, assume that the cast is scalarized.
+  if (ValTy->isVectorTy()) {
+    unsigned Num = ValTy->getVectorNumElements();
+    if (CondTy)
+      CondTy = CondTy->getScalarType();
+    unsigned Cost = getCmpSelInstrCost(Opcode, ValTy->getScalarType(),
+                                       CondTy);
+
+    // return the cost of multiple scalar invocation plus the cost of inserting
+    // and extracting the values.
+    return getScalarizationOverhead(ValTy, true, false) + Num * Cost;
+  }
 
-  // We will probably scalarize this instruction. Assume that the cost is the
-  // number of the vector elements.
-  return LT.first * NumElem * 1;
+  // Unknown scalar opcode. 
+  return 1;
+}
+
+/// Returns the expected cost of Vector Insert and Extract.
+unsigned VectorTargetTransformImpl::getVectorInstrCost(unsigned Opcode,
+                                                       Type *Val,
+                                                       unsigned Index) const {
+  return 1;
 }
 
 unsigned
-VectorTargetTransformImpl::getBroadcastCost(Type *Tp) const {
+VectorTargetTransformImpl::getInstrCost(unsigned Opcode, Type *Ty1,
+                                        Type *Ty2) const {
   return 1;
 }
 
@@ -191,17 +291,15 @@ unsigned
 VectorTargetTransformImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
                                            unsigned Alignment,
                                            unsigned AddressSpace) const {
-  // From this stage we look at the legalized type.
-  std::pair<unsigned, EVT>  LT =
+  std::pair<unsigned, EVT> LT =
   getTypeLegalizationCost(Src->getContext(), TLI->getValueType(Src));
+
   // Assume that all loads of legal types cost 1.
   return LT.first;
 }
 
 unsigned
 VectorTargetTransformImpl::getNumberOfParts(Type *Tp) const {
-  std::pair<unsigned, EVT>  LT =
-  getTypeLegalizationCost(Tp->getContext(), TLI->getValueType(Tp));
-  return LT.first;
+  return TLI->getNumRegisters(Tp->getContext(), TLI->getValueType(Tp));
 }
 
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index e47baf8908..1773812da2 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -108,7 +108,7 @@ public:
     createEmptyLoop(Legal);
     /// Widen each instruction in the old loop to a new one in the new loop.
     /// Use the Legality module to find the induction and reduction variables.
-   vectorizeLoop(Legal);
+    vectorizeLoop(Legal);
     // register the new loop.
     cleanup();
  }
@@ -254,6 +254,9 @@ public:
   /// This check allows us to vectorize A[idx] into a wide load/store.
   bool isConsecutiveGep(Value *Ptr);
 
+  /// Returns true if this instruction will remain scalar after vectorization.
+  bool isUniformAfterVectorization(Instruction* I) {return Uniforms.count(I);}
+
 private:
   /// Check if a single basic block loop is vectorizable.
   /// At this point we know that this is a loop with a constant trip count
@@ -291,6 +294,9 @@ private:
   /// Allowed outside users. This holds the reduction
   /// vars which can be accessed from outside the loop.
   SmallPtrSet<Value*, 4> AllowedExit;
+  /// This set holds the variables which are known to be uniform after
+  /// vectorization.
+  SmallPtrSet<Instruction*, 4> Uniforms;
 };
 
 /// LoopVectorizationCostModel - estimates the expected speedups due to
@@ -1177,9 +1183,40 @@ bool LoopVectorizationLegality::canVectorizeBlock(BasicBlock &BB) {
       return false;
   }
 
-  // If the memory dependencies do not prevent us from
-  // vectorizing, then vectorize.
-  return canVectorizeMemory(BB);
+  // Don't vectorize if the memory dependencies do not allow vectorization.
+  if (!canVectorizeMemory(BB))
+    return false;
+
+  // We now know that the loop is vectorizable!
+  // Collect variables that will remain uniform after vectorization.
+  std::vector<Value*> Worklist;
+
+  // Start with the conditional branch and walk up the block.
+  Worklist.push_back(BB.getTerminator()->getOperand(0));
+
+  while (Worklist.size()) {
+    Instruction *I = dyn_cast<Instruction>(Worklist.back());
+    Worklist.pop_back();
+    // Look at instructions inside this block.
+    if (!I) continue;
+    if (I->getParent() != &BB) continue;
+
+    // Stop when reaching PHI nodes.
+    if (isa<PHINode>(I)) {
+      assert(I == Induction && "Found a uniform PHI that is not the induction");
+      break;
+    }
+
+    // This is a known uniform.
+    Uniforms.insert(I);
+
+    // Insert all operands.
+    for (int i=0, Op = I->getNumOperands(); i < Op; ++i) {
+      Worklist.push_back(I->getOperand(i));
+    }
+  }
+
+  return true;
 }
 
 bool LoopVectorizationLegality::canVectorizeMemory(BasicBlock &BB) {
@@ -1484,9 +1521,15 @@ unsigned
 LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
   assert(VTTI && "Invalid vector target transformation info");
 
+  // If we know that this instruction will remain uniform, check the cost of
+  // the scalar version.
+  if (Legal->isUniformAfterVectorization(I))
+    VF = 1;
+
   Type *RetTy = I->getType();
   Type *VectorTy = ToVectorTy(RetTy, VF);
 
+
   // TODO: We need to estimate the cost of intrinsic calls.
   switch (I->getOpcode()) {
     case Instruction::GetElementPtr:
@@ -1495,7 +1538,7 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
       // generate vector geps.
       return 0;
     case Instruction::Br: {
-      return VTTI->getInstrCost(I->getOpcode());
+      return VTTI->getCFInstrCost(I->getOpcode());
     }
     case Instruction::PHI:
       return 0;
@@ -1517,7 +1560,7 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
     case Instruction::And:
     case Instruction::Or:
     case Instruction::Xor: {
-      return VTTI->getInstrCost(I->getOpcode(), VectorTy);
+      return VTTI->getArithmeticInstrCost(I->getOpcode(), VectorTy);
     }
     case Instruction::Select: {
       SelectInst *SI = cast<SelectInst>(I);
@@ -1527,13 +1570,13 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
       if (ScalarCond)
         CondTy = VectorType::get(CondTy, VF);
 
-      return VTTI->getInstrCost(I->getOpcode(), VectorTy, CondTy);
+      return VTTI->getCmpSelInstrCost(I->getOpcode(), VectorTy, CondTy);
     }
     case Instruction::ICmp:
     case Instruction::FCmp: {
       Type *ValTy = I->getOperand(0)->getType();
       VectorTy = ToVectorTy(ValTy, VF);
-      return VTTI->getInstrCost(I->getOpcode(), VectorTy);
+      return VTTI->getCmpSelInstrCost(I->getOpcode(), VectorTy);
     }
     case Instruction::Store: {
       StoreInst *SI = cast<StoreInst>(I);
@@ -1602,7 +1645,7 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
     case Instruction::FPTrunc:
     case Instruction::BitCast: {
       Type *SrcVecTy = ToVectorTy(I->getOperand(0)->getType(), VF);
-      return VTTI->getInstrCost(I->getOpcode(), VectorTy, SrcVecTy);
+      return VTTI->getCastInstrCost(I->getOpcode(), VectorTy, SrcVecTy);
     }
     default: {
       // We are scalarizing the instruction. Return the cost of the scalar
author	Nadav Rotem <nrotem@apple.com>	2012-10-26 23:49:28 +0000
committer	Nadav Rotem <nrotem@apple.com>	2012-10-26 23:49:28 +0000
commit	a5a3a61c5fdcee972791d4e08441ba6edf131b88 (patch)
tree	2f84237b706b2a0f4fda6d4e37672863ebc3e4e2 /lib
parent	17f42e02a10bd4d43e4ba904c640224de2c48f51 (diff)
download	llvm-a5a3a61c5fdcee972791d4e08441ba6edf131b88.tar.gz llvm-a5a3a61c5fdcee972791d4e08441ba6edf131b88.tar.bz2 llvm-a5a3a61c5fdcee972791d4e08441ba6edf131b88.tar.xz