LoopVectorize: Teach the cost model to query scalar costs as scalar types and not vectors of 1.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@166715 91177308-0d34-0410-b5e6-96231b3b80d8
author: Nadav Rotem <nrotem@apple.com> 2012-10-25 21:03:48 +0000
committer: Nadav Rotem <nrotem@apple.com> 2012-10-25 21:03:48 +0000
commit: 3ef9dfa6858e25015c3e36b2f1a0ba5ebdea80d2 (patch)
tree: 52e3424c67ff8a48a6f501e150092d262b770a15 /lib/Transforms/Vectorize
parent: 6a020a71173a3ea7738a9df69982e85ddbfe0303 (diff)
download: llvm-3ef9dfa6858e25015c3e36b2f1a0ba5ebdea80d2.tar.gz
llvm-3ef9dfa6858e25015c3e36b2f1a0ba5ebdea80d2.tar.bz2
llvm-3ef9dfa6858e25015c3e36b2f1a0ba5ebdea80d2.tar.xz
1 files changed, 61 insertions, 41 deletions
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index 423c7a4911..e47baf8908 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -324,6 +324,11 @@ private:
   /// width. Vector width of one means scalar.
   unsigned getInstructionCost(Instruction *I, unsigned VF);
 
+  /// A helper function for converting Scalar types to vector types.
+  /// If the incoming type is void, we return void. If the VF is 1, we return
+  /// the scalar type.
+  static Type* ToVectorTy(Type *Scalar, unsigned VF);
+
   /// The loop that we evaluate.
   Loop *TheLoop;
   /// Scev analysis.
@@ -1478,8 +1483,16 @@ unsigned LoopVectorizationCostModel::expectedCost(unsigned VF) {
 unsigned
 LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
   assert(VTTI && "Invalid vector target transformation info");
+
+  Type *RetTy = I->getType();
+  Type *VectorTy = ToVectorTy(RetTy, VF);
+
+  // TODO: We need to estimate the cost of intrinsic calls.
   switch (I->getOpcode()) {
     case Instruction::GetElementPtr:
+      // We mark this instruction as zero-cost because scalar GEPs are usually
+      // lowered to the instruction addressing mode. At the moment we don't
+      // generate vector geps.
       return 0;
     case Instruction::Br: {
       return VTTI->getInstrCost(I->getOpcode());
@@ -1504,74 +1517,76 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
     case Instruction::And:
     case Instruction::Or:
     case Instruction::Xor: {
-      Type *VTy = VectorType::get(I->getType(), VF);
-      return VTTI->getInstrCost(I->getOpcode(), VTy);
+      return VTTI->getInstrCost(I->getOpcode(), VectorTy);
     }
     case Instruction::Select: {
       SelectInst *SI = cast<SelectInst>(I);
-      Type *VTy = VectorType::get(I->getType(), VF);
       const SCEV *CondSCEV = SE->getSCEV(SI->getCondition());
       bool ScalarCond = (SE->isLoopInvariant(CondSCEV, TheLoop));
       Type *CondTy = SI->getCondition()->getType();
       if (ScalarCond)
         CondTy = VectorType::get(CondTy, VF);
 
-      return VTTI->getInstrCost(I->getOpcode(), VTy, CondTy);
+      return VTTI->getInstrCost(I->getOpcode(), VectorTy, CondTy);
     }
     case Instruction::ICmp:
     case Instruction::FCmp: {
-      Type *VTy = VectorType::get(I->getOperand(0)->getType(), VF);
-      return VTTI->getInstrCost(I->getOpcode(), VTy);
+      Type *ValTy = I->getOperand(0)->getType();
+      VectorTy = ToVectorTy(ValTy, VF);
+      return VTTI->getInstrCost(I->getOpcode(), VectorTy);
     }
     case Instruction::Store: {
       StoreInst *SI = cast<StoreInst>(I);
-      Type *VTy = VectorType::get(SI->getValueOperand()->getType(), VF);
+      Type *ValTy = SI->getValueOperand()->getType();
+      VectorTy = ToVectorTy(ValTy, VF);
+
+      if (VF == 1)
+        return VTTI->getMemoryOpCost(I->getOpcode(), ValTy,
+                              SI->getAlignment(), SI->getPointerAddressSpace());
 
       // Scalarized stores.
       if (!Legal->isConsecutiveGep(SI->getPointerOperand())) {
         unsigned Cost = 0;
-        if (VF != 1) {
-          unsigned ExtCost = VTTI->getInstrCost(Instruction::ExtractElement,
-                                                VTy);
-          // The cost of extracting from the value vector and pointer vector.
-          Cost += VF * (ExtCost * 2);
-        }
+        unsigned ExtCost = VTTI->getInstrCost(Instruction::ExtractElement,
+                                              ValTy);
+        // The cost of extracting from the value vector.
+        Cost += VF * (ExtCost);
         // The cost of the scalar stores.
         Cost += VF * VTTI->getMemoryOpCost(I->getOpcode(),
-                                           VTy->getScalarType(),
+                                           ValTy->getScalarType(),
                                            SI->getAlignment(),
                                            SI->getPointerAddressSpace());
         return Cost;
       }
 
       // Wide stores.
-      return VTTI->getMemoryOpCost(I->getOpcode(), VTy, SI->getAlignment(),
+      return VTTI->getMemoryOpCost(I->getOpcode(), VectorTy, SI->getAlignment(),
                                    SI->getPointerAddressSpace());
     }
     case Instruction::Load: {
       LoadInst *LI = cast<LoadInst>(I);
-      Type *VTy = VectorType::get(I->getType(), VF);
+
+      if (VF == 1)
+        return VTTI->getMemoryOpCost(I->getOpcode(), RetTy,
+                                     LI->getAlignment(),
+                                     LI->getPointerAddressSpace());
 
       // Scalarized loads.
       if (!Legal->isConsecutiveGep(LI->getPointerOperand())) {
         unsigned Cost = 0;
-        if (VF != 1) {
-          unsigned InCost = VTTI->getInstrCost(Instruction::InsertElement, VTy);
-          unsigned ExCost = VTTI->getInstrCost(Instruction::ExtractValue, VTy);
-
-          // The cost of inserting the loaded value into the result vector, and
-          // extracting from a vector of pointers.
-          Cost += VF * (InCost + ExCost);
-        }
+        unsigned InCost = VTTI->getInstrCost(Instruction::InsertElement, RetTy);
+        // The cost of inserting the loaded value into the result vector.
+        Cost += VF * (InCost);
         // The cost of the scalar stores.
-        Cost += VF * VTTI->getMemoryOpCost(I->getOpcode(), VTy->getScalarType(),
+        Cost += VF * VTTI->getMemoryOpCost(I->getOpcode(),
+                                           RetTy->getScalarType(),
                                            LI->getAlignment(),
                                            LI->getPointerAddressSpace());
         return Cost;
       }
 
       // Wide loads.
-      return VTTI->getMemoryOpCost(I->getOpcode(), VTy, LI->getAlignment(),
+      return VTTI->getMemoryOpCost(I->getOpcode(), VectorTy, LI->getAlignment(),
                                    LI->getPointerAddressSpace());
     }
     case Instruction::ZExt:
@@ -1586,35 +1601,40 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
     case Instruction::Trunc:
     case Instruction::FPTrunc:
     case Instruction::BitCast: {
-      Type *SrcTy = VectorType::get(I->getOperand(0)->getType(), VF);
-      Type *DstTy = VectorType::get(I->getType(), VF);
-      return VTTI->getInstrCost(I->getOpcode(), DstTy, SrcTy);
+      Type *SrcVecTy = ToVectorTy(I->getOperand(0)->getType(), VF);
+      return VTTI->getInstrCost(I->getOpcode(), VectorTy, SrcVecTy);
     }
     default: {
       // We are scalarizing the instruction. Return the cost of the scalar
       // instruction, plus the cost of insert and extract into vector
       // elements, times the vector width.
       unsigned Cost = 0;
-      Type *Ty = I->getType();
 
-      if (!Ty->isVoidTy()) {
-        Type *VTy = VectorType::get(Ty, VF);
-        unsigned InsCost = VTTI->getInstrCost(Instruction::InsertElement, VTy);
-        unsigned ExtCost = VTTI->getInstrCost(Instruction::ExtractElement, VTy);
-        Cost += VF * (InsCost + ExtCost);
-      }
+      bool IsVoid = RetTy->isVoidTy();
 
-      /// We don't have any information on the scalar instruction, but maybe
-      /// the target has.
-      /// TODO: This may be a target-specific intrinsic.
-      /// Need to add API for that.
-      Cost += VF * VTTI->getInstrCost(I->getOpcode(), Ty);
+      unsigned InsCost = (IsVoid ? 0 :
+                          VTTI->getInstrCost(Instruction::InsertElement,
+                                             VectorTy));
 
+      unsigned ExtCost = VTTI->getInstrCost(Instruction::ExtractElement,
+                                            VectorTy);
+
+      // The cost of inserting the results plus extracting each one of the
+      // operands.
+      Cost += VF * (InsCost + ExtCost * I->getNumOperands());
+
+      // The cost of executing VF copies of the scalar instruction.
+      Cost += VF * VTTI->getInstrCost(I->getOpcode(), RetTy);
       return Cost;
     }
   }// end of switch.
 }
 
+Type* LoopVectorizationCostModel::ToVectorTy(Type *Scalar, unsigned VF) {
+  if (Scalar->isVoidTy() || VF == 1)
+    return Scalar;
+  return VectorType::get(Scalar, VF);
+}
 
 } // namespace
author	Nadav Rotem <nrotem@apple.com>	2012-10-25 21:03:48 +0000
committer	Nadav Rotem <nrotem@apple.com>	2012-10-25 21:03:48 +0000
commit	3ef9dfa6858e25015c3e36b2f1a0ba5ebdea80d2 (patch)
tree	52e3424c67ff8a48a6f501e150092d262b770a15 /lib/Transforms/Vectorize
parent	6a020a71173a3ea7738a9df69982e85ddbfe0303 (diff)
download	llvm-3ef9dfa6858e25015c3e36b2f1a0ba5ebdea80d2.tar.gz llvm-3ef9dfa6858e25015c3e36b2f1a0ba5ebdea80d2.tar.bz2 llvm-3ef9dfa6858e25015c3e36b2f1a0ba5ebdea80d2.tar.xz