author     Benjamin Kramer <benny.kra@googlemail.com>  2013-02-28 19:09:33 +0000
committer  Benjamin Kramer <benny.kra@googlemail.com>  2013-02-28 19:09:33 +0000
commit     8611d4449a77ca05e808823bc966573a85da00cb (patch)
tree       948b8110ba823e8880a6445c17aaae4dd370d151 /lib/CodeGen/BasicTargetTransformInfo.cpp
parent     279706e90e12e9418d4e8f9415d5f3ed33a99bdb (diff)
Cost model support for lowered math builtins.
We make the cost of calling libm functions extremely high, as emitting the calls is expensive and causes spills (on x86), so performance suffers. We still vectorize important calls such as fabs, and ceilf and friends on SSE4.1.

Differential Revision: http://llvm-reviews.chandlerc.com/D466

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@176287 91177308-0d34-0410-b5e6-96231b3b80d8
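In outline, the patch makes getIntrinsicInstrCost three-tiered: legal or promoted operations stay cheap, custom-lowered ones count double, and anything that must be expanded (i.e. scalarized into libm calls) gets a heavy penalty. A minimal standalone sketch of that tiering follows; it is illustrative only, and the enum and parameter names are stand-ins rather than the TargetLowering API (the real code also recurses to the scalar intrinsic cost for vectors, which this sketch folds into one factor):

    // Stand-in for TargetLowering's legalization action (assumption, not the
    // real enum).
    enum class LowerAction { Legal, Promote, Custom, Expand };

    // Sketch of the new cost tiers. NumParts plays the role of LT.first (the
    // number of legal registers the type splits into); NumElts is the vector
    // element count.
    unsigned sketchIntrinsicCost(LowerAction Action, unsigned NumParts,
                                 unsigned NumElts) {
      switch (Action) {
      case LowerAction::Legal:
      case LowerAction::Promote:
        // Legal/promoted: ~1 per legal register, doubled when the type splits.
        return NumParts > 1 ? NumParts * 2 : NumParts;
      case LowerAction::Custom:
        // Custom lowering is assumed twice as expensive as a legal op.
        return NumParts * 2;
      case LowerAction::Expand:
        // Expansion becomes a libm call per element: call overhead plus
        // spills, so it is charged roughly 10x per scalar call.
        return 10 * NumElts;
      }
      return 10; // scalar libcall fallback
    }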
Diffstat (limited to 'lib/CodeGen/BasicTargetTransformInfo.cpp')
-rw-r--r--  lib/CodeGen/BasicTargetTransformInfo.cpp | 79
1 file changed, 67 insertions(+), 12 deletions(-)
diff --git a/lib/CodeGen/BasicTargetTransformInfo.cpp b/lib/CodeGen/BasicTargetTransformInfo.cpp
index e8b5b4fe8d..4cd1b80dc0 100644
--- a/lib/CodeGen/BasicTargetTransformInfo.cpp
+++ b/lib/CodeGen/BasicTargetTransformInfo.cpp
@@ -379,22 +379,77 @@ unsigned BasicTTI::getMemoryOpCost(unsigned Opcode, Type *Src,
   return LT.first;
 }
 
-unsigned BasicTTI::getIntrinsicInstrCost(Intrinsic::ID, Type *RetTy,
+unsigned BasicTTI::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
                                          ArrayRef<Type *> Tys) const {
-  // assume that we need to scalarize this intrinsic.
-  unsigned ScalarizationCost = 0;
-  unsigned ScalarCalls = 1;
-  if (RetTy->isVectorTy()) {
-    ScalarizationCost = getScalarizationOverhead(RetTy, true, false);
-    ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements());
-  }
-  for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
-    if (Tys[i]->isVectorTy()) {
-      ScalarizationCost += getScalarizationOverhead(Tys[i], false, true);
+  unsigned ISD = 0;
+  switch (IID) {
+  default: {
+    // Assume that we need to scalarize this intrinsic.
+    unsigned ScalarizationCost = 0;
+    unsigned ScalarCalls = 1;
+    if (RetTy->isVectorTy()) {
+      ScalarizationCost = getScalarizationOverhead(RetTy, true, false);
       ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements());
     }
+    for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
+      if (Tys[i]->isVectorTy()) {
+        ScalarizationCost += getScalarizationOverhead(Tys[i], false, true);
+        ScalarCalls = std::max(ScalarCalls, Tys[i]->getVectorNumElements());
+      }
+    }
+
+    return ScalarCalls + ScalarizationCost;
+  }
+  // Look for intrinsics that can be lowered directly or turned into a scalar
+  // intrinsic call.
+  case Intrinsic::sqrt:    ISD = ISD::FSQRT;  break;
+  case Intrinsic::sin:     ISD = ISD::FSIN;   break;
+  case Intrinsic::cos:     ISD = ISD::FCOS;   break;
+  case Intrinsic::exp:     ISD = ISD::FEXP;   break;
+  case Intrinsic::exp2:    ISD = ISD::FEXP2;  break;
+  case Intrinsic::log:     ISD = ISD::FLOG;   break;
+  case Intrinsic::log10:   ISD = ISD::FLOG10; break;
+  case Intrinsic::log2:    ISD = ISD::FLOG2;  break;
+  case Intrinsic::fabs:    ISD = ISD::FABS;   break;
+  case Intrinsic::floor:   ISD = ISD::FFLOOR; break;
+  case Intrinsic::ceil:    ISD = ISD::FCEIL;  break;
+  case Intrinsic::trunc:   ISD = ISD::FTRUNC; break;
+  case Intrinsic::rint:    ISD = ISD::FRINT;  break;
+  case Intrinsic::pow:     ISD = ISD::FPOW;   break;
+  case Intrinsic::fma:     ISD = ISD::FMA;    break;
+  case Intrinsic::fmuladd: ISD = ISD::FMA;    break; // FIXME: mul + add?
+  }
+
+  std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(RetTy);
+
+  if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
+    // The operation is legal. Assume it costs 1.
+    // If the type is split to multiple registers, assume that there is some
+    // overhead to this.
+    // TODO: Once we have extract/insert subvector cost we need to use them.
+    if (LT.first > 1)
+      return LT.first * 2;
+    return LT.first * 1;
   }
-  return ScalarCalls + ScalarizationCost;
+
+  if (!TLI->isOperationExpand(ISD, LT.second)) {
+    // If the operation is custom lowered then assume
+    // that the code is twice as expensive.
+    return LT.first * 2;
+  }
+
+  // Else, assume that we need to scalarize this intrinsic. For math builtins
+  // this will emit a costly libcall, adding call overhead and spills. Make it
+  // very expensive.
+  if (RetTy->isVectorTy()) {
+    unsigned Num = RetTy->getVectorNumElements();
+    unsigned Cost = TopTTI->getIntrinsicInstrCost(IID, RetTy->getScalarType(),
+                                                  Tys);
+    return 10 * Cost * Num;
+  }
+
+  // This is going to be turned into a library call, make it expensive.
+  return 10;
 }
 
 unsigned BasicTTI::getNumberOfParts(Type *Tp) const {
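Usage note (a hedged sketch, not part of the commit): clients such as the loop vectorizer consume these numbers through the TargetTransformInfo interface patched above, comparing the cost of one widened call against VF scalar calls. Assuming a TTI reference and the relevant types, the decision looks roughly like this; the helper name shouldWidenCall is hypothetical:

    #include "llvm/Analysis/TargetTransformInfo.h"
    using namespace llvm;

    // Hedged sketch: is widening an intrinsic call by factor VF profitable?
    // With this patch, an illegal vector math intrinsic reports roughly
    // 10 * ScalarCost * NumElts, so this comparison now keeps such calls
    // scalar instead of emitting a widened body full of libm calls and spills.
    static bool shouldWidenCall(const TargetTransformInfo &TTI,
                                Intrinsic::ID IID, Type *ScalarRetTy,
                                Type *VectorRetTy, ArrayRef<Type *> Tys,
                                unsigned VF) {
      unsigned ScalarCost = VF * TTI.getIntrinsicInstrCost(IID, ScalarRetTy, Tys);
      unsigned VectorCost = TTI.getIntrinsicInstrCost(IID, VectorRetTy, Tys);
      return VectorCost < ScalarCost;
    }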