Apply the InstCombine fptrunc sqrt optimization to llvm.sqrt

InstCombine, in visitFPTrunc, applies the following optimization to sqrt calls: (fptrunc (sqrt (fpext x))) -> (sqrtf x) but does not apply the same optimization to llvm.sqrt. This is a problem because, to enable vectorization, Clang generates llvm.sqrt instead of sqrt in fast-math mode, and because this optimization is being applied to sqrt and not applied to llvm.sqrt, sometimes the fast-math code is slower. This change makes InstCombine apply this optimization to llvm.sqrt as well. This fixes the specific problem in PR17758, although the same underlying issue (optimizations applied to libcalls are not applied to intrinsics) exists for other optimizations in SimplifyLibCalls. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@194935 91177308-0d34-0410-b5e6-96231b3b80d8
author: Hal Finkel <hfinkel@anl.gov> 2013-11-16 21:29:08 +0000
committer: Hal Finkel <hfinkel@anl.gov> 2013-11-16 21:29:08 +0000
commit: 64fa501b1081b5d5c25e5e9639075abb0cb724d9 (patch)
tree: a190f3ea48a03753cd6f44e382cf97464e79b63b /lib/Transforms
parent: e6e811277f045ee3d61cd62622d71005c47eb48d (diff)
download: llvm-64fa501b1081b5d5c25e5e9639075abb0cb724d9.tar.gz
llvm-64fa501b1081b5d5c25e5e9639075abb0cb724d9.tar.bz2
llvm-64fa501b1081b5d5c25e5e9639075abb0cb724d9.tar.xz
1 files changed, 11 insertions, 6 deletions
diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp
index a1aedd4e8f..72377dc0ad 100644
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -1262,9 +1262,14 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
   }
 
   // Fold (fptrunc (sqrt (fpext x))) -> (sqrtf x)
+  // Note that we restrict this transformation based on
+  // TLI->has(LibFunc::sqrtf), even for the sqrt intrinsic, because
+  // TLI->has(LibFunc::sqrtf) is sufficient to guarantee that the
+  // single-precision intrinsic can be expanded in the backend.
   CallInst *Call = dyn_cast<CallInst>(CI.getOperand(0));
   if (Call && Call->getCalledFunction() && TLI->has(LibFunc::sqrtf) &&
-      Call->getCalledFunction()->getName() == TLI->getName(LibFunc::sqrt) &&
+      (Call->getCalledFunction()->getName() == TLI->getName(LibFunc::sqrt) ||
+       Call->getCalledFunction()->getIntrinsicID() == Intrinsic::sqrt) &&
       Call->getNumArgOperands() == 1 &&
       Call->hasOneUse()) {
     CastInst *Arg = dyn_cast<CastInst>(Call->getArgOperand(0));
@@ -1275,11 +1280,11 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
         Arg->getOperand(0)->getType()->isFloatTy()) {
       Function *Callee = Call->getCalledFunction();
       Module *M = CI.getParent()->getParent()->getParent();
-      Constant *SqrtfFunc = M->getOrInsertFunction("sqrtf",
-                                                   Callee->getAttributes(),
-                                                   Builder->getFloatTy(),
-                                                   Builder->getFloatTy(),
-                                                   NULL);
+      Constant *SqrtfFunc = (Callee->getIntrinsicID() == Intrinsic::sqrt) ?
+        Intrinsic::getDeclaration(M, Intrinsic::sqrt, Builder->getFloatTy()) :
+        M->getOrInsertFunction("sqrtf", Callee->getAttributes(),
+                               Builder->getFloatTy(), Builder->getFloatTy(),
+                               NULL);
       CallInst *ret = CallInst::Create(SqrtfFunc, Arg->getOperand(0),
                                        "sqrtfcall");
       ret->setAttributes(Callee->getAttributes());
author	Hal Finkel <hfinkel@anl.gov>	2013-11-16 21:29:08 +0000
committer	Hal Finkel <hfinkel@anl.gov>	2013-11-16 21:29:08 +0000
commit	64fa501b1081b5d5c25e5e9639075abb0cb724d9 (patch)
tree	a190f3ea48a03753cd6f44e382cf97464e79b63b /lib/Transforms
parent	e6e811277f045ee3d61cd62622d71005c47eb48d (diff)
download	llvm-64fa501b1081b5d5c25e5e9639075abb0cb724d9.tar.gz llvm-64fa501b1081b5d5c25e5e9639075abb0cb724d9.tar.bz2 llvm-64fa501b1081b5d5c25e5e9639075abb0cb724d9.tar.xz