16 files changed, 109 insertions, 55 deletions
diff --git a/include/llvm-c/Transforms/Scalar.h b/include/llvm-c/Transforms/Scalar.h
index a2c4d6116f..2456c6c726 100644
--- a/include/llvm-c/Transforms/Scalar.h
+++ b/include/llvm-c/Transforms/Scalar.h
@@ -74,6 +74,9 @@ void LLVMAddLoopUnswitchPass(LLVMPassManagerRef PM);
 /** See llvm::createMemCpyOptPass function. */
 void LLVMAddMemCpyOptPass(LLVMPassManagerRef PM);
 
+/** See llvm::createPartiallyInlineLibCallsPass function. */
+void LLVMAddPartiallyInlineLibCallsPass(LLVMPassManagerRef PM);
+
 /** See llvm::createPromoteMemoryToRegisterPass function. */
 void LLVMAddPromoteMemoryToRegisterPass(LLVMPassManagerRef PM);
 
diff --git a/include/llvm/Analysis/TargetTransformInfo.h b/include/llvm/Analysis/TargetTransformInfo.h
index 21a3a12ebb..06810a7de0 100644
--- a/include/llvm/Analysis/TargetTransformInfo.h
+++ b/include/llvm/Analysis/TargetTransformInfo.h
@@ -262,6 +262,10 @@ public:
   /// getPopcntSupport - Return hardware support for population count.
   virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const;
 
+  /// haveFastSqrt -- Return true if the hardware has a fast square-root
+  /// instruction.
+  virtual bool haveFastSqrt(Type *Ty) const;
+
   /// getIntImmCost - Return the expected cost of materializing the given
   /// integer immediate of the specified type.
   virtual unsigned getIntImmCost(const APInt &Imm, Type *Ty) const;
diff --git a/include/llvm/InitializePasses.h b/include/llvm/InitializePasses.h
index 08f0c7ae28..aa06eca9b2 100644
--- a/include/llvm/InitializePasses.h
+++ b/include/llvm/InitializePasses.h
@@ -205,6 +205,7 @@ void initializeObjCARCContractPass(PassRegistry&);
 void initializeObjCARCOptPass(PassRegistry&);
 void initializeOptimalEdgeProfilerPass(PassRegistry&);
 void initializeOptimizePHIsPass(PassRegistry&);
+void initializePartiallyInlineLibCallsPass(PassRegistry&);
 void initializePEIPass(PassRegistry&);
 void initializePHIEliminationPass(PassRegistry&);
 void initializePartialInlinerPass(PassRegistry&);
diff --git a/include/llvm/LinkAllPasses.h b/include/llvm/LinkAllPasses.h
index ec3982991c..ec1ca49b4f 100644
--- a/include/llvm/LinkAllPasses.h
+++ b/include/llvm/LinkAllPasses.h
@@ -163,6 +163,7 @@ namespace {
       (void) llvm::createLoopVectorizePass();
       (void) llvm::createSLPVectorizerPass();
       (void) llvm::createBBVectorizePass();
+      (void) llvm::createPartiallyInlineLibCallsPass();
 
       (void)new llvm::IntervalPartition();
       (void)new llvm::FindUsedTypes();
diff --git a/include/llvm/Transforms/Scalar.h b/include/llvm/Transforms/Scalar.h
index 037ab6bea6..eec9e591ee 100644
--- a/include/llvm/Transforms/Scalar.h
+++ b/include/llvm/Transforms/Scalar.h
@@ -354,6 +354,13 @@ extern char &InstructionSimplifierID;
 FunctionPass *createLowerExpectIntrinsicPass();
 
 
+//===----------------------------------------------------------------------===//
+//
+// PartiallyInlineLibCalls - Tries to inline the fast path of library
+// calls such as sqrt.
+//
+FunctionPass *createPartiallyInlineLibCallsPass();
+
 } // End llvm namespace
 
 #endif
diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp
index 4ad71627be..0a215aa53c 100644
--- a/lib/Analysis/TargetTransformInfo.cpp
+++ b/lib/Analysis/TargetTransformInfo.cpp
@@ -145,6 +145,10 @@ TargetTransformInfo::getPopcntSupport(unsigned IntTyWidthInBit) const {
   return PrevTTI->getPopcntSupport(IntTyWidthInBit);
 }
 
+bool TargetTransformInfo::haveFastSqrt(Type *Ty) const {
+  return PrevTTI->haveFastSqrt(Ty);
+}
+
 unsigned TargetTransformInfo::getIntImmCost(const APInt &Imm, Type *Ty) const {
   return PrevTTI->getIntImmCost(Imm, Ty);
 }
@@ -505,6 +509,10 @@ struct NoTTI : ImmutablePass, TargetTransformInfo {
     return PSK_Software;
   }
 
+  bool haveFastSqrt(Type *Ty) const {
+    return false;
+  }
+
   unsigned getIntImmCost(const APInt &Imm, Type *Ty) const {
     return 1;
   }
diff --git a/lib/CodeGen/BasicTargetTransformInfo.cpp b/lib/CodeGen/BasicTargetTransformInfo.cpp
index 0883ab0ce7..d5340e6023 100644
--- a/lib/CodeGen/BasicTargetTransformInfo.cpp
+++ b/lib/CodeGen/BasicTargetTransformInfo.cpp
@@ -83,6 +83,7 @@ public:
   virtual unsigned getJumpBufAlignment() const;
   virtual unsigned getJumpBufSize() const;
   virtual bool shouldBuildLookupTables() const;
+  virtual bool haveFastSqrt(Type *Ty) const;
 
   /// @}
 
@@ -182,6 +183,12 @@ bool BasicTTI::shouldBuildLookupTables() const {
        TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other));
 }
 
+bool BasicTTI::haveFastSqrt(Type *Ty) const {
+  const TargetLoweringBase *TLI = getTLI();
+  EVT VT = TLI->getValueType(Ty);
+  return TLI->isTypeLegal(VT) && TLI->isOperationLegalOrCustom(ISD::FSQRT, VT);
+}
+
 //===----------------------------------------------------------------------===//
 //
 // Calls used by the vectorizers.
diff --git a/lib/Target/Mips/CMakeLists.txt b/lib/Target/Mips/CMakeLists.txt
index aedb78bc49..6acc9a88c0 100644
--- a/lib/Target/Mips/CMakeLists.txt
+++ b/lib/Target/Mips/CMakeLists.txt
@@ -35,7 +35,6 @@ add_llvm_target(MipsCodeGen
   MipsMachineFunction.cpp
   MipsModuleISelDAGToDAG.cpp
   MipsOs16.cpp
-  MipsOptimizeMathLibCalls.cpp
   MipsRegisterInfo.cpp
   MipsSEFrameLowering.cpp
   MipsSEInstrInfo.cpp
diff --git a/lib/Target/Mips/Mips.h b/lib/Target/Mips/Mips.h
index b88c0d2547..e796debd79 100644
--- a/lib/Target/Mips/Mips.h
+++ b/lib/Target/Mips/Mips.h
@@ -28,7 +28,6 @@ namespace llvm {
   FunctionPass *createMipsJITCodeEmitterPass(MipsTargetMachine &TM,
                                              JITCodeEmitter &JCE);
   FunctionPass *createMipsConstantIslandPass(MipsTargetMachine &tm);
-  FunctionPass *createMipsOptimizeMathLibCalls(MipsTargetMachine &TM);
 } // end namespace llvm;
 
 #endif
diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp
index ced6a09674..f25afe33ff 100644
--- a/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/lib/Target/Mips/MipsTargetMachine.cpp
@@ -32,6 +32,7 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/TargetRegistry.h"
+#include "llvm/Transforms/Scalar.h"
 using namespace llvm;
 
 
@@ -160,7 +161,7 @@ void MipsPassConfig::addIRPasses() {
     addPass(createMipsOs16(getMipsTargetMachine()));
   if (getMipsSubtarget().inMips16HardFloat())
     addPass(createMips16HardFloat(getMipsTargetMachine()));
-  addPass(createMipsOptimizeMathLibCalls(getMipsTargetMachine()));
+  addPass(createPartiallyInlineLibCallsPass());
 }
 // Install an instruction selector pass using
 // the ISelDag to gen Mips code.
diff --git a/lib/Target/SystemZ/SystemZTargetMachine.cpp b/lib/Target/SystemZ/SystemZTargetMachine.cpp
index 856183c6f4..f2761520cd 100644
--- a/lib/Target/SystemZ/SystemZTargetMachine.cpp
+++ b/lib/Target/SystemZ/SystemZTargetMachine.cpp
@@ -10,6 +10,7 @@
 #include "SystemZTargetMachine.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/Support/TargetRegistry.h"
+#include "llvm/Transforms/Scalar.h"
 
 using namespace llvm;
 
@@ -47,12 +48,18 @@ public:
     return getTM<SystemZTargetMachine>();
   }
 
+  virtual void addIRPasses() LLVM_OVERRIDE;
   virtual bool addInstSelector() LLVM_OVERRIDE;
   virtual bool addPreSched2() LLVM_OVERRIDE;
   virtual bool addPreEmitPass() LLVM_OVERRIDE;
 };
 } // end anonymous namespace
 
+void SystemZPassConfig::addIRPasses() {
+  TargetPassConfig::addIRPasses();
+  addPass(createPartiallyInlineLibCallsPass());
+}
+
 bool SystemZPassConfig::addInstSelector() {
   addPass(createSystemZISelDag(getSystemZTargetMachine(), getOptLevel()));
   return false;
diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt
index f5d1db1ec2..7fa7807990 100644
--- a/lib/Transforms/Scalar/CMakeLists.txt
+++ b/lib/Transforms/Scalar/CMakeLists.txt
@@ -21,6 +21,7 @@ add_llvm_library(LLVMScalarOpts
   LoopUnswitch.cpp
   LowerAtomic.cpp
   MemCpyOptimizer.cpp
+  PartiallyInlineLibCalls.cpp
   Reassociate.cpp
   Reg2Mem.cpp
   SCCP.cpp
diff --git a/lib/Target/Mips/MipsOptimizeMathLibCalls.cpp b/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
index de3f09c3b5..15cee44e13 100644
--- a/lib/Target/Mips/MipsOptimizeMathLibCalls.cpp
+++ b/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
@@ -1,4 +1,4 @@
-//===---- MipsOptimizeMathLibCalls.cpp - Optimize math lib calls.      ----===//
+//===--- PartiallyInlineLibCalls.cpp - Partially inline libcalls ----------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -7,76 +7,60 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This pass does an IR transformation which enables the backend to emit native
-// math instructions.
+// This pass tries to partially inline the fast path of well-known library
+// functions, such as using square-root instructions for cases where sqrt()
+// does not need to set errno.
 //
 //===----------------------------------------------------------------------===//
 
-#include "MipsTargetMachine.h"
+#define DEBUG_TYPE "partially-inline-libcalls"
+#include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 
 using namespace llvm;
 
-static cl::opt<bool> DisableOpt("disable-mips-math-optimization",
-                                cl::init(false),
-                                cl::desc("MIPS: Disable math lib call "
-                                         "optimization."), cl::Hidden);
-
 namespace {
-  class MipsOptimizeMathLibCalls : public FunctionPass {
+  class PartiallyInlineLibCalls : public FunctionPass {
   public:
     static char ID;
 
-    MipsOptimizeMathLibCalls(MipsTargetMachine &TM_) :
-      FunctionPass(ID), TM(TM_) {}
-
-    virtual const char *getPassName() const {
-      return "MIPS: Optimize calls to math library functions.";
+    PartiallyInlineLibCalls() :
+      FunctionPass(ID) {
+      initializePartiallyInlineLibCallsPass(*PassRegistry::getPassRegistry());
     }
 
     virtual void getAnalysisUsage(AnalysisUsage &AU) const;
-
     virtual bool runOnFunction(Function &F);
 
   private:
     /// Optimize calls to sqrt.
     bool optimizeSQRT(CallInst *Call, Function *CalledFunc,
-                      BasicBlock &CurrBB,
-                      Function::iterator &BB);
-
-    const TargetMachine &TM;
+                      BasicBlock &CurrBB, Function::iterator &BB);
   };
 
-  char MipsOptimizeMathLibCalls::ID = 0;
+  char PartiallyInlineLibCalls::ID = 0;
 }
 
-FunctionPass *llvm::createMipsOptimizeMathLibCalls(MipsTargetMachine &TM) {
-  return new MipsOptimizeMathLibCalls(TM);
-}
+INITIALIZE_PASS(PartiallyInlineLibCalls, "partially-inline-libcalls",
+                "Partially inline calls to library functions", false, false)
 
-void MipsOptimizeMathLibCalls::getAnalysisUsage(AnalysisUsage &AU) const {
+void PartiallyInlineLibCalls::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.addRequired<TargetLibraryInfo>();
+  AU.addRequired<TargetTransformInfo>();
   FunctionPass::getAnalysisUsage(AU);
 }
 
-bool MipsOptimizeMathLibCalls::runOnFunction(Function &F) {
-  if (DisableOpt)
-    return false;
-
-  const MipsSubtarget &Subtarget = TM.getSubtarget<MipsSubtarget>();
-
-  if (Subtarget.inMips16Mode())
-    return false;
-
+bool PartiallyInlineLibCalls::runOnFunction(Function &F) {
   bool Changed = false;
   Function::iterator CurrBB;
-  const TargetLibraryInfo *LibInfo = &getAnalysis<TargetLibraryInfo>();
-
+  TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
+  const TargetTransformInfo *TTI = &getAnalysis<TargetTransformInfo>();
   for (Function::iterator BB = F.begin(), BE = F.end(); BB != BE;) {
     CurrBB = BB++;
 
@@ -88,25 +72,18 @@ bool MipsOptimizeMathLibCalls::runOnFunction(Function &F) {
       if (!Call || !(CalledFunc = Call->getCalledFunction()))
         continue;
 
-      LibFunc::Func LibFunc;
-      Attribute A = CalledFunc->getAttributes()
-        .getAttribute(AttributeSet::FunctionIndex, "use-soft-float");
-
-      // Skip if function has "use-soft-float" attribute.
-      if ((A.isStringAttribute() && (A.getValueAsString() == "true")) ||
-          TM.Options.UseSoftFloat)
-        continue;
-
       // Skip if function either has local linkage or is not a known library
       // function.
+      LibFunc::Func LibFunc;
       if (CalledFunc->hasLocalLinkage() || !CalledFunc->hasName() ||
-          !LibInfo->getLibFunc(CalledFunc->getName(), LibFunc))
+          !TLI->getLibFunc(CalledFunc->getName(), LibFunc))
         continue;
 
       switch (LibFunc) {
       case LibFunc::sqrtf:
       case LibFunc::sqrt:
-        if (optimizeSQRT(Call, CalledFunc, *CurrBB, BB))
+        if (TTI->haveFastSqrt(Call->getType()) &&
+            optimizeSQRT(Call, CalledFunc, *CurrBB, BB))
           break;
         continue;
       default:
@@ -121,10 +98,10 @@ bool MipsOptimizeMathLibCalls::runOnFunction(Function &F) {
   return Changed;
 }
 
-bool MipsOptimizeMathLibCalls::optimizeSQRT(CallInst *Call,
-                                            Function *CalledFunc,
-                                            BasicBlock &CurrBB,
-                                            Function::iterator &BB) {
+bool PartiallyInlineLibCalls::optimizeSQRT(CallInst *Call,
+                                           Function *CalledFunc,
+                                           BasicBlock &CurrBB,
+                                           Function::iterator &BB) {
   // There is no need to change the IR, since backend will emit sqrt
   // instruction if the call has already been marked read-only.
   if (Call->onlyReadsMemory())
@@ -173,3 +150,7 @@ bool MipsOptimizeMathLibCalls::optimizeSQRT(CallInst *Call,
   BB = JoinBB;
   return true;
 }
+
+FunctionPass *llvm::createPartiallyInlineLibCallsPass() {
+  return new PartiallyInlineLibCalls();
+}
diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp
index 758334dba4..952811b22b 100644
--- a/lib/Transforms/Scalar/Scalar.cpp
+++ b/lib/Transforms/Scalar/Scalar.cpp
@@ -50,6 +50,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
   initializeLowerAtomicPass(Registry);
   initializeLowerExpectIntrinsicPass(Registry);
   initializeMemCpyOptPass(Registry);
+  initializePartiallyInlineLibCallsPass(Registry);
   initializeReassociatePass(Registry);
   initializeRegToMemPass(Registry);
   initializeSCCPPass(Registry);
@@ -123,6 +124,10 @@ void LLVMAddMemCpyOptPass(LLVMPassManagerRef PM) {
   unwrap(PM)->add(createMemCpyOptPass());
 }
 
+void LLVMAddPartiallyInlineLibCallsPass(LLVMPassManagerRef PM) {
+  unwrap(PM)->add(createPartiallyInlineLibCallsPass());
+}
+
 void LLVMAddPromoteMemoryToRegisterPass(LLVMPassManagerRef PM) {
   unwrap(PM)->add(createPromoteMemoryToRegisterPass());
 }
diff --git a/test/CodeGen/SystemZ/fp-sqrt-01.ll b/test/CodeGen/SystemZ/fp-sqrt-01.ll
index b6568d6f01..7465af456b 100644
--- a/test/CodeGen/SystemZ/fp-sqrt-01.ll
+++ b/test/CodeGen/SystemZ/fp-sqrt-01.ll
@@ -2,7 +2,8 @@
 ;
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
 
-declare float @llvm.sqrt.f32(float %f)
+declare float @llvm.sqrt.f32(float)
+declare float @sqrtf(float)
 
 ; Check register square root.
 define float @f1(float %val) {
@@ -152,3 +153,17 @@ define void @f7(float *%ptr) {
 
   ret void
 }
+
+; Check that a call to the normal sqrtf function is lowered.
+define float @f8(float %dummy, float %val) {
+; CHECK-LABEL: f8:
+; CHECK: sqebr %f0, %f2
+; CHECK: cebr %f0, %f0
+; CHECK: jo [[LABEL:\.L.*]]
+; CHECK: br %r14
+; CHECK: [[LABEL]]:
+; CHECK: ler %f0, %f2
+; CHECK: jg sqrtf@PLT
+  %res = tail call float @sqrtf(float %val)
+  ret float %res
+}
diff --git a/test/CodeGen/SystemZ/fp-sqrt-02.ll b/test/CodeGen/SystemZ/fp-sqrt-02.ll
index b07a2c66fb..66ffd19d6c 100644
--- a/test/CodeGen/SystemZ/fp-sqrt-02.ll
+++ b/test/CodeGen/SystemZ/fp-sqrt-02.ll
@@ -3,6 +3,7 @@
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
 
 declare double @llvm.sqrt.f64(double %f)
+declare double @sqrt(double)
 
 ; Check register square root.
 define double @f1(double %val) {
@@ -152,3 +153,17 @@ define void @f7(double *%ptr) {
 
   ret void
 }
+
+; Check that a call to the normal sqrt function is lowered.
+define double @f8(double %dummy, double %val) {
+; CHECK-LABEL: f8:
+; CHECK: sqdbr %f0, %f2
+; CHECK: cdbr %f0, %f0
+; CHECK: jo [[LABEL:\.L.*]]
+; CHECK: br %r14
+; CHECK: [[LABEL]]:
+; CHECK: ldr %f0, %f2
+; CHECK: jg sqrt@PLT
+  %res = tail call double @sqrt(double %val)
+  ret double %res
+}