From 89d9794d4925de2d64cb0c821e11189cd8372b05 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Wed, 29 May 2013 06:56:17 +0000 Subject: Merging r182394: ------------------------------------------------------------------------ r182394 | jholewinski | 2013-05-21 09:51:30 -0700 (Tue, 21 May 2013) | 1 line [NVPTX] Add @llvm.nvvm.sqrt.f() intrinsic ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_33@182829 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/IR/IntrinsicsNVVM.td | 2 ++ lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp | 7 +++++++ lib/Target/NVPTX/NVPTXISelDAGToDAG.h | 4 ++++ lib/Target/NVPTX/NVPTXInstrInfo.td | 3 +++ lib/Target/NVPTX/NVPTXIntrinsics.td | 10 ++++++++++ test/CodeGen/NVPTX/intrinsics.ll | 7 +++++++ 6 files changed, 33 insertions(+) diff --git a/include/llvm/IR/IntrinsicsNVVM.td b/include/llvm/IR/IntrinsicsNVVM.td index ebfd03e484..c248517def 100644 --- a/include/llvm/IR/IntrinsicsNVVM.td +++ b/include/llvm/IR/IntrinsicsNVVM.td @@ -405,6 +405,8 @@ def llvm_anyi64ptr_ty : LLVMAnyPointerType; // (space)i64* // Sqrt // + def int_nvvm_sqrt_f : GCCBuiltin<"__nvvm_sqrt_f">, + Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; def int_nvvm_sqrt_rn_ftz_f : GCCBuiltin<"__nvvm_sqrt_rn_ftz_f">, Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; def int_nvvm_sqrt_rn_f : GCCBuiltin<"__nvvm_sqrt_rn_f">, diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp index 0f4c8dbce5..d4378c2322 100644 --- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp +++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp @@ -42,6 +42,11 @@ static cl::opt UsePrecDivF32( " IEEE Compliant F32 div.rnd if avaiable."), cl::init(2)); +static cl::opt +UsePrecSqrtF32("nvptx-prec-sqrtf32", + cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."), + cl::init(true)); + /// createNVPTXISelDag - This pass converts a legalized DAG into a /// NVPTX-specific DAG, ready for instruction scheduling. FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM, @@ -74,6 +79,8 @@ NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm, // Decide how to translate f32 div do_DIVF32_PREC = UsePrecDivF32; + // Decide how to translate f32 sqrt + do_SQRTF32_PREC = UsePrecSqrtF32; // sm less than sm_20 does not support div.rnd. Use div.full. if (do_DIVF32_PREC == 2 && !Subtarget.reqPTX20()) do_DIVF32_PREC = 1; diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h index 70e8e46429..ed16d4450b 100644 --- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h +++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h @@ -41,6 +41,10 @@ class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel { // Otherwise, use div.full int do_DIVF32_PREC; + // If true, generate sqrt.rn, else generate sqrt.approx. If FTZ + // is true, then generate the corresponding FTZ version. + bool do_SQRTF32_PREC; + // If true, add .ftz to f32 instructions. // This is only meaningful for sm_20 and later, as the default // is not ftz. diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.td b/lib/Target/NVPTX/NVPTXInstrInfo.td index f43abe283b..da6dd39b93 100644 --- a/lib/Target/NVPTX/NVPTXInstrInfo.td +++ b/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -75,6 +75,9 @@ def allowFMA_ftz : Predicate<"(allowFMA && UseF32FTZ)">; def do_DIVF32_APPROX : Predicate<"do_DIVF32_PREC==0">; def do_DIVF32_FULL : Predicate<"do_DIVF32_PREC==1">; +def do_SQRTF32_APPROX : Predicate<"do_SQRTF32_PREC==0">; +def do_SQRTF32_RN : Predicate<"do_SQRTF32_PREC==1">; + def hasHWROT32 : Predicate<"Subtarget.hasHWROT32()">; def true : Predicate<"1">; diff --git a/lib/Target/NVPTX/NVPTXIntrinsics.td b/lib/Target/NVPTX/NVPTXIntrinsics.td index 2780ef4036..24037cafef 100644 --- a/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -512,6 +512,16 @@ def INT_NVVM_SQRT_RM_D : F_MATH_1<"sqrt.rm.f64 \t$dst, $src0;", Float64Regs, def INT_NVVM_SQRT_RP_D : F_MATH_1<"sqrt.rp.f64 \t$dst, $src0;", Float64Regs, Float64Regs, int_nvvm_sqrt_rp_d>; +// nvvm_sqrt intrinsic +def : Pat<(int_nvvm_sqrt_f Float32Regs:$a), + (INT_NVVM_SQRT_RN_FTZ_F Float32Regs:$a)>, Requires<[doF32FTZ, do_SQRTF32_RN]>; +def : Pat<(int_nvvm_sqrt_f Float32Regs:$a), + (INT_NVVM_SQRT_RN_F Float32Regs:$a)>, Requires<[do_SQRTF32_RN]>; +def : Pat<(int_nvvm_sqrt_f Float32Regs:$a), + (INT_NVVM_SQRT_APPROX_FTZ_F Float32Regs:$a)>, Requires<[doF32FTZ]>; +def : Pat<(int_nvvm_sqrt_f Float32Regs:$a), + (INT_NVVM_SQRT_APPROX_F Float32Regs:$a)>; + // // Rsqrt // diff --git a/test/CodeGen/NVPTX/intrinsics.ll b/test/CodeGen/NVPTX/intrinsics.ll index 8b0357be87..1676f20643 100644 --- a/test/CodeGen/NVPTX/intrinsics.ll +++ b/test/CodeGen/NVPTX/intrinsics.ll @@ -15,5 +15,12 @@ define ptx_device double @test_fabs(double %d) { ret double %x } +define float @test_nvvm_sqrt(float %a) { + %val = call float @llvm.nvvm.sqrt.f(float %a) + ret float %val +} + + declare float @llvm.fabs.f32(float) declare double @llvm.fabs.f64(double) +declare float @llvm.nvvm.sqrt.f(float) -- cgit v1.2.3