summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorBill Wendling <isanbard@gmail.com>2013-05-29 06:56:17 +0000
committerBill Wendling <isanbard@gmail.com>2013-05-29 06:56:17 +0000
commit89d9794d4925de2d64cb0c821e11189cd8372b05 (patch)
tree2257e7e3c36122d791f7cb1ad7411d92e0d76286 /lib
parentafa582f1cd497d32f6c293cf50b08fefe0a2573f (diff)
downloadllvm-89d9794d4925de2d64cb0c821e11189cd8372b05.tar.gz
llvm-89d9794d4925de2d64cb0c821e11189cd8372b05.tar.bz2
llvm-89d9794d4925de2d64cb0c821e11189cd8372b05.tar.xz
Merging r182394:
------------------------------------------------------------------------ r182394 | jholewinski | 2013-05-21 09:51:30 -0700 (Tue, 21 May 2013) | 1 line [NVPTX] Add @llvm.nvvm.sqrt.f() intrinsic ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_33@182829 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r--lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp7
-rw-r--r--lib/Target/NVPTX/NVPTXISelDAGToDAG.h4
-rw-r--r--lib/Target/NVPTX/NVPTXInstrInfo.td3
-rw-r--r--lib/Target/NVPTX/NVPTXIntrinsics.td10
4 files changed, 24 insertions, 0 deletions
diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index 0f4c8dbce5..d4378c2322 100644
--- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -42,6 +42,11 @@ static cl::opt<int> UsePrecDivF32(
" IEEE Compliant F32 div.rnd if avaiable."),
cl::init(2));
+static cl::opt<bool>
+UsePrecSqrtF32("nvptx-prec-sqrtf32",
+ cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."),
+ cl::init(true));
+
/// createNVPTXISelDag - This pass converts a legalized DAG into a
/// NVPTX-specific DAG, ready for instruction scheduling.
FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM,
@@ -74,6 +79,8 @@ NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
// Decide how to translate f32 div
do_DIVF32_PREC = UsePrecDivF32;
+ // Decide how to translate f32 sqrt
+ do_SQRTF32_PREC = UsePrecSqrtF32;
// sm less than sm_20 does not support div.rnd. Use div.full.
if (do_DIVF32_PREC == 2 && !Subtarget.reqPTX20())
do_DIVF32_PREC = 1;
diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
index 70e8e46429..ed16d4450b 100644
--- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
+++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
@@ -41,6 +41,10 @@ class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel {
// Otherwise, use div.full
int do_DIVF32_PREC;
+ // If true, generate sqrt.rn, else generate sqrt.approx. If FTZ
+ // is true, then generate the corresponding FTZ version.
+ bool do_SQRTF32_PREC;
+
// If true, add .ftz to f32 instructions.
// This is only meaningful for sm_20 and later, as the default
// is not ftz.
diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.td b/lib/Target/NVPTX/NVPTXInstrInfo.td
index f43abe283b..da6dd39b93 100644
--- a/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -75,6 +75,9 @@ def allowFMA_ftz : Predicate<"(allowFMA && UseF32FTZ)">;
def do_DIVF32_APPROX : Predicate<"do_DIVF32_PREC==0">;
def do_DIVF32_FULL : Predicate<"do_DIVF32_PREC==1">;
+def do_SQRTF32_APPROX : Predicate<"do_SQRTF32_PREC==0">;
+def do_SQRTF32_RN : Predicate<"do_SQRTF32_PREC==1">;
+
def hasHWROT32 : Predicate<"Subtarget.hasHWROT32()">;
def true : Predicate<"1">;
diff --git a/lib/Target/NVPTX/NVPTXIntrinsics.td b/lib/Target/NVPTX/NVPTXIntrinsics.td
index 2780ef4036..24037cafef 100644
--- a/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -512,6 +512,16 @@ def INT_NVVM_SQRT_RM_D : F_MATH_1<"sqrt.rm.f64 \t$dst, $src0;", Float64Regs,
def INT_NVVM_SQRT_RP_D : F_MATH_1<"sqrt.rp.f64 \t$dst, $src0;", Float64Regs,
Float64Regs, int_nvvm_sqrt_rp_d>;
+// nvvm_sqrt intrinsic
+def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
+ (INT_NVVM_SQRT_RN_FTZ_F Float32Regs:$a)>, Requires<[doF32FTZ, do_SQRTF32_RN]>;
+def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
+ (INT_NVVM_SQRT_RN_F Float32Regs:$a)>, Requires<[do_SQRTF32_RN]>;
+def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
+ (INT_NVVM_SQRT_APPROX_FTZ_F Float32Regs:$a)>, Requires<[doF32FTZ]>;
+def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
+ (INT_NVVM_SQRT_APPROX_F Float32Regs:$a)>;
+
//
// Rsqrt
//