[NVPTX] Use approximate FP ops when unsafe-fp-math is used, and append
.ftz to instructions if the nvptx-f32ftz attribute is set to "true"

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186820 91177308-0d34-0410-b5e6-96231b3b80d8
author: Justin Holewinski <jholewinski@nvidia.com> 2013-07-22 12:18:04 +0000
committer: Justin Holewinski <jholewinski@nvidia.com> 2013-07-22 12:18:04 +0000
commit: 3a8ee4ffd783bd0cf2d83089edb43ec546b49d0d (patch)
tree: bb08127142c401b1737718f4ac00320393e5e0fb /lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
parent: 1abb7bc7e917771a80cd7788ee37ba7dab98f183 (diff)
download: llvm-3a8ee4ffd783bd0cf2d83089edb43ec546b49d0d.tar.gz
llvm-3a8ee4ffd783bd0cf2d83089edb43ec546b49d0d.tar.bz2
llvm-3a8ee4ffd783bd0cf2d83089edb43ec546b49d0d.tar.xz
1 file changed, 47 insertions, 20 deletions
diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index b613587f2d..ba85e35a73 100644
--- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -25,11 +25,6 @@
 
 using namespace llvm;
 
-static cl::opt<bool> UseFMADInstruction(
-    "nvptx-mad-enable", cl::ZeroOrMore,
-    cl::desc("NVPTX Specific: Enable generating FMAD instructions"),
-    cl::init(false));
-
 static cl::opt<int>
 FMAContractLevel("nvptx-fma-level", cl::ZeroOrMore,
                  cl::desc("NVPTX Specific: FMA contraction (0: don't do it"
@@ -47,6 +42,12 @@ UsePrecSqrtF32("nvptx-prec-sqrtf32",
           cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."),
           cl::init(true));
 
+static cl::opt<bool>
+FtzEnabled("nvptx-f32ftz", cl::ZeroOrMore,
+           cl::desc("NVPTX Specific: Flush f32 subnormals to sign-preserving zero."),
+           cl::init(false));
+
+
 /// createNVPTXISelDag - This pass converts a legalized DAG into a
 /// NVPTX-specific DAG, ready for instruction scheduling.
 FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM,
@@ -58,12 +59,7 @@ NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
                                      CodeGenOpt::Level OptLevel)
     : SelectionDAGISel(tm, OptLevel),
       Subtarget(tm.getSubtarget<NVPTXSubtarget>()) {
-  // Always do fma.f32 fpcontract if the target supports the instruction.
-  // Always do fma.f64 fpcontract if the target supports the instruction.
-  // Do mad.f32 is nvptx-mad-enable is specified and the target does not
-  // support fma.f32.
 
-  doFMADF32 = (OptLevel > 0) && UseFMADInstruction && !Subtarget.hasFMAF32();
   doFMAF32 = (OptLevel > 0) && Subtarget.hasFMAF32() && (FMAContractLevel >= 1);
   doFMAF64 = (OptLevel > 0) && Subtarget.hasFMAF64() && (FMAContractLevel >= 1);
   doFMAF32AGG =
@@ -71,20 +67,51 @@ NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
   doFMAF64AGG =
       (OptLevel > 0) && Subtarget.hasFMAF64() && (FMAContractLevel == 2);
 
-  allowFMA = (FMAContractLevel >= 1) || UseFMADInstruction;
-
-  UseF32FTZ = false;
+  allowFMA = (FMAContractLevel >= 1);
 
   doMulWide = (OptLevel > 0);
+}
 
-  // Decide how to translate f32 div
-  do_DIVF32_PREC = UsePrecDivF32;
-  // Decide how to translate f32 sqrt
-  do_SQRTF32_PREC = UsePrecSqrtF32;
-  // sm less than sm_20 does not support div.rnd. Use div.full.
-  if (do_DIVF32_PREC == 2 && !Subtarget.reqPTX20())
-    do_DIVF32_PREC = 1;
+int NVPTXDAGToDAGISel::getDivF32Level() const {
+  if (UsePrecDivF32.getNumOccurrences() > 0) {
+    // If nvptx-prec-div32=N is used on the command-line, always honor it
+    return UsePrecDivF32;
+  } else {
+    // Otherwise, use div.approx if fast math is enabled
+    if (TM.Options.UnsafeFPMath)
+      return 0;
+    else
+      return 2;
+  }
+}
 
+bool NVPTXDAGToDAGISel::usePrecSqrtF32() const {
+  if (UsePrecSqrtF32.getNumOccurrences() > 0) {
+    // If nvptx-prec-sqrtf32 is used on the command-line, always honor it
+    return UsePrecSqrtF32;
+  } else {
+    // Otherwise, use sqrt.approx if fast math is enabled
+    if (TM.Options.UnsafeFPMath)
+      return false;
+    else
+      return true;
+  }
+}
+
+bool NVPTXDAGToDAGISel::useF32FTZ() const {
+  if (FtzEnabled.getNumOccurrences() > 0) {
+    // If nvptx-f32ftz is used on the command-line, always honor it
+    return FtzEnabled;
+  } else {
+    const Function *F = MF->getFunction();
+    // Otherwise, check for an nvptx-f32ftz attribute on the function
+    if (F->hasFnAttribute("nvptx-f32ftz"))
+      return (F->getAttributes().getAttribute(AttributeSet::FunctionIndex,
+                                              "nvptx-f32ftz")
+                                              .getValueAsString() == "true");
+    else
+      return false;
+  }
 }
 
 /// Select - Select instructions not customized! Used for
author	Justin Holewinski <jholewinski@nvidia.com>	2013-07-22 12:18:04 +0000
committer	Justin Holewinski <jholewinski@nvidia.com>	2013-07-22 12:18:04 +0000
commit	3a8ee4ffd783bd0cf2d83089edb43ec546b49d0d (patch)
tree	bb08127142c401b1737718f4ac00320393e5e0fb /lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
parent	1abb7bc7e917771a80cd7788ee37ba7dab98f183 (diff)
download	llvm-3a8ee4ffd783bd0cf2d83089edb43ec546b49d0d.tar.gz llvm-3a8ee4ffd783bd0cf2d83089edb43ec546b49d0d.tar.bz2 llvm-3a8ee4ffd783bd0cf2d83089edb43ec546b49d0d.tar.xz