summary | refs | log | tree | commit | diff
path: root/lib
diff options
context:
space:
mode:
author: Justin Holewinski <jholewinski@nvidia.com> 2013-07-22 12:18:04 +0000
committer: Justin Holewinski <jholewinski@nvidia.com> 2013-07-22 12:18:04 +0000
commit: 3a8ee4ffd783bd0cf2d83089edb43ec546b49d0d (patch)
tree: bb08127142c401b1737718f4ac00320393e5e0fb /lib
parent: 1abb7bc7e917771a80cd7788ee37ba7dab98f183 (diff)
download: llvm-3a8ee4ffd783bd0cf2d83089edb43ec546b49d0d.tar.gz
llvm-3a8ee4ffd783bd0cf2d83089edb43ec546b49d0d.tar.bz2
llvm-3a8ee4ffd783bd0cf2d83089edb43ec546b49d0d.tar.xz
[NVPTX] Use approximate FP ops when unsafe-fp-math is used, and append
.ftz to instructions if the nvptx-f32ftz attribute is set to "true"

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186820 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r-- lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp 67
-rw-r--r-- lib/Target/NVPTX/NVPTXISelDAGToDAG.h 26
-rw-r--r-- lib/Target/NVPTX/NVPTXInstrInfo.td 24
3 files changed, 61 insertions, 56 deletions
diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index b613587f2d..ba85e35a73 100644
--- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -25,11 +25,6 @@
using namespace llvm;
-static cl::opt<bool> UseFMADInstruction(
- "nvptx-mad-enable", cl::ZeroOrMore,
- cl::desc("NVPTX Specific: Enable generating FMAD instructions"),
- cl::init(false));
-
static cl::opt<int>
FMAContractLevel("nvptx-fma-level", cl::ZeroOrMore,
cl::desc("NVPTX Specific: FMA contraction (0: don't do it"
@@ -47,6 +42,12 @@ UsePrecSqrtF32("nvptx-prec-sqrtf32",
cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."),
cl::init(true));
+static cl::opt<bool>
+FtzEnabled("nvptx-f32ftz", cl::ZeroOrMore,
+ cl::desc("NVPTX Specific: Flush f32 subnormals to sign-preserving zero."),
+ cl::init(false));
+
+
/// createNVPTXISelDag - This pass converts a legalized DAG into a
/// NVPTX-specific DAG, ready for instruction scheduling.
FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM,
@@ -58,12 +59,7 @@ NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
CodeGenOpt::Level OptLevel)
: SelectionDAGISel(tm, OptLevel),
Subtarget(tm.getSubtarget<NVPTXSubtarget>()) {
- // Always do fma.f32 fpcontract if the target supports the instruction.
- // Always do fma.f64 fpcontract if the target supports the instruction.
- // Do mad.f32 is nvptx-mad-enable is specified and the target does not
- // support fma.f32.
- doFMADF32 = (OptLevel > 0) && UseFMADInstruction && !Subtarget.hasFMAF32();
doFMAF32 = (OptLevel > 0) && Subtarget.hasFMAF32() && (FMAContractLevel >= 1);
doFMAF64 = (OptLevel > 0) && Subtarget.hasFMAF64() && (FMAContractLevel >= 1);
doFMAF32AGG =
@@ -71,20 +67,51 @@ NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
doFMAF64AGG =
(OptLevel > 0) && Subtarget.hasFMAF64() && (FMAContractLevel == 2);
- allowFMA = (FMAContractLevel >= 1) || UseFMADInstruction;
-
- UseF32FTZ = false;
+ allowFMA = (FMAContractLevel >= 1);
doMulWide = (OptLevel > 0);
+}
- // Decide how to translate f32 div
- do_DIVF32_PREC = UsePrecDivF32;
- // Decide how to translate f32 sqrt
- do_SQRTF32_PREC = UsePrecSqrtF32;
- // sm less than sm_20 does not support div.rnd. Use div.full.
- if (do_DIVF32_PREC == 2 && !Subtarget.reqPTX20())
- do_DIVF32_PREC = 1;
+int NVPTXDAGToDAGISel::getDivF32Level() const {
+ if (UsePrecDivF32.getNumOccurrences() > 0) {
+ // If nvptx-prec-div32=N is used on the command-line, always honor it
+ return UsePrecDivF32;
+ } else {
+ // Otherwise, use div.approx if fast math is enabled
+ if (TM.Options.UnsafeFPMath)
+ return 0;
+ else
+ return 2;
+ }
+}
+bool NVPTXDAGToDAGISel::usePrecSqrtF32() const {
+ if (UsePrecSqrtF32.getNumOccurrences() > 0) {
+ // If nvptx-prec-sqrtf32 is used on the command-line, always honor it
+ return UsePrecSqrtF32;
+ } else {
+ // Otherwise, use sqrt.approx if fast math is enabled
+ if (TM.Options.UnsafeFPMath)
+ return false;
+ else
+ return true;
+ }
+}
+
+bool NVPTXDAGToDAGISel::useF32FTZ() const {
+ if (FtzEnabled.getNumOccurrences() > 0) {
+ // If nvptx-f32ftz is used on the command-line, always honor it
+ return FtzEnabled;
+ } else {
+ const Function *F = MF->getFunction();
+ // Otherwise, check for an nvptx-f32ftz attribute on the function
+ if (F->hasFnAttribute("nvptx-f32ftz"))
+ return (F->getAttributes().getAttribute(AttributeSet::FunctionIndex,
+ "nvptx-f32ftz")
+ .getValueAsString() == "true");
+ else
+ return false;
+ }
}
/// Select - Select instructions not customized! Used for
diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
index 428e7b2288..d961e50145 100644
--- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
+++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
@@ -28,38 +28,22 @@ class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel {
// If true, generate corresponding FPCONTRACT. This is
// language dependent (i.e. CUDA and OpenCL works differently).
- bool doFMADF32;
bool doFMAF64;
bool doFMAF32;
bool doFMAF64AGG;
bool doFMAF32AGG;
bool allowFMA;
- // 0: use div.approx
- // 1: use div.full
- // 2: For sm_20 and later, ieee-compliant div.rnd.f32 can be generated;
- // Otherwise, use div.full
- int do_DIVF32_PREC;
-
- // If true, generate sqrt.rn, else generate sqrt.approx. If FTZ
- // is true, then generate the corresponding FTZ version.
- bool do_SQRTF32_PREC;
-
- // If true, add .ftz to f32 instructions.
- // This is only meaningful for sm_20 and later, as the default
- // is not ftz.
- // For sm earlier than sm_20, f32 denorms are always ftz by the
- // hardware.
- // We always add the .ftz modifier regardless of the sm value
- // when Use32FTZ is true.
- bool UseF32FTZ;
-
// If true, generate mul.wide from sext and mul
bool doMulWide;
+ int getDivF32Level() const;
+ bool usePrecSqrtF32() const;
+ bool useF32FTZ() const;
+
public:
explicit NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
- CodeGenOpt::Level OptLevel);
+ CodeGenOpt::Level OptLevel);
// Pass Name
virtual const char *getPassName() const {
diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.td b/lib/Target/NVPTX/NVPTXInstrInfo.td
index e6335a0d8e..8ce16e9d1c 100644
--- a/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -136,28 +136,26 @@ def hasLDG : Predicate<"Subtarget.hasLDG()">;
def hasLDU : Predicate<"Subtarget.hasLDU()">;
def hasGenericLdSt : Predicate<"Subtarget.hasGenericLdSt()">;
-def doF32FTZ : Predicate<"UseF32FTZ==1">;
-def doNoF32FTZ : Predicate<"UseF32FTZ==0">;
+def doF32FTZ : Predicate<"useF32FTZ()">;
+def doNoF32FTZ : Predicate<"!useF32FTZ()">;
def doFMAF32 : Predicate<"doFMAF32">;
-def doFMAF32_ftz : Predicate<"(doFMAF32 && UseF32FTZ)">;
+def doFMAF32_ftz : Predicate<"(doFMAF32 && useF32FTZ())">;
def doFMAF32AGG : Predicate<"doFMAF32AGG">;
-def doFMAF32AGG_ftz : Predicate<"(doFMAF32AGG && UseF32FTZ)">;
+def doFMAF32AGG_ftz : Predicate<"(doFMAF32AGG && useF32FTZ())">;
def doFMAF64 : Predicate<"doFMAF64">;
def doFMAF64AGG : Predicate<"doFMAF64AGG">;
-def doFMADF32 : Predicate<"doFMADF32">;
-def doFMADF32_ftz : Predicate<"(doFMADF32 && UseF32FTZ)">;
def doMulWide : Predicate<"doMulWide">;
def allowFMA : Predicate<"allowFMA">;
-def allowFMA_ftz : Predicate<"(allowFMA && UseF32FTZ)">;
+def allowFMA_ftz : Predicate<"(allowFMA && useF32FTZ())">;
-def do_DIVF32_APPROX : Predicate<"do_DIVF32_PREC==0">;
-def do_DIVF32_FULL : Predicate<"do_DIVF32_PREC==1">;
+def do_DIVF32_APPROX : Predicate<"getDivF32Level()==0">;
+def do_DIVF32_FULL : Predicate<"getDivF32Level()==1">;
-def do_SQRTF32_APPROX : Predicate<"do_SQRTF32_PREC==0">;
-def do_SQRTF32_RN : Predicate<"do_SQRTF32_PREC==1">;
+def do_SQRTF32_APPROX : Predicate<"!usePrecSqrtF32()">;
+def do_SQRTF32_RN : Predicate<"usePrecSqrtF32()">;
def hasHWROT32 : Predicate<"Subtarget.hasHWROT32()">;
@@ -864,8 +862,6 @@ multiclass FPCONTRACT64<string OpcStr, Predicate Pred> {
// If we reverse the order of the following two lines, then rrr2 rule will be
// generated for FMA32, but not for rrr.
// Therefore, we manually write the rrr2 rule in FPCONTRACT32.
-defm FMAD32_ftz : FPCONTRACT32<"mad.ftz.f32", doFMADF32_ftz>;
-defm FMAD32 : FPCONTRACT32<"mad.f32", doFMADF32>;
defm FMA32_ftz : FPCONTRACT32<"fma.rn.ftz.f32", doFMAF32_ftz>;
defm FMA32 : FPCONTRACT32<"fma.rn.f32", doFMAF32>;
defm FMA64 : FPCONTRACT64<"fma.rn.f64", doFMAF64>;
@@ -904,8 +900,6 @@ multiclass FPCONTRACT64_SUB_PAT<NVPTXInst Inst, Predicate Pred> {
defm FMAF32ext_ftz : FPCONTRACT32_SUB_PAT<FMA32_ftzrrr, doFMAF32AGG_ftz>;
defm FMAF32ext : FPCONTRACT32_SUB_PAT<FMA32rrr, doFMAF32AGG>;
-defm FMADF32ext_ftz : FPCONTRACT32_SUB_PAT_MAD<FMAD32_ftzrrr, doFMADF32_ftz>;
-defm FMADF32ext : FPCONTRACT32_SUB_PAT_MAD<FMAD32rrr, doFMADF32>;
defm FMAF64ext : FPCONTRACT64_SUB_PAT<FMA64rrr, doFMAF64AGG>;
def SINF: NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),