summary | refs | log | tree | commit | diff
path: root/lib
diff options
context:
space:
mode:
author: Justin Holewinski <justin.holewinski@gmail.com> 2011-03-10 16:57:18 +0000
committer: Justin Holewinski <justin.holewinski@gmail.com> 2011-03-10 16:57:18 +0000
commit: fca9efcbc4914603af1fd1cbf2a76a468a9ecf78 (patch)
tree: 0dfedf4743de907f559cbcdebf3b884c087da42d /lib
parent: 7deb187736b09aa0805b7d9902f499e41feefccc (diff)
download: llvm-fca9efcbc4914603af1fd1cbf2a76a468a9ecf78.tar.gz
llvm-fca9efcbc4914603af1fd1cbf2a76a468a9ecf78.tar.bz2
llvm-fca9efcbc4914603af1fd1cbf2a76a468a9ecf78.tar.xz
PTX: Add preliminary support for floating-point divide and multiply-and-add
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@127410 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r-- lib/Target/PTX/PTXInstrInfo.td 101
-rw-r--r-- lib/Target/PTX/PTXSubtarget.h 8
2 files changed, 104 insertions, 5 deletions
diff --git a/lib/Target/PTX/PTXInstrInfo.td b/lib/Target/PTX/PTXInstrInfo.td
index bc155736ad..065799430c 100644
--- a/lib/Target/PTX/PTXInstrInfo.td
+++ b/lib/Target/PTX/PTXInstrInfo.td
@@ -21,9 +21,22 @@ include "PTXInstrFormats.td"
// Code Generation Predicates
//===----------------------------------------------------------------------===//
+// Addressing: gate patterns on whether the subtarget uses 64-bit addresses
def Use32BitAddresses : Predicate<"!getSubtarget().use64BitAddresses()">;
def Use64BitAddresses : Predicate<"getSubtarget().use64BitAddresses()">;
+// Shader Model Support: "at least this SM" checks and their negations
+def SupportsSM13 : Predicate<"getSubtarget().supportsSM13()">;
+def DoesNotSupportSM13 : Predicate<"!getSubtarget().supportsSM13()">;
+def SupportsSM20 : Predicate<"getSubtarget().supportsSM20()">;
+def DoesNotSupportSM20 : Predicate<"!getSubtarget().supportsSM20()">;
+
+// PTX Version Support: "at least this PTX version" checks and their negations
+def SupportsPTX20 : Predicate<"getSubtarget().supportsPTX20()">;
+def DoesNotSupportPTX20 : Predicate<"!getSubtarget().supportsPTX20()">;
+def SupportsPTX21 : Predicate<"getSubtarget().supportsPTX21()">;
+def DoesNotSupportPTX21 : Predicate<"!getSubtarget().supportsPTX21()">;
+
//===----------------------------------------------------------------------===//
// Instruction Pattern Stuff
//===----------------------------------------------------------------------===//
@@ -165,8 +178,8 @@ def PTXret
// Instruction Class Templates
//===----------------------------------------------------------------------===//
-// Three-operand floating-point instruction template
-multiclass FLOAT3<string opcstr, SDNode opnode> {
+//===- Floating-Point Instructions - 3 Operand Form -----------------------===//
+multiclass PTX_FLOAT_3OP<string opcstr, SDNode opnode> {
def rr32 : InstPTX<(outs RRegf32:$d),
(ins RRegf32:$a, RRegf32:$b),
!strconcat(opcstr, ".f32\t$d, $a, $b"),
@@ -185,6 +198,34 @@ multiclass FLOAT3<string opcstr, SDNode opnode> {
[(set RRegf64:$d, (opnode RRegf64:$a, fpimm:$b))]>;
}
+//===- Floating-Point 4-Operand Form: $d = opnode2(opnode1($a, $b), $c) -===//
+multiclass PTX_FLOAT_4OP<string opcstr, SDNode opnode1, SDNode opnode2> {
+ def rrr32 : InstPTX<(outs RRegf32:$d), // f32; $c is a register
+ (ins RRegf32:$a, RRegf32:$b, RRegf32:$c),
+ !strconcat(opcstr, ".f32\t$d, $a, $b, $c"),
+ [(set RRegf32:$d, (opnode2 (opnode1 RRegf32:$a,
+ RRegf32:$b),
+ RRegf32:$c))]>;
+ def rri32 : InstPTX<(outs RRegf32:$d), // f32; $c is an fp immediate
+ (ins RRegf32:$a, RRegf32:$b, f32imm:$c),
+ !strconcat(opcstr, ".f32\t$d, $a, $b, $c"),
+ [(set RRegf32:$d, (opnode2 (opnode1 RRegf32:$a,
+ RRegf32:$b),
+ fpimm:$c))]>;
+ def rrr64 : InstPTX<(outs RRegf64:$d), // f64; $c is a register
+ (ins RRegf64:$a, RRegf64:$b, RRegf64:$c),
+ !strconcat(opcstr, ".f64\t$d, $a, $b, $c"),
+ [(set RRegf64:$d, (opnode2 (opnode1 RRegf64:$a,
+ RRegf64:$b),
+ RRegf64:$c))]>;
+ def rri64 : InstPTX<(outs RRegf64:$d), // f64; $c is an fp immediate
+ (ins RRegf64:$a, RRegf64:$b, f64imm:$c),
+ !strconcat(opcstr, ".f64\t$d, $a, $b, $c"),
+ [(set RRegf64:$d, (opnode2 (opnode1 RRegf64:$a,
+ RRegf64:$b),
+ fpimm:$c))]>;
+}
+
multiclass INT3<string opcstr, SDNode opnode> {
def rr16 : InstPTX<(outs RRegu16:$d),
(ins RRegu16:$a, RRegu16:$b),
@@ -304,9 +345,59 @@ multiclass PTX_ST_ALL<string opstr, PatFrag pat_store> {
///===- Floating-Point Arithmetic Instructions ----------------------------===//
-defm FADD : FLOAT3<"add", fadd>;
-defm FSUB : FLOAT3<"sub", fsub>;
-defm FMUL : FLOAT3<"mul", fmul>;
+// Standard binary operations (the opcode string is suffixed with the type)
+defm FADD : PTX_FLOAT_3OP<"add", fadd>;
+defm FSUB : PTX_FLOAT_3OP<"sub", fsub>;
+defm FMUL : PTX_FLOAT_3OP<"mul", fmul>;
+
+// TODO: Allow user selection of rounding modes for fdiv.
+// For division, f32 and f64 need to be handled differently.
+// For f32, we always use .approx since it is supported on all hardware
+// for PTX 1.4+, which is our minimum target.
+def FDIVrr32 : InstPTX<(outs RRegf32:$d), // divisor in a register
+ (ins RRegf32:$a, RRegf32:$b),
+ "div.approx.f32\t$d, $a, $b",
+ [(set RRegf32:$d, (fdiv RRegf32:$a, RRegf32:$b))]>;
+def FDIVri32 : InstPTX<(outs RRegf32:$d), // divisor is an fp immediate
+ (ins RRegf32:$a, f32imm:$b),
+ "div.approx.f32\t$d, $a, $b",
+ [(set RRegf32:$d, (fdiv RRegf32:$a, fpimm:$b))]>;
+
+// For f64, we must specify a rounding mode on sm 1.3+ but *not* below sm 1.3.
+def FDIVrr64SM13 : InstPTX<(outs RRegf64:$d), // register divisor
+ (ins RRegf64:$a, RRegf64:$b),
+ "div.rn.f64\t$d, $a, $b",
+ [(set RRegf64:$d, (fdiv RRegf64:$a, RRegf64:$b))]>,
+ Requires<[SupportsSM13]>; // sm 1.3+: explicit .rn modifier
+def FDIVri64SM13 : InstPTX<(outs RRegf64:$d), // immediate divisor
+ (ins RRegf64:$a, f64imm:$b),
+ "div.rn.f64\t$d, $a, $b",
+ [(set RRegf64:$d, (fdiv RRegf64:$a, fpimm:$b))]>,
+ Requires<[SupportsSM13]>; // sm 1.3+: explicit .rn modifier
+def FDIVrr64SM10 : InstPTX<(outs RRegf64:$d), // register divisor
+ (ins RRegf64:$a, RRegf64:$b),
+ "div.f64\t$d, $a, $b",
+ [(set RRegf64:$d, (fdiv RRegf64:$a, RRegf64:$b))]>,
+ Requires<[DoesNotSupportSM13]>; // below sm 1.3: no rounding modifier
+def FDIVri64SM10 : InstPTX<(outs RRegf64:$d), // immediate divisor
+ (ins RRegf64:$a, f64imm:$b),
+ "div.f64\t$d, $a, $b",
+ [(set RRegf64:$d, (fdiv RRegf64:$a, fpimm:$b))]>,
+ Requires<[DoesNotSupportSM13]>; // below sm 1.3: no rounding modifier
+
+
+
+// Multi-operation hybrid instructions
+
+// Selecting mad vs. fma is tricky: sometimes they are the *same* instruction,
+// sometimes one is preferable, and PTX versions differ on rounding-mode flags.
+// In the short term, mad is supported on all PTX versions, so we emit "mad.rn"
+// for (fadd (fmul a, b), c) regardless of shader model or PTX version.
+// NOTE(review): PTX ISA seems to disallow .rn on mad.f32 for sm_1x - confirm.
+// TODO: Allow the rounding mode to be selectable through llc.
+defm FMAD : PTX_FLOAT_4OP<"mad.rn", fmul, fadd>;
+
+
///===- Integer Arithmetic Instructions -----------------------------------===//
diff --git a/lib/Target/PTX/PTXSubtarget.h b/lib/Target/PTX/PTXSubtarget.h
index 23aa3a349f..19a870d4ce 100644
--- a/lib/Target/PTX/PTXSubtarget.h
+++ b/lib/Target/PTX/PTXSubtarget.h
@@ -54,6 +54,14 @@ namespace llvm {
bool use64BitAddresses() const { return Use64BitAddresses; } // returns the flag unchanged
+ bool supportsSM13() const { return PTXShaderModel >= PTX_SM_1_3; } // at-least check (>=), not an exact match
+
+ bool supportsSM20() const { return PTXShaderModel >= PTX_SM_2_0; } // at-least check (>=), not an exact match
+
+ bool supportsPTX20() const { return PTXVersion >= PTX_VERSION_2_0; } // at-least check (>=), not an exact match
+
+ bool supportsPTX21() const { return PTXVersion >= PTX_VERSION_2_1; } // at-least check (>=), not an exact match
+
std::string ParseSubtargetFeatures(const std::string &FS,
const std::string &CPU);
}; // class PTXSubtarget