summaryrefslogtreecommitdiff
path: root/lib/Target/R600/SIInstructions.td
diff options
context:
space:
mode:
authorMatt Arsenault <Matthew.Arsenault@amd.com>2014-06-24 22:13:39 +0000
committerMatt Arsenault <Matthew.Arsenault@amd.com>2014-06-24 22:13:39 +0000
commit95eb45c5d94bfe9360ffc021697a50c6cf8c08cd (patch)
treeccff569afcf67b58b968ce88f204ad9641431949 /lib/Target/R600/SIInstructions.td
parent0029534141ee83d827267d4ac9418bbed5704c2e (diff)
downloadllvm-95eb45c5d94bfe9360ffc021697a50c6cf8c08cd.tar.gz
llvm-95eb45c5d94bfe9360ffc021697a50c6cf8c08cd.tar.bz2
llvm-95eb45c5d94bfe9360ffc021697a50c6cf8c08cd.tar.xz
R600: Fix inconsistency in rsq instructions.
R600 was using a clamped version of rsq, but SI was not. Add a new rsq_clamped intrinsic and use them consistently. It's unclear to me from the documentation what behavior the R600 instructions have, so I assume they have the legacy behavior described by the SI documents. For R600, use RECIPSQRT_IEEE for both llvm.AMDGPU.rsq.legacy and llvm.AMDGPU.rsq. R600 also has RECIPSQRT_FF, which I'm not sure how it fits in here. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211637 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target/R600/SIInstructions.td')
-rw-r--r--lib/Target/R600/SIInstructions.td18
1 files changed, 11 insertions, 7 deletions
diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
index 72c51a11fa..507cfe8e81 100644
--- a/lib/Target/R600/SIInstructions.td
+++ b/lib/Target/R600/SIInstructions.td
@@ -1123,22 +1123,26 @@ defm V_RCP_F32 : VOP1_32 <0x0000002a, "V_RCP_F32",
[(set f32:$dst, (AMDGPUrcp f32:$src0))]
>;
defm V_RCP_IFLAG_F32 : VOP1_32 <0x0000002b, "V_RCP_IFLAG_F32", []>;
-defm V_RSQ_CLAMP_F32 : VOP1_32 <0x0000002c, "V_RSQ_CLAMP_F32", []>;
+defm V_RSQ_CLAMP_F32 : VOP1_32 <0x0000002c, "V_RSQ_CLAMP_F32",
+ [(set f32:$dst, (AMDGPUrsq_clamped f32:$src0))]
+>;
defm V_RSQ_LEGACY_F32 : VOP1_32 <
0x0000002d, "V_RSQ_LEGACY_F32",
- [(set f32:$dst, (AMDGPUrsq f32:$src0))]
+ [(set f32:$dst, (AMDGPUrsq_legacy f32:$src0))]
>;
defm V_RSQ_F32 : VOP1_32 <0x0000002e, "V_RSQ_F32",
- [(set f32:$dst, (fdiv FP_ONE, (fsqrt f32:$src0)))]
+ [(set f32:$dst, (AMDGPUrsq f32:$src0))]
>;
defm V_RCP_F64 : VOP1_64 <0x0000002f, "V_RCP_F64",
[(set f64:$dst, (AMDGPUrcp f64:$src0))]
>;
defm V_RCP_CLAMP_F64 : VOP1_64 <0x00000030, "V_RCP_CLAMP_F64", []>;
defm V_RSQ_F64 : VOP1_64 <0x00000031, "V_RSQ_F64",
- [(set f64:$dst, (fdiv FP_ONE, (fsqrt f64:$src0)))]
+ [(set f64:$dst, (AMDGPUrsq f64:$src0))]
+>;
+defm V_RSQ_CLAMP_F64 : VOP1_64 <0x00000032, "V_RSQ_CLAMP_F64",
+ [(set f64:$dst, (AMDGPUrsq_clamped f64:$src0))]
>;
-defm V_RSQ_CLAMP_F64 : VOP1_64 <0x00000032, "V_RSQ_CLAMP_F64", []>;
defm V_SQRT_F32 : VOP1_32 <0x00000033, "V_SQRT_F32",
[(set f32:$dst, (fsqrt f32:$src0))]
>;
@@ -1781,8 +1785,8 @@ def : Pat <
def : RcpPat<V_RCP_F32_e32, f32>;
def : RcpPat<V_RCP_F64_e32, f64>;
-def : RsqPat<V_RSQ_F32_e32, f32>;
-def : RsqPat<V_RSQ_F64_e32, f64>;
+defm : RsqPat<V_RSQ_F32_e32, f32>;
+defm : RsqPat<V_RSQ_F64_e32, f64>;
//===----------------------------------------------------------------------===//
// VOP2 Patterns