diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2014-06-24 22:13:39 +0000 |
---|---|---|
committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2014-06-24 22:13:39 +0000 |
commit | 95eb45c5d94bfe9360ffc021697a50c6cf8c08cd (patch) | |
tree | ccff569afcf67b58b968ce88f204ad9641431949 /lib/Target/R600/SIInstructions.td | |
parent | 0029534141ee83d827267d4ac9418bbed5704c2e (diff) | |
download | llvm-95eb45c5d94bfe9360ffc021697a50c6cf8c08cd.tar.gz llvm-95eb45c5d94bfe9360ffc021697a50c6cf8c08cd.tar.bz2 llvm-95eb45c5d94bfe9360ffc021697a50c6cf8c08cd.tar.xz |
R600: Fix inconsistency in rsq instructions.
R600 was using a clamped version of rsq, but SI was not. Add a
new rsq_clamped intrinsic and use them consistently.
It's unclear to me from the documentation what behavior
the R600 instructions have, so I assume they have the legacy behavior
described by the SI documents. For R600, use RECIPSQRT_IEEE
for both llvm.AMDGPU.rsq.legacy and llvm.AMDGPU.rsq. R600 also
has RECIPSQRT_FF, which I'm not sure how it fits in here.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211637 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target/R600/SIInstructions.td')
-rw-r--r-- | lib/Target/R600/SIInstructions.td | 18 |
1 files changed, 11 insertions, 7 deletions
diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 72c51a11fa..507cfe8e81 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -1123,22 +1123,26 @@ defm V_RCP_F32 : VOP1_32 <0x0000002a, "V_RCP_F32", [(set f32:$dst, (AMDGPUrcp f32:$src0))] >; defm V_RCP_IFLAG_F32 : VOP1_32 <0x0000002b, "V_RCP_IFLAG_F32", []>; -defm V_RSQ_CLAMP_F32 : VOP1_32 <0x0000002c, "V_RSQ_CLAMP_F32", []>; +defm V_RSQ_CLAMP_F32 : VOP1_32 <0x0000002c, "V_RSQ_CLAMP_F32", + [(set f32:$dst, (AMDGPUrsq_clamped f32:$src0))] +>; defm V_RSQ_LEGACY_F32 : VOP1_32 < 0x0000002d, "V_RSQ_LEGACY_F32", - [(set f32:$dst, (AMDGPUrsq f32:$src0))] + [(set f32:$dst, (AMDGPUrsq_legacy f32:$src0))] >; defm V_RSQ_F32 : VOP1_32 <0x0000002e, "V_RSQ_F32", - [(set f32:$dst, (fdiv FP_ONE, (fsqrt f32:$src0)))] + [(set f32:$dst, (AMDGPUrsq f32:$src0))] >; defm V_RCP_F64 : VOP1_64 <0x0000002f, "V_RCP_F64", [(set f64:$dst, (AMDGPUrcp f64:$src0))] >; defm V_RCP_CLAMP_F64 : VOP1_64 <0x00000030, "V_RCP_CLAMP_F64", []>; defm V_RSQ_F64 : VOP1_64 <0x00000031, "V_RSQ_F64", - [(set f64:$dst, (fdiv FP_ONE, (fsqrt f64:$src0)))] + [(set f64:$dst, (AMDGPUrsq f64:$src0))] +>; +defm V_RSQ_CLAMP_F64 : VOP1_64 <0x00000032, "V_RSQ_CLAMP_F64", + [(set f64:$dst, (AMDGPUrsq_clamped f64:$src0))] >; -defm V_RSQ_CLAMP_F64 : VOP1_64 <0x00000032, "V_RSQ_CLAMP_F64", []>; defm V_SQRT_F32 : VOP1_32 <0x00000033, "V_SQRT_F32", [(set f32:$dst, (fsqrt f32:$src0))] >; @@ -1781,8 +1785,8 @@ def : Pat < def : RcpPat<V_RCP_F32_e32, f32>; def : RcpPat<V_RCP_F64_e32, f64>; -def : RsqPat<V_RSQ_F32_e32, f32>; -def : RsqPat<V_RSQ_F64_e32, f64>; +defm : RsqPat<V_RSQ_F32_e32, f32>; +defm : RsqPat<V_RSQ_F64_e32, f64>; //===----------------------------------------------------------------------===// // VOP2 Patterns |