diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2014-06-23 18:28:28 +0000 |
---|---|---|
committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2014-06-23 18:28:28 +0000 |
commit | ed143b7c0cca568a7dc35e91d2e37207f79b8d76 (patch) | |
tree | 04012227cf8222bd32367926f86520c2277c7496 /lib | |
parent | d88f5b71c79ae77a41218ad3e9c5491caa2c766f (diff) | |
download | llvm-ed143b7c0cca568a7dc35e91d2e37207f79b8d76.tar.gz llvm-ed143b7c0cca568a7dc35e91d2e37207f79b8d76.tar.bz2 llvm-ed143b7c0cca568a7dc35e91d2e37207f79b8d76.tar.xz |
R600/SI: Fix div_scale intrinsic.
The operand that must match one of the others does matter,
and implement selecting for it.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211523 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Target/R600/AMDGPUISelDAGToDAG.cpp | 28 | ||||
-rw-r--r-- | lib/Target/R600/AMDGPUISelLowering.cpp | 16 | ||||
-rw-r--r-- | lib/Target/R600/SIInstrInfo.td | 16 | ||||
-rw-r--r-- | lib/Target/R600/SIInstructions.td | 6 |
4 files changed, 62 insertions, 4 deletions
diff --git a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp index 85d0f9dd69..37071bc3f1 100644 --- a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp +++ b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp @@ -86,6 +86,7 @@ private: bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset); SDNode *SelectADD_SUB_I64(SDNode *N); + SDNode *SelectDIV_SCALE(SDNode *N); // Include the pieces autogenerated from the target description. #include "AMDGPUGenDAGISel.inc" @@ -454,6 +455,9 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { PackedOffsetWidth); } + case AMDGPUISD::DIV_SCALE: { + return SelectDIV_SCALE(N); + } } return SelectCode(N); } @@ -695,6 +699,30 @@ SDNode *AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) { return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, MVT::i64, Args); } +SDNode *AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) { + SDLoc SL(N); + EVT VT = N->getValueType(0); + + assert(VT == MVT::f32 || VT == MVT::f64); + + unsigned Opc + = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32; + + const SDValue Zero = CurDAG->getTargetConstant(0, MVT::i32); + + SDValue Ops[] = { + N->getOperand(0), + N->getOperand(1), + N->getOperand(2), + Zero, + Zero, + Zero, + Zero + }; + + return CurDAG->SelectNodeTo(N, Opc, VT, MVT::i1, Ops); +} + void AMDGPUDAGToDAGISel::PostprocessISelDAG() { const AMDGPUTargetLowering& Lowering = *static_cast<const AMDGPUTargetLowering*>(getTargetLowering()); diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index 6ff1703c91..ca8d0a1626 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -771,9 +771,21 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, return DAG.getNode(AMDGPUISD::CLAMP, DL, VT, Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); - case Intrinsic::AMDGPU_div_scale: + case Intrinsic::AMDGPU_div_scale: { + // 3rd parameter required to be a constant. + const ConstantSDNode *Param = dyn_cast<ConstantSDNode>(Op.getOperand(3)); + if (!Param) + return DAG.getUNDEF(VT); + + // Translate to the operands expected by the machine instruction. The + // first parameter must be the same as the first instruction. + SDValue Numerator = Op.getOperand(1); + SDValue Denominator = Op.getOperand(2); + SDValue Src0 = Param->isAllOnesValue() ? Numerator : Denominator; + return DAG.getNode(AMDGPUISD::DIV_SCALE, DL, VT, - Op.getOperand(1), Op.getOperand(2)); + Src0, Denominator, Numerator); + } case Intrinsic::AMDGPU_div_fmas: return DAG.getNode(AMDGPUISD::DIV_FMAS, DL, VT, diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td index c4994a250a..dd9a6dc527 100644 --- a/lib/Target/R600/SIInstrInfo.td +++ b/lib/Target/R600/SIInstrInfo.td @@ -433,6 +433,22 @@ class VOP3_64 <bits<9> op, string opName, list<dag> pattern> : VOP3 < opName#" $dst, $src0_modifiers, $src1_modifiers, $src2_modifiers, $clamp, $omod", pattern >, VOP <opName>; + +class VOP3b_Helper <bits<9> op, RegisterClass vrc, RegisterClass arc, + string opName, list<dag> pattern> : VOP3 < + op, (outs vrc:$dst0, SReg_64:$dst1), + (ins arc:$src0, arc:$src1, arc:$src2, + InstFlag:$abs, InstFlag:$clamp, InstFlag:$omod, InstFlag:$neg), + opName#" $dst0, $dst1, $src0, $src1, $src2, $abs, $clamp, $omod, $neg", pattern +>, VOP <opName>; + + +class VOP3b_64 <bits<9> op, string opName, list<dag> pattern> : + VOP3b_Helper <op, VReg_64, VSrc_64, opName, pattern>; + +class VOP3b_32 <bits<9> op, string opName, list<dag> pattern> : + VOP3b_Helper <op, VReg_32, VSrc_32, opName, pattern>; + //===----------------------------------------------------------------------===// // Vector I/O classes //===----------------------------------------------------------------------===// diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index f888b8e6e0..6a29dbe77d 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -1455,8 +1455,10 @@ defm V_MUL_HI_I32 : VOP3_32 <0x0000016c, "V_MUL_HI_I32", []>; } // isCommutable = 1 -defm V_DIV_SCALE_F32 : VOP3_32 <0x0000016d, "V_DIV_SCALE_F32", []>; -def V_DIV_SCALE_F64 : VOP3_64 <0x0000016e, "V_DIV_SCALE_F64", []>; +def V_DIV_SCALE_F32 : VOP3b_32 <0x0000016d, "V_DIV_SCALE_F32", []>; + +// Double precision division pre-scale. +def V_DIV_SCALE_F64 : VOP3b_64 <0x0000016e, "V_DIV_SCALE_F64", []>; defm V_DIV_FMAS_F32 : VOP3_32 <0x0000016f, "V_DIV_FMAS_F32", [(set f32:$dst, (AMDGPUdiv_fmas f32:$src0, f32:$src1, f32:$src2))] |