diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2014-06-23 18:28:28 +0000 |
---|---|---|
committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2014-06-23 18:28:28 +0000 |
commit | ed143b7c0cca568a7dc35e91d2e37207f79b8d76 (patch) | |
tree | 04012227cf8222bd32367926f86520c2277c7496 | |
parent | d88f5b71c79ae77a41218ad3e9c5491caa2c766f (diff) | |
download | llvm-ed143b7c0cca568a7dc35e91d2e37207f79b8d76.tar.gz llvm-ed143b7c0cca568a7dc35e91d2e37207f79b8d76.tar.bz2 llvm-ed143b7c0cca568a7dc35e91d2e37207f79b8d76.tar.xz |
R600/SI: Fix div_scale intrinsic.
The operand that must match one of the others does matter,
and implement selecting for it.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211523 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | include/llvm/IR/IntrinsicsR600.td | 7 | ||||
-rw-r--r-- | lib/Target/R600/AMDGPUISelDAGToDAG.cpp | 28 | ||||
-rw-r--r-- | lib/Target/R600/AMDGPUISelLowering.cpp | 16 | ||||
-rw-r--r-- | lib/Target/R600/SIInstrInfo.td | 16 | ||||
-rw-r--r-- | lib/Target/R600/SIInstructions.td | 6 | ||||
-rw-r--r-- | test/CodeGen/R600/llvm.AMDGPU.div_scale.ll | 47 |
6 files changed, 104 insertions, 16 deletions
diff --git a/include/llvm/IR/IntrinsicsR600.td b/include/llvm/IR/IntrinsicsR600.td index 89b3ef3636..2566cc2561 100644 --- a/include/llvm/IR/IntrinsicsR600.td +++ b/include/llvm/IR/IntrinsicsR600.td @@ -38,8 +38,13 @@ defm int_r600_read_tidig : R600ReadPreloadRegisterIntrinsic_xyz < let TargetPrefix = "AMDGPU" in { def int_AMDGPU_div_scale : GCCBuiltin<"__builtin_amdgpu_div_scale">, + // 1st parameter: Numerator + // 2nd parameter: Denominator + // 3rd parameter: Constant to select select between first and + // second. (0 = first, 1 = second). Intrinsic<[llvm_anyfloat_ty, llvm_i1_ty], - [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>; + [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i1_ty], + [IntrNoMem]>; def int_AMDGPU_div_fmas : GCCBuiltin<"__builtin_amdgpu_div_fmas">, Intrinsic<[llvm_anyfloat_ty], diff --git a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp index 85d0f9dd69..37071bc3f1 100644 --- a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp +++ b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp @@ -86,6 +86,7 @@ private: bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset); SDNode *SelectADD_SUB_I64(SDNode *N); + SDNode *SelectDIV_SCALE(SDNode *N); // Include the pieces autogenerated from the target description. #include "AMDGPUGenDAGISel.inc" @@ -454,6 +455,9 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { PackedOffsetWidth); } + case AMDGPUISD::DIV_SCALE: { + return SelectDIV_SCALE(N); + } } return SelectCode(N); } @@ -695,6 +699,30 @@ SDNode *AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) { return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, MVT::i64, Args); } +SDNode *AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) { + SDLoc SL(N); + EVT VT = N->getValueType(0); + + assert(VT == MVT::f32 || VT == MVT::f64); + + unsigned Opc + = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32; + + const SDValue Zero = CurDAG->getTargetConstant(0, MVT::i32); + + SDValue Ops[] = { + N->getOperand(0), + N->getOperand(1), + N->getOperand(2), + Zero, + Zero, + Zero, + Zero + }; + + return CurDAG->SelectNodeTo(N, Opc, VT, MVT::i1, Ops); +} + void AMDGPUDAGToDAGISel::PostprocessISelDAG() { const AMDGPUTargetLowering& Lowering = *static_cast<const AMDGPUTargetLowering*>(getTargetLowering()); diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index 6ff1703c91..ca8d0a1626 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -771,9 +771,21 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, return DAG.getNode(AMDGPUISD::CLAMP, DL, VT, Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); - case Intrinsic::AMDGPU_div_scale: + case Intrinsic::AMDGPU_div_scale: { + // 3rd parameter required to be a constant. + const ConstantSDNode *Param = dyn_cast<ConstantSDNode>(Op.getOperand(3)); + if (!Param) + return DAG.getUNDEF(VT); + + // Translate to the operands expected by the machine instruction. The + // first parameter must be the same as the first instruction. + SDValue Numerator = Op.getOperand(1); + SDValue Denominator = Op.getOperand(2); + SDValue Src0 = Param->isAllOnesValue() ? Numerator : Denominator; + return DAG.getNode(AMDGPUISD::DIV_SCALE, DL, VT, - Op.getOperand(1), Op.getOperand(2)); + Src0, Denominator, Numerator); + } case Intrinsic::AMDGPU_div_fmas: return DAG.getNode(AMDGPUISD::DIV_FMAS, DL, VT, diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td index c4994a250a..dd9a6dc527 100644 --- a/lib/Target/R600/SIInstrInfo.td +++ b/lib/Target/R600/SIInstrInfo.td @@ -433,6 +433,22 @@ class VOP3_64 <bits<9> op, string opName, list<dag> pattern> : VOP3 < opName#" $dst, $src0_modifiers, $src1_modifiers, $src2_modifiers, $clamp, $omod", pattern >, VOP <opName>; + +class VOP3b_Helper <bits<9> op, RegisterClass vrc, RegisterClass arc, + string opName, list<dag> pattern> : VOP3 < + op, (outs vrc:$dst0, SReg_64:$dst1), + (ins arc:$src0, arc:$src1, arc:$src2, + InstFlag:$abs, InstFlag:$clamp, InstFlag:$omod, InstFlag:$neg), + opName#" $dst0, $dst1, $src0, $src1, $src2, $abs, $clamp, $omod, $neg", pattern +>, VOP <opName>; + + +class VOP3b_64 <bits<9> op, string opName, list<dag> pattern> : + VOP3b_Helper <op, VReg_64, VSrc_64, opName, pattern>; + +class VOP3b_32 <bits<9> op, string opName, list<dag> pattern> : + VOP3b_Helper <op, VReg_32, VSrc_32, opName, pattern>; + //===----------------------------------------------------------------------===// // Vector I/O classes //===----------------------------------------------------------------------===// diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index f888b8e6e0..6a29dbe77d 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -1455,8 +1455,10 @@ defm V_MUL_HI_I32 : VOP3_32 <0x0000016c, "V_MUL_HI_I32", []>; } // isCommutable = 1 -defm V_DIV_SCALE_F32 : VOP3_32 <0x0000016d, "V_DIV_SCALE_F32", []>; -def V_DIV_SCALE_F64 : VOP3_64 <0x0000016e, "V_DIV_SCALE_F64", []>; +def V_DIV_SCALE_F32 : VOP3b_32 <0x0000016d, "V_DIV_SCALE_F32", []>; + +// Double precision division pre-scale. +def V_DIV_SCALE_F64 : VOP3b_64 <0x0000016e, "V_DIV_SCALE_F64", []>; defm V_DIV_FMAS_F32 : VOP3_32 <0x0000016f, "V_DIV_FMAS_F32", [(set f32:$dst, (AMDGPUdiv_fmas f32:$src0, f32:$src1, f32:$src2))] diff --git a/test/CodeGen/R600/llvm.AMDGPU.div_scale.ll b/test/CodeGen/R600/llvm.AMDGPU.div_scale.ll index 1bcbe2f859..527c8da10a 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.div_scale.ll +++ b/test/CodeGen/R600/llvm.AMDGPU.div_scale.ll @@ -1,23 +1,48 @@ -; XFAIL: * ; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s -declare float @llvm.AMDGPU.div.scale.f32(float, float) nounwind readnone -declare double @llvm.AMDGPU.div.scale.f64(double, double) nounwind readnone +declare { float, i1 } @llvm.AMDGPU.div.scale.f32(float, float, i1) nounwind readnone +declare { double, i1 } @llvm.AMDGPU.div.scale.f64(double, double, i1) nounwind readnone -; SI-LABEL @test_div_scale_f32: -define void @test_div_scale_f32(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr) nounwind { +; SI-LABEL @test_div_scale_f32_1: +; SI: V_DIV_SCALE_F32 +define void @test_div_scale_f32_1(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr) nounwind { %a = load float addrspace(1)* %aptr, align 4 %b = load float addrspace(1)* %bptr, align 4 - %result = call float @llvm.AMDGPU.div.scale.f32(float %a, float %b) nounwind readnone - store float %result, float addrspace(1)* %out, align 4 + %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 false) nounwind readnone + %result0 = extractvalue { float, i1 } %result, 0 + store float %result0, float addrspace(1)* %out, align 4 ret void } -; SI-LABEL @test_div_scale_f64: -define void @test_div_scale_f64(double addrspace(1)* %out, double addrspace(1)* %aptr, double addrspace(1)* %bptr) nounwind { +; SI-LABEL @test_div_scale_f32_2: +; SI: V_DIV_SCALE_F32 +define void @test_div_scale_f32_2(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr) nounwind { + %a = load float addrspace(1)* %aptr, align 4 + %b = load float addrspace(1)* %bptr, align 4 + %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 true) nounwind readnone + %result0 = extractvalue { float, i1 } %result, 0 + store float %result0, float addrspace(1)* %out, align 4 + ret void +} + +; SI-LABEL @test_div_scale_f64_1: +; SI: V_DIV_SCALE_F64 +define void @test_div_scale_f64_1(double addrspace(1)* %out, double addrspace(1)* %aptr, double addrspace(1)* %bptr, double addrspace(1)* %cptr) nounwind { + %a = load double addrspace(1)* %aptr, align 8 + %b = load double addrspace(1)* %bptr, align 8 + %result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 false) nounwind readnone + %result0 = extractvalue { double, i1 } %result, 0 + store double %result0, double addrspace(1)* %out, align 8 + ret void +} + +; SI-LABEL @test_div_scale_f64_1: +; SI: V_DIV_SCALE_F64 +define void @test_div_scale_f64_2(double addrspace(1)* %out, double addrspace(1)* %aptr, double addrspace(1)* %bptr, double addrspace(1)* %cptr) nounwind { %a = load double addrspace(1)* %aptr, align 8 %b = load double addrspace(1)* %bptr, align 8 - %result = call double @llvm.AMDGPU.div.scale.f64(double %a, double %b) nounwind readnone - store double %result, double addrspace(1)* %out, align 8 + %result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 true) nounwind readnone + %result0 = extractvalue { double, i1 } %result, 0 + store double %result0, double addrspace(1)* %out, align 8 ret void } |