R600/SI: Fix div_scale intrinsic.

The operand that must match one of the others does matter, so implement selecting for it. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211523 91177308-0d34-0410-b5e6-96231b3b80d8
author: Matt Arsenault <Matthew.Arsenault@amd.com> 2014-06-23 18:28:28 +0000
committer: Matt Arsenault <Matthew.Arsenault@amd.com> 2014-06-23 18:28:28 +0000
commit: ed143b7c0cca568a7dc35e91d2e37207f79b8d76 (patch)
tree: 04012227cf8222bd32367926f86520c2277c7496 /lib
parent: d88f5b71c79ae77a41218ad3e9c5491caa2c766f (diff)
download: llvm-ed143b7c0cca568a7dc35e91d2e37207f79b8d76.tar.gz
llvm-ed143b7c0cca568a7dc35e91d2e37207f79b8d76.tar.bz2
llvm-ed143b7c0cca568a7dc35e91d2e37207f79b8d76.tar.xz
4 files changed, 62 insertions, 4 deletions
diff --git a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
index 85d0f9dd69..37071bc3f1 100644
--- a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
+++ b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
@@ -86,6 +86,7 @@ private:
   bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
 
   SDNode *SelectADD_SUB_I64(SDNode *N);
+  SDNode *SelectDIV_SCALE(SDNode *N);
 
   // Include the pieces autogenerated from the target description.
 #include "AMDGPUGenDAGISel.inc"
@@ -454,6 +455,9 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
                                   PackedOffsetWidth);
 
   }
+  case AMDGPUISD::DIV_SCALE: {
+    return SelectDIV_SCALE(N);
+  }
   }
   return SelectCode(N);
 }
@@ -695,6 +699,30 @@ SDNode *AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
   return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, MVT::i64, Args);
 }
 
+SDNode *AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) { // Selects DIV_SCALE.
+  SDLoc SL(N); // NOTE(review): SL is never read in this function.
+  EVT VT = N->getValueType(0);
+  // Only f32 and f64 results are handled; VT picks the opcode below.
+  assert(VT == MVT::f32 || VT == MVT::f64);
+  // f64 selects V_DIV_SCALE_F64, otherwise V_DIV_SCALE_F32.
+  unsigned Opc
+    = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;
+
+  const SDValue Zero = CurDAG->getTargetConstant(0, MVT::i32);
+  // Forward the node's three operands unchanged, followed by four zeros.
+  SDValue Ops[] = {
+    N->getOperand(0),
+    N->getOperand(1),
+    N->getOperand(2),
+    Zero, // NOTE(review): presumably abs, per VOP3b operand order -- confirm
+    Zero, // presumably clamp
+    Zero, // presumably omod
+    Zero  // presumably neg
+  };
+  // Select N to Opc with two result types: VT and MVT::i1.
+  return CurDAG->SelectNodeTo(N, Opc, VT, MVT::i1, Ops);
+}
+
 void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
   const AMDGPUTargetLowering& Lowering =
     *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp
index 6ff1703c91..ca8d0a1626 100644
--- a/lib/Target/R600/AMDGPUISelLowering.cpp
+++ b/lib/Target/R600/AMDGPUISelLowering.cpp
@@ -771,9 +771,21 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
       return DAG.getNode(AMDGPUISD::CLAMP, DL, VT,
                          Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
 
-    case Intrinsic::AMDGPU_div_scale:
+    case Intrinsic::AMDGPU_div_scale: {
+      // 3rd parameter required to be a constant.
+      const ConstantSDNode *Param = dyn_cast<ConstantSDNode>(Op.getOperand(3));
+      if (!Param)
+        return DAG.getUNDEF(VT);
+
+      // Translate to the operands expected by the machine instruction. The
+      // first operand must be the same as one of the other two operands.
+      SDValue Numerator = Op.getOperand(1);
+      SDValue Denominator = Op.getOperand(2);
+      SDValue Src0 = Param->isAllOnesValue() ? Numerator : Denominator;
+
       return DAG.getNode(AMDGPUISD::DIV_SCALE, DL, VT,
-                         Op.getOperand(1), Op.getOperand(2));
+                         Src0, Denominator, Numerator);
+    }
 
     case Intrinsic::AMDGPU_div_fmas:
       return DAG.getNode(AMDGPUISD::DIV_FMAS, DL, VT,
diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td
index c4994a250a..dd9a6dc527 100644
--- a/lib/Target/R600/SIInstrInfo.td
+++ b/lib/Target/R600/SIInstrInfo.td
@@ -433,6 +433,22 @@ class VOP3_64 <bits<9> op, string opName, list<dag> pattern> : VOP3 <
   opName#" $dst, $src0_modifiers, $src1_modifiers, $src2_modifiers, $clamp, $omod", pattern
 >, VOP <opName>;
 
+
+class VOP3b_Helper <bits<9> op, RegisterClass vrc, RegisterClass arc,
+                    string opName, list<dag> pattern> : VOP3 <
+  op, (outs vrc:$dst0, SReg_64:$dst1), // two results: a vrc and an SReg_64
+  (ins arc:$src0, arc:$src1, arc:$src2, // three sources, all of class arc
+   InstFlag:$abs, InstFlag:$clamp, InstFlag:$omod, InstFlag:$neg), // 4 flags
+  opName#" $dst0, $dst1, $src0, $src1, $src2, $abs, $clamp, $omod, $neg", pattern
+>, VOP <opName>; // asm string above prints operands in declaration order
+
+
+class VOP3b_64 <bits<9> op, string opName, list<dag> pattern> :
+  VOP3b_Helper <op, VReg_64, VSrc_64, opName, pattern>; // VReg_64 dst0, VSrc_64 srcs
+
+class VOP3b_32 <bits<9> op, string opName, list<dag> pattern> :
+  VOP3b_Helper <op, VReg_32, VSrc_32, opName, pattern>; // VReg_32 dst0, VSrc_32 srcs
+
 //===----------------------------------------------------------------------===//
 // Vector I/O classes
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
index f888b8e6e0..6a29dbe77d 100644
--- a/lib/Target/R600/SIInstructions.td
+++ b/lib/Target/R600/SIInstructions.td
@@ -1455,8 +1455,10 @@ defm V_MUL_HI_I32 : VOP3_32 <0x0000016c, "V_MUL_HI_I32", []>;
 
 } // isCommutable = 1
 
-defm V_DIV_SCALE_F32 : VOP3_32 <0x0000016d, "V_DIV_SCALE_F32", []>;
-def V_DIV_SCALE_F64 : VOP3_64 <0x0000016e, "V_DIV_SCALE_F64", []>;
+def V_DIV_SCALE_F32 : VOP3b_32 <0x0000016d, "V_DIV_SCALE_F32", []>;
+
+// Double precision division pre-scale.
+def V_DIV_SCALE_F64 : VOP3b_64 <0x0000016e, "V_DIV_SCALE_F64", []>;
 
 defm V_DIV_FMAS_F32 : VOP3_32 <0x0000016f, "V_DIV_FMAS_F32",
   [(set f32:$dst, (AMDGPUdiv_fmas f32:$src0, f32:$src1, f32:$src2))]
author	Matt Arsenault <Matthew.Arsenault@amd.com>	2014-06-23 18:28:28 +0000
committer	Matt Arsenault <Matthew.Arsenault@amd.com>	2014-06-23 18:28:28 +0000
commit	ed143b7c0cca568a7dc35e91d2e37207f79b8d76 (patch)
tree	04012227cf8222bd32367926f86520c2277c7496 /lib
parent	d88f5b71c79ae77a41218ad3e9c5491caa2c766f (diff)
download	llvm-ed143b7c0cca568a7dc35e91d2e37207f79b8d76.tar.gz llvm-ed143b7c0cca568a7dc35e91d2e37207f79b8d76.tar.bz2 llvm-ed143b7c0cca568a7dc35e91d2e37207f79b8d76.tar.xz