diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2014-06-23 18:00:38 +0000 |
---|---|---|
committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2014-06-23 18:00:38 +0000 |
commit | c4471e92484328a42cfa608272ce3d214f91b285 (patch) | |
tree | 1f4f552325ed17dc3a9fe92c149affcb26b54c04 | |
parent | e564b6ed79a6ef44c63001e3d722e30ec90e1e02 (diff) | |
download | llvm-c4471e92484328a42cfa608272ce3d214f91b285.tar.gz llvm-c4471e92484328a42cfa608272ce3d214f91b285.tar.bz2 llvm-c4471e92484328a42cfa608272ce3d214f91b285.tar.xz |
R600/SI: Handle i64 sub.
We can handle it the same way as add
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211514 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Target/R600/AMDGPUISelDAGToDAG.cpp | 33 | ||||
-rw-r--r-- | lib/Target/R600/AMDGPUISelLowering.cpp | 1 | ||||
-rw-r--r-- | lib/Target/R600/R600ISelLowering.cpp | 2 | ||||
-rw-r--r-- | lib/Target/R600/SIISelLowering.cpp | 2 | ||||
-rw-r--r-- | test/CodeGen/R600/sub.ll | 50 |
5 files changed, 58 insertions, 30 deletions
diff --git a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp index ce498e79e2..85d0f9dd69 100644 --- a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp +++ b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp @@ -85,7 +85,7 @@ private: bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset); bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset); - SDNode *SelectADD_I64(SDNode *N); + SDNode *SelectADD_SUB_I64(SDNode *N); // Include the pieces autogenerated from the target description. #include "AMDGPUGenDAGISel.inc" @@ -214,12 +214,13 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { // We are selecting i64 ADD here instead of custom lower it during // DAG legalization, so we can fold some i64 ADDs used for address // calculation into the LOAD and STORE instructions. - case ISD::ADD: { + case ISD::ADD: + case ISD::SUB: { if (N->getValueType(0) != MVT::i64 || ST.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) break; - return SelectADD_I64(N); + return SelectADD_SUB_I64(N); } case ISD::SCALAR_TO_VECTOR: case AMDGPUISD::BUILD_VERTICAL_VECTOR: @@ -646,11 +647,13 @@ bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base, return true; } -SDNode *AMDGPUDAGToDAGISel::SelectADD_I64(SDNode *N) { +SDNode *AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) { SDLoc DL(N); SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); + bool IsAdd = (N->getOpcode() == ISD::ADD); + SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32); SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32); @@ -665,16 +668,22 @@ SDNode *AMDGPUDAGToDAGISel::SelectADD_I64(SDNode *N) { DL, MVT::i32, RHS, Sub1); SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue); - SDValue AddLoArgs[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) }; - SDNode *AddLo = CurDAG->getMachineNode( - isCFDepth0() ? AMDGPU::S_ADD_I32 : AMDGPU::V_ADD_I32_e32, - DL, VTList, AddLoArgs); - SDValue Carry = SDValue(AddLo, 1); - SDNode *AddHi = CurDAG->getMachineNode( - isCFDepth0() ? AMDGPU::S_ADDC_U32 : AMDGPU::V_ADDC_U32_e32, - DL, MVT::i32, SDValue(Hi0, 0), SDValue(Hi1, 0), Carry); + + unsigned Opc = IsAdd ? AMDGPU::S_ADD_I32 : AMDGPU::S_SUB_I32; + unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32; + + if (!isCFDepth0()) { + Opc = IsAdd ? AMDGPU::V_ADD_I32_e32 : AMDGPU::V_SUB_I32_e32; + CarryOpc = IsAdd ? AMDGPU::V_ADDC_U32_e32 : AMDGPU::V_SUBB_U32_e32; + } + + SDNode *AddLo = CurDAG->getMachineNode( Opc, DL, VTList, AddLoArgs); + SDValue Carry(AddLo, 1); + SDNode *AddHi + = CurDAG->getMachineNode(CarryOpc, DL, MVT::i32, + SDValue(Hi0, 0), SDValue(Hi1, 0), Carry); SDValue Args[5] = { CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, MVT::i32), diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index 2707a9fd5e..f2cda63e90 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -264,7 +264,6 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : setOperationAction(ISD::MUL, MVT::i64, Expand); setOperationAction(ISD::MULHU, MVT::i64, Expand); setOperationAction(ISD::MULHS, MVT::i64, Expand); - setOperationAction(ISD::SUB, MVT::i64, Expand); setOperationAction(ISD::UDIV, MVT::i32, Expand); setOperationAction(ISD::UREM, MVT::i32, Expand); setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom); diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp index fda1ac7321..8c083262f9 100644 --- a/lib/Target/R600/R600ISelLowering.cpp +++ b/lib/Target/R600/R600ISelLowering.cpp @@ -152,6 +152,8 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) : setTargetDAGCombine(ISD::SELECT_CC); setTargetDAGCombine(ISD::INSERT_VECTOR_ELT); + setOperationAction(ISD::SUB, MVT::i64, Expand); + // These should be replaced by UDVIREM, but it does not happen automatically // during Type Legalization setOperationAction(ISD::UDIV, MVT::i64, Custom); diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index cd23cd2a4e..793053a9e7 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -77,6 +77,8 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : setOperationAction(ISD::ADD, MVT::i32, Legal); setOperationAction(ISD::ADDC, MVT::i32, Legal); setOperationAction(ISD::ADDE, MVT::i32, Legal); + setOperationAction(ISD::SUBC, MVT::i32, Legal); + setOperationAction(ISD::SUBE, MVT::i32, Legal); // We need to custom lower vector stores from local memory setOperationAction(ISD::LOAD, MVT::v2i32, Custom); diff --git a/test/CodeGen/R600/sub.ll b/test/CodeGen/R600/sub.ll index 58523d068e..8e64148142 100644 --- a/test/CodeGen/R600/sub.ll +++ b/test/CodeGen/R600/sub.ll @@ -1,5 +1,7 @@ -;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG --check-prefix=FUNC %s -;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s +;RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +;RUN: llc -march=r600 -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s + +declare i32 @llvm.r600.read.tidig.x() readnone ;FUNC-LABEL: @test2 ;EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} @@ -37,23 +39,37 @@ define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { ret void } -;FUNC_LABEL: @test5 +; FUNC-LABEL: @s_sub_i64: +; SI: S_SUB_I32 +; SI: S_SUBB_U32 -;EG-DAG: SETGE_UINT -;EG-DAG: CNDE_INT -;EG-DAG: SUB_INT -;EG-DAG: SUB_INT -;EG-DAG: SUB_INT +; EG-DAG: SETGE_UINT +; EG-DAG: CNDE_INT +; EG-DAG: SUB_INT +; EG-DAG: SUB_INT +; EG-DAG: SUB_INT +define void @s_sub_i64(i64 addrspace(1)* noalias %out, i64 %a, i64 %b) nounwind { + %result = sub i64 %a, %b + store i64 %result, i64 addrspace(1)* %out, align 8 + ret void +} -;SI: S_NOT_B64 -;SI-DAG: S_ADD_I32 -;SI-DAG: S_ADDC_U32 -;SI-DAG: S_ADD_I32 -;SI-DAG: S_ADDC_U32 +; FUNC-LABEL: @v_sub_i64: +; SI: V_SUB_I32_e32 +; SI: V_SUBB_U32_e32 -define void @test5(i64 addrspace(1)* %out, i64 %a, i64 %b) { -entry: - %0 = sub i64 %a, %b - store i64 %0, i64 addrspace(1)* %out +; EG-DAG: SETGE_UINT +; EG-DAG: CNDE_INT +; EG-DAG: SUB_INT +; EG-DAG: SUB_INT +; EG-DAG: SUB_INT +define void @v_sub_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %inA, i64 addrspace(1)* noalias %inB) nounwind { + %tid = call i32 @llvm.r600.read.tidig.x() readnone + %a_ptr = getelementptr i64 addrspace(1)* %inA, i32 %tid + %b_ptr = getelementptr i64 addrspace(1)* %inB, i32 %tid + %a = load i64 addrspace(1)* %a_ptr + %b = load i64 addrspace(1)* %b_ptr + %result = sub i64 %a, %b + store i64 %result, i64 addrspace(1)* %out, align 8 ret void } |