diff options
author | Jan Vesely <jan.vesely@rutgers.edu> | 2014-06-22 21:43:00 +0000 |
---|---|---|
committer | Jan Vesely <jan.vesely@rutgers.edu> | 2014-06-22 21:43:00 +0000 |
commit | cd88535ab984f8d271a895cb6d857105f64fce3d (patch) | |
tree | 937b03db480aaeb5b9fd415d9b3fbf36cd33a08e /lib/Target | |
parent | 728ea0c91b40a8d95dc271522dd24d30f9662941 (diff) | |
download | llvm-cd88535ab984f8d271a895cb6d857105f64fce3d.tar.gz llvm-cd88535ab984f8d271a895cb6d857105f64fce3d.tar.bz2 llvm-cd88535ab984f8d271a895cb6d857105f64fce3d.tar.xz |
R600: Implement custom SDIVREM.
Custom SDIVREM lowering is used instead of separate SDIV/SREM lowering. SDIV used UDIV, which in turn used UDIVREM anyway.
SREM used SDIV (UDIV -> UDIVREM) + MUL + SUB; using UDIVREM directly is more efficient.
v2: Don't use all caps names
Signed-off-by: Jan Vesely <jan.vesely@rutgers.edu>
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211477 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target')
-rw-r--r-- | lib/Target/R600/AMDGPUISelLowering.cpp | 47 | ||||
-rw-r--r-- | lib/Target/R600/AMDGPUISelLowering.h | 1 |
2 files changed, 44 insertions, 4 deletions
diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index f248753bdb..8145eb23b8 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -234,10 +234,10 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 }; for (MVT VT : ScalarIntVTs) { setOperationAction(ISD::SREM, VT, Expand); - setOperationAction(ISD::SDIV, VT, Custom); + setOperationAction(ISD::SDIV, VT, Expand); // GPU does not have divrem function for signed or unsigned. - setOperationAction(ISD::SDIVREM, VT, Expand); + setOperationAction(ISD::SDIVREM, VT, Custom); setOperationAction(ISD::UDIVREM, VT, Custom); // GPU does not have [S|U]MUL_LOHI functions as a single instruction. @@ -291,13 +291,13 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : setOperationAction(ISD::SINT_TO_FP, VT, Expand); setOperationAction(ISD::UINT_TO_FP, VT, Expand); // TODO: Implement custom UREM / SREM routines. 
- setOperationAction(ISD::SDIV, VT, Custom); + setOperationAction(ISD::SDIV, VT, Expand); setOperationAction(ISD::UDIV, VT, Expand); setOperationAction(ISD::SREM, VT, Expand); setOperationAction(ISD::UREM, VT, Expand); setOperationAction(ISD::SMUL_LOHI, VT, Expand); setOperationAction(ISD::UMUL_LOHI, VT, Expand); - setOperationAction(ISD::SDIVREM, VT, Expand); + setOperationAction(ISD::SDIVREM, VT, Custom); setOperationAction(ISD::UDIVREM, VT, Custom); setOperationAction(ISD::SELECT, VT, Expand); setOperationAction(ISD::VSELECT, VT, Expand); @@ -504,6 +504,7 @@ SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, case ISD::SDIV: return LowerSDIV(Op, DAG); case ISD::SREM: return LowerSREM(Op, DAG); case ISD::UDIVREM: return LowerUDIVREM(Op, DAG); + case ISD::SDIVREM: return LowerSDIVREM(Op, DAG); case ISD::FCEIL: return LowerFCEIL(Op, DAG); case ISD::FTRUNC: return LowerFTRUNC(Op, DAG); case ISD::FRINT: return LowerFRINT(Op, DAG); @@ -1612,6 +1613,44 @@ SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op, return DAG.getMergeValues(Ops, DL); } +SDValue AMDGPUTargetLowering::LowerSDIVREM(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + EVT VT = Op.getValueType(); + + SDValue Zero = DAG.getConstant(0, VT); + SDValue NegOne = DAG.getConstant(-1, VT); + + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + + SDValue LHSign = DAG.getSelectCC(DL, LHS, Zero, NegOne, Zero, ISD::SETLT); + SDValue RHSign = DAG.getSelectCC(DL, RHS, Zero, NegOne, Zero, ISD::SETLT); + SDValue DSign = DAG.getNode(ISD::XOR, DL, VT, LHSign, RHSign); + SDValue RSign = LHSign; // Remainder sign is the same as LHS + + LHS = DAG.getNode(ISD::ADD, DL, VT, LHS, LHSign); + RHS = DAG.getNode(ISD::ADD, DL, VT, RHS, RHSign); + + LHS = DAG.getNode(ISD::XOR, DL, VT, LHS, LHSign); + RHS = DAG.getNode(ISD::XOR, DL, VT, RHS, RHSign); + + SDValue Div = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(VT, VT), LHS, RHS); + SDValue Rem = Div.getValue(1); + + Div = DAG.getNode(ISD::XOR, DL, 
VT, Div, DSign); + Rem = DAG.getNode(ISD::XOR, DL, VT, Rem, RSign); + + Div = DAG.getNode(ISD::SUB, DL, VT, Div, DSign); + Rem = DAG.getNode(ISD::SUB, DL, VT, Rem, RSign); + + SDValue Res[2] = { + Div, + Rem + }; + return DAG.getMergeValues(Res, DL); +} + SDValue AMDGPUTargetLowering::LowerFCEIL(SDValue Op, SelectionDAG &DAG) const { SDLoc SL(Op); SDValue Src = Op.getOperand(0); diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h index e2000a04ba..0db89ecb57 100644 --- a/lib/Target/R600/AMDGPUISelLowering.h +++ b/lib/Target/R600/AMDGPUISelLowering.h @@ -50,6 +50,7 @@ private: SDValue LowerSREM(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSREM32(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSREM64(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSDIVREM(SDValue Op, SelectionDAG &DAG) const; SDValue LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFCEIL(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFTRUNC(SDValue Op, SelectionDAG &DAG) const; |