summaryrefslogtreecommitdiff
path: root/lib/Target
diff options
context:
space:
mode:
authorJan Vesely <jan.vesely@rutgers.edu>2014-06-22 21:43:00 +0000
committerJan Vesely <jan.vesely@rutgers.edu>2014-06-22 21:43:00 +0000
commitcd88535ab984f8d271a895cb6d857105f64fce3d (patch)
tree937b03db480aaeb5b9fd415d9b3fbf36cd33a08e /lib/Target
parent728ea0c91b40a8d95dc271522dd24d30f9662941 (diff)
downloadllvm-cd88535ab984f8d271a895cb6d857105f64fce3d.tar.gz
llvm-cd88535ab984f8d271a895cb6d857105f64fce3d.tar.bz2
llvm-cd88535ab984f8d271a895cb6d857105f64fce3d.tar.xz
R600: Implement custom SDIVREM.
Instead of separate SDIV/SREM. SDIV used UDIV which in turn used UDIVREM anyway. SREM used SDIV(UDIV->UDIVREM)+MUL+SUB, using UDIVREM directly is more efficient. v2: Don't use all caps names Signed-off-by: Jan Vesely <jan.vesely@rutgers.edu> git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211477 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target')
-rw-r--r--lib/Target/R600/AMDGPUISelLowering.cpp47
-rw-r--r--lib/Target/R600/AMDGPUISelLowering.h1
2 files changed, 44 insertions, 4 deletions
diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp
index f248753bdb..8145eb23b8 100644
--- a/lib/Target/R600/AMDGPUISelLowering.cpp
+++ b/lib/Target/R600/AMDGPUISelLowering.cpp
@@ -234,10 +234,10 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
for (MVT VT : ScalarIntVTs) {
setOperationAction(ISD::SREM, VT, Expand);
- setOperationAction(ISD::SDIV, VT, Custom);
+ setOperationAction(ISD::SDIV, VT, Expand);
// GPU does not have divrem function for signed or unsigned.
- setOperationAction(ISD::SDIVREM, VT, Expand);
+ setOperationAction(ISD::SDIVREM, VT, Custom);
setOperationAction(ISD::UDIVREM, VT, Custom);
// GPU does not have [S|U]MUL_LOHI functions as a single instruction.
@@ -291,13 +291,13 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
setOperationAction(ISD::SINT_TO_FP, VT, Expand);
setOperationAction(ISD::UINT_TO_FP, VT, Expand);
// TODO: Implement custom UREM / SREM routines.
- setOperationAction(ISD::SDIV, VT, Custom);
+ setOperationAction(ISD::SDIV, VT, Expand);
setOperationAction(ISD::UDIV, VT, Expand);
setOperationAction(ISD::SREM, VT, Expand);
setOperationAction(ISD::UREM, VT, Expand);
setOperationAction(ISD::SMUL_LOHI, VT, Expand);
setOperationAction(ISD::UMUL_LOHI, VT, Expand);
- setOperationAction(ISD::SDIVREM, VT, Expand);
+ setOperationAction(ISD::SDIVREM, VT, Custom);
setOperationAction(ISD::UDIVREM, VT, Custom);
setOperationAction(ISD::SELECT, VT, Expand);
setOperationAction(ISD::VSELECT, VT, Expand);
@@ -504,6 +504,7 @@ SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op,
case ISD::SDIV: return LowerSDIV(Op, DAG);
case ISD::SREM: return LowerSREM(Op, DAG);
case ISD::UDIVREM: return LowerUDIVREM(Op, DAG);
+ case ISD::SDIVREM: return LowerSDIVREM(Op, DAG);
case ISD::FCEIL: return LowerFCEIL(Op, DAG);
case ISD::FTRUNC: return LowerFTRUNC(Op, DAG);
case ISD::FRINT: return LowerFRINT(Op, DAG);
@@ -1612,6 +1613,44 @@ SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
return DAG.getMergeValues(Ops, DL);
}
+SDValue AMDGPUTargetLowering::LowerSDIVREM(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ EVT VT = Op.getValueType();
+
+ SDValue Zero = DAG.getConstant(0, VT);
+ SDValue NegOne = DAG.getConstant(-1, VT);
+
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+
+ SDValue LHSign = DAG.getSelectCC(DL, LHS, Zero, NegOne, Zero, ISD::SETLT);
+ SDValue RHSign = DAG.getSelectCC(DL, RHS, Zero, NegOne, Zero, ISD::SETLT);
+ SDValue DSign = DAG.getNode(ISD::XOR, DL, VT, LHSign, RHSign);
+ SDValue RSign = LHSign; // Remainder sign is the same as LHS
+
+ LHS = DAG.getNode(ISD::ADD, DL, VT, LHS, LHSign);
+ RHS = DAG.getNode(ISD::ADD, DL, VT, RHS, RHSign);
+
+ LHS = DAG.getNode(ISD::XOR, DL, VT, LHS, LHSign);
+ RHS = DAG.getNode(ISD::XOR, DL, VT, RHS, RHSign);
+
+ SDValue Div = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(VT, VT), LHS, RHS);
+ SDValue Rem = Div.getValue(1);
+
+ Div = DAG.getNode(ISD::XOR, DL, VT, Div, DSign);
+ Rem = DAG.getNode(ISD::XOR, DL, VT, Rem, RSign);
+
+ Div = DAG.getNode(ISD::SUB, DL, VT, Div, DSign);
+ Rem = DAG.getNode(ISD::SUB, DL, VT, Rem, RSign);
+
+ SDValue Res[2] = {
+ Div,
+ Rem
+ };
+ return DAG.getMergeValues(Res, DL);
+}
+
SDValue AMDGPUTargetLowering::LowerFCEIL(SDValue Op, SelectionDAG &DAG) const {
SDLoc SL(Op);
SDValue Src = Op.getOperand(0);
diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h
index e2000a04ba..0db89ecb57 100644
--- a/lib/Target/R600/AMDGPUISelLowering.h
+++ b/lib/Target/R600/AMDGPUISelLowering.h
@@ -50,6 +50,7 @@ private:
SDValue LowerSREM(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSREM32(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSREM64(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSDIVREM(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFCEIL(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFTRUNC(SDValue Op, SelectionDAG &DAG) const;