summaryrefslogtreecommitdiff
path: root/lib/Target/X86/X86ISelLowering.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/X86/X86ISelLowering.cpp')
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp56
1 files changed, 56 insertions, 0 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index b9dba7bdf3..87c4805168 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -1221,7 +1221,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setTargetDAGCombine(ISD::ANY_EXTEND);
setTargetDAGCombine(ISD::SIGN_EXTEND);
setTargetDAGCombine(ISD::TRUNCATE);
+ setTargetDAGCombine(ISD::UINT_TO_FP);
setTargetDAGCombine(ISD::SINT_TO_FP);
+ setTargetDAGCombine(ISD::FP_TO_SINT);
if (Subtarget->is64Bit())
setTargetDAGCombine(ISD::MUL);
if (Subtarget->hasBMI())
@@ -14985,9 +14987,43 @@ static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG &DAG) {
return SDValue();
}
+static SDValue PerformUINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG,
+ const X86TargetLowering *XTLI) {
+ SDValue Op0 = N->getOperand(0);
+ EVT InVT = Op0->getValueType(0);
+ if (!InVT.isSimple())
+ return SDValue();
+
+ // UINT_TO_FP(v4i8) -> SINT_TO_FP(ZEXT(v4i8 to v4i32))
+ MVT SrcVT = InVT.getSimpleVT();
+ if (SrcVT == MVT::v8i8 || SrcVT == MVT::v4i8) {
+ DebugLoc dl = N->getDebugLoc();
+ MVT DstVT = (SrcVT.getVectorNumElements() == 4 ? MVT::v4i32 : MVT::v8i32);
+ SDValue P = DAG.getNode(ISD::ZERO_EXTEND, dl, DstVT, Op0);
+ // Notice that we use SINT_TO_FP because we know that the high bits
+ // are zero and SINT_TO_FP is better supported by the hardware.
+ return DAG.getNode(ISD::SINT_TO_FP, dl, N->getValueType(0), P);
+ }
+
+ return SDValue();
+}
+
static SDValue PerformSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG,
const X86TargetLowering *XTLI) {
SDValue Op0 = N->getOperand(0);
+ EVT InVT = Op0->getValueType(0);
+ if (!InVT.isSimple())
+ return SDValue();
+
+ // SINT_TO_FP(v4i8) -> SINT_TO_FP(SEXT(v4i8 to v4i32))
+ MVT SrcVT = InVT.getSimpleVT();
+ if (SrcVT == MVT::v8i8 || SrcVT == MVT::v4i8) {
+ DebugLoc dl = N->getDebugLoc();
+ MVT DstVT = (SrcVT.getVectorNumElements() == 4 ? MVT::v4i32 : MVT::v8i32);
+ SDValue P = DAG.getNode(ISD::SIGN_EXTEND, dl, DstVT, Op0);
+ return DAG.getNode(ISD::SINT_TO_FP, dl, N->getValueType(0), P);
+ }
+
// Transform (SINT_TO_FP (i64 ...)) into an x87 operation if we have
// a 32-bit target where SSE doesn't support i64->FP operations.
if (Op0.getOpcode() == ISD::LOAD) {
@@ -15006,6 +15042,24 @@ static SDValue PerformSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+static SDValue PerformFP_TO_SINTCombine(SDNode *N, SelectionDAG &DAG,
+ const X86TargetLowering *XTLI) {
+ EVT InVT = N->getValueType(0);
+ if (!InVT.isSimple())
+ return SDValue();
+
+ // v4i8 = FP_TO_SINT() -> v4i8 = TRUNCATE (V4i32 = FP_TO_SINT()
+ MVT VT = InVT.getSimpleVT();
+ if (VT == MVT::v8i8 || VT == MVT::v4i8) {
+ DebugLoc dl = N->getDebugLoc();
+ MVT DstVT = (VT.getVectorNumElements() == 4 ? MVT::v4i32 : MVT::v8i32);
+ SDValue I = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, N->getOperand(0));
+ return DAG.getNode(ISD::TRUNCATE, dl, VT, I);
+ }
+
+ return SDValue();
+}
+
// Optimize RES, EFLAGS = X86ISD::ADC LHS, RHS, EFLAGS
static SDValue PerformADCCombine(SDNode *N, SelectionDAG &DAG,
X86TargetLowering::DAGCombinerInfo &DCI) {
@@ -15142,7 +15196,9 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::XOR: return PerformXorCombine(N, DAG, DCI, Subtarget);
case ISD::LOAD: return PerformLOADCombine(N, DAG, Subtarget);
case ISD::STORE: return PerformSTORECombine(N, DAG, Subtarget);
+ case ISD::UINT_TO_FP: return PerformUINT_TO_FPCombine(N, DAG, this);
case ISD::SINT_TO_FP: return PerformSINT_TO_FPCombine(N, DAG, this);
+ case ISD::FP_TO_SINT: return PerformFP_TO_SINTCombine(N, DAG, this);
case ISD::FADD: return PerformFADDCombine(N, DAG, Subtarget);
case ISD::FSUB: return PerformFSUBCombine(N, DAG, Subtarget);
case X86ISD::FXOR: