diff options
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 19 | ||||
-rw-r--r-- | test/CodeGen/X86/vector-idiv.ll | 63 |
2 files changed, 57 insertions, 25 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index a902cae513..ac5f60c69f 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -941,6 +941,7 @@ void X86TargetLowering::resetOperationActions() { setOperationAction(ISD::MUL, MVT::v4i32, Custom); setOperationAction(ISD::MUL, MVT::v2i64, Custom); setOperationAction(ISD::UMUL_LOHI, MVT::v4i32, Custom); + setOperationAction(ISD::SMUL_LOHI, MVT::v4i32, Custom); setOperationAction(ISD::MULHU, MVT::v8i16, Legal); setOperationAction(ISD::MULHS, MVT::v8i16, Legal); setOperationAction(ISD::SUB, MVT::v16i8, Legal); @@ -1062,7 +1063,6 @@ void X86TargetLowering::resetOperationActions() { // FIXME: Do we need to handle scalar-to-vector here? setOperationAction(ISD::MUL, MVT::v4i32, Legal); - setOperationAction(ISD::SMUL_LOHI, MVT::v4i32, Custom); setOperationAction(ISD::VSELECT, MVT::v2f64, Legal); setOperationAction(ISD::VSELECT, MVT::v2i64, Legal); @@ -13166,8 +13166,9 @@ static SDValue LowerMUL_LOHI(SDValue Op, const X86Subtarget *Subtarget, // Emit two multiplies, one for the lower 2 ints and one for the higher 2 // ints. MVT MulVT = VT == MVT::v4i32 ? MVT::v2i64 : MVT::v4i64; + bool IsSigned = Op->getOpcode() == ISD::SMUL_LOHI; unsigned Opcode = - Op->getOpcode() == ISD::UMUL_LOHI ? X86ISD::PMULUDQ : X86ISD::PMULDQ; + (!IsSigned || !Subtarget->hasSSE41()) ? X86ISD::PMULUDQ : X86ISD::PMULDQ; SDValue Mul1 = DAG.getNode(ISD::BITCAST, dl, VT, DAG.getNode(Opcode, dl, MulVT, Op0, Op1)); SDValue Mul2 = DAG.getNode(ISD::BITCAST, dl, VT, @@ -13179,6 +13180,20 @@ static SDValue LowerMUL_LOHI(SDValue Op, const X86Subtarget *Subtarget, const int LowMask[] = {0, 4, 2, 6, 8, 12, 10, 14}; SDValue Lows = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, LowMask); + // If we have a signed multiply but no PMULDQ, fix up the high parts of an + // unsigned multiply. 
+ if (IsSigned && !Subtarget->hasSSE41()) { + SDValue ShAmt = + DAG.getConstant(31, DAG.getTargetLoweringInfo().getShiftAmountTy(VT)); + SDValue T1 = DAG.getNode(ISD::AND, dl, VT, + DAG.getNode(ISD::SRA, dl, VT, Op0, ShAmt), Op1); + SDValue T2 = DAG.getNode(ISD::AND, dl, VT, + DAG.getNode(ISD::SRA, dl, VT, Op1, ShAmt), Op0); + + SDValue Fixup = DAG.getNode(ISD::ADD, dl, VT, T1, T2); + Highs = DAG.getNode(ISD::SUB, dl, VT, Highs, Fixup); + } + return DAG.getNode(ISD::MERGE_VALUES, dl, Op.getValueType(), Highs, Lows); } diff --git a/test/CodeGen/X86/vector-idiv.ll b/test/CodeGen/X86/vector-idiv.ll index 981c317157..5738c94e37 100644 --- a/test/CodeGen/X86/vector-idiv.ll +++ b/test/CodeGen/X86/vector-idiv.ll @@ -1,19 +1,20 @@ -; RUN: llc -march=x86-64 -mcpu=corei7 < %s | FileCheck %s -check-prefix=SSE +; RUN: llc -march=x86-64 -mcpu=core2 -mattr=+sse4.1 < %s | FileCheck %s -check-prefix=SSE41 +; RUN: llc -march=x86-64 -mcpu=core2 < %s | FileCheck %s -check-prefix=SSE ; RUN: llc -march=x86-64 -mcpu=core-avx2 < %s | FileCheck %s -check-prefix=AVX define <4 x i32> @test1(<4 x i32> %a) { %div = udiv <4 x i32> %a, <i32 7, i32 7, i32 7, i32 7> ret <4 x i32> %div -; SSE-LABEL: test1: -; SSE: pmuludq -; SSE: pshufd $57 -; SSE: pmuludq -; SSE: shufps $-35 -; SSE: psubd -; SSE: psrld $1 -; SSE: padd -; SSE: psrld $2 +; SSE41-LABEL: test1: +; SSE41: pmuludq +; SSE41: pshufd $57 +; SSE41: pmuludq +; SSE41: shufps $-35 +; SSE41: psubd +; SSE41: psrld $1 +; SSE41: padd +; SSE41: psrld $2 ; AVX-LABEL: test1: ; AVX: vpmuludq @@ -46,12 +47,12 @@ define <8 x i16> @test3(<8 x i16> %a) { %div = udiv <8 x i16> %a, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7> ret <8 x i16> %div -; SSE-LABEL: test3: -; SSE: pmulhuw -; SSE: psubw -; SSE: psrlw $1 -; SSE: paddw -; SSE: psrlw $2 +; SSE41-LABEL: test3: +; SSE41: pmulhuw +; SSE41: psubw +; SSE41: psrlw $1 +; SSE41: paddw +; SSE41: psrlw $2 ; AVX-LABEL: test3: ; AVX: vpmulhuw @@ -78,11 +79,11 @@ define <8 x i16> @test5(<8 x i16> %a) { 
%div = sdiv <8 x i16> %a, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7> ret <8 x i16> %div -; SSE-LABEL: test5: -; SSE: pmulhw -; SSE: psrlw $15 -; SSE: psraw $1 -; SSE: paddw +; SSE41-LABEL: test5: +; SSE41: pmulhw +; SSE41: psrlw $15 +; SSE41: psraw $1 +; SSE41: paddw ; AVX-LABEL: test5: ; AVX: vpmulhw @@ -112,13 +113,29 @@ define <4 x i32> @test8(<4 x i32> %a) { %div = sdiv <4 x i32> %a, <i32 7, i32 7, i32 7, i32 7> ret <4 x i32> %div +; SSE41-LABEL: test8: +; SSE41: pmuldq +; SSE41: pshufd $57 +; SSE41-NOT: pshufd $57 +; SSE41: pmuldq +; SSE41: shufps $-35 +; SSE41: pshufd $-40 +; SSE41: padd +; SSE41: psrld $31 +; SSE41: psrad $2 +; SSE41: padd + ; SSE-LABEL: test8: -; SSE: pmuldq +; SSE: psrad $31 +; SSE: pand +; SSE: paddd +; SSE: pmuludq ; SSE: pshufd $57 ; SSE-NOT: pshufd $57 -; SSE: pmuldq +; SSE: pmuludq ; SSE: shufps $-35 ; SSE: pshufd $-40 +; SSE: psubd ; SSE: padd ; SSE: psrld $31 ; SSE: psrad $2 |