diff options
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 26 | ||||
-rw-r--r-- | test/CodeGen/X86/x86-shifts.ll | 18 |
2 files changed, 44 insertions, 0 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 1af24497ba..2ddb1b7163 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -9929,6 +9929,19 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const { if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(SclrAmt)) { uint64_t ShiftAmt = C->getZExtValue(); + if (VT == MVT::v16i8 && Op.getOpcode() == ISD::SHL) { + // Make a large shift. + SDValue SHL = + DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, + DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32), + R, DAG.getConstant(ShiftAmt, MVT::i32)); + // Zero out the rightmost bits. + SmallVector<SDValue, 16> V(16, DAG.getConstant(uint8_t(-1U << ShiftAmt), + MVT::i8)); + return DAG.getNode(ISD::AND, dl, VT, SHL, + DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 16)); + } + if (VT == MVT::v2i64 && Op.getOpcode() == ISD::SHL) return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, DAG.getConstant(Intrinsic::x86_sse2_pslli_q, MVT::i32), @@ -9944,6 +9957,19 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const { DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32), R, DAG.getConstant(ShiftAmt, MVT::i32)); + if (VT == MVT::v16i8 && Op.getOpcode() == ISD::SRL) { + // Make a large shift. + SDValue SRL = + DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, + DAG.getConstant(Intrinsic::x86_sse2_psrli_w, MVT::i32), + R, DAG.getConstant(ShiftAmt, MVT::i32)); + // Zero out the leftmost bits. + SmallVector<SDValue, 16> V(16, DAG.getConstant(uint8_t(-1U) >> ShiftAmt, + MVT::i8)); + return DAG.getNode(ISD::AND, dl, VT, SRL, + DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 16)); + } + if (VT == MVT::v2i64 && Op.getOpcode() == ISD::SRL) return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, DAG.getConstant(Intrinsic::x86_sse2_psrli_q, MVT::i32), diff --git a/test/CodeGen/X86/x86-shifts.ll b/test/CodeGen/X86/x86-shifts.ll index 5a91b09047..3e44eafa24 100644 --- a/test/CodeGen/X86/x86-shifts.ll +++ b/test/CodeGen/X86/x86-shifts.ll @@ -152,3 +152,21 @@ entry: %K = xor <2 x i32> %B, %C ret <2 x i32> %K } + +define <16 x i8> @shl9(<16 x i8> %A) nounwind { + %B = shl <16 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> + ret <16 x i8> %B +; CHECK: shl9: +; CHECK: psllw $3 +; CHECK: pand +; CHECK: ret +} + +define <16 x i8> @shr9(<16 x i8> %A) nounwind { + %B = lshr <16 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> + ret <16 x i8> %B +; CHECK: shr9: +; CHECK: psrlw $3 +; CHECK: pand +; CHECK: ret +} |