summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Craig Topper <craig.topper@gmail.com> 2011-11-21 01:12:36 +0000
committer: Craig Topper <craig.topper@gmail.com> 2011-11-21 01:12:36 +0000
commit: a124f949527e36efc05d2dbc999ddce43791b4ec (patch)
tree: 13ac28efc3fb1a6389eea22216c45fed7805a2b6
parent: 7912ef97ffde3ab3334143ddfb4cafdf04e2ebfc (diff)
download: llvm-a124f949527e36efc05d2dbc999ddce43791b4ec.tar.gz
llvm-a124f949527e36efc05d2dbc999ddce43791b4ec.tar.bz2
llvm-a124f949527e36efc05d2dbc999ddce43791b4ec.tar.xz
Make LowerSIGN_EXTEND_INREG split 256-bit vectors when AVX1 is enabled and use AVX2 shifts when AVX2 is enabled.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@145022 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- lib/Target/X86/X86ISelLowering.cpp 50
-rw-r--r-- test/CodeGen/X86/avx-shift.ll 24
-rw-r--r-- test/CodeGen/X86/avx2-shift.ll 20
3 files changed, 86 insertions, 8 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 470a115747..4ba4b93571 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -10571,9 +10571,9 @@ SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
SDValue X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const{
DebugLoc dl = Op.getDebugLoc();
- SDNode* Node = Op.getNode();
- EVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
- EVT VT = Node->getValueType(0);
+ EVT ExtraVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+ EVT VT = Op.getValueType();
+
if (Subtarget->hasXMMInt() && VT.isVector()) {
unsigned BitsDiff = VT.getScalarType().getSizeInBits() -
ExtraVT.getScalarType().getSizeInBits();
@@ -10584,21 +10584,55 @@ SDValue X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG)
switch (VT.getSimpleVT().SimpleTy) {
default:
return SDValue();
- case MVT::v4i32: {
+ case MVT::v4i32:
SHLIntrinsicsID = Intrinsic::x86_sse2_pslli_d;
SRAIntrinsicsID = Intrinsic::x86_sse2_psrai_d;
break;
- }
- case MVT::v8i16: {
+ case MVT::v8i16:
SHLIntrinsicsID = Intrinsic::x86_sse2_pslli_w;
SRAIntrinsicsID = Intrinsic::x86_sse2_psrai_w;
break;
- }
+ case MVT::v8i32:
+ case MVT::v16i16:
+ if (!Subtarget->hasAVX())
+ return SDValue();
+ if (!Subtarget->hasAVX2()) {
+ // needs to be split
+ int NumElems = VT.getVectorNumElements();
+ SDValue Idx0 = DAG.getConstant(0, MVT::i32);
+ SDValue Idx1 = DAG.getConstant(NumElems/2, MVT::i32);
+
+ // Extract the LHS vectors
+ SDValue LHS = Op.getOperand(0);
+ SDValue LHS1 = Extract128BitVector(LHS, Idx0, DAG, dl);
+ SDValue LHS2 = Extract128BitVector(LHS, Idx1, DAG, dl);
+
+ MVT EltVT = VT.getVectorElementType().getSimpleVT();
+ EVT NewVT = MVT::getVectorVT(EltVT, NumElems/2);
+
+ EVT ExtraEltVT = ExtraVT.getVectorElementType();
+ int ExtraNumElems = ExtraVT.getVectorNumElements();
+ ExtraVT = EVT::getVectorVT(*DAG.getContext(), ExtraEltVT,
+ ExtraNumElems/2);
+ SDValue Extra = DAG.getValueType(ExtraVT);
+
+ LHS1 = DAG.getNode(Op.getOpcode(), dl, NewVT, LHS1, Extra);
+ LHS2 = DAG.getNode(Op.getOpcode(), dl, NewVT, LHS2, Extra);
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, LHS1, LHS2);;
+ }
+ if (VT == MVT::v8i32) {
+ SHLIntrinsicsID = Intrinsic::x86_avx2_pslli_d;
+ SRAIntrinsicsID = Intrinsic::x86_avx2_psrai_d;
+ } else {
+ SHLIntrinsicsID = Intrinsic::x86_avx2_pslli_w;
+ SRAIntrinsicsID = Intrinsic::x86_avx2_psrai_w;
+ }
}
SDValue Tmp1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
DAG.getConstant(SHLIntrinsicsID, MVT::i32),
- Node->getOperand(0), ShAmt);
+ Op.getOperand(0), ShAmt);
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
DAG.getConstant(SRAIntrinsicsID, MVT::i32),
diff --git a/test/CodeGen/X86/avx-shift.ll b/test/CodeGen/X86/avx-shift.ll
index a33423d7c5..681747b844 100644
--- a/test/CodeGen/X86/avx-shift.ll
+++ b/test/CodeGen/X86/avx-shift.ll
@@ -112,3 +112,27 @@ define <8 x i32> @vshift08(<8 x i32> %a) nounwind {
ret <8 x i32> %bitop
}
+;;; Uses shifts for sign extension
+; CHECK: _sext_v16i16
+; CHECK: vpsllw
+; CHECK: vpsraw
+; CHECK: vpsllw
+; CHECK: vpsraw
+; CHECK: vinsertf128
+define <16 x i16> @sext_v16i16(<16 x i16> %a) nounwind {
+ %b = trunc <16 x i16> %a to <16 x i8>
+ %c = sext <16 x i8> %b to <16 x i16>
+ ret <16 x i16> %c
+}
+
+; CHECK: _sext_v8i32
+; CHECK: vpslld
+; CHECK: vpsrad
+; CHECK: vpslld
+; CHECK: vpsrad
+; CHECK: vinsertf128
+define <8 x i32> @sext_v8i32(<8 x i32> %a) nounwind {
+ %b = trunc <8 x i32> %a to <8 x i16>
+ %c = sext <8 x i16> %b to <8 x i32>
+ ret <8 x i32> %c
+}
diff --git a/test/CodeGen/X86/avx2-shift.ll b/test/CodeGen/X86/avx2-shift.ll
index b9d1edcb13..b6cf54ebe8 100644
--- a/test/CodeGen/X86/avx2-shift.ll
+++ b/test/CodeGen/X86/avx2-shift.ll
@@ -246,3 +246,23 @@ define <32 x i8> @sra_v32i8(<32 x i8> %A) nounwind {
; CHECK: vpsubb
; CHECK: ret
}
+
+; CHECK: _sext_v16i16
+; CHECK: vpsllw
+; CHECK: vpsraw
+; CHECK-NOT: vinsertf128
+define <16 x i16> @sext_v16i16(<16 x i16> %a) nounwind {
+ %b = trunc <16 x i16> %a to <16 x i8>
+ %c = sext <16 x i8> %b to <16 x i16>
+ ret <16 x i16> %c
+}
+
+; CHECK: _sext_v8i32
+; CHECK: vpslld
+; CHECK: vpsrad
+; CHECK-NOT: vinsertf128
+define <8 x i32> @sext_v8i32(<8 x i32> %a) nounwind {
+ %b = trunc <8 x i32> %a to <8 x i16>
+ %c = sext <8 x i16> %b to <8 x i32>
+ ret <8 x i32> %c
+}