summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp102
-rw-r--r--test/CodeGen/X86/2007-09-18-ShuffleXformBug.ll30
-rw-r--r--test/CodeGen/X86/vector-shuffle-128-v8.ll19
3 files changed, 108 insertions, 43 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 3ceeac3b34..2e23cd3674 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -19034,6 +19034,95 @@ static SDValue PerformShuffleCombine256(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+/// \brief Get the PSHUF-style mask from PSHUF node.
+///
+/// This is a very minor wrapper around getTargetShuffleMask to easy forming v4
+/// PSHUF-style masks that can be reused with such instructions.
+static SmallVector<int, 4> getPSHUFShuffleMask(SDValue N) {
+ SmallVector<int, 4> Mask;
+ bool IsUnary;
+ bool HaveMask = getTargetShuffleMask(N.getNode(), N.getSimpleValueType(), Mask, IsUnary);
+ (void)HaveMask;
+ assert(HaveMask);
+
+ switch (N.getOpcode()) {
+ case X86ISD::PSHUFD:
+ return Mask;
+ case X86ISD::PSHUFLW:
+ Mask.resize(4);
+ return Mask;
+ case X86ISD::PSHUFHW:
+ Mask.erase(Mask.begin(), Mask.begin() + 4);
+ for (int &M : Mask)
+ M -= 4;
+ return Mask;
+ default:
+ llvm_unreachable("No valid shuffle instruction found!");
+ }
+}
+
+/// \brief Try to combine x86 target specific shuffles.
+static SDValue PerformTargetShuffleCombine(SDValue N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget *Subtarget) {
+ SDLoc DL(N);
+ MVT VT = N.getSimpleValueType();
+ SmallVector<int, 4> Mask;
+
+ switch (N.getOpcode()) {
+ case X86ISD::PSHUFD:
+ case X86ISD::PSHUFLW:
+ case X86ISD::PSHUFHW:
+ Mask = getPSHUFShuffleMask(N);
+ assert(Mask.size() == 4);
+ break;
+ default:
+ return SDValue();
+ }
+
+ SDValue V = N.getOperand(0);
+ switch (N.getOpcode()) {
+ default:
+ break;
+ case X86ISD::PSHUFLW:
+ case X86ISD::PSHUFHW:
+ assert(VT == MVT::v8i16);
+
+ // See if this reduces to a PSHUFD which is no more expensive and can
+ // combine with more operations.
+ if (Mask[0] % 2 == 0 && Mask[2] % 2 == 0 &&
+ areAdjacentMasksSequential(Mask)) {
+ int DMask[] = {-1, -1, -1, -1};
+ int DOffset = N.getOpcode() == X86ISD::PSHUFLW ? 0 : 2;
+ DMask[DOffset + 0] = DOffset + Mask[0] / 2;
+ DMask[DOffset + 1] = DOffset + Mask[2] / 2;
+ V = DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, V);
+ DCI.AddToWorklist(V.getNode());
+ V = DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32, V,
+ getV4X86ShuffleImm8ForMask(DMask, DAG));
+ DCI.AddToWorklist(V.getNode());
+ return DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, V);
+ }
+
+ // Fallthrough
+ case X86ISD::PSHUFD:
+ if (V.getOpcode() == N.getOpcode()) {
+ // If we have two sequential shuffles of the same kind we can always fold
+ // them. Even if there are multiple uses, this is beneficial because it
+ // breaks a dependency.
+ SmallVector<int, 4> VMask = getPSHUFShuffleMask(V);
+ for (int &M : Mask)
+ M = VMask[M];
+ return DAG.getNode(N.getOpcode(), DL, VT, V.getOperand(0),
+ getV4X86ShuffleImm8ForMask(Mask, DAG));
+ }
+
+ break;
+ }
+
+ return SDValue();
+}
+
/// PerformShuffleCombine - Performs several different shuffle combines.
static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
@@ -19158,7 +19247,18 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i)
Elts.push_back(getShuffleScalarElt(N, i, DAG, 0));
- return EltsFromConsecutiveLoads(VT, Elts, dl, DAG, true);
+ SDValue LD = EltsFromConsecutiveLoads(VT, Elts, dl, DAG, true);
+ if (LD.getNode())
+ return LD;
+
+ if (isTargetShuffle(N->getOpcode())) {
+ SDValue Shuffle =
+ PerformTargetShuffleCombine(SDValue(N, 0), DAG, DCI, Subtarget);
+ if (Shuffle.getNode())
+ return Shuffle;
+ }
+
+ return SDValue();
}
/// PerformTruncateCombine - Converts truncate operation to
diff --git a/test/CodeGen/X86/2007-09-18-ShuffleXformBug.ll b/test/CodeGen/X86/2007-09-18-ShuffleXformBug.ll
deleted file mode 100644
index 0ae1897e60..0000000000
--- a/test/CodeGen/X86/2007-09-18-ShuffleXformBug.ll
+++ /dev/null
@@ -1,30 +0,0 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 | grep -- -86
-
-define i16 @f(<4 x float>* %tmp116117.i1061.i) nounwind {
-entry:
- alloca [4 x <4 x float>] ; <[4 x <4 x float>]*>:0 [#uses=167]
- alloca [4 x <4 x float>] ; <[4 x <4 x float>]*>:1 [#uses=170]
- alloca [4 x <4 x i32>] ; <[4 x <4 x i32>]*>:2 [#uses=12]
- %.sub6235.i = getelementptr [4 x <4 x float>]* %0, i32 0, i32 0 ; <<4 x float>*> [#uses=76]
- %.sub.i = getelementptr [4 x <4 x float>]* %1, i32 0, i32 0 ; <<4 x float>*> [#uses=59]
-
- %tmp124.i1062.i = getelementptr <4 x float>* %tmp116117.i1061.i, i32 63 ; <<4 x float>*> [#uses=1]
- %tmp125.i1063.i = load <4 x float>* %tmp124.i1062.i ; <<4 x float>> [#uses=5]
- %tmp828.i1077.i = shufflevector <4 x float> %tmp125.i1063.i, <4 x float> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 > ; <<4 x float>> [#uses=4]
- %tmp704.i1085.i = load <4 x float>* %.sub6235.i ; <<4 x float>> [#uses=1]
- %tmp712.i1086.i = call <4 x float> @llvm.x86.sse.max.ps( <4 x float> %tmp704.i1085.i, <4 x float> %tmp828.i1077.i ) ; <<4 x float>> [#uses=1]
- store <4 x float> %tmp712.i1086.i, <4 x float>* %.sub.i
-
- %tmp2587.i1145.gep.i = getelementptr [4 x <4 x float>]* %1, i32 0, i32 0, i32 2 ; <float*> [#uses=1]
- %tmp5334.i = load float* %tmp2587.i1145.gep.i ; <float> [#uses=5]
- %tmp2723.i1170.i = insertelement <4 x float> undef, float %tmp5334.i, i32 2 ; <<4 x float>> [#uses=5]
- store <4 x float> %tmp2723.i1170.i, <4 x float>* %.sub6235.i
-
- %tmp1406.i1367.i = shufflevector <4 x float> %tmp2723.i1170.i, <4 x float> undef, <4 x i32> < i32 2, i32 2, i32 2, i32 2 > ; <<4 x float>> [#uses=1]
- %tmp84.i1413.i = load <4 x float>* %.sub6235.i ; <<4 x float>> [#uses=1]
- %tmp89.i1415.i = fmul <4 x float> %tmp84.i1413.i, %tmp1406.i1367.i ; <<4 x float>> [#uses=1]
- store <4 x float> %tmp89.i1415.i, <4 x float>* %.sub.i
- ret i16 0
-}
-
-declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>)
diff --git a/test/CodeGen/X86/vector-shuffle-128-v8.ll b/test/CodeGen/X86/vector-shuffle-128-v8.ll
index 7447fdc874..091822b6cb 100644
--- a/test/CodeGen/X86/vector-shuffle-128-v8.ll
+++ b/test/CodeGen/X86/vector-shuffle-128-v8.ll
@@ -51,7 +51,7 @@ define <8 x i16> @shuffle_v8i16_31206745(<8 x i16> %a, <8 x i16> %b) {
; CHECK-SSE2-LABEL: @shuffle_v8i16_31206745
; CHECK-SSE2: # BB#0:
; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[3,1,2,0,4,5,6,7]
-; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,6,7,4,5]
+; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,3,2]
; CHECK-SSE2-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 1, i32 2, i32 0, i32 6, i32 7, i32 4, i32 5>
ret <8 x i16> %shuffle
@@ -159,7 +159,7 @@ define <8 x i16> @shuffle_v8i16_26401375(<8 x i16> %a, <8 x i16> %b) {
; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,7,5,4,6]
; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,3,2,1]
; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[1,3,2,0,4,5,6,7]
-; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,6,7,4,5]
+; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,3,2]
; CHECK-SSE2-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 6, i32 4, i32 0, i32 1, i32 3, i32 7, i32 5>
ret <8 x i16> %shuffle
@@ -273,8 +273,7 @@ define <8 x i16> @shuffle_v8i16_4563XXXX(<8 x i16> %a, <8 x i16> %b) {
; CHECK-SSE2: # BB#0:
; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[3,1,2,0]
; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,3,2,3,4,5,6,7]
-; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,3]
-; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[2,3,0,1,4,5,6,7]
+; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[2,0,2,3]
; CHECK-SSE2-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
ret <8 x i16> %shuffle
@@ -285,8 +284,7 @@ define <8 x i16> @shuffle_v8i16_01274563(<8 x i16> %a, <8 x i16> %b) {
; CHECK-SSE2: # BB#0:
; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,1,3]
; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,6,5,4,7]
-; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,3,2,1]
-; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,6,7,4,5]
+; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,3,1,2]
; CHECK-SSE2-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 7, i32 4, i32 5, i32 6, i32 3>
ret <8 x i16> %shuffle
@@ -297,8 +295,7 @@ define <8 x i16> @shuffle_v8i16_45630127(<8 x i16> %a, <8 x i16> %b) {
; CHECK-SSE2: # BB#0:
; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[3,1,2,0]
; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,3,1,2,4,5,6,7]
-; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,1,3]
-; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[2,3,0,1,4,5,6,7]
+; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[2,0,1,3]
; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,6,7,5,4]
; CHECK-SSE2-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 3, i32 0, i32 1, i32 2, i32 7>
@@ -359,9 +356,8 @@ define <8 x i16> @shuffle_v8i16_08196e7f(<8 x i16> %a, <8 x i16> %b) {
define <8 x i16> @shuffle_v8i16_0c1d6879(<8 x i16> %a, <8 x i16> %b) {
; CHECK-SSE2-LABEL: @shuffle_v8i16_0c1d6879
; CHECK-SSE2: # BB#0:
+; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm1 = xmm1[2,0,2,3]
; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,3,2,3]
-; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm1 = xmm1[0,2,2,3]
-; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm1 = xmm1[2,3,0,1,4,5,6,7]
; CHECK-SSE2-NEXT: punpcklwd %xmm1, %xmm0
; CHECK-SSE2-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 12, i32 1, i32 13, i32 6, i32 8, i32 7, i32 9>
@@ -458,8 +454,7 @@ define <8 x i16> @shuffle_v8i16_XXXXcde3(<8 x i16> %a, <8 x i16> %b) {
; CHECK-SSE2-NEXT: punpckhwd %xmm0, %xmm1
; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm1[0,2,2,3,4,5,6,7]
; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,7,6,7]
-; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,2,0]
-; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,6,7,4,5]
+; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,0,2]
; CHECK-SSE2-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 3>
ret <8 x i16> %shuffle