diff options
author | Chandler Carruth <chandlerc@gmail.com> | 2014-06-28 05:16:40 +0000 |
---|---|---|
committer | Chandler Carruth <chandlerc@gmail.com> | 2014-06-28 05:16:40 +0000 |
commit | fe05f61e5d4325dd354f686bd1a2f4f17527dbfb (patch) | |
tree | f480285d95df56995a334c689fa51e251f923b7b /test | |
parent | be56f1605298fab52f9e1e63aee7b64c01479172 (diff) | |
download | llvm-fe05f61e5d4325dd354f686bd1a2f4f17527dbfb.tar.gz llvm-fe05f61e5d4325dd354f686bd1a2f4f17527dbfb.tar.bz2 llvm-fe05f61e5d4325dd354f686bd1a2f4f17527dbfb.tar.xz |
[x86] Add handling for splat-like widenings of v16i8 shuffles.
These show up really frequently, not the least with actual splats. =] We
lowered these quite badly before. The new code path tries to widen i8
shuffles to i16 shuffles in a splat-like way. There are still some
inefficiencies in our i16 splat logic though, so we aren't really done
here.
Also, for certain patterns (bit of a gather-and-splat) we still
generate pretty silly code, and I've left a fixme for addressing it.
However, I'm not actually worried about this code pattern as much. The
old shuffle lowering generates a 29 instruction monstrosity for it that
should execute much more slowly.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211974 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test')
-rw-r--r-- | test/CodeGen/X86/vector-shuffle-128-v16.ll | 94 |
1 files changed, 89 insertions, 5 deletions
diff --git a/test/CodeGen/X86/vector-shuffle-128-v16.ll b/test/CodeGen/X86/vector-shuffle-128-v16.ll index b5e0b79088..f3a0931f73 100644 --- a/test/CodeGen/X86/vector-shuffle-128-v16.ll +++ b/test/CodeGen/X86/vector-shuffle-128-v16.ll @@ -3,6 +3,93 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-unknown" +define <16 x i8> @shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i8> %a, <16 x i8> %b) { +; CHECK-SSE2-LABEL: @shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00 +; CHECK-SSE2: # BB#0: +; CHECK-SSE2-NEXT: punpcklbw %xmm0, %xmm0 +; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,0,3] +; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,0,0,0,4,5,6,7] +; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,4,4] +; CHECK-SSE2-NEXT: retq + %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> + ret <16 x i8> %shuffle +} + +define <16 x i8> @shuffle_v16i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01(<16 x i8> %a, <16 x i8> %b) { +; CHECK-SSE2-LABEL: @shuffle_v16i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01 +; CHECK-SSE2: # BB#0: +; CHECK-SSE2-NEXT: punpcklbw %xmm0, %xmm0 +; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,0,3] +; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,0,0,0,4,5,6,7] +; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,5,5,5,5] +; CHECK-SSE2-NEXT: retq + %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> + ret <16 x i8> %shuffle +} + +define <16 x i8> @shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08(<16 x i8> %a, <16 x i8> %b) { +; CHECK-SSE2-LABEL: @shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08 +; CHECK-SSE2: # BB#0: +; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1 +; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm2 +; CHECK-SSE2-NEXT: punpckhbw %xmm1, %xmm2 +; CHECK-SSE2-NEXT: punpcklbw %xmm1, %xmm0 +; CHECK-SSE2-NEXT: punpcklwd %xmm2, %xmm0 +; CHECK-SSE2-NEXT: packuswb %xmm0, %xmm0 +; CHECK-SSE2-NEXT: punpcklbw %xmm0, %xmm0 +; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,0,3] +; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,0,0,0,4,5,6,7] +; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,5,5,5,5] +; CHECK-SSE2-NEXT: retq + %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8> + ret <16 x i8> %shuffle +} + +define <16 x i8> @shuffle_v16i8_00_00_00_00_01_01_01_01_02_02_02_02_03_03_03_03(<16 x i8> %a, <16 x i8> %b) { +; CHECK-SSE2-LABEL: @shuffle_v16i8_00_00_00_00_01_01_01_01_02_02_02_02_03_03_03_03 +; CHECK-SSE2: # BB#0: +; CHECK-SSE2-NEXT: punpcklbw %xmm0, %xmm0 +; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,2,1] +; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,0,1,1,4,5,6,7] +; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,6,6,7,7] +; CHECK-SSE2-NEXT: retq + %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3> + ret <16 x i8> %shuffle +} + +define <16 x i8> @shuffle_v16i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12(<16 x i8> %a, <16 x i8> %b) { +; CHECK-SSE2-LABEL: @shuffle_v16i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12 +; CHECK-SSE2: # BB#0: +; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1 +; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm2 +; CHECK-SSE2-NEXT: punpcklbw %xmm1, %xmm2 +; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm2 = xmm2[2,1,2,3] +; CHECK-SSE2-NEXT: punpckhbw %xmm1, %xmm0 +; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,3] +; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,2,2,3,4,5,6,7] +; CHECK-SSE2-NEXT: punpcklwd %xmm2, %xmm0 +; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,0,3] +; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[1,0,2,3,4,5,6,7] +; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,5,5,6,7] +; CHECK-SSE2-NEXT: packuswb %xmm0, %xmm0 +; CHECK-SSE2-NEXT: punpcklbw %xmm0, %xmm0 +; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,0,1] +; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,0,2,2,4,5,6,7] +; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,5,5,6,6] +; CHECK-SSE2-NEXT: retq + %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12> + ret <16 x i8> %shuffle +} + +define <16 x i8> @shuffle_v16i8_00_00_01_01_02_02_03_03_04_04_05_05_06_06_07_07(<16 x i8> %a, <16 x i8> %b) { +; CHECK-SSE2-LABEL: @shuffle_v16i8_00_00_01_01_02_02_03_03_04_04_05_05_06_06_07_07 +; CHECK-SSE2: # BB#0: +; CHECK-SSE2-NEXT: punpcklbw %xmm0, %xmm0 +; CHECK-SSE2-NEXT: retq + %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7> + ret <16 x i8> %shuffle +} + define <16 x i8> @shuffle_v16i8_0101010101010101(<16 x i8> %a, <16 x i8> %b) { ; CHECK-SSE2-LABEL: @shuffle_v16i8_0101010101010101 ; CHECK-SSE2: pshufd {{.*}} # xmm0 = xmm0[0,1,0,3] @@ -23,12 +110,9 @@ define <16 x i8> @shuffle_v16i8_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23( define <16 x i8> @shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07(<16 x i8> %a, <16 x i8> %b) { ; CHECK-SSE2-LABEL: @shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07 -; CHECK-SSE2: pxor %xmm2, %xmm2 -; CHECK-SSE2-NEXT: punpcklbw %xmm2, %xmm1 -; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm1 = xmm1[0,1,0,3] +; CHECK-SSE2: # BB#0: +; CHECK-SSE2-NEXT: punpcklbw %xmm1, %xmm1 ; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm1 = xmm1[0,0,0,0,4,5,6,7] -; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm1 = xmm1[0,1,2,3,4,4,4,4] -; CHECK-SSE2-NEXT: packuswb %xmm0, %xmm1 ; CHECK-SSE2-NEXT: punpcklbw %xmm0, %xmm1 ; CHECK-SSE2-NEXT: movdqa %xmm1, %xmm0 ; CHECK-SSE2-NEXT: retq |