summaryrefslogtreecommitdiff
path: root/test
diff options
context:
space:
mode:
authorChandler Carruth <chandlerc@gmail.com>2014-06-28 05:16:40 +0000
committerChandler Carruth <chandlerc@gmail.com>2014-06-28 05:16:40 +0000
commitfe05f61e5d4325dd354f686bd1a2f4f17527dbfb (patch)
treef480285d95df56995a334c689fa51e251f923b7b /test
parentbe56f1605298fab52f9e1e63aee7b64c01479172 (diff)
downloadllvm-fe05f61e5d4325dd354f686bd1a2f4f17527dbfb.tar.gz
llvm-fe05f61e5d4325dd354f686bd1a2f4f17527dbfb.tar.bz2
llvm-fe05f61e5d4325dd354f686bd1a2f4f17527dbfb.tar.xz
[x86] Add handling for splat-like widenings of v16i8 shuffles.
These show up really frequently, not the least with actual splats. =] We lowered these quite badly before. The new code path tries to widen i8 shuffles to i16 shuffles in a splat-like way. There are still some inefficiencies in our i16 splat logic though, so we aren't really done here. Also, for certain patterns (bit of a gather-and-splat) we still generate pretty silly code, and I've left a fixme for addressing it. However, I'm not actually worried about this code pattern as much. The old shuffle lowering generates a 29 instruction monstrosity for it that should execute much more slowly. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211974 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test')
-rw-r--r--test/CodeGen/X86/vector-shuffle-128-v16.ll94
1 files changed, 89 insertions, 5 deletions
diff --git a/test/CodeGen/X86/vector-shuffle-128-v16.ll b/test/CodeGen/X86/vector-shuffle-128-v16.ll
index b5e0b79088..f3a0931f73 100644
--- a/test/CodeGen/X86/vector-shuffle-128-v16.ll
+++ b/test/CodeGen/X86/vector-shuffle-128-v16.ll
@@ -3,6 +3,93 @@
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-unknown"
+define <16 x i8> @shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
+; CHECK-SSE2: # BB#0:
+; CHECK-SSE2-NEXT: punpcklbw %xmm0, %xmm0
+; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,0,3]
+; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,0,0,0,4,5,6,7]
+; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,4,4]
+; CHECK-SSE2-NEXT: retq
+ %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ ret <16 x i8> %shuffle
+}
+
+define <16 x i8> @shuffle_v16i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v16i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01
+; CHECK-SSE2: # BB#0:
+; CHECK-SSE2-NEXT: punpcklbw %xmm0, %xmm0
+; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,0,3]
+; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,0,0,0,4,5,6,7]
+; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,5,5,5,5]
+; CHECK-SSE2-NEXT: retq
+ %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ ret <16 x i8> %shuffle
+}
+
+define <16 x i8> @shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08
+; CHECK-SSE2: # BB#0:
+; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1
+; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm2
+; CHECK-SSE2-NEXT: punpckhbw %xmm1, %xmm2
+; CHECK-SSE2-NEXT: punpcklbw %xmm1, %xmm0
+; CHECK-SSE2-NEXT: punpcklwd %xmm2, %xmm0
+; CHECK-SSE2-NEXT: packuswb %xmm0, %xmm0
+; CHECK-SSE2-NEXT: punpcklbw %xmm0, %xmm0
+; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,0,3]
+; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,0,0,0,4,5,6,7]
+; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,5,5,5,5]
+; CHECK-SSE2-NEXT: retq
+ %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
+ ret <16 x i8> %shuffle
+}
+
+define <16 x i8> @shuffle_v16i8_00_00_00_00_01_01_01_01_02_02_02_02_03_03_03_03(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v16i8_00_00_00_00_01_01_01_01_02_02_02_02_03_03_03_03
+; CHECK-SSE2: # BB#0:
+; CHECK-SSE2-NEXT: punpcklbw %xmm0, %xmm0
+; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,2,1]
+; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,0,1,1,4,5,6,7]
+; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,6,6,7,7]
+; CHECK-SSE2-NEXT: retq
+ %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3>
+ ret <16 x i8> %shuffle
+}
+
+define <16 x i8> @shuffle_v16i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v16i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12
+; CHECK-SSE2: # BB#0:
+; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1
+; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm2
+; CHECK-SSE2-NEXT: punpcklbw %xmm1, %xmm2
+; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm2 = xmm2[2,1,2,3]
+; CHECK-SSE2-NEXT: punpckhbw %xmm1, %xmm0
+; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,3]
+; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; CHECK-SSE2-NEXT: punpcklwd %xmm2, %xmm0
+; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,0,3]
+; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[1,0,2,3,4,5,6,7]
+; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,5,5,6,7]
+; CHECK-SSE2-NEXT: packuswb %xmm0, %xmm0
+; CHECK-SSE2-NEXT: punpcklbw %xmm0, %xmm0
+; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,0,1]
+; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,0,2,2,4,5,6,7]
+; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,5,5,6,6]
+; CHECK-SSE2-NEXT: retq
+ %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12>
+ ret <16 x i8> %shuffle
+}
+
+define <16 x i8> @shuffle_v16i8_00_00_01_01_02_02_03_03_04_04_05_05_06_06_07_07(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v16i8_00_00_01_01_02_02_03_03_04_04_05_05_06_06_07_07
+; CHECK-SSE2: # BB#0:
+; CHECK-SSE2-NEXT: punpcklbw %xmm0, %xmm0
+; CHECK-SSE2-NEXT: retq
+ %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
+ ret <16 x i8> %shuffle
+}
+
define <16 x i8> @shuffle_v16i8_0101010101010101(<16 x i8> %a, <16 x i8> %b) {
; CHECK-SSE2-LABEL: @shuffle_v16i8_0101010101010101
; CHECK-SSE2: pshufd {{.*}} # xmm0 = xmm0[0,1,0,3]
@@ -23,12 +110,9 @@ define <16 x i8> @shuffle_v16i8_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23(
define <16 x i8> @shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07(<16 x i8> %a, <16 x i8> %b) {
; CHECK-SSE2-LABEL: @shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07
-; CHECK-SSE2: pxor %xmm2, %xmm2
-; CHECK-SSE2-NEXT: punpcklbw %xmm2, %xmm1
-; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm1 = xmm1[0,1,0,3]
+; CHECK-SSE2: # BB#0:
+; CHECK-SSE2-NEXT: punpcklbw %xmm1, %xmm1
; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm1 = xmm1[0,0,0,0,4,5,6,7]
-; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm1 = xmm1[0,1,2,3,4,4,4,4]
-; CHECK-SSE2-NEXT: packuswb %xmm0, %xmm1
; CHECK-SSE2-NEXT: punpcklbw %xmm0, %xmm1
; CHECK-SSE2-NEXT: movdqa %xmm1, %xmm0
; CHECK-SSE2-NEXT: retq