author      Chandler Carruth <chandlerc@gmail.com>   2014-06-27 11:34:40 +0000
committer   Chandler Carruth <chandlerc@gmail.com>   2014-06-27 11:34:40 +0000
commit      4363b0729b2b2aa50949ac9fb3af5ced0240898f (patch)
tree        a79307489e3443b447e9a302f14e4d4bbbeb17f4 /test
parent      f91161874e516621aa9c1734391c5bd01d9d4e6d (diff)
[x86] Teach the target-specific combining how to aggressively fold
half-shuffles, even looking through intervening instructions in a chain.

Summary:
This doesn't happen to show up with any test cases I've found for the current
shuffle lowering, but previous attempts would benefit from this and it seems
generally useful. I've tested it directly using intrinsics, which also shows
that it will work with hand vectorized code as well.

Note that even though pshufd isn't directly used in these tests, it gets
exercised because we combine some of the half shuffles into a pshufd first,
and then merge them.

Differential Revision: http://reviews.llvm.org/D4291

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211890 91177308-0d34-0410-b5e6-96231b3b80d8
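The tests added below exercise the combine through the llvm.x86.sse2.pshufl.w
and llvm.x86.sse2.pshufh.w intrinsics. As a rough illustration only (not part
of this commit), the following C sketch shows the kind of hand-vectorized code
the message refers to: the function name double_reverse_lo is hypothetical,
while _mm_shufflelo_epi16 is the standard SSE2 intrinsic that lowers to
pshuflw. Applying the reversing immediate 27 (0b00011011) twice restores the
original element order, so the combine should be able to fold both shuffles
away, mirroring the combine_pshuflw1 test.

  #include <emmintrin.h>

  /* Hypothetical example: two back-to-back pshuflw shuffles with the
   * reversing mask 27 compose to the identity, so the target-specific
   * combine can delete both of them. */
  __m128i double_reverse_lo(__m128i a) {
    __m128i b = _mm_shufflelo_epi16(a, 27); /* reverse the low four i16 lanes */
    return _mm_shufflelo_epi16(b, 27);      /* reverse again: net identity */
  }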
Diffstat (limited to 'test')
-rw-r--r--   test/CodeGen/X86/vector-shuffle-combining.ll   49
1 file changed, 49 insertions, 0 deletions
diff --git a/test/CodeGen/X86/vector-shuffle-combining.ll b/test/CodeGen/X86/vector-shuffle-combining.ll
new file mode 100644
index 0000000000..dae1ef5875
--- /dev/null
+++ b/test/CodeGen/X86/vector-shuffle-combining.ll
@@ -0,0 +1,49 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=CHECK-SSE2
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-unknown"
+
+declare <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16>, i8)
+declare <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16>, i8)
+
+define <8 x i16> @combine_pshuflw1(<8 x i16> %a) {
+; CHECK-SSE2-LABEL: @combine_pshuflw1
+; CHECK-SSE2: # BB#0:
+; CHECK-SSE2-NEXT: retq
+ %b = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %a, i8 27)
+ %c = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %b, i8 27)
+ ret <8 x i16> %c
+}
+
+define <8 x i16> @combine_pshuflw2(<8 x i16> %a) {
+; CHECK-SSE2-LABEL: @combine_pshuflw2
+; CHECK-SSE2: # BB#0:
+; CHECK-SSE2-NEXT: retq
+ %b = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %a, i8 27)
+ %c = call <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16> %b, i8 -28)
+ %d = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %c, i8 27)
+ ret <8 x i16> %d
+}
+
+define <8 x i16> @combine_pshuflw3(<8 x i16> %a) {
+; CHECK-SSE2-LABEL: @combine_pshuflw3
+; CHECK-SSE2: # BB#0:
+; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,7,6,5,4]
+; CHECK-SSE2-NEXT: retq
+ %b = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %a, i8 27)
+ %c = call <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16> %b, i8 27)
+ %d = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %c, i8 27)
+ ret <8 x i16> %d
+}
+
+define <8 x i16> @combine_pshufhw1(<8 x i16> %a) {
+; CHECK-SSE2-LABEL: @combine_pshufhw1
+; CHECK-SSE2: # BB#0:
+; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[3,2,1,0,4,5,6,7]
+; CHECK-SSE2-NEXT: retq
+ %b = call <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16> %a, i8 27)
+ %c = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %b, i8 27)
+ %d = call <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16> %c, i8 27)
+ ret <8 x i16> %d
+}
+