summaryrefslogtreecommitdiff
path: root/test
diff options
context:
space:
mode:
authorChandler Carruth <chandlerc@gmail.com>2014-06-27 11:40:13 +0000
committerChandler Carruth <chandlerc@gmail.com>2014-06-27 11:40:13 +0000
commitc5114dbcc3cd5a768bed43e5ae88d87a88b4a1b1 (patch)
treea40ba0c593d53aebda6ab6503acaeb501cda6f52 /test
parentfc9897f66f40ec2b079cb2d697bd41db7db2f3ed (diff)
downloadllvm-c5114dbcc3cd5a768bed43e5ae88d87a88b4a1b1.tar.gz
llvm-c5114dbcc3cd5a768bed43e5ae88d87a88b4a1b1.tar.bz2
llvm-c5114dbcc3cd5a768bed43e5ae88d87a88b4a1b1.tar.xz
[x86] Teach the target combine step to aggressively fold pshufd insturcions.
Summary: This allows it to fold pshufd instructions across intervening half-shuffles and other noise. This pattern actually shows up in the generic lowering tests, but I've also added direct tests using intrinsics to make sure that the specific desired functionality is working even if the lowering stuff changes in the future. Differential Revision: http://reviews.llvm.org/D4292 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211892 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test')
-rw-r--r--test/CodeGen/X86/vector-shuffle-128-v8.ll3
-rw-r--r--test/CodeGen/X86/vector-shuffle-combining.ll60
2 files changed, 61 insertions, 2 deletions
diff --git a/test/CodeGen/X86/vector-shuffle-128-v8.ll b/test/CodeGen/X86/vector-shuffle-128-v8.ll
index 091822b6cb..5d1922a348 100644
--- a/test/CodeGen/X86/vector-shuffle-128-v8.ll
+++ b/test/CodeGen/X86/vector-shuffle-128-v8.ll
@@ -157,9 +157,8 @@ define <8 x i16> @shuffle_v8i16_26401375(<8 x i16> %a, <8 x i16> %b) {
; CHECK-SSE2: # BB#0:
; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,2,1,3,4,5,6,7]
; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,7,5,4,6]
-; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,3,2,1]
+; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,3,1,2]
; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[1,3,2,0,4,5,6,7]
-; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,3,2]
; CHECK-SSE2-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 6, i32 4, i32 0, i32 1, i32 3, i32 7, i32 5>
ret <8 x i16> %shuffle
diff --git a/test/CodeGen/X86/vector-shuffle-combining.ll b/test/CodeGen/X86/vector-shuffle-combining.ll
index dae1ef5875..1bc2aee6ef 100644
--- a/test/CodeGen/X86/vector-shuffle-combining.ll
+++ b/test/CodeGen/X86/vector-shuffle-combining.ll
@@ -3,9 +3,69 @@
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-unknown"
+declare <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32>, i8)
declare <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16>, i8)
declare <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16>, i8)
+define <4 x i32> @combine_pshufd1(<4 x i32> %a) {
+; CHECK-SSE2-LABEL: @combine_pshufd1
+; CHECK-SSE2: # BB#0:
+; CHECK-SSE2-NEXT: retq
+ %b = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 27)
+ %c = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %b, i8 27)
+ ret <4 x i32> %c
+}
+
+define <4 x i32> @combine_pshufd2(<4 x i32> %a) {
+; CHECK-SSE2-LABEL: @combine_pshufd2
+; CHECK-SSE2: # BB#0:
+; CHECK-SSE2-NEXT: retq
+ %b = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 27)
+ %b.cast = bitcast <4 x i32> %b to <8 x i16>
+ %c = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %b.cast, i8 -28)
+ %c.cast = bitcast <8 x i16> %c to <4 x i32>
+ %d = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %c.cast, i8 27)
+ ret <4 x i32> %d
+}
+
+define <4 x i32> @combine_pshufd3(<4 x i32> %a) {
+; CHECK-SSE2-LABEL: @combine_pshufd3
+; CHECK-SSE2: # BB#0:
+; CHECK-SSE2-NEXT: retq
+ %b = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 27)
+ %b.cast = bitcast <4 x i32> %b to <8 x i16>
+ %c = call <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16> %b.cast, i8 -28)
+ %c.cast = bitcast <8 x i16> %c to <4 x i32>
+ %d = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %c.cast, i8 27)
+ ret <4 x i32> %d
+}
+
+define <4 x i32> @combine_pshufd4(<4 x i32> %a) {
+; CHECK-SSE2-LABEL: @combine_pshufd4
+; CHECK-SSE2: # BB#0:
+; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,7,6,5,4]
+; CHECK-SSE2-NEXT: retq
+ %b = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 -31)
+ %b.cast = bitcast <4 x i32> %b to <8 x i16>
+ %c = call <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16> %b.cast, i8 27)
+ %c.cast = bitcast <8 x i16> %c to <4 x i32>
+ %d = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %c.cast, i8 -31)
+ ret <4 x i32> %d
+}
+
+define <4 x i32> @combine_pshufd5(<4 x i32> %a) {
+; CHECK-SSE2-LABEL: @combine_pshufd5
+; CHECK-SSE2: # BB#0:
+; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[3,2,1,0,4,5,6,7]
+; CHECK-SSE2-NEXT: retq
+ %b = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 -76)
+ %b.cast = bitcast <4 x i32> %b to <8 x i16>
+ %c = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %b.cast, i8 27)
+ %c.cast = bitcast <8 x i16> %c to <4 x i32>
+ %d = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %c.cast, i8 -76)
+ ret <4 x i32> %d
+}
+
define <8 x i16> @combine_pshuflw1(<8 x i16> %a) {
; CHECK-SSE2-LABEL: @combine_pshuflw1
; CHECK-SSE2: # BB#0: