summary | refs | log | tree | commit | diff
path: root/test/CodeGen/X86
diff options
context:
space:
mode:
author Evan Cheng <evan.cheng@apple.com> 2008-04-05 00:30:36 +0000
committer Evan Cheng <evan.cheng@apple.com> 2008-04-05 00:30:36 +0000
commit 0c0f83ff5d214a7f42e86ae62814526ba40a28cf (patch)
tree ffb95313874d040042271f886170c24b80ebd693 /test/CodeGen/X86
parent a4091d34f3c05717eb5bb66a1257a0c1005e6dfa (diff)
download llvm-0c0f83ff5d214a7f42e86ae62814526ba40a28cf.tar.gz
llvm-0c0f83ff5d214a7f42e86ae62814526ba40a28cf.tar.bz2
llvm-0c0f83ff5d214a7f42e86ae62814526ba40a28cf.tar.xz
Favors pshufd over shufps when shuffling elements from one vector. pshufd is faster than shufps.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@49244 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/CodeGen/X86')
-rw-r--r-- test/CodeGen/X86/2007-09-18-ShuffleXformBug.ll 2
-rw-r--r-- test/CodeGen/X86/peep-vector-extract-concat.ll 4
-rw-r--r-- test/CodeGen/X86/vec_set-3.ll 7
-rw-r--r-- test/CodeGen/X86/vec_set-6.ll 2
-rw-r--r-- test/CodeGen/X86/vec_shuffle-13.ll 6
-rw-r--r-- test/CodeGen/X86/vec_shuffle-16.ll 25
-rw-r--r-- test/CodeGen/X86/vec_shuffle-5.ll 2
-rw-r--r-- test/CodeGen/X86/vec_shuffle.ll 6
-rw-r--r-- test/CodeGen/X86/vec_splat-2.ll 2
-rw-r--r-- test/CodeGen/X86/vec_splat.ll 6
10 files changed, 43 insertions, 19 deletions
diff --git a/test/CodeGen/X86/2007-09-18-ShuffleXformBug.ll b/test/CodeGen/X86/2007-09-18-ShuffleXformBug.ll
index 18ef151ca7..142bcd3347 100644
--- a/test/CodeGen/X86/2007-09-18-ShuffleXformBug.ll
+++ b/test/CodeGen/X86/2007-09-18-ShuffleXformBug.ll
@@ -1,6 +1,6 @@
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep 170
-define i16 @f(<4 x float>* %tmp116117.i1061.i) {
+define i16 @f(<4 x float>* %tmp116117.i1061.i) nounwind {
entry:
alloca [4 x <4 x float>] ; <[4 x <4 x float>]*>:0 [#uses=167]
alloca [4 x <4 x float>] ; <[4 x <4 x float>]*>:1 [#uses=170]
diff --git a/test/CodeGen/X86/peep-vector-extract-concat.ll b/test/CodeGen/X86/peep-vector-extract-concat.ll
index 737da66d1f..e6c88bbff9 100644
--- a/test/CodeGen/X86/peep-vector-extract-concat.ll
+++ b/test/CodeGen/X86/peep-vector-extract-concat.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+sse2,-sse41 | grep {shufps \$3, %xmm0, %xmm0}
+; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+sse2,-sse41 | grep {pshufd \$3, %xmm0, %xmm0}
-define float @foo(<8 x float> %a) {
+define float @foo(<8 x float> %a) nounwind {
%c = extractelement <8 x float> %a, i32 3
ret float %c
}
diff --git a/test/CodeGen/X86/vec_set-3.ll b/test/CodeGen/X86/vec_set-3.ll
index 31716bcd8c..546ca0bcf3 100644
--- a/test/CodeGen/X86/vec_set-3.ll
+++ b/test/CodeGen/X86/vec_set-3.ll
@@ -1,15 +1,14 @@
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -o %t -f
-; RUN: grep shufps %t | count 1
-; RUN: grep pshufd %t | count 1
+; RUN: grep pshufd %t | count 2
-define <4 x float> @test(float %a) {
+define <4 x float> @test(float %a) nounwind {
%tmp = insertelement <4 x float> zeroinitializer, float %a, i32 1 ; <<4 x float>> [#uses=1]
%tmp5 = insertelement <4 x float> %tmp, float 0.000000e+00, i32 2 ; <<4 x float>> [#uses=1]
%tmp6 = insertelement <4 x float> %tmp5, float 0.000000e+00, i32 3 ; <<4 x float>> [#uses=1]
ret <4 x float> %tmp6
}
-define <2 x i64> @test2(i32 %a) {
+define <2 x i64> @test2(i32 %a) nounwind {
%tmp7 = insertelement <4 x i32> zeroinitializer, i32 %a, i32 2 ; <<4 x i32>> [#uses=1]
%tmp9 = insertelement <4 x i32> %tmp7, i32 0, i32 3 ; <<4 x i32>> [#uses=1]
%tmp10 = bitcast <4 x i32> %tmp9 to <2 x i64> ; <<2 x i64>> [#uses=1]
diff --git a/test/CodeGen/X86/vec_set-6.ll b/test/CodeGen/X86/vec_set-6.ll
index a28c54491f..1eeedf184d 100644
--- a/test/CodeGen/X86/vec_set-6.ll
+++ b/test/CodeGen/X86/vec_set-6.ll
@@ -2,7 +2,7 @@
; RUN: grep unpcklps %t | count 1
; RUN: grep shufps %t | count 1
-define <4 x float> @test(float %a, float %b, float %c) {
+define <4 x float> @test(float %a, float %b, float %c) nounwind {
%tmp = insertelement <4 x float> zeroinitializer, float %a, i32 1 ; <<4 x float>> [#uses=1]
%tmp8 = insertelement <4 x float> %tmp, float %b, i32 2 ; <<4 x float>> [#uses=1]
%tmp10 = insertelement <4 x float> %tmp8, float %c, i32 3 ; <<4 x float>> [#uses=1]
diff --git a/test/CodeGen/X86/vec_shuffle-13.ll b/test/CodeGen/X86/vec_shuffle-13.ll
index 6953bf0e4b..f889e8f279 100644
--- a/test/CodeGen/X86/vec_shuffle-13.ll
+++ b/test/CodeGen/X86/vec_shuffle-13.ll
@@ -5,17 +5,17 @@
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep pshuflw | count 1
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep pshufhw | count 1
-define <8 x i16> @t1(<8 x i16> %A, <8 x i16> %B) {
+define <8 x i16> @t1(<8 x i16> %A, <8 x i16> %B) nounwind {
%tmp = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 8, i32 9, i32 0, i32 1, i32 10, i32 11, i32 2, i32 3 >
ret <8 x i16> %tmp
}
-define <8 x i16> @t2(<8 x i16> %A, <8 x i16> %B) {
+define <8 x i16> @t2(<8 x i16> %A, <8 x i16> %B) nounwind {
%tmp = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 8, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7 >
ret <8 x i16> %tmp
}
-define <8 x i16> @t3(<8 x i16> %A, <8 x i16> %B) {
+define <8 x i16> @t3(<8 x i16> %A, <8 x i16> %B) nounwind {
%tmp = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 0, i32 0, i32 3, i32 2, i32 4, i32 6, i32 4, i32 7 >
ret <8 x i16> %tmp
}
diff --git a/test/CodeGen/X86/vec_shuffle-16.ll b/test/CodeGen/X86/vec_shuffle-16.ll
new file mode 100644
index 0000000000..2e12f6e9bd
--- /dev/null
+++ b/test/CodeGen/X86/vec_shuffle-16.ll
@@ -0,0 +1,25 @@
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse | grep shufps | count 4
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse | grep mov | count 2
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep pshufd | count 4
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | not grep shufps
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | not grep mov
+
+define <4 x float> @t1(<4 x float> %a, <4 x float> %b) nounwind {
+ %tmp1 = shufflevector <4 x float> %b, <4 x float> undef, <4 x i32> zeroinitializer
+ ret <4 x float> %tmp1
+}
+
+define <4 x float> @t2(<4 x float> %A, <4 x float> %B) nounwind {
+ %tmp = shufflevector <4 x float> %A, <4 x float> %B, <4 x i32> < i32 3, i32 3, i32 3, i32 3 >
+ ret <4 x float> %tmp
+}
+
+define <4 x float> @t3(<4 x float> %A, <4 x float> %B) nounwind {
+ %tmp = shufflevector <4 x float> %A, <4 x float> %B, <4 x i32> < i32 4, i32 4, i32 4, i32 4 >
+ ret <4 x float> %tmp
+}
+
+define <4 x float> @t4(<4 x float> %A, <4 x float> %B) nounwind {
+ %tmp = shufflevector <4 x float> %A, <4 x float> %B, <4 x i32> < i32 1, i32 3, i32 2, i32 0 >
+ ret <4 x float> %tmp
+}
diff --git a/test/CodeGen/X86/vec_shuffle-5.ll b/test/CodeGen/X86/vec_shuffle-5.ll
index 9343684946..1acd73fcba 100644
--- a/test/CodeGen/X86/vec_shuffle-5.ll
+++ b/test/CodeGen/X86/vec_shuffle-5.ll
@@ -2,7 +2,7 @@
; RUN: grep movhlps %t | count 1
; RUN: grep shufps %t | count 1
-define void @test() {
+define void @test() nounwind {
%tmp1 = load <4 x float>* null ; <<4 x float>> [#uses=2]
%tmp2 = shufflevector <4 x float> %tmp1, <4 x float> < float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00 >, <4 x i32> < i32 0, i32 1, i32 6, i32 7 > ; <<4 x float>> [#uses=1]
%tmp3 = shufflevector <4 x float> %tmp1, <4 x float> zeroinitializer, <4 x i32> < i32 2, i32 3, i32 6, i32 7 > ; <<4 x float>> [#uses=1]
diff --git a/test/CodeGen/X86/vec_shuffle.ll b/test/CodeGen/X86/vec_shuffle.ll
index 34c039ac20..f39b9fe2db 100644
--- a/test/CodeGen/X86/vec_shuffle.ll
+++ b/test/CodeGen/X86/vec_shuffle.ll
@@ -3,7 +3,7 @@
; RUN: grep movupd %t | count 1
; RUN: grep pshufhw %t | count 1
-define void @test_v4sf(<4 x float>* %P, float %X, float %Y) {
+define void @test_v4sf(<4 x float>* %P, float %X, float %Y) nounwind {
%tmp = insertelement <4 x float> zeroinitializer, float %X, i32 0 ; <<4 x float>> [#uses=1]
%tmp2 = insertelement <4 x float> %tmp, float %X, i32 1 ; <<4 x float>> [#uses=1]
%tmp4 = insertelement <4 x float> %tmp2, float %Y, i32 2 ; <<4 x float>> [#uses=1]
@@ -12,14 +12,14 @@ define void @test_v4sf(<4 x float>* %P, float %X, float %Y) {
ret void
}
-define void @test_v2sd(<2 x double>* %P, double %X, double %Y) {
+define void @test_v2sd(<2 x double>* %P, double %X, double %Y) nounwind {
%tmp = insertelement <2 x double> zeroinitializer, double %X, i32 0 ; <<2 x double>> [#uses=1]
%tmp2 = insertelement <2 x double> %tmp, double %Y, i32 1 ; <<2 x double>> [#uses=1]
store <2 x double> %tmp2, <2 x double>* %P
ret void
}
-define void @test_v8i16(<2 x i64>* %res, <2 x i64>* %A) {
+define void @test_v8i16(<2 x i64>* %res, <2 x i64>* %A) nounwind {
%tmp = load <2 x i64>* %A ; <<2 x i64>> [#uses=1]
%tmp.upgrd.1 = bitcast <2 x i64> %tmp to <8 x i16> ; <<8 x i16>> [#uses=8]
%tmp.upgrd.2 = extractelement <8 x i16> %tmp.upgrd.1, i32 0 ; <i16> [#uses=1]
diff --git a/test/CodeGen/X86/vec_splat-2.ll b/test/CodeGen/X86/vec_splat-2.ll
index 26e1b8839a..c6e3dddd5f 100644
--- a/test/CodeGen/X86/vec_splat-2.ll
+++ b/test/CodeGen/X86/vec_splat-2.ll
@@ -1,6 +1,6 @@
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep pshufd | count 1
-define void @test(<2 x i64>* %P, i8 %x) {
+define void @test(<2 x i64>* %P, i8 %x) nounwind {
%tmp = insertelement <16 x i8> zeroinitializer, i8 %x, i32 0 ; <<16 x i8>> [#uses=1]
%tmp36 = insertelement <16 x i8> %tmp, i8 %x, i32 1 ; <<16 x i8>> [#uses=1]
%tmp38 = insertelement <16 x i8> %tmp36, i8 %x, i32 2 ; <<16 x i8>> [#uses=1]
diff --git a/test/CodeGen/X86/vec_splat.ll b/test/CodeGen/X86/vec_splat.ll
index c6100ecd09..64222e40ff 100644
--- a/test/CodeGen/X86/vec_splat.ll
+++ b/test/CodeGen/X86/vec_splat.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep shufps
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep pshufd
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse3 | grep movddup
-define void @test_v4sf(<4 x float>* %P, <4 x float>* %Q, float %X) {
+define void @test_v4sf(<4 x float>* %P, <4 x float>* %Q, float %X) nounwind {
%tmp = insertelement <4 x float> zeroinitializer, float %X, i32 0 ; <<4 x float>> [#uses=1]
%tmp2 = insertelement <4 x float> %tmp, float %X, i32 1 ; <<4 x float>> [#uses=1]
%tmp4 = insertelement <4 x float> %tmp2, float %X, i32 2 ; <<4 x float>> [#uses=1]
@@ -12,7 +12,7 @@ define void @test_v4sf(<4 x float>* %P, <4 x float>* %Q, float %X) {
ret void
}
-define void @test_v2sd(<2 x double>* %P, <2 x double>* %Q, double %X) {
+define void @test_v2sd(<2 x double>* %P, <2 x double>* %Q, double %X) nounwind {
%tmp = insertelement <2 x double> zeroinitializer, double %X, i32 0 ; <<2 x double>> [#uses=1]
%tmp2 = insertelement <2 x double> %tmp, double %X, i32 1 ; <<2 x double>> [#uses=1]
%tmp4 = load <2 x double>* %Q ; <<2 x double>> [#uses=1]