Instead of performing a vector load, a shuffle, and then an element extract, load the element directly from the vector's address plus the element's offset.

    pshufd $1, (%rdi), %xmm0
    movd %xmm0, %eax
=>
    movl 4(%rdi), %eax

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@51026 91177308-0d34-0410-b5e6-96231b3b80d8
author: Evan Cheng <evan.cheng@apple.com> 2008-05-13 08:35:03 +0000
committer: Evan Cheng <evan.cheng@apple.com> 2008-05-13 08:35:03 +0000
commit: 77f0b7a50a08614b5ffd58f1864b68a9a30d0cb0 (patch)
tree: bb8e0ed03e63e856988c905af714ffb7bd62205e /test
parent: 6513c1bf90be63b9bd6a43c0db593174075009a0 (diff)
download: llvm-77f0b7a50a08614b5ffd58f1864b68a9a30d0cb0.tar.gz
llvm-77f0b7a50a08614b5ffd58f1864b68a9a30d0cb0.tar.bz2
llvm-77f0b7a50a08614b5ffd58f1864b68a9a30d0cb0.tar.xz
4 files changed, 21 insertions, 13 deletions
diff --git a/test/CodeGen/X86/extractelement-from-arg.ll b/test/CodeGen/X86/extractelement-from-arg.ll
index d28f016dba..44704b6adb 100644
--- a/test/CodeGen/X86/extractelement-from-arg.ll
+++ b/test/CodeGen/X86/extractelement-from-arg.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as %s -o - | llc -march=x86-64
+; RUN: llvm-as %s -o - | llc -march=x86-64 -mattr=+sse2
 
-define void @test(float* %R, <4 x float> %X) {
+define void @test(float* %R, <4 x float> %X) nounwind {
 	%tmp = extractelement <4 x float> %X, i32 3
 	store float %tmp, float* %R
 	ret void
diff --git a/test/CodeGen/X86/extractelement-load.ll b/test/CodeGen/X86/extractelement-load.ll
new file mode 100644
index 0000000000..4850eba609
--- /dev/null
+++ b/test/CodeGen/X86/extractelement-load.ll
@@ -0,0 +1,9 @@
+; RUN: llvm-as %s -o - | llc -march=x86 -mattr=+sse2 -mcpu=yonah | not grep movd
+; RUN: llvm-as %s -o - | llc -march=x86-64 -mattr=+sse2 -mcpu=yonah | not grep movd
+
+define i32 @t(<2 x i64>* %val) nounwind  {
+	%tmp2 = load <2 x i64>* %val, align 16		; <<2 x i64>> [#uses=1]
+	%tmp3 = bitcast <2 x i64> %tmp2 to <4 x i32>		; <<4 x i32>> [#uses=1]
+	%tmp4 = extractelement <4 x i32> %tmp3, i32 2		; <i32> [#uses=1]
+	ret i32 %tmp4
+}
diff --git a/test/CodeGen/X86/sse-align-12.ll b/test/CodeGen/X86/sse-align-12.ll
index 7ff6b1e3ed..a5016601db 100644
--- a/test/CodeGen/X86/sse-align-12.ll
+++ b/test/CodeGen/X86/sse-align-12.ll
@@ -28,8 +28,7 @@ define <4 x float> @b(<4 x float>* %y, <4 x float> %z) nounwind {
   %s = insertelement <4 x float> %r, float %b, i32 3
   ret <4 x float> %s
 }
-define <2 x double> @c(<2 x double>* %y)
-{
+define <2 x double> @c(<2 x double>* %y) nounwind {
   %x = load <2 x double>* %y, align 8
   %a = extractelement <2 x double> %x, i32 0
   %c = extractelement <2 x double> %x, i32 1
@@ -37,8 +36,7 @@ define <2 x double> @c(<2 x double>* %y)
   %r = insertelement <2 x double> %p, double %a, i32 1
   ret <2 x double> %r
 }
-define <2 x double> @d(<2 x double>* %y, <2 x double> %z)
-{
+define <2 x double> @d(<2 x double>* %y, <2 x double> %z) nounwind {
   %x = load <2 x double>* %y, align 8
   %a = extractelement <2 x double> %x, i32 1
   %c = extractelement <2 x double> %z, i32 1
diff --git a/test/CodeGen/X86/vec_extract-sse4.ll b/test/CodeGen/X86/vec_extract-sse4.ll
index 1ef5e8803e..d6726be1db 100644
--- a/test/CodeGen/X86/vec_extract-sse4.ll
+++ b/test/CodeGen/X86/vec_extract-sse4.ll
@@ -1,29 +1,30 @@
 ; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse41 -o %t -f
-; RUN: grep extractps %t | count 1
-; RUN: grep pextrd    %t | count 2
-; RUN: grep pshufd    %t | count 1
+; RUN: grep extractps   %t | count 1
+; RUN: grep pextrd      %t | count 1
+; RUN: not grep pshufd  %t
+; RUN: not grep movss   %t
 
-define void @t1(float* %R, <4 x float>* %P1) {
+define void @t1(float* %R, <4 x float>* %P1) nounwind {
 	%X = load <4 x float>* %P1
 	%tmp = extractelement <4 x float> %X, i32 3
 	store float %tmp, float* %R
 	ret void
 }
 
-define float @t2(<4 x float>* %P1) {
+define float @t2(<4 x float>* %P1) nounwind {
 	%X = load <4 x float>* %P1
 	%tmp = extractelement <4 x float> %X, i32 2
 	ret float %tmp
 }
 
-define void @t3(i32* %R, <4 x i32>* %P1) {
+define void @t3(i32* %R, <4 x i32>* %P1) nounwind {
 	%X = load <4 x i32>* %P1
 	%tmp = extractelement <4 x i32> %X, i32 3
 	store i32 %tmp, i32* %R
 	ret void
 }
 
-define i32 @t4(<4 x i32>* %P1) {
+define i32 @t4(<4 x i32>* %P1) nounwind {
 	%X = load <4 x i32>* %P1
 	%tmp = extractelement <4 x i32> %X, i32 3
 	ret i32 %tmp
author	Evan Cheng <evan.cheng@apple.com>	2008-05-13 08:35:03 +0000
committer	Evan Cheng <evan.cheng@apple.com>	2008-05-13 08:35:03 +0000
commit	77f0b7a50a08614b5ffd58f1864b68a9a30d0cb0 (patch)
tree	bb8e0ed03e63e856988c905af714ffb7bd62205e /test
parent	6513c1bf90be63b9bd6a43c0db593174075009a0 (diff)
download	llvm-77f0b7a50a08614b5ffd58f1864b68a9a30d0cb0.tar.gz llvm-77f0b7a50a08614b5ffd58f1864b68a9a30d0cb0.tar.bz2 llvm-77f0b7a50a08614b5ffd58f1864b68a9a30d0cb0.tar.xz