summaryrefslogtreecommitdiff
path: root/test/CodeGen/X86/sse41.ll
diff options
context:
space:
mode:
authorChris Lattner <sabre@nondot.org>2010-08-28 17:59:08 +0000
committerChris Lattner <sabre@nondot.org>2010-08-28 17:59:08 +0000
commit24faf611a33900b225c636f908eb30234215af21 (patch)
treefb412877725cb1019812ce5b68beab763bc9347b /test/CodeGen/X86/sse41.ll
parent3ddcc430401f0d16bee17b2afb52dcaa2f480f8b (diff)
downloadllvm-24faf611a33900b225c636f908eb30234215af21.tar.gz
llvm-24faf611a33900b225c636f908eb30234215af21.tar.bz2
llvm-24faf611a33900b225c636f908eb30234215af21.tar.xz
fix the buildvector->insertp[sd] logic to not always create a redundant
insertp[sd] $0, which is a noop. Before: _f32: ## @f32 pshufd $1, %xmm1, %xmm2 pshufd $1, %xmm0, %xmm3 addss %xmm2, %xmm3 addss %xmm1, %xmm0 ## kill: XMM0<def> XMM0<kill> XMM0<def> insertps $0, %xmm0, %xmm0 insertps $16, %xmm3, %xmm0 ret after: _f32: ## @f32 movdqa %xmm0, %xmm2 addss %xmm1, %xmm2 pshufd $1, %xmm1, %xmm1 pshufd $1, %xmm0, %xmm3 addss %xmm1, %xmm3 movdqa %xmm2, %xmm0 insertps $16, %xmm3, %xmm0 ret The extra movs are due to a random (poor) scheduling decision. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@112379 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/CodeGen/X86/sse41.ll')
-rw-r--r--test/CodeGen/X86/sse41.ll25
1 files changed, 25 insertions, 0 deletions
diff --git a/test/CodeGen/X86/sse41.ll b/test/CodeGen/X86/sse41.ll
index ef66d1a44a..3a14fa2630 100644
--- a/test/CodeGen/X86/sse41.ll
+++ b/test/CodeGen/X86/sse41.ll
@@ -224,3 +224,28 @@ declare i32 @llvm.x86.sse41.ptestz(<4 x float>, <4 x float>) nounwind readnone
declare i32 @llvm.x86.sse41.ptestc(<4 x float>, <4 x float>) nounwind readnone
declare i32 @llvm.x86.sse41.ptestnzc(<4 x float>, <4 x float>) nounwind readnone
+; This used to compile to insertps $0 + insertps $16. insertps $0 is always
+; pointless.
+define <2 x float> @buildvector(<2 x float> %A, <2 x float> %B) nounwind {
+entry:
+ %tmp7 = extractelement <2 x float> %A, i32 0
+ %tmp5 = extractelement <2 x float> %A, i32 1
+ %tmp3 = extractelement <2 x float> %B, i32 0
+ %tmp1 = extractelement <2 x float> %B, i32 1
+ %add.r = fadd float %tmp7, %tmp3
+ %add.i = fadd float %tmp5, %tmp1
+ %tmp11 = insertelement <2 x float> undef, float %add.r, i32 0
+ %tmp9 = insertelement <2 x float> %tmp11, float %add.i, i32 1
+ ret <2 x float> %tmp9
+; X32: buildvector:
+; X32-NOT: insertps $0
+; X32: insertps $16
+; X32-NOT: insertps $0
+; X32: ret
+; X64: buildvector:
+; X64-NOT: insertps $0
+; X64: insertps $16
+; X64-NOT: insertps $0
+; X64: ret
+}
+