diff options
author | Chris Lattner <sabre@nondot.org> | 2010-08-28 17:59:08 +0000 |
---|---|---|
committer | Chris Lattner <sabre@nondot.org> | 2010-08-28 17:59:08 +0000 |
commit | 24faf611a33900b225c636f908eb30234215af21 (patch) | |
tree | fb412877725cb1019812ce5b68beab763bc9347b /test/CodeGen/X86/sse41.ll | |
parent | 3ddcc430401f0d16bee17b2afb52dcaa2f480f8b (diff) | |
download | llvm-24faf611a33900b225c636f908eb30234215af21.tar.gz llvm-24faf611a33900b225c636f908eb30234215af21.tar.bz2 llvm-24faf611a33900b225c636f908eb30234215af21.tar.xz |
Fix the buildvector->insertp[sd] logic so that it no longer always creates a
redundant insertp[sd] $0, which is a no-op. Before:
_f32: ## @f32
pshufd $1, %xmm1, %xmm2
pshufd $1, %xmm0, %xmm3
addss %xmm2, %xmm3
addss %xmm1, %xmm0
## kill: XMM0<def> XMM0<kill> XMM0<def>
insertps $0, %xmm0, %xmm0
insertps $16, %xmm3, %xmm0
ret
After:
_f32: ## @f32
movdqa %xmm0, %xmm2
addss %xmm1, %xmm2
pshufd $1, %xmm1, %xmm1
pshufd $1, %xmm0, %xmm3
addss %xmm1, %xmm3
movdqa %xmm2, %xmm0
insertps $16, %xmm3, %xmm0
ret
The extra movs are due to a random (poor) scheduling decision.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@112379 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/CodeGen/X86/sse41.ll')
-rw-r--r-- | test/CodeGen/X86/sse41.ll | 25 |
1 files changed, 25 insertions, 0 deletions
diff --git a/test/CodeGen/X86/sse41.ll b/test/CodeGen/X86/sse41.ll
index ef66d1a44a..3a14fa2630 100644
--- a/test/CodeGen/X86/sse41.ll
+++ b/test/CodeGen/X86/sse41.ll
@@ -224,3 +224,28 @@ declare i32 @llvm.x86.sse41.ptestz(<4 x float>, <4 x float>) nounwind readnone
 declare i32 @llvm.x86.sse41.ptestc(<4 x float>, <4 x float>) nounwind readnone
 declare i32 @llvm.x86.sse41.ptestnzc(<4 x float>, <4 x float>) nounwind readnone

+; This used to compile to insertps $0 + insertps $16. insertps $0 is always
+; pointless.
+define <2 x float> @buildvector(<2 x float> %A, <2 x float> %B) nounwind {
+entry:
+  %tmp7 = extractelement <2 x float> %A, i32 0
+  %tmp5 = extractelement <2 x float> %A, i32 1
+  %tmp3 = extractelement <2 x float> %B, i32 0
+  %tmp1 = extractelement <2 x float> %B, i32 1
+  %add.r = fadd float %tmp7, %tmp3
+  %add.i = fadd float %tmp5, %tmp1
+  %tmp11 = insertelement <2 x float> undef, float %add.r, i32 0
+  %tmp9 = insertelement <2 x float> %tmp11, float %add.i, i32 1
+  ret <2 x float> %tmp9
+; X32: buildvector:
+; X32-NOT: insertps $0
+; X32: insertps $16
+; X32-NOT: insertps $0
+; X32: ret
+; X64: buildvector:
+; X64-NOT: insertps $0
+; X64: insertps $16
+; X64-NOT: insertps $0
+; X64: ret
+}
+