diff options
author | Filipe Cabecinhas <me@filcab.net> | 2014-06-06 18:07:06 +0000 |
---|---|---|
committer | Filipe Cabecinhas <me@filcab.net> | 2014-06-06 18:07:06 +0000 |
commit | 78cf19b9b98e7ea3d4873fc5b1496c1835221951 (patch) | |
tree | f273ed22e3f5f3d20649d0eb0d55dd437aa48f5b /test/CodeGen/X86 | |
parent | 64d39d3281aaabc09ee792312a48251fd3114a3b (diff) | |
download | llvm-78cf19b9b98e7ea3d4873fc5b1496c1835221951.tar.gz llvm-78cf19b9b98e7ea3d4873fc5b1496c1835221951.tar.bz2 llvm-78cf19b9b98e7ea3d4873fc5b1496c1835221951.tar.xz |
Fixed a bug in lowering shuffle_vectors to insertps
Summary:
The insertps lowering was too strict: it did not account for undef elements in the shuffle mask.
Added a test case for this and updated an existing test whose generated code improved.
Reviewers: grosbach, nadav, delena
Subscribers: llvm-commits
Differential Revision: http://reviews.llvm.org/D4039
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@210361 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/CodeGen/X86')
-rw-r--r-- | test/CodeGen/X86/avx-shuffle.ll | 6 | ||||
-rw-r--r-- | test/CodeGen/X86/sse41.ll | 11 |
2 files changed, 15 insertions, 2 deletions
diff --git a/test/CodeGen/X86/avx-shuffle.ll b/test/CodeGen/X86/avx-shuffle.ll
index f407ba4cc1..f3f7e554a3 100644
--- a/test/CodeGen/X86/avx-shuffle.ll
+++ b/test/CodeGen/X86/avx-shuffle.ll
@@ -5,8 +5,10 @@ define <4 x float> @test1(<4 x float> %a) nounwind {
   %b = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 2, i32 5, i32 undef, i32 undef>
   ret <4 x float> %b
 ; CHECK-LABEL: test1:
-; CHECK: vshufps
-; CHECK: vpshufd
+;; TODO: This test could be improved by removing the xor instruction and
+;; having vinsertps zero out the needed elements.
+; CHECK: vxorps
+; CHECK: vinsertps
 }
 
 ; rdar://10538417
diff --git a/test/CodeGen/X86/sse41.ll b/test/CodeGen/X86/sse41.ll
index a3c62016c4..a77ede228d 100644
--- a/test/CodeGen/X86/sse41.ll
+++ b/test/CodeGen/X86/sse41.ll
@@ -692,3 +692,14 @@ define <4 x float> @insertps_from_broadcast_multiple_use(<4 x float> %a, <4 x fl
   %13 = fadd <4 x float> %11, %12
   ret <4 x float> %13
 }
+
+define <4 x float> @insertps_with_undefs(<4 x float> %a, float* %b) {
+; CHECK-LABEL: insertps_with_undefs:
+; CHECK-NOT: shufps
+; CHECK: insertps $32, %xmm0
+; CHECK: ret
+  %1 = load float* %b, align 4
+  %2 = insertelement <4 x float> undef, float %1, i32 0
+  %result = shufflevector <4 x float> %a, <4 x float> %2, <4 x i32> <i32 4, i32 undef, i32 0, i32 7>
+  ret <4 x float> %result
+}