diff options
author | Bruno Cardoso Lopes <bruno.cardoso@gmail.com> | 2010-09-01 22:33:20 +0000 |
---|---|---|
committer | Bruno Cardoso Lopes <bruno.cardoso@gmail.com> | 2010-09-01 22:33:20 +0000 |
commit | 29c353b9c3d8094b9ac7cbbc23cfc8d23a010db4 (patch) | |
tree | a4e807aa743aaad1195266c8c98117f992df82c6 | |
parent | 543cf05b9cb98f50a22cf05137d97bb3bb61f94a (diff) | |
download | llvm-29c353b9c3d8094b9ac7cbbc23cfc8d23a010db4.tar.gz llvm-29c353b9c3d8094b9ac7cbbc23cfc8d23a010db4.tar.bz2 llvm-29c353b9c3d8094b9ac7cbbc23cfc8d23a010db4.tar.xz |
Using target-specific nodes for shuffle nodes makes the mask
check stricter, breaking some cases not checked in the
testsuite, but it also exposes some foldings not done before,
as in this example:
movaps (%rdi), %xmm0
movaps (%rax), %xmm1
movaps %xmm0, %xmm2
movss %xmm1, %xmm2
shufps $36, %xmm2, %xmm0
is now generated as:
movaps (%rdi), %xmm0
movaps %xmm0, %xmm1
movlps (%rax), %xmm1
shufps $36, %xmm1, %xmm0
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@112753 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Target/X86/X86InstrSSE.td | 3 | ||||
-rw-r--r-- | test/CodeGen/X86/vec_shuffle-37.ll | 14 |
2 files changed, 17 insertions, 0 deletions
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 8a1786272f..9dfcbc4836 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -5909,6 +5909,9 @@ def : Pat<(v4i32 (X86Movlps VR128:$src1, (load addr:$src2))), def : Pat<(X86Movlps VR128:$src1, (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))), (MOVLPSrm VR128:$src1, addr:$src2)>; +def : Pat<(X86Movlps VR128:$src1, + (bc_v4i32 (v2i64 (load addr:$src2)))), + (MOVLPSrm VR128:$src1, addr:$src2)>; // Shuffle with MOVLPD def : Pat<(v2f64 (X86Movlpd VR128:$src1, (load addr:$src2))), diff --git a/test/CodeGen/X86/vec_shuffle-37.ll b/test/CodeGen/X86/vec_shuffle-37.ll new file mode 100644 index 0000000000..1ed858de64 --- /dev/null +++ b/test/CodeGen/X86/vec_shuffle-37.ll @@ -0,0 +1,14 @@ +; RUN: llc < %s -march=x86-64 | FileCheck %s + +define <4 x i32> @t00(<4 x i32>* %a0) nounwind ssp { +entry: +; CHECK: movaps (%rdi), %xmm0 +; CHECK-NEXT: movaps %xmm0, %xmm1 +; CHECK-NEXT: movlps (%rax), %xmm1 +; CHECK-NEXT: shufps $36, %xmm1, %xmm0 + %0 = load <4 x i32>* undef, align 16 + %1 = load <4 x i32>* %a0, align 16 + %2 = shufflevector <4 x i32> %1, <4 x i32> %0, <4 x i32> <i32 0, i32 1, i32 2, i32 4> + ret <4 x i32> %2 +} + |