summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEli Friedman <eli.friedman@gmail.com>2009-06-19 07:00:55 +0000
committerEli Friedman <eli.friedman@gmail.com>2009-06-19 07:00:55 +0000
commit7e2242be71bc44ab1447b5211b3c0f1cec94c467 (patch)
treead3903b269224311358f4ad4074a42b39c9ae8fd
parent7a42b08be8836149ea4e3f95ddcc42e198561df3 (diff)
downloadllvm-7e2242be71bc44ab1447b5211b3c0f1cec94c467.tar.gz
llvm-7e2242be71bc44ab1447b5211b3c0f1cec94c467.tar.bz2
llvm-7e2242be71bc44ab1447b5211b3c0f1cec94c467.tar.xz
Fix for PR2484: add an SSE1 pattern for a shuffle we normally prefer to
handle with an SSE2 instruction. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@73760 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Target/X86/X86InstrSSE.td6
-rw-r--r--test/CodeGen/X86/2009-06-18-movlp-shuffle-register.ll9
2 files changed, 15 insertions, 0 deletions
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index b44c7a693e..5d6ef36414 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -3027,6 +3027,12 @@ def : Pat<(v4i32 (movlp VR128:$src1, VR128:$src2)),
(MOVLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
}
+// vector_shuffle v1, v2 <4, 5, 2, 3> using SHUFPSrri (we prefer movsd, but
+// fall back to this for SSE1)
+def : Pat<(v4f32 (movlp:$src3 VR128:$src1, (v4f32 VR128:$src2))),
+ (SHUFPSrri VR128:$src2, VR128:$src1,
+ (SHUFFLE_get_shuf_imm VR128:$src3))>, Requires<[HasSSE1]>;
+
// Set lowest element and zero upper elements.
let AddedComplexity = 15 in
def : Pat<(v2f64 (movl immAllZerosV_bc, VR128:$src)),
diff --git a/test/CodeGen/X86/2009-06-18-movlp-shuffle-register.ll b/test/CodeGen/X86/2009-06-18-movlp-shuffle-register.ll
new file mode 100644
index 0000000000..d6ff5b6803
--- /dev/null
+++ b/test/CodeGen/X86/2009-06-18-movlp-shuffle-register.ll
@@ -0,0 +1,9 @@
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse,-sse2
+; PR2484
+
+define <4 x float> @f4523(<4 x float> %a,<4 x float> %b) nounwind {
+entry:
+%shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4,i32
+5,i32 2,i32 3>
+ret <4 x float> %shuffle
+}