summaryrefslogtreecommitdiff
path: root/test/CodeGen/X86/splat-scalar-load.ll
diff options
context:
space:
mode:
author: Evan Cheng <evan.cheng@apple.com> 2009-12-09 21:00:30 +0000
committer: Evan Cheng <evan.cheng@apple.com> 2009-12-09 21:00:30 +0000
commit: c363094e04df621d41ca570eb2a7bf8826bb8c1a (patch)
tree: f471aca10ab30ffc20bb95bc94a18fed3c50357e /test/CodeGen/X86/splat-scalar-load.ll
parent: 89452f7386540ca83e8991e74f1d74bbe7271922 (diff)
download: llvm-c363094e04df621d41ca570eb2a7bf8826bb8c1a.tar.gz
llvm-c363094e04df621d41ca570eb2a7bf8826bb8c1a.tar.bz2
llvm-c363094e04df621d41ca570eb2a7bf8826bb8c1a.tar.xz
Optimize splat of a scalar load into a shuffle of a vector load when it's legal, e.g.
  vector_shuffle (scalar_to_vector (i32 load (ptr + 4))), undef, <0, 0, 0, 0>
=>
  vector_shuffle (v4i32 load ptr), undef, <1, 1, 1, 1>
iff ptr is 16-byte aligned (or can be made 16-byte aligned).

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@90984 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/CodeGen/X86/splat-scalar-load.ll')
-rw-r--r-- test/CodeGen/X86/splat-scalar-load.ll 43
1 files changed, 43 insertions, 0 deletions
diff --git a/test/CodeGen/X86/splat-scalar-load.ll b/test/CodeGen/X86/splat-scalar-load.ll
new file mode 100644
index 0000000000..6c93efab70
--- /dev/null
+++ b/test/CodeGen/X86/splat-scalar-load.ll
@@ -0,0 +1,43 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s
+; rdar://7434544
+
+define <2 x i64> @t1() nounwind ssp { ; splat of element 0 of a 16-byte-aligned stack array, returned as <2 x i64>
+entry: ; expectation below: one pshufd with a memory operand at (%esp); immediate 0 selects source lane 0 for all four result lanes
+; CHECK: t1:
+; CHECK: pshufd $0, (%esp), %xmm0
+ %array = alloca [8 x float], align 16 ; stack slot explicitly declared 16-byte aligned
+ %arrayidx = getelementptr inbounds [8 x float]* %array, i32 0, i32 0 ; address of element 0 (byte offset 0)
+ %tmp2 = load float* %arrayidx ; the scalar that is splatted; the array is never stored to, only the instruction shape is tested
+ %vecinit = insertelement <4 x float> undef, float %tmp2, i32 0 ; lane 0 = %tmp2
+ %vecinit5 = insertelement <4 x float> %vecinit, float %tmp2, i32 1 ; lane 1 = %tmp2
+ %vecinit7 = insertelement <4 x float> %vecinit5, float %tmp2, i32 2 ; lane 2 = %tmp2
+ %vecinit9 = insertelement <4 x float> %vecinit7, float %tmp2, i32 3 ; lane 3 = %tmp2, completing the splat
+ %0 = bitcast <4 x float> %vecinit9 to <2 x i64> ; reinterpret the same 128 bits as the declared return type
+ ret <2 x i64> %0
+}
+
+define <2 x i64> @t2() nounwind ssp { ; splat of element 1 of a stack array that is only declared 4-byte aligned
+entry: ; expectation below: immediate 85 is 0x55, i.e. source lane 1 for all four result lanes, read from the array base at (%esp)
+; CHECK: t2:
+; CHECK: pshufd $85, (%esp), %xmm0
+ %array = alloca [8 x float], align 4 ; only align 4 as written; presumably the slot's alignment is raised to 16 so the vector load is legal - confirm against the lowering code
+ %arrayidx = getelementptr inbounds [8 x float]* %array, i32 0, i32 1 ; address of element 1 (byte offset 4)
+ %tmp2 = load float* %arrayidx ; the scalar that is splatted
+ %vecinit = insertelement <4 x float> undef, float %tmp2, i32 0 ; lane 0 = %tmp2
+ %vecinit5 = insertelement <4 x float> %vecinit, float %tmp2, i32 1 ; lane 1 = %tmp2
+ %vecinit7 = insertelement <4 x float> %vecinit5, float %tmp2, i32 2 ; lane 2 = %tmp2
+ %vecinit9 = insertelement <4 x float> %vecinit7, float %tmp2, i32 3 ; lane 3 = %tmp2, completing the splat
+ %0 = bitcast <4 x float> %vecinit9 to <2 x i64> ; reinterpret the same 128 bits as the declared return type
+ ret <2 x i64> %0
+}
+
+define <4 x float> @t3(float %tmp1, float %tmp2, float %tmp3) nounwind readnone ssp { ; splat of the third float argument; %tmp1 and %tmp2 are unused
+entry: ; expectation below: immediate -86 is 0xAA as a byte, i.e. source lane 2 of the 16 bytes at (%esp) for all four result lanes
+; CHECK: t3:
+; CHECK: pshufd $-86, (%esp), %xmm0
+ %0 = insertelement <4 x float> undef, float %tmp3, i32 0 ; lane 0 = %tmp3. NOTE(review): verify that lane 2 of (%esp) really is %tmp3's slot; if (%esp) holds the return address and arguments start at 4(%esp), lane 2 would be %tmp2 - TODO confirm
+ %1 = insertelement <4 x float> %0, float %tmp3, i32 1 ; lane 1 = %tmp3
+ %2 = insertelement <4 x float> %1, float %tmp3, i32 2 ; lane 2 = %tmp3
+ %3 = insertelement <4 x float> %2, float %tmp3, i32 3 ; lane 3 = %tmp3, completing the splat
+ ret <4 x float> %3
+}