Optimize splat of a scalar load into a shuffle of a vector load when it's legal. e.g.

vector_shuffle (scalar_to_vector (i32 load (ptr + 4))), undef, <0, 0, 0, 0>
=>
vector_shuffle (v4i32 load ptr), undef, <1, 1, 1, 1>
iff ptr is 16-byte aligned (or can be made 16-byte aligned).

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@90984 91177308-0d34-0410-b5e6-96231b3b80d8
author: Evan Cheng <evan.cheng@apple.com> 2009-12-09 21:00:30 +0000
committer: Evan Cheng <evan.cheng@apple.com> 2009-12-09 21:00:30 +0000
commit: c363094e04df621d41ca570eb2a7bf8826bb8c1a (patch)
tree: f471aca10ab30ffc20bb95bc94a18fed3c50357e /test/CodeGen/X86/splat-scalar-load.ll
parent: 89452f7386540ca83e8991e74f1d74bbe7271922 (diff)
download: llvm-c363094e04df621d41ca570eb2a7bf8826bb8c1a.tar.gz
llvm-c363094e04df621d41ca570eb2a7bf8826bb8c1a.tar.bz2
llvm-c363094e04df621d41ca570eb2a7bf8826bb8c1a.tar.xz
1 files changed, 43 insertions, 0 deletions
diff --git a/test/CodeGen/X86/splat-scalar-load.ll b/test/CodeGen/X86/splat-scalar-load.ll
new file mode 100644
index 0000000000..6c93efab70
--- /dev/null
+++ b/test/CodeGen/X86/splat-scalar-load.ll
@@ -0,0 +1,43 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s
+; rdar://7434544
+
+define <2 x i64> @t1() nounwind ssp {
+entry:
+; CHECK: t1:
+; CHECK: pshufd	$0, (%esp), %xmm0
+  %array = alloca [8 x float], align 16
+  %arrayidx = getelementptr inbounds [8 x float]* %array, i32 0, i32 0
+  %tmp2 = load float* %arrayidx
+  %vecinit = insertelement <4 x float> undef, float %tmp2, i32 0
+  %vecinit5 = insertelement <4 x float> %vecinit, float %tmp2, i32 1
+  %vecinit7 = insertelement <4 x float> %vecinit5, float %tmp2, i32 2
+  %vecinit9 = insertelement <4 x float> %vecinit7, float %tmp2, i32 3
+  %0 = bitcast <4 x float> %vecinit9 to <2 x i64>
+  ret <2 x i64> %0
+}
+
+define <2 x i64> @t2() nounwind ssp {
+entry:
+; CHECK: t2:
+; CHECK: pshufd	$85, (%esp), %xmm0
+  %array = alloca [8 x float], align 4
+  %arrayidx = getelementptr inbounds [8 x float]* %array, i32 0, i32 1
+  %tmp2 = load float* %arrayidx
+  %vecinit = insertelement <4 x float> undef, float %tmp2, i32 0
+  %vecinit5 = insertelement <4 x float> %vecinit, float %tmp2, i32 1
+  %vecinit7 = insertelement <4 x float> %vecinit5, float %tmp2, i32 2
+  %vecinit9 = insertelement <4 x float> %vecinit7, float %tmp2, i32 3
+  %0 = bitcast <4 x float> %vecinit9 to <2 x i64>
+  ret <2 x i64> %0
+}
+
+define <4 x float> @t3(float %tmp1, float %tmp2, float %tmp3) nounwind readnone ssp {
+entry:
+; CHECK: t3:
+; CHECK: pshufd	$-86, (%esp), %xmm0
+  %0 = insertelement <4 x float> undef, float %tmp3, i32 0
+  %1 = insertelement <4 x float> %0, float %tmp3, i32 1
+  %2 = insertelement <4 x float> %1, float %tmp3, i32 2
+  %3 = insertelement <4 x float> %2, float %tmp3, i32 3
+  ret <4 x float> %3
+}
author	Evan Cheng <evan.cheng@apple.com>	2009-12-09 21:00:30 +0000
committer	Evan Cheng <evan.cheng@apple.com>	2009-12-09 21:00:30 +0000
commit	c363094e04df621d41ca570eb2a7bf8826bb8c1a (patch)
tree	f471aca10ab30ffc20bb95bc94a18fed3c50357e /test/CodeGen/X86/splat-scalar-load.ll
parent	89452f7386540ca83e8991e74f1d74bbe7271922 (diff)
download	llvm-c363094e04df621d41ca570eb2a7bf8826bb8c1a.tar.gz llvm-c363094e04df621d41ca570eb2a7bf8826bb8c1a.tar.bz2 llvm-c363094e04df621d41ca570eb2a7bf8826bb8c1a.tar.xz