author    Manman Ren <mren@apple.com>    2012-11-13 19:13:05 +0000
committer Manman Ren <mren@apple.com>    2012-11-13 19:13:05 +0000
commit    2adc503f291d69763c5fc59a8e35d318ee22b77a (patch)
tree      095c09167430f2260763a6cc6f95ee77790a9a7e /test
parent    b64e2115de3b293ef706b75f040277477c949208 (diff)
X86: when constructing VZEXT_LOAD from other loads, make sure its output
chain is correctly set up. For example, if the original load must happen before later stores, the constructed VZEXT_LOAD must be constrained to come before those stores.

rdar://12684358

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@167859 91177308-0d34-0410-b5e6-96231b3b80d8
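The diff below contains only the regression test; the code change itself (in lib/Target/X86/X86ISelLowering.cpp) is not part of this hunk. As a rough, hedged sketch of the idea, assuming the usual 3.2-era pattern for folding consecutive loads into an X86ISD::VZEXT_LOAD node: after building the new memory node, any existing users of the original load's output chain should also be made to depend on the new node, typically via a TokenFactor. The helper name and parameters here are illustrative, not the actual patch.

	// Illustrative sketch only (not the r167859 patch body): when narrow loads
	// are folded into a single X86ISD::VZEXT_LOAD, the new node's output chain
	// must be wired into the DAG in place of the original load's chain,
	// otherwise later stores that depended on the original load are free to be
	// scheduled before the folded load.
	static SDValue foldLoadsToVZextLoad(SelectionDAG &DAG, LoadSDNode *LDBase,
	                                    EVT VT, DebugLoc dl) {
	  SDVTList Tys = DAG.getVTList(VT, MVT::Other);
	  SDValue Ops[] = { LDBase->getChain(), LDBase->getBasePtr() };

	  // The exact getMemIntrinsicNode signature varies across LLVM versions;
	  // this follows the 3.2-era interface.
	  SDValue ResNode =
	      DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, 2,
	                              LDBase->getMemoryVT(), LDBase->getPointerInfo(),
	                              LDBase->getAlignment(), LDBase->isVolatile(),
	                              /*ReadMem=*/true, /*WriteMem=*/false);

	  // Key step: if anything (e.g. a later store) uses the original load's
	  // output chain, merge the old and new chains with a TokenFactor and
	  // redirect those users to it, so the folded load is ordered before them.
	  if (LDBase->hasAnyUseOfValue(1)) {
	    SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
	                                   SDValue(LDBase, 1),
	                                   SDValue(ResNode.getNode(), 1));
	    DAG.ReplaceAllUsesOfValueWith(SDValue(LDBase, 1), NewChain);
	    // The RAUW above also rewrote the TokenFactor's own operand; restore it
	    // so the TokenFactor still reads the original load's chain directly.
	    DAG.UpdateNodeOperands(NewChain.getNode(), SDValue(LDBase, 1),
	                           SDValue(ResNode.getNode(), 1));
	  }
	  return ResNode;
	}

The test added below exercises exactly this ordering: the stores back into %A must not be hoisted above the loads from %A that feed the vinsertf128 calls.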
Diffstat (limited to 'test')
-rw-r--r--   test/CodeGen/X86/avx-shuffle.ll   51
1 file changed, 51 insertions(+), 0 deletions(-)
diff --git a/test/CodeGen/X86/avx-shuffle.ll b/test/CodeGen/X86/avx-shuffle.ll
index ec11654b35..904f048d1e 100644
--- a/test/CodeGen/X86/avx-shuffle.ll
+++ b/test/CodeGen/X86/avx-shuffle.ll
@@ -246,3 +246,54 @@ define <8 x float> @test19(<8 x float> %A, <8 x float>%B) nounwind {
ret <8 x float>%S
}
+; rdar://12684358
+; Make sure loads happen before stores.
+; CHECK: swap8doubles
+; CHECK: vmovups {{[0-9]*}}(%rdi), %xmm{{[0-9]+}}
+; CHECK: vmovups {{[0-9]*}}(%rdi), %xmm{{[0-9]+}}
+; CHECK: vmovups {{[0-9]*}}(%rdi), %xmm{{[0-9]+}}
+; CHECK: vmovups {{[0-9]*}}(%rdi), %xmm{{[0-9]+}}
+; CHECK: vmovaps {{[0-9]*}}(%rsi), %ymm{{[0-9]+}}
+; CHECK: vmovaps {{[0-9]*}}(%rsi), %ymm{{[0-9]+}}
+; CHECK: vmovaps %xmm{{[0-9]+}}, {{[0-9]*}}(%rdi)
+; CHECK: vextractf128
+; CHECK: vmovaps %xmm{{[0-9]+}}, {{[0-9]*}}(%rdi)
+; CHECK: vextractf128
+; CHECK: vmovaps %ymm{{[0-9]+}}, {{[0-9]*}}(%rsi)
+; CHECK: vmovaps %ymm{{[0-9]+}}, {{[0-9]*}}(%rsi)
+define void @swap8doubles(double* nocapture %A, double* nocapture %C) nounwind uwtable ssp {
+entry:
+ %add.ptr = getelementptr inbounds double* %A, i64 2
+ %v.i = bitcast double* %A to <2 x double>*
+ %0 = load <2 x double>* %v.i, align 1
+ %shuffle.i.i = shufflevector <2 x double> %0, <2 x double> <double 0.000000e+00, double undef>, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
+ %v1.i = bitcast double* %add.ptr to <2 x double>*
+ %1 = load <2 x double>* %v1.i, align 1
+ %2 = tail call <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double> %shuffle.i.i, <2 x double> %1, i8 1) nounwind
+ %add.ptr1 = getelementptr inbounds double* %A, i64 6
+ %add.ptr2 = getelementptr inbounds double* %A, i64 4
+ %v.i27 = bitcast double* %add.ptr2 to <2 x double>*
+ %3 = load <2 x double>* %v.i27, align 1
+ %shuffle.i.i28 = shufflevector <2 x double> %3, <2 x double> <double 0.000000e+00, double undef>, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
+ %v1.i29 = bitcast double* %add.ptr1 to <2 x double>*
+ %4 = load <2 x double>* %v1.i29, align 1
+ %5 = tail call <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double> %shuffle.i.i28, <2 x double> %4, i8 1) nounwind
+ %6 = bitcast double* %C to <4 x double>*
+ %7 = load <4 x double>* %6, align 32
+ %add.ptr5 = getelementptr inbounds double* %C, i64 4
+ %8 = bitcast double* %add.ptr5 to <4 x double>*
+ %9 = load <4 x double>* %8, align 32
+ %shuffle.i26 = shufflevector <4 x double> %7, <4 x double> undef, <2 x i32> <i32 0, i32 1>
+ %10 = tail call <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double> %7, i8 1)
+ %shuffle.i = shufflevector <4 x double> %9, <4 x double> undef, <2 x i32> <i32 0, i32 1>
+ %11 = tail call <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double> %9, i8 1)
+ store <2 x double> %shuffle.i26, <2 x double>* %v.i, align 16
+ store <2 x double> %10, <2 x double>* %v1.i, align 16
+ store <2 x double> %shuffle.i, <2 x double>* %v.i27, align 16
+ store <2 x double> %11, <2 x double>* %v1.i29, align 16
+ store <4 x double> %2, <4 x double>* %6, align 32
+ store <4 x double> %5, <4 x double>* %8, align 32
+ ret void
+}
+declare <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double>, i8) nounwind readnone
+declare <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double>, <2 x double>, i8) nounwind readnone