diff options
-rw-r--r-- | lib/CodeGen/ExecutionDepsFix.cpp | 19 | ||||
-rw-r--r-- | test/CodeGen/X86/avx-intrinsics-x86.ll | 18 | ||||
-rw-r--r-- | test/CodeGen/X86/avx-logic.ll | 8 | ||||
-rw-r--r-- | test/CodeGen/X86/nontemporal.ll | 9 | ||||
-rw-r--r-- | test/CodeGen/X86/sse-align-3.ll | 4 | ||||
-rw-r--r-- | test/CodeGen/X86/sse2-blend.ll | 11 | ||||
-rw-r--r-- | test/CodeGen/X86/sse2.ll | 2 | ||||
-rw-r--r-- | test/CodeGen/X86/vec_shuffle.ll | 15 |
8 files changed, 58 insertions, 28 deletions
diff --git a/lib/CodeGen/ExecutionDepsFix.cpp b/lib/CodeGen/ExecutionDepsFix.cpp index 3d6f256dd8..bd77f655c1 100644 --- a/lib/CodeGen/ExecutionDepsFix.cpp +++ b/lib/CodeGen/ExecutionDepsFix.cpp @@ -510,11 +510,20 @@ bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) { leaveBasicBlock(MBB); } - // Clear the LiveOuts vectors. Should we also collapse any remaining - // DomainValues? - for (LiveOutMap::const_iterator i = LiveOuts.begin(), e = LiveOuts.end(); - i != e; ++i) - delete[] i->second; + // Clear the LiveOuts vectors and collapse any remaining DomainValues. + for (ReversePostOrderTraversal<MachineBasicBlock*>::rpo_iterator + MBBI = RPOT.begin(), MBBE = RPOT.end(); MBBI != MBBE; ++MBBI) { + LiveOutMap::const_iterator FI = LiveOuts.find(*MBBI); + if (FI == LiveOuts.end()) + continue; + assert(FI->second && "Null entry"); + // The DomainValue is collapsed when the last reference is killed. + LiveRegs = FI->second; + for (unsigned i = 0, e = NumRegs; i != e; ++i) + if (LiveRegs[i]) + Kill(i); + delete[] LiveRegs; + } LiveOuts.clear(); Avail.clear(); Allocator.DestroyAll(); diff --git a/test/CodeGen/X86/avx-intrinsics-x86.ll b/test/CodeGen/X86/avx-intrinsics-x86.ll index 276209ea75..3fa1d95bf2 100644 --- a/test/CodeGen/X86/avx-intrinsics-x86.ll +++ b/test/CodeGen/X86/avx-intrinsics-x86.ll @@ -315,24 +315,31 @@ declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone define void @test_x86_sse2_movnt_dq(i8* %a0, <2 x i64> %a1) { + ; CHECK: test_x86_sse2_movnt_dq ; CHECK: movl ; CHECK: vmovntdq - call void @llvm.x86.sse2.movnt.dq(i8* %a0, <2 x i64> %a1) + ; add operation forces the execution domain. 
+ %a2 = add <2 x i64> %a1, <i64 1, i64 1> + call void @llvm.x86.sse2.movnt.dq(i8* %a0, <2 x i64> %a2) ret void } declare void @llvm.x86.sse2.movnt.dq(i8*, <2 x i64>) nounwind define void @test_x86_sse2_movnt_pd(i8* %a0, <2 x double> %a1) { + ; CHECK: test_x86_sse2_movnt_pd ; CHECK: movl ; CHECK: vmovntpd - call void @llvm.x86.sse2.movnt.pd(i8* %a0, <2 x double> %a1) + ; fadd operation forces the execution domain. + %a2 = fadd <2 x double> %a1, <double 0x0, double 0x4200000000000000> + call void @llvm.x86.sse2.movnt.pd(i8* %a0, <2 x double> %a2) ret void } declare void @llvm.x86.sse2.movnt.pd(i8*, <2 x double>) nounwind define <2 x double> @test_x86_sse2_mul_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: test_x86_sse2_mul_sd ; CHECK: vmulsd %res = call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] ret <2 x double> %res @@ -749,6 +756,7 @@ declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone define void @test_x86_sse2_storel_dq(i8* %a0, <4 x i32> %a1) { + ; CHECK: test_x86_sse2_storel_dq ; CHECK: movl ; CHECK: vmovq call void @llvm.x86.sse2.storel.dq(i8* %a0, <4 x i32> %a1) @@ -758,6 +766,7 @@ declare void @llvm.x86.sse2.storel.dq(i8*, <4 x i32>) nounwind define void @test_x86_sse2_storeu_dq(i8* %a0, <16 x i8> %a1) { + ; CHECK: test_x86_sse2_storeu_dq ; CHECK: movl ; CHECK: vmovdqu call void @llvm.x86.sse2.storeu.dq(i8* %a0, <16 x i8> %a1) @@ -767,15 +776,18 @@ declare void @llvm.x86.sse2.storeu.dq(i8*, <16 x i8>) nounwind define void @test_x86_sse2_storeu_pd(i8* %a0, <2 x double> %a1) { + ; CHECK: test_x86_sse2_storeu_pd ; CHECK: movl ; CHECK: vmovupd - call void @llvm.x86.sse2.storeu.pd(i8* %a0, <2 x double> %a1) + %a2 = fadd <2 x double> %a1, <double 0x0, double 0x4200000000000000> + call void @llvm.x86.sse2.storeu.pd(i8* %a0, <2 x double> %a2) ret void } declare void @llvm.x86.sse2.storeu.pd(i8*, <2 x double>) nounwind define <2 x double> @test_x86_sse2_sub_sd(<2 x double> %a0, <2 x
double> %a1) { + ; CHECK: test_x86_sse2_sub_sd ; CHECK: vsubsd %res = call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] ret <2 x double> %res diff --git a/test/CodeGen/X86/avx-logic.ll b/test/CodeGen/X86/avx-logic.ll index 518c09c869..cd37135582 100644 --- a/test/CodeGen/X86/avx-logic.ll +++ b/test/CodeGen/X86/avx-logic.ll @@ -165,7 +165,9 @@ entry: ; CHECK: vpandn %xmm define <2 x i64> @vpandn(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp { entry: - %y = xor <2 x i64> %a, <i64 -1, i64 -1> + ; Force the execution domain with an add. + %a2 = add <2 x i64> %a, <i64 1, i64 1> + %y = xor <2 x i64> %a2, <i64 -1, i64 -1> %x = and <2 x i64> %a, %y ret <2 x i64> %x } @@ -173,7 +175,9 @@ entry: ; CHECK: vpand %xmm define <2 x i64> @vpand(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp { entry: - %x = and <2 x i64> %a, %b + ; Force the execution domain with an add. + %a2 = add <2 x i64> %a, <i64 1, i64 1> + %x = and <2 x i64> %a2, %b ret <2 x i64> %x } diff --git a/test/CodeGen/X86/nontemporal.ll b/test/CodeGen/X86/nontemporal.ll index 1d095359b6..ae04435ac3 100644 --- a/test/CodeGen/X86/nontemporal.ll +++ b/test/CodeGen/X86/nontemporal.ll @@ -3,13 +3,16 @@ define void @f(<4 x float> %A, i8* %B, <2 x double> %C, i32 %D, <2 x i64> %E) { ; CHECK: movntps %cast = bitcast i8* %B to <4 x float>* - store <4 x float> %A, <4 x float>* %cast, align 16, !nontemporal !0 + %A2 = fadd <4 x float> %A, <float 0x0, float 0x0, float 0x0, float 0x4200000000000000> + store <4 x float> %A2, <4 x float>* %cast, align 16, !nontemporal !0 ; CHECK: movntdq %cast1 = bitcast i8* %B to <2 x i64>* - store <2 x i64> %E, <2 x i64>* %cast1, align 16, !nontemporal !0 + %E2 = add <2 x i64> %E, <i64 1, i64 2> + store <2 x i64> %E2, <2 x i64>* %cast1, align 16, !nontemporal !0 ; CHECK: movntpd %cast2 = bitcast i8* %B to <2 x double>* - store <2 x double> %C, <2 x double>* %cast2, align 16, !nontemporal !0 + %C2 = fadd <2 x double> %C, 
<double 0x0, double 0x4200000000000000> + store <2 x double> %C2, <2 x double>* %cast2, align 16, !nontemporal !0 ; CHECK: movnti %cast3 = bitcast i8* %B to i32* store i32 %D, i32* %cast3, align 16, !nontemporal !0 diff --git a/test/CodeGen/X86/sse-align-3.ll b/test/CodeGen/X86/sse-align-3.ll index 04f216176c..b6b0471e91 100644 --- a/test/CodeGen/X86/sse-align-3.ll +++ b/test/CodeGen/X86/sse-align-3.ll @@ -1,8 +1,8 @@ ; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s ; CHECK-NOT: movapd ; CHECK: movaps -; CHECK-NOT: movaps -; CHECK: movapd +; CHECK-NOT: movapd +; CHECK: movaps ; CHECK-NOT: movap define void @foo(<4 x float>* %p, <4 x float> %x) nounwind { diff --git a/test/CodeGen/X86/sse2-blend.ll b/test/CodeGen/X86/sse2-blend.ll index 4ff1d035e4..2f4317bf29 100644 --- a/test/CodeGen/X86/sse2-blend.ll +++ b/test/CodeGen/X86/sse2-blend.ll @@ -26,11 +26,10 @@ define void@vsel_i32(<4 x i32>* %v1, <4 x i32>* %v2) { ret void } -; FIXME: The -mattr=+sse2,-sse41 disable the ExecutionDepsFix pass causing the -; mixed domains here. +; Without forcing instructions, fall back to the preferred PS domain. ; CHECK: vsel_i64 ; CHECK: xorps -; CHECK: pand +; CHECK: andps ; CHECK: andnps ; CHECK: orps ; CHECK: ret @@ -43,16 +42,14 @@ define void@vsel_i64(<4 x i64>* %v1, <4 x i64>* %v2) { ret void } -; FIXME: The -mattr=+sse2,-sse41 disable the ExecutionDepsFix pass causing the -; mixed domains here. +; Without forcing instructions, fall back to the preferred PS domain. 
; CHECK: vsel_double ; CHECK: xorps -; CHECK: pand +; CHECK: andps ; CHECK: andnps ; CHECK: orps ; CHECK: ret - define void@vsel_double(<4 x double>* %v1, <4 x double>* %v2) { %A = load <4 x double>* %v1 %B = load <4 x double>* %v2 diff --git a/test/CodeGen/X86/sse2.ll b/test/CodeGen/X86/sse2.ll index d520d5c1e3..1d74af2ba3 100644 --- a/test/CodeGen/X86/sse2.ll +++ b/test/CodeGen/X86/sse2.ll @@ -144,7 +144,7 @@ define <2 x double> @test11(double %a, double %b) nounwind { %tmp7 = insertelement <2 x double> %tmp, double %b, i32 1 ; <<2 x double>> [#uses=1] ret <2 x double> %tmp7 ; CHECK: test11: -; CHECK: movapd 4(%esp), %xmm0 +; CHECK: movaps 4(%esp), %xmm0 } define void @test12() nounwind { diff --git a/test/CodeGen/X86/vec_shuffle.ll b/test/CodeGen/X86/vec_shuffle.ll index 2a48de2209..d20b3e7b1f 100644 --- a/test/CodeGen/X86/vec_shuffle.ll +++ b/test/CodeGen/X86/vec_shuffle.ll @@ -1,9 +1,8 @@ -; RUN: llc < %s -march=x86 -mcpu=core2 -o %t -; RUN: grep movq %t | count 1 -; RUN: grep pshufd %t | count 1 -; RUN: grep movupd %t | count 1 -; RUN: grep pshufhw %t | count 1 +; RUN: llc < %s -march=x86 -mcpu=core2 | FileCheck %s +; CHECK: test_v4sf +; CHECK: movq 8(%esp) +; CHECK: pshufd $80 define void @test_v4sf(<4 x float>* %P, float %X, float %Y) nounwind { %tmp = insertelement <4 x float> zeroinitializer, float %X, i32 0 ; <<4 x float>> [#uses=1] %tmp2 = insertelement <4 x float> %tmp, float %X, i32 1 ; <<4 x float>> [#uses=1] @@ -13,6 +12,9 @@ define void @test_v4sf(<4 x float>* %P, float %X, float %Y) nounwind { ret void } +; CHECK: test_v2sd +; CHECK: movups 8(%esp) +; CHECK: movaps define void @test_v2sd(<2 x double>* %P, double %X, double %Y) nounwind { %tmp = insertelement <2 x double> zeroinitializer, double %X, i32 0 ; <<2 x double>> [#uses=1] %tmp2 = insertelement <2 x double> %tmp, double %Y, i32 1 ; <<2 x double>> [#uses=1] @@ -20,6 +22,9 @@ define void @test_v2sd(<2 x double>* %P, double %X, double %Y) nounwind { ret void } +; CHECK: test_v8i16 +; CHECK: 
pshufhw $-58 +; CHECK: movdqa define void @test_v8i16(<2 x i64>* %res, <2 x i64>* %A) nounwind { %tmp = load <2 x i64>* %A ; <<2 x i64>> [#uses=1] %tmp.upgrd.1 = bitcast <2 x i64> %tmp to <8 x i16> ; <<8 x i16>> [#uses=8] |