From b26c7727c9a45613d9bae69995cfd719c57c5614 Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Mon, 7 Nov 2011 23:08:21 +0000 Subject: Kill and collapse outstanding DomainValues. DomainValues that are only used by "don't care" instructions are now collapsed to the first possible execution domain after all basic blocks have been processed. This typically means the PS domain on x86. For example, the vsel_i64 and vsel_double functions in sse2-blend.ll are completely collapsed to the PS domain instead of containing a mix of execution domains created by isel. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144037 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/CodeGen/X86/avx-intrinsics-x86.ll | 18 +++++++++++++++--- test/CodeGen/X86/avx-logic.ll | 8 ++++++-- test/CodeGen/X86/nontemporal.ll | 9 ++++++--- test/CodeGen/X86/sse-align-3.ll | 4 ++-- test/CodeGen/X86/sse2-blend.ll | 11 ++++------- test/CodeGen/X86/sse2.ll | 2 +- test/CodeGen/X86/vec_shuffle.ll | 15 ++++++++++----- 7 files changed, 44 insertions(+), 23 deletions(-) (limited to 'test/CodeGen') diff --git a/test/CodeGen/X86/avx-intrinsics-x86.ll b/test/CodeGen/X86/avx-intrinsics-x86.ll index 276209ea75..3fa1d95bf2 100644 --- a/test/CodeGen/X86/avx-intrinsics-x86.ll +++ b/test/CodeGen/X86/avx-intrinsics-x86.ll @@ -315,24 +315,31 @@ declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone define void @test_x86_sse2_movnt_dq(i8* %a0, <2 x i64> %a1) { + ; CHECK: test_x86_sse2_movnt_dq ; CHECK: movl ; CHECK: vmovntdq - call void @llvm.x86.sse2.movnt.dq(i8* %a0, <2 x i64> %a1) + ; add operation forces the execution domain. 
+ %a2 = add <2 x i64> %a1, <i64 1, i64 1> + call void @llvm.x86.sse2.movnt.dq(i8* %a0, <2 x i64> %a2) ret void } declare void @llvm.x86.sse2.movnt.dq(i8*, <2 x i64>) nounwind define void @test_x86_sse2_movnt_pd(i8* %a0, <2 x double> %a1) { + ; CHECK: test_x86_sse2_movnt_pd ; CHECK: movl ; CHECK: vmovntpd - call void @llvm.x86.sse2.movnt.pd(i8* %a0, <2 x double> %a1) + ; fadd operation forces the execution domain. + %a2 = fadd <2 x double> %a1, <double 0x0, double 0x4200000000000000> + call void @llvm.x86.sse2.movnt.pd(i8* %a0, <2 x double> %a2) ret void } declare void @llvm.x86.sse2.movnt.pd(i8*, <2 x double>) nounwind define <2 x double> @test_x86_sse2_mul_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: test_x86_sse2_mul_sd ; CHECK: vmulsd %res = call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] ret <2 x double> %res @@ -749,6 +756,7 @@ declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone define void @test_x86_sse2_storel_dq(i8* %a0, <4 x i32> %a1) { + ; CHECK: test_x86_sse2_storel_dq ; CHECK: movl ; CHECK: vmovq call void @llvm.x86.sse2.storel.dq(i8* %a0, <4 x i32> %a1) @@ -758,6 +766,7 @@ declare void @llvm.x86.sse2.storel.dq(i8*, <4 x i32>) nounwind define void @test_x86_sse2_storeu_dq(i8* %a0, <16 x i8> %a1) { + ; CHECK: test_x86_sse2_storeu_dq ; CHECK: movl ; CHECK: vmovdqu call void @llvm.x86.sse2.storeu.dq(i8* %a0, <16 x i8> %a1) @@ -767,15 +776,18 @@ declare void @llvm.x86.sse2.storeu.dq(i8*, <16 x i8>) nounwind define void @test_x86_sse2_storeu_pd(i8* %a0, <2 x double> %a1) { + ; CHECK: test_x86_sse2_storeu_pd ; CHECK: movl ; CHECK: vmovupd - call void @llvm.x86.sse2.storeu.pd(i8* %a0, <2 x double> %a1) + %a2 = fadd <2 x double> %a1, <double 0x0, double 0x4200000000000000> + call void @llvm.x86.sse2.storeu.pd(i8* %a0, <2 x double> %a2) ret void } declare void @llvm.x86.sse2.storeu.pd(i8*, <2 x double>) nounwind define <2 x double> @test_x86_sse2_sub_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: test_x86_sse2_sub_sd ; CHECK: vsubsd %res = call <2 x double>
@llvm.x86.sse2.sub.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] ret <2 x double> %res diff --git a/test/CodeGen/X86/avx-logic.ll b/test/CodeGen/X86/avx-logic.ll index 518c09c869..cd37135582 100644 --- a/test/CodeGen/X86/avx-logic.ll +++ b/test/CodeGen/X86/avx-logic.ll @@ -165,7 +165,9 @@ entry: ; CHECK: vpandn %xmm define <2 x i64> @vpandn(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp { entry: - %y = xor <2 x i64> %a, <i64 -1, i64 -1> + ; Force the execution domain with an add. + %a2 = add <2 x i64> %a, <i64 1, i64 1> + %y = xor <2 x i64> %a2, <i64 -1, i64 -1> %x = and <2 x i64> %a, %y ret <2 x i64> %x } @@ -173,7 +175,9 @@ entry: ; CHECK: vpand %xmm define <2 x i64> @vpand(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp { entry: - %x = and <2 x i64> %a, %b + ; Force the execution domain with an add. + %a2 = add <2 x i64> %a, <i64 1, i64 1> + %x = and <2 x i64> %a2, %b ret <2 x i64> %x } diff --git a/test/CodeGen/X86/nontemporal.ll b/test/CodeGen/X86/nontemporal.ll index 1d095359b6..ae04435ac3 100644 --- a/test/CodeGen/X86/nontemporal.ll +++ b/test/CodeGen/X86/nontemporal.ll @@ -3,13 +3,16 @@ define void @f(<4 x float> %A, i8* %B, <2 x double> %C, i32 %D, <2 x i64> %E) { ; CHECK: movntps %cast = bitcast i8* %B to <4 x float>* - store <4 x float> %A, <4 x float>* %cast, align 16, !nontemporal !0 + %A2 = fadd <4 x float> %A, <float 1.0, float 1.0, float 1.0, float 1.0> + store <4 x float> %A2, <4 x float>* %cast, align 16, !nontemporal !0 ; CHECK: movntdq %cast1 = bitcast i8* %B to <2 x i64>* - store <2 x i64> %E, <2 x i64>* %cast1, align 16, !nontemporal !0 + %E2 = add <2 x i64> %E, <i64 1, i64 1> + store <2 x i64> %E2, <2 x i64>* %cast1, align 16, !nontemporal !0 ; CHECK: movntpd %cast2 = bitcast i8* %B to <2 x double>* - store <2 x double> %C, <2 x double>* %cast2, align 16, !nontemporal !0 + %C2 = fadd <2 x double> %C, <double 1.0, double 1.0> + store <2 x double> %C2, <2 x double>* %cast2, align 16, !nontemporal !0 ; CHECK: movnti %cast3 = bitcast i8* %B to i32* store i32 %D, i32* %cast3, align 16, !nontemporal !0 diff --git a/test/CodeGen/X86/sse-align-3.ll
b/test/CodeGen/X86/sse-align-3.ll index 04f216176c..b6b0471e91 100644 --- a/test/CodeGen/X86/sse-align-3.ll +++ b/test/CodeGen/X86/sse-align-3.ll @@ -1,8 +1,8 @@ ; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s ; CHECK-NOT: movapd ; CHECK: movaps -; CHECK-NOT: movaps -; CHECK: movapd +; CHECK-NOT: movapd +; CHECK: movaps ; CHECK-NOT: movap define void @foo(<4 x float>* %p, <4 x float> %x) nounwind { diff --git a/test/CodeGen/X86/sse2-blend.ll b/test/CodeGen/X86/sse2-blend.ll index 4ff1d035e4..2f4317bf29 100644 --- a/test/CodeGen/X86/sse2-blend.ll +++ b/test/CodeGen/X86/sse2-blend.ll @@ -26,11 +26,10 @@ define void@vsel_i32(<4 x i32>* %v1, <4 x i32>* %v2) { ret void } -; FIXME: The -mattr=+sse2,-sse41 disable the ExecutionDepsFix pass causing the -; mixed domains here. +; Without forcing instructions, fall back to the preferred PS domain. ; CHECK: vsel_i64 ; CHECK: xorps -; CHECK: pand +; CHECK: andps ; CHECK: andnps ; CHECK: orps ; CHECK: ret @@ -43,16 +42,14 @@ define void@vsel_i64(<4 x i64>* %v1, <4 x i64>* %v2) { ret void } -; FIXME: The -mattr=+sse2,-sse41 disable the ExecutionDepsFix pass causing the -; mixed domains here. +; Without forcing instructions, fall back to the preferred PS domain. 
; CHECK: vsel_double ; CHECK: xorps -; CHECK: pand +; CHECK: andps ; CHECK: andnps ; CHECK: orps ; CHECK: ret - define void@vsel_double(<4 x double>* %v1, <4 x double>* %v2) { %A = load <4 x double>* %v1 %B = load <4 x double>* %v2 diff --git a/test/CodeGen/X86/sse2.ll b/test/CodeGen/X86/sse2.ll index d520d5c1e3..1d74af2ba3 100644 --- a/test/CodeGen/X86/sse2.ll +++ b/test/CodeGen/X86/sse2.ll @@ -144,7 +144,7 @@ define <2 x double> @test11(double %a, double %b) nounwind { %tmp7 = insertelement <2 x double> %tmp, double %b, i32 1 ; <<2 x double>> [#uses=1] ret <2 x double> %tmp7 ; CHECK: test11: -; CHECK: movapd 4(%esp), %xmm0 +; CHECK: movaps 4(%esp), %xmm0 } define void @test12() nounwind { diff --git a/test/CodeGen/X86/vec_shuffle.ll b/test/CodeGen/X86/vec_shuffle.ll index 2a48de2209..d20b3e7b1f 100644 --- a/test/CodeGen/X86/vec_shuffle.ll +++ b/test/CodeGen/X86/vec_shuffle.ll @@ -1,9 +1,8 @@ -; RUN: llc < %s -march=x86 -mcpu=core2 -o %t -; RUN: grep movq %t | count 1 -; RUN: grep pshufd %t | count 1 -; RUN: grep movupd %t | count 1 -; RUN: grep pshufhw %t | count 1 +; RUN: llc < %s -march=x86 -mcpu=core2 | FileCheck %s +; CHECK: test_v4sf +; CHECK: movq 8(%esp) +; CHECK: pshufd $80 define void @test_v4sf(<4 x float>* %P, float %X, float %Y) nounwind { %tmp = insertelement <4 x float> zeroinitializer, float %X, i32 0 ; <<4 x float>> [#uses=1] %tmp2 = insertelement <4 x float> %tmp, float %X, i32 1 ; <<4 x float>> [#uses=1] @@ -13,6 +12,9 @@ define void @test_v4sf(<4 x float>* %P, float %X, float %Y) nounwind { ret void } +; CHECK: test_v2sd +; CHECK: movups 8(%esp) +; CHECK: movaps define void @test_v2sd(<2 x double>* %P, double %X, double %Y) nounwind { %tmp = insertelement <2 x double> zeroinitializer, double %X, i32 0 ; <<2 x double>> [#uses=1] %tmp2 = insertelement <2 x double> %tmp, double %Y, i32 1 ; <<2 x double>> [#uses=1] @@ -20,6 +22,9 @@ define void @test_v2sd(<2 x double>* %P, double %X, double %Y) nounwind { ret void } +; CHECK: test_v8i16 +; CHECK: 
pshufhw $-58 +; CHECK: movdqa define void @test_v8i16(<2 x i64>* %res, <2 x i64>* %A) nounwind { %tmp = load <2 x i64>* %A ; <<2 x i64>> [#uses=1] %tmp.upgrd.1 = bitcast <2 x i64> %tmp to <8 x i16> ; <<8 x i16>> [#uses=8] -- cgit v1.2.3