 lib/CodeGen/ExecutionDepsFix.cpp       | 19
 test/CodeGen/X86/avx-intrinsics-x86.ll | 18
 test/CodeGen/X86/avx-logic.ll          |  8
 test/CodeGen/X86/nontemporal.ll        |  9
 test/CodeGen/X86/sse-align-3.ll        |  4
 test/CodeGen/X86/sse2-blend.ll         | 11
 test/CodeGen/X86/sse2.ll               |  2
 test/CodeGen/X86/vec_shuffle.ll        | 15
 8 files changed, 58 insertions(+), 28 deletions(-)
diff --git a/lib/CodeGen/ExecutionDepsFix.cpp b/lib/CodeGen/ExecutionDepsFix.cpp
index 3d6f256dd8..bd77f655c1 100644
--- a/lib/CodeGen/ExecutionDepsFix.cpp
+++ b/lib/CodeGen/ExecutionDepsFix.cpp
@@ -510,11 +510,20 @@ bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) {
leaveBasicBlock(MBB);
}
- // Clear the LiveOuts vectors. Should we also collapse any remaining
- // DomainValues?
- for (LiveOutMap::const_iterator i = LiveOuts.begin(), e = LiveOuts.end();
- i != e; ++i)
- delete[] i->second;
+ // Clear the LiveOuts vectors and collapse any remaining DomainValues.
+ for (ReversePostOrderTraversal<MachineBasicBlock*>::rpo_iterator
+ MBBI = RPOT.begin(), MBBE = RPOT.end(); MBBI != MBBE; ++MBBI) {
+ LiveOutMap::const_iterator FI = LiveOuts.find(*MBBI);
+ if (FI == LiveOuts.end())
+ continue;
+ assert(FI->second && "Null entry");
+ // The DomainValue is collapsed when the last reference is killed.
+ LiveRegs = FI->second;
+ for (unsigned i = 0, e = NumRegs; i != e; ++i)
+ if (LiveRegs[i])
+ Kill(i);
+ delete[] LiveRegs;
+ }
LiveOuts.clear();
Avail.clear();
Allocator.DestroyAll();
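The cleanup loop above walks the blocks in reverse post-order and kills each remaining live-out reference, so every surviving DomainValue is collapsed to a concrete domain before its vector is freed. The test updates below all lean on the same idiom to keep their checks stable: pin a value to one execution domain by passing it through an instruction that exists only in that domain (an integer add, or an fadd for the FP domains). A minimal sketch of the idiom in the same style as the tests; the function name, constant, and metadata id are illustrative, not taken from this commit:

; The integer add pins %v to the integer domain, so the nontemporal store
; below is expected to come out as movntdq rather than a PS-domain move.
define void @force_int_domain(i8* %p, <2 x i64> %v) nounwind {
  %cast = bitcast i8* %p to <2 x i64>*
  %v2 = add <2 x i64> %v, <i64 1, i64 1>
  store <2 x i64> %v2, <2 x i64>* %cast, align 16, !nontemporal !0
  ret void
}
!0 = metadata !{i32 1}

Without the add, %v reaches the store as a bare argument with no domain-fixing definition, and the backend is free to pick whichever encoding it prefers.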
diff --git a/test/CodeGen/X86/avx-intrinsics-x86.ll b/test/CodeGen/X86/avx-intrinsics-x86.ll
index 276209ea75..3fa1d95bf2 100644
--- a/test/CodeGen/X86/avx-intrinsics-x86.ll
+++ b/test/CodeGen/X86/avx-intrinsics-x86.ll
@@ -315,24 +315,31 @@ declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone
define void @test_x86_sse2_movnt_dq(i8* %a0, <2 x i64> %a1) {
+ ; CHECK: test_x86_sse2_movnt_dq
; CHECK: movl
; CHECK: vmovntdq
- call void @llvm.x86.sse2.movnt.dq(i8* %a0, <2 x i64> %a1)
+ ; add operation forces the execution domain.
+ %a2 = add <2 x i64> %a1, <i64 1, i64 1>
+ call void @llvm.x86.sse2.movnt.dq(i8* %a0, <2 x i64> %a2)
ret void
}
declare void @llvm.x86.sse2.movnt.dq(i8*, <2 x i64>) nounwind
define void @test_x86_sse2_movnt_pd(i8* %a0, <2 x double> %a1) {
+ ; CHECK: test_x86_sse2_movnt_pd
; CHECK: movl
; CHECK: vmovntpd
- call void @llvm.x86.sse2.movnt.pd(i8* %a0, <2 x double> %a1)
+ ; fadd operation forces the execution domain.
+ %a2 = fadd <2 x double> %a1, <double 0x0, double 0x4200000000000000>
+ call void @llvm.x86.sse2.movnt.pd(i8* %a0, <2 x double> %a2)
ret void
}
declare void @llvm.x86.sse2.movnt.pd(i8*, <2 x double>) nounwind
define <2 x double> @test_x86_sse2_mul_sd(<2 x double> %a0, <2 x double> %a1) {
+ ; CHECK: test_x86_sse2_mul_sd
; CHECK: vmulsd
%res = call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
ret <2 x double> %res
@@ -749,6 +756,7 @@ declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone
define void @test_x86_sse2_storel_dq(i8* %a0, <4 x i32> %a1) {
+ ; CHECK: test_x86_sse2_storel_dq
; CHECK: movl
; CHECK: vmovq
call void @llvm.x86.sse2.storel.dq(i8* %a0, <4 x i32> %a1)
@@ -758,6 +766,7 @@ declare void @llvm.x86.sse2.storel.dq(i8*, <4 x i32>) nounwind
define void @test_x86_sse2_storeu_dq(i8* %a0, <16 x i8> %a1) {
+ ; CHECK: test_x86_sse2_storeu_dq
; CHECK: movl
; CHECK: vmovdqu
call void @llvm.x86.sse2.storeu.dq(i8* %a0, <16 x i8> %a1)
@@ -767,15 +776,18 @@ declare void @llvm.x86.sse2.storeu.dq(i8*, <16 x i8>) nounwind
define void @test_x86_sse2_storeu_pd(i8* %a0, <2 x double> %a1) {
+ ; CHECK: test_x86_sse2_storeu_pd
; CHECK: movl
; CHECK: vmovupd
- call void @llvm.x86.sse2.storeu.pd(i8* %a0, <2 x double> %a1)
+ %a2 = fadd <2 x double> %a1, <double 0x0, double 0x4200000000000000>
+ call void @llvm.x86.sse2.storeu.pd(i8* %a0, <2 x double> %a2)
ret void
}
declare void @llvm.x86.sse2.storeu.pd(i8*, <2 x double>) nounwind
define <2 x double> @test_x86_sse2_sub_sd(<2 x double> %a0, <2 x double> %a1) {
+ ; CHECK: test_x86_sse2_sub_sd
; CHECK: vsubsd
%res = call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
ret <2 x double> %res
diff --git a/test/CodeGen/X86/avx-logic.ll b/test/CodeGen/X86/avx-logic.ll
index 518c09c869..cd37135582 100644
--- a/test/CodeGen/X86/avx-logic.ll
+++ b/test/CodeGen/X86/avx-logic.ll
@@ -165,7 +165,9 @@ entry:
; CHECK: vpandn %xmm
define <2 x i64> @vpandn(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
entry:
- %y = xor <2 x i64> %a, <i64 -1, i64 -1>
+ ; Force the execution domain with an add.
+ %a2 = add <2 x i64> %a, <i64 1, i64 1>
+ %y = xor <2 x i64> %a2, <i64 -1, i64 -1>
%x = and <2 x i64> %a, %y
ret <2 x i64> %x
}
@@ -173,7 +175,9 @@ entry:
; CHECK: vpand %xmm
define <2 x i64> @vpand(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
entry:
- %x = and <2 x i64> %a, %b
+ ; Force the execution domain with an add.
+ %a2 = add <2 x i64> %a, <i64 1, i64 1>
+ %x = and <2 x i64> %a2, %b
ret <2 x i64> %x
}
diff --git a/test/CodeGen/X86/nontemporal.ll b/test/CodeGen/X86/nontemporal.ll
index 1d095359b6..ae04435ac3 100644
--- a/test/CodeGen/X86/nontemporal.ll
+++ b/test/CodeGen/X86/nontemporal.ll
@@ -3,13 +3,16 @@
define void @f(<4 x float> %A, i8* %B, <2 x double> %C, i32 %D, <2 x i64> %E) {
; CHECK: movntps
%cast = bitcast i8* %B to <4 x float>*
- store <4 x float> %A, <4 x float>* %cast, align 16, !nontemporal !0
+ %A2 = fadd <4 x float> %A, <float 0x0, float 0x0, float 0x0, float 0x4200000000000000>
+ store <4 x float> %A2, <4 x float>* %cast, align 16, !nontemporal !0
; CHECK: movntdq
%cast1 = bitcast i8* %B to <2 x i64>*
- store <2 x i64> %E, <2 x i64>* %cast1, align 16, !nontemporal !0
+ %E2 = add <2 x i64> %E, <i64 1, i64 2>
+ store <2 x i64> %E2, <2 x i64>* %cast1, align 16, !nontemporal !0
; CHECK: movntpd
%cast2 = bitcast i8* %B to <2 x double>*
- store <2 x double> %C, <2 x double>* %cast2, align 16, !nontemporal !0
+ %C2 = fadd <2 x double> %C, <double 0x0, double 0x4200000000000000>
+ store <2 x double> %C2, <2 x double>* %cast2, align 16, !nontemporal !0
; CHECK: movnti
%cast3 = bitcast i8* %B to i32*
store i32 %D, i32* %cast3, align 16, !nontemporal !0
diff --git a/test/CodeGen/X86/sse-align-3.ll b/test/CodeGen/X86/sse-align-3.ll
index 04f216176c..b6b0471e91 100644
--- a/test/CodeGen/X86/sse-align-3.ll
+++ b/test/CodeGen/X86/sse-align-3.ll
@@ -1,8 +1,8 @@
; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
; CHECK-NOT: movapd
; CHECK: movaps
-; CHECK-NOT: movaps
-; CHECK: movapd
+; CHECK-NOT: movapd
+; CHECK: movaps
; CHECK-NOT: movap
define void @foo(<4 x float>* %p, <4 x float> %x) nounwind {
diff --git a/test/CodeGen/X86/sse2-blend.ll b/test/CodeGen/X86/sse2-blend.ll
index 4ff1d035e4..2f4317bf29 100644
--- a/test/CodeGen/X86/sse2-blend.ll
+++ b/test/CodeGen/X86/sse2-blend.ll
@@ -26,11 +26,10 @@ define void@vsel_i32(<4 x i32>* %v1, <4 x i32>* %v2) {
ret void
}
-; FIXME: The -mattr=+sse2,-sse41 disable the ExecutionDepsFix pass causing the
-; mixed domains here.
+; Without forcing instructions, fall back to the preferred PS domain.
; CHECK: vsel_i64
; CHECK: xorps
-; CHECK: pand
+; CHECK: andps
; CHECK: andnps
; CHECK: orps
; CHECK: ret
@@ -43,16 +42,14 @@ define void@vsel_i64(<4 x i64>* %v1, <4 x i64>* %v2) {
ret void
}
-; FIXME: The -mattr=+sse2,-sse41 disable the ExecutionDepsFix pass causing the
-; mixed domains here.
+; Without forcing instructions, fall back to the preferred PS domain.
; CHECK: vsel_double
; CHECK: xorps
-; CHECK: pand
+; CHECK: andps
; CHECK: andnps
; CHECK: orps
; CHECK: ret
-
define void@vsel_double(<4 x double>* %v1, <4 x double>* %v2) {
%A = load <4 x double>* %v1
%B = load <4 x double>* %v2
diff --git a/test/CodeGen/X86/sse2.ll b/test/CodeGen/X86/sse2.ll
index d520d5c1e3..1d74af2ba3 100644
--- a/test/CodeGen/X86/sse2.ll
+++ b/test/CodeGen/X86/sse2.ll
@@ -144,7 +144,7 @@ define <2 x double> @test11(double %a, double %b) nounwind {
%tmp7 = insertelement <2 x double> %tmp, double %b, i32 1 ; <<2 x double>> [#uses=1]
ret <2 x double> %tmp7
; CHECK: test11:
-; CHECK: movapd 4(%esp), %xmm0
+; CHECK: movaps 4(%esp), %xmm0
}
define void @test12() nounwind {
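The movapd to movaps flips here and in sse-align-3.ll fall out of the same collapse: plain moves have no domain of their own, so when nothing else pins the value, the PS encoding wins because it is one byte shorter. A hypothetical RUN/CHECK pair illustrating the expectation (not a test from this commit):

; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
; No domain-forcing user here, so the shorter PS-domain encoding is expected.
; CHECK: movaps
define <2 x double> @copy(<2 x double>* %p) nounwind {
  %x = load <2 x double>* %p
  ret <2 x double> %x
}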
diff --git a/test/CodeGen/X86/vec_shuffle.ll b/test/CodeGen/X86/vec_shuffle.ll
index 2a48de2209..d20b3e7b1f 100644
--- a/test/CodeGen/X86/vec_shuffle.ll
+++ b/test/CodeGen/X86/vec_shuffle.ll
@@ -1,9 +1,8 @@
-; RUN: llc < %s -march=x86 -mcpu=core2 -o %t
-; RUN: grep movq %t | count 1
-; RUN: grep pshufd %t | count 1
-; RUN: grep movupd %t | count 1
-; RUN: grep pshufhw %t | count 1
+; RUN: llc < %s -march=x86 -mcpu=core2 | FileCheck %s
+; CHECK: test_v4sf
+; CHECK: movq 8(%esp)
+; CHECK: pshufd $80
define void @test_v4sf(<4 x float>* %P, float %X, float %Y) nounwind {
%tmp = insertelement <4 x float> zeroinitializer, float %X, i32 0 ; <<4 x float>> [#uses=1]
%tmp2 = insertelement <4 x float> %tmp, float %X, i32 1 ; <<4 x float>> [#uses=1]
@@ -13,6 +12,9 @@ define void @test_v4sf(<4 x float>* %P, float %X, float %Y) nounwind {
ret void
}
+; CHECK: test_v2sd
+; CHECK: movups 8(%esp)
+; CHECK: movaps
define void @test_v2sd(<2 x double>* %P, double %X, double %Y) nounwind {
%tmp = insertelement <2 x double> zeroinitializer, double %X, i32 0 ; <<2 x double>> [#uses=1]
%tmp2 = insertelement <2 x double> %tmp, double %Y, i32 1 ; <<2 x double>> [#uses=1]
@@ -20,6 +22,9 @@ define void @test_v2sd(<2 x double>* %P, double %X, double %Y) nounwind {
ret void
}
+; CHECK: test_v8i16
+; CHECK: pshufhw $-58
+; CHECK: movdqa
define void @test_v8i16(<2 x i64>* %res, <2 x i64>* %A) nounwind {
%tmp = load <2 x i64>* %A ; <<2 x i64>> [#uses=1]
%tmp.upgrd.1 = bitcast <2 x i64> %tmp to <8 x i16> ; <<8 x i16>> [#uses=8]