diff options
Diffstat (limited to 'test/CodeGen/X86')
-rw-r--r-- | test/CodeGen/X86/2006-05-11-InstrSched.ll | 4 | ||||
-rw-r--r-- | test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll | 2 | ||||
-rw-r--r-- | test/CodeGen/X86/iv-users-in-other-loops.ll | 8 | ||||
-rw-r--r-- | test/CodeGen/X86/loop-strength-reduce4.ll | 18 | ||||
-rw-r--r-- | test/CodeGen/X86/loop-strength-reduce8.ll | 8 | ||||
-rw-r--r-- | test/CodeGen/X86/lsr-reuse.ll | 159 | ||||
-rw-r--r-- | test/CodeGen/X86/masked-iv-safe.ll | 7 |
7 files changed, 12 insertions, 194 deletions
diff --git a/test/CodeGen/X86/2006-05-11-InstrSched.ll b/test/CodeGen/X86/2006-05-11-InstrSched.ll index 56d6aa960e..bdbe713a29 100644 --- a/test/CodeGen/X86/2006-05-11-InstrSched.ll +++ b/test/CodeGen/X86/2006-05-11-InstrSched.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=x86 -mattr=+sse2 -stats -realign-stack=0 |&\ -; RUN: grep {asm-printer} | grep 34 +; RUN: grep {asm-printer} | grep 31 target datalayout = "e-p:32:32" define void @foo(i32* %mc, i32* %bp, i32* %ms, i32* %xmb, i32* %mpp, i32* %tpmm, i32* %ip, i32* %tpim, i32* %dpp, i32* %tpdm, i32* %bpi, i32 %M) nounwind { @@ -40,7 +40,7 @@ cond_true: ; preds = %cond_true, %entry %tmp137.upgrd.7 = bitcast i32* %tmp137 to <2 x i64>* ; <<2 x i64>*> [#uses=1] store <2 x i64> %tmp131, <2 x i64>* %tmp137.upgrd.7 %tmp147 = add nsw i32 %tmp.10, 8 ; <i32> [#uses=1] - %tmp.upgrd.8 = icmp ne i32 %tmp147, %M ; <i1> [#uses=1] + %tmp.upgrd.8 = icmp slt i32 %tmp147, %M ; <i1> [#uses=1] %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=1] br i1 %tmp.upgrd.8, label %cond_true, label %return diff --git a/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll b/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll index 8e315f4d80..721d4c945b 100644 --- a/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll +++ b/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll @@ -35,7 +35,7 @@ cond_next36.i: ; preds = %cond_next.i bb.i28.i: ; preds = %bb.i28.i, %cond_next36.i ; CHECK: %bb.i28.i ; CHECK: addl $2 -; CHECK: addl $-2 +; CHECK: addl $2 %j.0.reg2mem.0.i16.i = phi i32 [ 0, %cond_next36.i ], [ %indvar.next39.i, %bb.i28.i ] ; <i32> [#uses=2] %din_addr.1.reg2mem.0.i17.i = phi double [ 0.000000e+00, %cond_next36.i ], [ %tmp16.i25.i, %bb.i28.i ] ; <double> [#uses=1] %tmp1.i18.i = fptosi double %din_addr.1.reg2mem.0.i17.i to i32 ; <i32> [#uses=2] diff --git a/test/CodeGen/X86/iv-users-in-other-loops.ll b/test/CodeGen/X86/iv-users-in-other-loops.ll index 0410bc0d9a..c695c29e06 100644 --- a/test/CodeGen/X86/iv-users-in-other-loops.ll +++ b/test/CodeGen/X86/iv-users-in-other-loops.ll @@ -1,11 +1,11 @@ ; RUN: llc < %s -march=x86-64 -o %t -; RUN: not grep inc %t +; RUN: grep inc %t | count 1 ; RUN: grep dec %t | count 2 -; RUN: grep addq %t | count 10 +; RUN: grep addq %t | count 13 ; RUN: not grep addb %t ; RUN: grep leaq %t | count 9 -; RUN: grep leal %t | count 2 -; RUN: grep movq %t | count 10 +; RUN: grep leal %t | count 3 +; RUN: grep movq %t | count 5 ; IV users in each of the loops from other loops shouldn't cause LSR ; to insert new induction variables. Previously it would create a diff --git a/test/CodeGen/X86/loop-strength-reduce4.ll b/test/CodeGen/X86/loop-strength-reduce4.ll index 6c0eb8c0df..07e46eca75 100644 --- a/test/CodeGen/X86/loop-strength-reduce4.ll +++ b/test/CodeGen/X86/loop-strength-reduce4.ll @@ -1,19 +1,5 @@ -; RUN: llc < %s -march=x86 -relocation-model=static -mtriple=i686-apple-darwin | FileCheck %s -check-prefix=STATIC -; RUN: llc < %s -march=x86 -relocation-model=pic | FileCheck %s -check-prefix=PIC - -; By starting the IV at -64 instead of 0, a cmp is eliminated, -; as the flags from the add can be used directly. - -; STATIC: movl $-64, %ecx - -; STATIC: movl %eax, _state+76(%ecx) -; STATIC: addl $16, %ecx -; STATIC: jne - -; In PIC mode the symbol can't be folded, so the change-compare-stride -; trick applies. - -; PIC: cmpl $64 +; RUN: llc < %s -march=x86 | grep cmp | grep 64 +; RUN: llc < %s -march=x86 | not grep inc @state = external global [0 x i32] ; <[0 x i32]*> [#uses=4] @S = external global [0 x i32] ; <[0 x i32]*> [#uses=4] diff --git a/test/CodeGen/X86/loop-strength-reduce8.ll b/test/CodeGen/X86/loop-strength-reduce8.ll index 6b2247d1d6..e14cd8a99e 100644 --- a/test/CodeGen/X86/loop-strength-reduce8.ll +++ b/test/CodeGen/X86/loop-strength-reduce8.ll @@ -1,10 +1,4 @@ -; RUN: llc < %s -mtriple=i386-apple-darwin | FileCheck %s - -; CHECK: leal 16(%eax), %edx -; CHECK: align -; CHECK: addl $4, %edx -; CHECK: decl %ecx -; CHECK: jne LBB1_2 +; RUN: llc < %s -mtriple=i386-apple-darwin | grep leal | not grep 16 %struct.CUMULATIVE_ARGS = type { i32, i32, i32, i32, i32, i32, i32 } %struct.bitmap_element = type { %struct.bitmap_element*, %struct.bitmap_element*, i32, [2 x i64] } diff --git a/test/CodeGen/X86/lsr-reuse.ll b/test/CodeGen/X86/lsr-reuse.ll deleted file mode 100644 index a1919bab38..0000000000 --- a/test/CodeGen/X86/lsr-reuse.ll +++ /dev/null @@ -1,159 +0,0 @@ -; RUN: llc < %s -march=x86-64 | FileCheck %s -target datalayout = "e-p:64:64:64" -target triple = "x86_64-unknown-unknown" - -; Full strength reduction reduces register pressure from 5 to 4 here. - -; CHECK: full_me: -; CHECK: movsd (%rsi), %xmm0 -; CHECK: mulsd (%rdx), %xmm0 -; CHECK: movsd %xmm0, (%rdi) -; CHECK: addq $8, %rsi -; CHECK: addq $8, %rdx -; CHECK: addq $8, %rdi -; CHECK: decq %rcx -; CHECK: jne - -define void @full_me(double* nocapture %A, double* nocapture %B, double* nocapture %C, i64 %n) nounwind { -entry: - %t0 = icmp sgt i64 %n, 0 - br i1 %t0, label %loop, label %return - -loop: - %i = phi i64 [ %i.next, %loop ], [ 0, %entry ] - %Ai = getelementptr inbounds double* %A, i64 %i - %Bi = getelementptr inbounds double* %B, i64 %i - %Ci = getelementptr inbounds double* %C, i64 %i - %t1 = load double* %Bi - %t2 = load double* %Ci - %m = fmul double %t1, %t2 - store double %m, double* %Ai - %i.next = add nsw i64 %i, 1 - %exitcond = icmp eq i64 %i.next, %n - br i1 %exitcond, label %return, label %loop - -return: - ret void -} - -; In this test, the counting IV exit value is used, so full strength reduction -; would not reduce register pressure. IndVarSimplify ought to simplify such -; cases away, but it's useful here to verify that LSR's register pressure -; heuristics are working as expected. - -; CHECK: count_me_0: -; CHECK: movsd (%rsi,%rax,8), %xmm0 -; CHECK: mulsd (%rdx,%rax,8), %xmm0 -; CHECK: movsd %xmm0, (%rdi,%rax,8) -; CHECK: incq %rax -; CHECK: cmpq %rax, %rcx -; CHECK: jne - -define i64 @count_me_0(double* nocapture %A, double* nocapture %B, double* nocapture %C, i64 %n) nounwind { -entry: - %t0 = icmp sgt i64 %n, 0 - br i1 %t0, label %loop, label %return - -loop: - %i = phi i64 [ %i.next, %loop ], [ 0, %entry ] - %Ai = getelementptr inbounds double* %A, i64 %i - %Bi = getelementptr inbounds double* %B, i64 %i - %Ci = getelementptr inbounds double* %C, i64 %i - %t1 = load double* %Bi - %t2 = load double* %Ci - %m = fmul double %t1, %t2 - store double %m, double* %Ai - %i.next = add nsw i64 %i, 1 - %exitcond = icmp eq i64 %i.next, %n - br i1 %exitcond, label %return, label %loop - -return: - %q = phi i64 [ 0, %entry ], [ %i.next, %loop ] - ret i64 %q -} - -; In this test, the trip count value is used, so full strength reduction -; would not reduce register pressure. -; (though it would reduce register pressure inside the loop...) - -; CHECK: count_me_1: -; CHECK: movsd (%rsi,%rax,8), %xmm0 -; CHECK: mulsd (%rdx,%rax,8), %xmm0 -; CHECK: movsd %xmm0, (%rdi,%rax,8) -; CHECK: incq %rax -; CHECK: cmpq %rax, %rcx -; CHECK: jne - -define i64 @count_me_1(double* nocapture %A, double* nocapture %B, double* nocapture %C, i64 %n) nounwind { -entry: - %t0 = icmp sgt i64 %n, 0 - br i1 %t0, label %loop, label %return - -loop: - %i = phi i64 [ %i.next, %loop ], [ 0, %entry ] - %Ai = getelementptr inbounds double* %A, i64 %i - %Bi = getelementptr inbounds double* %B, i64 %i - %Ci = getelementptr inbounds double* %C, i64 %i - %t1 = load double* %Bi - %t2 = load double* %Ci - %m = fmul double %t1, %t2 - store double %m, double* %Ai - %i.next = add nsw i64 %i, 1 - %exitcond = icmp eq i64 %i.next, %n - br i1 %exitcond, label %return, label %loop - -return: - %q = phi i64 [ 0, %entry ], [ %n, %loop ] - ret i64 %q -} - -; This should be fully strength-reduced to reduce register pressure, however -; the current heuristics get distracted by all the reuse with the stride-1 -; induction variable first. - -; But even so, be clever and start the stride-1 variable at a non-zero value -; to eliminate an in-loop immediate value. - -; CHECK: count_me_2: -; CHECK: movl $5, %eax -; CHECK: align -; CHECK: BB4_1: -; CHECK: movsd (%rdi,%rax,8), %xmm0 -; CHECK: addsd (%rsi,%rax,8), %xmm0 -; CHECK: movsd %xmm0, (%rdx,%rax,8) -; CHECK: movsd 40(%rdi,%rax,8), %xmm0 -; CHECK: addsd 40(%rsi,%rax,8), %xmm0 -; CHECK: movsd %xmm0, 40(%rdx,%rax,8) -; CHECK: incq %rax -; CHECK: cmpq $5005, %rax -; CHECK: jne - -define void @count_me_2(double* nocapture %A, double* nocapture %B, double* nocapture %C) nounwind { -entry: - br label %loop - -loop: - %i = phi i64 [ 0, %entry ], [ %i.next, %loop ] - %i5 = add i64 %i, 5 - %Ai = getelementptr double* %A, i64 %i5 - %t2 = load double* %Ai - %Bi = getelementptr double* %B, i64 %i5 - %t4 = load double* %Bi - %t5 = fadd double %t2, %t4 - %Ci = getelementptr double* %C, i64 %i5 - store double %t5, double* %Ci - %i10 = add i64 %i, 10 - %Ai10 = getelementptr double* %A, i64 %i10 - %t9 = load double* %Ai10 - %Bi10 = getelementptr double* %B, i64 %i10 - %t11 = load double* %Bi10 - %t12 = fadd double %t9, %t11 - %Ci10 = getelementptr double* %C, i64 %i10 - store double %t12, double* %Ci10 - %i.next = add i64 %i, 1 - %exitcond = icmp eq i64 %i.next, 5000 - br i1 %exitcond, label %return, label %loop - -return: - ret void -} diff --git a/test/CodeGen/X86/masked-iv-safe.ll b/test/CodeGen/X86/masked-iv-safe.ll index 7111d687ed..bc493bd8f7 100644 --- a/test/CodeGen/X86/masked-iv-safe.ll +++ b/test/CodeGen/X86/masked-iv-safe.ll @@ -4,9 +4,9 @@ ; RUN: not grep sar %t ; RUN: not grep shl %t ; RUN: grep add %t | count 2 -; RUN: grep inc %t | count 3 +; RUN: grep inc %t | count 4 ; RUN: grep dec %t | count 2 -; RUN: grep lea %t | count 3 +; RUN: grep lea %t | count 2 ; Optimize away zext-inreg and sext-inreg on the loop induction ; variable using trip-count information. @@ -127,9 +127,6 @@ return: ret void } -; TODO: If we could handle all the loads and stores as post-inc users, we could -; use {-1,+,1} in the induction variable register, and we'd get another inc, -; one fewer add, and a comparison with zero. define void @another_count_up(double* %d, i64 %n) nounwind { entry: br label %loop |