7 files changed, 12 insertions, 194 deletions
diff --git a/test/CodeGen/X86/2006-05-11-InstrSched.ll b/test/CodeGen/X86/2006-05-11-InstrSched.ll
index 56d6aa960e..bdbe713a29 100644
--- a/test/CodeGen/X86/2006-05-11-InstrSched.ll
+++ b/test/CodeGen/X86/2006-05-11-InstrSched.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -march=x86 -mattr=+sse2 -stats -realign-stack=0 |&\
-; RUN:     grep {asm-printer} | grep 34
+; RUN:     grep {asm-printer} | grep 31
 
 target datalayout = "e-p:32:32"
 define void @foo(i32* %mc, i32* %bp, i32* %ms, i32* %xmb, i32* %mpp, i32* %tpmm, i32* %ip, i32* %tpim, i32* %dpp, i32* %tpdm, i32* %bpi, i32 %M) nounwind {
@@ -40,7 +40,7 @@ cond_true:		; preds = %cond_true, %entry
 	%tmp137.upgrd.7 = bitcast i32* %tmp137 to <2 x i64>*		; <<2 x i64>*> [#uses=1]
 	store <2 x i64> %tmp131, <2 x i64>* %tmp137.upgrd.7
 	%tmp147 = add nsw i32 %tmp.10, 8		; <i32> [#uses=1]
-	%tmp.upgrd.8 = icmp ne i32 %tmp147, %M		; <i1> [#uses=1]
+	%tmp.upgrd.8 = icmp slt i32 %tmp147, %M		; <i1> [#uses=1]
 	%indvar.next = add i32 %indvar, 1		; <i32> [#uses=1]
 	br i1 %tmp.upgrd.8, label %cond_true, label %return
 
diff --git a/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll b/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll
index 8e315f4d80..721d4c945b 100644
--- a/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll
+++ b/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll
@@ -35,7 +35,7 @@ cond_next36.i:		; preds = %cond_next.i
 bb.i28.i:		; preds = %bb.i28.i, %cond_next36.i
 ; CHECK: %bb.i28.i
 ; CHECK: addl $2
-; CHECK: addl $-2
+; CHECK: addl $2
 	%j.0.reg2mem.0.i16.i = phi i32 [ 0, %cond_next36.i ], [ %indvar.next39.i, %bb.i28.i ]		; <i32> [#uses=2]
 	%din_addr.1.reg2mem.0.i17.i = phi double [ 0.000000e+00, %cond_next36.i ], [ %tmp16.i25.i, %bb.i28.i ]		; <double> [#uses=1]
 	%tmp1.i18.i = fptosi double %din_addr.1.reg2mem.0.i17.i to i32		; <i32> [#uses=2]
diff --git a/test/CodeGen/X86/iv-users-in-other-loops.ll b/test/CodeGen/X86/iv-users-in-other-loops.ll
index 0410bc0d9a..c695c29e06 100644
--- a/test/CodeGen/X86/iv-users-in-other-loops.ll
+++ b/test/CodeGen/X86/iv-users-in-other-loops.ll
@@ -1,11 +1,11 @@
 ; RUN: llc < %s -march=x86-64 -o %t
-; RUN: not grep inc %t
+; RUN: grep inc %t | count 1
 ; RUN: grep dec %t | count 2
-; RUN: grep addq %t | count 10
+; RUN: grep addq %t | count 13
 ; RUN: not grep addb %t
 ; RUN: grep leaq %t | count 9
-; RUN: grep leal %t | count 2
-; RUN: grep movq %t | count 10
+; RUN: grep leal %t | count 3
+; RUN: grep movq %t | count 5
 
 ; IV users in each of the loops from other loops shouldn't cause LSR
 ; to insert new induction variables. Previously it would create a
diff --git a/test/CodeGen/X86/loop-strength-reduce4.ll b/test/CodeGen/X86/loop-strength-reduce4.ll
index 6c0eb8c0df..07e46eca75 100644
--- a/test/CodeGen/X86/loop-strength-reduce4.ll
+++ b/test/CodeGen/X86/loop-strength-reduce4.ll
@@ -1,19 +1,5 @@
-; RUN: llc < %s -march=x86 -relocation-model=static -mtriple=i686-apple-darwin | FileCheck %s -check-prefix=STATIC
-; RUN: llc < %s -march=x86 -relocation-model=pic | FileCheck %s -check-prefix=PIC
-
-; By starting the IV at -64 instead of 0, a cmp is eliminated,
-; as the flags from the add can be used directly.
-
-; STATIC: movl    $-64, %ecx
-
-; STATIC: movl    %eax, _state+76(%ecx)
-; STATIC: addl    $16, %ecx
-; STATIC: jne
-
-; In PIC mode the symbol can't be folded, so the change-compare-stride
-; trick applies.
-
-; PIC: cmpl $64
+; RUN: llc < %s -march=x86 | grep cmp | grep 64
+; RUN: llc < %s -march=x86 | not grep inc
 
 @state = external global [0 x i32]		; <[0 x i32]*> [#uses=4]
 @S = external global [0 x i32]		; <[0 x i32]*> [#uses=4]
diff --git a/test/CodeGen/X86/loop-strength-reduce8.ll b/test/CodeGen/X86/loop-strength-reduce8.ll
index 6b2247d1d6..e14cd8a99e 100644
--- a/test/CodeGen/X86/loop-strength-reduce8.ll
+++ b/test/CodeGen/X86/loop-strength-reduce8.ll
@@ -1,10 +1,4 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin | FileCheck %s
-
-; CHECK: leal 16(%eax), %edx
-; CHECK: align
-; CHECK: addl    $4, %edx
-; CHECK: decl    %ecx
-; CHECK: jne     LBB1_2
+; RUN: llc < %s -mtriple=i386-apple-darwin | grep leal | not grep 16
 
 	%struct.CUMULATIVE_ARGS = type { i32, i32, i32, i32, i32, i32, i32 }
 	%struct.bitmap_element = type { %struct.bitmap_element*, %struct.bitmap_element*, i32, [2 x i64] }
diff --git a/test/CodeGen/X86/lsr-reuse.ll b/test/CodeGen/X86/lsr-reuse.ll
deleted file mode 100644
index a1919bab38..0000000000
--- a/test/CodeGen/X86/lsr-reuse.ll
+++ /dev/null
@@ -1,159 +0,0 @@
-; RUN: llc < %s -march=x86-64 | FileCheck %s
-target datalayout = "e-p:64:64:64"
-target triple = "x86_64-unknown-unknown"
-
-; Full strength reduction reduces register pressure from 5 to 4 here.
-
-; CHECK: full_me:
-; CHECK: movsd   (%rsi), %xmm0
-; CHECK: mulsd   (%rdx), %xmm0
-; CHECK: movsd   %xmm0, (%rdi)
-; CHECK: addq    $8, %rsi
-; CHECK: addq    $8, %rdx
-; CHECK: addq    $8, %rdi
-; CHECK: decq    %rcx
-; CHECK: jne
-
-define void @full_me(double* nocapture %A, double* nocapture %B, double* nocapture %C, i64 %n) nounwind {
-entry:
-  %t0 = icmp sgt i64 %n, 0
-  br i1 %t0, label %loop, label %return
-
-loop:
-  %i = phi i64 [ %i.next, %loop ], [ 0, %entry ]
-  %Ai = getelementptr inbounds double* %A, i64 %i
-  %Bi = getelementptr inbounds double* %B, i64 %i
-  %Ci = getelementptr inbounds double* %C, i64 %i
-  %t1 = load double* %Bi
-  %t2 = load double* %Ci
-  %m = fmul double %t1, %t2
-  store double %m, double* %Ai
-  %i.next = add nsw i64 %i, 1
-  %exitcond = icmp eq i64 %i.next, %n
-  br i1 %exitcond, label %return, label %loop
-
-return:
-  ret void
-}
-
-; In this test, the counting IV exit value is used, so full strength reduction
-; would not reduce register pressure. IndVarSimplify ought to simplify such
-; cases away, but it's useful here to verify that LSR's register pressure
-; heuristics are working as expected.
-
-; CHECK: count_me_0:
-; CHECK: movsd   (%rsi,%rax,8), %xmm0
-; CHECK: mulsd   (%rdx,%rax,8), %xmm0
-; CHECK: movsd   %xmm0, (%rdi,%rax,8)
-; CHECK: incq    %rax
-; CHECK: cmpq    %rax, %rcx
-; CHECK: jne
-
-define i64 @count_me_0(double* nocapture %A, double* nocapture %B, double* nocapture %C, i64 %n) nounwind {
-entry:
-  %t0 = icmp sgt i64 %n, 0
-  br i1 %t0, label %loop, label %return
-
-loop:
-  %i = phi i64 [ %i.next, %loop ], [ 0, %entry ]
-  %Ai = getelementptr inbounds double* %A, i64 %i
-  %Bi = getelementptr inbounds double* %B, i64 %i
-  %Ci = getelementptr inbounds double* %C, i64 %i
-  %t1 = load double* %Bi
-  %t2 = load double* %Ci
-  %m = fmul double %t1, %t2
-  store double %m, double* %Ai
-  %i.next = add nsw i64 %i, 1
-  %exitcond = icmp eq i64 %i.next, %n
-  br i1 %exitcond, label %return, label %loop
-
-return:
-  %q = phi i64 [ 0, %entry ], [ %i.next, %loop ]
-  ret i64 %q
-}
-
-; In this test, the trip count value is used, so full strength reduction
-; would not reduce register pressure.
-; (though it would reduce register pressure inside the loop...)
-
-; CHECK: count_me_1:
-; CHECK: movsd   (%rsi,%rax,8), %xmm0
-; CHECK: mulsd   (%rdx,%rax,8), %xmm0
-; CHECK: movsd   %xmm0, (%rdi,%rax,8)
-; CHECK: incq    %rax
-; CHECK: cmpq    %rax, %rcx
-; CHECK: jne
-
-define i64 @count_me_1(double* nocapture %A, double* nocapture %B, double* nocapture %C, i64 %n) nounwind {
-entry:
-  %t0 = icmp sgt i64 %n, 0
-  br i1 %t0, label %loop, label %return
-
-loop:
-  %i = phi i64 [ %i.next, %loop ], [ 0, %entry ]
-  %Ai = getelementptr inbounds double* %A, i64 %i
-  %Bi = getelementptr inbounds double* %B, i64 %i
-  %Ci = getelementptr inbounds double* %C, i64 %i
-  %t1 = load double* %Bi
-  %t2 = load double* %Ci
-  %m = fmul double %t1, %t2
-  store double %m, double* %Ai
-  %i.next = add nsw i64 %i, 1
-  %exitcond = icmp eq i64 %i.next, %n
-  br i1 %exitcond, label %return, label %loop
-
-return:
-  %q = phi i64 [ 0, %entry ], [ %n, %loop ]
-  ret i64 %q
-}
-
-; This should be fully strength-reduced to reduce register pressure, however
-; the current heuristics get distracted by all the reuse with the stride-1
-; induction variable first.
-
-; But even so, be clever and start the stride-1 variable at a non-zero value
-; to eliminate an in-loop immediate value.
-
-; CHECK: count_me_2:
-; CHECK: movl    $5, %eax
-; CHECK: align
-; CHECK: BB4_1:
-; CHECK: movsd   (%rdi,%rax,8), %xmm0
-; CHECK: addsd   (%rsi,%rax,8), %xmm0
-; CHECK: movsd   %xmm0, (%rdx,%rax,8)
-; CHECK: movsd   40(%rdi,%rax,8), %xmm0
-; CHECK: addsd   40(%rsi,%rax,8), %xmm0
-; CHECK: movsd   %xmm0, 40(%rdx,%rax,8)
-; CHECK: incq    %rax
-; CHECK: cmpq    $5005, %rax
-; CHECK: jne
-
-define void @count_me_2(double* nocapture %A, double* nocapture %B, double* nocapture %C) nounwind {
-entry:
-  br label %loop
-
-loop:
-  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
-  %i5 = add i64 %i, 5
-  %Ai = getelementptr double* %A, i64 %i5
-  %t2 = load double* %Ai
-  %Bi = getelementptr double* %B, i64 %i5
-  %t4 = load double* %Bi
-  %t5 = fadd double %t2, %t4
-  %Ci = getelementptr double* %C, i64 %i5
-  store double %t5, double* %Ci
-  %i10 = add i64 %i, 10
-  %Ai10 = getelementptr double* %A, i64 %i10
-  %t9 = load double* %Ai10
-  %Bi10 = getelementptr double* %B, i64 %i10
-  %t11 = load double* %Bi10
-  %t12 = fadd double %t9, %t11
-  %Ci10 = getelementptr double* %C, i64 %i10
-  store double %t12, double* %Ci10
-  %i.next = add i64 %i, 1
-  %exitcond = icmp eq i64 %i.next, 5000
-  br i1 %exitcond, label %return, label %loop
-
-return:
-  ret void
-}
diff --git a/test/CodeGen/X86/masked-iv-safe.ll b/test/CodeGen/X86/masked-iv-safe.ll
index 7111d687ed..bc493bd8f7 100644
--- a/test/CodeGen/X86/masked-iv-safe.ll
+++ b/test/CodeGen/X86/masked-iv-safe.ll
@@ -4,9 +4,9 @@
 ; RUN: not grep sar %t
 ; RUN: not grep shl %t
 ; RUN: grep add %t | count 2
-; RUN: grep inc %t | count 3
+; RUN: grep inc %t | count 4
 ; RUN: grep dec %t | count 2
-; RUN: grep lea %t | count 3
+; RUN: grep lea %t | count 2
 
 ; Optimize away zext-inreg and sext-inreg on the loop induction
 ; variable using trip-count information.
@@ -127,9 +127,6 @@ return:
 	ret void
 }
 
-; TODO: If we could handle all the loads and stores as post-inc users, we could
-; use {-1,+,1} in the induction variable register, and we'd get another inc,
-; one fewer add, and a comparison with zero.
 define void @another_count_up(double* %d, i64 %n) nounwind {
 entry:
 	br label %loop