author    Dan Gohman <gohman@apple.com>  2010-01-21 02:09:26 +0000
committer Dan Gohman <gohman@apple.com>  2010-01-21 02:09:26 +0000
commit    a10756ee657a4d43a48cca5c166919093930ed6b
tree      52a9cf0b867521026963ca127f6a46d19f0f8f78 /test
parent    ac8b4bf66b7263018fe6c133604a30780c24982e
Re-implement the main strength-reduction portion of LoopStrengthReduction.
This new version is much more aggressive about doing "full" reduction in cases where it reduces register pressure, and also more aggressive about rewriting induction variables to count down (or up) to zero when doing so reduces register pressure. It currently uses fairly simplistic algorithms for finding reuse opportunities, but it introduces a new framework that allows it to combine multiple strategies at once to form hybrid solutions, instead of doing all full-reduction or all base+index.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@94061 91177308-0d34-0410-b5e6-96231b3b80d8
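To make the two rewrites concrete, here is a minimal hypothetical sketch in the (pre-opaque-pointer) IR of this era; it is not taken from the patch, and %P, %A, and %n are assumed arguments. The address computation is fully reduced to a pointer induction variable, and the counter is rewritten to count down to zero so the trip count %n no longer occupies a register inside the loop:

; Before: %i feeds an address computation, and the exit test keeps %n live.
loop:
  %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
  %addr = getelementptr i32* %P, i32 %i
  store i32 %A, i32* %addr
  %i.next = add i32 %i, 1
  %done = icmp eq i32 %i.next, %n
  br i1 %done, label %exit, label %loop

; After: the address is a pointer IV ("full" reduction) and the counter
; counts down to zero, so neither %i nor %n is live inside the loop.
loop:
  %p = phi i32* [ %P, %entry ], [ %p.next, %loop ]
  %c = phi i32 [ %n, %entry ], [ %c.next, %loop ]
  store i32 %A, i32* %p
  %p.next = getelementptr i32* %p, i32 1
  %c.next = add i32 %c, -1
  %done = icmp eq i32 %c.next, 0
  br i1 %done, label %exit, label %loop

The new framework can also mix strategies per use, for example keeping a base+index form for one address while fully reducing another; that is what the hybrid solutions mentioned above refer to. Several of the test updates below (arm-negative-stride.ll, lsr-deficiency.ll, lsr-reuse.ll) exercise exactly these count-down and register-pressure cases.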
Diffstat (limited to 'test')
-rw-r--r--test/CodeGen/ARM/arm-negative-stride.ll26
-rw-r--r--test/CodeGen/ARM/lsr-code-insertion.ll4
-rw-r--r--test/CodeGen/ARM/remat.ll3
-rw-r--r--test/CodeGen/Thumb2/lsr-deficiency.ll18
-rw-r--r--test/CodeGen/Thumb2/thumb2-ifcvt1.ll12
-rw-r--r--test/CodeGen/X86/2006-05-11-InstrSched.ll4
-rw-r--r--test/CodeGen/X86/2007-08-13-SpillerReuse.ll102
-rw-r--r--test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll2
-rw-r--r--test/CodeGen/X86/2009-09-10-SpillComments.ll9
-rw-r--r--test/CodeGen/X86/full-lsr.ll12
-rw-r--r--test/CodeGen/X86/iv-users-in-other-loops.ll8
-rw-r--r--test/CodeGen/X86/loop-strength-reduce4.ll18
-rw-r--r--test/CodeGen/X86/loop-strength-reduce8.ll8
-rw-r--r--test/CodeGen/X86/lsr-reuse.ll159
-rw-r--r--test/CodeGen/X86/masked-iv-safe.ll7
-rw-r--r--test/CodeGen/X86/pr3495-2.ll1
-rw-r--r--test/CodeGen/X86/remat-mov-0.ll26
-rw-r--r--test/CodeGen/X86/remat-mov-1.ll40
-rw-r--r--test/CodeGen/X86/subreg-to-reg-5.ll35
-rw-r--r--test/Transforms/IndVarSimplify/gep-with-mul-base.ll5
-rw-r--r--test/Transforms/LoopStrengthReduce/2008-08-06-CmpStride.ll5
-rw-r--r--test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-0.ll7
-rw-r--r--test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-1.ll8
-rw-r--r--test/Transforms/LoopStrengthReduce/count-to-zero.ll2
-rw-r--r--test/Transforms/LoopStrengthReduce/icmp_use_postinc.ll27
25 files changed, 292 insertions, 256 deletions
diff --git a/test/CodeGen/ARM/arm-negative-stride.ll b/test/CodeGen/ARM/arm-negative-stride.ll
index 72ec8efcc4..52ab8717c1 100644
--- a/test/CodeGen/ARM/arm-negative-stride.ll
+++ b/test/CodeGen/ARM/arm-negative-stride.ll
@@ -1,7 +1,32 @@
; RUN: llc < %s -march=arm | FileCheck %s
+; This loop is rewritten with an indvar which counts down, which
+; frees up a register from holding the trip count.
+
define void @test(i32* %P, i32 %A, i32 %i) nounwind {
entry:
+; CHECK: str r1, [{{r.*}}, +{{r.*}}, lsl #2]
+ icmp eq i32 %i, 0 ; <i1>:0 [#uses=1]
+ br i1 %0, label %return, label %bb
+
+bb: ; preds = %bb, %entry
+ %indvar = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=2]
+ %i_addr.09.0 = sub i32 %i, %indvar ; <i32> [#uses=1]
+ %tmp2 = getelementptr i32* %P, i32 %i_addr.09.0 ; <i32*> [#uses=1]
+ store i32 %A, i32* %tmp2
+ %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=2]
+ icmp eq i32 %indvar.next, %i ; <i1>:1 [#uses=1]
+ br i1 %1, label %return, label %bb
+
+return: ; preds = %bb, %entry
+ ret void
+}
+
+; This loop has a non-address use of the count-up indvar, so
+; it'll remain. Now the original store uses a negative-stride address.
+
+define void @test_with_forced_iv(i32* %P, i32 %A, i32 %i) nounwind {
+entry:
; CHECK: str r1, [{{r.*}}, -{{r.*}}, lsl #2]
icmp eq i32 %i, 0 ; <i1>:0 [#uses=1]
br i1 %0, label %return, label %bb
@@ -11,6 +36,7 @@ bb: ; preds = %bb, %entry
%i_addr.09.0 = sub i32 %i, %indvar ; <i32> [#uses=1]
%tmp2 = getelementptr i32* %P, i32 %i_addr.09.0 ; <i32*> [#uses=1]
store i32 %A, i32* %tmp2
+ store i32 %indvar, i32* null
%indvar.next = add i32 %indvar, 1 ; <i32> [#uses=2]
icmp eq i32 %indvar.next, %i ; <i1>:1 [#uses=1]
br i1 %1, label %return, label %bb
diff --git a/test/CodeGen/ARM/lsr-code-insertion.ll b/test/CodeGen/ARM/lsr-code-insertion.ll
index 507ec2c7bd..1bbb96deee 100644
--- a/test/CodeGen/ARM/lsr-code-insertion.ll
+++ b/test/CodeGen/ARM/lsr-code-insertion.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -stats |& grep {40.*Number of machine instrs printed}
-; RUN: llc < %s -stats |& grep {.*Number of re-materialization}
+; RUN: llc < %s -stats |& grep {39.*Number of machine instrs printed}
+; RUN: llc < %s -stats |& not grep {.*Number of re-materialization}
; This test really wants to check that the resultant "cond_true" block only
; has a single store in it, and that cond_true55 only has code to materialize
; the constant and do a store. We do *not* want something like this:
diff --git a/test/CodeGen/ARM/remat.ll b/test/CodeGen/ARM/remat.ll
index 9565c8bca6..9072bcb762 100644
--- a/test/CodeGen/ARM/remat.ll
+++ b/test/CodeGen/ARM/remat.ll
@@ -1,5 +1,4 @@
-; RUN: llc < %s -mtriple=arm-apple-darwin
-; RUN: llc < %s -mtriple=arm-apple-darwin -stats -info-output-file - | grep "Number of re-materialization" | grep 3
+; RUN: llc < %s -mtriple=arm-apple-darwin -stats -info-output-file - | not grep "Number of re-materialization"
%struct.CONTENTBOX = type { i32, i32, i32, i32, i32 }
%struct.LOCBOX = type { i32, i32, i32, i32 }
diff --git a/test/CodeGen/Thumb2/lsr-deficiency.ll b/test/CodeGen/Thumb2/lsr-deficiency.ll
index 7b1b57a786..ac2cd34e4b 100644
--- a/test/CodeGen/Thumb2/lsr-deficiency.ll
+++ b/test/CodeGen/Thumb2/lsr-deficiency.ll
@@ -1,25 +1,29 @@
; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -relocation-model=pic | FileCheck %s
; rdar://7387640
-; FIXME: We still need to rewrite array reference iv of stride -4 with loop
-; count iv of stride -1.
+; This now reduces to a single induction variable.
+
+; TODO: It still gets a GPR shuffle at the end of the loop
+; This is because something in instruction selection has decided
+; that comparing the pre-incremented value with zero is better
+; than comparing the post-incremented value with -4.
@G = external global i32 ; <i32*> [#uses=2]
@array = external global i32* ; <i32**> [#uses=1]
define arm_apcscc void @t() nounwind optsize {
; CHECK: t:
-; CHECK: mov.w r2, #4000
-; CHECK: movw r3, #1001
+; CHECK: mov.w r2, #1000
entry:
%.pre = load i32* @G, align 4 ; <i32> [#uses=1]
br label %bb
bb: ; preds = %bb, %entry
; CHECK: LBB1_1:
-; CHECK: subs r3, #1
-; CHECK: cmp r3, #0
-; CHECK: sub.w r2, r2, #4
+; CHECK: cmp r2, #0
+; CHECK: sub.w r9, r2, #1
+; CHECK: mov r2, r9
+
%0 = phi i32 [ %.pre, %entry ], [ %3, %bb ] ; <i32> [#uses=1]
%indvar = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=2]
%tmp5 = sub i32 1000, %indvar ; <i32> [#uses=1]
diff --git a/test/CodeGen/Thumb2/thumb2-ifcvt1.ll b/test/CodeGen/Thumb2/thumb2-ifcvt1.ll
index 71199abc57..1d267565e0 100644
--- a/test/CodeGen/Thumb2/thumb2-ifcvt1.ll
+++ b/test/CodeGen/Thumb2/thumb2-ifcvt1.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s
-define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) {
+define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
; CHECK: t1:
; CHECK: it ne
; CHECK: cmpne
@@ -20,12 +20,12 @@ cond_next:
}
; FIXME: Check for # of unconditional branch after adding branch folding post ifcvt.
-define i32 @t2(i32 %a, i32 %b) {
+define i32 @t2(i32 %a, i32 %b) nounwind {
entry:
; CHECK: t2:
-; CHECK: ite le
-; CHECK: suble
+; CHECK: ite gt
; CHECK: subgt
+; CHECK: suble
%tmp1434 = icmp eq i32 %a, %b ; <i1> [#uses=1]
br i1 %tmp1434, label %bb17, label %bb.outer
@@ -60,14 +60,14 @@ bb17: ; preds = %cond_false, %cond_true, %entry
@x = external global i32* ; <i32**> [#uses=1]
-define void @foo(i32 %a) {
+define void @foo(i32 %a) nounwind {
entry:
%tmp = load i32** @x ; <i32*> [#uses=1]
store i32 %a, i32* %tmp
ret void
}
-define void @t3(i32 %a, i32 %b) {
+define void @t3(i32 %a, i32 %b) nounwind {
entry:
; CHECK: t3:
; CHECK: it lt
diff --git a/test/CodeGen/X86/2006-05-11-InstrSched.ll b/test/CodeGen/X86/2006-05-11-InstrSched.ll
index bdbe713a29..56d6aa960e 100644
--- a/test/CodeGen/X86/2006-05-11-InstrSched.ll
+++ b/test/CodeGen/X86/2006-05-11-InstrSched.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -march=x86 -mattr=+sse2 -stats -realign-stack=0 |&\
-; RUN: grep {asm-printer} | grep 31
+; RUN: grep {asm-printer} | grep 34
target datalayout = "e-p:32:32"
define void @foo(i32* %mc, i32* %bp, i32* %ms, i32* %xmb, i32* %mpp, i32* %tpmm, i32* %ip, i32* %tpim, i32* %dpp, i32* %tpdm, i32* %bpi, i32 %M) nounwind {
@@ -40,7 +40,7 @@ cond_true: ; preds = %cond_true, %entry
%tmp137.upgrd.7 = bitcast i32* %tmp137 to <2 x i64>* ; <<2 x i64>*> [#uses=1]
store <2 x i64> %tmp131, <2 x i64>* %tmp137.upgrd.7
%tmp147 = add nsw i32 %tmp.10, 8 ; <i32> [#uses=1]
- %tmp.upgrd.8 = icmp slt i32 %tmp147, %M ; <i1> [#uses=1]
+ %tmp.upgrd.8 = icmp ne i32 %tmp147, %M ; <i1> [#uses=1]
%indvar.next = add i32 %indvar, 1 ; <i32> [#uses=1]
br i1 %tmp.upgrd.8, label %cond_true, label %return
diff --git a/test/CodeGen/X86/2007-08-13-SpillerReuse.ll b/test/CodeGen/X86/2007-08-13-SpillerReuse.ll
deleted file mode 100644
index d6ea5109d1..0000000000
--- a/test/CodeGen/X86/2007-08-13-SpillerReuse.ll
+++ /dev/null
@@ -1,102 +0,0 @@
-; RUN: llc < %s -mtriple=i686-apple-darwin | grep "48(%esp)" | count 5
-
- %struct..0anon = type { i32 }
- %struct.rtvec_def = type { i32, [1 x %struct..0anon] }
- %struct.rtx_def = type { i16, i8, i8, [1 x %struct..0anon] }
-@rtx_format = external global [116 x i8*] ; <[116 x i8*]*> [#uses=1]
-@rtx_length = external global [117 x i32] ; <[117 x i32]*> [#uses=1]
-
-declare %struct.rtx_def* @fixup_memory_subreg(%struct.rtx_def*, %struct.rtx_def*, i32)
-
-define %struct.rtx_def* @walk_fixup_memory_subreg(%struct.rtx_def* %x, %struct.rtx_def* %insn) {
-entry:
- %tmp2 = icmp eq %struct.rtx_def* %x, null ; <i1> [#uses=1]
- br i1 %tmp2, label %UnifiedReturnBlock, label %cond_next
-
-cond_next: ; preds = %entry
- %tmp6 = getelementptr %struct.rtx_def* %x, i32 0, i32 0 ; <i16*> [#uses=1]
- %tmp7 = load i16* %tmp6 ; <i16> [#uses=2]
- %tmp78 = zext i16 %tmp7 to i32 ; <i32> [#uses=2]
- %tmp10 = icmp eq i16 %tmp7, 54 ; <i1> [#uses=1]
- br i1 %tmp10, label %cond_true13, label %cond_next32
-
-cond_true13: ; preds = %cond_next
- %tmp15 = getelementptr %struct.rtx_def* %x, i32 0, i32 3 ; <[1 x %struct..0anon]*> [#uses=1]
- %tmp1718 = bitcast [1 x %struct..0anon]* %tmp15 to %struct.rtx_def** ; <%struct.rtx_def**> [#uses=1]
- %tmp19 = load %struct.rtx_def** %tmp1718 ; <%struct.rtx_def*> [#uses=1]
- %tmp20 = getelementptr %struct.rtx_def* %tmp19, i32 0, i32 0 ; <i16*> [#uses=1]
- %tmp21 = load i16* %tmp20 ; <i16> [#uses=1]
- %tmp22 = icmp eq i16 %tmp21, 57 ; <i1> [#uses=1]
- br i1 %tmp22, label %cond_true25, label %cond_next32
-
-cond_true25: ; preds = %cond_true13
- %tmp29 = tail call %struct.rtx_def* @fixup_memory_subreg( %struct.rtx_def* %x, %struct.rtx_def* %insn, i32 1 ) ; <%struct.rtx_def*> [#uses=1]
- ret %struct.rtx_def* %tmp29
-
-cond_next32: ; preds = %cond_true13, %cond_next
- %tmp34 = getelementptr [116 x i8*]* @rtx_format, i32 0, i32 %tmp78 ; <i8**> [#uses=1]
- %tmp35 = load i8** %tmp34, align 4 ; <i8*> [#uses=1]
- %tmp37 = getelementptr [117 x i32]* @rtx_length, i32 0, i32 %tmp78 ; <i32*> [#uses=1]
- %tmp38 = load i32* %tmp37, align 4 ; <i32> [#uses=1]
- %i.011 = add i32 %tmp38, -1 ; <i32> [#uses=2]
- %tmp12513 = icmp sgt i32 %i.011, -1 ; <i1> [#uses=1]
- br i1 %tmp12513, label %bb, label %UnifiedReturnBlock
-
-bb: ; preds = %bb123, %cond_next32
- %indvar = phi i32 [ %indvar.next26, %bb123 ], [ 0, %cond_next32 ] ; <i32> [#uses=2]
- %i.01.0 = sub i32 %i.011, %indvar ; <i32> [#uses=5]
- %tmp42 = getelementptr i8* %tmp35, i32 %i.01.0 ; <i8*> [#uses=2]
- %tmp43 = load i8* %tmp42 ; <i8> [#uses=1]
- switch i8 %tmp43, label %bb123 [
- i8 101, label %cond_true47
- i8 69, label %bb105.preheader
- ]
-
-cond_true47: ; preds = %bb
- %tmp52 = getelementptr %struct.rtx_def* %x, i32 0, i32 3, i32 %i.01.0 ; <%struct..0anon*> [#uses=1]
- %tmp5354 = bitcast %struct..0anon* %tmp52 to %struct.rtx_def** ; <%struct.rtx_def**> [#uses=1]
- %tmp55 = load %struct.rtx_def** %tmp5354 ; <%struct.rtx_def*> [#uses=1]
- %tmp58 = tail call %struct.rtx_def* @walk_fixup_memory_subreg( %struct.rtx_def* %tmp55, %struct.rtx_def* %insn ) ; <%struct.rtx_def*> [#uses=1]
- %tmp62 = getelementptr %struct.rtx_def* %x, i32 0, i32 3, i32 %i.01.0, i32 0 ; <i32*> [#uses=1]
- %tmp58.c = ptrtoint %struct.rtx_def* %tmp58 to i32 ; <i32> [#uses=1]
- store i32 %tmp58.c, i32* %tmp62
- %tmp6816 = load i8* %tmp42 ; <i8> [#uses=1]
- %tmp6917 = icmp eq i8 %tmp6816, 69 ; <i1> [#uses=1]
- br i1 %tmp6917, label %bb105.preheader, label %bb123
-
-bb105.preheader: ; preds = %cond_true47, %bb
- %tmp11020 = getelementptr %struct.rtx_def* %x, i32 0, i32 3, i32 %i.01.0 ; <%struct..0anon*> [#uses=1]
- %tmp11111221 = bitcast %struct..0anon* %tmp11020 to %struct.rtvec_def** ; <%struct.rtvec_def**> [#uses=3]
- %tmp11322 = load %struct.rtvec_def** %tmp11111221 ; <%struct.rtvec_def*> [#uses=1]
- %tmp11423 = getelementptr %struct.rtvec_def* %tmp11322, i32 0, i32 0 ; <i32*> [#uses=1]
- %tmp11524 = load i32* %tmp11423 ; <i32> [#uses=1]
- %tmp11625 = icmp eq i32 %tmp11524, 0 ; <i1> [#uses=1]
- br i1 %tmp11625, label %bb123, label %bb73
-
-bb73: ; preds = %bb73, %bb105.preheader
- %j.019 = phi i32 [ %tmp104, %bb73 ], [ 0, %bb105.preheader ] ; <i32> [#uses=3]
- %tmp81 = load %struct.rtvec_def** %tmp11111221 ; <%struct.rtvec_def*> [#uses=2]
- %tmp92 = getelementptr %struct.rtvec_def* %tmp81, i32 0, i32 1, i32 %j.019 ; <%struct..0anon*> [#uses=1]
- %tmp9394 = bitcast %struct..0anon* %tmp92 to %struct.rtx_def** ; <%struct.rtx_def**> [#uses=1]
- %tmp95 = load %struct.rtx_def** %tmp9394 ; <%struct.rtx_def*> [#uses=1]
- %tmp98 = tail call %struct.rtx_def* @walk_fixup_memory_subreg( %struct.rtx_def* %tmp95, %struct.rtx_def* %insn ) ; <%struct.rtx_def*> [#uses=1]
- %tmp101 = getelementptr %struct.rtvec_def* %tmp81, i32 0, i32 1, i32 %j.019, i32 0 ; <i32*> [#uses=1]
- %tmp98.c = ptrtoint %struct.rtx_def* %tmp98 to i32 ; <i32> [#uses=1]
- store i32 %tmp98.c, i32* %tmp101
- %tmp104 = add i32 %j.019, 1 ; <i32> [#uses=2]
- %tmp113 = load %struct.rtvec_def** %tmp11111221 ; <%struct.rtvec_def*> [#uses=1]
- %tmp114 = getelementptr %struct.rtvec_def* %tmp113, i32 0, i32 0 ; <i32*> [#uses=1]
- %tmp115 = load i32* %tmp114 ; <i32> [#uses=1]
- %tmp116 = icmp ult i32 %tmp104, %tmp115 ; <i1> [#uses=1]
- br i1 %tmp116, label %bb73, label %bb123
-
-bb123: ; preds = %bb73, %bb105.preheader, %cond_true47, %bb
- %i.0 = add i32 %i.01.0, -1 ; <i32> [#uses=1]
- %tmp125 = icmp sgt i32 %i.0, -1 ; <i1> [#uses=1]
- %indvar.next26 = add i32 %indvar, 1 ; <i32> [#uses=1]
- br i1 %tmp125, label %bb, label %UnifiedReturnBlock
-
-UnifiedReturnBlock: ; preds = %bb123, %cond_next32, %entry
- %UnifiedRetVal = phi %struct.rtx_def* [ null, %entry ], [ %x, %cond_next32 ], [ %x, %bb123 ] ; <%struct.rtx_def*> [#uses=1]
- ret %struct.rtx_def* %UnifiedRetVal
-}
diff --git a/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll b/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll
index 721d4c945b..8e315f4d80 100644
--- a/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll
+++ b/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll
@@ -35,7 +35,7 @@ cond_next36.i: ; preds = %cond_next.i
bb.i28.i: ; preds = %bb.i28.i, %cond_next36.i
; CHECK: %bb.i28.i
; CHECK: addl $2
-; CHECK: addl $2
+; CHECK: addl $-2
%j.0.reg2mem.0.i16.i = phi i32 [ 0, %cond_next36.i ], [ %indvar.next39.i, %bb.i28.i ] ; <i32> [#uses=2]
%din_addr.1.reg2mem.0.i17.i = phi double [ 0.000000e+00, %cond_next36.i ], [ %tmp16.i25.i, %bb.i28.i ] ; <double> [#uses=1]
%tmp1.i18.i = fptosi double %din_addr.1.reg2mem.0.i17.i to i32 ; <i32> [#uses=2]
diff --git a/test/CodeGen/X86/2009-09-10-SpillComments.ll b/test/CodeGen/X86/2009-09-10-SpillComments.ll
index 1dd9990e71..f9ca861c55 100644
--- a/test/CodeGen/X86/2009-09-10-SpillComments.ll
+++ b/test/CodeGen/X86/2009-09-10-SpillComments.ll
@@ -1,5 +1,11 @@
; RUN: llc < %s -mtriple=x86_64-unknown-linux | FileCheck %s
+; This test shouldn't require spills.
+
+; CHECK: subq $8, %rsp
+; CHECK-NOT: $rsp
+; CHECK: addq $8, %rsp
+
%struct..0anon = type { i32 }
%struct.rtvec_def = type { i32, [1 x %struct..0anon] }
%struct.rtx_def = type { i16, i8, i8, [1 x %struct..0anon] }
@@ -10,9 +16,6 @@ declare %struct.rtx_def* @fixup_memory_subreg(%struct.rtx_def*, %struct.rtx_def*
define %struct.rtx_def* @walk_fixup_memory_subreg(%struct.rtx_def* %x, %struct.rtx_def* %insn) {
entry:
-; CHECK: Spill
-; CHECK: Folded Spill
-; CHECK: Reload
%tmp2 = icmp eq %struct.rtx_def* %x, null ; <i1> [#uses=1]
br i1 %tmp2, label %UnifiedReturnBlock, label %cond_next
diff --git a/test/CodeGen/X86/full-lsr.ll b/test/CodeGen/X86/full-lsr.ll
index 68575bc401..3bd58b65be 100644
--- a/test/CodeGen/X86/full-lsr.ll
+++ b/test/CodeGen/X86/full-lsr.ll
@@ -1,6 +1,12 @@
-; RUN: llc < %s -march=x86 -enable-full-lsr >%t
-; RUN: grep {addl \\\$4,} %t | count 3
-; RUN: not grep {,%} %t
+; RUN: llc < %s -march=x86 >%t
+
+; TODO: Enhance full lsr mode to get this:
+; RUNX: grep {addl \\\$4,} %t | count 3
+; RUNX: not grep {,%} %t
+
+; For now, it should find this, which is still pretty good:
+; RUN: not grep {addl \\\$4,} %t
+; RUN: grep {,%} %t | count 6
define void @foo(float* nocapture %A, float* nocapture %B, float* nocapture %C, i32 %N) nounwind {
entry:
diff --git a/test/CodeGen/X86/iv-users-in-other-loops.ll b/test/CodeGen/X86/iv-users-in-other-loops.ll
index c695c29e06..0410bc0d9a 100644
--- a/test/CodeGen/X86/iv-users-in-other-loops.ll
+++ b/test/CodeGen/X86/iv-users-in-other-loops.ll
@@ -1,11 +1,11 @@
; RUN: llc < %s -march=x86-64 -o %t
-; RUN: grep inc %t | count 1
+; RUN: not grep inc %t
; RUN: grep dec %t | count 2
-; RUN: grep addq %t | count 13
+; RUN: grep addq %t | count 10
; RUN: not grep addb %t
; RUN: grep leaq %t | count 9
-; RUN: grep leal %t | count 3
-; RUN: grep movq %t | count 5
+; RUN: grep leal %t | count 2
+; RUN: grep movq %t | count 10
; IV users in each of the loops from other loops shouldn't cause LSR
; to insert new induction variables. Previously it would create a
diff --git a/test/CodeGen/X86/loop-strength-reduce4.ll b/test/CodeGen/X86/loop-strength-reduce4.ll
index 07e46eca75..6c0eb8c0df 100644
--- a/test/CodeGen/X86/loop-strength-reduce4.ll
+++ b/test/CodeGen/X86/loop-strength-reduce4.ll
@@ -1,5 +1,19 @@
-; RUN: llc < %s -march=x86 | grep cmp | grep 64
-; RUN: llc < %s -march=x86 | not grep inc
+; RUN: llc < %s -march=x86 -relocation-model=static -mtriple=i686-apple-darwin | FileCheck %s -check-prefix=STATIC
+; RUN: llc < %s -march=x86 -relocation-model=pic | FileCheck %s -check-prefix=PIC
+
+; By starting the IV at -64 instead of 0, a cmp is eliminated,
+; as the flags from the add can be used directly.
+
+; STATIC: movl $-64, %ecx
+
+; STATIC: movl %eax, _state+76(%ecx)
+; STATIC: addl $16, %ecx
+; STATIC: jne
+
+; In PIC mode the symbol can't be folded, so the change-compare-stride
+; trick applies.
+
+; PIC: cmpl $64
@state = external global [0 x i32] ; <[0 x i32]*> [#uses=4]
@S = external global [0 x i32] ; <[0 x i32]*> [#uses=4]
diff --git a/test/CodeGen/X86/loop-strength-reduce8.ll b/test/CodeGen/X86/loop-strength-reduce8.ll
index e14cd8a99e..6b2247d1d6 100644
--- a/test/CodeGen/X86/loop-strength-reduce8.ll
+++ b/test/CodeGen/X86/loop-strength-reduce8.ll
@@ -1,4 +1,10 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin | grep leal | not grep 16
+; RUN: llc < %s -mtriple=i386-apple-darwin | FileCheck %s
+
+; CHECK: leal 16(%eax), %edx
+; CHECK: align
+; CHECK: addl $4, %edx
+; CHECK: decl %ecx
+; CHECK: jne LBB1_2
%struct.CUMULATIVE_ARGS = type { i32, i32, i32, i32, i32, i32, i32 }
%struct.bitmap_element = type { %struct.bitmap_element*, %struct.bitmap_element*, i32, [2 x i64] }
diff --git a/test/CodeGen/X86/lsr-reuse.ll b/test/CodeGen/X86/lsr-reuse.ll
new file mode 100644
index 0000000000..a1919bab38
--- /dev/null
+++ b/test/CodeGen/X86/lsr-reuse.ll
@@ -0,0 +1,159 @@
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+target datalayout = "e-p:64:64:64"
+target triple = "x86_64-unknown-unknown"
+
+; Full strength reduction reduces register pressure from 5 to 4 here.
+
+; CHECK: full_me:
+; CHECK: movsd (%rsi), %xmm0
+; CHECK: mulsd (%rdx), %xmm0
+; CHECK: movsd %xmm0, (%rdi)
+; CHECK: addq $8, %rsi
+; CHECK: addq $8, %rdx
+; CHECK: addq $8, %rdi
+; CHECK: decq %rcx
+; CHECK: jne
+
+define void @full_me(double* nocapture %A, double* nocapture %B, double* nocapture %C, i64 %n) nounwind {
+entry:
+ %t0 = icmp sgt i64 %n, 0
+ br i1 %t0, label %loop, label %return
+
+loop:
+ %i = phi i64 [ %i.next, %loop ], [ 0, %entry ]
+ %Ai = getelementptr inbounds double* %A, i64 %i
+ %Bi = getelementptr inbounds double* %B, i64 %i
+ %Ci = getelementptr inbounds double* %C, i64 %i
+ %t1 = load double* %Bi
+ %t2 = load double* %Ci
+ %m = fmul double %t1, %t2
+ store double %m, double* %Ai
+ %i.next = add nsw i64 %i, 1
+ %exitcond = icmp eq i64 %i.next, %n
+ br i1 %exitcond, label %return, label %loop
+
+return:
+ ret void
+}
+
+; In this test, the counting IV exit value is used, so full strength reduction
+; would not reduce register pressure. IndVarSimplify ought to simplify such
+; cases away, but it's useful here to verify that LSR's register pressure
+; heuristics are working as expected.
+
+; CHECK: count_me_0:
+; CHECK: movsd (%rsi,%rax,8), %xmm0
+; CHECK: mulsd (%rdx,%rax,8), %xmm0
+; CHECK: movsd %xmm0, (%rdi,%rax,8)
+; CHECK: incq %rax
+; CHECK: cmpq %rax, %rcx
+; CHECK: jne
+
+define i64 @count_me_0(double* nocapture %A, double* nocapture %B, double* nocapture %C, i64 %n) nounwind {
+entry:
+ %t0 = icmp sgt i64 %n, 0
+ br i1 %t0, label %loop, label %return
+
+loop:
+ %i = phi i64 [ %i.next, %loop ], [ 0, %entry ]
+ %Ai = getelementptr inbounds double* %A, i64 %i
+ %Bi = getelementptr inbounds double* %B, i64 %i
+ %Ci = getelementptr inbounds double* %C, i64 %i
+ %t1 = load double* %Bi
+ %t2 = load double* %Ci
+ %m = fmul double %t1, %t2
+ store double %m, double* %Ai
+ %i.next = add nsw i64 %i, 1
+ %exitcond = icmp eq i64 %i.next, %n
+ br i1 %exitcond, label %return, label %loop
+
+return:
+ %q = phi i64 [ 0, %entry ], [ %i.next, %loop ]
+ ret i64 %q
+}
+
+; In this test, the trip count value is used, so full strength reduction
+; would not reduce register pressure.
+; (though it would reduce register pressure inside the loop...)
+
+; CHECK: count_me_1:
+; CHECK: movsd (%rsi,%rax,8), %xmm0
+; CHECK: mulsd (%rdx,%rax,8), %xmm0
+; CHECK: movsd %xmm0, (%rdi,%rax,8)
+; CHECK: incq %rax
+; CHECK: cmpq %rax, %rcx
+; CHECK: jne
+
+define i64 @count_me_1(double* nocapture %A, double* nocapture %B, double* nocapture %C, i64 %n) nounwind {
+entry:
+ %t0 = icmp sgt i64 %n, 0
+ br i1 %t0, label %loop, label %return
+
+loop:
+ %i = phi i64 [ %i.next, %loop ], [ 0, %entry ]
+ %Ai = getelementptr inbounds double* %A, i64 %i
+ %Bi = getelementptr inbounds double* %B, i64 %i
+ %Ci = getelementptr inbounds double* %C, i64 %i
+ %t1 = load double* %Bi
+ %t2 = load double* %Ci
+ %m = fmul double %t1, %t2
+ store double %m, double* %Ai
+ %i.next = add nsw i64 %i, 1
+ %exitcond = icmp eq i64 %i.next, %n
+ br i1 %exitcond, label %return, label %loop
+
+return:
+ %q = phi i64 [ 0, %entry ], [ %n, %loop ]
+ ret i64 %q
+}
+
+; This should be fully strength-reduced to reduce register pressure, however
+; the current heuristics get distracted by all the reuse with the stride-1
+; induction variable first.
+
+; But even so, be clever and start the stride-1 variable at a non-zero value
+; to eliminate an in-loop immediate value.
+
+; CHECK: count_me_2:
+; CHECK: movl $5, %eax
+; CHECK: align
+; CHECK: BB4_1:
+; CHECK: movsd (%rdi,%rax,8), %xmm0
+; CHECK: addsd (%rsi,%rax,8), %xmm0
+; CHECK: movsd %xmm0, (%rdx,%rax,8)
+; CHECK: movsd 40(%rdi,%rax,8), %xmm0
+; CHECK: addsd 40(%rsi,%rax,8), %xmm0
+; CHECK: movsd %xmm0, 40(%rdx,%rax,8)
+; CHECK: incq %rax
+; CHECK: cmpq $5005, %rax
+; CHECK: jne
+
+define void @count_me_2(double* nocapture %A, double* nocapture %B, double* nocapture %C) nounwind {
+entry:
+ br label %loop
+
+loop:
+ %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
+ %i5 = add i64 %i, 5
+ %Ai = getelementptr double* %A, i64 %i5
+ %t2 = load double* %Ai
+ %Bi = getelementptr double* %B, i64 %i5
+ %t4 = load double* %Bi
+ %t5 = fadd double %t2, %t4
+ %Ci = getelementptr double* %C, i64 %i5
+ store double %t5, double* %Ci
+ %i10 = add i64 %i, 10
+ %Ai10 = getelementptr double* %A, i64 %i10
+ %t9 = load double* %Ai10
+ %Bi10 = getelementptr double* %B, i64 %i10
+ %t11 = load double* %Bi10
+ %t12 = fadd double %t9, %t11
+ %Ci10 = getelementptr double* %C, i64 %i10
+ store double %t12, double* %Ci10
+ %i.next = add i64 %i, 1
+ %exitcond = icmp eq i64 %i.next, 5000
+ br i1 %exitcond, label %return, label %loop
+
+return:
+ ret void
+}
diff --git a/test/CodeGen/X86/masked-iv-safe.ll b/test/CodeGen/X86/masked-iv-safe.ll
index bc493bd8f7..7111d687ed 100644
--- a/test/CodeGen/X86/masked-iv-safe.ll
+++ b/test/CodeGen/X86/masked-iv-safe.ll
@@ -4,9 +4,9 @@
; RUN: not grep sar %t
; RUN: not grep shl %t
; RUN: grep add %t | count 2
-; RUN: grep inc %t | count 4
+; RUN: grep inc %t | count 3
; RUN: grep dec %t | count 2
-; RUN: grep lea %t | count 2
+; RUN: grep lea %t | count 3
; Optimize away zext-inreg and sext-inreg on the loop induction
; variable using trip-count information.
@@ -127,6 +127,9 @@ return:
ret void
}
+; TODO: If we could handle all the loads and stores as post-inc users, we could
+; use {-1,+,1} in the induction variable register, and we'd get another inc,
+; one fewer add, and a comparison with zero.
define void @another_count_up(double* %d, i64 %n) nounwind {
entry:
br label %loop
diff --git a/test/CodeGen/X86/pr3495-2.ll b/test/CodeGen/X86/pr3495-2.ll
index 1372a1522b..71aa5a0488 100644
--- a/test/CodeGen/X86/pr3495-2.ll
+++ b/test/CodeGen/X86/pr3495-2.ll
@@ -1,5 +1,6 @@
; RUN: llc < %s -march=x86 -relocation-model=pic -disable-fp-elim -stats |& grep {Number of reloads omited}
+target datalayout = "e-p:32:32:32"
target triple = "i386-apple-darwin9.6"
%struct.constraintVCGType = type { i32, i32, i32, i32 }
%struct.nodeVCGType = type { %struct.constraintVCGType*, i32, i32, i32, %struct.constraintVCGType*, i32, i32, i32 }
diff --git a/test/CodeGen/X86/remat-mov-0.ll b/test/CodeGen/X86/remat-mov-0.ll
index c4f768ca52..5fb445c935 100644
--- a/test/CodeGen/X86/remat-mov-0.ll
+++ b/test/CodeGen/X86/remat-mov-0.ll
@@ -1,13 +1,33 @@
-; RUN: llc < %s -march=x86-64 | grep {xorl %edi, %edi} | count 4
+; RUN: llc < %s -march=x86-64 | FileCheck %s
; CodeGen should remat the zero instead of spilling it.
declare void @foo(i64 %p)
+; CHECK: bar:
+; CHECK: xorl %edi, %edi
+; CHECK: xorl %edi, %edi
define void @bar() nounwind {
call void @foo(i64 0)
call void @foo(i64 0)
- call void @foo(i64 0)
- call void @foo(i64 0)
ret void
}
+
+; CHECK: bat:
+; CHECK: movq $-1, %rdi
+; CHECK: movq $-1, %rdi
+define void @bat() nounwind {
+ call void @foo(i64 -1)
+ call void @foo(i64 -1)
+ ret void
+}
+
+; CHECK: bau:
+; CHECK: movl $1, %edi
+; CHECK: movl $1, %edi
+define void @bau() nounwind {
+ call void @foo(i64 1)
+ call void @foo(i64 1)
+ ret void
+}
+
diff --git a/test/CodeGen/X86/remat-mov-1.ll b/test/CodeGen/X86/remat-mov-1.ll
deleted file mode 100644
index d71b7a5b91..0000000000
--- a/test/CodeGen/X86/remat-mov-1.ll
+++ /dev/null
@@ -1,40 +0,0 @@
-; RUN: llc < %s -march=x86 | grep -- -1 | grep mov | count 2
-
- %struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
- %struct.ImgT = type { i8, i8*, i8*, %struct.FILE*, i32, i32, i32, i32, i8*, double*, float*, float*, float*, i32*, double, double, i32*, double*, i32*, i32* }
- %struct._CompT = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, i8, %struct._PixT*, %struct._CompT*, i8, %struct._CompT* }
- %struct._PixT = type { i32, i32, %struct._PixT* }
- %struct.__sFILEX = type opaque
- %struct.__sbuf = type { i8*, i32 }
-
-declare fastcc void @MergeComponents(%struct._CompT*, %struct._CompT*, %struct._CompT*, %struct._CompT**, %struct.ImgT*) nounwind
-
-define fastcc void @MergeToLeft(%struct._CompT* %comp, %struct._CompT** %head, %struct.ImgT* %img) nounwind {
-entry:
- br label %bb208
-
-bb105: ; preds = %bb200
- br i1 false, label %bb197, label %bb149
-
-bb149: ; preds = %bb105
- %tmp151 = getelementptr %struct._CompT* %comp, i32 0, i32 0 ; <i32*> [#uses=1]
- br label %bb193
-
-bb193: ; preds = %bb184, %bb149
- %tmp196 = load i32* %tmp151, align 4 ; <i32> [#uses=1]
- br label %bb197
-
-bb197: ; preds = %bb193, %bb105
- %last_comp.0 = phi i32 [ %tmp196, %bb193 ], [ 0, %bb105 ] ; <i32> [#uses=0]
- %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=1]
- br label %bb200
-
-bb200: ; preds = %bb208, %bb197
- %indvar = phi i32 [ 0, %bb208 ], [ %indvar.next, %bb197 ] ; <i32> [#uses=2]
- %xm.0 = sub i32 %indvar, 0 ; <i32> [#uses=1]
- %tmp202 = icmp slt i32 %xm.0, 1 ; <i1> [#uses=1]
- br i1 %tmp202, label %bb105, label %bb208
-
-bb208: ; preds = %bb200, %entry
- br label %bb200
-}
diff --git a/test/CodeGen/X86/subreg-to-reg-5.ll b/test/CodeGen/X86/subreg-to-reg-5.ll
deleted file mode 100644
index ba4c307d10..0000000000
--- a/test/CodeGen/X86/subreg-to-reg-5.ll
+++ /dev/null
@@ -1,35 +0,0 @@
-; RUN: llc < %s -march=x86-64 > %t
-; RUN: grep addl %t
-; RUN: not egrep {movl|movq} %t
-
-define float @foo(float* %B) nounwind {
-entry:
- br label %bb2
-
-bb2: ; preds = %bb3, %entry
- %B_addr.0.rec = phi i64 [ %indvar.next154, %bb3 ], [ 0, %entry ] ; <i64> [#uses=2]
- %z = icmp slt i64 %B_addr.0.rec, 20000
- br i1 %z, label %bb3, label %bb4
-
-bb3: ; preds = %bb2
- %indvar.next154 = add i64 %B_addr.0.rec, 1 ; <i64> [#uses=1]
- br label %bb2
-
-bb4: ; preds = %bb2
- %B_addr.0 = getelementptr float* %B, i64 %B_addr.0.rec ; <float*> [#uses=1]
- %t1 = ptrtoint float* %B_addr.0 to i64 ; <i64> [#uses=1]
- %t2 = and i64 %t1, 4294967295 ; <i64> [#uses=1]
- %t3 = icmp eq i64 %t2, 0 ; <i1> [#uses=1]
- br i1 %t3, label %bb5, label %bb10.preheader
-
-bb10.preheader: ; preds = %bb4
- br label %bb9
-
-bb5: ; preds = %bb4
- ret float 7.0
-
-bb9: ; preds = %bb10.preheader
- %t5 = getelementptr float* %B, i64 0 ; <float*> [#uses=1]
- %t7 = load float* %t5 ; <float> [#uses=1]
- ret float %t7
-}
diff --git a/test/Transforms/IndVarSimplify/gep-with-mul-base.ll b/test/Transforms/IndVarSimplify/gep-with-mul-base.ll
index 7809594076..19d54ff2a2 100644
--- a/test/Transforms/IndVarSimplify/gep-with-mul-base.ll
+++ b/test/Transforms/IndVarSimplify/gep-with-mul-base.ll
@@ -1,6 +1,7 @@
; RUN: opt < %s -indvars -S > %t
-; RUN: grep add %t | count 8
-; RUN: grep mul %t | count 7
+; RUN: grep add %t | count 6
+; RUN: grep sub %t | count 2
+; RUN: grep mul %t | count 6
define void @foo(i64 %n, i64 %m, i64 %o, double* nocapture %p) nounwind {
entry:
diff --git a/test/Transforms/LoopStrengthReduce/2008-08-06-CmpStride.ll b/test/Transforms/LoopStrengthReduce/2008-08-06-CmpStride.ll
index 7c7a21c013..99cb8569b3 100644
--- a/test/Transforms/LoopStrengthReduce/2008-08-06-CmpStride.ll
+++ b/test/Transforms/LoopStrengthReduce/2008-08-06-CmpStride.ll
@@ -1,5 +1,4 @@
-; RUN: opt < %s -loop-reduce -S | grep ugt
-; PR2535
+; RUN: llc -march=x86-64 < %s -o - | grep {cmpl \\$\[1\], %}
@.str = internal constant [4 x i8] c"%d\0A\00"
@@ -16,7 +15,7 @@ forbody:
%add166 = or i32 %mul15, 1 ; <i32> [#uses=1] *
call i32 (i8*, ...)* @printf( i8* noalias getelementptr ([4 x i8]* @.str, i32 0, i32 0), i32 %add166 ) nounwind
%inc = add i32 %i.0, 1 ; <i32> [#uses=3]
- %cmp = icmp ult i32 %inc, 1027 ; <i1> [#uses=1]
+ %cmp = icmp ne i32 %inc, 1027 ; <i1> [#uses=1]
br i1 %cmp, label %forbody, label %afterfor
afterfor: ; preds = %forcond
diff --git a/test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-0.ll b/test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-0.ll
index 36941ad6d3..d9abc8ba66 100644
--- a/test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-0.ll
+++ b/test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-0.ll
@@ -1,10 +1,9 @@
-; RUN: llc %s -o - --x86-asm-syntax=att | grep {cmpl \$4}
+; RUN: llc < %s -o - | grep {testl %ecx, %ecx}
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-apple-darwin9"
-; This is like change-compare-stride-trickiness-1.ll except the comparison
-; happens before the relevant use, so the comparison stride can't be
-; easily changed.
+; The comparison happens before the relevant use, but it can still be rewritten
+; to compare with zero.
define void @foo() nounwind {
entry:
diff --git a/test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-1.ll b/test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-1.ll
index 8a3978bb2e..ea8a259ecd 100644
--- a/test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-1.ll
+++ b/test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-1.ll
@@ -1,10 +1,10 @@
-; RUN: llc %s -o - --x86-asm-syntax=att | grep {cmpq \$8}
+; RUN: llc %s -o - --x86-asm-syntax=att | grep {cmp. \$8}
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-apple-darwin9"
-; This is like change-compare-stride-trickiness-0.ll except the comparison
-; happens after the relevant use, so the comparison stride can be
-; easily changed.
+; The comparison happens after the relevant use, so the stride can easily
+; be changed. The comparison can be done in a narrower mode than the
+; induction variable.
define void @foo() nounwind {
entry:
diff --git a/test/Transforms/LoopStrengthReduce/count-to-zero.ll b/test/Transforms/LoopStrengthReduce/count-to-zero.ll
index 8cc3b5c103..feb79f8a0c 100644
--- a/test/Transforms/LoopStrengthReduce/count-to-zero.ll
+++ b/test/Transforms/LoopStrengthReduce/count-to-zero.ll
@@ -19,7 +19,7 @@ bb3: ; preds = %bb1
%tmp4 = add i32 %c_addr.1, -1 ; <i32> [#uses=1]
%c_addr.1.be = select i1 %tmp2, i32 %tmp3, i32 %tmp4 ; <i32> [#uses=1]
%indvar.next = add i32 %indvar, 1 ; <i32> [#uses=1]
-; CHECK: sub i32 %lsr.iv, 1
+; CHECK: add i32 %lsr.iv, -1
br label %bb6
bb6: ; preds = %bb3, %entry
diff --git a/test/Transforms/LoopStrengthReduce/icmp_use_postinc.ll b/test/Transforms/LoopStrengthReduce/icmp_use_postinc.ll
deleted file mode 100644
index 4ad5d1478d..0000000000
--- a/test/Transforms/LoopStrengthReduce/icmp_use_postinc.ll
+++ /dev/null
@@ -1,27 +0,0 @@
-; RUN: opt < %s -loop-reduce -S | FileCheck %s
-
-define i32 @main(i32 %argc, i8** nocapture %argv) nounwind ssp {
-entry:
- br i1 undef, label %bb4.preheader, label %bb.nph8
-
-bb4.preheader: ; preds = %entry
- br label %bb4
-
-bb1: ; preds = %bb4
- br i1 undef, label %bb.nph8, label %bb3
-
-bb3: ; preds = %bb1
- %phitmp = add i32 %indvar, 1 ; <i32> [#uses=1]
- br label %bb4
-
-bb4: ; preds = %bb3, %bb4.preheader
-; CHECK: %lsr.iv = phi
-; CHECK: %lsr.iv.next = add i32 %lsr.iv, 1
-; CHECK: %0 = icmp slt i32 %lsr.iv.next, %argc
- %indvar = phi i32 [ 1, %bb4.preheader ], [ %phitmp, %bb3 ] ; <i32> [#uses=2]
- %0 = icmp slt i32 %indvar, %argc ; <i1> [#uses=1]
- br i1 %0, label %bb1, label %bb.nph8
-
-bb.nph8: ; preds = %bb4, %bb1, %entry
- unreachable
-}