Don't break the IV update in TLI::SimplifySetCC().

LSR always tries to make the ICmp in the loop latch use the incremented induction variable. This allows the induction variable to be kept in a single register. When the induction variable limit is equal to the stride, SimplifySetCC() would break LSR's hard work by transforming: (icmp (add iv, stride), stride) --> (cmp iv, 0). This forced us to use lea for the IV update, preventing the simpler incl+cmp. <rdar://problem/7643606> <rdar://problem/11184260> git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@154119 91177308-0d34-0410-b5e6-96231b3b80d8
author: Jakob Stoklund Olesen <stoklund@2pi.dk> 2012-04-05 20:30:20 +0000
committer: Jakob Stoklund Olesen <stoklund@2pi.dk> 2012-04-05 20:30:20 +0000
commit: 740cd657f3d9d4e88614831c70a649f9257164da (patch)
tree: 55acdd0c3c13cc20ea4e1b1863f80b4cc62f21f8 /test
parent: 036ebfd87434bc30a0e51f4b9a3d55ab49ee5509 (diff)
download: llvm-740cd657f3d9d4e88614831c70a649f9257164da.tar.gz
llvm-740cd657f3d9d4e88614831c70a649f9257164da.tar.bz2
llvm-740cd657f3d9d4e88614831c70a649f9257164da.tar.xz
2 files changed, 45 insertions, 8 deletions
diff --git a/test/CodeGen/Thumb2/lsr-deficiency.ll b/test/CodeGen/Thumb2/lsr-deficiency.ll
index 9ff114e2b6..9aaa821698 100644
--- a/test/CodeGen/Thumb2/lsr-deficiency.ll
+++ b/test/CodeGen/Thumb2/lsr-deficiency.ll
@@ -3,11 +3,6 @@
 
 ; This now reduces to a single induction variable.
 
-; TODO: It still gets a GPR shuffle at the end of the loop
-; This is because something in instruction selection has decided
-; that comparing the pre-incremented value with zero is better
-; than comparing the post-incremented value with -4.
-
 @G = external global i32                          ; <i32*> [#uses=2]
 @array = external global i32*                     ; <i32**> [#uses=1]
 
@@ -20,9 +15,9 @@ entry:
 
 bb:                                               ; preds = %bb, %entry
 ; CHECK: LBB0_1:
-; CHECK: cmp [[R2:r[0-9]+]], #0
-; CHECK: sub{{(.w)?}} [[REGISTER:(r[0-9]+)|(lr)]], [[R2]], #1
-; CHECK: mov [[R2]], [[REGISTER]]
+; CHECK: subs [[R2:r[0-9]+]], #1
+; CHECK: cmp.w [[R2]], #-1
+; CHECK: bne LBB0_1
 
   %0 = phi i32 [ %.pre, %entry ], [ %3, %bb ]     ; <i32> [#uses=1]
   %indvar = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=2]
diff --git a/test/CodeGen/X86/lsr-loop-exit-cond.ll b/test/CodeGen/X86/lsr-loop-exit-cond.ll
index bdf09dff0b..ebda9f201d 100644
--- a/test/CodeGen/X86/lsr-loop-exit-cond.ll
+++ b/test/CodeGen/X86/lsr-loop-exit-cond.ll
@@ -1,5 +1,6 @@
 ; RUN: llc -mtriple=x86_64-darwin < %s | FileCheck %s
 
+; CHECK: t:
 ; CHECK: decq
 ; CHECK-NEXT: movl (
 ; CHECK-NEXT: jne
@@ -136,3 +137,44 @@ bb2:		; preds = %bb
 	store i8 %92, i8* %93, align 1
 	ret void
 }
+
+; Check that DAGCombiner doesn't mess up the IV update when the exiting value
+; is equal to the stride.
+; It must not fold (cmp (add iv, 1), 1) --> (cmp iv, 0).
+
+; CHECK: f:
+; CHECK: %for.body
+; CHECK: incl [[IV:%e..]]
+; CHECK: cmpl $1, [[IV]]
+; CHECK: jne
+; CHECK: ret
+
+define i32 @f(i32 %i, i32* nocapture %a) nounwind uwtable readonly ssp {
+entry:
+  %cmp4 = icmp eq i32 %i, 1
+  br i1 %cmp4, label %for.end, label %for.body.lr.ph
+
+for.body.lr.ph:                                   ; preds = %entry
+  %0 = sext i32 %i to i64
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %indvars.iv = phi i64 [ %0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
+  %bi.06 = phi i32 [ 0, %for.body.lr.ph ], [ %i.addr.0.bi.0, %for.body ]
+  %b.05 = phi i32 [ 0, %for.body.lr.ph ], [ %.b.0, %for.body ]
+  %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+  %1 = load i32* %arrayidx, align 4
+  %cmp1 = icmp ugt i32 %1, %b.05
+  %.b.0 = select i1 %cmp1, i32 %1, i32 %b.05
+  %2 = trunc i64 %indvars.iv to i32
+  %i.addr.0.bi.0 = select i1 %cmp1, i32 %2, i32 %bi.06
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 1
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  %bi.0.lcssa = phi i32 [ 0, %entry ], [ %i.addr.0.bi.0, %for.body ]
+  ret i32 %bi.0.lcssa
+}
+
author	Jakob Stoklund Olesen <stoklund@2pi.dk>	2012-04-05 20:30:20 +0000
committer	Jakob Stoklund Olesen <stoklund@2pi.dk>	2012-04-05 20:30:20 +0000
commit	740cd657f3d9d4e88614831c70a649f9257164da (patch)
tree	55acdd0c3c13cc20ea4e1b1863f80b4cc62f21f8 /test
parent	036ebfd87434bc30a0e51f4b9a3d55ab49ee5509 (diff)
download	llvm-740cd657f3d9d4e88614831c70a649f9257164da.tar.gz llvm-740cd657f3d9d4e88614831c70a649f9257164da.tar.bz2 llvm-740cd657f3d9d4e88614831c70a649f9257164da.tar.xz