diff options
author | Jakob Stoklund Olesen <stoklund@2pi.dk> | 2012-04-05 20:30:20 +0000 |
---|---|---|
committer | Jakob Stoklund Olesen <stoklund@2pi.dk> | 2012-04-05 20:30:20 +0000 |
commit | 740cd657f3d9d4e88614831c70a649f9257164da (patch) | |
tree | 55acdd0c3c13cc20ea4e1b1863f80b4cc62f21f8 /test/CodeGen/X86/lsr-loop-exit-cond.ll | |
parent | 036ebfd87434bc30a0e51f4b9a3d55ab49ee5509 (diff) | |
download | llvm-740cd657f3d9d4e88614831c70a649f9257164da.tar.gz llvm-740cd657f3d9d4e88614831c70a649f9257164da.tar.bz2 llvm-740cd657f3d9d4e88614831c70a649f9257164da.tar.xz |
Don't break the IV update in TLI::SimplifySetCC().
LSR always tries to make the ICmp in the loop latch use the incremented
induction variable. This allows the induction variable to be kept in a
single register.
When the induction variable limit is equal to the stride,
SimplifySetCC() would break LSR's hard work by transforming:
(icmp (add iv, stride), stride) --> (cmp iv, 0)
This forced us to use lea for the IV update, preventing the simpler
incl+cmp.
<rdar://problem/7643606>
<rdar://problem/11184260>
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@154119 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/CodeGen/X86/lsr-loop-exit-cond.ll')
-rw-r--r-- | test/CodeGen/X86/lsr-loop-exit-cond.ll | 42 |
1 files changed, 42 insertions, 0 deletions
diff --git a/test/CodeGen/X86/lsr-loop-exit-cond.ll b/test/CodeGen/X86/lsr-loop-exit-cond.ll index bdf09dff0b..ebda9f201d 100644 --- a/test/CodeGen/X86/lsr-loop-exit-cond.ll +++ b/test/CodeGen/X86/lsr-loop-exit-cond.ll @@ -1,5 +1,6 @@ ; RUN: llc -mtriple=x86_64-darwin < %s | FileCheck %s +; CHECK: t: ; CHECK: decq ; CHECK-NEXT: movl ( ; CHECK-NEXT: jne @@ -136,3 +137,44 @@ bb2: ; preds = %bb store i8 %92, i8* %93, align 1 ret void } + +; Check that DAGCombiner doesn't mess up the IV update when the exiting value +; is equal to the stride. +; It must not fold (cmp (add iv, 1), 1) --> (cmp iv, 0). + +; CHECK: f: +; CHECK: %for.body +; CHECK: incl [[IV:%e..]] +; CHECK: cmpl $1, [[IV]] +; CHECK: jne +; CHECK: ret + +define i32 @f(i32 %i, i32* nocapture %a) nounwind uwtable readonly ssp { +entry: + %cmp4 = icmp eq i32 %i, 1 + br i1 %cmp4, label %for.end, label %for.body.lr.ph + +for.body.lr.ph: ; preds = %entry + %0 = sext i32 %i to i64 + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %indvars.iv = phi i64 [ %0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ] + %bi.06 = phi i32 [ 0, %for.body.lr.ph ], [ %i.addr.0.bi.0, %for.body ] + %b.05 = phi i32 [ 0, %for.body.lr.ph ], [ %.b.0, %for.body ] + %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv + %1 = load i32* %arrayidx, align 4 + %cmp1 = icmp ugt i32 %1, %b.05 + %.b.0 = select i1 %cmp1, i32 %1, i32 %b.05 + %2 = trunc i64 %indvars.iv to i32 + %i.addr.0.bi.0 = select i1 %cmp1, i32 %2, i32 %bi.06 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, 1 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + %bi.0.lcssa = phi i32 [ 0, %entry ], [ %i.addr.0.bi.0, %for.body ] + ret i32 %bi.0.lcssa +} + |