From b42a6261225e5a1b9a75b9aa11732944046d7999 Mon Sep 17 00:00:00 2001 From: Eli Friedman Date: Mon, 4 Aug 2008 23:49:06 +0000 Subject: PR2621: Improvements to the SCEV AddRec binomial expansion. This version uses a new algorithm for evaluating the binomial coefficients which is significantly more efficient for AddRecs of more than 2 terms (see the comments in the code for details on how the algorithm works). It also fixes some bugs: it removes the arbitrary length restriction for AddRecs, it fixes the silent generation of incorrect code for AddRecs which require a wide calculation width, and it fixes an issue where we were incorrectly truncating the iteration count too far when evaluating an AddRec expression narrower than the induction variable. There are still a few related issues I know of: I think there's still an issue with the SCEVExpander expansion of AddRec in terms of the width of the induction variable used. The hack to avoid generating too-wide integers shouldn't be necessary; instead, the callers should be considering the cost of the expansion before expanding it (in addition to not expanding too-wide integers, we might not want to expand expressions that are really expensive, especially when optimizing for size; calculating an length-17 32-bit AddRec currently generates about 250 instructions of straight-line code on X86). Also, for long 32-bit AddRecs on X86, CodeGen really sucks at scheduling the code. I'm planning on filing follow-up PRs for these issues. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@54332 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../ScalarEvolution/2008-08-04-IVOverflow.ll | 25 ++++++++++ .../ScalarEvolution/2008-08-04-LongAddRec.ll | 56 ++++++++++++++++++++++ 2 files changed, 81 insertions(+) create mode 100644 test/Analysis/ScalarEvolution/2008-08-04-IVOverflow.ll create mode 100644 test/Analysis/ScalarEvolution/2008-08-04-LongAddRec.ll (limited to 'test/Analysis') diff --git a/test/Analysis/ScalarEvolution/2008-08-04-IVOverflow.ll b/test/Analysis/ScalarEvolution/2008-08-04-IVOverflow.ll new file mode 100644 index 0000000000..04cd289a7a --- /dev/null +++ b/test/Analysis/ScalarEvolution/2008-08-04-IVOverflow.ll @@ -0,0 +1,25 @@ +; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output \ +; RUN: -scalar-evolution-max-iterations=0 | grep -F "Exits: 20028" +; PR2621 + +define i32 @a() nounwind { +entry: + br label %bb1 + +bb: + trunc i32 %i.0 to i16 + add i16 %0, %x16.0 + add i32 %i.0, 1 + br label %bb1 + +bb1: + %i.0 = phi i32 [ 0, %entry ], [ %2, %bb ] + %x16.0 = phi i16 [ 0, %entry ], [ %1, %bb ] + icmp ult i32 %i.0, 888888 + br i1 %3, label %bb, label %bb2 + +bb2: + zext i16 %x16.0 to i32 + ret i32 %4 +} + diff --git a/test/Analysis/ScalarEvolution/2008-08-04-LongAddRec.ll b/test/Analysis/ScalarEvolution/2008-08-04-LongAddRec.ll new file mode 100644 index 0000000000..dbbc4eca20 --- /dev/null +++ b/test/Analysis/ScalarEvolution/2008-08-04-LongAddRec.ll @@ -0,0 +1,56 @@ +; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output \ +; RUN: -scalar-evolution-max-iterations=0 | grep -F "Exits: -19168" +; PR2621 + +define i32 @a() nounwind { +entry: + br label %bb1 + +bb: ; preds = %bb1 + add i16 %x17.0, 1 ; :0 [#uses=2] + add i16 %0, %x16.0 ; :1 [#uses=2] + add i16 %1, %x15.0 ; :2 [#uses=2] + add i16 %2, %x14.0 ; :3 [#uses=2] + add i16 %3, %x13.0 ; :4 [#uses=2] + add i16 %4, %x12.0 ; :5 [#uses=2] + add i16 %5, %x11.0 ; :6 [#uses=2] + add i16 %6, %x10.0 ; :7 [#uses=2] + add i16 %7, %x9.0 ; :8 [#uses=2] + add i16 %8, %x8.0 ; :9 [#uses=2] + add i16 %9, %x7.0 ; :10 [#uses=2] + add i16 %10, %x6.0 ; :11 [#uses=2] + add i16 %11, %x5.0 ; :12 [#uses=2] + add i16 %12, %x4.0 ; :13 [#uses=2] + add i16 %13, %x3.0 ; :14 [#uses=2] + add i16 %14, %x2.0 ; :15 [#uses=2] + add i16 %15, %x1.0 ; :16 [#uses=1] + add i32 %i.0, 1 ; :17 [#uses=1] + br label %bb1 + +bb1: ; preds = %bb, %entry + %x2.0 = phi i16 [ 0, %entry ], [ %15, %bb ] ; [#uses=1] + %x3.0 = phi i16 [ 0, %entry ], [ %14, %bb ] ; [#uses=1] + %x4.0 = phi i16 [ 0, %entry ], [ %13, %bb ] ; [#uses=1] + %x5.0 = phi i16 [ 0, %entry ], [ %12, %bb ] ; [#uses=1] + %x6.0 = phi i16 [ 0, %entry ], [ %11, %bb ] ; [#uses=1] + %x7.0 = phi i16 [ 0, %entry ], [ %10, %bb ] ; [#uses=1] + %x8.0 = phi i16 [ 0, %entry ], [ %9, %bb ] ; [#uses=1] + %x9.0 = phi i16 [ 0, %entry ], [ %8, %bb ] ; [#uses=1] + %x10.0 = phi i16 [ 0, %entry ], [ %7, %bb ] ; [#uses=1] + %x11.0 = phi i16 [ 0, %entry ], [ %6, %bb ] ; [#uses=1] + %x12.0 = phi i16 [ 0, %entry ], [ %5, %bb ] ; [#uses=1] + %x13.0 = phi i16 [ 0, %entry ], [ %4, %bb ] ; [#uses=1] + %x14.0 = phi i16 [ 0, %entry ], [ %3, %bb ] ; [#uses=1] + %x15.0 = phi i16 [ 0, %entry ], [ %2, %bb ] ; [#uses=1] + %x16.0 = phi i16 [ 0, %entry ], [ %1, %bb ] ; [#uses=1] + %x17.0 = phi i16 [ 0, %entry ], [ %0, %bb ] ; [#uses=1] + %i.0 = phi i32 [ 0, %entry ], [ %17, %bb ] ; [#uses=2] + %x1.0 = phi i16 [ 0, %entry ], [ %16, %bb ] ; [#uses=2] + icmp ult i32 %i.0, 8888 ; :18 [#uses=1] + br i1 %18, label %bb, label %bb2 + +bb2: ; preds = %bb1 + zext i16 %x1.0 to i32 ; :19 [#uses=1] + ret i32 %19 +} + -- cgit v1.2.3