From b42a6261225e5a1b9a75b9aa11732944046d7999 Mon Sep 17 00:00:00 2001
From: Eli Friedman <eli.friedman@gmail.com>
Date: Mon, 4 Aug 2008 23:49:06 +0000
Subject: PR2621: Improvements to the SCEV AddRec binomial expansion.  This
 version uses a new algorithm for evaluating the binomial coefficients which
 is significantly more efficient for AddRecs of more than 2 terms (see the
 comments in the code for details on how the algorithm works). It also fixes
 some bugs: it removes the arbitrary length restriction for AddRecs, it fixes
 the silent generation of incorrect code for AddRecs which require a wide
 calculation width, and it fixes an issue where we were incorrectly truncating
 the iteration count too far when evaluating an AddRec expression narrower
 than the induction variable.

There are still a few related issues I know of: I think there's
still an issue with the SCEVExpander expansion of AddRec in terms of
the width of the induction variable used.  The hack to avoid generating
too-wide integers shouldn't be necessary; instead, the callers should be
considering the cost of the expansion before expanding it (in addition
to not expanding too-wide integers, we might not want to expand
expressions that are really expensive, especially when optimizing for
size; calculating an length-17 32-bit AddRec currently generates about 250
instructions of straight-line code on X86).  Also, for long 32-bit
AddRecs on X86, CodeGen really sucks at scheduling the code.  I'm planning on
filing follow-up PRs for these issues.



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@54332 91177308-0d34-0410-b5e6-96231b3b80d8
---
 .../ScalarEvolution/2008-08-04-IVOverflow.ll       | 25 ++++++++++
 .../ScalarEvolution/2008-08-04-LongAddRec.ll       | 56 ++++++++++++++++++++++
 2 files changed, 81 insertions(+)
 create mode 100644 test/Analysis/ScalarEvolution/2008-08-04-IVOverflow.ll
 create mode 100644 test/Analysis/ScalarEvolution/2008-08-04-LongAddRec.ll

(limited to 'test/Analysis')
diff --git a/test/Analysis/ScalarEvolution/2008-08-04-IVOverflow.ll b/test/Analysis/ScalarEvolution/2008-08-04-IVOverflow.ll
new file mode 100644
index 0000000000..04cd289a7a
--- /dev/null
+++ b/test/Analysis/ScalarEvolution/2008-08-04-IVOverflow.ll
@@ -0,0 +1,25 @@
+; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output \
+; RUN:   -scalar-evolution-max-iterations=0 | grep -F "Exits:  20028"
+; PR2621
+
+define i32 @a() nounwind  {
+entry:
+	br label %bb1
+
+bb:
+	trunc i32 %i.0 to i16
+	add i16 %0, %x16.0
+	add i32 %i.0, 1
+	br label %bb1
+
+bb1:
+	%i.0 = phi i32 [ 0, %entry ], [ %2, %bb ]
+	%x16.0 = phi i16 [ 0, %entry ], [ %1, %bb ]
+	icmp ult i32 %i.0, 888888
+	br i1 %3, label %bb, label %bb2
+
+bb2:
+	zext i16 %x16.0 to i32
+	ret i32 %4
+}
+
diff --git a/test/Analysis/ScalarEvolution/2008-08-04-LongAddRec.ll b/test/Analysis/ScalarEvolution/2008-08-04-LongAddRec.ll
new file mode 100644
index 0000000000..dbbc4eca20
--- /dev/null
+++ b/test/Analysis/ScalarEvolution/2008-08-04-LongAddRec.ll
@@ -0,0 +1,56 @@
+; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output \
+; RUN:   -scalar-evolution-max-iterations=0 | grep -F "Exits:  -19168"
+; PR2621
+
+define i32 @a() nounwind  {
+entry:
+	br label %bb1
+
+bb:		; preds = %bb1
+	add i16 %x17.0, 1		; <i16>:0 [#uses=2]
+	add i16 %0, %x16.0		; <i16>:1 [#uses=2]
+	add i16 %1, %x15.0		; <i16>:2 [#uses=2]
+	add i16 %2, %x14.0		; <i16>:3 [#uses=2]
+	add i16 %3, %x13.0		; <i16>:4 [#uses=2]
+	add i16 %4, %x12.0		; <i16>:5 [#uses=2]
+	add i16 %5, %x11.0		; <i16>:6 [#uses=2]
+	add i16 %6, %x10.0		; <i16>:7 [#uses=2]
+	add i16 %7, %x9.0		; <i16>:8 [#uses=2]
+	add i16 %8, %x8.0		; <i16>:9 [#uses=2]
+	add i16 %9, %x7.0		; <i16>:10 [#uses=2]
+	add i16 %10, %x6.0		; <i16>:11 [#uses=2]
+	add i16 %11, %x5.0		; <i16>:12 [#uses=2]
+	add i16 %12, %x4.0		; <i16>:13 [#uses=2]
+	add i16 %13, %x3.0		; <i16>:14 [#uses=2]
+	add i16 %14, %x2.0		; <i16>:15 [#uses=2]
+	add i16 %15, %x1.0		; <i16>:16 [#uses=1]
+	add i32 %i.0, 1		; <i32>:17 [#uses=1]
+	br label %bb1
+
+bb1:		; preds = %bb, %entry
+	%x2.0 = phi i16 [ 0, %entry ], [ %15, %bb ]		; <i16> [#uses=1]
+	%x3.0 = phi i16 [ 0, %entry ], [ %14, %bb ]		; <i16> [#uses=1]
+	%x4.0 = phi i16 [ 0, %entry ], [ %13, %bb ]		; <i16> [#uses=1]
+	%x5.0 = phi i16 [ 0, %entry ], [ %12, %bb ]		; <i16> [#uses=1]
+	%x6.0 = phi i16 [ 0, %entry ], [ %11, %bb ]		; <i16> [#uses=1]
+	%x7.0 = phi i16 [ 0, %entry ], [ %10, %bb ]		; <i16> [#uses=1]
+	%x8.0 = phi i16 [ 0, %entry ], [ %9, %bb ]		; <i16> [#uses=1]
+	%x9.0 = phi i16 [ 0, %entry ], [ %8, %bb ]		; <i16> [#uses=1]
+	%x10.0 = phi i16 [ 0, %entry ], [ %7, %bb ]		; <i16> [#uses=1]
+	%x11.0 = phi i16 [ 0, %entry ], [ %6, %bb ]		; <i16> [#uses=1]
+	%x12.0 = phi i16 [ 0, %entry ], [ %5, %bb ]		; <i16> [#uses=1]
+	%x13.0 = phi i16 [ 0, %entry ], [ %4, %bb ]		; <i16> [#uses=1]
+	%x14.0 = phi i16 [ 0, %entry ], [ %3, %bb ]		; <i16> [#uses=1]
+	%x15.0 = phi i16 [ 0, %entry ], [ %2, %bb ]		; <i16> [#uses=1]
+	%x16.0 = phi i16 [ 0, %entry ], [ %1, %bb ]		; <i16> [#uses=1]
+	%x17.0 = phi i16 [ 0, %entry ], [ %0, %bb ]		; <i16> [#uses=1]
+	%i.0 = phi i32 [ 0, %entry ], [ %17, %bb ]		; <i32> [#uses=2]
+	%x1.0 = phi i16 [ 0, %entry ], [ %16, %bb ]		; <i16> [#uses=2]
+	icmp ult i32 %i.0, 8888		; <i1>:18 [#uses=1]
+	br i1 %18, label %bb, label %bb2
+
+bb2:		; preds = %bb1
+	zext i16 %x1.0 to i32		; <i32>:19 [#uses=1]
+	ret i32 %19
+}
+
-- 
cgit v1.2.3