summaryrefslogtreecommitdiff
path: root/test/Transforms/LoopSimplify
diff options
context:
space:
mode:
authorDan Gohman <gohman@apple.com>2009-06-27 21:30:38 +0000
committerDan Gohman <gohman@apple.com>2009-06-27 21:30:38 +0000
commit4b35f83b91a1a313f0730c600e5178aaf7df98d6 (patch)
tree50006e0a70949304f64ee1395d602224e8338078 /test/Transforms/LoopSimplify
parentcfb32203bceb0638db976fea5ce906b2a1010bac (diff)
downloadllvm-4b35f83b91a1a313f0730c600e5178aaf7df98d6.tar.gz
llvm-4b35f83b91a1a313f0730c600e5178aaf7df98d6.tar.bz2
llvm-4b35f83b91a1a313f0730c600e5178aaf7df98d6.tar.xz
Teach LoopSimplify how to merge multiple loop exits into a single exit,
when one of them can be converted to a trivial icmp and conditional branch. This addresses what is essentially a phase ordering problem. SimplifyCFG knows how to do this transformation, but it doesn't do so if the primary block has any instructions in it other than an icmp and a branch. In the given testcase, the block contains other instructions, however they are loop-invariant and can be hoisted. SimplifyCFG doesn't have LoopInfo though, so it can't hoist them. And, it's important that the blocks be merged before LoopRotation, as it doesn't support multiple-exit loops. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@74396 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/Transforms/LoopSimplify')
-rw-r--r--test/Transforms/LoopSimplify/merge-exits.ll45
1 files changed, 45 insertions, 0 deletions
diff --git a/test/Transforms/LoopSimplify/merge-exits.ll b/test/Transforms/LoopSimplify/merge-exits.ll
new file mode 100644
index 0000000000..c5bf7fdc3c
--- /dev/null
+++ b/test/Transforms/LoopSimplify/merge-exits.ll
@@ -0,0 +1,45 @@
+; RUN: llvm-as < %s | opt -loopsimplify -loop-rotate -instcombine -indvars \
+; RUN: | llvm-dis > %t
+; RUN: not grep sext %t
+; RUN: grep {phi i64} %t | count 1
+
+; Loopsimplify should be able to merge the two loop exits
+; into one, so that loop rotate can rotate the loop, so
+; that indvars can promote the induction variable to i64
+; without needing casts.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+
+define float @t(float* %pTmp1, float* %peakWeight, i32 %bandEdgeIndex) nounwind {
+entry:
+ %t0 = load float* %peakWeight, align 4 ; <float> [#uses=1]
+ br label %bb1
+
+bb: ; preds = %bb2
+ %t1 = sext i32 %hiPart.0 to i64 ; <i64> [#uses=1]
+ %t2 = getelementptr float* %pTmp1, i64 %t1 ; <float*> [#uses=1]
+ %t3 = load float* %t2, align 4 ; <float> [#uses=1]
+ %t4 = fadd float %t3, %distERBhi.0 ; <float> [#uses=1]
+ %t5 = add i32 %hiPart.0, 1 ; <i32> [#uses=2]
+ %t6 = sext i32 %t5 to i64 ; <i64> [#uses=1]
+ %t7 = getelementptr float* %peakWeight, i64 %t6 ; <float*> [#uses=1]
+ %t8 = load float* %t7, align 4 ; <float> [#uses=1]
+ %t9 = fadd float %t8, %peakCount.0 ; <float> [#uses=1]
+ br label %bb1
+
+bb1: ; preds = %bb, %entry
+ %peakCount.0 = phi float [ %t0, %entry ], [ %t9, %bb ] ; <float> [#uses=2]
+ %hiPart.0 = phi i32 [ 0, %entry ], [ %t5, %bb ] ; <i32> [#uses=3]
+ %distERBhi.0 = phi float [ 0.000000e+00, %entry ], [ %t4, %bb ] ; <float> [#uses=3]
+ %t10 = fcmp uge float %distERBhi.0, 2.500000e+00 ; <i1> [#uses=1]
+ br i1 %t10, label %bb3, label %bb2
+
+bb2: ; preds = %bb1
+ %t11 = add i32 %bandEdgeIndex, -1 ; <i32> [#uses=1]
+ %t12 = icmp sgt i32 %t11, %hiPart.0 ; <i1> [#uses=1]
+ br i1 %t12, label %bb, label %bb3
+
+bb3: ; preds = %bb2, %bb1
+ %t13 = fdiv float %peakCount.0, %distERBhi.0 ; <float> [#uses=1]
+ ret float %t13
+}