summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNadav Rotem <nrotem@apple.com>2012-08-14 05:19:07 +0000
committerNadav Rotem <nrotem@apple.com>2012-08-14 05:19:07 +0000
commit3e883734fab4da8413f16957dd116d4ffd9d3223 (patch)
tree3165c1b0a62a477da5ca386e10ee5dc97d752da3
parent443c9ed7688e66c55c43819a75be681574b291de (diff)
downloadllvm-3e883734fab4da8413f16957dd116d4ffd9d3223.tar.gz
llvm-3e883734fab4da8413f16957dd116d4ffd9d3223.tar.bz2
llvm-3e883734fab4da8413f16957dd116d4ffd9d3223.tar.xz
During the CodeGenPrepare we often lower intrinsics (such as objsize)
and allow some optimizations to turn conditional branches into unconditional. This commit adds a simple control-flow optimization which merges two consecutive basic blocks which are connected by a single edge. This allows the codegen to operate on larger basic blocks. rdar://11973998 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@161852 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Transforms/Scalar/CodeGenPrepare.cpp39
-rw-r--r--test/CodeGen/ARM/debug-info-branch-folding.ll13
-rw-r--r--test/CodeGen/PowerPC/lbzux.ll6
-rw-r--r--test/CodeGen/X86/2006-11-12-CSRetCC.ll4
-rw-r--r--test/CodeGen/X86/2006-11-17-IllegalMove.ll2
-rw-r--r--test/CodeGen/X86/2008-01-08-SchedulerCrash.ll6
-rw-r--r--test/Transforms/CodeGenPrepare/basic.ll6
-rw-r--r--test/Transforms/LoopStrengthReduce/ARM/2012-06-15-lsr-noaddrmode.ll8
8 files changed, 62 insertions, 22 deletions
diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp
index 4b4a8c598f..bc87106b3d 100644
--- a/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -116,6 +116,7 @@ namespace {
}
private:
+ bool EliminateFallThrough(Function &F);
bool EliminateMostlyEmptyBlocks(Function &F);
bool CanMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
void EliminateMostlyEmptyBlock(BasicBlock *BB);
@@ -192,6 +193,11 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
I = WorkList.begin(), E = WorkList.end(); I != E; ++I)
DeleteDeadBlock(*I);
+ // Merge pairs of basic blocks with unconditional branches, connected by
+ // a single edge.
+ if (EverMadeChange || MadeChange)
+ MadeChange |= EliminateFallThrough(F);
+
if (MadeChange)
ModifiedDT = true;
EverMadeChange |= MadeChange;
@@ -203,6 +209,39 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
return EverMadeChange;
}
+/// EliminateFallThrough - Merge basic blocks which are connected
+/// by a single edge, where one of the basic blocks has a single successor
+/// pointing to the other basic block, which has a single predecessor.
+bool CodeGenPrepare::EliminateFallThrough(Function &F) {
+ bool Changed = false;
+ // Scan all of the blocks in the function, except for the entry block.
+ for (Function::iterator I = ++F.begin(), E = F.end(); I != E; ) {
+ BasicBlock *BB = I++;
+ // If the destination block has a single pred, then this is a trivial
+ // edge, just collapse it.
+ BasicBlock *SinglePred = BB->getSinglePredecessor();
+
+ if (!SinglePred || SinglePred == BB) continue;
+
+ BranchInst *Term = dyn_cast<BranchInst>(SinglePred->getTerminator());
+ if (Term && !Term->isConditional()) {
+ Changed = true;
+ // Remember if SinglePred was the entry block of the function.
+ // If so, we will need to move BB back to the entry position.
+ bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
+ MergeBasicBlockIntoOnlyPred(BB, this);
+
+ if (isEntry && BB != &BB->getParent()->getEntryBlock())
+ BB->moveBefore(&BB->getParent()->getEntryBlock());
+
+ // We have erased a block. Update the iterator.
+ I = BB;
+ DEBUG(dbgs() << "Merged:\n"<< *SinglePred << "\n\n\n");
+ }
+ }
+ return Changed;
+}
+
/// EliminateMostlyEmptyBlocks - eliminate blocks that contain only PHI nodes,
/// debug info directives, and an unconditional branch. Passes before isel
/// (e.g. LSR/loopsimplify) often split edges in ways that are non-optimal for
diff --git a/test/CodeGen/ARM/debug-info-branch-folding.ll b/test/CodeGen/ARM/debug-info-branch-folding.ll
index 9bdae436de..4f4ff8e817 100644
--- a/test/CodeGen/ARM/debug-info-branch-folding.ll
+++ b/test/CodeGen/ARM/debug-info-branch-folding.ll
@@ -3,16 +3,17 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-
target triple = "thumbv7-apple-macosx10.6.7"
;CHECK: vadd.f32 q4, q8, q8
-;CHECK-NEXT: Ltmp
-;CHECK-NEXT: @DEBUG_VALUE: y <- Q4+0
-;CHECK-NEXT: @DEBUG_VALUE: x <- Q4+0
+;CHECK-NEXT: Ltmp1
+
+;CHECK:@DEBUG_VALUE: x <- Q4+0
+;CHECK-NEXT:@DEBUG_VALUE: y <- Q4+0
@.str = external constant [13 x i8]
declare <4 x float> @test0001(float) nounwind readnone ssp
-define i32 @main(i32 %argc, i8** nocapture %argv) nounwind ssp {
+define i32 @main(i32 %argc, i8** nocapture %argv, i1 %cond) nounwind ssp {
entry:
br label %for.body9
@@ -21,7 +22,7 @@ for.body9: ; preds = %for.body9, %entry
tail call void @llvm.dbg.value(metadata !{<4 x float> %add19}, i64 0, metadata !27), !dbg !39
%add20 = fadd <4 x float> undef, <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00>, !dbg !39
tail call void @llvm.dbg.value(metadata !{<4 x float> %add20}, i64 0, metadata !28), !dbg !39
- br i1 undef, label %for.end54, label %for.body9, !dbg !44
+ br i1 %cond, label %for.end54, label %for.body9, !dbg !44
for.end54: ; preds = %for.body9
%tmp115 = extractelement <4 x float> %add19, i32 1
@@ -52,7 +53,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
!7 = metadata !{i32 589860, metadata !2, metadata !"float", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
!8 = metadata !{metadata !9}
!9 = metadata !{i32 589857, i64 0, i64 3} ; [ DW_TAG_subrange_type ]
-!10 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 59, metadata !11, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i8**)* @main, null} ; [ DW_TAG_subprogram ]
+!10 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 59, metadata !11, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i8**, i1)* @main, null} ; [ DW_TAG_subprogram ]
!11 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !12, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
!12 = metadata !{metadata !13}
!13 = metadata !{i32 589860, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
diff --git a/test/CodeGen/PowerPC/lbzux.ll b/test/CodeGen/PowerPC/lbzux.ll
index 5725c0dddf..12f1d1f130 100644
--- a/test/CodeGen/PowerPC/lbzux.ll
+++ b/test/CodeGen/PowerPC/lbzux.ll
@@ -2,7 +2,7 @@ target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
target triple = "powerpc64-unknown-linux-gnu"
; RUN: llc < %s | FileCheck %s
-define fastcc void @allocateSpace() nounwind {
+define fastcc void @allocateSpace(i1 %cond1, i1 %cond2) nounwind {
entry:
%0 = load i8** undef, align 8, !tbaa !0
br i1 undef, label %return, label %lor.lhs.false
@@ -20,10 +20,10 @@ while.cond: ; preds = %while.body, %if.the
%idxprom17 = sext i32 0 to i64
%arrayidx18 = getelementptr inbounds i8* %0, i64 %idxprom17
%or = or i32 undef, undef
- br i1 false, label %if.end71, label %while.body
+ br i1 %cond1, label %if.end71, label %while.body
while.body: ; preds = %while.cond
- br i1 undef, label %while.cond, label %if.then45
+ br i1 %cond2, label %while.cond, label %if.then45
if.then45: ; preds = %while.body
%idxprom48139 = zext i32 %or to i64
diff --git a/test/CodeGen/X86/2006-11-12-CSRetCC.ll b/test/CodeGen/X86/2006-11-12-CSRetCC.ll
index 6ec9a48849..a58c9b102d 100644
--- a/test/CodeGen/X86/2006-11-12-CSRetCC.ll
+++ b/test/CodeGen/X86/2006-11-12-CSRetCC.ll
@@ -52,8 +52,8 @@ entry:
%tmp21 = load double* %tmp20 ; <double> [#uses=1]
%tmp.upgrd.6 = getelementptr [9 x i8]* @str, i32 0, i64 0 ; <i8*> [#uses=1]
%tmp.upgrd.7 = call i32 (i8*, ...)* @printf( i8* %tmp.upgrd.6, double %tmp21, double %tmp19 ) ; <i32> [#uses=0]
- br label %return
-return: ; preds = %entry
+ br label %finish
+finish:
%retval.upgrd.8 = load i32* %retval ; <i32> [#uses=1]
ret i32 %retval.upgrd.8
}
diff --git a/test/CodeGen/X86/2006-11-17-IllegalMove.ll b/test/CodeGen/X86/2006-11-17-IllegalMove.ll
index adc825c039..783d9f94ca 100644
--- a/test/CodeGen/X86/2006-11-17-IllegalMove.ll
+++ b/test/CodeGen/X86/2006-11-17-IllegalMove.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -march=x86-64 > %t
-; RUN: grep movb %t | count 2
+; RUN: grep movb %t | count 1
; RUN: grep "movzb[wl]" %t
diff --git a/test/CodeGen/X86/2008-01-08-SchedulerCrash.ll b/test/CodeGen/X86/2008-01-08-SchedulerCrash.ll
index 266fd7b913..39af9319c8 100644
--- a/test/CodeGen/X86/2008-01-08-SchedulerCrash.ll
+++ b/test/CodeGen/X86/2008-01-08-SchedulerCrash.ll
@@ -10,10 +10,10 @@
%struct.indexentry = type { i32, i8*, i8*, i8*, i8*, i8* }
-define i32 @_bfd_stab_section_find_nearest_line(i32 %offset) nounwind {
+define i32 @_bfd_stab_section_find_nearest_line(i32 %offset, i1 %cond) nounwind {
entry:
%tmp910 = add i32 0, %offset ; <i32> [#uses=1]
- br i1 true, label %bb951, label %bb917
+ br i1 %cond, label %bb951, label %bb917
bb917: ; preds = %entry
ret i32 0
@@ -21,7 +21,7 @@ bb917: ; preds = %entry
bb951: ; preds = %bb986, %entry
%tmp955 = sdiv i32 0, 2 ; <i32> [#uses=3]
%tmp961 = getelementptr %struct.indexentry* null, i32 %tmp955, i32 0 ; <i32*> [#uses=1]
- br i1 true, label %bb986, label %bb967
+ br i1 %cond, label %bb986, label %bb967
bb967: ; preds = %bb951
ret i32 0
diff --git a/test/Transforms/CodeGenPrepare/basic.ll b/test/Transforms/CodeGenPrepare/basic.ll
index ebf10f0e9d..c68e77eb55 100644
--- a/test/Transforms/CodeGenPrepare/basic.ll
+++ b/test/Transforms/CodeGenPrepare/basic.ll
@@ -5,7 +5,7 @@ target triple = "x86_64-apple-darwin10.0.0"
; CHECK: @test1
; objectsize should fold to a constant, which causes the branch to fold to an
-; uncond branch.
+; uncond branch. Next, we fold the control flow alltogether.
; rdar://8785296
define i32 @test1(i8* %ptr) nounwind ssp noredzone align 2 {
entry:
@@ -13,8 +13,8 @@ entry:
%1 = icmp ugt i64 %0, 3
br i1 %1, label %T, label %trap
-; CHECK: entry:
-; CHECK-NEXT: br label %T
+; CHECK: T:
+; CHECK-NOT: br label %
trap: ; preds = %0, %entry
tail call void @llvm.trap() noreturn nounwind
diff --git a/test/Transforms/LoopStrengthReduce/ARM/2012-06-15-lsr-noaddrmode.ll b/test/Transforms/LoopStrengthReduce/ARM/2012-06-15-lsr-noaddrmode.ll
index 70ead330c1..b5124ea6f3 100644
--- a/test/Transforms/LoopStrengthReduce/ARM/2012-06-15-lsr-noaddrmode.ll
+++ b/test/Transforms/LoopStrengthReduce/ARM/2012-06-15-lsr-noaddrmode.ll
@@ -44,7 +44,7 @@ declare %s* @getstruct() nounwind
; CHECK: @main
; Check that the loop preheader contains no address computation.
-; CHECK: %entry
+; CHECK: %end_of_chain
; CHECK-NOT: add{{.*}}lsl
; CHECK: ldr{{.*}}lsl #2
; CHECK: ldr{{.*}}lsl #2
@@ -65,15 +65,15 @@ while.cond:
while.body:
%v3 = load i32* @ncol, align 4, !tbaa !0
- br label %while.cond.i
+ br label %end_of_chain
-while.cond.i:
+end_of_chain:
%state.i = getelementptr inbounds %s* %call18, i32 0, i32 0
%v4 = load i32** %state.i, align 4, !tbaa !3
br label %while.cond.i.i
while.cond.i.i:
- %counter.0.i.i = phi i32 [ %v3, %while.cond.i ], [ %dec.i.i, %land.rhs.i.i ]
+ %counter.0.i.i = phi i32 [ %v3, %end_of_chain ], [ %dec.i.i, %land.rhs.i.i ]
%dec.i.i = add nsw i32 %counter.0.i.i, -1
%tobool.i.i = icmp eq i32 %counter.0.i.i, 0
br i1 %tobool.i.i, label %where.exit, label %land.rhs.i.i