summaryrefslogtreecommitdiff
path: root/test/CodeGen/X86/block-placement.ll
diff options
context:
space:
mode:
author    Chandler Carruth <chandlerc@gmail.com>    2011-11-13 11:20:44 +0000
committer Chandler Carruth <chandlerc@gmail.com>    2011-11-13 11:20:44 +0000
commit df234353fb396e84e7a3a1cdd94f73681e65bd88 (patch)
tree   d685cc000869f6d97d01f92ffe3a131cb299e5ef /test/CodeGen/X86/block-placement.ll
parent 9eb674880b98cbeca0cd5b3f0265b77282d48b4a (diff)
downloadllvm-df234353fb396e84e7a3a1cdd94f73681e65bd88.tar.gz
llvm-df234353fb396e84e7a3a1cdd94f73681e65bd88.tar.bz2
llvm-df234353fb396e84e7a3a1cdd94f73681e65bd88.tar.xz
Rewrite #3 of machine block placement. This is based somewhat on the
second algorithm, but only loosely. It is more heavily based on the last discussion I had with Andy. It continues to walk from the inner-most loop outward, but there is a key difference. With this algorithm we ensure that as we visit each loop, the entire loop is merged into a single chain. At the end, the entire function is treated as a "loop", and merged into a single chain. This chain forms the desired sequence of blocks within the function. Switching to a single algorithm removes my biggest problem with the previous approaches -- they had different behavior depending on which system triggered the layout. Now there is exactly one algorithm and one basis for the decision making. The other key difference is how the chain is formed. This is based heavily on the idea Andy mentioned of keeping a worklist of blocks that are viable layout successors based on the CFG. Having this set allows us to consistently select the best layout successor for each block. It is expensive though. The code here remains very rough. There is a lot that needs to be done to clean up the code, and to make the runtime cost of this pass much lower. Very much WIP, but this was a giant chunk of code and I'd rather folks see it sooner than later. Everything remains behind a flag of course. I've added a couple of tests to exercise the issues that this iteration was motivated by: loop structure preservation. I've also fixed one test that was exhibiting the broken behavior of the previous version. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144495 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/CodeGen/X86/block-placement.ll')
-rw-r--r--  test/CodeGen/X86/block-placement.ll  97
1 file changed, 96 insertions(+), 1 deletion(-)
diff --git a/test/CodeGen/X86/block-placement.ll b/test/CodeGen/X86/block-placement.ll
index 38d3062cb5..4f0b6714b8 100644
--- a/test/CodeGen/X86/block-placement.ll
+++ b/test/CodeGen/X86/block-placement.ll
@@ -72,8 +72,103 @@ exit:
ret i32 %b
}
+define i32 @test_loop_cold_blocks(i32 %i, i32* %a) {
+; Check that we sink cold loop blocks after the hot loop body.
+; CHECK: test_loop_cold_blocks:
+; CHECK: %entry
+; CHECK: %body1
+; CHECK: %body2
+; CHECK: %body3
+; CHECK: %unlikely1
+; CHECK: %unlikely2
+; CHECK: %exit
+
+entry:
+ br label %body1
+
+body1:
+ %iv = phi i32 [ 0, %entry ], [ %next, %body3 ]
+ %base = phi i32 [ 0, %entry ], [ %sum, %body3 ]
+ %unlikelycond1 = icmp slt i32 %base, 42
+ br i1 %unlikelycond1, label %unlikely1, label %body2, !prof !0
+
+unlikely1:
+ call void @error(i32 %i, i32 1, i32 %base)
+ br label %body2
+
+body2:
+ %unlikelycond2 = icmp sgt i32 %base, 21
+ br i1 %unlikelycond2, label %unlikely2, label %body3, !prof !0
+
+unlikely2:
+ call void @error(i32 %i, i32 2, i32 %base)
+ br label %body3
+
+body3:
+ %arrayidx = getelementptr inbounds i32* %a, i32 %iv
+ %0 = load i32* %arrayidx
+ %sum = add nsw i32 %0, %base
+ %next = add i32 %iv, 1
+ %exitcond = icmp eq i32 %next, %i
+ br i1 %exitcond, label %exit, label %body1
+
+exit:
+ ret i32 %sum
+}
+
!0 = metadata !{metadata !"branch_weights", i32 4, i32 64}
+define i32 @test_loop_early_exits(i32 %i, i32* %a) {
+; Check that we sink early exit blocks out of loop bodies.
+; CHECK: test_loop_early_exits:
+; CHECK: %entry
+; CHECK: %body1
+; CHECK: %body2
+; CHECK: %body3
+; CHECK: %body4
+; CHECK: %exit
+; CHECK: %bail1
+; CHECK: %bail2
+; CHECK: %bail3
+
+entry:
+ br label %body1
+
+body1:
+ %iv = phi i32 [ 0, %entry ], [ %next, %body4 ]
+ %base = phi i32 [ 0, %entry ], [ %sum, %body4 ]
+ %bailcond1 = icmp eq i32 %base, 42
+ br i1 %bailcond1, label %bail1, label %body2
+
+bail1:
+ ret i32 -1
+
+body2:
+ %bailcond2 = icmp eq i32 %base, 43
+ br i1 %bailcond2, label %bail2, label %body3
+
+bail2:
+ ret i32 -2
+
+body3:
+ %bailcond3 = icmp eq i32 %base, 44
+ br i1 %bailcond3, label %bail3, label %body4
+
+bail3:
+ ret i32 -3
+
+body4:
+ %arrayidx = getelementptr inbounds i32* %a, i32 %iv
+ %0 = load i32* %arrayidx
+ %sum = add nsw i32 %0, %base
+ %next = add i32 %iv, 1
+ %exitcond = icmp eq i32 %next, %i
+ br i1 %exitcond, label %exit, label %body1
+
+exit:
+ ret i32 %sum
+}
+
define i32 @test_loop_align(i32 %i, i32* %a) {
; Check that we provide basic loop body alignment with the block placement
; pass.
@@ -105,7 +200,7 @@ define i32 @test_nested_loop_align(i32 %i, i32* %a, i32* %b) {
; CHECK: test_nested_loop_align:
; CHECK: %entry
; CHECK: .align [[ALIGN]],
-; CHECK-NEXT: %loop.body.2
+; CHECK-NEXT: %loop.body.1
; CHECK: .align [[ALIGN]],
; CHECK-NEXT: %inner.loop.body
; CHECK-NOT: .align