author     Chandler Carruth <chandlerc@gmail.com>   2012-08-07 09:45:24 +0000
committer  Chandler Carruth <chandlerc@gmail.com>   2012-08-07 09:45:24 +0000
commit     e6450dc2afc18531bf9b70180a9f67376d9f00c7
tree       88d92e7932ae7528c097316ea06db60180d92d94
parent     ba86b13ad9cd6a9707a954598863da1e2a9f773b
download   llvm-e6450dc2afc18531bf9b70180a9f67376d9f00c7.tar.gz
           llvm-e6450dc2afc18531bf9b70180a9f67376d9f00c7.tar.bz2
           llvm-e6450dc2afc18531bf9b70180a9f67376d9f00c7.tar.xz
Add a much more conservative strategy for aligning branch targets.
Previously, MBP essentially aligned every branch target it could. This bloats code quite a bit, especially non-looping code, which has no real reason to prefer aligned branch targets so heavily. As Andy said in review, it's still a bit odd to do this without a real cost model, but this at least has much more plausible heuristics.

Fixes PR13265.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@161409 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--  lib/CodeGen/MachineBlockPlacement.cpp                          | 64
-rw-r--r--  test/CodeGen/X86/2008-10-27-CoalescerBug.ll                    |  3
-rw-r--r--  test/CodeGen/X86/block-placement.ll                            |  8
-rw-r--r--  test/CodeGen/X86/loop-blocks.ll                                |  4
-rw-r--r--  test/Transforms/LoopStrengthReduce/X86/2012-01-13-phielim.ll   |  1
5 files changed, 59 insertions, 21 deletions
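
The new logic in the patch below boils down to a short decision sequence: only consider blocks inside a loop, skip blocks that are cold relative to the function entry or to their own loop header, align unconditionally when the block has no fall-through from its layout predecessor (all entries are jumps), and otherwise align only when the fall-through edge is cold relative to the block. What follows is a minimal, self-contained sketch of that sequence, not the committed code: it models BlockFrequency as a plain uint64_t and BranchProbability(1, 5) as integer division by 5, and the Block struct and shouldAlignBlock helper are hypothetical names introduced purely for illustration.

// Sketch of the alignment heuristic under the assumptions above; the
// real pass queries MachineLoopInfo, MachineBlockFrequencyInfo, and
// MachineBranchProbabilityInfo instead of these plain fields.
#include <cstdint>
#include <iostream>

struct Block {
  uint64_t Freq;               // execution frequency of this block
  bool InLoop;                 // does the block sit inside some loop?
  uint64_t LoopHeaderFreq;     // frequency of the enclosing loop's header
  bool LayoutPredFallsThrough; // is the layout predecessor a CFG predecessor?
  uint64_t LayoutEdgeFreq;     // frequency of the fall-through edge, if any
};

// ColdProb = 1/5: "cold" means below 20% of the reference frequency.
static uint64_t scaleByColdProb(uint64_t Freq) { return Freq / 5; }

bool shouldAlignBlock(const Block &B, uint64_t EntryFreq) {
  // 1. Only loop blocks run often enough for alignment to pay off.
  if (!B.InLoop)
    return false;
  // 2. Skip blocks that are cold relative to the function entry.
  if (B.Freq < scaleByColdProb(EntryFreq))
    return false;
  // 3. Skip blocks that are cold relative to their own loop header.
  if (B.Freq < scaleByColdProb(B.LoopHeaderFreq))
    return false;
  // 4. No fall-through from the layout predecessor means every entry is a
  //    jump, so the alignment padding is never executed: align.
  if (!B.LayoutPredFallsThrough)
    return true;
  // 5. Otherwise align only when the fall-through edge is cold relative to
  //    the block, i.e. the hot entries into the block are all jumps.
  return B.LayoutEdgeFreq <= scaleByColdProb(B.Freq);
}

int main() {
  uint64_t EntryFreq = 100;
  // A hot rotated-loop block entered mostly via a backedge jump: aligned.
  Block HotLoopBlock{/*Freq=*/1000, /*InLoop=*/true, /*LoopHeaderFreq=*/1000,
                     /*LayoutPredFallsThrough=*/true, /*LayoutEdgeFreq=*/100};
  // A straight-line, non-loop block: never aligned under the new strategy.
  Block StraightLine{/*Freq=*/100, /*InLoop=*/false, /*LoopHeaderFreq=*/0,
                     /*LayoutPredFallsThrough=*/true, /*LayoutEdgeFreq=*/100};
  std::cout << shouldAlignBlock(HotLoopBlock, EntryFreq) << "\n"; // 1
  std::cout << shouldAlignBlock(StraightLine, EntryFreq) << "\n"; // 0
}

Note that the sketch preserves an asymmetry the committed patch also has: the two coldness checks use strict <, while the final fall-through check uses <=.
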
diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp
index 93270772f0..c4dca2cd15 100644
--- a/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/lib/CodeGen/MachineBlockPlacement.cpp
@@ -1011,29 +1011,63 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
// Walk through the backedges of the function now that we have fully laid out
// the basic blocks and align the destination of each backedge. We don't rely
- // on the loop info here so that we can align backedges in unnatural CFGs and
- // backedges that were introduced purely because of the loop rotations done
- // during this layout pass.
- // FIXME: This isn't quite right, we shouldn't align backedges that result
- // from blocks being sunken below the exit block for the function.
+ // exclusively on the loop info here so that we can align backedges in
+ // unnatural CFGs and backedges that were introduced purely because of the
+ // loop rotations done during this layout pass.
if (F.getFunction()->hasFnAttr(Attribute::OptimizeForSize))
return;
unsigned Align = TLI->getPrefLoopAlignment();
if (!Align)
return; // Don't care about loop alignment.
+ if (FunctionChain.begin() == FunctionChain.end())
+ return; // Empty chain.
- SmallPtrSet<MachineBasicBlock *, 16> PreviousBlocks;
- for (BlockChain::iterator BI = FunctionChain.begin(),
+ const BranchProbability ColdProb(1, 5); // 20%
+ BlockFrequency EntryFreq = MBFI->getBlockFreq(F.begin());
+ BlockFrequency WeightedEntryFreq = EntryFreq * ColdProb;
+ for (BlockChain::iterator BI = llvm::next(FunctionChain.begin()),
BE = FunctionChain.end();
BI != BE; ++BI) {
- PreviousBlocks.insert(*BI);
- // Set alignment on the destination of all the back edges in the new
- // ordering.
- for (MachineBasicBlock::succ_iterator SI = (*BI)->succ_begin(),
- SE = (*BI)->succ_end();
- SI != SE; ++SI)
- if (PreviousBlocks.count(*SI))
- (*SI)->setAlignment(Align);
+ // Don't align non-looping basic blocks. These are unlikely to execute
+ // enough times to matter in practice. Note that we'll still handle
+ // unnatural CFGs inside of a natural outer loop (the common case) and
+ // rotated loops.
+ MachineLoop *L = MLI->getLoopFor(*BI);
+ if (!L)
+ continue;
+
+ // If the block is cold relative to the function entry, don't waste space
+ // aligning it.
+ BlockFrequency Freq = MBFI->getBlockFreq(*BI);
+ if (Freq < WeightedEntryFreq)
+ continue;
+
+ // If the block is cold relative to its loop header, don't align it
+ // regardless of what edges into the block exist.
+ MachineBasicBlock *LoopHeader = L->getHeader();
+ BlockFrequency LoopHeaderFreq = MBFI->getBlockFreq(LoopHeader);
+ if (Freq < (LoopHeaderFreq * ColdProb))
+ continue;
+
+ // Check for the existence of a non-layout predecessor which would benefit
+ // from aligning this block.
+ MachineBasicBlock *LayoutPred = *llvm::prior(BI);
+
+ // Force alignment if all the predecessors are jumps. We already checked
+ // that the block isn't cold above.
+ if (!LayoutPred->isSuccessor(*BI)) {
+ (*BI)->setAlignment(Align);
+ continue;
+ }
+
+ // Align this block if the layout predecessor's edge into this block is
+ // cold relative to the block. When this is true, other predecessors make up
+ // all of the hot entries into the block and thus alignment is likely to be
+ // important.
+ BranchProbability LayoutProb = MBPI->getEdgeProbability(LayoutPred, *BI);
+ BlockFrequency LayoutEdgeFreq = MBFI->getBlockFreq(LayoutPred) * LayoutProb;
+ if (LayoutEdgeFreq <= (Freq * ColdProb))
+ (*BI)->setAlignment(Align);
}
}
diff --git a/test/CodeGen/X86/2008-10-27-CoalescerBug.ll b/test/CodeGen/X86/2008-10-27-CoalescerBug.ll
index 66f06778bd..b2cf34cd20 100644
--- a/test/CodeGen/X86/2008-10-27-CoalescerBug.ll
+++ b/test/CodeGen/X86/2008-10-27-CoalescerBug.ll
@@ -17,8 +17,7 @@ bb: ; preds = %bb, %entry
; CHECK: %bb30.loopexit
; CHECK: divsd %xmm0
; CHECK: movsd %xmm0, 16(%esp)
-; CHECK: .align
-; CHECK-NEXT: %bb3
+; CHECK: %bb3
bb3: ; preds = %bb30.loopexit, %bb25, %bb3
%2 = load i32* null, align 4 ; <i32> [#uses=1]
%3 = mul i32 %2, 0 ; <i32> [#uses=1]
diff --git a/test/CodeGen/X86/block-placement.ll b/test/CodeGen/X86/block-placement.ll
index ab23b88f49..5534712af8 100644
--- a/test/CodeGen/X86/block-placement.ll
+++ b/test/CodeGen/X86/block-placement.ll
@@ -7,10 +7,15 @@ define i32 @test_ifchains(i32 %i, i32* %a, i32 %b) {
; that is not expected to run.
; CHECK: test_ifchains:
; CHECK: %entry
+; CHECK-NOT: .align
; CHECK: %else1
+; CHECK-NOT: .align
; CHECK: %else2
+; CHECK-NOT: .align
; CHECK: %else3
+; CHECK-NOT: .align
; CHECK: %else4
+; CHECK-NOT: .align
; CHECK: %exit
; CHECK: %then1
; CHECK: %then2
@@ -76,8 +81,11 @@ define i32 @test_loop_cold_blocks(i32 %i, i32* %a) {
; Check that we sink cold loop blocks after the hot loop body.
; CHECK: test_loop_cold_blocks:
; CHECK: %entry
+; CHECK-NOT: .align
; CHECK: %unlikely1
+; CHECK-NOT: .align
; CHECK: %unlikely2
+; CHECK: .align
; CHECK: %body1
; CHECK: %body2
; CHECK: %body3
diff --git a/test/CodeGen/X86/loop-blocks.ll b/test/CodeGen/X86/loop-blocks.ll
index d14102fe24..4bd162b452 100644
--- a/test/CodeGen/X86/loop-blocks.ll
+++ b/test/CodeGen/X86/loop-blocks.ll
@@ -41,7 +41,6 @@ done:
; CHECK-NEXT: align
; CHECK-NEXT: .LBB1_4:
; CHECK-NEXT: callq bar99
-; CHECK-NEXT: align
; CHECK-NEXT: .LBB1_1:
; CHECK-NEXT: callq body
@@ -79,7 +78,6 @@ exit:
; CHECK-NEXT: .LBB2_5:
; CHECK-NEXT: callq block_a_true_func
; CHECK-NEXT: callq block_a_merge_func
-; CHECK-NEXT: align
; CHECK-NEXT: .LBB2_1:
; CHECK-NEXT: callq body
;
@@ -139,13 +137,13 @@ exit:
; CHECK-NEXT: align
; CHECK-NEXT: .LBB3_7:
; CHECK-NEXT: callq bar100
-; CHECK-NEXT: align
; CHECK-NEXT: .LBB3_1:
; CHECK-NEXT: callq loop_header
; CHECK: jl .LBB3_7
; CHECK: jge .LBB3_3
; CHECK-NEXT: callq bar101
; CHECK-NEXT: jmp .LBB3_1
+; CHECK-NEXT: align
; CHECK-NEXT: .LBB3_3:
; CHECK: jge .LBB3_4
; CHECK-NEXT: callq bar102
diff --git a/test/Transforms/LoopStrengthReduce/X86/2012-01-13-phielim.ll b/test/Transforms/LoopStrengthReduce/X86/2012-01-13-phielim.ll
index a9b815fc45..c3b8b8910d 100644
--- a/test/Transforms/LoopStrengthReduce/X86/2012-01-13-phielim.ll
+++ b/test/Transforms/LoopStrengthReduce/X86/2012-01-13-phielim.ll
@@ -99,7 +99,6 @@ while.end: ; preds = %entry
; CHECK: %for.body3.lr.ph.us.i.loopexit
; CHECK-NEXT: in Loop: Header
; CHECK-NEXT: incq
-; CHECK-NEXT: .align
; CHECK-NEXT: %for.body3.us.i
; CHECK-NEXT: Inner Loop
; CHECK: testb