summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEvan Cheng <evan.cheng@apple.com>2011-05-11 01:03:01 +0000
committerEvan Cheng <evan.cheng@apple.com>2011-05-11 01:03:01 +0000
commit7e20a574eb3f4fc3840a0490c814443d756e2e0e (patch)
tree7e148f185b14e932bf5f84bb0e8e6714aa5f53e1
parentb38fe4b52de05a35c0693f4fd117f908a2abadb7 (diff)
downloadllvm-7e20a574eb3f4fc3840a0490c814443d756e2e0e.tar.gz
llvm-7e20a574eb3f4fc3840a0490c814443d756e2e0e.tar.bz2
llvm-7e20a574eb3f4fc3840a0490c814443d756e2e0e.tar.xz
Add a late optimization to BranchFolding that hoist common instruction sequences
at the start of basic blocks to their common predecessor. It's actually quite common (e.g. about 50 times in JM/lencod) and has shown to be a nice code size benefit. e.g. pushq %rax testl %edi, %edi jne LBB0_2 ## BB#1: xorb %al, %al popq %rdx ret LBB0_2: xorb %al, %al callq _foo popq %rdx ret => pushq %rax xorb %al, %al testl %edi, %edi je LBB0_2 ## BB#1: callq _foo LBB0_2: popq %rdx ret rdar://9145558 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@131172 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/CodeGen/BranchFolding.cpp267
-rw-r--r--lib/CodeGen/BranchFolding.h7
-rw-r--r--lib/CodeGen/IfConversion.cpp4
-rw-r--r--test/CodeGen/X86/hoist-common.ll28
4 files changed, 296 insertions, 10 deletions
diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp
index 77043406bc..01aa550f5f 100644
--- a/lib/CodeGen/BranchFolding.cpp
+++ b/lib/CodeGen/BranchFolding.cpp
@@ -41,6 +41,7 @@ using namespace llvm;
STATISTIC(NumDeadBlocks, "Number of dead blocks removed");
STATISTIC(NumBranchOpts, "Number of branches optimized");
STATISTIC(NumTailMerge , "Number of block tails merged");
+STATISTIC(NumHoist , "Number of times common instructions are hoisted");
static cl::opt<cl::boolOrDefault> FlagEnableTailMerge("enable-tail-merge",
cl::init(cl::BOU_UNSET), cl::Hidden);
@@ -65,7 +66,7 @@ namespace {
public:
static char ID;
explicit BranchFolderPass(bool defaultEnableTailMerge)
- : MachineFunctionPass(ID), BranchFolder(defaultEnableTailMerge) {}
+ : MachineFunctionPass(ID), BranchFolder(defaultEnableTailMerge, true) {}
virtual bool runOnMachineFunction(MachineFunction &MF);
virtual const char *getPassName() const { return "Control Flow Optimizer"; }
@@ -86,12 +87,14 @@ bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) {
}
-BranchFolder::BranchFolder(bool defaultEnableTailMerge) {
+BranchFolder::BranchFolder(bool defaultEnableTailMerge, bool CommonHoist) {
switch (FlagEnableTailMerge) {
case cl::BOU_UNSET: EnableTailMerge = defaultEnableTailMerge; break;
case cl::BOU_TRUE: EnableTailMerge = true; break;
case cl::BOU_FALSE: EnableTailMerge = false; break;
}
+
+ EnableHoistCommonCode = CommonHoist;
}
/// RemoveDeadBlock - Remove the specified dead machine basic block from the
@@ -186,9 +189,10 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF,
bool MadeChangeThisIteration = true;
while (MadeChangeThisIteration) {
- MadeChangeThisIteration = false;
- MadeChangeThisIteration |= TailMergeBlocks(MF);
- MadeChangeThisIteration |= OptimizeBranches(MF);
+ MadeChangeThisIteration = TailMergeBlocks(MF);
+ MadeChangeThisIteration |= OptimizeBranches(MF);
+ if (EnableHoistCommonCode)
+ MadeChangeThisIteration |= HoistCommonCode(MF);
MadeChange |= MadeChangeThisIteration;
}
@@ -910,7 +914,8 @@ bool BranchFolder::OptimizeBranches(MachineFunction &MF) {
// Make sure blocks are numbered in order
MF.RenumberBlocks();
- for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ) {
+ for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end();
+ I != E; ) {
MachineBasicBlock *MBB = I++;
MadeChange |= OptimizeBlock(MBB);
@@ -1339,3 +1344,253 @@ ReoptimizeBlock:
return MadeChange;
}
+
+//===----------------------------------------------------------------------===//
+// Hoist Common Code
+//===----------------------------------------------------------------------===//
+
+/// HoistCommonCode - Hoist common instruction sequences at the start of basic
+/// blocks to their common predecessor.
+/// NOTE: This optimization does not update live-in information so it must be
+/// run after all passes that require correct liveness information.
+bool BranchFolder::HoistCommonCode(MachineFunction &MF) {
+ bool MadeChange = false;
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ) {
+ MachineBasicBlock *MBB = I++;
+ MadeChange |= HoistCommonCodeInSuccs(MBB);
+ }
+
+ return MadeChange;
+}
+
+/// findFalseBlock - BB has a fallthrough. Find its 'false' successor given
+/// its 'true' successor.
+static MachineBasicBlock *findFalseBlock(MachineBasicBlock *BB,
+ MachineBasicBlock *TrueBB) {
+ for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+ E = BB->succ_end(); SI != E; ++SI) {
+ MachineBasicBlock *SuccBB = *SI;
+ if (SuccBB != TrueBB)
+ return SuccBB;
+ }
+ return NULL;
+}
+
+/// findHoistingInsertPosAndDeps - Find the location to move common instructions
+/// in successors to. The location is ususally just before the terminator,
+/// however if the terminator is a conditional branch and its previous
+/// instruction is the flag setting instruction, the previous instruction is
+/// the preferred location. This function also gathers uses and defs of the
+/// instructions from the insertion point to the end of the block. The data is
+/// used by HoistCommonCodeInSuccs to ensure safety.
+static
+MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB,
+ const TargetInstrInfo *TII,
+ const TargetRegisterInfo *TRI,
+ SmallSet<unsigned,4> &Uses,
+ SmallSet<unsigned,4> &Defs) {
+ MachineBasicBlock::iterator Loc = MBB->getFirstTerminator();
+ if (!TII->isUnpredicatedTerminator(Loc))
+ return MBB->end();
+
+ for (unsigned i = 0, e = Loc->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = Loc->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (MO.isUse()) {
+ Uses.insert(Reg);
+ for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
+ Uses.insert(*AS);
+ } else if (!MO.isDead())
+ // Don't try to hoist code in the rare case the terminator defines a
+ // register that is later used.
+ return MBB->end();
+ }
+
+ if (Uses.empty())
+ return Loc;
+ if (Loc == MBB->begin())
+ return MBB->end();
+
+ // The terminator is probably a conditional branch, try not to separate the
+ // branch from condition setting instruction.
+ MachineBasicBlock::iterator PI = Loc;
+ --PI;
+ while (PI != MBB->begin() && Loc->isDebugValue())
+ --PI;
+
+ bool IsDef = false;
+ for (unsigned i = 0, e = PI->getNumOperands(); !IsDef && i != e; ++i) {
+ const MachineOperand &MO = PI->getOperand(i);
+ if (!MO.isReg() || MO.isUse())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (Uses.count(Reg))
+ IsDef = true;
+ }
+ if (!IsDef)
+ // The condition setting instruction is not just before the conditional
+ // branch.
+ return Loc;
+
+ // Be conservative, don't insert instruction above something that may have
+ // side-effects. And since it's potentially bad to separate flag setting
+ // instruction from the conditional branch, just abort the optimization
+ // completely.
+ // Also avoid moving code above predicated instruction since it's hard to
+ // reason about register liveness with predicated instruction.
+ bool DontMoveAcrossStore = true;
+ if (!PI->isSafeToMove(TII, 0, DontMoveAcrossStore) ||
+ TII->isPredicated(PI))
+ return MBB->end();
+
+
+ // Find out what registers are live. Note this routine is ignoring other live
+ // registers which are only used by instructions in successor blocks.
+ for (unsigned i = 0, e = PI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = PI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (MO.isUse()) {
+ Uses.insert(Reg);
+ for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
+ Uses.insert(*AS);
+ } else {
+ if (Uses.count(Reg)) {
+ Uses.erase(Reg);
+ for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR)
+ Uses.erase(*SR); // Use getSubRegisters to be conservative
+ Defs.insert(Reg);
+ for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
+ Defs.insert(*AS);
+ }
+ }
+ }
+
+ return PI;
+}
+
+/// HoistCommonCodeInSuccs - If the successors of MBB has common instruction
+/// sequence at the start of the function, move the instructions before MBB
+/// terminator if it's legal.
+bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {
+ MachineBasicBlock *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 4> Cond;
+ if (TII->AnalyzeBranch(*MBB, TBB, FBB, Cond, true) || !TBB || Cond.empty())
+ return false;
+
+ if (!FBB) FBB = findFalseBlock(MBB, TBB);
+ if (!FBB)
+ // Malformed bcc? True and false blocks are the same?
+ return false;
+
+ // Restrict the optimization to cases where MBB is the only predecessor,
+ // it is an obvious win.
+ if (TBB->pred_size() > 1 || FBB->pred_size() > 1)
+ return false;
+
+ // Find a suitable position to hoist the common instructions to. Also figure
+ // out which registers are used or defined by instructions from the insertion
+ // point to the end of the block.
+ SmallSet<unsigned, 4> Uses, Defs;
+ MachineBasicBlock::iterator Loc =
+ findHoistingInsertPosAndDeps(MBB, TII, TRI, Uses, Defs);
+ if (Loc == MBB->end())
+ return false;
+
+ SmallSet<unsigned, 4> LocalDefs;
+ unsigned NumDups = 0;
+ MachineBasicBlock::iterator TIB = TBB->begin();
+ MachineBasicBlock::iterator FIB = FBB->begin();
+ MachineBasicBlock::iterator TIE = TBB->end();
+ MachineBasicBlock::iterator FIE = FBB->end();
+ while (TIB != TIE && FIB != FIE) {
+ // Skip dbg_value instructions. These do not count.
+ if (TIB->isDebugValue()) {
+ while (TIB != TIE && TIB->isDebugValue())
+ ++TIB;
+ if (TIB == TIE)
+ break;
+ }
+ if (FIB->isDebugValue()) {
+ while (FIB != FIE && FIB->isDebugValue())
+ ++FIB;
+ if (FIB == FIE)
+ break;
+ }
+ if (!TIB->isIdenticalTo(FIB))
+ break;
+
+ if (TII->isPredicated(TIB))
+ // Hard to reason about register liveness with predicated instruction.
+ break;
+
+ bool IsSafe = true;
+ for (unsigned i = 0, e = TIB->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = TIB->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (MO.isDef()) {
+ if (Uses.count(Reg)) {
+ // Avoid clobbering a register that's used by the instruction at
+ // the point of insertion.
+ IsSafe = false;
+ break;
+ }
+ if (!MO.isDead() && Defs.count(Reg)) {
+ // Don't hoist the instruction if the def would be clobber by the
+ // instruction at the point insertion. FIXME: This is overly
+ // conservative. It should be possible to hoist the instructions
+ // in BB2 in the following example:
+ // BB1:
+ // r1, eflag = op1 r2, r3
+ // brcc eflag
+ //
+ // BB2:
+ // r1 = op2, ...
+ // = op3, r1<kill>
+ IsSafe = false;
+ break;
+ }
+ LocalDefs.insert(Reg);
+ for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR)
+ LocalDefs.insert(*SR);
+ } else if (!LocalDefs.count(Reg)) {
+ if (Defs.count(Reg)) {
+ // Use is defined by the instruction at the point of insertion.
+ IsSafe = false;
+ break;
+ }
+ }
+ }
+ if (!IsSafe)
+ break;
+
+ bool DontMoveAcrossStore = true;
+ if (!TIB->isSafeToMove(TII, 0, DontMoveAcrossStore))
+ break;
+
+ ++NumDups;
+ ++TIB;
+ ++FIB;
+ }
+
+ if (!NumDups)
+ return false;
+
+ MBB->splice(Loc, TBB, TBB->begin(), TIB);
+ FBB->erase(FBB->begin(), FIB);
+ ++NumHoist;
+ return true;
+}
diff --git a/lib/CodeGen/BranchFolding.h b/lib/CodeGen/BranchFolding.h
index 15dfa7f6be..4daf4ecfe5 100644
--- a/lib/CodeGen/BranchFolding.h
+++ b/lib/CodeGen/BranchFolding.h
@@ -19,11 +19,10 @@ namespace llvm {
class RegScavenger;
class TargetInstrInfo;
class TargetRegisterInfo;
- template<typename T> class SmallVectorImpl;
class BranchFolder {
public:
- explicit BranchFolder(bool defaultEnableTailMerge);
+ explicit BranchFolder(bool defaultEnableTailMerge, bool CommonHoist);
bool OptimizeFunction(MachineFunction &MF,
const TargetInstrInfo *tii,
@@ -85,6 +84,7 @@ namespace llvm {
std::vector<SameTailElt> SameTails;
bool EnableTailMerge;
+ bool EnableHoistCommonCode;
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
MachineModuleInfo *MMI;
@@ -110,6 +110,9 @@ namespace llvm {
bool OptimizeBlock(MachineBasicBlock *MBB);
void RemoveDeadBlock(MachineBasicBlock *MBB);
bool OptimizeImpDefsBlock(MachineBasicBlock *MBB);
+
+ bool HoistCommonCode(MachineFunction &MF);
+ bool HoistCommonCodeInSuccs(MachineBasicBlock *MBB);
};
}
diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp
index 790200b8df..8b2c981616 100644
--- a/lib/CodeGen/IfConversion.cpp
+++ b/lib/CodeGen/IfConversion.cpp
@@ -265,7 +265,7 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
if (!TII) return false;
// Tail merge tend to expose more if-conversion opportunities.
- BranchFolder BF(true);
+ BranchFolder BF(true, false);
bool BFChange = BF.OptimizeFunction(MF, TII,
MF.getTarget().getRegisterInfo(),
getAnalysisIfAvailable<MachineModuleInfo>());
@@ -399,7 +399,7 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
BBAnalysis.clear();
if (MadeChange && IfCvtBranchFold) {
- BranchFolder BF(false);
+ BranchFolder BF(false, false);
BF.OptimizeFunction(MF, TII,
MF.getTarget().getRegisterInfo(),
getAnalysisIfAvailable<MachineModuleInfo>());
diff --git a/test/CodeGen/X86/hoist-common.ll b/test/CodeGen/X86/hoist-common.ll
new file mode 100644
index 0000000000..2b1550a4ff
--- /dev/null
+++ b/test/CodeGen/X86/hoist-common.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+
+; Common "xorb al, al" instruction in the two successor blocks should be
+; moved to the entry block above the test + je.
+
+; rdar://9145558
+
+define zeroext i1 @t(i32 %c) nounwind ssp {
+entry:
+; CHECK: t:
+; CHECK: xorb %al, %al
+; CHECK: test
+; CHECK: je
+ %tobool = icmp eq i32 %c, 0
+ br i1 %tobool, label %return, label %if.then
+
+if.then:
+; CHECK: callq
+ %call = tail call zeroext i1 (...)* @foo() nounwind
+ br label %return
+
+return:
+; CHECK: ret
+ %retval.0 = phi i1 [ %call, %if.then ], [ false, %entry ]
+ ret i1 %retval.0
+}
+
+declare zeroext i1 @foo(...)