summaryrefslogtreecommitdiff
path: root/lib/CodeGen/PeepholeOptimizer.cpp
diff options
context:
space:
mode:
authorManman Ren <mren@apple.com>2012-08-02 00:56:42 +0000
committerManman Ren <mren@apple.com>2012-08-02 00:56:42 +0000
commitd7d003c2b7b7f657eed364e4ac06f4ab32fc8c2d (patch)
treea135e2b9b794ea8b4b12c05e4a2a768d32577f59 /lib/CodeGen/PeepholeOptimizer.cpp
parente5c79a5c2542fa0d852df28b5ee9de8dfef694d8 (diff)
downloadllvm-d7d003c2b7b7f657eed364e4ac06f4ab32fc8c2d.tar.gz
llvm-d7d003c2b7b7f657eed364e4ac06f4ab32fc8c2d.tar.bz2
llvm-d7d003c2b7b7f657eed364e4ac06f4ab32fc8c2d.tar.xz
X86 Peephole: fold loads to the source register operand if possible.
Machine CSE and other optimizations can remove instructions so folding is possible at peephole while not possible at ISel. This patch is a rework of r160919 and was tested on clang self-host on my local machine. rdar://10554090 and rdar://11873276 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@161152 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/CodeGen/PeepholeOptimizer.cpp')
-rw-r--r--lib/CodeGen/PeepholeOptimizer.cpp57
1 files changed, 57 insertions, 0 deletions
diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp
index 91c33c4af4..d9474bf240 100644
--- a/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/lib/CodeGen/PeepholeOptimizer.cpp
@@ -78,6 +78,7 @@ STATISTIC(NumReuse, "Number of extension results reused");
STATISTIC(NumBitcasts, "Number of bitcasts eliminated");
STATISTIC(NumCmps, "Number of compares eliminated");
STATISTIC(NumImmFold, "Number of move immediate folded");
+STATISTIC(NumLoadFold, "Number of loads folded");
namespace {
class PeepholeOptimizer : public MachineFunctionPass {
@@ -114,6 +115,7 @@ namespace {
bool foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
SmallSet<unsigned, 4> &ImmDefRegs,
DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
+ bool isLoadFoldable(MachineInstr *MI, unsigned &FoldAsLoadDefReg);
};
}
@@ -384,6 +386,29 @@ bool PeepholeOptimizer::optimizeCmpInstr(MachineInstr *MI,
return false;
}
+/// isLoadFoldable - Check whether MI is a candidate for folding into a later
+/// instruction. We only fold loads to virtual registers and the virtual
+/// register defined has a single use.
+bool PeepholeOptimizer::isLoadFoldable(MachineInstr *MI,
+ unsigned &FoldAsLoadDefReg) {
+ if (MI->canFoldAsLoad()) {
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (MCID.getNumDefs() == 1) {
+ unsigned Reg = MI->getOperand(0).getReg();
+ // To reduce compilation time, we check MRI->hasOneUse when inserting
+ // loads. It should be checked when processing uses of the load, since
+ // uses can be removed during peephole.
+ if (!MI->getOperand(0).getSubReg() &&
+ TargetRegisterInfo::isVirtualRegister(Reg) &&
+ MRI->hasOneUse(Reg)) {
+ FoldAsLoadDefReg = Reg;
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
bool PeepholeOptimizer::isMoveImmediate(MachineInstr *MI,
SmallSet<unsigned, 4> &ImmDefRegs,
DenseMap<unsigned, MachineInstr*> &ImmDefMIs) {
@@ -441,6 +466,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
SmallPtrSet<MachineInstr*, 8> LocalMIs;
SmallSet<unsigned, 4> ImmDefRegs;
DenseMap<unsigned, MachineInstr*> ImmDefMIs;
+ unsigned FoldAsLoadDefReg;
for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
MachineBasicBlock *MBB = &*I;
@@ -448,6 +474,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
LocalMIs.clear();
ImmDefRegs.clear();
ImmDefMIs.clear();
+ FoldAsLoadDefReg = 0;
bool First = true;
MachineBasicBlock::iterator PMII;
@@ -456,12 +483,17 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
MachineInstr *MI = &*MII;
LocalMIs.insert(MI);
+ // If there exists an instruction which belongs to the following
+ // categories, we will discard the load candidate.
if (MI->isLabel() || MI->isPHI() || MI->isImplicitDef() ||
MI->isKill() || MI->isInlineAsm() || MI->isDebugValue() ||
MI->hasUnmodeledSideEffects()) {
+ FoldAsLoadDefReg = 0;
++MII;
continue;
}
+ if (MI->mayStore() || MI->isCall())
+ FoldAsLoadDefReg = 0;
if (MI->isBitcast()) {
if (optimizeBitcastInstr(MI, MBB)) {
@@ -489,6 +521,31 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
Changed |= foldImmediate(MI, MBB, ImmDefRegs, ImmDefMIs);
}
+ // Check whether MI is a load candidate for folding into a later
+ // instruction. If MI is not a candidate, check whether we can fold an
+ // earlier load into MI.
+ if (!isLoadFoldable(MI, FoldAsLoadDefReg) && FoldAsLoadDefReg) {
+ // We need to fold load after optimizeCmpInstr, since optimizeCmpInstr
+ // can enable folding by converting SUB to CMP.
+ MachineInstr *DefMI = 0;
+ MachineInstr *FoldMI = TII->optimizeLoadInstr(MI, MRI,
+ FoldAsLoadDefReg, DefMI);
+ if (FoldMI) {
+ // Update LocalMIs since we replaced MI with FoldMI and deleted DefMI.
+ LocalMIs.erase(MI);
+ LocalMIs.erase(DefMI);
+ LocalMIs.insert(FoldMI);
+ MI->eraseFromParent();
+ DefMI->eraseFromParent();
+ ++NumLoadFold;
+
+ // MI is replaced with FoldMI.
+ Changed = true;
+ PMII = FoldMI;
+ MII = llvm::next(PMII);
+ continue;
+ }
+ }
First = false;
PMII = MII;
++MII;