diff options
-rw-r--r-- | include/llvm/CodeGen/Passes.h | 4 | ||||
-rw-r--r-- | include/llvm/InitializePasses.h | 1 | ||||
-rw-r--r-- | lib/CodeGen/AggressiveAntiDepBreaker.cpp | 2 | ||||
-rw-r--r-- | lib/CodeGen/BranchFolding.cpp | 20 | ||||
-rw-r--r-- | lib/CodeGen/CMakeLists.txt | 1 | ||||
-rw-r--r-- | lib/CodeGen/CriticalAntiDepBreaker.cpp | 2 | ||||
-rw-r--r-- | lib/CodeGen/LLVMTargetMachine.cpp | 33 | ||||
-rw-r--r-- | lib/CodeGen/MachineCopyPropagation.cpp | 240 | ||||
-rw-r--r-- | lib/CodeGen/RegisterScavenging.cpp | 2 | ||||
-rw-r--r-- | lib/CodeGen/ScheduleDAGInstrs.cpp | 7 | ||||
-rw-r--r-- | lib/CodeGen/ScheduleDAGInstrs.h | 5 | ||||
-rw-r--r-- | test/CodeGen/ARM/code-placement.ll | 6 | ||||
-rw-r--r-- | test/CodeGen/X86/machine-cp.ll | 36 |
13 files changed, 318 insertions, 41 deletions
diff --git a/include/llvm/CodeGen/Passes.h b/include/llvm/CodeGen/Passes.h index b4f6cac858..f077f2b8d6 100644 --- a/include/llvm/CodeGen/Passes.h +++ b/include/llvm/CodeGen/Passes.h @@ -193,6 +193,10 @@ namespace llvm { /// instructions. FunctionPass *createMachineSinkingPass(); + /// createMachineCopyPropagationPass - This pass performs copy propagation on + /// machine instructions. + FunctionPass *createMachineCopyPropagationPass(); + /// createPeepholeOptimizerPass - This pass performs peephole optimizations - /// like extension and comparison eliminations. FunctionPass *createPeepholeOptimizerPass(); diff --git a/include/llvm/InitializePasses.h b/include/llvm/InitializePasses.h index ce87f16c2e..ed044c0ad9 100644 --- a/include/llvm/InitializePasses.h +++ b/include/llvm/InitializePasses.h @@ -79,6 +79,7 @@ void initializeCallGraphAnalysisGroup(PassRegistry&); void initializeCodeGenPreparePass(PassRegistry&); void initializeConstantMergePass(PassRegistry&); void initializeConstantPropagationPass(PassRegistry&); +void initializeMachineCopyPropagationPass(PassRegistry&); void initializeCorrelatedValuePropagationPass(PassRegistry&); void initializeDAEPass(PassRegistry&); void initializeDAHPass(PassRegistry&); diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp index 6cf4571d73..77958d9c95 100644 --- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp +++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp @@ -186,7 +186,7 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { // callee-saved register that is not saved in the prolog. const MachineFrameInfo *MFI = MF.getFrameInfo(); BitVector Pristine = MFI->getPristineRegs(BB); - for (const unsigned *I = TRI->getCalleeSavedRegs(); *I; ++I) { + for (const unsigned *I = TRI->getCalleeSavedRegs(&MF); *I; ++I) { unsigned Reg = *I; if (!IsReturnBlock && !Pristine.test(Reg)) continue; for (const unsigned *Alias = TRI->getOverlaps(Reg); diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index 89894c37ee..29e8545030 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -180,7 +180,7 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF, TRI = tri; MMI = mmi; - RS = TRI->requiresRegisterScavenging(MF) ? new RegScavenger() : NULL; + RS = new RegScavenger(); // Fix CFG. The later algorithms expect it to be right. bool MadeChange = false; @@ -368,16 +368,14 @@ static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1, void BranchFolder::MaintainLiveIns(MachineBasicBlock *CurMBB, MachineBasicBlock *NewMBB) { - if (RS) { - RS->enterBasicBlock(CurMBB); - if (!CurMBB->empty()) - RS->forward(prior(CurMBB->end())); - BitVector RegsLiveAtExit(TRI->getNumRegs()); - RS->getRegsUsed(RegsLiveAtExit, false); - for (unsigned int i = 0, e = TRI->getNumRegs(); i != e; i++) - if (RegsLiveAtExit[i]) - NewMBB->addLiveIn(i); - } + RS->enterBasicBlock(CurMBB); + if (!CurMBB->empty()) + RS->forward(prior(CurMBB->end())); + BitVector RegsLiveAtExit(TRI->getNumRegs()); + RS->getRegsUsed(RegsLiveAtExit, false); + for (unsigned int i = 0, e = TRI->getNumRegs(); i != e; i++) + if (RegsLiveAtExit[i]) + NewMBB->addLiveIn(i); } /// ReplaceTailWithBranchTo - Delete the instruction OldInst and everything diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index ce9d0d4d0d..7fa5e47f9e 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -40,6 +40,7 @@ add_llvm_library(LLVMCodeGen MachineBlockPlacement.cpp MachineBranchProbabilityInfo.cpp MachineCodeEmitter.cpp + MachineCopyPropagation.cpp MachineCSE.cpp MachineDominators.cpp MachineFunction.cpp diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp index 128143e70f..5c325396db 100644 --- a/lib/CodeGen/CriticalAntiDepBreaker.cpp +++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -102,7 +102,7 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { // callee-saved register that is not saved in the prolog. const MachineFrameInfo *MFI = MF.getFrameInfo(); BitVector Pristine = MFI->getPristineRegs(BB); - for (const unsigned *I = TRI->getCalleeSavedRegs(); *I; ++I) { + for (const unsigned *I = TRI->getCalleeSavedRegs(&MF); *I; ++I) { unsigned Reg = *I; if (!IsReturnBlock && !Pristine.test(Reg)) continue; Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1); diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp index 62227fd4d6..7fd089a5db 100644 --- a/lib/CodeGen/LLVMTargetMachine.cpp +++ b/lib/CodeGen/LLVMTargetMachine.cpp @@ -452,23 +452,10 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, if (addPostRegAlloc(PM)) printAndVerify(PM, "After PostRegAlloc passes"); - PM.add(createExpandPostRAPseudosPass()); - printAndVerify(PM, "After ExpandPostRAPseudos"); - // Insert prolog/epilog code. Eliminate abstract frame index references... PM.add(createPrologEpilogCodeInserter()); printAndVerify(PM, "After PrologEpilogCodeInserter"); - // Run pre-sched2 passes. - if (addPreSched2(PM)) - printAndVerify(PM, "After PreSched2 passes"); - - // Second pass scheduler. - if (getOptLevel() != CodeGenOpt::None && !DisablePostRA) { - PM.add(createPostRAScheduler(getOptLevel())); - printAndVerify(PM, "After PostRAScheduler"); - } - // Branch folding must be run after regalloc and prolog/epilog insertion. if (getOptLevel() != CodeGenOpt::None && !DisableBranchFold) { PM.add(createBranchFoldingPass(getEnableTailMergeDefault())); @@ -481,6 +468,26 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, printNoVerify(PM, "After TailDuplicate"); } + // Copy propagation. + if (getOptLevel() != CodeGenOpt::None) { + PM.add(createMachineCopyPropagationPass()); + printNoVerify(PM, "After copy propagation pass"); + } + + // Expand pseudo instructions before second scheduling pass. + PM.add(createExpandPostRAPseudosPass()); + printNoVerify(PM, "After ExpandPostRAPseudos"); + + // Run pre-sched2 passes. + if (addPreSched2(PM)) + printNoVerify(PM, "After PreSched2 passes"); + + // Second pass scheduler. + if (getOptLevel() != CodeGenOpt::None && !DisablePostRA) { + PM.add(createPostRAScheduler(getOptLevel())); + printNoVerify(PM, "After PostRAScheduler"); + } + PM.add(createGCMachineCodeAnalysisPass()); if (PrintGCInfo) diff --git a/lib/CodeGen/MachineCopyPropagation.cpp b/lib/CodeGen/MachineCopyPropagation.cpp new file mode 100644 index 0000000000..861025ca0b --- /dev/null +++ b/lib/CodeGen/MachineCopyPropagation.cpp @@ -0,0 +1,240 @@ +//===- MachineCopyPropagation.cpp - Machine Copy Propagation Pass ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This is an extremely simple MachineInstr-level copy propagation pass. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "codegen-cp" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Pass.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +using namespace llvm; + +STATISTIC(NumDeletes, "Number of dead copies deleted"); + +namespace { + class MachineCopyPropagation : public MachineFunctionPass { + const TargetRegisterInfo *TRI; + BitVector ReservedRegs; + + public: + static char ID; // Pass identification, replacement for typeid + MachineCopyPropagation() : MachineFunctionPass(ID) { + initializeMachineCopyPropagationPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnMachineFunction(MachineFunction &MF); + + private: + void SourceNoLongerAvailable(unsigned Reg, + DenseMap<unsigned, unsigned> &SrcMap, + DenseMap<unsigned, MachineInstr*> &AvailCopyMap); + bool CopyPropagateBlock(MachineBasicBlock &MBB); + }; +} +char MachineCopyPropagation::ID = 0; + +INITIALIZE_PASS(MachineCopyPropagation, "machine-cp", + "Machine Copy Propagation Pass", false, false) + +FunctionPass *llvm::createMachineCopyPropagationPass() { + return new MachineCopyPropagation(); +} + +void +MachineCopyPropagation::SourceNoLongerAvailable(unsigned Reg, + DenseMap<unsigned, unsigned> &SrcMap, + DenseMap<unsigned, MachineInstr*> &AvailCopyMap) { + DenseMap<unsigned, unsigned>::iterator SI = SrcMap.find(Reg); + if (SI != SrcMap.end()) { + unsigned MappedDef = SI->second; + // Source of copy is no longer available for propagation. + if (AvailCopyMap.erase(MappedDef)) { + for (const unsigned *SR = TRI->getSubRegisters(MappedDef); *SR; ++SR) + AvailCopyMap.erase(*SR); + } + } + for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) { + SI = SrcMap.find(*AS); + if (SI != SrcMap.end()) { + unsigned MappedDef = SI->second; + if (AvailCopyMap.erase(MappedDef)) { + for (const unsigned *SR = TRI->getSubRegisters(MappedDef); *SR; ++SR) + AvailCopyMap.erase(*SR); + } + } + } +} + +bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { + SmallSetVector<MachineInstr*, 8> MaybeDeadCopies; // Candidates for deletion + DenseMap<unsigned, MachineInstr*> AvailCopyMap; // Def -> available copies map + DenseMap<unsigned, MachineInstr*> CopyMap; // Def -> copies map + DenseMap<unsigned, unsigned> SrcMap; // Src -> Def map + + bool Changed = false; + for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ) { + MachineInstr *MI = &*I; + ++I; + + if (MI->isCopy()) { + unsigned Def = MI->getOperand(0).getReg(); + unsigned Src = MI->getOperand(1).getReg(); + + if (TargetRegisterInfo::isVirtualRegister(Def) || + TargetRegisterInfo::isVirtualRegister(Src)) + report_fatal_error("MachineCopyPropagation should be run after" + " register allocation!"); + + DenseMap<unsigned, MachineInstr*>::iterator CI = AvailCopyMap.find(Src); + if (CI != AvailCopyMap.end()) { + MachineInstr *CopyMI = CI->second; + unsigned SrcSrc = CopyMI->getOperand(1).getReg(); + if (!ReservedRegs.test(Def) && + (SrcSrc == Def || TRI->isSubRegister(SrcSrc, Def))) { + // The two copies cancel out and the source of the first copy + // hasn't been overridden, eliminate the second one. e.g. + // %ECX<def> = COPY %EAX<kill> + // ... nothing clobbered EAX. + // %EAX<def> = COPY %ECX + // => + // %ECX<def> = COPY %EAX + CopyMI->getOperand(1).setIsKill(false); + MI->eraseFromParent(); + Changed = true; + ++NumDeletes; + continue; + } + } + + // If Src is defined by a previous copy, it cannot be eliminated. + CI = CopyMap.find(Src); + if (CI != CopyMap.end()) + MaybeDeadCopies.remove(CI->second); + for (const unsigned *AS = TRI->getAliasSet(Src); *AS; ++AS) { + CI = CopyMap.find(*AS); + if (CI != CopyMap.end()) + MaybeDeadCopies.remove(CI->second); + } + + // Copy is now a candidate for deletion. + MaybeDeadCopies.insert(MI); + + // If 'Src' is previously source of another copy, then this earlier copy's + // source is no longer available. e.g. + // %xmm9<def> = copy %xmm2 + // ... + // %xmm2<def> = copy %xmm0 + // ... + // %xmm2<def> = copy %xmm9 + SourceNoLongerAvailable(Def, SrcMap, AvailCopyMap); + + // Remember Def is defined by the copy. + CopyMap[Def] = MI; + AvailCopyMap[Def] = MI; + for (const unsigned *SR = TRI->getSubRegisters(Def); *SR; ++SR) { + CopyMap[*SR] = MI; + AvailCopyMap[*SR] = MI; + } + + // Remember source that's copied to Def. Once it's clobbered, then + // it's no longer available for copy propagation. + SrcMap[Src] = Def; + + continue; + } + + // Not a copy. + SmallVector<unsigned, 2> Defs; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + + if (TargetRegisterInfo::isVirtualRegister(Reg)) + report_fatal_error("MachineCopyPropagation should be run after" + " register allocation!"); + + if (MO.isDef()) { + Defs.push_back(Reg); + continue; + } + + // If 'Reg' is defined by a copy, the copy is no longer a candidate + // for elimination. + DenseMap<unsigned, MachineInstr*>::iterator CI = CopyMap.find(Reg); + if (CI != CopyMap.end()) + MaybeDeadCopies.remove(CI->second); + for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) { + CI = CopyMap.find(*AS); + if (CI != CopyMap.end()) + MaybeDeadCopies.remove(CI->second); + } + } + + for (unsigned i = 0, e = Defs.size(); i != e; ++i) { + unsigned Reg = Defs[i]; + + // No longer defined by a copy. + CopyMap.erase(Reg); + AvailCopyMap.erase(Reg); + for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) { + CopyMap.erase(*AS); + AvailCopyMap.erase(*AS); + } + + // If 'Reg' is previously source of a copy, it is no longer available for + // copy propagation. + SourceNoLongerAvailable(Reg, SrcMap, AvailCopyMap); + } + } + + // If MBB doesn't have successors, delete the copies whose defs are not used. + // If MBB does have successors, then conservative assume the defs are live-out + // since we don't want to trust live-in lists. + if (MBB.succ_empty()) { + for (SmallSetVector<MachineInstr*, 8>::iterator + DI = MaybeDeadCopies.begin(), DE = MaybeDeadCopies.end(); + DI != DE; ++DI) { + if (!ReservedRegs.test((*DI)->getOperand(0).getReg())) { + (*DI)->eraseFromParent(); + Changed = true; + ++NumDeletes; + } + } + } + + return Changed; +} + +bool MachineCopyPropagation::runOnMachineFunction(MachineFunction &MF) { + bool Changed = false; + + TRI = MF.getTarget().getRegisterInfo(); + ReservedRegs = TRI->getReservedRegs(MF); + + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) + Changed |= CopyPropagateBlock(*I); + + return Changed; +} diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp index ca02aa1b81..07cf027691 100644 --- a/lib/CodeGen/RegisterScavenging.cpp +++ b/lib/CodeGen/RegisterScavenging.cpp @@ -96,7 +96,7 @@ void RegScavenger::enterBasicBlock(MachineBasicBlock *mbb) { // Create callee-saved registers bitvector. CalleeSavedRegs.resize(NumPhysRegs); - const unsigned *CSRegs = TRI->getCalleeSavedRegs(); + const unsigned *CSRegs = TRI->getCalleeSavedRegs(&MF); if (CSRegs != NULL) for (unsigned i = 0; CSRegs[i]; ++i) CalleeSavedRegs.set(CSRegs[i]); diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index b02f3b6e1e..a6556a51c8 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -136,13 +136,8 @@ static const Value *getUnderlyingObjectForInstr(const MachineInstr *MI, void ScheduleDAGInstrs::StartBlock(MachineBasicBlock *BB) { LoopRegs.Deps.clear(); if (MachineLoop *ML = MLI.getLoopFor(BB)) - if (BB == ML->getLoopLatch()) { - MachineBasicBlock *Header = ML->getHeader(); - for (MachineBasicBlock::livein_iterator I = Header->livein_begin(), - E = Header->livein_end(); I != E; ++I) - LoopLiveInRegs.insert(*I); + if (BB == ML->getLoopLatch()) LoopRegs.VisitLoop(ML); - } } /// AddSchedBarrierDeps - Add dependencies from instructions in the current diff --git a/lib/CodeGen/ScheduleDAGInstrs.h b/lib/CodeGen/ScheduleDAGInstrs.h index 666bdf548c..a6233d3482 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.h +++ b/lib/CodeGen/ScheduleDAGInstrs.h @@ -120,11 +120,6 @@ namespace llvm { /// LoopDependencies LoopRegs; - /// LoopLiveInRegs - Track which regs are live into a loop, to help guide - /// back-edge-aware scheduling. - /// - SmallSet<unsigned, 8> LoopLiveInRegs; - protected: /// DbgValues - Remember instruction that preceeds DBG_VALUE. diff --git a/test/CodeGen/ARM/code-placement.ll b/test/CodeGen/ARM/code-placement.ll index 91ef659252..487ec690ea 100644 --- a/test/CodeGen/ARM/code-placement.ll +++ b/test/CodeGen/ARM/code-placement.ll @@ -12,9 +12,9 @@ entry: br i1 %0, label %bb2, label %bb bb: -; CHECK: LBB0_2: -; CHECK: bne LBB0_2 -; CHECK-NOT: b LBB0_2 +; CHECK: LBB0_1: +; CHECK: bne LBB0_1 +; CHECK-NOT: b LBB0_1 ; CHECK: bx lr %list_addr.05 = phi %struct.list_head* [ %2, %bb ], [ %list, %entry ] %next.04 = phi %struct.list_head* [ %list_addr.05, %bb ], [ null, %entry ] diff --git a/test/CodeGen/X86/machine-cp.ll b/test/CodeGen/X86/machine-cp.ll new file mode 100644 index 0000000000..54fa01c38f --- /dev/null +++ b/test/CodeGen/X86/machine-cp.ll @@ -0,0 +1,36 @@ +; RUN: llc -mtriple=x86_64-apple-macosx -mcpu=nocona < %s | FileCheck %s + +; After tail duplication, two copies in an early exit BB can be cancelled out. +; rdar://10640363 +define i32 @t1(i32 %a, i32 %b) nounwind { +entry: +; CHECK: t1: +; CHECK: jne + %cmp1 = icmp eq i32 %b, 0 + br i1 %cmp1, label %while.end, label %while.body + +; CHECK: BB +; CHECK-NOT: mov +; CHECK: ret + +while.body: ; preds = %entry, %while.body + %a.addr.03 = phi i32 [ %b.addr.02, %while.body ], [ %a, %entry ] + %b.addr.02 = phi i32 [ %rem, %while.body ], [ %b, %entry ] + %rem = srem i32 %a.addr.03, %b.addr.02 + %cmp = icmp eq i32 %rem, 0 + br i1 %cmp, label %while.end, label %while.body + +while.end: ; preds = %while.body, %entry + %a.addr.0.lcssa = phi i32 [ %a, %entry ], [ %b.addr.02, %while.body ] + ret i32 %a.addr.0.lcssa +} + +; Two movdqa (from phi-elimination) in the entry BB cancels out. +; rdar://10428165 +define <8 x i16> @t2(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone { +entry: +; CHECK: t2: +; CHECK-NOT: movdqa + %tmp8 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 undef, i32 undef, i32 7, i32 2, i32 8, i32 undef, i32 undef , i32 undef > + ret <8 x i16> %tmp8 +} |