diff options
Diffstat (limited to 'lib/CodeGen/PostRASchedulerList.cpp')
-rw-r--r-- | lib/CodeGen/PostRASchedulerList.cpp | 98 |
1 files changed, 61 insertions, 37 deletions
diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp index 2f7c0118a0..0fc6c3370b 100644 --- a/lib/CodeGen/PostRASchedulerList.cpp +++ b/lib/CodeGen/PostRASchedulerList.cpp @@ -23,7 +23,9 @@ #include "llvm/CodeGen/ScheduleDAGInstrs.h" #include "llvm/CodeGen/LatencyPriorityQueue.h" #include "llvm/CodeGen/SchedulerRegistry.h" +#include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -49,6 +51,14 @@ namespace { static char ID; PostRAScheduler() : MachineFunctionPass(&ID) {} + void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<MachineDominatorTree>(); + AU.addPreserved<MachineDominatorTree>(); + AU.addRequired<MachineLoopInfo>(); + AU.addPreserved<MachineLoopInfo>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + const char *getPassName() const { return "Post RA top-down list latency scheduler"; } @@ -72,8 +82,10 @@ namespace { ScheduleDAGTopologicalSort Topo; public: - SchedulePostRATDList(MachineBasicBlock *mbb, const TargetMachine &tm) - : ScheduleDAGInstrs(mbb, tm), Topo(SUnits) {} + SchedulePostRATDList(MachineBasicBlock *mbb, const TargetMachine &tm, + const MachineLoopInfo &MLI, + const MachineDominatorTree &MDT) + : ScheduleDAGInstrs(mbb, tm, MLI, MDT), Topo(SUnits) {} void Schedule(); @@ -88,11 +100,14 @@ namespace { bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { DOUT << "PostRAScheduler\n"; + const MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>(); + const MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>(); + // Loop over all of the basic blocks for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end(); MBB != MBBe; ++MBB) { - SchedulePostRATDList Scheduler(MBB, Fn.getTarget()); + SchedulePostRATDList Scheduler(MBB, Fn.getTarget(), MLI, MDT); Scheduler.Run(); @@ -142,6 +157,28 @@ getInstrOperandRegClass(const TargetRegisterInfo *TRI, return TRI->getRegClass(II.OpInfo[Op].RegClass); } +/// CriticalPathStep - Return the next SUnit after SU on the bottom-up +/// critical path. +static SDep *CriticalPathStep(SUnit *SU) { + SDep *Next = 0; + unsigned NextDepth = 0; + // Find the predecessor edge with the greatest depth. + for (SUnit::pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end(); + P != PE; ++P) { + SUnit *PredSU = P->getSUnit(); + unsigned PredLatency = P->getLatency(); + unsigned PredTotalLatency = PredSU->getDepth() + PredLatency; + // In the case of a latency tie, prefer an anti-dependency edge over + // other types of edges. + if (NextDepth < PredTotalLatency || + (NextDepth == PredTotalLatency && P->getKind() == SDep::Anti)) { + NextDepth = PredTotalLatency; + Next = &*P; + } + } + return Next; +} + /// BreakAntiDependencies - Identifiy anti-dependencies along the critical path /// of the ScheduleDAG and break them by renaming registers. /// @@ -150,34 +187,16 @@ bool SchedulePostRATDList::BreakAntiDependencies() { // so just duck out immediately if the block is empty. if (BB->empty()) return false; - Topo.InitDAGTopologicalSorting(); - - // Compute a critical path for the DAG. + // Find the node at the bottom of the critical path. SUnit *Max = 0; - std::vector<SDep *> CriticalPath(SUnits.size()); - for (ScheduleDAGTopologicalSort::const_iterator I = Topo.begin(), - E = Topo.end(); I != E; ++I) { - SUnit *SU = &SUnits[*I]; - for (SUnit::pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end(); - P != PE; ++P) { - SUnit *PredSU = P->getSUnit(); - // This assumes that there's no delay for reusing registers. - unsigned PredLatency = P->getLatency(); - unsigned PredTotalLatency = PredSU->CycleBound + PredLatency; - if (SU->CycleBound < PredTotalLatency || - (SU->CycleBound == PredTotalLatency && - P->getKind() == SDep::Anti)) { - SU->CycleBound = PredTotalLatency; - CriticalPath[*I] = &*P; - } - } - // Keep track of the node at the end of the critical path. - if (!Max || SU->CycleBound + SU->Latency > Max->CycleBound + Max->Latency) + for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { + SUnit *SU = &SUnits[i]; + if (!Max || SU->getDepth() + SU->Latency > Max->getDepth() + Max->Latency) Max = SU; } DOUT << "Critical path has total latency " - << (Max ? Max->CycleBound + Max->Latency : 0) << "\n"; + << (Max ? Max->getDepth() + Max->Latency : 0) << "\n"; // Walk the critical path from the bottom up. Collect all anti-dependence // edges on the critical path. Skip anti-dependencies between SUnits that @@ -195,9 +214,9 @@ bool SchedulePostRATDList::BreakAntiDependencies() { // the anti-dependencies in an instruction in order to be effective. BitVector AllocatableSet = TRI->getAllocatableSet(*MF); DenseMap<MachineInstr *, unsigned> CriticalAntiDeps; - for (SUnit *SU = Max; CriticalPath[SU->NodeNum]; - SU = CriticalPath[SU->NodeNum]->getSUnit()) { - SDep *Edge = CriticalPath[SU->NodeNum]; + SUnit *SU = Max; + for (SDep *Edge = CriticalPathStep(SU); Edge; + Edge = CriticalPathStep(SU = Edge->getSUnit())) { SUnit *NextSU = Edge->getSUnit(); // Only consider anti-dependence edges. if (Edge->getKind() != SDep::Anti) @@ -494,6 +513,11 @@ bool SchedulePostRATDList::BreakAntiDependencies() { Classes[SubregReg] = 0; RegRefs.erase(SubregReg); } + for (const unsigned *Super = TRI->getSuperRegisters(Reg); + *Super; ++Super) { + unsigned SuperReg = *Super; + Classes[SuperReg] = reinterpret_cast<TargetRegisterClass *>(-1); + } } for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); @@ -556,8 +580,7 @@ void SchedulePostRATDList::ReleaseSucc(SUnit *SU, SDep *SuccEdge) { // Compute how many cycles it will be before this actually becomes // available. This is the max of the start time of all predecessors plus // their latencies. - unsigned PredDoneCycle = SU->Cycle + SuccEdge->getLatency(); - SuccSU->CycleBound = std::max(SuccSU->CycleBound, PredDoneCycle); + SuccSU->setDepthToAtLeast(SU->getDepth() + SuccEdge->getLatency()); if (SuccSU->NumPredsLeft == 0) { PendingQueue.push_back(SuccSU); @@ -572,7 +595,8 @@ void SchedulePostRATDList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { DEBUG(SU->dump(this)); Sequence.push_back(SU); - SU->Cycle = CurCycle; + assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!"); + SU->setDepthToAtLeast(CurCycle); // Top down: release successors. for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); @@ -603,21 +627,21 @@ void SchedulePostRATDList::ListScheduleTopDown() { while (!AvailableQueue.empty() || !PendingQueue.empty()) { // Check to see if any of the pending instructions are ready to issue. If // so, add them to the available queue. + unsigned MinDepth = ~0u; for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) { - if (PendingQueue[i]->CycleBound == CurCycle) { + if (PendingQueue[i]->getDepth() <= CurCycle) { AvailableQueue.push(PendingQueue[i]); PendingQueue[i]->isAvailable = true; PendingQueue[i] = PendingQueue.back(); PendingQueue.pop_back(); --i; --e; - } else { - assert(PendingQueue[i]->CycleBound > CurCycle && "Non-positive latency?"); - } + } else if (PendingQueue[i]->getDepth() < MinDepth) + MinDepth = PendingQueue[i]->getDepth(); } // If there are no instructions available, don't try to issue anything. if (AvailableQueue.empty()) { - ++CurCycle; + CurCycle = MinDepth != ~0u ? MinDepth : CurCycle + 1; continue; } |