diff options
author | Andrew Trick <atrick@apple.com> | 2012-06-05 21:11:27 +0000 |
---|---|---|
committer | Andrew Trick <atrick@apple.com> | 2012-06-05 21:11:27 +0000 |
commit | b7e0289fb320c8440ba5eed121a8b932dbd806a2 (patch) | |
tree | d2b65520c1191a79fa7dbccaf1947a82ede9d1ca /lib/CodeGen | |
parent | 1d72dadddbd3ec9a393dbaadda4c459ab1c4aeb1 (diff) | |
download | llvm-b7e0289fb320c8440ba5eed121a8b932dbd806a2.tar.gz llvm-b7e0289fb320c8440ba5eed121a8b932dbd806a2.tar.bz2 llvm-b7e0289fb320c8440ba5eed121a8b932dbd806a2.tar.xz |
misched: API for minimum vs. expected latency.
Minimum latency determines per-cycle scheduling groups.
Expected latency determines critical path and cost.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@158021 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/CodeGen')
-rw-r--r-- | lib/CodeGen/MachineScheduler.cpp | 112 | ||||
-rw-r--r-- | lib/CodeGen/ScheduleDAGInstrs.cpp | 79 | ||||
-rw-r--r-- | lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h | 6 | ||||
-rw-r--r-- | lib/CodeGen/TwoAddressInstructionPass.cpp | 2 |
4 files changed, 100 insertions, 99 deletions
diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp index c318d84617..aa591779ba 100644 --- a/lib/CodeGen/MachineScheduler.cpp +++ b/lib/CodeGen/MachineScheduler.cpp @@ -21,8 +21,9 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/ScheduleDAGInstrs.h" #include "llvm/CodeGen/ScheduleHazardRecognizer.h" -#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/MC/MCInstrItineraries.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -394,6 +395,12 @@ public: return RegionCriticalPSets; } + /// getIssueWidth - Return the max instructions per scheduling group. + /// + unsigned getIssueWidth() const { + return InstrItins ? InstrItins->Props.IssueWidth : 1; + } + protected: void initRegPressure(); void updateScheduledPressure(std::vector<unsigned> NewMaxPressure); @@ -787,13 +794,16 @@ class ConvergingScheduler : public MachineSchedStrategy { /// MinReadyCycle - Cycle of the soonest available instruction. unsigned MinReadyCycle; + // Remember the greatest min operand latency. + unsigned MaxMinLatency; + /// Pending queues extend the ready queues with the same ID and the /// PendingFlag set. SchedBoundary(unsigned ID, const Twine &Name): Available(ID, Name+".A"), Pending(ID << ConvergingScheduler::LogMaxQID, Name+".P"), CheckPending(false), HazardRec(0), CurrCycle(0), IssueCount(0), - MinReadyCycle(UINT_MAX) {} + MinReadyCycle(UINT_MAX), MaxMinLatency(0) {} ~SchedBoundary() { delete HazardRec; } @@ -805,6 +815,8 @@ class ConvergingScheduler : public MachineSchedStrategy { void bumpCycle(); + void bumpNode(SUnit *SU, unsigned IssueWidth); + void releasePending(); void removeReady(SUnit *SU); @@ -868,25 +880,53 @@ void ConvergingScheduler::initialize(ScheduleDAGMI *dag) { } void ConvergingScheduler::releaseTopNode(SUnit *SU) { - Top.releaseNode(SU, SU->getDepth()); + if (SU->isScheduled) + return; + + for (SUnit::succ_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle; + unsigned Latency = + DAG->computeOperandLatency(I->getSUnit(), SU, *I, /*FindMin=*/true); +#ifndef NDEBUG + Top.MaxMinLatency = std::max(Latency, Top.MaxMinLatency); +#endif + if (SU->TopReadyCycle < PredReadyCycle + Latency) + SU->TopReadyCycle = PredReadyCycle + Latency; + } + Top.releaseNode(SU, SU->TopReadyCycle); } void ConvergingScheduler::releaseBottomNode(SUnit *SU) { - Bot.releaseNode(SU, SU->getHeight()); + if (SU->isScheduled) + return; + + assert(SU->getInstr() && "Scheduled SUnit must have instr"); + + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle; + unsigned Latency = + DAG->computeOperandLatency(SU, I->getSUnit(), *I, /*FindMin=*/true); +#ifndef NDEBUG + Bot.MaxMinLatency = std::max(Latency, Bot.MaxMinLatency); +#endif + if (SU->BotReadyCycle < SuccReadyCycle + Latency) + SU->BotReadyCycle = SuccReadyCycle + Latency; + } + Bot.releaseNode(SU, SU->BotReadyCycle); } void ConvergingScheduler::SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle) { - if (SU->isScheduled) - return; - if (ReadyCycle < MinReadyCycle) MinReadyCycle = ReadyCycle; // Check for interlocks first. For the purpose of other heuristics, an // instruction that cannot issue appears as if it's not in the ReadyQueue. - if (HazardRec->isEnabled() - && HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard) + if (ReadyCycle > CurrCycle + || (HazardRec->isEnabled() && (HazardRec->getHazardType(SU) + != ScheduleHazardRecognizer::NoHazard))) Pending.push(SU); else Available.push(SU); @@ -900,10 +940,11 @@ void ConvergingScheduler::SchedBoundary::bumpCycle() { unsigned NextCycle = std::max(CurrCycle + 1, MinReadyCycle); if (!HazardRec->isEnabled()) { - // Bypass lots of virtual calls in case of long latency. + // Bypass HazardRec virtual calls. CurrCycle = NextCycle; } else { + // Bypass getHazardType calls in case of long latency. for (; CurrCycle != NextCycle; ++CurrCycle) { if (isTop()) HazardRec->AdvanceCycle(); @@ -917,6 +958,26 @@ void ConvergingScheduler::SchedBoundary::bumpCycle() { << CurrCycle << '\n'); } +/// Move the boundary of scheduled code by one SUnit. +void ConvergingScheduler::SchedBoundary::bumpNode(SUnit *SU, + unsigned IssueWidth) { + // Update the reservation table. + if (HazardRec->isEnabled()) { + if (!isTop() && SU->isCall) { + // Calls are scheduled with their preceding instructions. For bottom-up + // scheduling, clear the pipeline state before emitting. + HazardRec->Reset(); + } + HazardRec->EmitInstruction(SU); + } + // Check the instruction group size limit. + ++IssueCount; + if (IssueCount == IssueWidth) { + DEBUG(dbgs() << "*** Max instrs at cycle " << CurrCycle << '\n'); + bumpCycle(); + } +} + /// Release pending ready nodes in to the available queue. This makes them /// visible to heuristics. void ConvergingScheduler::SchedBoundary::releasePending() { @@ -928,7 +989,7 @@ void ConvergingScheduler::SchedBoundary::releasePending() { // so, add them to the available queue. for (unsigned i = 0, e = Pending.size(); i != e; ++i) { SUnit *SU = *(Pending.begin()+i); - unsigned ReadyCycle = isTop() ? SU->getHeight() : SU->getDepth(); + unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle; if (ReadyCycle < MinReadyCycle) MinReadyCycle = ReadyCycle; @@ -965,7 +1026,8 @@ SUnit *ConvergingScheduler::SchedBoundary::pickOnlyChoice() { releasePending(); for (unsigned i = 0; Available.empty(); ++i) { - assert(i <= HazardRec->getMaxLookAhead() && "permanent hazard"); (void)i; + assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) && + "permanent hazard"); (void)i; bumpCycle(); releasePending(); } @@ -1205,27 +1267,15 @@ SUnit *ConvergingScheduler::pickNode(bool &IsTopNode) { /// Update the scheduler's state after scheduling a node. This is the same node /// that was just returned by pickNode(). However, ScheduleDAGMI needs to update -/// it's state based on the current cycle before MachineSchedStrategy. +/// it's state based on the current cycle before MachineSchedStrategy does. void ConvergingScheduler::schedNode(SUnit *SU, bool IsTopNode) { - // Update the reservation table. - if (IsTopNode && Top.HazardRec->isEnabled()) { - Top.HazardRec->EmitInstruction(SU); - if (Top.HazardRec->atIssueLimit()) { - DEBUG(dbgs() << "*** Max instrs at cycle " << Top.CurrCycle << '\n'); - Top.bumpCycle(); - } + if (IsTopNode) { + SU->TopReadyCycle = Top.CurrCycle; + Top.bumpNode(SU, DAG->getIssueWidth()); } - else if (Bot.HazardRec->isEnabled()) { - if (SU->isCall) { - // Calls are scheduled with their preceding instructions. For bottom-up - // scheduling, clear the pipeline state before emitting. - Bot.HazardRec->Reset(); - } - Bot.HazardRec->EmitInstruction(SU); - if (Bot.HazardRec->atIssueLimit()) { - DEBUG(dbgs() << "*** Max instrs at cycle " << Bot.CurrCycle << '\n'); - Bot.bumpCycle(); - } + else { + SU->BotReadyCycle = Bot.CurrCycle; + Bot.bumpNode(SU, DAG->getIssueWidth()); } } diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index 773a29d0c2..875e012ed1 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -271,10 +271,12 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, // Adjust the dependence latency using operand def/use // information (if any), and then allow the target to // perform its own adjustments. - const SDep& dep = SDep(SU, SDep::Data, LDataLatency, *Alias); + SDep dep(SU, SDep::Data, LDataLatency, *Alias); if (!UnitLatencies) { - computeOperandLatency(SU, UseSU, const_cast<SDep &>(dep)); - ST.adjustSchedDependency(SU, UseSU, const_cast<SDep &>(dep)); + unsigned Latency = computeOperandLatency(SU, UseSU, dep); + dep.setLatency(Latency); + + ST.adjustSchedDependency(SU, UseSU, dep); } UseSU->addPred(dep); } @@ -461,11 +463,13 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) { // Create a data dependence. // // TODO: Handle "special" address latencies cleanly. - const SDep &dep = SDep(DefSU, SDep::Data, DefSU->Latency, Reg); + SDep dep(DefSU, SDep::Data, DefSU->Latency, Reg); if (!UnitLatencies) { // Adjust the dependence latency using operand def/use information, then // allow the target to perform its own adjustments. - computeOperandLatency(DefSU, SU, const_cast<SDep &>(dep)); + unsigned Latency = computeOperandLatency(DefSU, SU, const_cast<SDep &>(dep)); + dep.setLatency(Latency); + const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>(); ST.adjustSchedDependency(DefSU, SU, const_cast<SDep &>(dep)); } @@ -970,8 +974,9 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, } void ScheduleDAGInstrs::computeLatency(SUnit *SU) { - // Compute the latency for the node. - if (!InstrItins || InstrItins->isEmpty()) { + // Compute the latency for the node. We only provide a default for missing + // itineraries. Empty itineraries still have latency properties. + if (!InstrItins) { SU->Latency = 1; // Simplistic target-independent heuristic: assume that loads take @@ -983,63 +988,15 @@ void ScheduleDAGInstrs::computeLatency(SUnit *SU) { } } -void ScheduleDAGInstrs::computeOperandLatency(SUnit *Def, SUnit *Use, - SDep& dep) const { - if (!InstrItins || InstrItins->isEmpty()) - return; - +unsigned ScheduleDAGInstrs::computeOperandLatency(SUnit *Def, SUnit *Use, + const SDep& dep, + bool FindMin) const { // For a data dependency with a known register... if ((dep.getKind() != SDep::Data) || (dep.getReg() == 0)) - return; + return 1; - const unsigned Reg = dep.getReg(); - - // ... find the definition of the register in the defining - // instruction - MachineInstr *DefMI = Def->getInstr(); - int DefIdx = DefMI->findRegisterDefOperandIdx(Reg); - if (DefIdx != -1) { - const MachineOperand &MO = DefMI->getOperand(DefIdx); - if (MO.isReg() && MO.isImplicit() && - DefIdx >= (int)DefMI->getDesc().getNumOperands()) { - // This is an implicit def, getOperandLatency() won't return the correct - // latency. e.g. - // %D6<def>, %D7<def> = VLD1q16 %R2<kill>, 0, ..., %Q3<imp-def> - // %Q1<def> = VMULv8i16 %Q1<kill>, %Q3<kill>, ... - // What we want is to compute latency between def of %D6/%D7 and use of - // %Q3 instead. - unsigned Op2 = DefMI->findRegisterDefOperandIdx(Reg, false, true, TRI); - if (DefMI->getOperand(Op2).isReg()) - DefIdx = Op2; - } - MachineInstr *UseMI = Use->getInstr(); - // For all uses of the register, calculate the maxmimum latency - int Latency = -1; - if (UseMI) { - for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = UseMI->getOperand(i); - if (!MO.isReg() || !MO.isUse()) - continue; - unsigned MOReg = MO.getReg(); - if (MOReg != Reg) - continue; - - int UseCycle = TII->getOperandLatency(InstrItins, DefMI, DefIdx, - UseMI, i); - Latency = std::max(Latency, UseCycle); - } - } else { - // UseMI is null, then it must be a scheduling barrier. - if (!InstrItins || InstrItins->isEmpty()) - return; - unsigned DefClass = DefMI->getDesc().getSchedClass(); - Latency = InstrItins->getOperandCycle(DefClass, DefIdx); - } - - // If we found a latency, then replace the existing dependence latency. - if (Latency >= 0) - dep.setLatency(Latency); - } + return TII->computeOperandLatency(InstrItins, TRI, Def->getInstr(), + Use->getInstr(), dep.getReg(), FindMin); } void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const { diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h index 75940ec33d..5384576a0c 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -98,12 +98,6 @@ namespace llvm { /// virtual void computeLatency(SUnit *SU); - /// computeOperandLatency - Override dependence edge latency using - /// operand use/def information - /// - virtual void computeOperandLatency(SUnit *Def, SUnit *Use, - SDep& dep) const { } - virtual void computeOperandLatency(SDNode *Def, SDNode *Use, unsigned OpIdx, SDep& dep) const; diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index 5218aa1f7a..ec2b577230 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -1046,7 +1046,7 @@ bool TwoAddressInstructionPass::isDefTooClose(unsigned Reg, unsigned Dist, return true; // Below MI unsigned DefDist = DDI->second; assert(Dist > DefDist && "Visited def already?"); - if (TII->getInstrLatency(InstrItins, DefMI) > (int)(Dist - DefDist)) + if (TII->getInstrLatency(InstrItins, DefMI) > (Dist - DefDist)) return true; } return false; |