summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndrew Trick <atrick@apple.com>2013-06-15 05:39:19 +0000
committerAndrew Trick <atrick@apple.com>2013-06-15 05:39:19 +0000
commitfa989e7d432d170b3f92cbd87e2610b03babaea7 (patch)
tree2b74b777ca581bf894b10049f97aa8eb9b96292e
parenta3d82ce19fd825cbf3bf85b5969424217fc40b45 (diff)
downloadllvm-fa989e7d432d170b3f92cbd87e2610b03babaea7.tar.gz
llvm-fa989e7d432d170b3f92cbd87e2610b03babaea7.tar.bz2
llvm-fa989e7d432d170b3f92cbd87e2610b03babaea7.tar.xz
MI-Sched: heuristics using the new latency and machine model.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184038 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/CodeGen/MachineScheduler.cpp564
1 files changed, 330 insertions, 234 deletions
diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp
index c87a1be720..3f68caf228 100644
--- a/lib/CodeGen/MachineScheduler.cpp
+++ b/lib/CodeGen/MachineScheduler.cpp
@@ -1097,7 +1097,7 @@ void CopyConstrain::apply(ScheduleDAGMI *DAG) {
}
//===----------------------------------------------------------------------===//
-// ConvergingScheduler - Implementation of the standard MachineSchedStrategy.
+// ConvergingScheduler - Implementation of the generic MachineSchedStrategy.
//===----------------------------------------------------------------------===//
namespace {
@@ -1185,32 +1185,21 @@ public:
// Critical path through the DAG in expected latency.
unsigned CriticalPath;
+ // Scaled count of micro-ops left to schedule.
+ unsigned RemIssueCount;
+
// Unscheduled resources
SmallVector<unsigned, 16> RemainingCounts;
- // Critical resource for the unscheduled zone.
- unsigned CritResIdx;
- // Number of micro-ops left to schedule.
- unsigned RemainingMicroOps;
void reset() {
CriticalPath = 0;
+ RemIssueCount = 0;
RemainingCounts.clear();
- CritResIdx = 0;
- RemainingMicroOps = 0;
}
SchedRemainder() { reset(); }
void init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel);
-
- unsigned getMaxRemainingCount(const TargetSchedModel *SchedModel) const {
- if (!SchedModel->hasInstrSchedModel())
- return 0;
-
- return std::max(
- RemainingMicroOps * SchedModel->getMicroOpFactor(),
- RemainingCounts[CritResIdx]);
- }
};
/// Each Scheduling boundary is associated with ready queues. It tracks the
@@ -1231,7 +1220,12 @@ public:
ScheduleHazardRecognizer *HazardRec;
+ /// Number of cycles it takes to issue the instructions scheduled in this
+ /// zone. It is defined as: scheduled-micro-ops / issue-width + stalls.
+ /// See getStalls().
unsigned CurrCycle;
+
+ /// Micro-ops issued in the current cycle
unsigned CurrMOps;
/// MinReadyCycle - Cycle of the soonest available instruction.
@@ -1241,21 +1235,30 @@ public:
unsigned ExpectedLatency;
// The latency of dependence chains leading into this zone.
- // For each node scheduled: DLat = max DLat, N.Depth.
+ // For each node scheduled top-down: DLat = max DLat, N.Depth.
// For each cycle scheduled: DLat -= 1.
unsigned DependentLatency;
- // Resources used in the scheduled zone beyond this boundary.
- SmallVector<unsigned, 16> ResourceCounts;
+ /// Count the scheduled (issued) micro-ops that can be retired by
+ /// time=CurrCycle assuming the first scheduled instr is retired at time=0.
+ unsigned RetiredMOps;
+
+ // Count scheduled resources that have been executed. Resources are
+ // considered executed if they become ready in the time that it takes to
+ // saturate any resource including the one in question. Counts are scaled
+ // for direct comparison with other resources. Counts ca be compared with
+ // MOps * getMicroOpFactor and Latency * getLatencyFactor.
+ SmallVector<unsigned, 16> ExecutedResCounts;
+
+ /// Cache the max count for a single resource.
+ unsigned MaxExecutedResCount;
// Cache the critical resources ID in this scheduled zone.
- unsigned CritResIdx;
+ unsigned ZoneCritResIdx;
// Is the scheduled region resource limited vs. latency limited.
bool IsResourceLimited;
- unsigned ExpectedCount;
-
#ifndef NDEBUG
// Remember the greatest operand latency as an upper bound on the number of
// times we should retry the pending queue because of a hazard.
@@ -1276,16 +1279,16 @@ public:
MinReadyCycle = UINT_MAX;
ExpectedLatency = 0;
DependentLatency = 0;
- ResourceCounts.resize(1);
- assert(!ResourceCounts[0] && "nonzero count for bad resource");
- CritResIdx = 0;
+ RetiredMOps = 0;
+ MaxExecutedResCount = 0;
+ ZoneCritResIdx = 0;
IsResourceLimited = false;
- ExpectedCount = 0;
#ifndef NDEBUG
MaxObservedLatency = 0;
#endif
// Reserve a zero-count for invalid CritResIdx.
- ResourceCounts.resize(1);
+ ExecutedResCounts.resize(1);
+ assert(!ExecutedResCounts[0] && "nonzero count for bad resource");
}
/// Pending queues extend the ready queues with the same ID and the
@@ -1306,25 +1309,58 @@ public:
return Available.getID() == ConvergingScheduler::TopQID;
}
+ const char *getResourceName(unsigned PIdx) {
+ if (!PIdx)
+ return "MOps";
+ return SchedModel->getProcResource(PIdx)->Name;
+ }
+
+ /// Get the number of latency cycles "covered" by the scheduled
+ /// instructions. This is the larger of the critical path within the zone
+ /// and the number of cycles required to issue the instructions.
+ unsigned getScheduledLatency() const {
+ return std::max(ExpectedLatency, CurrCycle);
+ }
+
unsigned getUnscheduledLatency(SUnit *SU) const {
- if (isTop())
- return SU->getHeight();
- return SU->getDepth() + SU->Latency;
+ return isTop() ? SU->getHeight() : SU->getDepth();
+ }
+
+ unsigned getResourceCount(unsigned ResIdx) const {
+ return ExecutedResCounts[ResIdx];
}
+ /// Get the scaled count of scheduled micro-ops and resources, including
+ /// executed resources.
unsigned getCriticalCount() const {
- return ResourceCounts[CritResIdx];
+ if (!ZoneCritResIdx)
+ return RetiredMOps * SchedModel->getMicroOpFactor();
+ return getResourceCount(ZoneCritResIdx);
+ }
+
+ /// Get a scaled count for the minimum execution time of the scheduled
+ /// micro-ops that are ready to execute by getExecutedCount. Notice the
+ /// feedback loop.
+ unsigned getExecutedCount() const {
+ return std::max(CurrCycle * SchedModel->getLatencyFactor(),
+ MaxExecutedResCount);
}
bool checkHazard(SUnit *SU);
- void setLatencyPolicy(CandPolicy &Policy);
+ unsigned findMaxLatency(ArrayRef<SUnit*> ReadySUs);
+
+ unsigned getOtherResourceCount(unsigned &OtherCritIdx);
+
+ void setPolicy(CandPolicy &Policy, SchedBoundary &OtherZone);
void releaseNode(SUnit *SU, unsigned ReadyCycle);
- void bumpCycle();
+ void bumpCycle(unsigned NextCycle);
- void countResource(unsigned PIdx, unsigned Cycles);
+ void incExecutedResources(unsigned PIdx, unsigned Count);
+
+ unsigned countResource(unsigned PIdx, unsigned Cycles, unsigned ReadyCycle);
void bumpNode(SUnit *SU);
@@ -1333,6 +1369,8 @@ public:
void removeReady(SUnit *SU);
SUnit *pickOnlyChoice();
+
+ void dumpScheduledState();
};
private:
@@ -1369,15 +1407,6 @@ public:
virtual void registerRoots();
protected:
- void balanceZones(
- ConvergingScheduler::SchedBoundary &CriticalZone,
- ConvergingScheduler::SchedCandidate &CriticalCand,
- ConvergingScheduler::SchedBoundary &OppositeZone,
- ConvergingScheduler::SchedCandidate &OppositeCand);
-
- void checkResourceLimits(ConvergingScheduler::SchedCandidate &TopCand,
- ConvergingScheduler::SchedCandidate &BotCand);
-
void tryCandidate(SchedCandidate &Cand,
SchedCandidate &TryCand,
SchedBoundary &Zone,
@@ -1407,7 +1436,8 @@ init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
for (std::vector<SUnit>::iterator
I = DAG->SUnits.begin(), E = DAG->SUnits.end(); I != E; ++I) {
const MCSchedClassDesc *SC = DAG->getSchedClass(&*I);
- RemainingMicroOps += SchedModel->getNumMicroOps(I->getInstr(), SC);
+ RemIssueCount += SchedModel->getNumMicroOps(I->getInstr(), SC)
+ * SchedModel->getMicroOpFactor();
for (TargetSchedModel::ProcResIter
PI = SchedModel->getWriteProcResBegin(SC),
PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
@@ -1416,17 +1446,6 @@ init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
RemainingCounts[PIdx] += (Factor * PI->Cycles);
}
}
- for (unsigned PIdx = 0, PEnd = SchedModel->getNumProcResourceKinds();
- PIdx != PEnd; ++PIdx) {
- if ((int)(RemainingCounts[PIdx] - RemainingCounts[CritResIdx])
- >= (int)SchedModel->getLatencyFactor()) {
- CritResIdx = PIdx;
- }
- }
- DEBUG(dbgs() << "Critical Resource: "
- << SchedModel->getProcResource(CritResIdx)->Name
- << ": " << RemainingCounts[CritResIdx]
- << " / " << SchedModel->getLatencyFactor() << '\n');
}
void ConvergingScheduler::SchedBoundary::
@@ -1436,7 +1455,7 @@ init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
SchedModel = smodel;
Rem = rem;
if (SchedModel->hasInstrSchedModel())
- ResourceCounts.resize(SchedModel->getNumProcResourceKinds());
+ ExecutedResCounts.resize(SchedModel->getNumProcResourceKinds());
}
void ConvergingScheduler::initialize(ScheduleDAGMI *dag) {
@@ -1538,50 +1557,125 @@ bool ConvergingScheduler::SchedBoundary::checkHazard(SUnit *SU) {
return false;
}
-/// Compute the remaining latency to determine whether ILP should be increased.
-void ConvergingScheduler::SchedBoundary::setLatencyPolicy(CandPolicy &Policy) {
- DEBUG(dbgs() << " " << Available.getName()
- << " DependentLatency " << DependentLatency << '\n');
-
- // FIXME: compile time. In all, we visit four queues here one we should only
- // need to visit the one that was last popped if we cache the result.
- unsigned RemLatency = DependentLatency;
- for (ReadyQueue::iterator I = Available.begin(), E = Available.end();
+// Find the unscheduled node in ReadySUs with the highest latency.
+unsigned ConvergingScheduler::SchedBoundary::
+findMaxLatency(ArrayRef<SUnit*> ReadySUs) {
+ SUnit *LateSU = 0;
+ unsigned RemLatency = 0;
+ for (ArrayRef<SUnit*>::iterator I = ReadySUs.begin(), E = ReadySUs.end();
I != E; ++I) {
unsigned L = getUnscheduledLatency(*I);
if (L > RemLatency) {
- DEBUG(dbgs() << " " << Available.getName()
- << " RemLatency SU(" << (*I)->NodeNum << ") " << L << '\n');
RemLatency = L;
+ LateSU = *I;
}
}
- for (ReadyQueue::iterator I = Pending.begin(), E = Pending.end();
- I != E; ++I) {
- unsigned L = getUnscheduledLatency(*I);
- if (L > RemLatency)
- RemLatency = L;
+ if (LateSU) {
+ DEBUG(dbgs() << Available.getName() << " RemLatency SU("
+ << LateSU->NodeNum << ") " << RemLatency << "c\n");
}
- unsigned CriticalPathLimit = Rem->CriticalPath;
- DEBUG(dbgs() << " " << Available.getName()
- << " ExpectedLatency " << ExpectedLatency
- << " CP Limit " << CriticalPathLimit << '\n');
+ return RemLatency;
+}
+
+// Count resources in this zone and the remaining unscheduled
+// instruction. Return the max count, scaled. Set OtherCritIdx to the critical
+// resource index, or zero if the zone is issue limited.
+unsigned ConvergingScheduler::SchedBoundary::
+getOtherResourceCount(unsigned &OtherCritIdx) {
+ if (!SchedModel->hasInstrSchedModel())
+ return 0;
+
+ unsigned OtherCritCount = Rem->RemIssueCount
+ + (RetiredMOps * SchedModel->getMicroOpFactor());
+ DEBUG(dbgs() << " " << Available.getName() << " + Remain MOps: "
+ << OtherCritCount / SchedModel->getMicroOpFactor() << '\n');
+ OtherCritIdx = 0;
+ for (unsigned PIdx = 1, PEnd = SchedModel->getNumProcResourceKinds();
+ PIdx != PEnd; ++PIdx) {
+ unsigned OtherCount = getResourceCount(PIdx) + Rem->RemainingCounts[PIdx];
+ if (OtherCount > OtherCritCount) {
+ OtherCritCount = OtherCount;
+ OtherCritIdx = PIdx;
+ }
+ }
+ if (OtherCritIdx) {
+ DEBUG(dbgs() << " " << Available.getName() << " + Remain CritRes: "
+ << OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx)
+ << " " << getResourceName(OtherCritIdx) << "\n");
+ }
+ return OtherCritCount;
+}
+
+/// Set the CandPolicy for this zone given the current resources and latencies
+/// inside and outside the zone.
+void ConvergingScheduler::SchedBoundary::setPolicy(CandPolicy &Policy,
+ SchedBoundary &OtherZone) {
+ // Now that potential stalls have been considered, apply preemptive heuristics
+ // based on the the total latency and resources inside and outside this
+ // zone.
+
+ // Compute remaining latency. We need this both to determine whether the
+ // overall schedule has become latency-limited and whether the instructions
+ // outside this zone are resource or latency limited.
+ //
+ // The "dependent" latency is updated incrementally during scheduling as the
+ // max height/depth of scheduled nodes minus the cycles since it was
+ // scheduled:
+ // DLat = max (N.depth - (CurrCycle - N.ReadyCycle) for N in Zone
+ //
+ // The "independent" latency is the max ready queue depth:
+ // ILat = max N.depth for N in Available|Pending
+ //
+ // RemainingLatency is the greater of independent and dependent latency.
+ unsigned RemLatency = DependentLatency;
+ RemLatency = std::max(RemLatency, findMaxLatency(Available.elements()));
+ RemLatency = std::max(RemLatency, findMaxLatency(Pending.elements()));
- if (RemLatency + std::max(ExpectedLatency, CurrCycle) >= CriticalPathLimit
- && RemLatency > Rem->getMaxRemainingCount(SchedModel)) {
- Policy.ReduceLatency = true;
- DEBUG(dbgs() << " Increase ILP: " << Available.getName() << '\n');
+ // Compute the critical resource outside the zone.
+ unsigned OtherCritIdx;
+ unsigned OtherCount = OtherZone.getOtherResourceCount(OtherCritIdx);
+
+ bool OtherResLimited = false;
+ if (SchedModel->hasInstrSchedModel()) {
+ unsigned LFactor = SchedModel->getLatencyFactor();
+ OtherResLimited = (int)(OtherCount - (RemLatency * LFactor)) > (int)LFactor;
}
+ if (!OtherResLimited && (RemLatency + CurrCycle > Rem->CriticalPath)) {
+ Policy.ReduceLatency |= true;
+ DEBUG(dbgs() << " " << Available.getName() << " RemainingLatency "
+ << RemLatency << " + " << CurrCycle << "c > CritPath "
+ << Rem->CriticalPath << "\n");
+ }
+ // If the same resource is limiting inside and outside the zone, do nothing.
+ if (IsResourceLimited && OtherResLimited && (ZoneCritResIdx == OtherCritIdx))
+ return;
+
+ DEBUG(
+ if (IsResourceLimited) {
+ dbgs() << " " << Available.getName() << " ResourceLimited: "
+ << getResourceName(ZoneCritResIdx) << "\n";
+ }
+ if (OtherResLimited)
+ dbgs() << " RemainingLimit: " << getResourceName(OtherCritIdx);
+ if (!IsResourceLimited && !OtherResLimited)
+ dbgs() << " Latency limited both directions.\n");
+
+ if (IsResourceLimited && !Policy.ReduceResIdx)
+ Policy.ReduceResIdx = ZoneCritResIdx;
+
+ if (OtherResLimited)
+ Policy.DemandResIdx = OtherCritIdx;
}
void ConvergingScheduler::SchedBoundary::releaseNode(SUnit *SU,
unsigned ReadyCycle) {
-
if (ReadyCycle < MinReadyCycle)
MinReadyCycle = ReadyCycle;
// Check for interlocks first. For the purpose of other heuristics, an
// instruction that cannot issue appears as if it's not in the ReadyQueue.
- if (ReadyCycle > CurrCycle || checkHazard(SU))
+ bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
+ if ((!IsBuffered && ReadyCycle > CurrCycle) || checkHazard(SU))
Pending.push(SU);
else
Available.push(SU);
@@ -1591,16 +1685,17 @@ void ConvergingScheduler::SchedBoundary::releaseNode(SUnit *SU,
}
/// Move the boundary of scheduled code by one cycle.
-void ConvergingScheduler::SchedBoundary::bumpCycle() {
- unsigned Width = SchedModel->getIssueWidth();
- CurrMOps = (CurrMOps <= Width) ? 0 : CurrMOps - Width;
-
- unsigned NextCycle = CurrCycle + 1;
- assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized");
- if (MinReadyCycle > NextCycle) {
- CurrMOps = 0;
- NextCycle = MinReadyCycle;
+void ConvergingScheduler::SchedBoundary::bumpCycle(unsigned NextCycle) {
+ if (SchedModel->getMicroOpBufferSize() == 0) {
+ assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized");
+ if (MinReadyCycle > NextCycle)
+ NextCycle = MinReadyCycle;
}
+ // Update the current micro-ops, which will issue in the next cycle.
+ unsigned DecMOps = SchedModel->getIssueWidth() * (NextCycle - CurrCycle);
+ CurrMOps = (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps;
+
+ // Decrement DependentLatency based on the next cycle.
if ((NextCycle - CurrCycle) > DependentLatency)
DependentLatency = 0;
else
@@ -1620,34 +1715,52 @@ void ConvergingScheduler::SchedBoundary::bumpCycle() {
}
}
CheckPending = true;
- IsResourceLimited = getCriticalCount() > std::max(ExpectedLatency, CurrCycle);
+ unsigned LFactor = SchedModel->getLatencyFactor();
+ IsResourceLimited =
+ (int)(getCriticalCount() - (getScheduledLatency() * LFactor))
+ > (int)LFactor;
- DEBUG(dbgs() << " " << Available.getName()
- << " Cycle: " << CurrCycle << '\n');
+ DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName() << '\n');
+}
+
+void ConvergingScheduler::SchedBoundary::incExecutedResources(unsigned PIdx,
+ unsigned Count) {
+ ExecutedResCounts[PIdx] += Count;
+ if (ExecutedResCounts[PIdx] > MaxExecutedResCount)
+ MaxExecutedResCount = ExecutedResCounts[PIdx];
}
/// Add the given processor resource to this scheduled zone.
-void ConvergingScheduler::SchedBoundary::countResource(unsigned PIdx,
- unsigned Cycles) {
+///
+/// \param Cycles indicates the number of consecutive (non-pipelined) cycles
+/// during which this resource is consumed.
+///
+/// \return the next cycle at which the instruction may execute without
+/// oversubscribing resources.
+unsigned ConvergingScheduler::SchedBoundary::
+countResource(unsigned PIdx, unsigned Cycles, unsigned ReadyCycle) {
unsigned Factor = SchedModel->getResourceFactor(PIdx);
- DEBUG(dbgs() << " " << SchedModel->getProcResource(PIdx)->Name
- << " +(" << Cycles << "x" << Factor
- << ") / " << SchedModel->getLatencyFactor() << '\n');
-
unsigned Count = Factor * Cycles;
- ResourceCounts[PIdx] += Count;
+ DEBUG(dbgs() << " " << getResourceName(PIdx)
+ << " +" << Cycles << "x" << Factor << "u\n");
+
+ // Update Executed resources counts.
+ incExecutedResources(PIdx, Count);
assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");
Rem->RemainingCounts[PIdx] -= Count;
// Check if this resource exceeds the current critical resource by a full
// cycle. If so, it becomes the critical resource.
- if ((int)(ResourceCounts[PIdx] - ResourceCounts[CritResIdx])
- >= (int)SchedModel->getLatencyFactor()) {
- CritResIdx = PIdx;
+ if (ZoneCritResIdx != PIdx
+ && ((int)(getResourceCount(PIdx) - getCriticalCount())
+ >= (int)SchedModel->getLatencyFactor())) {
+ ZoneCritResIdx = PIdx;
DEBUG(dbgs() << " *** Critical resource "
- << SchedModel->getProcResource(PIdx)->Name << " x"
- << ResourceCounts[PIdx] << '\n');
+ << getResourceName(PIdx) << ": "
+ << getResourceCount(PIdx) / SchedModel->getLatencyFactor() << "c\n");
}
+ // TODO: We don't yet model reserved resources. It's not hard though.
+ return CurrCycle;
}
/// Move the boundary of scheduled code by one SUnit.
@@ -1661,38 +1774,96 @@ void ConvergingScheduler::SchedBoundary::bumpNode(SUnit *SU) {
}
HazardRec->EmitInstruction(SU);
}
+ const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
+ unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr());
+ CurrMOps += IncMOps;
+ // checkHazard prevents scheduling multiple instructions per cycle that exceed
+ // issue width. However, we commonly reach the maximum. In this case
+ // opportunistically bump the cycle to avoid uselessly checking everything in
+ // the readyQ. Furthermore, a single instruction may produce more than one
+ // cycle's worth of micro-ops.
+ //
+ // TODO: Also check if this SU must end a dispatch group.
+ unsigned NextCycle = CurrCycle;
+ if (CurrMOps >= SchedModel->getIssueWidth()) {
+ ++NextCycle;
+ DEBUG(dbgs() << " *** Max MOps " << CurrMOps
+ << " at cycle " << CurrCycle << '\n');
+ }
+ unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
+ DEBUG(dbgs() << " Ready @" << ReadyCycle << "c\n");
+
+ switch (SchedModel->getMicroOpBufferSize()) {
+ case 0:
+ assert(ReadyCycle <= CurrCycle && "Broken PendingQueue");
+ break;
+ case 1:
+ if (ReadyCycle > NextCycle) {
+ NextCycle = ReadyCycle;
+ DEBUG(dbgs() << " *** Stall until: " << ReadyCycle << "\n");
+ }
+ break;
+ default:
+ // We don't currently model the OOO reorder buffer, so consider all
+ // scheduled MOps to be "retired".
+ break;
+ }
+ RetiredMOps += IncMOps;
+
// Update resource counts and critical resource.
if (SchedModel->hasInstrSchedModel()) {
- const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
- Rem->RemainingMicroOps -= SchedModel->getNumMicroOps(SU->getInstr(), SC);
+ unsigned DecRemIssue = IncMOps * SchedModel->getMicroOpFactor();
+ assert(Rem->RemIssueCount >= DecRemIssue && "MOps double counted");
+ Rem->RemIssueCount -= DecRemIssue;
+ if (ZoneCritResIdx) {
+ // Scale scheduled micro-ops for comparing with the critical resource.
+ unsigned ScaledMOps =
+ RetiredMOps * SchedModel->getMicroOpFactor();
+
+ // If scaled micro-ops are now more than the previous critical resource by
+ // a full cycle, then micro-ops issue becomes critical.
+ if ((int)(ScaledMOps - getResourceCount(ZoneCritResIdx))
+ >= (int)SchedModel->getLatencyFactor()) {
+ ZoneCritResIdx = 0;
+ DEBUG(dbgs() << " *** Critical resource NumMicroOps: "
+ << ScaledMOps / SchedModel->getLatencyFactor() << "c\n");
+ }
+ }
for (TargetSchedModel::ProcResIter
PI = SchedModel->getWriteProcResBegin(SC),
PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
- countResource(PI->ProcResourceIdx, PI->Cycles);
+ unsigned RCycle =
+ countResource(PI->ProcResourceIdx, PI->Cycles, ReadyCycle);
+ if (RCycle > NextCycle)
+ NextCycle = RCycle;
}
}
+ // Update ExpectedLatency and DependentLatency.
unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency;
unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency;
- if (SU->getDepth() > TopLatency)
+ if (SU->getDepth() > TopLatency) {
TopLatency = SU->getDepth();
- if (SU->getHeight() > BotLatency)
+ DEBUG(dbgs() << " " << Available.getName()
+ << " TopLatency SU(" << SU->NodeNum << ") " << TopLatency << "c\n");
+ }
+ if (SU->getHeight() > BotLatency) {
BotLatency = SU->getHeight();
-
- IsResourceLimited = getCriticalCount() > std::max(ExpectedLatency, CurrCycle);
-
- // Check the instruction group dispatch limit.
- // TODO: Check if this SU must end a dispatch group.
- CurrMOps += SchedModel->getNumMicroOps(SU->getInstr());
-
- // checkHazard prevents scheduling multiple instructions per cycle that exceed
- // issue width. However, we commonly reach the maximum. In this case
- // opportunistically bump the cycle to avoid uselessly checking everything in
- // the readyQ. Furthermore, a single instruction may produce more than one
- // cycle's worth of micro-ops.
- if (CurrMOps >= SchedModel->getIssueWidth()) {
- DEBUG(dbgs() << " *** Max instrs at cycle " << CurrCycle << '\n');
- bumpCycle();
+ DEBUG(dbgs() << " " << Available.getName()
+ << " BotLatency SU(" << SU->NodeNum << ") " << BotLatency << "c\n");
}
+ // If we stall for any reason, bump the cycle.
+ if (NextCycle > CurrCycle) {
+ bumpCycle(NextCycle);
+ }
+ else {
+ // After updating ZoneCritResIdx and ExpectedLatency, check if we're
+ // resource limited. If a stall occured, bumpCycle does this.
+ unsigned LFactor = SchedModel->getLatencyFactor();
+ IsResourceLimited =
+ (int)(getCriticalCount() - (getScheduledLatency() * LFactor))
+ > (int)LFactor;
+ }
+ DEBUG(dumpScheduledState());
}
/// Release pending ready nodes in to the available queue. This makes them
@@ -1704,6 +1875,7 @@ void ConvergingScheduler::SchedBoundary::releasePending() {
// Check to see if any of the pending instructions are ready to issue. If
// so, add them to the available queue.
+ bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
SUnit *SU = *(Pending.begin()+i);
unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
@@ -1711,7 +1883,7 @@ void ConvergingScheduler::SchedBoundary::releasePending() {
if (ReadyCycle < MinReadyCycle)
MinReadyCycle = ReadyCycle;
- if (ReadyCycle > CurrCycle)
+ if (!IsBuffered && ReadyCycle > CurrCycle)
continue;
if (checkHazard(SU))
@@ -1756,7 +1928,7 @@ SUnit *ConvergingScheduler::SchedBoundary::pickOnlyChoice() {
for (unsigned i = 0; Available.empty(); ++i) {
assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedLatency) &&
"permanent hazard"); (void)i;
- bumpCycle();
+ bumpCycle(CurrCycle + 1);
releasePending();
}
if (Available.size() == 1)
@@ -1764,103 +1936,28 @@ SUnit *ConvergingScheduler::SchedBoundary::pickOnlyChoice() {
return NULL;
}
-/// Record the candidate policy for opposite zones with different critical
-/// resources.
-///
-/// If the CriticalZone is latency limited, don't force a policy for the
-/// candidates here. Instead, setLatencyPolicy sets ReduceLatency if needed.
-void ConvergingScheduler::balanceZones(
- ConvergingScheduler::SchedBoundary &CriticalZone,
- ConvergingScheduler::SchedCandidate &CriticalCand,
- ConvergingScheduler::SchedBoundary &OppositeZone,
- ConvergingScheduler::SchedCandidate &OppositeCand) {
-
- if (!CriticalZone.IsResourceLimited)
- return;
- assert(SchedModel->hasInstrSchedModel() && "required schedmodel");
-
- SchedRemainder *Rem = CriticalZone.Rem;
-
- // If the critical zone is overconsuming a resource relative to the
- // remainder, try to reduce it.
- unsigned RemainingCritCount =
- Rem->RemainingCounts[CriticalZone.CritResIdx];
- if ((int)(Rem->getMaxRemainingCount(SchedModel) - RemainingCritCount)
- > (int)SchedModel->getLatencyFactor()) {
- CriticalCand.Policy.ReduceResIdx = CriticalZone.CritResIdx;
- DEBUG(dbgs() << " Balance " << CriticalZone.Available.getName()
- << " reduce "
- << SchedModel->getProcResource(CriticalZone.CritResIdx)->Name
- << '\n');
- }
- // If the other zone is underconsuming a resource relative to the full zone,
- // try to increase it.
- unsigned OppositeCount =
- OppositeZone.ResourceCounts[CriticalZone.CritResIdx];
- if ((int)(OppositeZone.ExpectedCount - OppositeCount)
- > (int)SchedModel->getLatencyFactor()) {
- OppositeCand.Policy.DemandResIdx = CriticalZone.CritResIdx;
- DEBUG(dbgs() << " Balance " << OppositeZone.Available.getName()
- << " demand "
- << SchedModel->getProcResource(OppositeZone.CritResIdx)->Name
- << '\n');
+// This is useful information to dump after bumpNode.
+// Note that the Queue contents are more useful before pickNodeFromQueue.
+void ConvergingScheduler::SchedBoundary::dumpScheduledState() {
+ unsigned ResFactor;
+ unsigned ResCount;
+ if (ZoneCritResIdx) {
+ ResFactor = SchedModel->getResourceFactor(ZoneCritResIdx);
+ ResCount = getResourceCount(ZoneCritResIdx);
}
-}
-
-/// Determine if the scheduled zones exceed resource limits or critical path and
-/// set each candidate's ReduceHeight policy accordingly.
-void ConvergingScheduler::checkResourceLimits(
- ConvergingScheduler::SchedCandidate &TopCand,
- ConvergingScheduler::SchedCandidate &BotCand) {
-
- // Set ReduceLatency to true if needed.
- Bot.setLatencyPolicy(BotCand.Policy);
- Top.setLatencyPolicy(TopCand.Policy);
-
- // Handle resource-limited regions.
- if (Top.IsResourceLimited && Bot.IsResourceLimited
- && Top.CritResIdx == Bot.CritResIdx) {
- // If the scheduled critical resource in both zones is no longer the
- // critical remaining resource, attempt to reduce resource height both ways.
- if (Top.CritResIdx != Rem.CritResIdx) {
- TopCand.Policy.ReduceResIdx = Top.CritResIdx;
- BotCand.Policy.ReduceResIdx = Bot.CritResIdx;
- DEBUG(dbgs() << " Reduce scheduled "
- << SchedModel->getProcResource(Top.CritResIdx)->Name << '\n');
- }
- return;
- }
- // Handle latency-limited regions.
- if (!Top.IsResourceLimited && !Bot.IsResourceLimited) {
- // If the total scheduled expected latency exceeds the region's critical
- // path then reduce latency both ways.
- //
- // Just because a zone is not resource limited does not mean it is latency
- // limited. Unbuffered resource, such as max micro-ops may cause CurrCycle
- // to exceed expected latency.
- if ((Top.ExpectedLatency + Bot.ExpectedLatency >= Rem.CriticalPath)
- && (Rem.CriticalPath > Top.CurrCycle + Bot.CurrCycle)) {
- TopCand.Policy.ReduceLatency = true;
- BotCand.Policy.ReduceLatency = true;
- DEBUG(dbgs() << " Reduce scheduled latency " << Top.ExpectedLatency
- << " + " << Bot.ExpectedLatency << '\n');
- }
- return;
+ else {
+ ResFactor = SchedModel->getMicroOpFactor();
+ ResCount = RetiredMOps * SchedModel->getMicroOpFactor();
}
- // The critical resource is different in each zone, so request balancing.
-
- // Compute the cost of each zone.
- Top.ExpectedCount = std::max(Top.ExpectedLatency, Top.CurrCycle);
- Top.ExpectedCount = std::max(
- Top.getCriticalCount(),
- Top.ExpectedCount * SchedModel->getLatencyFactor());
- Bot.ExpectedCount = std::max(Bot.ExpectedLatency, Bot.CurrCycle);
- Bot.ExpectedCount = std::max(
- Bot.getCriticalCount(),
- Bot.ExpectedCount * SchedModel->getLatencyFactor());
-
- balanceZones(Top, TopCand, Bot, BotCand);
- balanceZones(Bot, BotCand, Top, TopCand);
+ unsigned LFactor = SchedModel->getLatencyFactor();
+ dbgs() << Available.getName() << " @" << CurrCycle << "c\n"
+ << " Retired: " << RetiredMOps;
+ dbgs() << "\n Executed: " << getExecutedCount() / LFactor << "c";
+ dbgs() << "\n Critical: " << ResCount / LFactor << "c, "
+ << ResCount / ResFactor << " " << getResourceName(ZoneCritResIdx)
+ << "\n ExpectedLatency: " << ExpectedLatency << "c\n"
+ << (IsResourceLimited ? " - Resource" : " - Latency")
+ << " limited.\n";
}
void ConvergingScheduler::SchedCandidate::
@@ -2030,8 +2127,7 @@ void ConvergingScheduler::tryCandidate(SchedCandidate &Cand,
// Avoid serializing long latency dependence chains.
if (Cand.Policy.ReduceLatency) {
if (Zone.isTop()) {
- if (Cand.SU->getDepth() * SchedModel->getLatencyFactor()
- > Zone.ExpectedCount) {
+ if (Cand.SU->getDepth() > Zone.getScheduledLatency()) {
if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
TryCand, Cand, TopDepthReduce))
return;
@@ -2041,8 +2137,7 @@ void ConvergingScheduler::tryCandidate(SchedCandidate &Cand,
return;
}
else {
- if (Cand.SU->getHeight() * SchedModel->getLatencyFactor()
- > Zone.ExpectedCount) {
+ if (Cand.SU->getHeight() > Zone.getScheduledLatency()) {
if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
TryCand, Cand, BotHeightReduce))
return;
@@ -2061,8 +2156,8 @@ void ConvergingScheduler::tryCandidate(SchedCandidate &Cand,
Cand.Reason = MultiPressure;
// Prefer immediate defs/users of the last scheduled instruction. This is a
- // nice pressure avoidance strategy that also conserves the processor's
- // register renaming resources and keeps the machine code readable.
+ // local pressure avoidance strategy that also makes the machine code
+ // readable.
if (tryGreater(Zone.NextSUs.count(TryCand.SU), Zone.NextSUs.count(Cand.SU),
TryCand, Cand, NextDefUse))
return;
@@ -2224,18 +2319,19 @@ SUnit *ConvergingScheduler::pickNodeBidirectional(bool &IsTopNode) {
// efficient, but also provides the best heuristics for CriticalPSets.
if (SUnit *SU = Bot.pickOnlyChoice()) {
IsTopNode = false;
- DEBUG(dbgs() << "Pick Top NOCAND\n");
+ DEBUG(dbgs() << "Pick Bot NOCAND\n");
return SU;
}
if (SUnit *SU = Top.pickOnlyChoice()) {
IsTopNode = true;
- DEBUG(dbgs() << "Pick Bot NOCAND\n");
+ DEBUG(dbgs() << "Pick Top NOCAND\n");
return SU;
}
CandPolicy NoPolicy;
SchedCandidate BotCand(NoPolicy);
SchedCandidate TopCand(NoPolicy);
- checkResourceLimits(TopCand, BotCand);
+ Bot.setPolicy(BotCand.Policy, Top);
+ Top.setPolicy(TopCand.Policy, Bot);
// Prefer bottom scheduling when heuristics are silent.
pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
@@ -2364,13 +2460,13 @@ void ConvergingScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) {
/// them here. See comments in biasPhysRegCopy.
void ConvergingScheduler::schedNode(SUnit *SU, bool IsTopNode) {
if (IsTopNode) {
- SU->TopReadyCycle = Top.CurrCycle;
+ SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.CurrCycle);
Top.bumpNode(SU);
if (SU->hasPhysRegUses)
reschedulePhysRegCopies(SU, true);
}
else {
- SU->BotReadyCycle = Bot.CurrCycle;
+ SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.CurrCycle);
Bot.bumpNode(SU);
if (SU->hasPhysRegDefs)
reschedulePhysRegCopies(SU, false);