 include/llvm/CodeGen/ScheduleDAG.h | 21
 lib/CodeGen/MachineScheduler.cpp   | 93
 lib/CodeGen/ScheduleDAGInstrs.cpp  |  8
 3 files changed, 92 insertions(+), 30 deletions(-)
diff --git a/include/llvm/CodeGen/ScheduleDAG.h b/include/llvm/CodeGen/ScheduleDAG.h
index 48ed232a15..66bf8c5dd7 100644
--- a/include/llvm/CodeGen/ScheduleDAG.h
+++ b/include/llvm/CodeGen/ScheduleDAG.h
@@ -292,7 +292,8 @@ namespace llvm {
bool isScheduleHigh : 1; // True if preferable to schedule high.
bool isScheduleLow : 1; // True if preferable to schedule low.
bool isCloned : 1; // True if this node has been cloned.
- bool isUnbuffered : 1; // Reads an unbuffered resource.
+ bool isUnbuffered : 1; // Uses an unbuffered resource.
+ bool hasReservedResource : 1; // Uses a reserved resource.
Sched::Preference SchedulingPref; // Scheduling preference.
private:
@@ -318,9 +319,9 @@ namespace llvm {
hasPhysRegDefs(false), hasPhysRegClobbers(false), isPending(false),
isAvailable(false), isScheduled(false), isScheduleHigh(false),
isScheduleLow(false), isCloned(false), isUnbuffered(false),
- SchedulingPref(Sched::None), isDepthCurrent(false),
- isHeightCurrent(false), Depth(0), Height(0), TopReadyCycle(0),
- BotReadyCycle(0), CopyDstRC(NULL), CopySrcRC(NULL) {}
+ hasReservedResource(false), SchedulingPref(Sched::None),
+ isDepthCurrent(false), isHeightCurrent(false), Depth(0), Height(0),
+ TopReadyCycle(0), BotReadyCycle(0), CopyDstRC(NULL), CopySrcRC(NULL) {}
/// SUnit - Construct an SUnit for post-regalloc scheduling to represent
/// a MachineInstr.
@@ -333,9 +334,9 @@ namespace llvm {
hasPhysRegDefs(false), hasPhysRegClobbers(false), isPending(false),
isAvailable(false), isScheduled(false), isScheduleHigh(false),
isScheduleLow(false), isCloned(false), isUnbuffered(false),
- SchedulingPref(Sched::None), isDepthCurrent(false),
- isHeightCurrent(false), Depth(0), Height(0), TopReadyCycle(0),
- BotReadyCycle(0), CopyDstRC(NULL), CopySrcRC(NULL) {}
+ hasReservedResource(false), SchedulingPref(Sched::None),
+ isDepthCurrent(false), isHeightCurrent(false), Depth(0), Height(0),
+ TopReadyCycle(0), BotReadyCycle(0), CopyDstRC(NULL), CopySrcRC(NULL) {}
/// SUnit - Construct a placeholder SUnit.
SUnit()
@@ -347,9 +348,9 @@ namespace llvm {
hasPhysRegDefs(false), hasPhysRegClobbers(false), isPending(false),
isAvailable(false), isScheduled(false), isScheduleHigh(false),
isScheduleLow(false), isCloned(false), isUnbuffered(false),
- SchedulingPref(Sched::None), isDepthCurrent(false),
- isHeightCurrent(false), Depth(0), Height(0), TopReadyCycle(0),
- BotReadyCycle(0), CopyDstRC(NULL), CopySrcRC(NULL) {}
+ hasReservedResource(false), SchedulingPref(Sched::None),
+ isDepthCurrent(false), isHeightCurrent(false), Depth(0), Height(0),
+ TopReadyCycle(0), BotReadyCycle(0), CopyDstRC(NULL), CopySrcRC(NULL) {}
/// \brief Boundary nodes are placeholders for the boundary of the
/// scheduling region.
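The new hasReservedResource bit parallels isUnbuffered: initSUnits() (see the ScheduleDAGInstrs.cpp hunk further down) sets it for any node whose scheduling class uses a processor resource declared with BufferSize == 0. A minimal sketch of how client code might read the two flags, assuming the patched header; the helper name is illustrative and not part of this patch:

#include "llvm/CodeGen/ScheduleDAG.h"

// Illustrative helper (not part of the patch): report which kind of in-order
// resource a DAG node occupies, based on the flags populated by
// ScheduleDAGInstrs::initSUnits() from the machine model.
static const char *describeInOrderResource(const llvm::SUnit &SU) {
  if (SU.hasReservedResource)
    return "reserved resource (BufferSize == 0)";
  if (SU.isUnbuffered)
    return "unbuffered resource (BufferSize == 1)";
  return "buffered resources only";
}
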
diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp
index 3296149afa..6cfedcbbc2 100644
--- a/lib/CodeGen/MachineScheduler.cpp
+++ b/lib/CodeGen/MachineScheduler.cpp
@@ -1322,6 +1322,8 @@ void CopyConstrain::apply(ScheduleDAGMI *DAG) {
// GenericScheduler - Implementation of the generic MachineSchedStrategy.
//===----------------------------------------------------------------------===//
+static const unsigned InvalidCycle = ~0U;
+
namespace {
/// GenericScheduler shrinks the unscheduled zone using heuristics to balance
/// the schedule.
@@ -1491,6 +1493,10 @@ public:
// Is the scheduled region resource limited vs. latency limited.
bool IsResourceLimited;
+ // Record the highest cycle at which each resource has been reserved by a
+ // scheduled instruction.
+ SmallVector<unsigned, 16> ReservedCycles;
+
#ifndef NDEBUG
// Remember the greatest operand latency as an upper bound on the number of
// times we should retry the pending queue because of a hazard.
@@ -1518,6 +1524,7 @@ public:
MaxExecutedResCount = 0;
ZoneCritResIdx = 0;
IsResourceLimited = false;
+ ReservedCycles.clear();
#ifndef NDEBUG
MaxObservedLatency = 0;
#endif
@@ -1587,6 +1594,8 @@ public:
/// cycle.
unsigned getLatencyStallCycles(SUnit *SU);
+ unsigned getNextResourceCycle(unsigned PIdx, unsigned Cycles);
+
bool checkHazard(SUnit *SU);
unsigned findMaxLatency(ArrayRef<SUnit*> ReadySUs);
@@ -1708,8 +1717,10 @@ init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
DAG = dag;
SchedModel = smodel;
Rem = rem;
- if (SchedModel->hasInstrSchedModel())
+ if (SchedModel->hasInstrSchedModel()) {
ExecutedResCounts.resize(SchedModel->getNumProcResourceKinds());
+ ReservedCycles.resize(SchedModel->getNumProcResourceKinds(), InvalidCycle);
+ }
}
/// Initialize the per-region scheduling policy.
@@ -1890,6 +1901,20 @@ unsigned GenericScheduler::SchedBoundary::getLatencyStallCycles(SUnit *SU) {
return 0;
}
+/// Compute the next cycle at which the given processor resource can be
+/// scheduled.
+unsigned GenericScheduler::SchedBoundary::
+getNextResourceCycle(unsigned PIdx, unsigned Cycles) {
+ unsigned NextUnreserved = ReservedCycles[PIdx];
+ // If this resource has never been used, always return cycle zero.
+ if (NextUnreserved == InvalidCycle)
+ return 0;
+ // For bottom-up scheduling add the cycles needed for the current operation.
+ if (!isTop())
+ NextUnreserved += Cycles;
+ return NextUnreserved;
+}
+
/// Does this SU have a hazard within the current instruction group.
///
/// The scheduler supports two modes of hazard recognition. The first is the
@@ -1913,6 +1938,15 @@ bool GenericScheduler::SchedBoundary::checkHazard(SUnit *SU) {
<< SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
return true;
}
+ if (SchedModel->hasInstrSchedModel() && SU->hasReservedResource) {
+ const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
+ for (TargetSchedModel::ProcResIter
+ PI = SchedModel->getWriteProcResBegin(SC),
+ PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
+ if (getNextResourceCycle(PI->ProcResourceIdx, PI->Cycles) > CurrCycle)
+ return true;
+ }
+ }
return false;
}
@@ -2097,7 +2131,7 @@ void GenericScheduler::SchedBoundary::incExecutedResources(unsigned PIdx,
/// \return the next cycle at which the instruction may execute without
/// oversubscribing resources.
unsigned GenericScheduler::SchedBoundary::
-countResource(unsigned PIdx, unsigned Cycles, unsigned ReadyCycle) {
+countResource(unsigned PIdx, unsigned Cycles, unsigned NextCycle) {
unsigned Factor = SchedModel->getResourceFactor(PIdx);
unsigned Count = Factor * Cycles;
DEBUG(dbgs() << " " << getResourceName(PIdx)
@@ -2116,8 +2150,14 @@ countResource(unsigned PIdx, unsigned Cycles, unsigned ReadyCycle) {
<< getResourceName(PIdx) << ": "
<< getResourceCount(PIdx) / SchedModel->getLatencyFactor() << "c\n");
}
- // TODO: We don't yet model reserved resources. It's not hard though.
- return CurrCycle;
+ // For reserved resources, record the highest cycle using the resource.
+ unsigned NextAvailable = getNextResourceCycle(PIdx, Cycles);
+ if (NextAvailable > CurrCycle) {
+ DEBUG(dbgs() << " Resource conflict: "
+ << SchedModel->getProcResource(PIdx)->Name << " reserved until @"
+ << NextAvailable << "\n");
+ }
+ return NextAvailable;
}
/// Move the boundary of scheduled code by one SUnit.
@@ -2131,25 +2171,17 @@ void GenericScheduler::SchedBoundary::bumpNode(SUnit *SU) {
}
HazardRec->EmitInstruction(SU);
}
+ // checkHazard should prevent scheduling multiple instructions per cycle that
+ // exceed the issue width.
const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr());
- CurrMOps += IncMOps;
- // checkHazard prevents scheduling multiple instructions per cycle that exceed
- // issue width. However, we commonly reach the maximum. In this case
- // opportunistically bump the cycle to avoid uselessly checking everything in
- // the readyQ. Furthermore, a single instruction may produce more than one
- // cycle's worth of micro-ops.
- //
- // TODO: Also check if this SU must end a dispatch group.
- unsigned NextCycle = CurrCycle;
- if (CurrMOps >= SchedModel->getIssueWidth()) {
- ++NextCycle;
- DEBUG(dbgs() << " *** Max MOps " << CurrMOps
- << " at cycle " << CurrCycle << '\n');
- }
+  assert((CurrMOps == 0 || (CurrMOps + IncMOps) <= SchedModel->getIssueWidth()) &&
+         "Cannot schedule this instruction's MicroOps in the current cycle.");
+
unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
DEBUG(dbgs() << " Ready @" << ReadyCycle << "c\n");
+ unsigned NextCycle = CurrCycle;
switch (SchedModel->getMicroOpBufferSize()) {
case 0:
assert(ReadyCycle <= CurrCycle && "Broken PendingQueue");
@@ -2194,10 +2226,23 @@ void GenericScheduler::SchedBoundary::bumpNode(SUnit *SU) {
PI = SchedModel->getWriteProcResBegin(SC),
PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
unsigned RCycle =
- countResource(PI->ProcResourceIdx, PI->Cycles, ReadyCycle);
+ countResource(PI->ProcResourceIdx, PI->Cycles, NextCycle);
if (RCycle > NextCycle)
NextCycle = RCycle;
}
+ if (SU->hasReservedResource) {
+ // For reserved resources, record the highest cycle using the resource.
+ // For top-down scheduling, this is the cycle in which we schedule this
+      // instruction plus the number of cycles the operation reserves the
+      // resource. For bottom-up scheduling it is simply the instruction's cycle.
+ for (TargetSchedModel::ProcResIter
+ PI = SchedModel->getWriteProcResBegin(SC),
+ PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
+ unsigned PIdx = PI->ProcResourceIdx;
+ if (SchedModel->getProcResource(PIdx)->BufferSize == 0)
+ ReservedCycles[PIdx] = isTop() ? NextCycle + PI->Cycles : NextCycle;
+ }
+ }
}
// Update ExpectedLatency and DependentLatency.
unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency;
@@ -2224,6 +2269,16 @@ void GenericScheduler::SchedBoundary::bumpNode(SUnit *SU) {
(int)(getCriticalCount() - (getScheduledLatency() * LFactor))
> (int)LFactor;
}
+ // Update CurrMOps after calling bumpCycle to handle stalls, since bumpCycle
+ // resets CurrMOps. Loop to handle instructions with more MOps than issue in
+ // one cycle. Since we commonly reach the max MOps here, opportunistically
+ // bump the cycle to avoid uselessly checking everything in the readyQ.
+ CurrMOps += IncMOps;
+ while (CurrMOps >= SchedModel->getIssueWidth()) {
+ bumpCycle(++NextCycle);
+ DEBUG(dbgs() << " *** Max MOps " << CurrMOps
+ << " at cycle " << CurrCycle << '\n');
+ }
DEBUG(dumpScheduledState());
}
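
Taken together, the getNextResourceCycle()/bumpNode() changes keep one "next free cycle" per processor resource kind. A simplified, self-contained model of that bookkeeping is sketched below; the ReservationTable name and layout are illustrative, not LLVM's:

#include <vector>

namespace sketch {

constexpr unsigned InvalidCycle = ~0u; // same sentinel the patch introduces

// One entry per processor resource kind; InvalidCycle until first use.
struct ReservationTable {
  std::vector<unsigned> ReservedCycles;
  bool TopDown; // true for the top boundary, false for bottom-up

  ReservationTable(unsigned NumResourceKinds, bool IsTop)
      : ReservedCycles(NumResourceKinds, InvalidCycle), TopDown(IsTop) {}

  // Mirrors getNextResourceCycle(): first cycle at which resource PIdx is free.
  unsigned nextResourceCycle(unsigned PIdx, unsigned Cycles) const {
    unsigned NextUnreserved = ReservedCycles[PIdx];
    if (NextUnreserved == InvalidCycle)
      return 0;                   // never used: available immediately
    if (!TopDown)
      NextUnreserved += Cycles;   // bottom-up: add this operation's own cycles
    return NextUnreserved;
  }

  // Mirrors the bumpNode() update for reserved (BufferSize == 0) resources:
  // remember how far the instruction just issued at ScheduledCycle extends
  // the reservation.
  void reserve(unsigned PIdx, unsigned Cycles, unsigned ScheduledCycle) {
    ReservedCycles[PIdx] = TopDown ? ScheduledCycle + Cycles : ScheduledCycle;
  }
};

} // namespace sketch

With this model, a later user of the same reserved resource sees nextResourceCycle() greater than the current cycle, which is exactly the condition the new code in checkHazard() treats as a hazard and which countResource() uses to push NextCycle forward.
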
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index eeae6ec03d..977b8f0b41 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -697,9 +697,15 @@ void ScheduleDAGInstrs::initSUnits() {
for (TargetSchedModel::ProcResIter
PI = SchedModel.getWriteProcResBegin(SC),
PE = SchedModel.getWriteProcResEnd(SC); PI != PE; ++PI) {
- if (SchedModel.getProcResource(PI->ProcResourceIdx)->BufferSize == 1) {
+ switch (SchedModel.getProcResource(PI->ProcResourceIdx)->BufferSize) {
+ case 0:
+ SU->hasReservedResource = true;
+ break;
+ case 1:
SU->isUnbuffered = true;
break;
+ default:
+ break;
}
}
}
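
To make the effect concrete, here is a small worked example built on the ReservationTable sketch above (so it assumes that sketch is in scope); the resource index and cycle counts are invented for illustration:

#include <cassert>

int main() {
  // Top-down boundary with eight resource kinds; resource #3 is a divider
  // that the machine model declares with BufferSize == 0 (reserved).
  sketch::ReservationTable Table(/*NumResourceKinds=*/8, /*IsTop=*/true);

  // The divider has never been used, so the first divide can issue at once.
  assert(Table.nextResourceCycle(/*PIdx=*/3, /*Cycles=*/4) == 0);

  // Schedule the first divide at cycle 2; it holds the divider for 4 cycles.
  Table.reserve(/*PIdx=*/3, /*Cycles=*/4, /*ScheduledCycle=*/2);

  // A second divide ready at cycle 3 still sees the divider busy until
  // cycle 6, so checkHazard()-style logic keeps it pending until then.
  assert(Table.nextResourceCycle(3, 4) == 6);
  return 0;
}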