summary refs log tree commit diff
path: root/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp')
-rw-r--r-- lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp 288
1 files changed, 111 insertions, 177 deletions
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index 7afdb734ec..b2e9c15b68 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -102,11 +102,11 @@ static cl::opt<unsigned> AvgIPC(
#ifndef NDEBUG
namespace {
// For sched=list-ilp, Count the number of times each factor comes into play.
- enum { FactPressureDiff, FactRegUses, FactStall, FactHeight, FactDepth,
- FactStatic, FactOther, NumFactors };
+ enum { FactPressureDiff, FactRegUses, FactHeight, FactDepth, FactStatic,
+ FactOther, NumFactors };
}
static const char *FactorName[NumFactors] =
-{"PressureDiff", "RegUses", "Stall", "Height", "Depth","Static", "Other"};
+{"PressureDiff", "RegUses", "Height", "Depth","Static", "Other"};
static int FactorCount[NumFactors];
#endif //!NDEBUG
@@ -463,13 +463,6 @@ void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) {
if (DisableSchedCycles)
return;
- // FIXME: Nodes such as CopyFromReg probably should not advance the current
- // cycle. Otherwise, we can wrongly mask real stalls. If the non-machine node
- // has predecessors the cycle will be advanced when they are scheduled.
- // But given the crude nature of modeling latency though such nodes, we
- // currently need to treat these nodes like real instructions.
- // if (!SU->getNode() || !SU->getNode()->isMachineOpcode()) return;
-
unsigned ReadyCycle = isBottomUp ? SU->getHeight() : SU->getDepth();
// Bump CurCycle to account for latency. We assume the latency of other
@@ -540,19 +533,16 @@ void ScheduleDAGRRList::EmitNode(SUnit *SU) {
}
}
-static void resetVRegCycle(SUnit *SU);
-
/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
/// count of its predecessors. If a predecessor pending count is zero, add it to
/// the Available queue.
void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
- DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
+ DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
DEBUG(SU->dump(this));
#ifndef NDEBUG
if (CurCycle < SU->getHeight())
- DEBUG(dbgs() << " Height [" << SU->getHeight()
- << "] pipeline stall!\n");
+ DEBUG(dbgs() << " Height [" << SU->getHeight() << "] pipeline stall!\n");
#endif
// FIXME: Do not modify node height. It may interfere with
@@ -569,7 +559,7 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
AvailableQueue->ScheduledNode(SU);
// If HazardRec is disabled, and each inst counts as one cycle, then
- // advance CurCycle before ReleasePredecessors to avoid useless pushes to
+ // advance CurCycle before ReleasePredecessors to avoid useless pushes to
// PendingQueue for schedulers that implement HasReadyFilter.
if (!HazardRec->isEnabled() && AvgIPC < 2)
AdvanceToCycle(CurCycle + 1);
@@ -590,25 +580,20 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
}
}
- resetVRegCycle(SU);
-
SU->isScheduled = true;
// Conditions under which the scheduler should eagerly advance the cycle:
// (1) No available instructions
// (2) All pipelines full, so available instructions must have hazards.
//
- // If HazardRec is disabled, the cycle was pre-advanced before calling
- // ReleasePredecessors. In that case, IssueCount should remain 0.
+ // If HazardRec is disabled, the cycle was advanced earlier.
//
// Check AvailableQueue after ReleasePredecessors in case of zero latency.
- if (HazardRec->isEnabled() || AvgIPC > 1) {
- if (SU->getNode() && SU->getNode()->isMachineOpcode())
- ++IssueCount;
- if ((HazardRec->isEnabled() && HazardRec->atIssueLimit())
- || (!HazardRec->isEnabled() && IssueCount == AvgIPC))
- AdvanceToCycle(CurCycle + 1);
- }
+ ++IssueCount;
+ if ((HazardRec->isEnabled() && HazardRec->atIssueLimit())
+ || (!HazardRec->isEnabled() && AvgIPC > 1 && IssueCount == AvgIPC)
+ || AvailableQueue->empty())
+ AdvanceToCycle(CurCycle + 1);
}
/// CapturePred - This does the opposite of ReleasePred. Since SU is being
@@ -1235,7 +1220,7 @@ void ScheduleDAGRRList::ListScheduleBottomUp() {
// priority. If it is not ready put it back. Schedule the node.
Sequence.reserve(SUnits.size());
while (!AvailableQueue->empty()) {
- DEBUG(dbgs() << "Examining Available:\n";
+ DEBUG(dbgs() << "\n*** Examining Available\n";
AvailableQueue->dump(this));
// Pick the best node to schedule taking all constraints into
@@ -1676,6 +1661,17 @@ void RegReductionPQBase::CalculateSethiUllmanNumbers() {
CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers);
}
+void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) {
+ SUnits = &sunits;
+ // Add pseudo dependency edges for two-address nodes.
+ AddPseudoTwoAddrDeps();
+ // Reroute edges to nodes with multiple uses.
+ if (!TracksRegPressure)
+ PrescheduleNodesWithMultipleUses();
+ // Calculate node priorities.
+ CalculateSethiUllmanNumbers();
+}
+
void RegReductionPQBase::addNode(const SUnit *SU) {
unsigned SUSize = SethiUllmanNumbers.size();
if (SUnits->size() > SUSize)
@@ -2012,29 +2008,7 @@ static unsigned calcMaxScratches(const SUnit *SU) {
return Scratches;
}
-/// hasOnlyLiveInOpers - Return true if SU has only value predecessors that are
-/// CopyFromReg from a virtual register.
-static bool hasOnlyLiveInOpers(const SUnit *SU) {
- bool RetVal = false;
- for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
- I != E; ++I) {
- if (I->isCtrl()) continue;
- const SUnit *PredSU = I->getSUnit();
- if (PredSU->getNode() &&
- PredSU->getNode()->getOpcode() == ISD::CopyFromReg) {
- unsigned Reg =
- cast<RegisterSDNode>(PredSU->getNode()->getOperand(1))->getReg();
- if (TargetRegisterInfo::isVirtualRegister(Reg)) {
- RetVal = true;
- continue;
- }
- }
- return false;
- }
- return RetVal;
-}
-
-/// hasOnlyLiveOutUses - Return true if SU has only value successors that are
+/// hasOnlyLiveOutUses - Return true if SU has a single value successor that is a
/// CopyToReg to a virtual register. This SU def is probably a liveout and
/// it has no other use. It should be scheduled closer to the terminator.
static bool hasOnlyLiveOutUses(const SUnit *SU) {
@@ -2056,71 +2030,62 @@ static bool hasOnlyLiveOutUses(const SUnit *SU) {
return RetVal;
}
-// Set isVRegCycle for a node with only live in opers and live out uses. Also
-// set isVRegCycle for its CopyFromReg operands.
-//
-// This is only relevant for single-block loops, in which case the VRegCycle
-// node is likely an induction variable in which the operand and target virtual
-// registers should be coalesced (e.g. pre/post increment values). Setting the
-// isVRegCycle flag helps the scheduler prioritize other uses of the same
-// CopyFromReg so that this node becomes the virtual register "kill". This
-// avoids interference between the values live in and out of the block and
-// eliminates a copy inside the loop.
-static void initVRegCycle(SUnit *SU) {
- if (DisableSchedVRegCycle)
- return;
-
- if (!hasOnlyLiveInOpers(SU) || !hasOnlyLiveOutUses(SU))
- return;
-
- DEBUG(dbgs() << "VRegCycle: SU(" << SU->NodeNum << ")\n");
-
- SU->isVRegCycle = true;
-
- for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+/// UnitsSharePred - Return true if the two scheduling units share a common
+/// data predecessor.
+static bool UnitsSharePred(const SUnit *left, const SUnit *right) {
+ SmallSet<const SUnit*, 4> Preds;
+ for (SUnit::const_pred_iterator I = left->Preds.begin(),E = left->Preds.end();
I != E; ++I) {
- if (I->isCtrl()) continue;
- I->getSUnit()->isVRegCycle = true;
+ if (I->isCtrl()) continue; // ignore chain preds
+ Preds.insert(I->getSUnit());
}
-}
-
-// After scheduling the definition of a VRegCycle, clear the isVRegCycle flag of
-// CopyFromReg operands. We should no longer penalize other uses of this VReg.
-static void resetVRegCycle(SUnit *SU) {
- if (!SU->isVRegCycle)
- return;
-
- for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
+ for (SUnit::const_pred_iterator I = right->Preds.begin(),E = right->Preds.end();
I != E; ++I) {
if (I->isCtrl()) continue; // ignore chain preds
- SUnit *PredSU = I->getSUnit();
- if (PredSU->isVRegCycle) {
- assert(PredSU->getNode()->getOpcode() == ISD::CopyFromReg &&
- "VRegCycle def must be CopyFromReg");
- I->getSUnit()->isVRegCycle = 0;
- }
+ if (Preds.count(I->getSUnit()))
+ return true;
}
+ return false;
}
-// Return true if this SUnit uses a CopyFromReg node marked as a VRegCycle. This
-// means a node that defines the VRegCycle has not been scheduled yet.
-static bool hasVRegCycleUse(const SUnit *SU) {
- // If this SU also defines the VReg, don't hoist it as a "use".
- if (SU->isVRegCycle)
- return false;
-
- for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
- I != E; ++I) {
+// Return true if the virtual register defined by VRCycleSU may interfere with
+// VRUseSU.
+//
+// Note: We may consider two SU's that use the same value live into a loop as
+// interfering even though the value is not an induction variable. This is an
+// unfortunate consequence of scheduling on the selection DAG.
+static bool checkVRegCycleInterference(const SUnit *VRCycleSU,
+ const SUnit *VRUseSU) {
+ for (SUnit::const_pred_iterator I = VRCycleSU->Preds.begin(),
+ E = VRCycleSU->Preds.end(); I != E; ++I) {
if (I->isCtrl()) continue; // ignore chain preds
- if (I->getSUnit()->isVRegCycle &&
- I->getSUnit()->getNode()->getOpcode() == ISD::CopyFromReg) {
- DEBUG(dbgs() << " VReg cycle use: SU (" << SU->NodeNum << ")\n");
- return true;
+ SDNode *InNode = I->getSUnit()->getNode();
+ if (!InNode || InNode->getOpcode() != ISD::CopyFromReg)
+ continue;
+ for (SUnit::const_pred_iterator II = VRUseSU->Preds.begin(),
+ EE = VRUseSU->Preds.end(); II != EE; ++II) {
+ if (II->getSUnit() == I->getSUnit())
+ return true;
}
}
return false;
}
+// Compare the VRegCycle properties of the nodes.
+// Return -1 if left has higher priority, 1 if right has higher priority.
+// Return 0 if priority is equivalent.
+static int BUCompareVRegCycle(const SUnit *left, const SUnit *right) {
+ if (left->isVRegCycle && !right->isVRegCycle) {
+ if (checkVRegCycleInterference(left, right))
+ return -1;
+ }
+ else if (!left->isVRegCycle && right->isVRegCycle) {
+ if (checkVRegCycleInterference(right, left))
+ return 1;
+ }
+ return 0;
+}
+
// Check for either a dependence (latency) or resource (hazard) stall.
//
// Note: The ScheduleHazardRecognizer interface requires a non-const SU.
@@ -2136,12 +2101,23 @@ static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ) {
// Return 0 if latency-based priority is equivalent.
static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref,
RegReductionPQBase *SPQ) {
- // Scheduling an instruction that uses a VReg whose postincrement has not yet
- // been scheduled will induce a copy. Model this as an extra cycle of latency.
- int LPenalty = hasVRegCycleUse(left) ? 1 : 0;
- int RPenalty = hasVRegCycleUse(right) ? 1 : 0;
- int LHeight = (int)left->getHeight() + LPenalty;
- int RHeight = (int)right->getHeight() + RPenalty;
+ // If the two nodes share an operand and one of them has a single
+ // use that is a live out copy, favor the one that is live out. Otherwise
+ // it will be difficult to eliminate the copy if the instruction is a
+ // loop induction variable update. e.g.
+ // BB:
+ // sub r1, r3, #1
+ // str r0, [r2, r3]
+ // mov r3, r1
+ // cmp
+ // bne BB
+ bool SharePred = UnitsSharePred(left, right);
+ // FIXME: Only adjust if BB is a loop back edge.
+ // FIXME: What's the cost of a copy?
+ int LBonus = (SharePred && hasOnlyLiveOutUses(left)) ? 1 : 0;
+ int RBonus = (SharePred && hasOnlyLiveOutUses(right)) ? 1 : 0;
+ int LHeight = (int)left->getHeight() - LBonus;
+ int RHeight = (int)right->getHeight() - RBonus;
bool LStall = (!checkPref || left->SchedulingPref == Sched::Latency) &&
BUHasStall(left, LHeight, SPQ);
@@ -2152,47 +2128,36 @@ static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref,
// If scheduling either one of the node will cause a pipeline stall, sort
// them according to their height.
if (LStall) {
- if (!RStall) {
- DEBUG(++FactorCount[FactStall]);
+ if (!RStall)
return 1;
- }
- if (LHeight != RHeight) {
- DEBUG(++FactorCount[FactStall]);
+ if (LHeight != RHeight)
return LHeight > RHeight ? 1 : -1;
- }
- } else if (RStall) {
- DEBUG(++FactorCount[FactStall]);
+ } else if (RStall)
return -1;
- }
// If either node is scheduling for latency, sort them by height/depth
// and latency.
if (!checkPref || (left->SchedulingPref == Sched::Latency ||
right->SchedulingPref == Sched::Latency)) {
if (DisableSchedCycles) {
- if (LHeight != RHeight) {
- DEBUG(++FactorCount[FactHeight]);
+ if (LHeight != RHeight)
return LHeight > RHeight ? 1 : -1;
- }
}
else {
// If neither instruction stalls (!LStall && !RStall) then
// its height is already covered so only its depth matters. We also reach
// this if both stall but have the same height.
- int LDepth = left->getDepth() - LPenalty;
- int RDepth = right->getDepth() - RPenalty;
+ unsigned LDepth = left->getDepth();
+ unsigned RDepth = right->getDepth();
if (LDepth != RDepth) {
- DEBUG(++FactorCount[FactDepth]);
DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum
<< ") depth " << LDepth << " vs SU (" << right->NodeNum
<< ") depth " << RDepth << "\n");
return LDepth < RDepth ? 1 : -1;
}
}
- if (left->Latency != right->Latency) {
- DEBUG(++FactorCount[FactOther]);
+ if (left->Latency != right->Latency)
return left->Latency > right->Latency ? 1 : -1;
- }
}
return 0;
}
@@ -2204,19 +2169,7 @@ static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
DEBUG(++FactorCount[FactStatic]);
return LPriority > RPriority;
}
- else if(LPriority == 0) {
- // Schedule zero-latency TokenFactor below any other special
- // nodes. The alternative may be to avoid artificially boosting the
- // TokenFactor's height when it is scheduled, but we currently rely on an
- // instruction's final height to equal the cycle in which it is scheduled,
- // so heights are monotonically increasing.
- unsigned LOpc = left->getNode() ? left->getNode()->getOpcode() : 0;
- unsigned ROpc = right->getNode() ? right->getNode()->getOpcode() : 0;
- if (LOpc == ISD::TokenFactor)
- return false;
- if (ROpc == ISD::TokenFactor)
- return true;
- }
+ DEBUG(++FactorCount[FactOther]);
// Try schedule def + use closer when Sethi-Ullman numbers are the same.
// e.g.
@@ -2237,18 +2190,14 @@ static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
// This creates more short live intervals.
unsigned LDist = closestSucc(left);
unsigned RDist = closestSucc(right);
- if (LDist != RDist) {
- DEBUG(++FactorCount[FactOther]);
+ if (LDist != RDist)
return LDist < RDist;
- }
// How many registers becomes live when the node is scheduled.
unsigned LScratch = calcMaxScratches(left);
unsigned RScratch = calcMaxScratches(right);
- if (LScratch != RScratch) {
- DEBUG(++FactorCount[FactOther]);
+ if (LScratch != RScratch)
return LScratch > RScratch;
- }
if (!DisableSchedCycles) {
int result = BUCompareLatency(left, right, false /*checkPref*/, SPQ);
@@ -2256,20 +2205,15 @@ static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
return result > 0;
}
else {
- if (left->getHeight() != right->getHeight()) {
- DEBUG(++FactorCount[FactHeight]);
+ if (left->getHeight() != right->getHeight())
return left->getHeight() > right->getHeight();
- }
- if (left->getDepth() != right->getDepth()) {
- DEBUG(++FactorCount[FactDepth]);
+ if (left->getDepth() != right->getDepth())
return left->getDepth() < right->getDepth();
- }
}
assert(left->NodeQueueId && right->NodeQueueId &&
"NodeQueueId cannot be zero");
- DEBUG(++FactorCount[FactOther]);
return (left->NodeQueueId > right->NodeQueueId);
}
@@ -2320,22 +2264,24 @@ bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
// Avoid causing spills. If register pressure is high, schedule for
// register pressure reduction.
if (LHigh && !RHigh) {
- DEBUG(++FactorCount[FactPressureDiff]);
DEBUG(dbgs() << " pressure SU(" << left->NodeNum << ") > SU("
<< right->NodeNum << ")\n");
return true;
}
else if (!LHigh && RHigh) {
- DEBUG(++FactorCount[FactPressureDiff]);
DEBUG(dbgs() << " pressure SU(" << right->NodeNum << ") > SU("
<< left->NodeNum << ")\n");
return false;
}
- if (!LHigh && !RHigh) {
- int result = BUCompareLatency(left, right, true /*checkPref*/, SPQ);
- if (result != 0)
- return result > 0;
+ int result = 0;
+ if (!DisableSchedVRegCycle) {
+ result = BUCompareVRegCycle(left, right);
}
+ if (result == 0 && !LHigh && !RHigh) {
+ result = BUCompareLatency(left, right, true /*checkPref*/, SPQ);
+ }
+ if (result != 0)
+ return result > 0;
return BURRSort(left, right, SPQ);
}
@@ -2401,6 +2347,12 @@ bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
if (RReduce && !LReduce) return true;
}
+ if (!DisableSchedVRegCycle) {
+ int result = BUCompareVRegCycle(left, right);
+ if (result != 0)
+ return result > 0;
+ }
+
if (!DisableSchedLiveUses && (LLiveUses != RLiveUses)) {
DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses
<< " != SU(" << right->NodeNum << "): " << RLiveUses << "\n");
@@ -2439,24 +2391,6 @@ bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
return BURRSort(left, right, SPQ);
}
-void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) {
- SUnits = &sunits;
- // Add pseudo dependency edges for two-address nodes.
- AddPseudoTwoAddrDeps();
- // Reroute edges to nodes with multiple uses.
- if (!TracksRegPressure)
- PrescheduleNodesWithMultipleUses();
- // Calculate node priorities.
- CalculateSethiUllmanNumbers();
-
- // For single block loops, mark nodes that look like canonical IV increments.
- if (scheduleDAG->BB->isSuccessor(scheduleDAG->BB)) {
- for (unsigned i = 0, e = sunits.size(); i != e; ++i) {
- initVRegCycle(&sunits[i]);
- }
- }
-}
-
//===----------------------------------------------------------------------===//
// Preschedule for Register Pressure
//===----------------------------------------------------------------------===//