summaryrefslogtreecommitdiff
path: root/lib/CodeGen
diff options
context:
space:
mode:
authorAndrew Trick <atrick@apple.com>2012-06-05 21:11:27 +0000
committerAndrew Trick <atrick@apple.com>2012-06-05 21:11:27 +0000
commitb7e0289fb320c8440ba5eed121a8b932dbd806a2 (patch)
treed2b65520c1191a79fa7dbccaf1947a82ede9d1ca /lib/CodeGen
parent1d72dadddbd3ec9a393dbaadda4c459ab1c4aeb1 (diff)
downloadllvm-b7e0289fb320c8440ba5eed121a8b932dbd806a2.tar.gz
llvm-b7e0289fb320c8440ba5eed121a8b932dbd806a2.tar.bz2
llvm-b7e0289fb320c8440ba5eed121a8b932dbd806a2.tar.xz
misched: API for minimum vs. expected latency.
Minimum latency determines per-cycle scheduling groups. Expected latency determines critical path and cost. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@158021 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/CodeGen')
-rw-r--r--lib/CodeGen/MachineScheduler.cpp112
-rw-r--r--lib/CodeGen/ScheduleDAGInstrs.cpp79
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h6
-rw-r--r--lib/CodeGen/TwoAddressInstructionPass.cpp2
4 files changed, 100 insertions, 99 deletions
diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp
index c318d84617..aa591779ba 100644
--- a/lib/CodeGen/MachineScheduler.cpp
+++ b/lib/CodeGen/MachineScheduler.cpp
@@ -21,8 +21,9 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
-#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -394,6 +395,12 @@ public:
return RegionCriticalPSets;
}
+ /// getIssueWidth - Return the max instructions per scheduling group.
+ ///
+ unsigned getIssueWidth() const {
+ return InstrItins ? InstrItins->Props.IssueWidth : 1;
+ }
+
protected:
void initRegPressure();
void updateScheduledPressure(std::vector<unsigned> NewMaxPressure);
@@ -787,13 +794,16 @@ class ConvergingScheduler : public MachineSchedStrategy {
/// MinReadyCycle - Cycle of the soonest available instruction.
unsigned MinReadyCycle;
+ // Remember the greatest min operand latency.
+ unsigned MaxMinLatency;
+
/// Pending queues extend the ready queues with the same ID and the
/// PendingFlag set.
SchedBoundary(unsigned ID, const Twine &Name):
Available(ID, Name+".A"),
Pending(ID << ConvergingScheduler::LogMaxQID, Name+".P"),
CheckPending(false), HazardRec(0), CurrCycle(0), IssueCount(0),
- MinReadyCycle(UINT_MAX) {}
+ MinReadyCycle(UINT_MAX), MaxMinLatency(0) {}
~SchedBoundary() { delete HazardRec; }
@@ -805,6 +815,8 @@ class ConvergingScheduler : public MachineSchedStrategy {
void bumpCycle();
+ void bumpNode(SUnit *SU, unsigned IssueWidth);
+
void releasePending();
void removeReady(SUnit *SU);
@@ -868,25 +880,53 @@ void ConvergingScheduler::initialize(ScheduleDAGMI *dag) {
}
void ConvergingScheduler::releaseTopNode(SUnit *SU) {
- Top.releaseNode(SU, SU->getDepth());
+ if (SU->isScheduled)
+ return;
+
+ for (SUnit::succ_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle;
+ unsigned Latency =
+ DAG->computeOperandLatency(I->getSUnit(), SU, *I, /*FindMin=*/true);
+#ifndef NDEBUG
+ Top.MaxMinLatency = std::max(Latency, Top.MaxMinLatency);
+#endif
+ if (SU->TopReadyCycle < PredReadyCycle + Latency)
+ SU->TopReadyCycle = PredReadyCycle + Latency;
+ }
+ Top.releaseNode(SU, SU->TopReadyCycle);
}
void ConvergingScheduler::releaseBottomNode(SUnit *SU) {
- Bot.releaseNode(SU, SU->getHeight());
+ if (SU->isScheduled)
+ return;
+
+ assert(SU->getInstr() && "Scheduled SUnit must have instr");
+
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle;
+ unsigned Latency =
+ DAG->computeOperandLatency(SU, I->getSUnit(), *I, /*FindMin=*/true);
+#ifndef NDEBUG
+ Bot.MaxMinLatency = std::max(Latency, Bot.MaxMinLatency);
+#endif
+ if (SU->BotReadyCycle < SuccReadyCycle + Latency)
+ SU->BotReadyCycle = SuccReadyCycle + Latency;
+ }
+ Bot.releaseNode(SU, SU->BotReadyCycle);
}
void ConvergingScheduler::SchedBoundary::releaseNode(SUnit *SU,
unsigned ReadyCycle) {
- if (SU->isScheduled)
- return;
-
if (ReadyCycle < MinReadyCycle)
MinReadyCycle = ReadyCycle;
// Check for interlocks first. For the purpose of other heuristics, an
// instruction that cannot issue appears as if it's not in the ReadyQueue.
- if (HazardRec->isEnabled()
- && HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard)
+ if (ReadyCycle > CurrCycle
+ || (HazardRec->isEnabled() && (HazardRec->getHazardType(SU)
+ != ScheduleHazardRecognizer::NoHazard)))
Pending.push(SU);
else
Available.push(SU);
@@ -900,10 +940,11 @@ void ConvergingScheduler::SchedBoundary::bumpCycle() {
unsigned NextCycle = std::max(CurrCycle + 1, MinReadyCycle);
if (!HazardRec->isEnabled()) {
- // Bypass lots of virtual calls in case of long latency.
+ // Bypass HazardRec virtual calls.
CurrCycle = NextCycle;
}
else {
+ // Bypass getHazardType calls in case of long latency.
for (; CurrCycle != NextCycle; ++CurrCycle) {
if (isTop())
HazardRec->AdvanceCycle();
@@ -917,6 +958,26 @@ void ConvergingScheduler::SchedBoundary::bumpCycle() {
<< CurrCycle << '\n');
}
+/// Move the boundary of scheduled code by one SUnit.
+void ConvergingScheduler::SchedBoundary::bumpNode(SUnit *SU,
+ unsigned IssueWidth) {
+ // Update the reservation table.
+ if (HazardRec->isEnabled()) {
+ if (!isTop() && SU->isCall) {
+ // Calls are scheduled with their preceding instructions. For bottom-up
+ // scheduling, clear the pipeline state before emitting.
+ HazardRec->Reset();
+ }
+ HazardRec->EmitInstruction(SU);
+ }
+ // Check the instruction group size limit.
+ ++IssueCount;
+ if (IssueCount == IssueWidth) {
+ DEBUG(dbgs() << "*** Max instrs at cycle " << CurrCycle << '\n');
+ bumpCycle();
+ }
+}
+
/// Release pending ready nodes in to the available queue. This makes them
/// visible to heuristics.
void ConvergingScheduler::SchedBoundary::releasePending() {
@@ -928,7 +989,7 @@ void ConvergingScheduler::SchedBoundary::releasePending() {
// so, add them to the available queue.
for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
SUnit *SU = *(Pending.begin()+i);
- unsigned ReadyCycle = isTop() ? SU->getHeight() : SU->getDepth();
+ unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
if (ReadyCycle < MinReadyCycle)
MinReadyCycle = ReadyCycle;
@@ -965,7 +1026,8 @@ SUnit *ConvergingScheduler::SchedBoundary::pickOnlyChoice() {
releasePending();
for (unsigned i = 0; Available.empty(); ++i) {
- assert(i <= HazardRec->getMaxLookAhead() && "permanent hazard"); (void)i;
+ assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) &&
+ "permanent hazard"); (void)i;
bumpCycle();
releasePending();
}
@@ -1205,27 +1267,15 @@ SUnit *ConvergingScheduler::pickNode(bool &IsTopNode) {
/// Update the scheduler's state after scheduling a node. This is the same node
/// that was just returned by pickNode(). However, ScheduleDAGMI needs to update
-/// it's state based on the current cycle before MachineSchedStrategy.
+/// it's state based on the current cycle before MachineSchedStrategy does.
void ConvergingScheduler::schedNode(SUnit *SU, bool IsTopNode) {
- // Update the reservation table.
- if (IsTopNode && Top.HazardRec->isEnabled()) {
- Top.HazardRec->EmitInstruction(SU);
- if (Top.HazardRec->atIssueLimit()) {
- DEBUG(dbgs() << "*** Max instrs at cycle " << Top.CurrCycle << '\n');
- Top.bumpCycle();
- }
+ if (IsTopNode) {
+ SU->TopReadyCycle = Top.CurrCycle;
+ Top.bumpNode(SU, DAG->getIssueWidth());
}
- else if (Bot.HazardRec->isEnabled()) {
- if (SU->isCall) {
- // Calls are scheduled with their preceding instructions. For bottom-up
- // scheduling, clear the pipeline state before emitting.
- Bot.HazardRec->Reset();
- }
- Bot.HazardRec->EmitInstruction(SU);
- if (Bot.HazardRec->atIssueLimit()) {
- DEBUG(dbgs() << "*** Max instrs at cycle " << Bot.CurrCycle << '\n');
- Bot.bumpCycle();
- }
+ else {
+ SU->BotReadyCycle = Bot.CurrCycle;
+ Bot.bumpNode(SU, DAG->getIssueWidth());
}
}
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index 773a29d0c2..875e012ed1 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -271,10 +271,12 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU,
// Adjust the dependence latency using operand def/use
// information (if any), and then allow the target to
// perform its own adjustments.
- const SDep& dep = SDep(SU, SDep::Data, LDataLatency, *Alias);
+ SDep dep(SU, SDep::Data, LDataLatency, *Alias);
if (!UnitLatencies) {
- computeOperandLatency(SU, UseSU, const_cast<SDep &>(dep));
- ST.adjustSchedDependency(SU, UseSU, const_cast<SDep &>(dep));
+ unsigned Latency = computeOperandLatency(SU, UseSU, dep);
+ dep.setLatency(Latency);
+
+ ST.adjustSchedDependency(SU, UseSU, dep);
}
UseSU->addPred(dep);
}
@@ -461,11 +463,13 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
// Create a data dependence.
//
// TODO: Handle "special" address latencies cleanly.
- const SDep &dep = SDep(DefSU, SDep::Data, DefSU->Latency, Reg);
+ SDep dep(DefSU, SDep::Data, DefSU->Latency, Reg);
if (!UnitLatencies) {
// Adjust the dependence latency using operand def/use information, then
// allow the target to perform its own adjustments.
- computeOperandLatency(DefSU, SU, const_cast<SDep &>(dep));
+ unsigned Latency = computeOperandLatency(DefSU, SU, const_cast<SDep &>(dep));
+ dep.setLatency(Latency);
+
const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
ST.adjustSchedDependency(DefSU, SU, const_cast<SDep &>(dep));
}
@@ -970,8 +974,9 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
}
void ScheduleDAGInstrs::computeLatency(SUnit *SU) {
- // Compute the latency for the node.
- if (!InstrItins || InstrItins->isEmpty()) {
+ // Compute the latency for the node. We only provide a default for missing
+ // itineraries. Empty itineraries still have latency properties.
+ if (!InstrItins) {
SU->Latency = 1;
// Simplistic target-independent heuristic: assume that loads take
@@ -983,63 +988,15 @@ void ScheduleDAGInstrs::computeLatency(SUnit *SU) {
}
}
-void ScheduleDAGInstrs::computeOperandLatency(SUnit *Def, SUnit *Use,
- SDep& dep) const {
- if (!InstrItins || InstrItins->isEmpty())
- return;
-
+unsigned ScheduleDAGInstrs::computeOperandLatency(SUnit *Def, SUnit *Use,
+ const SDep& dep,
+ bool FindMin) const {
// For a data dependency with a known register...
if ((dep.getKind() != SDep::Data) || (dep.getReg() == 0))
- return;
+ return 1;
- const unsigned Reg = dep.getReg();
-
- // ... find the definition of the register in the defining
- // instruction
- MachineInstr *DefMI = Def->getInstr();
- int DefIdx = DefMI->findRegisterDefOperandIdx(Reg);
- if (DefIdx != -1) {
- const MachineOperand &MO = DefMI->getOperand(DefIdx);
- if (MO.isReg() && MO.isImplicit() &&
- DefIdx >= (int)DefMI->getDesc().getNumOperands()) {
- // This is an implicit def, getOperandLatency() won't return the correct
- // latency. e.g.
- // %D6<def>, %D7<def> = VLD1q16 %R2<kill>, 0, ..., %Q3<imp-def>
- // %Q1<def> = VMULv8i16 %Q1<kill>, %Q3<kill>, ...
- // What we want is to compute latency between def of %D6/%D7 and use of
- // %Q3 instead.
- unsigned Op2 = DefMI->findRegisterDefOperandIdx(Reg, false, true, TRI);
- if (DefMI->getOperand(Op2).isReg())
- DefIdx = Op2;
- }
- MachineInstr *UseMI = Use->getInstr();
- // For all uses of the register, calculate the maxmimum latency
- int Latency = -1;
- if (UseMI) {
- for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = UseMI->getOperand(i);
- if (!MO.isReg() || !MO.isUse())
- continue;
- unsigned MOReg = MO.getReg();
- if (MOReg != Reg)
- continue;
-
- int UseCycle = TII->getOperandLatency(InstrItins, DefMI, DefIdx,
- UseMI, i);
- Latency = std::max(Latency, UseCycle);
- }
- } else {
- // UseMI is null, then it must be a scheduling barrier.
- if (!InstrItins || InstrItins->isEmpty())
- return;
- unsigned DefClass = DefMI->getDesc().getSchedClass();
- Latency = InstrItins->getOperandCycle(DefClass, DefIdx);
- }
-
- // If we found a latency, then replace the existing dependence latency.
- if (Latency >= 0)
- dep.setLatency(Latency);
- }
+ return TII->computeOperandLatency(InstrItins, TRI, Def->getInstr(),
+ Use->getInstr(), dep.getReg(), FindMin);
}
void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const {
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
index 75940ec33d..5384576a0c 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
@@ -98,12 +98,6 @@ namespace llvm {
///
virtual void computeLatency(SUnit *SU);
- /// computeOperandLatency - Override dependence edge latency using
- /// operand use/def information
- ///
- virtual void computeOperandLatency(SUnit *Def, SUnit *Use,
- SDep& dep) const { }
-
virtual void computeOperandLatency(SDNode *Def, SDNode *Use,
unsigned OpIdx, SDep& dep) const;
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp
index 5218aa1f7a..ec2b577230 100644
--- a/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -1046,7 +1046,7 @@ bool TwoAddressInstructionPass::isDefTooClose(unsigned Reg, unsigned Dist,
return true; // Below MI
unsigned DefDist = DDI->second;
assert(Dist > DefDist && "Visited def already?");
- if (TII->getInstrLatency(InstrItins, DefMI) > (int)(Dist - DefDist))
+ if (TII->getInstrLatency(InstrItins, DefMI) > (Dist - DefDist))
return true;
}
return false;