summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/llvm/CodeGen/LatencyPriorityQueue.h18
-rw-r--r--include/llvm/CodeGen/ScheduleDAG.h68
-rw-r--r--include/llvm/CodeGen/ScheduleDAGInstrs.h10
-rw-r--r--lib/CodeGen/LatencyPriorityQueue.cpp41
-rw-r--r--lib/CodeGen/PostRASchedulerList.cpp98
-rw-r--r--lib/CodeGen/ScheduleDAG.cpp241
-rw-r--r--lib/CodeGen/ScheduleDAGInstrs.cpp68
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp19
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp12
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp67
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp8
-rw-r--r--test/CodeGen/X86/fold-pcmpeqd-0.ll6
12 files changed, 357 insertions, 299 deletions
diff --git a/include/llvm/CodeGen/LatencyPriorityQueue.h b/include/llvm/CodeGen/LatencyPriorityQueue.h
index f7eb3a62a2..71fae2aeab 100644
--- a/include/llvm/CodeGen/LatencyPriorityQueue.h
+++ b/include/llvm/CodeGen/LatencyPriorityQueue.h
@@ -34,10 +34,6 @@ namespace llvm {
// SUnits - The SUnits for the current graph.
std::vector<SUnit> *SUnits;
- // Latencies - The latency (max of latency from this node to the bb exit)
- // for each node.
- std::vector<int> Latencies;
-
/// NumNodesSolelyBlocking - This vector contains, for every node in the
/// Queue, the number of nodes that the node is the sole unscheduled
/// predecessor for. This is used as a tie-breaker heuristic for better
@@ -51,29 +47,23 @@ public:
void initNodes(std::vector<SUnit> &sunits) {
SUnits = &sunits;
- // Calculate node priorities.
- CalculatePriorities();
+ NumNodesSolelyBlocking.resize(SUnits->size(), 0);
}
void addNode(const SUnit *SU) {
- Latencies.resize(SUnits->size(), -1);
NumNodesSolelyBlocking.resize(SUnits->size(), 0);
- CalcLatency(*SU);
}
void updateNode(const SUnit *SU) {
- Latencies[SU->NodeNum] = -1;
- CalcLatency(*SU);
}
void releaseState() {
SUnits = 0;
- Latencies.clear();
}
unsigned getLatency(unsigned NodeNum) const {
- assert(NodeNum < Latencies.size());
- return Latencies[NodeNum];
+ assert(NodeNum < (*SUnits).size());
+ return (*SUnits)[NodeNum].getHeight();
}
unsigned getNumSolelyBlockNodes(unsigned NodeNum) const {
@@ -114,8 +104,6 @@ public:
void ScheduledNode(SUnit *Node);
private:
- void CalculatePriorities();
- void CalcLatency(const SUnit &SU);
void AdjustPriorityOfUnscheduledPreds(SUnit *SU);
SUnit *getSingleUnscheduledPred(SUnit *SU);
};
diff --git a/include/llvm/CodeGen/ScheduleDAG.h b/include/llvm/CodeGen/ScheduleDAG.h
index 2f6fd00928..ee640e24a1 100644
--- a/include/llvm/CodeGen/ScheduleDAG.h
+++ b/include/llvm/CodeGen/ScheduleDAG.h
@@ -242,10 +242,12 @@ namespace llvm {
bool isPending : 1; // True once pending.
bool isAvailable : 1; // True once available.
bool isScheduled : 1; // True once scheduled.
- unsigned CycleBound; // Upper/lower cycle to be scheduled at.
- unsigned Cycle; // Once scheduled, the cycle of the op.
- unsigned Depth; // Node depth;
- unsigned Height; // Node height;
+ private:
+ bool isDepthCurrent : 1; // True if Depth is current.
+ bool isHeightCurrent : 1; // True if Height is current.
+ unsigned Depth; // Node depth.
+ unsigned Height; // Node height.
+ public:
const TargetRegisterClass *CopyDstRC; // Is a special copy node if not null.
const TargetRegisterClass *CopySrcRC;
@@ -256,7 +258,8 @@ namespace llvm {
Latency(0), NumPreds(0), NumSuccs(0), NumPredsLeft(0), NumSuccsLeft(0),
isTwoAddress(false), isCommutable(false), hasPhysRegDefs(false),
isPending(false), isAvailable(false), isScheduled(false),
- CycleBound(0), Cycle(~0u), Depth(0), Height(0),
+ isDepthCurrent(false), isHeightCurrent(false),
+ Depth(0), Height(0),
CopyDstRC(NULL), CopySrcRC(NULL) {}
/// SUnit - Construct an SUnit for post-regalloc scheduling to represent
@@ -266,7 +269,8 @@ namespace llvm {
Latency(0), NumPreds(0), NumSuccs(0), NumPredsLeft(0), NumSuccsLeft(0),
isTwoAddress(false), isCommutable(false), hasPhysRegDefs(false),
isPending(false), isAvailable(false), isScheduled(false),
- CycleBound(0), Cycle(~0u), Depth(0), Height(0),
+ isDepthCurrent(false), isHeightCurrent(false),
+ Depth(0), Height(0),
CopyDstRC(NULL), CopySrcRC(NULL) {}
/// setNode - Assign the representative SDNode for this SUnit.
@@ -307,6 +311,41 @@ namespace llvm {
/// the specified node.
void removePred(const SDep &D);
+ /// getHeight - Return the height of this node, which is the length of the
+ /// maximum path down to any node with has no successors.
+ unsigned getDepth() const {
+ if (!isDepthCurrent) const_cast<SUnit *>(this)->ComputeDepth();
+ return Depth;
+ }
+
+ /// getHeight - Return the height of this node, which is the length of the
+ /// maximum path up to any node with has no predecessors.
+ unsigned getHeight() const {
+ if (!isHeightCurrent) const_cast<SUnit *>(this)->ComputeHeight();
+ return Height;
+ }
+
+ /// setDepthToAtLeast - If NewDepth is greater than this node's depth
+ /// value, set it to be the new depth value. This also recursively
+ /// marks successor nodes dirty.
+ void setDepthToAtLeast(unsigned NewDepth);
+
+ /// setDepthToAtLeast - If NewDepth is greater than this node's depth
+ /// value, set it to be the new height value. This also recursively
+ /// marks predecessor nodes dirty.
+ void setHeightToAtLeast(unsigned NewHeight);
+
+ /// setDepthDirty - Set a flag in this node to indicate that its
+ /// stored Depth value will require recomputation the next time
+ /// getDepth() is called.
+ void setDepthDirty();
+
+ /// setHeightDirty - Set a flag in this node to indicate that its
+ /// stored Height value will require recomputation the next time
+ /// getHeight() is called.
+ void setHeightDirty();
+
+ /// isPred - Test if node N is a predecessor of this node.
bool isPred(SUnit *N) {
for (unsigned i = 0, e = (unsigned)Preds.size(); i != e; ++i)
if (Preds[i].getSUnit() == N)
@@ -314,6 +353,7 @@ namespace llvm {
return false;
}
+ /// isSucc - Test if node N is a successor of this node.
bool isSucc(SUnit *N) {
for (unsigned i = 0, e = (unsigned)Succs.size(); i != e; ++i)
if (Succs[i].getSUnit() == N)
@@ -324,6 +364,10 @@ namespace llvm {
void dump(const ScheduleDAG *G) const;
void dumpAll(const ScheduleDAG *G) const;
void print(raw_ostream &O, const ScheduleDAG *G) const;
+
+ private:
+ void ComputeDepth();
+ void ComputeHeight();
};
//===--------------------------------------------------------------------===//
@@ -397,12 +441,7 @@ namespace llvm {
/// ComputeLatency - Compute node latency.
///
- virtual void ComputeLatency(SUnit *SU) { SU->Latency = 1; }
-
- /// CalculateDepths, CalculateHeights - Calculate node depth / height.
- ///
- void CalculateDepths();
- void CalculateHeights();
+ virtual void ComputeLatency(SUnit *SU) = 0;
protected:
/// EmitNoop - Emit a noop instruction.
@@ -440,6 +479,11 @@ namespace llvm {
void EmitCrossRCCopy(SUnit *SU, DenseMap<SUnit*, unsigned> &VRBaseMap);
+ /// ForceUnitLatencies - Return true if all scheduling edges should be given a
+ /// latency value of one. The default is to return false; schedulers may
+ /// override this as needed.
+ virtual bool ForceUnitLatencies() const { return false; }
+
private:
/// EmitLiveInCopy - Emit a copy for a live in physical register. If the
/// physical register has only a single copy use, then coalesced the copy
diff --git a/include/llvm/CodeGen/ScheduleDAGInstrs.h b/include/llvm/CodeGen/ScheduleDAGInstrs.h
index 2b6c1d3a55..c74ef57674 100644
--- a/include/llvm/CodeGen/ScheduleDAGInstrs.h
+++ b/include/llvm/CodeGen/ScheduleDAGInstrs.h
@@ -18,10 +18,18 @@
#include "llvm/CodeGen/ScheduleDAG.h"
namespace llvm {
+ class MachineLoopInfo;
+ class MachineDominatorTree;
+
class ScheduleDAGInstrs : public ScheduleDAG {
+ const MachineLoopInfo &MLI;
+ const MachineDominatorTree &MDT;
+
public:
ScheduleDAGInstrs(MachineBasicBlock *bb,
- const TargetMachine &tm);
+ const TargetMachine &tm,
+ const MachineLoopInfo &mli,
+ const MachineDominatorTree &mdt);
virtual ~ScheduleDAGInstrs() {}
diff --git a/lib/CodeGen/LatencyPriorityQueue.cpp b/lib/CodeGen/LatencyPriorityQueue.cpp
index 2abbf364e3..7ad9ac9b51 100644
--- a/lib/CodeGen/LatencyPriorityQueue.cpp
+++ b/lib/CodeGen/LatencyPriorityQueue.cpp
@@ -41,47 +41,6 @@ bool latency_sort::operator()(const SUnit *LHS, const SUnit *RHS) const {
}
-/// CalcNodePriority - Calculate the maximal path from the node to the exit.
-///
-void LatencyPriorityQueue::CalcLatency(const SUnit &SU) {
- int &Latency = Latencies[SU.NodeNum];
- if (Latency != -1)
- return;
-
- std::vector<const SUnit*> WorkList;
- WorkList.push_back(&SU);
- while (!WorkList.empty()) {
- const SUnit *Cur = WorkList.back();
- bool AllDone = true;
- unsigned MaxSuccLatency = 0;
- for (SUnit::const_succ_iterator I = Cur->Succs.begin(),E = Cur->Succs.end();
- I != E; ++I) {
- int SuccLatency = Latencies[I->getSUnit()->NodeNum];
- if (SuccLatency == -1) {
- AllDone = false;
- WorkList.push_back(I->getSUnit());
- } else {
- unsigned NewLatency = SuccLatency + I->getLatency();
- MaxSuccLatency = std::max(MaxSuccLatency, NewLatency);
- }
- }
- if (AllDone) {
- Latencies[Cur->NodeNum] = MaxSuccLatency;
- WorkList.pop_back();
- }
- }
-}
-
-/// CalculatePriorities - Calculate priorities of all scheduling units.
-void LatencyPriorityQueue::CalculatePriorities() {
- Latencies.assign(SUnits->size(), -1);
- NumNodesSolelyBlocking.assign(SUnits->size(), 0);
-
- // For each node, calculate the maximal path from the node to the exit.
- for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
- CalcLatency((*SUnits)[i]);
-}
-
/// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor
/// of SU, return it, otherwise return null.
SUnit *LatencyPriorityQueue::getSingleUnscheduledPred(SUnit *SU) {
diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp
index 2f7c0118a0..0fc6c3370b 100644
--- a/lib/CodeGen/PostRASchedulerList.cpp
+++ b/lib/CodeGen/PostRASchedulerList.cpp
@@ -23,7 +23,9 @@
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/CodeGen/LatencyPriorityQueue.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
@@ -49,6 +51,14 @@ namespace {
static char ID;
PostRAScheduler() : MachineFunctionPass(&ID) {}
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
const char *getPassName() const {
return "Post RA top-down list latency scheduler";
}
@@ -72,8 +82,10 @@ namespace {
ScheduleDAGTopologicalSort Topo;
public:
- SchedulePostRATDList(MachineBasicBlock *mbb, const TargetMachine &tm)
- : ScheduleDAGInstrs(mbb, tm), Topo(SUnits) {}
+ SchedulePostRATDList(MachineBasicBlock *mbb, const TargetMachine &tm,
+ const MachineLoopInfo &MLI,
+ const MachineDominatorTree &MDT)
+ : ScheduleDAGInstrs(mbb, tm, MLI, MDT), Topo(SUnits) {}
void Schedule();
@@ -88,11 +100,14 @@ namespace {
bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
DOUT << "PostRAScheduler\n";
+ const MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
+ const MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
+
// Loop over all of the basic blocks
for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
MBB != MBBe; ++MBB) {
- SchedulePostRATDList Scheduler(MBB, Fn.getTarget());
+ SchedulePostRATDList Scheduler(MBB, Fn.getTarget(), MLI, MDT);
Scheduler.Run();
@@ -142,6 +157,28 @@ getInstrOperandRegClass(const TargetRegisterInfo *TRI,
return TRI->getRegClass(II.OpInfo[Op].RegClass);
}
+/// CriticalPathStep - Return the next SUnit after SU on the bottom-up
+/// critical path.
+static SDep *CriticalPathStep(SUnit *SU) {
+ SDep *Next = 0;
+ unsigned NextDepth = 0;
+ // Find the predecessor edge with the greatest depth.
+ for (SUnit::pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end();
+ P != PE; ++P) {
+ SUnit *PredSU = P->getSUnit();
+ unsigned PredLatency = P->getLatency();
+ unsigned PredTotalLatency = PredSU->getDepth() + PredLatency;
+ // In the case of a latency tie, prefer an anti-dependency edge over
+ // other types of edges.
+ if (NextDepth < PredTotalLatency ||
+ (NextDepth == PredTotalLatency && P->getKind() == SDep::Anti)) {
+ NextDepth = PredTotalLatency;
+ Next = &*P;
+ }
+ }
+ return Next;
+}
+
/// BreakAntiDependencies - Identifiy anti-dependencies along the critical path
/// of the ScheduleDAG and break them by renaming registers.
///
@@ -150,34 +187,16 @@ bool SchedulePostRATDList::BreakAntiDependencies() {
// so just duck out immediately if the block is empty.
if (BB->empty()) return false;
- Topo.InitDAGTopologicalSorting();
-
- // Compute a critical path for the DAG.
+ // Find the node at the bottom of the critical path.
SUnit *Max = 0;
- std::vector<SDep *> CriticalPath(SUnits.size());
- for (ScheduleDAGTopologicalSort::const_iterator I = Topo.begin(),
- E = Topo.end(); I != E; ++I) {
- SUnit *SU = &SUnits[*I];
- for (SUnit::pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end();
- P != PE; ++P) {
- SUnit *PredSU = P->getSUnit();
- // This assumes that there's no delay for reusing registers.
- unsigned PredLatency = P->getLatency();
- unsigned PredTotalLatency = PredSU->CycleBound + PredLatency;
- if (SU->CycleBound < PredTotalLatency ||
- (SU->CycleBound == PredTotalLatency &&
- P->getKind() == SDep::Anti)) {
- SU->CycleBound = PredTotalLatency;
- CriticalPath[*I] = &*P;
- }
- }
- // Keep track of the node at the end of the critical path.
- if (!Max || SU->CycleBound + SU->Latency > Max->CycleBound + Max->Latency)
+ for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+ SUnit *SU = &SUnits[i];
+ if (!Max || SU->getDepth() + SU->Latency > Max->getDepth() + Max->Latency)
Max = SU;
}
DOUT << "Critical path has total latency "
- << (Max ? Max->CycleBound + Max->Latency : 0) << "\n";
+ << (Max ? Max->getDepth() + Max->Latency : 0) << "\n";
// Walk the critical path from the bottom up. Collect all anti-dependence
// edges on the critical path. Skip anti-dependencies between SUnits that
@@ -195,9 +214,9 @@ bool SchedulePostRATDList::BreakAntiDependencies() {
// the anti-dependencies in an instruction in order to be effective.
BitVector AllocatableSet = TRI->getAllocatableSet(*MF);
DenseMap<MachineInstr *, unsigned> CriticalAntiDeps;
- for (SUnit *SU = Max; CriticalPath[SU->NodeNum];
- SU = CriticalPath[SU->NodeNum]->getSUnit()) {
- SDep *Edge = CriticalPath[SU->NodeNum];
+ SUnit *SU = Max;
+ for (SDep *Edge = CriticalPathStep(SU); Edge;
+ Edge = CriticalPathStep(SU = Edge->getSUnit())) {
SUnit *NextSU = Edge->getSUnit();
// Only consider anti-dependence edges.
if (Edge->getKind() != SDep::Anti)
@@ -494,6 +513,11 @@ bool SchedulePostRATDList::BreakAntiDependencies() {
Classes[SubregReg] = 0;
RegRefs.erase(SubregReg);
}
+ for (const unsigned *Super = TRI->getSuperRegisters(Reg);
+ *Super; ++Super) {
+ unsigned SuperReg = *Super;
+ Classes[SuperReg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ }
}
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI->getOperand(i);
@@ -556,8 +580,7 @@ void SchedulePostRATDList::ReleaseSucc(SUnit *SU, SDep *SuccEdge) {
// Compute how many cycles it will be before this actually becomes
// available. This is the max of the start time of all predecessors plus
// their latencies.
- unsigned PredDoneCycle = SU->Cycle + SuccEdge->getLatency();
- SuccSU->CycleBound = std::max(SuccSU->CycleBound, PredDoneCycle);
+ SuccSU->setDepthToAtLeast(SU->getDepth() + SuccEdge->getLatency());
if (SuccSU->NumPredsLeft == 0) {
PendingQueue.push_back(SuccSU);
@@ -572,7 +595,8 @@ void SchedulePostRATDList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
DEBUG(SU->dump(this));
Sequence.push_back(SU);
- SU->Cycle = CurCycle;
+ assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
+ SU->setDepthToAtLeast(CurCycle);
// Top down: release successors.
for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
@@ -603,21 +627,21 @@ void SchedulePostRATDList::ListScheduleTopDown() {
while (!AvailableQueue.empty() || !PendingQueue.empty()) {
// Check to see if any of the pending instructions are ready to issue. If
// so, add them to the available queue.
+ unsigned MinDepth = ~0u;
for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
- if (PendingQueue[i]->CycleBound == CurCycle) {
+ if (PendingQueue[i]->getDepth() <= CurCycle) {
AvailableQueue.push(PendingQueue[i]);
PendingQueue[i]->isAvailable = true;
PendingQueue[i] = PendingQueue.back();
PendingQueue.pop_back();
--i; --e;
- } else {
- assert(PendingQueue[i]->CycleBound > CurCycle && "Non-positive latency?");
- }
+ } else if (PendingQueue[i]->getDepth() < MinDepth)
+ MinDepth = PendingQueue[i]->getDepth();
}
// If there are no instructions available, don't try to issue anything.
if (AvailableQueue.empty()) {
- ++CurCycle;
+ CurCycle = MinDepth != ~0u ? MinDepth : CurCycle + 1;
continue;
}
diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp
index 7ceab3a87b..dad5eb5d9b 100644
--- a/lib/CodeGen/ScheduleDAG.cpp
+++ b/lib/CodeGen/ScheduleDAG.cpp
@@ -33,115 +33,6 @@ ScheduleDAG::ScheduleDAG(SelectionDAG *dag, MachineBasicBlock *bb,
ScheduleDAG::~ScheduleDAG() {}
-/// CalculateDepths - compute depths using algorithms for the longest
-/// paths in the DAG
-void ScheduleDAG::CalculateDepths() {
- unsigned DAGSize = SUnits.size();
- std::vector<SUnit*> WorkList;
- WorkList.reserve(DAGSize);
-
- // Initialize the data structures
- for (unsigned i = 0, e = DAGSize; i != e; ++i) {
- SUnit *SU = &SUnits[i];
- unsigned Degree = SU->Preds.size();
- // Temporarily use the Depth field as scratch space for the degree count.
- SU->Depth = Degree;
-
- // Is it a node without dependencies?
- if (Degree == 0) {
- assert(SU->Preds.empty() && "SUnit should have no predecessors");
- // Collect leaf nodes
- WorkList.push_back(SU);
- }
- }
-
- // Process nodes in the topological order
- while (!WorkList.empty()) {
- SUnit *SU = WorkList.back();
- WorkList.pop_back();
- unsigned SUDepth = 0;
-
- // Use dynamic programming:
- // When current node is being processed, all of its dependencies
- // are already processed.
- // So, just iterate over all predecessors and take the longest path
- for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
- I != E; ++I) {
- unsigned PredDepth = I->getSUnit()->Depth;
- if (PredDepth+1 > SUDepth) {
- SUDepth = PredDepth + 1;
- }
- }
-
- SU->Depth = SUDepth;
-
- // Update degrees of all nodes depending on current SUnit
- for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
- I != E; ++I) {
- SUnit *SU = I->getSUnit();
- if (!--SU->Depth)
- // If all dependencies of the node are processed already,
- // then the longest path for the node can be computed now
- WorkList.push_back(SU);
- }
- }
-}
-
-/// CalculateHeights - compute heights using algorithms for the longest
-/// paths in the DAG
-void ScheduleDAG::CalculateHeights() {
- unsigned DAGSize = SUnits.size();
- std::vector<SUnit*> WorkList;
- WorkList.reserve(DAGSize);
-
- // Initialize the data structures
- for (unsigned i = 0, e = DAGSize; i != e; ++i) {
- SUnit *SU = &SUnits[i];
- unsigned Degree = SU->Succs.size();
- // Temporarily use the Height field as scratch space for the degree count.
- SU->Height = Degree;
-
- // Is it a node without dependencies?
- if (Degree == 0) {
- assert(SU->Succs.empty() && "Something wrong");
- assert(WorkList.empty() && "Should be empty");
- // Collect leaf nodes
- WorkList.push_back(SU);
- }
- }
-
- // Process nodes in the topological order
- while (!WorkList.empty()) {
- SUnit *SU = WorkList.back();
- WorkList.pop_back();
- unsigned SUHeight = 0;
-
- // Use dynamic programming:
- // When current node is being processed, all of its dependencies
- // are already processed.
- // So, just iterate over all successors and take the longest path
- for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
- I != E; ++I) {
- unsigned SuccHeight = I->getSUnit()->Height;
- if (SuccHeight+1 > SUHeight) {
- SUHeight = SuccHeight + 1;
- }
- }
-
- SU->Height = SUHeight;
-
- // Update degrees of all nodes depending on current SUnit
- for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
- I != E; ++I) {
- SUnit *SU = I->getSUnit();
- if (!--SU->Height)
- // If all dependencies of the node are processed already,
- // then the longest path for the node can be computed now
- WorkList.push_back(SU);
- }
- }
-}
-
/// dump - dump the schedule.
void ScheduleDAG::dumpSchedule() const {
for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
@@ -171,13 +62,10 @@ void SUnit::addPred(const SDep &D) {
for (unsigned i = 0, e = (unsigned)Preds.size(); i != e; ++i)
if (Preds[i] == D)
return;
- // Add a pred to this SUnit.
- Preds.push_back(D);
// Now add a corresponding succ to N.
SDep P = D;
P.setSUnit(this);
SUnit *N = D.getSUnit();
- N->Succs.push_back(P);
// Update the bookkeeping.
if (D.getKind() == SDep::Data) {
++NumPreds;
@@ -187,6 +75,10 @@ void SUnit::addPred(const SDep &D) {
++NumPredsLeft;
if (!isScheduled)
++N->NumSuccsLeft;
+ N->Succs.push_back(P);
+ Preds.push_back(D);
+ this->setDepthDirty();
+ N->setHeightDirty();
}
/// removePred - This removes the specified edge as a pred of the current
@@ -220,10 +112,128 @@ void SUnit::removePred(const SDep &D) {
--NumPredsLeft;
if (!isScheduled)
--N->NumSuccsLeft;
+ this->setDepthDirty();
+ N->setHeightDirty();
return;
}
}
+void SUnit::setDepthDirty() {
+ SmallVector<SUnit*, 8> WorkList;
+ WorkList.push_back(this);
+ while (!WorkList.empty()) {
+ SUnit *SU = WorkList.back();
+ WorkList.pop_back();
+ if (!SU->isDepthCurrent) continue;
+ SU->isDepthCurrent = false;
+ for (SUnit::const_succ_iterator I = Succs.begin(),
+ E = Succs.end(); I != E; ++I)
+ WorkList.push_back(I->getSUnit());
+ }
+}
+
+void SUnit::setHeightDirty() {
+ SmallVector<SUnit*, 8> WorkList;
+ WorkList.push_back(this);
+ while (!WorkList.empty()) {
+ SUnit *SU = WorkList.back();
+ WorkList.pop_back();
+ if (!SU->isHeightCurrent) continue;
+ SU->isHeightCurrent = false;
+ for (SUnit::const_pred_iterator I = Preds.begin(),
+ E = Preds.end(); I != E; ++I)
+ WorkList.push_back(I->getSUnit());
+ }
+}
+
+/// setDepthToAtLeast - Update this node's successors to reflect the
+/// fact that this node's depth just increased.
+///
+void SUnit::setDepthToAtLeast(unsigned NewDepth) {
+ if (NewDepth <= Depth)
+ return;
+ setDepthDirty();
+ Depth = NewDepth;
+ isDepthCurrent = true;
+}
+
+/// setHeightToAtLeast - Update this node's predecessors to reflect the
+/// fact that this node's height just increased.
+///
+void SUnit::setHeightToAtLeast(unsigned NewHeight) {
+ if (NewHeight <= Height)
+ return;
+ setHeightDirty();
+ Height = NewHeight;
+ isHeightCurrent = true;
+}
+
+/// ComputeDepth - Calculate the maximal path from the node to the exit.
+///
+void SUnit::ComputeDepth() {
+ SmallVector<SUnit*, 8> WorkList;
+ WorkList.push_back(this);
+ while (!WorkList.empty()) {
+ SUnit *Cur = WorkList.back();
+
+ bool Done = true;
+ unsigned MaxPredDepth = 0;
+ for (SUnit::const_pred_iterator I = Cur->Preds.begin(),
+ E = Cur->Preds.end(); I != E; ++I) {
+ SUnit *PredSU = I->getSUnit();
+ if (PredSU->isDepthCurrent)
+ MaxPredDepth = std::max(MaxPredDepth,
+ PredSU->Depth + I->getLatency());
+ else {
+ Done = false;
+ WorkList.push_back(PredSU);
+ }
+ }
+
+ if (Done) {
+ WorkList.pop_back();
+ if (MaxPredDepth != Cur->Depth) {
+ Cur->setDepthDirty();
+ Cur->Depth = MaxPredDepth;
+ }
+ Cur->isDepthCurrent = true;
+ }
+ }
+}
+
+/// ComputeHeight - Calculate the maximal path from the node to the entry.
+///
+void SUnit::ComputeHeight() {
+ SmallVector<SUnit*, 8> WorkList;
+ WorkList.push_back(this);
+ while (!WorkList.empty()) {
+ SUnit *Cur = WorkList.back();
+
+ bool Done = true;
+ unsigned MaxSuccHeight = 0;
+ for (SUnit::const_succ_iterator I = Cur->Succs.begin(),
+ E = Cur->Succs.end(); I != E; ++I) {
+ SUnit *SuccSU = I->getSUnit();
+ if (SuccSU->isHeightCurrent)
+ MaxSuccHeight = std::max(MaxSuccHeight,
+ SuccSU->Height + I->getLatency());
+ else {
+ Done = false;
+ WorkList.push_back(SuccSU);
+ }
+ }
+
+ if (Done) {
+ WorkList.pop_back();
+ if (MaxSuccHeight != Cur->Height) {
+ Cur->setHeightDirty();
+ Cur->Height = MaxSuccHeight;
+ }
+ Cur->isHeightCurrent = true;
+ }
+ }
+}
+
/// SUnit - Scheduling unit. It's an wrapper around either a single SDNode or
/// a group of nodes flagged together.
void SUnit::dump(const ScheduleDAG *G) const {
@@ -299,11 +309,14 @@ void ScheduleDAG::VerifySchedule(bool isBottomUp) {
cerr << "has not been scheduled!\n";
AnyNotSched = true;
}
- if (SUnits[i].isScheduled && SUnits[i].Cycle > (unsigned)INT_MAX) {
+ if (SUnits[i].isScheduled &&
+ (isBottomUp ? SUnits[i].getHeight() : SUnits[i].getHeight()) >
+ unsigned(INT_MAX)) {
if (!AnyNotSched)
cerr << "*** Scheduling failed! ***\n";
SUnits[i].dump(this);
- cerr << "has an unexpected Cycle value!\n";
+ cerr << "has an unexpected "
+ << (isBottomUp ? "Height" : "Depth") << " value!\n";
AnyNotSched = true;
}
if (isBottomUp) {
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index fa42d393a3..cfa639e940 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -13,19 +13,27 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "sched-instrs"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtarget.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SmallSet.h"
#include <map>
using namespace llvm;
ScheduleDAGInstrs::ScheduleDAGInstrs(MachineBasicBlock *bb,
- const TargetMachine &tm)
- : ScheduleDAG(0, bb, tm) {}
+ const TargetMachine &tm,
+ const MachineLoopInfo &mli,
+ const MachineDominatorTree &mdt)
+ : ScheduleDAG(0, bb, tm), MLI(mli), MDT(mdt) {}
void ScheduleDAGInstrs::BuildSchedUnits() {
SUnits.clear();
@@ -35,7 +43,7 @@ void ScheduleDAGInstrs::BuildSchedUnits() {
// to top.
// Remember where defs and uses of each physical register are as we procede.
- SUnit *Defs[TargetRegisterInfo::FirstVirtualRegister] = {};
+ std::vector<SUnit *> Defs[TargetRegisterInfo::FirstVirtualRegister] = {};
std::vector<SUnit *> Uses[TargetRegisterInfo::FirstVirtualRegister] = {};
// Remember where unknown loads are after the most recent unknown store
@@ -57,13 +65,20 @@ void ScheduleDAGInstrs::BuildSchedUnits() {
// all the work of the block is done before the terminator.
SUnit *Terminator = 0;
+ // Check to see if the scheduler cares about latencies.
+ bool UnitLatencies = ForceUnitLatencies();
+
for (MachineBasicBlock::iterator MII = BB->end(), MIE = BB->begin();
MII != MIE; --MII) {
MachineInstr *MI = prior(MII);
+ const TargetInstrDesc &TID = MI->getDesc();
SUnit *SU = NewSUnit(MI);
// Assign the Latency field of SU using target-provided information.
- ComputeLatency(SU);
+ if (UnitLatencies)
+ SU->Latency = 1;
+ else
+ ComputeLatency(SU);
// Add register-based dependencies (data, anti, and output).
for (unsigned j = 0, n = MI->getNumOperands(); j != n; ++j) {
@@ -74,33 +89,51 @@ void ScheduleDAGInstrs::BuildSchedUnits() {
assert(TRI->isPhysicalRegister(Reg) && "Virtual register encountered!");
std::vector<SUnit *> &UseList = Uses[Reg];
- SUnit *&Def = Defs[Reg];
+ std::vector<SUnit *> &DefList = Defs[Reg];
// Optionally add output and anti dependencies.
// TODO: Using a latency of 1 here assumes there's no cost for
// reusing registers.
SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output;
- if (Def && Def != SU)
- Def->addPred(SDep(SU, Kind, /*Latency=*/1, /*Reg=*/Reg));
+ for (unsigned i = 0, e = DefList.size(); i != e; ++i) {
+ SUnit *DefSU = DefList[i];
+ if (DefSU != SU &&
+ (Kind != SDep::Output || !MO.isDead() ||
+ !DefSU->getInstr()->registerDefIsDead(Reg)))
+ DefSU->addPred(SDep(SU, Kind, /*Latency=*/1, /*Reg=*/Reg));
+ }
for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
- SUnit *&Def = Defs[*Alias];
- if (Def && Def != SU)
- Def->addPred(SDep(SU, Kind, /*Latency=*/1, /*Reg=*/ *Alias));
+ std::vector<SUnit *> &DefList = Defs[*Alias];
+ for (unsigned i = 0, e = DefList.size(); i != e; ++i) {
+ SUnit *DefSU = DefList[i];
+ if (DefSU != SU &&
+ (Kind != SDep::Output || !MO.isDead() ||
+ !DefSU->getInstr()->registerDefIsDead(Reg)))
+ DefSU->addPred(SDep(SU, Kind, /*Latency=*/1, /*Reg=*/ *Alias));
+ }
}
if (MO.isDef()) {
// Add any data dependencies.
- for (unsigned i = 0, e = UseList.size(); i != e; ++i)
- if (UseList[i] != SU)
- UseList[i]->addPred(SDep(SU, SDep::Data, SU->Latency, Reg));
+ unsigned DataLatency = SU->Latency;
+ for (unsigned i = 0, e = UseList.size(); i != e; ++i) {
+ SUnit *UseSU = UseList[i];
+ if (UseSU != SU) {
+ UseSU->addPred(SDep(SU, SDep::Data, DataLatency, Reg));
+ }
+ }
for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
std::vector<SUnit *> &UseList = Uses[*Alias];
- for (unsigned i = 0, e = UseList.size(); i != e; ++i)
- if (UseList[i] != SU)
- UseList[i]->addPred(SDep(SU, SDep::Data, SU->Latency, *Alias));
+ for (unsigned i = 0, e = UseList.size(); i != e; ++i) {
+ SUnit *UseSU = UseList[i];
+ if (UseSU != SU)
+ UseSU->addPred(SDep(SU, SDep::Data, DataLatency, *Alias));
+ }
}
UseList.clear();
- Def = SU;
+ if (!MO.isDead())
+ DefList.clear();
+ DefList.push_back(SU);
} else {
UseList.push_back(SU);
}
@@ -111,7 +144,6 @@ void ScheduleDAGInstrs::BuildSchedUnits() {
// after stack slots are lowered to actual addresses.
// TODO: Use an AliasAnalysis and do real alias-analysis queries, and
// produce more precise dependence information.
- const TargetInstrDesc &TID = MI->getDesc();
if (TID.isCall() || TID.isReturn() || TID.isBranch() ||
TID.hasUnmodeledSideEffects()) {
new_chain:
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
index 6ac608f943..d917386967 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -99,6 +99,9 @@ private:
SmallVector<SUnit*, 2>&);
bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
void ListScheduleBottomUp();
+
+ /// ForceUnitLatencies - The fast scheduler doesn't care about real latencies.
+ bool ForceUnitLatencies() const { return true; }
};
} // end anonymous namespace
@@ -153,7 +156,8 @@ void ScheduleDAGFast::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
DOUT << "*** Scheduling [" << CurCycle << "]: ";
DEBUG(SU->dump(this));
- SU->Cycle = CurCycle;
+ assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!");
+ SU->setHeightToAtLeast(CurCycle);
Sequence.push_back(SU);
// Bottom up: release predecessors
@@ -177,7 +181,7 @@ void ScheduleDAGFast::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
I != E; ++I) {
if (I->isAssignedRegDep()) {
- if (LiveRegCycles[I->getReg()] == I->getSUnit()->Cycle) {
+ if (LiveRegCycles[I->getReg()] == I->getSUnit()->getHeight()) {
assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
assert(LiveRegDefs[I->getReg()] == SU &&
"Physical register dependency violated?");
@@ -247,9 +251,6 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
}
if (TID.isCommutable())
NewSU->isCommutable = true;
- // FIXME: Calculate height / depth and propagate the changes?
- NewSU->Depth = SU->Depth;
- NewSU->Height = SU->Height;
// LoadNode may already exist. This can happen when there is another
// load from the same location and producing the same type of value
@@ -262,9 +263,6 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
} else {
LoadSU = NewSUnit(LoadNode);
LoadNode->setNodeId(LoadSU->NodeNum);
-
- LoadSU->Depth = SU->Depth;
- LoadSU->Height = SU->Height;
}
SDep ChainPred;
@@ -344,10 +342,8 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
// New SUnit has the exact same predecessors.
for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
I != E; ++I)
- if (!I->isArtificial()) {
+ if (!I->isArtificial())
AddPred(NewSU, *I);
- NewSU->Depth = std::max(NewSU->Depth, I->getSUnit()->Depth+1);
- }
// Only copy scheduled successors. Cut them from old node's successor
// list and move them over.
@@ -358,7 +354,6 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
continue;
SUnit *SuccSU = I->getSUnit();
if (SuccSU->isScheduled) {
- NewSU->Height = std::max(NewSU->Height, SuccSU->Height+1);
SDep D = *I;
D.setSUnit(NewSU);
AddPred(SuccSU, D);
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp
index b01700e37c..d42c8d8867 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp
@@ -120,10 +120,7 @@ void ScheduleDAGList::ReleaseSucc(SUnit *SU, const SDep &D) {
}
#endif
- // Compute the cycle when this SUnit actually becomes available. This
- // is the max of the start time of all predecessors plus their latencies.
- unsigned PredDoneCycle = SU->Cycle + SU->Latency;
- SuccSU->CycleBound = std::max(SuccSU->CycleBound, PredDoneCycle);
+ SuccSU->setDepthToAtLeast(SU->getDepth() + D.getLatency());
if (SuccSU->NumPredsLeft == 0) {
PendingQueue.push_back(SuccSU);
@@ -138,7 +135,8 @@ void ScheduleDAGList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
DEBUG(SU->dump(this));
Sequence.push_back(SU);
- SU->Cycle = CurCycle;
+ assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
+ SU->setDepthToAtLeast(CurCycle);
// Top down: release successors.
for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
@@ -171,14 +169,14 @@ void ScheduleDAGList::ListScheduleTopDown() {
// Check to see if any of the pending instructions are ready to issue. If
// so, add them to the available queue.
for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
- if (PendingQueue[i]->CycleBound == CurCycle) {
+ if (PendingQueue[i]->getDepth() == CurCycle) {
AvailableQueue->push(PendingQueue[i]);
PendingQueue[i]->isAvailable = true;
PendingQueue[i] = PendingQueue.back();
PendingQueue.pop_back();
--i; --e;
} else {
- assert(PendingQueue[i]->CycleBound > CurCycle && "Negative latency?");
+ assert(PendingQueue[i]->getDepth() > CurCycle && "Negative latency?");
}
}
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index 49a5ac1904..c3b6137ae3 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -154,6 +154,10 @@ private:
Topo.InitDAGTopologicalSorting();
return NewNode;
}
+
+ /// ForceUnitLatencies - Return true, since register-pressure-reducing
+ /// scheduling doesn't need actual latency information.
+ bool ForceUnitLatencies() const { return true; }
};
} // end anonymous namespace
@@ -171,8 +175,6 @@ void ScheduleDAGRRList::Schedule() {
DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
SUnits[su].dumpAll(this));
- CalculateDepths();
- CalculateHeights();
Topo.InitDAGTopologicalSorting();
AvailableQueue->initNodes(SUnits);
@@ -272,7 +274,8 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
DOUT << "*** Scheduling [" << CurCycle << "]: ";
DEBUG(SU->dump(this));
- SU->Cycle = CurCycle;
+ assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!");
+ SU->setHeightToAtLeast(CurCycle);
Sequence.push_back(SU);
// Bottom up: release predecessors
@@ -296,7 +299,7 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
I != E; ++I) {
if (I->isAssignedRegDep()) {
- if (LiveRegCycles[I->getReg()] == I->getSUnit()->Cycle) {
+ if (LiveRegCycles[I->getReg()] == I->getSUnit()->getHeight()) {
assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
assert(LiveRegDefs[I->getReg()] == SU &&
"Physical register dependency violated?");
@@ -328,7 +331,7 @@ void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
/// UnscheduleNodeBottomUp - Remove the node from the schedule, update its and
/// its predecessor states to reflect the change.
void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
- DOUT << "*** Unscheduling [" << SU->Cycle << "]: ";
+ DOUT << "*** Unscheduling [" << SU->getHeight() << "]: ";
DEBUG(SU->dump(this));
AvailableQueue->UnscheduledNode(SU);
@@ -336,7 +339,7 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
I != E; ++I) {
CapturePred(&*I);
- if (I->isAssignedRegDep() && SU->Cycle == LiveRegCycles[I->getReg()]) {
+ if (I->isAssignedRegDep() && SU->getHeight() == LiveRegCycles[I->getReg()]) {
assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
assert(LiveRegDefs[I->getReg()] == I->getSUnit() &&
"Physical register dependency violated?");
@@ -353,12 +356,12 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
LiveRegDefs[I->getReg()] = SU;
++NumLiveRegs;
}
- if (I->getSUnit()->Cycle < LiveRegCycles[I->getReg()])
- LiveRegCycles[I->getReg()] = I->getSUnit()->Cycle;
+ if (I->getSUnit()->getHeight() < LiveRegCycles[I->getReg()])
+ LiveRegCycles[I->getReg()] = I->getSUnit()->getHeight();
}
}
- SU->Cycle = 0;
+ SU->setHeightDirty();
SU->isScheduled = false;
SU->isAvailable = true;
AvailableQueue->push(SU);
@@ -443,9 +446,6 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
} else {
LoadSU = CreateNewSUnit(LoadNode);
LoadNode->setNodeId(LoadSU->NodeNum);
-
- LoadSU->Depth = SU->Depth;
- LoadSU->Height = SU->Height;
ComputeLatency(LoadSU);
}
@@ -462,9 +462,6 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
}
if (TID.isCommutable())
NewSU->isCommutable = true;
- // FIXME: Calculate height / depth and propagate the changes?
- NewSU->Depth = SU->Depth;
- NewSU->Height = SU->Height;
ComputeLatency(NewSU);
SDep ChainPred;
@@ -548,10 +545,8 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
// New SUnit has the exact same predecessors.
for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
I != E; ++I)
- if (!I->isArtificial()) {
+ if (!I->isArtificial())
AddPred(NewSU, *I);
- NewSU->Depth = std::max(NewSU->Depth, I->getSUnit()->Depth+1);
- }
// Only copy scheduled successors. Cut them from old node's successor
// list and move them over.
@@ -562,7 +557,6 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
continue;
SUnit *SuccSU = I->getSUnit();
if (SuccSU->isScheduled) {
- NewSU->Height = std::max(NewSU->Height, SuccSU->Height+1);
SDep D = *I;
D.setSUnit(NewSU);
AddPred(SuccSU, D);
@@ -570,9 +564,8 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
DelDeps.push_back(std::make_pair(SuccSU, D));
}
}
- for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) {
+ for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
RemovePred(DelDeps[i].first, DelDeps[i].second);
- }
AvailableQueue->updateNode(SU);
AvailableQueue->addNode(NewSU);
@@ -590,8 +583,6 @@ void ScheduleDAGRRList::InsertCCCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
SUnit *CopyFromSU = CreateNewSUnit(NULL);
CopyFromSU->CopySrcRC = SrcRC;
CopyFromSU->CopyDstRC = DestRC;
- CopyFromSU->Depth = SU->Depth;
- CopyFromSU->Height = SU->Height;
SUnit *CopyToSU = CreateNewSUnit(NULL);
CopyToSU->CopySrcRC = DestRC;
@@ -870,7 +861,8 @@ void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
DOUT << "*** Scheduling [" << CurCycle << "]: ";
DEBUG(SU->dump(this));
- SU->Cycle = CurCycle;
+ assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
+ SU->setDepthToAtLeast(CurCycle);
Sequence.push_back(SU);
// Top down: release successors
@@ -1107,19 +1099,19 @@ namespace {
/// closestSucc - Returns the scheduled cycle of the successor which is
/// closet to the current cycle.
static unsigned closestSucc(const SUnit *SU) {
- unsigned MaxCycle = 0;
+ unsigned MaxHeight = 0;
for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
I != E; ++I) {
- unsigned Cycle = I->getSUnit()->Cycle;
+ unsigned Height = I->getSUnit()->getHeight();
// If there are bunch of CopyToRegs stacked up, they should be considered
// to be at the same position.
if (I->getSUnit()->getNode() &&
I->getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
- Cycle = closestSucc(I->getSUnit())+1;
- if (Cycle > MaxCycle)
- MaxCycle = Cycle;
+ Height = closestSucc(I->getSUnit())+1;
+ if (Height > MaxHeight)
+ MaxHeight = Height;
}
- return MaxCycle;
+ return MaxHeight;
}
/// calcMaxScratches - Returns an cost estimate of the worse case requirement
@@ -1182,11 +1174,11 @@ bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
if (LScratch != RScratch)
return LScratch > RScratch;
- if (left->Height != right->Height)
- return left->Height > right->Height;
+ if (left->getHeight() != right->getHeight())
+ return left->getHeight() > right->getHeight();
- if (left->Depth != right->Depth)
- return left->Depth < right->Depth;
+ if (left->getDepth() != right->getDepth())
+ return left->getDepth() < right->getDepth();
assert(left->NodeQueueId && right->NodeQueueId &&
"NodeQueueId cannot be zero");
@@ -1294,7 +1286,8 @@ void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() {
continue;
// Be conservative. Ignore if nodes aren't at roughly the same
// depth and height.
- if (SuccSU->Height < SU->Height && (SU->Height - SuccSU->Height) > 1)
+ if (SuccSU->getHeight() < SU->getHeight() &&
+ (SU->getHeight() - SuccSU->getHeight()) > 1)
continue;
if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
continue;
@@ -1384,8 +1377,8 @@ bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
if (LPriority+LBonus != RPriority+RBonus)
return LPriority+LBonus < RPriority+RBonus;
- if (left->Depth != right->Depth)
- return left->Depth < right->Depth;
+ if (left->getDepth() != right->getDepth())
+ return left->getDepth() < right->getDepth();
if (left->NumSuccsLeft != right->NumSuccsLeft)
return left->NumSuccsLeft > right->NumSuccsLeft;
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index 69d3045bf2..21df142ff3 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -80,6 +80,9 @@ void ScheduleDAGSDNodes::BuildSchedUnits() {
E = DAG->allnodes_end(); NI != E; ++NI)
NI->setNodeId(-1);
+ // Check to see if the scheduler cares about latencies.
+ bool UnitLatencies = ForceUnitLatencies();
+
for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(),
E = DAG->allnodes_end(); NI != E; ++NI) {
if (isPassiveNode(NI)) // Leaf node, e.g. a TargetImmediate.
@@ -133,7 +136,10 @@ void ScheduleDAGSDNodes::BuildSchedUnits() {
N->setNodeId(NodeSUnit->NodeNum);
// Assign the Latency field of NodeSUnit using target-provided information.
- ComputeLatency(NodeSUnit);
+ if (UnitLatencies)
+ NodeSUnit->Latency = 1;
+ else
+ ComputeLatency(NodeSUnit);
}
// Pass 2: add the preds, succs, etc.
diff --git a/test/CodeGen/X86/fold-pcmpeqd-0.ll b/test/CodeGen/X86/fold-pcmpeqd-0.ll
index 6af7219345..152c12215d 100644
--- a/test/CodeGen/X86/fold-pcmpeqd-0.ll
+++ b/test/CodeGen/X86/fold-pcmpeqd-0.ll
@@ -1,10 +1,8 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin | not grep pcmpeqd
+; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin | grep pcmpeqd | count 1
; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin | grep pcmpeqd | count 1
-; On x86-64, this testcase shouldn't need to spill the -1 value,
+; This testcase shouldn't need to spill the -1 value,
; so it should just use pcmpeqd to materialize an all-ones vector.
-; On x86-32, there aren't enough registers, so an all-ones
-; constant pool should be created so it can be folded.
%struct.__ImageExecInfo = type <{ <4 x i32>, <4 x float>, <2 x i64>, i8*, i8*, i8*, i32, i32, i32, i32, i32 }>
%struct._cl_image_format_t = type <{ i32, i32, i32 }>