diff options
-rw-r--r-- | include/llvm/CodeGen/LatencyPriorityQueue.h | 18 | ||||
-rw-r--r-- | include/llvm/CodeGen/ScheduleDAG.h | 68 | ||||
-rw-r--r-- | include/llvm/CodeGen/ScheduleDAGInstrs.h | 10 | ||||
-rw-r--r-- | lib/CodeGen/LatencyPriorityQueue.cpp | 41 | ||||
-rw-r--r-- | lib/CodeGen/PostRASchedulerList.cpp | 98 | ||||
-rw-r--r-- | lib/CodeGen/ScheduleDAG.cpp | 241 | ||||
-rw-r--r-- | lib/CodeGen/ScheduleDAGInstrs.cpp | 68 | ||||
-rw-r--r-- | lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp | 19 | ||||
-rw-r--r-- | lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp | 12 | ||||
-rw-r--r-- | lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp | 67 | ||||
-rw-r--r-- | lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp | 8 | ||||
-rw-r--r-- | test/CodeGen/X86/fold-pcmpeqd-0.ll | 6 |
12 files changed, 357 insertions, 299 deletions
diff --git a/include/llvm/CodeGen/LatencyPriorityQueue.h b/include/llvm/CodeGen/LatencyPriorityQueue.h index f7eb3a62a2..71fae2aeab 100644 --- a/include/llvm/CodeGen/LatencyPriorityQueue.h +++ b/include/llvm/CodeGen/LatencyPriorityQueue.h @@ -34,10 +34,6 @@ namespace llvm { // SUnits - The SUnits for the current graph. std::vector<SUnit> *SUnits; - // Latencies - The latency (max of latency from this node to the bb exit) - // for each node. - std::vector<int> Latencies; - /// NumNodesSolelyBlocking - This vector contains, for every node in the /// Queue, the number of nodes that the node is the sole unscheduled /// predecessor for. This is used as a tie-breaker heuristic for better @@ -51,29 +47,23 @@ public: void initNodes(std::vector<SUnit> &sunits) { SUnits = &sunits; - // Calculate node priorities. - CalculatePriorities(); + NumNodesSolelyBlocking.resize(SUnits->size(), 0); } void addNode(const SUnit *SU) { - Latencies.resize(SUnits->size(), -1); NumNodesSolelyBlocking.resize(SUnits->size(), 0); - CalcLatency(*SU); } void updateNode(const SUnit *SU) { - Latencies[SU->NodeNum] = -1; - CalcLatency(*SU); } void releaseState() { SUnits = 0; - Latencies.clear(); } unsigned getLatency(unsigned NodeNum) const { - assert(NodeNum < Latencies.size()); - return Latencies[NodeNum]; + assert(NodeNum < (*SUnits).size()); + return (*SUnits)[NodeNum].getHeight(); } unsigned getNumSolelyBlockNodes(unsigned NodeNum) const { @@ -114,8 +104,6 @@ public: void ScheduledNode(SUnit *Node); private: - void CalculatePriorities(); - void CalcLatency(const SUnit &SU); void AdjustPriorityOfUnscheduledPreds(SUnit *SU); SUnit *getSingleUnscheduledPred(SUnit *SU); }; diff --git a/include/llvm/CodeGen/ScheduleDAG.h b/include/llvm/CodeGen/ScheduleDAG.h index 2f6fd00928..ee640e24a1 100644 --- a/include/llvm/CodeGen/ScheduleDAG.h +++ b/include/llvm/CodeGen/ScheduleDAG.h @@ -242,10 +242,12 @@ namespace llvm { bool isPending : 1; // True once pending. bool isAvailable : 1; // True once available. 
bool isScheduled : 1; // True once scheduled. - unsigned CycleBound; // Upper/lower cycle to be scheduled at. - unsigned Cycle; // Once scheduled, the cycle of the op. - unsigned Depth; // Node depth; - unsigned Height; // Node height; + private: + bool isDepthCurrent : 1; // True if Depth is current. + bool isHeightCurrent : 1; // True if Height is current. + unsigned Depth; // Node depth. + unsigned Height; // Node height. + public: const TargetRegisterClass *CopyDstRC; // Is a special copy node if not null. const TargetRegisterClass *CopySrcRC; @@ -256,7 +258,8 @@ namespace llvm { Latency(0), NumPreds(0), NumSuccs(0), NumPredsLeft(0), NumSuccsLeft(0), isTwoAddress(false), isCommutable(false), hasPhysRegDefs(false), isPending(false), isAvailable(false), isScheduled(false), - CycleBound(0), Cycle(~0u), Depth(0), Height(0), + isDepthCurrent(false), isHeightCurrent(false), + Depth(0), Height(0), CopyDstRC(NULL), CopySrcRC(NULL) {} /// SUnit - Construct an SUnit for post-regalloc scheduling to represent @@ -266,7 +269,8 @@ namespace llvm { Latency(0), NumPreds(0), NumSuccs(0), NumPredsLeft(0), NumSuccsLeft(0), isTwoAddress(false), isCommutable(false), hasPhysRegDefs(false), isPending(false), isAvailable(false), isScheduled(false), - CycleBound(0), Cycle(~0u), Depth(0), Height(0), + isDepthCurrent(false), isHeightCurrent(false), + Depth(0), Height(0), CopyDstRC(NULL), CopySrcRC(NULL) {} /// setNode - Assign the representative SDNode for this SUnit. @@ -307,6 +311,41 @@ namespace llvm { /// the specified node. void removePred(const SDep &D); + /// getDepth - Return the depth of this node, which is the length of the + /// maximum path up to any node which has no predecessors. + unsigned getDepth() const { + if (!isDepthCurrent) const_cast<SUnit *>(this)->ComputeDepth(); + return Depth; + } + + /// getHeight - Return the height of this node, which is the length of the + /// maximum path down to any node which has no successors. 
+ unsigned getHeight() const { + if (!isHeightCurrent) const_cast<SUnit *>(this)->ComputeHeight(); + return Height; + } + + /// setDepthToAtLeast - If NewDepth is greater than this node's depth + /// value, set it to be the new depth value. This also recursively + /// marks successor nodes dirty. + void setDepthToAtLeast(unsigned NewDepth); + + /// setHeightToAtLeast - If NewHeight is greater than this node's height + /// value, set it to be the new height value. This also recursively + /// marks predecessor nodes dirty. + void setHeightToAtLeast(unsigned NewHeight); + + /// setDepthDirty - Set a flag in this node to indicate that its + /// stored Depth value will require recomputation the next time + /// getDepth() is called. + void setDepthDirty(); + + /// setHeightDirty - Set a flag in this node to indicate that its + /// stored Height value will require recomputation the next time + /// getHeight() is called. + void setHeightDirty(); + + /// isPred - Test if node N is a predecessor of this node. bool isPred(SUnit *N) { for (unsigned i = 0, e = (unsigned)Preds.size(); i != e; ++i) if (Preds[i].getSUnit() == N) @@ -314,6 +353,7 @@ namespace llvm { return false; } + /// isSucc - Test if node N is a successor of this node. bool isSucc(SUnit *N) { for (unsigned i = 0, e = (unsigned)Succs.size(); i != e; ++i) if (Succs[i].getSUnit() == N) @@ -324,6 +364,10 @@ namespace llvm { return false; } + + private: + void ComputeDepth(); + void ComputeHeight(); }; //===--------------------------------------------------------------------===// @@ -397,12 +441,7 @@ namespace llvm { /// ComputeLatency - Compute node latency. /// - virtual void ComputeLatency(SUnit *SU) { SU->Latency = 1; } - - /// CalculateDepths, CalculateHeights - Calculate node depth / height. 
- /// - void CalculateDepths(); - void CalculateHeights(); + virtual void ComputeLatency(SUnit *SU) = 0; protected: /// EmitNoop - Emit a noop instruction. @@ -440,6 +479,11 @@ namespace llvm { void EmitCrossRCCopy(SUnit *SU, DenseMap<SUnit*, unsigned> &VRBaseMap); + /// ForceUnitLatencies - Return true if all scheduling edges should be given a + /// latency value of one. The default is to return false; schedulers may + /// override this as needed. + virtual bool ForceUnitLatencies() const { return false; } + private: /// EmitLiveInCopy - Emit a copy for a live in physical register. If the /// physical register has only a single copy use, then coalesced the copy diff --git a/include/llvm/CodeGen/ScheduleDAGInstrs.h b/include/llvm/CodeGen/ScheduleDAGInstrs.h index 2b6c1d3a55..c74ef57674 100644 --- a/include/llvm/CodeGen/ScheduleDAGInstrs.h +++ b/include/llvm/CodeGen/ScheduleDAGInstrs.h @@ -18,10 +18,18 @@ #include "llvm/CodeGen/ScheduleDAG.h" namespace llvm { + class MachineLoopInfo; + class MachineDominatorTree; + class ScheduleDAGInstrs : public ScheduleDAG { + const MachineLoopInfo &MLI; + const MachineDominatorTree &MDT; + public: ScheduleDAGInstrs(MachineBasicBlock *bb, - const TargetMachine &tm); + const TargetMachine &tm, + const MachineLoopInfo &mli, + const MachineDominatorTree &mdt); virtual ~ScheduleDAGInstrs() {} diff --git a/lib/CodeGen/LatencyPriorityQueue.cpp b/lib/CodeGen/LatencyPriorityQueue.cpp index 2abbf364e3..7ad9ac9b51 100644 --- a/lib/CodeGen/LatencyPriorityQueue.cpp +++ b/lib/CodeGen/LatencyPriorityQueue.cpp @@ -41,47 +41,6 @@ bool latency_sort::operator()(const SUnit *LHS, const SUnit *RHS) const { } -/// CalcNodePriority - Calculate the maximal path from the node to the exit. 
-/// -void LatencyPriorityQueue::CalcLatency(const SUnit &SU) { - int &Latency = Latencies[SU.NodeNum]; - if (Latency != -1) - return; - - std::vector<const SUnit*> WorkList; - WorkList.push_back(&SU); - while (!WorkList.empty()) { - const SUnit *Cur = WorkList.back(); - bool AllDone = true; - unsigned MaxSuccLatency = 0; - for (SUnit::const_succ_iterator I = Cur->Succs.begin(),E = Cur->Succs.end(); - I != E; ++I) { - int SuccLatency = Latencies[I->getSUnit()->NodeNum]; - if (SuccLatency == -1) { - AllDone = false; - WorkList.push_back(I->getSUnit()); - } else { - unsigned NewLatency = SuccLatency + I->getLatency(); - MaxSuccLatency = std::max(MaxSuccLatency, NewLatency); - } - } - if (AllDone) { - Latencies[Cur->NodeNum] = MaxSuccLatency; - WorkList.pop_back(); - } - } -} - -/// CalculatePriorities - Calculate priorities of all scheduling units. -void LatencyPriorityQueue::CalculatePriorities() { - Latencies.assign(SUnits->size(), -1); - NumNodesSolelyBlocking.assign(SUnits->size(), 0); - - // For each node, calculate the maximal path from the node to the exit. - for (unsigned i = 0, e = SUnits->size(); i != e; ++i) - CalcLatency((*SUnits)[i]); -} - /// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor /// of SU, return it, otherwise return null. 
SUnit *LatencyPriorityQueue::getSingleUnscheduledPred(SUnit *SU) { diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp index 2f7c0118a0..0fc6c3370b 100644 --- a/lib/CodeGen/PostRASchedulerList.cpp +++ b/lib/CodeGen/PostRASchedulerList.cpp @@ -23,7 +23,9 @@ #include "llvm/CodeGen/ScheduleDAGInstrs.h" #include "llvm/CodeGen/LatencyPriorityQueue.h" #include "llvm/CodeGen/SchedulerRegistry.h" +#include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -49,6 +51,14 @@ namespace { static char ID; PostRAScheduler() : MachineFunctionPass(&ID) {} + void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<MachineDominatorTree>(); + AU.addPreserved<MachineDominatorTree>(); + AU.addRequired<MachineLoopInfo>(); + AU.addPreserved<MachineLoopInfo>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + const char *getPassName() const { return "Post RA top-down list latency scheduler"; } @@ -72,8 +82,10 @@ namespace { ScheduleDAGTopologicalSort Topo; public: - SchedulePostRATDList(MachineBasicBlock *mbb, const TargetMachine &tm) - : ScheduleDAGInstrs(mbb, tm), Topo(SUnits) {} + SchedulePostRATDList(MachineBasicBlock *mbb, const TargetMachine &tm, + const MachineLoopInfo &MLI, + const MachineDominatorTree &MDT) + : ScheduleDAGInstrs(mbb, tm, MLI, MDT), Topo(SUnits) {} void Schedule(); @@ -88,11 +100,14 @@ namespace { bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { DOUT << "PostRAScheduler\n"; + const MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>(); + const MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>(); + // Loop over all of the basic blocks for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end(); MBB != MBBe; ++MBB) { - SchedulePostRATDList Scheduler(MBB, Fn.getTarget()); + 
SchedulePostRATDList Scheduler(MBB, Fn.getTarget(), MLI, MDT); Scheduler.Run(); @@ -142,6 +157,28 @@ getInstrOperandRegClass(const TargetRegisterInfo *TRI, return TRI->getRegClass(II.OpInfo[Op].RegClass); } +/// CriticalPathStep - Return the next SUnit after SU on the bottom-up +/// critical path. +static SDep *CriticalPathStep(SUnit *SU) { + SDep *Next = 0; + unsigned NextDepth = 0; + // Find the predecessor edge with the greatest depth. + for (SUnit::pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end(); + P != PE; ++P) { + SUnit *PredSU = P->getSUnit(); + unsigned PredLatency = P->getLatency(); + unsigned PredTotalLatency = PredSU->getDepth() + PredLatency; + // In the case of a latency tie, prefer an anti-dependency edge over + // other types of edges. + if (NextDepth < PredTotalLatency || + (NextDepth == PredTotalLatency && P->getKind() == SDep::Anti)) { + NextDepth = PredTotalLatency; + Next = &*P; + } + } + return Next; +} + /// BreakAntiDependencies - Identifiy anti-dependencies along the critical path /// of the ScheduleDAG and break them by renaming registers. /// @@ -150,34 +187,16 @@ bool SchedulePostRATDList::BreakAntiDependencies() { // so just duck out immediately if the block is empty. if (BB->empty()) return false; - Topo.InitDAGTopologicalSorting(); - - // Compute a critical path for the DAG. + // Find the node at the bottom of the critical path. SUnit *Max = 0; - std::vector<SDep *> CriticalPath(SUnits.size()); - for (ScheduleDAGTopologicalSort::const_iterator I = Topo.begin(), - E = Topo.end(); I != E; ++I) { - SUnit *SU = &SUnits[*I]; - for (SUnit::pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end(); - P != PE; ++P) { - SUnit *PredSU = P->getSUnit(); - // This assumes that there's no delay for reusing registers. 
- unsigned PredLatency = P->getLatency(); - unsigned PredTotalLatency = PredSU->CycleBound + PredLatency; - if (SU->CycleBound < PredTotalLatency || - (SU->CycleBound == PredTotalLatency && - P->getKind() == SDep::Anti)) { - SU->CycleBound = PredTotalLatency; - CriticalPath[*I] = &*P; - } - } - // Keep track of the node at the end of the critical path. - if (!Max || SU->CycleBound + SU->Latency > Max->CycleBound + Max->Latency) + for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { + SUnit *SU = &SUnits[i]; + if (!Max || SU->getDepth() + SU->Latency > Max->getDepth() + Max->Latency) Max = SU; } DOUT << "Critical path has total latency " - << (Max ? Max->CycleBound + Max->Latency : 0) << "\n"; + << (Max ? Max->getDepth() + Max->Latency : 0) << "\n"; // Walk the critical path from the bottom up. Collect all anti-dependence // edges on the critical path. Skip anti-dependencies between SUnits that @@ -195,9 +214,9 @@ bool SchedulePostRATDList::BreakAntiDependencies() { // the anti-dependencies in an instruction in order to be effective. BitVector AllocatableSet = TRI->getAllocatableSet(*MF); DenseMap<MachineInstr *, unsigned> CriticalAntiDeps; - for (SUnit *SU = Max; CriticalPath[SU->NodeNum]; - SU = CriticalPath[SU->NodeNum]->getSUnit()) { - SDep *Edge = CriticalPath[SU->NodeNum]; + SUnit *SU = Max; + for (SDep *Edge = CriticalPathStep(SU); Edge; + Edge = CriticalPathStep(SU = Edge->getSUnit())) { SUnit *NextSU = Edge->getSUnit(); // Only consider anti-dependence edges. 
if (Edge->getKind() != SDep::Anti) @@ -494,6 +513,11 @@ bool SchedulePostRATDList::BreakAntiDependencies() { Classes[SubregReg] = 0; RegRefs.erase(SubregReg); } + for (const unsigned *Super = TRI->getSuperRegisters(Reg); + *Super; ++Super) { + unsigned SuperReg = *Super; + Classes[SuperReg] = reinterpret_cast<TargetRegisterClass *>(-1); + } } for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); @@ -556,8 +580,7 @@ void SchedulePostRATDList::ReleaseSucc(SUnit *SU, SDep *SuccEdge) { // Compute how many cycles it will be before this actually becomes // available. This is the max of the start time of all predecessors plus // their latencies. - unsigned PredDoneCycle = SU->Cycle + SuccEdge->getLatency(); - SuccSU->CycleBound = std::max(SuccSU->CycleBound, PredDoneCycle); + SuccSU->setDepthToAtLeast(SU->getDepth() + SuccEdge->getLatency()); if (SuccSU->NumPredsLeft == 0) { PendingQueue.push_back(SuccSU); @@ -572,7 +595,8 @@ void SchedulePostRATDList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { DEBUG(SU->dump(this)); Sequence.push_back(SU); - SU->Cycle = CurCycle; + assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!"); + SU->setDepthToAtLeast(CurCycle); // Top down: release successors. for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); @@ -603,21 +627,21 @@ void SchedulePostRATDList::ListScheduleTopDown() { while (!AvailableQueue.empty() || !PendingQueue.empty()) { // Check to see if any of the pending instructions are ready to issue. If // so, add them to the available queue. 
+ unsigned MinDepth = ~0u; for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) { - if (PendingQueue[i]->CycleBound == CurCycle) { + if (PendingQueue[i]->getDepth() <= CurCycle) { AvailableQueue.push(PendingQueue[i]); PendingQueue[i]->isAvailable = true; PendingQueue[i] = PendingQueue.back(); PendingQueue.pop_back(); --i; --e; - } else { - assert(PendingQueue[i]->CycleBound > CurCycle && "Non-positive latency?"); - } + } else if (PendingQueue[i]->getDepth() < MinDepth) + MinDepth = PendingQueue[i]->getDepth(); } // If there are no instructions available, don't try to issue anything. if (AvailableQueue.empty()) { - ++CurCycle; + CurCycle = MinDepth != ~0u ? MinDepth : CurCycle + 1; continue; } diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp index 7ceab3a87b..dad5eb5d9b 100644 --- a/lib/CodeGen/ScheduleDAG.cpp +++ b/lib/CodeGen/ScheduleDAG.cpp @@ -33,115 +33,6 @@ ScheduleDAG::ScheduleDAG(SelectionDAG *dag, MachineBasicBlock *bb, ScheduleDAG::~ScheduleDAG() {} -/// CalculateDepths - compute depths using algorithms for the longest -/// paths in the DAG -void ScheduleDAG::CalculateDepths() { - unsigned DAGSize = SUnits.size(); - std::vector<SUnit*> WorkList; - WorkList.reserve(DAGSize); - - // Initialize the data structures - for (unsigned i = 0, e = DAGSize; i != e; ++i) { - SUnit *SU = &SUnits[i]; - unsigned Degree = SU->Preds.size(); - // Temporarily use the Depth field as scratch space for the degree count. - SU->Depth = Degree; - - // Is it a node without dependencies? - if (Degree == 0) { - assert(SU->Preds.empty() && "SUnit should have no predecessors"); - // Collect leaf nodes - WorkList.push_back(SU); - } - } - - // Process nodes in the topological order - while (!WorkList.empty()) { - SUnit *SU = WorkList.back(); - WorkList.pop_back(); - unsigned SUDepth = 0; - - // Use dynamic programming: - // When current node is being processed, all of its dependencies - // are already processed. 
- // So, just iterate over all predecessors and take the longest path - for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); - I != E; ++I) { - unsigned PredDepth = I->getSUnit()->Depth; - if (PredDepth+1 > SUDepth) { - SUDepth = PredDepth + 1; - } - } - - SU->Depth = SUDepth; - - // Update degrees of all nodes depending on current SUnit - for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); - I != E; ++I) { - SUnit *SU = I->getSUnit(); - if (!--SU->Depth) - // If all dependencies of the node are processed already, - // then the longest path for the node can be computed now - WorkList.push_back(SU); - } - } -} - -/// CalculateHeights - compute heights using algorithms for the longest -/// paths in the DAG -void ScheduleDAG::CalculateHeights() { - unsigned DAGSize = SUnits.size(); - std::vector<SUnit*> WorkList; - WorkList.reserve(DAGSize); - - // Initialize the data structures - for (unsigned i = 0, e = DAGSize; i != e; ++i) { - SUnit *SU = &SUnits[i]; - unsigned Degree = SU->Succs.size(); - // Temporarily use the Height field as scratch space for the degree count. - SU->Height = Degree; - - // Is it a node without dependencies? - if (Degree == 0) { - assert(SU->Succs.empty() && "Something wrong"); - assert(WorkList.empty() && "Should be empty"); - // Collect leaf nodes - WorkList.push_back(SU); - } - } - - // Process nodes in the topological order - while (!WorkList.empty()) { - SUnit *SU = WorkList.back(); - WorkList.pop_back(); - unsigned SUHeight = 0; - - // Use dynamic programming: - // When current node is being processed, all of its dependencies - // are already processed. 
- // So, just iterate over all successors and take the longest path - for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); - I != E; ++I) { - unsigned SuccHeight = I->getSUnit()->Height; - if (SuccHeight+1 > SUHeight) { - SUHeight = SuccHeight + 1; - } - } - - SU->Height = SUHeight; - - // Update degrees of all nodes depending on current SUnit - for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); - I != E; ++I) { - SUnit *SU = I->getSUnit(); - if (!--SU->Height) - // If all dependencies of the node are processed already, - // then the longest path for the node can be computed now - WorkList.push_back(SU); - } - } -} - /// dump - dump the schedule. void ScheduleDAG::dumpSchedule() const { for (unsigned i = 0, e = Sequence.size(); i != e; i++) { @@ -171,13 +62,10 @@ void SUnit::addPred(const SDep &D) { for (unsigned i = 0, e = (unsigned)Preds.size(); i != e; ++i) if (Preds[i] == D) return; - // Add a pred to this SUnit. - Preds.push_back(D); // Now add a corresponding succ to N. SDep P = D; P.setSUnit(this); SUnit *N = D.getSUnit(); - N->Succs.push_back(P); // Update the bookkeeping. 
if (D.getKind() == SDep::Data) { ++NumPreds; @@ -187,6 +75,10 @@ void SUnit::addPred(const SDep &D) { ++NumPredsLeft; if (!isScheduled) ++N->NumSuccsLeft; + N->Succs.push_back(P); + Preds.push_back(D); + this->setDepthDirty(); + N->setHeightDirty(); } /// removePred - This removes the specified edge as a pred of the current @@ -220,10 +112,128 @@ void SUnit::removePred(const SDep &D) { --NumPredsLeft; if (!isScheduled) --N->NumSuccsLeft; + this->setDepthDirty(); + N->setHeightDirty(); return; } } +void SUnit::setDepthDirty() { + SmallVector<SUnit*, 8> WorkList; + WorkList.push_back(this); + while (!WorkList.empty()) { + SUnit *SU = WorkList.back(); + WorkList.pop_back(); + if (!SU->isDepthCurrent) continue; + SU->isDepthCurrent = false; + for (SUnit::const_succ_iterator I = Succs.begin(), + E = Succs.end(); I != E; ++I) + WorkList.push_back(I->getSUnit()); + } +} + +void SUnit::setHeightDirty() { + SmallVector<SUnit*, 8> WorkList; + WorkList.push_back(this); + while (!WorkList.empty()) { + SUnit *SU = WorkList.back(); + WorkList.pop_back(); + if (!SU->isHeightCurrent) continue; + SU->isHeightCurrent = false; + for (SUnit::const_pred_iterator I = Preds.begin(), + E = Preds.end(); I != E; ++I) + WorkList.push_back(I->getSUnit()); + } +} + +/// setDepthToAtLeast - Update this node's successors to reflect the +/// fact that this node's depth just increased. +/// +void SUnit::setDepthToAtLeast(unsigned NewDepth) { + if (NewDepth <= Depth) + return; + setDepthDirty(); + Depth = NewDepth; + isDepthCurrent = true; +} + +/// setHeightToAtLeast - Update this node's predecessors to reflect the +/// fact that this node's height just increased. +/// +void SUnit::setHeightToAtLeast(unsigned NewHeight) { + if (NewHeight <= Height) + return; + setHeightDirty(); + Height = NewHeight; + isHeightCurrent = true; +} + +/// ComputeDepth - Calculate the maximal path from the node to the exit. 
+/// +void SUnit::ComputeDepth() { + SmallVector<SUnit*, 8> WorkList; + WorkList.push_back(this); + while (!WorkList.empty()) { + SUnit *Cur = WorkList.back(); + + bool Done = true; + unsigned MaxPredDepth = 0; + for (SUnit::const_pred_iterator I = Cur->Preds.begin(), + E = Cur->Preds.end(); I != E; ++I) { + SUnit *PredSU = I->getSUnit(); + if (PredSU->isDepthCurrent) + MaxPredDepth = std::max(MaxPredDepth, + PredSU->Depth + I->getLatency()); + else { + Done = false; + WorkList.push_back(PredSU); + } + } + + if (Done) { + WorkList.pop_back(); + if (MaxPredDepth != Cur->Depth) { + Cur->setDepthDirty(); + Cur->Depth = MaxPredDepth; + } + Cur->isDepthCurrent = true; + } + } +} + +/// ComputeHeight - Calculate the maximal path from the node to the entry. +/// +void SUnit::ComputeHeight() { + SmallVector<SUnit*, 8> WorkList; + WorkList.push_back(this); + while (!WorkList.empty()) { + SUnit *Cur = WorkList.back(); + + bool Done = true; + unsigned MaxSuccHeight = 0; + for (SUnit::const_succ_iterator I = Cur->Succs.begin(), + E = Cur->Succs.end(); I != E; ++I) { + SUnit *SuccSU = I->getSUnit(); + if (SuccSU->isHeightCurrent) + MaxSuccHeight = std::max(MaxSuccHeight, + SuccSU->Height + I->getLatency()); + else { + Done = false; + WorkList.push_back(SuccSU); + } + } + + if (Done) { + WorkList.pop_back(); + if (MaxSuccHeight != Cur->Height) { + Cur->setHeightDirty(); + Cur->Height = MaxSuccHeight; + } + Cur->isHeightCurrent = true; + } + } +} + /// SUnit - Scheduling unit. It's an wrapper around either a single SDNode or /// a group of nodes flagged together. void SUnit::dump(const ScheduleDAG *G) const { @@ -299,11 +309,14 @@ void ScheduleDAG::VerifySchedule(bool isBottomUp) { cerr << "has not been scheduled!\n"; AnyNotSched = true; } - if (SUnits[i].isScheduled && SUnits[i].Cycle > (unsigned)INT_MAX) { + if (SUnits[i].isScheduled && + (isBottomUp ? SUnits[i].getHeight() : SUnits[i].getHeight()) > + unsigned(INT_MAX)) { if (!AnyNotSched) cerr << "*** Scheduling failed! 
***\n"; SUnits[i].dump(this); - cerr << "has an unexpected Cycle value!\n"; + cerr << "has an unexpected " + << (isBottomUp ? "Height" : "Depth") << " value!\n"; AnyNotSched = true; } if (isBottomUp) { diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index fa42d393a3..cfa639e940 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -13,19 +13,27 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "sched-instrs" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/ScheduleDAGInstrs.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtarget.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/SmallSet.h" #include <map> using namespace llvm; ScheduleDAGInstrs::ScheduleDAGInstrs(MachineBasicBlock *bb, - const TargetMachine &tm) - : ScheduleDAG(0, bb, tm) {} + const TargetMachine &tm, + const MachineLoopInfo &mli, + const MachineDominatorTree &mdt) + : ScheduleDAG(0, bb, tm), MLI(mli), MDT(mdt) {} void ScheduleDAGInstrs::BuildSchedUnits() { SUnits.clear(); @@ -35,7 +43,7 @@ void ScheduleDAGInstrs::BuildSchedUnits() { // to top. // Remember where defs and uses of each physical register are as we procede. - SUnit *Defs[TargetRegisterInfo::FirstVirtualRegister] = {}; + std::vector<SUnit *> Defs[TargetRegisterInfo::FirstVirtualRegister] = {}; std::vector<SUnit *> Uses[TargetRegisterInfo::FirstVirtualRegister] = {}; // Remember where unknown loads are after the most recent unknown store @@ -57,13 +65,20 @@ void ScheduleDAGInstrs::BuildSchedUnits() { // all the work of the block is done before the terminator. 
SUnit *Terminator = 0; + // Check to see if the scheduler cares about latencies. + bool UnitLatencies = ForceUnitLatencies(); + for (MachineBasicBlock::iterator MII = BB->end(), MIE = BB->begin(); MII != MIE; --MII) { MachineInstr *MI = prior(MII); + const TargetInstrDesc &TID = MI->getDesc(); SUnit *SU = NewSUnit(MI); // Assign the Latency field of SU using target-provided information. - ComputeLatency(SU); + if (UnitLatencies) + SU->Latency = 1; + else + ComputeLatency(SU); // Add register-based dependencies (data, anti, and output). for (unsigned j = 0, n = MI->getNumOperands(); j != n; ++j) { @@ -74,33 +89,51 @@ void ScheduleDAGInstrs::BuildSchedUnits() { assert(TRI->isPhysicalRegister(Reg) && "Virtual register encountered!"); std::vector<SUnit *> &UseList = Uses[Reg]; - SUnit *&Def = Defs[Reg]; + std::vector<SUnit *> &DefList = Defs[Reg]; // Optionally add output and anti dependencies. // TODO: Using a latency of 1 here assumes there's no cost for // reusing registers. SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output; - if (Def && Def != SU) - Def->addPred(SDep(SU, Kind, /*Latency=*/1, /*Reg=*/Reg)); + for (unsigned i = 0, e = DefList.size(); i != e; ++i) { + SUnit *DefSU = DefList[i]; + if (DefSU != SU && + (Kind != SDep::Output || !MO.isDead() || + !DefSU->getInstr()->registerDefIsDead(Reg))) + DefSU->addPred(SDep(SU, Kind, /*Latency=*/1, /*Reg=*/Reg)); + } for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { - SUnit *&Def = Defs[*Alias]; - if (Def && Def != SU) - Def->addPred(SDep(SU, Kind, /*Latency=*/1, /*Reg=*/ *Alias)); + std::vector<SUnit *> &DefList = Defs[*Alias]; + for (unsigned i = 0, e = DefList.size(); i != e; ++i) { + SUnit *DefSU = DefList[i]; + if (DefSU != SU && + (Kind != SDep::Output || !MO.isDead() || + !DefSU->getInstr()->registerDefIsDead(Reg))) + DefSU->addPred(SDep(SU, Kind, /*Latency=*/1, /*Reg=*/ *Alias)); + } } if (MO.isDef()) { // Add any data dependencies. 
- for (unsigned i = 0, e = UseList.size(); i != e; ++i) - if (UseList[i] != SU) - UseList[i]->addPred(SDep(SU, SDep::Data, SU->Latency, Reg)); + unsigned DataLatency = SU->Latency; + for (unsigned i = 0, e = UseList.size(); i != e; ++i) { + SUnit *UseSU = UseList[i]; + if (UseSU != SU) { + UseSU->addPred(SDep(SU, SDep::Data, DataLatency, Reg)); + } + } for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { std::vector<SUnit *> &UseList = Uses[*Alias]; - for (unsigned i = 0, e = UseList.size(); i != e; ++i) - if (UseList[i] != SU) - UseList[i]->addPred(SDep(SU, SDep::Data, SU->Latency, *Alias)); + for (unsigned i = 0, e = UseList.size(); i != e; ++i) { + SUnit *UseSU = UseList[i]; + if (UseSU != SU) + UseSU->addPred(SDep(SU, SDep::Data, DataLatency, *Alias)); + } } UseList.clear(); - Def = SU; + if (!MO.isDead()) + DefList.clear(); + DefList.push_back(SU); } else { UseList.push_back(SU); } @@ -111,7 +144,6 @@ void ScheduleDAGInstrs::BuildSchedUnits() { // after stack slots are lowered to actual addresses. // TODO: Use an AliasAnalysis and do real alias-analysis queries, and // produce more precise dependence information. - const TargetInstrDesc &TID = MI->getDesc(); if (TID.isCall() || TID.isReturn() || TID.isBranch() || TID.hasUnmodeledSideEffects()) { new_chain: diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index 6ac608f943..d917386967 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -99,6 +99,9 @@ private: SmallVector<SUnit*, 2>&); bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&); void ListScheduleBottomUp(); + + /// ForceUnitLatencies - The fast scheduler doesn't care about real latencies. 
+ bool ForceUnitLatencies() const { return true; } }; } // end anonymous namespace @@ -153,7 +156,8 @@ void ScheduleDAGFast::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) { DOUT << "*** Scheduling [" << CurCycle << "]: "; DEBUG(SU->dump(this)); - SU->Cycle = CurCycle; + assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!"); + SU->setHeightToAtLeast(CurCycle); Sequence.push_back(SU); // Bottom up: release predecessors @@ -177,7 +181,7 @@ void ScheduleDAGFast::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) { for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); I != E; ++I) { if (I->isAssignedRegDep()) { - if (LiveRegCycles[I->getReg()] == I->getSUnit()->Cycle) { + if (LiveRegCycles[I->getReg()] == I->getSUnit()->getHeight()) { assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); assert(LiveRegDefs[I->getReg()] == SU && "Physical register dependency violated?"); @@ -247,9 +251,6 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { } if (TID.isCommutable()) NewSU->isCommutable = true; - // FIXME: Calculate height / depth and propagate the changes? - NewSU->Depth = SU->Depth; - NewSU->Height = SU->Height; // LoadNode may already exist. This can happen when there is another // load from the same location and producing the same type of value @@ -262,9 +263,6 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { } else { LoadSU = NewSUnit(LoadNode); LoadNode->setNodeId(LoadSU->NodeNum); - - LoadSU->Depth = SU->Depth; - LoadSU->Height = SU->Height; } SDep ChainPred; @@ -344,10 +342,8 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { // New SUnit has the exact same predecessors. for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); I != E; ++I) - if (!I->isArtificial()) { + if (!I->isArtificial()) AddPred(NewSU, *I); - NewSU->Depth = std::max(NewSU->Depth, I->getSUnit()->Depth+1); - } // Only copy scheduled successors. 
Cut them from old node's successor // list and move them over. @@ -358,7 +354,6 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { continue; SUnit *SuccSU = I->getSUnit(); if (SuccSU->isScheduled) { - NewSU->Height = std::max(NewSU->Height, SuccSU->Height+1); SDep D = *I; D.setSUnit(NewSU); AddPred(SuccSU, D); diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp index b01700e37c..d42c8d8867 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp @@ -120,10 +120,7 @@ void ScheduleDAGList::ReleaseSucc(SUnit *SU, const SDep &D) { } #endif - // Compute the cycle when this SUnit actually becomes available. This - // is the max of the start time of all predecessors plus their latencies. - unsigned PredDoneCycle = SU->Cycle + SU->Latency; - SuccSU->CycleBound = std::max(SuccSU->CycleBound, PredDoneCycle); + SuccSU->setDepthToAtLeast(SU->getDepth() + D.getLatency()); if (SuccSU->NumPredsLeft == 0) { PendingQueue.push_back(SuccSU); @@ -138,7 +135,8 @@ void ScheduleDAGList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { DEBUG(SU->dump(this)); Sequence.push_back(SU); - SU->Cycle = CurCycle; + assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!"); + SU->setDepthToAtLeast(CurCycle); // Top down: release successors. for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); @@ -171,14 +169,14 @@ void ScheduleDAGList::ListScheduleTopDown() { // Check to see if any of the pending instructions are ready to issue. If // so, add them to the available queue. 
for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) { - if (PendingQueue[i]->CycleBound == CurCycle) { + if (PendingQueue[i]->getDepth() == CurCycle) { AvailableQueue->push(PendingQueue[i]); PendingQueue[i]->isAvailable = true; PendingQueue[i] = PendingQueue.back(); PendingQueue.pop_back(); --i; --e; } else { - assert(PendingQueue[i]->CycleBound > CurCycle && "Negative latency?"); + assert(PendingQueue[i]->getDepth() > CurCycle && "Negative latency?"); } } diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 49a5ac1904..c3b6137ae3 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -154,6 +154,10 @@ private: Topo.InitDAGTopologicalSorting(); return NewNode; } + + /// ForceUnitLatencies - Return true, since register-pressure-reducing + /// scheduling doesn't need actual latency information. + bool ForceUnitLatencies() const { return true; } }; } // end anonymous namespace @@ -171,8 +175,6 @@ void ScheduleDAGRRList::Schedule() { DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) SUnits[su].dumpAll(this)); - CalculateDepths(); - CalculateHeights(); Topo.InitDAGTopologicalSorting(); AvailableQueue->initNodes(SUnits); @@ -272,7 +274,8 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) { DOUT << "*** Scheduling [" << CurCycle << "]: "; DEBUG(SU->dump(this)); - SU->Cycle = CurCycle; + assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!"); + SU->setHeightToAtLeast(CurCycle); Sequence.push_back(SU); // Bottom up: release predecessors @@ -296,7 +299,7 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) { for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); I != E; ++I) { if (I->isAssignedRegDep()) { - if (LiveRegCycles[I->getReg()] == I->getSUnit()->Cycle) { + if (LiveRegCycles[I->getReg()] == I->getSUnit()->getHeight()) { assert(NumLiveRegs > 0 
&& "NumLiveRegs is already zero!"); assert(LiveRegDefs[I->getReg()] == SU && "Physical register dependency violated?"); @@ -328,7 +331,7 @@ void ScheduleDAGRRList::CapturePred(SDep *PredEdge) { /// UnscheduleNodeBottomUp - Remove the node from the schedule, update its and /// its predecessor states to reflect the change. void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { - DOUT << "*** Unscheduling [" << SU->Cycle << "]: "; + DOUT << "*** Unscheduling [" << SU->getHeight() << "]: "; DEBUG(SU->dump(this)); AvailableQueue->UnscheduledNode(SU); @@ -336,7 +339,7 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); I != E; ++I) { CapturePred(&*I); - if (I->isAssignedRegDep() && SU->Cycle == LiveRegCycles[I->getReg()]) { + if (I->isAssignedRegDep() && SU->getHeight() == LiveRegCycles[I->getReg()]) { assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); assert(LiveRegDefs[I->getReg()] == I->getSUnit() && "Physical register dependency violated?"); @@ -353,12 +356,12 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { LiveRegDefs[I->getReg()] = SU; ++NumLiveRegs; } - if (I->getSUnit()->Cycle < LiveRegCycles[I->getReg()]) - LiveRegCycles[I->getReg()] = I->getSUnit()->Cycle; + if (I->getSUnit()->getHeight() < LiveRegCycles[I->getReg()]) + LiveRegCycles[I->getReg()] = I->getSUnit()->getHeight(); } } - SU->Cycle = 0; + SU->setHeightDirty(); SU->isScheduled = false; SU->isAvailable = true; AvailableQueue->push(SU); @@ -443,9 +446,6 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { } else { LoadSU = CreateNewSUnit(LoadNode); LoadNode->setNodeId(LoadSU->NodeNum); - - LoadSU->Depth = SU->Depth; - LoadSU->Height = SU->Height; ComputeLatency(LoadSU); } @@ -462,9 +462,6 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { } if (TID.isCommutable()) NewSU->isCommutable = true; - // FIXME: Calculate height / depth and propagate the changes? 
- NewSU->Depth = SU->Depth; - NewSU->Height = SU->Height; ComputeLatency(NewSU); SDep ChainPred; @@ -548,10 +545,8 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { // New SUnit has the exact same predecessors. for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); I != E; ++I) - if (!I->isArtificial()) { + if (!I->isArtificial()) AddPred(NewSU, *I); - NewSU->Depth = std::max(NewSU->Depth, I->getSUnit()->Depth+1); - } // Only copy scheduled successors. Cut them from old node's successor // list and move them over. @@ -562,7 +557,6 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { continue; SUnit *SuccSU = I->getSUnit(); if (SuccSU->isScheduled) { - NewSU->Height = std::max(NewSU->Height, SuccSU->Height+1); SDep D = *I; D.setSUnit(NewSU); AddPred(SuccSU, D); @@ -570,9 +564,8 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { DelDeps.push_back(std::make_pair(SuccSU, D)); } } - for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) { + for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) RemovePred(DelDeps[i].first, DelDeps[i].second); - } AvailableQueue->updateNode(SU); AvailableQueue->addNode(NewSU); @@ -590,8 +583,6 @@ void ScheduleDAGRRList::InsertCCCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, SUnit *CopyFromSU = CreateNewSUnit(NULL); CopyFromSU->CopySrcRC = SrcRC; CopyFromSU->CopyDstRC = DestRC; - CopyFromSU->Depth = SU->Depth; - CopyFromSU->Height = SU->Height; SUnit *CopyToSU = CreateNewSUnit(NULL); CopyToSU->CopySrcRC = DestRC; @@ -870,7 +861,8 @@ void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { DOUT << "*** Scheduling [" << CurCycle << "]: "; DEBUG(SU->dump(this)); - SU->Cycle = CurCycle; + assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!"); + SU->setDepthToAtLeast(CurCycle); Sequence.push_back(SU); // Top down: release successors @@ -1107,19 +1099,19 @@ namespace { /// closestSucc - Returns the scheduled cycle of the successor which is /// closet to the 
current cycle. static unsigned closestSucc(const SUnit *SU) { - unsigned MaxCycle = 0; + unsigned MaxHeight = 0; for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); I != E; ++I) { - unsigned Cycle = I->getSUnit()->Cycle; + unsigned Height = I->getSUnit()->getHeight(); // If there are bunch of CopyToRegs stacked up, they should be considered // to be at the same position. if (I->getSUnit()->getNode() && I->getSUnit()->getNode()->getOpcode() == ISD::CopyToReg) - Cycle = closestSucc(I->getSUnit())+1; - if (Cycle > MaxCycle) - MaxCycle = Cycle; + Height = closestSucc(I->getSUnit())+1; + if (Height > MaxHeight) + MaxHeight = Height; } - return MaxCycle; + return MaxHeight; } /// calcMaxScratches - Returns an cost estimate of the worse case requirement @@ -1182,11 +1174,11 @@ bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { if (LScratch != RScratch) return LScratch > RScratch; - if (left->Height != right->Height) - return left->Height > right->Height; + if (left->getHeight() != right->getHeight()) + return left->getHeight() > right->getHeight(); - if (left->Depth != right->Depth) - return left->Depth < right->Depth; + if (left->getDepth() != right->getDepth()) + return left->getDepth() < right->getDepth(); assert(left->NodeQueueId && right->NodeQueueId && "NodeQueueId cannot be zero"); @@ -1294,7 +1286,8 @@ void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() { continue; // Be conservative. Ignore if nodes aren't at roughly the same // depth and height. 
- if (SuccSU->Height < SU->Height && (SU->Height - SuccSU->Height) > 1) + if (SuccSU->getHeight() < SU->getHeight() && + (SU->getHeight() - SuccSU->getHeight()) > 1) continue; if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode()) continue; @@ -1384,8 +1377,8 @@ bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { if (LPriority+LBonus != RPriority+RBonus) return LPriority+LBonus < RPriority+RBonus; - if (left->Depth != right->Depth) - return left->Depth < right->Depth; + if (left->getDepth() != right->getDepth()) + return left->getDepth() < right->getDepth(); if (left->NumSuccsLeft != right->NumSuccsLeft) return left->NumSuccsLeft > right->NumSuccsLeft; diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 69d3045bf2..21df142ff3 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -80,6 +80,9 @@ void ScheduleDAGSDNodes::BuildSchedUnits() { E = DAG->allnodes_end(); NI != E; ++NI) NI->setNodeId(-1); + // Check to see if the scheduler cares about latencies. + bool UnitLatencies = ForceUnitLatencies(); + for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(), E = DAG->allnodes_end(); NI != E; ++NI) { if (isPassiveNode(NI)) // Leaf node, e.g. a TargetImmediate. @@ -133,7 +136,10 @@ void ScheduleDAGSDNodes::BuildSchedUnits() { N->setNodeId(NodeSUnit->NodeNum); // Assign the Latency field of NodeSUnit using target-provided information. - ComputeLatency(NodeSUnit); + if (UnitLatencies) + NodeSUnit->Latency = 1; + else + ComputeLatency(NodeSUnit); } // Pass 2: add the preds, succs, etc. 
diff --git a/test/CodeGen/X86/fold-pcmpeqd-0.ll b/test/CodeGen/X86/fold-pcmpeqd-0.ll index 6af7219345..152c12215d 100644 --- a/test/CodeGen/X86/fold-pcmpeqd-0.ll +++ b/test/CodeGen/X86/fold-pcmpeqd-0.ll @@ -1,10 +1,8 @@ -; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin | not grep pcmpeqd +; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin | grep pcmpeqd | count 1 ; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin | grep pcmpeqd | count 1 -; On x86-64, this testcase shouldn't need to spill the -1 value, +; This testcase shouldn't need to spill the -1 value, ; so it should just use pcmpeqd to materialize an all-ones vector. -; On x86-32, there aren't enough registers, so an all-ones -; constant pool should be created so it can be folded. %struct.__ImageExecInfo = type <{ <4 x i32>, <4 x float>, <2 x i64>, i8*, i8*, i8*, i32, i32, i32, i32, i32 }> %struct._cl_image_format_t = type <{ i32, i32, i32 }> |