diff options
-rw-r--r-- | include/llvm/CodeGen/LatencyPriorityQueue.h | 18 | ||||
-rw-r--r-- | include/llvm/CodeGen/ScheduleDAG.h | 68 | ||||
-rw-r--r-- | include/llvm/CodeGen/ScheduleDAGInstrs.h | 10 | ||||
-rw-r--r-- | lib/CodeGen/LatencyPriorityQueue.cpp | 41 | ||||
-rw-r--r-- | lib/CodeGen/PostRASchedulerList.cpp | 98 | ||||
-rw-r--r-- | lib/CodeGen/ScheduleDAG.cpp | 241 | ||||
-rw-r--r-- | lib/CodeGen/ScheduleDAGInstrs.cpp | 68 | ||||
-rw-r--r-- | lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp | 19 | ||||
-rw-r--r-- | lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp | 12 | ||||
-rw-r--r-- | lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp | 67 | ||||
-rw-r--r-- | lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp | 8 | ||||
-rw-r--r-- | test/CodeGen/X86/fold-pcmpeqd-0.ll | 6 |
12 files changed, 357 insertions, 299 deletions
diff --git a/include/llvm/CodeGen/LatencyPriorityQueue.h b/include/llvm/CodeGen/LatencyPriorityQueue.h index f7eb3a62a2..71fae2aeab 100644 --- a/include/llvm/CodeGen/LatencyPriorityQueue.h +++ b/include/llvm/CodeGen/LatencyPriorityQueue.h @@ -34,10 +34,6 @@ namespace llvm { // SUnits - The SUnits for the current graph. std::vector<SUnit> *SUnits; - // Latencies - The latency (max of latency from this node to the bb exit) - // for each node. - std::vector<int> Latencies; - /// NumNodesSolelyBlocking - This vector contains, for every node in the /// Queue, the number of nodes that the node is the sole unscheduled /// predecessor for. This is used as a tie-breaker heuristic for better @@ -51,29 +47,23 @@ public: void initNodes(std::vector<SUnit> &sunits) { SUnits = &sunits; - // Calculate node priorities. - CalculatePriorities(); + NumNodesSolelyBlocking.resize(SUnits->size(), 0); } void addNode(const SUnit *SU) { - Latencies.resize(SUnits->size(), -1); NumNodesSolelyBlocking.resize(SUnits->size(), 0); - CalcLatency(*SU); } void updateNode(const SUnit *SU) { - Latencies[SU->NodeNum] = -1; - CalcLatency(*SU); } void releaseState() { SUnits = 0; - Latencies.clear(); } unsigned getLatency(unsigned NodeNum) const { - assert(NodeNum < Latencies.size()); - return Latencies[NodeNum]; + assert(NodeNum < (*SUnits).size()); + return (*SUnits)[NodeNum].getHeight(); } unsigned getNumSolelyBlockNodes(unsigned NodeNum) const { @@ -114,8 +104,6 @@ public: void ScheduledNode(SUnit *Node); private: - void CalculatePriorities(); - void CalcLatency(const SUnit &SU); void AdjustPriorityOfUnscheduledPreds(SUnit *SU); SUnit *getSingleUnscheduledPred(SUnit *SU); }; diff --git a/include/llvm/CodeGen/ScheduleDAG.h b/include/llvm/CodeGen/ScheduleDAG.h index 2f6fd00928..ee640e24a1 100644 --- a/include/llvm/CodeGen/ScheduleDAG.h +++ b/include/llvm/CodeGen/ScheduleDAG.h @@ -242,10 +242,12 @@ namespace llvm { bool isPending : 1; // True once pending. bool isAvailable : 1; // True once available. 
bool isScheduled : 1; // True once scheduled. - unsigned CycleBound; // Upper/lower cycle to be scheduled at. - unsigned Cycle; // Once scheduled, the cycle of the op. - unsigned Depth; // Node depth; - unsigned Height; // Node height; + private: + bool isDepthCurrent : 1; // True if Depth is current. + bool isHeightCurrent : 1; // True if Height is current. + unsigned Depth; // Node depth. + unsigned Height; // Node height. + public: const TargetRegisterClass *CopyDstRC; // Is a special copy node if not null. const TargetRegisterClass *CopySrcRC; @@ -256,7 +258,8 @@ namespace llvm { Latency(0), NumPreds(0), NumSuccs(0), NumPredsLeft(0), NumSuccsLeft(0), isTwoAddress(false), isCommutable(false), hasPhysRegDefs(false), isPending(false), isAvailable(false), isScheduled(false), - CycleBound(0), Cycle(~0u), Depth(0), Height(0), + isDepthCurrent(false), isHeightCurrent(false), + Depth(0), Height(0), CopyDstRC(NULL), CopySrcRC(NULL) {} /// SUnit - Construct an SUnit for post-regalloc scheduling to represent @@ -266,7 +269,8 @@ namespace llvm { Latency(0), NumPreds(0), NumSuccs(0), NumPredsLeft(0), NumSuccsLeft(0), isTwoAddress(false), isCommutable(false), hasPhysRegDefs(false), isPending(false), isAvailable(false), isScheduled(false), - CycleBound(0), Cycle(~0u), Depth(0), Height(0), + isDepthCurrent(false), isHeightCurrent(false), + Depth(0), Height(0), CopyDstRC(NULL), CopySrcRC(NULL) {} /// setNode - Assign the representative SDNode for this SUnit. @@ -307,6 +311,41 @@ namespace llvm { /// the specified node. void removePred(const SDep &D); + /// getDepth - Return the depth of this node, which is the length of the + /// maximum path up to any node which has no predecessors. + unsigned getDepth() const { + if (!isDepthCurrent) const_cast<SUnit *>(this)->ComputeDepth(); + return Depth; + } + + /// getHeight - Return the height of this node, which is the length of the + /// maximum path down to any node which has no successors. 
+ unsigned getHeight() const { + if (!isHeightCurrent) const_cast<SUnit *>(this)->ComputeHeight(); + return Height; + } + + /// setDepthToAtLeast - If NewDepth is greater than this node's depth + /// value, set it to be the new depth value. This also recursively + /// marks successor nodes dirty. + void setDepthToAtLeast(unsigned NewDepth); + + /// setHeightToAtLeast - If NewHeight is greater than this node's height + /// value, set it to be the new height value. This also recursively + /// marks predecessor nodes dirty. + void setHeightToAtLeast(unsigned NewHeight); + + /// setDepthDirty - Set a flag in this node to indicate that its + /// stored Depth value will require recomputation the next time + /// getDepth() is called. + void setDepthDirty(); + + /// setHeightDirty - Set a flag in this node to indicate that its + /// stored Height value will require recomputation the next time + /// getHeight() is called. + void setHeightDirty(); + + /// isPred - Test if node N is a predecessor of this node. bool isPred(SUnit *N) { for (unsigned i = 0, e = (unsigned)Preds.size(); i != e; ++i) if (Preds[i].getSUnit() == N) @@ -314,6 +353,7 @@ namespace llvm { return false; } + /// isSucc - Test if node N is a successor of this node. bool isSucc(SUnit *N) { for (unsigned i = 0, e = (unsigned)Succs.size(); i != e; ++i) if (Succs[i].getSUnit() == N) @@ -324,6 +364,10 @@ namespace llvm { return false; } + + private: + void ComputeDepth(); + void ComputeHeight(); }; //===--------------------------------------------------------------------===// @@ -397,12 +441,7 @@ namespace llvm { /// ComputeLatency - Compute node latency. /// - virtual void ComputeLatency(SUnit *SU) { SU->Latency = 1; } - - /// CalculateDepths, CalculateHeights - Calculate node depth / height. 
- /// - void CalculateDepths(); - void CalculateHeights(); + virtual void ComputeLatency(SUnit *SU) = 0; protected: /// EmitNoop - Emit a noop instruction. @@ -440,6 +479,11 @@ namespace llvm { void EmitCrossRCCopy(SUnit *SU, DenseMap<SUnit*, unsigned> &VRBaseMap); + /// ForceUnitLatencies - Return true if all scheduling edges should be given a + /// latency value of one. The default is to return false; schedulers may + /// override this as needed. + virtual bool ForceUnitLatencies() const { return false; } + private: /// EmitLiveInCopy - Emit a copy for a live in physical register. If the /// physical register has only a single copy use, then coalesced the copy diff --git a/include/llvm/CodeGen/ScheduleDAGInstrs.h b/include/llvm/CodeGen/ScheduleDAGInstrs.h index 2b6c1d3a55..c74ef57674 100644 --- a/include/llvm/CodeGen/ScheduleDAGInstrs.h +++ b/include/llvm/CodeGen/ScheduleDAGInstrs.h @@ -18,10 +18,18 @@ #include "llvm/CodeGen/ScheduleDAG.h" namespace llvm { + class MachineLoopInfo; + class MachineDominatorTree; + class ScheduleDAGInstrs : public ScheduleDAG { + const MachineLoopInfo &MLI; + const MachineDominatorTree &MDT; + public: ScheduleDAGInstrs(MachineBasicBlock *bb, - const TargetMachine &tm); + const TargetMachine &tm, + const MachineLoopInfo &mli, + const MachineDominatorTree &mdt); virtual ~ScheduleDAGInstrs() {} diff --git a/lib/CodeGen/LatencyPriorityQueue.cpp b/lib/CodeGen/LatencyPriorityQueue.cpp index 2abbf364e3..7ad9ac9b51 100644 --- a/lib/CodeGen/LatencyPriorityQueue.cpp +++ b/lib/CodeGen/LatencyPriorityQueue.cpp @@ -41,47 +41,6 @@ bool latency_sort::operator()(const SUnit *LHS, const SUnit *RHS) const { } -/// CalcNodePriority - Calculate the maximal path from the node to the exit. 
-/// -void LatencyPriorityQueue::CalcLatency(const SUnit &SU) { - int &Latency = Latencies[SU.NodeNum]; - if (Latency != -1) - return; - - std::vector<const SUnit*> WorkList; - WorkList.push_back(&SU); - while (!WorkList.empty()) { - const SUnit *Cur = WorkList.back(); - bool AllDone = true; - unsigned MaxSuccLatency = 0; - for (SUnit::const_succ_iterator I = Cur->Succs.begin(),E = Cur->Succs.end(); - I != E; ++I) { - int SuccLatency = Latencies[I->getSUnit()->NodeNum]; - if (SuccLatency == -1) { - AllDone = false; - WorkList.push_back(I->getSUnit()); - } else { - unsigned NewLatency = SuccLatency + I->getLatency(); - MaxSuccLatency = std::max(MaxSuccLatency, NewLatency); - } - } - if (AllDone) { - Latencies[Cur->NodeNum] = MaxSuccLatency; - WorkList.pop_back(); - } - } -} - -/// CalculatePriorities - Calculate priorities of all scheduling units. -void LatencyPriorityQueue::CalculatePriorities() { - Latencies.assign(SUnits->size(), -1); - NumNodesSolelyBlocking.assign(SUnits->size(), 0); - - // For each node, calculate the maximal path from the node to the exit. - for (unsigned i = 0, e = SUnits->size(); i != e; ++i) - CalcLatency((*SUnits)[i]); -} - /// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor /// of SU, return it, otherwise return null. 
SUnit *LatencyPriorityQueue::getSingleUnscheduledPred(SUnit *SU) { diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp index 2f7c0118a0..0fc6c3370b 100644 --- a/lib/CodeGen/PostRASchedulerList.cpp +++ b/lib/CodeGen/PostRASchedulerList.cpp @@ -23,7 +23,9 @@ #include "llvm/CodeGen/ScheduleDAGInstrs.h" #include "llvm/CodeGen/LatencyPriorityQueue.h" #include "llvm/CodeGen/SchedulerRegistry.h" +#include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -49,6 +51,14 @@ namespace { static char ID; PostRAScheduler() : MachineFunctionPass(&ID) {} + void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<MachineDominatorTree>(); + AU.addPreserved<MachineDominatorTree>(); + AU.addRequired<MachineLoopInfo>(); + AU.addPreserved<MachineLoopInfo>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + const char *getPassName() const { return "Post RA top-down list latency scheduler"; } @@ -72,8 +82,10 @@ namespace { ScheduleDAGTopologicalSort Topo; public: - SchedulePostRATDList(MachineBasicBlock *mbb, const TargetMachine &tm) - : ScheduleDAGInstrs(mbb, tm), Topo(SUnits) {} + SchedulePostRATDList(MachineBasicBlock *mbb, const TargetMachine &tm, + const MachineLoopInfo &MLI, + const MachineDominatorTree &MDT) + : ScheduleDAGInstrs(mbb, tm, MLI, MDT), Topo(SUnits) {} void Schedule(); @@ -88,11 +100,14 @@ namespace { bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { DOUT << "PostRAScheduler\n"; + const MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>(); + const MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>(); + // Loop over all of the basic blocks for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end(); MBB != MBBe; ++MBB) { - SchedulePostRATDList Scheduler(MBB, Fn.getTarget()); + 
SchedulePostRATDList Scheduler(MBB, Fn.getTarget(), MLI, MDT); Scheduler.Run(); @@ -142,6 +157,28 @@ getInstrOperandRegClass(const TargetRegisterInfo *TRI, return TRI->getRegClass(II.OpInfo[Op].RegClass); } +/// CriticalPathStep - Return the next SUnit after SU on the bottom-up +/// critical path. +static SDep *CriticalPathStep(SUnit *SU) { + SDep *Next = 0; + unsigned NextDepth = 0; + // Find the predecessor edge with the greatest depth. + for (SUnit::pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end(); + P != PE; ++P) { + SUnit *PredSU = P->getSUnit(); + unsigned PredLatency = P->getLatency(); + unsigned PredTotalLatency = PredSU->getDepth() + PredLatency; + // In the case of a latency tie, prefer an anti-dependency edge over + // other types of edges. + if (NextDepth < PredTotalLatency || + (NextDepth == PredTotalLatency && P->getKind() == SDep::Anti)) { + NextDepth = PredTotalLatency; + Next = &*P; + } + } + return Next; +} + /// BreakAntiDependencies - Identifiy anti-dependencies along the critical path /// of the ScheduleDAG and break them by renaming registers. /// @@ -150,34 +187,16 @@ bool SchedulePostRATDList::BreakAntiDependencies() { // so just duck out immediately if the block is empty. if (BB->empty()) return false; - Topo.InitDAGTopologicalSorting(); - - // Compute a critical path for the DAG. + // Find the node at the bottom of the critical path. SUnit *Max = 0; - std::vector<SDep *> CriticalPath(SUnits.size()); - for (ScheduleDAGTopologicalSort::const_iterator I = Topo.begin(), - E = Topo.end(); I != E; ++I) { - SUnit *SU = &SUnits[*I]; - for (SUnit::pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end(); - P != PE; ++P) { - SUnit *PredSU = P->getSUnit(); - // This assumes that there's no delay for reusing registers. 
- unsigned PredLatency = P->getLatency(); - unsigned PredTotalLatency = PredSU->CycleBound + PredLatency; - if (SU->CycleBound < PredTotalLatency || - (SU->CycleBound == PredTotalLatency && - P->getKind() == SDep::Anti)) { - SU->CycleBound = PredTotalLatency; - CriticalPath[*I] = &*P; - } - } - // Keep track of the node at the end of the critical path. - if (!Max || SU->CycleBound + SU->Latency > Max->CycleBound + Max->Latency) + for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { + SUnit *SU = &SUnits[i]; + if (!Max || SU->getDepth() + SU->Latency > Max->getDepth() + Max->Latency) Max = SU; } DOUT << "Critical path has total latency " - << (Max ? Max->CycleBound + Max->Latency : 0) << "\n"; + << (Max ? Max->getDepth() + Max->Latency : 0) << "\n"; // Walk the critical path from the bottom up. Collect all anti-dependence // edges on the critical path. Skip anti-dependencies between SUnits that @@ -195,9 +214,9 @@ bool SchedulePostRATDList::BreakAntiDependencies() { // the anti-dependencies in an instruction in order to be effective. BitVector AllocatableSet = TRI->getAllocatableSet(*MF); DenseMap<MachineInstr *, unsigned> CriticalAntiDeps; - for (SUnit *SU = Max; CriticalPath[SU->NodeNum]; - SU = CriticalPath[SU->NodeNum]->getSUnit()) { - SDep *Edge = CriticalPath[SU->NodeNum]; + SUnit *SU = Max; + for (SDep *Edge = CriticalPathStep(SU); Edge; + Edge = CriticalPathStep(SU = Edge->getSUnit())) { SUnit *NextSU = Edge->getSUnit(); // Only consider anti-dependence edges. 
if (Edge->getKind() != SDep::Anti) @@ -494,6 +513,11 @@ bool SchedulePostRATDList::BreakAntiDependencies() { Classes[SubregReg] = 0; RegRefs.erase(SubregReg); } + for (const unsigned *Super = TRI->getSuperRegisters(Reg); + *Super; ++Super) { + unsigned SuperReg = *Super; + Classes[SuperReg] = reinterpret_cast<TargetRegisterClass *>(-1); + } } for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); @@ -556,8 +580,7 @@ void SchedulePostRATDList::ReleaseSucc(SUnit *SU, SDep *SuccEdge) { // Compute how many cycles it will be before this actually becomes // available. This is the max of the start time of all predecessors plus // their latencies. - unsigned PredDoneCycle = SU->Cycle + SuccEdge->getLatency(); - SuccSU->CycleBound = std::max(SuccSU->CycleBound, PredDoneCycle); + SuccSU->setDepthToAtLeast(SU->getDepth() + SuccEdge->getLatency()); if (SuccSU->NumPredsLeft == 0) { PendingQueue.push_back(SuccSU); @@ -572,7 +595,8 @@ void SchedulePostRATDList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { DEBUG(SU->dump(this)); Sequence.push_back(SU); - SU->Cycle = CurCycle; + assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!"); + SU->setDepthToAtLeast(CurCycle); // Top down: release successors. for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); @@ -603,21 +627,21 @@ void SchedulePostRATDList::ListScheduleTopDown() { while (!AvailableQueue.empty() || !PendingQueue.empty()) { // Check to see if any of the pending instructions are ready to issue. If // so, add them to the available queue. 
+ unsigned MinDepth = ~0u; for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) { - if (PendingQueue[i]->CycleBound == CurCycle) { + if (PendingQueue[i]->getDepth() <= CurCycle) { AvailableQueue.push(PendingQueue[i]); PendingQueue[i]->isAvailable = true; PendingQueue[i] = PendingQueue.back(); PendingQueue.pop_back(); --i; --e; - } else { - assert(PendingQueue[i]->CycleBound > CurCycle && "Non-positive latency?"); - } + } else if (PendingQueue[i]->getDepth() < MinDepth) + MinDepth = PendingQueue[i]->getDepth(); } // If there are no instructions available, don't try to issue anything. if (AvailableQueue.empty()) { - ++CurCycle; + CurCycle = MinDepth != ~0u ? MinDepth : CurCycle + 1; continue; } diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp index 7ceab3a87b..dad5eb5d9b 100644 --- a/lib/CodeGen/ScheduleDAG.cpp +++ b/lib/CodeGen/ScheduleDAG.cpp @@ -33,115 +33,6 @@ ScheduleDAG::ScheduleDAG(SelectionDAG *dag, MachineBasicBlock *bb, ScheduleDAG::~ScheduleDAG() {} -/// CalculateDepths - compute depths using algorithms for the longest -/// paths in the DAG -void ScheduleDAG::CalculateDepths() { - unsigned DAGSize = SUnits.size(); - std::vector<SUnit*> WorkList; - WorkList.reserve(DAGSize); - - // Initialize the data structures - for (unsigned i = 0, e = DAGSize; i != e; ++i) { - SUnit *SU = &SUnits[i]; - unsigned Degree = SU->Preds.size(); - // Temporarily use the Depth field as scratch space for the degree count. - SU->Depth = Degree; - - // Is it a node without dependencies? - if (Degree == 0) { - assert(SU->Preds.empty() && "SUnit should have no predecessors"); - // Collect leaf nodes - WorkList.push_back(SU); - } - } - - // Process nodes in the topological order - while (!WorkList.empty()) { - SUnit *SU = WorkList.back(); - WorkList.pop_back(); - unsigned SUDepth = 0; - - // Use dynamic programming: - // When current node is being processed, all of its dependencies - // are already processed. 
- // So, just iterate over all predecessors and take the longest path - for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); - I != E; ++I) { - unsigned PredDepth = I->getSUnit()->Depth; - if (PredDepth+1 > SUDepth) { - SUDepth = PredDepth + 1; - } - } - - SU->Depth = SUDepth; - - // Update degrees of all nodes depending on current SUnit - for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); - I != E; ++I) { - SUnit *SU = I->getSUnit(); - if (!--SU->Depth) - // If all dependencies of the node are processed already, - // then the longest path for the node can be computed now - WorkList.push_back(SU); - } - } -} - -/// CalculateHeights - compute heights using algorithms for the longest -/// paths in the DAG -void ScheduleDAG::CalculateHeights() { - unsigned DAGSize = SUnits.size(); - std::vector<SUnit*> WorkList; - WorkList.reserve(DAGSize); - - // Initialize the data structures - for (unsigned i = 0, e = DAGSize; i != e; ++i) { - SUnit *SU = &SUnits[i]; - unsigned Degree = SU->Succs.size(); - // Temporarily use the Height field as scratch space for the degree count. - SU->Height = Degree; - - // Is it a node without dependencies? - if (Degree == 0) { - assert(SU->Succs.empty() && "Something wrong"); - assert(WorkList.empty() && "Should be empty"); - // Collect leaf nodes - WorkList.push_back(SU); - } - } - - // Process nodes in the topological order - while (!WorkList.empty()) { - SUnit *SU = WorkList.back(); - WorkList.pop_back(); - unsigned SUHeight = 0; - - // Use dynamic programming: - // When current node is being processed, all of its dependencies - // are already processed. 
- // So, just iterate over all successors and take the longest path - for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); - I != E; ++I) { - unsigned SuccHeight = I->getSUnit()->Height; - if (SuccHeight+1 > SUHeight) { - SUHeight = SuccHeight + 1; - } - } - - SU->Height = SUHeight; - - // Update degrees of all nodes depending on current SUnit - for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); - I != E; ++I) { - SUnit *SU = I->getSUnit(); - if (!--SU->Height) - // If all dependencies of the node are processed already, - // then the longest path for the node can be computed now - WorkList.push_back(SU); - } - } -} - /// dump - dump the schedule. void ScheduleDAG::dumpSchedule() const { for (unsigned i = 0, e = Sequence.size(); i != e; i++) { @@ -171,13 +62,10 @@ void SUnit::addPred(const SDep &D) { for (unsigned i = 0, e = (unsigned)Preds.size(); i != e; ++i) if (Preds[i] == D) return; - // Add a pred to this SUnit. - Preds.push_back(D); // Now add a corresponding succ to N. SDep P = D; P.setSUnit(this); SUnit *N = D.getSUnit(); - N->Succs.push_back(P); // Update the bookkeeping. 
if (D.getKind() == SDep::Data) { ++NumPreds; @@ -187,6 +75,10 @@ void SUnit::addPred(const SDep &D) { ++NumPredsLeft; if (!isScheduled) ++N->NumSuccsLeft; + N->Succs.push_back(P); + Preds.push_back(D); + this->setDepthDirty(); + N->setHeightDirty(); } /// removePred - This removes the specified edge as a pred of the current @@ -220,10 +112,128 @@ void SUnit::removePred(const SDep &D) { --NumPredsLeft; if (!isScheduled) --N->NumSuccsLeft; + this->setDepthDirty(); + N->setHeightDirty(); return; } } +void SUnit::setDepthDirty() { + SmallVector<SUnit*, 8> WorkList; + WorkList.push_back(this); + while (!WorkList.empty()) { + SUnit *SU = WorkList.back(); + WorkList.pop_back(); + if (!SU->isDepthCurrent) continue; + SU->isDepthCurrent = false; + for (SUnit::const_succ_iterator I = Succs.begin(), + E = Succs.end(); I != E; ++I) + WorkList.push_back(I->getSUnit()); + } +} + +void SUnit::setHeightDirty() { + SmallVector<SUnit*, 8> WorkList; + WorkList.push_back(this); + while (!WorkList.empty()) { + SUnit *SU = WorkList.back(); + WorkList.pop_back(); + if (!SU->isHeightCurrent) continue; + SU->isHeightCurrent = false; + for (SUnit::const_pred_iterator I = Preds.begin(), + E = Preds.end(); I != E; ++I) + WorkList.push_back(I->getSUnit()); + } +} + +/// setDepthToAtLeast - Update this node's successors to reflect the +/// fact that this node's depth just increased. +/// +void SUnit::setDepthToAtLeast(unsigned NewDepth) { + if (NewDepth <= Depth) + return; + setDepthDirty(); + Depth = NewDepth; + isDepthCurrent = true; +} + +/// setHeightToAtLeast - Update this node's predecessors to reflect the +/// fact that this node's height just increased. +/// +void SUnit::setHeightToAtLeast(unsigned NewHeight) { + if (NewHeight <= Height) + return; + setHeightDirty(); + Height = NewHeight; + isHeightCurrent = true; +} + +/// ComputeDepth - Calculate the maximal path from the node to the exit. 
+/// +void SUnit::ComputeDepth() { + SmallVector<SUnit*, 8> WorkList; + WorkList.push_back(this); + while (!WorkList.empty()) { + SUnit *Cur = WorkList.back(); + + bool Done = true; + unsigned MaxPredDepth = 0; + for (SUnit::const_pred_iterator I = Cur->Preds.begin(), + E = Cur->Preds.end(); I != E; ++I) { + SUnit *PredSU = I->getSUnit(); + if (PredSU->isDepthCurrent) + MaxPredDepth = std::max(MaxPredDepth, + PredSU->Depth + I->getLatency()); + else { + Done = false; + WorkList.push_back(PredSU); + } + } + + if (Done) { + WorkList.pop_back(); + if (MaxPredDepth != Cur->Depth) { + Cur->setDepthDirty(); + Cur->Depth = MaxPredDepth; + } + Cur->isDepthCurrent = true; + } + } +} + +/// ComputeHeight - Calculate the maximal path from the node to the entry. +/// +void SUnit::ComputeHeight() { + SmallVector<SUnit*, 8> WorkList; + WorkList.push_back(this); + while (!WorkList.empty()) { + SUnit *Cur = WorkList.back(); + + bool Done = true; + unsigned MaxSuccHeight = 0; + for (SUnit::const_succ_iterator I = Cur->Succs.begin(), + E = Cur->Succs.end(); I != E; ++I) { + SUnit *SuccSU = I->getSUnit(); + if (SuccSU->isHeightCurrent) + MaxSuccHeight = std::max(MaxSuccHeight, + SuccSU->Height + I->getLatency()); + else { + Done = false; + WorkList.push_back(SuccSU); + } + } + + if (Done) { + WorkList.pop_back(); + if (MaxSuccHeight != Cur->Height) { + Cur->setHeightDirty(); + Cur->Height = MaxSuccHeight; + } + Cur->isHeightCurrent = true; + } + } +} + /// SUnit - Scheduling unit. It's an wrapper around either a single SDNode or /// a group of nodes flagged together. void SUnit::dump(const ScheduleDAG *G) const { @@ -299,11 +309,14 @@ void ScheduleDAG::VerifySchedule(bool isBottomUp) { cerr << "has not been scheduled!\n"; AnyNotSched = true; } - if (SUnits[i].isScheduled && SUnits[i].Cycle > (unsigned)INT_MAX) { + if (SUnits[i].isScheduled && + (isBottomUp ? SUnits[i].getHeight() : SUnits[i].getHeight()) > + unsigned(INT_MAX)) { if (!AnyNotSched) cerr << "*** Scheduling failed! 
***\n"; SUnits[i].dump(this); - cerr << "has an unexpected Cycle value!\n"; + cerr << "has an unexpected " + << (isBottomUp ? "Height" : "Depth") << " value!\n"; AnyNotSched = true; } if (isBottomUp) { diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index fa42d393a3..cfa639e940 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -13,19 +13,27 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "sched-instrs" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/ScheduleDAGInstrs.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtarget.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/SmallSet.h" #include <map> using namespace llvm; ScheduleDAGInstrs::ScheduleDAGInstrs(MachineBasicBlock *bb, - const TargetMachine &tm) - : ScheduleDAG(0, bb, tm) {} + const TargetMachine &tm, + const MachineLoopInfo &mli, + const MachineDominatorTree &mdt) + : ScheduleDAG(0, bb, tm), MLI(mli), MDT(mdt) {} void ScheduleDAGInstrs::BuildSchedUnits() { SUnits.clear(); @@ -35,7 +43,7 @@ void ScheduleDAGInstrs::BuildSchedUnits() { // to top. // Remember where defs and uses of each physical register are as we procede. - SUnit *Defs[TargetRegisterInfo::FirstVirtualRegister] = {}; + std::vector<SUnit *> Defs[TargetRegisterInfo::FirstVirtualRegister] = {}; std::vector<SUnit *> Uses[TargetRegisterInfo::FirstVirtualRegister] = {}; // Remember where unknown loads are after the most recent unknown store @@ -57,13 +65,20 @@ void ScheduleDAGInstrs::BuildSchedUnits() { // all the work of the block is done before the terminator. 
SUnit *Terminator = 0; + // Check to see if the scheduler cares about latencies. + bool UnitLatencies = ForceUnitLatencies(); + for (MachineBasicBlock::iterator MII = BB->end(), MIE = BB->begin(); MII != MIE; --MII) { MachineInstr *MI = prior(MII); + const TargetInstrDesc &TID = MI->getDesc(); SUnit *SU = NewSUnit(MI); // Assign the Latency field of SU using target-provided information. - ComputeLatency(SU); + if (UnitLatencies) + SU->Latency = 1; + else + ComputeLatency(SU); // Add register-based dependencies (data, anti, and output). for (unsigned j = 0, n = MI->getNumOperands(); j != n; ++j) { @@ -74,33 +89,51 @@ void ScheduleDAGInstrs::BuildSchedUnits() { assert(TRI->isPhysicalRegister(Reg) && "Virtual register encountered!"); std::vector<SUnit *> &UseList = Uses[Reg]; - SUnit *&Def = Defs[Reg]; + std::vector<SUnit *> &DefList = Defs[Reg]; // Optionally add output and anti dependencies. // TODO: Using a latency of 1 here assumes there's no cost for // reusing registers. SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output; - if (Def && Def != SU) - Def->addPred(SDep(SU, Kind, /*Latency=*/1, /*Reg=*/Reg)); + for (unsigned i = 0, e = DefList.size(); i != e; ++i) { + SUnit *DefSU = DefList[i]; + if (DefSU != SU && + (Kind != SDep::Output || !MO.isDead() || + !DefSU->getInstr()->registerDefIsDead(Reg))) + DefSU->addPred(SDep(SU, Kind, /*Latency=*/1, /*Reg=*/Reg)); + } for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { - SUnit *&Def = Defs[*Alias]; - if (Def && Def != SU) - Def->addPred(SDep(SU, Kind, /*Latency=*/1, /*Reg=*/ *Alias)); + std::vector<SUnit *> &DefList = Defs[*Alias]; + for (unsigned i = 0, e = DefList.size(); i != e; ++i) { + SUnit *DefSU = DefList[i]; + if (DefSU != SU && + (Kind != SDep::Output || !MO.isDead() || + !DefSU->getInstr()->registerDefIsDead(Reg))) + DefSU->addPred(SDep(SU, Kind, /*Latency=*/1, /*Reg=*/ *Alias)); + } } if (MO.isDef()) { // Add any data dependencies. 
- for (unsigned i = 0, e = UseList.size(); i != e; ++i) - if (UseList[i] != SU) - UseList[i]->addPred(SDep(SU, SDep::Data, SU->Latency, Reg)); + unsigned DataLatency = SU->Latency; + for (unsigned i = 0, e = UseList.size(); i != e; ++i) { + SUnit *UseSU = UseList[i]; + if (UseSU != SU) { + UseSU->addPred(SDep(SU, SDep::Data, DataLatency, Reg)); + } + } for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { std::vector<SUnit *> &UseList = Uses[*Alias]; - for (unsigned i = 0, e = UseList.size(); i != e; ++i) - if (UseList[i] != SU) - UseList[i]->addPred(SDep(SU, SDep::Data, SU->Latency, *Alias)); + for (unsigned i = 0, e = UseList.size(); i != e; ++i) { + SUnit *UseSU = UseList[i]; + if (UseSU != SU) + UseSU->addPred(SDep(SU, SDep::Data, DataLatency, *Alias)); + } } UseList.clear(); - Def = SU; + if (!MO.isDead()) + DefList.clear(); + DefList.push_back(SU); } else { UseList.push_back(SU); } @@ -111,7 +144,6 @@ void ScheduleDAGInstrs::BuildSchedUnits() { // after stack slots are lowered to actual addresses. // TODO: Use an AliasAnalysis and do real alias-analysis queries, and // produce more precise dependence information. - const TargetInstrDesc &TID = MI->getDesc(); if (TID.isCall() || TID.isReturn() || TID.isBranch() || TID.hasUnmodeledSideEffects()) { new_chain: diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index 6ac608f943..d917386967 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -99,6 +99,9 @@ private: SmallVector<SUnit*, 2>&); bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&); void ListScheduleBottomUp(); + + /// ForceUnitLatencies - The fast scheduler doesn't care about real latencies. 
+ bool ForceUnitLatencies() const { return true; } }; } // end anonymous namespace @@ -153,7 +156,8 @@ void ScheduleDAGFast::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) { DOUT << "*** Scheduling [" << CurCycle << "]: "; DEBUG(SU->dump(this)); - SU->Cycle = CurCycle; + assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!"); + SU->setHeightToAtLeast(CurCycle); Sequence.push_back(SU); // Bottom up: release predecessors @@ -177,7 +181,7 @@ void ScheduleDAGFast::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) { for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); I != E; ++I) { if (I->isAssignedRegDep()) { - if (LiveRegCycles[I->getReg()] == I->getSUnit()->Cycle) { + if (LiveRegCycles[I->getReg()] == I->getSUnit()->getHeight()) { assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); assert(LiveRegDefs[I->getReg()] == SU && "Physical register dependency violated?"); @@ -247,9 +251,6 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { } if (TID.isCommutable()) NewSU->isCommutable = true; - // FIXME: Calculate height / depth and propagate the changes? - NewSU->Depth = SU->Depth; - NewSU->Height = SU->Height; // LoadNode may already exist. This can happen when there is another // load from the same location and producing the same type of value @@ -262,9 +263,6 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { } else { LoadSU = NewSUnit(LoadNode); LoadNode->setNodeId(LoadSU->NodeNum); - - LoadSU->Depth = SU->Depth; - LoadSU->Height = SU->Height; } SDep ChainPred; @@ -344,10 +342,8 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { // New SUnit has the exact same predecessors. for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); I != E; ++I) - if (!I->isArtificial()) { + if (!I->isArtificial()) AddPred(NewSU, *I); - NewSU->Depth = std::max(NewSU->Depth, I->getSUnit()->Depth+1); - } // Only copy scheduled successors. 
Cut them from old node's successor // list and move them over. @@ -358,7 +354,6 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { continue; SUnit *SuccSU = I->getSUnit(); if (SuccSU->isScheduled) { - NewSU->Height = std::max(NewSU->Height, SuccSU->Height+1); SDep D = *I; D.setSUnit(NewSU); AddPred(SuccSU, D); diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp index b01700e37c..d42c8d8867 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp @@ -120,10 +120,7 @@ void ScheduleDAGList::ReleaseSucc(SUnit *SU, const SDep &D) { } #endif - // Compute the cycle when this SUnit actually becomes available. This - // is the max of the start time of all predecessors plus their latencies. - unsigned PredDoneCycle = SU->Cycle + SU->Latency; - SuccSU->CycleBound = std::max(SuccSU->CycleBound, PredDoneCycle); + SuccSU->setDepthToAtLeast(SU->getDepth() + D.getLatency()); if (SuccSU->NumPredsLeft == 0) { PendingQueue.push_back(SuccSU); @@ -138,7 +135,8 @@ void ScheduleDAGList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { DEBUG(SU->dump(this)); Sequence.push_back(SU); - SU->Cycle = CurCycle; + assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!"); + SU->setDepthToAtLeast(CurCycle); // Top down: release successors. for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); @@ -171,14 +169,14 @@ void ScheduleDAGList::ListScheduleTopDown() { // Check to see if any of the pending instructions are ready to issue. If // so, add them to the available queue. 
for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) { - if (PendingQueue[i]->CycleBound == CurCycle) { + if (PendingQueue[i]->getDepth() == CurCycle) { AvailableQueue->push(PendingQueue[i]); PendingQueue[i]->isAvailable = true; PendingQueue[i] = PendingQueue.back(); PendingQueue.pop_back(); --i; --e; } else { - assert(PendingQueue[i]->CycleBound > CurCycle && "Negative latency?"); + assert(PendingQueue[i]->getDepth() > CurCycle && "Negative latency?"); } } diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 49a5ac1904..c3b6137ae3 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -154,6 +154,10 @@ private: Topo.InitDAGTopologicalSorting(); return NewNode; } + + /// ForceUnitLatencies - Return true, since register-pressure-reducing + /// scheduling doesn't need actual latency information. + bool ForceUnitLatencies() const { return true; } }; } // end anonymous namespace @@ -171,8 +175,6 @@ void ScheduleDAGRRList::Schedule() { DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) SUnits[su].dumpAll(this)); - CalculateDepths(); - CalculateHeights(); Topo.InitDAGTopologicalSorting(); AvailableQueue->initNodes(SUnits); @@ -272,7 +274,8 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) { DOUT << "*** Scheduling [" << CurCycle << "]: "; DEBUG(SU->dump(this)); - SU->Cycle = CurCycle; + assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!"); + SU->setHeightToAtLeast(CurCycle); Sequence.push_back(SU); // Bottom up: release predecessors @@ -296,7 +299,7 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) { for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); I != E; ++I) { if (I->isAssignedRegDep()) { - if (LiveRegCycles[I->getReg()] == I->getSUnit()->Cycle) { + if (LiveRegCycles[I->getReg()] == I->getSUnit()->getHeight()) { assert(NumLiveRegs > 0 
&& "NumLiveRegs is already zero!"); assert(LiveRegDefs[I->getReg()] == SU && "Physical register dependency violated?"); @@ -328,7 +331,7 @@ void ScheduleDAGRRList::CapturePred(SDep *PredEdge) { /// UnscheduleNodeBottomUp - Remove the node from the schedule, update its and /// its predecessor states to reflect the change. void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { - DOUT << "*** Unscheduling [" << SU->Cycle << "]: "; + DOUT << "*** Unscheduling [" << SU->getHeight() << "]: "; DEBUG(SU->dump(this)); AvailableQueue->UnscheduledNode(SU); @@ -336,7 +339,7 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); I != E; ++I) { CapturePred(&*I); - if (I->isAssignedRegDep() && SU->Cycle == LiveRegCycles[I->getReg()]) { + if (I->isAssignedRegDep() && SU->getHeight() == LiveRegCycles[I->getReg()]) { assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); assert(LiveRegDefs[I->getReg()] == I->getSUnit() && "Physical register dependency violated?"); @@ -353,12 +356,12 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { LiveRegDefs[I->getReg()] = SU; ++NumLiveRegs; } - if (I->getSUnit()->Cycle < LiveRegCycles[I->getReg()]) - LiveRegCycles[I->getReg()] = I->getSUnit()->Cycle; + if (I->getSUnit()->getHeight() < LiveRegCycles[I->getReg()]) + LiveRegCycles[I->getReg()] = I->getSUnit()->getHeight(); } } - SU->Cycle = 0; + SU->setHeightDirty(); SU->isScheduled = false; SU->isAvailable = true; AvailableQueue->push(SU); @@ -443,9 +446,6 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { } else { LoadSU = CreateNewSUnit(LoadNode); LoadNode->setNodeId(LoadSU->NodeNum); - - LoadSU->Depth = SU->Depth; - LoadSU->Height = SU->Height; ComputeLatency(LoadSU); } @@ -462,9 +462,6 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { } if (TID.isCommutable()) NewSU->isCommutable = true; - // FIXME: Calculate height / depth and propagate the changes? 
- NewSU->Depth = SU->Depth; - NewSU->Height = SU->Height; ComputeLatency(NewSU); SDep ChainPred; @@ -548,10 +545,8 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { // New SUnit has the exact same predecessors. for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); I != E; ++I) - if (!I->isArtificial()) { + if (!I->isArtificial()) AddPred(NewSU, *I); - NewSU->Depth = std::max(NewSU->Depth, I->getSUnit()->Depth+1); - } // Only copy scheduled successors. Cut them from old node's successor // list and move them over. @@ -562,7 +557,6 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { continue; SUnit *SuccSU = I->getSUnit(); if (SuccSU->isScheduled) { - NewSU->Height = std::max(NewSU->Height, SuccSU->Height+1); SDep D = *I; D.setSUnit(NewSU); AddPred(SuccSU, D); @@ -570,9 +564,8 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { DelDeps.push_back(std::make_pair(SuccSU, D)); } } - for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) { + for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) RemovePred(DelDeps[i].first, DelDeps[i].second); - } AvailableQueue->updateNode(SU); AvailableQueue->addNode(NewSU); @@ -590,8 +583,6 @@ void ScheduleDAGRRList::InsertCCCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, SUnit *CopyFromSU = CreateNewSUnit(NULL); CopyFromSU->CopySrcRC = SrcRC; CopyFromSU->CopyDstRC = DestRC; - CopyFromSU->Depth = SU->Depth; - CopyFromSU->Height = SU->Height; SUnit *CopyToSU = CreateNewSUnit(NULL); CopyToSU->CopySrcRC = DestRC; @@ -870,7 +861,8 @@ void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { DOUT << "*** Scheduling [" << CurCycle << "]: "; DEBUG(SU->dump(this)); - SU->Cycle = CurCycle; + assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!"); + SU->setDepthToAtLeast(CurCycle); Sequence.push_back(SU); // Top down: release successors @@ -1107,19 +1099,19 @@ namespace { /// closestSucc - Returns the scheduled cycle of the successor which is /// closet to the 
current cycle. static unsigned closestSucc(const SUnit *SU) { - unsigned MaxCycle = 0; + unsigned MaxHeight = 0; for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); I != E; ++I) { - unsigned Cycle = I->getSUnit()->Cycle; + unsigned Height = I->getSUnit()->getHeight(); // If there are bunch of CopyToRegs stacked up, they should be considered // to be at the same position. if (I->getSUnit()->getNode() && I->getSUnit()->getNode()->getOpcode() == ISD::CopyToReg) - Cycle = closestSucc(I->getSUnit())+1; - if (Cycle > MaxCycle) - MaxCycle = Cycle; + Height = closestSucc(I->getSUnit())+1; + if (Height > MaxHeight) + MaxHeight = Height; } - return MaxCycle; + return MaxHeight; } /// calcMaxScratches - Returns an cost estimate of the worse case requirement @@ -1182,11 +1174,11 @@ bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { if (LScratch != RScratch) return LScratch > RScratch; - if (left->Height != right->Height) - return left->Height > right->Height; + if (left->getHeight() != right->getHeight()) + return left->getHeight() > right->getHeight(); - if (left->Depth != right->Depth) - return left->Depth < right->Depth; + if (left->getDepth() != right->getDepth()) + return left->getDepth() < right->getDepth(); assert(left->NodeQueueId && right->NodeQueueId && "NodeQueueId cannot be zero"); @@ -1294,7 +1286,8 @@ void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() { continue; // Be conservative. Ignore if nodes aren't at roughly the same // depth and height. 
- if (SuccSU->Height < SU->Height && (SU->Height - SuccSU->Height) > 1) + if (SuccSU->getHeight() < SU->getHeight() && + (SU->getHeight() - SuccSU->getHeight()) > 1) continue; if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode()) continue; @@ -1384,8 +1377,8 @@ bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { if (LPriority+LBonus != RPriority+RBonus) return LPriority+LBonus < RPriority+RBonus; - if (left->Depth != right->Depth) - return left->Depth < right->Depth; + if (left->getDepth() != right->getDepth()) + return left->getDepth() < right->getDepth(); if (left->NumSuccsLeft != right->NumSuccsLeft) return left->NumSuccsLeft > right->NumSuccsLeft; diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 69d3045bf2..21df142ff3 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -80,6 +80,9 @@ void ScheduleDAGSDNodes::BuildSchedUnits() { E = DAG->allnodes_end(); NI != E; ++NI) NI->setNodeId(-1); + // Check to see if the scheduler cares about latencies. + bool UnitLatencies = ForceUnitLatencies(); + for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(), E = DAG->allnodes_end(); NI != E; ++NI) { if (isPassiveNode(NI)) // Leaf node, e.g. a TargetImmediate. @@ -133,7 +136,10 @@ void ScheduleDAGSDNodes::BuildSchedUnits() { N->setNodeId(NodeSUnit->NodeNum); // Assign the Latency field of NodeSUnit using target-provided information. - ComputeLatency(NodeSUnit); + if (UnitLatencies) + NodeSUnit->Latency = 1; + else + ComputeLatency(NodeSUnit); } // Pass 2: add the preds, succs, etc. 
diff --git a/test/CodeGen/X86/fold-pcmpeqd-0.ll b/test/CodeGen/X86/fold-pcmpeqd-0.ll index 6af7219345..152c12215d 100644 --- a/test/CodeGen/X86/fold-pcmpeqd-0.ll +++ b/test/CodeGen/X86/fold-pcmpeqd-0.ll @@ -1,10 +1,8 @@ -; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin | not grep pcmpeqd +; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin | grep pcmpeqd | count 1 ; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin | grep pcmpeqd | count 1 -; On x86-64, this testcase shouldn't need to spill the -1 value, +; This testcase shouldn't need to spill the -1 value, ; so it should just use pcmpeqd to materialize an all-ones vector. -; On x86-32, there aren't enough registers, so an all-ones -; constant pool should be created so it can be folded. %struct.__ImageExecInfo = type <{ <4 x i32>, <4 x float>, <2 x i64>, i8*, i8*, i8*, i32, i32, i32, i32, i32 }> %struct._cl_image_format_t = type <{ i32, i32, i32 }> |