From b86a0cdb674549d8493043331cecd9cbf53b80da Mon Sep 17 00:00:00 2001
From: Andrew Trick <atrick@apple.com>
Date: Sat, 15 Jun 2013 04:49:57 +0000
Subject: Machine Model: Add MicroOpBufferSize and resource BufferSize.

Replace the ill-defined MinLatency and ILPWindow properties with
with straightforward buffer sizes:
MCSchedMode::MicroOpBufferSize
MCProcResourceDesc::BufferSize

These can be used to more precisely model instruction execution if desired.

Disabled some misched tests temporarily. They'll be reenabled in a few commits.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184032 91177308-0d34-0410-b5e6-96231b3b80d8
---
 include/llvm/CodeGen/ScheduleDAG.h       | 23 +----------
 include/llvm/CodeGen/ScheduleDAGInstrs.h |  2 +-
 include/llvm/CodeGen/TargetSchedule.h    | 28 ++++++--------
 include/llvm/MC/MCInstrItineraries.h     | 10 +----
 include/llvm/MC/MCSchedule.h             | 66 +++++++++++++-------------------
 include/llvm/Target/TargetInstrInfo.h    |  8 ++--
 include/llvm/Target/TargetSchedule.td    |  8 ++--
 7 files changed, 51 insertions(+), 94 deletions(-)

(limited to 'include')

diff --git a/include/llvm/CodeGen/ScheduleDAG.h b/include/llvm/CodeGen/ScheduleDAG.h
index 7cff27e172..6c62b52fa5 100644
--- a/include/llvm/CodeGen/ScheduleDAG.h
+++ b/include/llvm/CodeGen/ScheduleDAG.h
@@ -90,11 +90,6 @@ namespace llvm {
     /// the value of the Latency field of the predecessor, however advanced
     /// models may provide additional information about specific edges.
     unsigned Latency;
-    /// Record MinLatency seperately from "expected" Latency.
-    ///
-    /// FIXME: this field is not packed on LP64. Convert to 16-bit DAG edge
-    /// latency after introducing saturating truncation.
-    unsigned MinLatency;
 
   public:
     /// SDep - Construct a null SDep. This is only for use by container
@@ -120,10 +115,9 @@ namespace llvm {
         Latency = 1;
         break;
       }
-      MinLatency = Latency;
     }
     SDep(SUnit *S, OrderKind kind)
-      : Dep(S, Order), Contents(), Latency(0), MinLatency(0) {
+      : Dep(S, Order), Contents(), Latency(0) {
       Contents.OrdKind = kind;
     }
 
@@ -142,8 +136,7 @@ namespace llvm {
     }
 
     bool operator==(const SDep &Other) const {
-      return overlaps(Other)
-        && Latency == Other.Latency && MinLatency == Other.MinLatency;
+      return overlaps(Other) && Latency == Other.Latency;
     }
 
     bool operator!=(const SDep &Other) const {
@@ -163,18 +156,6 @@ namespace llvm {
       Latency = Lat;
     }
 
-    /// getMinLatency - Return the minimum latency for this edge. Minimum
-    /// latency is used for scheduling groups, while normal (expected) latency
-    /// is for instruction cost and critical path.
-    unsigned getMinLatency() const {
-      return MinLatency;
-    }
-
-    /// setMinLatency - Set the minimum latency for this edge.
-    void setMinLatency(unsigned Lat) {
-      MinLatency = Lat;
-    }
-
     //// getSUnit - Return the SUnit to which this edge points.
     SUnit *getSUnit() const {
       return Dep.getPointer();
diff --git a/include/llvm/CodeGen/ScheduleDAGInstrs.h b/include/llvm/CodeGen/ScheduleDAGInstrs.h
index 990cac6348..9ab1013bf1 100644
--- a/include/llvm/CodeGen/ScheduleDAGInstrs.h
+++ b/include/llvm/CodeGen/ScheduleDAGInstrs.h
@@ -158,7 +158,7 @@ namespace llvm {
 
     /// \brief Resolve and cache a resolved scheduling class for an SUnit.
     const MCSchedClassDesc *getSchedClass(SUnit *SU) const {
-      if (!SU->SchedClass)
+      if (!SU->SchedClass && SchedModel.hasInstrSchedModel())
         SU->SchedClass = SchedModel.resolveSchedClass(SU->getInstr());
       return SU->SchedClass;
     }
diff --git a/include/llvm/CodeGen/TargetSchedule.h b/include/llvm/CodeGen/TargetSchedule.h
index 3e22252eea..f2adcf8875 100644
--- a/include/llvm/CodeGen/TargetSchedule.h
+++ b/include/llvm/CodeGen/TargetSchedule.h
@@ -84,9 +84,6 @@ public:
   /// \brief Maximum number of micro-ops that may be scheduled per cycle.
   unsigned getIssueWidth() const { return SchedModel.IssueWidth; }
 
-  /// \brief Number of cycles the OOO processor is expected to hide.
-  unsigned getILPWindow() const { return SchedModel.ILPWindow; }
-
   /// \brief Return the number of issue slots required for this MI.
   unsigned getNumMicroOps(const MachineInstr *MI,
                           const MCSchedClassDesc *SC = 0) const;
@@ -131,18 +128,23 @@ public:
     return ResourceLCM;
   }
 
+  /// \brief Number of micro-ops that may be buffered for OOO execution.
+  unsigned getMicroOpBufferSize() const { return SchedModel.MicroOpBufferSize; }
+
+  /// \brief Number of resource units that may be buffered for OOO execution.
+  /// \return The buffer size in resource units or -1 for unlimited.
+  int getResourceBufferSize(unsigned PIdx) const {
+    return SchedModel.getProcResource(PIdx)->BufferSize;
+  }
+
   /// \brief Compute operand latency based on the available machine model.
   ///
-  /// Computes and return the latency of the given data dependent def and use
+  /// Compute and return the latency of the given data dependent def and use
   /// when the operand indices are already known. UseMI may be NULL for an
   /// unknown user.
-  ///
-  /// FindMin may be set to get the minimum vs. expected latency. Minimum
-  /// latency is used for scheduling groups, while expected latency is for
-  /// instruction cost and critical path.
   unsigned computeOperandLatency(const MachineInstr *DefMI, unsigned DefOperIdx,
-                                 const MachineInstr *UseMI, unsigned UseOperIdx,
-                                 bool FindMin) const;
+                                 const MachineInstr *UseMI, unsigned UseOperIdx)
+    const;
 
   /// \brief Compute the instruction latency based on the available machine
   /// model.
@@ -157,12 +159,6 @@ public:
   /// This is typically one cycle.
   unsigned computeOutputLatency(const MachineInstr *DefMI, unsigned DefIdx,
                                 const MachineInstr *DepMI) const;
-
-private:
-  /// getDefLatency is a helper for computeOperandLatency. Return the
-  /// instruction's latency if operand lookup is not required.
-  /// Otherwise return -1.
-  int getDefLatency(const MachineInstr *DefMI, bool FindMin) const;
 };
 
 } // namespace llvm
diff --git a/include/llvm/MC/MCInstrItineraries.h b/include/llvm/MC/MCInstrItineraries.h
index 65d1559ac6..c4f9e1c32a 100644
--- a/include/llvm/MC/MCInstrItineraries.h
+++ b/include/llvm/MC/MCInstrItineraries.h
@@ -157,17 +157,12 @@ public:
   /// class.  The latency is the maximum completion time for any stage
   /// in the itinerary.
   ///
-  /// InstrStages override the itinerary's MinLatency property. In fact, if the
-  /// stage latencies, which may be zero, are less than MinLatency,
-  /// getStageLatency returns a value less than MinLatency.
-  ///
-  /// If no stages exist, MinLatency is used. If MinLatency is invalid (<0),
-  /// then it defaults to one cycle.
+  /// If no stages exist, it defaults to one cycle.
   unsigned getStageLatency(unsigned ItinClassIndx) const {
     // If the target doesn't provide itinerary information, use a simple
     // non-zero default value for all instructions.
     if (isEmpty())
-      return SchedModel->MinLatency < 0 ? 1 : SchedModel->MinLatency;
+      return 1;
 
     // Calculate the maximum completion time for any stage.
     unsigned Latency = 0, StartCycle = 0;
@@ -176,7 +171,6 @@ public:
       Latency = std::max(Latency, StartCycle + IS->getCycles());
       StartCycle += IS->getNextCycles();
     }
-
     return Latency;
   }
 
diff --git a/include/llvm/MC/MCSchedule.h b/include/llvm/MC/MCSchedule.h
index defa299035..673cdf6fb6 100644
--- a/include/llvm/MC/MCSchedule.h
+++ b/include/llvm/MC/MCSchedule.h
@@ -30,15 +30,18 @@ struct MCProcResourceDesc {
   unsigned NumUnits; // Number of resource of this kind
   unsigned SuperIdx; // Index of the resources kind that contains this kind.
 
-  // Buffered resources may be consumed at some indeterminate cycle after
-  // dispatch (e.g. for instructions that may issue out-of-order). Unbuffered
-  // resources always consume their resource some fixed number of cycles after
-  // dispatch (e.g. for instruction interlocking that may stall the pipeline).
-  bool IsBuffered;
+  // Number of resources that may be buffered.
+  //
+  // Buffered resources (BufferSize > 0 || BufferSize == -1) may be consumed at
+  // some indeterminate cycle after dispatch (e.g. for instructions that may
+  // issue out-of-order). Unbuffered resources (BufferSize == 0) always consume
+  // their resource some fixed number of cycles after dispatch (e.g. for
+  // instruction interlocking that may stall the pipeline).
+  int BufferSize;
 
   bool operator==(const MCProcResourceDesc &Other) const {
     return NumUnits == Other.NumUnits && SuperIdx == Other.SuperIdx
-      && IsBuffered == Other.IsBuffered;
+      && BufferSize == Other.BufferSize;
   }
 };
 
@@ -134,28 +137,22 @@ public:
   unsigned IssueWidth;
   static const unsigned DefaultIssueWidth = 1;
 
-  // MinLatency is the minimum latency between a register write
-  // followed by a data dependent read. This determines which
-  // instructions may be scheduled in the same per-cycle group. This
-  // is distinct from *expected* latency, which determines the likely
-  // critical path but does not guarantee a pipeline
-  // hazard. MinLatency can always be overridden by the number of
-  // InstrStage cycles.
+  // MicroOpBufferSize is the number of micro-ops that the processor may buffer
+  // for out-of-order execution.
   //
-  // (-1) Standard in-order processor.
-  //      Use InstrItinerary OperandCycles as MinLatency.
-  //      If no OperandCycles exist, then use the cycle of the last InstrStage.
+  // "0" means operations that are not ready in this cycle are not considered
+  // for scheduling (they go in the pending queue). Latency is paramount. This
+  // may be more efficient if many instructions are pending in a schedule.
   //
-  //  (0) Out-of-order processor, or in-order with bundled dependencies.
-  //      RAW dependencies may be dispatched in the same cycle.
-  //      Optional InstrItinerary OperandCycles provides expected latency.
+  // "1" means all instructions are considered for scheduling regardless of
+  // whether they are ready in this cycle. Latency still causes issue stalls,
+  // but we balance those stalls against other heuristics.
   //
-  // (>0) In-order processor with variable latencies.
-  //      Use the greater of this value or the cycle of the last InstrStage.
-  //      Optional InstrItinerary OperandCycles provides expected latency.
-  //      TODO: can't yet specify both min and expected latency per operand.
-  int MinLatency;
-  static const int DefaultMinLatency = -1;
+  // "> 1" means the processor is out-of-order. This is a machine independent
+  // estimate of highly machine specific characteristics such are the register
+  // renaming pool and reorder buffer.
+  unsigned MicroOpBufferSize;
+  static const unsigned DefaultMicroOpBufferSize = 0;
 
   // LoadLatency is the expected latency of load instructions.
   //
@@ -172,16 +169,6 @@ public:
   unsigned HighLatency;
   static const unsigned DefaultHighLatency = 10;
 
-  // ILPWindow is the number of cycles that the scheduler effectively ignores
-  // before attempting to hide latency. This should be zero for in-order cpus to
-  // always hide expected latency. For out-of-order cpus, it may be tweaked as
-  // desired to roughly approximate instruction buffers. The actual threshold is
-  // not very important for an OOO processor, as long as it isn't too high. A
-  // nonzero value helps avoid rescheduling to hide latency when its is fairly
-  // obviously useless and makes register pressure heuristics more effective.
-  unsigned ILPWindow;
-  static const unsigned DefaultILPWindow = 0;
-
   // MispredictPenalty is the typical number of extra cycles the processor
   // takes to recover from a branch misprediction.
   unsigned MispredictPenalty;
@@ -203,10 +190,9 @@ public:
   // initialized in this default ctor because some clients directly instantiate
   // MCSchedModel instead of using a generated itinerary.
   MCSchedModel(): IssueWidth(DefaultIssueWidth),
-                  MinLatency(DefaultMinLatency),
+                  MicroOpBufferSize(DefaultMicroOpBufferSize),
                   LoadLatency(DefaultLoadLatency),
                   HighLatency(DefaultHighLatency),
-                  ILPWindow(DefaultILPWindow),
                   MispredictPenalty(DefaultMispredictPenalty),
                   ProcID(0), ProcResourceTable(0), SchedClassTable(0),
                   NumProcResourceKinds(0), NumSchedClasses(0),
@@ -216,12 +202,12 @@ public:
   }
 
   // Table-gen driven ctor.
-  MCSchedModel(unsigned iw, int ml, unsigned ll, unsigned hl, unsigned ilp,
+  MCSchedModel(unsigned iw, int mbs, unsigned ll, unsigned hl,
                unsigned mp, unsigned pi, const MCProcResourceDesc *pr,
                const MCSchedClassDesc *sc, unsigned npr, unsigned nsc,
                const InstrItinerary *ii):
-    IssueWidth(iw), MinLatency(ml), LoadLatency(ll), HighLatency(hl),
-    ILPWindow(ilp), MispredictPenalty(mp), ProcID(pi), ProcResourceTable(pr),
+    IssueWidth(iw), MicroOpBufferSize(mbs), LoadLatency(ll), HighLatency(hl),
+    MispredictPenalty(mp), ProcID(pi), ProcResourceTable(pr),
     SchedClassTable(sc), NumProcResourceKinds(npr), NumSchedClasses(nsc),
     InstrItineraries(ii) {}
 
diff --git a/include/llvm/Target/TargetInstrInfo.h b/include/llvm/Target/TargetInstrInfo.h
index d49ce1ce7f..69fda8c47c 100644
--- a/include/llvm/Target/TargetInstrInfo.h
+++ b/include/llvm/Target/TargetInstrInfo.h
@@ -817,12 +817,10 @@ public:
 
   /// computeOperandLatency - Compute and return the latency of the given data
   /// dependent def and use when the operand indices are already known.
-  ///
-  /// FindMin may be set to get the minimum vs. expected latency.
   unsigned computeOperandLatency(const InstrItineraryData *ItinData,
                                  const MachineInstr *DefMI, unsigned DefIdx,
-                                 const MachineInstr *UseMI, unsigned UseIdx,
-                                 bool FindMin = false) const;
+                                 const MachineInstr *UseMI, unsigned UseIdx)
+    const;
 
   /// getInstrLatency - Compute the instruction latency of a given instruction.
   /// If the instruction has higher cost when predicated, it's returned via
@@ -839,7 +837,7 @@ public:
                              const MachineInstr *DefMI) const;
 
   int computeDefOperandLatency(const InstrItineraryData *ItinData,
-                               const MachineInstr *DefMI, bool FindMin) const;
+                               const MachineInstr *DefMI) const;
 
   /// isHighLatencyDef - Return true if this opcode has high latency to its
   /// result.
diff --git a/include/llvm/Target/TargetSchedule.td b/include/llvm/Target/TargetSchedule.td
index 660d2c48b6..0ac2eed9d5 100644
--- a/include/llvm/Target/TargetSchedule.td
+++ b/include/llvm/Target/TargetSchedule.td
@@ -72,11 +72,13 @@ def instregex;
 //
 // Target hooks allow subtargets to associate LoadLatency and
 // HighLatency with groups of opcodes.
+//
+// See MCSchedule.h for detailed comments.
 class SchedMachineModel {
   int IssueWidth = -1; // Max micro-ops that may be scheduled per cycle.
   int MinLatency = -1; // Determines which instrucions are allowed in a group.
                        // (-1) inorder (0) ooo, (1): inorder +var latencies.
-  int ILPWindow = -1;  // Cycles of latency likely hidden by hardware buffers.
+  int MicroOpBufferSize = -1; // Max micro-ops that can be buffered.
   int LoadLatency = -1; // Cycles for loads to access the cache.
   int HighLatency = -1; // Approximation of cycles for "high latency" ops.
   int MispredictPenalty = -1; // Extra cycles for a mispredicted branch.
@@ -106,7 +108,7 @@ class ProcResourceKind;
 // out-of-order engine that the compiler attempts to conserve.
 // Buffered resources may be held for multiple clock cycles, but the
 // scheduler does not pin them to a particular clock cycle relative to
-// instruction dispatch. Setting Buffered=0 changes this to an
+// instruction dispatch. Setting BufferSize=0 changes this to an
 // in-order resource. In this case, the scheduler counts down from the
 // cycle that the instruction issues in-order, forcing an interlock
 // with subsequent instructions that require the same resource until
@@ -119,7 +121,7 @@ class ProcResourceUnits<ProcResourceKind kind, int num> {
   ProcResourceKind Kind = kind;
   int NumUnits = num;
   ProcResourceKind Super = ?;
-  bit Buffered = 1;
+  int BufferSize = -1;
   SchedMachineModel SchedModel = ?;
 }
 
-- 
cgit v1.2.3