From b86a0cdb674549d8493043331cecd9cbf53b80da Mon Sep 17 00:00:00 2001 From: Andrew Trick Date: Sat, 15 Jun 2013 04:49:57 +0000 Subject: Machine Model: Add MicroOpBufferSize and resource BufferSize. Replace the ill-defined MinLatency and ILPWindow properties with straightforward buffer sizes: MCSchedModel::MicroOpBufferSize MCProcResourceDesc::BufferSize These can be used to more precisely model instruction execution if desired. Disabled some misched tests temporarily. They'll be reenabled in a few commits. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184032 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/CodeGen/ScheduleDAG.h | 23 +---------- include/llvm/CodeGen/ScheduleDAGInstrs.h | 2 +- include/llvm/CodeGen/TargetSchedule.h | 28 ++++++-------- include/llvm/MC/MCInstrItineraries.h | 10 +---- include/llvm/MC/MCSchedule.h | 66 +++++++++++++------------------- include/llvm/Target/TargetInstrInfo.h | 8 ++-- include/llvm/Target/TargetSchedule.td | 8 ++-- 7 files changed, 51 insertions(+), 94 deletions(-) (limited to 'include') diff --git a/include/llvm/CodeGen/ScheduleDAG.h b/include/llvm/CodeGen/ScheduleDAG.h index 7cff27e172..6c62b52fa5 100644 --- a/include/llvm/CodeGen/ScheduleDAG.h +++ b/include/llvm/CodeGen/ScheduleDAG.h @@ -90,11 +90,6 @@ namespace llvm { /// the value of the Latency field of the predecessor, however advanced /// models may provide additional information about specific edges. unsigned Latency; - /// Record MinLatency seperately from "expected" Latency. - /// - /// FIXME: this field is not packed on LP64. Convert to 16-bit DAG edge - /// latency after introducing saturating truncation. - unsigned MinLatency; public: /// SDep - Construct a null SDep. 
This is only for use by container @@ -120,10 +115,9 @@ namespace llvm { Latency = 1; break; } - MinLatency = Latency; } SDep(SUnit *S, OrderKind kind) - : Dep(S, Order), Contents(), Latency(0), MinLatency(0) { + : Dep(S, Order), Contents(), Latency(0) { Contents.OrdKind = kind; } @@ -142,8 +136,7 @@ namespace llvm { } bool operator==(const SDep &Other) const { - return overlaps(Other) - && Latency == Other.Latency && MinLatency == Other.MinLatency; + return overlaps(Other) && Latency == Other.Latency; } bool operator!=(const SDep &Other) const { @@ -163,18 +156,6 @@ namespace llvm { Latency = Lat; } - /// getMinLatency - Return the minimum latency for this edge. Minimum - /// latency is used for scheduling groups, while normal (expected) latency - /// is for instruction cost and critical path. - unsigned getMinLatency() const { - return MinLatency; - } - - /// setMinLatency - Set the minimum latency for this edge. - void setMinLatency(unsigned Lat) { - MinLatency = Lat; - } - //// getSUnit - Return the SUnit to which this edge points. SUnit *getSUnit() const { return Dep.getPointer(); diff --git a/include/llvm/CodeGen/ScheduleDAGInstrs.h b/include/llvm/CodeGen/ScheduleDAGInstrs.h index 990cac6348..9ab1013bf1 100644 --- a/include/llvm/CodeGen/ScheduleDAGInstrs.h +++ b/include/llvm/CodeGen/ScheduleDAGInstrs.h @@ -158,7 +158,7 @@ namespace llvm { /// \brief Resolve and cache a resolved scheduling class for an SUnit. const MCSchedClassDesc *getSchedClass(SUnit *SU) const { - if (!SU->SchedClass) + if (!SU->SchedClass && SchedModel.hasInstrSchedModel()) SU->SchedClass = SchedModel.resolveSchedClass(SU->getInstr()); return SU->SchedClass; } diff --git a/include/llvm/CodeGen/TargetSchedule.h b/include/llvm/CodeGen/TargetSchedule.h index 3e22252eea..f2adcf8875 100644 --- a/include/llvm/CodeGen/TargetSchedule.h +++ b/include/llvm/CodeGen/TargetSchedule.h @@ -84,9 +84,6 @@ public: /// \brief Maximum number of micro-ops that may be scheduled per cycle. 
unsigned getIssueWidth() const { return SchedModel.IssueWidth; } - /// \brief Number of cycles the OOO processor is expected to hide. - unsigned getILPWindow() const { return SchedModel.ILPWindow; } - /// \brief Return the number of issue slots required for this MI. unsigned getNumMicroOps(const MachineInstr *MI, const MCSchedClassDesc *SC = 0) const; @@ -131,18 +128,23 @@ public: return ResourceLCM; } + /// \brief Number of micro-ops that may be buffered for OOO execution. + unsigned getMicroOpBufferSize() const { return SchedModel.MicroOpBufferSize; } + + /// \brief Number of resource units that may be buffered for OOO execution. + /// \return The buffer size in resource units or -1 for unlimited. + int getResourceBufferSize(unsigned PIdx) const { + return SchedModel.getProcResource(PIdx)->BufferSize; + } + /// \brief Compute operand latency based on the available machine model. /// - /// Computes and return the latency of the given data dependent def and use + /// Compute and return the latency of the given data dependent def and use /// when the operand indices are already known. UseMI may be NULL for an /// unknown user. - /// - /// FindMin may be set to get the minimum vs. expected latency. Minimum - /// latency is used for scheduling groups, while expected latency is for - /// instruction cost and critical path. unsigned computeOperandLatency(const MachineInstr *DefMI, unsigned DefOperIdx, - const MachineInstr *UseMI, unsigned UseOperIdx, - bool FindMin) const; + const MachineInstr *UseMI, unsigned UseOperIdx) + const; /// \brief Compute the instruction latency based on the available machine /// model. @@ -157,12 +159,6 @@ public: /// This is typically one cycle. unsigned computeOutputLatency(const MachineInstr *DefMI, unsigned DefIdx, const MachineInstr *DepMI) const; - -private: - /// getDefLatency is a helper for computeOperandLatency. Return the - /// instruction's latency if operand lookup is not required. - /// Otherwise return -1. 
- int getDefLatency(const MachineInstr *DefMI, bool FindMin) const; }; } // namespace llvm diff --git a/include/llvm/MC/MCInstrItineraries.h b/include/llvm/MC/MCInstrItineraries.h index 65d1559ac6..c4f9e1c32a 100644 --- a/include/llvm/MC/MCInstrItineraries.h +++ b/include/llvm/MC/MCInstrItineraries.h @@ -157,17 +157,12 @@ public: /// class. The latency is the maximum completion time for any stage /// in the itinerary. /// - /// InstrStages override the itinerary's MinLatency property. In fact, if the - /// stage latencies, which may be zero, are less than MinLatency, - /// getStageLatency returns a value less than MinLatency. - /// - /// If no stages exist, MinLatency is used. If MinLatency is invalid (<0), - /// then it defaults to one cycle. + /// If no stages exist, it defaults to one cycle. unsigned getStageLatency(unsigned ItinClassIndx) const { // If the target doesn't provide itinerary information, use a simple // non-zero default value for all instructions. if (isEmpty()) - return SchedModel->MinLatency < 0 ? 1 : SchedModel->MinLatency; + return 1; // Calculate the maximum completion time for any stage. unsigned Latency = 0, StartCycle = 0; @@ -176,7 +171,6 @@ public: Latency = std::max(Latency, StartCycle + IS->getCycles()); StartCycle += IS->getNextCycles(); } - return Latency; } diff --git a/include/llvm/MC/MCSchedule.h b/include/llvm/MC/MCSchedule.h index defa299035..673cdf6fb6 100644 --- a/include/llvm/MC/MCSchedule.h +++ b/include/llvm/MC/MCSchedule.h @@ -30,15 +30,18 @@ struct MCProcResourceDesc { unsigned NumUnits; // Number of resource of this kind unsigned SuperIdx; // Index of the resources kind that contains this kind. - // Buffered resources may be consumed at some indeterminate cycle after - // dispatch (e.g. for instructions that may issue out-of-order). Unbuffered - // resources always consume their resource some fixed number of cycles after - // dispatch (e.g. for instruction interlocking that may stall the pipeline). 
- bool IsBuffered; + // Number of resources that may be buffered. + // + // Buffered resources (BufferSize > 0 || BufferSize == -1) may be consumed at + // some indeterminate cycle after dispatch (e.g. for instructions that may + // issue out-of-order). Unbuffered resources (BufferSize == 0) always consume + // their resource some fixed number of cycles after dispatch (e.g. for + // instruction interlocking that may stall the pipeline). + int BufferSize; bool operator==(const MCProcResourceDesc &Other) const { return NumUnits == Other.NumUnits && SuperIdx == Other.SuperIdx - && IsBuffered == Other.IsBuffered; + && BufferSize == Other.BufferSize; } }; @@ -134,28 +137,22 @@ public: unsigned IssueWidth; static const unsigned DefaultIssueWidth = 1; - // MinLatency is the minimum latency between a register write - // followed by a data dependent read. This determines which - // instructions may be scheduled in the same per-cycle group. This - // is distinct from *expected* latency, which determines the likely - // critical path but does not guarantee a pipeline - // hazard. MinLatency can always be overridden by the number of - // InstrStage cycles. + // MicroOpBufferSize is the number of micro-ops that the processor may buffer + // for out-of-order execution. // - // (-1) Standard in-order processor. - // Use InstrItinerary OperandCycles as MinLatency. - // If no OperandCycles exist, then use the cycle of the last InstrStage. + // "0" means operations that are not ready in this cycle are not considered + // for scheduling (they go in the pending queue). Latency is paramount. This + // may be more efficient if many instructions are pending in a schedule. // - // (0) Out-of-order processor, or in-order with bundled dependencies. - // RAW dependencies may be dispatched in the same cycle. - // Optional InstrItinerary OperandCycles provides expected latency. 
+ // "1" means all instructions are considered for scheduling regardless of + // whether they are ready in this cycle. Latency still causes issue stalls, + // but we balance those stalls against other heuristics. // - // (>0) In-order processor with variable latencies. - // Use the greater of this value or the cycle of the last InstrStage. - // Optional InstrItinerary OperandCycles provides expected latency. - // TODO: can't yet specify both min and expected latency per operand. - int MinLatency; - static const int DefaultMinLatency = -1; + // "> 1" means the processor is out-of-order. This is a machine independent + // estimate of highly machine specific characteristics such as the register + // renaming pool and reorder buffer. + unsigned MicroOpBufferSize; + static const unsigned DefaultMicroOpBufferSize = 0; // LoadLatency is the expected latency of load instructions. // @@ -172,16 +169,6 @@ public: unsigned HighLatency; static const unsigned DefaultHighLatency = 10; - // ILPWindow is the number of cycles that the scheduler effectively ignores - // before attempting to hide latency. This should be zero for in-order cpus to - // always hide expected latency. For out-of-order cpus, it may be tweaked as - // desired to roughly approximate instruction buffers. The actual threshold is - // not very important for an OOO processor, as long as it isn't too high. A - // nonzero value helps avoid rescheduling to hide latency when its is fairly - // obviously useless and makes register pressure heuristics more effective. - unsigned ILPWindow; - static const unsigned DefaultILPWindow = 0; - // MispredictPenalty is the typical number of extra cycles the processor // takes to recover from a branch misprediction. unsigned MispredictPenalty; @@ -203,10 +190,9 @@ public: // initialized in this default ctor because some clients directly instantiate // MCSchedModel instead of using a generated itinerary. 
MCSchedModel(): IssueWidth(DefaultIssueWidth), - MinLatency(DefaultMinLatency), + MicroOpBufferSize(DefaultMicroOpBufferSize), LoadLatency(DefaultLoadLatency), HighLatency(DefaultHighLatency), - ILPWindow(DefaultILPWindow), MispredictPenalty(DefaultMispredictPenalty), ProcID(0), ProcResourceTable(0), SchedClassTable(0), NumProcResourceKinds(0), NumSchedClasses(0), @@ -216,12 +202,12 @@ public: } // Table-gen driven ctor. - MCSchedModel(unsigned iw, int ml, unsigned ll, unsigned hl, unsigned ilp, + MCSchedModel(unsigned iw, int mbs, unsigned ll, unsigned hl, unsigned mp, unsigned pi, const MCProcResourceDesc *pr, const MCSchedClassDesc *sc, unsigned npr, unsigned nsc, const InstrItinerary *ii): - IssueWidth(iw), MinLatency(ml), LoadLatency(ll), HighLatency(hl), - ILPWindow(ilp), MispredictPenalty(mp), ProcID(pi), ProcResourceTable(pr), + IssueWidth(iw), MicroOpBufferSize(mbs), LoadLatency(ll), HighLatency(hl), + MispredictPenalty(mp), ProcID(pi), ProcResourceTable(pr), SchedClassTable(sc), NumProcResourceKinds(npr), NumSchedClasses(nsc), InstrItineraries(ii) {} diff --git a/include/llvm/Target/TargetInstrInfo.h b/include/llvm/Target/TargetInstrInfo.h index d49ce1ce7f..69fda8c47c 100644 --- a/include/llvm/Target/TargetInstrInfo.h +++ b/include/llvm/Target/TargetInstrInfo.h @@ -817,12 +817,10 @@ public: /// computeOperandLatency - Compute and return the latency of the given data /// dependent def and use when the operand indices are already known. - /// - /// FindMin may be set to get the minimum vs. expected latency. unsigned computeOperandLatency(const InstrItineraryData *ItinData, const MachineInstr *DefMI, unsigned DefIdx, - const MachineInstr *UseMI, unsigned UseIdx, - bool FindMin = false) const; + const MachineInstr *UseMI, unsigned UseIdx) + const; /// getInstrLatency - Compute the instruction latency of a given instruction. 
/// If the instruction has higher cost when predicated, it's returned via @@ -839,7 +837,7 @@ public: const MachineInstr *DefMI) const; int computeDefOperandLatency(const InstrItineraryData *ItinData, - const MachineInstr *DefMI, bool FindMin) const; + const MachineInstr *DefMI) const; /// isHighLatencyDef - Return true if this opcode has high latency to its /// result. diff --git a/include/llvm/Target/TargetSchedule.td b/include/llvm/Target/TargetSchedule.td index 660d2c48b6..0ac2eed9d5 100644 --- a/include/llvm/Target/TargetSchedule.td +++ b/include/llvm/Target/TargetSchedule.td @@ -72,11 +72,13 @@ def instregex; // // Target hooks allow subtargets to associate LoadLatency and // HighLatency with groups of opcodes. +// +// See MCSchedule.h for detailed comments. class SchedMachineModel { int IssueWidth = -1; // Max micro-ops that may be scheduled per cycle. int MinLatency = -1; // Determines which instrucions are allowed in a group. // (-1) inorder (0) ooo, (1): inorder +var latencies. - int ILPWindow = -1; // Cycles of latency likely hidden by hardware buffers. + int MicroOpBufferSize = -1; // Max micro-ops that can be buffered. int LoadLatency = -1; // Cycles for loads to access the cache. int HighLatency = -1; // Approximation of cycles for "high latency" ops. int MispredictPenalty = -1; // Extra cycles for a mispredicted branch. @@ -106,7 +108,7 @@ class ProcResourceKind; // out-of-order engine that the compiler attempts to conserve. // Buffered resources may be held for multiple clock cycles, but the // scheduler does not pin them to a particular clock cycle relative to -// instruction dispatch. Setting Buffered=0 changes this to an +// instruction dispatch. Setting BufferSize=0 changes this to an // in-order resource. 
In this case, the scheduler counts down from the // cycle that the instruction issues in-order, forcing an interlock // with subsequent instructions that require the same resource until @@ -119,7 +121,7 @@ class ProcResourceUnits { ProcResourceKind Kind = kind; int NumUnits = num; ProcResourceKind Super = ?; - bit Buffered = 1; + int BufferSize = -1; SchedMachineModel SchedModel = ?; } -- cgit v1.2.3