diff options
-rw-r--r-- | include/llvm/Target/TargetInstrInfo.h | 5 | ||||
-rw-r--r-- | include/llvm/Target/TargetInstrItineraries.h | 1 | ||||
-rw-r--r-- | include/llvm/Target/TargetSchedule.td | 9 | ||||
-rw-r--r-- | lib/Target/ARM/ARMBaseInstrInfo.cpp | 63 | ||||
-rw-r--r-- | lib/Target/ARM/ARMBaseInstrInfo.h | 3 | ||||
-rw-r--r-- | lib/Target/ARM/ARMInstrThumb.td | 7 | ||||
-rw-r--r-- | lib/Target/ARM/ARMSchedule.td | 10 | ||||
-rw-r--r-- | lib/Target/ARM/ARMScheduleV6.td | 5 | ||||
-rw-r--r-- | lib/Target/TargetInstrInfo.cpp | 17 | ||||
-rw-r--r-- | utils/TableGen/SubtargetEmitter.cpp | 44 | ||||
-rw-r--r-- | utils/TableGen/SubtargetEmitter.h | 4 |
11 files changed, 140 insertions, 28 deletions
diff --git a/include/llvm/Target/TargetInstrInfo.h b/include/llvm/Target/TargetInstrInfo.h index 520c41be74..7ce1f71210 100644 --- a/include/llvm/Target/TargetInstrInfo.h +++ b/include/llvm/Target/TargetInstrInfo.h @@ -591,6 +591,11 @@ public: MachineInstr *CmpInstr) const { return false; } + + /// getNumMicroOps - Return the number of u-operations the given machine + /// instruction will be decoded to on the target cpu. + virtual unsigned getNumMicroOps(const MachineInstr *MI, + const InstrItineraryData &ItinData) const; }; /// TargetInstrInfoImpl - This is the default implementation of diff --git a/include/llvm/Target/TargetInstrItineraries.h b/include/llvm/Target/TargetInstrItineraries.h index 39648c233f..ae3e621064 100644 --- a/include/llvm/Target/TargetInstrItineraries.h +++ b/include/llvm/Target/TargetInstrItineraries.h @@ -95,6 +95,7 @@ struct InstrStage { /// operands are read and written. /// struct InstrItinerary { + unsigned NumMicroOps; ///< # of micro-ops, 0 means it's variable unsigned FirstStage; ///< Index of first stage in itinerary unsigned LastStage; ///< Index of last + 1 stage in itinerary unsigned FirstOperandCycle; ///< Index of first operand rd/wr diff --git a/include/llvm/Target/TargetSchedule.td b/include/llvm/Target/TargetSchedule.td index 96c83674cb..d771cdd4de 100644 --- a/include/llvm/Target/TargetSchedule.td +++ b/include/llvm/Target/TargetSchedule.td @@ -66,7 +66,14 @@ class InstrStage<int cycles, list<FuncUnit> units, // across all chip sets. Thus a new chip set can be added without modifying // instruction information. // -class InstrItinClass; +// NumMicroOps represents the number of micro-operations that each instruction +// in the class are decoded to. If the number is zero, then it means the +// instruction can decode into variable number of micro-ops and it must be +// determined dynamically. +// +class InstrItinClass<int ops = 1> { + int NumMicroOps = ops; +} def NoItinerary : InstrItinClass; //===----------------------------------------------------------------------===// diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 77cfcb9e50..c824b8bce9 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -1412,3 +1412,66 @@ ConvertToSetZeroFlag(MachineInstr *MI, MachineInstr *CmpInstr) const { return false; } + +unsigned +ARMBaseInstrInfo::getNumMicroOps(const MachineInstr *MI, + const InstrItineraryData &ItinData) const { + if (ItinData.isEmpty()) + return 1; + + const TargetInstrDesc &Desc = MI->getDesc(); + unsigned Class = Desc.getSchedClass(); + unsigned UOps = ItinData.Itineratries[Class].NumMicroOps; + if (UOps) + return UOps; + + unsigned Opc = MI->getOpcode(); + switch (Opc) { + default: + llvm_unreachable("Unexpected multi-uops instruction!"); + break; + case ARM::VSTMQ: + return 2; + + // The number of uOps for load / store multiple are determined by the number + // registers. + // On Cortex-A8, each odd / even pair of register loads / stores + // (e.g. r5 + r6) can be completed on the same cycle. The minimum is + // 2. For VFP / NEON load / store multiple, the formula is + // (#reg / 2) + (#reg % 2) + 1. + // On Cortex-A9, the formula is simply (#reg / 2) + (#reg % 2). + case ARM::VLDMD: + case ARM::VLDMS: + case ARM::VLDMD_UPD: + case ARM::VLDMS_UPD: + case ARM::VSTMD: + case ARM::VSTMS: + case ARM::VSTMD_UPD: + case ARM::VSTMS_UPD: { + unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands(); + return (NumRegs / 2) + (NumRegs % 2) + 1; + } + case ARM::LDM_RET: + case ARM::LDM: + case ARM::LDM_UPD: + case ARM::STM: + case ARM::STM_UPD: + case ARM::tLDM: + case ARM::tLDM_UPD: + case ARM::tSTM_UPD: + case ARM::tPOP_RET: + case ARM::tPOP: + case ARM::tPUSH: + case ARM::t2LDM_RET: + case ARM::t2LDM: + case ARM::t2LDM_UPD: + case ARM::t2STM: + case ARM::t2STM_UPD: { + // FIXME: Distinquish between Cortex-A8 / Cortex-A9 and other processor + // families. + unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands(); + UOps = (NumRegs / 2) + (NumRegs % 2); + return (UOps > 2) ? UOps : 2; + } + } +} diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h index b4f4a33a70..b3abdeef96 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/lib/Target/ARM/ARMBaseInstrInfo.h @@ -348,6 +348,9 @@ public: /// that we can remove a "comparison with zero". virtual bool ConvertToSetZeroFlag(MachineInstr *Instr, MachineInstr *CmpInstr) const; + + virtual unsigned getNumMicroOps(const MachineInstr *MI, + const InstrItineraryData &ItinData) const; }; static inline diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index da2efb8566..30bec0f9d6 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -282,7 +282,8 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { // FIXME: remove when we have a way to marking a MI with these properties. let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1, hasExtraDefRegAllocReq = 1 in -def tPOP_RET : T1I<(outs), (ins pred:$p, reglist:$dsts, variable_ops), IIC_Br, +def tPOP_RET : T1I<(outs), (ins pred:$p, reglist:$dsts, variable_ops), + IIC_iLoadmBr, "pop${p}\t$dsts", []>, T1Misc<{1,1,0,?,?,?,?}>; @@ -560,12 +561,12 @@ def tSTM_UPD : T1It<(outs tGPR:$wb), T1Encoding<{1,1,0,0,0,?}>; // A6.2 & A8.6.189 let mayLoad = 1, Uses = [SP], Defs = [SP], hasExtraDefRegAllocReq = 1 in -def tPOP : T1I<(outs), (ins pred:$p, reglist:$dsts, variable_ops), IIC_Br, +def tPOP : T1I<(outs), (ins pred:$p, reglist:$dsts, variable_ops), IIC_iLoadmBr, "pop${p}\t$dsts", []>, T1Misc<{1,1,0,?,?,?,?}>; let mayStore = 1, Uses = [SP], Defs = [SP], hasExtraSrcRegAllocReq = 1 in -def tPUSH : T1I<(outs), (ins pred:$p, reglist:$srcs, variable_ops), IIC_Br, +def tPUSH : T1I<(outs), (ins pred:$p, reglist:$srcs, variable_ops), IIC_iStorem, "push${p}\t$srcs", []>, T1Misc<{0,1,0,?,?,?,?}>; diff --git a/lib/Target/ARM/ARMSchedule.td b/lib/Target/ARM/ARMSchedule.td index 7566721577..6151008da6 100644 --- a/lib/Target/ARM/ARMSchedule.td +++ b/lib/Target/ARM/ARMSchedule.td @@ -42,15 +42,15 @@ def IIC_iLoadsi : InstrItinClass; def IIC_iLoadiu : InstrItinClass; def IIC_iLoadru : InstrItinClass; def IIC_iLoadsiu : InstrItinClass; -def IIC_iLoadm : InstrItinClass; -def IIC_iLoadmBr : InstrItinClass; +def IIC_iLoadm : InstrItinClass<0>; // micro-coded +def IIC_iLoadmBr : InstrItinClass<0>; // micro-coded def IIC_iStorei : InstrItinClass; def IIC_iStorer : InstrItinClass; def IIC_iStoresi : InstrItinClass; def IIC_iStoreiu : InstrItinClass; def IIC_iStoreru : InstrItinClass; def IIC_iStoresiu : InstrItinClass; -def IIC_iStorem : InstrItinClass; +def IIC_iStorem : InstrItinClass<0>; // micro-coded def IIC_Br : InstrItinClass; def IIC_fpSTAT : InstrItinClass; def IIC_fpUNA32 : InstrItinClass; @@ -81,10 +81,10 @@ def IIC_fpSQRT32 : InstrItinClass; def IIC_fpSQRT64 : InstrItinClass; def IIC_fpLoad32 : InstrItinClass; def IIC_fpLoad64 : InstrItinClass; -def IIC_fpLoadm : InstrItinClass; +def IIC_fpLoadm : InstrItinClass<0>; // micro-coded def IIC_fpStore32 : InstrItinClass; def IIC_fpStore64 : InstrItinClass; -def IIC_fpStorem : InstrItinClass; +def IIC_fpStorem : InstrItinClass<0>; // micro-coded def IIC_VLD1 : InstrItinClass; def IIC_VLD2 : InstrItinClass; def IIC_VLD3 : InstrItinClass; diff --git a/lib/Target/ARM/ARMScheduleV6.td b/lib/Target/ARM/ARMScheduleV6.td index 08b560cc0c..b382a7a519 100644 --- a/lib/Target/ARM/ARMScheduleV6.td +++ b/lib/Target/ARM/ARMScheduleV6.td @@ -86,6 +86,11 @@ def ARMV6Itineraries : ProcessorItineraries< // Load multiple InstrItinData<IIC_iLoadm , [InstrStage<3, [V6_Pipe]>]>, + // + // Load multiple plus branch + InstrItinData<IIC_iLoadmBr , [InstrStage<3, [V6_Pipe]>, + InstrStage<1, [V6_Pipe]>]>, + // Integer store pipeline // // Immediate offset diff --git a/lib/Target/TargetInstrInfo.cpp b/lib/Target/TargetInstrInfo.cpp index c099a7eaef..118afd48c9 100644 --- a/lib/Target/TargetInstrInfo.cpp +++ b/lib/Target/TargetInstrInfo.cpp @@ -13,6 +13,7 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/Target/TargetInstrItineraries.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/ErrorHandling.h" using namespace llvm; @@ -47,6 +48,22 @@ TargetInstrInfo::TargetInstrInfo(const TargetInstrDesc* Desc, TargetInstrInfo::~TargetInstrInfo() { } +unsigned +TargetInstrInfo::getNumMicroOps(const MachineInstr *MI, + const InstrItineraryData &ItinData) const { + if (ItinData.isEmpty()) + return 1; + + unsigned Class = MI->getDesc().getSchedClass(); + unsigned UOps = ItinData.Itineratries[Class].NumMicroOps; + if (UOps) + return UOps; + + // The # of u-ops is dynamically determined. The specific target should + // override this function to return the right number. + return 1; +} + /// insertNoop - Insert a noop into the instruction stream at the specified /// point. void TargetInstrInfo::insertNoop(MachineBasicBlock &MBB, diff --git a/utils/TableGen/SubtargetEmitter.cpp b/utils/TableGen/SubtargetEmitter.cpp index b04eaf88f7..21870b1f5b 100644 --- a/utils/TableGen/SubtargetEmitter.cpp +++ b/utils/TableGen/SubtargetEmitter.cpp @@ -172,13 +172,10 @@ void SubtargetEmitter::CPUKeyValues(raw_ostream &OS) { // CollectAllItinClasses - Gathers and enumerates all the itinerary classes. // Returns itinerary class count. // -unsigned SubtargetEmitter::CollectAllItinClasses(raw_ostream &OS, - std::map<std::string, unsigned> &ItinClassesMap) { - // Gather and sort all itinerary classes - std::vector<Record*> ItinClassList = - Records.getAllDerivedDefinitions("InstrItinClass"); - std::sort(ItinClassList.begin(), ItinClassList.end(), LessRecord()); - +unsigned SubtargetEmitter:: +CollectAllItinClasses(raw_ostream &OS, + std::map<std::string, unsigned> &ItinClassesMap, + std::vector<Record*> &ItinClassList) { // For each itinerary class unsigned N = ItinClassList.size(); for (unsigned i = 0; i < N; i++) { @@ -271,7 +268,8 @@ void SubtargetEmitter::FormItineraryOperandCycleString(Record *ItinData, // void SubtargetEmitter::EmitStageAndOperandCycleData(raw_ostream &OS, unsigned NItinClasses, - std::map<std::string, unsigned> &ItinClassesMap, + std::map<std::string, unsigned> &ItinClassesMap, + std::vector<Record*> &ItinClassList, std::vector<std::vector<InstrItinerary> > &ProcList) { // Gather processor iteraries std::vector<Record*> ProcItinList = @@ -374,14 +372,16 @@ void SubtargetEmitter::EmitStageAndOperandCycleData(raw_ostream &OS, } } - // Set up itinerary as location and location + stage count - InstrItinerary Intinerary = { FindStage, FindStage + NStages, - FindOperandCycle, FindOperandCycle + NOperandCycles}; - // Locate where to inject into processor itinerary table const std::string &Name = ItinData->getValueAsDef("TheClass")->getName(); unsigned Find = ItinClassesMap[Name]; + // Set up itinerary as location and location + stage count + unsigned NumUOps = ItinClassList[Find]->getValueAsInt("NumMicroOps"); + InstrItinerary Intinerary = { NumUOps, FindStage, FindStage + NStages, + FindOperandCycle, + FindOperandCycle + NOperandCycles}; + // Inject - empty slots will be 0, 0 ItinList[Find] = Intinerary; } @@ -443,9 +443,11 @@ void SubtargetEmitter::EmitProcessorData(raw_ostream &OS, // Emit in the form of // { firstStage, lastStage, firstCycle, lastCycle } // index if (Intinerary.FirstStage == 0) { - OS << " { 0, 0, 0, 0 }"; + OS << " { 1, 0, 0, 0, 0 }"; } else { - OS << " { " << Intinerary.FirstStage << ", " << + OS << " { " << + Intinerary.NumMicroOps << ", " << + Intinerary.FirstStage << ", " << Intinerary.LastStage << ", " << Intinerary.FirstOperandCycle << ", " << Intinerary.LastOperandCycle << " }"; @@ -455,7 +457,7 @@ void SubtargetEmitter::EmitProcessorData(raw_ostream &OS, } // End processor itinerary table - OS << " { ~0U, ~0U, ~0U, ~0U } // end marker\n"; + OS << " { 1, ~0U, ~0U, ~0U, ~0U } // end marker\n"; OS << "};\n"; } } @@ -511,16 +513,22 @@ void SubtargetEmitter::EmitProcessorLookup(raw_ostream &OS) { // void SubtargetEmitter::EmitData(raw_ostream &OS) { std::map<std::string, unsigned> ItinClassesMap; - std::vector<std::vector<InstrItinerary> > ProcList; + // Gather and sort all itinerary classes + std::vector<Record*> ItinClassList = + Records.getAllDerivedDefinitions("InstrItinClass"); + std::sort(ItinClassList.begin(), ItinClassList.end(), LessRecord()); // Enumerate all the itinerary classes - unsigned NItinClasses = CollectAllItinClasses(OS, ItinClassesMap); + unsigned NItinClasses = CollectAllItinClasses(OS, ItinClassesMap, + ItinClassList); // Make sure the rest is worth the effort HasItineraries = NItinClasses != 1; // Ignore NoItinerary. if (HasItineraries) { + std::vector<std::vector<InstrItinerary> > ProcList; // Emit the stage data - EmitStageAndOperandCycleData(OS, NItinClasses, ItinClassesMap, ProcList); + EmitStageAndOperandCycleData(OS, NItinClasses, ItinClassesMap, + ItinClassList, ProcList); // Emit the processor itinerary data EmitProcessorData(OS, ProcList); // Emit the processor lookup data diff --git a/utils/TableGen/SubtargetEmitter.h b/utils/TableGen/SubtargetEmitter.h index f43a4431d6..4edf648569 100644 --- a/utils/TableGen/SubtargetEmitter.h +++ b/utils/TableGen/SubtargetEmitter.h @@ -33,7 +33,8 @@ class SubtargetEmitter : public TableGenBackend { void FeatureKeyValues(raw_ostream &OS); void CPUKeyValues(raw_ostream &OS); unsigned CollectAllItinClasses(raw_ostream &OS, - std::map<std::string, unsigned> &ItinClassesMap); + std::map<std::string,unsigned> &ItinClassesMap, + std::vector<Record*> &ItinClassList); void FormItineraryStageString(const std::string &Names, Record *ItinData, std::string &ItinString, unsigned &NStages); @@ -41,6 +42,7 @@ class SubtargetEmitter : public TableGenBackend { unsigned &NOperandCycles); void EmitStageAndOperandCycleData(raw_ostream &OS, unsigned NItinClasses, std::map<std::string, unsigned> &ItinClassesMap, + std::vector<Record*> &ItinClassList, std::vector<std::vector<InstrItinerary> > &ProcList); void EmitProcessorData(raw_ostream &OS, std::vector<std::vector<InstrItinerary> > &ProcList); |