diff options
author | Andrew Trick <atrick@apple.com> | 2012-07-02 18:10:42 +0000 |
---|---|---|
committer | Andrew Trick <atrick@apple.com> | 2012-07-02 18:10:42 +0000 |
commit | 218ee74a011c0d350099c452810da0bd57a15047 (patch) | |
tree | 58e25d935c68661cd1654a02633e20c3a5c2ea56 /lib/Target/ARM | |
parent | 7c3a65c7edceb3a013cf49d376c3bc016eb871bf (diff) | |
download | llvm-218ee74a011c0d350099c452810da0bd57a15047.tar.gz llvm-218ee74a011c0d350099c452810da0bd57a15047.tar.bz2 llvm-218ee74a011c0d350099c452810da0bd57a15047.tar.xz |
Reapply "Make NumMicroOps a variable in the subtarget's instruction itinerary."
Reapplies r159406 with minor cleanup. The regressions appear to have been spurious.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@159541 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target/ARM')
-rw-r--r-- | lib/Target/ARM/ARMBaseInstrInfo.cpp | 21 | ||||
-rw-r--r-- | lib/Target/ARM/ARMSchedule.td | 22 | ||||
-rw-r--r-- | lib/Target/ARM/ARMScheduleA8.td | 34 | ||||
-rw-r--r-- | lib/Target/ARM/ARMScheduleA9.td | 35 |
4 files changed, 64 insertions, 48 deletions
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 464c9aad3c..7227e62c7c 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -2176,9 +2176,9 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData, const MCInstrDesc &Desc = MI->getDesc(); unsigned Class = Desc.getSchedClass(); - unsigned UOps = ItinData->Itineraries[Class].NumMicroOps; - if (UOps) - return UOps; + int ItinUOps = ItinData->getNumMicroOps(Class); + if (ItinUOps >= 0) + return ItinUOps; unsigned Opc = MI->getOpcode(); switch (Opc) { @@ -2252,19 +2252,19 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData, return 2; // 4 registers would be issued: 2, 2. // 5 registers would be issued: 2, 2, 1. - UOps = (NumRegs / 2); + int A8UOps = (NumRegs / 2); if (NumRegs % 2) - ++UOps; - return UOps; + ++A8UOps; + return A8UOps; } else if (Subtarget.isCortexA9()) { - UOps = (NumRegs / 2); + int A9UOps = (NumRegs / 2); // If there are odd number of registers or if it's not 64-bit aligned, // then it takes an extra AGU (Address Generation Unit) cycle. if ((NumRegs % 2) || !MI->hasOneMemOperand() || (*MI->memoperands_begin())->getAlignment() < 8) - ++UOps; - return UOps; + ++A9UOps; + return A9UOps; } else { // Assume the worst. return NumRegs; @@ -3057,7 +3057,8 @@ unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, unsigned Class = MCID.getSchedClass(); // For instructions with variable uops, use uops as latency. - if (!ItinData->isEmpty() && !ItinData->Itineraries[Class].NumMicroOps) { + if (!ItinData->isEmpty() && ItinData->getNumMicroOps(Class) < 0) { + dbgs() << "UOPS " << getNumMicroOps(ItinData, MI) << " " << *MI << '\n'; return getNumMicroOps(ItinData, MI); } // For the common case, fall back on the itinerary's latency. diff --git a/lib/Target/ARM/ARMSchedule.td b/lib/Target/ARM/ARMSchedule.td index b9a07f1ee6..81d2fa37c2 100644 --- a/lib/Target/ARM/ARMSchedule.td +++ b/lib/Target/ARM/ARMSchedule.td @@ -70,11 +70,11 @@ def IIC_iLoad_bh_siu : InstrItinClass; def IIC_iLoad_d_i : InstrItinClass; def IIC_iLoad_d_r : InstrItinClass; def IIC_iLoad_d_ru : InstrItinClass; -def IIC_iLoad_m : InstrItinClass<0>; // micro-coded -def IIC_iLoad_mu : InstrItinClass<0>; // micro-coded -def IIC_iLoad_mBr : InstrItinClass<0>; // micro-coded -def IIC_iPop : InstrItinClass<0>; // micro-coded -def IIC_iPop_Br : InstrItinClass<0>; // micro-coded +def IIC_iLoad_m : InstrItinClass; +def IIC_iLoad_mu : InstrItinClass; +def IIC_iLoad_mBr : InstrItinClass; +def IIC_iPop : InstrItinClass; +def IIC_iPop_Br : InstrItinClass; def IIC_iLoadiALU : InstrItinClass; def IIC_iStore_i : InstrItinClass; def IIC_iStore_r : InstrItinClass; @@ -91,8 +91,8 @@ def IIC_iStore_bh_siu : InstrItinClass; def IIC_iStore_d_i : InstrItinClass; def IIC_iStore_d_r : InstrItinClass; def IIC_iStore_d_ru : InstrItinClass; -def IIC_iStore_m : InstrItinClass<0>; // micro-coded -def IIC_iStore_mu : InstrItinClass<0>; // micro-coded +def IIC_iStore_m : InstrItinClass; +def IIC_iStore_mu : InstrItinClass; def IIC_Preload : InstrItinClass; def IIC_Br : InstrItinClass; def IIC_fpSTAT : InstrItinClass; @@ -126,12 +126,12 @@ def IIC_fpSQRT32 : InstrItinClass; def IIC_fpSQRT64 : InstrItinClass; def IIC_fpLoad32 : InstrItinClass; def IIC_fpLoad64 : InstrItinClass; -def IIC_fpLoad_m : InstrItinClass<0>; // micro-coded -def IIC_fpLoad_mu : InstrItinClass<0>; // micro-coded +def IIC_fpLoad_m : InstrItinClass; +def IIC_fpLoad_mu : InstrItinClass; def IIC_fpStore32 : InstrItinClass; def IIC_fpStore64 : InstrItinClass; -def IIC_fpStore_m : InstrItinClass<0>; // micro-coded -def IIC_fpStore_mu : InstrItinClass<0>; // micro-coded +def IIC_fpStore_m : InstrItinClass; +def IIC_fpStore_mu : InstrItinClass; def IIC_VLD1 : InstrItinClass; def IIC_VLD1x2 : InstrItinClass; def IIC_VLD1x3 : InstrItinClass; diff --git a/lib/Target/ARM/ARMScheduleA8.td b/lib/Target/ARM/ARMScheduleA8.td index eb1083ca23..61de00a208 100644 --- a/lib/Target/ARM/ARMScheduleA8.td +++ b/lib/Target/ARM/ARMScheduleA8.td @@ -155,28 +155,30 @@ def CortexA8Itineraries : MultiIssueItineraries< // Load multiple, def is the 5th operand. Pipeline 0 only. // FIXME: A8_LSPipe cycle time is dynamic, this assumes 3 to 4 registers. InstrItinData<IIC_iLoad_m , [InstrStage<2, [A8_Pipe0], 0>, - InstrStage<2, [A8_LSPipe]>], [1, 1, 1, 1, 3]>, + InstrStage<2, [A8_LSPipe]>], + [1, 1, 1, 1, 3], [], -1>, // dynamic uops // // Load multiple + update, defs are the 1st and 5th operands. InstrItinData<IIC_iLoad_mu , [InstrStage<3, [A8_Pipe0], 0>, - InstrStage<3, [A8_LSPipe]>], [2, 1, 1, 1, 3]>, + InstrStage<3, [A8_LSPipe]>], + [2, 1, 1, 1, 3], [], -1>, // dynamic uops // // Load multiple plus branch InstrItinData<IIC_iLoad_mBr, [InstrStage<3, [A8_Pipe0], 0>, InstrStage<3, [A8_LSPipe]>, InstrStage<1, [A8_Pipe0, A8_Pipe1]>], - [1, 2, 1, 1, 3]>, + [1, 2, 1, 1, 3], [], -1>, // dynamic uops // // Pop, def is the 3rd operand. InstrItinData<IIC_iPop , [InstrStage<3, [A8_Pipe0], 0>, - InstrStage<3, [A8_LSPipe]>], [1, 1, 3]>, + InstrStage<3, [A8_LSPipe]>], + [1, 1, 3], [], -1>, // dynamic uops // // Push, def is the 3th operand. InstrItinData<IIC_iPop_Br, [InstrStage<3, [A8_Pipe0], 0>, InstrStage<3, [A8_LSPipe]>, InstrStage<1, [A8_Pipe0, A8_Pipe1]>], - [1, 1, 3]>, - + [1, 1, 3], [], -1>, // dynamic uops // // iLoadi + iALUr for t2LDRpci_pic. InstrItinData<IIC_iLoadiALU, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, @@ -231,12 +233,13 @@ def CortexA8Itineraries : MultiIssueItineraries< // Store multiple. Pipeline 0 only. // FIXME: A8_LSPipe cycle time is dynamic, this assumes 3 to 4 registers. InstrItinData<IIC_iStore_m , [InstrStage<2, [A8_Pipe0], 0>, - InstrStage<2, [A8_LSPipe]>]>, + InstrStage<2, [A8_LSPipe]>], + [], [], -1>, // dynamic uops // // Store multiple + update InstrItinData<IIC_iStore_mu, [InstrStage<2, [A8_Pipe0], 0>, - InstrStage<2, [A8_LSPipe]>], [2]>, - + InstrStage<2, [A8_LSPipe]>], + [2], [], -1>, // dynamic uops // // Preload InstrItinData<IIC_Preload, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>, @@ -397,14 +400,16 @@ def CortexA8Itineraries : MultiIssueItineraries< InstrStage<1, [A8_NLSPipe], 0>, InstrStage<1, [A8_LSPipe]>, InstrStage<1, [A8_NLSPipe], 0>, - InstrStage<1, [A8_LSPipe]>], [1, 1, 1, 2]>, + InstrStage<1, [A8_LSPipe]>], + [1, 1, 1, 2], [], -1>, // dynamic uops // // FP Load Multiple + update InstrItinData<IIC_fpLoad_mu,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<1, [A8_NLSPipe], 0>, InstrStage<1, [A8_LSPipe]>, InstrStage<1, [A8_NLSPipe], 0>, - InstrStage<1, [A8_LSPipe]>], [2, 1, 1, 1, 2]>, + InstrStage<1, [A8_LSPipe]>], + [2, 1, 1, 1, 2], [], -1>, // dynamic uops // // Single-precision FP Store InstrItinData<IIC_fpStore32,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, @@ -423,15 +428,16 @@ def CortexA8Itineraries : MultiIssueItineraries< InstrStage<1, [A8_NLSPipe], 0>, InstrStage<1, [A8_LSPipe]>, InstrStage<1, [A8_NLSPipe], 0>, - InstrStage<1, [A8_LSPipe]>], [1, 1, 1, 1]>, + InstrStage<1, [A8_LSPipe]>], + [1, 1, 1, 1], [], -1>, // dynamic uops // // FP Store Multiple + update InstrItinData<IIC_fpStore_mu,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<1, [A8_NLSPipe], 0>, InstrStage<1, [A8_LSPipe]>, InstrStage<1, [A8_NLSPipe], 0>, - InstrStage<1, [A8_LSPipe]>], [2, 1, 1, 1, 1]>, - + InstrStage<1, [A8_LSPipe]>], + [2, 1, 1, 1, 1], [], -1>, // dynamic uops // NEON // Issue through integer pipeline, and execute in NEON unit. // diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td index a00577bf3d..1677ba6a98 100644 --- a/lib/Target/ARM/ARMScheduleA9.td +++ b/lib/Target/ARM/ARMScheduleA9.td @@ -284,7 +284,8 @@ def CortexA9Itineraries : MultiIssueItineraries< InstrStage<2, [A9_AGU], 1>, InstrStage<2, [A9_LSUnit]>], [1, 1, 1, 1, 3], - [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>, + [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass], + -1>, // dynamic uops // // Load multiple + update, defs are the 1st and 5th operands. InstrItinData<IIC_iLoad_mu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, @@ -292,7 +293,8 @@ def CortexA9Itineraries : MultiIssueItineraries< InstrStage<2, [A9_AGU], 1>, InstrStage<2, [A9_LSUnit]>], [2, 1, 1, 1, 3], - [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>, + [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass], + -1>, // dynamic uops // // Load multiple plus branch InstrItinData<IIC_iLoad_mBr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, @@ -301,7 +303,8 @@ def CortexA9Itineraries : MultiIssueItineraries< InstrStage<2, [A9_LSUnit]>, InstrStage<1, [A9_Branch]>], [1, 2, 1, 1, 3], - [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>, + [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass], + -1>, // dynamic uops // // Pop, def is the 3rd operand. InstrItinData<IIC_iPop , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, @@ -309,7 +312,8 @@ def CortexA9Itineraries : MultiIssueItineraries< InstrStage<2, [A9_AGU], 1>, InstrStage<2, [A9_LSUnit]>], [1, 1, 3], - [NoBypass, NoBypass, A9_LdBypass]>, + [NoBypass, NoBypass, A9_LdBypass], + -1>, // dynamic uops // // Pop + branch, def is the 3rd operand. InstrItinData<IIC_iPop_Br, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, @@ -318,8 +322,8 @@ def CortexA9Itineraries : MultiIssueItineraries< InstrStage<2, [A9_LSUnit]>, InstrStage<1, [A9_Branch]>], [1, 1, 3], - [NoBypass, NoBypass, A9_LdBypass]>, - + [NoBypass, NoBypass, A9_LdBypass], + -1>, // dynamic uops // // iLoadi + iALUr for t2LDRpci_pic. InstrItinData<IIC_iLoadiALU, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, @@ -413,14 +417,15 @@ def CortexA9Itineraries : MultiIssueItineraries< InstrItinData<IIC_iStore_m , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_AGU], 0>, - InstrStage<2, [A9_LSUnit]>]>, + InstrStage<2, [A9_LSUnit]>], + [], [], -1>, // dynamic uops // // Store multiple + update InstrItinData<IIC_iStore_mu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_AGU], 0>, - InstrStage<2, [A9_LSUnit]>], [2]>, - + InstrStage<2, [A9_LSUnit]>], + [2], [], -1>, // dynamic uops // // Preload InstrItinData<IIC_Preload, [InstrStage<1, [A9_Issue0, A9_Issue1]>], [1, 1]>, @@ -717,7 +722,8 @@ def CortexA9Itineraries : MultiIssueItineraries< InstrStage<1, [A9_DRegsVFP], 0, Required>, InstrStage<2, [A9_DRegsN], 0, Reserved>, InstrStage<1, [A9_NPipe], 0>, - InstrStage<2, [A9_LSUnit]>], [1, 1, 1, 1]>, + InstrStage<2, [A9_LSUnit]>], + [1, 1, 1, 1], [], -1>, // dynamic uops // // FP Load Multiple + update // FIXME: assumes 2 doubles which requires 2 LS cycles. @@ -726,7 +732,8 @@ def CortexA9Itineraries : MultiIssueItineraries< InstrStage<1, [A9_DRegsVFP], 0, Required>, InstrStage<2, [A9_DRegsN], 0, Reserved>, InstrStage<1, [A9_NPipe], 0>, - InstrStage<2, [A9_LSUnit]>], [2, 1, 1, 1]>, + InstrStage<2, [A9_LSUnit]>], + [2, 1, 1, 1], [], -1>, // dynamic uops // // Single-precision FP Store InstrItinData<IIC_fpStore32,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, @@ -753,7 +760,8 @@ def CortexA9Itineraries : MultiIssueItineraries< InstrStage<1, [A9_DRegsVFP], 0, Required>, InstrStage<2, [A9_DRegsN], 0, Reserved>, InstrStage<1, [A9_NPipe], 0>, - InstrStage<2, [A9_LSUnit]>], [1, 1, 1, 1]>, + InstrStage<2, [A9_LSUnit]>], + [1, 1, 1, 1], [], -1>, // dynamic uops // // FP Store Multiple + update // FIXME: assumes 2 doubles which requires 2 LS cycles. @@ -762,7 +770,8 @@ def CortexA9Itineraries : MultiIssueItineraries< InstrStage<1, [A9_DRegsVFP], 0, Required>, InstrStage<2, [A9_DRegsN], 0, Reserved>, InstrStage<1, [A9_NPipe], 0>, - InstrStage<2, [A9_LSUnit]>], [2, 1, 1, 1]>, + InstrStage<2, [A9_LSUnit]>], + [2, 1, 1, 1], [], -1>, // dynamic uops // NEON // VLD1 InstrItinData<IIC_VLD1, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, |