diff options
-rw-r--r-- | include/llvm/Target/TargetInstrItineraries.h | 70 | ||||
-rw-r--r-- | include/llvm/Target/TargetSchedule.td | 19 | ||||
-rw-r--r-- | lib/CodeGen/ExactHazardRecognizer.cpp | 54 | ||||
-rw-r--r-- | lib/Target/ARM/ARMInstrInfo.td | 54 | ||||
-rw-r--r-- | lib/Target/ARM/ARMInstrThumb.td | 2 | ||||
-rw-r--r-- | lib/Target/ARM/ARMInstrThumb2.td | 46 | ||||
-rw-r--r-- | lib/Target/ARM/ARMScheduleV7.td | 32 | ||||
-rw-r--r-- | utils/TableGen/SubtargetEmitter.cpp | 11 |
8 files changed, 170 insertions, 118 deletions
diff --git a/include/llvm/Target/TargetInstrItineraries.h b/include/llvm/Target/TargetInstrItineraries.h index 1d5af9a348..237ca4ef2a 100644 --- a/include/llvm/Target/TargetInstrItineraries.h +++ b/include/llvm/Target/TargetInstrItineraries.h @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// // // This file describes the structures used for instruction itineraries and -// states. This is used by schedulers to determine instruction states and +// stages. This is used by schedulers to determine instruction stages and // latencies. // //===----------------------------------------------------------------------===// @@ -16,17 +16,57 @@ #ifndef LLVM_TARGET_TARGETINSTRITINERARIES_H #define LLVM_TARGET_TARGETINSTRITINERARIES_H +#include <algorithm> + namespace llvm { //===----------------------------------------------------------------------===// -/// Instruction stage - These values represent a step in the execution of an -/// instruction. The latency represents the number of discrete time slots -/// needed to complete the stage. Units represent the choice of functional -/// units that can be used to complete the stage. Eg. IntUnit1, IntUnit2. +/// Instruction stage - These values represent a non-pipelined step in +/// the execution of an instruction. Cycles represents the number of +/// discrete time slots needed to complete the stage. Units represent +/// the choice of functional units that can be used to complete the +/// stage. Eg. IntUnit1, IntUnit2. NextCycles indicates how many +/// cycles should elapse from the start of this stage to the start of +/// the next stage in the itinerary. A value of -1 indicates that the +/// next stage should start immediately after the current one. +/// For example: +/// +/// { 1, x, -1 } +/// indicates that the stage occupies FU x for 1 cycle and that +/// the next stage starts immediately after this one. +/// +/// { 2, x|y, 1 } +/// indicates that the stage occupies either FU x or FU y for 2 +/// consecuative cycles and that the next stage starts one cycle +/// after this stage starts. That is, the stage requirements +/// overlap in time. +/// +/// { 1, x, 0 } +/// indicates that the stage occupies FU x for 1 cycle and that +/// the next stage starts in this same cycle. This can be used to +/// indicate that the instruction requires multiple stages at the +/// same time. /// struct InstrStage { - unsigned Cycles; ///< Length of stage in machine cycles - unsigned Units; ///< Choice of functional units + unsigned Cycles_; ///< Length of stage in machine cycles + unsigned Units_; ///< Choice of functional units + int NextCycles_; ///< Number of machine cycles to next stage + + /// getCycles - returns the number of cycles the stage is occupied + unsigned getCycles() const { + return Cycles_; + } + + /// getUnits - returns the choice of FUs + unsigned getUnits() const { + return Units_; + } + + /// getNextCycles - returns the number of cycles from the start of + /// this stage to the start of the next stage in the itinerary + unsigned getNextCycles() const { + return (NextCycles_ >= 0) ? (unsigned)NextCycles_ : Cycles_; + } }; @@ -84,13 +124,17 @@ struct InstrItineraryData { if (isEmpty()) return 1; - // Just sum the cycle count for each stage. The assumption is that all - // inputs are consumed at the start of the first stage and that all - // outputs are produced at the end of the last stage. - unsigned Latency = 0; + // Caclulate the maximum completion time for any stage. The + // assumption is that all inputs are consumed at the start of the + // first stage and that all outputs are produced at the end of the + // latest completing last stage. + unsigned Latency = 0, StartCycle = 0; for (const InstrStage *IS = begin(ItinClassIndx), *E = end(ItinClassIndx); - IS != E; ++IS) - Latency += IS->Cycles; + IS != E; ++IS) { + Latency = std::max(Latency, StartCycle + IS->getCycles()); + StartCycle += IS->getNextCycles(); + } + return Latency; } }; diff --git a/include/llvm/Target/TargetSchedule.td b/include/llvm/Target/TargetSchedule.td index 38461c5a38..4940e8b82b 100644 --- a/include/llvm/Target/TargetSchedule.td +++ b/include/llvm/Target/TargetSchedule.td @@ -23,14 +23,23 @@ class FuncUnit; //===----------------------------------------------------------------------===// -// Instruction stage - These values represent a step in the execution of an -// instruction. The latency represents the number of discrete time slots used -// need to complete the stage. Units represent the choice of functional units -// that can be used to complete the stage. Eg. IntUnit1, IntUnit2. +// Instruction stage - These values represent a non-pipelined step in +// the execution of an instruction. Cycles represents the number of +// discrete time slots needed to complete the stage. Units represent +// the choice of functional units that can be used to complete the +// stage. Eg. IntUnit1, IntUnit2. NextCycles indicates how many +// cycles should elapse from the start of this stage to the start of +// the next stage in the itinerary. For example: // -class InstrStage<int cycles, list<FuncUnit> units> { +// A stage is specified in one of two ways: +// +// InstrStage<1, [FU_x, FU_y]> - TimeInc defaults to Cycles +// InstrStage<1, [FU_x, FU_y], 0> - TimeInc explicit +// +class InstrStage<int cycles, list<FuncUnit> units, int timeinc = -1> { int Cycles = cycles; // length of stage in machine cycles list<FuncUnit> Units = units; // choice of functional units + int TimeInc = timeinc; // cycles till start of next stage } //===----------------------------------------------------------------------===// diff --git a/lib/CodeGen/ExactHazardRecognizer.cpp b/lib/CodeGen/ExactHazardRecognizer.cpp index 48043f286c..8bac08a4a0 100644 --- a/lib/CodeGen/ExactHazardRecognizer.cpp +++ b/lib/CodeGen/ExactHazardRecognizer.cpp @@ -34,12 +34,12 @@ ExactHazardRecognizer::ExactHazardRecognizer(const InstrItineraryData &LItinData // If the begin stage of an itinerary has 0 cycles and units, // then we have reached the end of the itineraries. const InstrStage *IS = ItinData.begin(idx), *E = ItinData.end(idx); - if ((IS->Cycles == 0) && (IS->Units == 0)) + if ((IS->getCycles() == 0) && (IS->getUnits() == 0)) break; unsigned ItinDepth = 0; for (; IS != E; ++IS) - ItinDepth += std::max(1U, IS->Cycles); + ItinDepth += IS->getCycles(); ScoreboardDepth = std::max(ScoreboardDepth, ItinDepth); } @@ -89,27 +89,25 @@ ExactHazardRecognizer::HazardType ExactHazardRecognizer::getHazardType(SUnit *SU unsigned idx = SU->getInstr()->getDesc().getSchedClass(); for (const InstrStage *IS = ItinData.begin(idx), *E = ItinData.end(idx); IS != E; ++IS) { - // If the stages cycles are 0, then we must have the FU free in - // the current cycle, but we don't advance the cycle time . - unsigned StageCycles = std::max(1U, IS->Cycles); - // We must find one of the stage's units free for every cycle the - // stage is occupied. - for (unsigned int i = 0; i < StageCycles; ++i) { - assert((cycle < ScoreboardDepth) && "Scoreboard depth exceeded!"); - - unsigned index = getFutureIndex(cycle); - unsigned freeUnits = IS->Units & ~Scoreboard[index]; + // stage is occupied. FIXME it would be more accurate to find the + // same unit free in all the cycles. + for (unsigned int i = 0; i < IS->getCycles(); ++i) { + assert(((cycle + i) < ScoreboardDepth) && + "Scoreboard depth exceeded!"); + + unsigned index = getFutureIndex(cycle + i); + unsigned freeUnits = IS->getUnits() & ~Scoreboard[index]; if (!freeUnits) { - DEBUG(errs() << "*** Hazard in cycle " << cycle << ", "); + DEBUG(errs() << "*** Hazard in cycle " << (cycle + i) << ", "); DEBUG(errs() << "SU(" << SU->NodeNum << "): "); DEBUG(SU->getInstr()->dump()); return Hazard; } - - if (IS->Cycles > 0) - ++cycle; } + + // Advance the cycle to the next stage. + cycle += IS->getNextCycles(); } return NoHazard; @@ -123,17 +121,15 @@ void ExactHazardRecognizer::EmitInstruction(SUnit *SU) { unsigned idx = SU->getInstr()->getDesc().getSchedClass(); for (const InstrStage *IS = ItinData.begin(idx), *E = ItinData.end(idx); IS != E; ++IS) { - // If the stages cycles are 0, then we must reserve the FU in the - // current cycle, but we don't advance the cycle time . - unsigned StageCycles = std::max(1U, IS->Cycles); - // We must reserve one of the stage's units for every cycle the - // stage is occupied. - for (unsigned int i = 0; i < StageCycles; ++i) { - assert((cycle < ScoreboardDepth) && "Scoreboard depth exceeded!"); - - unsigned index = getFutureIndex(cycle); - unsigned freeUnits = IS->Units & ~Scoreboard[index]; + // stage is occupied. FIXME it would be more accurate to reserve + // the same unit free in all the cycles. + for (unsigned int i = 0; i < IS->getCycles(); ++i) { + assert(((cycle + i) < ScoreboardDepth) && + "Scoreboard depth exceeded!"); + + unsigned index = getFutureIndex(cycle + i); + unsigned freeUnits = IS->getUnits() & ~Scoreboard[index]; // reduce to a single unit unsigned freeUnit = 0; @@ -144,10 +140,10 @@ void ExactHazardRecognizer::EmitInstruction(SUnit *SU) { assert(freeUnit && "No function unit available!"); Scoreboard[index] |= freeUnit; - - if (IS->Cycles > 0) - ++cycle; } + + // Advance the cycle to the next stage. + cycle += IS->getNextCycles(); } DEBUG(dumpScoreboard()); diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 0985265e19..8e5a01d4dd 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -615,7 +615,7 @@ let isReturn = 1, isTerminator = 1, mayLoad = 1 in []>; // On non-Darwin platforms R9 is callee-saved. -let isCall = 1, Itinerary = IIC_Br, +let isCall = 1, Defs = [R0, R1, R2, R3, R12, LR, D0, D1, D2, D3, D4, D5, D6, D7, D16, D17, D18, D19, D20, D21, D22, D23, @@ -652,7 +652,7 @@ let isCall = 1, Itinerary = IIC_Br, } // On Darwin R9 is call-clobbered. -let isCall = 1, Itinerary = IIC_Br, +let isCall = 1, Defs = [R0, R1, R2, R3, R9, R12, LR, D0, D1, D2, D3, D4, D5, D6, D7, D16, D17, D18, D19, D20, D21, D22, D23, @@ -685,7 +685,7 @@ let isCall = 1, Itinerary = IIC_Br, } } -let isBranch = 1, isTerminator = 1, Itinerary = IIC_Br in { +let isBranch = 1, isTerminator = 1 in { // B is "predicable" since it can be xformed into a Bcc. let isBarrier = 1 in { let isPredicable = 1 in @@ -1057,7 +1057,7 @@ defm BIC : AsI1_bin_irs<0b1110, "bic", BinOpFrag<(and node:$LHS, (not node:$RHS))>>; def BFC : I<(outs GPR:$dst), (ins GPR:$src, bf_inv_mask_imm:$imm), - AddrMode1, Size4Bytes, IndexModeNone, DPFrm, NoItinerary, + AddrMode1, Size4Bytes, IndexModeNone, DPFrm, IIC_iALU, "bfc", " $dst, $imm", "$src = $dst", [(set GPR:$dst, (and GPR:$src, bf_inv_mask_imm:$imm))]>, Requires<[IsARM, HasV6T2]> { @@ -1084,16 +1084,16 @@ def : ARMPat<(and GPR:$src, so_imm_not:$imm), // let isCommutable = 1 in -def MUL : AsMul1I<0b0000000, (outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALU, +def MUL : AsMul1I<0b0000000, (outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMPY, "mul", " $dst, $a, $b", [(set GPR:$dst, (mul GPR:$a, GPR:$b))]>; def MLA : AsMul1I<0b0000001, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), - IIC_iALU, "mla", " $dst, $a, $b, $c", + IIC_iMPY, "mla", " $dst, $a, $b, $c", [(set GPR:$dst, (add (mul GPR:$a, GPR:$b), GPR:$c))]>; def MLS : AMul1I<0b0000011, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), - IIC_iALU, "mls", " $dst, $a, $b, $c", + IIC_iMPY, "mls", " $dst, $a, $b, $c", [(set GPR:$dst, (sub GPR:$c, (mul GPR:$a, GPR:$b)))]>, Requires<[IsARM, HasV6T2]>; @@ -1101,32 +1101,32 @@ def MLS : AMul1I<0b0000011, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), let neverHasSideEffects = 1 in { let isCommutable = 1 in { def SMULL : AsMul1I<0b0000110, (outs GPR:$ldst, GPR:$hdst), - (ins GPR:$a, GPR:$b), IIC_iALU, + (ins GPR:$a, GPR:$b), IIC_iMPY, "smull", " $ldst, $hdst, $a, $b", []>; def UMULL : AsMul1I<0b0000100, (outs GPR:$ldst, GPR:$hdst), - (ins GPR:$a, GPR:$b), IIC_iALU, + (ins GPR:$a, GPR:$b), IIC_iMPY, "umull", " $ldst, $hdst, $a, $b", []>; } // Multiply + accumulate def SMLAL : AsMul1I<0b0000111, (outs GPR:$ldst, GPR:$hdst), - (ins GPR:$a, GPR:$b), IIC_iALU, + (ins GPR:$a, GPR:$b), IIC_iMPY, "smlal", " $ldst, $hdst, $a, $b", []>; def UMLAL : AsMul1I<0b0000101, (outs GPR:$ldst, GPR:$hdst), - (ins GPR:$a, GPR:$b), IIC_iALU, + (ins GPR:$a, GPR:$b), IIC_iMPY, "umlal", " $ldst, $hdst, $a, $b", []>; def UMAAL : AMul1I <0b0000010, (outs GPR:$ldst, GPR:$hdst), - (ins GPR:$a, GPR:$b), IIC_iALU, + (ins GPR:$a, GPR:$b), IIC_iMPY, "umaal", " $ldst, $hdst, $a, $b", []>, Requires<[IsARM, HasV6]>; } // neverHasSideEffects // Most significant word multiply def SMMUL : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b), - IIC_iALU, "smmul", " $dst, $a, $b", + IIC_iMPY, "smmul", " $dst, $a, $b", [(set GPR:$dst, (mulhs GPR:$a, GPR:$b))]>, Requires<[IsARM, HasV6]> { let Inst{7-4} = 0b0001; @@ -1134,7 +1134,7 @@ def SMMUL : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b), } def SMMLA : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), - IIC_iALU, "smmla", " $dst, $a, $b, $c", + IIC_iMPY, "smmla", " $dst, $a, $b, $c", [(set GPR:$dst, (add (mulhs GPR:$a, GPR:$b), GPR:$c))]>, Requires<[IsARM, HasV6]> { let Inst{7-4} = 0b0001; @@ -1142,7 +1142,7 @@ def SMMLA : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), def SMMLS : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), - IIC_iALU, "smmls", " $dst, $a, $b, $c", + IIC_iMPY, "smmls", " $dst, $a, $b, $c", [(set GPR:$dst, (sub GPR:$c, (mulhs GPR:$a, GPR:$b)))]>, Requires<[IsARM, HasV6]> { let Inst{7-4} = 0b1101; @@ -1150,7 +1150,7 @@ def SMMLS : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), multiclass AI_smul<string opc, PatFrag opnode> { def BB : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b), - IIC_iALU, !strconcat(opc, "bb"), " $dst, $a, $b", + IIC_iMPY, !strconcat(opc, "bb"), " $dst, $a, $b", [(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16), (sext_inreg GPR:$b, i16)))]>, Requires<[IsARM, HasV5TE]> { @@ -1159,7 +1159,7 @@ multiclass AI_smul<string opc, PatFrag opnode> { } def BT : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b), - IIC_iALU, !strconcat(opc, "bt"), " $dst, $a, $b", + IIC_iMPY, !strconcat(opc, "bt"), " $dst, $a, $b", [(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16), (sra GPR:$b, (i32 16))))]>, Requires<[IsARM, HasV5TE]> { @@ -1168,7 +1168,7 @@ multiclass AI_smul<string opc, PatFrag opnode> { } def TB : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b), - IIC_iALU, !strconcat(opc, "tb"), " $dst, $a, $b", + IIC_iMPY, !strconcat(opc, "tb"), " $dst, $a, $b", [(set GPR:$dst, (opnode (sra GPR:$a, (i32 16)), (sext_inreg GPR:$b, i16)))]>, Requires<[IsARM, HasV5TE]> { @@ -1177,7 +1177,7 @@ multiclass AI_smul<string opc, PatFrag opnode> { } def TT : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b), - IIC_iALU, !strconcat(opc, "tt"), " $dst, $a, $b", + IIC_iMPY, !strconcat(opc, "tt"), " $dst, $a, $b", [(set GPR:$dst, (opnode (sra GPR:$a, (i32 16)), (sra GPR:$b, (i32 16))))]>, Requires<[IsARM, HasV5TE]> { @@ -1186,7 +1186,7 @@ multiclass AI_smul<string opc, PatFrag opnode> { } def WB : AMulxyI<0b0001001, (outs GPR:$dst), (ins GPR:$a, GPR:$b), - IIC_iALU, !strconcat(opc, "wb"), " $dst, $a, $b", + IIC_iMPY, !strconcat(opc, "wb"), " $dst, $a, $b", [(set GPR:$dst, (sra (opnode GPR:$a, (sext_inreg GPR:$b, i16)), (i32 16)))]>, Requires<[IsARM, HasV5TE]> { @@ -1195,7 +1195,7 @@ multiclass AI_smul<string opc, PatFrag opnode> { } def WT : AMulxyI<0b0001001, (outs GPR:$dst), (ins GPR:$a, GPR:$b), - IIC_iALU, !strconcat(opc, "wt"), " $dst, $a, $b", + IIC_iMPY, !strconcat(opc, "wt"), " $dst, $a, $b", [(set GPR:$dst, (sra (opnode GPR:$a, (sra GPR:$b, (i32 16))), (i32 16)))]>, Requires<[IsARM, HasV5TE]> { @@ -1207,7 +1207,7 @@ multiclass AI_smul<string opc, PatFrag opnode> { multiclass AI_smla<string opc, PatFrag opnode> { def BB : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), - IIC_iALU, !strconcat(opc, "bb"), " $dst, $a, $b, $acc", + IIC_iMPY, !strconcat(opc, "bb"), " $dst, $a, $b, $acc", [(set GPR:$dst, (add GPR:$acc, (opnode (sext_inreg GPR:$a, i16), (sext_inreg GPR:$b, i16))))]>, @@ -1217,7 +1217,7 @@ multiclass AI_smla<string opc, PatFrag opnode> { } def BT : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), - IIC_iALU, !strconcat(opc, "bt"), " $dst, $a, $b, $acc", + IIC_iMPY, !strconcat(opc, "bt"), " $dst, $a, $b, $acc", [(set GPR:$dst, (add GPR:$acc, (opnode (sext_inreg GPR:$a, i16), (sra GPR:$b, (i32 16)))))]>, Requires<[IsARM, HasV5TE]> { @@ -1226,7 +1226,7 @@ multiclass AI_smla<string opc, PatFrag opnode> { } def TB : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), - IIC_iALU, !strconcat(opc, "tb"), " $dst, $a, $b, $acc", + IIC_iMPY, !strconcat(opc, "tb"), " $dst, $a, $b, $acc", [(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, (i32 16)), (sext_inreg GPR:$b, i16))))]>, Requires<[IsARM, HasV5TE]> { @@ -1235,7 +1235,7 @@ multiclass AI_smla<string opc, PatFrag opnode> { } def TT : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), - IIC_iALU, !strconcat(opc, "tt"), " $dst, $a, $b, $acc", + IIC_iMPY, !strconcat(opc, "tt"), " $dst, $a, $b, $acc", [(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, (i32 16)), (sra GPR:$b, (i32 16)))))]>, Requires<[IsARM, HasV5TE]> { @@ -1244,7 +1244,7 @@ multiclass AI_smla<string opc, PatFrag opnode> { } def WB : AMulxyI<0b0001001, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), - IIC_iALU, !strconcat(opc, "wb"), " $dst, $a, $b, $acc", + IIC_iMPY, !strconcat(opc, "wb"), " $dst, $a, $b, $acc", [(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a, (sext_inreg GPR:$b, i16)), (i32 16))))]>, Requires<[IsARM, HasV5TE]> { @@ -1253,7 +1253,7 @@ multiclass AI_smla<string opc, PatFrag opnode> { } def WT : AMulxyI<0b0001001, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), - IIC_iALU, !strconcat(opc, "wt"), " $dst, $a, $b, $acc", + IIC_iMPY, !strconcat(opc, "wt"), " $dst, $a, $b, $acc", [(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a, (sra GPR:$b, (i32 16))), (i32 16))))]>, Requires<[IsARM, HasV5TE]> { diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index 0da798e15d..1a82331980 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -508,7 +508,7 @@ def tMOVgpr2gpr : T1I<(outs GPR:$dst), (ins GPR:$src), IIC_iALU, // multiply register let isCommutable = 1 in -def tMUL : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iALU, +def tMUL : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iMPY, "mul", " $dst, $rhs", [(set tGPR:$dst, (mul tGPR:$lhs, tGPR:$rhs))]>; diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 0ef9cc03e0..5800a43047 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -808,80 +808,80 @@ def : T2Pat<(t2_so_imm_not:$src), // Multiply Instructions. // let isCommutable = 1 in -def t2MUL: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALU, +def t2MUL: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMPY, "mul", " $dst, $a, $b", [(set GPR:$dst, (mul GPR:$a, GPR:$b))]>; -def t2MLA: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iALU, +def t2MLA: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMPY, "mla", " $dst, $a, $b, $c", [(set GPR:$dst, (add (mul GPR:$a, GPR:$b), GPR:$c))]>; -def t2MLS: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iALU, +def t2MLS: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMPY, "mls", " $dst, $a, $b, $c", [(set GPR:$dst, (sub GPR:$c, (mul GPR:$a, GPR:$b)))]>; // Extra precision multiplies with low / high results let neverHasSideEffects = 1 in { let isCommutable = 1 in { -def t2SMULL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iALU, +def t2SMULL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMPY, "smull", " $ldst, $hdst, $a, $b", []>; -def t2UMULL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iALU, +def t2UMULL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMPY, "umull", " $ldst, $hdst, $a, $b", []>; } // Multiply + accumulate -def t2SMLAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iALU, +def t2SMLAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMPY, "smlal", " $ldst, $hdst, $a, $b", []>; -def t2UMLAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iALU, +def t2UMLAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMPY, "umlal", " $ldst, $hdst, $a, $b", []>; -def t2UMAAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iALU, +def t2UMAAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMPY, "umaal", " $ldst, $hdst, $a, $b", []>; } // neverHasSideEffects // Most significant word multiply -def t2SMMUL : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALU, +def t2SMMUL : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMPY, "smmul", " $dst, $a, $b", [(set GPR:$dst, (mulhs GPR:$a, GPR:$b))]>; -def t2SMMLA : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iALU, +def t2SMMLA : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMPY, "smmla", " $dst, $a, $b, $c", [(set GPR:$dst, (add (mulhs GPR:$a, GPR:$b), GPR:$c))]>; -def t2SMMLS : T2I <(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iALU, +def t2SMMLS : T2I <(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMPY, "smmls", " $dst, $a, $b, $c", [(set GPR:$dst, (sub GPR:$c, (mulhs GPR:$a, GPR:$b)))]>; multiclass T2I_smul<string opc, PatFrag opnode> { - def BB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALU, + def BB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMPY, !strconcat(opc, "bb"), " $dst, $a, $b", [(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16), (sext_inreg GPR:$b, i16)))]>; - def BT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALU, + def BT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMPY, !strconcat(opc, "bt"), " $dst, $a, $b", [(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16), (sra GPR:$b, (i32 16))))]>; - def TB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALU, + def TB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMPY, !strconcat(opc, "tb"), " $dst, $a, $b", [(set GPR:$dst, (opnode (sra GPR:$a, (i32 16)), (sext_inreg GPR:$b, i16)))]>; - def TT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALU, + def TT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMPY, !strconcat(opc, "tt"), " $dst, $a, $b", [(set GPR:$dst, (opnode (sra GPR:$a, (i32 16)), (sra GPR:$b, (i32 16))))]>; - def WB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALU, + def WB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMPY, !strconcat(opc, "wb"), " $dst, $a, $b", [(set GPR:$dst, (sra (opnode GPR:$a, (sext_inreg GPR:$b, i16)), (i32 16)))]>; - def WT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALU, + def WT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMPY, !strconcat(opc, "wt"), " $dst, $a, $b", [(set GPR:$dst, (sra (opnode GPR:$a, (sra GPR:$b, (i32 16))), (i32 16)))]>; @@ -889,33 +889,33 @@ multiclass T2I_smul<string opc, PatFrag opnode> { multiclass T2I_smla<string opc, PatFrag opnode> { - def BB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iALU, + def BB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMPY, !strconcat(opc, "bb"), " $dst, $a, $b, $acc", [(set GPR:$dst, (add GPR:$acc, (opnode (sext_inreg GPR:$a, i16), (sext_inreg GPR:$b, i16))))]>; - def BT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iALU, + def BT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMPY, !strconcat(opc, "bt"), " $dst, $a, $b, $acc", [(set GPR:$dst, (add GPR:$acc, (opnode (sext_inreg GPR:$a, i16), (sra GPR:$b, (i32 16)))))]>; - def TB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iALU, + def TB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMPY, !strconcat(opc, "tb"), " $dst, $a, $b, $acc", [(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, (i32 16)), (sext_inreg GPR:$b, i16))))]>; - def TT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iALU, + def TT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMPY, !strconcat(opc, "tt"), " $dst, $a, $b, $acc", [(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, (i32 16)), (sra GPR:$b, (i32 16)))))]>; - def WB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iALU, + def WB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMPY, !strconcat(opc, "wb"), " $dst, $a, $b, $acc", [(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a, (sext_inreg GPR:$b, i16)), (i32 16))))]>; - def WT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iALU, + def WT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMPY, !strconcat(opc, "wt"), " $dst, $a, $b, $acc", [(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a, (sra GPR:$b, (i32 16))), (i32 16))))]>; diff --git a/lib/Target/ARM/ARMScheduleV7.td b/lib/Target/ARM/ARMScheduleV7.td index 8a7b42eb72..a789d71147 100644 --- a/lib/Target/ARM/ARMScheduleV7.td +++ b/lib/Target/ARM/ARMScheduleV7.td @@ -16,34 +16,34 @@ def CortexA8Itineraries : ProcessorItineraries<[ // two fully-pipelined integer ALU pipelines InstrItinData<IIC_iALU , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>]>, // one fully-pipelined integer Multiply pipeline - // function units are used in alpha order, so use FU_Pipe1 - // for the Multiple pipeline - InstrItinData<IIC_iMPY , [InstrStage<1, [FU_Pipe1]>]>, + // function units are reserved by the scheduler in reverse alpha order, + // so use FU_Pipe0 for the Multiple pipeline + InstrItinData<IIC_iMPY , [InstrStage<1, [FU_Pipe0]>]>, // loads have an extra cycle of latency, but are fully pipelined - // use a 0 cycle FU_Issue to enforce the 1 load/store per cycle limit - InstrItinData<IIC_iLoad , [InstrStage<0, [FU_Issue]>, + // use FU_Issue to enforce the 1 load/store per cycle limit + InstrItinData<IIC_iLoad , [InstrStage<1, [FU_Issue], 0>, InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<1, [FU_LdSt0]>]>, // fully-pipelined stores - // use a 0 cycle FU_Issue to enforce the 1 load/store per cycle limit - InstrItinData<IIC_iStore , [InstrStage<0, [FU_Issue]>, + // use FU_Issue to enforce the 1 load/store per cycle limit + InstrItinData<IIC_iStore , [InstrStage<1, [FU_Issue], 0>, InstrStage<1, [FU_Pipe0, FU_Pipe1]>]>, // no delay slots, so the latency of a branch is unimportant InstrItinData<IIC_Br , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>]>, - // VFP ALU is not pipelined so stall all issues - // FIXME assume NFP pipeline and 7 cycle non-pipelined latency - InstrItinData<IIC_fpALU , [InstrStage<7, [FU_Pipe0, FU_Pipe1]>]>, + // NFP ALU is not pipelined so stall all issues + InstrItinData<IIC_fpALU , [InstrStage<7, [FU_Pipe0], 0>, + InstrStage<7, [FU_Pipe1], 0>]>, // VFP MPY is not pipelined so stall all issues - // FIXME assume NFP pipeline and 7 cycle non-pipelined latency - InstrItinData<IIC_fpMPY , [InstrStage<7, [FU_Pipe0, FU_Pipe1]>]>, + InstrItinData<IIC_fpMPY , [InstrStage<7, [FU_Pipe0], 0>, + InstrStage<7, [FU_Pipe1], 0>]>, // loads have an extra cycle of latency, but are fully pipelined - // use a 0 cycle FU_Issue to enforce the 1 load/store per cycle limit - InstrItinData<IIC_fpLoad , [InstrStage<0, [FU_Issue]>, + // use FU_Issue to enforce the 1 load/store per cycle limit + InstrItinData<IIC_fpLoad , [InstrStage<1, [FU_Issue], 0>, InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<1, [FU_LdSt0]>]>, - // use a 0 cycle FU_Issue to enforce the 1 load/store per cycle limit - InstrItinData<IIC_fpStore , [InstrStage<0, [FU_Issue]>, + // use FU_Issue to enforce the 1 load/store per cycle limit + InstrItinData<IIC_fpStore , [InstrStage<1, [FU_Issue], 0>, InstrStage<1, [FU_Pipe0, FU_Pipe1]>]> ]>; diff --git a/utils/TableGen/SubtargetEmitter.cpp b/utils/TableGen/SubtargetEmitter.cpp index 919ac664ef..4a0bacd19c 100644 --- a/utils/TableGen/SubtargetEmitter.cpp +++ b/utils/TableGen/SubtargetEmitter.cpp @@ -215,7 +215,7 @@ void SubtargetEmitter::FormItineraryString(Record *ItinData, // Next stage const Record *Stage = StageList[i]; - // Form string as ,{ cycles, u1 | u2 | ... | un } + // Form string as ,{ cycles, u1 | u2 | ... | un, timeinc } int Cycles = Stage->getValueAsInt("Cycles"); ItinString += " { " + itostr(Cycles) + ", "; @@ -229,6 +229,9 @@ void SubtargetEmitter::FormItineraryString(Record *ItinData, if (++j < M) ItinString += " | "; } + int TimeInc = Stage->getValueAsInt("TimeInc"); + ItinString += ", " + itostr(TimeInc); + // Close off stage ItinString += " }"; if (++i < N) ItinString += ", "; @@ -252,7 +255,7 @@ void SubtargetEmitter::EmitStageData(raw_ostream &OS, // Begin stages table OS << "static const llvm::InstrStage Stages[] = {\n" - " { 0, 0 }, // No itinerary\n"; + " { 0, 0, 0 }, // No itinerary\n"; unsigned StageCount = 1; unsigned ItinEnum = 1; @@ -289,7 +292,7 @@ void SubtargetEmitter::EmitStageData(raw_ostream &OS, // If new itinerary if (Find == 0) { - // Emit as { cycles, u1 | u2 | ... | un }, // index + // Emit as { cycles, u1 | u2 | ... | un, timeinc }, // index OS << ItinString << ", // " << ItinEnum << "\n"; // Record Itin class number. ItinMap[ItinString] = Find = StageCount; @@ -313,7 +316,7 @@ void SubtargetEmitter::EmitStageData(raw_ostream &OS, } // Closing stage - OS << " { 0, 0 } // End itinerary\n"; + OS << " { 0, 0, 0 } // End itinerary\n"; // End stages table OS << "};\n"; |