summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/llvm/Target/TargetInstrItineraries.h70
-rw-r--r--include/llvm/Target/TargetSchedule.td19
-rw-r--r--lib/CodeGen/ExactHazardRecognizer.cpp54
-rw-r--r--lib/Target/ARM/ARMInstrInfo.td54
-rw-r--r--lib/Target/ARM/ARMInstrThumb.td2
-rw-r--r--lib/Target/ARM/ARMInstrThumb2.td46
-rw-r--r--lib/Target/ARM/ARMScheduleV7.td32
-rw-r--r--utils/TableGen/SubtargetEmitter.cpp11
8 files changed, 170 insertions, 118 deletions
diff --git a/include/llvm/Target/TargetInstrItineraries.h b/include/llvm/Target/TargetInstrItineraries.h
index 1d5af9a348..237ca4ef2a 100644
--- a/include/llvm/Target/TargetInstrItineraries.h
+++ b/include/llvm/Target/TargetInstrItineraries.h
@@ -8,7 +8,7 @@
//===----------------------------------------------------------------------===//
//
// This file describes the structures used for instruction itineraries and
-// states. This is used by schedulers to determine instruction states and
+// stages. This is used by schedulers to determine instruction stages and
// latencies.
//
//===----------------------------------------------------------------------===//
@@ -16,17 +16,57 @@
#ifndef LLVM_TARGET_TARGETINSTRITINERARIES_H
#define LLVM_TARGET_TARGETINSTRITINERARIES_H
+#include <algorithm>
+
namespace llvm {
//===----------------------------------------------------------------------===//
-/// Instruction stage - These values represent a step in the execution of an
-/// instruction. The latency represents the number of discrete time slots
-/// needed to complete the stage. Units represent the choice of functional
-/// units that can be used to complete the stage. Eg. IntUnit1, IntUnit2.
+/// Instruction stage - These values represent a non-pipelined step in
+/// the execution of an instruction. Cycles represents the number of
+/// discrete time slots needed to complete the stage. Units represent
+/// the choice of functional units that can be used to complete the
+/// stage. Eg. IntUnit1, IntUnit2. NextCycles indicates how many
+/// cycles should elapse from the start of this stage to the start of
+/// the next stage in the itinerary. A value of -1 indicates that the
+/// next stage should start immediately after the current one.
+/// For example:
+///
+/// { 1, x, -1 }
+/// indicates that the stage occupies FU x for 1 cycle and that
+/// the next stage starts immediately after this one.
+///
+/// { 2, x|y, 1 }
+/// indicates that the stage occupies either FU x or FU y for 2
+/// consecuative cycles and that the next stage starts one cycle
+/// after this stage starts. That is, the stage requirements
+/// overlap in time.
+///
+/// { 1, x, 0 }
+/// indicates that the stage occupies FU x for 1 cycle and that
+/// the next stage starts in this same cycle. This can be used to
+/// indicate that the instruction requires multiple stages at the
+/// same time.
///
struct InstrStage {
- unsigned Cycles; ///< Length of stage in machine cycles
- unsigned Units; ///< Choice of functional units
+ unsigned Cycles_; ///< Length of stage in machine cycles
+ unsigned Units_; ///< Choice of functional units
+ int NextCycles_; ///< Number of machine cycles to next stage
+
+ /// getCycles - returns the number of cycles the stage is occupied
+ unsigned getCycles() const {
+ return Cycles_;
+ }
+
+ /// getUnits - returns the choice of FUs
+ unsigned getUnits() const {
+ return Units_;
+ }
+
+ /// getNextCycles - returns the number of cycles from the start of
+ /// this stage to the start of the next stage in the itinerary
+ unsigned getNextCycles() const {
+ return (NextCycles_ >= 0) ? (unsigned)NextCycles_ : Cycles_;
+ }
};
@@ -84,13 +124,17 @@ struct InstrItineraryData {
if (isEmpty())
return 1;
- // Just sum the cycle count for each stage. The assumption is that all
- // inputs are consumed at the start of the first stage and that all
- // outputs are produced at the end of the last stage.
- unsigned Latency = 0;
+ // Caclulate the maximum completion time for any stage. The
+ // assumption is that all inputs are consumed at the start of the
+ // first stage and that all outputs are produced at the end of the
+ // latest completing last stage.
+ unsigned Latency = 0, StartCycle = 0;
for (const InstrStage *IS = begin(ItinClassIndx), *E = end(ItinClassIndx);
- IS != E; ++IS)
- Latency += IS->Cycles;
+ IS != E; ++IS) {
+ Latency = std::max(Latency, StartCycle + IS->getCycles());
+ StartCycle += IS->getNextCycles();
+ }
+
return Latency;
}
};
diff --git a/include/llvm/Target/TargetSchedule.td b/include/llvm/Target/TargetSchedule.td
index 38461c5a38..4940e8b82b 100644
--- a/include/llvm/Target/TargetSchedule.td
+++ b/include/llvm/Target/TargetSchedule.td
@@ -23,14 +23,23 @@
class FuncUnit;
//===----------------------------------------------------------------------===//
-// Instruction stage - These values represent a step in the execution of an
-// instruction. The latency represents the number of discrete time slots used
-// need to complete the stage. Units represent the choice of functional units
-// that can be used to complete the stage. Eg. IntUnit1, IntUnit2.
+// Instruction stage - These values represent a non-pipelined step in
+// the execution of an instruction. Cycles represents the number of
+// discrete time slots needed to complete the stage. Units represent
+// the choice of functional units that can be used to complete the
+// stage. Eg. IntUnit1, IntUnit2. NextCycles indicates how many
+// cycles should elapse from the start of this stage to the start of
+// the next stage in the itinerary. For example:
//
-class InstrStage<int cycles, list<FuncUnit> units> {
+// A stage is specified in one of two ways:
+//
+// InstrStage<1, [FU_x, FU_y]> - TimeInc defaults to Cycles
+// InstrStage<1, [FU_x, FU_y], 0> - TimeInc explicit
+//
+class InstrStage<int cycles, list<FuncUnit> units, int timeinc = -1> {
int Cycles = cycles; // length of stage in machine cycles
list<FuncUnit> Units = units; // choice of functional units
+ int TimeInc = timeinc; // cycles till start of next stage
}
//===----------------------------------------------------------------------===//
diff --git a/lib/CodeGen/ExactHazardRecognizer.cpp b/lib/CodeGen/ExactHazardRecognizer.cpp
index 48043f286c..8bac08a4a0 100644
--- a/lib/CodeGen/ExactHazardRecognizer.cpp
+++ b/lib/CodeGen/ExactHazardRecognizer.cpp
@@ -34,12 +34,12 @@ ExactHazardRecognizer::ExactHazardRecognizer(const InstrItineraryData &LItinData
// If the begin stage of an itinerary has 0 cycles and units,
// then we have reached the end of the itineraries.
const InstrStage *IS = ItinData.begin(idx), *E = ItinData.end(idx);
- if ((IS->Cycles == 0) && (IS->Units == 0))
+ if ((IS->getCycles() == 0) && (IS->getUnits() == 0))
break;
unsigned ItinDepth = 0;
for (; IS != E; ++IS)
- ItinDepth += std::max(1U, IS->Cycles);
+ ItinDepth += IS->getCycles();
ScoreboardDepth = std::max(ScoreboardDepth, ItinDepth);
}
@@ -89,27 +89,25 @@ ExactHazardRecognizer::HazardType ExactHazardRecognizer::getHazardType(SUnit *SU
unsigned idx = SU->getInstr()->getDesc().getSchedClass();
for (const InstrStage *IS = ItinData.begin(idx), *E = ItinData.end(idx);
IS != E; ++IS) {
- // If the stages cycles are 0, then we must have the FU free in
- // the current cycle, but we don't advance the cycle time .
- unsigned StageCycles = std::max(1U, IS->Cycles);
-
// We must find one of the stage's units free for every cycle the
- // stage is occupied.
- for (unsigned int i = 0; i < StageCycles; ++i) {
- assert((cycle < ScoreboardDepth) && "Scoreboard depth exceeded!");
-
- unsigned index = getFutureIndex(cycle);
- unsigned freeUnits = IS->Units & ~Scoreboard[index];
+ // stage is occupied. FIXME it would be more accurate to find the
+ // same unit free in all the cycles.
+ for (unsigned int i = 0; i < IS->getCycles(); ++i) {
+ assert(((cycle + i) < ScoreboardDepth) &&
+ "Scoreboard depth exceeded!");
+
+ unsigned index = getFutureIndex(cycle + i);
+ unsigned freeUnits = IS->getUnits() & ~Scoreboard[index];
if (!freeUnits) {
- DEBUG(errs() << "*** Hazard in cycle " << cycle << ", ");
+ DEBUG(errs() << "*** Hazard in cycle " << (cycle + i) << ", ");
DEBUG(errs() << "SU(" << SU->NodeNum << "): ");
DEBUG(SU->getInstr()->dump());
return Hazard;
}
-
- if (IS->Cycles > 0)
- ++cycle;
}
+
+ // Advance the cycle to the next stage.
+ cycle += IS->getNextCycles();
}
return NoHazard;
@@ -123,17 +121,15 @@ void ExactHazardRecognizer::EmitInstruction(SUnit *SU) {
unsigned idx = SU->getInstr()->getDesc().getSchedClass();
for (const InstrStage *IS = ItinData.begin(idx), *E = ItinData.end(idx);
IS != E; ++IS) {
- // If the stages cycles are 0, then we must reserve the FU in the
- // current cycle, but we don't advance the cycle time .
- unsigned StageCycles = std::max(1U, IS->Cycles);
-
// We must reserve one of the stage's units for every cycle the
- // stage is occupied.
- for (unsigned int i = 0; i < StageCycles; ++i) {
- assert((cycle < ScoreboardDepth) && "Scoreboard depth exceeded!");
-
- unsigned index = getFutureIndex(cycle);
- unsigned freeUnits = IS->Units & ~Scoreboard[index];
+ // stage is occupied. FIXME it would be more accurate to reserve
+ // the same unit free in all the cycles.
+ for (unsigned int i = 0; i < IS->getCycles(); ++i) {
+ assert(((cycle + i) < ScoreboardDepth) &&
+ "Scoreboard depth exceeded!");
+
+ unsigned index = getFutureIndex(cycle + i);
+ unsigned freeUnits = IS->getUnits() & ~Scoreboard[index];
// reduce to a single unit
unsigned freeUnit = 0;
@@ -144,10 +140,10 @@ void ExactHazardRecognizer::EmitInstruction(SUnit *SU) {
assert(freeUnit && "No function unit available!");
Scoreboard[index] |= freeUnit;
-
- if (IS->Cycles > 0)
- ++cycle;
}
+
+ // Advance the cycle to the next stage.
+ cycle += IS->getNextCycles();
}
DEBUG(dumpScoreboard());
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 0985265e19..8e5a01d4dd 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -615,7 +615,7 @@ let isReturn = 1, isTerminator = 1, mayLoad = 1 in
[]>;
// On non-Darwin platforms R9 is callee-saved.
-let isCall = 1, Itinerary = IIC_Br,
+let isCall = 1,
Defs = [R0, R1, R2, R3, R12, LR,
D0, D1, D2, D3, D4, D5, D6, D7,
D16, D17, D18, D19, D20, D21, D22, D23,
@@ -652,7 +652,7 @@ let isCall = 1, Itinerary = IIC_Br,
}
// On Darwin R9 is call-clobbered.
-let isCall = 1, Itinerary = IIC_Br,
+let isCall = 1,
Defs = [R0, R1, R2, R3, R9, R12, LR,
D0, D1, D2, D3, D4, D5, D6, D7,
D16, D17, D18, D19, D20, D21, D22, D23,
@@ -685,7 +685,7 @@ let isCall = 1, Itinerary = IIC_Br,
}
}
-let isBranch = 1, isTerminator = 1, Itinerary = IIC_Br in {
+let isBranch = 1, isTerminator = 1 in {
// B is "predicable" since it can be xformed into a Bcc.
let isBarrier = 1 in {
let isPredicable = 1 in
@@ -1057,7 +1057,7 @@ defm BIC : AsI1_bin_irs<0b1110, "bic",
BinOpFrag<(and node:$LHS, (not node:$RHS))>>;
def BFC : I<(outs GPR:$dst), (ins GPR:$src, bf_inv_mask_imm:$imm),
- AddrMode1, Size4Bytes, IndexModeNone, DPFrm, NoItinerary,
+ AddrMode1, Size4Bytes, IndexModeNone, DPFrm, IIC_iALU,
"bfc", " $dst, $imm", "$src = $dst",
[(set GPR:$dst, (and GPR:$src, bf_inv_mask_imm:$imm))]>,
Requires<[IsARM, HasV6T2]> {
@@ -1084,16 +1084,16 @@ def : ARMPat<(and GPR:$src, so_imm_not:$imm),
//
let isCommutable = 1 in
-def MUL : AsMul1I<0b0000000, (outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALU,
+def MUL : AsMul1I<0b0000000, (outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMPY,
"mul", " $dst, $a, $b",
[(set GPR:$dst, (mul GPR:$a, GPR:$b))]>;
def MLA : AsMul1I<0b0000001, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
- IIC_iALU, "mla", " $dst, $a, $b, $c",
+ IIC_iMPY, "mla", " $dst, $a, $b, $c",
[(set GPR:$dst, (add (mul GPR:$a, GPR:$b), GPR:$c))]>;
def MLS : AMul1I<0b0000011, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
- IIC_iALU, "mls", " $dst, $a, $b, $c",
+ IIC_iMPY, "mls", " $dst, $a, $b, $c",
[(set GPR:$dst, (sub GPR:$c, (mul GPR:$a, GPR:$b)))]>,
Requires<[IsARM, HasV6T2]>;
@@ -1101,32 +1101,32 @@ def MLS : AMul1I<0b0000011, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
let neverHasSideEffects = 1 in {
let isCommutable = 1 in {
def SMULL : AsMul1I<0b0000110, (outs GPR:$ldst, GPR:$hdst),
- (ins GPR:$a, GPR:$b), IIC_iALU,
+ (ins GPR:$a, GPR:$b), IIC_iMPY,
"smull", " $ldst, $hdst, $a, $b", []>;
def UMULL : AsMul1I<0b0000100, (outs GPR:$ldst, GPR:$hdst),
- (ins GPR:$a, GPR:$b), IIC_iALU,
+ (ins GPR:$a, GPR:$b), IIC_iMPY,
"umull", " $ldst, $hdst, $a, $b", []>;
}
// Multiply + accumulate
def SMLAL : AsMul1I<0b0000111, (outs GPR:$ldst, GPR:$hdst),
- (ins GPR:$a, GPR:$b), IIC_iALU,
+ (ins GPR:$a, GPR:$b), IIC_iMPY,
"smlal", " $ldst, $hdst, $a, $b", []>;
def UMLAL : AsMul1I<0b0000101, (outs GPR:$ldst, GPR:$hdst),
- (ins GPR:$a, GPR:$b), IIC_iALU,
+ (ins GPR:$a, GPR:$b), IIC_iMPY,
"umlal", " $ldst, $hdst, $a, $b", []>;
def UMAAL : AMul1I <0b0000010, (outs GPR:$ldst, GPR:$hdst),
- (ins GPR:$a, GPR:$b), IIC_iALU,
+ (ins GPR:$a, GPR:$b), IIC_iMPY,
"umaal", " $ldst, $hdst, $a, $b", []>,
Requires<[IsARM, HasV6]>;
} // neverHasSideEffects
// Most significant word multiply
def SMMUL : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
- IIC_iALU, "smmul", " $dst, $a, $b",
+ IIC_iMPY, "smmul", " $dst, $a, $b",
[(set GPR:$dst, (mulhs GPR:$a, GPR:$b))]>,
Requires<[IsARM, HasV6]> {
let Inst{7-4} = 0b0001;
@@ -1134,7 +1134,7 @@ def SMMUL : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
}
def SMMLA : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
- IIC_iALU, "smmla", " $dst, $a, $b, $c",
+ IIC_iMPY, "smmla", " $dst, $a, $b, $c",
[(set GPR:$dst, (add (mulhs GPR:$a, GPR:$b), GPR:$c))]>,
Requires<[IsARM, HasV6]> {
let Inst{7-4} = 0b0001;
@@ -1142,7 +1142,7 @@ def SMMLA : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
def SMMLS : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
- IIC_iALU, "smmls", " $dst, $a, $b, $c",
+ IIC_iMPY, "smmls", " $dst, $a, $b, $c",
[(set GPR:$dst, (sub GPR:$c, (mulhs GPR:$a, GPR:$b)))]>,
Requires<[IsARM, HasV6]> {
let Inst{7-4} = 0b1101;
@@ -1150,7 +1150,7 @@ def SMMLS : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
multiclass AI_smul<string opc, PatFrag opnode> {
def BB : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
- IIC_iALU, !strconcat(opc, "bb"), " $dst, $a, $b",
+ IIC_iMPY, !strconcat(opc, "bb"), " $dst, $a, $b",
[(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16),
(sext_inreg GPR:$b, i16)))]>,
Requires<[IsARM, HasV5TE]> {
@@ -1159,7 +1159,7 @@ multiclass AI_smul<string opc, PatFrag opnode> {
}
def BT : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
- IIC_iALU, !strconcat(opc, "bt"), " $dst, $a, $b",
+ IIC_iMPY, !strconcat(opc, "bt"), " $dst, $a, $b",
[(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16),
(sra GPR:$b, (i32 16))))]>,
Requires<[IsARM, HasV5TE]> {
@@ -1168,7 +1168,7 @@ multiclass AI_smul<string opc, PatFrag opnode> {
}
def TB : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
- IIC_iALU, !strconcat(opc, "tb"), " $dst, $a, $b",
+ IIC_iMPY, !strconcat(opc, "tb"), " $dst, $a, $b",
[(set GPR:$dst, (opnode (sra GPR:$a, (i32 16)),
(sext_inreg GPR:$b, i16)))]>,
Requires<[IsARM, HasV5TE]> {
@@ -1177,7 +1177,7 @@ multiclass AI_smul<string opc, PatFrag opnode> {
}
def TT : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
- IIC_iALU, !strconcat(opc, "tt"), " $dst, $a, $b",
+ IIC_iMPY, !strconcat(opc, "tt"), " $dst, $a, $b",
[(set GPR:$dst, (opnode (sra GPR:$a, (i32 16)),
(sra GPR:$b, (i32 16))))]>,
Requires<[IsARM, HasV5TE]> {
@@ -1186,7 +1186,7 @@ multiclass AI_smul<string opc, PatFrag opnode> {
}
def WB : AMulxyI<0b0001001, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
- IIC_iALU, !strconcat(opc, "wb"), " $dst, $a, $b",
+ IIC_iMPY, !strconcat(opc, "wb"), " $dst, $a, $b",
[(set GPR:$dst, (sra (opnode GPR:$a,
(sext_inreg GPR:$b, i16)), (i32 16)))]>,
Requires<[IsARM, HasV5TE]> {
@@ -1195,7 +1195,7 @@ multiclass AI_smul<string opc, PatFrag opnode> {
}
def WT : AMulxyI<0b0001001, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
- IIC_iALU, !strconcat(opc, "wt"), " $dst, $a, $b",
+ IIC_iMPY, !strconcat(opc, "wt"), " $dst, $a, $b",
[(set GPR:$dst, (sra (opnode GPR:$a,
(sra GPR:$b, (i32 16))), (i32 16)))]>,
Requires<[IsARM, HasV5TE]> {
@@ -1207,7 +1207,7 @@ multiclass AI_smul<string opc, PatFrag opnode> {
multiclass AI_smla<string opc, PatFrag opnode> {
def BB : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
- IIC_iALU, !strconcat(opc, "bb"), " $dst, $a, $b, $acc",
+ IIC_iMPY, !strconcat(opc, "bb"), " $dst, $a, $b, $acc",
[(set GPR:$dst, (add GPR:$acc,
(opnode (sext_inreg GPR:$a, i16),
(sext_inreg GPR:$b, i16))))]>,
@@ -1217,7 +1217,7 @@ multiclass AI_smla<string opc, PatFrag opnode> {
}
def BT : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
- IIC_iALU, !strconcat(opc, "bt"), " $dst, $a, $b, $acc",
+ IIC_iMPY, !strconcat(opc, "bt"), " $dst, $a, $b, $acc",
[(set GPR:$dst, (add GPR:$acc, (opnode (sext_inreg GPR:$a, i16),
(sra GPR:$b, (i32 16)))))]>,
Requires<[IsARM, HasV5TE]> {
@@ -1226,7 +1226,7 @@ multiclass AI_smla<string opc, PatFrag opnode> {
}
def TB : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
- IIC_iALU, !strconcat(opc, "tb"), " $dst, $a, $b, $acc",
+ IIC_iMPY, !strconcat(opc, "tb"), " $dst, $a, $b, $acc",
[(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, (i32 16)),
(sext_inreg GPR:$b, i16))))]>,
Requires<[IsARM, HasV5TE]> {
@@ -1235,7 +1235,7 @@ multiclass AI_smla<string opc, PatFrag opnode> {
}
def TT : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
- IIC_iALU, !strconcat(opc, "tt"), " $dst, $a, $b, $acc",
+ IIC_iMPY, !strconcat(opc, "tt"), " $dst, $a, $b, $acc",
[(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, (i32 16)),
(sra GPR:$b, (i32 16)))))]>,
Requires<[IsARM, HasV5TE]> {
@@ -1244,7 +1244,7 @@ multiclass AI_smla<string opc, PatFrag opnode> {
}
def WB : AMulxyI<0b0001001, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
- IIC_iALU, !strconcat(opc, "wb"), " $dst, $a, $b, $acc",
+ IIC_iMPY, !strconcat(opc, "wb"), " $dst, $a, $b, $acc",
[(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a,
(sext_inreg GPR:$b, i16)), (i32 16))))]>,
Requires<[IsARM, HasV5TE]> {
@@ -1253,7 +1253,7 @@ multiclass AI_smla<string opc, PatFrag opnode> {
}
def WT : AMulxyI<0b0001001, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
- IIC_iALU, !strconcat(opc, "wt"), " $dst, $a, $b, $acc",
+ IIC_iMPY, !strconcat(opc, "wt"), " $dst, $a, $b, $acc",
[(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a,
(sra GPR:$b, (i32 16))), (i32 16))))]>,
Requires<[IsARM, HasV5TE]> {
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
index 0da798e15d..1a82331980 100644
--- a/lib/Target/ARM/ARMInstrThumb.td
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -508,7 +508,7 @@ def tMOVgpr2gpr : T1I<(outs GPR:$dst), (ins GPR:$src), IIC_iALU,
// multiply register
let isCommutable = 1 in
-def tMUL : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iALU,
+def tMUL : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iMPY,
"mul", " $dst, $rhs",
[(set tGPR:$dst, (mul tGPR:$lhs, tGPR:$rhs))]>;
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index 0ef9cc03e0..5800a43047 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -808,80 +808,80 @@ def : T2Pat<(t2_so_imm_not:$src),
// Multiply Instructions.
//
let isCommutable = 1 in
-def t2MUL: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALU,
+def t2MUL: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMPY,
"mul", " $dst, $a, $b",
[(set GPR:$dst, (mul GPR:$a, GPR:$b))]>;
-def t2MLA: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iALU,
+def t2MLA: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMPY,
"mla", " $dst, $a, $b, $c",
[(set GPR:$dst, (add (mul GPR:$a, GPR:$b), GPR:$c))]>;
-def t2MLS: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iALU,
+def t2MLS: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMPY,
"mls", " $dst, $a, $b, $c",
[(set GPR:$dst, (sub GPR:$c, (mul GPR:$a, GPR:$b)))]>;
// Extra precision multiplies with low / high results
let neverHasSideEffects = 1 in {
let isCommutable = 1 in {
-def t2SMULL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iALU,
+def t2SMULL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMPY,
"smull", " $ldst, $hdst, $a, $b", []>;
-def t2UMULL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iALU,
+def t2UMULL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMPY,
"umull", " $ldst, $hdst, $a, $b", []>;
}
// Multiply + accumulate
-def t2SMLAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iALU,
+def t2SMLAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMPY,
"smlal", " $ldst, $hdst, $a, $b", []>;
-def t2UMLAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iALU,
+def t2UMLAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMPY,
"umlal", " $ldst, $hdst, $a, $b", []>;
-def t2UMAAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iALU,
+def t2UMAAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMPY,
"umaal", " $ldst, $hdst, $a, $b", []>;
} // neverHasSideEffects
// Most significant word multiply
-def t2SMMUL : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALU,
+def t2SMMUL : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMPY,
"smmul", " $dst, $a, $b",
[(set GPR:$dst, (mulhs GPR:$a, GPR:$b))]>;
-def t2SMMLA : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iALU,
+def t2SMMLA : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMPY,
"smmla", " $dst, $a, $b, $c",
[(set GPR:$dst, (add (mulhs GPR:$a, GPR:$b), GPR:$c))]>;
-def t2SMMLS : T2I <(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iALU,
+def t2SMMLS : T2I <(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMPY,
"smmls", " $dst, $a, $b, $c",
[(set GPR:$dst, (sub GPR:$c, (mulhs GPR:$a, GPR:$b)))]>;
multiclass T2I_smul<string opc, PatFrag opnode> {
- def BB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALU,
+ def BB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMPY,
!strconcat(opc, "bb"), " $dst, $a, $b",
[(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16),
(sext_inreg GPR:$b, i16)))]>;
- def BT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALU,
+ def BT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMPY,
!strconcat(opc, "bt"), " $dst, $a, $b",
[(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16),
(sra GPR:$b, (i32 16))))]>;
- def TB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALU,
+ def TB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMPY,
!strconcat(opc, "tb"), " $dst, $a, $b",
[(set GPR:$dst, (opnode (sra GPR:$a, (i32 16)),
(sext_inreg GPR:$b, i16)))]>;
- def TT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALU,
+ def TT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMPY,
!strconcat(opc, "tt"), " $dst, $a, $b",
[(set GPR:$dst, (opnode (sra GPR:$a, (i32 16)),
(sra GPR:$b, (i32 16))))]>;
- def WB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALU,
+ def WB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMPY,
!strconcat(opc, "wb"), " $dst, $a, $b",
[(set GPR:$dst, (sra (opnode GPR:$a,
(sext_inreg GPR:$b, i16)), (i32 16)))]>;
- def WT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALU,
+ def WT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMPY,
!strconcat(opc, "wt"), " $dst, $a, $b",
[(set GPR:$dst, (sra (opnode GPR:$a,
(sra GPR:$b, (i32 16))), (i32 16)))]>;
@@ -889,33 +889,33 @@ multiclass T2I_smul<string opc, PatFrag opnode> {
multiclass T2I_smla<string opc, PatFrag opnode> {
- def BB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iALU,
+ def BB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMPY,
!strconcat(opc, "bb"), " $dst, $a, $b, $acc",
[(set GPR:$dst, (add GPR:$acc,
(opnode (sext_inreg GPR:$a, i16),
(sext_inreg GPR:$b, i16))))]>;
- def BT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iALU,
+ def BT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMPY,
!strconcat(opc, "bt"), " $dst, $a, $b, $acc",
[(set GPR:$dst, (add GPR:$acc, (opnode (sext_inreg GPR:$a, i16),
(sra GPR:$b, (i32 16)))))]>;
- def TB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iALU,
+ def TB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMPY,
!strconcat(opc, "tb"), " $dst, $a, $b, $acc",
[(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, (i32 16)),
(sext_inreg GPR:$b, i16))))]>;
- def TT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iALU,
+ def TT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMPY,
!strconcat(opc, "tt"), " $dst, $a, $b, $acc",
[(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, (i32 16)),
(sra GPR:$b, (i32 16)))))]>;
- def WB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iALU,
+ def WB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMPY,
!strconcat(opc, "wb"), " $dst, $a, $b, $acc",
[(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a,
(sext_inreg GPR:$b, i16)), (i32 16))))]>;
- def WT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iALU,
+ def WT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMPY,
!strconcat(opc, "wt"), " $dst, $a, $b, $acc",
[(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a,
(sra GPR:$b, (i32 16))), (i32 16))))]>;
diff --git a/lib/Target/ARM/ARMScheduleV7.td b/lib/Target/ARM/ARMScheduleV7.td
index 8a7b42eb72..a789d71147 100644
--- a/lib/Target/ARM/ARMScheduleV7.td
+++ b/lib/Target/ARM/ARMScheduleV7.td
@@ -16,34 +16,34 @@ def CortexA8Itineraries : ProcessorItineraries<[
// two fully-pipelined integer ALU pipelines
InstrItinData<IIC_iALU , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>]>,
// one fully-pipelined integer Multiply pipeline
- // function units are used in alpha order, so use FU_Pipe1
- // for the Multiple pipeline
- InstrItinData<IIC_iMPY , [InstrStage<1, [FU_Pipe1]>]>,
+ // function units are reserved by the scheduler in reverse alpha order,
+ // so use FU_Pipe0 for the Multiple pipeline
+ InstrItinData<IIC_iMPY , [InstrStage<1, [FU_Pipe0]>]>,
// loads have an extra cycle of latency, but are fully pipelined
- // use a 0 cycle FU_Issue to enforce the 1 load/store per cycle limit
- InstrItinData<IIC_iLoad , [InstrStage<0, [FU_Issue]>,
+ // use FU_Issue to enforce the 1 load/store per cycle limit
+ InstrItinData<IIC_iLoad , [InstrStage<1, [FU_Issue], 0>,
InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<1, [FU_LdSt0]>]>,
// fully-pipelined stores
- // use a 0 cycle FU_Issue to enforce the 1 load/store per cycle limit
- InstrItinData<IIC_iStore , [InstrStage<0, [FU_Issue]>,
+ // use FU_Issue to enforce the 1 load/store per cycle limit
+ InstrItinData<IIC_iStore , [InstrStage<1, [FU_Issue], 0>,
InstrStage<1, [FU_Pipe0, FU_Pipe1]>]>,
// no delay slots, so the latency of a branch is unimportant
InstrItinData<IIC_Br , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>]>,
- // VFP ALU is not pipelined so stall all issues
- // FIXME assume NFP pipeline and 7 cycle non-pipelined latency
- InstrItinData<IIC_fpALU , [InstrStage<7, [FU_Pipe0, FU_Pipe1]>]>,
+ // NFP ALU is not pipelined so stall all issues
+ InstrItinData<IIC_fpALU , [InstrStage<7, [FU_Pipe0], 0>,
+ InstrStage<7, [FU_Pipe1], 0>]>,
// VFP MPY is not pipelined so stall all issues
- // FIXME assume NFP pipeline and 7 cycle non-pipelined latency
- InstrItinData<IIC_fpMPY , [InstrStage<7, [FU_Pipe0, FU_Pipe1]>]>,
+ InstrItinData<IIC_fpMPY , [InstrStage<7, [FU_Pipe0], 0>,
+ InstrStage<7, [FU_Pipe1], 0>]>,
// loads have an extra cycle of latency, but are fully pipelined
- // use a 0 cycle FU_Issue to enforce the 1 load/store per cycle limit
- InstrItinData<IIC_fpLoad , [InstrStage<0, [FU_Issue]>,
+ // use FU_Issue to enforce the 1 load/store per cycle limit
+ InstrItinData<IIC_fpLoad , [InstrStage<1, [FU_Issue], 0>,
InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<1, [FU_LdSt0]>]>,
- // use a 0 cycle FU_Issue to enforce the 1 load/store per cycle limit
- InstrItinData<IIC_fpStore , [InstrStage<0, [FU_Issue]>,
+ // use FU_Issue to enforce the 1 load/store per cycle limit
+ InstrItinData<IIC_fpStore , [InstrStage<1, [FU_Issue], 0>,
InstrStage<1, [FU_Pipe0, FU_Pipe1]>]>
]>;
diff --git a/utils/TableGen/SubtargetEmitter.cpp b/utils/TableGen/SubtargetEmitter.cpp
index 919ac664ef..4a0bacd19c 100644
--- a/utils/TableGen/SubtargetEmitter.cpp
+++ b/utils/TableGen/SubtargetEmitter.cpp
@@ -215,7 +215,7 @@ void SubtargetEmitter::FormItineraryString(Record *ItinData,
// Next stage
const Record *Stage = StageList[i];
- // Form string as ,{ cycles, u1 | u2 | ... | un }
+ // Form string as ,{ cycles, u1 | u2 | ... | un, timeinc }
int Cycles = Stage->getValueAsInt("Cycles");
ItinString += " { " + itostr(Cycles) + ", ";
@@ -229,6 +229,9 @@ void SubtargetEmitter::FormItineraryString(Record *ItinData,
if (++j < M) ItinString += " | ";
}
+ int TimeInc = Stage->getValueAsInt("TimeInc");
+ ItinString += ", " + itostr(TimeInc);
+
// Close off stage
ItinString += " }";
if (++i < N) ItinString += ", ";
@@ -252,7 +255,7 @@ void SubtargetEmitter::EmitStageData(raw_ostream &OS,
// Begin stages table
OS << "static const llvm::InstrStage Stages[] = {\n"
- " { 0, 0 }, // No itinerary\n";
+ " { 0, 0, 0 }, // No itinerary\n";
unsigned StageCount = 1;
unsigned ItinEnum = 1;
@@ -289,7 +292,7 @@ void SubtargetEmitter::EmitStageData(raw_ostream &OS,
// If new itinerary
if (Find == 0) {
- // Emit as { cycles, u1 | u2 | ... | un }, // index
+ // Emit as { cycles, u1 | u2 | ... | un, timeinc }, // index
OS << ItinString << ", // " << ItinEnum << "\n";
// Record Itin class number.
ItinMap[ItinString] = Find = StageCount;
@@ -313,7 +316,7 @@ void SubtargetEmitter::EmitStageData(raw_ostream &OS,
}
// Closing stage
- OS << " { 0, 0 } // End itinerary\n";
+ OS << " { 0, 0, 0 } // End itinerary\n";
// End stages table
OS << "};\n";