summaryrefslogtreecommitdiff
path: root/lib/Target/ARM
diff options
context:
space:
mode:
authorEvan Cheng <evan.cheng@apple.com>2010-09-10 01:29:16 +0000
committerEvan Cheng <evan.cheng@apple.com>2010-09-10 01:29:16 +0000
commit3ef1c8759a20167457eb7fd82ebcaffe7ccaa1d1 (patch)
treeffcb01b1621bcedb427d701cfaee9ea9a19b0a2c /lib/Target/ARM
parent920a2089d9b737820631bc6de4c4fb9fa9ad1e07 (diff)
downloadllvm-3ef1c8759a20167457eb7fd82ebcaffe7ccaa1d1.tar.gz
llvm-3ef1c8759a20167457eb7fd82ebcaffe7ccaa1d1.tar.bz2
llvm-3ef1c8759a20167457eb7fd82ebcaffe7ccaa1d1.tar.xz
Teach if-converter to be more careful with predicating instructions that would
take multiple cycles to decode. For the current if-converter clients (actually only ARM), the instructions that are predicated on false are not nops. They would still take machine cycles to decode. Micro-coded instructions such as LDM / STM can potentially take multiple cycles to decode. If-converter should not treat them as non-micro-coded simple instructions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@113570 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target/ARM')
-rw-r--r--lib/Target/ARM/ARM.td15
-rw-r--r--lib/Target/ARM/ARMBaseInstrInfo.cpp40
-rw-r--r--lib/Target/ARM/ARMBaseInstrInfo.h2
-rw-r--r--lib/Target/ARM/ARMISelLowering.cpp4
-rw-r--r--lib/Target/ARM/ARMISelLowering.h2
-rw-r--r--lib/Target/ARM/ARMSubtarget.cpp3
-rw-r--r--lib/Target/ARM/ARMSubtarget.h10
-rw-r--r--lib/Target/ARM/ARMTargetMachine.h4
-rw-r--r--lib/Target/ARM/Thumb2HazardRecognizer.h2
-rw-r--r--lib/Target/ARM/Thumb2InstrInfo.cpp2
-rw-r--r--lib/Target/ARM/Thumb2InstrInfo.h2
11 files changed, 62 insertions, 24 deletions
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index d6a8f19724..f3693e3abb 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -91,6 +91,15 @@ def ArchV7M : SubtargetFeature<"v7m", "ARMArchVersion", "V7M",
include "ARMSchedule.td"
+// ARM processor families.
+def ProcOthers : SubtargetFeature<"others", "ARMProcFamily", "Others",
+ "One of the other ARM processor families">;
+def ProcA8 : SubtargetFeature<"a8", "ARMProcFamily", "CortexA8",
+ "Cortex-A8 ARM processors",
+ [FeatureSlowFPBrcc, FeatureNEONForFP]>;
+def ProcA9 : SubtargetFeature<"a9", "ARMProcFamily", "CortexA9",
+ "Cortex-A9 ARM processors">;
+
class ProcNoItin<string Name, list<SubtargetFeature> Features>
: Processor<Name, GenericItineraries, Features>;
@@ -150,10 +159,10 @@ def : Processor<"arm1156t2f-s", ARMV6Itineraries, [ArchV6T2, FeatureVFP2]>;
// V7 Processors.
def : Processor<"cortex-a8", CortexA8Itineraries,
- [ArchV7A, FeatureHasSlowVMLx,
- FeatureSlowFPBrcc, FeatureNEONForFP, FeatureT2XtPk]>;
+ [ArchV7A, ProcA8,
+ FeatureHasSlowVMLx, FeatureT2XtPk]>;
def : Processor<"cortex-a9", CortexA9Itineraries,
- [ArchV7A, FeatureT2XtPk]>;
+ [ArchV7A, ProcA9, FeatureT2XtPk]>;
// V7M Processors.
def : ProcNoItin<"cortex-m3", [ArchV7M]>;
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index c824b8bce9..e7b35c6928 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -1415,13 +1415,13 @@ ConvertToSetZeroFlag(MachineInstr *MI, MachineInstr *CmpInstr) const {
unsigned
ARMBaseInstrInfo::getNumMicroOps(const MachineInstr *MI,
- const InstrItineraryData &ItinData) const {
- if (ItinData.isEmpty())
+ const InstrItineraryData *ItinData) const {
+ if (!ItinData || ItinData->isEmpty())
return 1;
const TargetInstrDesc &Desc = MI->getDesc();
unsigned Class = Desc.getSchedClass();
- unsigned UOps = ItinData.Itineratries[Class].NumMicroOps;
+ unsigned UOps = ItinData->Itineratries[Class].NumMicroOps;
if (UOps)
return UOps;
@@ -1430,16 +1430,19 @@ ARMBaseInstrInfo::getNumMicroOps(const MachineInstr *MI,
default:
llvm_unreachable("Unexpected multi-uops instruction!");
break;
+ case ARM::VLDMQ:
case ARM::VSTMQ:
return 2;
// The number of uOps for load / store multiple are determined by the number
// registers.
- // On Cortex-A8, each odd / even pair of register loads / stores
- // (e.g. r5 + r6) can be completed on the same cycle. The minimum is
- // 2. For VFP / NEON load / store multiple, the formula is
+ // On Cortex-A8, each pair of register loads / stores can be scheduled on the
+ // same cycle. The scheduling for the first load / store must be done
+ // separately by assuming the address is not 64-bit aligned.
+ // On Cortex-A9, the formula is simply (#reg / 2) + (#reg % 2). If the address
+ // is not 64-bit aligned, then AGU would take an extra cycle.
+ // For VFP / NEON load / store multiple, the formula is
// (#reg / 2) + (#reg % 2) + 1.
- // On Cortex-A9, the formula is simply (#reg / 2) + (#reg % 2).
case ARM::VLDMD:
case ARM::VLDMS:
case ARM::VLDMD_UPD:
@@ -1467,11 +1470,24 @@ ARMBaseInstrInfo::getNumMicroOps(const MachineInstr *MI,
case ARM::t2LDM_UPD:
case ARM::t2STM:
case ARM::t2STM_UPD: {
- // FIXME: Distinquish between Cortex-A8 / Cortex-A9 and other processor
- // families.
- unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands();
- UOps = (NumRegs / 2) + (NumRegs % 2);
- return (UOps > 2) ? UOps : 2;
+ unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands() + 1;
+ if (Subtarget.isCortexA8()) {
+ // 4 registers would be issued: 1, 2, 1.
+ // 5 registers would be issued: 1, 2, 2.
+ return 1 + (NumRegs / 2);
+ } else if (Subtarget.isCortexA9()) {
+ UOps = (NumRegs / 2);
+ // If there is an odd number of registers or if it's not 64-bit aligned,
+ // then it takes an extra AGU (Address Generation Unit) cycle.
+ if ((NumRegs % 2) ||
+ !MI->hasOneMemOperand() ||
+ (*MI->memoperands_begin())->getAlignment() < 8)
+ ++UOps;
+ return UOps;
+ } else {
+ // Assume the worst.
+ return NumRegs;
+ }
}
}
}
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index b3abdeef96..f471b6772e 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -350,7 +350,7 @@ public:
MachineInstr *CmpInstr) const;
virtual unsigned getNumMicroOps(const MachineInstr *MI,
- const InstrItineraryData &ItinData) const;
+ const InstrItineraryData *ItinData) const;
};
static inline
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index d4198a5ea1..637c6e3b09 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -177,6 +177,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
: TargetLowering(TM, createTLOF(TM)) {
Subtarget = &TM.getSubtarget<ARMSubtarget>();
RegInfo = TM.getRegisterInfo();
+ Itins = TM.getInstrItineraryData();
if (Subtarget->isTargetDarwin()) {
// Uses VFP for Thumb libfuncs if available.
@@ -749,8 +750,7 @@ Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
if (TID.mayLoad())
return Sched::Latency;
- const InstrItineraryData &Itins = getTargetMachine().getInstrItineraryData();
- if (!Itins.isEmpty() && Itins.getStageLatency(TID.getSchedClass()) > 2)
+ if (!Itins->isEmpty() && Itins->getStageLatency(TID.getSchedClass()) > 2)
return Sched::Latency;
return Sched::RegPressure;
}
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index ba9ea7f15e..58b8b9eb44 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -301,6 +301,8 @@ namespace llvm {
const TargetRegisterInfo *RegInfo;
+ const InstrItineraryData *Itins;
+
/// ARMPCLabelIndex - Keep track of the number of ARM PC labels created.
///
unsigned ARMPCLabelIndex;
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index cb539f4c01..8a4052bc0c 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -30,6 +30,7 @@ UseMOVT("arm-use-movt",
ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS,
bool isT)
: ARMArchVersion(V4)
+ , ARMProcFamily(Others)
, ARMFPUType(None)
, UseNEONForSinglePrecisionFP(false)
, SlowVMLx(false)
@@ -50,7 +51,7 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS,
, CPUString("generic")
, TargetType(isELF) // Default to ELF unless otherwise specified.
, TargetABI(ARM_ABI_APCS) {
- // default to soft float ABI
+ // Default to soft float ABI
if (FloatABIType == FloatABI::Default)
FloatABIType = FloatABI::Soft;
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index 67e58038ee..34f571fd70 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -29,6 +29,10 @@ protected:
V4, V4T, V5T, V5TE, V6, V6M, V6T2, V7A, V7M
};
+ enum ARMProcFamilyEnum {
+ Others, CortexA8, CortexA9
+ };
+
enum ARMFPEnum {
None, VFPv2, VFPv3, NEON
};
@@ -42,6 +46,9 @@ protected:
/// V6, V6T2, V7A, V7M.
ARMArchEnum ARMArchVersion;
+ /// ARMProcFamily - ARM processor family: Cortex-A8, Cortex-A9, and others.
+ ARMProcFamilyEnum ARMProcFamily;
+
/// ARMFPUType - Floating Point Unit type.
ARMFPEnum ARMFPUType;
@@ -143,6 +150,9 @@ protected:
bool hasV6T2Ops() const { return ARMArchVersion >= V6T2; }
bool hasV7Ops() const { return ARMArchVersion >= V7A; }
+ bool isCortexA8() const { return ARMProcFamily == CortexA8; }
+ bool isCortexA9() const { return ARMProcFamily == CortexA9; }
+
bool hasARMOps() const { return !NoARM; }
bool hasVFP2() const { return ARMFPUType >= VFPv2; }
diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h
index 17e5425a9d..9b375d76c7 100644
--- a/lib/Target/ARM/ARMTargetMachine.h
+++ b/lib/Target/ARM/ARMTargetMachine.h
@@ -45,8 +45,8 @@ public:
virtual const ARMFrameInfo *getFrameInfo() const { return &FrameInfo; }
virtual ARMJITInfo *getJITInfo() { return &JITInfo; }
virtual const ARMSubtarget *getSubtargetImpl() const { return &Subtarget; }
- virtual const InstrItineraryData getInstrItineraryData() const {
- return InstrItins;
+ virtual const InstrItineraryData *getInstrItineraryData() const {
+ return &InstrItins;
}
// Pass Pipeline Configuration
diff --git a/lib/Target/ARM/Thumb2HazardRecognizer.h b/lib/Target/ARM/Thumb2HazardRecognizer.h
index 472665862e..aa4411f186 100644
--- a/lib/Target/ARM/Thumb2HazardRecognizer.h
+++ b/lib/Target/ARM/Thumb2HazardRecognizer.h
@@ -26,7 +26,7 @@ class Thumb2HazardRecognizer : public PostRAHazardRecognizer {
MachineInstr *ITBlockMIs[4];
public:
- Thumb2HazardRecognizer(const InstrItineraryData &ItinData) :
+ Thumb2HazardRecognizer(const InstrItineraryData *ItinData) :
PostRAHazardRecognizer(ItinData) {}
virtual HazardType getHazardType(SUnit *SU);
diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp
index 442f41da8a..962b312ae4 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -194,7 +194,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
}
ScheduleHazardRecognizer *Thumb2InstrInfo::
-CreateTargetPostRAHazardRecognizer(const InstrItineraryData &II) const {
+CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II) const {
return (ScheduleHazardRecognizer *)new Thumb2HazardRecognizer(II);
}
diff --git a/lib/Target/ARM/Thumb2InstrInfo.h b/lib/Target/ARM/Thumb2InstrInfo.h
index 3a9f8b194d..b66be8eac4 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.h
+++ b/lib/Target/ARM/Thumb2InstrInfo.h
@@ -72,7 +72,7 @@ public:
const Thumb2RegisterInfo &getRegisterInfo() const { return RI; }
ScheduleHazardRecognizer *
- CreateTargetPostRAHazardRecognizer(const InstrItineraryData &II) const;
+ CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II) const;
};
/// getITInstrPredicate - Valid only in Thumb2 mode. This function is identical