diff options
author | Tim Northover <tnorthover@apple.com> | 2014-04-01 13:22:02 +0000 |
---|---|---|
committer | Tim Northover <tnorthover@apple.com> | 2014-04-01 13:22:02 +0000 |
commit | c077472250ef27e514f5ad44f3b258a18a70f065 (patch) | |
tree | 29202ba7c1bc69f0b720c6d9a43168d066b1aa78 /lib/Target/ARM | |
parent | 279edf967edc1b2dcaaad3e263972cb4fe83b35d (diff) | |
download | llvm-c077472250ef27e514f5ad44f3b258a18a70f065.tar.gz llvm-c077472250ef27e514f5ad44f3b258a18a70f065.tar.bz2 llvm-c077472250ef27e514f5ad44f3b258a18a70f065.tar.xz |
ARM: add cyclone CPU with ZeroCycleZeroing feature.
The Cyclone CPU is similar to swift for most LLVM purposes, but does have two
preferred instructions for zeroing a VFP register. This teaches LLVM about
them.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@205309 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target/ARM')
-rw-r--r-- | lib/Target/ARM/ARM.td | 24 | ||||
-rw-r--r-- | lib/Target/ARM/ARMInstrInfo.td | 1 | ||||
-rw-r--r-- | lib/Target/ARM/ARMInstrNEON.td | 20 | ||||
-rw-r--r-- | lib/Target/ARM/ARMSubtarget.cpp | 1 | ||||
-rw-r--r-- | lib/Target/ARM/ARMSubtarget.h | 5 |
5 files changed, 45 insertions, 6 deletions
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index 27bbcc22b6..7916ccc180 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -73,6 +73,11 @@ def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true", def FeatureCRC : SubtargetFeature<"crc", "HasCRC", "true", "Enable support for CRC instructions">; +// Cyclone has preferred instructions for zeroing VFP registers, which can +// execute in 0 cycles. +def FeatureZCZeroing : SubtargetFeature<"zcz", "HasZeroCycleZeroing", "true", + "Has zero-cycle zeroing instructions">; + // Some processors have FP multiply-accumulate instructions that don't // play nicely with other VFP / NEON instructions, and it's generally better // to just not use them. @@ -361,6 +366,13 @@ def : ProcessorModel<"cortex-a15", CortexA9Model, FeatureDSPThumb2, FeatureHasRAS, FeatureAClass]>; +// FIXME: krait has currently the same Schedule model as A9 +def : ProcessorModel<"krait", CortexA9Model, + [ProcKrait, HasV7Ops, + FeatureNEON, FeatureDB, + FeatureDSPThumb2, FeatureHasRAS, + FeatureAClass]>; + // FIXME: R5 has currently the same ProcessorModel as A8. def : ProcessorModel<"cortex-r5", CortexA8Model, [ProcR5, HasV7Ops, FeatureDB, @@ -395,12 +407,12 @@ def : ProcNoItin<"cortex-a57", [ProcA57, HasV8Ops, FeatureAClass, FeatureDB, FeatureFPARMv8, FeatureNEON, FeatureDSPThumb2]>; -// FIXME: krait has currently the same Schedule model as A9 -def : ProcessorModel<"krait", CortexA9Model, - [ProcKrait, HasV7Ops, - FeatureNEON, FeatureDB, - FeatureDSPThumb2, FeatureHasRAS, - FeatureAClass]>; +// Cyclone is very similar to swift +def : ProcessorModel<"cyclone", SwiftModel, + [ProcSwift, HasV8Ops, HasV7Ops, + FeatureCrypto, FeatureFPARMv8, + FeatureDB,FeatureDSPThumb2, + FeatureHasRAS, FeatureZCZeroing]>; //===----------------------------------------------------------------------===// // Register File Description diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index cc748e19c8..dfcc11edcd 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -244,6 +244,7 @@ def HasMP : Predicate<"Subtarget->hasMPExtension()">, def HasTrustZone : Predicate<"Subtarget->hasTrustZone()">, AssemblerPredicate<"FeatureTrustZone", "TrustZone">; +def HasZCZ : Predicate<"Subtarget->hasZeroCycleZeroing()">; def UseNEONForFP : Predicate<"Subtarget->useNEONForSinglePrecisionFP()">; def DontUseNEONForFP : Predicate<"!Subtarget->useNEONForSinglePrecisionFP()">; def IsThumb : Predicate<"Subtarget->isThumb()">, diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index b18eac55d8..0d46c49bcf 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -5245,6 +5245,26 @@ def VMOVv4f32 : N1ModImm<1, 0b000, 0b1111, 0, 1, 0, 1, (outs QPR:$Vd), [(set QPR:$Vd, (v4f32 (NEONvmovFPImm timm:$SIMM)))]>; } // isReMaterializable + +// On some CPUs the two instructions "vmov.i32 dD, #0" and "vmov.i32 qD, #0" +// require zero cycles to execute so they should be used wherever possible for +// setting a register to zero. + +// Even without these pseudo-insts we would probably end up with the correct +// instruction, but we could not mark the general ones with "isAsCheapAsAMove" +// since they are sometimes rather expensive (in general). + +let AddedComplexity = 50, isAsCheapAsAMove = 1, isReMaterializable = 1 in { + def VMOVD0 : ARMPseudoExpand<(outs DPR:$Vd), (ins), 4, IIC_VMOVImm, + [(set DPR:$Vd, (v2i32 NEONimmAllZerosV))], + (VMOVv2i32 DPR:$Vd, 0, (ops 14, zero_reg))>, + Requires<[HasZCZ]>; + def VMOVQ0 : ARMPseudoExpand<(outs QPR:$Vd), (ins), 4, IIC_VMOVImm, + [(set QPR:$Vd, (v4i32 NEONimmAllZerosV))], + (VMOVv4i32 QPR:$Vd, 0, (ops 14, zero_reg))>, + Requires<[HasZCZ]>; +} + // VMOV : Vector Get Lane (move scalar to ARM core register) def VGETLNs8 : NVGetLane<{1,1,1,0,0,1,?,1}, 0b1011, {?,?}, diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index a290136f6b..0dec1c406a 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -134,6 +134,7 @@ void ARMSubtarget::initializeEnvironment() { HasTrustZone = false; HasCrypto = false; HasCRC = false; + HasZeroCycleZeroing = false; AllowsUnalignedMem = false; Thumb2DSP = false; UseNaClTrap = false; diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index 2ce99c890f..e76cc85a1a 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -177,6 +177,10 @@ protected: /// HasCRC - if true, processor supports CRC instructions bool HasCRC; + /// If true, the instructions "vmov.i32 d0, #0" and "vmov.i32 q0, #0" are + /// particularly effective at zeroing a VFP register. + bool HasZeroCycleZeroing; + /// AllowsUnalignedMem - If true, the subtarget allows unaligned memory /// accesses for some types. For details, see /// ARMTargetLowering::allowsUnalignedMemoryAccesses(). @@ -298,6 +302,7 @@ public: bool isFPOnlySP() const { return FPOnlySP; } bool hasPerfMon() const { return HasPerfMon; } bool hasTrustZone() const { return HasTrustZone; } + bool hasZeroCycleZeroing() const { return HasZeroCycleZeroing; } bool prefers32BitThumb() const { return Pref32BitThumb; } bool avoidCPSRPartialUpdate() const { return AvoidCPSRPartialUpdate; } bool avoidMOVsShifterOperand() const { return AvoidMOVsShifterOperand; } |