summaryrefslogtreecommitdiff
path: root/lib/Target/ARM
diff options
context:
space:
mode:
authorBob Wilson <bob.wilson@apple.com>2010-11-02 21:18:25 +0000
committerBob Wilson <bob.wilson@apple.com>2010-11-02 21:18:25 +0000
commitd0c6bc220433fab06bc1507f963ea5883fdc4f69 (patch)
tree45723c59fff45b9fdd61186f309050c97caa6464 /lib/Target/ARM
parentd2f3794e4dba7a397eaae62114fffe46213c7d41 (diff)
downloadllvm-d0c6bc220433fab06bc1507f963ea5883fdc4f69.tar.gz
llvm-d0c6bc220433fab06bc1507f963ea5883fdc4f69.tar.bz2
llvm-d0c6bc220433fab06bc1507f963ea5883fdc4f69.tar.xz
Add NEON VST1-lane instructions. Partial fix for Radar 8599955.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@118069 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target/ARM')
-rw-r--r--lib/Target/ARM/ARMExpandPseudoInsts.cpp19
-rw-r--r--lib/Target/ARM/ARMInstrNEON.td33
-rw-r--r--lib/Target/ARM/ARMSchedule.td2
-rw-r--r--lib/Target/ARM/ARMScheduleA8.td12
-rw-r--r--lib/Target/ARM/ARMScheduleA9.td18
5 files changed, 80 insertions, 4 deletions
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index 774324b452..4c556479d1 100644
--- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -111,11 +111,11 @@ namespace {
static const NEONLdStTableEntry NEONLdStTable[] = {
{ ARM::VLD1LNq16Pseudo, ARM::VLD1LNd16, true, false, EvenDblSpc, 1, 4 },
-{ ARM::VLD1LNq16Pseudo_UPD, ARM::VLD1LNd16_UPD, true, false, EvenDblSpc, 1, 4 },
+{ ARM::VLD1LNq16Pseudo_UPD, ARM::VLD1LNd16_UPD, true, true, EvenDblSpc, 1, 4 },
{ ARM::VLD1LNq32Pseudo, ARM::VLD1LNd32, true, false, EvenDblSpc, 1, 2 },
-{ ARM::VLD1LNq32Pseudo_UPD, ARM::VLD1LNd32_UPD, true, false, EvenDblSpc, 1, 2 },
+{ ARM::VLD1LNq32Pseudo_UPD, ARM::VLD1LNd32_UPD, true, true, EvenDblSpc, 1, 2 },
{ ARM::VLD1LNq8Pseudo, ARM::VLD1LNd8, true, false, EvenDblSpc, 1, 8 },
-{ ARM::VLD1LNq8Pseudo_UPD, ARM::VLD1LNd8_UPD, true, false, EvenDblSpc, 1, 8 },
+{ ARM::VLD1LNq8Pseudo_UPD, ARM::VLD1LNd8_UPD, true, true, EvenDblSpc, 1, 8 },
{ ARM::VLD1d64QPseudo, ARM::VLD1d64Q, true, false, SingleSpc, 4, 1 },
{ ARM::VLD1d64QPseudo_UPD, ARM::VLD1d64Q_UPD, true, true, SingleSpc, 4, 1 },
@@ -206,6 +206,13 @@ static const NEONLdStTableEntry NEONLdStTable[] = {
{ ARM::VLD4q8Pseudo_UPD, ARM::VLD4q8_UPD, true, true, EvenDblSpc, 4, 8 },
{ ARM::VLD4q8oddPseudo_UPD, ARM::VLD4q8_UPD, true, true, OddDblSpc, 4, 8 },
+{ ARM::VST1LNq16Pseudo, ARM::VST1LNd16, false, false, EvenDblSpc, 1, 4 },
+{ ARM::VST1LNq16Pseudo_UPD, ARM::VST1LNd16_UPD,false, true, EvenDblSpc, 1, 4 },
+{ ARM::VST1LNq32Pseudo, ARM::VST1LNd32, false, false, EvenDblSpc, 1, 2 },
+{ ARM::VST1LNq32Pseudo_UPD, ARM::VST1LNd32_UPD,false, true, EvenDblSpc, 1, 2 },
+{ ARM::VST1LNq8Pseudo, ARM::VST1LNd8, false, false, EvenDblSpc, 1, 8 },
+{ ARM::VST1LNq8Pseudo_UPD, ARM::VST1LNd8_UPD, false, true, EvenDblSpc, 1, 8 },
+
{ ARM::VST1d64QPseudo, ARM::VST1d64Q, false, false, SingleSpc, 4, 1 },
{ ARM::VST1d64QPseudo_UPD, ARM::VST1d64Q_UPD, false, true, SingleSpc, 4, 1 },
{ ARM::VST1d64TPseudo, ARM::VST1d64T, false, false, SingleSpc, 3, 1 },
@@ -989,6 +996,12 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
case ARM::VLD4LNd32Pseudo_UPD:
case ARM::VLD4LNq16Pseudo_UPD:
case ARM::VLD4LNq32Pseudo_UPD:
+ case ARM::VST1LNq8Pseudo:
+ case ARM::VST1LNq16Pseudo:
+ case ARM::VST1LNq32Pseudo:
+ case ARM::VST1LNq8Pseudo_UPD:
+ case ARM::VST1LNq16Pseudo_UPD:
+ case ARM::VST1LNq32Pseudo_UPD:
case ARM::VST2LNd8Pseudo:
case ARM::VST2LNd16Pseudo:
case ARM::VST2LNd32Pseudo:
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index 8927789567..f17ce836e4 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -1087,6 +1087,8 @@ def VST4q8oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
+} // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1
+
// Classes for VST*LN pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
class VSTQLNPseudo<InstrItinClass itin>
@@ -1112,7 +1114,36 @@ class VSTQQQQLNWBPseudo<InstrItinClass itin>
nohash_imm:$lane), itin, "$addr.addr = $wb">;
// VST1LN : Vector Store (single element from one lane)
-// FIXME: Not yet implemented.
+class VST1LN<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<1, 0b00, op11_8, op7_4, (outs),
+ (ins addrmode6:$addr, DPR:$src, nohash_imm:$lane),
+ IIC_VST1ln, "vst1", Dt, "\\{$src[$lane]\\}, $addr", "", []>;
+
+def VST1LNd8 : VST1LN<0b0000, {?,?,?,0}, "8">;
+def VST1LNd16 : VST1LN<0b0100, {?,?,0,?}, "16">;
+def VST1LNd32 : VST1LN<0b1000, {?,0,?,?}, "32">;
+
+def VST1LNq8Pseudo : VSTQLNPseudo<IIC_VST1ln>;
+def VST1LNq16Pseudo : VSTQLNPseudo<IIC_VST1ln>;
+def VST1LNq32Pseudo : VSTQLNPseudo<IIC_VST1ln>;
+
+let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
+
+// ...with address register writeback:
+class VST1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset,
+ DPR:$src, nohash_imm:$lane), IIC_VST1lnu, "vst1", Dt,
+ "\\{$src[$lane]\\}, $addr$offset",
+ "$addr.addr = $wb", []>;
+
+def VST1LNd8_UPD : VST1LNWB<0b0000, {?,?,?,0}, "8">;
+def VST1LNd16_UPD : VST1LNWB<0b0100, {?,?,0,?}, "16">;
+def VST1LNd32_UPD : VST1LNWB<0b1000, {?,0,?,?}, "32">;
+
+def VST1LNq8Pseudo_UPD : VSTQLNWBPseudo<IIC_VST1lnu>;
+def VST1LNq16Pseudo_UPD : VSTQLNWBPseudo<IIC_VST1lnu>;
+def VST1LNq32Pseudo_UPD : VSTQLNWBPseudo<IIC_VST1lnu>;
// VST2LN : Vector Store (single 2-element structure from one lane)
class VST2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
diff --git a/lib/Target/ARM/ARMSchedule.td b/lib/Target/ARM/ARMSchedule.td
index 7f4d2bbd44..c35cadb12c 100644
--- a/lib/Target/ARM/ARMSchedule.td
+++ b/lib/Target/ARM/ARMSchedule.td
@@ -158,6 +158,8 @@ def IIC_VST1u : InstrItinClass;
def IIC_VST1x2u : InstrItinClass;
def IIC_VST1x3u : InstrItinClass;
def IIC_VST1x4u : InstrItinClass;
+def IIC_VST1ln : InstrItinClass;
+def IIC_VST1lnu : InstrItinClass;
def IIC_VST2 : InstrItinClass;
def IIC_VST2x2 : InstrItinClass;
def IIC_VST2u : InstrItinClass;
diff --git a/lib/Target/ARM/ARMScheduleA8.td b/lib/Target/ARM/ARMScheduleA8.td
index b5c8def42f..25bdaa2753 100644
--- a/lib/Target/ARM/ARMScheduleA8.td
+++ b/lib/Target/ARM/ARMScheduleA8.td
@@ -601,6 +601,18 @@ def CortexA8Itineraries : ProcessorItineraries<
InstrStage<3, [A8_LSPipe]>],
[2, 1, 1, 1, 1, 1, 2, 2]>,
//
+ // VST1ln
+ InstrItinData<IIC_VST1ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<2, [A8_NLSPipe], 1>,
+ InstrStage<2, [A8_LSPipe]>],
+ [1, 1, 1]>,
+ //
+ // VST1lnu
+ InstrItinData<IIC_VST1lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<2, [A8_NLSPipe], 1>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 1, 1, 1, 1]>,
+ //
// VST2
InstrItinData<IIC_VST2, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<2, [A8_NLSPipe], 1>,
diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td
index c78f59383f..fb2c24d161 100644
--- a/lib/Target/ARM/ARMScheduleA9.td
+++ b/lib/Target/ARM/ARMScheduleA9.td
@@ -1005,6 +1005,24 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrStage<3, [A9_LSUnit]>],
[2, 1, 1, 1, 1, 1, 2, 2]>,
//
+ // VST1ln
+ InstrItinData<IIC_VST1ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 1>,
+ InstrStage<2, [A9_LSUnit]>],
+ [1, 1, 1]>,
+ //
+ // VST1lnu
+ InstrItinData<IIC_VST1lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 1>,
+ InstrStage<3, [A9_LSUnit]>],
+ [2, 1, 1, 1, 1]>,
+ //
// VST2
InstrItinData<IIC_VST2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,