summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorJim Grosbach <grosbach@apple.com>2011-12-14 21:32:11 +0000
committerJim Grosbach <grosbach@apple.com>2011-12-14 21:32:11 +0000
commitbb3a2e4d0defc6854d37384d80858037dbbc5f20 (patch)
treee540cd99adbfc84190ab3e3dd6bea0fd3d3cf68c /lib
parent20accfc6c7b22b22193eb90c53921f71c1202a73 (diff)
downloadllvm-bb3a2e4d0defc6854d37384d80858037dbbc5f20.tar.gz
llvm-bb3a2e4d0defc6854d37384d80858037dbbc5f20.tar.bz2
llvm-bb3a2e4d0defc6854d37384d80858037dbbc5f20.tar.xz
ARM NEON refactor VST2 w/ writeback instructions.
In addition to improving the representation, this adds support for assembly parsing of these instructions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@146588 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r--lib/Target/ARM/ARMExpandPseudoInsts.cpp36
-rw-r--r--lib/Target/ARM/ARMISelDAGToDAG.cpp21
-rw-r--r--lib/Target/ARM/ARMInstrNEON.td100
-rw-r--r--lib/Target/ARM/Disassembler/ARMDisassembler.cpp27
4 files changed, 130 insertions, 54 deletions
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index 69011d73ca..099278286f 100644
--- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -308,18 +308,24 @@ static const NEONLdStTableEntry NEONLdStTable[] = {
{ ARM::VST2LNq32Pseudo_UPD, ARM::VST2LNq32_UPD, false, true, true, EvenDblSpc, 2, 2,true},
{ ARM::VST2d16Pseudo, ARM::VST2d16, false, false, false, SingleSpc, 2, 4 ,false},
-{ ARM::VST2d16Pseudo_UPD, ARM::VST2d16_UPD, false, true, true, SingleSpc, 2, 4 ,false},
+{ ARM::VST2d16PseudoWB_fixed, ARM::VST2d16wb_fixed, false, true, false, SingleSpc, 2, 4 ,false},
+{ ARM::VST2d16PseudoWB_register, ARM::VST2d16wb_register, false, true, true, SingleSpc, 2, 4 ,false},
{ ARM::VST2d32Pseudo, ARM::VST2d32, false, false, false, SingleSpc, 2, 2 ,false},
-{ ARM::VST2d32Pseudo_UPD, ARM::VST2d32_UPD, false, true, true, SingleSpc, 2, 2 ,false},
+{ ARM::VST2d32PseudoWB_fixed, ARM::VST2d32wb_fixed, false, true, true, SingleSpc, 2, 2 ,false},
+{ ARM::VST2d32PseudoWB_register, ARM::VST2d32wb_register, false, true, true, SingleSpc, 2, 2 ,false},
{ ARM::VST2d8Pseudo, ARM::VST2d8, false, false, false, SingleSpc, 2, 8 ,false},
-{ ARM::VST2d8Pseudo_UPD, ARM::VST2d8_UPD, false, true, true, SingleSpc, 2, 8 ,false},
+{ ARM::VST2d8PseudoWB_fixed, ARM::VST2d8wb_fixed, false, true, false, SingleSpc, 2, 8 ,false},
+{ ARM::VST2d8PseudoWB_register, ARM::VST2d8wb_register, false, true, true, SingleSpc, 2, 8 ,false},
{ ARM::VST2q16Pseudo, ARM::VST2q16, false, false, false, SingleSpc, 4, 4 ,false},
-{ ARM::VST2q16Pseudo_UPD, ARM::VST2q16_UPD, false, true, true, SingleSpc, 4, 4 ,false},
+{ ARM::VST2q16PseudoWB_fixed, ARM::VST2q16wb_fixed, false, true, false, SingleSpc, 4, 4 ,false},
+{ ARM::VST2q16PseudoWB_register, ARM::VST2q16wb_register, false, true, true, SingleSpc, 4, 4 ,false},
{ ARM::VST2q32Pseudo, ARM::VST2q32, false, false, false, SingleSpc, 4, 2 ,false},
-{ ARM::VST2q32Pseudo_UPD, ARM::VST2q32_UPD, false, true, true, SingleSpc, 4, 2 ,false},
+{ ARM::VST2q32PseudoWB_fixed, ARM::VST2q32wb_fixed, false, true, false, SingleSpc, 4, 2 ,false},
+{ ARM::VST2q32PseudoWB_register, ARM::VST2q32wb_register, false, true, true, SingleSpc, 4, 2 ,false},
{ ARM::VST2q8Pseudo, ARM::VST2q8, false, false, false, SingleSpc, 4, 8 ,false},
-{ ARM::VST2q8Pseudo_UPD, ARM::VST2q8_UPD, false, true, true, SingleSpc, 4, 8 ,false},
+{ ARM::VST2q8PseudoWB_fixed, ARM::VST2q8wb_fixed, false, true, false, SingleSpc, 4, 8 ,false},
+{ ARM::VST2q8PseudoWB_register, ARM::VST2q8wb_register, false, true, true, SingleSpc, 4, 8 ,false},
{ ARM::VST3LNd16Pseudo, ARM::VST3LNd16, false, false, false, SingleSpc, 3, 4 ,true},
{ ARM::VST3LNd16Pseudo_UPD, ARM::VST3LNd16_UPD, false, true, true, SingleSpc, 3, 4 ,true},
@@ -1193,12 +1199,18 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
case ARM::VST2q8Pseudo:
case ARM::VST2q16Pseudo:
case ARM::VST2q32Pseudo:
- case ARM::VST2d8Pseudo_UPD:
- case ARM::VST2d16Pseudo_UPD:
- case ARM::VST2d32Pseudo_UPD:
- case ARM::VST2q8Pseudo_UPD:
- case ARM::VST2q16Pseudo_UPD:
- case ARM::VST2q32Pseudo_UPD:
+ case ARM::VST2d8PseudoWB_fixed:
+ case ARM::VST2d16PseudoWB_fixed:
+ case ARM::VST2d32PseudoWB_fixed:
+ case ARM::VST2q8PseudoWB_fixed:
+ case ARM::VST2q16PseudoWB_fixed:
+ case ARM::VST2q32PseudoWB_fixed:
+ case ARM::VST2d8PseudoWB_register:
+ case ARM::VST2d16PseudoWB_register:
+ case ARM::VST2d32PseudoWB_register:
+ case ARM::VST2q8PseudoWB_register:
+ case ARM::VST2q16PseudoWB_register:
+ case ARM::VST2q32PseudoWB_register:
case ARM::VST3d8Pseudo:
case ARM::VST3d16Pseudo:
case ARM::VST3d32Pseudo:
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index ced13abca1..74731419fd 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -1589,6 +1589,12 @@ static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;
+ case ARM::VST2d8PseudoWB_fixed: return ARM::VST2d8PseudoWB_register;
+ case ARM::VST2d16PseudoWB_fixed: return ARM::VST2d16PseudoWB_register;
+ case ARM::VST2d32PseudoWB_fixed: return ARM::VST2d32PseudoWB_register;
+ case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
+ case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
+ case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;
}
return Opc; // If not one we handle, return it unchanged.
}
@@ -1806,9 +1812,9 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
Ops.push_back(Align);
if (isUpdating) {
SDValue Inc = N->getOperand(AddrOpIdx + 1);
- // FIXME: VST1 fixed increment doesn't need Reg0. Remove the reg0
+ // FIXME: VST1/VST2 fixed increment doesn't need Reg0. Remove the reg0
// case entirely when the rest are updated to that form, too.
- if (NumVecs == 1 && !isa<ConstantSDNode>(Inc.getNode()))
+ if (NumVecs <= 2 && !isa<ConstantSDNode>(Inc.getNode()))
Opc = getVLDSTRegisterUpdateOpcode(Opc);
// We use a VST1 for v1i64 even if the pseudo says vld2/3/4, so
// check for that explicitly too. Horribly hacky, but temporary.
@@ -2889,10 +2895,13 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
}
case ARMISD::VST2_UPD: {
- unsigned DOpcodes[] = { ARM::VST2d8Pseudo_UPD, ARM::VST2d16Pseudo_UPD,
- ARM::VST2d32Pseudo_UPD, ARM::VST1q64PseudoWB_fixed};
- unsigned QOpcodes[] = { ARM::VST2q8Pseudo_UPD, ARM::VST2q16Pseudo_UPD,
- ARM::VST2q32Pseudo_UPD };
+ unsigned DOpcodes[] = { ARM::VST2d8PseudoWB_fixed,
+ ARM::VST2d16PseudoWB_fixed,
+ ARM::VST2d32PseudoWB_fixed,
+ ARM::VST1q64PseudoWB_fixed};
+ unsigned QOpcodes[] = { ARM::VST2q8PseudoWB_fixed,
+ ARM::VST2q16PseudoWB_fixed,
+ ARM::VST2q32PseudoWB_fixed };
return SelectVST(N, true, 2, DOpcodes, QOpcodes, 0);
}
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index 59911783f5..cd12ab9b40 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -1523,44 +1523,90 @@ def VST2q16Pseudo : VSTQQPseudo<IIC_VST2x2>;
def VST2q32Pseudo : VSTQQPseudo<IIC_VST2x2>;
// ...with address register writeback:
-class VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy>
- : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
- (ins addrmode6:$Rn, am6offset:$Rm, VdTy:$Vd),
- IIC_VST2u, "vst2", Dt, "$Vd, $Rn$Rm", "$Rn.addr = $wb", []> {
- let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVSTInstruction";
+//class VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy>
+// : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
+// (ins addrmode6:$Rn, am6offset:$Rm, VdTy:$Vd),
+// IIC_VST2u, "vst2", Dt, "$Vd, $Rn$Rm", "$Rn.addr = $wb", []> {
+// let Inst{5-4} = Rn{5-4};
+// let DecoderMethod = "DecodeVSTInstruction";
+//}
+multiclass VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt,
+ RegisterOperand VdTy> {
+ def _fixed : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, VdTy:$Vd), IIC_VLD1u,
+ "vst2", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVSTInstruction";
+ let AsmMatchConverter = "cvtVSTwbFixed";
+ }
+ def _register : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, rGPR:$Rm, VdTy:$Vd), IIC_VLD1u,
+ "vst2", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVSTInstruction";
+ let AsmMatchConverter = "cvtVSTwbRegister";
+ }
}
-class VST2QWB<bits<4> op7_4, string Dt>
- : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
- (ins addrmode6:$Rn, am6offset:$Rm, VecListFourD:$Vd), IIC_VST2x2u,
- "vst2", Dt, "$Vd, $Rn$Rm", "$Rn.addr = $wb", []> {
- let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVSTInstruction";
+//class VST2QWB<bits<4> op7_4, string Dt>
+// : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
+// (ins addrmode6:$Rn, am6offset:$Rm, VecListFourD:$Vd), IIC_VST2x2u,
+// "vst2", Dt, "$Vd, $Rn$Rm", "$Rn.addr = $wb", []> {
+// let Inst{5-4} = Rn{5-4};
+// let DecoderMethod = "DecodeVSTInstruction";
+//}
+multiclass VST2QWB<bits<4> op7_4, string Dt> {
+ def _fixed : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, VecListFourD:$Vd), IIC_VLD1u,
+ "vst2", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVSTInstruction";
+ let AsmMatchConverter = "cvtVSTwbFixed";
+ }
+ def _register : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, rGPR:$Rm, VecListFourD:$Vd),
+ IIC_VLD1u,
+ "vst2", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVSTInstruction";
+ let AsmMatchConverter = "cvtVSTwbRegister";
+ }
}
-def VST2d8_UPD : VST2DWB<0b1000, {0,0,?,?}, "8", VecListTwoD>;
-def VST2d16_UPD : VST2DWB<0b1000, {0,1,?,?}, "16", VecListTwoD>;
-def VST2d32_UPD : VST2DWB<0b1000, {1,0,?,?}, "32", VecListTwoD>;
+defm VST2d8wb : VST2DWB<0b1000, {0,0,?,?}, "8", VecListTwoD>;
+defm VST2d16wb : VST2DWB<0b1000, {0,1,?,?}, "16", VecListTwoD>;
+defm VST2d32wb : VST2DWB<0b1000, {1,0,?,?}, "32", VecListTwoD>;
-def VST2q8_UPD : VST2QWB<{0,0,?,?}, "8">;
-def VST2q16_UPD : VST2QWB<{0,1,?,?}, "16">;
-def VST2q32_UPD : VST2QWB<{1,0,?,?}, "32">;
+defm VST2q8wb : VST2QWB<{0,0,?,?}, "8">;
+defm VST2q16wb : VST2QWB<{0,1,?,?}, "16">;
+defm VST2q32wb : VST2QWB<{1,0,?,?}, "32">;
-def VST2d8Pseudo_UPD : VSTQWBPseudo<IIC_VST2u>;
-def VST2d16Pseudo_UPD : VSTQWBPseudo<IIC_VST2u>;
-def VST2d32Pseudo_UPD : VSTQWBPseudo<IIC_VST2u>;
+def VST2d8PseudoWB_fixed : VSTQWBPseudo<IIC_VST2u>;
+def VST2d16PseudoWB_fixed : VSTQWBPseudo<IIC_VST2u>;
+def VST2d32PseudoWB_fixed : VSTQWBPseudo<IIC_VST2u>;
+def VST2d8PseudoWB_register : VSTQWBPseudo<IIC_VST2u>;
+def VST2d16PseudoWB_register : VSTQWBPseudo<IIC_VST2u>;
+def VST2d32PseudoWB_register : VSTQWBPseudo<IIC_VST2u>;
-def VST2q8Pseudo_UPD : VSTQQWBPseudo<IIC_VST2x2u>;
-def VST2q16Pseudo_UPD : VSTQQWBPseudo<IIC_VST2x2u>;
-def VST2q32Pseudo_UPD : VSTQQWBPseudo<IIC_VST2x2u>;
+def VST2q8PseudoWB_fixed : VSTQQWBPseudo<IIC_VST2x2u>;
+def VST2q16PseudoWB_fixed : VSTQQWBPseudo<IIC_VST2x2u>;
+def VST2q32PseudoWB_fixed : VSTQQWBPseudo<IIC_VST2x2u>;
+def VST2q8PseudoWB_register : VSTQQWBPseudo<IIC_VST2x2u>;
+def VST2q16PseudoWB_register : VSTQQWBPseudo<IIC_VST2x2u>;
+def VST2q32PseudoWB_register : VSTQQWBPseudo<IIC_VST2x2u>;
// ...with double-spaced registers
def VST2b8 : VST2<0b1001, {0,0,?,?}, "8", VecListTwoQ, IIC_VST2>;
def VST2b16 : VST2<0b1001, {0,1,?,?}, "16", VecListTwoQ, IIC_VST2>;
def VST2b32 : VST2<0b1001, {1,0,?,?}, "32", VecListTwoQ, IIC_VST2>;
-def VST2b8_UPD : VST2DWB<0b1001, {0,0,?,?}, "8", VecListTwoQ>;
-def VST2b16_UPD : VST2DWB<0b1001, {0,1,?,?}, "16", VecListTwoQ>;
-def VST2b32_UPD : VST2DWB<0b1001, {1,0,?,?}, "32", VecListTwoQ>;
+defm VST2b8wb : VST2DWB<0b1001, {0,0,?,?}, "8", VecListTwoQ>;
+defm VST2b16wb : VST2DWB<0b1001, {0,1,?,?}, "16", VecListTwoQ>;
+defm VST2b32wb : VST2DWB<0b1001, {1,0,?,?}, "32", VecListTwoQ>;
// VST3 : Vector Store (multiple 3-element structures)
class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt>
diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index dee04ead53..45f285c395 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -2221,15 +2221,24 @@ static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn,
case ARM::VST1d16Qwb_register:
case ARM::VST1d32Qwb_register:
case ARM::VST1d64Qwb_register:
- case ARM::VST2d8_UPD:
- case ARM::VST2d16_UPD:
- case ARM::VST2d32_UPD:
- case ARM::VST2q8_UPD:
- case ARM::VST2q16_UPD:
- case ARM::VST2q32_UPD:
- case ARM::VST2b8_UPD:
- case ARM::VST2b16_UPD:
- case ARM::VST2b32_UPD:
+ case ARM::VST2d8wb_fixed:
+ case ARM::VST2d16wb_fixed:
+ case ARM::VST2d32wb_fixed:
+ case ARM::VST2d8wb_register:
+ case ARM::VST2d16wb_register:
+ case ARM::VST2d32wb_register:
+ case ARM::VST2q8wb_fixed:
+ case ARM::VST2q16wb_fixed:
+ case ARM::VST2q32wb_fixed:
+ case ARM::VST2q8wb_register:
+ case ARM::VST2q16wb_register:
+ case ARM::VST2q32wb_register:
+ case ARM::VST2b8wb_fixed:
+ case ARM::VST2b16wb_fixed:
+ case ARM::VST2b32wb_fixed:
+ case ARM::VST2b8wb_register:
+ case ARM::VST2b16wb_register:
+ case ARM::VST2b32wb_register:
case ARM::VST3d8_UPD:
case ARM::VST3d16_UPD:
case ARM::VST3d32_UPD: