diff options
author | Jim Grosbach <grosbach@apple.com> | 2011-12-14 21:32:11 +0000 |
---|---|---|
committer | Jim Grosbach <grosbach@apple.com> | 2011-12-14 21:32:11 +0000 |
commit | bb3a2e4d0defc6854d37384d80858037dbbc5f20 (patch) | |
tree | e540cd99adbfc84190ab3e3dd6bea0fd3d3cf68c /lib | |
parent | 20accfc6c7b22b22193eb90c53921f71c1202a73 (diff) | |
download | llvm-bb3a2e4d0defc6854d37384d80858037dbbc5f20.tar.gz llvm-bb3a2e4d0defc6854d37384d80858037dbbc5f20.tar.bz2 llvm-bb3a2e4d0defc6854d37384d80858037dbbc5f20.tar.xz |
ARM NEON refactor VST2 w/ writeback instructions.
In addition to improving the representation, this adds support for assembly
parsing of these instructions.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@146588 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Target/ARM/ARMExpandPseudoInsts.cpp | 36 | ||||
-rw-r--r-- | lib/Target/ARM/ARMISelDAGToDAG.cpp | 21 | ||||
-rw-r--r-- | lib/Target/ARM/ARMInstrNEON.td | 100 | ||||
-rw-r--r-- | lib/Target/ARM/Disassembler/ARMDisassembler.cpp | 27 |
4 files changed, 130 insertions, 54 deletions
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index 69011d73ca..099278286f 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -308,18 +308,24 @@ static const NEONLdStTableEntry NEONLdStTable[] = { { ARM::VST2LNq32Pseudo_UPD, ARM::VST2LNq32_UPD, false, true, true, EvenDblSpc, 2, 2,true}, { ARM::VST2d16Pseudo, ARM::VST2d16, false, false, false, SingleSpc, 2, 4 ,false}, -{ ARM::VST2d16Pseudo_UPD, ARM::VST2d16_UPD, false, true, true, SingleSpc, 2, 4 ,false}, +{ ARM::VST2d16PseudoWB_fixed, ARM::VST2d16wb_fixed, false, true, false, SingleSpc, 2, 4 ,false}, +{ ARM::VST2d16PseudoWB_register, ARM::VST2d16wb_register, false, true, true, SingleSpc, 2, 4 ,false}, { ARM::VST2d32Pseudo, ARM::VST2d32, false, false, false, SingleSpc, 2, 2 ,false}, -{ ARM::VST2d32Pseudo_UPD, ARM::VST2d32_UPD, false, true, true, SingleSpc, 2, 2 ,false}, +{ ARM::VST2d32PseudoWB_fixed, ARM::VST2d32wb_fixed, false, true, true, SingleSpc, 2, 2 ,false}, +{ ARM::VST2d32PseudoWB_register, ARM::VST2d32wb_register, false, true, true, SingleSpc, 2, 2 ,false}, { ARM::VST2d8Pseudo, ARM::VST2d8, false, false, false, SingleSpc, 2, 8 ,false}, -{ ARM::VST2d8Pseudo_UPD, ARM::VST2d8_UPD, false, true, true, SingleSpc, 2, 8 ,false}, +{ ARM::VST2d8PseudoWB_fixed, ARM::VST2d8wb_fixed, false, true, false, SingleSpc, 2, 8 ,false}, +{ ARM::VST2d8PseudoWB_register, ARM::VST2d8wb_register, false, true, true, SingleSpc, 2, 8 ,false}, { ARM::VST2q16Pseudo, ARM::VST2q16, false, false, false, SingleSpc, 4, 4 ,false}, -{ ARM::VST2q16Pseudo_UPD, ARM::VST2q16_UPD, false, true, true, SingleSpc, 4, 4 ,false}, +{ ARM::VST2q16PseudoWB_fixed, ARM::VST2q16wb_fixed, false, true, false, SingleSpc, 4, 4 ,false}, +{ ARM::VST2q16PseudoWB_register, ARM::VST2q16wb_register, false, true, true, SingleSpc, 4, 4 ,false}, { ARM::VST2q32Pseudo, ARM::VST2q32, false, false, false, SingleSpc, 4, 2 ,false}, -{ ARM::VST2q32Pseudo_UPD, ARM::VST2q32_UPD, false, true, true, SingleSpc, 4, 2 ,false}, +{ ARM::VST2q32PseudoWB_fixed, ARM::VST2q32wb_fixed, false, true, false, SingleSpc, 4, 2 ,false}, +{ ARM::VST2q32PseudoWB_register, ARM::VST2q32wb_register, false, true, true, SingleSpc, 4, 2 ,false}, { ARM::VST2q8Pseudo, ARM::VST2q8, false, false, false, SingleSpc, 4, 8 ,false}, -{ ARM::VST2q8Pseudo_UPD, ARM::VST2q8_UPD, false, true, true, SingleSpc, 4, 8 ,false}, +{ ARM::VST2q8PseudoWB_fixed, ARM::VST2q8wb_fixed, false, true, false, SingleSpc, 4, 8 ,false}, +{ ARM::VST2q8PseudoWB_register, ARM::VST2q8wb_register, false, true, true, SingleSpc, 4, 8 ,false}, { ARM::VST3LNd16Pseudo, ARM::VST3LNd16, false, false, false, SingleSpc, 3, 4 ,true}, { ARM::VST3LNd16Pseudo_UPD, ARM::VST3LNd16_UPD, false, true, true, SingleSpc, 3, 4 ,true}, @@ -1193,12 +1199,18 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, case ARM::VST2q8Pseudo: case ARM::VST2q16Pseudo: case ARM::VST2q32Pseudo: - case ARM::VST2d8Pseudo_UPD: - case ARM::VST2d16Pseudo_UPD: - case ARM::VST2d32Pseudo_UPD: - case ARM::VST2q8Pseudo_UPD: - case ARM::VST2q16Pseudo_UPD: - case ARM::VST2q32Pseudo_UPD: + case ARM::VST2d8PseudoWB_fixed: + case ARM::VST2d16PseudoWB_fixed: + case ARM::VST2d32PseudoWB_fixed: + case ARM::VST2q8PseudoWB_fixed: + case ARM::VST2q16PseudoWB_fixed: + case ARM::VST2q32PseudoWB_fixed: + case ARM::VST2d8PseudoWB_register: + case ARM::VST2d16PseudoWB_register: + case ARM::VST2d32PseudoWB_register: + case ARM::VST2q8PseudoWB_register: + case ARM::VST2q16PseudoWB_register: + case ARM::VST2q32PseudoWB_register: case ARM::VST3d8Pseudo: case ARM::VST3d16Pseudo: case ARM::VST3d32Pseudo: diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index ced13abca1..74731419fd 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -1589,6 +1589,12 @@ static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) { case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register; case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register; + case ARM::VST2d8PseudoWB_fixed: return ARM::VST2d8PseudoWB_register; + case ARM::VST2d16PseudoWB_fixed: return ARM::VST2d16PseudoWB_register; + case ARM::VST2d32PseudoWB_fixed: return ARM::VST2d32PseudoWB_register; + case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register; + case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register; + case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register; } return Opc; // If not one we handle, return it unchanged. } @@ -1806,9 +1812,9 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, Ops.push_back(Align); if (isUpdating) { SDValue Inc = N->getOperand(AddrOpIdx + 1); - // FIXME: VST1 fixed increment doesn't need Reg0. Remove the reg0 + // FIXME: VST1/VST2 fixed increment doesn't need Reg0. Remove the reg0 // case entirely when the rest are updated to that form, too. - if (NumVecs == 1 && !isa<ConstantSDNode>(Inc.getNode())) + if (NumVecs <= 2 && !isa<ConstantSDNode>(Inc.getNode())) Opc = getVLDSTRegisterUpdateOpcode(Opc); // We use a VST1 for v1i64 even if the pseudo says vld2/3/4, so // check for that explicitly too. Horribly hacky, but temporary. @@ -2889,10 +2895,13 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { } case ARMISD::VST2_UPD: { - unsigned DOpcodes[] = { ARM::VST2d8Pseudo_UPD, ARM::VST2d16Pseudo_UPD, - ARM::VST2d32Pseudo_UPD, ARM::VST1q64PseudoWB_fixed}; - unsigned QOpcodes[] = { ARM::VST2q8Pseudo_UPD, ARM::VST2q16Pseudo_UPD, - ARM::VST2q32Pseudo_UPD }; + unsigned DOpcodes[] = { ARM::VST2d8PseudoWB_fixed, + ARM::VST2d16PseudoWB_fixed, + ARM::VST2d32PseudoWB_fixed, + ARM::VST1q64PseudoWB_fixed}; + unsigned QOpcodes[] = { ARM::VST2q8PseudoWB_fixed, + ARM::VST2q16PseudoWB_fixed, + ARM::VST2q32PseudoWB_fixed }; return SelectVST(N, true, 2, DOpcodes, QOpcodes, 0); } diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 59911783f5..cd12ab9b40 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -1523,44 +1523,90 @@ def VST2q16Pseudo : VSTQQPseudo<IIC_VST2x2>; def VST2q32Pseudo : VSTQQPseudo<IIC_VST2x2>; // ...with address register writeback: -class VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy> - : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), - (ins addrmode6:$Rn, am6offset:$Rm, VdTy:$Vd), - IIC_VST2u, "vst2", Dt, "$Vd, $Rn$Rm", "$Rn.addr = $wb", []> { - let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; +//class VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy> +// : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), +// (ins addrmode6:$Rn, am6offset:$Rm, VdTy:$Vd), +// IIC_VST2u, "vst2", Dt, "$Vd, $Rn$Rm", "$Rn.addr = $wb", []> { +// let Inst{5-4} = Rn{5-4}; +// let DecoderMethod = "DecodeVSTInstruction"; +//} +multiclass VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt, + RegisterOperand VdTy> { + def _fixed : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), + (ins addrmode6:$Rn, VdTy:$Vd), IIC_VLD1u, + "vst2", Dt, "$Vd, $Rn!", + "$Rn.addr = $wb", []> { + let Rm = 0b1101; // NLdSt will assign to the right encoding bits. + let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVSTInstruction"; + let AsmMatchConverter = "cvtVSTwbFixed"; + } + def _register : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), + (ins addrmode6:$Rn, rGPR:$Rm, VdTy:$Vd), IIC_VLD1u, + "vst2", Dt, "$Vd, $Rn, $Rm", + "$Rn.addr = $wb", []> { + let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVSTInstruction"; + let AsmMatchConverter = "cvtVSTwbRegister"; + } } -class VST2QWB<bits<4> op7_4, string Dt> - : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb), - (ins addrmode6:$Rn, am6offset:$Rm, VecListFourD:$Vd), IIC_VST2x2u, - "vst2", Dt, "$Vd, $Rn$Rm", "$Rn.addr = $wb", []> { - let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; +//class VST2QWB<bits<4> op7_4, string Dt> +// : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb), +// (ins addrmode6:$Rn, am6offset:$Rm, VecListFourD:$Vd), IIC_VST2x2u, +// "vst2", Dt, "$Vd, $Rn$Rm", "$Rn.addr = $wb", []> { +// let Inst{5-4} = Rn{5-4}; +// let DecoderMethod = "DecodeVSTInstruction"; +//} +multiclass VST2QWB<bits<4> op7_4, string Dt> { + def _fixed : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb), + (ins addrmode6:$Rn, VecListFourD:$Vd), IIC_VLD1u, + "vst2", Dt, "$Vd, $Rn!", + "$Rn.addr = $wb", []> { + let Rm = 0b1101; // NLdSt will assign to the right encoding bits. + let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVSTInstruction"; + let AsmMatchConverter = "cvtVSTwbFixed"; + } + def _register : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb), + (ins addrmode6:$Rn, rGPR:$Rm, VecListFourD:$Vd), + IIC_VLD1u, + "vst2", Dt, "$Vd, $Rn, $Rm", + "$Rn.addr = $wb", []> { + let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVSTInstruction"; + let AsmMatchConverter = "cvtVSTwbRegister"; + } } -def VST2d8_UPD : VST2DWB<0b1000, {0,0,?,?}, "8", VecListTwoD>; -def VST2d16_UPD : VST2DWB<0b1000, {0,1,?,?}, "16", VecListTwoD>; -def VST2d32_UPD : VST2DWB<0b1000, {1,0,?,?}, "32", VecListTwoD>; +defm VST2d8wb : VST2DWB<0b1000, {0,0,?,?}, "8", VecListTwoD>; +defm VST2d16wb : VST2DWB<0b1000, {0,1,?,?}, "16", VecListTwoD>; +defm VST2d32wb : VST2DWB<0b1000, {1,0,?,?}, "32", VecListTwoD>; -def VST2q8_UPD : VST2QWB<{0,0,?,?}, "8">; -def VST2q16_UPD : VST2QWB<{0,1,?,?}, "16">; -def VST2q32_UPD : VST2QWB<{1,0,?,?}, "32">; +defm VST2q8wb : VST2QWB<{0,0,?,?}, "8">; +defm VST2q16wb : VST2QWB<{0,1,?,?}, "16">; +defm VST2q32wb : VST2QWB<{1,0,?,?}, "32">; -def VST2d8Pseudo_UPD : VSTQWBPseudo<IIC_VST2u>; -def VST2d16Pseudo_UPD : VSTQWBPseudo<IIC_VST2u>; -def VST2d32Pseudo_UPD : VSTQWBPseudo<IIC_VST2u>; +def VST2d8PseudoWB_fixed : VSTQWBPseudo<IIC_VST2u>; +def VST2d16PseudoWB_fixed : VSTQWBPseudo<IIC_VST2u>; +def VST2d32PseudoWB_fixed : VSTQWBPseudo<IIC_VST2u>; +def VST2d8PseudoWB_register : VSTQWBPseudo<IIC_VST2u>; +def VST2d16PseudoWB_register : VSTQWBPseudo<IIC_VST2u>; +def VST2d32PseudoWB_register : VSTQWBPseudo<IIC_VST2u>; -def VST2q8Pseudo_UPD : VSTQQWBPseudo<IIC_VST2x2u>; -def VST2q16Pseudo_UPD : VSTQQWBPseudo<IIC_VST2x2u>; -def VST2q32Pseudo_UPD : VSTQQWBPseudo<IIC_VST2x2u>; +def VST2q8PseudoWB_fixed : VSTQQWBPseudo<IIC_VST2x2u>; +def VST2q16PseudoWB_fixed : VSTQQWBPseudo<IIC_VST2x2u>; +def VST2q32PseudoWB_fixed : VSTQQWBPseudo<IIC_VST2x2u>; +def VST2q8PseudoWB_register : VSTQQWBPseudo<IIC_VST2x2u>; +def VST2q16PseudoWB_register : VSTQQWBPseudo<IIC_VST2x2u>; +def VST2q32PseudoWB_register : VSTQQWBPseudo<IIC_VST2x2u>; // ...with double-spaced registers def VST2b8 : VST2<0b1001, {0,0,?,?}, "8", VecListTwoQ, IIC_VST2>; def VST2b16 : VST2<0b1001, {0,1,?,?}, "16", VecListTwoQ, IIC_VST2>; def VST2b32 : VST2<0b1001, {1,0,?,?}, "32", VecListTwoQ, IIC_VST2>; -def VST2b8_UPD : VST2DWB<0b1001, {0,0,?,?}, "8", VecListTwoQ>; -def VST2b16_UPD : VST2DWB<0b1001, {0,1,?,?}, "16", VecListTwoQ>; -def VST2b32_UPD : VST2DWB<0b1001, {1,0,?,?}, "32", VecListTwoQ>; +defm VST2b8wb : VST2DWB<0b1001, {0,0,?,?}, "8", VecListTwoQ>; +defm VST2b16wb : VST2DWB<0b1001, {0,1,?,?}, "16", VecListTwoQ>; +defm VST2b32wb : VST2DWB<0b1001, {1,0,?,?}, "32", VecListTwoQ>; // VST3 : Vector Store (multiple 3-element structures) class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt> diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index dee04ead53..45f285c395 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -2221,15 +2221,24 @@ static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn, case ARM::VST1d16Qwb_register: case ARM::VST1d32Qwb_register: case ARM::VST1d64Qwb_register: - case ARM::VST2d8_UPD: - case ARM::VST2d16_UPD: - case ARM::VST2d32_UPD: - case ARM::VST2q8_UPD: - case ARM::VST2q16_UPD: - case ARM::VST2q32_UPD: - case ARM::VST2b8_UPD: - case ARM::VST2b16_UPD: - case ARM::VST2b32_UPD: + case ARM::VST2d8wb_fixed: + case ARM::VST2d16wb_fixed: + case ARM::VST2d32wb_fixed: + case ARM::VST2d8wb_register: + case ARM::VST2d16wb_register: + case ARM::VST2d32wb_register: + case ARM::VST2q8wb_fixed: + case ARM::VST2q16wb_fixed: + case ARM::VST2q32wb_fixed: + case ARM::VST2q8wb_register: + case ARM::VST2q16wb_register: + case ARM::VST2q32wb_register: + case ARM::VST2b8wb_fixed: + case ARM::VST2b16wb_fixed: + case ARM::VST2b32wb_fixed: + case ARM::VST2b8wb_register: + case ARM::VST2b16wb_register: + case ARM::VST2b32wb_register: case ARM::VST3d8_UPD: case ARM::VST3d16_UPD: case ARM::VST3d32_UPD: |