diff options
author | Tom Stellard <thomas.stellard@amd.com> | 2014-04-09 15:24:12 +0000 |
---|---|---|
committer | Tom Stellard <thomas.stellard@amd.com> | 2014-04-09 15:24:12 +0000 |
commit | a17303598934c78afb04f623a85cd99ec0261989 (patch) | |
tree | 30d9be99ad0165ce5977a6d50dc2a34d7855f69e /lib | |
parent | 886b940a1d6fedd28b84bd3fe06963bfd1451382 (diff) | |
download | llvm-a17303598934c78afb04f623a85cd99ec0261989.tar.gz llvm-a17303598934c78afb04f623a85cd99ec0261989.tar.bz2 llvm-a17303598934c78afb04f623a85cd99ec0261989.tar.xz |
Merging r199369:
------------------------------------------------------------------------
r199369 | jiangning.liu | 2014-01-16 04:16:13 -0500 (Thu, 16 Jan 2014) | 2 lines
For ARM, fix assertuib failures for some ld/st 3/4 instruction with wirteback.
------------------------------------------------------------------------
git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_34@205901 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Target/ARM/ARMBaseInstrInfo.cpp | 2 | ||||
-rw-r--r-- | lib/Target/ARM/ARMExpandPseudoInsts.cpp | 4 | ||||
-rw-r--r-- | lib/Target/ARM/ARMISelDAGToDAG.cpp | 75 | ||||
-rw-r--r-- | lib/Target/ARM/ARMInstrNEON.td | 8 |
4 files changed, 78 insertions, 11 deletions
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index f835a4e5b5..658af8380d 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -3684,6 +3684,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, case ARM::VLD3d16Pseudo: case ARM::VLD3d32Pseudo: case ARM::VLD1d64TPseudo: + case ARM::VLD1d64TPseudoWB_fixed: case ARM::VLD3d8Pseudo_UPD: case ARM::VLD3d16Pseudo_UPD: case ARM::VLD3d32Pseudo_UPD: @@ -3700,6 +3701,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, case ARM::VLD4d16Pseudo: case ARM::VLD4d32Pseudo: case ARM::VLD1d64QPseudo: + case ARM::VLD1d64QPseudoWB_fixed: case ARM::VLD4d8Pseudo_UPD: case ARM::VLD4d16Pseudo_UPD: case ARM::VLD4d32Pseudo_UPD: diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index e6f7f86c55..3e62b649e0 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -136,7 +136,9 @@ static const NEONLdStTableEntry NEONLdStTable[] = { { ARM::VLD1LNq8Pseudo_UPD, ARM::VLD1LNd8_UPD, true, true, true, EvenDblSpc, 1, 8 ,true}, { ARM::VLD1d64QPseudo, ARM::VLD1d64Q, true, false, false, SingleSpc, 4, 1 ,false}, +{ ARM::VLD1d64QPseudoWB_fixed, ARM::VLD1d64Qwb_fixed, true, true, false, SingleSpc, 4, 1 ,false}, { ARM::VLD1d64TPseudo, ARM::VLD1d64T, true, false, false, SingleSpc, 3, 1 ,false}, +{ ARM::VLD1d64TPseudoWB_fixed, ARM::VLD1d64Twb_fixed, true, true, false, SingleSpc, 3, 1 ,false}, { ARM::VLD2LNd16Pseudo, ARM::VLD2LNd16, true, false, false, SingleSpc, 2, 4 ,true}, { ARM::VLD2LNd16Pseudo_UPD, ARM::VLD2LNd16_UPD, true, true, true, SingleSpc, 2, 4 ,true}, @@ -1071,6 +1073,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, case ARM::VLD3d16Pseudo: case ARM::VLD3d32Pseudo: case ARM::VLD1d64TPseudo: + case ARM::VLD1d64TPseudoWB_fixed: case ARM::VLD3d8Pseudo_UPD: case ARM::VLD3d16Pseudo_UPD: case ARM::VLD3d32Pseudo_UPD: @@ -1087,6 +1090,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, case ARM::VLD4d16Pseudo: case ARM::VLD4d32Pseudo: case ARM::VLD1d64QPseudo: + case ARM::VLD1d64QPseudoWB_fixed: case ARM::VLD4d8Pseudo_UPD: case ARM::VLD4d16Pseudo_UPD: case ARM::VLD4d32Pseudo_UPD: diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 87d1522694..6d9b18877f 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -1673,9 +1673,61 @@ SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, unsigned NumVecs, return CurDAG->getTargetConstant(Alignment, MVT::i32); } +static bool isVLDfixed(unsigned Opc) +{ + switch (Opc) { + default: return false; + case ARM::VLD1d8wb_fixed : return true; + case ARM::VLD1d16wb_fixed : return true; + case ARM::VLD1d64Qwb_fixed : return true; + case ARM::VLD1d32wb_fixed : return true; + case ARM::VLD1d64wb_fixed : return true; + case ARM::VLD1d64TPseudoWB_fixed : return true; + case ARM::VLD1d64QPseudoWB_fixed : return true; + case ARM::VLD1q8wb_fixed : return true; + case ARM::VLD1q16wb_fixed : return true; + case ARM::VLD1q32wb_fixed : return true; + case ARM::VLD1q64wb_fixed : return true; + case ARM::VLD2d8wb_fixed : return true; + case ARM::VLD2d16wb_fixed : return true; + case ARM::VLD2d32wb_fixed : return true; + case ARM::VLD2q8PseudoWB_fixed : return true; + case ARM::VLD2q16PseudoWB_fixed : return true; + case ARM::VLD2q32PseudoWB_fixed : return true; + case ARM::VLD2DUPd8wb_fixed : return true; + case ARM::VLD2DUPd16wb_fixed : return true; + case ARM::VLD2DUPd32wb_fixed : return true; + } +} + +static bool isVSTfixed(unsigned Opc) +{ + switch (Opc) { + default: return false; + case ARM::VST1d8wb_fixed : return true; + case ARM::VST1d16wb_fixed : return true; + case ARM::VST1d32wb_fixed : return true; + case ARM::VST1d64wb_fixed : return true; + case ARM::VST1q8wb_fixed : return true; + case ARM::VST1q16wb_fixed : return true; + case ARM::VST1q32wb_fixed : return true; + case ARM::VST1q64wb_fixed : return true; + case ARM::VST1d64TPseudoWB_fixed : return true; + case ARM::VST1d64QPseudoWB_fixed : return true; + case ARM::VST2d8wb_fixed : return true; + case ARM::VST2d16wb_fixed : return true; + case ARM::VST2d32wb_fixed : return true; + case ARM::VST2q8PseudoWB_fixed : return true; + case ARM::VST2q16PseudoWB_fixed : return true; + case ARM::VST2q32PseudoWB_fixed : return true; + } +} + // Get the register stride update opcode of a VLD/VST instruction that // is otherwise equivalent to the given fixed stride updating instruction. static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) { + assert((isVLDfixed(Opc) || isVSTfixed(Opc)) + && "Incorrect fixed stride updating instruction."); switch (Opc) { default: break; case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register; @@ -1686,6 +1738,10 @@ static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) { case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register; case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register; case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register; + case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register; + case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register; + case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register; + case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register; case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register; case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register; @@ -1785,11 +1841,11 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs, SDValue Inc = N->getOperand(AddrOpIdx + 1); // FIXME: VLD1/VLD2 fixed increment doesn't need Reg0. Remove the reg0 // case entirely when the rest are updated to that form, too. - if ((NumVecs == 1 || NumVecs == 2) && !isa<ConstantSDNode>(Inc.getNode())) + if ((NumVecs <= 2) && !isa<ConstantSDNode>(Inc.getNode())) Opc = getVLDSTRegisterUpdateOpcode(Opc); - // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so + // FIXME: We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so // check for that explicitly too. Horribly hacky, but temporary. - if ((NumVecs != 1 && NumVecs != 2 && Opc != ARM::VLD1q64wb_fixed) || + if ((NumVecs > 2 && !isVLDfixed(Opc)) || !isa<ConstantSDNode>(Inc.getNode())) Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc); } @@ -1937,11 +1993,12 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, // case entirely when the rest are updated to that form, too. if (NumVecs <= 2 && !isa<ConstantSDNode>(Inc.getNode())) Opc = getVLDSTRegisterUpdateOpcode(Opc); - // We use a VST1 for v1i64 even if the pseudo says vld2/3/4, so + // FIXME: We use a VST1 for v1i64 even if the pseudo says vld2/3/4, so // check for that explicitly too. Horribly hacky, but temporary. - if ((NumVecs > 2 && Opc != ARM::VST1q64wb_fixed) || - !isa<ConstantSDNode>(Inc.getNode())) - Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc); + if (!isa<ConstantSDNode>(Inc.getNode())) + Ops.push_back(Inc); + else if (NumVecs > 2 && !isVSTfixed(Opc)) + Ops.push_back(Reg0); } Ops.push_back(SrcReg); Ops.push_back(Pred); @@ -2834,7 +2891,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD, ARM::VLD3d16Pseudo_UPD, ARM::VLD3d32Pseudo_UPD, - ARM::VLD1q64wb_fixed}; + ARM::VLD1d64TPseudoWB_fixed}; static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD, ARM::VLD3q16Pseudo_UPD, ARM::VLD3q32Pseudo_UPD }; @@ -2848,7 +2905,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo_UPD, ARM::VLD4d16Pseudo_UPD, ARM::VLD4d32Pseudo_UPD, - ARM::VLD1q64wb_fixed}; + ARM::VLD1d64QPseudoWB_fixed}; static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD, ARM::VLD4q16Pseudo_UPD, ARM::VLD4q32Pseudo_UPD }; diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 43bd4c21dc..0b05c08ed9 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -730,6 +730,8 @@ defm VLD1d32Twb : VLD1D3WB<{1,0,0,?}, "32">; defm VLD1d64Twb : VLD1D3WB<{1,1,0,?}, "64">; def VLD1d64TPseudo : VLDQQPseudo<IIC_VLD1x3>; +def VLD1d64TPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x3>; +def VLD1d64TPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x3>; // ...with 4 registers class VLD1D4<bits<4> op7_4, string Dt> @@ -769,6 +771,8 @@ defm VLD1d32Qwb : VLD1D4WB<{1,0,?,?}, "32">; defm VLD1d64Qwb : VLD1D4WB<{1,1,?,?}, "64">; def VLD1d64QPseudo : VLDQQPseudo<IIC_VLD1x4>; +def VLD1d64QPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x4>; +def VLD1d64QPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x4>; // VLD2 : Vector Load (multiple 2-element structures) class VLD2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy, @@ -1671,7 +1675,7 @@ defm VST1d32Twb : VST1D3WB<{1,0,0,?}, "32">; defm VST1d64Twb : VST1D3WB<{1,1,0,?}, "64">; def VST1d64TPseudo : VSTQQPseudo<IIC_VST1x3>; -def VST1d64TPseudoWB_fixed : VSTQQWBPseudo<IIC_VST1x3u>; +def VST1d64TPseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST1x3u>; def VST1d64TPseudoWB_register : VSTQQWBPseudo<IIC_VST1x3u>; // ...with 4 registers @@ -1714,7 +1718,7 @@ defm VST1d32Qwb : VST1D4WB<{1,0,?,?}, "32">; defm VST1d64Qwb : VST1D4WB<{1,1,?,?}, "64">; def VST1d64QPseudo : VSTQQPseudo<IIC_VST1x4>; -def VST1d64QPseudoWB_fixed : VSTQQWBPseudo<IIC_VST1x4u>; +def VST1d64QPseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST1x4u>; def VST1d64QPseudoWB_register : VSTQQWBPseudo<IIC_VST1x4u>; // VST2 : Vector Store (multiple 2-element structures) |