diff options
author | Sebastian Pop <spop@codeaurora.org> | 2012-03-05 17:39:52 +0000 |
---|---|---|
committer | Sebastian Pop <spop@codeaurora.org> | 2012-03-05 17:39:52 +0000 |
commit | 74bebde7c4e2d1cfd4a16c19ce3c87521df67639 (patch) | |
tree | 07fd9dda4ac83ae4cf874a76cc41b70a61720229 /lib/Target/ARM/ARMInstrNEON.td | |
parent | 43ec0f4921e315dd9507be7467e633a837ad23db (diff) | |
download | llvm-74bebde7c4e2d1cfd4a16c19ce3c87521df67639.tar.gz llvm-74bebde7c4e2d1cfd4a16c19ce3c87521df67639.tar.bz2 llvm-74bebde7c4e2d1cfd4a16c19ce3c87521df67639.tar.xz |
updated patch for the ARM fused multiply add/sub
In this update:
- I assumed neon2 does not imply vfpv4, but neon and vfpv4 together imply neon2.
- I kept setting the .fpu=neon-vfpv4 code attribute because that is what the
assembler understands.
Patch by Ana Pazos <apazos@codeaurora.org>
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@152036 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target/ARM/ARMInstrNEON.td')
-rw-r--r-- | lib/Target/ARM/ARMInstrNEON.td | 24 |
1 files changed, 12 insertions, 12 deletions
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 387e16d146..17fe80851c 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -4060,10 +4060,10 @@ defm VMLA : N3VMulOp_QHS<0, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D, IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>; def VMLAfd : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32", v2f32, fmul_su, fadd_mlx>, - Requires<[HasNEON, UseFPVMLx, NoNEONVFP4]>; + Requires<[HasNEON, UseFPVMLx, NoNEON2]>; def VMLAfq : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla", "f32", v4f32, fmul_su, fadd_mlx>, - Requires<[HasNEON, UseFPVMLx, NoNEONVFP4]>; + Requires<[HasNEON, UseFPVMLx, NoNEON2]>; defm VMLAsl : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D, IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>; def VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32", @@ -4118,10 +4118,10 @@ defm VMLS : N3VMulOp_QHS<1, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D, IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>; def VMLSfd : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32", v2f32, fmul_su, fsub_mlx>, - Requires<[HasNEON, UseFPVMLx, NoNEONVFP4]>; + Requires<[HasNEON, UseFPVMLx, NoNEON2]>; def VMLSfq : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls", "f32", v4f32, fmul_su, fsub_mlx>, - Requires<[HasNEON, UseFPVMLx, NoNEONVFP4]>; + Requires<[HasNEON, UseFPVMLx, NoNEON2]>; defm VMLSsl : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D, IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>; def VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32", @@ -4174,19 +4174,19 @@ defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b111, "vqdmlsl", "s", int_arm_neon_vqdmlsl>; // Fused Vector Multiply-Accumulate and Fused Multiply-Subtract Operations. 
def VFMAfd : N3VDMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACD, "vfma", "f32", v2f32, fmul_su, fadd_mlx>, - Requires<[HasNEONVFP4]>; + Requires<[HasNEON2,FPContractions]>; def VFMAfq : N3VQMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACQ, "vfma", "f32", v4f32, fmul_su, fadd_mlx>, - Requires<[HasNEONVFP4]>; + Requires<[HasNEON2,FPContractions]>; // Fused Vector Multiply Subtract (floating-point) def VFMSfd : N3VDMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACD, "vfms", "f32", v2f32, fmul_su, fsub_mlx>, - Requires<[HasNEONVFP4]>; + Requires<[HasNEON2,FPContractions]>; def VFMSfq : N3VQMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACQ, "vfms", "f32", v4f32, fmul_su, fsub_mlx>, - Requires<[HasNEONVFP4]>; + Requires<[HasNEON2,FPContractions]>; // Vector Subtract Operations. @@ -5541,13 +5541,13 @@ def : N3VSPat<fadd, VADDfd>; def : N3VSPat<fsub, VSUBfd>; def : N3VSPat<fmul, VMULfd>; def : N3VSMulOpPat<fmul, fadd, VMLAfd>, - Requires<[HasNEON, UseNEONForFP, UseFPVMLx, NoNEONVFP4]>; + Requires<[HasNEON, UseNEONForFP, UseFPVMLx, NoNEON2]>; def : N3VSMulOpPat<fmul, fsub, VMLSfd>, - Requires<[HasNEON, UseNEONForFP, UseFPVMLx, NoNEONVFP4]>; + Requires<[HasNEON, UseNEONForFP, UseFPVMLx, NoNEON2]>; def : N3VSMulOpPat<fmul, fadd, VFMAfd>, - Requires<[HasNEONVFP4, UseNEONForFP]>; + Requires<[HasNEON2, UseNEONForFP,FPContractions]>; def : N3VSMulOpPat<fmul, fsub, VFMSfd>, - Requires<[HasNEONVFP4, UseNEONForFP]>; + Requires<[HasNEON2, UseNEONForFP,FPContractions]>; def : N2VSPat<fabs, VABSfd>; def : N2VSPat<fneg, VNEGfd>; def : N3VSPat<NEONfmax, VMAXfd>; |