summaryrefslogtreecommitdiff
path: root/lib/Target/ARM/ARMInstrNEON.td
diff options
context:
space:
mode:
authorSebastian Pop <spop@codeaurora.org>2012-03-05 17:39:52 +0000
committerSebastian Pop <spop@codeaurora.org>2012-03-05 17:39:52 +0000
commit74bebde7c4e2d1cfd4a16c19ce3c87521df67639 (patch)
tree07fd9dda4ac83ae4cf874a76cc41b70a61720229 /lib/Target/ARM/ARMInstrNEON.td
parent43ec0f4921e315dd9507be7467e633a837ad23db (diff)
downloadllvm-74bebde7c4e2d1cfd4a16c19ce3c87521df67639.tar.gz
llvm-74bebde7c4e2d1cfd4a16c19ce3c87521df67639.tar.bz2
llvm-74bebde7c4e2d1cfd4a16c19ce3c87521df67639.tar.xz
updated patch for the ARM fused multiply add/sub
In this update: - I assumed neon2 does not imply vfpv4, but neon and vfpv4 imply neon2. - I kept setting .fpu=neon-vfpv4 code attribute because that is what the assembler understands. Patch by Ana Pazos <apazos@codeaurora.org> git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@152036 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target/ARM/ARMInstrNEON.td')
-rw-r--r--lib/Target/ARM/ARMInstrNEON.td24
1 files changed, 12 insertions, 12 deletions
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index 387e16d146..17fe80851c 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -4060,10 +4060,10 @@ defm VMLA : N3VMulOp_QHS<0, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
def VMLAfd : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32",
v2f32, fmul_su, fadd_mlx>,
- Requires<[HasNEON, UseFPVMLx, NoNEONVFP4]>;
+ Requires<[HasNEON, UseFPVMLx, NoNEON2]>;
def VMLAfq : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla", "f32",
v4f32, fmul_su, fadd_mlx>,
- Requires<[HasNEON, UseFPVMLx, NoNEONVFP4]>;
+ Requires<[HasNEON, UseFPVMLx, NoNEON2]>;
defm VMLAsl : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D,
IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
def VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32",
@@ -4118,10 +4118,10 @@ defm VMLS : N3VMulOp_QHS<1, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
def VMLSfd : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32",
v2f32, fmul_su, fsub_mlx>,
- Requires<[HasNEON, UseFPVMLx, NoNEONVFP4]>;
+ Requires<[HasNEON, UseFPVMLx, NoNEON2]>;
def VMLSfq : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls", "f32",
v4f32, fmul_su, fsub_mlx>,
- Requires<[HasNEON, UseFPVMLx, NoNEONVFP4]>;
+ Requires<[HasNEON, UseFPVMLx, NoNEON2]>;
defm VMLSsl : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D,
IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
def VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32",
@@ -4174,19 +4174,19 @@ defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b111, "vqdmlsl", "s", int_arm_neon_vqdmlsl>;
// Fused Vector Multiply-Accumulate and Fused Multiply-Subtract Operations.
def VFMAfd : N3VDMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACD, "vfma", "f32",
v2f32, fmul_su, fadd_mlx>,
- Requires<[HasNEONVFP4]>;
+ Requires<[HasNEON2,FPContractions]>;
def VFMAfq : N3VQMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACQ, "vfma", "f32",
v4f32, fmul_su, fadd_mlx>,
- Requires<[HasNEONVFP4]>;
+ Requires<[HasNEON2,FPContractions]>;
// Fused Vector Multiply Subtract (floating-point)
def VFMSfd : N3VDMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACD, "vfms", "f32",
v2f32, fmul_su, fsub_mlx>,
- Requires<[HasNEONVFP4]>;
+ Requires<[HasNEON2,FPContractions]>;
def VFMSfq : N3VQMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACQ, "vfms", "f32",
v4f32, fmul_su, fsub_mlx>,
- Requires<[HasNEONVFP4]>;
+ Requires<[HasNEON2,FPContractions]>;
// Vector Subtract Operations.
@@ -5541,13 +5541,13 @@ def : N3VSPat<fadd, VADDfd>;
def : N3VSPat<fsub, VSUBfd>;
def : N3VSPat<fmul, VMULfd>;
def : N3VSMulOpPat<fmul, fadd, VMLAfd>,
- Requires<[HasNEON, UseNEONForFP, UseFPVMLx, NoNEONVFP4]>;
+ Requires<[HasNEON, UseNEONForFP, UseFPVMLx, NoNEON2]>;
def : N3VSMulOpPat<fmul, fsub, VMLSfd>,
- Requires<[HasNEON, UseNEONForFP, UseFPVMLx, NoNEONVFP4]>;
+ Requires<[HasNEON, UseNEONForFP, UseFPVMLx, NoNEON2]>;
def : N3VSMulOpPat<fmul, fadd, VFMAfd>,
- Requires<[HasNEONVFP4, UseNEONForFP]>;
+ Requires<[HasNEON2, UseNEONForFP,FPContractions]>;
def : N3VSMulOpPat<fmul, fsub, VFMSfd>,
- Requires<[HasNEONVFP4, UseNEONForFP]>;
+ Requires<[HasNEON2, UseNEONForFP,FPContractions]>;
def : N2VSPat<fabs, VABSfd>;
def : N2VSPat<fneg, VNEGfd>;
def : N3VSPat<NEONfmax, VMAXfd>;