summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorAnton Korobeynikov <asl@math.spbu.ru>2012-01-22 12:07:33 +0000
committerAnton Korobeynikov <asl@math.spbu.ru>2012-01-22 12:07:33 +0000
commit4b4e62219be91839091f9e35d8accf877f925d81 (patch)
tree500dc4bc89d4e5610dc5ccaae2af18abb421d461 /lib
parent381e2bee6520a10d736a7fa633809c372cfab37c (diff)
downloadllvm-4b4e62219be91839091f9e35d8accf877f925d81.tar.gz
llvm-4b4e62219be91839091f9e35d8accf877f925d81.tar.bz2
llvm-4b4e62219be91839091f9e35d8accf877f925d81.tar.xz
Add fused multiple+add instructions from VFPv4.
Patch by Ana Pazos! git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@148658 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r--lib/Target/ARM/ARM.td6
-rw-r--r--lib/Target/ARM/ARMAsmPrinter.cpp16
-rw-r--r--lib/Target/ARM/ARMInstrInfo.td6
-rw-r--r--lib/Target/ARM/ARMInstrNEON.td34
-rw-r--r--lib/Target/ARM/ARMInstrVFP.td137
-rw-r--r--lib/Target/ARM/ARMSchedule.td4
-rw-r--r--lib/Target/ARM/ARMSubtarget.cpp2
-rw-r--r--lib/Target/ARM/ARMSubtarget.h8
8 files changed, 186 insertions, 27 deletions
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index 86aeeb21a7..dd9c4bd793 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -32,9 +32,15 @@ def FeatureVFP2 : SubtargetFeature<"vfp2", "HasVFPv2", "true",
def FeatureVFP3 : SubtargetFeature<"vfp3", "HasVFPv3", "true",
"Enable VFP3 instructions",
[FeatureVFP2]>;
+def FeatureVFP4 : SubtargetFeature<"vfp4", "HasVFPv4", "true",
+ "Enable VFP4 instructions",
+ [FeatureVFP3]>;
def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true",
"Enable NEON instructions",
[FeatureVFP3]>;
+def FeatureNEONVFP4 : SubtargetFeature<"neon-vfpv4", "HasNEONVFPv4", "true",
+ "Enable NEON-VFP4 instructions",
+ [FeatureVFP4, FeatureNEON]>;
def FeatureThumb2 : SubtargetFeature<"thumb2", "HasThumb2", "true",
"Enable Thumb2 instructions">;
def FeatureNoARM : SubtargetFeature<"noarm", "NoARM", "true",
diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp
index 270d9d8735..288b7f14e4 100644
--- a/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -719,15 +719,25 @@ void ARMAsmPrinter::emitAttributes() {
if (Subtarget->hasNEON() && emitFPU) {
/* NEON is not exactly a VFP architecture, but GAS emit one of
- * neon/vfpv3/vfpv2 for .fpu parameters */
- AttrEmitter->EmitTextAttribute(ARMBuildAttrs::Advanced_SIMD_arch, "neon");
+ * neon/neon-vfpv4/vfpv3/vfpv2 for .fpu parameters */
+ if (Subtarget->hasNEONVFP4())
+ AttrEmitter->EmitTextAttribute(ARMBuildAttrs::Advanced_SIMD_arch, "neon-vfpv4");
+ else
+ AttrEmitter->EmitTextAttribute(ARMBuildAttrs::Advanced_SIMD_arch, "neon");
/* If emitted for NEON, omit from VFP below, since you can have both
* NEON and VFP in build attributes but only one .fpu */
emitFPU = false;
}
+ /* VFPv4 + .fpu */
+ if (Subtarget->hasVFP4()) {
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::VFP_arch,
+ ARMBuildAttrs::AllowFPv4A);
+ if (emitFPU)
+ AttrEmitter->EmitTextAttribute(ARMBuildAttrs::VFP_arch, "vfpv4");
+
/* VFPv3 + .fpu */
- if (Subtarget->hasVFP3()) {
+ } else if (Subtarget->hasVFP3()) {
AttrEmitter->EmitAttribute(ARMBuildAttrs::VFP_arch,
ARMBuildAttrs::AllowFPv3A);
if (emitFPU)
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index dabd187069..99d180a569 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -179,8 +179,14 @@ def HasVFP2 : Predicate<"Subtarget->hasVFP2()">,
AssemblerPredicate<"FeatureVFP2">;
def HasVFP3 : Predicate<"Subtarget->hasVFP3()">,
AssemblerPredicate<"FeatureVFP3">;
+def HasVFP4 : Predicate<"Subtarget->hasVFP4()">,
+ AssemblerPredicate<"FeatureVFP4">;
+def NoVFP4 : Predicate<"!Subtarget->hasVFP4()">;
def HasNEON : Predicate<"Subtarget->hasNEON()">,
AssemblerPredicate<"FeatureNEON">;
+def HasNEONVFP4 : Predicate<"Subtarget->hasNEONVFP4()">,
+ AssemblerPredicate<"FeatureNEONVFP4">;
+def NoNEONVFP4 : Predicate<"!Subtarget->hasNEONVFP4()">;
def HasFP16 : Predicate<"Subtarget->hasFP16()">,
AssemblerPredicate<"FeatureFP16">;
def HasDivide : Predicate<"Subtarget->hasDivide()">,
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index a2df5bde88..f9626fef00 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -3897,10 +3897,10 @@ defm VMLA : N3VMulOp_QHS<0, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
def VMLAfd : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32",
v2f32, fmul_su, fadd_mlx>,
- Requires<[HasNEON, UseFPVMLx]>;
+ Requires<[HasNEON, UseFPVMLx, NoNEONVFP4]>;
def VMLAfq : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla", "f32",
v4f32, fmul_su, fadd_mlx>,
- Requires<[HasNEON, UseFPVMLx]>;
+ Requires<[HasNEON, UseFPVMLx, NoNEONVFP4]>;
defm VMLAsl : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D,
IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
def VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32",
@@ -3955,10 +3955,10 @@ defm VMLS : N3VMulOp_QHS<1, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
def VMLSfd : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32",
v2f32, fmul_su, fsub_mlx>,
- Requires<[HasNEON, UseFPVMLx]>;
+ Requires<[HasNEON, UseFPVMLx, NoNEONVFP4]>;
def VMLSfq : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls", "f32",
v4f32, fmul_su, fsub_mlx>,
- Requires<[HasNEON, UseFPVMLx]>;
+ Requires<[HasNEON, UseFPVMLx, NoNEONVFP4]>;
defm VMLSsl : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D,
IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
def VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32",
@@ -4007,6 +4007,24 @@ defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D,
"vqdmlsl", "s", int_arm_neon_vqdmlsl>;
defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b111, "vqdmlsl", "s", int_arm_neon_vqdmlsl>;
+
+// Fused Vector Multiply-Accumulate and Fused Multiply-Subtract Operations.
+def VFMAfd : N3VDMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACD, "vfma", "f32",
+ v2f32, fmul_su, fadd_mlx>,
+ Requires<[HasNEONVFP4]>;
+
+def VFMAfq : N3VQMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACQ, "vfma", "f32",
+ v4f32, fmul_su, fadd_mlx>,
+ Requires<[HasNEONVFP4]>;
+
+// Fused Vector Multiply Subtract (floating-point)
+def VFMSfd : N3VDMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACD, "vfms", "f32",
+ v2f32, fmul_su, fsub_mlx>,
+ Requires<[HasNEONVFP4]>;
+def VFMSfq : N3VQMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACQ, "vfms", "f32",
+ v4f32, fmul_su, fsub_mlx>,
+ Requires<[HasNEONVFP4]>;
+
// Vector Subtract Operations.
// VSUB : Vector Subtract (integer and floating-point)
@@ -5358,9 +5376,13 @@ def : N3VSPat<fadd, VADDfd>;
def : N3VSPat<fsub, VSUBfd>;
def : N3VSPat<fmul, VMULfd>;
def : N3VSMulOpPat<fmul, fadd, VMLAfd>,
- Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>;
+ Requires<[HasNEON, UseNEONForFP, UseFPVMLx, NoNEONVFP4]>;
def : N3VSMulOpPat<fmul, fsub, VMLSfd>,
- Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>;
+ Requires<[HasNEON, UseNEONForFP, UseFPVMLx, NoNEONVFP4]>;
+def : N3VSMulOpPat<fmul, fadd, VFMAfd>,
+ Requires<[HasNEONVFP4, UseNEONForFP]>;
+def : N3VSMulOpPat<fmul, fsub, VFMSfd>,
+ Requires<[HasNEONVFP4, UseNEONForFP]>;
def : N2VSPat<fabs, VABSfd>;
def : N2VSPat<fneg, VNEGfd>;
def : N3VSPat<NEONfmax, VMAXfd>;
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index 81c8c7e45f..1c07394f39 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -920,7 +920,7 @@ def VMLAD : ADbI<0b11100, 0b00, 0, 0,
[(set DPR:$Dd, (fadd_mlx (fmul_su DPR:$Dn, DPR:$Dm),
(f64 DPR:$Ddin)))]>,
RegConstraint<"$Ddin = $Dd">,
- Requires<[HasVFP2,UseFPVMLx]>;
+ Requires<[HasVFP2,UseFPVMLx,NoVFP4]>;
def VMLAS : ASbIn<0b11100, 0b00, 0, 0,
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
@@ -928,7 +928,7 @@ def VMLAS : ASbIn<0b11100, 0b00, 0, 0,
[(set SPR:$Sd, (fadd_mlx (fmul_su SPR:$Sn, SPR:$Sm),
SPR:$Sdin))]>,
RegConstraint<"$Sdin = $Sd">,
- Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> {
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,NoVFP4]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
@@ -936,10 +936,10 @@ def VMLAS : ASbIn<0b11100, 0b00, 0, 0,
def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
(VMLAD DPR:$dstin, DPR:$a, DPR:$b)>,
- Requires<[HasVFP2,UseFPVMLx]>;
+ Requires<[HasVFP2,UseFPVMLx,NoVFP4]>;
def : Pat<(fadd_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
(VMLAS SPR:$dstin, SPR:$a, SPR:$b)>,
- Requires<[HasVFP2,DontUseNEONForFP, UseFPVMLx]>;
+ Requires<[HasVFP2,DontUseNEONForFP, UseFPVMLx,NoVFP4]>;
def VMLSD : ADbI<0b11100, 0b00, 1, 0,
(outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
@@ -947,7 +947,7 @@ def VMLSD : ADbI<0b11100, 0b00, 1, 0,
[(set DPR:$Dd, (fadd_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
(f64 DPR:$Ddin)))]>,
RegConstraint<"$Ddin = $Dd">,
- Requires<[HasVFP2,UseFPVMLx]>;
+ Requires<[HasVFP2,UseFPVMLx,NoVFP4]>;
def VMLSS : ASbIn<0b11100, 0b00, 1, 0,
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
@@ -955,7 +955,7 @@ def VMLSS : ASbIn<0b11100, 0b00, 1, 0,
[(set SPR:$Sd, (fadd_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
SPR:$Sdin))]>,
RegConstraint<"$Sdin = $Sd">,
- Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> {
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,NoVFP4]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
@@ -963,10 +963,10 @@ def VMLSS : ASbIn<0b11100, 0b00, 1, 0,
def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
(VMLSD DPR:$dstin, DPR:$a, DPR:$b)>,
- Requires<[HasVFP2,UseFPVMLx]>;
+ Requires<[HasVFP2,UseFPVMLx,NoVFP4]>;
def : Pat<(fsub_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
(VMLSS SPR:$dstin, SPR:$a, SPR:$b)>,
- Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>;
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,NoVFP4]>;
def VNMLAD : ADbI<0b11100, 0b01, 1, 0,
(outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
@@ -974,7 +974,7 @@ def VNMLAD : ADbI<0b11100, 0b01, 1, 0,
[(set DPR:$Dd,(fsub_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
(f64 DPR:$Ddin)))]>,
RegConstraint<"$Ddin = $Dd">,
- Requires<[HasVFP2,UseFPVMLx]>;
+ Requires<[HasVFP2,UseFPVMLx,NoVFP4]>;
def VNMLAS : ASbI<0b11100, 0b01, 1, 0,
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
@@ -982,7 +982,7 @@ def VNMLAS : ASbI<0b11100, 0b01, 1, 0,
[(set SPR:$Sd, (fsub_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
SPR:$Sdin))]>,
RegConstraint<"$Sdin = $Sd">,
- Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> {
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,NoVFP4]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
@@ -990,10 +990,10 @@ def VNMLAS : ASbI<0b11100, 0b01, 1, 0,
def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin),
(VNMLAD DPR:$dstin, DPR:$a, DPR:$b)>,
- Requires<[HasVFP2,UseFPVMLx]>;
+ Requires<[HasVFP2,UseFPVMLx,NoVFP4]>;
def : Pat<(fsub_mlx (fneg (fmul_su SPR:$a, SPR:$b)), SPR:$dstin),
(VNMLAS SPR:$dstin, SPR:$a, SPR:$b)>,
- Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>;
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,NoVFP4]>;
def VNMLSD : ADbI<0b11100, 0b01, 0, 0,
(outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
@@ -1001,14 +1001,14 @@ def VNMLSD : ADbI<0b11100, 0b01, 0, 0,
[(set DPR:$Dd, (fsub_mlx (fmul_su DPR:$Dn, DPR:$Dm),
(f64 DPR:$Ddin)))]>,
RegConstraint<"$Ddin = $Dd">,
- Requires<[HasVFP2,UseFPVMLx]>;
+ Requires<[HasVFP2,UseFPVMLx,NoVFP4]>;
def VNMLSS : ASbI<0b11100, 0b01, 0, 0,
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
IIC_fpMAC32, "vnmls", ".f32\t$Sd, $Sn, $Sm",
[(set SPR:$Sd, (fsub_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>,
RegConstraint<"$Sdin = $Sd">,
- Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> {
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,NoVFP4]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
@@ -1016,11 +1016,116 @@ def VNMLSS : ASbI<0b11100, 0b01, 0, 0,
def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin),
(VNMLSD DPR:$dstin, DPR:$a, DPR:$b)>,
- Requires<[HasVFP2,UseFPVMLx]>;
+ Requires<[HasVFP2,UseFPVMLx,NoVFP4]>;
def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin),
(VNMLSS SPR:$dstin, SPR:$a, SPR:$b)>,
- Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>;
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,NoVFP4]>;
+//===----------------------------------------------------------------------===//
+// Fused FP Multiply-Accumulate Operations.
+//
+def VFMAD : ADbI<0b11101, 0b10, 0, 0,
+ (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
+ IIC_fpFMAC64, "vfma", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd, (fadd_mlx (fmul_su DPR:$Dn, DPR:$Dm),
+ (f64 DPR:$Ddin)))]>,
+ RegConstraint<"$Ddin = $Dd">,
+ Requires<[HasVFP4]>;
+
+def VFMAS : ASbIn<0b11101, 0b10, 0, 0,
+ (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
+ IIC_fpFMAC32, "vfma", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fadd_mlx (fmul_su SPR:$Sn, SPR:$Sm),
+ SPR:$Sdin))]>,
+ RegConstraint<"$Sdin = $Sd">,
+ Requires<[HasVFP4,DontUseNEONForFP]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines.
+}
+
+def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
+ (VFMAD DPR:$dstin, DPR:$a, DPR:$b)>,
+ Requires<[HasVFP4]>;
+def : Pat<(fadd_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
+ (VFMAS SPR:$dstin, SPR:$a, SPR:$b)>,
+ Requires<[HasVFP4,DontUseNEONForFP]>;
+
+def VFMSD : ADbI<0b11101, 0b10, 1, 0,
+ (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
+ IIC_fpFMAC64, "vfms", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd, (fadd_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
+ (f64 DPR:$Ddin)))]>,
+ RegConstraint<"$Ddin = $Dd">,
+ Requires<[HasVFP4]>;
+
+def VFMSS : ASbIn<0b11101, 0b10, 1, 0,
+ (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
+ IIC_fpFMAC32, "vfms", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fadd_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
+ SPR:$Sdin))]>,
+ RegConstraint<"$Sdin = $Sd">,
+ Requires<[HasVFP4,DontUseNEONForFP]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines.
+}
+
+def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
+ (VFMSD DPR:$dstin, DPR:$a, DPR:$b)>,
+ Requires<[HasVFP4]>;
+def : Pat<(fsub_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
+ (VFMSS SPR:$dstin, SPR:$a, SPR:$b)>,
+ Requires<[HasVFP4,DontUseNEONForFP]>;
+
+def VFNMAD : ADbI<0b11101, 0b01, 1, 0,
+ (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
+ IIC_fpFMAC64, "vfnma", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd,(fsub_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
+ (f64 DPR:$Ddin)))]>,
+ RegConstraint<"$Ddin = $Dd">,
+ Requires<[HasVFP4]>;
+
+def VFNMAS : ASbI<0b11101, 0b01, 1, 0,
+ (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
+ IIC_fpFMAC32, "vfnma", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fsub_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
+ SPR:$Sdin))]>,
+ RegConstraint<"$Sdin = $Sd">,
+ Requires<[HasVFP4,DontUseNEONForFP]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines.
+}
+
+def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin),
+ (VFNMAD DPR:$dstin, DPR:$a, DPR:$b)>,
+ Requires<[HasVFP4]>;
+def : Pat<(fsub_mlx (fneg (fmul_su SPR:$a, SPR:$b)), SPR:$dstin),
+ (VFNMAS SPR:$dstin, SPR:$a, SPR:$b)>,
+ Requires<[HasVFP4,DontUseNEONForFP]>;
+
+def VFNMSD : ADbI<0b11101, 0b01, 0, 0,
+ (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
+ IIC_fpFMAC64, "vfnms", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd, (fsub_mlx (fmul_su DPR:$Dn, DPR:$Dm),
+ (f64 DPR:$Ddin)))]>,
+ RegConstraint<"$Ddin = $Dd">,
+ Requires<[HasVFP4]>;
+
+def VFNMSS : ASbI<0b11101, 0b01, 0, 0,
+ (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
+ IIC_fpFMAC32, "vfnms", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fsub_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>,
+ RegConstraint<"$Sdin = $Sd">,
+ Requires<[HasVFP4,DontUseNEONForFP]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines.
+}
+
+def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin),
+ (VFNMSD DPR:$dstin, DPR:$a, DPR:$b)>,
+ Requires<[HasVFP4]>;
+def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin),
+ (VFNMSS SPR:$dstin, SPR:$a, SPR:$b)>,
+ Requires<[HasVFP4,DontUseNEONForFP]>;
//===----------------------------------------------------------------------===//
// FP Conditional moves.
diff --git a/lib/Target/ARM/ARMSchedule.td b/lib/Target/ARM/ARMSchedule.td
index 958c5c6470..86caa403f9 100644
--- a/lib/Target/ARM/ARMSchedule.td
+++ b/lib/Target/ARM/ARMSchedule.td
@@ -118,6 +118,8 @@ def IIC_fpMUL32 : InstrItinClass;
def IIC_fpMUL64 : InstrItinClass;
def IIC_fpMAC32 : InstrItinClass;
def IIC_fpMAC64 : InstrItinClass;
+def IIC_fpFMAC32 : InstrItinClass;
+def IIC_fpFMAC64 : InstrItinClass;
def IIC_fpDIV32 : InstrItinClass;
def IIC_fpDIV64 : InstrItinClass;
def IIC_fpSQRT32 : InstrItinClass;
@@ -208,6 +210,8 @@ def IIC_VPERMQ : InstrItinClass;
def IIC_VPERMQ3 : InstrItinClass;
def IIC_VMACD : InstrItinClass;
def IIC_VMACQ : InstrItinClass;
+def IIC_VFMACD : InstrItinClass;
+def IIC_VFMACQ : InstrItinClass;
def IIC_VRECSD : InstrItinClass;
def IIC_VRECSQ : InstrItinClass;
def IIC_VCNTiD : InstrItinClass;
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index 074cd4c0d7..bb77a3c48c 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -47,7 +47,9 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU,
, HasV7Ops(false)
, HasVFPv2(false)
, HasVFPv3(false)
+ , HasVFPv4(false)
, HasNEON(false)
+ , HasNEONVFPv4(false)
, UseNEONForSinglePrecisionFP(false)
, SlowFPVMLx(false)
, HasVMLxForwarding(false)
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index 85479a5e1c..b6c40d32cf 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -45,11 +45,13 @@ protected:
bool HasV6T2Ops;
bool HasV7Ops;
- /// HasVFPv2, HasVFPv3, HasNEON - Specify what floating point ISAs are
- /// supported.
+ /// HasVFPv2, HasVFPv3, HasVFPv4, HasNEON, HasNEONVFPv4 - Specify what
+ /// floating point ISAs are supported.
bool HasVFPv2;
bool HasVFPv3;
+ bool HasVFPv4;
bool HasNEON;
+ bool HasNEONVFPv4;
/// UseNEONForSinglePrecisionFP - if the NEONFP attribute has been
/// specified. Use the method useNEONForSinglePrecisionFP() to
@@ -197,7 +199,9 @@ protected:
bool hasVFP2() const { return HasVFPv2; }
bool hasVFP3() const { return HasVFPv3; }
+ bool hasVFP4() const { return HasVFPv4; }
bool hasNEON() const { return HasNEON; }
+ bool hasNEONVFP4() const { return HasNEONVFPv4; }
bool useNEONForSinglePrecisionFP() const {
return hasNEON() && UseNEONForSinglePrecisionFP; }