Add DAG-combines for aggressive FMA formation.

This patch adds DAG combines to form FMAs from pairs of FADD + FMUL or FSUB + FMUL. The combines are performed when: (a) either the AllowExcessFPPrecision option (-enable-excess-fp-precision for llc) or the UnsafeFPMath option (-enable-unsafe-fp-math) is set, and (b) TargetLoweringInfo::isFMAFasterThanMulAndAdd(VT) is true for the type of the FADD/FSUB, and (c) the FMUL has only one user (the FADD/FSUB). If your target has fast FMA instructions, you can make use of these combines by overriding TargetLoweringInfo::isFMAFasterThanMulAndAdd(VT) to return true for the types supported by your FMA instruction, and by adding patterns that match ISD::FMA to your FMA instructions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@158757 91177308-0d34-0410-b5e6-96231b3b80d8
author: Lang Hames <lhames@gmail.com> 2012-06-19 22:51:23 +0000
committer: Lang Hames <lhames@gmail.com> 2012-06-19 22:51:23 +0000
commit: d693cafcfb9e67ba7040cb810e4409a166421482 (patch)
tree: 86a20682e48b7aaf402fc95f6497eae48e8fa89a /lib
parent: fa8becb6f9e8aa2cbe3bab79e1fc4cdf2ffbb8e5 (diff)
download: llvm-d693cafcfb9e67ba7040cb810e4409a166421482.tar.gz
llvm-d693cafcfb9e67ba7040cb810e4409a166421482.tar.bz2
llvm-d693cafcfb9e67ba7040cb810e4409a166421482.tar.xz
3 files changed, 45 insertions, 2 deletions
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 369d927112..3517b7cfbe 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -5633,6 +5633,26 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
                        DAG.getNode(ISD::FADD, N->getDebugLoc(), VT,
                                    N0.getOperand(1), N1));
 
+  // FADD -> FMA combines:
+  if ((DAG.getTarget().Options.AllowExcessFPPrecision ||
+       DAG.getTarget().Options.UnsafeFPMath) &&
+      DAG.getTarget().getTargetLowering()->isFMAFasterThanMulAndAdd(VT) &&
+      TLI.isOperationLegal(ISD::FMA, VT)) {
+
+    // fold (fadd (fmul x, y), z) -> (fma x, y, z)
+    if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse()) {
+      return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT,
+                         N0.getOperand(0), N0.getOperand(1), N1);
+    }
+  
+    // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
+    // Note: Commutes FADD operands.
+    if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse()) {
+      return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT,
+                         N1.getOperand(0), N1.getOperand(1), N0);
+    }
+  }
+
   return SDValue();
 }
 
@@ -5690,6 +5710,29 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
     }
   }
 
+  // FSUB -> FMA combines:
+  if ((DAG.getTarget().Options.AllowExcessFPPrecision ||
+       DAG.getTarget().Options.UnsafeFPMath) &&
+      DAG.getTarget().getTargetLowering()->isFMAFasterThanMulAndAdd(VT) &&
+      TLI.isOperationLegal(ISD::FMA, VT)) {
+
+    // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
+    if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse()) {
+      return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT,
+                         N0.getOperand(0), N0.getOperand(1),
+                         DAG.getNode(ISD::FNEG, N1->getDebugLoc(), VT, N1));
+    }
+
+    // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
+    // Note: Commutes FSUB operands.
+    if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse()) {
+      return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT,
+                         DAG.getNode(ISD::FNEG, N1->getDebugLoc(), VT,
+                         N1.getOperand(0)),
+                         N1.getOperand(1), N0);
+    }
+  }
+
   return SDValue();
 }
 
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 5131152d1e..81e3527a6f 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -236,7 +236,7 @@ def UseFPVMLx        : Predicate<"Subtarget->useFPVMLx()">;
 // Prefer fused MAC for fp mul + add over fp VMLA / VMLS if they are available.
 // But only select them if more precision in FP computation is allowed.
 // Do not use them for Darwin platforms.
-def UseFusedMAC      : Predicate<"!TM.Options.NoExcessFPPrecision && "
+def UseFusedMAC      : Predicate<"TM.Options.AllowExcessFPPrecision && "
                                  "!Subtarget->isTargetDarwin()">;
 def DontUseFusedMAC  : Predicate<"!Subtarget->hasVFP4() || "
                                  "Subtarget->isTargetDarwin()">;
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index c9cdd5c192..25b6dc733e 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -353,7 +353,7 @@ def xaddroff : ComplexPattern<iPTR, 1, "SelectAddrIdxOffs", [], []>;
 
 //===----------------------------------------------------------------------===//
 // PowerPC Instruction Predicate Definitions.
-def FPContractions : Predicate<"!TM.Options.NoExcessFPPrecision">;
+def FPContractions : Predicate<"TM.Options.AllowExcessFPPrecision">;
 def In32BitMode  : Predicate<"!PPCSubTarget.isPPC64()">;
 def In64BitMode  : Predicate<"PPCSubTarget.isPPC64()">;
 def IsBookE  : Predicate<"PPCSubTarget.isBookE()">;
author	Lang Hames <lhames@gmail.com>	2012-06-19 22:51:23 +0000
committer	Lang Hames <lhames@gmail.com>	2012-06-19 22:51:23 +0000
commit	d693cafcfb9e67ba7040cb810e4409a166421482 (patch)
tree	86a20682e48b7aaf402fc95f6497eae48e8fa89a /lib
parent	fa8becb6f9e8aa2cbe3bab79e1fc4cdf2ffbb8e5 (diff)
download	llvm-d693cafcfb9e67ba7040cb810e4409a166421482.tar.gz llvm-d693cafcfb9e67ba7040cb810e4409a166421482.tar.bz2 llvm-d693cafcfb9e67ba7040cb810e4409a166421482.tar.xz