diff options
author | Lang Hames <lhames@gmail.com> | 2012-06-19 22:51:23 +0000 |
---|---|---|
committer | Lang Hames <lhames@gmail.com> | 2012-06-19 22:51:23 +0000 |
commit | d693cafcfb9e67ba7040cb810e4409a166421482 (patch) | |
tree | 86a20682e48b7aaf402fc95f6497eae48e8fa89a /lib | |
parent | fa8becb6f9e8aa2cbe3bab79e1fc4cdf2ffbb8e5 (diff) | |
download | llvm-d693cafcfb9e67ba7040cb810e4409a166421482.tar.gz llvm-d693cafcfb9e67ba7040cb810e4409a166421482.tar.bz2 llvm-d693cafcfb9e67ba7040cb810e4409a166421482.tar.xz |
Add DAG-combines for aggressive FMA formation.
This patch adds DAG combines to form FMAs from pairs of FADD + FMUL or
FSUB + FMUL. The combines are performed when:
(a) Either
AllowExcessFPPrecision option (-enable-excess-fp-precision for llc)
OR
UnsafeFPMath option (-enable-unsafe-fp-math)
are set, and
(b) TargetLoweringInfo::isFMAFasterThanMulAndAdd(VT) is true for the type of
the FADD/FSUB, and
(c) The FMUL only has one user (the FADD/FSUB).
If your target has fast FMA instructions you can make use of these combines by
overriding TargetLoweringInfo::isFMAFasterThanMulAndAdd(VT) to return true for
types supported by your FMA instruction, and adding patterns to match ISD::FMA
to your FMA instructions.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@158757 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r-- | lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 43 | ||||
-rw-r--r-- | lib/Target/ARM/ARMInstrInfo.td | 2 | ||||
-rw-r--r-- | lib/Target/PowerPC/PPCInstrInfo.td | 2 |
3 files changed, 45 insertions, 2 deletions
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 369d927112..3517b7cfbe 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -5633,6 +5633,26 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0.getOperand(1), N1)); + // FADD -> FMA combines: + if ((DAG.getTarget().Options.AllowExcessFPPrecision || + DAG.getTarget().Options.UnsafeFPMath) && + DAG.getTarget().getTargetLowering()->isFMAFasterThanMulAndAdd(VT) && + TLI.isOperationLegal(ISD::FMA, VT)) { + + // fold (fadd (fmul x, y), z) -> (fma x, y, z) + if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse()) { + return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT, + N0.getOperand(0), N0.getOperand(1), N1); + } + + // fold (fadd x, (fmul y, z)) -> (fma x, y, z) + // Note: Commutes FADD operands. + if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse()) { + return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT, + N1.getOperand(0), N1.getOperand(1), N0); + } + } + return SDValue(); } @@ -5690,6 +5710,29 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { } } + // FSUB -> FMA combines: + if ((DAG.getTarget().Options.AllowExcessFPPrecision || + DAG.getTarget().Options.UnsafeFPMath) && + DAG.getTarget().getTargetLowering()->isFMAFasterThanMulAndAdd(VT) && + TLI.isOperationLegal(ISD::FMA, VT)) { + + // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z)) + if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse()) { + return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT, + N0.getOperand(0), N0.getOperand(1), + DAG.getNode(ISD::FNEG, N1->getDebugLoc(), VT, N1)); + } + + // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x) + // Note: Commutes FSUB operands. + if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse()) { + return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT, + DAG.getNode(ISD::FNEG, N1->getDebugLoc(), VT, + N1.getOperand(0)), + N1.getOperand(1), N0); + } + } + return SDValue(); } diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 5131152d1e..81e3527a6f 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -236,7 +236,7 @@ def UseFPVMLx : Predicate<"Subtarget->useFPVMLx()">; // Prefer fused MAC for fp mul + add over fp VMLA / VMLS if they are available. // But only select them if more precision in FP computation is allowed. // Do not use them for Darwin platforms. -def UseFusedMAC : Predicate<"!TM.Options.NoExcessFPPrecision && " +def UseFusedMAC : Predicate<"TM.Options.AllowExcessFPPrecision && " "!Subtarget->isTargetDarwin()">; def DontUseFusedMAC : Predicate<"!Subtarget->hasVFP4() || " "Subtarget->isTargetDarwin()">; diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index c9cdd5c192..25b6dc733e 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -353,7 +353,7 @@ def xaddroff : ComplexPattern<iPTR, 1, "SelectAddrIdxOffs", [], []>; //===----------------------------------------------------------------------===// // PowerPC Instruction Predicate Definitions. -def FPContractions : Predicate<"!TM.Options.NoExcessFPPrecision">; +def FPContractions : Predicate<"TM.Options.AllowExcessFPPrecision">; def In32BitMode : Predicate<"!PPCSubTarget.isPPC64()">; def In64BitMode : Predicate<"PPCSubTarget.isPPC64()">; def IsBookE : Predicate<"PPCSubTarget.isBookE()">; |