summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/llvm/Target/TargetOptions.h40
-rw-r--r--lib/CodeGen/SelectionDAG/DAGCombiner.cpp4
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp4
-rw-r--r--lib/Target/ARM/ARMInstrInfo.td3
-rw-r--r--test/CodeGen/ARM/fusedMAC.ll2
-rw-r--r--test/CodeGen/PowerPC/a2-fp-basic.ll2
-rw-r--r--test/CodeGen/PowerPC/fma.ll2
-rw-r--r--test/CodeGen/PowerPC/ppc440-fp-basic.ll2
-rw-r--r--tools/llc/llc.cpp20
9 files changed, 55 insertions, 24 deletions
diff --git a/include/llvm/Target/TargetOptions.h b/include/llvm/Target/TargetOptions.h
index 84287fb5d7..3a1809a80a 100644
--- a/include/llvm/Target/TargetOptions.h
+++ b/include/llvm/Target/TargetOptions.h
@@ -30,12 +30,20 @@ namespace llvm {
};
}
+ namespace FPOpFusion {
+ enum FPOpFusionMode {
+ Fast, // Enable fusion of FP ops wherever it's profitable.
+ Standard, // Only allow fusion of 'blessed' ops (currently just fmuladd).
+ Strict // Never fuse FP-ops.
+ };
+ }
+
class TargetOptions {
public:
TargetOptions()
: PrintMachineCode(false), NoFramePointerElim(false),
NoFramePointerElimNonLeaf(false), LessPreciseFPMADOption(false),
- AllowExcessFPPrecision(false), UnsafeFPMath(false), NoInfsFPMath(false),
+ UnsafeFPMath(false), NoInfsFPMath(false),
NoNaNsFPMath(false), HonorSignDependentRoundingFPMathOption(false),
UseSoftFloat(false), NoZerosInBSS(false), JITExceptionHandling(false),
JITEmitDebugInfo(false), JITEmitDebugInfoToDisk(false),
@@ -43,7 +51,8 @@ namespace llvm {
StackAlignmentOverride(0), RealignStack(true),
DisableJumpTables(false), EnableFastISel(false),
PositionIndependentExecutable(false), EnableSegmentedStacks(false),
- UseInitArray(false), TrapFuncName(""), FloatABIType(FloatABI::Default)
+ UseInitArray(false), TrapFuncName(""), FloatABIType(FloatABI::Default),
+ AllowFPOpFusion(FPOpFusion::Standard)
{}
/// PrintMachineCode - This flag is enabled when the -print-machineinstrs
@@ -74,14 +83,6 @@ namespace llvm {
unsigned LessPreciseFPMADOption : 1;
bool LessPreciseFPMAD() const;
- /// AllowExcessFPPrecision - This flag is enabled when the
- /// -enable-excess-fp-precision flag is specified on the command line. This
- /// flag is OFF by default. When it is turned on, the code generator is
- /// allowed to produce results that are "more precise" than IEEE allows.
- /// This includes use of FMA-like operations and use of the X86 FP registers
- /// without rounding all over the place.
- unsigned AllowExcessFPPrecision : 1;
-
/// UnsafeFPMath - This flag is enabled when the
/// -enable-unsafe-fp-math flag is specified on the command line. When
/// this flag is off (the default), the code generator is not allowed to
@@ -189,6 +190,25 @@ namespace llvm {
/// Such a combination is unfortunately popular (e.g. arm-apple-darwin).
/// Hard presumes that the normal FP ABI is used.
FloatABI::ABIType FloatABIType;
+
+ /// AllowFPOpFusion - This flag is set by the -fuse-fp-ops=xxx option.
+ /// This controls the creation of fused FP ops that store intermediate
+ /// results in higher precision than IEEE allows (E.g. FMAs).
+ ///
+ /// Fast mode - allows formation of fused FP ops whenever they're
+ /// profitable.
+ /// Standard mode - allow fusion only for 'blessed' FP ops. At present the
+ /// only blessed op is the fmuladd intrinsic. In the future more blessed ops
+ /// may be added.
+ /// Strict mode - allow fusion only if/when it can be proven that the excess
+ /// precision won't effect the result.
+ ///
+ /// Note: This option only controls formation of fused ops by the optimizers.
+ /// Fused operations that are explicitly specified (e.g. FMA via the
+ /// llvm.fma.* intrinsic) will always be honored, regardless of the value of
+ /// this option.
+ FPOpFusion::FPOpFusionMode AllowFPOpFusion;
+
};
} // End llvm namespace
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 7babf4ab95..0bdd8b8549 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -5644,7 +5644,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
N0.getOperand(1), N1));
// FADD -> FMA combines:
- if ((DAG.getTarget().Options.AllowExcessFPPrecision ||
+ if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast ||
DAG.getTarget().Options.UnsafeFPMath) &&
DAG.getTarget().getTargetLowering()->isFMAFasterThanMulAndAdd(VT) &&
TLI.isOperationLegal(ISD::FMA, VT)) {
@@ -5721,7 +5721,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
}
// FSUB -> FMA combines:
- if ((DAG.getTarget().Options.AllowExcessFPPrecision ||
+ if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast ||
DAG.getTarget().Options.UnsafeFPMath) &&
DAG.getTarget().getTargetLowering()->isFMAFasterThanMulAndAdd(VT) &&
TLI.isOperationLegal(ISD::FMA, VT)) {
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 4152aa1ae1..50fd45e88b 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -4934,7 +4934,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
return 0;
case Intrinsic::fmuladd: {
EVT VT = TLI.getValueType(I.getType());
- if (TLI.isOperationLegal(ISD::FMA, VT) && TLI.isFMAFasterThanMulAndAdd(VT)){
+ if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict &&
+ TLI.isOperationLegal(ISD::FMA, VT) &&
+ TLI.isFMAFasterThanMulAndAdd(VT)){
setValue(&I, DAG.getNode(ISD::FMA, dl,
getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0)),
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 81e3527a6f..67f050131b 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -236,7 +236,8 @@ def UseFPVMLx : Predicate<"Subtarget->useFPVMLx()">;
// Prefer fused MAC for fp mul + add over fp VMLA / VMLS if they are available.
// But only select them if more precision in FP computation is allowed.
// Do not use them for Darwin platforms.
-def UseFusedMAC : Predicate<"TM.Options.AllowExcessFPPrecision && "
+def UseFusedMAC : Predicate<"(TM.Options.AllowFPOpFusion =="
+ " FPOpFusion::Fast) && "
"!Subtarget->isTargetDarwin()">;
def DontUseFusedMAC : Predicate<"!Subtarget->hasVFP4() || "
"Subtarget->isTargetDarwin()">;
diff --git a/test/CodeGen/ARM/fusedMAC.ll b/test/CodeGen/ARM/fusedMAC.ll
index 0cc1cddf21..725dd274e3 100644
--- a/test/CodeGen/ARM/fusedMAC.ll
+++ b/test/CodeGen/ARM/fusedMAC.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=armv7-eabi -mattr=+neon,+vfp4 -enable-excess-fp-precision | FileCheck %s
+; RUN: llc < %s -mtriple=armv7-eabi -mattr=+neon,+vfp4 -fuse-fp-ops=fast | FileCheck %s
; Check generated fused MAC and MLS.
define double @fusedMACTest1(double %d1, double %d2, double %d3) {
diff --git a/test/CodeGen/PowerPC/a2-fp-basic.ll b/test/CodeGen/PowerPC/a2-fp-basic.ll
index a47e662cc8..a4370fa452 100644
--- a/test/CodeGen/PowerPC/a2-fp-basic.ll
+++ b/test/CodeGen/PowerPC/a2-fp-basic.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=ppc64 -mcpu=a2 -enable-excess-fp-precision | FileCheck %s
+; RUN: llc < %s -march=ppc64 -mcpu=a2 -fuse-fp-ops=fast | FileCheck %s
%0 = type { double, double }
diff --git a/test/CodeGen/PowerPC/fma.ll b/test/CodeGen/PowerPC/fma.ll
index 02847147ed..4e05c279b7 100644
--- a/test/CodeGen/PowerPC/fma.ll
+++ b/test/CodeGen/PowerPC/fma.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=ppc32 -enable-excess-fp-precision | \
+; RUN: llc < %s -march=ppc32 -fuse-fp-ops=fast | \
; RUN: egrep {fn?madd|fn?msub} | count 8
define double @test_FMADD1(double %A, double %B, double %C) {
diff --git a/test/CodeGen/PowerPC/ppc440-fp-basic.ll b/test/CodeGen/PowerPC/ppc440-fp-basic.ll
index 25ec5f892c..6884570a8a 100644
--- a/test/CodeGen/PowerPC/ppc440-fp-basic.ll
+++ b/test/CodeGen/PowerPC/ppc440-fp-basic.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=ppc32 -mcpu=440 -enable-excess-fp-precision | FileCheck %s
+; RUN: llc < %s -march=ppc32 -mcpu=440 -fuse-fp-ops=fast | FileCheck %s
%0 = type { double, double }
diff --git a/tools/llc/llc.cpp b/tools/llc/llc.cpp
index b303cec3b5..e08bad93b0 100644
--- a/tools/llc/llc.cpp
+++ b/tools/llc/llc.cpp
@@ -156,11 +156,6 @@ DisableFPElimNonLeaf("disable-non-leaf-fp-elim",
cl::init(false));
static cl::opt<bool>
-EnableExcessPrecision("enable-excess-fp-precision",
- cl::desc("Enable optimizations that may increase FP precision"),
- cl::init(false));
-
-static cl::opt<bool>
EnableUnsafeFPMath("enable-unsafe-fp-math",
cl::desc("Enable optimizations that may decrease FP precision"),
cl::init(false));
@@ -199,6 +194,19 @@ FloatABIForCalls("float-abi",
"Hard float ABI (uses FP registers)"),
clEnumValEnd));
+static cl::opt<llvm::FPOpFusion::FPOpFusionMode>
+FuseFPOps("fuse-fp-ops",
+ cl::desc("Enable aggresive formation of fused FP ops"),
+ cl::init(FPOpFusion::Standard),
+ cl::values(
+ clEnumValN(FPOpFusion::Fast, "fast",
+ "Fuse FP ops whenever profitable"),
+ clEnumValN(FPOpFusion::Standard, "standard",
+ "Only fuse 'blessed' FP ops."),
+ clEnumValN(FPOpFusion::Strict, "strict",
+ "Only fuse FP ops when the result won't be effected."),
+ clEnumValEnd));
+
static cl::opt<bool>
DontPlaceZerosInBSS("nozero-initialized-in-bss",
cl::desc("Don't place zero-initialized symbols into bss section"),
@@ -404,7 +412,7 @@ int main(int argc, char **argv) {
Options.LessPreciseFPMADOption = EnableFPMAD;
Options.NoFramePointerElim = DisableFPElim;
Options.NoFramePointerElimNonLeaf = DisableFPElimNonLeaf;
- Options.AllowExcessFPPrecision = EnableExcessPrecision;
+ Options.AllowFPOpFusion = FuseFPOps;
Options.UnsafeFPMath = EnableUnsafeFPMath;
Options.NoInfsFPMath = EnableNoInfsFPMath;
Options.NoNaNsFPMath = EnableNoNaNsFPMath;