From 0521928ae7cc492f3f45ef0e0cedc349102489c5 Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Thu, 6 Jan 2011 06:52:41 +0000 Subject: Re-implement r122936 with proper target hooks. Now getMaxStoresPerMemcpy etc. takes an option OptSize. If OptSize is true, it would return the inline limit for functions with attribute OptSize. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@122952 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 27 ++++++++++++--------------- lib/CodeGen/SelectionDAG/TargetLowering.cpp | 2 ++ lib/Target/ARM/ARMISelLowering.cpp | 3 ++- lib/Target/X86/X86ISelLowering.cpp | 9 ++++++--- lib/Target/XCore/XCoreISelLowering.cpp | 5 +++-- 5 files changed, 25 insertions(+), 21 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 110812c437..27dd4ea145 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -3281,15 +3281,6 @@ static bool FindOptimalMemOpLowering(std::vector &MemOps, VT = LVT; } - // If we're optimizing for size, and there is a limit, bump the maximum number - // of operations inserted down to 4. This is a wild guess that approximates - // the size of a call to memcpy or memset (3 arguments + call). - if (Limit != ~0U) { - const Function *F = DAG.getMachineFunction().getFunction(); - if (F->hasFnAttr(Attribute::OptimizeForSize)) - Limit = 4; - } - unsigned NumMemOps = 0; while (Size != 0) { unsigned VTSize = VT.getSizeInBits() / 8; @@ -3335,7 +3326,9 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, const TargetLowering &TLI = DAG.getTargetLoweringInfo(); std::vector MemOps; bool DstAlignCanChange = false; - MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + bool OptSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize); FrameIndexSDNode *FI = dyn_cast(Dst); if (FI && !MFI->isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; @@ -3345,7 +3338,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, std::string Str; bool CopyFromStr = isMemSrcFromString(Src, Str); bool isZeroStr = CopyFromStr && Str.empty(); - unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy(); + unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy(OptSize); if (!FindOptimalMemOpLowering(MemOps, Limit, Size, (DstAlignCanChange ? 0 : Align), @@ -3426,14 +3419,16 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, const TargetLowering &TLI = DAG.getTargetLoweringInfo(); std::vector MemOps; bool DstAlignCanChange = false; - MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + bool OptSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize); FrameIndexSDNode *FI = dyn_cast(Dst); if (FI && !MFI->isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; unsigned SrcAlign = DAG.InferPtrAlignment(Src); if (Align > SrcAlign) SrcAlign = Align; - unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemmove(); + unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemmove(OptSize); if (!FindOptimalMemOpLowering(MemOps, Limit, Size, (DstAlignCanChange ? 0 : Align), @@ -3502,13 +3497,15 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl, const TargetLowering &TLI = DAG.getTargetLoweringInfo(); std::vector MemOps; bool DstAlignCanChange = false; - MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + bool OptSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize); FrameIndexSDNode *FI = dyn_cast(Dst); if (FI && !MFI->isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; bool NonScalarIntSafe = isa(Src) && cast(Src)->isNullValue(); - if (!FindOptimalMemOpLowering(MemOps, TLI.getMaxStoresPerMemset(), + if (!FindOptimalMemOpLowering(MemOps, TLI.getMaxStoresPerMemset(OptSize), Size, (DstAlignCanChange ? 0 : Align), 0, NonScalarIntSafe, false, DAG, TLI)) return SDValue(); diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 1300df8cba..396ebc15c6 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -567,6 +567,8 @@ TargetLowering::TargetLowering(const TargetMachine &tm, memset(RegClassForVT, 0,MVT::LAST_VALUETYPE*sizeof(TargetRegisterClass*)); memset(TargetDAGCombineArray, 0, array_lengthof(TargetDAGCombineArray)); maxStoresPerMemset = maxStoresPerMemcpy = maxStoresPerMemmove = 8; + maxStoresPerMemsetOptSize = maxStoresPerMemcpyOptSize + = maxStoresPerMemmoveOptSize = 4; benefitFromCodePlacementOpt = false; UseUnderscoreSetJmp = false; UseUnderscoreLongJmp = false; diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 22b3b431de..f50eac523f 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -687,7 +687,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) else setSchedulingPreference(Sched::Hybrid); - maxStoresPerMemcpy = 1; //// temporary - rewrite interface to use type + //// temporary - rewrite interface to use type + maxStoresPerMemcpy = maxStoresPerMemcpyOptSize = 1; // On ARM arguments smaller than 4 bytes are extended, so all arguments // are at least 4 bytes aligned. diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index fca7b02c40..f871b5a770 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -978,11 +978,14 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) computeRegisterProperties(); - // FIXME: These should be based on subtarget info. Plus, the values should - // be smaller when we are in optimizing for size mode. + // On Darwin, -Os means optimize for size without hurting performance, + // do not reduce the limit. maxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores + maxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 16 : 8; maxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores - maxStoresPerMemmove = 3; // For @llvm.memmove -> sequence of stores + maxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 8 : 4; + maxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores + maxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 8 : 4; setPrefLoopAlignment(16); benefitFromCodePlacementOpt = true; } diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp index 11840a1956..d1c40ac81f 100644 --- a/lib/Target/XCore/XCoreISelLowering.cpp +++ b/lib/Target/XCore/XCoreISelLowering.cpp @@ -149,8 +149,9 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM) setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); - maxStoresPerMemset = 4; - maxStoresPerMemmove = maxStoresPerMemcpy = 2; + maxStoresPerMemset = maxStoresPerMemsetOptSize = 4; + maxStoresPerMemmove = maxStoresPerMemmoveOptSize + = maxStoresPerMemcpy = maxStoresPerMemcpyOptSize = 2; // We have target-specific dag combine patterns for the following nodes: setTargetDAGCombine(ISD::STORE); -- cgit v1.2.3