diff options
author | Dale Johannesen <dalej@apple.com> | 2009-01-08 21:45:23 +0000 |
---|---|---|
committer | Dale Johannesen <dalej@apple.com> | 2009-01-08 21:45:23 +0000 |
commit | 4362387c74d8fb07ec914b3173abf834d639ff39 (patch) | |
tree | 0c7c4308429d7eea8c254da50dfa688bf5c64fc3 | |
parent | 86062afbc49b549a653d38fb03702051f3917df7 (diff) | |
download | llvm-4362387c74d8fb07ec914b3173abf834d639ff39.tar.gz llvm-4362387c74d8fb07ec914b3173abf834d639ff39.tar.bz2 llvm-4362387c74d8fb07ec914b3173abf834d639ff39.tar.xz |
Do not inline functions that contain a dynamic (variable-sized) alloca
into functions that don't already have a dynamic alloca.
Dynamic allocas cause inefficient codegen, and we shouldn't
propagate that inefficiency into callers (behavior follows gcc). Two existing tests
assumed such inlining would be done; they are adjusted by
adding a dynamic alloca in the caller, preserving the point of
the tests.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@61946 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | include/llvm/Transforms/Utils/InlineCost.h | 7 | ||||
-rw-r--r-- | lib/Transforms/Utils/InlineCost.cpp | 22 | ||||
-rw-r--r-- | test/Transforms/Inline/2009-01-08-NoInlineDynamicAlloca.ll | 36 | ||||
-rw-r--r-- | test/Transforms/Inline/dynamic_alloca_test.ll | 4 | ||||
-rw-r--r-- | test/Transforms/PruneEH/2008-09-05-CGUpdate.ll | 5 |
5 files changed, 70 insertions, 4 deletions
diff --git a/include/llvm/Transforms/Utils/InlineCost.h b/include/llvm/Transforms/Utils/InlineCost.h index 1698a81940..415fc1e91b 100644 --- a/include/llvm/Transforms/Utils/InlineCost.h +++ b/include/llvm/Transforms/Utils/InlineCost.h @@ -78,6 +78,9 @@ namespace llvm { /// caller. bool NeverInline; + /// usesDynamicAlloca - True if this function calls alloca (in the C sense). + bool usesDynamicAlloca; + /// NumInsts, NumBlocks - Keep track of how large each function is, which /// is used to estimate the code size cost of inlining it. unsigned NumInsts, NumBlocks; @@ -93,8 +96,8 @@ namespace llvm { /// entry here. std::vector<ArgInfo> ArgumentWeights; - FunctionInfo() : NeverInline(false), NumInsts(0), NumBlocks(0), - NumVectorInsts(0) {} + FunctionInfo() : NeverInline(false), usesDynamicAlloca(false), NumInsts(0), + NumBlocks(0), NumVectorInsts(0) {} /// analyzeFunction - Fill in the current structure with information /// gleaned from the specified function. diff --git a/lib/Transforms/Utils/InlineCost.cpp b/lib/Transforms/Utils/InlineCost.cpp index 29d4f79732..82e310b38c 100644 --- a/lib/Transforms/Utils/InlineCost.cpp +++ b/lib/Transforms/Utils/InlineCost.cpp @@ -126,6 +126,11 @@ void InlineCostAnalyzer::FunctionInfo::analyzeFunction(Function *F) { NumInsts += 5; } + if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) { + if (!isa<ConstantInt>(AI->getArraySize())) + this->usesDynamicAlloca = true; + } + if (isa<ExtractElementInst>(II) || isa<VectorType>(II->getType())) ++NumVectorInsts; @@ -173,7 +178,7 @@ InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, SmallPtrSet<const Function *, 16> &NeverInline) { Instruction *TheCall = CS.getInstruction(); Function *Callee = CS.getCalledFunction(); - const Function *Caller = TheCall->getParent()->getParent(); + Function *Caller = TheCall->getParent()->getParent(); // Don't inline a directly recursive call. 
if (Caller == Callee || @@ -219,11 +224,24 @@ InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, // If we haven't calculated this information yet, do so now. if (CalleeFI.NumBlocks == 0) CalleeFI.analyzeFunction(Callee); - + // If we should never inline this, return a huge cost. if (CalleeFI.NeverInline) return InlineCost::getNever(); + // Get infomation about the caller... + FunctionInfo &CallerFI = CachedFunctionInfo[Caller]; + + // If we haven't calculated this information yet, do so now. + if (CallerFI.NumBlocks == 0) + CallerFI.analyzeFunction(Caller); + + // Don't inline a callee with dynamic alloca into a caller without them. + // Functions containing dynamic alloca's are inefficient in various ways; + // don't create more inefficiency. + if (CalleeFI.usesDynamicAlloca && !CallerFI.usesDynamicAlloca) + return InlineCost::getNever(); + // FIXME: It would be nice to kill off CalleeFI.NeverInline. Then we // could move this up and avoid computing the FunctionInfo for // things we are going to just return always inline for. This diff --git a/test/Transforms/Inline/2009-01-08-NoInlineDynamicAlloca.ll b/test/Transforms/Inline/2009-01-08-NoInlineDynamicAlloca.ll new file mode 100644 index 0000000000..14840bac67 --- /dev/null +++ b/test/Transforms/Inline/2009-01-08-NoInlineDynamicAlloca.ll @@ -0,0 +1,36 @@ +; RUN: llvm-as < %s | opt -inline | llvm-dis | grep call +; Do not inline calls to variable-sized alloca. 
+ +@q = common global i8* null ; <i8**> [#uses=1] + +define i8* @a(i32 %i) nounwind { +entry: + %i_addr = alloca i32 ; <i32*> [#uses=2] + %retval = alloca i8* ; <i8**> [#uses=1] + %p = alloca i8* ; <i8**> [#uses=2] + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + store i32 %i, i32* %i_addr + %0 = load i32* %i_addr, align 4 ; <i32> [#uses=1] + %1 = alloca i8, i32 %0 ; <i8*> [#uses=1] + store i8* %1, i8** %p, align 4 + %2 = load i8** %p, align 4 ; <i8*> [#uses=1] + store i8* %2, i8** @q, align 4 + br label %return + +return: ; preds = %entry + %retval1 = load i8** %retval ; <i8*> [#uses=1] + ret i8* %retval1 +} + +define void @b(i32 %i) nounwind { +entry: + %i_addr = alloca i32 ; <i32*> [#uses=2] + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + store i32 %i, i32* %i_addr + %0 = load i32* %i_addr, align 4 ; <i32> [#uses=1] + %1 = call i8* @a(i32 %0) nounwind ; <i8*> [#uses=0] + br label %return + +return: ; preds = %entry + ret void +} diff --git a/test/Transforms/Inline/dynamic_alloca_test.ll b/test/Transforms/Inline/dynamic_alloca_test.ll index 87707120e6..b8ff7dedc4 100644 --- a/test/Transforms/Inline/dynamic_alloca_test.ll +++ b/test/Transforms/Inline/dynamic_alloca_test.ll @@ -1,5 +1,7 @@ ; Test that functions with dynamic allocas get inlined in a case where ; naively inlining it would result in a miscompilation. +; Functions with dynamic allocas can only be inlined into functions that +; already have dynamic allocas. 
; RUN: llvm-as < %s | opt -inline | llvm-dis | \ ; RUN: grep llvm.stacksave @@ -16,6 +18,8 @@ define internal void @callee(i32 %N) { define void @foo(i32 %N) { ; <label>:0 + %P = alloca i32, i32 %N ; <i32*> [#uses=1] + call void @ext( i32* %P ) br label %Loop Loop: ; preds = %Loop, %0 diff --git a/test/Transforms/PruneEH/2008-09-05-CGUpdate.ll b/test/Transforms/PruneEH/2008-09-05-CGUpdate.ll index 6e847fcfdc..c3600ab84c 100644 --- a/test/Transforms/PruneEH/2008-09-05-CGUpdate.ll +++ b/test/Transforms/PruneEH/2008-09-05-CGUpdate.ll @@ -636,6 +636,9 @@ lpad: ; preds = %entry, %invcont define internal fastcc i32 @ce3806g__fxio__put__4.1215(i8* %to.0, %struct.string___XUB* %to.1, i8 signext %item) { entry: + %P0 = load i32 * @__gnat_all_others_value, align 4 ; <i32*> [#uses=1] + %P = alloca i32, i32 %P0 ; <i32*> [#uses=1] + call void @ext( i32* %P ) %to_addr = alloca %struct.system__file_control_block__pstring ; <%struct.system__file_control_block__pstring*> [#uses=4] %FRAME.358 = alloca %struct.FRAME.ce3806g__fxio__put__4 ; <%struct.FRAME.ce3806g__fxio__put__4*> [#uses=65] %0 = getelementptr %struct.system__file_control_block__pstring* %to_addr, i32 0, i32 0 ; <i8**> [#uses=1] @@ -1435,6 +1438,8 @@ declare %struct.ada__text_io__text_afcb* @ada__text_io__standard_output() declare void @report__failed(i8*, %struct.string___XUB*) +declare void @ext(i32*) + declare %struct.ada__text_io__text_afcb* @ada__text_io__delete(%struct.ada__text_io__text_afcb*) declare void @report__result() |