[inliner] Significantly improve the compile time in cases like PR19499

by avoiding inlining massive switches merely because they have no instructions in them. These switches still show up where we fail to form lookup tables, and in those cases they are actually going to cause a very significant code size hit anyways, so inlining them is not the right call. The right way to fix any performance regressions stemming from this is to enhance the switch-to-lookup-table logic to fire in more places. This makes PR19499 about 5x less bad. It uncovers a second compile time problem in that test case that is unrelated (surprisingly!). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@207403 91177308-0d34-0410-b5e6-96231b3b80d8
author: Chandler Carruth <chandlerc@gmail.com> 2014-04-28 08:52:44 +0000
committer: Chandler Carruth <chandlerc@gmail.com> 2014-04-28 08:52:44 +0000
commit: d6d57bc3fb1922ebffff028b382c6521624e3731 (patch)
tree: cb2165e4a198c3cdf9d6f650e7733d7d45bcbd77
parent: 0ddc7447d9a4d2e363256de653f5aa9d6033cce5 (diff)
download: llvm-d6d57bc3fb1922ebffff028b382c6521624e3731.tar.gz
llvm-d6d57bc3fb1922ebffff028b382c6521624e3731.tar.bz2
llvm-d6d57bc3fb1922ebffff028b382c6521624e3731.tar.xz
2 files changed, 83 insertions, 3 deletions
diff --git a/lib/Analysis/IPA/InlineCost.cpp b/lib/Analysis/IPA/InlineCost.cpp
index c43b5ca250..358f61fd52 100644
--- a/lib/Analysis/IPA/InlineCost.cpp
+++ b/lib/Analysis/IPA/InlineCost.cpp
@@ -808,9 +808,29 @@ bool CallAnalyzer::visitBranchInst(BranchInst &BI) {
 bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
   // We model unconditional switches as free, see the comments on handling
   // branches.
-  return isa<ConstantInt>(SI.getCondition()) ||
-         dyn_cast_or_null<ConstantInt>(
-             SimplifiedValues.lookup(SI.getCondition()));
+  if (isa<ConstantInt>(SI.getCondition()))
+    return true;
+  if (Value *V = SimplifiedValues.lookup(SI.getCondition()))
+    if (isa<ConstantInt>(V))
+      return true;
+
+  // Otherwise, we need to accumulate a cost proportional to the number of
+  // distinct successor blocks. This fan-out in the CFG cannot be represented
+  // for free even if we can represent the core switch as a jumptable that
+  // takes a single instruction.
+  //
+  // NB: We convert large switches which are just used to initialize large phi
+  // nodes to lookup tables instead in simplify-cfg, so this shouldn't prevent
+  // inlining those. It will prevent inlining in cases where the optimization
+  // does not (yet) fire.
+  SmallPtrSet<BasicBlock *, 8> SuccessorBlocks;
+  SuccessorBlocks.insert(SI.getDefaultDest());
+  for (auto I = SI.case_begin(), E = SI.case_end(); I != E; ++I)
+    SuccessorBlocks.insert(I.getCaseSuccessor());
+  // Add cost corresponding to the number of distinct destinations. The first
+  // we model as free because of fallthrough.
+  Cost += (SuccessorBlocks.size() - 1) * InlineConstants::InstrCost;
+  return false;
 }
 
 bool CallAnalyzer::visitIndirectBrInst(IndirectBrInst &IBI) {
diff --git a/test/Transforms/Inline/switch.ll b/test/Transforms/Inline/switch.ll
new file mode 100644
index 0000000000..c5dab53e8b
--- /dev/null
+++ b/test/Transforms/Inline/switch.ll
@@ -0,0 +1,60 @@
+; RUN: opt < %s -inline -inline-threshold=20 -S | FileCheck %s
+
+define i32 @callee(i32 %a) {
+  switch i32 %a, label %sw.default [
+    i32 0, label %sw.bb0
+    i32 1, label %sw.bb1
+    i32 2, label %sw.bb2
+    i32 3, label %sw.bb3
+    i32 4, label %sw.bb4
+    i32 5, label %sw.bb5
+    i32 6, label %sw.bb6
+    i32 7, label %sw.bb7
+    i32 8, label %sw.bb8
+    i32 9, label %sw.bb9
+  ]
+
+sw.default:
+  br label %return
+
+sw.bb0:
+  br label %return
+
+sw.bb1:
+  br label %return
+
+sw.bb2:
+  br label %return
+
+sw.bb3:
+  br label %return
+
+sw.bb4:
+  br label %return
+
+sw.bb5:
+  br label %return
+
+sw.bb6:
+  br label %return
+
+sw.bb7:
+  br label %return
+
+sw.bb8:
+  br label %return
+
+sw.bb9:
+  br label %return
+
+return:
+  ret i32 42
+}
+
+define i32 @caller(i32 %a) {
+; CHECK-LABEL: @caller(
+; CHECK: call i32 @callee(
+
+  %result = call i32 @callee(i32 %a)
+  ret i32 %result
+}
author	Chandler Carruth <chandlerc@gmail.com>	2014-04-28 08:52:44 +0000
committer	Chandler Carruth <chandlerc@gmail.com>	2014-04-28 08:52:44 +0000
commit	d6d57bc3fb1922ebffff028b382c6521624e3731 (patch)
tree	cb2165e4a198c3cdf9d6f650e7733d7d45bcbd77
parent	0ddc7447d9a4d2e363256de653f5aa9d6033cce5 (diff)
download	llvm-d6d57bc3fb1922ebffff028b382c6521624e3731.tar.gz llvm-d6d57bc3fb1922ebffff028b382c6521624e3731.tar.bz2 llvm-d6d57bc3fb1922ebffff028b382c6521624e3731.tar.xz