Switch spill weights from a basic loop depth estimation to BlockFrequencyInfo.

The main advantage here is much better heuristics: they take into account not just loop depth but also __builtin_expect and other static heuristics, and will eventually learn how to use profile info. Most of the work in this patch is pushing the MachineBlockFrequencyInfo analysis into the right places. This is good for a 5% speedup on zlib's deflate (x86_64); there were some very unfortunate spilling decisions in its hottest loop in longest_match(). Other benchmarks I tried were mostly neutral. This changes register allocation in subtle ways, so the tests are updated for it. 2012-02-20-MachineCPBug.ll was deleted as it's very fragile and the instruction it looked for was gone already (but the FileCheck pattern picked up unrelated stuff).

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184105 91177308-0d34-0410-b5e6-96231b3b80d8
author: Benjamin Kramer <benny.kra@googlemail.com> 2013-06-17 19:00:36 +0000
committer: Benjamin Kramer <benny.kra@googlemail.com> 2013-06-17 19:00:36 +0000
commit: 4eed756153b84c211114a3e9186bf0cb55d4b394 (patch)
tree: 75638704a4f7d8af5710e5ab6fb7ace3ba9ed921 /test
parent: a8a04380c597e1cdb8d635abd9e2669eab401545 (diff)
download: llvm-4eed756153b84c211114a3e9186bf0cb55d4b394.tar.gz
llvm-4eed756153b84c211114a3e9186bf0cb55d4b394.tar.bz2
llvm-4eed756153b84c211114a3e9186bf0cb55d4b394.tar.xz
4 files changed, 11 insertions, 87 deletions
diff --git a/test/CodeGen/ARM/lsr-unfolded-offset.ll b/test/CodeGen/ARM/lsr-unfolded-offset.ll
index 9b0f3e54e8..26d4be2e06 100644
--- a/test/CodeGen/ARM/lsr-unfolded-offset.ll
+++ b/test/CodeGen/ARM/lsr-unfolded-offset.ll
@@ -7,8 +7,7 @@
 ; CHECK: sub sp, #{{40|32|28|24}}
 
 ; CHECK: %for.inc
-; CHECK: ldr{{(.w)?}} r{{.*}}, [sp, #
-; CHECK: ldr{{(.w)?}} r{{.*}}, [sp, #
+; CHECK-NOT: ldr
 ; CHECK: add
 
 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32"
diff --git a/test/CodeGen/SPARC/2011-01-19-DelaySlot.ll b/test/CodeGen/SPARC/2011-01-19-DelaySlot.ll
index b39c355fbd..4a5952736e 100644
--- a/test/CodeGen/SPARC/2011-01-19-DelaySlot.ll
+++ b/test/CodeGen/SPARC/2011-01-19-DelaySlot.ll
@@ -59,7 +59,7 @@ entry:
 ;CHECK:      !NO_APP
 ;CHECK-NEXT: cmp
 ;CHECK-NEXT: bg
-;CHECK-NEXT: nop
+;CHECK-NEXT: or
   tail call void asm sideeffect "sethi 0, %g0", ""() nounwind
   %0 = icmp slt i32 %a, 0
   br i1 %0, label %bb, label %bb1
diff --git a/test/CodeGen/X86/2012-02-20-MachineCPBug.ll b/test/CodeGen/X86/2012-02-20-MachineCPBug.ll
deleted file mode 100644
index 477b4deba8..0000000000
--- a/test/CodeGen/X86/2012-02-20-MachineCPBug.ll
+++ /dev/null
@@ -1,78 +0,0 @@
-; RUN: llc < %s -mtriple=i386-apple-macosx -mcpu=core2 -mattr=+sse | FileCheck %s
-; PR11940: Do not optimize away movb %al, %ch
-
-%struct.APInt = type { i64* }
-
-declare noalias i8* @calloc(i32, i32) nounwind
-
-define void @bug(%struct.APInt* noalias nocapture sret %agg.result, %struct.APInt* nocapture %this, i32 %rotateAmt) nounwind align 2 {
-entry:
-; CHECK: bug:
-  %call = tail call i8* @calloc(i32 1, i32 32)
-  %call.i = tail call i8* @calloc(i32 1, i32 32) nounwind
-  %0 = bitcast i8* %call.i to i64*
-  %rem.i = and i32 %rotateAmt, 63
-  %div.i = lshr i32 %rotateAmt, 6
-  %cmp.i = icmp eq i32 %rem.i, 0
-  br i1 %cmp.i, label %for.cond.preheader.i, label %if.end.i
-
-for.cond.preheader.i:                             ; preds = %entry
-  %sub.i = sub i32 4, %div.i
-  %cmp23.i = icmp eq i32 %div.i, 4
-  br i1 %cmp23.i, label %for.body9.lr.ph.i, label %for.body.lr.ph.i
-
-for.body.lr.ph.i:                                 ; preds = %for.cond.preheader.i
-  %pVal.i = getelementptr inbounds %struct.APInt* %this, i32 0, i32 0
-  %.pre5.i = load i64** %pVal.i, align 4
-  br label %for.body.i
-
-for.body.i:                                       ; preds = %for.body.i, %for.body.lr.ph.i
-  %i.04.i = phi i32 [ 0, %for.body.lr.ph.i ], [ %inc.i, %for.body.i ]
-  %add.i = add i32 %i.04.i, %div.i
-  %arrayidx.i = getelementptr inbounds i64* %.pre5.i, i32 %add.i
-  %1 = load i64* %arrayidx.i, align 4
-  %arrayidx3.i = getelementptr inbounds i64* %0, i32 %i.04.i
-  store i64 %1, i64* %arrayidx3.i, align 4
-  %inc.i = add i32 %i.04.i, 1
-  %cmp2.i = icmp ult i32 %inc.i, %sub.i
-  br i1 %cmp2.i, label %for.body.i, label %if.end.i
-
-if.end.i:                                         ; preds = %for.body.i, %entry
-  %cmp81.i = icmp eq i32 %div.i, 3
-  br i1 %cmp81.i, label %_ZNK5APInt4lshrEj.exit, label %for.body9.lr.ph.i
-
-for.body9.lr.ph.i:                                ; preds = %if.end.i, %for.cond.preheader.i
-  %sub58.i = sub i32 3, %div.i
-  %pVal11.i = getelementptr inbounds %struct.APInt* %this, i32 0, i32 0
-  %sh_prom.i = zext i32 %rem.i to i64
-  %sub17.i = sub i32 64, %rem.i
-  %sh_prom18.i = zext i32 %sub17.i to i64
-  %.pre.i = load i64** %pVal11.i, align 4
-  br label %for.body9.i
-
-for.body9.i:                                      ; preds = %for.body9.i, %for.body9.lr.ph.i
-; CHECK: %for.body9.i
-; CHECK: movb
-; CHECK: shrdl
-  %i6.02.i = phi i32 [ 0, %for.body9.lr.ph.i ], [ %inc21.i, %for.body9.i ]
-  %add10.i = add i32 %i6.02.i, %div.i
-  %arrayidx12.i = getelementptr inbounds i64* %.pre.i, i32 %add10.i
-  %2 = load i64* %arrayidx12.i, align 4
-  %shr.i = lshr i64 %2, %sh_prom.i
-  %add14.i = add i32 %add10.i, 1
-  %arrayidx16.i = getelementptr inbounds i64* %.pre.i, i32 %add14.i
-  %3 = load i64* %arrayidx16.i, align 4
-  %shl.i = shl i64 %3, %sh_prom18.i
-  %or.i = or i64 %shl.i, %shr.i
-  %arrayidx19.i = getelementptr inbounds i64* %0, i32 %i6.02.i
-  store i64 %or.i, i64* %arrayidx19.i, align 4
-  %inc21.i = add i32 %i6.02.i, 1
-  %cmp8.i = icmp ult i32 %inc21.i, %sub58.i
-  br i1 %cmp8.i, label %for.body9.i, label %_ZNK5APInt4lshrEj.exit
-
-_ZNK5APInt4lshrEj.exit:                           ; preds = %for.body9.i, %if.end.i
-  %call.i1 = tail call i8* @calloc(i32 1, i32 32) nounwind
-  %4 = getelementptr inbounds %struct.APInt* %agg.result, i32 0, i32 0
-  store i64* %0, i64** %4, align 4
-  ret void
-}
diff --git a/test/CodeGen/X86/atom-bypass-slow-division-64.ll b/test/CodeGen/X86/atom-bypass-slow-division-64.ll
index a3bbea3c99..26b9a1e60f 100644
--- a/test/CodeGen/X86/atom-bypass-slow-division-64.ll
+++ b/test/CodeGen/X86/atom-bypass-slow-division-64.ll
@@ -4,8 +4,9 @@
 
 define i64 @Test_get_quotient(i64 %a, i64 %b) nounwind {
 ; CHECK: Test_get_quotient:
-; CHECK: orq %rsi, %rcx
-; CHECK-NEXT: testq $-65536, %rcx
+; CHECK: movq %rdi, %rax
+; CHECK: orq %rsi, %rax
+; CHECK-NEXT: testq $-65536, %rax
 ; CHECK-NEXT: je
 ; CHECK: idivq
 ; CHECK: ret
@@ -17,8 +18,9 @@ define i64 @Test_get_quotient(i64 %a, i64 %b) nounwind {
 
 define i64 @Test_get_remainder(i64 %a, i64 %b) nounwind {
 ; CHECK: Test_get_remainder:
-; CHECK: orq %rsi, %rcx
-; CHECK-NEXT: testq $-65536, %rcx
+; CHECK: movq %rdi, %rax
+; CHECK: orq %rsi, %rax
+; CHECK-NEXT: testq $-65536, %rax
 ; CHECK-NEXT: je
 ; CHECK: idivq
 ; CHECK: ret
@@ -30,8 +32,9 @@ define i64 @Test_get_remainder(i64 %a, i64 %b) nounwind {
 
 define i64 @Test_get_quotient_and_remainder(i64 %a, i64 %b) nounwind {
 ; CHECK: Test_get_quotient_and_remainder:
-; CHECK: orq %rsi, %rcx
-; CHECK-NEXT: testq $-65536, %rcx
+; CHECK: movq %rdi, %rax
+; CHECK: orq %rsi, %rax
+; CHECK-NEXT: testq $-65536, %rax
 ; CHECK-NEXT: je
 ; CHECK: idivq
 ; CHECK: divw
author	Benjamin Kramer <benny.kra@googlemail.com>	2013-06-17 19:00:36 +0000
committer	Benjamin Kramer <benny.kra@googlemail.com>	2013-06-17 19:00:36 +0000
commit	4eed756153b84c211114a3e9186bf0cb55d4b394 (patch)
tree	75638704a4f7d8af5710e5ab6fb7ace3ba9ed921 /test
parent	a8a04380c597e1cdb8d635abd9e2669eab401545 (diff)
download	llvm-4eed756153b84c211114a3e9186bf0cb55d4b394.tar.gz llvm-4eed756153b84c211114a3e9186bf0cb55d4b394.tar.bz2 llvm-4eed756153b84c211114a3e9186bf0cb55d4b394.tar.xz