diff options
author | Chris Lattner <sabre@nondot.org> | 2011-02-21 17:03:47 +0000 |
---|---|---|
committer | Chris Lattner <sabre@nondot.org> | 2011-02-21 17:03:47 +0000 |
commit | fb456c25c28a6e9b48b131ced9ad813ac4468613 (patch) | |
tree | 027d3b50f83d872f60c9edb7de1a3fdc746db1bc /lib/Target/X86/README.txt | |
parent | 0e68cee62f251c45df92c71ca536142bc7d82631 (diff) | |
download | llvm-fb456c25c28a6e9b48b131ced9ad813ac4468613.tar.gz llvm-fb456c25c28a6e9b48b131ced9ad813ac4468613.tar.bz2 llvm-fb456c25c28a6e9b48b131ced9ad813ac4468613.tar.xz |
Add a note about a serious "compare CSE" issue that is nontrivial to get right,
but which is responsible for us doing really bad things to 256.bzip2.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@126126 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target/X86/README.txt')
-rw-r--r-- | lib/Target/X86/README.txt | 69 |
1 file changed, 69 insertions, 0 deletions
diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt
index 1e1660dbca..abd1515cf5 100644
--- a/lib/Target/X86/README.txt
+++ b/lib/Target/X86/README.txt
@@ -1878,3 +1878,72 @@ _add32carry:
 	ret
 
 //===---------------------------------------------------------------------===//
+
+The hot loop of 256.bzip2 contains code that looks a bit like this:
+
+int foo(char *P, char *Q, int x, int y) {
+  if (P[0] != Q[0])
+     return P[0] < Q[0];
+  if (P[1] != Q[1])
+     return P[1] < Q[1];
+  if (P[2] != Q[2])
+     return P[2] < Q[2];
+   return P[3] < Q[3];
+}
+
+In the real code, we get a lot more wrong than this. However, even in this
+code we generate:
+
+_foo:                                   ## @foo
+## BB#0:                                ## %entry
+	movb	(%rsi), %al
+	movb	(%rdi), %cl
+	cmpb	%al, %cl
+	je	LBB0_2
+LBB0_1:                                 ## %if.then
+	cmpb	%al, %cl
+	jmp	LBB0_5
+LBB0_2:                                 ## %if.end
+	movb	1(%rsi), %al
+	movb	1(%rdi), %cl
+	cmpb	%al, %cl
+	jne	LBB0_1
+## BB#3:                                ## %if.end38
+	movb	2(%rsi), %al
+	movb	2(%rdi), %cl
+	cmpb	%al, %cl
+	jne	LBB0_1
+## BB#4:                                ## %if.end60
+	movb	3(%rdi), %al
+	cmpb	3(%rsi), %al
+LBB0_5:                                 ## %if.end60
+	setl	%al
+	movzbl	%al, %eax
+	ret
+
+Note that we generate jumps to LBB0_1 which does a redundant compare. The
+redundant compare also forces the register values to be live, which prevents
+folding one of the loads into the compare. In contrast, GCC 4.2 produces:
+
+_foo:
+	movzbl	(%rsi), %eax
+	cmpb	%al, (%rdi)
+	jne	L10
+L12:
+	movzbl	1(%rsi), %eax
+	cmpb	%al, 1(%rdi)
+	jne	L10
+	movzbl	2(%rsi), %eax
+	cmpb	%al, 2(%rdi)
+	jne	L10
+	movzbl	3(%rdi), %eax
+	cmpb	3(%rsi), %al
+L10:
+	setl	%al
+	movzbl	%al, %eax
+	ret
+
+which is "perfect".
+
+//===---------------------------------------------------------------------===//
+