author    Chris Lattner <sabre@nondot.org>  2011-02-21 17:03:47 +0000
committer Chris Lattner <sabre@nondot.org>  2011-02-21 17:03:47 +0000
commit    fb456c25c28a6e9b48b131ced9ad813ac4468613 (patch)
tree      027d3b50f83d872f60c9edb7de1a3fdc746db1bc /lib/Target/X86/README.txt
parent    0e68cee62f251c45df92c71ca536142bc7d82631 (diff)
a serious "compare CSE" issue that is nontrivial to get right,
but which is responsible for us doing really bad things to 256.bzip2.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@126126 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target/X86/README.txt')
-rw-r--r--  lib/Target/X86/README.txt  69
1 file changed, 69 insertions(+), 0 deletions(-)
diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt
index 1e1660dbca..abd1515cf5 100644
--- a/lib/Target/X86/README.txt
+++ b/lib/Target/X86/README.txt
@@ -1878,3 +1878,72 @@ _add32carry:
ret
//===---------------------------------------------------------------------===//
+
+The hot loop of 256.bzip2 contains code that looks a bit like this:
+
+int foo(char *P, char *Q, int x, int y) {
+ if (P[0] != Q[0])
+ return P[0] < Q[0];
+ if (P[1] != Q[1])
+ return P[1] < Q[1];
+ if (P[2] != Q[2])
+ return P[2] < Q[2];
+ return P[3] < Q[3];
+}
+
+In the real code, we get a lot more wrong than this. However, even in this
+code we generate:
+
+_foo: ## @foo
+## BB#0: ## %entry
+ movb (%rsi), %al
+ movb (%rdi), %cl
+ cmpb %al, %cl
+ je LBB0_2
+LBB0_1: ## %if.then
+ cmpb %al, %cl
+ jmp LBB0_5
+LBB0_2: ## %if.end
+ movb 1(%rsi), %al
+ movb 1(%rdi), %cl
+ cmpb %al, %cl
+ jne LBB0_1
+## BB#3: ## %if.end38
+ movb 2(%rsi), %al
+ movb 2(%rdi), %cl
+ cmpb %al, %cl
+ jne LBB0_1
+## BB#4: ## %if.end60
+ movb 3(%rdi), %al
+ cmpb 3(%rsi), %al
+LBB0_5: ## %if.end60
+ setl %al
+ movzbl %al, %eax
+ ret
+
+Note that we generate jumps to LBB0_1, which performs a redundant compare. The
+redundant compare also forces both register values to stay live, which prevents
+folding one of the loads into the compare. In contrast, GCC 4.2 produces:
+
+_foo:
+ movzbl (%rsi), %eax
+ cmpb %al, (%rdi)
+ jne L10
+L12:
+ movzbl 1(%rsi), %eax
+ cmpb %al, 1(%rdi)
+ jne L10
+ movzbl 2(%rsi), %eax
+ cmpb %al, 2(%rdi)
+ jne L10
+ movzbl 3(%rdi), %eax
+ cmpb 3(%rsi), %al
+L10:
+ setl %al
+ movzbl %al, %eax
+ ret
+
+which is "perfect".
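+
+One way to see why GCC's shared tail is correct: for each byte, the "does it
+differ" test and the "which is smaller" result come from comparing the same two
+values, so every jne to L10 arrives with the flags that setl needs already set,
+and no second compare is required. A loop-rotated C equivalent (a sketch using
+a hypothetical name, not code from bzip2) makes the sharing explicit:
+
+/* Hypothetical rewrite of foo(): each byte pair is compared exactly once,
+   and the final '<' reuses the index (and, in ideal codegen, the flags) of
+   the compare that broke out of the loop. */
+int foo_rotated(const char *P, const char *Q) {
+  int i;
+  for (i = 0; i != 3; ++i)
+    if (P[i] != Q[i])
+      break;              /* first differing byte, if any */
+  return P[i] < Q[i];     /* i == 3 only when bytes 0..2 all match */
+}
+
+A compare-CSE pass that understood this would emit one cmpb per byte, exactly
+as GCC does above.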
+
+//===---------------------------------------------------------------------===//
+