a serious "compare CSE" issue that is nontrivial to get right,

but which is responsible for us doing really bad things to 256.bzip2.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@126126 91177308-0d34-0410-b5e6-96231b3b80d8
author: Chris Lattner <sabre@nondot.org> 2011-02-21 17:03:47 +0000
committer: Chris Lattner <sabre@nondot.org> 2011-02-21 17:03:47 +0000
commit: fb456c25c28a6e9b48b131ced9ad813ac4468613 (patch)
tree: 027d3b50f83d872f60c9edb7de1a3fdc746db1bc /lib/Target/X86/README.txt
parent: 0e68cee62f251c45df92c71ca536142bc7d82631 (diff)
download: llvm-fb456c25c28a6e9b48b131ced9ad813ac4468613.tar.gz
llvm-fb456c25c28a6e9b48b131ced9ad813ac4468613.tar.bz2
llvm-fb456c25c28a6e9b48b131ced9ad813ac4468613.tar.xz
1 files changed, 69 insertions, 0 deletions
diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt
index 1e1660dbca..abd1515cf5 100644
--- a/lib/Target/X86/README.txt
+++ b/lib/Target/X86/README.txt
@@ -1878,3 +1878,72 @@ _add32carry:
 	ret
 
 //===---------------------------------------------------------------------===//
+
+The hot loop of 256.bzip2 contains code that looks a bit like this:
+
+int foo(char *P, char *Q, int x, int y) {
+  if (P[0] != Q[0])
+     return P[0] < Q[0];
+  if (P[1] != Q[1])
+     return P[1] < Q[1];
+  if (P[2] != Q[2])
+     return P[2] < Q[2];
+   return P[3] < Q[3];
+}
+
+In the real code, we get a lot more wrong than this.  However, even in this
+code we generate:
+
+_foo:                                   ## @foo
+## BB#0:                                ## %entry
+	movb	(%rsi), %al
+	movb	(%rdi), %cl
+	cmpb	%al, %cl
+	je	LBB0_2
+LBB0_1:                                 ## %if.then
+	cmpb	%al, %cl
+	jmp	LBB0_5
+LBB0_2:                                 ## %if.end
+	movb	1(%rsi), %al
+	movb	1(%rdi), %cl
+	cmpb	%al, %cl
+	jne	LBB0_1
+## BB#3:                                ## %if.end38
+	movb	2(%rsi), %al
+	movb	2(%rdi), %cl
+	cmpb	%al, %cl
+	jne	LBB0_1
+## BB#4:                                ## %if.end60
+	movb	3(%rdi), %al
+	cmpb	3(%rsi), %al
+LBB0_5:                                 ## %if.end60
+	setl	%al
+	movzbl	%al, %eax
+	ret
+
+Note that we generate jumps to LBB0_1, which does a redundant compare.  The
+redundant compare also forces the register values to be live, which prevents
+folding one of the loads into the compare.  In contrast, GCC 4.2 produces:
+
+_foo:
+	movzbl	(%rsi), %eax
+	cmpb	%al, (%rdi)
+	jne	L10
+L12:
+	movzbl	1(%rsi), %eax
+	cmpb	%al, 1(%rdi)
+	jne	L10
+	movzbl	2(%rsi), %eax
+	cmpb	%al, 2(%rdi)
+	jne	L10
+	movzbl	3(%rdi), %eax
+	cmpb	3(%rsi), %al
+L10:
+	setl	%al
+	movzbl	%al, %eax
+	ret
+
+which is "perfect".
+
+//===---------------------------------------------------------------------===//
+
author	Chris Lattner <sabre@nondot.org>	2011-02-21 17:03:47 +0000
committer	Chris Lattner <sabre@nondot.org>	2011-02-21 17:03:47 +0000
commit	fb456c25c28a6e9b48b131ced9ad813ac4468613 (patch)
tree	027d3b50f83d872f60c9edb7de1a3fdc746db1bc /lib/Target/X86/README.txt
parent	0e68cee62f251c45df92c71ca536142bc7d82631 (diff)
download	llvm-fb456c25c28a6e9b48b131ced9ad813ac4468613.tar.gz llvm-fb456c25c28a6e9b48b131ced9ad813ac4468613.tar.bz2 llvm-fb456c25c28a6e9b48b131ced9ad813ac4468613.tar.xz