summaryrefslogtreecommitdiff
path: root/lib/Target/X86/README-SSE.txt
diff options
context:
space:
mode:
authorChris Lattner <sabre@nondot.org>2010-09-05 20:22:09 +0000
committerChris Lattner <sabre@nondot.org>2010-09-05 20:22:09 +0000
commitf0f5780b39f332ac14b0d85b904126dab7c783c3 (patch)
tree05ae71c7bea729d8e99d990059ab7cd2804895da /lib/Target/X86/README-SSE.txt
parent3ae0924d6ba9db2fc2d42f96c3c4b34dd3878494 (diff)
downloadllvm-f0f5780b39f332ac14b0d85b904126dab7c783c3.tar.gz
llvm-f0f5780b39f332ac14b0d85b904126dab7c783c3.tar.bz2
llvm-f0f5780b39f332ac14b0d85b904126dab7c783c3.tar.xz
update this.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@113116 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target/X86/README-SSE.txt')
-rw-r--r--lib/Target/X86/README-SSE.txt39
1 file changed, 29 insertions, 10 deletions
diff --git a/lib/Target/X86/README-SSE.txt b/lib/Target/X86/README-SSE.txt
index f96b22f1e2..b2116e03b1 100644
--- a/lib/Target/X86/README-SSE.txt
+++ b/lib/Target/X86/README-SSE.txt
@@ -20,7 +20,28 @@ __m128i shift_right(__m128i value, unsigned long offset) {
//===---------------------------------------------------------------------===//
SSE has instructions for doing operations on complex numbers, we should pattern
-match them. Compiling this:
+match them. For example, this should turn into a horizontal add:
+
+typedef float __attribute__((vector_size(16))) v4f32;
+float f32(v4f32 A) {
+ return A[0]+A[1]+A[2]+A[3];
+}
+
+Instead we get this:
+
+_f32: ## @f32
+ pshufd $1, %xmm0, %xmm1 ## xmm1 = xmm0[1,0,0,0]
+ addss %xmm0, %xmm1
+ pshufd $3, %xmm0, %xmm2 ## xmm2 = xmm0[3,0,0,0]
+ movhlps %xmm0, %xmm0 ## xmm0 = xmm0[1,1]
+ movaps %xmm0, %xmm3
+ addss %xmm1, %xmm3
+ movdqa %xmm2, %xmm0
+ addss %xmm3, %xmm0
+ ret
+
+Also, there are cases where some simple local SLP would improve codegen a bit.
+Compiling this:
_Complex float f32(_Complex float A, _Complex float B) {
return A+B;
@@ -28,19 +49,17 @@ _Complex float f32(_Complex float A, _Complex float B) {
into:
-_f32:
+_f32: ## @f32
movdqa %xmm0, %xmm2
addss %xmm1, %xmm2
- pshufd $16, %xmm2, %xmm2
- pshufd $1, %xmm1, %xmm1
- pshufd $1, %xmm0, %xmm0
- addss %xmm1, %xmm0
- pshufd $16, %xmm0, %xmm1
- movdqa %xmm2, %xmm0
- unpcklps %xmm1, %xmm0
+ pshufd $1, %xmm1, %xmm1 ## xmm1 = xmm1[1,0,0,0]
+ pshufd $1, %xmm0, %xmm3 ## xmm3 = xmm0[1,0,0,0]
+ addss %xmm1, %xmm3
+ movaps %xmm2, %xmm0
+ unpcklps %xmm3, %xmm0 ## xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
ret
-seems silly.
+seems silly when it could just be one addps.
//===---------------------------------------------------------------------===//