diff options
author | Manman Ren <mren@apple.com> | 2012-08-02 00:56:42 +0000 |
---|---|---|
committer | Manman Ren <mren@apple.com> | 2012-08-02 00:56:42 +0000 |
commit | d7d003c2b7b7f657eed364e4ac06f4ab32fc8c2d (patch) | |
tree | a135e2b9b794ea8b4b12c05e4a2a768d32577f59 /test | |
parent | e5c79a5c2542fa0d852df28b5ee9de8dfef694d8 (diff) | |
download | llvm-d7d003c2b7b7f657eed364e4ac06f4ab32fc8c2d.tar.gz llvm-d7d003c2b7b7f657eed364e4ac06f4ab32fc8c2d.tar.bz2 llvm-d7d003c2b7b7f657eed364e4ac06f4ab32fc8c2d.tar.xz |
X86 Peephole: fold loads to the source register operand if possible.
Machine CSE and other optimizations can remove instructions, so a load fold
that was not possible at ISel can become possible at the peephole stage.
This patch is a rework of r160919 and was tested with a clang self-host build
on my local machine.
rdar://10554090 and rdar://11873276
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@161152 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test')
-rw-r--r-- | test/CodeGen/X86/2012-05-19-avx2-store.ll | 3 | ||||
-rw-r--r-- | test/CodeGen/X86/break-sse-dep.ll | 3 | ||||
-rw-r--r-- | test/CodeGen/X86/fold-load.ll | 26 | ||||
-rw-r--r-- | test/CodeGen/X86/fold-pcmpeqd-1.ll | 11 | ||||
-rw-r--r-- | test/CodeGen/X86/sse-minmax.ll | 66 | ||||
-rw-r--r-- | test/CodeGen/X86/vec_compare.ll | 6 |
6 files changed, 61 insertions, 54 deletions
diff --git a/test/CodeGen/X86/2012-05-19-avx2-store.ll b/test/CodeGen/X86/2012-05-19-avx2-store.ll index 61fef90139..1c1e8e2f0a 100644 --- a/test/CodeGen/X86/2012-05-19-avx2-store.ll +++ b/test/CodeGen/X86/2012-05-19-avx2-store.ll @@ -3,8 +3,7 @@ define void @double_save(<4 x i32>* %Ap, <4 x i32>* %Bp, <8 x i32>* %P) nounwind ssp { entry: ; CHECK: vmovaps - ; CHECK: vmovaps - ; CHECK: vinsertf128 + ; CHECK: vinsertf128 $1, ([[A0:%rdi|%rsi]]), ; CHECK: vmovups %A = load <4 x i32>* %Ap %B = load <4 x i32>* %Bp diff --git a/test/CodeGen/X86/break-sse-dep.ll b/test/CodeGen/X86/break-sse-dep.ll index 3e65867143..4d801891da 100644 --- a/test/CodeGen/X86/break-sse-dep.ll +++ b/test/CodeGen/X86/break-sse-dep.ll @@ -34,8 +34,7 @@ entry: define double @squirt(double* %x) nounwind { entry: ; CHECK: squirt: -; CHECK: movsd ([[A0]]), %xmm0 -; CHECK: sqrtsd %xmm0, %xmm0 +; CHECK: sqrtsd ([[A0]]), %xmm0 %z = load double* %x %t = call double @llvm.sqrt.f64(double %z) ret double %t diff --git a/test/CodeGen/X86/fold-load.ll b/test/CodeGen/X86/fold-load.ll index e03cb7edb5..c961f7576f 100644 --- a/test/CodeGen/X86/fold-load.ll +++ b/test/CodeGen/X86/fold-load.ll @@ -45,3 +45,29 @@ L: } +; rdar://10554090 +; xor in exit block will be CSE'ed and load will be folded to xor in entry. 
+define i1 @test3(i32* %P, i32* %Q) nounwind { +; CHECK: test3: +; CHECK: movl 8(%esp), %eax +; CHECK: xorl (%eax), +; CHECK: j +; CHECK-NOT: xor +entry: + %0 = load i32* %P, align 4 + %1 = load i32* %Q, align 4 + %2 = xor i32 %0, %1 + %3 = and i32 %2, 65535 + %4 = icmp eq i32 %3, 0 + br i1 %4, label %exit, label %land.end + +exit: + %shr.i.i19 = xor i32 %1, %0 + %5 = and i32 %shr.i.i19, 2147418112 + %6 = icmp eq i32 %5, 0 + br label %land.end + +land.end: + %7 = phi i1 [ %6, %exit ], [ false, %entry ] + ret i1 %7 +} diff --git a/test/CodeGen/X86/fold-pcmpeqd-1.ll b/test/CodeGen/X86/fold-pcmpeqd-1.ll index cc4198d7ca..a35dccddba 100644 --- a/test/CodeGen/X86/fold-pcmpeqd-1.ll +++ b/test/CodeGen/X86/fold-pcmpeqd-1.ll @@ -1,11 +1,14 @@ -; RUN: llc < %s -march=x86 -mattr=+sse2 > %t -; RUN: grep pcmpeqd %t | count 1 -; RUN: grep xor %t | count 1 -; RUN: not grep LCP %t +; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s define <2 x double> @foo() nounwind { ret <2 x double> bitcast (<2 x i64><i64 -1, i64 -1> to <2 x double>) +; CHECK: foo: +; CHECK: pcmpeqd %xmm{{[0-9]+}}, %xmm{{[0-9]+}} +; CHECK-NEXT: ret } define <2 x double> @bar() nounwind { ret <2 x double> bitcast (<2 x i64><i64 0, i64 0> to <2 x double>) +; CHECK: bar: +; CHECK: xorps %xmm{{[0-9]+}}, %xmm{{[0-9]+}} +; CHECK-NEXT: ret } diff --git a/test/CodeGen/X86/sse-minmax.ll b/test/CodeGen/X86/sse-minmax.ll index 4405f68451..5d3dbce1df 100644 --- a/test/CodeGen/X86/sse-minmax.ll +++ b/test/CodeGen/X86/sse-minmax.ll @@ -1,6 +1,6 @@ -; RUN: llc < %s -march=x86-64 -mcpu=nehalem -asm-verbose=false | FileCheck %s -; RUN: llc < %s -march=x86-64 -mcpu=nehalem -asm-verbose=false -enable-unsafe-fp-math -enable-no-nans-fp-math | FileCheck -check-prefix=UNSAFE %s -; RUN: llc < %s -march=x86-64 -mcpu=nehalem -asm-verbose=false -enable-no-nans-fp-math | FileCheck -check-prefix=FINITE %s +; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=nehalem -asm-verbose=false | FileCheck %s +; RUN: llc < %s 
-march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=nehalem -asm-verbose=false -enable-unsafe-fp-math -enable-no-nans-fp-math | FileCheck -check-prefix=UNSAFE %s +; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=nehalem -asm-verbose=false -enable-no-nans-fp-math | FileCheck -check-prefix=FINITE %s ; Some of these patterns can be matched as SSE min or max. Some of ; then can be matched provided that the operands are swapped. @@ -137,16 +137,13 @@ define double @ole_inverse(double %x, double %y) nounwind { } ; CHECK: ogt_x: -; CHECK-NEXT: xorp{{[sd]}} %xmm1, %xmm1 -; CHECK-NEXT: maxsd %xmm1, %xmm0 +; CHECK-NEXT: maxsd LCP{{.*}}(%rip), %xmm0 ; CHECK-NEXT: ret ; UNSAFE: ogt_x: -; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1 -; UNSAFE-NEXT: maxsd %xmm1, %xmm0 +; UNSAFE-NEXT: maxsd LCP{{.*}}(%rip), %xmm0 ; UNSAFE-NEXT: ret ; FINITE: ogt_x: -; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1 -; FINITE-NEXT: maxsd %xmm1, %xmm0 +; FINITE-NEXT: maxsd LCP{{.*}}(%rip), %xmm0 ; FINITE-NEXT: ret define double @ogt_x(double %x) nounwind { %c = fcmp ogt double %x, 0.000000e+00 @@ -155,16 +152,13 @@ define double @ogt_x(double %x) nounwind { } ; CHECK: olt_x: -; CHECK-NEXT: xorp{{[sd]}} %xmm1, %xmm1 -; CHECK-NEXT: minsd %xmm1, %xmm0 +; CHECK-NEXT: minsd LCP{{.*}}(%rip), %xmm0 ; CHECK-NEXT: ret ; UNSAFE: olt_x: -; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1 -; UNSAFE-NEXT: minsd %xmm1, %xmm0 +; UNSAFE-NEXT: minsd LCP{{.*}}(%rip), %xmm0 ; UNSAFE-NEXT: ret ; FINITE: olt_x: -; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1 -; FINITE-NEXT: minsd %xmm1, %xmm0 +; FINITE-NEXT: minsd LCP{{.*}}(%rip), %xmm0 ; FINITE-NEXT: ret define double @olt_x(double %x) nounwind { %c = fcmp olt double %x, 0.000000e+00 @@ -217,12 +211,10 @@ define double @olt_inverse_x(double %x) nounwind { ; CHECK: oge_x: ; CHECK: ucomisd %xmm1, %xmm0 ; UNSAFE: oge_x: -; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1 -; UNSAFE-NEXT: maxsd %xmm1, %xmm0 +; UNSAFE-NEXT: maxsd LCP{{.*}}(%rip), %xmm0 ; UNSAFE-NEXT: ret ; FINITE: oge_x: -; 
FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1 -; FINITE-NEXT: maxsd %xmm1, %xmm0 +; FINITE-NEXT: maxsd LCP{{.*}}(%rip), %xmm0 ; FINITE-NEXT: ret define double @oge_x(double %x) nounwind { %c = fcmp oge double %x, 0.000000e+00 @@ -233,12 +225,10 @@ define double @oge_x(double %x) nounwind { ; CHECK: ole_x: ; CHECK: ucomisd %xmm0, %xmm1 ; UNSAFE: ole_x: -; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1 -; UNSAFE-NEXT: minsd %xmm1, %xmm0 +; UNSAFE-NEXT: minsd LCP{{.*}}(%rip), %xmm0 ; UNSAFE-NEXT: ret ; FINITE: ole_x: -; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1 -; FINITE-NEXT: minsd %xmm1, %xmm0 +; FINITE-NEXT: minsd LCP{{.*}}(%rip), %xmm0 ; FINITE-NEXT: ret define double @ole_x(double %x) nounwind { %c = fcmp ole double %x, 0.000000e+00 @@ -411,12 +401,10 @@ define double @ule_inverse(double %x, double %y) nounwind { ; CHECK: ugt_x: ; CHECK: ucomisd %xmm0, %xmm1 ; UNSAFE: ugt_x: -; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1 -; UNSAFE-NEXT: maxsd %xmm1, %xmm0 +; UNSAFE-NEXT: maxsd LCP{{.*}}(%rip), %xmm0 ; UNSAFE-NEXT: ret ; FINITE: ugt_x: -; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1 -; FINITE-NEXT: maxsd %xmm1, %xmm0 +; FINITE-NEXT: maxsd LCP{{.*}}(%rip), %xmm0 ; FINITE-NEXT: ret define double @ugt_x(double %x) nounwind { %c = fcmp ugt double %x, 0.000000e+00 @@ -427,12 +415,10 @@ define double @ugt_x(double %x) nounwind { ; CHECK: ult_x: ; CHECK: ucomisd %xmm1, %xmm0 ; UNSAFE: ult_x: -; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1 -; UNSAFE-NEXT: minsd %xmm1, %xmm0 +; UNSAFE-NEXT: minsd LCP{{.*}}(%rip), %xmm0 ; UNSAFE-NEXT: ret ; FINITE: ult_x: -; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1 -; FINITE-NEXT: minsd %xmm1, %xmm0 +; FINITE-NEXT: minsd LCP{{.*}}(%rip), %xmm0 ; FINITE-NEXT: ret define double @ult_x(double %x) nounwind { %c = fcmp ult double %x, 0.000000e+00 @@ -482,12 +468,10 @@ define double @ult_inverse_x(double %x) nounwind { ; CHECK-NEXT: movap{{[sd]}} %xmm1, %xmm0 ; CHECK-NEXT: ret ; UNSAFE: uge_x: -; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1 -; UNSAFE-NEXT: maxsd %xmm1, %xmm0 +; 
UNSAFE-NEXT: maxsd LCP{{.*}}(%rip), %xmm0 ; UNSAFE-NEXT: ret ; FINITE: uge_x: -; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1 -; FINITE-NEXT: maxsd %xmm1, %xmm0 +; FINITE-NEXT: maxsd LCP{{.*}}(%rip), %xmm0 ; FINITE-NEXT: ret define double @uge_x(double %x) nounwind { %c = fcmp uge double %x, 0.000000e+00 @@ -501,12 +485,10 @@ define double @uge_x(double %x) nounwind { ; CHECK-NEXT: movap{{[sd]}} %xmm1, %xmm0 ; CHECK-NEXT: ret ; UNSAFE: ule_x: -; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1 -; UNSAFE-NEXT: minsd %xmm1, %xmm0 +; UNSAFE-NEXT: minsd LCP{{.*}}(%rip), %xmm0 ; UNSAFE-NEXT: ret ; FINITE: ule_x: -; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1 -; FINITE-NEXT: minsd %xmm1, %xmm0 +; FINITE-NEXT: minsd LCP{{.*}}(%rip), %xmm0 ; FINITE-NEXT: ret define double @ule_x(double %x) nounwind { %c = fcmp ule double %x, 0.000000e+00 @@ -515,8 +497,7 @@ define double @ule_x(double %x) nounwind { } ; CHECK: uge_inverse_x: -; CHECK-NEXT: xorp{{[sd]}} %xmm1, %xmm1 -; CHECK-NEXT: minsd %xmm1, %xmm0 +; CHECK-NEXT: minsd LCP{{.*}}(%rip), %xmm0 ; CHECK-NEXT: ret ; UNSAFE: uge_inverse_x: ; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1 @@ -535,8 +516,7 @@ define double @uge_inverse_x(double %x) nounwind { } ; CHECK: ule_inverse_x: -; CHECK-NEXT: xorp{{[sd]}} %xmm1, %xmm1 -; CHECK-NEXT: maxsd %xmm1, %xmm0 +; CHECK-NEXT: maxsd LCP{{.*}}(%rip), %xmm0 ; CHECK-NEXT: ret ; UNSAFE: ule_inverse_x: ; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1 diff --git a/test/CodeGen/X86/vec_compare.ll b/test/CodeGen/X86/vec_compare.ll index 39c9b770d5..1e04f19ee8 100644 --- a/test/CodeGen/X86/vec_compare.ll +++ b/test/CodeGen/X86/vec_compare.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 -mcpu=yonah | FileCheck %s +; RUN: llc < %s -march=x86 -mcpu=yonah -mtriple=i386-apple-darwin | FileCheck %s define <4 x i32> @test1(<4 x i32> %A, <4 x i32> %B) nounwind { @@ -14,8 +14,8 @@ define <4 x i32> @test1(<4 x i32> %A, <4 x i32> %B) nounwind { define <4 x i32> @test2(<4 x i32> %A, <4 x i32> %B) nounwind { ; CHECK: test2: ; CHECK: pcmp -; 
CHECK: pcmp -; CHECK: pxor +; CHECK: pxor LCP +; CHECK: movdqa ; CHECK: ret %C = icmp sge <4 x i32> %A, %B %D = sext <4 x i1> %C to <4 x i32> |