From 19d8559019b75edfd7f5b05ffa266bc278127854 Mon Sep 17 00:00:00 2001 From: Victor Umansky Date: Thu, 5 Jan 2012 08:46:19 +0000 Subject: Peephole optimization of ptest-conditioned branch in X86 arch. Performs instruction combining of sequences generated by ptestz/ptestc intrinsics to ptest+jcc pair for SSE and AVX. Testing: passed 'make check' including LIT tests for all sequences being handled (both SSE and AVX) Reviewers: Evan Cheng, David Blaikie, Bruno Lopes, Elena Demikhovsky, Chad Rosier, Anton Korobeynikov git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@147601 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/CodeGen/X86/brcond.ll | 246 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 245 insertions(+), 1 deletion(-) (limited to 'test/CodeGen/X86/brcond.ll') diff --git a/test/CodeGen/X86/brcond.ll b/test/CodeGen/X86/brcond.ll index 5cdc1000f3..ff3b2e45ee 100644 --- a/test/CodeGen/X86/brcond.ll +++ b/test/CodeGen/X86/brcond.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -mtriple=i386-apple-darwin10 -mcpu=core2 | FileCheck %s +; RUN: llc < %s -mtriple=i386-apple-darwin10 -mcpu=penryn | FileCheck %s + ; rdar://7475489 define i32 @test1(i32 %a, i32 %b) nounwind ssp { @@ -106,3 +107,246 @@ bb2: ; preds = %entry, %bb1 %.0 = fptrunc double %.0.in to float ; [#uses=1] ret float %.0 } + +declare i32 @llvm.x86.sse41.ptestz(<4 x float> %p1, <4 x float> %p2) nounwind +declare i32 @llvm.x86.sse41.ptestc(<4 x float> %p1, <4 x float> %p2) nounwind + +define <4 x float> @test5(<4 x float> %a, <4 x float> %b) nounwind { +entry: +; CHECK: test5: +; CHECK: ptest +; CHECK-NEXT: jne +; CHECK: ret + + %res = call i32 @llvm.x86.sse41.ptestz(<4 x float> %a, <4 x float> %a) nounwind + %one = icmp ne i32 %res, 0 + br i1 %one, label %bb1, label %bb2 + +bb1: + %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 > + br label %return + +bb2: + %d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 > + br label %return + +return: + %e = phi <4 x float> [%c, %bb1], [%d, %bb2] + ret <4 x float> %e +} + +define <4 x float> @test6(<4 x float> %a, <4 x float> %b) nounwind { +entry: +; CHECK: test6: +; CHECK: ptest +; CHECK-NEXT: je +; CHECK: ret + + %res = call i32 @llvm.x86.sse41.ptestz(<4 x float> %a, <4 x float> %a) nounwind + %one = icmp eq i32 %res, 0 + br i1 %one, label %bb1, label %bb2 + +bb1: + %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 > + br label %return + +bb2: + %d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 > + br label %return + +return: + %e = phi <4 x float> [%c, %bb1], [%d, %bb2] + ret <4 x float> %e +} + +define <4 x float> @test7(<4 x float> %a, <4 x float> %b) nounwind { +entry: +; CHECK: test7: +; CHECK: ptest +; CHECK-NEXT: jne +; CHECK: ret + + %res = call i32 @llvm.x86.sse41.ptestz(<4 x float> %a, <4 x float> %a) nounwind + %one = trunc i32 %res to i1 + br i1 %one, label %bb1, label %bb2 + +bb1: + %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 > + br label %return + +bb2: + %d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 > + br label %return + +return: + %e = phi <4 x float> [%c, %bb1], [%d, %bb2] + ret <4 x float> %e +} + +define <4 x float> @test8(<4 x float> %a, <4 x float> %b) nounwind { +entry: +; CHECK: test8: +; CHECK: ptest +; CHECK-NEXT: jae +; CHECK: ret + + %res = call i32 @llvm.x86.sse41.ptestc(<4 x float> %a, <4 x float> %a) nounwind + %one = icmp ne i32 %res, 0 + br i1 %one, label %bb1, label %bb2 + +bb1: + %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 > + br label %return + +bb2: + %d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 > + br label %return + +return: + %e = phi <4 x float> [%c, %bb1], [%d, %bb2] + ret <4 x float> %e +} + +define <4 x float> @test9(<4 x float> %a, <4 x float> %b) nounwind { +entry: +; CHECK: test9: +; CHECK: ptest +; CHECK-NEXT: jb +; CHECK: ret + + %res = call i32 @llvm.x86.sse41.ptestc(<4 x float> %a, <4 x float> %a) nounwind + %one = icmp eq i32 %res, 0 + br i1 %one, label %bb1, label %bb2 + +bb1: + %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 > + br label %return + +bb2: + %d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 > + br label %return + +return: + %e = phi <4 x float> [%c, %bb1], [%d, %bb2] + ret <4 x float> %e +} + +define <4 x float> @test10(<4 x float> %a, <4 x float> %b) nounwind { +entry: +; CHECK: test10: +; CHECK: ptest +; CHECK-NEXT: jae +; CHECK: ret + + %res = call i32 @llvm.x86.sse41.ptestc(<4 x float> %a, <4 x float> %a) nounwind + %one = trunc i32 %res to i1 + br i1 %one, label %bb1, label %bb2 + +bb1: + %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 > + br label %return + +bb2: + %d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 > + br label %return + +return: + %e = phi <4 x float> [%c, %bb1], [%d, %bb2] + ret <4 x float> %e +} + +define <4 x float> @test11(<4 x float> %a, <4 x float> %b) nounwind { +entry: +; CHECK: test11: +; CHECK: ptest +; CHECK-NEXT: jne +; CHECK: ret + + %res = call i32 @llvm.x86.sse41.ptestz(<4 x float> %a, <4 x float> %a) nounwind + %one = icmp eq i32 %res, 1 + br i1 %one, label %bb1, label %bb2 + +bb1: + %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 > + br label %return + +bb2: + %d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 > + br label %return + +return: + %e = phi <4 x float> [%c, %bb1], [%d, %bb2] + ret <4 x float> %e +} + +define <4 x float> @test12(<4 x float> %a, <4 x float> %b) nounwind { +entry: +; CHECK: test12: +; CHECK: ptest +; CHECK-NEXT: je +; CHECK: ret + + %res = call i32 @llvm.x86.sse41.ptestz(<4 x float> %a, <4 x float> %a) nounwind + %one = icmp ne i32 %res, 1 + br i1 %one, label %bb1, label %bb2 + +bb1: + %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 > + br label %return + +bb2: + %d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 > + br label %return + +return: + %e = phi <4 x float> [%c, %bb1], [%d, %bb2] + ret <4 x float> %e +} + +define <4 x float> @test13(<4 x float> %a, <4 x float> %b) nounwind { +entry: +; CHECK: test13: +; CHECK: ptest +; CHECK-NEXT: jae +; CHECK: ret + + %res = call i32 @llvm.x86.sse41.ptestc(<4 x float> %a, <4 x float> %a) nounwind + %one = icmp eq i32 %res, 1 + br i1 %one, label %bb1, label %bb2 + +bb1: + %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 > + br label %return + +bb2: + %d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 > + br label %return + +return: + %e = phi <4 x float> [%c, %bb1], [%d, %bb2] + ret <4 x float> %e +} + +define <4 x float> @test14(<4 x float> %a, <4 x float> %b) nounwind { +entry: +; CHECK: test14: +; CHECK: ptest +; CHECK-NEXT: jb +; CHECK: ret + + %res = call i32 @llvm.x86.sse41.ptestc(<4 x float> %a, <4 x float> %a) nounwind + %one = icmp ne i32 %res, 1 + br i1 %one, label %bb1, label %bb2 + +bb1: + %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 > + br label %return + +bb2: + %d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 > + br label %return + +return: + %e = phi <4 x float> [%c, %bb1], [%d, %bb2] + ret <4 x float> %e +} -- cgit v1.2.3