summaryrefslogtreecommitdiff
path: root/test
diff options
context:
space:
mode:
authorBenjamin Kramer <benny.kra@googlemail.com>2014-04-27 18:47:41 +0000
committerBenjamin Kramer <benny.kra@googlemail.com>2014-04-27 18:47:41 +0000
commitad1f916eafc01592deaad7dfe85738c90081163d (patch)
treec46921644ddc8e4aa0ff8b56177c6ad8acc8a0ff /test
parent75955d843f0b21fdbc7fff63635702d4f66ed8c3 (diff)
downloadllvm-ad1f916eafc01592deaad7dfe85738c90081163d.tar.gz
llvm-ad1f916eafc01592deaad7dfe85738c90081163d.tar.bz2
llvm-ad1f916eafc01592deaad7dfe85738c90081163d.tar.xz
X86: If SSE4.1 is missing, lower SMUL_LOHI of v4i32 to pmuludq and fix up the high parts.
This is more expensive than pmuldq but still cheaper than scalarizing the whole thing. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@207370 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test')
-rw-r--r--test/CodeGen/X86/vector-idiv.ll63
1 file changed, 40 insertions, 23 deletions
diff --git a/test/CodeGen/X86/vector-idiv.ll b/test/CodeGen/X86/vector-idiv.ll
index 981c317157..5738c94e37 100644
--- a/test/CodeGen/X86/vector-idiv.ll
+++ b/test/CodeGen/X86/vector-idiv.ll
@@ -1,19 +1,20 @@
-; RUN: llc -march=x86-64 -mcpu=corei7 < %s | FileCheck %s -check-prefix=SSE
+; RUN: llc -march=x86-64 -mcpu=core2 -mattr=+sse4.1 < %s | FileCheck %s -check-prefix=SSE41
+; RUN: llc -march=x86-64 -mcpu=core2 < %s | FileCheck %s -check-prefix=SSE
; RUN: llc -march=x86-64 -mcpu=core-avx2 < %s | FileCheck %s -check-prefix=AVX
define <4 x i32> @test1(<4 x i32> %a) {
%div = udiv <4 x i32> %a, <i32 7, i32 7, i32 7, i32 7>
ret <4 x i32> %div
-; SSE-LABEL: test1:
-; SSE: pmuludq
-; SSE: pshufd $57
-; SSE: pmuludq
-; SSE: shufps $-35
-; SSE: psubd
-; SSE: psrld $1
-; SSE: padd
-; SSE: psrld $2
+; SSE41-LABEL: test1:
+; SSE41: pmuludq
+; SSE41: pshufd $57
+; SSE41: pmuludq
+; SSE41: shufps $-35
+; SSE41: psubd
+; SSE41: psrld $1
+; SSE41: padd
+; SSE41: psrld $2
; AVX-LABEL: test1:
; AVX: vpmuludq
@@ -46,12 +47,12 @@ define <8 x i16> @test3(<8 x i16> %a) {
%div = udiv <8 x i16> %a, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
ret <8 x i16> %div
-; SSE-LABEL: test3:
-; SSE: pmulhuw
-; SSE: psubw
-; SSE: psrlw $1
-; SSE: paddw
-; SSE: psrlw $2
+; SSE41-LABEL: test3:
+; SSE41: pmulhuw
+; SSE41: psubw
+; SSE41: psrlw $1
+; SSE41: paddw
+; SSE41: psrlw $2
; AVX-LABEL: test3:
; AVX: vpmulhuw
@@ -78,11 +79,11 @@ define <8 x i16> @test5(<8 x i16> %a) {
%div = sdiv <8 x i16> %a, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
ret <8 x i16> %div
-; SSE-LABEL: test5:
-; SSE: pmulhw
-; SSE: psrlw $15
-; SSE: psraw $1
-; SSE: paddw
+; SSE41-LABEL: test5:
+; SSE41: pmulhw
+; SSE41: psrlw $15
+; SSE41: psraw $1
+; SSE41: paddw
; AVX-LABEL: test5:
; AVX: vpmulhw
@@ -112,13 +113,29 @@ define <4 x i32> @test8(<4 x i32> %a) {
%div = sdiv <4 x i32> %a, <i32 7, i32 7, i32 7, i32 7>
ret <4 x i32> %div
+; SSE41-LABEL: test8:
+; SSE41: pmuldq
+; SSE41: pshufd $57
+; SSE41-NOT: pshufd $57
+; SSE41: pmuldq
+; SSE41: shufps $-35
+; SSE41: pshufd $-40
+; SSE41: padd
+; SSE41: psrld $31
+; SSE41: psrad $2
+; SSE41: padd
+
; SSE-LABEL: test8:
-; SSE: pmuldq
+; SSE: psrad $31
+; SSE: pand
+; SSE: paddd
+; SSE: pmuludq
; SSE: pshufd $57
; SSE-NOT: pshufd $57
-; SSE: pmuldq
+; SSE: pmuludq
; SSE: shufps $-35
; SSE: pshufd $-40
+; SSE: psubd
; SSE: padd
; SSE: psrld $31
; SSE: psrad $2