From ad1f916eafc01592deaad7dfe85738c90081163d Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Sun, 27 Apr 2014 18:47:41 +0000 Subject: X86: If SSE4.1 is missing, lower SMUL_LOHI of v4i32 to pmuludq and fix up the high parts. This is more expensive than pmuldq but still cheaper than scalarizing the whole thing. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@207370 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/CodeGen/X86/vector-idiv.ll | 63 ++++++++++++++++++++++++++--------------- 1 file changed, 40 insertions(+), 23 deletions(-) (limited to 'test') diff --git a/test/CodeGen/X86/vector-idiv.ll b/test/CodeGen/X86/vector-idiv.ll index 981c317157..5738c94e37 100644 --- a/test/CodeGen/X86/vector-idiv.ll +++ b/test/CodeGen/X86/vector-idiv.ll @@ -1,19 +1,20 @@ -; RUN: llc -march=x86-64 -mcpu=corei7 < %s | FileCheck %s -check-prefix=SSE +; RUN: llc -march=x86-64 -mcpu=core2 -mattr=+sse4.1 < %s | FileCheck %s -check-prefix=SSE41 +; RUN: llc -march=x86-64 -mcpu=core2 < %s | FileCheck %s -check-prefix=SSE ; RUN: llc -march=x86-64 -mcpu=core-avx2 < %s | FileCheck %s -check-prefix=AVX define <4 x i32> @test1(<4 x i32> %a) { %div = udiv <4 x i32> %a, ret <4 x i32> %div -; SSE-LABEL: test1: -; SSE: pmuludq -; SSE: pshufd $57 -; SSE: pmuludq -; SSE: shufps $-35 -; SSE: psubd -; SSE: psrld $1 -; SSE: padd -; SSE: psrld $2 +; SSE41-LABEL: test1: +; SSE41: pmuludq +; SSE41: pshufd $57 +; SSE41: pmuludq +; SSE41: shufps $-35 +; SSE41: psubd +; SSE41: psrld $1 +; SSE41: padd +; SSE41: psrld $2 ; AVX-LABEL: test1: ; AVX: vpmuludq @@ -46,12 +47,12 @@ define <8 x i16> @test3(<8 x i16> %a) { %div = udiv <8 x i16> %a, ret <8 x i16> %div -; SSE-LABEL: test3: -; SSE: pmulhuw -; SSE: psubw -; SSE: psrlw $1 -; SSE: paddw -; SSE: psrlw $2 +; SSE41-LABEL: test3: +; SSE41: pmulhuw +; SSE41: psubw +; SSE41: psrlw $1 +; SSE41: paddw +; SSE41: psrlw $2 ; AVX-LABEL: test3: ; AVX: vpmulhuw @@ -78,11 +79,11 @@ define <8 x i16> @test5(<8 x i16> %a) { %div = sdiv <8 x i16> %a, ret <8 x i16> 
%div -; SSE-LABEL: test5: -; SSE: pmulhw -; SSE: psrlw $15 -; SSE: psraw $1 -; SSE: paddw +; SSE41-LABEL: test5: +; SSE41: pmulhw +; SSE41: psrlw $15 +; SSE41: psraw $1 +; SSE41: paddw ; AVX-LABEL: test5: ; AVX: vpmulhw @@ -112,13 +113,29 @@ define <4 x i32> @test8(<4 x i32> %a) { %div = sdiv <4 x i32> %a, ret <4 x i32> %div +; SSE41-LABEL: test8: +; SSE41: pmuldq +; SSE41: pshufd $57 +; SSE41-NOT: pshufd $57 +; SSE41: pmuldq +; SSE41: shufps $-35 +; SSE41: pshufd $-40 +; SSE41: padd +; SSE41: psrld $31 +; SSE41: psrad $2 +; SSE41: padd + ; SSE-LABEL: test8: -; SSE: pmuldq +; SSE: psrad $31 +; SSE: pand +; SSE: paddd +; SSE: pmuludq ; SSE: pshufd $57 ; SSE-NOT: pshufd $57 -; SSE: pmuldq +; SSE: pmuludq ; SSE: shufps $-35 ; SSE: pshufd $-40 +; SSE: psubd ; SSE: padd ; SSE: psrld $31 ; SSE: psrad $2 -- cgit v1.2.3