From c7e77f91fecd662b198939a9a8ee0a0cc3828fc4 Mon Sep 17 00:00:00 2001 From: Juergen Ributzka Date: Wed, 13 Nov 2013 01:57:54 +0000 Subject: SelectionDAG: Teach the legalizer to split SETCC if VSELECT needs splitting too. This patch reapplies r193676 with an additional fix for the Hexagon backend. The SystemZ backend has already been fixed by r194148. The Type Legalizer recognizes that VSELECT needs to be split, because the type is too wide for the given target. The same does not always apply to SETCC, because less space is required to encode the result of a comparison. As a result VSELECT is split and SETCC is unrolled into scalar comparisons. This commit fixes the issue by checking for VSELECT-SETCC patterns in the DAG Combiner. If a matching pattern is found, then the result mask of SETCC is promoted to the expected vector mask type for the given target. Now the type legalizer will split both VSELECT and SETCC. This allows the following X86 DAG Combine code to successfully detect the MIN/MAX pattern. This fixes PR16695 and PR17002. 
Reviewed by Nadav git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@194542 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/CodeGen/X86/vec_split.ll | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 test/CodeGen/X86/vec_split.ll (limited to 'test/CodeGen/X86/vec_split.ll') diff --git a/test/CodeGen/X86/vec_split.ll b/test/CodeGen/X86/vec_split.ll new file mode 100644 index 0000000000..f9e7c20ba4 --- /dev/null +++ b/test/CodeGen/X86/vec_split.ll @@ -0,0 +1,42 @@ +; RUN: llc -march=x86-64 -mcpu=corei7 < %s | FileCheck %s -check-prefix=SSE4 +; RUN: llc -march=x86-64 -mcpu=corei7-avx < %s | FileCheck %s -check-prefix=AVX1 +; RUN: llc -march=x86-64 -mcpu=core-avx2 < %s | FileCheck %s -check-prefix=AVX2 + +define <16 x i16> @split16(<16 x i16> %a, <16 x i16> %b, <16 x i8> %__mask) { +; SSE4-LABEL: split16: +; SSE4: pminuw +; SSE4: pminuw +; SSE4: ret +; AVX1-LABEL: split16: +; AVX1: vpminuw +; AVX1: vpminuw +; AVX1: ret +; AVX2-LABEL: split16: +; AVX2: vpminuw +; AVX2: ret + %1 = icmp ult <16 x i16> %a, %b + %2 = select <16 x i1> %1, <16 x i16> %a, <16 x i16> %b + ret <16 x i16> %2 +} + +define <32 x i16> @split32(<32 x i16> %a, <32 x i16> %b, <32 x i8> %__mask) { +; SSE4-LABEL: split32: +; SSE4: pminuw +; SSE4: pminuw +; SSE4: pminuw +; SSE4: pminuw +; SSE4: ret +; AVX1-LABEL: split32: +; AVX1: vpminuw +; AVX1: vpminuw +; AVX1: vpminuw +; AVX1: vpminuw +; AVX1: ret +; AVX2-LABEL: split32: +; AVX2: vpminuw +; AVX2: vpminuw +; AVX2: ret + %1 = icmp ult <32 x i16> %a, %b + %2 = select <32 x i1> %1, <32 x i16> %a, <32 x i16> %b + ret <32 x i16> %2 +} -- cgit v1.2.3