summaryrefslogtreecommitdiff
path: root/test/CodeGen/X86/vec_split.ll
diff options
context:
space:
mode:
authorJuergen Ributzka <juergen@apple.com>2013-09-21 04:55:18 +0000
committerJuergen Ributzka <juergen@apple.com>2013-09-21 04:55:18 +0000
commit1941431f8a0f61fb5d5e3175cc49efd5dd19033c (patch)
tree2052732d1a60c4f90cbcfb0bcedca5fe3161eb4f /test/CodeGen/X86/vec_split.ll
parent023d90edb0eeba0541424046b992f0d7394441bc (diff)
downloadllvm-1941431f8a0f61fb5d5e3175cc49efd5dd19033c.tar.gz
llvm-1941431f8a0f61fb5d5e3175cc49efd5dd19033c.tar.bz2
llvm-1941431f8a0f61fb5d5e3175cc49efd5dd19033c.tar.xz
SelectionDAG: Teach the legalizer to split SETCC if VSELECT needs splitting too.
The Type Legalizer recognizes that VSELECT needs to be split, because the type is too wide for the given target. The same does not always apply to SETCC, because less space is required to encode the result of a comparison. As a result, VSELECT is split and SETCC is unrolled into scalar comparisons. This commit fixes the issue by checking for VSELECT-SETCC patterns in the DAG Combiner. If a matching pattern is found, then the result mask of SETCC is promoted to the expected vector mask for the given target. This mask usually has the same size as the VSELECT return type (except for Intel KNL). Now the type legalizer will split both VSELECT and SETCC. This allows the following X86 DAG Combine code to successfully detect the MIN/MAX pattern. This fixes PR16695, PR17002, and <rdar://problem/14594431>. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191130 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/CodeGen/X86/vec_split.ll')
-rw-r--r--test/CodeGen/X86/vec_split.ll29
1 files changed, 29 insertions, 0 deletions
diff --git a/test/CodeGen/X86/vec_split.ll b/test/CodeGen/X86/vec_split.ll
new file mode 100644
index 0000000000..a49af34f57
--- /dev/null
+++ b/test/CodeGen/X86/vec_split.ll
@@ -0,0 +1,29 @@
+; RUN: llc -march=x86-64 -mcpu=corei7 < %s | FileCheck %s -check-prefix=SSE4
+; RUN: llc -march=x86-64 -mcpu=core-avx2 < %s | FileCheck %s -check-prefix=AVX2
+
+define <16 x i16> @split16(<16 x i16> %a, <16 x i16> %b, <16 x i8> %__mask) { ; %__mask is not referenced in the body
+; SSE4-LABEL: split16:
+; SSE4: pminuw
+; SSE4: pminuw
+; AVX2-LABEL: split16:
+; AVX2: vpminuw
+; AVX2: ret
+ %1 = icmp ult <16 x i16> %a, %b ; lane-wise unsigned a < b, producing the <16 x i1> mask used below
+ %2 = select <16 x i1> %1, <16 x i16> %a, <16 x i16> %b ; take %a where the mask is set, else %b (unsigned minimum per lane)
+ ret <16 x i16> %2
+}
+
+define <32 x i16> @split32(<32 x i16> %a, <32 x i16> %b, <32 x i8> %__mask) { ; %__mask is not referenced in the body
+; SSE4-LABEL: split32:
+; SSE4: pminuw
+; SSE4: pminuw
+; SSE4: pminuw
+; SSE4: pminuw
+; AVX2-LABEL: split32:
+; AVX2: vpminuw
+; AVX2: vpminuw
+; AVX2: ret
+ %1 = icmp ult <32 x i16> %a, %b ; lane-wise unsigned a < b, producing the <32 x i1> mask used below
+ %2 = select <32 x i1> %1, <32 x i16> %a, <32 x i16> %b ; take %a where the mask is set, else %b (unsigned minimum per lane)
+ ret <32 x i16> %2
+}