diff options
author | Benjamin Kramer <benny.kra@googlemail.com> | 2013-04-26 12:05:21 +0000 |
---|---|---|
committer | Benjamin Kramer <benny.kra@googlemail.com> | 2013-04-26 12:05:21 +0000 |
commit | 753981784f47724143c171a7bcafe4becdab2e1c (patch) | |
tree | ec4830de51076393c68ce6bc7ec10644425141db | |
parent | 6242fda42ad13eebc908e744426ae7bc8cf8d1c3 (diff) | |
download | llvm-753981784f47724143c171a7bcafe4becdab2e1c.tar.gz llvm-753981784f47724143c171a7bcafe4becdab2e1c.tar.bz2 llvm-753981784f47724143c171a7bcafe4becdab2e1c.tar.xz |
X86: Now that we have a canonical form for vector integer abs, match it into pabs.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@180600 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Target/X86/X86InstrSSE.td | 51 |
-rw-r--r-- | test/CodeGen/X86/viabs.ll | 143 |
2 files changed, 181 insertions, 13 deletions
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 384238741b..4d6097faae 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -5094,6 +5094,16 @@ multiclass SS3I_unop_rm_int_y<bits<8> opc, string OpcodeStr, Sched<[WriteVecALULd]>; } +// Helper fragments to match sext vXi1 to vXiY. +def v16i1sextv16i8 : PatLeaf<(v16i8 (X86pcmpgt (bc_v16i8 (v4i32 immAllZerosV)), + VR128:$src))>; +def v8i1sextv8i16 : PatLeaf<(v8i16 (X86vsrai VR128:$src, (i32 15)))>; +def v4i1sextv4i32 : PatLeaf<(v4i32 (X86vsrai VR128:$src, (i32 31)))>; +def v32i1sextv32i8 : PatLeaf<(v32i8 (X86pcmpgt (bc_v32i8 (v8i32 immAllZerosV)), + VR256:$src))>; +def v16i1sextv16i16: PatLeaf<(v16i16 (X86vsrai VR256:$src, (i32 15)))>; +def v8i1sextv8i32 : PatLeaf<(v8i32 (X86vsrai VR256:$src, (i32 31)))>; + let Predicates = [HasAVX] in { defm VPABSB : SS3I_unop_rm_int<0x1C, "vpabsb", int_x86_ssse3_pabs_b_128>, VEX; @@ -5101,6 +5111,19 @@ let Predicates = [HasAVX] in { int_x86_ssse3_pabs_w_128>, VEX; defm VPABSD : SS3I_unop_rm_int<0x1E, "vpabsd", int_x86_ssse3_pabs_d_128>, VEX; + + def : Pat<(xor + (bc_v2i64 (v16i1sextv16i8)), + (bc_v2i64 (add (v16i8 VR128:$src), (v16i1sextv16i8)))), + (VPABSBrr128 VR128:$src)>; + def : Pat<(xor + (bc_v2i64 (v8i1sextv8i16)), + (bc_v2i64 (add (v8i16 VR128:$src), (v8i1sextv8i16)))), + (VPABSWrr128 VR128:$src)>; + def : Pat<(xor + (bc_v2i64 (v4i1sextv4i32)), + (bc_v2i64 (add (v4i32 VR128:$src), (v4i1sextv4i32)))), + (VPABSDrr128 VR128:$src)>; } let Predicates = [HasAVX2] in { @@ -5110,6 +5133,19 @@ let Predicates = [HasAVX2] in { int_x86_avx2_pabs_w>, VEX, VEX_L; defm VPABSD : SS3I_unop_rm_int_y<0x1E, "vpabsd", int_x86_avx2_pabs_d>, VEX, VEX_L; + + def : Pat<(xor + (bc_v4i64 (v32i1sextv32i8)), + (bc_v4i64 (add (v32i8 VR256:$src), (v32i1sextv32i8)))), + (VPABSBrr256 VR256:$src)>; + def : Pat<(xor + (bc_v4i64 (v16i1sextv16i16)), + (bc_v4i64 (add (v16i16 VR256:$src), (v16i1sextv16i16)))), + (VPABSWrr256 VR256:$src)>; + def : 
Pat<(xor + (bc_v4i64 (v8i1sextv8i32)), + (bc_v4i64 (add (v8i32 VR256:$src), (v8i1sextv8i32)))), + (VPABSDrr256 VR256:$src)>; } defm PABSB : SS3I_unop_rm_int<0x1C, "pabsb", @@ -5119,6 +5155,21 @@ defm PABSW : SS3I_unop_rm_int<0x1D, "pabsw", defm PABSD : SS3I_unop_rm_int<0x1E, "pabsd", int_x86_ssse3_pabs_d_128>; +let Predicates = [HasSSSE3] in { + def : Pat<(xor + (bc_v2i64 (v16i1sextv16i8)), + (bc_v2i64 (add (v16i8 VR128:$src), (v16i1sextv16i8)))), + (PABSBrr128 VR128:$src)>; + def : Pat<(xor + (bc_v2i64 (v8i1sextv8i16)), + (bc_v2i64 (add (v8i16 VR128:$src), (v8i1sextv8i16)))), + (PABSWrr128 VR128:$src)>; + def : Pat<(xor + (bc_v2i64 (v4i1sextv4i32)), + (bc_v2i64 (add (v4i32 VR128:$src), (v4i1sextv4i32)))), + (PABSDrr128 VR128:$src)>; +} + //===---------------------------------------------------------------------===// // SSSE3 - Packed Binary Operator Instructions //===---------------------------------------------------------------------===// diff --git a/test/CodeGen/X86/viabs.ll b/test/CodeGen/X86/viabs.ll index a509d8aa81..1f73ef5fa1 100644 --- a/test/CodeGen/X86/viabs.ll +++ b/test/CodeGen/X86/viabs.ll @@ -1,4 +1,6 @@ ; RUN: llc < %s -march=x86-64 -mcpu=x86-64 | FileCheck %s -check-prefix=SSE2 +; RUN: llc < %s -march=x86-64 -mcpu=corei7 | FileCheck %s -check-prefix=SSSE3 +; RUN: llc < %s -march=x86-64 -mcpu=core-avx2 | FileCheck %s -check-prefix=AVX2 define <4 x i32> @test1(<4 x i32> %a) nounwind { ; SSE2: test1: @@ -7,6 +9,14 @@ define <4 x i32> @test1(<4 x i32> %a) nounwind { ; SSE2-NEXT: padd ; SSE2-NEXT: pxor ; SSE2-NEXT: ret + +; SSSE3: test1: +; SSSE3: pabsd +; SSSE3-NEXT: ret + +; AVX2: test1: +; AVX2: vpabsd +; AVX2-NEXT: ret %tmp1neg = sub <4 x i32> zeroinitializer, %a %b = icmp sgt <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1> %abs = select <4 x i1> %b, <4 x i32> %a, <4 x i32> %tmp1neg @@ -20,36 +30,60 @@ define <4 x i32> @test2(<4 x i32> %a) nounwind { ; SSE2-NEXT: padd ; SSE2-NEXT: pxor ; SSE2-NEXT: ret + +; SSSE3: test2: +; SSSE3: pabsd +; 
SSSE3-NEXT: ret + +; AVX2: test2: +; AVX2: vpabsd +; AVX2-NEXT: ret %tmp1neg = sub <4 x i32> zeroinitializer, %a %b = icmp sge <4 x i32> %a, zeroinitializer %abs = select <4 x i1> %b, <4 x i32> %a, <4 x i32> %tmp1neg ret <4 x i32> %abs } -define <4 x i32> @test3(<4 x i32> %a) nounwind { +define <8 x i16> @test3(<8 x i16> %a) nounwind { ; SSE2: test3: ; SSE2: movdqa -; SSE2-NEXT: psrad $31 +; SSE2-NEXT: psraw $15 ; SSE2-NEXT: padd ; SSE2-NEXT: pxor ; SSE2-NEXT: ret - %tmp1neg = sub <4 x i32> zeroinitializer, %a - %b = icmp sgt <4 x i32> %a, zeroinitializer - %abs = select <4 x i1> %b, <4 x i32> %a, <4 x i32> %tmp1neg - ret <4 x i32> %abs + +; SSSE3: test3: +; SSSE3: pabsw +; SSSE3-NEXT: ret + +; AVX2: test3: +; AVX2: vpabsw +; AVX2-NEXT: ret + %tmp1neg = sub <8 x i16> zeroinitializer, %a + %b = icmp sgt <8 x i16> %a, zeroinitializer + %abs = select <8 x i1> %b, <8 x i16> %a, <8 x i16> %tmp1neg + ret <8 x i16> %abs } -define <4 x i32> @test4(<4 x i32> %a) nounwind { +define <16 x i8> @test4(<16 x i8> %a) nounwind { ; SSE2: test4: -; SSE2: movdqa -; SSE2-NEXT: psrad $31 +; SSE2: pxor +; SSE2-NEXT: pcmpgtb ; SSE2-NEXT: padd ; SSE2-NEXT: pxor ; SSE2-NEXT: ret - %tmp1neg = sub <4 x i32> zeroinitializer, %a - %b = icmp slt <4 x i32> %a, zeroinitializer - %abs = select <4 x i1> %b, <4 x i32> %tmp1neg, <4 x i32> %a - ret <4 x i32> %abs + +; SSSE3: test4: +; SSSE3: pabsb +; SSSE3-NEXT: ret + +; AVX2: test4: +; AVX2: vpabsb +; AVX2-NEXT: ret + %tmp1neg = sub <16 x i8> zeroinitializer, %a + %b = icmp slt <16 x i8> %a, zeroinitializer + %abs = select <16 x i1> %b, <16 x i8> %tmp1neg, <16 x i8> %a + ret <16 x i8> %abs } define <4 x i32> @test5(<4 x i32> %a) nounwind { @@ -59,8 +93,91 @@ define <4 x i32> @test5(<4 x i32> %a) nounwind { ; SSE2-NEXT: padd ; SSE2-NEXT: pxor ; SSE2-NEXT: ret + +; SSSE3: test5: +; SSSE3: pabsd +; SSSE3-NEXT: ret + +; AVX2: test5: +; AVX2: vpabsd +; AVX2-NEXT: ret %tmp1neg = sub <4 x i32> zeroinitializer, %a %b = icmp sle <4 x i32> %a, zeroinitializer 
%abs = select <4 x i1> %b, <4 x i32> %tmp1neg, <4 x i32> %a ret <4 x i32> %abs } + +define <8 x i32> @test6(<8 x i32> %a) nounwind { +; SSSE3: test6: +; SSSE3: pabsd +; SSSE3: pabsd +; SSSE3-NEXT: ret + +; AVX2: test6: +; AVX2: vpabsd %ymm +; AVX2-NEXT: ret + %tmp1neg = sub <8 x i32> zeroinitializer, %a + %b = icmp sgt <8 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1> + %abs = select <8 x i1> %b, <8 x i32> %a, <8 x i32> %tmp1neg + ret <8 x i32> %abs +} + +define <8 x i32> @test7(<8 x i32> %a) nounwind { +; SSSE3: test7: +; SSSE3: pabsd +; SSSE3: pabsd +; SSSE3-NEXT: ret + +; AVX2: test7: +; AVX2: vpabsd %ymm +; AVX2-NEXT: ret + %tmp1neg = sub <8 x i32> zeroinitializer, %a + %b = icmp sge <8 x i32> %a, zeroinitializer + %abs = select <8 x i1> %b, <8 x i32> %a, <8 x i32> %tmp1neg + ret <8 x i32> %abs +} + +define <16 x i16> @test8(<16 x i16> %a) nounwind { +; SSSE3: test8: +; SSSE3: pabsw +; SSSE3: pabsw +; SSSE3-NEXT: ret + +; AVX2: test8: +; AVX2: vpabsw %ymm +; AVX2-NEXT: ret + %tmp1neg = sub <16 x i16> zeroinitializer, %a + %b = icmp sgt <16 x i16> %a, zeroinitializer + %abs = select <16 x i1> %b, <16 x i16> %a, <16 x i16> %tmp1neg + ret <16 x i16> %abs +} + +define <32 x i8> @test9(<32 x i8> %a) nounwind { +; SSSE3: test9: +; SSSE3: pabsb +; SSSE3: pabsb +; SSSE3-NEXT: ret + +; AVX2: test9: +; AVX2: vpabsb %ymm +; AVX2-NEXT: ret + %tmp1neg = sub <32 x i8> zeroinitializer, %a + %b = icmp slt <32 x i8> %a, zeroinitializer + %abs = select <32 x i1> %b, <32 x i8> %tmp1neg, <32 x i8> %a + ret <32 x i8> %abs +} + +define <8 x i32> @test10(<8 x i32> %a) nounwind { +; SSSE3: test10: +; SSSE3: pabsd +; SSSE3: pabsd +; SSSE3-NEXT: ret + +; AVX2: test10: +; AVX2: vpabsd %ymm +; AVX2-NEXT: ret + %tmp1neg = sub <8 x i32> zeroinitializer, %a + %b = icmp sle <8 x i32> %a, zeroinitializer + %abs = select <8 x i1> %b, <8 x i32> %tmp1neg, <8 x i32> %a + ret <8 x i32> %abs +} |