summaryrefslogtreecommitdiff
path: root/lib/Target/X86/X86InstrAVX512.td
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/X86/X86InstrAVX512.td')
-rw-r--r--lib/Target/X86/X86InstrAVX512.td82
1 files changed, 60 insertions, 22 deletions
diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td
index fefa5fc6bc..2f9c0578cf 100644
--- a/lib/Target/X86/X86InstrAVX512.td
+++ b/lib/Target/X86/X86InstrAVX512.td
@@ -613,13 +613,13 @@ defm VPERMI2PS : avx512_perm_3src<0x77, "vpermi2ps", VR512, memopv16f32, i512me
defm VPERMI2PD : avx512_perm_3src<0x77, "vpermi2pd", VR512, memopv8f64, i512mem,
X86VPermiv3, v8f64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-defm VPERM2D : avx512_perm_3src<0x7E, "vperm2d", VR512, memopv16i32, i512mem,
+defm VPERMT2D : avx512_perm_3src<0x7E, "vpermt2d", VR512, memopv16i32, i512mem,
X86VPermv3, v16i32>, EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VPERM2Q : avx512_perm_3src<0x7E, "vperm2q", VR512, memopv8i64, i512mem,
+defm VPERMT2Q : avx512_perm_3src<0x7E, "vpermt2q", VR512, memopv8i64, i512mem,
X86VPermv3, v8i64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-defm VPERM2PS : avx512_perm_3src<0x7F, "vperm2ps", VR512, memopv16f32, i512mem,
+defm VPERMT2PS : avx512_perm_3src<0x7F, "vpermt2ps", VR512, memopv16f32, i512mem,
X86VPermv3, v16f32>, EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VPERM2PD : avx512_perm_3src<0x7F, "vperm2pd", VR512, memopv8f64, i512mem,
+defm VPERMT2PD : avx512_perm_3src<0x7F, "vpermt2pd", VR512, memopv8f64, i512mem,
X86VPermv3, v8f64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
//===----------------------------------------------------------------------===//
// AVX-512 - BLEND using mask
@@ -1332,6 +1332,11 @@ let Constraints = "$src1 = $dst" in {
" \t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
[]>, EVEX, EVEX_K;
}
+ def rrkz : AVX512XSI<load_opc, MRMSrcReg, (outs RC:$dst),
+ (ins KRC:$mask, RC:$src),
+ !strconcat(asm,
+ " \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"), []>,
+ EVEX, EVEX_KZ;
}
defm VMOVDQU32 : avx512_mov_int<0x6F, 0x7F, "vmovdqu32", VR512, VK16WM,
@@ -1351,6 +1356,23 @@ def : Pat<(store (v16i32 VR512:$src), addr:$dst),
(VMOVDQU32mr addr:$dst, VR512:$src)>;
let AddedComplexity = 20 in {
+def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 VR512:$src),
+ (bc_v8i64 (v16i32 immAllZerosV)))),
+ (VMOVDQU64rrkz VK8WM:$mask, VR512:$src)>;
+
+def : Pat<(v8i64 (vselect VK8WM:$mask, (bc_v8i64 (v16i32 immAllZerosV)),
+ (v8i64 VR512:$src))),
+ (VMOVDQU64rrkz (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)),
+ VK8), VR512:$src)>;
+
+def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 VR512:$src),
+ (v16i32 immAllZerosV))),
+ (VMOVDQU32rrkz VK16WM:$mask, VR512:$src)>;
+
+def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV),
+ (v16i32 VR512:$src))),
+ (VMOVDQU32rrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;
+
def : Pat<(v16f32 (vselect VK16WM:$mask, (v16f32 VR512:$src1),
(v16f32 VR512:$src2))),
(VMOVUPSZrrk VR512:$src2, VK16WM:$mask, VR512:$src1)>;
@@ -2118,24 +2140,34 @@ def : Pat<(v8f64 (int_x86_avx512_mask_min_pd_512 (v8f64 VR512:$src1),
multiclass avx512_vptest<bits<8> opc, string OpcodeStr, RegisterClass KRC,
RegisterClass RC, X86MemOperand x86memop, PatFrag memop_frag,
SDNode OpNode, ValueType vt> {
- def rr : AVX5128I<opc, MRMSrcReg,
+ def rr : AVX512PI<opc, MRMSrcReg,
(outs KRC:$dst), (ins RC:$src1, RC:$src2),
!strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set KRC:$dst, (OpNode (vt RC:$src1), (vt RC:$src2)))]>, EVEX_4V;
- def rm : AVX5128I<opc, MRMSrcMem,
+ [(set KRC:$dst, (OpNode (vt RC:$src1), (vt RC:$src2)))],
+ SSEPackedInt>, EVEX_4V;
+ def rm : AVX512PI<opc, MRMSrcMem,
(outs KRC:$dst), (ins RC:$src1, x86memop:$src2),
!strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set KRC:$dst, (OpNode (vt RC:$src1),
- (bitconvert (memop_frag addr:$src2))))]>, EVEX_4V;
+ (bitconvert (memop_frag addr:$src2))))], SSEPackedInt>, EVEX_4V;
}
defm VPTESTMDZ : avx512_vptest<0x27, "vptestmd", VK16, VR512, f512mem,
- memopv16i32, X86testm, v16i32>, EVEX_V512,
+ memopv16i32, X86testm, v16i32>, T8PD, EVEX_V512,
EVEX_CD8<32, CD8VF>;
defm VPTESTMQZ : avx512_vptest<0x27, "vptestmq", VK8, VR512, f512mem,
- memopv8i64, X86testm, v8i64>, EVEX_V512, VEX_W,
+ memopv8i64, X86testm, v8i64>, T8XS, EVEX_V512, VEX_W,
EVEX_CD8<64, CD8VF>;
+let Predicates = [HasCDI] in {
+defm VPTESTNMDZ : avx512_vptest<0x27, "vptestnmd", VK16, VR512, f512mem,
+ memopv16i32, X86testnm, v16i32>, T8XS, EVEX_V512,
+ EVEX_CD8<32, CD8VF>;
+defm VPTESTNMQZ : avx512_vptest<0x27, "vptestnmq", VK8, VR512, f512mem,
+ memopv8i64, X86testnm, v8i64>, T8PD, EVEX_V512, VEX_W,
+ EVEX_CD8<64, CD8VF>;
+}
+
def : Pat <(i16 (int_x86_avx512_mask_ptestm_d_512 (v16i32 VR512:$src1),
(v16i32 VR512:$src2), (i16 -1))),
(COPY_TO_REGCLASS (VPTESTMDZrr VR512:$src1, VR512:$src2), GR16)>;
@@ -2997,35 +3029,41 @@ let Predicates = [HasAVX512] in {
//===----------------------------------------------------------------------===//
// Half precision conversion instructions
//===----------------------------------------------------------------------===//
-multiclass avx512_f16c_ph2ps<RegisterClass destRC, RegisterClass srcRC,
- X86MemOperand x86memop, Intrinsic Int> {
+multiclass avx512_cvtph2ps<RegisterClass destRC, RegisterClass srcRC,
+ X86MemOperand x86memop> {
def rr : AVX5128I<0x13, MRMSrcReg, (outs destRC:$dst), (ins srcRC:$src),
"vcvtph2ps\t{$src, $dst|$dst, $src}",
- [(set destRC:$dst, (Int srcRC:$src))]>, EVEX;
+ []>, EVEX;
let hasSideEffects = 0, mayLoad = 1 in
def rm : AVX5128I<0x13, MRMSrcMem, (outs destRC:$dst), (ins x86memop:$src),
"vcvtph2ps\t{$src, $dst|$dst, $src}", []>, EVEX;
}
-multiclass avx512_f16c_ps2ph<RegisterClass destRC, RegisterClass srcRC,
- X86MemOperand x86memop, Intrinsic Int> {
+multiclass avx512_cvtps2ph<RegisterClass destRC, RegisterClass srcRC,
+ X86MemOperand x86memop> {
def rr : AVX512AIi8<0x1D, MRMDestReg, (outs destRC:$dst),
(ins srcRC:$src1, i32i8imm:$src2),
- "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set destRC:$dst, (Int srcRC:$src1, imm:$src2))]>, EVEX;
+ "vcvtps2ph \t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ []>, EVEX;
let hasSideEffects = 0, mayStore = 1 in
def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
(ins x86memop:$dst, srcRC:$src1, i32i8imm:$src2),
- "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, EVEX;
+ "vcvtps2ph \t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, EVEX;
}
-defm VCVTPH2PSZ : avx512_f16c_ph2ps<VR512, VR256X, f256mem,
- int_x86_avx512_vcvtph2ps_512>, EVEX_V512,
+defm VCVTPH2PSZ : avx512_cvtph2ps<VR512, VR256X, f256mem>, EVEX_V512,
EVEX_CD8<32, CD8VH>;
-defm VCVTPS2PHZ : avx512_f16c_ps2ph<VR256X, VR512, f256mem,
- int_x86_avx512_vcvtps2ph_512>, EVEX_V512,
+defm VCVTPS2PHZ : avx512_cvtps2ph<VR256X, VR512, f256mem>, EVEX_V512,
EVEX_CD8<32, CD8VH>;
+def : Pat<(v16i16 (int_x86_avx512_mask_vcvtps2ph_512 (v16f32 VR512:$src),
+ imm:$rc, (bc_v16i16(v8i32 immAllZerosV)), (i16 -1))),
+ (VCVTPS2PHZrr VR512:$src, imm:$rc)>;
+
+def : Pat<(v16f32 (int_x86_avx512_mask_vcvtph2ps_512 (v16i16 VR256X:$src),
+ (bc_v16f32(v16i32 immAllZerosV)), (i16 -1), (i32 FROUND_CURRENT))),
+ (VCVTPH2PSZrr VR256X:$src)>;
+
let Defs = [EFLAGS], Predicates = [HasAVX512] in {
defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86cmp, f32, f32mem, loadf32,
"ucomiss">, TB, EVEX, VEX_LIG,