summaryrefslogtreecommitdiff
path: root/lib/Target/X86/X86InstrAVX512.td
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/X86/X86InstrAVX512.td')
-rw-r--r--lib/Target/X86/X86InstrAVX512.td93
1 files changed, 85 insertions, 8 deletions
diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td
index 4c114e2a40..2a40533271 100644
--- a/lib/Target/X86/X86InstrAVX512.td
+++ b/lib/Target/X86/X86InstrAVX512.td
@@ -683,6 +683,42 @@ def : Pat<(v8i32 (vselect (v8i1 VK8WM:$mask), (v8i32 VR256X:$src1),
(v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
(v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>;
}
+//===----------------------------------------------------------------------===//
+// Compare Instructions
+//===----------------------------------------------------------------------===//
+
+// avx512_cmp_scalar - AVX512 CMPSS and CMPSD
+multiclass avx512_cmp_scalar<RegisterClass RC, X86MemOperand x86memop,
+ Operand CC, SDNode OpNode, ValueType VT,
+ PatFrag ld_frag, string asm, string asm_alt> {
+ def rr : AVX512Ii8<0xC2, MRMSrcReg,
+ (outs VK1:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm,
+ [(set VK1:$dst, (OpNode (VT RC:$src1), RC:$src2, imm:$cc))],
+ IIC_SSE_ALU_F32S_RR>, EVEX_4V;
+ def rm : AVX512Ii8<0xC2, MRMSrcMem,
+ (outs VK1:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm,
+ [(set VK1:$dst, (OpNode (VT RC:$src1),
+ (ld_frag addr:$src2), imm:$cc))], IIC_SSE_ALU_F32P_RM>, EVEX_4V;
+ let neverHasSideEffects = 1 in {
+ def rri_alt : AVX512Ii8<0xC2, MRMSrcReg,
+ (outs VK1:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc),
+ asm_alt, [], IIC_SSE_ALU_F32S_RR>, EVEX_4V;
+ def rmi_alt : AVX512Ii8<0xC2, MRMSrcMem,
+ (outs VK1:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc),
+ asm_alt, [], IIC_SSE_ALU_F32P_RM>, EVEX_4V;
+ }
+}
+
+let Predicates = [HasAVX512] in {
+defm VCMPSSZ : avx512_cmp_scalar<FR32X, f32mem, AVXCC, X86cmpms, f32, loadf32,
+ "vcmp${cc}ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ "vcmpss\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">,
+ XS;
+defm VCMPSDZ : avx512_cmp_scalar<FR64X, f64mem, AVXCC, X86cmpms, f64, loadf64,
+ "vcmp${cc}sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ "vcmpsd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">,
+ XD, VEX_W;
+}
multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, RegisterClass KRC,
RegisterClass RC, X86MemOperand x86memop, PatFrag memop_frag,
@@ -734,10 +770,10 @@ multiclass avx512_icmp_cc<bits<8> opc, RegisterClass KRC,
// Accept explicit immediate argument form instead of comparison code.
let neverHasSideEffects = 1 in {
def rri_alt : AVX512AIi8<opc, MRMSrcReg,
- (outs RC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc),
+ (outs KRC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc),
asm_alt, [], IIC_SSE_ALU_F32P_RR>, EVEX_4V;
def rmi_alt : AVX512AIi8<opc, MRMSrcMem,
- (outs RC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc),
+ (outs KRC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc),
asm_alt, [], IIC_SSE_ALU_F32P_RM>, EVEX_4V;
}
}
@@ -864,8 +900,14 @@ let Predicates = [HasAVX512] in {
def : Pat<(store (v16i1 VK16:$src), addr:$dst),
(KMOVWmk addr:$dst, VK16:$src)>;
- def : Pat<(store (v8i1 VK8:$src), addr:$dst),
- (KMOVWmk addr:$dst, (v16i1 (COPY_TO_REGCLASS VK8:$src, VK16)))>;
+ def : Pat<(store VK8:$src, addr:$dst),
+ (KMOVWmk addr:$dst, (COPY_TO_REGCLASS VK8:$src, VK16))>;
+
+ def : Pat<(i1 (load addr:$src)),
+ (COPY_TO_REGCLASS (KMOVWkm addr:$src), VK1)>;
+
+ def : Pat<(v8i1 (load addr:$src)),
+ (COPY_TO_REGCLASS (KMOVWkm addr:$src), VK8)>;
}
// With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
let Predicates = [HasAVX512] in {
@@ -878,6 +920,12 @@ let Predicates = [HasAVX512] in {
(EXTRACT_SUBREG
(KMOVWrk (COPY_TO_REGCLASS VK8:$src, VK16)),
sub_8bit)>;
+
+ def : Pat<(i1 (extractelt VK16:$src, (iPTR 0))),
+ (COPY_TO_REGCLASS VK16:$src, VK1)>;
+ def : Pat<(i1 (extractelt VK8:$src, (iPTR 0))),
+ (COPY_TO_REGCLASS VK8:$src, VK1)>;
+
}
// Mask unary operation
@@ -945,6 +993,19 @@ let isCommutable = 1 in {
defm KXOR : avx512_mask_binop_w<0x47, "kxor", xor>;
}
+def : Pat<(xor VK1:$src1, VK1:$src2),
+ (COPY_TO_REGCLASS (KXORWrr (COPY_TO_REGCLASS VK1:$src1, VK16),
+ (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
+
+def : Pat<(or VK1:$src1, VK1:$src2),
+ (COPY_TO_REGCLASS (KORWrr (COPY_TO_REGCLASS VK1:$src1, VK16),
+ (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
+
+def : Pat<(not VK1:$src),
+ (COPY_TO_REGCLASS (KXORWrr (COPY_TO_REGCLASS VK1:$src, VK16),
+ (COPY_TO_REGCLASS (VCMPSSZrr (f32 (IMPLICIT_DEF)),
+ (f32 (IMPLICIT_DEF)), (i8 0)), VK16)), VK1)>;
+
multiclass avx512_mask_binop_int<string IntName, string InstName> {
let Predicates = [HasAVX512] in
def : Pat<(!cast<Intrinsic>("int_x86_avx512_"##IntName##"_w")
@@ -1016,7 +1077,10 @@ multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode> {
}
defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest>;
-defm KTEST : avx512_mask_testop_w<0x99, "ktest", X86ktest>;
+
+def : Pat<(X86cmp VK1:$src1, VK1:$src2),
+ (KORTESTWrr (COPY_TO_REGCLASS VK1:$src1, VK16),
+ (COPY_TO_REGCLASS VK1:$src2, VK16))>;
// Mask shift
multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
@@ -1034,8 +1098,8 @@ multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
VEX, OpSize, TA, VEX_W;
}
-defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", shl>;
-defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", srl>;
+defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86vshli>;
+defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86vsrli>;
// Mask setting all 0s or 1s
multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, PatFrag Val> {
@@ -1046,7 +1110,7 @@ multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, PatFrag Val> {
}
multiclass avx512_mask_setop_w<PatFrag Val> {
- defm B : avx512_mask_setop<VK8, v8i1, Val>;
+ defm B : avx512_mask_setop<VK8, v8i1, Val>;
defm W : avx512_mask_setop<VK16, v16i1, Val>;
}
@@ -1341,6 +1405,12 @@ multiclass avx512_move_scalar <string asm, RegisterClass RC,
[(set VR128X:$dst, (vt (OpNode VR128X:$src1,
(scalar_to_vector RC:$src2))))],
IIC_SSE_MOV_S_RR>, EVEX_4V, VEX_LIG;
+ let Constraints = "$src1 = $dst" in
+ def rrk : SI<0x10, MRMSrcReg, (outs VR128X:$dst),
+ (ins VR128X:$src1, VK1WM:$mask, RC:$src2, RC:$src3),
+ !strconcat(asm,
+ "\t{$src3, $src2, $dst {${mask}}|$dst {${mask}}, $src2, $src3}"),
+ [], IIC_SSE_MOV_S_RR>, EVEX_4V, VEX_LIG, EVEX_K;
def rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
!strconcat(asm, "\t{$src, $dst|$dst, $src}"),
[(set RC:$dst, (mem_pat addr:$src))], IIC_SSE_MOV_S_RM>,
@@ -1359,6 +1429,13 @@ let ExeDomain = SSEPackedDouble in
defm VMOVSDZ : avx512_move_scalar<"movsd", FR64X, X86Movsd, v2f64, f64mem,
loadf64>, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
+def : Pat<(f32 (X86select VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
+ (COPY_TO_REGCLASS (VMOVSSZrrk (COPY_TO_REGCLASS FR32X:$src2, VR128X),
+ VK1WM:$mask, (f32 (IMPLICIT_DEF)), FR32X:$src1), FR32X)>;
+
+def : Pat<(f64 (X86select VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
+ (COPY_TO_REGCLASS (VMOVSDZrrk (COPY_TO_REGCLASS FR64X:$src2, VR128X),
+ VK1WM:$mask, (f64 (IMPLICIT_DEF)), FR64X:$src1), FR64X)>;
// For the disassembler
let isCodeGenOnly = 1 in {