diff options
Diffstat (limited to 'lib/Target/X86/X86InstrSSE.td')
-rw-r--r-- | lib/Target/X86/X86InstrSSE.td | 253 |
1 files changed, 252 insertions, 1 deletions
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index b5eea45780..f30a0c4699 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -4008,6 +4008,23 @@ def mi : Ii8<0x70, MRMSrcMem, (bc_frag (memopv2i64 addr:$src1)), (undef))))]>; } + +multiclass sse2_pshuffle_y<string OpcodeStr, ValueType vt, PatFrag pshuf_frag, + PatFrag bc_frag> { +def Yri : Ii8<0x70, MRMSrcReg, + (outs VR256:$dst), (ins VR256:$src1, i8imm:$src2), + !strconcat(OpcodeStr, + "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set VR256:$dst, (vt (pshuf_frag:$src2 VR256:$src1, + (undef))))]>; +def Ymi : Ii8<0x70, MRMSrcMem, + (outs VR256:$dst), (ins i256mem:$src1, i8imm:$src2), + !strconcat(OpcodeStr, + "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set VR256:$dst, (vt (pshuf_frag:$src2 + (bc_frag (memopv4i64 addr:$src1)), + (undef))))]>; +} } // ExeDomain = SSEPackedInt let Predicates = [HasAVX] in { @@ -4052,6 +4069,20 @@ let Predicates = [HasAVX] in { (VPSHUFLWmi addr:$src, imm:$imm)>; } +let Predicates = [HasAVX2] in { + let AddedComplexity = 5 in + defm VPSHUFD : sse2_pshuffle_y<"vpshufd", v8i32, pshufd, bc_v8i32>, TB, + OpSize, VEX; + + // SSE2 with ImmT == Imm8 and XS prefix. + defm VPSHUFHW : sse2_pshuffle_y<"vpshufhw", v16i16, pshufhw, bc_v16i16>, XS, + VEX; + + // SSE2 with ImmT == Imm8 and XD prefix. + defm VPSHUFLW : sse2_pshuffle_y<"vpshuflw", v16i16, pshuflw, bc_v16i16>, XD, + VEX; +} + let Predicates = [HasSSE2] in { let AddedComplexity = 5 in defm PSHUFD : sse2_pshuffle<"pshufd", v4i32, pshufd, bc_v4i32>, TB, OpSize; @@ -4114,6 +4145,19 @@ multiclass sse2_unpack<bits<8> opc, string OpcodeStr, ValueType vt, addr:$src2))))]>; } +multiclass sse2_unpack_y<bits<8> opc, string OpcodeStr, ValueType vt, + SDNode OpNode, PatFrag bc_frag> { + def Yrr : PDI<opc, MRMSrcReg, + (outs VR256:$dst), (ins VR256:$src1, VR256:$src2), + !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set VR256:$dst, (vt (OpNode VR256:$src1, VR256:$src2)))]>; + def Yrm : PDI<opc, MRMSrcMem, + (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2), + !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set VR256:$dst, (OpNode VR256:$src1, + (bc_frag (memopv4i64 addr:$src2))))]>; +} + let Predicates = [HasAVX] in { defm VPUNPCKLBW : sse2_unpack<0x60, "vpunpcklbw", v16i8, X86Punpcklbw, bc_v16i8, 0>, VEX_4V; @@ -4156,6 +4200,48 @@ let Predicates = [HasAVX] in { (memopv2i64 addr:$src2))))]>, VEX_4V; } +let Predicates = [HasAVX2] in { + defm VPUNPCKLBW : sse2_unpack_y<0x60, "vpunpcklbw", v32i8, X86Punpcklbw, + bc_v32i8>, VEX_4V; + defm VPUNPCKLWD : sse2_unpack_y<0x61, "vpunpcklwd", v16i16, X86Punpcklwd, + bc_v16i16>, VEX_4V; + defm VPUNPCKLDQ : sse2_unpack_y<0x62, "vpunpckldq", v8i32, X86Punpckldq, + bc_v8i32>, VEX_4V; + + /// FIXME: we could eliminate this and use sse2_unpack_y instead if tblgen + /// knew to collapse (bitconvert VT to VT) into its operand. + def VPUNPCKLQDQYrr : PDI<0x6C, MRMSrcReg, + (outs VR256:$dst), (ins VR256:$src1, VR256:$src2), + "vpunpcklqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set VR256:$dst, (v4i64 (X86Punpcklqdq VR256:$src1, + VR256:$src2)))]>, VEX_4V; + def VPUNPCKLQDQYrm : PDI<0x6C, MRMSrcMem, + (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2), + "vpunpcklqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set VR256:$dst, (v4i64 (X86Punpcklqdq VR256:$src1, + (memopv4i64 addr:$src2))))]>, VEX_4V; + + defm VPUNPCKHBW : sse2_unpack_y<0x68, "vpunpckhbw", v32i8, X86Punpckhbw, + bc_v32i8>, VEX_4V; + defm VPUNPCKHWD : sse2_unpack_y<0x69, "vpunpckhwd", v16i16, X86Punpckhwd, + bc_v16i16>, VEX_4V; + defm VPUNPCKHDQ : sse2_unpack_y<0x6A, "vpunpckhdq", v8i32, X86Punpckhdq, + bc_v8i32>, VEX_4V; + + /// FIXME: we could eliminate this and use sse2_unpack_y instead if tblgen + /// knew to collapse (bitconvert VT to VT) into its operand. + def VPUNPCKHQDQYrr : PDI<0x6D, MRMSrcReg, + (outs VR256:$dst), (ins VR256:$src1, VR256:$src2), + "vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set VR256:$dst, (v4i64 (X86Punpckhqdq VR256:$src1, + VR256:$src2)))]>, VEX_4V; + def VPUNPCKHQDQYrm : PDI<0x6D, MRMSrcMem, + (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2), + "vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set VR256:$dst, (v4i64 (X86Punpckhqdq VR256:$src1, + (memopv4i64 addr:$src2))))]>, VEX_4V; +} + let Constraints = "$src1 = $dst" in { defm PUNPCKLBW : sse2_unpack<0x60, "punpcklbw", v16i8, X86Punpcklbw, bc_v16i8>; defm PUNPCKLWD : sse2_unpack<0x61, "punpcklwd", v8i16, X86Punpcklwd, bc_v8i16>; @@ -4266,6 +4352,15 @@ def VPMOVMSKBrr : VPDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src), [(set GR32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))]>, VEX; def VPMOVMSKBr64r : VPDI<0xD7, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src), "pmovmskb\t{$src, $dst|$dst, $src}", []>, VEX; + +let Predicates = [HasAVX2] in { +def VPMOVMSKBYrr : VPDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR256:$src), + "pmovmskb\t{$src, $dst|$dst, $src}", + [(set GR32:$dst, (int_x86_avx2_pmovmskb VR256:$src))]>, VEX; +def VPMOVMSKBYr64r : VPDI<0xD7, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src), + "pmovmskb\t{$src, $dst|$dst, $src}", []>, VEX; +} + def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src), "pmovmskb\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))]>; @@ -5016,6 +5111,23 @@ multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr, (bitconvert (mem_frag128 addr:$src))))]>, OpSize; } +/// SS3I_unop_rm_int_y - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}. +multiclass SS3I_unop_rm_int_y<bits<8> opc, string OpcodeStr, + PatFrag mem_frag256, Intrinsic IntId256> { + def rr256 : SS38I<opc, MRMSrcReg, (outs VR256:$dst), + (ins VR256:$src), + !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), + [(set VR256:$dst, (IntId256 VR256:$src))]>, + OpSize; + + def rm256 : SS38I<opc, MRMSrcMem, (outs VR256:$dst), + (ins i256mem:$src), + !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), + [(set VR256:$dst, + (IntId256 + (bitconvert (mem_frag256 addr:$src))))]>, OpSize; +} + let Predicates = [HasAVX] in { defm VPABSB : SS3I_unop_rm_int<0x1C, "vpabsb", memopv16i8, int_x86_ssse3_pabs_b_128>, VEX; @@ -5025,6 +5137,15 @@ let Predicates = [HasAVX] in { int_x86_ssse3_pabs_d_128>, VEX; } +let Predicates = [HasAVX2] in { + defm VPABSB : SS3I_unop_rm_int_y<0x1C, "vpabsb", memopv32i8, + int_x86_avx2_pabs_b>, VEX; + defm VPABSW : SS3I_unop_rm_int_y<0x1D, "vpabsw", memopv16i16, + int_x86_avx2_pabs_w>, VEX; + defm VPABSD : SS3I_unop_rm_int_y<0x1E, "vpabsd", memopv8i32, + int_x86_avx2_pabs_d>, VEX; +} + defm PABSB : SS3I_unop_rm_int<0x1C, "pabsb", memopv16i8, int_x86_ssse3_pabs_b_128>; defm PABSW : SS3I_unop_rm_int<0x1D, "pabsw", memopv8i16, @@ -5055,7 +5176,23 @@ multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr, !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set VR128:$dst, (IntId128 VR128:$src1, - (bitconvert (memopv16i8 addr:$src2))))]>, OpSize; + (bitconvert (mem_frag128 addr:$src2))))]>, OpSize; +} + +multiclass SS3I_binop_rm_int_y<bits<8> opc, string OpcodeStr, + PatFrag mem_frag256, Intrinsic IntId256> { + let isCommutable = 1 in + def rr256 : SS38I<opc, MRMSrcReg, (outs VR256:$dst), + (ins VR256:$src1, VR256:$src2), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set VR256:$dst, (IntId256 VR256:$src1, VR256:$src2))]>, + OpSize; + def rm256 : SS38I<opc, MRMSrcMem, (outs VR256:$dst), + (ins VR256:$src1, i256mem:$src2), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set VR256:$dst, + (IntId256 VR256:$src1, + (bitconvert (mem_frag256 addr:$src2))))]>, OpSize; } let ImmT = NoImm, Predicates = [HasAVX] in { @@ -5087,6 +5224,35 @@ defm VPMULHRSW : SS3I_binop_rm_int<0x0B, "vpmulhrsw", memopv8i16, int_x86_ssse3_pmul_hr_sw_128, 0>, VEX_4V; } +let ImmT = NoImm, Predicates = [HasAVX2] in { +let isCommutable = 0 in { + defm VPHADDW : SS3I_binop_rm_int_y<0x01, "vphaddw", memopv16i16, + int_x86_avx2_phadd_w>, VEX_4V; + defm VPHADDD : SS3I_binop_rm_int_y<0x02, "vphaddd", memopv8i32, + int_x86_avx2_phadd_d>, VEX_4V; + defm VPHADDSW : SS3I_binop_rm_int_y<0x03, "vphaddsw", memopv16i16, + int_x86_avx2_phadd_sw>, VEX_4V; + defm VPHSUBW : SS3I_binop_rm_int_y<0x05, "vphsubw", memopv16i16, + int_x86_avx2_phsub_w>, VEX_4V; + defm VPHSUBD : SS3I_binop_rm_int_y<0x06, "vphsubd", memopv8i32, + int_x86_avx2_phsub_d>, VEX_4V; + defm VPHSUBSW : SS3I_binop_rm_int_y<0x07, "vphsubsw", memopv16i16, + int_x86_avx2_phsub_sw>, VEX_4V; + defm VPMADDUBSW : SS3I_binop_rm_int_y<0x04, "vpmaddubsw", memopv32i8, + int_x86_avx2_pmadd_ub_sw>, VEX_4V; + defm VPSHUFB : SS3I_binop_rm_int_y<0x00, "vpshufb", memopv32i8, + int_x86_avx2_pshuf_b>, VEX_4V; + defm VPSIGNB : SS3I_binop_rm_int_y<0x08, "vpsignb", memopv16i8, + int_x86_avx2_psign_b>, VEX_4V; + defm VPSIGNW : SS3I_binop_rm_int_y<0x09, "vpsignw", memopv8i16, + int_x86_avx2_psign_w>, VEX_4V; + defm VPSIGND : SS3I_binop_rm_int_y<0x0A, "vpsignd", memopv4i32, + int_x86_avx2_psign_d>, VEX_4V; +} +defm VPMULHRSW : SS3I_binop_rm_int_y<0x0B, "vpmulhrsw", memopv16i16, + int_x86_avx2_pmul_hr_sw>, VEX_4V; +} + // None of these have i8 immediate fields. let ImmT = NoImm, Constraints = "$src1 = $dst" in { let isCommutable = 0 in { @@ -5166,8 +5332,23 @@ multiclass ssse3_palign<string asm, bit Is2Addr = 1> { []>, OpSize; } +multiclass ssse3_palign_y<string asm, bit Is2Addr = 1> { + def R256rr : SS3AI<0x0F, MRMSrcReg, (outs VR256:$dst), + (ins VR256:$src1, VR256:$src2, i8imm:$src3), + !strconcat(asm, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), + []>, OpSize; + def R256rm : SS3AI<0x0F, MRMSrcMem, (outs VR256:$dst), + (ins VR256:$src1, i256mem:$src2, i8imm:$src3), + !strconcat(asm, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), + []>, OpSize; +} + let Predicates = [HasAVX] in defm VPALIGN : ssse3_palign<"vpalignr", 0>, VEX_4V; +let Predicates = [HasAVX2] in + defm VPALIGN : ssse3_palign_y<"vpalignr", 0>, VEX_4V; let Constraints = "$src1 = $dst", Predicates = [HasSSSE3] in defm PALIGN : ssse3_palign<"palignr">; @@ -5235,6 +5416,17 @@ multiclass SS41I_binop_rm_int8<bits<8> opc, string OpcodeStr, Intrinsic IntId> { OpSize; } +multiclass SS41I_binop_rm_int16_y<bits<8> opc, string OpcodeStr, + Intrinsic IntId> { + def Yrr : SS48I<opc, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src), + !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), + [(set VR256:$dst, (IntId VR128:$src))]>, OpSize; + + def Yrm : SS48I<opc, MRMSrcMem, (outs VR256:$dst), (ins i128mem:$src), + !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), + [(set VR256:$dst, (IntId (load addr:$src)))]>, OpSize; +} + let Predicates = [HasAVX] in { defm VPMOVSXBW : SS41I_binop_rm_int8<0x20, "vpmovsxbw", int_x86_sse41_pmovsxbw>, VEX; @@ -5250,6 +5442,21 @@ defm VPMOVZXDQ : SS41I_binop_rm_int8<0x35, "vpmovzxdq", int_x86_sse41_pmovzxdq>, VEX; } +let Predicates = [HasAVX2] in { +defm VPMOVSXBW : SS41I_binop_rm_int16_y<0x20, "vpmovsxbw", + int_x86_avx2_pmovsxbw>, VEX; +defm VPMOVSXWD : SS41I_binop_rm_int16_y<0x23, "vpmovsxwd", + int_x86_avx2_pmovsxwd>, VEX; +defm VPMOVSXDQ : SS41I_binop_rm_int16_y<0x25, "vpmovsxdq", + int_x86_avx2_pmovsxdq>, VEX; +defm VPMOVZXBW : SS41I_binop_rm_int16_y<0x30, "vpmovzxbw", + int_x86_avx2_pmovzxbw>, VEX; +defm VPMOVZXWD : SS41I_binop_rm_int16_y<0x33, "vpmovzxwd", + int_x86_avx2_pmovzxwd>, VEX; +defm VPMOVZXDQ : SS41I_binop_rm_int16_y<0x35, "vpmovzxdq", + int_x86_avx2_pmovzxdq>, VEX; +} + defm PMOVSXBW : SS41I_binop_rm_int8<0x20, "pmovsxbw", int_x86_sse41_pmovsxbw>; defm PMOVSXWD : SS41I_binop_rm_int8<0x23, "pmovsxwd", int_x86_sse41_pmovsxwd>; defm PMOVSXDQ : SS41I_binop_rm_int8<0x25, "pmovsxdq", int_x86_sse41_pmovsxdq>; @@ -5336,6 +5543,19 @@ multiclass SS41I_binop_rm_int4<bits<8> opc, string OpcodeStr, Intrinsic IntId> { OpSize; } +multiclass SS41I_binop_rm_int8_y<bits<8> opc, string OpcodeStr, + Intrinsic IntId> { + def Yrr : SS48I<opc, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src), + !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), + [(set VR256:$dst, (IntId VR128:$src))]>, OpSize; + + def Yrm : SS48I<opc, MRMSrcMem, (outs VR256:$dst), (ins i32mem:$src), + !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), + [(set VR256:$dst, + (IntId (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))]>, + OpSize; +} + let Predicates = [HasAVX] in { defm VPMOVSXBD : SS41I_binop_rm_int4<0x21, "vpmovsxbd", int_x86_sse41_pmovsxbd>, VEX; @@ -5347,6 +5567,17 @@ defm VPMOVZXWQ : SS41I_binop_rm_int4<0x34, "vpmovzxwq", int_x86_sse41_pmovzxwq>, VEX; } +let Predicates = [HasAVX2] in { +defm VPMOVSXBD : SS41I_binop_rm_int8_y<0x21, "vpmovsxbd", + int_x86_avx2_pmovsxbd>, VEX; +defm VPMOVSXWQ : SS41I_binop_rm_int8_y<0x24, "vpmovsxwq", + int_x86_avx2_pmovsxwq>, VEX; +defm VPMOVZXBD : SS41I_binop_rm_int8_y<0x31, "vpmovzxbd", + int_x86_avx2_pmovzxbd>, VEX; +defm VPMOVZXWQ : SS41I_binop_rm_int8_y<0x34, "vpmovzxwq", + int_x86_avx2_pmovzxwq>, VEX; +} + defm PMOVSXBD : SS41I_binop_rm_int4<0x21, "pmovsxbd", int_x86_sse41_pmovsxbd>; defm PMOVSXWQ : SS41I_binop_rm_int4<0x24, "pmovsxwq", int_x86_sse41_pmovsxwq>; defm PMOVZXBD : SS41I_binop_rm_int4<0x31, "pmovzxbd", int_x86_sse41_pmovzxbd>; @@ -5391,12 +5622,32 @@ multiclass SS41I_binop_rm_int2<bits<8> opc, string OpcodeStr, Intrinsic IntId> { OpSize; } +multiclass SS41I_binop_rm_int4_y<bits<8> opc, string OpcodeStr, + Intrinsic IntId> { + def Yrr : SS48I<opc, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src), + !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), + [(set VR256:$dst, (IntId VR128:$src))]>, OpSize; + + // Expecting a i16 load any extended to i32 value. + def Yrm : SS48I<opc, MRMSrcMem, (outs VR256:$dst), (ins i16mem:$src), + !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), + [(set VR256:$dst, (IntId (bitconvert + (v4i32 (scalar_to_vector (loadi32 addr:$src))))))]>, + OpSize; +} + let Predicates = [HasAVX] in { defm VPMOVSXBQ : SS41I_binop_rm_int2<0x22, "vpmovsxbq", int_x86_sse41_pmovsxbq>, VEX; defm VPMOVZXBQ : SS41I_binop_rm_int2<0x32, "vpmovzxbq", int_x86_sse41_pmovzxbq>, VEX; } +let Predicates = [HasAVX2] in { +defm VPMOVSXBQ : SS41I_binop_rm_int4_y<0x22, "vpmovsxbq", + int_x86_avx2_pmovsxbq>, VEX; +defm VPMOVZXBQ : SS41I_binop_rm_int4_y<0x32, "vpmovzxbq", + int_x86_avx2_pmovzxbq>, VEX; +} defm PMOVSXBQ : SS41I_binop_rm_int2<0x22, "pmovsxbq", int_x86_sse41_pmovsxbq>; defm PMOVZXBQ : SS41I_binop_rm_int2<0x32, "pmovzxbq", int_x86_sse41_pmovzxbq>; |