summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
author Elena Demikhovsky <elena.demikhovsky@intel.com> 2013-08-28 11:21:58 +0000
committer Elena Demikhovsky <elena.demikhovsky@intel.com> 2013-08-28 11:21:58 +0000
commit 2fb982aa720ec1ef149b2d9add2673c313f08792 (patch)
tree 471d4569e2e7c4eca9d6f42c2507094857bd81fa /lib
parent abbcf3bd47ad8ffa70f48ebd924f99fff5c22131 (diff)
download llvm-2fb982aa720ec1ef149b2d9add2673c313f08792.tar.gz
llvm-2fb982aa720ec1ef149b2d9add2673c313f08792.tar.bz2
llvm-2fb982aa720ec1ef149b2d9add2673c313f08792.tar.xz
AVX-512: added SQRT, VRSQRT14, VCOMISS, VUCOMISS, VRCP14, VPABS
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189472 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r-- lib/Target/X86/X86InstrAVX512.td | 443
-rw-r--r-- lib/Target/X86/X86InstrSSE.td | 46
2 files changed, 466 insertions, 23 deletions
diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td
index 90eb7d9189..95b0de41fa 100644
--- a/lib/Target/X86/X86InstrAVX512.td
+++ b/lib/Target/X86/X86InstrAVX512.td
@@ -2378,6 +2378,433 @@ let Predicates = [HasAVX512] in {
def : Pat<(v8f64 (extloadv8f32 addr:$src)),
(VCVTPS2PDZrm addr:$src)>;
}
+
+let Defs = [EFLAGS], Predicates = [HasAVX512] in {
+ defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86cmp, f32, f32mem, loadf32,
+ "ucomiss{z}">, TB, EVEX, VEX_LIG,
+ EVEX_CD8<32, CD8VT1>;
+ defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86cmp, f64, f64mem, loadf64,
+ "ucomisd{z}">, TB, OpSize, EVEX,
+ VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
+ let Pattern = []<dag> in {
+ defm VCOMISSZ : sse12_ord_cmp<0x2F, VR128X, undef, v4f32, f128mem, load,
+ "comiss{z}">, TB, EVEX, VEX_LIG,
+ EVEX_CD8<32, CD8VT1>;
+ defm VCOMISDZ : sse12_ord_cmp<0x2F, VR128X, undef, v2f64, f128mem, load,
+ "comisd{z}">, TB, OpSize, EVEX,
+ VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
+ }
+ defm Int_VUCOMISSZ : sse12_ord_cmp<0x2E, VR128X, X86ucomi, v4f32, f128mem,
+ load, "ucomiss">, TB, EVEX, VEX_LIG,
+ EVEX_CD8<32, CD8VT1>;
+ defm Int_VUCOMISDZ : sse12_ord_cmp<0x2E, VR128X, X86ucomi, v2f64, f128mem,
+ load, "ucomisd">, TB, OpSize, EVEX,
+ VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
+
+ defm Int_VCOMISSZ : sse12_ord_cmp<0x2F, VR128X, X86comi, v4f32, f128mem,
+ load, "comiss">, TB, EVEX, VEX_LIG,
+ EVEX_CD8<32, CD8VT1>;
+ defm Int_VCOMISDZ : sse12_ord_cmp<0x2F, VR128X, X86comi, v2f64, f128mem,
+ load, "comisd">, TB, OpSize, EVEX,
+ VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
+}
+
+/// avx512_fp_unop_p - AVX-512 FP unops in packed form: 512-bit PS and PD
+/// variants, each with a register (Zr) and a memory (Zm) source, selected
+/// from the SDNode passed as OpNode.
+multiclass avx512_fp_unop_p<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+ def PSZr : AVX5128I<opc, MRMSrcReg, (outs VR512:$dst), (ins VR512:$src),
+ !strconcat(OpcodeStr,
+ "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR512:$dst, (v16f32 (OpNode VR512:$src)))]>,
+ EVEX, EVEX_V512;
+ // The source is a full 512-bit vector, so use f512mem as PDZm does below.
+ def PSZm : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst), (ins f512mem:$src),
+ !strconcat(OpcodeStr,
+ "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR512:$dst, (OpNode (memopv16f32 addr:$src)))]>,
+ EVEX, EVEX_V512, EVEX_CD8<32, CD8VF>;
+ def PDZr : AVX5128I<opc, MRMSrcReg, (outs VR512:$dst), (ins VR512:$src),
+ !strconcat(OpcodeStr,
+ "pd\t{$src, $dst|$dst, $src}"),
+ [(set VR512:$dst, (v8f64 (OpNode VR512:$src)))]>,
+ EVEX, EVEX_V512, VEX_W;
+ // The PD memory form loads v8f64, matching the v8f64 register form above.
+ def PDZm : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst), (ins f512mem:$src),
+ !strconcat(OpcodeStr,
+ "pd\t{$src, $dst|$dst, $src}"),
+ [(set VR512:$dst, (OpNode (memopv8f64 addr:$src)))]>,
+ EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+}
+
+/// avx512_fp_unop_p_int - AVX-512 intrinsics unops in packed forms.
+multiclass avx512_fp_unop_p_int<bits<8> opc, string OpcodeStr,
+ Intrinsic V16F32Int, Intrinsic V8F64Int> {
+ def PSZr_Int : AVX5128I<opc, MRMSrcReg, (outs VR512:$dst), (ins VR512:$src),
+ !strconcat(OpcodeStr,
+ "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR512:$dst, (V16F32Int VR512:$src))]>,
+ EVEX, EVEX_V512;
+ def PSZm_Int : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst), (ins f512mem:$src),
+ !strconcat(OpcodeStr,
+ "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR512:$dst,
+ (V16F32Int (memopv16f32 addr:$src)))]>, EVEX,
+ EVEX_V512, EVEX_CD8<32, CD8VF>;
+ def PDZr_Int : AVX5128I<opc, MRMSrcReg, (outs VR512:$dst), (ins VR512:$src),
+ !strconcat(OpcodeStr,
+ "pd\t{$src, $dst|$dst, $src}"),
+ [(set VR512:$dst, (V8F64Int VR512:$src))]>,
+ EVEX, EVEX_V512, VEX_W;
+ def PDZm_Int : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst), (ins f512mem:$src),
+ !strconcat(OpcodeStr,
+ "pd\t{$src, $dst|$dst, $src}"),
+ [(set VR512:$dst,
+ (V8F64Int (memopv8f64 addr:$src)))]>,
+ EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+}
+
+/// avx512_fp_unop_s - AVX-512 unops in scalar form. Defines SS and SD
+/// register and memory forms without patterns, plus memory-source intrinsic
+/// forms (SSZm_Int / SDZm_Int) matched through F32Int / F64Int.
+multiclass avx512_fp_unop_s<bits<8> opc, string OpcodeStr,
+ Intrinsic F32Int, Intrinsic F64Int> {
+ let hasSideEffects = 0 in {
+ def SSZr : AVX5128I<opc, MRMSrcReg, (outs FR32X:$dst),
+ (ins FR32X:$src1, FR32X:$src2),
+ !strconcat(OpcodeStr,
+ "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, EVEX_4V;
+ let mayLoad = 1 in {
+ def SSZm : AVX5128I<opc, MRMSrcMem, (outs FR32X:$dst),
+ (ins FR32X:$src1, f32mem:$src2),
+ !strconcat(OpcodeStr,
+ "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, EVEX_4V, EVEX_CD8<32, CD8VT1>;
+ def SSZm_Int : AVX5128I<opc, MRMSrcMem, (outs VR128X:$dst),
+ (ins VR128X:$src1, ssmem:$src2),
+ !strconcat(OpcodeStr,
+ "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128X:$dst, (F32Int VR128X:$src1, sse_load_f32:$src2))]>,
+ EVEX_4V, EVEX_CD8<32, CD8VT1>;
+ }
+ def SDZr : AVX5128I<opc, MRMSrcReg, (outs FR64X:$dst),
+ (ins FR64X:$src1, FR64X:$src2),
+ !strconcat(OpcodeStr,
+ "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>,
+ EVEX_4V, VEX_W;
+ // SD memory forms take 64-bit operands (f64mem / sdmem), so the CD8
+ // element size is 64 rather than the 32 used by the SS forms.
+ let mayLoad = 1 in {
+ def SDZm : AVX5128I<opc, MRMSrcMem, (outs FR64X:$dst),
+ (ins FR64X:$src1, f64mem:$src2),
+ !strconcat(OpcodeStr,
+ "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>,
+ EVEX_4V, VEX_W, EVEX_CD8<64, CD8VT1>;
+ def SDZm_Int : AVX5128I<opc, MRMSrcMem, (outs VR128X:$dst),
+ (ins VR128X:$src1, sdmem:$src2),
+ !strconcat(OpcodeStr,
+ "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128X:$dst, (F64Int VR128X:$src1, sse_load_f64:$src2))]>,
+ EVEX_4V, VEX_W, EVEX_CD8<64, CD8VT1>;
+ }
+}
+}
+
+defm VRCP14 : avx512_fp_unop_s<0x4D, "vrcp14", int_x86_avx512_rcp14_ss,
+ int_x86_avx512_rcp14_sd>,
+ avx512_fp_unop_p<0x4C, "vrcp14", X86frcp>,
+ avx512_fp_unop_p_int<0x4C, "vrcp14",
+ int_x86_avx512_rcp14_ps_512, int_x86_avx512_rcp14_pd_512>;
+
+defm VRSQRT14 : avx512_fp_unop_s<0x4F, "vrsqrt14", int_x86_avx512_rsqrt14_ss,
+ int_x86_avx512_rsqrt14_sd>,
+ avx512_fp_unop_p<0x4E, "vrsqrt14", X86frsqrt>,
+ avx512_fp_unop_p_int<0x4E, "vrsqrt14",
+ int_x86_avx512_rsqrt14_ps_512, int_x86_avx512_rsqrt14_pd_512>;
+
+/// avx512_sqrt_packed - 512-bit packed square root. The rr/rm forms are
+/// selected from OpNode; the _Int forms are selected from the V16F32Int /
+/// V8F64Int intrinsics. Every form appends its "ps"/"pd" suffix to OpcodeStr.
+multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ Intrinsic V16F32Int, Intrinsic V8F64Int,
+ OpndItins itins_s, OpndItins itins_d> {
+ def PSZrr : AVX512PSI<opc, MRMSrcReg, (outs VR512:$dst), (ins VR512:$src),
+ !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR512:$dst, (v16f32 (OpNode VR512:$src)))], itins_s.rr>,
+ EVEX, EVEX_V512;
+
+ let mayLoad = 1 in
+ def PSZrm : AVX512PSI<opc, MRMSrcMem, (outs VR512:$dst), (ins f512mem:$src),
+ !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR512:$dst,
+ (OpNode (v16f32 (bitconvert (memopv16f32 addr:$src)))))],
+ itins_s.rm>, EVEX, EVEX_V512, EVEX_CD8<32, CD8VF>;
+
+ // PD forms carry VEX_W, consistent with PDZr_Int / PDZm_Int below.
+ def PDZrr : AVX512PDI<opc, MRMSrcReg, (outs VR512:$dst), (ins VR512:$src),
+ !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
+ [(set VR512:$dst, (v8f64 (OpNode VR512:$src)))], itins_d.rr>,
+ EVEX, EVEX_V512, VEX_W;
+
+ // Load v8f64 directly so the pattern matches a plain v8f64 load.
+ let mayLoad = 1 in
+ def PDZrm : AVX512PDI<opc, MRMSrcMem, (outs VR512:$dst), (ins f512mem:$src),
+ !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
+ [(set VR512:$dst, (OpNode
+ (memopv8f64 addr:$src)))],
+ itins_d.rm>, EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+
+ def PSZr_Int : AVX512PSI<opc, MRMSrcReg, (outs VR512:$dst), (ins VR512:$src),
+ !strconcat(OpcodeStr,
+ "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR512:$dst, (V16F32Int VR512:$src))]>,
+ EVEX, EVEX_V512;
+ def PSZm_Int : AVX512PSI<opc, MRMSrcMem, (outs VR512:$dst), (ins f512mem:$src),
+ !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR512:$dst,
+ (V16F32Int (memopv16f32 addr:$src)))]>, EVEX,
+ EVEX_V512, EVEX_CD8<32, CD8VF>;
+ def PDZr_Int : AVX512PDI<opc, MRMSrcReg, (outs VR512:$dst), (ins VR512:$src),
+ !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
+ [(set VR512:$dst, (V8F64Int VR512:$src))]>,
+ EVEX, EVEX_V512, VEX_W;
+ def PDZm_Int : AVX512PDI<opc, MRMSrcMem, (outs VR512:$dst), (ins f512mem:$src),
+ !strconcat(OpcodeStr,
+ "pd\t{$src, $dst|$dst, $src}"),
+ [(set VR512:$dst, (V8F64Int (memopv8f64 addr:$src)))]>,
+ EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+}
+
+/// avx512_sqrt_scalar - Scalar square root. Defines SS and SD register and
+/// memory forms without patterns, plus _Int forms matched through F32Int /
+/// F64Int. SS forms use itins_s; SD forms use itins_d.
+multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr,
+ Intrinsic F32Int, Intrinsic F64Int,
+ OpndItins itins_s, OpndItins itins_d> {
+ def SSZr : SI<opc, MRMSrcReg, (outs FR32X:$dst),
+ (ins FR32X:$src1, FR32X:$src2),
+ !strconcat(OpcodeStr,
+ "ss{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [], itins_s.rr>, XS, EVEX_4V;
+ def SSZr_Int : SIi8<opc, MRMSrcReg, (outs VR128X:$dst),
+ (ins VR128X:$src1, VR128X:$src2),
+ !strconcat(OpcodeStr,
+ "ss{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128X:$dst,
+ (F32Int VR128X:$src1, VR128X:$src2))],
+ itins_s.rr>, XS, EVEX_4V;
+ let mayLoad = 1 in {
+ def SSZm : SI<opc, MRMSrcMem, (outs FR32X:$dst),
+ (ins FR32X:$src1, f32mem:$src2),
+ !strconcat(OpcodeStr,
+ "ss{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [], itins_s.rm>, XS, EVEX_4V, EVEX_CD8<32, CD8VT1>;
+ def SSZm_Int : SIi8<opc, MRMSrcMem, (outs VR128X:$dst),
+ (ins VR128X:$src1, ssmem:$src2),
+ !strconcat(OpcodeStr,
+ "ss{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128X:$dst,
+ (F32Int VR128X:$src1, sse_load_f32:$src2))],
+ itins_s.rm>, XS, EVEX_4V, EVEX_CD8<32, CD8VT1>;
+ }
+ def SDZr : SI<opc, MRMSrcReg, (outs FR64X:$dst),
+ (ins FR64X:$src1, FR64X:$src2),
+ !strconcat(OpcodeStr,
+ "sd{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [], itins_d.rr>, XD, EVEX_4V, VEX_W;
+ def SDZr_Int : SIi8<opc, MRMSrcReg, (outs VR128X:$dst),
+ (ins VR128X:$src1, VR128X:$src2),
+ !strconcat(OpcodeStr,
+ "sd{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128X:$dst,
+ (F64Int VR128X:$src1, VR128X:$src2))],
+ itins_d.rr>, XD, EVEX_4V, VEX_W;
+ let mayLoad = 1 in {
+ def SDZm : SI<opc, MRMSrcMem, (outs FR64X:$dst),
+ (ins FR64X:$src1, f64mem:$src2),
+ !strconcat(OpcodeStr,
+ "sd{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [], itins_d.rm>, XD, EVEX_4V, VEX_W, EVEX_CD8<64, CD8VT1>;
+ def SDZm_Int : SIi8<opc, MRMSrcMem, (outs VR128X:$dst),
+ (ins VR128X:$src1, sdmem:$src2),
+ !strconcat(OpcodeStr,
+ "sd{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128X:$dst,
+ (F64Int VR128X:$src1, sse_load_f64:$src2))],
+ itins_d.rm>, XD, EVEX_4V, VEX_W, EVEX_CD8<64, CD8VT1>;
+ }
+}
+
+
+defm VSQRT : avx512_sqrt_scalar<0x51, "sqrt",
+ int_x86_avx512_sqrt_ss, int_x86_avx512_sqrt_sd,
+ SSE_SQRTSS, SSE_SQRTSD>,
+ avx512_sqrt_packed<0x51, "vsqrt", fsqrt,
+ int_x86_avx512_sqrt_ps_512, int_x86_avx512_sqrt_pd_512,
+ SSE_SQRTPS, SSE_SQRTPD>;
+
+def : Pat<(f32 (fsqrt FR32X:$src)),
+ (VSQRTSSZr (f32 (IMPLICIT_DEF)), FR32X:$src)>;
+def : Pat<(f32 (fsqrt (load addr:$src))),
+ (VSQRTSSZm (f32 (IMPLICIT_DEF)), addr:$src)>,
+ Requires<[OptForSize]>;
+def : Pat<(f64 (fsqrt FR64X:$src)),
+ (VSQRTSDZr (f64 (IMPLICIT_DEF)), FR64X:$src)>;
+def : Pat<(f64 (fsqrt (load addr:$src))),
+ (VSQRTSDZm (f64 (IMPLICIT_DEF)), addr:$src)>,
+ Requires<[OptForSize]>;
+
+def : Pat<(f32 (X86frsqrt FR32X:$src)),
+ (VRSQRT14SSZr (f32 (IMPLICIT_DEF)), FR32X:$src)>;
+def : Pat<(f32 (X86frsqrt (load addr:$src))),
+ (VRSQRT14SSZm (f32 (IMPLICIT_DEF)), addr:$src)>,
+ Requires<[OptForSize]>;
+
+def : Pat<(f32 (X86frcp FR32X:$src)),
+ (VRCP14SSZr (f32 (IMPLICIT_DEF)), FR32X:$src)>;
+def : Pat<(f32 (X86frcp (load addr:$src))),
+ (VRCP14SSZm (f32 (IMPLICIT_DEF)), addr:$src)>,
+ Requires<[OptForSize]>;
+
+multiclass avx512_fp_unop_rm<bits<8> opcps, bits<8> opcpd, string OpcodeStr,
+ X86MemOperand x86memop, RegisterClass RC,
+ PatFrag mem_frag32, PatFrag mem_frag64,
+ Intrinsic V4F32Int, Intrinsic V2F64Int,
+ CD8VForm VForm> {
+let ExeDomain = SSEPackedSingle in {
+ // Intrinsic operation, reg.
+ // Vector intrinsic operation, reg
+ def PSr : AVX512AIi8<opcps, MRMSrcReg,
+ (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, (V4F32Int RC:$src1, imm:$src2))]>;
+
+ // Vector intrinsic operation, mem
+ def PSm : AVX512AIi8<opcps, MRMSrcMem,
+ (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst,
+ (V4F32Int (mem_frag32 addr:$src1),imm:$src2))]>,
+ EVEX_CD8<32, VForm>;
+} // ExeDomain = SSEPackedSingle
+
+let ExeDomain = SSEPackedDouble in {
+ // Vector intrinsic operation, reg
+ def PDr : AVX512AIi8<opcpd, MRMSrcReg,
+ (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, (V2F64Int RC:$src1, imm:$src2))]>;
+
+ // Vector intrinsic operation, mem
+ def PDm : AVX512AIi8<opcpd, MRMSrcMem,
+ (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst,
+ (V2F64Int (mem_frag64 addr:$src1),imm:$src2))]>,
+ EVEX_CD8<64, VForm>;
+} // ExeDomain = SSEPackedDouble
+}
+
+multiclass avx512_fp_binop_rm<bits<8> opcss, bits<8> opcsd,
+ string OpcodeStr,
+ Intrinsic F32Int,
+ Intrinsic F64Int> {
+let ExeDomain = GenericDomain in {
+ // Operation, reg.
+ let hasSideEffects = 0 in
+ def SSr : AVX512AIi8<opcss, MRMSrcReg,
+ (outs FR32X:$dst), (ins FR32X:$src1, FR32X:$src2, i32i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>;
+
+ // Intrinsic operation, reg.
+ def SSr_Int : AVX512AIi8<opcss, MRMSrcReg,
+ (outs VR128X:$dst), (ins VR128X:$src1, VR128X:$src2, i32i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR128X:$dst, (F32Int VR128X:$src1, VR128X:$src2, imm:$src3))]>;
+
+ // Intrinsic operation, mem.
+ def SSm : AVX512AIi8<opcss, MRMSrcMem, (outs VR128X:$dst),
+ (ins VR128X:$src1, ssmem:$src2, i32i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR128X:$dst, (F32Int VR128X:$src1,
+ sse_load_f32:$src2, imm:$src3))]>,
+ EVEX_CD8<32, CD8VT1>;
+
+ // Operation, reg.
+ let hasSideEffects = 0 in
+ def SDr : AVX512AIi8<opcsd, MRMSrcReg,
+ (outs FR64X:$dst), (ins FR64X:$src1, FR64X:$src2, i32i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>, VEX_W;
+
+ // Intrinsic operation, reg.
+ def SDr_Int : AVX512AIi8<opcsd, MRMSrcReg,
+ (outs VR128X:$dst), (ins VR128X:$src1, VR128X:$src2, i32i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR128X:$dst, (F64Int VR128X:$src1, VR128X:$src2, imm:$src3))]>,
+ VEX_W;
+
+ // Intrinsic operation, mem.
+ def SDm : AVX512AIi8<opcsd, MRMSrcMem,
+ (outs VR128X:$dst), (ins VR128X:$src1, sdmem:$src2, i32i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR128X:$dst,
+ (F64Int VR128X:$src1, sse_load_f64:$src2, imm:$src3))]>,
+ VEX_W, EVEX_CD8<64, CD8VT1>;
+} // ExeDomain = GenericDomain
+}
+
+// VRNDSCALE: scalar SS/SD forms; VRNDSCALEZ: packed 512-bit PS/PD forms.
+let Predicates = [HasAVX512] in {
+ defm VRNDSCALE : avx512_fp_binop_rm<0x0A, 0x0B, "vrndscale",
+ int_x86_avx512_rndscale_ss,
+ int_x86_avx512_rndscale_sd>, EVEX_4V;
+
+ // The packed forms operate on VR512, so the memory operand is f512mem.
+ defm VRNDSCALEZ : avx512_fp_unop_rm<0x08, 0x09, "vrndscale", f512mem, VR512,
+ memopv16f32, memopv8f64,
+ int_x86_avx512_rndscale_ps_512,
+ int_x86_avx512_rndscale_pd_512, CD8VF>,
+ EVEX, EVEX_V512;
+}
+
+// Explicit f32 result type, matching the other scalar rounding patterns.
+def : Pat<(f32 (ffloor FR32X:$src)),
+ (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x1))>;
+def : Pat<(f64 (ffloor FR64X:$src)),
+ (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x1))>;
+def : Pat<(f32 (fnearbyint FR32X:$src)),
+ (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0xC))>;
+def : Pat<(f64 (fnearbyint FR64X:$src)),
+ (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0xC))>;
+def : Pat<(f32 (fceil FR32X:$src)),
+ (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x2))>;
+def : Pat<(f64 (fceil FR64X:$src)),
+ (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x2))>;
+def : Pat<(f32 (frint FR32X:$src)),
+ (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x4))>;
+def : Pat<(f64 (frint FR64X:$src)),
+ (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x4))>;
+def : Pat<(f32 (ftrunc FR32X:$src)),
+ (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x3))>;
+def : Pat<(f64 (ftrunc FR64X:$src)),
+ (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x3))>;
+
+def : Pat<(v16f32 (ffloor VR512:$src)),
+ (VRNDSCALEZPSr VR512:$src, (i32 0x1))>;
+def : Pat<(v16f32 (fnearbyint VR512:$src)),
+ (VRNDSCALEZPSr VR512:$src, (i32 0xC))>;
+def : Pat<(v16f32 (fceil VR512:$src)),
+ (VRNDSCALEZPSr VR512:$src, (i32 0x2))>;
+def : Pat<(v16f32 (frint VR512:$src)),
+ (VRNDSCALEZPSr VR512:$src, (i32 0x4))>;
+def : Pat<(v16f32 (ftrunc VR512:$src)),
+ (VRNDSCALEZPSr VR512:$src, (i32 0x3))>;
+
+def : Pat<(v8f64 (ffloor VR512:$src)),
+ (VRNDSCALEZPDr VR512:$src, (i32 0x1))>;
+def : Pat<(v8f64 (fnearbyint VR512:$src)),
+ (VRNDSCALEZPDr VR512:$src, (i32 0xC))>;
+def : Pat<(v8f64 (fceil VR512:$src)),
+ (VRNDSCALEZPDr VR512:$src, (i32 0x2))>;
+def : Pat<(v8f64 (frint VR512:$src)),
+ (VRNDSCALEZPDr VR512:$src, (i32 0x4))>;
+def : Pat<(v8f64 (ftrunc VR512:$src)),
+ (VRNDSCALEZPDr VR512:$src, (i32 0x3))>;
+
//===----------------------------------------------------------------------===//
// VSHUFPS - VSHUFPD Operations
@@ -2433,3 +2860,19 @@ def : Pat<(v16i32 (X86PAlignr VR512:$src1, VR512:$src2, (i8 imm:$imm))),
def : Pat<(v8i64 (X86PAlignr VR512:$src1, VR512:$src2, (i8 imm:$imm))),
(VALIGNQrri VR512:$src2, VR512:$src1, imm:$imm)>;
+/// avx512_vpabs - Register (rr) and memory (rm) forms of a unary op on
+/// register class RC; no selection patterns are attached.
+multiclass avx512_vpabs<bits<8> opc, string OpcodeStr, RegisterClass RC,
+ X86MemOperand x86memop> {
+ def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
+ EVEX;
+ // The destination uses the RC parameter, as in the rr form.
+ def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
+ (ins x86memop:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
+ EVEX;
+}
+
+defm VPABSD : avx512_vpabs<0x1E, "vpabsd", VR512, i512mem>, EVEX_V512,
+ EVEX_CD8<32, CD8VF>;
+defm VPABSQ : avx512_vpabs<0x1F, "vpabsq", VR512, i512mem>, EVEX_V512, VEX_W,
+ EVEX_CD8<64, CD8VF>;
+
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 2d7ac73bbe..b1cfbee635 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -3322,30 +3322,30 @@ defm RCP : sse1_fp_unop_rw<0x53, "rcp", X86frcp, SSE_RCPS>,
sse1_fp_unop_p_int<0x53, "rcp", int_x86_sse_rcp_ps,
int_x86_avx_rcp_ps_256, SSE_RCPP>;
-def : Pat<(f32 (fsqrt FR32:$src)),
- (VSQRTSSr (f32 (IMPLICIT_DEF)), FR32:$src)>, Requires<[HasAVX]>;
-def : Pat<(f32 (fsqrt (load addr:$src))),
- (VSQRTSSm (f32 (IMPLICIT_DEF)), addr:$src)>,
- Requires<[HasAVX, OptForSize]>;
-def : Pat<(f64 (fsqrt FR64:$src)),
- (VSQRTSDr (f64 (IMPLICIT_DEF)), FR64:$src)>, Requires<[HasAVX]>;
-def : Pat<(f64 (fsqrt (load addr:$src))),
- (VSQRTSDm (f64 (IMPLICIT_DEF)), addr:$src)>,
- Requires<[HasAVX, OptForSize]>;
-
-def : Pat<(f32 (X86frsqrt FR32:$src)),
- (VRSQRTSSr (f32 (IMPLICIT_DEF)), FR32:$src)>, Requires<[HasAVX]>;
-def : Pat<(f32 (X86frsqrt (load addr:$src))),
- (VRSQRTSSm (f32 (IMPLICIT_DEF)), addr:$src)>,
- Requires<[HasAVX, OptForSize]>;
-
-def : Pat<(f32 (X86frcp FR32:$src)),
- (VRCPSSr (f32 (IMPLICIT_DEF)), FR32:$src)>, Requires<[HasAVX]>;
-def : Pat<(f32 (X86frcp (load addr:$src))),
- (VRCPSSm (f32 (IMPLICIT_DEF)), addr:$src)>,
- Requires<[HasAVX, OptForSize]>;
+let Predicates = [UseAVX] in {
+ def : Pat<(f32 (fsqrt FR32:$src)),
+ (VSQRTSSr (f32 (IMPLICIT_DEF)), FR32:$src)>, Requires<[HasAVX]>;
+ def : Pat<(f32 (fsqrt (load addr:$src))),
+ (VSQRTSSm (f32 (IMPLICIT_DEF)), addr:$src)>,
+ Requires<[HasAVX, OptForSize]>;
+ def : Pat<(f64 (fsqrt FR64:$src)),
+ (VSQRTSDr (f64 (IMPLICIT_DEF)), FR64:$src)>, Requires<[HasAVX]>;
+ def : Pat<(f64 (fsqrt (load addr:$src))),
+ (VSQRTSDm (f64 (IMPLICIT_DEF)), addr:$src)>,
+ Requires<[HasAVX, OptForSize]>;
+
+ def : Pat<(f32 (X86frsqrt FR32:$src)),
+ (VRSQRTSSr (f32 (IMPLICIT_DEF)), FR32:$src)>, Requires<[HasAVX]>;
+ def : Pat<(f32 (X86frsqrt (load addr:$src))),
+ (VRSQRTSSm (f32 (IMPLICIT_DEF)), addr:$src)>,
+ Requires<[HasAVX, OptForSize]>;
+
+ def : Pat<(f32 (X86frcp FR32:$src)),
+ (VRCPSSr (f32 (IMPLICIT_DEF)), FR32:$src)>, Requires<[HasAVX]>;
+ def : Pat<(f32 (X86frcp (load addr:$src))),
+ (VRCPSSm (f32 (IMPLICIT_DEF)), addr:$src)>,
+ Requires<[HasAVX, OptForSize]>;
-let Predicates = [HasAVX] in {
def : Pat<(int_x86_sse_sqrt_ss VR128:$src),
(COPY_TO_REGCLASS (VSQRTSSr (f32 (IMPLICIT_DEF)),
(COPY_TO_REGCLASS VR128:$src, FR32)),