diff options
author | Elena Demikhovsky <elena.demikhovsky@intel.com> | 2013-08-21 09:36:02 +0000 |
---|---|---|
committer | Elena Demikhovsky <elena.demikhovsky@intel.com> | 2013-08-21 09:36:02 +0000 |
commit | 8ba76daba09e79b10c4aad8f4298433c6dafa6d5 (patch) | |
tree | 20b39696030c95fe2c24f64f8a418892327da20a /lib/Target/X86 | |
parent | df40f8e8ad0aa93defa44b8a136e8d871cfd44ea (diff) | |
download | llvm-8ba76daba09e79b10c4aad8f4298433c6dafa6d5.tar.gz llvm-8ba76daba09e79b10c4aad8f4298433c6dafa6d5.tar.bz2 llvm-8ba76daba09e79b10c4aad8f4298433c6dafa6d5.tar.xz |
AVX-512: Added SHIFT instructions.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188899 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target/X86')
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 31 | ||||
-rw-r--r-- | lib/Target/X86/X86ISelLowering.h | 9 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrAVX512.td | 141 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrFragmentsSIMD.td | 3 |
4 files changed, 178 insertions, 6 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 3c3f09f0fe..6e9ecef74d 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -11269,6 +11269,16 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) { SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, CC, Test); return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC); } + case Intrinsic::x86_avx512_kortestz: + case Intrinsic::x86_avx512_kortestc: { + unsigned X86CC = (IntNo == Intrinsic::x86_avx512_kortestz)? X86::COND_E: X86::COND_B; + SDValue LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i1, Op.getOperand(1)); + SDValue RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i1, Op.getOperand(2)); + SDValue CC = DAG.getConstant(X86CC, MVT::i8); + SDValue Test = DAG.getNode(X86ISD::KORTEST, dl, MVT::i32, LHS, RHS); + SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, CC, Test); + return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC); + } // SSE/AVX shift intrinsics case Intrinsic::x86_sse2_psll_w: @@ -12135,7 +12145,9 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG, if (VT == MVT::v2i64 || VT == MVT::v4i32 || VT == MVT::v8i16 || (Subtarget->hasInt256() && - (VT == MVT::v4i64 || VT == MVT::v8i32 || VT == MVT::v16i16))) { + (VT == MVT::v4i64 || VT == MVT::v8i32 || VT == MVT::v16i16)) || + (Subtarget->hasAVX512() && + (VT == MVT::v8i64 || VT == MVT::v16i32))) { if (Op.getOpcode() == ISD::SHL) return DAG.getNode(X86ISD::VSHLI, dl, VT, R, DAG.getConstant(ShiftAmt, MVT::i32)); @@ -12297,7 +12309,8 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG, VT == MVT::v4i32 || VT == MVT::v8i16 || (Subtarget->hasInt256() && ((VT == MVT::v4i64 && Op.getOpcode() != ISD::SRA) || - VT == MVT::v8i32 || VT == MVT::v16i16))) { + VT == MVT::v8i32 || VT == MVT::v16i16)) || + (Subtarget->hasAVX512() && (VT == MVT::v8i64 || VT == MVT::v16i32))) { SDValue BaseShAmt; EVT EltVT = VT.getVectorElementType(); @@ -12365,6 +12378,8 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG, case MVT::v4i64: case MVT::v8i32: case MVT::v16i16: + case MVT::v16i32: + case MVT::v8i64: return getTargetVShiftNode(X86ISD::VSHLI, dl, VT, R, BaseShAmt, DAG); } case ISD::SRA: @@ -12374,6 +12389,8 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG, case MVT::v8i16: case MVT::v8i32: case MVT::v16i16: + case MVT::v16i32: + case MVT::v8i64: return getTargetVShiftNode(X86ISD::VSRAI, dl, VT, R, BaseShAmt, DAG); } case ISD::SRL: @@ -12385,6 +12402,8 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG, case MVT::v4i64: case MVT::v8i32: case MVT::v16i16: + case MVT::v16i32: + case MVT::v8i64: return getTargetVShiftNode(X86ISD::VSRLI, dl, VT, R, BaseShAmt, DAG); } } @@ -12393,7 +12412,8 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG, // Special case in 32-bit mode, where i64 is expanded into high and low parts. if (!Subtarget->is64Bit() && - (VT == MVT::v2i64 || (Subtarget->hasInt256() && VT == MVT::v4i64)) && + (VT == MVT::v2i64 || (Subtarget->hasInt256() && VT == MVT::v4i64) || + (Subtarget->hasAVX512() && VT == MVT::v8i64)) && Amt.getOpcode() == ISD::BITCAST && Amt.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) { Amt = Amt.getOperand(0); @@ -12442,6 +12462,8 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget, if (V.getNode()) return V; + if (Subtarget->hasAVX512() && (VT == MVT::v16i32 || VT == MVT::v8i64)) + return Op; // AVX2 has VPSLLV/VPSRAV/VPSRLV. if (Subtarget->hasInt256()) { if (Op.getOpcode() == ISD::SRL && @@ -13350,6 +13372,9 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::MUL_IMM: return "X86ISD::MUL_IMM"; case X86ISD::PTEST: return "X86ISD::PTEST"; case X86ISD::TESTP: return "X86ISD::TESTP"; + case X86ISD::TESTM: return "X86ISD::TESTM"; + case X86ISD::KORTEST: return "X86ISD::KORTEST"; + case X86ISD::KTEST: return "X86ISD::KTEST"; case X86ISD::PALIGNR: return "X86ISD::PALIGNR"; case X86ISD::PSHUFD: return "X86ISD::PSHUFD"; case X86ISD::PSHUFHW: return "X86ISD::PSHUFHW"; diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index eafe027a55..40b2a9ce76 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -274,7 +274,7 @@ namespace llvm { // PCMP* - Vector integer comparisons. PCMPEQ, PCMPGT, - // PCMP*M - Vector integer comparisons, the result is in a mask vector + // PCMP*M - Vector integer comparisons, the result is in a mask vector. PCMPEQM, PCMPGTM, /// CMPM, CMPMU - Vector comparison generating mask bits for fp and @@ -295,12 +295,15 @@ namespace llvm { // MUL_IMM - X86 specific multiply by immediate. MUL_IMM, - // PTEST - Vector bitwise comparisons + // PTEST - Vector bitwise comparisons. PTEST, - // TESTP - Vector packed fp sign bitwise comparisons + // TESTP - Vector packed fp sign bitwise comparisons. TESTP, + // TESTM - Vector "test" in AVX-512, the result is in a mask vector. + TESTM, + // OR/AND test for masks KORTEST, KTEST, diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index ccbd18edde..c3fb8019cc 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -1691,3 +1691,144 @@ defm VDIVPDZ : avx512_fp_packed<0x5E, "divpd", fdiv, VR512, v8f64, f512mem, SSE_ALU_ITINS_P.d, 0>, EVEX_V512, OpSize, VEX_W, EVEX_CD8<64, CD8VF>; +//===----------------------------------------------------------------------===// +// AVX-512 VPTESTM instructions +//===----------------------------------------------------------------------===// + +multiclass avx512_vptest<bits<8> opc, string OpcodeStr, RegisterClass KRC, + RegisterClass RC, X86MemOperand x86memop, PatFrag memop_frag, + SDNode OpNode, ValueType vt> { + def rr : AVX5128I<opc, MRMSrcReg, + (outs KRC:$dst), (ins RC:$src1, RC:$src2), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set KRC:$dst, (OpNode (vt RC:$src1), (vt RC:$src2)))]>, EVEX_4V; + def rm : AVX5128I<opc, MRMSrcMem, + (outs KRC:$dst), (ins RC:$src1, x86memop:$src2), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set KRC:$dst, (OpNode (vt RC:$src1), + (bitconvert (memop_frag addr:$src2))))]>, EVEX_4V; +} + +defm VPTESTMDZ : avx512_vptest<0x27, "vptestmd", VK16, VR512, f512mem, + memopv16i32, X86testm, v16i32>, EVEX_V512, + EVEX_CD8<32, CD8VF>; +defm VPTESTMQZ : avx512_vptest<0x27, "vptestmq", VK8, VR512, f512mem, memopv8i64, + X86testm, v8i64>, EVEX_V512, VEX_W, + EVEX_CD8<64, CD8VF>; + +//===----------------------------------------------------------------------===// +// AVX-512 Shift instructions +//===----------------------------------------------------------------------===// +multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM, + string OpcodeStr, + SDNode OpNode, RegisterClass RC, ValueType vt, + X86MemOperand x86memop, PatFrag mem_frag> { + def ri : AVX512BIi8<opc, ImmFormR, (outs RC:$dst), + (ins RC:$src1, i32i8imm:$src2), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set RC:$dst, (vt (OpNode RC:$src1, (i32 imm:$src2))))], + SSE_INTSHIFT_ITINS_P.rr>, EVEX_4V; + def mi: AVX512BIi8<opc, ImmFormM, (outs RC:$dst), + (ins x86memop:$src1, i32i8imm:$src2), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set RC:$dst, (OpNode (mem_frag addr:$src1), + (i32 imm:$src2)))], SSE_INTSHIFT_ITINS_P.rm>, EVEX_4V; +} + +multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode, + RegisterClass RC, ValueType vt, ValueType SrcVT, + PatFrag bc_frag> { + // src2 is always 128-bit + def rr : AVX512BI<opc, MRMSrcReg, (outs RC:$dst), + (ins RC:$src1, VR128X:$src2), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set RC:$dst, (vt (OpNode RC:$src1, (SrcVT VR128X:$src2))))], + SSE_INTSHIFT_ITINS_P.rr>, EVEX_4V; + def rm : AVX512BI<opc, MRMSrcMem, (outs RC:$dst), + (ins RC:$src1, i128mem:$src2), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set RC:$dst, (vt (OpNode RC:$src1, + (bc_frag (memopv2i64 addr:$src2)))))], + SSE_INTSHIFT_ITINS_P.rm>, EVEX_4V; +} + +defm VPSRLDZ : avx512_shift_rmi<0x72, MRM2r, MRM2m, "vpsrld", X86vsrli, + VR512, v16i32, i512mem, memopv16i32>, EVEX_V512, + EVEX_CD8<32, CD8VF>; +defm VPSRLDZ : avx512_shift_rrm<0xD2, "vpsrld", X86vsrl, + VR512, v16i32, v4i32, bc_v4i32>, EVEX_V512, + EVEX_CD8<32, CD8VQ>; + +defm VPSRLQZ : avx512_shift_rmi<0x73, MRM2r, MRM2m, "vpsrlq", X86vsrli, + VR512, v8i64, i512mem, memopv8i64>, EVEX_V512, + EVEX_CD8<64, CD8VF>, VEX_W; +defm VPSRLQZ : avx512_shift_rrm<0xD3, "vpsrlq", X86vsrl, + VR512, v8i64, v2i64, bc_v2i64>, EVEX_V512, + EVEX_CD8<64, CD8VQ>, VEX_W; + +defm VPSLLDZ : avx512_shift_rmi<0x72, MRM6r, MRM6m, "vpslld", X86vshli, + VR512, v16i32, i512mem, memopv16i32>, EVEX_V512, + EVEX_CD8<32, CD8VF>; +defm VPSLLDZ : avx512_shift_rrm<0xF2, "vpslld", X86vshl, + VR512, v16i32, v4i32, bc_v4i32>, EVEX_V512, + EVEX_CD8<32, CD8VQ>; + +defm VPSLLQZ : avx512_shift_rmi<0x73, MRM6r, MRM6m, "vpsllq", X86vshli, + VR512, v8i64, i512mem, memopv8i64>, EVEX_V512, + EVEX_CD8<64, CD8VF>, VEX_W; +defm VPSLLQZ : avx512_shift_rrm<0xF3, "vpsllq", X86vshl, + VR512, v8i64, v2i64, bc_v2i64>, EVEX_V512, + EVEX_CD8<64, CD8VQ>, VEX_W; + +defm VPSRADZ : avx512_shift_rmi<0x72, MRM4r, MRM4m, "vpsrad", X86vsrai, + VR512, v16i32, i512mem, memopv16i32>, EVEX_V512, + EVEX_CD8<32, CD8VF>; +defm VPSRADZ : avx512_shift_rrm<0xE2, "vpsrad", X86vsra, + VR512, v16i32, v4i32, bc_v4i32>, EVEX_V512, + EVEX_CD8<32, CD8VQ>; + +defm VPSRAQZ : avx512_shift_rmi<0x72, MRM4r, MRM4m, "vpsraq", X86vsrai, + VR512, v8i64, i512mem, memopv8i64>, EVEX_V512, + EVEX_CD8<64, CD8VF>, VEX_W; +defm VPSRAQZ : avx512_shift_rrm<0xE2, "vpsraq", X86vsra, + VR512, v8i64, v2i64, bc_v2i64>, EVEX_V512, + EVEX_CD8<64, CD8VQ>, VEX_W; + +//===-------------------------------------------------------------------===// +// Variable Bit Shifts +//===-------------------------------------------------------------------===// +multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode, + RegisterClass RC, ValueType vt, + X86MemOperand x86memop, PatFrag mem_frag> { + def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), + (ins RC:$src1, RC:$src2), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set RC:$dst, + (vt (OpNode RC:$src1, (vt RC:$src2))))]>, + EVEX_4V; + def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst), + (ins RC:$src1, x86memop:$src2), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set RC:$dst, + (vt (OpNode RC:$src1, (mem_frag addr:$src2))))]>, + EVEX_4V; +} + +defm VPSLLVDZ : avx512_var_shift<0x47, "vpsllvd", shl, VR512, v16i32, + i512mem, memopv16i32>, EVEX_V512, + EVEX_CD8<32, CD8VF>; +defm VPSLLVQZ : avx512_var_shift<0x47, "vpsllvq", shl, VR512, v8i64, + i512mem, memopv8i64>, EVEX_V512, VEX_W, + EVEX_CD8<64, CD8VF>; +defm VPSRLVDZ : avx512_var_shift<0x45, "vpsrlvd", srl, VR512, v16i32, + i512mem, memopv16i32>, EVEX_V512, + EVEX_CD8<32, CD8VF>; +defm VPSRLVQZ : avx512_var_shift<0x45, "vpsrlvq", srl, VR512, v8i64, + i512mem, memopv8i64>, EVEX_V512, VEX_W, + EVEX_CD8<64, CD8VF>; +defm VPSRAVDZ : avx512_var_shift<0x46, "vpsravd", sra, VR512, v16i32, + i512mem, memopv16i32>, EVEX_V512, + EVEX_CD8<32, CD8VF>; +defm VPSRAVQZ : avx512_var_shift<0x46, "vpsravq", sra, VR512, v8i64, + i512mem, memopv8i64>, EVEX_V512, VEX_W, + EVEX_CD8<64, CD8VF>; diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index 9f1c999cdd..b23da040d9 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -149,6 +149,9 @@ def X86ptest : SDNode<"X86ISD::PTEST", SDTX86CmpPTest>; def X86testp : SDNode<"X86ISD::TESTP", SDTX86CmpPTest>; def X86kortest : SDNode<"X86ISD::KORTEST", SDTX86CmpPTest>; def X86ktest : SDNode<"X86ISD::KTEST", SDTX86CmpPTest>; +def X86testm : SDNode<"X86ISD::TESTM", SDTypeProfile<1, 2, [SDTCisVec<0>, + SDTCisVec<1>, + SDTCisSameAs<2, 1>]>>; def X86pmuludq : SDNode<"X86ISD::PMULUDQ", SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, |