From f58e4144054b85e855c57c86eb058a6bb1907552 Mon Sep 17 00:00:00 2001 From: Elena Demikhovsky Date: Thu, 14 Nov 2013 11:29:27 +0000 Subject: AVX-512: Handled extractelement from mask vector; Added VMOVSHDUP/VMOVSLDUP shuffle instructions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@194691 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 43 +++++++++++++++++++++++++++++++ lib/Target/X86/X86InstrAVX512.td | 33 ++++++++++++++++++++++++ lib/Target/X86/X86InstrInfo.cpp | 6 +++-- test/CodeGen/X86/avx512-insert-extract.ll | 25 ++++++++++++++++++ test/CodeGen/X86/avx512-shuffle.ll | 8 ++++++ 5 files changed, 113 insertions(+), 2 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 6df0fd880f..a878ea82ea 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -16323,6 +16323,44 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG, EltNo); } +/// Extract one bit from mask vector, like v16i1 or v8i1. +/// AVX-512 feature. +static SDValue ExtractBitFromMaskVector(SDNode *N, SelectionDAG &DAG) { + SDValue Vec = N->getOperand(0); + SDLoc dl(Vec); + MVT VecVT = Vec.getSimpleValueType(); + SDValue Idx = N->getOperand(1); + MVT EltVT = N->getSimpleValueType(0); + + assert((VecVT.getVectorElementType() == MVT::i1 && EltVT == MVT::i8) || + "Unexpected operands in ExtractBitFromMaskVector"); + + // variable index + if (!isa<ConstantSDNode>(Idx)) { + MVT ExtVT = (VecVT == MVT::v8i1 ?
MVT::v8i64 : MVT::v16i32); + SDValue Ext = DAG.getNode(ISD::ZERO_EXTEND, dl, ExtVT, Vec); + SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + ExtVT.getVectorElementType(), Ext); + return DAG.getNode(ISD::TRUNCATE, dl, EltVT, Elt); + } + + unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); + + MVT ScalarVT = MVT::getIntegerVT(VecVT.getSizeInBits()); + unsigned MaxShift = VecVT.getSizeInBits() - 1; + Vec = DAG.getNode(ISD::BITCAST, dl, ScalarVT, Vec); + Vec = DAG.getNode(ISD::SHL, dl, ScalarVT, Vec, + DAG.getConstant(MaxShift - IdxVal, ScalarVT)); + Vec = DAG.getNode(ISD::SRL, dl, ScalarVT, Vec, + DAG.getConstant(MaxShift, ScalarVT)); + + if (VecVT == MVT::v16i1) { + Vec = DAG.getNode(ISD::BITCAST, dl, MVT::i16, Vec); + return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Vec); + } + return DAG.getNode(ISD::BITCAST, dl, MVT::i8, Vec); +} + /// PerformEXTRACT_VECTOR_ELTCombine - Detect vector gather/scatter index /// generation and convert it from being a bunch of shuffles and extracts /// to a simple store and scalar loads to extract the elements. @@ -16333,6 +16371,11 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, return NewOp; SDValue InputVector = N->getOperand(0); + + if (InputVector.getValueType().getVectorElementType() == MVT::i1 && + !DCI.isBeforeLegalize()) + return ExtractBitFromMaskVector(N, DAG); + // Detect whether we are trying to convert from mmx to i32 and the bitcast // from mmx to v2i32 has a single usage.
if (InputVector.getNode()->getOpcode() == llvm::ISD::BITCAST && diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 8935f90ac2..cb19fbd563 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -2075,6 +2075,38 @@ defm VMOVDDUPZ : avx512_movddup<"vmovddup", VR512, v8f64, f512mem, memopv8f64>, def : Pat<(X86Movddup (v8f64 (scalar_to_vector (loadf64 addr:$src)))), (VMOVDDUPZrm addr:$src)>; +//===---------------------------------------------------------------------===// +// Replicate Single FP - MOVSHDUP and MOVSLDUP +//===---------------------------------------------------------------------===// +multiclass avx512_replicate_sfp op, SDNode OpNode, string OpcodeStr, + ValueType vt, RegisterClass RC, PatFrag mem_frag, + X86MemOperand x86memop> { + def rr : AVX512XSI, EVEX; + let mayLoad = 1 in + def rm : AVX512XSI, EVEX; +} + +defm VMOVSHDUPZ : avx512_replicate_sfp<0x16, X86Movshdup, "vmovshdup", + v16f32, VR512, memopv16f32, f512mem>, EVEX_V512, + EVEX_CD8<32, CD8VF>; +defm VMOVSLDUPZ : avx512_replicate_sfp<0x12, X86Movsldup, "vmovsldup", + v16f32, VR512, memopv16f32, f512mem>, EVEX_V512, + EVEX_CD8<32, CD8VF>; + +def : Pat<(v16i32 (X86Movshdup VR512:$src)), (VMOVSHDUPZrr VR512:$src)>; +def : Pat<(v16i32 (X86Movshdup (memopv16i32 addr:$src))), + (VMOVSHDUPZrm addr:$src)>; +def : Pat<(v16i32 (X86Movsldup VR512:$src)), (VMOVSLDUPZrr VR512:$src)>; +def : Pat<(v16i32 (X86Movsldup (memopv16i32 addr:$src))), + (VMOVSLDUPZrm addr:$src)>; + +//===----------------------------------------------------------------------===// +// Move Low to High and High to Low packed FP Instructions +//===----------------------------------------------------------------------===// def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst), (ins VR128X:$src1, VR128X:$src2), "vmovlhps{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}", @@ -3385,6 +3417,7 @@ multiclass avx512_alignr, EVEX_4V; + let mayLoad = 1 in def rmi : 
AVX512AIi8<0x03, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$src3), !strconcat(OpcodeStr, diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 2351cffdaa..0a668a8991 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -3165,7 +3165,8 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg, assert(X86::RFP80RegClass.hasSubClassEq(RC) && "Unknown 10-byte regclass"); return load ? X86::LD_Fp80m : X86::ST_FpP80m; case 16: { - assert(X86::VR128RegClass.hasSubClassEq(RC) && "Unknown 16-byte regclass"); + assert((X86::VR128RegClass.hasSubClassEq(RC) || + X86::VR128XRegClass.hasSubClassEq(RC))&& "Unknown 16-byte regclass"); // If stack is realigned we can use aligned stores. if (isStackAligned) return load ? @@ -3177,7 +3178,8 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg, (HasAVX ? X86::VMOVUPSmr : X86::MOVUPSmr); } case 32: - assert(X86::VR256RegClass.hasSubClassEq(RC) && "Unknown 32-byte regclass"); + assert((X86::VR256RegClass.hasSubClassEq(RC) || + X86::VR256XRegClass.hasSubClassEq(RC)) && "Unknown 32-byte regclass"); // If stack is realigned we can use aligned stores. if (isStackAligned) return load ? 
X86::VMOVAPSYrm : X86::VMOVAPSYmr; diff --git a/test/CodeGen/X86/avx512-insert-extract.ll b/test/CodeGen/X86/avx512-insert-extract.ll index 530b92e484..3f067401ed 100644 --- a/test/CodeGen/X86/avx512-insert-extract.ll +++ b/test/CodeGen/X86/avx512-insert-extract.ll @@ -98,3 +98,28 @@ define i32 @test10(<16 x i32> %x, i32 %ind) nounwind { ret i32 %e } +;CHECK-LABEL: test11 +;CHECK: movl $260 +;CHECK: bextrl +;CHECK: movl $268 +;CHECK: bextrl +;CHECK: ret +define <16 x i32> @test11(<16 x i32>%a, <16 x i32>%b) { + %cmp_res = icmp ult <16 x i32> %a, %b + %ia = extractelement <16 x i1> %cmp_res, i32 4 + %ib = extractelement <16 x i1> %cmp_res, i32 12 + + br i1 %ia, label %A, label %B + + A: + ret <16 x i32>%b + B: + %c = add <16 x i32>%b, %a + br i1 %ib, label %C, label %D + C: + %c1 = sub <16 x i32>%c, %a + ret <16 x i32>%c1 + D: + %c2 = mul <16 x i32>%c, %a + ret <16 x i32>%c2 +} diff --git a/test/CodeGen/X86/avx512-shuffle.ll b/test/CodeGen/X86/avx512-shuffle.ll index f9186b643f..c9e0c2b992 100644 --- a/test/CodeGen/X86/avx512-shuffle.ll +++ b/test/CodeGen/X86/avx512-shuffle.ll @@ -215,4 +215,12 @@ define <16 x i32> @test24(<16 x i32> %a, <16 x i32> %b) nounwind { define <16 x i32> @test25(<16 x i32> %a, <16 x i32> %b) nounwind { %c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> ret <16 x i32> %c +} + +; CHECK-LABEL: @test26 +; CHECK: vmovshdup +; CHECK: ret +define <16 x i32> @test26(<16 x i32> %a) nounwind { + %c = shufflevector <16 x i32> %a, <16 x i32> undef, <16 x i32> + ret <16 x i32> %c } \ No newline at end of file -- cgit v1.2.3