diff options
author | Jiangning Liu <jiangning.liu@arm.com> | 2013-11-06 02:25:49 +0000 |
---|---|---|
committer | Jiangning Liu <jiangning.liu@arm.com> | 2013-11-06 02:25:49 +0000 |
commit | 258115258f8fe15e9d74b5fb524f90b75bb917d1 (patch) | |
tree | f7df585491dc8c3376135fb0e8d39db4dd0b643a /lib | |
parent | 10bb82e54fc0608e6220581bda0405af8f12d32f (diff) | |
download | llvm-258115258f8fe15e9d74b5fb524f90b75bb917d1.tar.gz llvm-258115258f8fe15e9d74b5fb524f90b75bb917d1.tar.bz2 llvm-258115258f8fe15e9d74b5fb524f90b75bb917d1.tar.xz |
Implement AArch64 Neon instruction set Bitwise Extract.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@194118 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Target/AArch64/AArch64ISelLowering.cpp | 199 | ||||
-rw-r--r-- | lib/Target/AArch64/AArch64ISelLowering.h | 3 | ||||
-rw-r--r-- | lib/Target/AArch64/AArch64InstrFormats.td | 18 | ||||
-rw-r--r-- | lib/Target/AArch64/AArch64InstrNEON.td | 68 | ||||
-rw-r--r-- | lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp | 10 | ||||
-rw-r--r-- | lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h | 5 |
6 files changed, 203 insertions, 100 deletions
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index 6765c3339d..ab46d7f7b3 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -907,6 +907,8 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { return "AArch64ISD::NEON_ST3_UPD"; case AArch64ISD::NEON_ST4_UPD: return "AArch64ISD::NEON_ST4_UPD"; + case AArch64ISD::NEON_VEXTRACT: + return "AArch64ISD::NEON_VEXTRACT"; default: return NULL; } @@ -3797,7 +3799,7 @@ AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, - SelectionDAG &DAG) const { + SelectionDAG &DAG) const { SDValue V1 = Op.getOperand(0); SDValue V2 = Op.getOperand(1); SDLoc dl(Op); @@ -3811,101 +3813,126 @@ AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, ArrayRef<int> ShuffleMask = SVN->getMask(); unsigned EltSize = VT.getVectorElementType().getSizeInBits(); - if (EltSize <= 64) { - if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) { - int Lane = SVN->getSplatIndex(); - // If this is undef splat, generate it via "just" vdup, if possible. - if (Lane == -1) Lane = 0; - - // Test if V1 is a SCALAR_TO_VECTOR. - if (V1.getOpcode() == ISD::SCALAR_TO_VECTOR) { - return DAG.getNode(AArch64ISD::NEON_VDUP, dl, VT, V1.getOperand(0)); - } - // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR. - if (V1.getOpcode() == ISD::BUILD_VECTOR) { - bool IsScalarToVector = true; - for (unsigned i = 0, e = V1.getNumOperands(); i != e; ++i) - if (V1.getOperand(i).getOpcode() != ISD::UNDEF && - i != (unsigned)Lane) { - IsScalarToVector = false; - break; - } - if (IsScalarToVector) - return DAG.getNode(AArch64ISD::NEON_VDUP, dl, VT, - V1.getOperand(Lane)); - } - return DAG.getNode(AArch64ISD::NEON_VDUPLANE, dl, VT, V1, - DAG.getConstant(Lane, MVT::i64)); - } - // For shuffle mask like "0, 1, 2, 3, 4, 5, 13, 7", try to generate insert - // by element from V2 to V1 . - // If shuffle mask is like "0, 1, 10, 11, 12, 13, 14, 15", V2 would be a - // better choice to be inserted than V1 as less insert needed, so we count - // element to be inserted for both V1 and V2, and select less one as insert - // target. - - // Collect elements need to be inserted and their index. - SmallVector<int, 8> NV1Elt; - SmallVector<int, 8> N1Index; - SmallVector<int, 8> NV2Elt; - SmallVector<int, 8> N2Index; - int Length = ShuffleMask.size(); - int V1EltNum = V1.getValueType().getVectorNumElements(); - for (int I = 0; I != Length; ++I) { - if (ShuffleMask[I] != I) { - NV1Elt.push_back(ShuffleMask[I]); - N1Index.push_back(I); - } + if (EltSize > 64) + return SDValue(); + + // If the element of shuffle mask are all the same constant, we can + // transform it into either NEON_VDUP or NEON_VDUPLANE + if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) { + int Lane = SVN->getSplatIndex(); + // If this is undef splat, generate it via "just" vdup, if possible. + if (Lane == -1) Lane = 0; + + // Test if V1 is a SCALAR_TO_VECTOR. + if (V1.getOpcode() == ISD::SCALAR_TO_VECTOR) { + return DAG.getNode(AArch64ISD::NEON_VDUP, dl, VT, V1.getOperand(0)); } - for (int I = 0; I != Length; ++I) { - if (ShuffleMask[I] != (I + V1EltNum)) { - NV2Elt.push_back(ShuffleMask[I]); - N2Index.push_back(I); - } + // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR. + if (V1.getOpcode() == ISD::BUILD_VECTOR) { + bool IsScalarToVector = true; + for (unsigned i = 0, e = V1.getNumOperands(); i != e; ++i) + if (V1.getOperand(i).getOpcode() != ISD::UNDEF && + i != (unsigned)Lane) { + IsScalarToVector = false; + break; + } + if (IsScalarToVector) + return DAG.getNode(AArch64ISD::NEON_VDUP, dl, VT, + V1.getOperand(Lane)); } + return DAG.getNode(AArch64ISD::NEON_VDUPLANE, dl, VT, V1, + DAG.getConstant(Lane, MVT::i64)); + } - // Decide which to be inserted. If all lanes mismatch, neither V1 nor V2 - // will be inserted. - SDValue InsV = V1; - SmallVector<int, 8> InsMasks = NV1Elt; - SmallVector<int, 8> InsIndex = N1Index; - if ((int)NV1Elt.size() != Length || (int)NV2Elt.size() != Length) { - if (NV1Elt.size() > NV2Elt.size()) { - InsV = V2; - InsMasks = NV2Elt; - InsIndex = N2Index; + int Length = ShuffleMask.size(); + int V1EltNum = V1.getValueType().getVectorNumElements(); + + // If the number of v1 elements is the same as the number of shuffle mask + // element and the shuffle masks are sequential values, we can transform + // it into NEON_VEXTRACT. + if (V1EltNum == Length) { + // Check if the shuffle mask is sequential. + bool IsSequential = true; + int CurMask = ShuffleMask[0]; + for (int I = 0; I < Length; ++I) { + if (ShuffleMask[I] != CurMask) { + IsSequential = false; + break; } - } else { - InsV = DAG.getNode(ISD::UNDEF, dl, VT); + CurMask++; } + if (IsSequential) { + assert((EltSize % 8 == 0) && "Bitsize of vector element is incorrect"); + unsigned VecSize = EltSize * V1EltNum; + unsigned Index = (EltSize/8) * ShuffleMask[0]; + if (VecSize == 64 || VecSize == 128) + return DAG.getNode(AArch64ISD::NEON_VEXTRACT, dl, VT, V1, V2, + DAG.getConstant(Index, MVT::i64)); + } + } - SDValue PassN; + // For shuffle mask like "0, 1, 2, 3, 4, 5, 13, 7", try to generate insert + // by element from V2 to V1 . + // If shuffle mask is like "0, 1, 10, 11, 12, 13, 14, 15", V2 would be a + // better choice to be inserted than V1 as less insert needed, so we count + // element to be inserted for both V1 and V2, and select less one as insert + // target. + + // Collect elements need to be inserted and their index. + SmallVector<int, 8> NV1Elt; + SmallVector<int, 8> N1Index; + SmallVector<int, 8> NV2Elt; + SmallVector<int, 8> N2Index; + for (int I = 0; I != Length; ++I) { + if (ShuffleMask[I] != I) { + NV1Elt.push_back(ShuffleMask[I]); + N1Index.push_back(I); + } + } + for (int I = 0; I != Length; ++I) { + if (ShuffleMask[I] != (I + V1EltNum)) { + NV2Elt.push_back(ShuffleMask[I]); + N2Index.push_back(I); + } + } - for (int I = 0, E = InsMasks.size(); I != E; ++I) { - SDValue ExtV = V1; - int Mask = InsMasks[I]; - if (Mask > V1EltNum) { - ExtV = V2; - Mask -= V1EltNum; - } - // Any value type smaller than i32 is illegal in AArch64, and this lower - // function is called after legalize pass, so we need to legalize - // the result here. - EVT EltVT; - if (VT.getVectorElementType().isFloatingPoint()) - EltVT = (EltSize == 64) ? MVT::f64 : MVT::f32; - else - EltVT = (EltSize == 64) ? MVT::i64 : MVT::i32; - - PassN = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, ExtV, - DAG.getConstant(Mask, MVT::i64)); - PassN = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, InsV, PassN, - DAG.getConstant(InsIndex[I], MVT::i64)); + // Decide which to be inserted. If all lanes mismatch, neither V1 nor V2 + // will be inserted. + SDValue InsV = V1; + SmallVector<int, 8> InsMasks = NV1Elt; + SmallVector<int, 8> InsIndex = N1Index; + if ((int)NV1Elt.size() != Length || (int)NV2Elt.size() != Length) { + if (NV1Elt.size() > NV2Elt.size()) { + InsV = V2; + InsMasks = NV2Elt; + InsIndex = N2Index; } - return PassN; + } else { + InsV = DAG.getNode(ISD::UNDEF, dl, VT); } - return SDValue(); + for (int I = 0, E = InsMasks.size(); I != E; ++I) { + SDValue ExtV = V1; + int Mask = InsMasks[I]; + if (Mask >= V1EltNum) { + ExtV = V2; + Mask -= V1EltNum; + } + // Any value type smaller than i32 is illegal in AArch64, and this lower + // function is called after legalize pass, so we need to legalize + // the result here. + EVT EltVT; + if (VT.getVectorElementType().isFloatingPoint()) + EltVT = (EltSize == 64) ? MVT::f64 : MVT::f32; + else + EltVT = (EltSize == 64) ? MVT::i64 : MVT::i32; + + ExtV = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, ExtV, + DAG.getConstant(Mask, MVT::i64)); + InsV = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, InsV, ExtV, + DAG.getConstant(InsIndex[I], MVT::i64)); + } + return InsV; } AArch64TargetLowering::ConstraintType diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h index 7effbfd66c..83fd79d6ba 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.h +++ b/lib/Target/AArch64/AArch64ISelLowering.h @@ -144,6 +144,9 @@ namespace AArch64ISD { // Vector dup by lane NEON_VDUPLANE, + // Vector extract + NEON_VEXTRACT, + // NEON loads with post-increment base updates: NEON_LD1_UPD = ISD::FIRST_TARGET_MEMORY_OPCODE, NEON_LD2_UPD, diff --git a/lib/Target/AArch64/AArch64InstrFormats.td b/lib/Target/AArch64/AArch64InstrFormats.td index b3e114a01b..8a2142646e 100644 --- a/lib/Target/AArch64/AArch64InstrFormats.td +++ b/lib/Target/AArch64/AArch64InstrFormats.td @@ -983,6 +983,24 @@ class NeonInstAlias<string Asm, dag Result, bit Emit = 0b1> : InstAlias<Asm, Result, Emit> { } +// Format AdvSIMD bitwise extract +class NeonI_BitExtract<bit q, bits<2> op2, + dag outs, dag ins, string asmstr, + list<dag> patterns, InstrItinClass itin> + : A64InstRdnm<outs, ins, asmstr, patterns, itin> { + let Inst{31} = 0b0; + let Inst{30} = q; + let Inst{29-24} = 0b101110; + let Inst{23-22} = op2; + let Inst{21} = 0b0; + // Inherit Rm in 20-16 + let Inst{15} = 0b0; + // imm4 in 14-11 + let Inst{10} = 0b0; + // Inherit Rn in 9-5 + // Inherit Rd in 4-0 +} + // Format AdvSIMD 3 vector registers with same vector type class NeonI_3VSame<bit q, bit u, bits<2> size, bits<5> opcode, dag outs, dag ins, string asmstr, diff --git a/lib/Target/AArch64/AArch64InstrNEON.td b/lib/Target/AArch64/AArch64InstrNEON.td index 4cb5da6b8a..8a78d14b8c 100644 --- a/lib/Target/AArch64/AArch64InstrNEON.td +++ b/lib/Target/AArch64/AArch64InstrNEON.td @@ -50,6 +50,9 @@ def Neon_vdup : SDNode<"AArch64ISD::NEON_VDUP", SDTypeProfile<1, 1, [SDTCisVec<0>]>>; def Neon_vduplane : SDNode<"AArch64ISD::NEON_VDUPLANE", SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, SDTCisVT<2, i64>]>>; +def Neon_vextract : SDNode<"AArch64ISD::NEON_VEXTRACT", SDTypeProfile<1, 3, + [SDTCisVec<0>, SDTCisSameAs<0, 1>, + SDTCisSameAs<0, 2>, SDTCisVT<3, i64>]>>; //===----------------------------------------------------------------------===// // Multiclasses @@ -1062,7 +1065,7 @@ def neon_uimm8_asmoperand : AsmOperandClass def neon_uimm8 : Operand<i32>, ImmLeaf<i32, [{(void)Imm; return true;}]> { let ParserMatchClass = neon_uimm8_asmoperand; - let PrintMethod = "printNeonUImm8Operand"; + let PrintMethod = "printUImmHexOperand"; } def neon_uimm64_mask_asmoperand : AsmOperandClass @@ -4430,31 +4433,43 @@ def : Pat<(v2f64 (bitconvert (f128 FPR128:$src))), (v2f64 FPR128:$src)>; def neon_uimm0_bare : Operand<i64>, ImmLeaf<i64, [{return Imm == 0;}]> { let ParserMatchClass = neon_uimm0_asmoperand; - let PrintMethod = "printNeonUImm8OperandBare"; + let PrintMethod = "printUImmBareOperand"; } def neon_uimm1_bare : Operand<i64>, ImmLeaf<i64, [{(void)Imm; return true;}]> { let ParserMatchClass = neon_uimm1_asmoperand; - let PrintMethod = "printNeonUImm8OperandBare"; + let PrintMethod = "printUImmBareOperand"; } def neon_uimm2_bare : Operand<i64>, ImmLeaf<i64, [{(void)Imm; return true;}]> { let ParserMatchClass = neon_uimm2_asmoperand; - let PrintMethod = "printNeonUImm8OperandBare"; + let PrintMethod = "printUImmBareOperand"; } def neon_uimm3_bare : Operand<i64>, ImmLeaf<i64, [{(void)Imm; return true;}]> { let ParserMatchClass = uimm3_asmoperand; - let PrintMethod = "printNeonUImm8OperandBare"; + let PrintMethod = "printUImmBareOperand"; } def neon_uimm4_bare : Operand<i64>, ImmLeaf<i64, [{(void)Imm; return true;}]> { let ParserMatchClass = uimm4_asmoperand; - let PrintMethod = "printNeonUImm8OperandBare"; + let PrintMethod = "printUImmBareOperand"; +} + +def neon_uimm3 : Operand<i64>, + ImmLeaf<i64, [{(void)Imm; return true;}]> { + let ParserMatchClass = uimm3_asmoperand; + let PrintMethod = "printUImmHexOperand"; +} + +def neon_uimm4 : Operand<i64>, + ImmLeaf<i64, [{(void)Imm; return true;}]> { + let ParserMatchClass = uimm4_asmoperand; + let PrintMethod = "printUImmHexOperand"; } class NeonI_INS_main<string asmop, string Res, ValueType ResTy, @@ -4472,6 +4487,47 @@ class NeonI_INS_main<string asmop, string Res, ValueType ResTy, let Constraints = "$src = $Rd"; } +// Bitwise Extract +class NeonI_Extract<bit q, bits<2> op2, string asmop, + string OpS, RegisterOperand OpVPR, Operand OpImm> + : NeonI_BitExtract<q, op2, (outs OpVPR:$Rd), + (ins OpVPR:$Rn, OpVPR:$Rm, OpImm:$Index), + asmop # "\t$Rd." # OpS # ", $Rn." # OpS # + ", $Rm." # OpS # ", $Index", + [], + NoItinerary>{ + bits<4> Index; +} + +def EXTvvvi_8b : NeonI_Extract<0b0, 0b00, "ext", "8b", + VPR64, neon_uimm3> { + let Inst{14-11} = {0b0, Index{2}, Index{1}, Index{0}}; +} + +def EXTvvvi_16b: NeonI_Extract<0b1, 0b00, "ext", "16b", + VPR128, neon_uimm4> { + let Inst{14-11} = Index; +} + +class NI_Extract<ValueType OpTy, RegisterOperand OpVPR, Instruction INST, + Operand OpImm> + : Pat<(OpTy (Neon_vextract (OpTy OpVPR:$Rn), (OpTy OpVPR:$Rm), + (i64 OpImm:$Imm))), + (INST OpVPR:$Rn, OpVPR:$Rm, OpImm:$Imm)>; + +def : NI_Extract<v8i8, VPR64, EXTvvvi_8b, neon_uimm3>; +def : NI_Extract<v4i16, VPR64, EXTvvvi_8b, neon_uimm3>; +def : NI_Extract<v2i32, VPR64, EXTvvvi_8b, neon_uimm3>; +def : NI_Extract<v1i64, VPR64, EXTvvvi_8b, neon_uimm3>; +def : NI_Extract<v2f32, VPR64, EXTvvvi_8b, neon_uimm3>; +def : NI_Extract<v1f64, VPR64, EXTvvvi_8b, neon_uimm3>; +def : NI_Extract<v16i8, VPR128, EXTvvvi_16b, neon_uimm4>; +def : NI_Extract<v8i16, VPR128, EXTvvvi_16b, neon_uimm4>; +def : NI_Extract<v4i32, VPR128, EXTvvvi_16b, neon_uimm4>; +def : NI_Extract<v2i64, VPR128, EXTvvvi_16b, neon_uimm4>; +def : NI_Extract<v4f32, VPR128, EXTvvvi_16b, neon_uimm4>; +def : NI_Extract<v2f64, VPR128, EXTvvvi_16b, neon_uimm4>; + // The followings are for instruction class (3V Elem) // Variant 1 diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp index 51335e145b..c081691756 100644 --- a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp +++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp @@ -462,8 +462,8 @@ void AArch64InstPrinter::printNeonUImm0Operand(const MCInst *MI, unsigned OpNum, o << "#0x0"; } -void AArch64InstPrinter::printNeonUImm8Operand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { +void AArch64InstPrinter::printUImmHexOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { const MCOperand &MOUImm = MI->getOperand(OpNum); assert(MOUImm.isImm() && @@ -475,9 +475,9 @@ void AArch64InstPrinter::printNeonUImm8Operand(const MCInst *MI, unsigned OpNum, O.write_hex(Imm); } -void AArch64InstPrinter::printNeonUImm8OperandBare(const MCInst *MI, - unsigned OpNum, - raw_ostream &O) { +void AArch64InstPrinter::printUImmBareOperand(const MCInst *MI, + unsigned OpNum, + raw_ostream &O) { const MCOperand &MOUImm = MI->getOperand(OpNum); assert(MOUImm.isImm() diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h index 28ebfc45f1..37b7273438 100644 --- a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h +++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h @@ -169,9 +169,8 @@ public: void printNeonMovImmShiftOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printNeonUImm0Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printNeonUImm8Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printNeonUImm8OperandBare(const MCInst *MI, unsigned OpNum, - raw_ostream &O); + void printUImmHexOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printUImmBareOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printNeonUImm64MaskOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); |