diff options
Diffstat (limited to 'lib/Target/AArch64/AArch64ISelLowering.cpp')
-rw-r--r-- | lib/Target/AArch64/AArch64ISelLowering.cpp | 198 |
1 files changed, 194 insertions, 4 deletions
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index d89213c80d..40ed8af551 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -297,15 +297,23 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM) setOperationAction(ISD::BUILD_VECTOR, MVT::v1f64, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i16, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i32, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i32, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v1i64, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f32, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v1f64, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i8, Legal); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i16, Legal); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32, Legal); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i64, Legal); setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i16, Legal); setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32, Legal); setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i64, Legal); @@ -866,12 +874,12 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { return "AArch64ISD::NEON_CMPZ"; case AArch64ISD::NEON_TST: return "AArch64ISD::NEON_TST"; - case AArch64ISD::NEON_DUPIMM: - return "AArch64ISD::NEON_DUPIMM"; case AArch64ISD::NEON_QSHLs: return "AArch64ISD::NEON_QSHLs"; case AArch64ISD::NEON_QSHLu: return "AArch64ISD::NEON_QSHLu"; + case AArch64ISD::NEON_VDUP: + return "AArch64ISD::NEON_VDUP"; case AArch64ISD::NEON_VDUPLANE: return "AArch64ISD::NEON_VDUPLANE"; default: @@ -3342,7 +3350,7 @@ static SDValue PerformShiftCombine(SDNode *N, case ISD::SHL: if (isVShiftLImm(N->getOperand(1), VT, Cnt)) { SDValue RHS = - DAG.getNode(AArch64ISD::NEON_DUPIMM, SDLoc(N->getOperand(1)), VT, + DAG.getNode(AArch64ISD::NEON_VDUP, SDLoc(N->getOperand(1)), VT, DAG.getConstant(Cnt, MVT::i32)); return DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0), RHS); } @@ -3352,7 +3360,7 @@ static SDValue PerformShiftCombine(SDNode *N, case ISD::SRL: if (isVShiftRImm(N->getOperand(1), VT, Cnt)) { SDValue RHS = - DAG.getNode(AArch64ISD::NEON_DUPIMM, SDLoc(N->getOperand(1)), VT, + DAG.getNode(AArch64ISD::NEON_VDUP, SDLoc(N->getOperand(1)), VT, DAG.getConstant(Cnt, MVT::i32)); return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N->getOperand(0), RHS); } @@ -3492,6 +3500,107 @@ AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, } } } + + unsigned NumElts = VT.getVectorNumElements(); + bool isOnlyLowElement = true; + bool usesOnlyOneValue = true; + bool hasDominantValue = false; + bool isConstant = true; + + // Map of the number of times a particular SDValue appears in the + // element list. + DenseMap<SDValue, unsigned> ValueCounts; + SDValue Value; + for (unsigned i = 0; i < NumElts; ++i) { + SDValue V = Op.getOperand(i); + if (V.getOpcode() == ISD::UNDEF) + continue; + if (i > 0) + isOnlyLowElement = false; + if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V)) + isConstant = false; + + ValueCounts.insert(std::make_pair(V, 0)); + unsigned &Count = ValueCounts[V]; + + // Is this value dominant? (takes up more than half of the lanes) + if (++Count > (NumElts / 2)) { + hasDominantValue = true; + Value = V; + } + } + if (ValueCounts.size() != 1) + usesOnlyOneValue = false; + if (!Value.getNode() && ValueCounts.size() > 0) + Value = ValueCounts.begin()->first; + + if (ValueCounts.size() == 0) + return DAG.getUNDEF(VT); + + // Loads are better lowered with insert_vector_elt. + // Keep going if we are hitting this case. + if (isOnlyLowElement && !ISD::isNormalLoad(Value.getNode())) + return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Value); + + unsigned EltSize = VT.getVectorElementType().getSizeInBits(); + // Use VDUP for non-constant splats. + if (hasDominantValue && EltSize <= 64) { + if (!isConstant) { + SDValue N; + + // If we are DUPing a value that comes directly from a vector, we could + // just use DUPLANE. We can only do this if the lane being extracted + // is at a constant index, as the DUP from lane instructions only have + // constant-index forms. + if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT && + isa<ConstantSDNode>(Value->getOperand(1))) { + N = DAG.getNode(AArch64ISD::NEON_VDUPLANE, DL, VT, + Value->getOperand(0), Value->getOperand(1)); + } else + N = DAG.getNode(AArch64ISD::NEON_VDUP, DL, VT, Value); + + if (!usesOnlyOneValue) { + // The dominant value was splatted as 'N', but we now have to insert + // all differing elements. + for (unsigned I = 0; I < NumElts; ++I) { + if (Op.getOperand(I) == Value) + continue; + SmallVector<SDValue, 3> Ops; + Ops.push_back(N); + Ops.push_back(Op.getOperand(I)); + Ops.push_back(DAG.getConstant(I, MVT::i32)); + N = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, &Ops[0], 3); + } + } + return N; + } + if (usesOnlyOneValue && isConstant) { + return DAG.getNode(AArch64ISD::NEON_VDUP, DL, VT, Value); + } + } + // If all elements are constants and the case above didn't get hit, fall back + // to the default expansion, which will generate a load from the constant + // pool. + if (isConstant) + return SDValue(); + + // If all else fails, just use a sequence of INSERT_VECTOR_ELT when we + // know the default expansion would otherwise fall back on something even + // worse. For a vector with one or two non-undef values, that's + // scalar_to_vector for the elements followed by a shuffle (provided the + // shuffle is valid for the target) and materialization element by element + // on the stack followed by a load for everything else. + if (!isConstant && !usesOnlyOneValue) { + SDValue Vec = DAG.getUNDEF(VT); + for (unsigned i = 0 ; i < NumElts; ++i) { + SDValue V = Op.getOperand(i); + if (V.getOpcode() == ISD::UNDEF) + continue; + SDValue LaneIdx = DAG.getConstant(i, MVT::i32); + Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V, LaneIdx); + } + return Vec; + } return SDValue(); } @@ -3499,6 +3608,7 @@ SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { SDValue V1 = Op.getOperand(0); + SDValue V2 = Op.getOperand(1); SDLoc dl(Op); EVT VT = Op.getValueType(); ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode()); @@ -3516,9 +3626,89 @@ AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, // If this is undef splat, generate it via "just" vdup, if possible. if (Lane == -1) Lane = 0; + // Test if V1 is a SCALAR_TO_VECTOR. + if (V1.getOpcode() == ISD::SCALAR_TO_VECTOR) { + return DAG.getNode(AArch64ISD::NEON_VDUP, dl, VT, V1.getOperand(0)); + } + // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR. + if (V1.getOpcode() == ISD::BUILD_VECTOR) { + bool IsScalarToVector = true; + for (unsigned i = 0, e = V1.getNumOperands(); i != e; ++i) + if (V1.getOperand(i).getOpcode() != ISD::UNDEF && + i != (unsigned)Lane) { + IsScalarToVector = false; + break; + } + if (IsScalarToVector) + return DAG.getNode(AArch64ISD::NEON_VDUP, dl, VT, + V1.getOperand(Lane)); + } return DAG.getNode(AArch64ISD::NEON_VDUPLANE, dl, VT, V1, DAG.getConstant(Lane, MVT::i64)); } + // For shuffle mask like "0, 1, 2, 3, 4, 5, 13, 7", try to generate insert + // by element from V2 to V1 . + // If shuffle mask is like "0, 1, 10, 11, 12, 13, 14, 15", V2 would be a + // better choice to be inserted than V1 as less insert needed, so we count + // element to be inserted for both V1 and V2, and select less one as insert + // target. + + // Collect elements need to be inserted and their index. + SmallVector<int, 8> NV1Elt; + SmallVector<int, 8> N1Index; + SmallVector<int, 8> NV2Elt; + SmallVector<int, 8> N2Index; + int Length = ShuffleMask.size(); + int V1EltNum = V1.getValueType().getVectorNumElements(); + for (int I = 0; I != Length; ++I) { + if (ShuffleMask[I] != I) { + NV1Elt.push_back(ShuffleMask[I]); + N1Index.push_back(I); + } + } + for (int I = 0; I != Length; ++I) { + if (ShuffleMask[I] != (I + V1EltNum)) { + NV2Elt.push_back(ShuffleMask[I]); + N2Index.push_back(I); + } + } + + // Decide which to be inserted. If all lanes mismatch, neither V1 nor V2 + // will be inserted. + SDValue InsV = V1; + SmallVector<int, 8> InsMasks = NV1Elt; + SmallVector<int, 8> InsIndex = N1Index; + if ((int)NV1Elt.size() != Length || (int)NV2Elt.size() != Length) { + if (NV1Elt.size() > NV2Elt.size()) { + InsV = V2; + InsMasks = NV2Elt; + InsIndex = N2Index; + } + } else { + InsV = DAG.getNode(ISD::UNDEF, dl, VT); + } + + SDValue PassN; + + for (int I = 0, E = InsMasks.size(); I != E; ++I) { + SDValue ExtV = V1; + int Mask = InsMasks[I]; + if (Mask > V1EltNum) { + ExtV = V2; + Mask -= V1EltNum; + } + // Any value type smaller than i32 is illegal in AArch64, and this lower + // function is called after legalize pass, so we need to legalize + // the result here. + EVT EltVT = MVT::i32; + if(EltSize == 64) + EltVT = MVT::i64; + PassN = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, ExtV, + DAG.getConstant(Mask, MVT::i64)); + PassN = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, InsV, PassN, + DAG.getConstant(InsIndex[I], MVT::i64)); + } + return PassN; } return SDValue(); |