author    Kevin Qin <Kevin.Qin@arm.com>  2013-10-11 02:33:55 +0000
committer Kevin Qin <Kevin.Qin@arm.com>  2013-10-11 02:33:55 +0000
commit    767f816b926376bd850a62a28d35343ad0559c91 (patch)
tree      93c1d07ecb87066792081ac1cbc72e278c655379 /lib
parent    6c066c044ed5b477cdec3eb3e95267783e6ce757 (diff)
Implement aarch64 neon instruction set AdvSIMD (copy).
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@192410 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r--  lib/Target/AArch64/AArch64ISelLowering.cpp | 198
-rw-r--r--  lib/Target/AArch64/AArch64ISelLowering.h   |  10
-rw-r--r--  lib/Target/AArch64/AArch64InstrNEON.td     | 312
3 files changed, 464 insertions(+), 56 deletions(-)
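As a rough illustration (not part of the commit), the C sketch below, compiled for AArch64 with Clang's vector extensions, produces the kinds of DAG patterns the new LowerBUILD_VECTOR and LowerVECTOR_SHUFFLE paths in this patch are designed to catch: a non-constant splat that can become a single dup, and a shuffle that matches the identity in every lane but one, which can become a single ins. The type and function names here are hypothetical, and the exact instructions emitted depend on the rest of the backend.

/* Hypothetical illustration only, not part of the commit. */
typedef int int32x4 __attribute__((vector_size(16)));

/* A non-constant splat: a BUILD_VECTOR whose lanes all reuse one value.
 * With this patch it can be lowered via AArch64ISD::NEON_VDUP to a single
 * dup rather than an element-by-element construction. */
int32x4 splat4(int x) {
  return (int32x4){x, x, x, x};
}

/* A shuffle mask that matches the identity except in lane 2, where the
 * element comes from the second source (index 6 == 4 + 2). The new
 * LowerVECTOR_SHUFFLE path can select this as a single insert of that
 * lane from v2 into v1. */
int32x4 insert_lane2(int32x4 v1, int32x4 v2) {
  return __builtin_shufflevector(v1, v2, 0, 1, 6, 3);
}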
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index d89213c80d..40ed8af551 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -297,15 +297,23 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM)
setOperationAction(ISD::BUILD_VECTOR, MVT::v1f64, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i16, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i32, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i32, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v1i64, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f32, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v1f64, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i8, Legal);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i16, Legal);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32, Legal);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i64, Legal);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i16, Legal);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32, Legal);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i64, Legal);
@@ -866,12 +874,12 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
return "AArch64ISD::NEON_CMPZ";
case AArch64ISD::NEON_TST:
return "AArch64ISD::NEON_TST";
- case AArch64ISD::NEON_DUPIMM:
- return "AArch64ISD::NEON_DUPIMM";
case AArch64ISD::NEON_QSHLs:
return "AArch64ISD::NEON_QSHLs";
case AArch64ISD::NEON_QSHLu:
return "AArch64ISD::NEON_QSHLu";
+ case AArch64ISD::NEON_VDUP:
+ return "AArch64ISD::NEON_VDUP";
case AArch64ISD::NEON_VDUPLANE:
return "AArch64ISD::NEON_VDUPLANE";
default:
@@ -3342,7 +3350,7 @@ static SDValue PerformShiftCombine(SDNode *N,
case ISD::SHL:
if (isVShiftLImm(N->getOperand(1), VT, Cnt)) {
SDValue RHS =
- DAG.getNode(AArch64ISD::NEON_DUPIMM, SDLoc(N->getOperand(1)), VT,
+ DAG.getNode(AArch64ISD::NEON_VDUP, SDLoc(N->getOperand(1)), VT,
DAG.getConstant(Cnt, MVT::i32));
return DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0), RHS);
}
@@ -3352,7 +3360,7 @@ static SDValue PerformShiftCombine(SDNode *N,
case ISD::SRL:
if (isVShiftRImm(N->getOperand(1), VT, Cnt)) {
SDValue RHS =
- DAG.getNode(AArch64ISD::NEON_DUPIMM, SDLoc(N->getOperand(1)), VT,
+ DAG.getNode(AArch64ISD::NEON_VDUP, SDLoc(N->getOperand(1)), VT,
DAG.getConstant(Cnt, MVT::i32));
return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N->getOperand(0), RHS);
}
@@ -3492,6 +3500,107 @@ AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
}
}
}
+
+ unsigned NumElts = VT.getVectorNumElements();
+ bool isOnlyLowElement = true;
+ bool usesOnlyOneValue = true;
+ bool hasDominantValue = false;
+ bool isConstant = true;
+
+ // Map of the number of times a particular SDValue appears in the
+ // element list.
+ DenseMap<SDValue, unsigned> ValueCounts;
+ SDValue Value;
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDValue V = Op.getOperand(i);
+ if (V.getOpcode() == ISD::UNDEF)
+ continue;
+ if (i > 0)
+ isOnlyLowElement = false;
+ if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
+ isConstant = false;
+
+ ValueCounts.insert(std::make_pair(V, 0));
+ unsigned &Count = ValueCounts[V];
+
+ // Is this value dominant? (takes up more than half of the lanes)
+ if (++Count > (NumElts / 2)) {
+ hasDominantValue = true;
+ Value = V;
+ }
+ }
+ if (ValueCounts.size() != 1)
+ usesOnlyOneValue = false;
+ if (!Value.getNode() && ValueCounts.size() > 0)
+ Value = ValueCounts.begin()->first;
+
+ if (ValueCounts.size() == 0)
+ return DAG.getUNDEF(VT);
+
+ // Loads are better lowered with insert_vector_elt, so keep going if we
+ // hit that case.
+ if (isOnlyLowElement && !ISD::isNormalLoad(Value.getNode()))
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Value);
+
+ unsigned EltSize = VT.getVectorElementType().getSizeInBits();
+ // Use VDUP for non-constant splats.
+ if (hasDominantValue && EltSize <= 64) {
+ if (!isConstant) {
+ SDValue N;
+
+ // If we are DUPing a value that comes directly from a vector, we could
+ // just use DUPLANE. We can only do this if the lane being extracted
+ // is at a constant index, as the DUP from lane instructions only have
+ // constant-index forms.
+ if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ isa<ConstantSDNode>(Value->getOperand(1))) {
+ N = DAG.getNode(AArch64ISD::NEON_VDUPLANE, DL, VT,
+ Value->getOperand(0), Value->getOperand(1));
+ } else
+ N = DAG.getNode(AArch64ISD::NEON_VDUP, DL, VT, Value);
+
+ if (!usesOnlyOneValue) {
+ // The dominant value was splatted as 'N', but we now have to insert
+ // all differing elements.
+ for (unsigned I = 0; I < NumElts; ++I) {
+ if (Op.getOperand(I) == Value)
+ continue;
+ SmallVector<SDValue, 3> Ops;
+ Ops.push_back(N);
+ Ops.push_back(Op.getOperand(I));
+ Ops.push_back(DAG.getConstant(I, MVT::i32));
+ N = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, &Ops[0], 3);
+ }
+ }
+ return N;
+ }
+ if (usesOnlyOneValue && isConstant) {
+ return DAG.getNode(AArch64ISD::NEON_VDUP, DL, VT, Value);
+ }
+ }
+ // If all elements are constants and the case above didn't get hit, fall back
+ // to the default expansion, which will generate a load from the constant
+ // pool.
+ if (isConstant)
+ return SDValue();
+
+ // If all else fails, just use a sequence of INSERT_VECTOR_ELT when we
+ // know the default expansion would otherwise fall back on something even
+ // worse. For a vector with one or two non-undef values, that's
+ // scalar_to_vector for the elements followed by a shuffle (provided the
+ // shuffle is valid for the target) and materialization element by element
+ // on the stack followed by a load for everything else.
+ if (!isConstant && !usesOnlyOneValue) {
+ SDValue Vec = DAG.getUNDEF(VT);
+ for (unsigned i = 0 ; i < NumElts; ++i) {
+ SDValue V = Op.getOperand(i);
+ if (V.getOpcode() == ISD::UNDEF)
+ continue;
+ SDValue LaneIdx = DAG.getConstant(i, MVT::i32);
+ Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V, LaneIdx);
+ }
+ return Vec;
+ }
return SDValue();
}
@@ -3499,6 +3608,7 @@ SDValue
AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
SelectionDAG &DAG) const {
SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
SDLoc dl(Op);
EVT VT = Op.getValueType();
ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
@@ -3516,9 +3626,89 @@ AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
// If this is undef splat, generate it via "just" vdup, if possible.
if (Lane == -1) Lane = 0;
+ // Test if V1 is a SCALAR_TO_VECTOR.
+ if (V1.getOpcode() == ISD::SCALAR_TO_VECTOR) {
+ return DAG.getNode(AArch64ISD::NEON_VDUP, dl, VT, V1.getOperand(0));
+ }
+ // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR.
+ if (V1.getOpcode() == ISD::BUILD_VECTOR) {
+ bool IsScalarToVector = true;
+ for (unsigned i = 0, e = V1.getNumOperands(); i != e; ++i)
+ if (V1.getOperand(i).getOpcode() != ISD::UNDEF &&
+ i != (unsigned)Lane) {
+ IsScalarToVector = false;
+ break;
+ }
+ if (IsScalarToVector)
+ return DAG.getNode(AArch64ISD::NEON_VDUP, dl, VT,
+ V1.getOperand(Lane));
+ }
return DAG.getNode(AArch64ISD::NEON_VDUPLANE, dl, VT, V1,
DAG.getConstant(Lane, MVT::i64));
}
+ // For a shuffle mask like "0, 1, 2, 3, 4, 5, 13, 7", try to generate an
+ // insert by element from V2 into V1.
+ // If the shuffle mask is like "0, 1, 10, 11, 12, 13, 14, 15", V2 is a
+ // better insert target than V1 because it needs fewer inserts, so count
+ // the elements that would have to be inserted for both V1 and V2 and
+ // pick the one needing fewer inserts as the insert target.
+
+ // Collect elements need to be inserted and their index.
+ SmallVector<int, 8> NV1Elt;
+ SmallVector<int, 8> N1Index;
+ SmallVector<int, 8> NV2Elt;
+ SmallVector<int, 8> N2Index;
+ int Length = ShuffleMask.size();
+ int V1EltNum = V1.getValueType().getVectorNumElements();
+ for (int I = 0; I != Length; ++I) {
+ if (ShuffleMask[I] != I) {
+ NV1Elt.push_back(ShuffleMask[I]);
+ N1Index.push_back(I);
+ }
+ }
+ for (int I = 0; I != Length; ++I) {
+ if (ShuffleMask[I] != (I + V1EltNum)) {
+ NV2Elt.push_back(ShuffleMask[I]);
+ N2Index.push_back(I);
+ }
+ }
+
+ // Decide which vector to insert into. If every lane mismatches, neither
+ // V1 nor V2 is used as the base and we insert into an UNDEF vector.
+ SDValue InsV = V1;
+ SmallVector<int, 8> InsMasks = NV1Elt;
+ SmallVector<int, 8> InsIndex = N1Index;
+ if ((int)NV1Elt.size() != Length || (int)NV2Elt.size() != Length) {
+ if (NV1Elt.size() > NV2Elt.size()) {
+ InsV = V2;
+ InsMasks = NV2Elt;
+ InsIndex = N2Index;
+ }
+ } else {
+ InsV = DAG.getNode(ISD::UNDEF, dl, VT);
+ }
+
+ SDValue PassN;
+
+ for (int I = 0, E = InsMasks.size(); I != E; ++I) {
+ SDValue ExtV = V1;
+ int Mask = InsMasks[I];
+ if (Mask > V1EltNum) {
+ ExtV = V2;
+ Mask -= V1EltNum;
+ }
+ // Any value type smaller than i32 is illegal in AArch64, and this lowering
+ // function is called after the legalize pass, so we need to legalize
+ // the result here.
+ EVT EltVT = MVT::i32;
+ if(EltSize == 64)
+ EltVT = MVT::i64;
+ PassN = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, ExtV,
+ DAG.getConstant(Mask, MVT::i64));
+ PassN = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, InsV, PassN,
+ DAG.getConstant(InsIndex[I], MVT::i64));
+ }
+ return PassN;
}
return SDValue();
diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h
index da7f62361b..a9bc70ab68 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/lib/Target/AArch64/AArch64ISelLowering.h
@@ -134,13 +134,13 @@ namespace AArch64ISD {
// Vector compare bitwise test
NEON_TST,
- // Operation for the immediate in vector shift
- NEON_DUPIMM,
-
// Vector saturating shift
NEON_QSHLs,
NEON_QSHLu,
+ // Vector dup
+ NEON_VDUP,
+
// Vector dup by lane
NEON_VDUPLANE
};
@@ -296,6 +296,10 @@ enum NeonModImmType {
Neon_Mov_Imm,
Neon_Mvn_Imm
};
+
+extern SDValue ScanBUILD_VECTOR(SDValue Op, bool &isOnlyLowElement,
+ bool &usesOnlyOneValue, bool &hasDominantValue,
+ bool &isConstant, bool &isUNDEF);
} // namespace llvm
#endif // LLVM_TARGET_AARCH64_ISELLOWERING_H
diff --git a/lib/Target/AArch64/AArch64InstrNEON.td b/lib/Target/AArch64/AArch64InstrNEON.td
index 355de53768..63b8442dbc 100644
--- a/lib/Target/AArch64/AArch64InstrNEON.td
+++ b/lib/Target/AArch64/AArch64InstrNEON.td
@@ -41,14 +41,13 @@ def Neon_cmpz : SDNode<"AArch64ISD::NEON_CMPZ", SDTypeProfile<1, 3,
def Neon_tst : SDNode<"AArch64ISD::NEON_TST", SDTypeProfile<1, 2,
[SDTCisVec<0>, SDTCisSameAs<1, 2>]>>;
-def Neon_dupImm : SDNode<"AArch64ISD::NEON_DUPIMM", SDTypeProfile<1, 1,
- [SDTCisVec<0>, SDTCisVT<1, i32>]>>;
-
def SDTARMVSH : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
SDTCisVT<2, i32>]>;
def Neon_sqrshlImm : SDNode<"AArch64ISD::NEON_QSHLs", SDTARMVSH>;
def Neon_uqrshlImm : SDNode<"AArch64ISD::NEON_QSHLu", SDTARMVSH>;
+def Neon_vdup : SDNode<"AArch64ISD::NEON_VDUP", SDTypeProfile<1, 1,
+ [SDTCisVec<0>]>>;
def Neon_vduplane : SDNode<"AArch64ISD::NEON_VDUPLANE", SDTypeProfile<1, 2,
[SDTCisVec<0>, SDTCisVec<1>, SDTCisVT<2, i64>]>>;
@@ -1480,7 +1479,7 @@ class N2VShift<bit q, bit u, bits<5> opcode, string asmop, string T,
asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
[(set (Ty VPRC:$Rd),
(Ty (OpNode (Ty VPRC:$Rn),
- (Ty (Neon_dupImm (i32 imm:$Imm))))))],
+ (Ty (Neon_vdup (i32 imm:$Imm))))))],
NoItinerary>;
multiclass NeonI_N2VShL<bit u, bits<5> opcode, string asmop> {
@@ -1585,7 +1584,7 @@ class N2VShiftLong<bit q, bit u, bits<5> opcode, string asmop, string DestT,
[(set (DestTy VPR128:$Rd),
(DestTy (shl
(DestTy (ExtOp (SrcTy VPR64:$Rn))),
- (DestTy (Neon_dupImm (i32 imm:$Imm))))))],
+ (DestTy (Neon_vdup (i32 imm:$Imm))))))],
NoItinerary>;
class N2VShiftLongHigh<bit q, bit u, bits<5> opcode, string asmop, string DestT,
@@ -1599,7 +1598,7 @@ class N2VShiftLongHigh<bit q, bit u, bits<5> opcode, string asmop, string DestT,
(DestTy (shl
(DestTy (ExtOp
(SrcTy (getTop VPR128:$Rn)))),
- (DestTy (Neon_dupImm (i32 imm:$Imm))))))],
+ (DestTy (Neon_vdup (i32 imm:$Imm))))))],
NoItinerary>;
multiclass NeonI_N2VShLL<string prefix, bit u, bits<5> opcode, string asmop,
@@ -1771,7 +1770,7 @@ class N2VShiftAdd<bit q, bit u, bits<5> opcode, string asmop, string T,
asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
[(set (Ty VPRC:$Rd), (Ty (add (Ty VPRC:$src),
(Ty (OpNode (Ty VPRC:$Rn),
- (Ty (Neon_dupImm (i32 imm:$Imm))))))))],
+ (Ty (Neon_vdup (i32 imm:$Imm))))))))],
NoItinerary> {
let Constraints = "$src = $Rd";
}
@@ -2048,48 +2047,48 @@ def Neon_combine_2d : PatFrag<(ops node:$Rm, node:$Rn),
def Neon_lshrImm8H : PatFrag<(ops node:$lhs, node:$rhs),
(v8i16 (srl (v8i16 node:$lhs),
- (v8i16 (Neon_dupImm (i32 node:$rhs)))))>;
+ (v8i16 (Neon_vdup (i32 node:$rhs)))))>;
def Neon_lshrImm4S : PatFrag<(ops node:$lhs, node:$rhs),
(v4i32 (srl (v4i32 node:$lhs),
- (v4i32 (Neon_dupImm (i32 node:$rhs)))))>;
+ (v4i32 (Neon_vdup (i32 node:$rhs)))))>;
def Neon_lshrImm2D : PatFrag<(ops node:$lhs, node:$rhs),
(v2i64 (srl (v2i64 node:$lhs),
- (v2i64 (Neon_dupImm (i32 node:$rhs)))))>;
+ (v2i64 (Neon_vdup (i32 node:$rhs)))))>;
def Neon_ashrImm8H : PatFrag<(ops node:$lhs, node:$rhs),
(v8i16 (sra (v8i16 node:$lhs),
- (v8i16 (Neon_dupImm (i32 node:$rhs)))))>;
+ (v8i16 (Neon_vdup (i32 node:$rhs)))))>;
def Neon_ashrImm4S : PatFrag<(ops node:$lhs, node:$rhs),
(v4i32 (sra (v4i32 node:$lhs),
- (v4i32 (Neon_dupImm (i32 node:$rhs)))))>;
+ (v4i32 (Neon_vdup (i32 node:$rhs)))))>;
def Neon_ashrImm2D : PatFrag<(ops node:$lhs, node:$rhs),
(v2i64 (sra (v2i64 node:$lhs),
- (v2i64 (Neon_dupImm (i32 node:$rhs)))))>;
+ (v2i64 (Neon_vdup (i32 node:$rhs)))))>;
// Normal shift right narrow is matched by IR (srl/sra, trunc, concat_vectors)
multiclass Neon_shiftNarrow_patterns<string shr> {
def : Pat<(v8i8 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm8H") VPR128:$Rn,
- imm:$Imm))),
+ (i32 imm:$Imm)))),
(SHRNvvi_8B VPR128:$Rn, imm:$Imm)>;
def : Pat<(v4i16 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm4S") VPR128:$Rn,
- imm:$Imm))),
+ (i32 imm:$Imm)))),
(SHRNvvi_4H VPR128:$Rn, imm:$Imm)>;
def : Pat<(v2i32 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm2D") VPR128:$Rn,
- imm:$Imm))),
+ (i32 imm:$Imm)))),
(SHRNvvi_2S VPR128:$Rn, imm:$Imm)>;
def : Pat<(Neon_combine_2D (v1i64 VPR64:$src), (v1i64 (bitconvert
(v8i8 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm8H")
- VPR128:$Rn, imm:$Imm)))))),
- (SHRNvvi_16B (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
+ VPR128:$Rn, (i32 imm:$Imm))))))),
+ (SHRNvvi_16B (v2i64 (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64)),
VPR128:$Rn, imm:$Imm)>;
def : Pat<(Neon_combine_2D (v1i64 VPR64:$src), (v1i64 (bitconvert
(v4i16 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm4S")
- VPR128:$Rn, imm:$Imm)))))),
+ VPR128:$Rn, (i32 imm:$Imm))))))),
(SHRNvvi_8H (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
VPR128:$Rn, imm:$Imm)>;
def : Pat<(Neon_combine_2D (v1i64 VPR64:$src), (v1i64 (bitconvert
(v2i32 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm2D")
- VPR128:$Rn, imm:$Imm)))))),
+ VPR128:$Rn, (i32 imm:$Imm))))))),
(SHRNvvi_4S (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
VPR128:$Rn, imm:$Imm)>;
}
@@ -2486,13 +2485,13 @@ multiclass NeonI_get_high
{
def _8h : PatFrag<(ops node:$Rn),
(v8i8 (trunc (v8i16 (srl (v8i16 node:$Rn),
- (v8i16 (Neon_dupImm 8))))))>;
+ (v8i16 (Neon_vdup (i32 8)))))))>;
def _4s : PatFrag<(ops node:$Rn),
(v4i16 (trunc (v4i32 (srl (v4i32 node:$Rn),
- (v4i32 (Neon_dupImm 16))))))>;
+ (v4i32 (Neon_vdup (i32 16)))))))>;
def _2d : PatFrag<(ops node:$Rn),
(v2i32 (trunc (v2i64 (srl (v2i64 node:$Rn),
- (v2i64 (Neon_dupImm 32))))))>;
+ (v2i64 (Neon_vdup (i32 32)))))))>;
}
defm NI_get_hi : NeonI_get_high;
@@ -4513,6 +4512,46 @@ def INSELd : NeonI_INS_element<"ins", "d", v2i64, neon_uimm1_bare, i64> {
// bits 11-13 are unspecified.
}
+multiclass Neon_INS_elt_float_pattern<ValueType ResTy, ValueType NaTy,
+ ValueType MidTy,
+ RegisterClass OpFPR, Operand ResImm,
+ SubRegIndex SubIndex, Instruction INS> {
+def : Pat<(ResTy (vector_insert
+ (ResTy VPR128:$src),
+ (MidTy (vector_extract
+ (ResTy VPR128:$Rn),
+ (ResImm:$Immn))),
+ (ResImm:$Immd))),
+ (INS (ResTy VPR128:$src), (ResTy VPR128:$Rn),
+ ResImm:$Immd, ResImm:$Immn)>;
+
+def : Pat <(ResTy (vector_insert
+ (ResTy VPR128:$src),
+ (MidTy OpFPR:$Rn),
+ (ResImm:$Imm))),
+ (INS (ResTy VPR128:$src),
+ (ResTy (SUBREG_TO_REG (i64 0), OpFPR:$Rn, SubIndex)),
+ ResImm:$Imm,
+ (i64 0))>;
+
+def : Pat <(NaTy (vector_insert
+ (NaTy VPR64:$src),
+ (MidTy OpFPR:$Rn),
+ (ResImm:$Imm))),
+ (NaTy (EXTRACT_SUBREG
+ (ResTy (INS
+ (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$src), sub_64)),
+ (ResTy (SUBREG_TO_REG (i64 0), (MidTy OpFPR:$Rn), SubIndex)),
+ ResImm:$Imm,
+ (i64 0))),
+ sub_64))>;
+}
+
+defm : Neon_INS_elt_float_pattern<v4f32, v2f32, f32, FPR32, neon_uimm2_bare,
+ sub_32, INSELs>;
+defm : Neon_INS_elt_float_pattern<v2f64, v1f64, f64, FPR64, neon_uimm1_bare,
+ sub_64, INSELd>;
+
multiclass Neon_INS_elt_pattern <ValueType NaTy, Operand NaImm,
ValueType MidTy, ValueType StTy,
Operand StImm, Instruction INS> {
@@ -4557,14 +4596,15 @@ def : Pat<(NaTy (vector_insert
sub_64))>;
}
-defm INSb_pattern : Neon_INS_elt_pattern<v8i8, neon_uimm3_bare, i32,
- v16i8, neon_uimm4_bare, INSELb>;
-defm INSh_pattern : Neon_INS_elt_pattern<v4i16, neon_uimm2_bare, i32,
- v8i16, neon_uimm3_bare, INSELh>;
-defm INSs_pattern : Neon_INS_elt_pattern<v2i32, neon_uimm1_bare, i32,
- v4i32, neon_uimm2_bare, INSELs>;
-defm INSd_pattern : Neon_INS_elt_pattern<v1i64, neon_uimm0_bare, i64,
- v2i64, neon_uimm1_bare, INSELd>;
+defm : Neon_INS_elt_pattern<v8i8, neon_uimm3_bare, i32,
+ v16i8, neon_uimm4_bare, INSELb>;
+defm : Neon_INS_elt_pattern<v4i16, neon_uimm2_bare, i32,
+ v8i16, neon_uimm3_bare, INSELh>;
+defm : Neon_INS_elt_pattern<v2i32, neon_uimm1_bare, i32,
+ v4i32, neon_uimm2_bare, INSELs>;
+defm : Neon_INS_elt_pattern<v1i64, neon_uimm0_bare, i64,
+ v2i64, neon_uimm1_bare, INSELd>;
+
class NeonI_SMOV<string asmop, string Res, bit Q,
ValueType OpTy, ValueType eleTy,
@@ -4640,12 +4680,12 @@ multiclass Neon_SMOVx_pattern <ValueType StTy, ValueType NaTy,
NaImm:$Imm)>;
}
-defm SMOVxb_pattern : Neon_SMOVx_pattern<v16i8, v8i8, i8, neon_uimm4_bare,
- neon_uimm3_bare, SMOVxb>;
-defm SMOVxh_pattern : Neon_SMOVx_pattern<v8i16, v4i16, i16, neon_uimm3_bare,
- neon_uimm2_bare, SMOVxh>;
-defm SMOVxs_pattern : Neon_SMOVx_pattern<v4i32, v2i32, i32, neon_uimm2_bare,
- neon_uimm1_bare, SMOVxs>;
+defm : Neon_SMOVx_pattern<v16i8, v8i8, i8, neon_uimm4_bare,
+ neon_uimm3_bare, SMOVxb>;
+defm : Neon_SMOVx_pattern<v8i16, v4i16, i16, neon_uimm3_bare,
+ neon_uimm2_bare, SMOVxh>;
+defm : Neon_SMOVx_pattern<v4i32, v2i32, i32, neon_uimm2_bare,
+ neon_uimm1_bare, SMOVxs>;
class Neon_SMOVw_pattern <ValueType StTy, ValueType NaTy,
ValueType eleTy, Operand StImm, Operand NaImm,
@@ -4657,11 +4697,10 @@ class Neon_SMOVw_pattern <ValueType StTy, ValueType NaTy,
(SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
NaImm:$Imm)>;
-def SMOVwb_pattern : Neon_SMOVw_pattern<v16i8, v8i8, i8, neon_uimm4_bare,
- neon_uimm3_bare, SMOVwb>;
-def SMOVwh_pattern : Neon_SMOVw_pattern<v8i16, v4i16, i16, neon_uimm3_bare,
- neon_uimm2_bare, SMOVwh>;
-
+def : Neon_SMOVw_pattern<v16i8, v8i8, i8, neon_uimm4_bare,
+ neon_uimm3_bare, SMOVwb>;
+def : Neon_SMOVw_pattern<v8i16, v4i16, i16, neon_uimm3_bare,
+ neon_uimm2_bare, SMOVwh>;
class NeonI_UMOV<string asmop, string Res, bit Q,
ValueType OpTy, Operand OpImm,
@@ -4702,12 +4741,12 @@ class Neon_UMOV_pattern <ValueType StTy, ValueType NaTy, ValueType ResTy,
(SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
NaImm:$Imm)>;
-def UMOVwb_pattern : Neon_UMOV_pattern<v16i8, v8i8, i32, neon_uimm4_bare,
- neon_uimm3_bare, UMOVwb>;
-def UMOVwh_pattern : Neon_UMOV_pattern<v8i16, v4i16, i32, neon_uimm3_bare,
- neon_uimm2_bare, UMOVwh>;
-def UMOVws_pattern : Neon_UMOV_pattern<v4i32, v2i32, i32, neon_uimm2_bare,
- neon_uimm1_bare, UMOVws>;
+def : Neon_UMOV_pattern<v16i8, v8i8, i32, neon_uimm4_bare,
+ neon_uimm3_bare, UMOVwb>;
+def : Neon_UMOV_pattern<v8i16, v4i16, i32, neon_uimm3_bare,
+ neon_uimm2_bare, UMOVwh>;
+def : Neon_UMOV_pattern<v4i32, v2i32, i32, neon_uimm2_bare,
+ neon_uimm1_bare, UMOVws>;
def : Pat<(i32 (and
(i32 (vector_extract
@@ -4786,4 +4825,179 @@ def : Pat<(v1i64 (scalar_to_vector GPR64:$src)),
def : Pat<(v1f32 (scalar_to_vector (f32 FPR32:$Rn))),
(v1f32 FPR32:$Rn)>;
def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$Rn))),
- (v1f64 FPR64:$Rn)>;
\ No newline at end of file
+ (v1f64 FPR64:$Rn)>;
+
+def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$src))),
+ (FMOVdd $src)>;
+
+class NeonI_DUP_Elt<bit Q, string asmop, string rdlane, string rnlane,
+ RegisterOperand ResVPR, ValueType ResTy,
+ ValueType OpTy, Operand OpImm>
+ : NeonI_copy<Q, 0b0, 0b0000, (outs ResVPR:$Rd),
+ (ins VPR128:$Rn, OpImm:$Imm),
+ asmop # "\t$Rd" # rdlane # ", $Rn" # rnlane # "[$Imm]",
+ [],
+ NoItinerary> {
+ bits<4> Imm;
+}
+
+def DUPELT16b : NeonI_DUP_Elt<0b1, "dup", ".16b", ".b", VPR128, v16i8, v16i8,
+ neon_uimm4_bare> {
+ let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
+}
+
+def DUPELT8h : NeonI_DUP_Elt<0b1, "dup", ".8h", ".h", VPR128, v8i16, v8i16,
+ neon_uimm3_bare> {
+ let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
+}
+
+def DUPELT4s : NeonI_DUP_Elt<0b1, "dup", ".4s", ".s", VPR128, v4i32, v4i32,
+ neon_uimm2_bare> {
+ let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
+}
+
+def DUPELT2d : NeonI_DUP_Elt<0b1, "dup", ".2d", ".d", VPR128, v2i64, v2i64,
+ neon_uimm1_bare> {
+ let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0};
+}
+
+def DUPELT8b : NeonI_DUP_Elt<0b0, "dup", ".8b", ".b", VPR64, v8i8, v16i8,
+ neon_uimm4_bare> {
+ let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
+}
+
+def DUPELT4h : NeonI_DUP_Elt<0b0, "dup", ".4h", ".h", VPR64, v4i16, v8i16,
+ neon_uimm3_bare> {
+ let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
+}
+
+def DUPELT2s : NeonI_DUP_Elt<0b0, "dup", ".2s", ".s", VPR64, v2i32, v4i32,
+ neon_uimm2_bare> {
+ let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
+}
+
+multiclass NeonI_DUP_Elt_pattern<Instruction DUPELT, ValueType ResTy,
+ ValueType OpTy,ValueType NaTy,
+ ValueType ExTy, Operand OpLImm,
+ Operand OpNImm> {
+def : Pat<(ResTy (Neon_vduplane (OpTy VPR128:$Rn), OpLImm:$Imm)),
+ (ResTy (DUPELT (OpTy VPR128:$Rn), OpLImm:$Imm))>;
+
+def : Pat<(ResTy (Neon_vduplane
+ (NaTy VPR64:$Rn), OpNImm:$Imm)),
+ (ResTy (DUPELT
+ (ExTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), OpNImm:$Imm))>;
+}
+defm : NeonI_DUP_Elt_pattern<DUPELT16b, v16i8, v16i8, v8i8, v16i8,
+ neon_uimm4_bare, neon_uimm3_bare>;
+defm : NeonI_DUP_Elt_pattern<DUPELT8b, v8i8, v16i8, v8i8, v16i8,
+ neon_uimm4_bare, neon_uimm3_bare>;
+defm : NeonI_DUP_Elt_pattern<DUPELT8h, v8i16, v8i16, v4i16, v8i16,
+ neon_uimm3_bare, neon_uimm2_bare>;
+defm : NeonI_DUP_Elt_pattern<DUPELT4h, v4i16, v8i16, v4i16, v8i16,
+ neon_uimm3_bare, neon_uimm2_bare>;
+defm : NeonI_DUP_Elt_pattern<DUPELT4s, v4i32, v4i32, v2i32, v4i32,
+ neon_uimm2_bare, neon_uimm1_bare>;
+defm : NeonI_DUP_Elt_pattern<DUPELT2s, v2i32, v4i32, v2i32, v4i32,
+ neon_uimm2_bare, neon_uimm1_bare>;
+defm : NeonI_DUP_Elt_pattern<DUPELT2d, v2i64, v2i64, v1i64, v2i64,
+ neon_uimm1_bare, neon_uimm0_bare>;
+defm : NeonI_DUP_Elt_pattern<DUPELT4s, v4f32, v4f32, v2f32, v4f32,
+ neon_uimm2_bare, neon_uimm1_bare>;
+defm : NeonI_DUP_Elt_pattern<DUPELT2s, v2f32, v4f32, v2f32, v4f32,
+ neon_uimm2_bare, neon_uimm1_bare>;
+defm : NeonI_DUP_Elt_pattern<DUPELT2d, v2f64, v2f64, v1f64, v2f64,
+ neon_uimm1_bare, neon_uimm0_bare>;
+
+def : Pat<(v2f32 (Neon_vdup (f32 FPR32:$Rn))),
+ (v2f32 (DUPELT2s
+ (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32),
+ (i64 0)))>;
+def : Pat<(v4f32 (Neon_vdup (f32 FPR32:$Rn))),
+ (v4f32 (DUPELT4s
+ (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32),
+ (i64 0)))>;
+def : Pat<(v2f64 (Neon_vdup (f64 FPR64:$Rn))),
+ (v2f64 (DUPELT2d
+ (SUBREG_TO_REG (i64 0), FPR64:$Rn, sub_64),
+ (i64 0)))>;
+
+class NeonI_DUP<bit Q, string asmop, string rdlane,
+ RegisterOperand ResVPR, ValueType ResTy,
+ RegisterClass OpGPR, ValueType OpTy>
+ : NeonI_copy<Q, 0b0, 0b0001, (outs ResVPR:$Rd), (ins OpGPR:$Rn),
+ asmop # "\t$Rd" # rdlane # ", $Rn",
+ [(set (ResTy ResVPR:$Rd),
+ (ResTy (Neon_vdup (OpTy OpGPR:$Rn))))],
+ NoItinerary>;
+
+def DUP16b : NeonI_DUP<0b1, "dup", ".16b", VPR128, v16i8, GPR32, i32> {
+ let Inst{16} = 0b1;
+ // bits 17-19 are unspecified.
+}
+
+def DUP8h : NeonI_DUP<0b1, "dup", ".8h", VPR128, v8i16, GPR32, i32> {
+ let Inst{17-16} = 0b10;
+ // bits 18-19 are unspecified.
+}
+
+def DUP4s : NeonI_DUP<0b1, "dup", ".4s", VPR128, v4i32, GPR32, i32> {
+ let Inst{18-16} = 0b100;
+ // bit 19 is unspecified.
+}
+
+def DUP2d : NeonI_DUP<0b1, "dup", ".2d", VPR128, v2i64, GPR64, i64> {
+ let Inst{19-16} = 0b1000;
+}
+
+def DUP8b : NeonI_DUP<0b0, "dup", ".8b", VPR64, v8i8, GPR32, i32> {
+ let Inst{16} = 0b1;
+ // bits 17-19 are unspecified.
+}
+
+def DUP4h : NeonI_DUP<0b0, "dup", ".4h", VPR64, v4i16, GPR32, i32> {
+ let Inst{17-16} = 0b10;
+ // bits 18-19 are unspecified.
+}
+
+def DUP2s : NeonI_DUP<0b0, "dup", ".2s", VPR64, v2i32, GPR32, i32> {
+ let Inst{18-16} = 0b100;
+ // bit 19 is unspecified.
+}
+
+// patterns for CONCAT_VECTORS
+multiclass Concat_Vector_Pattern<ValueType ResTy, ValueType OpTy> {
+def : Pat<(ResTy (concat_vectors (OpTy VPR64:$Rn), undef)),
+ (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)>;
+def : Pat<(ResTy (concat_vectors (OpTy VPR64:$Rn), (OpTy VPR64:$Rm))),
+ (INSELd
+ (v2i64 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
+ (v2i64 (SUBREG_TO_REG (i64 0), VPR64:$Rm, sub_64)),
+ (i64 1),
+ (i64 0))>;
+def : Pat<(ResTy (concat_vectors (OpTy VPR64:$Rn), (OpTy VPR64:$Rn))),
+ (DUPELT2d
+ (v2i64 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
+ (i64 0))> ;
+}
+
+defm : Concat_Vector_Pattern<v16i8, v8i8>;
+defm : Concat_Vector_Pattern<v8i16, v4i16>;
+defm : Concat_Vector_Pattern<v4i32, v2i32>;
+defm : Concat_Vector_Pattern<v2i64, v1i64>;
+defm : Concat_Vector_Pattern<v4f32, v2f32>;
+defm : Concat_Vector_Pattern<v2f64, v1f64>;
+
+// Patterns for EXTRACT_SUBVECTOR
+def : Pat<(v8i8 (extract_subvector (v16i8 VPR128:$Rn), (i64 0))),
+ (v8i8 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
+def : Pat<(v4i16 (extract_subvector (v8i16 VPR128:$Rn), (i64 0))),
+ (v4i16 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
+def : Pat<(v2i32 (extract_subvector (v4i32 VPR128:$Rn), (i64 0))),
+ (v2i32 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
+def : Pat<(v1i64 (extract_subvector (v2i64 VPR128:$Rn), (i64 0))),
+ (v1i64 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
+def : Pat<(v2f32 (extract_subvector (v4f32 VPR128:$Rn), (i64 0))),
+ (v2f32 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
+def : Pat<(v1f64 (extract_subvector (v2f64 VPR128:$Rn), (i64 0))),
+ (v1f64 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
\ No newline at end of file
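
As a closing illustration (again hypothetical, not part of the commit), the C functions below, built with Clang's vector extensions, can give rise to the concat_vectors and extract_subvector nodes that the new TableGen patterns above select; the names and types are assumptions made for the example, and actual selection depends on how the front end and DAG builder canonicalize the shuffles.

/* Hypothetical illustration only, not part of the commit. */
typedef int int32x2 __attribute__((vector_size(8)));
typedef int int32x4 __attribute__((vector_size(16)));

/* Concatenating two 64-bit halves can become a concat_vectors node, which
 * Concat_Vector_Pattern selects as an INS of hi's d-register into lane 1
 * of the widened lo. */
int32x4 concat_halves(int32x2 lo, int32x2 hi) {
  return __builtin_shufflevector(lo, hi, 0, 1, 2, 3);
}

/* Taking the low half can become an extract_subvector at index 0, which
 * the new patterns select as an EXTRACT_SUBREG of the low 64 bits
 * (sub_64), i.e. no instruction at all. */
int32x2 low_half(int32x4 v) {
  return __builtin_shufflevector(v, v, 0, 1);
}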