Diffstat (limited to 'lib/Target/AArch64/AArch64ISelDAGToDAG.cpp')
-rw-r--r--  lib/Target/AArch64/AArch64ISelDAGToDAG.cpp  589
1 file changed, 469 insertions(+), 120 deletions(-)
diff --git a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index a6ebfe31b4..ef99541c17 100644
--- a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -117,11 +117,11 @@ private:
SDNode *SelectVTBL(SDNode *N, unsigned NumVecs, bool IsExt);
/// Select NEON load intrinsics. NumVecs should be 1, 2, 3 or 4.
- SDNode *SelectVLD(SDNode *N, unsigned NumVecs, bool isUpdating,
+ SDNode *SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
const uint16_t *Opcode);
/// Select NEON store intrinsics. NumVecs should be 1, 2, 3 or 4.
- SDNode *SelectVST(SDNode *N, unsigned NumVecs, bool isUpdating,
+ SDNode *SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
const uint16_t *Opcodes);
/// Form sequences of consecutive 64/128-bit registers for use in NEON
@@ -135,6 +135,19 @@ private:
/// functions. Those should almost always be called instead.
SDValue createTuple(ArrayRef<SDValue> Vecs, unsigned RegClassIDs[],
unsigned SubRegs[]);
+
+ /// Select NEON load-duplicate intrinsics. NumVecs should be 2, 3 or 4.
+ /// The opcode array specifies the instructions used for load.
+ SDNode *SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
+ const uint16_t *Opcodes);
+
+ /// Select NEON load/store lane intrinsics. NumVecs should be 2, 3 or 4.
+ /// The opcode arrays specify the instructions used for load/store.
+ SDNode *SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
+ unsigned NumVecs, const uint16_t *Opcodes);
+
+ SDValue getTargetSubregToReg(int SRIdx, SDLoc DL, EVT VT, EVT VTD,
+ SDValue Operand);
};
}
@@ -590,32 +603,84 @@ static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
case AArch64::ST1x4WB_8H_fixed: return AArch64::ST1x4WB_8H_register;
case AArch64::ST1x4WB_4S_fixed: return AArch64::ST1x4WB_4S_register;
case AArch64::ST1x4WB_2D_fixed: return AArch64::ST1x4WB_2D_register;
+
+ // Post-indexed duplicate loads
+ case AArch64::LD2R_WB_8B_fixed: return AArch64::LD2R_WB_8B_register;
+ case AArch64::LD2R_WB_4H_fixed: return AArch64::LD2R_WB_4H_register;
+ case AArch64::LD2R_WB_2S_fixed: return AArch64::LD2R_WB_2S_register;
+ case AArch64::LD2R_WB_1D_fixed: return AArch64::LD2R_WB_1D_register;
+ case AArch64::LD2R_WB_16B_fixed: return AArch64::LD2R_WB_16B_register;
+ case AArch64::LD2R_WB_8H_fixed: return AArch64::LD2R_WB_8H_register;
+ case AArch64::LD2R_WB_4S_fixed: return AArch64::LD2R_WB_4S_register;
+ case AArch64::LD2R_WB_2D_fixed: return AArch64::LD2R_WB_2D_register;
+
+ case AArch64::LD3R_WB_8B_fixed: return AArch64::LD3R_WB_8B_register;
+ case AArch64::LD3R_WB_4H_fixed: return AArch64::LD3R_WB_4H_register;
+ case AArch64::LD3R_WB_2S_fixed: return AArch64::LD3R_WB_2S_register;
+ case AArch64::LD3R_WB_1D_fixed: return AArch64::LD3R_WB_1D_register;
+ case AArch64::LD3R_WB_16B_fixed: return AArch64::LD3R_WB_16B_register;
+ case AArch64::LD3R_WB_8H_fixed: return AArch64::LD3R_WB_8H_register;
+ case AArch64::LD3R_WB_4S_fixed: return AArch64::LD3R_WB_4S_register;
+ case AArch64::LD3R_WB_2D_fixed: return AArch64::LD3R_WB_2D_register;
+
+ case AArch64::LD4R_WB_8B_fixed: return AArch64::LD4R_WB_8B_register;
+ case AArch64::LD4R_WB_4H_fixed: return AArch64::LD4R_WB_4H_register;
+ case AArch64::LD4R_WB_2S_fixed: return AArch64::LD4R_WB_2S_register;
+ case AArch64::LD4R_WB_1D_fixed: return AArch64::LD4R_WB_1D_register;
+ case AArch64::LD4R_WB_16B_fixed: return AArch64::LD4R_WB_16B_register;
+ case AArch64::LD4R_WB_8H_fixed: return AArch64::LD4R_WB_8H_register;
+ case AArch64::LD4R_WB_4S_fixed: return AArch64::LD4R_WB_4S_register;
+ case AArch64::LD4R_WB_2D_fixed: return AArch64::LD4R_WB_2D_register;
+
+ // Post-indexed lane loads
+ case AArch64::LD2LN_WB_B_fixed: return AArch64::LD2LN_WB_B_register;
+ case AArch64::LD2LN_WB_H_fixed: return AArch64::LD2LN_WB_H_register;
+ case AArch64::LD2LN_WB_S_fixed: return AArch64::LD2LN_WB_S_register;
+ case AArch64::LD2LN_WB_D_fixed: return AArch64::LD2LN_WB_D_register;
+
+ case AArch64::LD3LN_WB_B_fixed: return AArch64::LD3LN_WB_B_register;
+ case AArch64::LD3LN_WB_H_fixed: return AArch64::LD3LN_WB_H_register;
+ case AArch64::LD3LN_WB_S_fixed: return AArch64::LD3LN_WB_S_register;
+ case AArch64::LD3LN_WB_D_fixed: return AArch64::LD3LN_WB_D_register;
+
+ case AArch64::LD4LN_WB_B_fixed: return AArch64::LD4LN_WB_B_register;
+ case AArch64::LD4LN_WB_H_fixed: return AArch64::LD4LN_WB_H_register;
+ case AArch64::LD4LN_WB_S_fixed: return AArch64::LD4LN_WB_S_register;
+ case AArch64::LD4LN_WB_D_fixed: return AArch64::LD4LN_WB_D_register;
+
+ // Post-indexed lane stores
+ case AArch64::ST2LN_WB_B_fixed: return AArch64::ST2LN_WB_B_register;
+ case AArch64::ST2LN_WB_H_fixed: return AArch64::ST2LN_WB_H_register;
+ case AArch64::ST2LN_WB_S_fixed: return AArch64::ST2LN_WB_S_register;
+ case AArch64::ST2LN_WB_D_fixed: return AArch64::ST2LN_WB_D_register;
+
+ case AArch64::ST3LN_WB_B_fixed: return AArch64::ST3LN_WB_B_register;
+ case AArch64::ST3LN_WB_H_fixed: return AArch64::ST3LN_WB_H_register;
+ case AArch64::ST3LN_WB_S_fixed: return AArch64::ST3LN_WB_S_register;
+ case AArch64::ST3LN_WB_D_fixed: return AArch64::ST3LN_WB_D_register;
+
+ case AArch64::ST4LN_WB_B_fixed: return AArch64::ST4LN_WB_B_register;
+ case AArch64::ST4LN_WB_H_fixed: return AArch64::ST4LN_WB_H_register;
+ case AArch64::ST4LN_WB_S_fixed: return AArch64::ST4LN_WB_S_register;
+ case AArch64::ST4LN_WB_D_fixed: return AArch64::ST4LN_WB_D_register;
}
return Opc; // If not one we handle, return it unchanged.
}
-SDNode *AArch64DAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
- bool isUpdating,
+SDNode *AArch64DAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating,
+ unsigned NumVecs,
const uint16_t *Opcodes) {
assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
EVT VT = N->getValueType(0);
unsigned OpcodeIndex;
- switch (VT.getSimpleVT().SimpleTy) {
+ bool is64BitVector = VT.is64BitVector();
+ switch (VT.getScalarType().getSizeInBits()) {
+ case 8: OpcodeIndex = is64BitVector ? 0 : 4; break;
+ case 16: OpcodeIndex = is64BitVector ? 1 : 5; break;
+ case 32: OpcodeIndex = is64BitVector ? 2 : 6; break;
+ case 64: OpcodeIndex = is64BitVector ? 3 : 7; break;
default: llvm_unreachable("unhandled vector load type");
- case MVT::v8i8: OpcodeIndex = 0; break;
- case MVT::v4i16: OpcodeIndex = 1; break;
- case MVT::v2f32:
- case MVT::v2i32: OpcodeIndex = 2; break;
- case MVT::v1f64:
- case MVT::v1i64: OpcodeIndex = 3; break;
- case MVT::v16i8: OpcodeIndex = 4; break;
- case MVT::v8f16:
- case MVT::v8i16: OpcodeIndex = 5; break;
- case MVT::v4f32:
- case MVT::v4i32: OpcodeIndex = 6; break;
- case MVT::v2f64:
- case MVT::v2i64: OpcodeIndex = 7; break;
}
unsigned Opc = Opcodes[OpcodeIndex];
@@ -632,9 +697,8 @@ SDNode *AArch64DAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
Ops.push_back(N->getOperand(0)); // Push back the Chain
- std::vector<EVT> ResTys;
- bool is64BitVector = VT.is64BitVector();
-
+ SmallVector<EVT, 3> ResTys;
+ // Push back the type of the returned super register
if (NumVecs == 1)
ResTys.push_back(VT);
else if (NumVecs == 3)
@@ -675,8 +739,8 @@ SDNode *AArch64DAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
return NULL;
}
-SDNode *AArch64DAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
- bool isUpdating,
+SDNode *AArch64DAGToDAGISel::SelectVST(SDNode *N, bool isUpdating,
+ unsigned NumVecs,
const uint16_t *Opcodes) {
assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
SDLoc dl(N);
@@ -685,28 +749,20 @@ SDNode *AArch64DAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
unsigned AddrOpIdx = isUpdating ? 1 : 2;
- unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
+ unsigned Vec0Idx = 3;
EVT VT = N->getOperand(Vec0Idx).getValueType();
unsigned OpcodeIndex;
- switch (VT.getSimpleVT().SimpleTy) {
+ bool is64BitVector = VT.is64BitVector();
+ switch (VT.getScalarType().getSizeInBits()) {
+ case 8: OpcodeIndex = is64BitVector ? 0 : 4; break;
+ case 16: OpcodeIndex = is64BitVector ? 1 : 5; break;
+ case 32: OpcodeIndex = is64BitVector ? 2 : 6; break;
+ case 64: OpcodeIndex = is64BitVector ? 3 : 7; break;
default: llvm_unreachable("unhandled vector store type");
- case MVT::v8i8: OpcodeIndex = 0; break;
- case MVT::v4i16: OpcodeIndex = 1; break;
- case MVT::v2f32:
- case MVT::v2i32: OpcodeIndex = 2; break;
- case MVT::v1f64:
- case MVT::v1i64: OpcodeIndex = 3; break;
- case MVT::v16i8: OpcodeIndex = 4; break;
- case MVT::v8f16:
- case MVT::v8i16: OpcodeIndex = 5; break;
- case MVT::v4f32:
- case MVT::v4i32: OpcodeIndex = 6; break;
- case MVT::v2f64:
- case MVT::v2i64: OpcodeIndex = 7; break;
}
unsigned Opc = Opcodes[OpcodeIndex];
- std::vector<EVT> ResTys;
+ SmallVector<EVT, 2> ResTys;
if (isUpdating)
ResTys.push_back(MVT::i64);
ResTys.push_back(MVT::Other); // Type for the Chain
@@ -720,7 +776,6 @@ SDNode *AArch64DAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
Opc = getVLDSTRegisterUpdateOpcode(Opc);
Ops.push_back(Inc);
}
- bool is64BitVector = VT.is64BitVector();
SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Idx,
N->op_begin() + Vec0Idx + NumVecs);
@@ -737,6 +792,172 @@ SDNode *AArch64DAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
return VSt;
}
+SDValue
+AArch64DAGToDAGISel::getTargetSubregToReg(int SRIdx, SDLoc DL, EVT VT, EVT VTD,
+ SDValue Operand) {
+ SDNode *Reg = CurDAG->getMachineNode(TargetOpcode::SUBREG_TO_REG, DL,
+ VT, VTD, MVT::Other,
+ CurDAG->getTargetConstant(0, MVT::i64),
+ Operand,
+ CurDAG->getTargetConstant(AArch64::sub_64, MVT::i32));
+ return SDValue(Reg, 0);
+}
+
+SDNode *AArch64DAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating,
+ unsigned NumVecs,
+ const uint16_t *Opcodes) {
+ assert(NumVecs >= 2 && NumVecs <= 4 && "Load Dup NumVecs out-of-range");
+ SDLoc dl(N);
+
+ EVT VT = N->getValueType(0);
+ unsigned OpcodeIndex;
+ bool is64BitVector = VT.is64BitVector();
+ switch (VT.getScalarType().getSizeInBits()) {
+ case 8: OpcodeIndex = is64BitVector ? 0 : 4; break;
+ case 16: OpcodeIndex = is64BitVector ? 1 : 5; break;
+ case 32: OpcodeIndex = is64BitVector ? 2 : 6; break;
+ case 64: OpcodeIndex = is64BitVector ? 3 : 7; break;
+ default: llvm_unreachable("unhandled vector duplicate lane load type");
+ }
+ unsigned Opc = Opcodes[OpcodeIndex];
+
+ SDValue SuperReg;
+ SmallVector<SDValue, 6> Ops;
+ Ops.push_back(N->getOperand(1)); // Push back the Memory Address
+ if (isUpdating) {
+ SDValue Inc = N->getOperand(2);
+ if (!isa<ConstantSDNode>(Inc.getNode())) // Increment in Register
+ Opc = getVLDSTRegisterUpdateOpcode(Opc);
+ Ops.push_back(Inc);
+ }
+ Ops.push_back(N->getOperand(0)); // Push back the Chain
+
+ SmallVector<EVT, 3> ResTys;
+ // Push back the type of the returned super register
+ if (NumVecs == 3)
+ ResTys.push_back(MVT::Untyped);
+ else {
+ EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64,
+ is64BitVector ? NumVecs : NumVecs * 2);
+ ResTys.push_back(ResTy);
+ }
+ if (isUpdating)
+ ResTys.push_back(MVT::i64); // Type of the updated register
+ ResTys.push_back(MVT::Other); // Type of the Chain
+ SDNode *VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
+
+ // Transfer memoperands.
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
+ cast<MachineSDNode>(VLdDup)->setMemRefs(MemOp, MemOp + 1);
+
+ SuperReg = SDValue(VLdDup, 0);
+ unsigned Sub0 = is64BitVector ? AArch64::dsub_0 : AArch64::qsub_0;
+ // Update the uses of each register in the super register
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
+ ReplaceUses(SDValue(N, Vec),
+ CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
+ // Update uses of the Chain
+ ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
+ if (isUpdating)
+ ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
+ return NULL;
+}
+
+// Load/store lane instructions only exist for 128-bit vector types.
+// If the operands are 64-bit vectors, we still select the 128-bit
+// instructions: SUBREG_TO_REG widens each 64-bit input to a 128-bit
+// vector, and EXTRACT_SUBREG recovers the 64-bit vectors from the
+// 128-bit output.
+SDNode *AArch64DAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
+ bool isUpdating, unsigned NumVecs,
+ const uint16_t *Opcodes) {
+ assert(NumVecs >= 2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
+ SDLoc dl(N);
+ unsigned AddrOpIdx = isUpdating ? 1 : 2;
+ unsigned Vec0Idx = 3;
+
+ SDValue Chain = N->getOperand(0);
+ unsigned Lane =
+ cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue();
+ EVT VT = N->getOperand(Vec0Idx).getValueType();
+ bool is64BitVector = VT.is64BitVector();
+ EVT VT64; // 64-bit Vector Type
+
+ if (is64BitVector) {
+ VT64 = VT;
+ VT = EVT::getVectorVT(*CurDAG->getContext(), VT.getVectorElementType(),
+ VT.getVectorNumElements() * 2);
+ }
+
+ unsigned OpcodeIndex;
+ switch (VT.getScalarType().getSizeInBits()) {
+ case 8: OpcodeIndex = 0; break;
+ case 16: OpcodeIndex = 1; break;
+ case 32: OpcodeIndex = 2; break;
+ case 64: OpcodeIndex = 3; break;
+ default: llvm_unreachable("unhandled vector lane load/store type");
+ }
+ unsigned Opc = Opcodes[OpcodeIndex];
+
+ SmallVector<EVT, 3> ResTys;
+ if (IsLoad) {
+ // Push back the type of the returned super register
+ if (NumVecs == 3)
+ ResTys.push_back(MVT::Untyped);
+ else {
+ EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64,
+ is64BitVector ? NumVecs : NumVecs * 2);
+ ResTys.push_back(ResTy);
+ }
+ }
+ if (isUpdating)
+ ResTys.push_back(MVT::i64); // Type of the updated register
+ ResTys.push_back(MVT::Other); // Type of Chain
+ SmallVector<SDValue, 5> Ops;
+ Ops.push_back(N->getOperand(AddrOpIdx)); // Push back the Memory Address
+ if (isUpdating) {
+ SDValue Inc = N->getOperand(AddrOpIdx + 1);
+ if (!isa<ConstantSDNode>(Inc.getNode())) // Increment in Register
+ Opc = getVLDSTRegisterUpdateOpcode(Opc);
+ Ops.push_back(Inc);
+ }
+
+ SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Idx,
+ N->op_begin() + Vec0Idx + NumVecs);
+ if (is64BitVector)
+ for (unsigned i = 0; i < Regs.size(); i++)
+ Regs[i] = getTargetSubregToReg(AArch64::sub_64, dl, VT, VT64, Regs[i]);
+ SDValue SuperReg = createQTuple(Regs);
+
+ Ops.push_back(SuperReg); // Source Reg
+ SDValue LaneValue = CurDAG->getTargetConstant(Lane, MVT::i32);
+ Ops.push_back(LaneValue);
+ Ops.push_back(Chain); // Push back the Chain
+
+ SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
+ cast<MachineSDNode>(VLdLn)->setMemRefs(MemOp, MemOp + 1);
+ if (!IsLoad)
+ return VLdLn;
+
+ // Extract the subregisters.
+ SuperReg = SDValue(VLdLn, 0);
+ unsigned Sub0 = AArch64::qsub_0;
+ // Update the uses of each register in the super register
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
+ SDValue SUB0 = CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg);
+ if (is64BitVector) {
+ SUB0 = CurDAG->getTargetExtractSubreg(AArch64::sub_64, dl, VT64, SUB0);
+ }
+ ReplaceUses(SDValue(N, Vec), SUB0);
+ }
+ ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
+ if (isUpdating)
+ ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
+ return NULL;
+}
+
unsigned AArch64DAGToDAGISel::getTBLOpc(bool IsExt, bool Is64Bit,
unsigned NumOfVec) {
assert(NumOfVec >= 1 && NumOfVec <= 4 && "VST NumVecs out-of-range");
@@ -955,7 +1176,7 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
AArch64::LD1WB_16B_fixed, AArch64::LD1WB_8H_fixed,
AArch64::LD1WB_4S_fixed, AArch64::LD1WB_2D_fixed
};
- return SelectVLD(Node, 1, true, Opcodes);
+ return SelectVLD(Node, true, 1, Opcodes);
}
case AArch64ISD::NEON_LD2_UPD: {
static const uint16_t Opcodes[] = {
@@ -964,7 +1185,7 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
AArch64::LD2WB_16B_fixed, AArch64::LD2WB_8H_fixed,
AArch64::LD2WB_4S_fixed, AArch64::LD2WB_2D_fixed
};
- return SelectVLD(Node, 2, true, Opcodes);
+ return SelectVLD(Node, true, 2, Opcodes);
}
case AArch64ISD::NEON_LD3_UPD: {
static const uint16_t Opcodes[] = {
@@ -973,7 +1194,7 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
AArch64::LD3WB_16B_fixed, AArch64::LD3WB_8H_fixed,
AArch64::LD3WB_4S_fixed, AArch64::LD3WB_2D_fixed
};
- return SelectVLD(Node, 3, true, Opcodes);
+ return SelectVLD(Node, true, 3, Opcodes);
}
case AArch64ISD::NEON_LD4_UPD: {
static const uint16_t Opcodes[] = {
@@ -982,7 +1203,7 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
AArch64::LD4WB_16B_fixed, AArch64::LD4WB_8H_fixed,
AArch64::LD4WB_4S_fixed, AArch64::LD4WB_2D_fixed
};
- return SelectVLD(Node, 4, true, Opcodes);
+ return SelectVLD(Node, true, 4, Opcodes);
}
case AArch64ISD::NEON_LD1x2_UPD: {
static const uint16_t Opcodes[] = {
@@ -991,7 +1212,7 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
AArch64::LD1x2WB_16B_fixed, AArch64::LD1x2WB_8H_fixed,
AArch64::LD1x2WB_4S_fixed, AArch64::LD1x2WB_2D_fixed
};
- return SelectVLD(Node, 2, true, Opcodes);
+ return SelectVLD(Node, true, 2, Opcodes);
}
case AArch64ISD::NEON_LD1x3_UPD: {
static const uint16_t Opcodes[] = {
@@ -1000,7 +1221,7 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
AArch64::LD1x3WB_16B_fixed, AArch64::LD1x3WB_8H_fixed,
AArch64::LD1x3WB_4S_fixed, AArch64::LD1x3WB_2D_fixed
};
- return SelectVLD(Node, 3, true, Opcodes);
+ return SelectVLD(Node, true, 3, Opcodes);
}
case AArch64ISD::NEON_LD1x4_UPD: {
static const uint16_t Opcodes[] = {
@@ -1009,7 +1230,7 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
AArch64::LD1x4WB_16B_fixed, AArch64::LD1x4WB_8H_fixed,
AArch64::LD1x4WB_4S_fixed, AArch64::LD1x4WB_2D_fixed
};
- return SelectVLD(Node, 4, true, Opcodes);
+ return SelectVLD(Node, true, 4, Opcodes);
}
case AArch64ISD::NEON_ST1_UPD: {
static const uint16_t Opcodes[] = {
@@ -1018,7 +1239,7 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
AArch64::ST1WB_16B_fixed, AArch64::ST1WB_8H_fixed,
AArch64::ST1WB_4S_fixed, AArch64::ST1WB_2D_fixed
};
- return SelectVST(Node, 1, true, Opcodes);
+ return SelectVST(Node, true, 1, Opcodes);
}
case AArch64ISD::NEON_ST2_UPD: {
static const uint16_t Opcodes[] = {
@@ -1027,7 +1248,7 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
AArch64::ST2WB_16B_fixed, AArch64::ST2WB_8H_fixed,
AArch64::ST2WB_4S_fixed, AArch64::ST2WB_2D_fixed
};
- return SelectVST(Node, 2, true, Opcodes);
+ return SelectVST(Node, true, 2, Opcodes);
}
case AArch64ISD::NEON_ST3_UPD: {
static const uint16_t Opcodes[] = {
@@ -1036,7 +1257,7 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
AArch64::ST3WB_16B_fixed, AArch64::ST3WB_8H_fixed,
AArch64::ST3WB_4S_fixed, AArch64::ST3WB_2D_fixed
};
- return SelectVST(Node, 3, true, Opcodes);
+ return SelectVST(Node, true, 3, Opcodes);
}
case AArch64ISD::NEON_ST4_UPD: {
static const uint16_t Opcodes[] = {
@@ -1045,7 +1266,100 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
AArch64::ST4WB_16B_fixed, AArch64::ST4WB_8H_fixed,
AArch64::ST4WB_4S_fixed, AArch64::ST4WB_2D_fixed
};
- return SelectVST(Node, 4, true, Opcodes);
+ return SelectVST(Node, true, 4, Opcodes);
+ }
+ case AArch64ISD::NEON_LD2DUP: {
+ static const uint16_t Opcodes[] = {
+ AArch64::LD2R_8B, AArch64::LD2R_4H, AArch64::LD2R_2S,
+ AArch64::LD2R_1D, AArch64::LD2R_16B, AArch64::LD2R_8H,
+ AArch64::LD2R_4S, AArch64::LD2R_2D
+ };
+ return SelectVLDDup(Node, false, 2, Opcodes);
+ }
+ case AArch64ISD::NEON_LD3DUP: {
+ static const uint16_t Opcodes[] = {
+ AArch64::LD3R_8B, AArch64::LD3R_4H, AArch64::LD3R_2S,
+ AArch64::LD3R_1D, AArch64::LD3R_16B, AArch64::LD3R_8H,
+ AArch64::LD3R_4S, AArch64::LD3R_2D
+ };
+ return SelectVLDDup(Node, false, 3, Opcodes);
+ }
+ case AArch64ISD::NEON_LD4DUP: {
+ static const uint16_t Opcodes[] = {
+ AArch64::LD4R_8B, AArch64::LD4R_4H, AArch64::LD4R_2S,
+ AArch64::LD4R_1D, AArch64::LD4R_16B, AArch64::LD4R_8H,
+ AArch64::LD4R_4S, AArch64::LD4R_2D
+ };
+ return SelectVLDDup(Node, false, 4, Opcodes);
+ }
+ case AArch64ISD::NEON_LD2DUP_UPD: {
+ static const uint16_t Opcodes[] = {
+ AArch64::LD2R_WB_8B_fixed, AArch64::LD2R_WB_4H_fixed,
+ AArch64::LD2R_WB_2S_fixed, AArch64::LD2R_WB_1D_fixed,
+ AArch64::LD2R_WB_16B_fixed, AArch64::LD2R_WB_8H_fixed,
+ AArch64::LD2R_WB_4S_fixed, AArch64::LD2R_WB_2D_fixed
+ };
+ return SelectVLDDup(Node, true, 2, Opcodes);
+ }
+ case AArch64ISD::NEON_LD3DUP_UPD: {
+ static const uint16_t Opcodes[] = {
+ AArch64::LD3R_WB_8B_fixed, AArch64::LD3R_WB_4H_fixed,
+ AArch64::LD3R_WB_2S_fixed, AArch64::LD3R_WB_1D_fixed,
+ AArch64::LD3R_WB_16B_fixed, AArch64::LD3R_WB_8H_fixed,
+ AArch64::LD3R_WB_4S_fixed, AArch64::LD3R_WB_2D_fixed
+ };
+ return SelectVLDDup(Node, true, 3, Opcodes);
+ }
+ case AArch64ISD::NEON_LD4DUP_UPD: {
+ static const uint16_t Opcodes[] = {
+ AArch64::LD4R_WB_8B_fixed, AArch64::LD4R_WB_4H_fixed,
+ AArch64::LD4R_WB_2S_fixed, AArch64::LD4R_WB_1D_fixed,
+ AArch64::LD4R_WB_16B_fixed, AArch64::LD4R_WB_8H_fixed,
+ AArch64::LD4R_WB_4S_fixed, AArch64::LD4R_WB_2D_fixed
+ };
+ return SelectVLDDup(Node, true, 4, Opcodes);
+ }
+ case AArch64ISD::NEON_LD2LN_UPD: {
+ static const uint16_t Opcodes[] = {
+ AArch64::LD2LN_WB_B_fixed, AArch64::LD2LN_WB_H_fixed,
+ AArch64::LD2LN_WB_S_fixed, AArch64::LD2LN_WB_D_fixed
+ };
+ return SelectVLDSTLane(Node, true, true, 2, Opcodes);
+ }
+ case AArch64ISD::NEON_LD3LN_UPD: {
+ static const uint16_t Opcodes[] = {
+ AArch64::LD3LN_WB_B_fixed, AArch64::LD3LN_WB_H_fixed,
+ AArch64::LD3LN_WB_S_fixed, AArch64::LD3LN_WB_D_fixed
+ };
+ return SelectVLDSTLane(Node, true, true, 3, Opcodes);
+ }
+ case AArch64ISD::NEON_LD4LN_UPD: {
+ static const uint16_t Opcodes[] = {
+ AArch64::LD4LN_WB_B_fixed, AArch64::LD4LN_WB_H_fixed,
+ AArch64::LD4LN_WB_S_fixed, AArch64::LD4LN_WB_D_fixed
+ };
+ return SelectVLDSTLane(Node, true, true, 4, Opcodes);
+ }
+ case AArch64ISD::NEON_ST2LN_UPD: {
+ static const uint16_t Opcodes[] = {
+ AArch64::ST2LN_WB_B_fixed, AArch64::ST2LN_WB_H_fixed,
+ AArch64::ST2LN_WB_S_fixed, AArch64::ST2LN_WB_D_fixed
+ };
+ return SelectVLDSTLane(Node, false, true, 2, Opcodes);
+ }
+ case AArch64ISD::NEON_ST3LN_UPD: {
+ static const uint16_t Opcodes[] = {
+ AArch64::ST3LN_WB_B_fixed, AArch64::ST3LN_WB_H_fixed,
+ AArch64::ST3LN_WB_S_fixed, AArch64::ST3LN_WB_D_fixed
+ };
+ return SelectVLDSTLane(Node, false, true, 3, Opcodes);
+ }
+ case AArch64ISD::NEON_ST4LN_UPD: {
+ static const uint16_t Opcodes[] = {
+ AArch64::ST4LN_WB_B_fixed, AArch64::ST4LN_WB_H_fixed,
+ AArch64::ST4LN_WB_S_fixed, AArch64::ST4LN_WB_D_fixed
+ };
+ return SelectVLDSTLane(Node, false, true, 4, Opcodes);
}
case AArch64ISD::NEON_ST1x2_UPD: {
static const uint16_t Opcodes[] = {
@@ -1054,7 +1368,7 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
AArch64::ST1x2WB_16B_fixed, AArch64::ST1x2WB_8H_fixed,
AArch64::ST1x2WB_4S_fixed, AArch64::ST1x2WB_2D_fixed
};
- return SelectVST(Node, 2, true, Opcodes);
+ return SelectVST(Node, true, 2, Opcodes);
}
case AArch64ISD::NEON_ST1x3_UPD: {
static const uint16_t Opcodes[] = {
@@ -1063,7 +1377,7 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
AArch64::ST1x3WB_16B_fixed, AArch64::ST1x3WB_8H_fixed,
AArch64::ST1x3WB_4S_fixed, AArch64::ST1x3WB_2D_fixed
};
- return SelectVST(Node, 3, true, Opcodes);
+ return SelectVST(Node, true, 3, Opcodes);
}
case AArch64ISD::NEON_ST1x4_UPD: {
static const uint16_t Opcodes[] = {
@@ -1072,7 +1386,7 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
AArch64::ST1x4WB_16B_fixed, AArch64::ST1x4WB_8H_fixed,
AArch64::ST1x4WB_4S_fixed, AArch64::ST1x4WB_2D_fixed
};
- return SelectVST(Node, 4, true, Opcodes);
+ return SelectVST(Node, true, 4, Opcodes);
}
case ISD::INTRINSIC_WO_CHAIN: {
unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
@@ -1105,114 +1419,149 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
switch (IntNo) {
default:
break;
-
case Intrinsic::arm_neon_vld1: {
- static const uint16_t Opcodes[] = { AArch64::LD1_8B, AArch64::LD1_4H,
- AArch64::LD1_2S, AArch64::LD1_1D,
- AArch64::LD1_16B, AArch64::LD1_8H,
- AArch64::LD1_4S, AArch64::LD1_2D };
- return SelectVLD(Node, 1, false, Opcodes);
+ static const uint16_t Opcodes[] = {
+ AArch64::LD1_8B, AArch64::LD1_4H, AArch64::LD1_2S, AArch64::LD1_1D,
+ AArch64::LD1_16B, AArch64::LD1_8H, AArch64::LD1_4S, AArch64::LD1_2D
+ };
+ return SelectVLD(Node, false, 1, Opcodes);
}
case Intrinsic::arm_neon_vld2: {
- static const uint16_t Opcodes[] = { AArch64::LD2_8B, AArch64::LD2_4H,
- AArch64::LD2_2S, AArch64::LD1x2_1D,
- AArch64::LD2_16B, AArch64::LD2_8H,
- AArch64::LD2_4S, AArch64::LD2_2D };
- return SelectVLD(Node, 2, false, Opcodes);
+ static const uint16_t Opcodes[] = {
+ AArch64::LD2_8B, AArch64::LD2_4H, AArch64::LD2_2S, AArch64::LD1x2_1D,
+ AArch64::LD2_16B, AArch64::LD2_8H, AArch64::LD2_4S, AArch64::LD2_2D
+ };
+ return SelectVLD(Node, false, 2, Opcodes);
}
case Intrinsic::arm_neon_vld3: {
- static const uint16_t Opcodes[] = { AArch64::LD3_8B, AArch64::LD3_4H,
- AArch64::LD3_2S, AArch64::LD1x3_1D,
- AArch64::LD3_16B, AArch64::LD3_8H,
- AArch64::LD3_4S, AArch64::LD3_2D };
- return SelectVLD(Node, 3, false, Opcodes);
+ static const uint16_t Opcodes[] = {
+ AArch64::LD3_8B, AArch64::LD3_4H, AArch64::LD3_2S, AArch64::LD1x3_1D,
+ AArch64::LD3_16B, AArch64::LD3_8H, AArch64::LD3_4S, AArch64::LD3_2D
+ };
+ return SelectVLD(Node, false, 3, Opcodes);
}
case Intrinsic::arm_neon_vld4: {
- static const uint16_t Opcodes[] = { AArch64::LD4_8B, AArch64::LD4_4H,
- AArch64::LD4_2S, AArch64::LD1x4_1D,
- AArch64::LD4_16B, AArch64::LD4_8H,
- AArch64::LD4_4S, AArch64::LD4_2D };
- return SelectVLD(Node, 4, false, Opcodes);
+ static const uint16_t Opcodes[] = {
+ AArch64::LD4_8B, AArch64::LD4_4H, AArch64::LD4_2S, AArch64::LD1x4_1D,
+ AArch64::LD4_16B, AArch64::LD4_8H, AArch64::LD4_4S, AArch64::LD4_2D
+ };
+ return SelectVLD(Node, false, 4, Opcodes);
}
case Intrinsic::aarch64_neon_vld1x2: {
static const uint16_t Opcodes[] = {
- AArch64::LD1x2_8B, AArch64::LD1x2_4H, AArch64::LD1x2_2S,
- AArch64::LD1x2_1D, AArch64::LD1x2_16B, AArch64::LD1x2_8H,
- AArch64::LD1x2_4S, AArch64::LD1x2_2D
+ AArch64::LD1x2_8B, AArch64::LD1x2_4H, AArch64::LD1x2_2S,
+ AArch64::LD1x2_1D, AArch64::LD1x2_16B, AArch64::LD1x2_8H,
+ AArch64::LD1x2_4S, AArch64::LD1x2_2D
};
- return SelectVLD(Node, 2, false, Opcodes);
+ return SelectVLD(Node, false, 2, Opcodes);
}
case Intrinsic::aarch64_neon_vld1x3: {
static const uint16_t Opcodes[] = {
- AArch64::LD1x3_8B, AArch64::LD1x3_4H, AArch64::LD1x3_2S,
- AArch64::LD1x3_1D, AArch64::LD1x3_16B, AArch64::LD1x3_8H,
- AArch64::LD1x3_4S, AArch64::LD1x3_2D
+ AArch64::LD1x3_8B, AArch64::LD1x3_4H, AArch64::LD1x3_2S,
+ AArch64::LD1x3_1D, AArch64::LD1x3_16B, AArch64::LD1x3_8H,
+ AArch64::LD1x3_4S, AArch64::LD1x3_2D
};
- return SelectVLD(Node, 3, false, Opcodes);
+ return SelectVLD(Node, false, 3, Opcodes);
}
case Intrinsic::aarch64_neon_vld1x4: {
static const uint16_t Opcodes[] = {
- AArch64::LD1x4_8B, AArch64::LD1x4_4H, AArch64::LD1x4_2S,
- AArch64::LD1x4_1D, AArch64::LD1x4_16B, AArch64::LD1x4_8H,
- AArch64::LD1x4_4S, AArch64::LD1x4_2D
+ AArch64::LD1x4_8B, AArch64::LD1x4_4H, AArch64::LD1x4_2S,
+ AArch64::LD1x4_1D, AArch64::LD1x4_16B, AArch64::LD1x4_8H,
+ AArch64::LD1x4_4S, AArch64::LD1x4_2D
};
- return SelectVLD(Node, 4, false, Opcodes);
+ return SelectVLD(Node, false, 4, Opcodes);
}
case Intrinsic::arm_neon_vst1: {
- static const uint16_t Opcodes[] = { AArch64::ST1_8B, AArch64::ST1_4H,
- AArch64::ST1_2S, AArch64::ST1_1D,
- AArch64::ST1_16B, AArch64::ST1_8H,
- AArch64::ST1_4S, AArch64::ST1_2D };
- return SelectVST(Node, 1, false, Opcodes);
+ static const uint16_t Opcodes[] = {
+ AArch64::ST1_8B, AArch64::ST1_4H, AArch64::ST1_2S, AArch64::ST1_1D,
+ AArch64::ST1_16B, AArch64::ST1_8H, AArch64::ST1_4S, AArch64::ST1_2D
+ };
+ return SelectVST(Node, false, 1, Opcodes);
}
case Intrinsic::arm_neon_vst2: {
- static const uint16_t Opcodes[] = { AArch64::ST2_8B, AArch64::ST2_4H,
- AArch64::ST2_2S, AArch64::ST1x2_1D,
- AArch64::ST2_16B, AArch64::ST2_8H,
- AArch64::ST2_4S, AArch64::ST2_2D };
- return SelectVST(Node, 2, false, Opcodes);
+ static const uint16_t Opcodes[] = {
+ AArch64::ST2_8B, AArch64::ST2_4H, AArch64::ST2_2S, AArch64::ST1x2_1D,
+ AArch64::ST2_16B, AArch64::ST2_8H, AArch64::ST2_4S, AArch64::ST2_2D
+ };
+ return SelectVST(Node, false, 2, Opcodes);
}
case Intrinsic::arm_neon_vst3: {
- static const uint16_t Opcodes[] = { AArch64::ST3_8B, AArch64::ST3_4H,
- AArch64::ST3_2S, AArch64::ST1x3_1D,
- AArch64::ST3_16B, AArch64::ST3_8H,
- AArch64::ST3_4S, AArch64::ST3_2D };
- return SelectVST(Node, 3, false, Opcodes);
+ static const uint16_t Opcodes[] = {
+ AArch64::ST3_8B, AArch64::ST3_4H, AArch64::ST3_2S, AArch64::ST1x3_1D,
+ AArch64::ST3_16B, AArch64::ST3_8H, AArch64::ST3_4S, AArch64::ST3_2D
+ };
+ return SelectVST(Node, false, 3, Opcodes);
}
case Intrinsic::arm_neon_vst4: {
- static const uint16_t Opcodes[] = { AArch64::ST4_8B, AArch64::ST4_4H,
- AArch64::ST4_2S, AArch64::ST1x4_1D,
- AArch64::ST4_16B, AArch64::ST4_8H,
- AArch64::ST4_4S, AArch64::ST4_2D };
- return SelectVST(Node, 4, false, Opcodes);
+ static const uint16_t Opcodes[] = {
+ AArch64::ST4_8B, AArch64::ST4_4H, AArch64::ST4_2S, AArch64::ST1x4_1D,
+ AArch64::ST4_16B, AArch64::ST4_8H, AArch64::ST4_4S, AArch64::ST4_2D
+ };
+ return SelectVST(Node, false, 4, Opcodes);
}
case Intrinsic::aarch64_neon_vst1x2: {
static const uint16_t Opcodes[] = {
- AArch64::ST1x2_8B, AArch64::ST1x2_4H, AArch64::ST1x2_2S,
- AArch64::ST1x2_1D, AArch64::ST1x2_16B, AArch64::ST1x2_8H,
- AArch64::ST1x2_4S, AArch64::ST1x2_2D
+ AArch64::ST1x2_8B, AArch64::ST1x2_4H, AArch64::ST1x2_2S,
+ AArch64::ST1x2_1D, AArch64::ST1x2_16B, AArch64::ST1x2_8H,
+ AArch64::ST1x2_4S, AArch64::ST1x2_2D
};
- return SelectVST(Node, 2, false, Opcodes);
+ return SelectVST(Node, false, 2, Opcodes);
}
case Intrinsic::aarch64_neon_vst1x3: {
static const uint16_t Opcodes[] = {
- AArch64::ST1x3_8B, AArch64::ST1x3_4H, AArch64::ST1x3_2S,
- AArch64::ST1x3_1D, AArch64::ST1x3_16B, AArch64::ST1x3_8H,
- AArch64::ST1x3_4S, AArch64::ST1x3_2D
+ AArch64::ST1x3_8B, AArch64::ST1x3_4H, AArch64::ST1x3_2S,
+ AArch64::ST1x3_1D, AArch64::ST1x3_16B, AArch64::ST1x3_8H,
+ AArch64::ST1x3_4S, AArch64::ST1x3_2D
};
- return SelectVST(Node, 3, false, Opcodes);
+ return SelectVST(Node, false, 3, Opcodes);
}
case Intrinsic::aarch64_neon_vst1x4: {
static const uint16_t Opcodes[] = {
- AArch64::ST1x4_8B, AArch64::ST1x4_4H, AArch64::ST1x4_2S,
- AArch64::ST1x4_1D, AArch64::ST1x4_16B, AArch64::ST1x4_8H,
- AArch64::ST1x4_4S, AArch64::ST1x4_2D
+ AArch64::ST1x4_8B, AArch64::ST1x4_4H, AArch64::ST1x4_2S,
+ AArch64::ST1x4_1D, AArch64::ST1x4_16B, AArch64::ST1x4_8H,
+ AArch64::ST1x4_4S, AArch64::ST1x4_2D
+ };
+ return SelectVST(Node, false, 4, Opcodes);
+ }
+ case Intrinsic::arm_neon_vld2lane: {
+ static const uint16_t Opcodes[] = {
+ AArch64::LD2LN_B, AArch64::LD2LN_H, AArch64::LD2LN_S, AArch64::LD2LN_D
+ };
+ return SelectVLDSTLane(Node, true, false, 2, Opcodes);
+ }
+ case Intrinsic::arm_neon_vld3lane: {
+ static const uint16_t Opcodes[] = {
+ AArch64::LD3LN_B, AArch64::LD3LN_H, AArch64::LD3LN_S, AArch64::LD3LN_D
};
- return SelectVST(Node, 4, false, Opcodes);
+ return SelectVLDSTLane(Node, true, false, 3, Opcodes);
}
+ case Intrinsic::arm_neon_vld4lane: {
+ static const uint16_t Opcodes[] = {
+ AArch64::LD4LN_B, AArch64::LD4LN_H, AArch64::LD4LN_S, AArch64::LD4LN_D
+ };
+ return SelectVLDSTLane(Node, true, false, 4, Opcodes);
}
+ case Intrinsic::arm_neon_vst2lane: {
+ static const uint16_t Opcodes[] = {
+ AArch64::ST2LN_B, AArch64::ST2LN_H, AArch64::ST2LN_S, AArch64::ST2LN_D
+ };
+ return SelectVLDSTLane(Node, false, false, 2, Opcodes);
+ }
+ case Intrinsic::arm_neon_vst3lane: {
+ static const uint16_t Opcodes[] = {
+ AArch64::ST3LN_B, AArch64::ST3LN_H, AArch64::ST3LN_S, AArch64::ST3LN_D
+ };
+ return SelectVLDSTLane(Node, false, false, 3, Opcodes);
+ }
+ case Intrinsic::arm_neon_vst4lane: {
+ static const uint16_t Opcodes[] = {
+ AArch64::ST4LN_B, AArch64::ST4LN_H, AArch64::ST4LN_S, AArch64::ST4LN_D
+ };
+ return SelectVLDSTLane(Node, false, false, 4, Opcodes);
+ }
+ } // End of switch IntNo
break;
- }
+ } // End of case ISD::INTRINSIC_VOID and ISD::INTRINSIC_W_CHAIN
default:
break; // Let generic code handle it
}