author     Jiangning Liu <jiangning.liu@arm.com>  2013-11-06 02:25:49 +0000
committer  Jiangning Liu <jiangning.liu@arm.com>  2013-11-06 02:25:49 +0000
commit     258115258f8fe15e9d74b5fb524f90b75bb917d1 (patch)
tree       f7df585491dc8c3376135fb0e8d39db4dd0b643a /lib
parent     10bb82e54fc0608e6220581bda0405af8f12d32f (diff)
Implement AArch64 Neon instruction set Bitwise Extract.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@194118 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r--  lib/Target/AArch64/AArch64ISelLowering.cpp             | 199
-rw-r--r--  lib/Target/AArch64/AArch64ISelLowering.h               |   3
-rw-r--r--  lib/Target/AArch64/AArch64InstrFormats.td              |  18
-rw-r--r--  lib/Target/AArch64/AArch64InstrNEON.td                 |  68
-rw-r--r--  lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp  |  10
-rw-r--r--  lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h    |   5
6 files changed, 203 insertions, 100 deletions
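
For context: the EXT (bitwise extract) instruction concatenates its two source vectors and extracts a contiguous, byte-aligned window starting at an immediate byte index. A minimal C++ model of the 64-bit (.8b) form, for illustration only (the helper name and types are ours, not code from this patch):

#include <array>
#include <cstdint>

// Models "ext Vd.8b, Vn.8b, Vm.8b, #index": the result is bytes
// index .. index+7 of the 16-byte concatenation (Vn, Vm).
std::array<uint8_t, 8> ext8b(const std::array<uint8_t, 8> &Vn,
                             const std::array<uint8_t, 8> &Vm,
                             unsigned Index) { // Index must be in 0..7.
  std::array<uint8_t, 8> Result;
  for (unsigned i = 0; i != 8; ++i) {
    unsigned Src = Index + i;
    Result[i] = Src < 8 ? Vn[Src] : Vm[Src - 8];
  }
  return Result;
}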
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index 6765c3339d..ab46d7f7b3 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -907,6 +907,8 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
return "AArch64ISD::NEON_ST3_UPD";
case AArch64ISD::NEON_ST4_UPD:
return "AArch64ISD::NEON_ST4_UPD";
+ case AArch64ISD::NEON_VEXTRACT:
+ return "AArch64ISD::NEON_VEXTRACT";
default:
return NULL;
}
@@ -3797,7 +3799,7 @@ AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
SDValue
AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
- SelectionDAG &DAG) const {
+ SelectionDAG &DAG) const {
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
SDLoc dl(Op);
@@ -3811,101 +3813,126 @@ AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
ArrayRef<int> ShuffleMask = SVN->getMask();
unsigned EltSize = VT.getVectorElementType().getSizeInBits();
- if (EltSize <= 64) {
- if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) {
- int Lane = SVN->getSplatIndex();
- // If this is undef splat, generate it via "just" vdup, if possible.
- if (Lane == -1) Lane = 0;
-
- // Test if V1 is a SCALAR_TO_VECTOR.
- if (V1.getOpcode() == ISD::SCALAR_TO_VECTOR) {
- return DAG.getNode(AArch64ISD::NEON_VDUP, dl, VT, V1.getOperand(0));
- }
- // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR.
- if (V1.getOpcode() == ISD::BUILD_VECTOR) {
- bool IsScalarToVector = true;
- for (unsigned i = 0, e = V1.getNumOperands(); i != e; ++i)
- if (V1.getOperand(i).getOpcode() != ISD::UNDEF &&
- i != (unsigned)Lane) {
- IsScalarToVector = false;
- break;
- }
- if (IsScalarToVector)
- return DAG.getNode(AArch64ISD::NEON_VDUP, dl, VT,
- V1.getOperand(Lane));
- }
- return DAG.getNode(AArch64ISD::NEON_VDUPLANE, dl, VT, V1,
- DAG.getConstant(Lane, MVT::i64));
- }
- // For shuffle mask like "0, 1, 2, 3, 4, 5, 13, 7", try to generate insert
- // by element from V2 to V1 .
- // If shuffle mask is like "0, 1, 10, 11, 12, 13, 14, 15", V2 would be a
- // better choice to be inserted than V1 as less insert needed, so we count
- // element to be inserted for both V1 and V2, and select less one as insert
- // target.
-
- // Collect elements need to be inserted and their index.
- SmallVector<int, 8> NV1Elt;
- SmallVector<int, 8> N1Index;
- SmallVector<int, 8> NV2Elt;
- SmallVector<int, 8> N2Index;
- int Length = ShuffleMask.size();
- int V1EltNum = V1.getValueType().getVectorNumElements();
- for (int I = 0; I != Length; ++I) {
- if (ShuffleMask[I] != I) {
- NV1Elt.push_back(ShuffleMask[I]);
- N1Index.push_back(I);
- }
+ if (EltSize > 64)
+ return SDValue();
+
+ // If the elements of the shuffle mask are all the same constant, we can
+ // transform it into either NEON_VDUP or NEON_VDUPLANE.
+ if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) {
+ int Lane = SVN->getSplatIndex();
+ // If this is undef splat, generate it via "just" vdup, if possible.
+ if (Lane == -1) Lane = 0;
+
+ // Test if V1 is a SCALAR_TO_VECTOR.
+ if (V1.getOpcode() == ISD::SCALAR_TO_VECTOR) {
+ return DAG.getNode(AArch64ISD::NEON_VDUP, dl, VT, V1.getOperand(0));
}
- for (int I = 0; I != Length; ++I) {
- if (ShuffleMask[I] != (I + V1EltNum)) {
- NV2Elt.push_back(ShuffleMask[I]);
- N2Index.push_back(I);
- }
+ // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR.
+ if (V1.getOpcode() == ISD::BUILD_VECTOR) {
+ bool IsScalarToVector = true;
+ for (unsigned i = 0, e = V1.getNumOperands(); i != e; ++i)
+ if (V1.getOperand(i).getOpcode() != ISD::UNDEF &&
+ i != (unsigned)Lane) {
+ IsScalarToVector = false;
+ break;
+ }
+ if (IsScalarToVector)
+ return DAG.getNode(AArch64ISD::NEON_VDUP, dl, VT,
+ V1.getOperand(Lane));
}
+ return DAG.getNode(AArch64ISD::NEON_VDUPLANE, dl, VT, V1,
+ DAG.getConstant(Lane, MVT::i64));
+ }
- // Decide which to be inserted. If all lanes mismatch, neither V1 nor V2
- // will be inserted.
- SDValue InsV = V1;
- SmallVector<int, 8> InsMasks = NV1Elt;
- SmallVector<int, 8> InsIndex = N1Index;
- if ((int)NV1Elt.size() != Length || (int)NV2Elt.size() != Length) {
- if (NV1Elt.size() > NV2Elt.size()) {
- InsV = V2;
- InsMasks = NV2Elt;
- InsIndex = N2Index;
+ int Length = ShuffleMask.size();
+ int V1EltNum = V1.getValueType().getVectorNumElements();
+
+ // If the number of V1 elements is the same as the number of shuffle mask
+ // elements and the shuffle mask values are sequential, we can transform
+ // the shuffle into NEON_VEXTRACT.
+ if (V1EltNum == Length) {
+ // Check if the shuffle mask is sequential.
+ bool IsSequential = true;
+ int CurMask = ShuffleMask[0];
+ for (int I = 0; I < Length; ++I) {
+ if (ShuffleMask[I] != CurMask) {
+ IsSequential = false;
+ break;
}
- } else {
- InsV = DAG.getNode(ISD::UNDEF, dl, VT);
+ CurMask++;
}
+ if (IsSequential) {
+ assert((EltSize % 8 == 0) && "Bitsize of vector element is incorrect");
+ unsigned VecSize = EltSize * V1EltNum;
+ unsigned Index = (EltSize/8) * ShuffleMask[0];
+ if (VecSize == 64 || VecSize == 128)
+ return DAG.getNode(AArch64ISD::NEON_VEXTRACT, dl, VT, V1, V2,
+ DAG.getConstant(Index, MVT::i64));
+ }
+ }
- SDValue PassN;
+ // For a shuffle mask like "0, 1, 2, 3, 4, 5, 13, 7", try to generate an
+ // insert by element from V2 into V1.
+ // If the shuffle mask is like "0, 1, 10, 11, 12, 13, 14, 15", V2 is the
+ // better insertion target than V1 because fewer inserts are needed, so we
+ // count the elements to be inserted for both V1 and V2 and pick the one
+ // needing fewer inserts as the target.
+
+ // Collect the elements that need to be inserted and their indices.
+ SmallVector<int, 8> NV1Elt;
+ SmallVector<int, 8> N1Index;
+ SmallVector<int, 8> NV2Elt;
+ SmallVector<int, 8> N2Index;
+ for (int I = 0; I != Length; ++I) {
+ if (ShuffleMask[I] != I) {
+ NV1Elt.push_back(ShuffleMask[I]);
+ N1Index.push_back(I);
+ }
+ }
+ for (int I = 0; I != Length; ++I) {
+ if (ShuffleMask[I] != (I + V1EltNum)) {
+ NV2Elt.push_back(ShuffleMask[I]);
+ N2Index.push_back(I);
+ }
+ }
- for (int I = 0, E = InsMasks.size(); I != E; ++I) {
- SDValue ExtV = V1;
- int Mask = InsMasks[I];
- if (Mask > V1EltNum) {
- ExtV = V2;
- Mask -= V1EltNum;
- }
- // Any value type smaller than i32 is illegal in AArch64, and this lower
- // function is called after legalize pass, so we need to legalize
- // the result here.
- EVT EltVT;
- if (VT.getVectorElementType().isFloatingPoint())
- EltVT = (EltSize == 64) ? MVT::f64 : MVT::f32;
- else
- EltVT = (EltSize == 64) ? MVT::i64 : MVT::i32;
-
- PassN = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, ExtV,
- DAG.getConstant(Mask, MVT::i64));
- PassN = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, InsV, PassN,
- DAG.getConstant(InsIndex[I], MVT::i64));
+ // Decide which vector to insert into. If all lanes of both V1 and V2
+ // mismatch, neither is used and we insert into an UNDEF vector instead.
+ SDValue InsV = V1;
+ SmallVector<int, 8> InsMasks = NV1Elt;
+ SmallVector<int, 8> InsIndex = N1Index;
+ if ((int)NV1Elt.size() != Length || (int)NV2Elt.size() != Length) {
+ if (NV1Elt.size() > NV2Elt.size()) {
+ InsV = V2;
+ InsMasks = NV2Elt;
+ InsIndex = N2Index;
}
- return PassN;
+ } else {
+ InsV = DAG.getNode(ISD::UNDEF, dl, VT);
}
- return SDValue();
+ for (int I = 0, E = InsMasks.size(); I != E; ++I) {
+ SDValue ExtV = V1;
+ int Mask = InsMasks[I];
+ if (Mask >= V1EltNum) {
+ ExtV = V2;
+ Mask -= V1EltNum;
+ }
+ // Any value type smaller than i32 is illegal in AArch64, and this
+ // lowering function is called after the legalize pass, so we need to
+ // legalize the result here.
+ EVT EltVT;
+ if (VT.getVectorElementType().isFloatingPoint())
+ EltVT = (EltSize == 64) ? MVT::f64 : MVT::f32;
+ else
+ EltVT = (EltSize == 64) ? MVT::i64 : MVT::i32;
+
+ ExtV = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, ExtV,
+ DAG.getConstant(Mask, MVT::i64));
+ InsV = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, InsV, ExtV,
+ DAG.getConstant(InsIndex[I], MVT::i64));
+ }
+ return InsV;
}
AArch64TargetLowering::ConstraintType
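
To see what the new NEON_VEXTRACT path in LowerVECTOR_SHUFFLE accepts, here is a standalone sketch of the sequential-mask test and byte-index computation; the helper is hypothetical and mirrors, rather than reproduces, the code above:

#include <vector>

// A mask such as <1, 2, 3, 4> over two v4i16 operands is sequential
// starting at lane 1, so the shuffle maps to an EXT with byte index
// 1 * (16 / 8) = 2. Returns -1 if the mask is not sequential.
int extByteIndex(const std::vector<int> &Mask, unsigned EltSizeBits) {
  int Cur = Mask[0];
  for (int M : Mask)
    if (M != Cur++)
      return -1;
  return (EltSizeBits / 8) * Mask[0];
}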
diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h
index 7effbfd66c..83fd79d6ba 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/lib/Target/AArch64/AArch64ISelLowering.h
@@ -144,6 +144,9 @@ namespace AArch64ISD {
// Vector dup by lane
NEON_VDUPLANE,
+ // Vector extract
+ NEON_VEXTRACT,
+
// NEON loads with post-increment base updates:
NEON_LD1_UPD = ISD::FIRST_TARGET_MEMORY_OPCODE,
NEON_LD2_UPD,
diff --git a/lib/Target/AArch64/AArch64InstrFormats.td b/lib/Target/AArch64/AArch64InstrFormats.td
index b3e114a01b..8a2142646e 100644
--- a/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/lib/Target/AArch64/AArch64InstrFormats.td
@@ -983,6 +983,24 @@ class NeonInstAlias<string Asm, dag Result, bit Emit = 0b1>
: InstAlias<Asm, Result, Emit> {
}
+// Format AdvSIMD bitwise extract
+class NeonI_BitExtract<bit q, bits<2> op2,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
+ let Inst{31} = 0b0;
+ let Inst{30} = q;
+ let Inst{29-24} = 0b101110;
+ let Inst{23-22} = op2;
+ let Inst{21} = 0b0;
+ // Inherit Rm in 20-16
+ let Inst{15} = 0b0;
+ // imm4 in 14-11
+ let Inst{10} = 0b0;
+ // Inherit Rn in 9-5
+ // Inherit Rd in 4-0
+}
+
// Format AdvSIMD 3 vector registers with same vector type
class NeonI_3VSame<bit q, bit u, bits<2> size, bits<5> opcode,
dag outs, dag ins, string asmstr,
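
As a cross-check of the NeonI_BitExtract field layout, this sketch packs the same fields into a 32-bit word. It illustrates the encoding the TableGen class describes; it is not an encoder from the tree:

#include <cstdint>

// AdvSIMD EXT layout: 0 | q | 101110 | op2 | 0 | Rm | 0 | imm4 | 0 | Rn | Rd
// (0x2E is 0b101110; bits 31, 21, 15, and 10 stay zero.)
uint32_t encodeExt(unsigned Q, unsigned Op2, unsigned Rm,
                   unsigned Imm4, unsigned Rn, unsigned Rd) {
  return ((Q & 1u) << 30) | (0x2Eu << 24) | ((Op2 & 3u) << 22) |
         ((Rm & 31u) << 16) | ((Imm4 & 15u) << 11) | ((Rn & 31u) << 5) |
         (Rd & 31u);
}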
diff --git a/lib/Target/AArch64/AArch64InstrNEON.td b/lib/Target/AArch64/AArch64InstrNEON.td
index 4cb5da6b8a..8a78d14b8c 100644
--- a/lib/Target/AArch64/AArch64InstrNEON.td
+++ b/lib/Target/AArch64/AArch64InstrNEON.td
@@ -50,6 +50,9 @@ def Neon_vdup : SDNode<"AArch64ISD::NEON_VDUP", SDTypeProfile<1, 1,
[SDTCisVec<0>]>>;
def Neon_vduplane : SDNode<"AArch64ISD::NEON_VDUPLANE", SDTypeProfile<1, 2,
[SDTCisVec<0>, SDTCisVec<1>, SDTCisVT<2, i64>]>>;
+def Neon_vextract : SDNode<"AArch64ISD::NEON_VEXTRACT", SDTypeProfile<1, 3,
+ [SDTCisVec<0>, SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>, SDTCisVT<3, i64>]>>;
//===----------------------------------------------------------------------===//
// Multiclasses
@@ -1062,7 +1065,7 @@ def neon_uimm8_asmoperand : AsmOperandClass
def neon_uimm8 : Operand<i32>, ImmLeaf<i32, [{(void)Imm; return true;}]> {
let ParserMatchClass = neon_uimm8_asmoperand;
- let PrintMethod = "printNeonUImm8Operand";
+ let PrintMethod = "printUImmHexOperand";
}
def neon_uimm64_mask_asmoperand : AsmOperandClass
@@ -4430,31 +4433,43 @@ def : Pat<(v2f64 (bitconvert (f128 FPR128:$src))), (v2f64 FPR128:$src)>;
def neon_uimm0_bare : Operand<i64>,
ImmLeaf<i64, [{return Imm == 0;}]> {
let ParserMatchClass = neon_uimm0_asmoperand;
- let PrintMethod = "printNeonUImm8OperandBare";
+ let PrintMethod = "printUImmBareOperand";
}
def neon_uimm1_bare : Operand<i64>,
ImmLeaf<i64, [{(void)Imm; return true;}]> {
let ParserMatchClass = neon_uimm1_asmoperand;
- let PrintMethod = "printNeonUImm8OperandBare";
+ let PrintMethod = "printUImmBareOperand";
}
def neon_uimm2_bare : Operand<i64>,
ImmLeaf<i64, [{(void)Imm; return true;}]> {
let ParserMatchClass = neon_uimm2_asmoperand;
- let PrintMethod = "printNeonUImm8OperandBare";
+ let PrintMethod = "printUImmBareOperand";
}
def neon_uimm3_bare : Operand<i64>,
ImmLeaf<i64, [{(void)Imm; return true;}]> {
let ParserMatchClass = uimm3_asmoperand;
- let PrintMethod = "printNeonUImm8OperandBare";
+ let PrintMethod = "printUImmBareOperand";
}
def neon_uimm4_bare : Operand<i64>,
ImmLeaf<i64, [{(void)Imm; return true;}]> {
let ParserMatchClass = uimm4_asmoperand;
- let PrintMethod = "printNeonUImm8OperandBare";
+ let PrintMethod = "printUImmBareOperand";
+}
+
+def neon_uimm3 : Operand<i64>,
+ ImmLeaf<i64, [{(void)Imm; return true;}]> {
+ let ParserMatchClass = uimm3_asmoperand;
+ let PrintMethod = "printUImmHexOperand";
+}
+
+def neon_uimm4 : Operand<i64>,
+ ImmLeaf<i64, [{(void)Imm; return true;}]> {
+ let ParserMatchClass = uimm4_asmoperand;
+ let PrintMethod = "printUImmHexOperand";
}
class NeonI_INS_main<string asmop, string Res, ValueType ResTy,
@@ -4472,6 +4487,47 @@ class NeonI_INS_main<string asmop, string Res, ValueType ResTy,
let Constraints = "$src = $Rd";
}
+// Bitwise Extract
+class NeonI_Extract<bit q, bits<2> op2, string asmop,
+ string OpS, RegisterOperand OpVPR, Operand OpImm>
+ : NeonI_BitExtract<q, op2, (outs OpVPR:$Rd),
+ (ins OpVPR:$Rn, OpVPR:$Rm, OpImm:$Index),
+ asmop # "\t$Rd." # OpS # ", $Rn." # OpS #
+ ", $Rm." # OpS # ", $Index",
+ [],
+ NoItinerary>{
+ bits<4> Index;
+}
+
+def EXTvvvi_8b : NeonI_Extract<0b0, 0b00, "ext", "8b",
+ VPR64, neon_uimm3> {
+ let Inst{14-11} = {0b0, Index{2}, Index{1}, Index{0}};
+}
+
+def EXTvvvi_16b: NeonI_Extract<0b1, 0b00, "ext", "16b",
+ VPR128, neon_uimm4> {
+ let Inst{14-11} = Index;
+}
+
+class NI_Extract<ValueType OpTy, RegisterOperand OpVPR, Instruction INST,
+ Operand OpImm>
+ : Pat<(OpTy (Neon_vextract (OpTy OpVPR:$Rn), (OpTy OpVPR:$Rm),
+ (i64 OpImm:$Imm))),
+ (INST OpVPR:$Rn, OpVPR:$Rm, OpImm:$Imm)>;
+
+def : NI_Extract<v8i8, VPR64, EXTvvvi_8b, neon_uimm3>;
+def : NI_Extract<v4i16, VPR64, EXTvvvi_8b, neon_uimm3>;
+def : NI_Extract<v2i32, VPR64, EXTvvvi_8b, neon_uimm3>;
+def : NI_Extract<v1i64, VPR64, EXTvvvi_8b, neon_uimm3>;
+def : NI_Extract<v2f32, VPR64, EXTvvvi_8b, neon_uimm3>;
+def : NI_Extract<v1f64, VPR64, EXTvvvi_8b, neon_uimm3>;
+def : NI_Extract<v16i8, VPR128, EXTvvvi_16b, neon_uimm4>;
+def : NI_Extract<v8i16, VPR128, EXTvvvi_16b, neon_uimm4>;
+def : NI_Extract<v4i32, VPR128, EXTvvvi_16b, neon_uimm4>;
+def : NI_Extract<v2i64, VPR128, EXTvvvi_16b, neon_uimm4>;
+def : NI_Extract<v4f32, VPR128, EXTvvvi_16b, neon_uimm4>;
+def : NI_Extract<v2f64, VPR128, EXTvvvi_16b, neon_uimm4>;
+
// The followings are for instruction class (3V Elem)
// Variant 1
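
At the source level, shuffles of this shape are what the vext family of NEON intrinsics produce, so these patterns let the backend select a single EXT for them. A small usage sketch, assuming an AArch64 toolchain that provides arm_neon.h:

#include <arm_neon.h>

// With this patch the underlying vector shuffle can lower to a single
// "ext v0.16b, v1.16b, v2.16b, #0x3" instead of a chain of inserts.
uint8x16_t shift_window(uint8x16_t A, uint8x16_t B) {
  return vextq_u8(A, B, 3); // bytes 3..15 of A, then bytes 0..2 of B
}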
diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
index 51335e145b..c081691756 100644
--- a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
+++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
@@ -462,8 +462,8 @@ void AArch64InstPrinter::printNeonUImm0Operand(const MCInst *MI, unsigned OpNum,
o << "#0x0";
}
-void AArch64InstPrinter::printNeonUImm8Operand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
+void AArch64InstPrinter::printUImmHexOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
const MCOperand &MOUImm = MI->getOperand(OpNum);
assert(MOUImm.isImm() &&
@@ -475,9 +475,9 @@ void AArch64InstPrinter::printNeonUImm8Operand(const MCInst *MI, unsigned OpNum,
O.write_hex(Imm);
}
-void AArch64InstPrinter::printNeonUImm8OperandBare(const MCInst *MI,
- unsigned OpNum,
- raw_ostream &O) {
+void AArch64InstPrinter::printUImmBareOperand(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
const MCOperand &MOUImm = MI->getOperand(OpNum);
assert(MOUImm.isImm()
diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
index 28ebfc45f1..37b7273438 100644
--- a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
+++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
@@ -169,9 +169,8 @@ public:
void printNeonMovImmShiftOperand(const MCInst *MI, unsigned OpNum,
raw_ostream &O);
void printNeonUImm0Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printNeonUImm8Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printNeonUImm8OperandBare(const MCInst *MI, unsigned OpNum,
- raw_ostream &O);
+ void printUImmHexOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printUImmBareOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printNeonUImm64MaskOperand(const MCInst *MI, unsigned OpNum,
raw_ostream &O);
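
The two renamed printers differ only in output format; a simplified sketch of the intended behavior (not the exact implementations, which also assert on the operand kind):

#include <cstdio>

// printUImmHexOperand style: "#0x3" -- used by neon_uimm3/neon_uimm4,
// so EXT's index prints as, e.g., "ext v0.8b, v1.8b, v2.8b, #0x3".
void printHexImm(unsigned Imm)  { std::printf("#0x%x", Imm); }

// printUImmBareOperand style: a bare "3", no '#' and no hex prefix,
// as used for lane indices such as "ins v0.b[3], v1.b[0]".
void printBareImm(unsigned Imm) { std::printf("%u", Imm); }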