summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChad Rosier <mcrosier@codeaurora.org>2014-01-30 21:46:54 +0000
committerChad Rosier <mcrosier@codeaurora.org>2014-01-30 21:46:54 +0000
commitdafea158a7223491febdaedb969b1ec0e8680f56 (patch)
tree2090f0af373e19447bc47dcc06814888b45af3d5
parent4e54e6fe755c54c444687f9d0d2e657982aa8e1c (diff)
downloadllvm-dafea158a7223491febdaedb969b1ec0e8680f56.tar.gz
llvm-dafea158a7223491febdaedb969b1ec0e8680f56.tar.bz2
llvm-dafea158a7223491febdaedb969b1ec0e8680f56.tar.xz
[AArch64] Custom lower concat_vector patterns with v4i16, v4i32, v8i8, v8i16, v16i8 types.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@200491 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Target/AArch64/AArch64ISelLowering.cpp53
-rw-r--r--lib/Target/AArch64/AArch64InstrNEON.td14
-rw-r--r--test/CodeGen/AArch64/neon-copy.ll86
3 files changed, 153 insertions, 0 deletions
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index 7f101fffa1..769a2bcb62 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -332,6 +332,12 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM)
setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f32, Legal);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v2f64, Legal);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i8, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i16, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i8, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i16, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32, Custom);
+
setOperationAction(ISD::SETCC, MVT::v8i8, Custom);
setOperationAction(ISD::SETCC, MVT::v16i8, Custom);
setOperationAction(ISD::SETCC, MVT::v4i16, Custom);
@@ -2259,6 +2265,52 @@ static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG,
return DAG.getNode(Opc, dl, VT, Vec);
}
+static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
+ // We custom lower concat_vectors with 4, 8, or 16 operands that are all the
+ // same operand and of type v1* using the DUP instruction.
+ unsigned NumOps = Op->getNumOperands();
+ if (NumOps != 4 && NumOps != 8 && NumOps != 16)
+ return Op;
+
+ // Must be a single value for VDUP.
+ bool isConstant = true;
+ SDValue Op0 = Op.getOperand(0);
+ for (unsigned i = 1; i < NumOps; ++i) {
+ SDValue OpN = Op.getOperand(i);
+ if (Op0 != OpN)
+ return Op;
+
+ if (!isa<ConstantSDNode>(OpN->getOperand(0)))
+ isConstant = false;
+ }
+
+ // Verify the value type.
+ EVT EltVT = Op0.getValueType();
+ switch (NumOps) {
+ default: llvm_unreachable("Unexpected number of operands");
+ case 4:
+ if (EltVT != MVT::v1i16 && EltVT != MVT::v1i32)
+ return Op;
+ break;
+ case 8:
+ if (EltVT != MVT::v1i8 && EltVT != MVT::v1i16)
+ return Op;
+ break;
+ case 16:
+ if (EltVT != MVT::v1i8)
+ return Op;
+ break;
+ }
+
+ SDLoc DL(Op);
+ EVT VT = Op.getValueType();
+ // VDUP produces better code for constants.
+ if (isConstant)
+ return DAG.getNode(AArch64ISD::NEON_VDUP, DL, VT, Op0->getOperand(0));
+ return DAG.getNode(AArch64ISD::NEON_VDUPLANE, DL, VT, Op0,
+ DAG.getConstant(0, MVT::i64));
+}
+
SDValue
AArch64TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
bool IsSigned) const {
@@ -3241,6 +3293,7 @@ AArch64TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::VASTART: return LowerVASTART(Op, DAG);
case ISD::BUILD_VECTOR:
return LowerBUILD_VECTOR(Op, DAG, getSubtarget());
+ case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
}
diff --git a/lib/Target/AArch64/AArch64InstrNEON.td b/lib/Target/AArch64/AArch64InstrNEON.td
index 1309bf12b5..2cf27b861b 100644
--- a/lib/Target/AArch64/AArch64InstrNEON.td
+++ b/lib/Target/AArch64/AArch64InstrNEON.td
@@ -7106,6 +7106,20 @@ def : Pat<(v2f64 (Neon_vdup (f64 FPR64:$Rn))),
(SUBREG_TO_REG (i64 0), FPR64:$Rn, sub_64),
(i64 0)))>;
+multiclass NeonI_DUP_pattern<Instruction DUPELT, ValueType ResTy,
+ ValueType OpTy, RegisterClass OpRC,
+ Operand OpNImm, SubRegIndex SubIndex> {
+def : Pat<(ResTy (Neon_vduplane (OpTy OpRC:$Rn), OpNImm:$Imm)),
+ (ResTy (DUPELT
+ (SUBREG_TO_REG (i64 0), OpRC:$Rn, SubIndex), OpNImm:$Imm))>;
+}
+
+defm : NeonI_DUP_pattern<DUPELT4h, v4i16, v1i16, FPR16, neon_uimm2_bare,sub_16>;
+defm : NeonI_DUP_pattern<DUPELT4s, v4i32, v1i32, FPR32, neon_uimm2_bare,sub_32>;
+defm : NeonI_DUP_pattern<DUPELT8b, v8i8, v1i8, FPR8, neon_uimm3_bare, sub_8>;
+defm : NeonI_DUP_pattern<DUPELT8h, v8i16, v1i16, FPR16, neon_uimm3_bare,sub_16>;
+defm : NeonI_DUP_pattern<DUPELT16b, v16i8, v1i8, FPR8, neon_uimm4_bare, sub_8>;
+
class NeonI_DUP<bit Q, string asmop, string rdlane,
RegisterOperand ResVPR, ValueType ResTy,
RegisterClass OpGPR, ValueType OpTy>
diff --git a/test/CodeGen/AArch64/neon-copy.ll b/test/CodeGen/AArch64/neon-copy.ll
index eb86421126..8d4a3388bb 100644
--- a/test/CodeGen/AArch64/neon-copy.ll
+++ b/test/CodeGen/AArch64/neon-copy.ll
@@ -1301,3 +1301,89 @@ entry:
%vsqadd2.i = call <1 x i16> @llvm.aarch64.neon.vsqadd.v1i16(<1 x i16> %b, <1 x i16> %a)
ret <1 x i16> %vsqadd2.i
}
+
+define <4 x i16> @concat_vector_v4i16_const() {
+; CHECK-LABEL: concat_vector_v4i16_const:
+; CHECK: dup {{v[0-9]+}}.4h, wzr
+ %r = shufflevector <1 x i16> zeroinitializer, <1 x i16> undef, <4 x i32> zeroinitializer
+ ret <4 x i16> %r
+}
+
+define <4 x i16> @concat_vector_v4i16_const_one() {
+; CHECK-LABEL: concat_vector_v4i16_const_one:
+; CHECK: movz {{w[0-9]+}}, #1
+; CHECK: dup {{v[0-9]+}}.4h, {{w[0-9]+}}
+ %r = shufflevector <1 x i16> <i16 1>, <1 x i16> undef, <4 x i32> zeroinitializer
+ ret <4 x i16> %r
+}
+
+define <4 x i32> @concat_vector_v4i32_const() {
+; CHECK-LABEL: concat_vector_v4i32_const:
+; CHECK: dup {{v[0-9]+}}.4s, wzr
+ %r = shufflevector <1 x i32> zeroinitializer, <1 x i32> undef, <4 x i32> zeroinitializer
+ ret <4 x i32> %r
+}
+
+define <8 x i8> @concat_vector_v8i8_const() {
+; CHECK-LABEL: concat_vector_v8i8_const:
+; CHECK: dup {{v[0-9]+}}.8b, wzr
+ %r = shufflevector <1 x i8> zeroinitializer, <1 x i8> undef, <8 x i32> zeroinitializer
+ ret <8 x i8> %r
+}
+
+define <8 x i16> @concat_vector_v8i16_const() {
+; CHECK-LABEL: concat_vector_v8i16_const:
+; CHECK: dup {{v[0-9]+}}.8h, wzr
+ %r = shufflevector <1 x i16> zeroinitializer, <1 x i16> undef, <8 x i32> zeroinitializer
+ ret <8 x i16> %r
+}
+
+define <8 x i16> @concat_vector_v8i16_const_one() {
+; CHECK-LABEL: concat_vector_v8i16_const_one:
+; CHECK: movz {{w[0-9]+}}, #1
+; CHECK: dup {{v[0-9]+}}.8h, {{w[0-9]+}}
+ %r = shufflevector <1 x i16> <i16 1>, <1 x i16> undef, <8 x i32> zeroinitializer
+ ret <8 x i16> %r
+}
+
+define <16 x i8> @concat_vector_v16i8_const() {
+; CHECK-LABEL: concat_vector_v16i8_const:
+; CHECK: dup {{v[0-9]+}}.16b, wzr
+ %r = shufflevector <1 x i8> zeroinitializer, <1 x i8> undef, <16 x i32> zeroinitializer
+ ret <16 x i8> %r
+}
+
+define <4 x i16> @concat_vector_v4i16(<1 x i16> %a) {
+; CHECK-LABEL: concat_vector_v4i16:
+; CHECK: dup {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+ %r = shufflevector <1 x i16> %a, <1 x i16> undef, <4 x i32> zeroinitializer
+ ret <4 x i16> %r
+}
+
+define <4 x i32> @concat_vector_v4i32(<1 x i32> %a) {
+; CHECK-LABEL: concat_vector_v4i32:
+; CHECK: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+ %r = shufflevector <1 x i32> %a, <1 x i32> undef, <4 x i32> zeroinitializer
+ ret <4 x i32> %r
+}
+
+define <8 x i8> @concat_vector_v8i8(<1 x i8> %a) {
+; CHECK-LABEL: concat_vector_v8i8:
+; CHECK: dup {{v[0-9]+}}.8b, {{v[0-9]+}}.b[0]
+ %r = shufflevector <1 x i8> %a, <1 x i8> undef, <8 x i32> zeroinitializer
+ ret <8 x i8> %r
+}
+
+define <8 x i16> @concat_vector_v8i16(<1 x i16> %a) {
+; CHECK-LABEL: concat_vector_v8i16:
+; CHECK: dup {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+ %r = shufflevector <1 x i16> %a, <1 x i16> undef, <8 x i32> zeroinitializer
+ ret <8 x i16> %r
+}
+
+define <16 x i8> @concat_vector_v16i8(<1 x i8> %a) {
+; CHECK-LABEL: concat_vector_v16i8:
+; CHECK: dup {{v[0-9]+}}.16b, {{v[0-9]+}}.b[0]
+ %r = shufflevector <1 x i8> %a, <1 x i8> undef, <16 x i32> zeroinitializer
+ ret <16 x i8> %r
+}