diff options
author | Jiangning Liu <jiangning.liu@arm.com> | 2013-09-24 02:47:27 +0000 |
---|---|---|
committer | Jiangning Liu <jiangning.liu@arm.com> | 2013-09-24 02:47:27 +0000 |
commit | 477fc628b3c9ce1c970d4a678dd5607b15242cc8 (patch) | |
tree | 9f6708fbdd79d63957d03eaed153c19d44f4fb5e /lib | |
parent | 44e84417679db0f2dc7e93f8153ba7ef1812f5d3 (diff) | |
download | llvm-477fc628b3c9ce1c970d4a678dd5607b15242cc8.tar.gz llvm-477fc628b3c9ce1c970d4a678dd5607b15242cc8.tar.bz2 llvm-477fc628b3c9ce1c970d4a678dd5607b15242cc8.tar.xz |
Initial support for Neon scalar instructions.
Patch by Ana Pazos.
1.Added support for v1ix and v1fx types.
2.Added Scalar Pairwise Reduce instructions.
3.Added initial implementation of Scalar Arithmetic instructions.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191263 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r-- | lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 3 | ||||
-rw-r--r-- | lib/IR/Function.cpp | 7 | ||||
-rw-r--r-- | lib/IR/ValueTypes.cpp | 6 | ||||
-rw-r--r-- | lib/Target/AArch64/AArch64CallingConv.td | 9 | ||||
-rw-r--r-- | lib/Target/AArch64/AArch64ISelLowering.cpp | 11 | ||||
-rw-r--r-- | lib/Target/AArch64/AArch64InstrFormats.td | 21 | ||||
-rw-r--r-- | lib/Target/AArch64/AArch64InstrInfo.td | 16 | ||||
-rw-r--r-- | lib/Target/AArch64/AArch64InstrNEON.td | 253 | ||||
-rw-r--r-- | lib/Target/AArch64/AArch64RegisterInfo.td | 8 |
9 files changed, 285 insertions, 49 deletions
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 5df36dd819..50bd6c7c52 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -919,7 +919,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_VECTOR(SDNode *N) { // type does not have a strange size (eg: it is not i1). EVT VecVT = N->getValueType(0); unsigned NumElts = VecVT.getVectorNumElements(); - assert(!(NumElts & 1) && "Legal vector of one illegal element?"); + assert(!((NumElts & 1) && (!TLI.isTypeLegal(VecVT))) && + "Legal vector of one illegal element?"); // Promote the inserted value. The type does not need to match the // vector element type. Check that any extra bits introduced will be diff --git a/lib/IR/Function.cpp b/lib/IR/Function.cpp index a64a4fa64f..f4bf774efe 100644 --- a/lib/IR/Function.cpp +++ b/lib/IR/Function.cpp @@ -453,7 +453,8 @@ enum IIT_Info { IIT_STRUCT5 = 22, IIT_EXTEND_VEC_ARG = 23, IIT_TRUNC_VEC_ARG = 24, - IIT_ANYPTR = 25 + IIT_ANYPTR = 25, + IIT_V1 = 26 }; @@ -497,6 +498,10 @@ static void DecodeIITType(unsigned &NextElt, ArrayRef<unsigned char> Infos, case IIT_I64: OutputTable.push_back(IITDescriptor::get(IITDescriptor::Integer, 64)); return; + case IIT_V1: + OutputTable.push_back(IITDescriptor::get(IITDescriptor::Vector, 1)); + DecodeIITType(NextElt, Infos, OutputTable); + return; case IIT_V2: OutputTable.push_back(IITDescriptor::get(IITDescriptor::Vector, 2)); DecodeIITType(NextElt, Infos, OutputTable); diff --git a/lib/IR/ValueTypes.cpp b/lib/IR/ValueTypes.cpp index 5aa4d06ffe..3740050c13 100644 --- a/lib/IR/ValueTypes.cpp +++ b/lib/IR/ValueTypes.cpp @@ -134,6 +134,7 @@ std::string EVT::getEVTString() const { case MVT::v16i1: return "v16i1"; case MVT::v32i1: return "v32i1"; case MVT::v64i1: return "v64i1"; + case MVT::v1i8: return "v1i8"; case MVT::v2i8: return "v2i8"; case MVT::v4i8: return "v4i8"; case MVT::v8i8: return "v8i8"; @@ -156,12 +157,14 @@ std::string EVT::getEVTString() const { case MVT::v4i64: return "v4i64"; case MVT::v8i64: return "v8i64"; case MVT::v16i64: return "v16i64"; + case MVT::v1f32: return "v1f32"; case MVT::v2f32: return "v2f32"; case MVT::v2f16: return "v2f16"; case MVT::v8f16: return "v8f16"; case MVT::v4f32: return "v4f32"; case MVT::v8f32: return "v8f32"; case MVT::v16f32: return "v16f32"; + case MVT::v1f64: return "v1f64"; case MVT::v2f64: return "v2f64"; case MVT::v4f64: return "v4f64"; case MVT::v8f64: return "v8f64"; @@ -198,6 +201,7 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const { case MVT::v16i1: return VectorType::get(Type::getInt1Ty(Context), 16); case MVT::v32i1: return VectorType::get(Type::getInt1Ty(Context), 32); case MVT::v64i1: return VectorType::get(Type::getInt1Ty(Context), 64); + case MVT::v1i8: return VectorType::get(Type::getInt8Ty(Context), 1); case MVT::v2i8: return VectorType::get(Type::getInt8Ty(Context), 2); case MVT::v4i8: return VectorType::get(Type::getInt8Ty(Context), 4); case MVT::v8i8: return VectorType::get(Type::getInt8Ty(Context), 8); @@ -222,10 +226,12 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const { case MVT::v16i64: return VectorType::get(Type::getInt64Ty(Context), 16); case MVT::v2f16: return VectorType::get(Type::getHalfTy(Context), 2); case MVT::v8f16: return VectorType::get(Type::getHalfTy(Context), 8); + case MVT::v1f32: return VectorType::get(Type::getFloatTy(Context), 1); case MVT::v2f32: return VectorType::get(Type::getFloatTy(Context), 2); case MVT::v4f32: return VectorType::get(Type::getFloatTy(Context), 4); case MVT::v8f32: return VectorType::get(Type::getFloatTy(Context), 8); case MVT::v16f32: return VectorType::get(Type::getFloatTy(Context), 16); + case MVT::v1f64: return VectorType::get(Type::getDoubleTy(Context), 1); case MVT::v2f64: return VectorType::get(Type::getDoubleTy(Context), 2); case MVT::v4f64: return VectorType::get(Type::getDoubleTy(Context), 4); case MVT::v8f64: return VectorType::get(Type::getDoubleTy(Context), 8); diff --git a/lib/Target/AArch64/AArch64CallingConv.td b/lib/Target/AArch64/AArch64CallingConv.td index bff7eebe00..a2a9f3f674 100644 --- a/lib/Target/AArch64/AArch64CallingConv.td +++ b/lib/Target/AArch64/AArch64CallingConv.td @@ -59,9 +59,9 @@ def CC_A64_APCS : CallingConv<[ // Canonicalise the various types that live in different floating-point // registers. This makes sense because the PCS does not distinguish Short // Vectors and Floating-point types. - CCIfType<[v2i8], CCBitConvertToType<f16>>, - CCIfType<[v4i8, v2i16], CCBitConvertToType<f32>>, - CCIfType<[v8i8, v4i16, v2i32, v2f32, v1i64], CCBitConvertToType<f64>>, + CCIfType<[v1i16, v2i8], CCBitConvertToType<f16>>, + CCIfType<[v1i32, v4i8, v2i16, v1f32], CCBitConvertToType<f32>>, + CCIfType<[v8i8, v4i16, v2i32, v2f32, v1i64, v1f64], CCBitConvertToType<f64>>, CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCBitConvertToType<f128>>, @@ -70,7 +70,8 @@ def CC_A64_APCS : CallingConv<[ // argument is allocated to the least significant bits of register // v[NSRN]. The NSRN is incremented by one. The argument has now been // allocated." - CCIfType<[f16], CCAssignToReg<[B0, B1, B2, B3, B4, B5, B6, B7]>>, + CCIfType<[v1i8], CCAssignToReg<[B0, B1, B2, B3, B4, B5, B6, B7]>>, + CCIfType<[f16], CCAssignToReg<[H0, H1, H2, H3, H4, H5, H6, H7]>>, CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7]>>, CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>, CCIfType<[f128], CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index 8597f073fc..48f34c00b3 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -57,6 +57,12 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM) if (Subtarget->hasNEON()) { // And the vectors + addRegisterClass(MVT::v1i8, &AArch64::FPR8RegClass); + addRegisterClass(MVT::v1i16, &AArch64::FPR16RegClass); + addRegisterClass(MVT::v1i32, &AArch64::FPR32RegClass); + addRegisterClass(MVT::v1i64, &AArch64::FPR64RegClass); + addRegisterClass(MVT::v1f32, &AArch64::FPR32RegClass); + addRegisterClass(MVT::v1f64, &AArch64::FPR64RegClass); addRegisterClass(MVT::v8i8, &AArch64::FPR64RegClass); addRegisterClass(MVT::v4i16, &AArch64::FPR64RegClass); addRegisterClass(MVT::v2i32, &AArch64::FPR64RegClass); @@ -274,16 +280,21 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM) setExceptionSelectorRegister(AArch64::X1); if (Subtarget->hasNEON()) { + setOperationAction(ISD::BUILD_VECTOR, MVT::v1i8, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom); + setOperationAction(ISD::BUILD_VECTOR, MVT::v1i16, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom); + setOperationAction(ISD::BUILD_VECTOR, MVT::v1i32, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v1i64, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom); + setOperationAction(ISD::BUILD_VECTOR, MVT::v1f32, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v2f32, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom); + setOperationAction(ISD::BUILD_VECTOR, MVT::v1f64, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom); setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i64, Legal); diff --git a/lib/Target/AArch64/AArch64InstrFormats.td b/lib/Target/AArch64/AArch64InstrFormats.td index 735670bf0a..4f48712b35 100644 --- a/lib/Target/AArch64/AArch64InstrFormats.td +++ b/lib/Target/AArch64/AArch64InstrFormats.td @@ -1074,8 +1074,7 @@ class NeonI_2VMisc<bit q, bit u, bits<2> size, bits<5> opcode, class NeonI_2VShiftImm<bit q, bit u, bits<5> opcode, dag outs, dag ins, string asmstr, list<dag> patterns, InstrItinClass itin> - : A64InstRdn<outs, ins, asmstr, patterns, itin> -{ + : A64InstRdn<outs, ins, asmstr, patterns, itin> { bits<7> Imm; let Inst{31} = 0b0; let Inst{30} = q; @@ -1129,5 +1128,23 @@ class NeonI_insert<bit q, bit op, // Inherit Rd in 4-0 } +// Format AdvSIMD scalar pairwise +class NeonI_ScalarPair<bit u, bits<2> size, bits<5> opcode, + dag outs, dag ins, string asmstr, + list<dag> patterns, InstrItinClass itin> + : A64InstRdn<outs, ins, asmstr, patterns, itin> { + let Inst{31} = 0b0; + let Inst{30} = 0b1; + let Inst{29} = u; + let Inst{28-24} = 0b11110; + let Inst{23-22} = size; + let Inst{21-17} = 0b11000; + let Inst{16-12} = opcode; + let Inst{11-10} = 0b10; + + // Inherit Rn in 9-5 + // Inherit Rd in 4-0 +} + } diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td index fef3019ef8..233279954a 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.td +++ b/lib/Target/AArch64/AArch64InstrInfo.td @@ -2189,22 +2189,22 @@ def FNMSUBdddd : A64I_fpdp3Impl<"fnmsub", FPR64, f64, 0b01, 0b1, 0b1, fnmsub>; // Extra patterns for when we're allowed to optimise separate multiplication and // addition. let Predicates = [UseFusedMAC] in { -def : Pat<(fadd FPR32:$Ra, (fmul FPR32:$Rn, FPR32:$Rm)), +def : Pat<(f32 (fadd FPR32:$Ra, (f32 (fmul FPR32:$Rn, FPR32:$Rm)))), (FMADDssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>; -def : Pat<(fsub FPR32:$Ra, (fmul FPR32:$Rn, FPR32:$Rm)), +def : Pat<(f32 (fsub FPR32:$Ra, (f32 (fmul FPR32:$Rn, FPR32:$Rm)))), (FMSUBssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>; -def : Pat<(fsub (fmul FPR32:$Rn, FPR32:$Rm), FPR32:$Ra), +def : Pat<(f32 (fsub (f32 (fmul FPR32:$Rn, FPR32:$Rm)), FPR32:$Ra)), (FNMADDssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>; -def : Pat<(fsub (fneg FPR32:$Ra), (fmul FPR32:$Rn, FPR32:$Rm)), +def : Pat<(f32 (fsub (f32 (fneg FPR32:$Ra)), (f32 (fmul FPR32:$Rn, FPR32:$Rm)))), (FNMSUBssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>; -def : Pat<(fadd FPR64:$Ra, (fmul (f64 FPR64:$Rn), FPR64:$Rm)), +def : Pat<(f64 (fadd FPR64:$Ra, (f64 (fmul FPR64:$Rn, FPR64:$Rm)))), (FMADDdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>; -def : Pat<(fsub FPR64:$Ra, (fmul (f64 FPR64:$Rn), FPR64:$Rm)), +def : Pat<(f64 (fsub FPR64:$Ra, (f64 (fmul FPR64:$Rn, FPR64:$Rm)))), (FMSUBdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>; -def : Pat<(fsub (fmul (f64 FPR64:$Rn), FPR64:$Rm), FPR64:$Ra), +def : Pat<(f64 (fsub (f64 (fmul FPR64:$Rn, FPR64:$Rm)), FPR64:$Ra)), (FNMADDdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>; -def : Pat<(fsub (fneg (f64 FPR64:$Ra)), (fmul FPR64:$Rn, FPR64:$Rm)), +def : Pat<(f64 (fsub (f64 (fneg FPR64:$Ra)), (f64 (fmul FPR64:$Rn, FPR64:$Rm)))), (FNMSUBdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>; } diff --git a/lib/Target/AArch64/AArch64InstrNEON.td b/lib/Target/AArch64/AArch64InstrNEON.td index 5506affc07..4bd5a67ffc 100644 --- a/lib/Target/AArch64/AArch64InstrNEON.td +++ b/lib/Target/AArch64/AArch64InstrNEON.td @@ -2504,11 +2504,12 @@ defm UABDLvvv : NeonI_3VDL_zext<0b1, 0b0111, "uabdl", int_arm_neon_vabdu, 1>; multiclass NeonI_Op_High<SDPatternOperator op> { def _16B : PatFrag<(ops node:$Rn, node:$Rm), - (op (Neon_top16B node:$Rn), (Neon_top16B node:$Rm))>; + (op (v8i8 (Neon_top16B node:$Rn)), (v8i8 (Neon_top16B node:$Rm)))>; def _8H : PatFrag<(ops node:$Rn, node:$Rm), - (op (Neon_top8H node:$Rn), (Neon_top8H node:$Rm))>; + (op (v4i16 (Neon_top8H node:$Rn)), (v4i16 (Neon_top8H node:$Rm)))>; def _4S : PatFrag<(ops node:$Rn, node:$Rm), - (op (Neon_top4S node:$Rn), (Neon_top4S node:$Rm))>; + (op (v2i32 (Neon_top4S node:$Rn)), (v2i32 (Neon_top4S node:$Rm)))>; + } defm NI_sabdl_hi : NeonI_Op_High<int_arm_neon_vabds>; @@ -2868,9 +2869,25 @@ multiclass NeonI_Scalar3Same_BHSD_sizes<bit u, bits<5> opcode, } } -class Neon_Scalar_D_size_patterns<SDPatternOperator opnode, Instruction INSTD> - : Pat<(v1i64 (opnode (v1i64 VPR64:$Rn), (v1i64 VPR64:$Rm))), - (INSTD VPR64:$Rn, VPR64:$Rm)>; +multiclass Neon_Scalar_D_size_patterns<SDPatternOperator opnode, + Instruction INSTD> { + def : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm))), + (INSTD FPR64:$Rn, FPR64:$Rm)>; +} + +multiclass Neon_Scalar_BHSD_size_patterns<SDPatternOperator opnode, + Instruction INSTB, Instruction INSTH, + Instruction INSTS, Instruction INSTD> + : Neon_Scalar_D_size_patterns<opnode, INSTD> { + def: Pat<(v1i8 (opnode (v1i8 FPR8:$Rn), (v1i8 FPR8:$Rm))), + (INSTB FPR8:$Rn, FPR8:$Rm)>; + + def: Pat<(v1i16 (opnode (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))), + (INSTH FPR16:$Rn, FPR16:$Rm)>; + + def: Pat<(v1i32 (opnode (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))), + (INSTS FPR32:$Rn, FPR32:$Rm)>; +} // Scalar Integer Add let isCommutable = 1 in { @@ -2880,9 +2897,15 @@ def ADDddd : NeonI_Scalar3Same_D_size<0b0, 0b10000, "add">; // Scalar Integer Sub def SUBddd : NeonI_Scalar3Same_D_size<0b1, 0b10000, "sub">; -// Pattern for Scalar Integer Add and Sub with D register -def : Neon_Scalar_D_size_patterns<add, ADDddd>; -def : Neon_Scalar_D_size_patterns<sub, SUBddd>; +// Pattern for Scalar Integer Add and Sub with D register only +defm : Neon_Scalar_D_size_patterns<add, ADDddd>; +defm : Neon_Scalar_D_size_patterns<sub, SUBddd>; + +// Patterns to match llvm.aarch64.* intrinsic for Scalar Add, Sub +defm : Neon_Scalar_D_size_patterns<int_aarch64_neon_vaddds, ADDddd>; +defm : Neon_Scalar_D_size_patterns<int_aarch64_neon_vadddu, ADDddd>; +defm : Neon_Scalar_D_size_patterns<int_aarch64_neon_vsubds, SUBddd>; +defm : Neon_Scalar_D_size_patterns<int_aarch64_neon_vsubdu, SUBddd>; // Scalar Integer Saturating Add (Signed, Unsigned) defm SQADD : NeonI_Scalar3Same_BHSD_sizes<0b0, 0b00001, "sqadd", 1>; @@ -2892,40 +2915,160 @@ defm UQADD : NeonI_Scalar3Same_BHSD_sizes<0b1, 0b00001, "uqadd", 1>; defm SQSUB : NeonI_Scalar3Same_BHSD_sizes<0b0, 0b00101, "sqsub", 0>; defm UQSUB : NeonI_Scalar3Same_BHSD_sizes<0b1, 0b00101, "uqsub", 0>; -// Patterns for Scalar Integer Saturating Add, Sub with D register only -def : Neon_Scalar_D_size_patterns<int_arm_neon_vqadds, SQADDddd>; -def : Neon_Scalar_D_size_patterns<int_arm_neon_vqaddu, UQADDddd>; -def : Neon_Scalar_D_size_patterns<int_arm_neon_vqsubs, SQSUBddd>; -def : Neon_Scalar_D_size_patterns<int_arm_neon_vqsubu, UQSUBddd>; +// Patterns to match llvm.arm.* intrinsic for +// Scalar Integer Saturating Add, Sub (Signed, Unsigned) +defm : Neon_Scalar_D_size_patterns<int_arm_neon_vqadds, SQADDddd>; +defm : Neon_Scalar_D_size_patterns<int_arm_neon_vqaddu, UQADDddd>; +defm : Neon_Scalar_D_size_patterns<int_arm_neon_vqsubs, SQSUBddd>; +defm : Neon_Scalar_D_size_patterns<int_arm_neon_vqsubu, UQSUBddd>; + +// Patterns to match llvm.aarch64.* intrinsic for +// Scalar Integer Saturating Add, Sub (Signed, Unsigned) +defm : Neon_Scalar_BHSD_size_patterns<int_aarch64_neon_vqadds, SQADDbbb, SQADDhhh, + SQADDsss, SQADDddd>; +defm : Neon_Scalar_BHSD_size_patterns<int_aarch64_neon_vqaddu, UQADDbbb, UQADDhhh, + UQADDsss, UQADDddd>; +defm : Neon_Scalar_BHSD_size_patterns<int_aarch64_neon_vqsubs, SQSUBbbb, SQSUBhhh, + SQSUBsss, SQSUBddd>; +defm : Neon_Scalar_BHSD_size_patterns<int_aarch64_neon_vqsubu, UQSUBbbb, UQSUBhhh, + UQSUBsss, UQSUBddd>; // Scalar Integer Shift Left (Signed, Unsigned) def SSHLddd : NeonI_Scalar3Same_D_size<0b0, 0b01000, "sshl">; def USHLddd : NeonI_Scalar3Same_D_size<0b1, 0b01000, "ushl">; +// Patterns to match llvm.arm.* intrinsic for +// Scalar Integer Shift Left (Signed, Unsigned) +defm : Neon_Scalar_D_size_patterns<int_arm_neon_vshifts, SSHLddd>; +defm : Neon_Scalar_D_size_patterns<int_arm_neon_vshiftu, USHLddd>; + +// Patterns to match llvm.aarch64.* intrinsic for +// Scalar Integer Shift Left (Signed, Unsigned) +defm : Neon_Scalar_D_size_patterns<int_aarch64_neon_vshlds, SSHLddd>; +defm : Neon_Scalar_D_size_patterns<int_aarch64_neon_vshldu, USHLddd>; + // Scalar Integer Saturating Shift Left (Signed, Unsigned) defm SQSHL: NeonI_Scalar3Same_BHSD_sizes<0b0, 0b01001, "sqshl", 0>; defm UQSHL: NeonI_Scalar3Same_BHSD_sizes<0b1, 0b01001, "uqshl", 0>; -// Scalar Integer Rouding Shift Left (Signed, Unsigned) +// Patterns to match llvm.aarch64.* intrinsic for +// Scalar Integer Saturating Shift Letf (Signed, Unsigned) +defm : Neon_Scalar_BHSD_size_patterns<int_aarch64_neon_vqshls, SQSHLbbb, SQSHLhhh, + SQSHLsss, SQSHLddd>; +defm : Neon_Scalar_BHSD_size_patterns<int_aarch64_neon_vqshlu, UQSHLbbb, UQSHLhhh, + UQSHLsss, UQSHLddd>; + +// Patterns to match llvm.arm.* intrinsic for +// Scalar Integer Saturating Shift Letf (Signed, Unsigned) +defm : Neon_Scalar_D_size_patterns<int_arm_neon_vqshifts, SQSHLddd>; +defm : Neon_Scalar_D_size_patterns<int_arm_neon_vqshiftu, UQSHLddd>; + +// Scalar Integer Rounding Shift Left (Signed, Unsigned) def SRSHLddd: NeonI_Scalar3Same_D_size<0b0, 0b01010, "srshl">; def URSHLddd: NeonI_Scalar3Same_D_size<0b1, 0b01010, "urshl">; +// Patterns to match llvm.aarch64.* intrinsic for +// Scalar Integer Rounding Shift Left (Signed, Unsigned) +defm : Neon_Scalar_D_size_patterns<int_aarch64_neon_vrshlds, SRSHLddd>; +defm : Neon_Scalar_D_size_patterns<int_aarch64_neon_vrshldu, URSHLddd>; + +// Patterns to match llvm.arm.* intrinsic for +// Scalar Integer Rounding Shift Left (Signed, Unsigned) +defm : Neon_Scalar_D_size_patterns<int_arm_neon_vrshifts, SRSHLddd>; +defm : Neon_Scalar_D_size_patterns<int_arm_neon_vrshiftu, URSHLddd>; + // Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned) defm SQRSHL: NeonI_Scalar3Same_BHSD_sizes<0b0, 0b01011, "sqrshl", 0>; defm UQRSHL: NeonI_Scalar3Same_BHSD_sizes<0b1, 0b01011, "uqrshl", 0>; -// Patterns for Scalar Integer Shift Lef, Saturating Shift Left, -// Rounding Shift Left, Rounding Saturating Shift Left with D register only -def : Neon_Scalar_D_size_patterns<int_arm_neon_vshifts, SSHLddd>; -def : Neon_Scalar_D_size_patterns<int_arm_neon_vshiftu, USHLddd>; -def : Neon_Scalar_D_size_patterns<shl, SSHLddd>; -def : Neon_Scalar_D_size_patterns<shl, USHLddd>; -def : Neon_Scalar_D_size_patterns<int_arm_neon_vqshifts, SQSHLddd>; -def : Neon_Scalar_D_size_patterns<int_arm_neon_vqshiftu, UQSHLddd>; -def : Neon_Scalar_D_size_patterns<int_arm_neon_vrshifts, SRSHLddd>; -def : Neon_Scalar_D_size_patterns<int_arm_neon_vrshiftu, URSHLddd>; -def : Neon_Scalar_D_size_patterns<int_arm_neon_vqrshifts, SQRSHLddd>; -def : Neon_Scalar_D_size_patterns<int_arm_neon_vqrshiftu, UQRSHLddd>; +// Patterns to match llvm.aarch64.* intrinsic for +// Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned) +defm : Neon_Scalar_BHSD_size_patterns<int_aarch64_neon_vqrshls, SQRSHLbbb, SQRSHLhhh, + SQRSHLsss, SQRSHLddd>; +defm : Neon_Scalar_BHSD_size_patterns<int_aarch64_neon_vqrshlu, UQRSHLbbb, UQRSHLhhh, + UQRSHLsss, UQRSHLddd>; + +// Patterns to match llvm.arm.* intrinsic for +// Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned) +defm : Neon_Scalar_D_size_patterns<int_arm_neon_vqrshifts, SQRSHLddd>; +defm : Neon_Scalar_D_size_patterns<int_arm_neon_vqrshiftu, UQRSHLddd>; + +// Scalar Reduce Pairwise + +multiclass NeonI_ScalarPair_D_sizes<bit u, bit size, bits<5> opcode, + string asmop, bit Commutable = 0> { + let isCommutable = Commutable in { + def _D_2D : NeonI_ScalarPair<u, {size, 0b1}, opcode, + (outs FPR64:$Rd), (ins VPR128:$Rn), + !strconcat(asmop, " $Rd, $Rn.2d"), + [], + NoItinerary>; + } +} + +multiclass NeonI_ScalarPair_SD_sizes<bit u, bit size, bits<5> opcode, + string asmop, bit Commutable = 0> + : NeonI_ScalarPair_D_sizes<u, size, opcode, asmop, Commutable> { + let isCommutable = Commutable in { + def _S_2S : NeonI_ScalarPair<u, {size, 0b0}, opcode, + (outs FPR32:$Rd), (ins VPR64:$Rn), + !strconcat(asmop, " $Rd, $Rn.2s"), + [], + NoItinerary>; + } +} + +// Scalar Reduce Addition Pairwise (Integer) with +// Pattern to match llvm.arm.* intrinsic +defm ADDPvv : NeonI_ScalarPair_D_sizes<0b0, 0b1, 0b11011, "addp", 0>; + +// Pattern to match llvm.aarch64.* intrinsic for +// Scalar Reduce Addition Pairwise (Integer) +def : Pat<(v1i64 (int_aarch64_neon_vpadd (v2i64 VPR128:$Rn))), + (ADDPvv_D_2D VPR128:$Rn)>; + +// Scalar Reduce Addition Pairwise (Floating Point) +defm FADDPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01101, "faddp", 0>; + +// Scalar Reduce Maximum Pairwise (Floating Point) +defm FMAXPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01111, "fmaxp", 0>; + +// Scalar Reduce Minimum Pairwise (Floating Point) +defm FMINPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b1, 0b01111, "fminp", 0>; + +// Scalar Reduce maxNum Pairwise (Floating Point) +defm FMAXNMPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01100, "fmaxnmp", 0>; + +// Scalar Reduce minNum Pairwise (Floating Point) +defm FMINNMPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b1, 0b01100, "fminnmp", 0>; + +multiclass Neon_ScalarPair_SD_size_patterns<SDPatternOperator opnodeS, + SDPatternOperator opnodeD, + Instruction INSTS, + Instruction INSTD> { + def : Pat<(v1f32 (opnodeS (v2f32 VPR64:$Rn))), + (INSTS VPR64:$Rn)>; + def : Pat<(v1f64 (opnodeD (v2f64 VPR128:$Rn))), + (INSTD VPR128:$Rn)>; +} + +// Patterns to match llvm.aarch64.* intrinsic for +// Scalar Reduce Add, Max, Min, MaxiNum, MinNum Pairwise (Floating Point) +defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpfadd, + int_aarch64_neon_vpfaddq, FADDPvv_S_2S, FADDPvv_D_2D>; + +defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpmax, + int_aarch64_neon_vpmaxq, FMAXPvv_S_2S, FMAXPvv_D_2D>; + +defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpmin, + int_aarch64_neon_vpminq, FMINPvv_S_2S, FMINPvv_D_2D>; + +defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpfmaxnm, + int_aarch64_neon_vpfmaxnmq, FMAXNMPvv_S_2S, FMAXNMPvv_D_2D>; + +defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpfminnm, + int_aarch64_neon_vpfminnmq, FMINNMPvv_S_2S, FMINNMPvv_D_2D>; + //===----------------------------------------------------------------------===// @@ -2999,6 +3142,14 @@ def : Pat<(v16i8 (bitconvert (v2f64 VPR128:$src))), (v16i8 VPR128:$src)>; // ...and scalar bitcasts... +def : Pat<(f16 (bitconvert (v1i16 FPR16:$src))), (f16 FPR16:$src)>; +def : Pat<(f32 (bitconvert (v1i32 FPR32:$src))), (f32 FPR32:$src)>; +def : Pat<(f64 (bitconvert (v1i64 FPR64:$src))), (f64 FPR64:$src)>; +def : Pat<(f32 (bitconvert (v1f32 FPR32:$src))), (f32 FPR32:$src)>; +def : Pat<(f64 (bitconvert (v1f64 FPR64:$src))), (f64 FPR64:$src)>; + +def : Pat<(i64 (bitconvert (v1i64 FPR64:$src))), (FMOVxd $src)>; +def : Pat<(i32 (bitconvert (v1i32 FPR32:$src))), (FMOVws $src)>; def : Pat<(v8i8 (bitconvert (v1i64 VPR64:$src))), (v8i8 VPR64:$src)>; def : Pat<(v4i16 (bitconvert (v1i64 VPR64:$src))), (v4i16 VPR64:$src)>; @@ -3017,6 +3168,15 @@ def : Pat<(f128 (bitconvert (v2i64 VPR128:$src))), (f128 VPR128:$src)>; def : Pat<(f128 (bitconvert (v4f32 VPR128:$src))), (f128 VPR128:$src)>; def : Pat<(f128 (bitconvert (v2f64 VPR128:$src))), (f128 VPR128:$src)>; +def : Pat<(v1i16 (bitconvert (f16 FPR16:$src))), (v1i16 FPR16:$src)>; +def : Pat<(v1i32 (bitconvert (f32 FPR32:$src))), (v1i32 FPR32:$src)>; +def : Pat<(v1i64 (bitconvert (f64 FPR64:$src))), (v1i64 FPR64:$src)>; +def : Pat<(v1f32 (bitconvert (f32 FPR32:$src))), (v1f32 FPR32:$src)>; +def : Pat<(v1f64 (bitconvert (f64 FPR64:$src))), (v1f64 FPR64:$src)>; + +def : Pat<(v1i64 (bitconvert (i64 GPR64:$src))), (FMOVdx $src)>; +def : Pat<(v1i32 (bitconvert (i32 GPR32:$src))), (FMOVsw $src)>; + def : Pat<(v8i8 (bitconvert (f64 FPR64:$src))), (v8i8 FPR64:$src)>; def : Pat<(v4i16 (bitconvert (f64 FPR64:$src))), (v4i16 FPR64:$src)>; def : Pat<(v2i32 (bitconvert (f64 FPR64:$src))), (v2i32 FPR64:$src)>; @@ -3349,8 +3509,6 @@ def UMOVwh_pattern : Neon_UMOV_pattern<v8i16, v4i16, i32, neon_uimm3_bare, neon_uimm2_bare, UMOVwh>; def UMOVws_pattern : Neon_UMOV_pattern<v4i32, v2i32, i32, neon_uimm2_bare, neon_uimm1_bare, UMOVws>; -def UMOVxd_pattern : Neon_UMOV_pattern<v2i64, v1i64, i64, neon_uimm1_bare, - neon_uimm0_bare, UMOVxd>; def : Pat<(i32 (and (i32 (vector_extract @@ -3389,3 +3547,40 @@ def : Pat<(i64 (zext (UMOVxd (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64), neon_uimm0_bare:$Imm)>; +// Additional copy patterns for scalar types +def : Pat<(i32 (vector_extract (v1i8 FPR8:$Rn), (i64 0))), + (UMOVwb (v16i8 + (SUBREG_TO_REG (i64 0), FPR8:$Rn, sub_8)), (i64 0))>; + +def : Pat<(i32 (vector_extract (v1i16 FPR16:$Rn), (i64 0))), + (UMOVwh (v8i16 + (SUBREG_TO_REG (i64 0), FPR16:$Rn, sub_16)), (i64 0))>; + +def : Pat<(i32 (vector_extract (v1i32 FPR32:$Rn), (i64 0))), + (FMOVws FPR32:$Rn)>; + +def : Pat<(i64 (vector_extract (v1i64 FPR64:$Rn), (i64 0))), + (FMOVxd FPR64:$Rn)>; + +def : Pat<(f64 (vector_extract (v1f64 FPR64:$Rn), (i64 0))), + (f64 FPR64:$Rn)>; + +def : Pat<(f32 (vector_extract (v1f32 FPR32:$Rn), (i64 0))), + (f32 FPR32:$Rn)>; + +def : Pat<(v1i8 (scalar_to_vector GPR32:$Rn)), + (v1i8 (EXTRACT_SUBREG (v16i8 + (INSbw (v16i8 (IMPLICIT_DEF)), $Rn, (i64 0))), + sub_8))>; + +def : Pat<(v1i16 (scalar_to_vector GPR32:$Rn)), + (v1i16 (EXTRACT_SUBREG (v8i16 + (INShw (v8i16 (IMPLICIT_DEF)), $Rn, (i64 0))), + sub_16))>; + +def : Pat<(v1i32 (scalar_to_vector GPR32:$src)), + (FMOVsw $src)>; + +def : Pat<(v1i64 (scalar_to_vector GPR64:$src)), + (FMOVdx $src)>; + diff --git a/lib/Target/AArch64/AArch64RegisterInfo.td b/lib/Target/AArch64/AArch64RegisterInfo.td index e0eca23c64..089cc086e9 100644 --- a/lib/Target/AArch64/AArch64RegisterInfo.td +++ b/lib/Target/AArch64/AArch64RegisterInfo.td @@ -133,19 +133,19 @@ foreach Index = 0-31 in { } -def FPR8 : RegisterClass<"AArch64", [i8], 8, +def FPR8 : RegisterClass<"AArch64", [i8, v1i8], 8, (sequence "B%u", 0, 31)> { } -def FPR16 : RegisterClass<"AArch64", [f16], 16, +def FPR16 : RegisterClass<"AArch64", [f16, v1i16], 16, (sequence "H%u", 0, 31)> { } -def FPR32 : RegisterClass<"AArch64", [f32], 32, +def FPR32 : RegisterClass<"AArch64", [f32, v1i32, v1f32], 32, (sequence "S%u", 0, 31)> { } -def FPR64 : RegisterClass<"AArch64", [f64, v2f32, v2i32, v4i16, v8i8, v1i64], +def FPR64 : RegisterClass<"AArch64", [f64, v2f32, v2i32, v4i16, v8i8, v1i64, v1f64], 64, (sequence "D%u", 0, 31)>; def FPR128 : RegisterClass<"AArch64", |