From 38348240d179131d9292c28c7540ced97b29ed8b Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Tue, 3 Dec 2013 07:38:30 +0000 Subject: Merging r196151: ------------------------------------------------------------------------ r196151 | mcrosier | 2013-12-02 13:05:16 -0800 (Mon, 02 Dec 2013) | 2 lines [AArch64] Implemented vcopy_lane patterns using scalar DUP instruction. Patch by Ana Pazos! ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_34@196230 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/AArch64InstrNEON.td | 125 +++++++++++++++++++++++++-------- 1 file changed, 97 insertions(+), 28 deletions(-) (limited to 'lib/Target/AArch64') diff --git a/lib/Target/AArch64/AArch64InstrNEON.td b/lib/Target/AArch64/AArch64InstrNEON.td index 6a339c8b09..581ebae248 100644 --- a/lib/Target/AArch64/AArch64InstrNEON.td +++ b/lib/Target/AArch64/AArch64InstrNEON.td @@ -5731,28 +5731,13 @@ multiclass NeonI_Scalar_DUP_Elt_pattern; } -multiclass NeonI_SDUP { - def : Pat<(ResTy (GetLow VPR128:$Rn)), - (ResTy (EXTRACT_SUBREG (OpTy VPR128:$Rn), sub_64))>; - def : Pat<(ResTy (GetHigh VPR128:$Rn)), - (ResTy (DUPdv_D (OpTy VPR128:$Rn), 1))>; -} - -defm : NeonI_SDUP; -defm : NeonI_SDUP; -defm : NeonI_SDUP; -defm : NeonI_SDUP; -defm : NeonI_SDUP; -defm : NeonI_SDUP; - // Patterns for vector extract of FP data using scalar DUP instructions defm : NeonI_Scalar_DUP_Elt_pattern; defm : NeonI_Scalar_DUP_Elt_pattern; -multiclass NeonI_Scalar_DUP_Vec_pattern { @@ -5764,14 +5749,87 @@ multiclass NeonI_Scalar_DUP_Vec_pattern; } -// Patterns for extract subvectors of v1ix data using scalar DUP instructions -defm : NeonI_Scalar_DUP_Vec_pattern; -defm : NeonI_Scalar_DUP_Vec_pattern; -defm : NeonI_Scalar_DUP_Vec_pattern; +// Patterns for extract subvectors of v1ix data using scalar DUP instructions. +defm : NeonI_Scalar_DUP_Ext_Vec_pattern; +defm : NeonI_Scalar_DUP_Ext_Vec_pattern; +defm : NeonI_Scalar_DUP_Ext_Vec_pattern; + +multiclass NeonI_Scalar_DUP_Copy_pattern1 { + + def : Pat<(ResTy (vector_insert (ResTy undef), + (ElemTy (vector_extract (OpTy VPR128:$Rn), OpImm:$Imm)), + (neon_uimm0_bare:$Imm))), + (ResTy (DUPI (OpTy VPR128:$Rn), OpImm:$Imm))>; + + def : Pat<(ResTy (vector_insert (ResTy undef), + (ElemTy (vector_extract (OpNTy VPR64:$Rn), OpNImm:$Imm)), + (OpNImm:$Imm))), + (ResTy (DUPI + (ExTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), + OpNImm:$Imm))>; +} + +multiclass NeonI_Scalar_DUP_Copy_pattern2 { + + def : Pat<(ResTy (scalar_to_vector + (ElemTy (vector_extract (OpTy VPR128:$Rn), OpImm:$Imm)))), + (ResTy (DUPI (OpTy VPR128:$Rn), OpImm:$Imm))>; + + def : Pat<(ResTy (scalar_to_vector + (ElemTy (vector_extract (OpNTy VPR64:$Rn), OpNImm:$Imm)))), + (ResTy (DUPI + (ExTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), + OpNImm:$Imm))>; +} + +// Patterns for vector copy to v1ix and v1fx vectors using scalar DUP +// instructions. +defm : NeonI_Scalar_DUP_Copy_pattern1; +defm : NeonI_Scalar_DUP_Copy_pattern1; +defm : NeonI_Scalar_DUP_Copy_pattern1; +defm : NeonI_Scalar_DUP_Copy_pattern1; +defm : NeonI_Scalar_DUP_Copy_pattern1; +defm : NeonI_Scalar_DUP_Copy_pattern1; +defm : NeonI_Scalar_DUP_Copy_pattern2; +defm : NeonI_Scalar_DUP_Copy_pattern2; +defm : NeonI_Scalar_DUP_Copy_pattern2; +defm : NeonI_Scalar_DUP_Copy_pattern2; +defm : NeonI_Scalar_DUP_Copy_pattern2; +defm : NeonI_Scalar_DUP_Copy_pattern2; multiclass NeonI_Scalar_DUP_alias; defm : NeonI_Scalar_DUP_alias<"mov", ".s", DUPsv_S, neon_uimm2_bare, FPR32>; defm : NeonI_Scalar_DUP_alias<"mov", ".d", DUPdv_D, neon_uimm1_bare, FPR64>; +multiclass NeonI_SDUP { + def : Pat<(ResTy (GetLow VPR128:$Rn)), + (ResTy (EXTRACT_SUBREG (OpTy VPR128:$Rn), sub_64))>; + def : Pat<(ResTy (GetHigh VPR128:$Rn)), + (ResTy (DUPdv_D (OpTy VPR128:$Rn), 1))>; +} + +defm : NeonI_SDUP; +defm : NeonI_SDUP; +defm : NeonI_SDUP; +defm : NeonI_SDUP; +defm : NeonI_SDUP; +defm : NeonI_SDUP; //===----------------------------------------------------------------------===// // Non-Instruction Patterns @@ -7110,13 +7182,10 @@ def : Pat<(v1i32 (scalar_to_vector GPR32:$src)), def : Pat<(v1i64 (scalar_to_vector GPR64:$src)), (FMOVdx $src)>; -def : Pat<(v1f32 (scalar_to_vector (f32 FPR32:$Rn))), - (v1f32 FPR32:$Rn)>; -def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$Rn))), - (v1f64 FPR64:$Rn)>; - def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$src))), (FMOVdd $src)>; +def : Pat<(v1f32 (scalar_to_vector (f32 FPR32:$src))), + (FMOVss $src)>; def : Pat<(v2f64 (scalar_to_vector (f64 FPR64:$src))), (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), -- cgit v1.2.3