summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorChad Rosier <mcrosier@codeaurora.org>2013-12-02 21:05:16 +0000
committerChad Rosier <mcrosier@codeaurora.org>2013-12-02 21:05:16 +0000
commitd4809bb0e389787a1682d45e01290a3da8d631e3 (patch)
treeb6a96a88c1d01d17aff0c8100a45100fd6253132 /lib
parent6e2cf928d0bac91866463f25247625061dfebf34 (diff)
downloadllvm-d4809bb0e389787a1682d45e01290a3da8d631e3.tar.gz
llvm-d4809bb0e389787a1682d45e01290a3da8d631e3.tar.bz2
llvm-d4809bb0e389787a1682d45e01290a3da8d631e3.tar.xz
[AArch64] Implemented vcopy_lane patterns using scalar DUP instruction.
Patch by Ana Pazos!
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@196151 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r--lib/Target/AArch64/AArch64InstrNEON.td125
1 files changed, 97 insertions, 28 deletions
diff --git a/lib/Target/AArch64/AArch64InstrNEON.td b/lib/Target/AArch64/AArch64InstrNEON.td
index 6a339c8b09..581ebae248 100644
--- a/lib/Target/AArch64/AArch64InstrNEON.td
+++ b/lib/Target/AArch64/AArch64InstrNEON.td
@@ -5731,28 +5731,13 @@ multiclass NeonI_Scalar_DUP_Elt_pattern<Instruction DUPI, ValueType ResTy,
OpNImm:$Imm))>;
}
-multiclass NeonI_SDUP<PatFrag GetLow, PatFrag GetHigh,
- ValueType ResTy, ValueType OpTy> {
- def : Pat<(ResTy (GetLow VPR128:$Rn)),
- (ResTy (EXTRACT_SUBREG (OpTy VPR128:$Rn), sub_64))>;
- def : Pat<(ResTy (GetHigh VPR128:$Rn)),
- (ResTy (DUPdv_D (OpTy VPR128:$Rn), 1))>;
-}
-
-defm : NeonI_SDUP<Neon_Low16B, Neon_High16B, v8i8, v16i8>;
-defm : NeonI_SDUP<Neon_Low8H, Neon_High8H, v4i16, v8i16>;
-defm : NeonI_SDUP<Neon_Low4S, Neon_High4S, v2i32, v4i32>;
-defm : NeonI_SDUP<Neon_Low2D, Neon_High2D, v1i64, v2i64>;
-defm : NeonI_SDUP<Neon_Low4float, Neon_High4float, v2f32, v4f32>;
-defm : NeonI_SDUP<Neon_Low2double, Neon_High2double, v1f64, v2f64>;
-
// Patterns for vector extract of FP data using scalar DUP instructions
defm : NeonI_Scalar_DUP_Elt_pattern<DUPsv_S, f32,
v4f32, neon_uimm2_bare, v2f32, v4f32, neon_uimm1_bare>;
defm : NeonI_Scalar_DUP_Elt_pattern<DUPdv_D, f64,
v2f64, neon_uimm1_bare, v1f64, v2f64, neon_uimm0_bare>;
-multiclass NeonI_Scalar_DUP_Vec_pattern<Instruction DUPI,
+multiclass NeonI_Scalar_DUP_Ext_Vec_pattern<Instruction DUPI,
ValueType ResTy, ValueType OpTy,Operand OpLImm,
ValueType NOpTy, ValueType ExTy, Operand OpNImm> {
@@ -5764,14 +5749,87 @@ multiclass NeonI_Scalar_DUP_Vec_pattern<Instruction DUPI,
(ExTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
OpNImm:$Imm))>;
}
-// Patterns for extract subvectors of v1ix data using scalar DUP instructions
-defm : NeonI_Scalar_DUP_Vec_pattern<DUPbv_B,
- v1i8, v16i8, neon_uimm4_bare, v8i8, v16i8, neon_uimm3_bare>;
-defm : NeonI_Scalar_DUP_Vec_pattern<DUPhv_H,
- v1i16, v8i16, neon_uimm3_bare, v4i16, v8i16, neon_uimm2_bare>;
-defm : NeonI_Scalar_DUP_Vec_pattern<DUPsv_S,
- v1i32, v4i32, neon_uimm2_bare, v2i32, v4i32, neon_uimm1_bare>;
+// Patterns for extract subvectors of v1ix data using scalar DUP instructions.
+// Each defm passes, in the order declared by NeonI_Scalar_DUP_Ext_Vec_pattern:
+// DUPI, ResTy, OpTy, OpLImm, NOpTy, ExTy, OpNImm.
+// One instantiation per element width: byte (DUPbv_B), half (DUPhv_H) and
+// word (DUPsv_S).
+defm : NeonI_Scalar_DUP_Ext_Vec_pattern<DUPbv_B, v1i8, v16i8, neon_uimm4_bare,
+ v8i8, v16i8, neon_uimm3_bare>;
+defm : NeonI_Scalar_DUP_Ext_Vec_pattern<DUPhv_H, v1i16, v8i16, neon_uimm3_bare,
+ v4i16, v8i16, neon_uimm2_bare>;
+defm : NeonI_Scalar_DUP_Ext_Vec_pattern<DUPsv_S, v1i32, v4i32, neon_uimm2_bare,
+ v2i32, v4i32, neon_uimm1_bare>;
+
+// Emits two anonymous selection patterns that map
+//   (vector_insert (ResTy undef), (vector_extract <vec>, <lane>), <idx>)
+// onto the scalar DUP instruction DUPI.
+//   DUPI   - scalar DUP instruction used as the selection result.
+//   ResTy  - result vector type of the vector_insert.
+//   OpTy   - type of the VPR128 source vector; OpImm is its lane operand.
+//   ElemTy - scalar type produced by the vector_extract.
+//   OpNTy  - type of the VPR64 source vector; OpNImm is its lane operand.
+//   ExTy   - type given to the VPR64 source after SUBREG_TO_REG widening.
+// NOTE(review): in both patterns the extract lane and the insert index are
+// bound to the same name ($Imm). Confirm how TableGen treats the repeated
+// name (it may require both operands to be the same node), and whether the
+// insert index of the second pattern was meant to be neon_uimm0_bare like
+// the first one rather than OpNImm.
+multiclass NeonI_Scalar_DUP_Copy_pattern1<Instruction DUPI, ValueType ResTy,
+ ValueType OpTy, ValueType ElemTy,
+ Operand OpImm, ValueType OpNTy,
+ ValueType ExTy, Operand OpNImm> {
+
+ // 128-bit source: the VPR128 operand and the lane go to DUPI unchanged.
+ def : Pat<(ResTy (vector_insert (ResTy undef),
+ (ElemTy (vector_extract (OpTy VPR128:$Rn), OpImm:$Imm)),
+ (neon_uimm0_bare:$Imm))),
+ (ResTy (DUPI (OpTy VPR128:$Rn), OpImm:$Imm))>;
+
+ // 64-bit source: wrap the VPR64 operand with SUBREG_TO_REG (sub_64) so it
+ // has type ExTy, then apply DUPI with the OpNImm lane.
+ def : Pat<(ResTy (vector_insert (ResTy undef),
+ (ElemTy (vector_extract (OpNTy VPR64:$Rn), OpNImm:$Imm)),
+ (OpNImm:$Imm))),
+ (ResTy (DUPI
+ (ExTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
+ OpNImm:$Imm))>;
+}
+
+// Emits two anonymous selection patterns that map
+//   (scalar_to_vector (vector_extract <vec>, <lane>))
+// onto the scalar DUP instruction DUPI.
+//   DUPI   - scalar DUP instruction used as the selection result.
+//   ResTy  - result vector type of the scalar_to_vector.
+//   OpTy   - type of the VPR128 source vector; OpImm is its lane operand.
+//   ElemTy - scalar type produced by the vector_extract.
+//   OpNTy  - type of the VPR64 source vector; OpNImm is its lane operand.
+//   ExTy   - type given to the VPR64 source after SUBREG_TO_REG widening.
+multiclass NeonI_Scalar_DUP_Copy_pattern2<Instruction DUPI, ValueType ResTy,
+ ValueType OpTy, ValueType ElemTy,
+ Operand OpImm, ValueType OpNTy,
+ ValueType ExTy, Operand OpNImm> {
+
+ // 128-bit source: the VPR128 operand and the lane go to DUPI unchanged.
+ def : Pat<(ResTy (scalar_to_vector
+ (ElemTy (vector_extract (OpTy VPR128:$Rn), OpImm:$Imm)))),
+ (ResTy (DUPI (OpTy VPR128:$Rn), OpImm:$Imm))>;
+
+ // 64-bit source: wrap the VPR64 operand with SUBREG_TO_REG (sub_64) so it
+ // has type ExTy, then apply DUPI with the OpNImm lane.
+ def : Pat<(ResTy (scalar_to_vector
+ (ElemTy (vector_extract (OpNTy VPR64:$Rn), OpNImm:$Imm)))),
+ (ResTy (DUPI
+ (ExTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
+ OpNImm:$Imm))>;
+}
+
+// Patterns for vector copy to v1ix and v1fx vectors using scalar DUP
+// instructions.
+// Argument order for both multiclasses: DUPI, ResTy, OpTy, ElemTy, OpImm,
+// OpNTy, ExTy, OpNImm. The first line of each defm describes the 128-bit
+// source form, the last line the 64-bit source form.
+// The v1i16 and v1i8 instantiations pass i32 as ElemTy.
+// The same six type combinations are instantiated for pattern1
+// (vector_insert form) and for pattern2 (scalar_to_vector form).
+defm : NeonI_Scalar_DUP_Copy_pattern1<DUPdv_D,
+ v1i64, v2i64, i64, neon_uimm1_bare,
+ v1i64, v2i64, neon_uimm0_bare>;
+defm : NeonI_Scalar_DUP_Copy_pattern1<DUPsv_S,
+ v1i32, v4i32, i32, neon_uimm2_bare,
+ v2i32, v4i32, neon_uimm1_bare>;
+defm : NeonI_Scalar_DUP_Copy_pattern1<DUPhv_H,
+ v1i16, v8i16, i32, neon_uimm3_bare,
+ v4i16, v8i16, neon_uimm2_bare>;
+defm : NeonI_Scalar_DUP_Copy_pattern1<DUPbv_B,
+ v1i8, v16i8, i32, neon_uimm4_bare,
+ v8i8, v16i8, neon_uimm3_bare>;
+defm : NeonI_Scalar_DUP_Copy_pattern1<DUPdv_D,
+ v1f64, v2f64, f64, neon_uimm1_bare,
+ v1f64, v2f64, neon_uimm0_bare>;
+defm : NeonI_Scalar_DUP_Copy_pattern1<DUPsv_S,
+ v1f32, v4f32, f32, neon_uimm2_bare,
+ v2f32, v4f32, neon_uimm1_bare>;
+defm : NeonI_Scalar_DUP_Copy_pattern2<DUPdv_D,
+ v1i64, v2i64, i64, neon_uimm1_bare,
+ v1i64, v2i64, neon_uimm0_bare>;
+defm : NeonI_Scalar_DUP_Copy_pattern2<DUPsv_S,
+ v1i32, v4i32, i32, neon_uimm2_bare,
+ v2i32, v4i32, neon_uimm1_bare>;
+defm : NeonI_Scalar_DUP_Copy_pattern2<DUPhv_H,
+ v1i16, v8i16, i32, neon_uimm3_bare,
+ v4i16, v8i16, neon_uimm2_bare>;
+defm : NeonI_Scalar_DUP_Copy_pattern2<DUPbv_B,
+ v1i8, v16i8, i32, neon_uimm4_bare,
+ v8i8, v16i8, neon_uimm3_bare>;
+defm : NeonI_Scalar_DUP_Copy_pattern2<DUPdv_D,
+ v1f64, v2f64, f64, neon_uimm1_bare,
+ v1f64, v2f64, neon_uimm0_bare>;
+defm : NeonI_Scalar_DUP_Copy_pattern2<DUPsv_S,
+ v1f32, v4f32, f32, neon_uimm2_bare,
+ v2f32, v4f32, neon_uimm1_bare>;
multiclass NeonI_Scalar_DUP_alias<string asmop, string asmlane,
Instruction DUPI, Operand OpImm,
@@ -5788,6 +5846,20 @@ defm : NeonI_Scalar_DUP_alias<"mov", ".h", DUPhv_H, neon_uimm3_bare, FPR16>;
defm : NeonI_Scalar_DUP_alias<"mov", ".s", DUPsv_S, neon_uimm2_bare, FPR32>;
defm : NeonI_Scalar_DUP_alias<"mov", ".d", DUPdv_D, neon_uimm1_bare, FPR64>;
+// Emits two anonymous selection patterns for a VPR128 source of type OpTy:
+//   GetLow  - selected as EXTRACT_SUBREG of the sub_64 subregister.
+//   GetHigh - selected as DUPdv_D with lane index 1.
+// ResTy is the result type of both patterns.
+multiclass NeonI_SDUP<PatFrag GetLow, PatFrag GetHigh, ValueType ResTy,
+ ValueType OpTy> {
+ def : Pat<(ResTy (GetLow VPR128:$Rn)),
+ (ResTy (EXTRACT_SUBREG (OpTy VPR128:$Rn), sub_64))>;
+ def : Pat<(ResTy (GetHigh VPR128:$Rn)),
+ (ResTy (DUPdv_D (OpTy VPR128:$Rn), 1))>;
+}
+
+// Instantiate NeonI_SDUP once per (GetLow, GetHigh, ResTy, OpTy) combination:
+// four integer vector types followed by the two floating-point vector types.
+defm : NeonI_SDUP<Neon_Low16B, Neon_High16B, v8i8, v16i8>;
+defm : NeonI_SDUP<Neon_Low8H, Neon_High8H, v4i16, v8i16>;
+defm : NeonI_SDUP<Neon_Low4S, Neon_High4S, v2i32, v4i32>;
+defm : NeonI_SDUP<Neon_Low2D, Neon_High2D, v1i64, v2i64>;
+defm : NeonI_SDUP<Neon_Low4float, Neon_High4float, v2f32, v4f32>;
+defm : NeonI_SDUP<Neon_Low2double, Neon_High2double, v1f64, v2f64>;
//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
@@ -7110,13 +7182,10 @@ def : Pat<(v1i32 (scalar_to_vector GPR32:$src)),
def : Pat<(v1i64 (scalar_to_vector GPR64:$src)),
(FMOVdx $src)>;
-def : Pat<(v1f32 (scalar_to_vector (f32 FPR32:$Rn))),
- (v1f32 FPR32:$Rn)>;
-def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$Rn))),
- (v1f64 FPR64:$Rn)>;
-
def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$src))),
(FMOVdd $src)>;
+def : Pat<(v1f32 (scalar_to_vector (f32 FPR32:$src))),
+ (FMOVss $src)>;
def : Pat<(v2f64 (scalar_to_vector (f64 FPR64:$src))),
(INSERT_SUBREG (v2f64 (IMPLICIT_DEF)),