summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Ana Pazos <apazos@codeaurora.org> 2013-11-21 08:16:15 +0000
committer Ana Pazos <apazos@codeaurora.org> 2013-11-21 08:16:15 +0000
commit 6345249972b73a400e31c0618c4d67411a0387e6 (patch)
tree d90029e3605e0c6d8202614f0c82e1ac2d453acb
parent 1c93766aa547f9cf049f8daa0e0049a8921b53d9 (diff)
downloadllvm-6345249972b73a400e31c0618c4d67411a0387e6.tar.gz
llvm-6345249972b73a400e31c0618c4d67411a0387e6.tar.bz2
llvm-6345249972b73a400e31c0618c4d67411a0387e6.tar.xz
Implemented Neon scalar vdup_lane intrinsics.
Fixed scalar dup alias and added test case.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@195330 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- lib/Target/AArch64/AArch64InstrNEON.td 25
-rw-r--r-- test/CodeGen/AArch64/neon-scalar-copy.ll 80
-rw-r--r-- test/MC/AArch64/neon-scalar-dup.s 26
3 files changed, 129 insertions, 2 deletions
diff --git a/lib/Target/AArch64/AArch64InstrNEON.td b/lib/Target/AArch64/AArch64InstrNEON.td
index 5b6168eb08..c0c572a62e 100644
--- a/lib/Target/AArch64/AArch64InstrNEON.td
+++ b/lib/Target/AArch64/AArch64InstrNEON.td
@@ -5883,16 +5883,37 @@ defm : NeonI_SDUP<Neon_low2D, Neon_High2D, v1i64, v2i64>;
defm : NeonI_SDUP<Neon_low4f, Neon_High4f, v2f32, v4f32>;
defm : NeonI_SDUP<Neon_low2d, Neon_High2d, v1f64, v2f64>;
-// Patterns for vector extract of FP data using scalar DUP instructions
+// Patterns for vector extract of FP data using scalar DUP instructions
defm : NeonI_Scalar_DUP_Elt_pattern<DUPsv_S, f32,
v4f32, neon_uimm2_bare, v2f32, v4f32, neon_uimm1_bare>;
defm : NeonI_Scalar_DUP_Elt_pattern<DUPdv_D, f64,
v2f64, neon_uimm1_bare, v1f64, v2f64, neon_uimm0_bare>;
+multiclass NeonI_Scalar_DUP_Vec_pattern<Instruction DUPI,
+ ValueType ResTy, ValueType OpTy,Operand OpLImm,
+ ValueType NOpTy, ValueType ExTy, Operand OpNImm> {
+ // Select DUPI for an extract_subvector whose source is a VPR128 register.
+ def : Pat<(ResTy (extract_subvector (OpTy VPR128:$Rn), OpLImm:$Imm)),
+ (ResTy (DUPI VPR128:$Rn, OpLImm:$Imm))>;
+
+ def : Pat<(ResTy (extract_subvector (NOpTy VPR64:$Rn), OpNImm:$Imm)),
+ (ResTy (DUPI
+ (ExTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
+ OpNImm:$Imm))>; // VPR64 source is first wrapped as ExTy via sub_64
+}
+// Patterns to extract v1i8/v1i16/v1i32 subvectors with scalar DUP instructions
+defm : NeonI_Scalar_DUP_Vec_pattern<DUPbv_B,
+ v1i8, v16i8, neon_uimm4_bare, v8i8, v16i8, neon_uimm3_bare>;
+defm : NeonI_Scalar_DUP_Vec_pattern<DUPhv_H,
+ v1i16, v8i16, neon_uimm3_bare, v4i16, v8i16, neon_uimm2_bare>;
+defm : NeonI_Scalar_DUP_Vec_pattern<DUPsv_S,
+ v1i32, v4i32, neon_uimm2_bare, v2i32, v4i32, neon_uimm1_bare>;
+
+
multiclass NeonI_Scalar_DUP_alias<string asmop, string asmlane,
Instruction DUPI, Operand OpImm,
RegisterClass ResRC> {
- def : NeonInstAlias<!strconcat(asmop, "$Rd, $Rn." # asmlane # "[$Imm]"),
+ def : NeonInstAlias<!strconcat(asmop, "$Rd, $Rn" # asmlane # "[$Imm]"), // asmlane is appended directly after $Rn
(DUPI ResRC:$Rd, VPR128:$Rn, OpImm:$Imm), 0b0>;
}
diff --git a/test/CodeGen/AArch64/neon-scalar-copy.ll b/test/CodeGen/AArch64/neon-scalar-copy.ll
new file mode 100644
index 0000000000..59f62374d4
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-scalar-copy.ll
@@ -0,0 +1,80 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
+
+define float @test_dup_sv2S(<2 x float> %v) {
+ ;CHECK: test_dup_sv2S
+ ;CHECK: dup {{s[0-9]+}}, {{v[0-9]+}}.s[1]
+ %tmp1 = extractelement <2 x float> %v, i32 1 ; read lane 1 as a scalar float
+ ret float %tmp1
+}
+
+define float @test_dup_sv4S(<4 x float> %v) {
+ ;CHECK: test_dup_sv4S
+ ;CHECK: dup {{s[0-9]+}}, {{v[0-9]+}}.s[0]
+ %tmp1 = extractelement <4 x float> %v, i32 0 ; read lane 0 as a scalar float
+ ret float %tmp1
+}
+
+define double @test_dup_dvD(<1 x double> %v) {
+ ;CHECK: test_dup_dvD
+ ;CHECK-NOT: dup {{d[0-9]+}}, {{v[0-9]+}}.d[0]
+ ;CHECK: ret
+ %tmp1 = extractelement <1 x double> %v, i32 0 ; lane 0 of a one-element vector; no dup should be emitted
+ ret double %tmp1
+}
+
+define double @test_dup_dv2D(<2 x double> %v) {
+ ;CHECK: test_dup_dv2D
+ ;CHECK: dup {{d[0-9]+}}, {{v[0-9]+}}.d[1]
+ %tmp1 = extractelement <2 x double> %v, i32 1 ; read lane 1 as a scalar double
+ ret double %tmp1
+}
+
+define <1 x i8> @test_vector_dup_bv16B(<16 x i8> %v1) {
+ ;CHECK: test_vector_dup_bv16B
+ ;CHECK: dup {{b[0-9]+}}, {{v[0-9]+}}.b[14]
+ %shuffle.i = shufflevector <16 x i8> %v1, <16 x i8> undef, <1 x i32> <i32 14> ; one-element result taken from lane 14
+ ret <1 x i8> %shuffle.i
+}
+
+define <1 x i8> @test_vector_dup_bv8B(<8 x i8> %v1) {
+ ;CHECK: test_vector_dup_bv8B
+ ;CHECK: dup {{b[0-9]+}}, {{v[0-9]+}}.b[7]
+ %shuffle.i = shufflevector <8 x i8> %v1, <8 x i8> undef, <1 x i32> <i32 7> ; one-element result taken from lane 7
+ ret <1 x i8> %shuffle.i
+}
+
+define <1 x i16> @test_vector_dup_hv8H(<8 x i16> %v1) {
+ ;CHECK: test_vector_dup_hv8H
+ ;CHECK: dup {{h[0-9]+}}, {{v[0-9]+}}.h[7]
+ %shuffle.i = shufflevector <8 x i16> %v1, <8 x i16> undef, <1 x i32> <i32 7> ; one-element result taken from lane 7
+ ret <1 x i16> %shuffle.i
+}
+
+define <1 x i16> @test_vector_dup_hv4H(<4 x i16> %v1) {
+ ;CHECK: test_vector_dup_hv4H
+ ;CHECK: dup {{h[0-9]+}}, {{v[0-9]+}}.h[3]
+ %shuffle.i = shufflevector <4 x i16> %v1, <4 x i16> undef, <1 x i32> <i32 3> ; one-element result taken from lane 3
+ ret <1 x i16> %shuffle.i
+}
+
+define <1 x i32> @test_vector_dup_sv4S(<4 x i32> %v1) {
+ ;CHECK: test_vector_dup_sv4S
+ ;CHECK: dup {{s[0-9]+}}, {{v[0-9]+}}.s[3]
+ %shuffle = shufflevector <4 x i32> %v1, <4 x i32> undef, <1 x i32> <i32 3> ; one-element result taken from lane 3
+ ret <1 x i32> %shuffle
+}
+
+define <1 x i32> @test_vector_dup_sv2S(<2 x i32> %v1) {
+ ;CHECK: test_vector_dup_sv2S
+ ;CHECK: dup {{s[0-9]+}}, {{v[0-9]+}}.s[1]
+ %shuffle = shufflevector <2 x i32> %v1, <2 x i32> undef, <1 x i32> <i32 1> ; one-element result taken from lane 1
+ ret <1 x i32> %shuffle
+}
+
+define <1 x i64> @test_vector_dup_dv2D(<2 x i64> %v1) {
+ ;CHECK: test_vector_dup_dv2D
+ ;CHECK: dup {{d[0-9]+}}, {{v[0-9]+}}.d[1]
+ %shuffle.i = shufflevector <2 x i64> %v1, <2 x i64> undef, <1 x i32> <i32 1> ; one-element result taken from lane 1
+ ret <1 x i64> %shuffle.i
+}
+
diff --git a/test/MC/AArch64/neon-scalar-dup.s b/test/MC/AArch64/neon-scalar-dup.s
index 64366f2edc..77c638df09 100644
--- a/test/MC/AArch64/neon-scalar-dup.s
+++ b/test/MC/AArch64/neon-scalar-dup.s
@@ -27,3 +27,29 @@
// CHECK: dup d3, v5.d[0] // encoding: [0xa3,0x04,0x08,0x5e]
// CHECK: dup d6, v5.d[1] // encoding: [0xa6,0x04,0x18,0x5e]
+//------------------------------------------------------------------------------
+// MOV aliases for Duplicate element (scalar); each is expected to print as DUP
+//------------------------------------------------------------------------------
+ mov b0, v0.b[15]
+ mov b1, v0.b[7]
+ mov b17, v0.b[0]
+ mov h5, v31.h[7]
+ mov h9, v1.h[4]
+ mov h11, v17.h[0]
+ mov s2, v2.s[3]
+ mov s4, v21.s[0]
+ mov s31, v21.s[2]
+ mov d3, v5.d[0]
+ mov d6, v5.d[1]
+
+// CHECK: dup b0, v0.b[15] // encoding: [0x00,0x04,0x1f,0x5e]
+// CHECK: dup b1, v0.b[7] // encoding: [0x01,0x04,0x0f,0x5e]
+// CHECK: dup b17, v0.b[0] // encoding: [0x11,0x04,0x01,0x5e]
+// CHECK: dup h5, v31.h[7] // encoding: [0xe5,0x07,0x1e,0x5e]
+// CHECK: dup h9, v1.h[4] // encoding: [0x29,0x04,0x12,0x5e]
+// CHECK: dup h11, v17.h[0] // encoding: [0x2b,0x06,0x02,0x5e]
+// CHECK: dup s2, v2.s[3] // encoding: [0x42,0x04,0x1c,0x5e]
+// CHECK: dup s4, v21.s[0] // encoding: [0xa4,0x06,0x04,0x5e]
+// CHECK: dup s31, v21.s[2] // encoding: [0xbf,0x06,0x14,0x5e]
+// CHECK: dup d3, v5.d[0] // encoding: [0xa3,0x04,0x08,0x5e]
+// CHECK: dup d6, v5.d[1] // encoding: [0xa6,0x04,0x18,0x5e]