diff options
author | Tim Northover <tnorthover@apple.com> | 2014-04-18 14:54:53 +0000 |
---|---|---|
committer | Tim Northover <tnorthover@apple.com> | 2014-04-18 14:54:53 +0000 |
commit | 7b4b26161160d93a0b1bd16962c66b5b626fa696 (patch) | |
tree | 95d5a29ff8dc638a8d9dc791cf0c8142eeb28bab /test | |
parent | d781e40b6a3fe4a0dfcf9b1e4100c8664b2f3918 (diff) | |
download | llvm-7b4b26161160d93a0b1bd16962c66b5b626fa696.tar.gz llvm-7b4b26161160d93a0b1bd16962c66b5b626fa696.tar.bz2 llvm-7b4b26161160d93a0b1bd16962c66b5b626fa696.tar.xz |
AArch64/ARM64: add more NEON tests.
Mostly no testing this time, since they were just wrangling
target-specific intrinsics.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@206613 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test')
-rw-r--r-- | test/CodeGen/AArch64/neon-shift-left-long.ll | 1 | ||||
-rw-r--r-- | test/CodeGen/AArch64/neon-shift.ll | 1 | ||||
-rw-r--r-- | test/CodeGen/AArch64/neon-simd-ldst-multi-elem.ll | 3 | ||||
-rw-r--r-- | test/CodeGen/AArch64/neon-simd-ldst-one.ll | 1 | ||||
-rw-r--r-- | test/CodeGen/AArch64/neon-simd-ldst.ll | 1 | ||||
-rw-r--r-- | test/CodeGen/ARM64/aarch64-neon-simd-ldst-one.ll | 482 |
6 files changed, 489 insertions, 0 deletions
diff --git a/test/CodeGen/AArch64/neon-shift-left-long.ll b/test/CodeGen/AArch64/neon-shift-left-long.ll index d10d551805..d16b131559 100644 --- a/test/CodeGen/AArch64/neon-shift-left-long.ll +++ b/test/CodeGen/AArch64/neon-shift-left-long.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s +; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s define <8 x i16> @test_sshll_v8i8(<8 x i8> %a) { ; CHECK: test_sshll_v8i8: diff --git a/test/CodeGen/AArch64/neon-shift.ll b/test/CodeGen/AArch64/neon-shift.ll index 33b04ceb48..088200d972 100644 --- a/test/CodeGen/AArch64/neon-shift.ll +++ b/test/CodeGen/AArch64/neon-shift.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s +; arm64 already has these tests: pure intrinsics & trivial shifts. declare <8 x i8> @llvm.arm.neon.vshiftu.v8i8(<8 x i8>, <8 x i8>) declare <8 x i8> @llvm.arm.neon.vshifts.v8i8(<8 x i8>, <8 x i8>) diff --git a/test/CodeGen/AArch64/neon-simd-ldst-multi-elem.ll b/test/CodeGen/AArch64/neon-simd-ldst-multi-elem.ll index d5557c0c85..34056c7d56 100644 --- a/test/CodeGen/AArch64/neon-simd-ldst-multi-elem.ll +++ b/test/CodeGen/AArch64/neon-simd-ldst-multi-elem.ll @@ -1,5 +1,8 @@ ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s +; arm64 already has these. Essentially just a copy/paste from Clang output from +; arm_neon.h + define void @test_ldst1_v16i8(<16 x i8>* %ptr, <16 x i8>* %ptr2) { ; CHECK-LABEL: test_ldst1_v16i8: ; CHECK: ld1 {v{{[0-9]+}}.16b}, [x{{[0-9]+|sp}}] diff --git a/test/CodeGen/AArch64/neon-simd-ldst-one.ll b/test/CodeGen/AArch64/neon-simd-ldst-one.ll index 927c93301b..0ec5876325 100644 --- a/test/CodeGen/AArch64/neon-simd-ldst-one.ll +++ b/test/CodeGen/AArch64/neon-simd-ldst-one.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s +; interesting parts copied into arm64 directory as aarch64-neon-simd-ldst-one.ll %struct.uint8x16x2_t = type { [2 x <16 x i8>] } %struct.poly8x16x2_t = type { [2 x <16 x i8>] } diff --git a/test/CodeGen/AArch64/neon-simd-ldst.ll b/test/CodeGen/AArch64/neon-simd-ldst.ll index afc0901bbc..cb0a2d237c 100644 --- a/test/CodeGen/AArch64/neon-simd-ldst.ll +++ b/test/CodeGen/AArch64/neon-simd-ldst.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -O2 -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s +; Just intrinsic mashing. Duplicates existing arm64 tests. define void @test_ldstq_4v(i8* noalias %io, i32 %count) { ; CHECK-LABEL: test_ldstq_4v diff --git a/test/CodeGen/ARM64/aarch64-neon-simd-ldst-one.ll b/test/CodeGen/ARM64/aarch64-neon-simd-ldst-one.ll new file mode 100644 index 0000000000..cca6bfef73 --- /dev/null +++ b/test/CodeGen/ARM64/aarch64-neon-simd-ldst-one.ll @@ -0,0 +1,482 @@ +; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s + + +%struct.uint8x16x2_t = type { [2 x <16 x i8>] } +%struct.poly8x16x2_t = type { [2 x <16 x i8>] } +%struct.uint8x16x3_t = type { [3 x <16 x i8>] } +%struct.int8x16x2_t = type { [2 x <16 x i8>] } +%struct.int16x8x2_t = type { [2 x <8 x i16>] } +%struct.int32x4x2_t = type { [2 x <4 x i32>] } +%struct.int64x2x2_t = type { [2 x <2 x i64>] } +%struct.float32x4x2_t = type { [2 x <4 x float>] } +%struct.float64x2x2_t = type { [2 x <2 x double>] } +%struct.int8x8x2_t = type { [2 x <8 x i8>] } +%struct.int16x4x2_t = type { [2 x <4 x i16>] } +%struct.int32x2x2_t = type { [2 x <2 x i32>] } +%struct.int64x1x2_t = type { [2 x <1 x i64>] } +%struct.float32x2x2_t = type { [2 x <2 x float>] } +%struct.float64x1x2_t = type { [2 x <1 x double>] } +%struct.int8x16x3_t = type { [3 x <16 x i8>] } +%struct.int16x8x3_t = type { [3 x <8 x i16>] } +%struct.int32x4x3_t = type { [3 x <4 x i32>] } +%struct.int64x2x3_t = type { [3 x <2 x i64>] } +%struct.float32x4x3_t = type { [3 x <4 x float>] } +%struct.float64x2x3_t = type { [3 x <2 x double>] } +%struct.int8x8x3_t = type { [3 x <8 x i8>] } +%struct.int16x4x3_t = type { [3 x <4 x i16>] } +%struct.int32x2x3_t = type { [3 x <2 x i32>] } +%struct.int64x1x3_t = type { [3 x <1 x i64>] } +%struct.float32x2x3_t = type { [3 x <2 x float>] } +%struct.float64x1x3_t = type { [3 x <1 x double>] } +%struct.int8x16x4_t = type { [4 x <16 x i8>] } +%struct.int16x8x4_t = type { [4 x <8 x i16>] } +%struct.int32x4x4_t = type { [4 x <4 x i32>] } +%struct.int64x2x4_t = type { [4 x <2 x i64>] } +%struct.float32x4x4_t = type { [4 x <4 x float>] } +%struct.float64x2x4_t = type { [4 x <2 x double>] } +%struct.int8x8x4_t = type { [4 x <8 x i8>] } +%struct.int16x4x4_t = type { [4 x <4 x i16>] } +%struct.int32x2x4_t = type { [4 x <2 x i32>] } +%struct.int64x1x4_t = type { [4 x <1 x i64>] } +%struct.float32x2x4_t = type { [4 x <2 x float>] } +%struct.float64x1x4_t = type { [4 x <1 x double>] } + +define <16 x i8> @test_ld_from_poll_v16i8(<16 x i8> %a) { +; CHECK-LABEL: test_ld_from_poll_v16i8: +; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}} +; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:.{{[A-Z0-9_]+}}] +entry: + %b = add <16 x i8> %a, <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 2, i8 13, i8 14, i8 15, i8 16> + ret <16 x i8> %b +} + +define <8 x i16> @test_ld_from_poll_v8i16(<8 x i16> %a) { +; CHECK-LABEL: test_ld_from_poll_v8i16: +; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}} +; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:.{{[A-Z0-9_]+}}] +entry: + %b = add <8 x i16> %a, <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8> + ret <8 x i16> %b +} + +define <4 x i32> @test_ld_from_poll_v4i32(<4 x i32> %a) { +; CHECK-LABEL: test_ld_from_poll_v4i32: +; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}} +; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:.{{[A-Z0-9_]+}}] +entry: + %b = add <4 x i32> %a, <i32 1, i32 2, i32 3, i32 4> + ret <4 x i32> %b +} + +define <2 x i64> @test_ld_from_poll_v2i64(<2 x i64> %a) { +; CHECK-LABEL: test_ld_from_poll_v2i64: +; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}} +; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:.{{[A-Z0-9_]+}}] +entry: + %b = add <2 x i64> %a, <i64 1, i64 2> + ret <2 x i64> %b +} + +define <4 x float> @test_ld_from_poll_v4f32(<4 x float> %a) { +; CHECK-LABEL: test_ld_from_poll_v4f32: +; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}} +; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:.{{[A-Z0-9_]+}}] +entry: + %b = fadd <4 x float> %a, <float 1.0, float 2.0, float 3.0, float 4.0> + ret <4 x float> %b +} + +define <2 x double> @test_ld_from_poll_v2f64(<2 x double> %a) { +; CHECK-LABEL: test_ld_from_poll_v2f64: +; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}} +; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:.{{[A-Z0-9_]+}}] +entry: + %b = fadd <2 x double> %a, <double 1.0, double 2.0> + ret <2 x double> %b +} + +define <8 x i8> @test_ld_from_poll_v8i8(<8 x i8> %a) { +; CHECK-LABEL: test_ld_from_poll_v8i8: +; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}} +; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:.{{[A-Z0-9_]+}}] +entry: + %b = add <8 x i8> %a, <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8> + ret <8 x i8> %b +} + +define <4 x i16> @test_ld_from_poll_v4i16(<4 x i16> %a) { +; CHECK-LABEL: test_ld_from_poll_v4i16: +; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}} +; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:.{{[A-Z0-9_]+}}] +entry: + %b = add <4 x i16> %a, <i16 1, i16 2, i16 3, i16 4> + ret <4 x i16> %b +} + +define <2 x i32> @test_ld_from_poll_v2i32(<2 x i32> %a) { +; CHECK-LABEL: test_ld_from_poll_v2i32: +; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}} +; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:.{{[A-Z0-9_]+}}] +entry: + %b = add <2 x i32> %a, <i32 1, i32 2> + ret <2 x i32> %b +} + +define <16 x i8> @test_vld1q_dup_s8(i8* %a) { +; CHECK-LABEL: test_vld1q_dup_s8: +; CHECK: ld1r {{{ ?v[0-9]+.16b ?}}}, [x0] +entry: + %0 = load i8* %a, align 1 + %1 = insertelement <16 x i8> undef, i8 %0, i32 0 + %lane = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> zeroinitializer + ret <16 x i8> %lane +} + +define <8 x i16> @test_vld1q_dup_s16(i16* %a) { +; CHECK-LABEL: test_vld1q_dup_s16: +; CHECK: ld1r {{{ ?v[0-9]+.8h ?}}}, [x0] +entry: + %0 = load i16* %a, align 2 + %1 = insertelement <8 x i16> undef, i16 %0, i32 0 + %lane = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> zeroinitializer + ret <8 x i16> %lane +} + +define <4 x i32> @test_vld1q_dup_s32(i32* %a) { +; CHECK-LABEL: test_vld1q_dup_s32: +; CHECK: ld1r {{{ ?v[0-9]+.4s ?}}}, [x0] +entry: + %0 = load i32* %a, align 4 + %1 = insertelement <4 x i32> undef, i32 %0, i32 0 + %lane = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> zeroinitializer + ret <4 x i32> %lane +} + +define <2 x i64> @test_vld1q_dup_s64(i64* %a) { +; CHECK-LABEL: test_vld1q_dup_s64: +; CHECK: ld1r {{{ ?v[0-9]+.2d ?}}}, [x0] +entry: + %0 = load i64* %a, align 8 + %1 = insertelement <2 x i64> undef, i64 %0, i32 0 + %lane = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> zeroinitializer + ret <2 x i64> %lane +} + +define <4 x float> @test_vld1q_dup_f32(float* %a) { +; CHECK-LABEL: test_vld1q_dup_f32: +; CHECK: ld1r {{{ ?v[0-9]+.4s ?}}}, [x0] +entry: + %0 = load float* %a, align 4 + %1 = insertelement <4 x float> undef, float %0, i32 0 + %lane = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> zeroinitializer + ret <4 x float> %lane +} + +define <2 x double> @test_vld1q_dup_f64(double* %a) { +; CHECK-LABEL: test_vld1q_dup_f64: +; CHECK: ld1r {{{ ?v[0-9]+.2d ?}}}, [x0] +entry: + %0 = load double* %a, align 8 + %1 = insertelement <2 x double> undef, double %0, i32 0 + %lane = shufflevector <2 x double> %1, <2 x double> undef, <2 x i32> zeroinitializer + ret <2 x double> %lane +} + +define <8 x i8> @test_vld1_dup_s8(i8* %a) { +; CHECK-LABEL: test_vld1_dup_s8: +; CHECK: ld1r {{{ ?v[0-9]+.8b ?}}}, [x0] +entry: + %0 = load i8* %a, align 1 + %1 = insertelement <8 x i8> undef, i8 %0, i32 0 + %lane = shufflevector <8 x i8> %1, <8 x i8> undef, <8 x i32> zeroinitializer + ret <8 x i8> %lane +} + +define <4 x i16> @test_vld1_dup_s16(i16* %a) { +; CHECK-LABEL: test_vld1_dup_s16: +; CHECK: ld1r {{{ ?v[0-9]+.4h ?}}}, [x0] +entry: + %0 = load i16* %a, align 2 + %1 = insertelement <4 x i16> undef, i16 %0, i32 0 + %lane = shufflevector <4 x i16> %1, <4 x i16> undef, <4 x i32> zeroinitializer + ret <4 x i16> %lane +} + +define <2 x i32> @test_vld1_dup_s32(i32* %a) { +; CHECK-LABEL: test_vld1_dup_s32: +; CHECK: ld1r {{{ ?v[0-9]+.2s ?}}}, [x0] +entry: + %0 = load i32* %a, align 4 + %1 = insertelement <2 x i32> undef, i32 %0, i32 0 + %lane = shufflevector <2 x i32> %1, <2 x i32> undef, <2 x i32> zeroinitializer + ret <2 x i32> %lane +} + +define <1 x i64> @test_vld1_dup_s64(i64* %a) { +; CHECK-LABEL: test_vld1_dup_s64: +; CHECK: ldr {{d[0-9]+}}, [x0] +entry: + %0 = load i64* %a, align 8 + %1 = insertelement <1 x i64> undef, i64 %0, i32 0 + ret <1 x i64> %1 +} + +define <2 x float> @test_vld1_dup_f32(float* %a) { +; CHECK-LABEL: test_vld1_dup_f32: +; CHECK: ld1r {{{ ?v[0-9]+.2s ?}}}, [x0] +entry: + %0 = load float* %a, align 4 + %1 = insertelement <2 x float> undef, float %0, i32 0 + %lane = shufflevector <2 x float> %1, <2 x float> undef, <2 x i32> zeroinitializer + ret <2 x float> %lane +} + +define <1 x double> @test_vld1_dup_f64(double* %a) { +; CHECK-LABEL: test_vld1_dup_f64: +; CHECK: ldr {{d[0-9]+}}, [x0] +entry: + %0 = load double* %a, align 8 + %1 = insertelement <1 x double> undef, double %0, i32 0 + ret <1 x double> %1 +} + +define <1 x i64> @testDUP.v1i64(i64* %a, i64* %b) #0 { +; As there is a store operation depending on %1, LD1R pattern can't be selected. +; So LDR and FMOV should be emitted. +; CHECK-LABEL: testDUP.v1i64: +; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}] +; CHECK-DAG: fmov {{d[0-9]+}}, {{x[0-9]+}} +; CHECK-DAG: str {{x[0-9]+}}, [{{x[0-9]+}}] + %1 = load i64* %a, align 8 + store i64 %1, i64* %b, align 8 + %vecinit.i = insertelement <1 x i64> undef, i64 %1, i32 0 + ret <1 x i64> %vecinit.i +} + +define <1 x double> @testDUP.v1f64(double* %a, double* %b) #0 { +; As there is a store operation depending on %1, LD1R pattern can't be selected. +; So LDR and FMOV should be emitted. +; CHECK-LABEL: testDUP.v1f64: +; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}] +; CHECK: str {{d[0-9]+}}, [{{x[0-9]+}}] + %1 = load double* %a, align 8 + store double %1, double* %b, align 8 + %vecinit.i = insertelement <1 x double> undef, double %1, i32 0 + ret <1 x double> %vecinit.i +} + +define <16 x i8> @test_vld1q_lane_s8(i8* %a, <16 x i8> %b) { +; CHECK-LABEL: test_vld1q_lane_s8: +; CHECK: ld1 { {{v[0-9]+}}.b }[{{[0-9]+}}], [x0] +entry: + %0 = load i8* %a, align 1 + %vld1_lane = insertelement <16 x i8> %b, i8 %0, i32 15 + ret <16 x i8> %vld1_lane +} + +define <8 x i16> @test_vld1q_lane_s16(i16* %a, <8 x i16> %b) { +; CHECK-LABEL: test_vld1q_lane_s16: +; CHECK: ld1 { {{v[0-9]+}}.h }[{{[0-9]+}}], [x0] +entry: + %0 = load i16* %a, align 2 + %vld1_lane = insertelement <8 x i16> %b, i16 %0, i32 7 + ret <8 x i16> %vld1_lane +} + +define <4 x i32> @test_vld1q_lane_s32(i32* %a, <4 x i32> %b) { +; CHECK-LABEL: test_vld1q_lane_s32: +; CHECK: ld1 { {{v[0-9]+}}.s }[{{[0-9]+}}], [x0] +entry: + %0 = load i32* %a, align 4 + %vld1_lane = insertelement <4 x i32> %b, i32 %0, i32 3 + ret <4 x i32> %vld1_lane +} + +define <2 x i64> @test_vld1q_lane_s64(i64* %a, <2 x i64> %b) { +; CHECK-LABEL: test_vld1q_lane_s64: +; CHECK: ld1 { {{v[0-9]+}}.d }[{{[0-9]+}}], [x0] +entry: + %0 = load i64* %a, align 8 + %vld1_lane = insertelement <2 x i64> %b, i64 %0, i32 1 + ret <2 x i64> %vld1_lane +} + +define <4 x float> @test_vld1q_lane_f32(float* %a, <4 x float> %b) { +; CHECK-LABEL: test_vld1q_lane_f32: +; CHECK: ld1 { {{v[0-9]+}}.s }[{{[0-9]+}}], [x0] +entry: + %0 = load float* %a, align 4 + %vld1_lane = insertelement <4 x float> %b, float %0, i32 3 + ret <4 x float> %vld1_lane +} + +define <2 x double> @test_vld1q_lane_f64(double* %a, <2 x double> %b) { +; CHECK-LABEL: test_vld1q_lane_f64: +; CHECK: ld1 { {{v[0-9]+}}.d }[{{[0-9]+}}], [x0] +entry: + %0 = load double* %a, align 8 + %vld1_lane = insertelement <2 x double> %b, double %0, i32 1 + ret <2 x double> %vld1_lane +} + +define <8 x i8> @test_vld1_lane_s8(i8* %a, <8 x i8> %b) { +; CHECK-LABEL: test_vld1_lane_s8: +; CHECK: ld1 { {{v[0-9]+}}.b }[{{[0-9]+}}], [x0] +entry: + %0 = load i8* %a, align 1 + %vld1_lane = insertelement <8 x i8> %b, i8 %0, i32 7 + ret <8 x i8> %vld1_lane +} + +define <4 x i16> @test_vld1_lane_s16(i16* %a, <4 x i16> %b) { +; CHECK-LABEL: test_vld1_lane_s16: +; CHECK: ld1 { {{v[0-9]+}}.h }[{{[0-9]+}}], [x0] +entry: + %0 = load i16* %a, align 2 + %vld1_lane = insertelement <4 x i16> %b, i16 %0, i32 3 + ret <4 x i16> %vld1_lane +} + +define <2 x i32> @test_vld1_lane_s32(i32* %a, <2 x i32> %b) { +; CHECK-LABEL: test_vld1_lane_s32: +; CHECK: ld1 { {{v[0-9]+}}.s }[{{[0-9]+}}], [x0] +entry: + %0 = load i32* %a, align 4 + %vld1_lane = insertelement <2 x i32> %b, i32 %0, i32 1 + ret <2 x i32> %vld1_lane +} + +define <1 x i64> @test_vld1_lane_s64(i64* %a, <1 x i64> %b) { +; CHECK-LABEL: test_vld1_lane_s64: +; CHECK: ldr {{d[0-9]+}}, [x0] +entry: + %0 = load i64* %a, align 8 + %vld1_lane = insertelement <1 x i64> undef, i64 %0, i32 0 + ret <1 x i64> %vld1_lane +} + +define <2 x float> @test_vld1_lane_f32(float* %a, <2 x float> %b) { +; CHECK-LABEL: test_vld1_lane_f32: +; CHECK: ld1 { {{v[0-9]+}}.s }[{{[0-9]+}}], [x0] +entry: + %0 = load float* %a, align 4 + %vld1_lane = insertelement <2 x float> %b, float %0, i32 1 + ret <2 x float> %vld1_lane +} + +define <1 x double> @test_vld1_lane_f64(double* %a, <1 x double> %b) { +; CHECK-LABEL: test_vld1_lane_f64: +; CHECK: ldr {{d[0-9]+}}, [x0] +entry: + %0 = load double* %a, align 8 + %vld1_lane = insertelement <1 x double> undef, double %0, i32 0 + ret <1 x double> %vld1_lane +} + +define void @test_vst1q_lane_s8(i8* %a, <16 x i8> %b) { +; CHECK-LABEL: test_vst1q_lane_s8: +; CHECK: st1 { {{v[0-9]+}}.b }[{{[0-9]+}}], [x0] +entry: + %0 = extractelement <16 x i8> %b, i32 15 + store i8 %0, i8* %a, align 1 + ret void +} + +define void @test_vst1q_lane_s16(i16* %a, <8 x i16> %b) { +; CHECK-LABEL: test_vst1q_lane_s16: +; CHECK: st1 { {{v[0-9]+}}.h }[{{[0-9]+}}], [x0] +entry: + %0 = extractelement <8 x i16> %b, i32 7 + store i16 %0, i16* %a, align 2 + ret void +} + +define void @test_vst1q_lane_s32(i32* %a, <4 x i32> %b) { +; CHECK-LABEL: test_vst1q_lane_s32: +; CHECK: st1 { {{v[0-9]+}}.s }[{{[0-9]+}}], [x0] +entry: + %0 = extractelement <4 x i32> %b, i32 3 + store i32 %0, i32* %a, align 4 + ret void +} + +define void @test_vst1q_lane_s64(i64* %a, <2 x i64> %b) { +; CHECK-LABEL: test_vst1q_lane_s64: +; CHECK: st1 { {{v[0-9]+}}.d }[{{[0-9]+}}], [x0] +entry: + %0 = extractelement <2 x i64> %b, i32 1 + store i64 %0, i64* %a, align 8 + ret void +} + +define void @test_vst1q_lane_f32(float* %a, <4 x float> %b) { +; CHECK-LABEL: test_vst1q_lane_f32: +; CHECK: st1 { {{v[0-9]+}}.s }[{{[0-9]+}}], [x0] +entry: + %0 = extractelement <4 x float> %b, i32 3 + store float %0, float* %a, align 4 + ret void +} + +define void @test_vst1q_lane_f64(double* %a, <2 x double> %b) { +; CHECK-LABEL: test_vst1q_lane_f64: +; CHECK: st1 { {{v[0-9]+}}.d }[{{[0-9]+}}], [x0] +entry: + %0 = extractelement <2 x double> %b, i32 1 + store double %0, double* %a, align 8 + ret void +} + +define void @test_vst1_lane_s8(i8* %a, <8 x i8> %b) { +; CHECK-LABEL: test_vst1_lane_s8: +; CHECK: st1 { {{v[0-9]+}}.b }[{{[0-9]+}}], [x0] +entry: + %0 = extractelement <8 x i8> %b, i32 7 + store i8 %0, i8* %a, align 1 + ret void +} + +define void @test_vst1_lane_s16(i16* %a, <4 x i16> %b) { +; CHECK-LABEL: test_vst1_lane_s16: +; CHECK: st1 { {{v[0-9]+}}.h }[{{[0-9]+}}], [x0] +entry: + %0 = extractelement <4 x i16> %b, i32 3 + store i16 %0, i16* %a, align 2 + ret void +} + +define void @test_vst1_lane_s32(i32* %a, <2 x i32> %b) { +; CHECK-LABEL: test_vst1_lane_s32: +; CHECK: st1 { {{v[0-9]+}}.s }[{{[0-9]+}}], [x0] +entry: + %0 = extractelement <2 x i32> %b, i32 1 + store i32 %0, i32* %a, align 4 + ret void +} + +define void @test_vst1_lane_s64(i64* %a, <1 x i64> %b) { +; CHECK-LABEL: test_vst1_lane_s64: +; CHECK: st1 { {{v[0-9]+}}.d }[{{[0-9]+}}], [x0] +entry: + %0 = extractelement <1 x i64> %b, i32 0 + store i64 %0, i64* %a, align 8 + ret void +} + +define void @test_vst1_lane_f32(float* %a, <2 x float> %b) { +; CHECK-LABEL: test_vst1_lane_f32: +; CHECK: st1 { {{v[0-9]+}}.s }[{{[0-9]+}}], [x0] +entry: + %0 = extractelement <2 x float> %b, i32 1 + store float %0, float* %a, align 4 + ret void +} + +define void @test_vst1_lane_f64(double* %a, <1 x double> %b) { +; CHECK-LABEL: test_vst1_lane_f64: +; CHECK: str {{d[0-9]+}}, [x0] +entry: + %0 = extractelement <1 x double> %b, i32 0 + store double %0, double* %a, align 8 + ret void +} |