From 082ac99cc86b17c7cd2a1f2a6faa2d1adc184e17 Mon Sep 17 00:00:00 2001 From: Jiangning Liu Date: Thu, 14 Nov 2013 01:57:32 +0000 Subject: Implement AArch64 NEON instruction set AdvSIMD (table). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@194648 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/CodeGen/AArch64/neon-simd-tbl.ll | 828 +++++++++++++++++++++ test/MC/AArch64/neon-diagnostics.s | 48 ++ test/MC/AArch64/neon-tbl.s | 56 ++ test/MC/Disassembler/AArch64/neon-instructions.txt | 41 +- 4 files changed, 972 insertions(+), 1 deletion(-) create mode 100644 test/CodeGen/AArch64/neon-simd-tbl.ll create mode 100644 test/MC/AArch64/neon-tbl.s (limited to 'test') diff --git a/test/CodeGen/AArch64/neon-simd-tbl.ll b/test/CodeGen/AArch64/neon-simd-tbl.ll new file mode 100644 index 0000000000..8eac1e88c4 --- /dev/null +++ b/test/CodeGen/AArch64/neon-simd-tbl.ll @@ -0,0 +1,828 @@ +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s + +declare <16 x i8> @llvm.aarch64.neon.vtbx4.v16i8.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) + +declare <16 x i8> @llvm.aarch64.neon.vtbx3.v16i8.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) + +declare <16 x i8> @llvm.aarch64.neon.vtbx2.v16i8.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) + +declare <16 x i8> @llvm.aarch64.neon.vtbx1.v16i8.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) + +declare <8 x i8> @llvm.aarch64.neon.vtbx4.v8i8.v16i8(<8 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) + +declare <8 x i8> @llvm.aarch64.neon.vtbx3.v8i8.v16i8(<8 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) + +declare <8 x i8> @llvm.aarch64.neon.vtbx2.v8i8.v16i8(<8 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) + +declare <8 x i8> @llvm.aarch64.neon.vtbx1.v8i8.v16i8(<8 x i8>, <16 x i8>, <8 x i8>) + +declare <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8.v16i8(<16 x i8>, <16 x i8>, <8 x i8>) + +declare <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8>, <8 x i8>, <8 x i8>) + +declare <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8.v16i8(<16 x i8>, <8 x i8>) + +declare <16 x i8> @llvm.aarch64.neon.vtbl4.v16i8.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) + +declare <16 x i8> @llvm.aarch64.neon.vtbl3.v16i8.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) + +declare <16 x i8> @llvm.aarch64.neon.vtbl2.v16i8.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) + +declare <16 x i8> @llvm.aarch64.neon.vtbl1.v16i8.v16i8(<16 x i8>, <16 x i8>) + +declare <8 x i8> @llvm.aarch64.neon.vtbl4.v8i8.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) + +declare <8 x i8> @llvm.aarch64.neon.vtbl3.v8i8.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) + +define <8 x i8> @test_vtbl1_s8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vtbl1_s8: +; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %vtbl1.i = shufflevector <8 x i8> %a, <8 x i8> zeroinitializer, <16 x i32> + %vtbl11.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8.v16i8(<16 x i8> %vtbl1.i, <8 x i8> %b) + ret <8 x i8> %vtbl11.i +} + +define <8 x i8> @test_vqtbl1_s8(<16 x i8> %a, <8 x i8> %b) { +; CHECK: test_vqtbl1_s8: +; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %vtbl1.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8.v16i8(<16 x i8> %a, <8 x i8> %b) + ret <8 x i8> %vtbl1.i +} + +define <8 x i8> @test_vtbl2_s8([2 x <8 x i8>] %a.coerce, <8 x i8> %b) { +; CHECK: test_vtbl2_s8: +; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %__a.coerce.fca.0.extract.i = extractvalue [2 x <8 x i8>] %a.coerce, 0 + %__a.coerce.fca.1.extract.i = extractvalue [2 x <8 x i8>] %a.coerce, 1 + %vtbl1.i = shufflevector <8 x i8> %__a.coerce.fca.0.extract.i, <8 x i8> %__a.coerce.fca.1.extract.i, <16 x i32> + %vtbl17.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8.v16i8(<16 x i8> %vtbl1.i, <8 x i8> %b) + ret <8 x i8> %vtbl17.i +} + +define <8 x i8> @test_vqtbl2_s8([2 x <16 x i8>] %a.coerce, <8 x i8> %b) { +; CHECK: test_vqtbl2_s8: +; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %__a.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 0 + %__a.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 1 + %vtbl2.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <8 x i8> %b) + ret <8 x i8> %vtbl2.i +} + +define <8 x i8> @test_vtbl3_s8([3 x <8 x i8>] %a.coerce, <8 x i8> %b) { +; CHECK: test_vtbl3_s8: +; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %__a.coerce.fca.0.extract.i = extractvalue [3 x <8 x i8>] %a.coerce, 0 + %__a.coerce.fca.1.extract.i = extractvalue [3 x <8 x i8>] %a.coerce, 1 + %__a.coerce.fca.2.extract.i = extractvalue [3 x <8 x i8>] %a.coerce, 2 + %vtbl2.i = shufflevector <8 x i8> %__a.coerce.fca.0.extract.i, <8 x i8> %__a.coerce.fca.1.extract.i, <16 x i32> + %vtbl211.i = shufflevector <8 x i8> %__a.coerce.fca.2.extract.i, <8 x i8> zeroinitializer, <16 x i32> + %vtbl212.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8.v16i8(<16 x i8> %vtbl2.i, <16 x i8> %vtbl211.i, <8 x i8> %b) + ret <8 x i8> %vtbl212.i +} + +define <8 x i8> @test_vqtbl3_s8([3 x <16 x i8>] %a.coerce, <8 x i8> %b) { +; CHECK: test_vqtbl3_s8: +; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %__a.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 0 + %__a.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 1 + %__a.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 2 + %vtbl3.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl3.v8i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <8 x i8> %b) + ret <8 x i8> %vtbl3.i +} + +define <8 x i8> @test_vtbl4_s8([4 x <8 x i8>] %a.coerce, <8 x i8> %b) { +; CHECK: test_vtbl4_s8: +; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %__a.coerce.fca.0.extract.i = extractvalue [4 x <8 x i8>] %a.coerce, 0 + %__a.coerce.fca.1.extract.i = extractvalue [4 x <8 x i8>] %a.coerce, 1 + %__a.coerce.fca.2.extract.i = extractvalue [4 x <8 x i8>] %a.coerce, 2 + %__a.coerce.fca.3.extract.i = extractvalue [4 x <8 x i8>] %a.coerce, 3 + %vtbl2.i = shufflevector <8 x i8> %__a.coerce.fca.0.extract.i, <8 x i8> %__a.coerce.fca.1.extract.i, <16 x i32> + %vtbl215.i = shufflevector <8 x i8> %__a.coerce.fca.2.extract.i, <8 x i8> %__a.coerce.fca.3.extract.i, <16 x i32> + %vtbl216.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8.v16i8(<16 x i8> %vtbl2.i, <16 x i8> %vtbl215.i, <8 x i8> %b) + ret <8 x i8> %vtbl216.i +} + +define <8 x i8> @test_vqtbl4_s8([4 x <16 x i8>] %a.coerce, <8 x i8> %b) { +; CHECK: test_vqtbl4_s8: +; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %__a.coerce.fca.0.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 0 + %__a.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 1 + %__a.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 2 + %__a.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 3 + %vtbl4.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl4.v8i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <16 x i8> %__a.coerce.fca.3.extract.i, <8 x i8> %b) + ret <8 x i8> %vtbl4.i +} + +define <16 x i8> @test_vqtbl1q_s8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vqtbl1q_s8: +; CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +entry: + %vtbl1.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl1.v16i8.v16i8(<16 x i8> %a, <16 x i8> %b) + ret <16 x i8> %vtbl1.i +} + +define <16 x i8> @test_vqtbl2q_s8([2 x <16 x i8>] %a.coerce, <16 x i8> %b) { +; CHECK: test_vqtbl2q_s8: +; CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +entry: + %__a.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 0 + %__a.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 1 + %vtbl2.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl2.v16i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %b) + ret <16 x i8> %vtbl2.i +} + +define <16 x i8> @test_vqtbl3q_s8([3 x <16 x i8>] %a.coerce, <16 x i8> %b) { +; CHECK: test_vqtbl3q_s8: +; CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +entry: + %__a.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 0 + %__a.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 1 + %__a.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 2 + %vtbl3.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl3.v16i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <16 x i8> %b) + ret <16 x i8> %vtbl3.i +} + +define <16 x i8> @test_vqtbl4q_s8([4 x <16 x i8>] %a.coerce, <16 x i8> %b) { +; CHECK: test_vqtbl4q_s8: +; CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +entry: + %__a.coerce.fca.0.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 0 + %__a.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 1 + %__a.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 2 + %__a.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 3 + %vtbl4.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl4.v16i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <16 x i8> %__a.coerce.fca.3.extract.i, <16 x i8> %b) + ret <16 x i8> %vtbl4.i +} + +define <8 x i8> @test_vtbx1_s8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) { +; CHECK: test_vtbx1_s8: +; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %vtbl1.i = shufflevector <8 x i8> %b, <8 x i8> zeroinitializer, <16 x i32> + %vtbl11.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8.v16i8(<16 x i8> %vtbl1.i, <8 x i8> %c) + %0 = icmp uge <8 x i8> %c, + %1 = sext <8 x i1> %0 to <8 x i8> + %vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %1, <8 x i8> %a, <8 x i8> %vtbl11.i) + ret <8 x i8> %vbsl.i +} + +define <8 x i8> @test_vtbx2_s8(<8 x i8> %a, [2 x <8 x i8>] %b.coerce, <8 x i8> %c) { +; CHECK: test_vtbx2_s8: +; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %__b.coerce.fca.0.extract.i = extractvalue [2 x <8 x i8>] %b.coerce, 0 + %__b.coerce.fca.1.extract.i = extractvalue [2 x <8 x i8>] %b.coerce, 1 + %vtbx1.i = shufflevector <8 x i8> %__b.coerce.fca.0.extract.i, <8 x i8> %__b.coerce.fca.1.extract.i, <16 x i32> + %vtbx17.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx1.v8i8.v16i8(<8 x i8> %a, <16 x i8> %vtbx1.i, <8 x i8> %c) + ret <8 x i8> %vtbx17.i +} + +define <8 x i8> @test_vtbx3_s8(<8 x i8> %a, [3 x <8 x i8>] %b.coerce, <8 x i8> %c) { +; CHECK: test_vtbx3_s8: +; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %__b.coerce.fca.0.extract.i = extractvalue [3 x <8 x i8>] %b.coerce, 0 + %__b.coerce.fca.1.extract.i = extractvalue [3 x <8 x i8>] %b.coerce, 1 + %__b.coerce.fca.2.extract.i = extractvalue [3 x <8 x i8>] %b.coerce, 2 + %vtbl2.i = shufflevector <8 x i8> %__b.coerce.fca.0.extract.i, <8 x i8> %__b.coerce.fca.1.extract.i, <16 x i32> + %vtbl211.i = shufflevector <8 x i8> %__b.coerce.fca.2.extract.i, <8 x i8> zeroinitializer, <16 x i32> + %vtbl212.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8.v16i8(<16 x i8> %vtbl2.i, <16 x i8> %vtbl211.i, <8 x i8> %c) + %0 = icmp uge <8 x i8> %c, + %1 = sext <8 x i1> %0 to <8 x i8> + %vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %1, <8 x i8> %a, <8 x i8> %vtbl212.i) + ret <8 x i8> %vbsl.i +} + +define <8 x i8> @test_vtbx4_s8(<8 x i8> %a, [4 x <8 x i8>] %b.coerce, <8 x i8> %c) { +; CHECK: test_vtbx4_s8: +; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %__b.coerce.fca.0.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 0 + %__b.coerce.fca.1.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 1 + %__b.coerce.fca.2.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 2 + %__b.coerce.fca.3.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 3 + %vtbx2.i = shufflevector <8 x i8> %__b.coerce.fca.0.extract.i, <8 x i8> %__b.coerce.fca.1.extract.i, <16 x i32> + %vtbx215.i = shufflevector <8 x i8> %__b.coerce.fca.2.extract.i, <8 x i8> %__b.coerce.fca.3.extract.i, <16 x i32> + %vtbx216.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx2.v8i8.v16i8(<8 x i8> %a, <16 x i8> %vtbx2.i, <16 x i8> %vtbx215.i, <8 x i8> %c) + ret <8 x i8> %vtbx216.i +} + +define <8 x i8> @test_vqtbx1_s8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) { +; CHECK: test_vqtbx1_s8: +; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %vtbx1.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx1.v8i8.v16i8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) + ret <8 x i8> %vtbx1.i +} + +define <8 x i8> @test_vqtbx2_s8(<8 x i8> %a, [2 x <16 x i8>] %b.coerce, <8 x i8> %c) { +; CHECK: test_vqtbx2_s8: +; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %__b.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 0 + %__b.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 1 + %vtbx2.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx2.v8i8.v16i8(<8 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <8 x i8> %c) + ret <8 x i8> %vtbx2.i +} + +define <8 x i8> @test_vqtbx3_s8(<8 x i8> %a, [3 x <16 x i8>] %b.coerce, <8 x i8> %c) { +; CHECK: test_vqtbx3_s8: +; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %__b.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 0 + %__b.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 1 + %__b.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 2 + %vtbx3.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx3.v8i8.v16i8(<8 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <8 x i8> %c) + ret <8 x i8> %vtbx3.i +} + +define <8 x i8> @test_vqtbx4_s8(<8 x i8> %a, [4 x <16 x i8>] %b.coerce, <8 x i8> %c) { +; CHECK: test_vqtbx4_s8: +; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %__b.coerce.fca.0.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 0 + %__b.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 1 + %__b.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 2 + %__b.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 3 + %vtbx4.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx4.v8i8.v16i8(<8 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <16 x i8> %__b.coerce.fca.3.extract.i, <8 x i8> %c) + ret <8 x i8> %vtbx4.i +} + +define <16 x i8> @test_vqtbx1q_s8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { +; CHECK: test_vqtbx1q_s8: +; CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +entry: + %vtbx1.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx1.v16i8.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) + ret <16 x i8> %vtbx1.i +} + +define <16 x i8> @test_vqtbx2q_s8(<16 x i8> %a, [2 x <16 x i8>] %b.coerce, <16 x i8> %c) { +; CHECK: test_vqtbx2q_s8: +; CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +entry: + %__b.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 0 + %__b.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 1 + %vtbx2.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx2.v16i8.v16i8(<16 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %c) + ret <16 x i8> %vtbx2.i +} + +define <16 x i8> @test_vqtbx3q_s8(<16 x i8> %a, [3 x <16 x i8>] %b.coerce, <16 x i8> %c) { +; CHECK: test_vqtbx3q_s8: +; CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +entry: + %__b.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 0 + %__b.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 1 + %__b.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 2 + %vtbx3.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx3.v16i8.v16i8(<16 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <16 x i8> %c) + ret <16 x i8> %vtbx3.i +} + +define <16 x i8> @test_vqtbx4q_s8(<16 x i8> %a, [4 x <16 x i8>] %b.coerce, <16 x i8> %c) { +; CHECK: test_vqtbx4q_s8: +; CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +entry: + %__b.coerce.fca.0.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 0 + %__b.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 1 + %__b.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 2 + %__b.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 3 + %vtbx4.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx4.v16i8.v16i8(<16 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <16 x i8> %__b.coerce.fca.3.extract.i, <16 x i8> %c) + ret <16 x i8> %vtbx4.i +} + +define <8 x i8> @test_vtbl1_u8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vtbl1_u8: +; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %vtbl1.i = shufflevector <8 x i8> %a, <8 x i8> zeroinitializer, <16 x i32> + %vtbl11.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8.v16i8(<16 x i8> %vtbl1.i, <8 x i8> %b) + ret <8 x i8> %vtbl11.i +} + +define <8 x i8> @test_vqtbl1_u8(<16 x i8> %a, <8 x i8> %b) { +; CHECK: test_vqtbl1_u8: +; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %vtbl1.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8.v16i8(<16 x i8> %a, <8 x i8> %b) + ret <8 x i8> %vtbl1.i +} + +define <8 x i8> @test_vtbl2_u8([2 x <8 x i8>] %a.coerce, <8 x i8> %b) { +; CHECK: test_vtbl2_u8: +; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %__a.coerce.fca.0.extract.i = extractvalue [2 x <8 x i8>] %a.coerce, 0 + %__a.coerce.fca.1.extract.i = extractvalue [2 x <8 x i8>] %a.coerce, 1 + %vtbl1.i = shufflevector <8 x i8> %__a.coerce.fca.0.extract.i, <8 x i8> %__a.coerce.fca.1.extract.i, <16 x i32> + %vtbl17.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8.v16i8(<16 x i8> %vtbl1.i, <8 x i8> %b) + ret <8 x i8> %vtbl17.i +} + +define <8 x i8> @test_vqtbl2_u8([2 x <16 x i8>] %a.coerce, <8 x i8> %b) { +; CHECK: test_vqtbl2_u8: +; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %__a.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 0 + %__a.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 1 + %vtbl2.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <8 x i8> %b) + ret <8 x i8> %vtbl2.i +} + +define <8 x i8> @test_vtbl3_u8([3 x <8 x i8>] %a.coerce, <8 x i8> %b) { +; CHECK: test_vtbl3_u8: +; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %__a.coerce.fca.0.extract.i = extractvalue [3 x <8 x i8>] %a.coerce, 0 + %__a.coerce.fca.1.extract.i = extractvalue [3 x <8 x i8>] %a.coerce, 1 + %__a.coerce.fca.2.extract.i = extractvalue [3 x <8 x i8>] %a.coerce, 2 + %vtbl2.i = shufflevector <8 x i8> %__a.coerce.fca.0.extract.i, <8 x i8> %__a.coerce.fca.1.extract.i, <16 x i32> + %vtbl211.i = shufflevector <8 x i8> %__a.coerce.fca.2.extract.i, <8 x i8> zeroinitializer, <16 x i32> + %vtbl212.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8.v16i8(<16 x i8> %vtbl2.i, <16 x i8> %vtbl211.i, <8 x i8> %b) + ret <8 x i8> %vtbl212.i +} + +define <8 x i8> @test_vqtbl3_u8([3 x <16 x i8>] %a.coerce, <8 x i8> %b) { +; CHECK: test_vqtbl3_u8: +; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %__a.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 0 + %__a.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 1 + %__a.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 2 + %vtbl3.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl3.v8i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <8 x i8> %b) + ret <8 x i8> %vtbl3.i +} + +define <8 x i8> @test_vtbl4_u8([4 x <8 x i8>] %a.coerce, <8 x i8> %b) { +; CHECK: test_vtbl4_u8: +; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %__a.coerce.fca.0.extract.i = extractvalue [4 x <8 x i8>] %a.coerce, 0 + %__a.coerce.fca.1.extract.i = extractvalue [4 x <8 x i8>] %a.coerce, 1 + %__a.coerce.fca.2.extract.i = extractvalue [4 x <8 x i8>] %a.coerce, 2 + %__a.coerce.fca.3.extract.i = extractvalue [4 x <8 x i8>] %a.coerce, 3 + %vtbl2.i = shufflevector <8 x i8> %__a.coerce.fca.0.extract.i, <8 x i8> %__a.coerce.fca.1.extract.i, <16 x i32> + %vtbl215.i = shufflevector <8 x i8> %__a.coerce.fca.2.extract.i, <8 x i8> %__a.coerce.fca.3.extract.i, <16 x i32> + %vtbl216.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8.v16i8(<16 x i8> %vtbl2.i, <16 x i8> %vtbl215.i, <8 x i8> %b) + ret <8 x i8> %vtbl216.i +} + +define <8 x i8> @test_vqtbl4_u8([4 x <16 x i8>] %a.coerce, <8 x i8> %b) { +; CHECK: test_vqtbl4_u8: +; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %__a.coerce.fca.0.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 0 + %__a.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 1 + %__a.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 2 + %__a.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 3 + %vtbl4.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl4.v8i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <16 x i8> %__a.coerce.fca.3.extract.i, <8 x i8> %b) + ret <8 x i8> %vtbl4.i +} + +define <16 x i8> @test_vqtbl1q_u8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vqtbl1q_u8: +; CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +entry: + %vtbl1.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl1.v16i8.v16i8(<16 x i8> %a, <16 x i8> %b) + ret <16 x i8> %vtbl1.i +} + +define <16 x i8> @test_vqtbl2q_u8([2 x <16 x i8>] %a.coerce, <16 x i8> %b) { +; CHECK: test_vqtbl2q_u8: +; CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +entry: + %__a.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 0 + %__a.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 1 + %vtbl2.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl2.v16i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %b) + ret <16 x i8> %vtbl2.i +} + +define <16 x i8> @test_vqtbl3q_u8([3 x <16 x i8>] %a.coerce, <16 x i8> %b) { +; CHECK: test_vqtbl3q_u8: +; CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +entry: + %__a.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 0 + %__a.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 1 + %__a.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 2 + %vtbl3.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl3.v16i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <16 x i8> %b) + ret <16 x i8> %vtbl3.i +} + +define <16 x i8> @test_vqtbl4q_u8([4 x <16 x i8>] %a.coerce, <16 x i8> %b) { +; CHECK: test_vqtbl4q_u8: +; CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +entry: + %__a.coerce.fca.0.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 0 + %__a.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 1 + %__a.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 2 + %__a.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 3 + %vtbl4.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl4.v16i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <16 x i8> %__a.coerce.fca.3.extract.i, <16 x i8> %b) + ret <16 x i8> %vtbl4.i +} + +define <8 x i8> @test_vtbx1_u8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) { +; CHECK: test_vtbx1_u8: +; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %vtbl1.i = shufflevector <8 x i8> %b, <8 x i8> zeroinitializer, <16 x i32> + %vtbl11.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8.v16i8(<16 x i8> %vtbl1.i, <8 x i8> %c) + %0 = icmp uge <8 x i8> %c, + %1 = sext <8 x i1> %0 to <8 x i8> + %vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %1, <8 x i8> %a, <8 x i8> %vtbl11.i) + ret <8 x i8> %vbsl.i +} + +define <8 x i8> @test_vtbx2_u8(<8 x i8> %a, [2 x <8 x i8>] %b.coerce, <8 x i8> %c) { +; CHECK: test_vtbx2_u8: +; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %__b.coerce.fca.0.extract.i = extractvalue [2 x <8 x i8>] %b.coerce, 0 + %__b.coerce.fca.1.extract.i = extractvalue [2 x <8 x i8>] %b.coerce, 1 + %vtbx1.i = shufflevector <8 x i8> %__b.coerce.fca.0.extract.i, <8 x i8> %__b.coerce.fca.1.extract.i, <16 x i32> + %vtbx17.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx1.v8i8.v16i8(<8 x i8> %a, <16 x i8> %vtbx1.i, <8 x i8> %c) + ret <8 x i8> %vtbx17.i +} + +define <8 x i8> @test_vtbx3_u8(<8 x i8> %a, [3 x <8 x i8>] %b.coerce, <8 x i8> %c) { +; CHECK: test_vtbx3_u8: +; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %__b.coerce.fca.0.extract.i = extractvalue [3 x <8 x i8>] %b.coerce, 0 + %__b.coerce.fca.1.extract.i = extractvalue [3 x <8 x i8>] %b.coerce, 1 + %__b.coerce.fca.2.extract.i = extractvalue [3 x <8 x i8>] %b.coerce, 2 + %vtbl2.i = shufflevector <8 x i8> %__b.coerce.fca.0.extract.i, <8 x i8> %__b.coerce.fca.1.extract.i, <16 x i32> + %vtbl211.i = shufflevector <8 x i8> %__b.coerce.fca.2.extract.i, <8 x i8> zeroinitializer, <16 x i32> + %vtbl212.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8.v16i8(<16 x i8> %vtbl2.i, <16 x i8> %vtbl211.i, <8 x i8> %c) + %0 = icmp uge <8 x i8> %c, + %1 = sext <8 x i1> %0 to <8 x i8> + %vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %1, <8 x i8> %a, <8 x i8> %vtbl212.i) + ret <8 x i8> %vbsl.i +} + +define <8 x i8> @test_vtbx4_u8(<8 x i8> %a, [4 x <8 x i8>] %b.coerce, <8 x i8> %c) { +; CHECK: test_vtbx4_u8: +; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %__b.coerce.fca.0.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 0 + %__b.coerce.fca.1.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 1 + %__b.coerce.fca.2.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 2 + %__b.coerce.fca.3.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 3 + %vtbx2.i = shufflevector <8 x i8> %__b.coerce.fca.0.extract.i, <8 x i8> %__b.coerce.fca.1.extract.i, <16 x i32> + %vtbx215.i = shufflevector <8 x i8> %__b.coerce.fca.2.extract.i, <8 x i8> %__b.coerce.fca.3.extract.i, <16 x i32> + %vtbx216.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx2.v8i8.v16i8(<8 x i8> %a, <16 x i8> %vtbx2.i, <16 x i8> %vtbx215.i, <8 x i8> %c) + ret <8 x i8> %vtbx216.i +} + +define <8 x i8> @test_vqtbx1_u8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) { +; CHECK: test_vqtbx1_u8: +; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %vtbx1.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx1.v8i8.v16i8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) + ret <8 x i8> %vtbx1.i +} + +define <8 x i8> @test_vqtbx2_u8(<8 x i8> %a, [2 x <16 x i8>] %b.coerce, <8 x i8> %c) { +; CHECK: test_vqtbx2_u8: +; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %__b.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 0 + %__b.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 1 + %vtbx2.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx2.v8i8.v16i8(<8 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <8 x i8> %c) + ret <8 x i8> %vtbx2.i +} + +define <8 x i8> @test_vqtbx3_u8(<8 x i8> %a, [3 x <16 x i8>] %b.coerce, <8 x i8> %c) { +; CHECK: test_vqtbx3_u8: +; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %__b.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 0 + %__b.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 1 + %__b.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 2 + %vtbx3.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx3.v8i8.v16i8(<8 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <8 x i8> %c) + ret <8 x i8> %vtbx3.i +} + +define <8 x i8> @test_vqtbx4_u8(<8 x i8> %a, [4 x <16 x i8>] %b.coerce, <8 x i8> %c) { +; CHECK: test_vqtbx4_u8: +; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %__b.coerce.fca.0.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 0 + %__b.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 1 + %__b.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 2 + %__b.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 3 + %vtbx4.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx4.v8i8.v16i8(<8 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <16 x i8> %__b.coerce.fca.3.extract.i, <8 x i8> %c) + ret <8 x i8> %vtbx4.i +} + +define <16 x i8> @test_vqtbx1q_u8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { +; CHECK: test_vqtbx1q_u8: +; CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +entry: + %vtbx1.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx1.v16i8.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) + ret <16 x i8> %vtbx1.i +} + +define <16 x i8> @test_vqtbx2q_u8(<16 x i8> %a, [2 x <16 x i8>] %b.coerce, <16 x i8> %c) { +; CHECK: test_vqtbx2q_u8: +; CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +entry: + %__b.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 0 + %__b.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 1 + %vtbx2.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx2.v16i8.v16i8(<16 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %c) + ret <16 x i8> %vtbx2.i +} + +define <16 x i8> @test_vqtbx3q_u8(<16 x i8> %a, [3 x <16 x i8>] %b.coerce, <16 x i8> %c) { +; CHECK: test_vqtbx3q_u8: +; CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +entry: + %__b.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 0 + %__b.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 1 + %__b.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 2 + %vtbx3.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx3.v16i8.v16i8(<16 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <16 x i8> %c) + ret <16 x i8> %vtbx3.i +} + +define <16 x i8> @test_vqtbx4q_u8(<16 x i8> %a, [4 x <16 x i8>] %b.coerce, <16 x i8> %c) { +; CHECK: test_vqtbx4q_u8: +; CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +entry: + %__b.coerce.fca.0.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 0 + %__b.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 1 + %__b.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 2 + %__b.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 3 + %vtbx4.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx4.v16i8.v16i8(<16 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <16 x i8> %__b.coerce.fca.3.extract.i, <16 x i8> %c) + ret <16 x i8> %vtbx4.i +} + +define <8 x i8> @test_vtbl1_p8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vtbl1_p8: +; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %vtbl1.i = shufflevector <8 x i8> %a, <8 x i8> zeroinitializer, <16 x i32> + %vtbl11.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8.v16i8(<16 x i8> %vtbl1.i, <8 x i8> %b) + ret <8 x i8> %vtbl11.i +} + +define <8 x i8> @test_vqtbl1_p8(<16 x i8> %a, <8 x i8> %b) { +; CHECK: test_vqtbl1_p8: +; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %vtbl1.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8.v16i8(<16 x i8> %a, <8 x i8> %b) + ret <8 x i8> %vtbl1.i +} + +define <8 x i8> @test_vtbl2_p8([2 x <8 x i8>] %a.coerce, <8 x i8> %b) { +; CHECK: test_vtbl2_p8: +; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %__a.coerce.fca.0.extract.i = extractvalue [2 x <8 x i8>] %a.coerce, 0 + %__a.coerce.fca.1.extract.i = extractvalue [2 x <8 x i8>] %a.coerce, 1 + %vtbl1.i = shufflevector <8 x i8> %__a.coerce.fca.0.extract.i, <8 x i8> %__a.coerce.fca.1.extract.i, <16 x i32> + %vtbl17.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8.v16i8(<16 x i8> %vtbl1.i, <8 x i8> %b) + ret <8 x i8> %vtbl17.i +} + +define <8 x i8> @test_vqtbl2_p8([2 x <16 x i8>] %a.coerce, <8 x i8> %b) { +; CHECK: test_vqtbl2_p8: +; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %__a.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 0 + %__a.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 1 + %vtbl2.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <8 x i8> %b) + ret <8 x i8> %vtbl2.i +} + +define <8 x i8> @test_vtbl3_p8([3 x <8 x i8>] %a.coerce, <8 x i8> %b) { +; CHECK: test_vtbl3_p8: +; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %__a.coerce.fca.0.extract.i = extractvalue [3 x <8 x i8>] %a.coerce, 0 + %__a.coerce.fca.1.extract.i = extractvalue [3 x <8 x i8>] %a.coerce, 1 + %__a.coerce.fca.2.extract.i = extractvalue [3 x <8 x i8>] %a.coerce, 2 + %vtbl2.i = shufflevector <8 x i8> %__a.coerce.fca.0.extract.i, <8 x i8> %__a.coerce.fca.1.extract.i, <16 x i32> + %vtbl211.i = shufflevector <8 x i8> %__a.coerce.fca.2.extract.i, <8 x i8> zeroinitializer, <16 x i32> + %vtbl212.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8.v16i8(<16 x i8> %vtbl2.i, <16 x i8> %vtbl211.i, <8 x i8> %b) + ret <8 x i8> %vtbl212.i +} + +define <8 x i8> @test_vqtbl3_p8([3 x <16 x i8>] %a.coerce, <8 x i8> %b) { +; CHECK: test_vqtbl3_p8: +; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %__a.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 0 + %__a.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 1 + %__a.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 2 + %vtbl3.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl3.v8i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <8 x i8> %b) + ret <8 x i8> %vtbl3.i +} + +define <8 x i8> @test_vtbl4_p8([4 x <8 x i8>] %a.coerce, <8 x i8> %b) { +; CHECK: test_vtbl4_p8: +; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %__a.coerce.fca.0.extract.i = extractvalue [4 x <8 x i8>] %a.coerce, 0 + %__a.coerce.fca.1.extract.i = extractvalue [4 x <8 x i8>] %a.coerce, 1 + %__a.coerce.fca.2.extract.i = extractvalue [4 x <8 x i8>] %a.coerce, 2 + %__a.coerce.fca.3.extract.i = extractvalue [4 x <8 x i8>] %a.coerce, 3 + %vtbl2.i = shufflevector <8 x i8> %__a.coerce.fca.0.extract.i, <8 x i8> %__a.coerce.fca.1.extract.i, <16 x i32> + %vtbl215.i = shufflevector <8 x i8> %__a.coerce.fca.2.extract.i, <8 x i8> %__a.coerce.fca.3.extract.i, <16 x i32> + %vtbl216.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8.v16i8(<16 x i8> %vtbl2.i, <16 x i8> %vtbl215.i, <8 x i8> %b) + ret <8 x i8> %vtbl216.i +} + +define <8 x i8> @test_vqtbl4_p8([4 x <16 x i8>] %a.coerce, <8 x i8> %b) { +; CHECK: test_vqtbl4_p8: +; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %__a.coerce.fca.0.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 0 + %__a.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 1 + %__a.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 2 + %__a.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 3 + %vtbl4.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl4.v8i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <16 x i8> %__a.coerce.fca.3.extract.i, <8 x i8> %b) + ret <8 x i8> %vtbl4.i +} + +define <16 x i8> @test_vqtbl1q_p8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vqtbl1q_p8: +; CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +entry: + %vtbl1.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl1.v16i8.v16i8(<16 x i8> %a, <16 x i8> %b) + ret <16 x i8> %vtbl1.i +} + +define <16 x i8> @test_vqtbl2q_p8([2 x <16 x i8>] %a.coerce, <16 x i8> %b) { +; CHECK: test_vqtbl2q_p8: +; CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +entry: + %__a.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 0 + %__a.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 1 + %vtbl2.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl2.v16i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %b) + ret <16 x i8> %vtbl2.i +} + +define <16 x i8> @test_vqtbl3q_p8([3 x <16 x i8>] %a.coerce, <16 x i8> %b) { +; CHECK: test_vqtbl3q_p8: +; CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +entry: + %__a.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 0 + %__a.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 1 + %__a.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 2 + %vtbl3.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl3.v16i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <16 x i8> %b) + ret <16 x i8> %vtbl3.i +} + +define <16 x i8> @test_vqtbl4q_p8([4 x <16 x i8>] %a.coerce, <16 x i8> %b) { +; CHECK: test_vqtbl4q_p8: +; CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +entry: + %__a.coerce.fca.0.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 0 + %__a.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 1 + %__a.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 2 + %__a.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 3 + %vtbl4.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl4.v16i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <16 x i8> %__a.coerce.fca.3.extract.i, <16 x i8> %b) + ret <16 x i8> %vtbl4.i +} + +define <8 x i8> @test_vtbx1_p8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) { +; CHECK: test_vtbx1_p8: +; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %vtbl1.i = shufflevector <8 x i8> %b, <8 x i8> zeroinitializer, <16 x i32> + %vtbl11.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8.v16i8(<16 x i8> %vtbl1.i, <8 x i8> %c) + %0 = icmp uge <8 x i8> %c, + %1 = sext <8 x i1> %0 to <8 x i8> + %vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %1, <8 x i8> %a, <8 x i8> %vtbl11.i) + ret <8 x i8> %vbsl.i +} + +define <8 x i8> @test_vtbx2_p8(<8 x i8> %a, [2 x <8 x i8>] %b.coerce, <8 x i8> %c) { +; CHECK: test_vtbx2_p8: +; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %__b.coerce.fca.0.extract.i = extractvalue [2 x <8 x i8>] %b.coerce, 0 + %__b.coerce.fca.1.extract.i = extractvalue [2 x <8 x i8>] %b.coerce, 1 + %vtbx1.i = shufflevector <8 x i8> %__b.coerce.fca.0.extract.i, <8 x i8> %__b.coerce.fca.1.extract.i, <16 x i32> + %vtbx17.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx1.v8i8.v16i8(<8 x i8> %a, <16 x i8> %vtbx1.i, <8 x i8> %c) + ret <8 x i8> %vtbx17.i +} + +define <8 x i8> @test_vtbx3_p8(<8 x i8> %a, [3 x <8 x i8>] %b.coerce, <8 x i8> %c) { +; CHECK: test_vtbx3_p8: +; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %__b.coerce.fca.0.extract.i = extractvalue [3 x <8 x i8>] %b.coerce, 0 + %__b.coerce.fca.1.extract.i = extractvalue [3 x <8 x i8>] %b.coerce, 1 + %__b.coerce.fca.2.extract.i = extractvalue [3 x <8 x i8>] %b.coerce, 2 + %vtbl2.i = shufflevector <8 x i8> %__b.coerce.fca.0.extract.i, <8 x i8> %__b.coerce.fca.1.extract.i, <16 x i32> + %vtbl211.i = shufflevector <8 x i8> %__b.coerce.fca.2.extract.i, <8 x i8> zeroinitializer, <16 x i32> + %vtbl212.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8.v16i8(<16 x i8> %vtbl2.i, <16 x i8> %vtbl211.i, <8 x i8> %c) + %0 = icmp uge <8 x i8> %c, + %1 = sext <8 x i1> %0 to <8 x i8> + %vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %1, <8 x i8> %a, <8 x i8> %vtbl212.i) + ret <8 x i8> %vbsl.i +} + +define <8 x i8> @test_vtbx4_p8(<8 x i8> %a, [4 x <8 x i8>] %b.coerce, <8 x i8> %c) { +; CHECK: test_vtbx4_p8: +; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %__b.coerce.fca.0.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 0 + %__b.coerce.fca.1.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 1 + %__b.coerce.fca.2.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 2 + %__b.coerce.fca.3.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 3 + %vtbx2.i = shufflevector <8 x i8> %__b.coerce.fca.0.extract.i, <8 x i8> %__b.coerce.fca.1.extract.i, <16 x i32> + %vtbx215.i = shufflevector <8 x i8> %__b.coerce.fca.2.extract.i, <8 x i8> %__b.coerce.fca.3.extract.i, <16 x i32> + %vtbx216.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx2.v8i8.v16i8(<8 x i8> %a, <16 x i8> %vtbx2.i, <16 x i8> %vtbx215.i, <8 x i8> %c) + ret <8 x i8> %vtbx216.i +} + +define <8 x i8> @test_vqtbx1_p8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) { +; CHECK: test_vqtbx1_p8: +; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %vtbx1.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx1.v8i8.v16i8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) + ret <8 x i8> %vtbx1.i +} + +define <8 x i8> @test_vqtbx2_p8(<8 x i8> %a, [2 x <16 x i8>] %b.coerce, <8 x i8> %c) { +; CHECK: test_vqtbx2_p8: +; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %__b.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 0 + %__b.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 1 + %vtbx2.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx2.v8i8.v16i8(<8 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <8 x i8> %c) + ret <8 x i8> %vtbx2.i +} + +define <8 x i8> @test_vqtbx3_p8(<8 x i8> %a, [3 x <16 x i8>] %b.coerce, <8 x i8> %c) { +; CHECK: test_vqtbx3_p8: +; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %__b.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 0 + %__b.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 1 + %__b.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 2 + %vtbx3.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx3.v8i8.v16i8(<8 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <8 x i8> %c) + ret <8 x i8> %vtbx3.i +} + +define <8 x i8> @test_vqtbx4_p8(<8 x i8> %a, [4 x <16 x i8>] %b.coerce, <8 x i8> %c) { +; CHECK: test_vqtbx4_p8: +; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %__b.coerce.fca.0.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 0 + %__b.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 1 + %__b.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 2 + %__b.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 3 + %vtbx4.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx4.v8i8.v16i8(<8 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <16 x i8> %__b.coerce.fca.3.extract.i, <8 x i8> %c) + ret <8 x i8> %vtbx4.i +} + +define <16 x i8> @test_vqtbx1q_p8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { +; CHECK: test_vqtbx1q_p8: +; CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +entry: + %vtbx1.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx1.v16i8.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) + ret <16 x i8> %vtbx1.i +} + +define <16 x i8> @test_vqtbx2q_p8(<16 x i8> %a, [2 x <16 x i8>] %b.coerce, <16 x i8> %c) { +; CHECK: test_vqtbx2q_p8: +; CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +entry: + %__b.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 0 + %__b.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 1 + %vtbx2.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx2.v16i8.v16i8(<16 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %c) + ret <16 x i8> %vtbx2.i +} + +define <16 x i8> @test_vqtbx3q_p8(<16 x i8> %a, [3 x <16 x i8>] %b.coerce, <16 x i8> %c) { +; CHECK: test_vqtbx3q_p8: +; CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +entry: + %__b.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 0 + %__b.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 1 + %__b.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 2 + %vtbx3.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx3.v16i8.v16i8(<16 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <16 x i8> %c) + ret <16 x i8> %vtbx3.i +} + +define <16 x i8> @test_vqtbx4q_p8(<16 x i8> %a, [4 x <16 x i8>] %b.coerce, <16 x i8> %c) { +; CHECK: test_vqtbx4q_p8: +; CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +entry: + %__b.coerce.fca.0.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 0 + %__b.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 1 + %__b.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 2 + %__b.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 3 + %vtbx4.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx4.v16i8.v16i8(<16 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <16 x i8> %__b.coerce.fca.3.extract.i, <16 x i8> %c) + ret <16 x i8> %vtbx4.i +} + diff --git a/test/MC/AArch64/neon-diagnostics.s b/test/MC/AArch64/neon-diagnostics.s index 0a2332b41e..12d56a5fb0 100644 --- a/test/MC/AArch64/neon-diagnostics.s +++ b/test/MC/AArch64/neon-diagnostics.s @@ -5928,3 +5928,51 @@ // CHECK-ERROR: error: lane number incompatible with layout // CHECK-ERROR: dup b1, v3.b[16] // CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Table look up +//---------------------------------------------------------------------- + + tbl v0.8b, {v1.8b}, v2.8b + tbl v0.8b, {v1.8b, v2.8b}, v2.8b + tbl v0.8b, {v1.8b, v2.8b, v3.8b}, v2.8b + tbl v0.8b, {v1.8b, v2.8b, v3.8b, v4.8b}, v2.8b + tbl v0.8b, {v1.16b, v2.16b, v3.16b, v4.16b, v5.16b}, v2.8b + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: tbl v0.8b, {v1.8b}, v2.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: tbl v0.8b, {v1.8b, v2.8b}, v2.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: tbl v0.8b, {v1.8b, v2.8b, v3.8b}, v2.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: tbl v0.8b, {v1.8b, v2.8b, v3.8b, v4.8b}, v2.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid number of vectors +// CHECK-ERROR: tbl v0.8b, {v1.16b, v2.16b, v3.16b, v4.16b, v5.16b}, v2.8b +// CHECK-ERROR: ^ + + tbx v0.8b, {v1.8b}, v2.8b + tbx v0.8b, {v1.8b, v2.8b}, v2.8b + tbx v0.8b, {v1.8b, v2.8b, v3.8b}, v2.8b + tbx v0.8b, {v1.8b, v2.8b, v3.8b, v4.8b}, v2.8b + tbx v0.8b, {v1.16b, v2.16b, v3.16b, v4.16b, v5.16b}, v2.8b + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: tbx v0.8b, {v1.8b}, v2.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: tbx v0.8b, {v1.8b, v2.8b}, v2.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: tbx v0.8b, {v1.8b, v2.8b, v3.8b}, v2.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: tbx v0.8b, {v1.8b, v2.8b, v3.8b, v4.8b}, v2.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid number of vectors +// CHECK-ERROR: tbx v0.8b, {v1.16b, v2.16b, v3.16b, v4.16b, v5.16b}, v2.8b +// CHECK-ERROR: ^ diff --git a/test/MC/AArch64/neon-tbl.s b/test/MC/AArch64/neon-tbl.s new file mode 100644 index 0000000000..ff3e86b1c9 --- /dev/null +++ b/test/MC/AArch64/neon-tbl.s @@ -0,0 +1,56 @@ +// RUN: llvm-mc -triple=aarch64 -mattr=+neon -show-encoding < %s | FileCheck %s + +// Check that the assembler can handle the documented syntax for AArch64 + +//------------------------------------------------------------------------------ +// Instructions across vector registers +//------------------------------------------------------------------------------ + + tbl v0.8b, {v1.16b}, v2.8b + tbl v0.8b, {v1.16b, v2.16b}, v2.8b + tbl v0.8b, {v1.16b, v2.16b, v3.16b}, v2.8b + tbl v0.8b, {v1.16b, v2.16b, v3.16b, v4.16b}, v2.8b + tbl v0.8b, {v31.16b, v0.16b, v1.16b, v2.16b}, v2.8b + +// CHECK: tbl v0.8b, {v1.16b}, v2.8b // encoding: [0x20,0x00,0x02,0x0e] +// CHECK: tbl v0.8b, {v1.16b, v2.16b}, v2.8b // encoding: [0x20,0x20,0x02,0x0e] +// CHECK: tbl v0.8b, {v1.16b, v2.16b, v3.16b}, v2.8b // encoding: [0x20,0x40,0x02,0x0e] +// CHECK: tbl v0.8b, {v1.16b, v2.16b, v3.16b, v4.16b}, v2.8b // encoding: [0x20,0x60,0x02,0x0e] +// CHECK: tbl v0.8b, {v31.16b, v0.16b, v1.16b, v2.16b}, v2.8b // encoding: [0xe0,0x63,0x02,0x0e] + + tbl v0.16b, {v1.16b}, v2.16b + tbl v0.16b, {v1.16b, v2.16b}, v2.16b + tbl v0.16b, {v1.16b, v2.16b, v3.16b}, v2.16b + tbl v0.16b, {v1.16b, v2.16b, v3.16b, v4.16b}, v2.16b + tbl v0.16b, {v30.16b, v31.16b, v0.16b, v1.16b}, v2.16b + +// CHECK: tbl v0.16b, {v1.16b}, v2.16b // encoding: [0x20,0x00,0x02,0x4e] +// CHECK: tbl v0.16b, {v1.16b, v2.16b}, v2.16b // encoding: [0x20,0x20,0x02,0x4e] +// CHECK: tbl v0.16b, {v1.16b, v2.16b, v3.16b}, v2.16b // encoding: [0x20,0x40,0x02,0x4e] +// CHECK: tbl v0.16b, {v1.16b, v2.16b, v3.16b, v4.16b}, v2.16b // encoding: [0x20,0x60,0x02,0x4e] +// CHECK: tbl v0.16b, {v30.16b, v31.16b, v0.16b, v1.16b}, v2.16b // encoding: [0xc0,0x63,0x02,0x4e] + + tbx v0.8b, {v1.16b}, v2.8b + tbx v0.8b, {v1.16b, v2.16b}, v2.8b + tbx v0.8b, {v1.16b, v2.16b, v3.16b}, v2.8b + tbx v0.8b, {v1.16b, v2.16b, v3.16b, v4.16b}, v2.8b + tbx v0.8b, {v31.16b, v0.16b, v1.16b, v2.16b}, v2.8b + +// CHECK: tbx v0.8b, {v1.16b}, v2.8b // encoding: [0x20,0x10,0x02,0x0e] +// CHECK: tbx v0.8b, {v1.16b, v2.16b}, v2.8b // encoding: [0x20,0x30,0x02,0x0e] +// CHECK: tbx v0.8b, {v1.16b, v2.16b, v3.16b}, v2.8b // encoding: [0x20,0x50,0x02,0x0e] +// CHECK: tbx v0.8b, {v1.16b, v2.16b, v3.16b, v4.16b}, v2.8b // encoding: [0x20,0x70,0x02,0x0e] +// CHECK: tbx v0.8b, {v31.16b, v0.16b, v1.16b, v2.16b}, v2.8b // encoding: [0xe0,0x73,0x02,0x0e] + + tbx v0.16b, {v1.16b}, v2.16b + tbx v0.16b, {v1.16b, v2.16b}, v2.16b + tbx v0.16b, {v1.16b, v2.16b, v3.16b}, v2.16b + tbx v0.16b, {v1.16b, v2.16b, v3.16b, v4.16b}, v2.16b + tbx v0.16b, {v30.16b, v31.16b, v0.16b, v1.16b}, v2.16b + +// CHECK: tbx v0.16b, {v1.16b}, v2.16b // encoding: [0x20,0x10,0x02,0x4e] +// CHECK: tbx v0.16b, {v1.16b, v2.16b}, v2.16b // encoding: [0x20,0x30,0x02,0x4e] +// CHECK: tbx v0.16b, {v1.16b, v2.16b, v3.16b}, v2.16b // encoding: [0x20,0x50,0x02,0x4e] +// CHECK: tbx v0.16b, {v1.16b, v2.16b, v3.16b, v4.16b}, v2.16b // encoding: [0x20,0x70,0x02,0x4e] +// CHECK: tbx v0.16b, {v30.16b, v31.16b, v0.16b, v1.16b}, v2.16b // encoding: [0xc0,0x73,0x02,0x4e] + diff --git a/test/MC/Disassembler/AArch64/neon-instructions.txt b/test/MC/Disassembler/AArch64/neon-instructions.txt index b9ea7c140b..c1659019a8 100644 --- a/test/MC/Disassembler/AArch64/neon-instructions.txt +++ b/test/MC/Disassembler/AArch64/neon-instructions.txt @@ -1,4 +1,4 @@ -G# RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -disassemble < %s | FileCheck %s +# RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -disassemble < %s | FileCheck %s #------------------------------------------------------------------------------ # Vector Integer Add/Sub @@ -2387,3 +2387,42 @@ G# RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -disassemble < %s | 0x51 0x04 0x14 0x5e 0x86 0x05 0x18 0x5e +#---------------------------------------------------------------------- +# Table look up +#---------------------------------------------------------------------- +0x20,0x00,0x02,0x0e +0xf0,0x23,0x02,0x0e +0x20,0x40,0x02,0x0e +0xf0,0x62,0x02,0x0e +# CHECK: tbl v0.8b, {v1.16b}, v2.8b +# CHECK: tbl v16.8b, {v31.16b, v0.16b}, v2.8b +# CHECK: tbl v0.8b, {v1.16b, v2.16b, v3.16b}, v2.8b +# CHECK: tbl v16.8b, {v23.16b, v24.16b, v25.16b, v26.16b}, v2.8b + +0x20,0x00,0x02,0x4e +0xf0,0x23,0x02,0x4e +0x20,0x40,0x02,0x4e +0xe0,0x63,0x02,0x4e +# CHECK: tbl v0.16b, {v1.16b}, v2.16b +# CHECK: tbl v16.16b, {v31.16b, v0.16b}, v2.16b +# CHECK: tbl v0.16b, {v1.16b, v2.16b, v3.16b}, v2.16b +# CHECK: tbl v0.16b, {v31.16b, v0.16b, v1.16b, v2.16b}, v2.16b + +0x20,0x10,0x02,0x0e +0xf0,0x33,0x02,0x0e +0x20,0x50,0x02,0x0e +0xf0,0x72,0x02,0x0e +# CHECK: tbx v0.8b, {v1.16b}, v2.8b +# CHECK: tbx v16.8b, {v31.16b, v0.16b}, v2.8b +# CHECK: tbx v0.8b, {v1.16b, v2.16b, v3.16b}, v2.8b +# CHECK: tbx v16.8b, {v23.16b, v24.16b, v25.16b, v26.16b}, v2.8b + +0x20,0x10,0x02,0x4e +0xf0,0x33,0x02,0x4e +0x20,0x50,0x02,0x4e +0xf0,0x73,0x02,0x4e +# CHECK: tbx v0.16b, {v1.16b}, v2.16b +# CHECK: tbx v16.16b, {v31.16b, v0.16b}, v2.16b +# CHECK: tbx v0.16b, {v1.16b, v2.16b, v3.16b}, v2.16b +# CHECK: tbx v16.16b, {v31.16b, v0.16b, v1.16b, v2.16b}, v2.16b + -- cgit v1.2.3