Implement AArch64 Neon instruction set Perm.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@194123 91177308-0d34-0410-b5e6-96231b3b80d8
author: Jiangning Liu <jiangning.liu@arm.com> 2013-11-06 03:35:27 +0000
committer: Jiangning Liu <jiangning.liu@arm.com> 2013-11-06 03:35:27 +0000
commit: 8458f371b84ee0cd22c4a433059d53ea6e3ec4f4 (patch)
tree: 0a4d59f36e877036558838acfc8eb34a84ae132a /test
parent: 258115258f8fe15e9d74b5fb524f90b75bb917d1 (diff)
download: llvm-8458f371b84ee0cd22c4a433059d53ea6e3ec4f4.tar.gz
llvm-8458f371b84ee0cd22c4a433059d53ea6e3ec4f4.tar.bz2
llvm-8458f371b84ee0cd22c4a433059d53ea6e3ec4f4.tar.xz
4 files changed, 2302 insertions, 0 deletions
diff --git a/test/CodeGen/AArch64/neon-perm.ll b/test/CodeGen/AArch64/neon-perm.ll
new file mode 100644
index 0000000000..4db4771cf1
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-perm.ll
@@ -0,0 +1,1676 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
+
+%struct.int8x8x2_t = type { [2 x <8 x i8>] }
+%struct.int16x4x2_t = type { [2 x <4 x i16>] }
+%struct.int32x2x2_t = type { [2 x <2 x i32>] }
+%struct.uint8x8x2_t = type { [2 x <8 x i8>] }
+%struct.uint16x4x2_t = type { [2 x <4 x i16>] }
+%struct.uint32x2x2_t = type { [2 x <2 x i32>] }
+%struct.float32x2x2_t = type { [2 x <2 x float>] }
+%struct.poly8x8x2_t = type { [2 x <8 x i8>] }
+%struct.poly16x4x2_t = type { [2 x <4 x i16>] }
+%struct.int8x16x2_t = type { [2 x <16 x i8>] }
+%struct.int16x8x2_t = type { [2 x <8 x i16>] }
+%struct.int32x4x2_t = type { [2 x <4 x i32>] }
+%struct.uint8x16x2_t = type { [2 x <16 x i8>] }
+%struct.uint16x8x2_t = type { [2 x <8 x i16>] }
+%struct.uint32x4x2_t = type { [2 x <4 x i32>] }
+%struct.float32x4x2_t = type { [2 x <4 x float>] }
+%struct.poly8x16x2_t = type { [2 x <16 x i8>] }
+%struct.poly16x8x2_t = type { [2 x <8 x i16>] }
+
+define <8 x i8> @test_vuzp1_s8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK: test_vuzp1_s8:
+; CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+  %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+  ret <8 x i8> %shuffle.i
+}
+
+define <16 x i8> @test_vuzp1q_s8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: test_vuzp1q_s8:
+; CHECK: uzp1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+  %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
+  ret <16 x i8> %shuffle.i
+}
+
+define <4 x i16> @test_vuzp1_s16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK: test_vuzp1_s16:
+; CHECK: uzp1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+  %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  ret <4 x i16> %shuffle.i
+}
+
+define <8 x i16> @test_vuzp1q_s16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vuzp1q_s16:
+; CHECK: uzp1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+  ret <8 x i16> %shuffle.i
+}
+
+define <2 x i32> @test_vuzp1_s32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK: test_vuzp1_s32:
+; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+entry:
+  %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
+  ret <2 x i32> %shuffle.i
+}
+
+define <4 x i32> @test_vuzp1q_s32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK: test_vuzp1q_s32:
+; CHECK: uzp1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  ret <4 x i32> %shuffle.i
+}
+
+define <2 x i64> @test_vuzp1q_s64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK: test_vuzp1q_s64:
+; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+entry:
+  %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
+  ret <2 x i64> %shuffle.i
+}
+
+define <8 x i8> @test_vuzp1_u8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK: test_vuzp1_u8:
+; CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+  %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+  ret <8 x i8> %shuffle.i
+}
+
+define <16 x i8> @test_vuzp1q_u8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: test_vuzp1q_u8:
+; CHECK: uzp1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+  %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
+  ret <16 x i8> %shuffle.i
+}
+
+define <4 x i16> @test_vuzp1_u16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK: test_vuzp1_u16:
+; CHECK: uzp1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+  %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  ret <4 x i16> %shuffle.i
+}
+
+define <8 x i16> @test_vuzp1q_u16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vuzp1q_u16:
+; CHECK: uzp1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+  ret <8 x i16> %shuffle.i
+}
+
+define <2 x i32> @test_vuzp1_u32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK: test_vuzp1_u32:
+; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+entry:
+  %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
+  ret <2 x i32> %shuffle.i
+}
+
+define <4 x i32> @test_vuzp1q_u32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK: test_vuzp1q_u32:
+; CHECK: uzp1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  ret <4 x i32> %shuffle.i
+}
+
+define <2 x i64> @test_vuzp1q_u64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK: test_vuzp1q_u64:
+; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+entry:
+  %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
+  ret <2 x i64> %shuffle.i
+}
+
+define <2 x float> @test_vuzp1_f32(<2 x float> %a, <2 x float> %b) {
+; CHECK: test_vuzp1_f32:
+; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+entry:
+  %shuffle.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
+  ret <2 x float> %shuffle.i
+}
+
+define <4 x float> @test_vuzp1q_f32(<4 x float> %a, <4 x float> %b) {
+; CHECK: test_vuzp1q_f32:
+; CHECK: uzp1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+  %shuffle.i = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  ret <4 x float> %shuffle.i
+}
+
+define <2 x double> @test_vuzp1q_f64(<2 x double> %a, <2 x double> %b) {
+; CHECK: test_vuzp1q_f64:
+; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+entry:
+  %shuffle.i = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 2>
+  ret <2 x double> %shuffle.i
+}
+
+define <8 x i8> @test_vuzp1_p8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK: test_vuzp1_p8:
+; CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+  %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+  ret <8 x i8> %shuffle.i
+}
+
+define <16 x i8> @test_vuzp1q_p8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: test_vuzp1q_p8:
+; CHECK: uzp1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+  %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
+  ret <16 x i8> %shuffle.i
+}
+
+define <4 x i16> @test_vuzp1_p16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK: test_vuzp1_p16:
+; CHECK: uzp1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+  %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  ret <4 x i16> %shuffle.i
+}
+
+define <8 x i16> @test_vuzp1q_p16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vuzp1q_p16:
+; CHECK: uzp1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+  ret <8 x i16> %shuffle.i
+}
+
+define <8 x i8> @test_vuzp2_s8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK: test_vuzp2_s8:
+; CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+  %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+  ret <8 x i8> %shuffle.i
+}
+
+define <16 x i8> @test_vuzp2q_s8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: test_vuzp2q_s8:
+; CHECK: uzp2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+  %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
+  ret <16 x i8> %shuffle.i
+}
+
+define <4 x i16> @test_vuzp2_s16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK: test_vuzp2_s16:
+; CHECK: uzp2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+  %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+  ret <4 x i16> %shuffle.i
+}
+
+define <8 x i16> @test_vuzp2q_s16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vuzp2q_s16:
+; CHECK: uzp2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+  ret <8 x i16> %shuffle.i
+}
+
+define <2 x i32> @test_vuzp2_s32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK: test_vuzp2_s32:
+; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+entry:
+  %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
+  ret <2 x i32> %shuffle.i
+}
+
+define <4 x i32> @test_vuzp2q_s32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK: test_vuzp2q_s32:
+; CHECK: uzp2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+  ret <4 x i32> %shuffle.i
+}
+
+define <2 x i64> @test_vuzp2q_s64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK: test_vuzp2q_s64:
+; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+entry:
+  %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
+  ret <2 x i64> %shuffle.i
+}
+
+define <8 x i8> @test_vuzp2_u8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK: test_vuzp2_u8:
+; CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+  %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+  ret <8 x i8> %shuffle.i
+}
+
+define <16 x i8> @test_vuzp2q_u8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: test_vuzp2q_u8:
+; CHECK: uzp2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+  %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
+  ret <16 x i8> %shuffle.i
+}
+
+define <4 x i16> @test_vuzp2_u16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK: test_vuzp2_u16:
+; CHECK: uzp2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+  %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+  ret <4 x i16> %shuffle.i
+}
+
+define <8 x i16> @test_vuzp2q_u16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vuzp2q_u16:
+; CHECK: uzp2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+  ret <8 x i16> %shuffle.i
+}
+
+define <2 x i32> @test_vuzp2_u32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK: test_vuzp2_u32:
+; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+entry:
+  %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
+  ret <2 x i32> %shuffle.i
+}
+
+define <4 x i32> @test_vuzp2q_u32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK: test_vuzp2q_u32:
+; CHECK: uzp2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+  ret <4 x i32> %shuffle.i
+}
+
+define <2 x i64> @test_vuzp2q_u64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK: test_vuzp2q_u64:
+; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+entry:
+  %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
+  ret <2 x i64> %shuffle.i
+}
+
+define <2 x float> @test_vuzp2_f32(<2 x float> %a, <2 x float> %b) {
+; CHECK: test_vuzp2_f32:
+; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+entry:
+  %shuffle.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
+  ret <2 x float> %shuffle.i
+}
+
+define <4 x float> @test_vuzp2q_f32(<4 x float> %a, <4 x float> %b) {
+; CHECK: test_vuzp2q_f32:
+; CHECK: uzp2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+  %shuffle.i = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+  ret <4 x float> %shuffle.i
+}
+
+define <2 x double> @test_vuzp2q_f64(<2 x double> %a, <2 x double> %b) {
+; CHECK: test_vuzp2q_f64:
+; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+entry:
+  %shuffle.i = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 3>
+  ret <2 x double> %shuffle.i
+}
+
+define <8 x i8> @test_vuzp2_p8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK: test_vuzp2_p8:
+; CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+  %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+  ret <8 x i8> %shuffle.i
+}
+
+define <16 x i8> @test_vuzp2q_p8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: test_vuzp2q_p8:
+; CHECK: uzp2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+  %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
+  ret <16 x i8> %shuffle.i
+}
+
+define <4 x i16> @test_vuzp2_p16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK: test_vuzp2_p16:
+; CHECK: uzp2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+  %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+  ret <4 x i16> %shuffle.i
+}
+
+define <8 x i16> @test_vuzp2q_p16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vuzp2q_p16:
+; CHECK: uzp2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+  ret <8 x i16> %shuffle.i
+}
+
+define <8 x i8> @test_vzip1_s8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK: test_vzip1_s8:
+; CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+  %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+  ret <8 x i8> %shuffle.i
+}
+
+define <16 x i8> @test_vzip1q_s8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: test_vzip1q_s8:
+; CHECK: zip1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+  %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
+  ret <16 x i8> %shuffle.i
+}
+
+define <4 x i16> @test_vzip1_s16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK: test_vzip1_s16:
+; CHECK: zip1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+  %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+  ret <4 x i16> %shuffle.i
+}
+
+define <8 x i16> @test_vzip1q_s16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vzip1q_s16:
+; CHECK: zip1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+  ret <8 x i16> %shuffle.i
+}
+
+define <2 x i32> @test_vzip1_s32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK: test_vzip1_s32:
+; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+entry:
+  %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
+  ret <2 x i32> %shuffle.i
+}
+
+define <4 x i32> @test_vzip1q_s32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK: test_vzip1q_s32:
+; CHECK: zip1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+  ret <4 x i32> %shuffle.i
+}
+
+define <2 x i64> @test_vzip1q_s64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK: test_vzip1q_s64:
+; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+entry:
+  %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
+  ret <2 x i64> %shuffle.i
+}
+
+define <8 x i8> @test_vzip1_u8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK: test_vzip1_u8:
+; CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+  %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+  ret <8 x i8> %shuffle.i
+}
+
+define <16 x i8> @test_vzip1q_u8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: test_vzip1q_u8:
+; CHECK: zip1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+  %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
+  ret <16 x i8> %shuffle.i
+}
+
+define <4 x i16> @test_vzip1_u16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK: test_vzip1_u16:
+; CHECK: zip1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+  %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+  ret <4 x i16> %shuffle.i
+}
+
+define <8 x i16> @test_vzip1q_u16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vzip1q_u16:
+; CHECK: zip1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+  ret <8 x i16> %shuffle.i
+}
+
+define <2 x i32> @test_vzip1_u32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK: test_vzip1_u32:
+; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+entry:
+  %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
+  ret <2 x i32> %shuffle.i
+}
+
+define <4 x i32> @test_vzip1q_u32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK: test_vzip1q_u32:
+; CHECK: zip1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+  ret <4 x i32> %shuffle.i
+}
+
+define <2 x i64> @test_vzip1q_u64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK: test_vzip1q_u64:
+; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+entry:
+  %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
+  ret <2 x i64> %shuffle.i
+}
+
+define <2 x float> @test_vzip1_f32(<2 x float> %a, <2 x float> %b) {
+; CHECK: test_vzip1_f32:
+; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+entry:
+  %shuffle.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
+  ret <2 x float> %shuffle.i
+}
+
+define <4 x float> @test_vzip1q_f32(<4 x float> %a, <4 x float> %b) {
+; CHECK: test_vzip1q_f32:
+; CHECK: zip1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+  %shuffle.i = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+  ret <4 x float> %shuffle.i
+}
+
+define <2 x double> @test_vzip1q_f64(<2 x double> %a, <2 x double> %b) {
+; CHECK: test_vzip1q_f64:
+; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+entry:
+  %shuffle.i = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 2>
+  ret <2 x double> %shuffle.i
+}
+
+define <8 x i8> @test_vzip1_p8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK: test_vzip1_p8:
+; CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+  %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+  ret <8 x i8> %shuffle.i
+}
+
+define <16 x i8> @test_vzip1q_p8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: test_vzip1q_p8:
+; CHECK: zip1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+  %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
+  ret <16 x i8> %shuffle.i
+}
+
+define <4 x i16> @test_vzip1_p16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK: test_vzip1_p16:
+; CHECK: zip1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+  %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+  ret <4 x i16> %shuffle.i
+}
+
+define <8 x i16> @test_vzip1q_p16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vzip1q_p16:
+; CHECK: zip1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+  ret <8 x i16> %shuffle.i
+}
+
+define <8 x i8> @test_vzip2_s8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK: test_vzip2_s8:
+; CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+  %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+  ret <8 x i8> %shuffle.i
+}
+
+define <16 x i8> @test_vzip2q_s8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: test_vzip2q_s8:
+; CHECK: zip2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+  %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+  ret <16 x i8> %shuffle.i
+}
+
+define <4 x i16> @test_vzip2_s16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK: test_vzip2_s16:
+; CHECK: zip2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+  %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+  ret <4 x i16> %shuffle.i
+}
+
+define <8 x i16> @test_vzip2q_s16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vzip2q_s16:
+; CHECK: zip2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+  ret <8 x i16> %shuffle.i
+}
+
+define <2 x i32> @test_vzip2_s32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK: test_vzip2_s32:
+; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+entry:
+  %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
+  ret <2 x i32> %shuffle.i
+}
+
+define <4 x i32> @test_vzip2q_s32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK: test_vzip2q_s32:
+; CHECK: zip2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+  ret <4 x i32> %shuffle.i
+}
+
+define <2 x i64> @test_vzip2q_s64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK: test_vzip2q_s64:
+; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+entry:
+  %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
+  ret <2 x i64> %shuffle.i
+}
+
+define <8 x i8> @test_vzip2_u8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK: test_vzip2_u8:
+; CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+  %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+  ret <8 x i8> %shuffle.i
+}
+
+define <16 x i8> @test_vzip2q_u8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: test_vzip2q_u8:
+; CHECK: zip2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+  %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+  ret <16 x i8> %shuffle.i
+}
+
+define <4 x i16> @test_vzip2_u16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK: test_vzip2_u16:
+; CHECK: zip2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+  %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+  ret <4 x i16> %shuffle.i
+}
+
+define <8 x i16> @test_vzip2q_u16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vzip2q_u16:
+; CHECK: zip2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+  ret <8 x i16> %shuffle.i
+}
+
+define <2 x i32> @test_vzip2_u32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK: test_vzip2_u32:
+; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+entry:
+  %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
+  ret <2 x i32> %shuffle.i
+}
+
+define <4 x i32> @test_vzip2q_u32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK: test_vzip2q_u32:
+; CHECK: zip2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+  ret <4 x i32> %shuffle.i
+}
+
+define <2 x i64> @test_vzip2q_u64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK: test_vzip2q_u64:
+; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+entry:
+  %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
+  ret <2 x i64> %shuffle.i
+}
+
+define <2 x float> @test_vzip2_f32(<2 x float> %a, <2 x float> %b) {
+; CHECK: test_vzip2_f32:
+; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+entry:
+  %shuffle.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
+  ret <2 x float> %shuffle.i
+}
+
+define <4 x float> @test_vzip2q_f32(<4 x float> %a, <4 x float> %b) {
+; CHECK: test_vzip2q_f32:
+; CHECK: zip2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+  %shuffle.i = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+  ret <4 x float> %shuffle.i
+}
+
+define <2 x double> @test_vzip2q_f64(<2 x double> %a, <2 x double> %b) {
+; CHECK: test_vzip2q_f64:
+; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+entry:
+  %shuffle.i = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 3>
+  ret <2 x double> %shuffle.i
+}
+
+define <8 x i8> @test_vzip2_p8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK: test_vzip2_p8:
+; CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+  %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+  ret <8 x i8> %shuffle.i
+}
+
+define <16 x i8> @test_vzip2q_p8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: test_vzip2q_p8:
+; CHECK: zip2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+  %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+  ret <16 x i8> %shuffle.i
+}
+
+define <4 x i16> @test_vzip2_p16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK: test_vzip2_p16:
+; CHECK: zip2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+  %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+  ret <4 x i16> %shuffle.i
+}
+
+define <8 x i16> @test_vzip2q_p16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vzip2q_p16:
+; CHECK: zip2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+  ret <8 x i16> %shuffle.i
+}
+
+define <8 x i8> @test_vtrn1_s8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK: test_vtrn1_s8:
+; CHECK: trn1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+  %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+  ret <8 x i8> %shuffle.i
+}
+
+define <16 x i8> @test_vtrn1q_s8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: test_vtrn1q_s8:
+; CHECK: trn1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+  %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
+  ret <16 x i8> %shuffle.i
+}
+
+define <4 x i16> @test_vtrn1_s16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK: test_vtrn1_s16:
+; CHECK: trn1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+  %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+  ret <4 x i16> %shuffle.i
+}
+
+define <8 x i16> @test_vtrn1q_s16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vtrn1q_s16:
+; CHECK: trn1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+  ret <8 x i16> %shuffle.i
+}
+
+define <2 x i32> @test_vtrn1_s32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK: test_vtrn1_s32:
+; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+entry:
+  %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
+  ret <2 x i32> %shuffle.i
+}
+
+define <4 x i32> @test_vtrn1q_s32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK: test_vtrn1q_s32:
+; CHECK: trn1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+  ret <4 x i32> %shuffle.i
+}
+
+define <2 x i64> @test_vtrn1q_s64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK: test_vtrn1q_s64:
+; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+entry:
+  %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
+  ret <2 x i64> %shuffle.i
+}
+
+define <8 x i8> @test_vtrn1_u8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK: test_vtrn1_u8:
+; CHECK: trn1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+  %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+  ret <8 x i8> %shuffle.i
+}
+
+define <16 x i8> @test_vtrn1q_u8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: test_vtrn1q_u8:
+; CHECK: trn1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+  %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
+  ret <16 x i8> %shuffle.i
+}
+
+define <4 x i16> @test_vtrn1_u16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK: test_vtrn1_u16:
+; CHECK: trn1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+  %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+  ret <4 x i16> %shuffle.i
+}
+
+define <8 x i16> @test_vtrn1q_u16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vtrn1q_u16:
+; CHECK: trn1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+  ret <8 x i16> %shuffle.i
+}
+
+define <2 x i32> @test_vtrn1_u32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK: test_vtrn1_u32:
+; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+entry:
+  %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
+  ret <2 x i32> %shuffle.i
+}
+
+define <4 x i32> @test_vtrn1q_u32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK: test_vtrn1q_u32:
+; CHECK: trn1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+  ret <4 x i32> %shuffle.i
+}
+
+define <2 x i64> @test_vtrn1q_u64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK: test_vtrn1q_u64:
+; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+entry:
+  %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
+  ret <2 x i64> %shuffle.i
+}
+
+define <2 x float> @test_vtrn1_f32(<2 x float> %a, <2 x float> %b) {
+; CHECK: test_vtrn1_f32:
+; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+entry:
+  %shuffle.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
+  ret <2 x float> %shuffle.i
+}
+
+define <4 x float> @test_vtrn1q_f32(<4 x float> %a, <4 x float> %b) {
+; CHECK: test_vtrn1q_f32:
+; CHECK: trn1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+  %shuffle.i = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+  ret <4 x float> %shuffle.i
+}
+
+define <2 x double> @test_vtrn1q_f64(<2 x double> %a, <2 x double> %b) {
+; CHECK: test_vtrn1q_f64:
+; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+entry:
+  %shuffle.i = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 2>
+  ret <2 x double> %shuffle.i
+}
+
+define <8 x i8> @test_vtrn1_p8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK: test_vtrn1_p8:
+; CHECK: trn1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+  %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+  ret <8 x i8> %shuffle.i
+}
+
+define <16 x i8> @test_vtrn1q_p8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: test_vtrn1q_p8:
+; CHECK: trn1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+  %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
+  ret <16 x i8> %shuffle.i
+}
+
+define <4 x i16> @test_vtrn1_p16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK: test_vtrn1_p16:
+; CHECK: trn1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+  %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+  ret <4 x i16> %shuffle.i
+}
+
+define <8 x i16> @test_vtrn1q_p16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vtrn1q_p16:
+; CHECK: trn1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+  ret <8 x i16> %shuffle.i
+}
+
+define <8 x i8> @test_vtrn2_s8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK: test_vtrn2_s8:
+; CHECK: trn2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+  %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+  ret <8 x i8> %shuffle.i
+}
+
+define <16 x i8> @test_vtrn2q_s8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: test_vtrn2q_s8:
+; CHECK: trn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+  %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
+  ret <16 x i8> %shuffle.i
+}
+
+define <4 x i16> @test_vtrn2_s16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK: test_vtrn2_s16:
+; CHECK: trn2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+  %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+  ret <4 x i16> %shuffle.i
+}
+
+define <8 x i16> @test_vtrn2q_s16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vtrn2q_s16:
+; CHECK: trn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+  ret <8 x i16> %shuffle.i
+}
+
+define <2 x i32> @test_vtrn2_s32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK: test_vtrn2_s32:
+; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+entry:
+  %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
+  ret <2 x i32> %shuffle.i
+}
+
+define <4 x i32> @test_vtrn2q_s32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK: test_vtrn2q_s32:
+; CHECK: trn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+  ret <4 x i32> %shuffle.i
+}
+
+define <2 x i64> @test_vtrn2q_s64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK: test_vtrn2q_s64:
+; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+entry:
+  %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
+  ret <2 x i64> %shuffle.i
+}
+
+define <8 x i8> @test_vtrn2_u8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK: test_vtrn2_u8:
+; CHECK: trn2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+  %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+  ret <8 x i8> %shuffle.i
+}
+
+define <16 x i8> @test_vtrn2q_u8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: test_vtrn2q_u8:
+; CHECK: trn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+  %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
+  ret <16 x i8> %shuffle.i
+}
+
+define <4 x i16> @test_vtrn2_u16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK: test_vtrn2_u16:
+; CHECK: trn2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+  %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+  ret <4 x i16> %shuffle.i
+}
+
+define <8 x i16> @test_vtrn2q_u16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vtrn2q_u16:
+; CHECK: trn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+  ret <8 x i16> %shuffle.i
+}
+
+define <2 x i32> @test_vtrn2_u32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK: test_vtrn2_u32:
+; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+entry:
+  %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
+  ret <2 x i32> %shuffle.i
+}
+
+define <4 x i32> @test_vtrn2q_u32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK: test_vtrn2q_u32:
+; CHECK: trn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+  ret <4 x i32> %shuffle.i
+}
+
+define <2 x i64> @test_vtrn2q_u64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK: test_vtrn2q_u64:
+; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+entry:
+  %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
+  ret <2 x i64> %shuffle.i
+}
+
+define <2 x float> @test_vtrn2_f32(<2 x float> %a, <2 x float> %b) {
+; CHECK: test_vtrn2_f32:
+; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+entry:
+  %shuffle.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
+  ret <2 x float> %shuffle.i
+}
+
+define <4 x float> @test_vtrn2q_f32(<4 x float> %a, <4 x float> %b) {
+; CHECK: test_vtrn2q_f32:
+; CHECK: trn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+  %shuffle.i = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+  ret <4 x float> %shuffle.i
+}
+
+define <2 x double> @test_vtrn2q_f64(<2 x double> %a, <2 x double> %b) {
+; CHECK: test_vtrn2q_f64:
+; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+entry:
+  %shuffle.i = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 3>
+  ret <2 x double> %shuffle.i
+}
+
+define <8 x i8> @test_vtrn2_p8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK: test_vtrn2_p8:
+; CHECK: trn2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+  %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+  ret <8 x i8> %shuffle.i
+}
+
+define <16 x i8> @test_vtrn2q_p8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: test_vtrn2q_p8:
+; CHECK: trn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+  %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
+  ret <16 x i8> %shuffle.i
+}
+
+define <4 x i16> @test_vtrn2_p16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK: test_vtrn2_p16:
+; CHECK: trn2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+  %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+  ret <4 x i16> %shuffle.i
+}
+
+define <8 x i16> @test_vtrn2q_p16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vtrn2q_p16:
+; CHECK: trn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+  ret <8 x i16> %shuffle.i
+}
+
+define %struct.int8x8x2_t @test_vuzp_s8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK: test_vuzp_s8:
+; CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+  %vuzp.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+  %vuzp1.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+  %.fca.0.0.insert = insertvalue %struct.int8x8x2_t undef, <8 x i8> %vuzp.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.int8x8x2_t %.fca.0.0.insert, <8 x i8> %vuzp1.i, 0, 1
+  ret %struct.int8x8x2_t %.fca.0.1.insert
+}
+
+define %struct.int16x4x2_t @test_vuzp_s16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK: test_vuzp_s16:
+; CHECK: uzp1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+; CHECK: uzp2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+  %vuzp.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  %vuzp1.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+  %.fca.0.0.insert = insertvalue %struct.int16x4x2_t undef, <4 x i16> %vuzp.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.int16x4x2_t %.fca.0.0.insert, <4 x i16> %vuzp1.i, 0, 1
+  ret %struct.int16x4x2_t %.fca.0.1.insert
+}
+
+define %struct.int32x2x2_t @test_vuzp_s32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK: test_vuzp_s32:
+; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+entry:
+  %vuzp.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
+  %vuzp1.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
+  %.fca.0.0.insert = insertvalue %struct.int32x2x2_t undef, <2 x i32> %vuzp.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.int32x2x2_t %.fca.0.0.insert, <2 x i32> %vuzp1.i, 0, 1
+  ret %struct.int32x2x2_t %.fca.0.1.insert
+}
+
+define %struct.uint8x8x2_t @test_vuzp_u8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK: test_vuzp_u8:
+; CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+  %vuzp.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+  %vuzp1.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+  %.fca.0.0.insert = insertvalue %struct.uint8x8x2_t undef, <8 x i8> %vuzp.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.uint8x8x2_t %.fca.0.0.insert, <8 x i8> %vuzp1.i, 0, 1
+  ret %struct.uint8x8x2_t %.fca.0.1.insert
+}
+
+define %struct.uint16x4x2_t @test_vuzp_u16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK: test_vuzp_u16:
+; CHECK: uzp1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+; CHECK: uzp2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+  %vuzp.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  %vuzp1.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+  %.fca.0.0.insert = insertvalue %struct.uint16x4x2_t undef, <4 x i16> %vuzp.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.uint16x4x2_t %.fca.0.0.insert, <4 x i16> %vuzp1.i, 0, 1
+  ret %struct.uint16x4x2_t %.fca.0.1.insert
+}
+
+define %struct.uint32x2x2_t @test_vuzp_u32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK: test_vuzp_u32:
+; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+entry:
+  %vuzp.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
+  %vuzp1.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
+  %.fca.0.0.insert = insertvalue %struct.uint32x2x2_t undef, <2 x i32> %vuzp.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.uint32x2x2_t %.fca.0.0.insert, <2 x i32> %vuzp1.i, 0, 1
+  ret %struct.uint32x2x2_t %.fca.0.1.insert
+}
+
+define %struct.float32x2x2_t @test_vuzp_f32(<2 x float> %a, <2 x float> %b) {
+; CHECK: test_vuzp_f32:
+; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+entry:
+  %vuzp.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
+  %vuzp1.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
+  %.fca.0.0.insert = insertvalue %struct.float32x2x2_t undef, <2 x float> %vuzp.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.float32x2x2_t %.fca.0.0.insert, <2 x float> %vuzp1.i, 0, 1
+  ret %struct.float32x2x2_t %.fca.0.1.insert
+}
+
+define %struct.poly8x8x2_t @test_vuzp_p8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK: test_vuzp_p8:
+; CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+  %vuzp.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+  %vuzp1.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+  %.fca.0.0.insert = insertvalue %struct.poly8x8x2_t undef, <8 x i8> %vuzp.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.poly8x8x2_t %.fca.0.0.insert, <8 x i8> %vuzp1.i, 0, 1
+  ret %struct.poly8x8x2_t %.fca.0.1.insert
+}
+
+define %struct.poly16x4x2_t @test_vuzp_p16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK: test_vuzp_p16:
+; CHECK: uzp1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+; CHECK: uzp2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+  %vuzp.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  %vuzp1.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+  %.fca.0.0.insert = insertvalue %struct.poly16x4x2_t undef, <4 x i16> %vuzp.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.poly16x4x2_t %.fca.0.0.insert, <4 x i16> %vuzp1.i, 0, 1
+  ret %struct.poly16x4x2_t %.fca.0.1.insert
+}
+
+define %struct.int8x16x2_t @test_vuzpq_s8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: test_vuzpq_s8:
+; CHECK: uzp1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK: uzp2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+  %vuzp.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
+  %vuzp1.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
+  %.fca.0.0.insert = insertvalue %struct.int8x16x2_t undef, <16 x i8> %vuzp.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.int8x16x2_t %.fca.0.0.insert, <16 x i8> %vuzp1.i, 0, 1
+  ret %struct.int8x16x2_t %.fca.0.1.insert
+}
+
+define %struct.int16x8x2_t @test_vuzpq_s16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vuzpq_s16:
+; CHECK: uzp1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+; CHECK: uzp2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+  %vuzp.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+  %vuzp1.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+  %.fca.0.0.insert = insertvalue %struct.int16x8x2_t undef, <8 x i16> %vuzp.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.int16x8x2_t %.fca.0.0.insert, <8 x i16> %vuzp1.i, 0, 1
+  ret %struct.int16x8x2_t %.fca.0.1.insert
+}
+
+define %struct.int32x4x2_t @test_vuzpq_s32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK: test_vuzpq_s32:
+; CHECK: uzp1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK: uzp2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+  %vuzp.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  %vuzp1.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+  %.fca.0.0.insert = insertvalue %struct.int32x4x2_t undef, <4 x i32> %vuzp.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.int32x4x2_t %.fca.0.0.insert, <4 x i32> %vuzp1.i, 0, 1
+  ret %struct.int32x4x2_t %.fca.0.1.insert
+}
+
+define %struct.uint8x16x2_t @test_vuzpq_u8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: test_vuzpq_u8:
+; CHECK: uzp1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK: uzp2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+  %vuzp.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
+  %vuzp1.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
+  %.fca.0.0.insert = insertvalue %struct.uint8x16x2_t undef, <16 x i8> %vuzp.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.uint8x16x2_t %.fca.0.0.insert, <16 x i8> %vuzp1.i, 0, 1
+  ret %struct.uint8x16x2_t %.fca.0.1.insert
+}
+
+define %struct.uint16x8x2_t @test_vuzpq_u16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vuzpq_u16:
+; CHECK: uzp1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+; CHECK: uzp2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+  %vuzp.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+  %vuzp1.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+  %.fca.0.0.insert = insertvalue %struct.uint16x8x2_t undef, <8 x i16> %vuzp.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.uint16x8x2_t %.fca.0.0.insert, <8 x i16> %vuzp1.i, 0, 1
+  ret %struct.uint16x8x2_t %.fca.0.1.insert
+}
+
+define %struct.uint32x4x2_t @test_vuzpq_u32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK: test_vuzpq_u32:
+; CHECK: uzp1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK: uzp2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+  %vuzp.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  %vuzp1.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+  %.fca.0.0.insert = insertvalue %struct.uint32x4x2_t undef, <4 x i32> %vuzp.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.uint32x4x2_t %.fca.0.0.insert, <4 x i32> %vuzp1.i, 0, 1
+  ret %struct.uint32x4x2_t %.fca.0.1.insert
+}
+
+define %struct.float32x4x2_t @test_vuzpq_f32(<4 x float> %a, <4 x float> %b) {
+; CHECK: test_vuzpq_f32:
+; CHECK: uzp1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK: uzp2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+  %vuzp.i = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  %vuzp1.i = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+  %.fca.0.0.insert = insertvalue %struct.float32x4x2_t undef, <4 x float> %vuzp.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.float32x4x2_t %.fca.0.0.insert, <4 x float> %vuzp1.i, 0, 1
+  ret %struct.float32x4x2_t %.fca.0.1.insert
+}
+
+define %struct.poly8x16x2_t @test_vuzpq_p8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: test_vuzpq_p8:
+; CHECK: uzp1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK: uzp2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+  %vuzp.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
+  %vuzp1.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
+  %.fca.0.0.insert = insertvalue %struct.poly8x16x2_t undef, <16 x i8> %vuzp.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.poly8x16x2_t %.fca.0.0.insert, <16 x i8> %vuzp1.i, 0, 1
+  ret %struct.poly8x16x2_t %.fca.0.1.insert
+}
+
+define %struct.poly16x8x2_t @test_vuzpq_p16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vuzpq_p16:
+; CHECK: uzp1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+; CHECK: uzp2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+  %vuzp.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+  %vuzp1.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+  %.fca.0.0.insert = insertvalue %struct.poly16x8x2_t undef, <8 x i16> %vuzp.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.poly16x8x2_t %.fca.0.0.insert, <8 x i16> %vuzp1.i, 0, 1
+  ret %struct.poly16x8x2_t %.fca.0.1.insert
+}
+
+define %struct.int8x8x2_t @test_vzip_s8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK: test_vzip_s8:
+; CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+  %vzip.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+  %vzip1.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+  %.fca.0.0.insert = insertvalue %struct.int8x8x2_t undef, <8 x i8> %vzip.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.int8x8x2_t %.fca.0.0.insert, <8 x i8> %vzip1.i, 0, 1
+  ret %struct.int8x8x2_t %.fca.0.1.insert
+}
+
+define %struct.int16x4x2_t @test_vzip_s16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK: test_vzip_s16:
+; CHECK: zip1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+; CHECK: zip2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+  %vzip.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+  %vzip1.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+  %.fca.0.0.insert = insertvalue %struct.int16x4x2_t undef, <4 x i16> %vzip.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.int16x4x2_t %.fca.0.0.insert, <4 x i16> %vzip1.i, 0, 1
+  ret %struct.int16x4x2_t %.fca.0.1.insert
+}
+
+define %struct.int32x2x2_t @test_vzip_s32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK: test_vzip_s32:
+; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+entry:
+  %vzip.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
+  %vzip1.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
+  %.fca.0.0.insert = insertvalue %struct.int32x2x2_t undef, <2 x i32> %vzip.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.int32x2x2_t %.fca.0.0.insert, <2 x i32> %vzip1.i, 0, 1
+  ret %struct.int32x2x2_t %.fca.0.1.insert
+}
+
+define %struct.uint8x8x2_t @test_vzip_u8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK: test_vzip_u8:
+; CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+  %vzip.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+  %vzip1.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+  %.fca.0.0.insert = insertvalue %struct.uint8x8x2_t undef, <8 x i8> %vzip.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.uint8x8x2_t %.fca.0.0.insert, <8 x i8> %vzip1.i, 0, 1
+  ret %struct.uint8x8x2_t %.fca.0.1.insert
+}
+
+define %struct.uint16x4x2_t @test_vzip_u16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK: test_vzip_u16:
+; CHECK: zip1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+; CHECK: zip2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+  %vzip.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+  %vzip1.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+  %.fca.0.0.insert = insertvalue %struct.uint16x4x2_t undef, <4 x i16> %vzip.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.uint16x4x2_t %.fca.0.0.insert, <4 x i16> %vzip1.i, 0, 1
+  ret %struct.uint16x4x2_t %.fca.0.1.insert
+}
+
+define %struct.uint32x2x2_t @test_vzip_u32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK: test_vzip_u32:
+; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+entry:
+  %vzip.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
+  %vzip1.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
+  %.fca.0.0.insert = insertvalue %struct.uint32x2x2_t undef, <2 x i32> %vzip.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.uint32x2x2_t %.fca.0.0.insert, <2 x i32> %vzip1.i, 0, 1
+  ret %struct.uint32x2x2_t %.fca.0.1.insert
+}
+
+define %struct.float32x2x2_t @test_vzip_f32(<2 x float> %a, <2 x float> %b) {
+; CHECK: test_vzip_f32:
+; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+entry:
+  %vzip.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
+  %vzip1.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
+  %.fca.0.0.insert = insertvalue %struct.float32x2x2_t undef, <2 x float> %vzip.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.float32x2x2_t %.fca.0.0.insert, <2 x float> %vzip1.i, 0, 1
+  ret %struct.float32x2x2_t %.fca.0.1.insert
+}
+
+define %struct.poly8x8x2_t @test_vzip_p8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK: test_vzip_p8:
+; CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+  %vzip.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+  %vzip1.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+  %.fca.0.0.insert = insertvalue %struct.poly8x8x2_t undef, <8 x i8> %vzip.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.poly8x8x2_t %.fca.0.0.insert, <8 x i8> %vzip1.i, 0, 1
+  ret %struct.poly8x8x2_t %.fca.0.1.insert
+}
+
+define %struct.poly16x4x2_t @test_vzip_p16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK: test_vzip_p16:
+; CHECK: zip1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+; CHECK: zip2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+  %vzip.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+  %vzip1.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+  %.fca.0.0.insert = insertvalue %struct.poly16x4x2_t undef, <4 x i16> %vzip.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.poly16x4x2_t %.fca.0.0.insert, <4 x i16> %vzip1.i, 0, 1
+  ret %struct.poly16x4x2_t %.fca.0.1.insert
+}
+
+define %struct.int8x16x2_t @test_vzipq_s8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: test_vzipq_s8:
+; CHECK: zip1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK: zip2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+  %vzip.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
+  %vzip1.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+  %.fca.0.0.insert = insertvalue %struct.int8x16x2_t undef, <16 x i8> %vzip.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.int8x16x2_t %.fca.0.0.insert, <16 x i8> %vzip1.i, 0, 1
+  ret %struct.int8x16x2_t %.fca.0.1.insert
+}
+
+define %struct.int16x8x2_t @test_vzipq_s16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vzipq_s16:
+; CHECK: zip1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+; CHECK: zip2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+  %vzip.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+  %vzip1.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+  %.fca.0.0.insert = insertvalue %struct.int16x8x2_t undef, <8 x i16> %vzip.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.int16x8x2_t %.fca.0.0.insert, <8 x i16> %vzip1.i, 0, 1
+  ret %struct.int16x8x2_t %.fca.0.1.insert
+}
+
+define %struct.int32x4x2_t @test_vzipq_s32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK: test_vzipq_s32:
+; CHECK: zip1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK: zip2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+  %vzip.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+  %vzip1.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+  %.fca.0.0.insert = insertvalue %struct.int32x4x2_t undef, <4 x i32> %vzip.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.int32x4x2_t %.fca.0.0.insert, <4 x i32> %vzip1.i, 0, 1
+  ret %struct.int32x4x2_t %.fca.0.1.insert
+}
+
+define %struct.uint8x16x2_t @test_vzipq_u8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: test_vzipq_u8:
+; CHECK: zip1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK: zip2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+  %vzip.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
+  %vzip1.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+  %.fca.0.0.insert = insertvalue %struct.uint8x16x2_t undef, <16 x i8> %vzip.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.uint8x16x2_t %.fca.0.0.insert, <16 x i8> %vzip1.i, 0, 1
+  ret %struct.uint8x16x2_t %.fca.0.1.insert
+}
+
+define %struct.uint16x8x2_t @test_vzipq_u16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vzipq_u16:
+; CHECK: zip1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+; CHECK: zip2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+  %vzip.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+  %vzip1.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+  %.fca.0.0.insert = insertvalue %struct.uint16x8x2_t undef, <8 x i16> %vzip.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.uint16x8x2_t %.fca.0.0.insert, <8 x i16> %vzip1.i, 0, 1
+  ret %struct.uint16x8x2_t %.fca.0.1.insert
+}
+
+define %struct.uint32x4x2_t @test_vzipq_u32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK: test_vzipq_u32:
+; CHECK: zip1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK: zip2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+  %vzip.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+  %vzip1.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+  %.fca.0.0.insert = insertvalue %struct.uint32x4x2_t undef, <4 x i32> %vzip.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.uint32x4x2_t %.fca.0.0.insert, <4 x i32> %vzip1.i, 0, 1
+  ret %struct.uint32x4x2_t %.fca.0.1.insert
+}
+
+define %struct.float32x4x2_t @test_vzipq_f32(<4 x float> %a, <4 x float> %b) {
+; CHECK: test_vzipq_f32:
+; CHECK: zip1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK: zip2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+  %vzip.i = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+  %vzip1.i = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+  %.fca.0.0.insert = insertvalue %struct.float32x4x2_t undef, <4 x float> %vzip.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.float32x4x2_t %.fca.0.0.insert, <4 x float> %vzip1.i, 0, 1
+  ret %struct.float32x4x2_t %.fca.0.1.insert
+}
+
+define %struct.poly8x16x2_t @test_vzipq_p8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: test_vzipq_p8:
+; CHECK: zip1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK: zip2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+  %vzip.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
+  %vzip1.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+  %.fca.0.0.insert = insertvalue %struct.poly8x16x2_t undef, <16 x i8> %vzip.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.poly8x16x2_t %.fca.0.0.insert, <16 x i8> %vzip1.i, 0, 1
+  ret %struct.poly8x16x2_t %.fca.0.1.insert
+}
+
+define %struct.poly16x8x2_t @test_vzipq_p16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vzipq_p16:
+; CHECK: zip1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+; CHECK: zip2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+  %vzip.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+  %vzip1.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+  %.fca.0.0.insert = insertvalue %struct.poly16x8x2_t undef, <8 x i16> %vzip.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.poly16x8x2_t %.fca.0.0.insert, <8 x i16> %vzip1.i, 0, 1
+  ret %struct.poly16x8x2_t %.fca.0.1.insert
+}
+
+define %struct.int8x8x2_t @test_vtrn_s8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK: test_vtrn_s8:
+; CHECK: trn1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK: trn2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+  %vtrn.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+  %vtrn1.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+  %.fca.0.0.insert = insertvalue %struct.int8x8x2_t undef, <8 x i8> %vtrn.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.int8x8x2_t %.fca.0.0.insert, <8 x i8> %vtrn1.i, 0, 1
+  ret %struct.int8x8x2_t %.fca.0.1.insert
+}
+
+define %struct.int16x4x2_t @test_vtrn_s16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK: test_vtrn_s16:
+; CHECK: trn1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+; CHECK: trn2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+  %vtrn.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+  %vtrn1.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+  %.fca.0.0.insert = insertvalue %struct.int16x4x2_t undef, <4 x i16> %vtrn.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.int16x4x2_t %.fca.0.0.insert, <4 x i16> %vtrn1.i, 0, 1
+  ret %struct.int16x4x2_t %.fca.0.1.insert
+}
+
+define %struct.int32x2x2_t @test_vtrn_s32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK: test_vtrn_s32:
+; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+entry:
+  %vtrn.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
+  %vtrn1.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
+  %.fca.0.0.insert = insertvalue %struct.int32x2x2_t undef, <2 x i32> %vtrn.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.int32x2x2_t %.fca.0.0.insert, <2 x i32> %vtrn1.i, 0, 1
+  ret %struct.int32x2x2_t %.fca.0.1.insert
+}
+
+define %struct.uint8x8x2_t @test_vtrn_u8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK: test_vtrn_u8:
+; CHECK: trn1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK: trn2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+  %vtrn.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+  %vtrn1.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+  %.fca.0.0.insert = insertvalue %struct.uint8x8x2_t undef, <8 x i8> %vtrn.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.uint8x8x2_t %.fca.0.0.insert, <8 x i8> %vtrn1.i, 0, 1
+  ret %struct.uint8x8x2_t %.fca.0.1.insert
+}
+
+define %struct.uint16x4x2_t @test_vtrn_u16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK: test_vtrn_u16:
+; CHECK: trn1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+; CHECK: trn2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+  %vtrn.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+  %vtrn1.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+  %.fca.0.0.insert = insertvalue %struct.uint16x4x2_t undef, <4 x i16> %vtrn.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.uint16x4x2_t %.fca.0.0.insert, <4 x i16> %vtrn1.i, 0, 1
+  ret %struct.uint16x4x2_t %.fca.0.1.insert
+}
+
+define %struct.uint32x2x2_t @test_vtrn_u32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK: test_vtrn_u32:
+; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+entry:
+  %vtrn.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
+  %vtrn1.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
+  %.fca.0.0.insert = insertvalue %struct.uint32x2x2_t undef, <2 x i32> %vtrn.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.uint32x2x2_t %.fca.0.0.insert, <2 x i32> %vtrn1.i, 0, 1
+  ret %struct.uint32x2x2_t %.fca.0.1.insert
+}
+
+define %struct.float32x2x2_t @test_vtrn_f32(<2 x float> %a, <2 x float> %b) {
+; CHECK: test_vtrn_f32:
+; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+entry:
+  %vtrn.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
+  %vtrn1.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
+  %.fca.0.0.insert = insertvalue %struct.float32x2x2_t undef, <2 x float> %vtrn.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.float32x2x2_t %.fca.0.0.insert, <2 x float> %vtrn1.i, 0, 1
+  ret %struct.float32x2x2_t %.fca.0.1.insert
+}
+
+define %struct.poly8x8x2_t @test_vtrn_p8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK: test_vtrn_p8:
+; CHECK: trn1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK: trn2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+  %vtrn.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+  %vtrn1.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+  %.fca.0.0.insert = insertvalue %struct.poly8x8x2_t undef, <8 x i8> %vtrn.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.poly8x8x2_t %.fca.0.0.insert, <8 x i8> %vtrn1.i, 0, 1
+  ret %struct.poly8x8x2_t %.fca.0.1.insert
+}
+
+define %struct.poly16x4x2_t @test_vtrn_p16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK: test_vtrn_p16:
+; CHECK: trn1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+; CHECK: trn2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+  %vtrn.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+  %vtrn1.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+  %.fca.0.0.insert = insertvalue %struct.poly16x4x2_t undef, <4 x i16> %vtrn.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.poly16x4x2_t %.fca.0.0.insert, <4 x i16> %vtrn1.i, 0, 1
+  ret %struct.poly16x4x2_t %.fca.0.1.insert
+}
+
+define %struct.int8x16x2_t @test_vtrnq_s8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: test_vtrnq_s8:
+; CHECK: trn1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK: trn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+  %vtrn.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
+  %vtrn1.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
+  %.fca.0.0.insert = insertvalue %struct.int8x16x2_t undef, <16 x i8> %vtrn.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.int8x16x2_t %.fca.0.0.insert, <16 x i8> %vtrn1.i, 0, 1
+  ret %struct.int8x16x2_t %.fca.0.1.insert
+}
+
+define %struct.int16x8x2_t @test_vtrnq_s16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vtrnq_s16:
+; CHECK: trn1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+; CHECK: trn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+  %vtrn.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+  %vtrn1.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+  %.fca.0.0.insert = insertvalue %struct.int16x8x2_t undef, <8 x i16> %vtrn.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.int16x8x2_t %.fca.0.0.insert, <8 x i16> %vtrn1.i, 0, 1
+  ret %struct.int16x8x2_t %.fca.0.1.insert
+}
+
+define %struct.int32x4x2_t @test_vtrnq_s32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK: test_vtrnq_s32:
+; CHECK: trn1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK: trn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+  %vtrn.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+  %vtrn1.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+  %.fca.0.0.insert = insertvalue %struct.int32x4x2_t undef, <4 x i32> %vtrn.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.int32x4x2_t %.fca.0.0.insert, <4 x i32> %vtrn1.i, 0, 1
+  ret %struct.int32x4x2_t %.fca.0.1.insert
+}
+
+define %struct.uint8x16x2_t @test_vtrnq_u8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: test_vtrnq_u8:
+; CHECK: trn1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK: trn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+  %vtrn.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
+  %vtrn1.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
+  %.fca.0.0.insert = insertvalue %struct.uint8x16x2_t undef, <16 x i8> %vtrn.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.uint8x16x2_t %.fca.0.0.insert, <16 x i8> %vtrn1.i, 0, 1
+  ret %struct.uint8x16x2_t %.fca.0.1.insert
+}
+
+define %struct.uint16x8x2_t @test_vtrnq_u16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vtrnq_u16:
+; CHECK: trn1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+; CHECK: trn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+  %vtrn.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+  %vtrn1.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+  %.fca.0.0.insert = insertvalue %struct.uint16x8x2_t undef, <8 x i16> %vtrn.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.uint16x8x2_t %.fca.0.0.insert, <8 x i16> %vtrn1.i, 0, 1
+  ret %struct.uint16x8x2_t %.fca.0.1.insert
+}
+
+define %struct.uint32x4x2_t @test_vtrnq_u32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK: test_vtrnq_u32:
+; CHECK: trn1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK: trn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+  %vtrn.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+  %vtrn1.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+  %.fca.0.0.insert = insertvalue %struct.uint32x4x2_t undef, <4 x i32> %vtrn.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.uint32x4x2_t %.fca.0.0.insert, <4 x i32> %vtrn1.i, 0, 1
+  ret %struct.uint32x4x2_t %.fca.0.1.insert
+}
+
+define %struct.float32x4x2_t @test_vtrnq_f32(<4 x float> %a, <4 x float> %b) {
+; CHECK: test_vtrnq_f32:
+; CHECK: trn1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK: trn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+  %vtrn.i = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+  %vtrn1.i = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+  %.fca.0.0.insert = insertvalue %struct.float32x4x2_t undef, <4 x float> %vtrn.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.float32x4x2_t %.fca.0.0.insert, <4 x float> %vtrn1.i, 0, 1
+  ret %struct.float32x4x2_t %.fca.0.1.insert
+}
+
+define %struct.poly8x16x2_t @test_vtrnq_p8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: test_vtrnq_p8:
+; CHECK: trn1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK: trn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+  %vtrn.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
+  %vtrn1.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
+  %.fca.0.0.insert = insertvalue %struct.poly8x16x2_t undef, <16 x i8> %vtrn.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.poly8x16x2_t %.fca.0.0.insert, <16 x i8> %vtrn1.i, 0, 1
+  ret %struct.poly8x16x2_t %.fca.0.1.insert
+}
+
+define %struct.poly16x8x2_t @test_vtrnq_p16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vtrnq_p16:
+; CHECK: trn1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+; CHECK: trn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+  %vtrn.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+  %vtrn1.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+  %.fca.0.0.insert = insertvalue %struct.poly16x8x2_t undef, <8 x i16> %vtrn.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.poly16x8x2_t %.fca.0.0.insert, <8 x i16> %vtrn1.i, 0, 1
+  ret %struct.poly16x8x2_t %.fca.0.1.insert
+}
diff --git a/test/MC/AArch64/neon-diagnostics.s b/test/MC/AArch64/neon-diagnostics.s
index b549480a8f..d9afcb1418 100644
--- a/test/MC/AArch64/neon-diagnostics.s
+++ b/test/MC/AArch64/neon-diagnostics.s
@@ -5235,3 +5235,419 @@
 // CHECK-ERROR:         ext v0.2d, v1.2d, v2.2d, #0x0
 // CHECK-ERROR:                ^
 
+
+//----------------------------------------------------------------------
+// Permutation with 3 vectors
+//----------------------------------------------------------------------
+
+        uzp1 v0.16b, v1.8b, v2.8b
+        uzp1 v0.8b, v1.4b, v2.4b
+        uzp1 v0.8h, v1.4h, v2.4h
+        uzp1 v0.4h, v1.2h, v2.2h
+        uzp1 v0.4s, v1.2s, v2.2s
+        uzp1 v0.2s, v1.1s, v2.1s
+        uzp1 v0.2d, v1.1d, v2.1d
+        uzp1 v0.1d, v1.1d, v2.1d
+
+// CHECK-ERROR <stdin>:4289:22: error: invalid operand for instruction
+// CHECK-ERROR         uzp1 v0.16b, v1.8b, v2.8b
+// CHECK-ERROR                      ^
+// CHECK-ERROR <stdin>:4290:21: error: invalid operand for instruction
+// CHECK-ERROR         uzp1 v0.8b, v1.4b, v2.4b
+// CHECK-ERROR                     ^
+// CHECK-ERROR <stdin>:4291:21: error: invalid operand for instruction
+// CHECK-ERROR         uzp1 v0.8h, v1.4h, v2.4h
+// CHECK-ERROR                     ^
+// CHECK-ERROR <stdin>:4292:21: error: invalid operand for instruction
+// CHECK-ERROR         uzp1 v0.4h, v1.2h, v2.2h
+// CHECK-ERROR                     ^
+// CHECK-ERROR <stdin>:4293:21: error: invalid operand for instruction
+// CHECK-ERROR         uzp1 v0.4s, v1.2s, v2.2s
+// CHECK-ERROR                     ^
+// CHECK-ERROR <stdin>:4294:21: error: invalid operand for instruction
+// CHECK-ERROR         uzp1 v0.2s, v1.1s, v2.1s
+// CHECK-ERROR                     ^
+// CHECK-ERROR <stdin>:4295:21: error: invalid operand for instruction
+// CHECK-ERROR         uzp1 v0.2d, v1.1d, v2.1d
+// CHECK-ERROR                     ^
+// CHECK-ERROR <stdin>:4296:17: error: invalid operand for instruction
+// CHECK-ERROR         uzp1 v0.1d, v1.1d, v2.1d
+// CHECK-ERROR                 ^
+
+        uzp2 v0.16b, v1.8b, v2.8b
+        uzp2 v0.8b, v1.4b, v2.4b
+        uzp2 v0.8h, v1.4h, v2.4h
+        uzp2 v0.4h, v1.2h, v2.2h
+        uzp2 v0.4s, v1.2s, v2.2s
+        uzp2 v0.2s, v1.1s, v2.1s
+        uzp2 v0.2d, v1.1d, v2.1d
+        uzp2 v0.1d, v1.1d, v2.1d
+
+// CHECK-ERROR <stdin>:4298:22: error: invalid operand for instruction
+// CHECK-ERROR         uzp2 v0.16b, v1.8b, v2.8b
+// CHECK-ERROR                      ^
+// CHECK-ERROR <stdin>:4299:21: error: invalid operand for instruction
+// CHECK-ERROR         uzp2 v0.8b, v1.4b, v2.4b
+// CHECK-ERROR                     ^
+// CHECK-ERROR <stdin>:4300:21: error: invalid operand for instruction
+// CHECK-ERROR         uzp2 v0.8h, v1.4h, v2.4h
+// CHECK-ERROR                     ^
+// CHECK-ERROR <stdin>:4301:21: error: invalid operand for instruction
+// CHECK-ERROR         uzp2 v0.4h, v1.2h, v2.2h
+// CHECK-ERROR                     ^
+// CHECK-ERROR <stdin>:4302:21: error: invalid operand for instruction
+// CHECK-ERROR         uzp2 v0.4s, v1.2s, v2.2s
+// CHECK-ERROR                     ^
+// CHECK-ERROR <stdin>:4303:21: error: invalid operand for instruction
+// CHECK-ERROR         uzp2 v0.2s, v1.1s, v2.1s
+// CHECK-ERROR                     ^
+// CHECK-ERROR <stdin>:4304:21: error: invalid operand for instruction
+// CHECK-ERROR         uzp2 v0.2d, v1.1d, v2.1d
+// CHECK-ERROR                     ^
+// CHECK-ERROR <stdin>:4305:17: error: invalid operand for instruction
+// CHECK-ERROR         uzp2 v0.1d, v1.1d, v2.1d
+// CHECK-ERROR                 ^
+
+        zip1 v0.16b, v1.8b, v2.8b
+        zip1 v0.8b, v1.4b, v2.4b
+        zip1 v0.8h, v1.4h, v2.4h
+        zip1 v0.4h, v1.2h, v2.2h
+        zip1 v0.4s, v1.2s, v2.2s
+        zip1 v0.2s, v1.1s, v2.1s
+        zip1 v0.2d, v1.1d, v2.1d
+        zip1 v0.1d, v1.1d, v2.1d
+
+// CHECK-ERROR <stdin>:4307:22: error: invalid operand for instruction
+// CHECK-ERROR         zip1 v0.16b, v1.8b, v2.8b
+// CHECK-ERROR                      ^
+// CHECK-ERROR <stdin>:4308:21: error: invalid operand for instruction
+// CHECK-ERROR         zip1 v0.8b, v1.4b, v2.4b
+// CHECK-ERROR                     ^
+// CHECK-ERROR <stdin>:4309:21: error: invalid operand for instruction
+// CHECK-ERROR         zip1 v0.8h, v1.4h, v2.4h
+// CHECK-ERROR                     ^
+// CHECK-ERROR <stdin>:4310:21: error: invalid operand for instruction
+// CHECK-ERROR         zip1 v0.4h, v1.2h, v2.2h
+// CHECK-ERROR                     ^
+// CHECK-ERROR <stdin>:4311:21: error: invalid operand for instruction
+// CHECK-ERROR         zip1 v0.4s, v1.2s, v2.2s
+// CHECK-ERROR                     ^
+// CHECK-ERROR <stdin>:4312:21: error: invalid operand for instruction
+// CHECK-ERROR         zip1 v0.2s, v1.1s, v2.1s
+// CHECK-ERROR                     ^
+// CHECK-ERROR <stdin>:4313:21: error: invalid operand for instruction
+// CHECK-ERROR         zip1 v0.2d, v1.1d, v2.1d
+// CHECK-ERROR                     ^
+// CHECK-ERROR <stdin>:4314:17: error: invalid operand for instruction
+// CHECK-ERROR         zip1 v0.1d, v1.1d, v2.1d
+// CHECK-ERROR                 ^
+
+        zip2 v0.16b, v1.8b, v2.8b
+        zip2 v0.8b, v1.4b, v2.4b
+        zip2 v0.8h, v1.4h, v2.4h
+        zip2 v0.4h, v1.2h, v2.2h
+        zip2 v0.4s, v1.2s, v2.2s
+        zip2 v0.2s, v1.1s, v2.1s
+        zip2 v0.2d, v1.1d, v2.1d
+        zip2 v0.1d, v1.1d, v2.1d
+
+// CHECK-ERROR <stdin>:4316:22: error: invalid operand for instruction
+// CHECK-ERROR         zip2 v0.16b, v1.8b, v2.8b
+// CHECK-ERROR                      ^
+// CHECK-ERROR <stdin>:4317:21: error: invalid operand for instruction
+// CHECK-ERROR         zip2 v0.8b, v1.4b, v2.4b
+// CHECK-ERROR                     ^
+// CHECK-ERROR <stdin>:4318:21: error: invalid operand for instruction
+// CHECK-ERROR         zip2 v0.8h, v1.4h, v2.4h
+// CHECK-ERROR                     ^
+// CHECK-ERROR <stdin>:4319:21: error: invalid operand for instruction
+// CHECK-ERROR         zip2 v0.4h, v1.2h, v2.2h
+// CHECK-ERROR                     ^
+// CHECK-ERROR <stdin>:4320:21: error: invalid operand for instruction
+// CHECK-ERROR         zip2 v0.4s, v1.2s, v2.2s
+// CHECK-ERROR                     ^
+// CHECK-ERROR <stdin>:4321:21: error: invalid operand for instruction
+// CHECK-ERROR         zip2 v0.2s, v1.1s, v2.1s
+// CHECK-ERROR                     ^
+// CHECK-ERROR <stdin>:4322:21: error: invalid operand for instruction
+// CHECK-ERROR         zip2 v0.2d, v1.1d, v2.1d
+// CHECK-ERROR                     ^
+// CHECK-ERROR <stdin>:4323:17: error: invalid operand for instruction
+// CHECK-ERROR         zip2 v0.1d, v1.1d, v2.1d
+// CHECK-ERROR                 ^
+
+        trn1 v0.16b, v1.8b, v2.8b
+        trn1 v0.8b, v1.4b, v2.4b
+        trn1 v0.8h, v1.4h, v2.4h
+        trn1 v0.4h, v1.2h, v2.2h
+        trn1 v0.4s, v1.2s, v2.2s
+        trn1 v0.2s, v1.1s, v2.1s
+        trn1 v0.2d, v1.1d, v2.1d
+        trn1 v0.1d, v1.1d, v2.1d
+
+// CHECK-ERROR <stdin>:4325:22: error: invalid operand for instruction
+// CHECK-ERROR         trn1 v0.16b, v1.8b, v2.8b
+// CHECK-ERROR                      ^
+// CHECK-ERROR <stdin>:4326:21: error: invalid operand for instruction
+// CHECK-ERROR         trn1 v0.8b, v1.4b, v2.4b
+// CHECK-ERROR                     ^
+// CHECK-ERROR <stdin>:4327:21: error: invalid operand for instruction
+// CHECK-ERROR         trn1 v0.8h, v1.4h, v2.4h
+// CHECK-ERROR                     ^
+// CHECK-ERROR <stdin>:4328:21: error: invalid operand for instruction
+// CHECK-ERROR         trn1 v0.4h, v1.2h, v2.2h
+// CHECK-ERROR                     ^
+// CHECK-ERROR <stdin>:4329:21: error: invalid operand for instruction
+// CHECK-ERROR         trn1 v0.4s, v1.2s, v2.2s
+// CHECK-ERROR                     ^
+// CHECK-ERROR <stdin>:4330:21: error: invalid operand for instruction
+// CHECK-ERROR         trn1 v0.2s, v1.1s, v2.1s
+// CHECK-ERROR                     ^
+// CHECK-ERROR <stdin>:4331:21: error: invalid operand for instruction
+// CHECK-ERROR         trn1 v0.2d, v1.1d, v2.1d
+// CHECK-ERROR                     ^
+// CHECK-ERROR <stdin>:4332:17: error: invalid operand for instruction
+// CHECK-ERROR         trn1 v0.1d, v1.1d, v2.1d
+// CHECK-ERROR                 ^
+
+        trn2 v0.16b, v1.8b, v2.8b
+        trn2 v0.8b, v1.4b, v2.4b
+        trn2 v0.8h, v1.4h, v2.4h
+        trn2 v0.4h, v1.2h, v2.2h
+        trn2 v0.4s, v1.2s, v2.2s
+        trn2 v0.2s, v1.1s, v2.1s
+        trn2 v0.2d, v1.1d, v2.1d
+        trn2 v0.1d, v1.1d, v2.1d
+
+// CHECK-ERROR <stdin>:4334:22: error: invalid operand for instruction
+// CHECK-ERROR         trn2 v0.16b, v1.8b, v2.8b
+// CHECK-ERROR                      ^
+// CHECK-ERROR <stdin>:4335:21: error: invalid operand for instruction
+// CHECK-ERROR         trn2 v0.8b, v1.4b, v2.4b
+// CHECK-ERROR                     ^
+// CHECK-ERROR <stdin>:4336:21: error: invalid operand for instruction
+// CHECK-ERROR         trn2 v0.8h, v1.4h, v2.4h
+// CHECK-ERROR                     ^
+// CHECK-ERROR <stdin>:4337:21: error: invalid operand for instruction
+// CHECK-ERROR         trn2 v0.4h, v1.2h, v2.2h
+// CHECK-ERROR                     ^
+// CHECK-ERROR <stdin>:4338:21: error: invalid operand for instruction
+// CHECK-ERROR         trn2 v0.4s, v1.2s, v2.2s
+// CHECK-ERROR                     ^
+// CHECK-ERROR <stdin>:4339:21: error: invalid operand for instruction
+// CHECK-ERROR         trn2 v0.2s, v1.1s, v2.1s
+// CHECK-ERROR                     ^
+// CHECK-ERROR <stdin>:4340:21: error: invalid operand for instruction
+// CHECK-ERROR         trn2 v0.2d, v1.1d, v2.1d
+// CHECK-ERROR                     ^
+// CHECK-ERROR <stdin>:4341:17: error: invalid operand for instruction
+// CHECK-ERROR         trn2 v0.1d, v1.1d, v2.1d
+// CHECK-ERROR                 ^
+
+//----------------------------------------------------------------------
+// Permutation with 3 vectors
+//----------------------------------------------------------------------
+
+        uzp1 v0.16b, v1.8b, v2.8b
+        uzp1 v0.8b, v1.4b, v2.4b
+        uzp1 v0.8h, v1.4h, v2.4h
+        uzp1 v0.4h, v1.2h, v2.2h
+        uzp1 v0.4s, v1.2s, v2.2s
+        uzp1 v0.2s, v1.1s, v2.1s
+        uzp1 v0.2d, v1.1d, v2.1d
+        uzp1 v0.1d, v1.1d, v2.1d
+
+// CHECK-ERROR <stdin>:4289:22: error: invalid operand for instruction
+// CHECK-ERROR         uzp1 v0.16b, v1.8b, v2.8b
+// CHECK-ERROR                      ^
+// CHECK-ERROR <stdin>:4290:21: error: invalid operand for instruction
+// CHECK-ERROR         uzp1 v0.8b, v1.4b, v2.4b
+// CHECK-ERROR                     ^
+// CHECK-ERROR <stdin>:4291:21: error: invalid operand for instruction
+// CHECK-ERROR         uzp1 v0.8h, v1.4h, v2.4h
+// CHECK-ERROR                     ^
+// CHECK-ERROR <stdin>:4292:21: error: invalid operand for instruction
+// CHECK-ERROR         uzp1 v0.4h, v1.2h, v2.2h
+// CHECK-ERROR                     ^
+// CHECK-ERROR <stdin>:4293:21: error: invalid operand for instruction
+// CHECK-ERROR         uzp1 v0.4s, v1.2s, v2.2s
+// CHECK-ERROR                     ^
+// CHECK-ERROR <stdin>:4294:21: error: invalid operand for instruction
+// CHECK-ERROR         uzp1 v0.2s, v1.1s, v2.1s
+// CHECK-ERROR                     ^
+// CHECK-ERROR <stdin>:4295:21: error: invalid operand for instruction
+// CHECK-ERROR         uzp1 v0.2d, v1.1d, v2.1d
+// CHECK-ERROR                     ^
+// CHECK-ERROR <stdin>:4296:17: error: invalid operand for instruction
+// CHECK-ERROR         uzp1 v0.1d, v1.1d, v2.1d
+// CHECK-ERROR                 ^
+
+        uzp2 v0.16b, v1.8b, v2.8b
+        uzp2 v0.8b, v1.4b, v2.4b
+        uzp2 v0.8h, v1.4h, v2.4h
+        uzp2 v0.4h, v1.2h, v2.2h
+        uzp2 v0.4s, v1.2s, v2.2s
+        uzp2 v0.2s, v1.1s, v2.1s
+        uzp2 v0.2d, v1.1d, v2.1d
+        uzp2 v0.1d, v1.1d, v2.1d
+
+// CHECK-ERROR <stdin>:4298:22: error: invalid operand for instruction
+// CHECK-ERROR         uzp2 v0.16b, v1.8b, v2.8b
+// CHECK-ERROR                      ^
+// CHECK-ERROR <stdin>:4299:21: error: invalid operand for instruction
+// CHECK-ERROR         uzp2 v0.8b, v1.4b, v2.4b
+// CHECK-ERROR                     ^
+// CHECK-ERROR <stdin>:4300:21: error: invalid operand for instruction
+// CHECK-ERROR         uzp2 v0.8h, v1.4h, v2.4h
+// CHECK-ERROR                     ^
+// CHECK-ERROR <stdin>:4301:21: error: invalid operand for instruction
+// CHECK-ERROR         uzp2 v0.4h, v1.2h, v2.2h
+// CHECK-ERROR                     ^
+// CHECK-ERROR <stdin>:4302:21: error: invalid operand for instruction
+// CHECK-ERROR         uzp2 v0.4s, v1.2s, v2.2s
+// CHECK-ERROR                     ^
+// CHECK-ERROR <stdin>:4303:21: error: invalid operand for instruction
+// CHECK-ERROR         uzp2 v0.2s, v1.1s, v2.1s
+// CHECK-ERROR                     ^
+// CHECK-ERROR <stdin>:4304:21: error: invalid operand for instruction
+// CHECK-ERROR         uzp2 v0.2d, v1.1d, v2.1d
+// CHECK-ERROR                     ^
+// CHECK-ERROR <stdin>:4305:17: error: invalid operand for instruction
+// CHECK-ERROR         uzp2 v0.1d, v1.1d, v2.1d
+// CHECK-ERROR                 ^
+
+        zip1 v0.16b, v1.8b, v2.8b
+        zip1 v0.8b, v1.4b, v2.4b
+        zip1 v0.8h, v1.4h, v2.4h
+        zip1 v0.4h, v1.2h, v2.2h
+        zip1 v0.4s, v1.2s, v2.2s
+        zip1 v0.2s, v1.1s, v2.1s
+        zip1 v0.2d, v1.1d, v2.1d
+        zip1 v0.1d, v1.1d, v2.1d
+
+// CHECK-ERROR <stdin>:4307:22: error: invalid operand for instruction
+// CHECK-ERROR         zip1 v0.16b, v1.8b, v2.8b
+// CHECK-ERROR                      ^
+// CHECK-ERROR <stdin>:4308:21: error: invalid operand for instruction
+// CHECK-ERROR         zip1 v0.8b, v1.4b, v2.4b
+// CHECK-ERROR                     ^
+// CHECK-ERROR <stdin>:4309:21: error: invalid operand for instruction
+// CHECK-ERROR         zip1 v0.8h, v1.4h, v2.4h
+// CHECK-ERROR                     ^
+// CHECK-ERROR <stdin>:4310:21: error: invalid operand for instruction
+// CHECK-ERROR         zip1 v0.4h, v1.2h, v2.2h
+// CHECK-ERROR                     ^
+// CHECK-ERROR <stdin>:4311:21: error: invalid operand for instruction
+// CHECK-ERROR         zip1 v0.4s, v1.2s, v2.2s
+// CHECK-ERROR                     ^
+// CHECK-ERROR <stdin>:4312:21: error: invalid operand for instruction
+// CHECK-ERROR         zip1 v0.2s, v1.1s, v2.1s
+// CHECK-ERROR                     ^
+// CHECK-ERROR <stdin>:4313:21: error: invalid operand for instruction
+// CHECK-ERROR         zip1 v0.2d, v1.1d, v2.1d
+// CHECK-ERROR                     ^
+// CHECK-ERROR <stdin>:4314:17: error: invalid operand for instruction
+// CHECK-ERROR         zip1 v0.1d, v1.1d, v2.1d
+// CHECK-ERROR                 ^
+
+        zip2 v0.16b, v1.8b, v2.8b
+        zip2 v0.8b, v1.4b, v2.4b
+        zip2 v0.8h, v1.4h, v2.4h
+        zip2 v0.4h, v1.2h, v2.2h
+        zip2 v0.4s, v1.2s, v2.2s
+        zip2 v0.2s, v1.1s, v2.1s
+        zip2 v0.2d, v1.1d, v2.1d
+        zip2 v0.1d, v1.1d, v2.1d
+
+// CHECK-ERROR <stdin>:4316:22: error: invalid operand for instruction
+// CHECK-ERROR         zip2 v0.16b, v1.8b, v2.8b
+// CHECK-ERROR                      ^
+// CHECK-ERROR <stdin>:4317:21: error: invalid operand for instruction
+// CHECK-ERROR         zip2 v0.8b, v1.4b, v2.4b
+// CHECK-ERROR                     ^
+// CHECK-ERROR <stdin>:4318:21: error: invalid operand for instruction
+// CHECK-ERROR         zip2 v0.8h, v1.4h, v2.4h
+// CHECK-ERROR                     ^
+// CHECK-ERROR <stdin>:4319:21: error: invalid operand for instruction
+// CHECK-ERROR         zip2 v0.4h, v1.2h, v2.2h
+// CHECK-ERROR                     ^
+// CHECK-ERROR <stdin>:4320:21: error: invalid operand for instruction
+// CHECK-ERROR         zip2 v0.4s, v1.2s, v2.2s
+// CHECK-ERROR                     ^
+// CHECK-ERROR <stdin>:4321:21: error: invalid operand for instruction
+// CHECK-ERROR         zip2 v0.2s, v1.1s, v2.1s
+// CHECK-ERROR                     ^
+// CHECK-ERROR <stdin>:4322:21: error: invalid operand for instruction
+// CHECK-ERROR         zip2 v0.2d, v1.1d, v2.1d
+// CHECK-ERROR                     ^
+// CHECK-ERROR <stdin>:4323:17: error: invalid operand for instruction
+// CHECK-ERROR         zip2 v0.1d, v1.1d, v2.1d
+// CHECK-ERROR                 ^
+
+        trn1 v0.16b, v1.8b, v2.8b
+        trn1 v0.8b, v1.4b, v2.4b
+        trn1 v0.8h, v1.4h, v2.4h
+        trn1 v0.4h, v1.2h, v2.2h
+        trn1 v0.4s, v1.2s, v2.2s
+        trn1 v0.2s, v1.1s, v2.1s
+        trn1 v0.2d, v1.1d, v2.1d
+        trn1 v0.1d, v1.1d, v2.1d
+
+// CHECK-ERROR <stdin>:4325:22: error: invalid operand for instruction
+// CHECK-ERROR         trn1 v0.16b, v1.8b, v2.8b
+// CHECK-ERROR                      ^
+// CHECK-ERROR <stdin>:4326:21: error: invalid operand for instruction
+// CHECK-ERROR         trn1 v0.8b, v1.4b, v2.4b
+// CHECK-ERROR                     ^
+// CHECK-ERROR <stdin>:4327:21: error: invalid operand for instruction
+// CHECK-ERROR         trn1 v0.8h, v1.4h, v2.4h
+// CHECK-ERROR                     ^
+// CHECK-ERROR <stdin>:4328:21: error: invalid operand for instruction
+// CHECK-ERROR         trn1 v0.4h, v1.2h, v2.2h
+// CHECK-ERROR                     ^
+// CHECK-ERROR <stdin>:4329:21: error: invalid operand for instruction
+// CHECK-ERROR         trn1 v0.4s, v1.2s, v2.2s
+// CHECK-ERROR                     ^
+// CHECK-ERROR <stdin>:4330:21: error: invalid operand for instruction
+// CHECK-ERROR         trn1 v0.2s, v1.1s, v2.1s
+// CHECK-ERROR                     ^
+// CHECK-ERROR <stdin>:4331:21: error: invalid operand for instruction
+// CHECK-ERROR         trn1 v0.2d, v1.1d, v2.1d
+// CHECK-ERROR                     ^
+// CHECK-ERROR <stdin>:4332:17: error: invalid operand for instruction
+// CHECK-ERROR         trn1 v0.1d, v1.1d, v2.1d
+// CHECK-ERROR                 ^
+
+        trn2 v0.16b, v1.8b, v2.8b
+        trn2 v0.8b, v1.4b, v2.4b
+        trn2 v0.8h, v1.4h, v2.4h
+        trn2 v0.4h, v1.2h, v2.2h
+        trn2 v0.4s, v1.2s, v2.2s
+        trn2 v0.2s, v1.1s, v2.1s
+        trn2 v0.2d, v1.1d, v2.1d
+        trn2 v0.1d, v1.1d, v2.1d
+
+// CHECK-ERROR <stdin>:4334:22: error: invalid operand for instruction
+// CHECK-ERROR         trn2 v0.16b, v1.8b, v2.8b
+// CHECK-ERROR                      ^
+// CHECK-ERROR <stdin>:4335:21: error: invalid operand for instruction
+// CHECK-ERROR         trn2 v0.8b, v1.4b, v2.4b
+// CHECK-ERROR                     ^
+// CHECK-ERROR <stdin>:4336:21: error: invalid operand for instruction
+// CHECK-ERROR         trn2 v0.8h, v1.4h, v2.4h
+// CHECK-ERROR                     ^
+// CHECK-ERROR <stdin>:4337:21: error: invalid operand for instruction
+// CHECK-ERROR         trn2 v0.4h, v1.2h, v2.2h
+// CHECK-ERROR                     ^
+// CHECK-ERROR <stdin>:4338:21: error: invalid operand for instruction
+// CHECK-ERROR         trn2 v0.4s, v1.2s, v2.2s
+// CHECK-ERROR                     ^
+// CHECK-ERROR <stdin>:4339:21: error: invalid operand for instruction
+// CHECK-ERROR         trn2 v0.2s, v1.1s, v2.1s
+// CHECK-ERROR                     ^
+// CHECK-ERROR <stdin>:4340:21: error: invalid operand for instruction
+// CHECK-ERROR         trn2 v0.2d, v1.1d, v2.1d
+// CHECK-ERROR                     ^
+// CHECK-ERROR <stdin>:4341:17: error: invalid operand for instruction
+// CHECK-ERROR         trn2 v0.1d, v1.1d, v2.1d
+// CHECK-ERROR                 ^
diff --git a/test/MC/AArch64/neon-perm.s b/test/MC/AArch64/neon-perm.s
new file mode 100644
index 0000000000..20a4acde37
--- /dev/null
+++ b/test/MC/AArch64/neon-perm.s
@@ -0,0 +1,103 @@
+// RUN: llvm-mc -triple=aarch64 -mattr=+neon -show-encoding < %s | FileCheck %s
+
+// Check that the assembler can handle the documented syntax for AArch64
+
+//------------------------------------------------------------------------------
+// Instructions for permute
+//------------------------------------------------------------------------------
+
+        uzp1 v0.8b, v1.8b, v2.8b
+        uzp1 v0.16b, v1.16b, v2.16b
+        uzp1 v0.4h, v1.4h, v2.4h
+        uzp1 v0.8h, v1.8h, v2.8h
+        uzp1 v0.2s, v1.2s, v2.2s
+        uzp1 v0.4s, v1.4s, v2.4s
+        uzp1 v0.2d, v1.2d, v2.2d
+
+// CHECK: uzp1	v0.8b, v1.8b, v2.8b     // encoding: [0x20,0x18,0x02,0x0e]
+// CHECK: uzp1	v0.16b, v1.16b, v2.16b  // encoding: [0x20,0x18,0x02,0x4e]
+// CHECK: uzp1	v0.4h, v1.4h, v2.4h     // encoding: [0x20,0x18,0x42,0x0e]
+// CHECK: uzp1	v0.8h, v1.8h, v2.8h     // encoding: [0x20,0x18,0x42,0x4e]
+// CHECK: uzp1	v0.2s, v1.2s, v2.2s     // encoding: [0x20,0x18,0x82,0x0e]
+// CHECK: uzp1	v0.4s, v1.4s, v2.4s     // encoding: [0x20,0x18,0x82,0x4e]
+// CHECK: uzp1	v0.2d, v1.2d, v2.2d     // encoding: [0x20,0x18,0xc2,0x4e]
+
+        trn1 v0.8b, v1.8b, v2.8b
+        trn1 v0.16b, v1.16b, v2.16b
+        trn1 v0.4h, v1.4h, v2.4h
+        trn1 v0.8h, v1.8h, v2.8h
+        trn1 v0.2s, v1.2s, v2.2s
+        trn1 v0.4s, v1.4s, v2.4s
+        trn1 v0.2d, v1.2d, v2.2d
+
+// CHECK: trn1	v0.8b, v1.8b, v2.8b     // encoding: [0x20,0x28,0x02,0x0e]
+// CHECK: trn1	v0.16b, v1.16b, v2.16b  // encoding: [0x20,0x28,0x02,0x4e]
+// CHECK: trn1	v0.4h, v1.4h, v2.4h     // encoding: [0x20,0x28,0x42,0x0e]
+// CHECK: trn1	v0.8h, v1.8h, v2.8h     // encoding: [0x20,0x28,0x42,0x4e]
+// CHECK: trn1	v0.2s, v1.2s, v2.2s     // encoding: [0x20,0x28,0x82,0x0e]
+// CHECK: trn1	v0.4s, v1.4s, v2.4s     // encoding: [0x20,0x28,0x82,0x4e]
+// CHECK: trn1	v0.2d, v1.2d, v2.2d     // encoding: [0x20,0x28,0xc2,0x4e]
+
+        zip1 v0.8b, v1.8b, v2.8b
+        zip1 v0.16b, v1.16b, v2.16b
+        zip1 v0.4h, v1.4h, v2.4h
+        zip1 v0.8h, v1.8h, v2.8h
+        zip1 v0.2s, v1.2s, v2.2s
+        zip1 v0.4s, v1.4s, v2.4s
+        zip1 v0.2d, v1.2d, v2.2d
+
+// CHECK: zip1	v0.8b, v1.8b, v2.8b     // encoding: [0x20,0x38,0x02,0x0e]
+// CHECK: zip1	v0.16b, v1.16b, v2.16b  // encoding: [0x20,0x38,0x02,0x4e]
+// CHECK: zip1	v0.4h, v1.4h, v2.4h     // encoding: [0x20,0x38,0x42,0x0e]
+// CHECK: zip1	v0.8h, v1.8h, v2.8h     // encoding: [0x20,0x38,0x42,0x4e]
+// CHECK: zip1	v0.2s, v1.2s, v2.2s     // encoding: [0x20,0x38,0x82,0x0e]
+// CHECK: zip1	v0.4s, v1.4s, v2.4s     // encoding: [0x20,0x38,0x82,0x4e]
+// CHECK: zip1	v0.2d, v1.2d, v2.2d     // encoding: [0x20,0x38,0xc2,0x4e]
+
+        uzp2 v0.8b, v1.8b, v2.8b
+        uzp2 v0.16b, v1.16b, v2.16b
+        uzp2 v0.4h, v1.4h, v2.4h
+        uzp2 v0.8h, v1.8h, v2.8h
+        uzp2 v0.2s, v1.2s, v2.2s
+        uzp2 v0.4s, v1.4s, v2.4s
+        uzp2 v0.2d, v1.2d, v2.2d
+
+// CHECK: uzp2	v0.8b, v1.8b, v2.8b     // encoding: [0x20,0x58,0x02,0x0e]
+// CHECK: uzp2	v0.16b, v1.16b, v2.16b  // encoding: [0x20,0x58,0x02,0x4e]
+// CHECK: uzp2	v0.4h, v1.4h, v2.4h     // encoding: [0x20,0x58,0x42,0x0e]
+// CHECK: uzp2	v0.8h, v1.8h, v2.8h     // encoding: [0x20,0x58,0x42,0x4e]
+// CHECK: uzp2	v0.2s, v1.2s, v2.2s     // encoding: [0x20,0x58,0x82,0x0e]
+// CHECK: uzp2	v0.4s, v1.4s, v2.4s     // encoding: [0x20,0x58,0x82,0x4e]
+// CHECK: uzp2	v0.2d, v1.2d, v2.2d     // encoding: [0x20,0x58,0xc2,0x4e]
+
+        trn2 v0.8b, v1.8b, v2.8b
+        trn2 v0.16b, v1.16b, v2.16b
+        trn2 v0.4h, v1.4h, v2.4h
+        trn2 v0.8h, v1.8h, v2.8h
+        trn2 v0.2s, v1.2s, v2.2s
+        trn2 v0.4s, v1.4s, v2.4s
+        trn2 v0.2d, v1.2d, v2.2d
+
+// CHECK: trn2	v0.8b, v1.8b, v2.8b     // encoding: [0x20,0x68,0x02,0x0e]
+// CHECK: trn2	v0.16b, v1.16b, v2.16b  // encoding: [0x20,0x68,0x02,0x4e]
+// CHECK: trn2	v0.4h, v1.4h, v2.4h     // encoding: [0x20,0x68,0x42,0x0e]
+// CHECK: trn2	v0.8h, v1.8h, v2.8h     // encoding: [0x20,0x68,0x42,0x4e]
+// CHECK: trn2	v0.2s, v1.2s, v2.2s     // encoding: [0x20,0x68,0x82,0x0e]
+// CHECK: trn2	v0.4s, v1.4s, v2.4s     // encoding: [0x20,0x68,0x82,0x4e]
+// CHECK: trn2	v0.2d, v1.2d, v2.2d     // encoding: [0x20,0x68,0xc2,0x4e]
+
+        zip2 v0.8b, v1.8b, v2.8b
+        zip2 v0.16b, v1.16b, v2.16b
+        zip2 v0.4h, v1.4h, v2.4h
+        zip2 v0.8h, v1.8h, v2.8h
+        zip2 v0.2s, v1.2s, v2.2s
+        zip2 v0.4s, v1.4s, v2.4s
+        zip2 v0.2d, v1.2d, v2.2d
+
+// CHECK: zip2	v0.8b, v1.8b, v2.8b     // encoding: [0x20,0x78,0x02,0x0e]
+// CHECK: zip2	v0.16b, v1.16b, v2.16b  // encoding: [0x20,0x78,0x02,0x4e]
+// CHECK: zip2	v0.4h, v1.4h, v2.4h     // encoding: [0x20,0x78,0x42,0x0e]
+// CHECK: zip2	v0.8h, v1.8h, v2.8h     // encoding: [0x20,0x78,0x42,0x4e]
+// CHECK: zip2	v0.2s, v1.2s, v2.2s     // encoding: [0x20,0x78,0x82,0x0e]
+// CHECK: zip2	v0.4s, v1.4s, v2.4s     // encoding: [0x20,0x78,0x82,0x4e]
+// CHECK: zip2	v0.2d, v1.2d, v2.2d     // encoding: [0x20,0x78,0xc2,0x4e]
diff --git a/test/MC/Disassembler/AArch64/neon-instructions.txt b/test/MC/Disassembler/AArch64/neon-instructions.txt
index 225bb16212..e4aad48f47 100644
--- a/test/MC/Disassembler/AArch64/neon-instructions.txt
+++ b/test/MC/Disassembler/AArch64/neon-instructions.txt
@@ -2051,3 +2051,110 @@ G# RUN: llvm-mc  -triple aarch64-none-linux-gnu -mattr=+neon -disassemble < %s |
 # CHECK: ext v0.8b, v1.8b, v2.8b, #0x3
 # CHECK: ext v0.16b, v1.16b, v2.16b, #0x3
 
+#----------------------------------------------------------------------
+# unzip with 3 same vectors to get primary result
+#----------------------------------------------------------------------
+# CHECK: uzp1	v1.8b, v1.8b, v2.8b
+# CHECK: uzp1	v2.16b, v1.16b, v2.16b
+# CHECK: uzp1	v3.4h, v1.4h, v2.4h
+# CHECK: uzp1	v4.8h, v1.8h, v2.8h
+# CHECK: uzp1	v5.2s, v1.2s, v2.2s
+# CHECK: uzp1	v6.4s, v1.4s, v2.4s
+# CHECK: uzp1	v7.2d, v1.2d, v2.2d
+0x21,0x18,0x02,0x0e
+0x22,0x18,0x02,0x4e
+0x23,0x18,0x42,0x0e
+0x24,0x18,0x42,0x4e
+0x25,0x18,0x82,0x0e
+0x26,0x18,0x82,0x4e
+0x27,0x18,0xc2,0x4e
+
+#----------------------------------------------------------------------
+# transpose with 3 same vectors to get primary result
+#----------------------------------------------------------------------
+# CHECK: trn1	v8.8b, v1.8b, v2.8b
+# CHECK: trn1	v9.16b, v1.16b, v2.16b
+# CHECK: trn1	v10.4h, v1.4h, v2.4h
+# CHECK: trn1	v27.8h, v7.8h, v2.8h
+# CHECK: trn1	v12.2s, v7.2s, v2.2s
+# CHECK: trn1	v29.4s, v6.4s, v2.4s
+# CHECK: trn1	v14.2d, v6.2d, v2.2d
+0x28,0x28,0x02,0x0e
+0x29,0x28,0x02,0x4e
+0x2a,0x28,0x42,0x0e
+0xfb,0x28,0x42,0x4e
+0xec,0x28,0x82,0x0e
+0xdd,0x28,0x82,0x4e
+0xce,0x28,0xc2,0x4e
+
+#----------------------------------------------------------------------
+# zip with 3 same vectors to get primary result
+#----------------------------------------------------------------------
+# CHECK: zip1	v31.8b, v5.8b, v2.8b
+# CHECK: zip1	v0.16b, v5.16b, v2.16b
+# CHECK: zip1	v17.4h, v4.4h, v2.4h
+# CHECK: zip1	v2.8h, v4.8h, v2.8h
+# CHECK: zip1	v19.2s, v3.2s, v2.2s
+# CHECK: zip1	v4.4s, v3.4s, v2.4s
+# CHECK: zip1	v21.2d, v2.2d, v2.2d
+0xbf,0x38,0x02,0x0e
+0xa0,0x38,0x02,0x4e
+0x91,0x38,0x42,0x0e
+0x82,0x38,0x42,0x4e
+0x73,0x38,0x82,0x0e
+0x64,0x38,0x82,0x4e
+0x55,0x38,0xc2,0x4e
+
+#----------------------------------------------------------------------
+# unzip with 3 same vectors to get secondary result
+#----------------------------------------------------------------------
+# CHECK: uzp2	v6.8b, v2.8b, v2.8b
+# CHECK: uzp2	v23.16b, v1.16b, v2.16b
+# CHECK: uzp2	v8.4h, v1.4h, v2.4h
+# CHECK: uzp2	v25.8h, v0.8h, v2.8h
+# CHECK: uzp2	v10.2s, v0.2s, v2.2s
+# CHECK: uzp2	v27.4s, v7.4s, v2.4s
+# CHECK: uzp2	v12.2d, v7.2d, v2.2d
+0x46,0x58,0x02,0x0e
+0x37,0x58,0x02,0x4e
+0x28,0x58,0x42,0x0e
+0x19,0x58,0x42,0x4e
+0x0a,0x58,0x82,0x0e
+0xfb,0x58,0x82,0x4e
+0xec,0x58,0xc2,0x4e
+
+#----------------------------------------------------------------------
+# transpose with 3 same vectors to get secondary result
+#----------------------------------------------------------------------
+# CHECK: trn2	v29.8b, v6.8b, v2.8b
+# CHECK: trn2	v14.16b, v6.16b, v2.16b
+# CHECK: trn2	v31.4h, v5.4h, v2.4h
+# CHECK: trn2	v0.8h, v5.8h, v2.8h
+# CHECK: trn2	v17.2s, v4.2s, v2.2s
+# CHECK: trn2	v2.4s, v4.4s, v2.4s
+# CHECK: trn2	v19.2d, v3.2d, v2.2d
+0xdd,0x68,0x02,0x0e
+0xce,0x68,0x02,0x4e
+0xbf,0x68,0x42,0x0e
+0xa0,0x68,0x42,0x4e
+0x91,0x68,0x82,0x0e
+0x82,0x68,0x82,0x4e
+0x73,0x68,0xc2,0x4e
+
+#----------------------------------------------------------------------
+# zip with 3 same vectors to get secondary result
+#----------------------------------------------------------------------
+# CHECK: zip2	v4.8b, v3.8b, v2.8b
+# CHECK: zip2	v21.16b, v2.16b, v2.16b
+# CHECK: zip2	v6.4h, v2.4h, v2.4h
+# CHECK: zip2	v23.8h, v1.8h, v2.8h
+# CHECK: zip2	v8.2s, v1.2s, v2.2s
+# CHECK: zip2	v25.4s, v0.4s, v2.4s
+# CHECK: zip2	v10.2d, v0.2d, v2.2d
+0x64,0x78,0x02,0x0e
+0x55,0x78,0x02,0x4e
+0x46,0x78,0x42,0x0e
+0x37,0x78,0x42,0x4e
+0x28,0x78,0x82,0x0e
+0x19,0x78,0x82,0x4e
+0x0a,0x78,0xc2,0x4e
author	Jiangning Liu <jiangning.liu@arm.com>	2013-11-06 03:35:27 +0000
committer	Jiangning Liu <jiangning.liu@arm.com>	2013-11-06 03:35:27 +0000
commit	8458f371b84ee0cd22c4a433059d53ea6e3ec4f4 (patch)
tree	0a4d59f36e877036558838acfc8eb34a84ae132a /test
parent	258115258f8fe15e9d74b5fb524f90b75bb917d1 (diff)
download	llvm-8458f371b84ee0cd22c4a433059d53ea6e3ec4f4.tar.gz llvm-8458f371b84ee0cd22c4a433059d53ea6e3ec4f4.tar.bz2 llvm-8458f371b84ee0cd22c4a433059d53ea6e3ec4f4.tar.xz