summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJiangning Liu <jiangning.liu@arm.com>2013-11-06 03:35:27 +0000
committerJiangning Liu <jiangning.liu@arm.com>2013-11-06 03:35:27 +0000
commit8458f371b84ee0cd22c4a433059d53ea6e3ec4f4 (patch)
tree0a4d59f36e877036558838acfc8eb34a84ae132a
parent258115258f8fe15e9d74b5fb524f90b75bb917d1 (diff)
downloadllvm-8458f371b84ee0cd22c4a433059d53ea6e3ec4f4.tar.gz
llvm-8458f371b84ee0cd22c4a433059d53ea6e3ec4f4.tar.bz2
llvm-8458f371b84ee0cd22c4a433059d53ea6e3ec4f4.tar.xz
Implement AArch64 Neon instruction set Perm.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@194123 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Target/AArch64/AArch64InstrFormats.td18
-rw-r--r--lib/Target/AArch64/AArch64InstrNEON.td329
-rw-r--r--test/CodeGen/AArch64/neon-perm.ll1676
-rw-r--r--test/MC/AArch64/neon-diagnostics.s416
-rw-r--r--test/MC/AArch64/neon-perm.s103
-rw-r--r--test/MC/Disassembler/AArch64/neon-instructions.txt107
6 files changed, 2649 insertions, 0 deletions
diff --git a/lib/Target/AArch64/AArch64InstrFormats.td b/lib/Target/AArch64/AArch64InstrFormats.td
index 8a2142646e..b0aadfb031 100644
--- a/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/lib/Target/AArch64/AArch64InstrFormats.td
@@ -1001,6 +1001,24 @@ class NeonI_BitExtract<bit q, bits<2> op2,
// Inherit Rd in 4-0
}
+// Format AdvSIMD perm
+class NeonI_Perm<bit q, bits<2> size, bits<3> opcode,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
+ let Inst{31} = 0b0;
+ let Inst{30} = q;
+ let Inst{29-24} = 0b001110;
+ let Inst{23-22} = size;
+ let Inst{21} = 0b0;
+ // Inherit Rm in 20-16
+ let Inst{15} = 0b0;
+ let Inst{14-12} = opcode;
+ let Inst{11-10} = 0b10;
+ // Inherit Rn in 9-5
+ // Inherit Rd in 4-0
+}
+
// Format AdvSIMD 3 vector registers with same vector type
class NeonI_3VSame<bit q, bit u, bits<2> size, bits<5> opcode,
dag outs, dag ins, string asmstr,
diff --git a/lib/Target/AArch64/AArch64InstrNEON.td b/lib/Target/AArch64/AArch64InstrNEON.td
index 8a78d14b8c..5cdac1ef88 100644
--- a/lib/Target/AArch64/AArch64InstrNEON.td
+++ b/lib/Target/AArch64/AArch64InstrNEON.td
@@ -2360,6 +2360,335 @@ defm FMAXV : NeonI_2VAcross_3<0b1, 0b01111, 0b00, "fmaxv",
defm FMINV : NeonI_2VAcross_3<0b1, 0b01111, 0b10, "fminv",
int_aarch64_neon_vminv>;
+// The followings are for instruction class (Perm)
+
+class NeonI_Permute<bit q, bits<2> size, bits<3> opcode,
+ string asmop, RegisterOperand OpVPR, string OpS>
+ : NeonI_Perm<q, size, opcode,
+ (outs OpVPR:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm),
+ asmop # "\t$Rd." # OpS # ", $Rn." # OpS # ", $Rm." # OpS,
+ [], NoItinerary>;
+
+multiclass NeonI_Perm_pat<bits<3> opcode, string asmop> {
+ def _8b : NeonI_Permute<0b0, 0b00, opcode, asmop, VPR64, "8b">;
+ def _16b : NeonI_Permute<0b1, 0b00, opcode, asmop, VPR128, "16b">;
+ def _4h : NeonI_Permute<0b0, 0b01, opcode, asmop, VPR64, "4h">;
+ def _8h : NeonI_Permute<0b1, 0b01, opcode, asmop, VPR128, "8h">;
+ def _2s : NeonI_Permute<0b0, 0b10, opcode, asmop, VPR64, "2s">;
+ def _4s : NeonI_Permute<0b1, 0b10, opcode, asmop, VPR128, "4s">;
+ def _2d : NeonI_Permute<0b1, 0b11, opcode, asmop, VPR128, "2d">;
+}
+
+defm UZP1vvv : NeonI_Perm_pat<0b001, "uzp1">;
+defm TRN1vvv : NeonI_Perm_pat<0b010, "trn1">;
+defm ZIP1vvv : NeonI_Perm_pat<0b011, "zip1">;
+defm UZP2vvv : NeonI_Perm_pat<0b101, "uzp2">;
+defm TRN2vvv : NeonI_Perm_pat<0b110, "trn2">;
+defm ZIP2vvv : NeonI_Perm_pat<0b111, "zip2">;
+
+// Extract and Insert
+def NI_ei_i32 : PatFrag<(ops node:$Rn, node:$Rm, node:$Ext, node:$Ins),
+ (vector_insert node:$Rn,
+ (i32 (vector_extract node:$Rm, node:$Ext)),
+ node:$Ins)>;
+
+def NI_ei_f32 : PatFrag<(ops node:$Rn, node:$Rm, node:$Ext, node:$Ins),
+ (vector_insert node:$Rn,
+ (f32 (vector_extract node:$Rm, node:$Ext)),
+ node:$Ins)>;
+
+// uzp1
+def : Pat<(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
+ (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
+ (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
+ (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
+ (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
+ (v16i8 VPR128:$Rn),
+ (v16i8 VPR128:$Rn), 2, 1)),
+ (v16i8 VPR128:$Rn), 4, 2)),
+ (v16i8 VPR128:$Rn), 6, 3)),
+ (v16i8 VPR128:$Rn), 8, 4)),
+ (v16i8 VPR128:$Rn), 10, 5)),
+ (v16i8 VPR128:$Rn), 12, 6)),
+ (v16i8 VPR128:$Rn), 14, 7)),
+ (v16i8 VPR128:$Rm), 0, 8)),
+ (v16i8 VPR128:$Rm), 2, 9)),
+ (v16i8 VPR128:$Rm), 4, 10)),
+ (v16i8 VPR128:$Rm), 6, 11)),
+ (v16i8 VPR128:$Rm), 8, 12)),
+ (v16i8 VPR128:$Rm), 10, 13)),
+ (v16i8 VPR128:$Rm), 12, 14)),
+ (v16i8 VPR128:$Rm), 14, 15)),
+ (UZP1vvv_16b VPR128:$Rn, VPR128:$Rm)>;
+
+class NI_Uzp1_v8<ValueType Ty, RegisterOperand VPR, Instruction INST>
+ : Pat<(Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32
+ (Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32
+ (Ty VPR:$Rn),
+ (Ty VPR:$Rn), 2, 1)),
+ (Ty VPR:$Rn), 4, 2)),
+ (Ty VPR:$Rn), 6, 3)),
+ (Ty VPR:$Rm), 0, 4)),
+ (Ty VPR:$Rm), 2, 5)),
+ (Ty VPR:$Rm), 4, 6)),
+ (Ty VPR:$Rm), 6, 7)),
+ (INST VPR:$Rn, VPR:$Rm)>;
+
+def : NI_Uzp1_v8<v8i8, VPR64, UZP1vvv_8b>;
+def : NI_Uzp1_v8<v8i16, VPR128, UZP1vvv_8h>;
+
+class NI_Uzp1_v4<ValueType Ty, RegisterOperand VPR, Instruction INST,
+ PatFrag ei>
+ : Pat<(Ty (ei (Ty (ei (Ty (ei
+ (Ty VPR:$Rn),
+ (Ty VPR:$Rn), 2, 1)),
+ (Ty VPR:$Rm), 0, 2)),
+ (Ty VPR:$Rm), 2, 3)),
+ (INST VPR:$Rn, VPR:$Rm)>;
+
+def : NI_Uzp1_v4<v4i16, VPR64, UZP1vvv_4h, NI_ei_i32>;
+def : NI_Uzp1_v4<v4i32, VPR128, UZP1vvv_4s, NI_ei_i32>;
+def : NI_Uzp1_v4<v4f32, VPR128, UZP1vvv_4s, NI_ei_f32>;
+
+// uzp2
+def : Pat<(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
+ (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
+ (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
+ (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
+ (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
+ (v16i8 VPR128:$Rm),
+ (v16i8 VPR128:$Rn), 1, 0)),
+ (v16i8 VPR128:$Rn), 3, 1)),
+ (v16i8 VPR128:$Rn), 5, 2)),
+ (v16i8 VPR128:$Rn), 7, 3)),
+ (v16i8 VPR128:$Rn), 9, 4)),
+ (v16i8 VPR128:$Rn), 11, 5)),
+ (v16i8 VPR128:$Rn), 13, 6)),
+ (v16i8 VPR128:$Rn), 15, 7)),
+ (v16i8 VPR128:$Rm), 1, 8)),
+ (v16i8 VPR128:$Rm), 3, 9)),
+ (v16i8 VPR128:$Rm), 5, 10)),
+ (v16i8 VPR128:$Rm), 7, 11)),
+ (v16i8 VPR128:$Rm), 9, 12)),
+ (v16i8 VPR128:$Rm), 11, 13)),
+ (v16i8 VPR128:$Rm), 13, 14)),
+ (UZP2vvv_16b VPR128:$Rn, VPR128:$Rm)>;
+
+class NI_Uzp2_v8<ValueType Ty, RegisterOperand VPR, Instruction INST>
+ : Pat<(Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32
+ (Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32
+ (Ty VPR:$Rm),
+ (Ty VPR:$Rn), 1, 0)),
+ (Ty VPR:$Rn), 3, 1)),
+ (Ty VPR:$Rn), 5, 2)),
+ (Ty VPR:$Rn), 7, 3)),
+ (Ty VPR:$Rm), 1, 4)),
+ (Ty VPR:$Rm), 3, 5)),
+ (Ty VPR:$Rm), 5, 6)),
+ (INST VPR:$Rn, VPR:$Rm)>;
+
+def : NI_Uzp2_v8<v8i8, VPR64, UZP2vvv_8b>;
+def : NI_Uzp2_v8<v8i16, VPR128, UZP2vvv_8h>;
+
+class NI_Uzp2_v4<ValueType Ty, RegisterOperand VPR, Instruction INST,
+ PatFrag ei>
+ : Pat<(Ty (ei (Ty (ei (Ty (ei
+ (Ty VPR:$Rm),
+ (Ty VPR:$Rn), 1, 0)),
+ (Ty VPR:$Rn), 3, 1)),
+ (Ty VPR:$Rm), 1, 2)),
+ (INST VPR:$Rn, VPR:$Rm)>;
+
+def : NI_Uzp2_v4<v4i16, VPR64, UZP2vvv_4h, NI_ei_i32>;
+def : NI_Uzp2_v4<v4i32, VPR128, UZP2vvv_4s, NI_ei_i32>;
+def : NI_Uzp2_v4<v4f32, VPR128, UZP2vvv_4s, NI_ei_f32>;
+
+// zip1
+def : Pat<(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
+ (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
+ (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
+ (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
+ (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
+ (v16i8 VPR128:$Rn),
+ (v16i8 VPR128:$Rm), 0, 1)),
+ (v16i8 VPR128:$Rn), 1, 2)),
+ (v16i8 VPR128:$Rm), 1, 3)),
+ (v16i8 VPR128:$Rn), 2, 4)),
+ (v16i8 VPR128:$Rm), 2, 5)),
+ (v16i8 VPR128:$Rn), 3, 6)),
+ (v16i8 VPR128:$Rm), 3, 7)),
+ (v16i8 VPR128:$Rn), 4, 8)),
+ (v16i8 VPR128:$Rm), 4, 9)),
+ (v16i8 VPR128:$Rn), 5, 10)),
+ (v16i8 VPR128:$Rm), 5, 11)),
+ (v16i8 VPR128:$Rn), 6, 12)),
+ (v16i8 VPR128:$Rm), 6, 13)),
+ (v16i8 VPR128:$Rn), 7, 14)),
+ (v16i8 VPR128:$Rm), 7, 15)),
+ (ZIP1vvv_16b VPR128:$Rn, VPR128:$Rm)>;
+
+class NI_Zip1_v8<ValueType Ty, RegisterOperand VPR, Instruction INST>
+ : Pat<(Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32
+ (Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32
+ (Ty VPR:$Rn),
+ (Ty VPR:$Rm), 0, 1)),
+ (Ty VPR:$Rn), 1, 2)),
+ (Ty VPR:$Rm), 1, 3)),
+ (Ty VPR:$Rn), 2, 4)),
+ (Ty VPR:$Rm), 2, 5)),
+ (Ty VPR:$Rn), 3, 6)),
+ (Ty VPR:$Rm), 3, 7)),
+ (INST VPR:$Rn, VPR:$Rm)>;
+
+def : NI_Zip1_v8<v8i8, VPR64, ZIP1vvv_8b>;
+def : NI_Zip1_v8<v8i16, VPR128, ZIP1vvv_8h>;
+
+class NI_Zip1_v4<ValueType Ty, RegisterOperand VPR, Instruction INST,
+ PatFrag ei>
+ : Pat<(Ty (ei (Ty (ei (Ty (ei
+ (Ty VPR:$Rn),
+ (Ty VPR:$Rm), 0, 1)),
+ (Ty VPR:$Rn), 1, 2)),
+ (Ty VPR:$Rm), 1, 3)),
+ (INST VPR:$Rn, VPR:$Rm)>;
+
+def : NI_Zip1_v4<v4i16, VPR64, ZIP1vvv_4h, NI_ei_i32>;
+def : NI_Zip1_v4<v4i32, VPR128, ZIP1vvv_4s, NI_ei_i32>;
+def : NI_Zip1_v4<v4f32, VPR128, ZIP1vvv_4s, NI_ei_f32>;
+
+// zip2
+def : Pat<(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
+ (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
+ (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
+ (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
+ (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
+ (v16i8 VPR128:$Rm),
+ (v16i8 VPR128:$Rn), 8, 0)),
+ (v16i8 VPR128:$Rm), 8, 1)),
+ (v16i8 VPR128:$Rn), 9, 2)),
+ (v16i8 VPR128:$Rm), 9, 3)),
+ (v16i8 VPR128:$Rn), 10, 4)),
+ (v16i8 VPR128:$Rm), 10, 5)),
+ (v16i8 VPR128:$Rn), 11, 6)),
+ (v16i8 VPR128:$Rm), 11, 7)),
+ (v16i8 VPR128:$Rn), 12, 8)),
+ (v16i8 VPR128:$Rm), 12, 9)),
+ (v16i8 VPR128:$Rn), 13, 10)),
+ (v16i8 VPR128:$Rm), 13, 11)),
+ (v16i8 VPR128:$Rn), 14, 12)),
+ (v16i8 VPR128:$Rm), 14, 13)),
+ (v16i8 VPR128:$Rn), 15, 14)),
+ (ZIP2vvv_16b VPR128:$Rn, VPR128:$Rm)>;
+
+class NI_Zip2_v8<ValueType Ty, RegisterOperand VPR, Instruction INST>
+ : Pat<(Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32
+ (Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32
+ (Ty VPR:$Rm),
+ (Ty VPR:$Rn), 4, 0)),
+ (Ty VPR:$Rm), 4, 1)),
+ (Ty VPR:$Rn), 5, 2)),
+ (Ty VPR:$Rm), 5, 3)),
+ (Ty VPR:$Rn), 6, 4)),
+ (Ty VPR:$Rm), 6, 5)),
+ (Ty VPR:$Rn), 7, 6)),
+ (INST VPR:$Rn, VPR:$Rm)>;
+
+def : NI_Zip2_v8<v8i8, VPR64, ZIP2vvv_8b>;
+def : NI_Zip2_v8<v8i16, VPR128, ZIP2vvv_8h>;
+
+class NI_Zip2_v4<ValueType Ty, RegisterOperand VPR, Instruction INST,
+ PatFrag ei>
+ : Pat<(Ty (ei (Ty (ei (Ty (ei
+ (Ty VPR:$Rm),
+ (Ty VPR:$Rn), 2, 0)),
+ (Ty VPR:$Rm), 2, 1)),
+ (Ty VPR:$Rn), 3, 2)),
+ (INST VPR:$Rn, VPR:$Rm)>;
+
+def : NI_Zip2_v4<v4i16, VPR64, ZIP2vvv_4h, NI_ei_i32>;
+def : NI_Zip2_v4<v4i32, VPR128, ZIP2vvv_4s, NI_ei_i32>;
+def : NI_Zip2_v4<v4f32, VPR128, ZIP2vvv_4s, NI_ei_f32>;
+
+// trn1
+def : Pat<(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
+ (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
+ (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
+ (v16i8 VPR128:$Rn),
+ (v16i8 VPR128:$Rm), 0, 1)),
+ (v16i8 VPR128:$Rm), 2, 3)),
+ (v16i8 VPR128:$Rm), 4, 5)),
+ (v16i8 VPR128:$Rm), 6, 7)),
+ (v16i8 VPR128:$Rm), 8, 9)),
+ (v16i8 VPR128:$Rm), 10, 11)),
+ (v16i8 VPR128:$Rm), 12, 13)),
+ (v16i8 VPR128:$Rm), 14, 15)),
+ (TRN1vvv_16b VPR128:$Rn, VPR128:$Rm)>;
+
+class NI_Trn1_v8<ValueType Ty, RegisterOperand VPR, Instruction INST>
+ : Pat<(Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32
+ (Ty VPR:$Rn),
+ (Ty VPR:$Rm), 0, 1)),
+ (Ty VPR:$Rm), 2, 3)),
+ (Ty VPR:$Rm), 4, 5)),
+ (Ty VPR:$Rm), 6, 7)),
+ (INST VPR:$Rn, VPR:$Rm)>;
+
+def : NI_Trn1_v8<v8i8, VPR64, TRN1vvv_8b>;
+def : NI_Trn1_v8<v8i16, VPR128, TRN1vvv_8h>;
+
+class NI_Trn1_v4<ValueType Ty, RegisterOperand VPR, Instruction INST,
+ PatFrag ei>
+ : Pat<(Ty (ei (Ty (ei
+ (Ty VPR:$Rn),
+ (Ty VPR:$Rm), 0, 1)),
+ (Ty VPR:$Rm), 2, 3)),
+ (INST VPR:$Rn, VPR:$Rm)>;
+
+def : NI_Trn1_v4<v4i16, VPR64, TRN1vvv_4h, NI_ei_i32>;
+def : NI_Trn1_v4<v4i32, VPR128, TRN1vvv_4s, NI_ei_i32>;
+def : NI_Trn1_v4<v4f32, VPR128, TRN1vvv_4s, NI_ei_f32>;
+
+// trn2
+def : Pat<(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
+ (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
+ (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
+ (v16i8 VPR128:$Rm),
+ (v16i8 VPR128:$Rn), 1, 0)),
+ (v16i8 VPR128:$Rn), 3, 2)),
+ (v16i8 VPR128:$Rn), 5, 4)),
+ (v16i8 VPR128:$Rn), 7, 6)),
+ (v16i8 VPR128:$Rn), 9, 8)),
+ (v16i8 VPR128:$Rn), 11, 10)),
+ (v16i8 VPR128:$Rn), 13, 12)),
+ (v16i8 VPR128:$Rn), 15, 14)),
+ (TRN2vvv_16b VPR128:$Rn, VPR128:$Rm)>;
+
+class NI_Trn2_v8<ValueType Ty, RegisterOperand VPR, Instruction INST>
+ : Pat<(Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32
+ (Ty VPR:$Rm),
+ (Ty VPR:$Rn), 1, 0)),
+ (Ty VPR:$Rn), 3, 2)),
+ (Ty VPR:$Rn), 5, 4)),
+ (Ty VPR:$Rn), 7, 6)),
+ (INST VPR:$Rn, VPR:$Rm)>;
+
+def : NI_Trn2_v8<v8i8, VPR64, TRN2vvv_8b>;
+def : NI_Trn2_v8<v8i16, VPR128, TRN2vvv_8h>;
+
+class NI_Trn2_v4<ValueType Ty, RegisterOperand VPR, Instruction INST,
+ PatFrag ei>
+ : Pat<(Ty (ei (Ty (ei
+ (Ty VPR:$Rm),
+ (Ty VPR:$Rn), 1, 0)),
+ (Ty VPR:$Rn), 3, 2)),
+ (INST VPR:$Rn, VPR:$Rm)>;
+
+def : NI_Trn2_v4<v4i16, VPR64, TRN2vvv_4h, NI_ei_i32>;
+def : NI_Trn2_v4<v4i32, VPR128, TRN2vvv_4s, NI_ei_i32>;
+def : NI_Trn2_v4<v4f32, VPR128, TRN2vvv_4s, NI_ei_f32>;
+
+// End of implementation for instruction class (Perm)
+
// The followings are for instruction class (3V Diff)
// normal long/long2 pattern
diff --git a/test/CodeGen/AArch64/neon-perm.ll b/test/CodeGen/AArch64/neon-perm.ll
new file mode 100644
index 0000000000..4db4771cf1
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-perm.ll
@@ -0,0 +1,1676 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
+
+%struct.int8x8x2_t = type { [2 x <8 x i8>] }
+%struct.int16x4x2_t = type { [2 x <4 x i16>] }
+%struct.int32x2x2_t = type { [2 x <2 x i32>] }
+%struct.uint8x8x2_t = type { [2 x <8 x i8>] }
+%struct.uint16x4x2_t = type { [2 x <4 x i16>] }
+%struct.uint32x2x2_t = type { [2 x <2 x i32>] }
+%struct.float32x2x2_t = type { [2 x <2 x float>] }
+%struct.poly8x8x2_t = type { [2 x <8 x i8>] }
+%struct.poly16x4x2_t = type { [2 x <4 x i16>] }
+%struct.int8x16x2_t = type { [2 x <16 x i8>] }
+%struct.int16x8x2_t = type { [2 x <8 x i16>] }
+%struct.int32x4x2_t = type { [2 x <4 x i32>] }
+%struct.uint8x16x2_t = type { [2 x <16 x i8>] }
+%struct.uint16x8x2_t = type { [2 x <8 x i16>] }
+%struct.uint32x4x2_t = type { [2 x <4 x i32>] }
+%struct.float32x4x2_t = type { [2 x <4 x float>] }
+%struct.poly8x16x2_t = type { [2 x <16 x i8>] }
+%struct.poly16x8x2_t = type { [2 x <8 x i16>] }
+
+define <8 x i8> @test_vuzp1_s8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK: test_vuzp1_s8:
+; CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+ %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+ ret <8 x i8> %shuffle.i
+}
+
+define <16 x i8> @test_vuzp1q_s8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: test_vuzp1q_s8:
+; CHECK: uzp1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+ %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
+ ret <16 x i8> %shuffle.i
+}
+
+define <4 x i16> @test_vuzp1_s16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK: test_vuzp1_s16:
+; CHECK: uzp1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+ %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ ret <4 x i16> %shuffle.i
+}
+
+define <8 x i16> @test_vuzp1q_s16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vuzp1q_s16:
+; CHECK: uzp1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+ ret <8 x i16> %shuffle.i
+}
+
+define <2 x i32> @test_vuzp1_s32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK: test_vuzp1_s32:
+; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+entry:
+ %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
+ ret <2 x i32> %shuffle.i
+}
+
+define <4 x i32> @test_vuzp1q_s32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK: test_vuzp1q_s32:
+; CHECK: uzp1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ ret <4 x i32> %shuffle.i
+}
+
+define <2 x i64> @test_vuzp1q_s64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK: test_vuzp1q_s64:
+; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+entry:
+ %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
+ ret <2 x i64> %shuffle.i
+}
+
+define <8 x i8> @test_vuzp1_u8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK: test_vuzp1_u8:
+; CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+ %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+ ret <8 x i8> %shuffle.i
+}
+
+define <16 x i8> @test_vuzp1q_u8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: test_vuzp1q_u8:
+; CHECK: uzp1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+ %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
+ ret <16 x i8> %shuffle.i
+}
+
+define <4 x i16> @test_vuzp1_u16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK: test_vuzp1_u16:
+; CHECK: uzp1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+ %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ ret <4 x i16> %shuffle.i
+}
+
+define <8 x i16> @test_vuzp1q_u16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vuzp1q_u16:
+; CHECK: uzp1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+ ret <8 x i16> %shuffle.i
+}
+
+define <2 x i32> @test_vuzp1_u32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK: test_vuzp1_u32:
+; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+entry:
+ %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
+ ret <2 x i32> %shuffle.i
+}
+
+define <4 x i32> @test_vuzp1q_u32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK: test_vuzp1q_u32:
+; CHECK: uzp1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ ret <4 x i32> %shuffle.i
+}
+
+define <2 x i64> @test_vuzp1q_u64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK: test_vuzp1q_u64:
+; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+entry:
+ %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
+ ret <2 x i64> %shuffle.i
+}
+
+define <2 x float> @test_vuzp1_f32(<2 x float> %a, <2 x float> %b) {
+; CHECK: test_vuzp1_f32:
+; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+entry:
+ %shuffle.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
+ ret <2 x float> %shuffle.i
+}
+
+define <4 x float> @test_vuzp1q_f32(<4 x float> %a, <4 x float> %b) {
+; CHECK: test_vuzp1q_f32:
+; CHECK: uzp1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %shuffle.i = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ ret <4 x float> %shuffle.i
+}
+
+define <2 x double> @test_vuzp1q_f64(<2 x double> %a, <2 x double> %b) {
+; CHECK: test_vuzp1q_f64:
+; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+entry:
+ %shuffle.i = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 2>
+ ret <2 x double> %shuffle.i
+}
+
+define <8 x i8> @test_vuzp1_p8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK: test_vuzp1_p8:
+; CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+ %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+ ret <8 x i8> %shuffle.i
+}
+
+define <16 x i8> @test_vuzp1q_p8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: test_vuzp1q_p8:
+; CHECK: uzp1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+ %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
+ ret <16 x i8> %shuffle.i
+}
+
+define <4 x i16> @test_vuzp1_p16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK: test_vuzp1_p16:
+; CHECK: uzp1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+ %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ ret <4 x i16> %shuffle.i
+}
+
+define <8 x i16> @test_vuzp1q_p16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vuzp1q_p16:
+; CHECK: uzp1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+ ret <8 x i16> %shuffle.i
+}
+
+define <8 x i8> @test_vuzp2_s8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK: test_vuzp2_s8:
+; CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+ %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+ ret <8 x i8> %shuffle.i
+}
+
+define <16 x i8> @test_vuzp2q_s8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: test_vuzp2q_s8:
+; CHECK: uzp2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+ %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
+ ret <16 x i8> %shuffle.i
+}
+
+define <4 x i16> @test_vuzp2_s16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK: test_vuzp2_s16:
+; CHECK: uzp2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+ %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+ ret <4 x i16> %shuffle.i
+}
+
+define <8 x i16> @test_vuzp2q_s16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vuzp2q_s16:
+; CHECK: uzp2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+ ret <8 x i16> %shuffle.i
+}
+
+define <2 x i32> @test_vuzp2_s32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK: test_vuzp2_s32:
+; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+entry:
+ %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
+ ret <2 x i32> %shuffle.i
+}
+
+define <4 x i32> @test_vuzp2q_s32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK: test_vuzp2q_s32:
+; CHECK: uzp2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+ ret <4 x i32> %shuffle.i
+}
+
+define <2 x i64> @test_vuzp2q_s64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK: test_vuzp2q_s64:
+; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+entry:
+ %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
+ ret <2 x i64> %shuffle.i
+}
+
+define <8 x i8> @test_vuzp2_u8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK: test_vuzp2_u8:
+; CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+ %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+ ret <8 x i8> %shuffle.i
+}
+
+define <16 x i8> @test_vuzp2q_u8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: test_vuzp2q_u8:
+; CHECK: uzp2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+ %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
+ ret <16 x i8> %shuffle.i
+}
+
+define <4 x i16> @test_vuzp2_u16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK: test_vuzp2_u16:
+; CHECK: uzp2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+ %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+ ret <4 x i16> %shuffle.i
+}
+
+define <8 x i16> @test_vuzp2q_u16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vuzp2q_u16:
+; CHECK: uzp2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+ ret <8 x i16> %shuffle.i
+}
+
+define <2 x i32> @test_vuzp2_u32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK: test_vuzp2_u32:
+; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+entry:
+ %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
+ ret <2 x i32> %shuffle.i
+}
+
+define <4 x i32> @test_vuzp2q_u32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK: test_vuzp2q_u32:
+; CHECK: uzp2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+ ret <4 x i32> %shuffle.i
+}
+
+define <2 x i64> @test_vuzp2q_u64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK: test_vuzp2q_u64:
+; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+entry:
+ %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
+ ret <2 x i64> %shuffle.i
+}
+
+define <2 x float> @test_vuzp2_f32(<2 x float> %a, <2 x float> %b) {
+; CHECK: test_vuzp2_f32:
+; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+entry:
+ %shuffle.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
+ ret <2 x float> %shuffle.i
+}
+
+define <4 x float> @test_vuzp2q_f32(<4 x float> %a, <4 x float> %b) {
+; CHECK: test_vuzp2q_f32:
+; CHECK: uzp2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %shuffle.i = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+ ret <4 x float> %shuffle.i
+}
+
+define <2 x double> @test_vuzp2q_f64(<2 x double> %a, <2 x double> %b) {
+; CHECK: test_vuzp2q_f64:
+; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+entry:
+ %shuffle.i = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 3>
+ ret <2 x double> %shuffle.i
+}
+
+define <8 x i8> @test_vuzp2_p8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK: test_vuzp2_p8:
+; CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+ %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+ ret <8 x i8> %shuffle.i
+}
+
+define <16 x i8> @test_vuzp2q_p8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: test_vuzp2q_p8:
+; CHECK: uzp2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+ %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
+ ret <16 x i8> %shuffle.i
+}
+
+define <4 x i16> @test_vuzp2_p16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK: test_vuzp2_p16:
+; CHECK: uzp2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+ %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+ ret <4 x i16> %shuffle.i
+}
+
+define <8 x i16> @test_vuzp2q_p16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vuzp2q_p16:
+; CHECK: uzp2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+ ret <8 x i16> %shuffle.i
+}
+
+define <8 x i8> @test_vzip1_s8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK: test_vzip1_s8:
+; CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+ %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+ ret <8 x i8> %shuffle.i
+}
+
+define <16 x i8> @test_vzip1q_s8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: test_vzip1q_s8:
+; CHECK: zip1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+ %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
+ ret <16 x i8> %shuffle.i
+}
+
+define <4 x i16> @test_vzip1_s16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK: test_vzip1_s16:
+; CHECK: zip1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+ %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+ ret <4 x i16> %shuffle.i
+}
+
+define <8 x i16> @test_vzip1q_s16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vzip1q_s16:
+; CHECK: zip1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+ ret <8 x i16> %shuffle.i
+}
+
+define <2 x i32> @test_vzip1_s32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK: test_vzip1_s32:
+; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+entry:
+ %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
+ ret <2 x i32> %shuffle.i
+}
+
+define <4 x i32> @test_vzip1q_s32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK: test_vzip1q_s32:
+; CHECK: zip1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+ ret <4 x i32> %shuffle.i
+}
+
+define <2 x i64> @test_vzip1q_s64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK: test_vzip1q_s64:
+; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+entry:
+ %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
+ ret <2 x i64> %shuffle.i
+}
+
+define <8 x i8> @test_vzip1_u8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK: test_vzip1_u8:
+; CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+ %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+ ret <8 x i8> %shuffle.i
+}
+
+define <16 x i8> @test_vzip1q_u8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: test_vzip1q_u8:
+; CHECK: zip1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+ %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
+ ret <16 x i8> %shuffle.i
+}
+
+define <4 x i16> @test_vzip1_u16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK: test_vzip1_u16:
+; CHECK: zip1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+ %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+ ret <4 x i16> %shuffle.i
+}
+
+define <8 x i16> @test_vzip1q_u16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vzip1q_u16:
+; CHECK: zip1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+ ret <8 x i16> %shuffle.i
+}
+
+define <2 x i32> @test_vzip1_u32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK: test_vzip1_u32:
+; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+entry:
+ %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
+ ret <2 x i32> %shuffle.i
+}
+
+define <4 x i32> @test_vzip1q_u32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK: test_vzip1q_u32:
+; CHECK: zip1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+ ret <4 x i32> %shuffle.i
+}
+
+define <2 x i64> @test_vzip1q_u64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK: test_vzip1q_u64:
+; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+entry:
+ %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
+ ret <2 x i64> %shuffle.i
+}
+
+define <2 x float> @test_vzip1_f32(<2 x float> %a, <2 x float> %b) {
+; CHECK: test_vzip1_f32:
+; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+entry:
+ %shuffle.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
+ ret <2 x float> %shuffle.i
+}
+
+define <4 x float> @test_vzip1q_f32(<4 x float> %a, <4 x float> %b) {
+; CHECK: test_vzip1q_f32:
+; CHECK: zip1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %shuffle.i = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+ ret <4 x float> %shuffle.i
+}
+
+define <2 x double> @test_vzip1q_f64(<2 x double> %a, <2 x double> %b) {
+; CHECK: test_vzip1q_f64:
+; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+entry:
+ %shuffle.i = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 2>
+ ret <2 x double> %shuffle.i
+}
+
+define <8 x i8> @test_vzip1_p8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK: test_vzip1_p8:
+; CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+ %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+ ret <8 x i8> %shuffle.i
+}
+
+define <16 x i8> @test_vzip1q_p8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: test_vzip1q_p8:
+; CHECK: zip1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+ %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
+ ret <16 x i8> %shuffle.i
+}
+
+define <4 x i16> @test_vzip1_p16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK: test_vzip1_p16:
+; CHECK: zip1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+ %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+ ret <4 x i16> %shuffle.i
+}
+
+define <8 x i16> @test_vzip1q_p16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vzip1q_p16:
+; CHECK: zip1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+ ret <8 x i16> %shuffle.i
+}
+
+define <8 x i8> @test_vzip2_s8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK: test_vzip2_s8:
+; CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+ %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+ ret <8 x i8> %shuffle.i
+}
+
+define <16 x i8> @test_vzip2q_s8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: test_vzip2q_s8:
+; CHECK: zip2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+ %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+ ret <16 x i8> %shuffle.i
+}
+
+define <4 x i16> @test_vzip2_s16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK: test_vzip2_s16:
+; CHECK: zip2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+ %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+ ret <4 x i16> %shuffle.i
+}
+
+define <8 x i16> @test_vzip2q_s16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vzip2q_s16:
+; CHECK: zip2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+ ret <8 x i16> %shuffle.i
+}
+
+define <2 x i32> @test_vzip2_s32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK: test_vzip2_s32:
+; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+entry:
+ %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
+ ret <2 x i32> %shuffle.i
+}
+
+define <4 x i32> @test_vzip2q_s32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK: test_vzip2q_s32:
+; CHECK: zip2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+ ret <4 x i32> %shuffle.i
+}
+
+define <2 x i64> @test_vzip2q_s64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK: test_vzip2q_s64:
+; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+entry:
+ %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
+ ret <2 x i64> %shuffle.i
+}
+
+define <8 x i8> @test_vzip2_u8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK: test_vzip2_u8:
+; CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+ %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+ ret <8 x i8> %shuffle.i
+}
+
+define <16 x i8> @test_vzip2q_u8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: test_vzip2q_u8:
+; CHECK: zip2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+ %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+ ret <16 x i8> %shuffle.i
+}
+
+define <4 x i16> @test_vzip2_u16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK: test_vzip2_u16:
+; CHECK: zip2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+ %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+ ret <4 x i16> %shuffle.i
+}
+
+define <8 x i16> @test_vzip2q_u16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vzip2q_u16:
+; CHECK: zip2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+ ret <8 x i16> %shuffle.i
+}
+
+define <2 x i32> @test_vzip2_u32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK: test_vzip2_u32:
+; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+entry:
+ %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
+ ret <2 x i32> %shuffle.i
+}
+
+define <4 x i32> @test_vzip2q_u32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK: test_vzip2q_u32:
+; CHECK: zip2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+ ret <4 x i32> %shuffle.i
+}
+
+define <2 x i64> @test_vzip2q_u64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK: test_vzip2q_u64:
+; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+entry:
+ %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
+ ret <2 x i64> %shuffle.i
+}
+
+define <2 x float> @test_vzip2_f32(<2 x float> %a, <2 x float> %b) {
+; CHECK: test_vzip2_f32:
+; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+entry:
+ %shuffle.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
+ ret <2 x float> %shuffle.i
+}
+
+define <4 x float> @test_vzip2q_f32(<4 x float> %a, <4 x float> %b) {
+; CHECK: test_vzip2q_f32:
+; CHECK: zip2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %shuffle.i = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+ ret <4 x float> %shuffle.i
+}
+
+define <2 x double> @test_vzip2q_f64(<2 x double> %a, <2 x double> %b) {
+; CHECK: test_vzip2q_f64:
+; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+entry:
+ %shuffle.i = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 3>
+ ret <2 x double> %shuffle.i
+}
+
+define <8 x i8> @test_vzip2_p8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK: test_vzip2_p8:
+; CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+ %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+ ret <8 x i8> %shuffle.i
+}
+
+define <16 x i8> @test_vzip2q_p8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: test_vzip2q_p8:
+; CHECK: zip2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+ %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+ ret <16 x i8> %shuffle.i
+}
+
+define <4 x i16> @test_vzip2_p16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK: test_vzip2_p16:
+; CHECK: zip2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+ %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+ ret <4 x i16> %shuffle.i
+}
+
+define <8 x i16> @test_vzip2q_p16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vzip2q_p16:
+; CHECK: zip2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+ ret <8 x i16> %shuffle.i
+}
+
+define <8 x i8> @test_vtrn1_s8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK: test_vtrn1_s8:
+; CHECK: trn1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+ %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+ ret <8 x i8> %shuffle.i
+}
+
+define <16 x i8> @test_vtrn1q_s8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: test_vtrn1q_s8:
+; CHECK: trn1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+ %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
+ ret <16 x i8> %shuffle.i
+}
+
+define <4 x i16> @test_vtrn1_s16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK: test_vtrn1_s16:
+; CHECK: trn1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+ %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+ ret <4 x i16> %shuffle.i
+}
+
+define <8 x i16> @test_vtrn1q_s16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vtrn1q_s16:
+; CHECK: trn1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+ ret <8 x i16> %shuffle.i
+}
+
+define <2 x i32> @test_vtrn1_s32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK: test_vtrn1_s32:
+; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+entry:
+ %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
+ ret <2 x i32> %shuffle.i
+}
+
+define <4 x i32> @test_vtrn1q_s32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK: test_vtrn1q_s32:
+; CHECK: trn1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+ ret <4 x i32> %shuffle.i
+}
+
+define <2 x i64> @test_vtrn1q_s64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK: test_vtrn1q_s64:
+; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+entry:
+ %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
+ ret <2 x i64> %shuffle.i
+}
+
+define <8 x i8> @test_vtrn1_u8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK: test_vtrn1_u8:
+; CHECK: trn1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+ %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+ ret <8 x i8> %shuffle.i
+}
+
+define <16 x i8> @test_vtrn1q_u8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: test_vtrn1q_u8:
+; CHECK: trn1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+ %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
+ ret <16 x i8> %shuffle.i
+}
+
+define <4 x i16> @test_vtrn1_u16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK: test_vtrn1_u16:
+; CHECK: trn1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+ %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+ ret <4 x i16> %shuffle.i
+}
+
+define <8 x i16> @test_vtrn1q_u16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vtrn1q_u16:
+; CHECK: trn1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+ ret <8 x i16> %shuffle.i
+}
+
+define <2 x i32> @test_vtrn1_u32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK: test_vtrn1_u32:
+; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+entry:
+ %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
+ ret <2 x i32> %shuffle.i
+}
+
+define <4 x i32> @test_vtrn1q_u32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK: test_vtrn1q_u32:
+; CHECK: trn1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+ ret <4 x i32> %shuffle.i
+}
+
+define <2 x i64> @test_vtrn1q_u64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK: test_vtrn1q_u64:
+; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+entry:
+ %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
+ ret <2 x i64> %shuffle.i
+}
+
+define <2 x float> @test_vtrn1_f32(<2 x float> %a, <2 x float> %b) {
+; CHECK: test_vtrn1_f32:
+; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+entry:
+ %shuffle.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
+ ret <2 x float> %shuffle.i
+}
+
+define <4 x float> @test_vtrn1q_f32(<4 x float> %a, <4 x float> %b) {
+; CHECK: test_vtrn1q_f32:
+; CHECK: trn1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %shuffle.i = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+ ret <4 x float> %shuffle.i
+}
+
+define <2 x double> @test_vtrn1q_f64(<2 x double> %a, <2 x double> %b) {
+; CHECK: test_vtrn1q_f64:
+; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+entry:
+ %shuffle.i = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 2>
+ ret <2 x double> %shuffle.i
+}
+
+define <8 x i8> @test_vtrn1_p8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK: test_vtrn1_p8:
+; CHECK: trn1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+ %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+ ret <8 x i8> %shuffle.i
+}
+
+define <16 x i8> @test_vtrn1q_p8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: test_vtrn1q_p8:
+; CHECK: trn1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+ %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
+ ret <16 x i8> %shuffle.i
+}
+
+define <4 x i16> @test_vtrn1_p16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK: test_vtrn1_p16:
+; CHECK: trn1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+ %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+ ret <4 x i16> %shuffle.i
+}
+
+define <8 x i16> @test_vtrn1q_p16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vtrn1q_p16:
+; CHECK: trn1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+ ret <8 x i16> %shuffle.i
+}
+
+define <8 x i8> @test_vtrn2_s8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK: test_vtrn2_s8:
+; CHECK: trn2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+ %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+ ret <8 x i8> %shuffle.i
+}
+
+define <16 x i8> @test_vtrn2q_s8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: test_vtrn2q_s8:
+; CHECK: trn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+ %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
+ ret <16 x i8> %shuffle.i
+}
+
+define <4 x i16> @test_vtrn2_s16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK: test_vtrn2_s16:
+; CHECK: trn2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+ %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+ ret <4 x i16> %shuffle.i
+}
+
+define <8 x i16> @test_vtrn2q_s16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vtrn2q_s16:
+; CHECK: trn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+ ret <8 x i16> %shuffle.i
+}
+
+define <2 x i32> @test_vtrn2_s32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK: test_vtrn2_s32:
+; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+entry:
+ %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
+ ret <2 x i32> %shuffle.i
+}
+
+define <4 x i32> @test_vtrn2q_s32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK: test_vtrn2q_s32:
+; CHECK: trn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+ ret <4 x i32> %shuffle.i
+}
+
+define <2 x i64> @test_vtrn2q_s64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK: test_vtrn2q_s64:
+; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+entry:
+ %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
+ ret <2 x i64> %shuffle.i
+}
+
+define <8 x i8> @test_vtrn2_u8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK: test_vtrn2_u8:
+; CHECK: trn2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+ %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+ ret <8 x i8> %shuffle.i
+}
+
+define <16 x i8> @test_vtrn2q_u8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: test_vtrn2q_u8:
+; CHECK: trn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+ %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
+ ret <16 x i8> %shuffle.i
+}
+
+define <4 x i16> @test_vtrn2_u16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK: test_vtrn2_u16:
+; CHECK: trn2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+ %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+ ret <4 x i16> %shuffle.i
+}
+
+define <8 x i16> @test_vtrn2q_u16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vtrn2q_u16:
+; CHECK: trn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+ ret <8 x i16> %shuffle.i
+}
+
+define <2 x i32> @test_vtrn2_u32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK: test_vtrn2_u32:
+; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+entry:
+ %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
+ ret <2 x i32> %shuffle.i
+}
+
+define <4 x i32> @test_vtrn2q_u32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK: test_vtrn2q_u32:
+; CHECK: trn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+ ret <4 x i32> %shuffle.i
+}
+
+define <2 x i64> @test_vtrn2q_u64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK: test_vtrn2q_u64:
+; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+entry:
+ %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
+ ret <2 x i64> %shuffle.i
+}
+
+define <2 x float> @test_vtrn2_f32(<2 x float> %a, <2 x float> %b) {
+; CHECK: test_vtrn2_f32:
+; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+entry:
+ %shuffle.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
+ ret <2 x float> %shuffle.i
+}
+
+define <4 x float> @test_vtrn2q_f32(<4 x float> %a, <4 x float> %b) {
+; CHECK: test_vtrn2q_f32:
+; CHECK: trn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %shuffle.i = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+ ret <4 x float> %shuffle.i
+}
+
+define <2 x double> @test_vtrn2q_f64(<2 x double> %a, <2 x double> %b) {
+; CHECK: test_vtrn2q_f64:
+; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+entry:
+ %shuffle.i = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 3>
+ ret <2 x double> %shuffle.i
+}
+
+define <8 x i8> @test_vtrn2_p8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK: test_vtrn2_p8:
+; CHECK: trn2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+ %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+ ret <8 x i8> %shuffle.i
+}
+
+define <16 x i8> @test_vtrn2q_p8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: test_vtrn2q_p8:
+; CHECK: trn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+ %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
+ ret <16 x i8> %shuffle.i
+}
+
+define <4 x i16> @test_vtrn2_p16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK: test_vtrn2_p16:
+; CHECK: trn2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+ %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+ ret <4 x i16> %shuffle.i
+}
+
+define <8 x i16> @test_vtrn2q_p16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vtrn2q_p16:
+; CHECK: trn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+ ret <8 x i16> %shuffle.i
+}
+
+define %struct.int8x8x2_t @test_vuzp_s8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK: test_vuzp_s8:
+; CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+ %vuzp.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+ %vuzp1.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+ %.fca.0.0.insert = insertvalue %struct.int8x8x2_t undef, <8 x i8> %vuzp.i, 0, 0
+ %.fca.0.1.insert = insertvalue %struct.int8x8x2_t %.fca.0.0.insert, <8 x i8> %vuzp1.i, 0, 1
+ ret %struct.int8x8x2_t %.fca.0.1.insert
+}
+
+define %struct.int16x4x2_t @test_vuzp_s16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK: test_vuzp_s16:
+; CHECK: uzp1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+; CHECK: uzp2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+ %vuzp.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ %vuzp1.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+ %.fca.0.0.insert = insertvalue %struct.int16x4x2_t undef, <4 x i16> %vuzp.i, 0, 0
+ %.fca.0.1.insert = insertvalue %struct.int16x4x2_t %.fca.0.0.insert, <4 x i16> %vuzp1.i, 0, 1
+ ret %struct.int16x4x2_t %.fca.0.1.insert
+}
+
+define %struct.int32x2x2_t @test_vuzp_s32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK: test_vuzp_s32:
+; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+entry:
+ %vuzp.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
+ %vuzp1.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
+ %.fca.0.0.insert = insertvalue %struct.int32x2x2_t undef, <2 x i32> %vuzp.i, 0, 0
+ %.fca.0.1.insert = insertvalue %struct.int32x2x2_t %.fca.0.0.insert, <2 x i32> %vuzp1.i, 0, 1
+ ret %struct.int32x2x2_t %.fca.0.1.insert
+}
+
+define %struct.uint8x8x2_t @test_vuzp_u8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK: test_vuzp_u8:
+; CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+ %vuzp.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+ %vuzp1.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+ %.fca.0.0.insert = insertvalue %struct.uint8x8x2_t undef, <8 x i8> %vuzp.i, 0, 0
+ %.fca.0.1.insert = insertvalue %struct.uint8x8x2_t %.fca.0.0.insert, <8 x i8> %vuzp1.i, 0, 1
+ ret %struct.uint8x8x2_t %.fca.0.1.insert
+}
+
+define %struct.uint16x4x2_t @test_vuzp_u16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK: test_vuzp_u16:
+; CHECK: uzp1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+; CHECK: uzp2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+ %vuzp.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ %vuzp1.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+ %.fca.0.0.insert = insertvalue %struct.uint16x4x2_t undef, <4 x i16> %vuzp.i, 0, 0
+ %.fca.0.1.insert = insertvalue %struct.uint16x4x2_t %.fca.0.0.insert, <4 x i16> %vuzp1.i, 0, 1
+ ret %struct.uint16x4x2_t %.fca.0.1.insert
+}
+
+define %struct.uint32x2x2_t @test_vuzp_u32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK: test_vuzp_u32:
+; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+entry:
+ %vuzp.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
+ %vuzp1.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
+ %.fca.0.0.insert = insertvalue %struct.uint32x2x2_t undef, <2 x i32> %vuzp.i, 0, 0
+ %.fca.0.1.insert = insertvalue %struct.uint32x2x2_t %.fca.0.0.insert, <2 x i32> %vuzp1.i, 0, 1
+ ret %struct.uint32x2x2_t %.fca.0.1.insert
+}
+
+define %struct.float32x2x2_t @test_vuzp_f32(<2 x float> %a, <2 x float> %b) {
+; CHECK: test_vuzp_f32:
+; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+entry:
+ %vuzp.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
+ %vuzp1.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
+ %.fca.0.0.insert = insertvalue %struct.float32x2x2_t undef, <2 x float> %vuzp.i, 0, 0
+ %.fca.0.1.insert = insertvalue %struct.float32x2x2_t %.fca.0.0.insert, <2 x float> %vuzp1.i, 0, 1
+ ret %struct.float32x2x2_t %.fca.0.1.insert
+}
+
+define %struct.poly8x8x2_t @test_vuzp_p8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK: test_vuzp_p8:
+; CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+ %vuzp.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+ %vuzp1.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+ %.fca.0.0.insert = insertvalue %struct.poly8x8x2_t undef, <8 x i8> %vuzp.i, 0, 0
+ %.fca.0.1.insert = insertvalue %struct.poly8x8x2_t %.fca.0.0.insert, <8 x i8> %vuzp1.i, 0, 1
+ ret %struct.poly8x8x2_t %.fca.0.1.insert
+}
+
+define %struct.poly16x4x2_t @test_vuzp_p16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK: test_vuzp_p16:
+; CHECK: uzp1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+; CHECK: uzp2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+ %vuzp.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ %vuzp1.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+ %.fca.0.0.insert = insertvalue %struct.poly16x4x2_t undef, <4 x i16> %vuzp.i, 0, 0
+ %.fca.0.1.insert = insertvalue %struct.poly16x4x2_t %.fca.0.0.insert, <4 x i16> %vuzp1.i, 0, 1
+ ret %struct.poly16x4x2_t %.fca.0.1.insert
+}
+
+define %struct.int8x16x2_t @test_vuzpq_s8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: test_vuzpq_s8:
+; CHECK: uzp1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK: uzp2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+ %vuzp.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
+ %vuzp1.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
+ %.fca.0.0.insert = insertvalue %struct.int8x16x2_t undef, <16 x i8> %vuzp.i, 0, 0
+ %.fca.0.1.insert = insertvalue %struct.int8x16x2_t %.fca.0.0.insert, <16 x i8> %vuzp1.i, 0, 1
+ ret %struct.int8x16x2_t %.fca.0.1.insert
+}
+
+define %struct.int16x8x2_t @test_vuzpq_s16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vuzpq_s16:
+; CHECK: uzp1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+; CHECK: uzp2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %vuzp.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+ %vuzp1.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+ %.fca.0.0.insert = insertvalue %struct.int16x8x2_t undef, <8 x i16> %vuzp.i, 0, 0
+ %.fca.0.1.insert = insertvalue %struct.int16x8x2_t %.fca.0.0.insert, <8 x i16> %vuzp1.i, 0, 1
+ ret %struct.int16x8x2_t %.fca.0.1.insert
+}
+
+define %struct.int32x4x2_t @test_vuzpq_s32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK: test_vuzpq_s32:
+; CHECK: uzp1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK: uzp2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %vuzp.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ %vuzp1.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+ %.fca.0.0.insert = insertvalue %struct.int32x4x2_t undef, <4 x i32> %vuzp.i, 0, 0
+ %.fca.0.1.insert = insertvalue %struct.int32x4x2_t %.fca.0.0.insert, <4 x i32> %vuzp1.i, 0, 1
+ ret %struct.int32x4x2_t %.fca.0.1.insert
+}
+
+define %struct.uint8x16x2_t @test_vuzpq_u8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: test_vuzpq_u8:
+; CHECK: uzp1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK: uzp2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+ %vuzp.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
+ %vuzp1.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
+ %.fca.0.0.insert = insertvalue %struct.uint8x16x2_t undef, <16 x i8> %vuzp.i, 0, 0
+ %.fca.0.1.insert = insertvalue %struct.uint8x16x2_t %.fca.0.0.insert, <16 x i8> %vuzp1.i, 0, 1
+ ret %struct.uint8x16x2_t %.fca.0.1.insert
+}
+
+define %struct.uint16x8x2_t @test_vuzpq_u16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vuzpq_u16:
+; CHECK: uzp1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+; CHECK: uzp2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %vuzp.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+ %vuzp1.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+ %.fca.0.0.insert = insertvalue %struct.uint16x8x2_t undef, <8 x i16> %vuzp.i, 0, 0
+ %.fca.0.1.insert = insertvalue %struct.uint16x8x2_t %.fca.0.0.insert, <8 x i16> %vuzp1.i, 0, 1
+ ret %struct.uint16x8x2_t %.fca.0.1.insert
+}
+
+define %struct.uint32x4x2_t @test_vuzpq_u32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK: test_vuzpq_u32:
+; CHECK: uzp1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK: uzp2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %vuzp.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ %vuzp1.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+ %.fca.0.0.insert = insertvalue %struct.uint32x4x2_t undef, <4 x i32> %vuzp.i, 0, 0
+ %.fca.0.1.insert = insertvalue %struct.uint32x4x2_t %.fca.0.0.insert, <4 x i32> %vuzp1.i, 0, 1
+ ret %struct.uint32x4x2_t %.fca.0.1.insert
+}
+
+define %struct.float32x4x2_t @test_vuzpq_f32(<4 x float> %a, <4 x float> %b) {
+; CHECK: test_vuzpq_f32:
+; CHECK: uzp1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK: uzp2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %vuzp.i = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ %vuzp1.i = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+ %.fca.0.0.insert = insertvalue %struct.float32x4x2_t undef, <4 x float> %vuzp.i, 0, 0
+ %.fca.0.1.insert = insertvalue %struct.float32x4x2_t %.fca.0.0.insert, <4 x float> %vuzp1.i, 0, 1
+ ret %struct.float32x4x2_t %.fca.0.1.insert
+}
+
+define %struct.poly8x16x2_t @test_vuzpq_p8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: test_vuzpq_p8:
+; CHECK: uzp1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK: uzp2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+ %vuzp.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
+ %vuzp1.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
+ %.fca.0.0.insert = insertvalue %struct.poly8x16x2_t undef, <16 x i8> %vuzp.i, 0, 0
+ %.fca.0.1.insert = insertvalue %struct.poly8x16x2_t %.fca.0.0.insert, <16 x i8> %vuzp1.i, 0, 1
+ ret %struct.poly8x16x2_t %.fca.0.1.insert
+}
+
+define %struct.poly16x8x2_t @test_vuzpq_p16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vuzpq_p16:
+; CHECK: uzp1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+; CHECK: uzp2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %vuzp.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+ %vuzp1.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+ %.fca.0.0.insert = insertvalue %struct.poly16x8x2_t undef, <8 x i16> %vuzp.i, 0, 0
+ %.fca.0.1.insert = insertvalue %struct.poly16x8x2_t %.fca.0.0.insert, <8 x i16> %vuzp1.i, 0, 1
+ ret %struct.poly16x8x2_t %.fca.0.1.insert
+}
+
+define %struct.int8x8x2_t @test_vzip_s8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK: test_vzip_s8:
+; CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+ %vzip.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+ %vzip1.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+ %.fca.0.0.insert = insertvalue %struct.int8x8x2_t undef, <8 x i8> %vzip.i, 0, 0
+ %.fca.0.1.insert = insertvalue %struct.int8x8x2_t %.fca.0.0.insert, <8 x i8> %vzip1.i, 0, 1
+ ret %struct.int8x8x2_t %.fca.0.1.insert
+}
+
+define %struct.int16x4x2_t @test_vzip_s16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK: test_vzip_s16:
+; CHECK: zip1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+; CHECK: zip2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+ %vzip.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+ %vzip1.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+ %.fca.0.0.insert = insertvalue %struct.int16x4x2_t undef, <4 x i16> %vzip.i, 0, 0
+ %.fca.0.1.insert = insertvalue %struct.int16x4x2_t %.fca.0.0.insert, <4 x i16> %vzip1.i, 0, 1
+ ret %struct.int16x4x2_t %.fca.0.1.insert
+}
+
+define %struct.int32x2x2_t @test_vzip_s32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK: test_vzip_s32:
+; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+entry:
+ %vzip.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
+ %vzip1.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
+ %.fca.0.0.insert = insertvalue %struct.int32x2x2_t undef, <2 x i32> %vzip.i, 0, 0
+ %.fca.0.1.insert = insertvalue %struct.int32x2x2_t %.fca.0.0.insert, <2 x i32> %vzip1.i, 0, 1
+ ret %struct.int32x2x2_t %.fca.0.1.insert
+}
+
+define %struct.uint8x8x2_t @test_vzip_u8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK: test_vzip_u8:
+; CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+ %vzip.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+ %vzip1.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+ %.fca.0.0.insert = insertvalue %struct.uint8x8x2_t undef, <8 x i8> %vzip.i, 0, 0
+ %.fca.0.1.insert = insertvalue %struct.uint8x8x2_t %.fca.0.0.insert, <8 x i8> %vzip1.i, 0, 1
+ ret %struct.uint8x8x2_t %.fca.0.1.insert
+}
+
+define %struct.uint16x4x2_t @test_vzip_u16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK: test_vzip_u16:
+; CHECK: zip1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+; CHECK: zip2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+ %vzip.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+ %vzip1.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+ %.fca.0.0.insert = insertvalue %struct.uint16x4x2_t undef, <4 x i16> %vzip.i, 0, 0
+ %.fca.0.1.insert = insertvalue %struct.uint16x4x2_t %.fca.0.0.insert, <4 x i16> %vzip1.i, 0, 1
+ ret %struct.uint16x4x2_t %.fca.0.1.insert
+}
+
+define %struct.uint32x2x2_t @test_vzip_u32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK: test_vzip_u32:
+; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+entry:
+ %vzip.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
+ %vzip1.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
+ %.fca.0.0.insert = insertvalue %struct.uint32x2x2_t undef, <2 x i32> %vzip.i, 0, 0
+ %.fca.0.1.insert = insertvalue %struct.uint32x2x2_t %.fca.0.0.insert, <2 x i32> %vzip1.i, 0, 1
+ ret %struct.uint32x2x2_t %.fca.0.1.insert
+}
+
+define %struct.float32x2x2_t @test_vzip_f32(<2 x float> %a, <2 x float> %b) {
+; CHECK: test_vzip_f32:
+; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+entry:
+ %vzip.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
+ %vzip1.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
+ %.fca.0.0.insert = insertvalue %struct.float32x2x2_t undef, <2 x float> %vzip.i, 0, 0
+ %.fca.0.1.insert = insertvalue %struct.float32x2x2_t %.fca.0.0.insert, <2 x float> %vzip1.i, 0, 1
+ ret %struct.float32x2x2_t %.fca.0.1.insert
+}
+
+define %struct.poly8x8x2_t @test_vzip_p8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK: test_vzip_p8:
+; CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+ %vzip.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+ %vzip1.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+ %.fca.0.0.insert = insertvalue %struct.poly8x8x2_t undef, <8 x i8> %vzip.i, 0, 0
+ %.fca.0.1.insert = insertvalue %struct.poly8x8x2_t %.fca.0.0.insert, <8 x i8> %vzip1.i, 0, 1
+ ret %struct.poly8x8x2_t %.fca.0.1.insert
+}
+
+define %struct.poly16x4x2_t @test_vzip_p16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK: test_vzip_p16:
+; CHECK: zip1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+; CHECK: zip2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+ %vzip.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+ %vzip1.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+ %.fca.0.0.insert = insertvalue %struct.poly16x4x2_t undef, <4 x i16> %vzip.i, 0, 0
+ %.fca.0.1.insert = insertvalue %struct.poly16x4x2_t %.fca.0.0.insert, <4 x i16> %vzip1.i, 0, 1
+ ret %struct.poly16x4x2_t %.fca.0.1.insert
+}
+
+define %struct.int8x16x2_t @test_vzipq_s8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: test_vzipq_s8:
+; CHECK: zip1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK: zip2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+ %vzip.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
+ %vzip1.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+ %.fca.0.0.insert = insertvalue %struct.int8x16x2_t undef, <16 x i8> %vzip.i, 0, 0
+ %.fca.0.1.insert = insertvalue %struct.int8x16x2_t %.fca.0.0.insert, <16 x i8> %vzip1.i, 0, 1
+ ret %struct.int8x16x2_t %.fca.0.1.insert
+}
+
+define %struct.int16x8x2_t @test_vzipq_s16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vzipq_s16:
+; CHECK: zip1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+; CHECK: zip2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %vzip.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+ %vzip1.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+ %.fca.0.0.insert = insertvalue %struct.int16x8x2_t undef, <8 x i16> %vzip.i, 0, 0
+ %.fca.0.1.insert = insertvalue %struct.int16x8x2_t %.fca.0.0.insert, <8 x i16> %vzip1.i, 0, 1
+ ret %struct.int16x8x2_t %.fca.0.1.insert
+}
+
+define %struct.int32x4x2_t @test_vzipq_s32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK: test_vzipq_s32:
+; CHECK: zip1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK: zip2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %vzip.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+ %vzip1.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+ %.fca.0.0.insert = insertvalue %struct.int32x4x2_t undef, <4 x i32> %vzip.i, 0, 0
+ %.fca.0.1.insert = insertvalue %struct.int32x4x2_t %.fca.0.0.insert, <4 x i32> %vzip1.i, 0, 1
+ ret %struct.int32x4x2_t %.fca.0.1.insert
+}
+
+define %struct.uint8x16x2_t @test_vzipq_u8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: test_vzipq_u8:
+; CHECK: zip1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK: zip2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+ %vzip.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
+ %vzip1.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+ %.fca.0.0.insert = insertvalue %struct.uint8x16x2_t undef, <16 x i8> %vzip.i, 0, 0
+ %.fca.0.1.insert = insertvalue %struct.uint8x16x2_t %.fca.0.0.insert, <16 x i8> %vzip1.i, 0, 1
+ ret %struct.uint8x16x2_t %.fca.0.1.insert
+}
+
+define %struct.uint16x8x2_t @test_vzipq_u16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vzipq_u16:
+; CHECK: zip1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+; CHECK: zip2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %vzip.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+ %vzip1.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+ %.fca.0.0.insert = insertvalue %struct.uint16x8x2_t undef, <8 x i16> %vzip.i, 0, 0
+ %.fca.0.1.insert = insertvalue %struct.uint16x8x2_t %.fca.0.0.insert, <8 x i16> %vzip1.i, 0, 1
+ ret %struct.uint16x8x2_t %.fca.0.1.insert
+}
+
+define %struct.uint32x4x2_t @test_vzipq_u32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK: test_vzipq_u32:
+; CHECK: zip1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK: zip2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %vzip.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+ %vzip1.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+ %.fca.0.0.insert = insertvalue %struct.uint32x4x2_t undef, <4 x i32> %vzip.i, 0, 0
+ %.fca.0.1.insert = insertvalue %struct.uint32x4x2_t %.fca.0.0.insert, <4 x i32> %vzip1.i, 0, 1
+ ret %struct.uint32x4x2_t %.fca.0.1.insert
+}
+
+define %struct.float32x4x2_t @test_vzipq_f32(<4 x float> %a, <4 x float> %b) {
+; CHECK: test_vzipq_f32:
+; CHECK: zip1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK: zip2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %vzip.i = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+ %vzip1.i = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+ %.fca.0.0.insert = insertvalue %struct.float32x4x2_t undef, <4 x float> %vzip.i, 0, 0
+ %.fca.0.1.insert = insertvalue %struct.float32x4x2_t %.fca.0.0.insert, <4 x float> %vzip1.i, 0, 1
+ ret %struct.float32x4x2_t %.fca.0.1.insert
+}
+
+define %struct.poly8x16x2_t @test_vzipq_p8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: test_vzipq_p8:
+; CHECK: zip1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK: zip2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+ %vzip.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
+ %vzip1.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+ %.fca.0.0.insert = insertvalue %struct.poly8x16x2_t undef, <16 x i8> %vzip.i, 0, 0
+ %.fca.0.1.insert = insertvalue %struct.poly8x16x2_t %.fca.0.0.insert, <16 x i8> %vzip1.i, 0, 1
+ ret %struct.poly8x16x2_t %.fca.0.1.insert
+}
+
+define %struct.poly16x8x2_t @test_vzipq_p16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vzipq_p16:
+; CHECK: zip1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+; CHECK: zip2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %vzip.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+ %vzip1.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+ %.fca.0.0.insert = insertvalue %struct.poly16x8x2_t undef, <8 x i16> %vzip.i, 0, 0
+ %.fca.0.1.insert = insertvalue %struct.poly16x8x2_t %.fca.0.0.insert, <8 x i16> %vzip1.i, 0, 1
+ ret %struct.poly16x8x2_t %.fca.0.1.insert
+}
+
+define %struct.int8x8x2_t @test_vtrn_s8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK: test_vtrn_s8:
+; CHECK: trn1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK: trn2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+ %vtrn.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+ %vtrn1.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+ %.fca.0.0.insert = insertvalue %struct.int8x8x2_t undef, <8 x i8> %vtrn.i, 0, 0
+ %.fca.0.1.insert = insertvalue %struct.int8x8x2_t %.fca.0.0.insert, <8 x i8> %vtrn1.i, 0, 1
+ ret %struct.int8x8x2_t %.fca.0.1.insert
+}
+
+define %struct.int16x4x2_t @test_vtrn_s16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK: test_vtrn_s16:
+; CHECK: trn1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+; CHECK: trn2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+ %vtrn.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+ %vtrn1.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+ %.fca.0.0.insert = insertvalue %struct.int16x4x2_t undef, <4 x i16> %vtrn.i, 0, 0
+ %.fca.0.1.insert = insertvalue %struct.int16x4x2_t %.fca.0.0.insert, <4 x i16> %vtrn1.i, 0, 1
+ ret %struct.int16x4x2_t %.fca.0.1.insert
+}
+
+define %struct.int32x2x2_t @test_vtrn_s32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK: test_vtrn_s32:
+; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+entry:
+ %vtrn.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
+ %vtrn1.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
+ %.fca.0.0.insert = insertvalue %struct.int32x2x2_t undef, <2 x i32> %vtrn.i, 0, 0
+ %.fca.0.1.insert = insertvalue %struct.int32x2x2_t %.fca.0.0.insert, <2 x i32> %vtrn1.i, 0, 1
+ ret %struct.int32x2x2_t %.fca.0.1.insert
+}
+
+define %struct.uint8x8x2_t @test_vtrn_u8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK: test_vtrn_u8:
+; CHECK: trn1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK: trn2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+ %vtrn.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+ %vtrn1.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+ %.fca.0.0.insert = insertvalue %struct.uint8x8x2_t undef, <8 x i8> %vtrn.i, 0, 0
+ %.fca.0.1.insert = insertvalue %struct.uint8x8x2_t %.fca.0.0.insert, <8 x i8> %vtrn1.i, 0, 1
+ ret %struct.uint8x8x2_t %.fca.0.1.insert
+}
+
+define %struct.uint16x4x2_t @test_vtrn_u16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK: test_vtrn_u16:
+; CHECK: trn1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+; CHECK: trn2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+ %vtrn.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+ %vtrn1.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+ %.fca.0.0.insert = insertvalue %struct.uint16x4x2_t undef, <4 x i16> %vtrn.i, 0, 0
+ %.fca.0.1.insert = insertvalue %struct.uint16x4x2_t %.fca.0.0.insert, <4 x i16> %vtrn1.i, 0, 1
+ ret %struct.uint16x4x2_t %.fca.0.1.insert
+}
+
+define %struct.uint32x2x2_t @test_vtrn_u32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK: test_vtrn_u32:
+; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+entry:
+ %vtrn.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
+ %vtrn1.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
+ %.fca.0.0.insert = insertvalue %struct.uint32x2x2_t undef, <2 x i32> %vtrn.i, 0, 0
+ %.fca.0.1.insert = insertvalue %struct.uint32x2x2_t %.fca.0.0.insert, <2 x i32> %vtrn1.i, 0, 1
+ ret %struct.uint32x2x2_t %.fca.0.1.insert
+}
+
+define %struct.float32x2x2_t @test_vtrn_f32(<2 x float> %a, <2 x float> %b) {
+; CHECK: test_vtrn_f32:
+; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+entry:
+ %vtrn.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
+ %vtrn1.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
+ %.fca.0.0.insert = insertvalue %struct.float32x2x2_t undef, <2 x float> %vtrn.i, 0, 0
+ %.fca.0.1.insert = insertvalue %struct.float32x2x2_t %.fca.0.0.insert, <2 x float> %vtrn1.i, 0, 1
+ ret %struct.float32x2x2_t %.fca.0.1.insert
+}
+
+define %struct.poly8x8x2_t @test_vtrn_p8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK: test_vtrn_p8:
+; CHECK: trn1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK: trn2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+ %vtrn.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+ %vtrn1.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+ %.fca.0.0.insert = insertvalue %struct.poly8x8x2_t undef, <8 x i8> %vtrn.i, 0, 0
+ %.fca.0.1.insert = insertvalue %struct.poly8x8x2_t %.fca.0.0.insert, <8 x i8> %vtrn1.i, 0, 1
+ ret %struct.poly8x8x2_t %.fca.0.1.insert
+}
+
+define %struct.poly16x4x2_t @test_vtrn_p16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK: test_vtrn_p16:
+; CHECK: trn1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+; CHECK: trn2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+ %vtrn.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+ %vtrn1.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+ %.fca.0.0.insert = insertvalue %struct.poly16x4x2_t undef, <4 x i16> %vtrn.i, 0, 0
+ %.fca.0.1.insert = insertvalue %struct.poly16x4x2_t %.fca.0.0.insert, <4 x i16> %vtrn1.i, 0, 1
+ ret %struct.poly16x4x2_t %.fca.0.1.insert
+}
+
+define %struct.int8x16x2_t @test_vtrnq_s8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: test_vtrnq_s8:
+; CHECK: trn1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK: trn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+ %vtrn.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
+ %vtrn1.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
+ %.fca.0.0.insert = insertvalue %struct.int8x16x2_t undef, <16 x i8> %vtrn.i, 0, 0
+ %.fca.0.1.insert = insertvalue %struct.int8x16x2_t %.fca.0.0.insert, <16 x i8> %vtrn1.i, 0, 1
+ ret %struct.int8x16x2_t %.fca.0.1.insert
+}
+
+define %struct.int16x8x2_t @test_vtrnq_s16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vtrnq_s16:
+; CHECK: trn1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+; CHECK: trn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %vtrn.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+ %vtrn1.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+ %.fca.0.0.insert = insertvalue %struct.int16x8x2_t undef, <8 x i16> %vtrn.i, 0, 0
+ %.fca.0.1.insert = insertvalue %struct.int16x8x2_t %.fca.0.0.insert, <8 x i16> %vtrn1.i, 0, 1
+ ret %struct.int16x8x2_t %.fca.0.1.insert
+}
+
+define %struct.int32x4x2_t @test_vtrnq_s32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK: test_vtrnq_s32:
+; CHECK: trn1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK: trn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %vtrn.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+ %vtrn1.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+ %.fca.0.0.insert = insertvalue %struct.int32x4x2_t undef, <4 x i32> %vtrn.i, 0, 0
+ %.fca.0.1.insert = insertvalue %struct.int32x4x2_t %.fca.0.0.insert, <4 x i32> %vtrn1.i, 0, 1
+ ret %struct.int32x4x2_t %.fca.0.1.insert
+}
+
+define %struct.uint8x16x2_t @test_vtrnq_u8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: test_vtrnq_u8:
+; CHECK: trn1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK: trn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+ %vtrn.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
+ %vtrn1.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
+ %.fca.0.0.insert = insertvalue %struct.uint8x16x2_t undef, <16 x i8> %vtrn.i, 0, 0
+ %.fca.0.1.insert = insertvalue %struct.uint8x16x2_t %.fca.0.0.insert, <16 x i8> %vtrn1.i, 0, 1
+ ret %struct.uint8x16x2_t %.fca.0.1.insert
+}
+
+define %struct.uint16x8x2_t @test_vtrnq_u16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vtrnq_u16:
+; CHECK: trn1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+; CHECK: trn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %vtrn.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+ %vtrn1.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+ %.fca.0.0.insert = insertvalue %struct.uint16x8x2_t undef, <8 x i16> %vtrn.i, 0, 0
+ %.fca.0.1.insert = insertvalue %struct.uint16x8x2_t %.fca.0.0.insert, <8 x i16> %vtrn1.i, 0, 1
+ ret %struct.uint16x8x2_t %.fca.0.1.insert
+}
+
+define %struct.uint32x4x2_t @test_vtrnq_u32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK: test_vtrnq_u32:
+; CHECK: trn1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK: trn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %vtrn.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+ %vtrn1.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+ %.fca.0.0.insert = insertvalue %struct.uint32x4x2_t undef, <4 x i32> %vtrn.i, 0, 0
+ %.fca.0.1.insert = insertvalue %struct.uint32x4x2_t %.fca.0.0.insert, <4 x i32> %vtrn1.i, 0, 1
+ ret %struct.uint32x4x2_t %.fca.0.1.insert
+}
+
+define %struct.float32x4x2_t @test_vtrnq_f32(<4 x float> %a, <4 x float> %b) {
+; CHECK: test_vtrnq_f32:
+; CHECK: trn1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK: trn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %vtrn.i = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+ %vtrn1.i = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+ %.fca.0.0.insert = insertvalue %struct.float32x4x2_t undef, <4 x float> %vtrn.i, 0, 0
+ %.fca.0.1.insert = insertvalue %struct.float32x4x2_t %.fca.0.0.insert, <4 x float> %vtrn1.i, 0, 1
+ ret %struct.float32x4x2_t %.fca.0.1.insert
+}
+
+define %struct.poly8x16x2_t @test_vtrnq_p8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: test_vtrnq_p8:
+; CHECK: trn1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK: trn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+ %vtrn.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
+ %vtrn1.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
+ %.fca.0.0.insert = insertvalue %struct.poly8x16x2_t undef, <16 x i8> %vtrn.i, 0, 0
+ %.fca.0.1.insert = insertvalue %struct.poly8x16x2_t %.fca.0.0.insert, <16 x i8> %vtrn1.i, 0, 1
+ ret %struct.poly8x16x2_t %.fca.0.1.insert
+}
+
+define %struct.poly16x8x2_t @test_vtrnq_p16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vtrnq_p16:
+; CHECK: trn1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+; CHECK: trn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %vtrn.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+ %vtrn1.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+ %.fca.0.0.insert = insertvalue %struct.poly16x8x2_t undef, <8 x i16> %vtrn.i, 0, 0
+ %.fca.0.1.insert = insertvalue %struct.poly16x8x2_t %.fca.0.0.insert, <8 x i16> %vtrn1.i, 0, 1
+ ret %struct.poly16x8x2_t %.fca.0.1.insert
+}
diff --git a/test/MC/AArch64/neon-diagnostics.s b/test/MC/AArch64/neon-diagnostics.s
index b549480a8f..d9afcb1418 100644
--- a/test/MC/AArch64/neon-diagnostics.s
+++ b/test/MC/AArch64/neon-diagnostics.s
@@ -5235,3 +5235,419 @@
// CHECK-ERROR: ext v0.2d, v1.2d, v2.2d, #0x0
// CHECK-ERROR: ^
+
+//----------------------------------------------------------------------
+// Permutation with 3 vectors
+//----------------------------------------------------------------------
+
+ uzp1 v0.16b, v1.8b, v2.8b
+ uzp1 v0.8b, v1.4b, v2.4b
+ uzp1 v0.8h, v1.4h, v2.4h
+ uzp1 v0.4h, v1.2h, v2.2h
+ uzp1 v0.4s, v1.2s, v2.2s
+ uzp1 v0.2s, v1.1s, v2.1s
+ uzp1 v0.2d, v1.1d, v2.1d
+ uzp1 v0.1d, v1.1d, v2.1d
+
+// CHECK-ERROR <stdin>:4289:22: error: invalid operand for instruction
+// CHECK-ERROR uzp1 v0.16b, v1.8b, v2.8b
+// CHECK-ERROR ^
+// CHECK-ERROR <stdin>:4290:21: error: invalid operand for instruction
+// CHECK-ERROR uzp1 v0.8b, v1.4b, v2.4b
+// CHECK-ERROR ^
+// CHECK-ERROR <stdin>:4291:21: error: invalid operand for instruction
+// CHECK-ERROR uzp1 v0.8h, v1.4h, v2.4h
+// CHECK-ERROR ^
+// CHECK-ERROR <stdin>:4292:21: error: invalid operand for instruction
+// CHECK-ERROR uzp1 v0.4h, v1.2h, v2.2h
+// CHECK-ERROR ^
+// CHECK-ERROR <stdin>:4293:21: error: invalid operand for instruction
+// CHECK-ERROR uzp1 v0.4s, v1.2s, v2.2s
+// CHECK-ERROR ^
+// CHECK-ERROR <stdin>:4294:21: error: invalid operand for instruction
+// CHECK-ERROR uzp1 v0.2s, v1.1s, v2.1s
+// CHECK-ERROR ^
+// CHECK-ERROR <stdin>:4295:21: error: invalid operand for instruction
+// CHECK-ERROR uzp1 v0.2d, v1.1d, v2.1d
+// CHECK-ERROR ^
+// CHECK-ERROR <stdin>:4296:17: error: invalid operand for instruction
+// CHECK-ERROR uzp1 v0.1d, v1.1d, v2.1d
+// CHECK-ERROR ^
+
+ uzp2 v0.16b, v1.8b, v2.8b
+ uzp2 v0.8b, v1.4b, v2.4b
+ uzp2 v0.8h, v1.4h, v2.4h
+ uzp2 v0.4h, v1.2h, v2.2h
+ uzp2 v0.4s, v1.2s, v2.2s
+ uzp2 v0.2s, v1.1s, v2.1s
+ uzp2 v0.2d, v1.1d, v2.1d
+ uzp2 v0.1d, v1.1d, v2.1d
+
+// CHECK-ERROR <stdin>:4298:22: error: invalid operand for instruction
+// CHECK-ERROR uzp2 v0.16b, v1.8b, v2.8b
+// CHECK-ERROR ^
+// CHECK-ERROR <stdin>:4299:21: error: invalid operand for instruction
+// CHECK-ERROR uzp2 v0.8b, v1.4b, v2.4b
+// CHECK-ERROR ^
+// CHECK-ERROR <stdin>:4300:21: error: invalid operand for instruction
+// CHECK-ERROR uzp2 v0.8h, v1.4h, v2.4h
+// CHECK-ERROR ^
+// CHECK-ERROR <stdin>:4301:21: error: invalid operand for instruction
+// CHECK-ERROR uzp2 v0.4h, v1.2h, v2.2h
+// CHECK-ERROR ^
+// CHECK-ERROR <stdin>:4302:21: error: invalid operand for instruction
+// CHECK-ERROR uzp2 v0.4s, v1.2s, v2.2s
+// CHECK-ERROR ^
+// CHECK-ERROR <stdin>:4303:21: error: invalid operand for instruction
+// CHECK-ERROR uzp2 v0.2s, v1.1s, v2.1s
+// CHECK-ERROR ^
+// CHECK-ERROR <stdin>:4304:21: error: invalid operand for instruction
+// CHECK-ERROR uzp2 v0.2d, v1.1d, v2.1d
+// CHECK-ERROR ^
+// CHECK-ERROR <stdin>:4305:17: error: invalid operand for instruction
+// CHECK-ERROR uzp2 v0.1d, v1.1d, v2.1d
+// CHECK-ERROR ^
+
+ zip1 v0.16b, v1.8b, v2.8b
+ zip1 v0.8b, v1.4b, v2.4b
+ zip1 v0.8h, v1.4h, v2.4h
+ zip1 v0.4h, v1.2h, v2.2h
+ zip1 v0.4s, v1.2s, v2.2s
+ zip1 v0.2s, v1.1s, v2.1s
+ zip1 v0.2d, v1.1d, v2.1d
+ zip1 v0.1d, v1.1d, v2.1d
+
+// CHECK-ERROR <stdin>:4307:22: error: invalid operand for instruction
+// CHECK-ERROR zip1 v0.16b, v1.8b, v2.8b
+// CHECK-ERROR ^
+// CHECK-ERROR <stdin>:4308:21: error: invalid operand for instruction
+// CHECK-ERROR zip1 v0.8b, v1.4b, v2.4b
+// CHECK-ERROR ^
+// CHECK-ERROR <stdin>:4309:21: error: invalid operand for instruction
+// CHECK-ERROR zip1 v0.8h, v1.4h, v2.4h
+// CHECK-ERROR ^
+// CHECK-ERROR <stdin>:4310:21: error: invalid operand for instruction
+// CHECK-ERROR zip1 v0.4h, v1.2h, v2.2h
+// CHECK-ERROR ^
+// CHECK-ERROR <stdin>:4311:21: error: invalid operand for instruction
+// CHECK-ERROR zip1 v0.4s, v1.2s, v2.2s
+// CHECK-ERROR ^
+// CHECK-ERROR <stdin>:4312:21: error: invalid operand for instruction
+// CHECK-ERROR zip1 v0.2s, v1.1s, v2.1s
+// CHECK-ERROR ^
+// CHECK-ERROR <stdin>:4313:21: error: invalid operand for instruction
+// CHECK-ERROR zip1 v0.2d, v1.1d, v2.1d
+// CHECK-ERROR ^
+// CHECK-ERROR <stdin>:4314:17: error: invalid operand for instruction
+// CHECK-ERROR zip1 v0.1d, v1.1d, v2.1d
+// CHECK-ERROR ^
+
+ zip2 v0.16b, v1.8b, v2.8b
+ zip2 v0.8b, v1.4b, v2.4b
+ zip2 v0.8h, v1.4h, v2.4h
+ zip2 v0.4h, v1.2h, v2.2h
+ zip2 v0.4s, v1.2s, v2.2s
+ zip2 v0.2s, v1.1s, v2.1s
+ zip2 v0.2d, v1.1d, v2.1d
+ zip2 v0.1d, v1.1d, v2.1d
+
+// CHECK-ERROR <stdin>:4316:22: error: invalid operand for instruction
+// CHECK-ERROR zip2 v0.16b, v1.8b, v2.8b
+// CHECK-ERROR ^
+// CHECK-ERROR <stdin>:4317:21: error: invalid operand for instruction
+// CHECK-ERROR zip2 v0.8b, v1.4b, v2.4b
+// CHECK-ERROR ^
+// CHECK-ERROR <stdin>:4318:21: error: invalid operand for instruction
+// CHECK-ERROR zip2 v0.8h, v1.4h, v2.4h
+// CHECK-ERROR ^
+// CHECK-ERROR <stdin>:4319:21: error: invalid operand for instruction
+// CHECK-ERROR zip2 v0.4h, v1.2h, v2.2h
+// CHECK-ERROR ^
+// CHECK-ERROR <stdin>:4320:21: error: invalid operand for instruction
+// CHECK-ERROR zip2 v0.4s, v1.2s, v2.2s
+// CHECK-ERROR ^
+// CHECK-ERROR <stdin>:4321:21: error: invalid operand for instruction
+// CHECK-ERROR zip2 v0.2s, v1.1s, v2.1s
+// CHECK-ERROR ^
+// CHECK-ERROR <stdin>:4322:21: error: invalid operand for instruction
+// CHECK-ERROR zip2 v0.2d, v1.1d, v2.1d
+// CHECK-ERROR ^
+// CHECK-ERROR <stdin>:4323:17: error: invalid operand for instruction
+// CHECK-ERROR zip2 v0.1d, v1.1d, v2.1d
+// CHECK-ERROR ^
+
+ trn1 v0.16b, v1.8b, v2.8b
+ trn1 v0.8b, v1.4b, v2.4b
+ trn1 v0.8h, v1.4h, v2.4h
+ trn1 v0.4h, v1.2h, v2.2h
+ trn1 v0.4s, v1.2s, v2.2s
+ trn1 v0.2s, v1.1s, v2.1s
+ trn1 v0.2d, v1.1d, v2.1d
+ trn1 v0.1d, v1.1d, v2.1d
+
+// CHECK-ERROR <stdin>:4325:22: error: invalid operand for instruction
+// CHECK-ERROR trn1 v0.16b, v1.8b, v2.8b
+// CHECK-ERROR ^
+// CHECK-ERROR <stdin>:4326:21: error: invalid operand for instruction
+// CHECK-ERROR trn1 v0.8b, v1.4b, v2.4b
+// CHECK-ERROR ^
+// CHECK-ERROR <stdin>:4327:21: error: invalid operand for instruction
+// CHECK-ERROR trn1 v0.8h, v1.4h, v2.4h
+// CHECK-ERROR ^
+// CHECK-ERROR <stdin>:4328:21: error: invalid operand for instruction
+// CHECK-ERROR trn1 v0.4h, v1.2h, v2.2h
+// CHECK-ERROR ^
+// CHECK-ERROR <stdin>:4329:21: error: invalid operand for instruction
+// CHECK-ERROR trn1 v0.4s, v1.2s, v2.2s
+// CHECK-ERROR ^
+// CHECK-ERROR <stdin>:4330:21: error: invalid operand for instruction
+// CHECK-ERROR trn1 v0.2s, v1.1s, v2.1s
+// CHECK-ERROR ^
+// CHECK-ERROR <stdin>:4331:21: error: invalid operand for instruction
+// CHECK-ERROR trn1 v0.2d, v1.1d, v2.1d
+// CHECK-ERROR ^
+// CHECK-ERROR <stdin>:4332:17: error: invalid operand for instruction
+// CHECK-ERROR trn1 v0.1d, v1.1d, v2.1d
+// CHECK-ERROR ^
+
+ trn2 v0.16b, v1.8b, v2.8b
+ trn2 v0.8b, v1.4b, v2.4b
+ trn2 v0.8h, v1.4h, v2.4h
+ trn2 v0.4h, v1.2h, v2.2h
+ trn2 v0.4s, v1.2s, v2.2s
+ trn2 v0.2s, v1.1s, v2.1s
+ trn2 v0.2d, v1.1d, v2.1d
+ trn2 v0.1d, v1.1d, v2.1d
+
+// CHECK-ERROR <stdin>:4334:22: error: invalid operand for instruction
+// CHECK-ERROR trn2 v0.16b, v1.8b, v2.8b
+// CHECK-ERROR ^
+// CHECK-ERROR <stdin>:4335:21: error: invalid operand for instruction
+// CHECK-ERROR trn2 v0.8b, v1.4b, v2.4b
+// CHECK-ERROR ^
+// CHECK-ERROR <stdin>:4336:21: error: invalid operand for instruction
+// CHECK-ERROR trn2 v0.8h, v1.4h, v2.4h
+// CHECK-ERROR ^
+// CHECK-ERROR <stdin>:4337:21: error: invalid operand for instruction
+// CHECK-ERROR trn2 v0.4h, v1.2h, v2.2h
+// CHECK-ERROR ^
+// CHECK-ERROR <stdin>:4338:21: error: invalid operand for instruction
+// CHECK-ERROR trn2 v0.4s, v1.2s, v2.2s
+// CHECK-ERROR ^
+// CHECK-ERROR <stdin>:4339:21: error: invalid operand for instruction
+// CHECK-ERROR trn2 v0.2s, v1.1s, v2.1s
+// CHECK-ERROR ^
+// CHECK-ERROR <stdin>:4340:21: error: invalid operand for instruction
+// CHECK-ERROR trn2 v0.2d, v1.1d, v2.1d
+// CHECK-ERROR ^
+// CHECK-ERROR <stdin>:4341:17: error: invalid operand for instruction
+// CHECK-ERROR trn2 v0.1d, v1.1d, v2.1d
+// CHECK-ERROR ^
+
+//----------------------------------------------------------------------
+// Permutation with 3 vectors
+//----------------------------------------------------------------------
+
+ uzp1 v0.16b, v1.8b, v2.8b
+ uzp1 v0.8b, v1.4b, v2.4b
+ uzp1 v0.8h, v1.4h, v2.4h
+ uzp1 v0.4h, v1.2h, v2.2h
+ uzp1 v0.4s, v1.2s, v2.2s
+ uzp1 v0.2s, v1.1s, v2.1s
+ uzp1 v0.2d, v1.1d, v2.1d
+ uzp1 v0.1d, v1.1d, v2.1d
+
+// CHECK-ERROR <stdin>:4289:22: error: invalid operand for instruction
+// CHECK-ERROR uzp1 v0.16b, v1.8b, v2.8b
+// CHECK-ERROR ^
+// CHECK-ERROR <stdin>:4290:21: error: invalid operand for instruction
+// CHECK-ERROR uzp1 v0.8b, v1.4b, v2.4b
+// CHECK-ERROR ^
+// CHECK-ERROR <stdin>:4291:21: error: invalid operand for instruction
+// CHECK-ERROR uzp1 v0.8h, v1.4h, v2.4h
+// CHECK-ERROR ^
+// CHECK-ERROR <stdin>:4292:21: error: invalid operand for instruction
+// CHECK-ERROR uzp1 v0.4h, v1.2h, v2.2h
+// CHECK-ERROR ^
+// CHECK-ERROR <stdin>:4293:21: error: invalid operand for instruction
+// CHECK-ERROR uzp1 v0.4s, v1.2s, v2.2s
+// CHECK-ERROR ^
+// CHECK-ERROR <stdin>:4294:21: error: invalid operand for instruction
+// CHECK-ERROR uzp1 v0.2s, v1.1s, v2.1s
+// CHECK-ERROR ^
+// CHECK-ERROR <stdin>:4295:21: error: invalid operand for instruction
+// CHECK-ERROR uzp1 v0.2d, v1.1d, v2.1d
+// CHECK-ERROR ^
+// CHECK-ERROR <stdin>:4296:17: error: invalid operand for instruction
+// CHECK-ERROR uzp1 v0.1d, v1.1d, v2.1d
+// CHECK-ERROR ^
+
+ uzp2 v0.16b, v1.8b, v2.8b
+ uzp2 v0.8b, v1.4b, v2.4b
+ uzp2 v0.8h, v1.4h, v2.4h
+ uzp2 v0.4h, v1.2h, v2.2h
+ uzp2 v0.4s, v1.2s, v2.2s
+ uzp2 v0.2s, v1.1s, v2.1s
+ uzp2 v0.2d, v1.1d, v2.1d
+ uzp2 v0.1d, v1.1d, v2.1d
+
+// CHECK-ERROR <stdin>:4298:22: error: invalid operand for instruction
+// CHECK-ERROR uzp2 v0.16b, v1.8b, v2.8b
+// CHECK-ERROR ^
+// CHECK-ERROR <stdin>:4299:21: error: invalid operand for instruction
+// CHECK-ERROR uzp2 v0.8b, v1.4b, v2.4b
+// CHECK-ERROR ^
+// CHECK-ERROR <stdin>:4300:21: error: invalid operand for instruction
+// CHECK-ERROR uzp2 v0.8h, v1.4h, v2.4h
+// CHECK-ERROR ^
+// CHECK-ERROR <stdin>:4301:21: error: invalid operand for instruction
+// CHECK-ERROR uzp2 v0.4h, v1.2h, v2.2h
+// CHECK-ERROR ^
+// CHECK-ERROR <stdin>:4302:21: error: invalid operand for instruction
+// CHECK-ERROR uzp2 v0.4s, v1.2s, v2.2s
+// CHECK-ERROR ^
+// CHECK-ERROR <stdin>:4303:21: error: invalid operand for instruction
+// CHECK-ERROR uzp2 v0.2s, v1.1s, v2.1s
+// CHECK-ERROR ^
+// CHECK-ERROR <stdin>:4304:21: error: invalid operand for instruction
+// CHECK-ERROR uzp2 v0.2d, v1.1d, v2.1d
+// CHECK-ERROR ^
+// CHECK-ERROR <stdin>:4305:17: error: invalid operand for instruction
+// CHECK-ERROR uzp2 v0.1d, v1.1d, v2.1d
+// CHECK-ERROR ^
+
+ zip1 v0.16b, v1.8b, v2.8b
+ zip1 v0.8b, v1.4b, v2.4b
+ zip1 v0.8h, v1.4h, v2.4h
+ zip1 v0.4h, v1.2h, v2.2h
+ zip1 v0.4s, v1.2s, v2.2s
+ zip1 v0.2s, v1.1s, v2.1s
+ zip1 v0.2d, v1.1d, v2.1d
+ zip1 v0.1d, v1.1d, v2.1d
+
+// CHECK-ERROR <stdin>:4307:22: error: invalid operand for instruction
+// CHECK-ERROR zip1 v0.16b, v1.8b, v2.8b
+// CHECK-ERROR ^
+// CHECK-ERROR <stdin>:4308:21: error: invalid operand for instruction
+// CHECK-ERROR zip1 v0.8b, v1.4b, v2.4b
+// CHECK-ERROR ^
+// CHECK-ERROR <stdin>:4309:21: error: invalid operand for instruction
+// CHECK-ERROR zip1 v0.8h, v1.4h, v2.4h
+// CHECK-ERROR ^
+// CHECK-ERROR <stdin>:4310:21: error: invalid operand for instruction
+// CHECK-ERROR zip1 v0.4h, v1.2h, v2.2h
+// CHECK-ERROR ^
+// CHECK-ERROR <stdin>:4311:21: error: invalid operand for instruction
+// CHECK-ERROR zip1 v0.4s, v1.2s, v2.2s
+// CHECK-ERROR ^
+// CHECK-ERROR <stdin>:4312:21: error: invalid operand for instruction
+// CHECK-ERROR zip1 v0.2s, v1.1s, v2.1s
+// CHECK-ERROR ^
+// CHECK-ERROR <stdin>:4313:21: error: invalid operand for instruction
+// CHECK-ERROR zip1 v0.2d, v1.1d, v2.1d
+// CHECK-ERROR ^
+// CHECK-ERROR <stdin>:4314:17: error: invalid operand for instruction
+// CHECK-ERROR zip1 v0.1d, v1.1d, v2.1d
+// CHECK-ERROR ^
+
+ zip2 v0.16b, v1.8b, v2.8b
+ zip2 v0.8b, v1.4b, v2.4b
+ zip2 v0.8h, v1.4h, v2.4h
+ zip2 v0.4h, v1.2h, v2.2h
+ zip2 v0.4s, v1.2s, v2.2s
+ zip2 v0.2s, v1.1s, v2.1s
+ zip2 v0.2d, v1.1d, v2.1d
+ zip2 v0.1d, v1.1d, v2.1d
+
+// CHECK-ERROR <stdin>:4316:22: error: invalid operand for instruction
+// CHECK-ERROR zip2 v0.16b, v1.8b, v2.8b
+// CHECK-ERROR ^
+// CHECK-ERROR <stdin>:4317:21: error: invalid operand for instruction
+// CHECK-ERROR zip2 v0.8b, v1.4b, v2.4b
+// CHECK-ERROR ^
+// CHECK-ERROR <stdin>:4318:21: error: invalid operand for instruction
+// CHECK-ERROR zip2 v0.8h, v1.4h, v2.4h
+// CHECK-ERROR ^
+// CHECK-ERROR <stdin>:4319:21: error: invalid operand for instruction
+// CHECK-ERROR zip2 v0.4h, v1.2h, v2.2h
+// CHECK-ERROR ^
+// CHECK-ERROR <stdin>:4320:21: error: invalid operand for instruction
+// CHECK-ERROR zip2 v0.4s, v1.2s, v2.2s
+// CHECK-ERROR ^
+// CHECK-ERROR <stdin>:4321:21: error: invalid operand for instruction
+// CHECK-ERROR zip2 v0.2s, v1.1s, v2.1s
+// CHECK-ERROR ^
+// CHECK-ERROR <stdin>:4322:21: error: invalid operand for instruction
+// CHECK-ERROR zip2 v0.2d, v1.1d, v2.1d
+// CHECK-ERROR ^
+// CHECK-ERROR <stdin>:4323:17: error: invalid operand for instruction
+// CHECK-ERROR zip2 v0.1d, v1.1d, v2.1d
+// CHECK-ERROR ^
+
+ trn1 v0.16b, v1.8b, v2.8b
+ trn1 v0.8b, v1.4b, v2.4b
+ trn1 v0.8h, v1.4h, v2.4h
+ trn1 v0.4h, v1.2h, v2.2h
+ trn1 v0.4s, v1.2s, v2.2s
+ trn1 v0.2s, v1.1s, v2.1s
+ trn1 v0.2d, v1.1d, v2.1d
+ trn1 v0.1d, v1.1d, v2.1d
+
+// CHECK-ERROR <stdin>:4325:22: error: invalid operand for instruction
+// CHECK-ERROR trn1 v0.16b, v1.8b, v2.8b
+// CHECK-ERROR ^
+// CHECK-ERROR <stdin>:4326:21: error: invalid operand for instruction
+// CHECK-ERROR trn1 v0.8b, v1.4b, v2.4b
+// CHECK-ERROR ^
+// CHECK-ERROR <stdin>:4327:21: error: invalid operand for instruction
+// CHECK-ERROR trn1 v0.8h, v1.4h, v2.4h
+// CHECK-ERROR ^
+// CHECK-ERROR <stdin>:4328:21: error: invalid operand for instruction
+// CHECK-ERROR trn1 v0.4h, v1.2h, v2.2h
+// CHECK-ERROR ^
+// CHECK-ERROR <stdin>:4329:21: error: invalid operand for instruction
+// CHECK-ERROR trn1 v0.4s, v1.2s, v2.2s
+// CHECK-ERROR ^
+// CHECK-ERROR <stdin>:4330:21: error: invalid operand for instruction
+// CHECK-ERROR trn1 v0.2s, v1.1s, v2.1s
+// CHECK-ERROR ^
+// CHECK-ERROR <stdin>:4331:21: error: invalid operand for instruction
+// CHECK-ERROR trn1 v0.2d, v1.1d, v2.1d
+// CHECK-ERROR ^
+// CHECK-ERROR <stdin>:4332:17: error: invalid operand for instruction
+// CHECK-ERROR trn1 v0.1d, v1.1d, v2.1d
+// CHECK-ERROR ^
+
+ trn2 v0.16b, v1.8b, v2.8b
+ trn2 v0.8b, v1.4b, v2.4b
+ trn2 v0.8h, v1.4h, v2.4h
+ trn2 v0.4h, v1.2h, v2.2h
+ trn2 v0.4s, v1.2s, v2.2s
+ trn2 v0.2s, v1.1s, v2.1s
+ trn2 v0.2d, v1.1d, v2.1d
+ trn2 v0.1d, v1.1d, v2.1d
+
+// CHECK-ERROR <stdin>:4334:22: error: invalid operand for instruction
+// CHECK-ERROR trn2 v0.16b, v1.8b, v2.8b
+// CHECK-ERROR ^
+// CHECK-ERROR <stdin>:4335:21: error: invalid operand for instruction
+// CHECK-ERROR trn2 v0.8b, v1.4b, v2.4b
+// CHECK-ERROR ^
+// CHECK-ERROR <stdin>:4336:21: error: invalid operand for instruction
+// CHECK-ERROR trn2 v0.8h, v1.4h, v2.4h
+// CHECK-ERROR ^
+// CHECK-ERROR <stdin>:4337:21: error: invalid operand for instruction
+// CHECK-ERROR trn2 v0.4h, v1.2h, v2.2h
+// CHECK-ERROR ^
+// CHECK-ERROR <stdin>:4338:21: error: invalid operand for instruction
+// CHECK-ERROR trn2 v0.4s, v1.2s, v2.2s
+// CHECK-ERROR ^
+// CHECK-ERROR <stdin>:4339:21: error: invalid operand for instruction
+// CHECK-ERROR trn2 v0.2s, v1.1s, v2.1s
+// CHECK-ERROR ^
+// CHECK-ERROR <stdin>:4340:21: error: invalid operand for instruction
+// CHECK-ERROR trn2 v0.2d, v1.1d, v2.1d
+// CHECK-ERROR ^
+// CHECK-ERROR <stdin>:4341:17: error: invalid operand for instruction
+// CHECK-ERROR trn2 v0.1d, v1.1d, v2.1d
+// CHECK-ERROR ^
diff --git a/test/MC/AArch64/neon-perm.s b/test/MC/AArch64/neon-perm.s
new file mode 100644
index 0000000000..20a4acde37
--- /dev/null
+++ b/test/MC/AArch64/neon-perm.s
@@ -0,0 +1,103 @@
+// RUN: llvm-mc -triple=aarch64 -mattr=+neon -show-encoding < %s | FileCheck %s
+
+// Check that the assembler can handle the documented syntax for AArch64
+
+//------------------------------------------------------------------------------
+// Instructions for permute
+//------------------------------------------------------------------------------
+
+ uzp1 v0.8b, v1.8b, v2.8b
+ uzp1 v0.16b, v1.16b, v2.16b
+ uzp1 v0.4h, v1.4h, v2.4h
+ uzp1 v0.8h, v1.8h, v2.8h
+ uzp1 v0.2s, v1.2s, v2.2s
+ uzp1 v0.4s, v1.4s, v2.4s
+ uzp1 v0.2d, v1.2d, v2.2d
+
+// CHECK: uzp1 v0.8b, v1.8b, v2.8b // encoding: [0x20,0x18,0x02,0x0e]
+// CHECK: uzp1 v0.16b, v1.16b, v2.16b // encoding: [0x20,0x18,0x02,0x4e]
+// CHECK: uzp1 v0.4h, v1.4h, v2.4h // encoding: [0x20,0x18,0x42,0x0e]
+// CHECK: uzp1 v0.8h, v1.8h, v2.8h // encoding: [0x20,0x18,0x42,0x4e]
+// CHECK: uzp1 v0.2s, v1.2s, v2.2s // encoding: [0x20,0x18,0x82,0x0e]
+// CHECK: uzp1 v0.4s, v1.4s, v2.4s // encoding: [0x20,0x18,0x82,0x4e]
+// CHECK: uzp1 v0.2d, v1.2d, v2.2d // encoding: [0x20,0x18,0xc2,0x4e]
+
+ trn1 v0.8b, v1.8b, v2.8b
+ trn1 v0.16b, v1.16b, v2.16b
+ trn1 v0.4h, v1.4h, v2.4h
+ trn1 v0.8h, v1.8h, v2.8h
+ trn1 v0.2s, v1.2s, v2.2s
+ trn1 v0.4s, v1.4s, v2.4s
+ trn1 v0.2d, v1.2d, v2.2d
+
+// CHECK: trn1 v0.8b, v1.8b, v2.8b // encoding: [0x20,0x28,0x02,0x0e]
+// CHECK: trn1 v0.16b, v1.16b, v2.16b // encoding: [0x20,0x28,0x02,0x4e]
+// CHECK: trn1 v0.4h, v1.4h, v2.4h // encoding: [0x20,0x28,0x42,0x0e]
+// CHECK: trn1 v0.8h, v1.8h, v2.8h // encoding: [0x20,0x28,0x42,0x4e]
+// CHECK: trn1 v0.2s, v1.2s, v2.2s // encoding: [0x20,0x28,0x82,0x0e]
+// CHECK: trn1 v0.4s, v1.4s, v2.4s // encoding: [0x20,0x28,0x82,0x4e]
+// CHECK: trn1 v0.2d, v1.2d, v2.2d // encoding: [0x20,0x28,0xc2,0x4e]
+
+ zip1 v0.8b, v1.8b, v2.8b
+ zip1 v0.16b, v1.16b, v2.16b
+ zip1 v0.4h, v1.4h, v2.4h
+ zip1 v0.8h, v1.8h, v2.8h
+ zip1 v0.2s, v1.2s, v2.2s
+ zip1 v0.4s, v1.4s, v2.4s
+ zip1 v0.2d, v1.2d, v2.2d
+
+// CHECK: zip1 v0.8b, v1.8b, v2.8b // encoding: [0x20,0x38,0x02,0x0e]
+// CHECK: zip1 v0.16b, v1.16b, v2.16b // encoding: [0x20,0x38,0x02,0x4e]
+// CHECK: zip1 v0.4h, v1.4h, v2.4h // encoding: [0x20,0x38,0x42,0x0e]
+// CHECK: zip1 v0.8h, v1.8h, v2.8h // encoding: [0x20,0x38,0x42,0x4e]
+// CHECK: zip1 v0.2s, v1.2s, v2.2s // encoding: [0x20,0x38,0x82,0x0e]
+// CHECK: zip1 v0.4s, v1.4s, v2.4s // encoding: [0x20,0x38,0x82,0x4e]
+// CHECK: zip1 v0.2d, v1.2d, v2.2d // encoding: [0x20,0x38,0xc2,0x4e]
+
+ uzp2 v0.8b, v1.8b, v2.8b
+ uzp2 v0.16b, v1.16b, v2.16b
+ uzp2 v0.4h, v1.4h, v2.4h
+ uzp2 v0.8h, v1.8h, v2.8h
+ uzp2 v0.2s, v1.2s, v2.2s
+ uzp2 v0.4s, v1.4s, v2.4s
+ uzp2 v0.2d, v1.2d, v2.2d
+
+// CHECK: uzp2 v0.8b, v1.8b, v2.8b // encoding: [0x20,0x58,0x02,0x0e]
+// CHECK: uzp2 v0.16b, v1.16b, v2.16b // encoding: [0x20,0x58,0x02,0x4e]
+// CHECK: uzp2 v0.4h, v1.4h, v2.4h // encoding: [0x20,0x58,0x42,0x0e]
+// CHECK: uzp2 v0.8h, v1.8h, v2.8h // encoding: [0x20,0x58,0x42,0x4e]
+// CHECK: uzp2 v0.2s, v1.2s, v2.2s // encoding: [0x20,0x58,0x82,0x0e]
+// CHECK: uzp2 v0.4s, v1.4s, v2.4s // encoding: [0x20,0x58,0x82,0x4e]
+// CHECK: uzp2 v0.2d, v1.2d, v2.2d // encoding: [0x20,0x58,0xc2,0x4e]
+
+ trn2 v0.8b, v1.8b, v2.8b
+ trn2 v0.16b, v1.16b, v2.16b
+ trn2 v0.4h, v1.4h, v2.4h
+ trn2 v0.8h, v1.8h, v2.8h
+ trn2 v0.2s, v1.2s, v2.2s
+ trn2 v0.4s, v1.4s, v2.4s
+ trn2 v0.2d, v1.2d, v2.2d
+
+// CHECK: trn2 v0.8b, v1.8b, v2.8b // encoding: [0x20,0x68,0x02,0x0e]
+// CHECK: trn2 v0.16b, v1.16b, v2.16b // encoding: [0x20,0x68,0x02,0x4e]
+// CHECK: trn2 v0.4h, v1.4h, v2.4h // encoding: [0x20,0x68,0x42,0x0e]
+// CHECK: trn2 v0.8h, v1.8h, v2.8h // encoding: [0x20,0x68,0x42,0x4e]
+// CHECK: trn2 v0.2s, v1.2s, v2.2s // encoding: [0x20,0x68,0x82,0x0e]
+// CHECK: trn2 v0.4s, v1.4s, v2.4s // encoding: [0x20,0x68,0x82,0x4e]
+// CHECK: trn2 v0.2d, v1.2d, v2.2d // encoding: [0x20,0x68,0xc2,0x4e]
+
+ zip2 v0.8b, v1.8b, v2.8b
+ zip2 v0.16b, v1.16b, v2.16b
+ zip2 v0.4h, v1.4h, v2.4h
+ zip2 v0.8h, v1.8h, v2.8h
+ zip2 v0.2s, v1.2s, v2.2s
+ zip2 v0.4s, v1.4s, v2.4s
+ zip2 v0.2d, v1.2d, v2.2d
+
+// CHECK: zip2 v0.8b, v1.8b, v2.8b // encoding: [0x20,0x78,0x02,0x0e]
+// CHECK: zip2 v0.16b, v1.16b, v2.16b // encoding: [0x20,0x78,0x02,0x4e]
+// CHECK: zip2 v0.4h, v1.4h, v2.4h // encoding: [0x20,0x78,0x42,0x0e]
+// CHECK: zip2 v0.8h, v1.8h, v2.8h // encoding: [0x20,0x78,0x42,0x4e]
+// CHECK: zip2 v0.2s, v1.2s, v2.2s // encoding: [0x20,0x78,0x82,0x0e]
+// CHECK: zip2 v0.4s, v1.4s, v2.4s // encoding: [0x20,0x78,0x82,0x4e]
+// CHECK: zip2 v0.2d, v1.2d, v2.2d // encoding: [0x20,0x78,0xc2,0x4e]
diff --git a/test/MC/Disassembler/AArch64/neon-instructions.txt b/test/MC/Disassembler/AArch64/neon-instructions.txt
index 225bb16212..e4aad48f47 100644
--- a/test/MC/Disassembler/AArch64/neon-instructions.txt
+++ b/test/MC/Disassembler/AArch64/neon-instructions.txt
@@ -2051,3 +2051,110 @@ G# RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -disassemble < %s |
# CHECK: ext v0.8b, v1.8b, v2.8b, #0x3
# CHECK: ext v0.16b, v1.16b, v2.16b, #0x3
+#----------------------------------------------------------------------
+# unzip with 3 same vectors to get primary result
+#----------------------------------------------------------------------
+# CHECK: uzp1 v1.8b, v1.8b, v2.8b
+# CHECK: uzp1 v2.16b, v1.16b, v2.16b
+# CHECK: uzp1 v3.4h, v1.4h, v2.4h
+# CHECK: uzp1 v4.8h, v1.8h, v2.8h
+# CHECK: uzp1 v5.2s, v1.2s, v2.2s
+# CHECK: uzp1 v6.4s, v1.4s, v2.4s
+# CHECK: uzp1 v7.2d, v1.2d, v2.2d
+0x21,0x18,0x02,0x0e
+0x22,0x18,0x02,0x4e
+0x23,0x18,0x42,0x0e
+0x24,0x18,0x42,0x4e
+0x25,0x18,0x82,0x0e
+0x26,0x18,0x82,0x4e
+0x27,0x18,0xc2,0x4e
+
+#----------------------------------------------------------------------
+# transpose with 3 same vectors to get primary result
+#----------------------------------------------------------------------
+# CHECK: trn1 v8.8b, v1.8b, v2.8b
+# CHECK: trn1 v9.16b, v1.16b, v2.16b
+# CHECK: trn1 v10.4h, v1.4h, v2.4h
+# CHECK: trn1 v27.8h, v7.8h, v2.8h
+# CHECK: trn1 v12.2s, v7.2s, v2.2s
+# CHECK: trn1 v29.4s, v6.4s, v2.4s
+# CHECK: trn1 v14.2d, v6.2d, v2.2d
+0x28,0x28,0x02,0x0e
+0x29,0x28,0x02,0x4e
+0x2a,0x28,0x42,0x0e
+0xfb,0x28,0x42,0x4e
+0xec,0x28,0x82,0x0e
+0xdd,0x28,0x82,0x4e
+0xce,0x28,0xc2,0x4e
+
+#----------------------------------------------------------------------
+# zip with 3 same vectors to get primary result
+#----------------------------------------------------------------------
+# CHECK: zip1 v31.8b, v5.8b, v2.8b
+# CHECK: zip1 v0.16b, v5.16b, v2.16b
+# CHECK: zip1 v17.4h, v4.4h, v2.4h
+# CHECK: zip1 v2.8h, v4.8h, v2.8h
+# CHECK: zip1 v19.2s, v3.2s, v2.2s
+# CHECK: zip1 v4.4s, v3.4s, v2.4s
+# CHECK: zip1 v21.2d, v2.2d, v2.2d
+0xbf,0x38,0x02,0x0e
+0xa0,0x38,0x02,0x4e
+0x91,0x38,0x42,0x0e
+0x82,0x38,0x42,0x4e
+0x73,0x38,0x82,0x0e
+0x64,0x38,0x82,0x4e
+0x55,0x38,0xc2,0x4e
+
+#----------------------------------------------------------------------
+# unzip with 3 same vectors to get secondary result
+#----------------------------------------------------------------------
+# CHECK: uzp2 v6.8b, v2.8b, v2.8b
+# CHECK: uzp2 v23.16b, v1.16b, v2.16b
+# CHECK: uzp2 v8.4h, v1.4h, v2.4h
+# CHECK: uzp2 v25.8h, v0.8h, v2.8h
+# CHECK: uzp2 v10.2s, v0.2s, v2.2s
+# CHECK: uzp2 v27.4s, v7.4s, v2.4s
+# CHECK: uzp2 v12.2d, v7.2d, v2.2d
+0x46,0x58,0x02,0x0e
+0x37,0x58,0x02,0x4e
+0x28,0x58,0x42,0x0e
+0x19,0x58,0x42,0x4e
+0x0a,0x58,0x82,0x0e
+0xfb,0x58,0x82,0x4e
+0xec,0x58,0xc2,0x4e
+
+#----------------------------------------------------------------------
+# transpose with 3 same vectors to get secondary result
+#----------------------------------------------------------------------
+# CHECK: trn2 v29.8b, v6.8b, v2.8b
+# CHECK: trn2 v14.16b, v6.16b, v2.16b
+# CHECK: trn2 v31.4h, v5.4h, v2.4h
+# CHECK: trn2 v0.8h, v5.8h, v2.8h
+# CHECK: trn2 v17.2s, v4.2s, v2.2s
+# CHECK: trn2 v2.4s, v4.4s, v2.4s
+# CHECK: trn2 v19.2d, v3.2d, v2.2d
+0xdd,0x68,0x02,0x0e
+0xce,0x68,0x02,0x4e
+0xbf,0x68,0x42,0x0e
+0xa0,0x68,0x42,0x4e
+0x91,0x68,0x82,0x0e
+0x82,0x68,0x82,0x4e
+0x73,0x68,0xc2,0x4e
+
+#----------------------------------------------------------------------
+# zip with 3 same vectors to get secondary result
+#----------------------------------------------------------------------
+# CHECK: zip2 v4.8b, v3.8b, v2.8b
+# CHECK: zip2 v21.16b, v2.16b, v2.16b
+# CHECK: zip2 v6.4h, v2.4h, v2.4h
+# CHECK: zip2 v23.8h, v1.8h, v2.8h
+# CHECK: zip2 v8.2s, v1.2s, v2.2s
+# CHECK: zip2 v25.4s, v0.4s, v2.4s
+# CHECK: zip2 v10.2d, v0.2d, v2.2d
+0x64,0x78,0x02,0x0e
+0x55,0x78,0x02,0x4e
+0x46,0x78,0x42,0x0e
+0x37,0x78,0x42,0x4e
+0x28,0x78,0x82,0x0e
+0x19,0x78,0x82,0x4e
+0x0a,0x78,0xc2,0x4e