6 files changed, 2080 insertions, 129 deletions
diff --git a/include/llvm/IR/IntrinsicsAArch64.td b/include/llvm/IR/IntrinsicsAArch64.td
index 4d2e0530c9..d015d91507 100644
--- a/include/llvm/IR/IntrinsicsAArch64.td
+++ b/include/llvm/IR/IntrinsicsAArch64.td
@@ -22,6 +22,39 @@ def int_aarch64_neon_vacgeq :
 def int_aarch64_neon_vacgtq :
   Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
 
+// Vector saturating accumulate
+def int_aarch64_neon_suqadd : Neon_2Arg_Intrinsic;
+def int_aarch64_neon_usqadd : Neon_2Arg_Intrinsic;
+
+// Vector Bitwise reverse
+def int_aarch64_neon_rbit : Neon_1Arg_Intrinsic;
+
+// Vector extract and narrow
+def int_aarch64_neon_xtn : 
+  Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
+
+// Vector floating-point round, square root and convert
+def int_aarch64_neon_frintn : Neon_1Arg_Intrinsic;
+def int_aarch64_neon_fsqrt : Neon_1Arg_Intrinsic;
+def int_aarch64_neon_fcvtxn :
+  Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
+def int_aarch64_neon_fcvtns : 
+  Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
+def int_aarch64_neon_fcvtnu :
+  Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
+def int_aarch64_neon_fcvtps :
+  Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
+def int_aarch64_neon_fcvtpu :
+  Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
+def int_aarch64_neon_fcvtms :
+  Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
+def int_aarch64_neon_fcvtmu :
+  Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
+def int_aarch64_neon_fcvtas :
+  Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
+def int_aarch64_neon_fcvtau :
+  Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
+
 // Vector maxNum (Floating Point)
 def int_aarch64_neon_vmaxnm : Neon_2Arg_Intrinsic;
 
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index ab46d7f7b3..b437ce1b45 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -338,6 +338,30 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM)
     setOperationAction(ISD::SETCC, MVT::v4f32, Custom);
     setOperationAction(ISD::SETCC, MVT::v1f64, Custom);
     setOperationAction(ISD::SETCC, MVT::v2f64, Custom);
+
+    setOperationAction(ISD::FFLOOR, MVT::v2f32, Legal);
+    setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
+    setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
+
+    setOperationAction(ISD::FCEIL, MVT::v2f32, Legal);
+    setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
+    setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
+
+    setOperationAction(ISD::FTRUNC, MVT::v2f32, Legal);
+    setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
+    setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
+
+    setOperationAction(ISD::FRINT, MVT::v2f32, Legal);
+    setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
+    setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
+
+    setOperationAction(ISD::FNEARBYINT, MVT::v2f32, Legal);
+    setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
+    setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
+
+    setOperationAction(ISD::FROUND, MVT::v2f32, Legal);
+    setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
+    setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
   }
 }
 
@@ -891,6 +915,12 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
     return "AArch64ISD::NEON_VDUP";
   case AArch64ISD::NEON_VDUPLANE:
     return "AArch64ISD::NEON_VDUPLANE";
+  case AArch64ISD::NEON_REV16:
+    return "AArch64ISD::NEON_REV16";
+  case AArch64ISD::NEON_REV32:
+    return "AArch64ISD::NEON_REV32";
+  case AArch64ISD::NEON_REV64:
+    return "AArch64ISD::NEON_REV64";
   case AArch64ISD::NEON_LD1_UPD:
     return "AArch64ISD::NEON_LD1_UPD";
   case AArch64ISD::NEON_LD2_UPD:
@@ -3797,6 +3827,36 @@ AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
   return SDValue();
 }
 
+/// isREVMask - Check if shuffle mask M on vector type VT corresponds to a REV
+/// instruction with block size BlockSize (in bits): the order of the elements
+/// within each block is reversed.  UNDEF (negative) indices match anything.
+static bool isREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {
+  assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) &&
+         "Only possible block sizes for REV are: 16, 32, 64");
+
+  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
+  if (EltSz == 64)
+    return false; // 64-bit elements never form a REV mask here
+
+  unsigned NumElts = VT.getVectorNumElements();
+  unsigned BlockElts = M[0] + 1; // reversed block 0 starts at its last elt
+  // If the first shuffle index is UNDEF, optimistically assume a full block.
+  if (M[0] < 0)
+    BlockElts = BlockSize / EltSz;
+
+  if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
+    return false; // need >1 elt per block, and M[0] must imply BlockSize
+  // Every defined index i must be the mirror of i within its own block.
+  for (unsigned i = 0; i < NumElts; ++i) {
+    if (M[i] < 0)
+      continue; // ignore UNDEF indices
+    if ((unsigned)M[i] != (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts))
+      return false;
+  }
+
+  return true;
+}
+
 SDValue
 AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
                                            SelectionDAG &DAG) const {
@@ -3816,6 +3876,13 @@ AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
   if (EltSize > 64)
     return SDValue();
 
+  if (isREVMask(ShuffleMask, VT, 64))
+    return DAG.getNode(AArch64ISD::NEON_REV64, dl, VT, V1);
+  if (isREVMask(ShuffleMask, VT, 32))
+    return DAG.getNode(AArch64ISD::NEON_REV32, dl, VT, V1);
+  if (isREVMask(ShuffleMask, VT, 16))
+    return DAG.getNode(AArch64ISD::NEON_REV16, dl, VT, V1);
+
   // If the element of shuffle mask are all the same constant, we can
   // transform it into either NEON_VDUP or NEON_VDUPLANE
   if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) {
diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h
index 83fd79d6ba..be55395a0d 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/lib/Target/AArch64/AArch64ISelLowering.h
@@ -125,6 +125,11 @@ namespace AArch64ISD {
     // Vector FP move immediate
     NEON_FMOVIMM,
 
+    // Vector Element reverse
+    NEON_REV64,
+    NEON_REV32,
+    NEON_REV16,
+
     // Vector compare
     NEON_CMP,
 
diff --git a/lib/Target/AArch64/AArch64InstrNEON.td b/lib/Target/AArch64/AArch64InstrNEON.td
index 958d1a0549..6822f0ce27 100644
--- a/lib/Target/AArch64/AArch64InstrNEON.td
+++ b/lib/Target/AArch64/AArch64InstrNEON.td
@@ -46,6 +46,10 @@ def SDTARMVSH : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
 def Neon_sqrshlImm   : SDNode<"AArch64ISD::NEON_QSHLs", SDTARMVSH>;
 def Neon_uqrshlImm   : SDNode<"AArch64ISD::NEON_QSHLu", SDTARMVSH>;
 
+def SDTVSHUF : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
+def Neon_rev64    : SDNode<"AArch64ISD::NEON_REV64", SDTVSHUF>;
+def Neon_rev32    : SDNode<"AArch64ISD::NEON_REV32", SDTVSHUF>;
+def Neon_rev16    : SDNode<"AArch64ISD::NEON_REV16", SDTVSHUF>;
 def Neon_vdup : SDNode<"AArch64ISD::NEON_VDUP", SDTypeProfile<1, 1,
                        [SDTCisVec<0>]>>;
 def Neon_vduplane : SDNode<"AArch64ISD::NEON_VDUPLANE", SDTypeProfile<1, 2,
@@ -1610,6 +1614,21 @@ def Neon_low4f : PatFrag<(ops node:$in),
                          (v2f32 (extract_subvector (v4f32 node:$in),
                                                    (iPTR 0)))>;
 
+def neon_uimm3_shift : Operand<i32>, // 0..7; ImmLeaf's Imm is signed
+                         ImmLeaf<i32, [{return Imm >= 0 && Imm < 8;}]> {
+  let ParserMatchClass = uimm3_asmoperand;
+}
+
+def neon_uimm4_shift : Operand<i32>, // 0..15; ImmLeaf's Imm is signed
+                         ImmLeaf<i32, [{return Imm >= 0 && Imm < 16;}]> {
+  let ParserMatchClass = uimm4_asmoperand;
+}
+
+def neon_uimm5_shift : Operand<i32>, // 0..31; ImmLeaf's Imm is signed
+                         ImmLeaf<i32, [{return Imm >= 0 && Imm < 32;}]> {
+  let ParserMatchClass = uimm5_asmoperand;
+}
+
 class N2VShiftLong<bit q, bit u, bits<5> opcode, string asmop, string DestT,
                    string SrcT, ValueType DestTy, ValueType SrcTy,
                    Operand ImmTy, SDPatternOperator ExtOp>
@@ -1619,7 +1638,7 @@ class N2VShiftLong<bit q, bit u, bits<5> opcode, string asmop, string DestT,
                      [(set (DestTy VPR128:$Rd),
                         (DestTy (shl
                           (DestTy (ExtOp (SrcTy VPR64:$Rn))),
-                            (DestTy (Neon_vdup (i32 imm:$Imm))))))],
+                            (DestTy (Neon_vdup (i32 ImmTy:$Imm))))))],
                      NoItinerary>;
 
 class N2VShiftLongHigh<bit q, bit u, bits<5> opcode, string asmop, string DestT,
@@ -1633,40 +1652,40 @@ class N2VShiftLongHigh<bit q, bit u, bits<5> opcode, string asmop, string DestT,
                         (DestTy (shl
                           (DestTy (ExtOp
                             (SrcTy (getTop VPR128:$Rn)))),
-                              (DestTy (Neon_vdup (i32 imm:$Imm))))))],
+                              (DestTy (Neon_vdup (i32 ImmTy:$Imm))))))],
                      NoItinerary>;
 
 multiclass NeonI_N2VShLL<string prefix, bit u, bits<5> opcode, string asmop,
                          SDNode ExtOp> {
   // 64-bit vector types.
   def _8B : N2VShiftLong<0b0, u, opcode, asmop, "8h", "8b", v8i16, v8i8,
-                         uimm3, ExtOp> {
+                         neon_uimm3_shift, ExtOp> {
     let Inst{22-19} = 0b0001;  // immh:immb = 0001xxx
   }
 
   def _4H : N2VShiftLong<0b0, u, opcode, asmop, "4s", "4h", v4i32, v4i16,
-                         uimm4, ExtOp> {
+                         neon_uimm4_shift, ExtOp> {
     let Inst{22-20} = 0b001;   // immh:immb = 001xxxx
   }
 
   def _2S : N2VShiftLong<0b0, u, opcode, asmop, "2d", "2s", v2i64, v2i32,
-                         uimm5, ExtOp> {
+                         neon_uimm5_shift, ExtOp> {
     let Inst{22-21} = 0b01;    // immh:immb = 01xxxxx
   }
 
   // 128-bit vector types
-  def _16B : N2VShiftLongHigh<0b1, u, opcode, asmop, "8h", "16b",
-                              v8i16, v8i8, 8, uimm3, ExtOp, Neon_High16B> {
+  def _16B : N2VShiftLongHigh<0b1, u, opcode, asmop, "8h", "16b", v8i16, v8i8,
+                              8, neon_uimm3_shift, ExtOp, Neon_High16B> {
     let Inst{22-19} = 0b0001;  // immh:immb = 0001xxx
   }
 
-  def _8H : N2VShiftLongHigh<0b1, u, opcode, asmop, "4s", "8h",
-                             v4i32, v4i16, 4, uimm4, ExtOp, Neon_High8H> {
+  def _8H : N2VShiftLongHigh<0b1, u, opcode, asmop, "4s", "8h", v4i32, v4i16,
+                             4, neon_uimm4_shift, ExtOp, Neon_High8H> {
     let Inst{22-20} = 0b001;   // immh:immb = 001xxxx
   }
 
-  def _4S : N2VShiftLongHigh<0b1, u, opcode, asmop, "2d", "4s",
-                             v2i64, v2i32, 2, uimm5, ExtOp, Neon_High4S> {
+  def _4S : N2VShiftLongHigh<0b1, u, opcode, asmop, "2d", "4s", v2i64, v2i32,
+                             2, neon_uimm5_shift, ExtOp, Neon_High4S> {
     let Inst{22-21} = 0b01;    // immh:immb = 01xxxxx
   }
 
@@ -4693,25 +4712,25 @@ def neon_uimm0_bare : Operand<i64>,
 }
 
 def neon_uimm1_bare : Operand<i64>,
-                        ImmLeaf<i64, [{(void)Imm; return true;}]> {
+                        ImmLeaf<i64, [{return Imm >= 0 && Imm < 2;}]> {
   let ParserMatchClass = neon_uimm1_asmoperand;
   let PrintMethod = "printUImmBareOperand";
 }
 
 def neon_uimm2_bare : Operand<i64>,
-                        ImmLeaf<i64, [{(void)Imm; return true;}]> {
+                        ImmLeaf<i64, [{return Imm >= 0 && Imm < 4;}]> {
   let ParserMatchClass = neon_uimm2_asmoperand;
   let PrintMethod = "printUImmBareOperand";
 }
 
 def neon_uimm3_bare : Operand<i64>,
-                        ImmLeaf<i64, [{(void)Imm; return true;}]> {
+                        ImmLeaf<i64, [{return Imm >= 0 && Imm < 8;}]> {
   let ParserMatchClass = uimm3_asmoperand;
   let PrintMethod = "printUImmBareOperand";
 }
 
 def neon_uimm4_bare : Operand<i64>,
-                        ImmLeaf<i64, [{(void)Imm; return true;}]> {
+                        ImmLeaf<i64, [{return Imm >= 0 && Imm < 16;}]> {
   let ParserMatchClass = uimm4_asmoperand;
   let PrintMethod = "printUImmBareOperand";
 }
@@ -5096,13 +5115,13 @@ def : Pat<(v4f32  (bitconvert (f128   FPR128:$src))), (v4f32 FPR128:$src)>;
 def : Pat<(v2f64  (bitconvert (f128   FPR128:$src))), (v2f64 FPR128:$src)>;
 
 def neon_uimm3 : Operand<i64>,
-                   ImmLeaf<i64, [{(void)Imm; return true;}]> {
+                   ImmLeaf<i64, [{return Imm >= 0 && Imm < 8;}]> {
   let ParserMatchClass = uimm3_asmoperand;
   let PrintMethod = "printUImmHexOperand";
 }
 
 def neon_uimm4 : Operand<i64>,
-                   ImmLeaf<i64, [{(void)Imm; return true;}]> {
+                   ImmLeaf<i64, [{return Imm >= 0 && Imm < 16;}]> {
   let ParserMatchClass = uimm4_asmoperand;
   let PrintMethod = "printUImmHexOperand";
 }
@@ -6538,6 +6557,855 @@ def : Pat<(v2f32 (extract_subvector (v4f32 VPR128:$Rn), (i64 0))),
 def : Pat<(v1f64 (extract_subvector (v2f64 VPR128:$Rn), (i64 0))),
           (v1f64 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
 
+class NeonI_REV<string asmop, string Res, bits<2> size, bit Q, bit U,
+                bits<5> opcode, RegisterOperand ResVPR, ValueType ResTy,
+                SDPatternOperator Neon_Rev>
+  : NeonI_2VMisc<Q, U, size, opcode,
+               (outs ResVPR:$Rd), (ins ResVPR:$Rn),
+               asmop # "\t$Rd." # Res # ", $Rn." # Res,
+               [(set (ResTy ResVPR:$Rd),
+                  (ResTy (Neon_Rev (ResTy ResVPR:$Rn))))],
+               NoItinerary> ;
+
+def REV64_16b : NeonI_REV<"rev64", "16b", 0b00, 0b1, 0b0, 0b00000, VPR128,
+                          v16i8, Neon_rev64>;
+def REV64_8h : NeonI_REV<"rev64", "8h", 0b01, 0b1, 0b0, 0b00000, VPR128,
+                         v8i16, Neon_rev64>;
+def REV64_4s : NeonI_REV<"rev64", "4s", 0b10, 0b1, 0b0, 0b00000, VPR128,
+                         v4i32, Neon_rev64>;
+def REV64_8b : NeonI_REV<"rev64", "8b", 0b00, 0b0, 0b0, 0b00000, VPR64,
+                         v8i8, Neon_rev64>;
+def REV64_4h : NeonI_REV<"rev64", "4h", 0b01, 0b0, 0b0, 0b00000, VPR64,
+                         v4i16, Neon_rev64>;
+def REV64_2s : NeonI_REV<"rev64", "2s", 0b10, 0b0, 0b0, 0b00000, VPR64,
+                         v2i32, Neon_rev64>;
+
+def : Pat<(v4f32 (Neon_rev64 (v4f32 VPR128:$Rn))), (REV64_4s VPR128:$Rn)>;
+def : Pat<(v2f32 (Neon_rev64 (v2f32 VPR64:$Rn))), (REV64_2s VPR64:$Rn)>;
+
+def REV32_16b : NeonI_REV<"rev32", "16b", 0b00, 0b1, 0b1, 0b00000, VPR128,
+                          v16i8, Neon_rev32>;
+def REV32_8h : NeonI_REV<"rev32", "8h", 0b01, 0b1, 0b1, 0b00000, VPR128,
+                          v8i16, Neon_rev32>;
+def REV32_8b : NeonI_REV<"rev32", "8b", 0b00, 0b0, 0b1, 0b00000, VPR64,
+                         v8i8, Neon_rev32>;
+def REV32_4h : NeonI_REV<"rev32", "4h", 0b01, 0b0, 0b1, 0b00000, VPR64,
+                         v4i16, Neon_rev32>;
+
+def REV16_16b : NeonI_REV<"rev16", "16b", 0b00, 0b1, 0b0, 0b00001, VPR128,
+                          v16i8, Neon_rev16>;
+def REV16_8b : NeonI_REV<"rev16", "8b", 0b00, 0b0, 0b0, 0b00001, VPR64,
+                         v8i8, Neon_rev16>;
+
+multiclass NeonI_PairwiseAdd<string asmop, bit U, bits<5> opcode,
+                             SDPatternOperator Neon_Padd> {
+  def 16b8h : NeonI_2VMisc<0b1, U, 0b00, opcode,
+                           (outs VPR128:$Rd), (ins VPR128:$Rn),
+                           asmop # "\t$Rd.8h, $Rn.16b",
+                           [(set (v8i16 VPR128:$Rd),
+                              (v8i16 (Neon_Padd (v16i8 VPR128:$Rn))))],
+                           NoItinerary>;
+  
+  def 8b4h : NeonI_2VMisc<0b0, U, 0b00, opcode,
+                          (outs VPR64:$Rd), (ins VPR64:$Rn),
+                          asmop # "\t$Rd.4h, $Rn.8b",
+                          [(set (v4i16 VPR64:$Rd),
+                             (v4i16 (Neon_Padd (v8i8 VPR64:$Rn))))],
+                          NoItinerary>;
+  
+  def 8h4s : NeonI_2VMisc<0b1, U, 0b01, opcode,
+                           (outs VPR128:$Rd), (ins VPR128:$Rn),
+                           asmop # "\t$Rd.4s, $Rn.8h",
+                           [(set (v4i32 VPR128:$Rd),
+                              (v4i32 (Neon_Padd (v8i16 VPR128:$Rn))))],
+                           NoItinerary>;
+  
+  def 4h2s : NeonI_2VMisc<0b0, U, 0b01, opcode,
+                          (outs VPR64:$Rd), (ins VPR64:$Rn),
+                          asmop # "\t$Rd.2s, $Rn.4h",
+                          [(set (v2i32 VPR64:$Rd),
+                             (v2i32 (Neon_Padd (v4i16 VPR64:$Rn))))],
+                          NoItinerary>;
+  
+  def 4s2d : NeonI_2VMisc<0b1, U, 0b10, opcode,
+                           (outs VPR128:$Rd), (ins VPR128:$Rn),
+                           asmop # "\t$Rd.2d, $Rn.4s",
+                           [(set (v2i64 VPR128:$Rd),
+                              (v2i64 (Neon_Padd (v4i32 VPR128:$Rn))))],
+                           NoItinerary>;
+  
+  def 2s1d : NeonI_2VMisc<0b0, U, 0b10, opcode,
+                          (outs VPR64:$Rd), (ins VPR64:$Rn),
+                          asmop # "\t$Rd.1d, $Rn.2s",
+                          [(set (v1i64 VPR64:$Rd),
+                             (v1i64 (Neon_Padd (v2i32 VPR64:$Rn))))],
+                          NoItinerary>;
+}
+
+defm SADDLP : NeonI_PairwiseAdd<"saddlp", 0b0, 0b00010,
+                                int_arm_neon_vpaddls>;
+defm UADDLP : NeonI_PairwiseAdd<"uaddlp", 0b1, 0b00010,
+                                int_arm_neon_vpaddlu>;
+
+multiclass NeonI_PairwiseAddAcc<string asmop, bit U, bits<5> opcode,
+                             SDPatternOperator Neon_Padd> {
+  let Constraints = "$src = $Rd" in {
+    def 16b8h : NeonI_2VMisc<0b1, U, 0b00, opcode,
+                             (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
+                             asmop # "\t$Rd.8h, $Rn.16b",
+                             [(set (v8i16 VPR128:$Rd),
+                                (v8i16 (Neon_Padd 
+                                  (v8i16 VPR128:$src), (v16i8 VPR128:$Rn))))],
+                             NoItinerary>;
+    
+    def 8b4h : NeonI_2VMisc<0b0, U, 0b00, opcode,
+                            (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn),
+                            asmop # "\t$Rd.4h, $Rn.8b",
+                            [(set (v4i16 VPR64:$Rd),
+                               (v4i16 (Neon_Padd 
+                                 (v4i16 VPR64:$src), (v8i8 VPR64:$Rn))))],
+                            NoItinerary>;
+    
+    def 8h4s : NeonI_2VMisc<0b1, U, 0b01, opcode,
+                            (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
+                            asmop # "\t$Rd.4s, $Rn.8h",
+                            [(set (v4i32 VPR128:$Rd),
+                               (v4i32 (Neon_Padd
+                                 (v4i32 VPR128:$src), (v8i16 VPR128:$Rn))))],
+                            NoItinerary>;
+    
+    def 4h2s : NeonI_2VMisc<0b0, U, 0b01, opcode,
+                            (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn),
+                            asmop # "\t$Rd.2s, $Rn.4h",
+                            [(set (v2i32 VPR64:$Rd),
+                               (v2i32 (Neon_Padd
+                                 (v2i32 VPR64:$src), (v4i16 VPR64:$Rn))))],
+                            NoItinerary>;
+    
+    def 4s2d : NeonI_2VMisc<0b1, U, 0b10, opcode,
+                            (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
+                            asmop # "\t$Rd.2d, $Rn.4s",
+                            [(set (v2i64 VPR128:$Rd),
+                               (v2i64 (Neon_Padd
+                                 (v2i64 VPR128:$src), (v4i32 VPR128:$Rn))))],
+                            NoItinerary>;
+    
+    def 2s1d : NeonI_2VMisc<0b0, U, 0b10, opcode,
+                            (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn),
+                            asmop # "\t$Rd.1d, $Rn.2s",
+                            [(set (v1i64 VPR64:$Rd),
+                               (v1i64 (Neon_Padd
+                                 (v1i64 VPR64:$src), (v2i32 VPR64:$Rn))))],
+                            NoItinerary>;
+  }
+}
+
+defm SADALP : NeonI_PairwiseAddAcc<"sadalp", 0b0, 0b00110,
+                                   int_arm_neon_vpadals>;
+defm UADALP : NeonI_PairwiseAddAcc<"uadalp", 0b1, 0b00110,
+                                   int_arm_neon_vpadalu>;
+
+multiclass NeonI_2VMisc_BHSDsize_1Arg<string asmop, bit U, bits<5> opcode> {
+  def 16b : NeonI_2VMisc<0b1, U, 0b00, opcode,
+                         (outs VPR128:$Rd), (ins VPR128:$Rn),
+                         asmop # "\t$Rd.16b, $Rn.16b",
+                         [], NoItinerary>;
+  
+  def 8h : NeonI_2VMisc<0b1, U, 0b01, opcode,
+                        (outs VPR128:$Rd), (ins VPR128:$Rn),
+                        asmop # "\t$Rd.8h, $Rn.8h",
+                        [], NoItinerary>;
+  
+  def 4s : NeonI_2VMisc<0b1, U, 0b10, opcode,
+                        (outs VPR128:$Rd), (ins VPR128:$Rn),
+                        asmop # "\t$Rd.4s, $Rn.4s",
+                        [], NoItinerary>;
+  
+  def 2d : NeonI_2VMisc<0b1, U, 0b11, opcode,
+                        (outs VPR128:$Rd), (ins VPR128:$Rn),
+                        asmop # "\t$Rd.2d, $Rn.2d",
+                        [], NoItinerary>;
+  
+  def 8b : NeonI_2VMisc<0b0, U, 0b00, opcode,
+                         (outs VPR64:$Rd), (ins VPR64:$Rn),
+                         asmop # "\t$Rd.8b, $Rn.8b",
+                         [], NoItinerary>;
+  
+  def 4h : NeonI_2VMisc<0b0, U, 0b01, opcode,
+                        (outs VPR64:$Rd), (ins VPR64:$Rn),
+                        asmop # "\t$Rd.4h, $Rn.4h",
+                        [], NoItinerary>;
+  
+  def 2s : NeonI_2VMisc<0b0, U, 0b10, opcode,
+                        (outs VPR64:$Rd), (ins VPR64:$Rn),
+                        asmop # "\t$Rd.2s, $Rn.2s",
+                        [], NoItinerary>;
+}
+
+defm SQABS : NeonI_2VMisc_BHSDsize_1Arg<"sqabs", 0b0, 0b00111>;
+defm SQNEG : NeonI_2VMisc_BHSDsize_1Arg<"sqneg", 0b1, 0b00111>;
+defm ABS : NeonI_2VMisc_BHSDsize_1Arg<"abs", 0b0, 0b01011>;
+defm NEG : NeonI_2VMisc_BHSDsize_1Arg<"neg", 0b1, 0b01011>;
+
+multiclass NeonI_2VMisc_BHSD_1Arg_Pattern<string Prefix,
+                                          SDPatternOperator Neon_Op> {
+  def : Pat<(v16i8 (Neon_Op (v16i8 VPR128:$Rn))),
+            (v16i8 (!cast<Instruction>(Prefix # 16b) (v16i8 VPR128:$Rn)))>;
+
+  def : Pat<(v8i16 (Neon_Op (v8i16 VPR128:$Rn))),
+            (v8i16 (!cast<Instruction>(Prefix # 8h) (v8i16 VPR128:$Rn)))>;
+
+  def : Pat<(v4i32 (Neon_Op (v4i32 VPR128:$Rn))),
+            (v4i32 (!cast<Instruction>(Prefix # 4s) (v4i32 VPR128:$Rn)))>;
+
+  def : Pat<(v2i64 (Neon_Op (v2i64 VPR128:$Rn))),
+            (v2i64 (!cast<Instruction>(Prefix # 2d) (v2i64 VPR128:$Rn)))>;
+
+  def : Pat<(v8i8 (Neon_Op (v8i8 VPR64:$Rn))),
+            (v8i8 (!cast<Instruction>(Prefix # 8b) (v8i8 VPR64:$Rn)))>;
+
+  def : Pat<(v4i16 (Neon_Op (v4i16 VPR64:$Rn))),
+            (v4i16 (!cast<Instruction>(Prefix # 4h) (v4i16 VPR64:$Rn)))>;
+
+  def : Pat<(v2i32 (Neon_Op (v2i32 VPR64:$Rn))),
+            (v2i32 (!cast<Instruction>(Prefix # 2s) (v2i32 VPR64:$Rn)))>;
+}
+
+defm : NeonI_2VMisc_BHSD_1Arg_Pattern<"SQABS", int_arm_neon_vqabs>;
+defm : NeonI_2VMisc_BHSD_1Arg_Pattern<"SQNEG", int_arm_neon_vqneg>;
+defm : NeonI_2VMisc_BHSD_1Arg_Pattern<"ABS", int_arm_neon_vabs>;
+
+def Neon_AllZero : PatFrag<(ops), (Neon_movi (i32 0), (i32 14))>;
+def Neon_AllOne : PatFrag<(ops), (Neon_movi (i32 255), (i32 14))>;
+
+def : Pat<(v16i8 (sub 
+            (v16i8 Neon_AllZero),
+            (v16i8 VPR128:$Rn))),
+          (v16i8 (NEG16b (v16i8 VPR128:$Rn)))>;
+def : Pat<(v8i8 (sub 
+            (v8i8 Neon_AllZero),
+            (v8i8 VPR64:$Rn))),
+          (v8i8 (NEG8b (v8i8 VPR64:$Rn)))>;
+def : Pat<(v8i16 (sub 
+            (v8i16 (bitconvert (v16i8 Neon_AllZero))),
+            (v8i16 VPR128:$Rn))),
+          (v8i16 (NEG8h (v8i16 VPR128:$Rn)))>;
+def : Pat<(v4i16 (sub 
+            (v4i16 (bitconvert (v8i8 Neon_AllZero))),
+            (v4i16 VPR64:$Rn))),
+          (v4i16 (NEG4h (v4i16 VPR64:$Rn)))>;
+def : Pat<(v4i32 (sub 
+            (v4i32 (bitconvert (v16i8 Neon_AllZero))),
+            (v4i32 VPR128:$Rn))),
+          (v4i32 (NEG4s (v4i32 VPR128:$Rn)))>;
+def : Pat<(v2i32 (sub 
+            (v2i32 (bitconvert (v8i8 Neon_AllZero))),
+            (v2i32 VPR64:$Rn))),
+          (v2i32 (NEG2s (v2i32 VPR64:$Rn)))>;
+def : Pat<(v2i64 (sub 
+            (v2i64 (bitconvert (v16i8 Neon_AllZero))),
+            (v2i64 VPR128:$Rn))),
+          (v2i64 (NEG2d (v2i64 VPR128:$Rn)))>;
+
+multiclass NeonI_2VMisc_BHSDsize_2Args<string asmop, bit U, bits<5> opcode> {
+  let Constraints = "$src = $Rd" in {
+    def 16b : NeonI_2VMisc<0b1, U, 0b00, opcode,
+                           (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
+                           asmop # "\t$Rd.16b, $Rn.16b",
+                           [], NoItinerary>;
+    
+    def 8h : NeonI_2VMisc<0b1, U, 0b01, opcode,
+                          (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
+                          asmop # "\t$Rd.8h, $Rn.8h",
+                          [], NoItinerary>;
+    
+    def 4s : NeonI_2VMisc<0b1, U, 0b10, opcode,
+                          (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
+                          asmop # "\t$Rd.4s, $Rn.4s",
+                          [], NoItinerary>;
+    
+    def 2d : NeonI_2VMisc<0b1, U, 0b11, opcode,
+                          (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
+                          asmop # "\t$Rd.2d, $Rn.2d",
+                          [], NoItinerary>;
+    
+    def 8b : NeonI_2VMisc<0b0, U, 0b00, opcode,
+                          (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn),
+                          asmop # "\t$Rd.8b, $Rn.8b",
+                          [], NoItinerary>;
+    
+    def 4h : NeonI_2VMisc<0b0, U, 0b01, opcode,
+                          (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn),
+                          asmop # "\t$Rd.4h, $Rn.4h",
+                          [], NoItinerary>;
+    
+    def 2s : NeonI_2VMisc<0b0, U, 0b10, opcode,
+                          (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn),
+                          asmop # "\t$Rd.2s, $Rn.2s",
+                          [], NoItinerary>;
+  }
+}
+
+defm SUQADD : NeonI_2VMisc_BHSDsize_2Args<"suqadd", 0b0, 0b00011>;
+defm USQADD : NeonI_2VMisc_BHSDsize_2Args<"usqadd", 0b1, 0b00011>;
+
+multiclass NeonI_2VMisc_BHSD_2Args_Pattern<string Prefix,
+                                           SDPatternOperator Neon_Op> {
+  def : Pat<(v16i8 (Neon_Op (v16i8 VPR128:$src), (v16i8 VPR128:$Rn))),
+            (v16i8 (!cast<Instruction>(Prefix # 16b)
+              (v16i8 VPR128:$src), (v16i8 VPR128:$Rn)))>;
+
+  def : Pat<(v8i16 (Neon_Op (v8i16 VPR128:$src), (v8i16 VPR128:$Rn))),
+            (v8i16 (!cast<Instruction>(Prefix # 8h)
+              (v8i16 VPR128:$src), (v8i16 VPR128:$Rn)))>;
+
+  def : Pat<(v4i32 (Neon_Op (v4i32 VPR128:$src), (v4i32 VPR128:$Rn))),
+            (v4i32 (!cast<Instruction>(Prefix # 4s)
+              (v4i32 VPR128:$src), (v4i32 VPR128:$Rn)))>;
+
+  def : Pat<(v2i64 (Neon_Op (v2i64 VPR128:$src), (v2i64 VPR128:$Rn))),
+            (v2i64 (!cast<Instruction>(Prefix # 2d)
+              (v2i64 VPR128:$src), (v2i64 VPR128:$Rn)))>;
+
+  def : Pat<(v8i8 (Neon_Op (v8i8 VPR64:$src), (v8i8 VPR64:$Rn))),
+            (v8i8 (!cast<Instruction>(Prefix # 8b)
+              (v8i8 VPR64:$src), (v8i8 VPR64:$Rn)))>;
+
+  def : Pat<(v4i16 (Neon_Op (v4i16 VPR64:$src), (v4i16 VPR64:$Rn))),
+            (v4i16 (!cast<Instruction>(Prefix # 4h)
+              (v4i16 VPR64:$src), (v4i16 VPR64:$Rn)))>;
+
+  def : Pat<(v2i32 (Neon_Op (v2i32 VPR64:$src), (v2i32 VPR64:$Rn))),
+            (v2i32 (!cast<Instruction>(Prefix # 2s)
+              (v2i32 VPR64:$src), (v2i32 VPR64:$Rn)))>;
+}
+
+defm : NeonI_2VMisc_BHSD_2Args_Pattern<"SUQADD", int_aarch64_neon_suqadd>;
+defm : NeonI_2VMisc_BHSD_2Args_Pattern<"USQADD", int_aarch64_neon_usqadd>;
+
+multiclass NeonI_2VMisc_BHSsizes<string asmop, bit U,
+                          SDPatternOperator Neon_Op> {
+  def 16b : NeonI_2VMisc<0b1, U, 0b00, 0b00100,
+                         (outs VPR128:$Rd), (ins VPR128:$Rn),
+                         asmop # "\t$Rd.16b, $Rn.16b",
+                         [(set (v16i8 VPR128:$Rd),
+                            (v16i8 (Neon_Op (v16i8 VPR128:$Rn))))],
+                         NoItinerary>;
+  
+  def 8h : NeonI_2VMisc<0b1, U, 0b01, 0b00100,
+                        (outs VPR128:$Rd), (ins VPR128:$Rn),
+                        asmop # "\t$Rd.8h, $Rn.8h",
+                        [(set (v8i16 VPR128:$Rd),
+                           (v8i16 (Neon_Op (v8i16 VPR128:$Rn))))],
+                        NoItinerary>;
+  
+  def 4s : NeonI_2VMisc<0b1, U, 0b10, 0b00100,
+                        (outs VPR128:$Rd), (ins VPR128:$Rn),
+                        asmop # "\t$Rd.4s, $Rn.4s",
+                        [(set (v4i32 VPR128:$Rd),
+                           (v4i32 (Neon_Op (v4i32 VPR128:$Rn))))],
+                        NoItinerary>;
+  
+  def 8b : NeonI_2VMisc<0b0, U, 0b00, 0b00100,
+                        (outs VPR64:$Rd), (ins VPR64:$Rn),
+                        asmop # "\t$Rd.8b, $Rn.8b",
+                        [(set (v8i8 VPR64:$Rd),
+                           (v8i8 (Neon_Op (v8i8 VPR64:$Rn))))],
+                        NoItinerary>;
+  
+  def 4h : NeonI_2VMisc<0b0, U, 0b01, 0b00100,
+                        (outs VPR64:$Rd), (ins VPR64:$Rn),
+                        asmop # "\t$Rd.4h, $Rn.4h",
+                        [(set (v4i16 VPR64:$Rd),
+                           (v4i16 (Neon_Op (v4i16 VPR64:$Rn))))],
+                        NoItinerary>;
+  
+  def 2s : NeonI_2VMisc<0b0, U, 0b10, 0b00100,
+                        (outs VPR64:$Rd), (ins VPR64:$Rn),
+                        asmop # "\t$Rd.2s, $Rn.2s",
+                        [(set (v2i32 VPR64:$Rd),
+                           (v2i32 (Neon_Op (v2i32 VPR64:$Rn))))],
+                        NoItinerary>;
+}
+
+defm CLS : NeonI_2VMisc_BHSsizes<"cls", 0b0, int_arm_neon_vcls>;
+defm CLZ : NeonI_2VMisc_BHSsizes<"clz", 0b1, ctlz>;
+
+multiclass NeonI_2VMisc_Bsize<string asmop, bit U, bits<2> size,
+                              bits<5> Opcode> {
+  def 16b : NeonI_2VMisc<0b1, U, size, Opcode,
+                         (outs VPR128:$Rd), (ins VPR128:$Rn),
+                         asmop # "\t$Rd.16b, $Rn.16b",
+                         [], NoItinerary>;
+  
+  def 8b : NeonI_2VMisc<0b0, U, size, Opcode,
+                        (outs VPR64:$Rd), (ins VPR64:$Rn),
+                        asmop # "\t$Rd.8b, $Rn.8b",
+                        [], NoItinerary>;
+}
+
+defm CNT : NeonI_2VMisc_Bsize<"cnt", 0b0, 0b00, 0b00101>;
+defm NOT : NeonI_2VMisc_Bsize<"not", 0b1, 0b00, 0b00101>;
+defm RBIT : NeonI_2VMisc_Bsize<"rbit", 0b1, 0b01, 0b00101>;
+
+def : NeonInstAlias<"mvn $Rd.16b, $Rn.16b",
+                    (NOT16b VPR128:$Rd, VPR128:$Rn), 0>;
+def : NeonInstAlias<"mvn $Rd.8b, $Rn.8b",
+                    (NOT8b VPR64:$Rd, VPR64:$Rn), 0>;
+
+def : Pat<(v16i8 (ctpop (v16i8 VPR128:$Rn))),
+          (v16i8 (CNT16b (v16i8 VPR128:$Rn)))>;
+def : Pat<(v8i8 (ctpop (v8i8 VPR64:$Rn))),
+          (v8i8 (CNT8b (v8i8 VPR64:$Rn)))>;
+// xor with all-ones -> NOT; non-i8 types match the bitconverted byte constant.
+def : Pat<(v16i8 (xor 
+            (v16i8 VPR128:$Rn),
+            (v16i8 Neon_AllOne))),
+          (v16i8 (NOT16b (v16i8 VPR128:$Rn)))>;
+def : Pat<(v8i8 (xor 
+            (v8i8 VPR64:$Rn),
+            (v8i8 Neon_AllOne))),
+          (v8i8 (NOT8b (v8i8 VPR64:$Rn)))>;
+def : Pat<(v8i16 (xor 
+            (v8i16 VPR128:$Rn),
+            (v8i16 (bitconvert (v16i8 Neon_AllOne))))),
+          (NOT16b VPR128:$Rn)>;
+def : Pat<(v4i16 (xor 
+            (v4i16 VPR64:$Rn),
+            (v4i16 (bitconvert (v8i8 Neon_AllOne))))),
+          (NOT8b VPR64:$Rn)>;
+def : Pat<(v4i32 (xor 
+            (v4i32 VPR128:$Rn),
+            (v4i32 (bitconvert (v16i8 Neon_AllOne))))),
+          (NOT16b VPR128:$Rn)>;
+def : Pat<(v2i32 (xor 
+            (v2i32 VPR64:$Rn),
+            (v2i32 (bitconvert (v8i8 Neon_AllOne))))),
+          (NOT8b VPR64:$Rn)>;
+def : Pat<(v2i64 (xor // NOTE(review): no v1i64 pattern is visible - confirm.
+            (v2i64 VPR128:$Rn),
+            (v2i64 (bitconvert (v16i8 Neon_AllOne))))),
+          (NOT16b VPR128:$Rn)>;
+// Map the int_aarch64_neon_rbit intrinsic on v16i8/v8i8 to RBIT16b/RBIT8b.
+def : Pat<(v16i8 (int_aarch64_neon_rbit (v16i8 VPR128:$Rn))),
+          (v16i8 (RBIT16b (v16i8 VPR128:$Rn)))>;
+def : Pat<(v8i8 (int_aarch64_neon_rbit (v8i8 VPR64:$Rn))),
+          (v8i8 (RBIT8b (v8i8 VPR64:$Rn)))>;
+// FP two-register misc defs (4s, 2d, 2s), each with an inline Neon_Op pattern.
+multiclass NeonI_2VMisc_SDsizes<string asmop, bit U, bits<5> opcode,
+                                SDPatternOperator Neon_Op> {
+  def 4s : NeonI_2VMisc<0b1, U, 0b10, opcode,
+                        (outs VPR128:$Rd), (ins VPR128:$Rn),
+                        asmop # "\t$Rd.4s, $Rn.4s",
+                        [(set (v4f32 VPR128:$Rd),
+                           (v4f32 (Neon_Op (v4f32 VPR128:$Rn))))],
+                        NoItinerary>;
+  // 2d: v2f64 on VPR128, size argument 0b11.
+  def 2d : NeonI_2VMisc<0b1, U, 0b11, opcode,
+                        (outs VPR128:$Rd), (ins VPR128:$Rn),
+                        asmop # "\t$Rd.2d, $Rn.2d",
+                        [(set (v2f64 VPR128:$Rd),
+                           (v2f64 (Neon_Op (v2f64 VPR128:$Rn))))],
+                        NoItinerary>;
+  // 2s: v2f32 on VPR64, size argument 0b10.
+  def 2s : NeonI_2VMisc<0b0, U, 0b10, opcode,
+                        (outs VPR64:$Rd), (ins VPR64:$Rn),
+                        asmop # "\t$Rd.2s, $Rn.2s",
+                        [(set (v2f32 VPR64:$Rd),
+                           (v2f32 (Neon_Op (v2f32 VPR64:$Rn))))],
+                        NoItinerary>;
+}
+// FABS and FNEG share opcode 0b01111 and differ only in the U argument.
+defm FABS : NeonI_2VMisc_SDsizes<"fabs", 0b0, 0b01111, fabs>;
+defm FNEG : NeonI_2VMisc_SDsizes<"fneg", 0b1, 0b01111, fneg>;
+// Narrowing defs (8h->8b, 4s->4h, 2d->2s) plus "2" forms; none has a pattern.
+multiclass NeonI_2VMisc_HSD_Narrow<string asmop, bit U, bits<5> opcode> {
+  def 8h8b : NeonI_2VMisc<0b0, U, 0b00, opcode,
+                          (outs VPR64:$Rd), (ins VPR128:$Rn),
+                          asmop # "\t$Rd.8b, $Rn.8h",
+                          [], NoItinerary>;
+
+  def 4s4h : NeonI_2VMisc<0b0, U, 0b01, opcode,
+                          (outs VPR64:$Rd), (ins VPR128:$Rn),
+                          asmop # "\t$Rd.4h, $Rn.4s",
+                          [], NoItinerary>;
+
+  def 2d2s : NeonI_2VMisc<0b0, U, 0b10, opcode,
+                          (outs VPR64:$Rd), (ins VPR128:$Rn),
+                          asmop # "\t$Rd.2s, $Rn.2d",
+                          [], NoItinerary>;
+  // The "2" forms tie $Rd to the extra $src input via Constraints.
+  let Constraints = "$Rd = $src" in {
+    def 8h16b : NeonI_2VMisc<0b1, U, 0b00, opcode,
+                             (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
+                             asmop # "2\t$Rd.16b, $Rn.8h",
+                             [], NoItinerary>;
+  
+    def 4s8h : NeonI_2VMisc<0b1, U, 0b01, opcode,
+                            (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
+                            asmop # "2\t$Rd.8h, $Rn.4s",
+                            [], NoItinerary>;
+  
+    def 2d4s : NeonI_2VMisc<0b1, U, 0b10, opcode,
+                            (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
+                            asmop # "2\t$Rd.4s, $Rn.2d",
+                            [], NoItinerary>;
+  }
+}
+// Narrowing moves: XTN/SQXTUN use opcode 0b10010, SQXTN/UQXTN use 0b10100.
+defm XTN : NeonI_2VMisc_HSD_Narrow<"xtn", 0b0, 0b10010>;
+defm SQXTUN : NeonI_2VMisc_HSD_Narrow<"sqxtun", 0b1, 0b10010>;
+defm SQXTN : NeonI_2VMisc_HSD_Narrow<"sqxtn", 0b0, 0b10100>;
+defm UQXTN : NeonI_2VMisc_HSD_Narrow<"uqxtn", 0b1, 0b10100>;
+// Narrow patterns: plain forms, and concat_vectors($src, Neon_Op($Rn)) for "2".
+multiclass NeonI_2VMisc_Narrow_Patterns<string Prefix, 
+                                        SDPatternOperator Neon_Op> {
+  def : Pat<(v8i8 (Neon_Op (v8i16 VPR128:$Rn))),
+            (v8i8 (!cast<Instruction>(Prefix # 8h8b) (v8i16 VPR128:$Rn)))>;
+
+  def : Pat<(v4i16 (Neon_Op (v4i32 VPR128:$Rn))),
+            (v4i16 (!cast<Instruction>(Prefix # 4s4h) (v4i32 VPR128:$Rn)))>;
+
+  def : Pat<(v2i32 (Neon_Op (v2i64 VPR128:$Rn))),
+            (v2i32 (!cast<Instruction>(Prefix # 2d2s) (v2i64 VPR128:$Rn)))>;
+  // NOTE(review): SUBREG_TO_REG usually asserts zeroed high bits - confirm OK.
+  def : Pat<(v16i8 (concat_vectors
+              (v8i8 VPR64:$src),
+              (v8i8 (Neon_Op (v8i16 VPR128:$Rn))))),
+            (!cast<Instruction>(Prefix # 8h16b) 
+              (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64),
+              VPR128:$Rn)>;
+
+  def : Pat<(v8i16 (concat_vectors
+              (v4i16 VPR64:$src),
+              (v4i16 (Neon_Op (v4i32 VPR128:$Rn))))),
+            (!cast<Instruction>(Prefix # 4s8h)
+              (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64),
+              VPR128:$Rn)>;
+
+  def : Pat<(v4i32 (concat_vectors
+              (v2i32 VPR64:$src),
+              (v2i32 (Neon_Op (v2i64 VPR128:$Rn))))),
+            (!cast<Instruction>(Prefix # 2d4s)
+              (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64),
+              VPR128:$Rn)>;
+}
+// Bind the narrow patterns: trunc for XTN, ARM vqmovn* intrinsics otherwise.
+defm : NeonI_2VMisc_Narrow_Patterns<"XTN", trunc>;
+defm : NeonI_2VMisc_Narrow_Patterns<"SQXTUN", int_arm_neon_vqmovnsu>;
+defm : NeonI_2VMisc_Narrow_Patterns<"SQXTN", int_arm_neon_vqmovns>;
+defm : NeonI_2VMisc_Narrow_Patterns<"UQXTN", int_arm_neon_vqmovnu>;
+// Widening shift defs; $Imm is uimm_exact8/16/32 for b/h/s source lanes.
+multiclass NeonI_2VMisc_SHIFT<string asmop, bit U, bits<5> opcode> {
+  def 8b8h : NeonI_2VMisc<0b0, U, 0b00, opcode,
+                          (outs VPR128:$Rd),
+                          (ins VPR64:$Rn, uimm_exact8:$Imm),
+                          asmop # "\t$Rd.8h, $Rn.8b, $Imm",
+                          [], NoItinerary>;
+
+  def 4h4s : NeonI_2VMisc<0b0, U, 0b01, opcode,
+                          (outs VPR128:$Rd),
+                          (ins VPR64:$Rn, uimm_exact16:$Imm),
+                          asmop # "\t$Rd.4s, $Rn.4h, $Imm",
+                          [], NoItinerary>;
+
+  def 2s2d : NeonI_2VMisc<0b0, U, 0b10, opcode,
+                          (outs VPR128:$Rd),
+                          (ins VPR64:$Rn, uimm_exact32:$Imm),
+                          asmop # "\t$Rd.2d, $Rn.2s, $Imm",
+                          [], NoItinerary>;
+  // The "2" forms below take a VPR128 source instead of VPR64.
+  def 16b8h : NeonI_2VMisc<0b1, U, 0b00, opcode,
+                          (outs VPR128:$Rd),
+                          (ins VPR128:$Rn, uimm_exact8:$Imm),
+                          asmop # "2\t$Rd.8h, $Rn.16b, $Imm",
+                          [], NoItinerary>;
+
+  def 8h4s : NeonI_2VMisc<0b1, U, 0b01, opcode,
+                          (outs VPR128:$Rd),
+                          (ins VPR128:$Rn, uimm_exact16:$Imm),
+                          asmop # "2\t$Rd.4s, $Rn.8h, $Imm",
+                          [], NoItinerary>;
+
+  def 4s2d : NeonI_2VMisc<0b1, U, 0b10, opcode,
+                          (outs VPR128:$Rd),
+                          (ins VPR128:$Rn, uimm_exact32:$Imm),
+                          asmop # "2\t$Rd.2d, $Rn.4s, $Imm",
+                          [], NoItinerary>;
+}
+// Instantiate the SHLL defs (assembly mnemonics "shll" and "shll2").
+defm SHLL : NeonI_2VMisc_SHIFT<"shll", 0b1, 0b10011>;
+// Map shl(ExtOp($Rn), Neon_vdup(#Imm)) with a VPR64 source to "SHLL" # suffix.
+class NeonI_SHLL_Patterns<ValueType OpTy, ValueType DesTy,
+                          SDPatternOperator ExtOp, Operand Neon_Imm,
+                          string suffix> 
+  : Pat<(DesTy (shl
+          (DesTy (ExtOp (OpTy VPR64:$Rn))),
+            (DesTy (Neon_vdup
+              (i32 Neon_Imm:$Imm))))),
+        (!cast<Instruction>("SHLL" # suffix) VPR64:$Rn, Neon_Imm:$Imm)>;
+// Map shl(ExtOp(GetHigh($Rn)), Neon_vdup(#Imm)), VPR128 source, to SHLL<suffix>.
+class NeonI_SHLL_High_Patterns<ValueType OpTy, ValueType DesTy,
+                               SDPatternOperator ExtOp, Operand Neon_Imm,
+                               string suffix, PatFrag GetHigh> 
+  : Pat<(DesTy (shl
+          (DesTy (ExtOp
+            (OpTy (GetHigh VPR128:$Rn)))),
+              (DesTy (Neon_vdup
+                (i32 Neon_Imm:$Imm))))),
+        (!cast<Instruction>("SHLL" # suffix) VPR128:$Rn, Neon_Imm:$Imm)>;
+// Both zext and sext forms map to SHLL; high-half forms use the "2" defs.
+def : NeonI_SHLL_Patterns<v8i8, v8i16, zext, uimm_exact8, "8b8h">;
+def : NeonI_SHLL_Patterns<v8i8, v8i16, sext, uimm_exact8, "8b8h">;
+def : NeonI_SHLL_Patterns<v4i16, v4i32, zext, uimm_exact16, "4h4s">;
+def : NeonI_SHLL_Patterns<v4i16, v4i32, sext, uimm_exact16, "4h4s">;
+def : NeonI_SHLL_Patterns<v2i32, v2i64, zext, uimm_exact32, "2s2d">;
+def : NeonI_SHLL_Patterns<v2i32, v2i64, sext, uimm_exact32, "2s2d">;
+def : NeonI_SHLL_High_Patterns<v8i8, v8i16, zext, uimm_exact8, "16b8h",
+                               Neon_High16B>;
+def : NeonI_SHLL_High_Patterns<v8i8, v8i16, sext, uimm_exact8, "16b8h",
+                               Neon_High16B>;
+def : NeonI_SHLL_High_Patterns<v4i16, v4i32, zext, uimm_exact16, "8h4s",
+                               Neon_High8H>;
+def : NeonI_SHLL_High_Patterns<v4i16, v4i32, sext, uimm_exact16, "8h4s",
+                               Neon_High8H>;
+def : NeonI_SHLL_High_Patterns<v2i32, v2i64, zext, uimm_exact32, "4s2d",
+                               Neon_High4S>;
+def : NeonI_SHLL_High_Patterns<v2i32, v2i64, sext, uimm_exact32, "4s2d",
+                               Neon_High4S>;
+// Narrowing defs for 4s->4h and 2d->2s plus "2" forms; none has a pattern.
+multiclass NeonI_2VMisc_SD_Narrow<string asmop, bit U, bits<5> opcode> {
+  def 4s4h : NeonI_2VMisc<0b0, U, 0b00, opcode,
+                          (outs VPR64:$Rd), (ins VPR128:$Rn),
+                          asmop # "\t$Rd.4h, $Rn.4s",
+                          [], NoItinerary>;
+
+  def 2d2s : NeonI_2VMisc<0b0, U, 0b01, opcode,
+                          (outs VPR64:$Rd), (ins VPR128:$Rn),
+                          asmop # "\t$Rd.2s, $Rn.2d",
+                          [], NoItinerary>;
+  // The "2" forms tie the extra $src input to $Rd via Constraints.
+  let Constraints = "$src = $Rd" in {
+    def 4s8h : NeonI_2VMisc<0b1, U, 0b00, opcode,
+                            (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
+                            asmop # "2\t$Rd.8h, $Rn.4s",
+                            [], NoItinerary>;
+  
+    def 2d4s : NeonI_2VMisc<0b1, U, 0b01, opcode,
+                            (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
+                            asmop # "2\t$Rd.4s, $Rn.2d",
+                            [], NoItinerary>;
+  }
+}
+// Instantiate the FCVTN defs (assembly mnemonics "fcvtn" and "fcvtn2").
+defm FCVTN : NeonI_2VMisc_SD_Narrow<"fcvtn", 0b0, 0b10110>;
+// FP narrow patterns: f32->f16 results are typed v4i16, f64->f32 are v2f32.
+multiclass NeonI_2VMisc_Narrow_Pattern<string prefix,
+                                       SDPatternOperator f32_to_f16_Op,
+                                       SDPatternOperator f64_to_f32_Op> {
+  // v4f32 source -> "4s4h"; the concat form below selects "4s8h".
+  def : Pat<(v4i16 (f32_to_f16_Op (v4f32 VPR128:$Rn))),
+              (!cast<Instruction>(prefix # "4s4h") (v4f32 VPR128:$Rn))>;
+  
+  def : Pat<(v8i16 (concat_vectors
+                (v4i16 VPR64:$src),
+                (v4i16 (f32_to_f16_Op (v4f32 VPR128:$Rn))))),
+                  (!cast<Instruction>(prefix # "4s8h")
+                    (v4f32 (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64)),
+                    (v4f32 VPR128:$Rn))>;  
+  // v2f64 source -> "2d2s"; the concat form below selects "2d4s".
+  def : Pat<(v2f32 (f64_to_f32_Op (v2f64 VPR128:$Rn))),
+            (!cast<Instruction>(prefix # "2d2s") (v2f64 VPR128:$Rn))>;
+  
+  def : Pat<(v4f32 (concat_vectors
+              (v2f32 VPR64:$src),
+              (v2f32 (f64_to_f32_Op (v2f64 VPR128:$Rn))))),
+                (!cast<Instruction>(prefix # "2d4s")
+                  (v4f32 (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64)),
+                  (v2f64 VPR128:$Rn))>;
+}
+// FCVTN: int_arm_neon_vcvtfp2hf for f32->f16, generic fround for f64->f32.
+defm : NeonI_2VMisc_Narrow_Pattern<"FCVTN", int_arm_neon_vcvtfp2hf, fround>;
+// 2d->2s narrowing defs plus a tied "2" form, with their selection patterns.
+multiclass NeonI_2VMisc_D_Narrow<string asmop, string prefix, bit U,
+                                 bits<5> opcode> {
+  def 2d2s : NeonI_2VMisc<0b0, U, 0b01, opcode,
+                          (outs VPR64:$Rd), (ins VPR128:$Rn),
+                          asmop # "\t$Rd.2s, $Rn.2d",
+                          [], NoItinerary>;
+
+  def 2d4s : NeonI_2VMisc<0b1, U, 0b01, opcode,
+                          (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
+                          asmop # "2\t$Rd.4s, $Rn.2d",
+                          [], NoItinerary> {
+    let Constraints = "$src = $Rd";
+  }
+  // Patterns are fixed to int_aarch64_neon_fcvtxn; prefix names the target def.
+  def : Pat<(v2f32 (int_aarch64_neon_fcvtxn (v2f64 VPR128:$Rn))),
+            (!cast<Instruction>(prefix # "2d2s") VPR128:$Rn)>;
+
+  def : Pat<(v4f32 (concat_vectors
+              (v2f32 VPR64:$src),
+              (v2f32 (int_aarch64_neon_fcvtxn (v2f64 VPR128:$Rn))))),
+            (!cast<Instruction>(prefix # "2d4s")
+               (v4f32 (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64)),
+               VPR128:$Rn)>;
+}
+// Instantiate FCVTXN; prefix "FCVTXN" must match the defm name for the !cast.
+defm FCVTXN : NeonI_2VMisc_D_Narrow<"fcvtxn","FCVTXN", 0b1, 0b10110>;
+// Fragment matching extract_subvector of a v4f32 starting at index 2.
+def Neon_High4Float : PatFrag<(ops node:$in),
+                              (extract_subvector (v4f32 node:$in), (iPTR 2))>;
+// Extending defs (4h->4s, 2s->2d) plus "2" forms on VPR128; none has a pattern.
+multiclass NeonI_2VMisc_HS_Extend<string asmop, bit U, bits<5> opcode> {
+  def 4h4s : NeonI_2VMisc<0b0, U, 0b00, opcode,
+                          (outs VPR128:$Rd), (ins VPR64:$Rn),
+                          asmop # "\t$Rd.4s, $Rn.4h",
+                          [], NoItinerary>;
+
+  def 2s2d : NeonI_2VMisc<0b0, U, 0b01, opcode,
+                          (outs VPR128:$Rd), (ins VPR64:$Rn),
+                          asmop # "\t$Rd.2d, $Rn.2s",
+                          [], NoItinerary>;
+
+  def 8h4s : NeonI_2VMisc<0b1, U, 0b00, opcode,
+                          (outs VPR128:$Rd), (ins VPR128:$Rn),
+                          asmop # "2\t$Rd.4s, $Rn.8h",
+                          [], NoItinerary>;
+
+  def 4s2d : NeonI_2VMisc<0b1, U, 0b01, opcode,
+                          (outs VPR128:$Rd), (ins VPR128:$Rn),
+                          asmop # "2\t$Rd.2d, $Rn.4s",
+                          [], NoItinerary>;
+}
+// Instantiate the FCVTL defs (assembly mnemonics "fcvtl" and "fcvtl2").
+defm FCVTL : NeonI_2VMisc_HS_Extend<"fcvtl", 0b0, 0b10111>;
+// FP extend patterns: int_arm_neon_vcvthf2fp for f16 input, fextend for f32.
+multiclass NeonI_2VMisc_Extend_Pattern<string prefix> {
+  def : Pat<(v4f32 (int_arm_neon_vcvthf2fp (v4i16 VPR64:$Rn))),
+            (!cast<Instruction>(prefix # "4h4s") VPR64:$Rn)>;
+  // High-half input (Neon_High8H) selects the "8h4s" def.
+  def : Pat<(v4f32 (int_arm_neon_vcvthf2fp
+              (v4i16 (Neon_High8H
+                (v8i16 VPR128:$Rn))))),
+            (!cast<Instruction>(prefix # "8h4s") VPR128:$Rn)>;
+  
+  def : Pat<(v2f64 (fextend (v2f32 VPR64:$Rn))),
+            (!cast<Instruction>(prefix # "2s2d") VPR64:$Rn)>;
+  // High-half input (Neon_High4Float) selects the "4s2d" def.
+  def : Pat<(v2f64 (fextend
+              (v2f32 (Neon_High4Float
+                (v4f32 VPR128:$Rn))))),
+            (!cast<Instruction>(prefix # "4s2d") VPR128:$Rn)>;
+}
+// Bind the extend patterns to the FCVTL defs.
+defm : NeonI_2VMisc_Extend_Pattern<"FCVTL">;
+// 4s/2d/2s conversion defs; size is {Size, 0b0} for 4s/2s, {Size, 0b1} for 2d.
+multiclass NeonI_2VMisc_SD_Conv<string asmop, bit Size, bit U, bits<5> opcode,
+                                ValueType ResTy4s, ValueType OpTy4s,
+                                ValueType ResTy2d, ValueType OpTy2d,
+                                ValueType ResTy2s, ValueType OpTy2s,
+                                SDPatternOperator Neon_Op> {
+  // Result and operand types of each pattern come from ResTy*/OpTy*.
+  def 4s : NeonI_2VMisc<0b1, U, {Size, 0b0}, opcode,
+                        (outs VPR128:$Rd), (ins VPR128:$Rn),
+                        asmop # "\t$Rd.4s, $Rn.4s",
+                        [(set (ResTy4s VPR128:$Rd),
+                           (ResTy4s (Neon_Op (OpTy4s VPR128:$Rn))))],
+                        NoItinerary>;
+
+  def 2d : NeonI_2VMisc<0b1, U, {Size, 0b1}, opcode,
+                        (outs VPR128:$Rd), (ins VPR128:$Rn),
+                        asmop # "\t$Rd.2d, $Rn.2d",
+                        [(set (ResTy2d VPR128:$Rd),
+                           (ResTy2d (Neon_Op (OpTy2d VPR128:$Rn))))],
+                        NoItinerary>;
+  // 2s is the only form on VPR64.
+  def 2s : NeonI_2VMisc<0b0, U, {Size, 0b0}, opcode,
+                        (outs VPR64:$Rd), (ins VPR64:$Rn),
+                        asmop # "\t$Rd.2s, $Rn.2s",
+                        [(set (ResTy2s VPR64:$Rd),
+                           (ResTy2s (Neon_Op (OpTy2s VPR64:$Rn))))],
+                        NoItinerary>;
+}
+// NeonI_2VMisc_SD_Conv with integer result types and FP operand types.
+multiclass NeonI_2VMisc_fp_to_int<string asmop, bit Size, bit U,
+                                  bits<5> opcode, SDPatternOperator Neon_Op> {
+  defm _ : NeonI_2VMisc_SD_Conv<asmop, Size, U, opcode, v4i32, v4f32, v2i64,
+                                v2f64, v2i32, v2f32, Neon_Op>;
+}
+// FP -> integer converts; FCVTZS/FCVTZU use generic fp_to_sint/fp_to_uint.
+defm FCVTNS : NeonI_2VMisc_fp_to_int<"fcvtns", 0b0, 0b0, 0b11010,
+                                     int_aarch64_neon_fcvtns>;
+defm FCVTNU : NeonI_2VMisc_fp_to_int<"fcvtnu", 0b0, 0b1, 0b11010,
+                                     int_aarch64_neon_fcvtnu>;
+defm FCVTPS : NeonI_2VMisc_fp_to_int<"fcvtps", 0b1, 0b0, 0b11010,
+                                     int_aarch64_neon_fcvtps>;
+defm FCVTPU : NeonI_2VMisc_fp_to_int<"fcvtpu", 0b1, 0b1, 0b11010,
+                                     int_aarch64_neon_fcvtpu>;
+defm FCVTMS : NeonI_2VMisc_fp_to_int<"fcvtms", 0b0, 0b0, 0b11011,
+                                     int_aarch64_neon_fcvtms>;
+defm FCVTMU : NeonI_2VMisc_fp_to_int<"fcvtmu", 0b0, 0b1, 0b11011,
+                                     int_aarch64_neon_fcvtmu>;
+defm FCVTZS : NeonI_2VMisc_fp_to_int<"fcvtzs", 0b1, 0b0, 0b11011, fp_to_sint>;
+defm FCVTZU : NeonI_2VMisc_fp_to_int<"fcvtzu", 0b1, 0b1, 0b11011, fp_to_uint>;
+defm FCVTAS : NeonI_2VMisc_fp_to_int<"fcvtas", 0b0, 0b0, 0b11100,
+                                     int_aarch64_neon_fcvtas>;
+defm FCVTAU : NeonI_2VMisc_fp_to_int<"fcvtau", 0b0, 0b1, 0b11100,
+                                     int_aarch64_neon_fcvtau>;
+// NeonI_2VMisc_SD_Conv with FP result types and integer operand types.
+multiclass NeonI_2VMisc_int_to_fp<string asmop, bit Size, bit U,
+                                  bits<5> opcode, SDPatternOperator Neon_Op> {
+  defm _ : NeonI_2VMisc_SD_Conv<asmop, Size, U, opcode, v4f32, v4i32, v2f64,
+                                v2i64, v2f32, v2i32, Neon_Op>;
+}
+// SCVTF/UCVTF use the generic sint_to_fp/uint_to_fp nodes; they differ in U.
+defm SCVTF : NeonI_2VMisc_int_to_fp<"scvtf", 0b0, 0b0, 0b11101, sint_to_fp>;
+defm UCVTF : NeonI_2VMisc_int_to_fp<"ucvtf", 0b0, 0b1, 0b11101, uint_to_fp>;
+// NeonI_2VMisc_SD_Conv with FP types for both result and operand.
+multiclass NeonI_2VMisc_fp_to_fp<string asmop, bit Size, bit U,
+                                 bits<5> opcode, SDPatternOperator Neon_Op> {
+  defm _ : NeonI_2VMisc_SD_Conv<asmop, Size, U, opcode, v4f32, v4f32, v2f64,
+                                v2f64, v2f32, v2f32, Neon_Op>;
+}
+// FP -> FP ops: FRINT* rounding variants, FRECPE/FRSQRTE estimates and FSQRT.
+defm FRINTN : NeonI_2VMisc_fp_to_fp<"frintn", 0b0, 0b0, 0b11000,
+                                     int_aarch64_neon_frintn>;
+defm FRINTA : NeonI_2VMisc_fp_to_fp<"frinta", 0b0, 0b1, 0b11000, frnd>;
+defm FRINTP : NeonI_2VMisc_fp_to_fp<"frintp", 0b1, 0b0, 0b11000, fceil>;
+defm FRINTM : NeonI_2VMisc_fp_to_fp<"frintm", 0b0, 0b0, 0b11001, ffloor>;
+defm FRINTX : NeonI_2VMisc_fp_to_fp<"frintx", 0b0, 0b1, 0b11001, frint>;
+defm FRINTZ : NeonI_2VMisc_fp_to_fp<"frintz", 0b1, 0b0, 0b11001, ftrunc>;
+defm FRINTI : NeonI_2VMisc_fp_to_fp<"frinti", 0b1, 0b1, 0b11001, fnearbyint>;
+defm FRECPE : NeonI_2VMisc_fp_to_fp<"frecpe", 0b1, 0b0, 0b11101,
+                                    int_arm_neon_vrecpe>;
+defm FRSQRTE : NeonI_2VMisc_fp_to_fp<"frsqrte", 0b1, 0b1, 0b11101,
+                                     int_arm_neon_vrsqrte>;
+defm FSQRT : NeonI_2VMisc_fp_to_fp<"fsqrt", 0b1, 0b1, 0b11111,
+                                   int_aarch64_neon_fsqrt>;
+// 32-bit-lane integer defs (4s and 2s only), each with an inline pattern.
+multiclass NeonI_2VMisc_S_Conv<string asmop, bit Size, bit U,
+                               bits<5> opcode, SDPatternOperator Neon_Op> {
+  def 4s : NeonI_2VMisc<0b1, U, {Size, 0b0}, opcode,
+                        (outs VPR128:$Rd), (ins VPR128:$Rn),
+                        asmop # "\t$Rd.4s, $Rn.4s",
+                        [(set (v4i32 VPR128:$Rd),
+                           (v4i32 (Neon_Op (v4i32 VPR128:$Rn))))],
+                        NoItinerary>;
+  // 2s: v2i32 on VPR64; same size bits as 4s.
+  def 2s : NeonI_2VMisc<0b0, U, {Size, 0b0}, opcode,
+                        (outs VPR64:$Rd), (ins VPR64:$Rn),
+                        asmop # "\t$Rd.2s, $Rn.2s",
+                        [(set (v2i32 VPR64:$Rd),
+                           (v2i32 (Neon_Op (v2i32 VPR64:$Rn))))],
+                        NoItinerary>;
+}
+// URECPE/URSQRTE reuse the ARM vrecpe/vrsqrte intrinsics on i32 vectors.
+defm URECPE : NeonI_2VMisc_S_Conv<"urecpe", 0b1, 0b0, 0b11100,
+                                  int_arm_neon_vrecpe>;
+defm URSQRTE : NeonI_2VMisc_S_Conv<"ursqrte", 0b1, 0b1, 0b11100,
+                                   int_arm_neon_vrsqrte>;
+
 // Crypto Class
 class NeonI_Cryptoaes_2v<bits<2> size, bits<5> opcode,
                          string asmop, SDPatternOperator opnode>
diff --git a/test/CodeGen/AArch64/neon-compare-instructions.ll b/test/CodeGen/AArch64/neon-compare-instructions.ll
index 0848f9b03d..68f03425b2 100644
--- a/test/CodeGen/AArch64/neon-compare-instructions.ll
+++ b/test/CodeGen/AArch64/neon-compare-instructions.ll
@@ -51,8 +51,7 @@ define <2 x i64> @cmeq2xi64(<2 x i64> %A, <2 x i64> %B) {
 
 define <8 x i8> @cmne8xi8(<8 x i8> %A, <8 x i8> %B) {
 ;CHECK: cmeq {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-;CHECK-NEXT: movi {{v[0-9]+}}.8b, #0xff
-;CHECK-NEXT: eor {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
 	%tmp3 = icmp ne <8 x i8> %A, %B;
    %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
 	ret <8 x i8> %tmp4
@@ -60,8 +59,7 @@ define <8 x i8> @cmne8xi8(<8 x i8> %A, <8 x i8> %B) {
 
 define <16 x i8> @cmne16xi8(<16 x i8> %A, <16 x i8> %B) {
 ;CHECK: cmeq {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff
-;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
 	%tmp3 = icmp ne <16 x i8> %A, %B;
    %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
 	ret <16 x i8> %tmp4
@@ -69,8 +67,7 @@ define <16 x i8> @cmne16xi8(<16 x i8> %A, <16 x i8> %B) {
 
 define <4 x i16> @cmne4xi16(<4 x i16> %A, <4 x i16> %B) {
 ;CHECK: cmeq {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
-;CHECK-NEXT: movi {{v[0-9]+}}.8b, #0xff
-;CHECK-NEXT: eor {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
 	%tmp3 = icmp ne <4 x i16> %A, %B;
    %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
 	ret <4 x i16> %tmp4
@@ -78,8 +75,7 @@ define <4 x i16> @cmne4xi16(<4 x i16> %A, <4 x i16> %B) {
 
 define <8 x i16> @cmne8xi16(<8 x i16> %A, <8 x i16> %B) {
 ;CHECK: cmeq {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff
-;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
 	%tmp3 = icmp ne <8 x i16> %A, %B;
    %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
 	ret <8 x i16> %tmp4
@@ -87,8 +83,7 @@ define <8 x i16> @cmne8xi16(<8 x i16> %A, <8 x i16> %B) {
 
 define <2 x i32> @cmne2xi32(<2 x i32> %A, <2 x i32> %B) {
 ;CHECK: cmeq {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-;CHECK-NEXT: movi {{v[0-9]+}}.8b, #0xff
-;CHECK-NEXT: eor {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
 	%tmp3 = icmp ne <2 x i32> %A, %B;
    %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
 	ret <2 x i32> %tmp4
@@ -96,8 +91,7 @@ define <2 x i32> @cmne2xi32(<2 x i32> %A, <2 x i32> %B) {
 
 define <4 x i32> @cmne4xi32(<4 x i32> %A, <4 x i32> %B) {
 ;CHECK: cmeq {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff
-;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
 	%tmp3 = icmp ne <4 x i32> %A, %B;
    %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
 	ret <4 x i32> %tmp4
@@ -105,8 +99,7 @@ define <4 x i32> @cmne4xi32(<4 x i32> %A, <4 x i32> %B) {
 
 define <2 x i64> @cmne2xi64(<2 x i64> %A, <2 x i64> %B) {
 ;CHECK: cmeq {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
-;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff
-;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
 	%tmp3 = icmp ne <2 x i64> %A, %B;
    %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
 	ret <2 x i64> %tmp4
@@ -867,8 +860,7 @@ define <2 x i64> @cmltz2xi64(<2 x i64> %A) {
 
 define <8 x i8> @cmneqz8xi8(<8 x i8> %A) {
 ;CHECK: cmeq {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x0
-;CHECK-NEXT: movi {{v[0-9]+}}.8b, #0xff
-;CHECK-NEXT: eor {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
 	%tmp3 = icmp ne <8 x i8> %A, zeroinitializer;
    %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
 	ret <8 x i8> %tmp4
@@ -876,8 +868,7 @@ define <8 x i8> @cmneqz8xi8(<8 x i8> %A) {
 
 define <16 x i8> @cmneqz16xi8(<16 x i8> %A) {
 ;CHECK: cmeq {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x0
-;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff
-;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
 	%tmp3 = icmp ne <16 x i8> %A, zeroinitializer;
    %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
 	ret <16 x i8> %tmp4
@@ -885,8 +876,7 @@ define <16 x i8> @cmneqz16xi8(<16 x i8> %A) {
 
 define <4 x i16> @cmneqz4xi16(<4 x i16> %A) {
 ;CHECK: cmeq {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #0x0
-;CHECK-NEXT: movi {{v[0-9]+}}.8b, #0xff
-;CHECK-NEXT: eor {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
 	%tmp3 = icmp ne <4 x i16> %A, zeroinitializer;
    %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
 	ret <4 x i16> %tmp4
@@ -894,8 +884,7 @@ define <4 x i16> @cmneqz4xi16(<4 x i16> %A) {
 
 define <8 x i16> @cmneqz8xi16(<8 x i16> %A) {
 ;CHECK: cmeq {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #0x0
-;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff
-;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
 	%tmp3 = icmp ne <8 x i16> %A, zeroinitializer;
    %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
 	ret <8 x i16> %tmp4
@@ -903,8 +892,7 @@ define <8 x i16> @cmneqz8xi16(<8 x i16> %A) {
 
 define <2 x i32> @cmneqz2xi32(<2 x i32> %A) {
 ;CHECK: cmeq {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0x0
-;CHECK-NEXT: movi {{v[0-9]+}}.8b, #0xff
-;CHECK-NEXT: eor {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
 	%tmp3 = icmp ne <2 x i32> %A, zeroinitializer;
    %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
 	ret <2 x i32> %tmp4
@@ -912,8 +900,7 @@ define <2 x i32> @cmneqz2xi32(<2 x i32> %A) {
 
 define <4 x i32> @cmneqz4xi32(<4 x i32> %A) {
 ;CHECK: cmeq {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0x0
-;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff
-;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
 	%tmp3 = icmp ne <4 x i32> %A, zeroinitializer;
    %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
 	ret <4 x i32> %tmp4
@@ -921,8 +908,7 @@ define <4 x i32> @cmneqz4xi32(<4 x i32> %A) {
 
 define <2 x i64> @cmneqz2xi64(<2 x i64> %A) {
 ;CHECK: cmeq {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0x0
-;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff
-;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
 	%tmp3 = icmp ne <2 x i64> %A, zeroinitializer;
    %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
 	ret <2 x i64> %tmp4
@@ -1369,8 +1355,7 @@ define <2 x i32> @fcmuno2xfloat(<2 x float> %A, <2 x float> %B) {
 ;CHECK: fcmge {{v[0-9]+}}.2s, v0.2s, v1.2s
 ;CHECK-NEXT: fcmgt {{v[0-9]+}}.2s, v1.2s, v0.2s
 ;CHECK-NEXT: orr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-;CHECK-NEXT: movi {{v[0-9]+}}.8b, #0xff
-;CHECK-NEXT: eor {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
    %tmp3 = fcmp uno <2 x float> %A, %B
    %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
 	ret <2 x i32> %tmp4
@@ -1382,8 +1367,7 @@ define <4 x i32> @fcmuno4xfloat(<4 x float> %A, <4 x float> %B) {
 ;CHECK: fcmge {{v[0-9]+}}.4s, v0.4s, v1.4s
 ;CHECK-NEXT: fcmgt {{v[0-9]+}}.4s, v1.4s, v0.4s
 ;CHECK-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff
-;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
    %tmp3 = fcmp uno <4 x float> %A, %B
    %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
 	ret <4 x i32> %tmp4
@@ -1395,8 +1379,7 @@ define <2 x i64> @fcmuno2xdouble(<2 x double> %A, <2 x double> %B) {
 ;CHECK: fcmge {{v[0-9]+}}.2d, v0.2d, v1.2d
 ;CHECK-NEXT: fcmgt {{v[0-9]+}}.2d, v1.2d, v0.2d
 ;CHECK-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff
-;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
    %tmp3 = fcmp uno <2 x double> %A, %B
    %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
 	ret <2 x i64> %tmp4
@@ -1408,8 +1391,7 @@ define <2 x i32> @fcmueq2xfloat(<2 x float> %A, <2 x float> %B) {
 ;CHECK: fcmgt {{v[0-9]+}}.2s, v0.2s, v1.2s
 ;CHECK-NEXT: fcmgt {{v[0-9]+}}.2s, v1.2s, v0.2s
 ;CHECK-NEXT: orr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-;CHECK-NEXT: movi {{v[0-9]+}}.8b, #0xff
-;CHECK-NEXT: eor {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
    %tmp3 = fcmp ueq <2 x float> %A, %B
    %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
 	ret <2 x i32> %tmp4
@@ -1421,8 +1403,7 @@ define <4 x i32> @fcmueq4xfloat(<4 x float> %A, <4 x float> %B) {
 ;CHECK: fcmgt {{v[0-9]+}}.4s, v0.4s, v1.4s
 ;CHECK-NEXT: fcmgt {{v[0-9]+}}.4s, v1.4s, v0.4s
 ;CHECK-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff
-;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
    %tmp3 = fcmp ueq <4 x float> %A, %B
    %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
 	ret <4 x i32> %tmp4
@@ -1434,8 +1415,7 @@ define <2 x i64> @fcmueq2xdouble(<2 x double> %A, <2 x double> %B) {
 ;CHECK: fcmgt {{v[0-9]+}}.2d, v0.2d, v1.2d
 ;CHECK-NEXT: fcmgt {{v[0-9]+}}.2d, v1.2d, v0.2d
 ;CHECK-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff
-;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
    %tmp3 = fcmp ueq <2 x double> %A, %B
    %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
 	ret <2 x i64> %tmp4
@@ -1445,8 +1425,7 @@ define <2 x i32> @fcmuge2xfloat(<2 x float> %A, <2 x float> %B) {
 ; Using registers other than v0, v1 are possible, but would be odd.
 ; UGE = ULE with swapped operands, ULE implemented as !OGT.
 ;CHECK: fcmgt {{v[0-9]+}}.2s, v1.2s, v0.2s
-;CHECK-NEXT: movi {{v[0-9]+}}.8b, #0xff
-;CHECK-NEXT: eor {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
    %tmp3 = fcmp uge <2 x float> %A, %B
    %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
 	ret <2 x i32> %tmp4
@@ -1456,8 +1435,7 @@ define <4 x i32> @fcmuge4xfloat(<4 x float> %A, <4 x float> %B) {
 ; Using registers other than v0, v1 are possible, but would be odd.
 ; UGE = ULE with swapped operands, ULE implemented as !OGT.
 ;CHECK: fcmgt {{v[0-9]+}}.4s, v1.4s, v0.4s
-;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff
-;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
    %tmp3 = fcmp uge <4 x float> %A, %B
    %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
 	ret <4 x i32> %tmp4
@@ -1467,8 +1445,7 @@ define <2 x i64> @fcmuge2xdouble(<2 x double> %A, <2 x double> %B) {
 ; Using registers other than v0, v1 are possible, but would be odd.
 ; UGE = ULE with swapped operands, ULE implemented as !OGT.
 ;CHECK: fcmgt {{v[0-9]+}}.2d, v1.2d, v0.2d
-;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff
-;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
    %tmp3 = fcmp uge <2 x double> %A, %B
    %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
 	ret <2 x i64> %tmp4
@@ -1478,8 +1455,7 @@ define <2 x i32> @fcmugt2xfloat(<2 x float> %A, <2 x float> %B) {
 ; Using registers other than v0, v1 are possible, but would be odd.
 ; UGT = ULT with swapped operands, ULT implemented as !OGE.
 ;CHECK: fcmge {{v[0-9]+}}.2s, v1.2s, v0.2s
-;CHECK-NEXT: movi {{v[0-9]+}}.8b, #0xff
-;CHECK-NEXT: eor {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
    %tmp3 = fcmp ugt <2 x float> %A, %B
    %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
 	ret <2 x i32> %tmp4
@@ -1489,16 +1465,14 @@ define <4 x i32> @fcmugt4xfloat(<4 x float> %A, <4 x float> %B) {
 ; Using registers other than v0, v1 are possible, but would be odd.
 ; UGT = ULT with swapped operands, ULT implemented as !OGE.
 ;CHECK: fcmge {{v[0-9]+}}.4s, v1.4s, v0.4s
-;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff
-;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
    %tmp3 = fcmp ugt <4 x float> %A, %B
    %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
 	ret <4 x i32> %tmp4
 }
 define <2 x i64> @fcmugt2xdouble(<2 x double> %A, <2 x double> %B) {
 ;CHECK: fcmge {{v[0-9]+}}.2d, v1.2d, v0.2d
-;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff
-;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
    %tmp3 = fcmp ugt <2 x double> %A, %B
    %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
 	ret <2 x i64> %tmp4
@@ -1508,8 +1482,7 @@ define <2 x i32> @fcmule2xfloat(<2 x float> %A, <2 x float> %B) {
 ; Using registers other than v0, v1 are possible, but would be odd.
 ; ULE implemented as !OGT.
 ;CHECK: fcmgt {{v[0-9]+}}.2s, v0.2s, v1.2s
-;CHECK-NEXT: movi {{v[0-9]+}}.8b, #0xff
-;CHECK-NEXT: eor {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
    %tmp3 = fcmp ule <2 x float> %A, %B
    %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
 	ret <2 x i32> %tmp4
@@ -1519,8 +1492,7 @@ define <4 x i32> @fcmule4xfloat(<4 x float> %A, <4 x float> %B) {
 ; Using registers other than v0, v1 are possible, but would be odd.
 ; ULE implemented as !OGT.
 ;CHECK: fcmgt {{v[0-9]+}}.4s, v0.4s, v1.4s
-;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff
-;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
    %tmp3 = fcmp ule <4 x float> %A, %B
    %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
 	ret <4 x i32> %tmp4
@@ -1529,8 +1501,7 @@ define <2 x i64> @fcmule2xdouble(<2 x double> %A, <2 x double> %B) {
 ; Using registers other than v0, v1 are possible, but would be odd.
 ; ULE implemented as !OGT.
 ;CHECK: fcmgt {{v[0-9]+}}.2d, v0.2d, v1.2d
-;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff
-;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
    %tmp3 = fcmp ule <2 x double> %A, %B
    %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
 	ret <2 x i64> %tmp4
@@ -1540,8 +1511,7 @@ define <2 x i32> @fcmult2xfloat(<2 x float> %A, <2 x float> %B) {
 ; Using registers other than v0, v1 are possible, but would be odd.
 ; ULT implemented as !OGE.
 ;CHECK: fcmge {{v[0-9]+}}.2s, v0.2s, v1.2s
-;CHECK-NEXT: movi {{v[0-9]+}}.8b, #0xff
-;CHECK-NEXT: eor {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
    %tmp3 = fcmp ult <2 x float> %A, %B
    %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
 	ret <2 x i32> %tmp4
@@ -1551,8 +1521,7 @@ define <4 x i32> @fcmult4xfloat(<4 x float> %A, <4 x float> %B) {
 ; Using registers other than v0, v1 are possible, but would be odd.
 ; ULT implemented as !OGE.
 ;CHECK: fcmge {{v[0-9]+}}.4s, v0.4s, v1.4s
-;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff
-;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
    %tmp3 = fcmp ult <4 x float> %A, %B
    %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
 	ret <4 x i32> %tmp4
@@ -1561,8 +1530,7 @@ define <2 x i64> @fcmult2xdouble(<2 x double> %A, <2 x double> %B) {
 ; Using registers other than v0, v1 are possible, but would be odd.
 ; ULT implemented as !OGE.
 ;CHECK: fcmge {{v[0-9]+}}.2d, v0.2d, v1.2d
-;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff
-;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
    %tmp3 = fcmp ult <2 x double> %A, %B
    %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
 	ret <2 x i64> %tmp4
@@ -1572,8 +1540,7 @@ define <2 x i32> @fcmune2xfloat(<2 x float> %A, <2 x float> %B) {
 ; Using registers other than v0, v1 are possible, but would be odd.
 ; UNE = !OEQ.
 ;CHECK: fcmeq {{v[0-9]+}}.2s, v0.2s, v1.2s
-;CHECK-NEXT: movi {{v[0-9]+}}.8b, #0xff
-;CHECK-NEXT: eor {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
    %tmp3 = fcmp une <2 x float> %A, %B
    %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
 	ret <2 x i32> %tmp4
@@ -1583,8 +1550,7 @@ define <4 x i32> @fcmune4xfloat(<4 x float> %A, <4 x float> %B) {
 ; Using registers other than v0, v1 are possible, but would be odd.
 ; UNE = !OEQ.
 ;CHECK: fcmeq {{v[0-9]+}}.4s, v0.4s, v1.4s
-;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff
-;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
    %tmp3 = fcmp une <4 x float> %A, %B
    %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
 	ret <4 x i32> %tmp4
@@ -1593,8 +1559,7 @@ define <2 x i64> @fcmune2xdouble(<2 x double> %A, <2 x double> %B) {
 ; Using registers other than v0, v1 are possible, but would be odd.
 ; UNE = !OEQ.
 ;CHECK: fcmeq {{v[0-9]+}}.2d, v0.2d, v1.2d
-;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff
-;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
    %tmp3 = fcmp une <2 x double> %A, %B
    %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
 	ret <2 x i64> %tmp4
@@ -1766,8 +1731,7 @@ define <2 x i32> @fcmueqz2xfloat(<2 x float> %A) {
 ;CHECK: fcmgt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0.0
 ;CHECK-NEXT: fcmlt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0.0
 ;CHECK-NEXT: orr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-;CHECK-NEXT: movi {{v[0-9]+}}.8b, #0xff
-;CHECK-NEXT: eor {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
    %tmp3 = fcmp ueq <2 x float> %A, zeroinitializer
    %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
 	ret <2 x i32> %tmp4
@@ -1778,8 +1742,7 @@ define <4 x i32> @fcmueqz4xfloat(<4 x float> %A) {
 ;CHECK: fcmgt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0.0
 ;CHECK-NEXT: fcmlt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0.0
 ;CHECK-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff
-;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
    %tmp3 = fcmp ueq <4 x float> %A, zeroinitializer
    %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
 	ret <4 x i32> %tmp4
@@ -1790,8 +1753,7 @@ define <2 x i64> @fcmueqz2xdouble(<2 x double> %A) {
 ;CHECK: fcmgt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0.0
 ;CHECK-NEXT: fcmlt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0.0
 ;CHECK-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff
-;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
    %tmp3 = fcmp ueq <2 x double> %A, zeroinitializer
    %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
 	ret <2 x i64> %tmp4
@@ -1800,8 +1762,7 @@ define <2 x i64> @fcmueqz2xdouble(<2 x double> %A) {
 define <2 x i32> @fcmugez2xfloat(<2 x float> %A) {
 ; UGE with zero = !OLT
 ;CHECK: fcmlt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0.0
-;CHECK-NEXT: movi {{v[0-9]+}}.8b, #0xff
-;CHECK-NEXT: eor {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
    %tmp3 = fcmp uge <2 x float> %A, zeroinitializer
    %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
 	ret <2 x i32> %tmp4
@@ -1810,8 +1771,7 @@ define <2 x i32> @fcmugez2xfloat(<2 x float> %A) {
 define <4 x i32> @fcmugez4xfloat(<4 x float> %A) {
 ; UGE with zero = !OLT
 ;CHECK: fcmlt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0.0
-;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff
-;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
    %tmp3 = fcmp uge <4 x float> %A, zeroinitializer
    %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
 	ret <4 x i32> %tmp4
@@ -1819,8 +1779,7 @@ define <4 x i32> @fcmugez4xfloat(<4 x float> %A) {
 define <2 x i64> @fcmugez2xdouble(<2 x double> %A) {
 ; UGE with zero = !OLT
 ;CHECK: fcmlt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0.0
-;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff
-;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
    %tmp3 = fcmp uge <2 x double> %A, zeroinitializer
    %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
 	ret <2 x i64> %tmp4
@@ -1829,8 +1788,7 @@ define <2 x i64> @fcmugez2xdouble(<2 x double> %A) {
 define <2 x i32> @fcmugtz2xfloat(<2 x float> %A) {
 ; UGT with zero = !OLE
 ;CHECK: fcmle {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0.0
-;CHECK-NEXT: movi {{v[0-9]+}}.8b, #0xff
-;CHECK-NEXT: eor {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
    %tmp3 = fcmp ugt <2 x float> %A, zeroinitializer
    %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
 	ret <2 x i32> %tmp4
@@ -1839,8 +1797,7 @@ define <2 x i32> @fcmugtz2xfloat(<2 x float> %A) {
 define <4 x i32> @fcmugtz4xfloat(<4 x float> %A) {
 ; UGT with zero = !OLE
 ;CHECK: fcmle {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0.0
-;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff
-;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
    %tmp3 = fcmp ugt <4 x float> %A, zeroinitializer
    %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
 	ret <4 x i32> %tmp4
@@ -1848,8 +1805,7 @@ define <4 x i32> @fcmugtz4xfloat(<4 x float> %A) {
 define <2 x i64> @fcmugtz2xdouble(<2 x double> %A) {
 ; UGT with zero = !OLE
 ;CHECK: fcmle {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0.0
-;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff
-;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
    %tmp3 = fcmp ugt <2 x double> %A, zeroinitializer
    %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
 	ret <2 x i64> %tmp4
@@ -1858,8 +1814,7 @@ define <2 x i64> @fcmugtz2xdouble(<2 x double> %A) {
 define <2 x i32> @fcmultz2xfloat(<2 x float> %A) {
 ; ULT with zero = !OGE
 ;CHECK: fcmge {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0.0
-;CHECK-NEXT: movi {{v[0-9]+}}.8b, #0xff
-;CHECK-NEXT: eor {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
    %tmp3 = fcmp ult <2 x float> %A, zeroinitializer
    %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
 	ret <2 x i32> %tmp4
@@ -1867,8 +1822,7 @@ define <2 x i32> @fcmultz2xfloat(<2 x float> %A) {
 
 define <4 x i32> @fcmultz4xfloat(<4 x float> %A) {
 ;CHECK: fcmge {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0.0
-;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff
-;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
    %tmp3 = fcmp ult <4 x float> %A, zeroinitializer
    %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
 	ret <4 x i32> %tmp4
@@ -1876,8 +1830,7 @@ define <4 x i32> @fcmultz4xfloat(<4 x float> %A) {
 
 define <2 x i64> @fcmultz2xdouble(<2 x double> %A) {
 ;CHECK: fcmge {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0.0
-;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff
-;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
    %tmp3 = fcmp ult <2 x double> %A, zeroinitializer
    %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
 	ret <2 x i64> %tmp4
@@ -1887,8 +1840,7 @@ define <2 x i64> @fcmultz2xdouble(<2 x double> %A) {
 define <2 x i32> @fcmulez2xfloat(<2 x float> %A) {
 ; ULE with zero = !OGT
 ;CHECK: fcmgt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0.0
-;CHECK-NEXT: movi {{v[0-9]+}}.8b, #0xff
-;CHECK-NEXT: eor {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
    %tmp3 = fcmp ule <2 x float> %A, zeroinitializer
    %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
 	ret <2 x i32> %tmp4
@@ -1897,8 +1849,7 @@ define <2 x i32> @fcmulez2xfloat(<2 x float> %A) {
 define <4 x i32> @fcmulez4xfloat(<4 x float> %A) {
 ; ULE with zero = !OGT
 ;CHECK: fcmgt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0.0
-;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff
-;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
    %tmp3 = fcmp ule <4 x float> %A, zeroinitializer
    %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
 	ret <4 x i32> %tmp4
@@ -1907,8 +1858,7 @@ define <4 x i32> @fcmulez4xfloat(<4 x float> %A) {
 define <2 x i64> @fcmulez2xdouble(<2 x double> %A) {
 ; ULE with zero = !OGT
 ;CHECK: fcmgt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0.0
-;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff
-;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
    %tmp3 = fcmp ule <2 x double> %A, zeroinitializer
    %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
 	ret <2 x i64> %tmp4
@@ -1917,8 +1867,7 @@ define <2 x i64> @fcmulez2xdouble(<2 x double> %A) {
 define <2 x i32> @fcmunez2xfloat(<2 x float> %A) {
 ; UNE with zero = !OEQ with zero
 ;CHECK: fcmeq {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0.0
-;CHECK-NEXT: movi {{v[0-9]+}}.8b, #0xff
-;CHECK-NEXT: eor {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
    %tmp3 = fcmp une <2 x float> %A, zeroinitializer
    %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
 	ret <2 x i32> %tmp4
@@ -1927,8 +1876,7 @@ define <2 x i32> @fcmunez2xfloat(<2 x float> %A) {
 define <4 x i32> @fcmunez4xfloat(<4 x float> %A) {
 ; UNE with zero = !OEQ with zero
 ;CHECK: fcmeq {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0.0
-;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff
-;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
    %tmp3 = fcmp une <4 x float> %A, zeroinitializer
    %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
 	ret <4 x i32> %tmp4
@@ -1936,8 +1884,7 @@ define <4 x i32> @fcmunez4xfloat(<4 x float> %A) {
 define <2 x i64> @fcmunez2xdouble(<2 x double> %A) {
 ; UNE with zero = !OEQ with zero
 ;CHECK: fcmeq {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0.0
-;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff
-;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
    %tmp3 = fcmp une <2 x double> %A, zeroinitializer
    %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
 	ret <2 x i64> %tmp4
@@ -1949,8 +1896,7 @@ define <2 x i32> @fcmunoz2xfloat(<2 x float> %A) {
 ;CHECK: fcmge {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0.0
 ;CHECK-NEXT: fcmlt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0.0
 ;CHECK-NEXT: orr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-;CHECK-NEXT: movi {{v[0-9]+}}.8b, #0xff
-;CHECK-NEXT: eor {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
    %tmp3 = fcmp uno <2 x float> %A, zeroinitializer
    %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
 	ret <2 x i32> %tmp4
@@ -1961,8 +1907,7 @@ define <4 x i32> @fcmunoz4xfloat(<4 x float> %A) {
 ;CHECK: fcmge {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0.0
 ;CHECK-NEXT: fcmlt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0.0
 ;CHECK-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff
-;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
    %tmp3 = fcmp uno <4 x float> %A, zeroinitializer
    %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
 	ret <4 x i32> %tmp4
@@ -1973,8 +1918,7 @@ define <2 x i64> @fcmunoz2xdouble(<2 x double> %A) {
 ;CHECK: fcmge {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0.0
 ;CHECK-NEXT: fcmlt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0.0
 ;CHECK-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff
-;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
    %tmp3 = fcmp uno <2 x double> %A, zeroinitializer
    %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
 	ret <2 x i64> %tmp4
diff --git a/test/MC/AArch64/neon-diagnostics.s b/test/MC/AArch64/neon-diagnostics.s
index 12d56a5fb0..044827e070 100644
--- a/test/MC/AArch64/neon-diagnostics.s
+++ b/test/MC/AArch64/neon-diagnostics.s
@@ -5089,6 +5089,1040 @@
 // CHECK-ERROR:        ucvtf d21, s14, #64
 // CHECK-ERROR:                   ^
 
+//------------------------------------------------------------------------------
+// Element reverse
+//------------------------------------------------------------------------------
+         rev64 v6.2d, v8.2d
+         rev32 v30.2s, v31.2s
+         rev32 v30.4s, v31.4s
+         rev32 v30.2d, v31.2d
+         rev16 v21.4h, v1.4h
+         rev16 v21.8h, v1.8h
+         rev16 v21.2s, v1.2s
+         rev16 v21.4s, v1.4s
+         rev16 v21.2d, v1.2d
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         rev64 v6.2d, v8.2d
+// CHECK-ERROR:                  ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         rev32 v30.2s, v31.2s
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         rev32 v30.4s, v31.4s
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         rev32 v30.2d, v31.2d
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         rev16 v21.4h, v1.4h
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         rev16 v21.8h, v1.8h
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         rev16 v21.2s, v1.2s
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         rev16 v21.4s, v1.4s
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         rev16 v21.2d, v1.2d
+// CHECK-ERROR:                   ^
+
+//------------------------------------------------------------------------------
+// Signed integer pairwise add long
+//------------------------------------------------------------------------------
+
+         saddlp v3.8h, v21.8h
+         saddlp v8.8b, v5.8b
+         saddlp v9.8h, v1.4s
+         saddlp v0.4s, v1.2d
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         saddlp v3.8h, v21.8h
+// CHECK-ERROR:                           ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         saddlp v8.8b, v5.8b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         saddlp v9.8h, v1.4s
+// CHECK-ERROR:                          ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         saddlp v0.4s, v1.2d
+// CHECK-ERROR:                          ^
+
+//------------------------------------------------------------------------------
+// Unsigned integer pairwise add long
+//------------------------------------------------------------------------------
+
+         uaddlp v3.8h, v21.8h
+         uaddlp v8.8b, v5.8b
+         uaddlp v9.8h, v1.4s
+         uaddlp v0.4s, v1.2d
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         uaddlp v3.8h, v21.8h
+// CHECK-ERROR:                           ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         uaddlp v8.8b, v5.8b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         uaddlp v9.8h, v1.4s
+// CHECK-ERROR:                          ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         uaddlp v0.4s, v1.2d
+// CHECK-ERROR:                          ^
+
+//------------------------------------------------------------------------------
+// Signed integer pairwise add and accumulate long
+//------------------------------------------------------------------------------
+
+         sadalp v3.16b, v21.16b
+         sadalp v8.4h, v5.4h
+         sadalp v9.4s, v1.4s
+         sadalp v0.4h, v1.2s
+         sadalp v12.2d, v4.8h
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         sadalp v3.16b, v21.16b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         sadalp v8.4h, v5.4h
+// CHECK-ERROR:                          ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         sadalp v9.4s, v1.4s
+// CHECK-ERROR:                          ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         sadalp v0.4h, v1.2s
+// CHECK-ERROR:                          ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         sadalp v12.2d, v4.8h
+// CHECK-ERROR:                           ^
+
+//------------------------------------------------------------------------------
+// Unsigned integer pairwise add and accumulate long
+//------------------------------------------------------------------------------
+
+         uadalp v3.16b, v21.16b
+         uadalp v8.4h, v5.4h
+         uadalp v9.4s, v1.4s
+         uadalp v0.4h, v1.2s
+         uadalp v12.2d, v4.8h
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         uadalp v3.16b, v21.16b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         uadalp v8.4h, v5.4h
+// CHECK-ERROR:                          ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         uadalp v9.4s, v1.4s
+// CHECK-ERROR:                          ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         uadalp v0.4h, v1.2s
+// CHECK-ERROR:                          ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         uadalp v12.2d, v4.8h
+// CHECK-ERROR:                           ^
+
+//------------------------------------------------------------------------------
+// Signed integer saturating accumulate of unsigned value
+//------------------------------------------------------------------------------
+
+         suqadd v0.16b, v31.8b
+         suqadd v1.8b, v9.8h
+         suqadd v13.4h, v21.4s
+         suqadd v4.2s, v0.2d
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         suqadd v0.16b, v31.8b
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         suqadd v1.8b, v9.8h
+// CHECK-ERROR:                       ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         suqadd v13.4h, v21.4s
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         suqadd v4.2s, v0.2d
+// CHECK-ERROR:                       ^
+
+//------------------------------------------------------------------------------
+// Unsigned integer saturating accumulate of signed value
+//------------------------------------------------------------------------------
+
+         usqadd v0.16b, v31.8b
+         usqadd v2.8h, v4.4h
+         usqadd v13.4h, v21.4s
+         usqadd v4.2s, v0.2d
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         usqadd v0.16b, v31.8b
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         usqadd v2.8h, v4.4h
+// CHECK-ERROR:                       ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         usqadd v13.4h, v21.4s
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         usqadd v4.2s, v0.2d
+// CHECK-ERROR:                       ^
+
+//------------------------------------------------------------------------------
+// Signed integer saturating absolute
+//------------------------------------------------------------------------------
+
+         sqabs v0.16b, v31.8b
+         sqabs v2.8h, v4.4h
+         sqabs v6.4s, v8.2s
+         sqabs v6.2d, v8.2s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         sqabs v0.16b, v31.8b
+// CHECK-ERROR:                       ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         sqabs v2.8h, v4.4h
+// CHECK-ERROR:                      ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         sqabs v6.4s, v8.2s
+// CHECK-ERROR:                      ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         sqabs v6.2d, v8.2s
+// CHECK-ERROR:                      ^
+
+//------------------------------------------------------------------------------
+// Signed integer saturating negate
+//------------------------------------------------------------------------------
+
+         sqneg v0.16b, v31.8b
+         sqneg v2.8h, v4.4h
+         sqneg v6.4s, v8.2s
+         sqneg v6.2d, v8.2s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         sqneg v0.16b, v31.8b
+// CHECK-ERROR:                       ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         sqneg v2.8h, v4.4h
+// CHECK-ERROR:                      ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         sqneg v6.4s, v8.2s
+// CHECK-ERROR:                      ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         sqneg v6.2d, v8.2s
+// CHECK-ERROR:                      ^
+
+//------------------------------------------------------------------------------
+// Integer absolute
+//------------------------------------------------------------------------------
+
+         abs v0.16b, v31.8b
+         abs v2.8h, v4.4h
+         abs v6.4s, v8.2s
+         abs v6.2d, v8.2s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         abs v0.16b, v31.8b
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         abs v2.8h, v4.4h
+// CHECK-ERROR:                    ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         abs v6.4s, v8.2s
+// CHECK-ERROR:                    ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         abs v6.2d, v8.2s
+// CHECK-ERROR:                    ^
+
+//------------------------------------------------------------------------------
+// Integer count leading sign bits
+//------------------------------------------------------------------------------
+
+         cls v0.2d, v31.2d
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         cls v0.2d, v31.2d
+// CHECK-ERROR:                ^
+
+//------------------------------------------------------------------------------
+// Integer count leading zeros
+//------------------------------------------------------------------------------
+
+         clz v0.2d, v31.2d
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         clz v0.2d, v31.2d
+// CHECK-ERROR:                ^
+
+//------------------------------------------------------------------------------
+// Population count
+//------------------------------------------------------------------------------
+
+         cnt v2.8h, v4.8h
+         cnt v6.4s, v8.4s
+         cnt v6.2d, v8.2d
+         cnt v13.4h, v21.4h
+         cnt v4.2s, v0.2s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         cnt v2.8h, v4.8h
+// CHECK-ERROR:                ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         cnt v6.4s, v8.4s
+// CHECK-ERROR:                ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         cnt v6.2d, v8.2d
+// CHECK-ERROR:                ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         cnt v13.4h, v21.4h
+// CHECK-ERROR:                 ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         cnt v4.2s, v0.2s
+// CHECK-ERROR:                ^
+
+
+//------------------------------------------------------------------------------
+// Bitwise NOT
+//------------------------------------------------------------------------------
+
+         not v2.8h, v4.8h
+         not v6.4s, v8.4s
+         not v6.2d, v8.2d
+         not v13.4h, v21.4h
+         not v4.2s, v0.2s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         not v2.8h, v4.8h
+// CHECK-ERROR:                ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         not v6.4s, v8.4s
+// CHECK-ERROR:                ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         not v6.2d, v8.2d
+// CHECK-ERROR:                ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         not v13.4h, v21.4h
+// CHECK-ERROR:                 ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         not v4.2s, v0.2s
+// CHECK-ERROR:                ^
+
+//------------------------------------------------------------------------------
+// Bitwise reverse
+//------------------------------------------------------------------------------
+
+         rbit v2.8h, v4.8h
+         rbit v6.4s, v8.4s
+         rbit v6.2d, v8.2d
+         rbit v13.4h, v21.4h
+         rbit v4.2s, v0.2s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         rbit v2.8h, v4.8h
+// CHECK-ERROR:                 ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         rbit v6.4s, v8.4s
+// CHECK-ERROR:                 ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         rbit v6.2d, v8.2d
+// CHECK-ERROR:                 ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         rbit v13.4h, v21.4h
+// CHECK-ERROR:                  ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         rbit v4.2s, v0.2s
+// CHECK-ERROR:                 ^
+
+//------------------------------------------------------------------------------
+// Floating-point absolute
+//------------------------------------------------------------------------------
+
+         fabs v0.16b, v31.16b
+         fabs v2.8h, v4.8h
+         fabs v1.8b, v9.8b
+         fabs v13.4h, v21.4h
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fabs v0.16b, v31.16b
+// CHECK-ERROR:                 ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fabs v2.8h, v4.8h
+// CHECK-ERROR:                 ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fabs v1.8b, v9.8b
+// CHECK-ERROR:                 ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fabs v13.4h, v21.4h
+// CHECK-ERROR:                  ^
+
+//------------------------------------------------------------------------------
+// Floating-point negate
+//------------------------------------------------------------------------------
+
+         fneg v0.16b, v31.16b
+         fneg v2.8h, v4.8h
+         fneg v1.8b, v9.8b
+         fneg v13.4h, v21.4h
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fneg v0.16b, v31.16b
+// CHECK-ERROR:                 ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fneg v2.8h, v4.8h
+// CHECK-ERROR:                 ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fneg v1.8b, v9.8b
+// CHECK-ERROR:                 ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fneg v13.4h, v21.4h
+// CHECK-ERROR:                  ^
+
+//------------------------------------------------------------------------------
+// Integer extract and narrow
+//------------------------------------------------------------------------------
+
+         xtn v0.16b, v31.8h
+         xtn v2.8h, v4.4s
+         xtn v6.4s, v8.2d
+         xtn2 v1.8b, v9.8h
+         xtn2 v13.4h, v21.4s
+         xtn2 v4.2s, v0.2d
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         xtn v0.16b, v31.8h
+// CHECK-ERROR:             ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         xtn v2.8h, v4.4s
+// CHECK-ERROR:             ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         xtn v6.4s, v8.2d
+// CHECK-ERROR:             ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         xtn2 v1.8b, v9.8h
+// CHECK-ERROR:              ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         xtn2 v13.4h, v21.4s
+// CHECK-ERROR:              ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         xtn2 v4.2s, v0.2d
+// CHECK-ERROR:              ^
+
+//------------------------------------------------------------------------------
+// Signed integer saturating extract and unsigned narrow
+//------------------------------------------------------------------------------
+
+         sqxtun v0.16b, v31.8h
+         sqxtun v2.8h, v4.4s
+         sqxtun v6.4s, v8.2d
+         sqxtun2 v1.8b, v9.8h
+         sqxtun2 v13.4h, v21.4s
+         sqxtun2 v4.2s, v0.2d
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         sqxtun v0.16b, v31.8h
+// CHECK-ERROR:                ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         sqxtun v2.8h, v4.4s
+// CHECK-ERROR:                ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         sqxtun v6.4s, v8.2d
+// CHECK-ERROR:                ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         sqxtun2 v1.8b, v9.8h
+// CHECK-ERROR:                 ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         sqxtun2 v13.4h, v21.4s
+// CHECK-ERROR:                 ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         sqxtun2 v4.2s, v0.2d
+// CHECK-ERROR:                 ^
+
+//------------------------------------------------------------------------------
+// Signed integer saturating extract and narrow
+//------------------------------------------------------------------------------
+
+         sqxtn v0.16b, v31.8h
+         sqxtn v2.8h, v4.4s
+         sqxtn v6.4s, v8.2d
+         sqxtn2 v1.8b, v9.8h
+         sqxtn2 v13.4h, v21.4s
+         sqxtn2 v4.2s, v0.2d
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         sqxtn v0.16b, v31.8h
+// CHECK-ERROR:               ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         sqxtn v2.8h, v4.4s
+// CHECK-ERROR:               ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         sqxtn v6.4s, v8.2d
+// CHECK-ERROR:               ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         sqxtn2 v1.8b, v9.8h
+// CHECK-ERROR:                ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         sqxtn2 v13.4h, v21.4s
+// CHECK-ERROR:                ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         sqxtn2 v4.2s, v0.2d
+// CHECK-ERROR:                ^
+
+//------------------------------------------------------------------------------
+// Unsigned integer saturating extract and narrow
+//------------------------------------------------------------------------------
+
+         uqxtn v0.16b, v31.8h
+         uqxtn v2.8h, v4.4s
+         uqxtn v6.4s, v8.2d
+         uqxtn2 v1.8b, v9.8h
+         uqxtn2 v13.4h, v21.4s
+         uqxtn2 v4.2s, v0.2d
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         uqxtn v0.16b, v31.8h
+// CHECK-ERROR:               ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         uqxtn v2.8h, v4.4s
+// CHECK-ERROR:               ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         uqxtn v6.4s, v8.2d
+// CHECK-ERROR:               ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         uqxtn2 v1.8b, v9.8h
+// CHECK-ERROR:                ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         uqxtn2 v13.4h, v21.4s
+// CHECK-ERROR:                ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         uqxtn2 v4.2s, v0.2d
+// CHECK-ERROR:                ^
+
+//------------------------------------------------------------------------------
+// Integer shift left long (shll with 8b/4h/2s sources is valid: no error)
+//------------------------------------------------------------------------------
+
+         shll2 v2.8h, v4.16b, #7
+         shll2 v6.4s, v8.8h, #15
+         shll2 v6.2d, v8.4s, #31
+         shll v2.8h, v4.16b, #8
+         shll v6.4s, v8.8h, #16
+         shll v6.2d, v8.4s, #32
+         shll v2.8h, v4.8b, #8
+         shll v6.4s, v8.4h, #16
+         shll v6.2d, v8.2s, #32
+         shll2 v2.8h, v4.8b, #5
+         shll2 v6.4s, v8.4h, #14
+         shll2 v6.2d, v8.2s, #1
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         shll2 v2.8h, v4.16b, #7
+// CHECK-ERROR:                              ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         shll2 v6.4s, v8.8h, #15
+// CHECK-ERROR:                             ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         shll2 v6.2d, v8.4s, #31
+// CHECK-ERROR:                             ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         shll v2.8h, v4.16b, #8
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         shll v6.4s, v8.8h, #16
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         shll v6.2d, v8.4s, #32
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         shll2 v2.8h, v4.8b, #5
+// CHECK-ERROR:                      ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         shll2 v6.4s, v8.4h, #14
+// CHECK-ERROR:                      ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         shll2 v6.2d, v8.2s, #1
+// CHECK-ERROR:                      ^
+
+//------------------------------------------------------------------------------
+// Floating-point convert downsize
+//------------------------------------------------------------------------------
+
+         fcvtn v2.8h, v4.4s
+         fcvtn v6.4s, v8.2d
+         fcvtn2 v13.4h, v21.4s
+         fcvtn2 v4.2s, v0.2d
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtn v2.8h, v4.4s
+// CHECK-ERROR:               ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtn v6.4s, v8.2d
+// CHECK-ERROR:               ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtn2 v13.4h, v21.4s
+// CHECK-ERROR:                ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtn2 v4.2s, v0.2d
+// CHECK-ERROR:                ^
+
+//------------------------------------------------------------------------------
+// Floating-point convert downsize with inexact
+//------------------------------------------------------------------------------
+
+         fcvtxn v6.4s, v8.2d
+         fcvtxn2 v4.2s, v0.2d
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtxn v6.4s, v8.2d
+// CHECK-ERROR:                ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtxn2 v4.2s, v0.2d
+// CHECK-ERROR:                 ^
+
+//------------------------------------------------------------------------------
+// Floating-point convert upsize
+//------------------------------------------------------------------------------
+
+         fcvtl2 v9.4s, v1.4h
+         fcvtl2 v0.2d, v1.2s
+         fcvtl v12.4s, v4.8h
+         fcvtl v17.2d, v28.4s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtl2 v9.4s, v1.4h
+// CHECK-ERROR:                       ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtl2 v0.2d, v1.2s
+// CHECK-ERROR:                       ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtl v12.4s, v4.8h
+// CHECK-ERROR:                       ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtl v17.2d, v28.4s
+// CHECK-ERROR:                       ^
+
+//------------------------------------------------------------------------------
+// Round to integral, FP/integer convert, reciprocal estimate, square root
+//------------------------------------------------------------------------------
+
+         frintn v0.16b, v31.16b
+         frintn v2.8h, v4.8h
+         frintn v1.8b, v9.8b
+         frintn v13.4h, v21.4h
+
+         frinta v0.16b, v31.16b
+         frinta v2.8h, v4.8h
+         frinta v1.8b, v9.8b
+         frinta v13.4h, v21.4h
+
+         frintp v0.16b, v31.16b
+         frintp v2.8h, v4.8h
+         frintp v1.8b, v9.8b
+         frintp v13.4h, v21.4h
+
+         frintm v0.16b, v31.16b
+         frintm v2.8h, v4.8h
+         frintm v1.8b, v9.8b
+         frintm v13.4h, v21.4h
+
+         frintx v0.16b, v31.16b
+         frintx v2.8h, v4.8h
+         frintx v1.8b, v9.8b
+         frintx v13.4h, v21.4h
+
+         frintz v0.16b, v31.16b
+         frintz v2.8h, v4.8h
+         frintz v1.8b, v9.8b
+         frintz v13.4h, v21.4h
+
+         frinti v0.16b, v31.16b
+         frinti v2.8h, v4.8h
+         frinti v1.8b, v9.8b
+         frinti v13.4h, v21.4h
+
+         fcvtns v0.16b, v31.16b
+         fcvtns v2.8h, v4.8h
+         fcvtns v1.8b, v9.8b
+         fcvtns v13.4h, v21.4h
+
+         fcvtnu v0.16b, v31.16b
+         fcvtnu v2.8h, v4.8h
+         fcvtnu v1.8b, v9.8b
+         fcvtnu v13.4h, v21.4h
+
+         fcvtps v0.16b, v31.16b
+         fcvtps v2.8h, v4.8h
+         fcvtps v1.8b, v9.8b
+         fcvtps v13.4h, v21.4h
+
+         fcvtpu v0.16b, v31.16b
+         fcvtpu v2.8h, v4.8h
+         fcvtpu v1.8b, v9.8b
+         fcvtpu v13.4h, v21.4h
+
+         fcvtms v0.16b, v31.16b
+         fcvtms v2.8h, v4.8h
+         fcvtms v1.8b, v9.8b
+         fcvtms v13.4h, v21.4h
+
+         fcvtmu v0.16b, v31.16b
+         fcvtmu v2.8h, v4.8h
+         fcvtmu v1.8b, v9.8b
+         fcvtmu v13.4h, v21.4h
+
+         fcvtzs v0.16b, v31.16b
+         fcvtzs v2.8h, v4.8h
+         fcvtzs v1.8b, v9.8b
+         fcvtzs v13.4h, v21.4h
+
+         fcvtzu v0.16b, v31.16b
+         fcvtzu v2.8h, v4.8h
+         fcvtzu v1.8b, v9.8b
+         fcvtzu v13.4h, v21.4h
+
+         fcvtas v0.16b, v31.16b
+         fcvtas v2.8h, v4.8h
+         fcvtas v1.8b, v9.8b
+         fcvtas v13.4h, v21.4h
+
+         fcvtau v0.16b, v31.16b
+         fcvtau v2.8h, v4.8h
+         fcvtau v1.8b, v9.8b
+         fcvtau v13.4h, v21.4h
+
+         urecpe v0.16b, v31.16b
+         urecpe v2.8h, v4.8h
+         urecpe v1.8b, v9.8b
+         urecpe v13.4h, v21.4h
+         urecpe v1.2d, v9.2d
+
+         ursqrte v0.16b, v31.16b
+         ursqrte v2.8h, v4.8h
+         ursqrte v1.8b, v9.8b
+         ursqrte v13.4h, v21.4h
+         ursqrte v1.2d, v9.2d
+
+         scvtf v0.16b, v31.16b
+         scvtf v2.8h, v4.8h
+         scvtf v1.8b, v9.8b
+         scvtf v13.4h, v21.4h
+
+         ucvtf v0.16b, v31.16b
+         ucvtf v2.8h, v4.8h
+         ucvtf v1.8b, v9.8b
+         ucvtf v13.4h, v21.4h
+
+         frecpe v0.16b, v31.16b
+         frecpe v2.8h, v4.8h
+         frecpe v1.8b, v9.8b
+         frecpe v13.4h, v21.4h
+
+         frsqrte v0.16b, v31.16b
+         frsqrte v2.8h, v4.8h
+         frsqrte v1.8b, v9.8b
+         frsqrte v13.4h, v21.4h
+
+         fsqrt v0.16b, v31.16b
+         fsqrt v2.8h, v4.8h
+         fsqrt v1.8b, v9.8b
+         fsqrt v13.4h, v21.4h
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         frintn v0.16b, v31.16b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         frintn v2.8h, v4.8h
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         frintn v1.8b, v9.8b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         frintn v13.4h, v21.4h
+// CHECK-ERROR:                    ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         frinta v0.16b, v31.16b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         frinta v2.8h, v4.8h
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         frinta v1.8b, v9.8b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         frinta v13.4h, v21.4h
+// CHECK-ERROR:                    ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         frintp v0.16b, v31.16b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         frintp v2.8h, v4.8h
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         frintp v1.8b, v9.8b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         frintp v13.4h, v21.4h
+// CHECK-ERROR:                    ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         frintm v0.16b, v31.16b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         frintm v2.8h, v4.8h
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         frintm v1.8b, v9.8b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         frintm v13.4h, v21.4h
+// CHECK-ERROR:                    ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         frintx v0.16b, v31.16b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         frintx v2.8h, v4.8h
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         frintx v1.8b, v9.8b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         frintx v13.4h, v21.4h
+// CHECK-ERROR:                    ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         frintz v0.16b, v31.16b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         frintz v2.8h, v4.8h
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         frintz v1.8b, v9.8b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         frintz v13.4h, v21.4h
+// CHECK-ERROR:                    ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         frinti v0.16b, v31.16b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         frinti v2.8h, v4.8h
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         frinti v1.8b, v9.8b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         frinti v13.4h, v21.4h
+// CHECK-ERROR:                    ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtns v0.16b, v31.16b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtns v2.8h, v4.8h
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtns v1.8b, v9.8b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtns v13.4h, v21.4h
+// CHECK-ERROR:                    ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtnu v0.16b, v31.16b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtnu v2.8h, v4.8h
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtnu v1.8b, v9.8b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtnu v13.4h, v21.4h
+// CHECK-ERROR:                    ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtps v0.16b, v31.16b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtps v2.8h, v4.8h
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtps v1.8b, v9.8b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtps v13.4h, v21.4h
+// CHECK-ERROR:                    ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtpu v0.16b, v31.16b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtpu v2.8h, v4.8h
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtpu v1.8b, v9.8b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtpu v13.4h, v21.4h
+// CHECK-ERROR:                    ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtms v0.16b, v31.16b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtms v2.8h, v4.8h
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtms v1.8b, v9.8b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtms v13.4h, v21.4h
+// CHECK-ERROR:                    ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtmu v0.16b, v31.16b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtmu v2.8h, v4.8h
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtmu v1.8b, v9.8b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtmu v13.4h, v21.4h
+// CHECK-ERROR:                    ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtzs v0.16b, v31.16b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtzs v2.8h, v4.8h
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtzs v1.8b, v9.8b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtzs v13.4h, v21.4h
+// CHECK-ERROR:                    ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtzu v0.16b, v31.16b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtzu v2.8h, v4.8h
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtzu v1.8b, v9.8b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtzu v13.4h, v21.4h
+// CHECK-ERROR:                    ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtas v0.16b, v31.16b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtas v2.8h, v4.8h
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtas v1.8b, v9.8b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtas v13.4h, v21.4h
+// CHECK-ERROR:                    ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtau v0.16b, v31.16b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtau v2.8h, v4.8h
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtau v1.8b, v9.8b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtau v13.4h, v21.4h
+// CHECK-ERROR:                    ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         urecpe v0.16b, v31.16b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         urecpe v2.8h, v4.8h
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         urecpe v1.8b, v9.8b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         urecpe v13.4h, v21.4h
+// CHECK-ERROR:                    ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         urecpe v1.2d, v9.2d
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         ursqrte v0.16b, v31.16b
+// CHECK-ERROR:                    ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         ursqrte v2.8h, v4.8h
+// CHECK-ERROR:                    ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         ursqrte v1.8b, v9.8b
+// CHECK-ERROR:                    ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         ursqrte v13.4h, v21.4h
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         ursqrte v1.2d, v9.2d
+// CHECK-ERROR:                    ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         scvtf v0.16b, v31.16b
+// CHECK-ERROR:                  ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         scvtf v2.8h, v4.8h
+// CHECK-ERROR:                  ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         scvtf v1.8b, v9.8b
+// CHECK-ERROR:                  ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         scvtf v13.4h, v21.4h
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         ucvtf v0.16b, v31.16b
+// CHECK-ERROR:                  ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         ucvtf v2.8h, v4.8h
+// CHECK-ERROR:                  ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         ucvtf v1.8b, v9.8b
+// CHECK-ERROR:                  ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         ucvtf v13.4h, v21.4h
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         frecpe v0.16b, v31.16b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         frecpe v2.8h, v4.8h
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         frecpe v1.8b, v9.8b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         frecpe v13.4h, v21.4h
+// CHECK-ERROR:                    ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         frsqrte v0.16b, v31.16b
+// CHECK-ERROR:                    ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         frsqrte v2.8h, v4.8h
+// CHECK-ERROR:                    ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         frsqrte v1.8b, v9.8b
+// CHECK-ERROR:                    ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         frsqrte v13.4h, v21.4h
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fsqrt v0.16b, v31.16b
+// CHECK-ERROR:                  ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fsqrt v2.8h, v4.8h
+// CHECK-ERROR:                  ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fsqrt v1.8b, v9.8b
+// CHECK-ERROR:                  ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fsqrt v13.4h, v21.4h
+// CHECK-ERROR:                   ^
+
 //----------------------------------------------------------------------
 // Scalar Floating-point Convert To Signed Fixed-point (Immediate)
 //----------------------------------------------------------------------