From 5df37dab763ce377095389c4ea1cff88db369954 Mon Sep 17 00:00:00 2001
From: Amara Emerson <amara.emerson@arm.com>
Date: Thu, 19 Sep 2013 11:59:01 +0000
Subject: [ARMv8] Add support for the v8 cryptography extensions.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190996 91177308-0d34-0410-b5e6-96231b3b80d8
---
 include/llvm/IR/IntrinsicsARM.td                |  17 ++++
 lib/Target/ARM/ARM.td                           |   3 +
 lib/Target/ARM/ARMInstrFormats.td               |   4 +-
 lib/Target/ARM/ARMInstrInfo.td                  |   2 +
 lib/Target/ARM/ARMInstrNEON.td                  | 111 ++++++++++++++++++++++--
 lib/Target/ARM/ARMSubtarget.cpp                 |   1 +
 lib/Target/ARM/ARMSubtarget.h                   |   4 +
 lib/Target/ARM/AsmParser/ARMAsmParser.cpp       |  13 +--
 lib/Target/ARM/Disassembler/ARMDisassembler.cpp |  20 +++++
 test/CodeGen/ARM/intrinsics-crypto.ll           |  57 ++++++++++++
 test/MC/ARM/invalid-neon-v8.s                   |  46 +++++++++-
 test/MC/ARM/neon-crypto.s                       |  51 +++++++++++
 test/MC/ARM/thumb-invalid-crypto.txt            |  42 +++++++++
 test/MC/ARM/thumb-neon-crypto.s                 |  35 ++++++++
 test/MC/Disassembler/ARM/neon-crypto.txt        |  35 ++++++++
 test/MC/Disassembler/ARM/thumb-neon-crypto.txt  |  35 ++++++++
 16 files changed, 459 insertions(+), 17 deletions(-)
 create mode 100644 test/CodeGen/ARM/intrinsics-crypto.ll
 create mode 100644 test/MC/ARM/neon-crypto.s
 create mode 100644 test/MC/ARM/thumb-invalid-crypto.txt
 create mode 100644 test/MC/ARM/thumb-neon-crypto.s
 create mode 100644 test/MC/Disassembler/ARM/neon-crypto.txt
 create mode 100644 test/MC/Disassembler/ARM/thumb-neon-crypto.txt

diff --git a/include/llvm/IR/IntrinsicsARM.td b/include/llvm/IR/IntrinsicsARM.td
index a2f062024b..8d8c4af850 100644
--- a/include/llvm/IR/IntrinsicsARM.td
+++ b/include/llvm/IR/IntrinsicsARM.td
@@ -466,4 +466,21 @@ def int_arm_neon_vbsl : Intrinsic<[llvm_anyvector_ty],
                         [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
                         [IntrNoMem]>;
 
+
+// Crypto instructions
+def int_arm_neon_aesd : Neon_2Arg_Intrinsic;
+def int_arm_neon_aese : Neon_2Arg_Intrinsic;
+def int_arm_neon_aesimc : Neon_1Arg_Intrinsic;
+def int_arm_neon_aesmc : Neon_1Arg_Intrinsic;
+def int_arm_neon_sha1h : Neon_1Arg_Intrinsic;
+def int_arm_neon_sha1su1 : Neon_2Arg_Intrinsic;
+def int_arm_neon_sha256su0 : Neon_2Arg_Intrinsic;
+def int_arm_neon_sha1c : Neon_3Arg_Intrinsic;
+def int_arm_neon_sha1m : Neon_3Arg_Intrinsic;
+def int_arm_neon_sha1p : Neon_3Arg_Intrinsic;
+def int_arm_neon_sha1su0: Neon_3Arg_Intrinsic;
+def int_arm_neon_sha256h: Neon_3Arg_Intrinsic;
+def int_arm_neon_sha256h2: Neon_3Arg_Intrinsic;
+def int_arm_neon_sha256su1: Neon_3Arg_Intrinsic;
+
 } // end TargetPrefix
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index 5752005eac..c9d71bd1ec 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -67,6 +67,9 @@ def FeaturePerfMon : SubtargetFeature<"perfmon", "HasPerfMon", "true",
                            "Enable support for Performance Monitor extensions">;
 def FeatureTrustZone : SubtargetFeature<"trustzone", "HasTrustZone", "true",
                           "Enable support for TrustZone security extensions">;
+def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true",
+                          "Enable support for Cryptography extensions",
+                          [FeatureNEON]>;
 
 // Some processors have FP multiply-accumulate instructions that don't
 // play nicely with other VFP / NEON instructions, and it's generally better
diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td
index e505e1a9ae..27cfe96048 100644
--- a/lib/Target/ARM/ARMInstrFormats.td
+++ b/lib/Target/ARM/ARMInstrFormats.td
@@ -2015,7 +2015,7 @@ class N2V<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16,
 }
 
 // Same as N2V but not predicated.
-class N2Vnp<bits<2> op17_16, bits<3> op10_8, bit op7, bit op6,
+class N2Vnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6,
             dag oops, dag iops, InstrItinClass itin, string OpcodeStr,
             string Dt, ValueType ResTy, ValueType OpTy, list<dag> pattern>
    : NeonInp<oops, iops, AddrModeNone, IndexModeNone, N2RegFrm, itin,
@@ -2032,7 +2032,7 @@ class N2Vnp<bits<2> op17_16, bits<3> op10_8, bit op7, bit op6,
   // Encode constant bits
   let Inst{27-23} = 0b00111;
   let Inst{21-20} = 0b11;
-  let Inst{19-18} = 0b10;
+  let Inst{19-18} = op19_18;
   let Inst{17-16} = op17_16;
   let Inst{11} = 0;
   let Inst{10-8} = op10_8;
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index e432aca48b..93ecb1640d 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -212,6 +212,8 @@ def HasFPARMv8       : Predicate<"Subtarget->hasFPARMv8()">,
                                  AssemblerPredicate<"FeatureFPARMv8", "FPARMv8">;
 def HasNEON          : Predicate<"Subtarget->hasNEON()">,
                                  AssemblerPredicate<"FeatureNEON", "NEON">;
+def HasCrypto        : Predicate<"Subtarget->hasCrypto()">,
+                                 AssemblerPredicate<"FeatureCrypto", "crypto">;
 def HasFP16          : Predicate<"Subtarget->hasFP16()">,
                                  AssemblerPredicate<"FeatureFP16","half-float">;
 def HasDivide        : Predicate<"Subtarget->hasDivide()">,
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index f1bd37ea52..269c13dedb 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -2355,17 +2355,36 @@ class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
 class N2VDIntnp<bits<2> op17_16, bits<3> op10_8, bit op7,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
-  : N2Vnp<op17_16, op10_8, op7, 0,  (outs DPR:$Vd), (ins DPR:$Vm),
+  : N2Vnp<0b10, op17_16, op10_8, op7, 0,  (outs DPR:$Vd), (ins DPR:$Vm),
           itin, OpcodeStr, Dt, ResTy, OpTy,
           [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
 
 class N2VQIntnp<bits<2> op17_16, bits<3> op10_8, bit op7,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
-  : N2Vnp<op17_16, op10_8, op7, 1,  (outs QPR:$Vd), (ins QPR:$Vm),
+  : N2Vnp<0b10, op17_16, op10_8, op7, 1,  (outs QPR:$Vd), (ins QPR:$Vm),
           itin, OpcodeStr, Dt, ResTy, OpTy,
           [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
 
+// Similar to NV2VQIntnp with some more encoding bits exposed (crypto).
+class N2VQIntXnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op6,
+              bit op7, InstrItinClass itin, string OpcodeStr, string Dt,
+              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
+  : N2Vnp<op19_18, op17_16, op10_8, op7, op6,  (outs QPR:$Vd), (ins QPR:$Vm),
+          itin, OpcodeStr, Dt, ResTy, OpTy,
+          [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
+
+// Same as N2VQIntXnp but with Vd as a src register.
+class N2VQIntX2np<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op6,
+              bit op7, InstrItinClass itin, string OpcodeStr, string Dt,
+              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
+  : N2Vnp<op19_18, op17_16, op10_8, op7, op6,
+          (outs QPR:$Vd), (ins QPR:$src, QPR:$Vm),
+          itin, OpcodeStr, Dt, ResTy, OpTy,
+          [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src), (OpTy QPR:$Vm))))]> {
+  let Constraints = "$src = $Vd";
+}
+
 // Narrow 2-register operations.
 class N2VN<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
            bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
@@ -2534,7 +2553,7 @@ class N3VDIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
                 string Dt, ValueType ResTy, ValueType OpTy,
                 SDPatternOperator IntOp, bit Commutable>
   : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
-          (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), f, itin, OpcodeStr, Dt,
+          (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, OpcodeStr, Dt,
           ResTy, OpTy, IntOp, Commutable,
           [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;
 
@@ -2592,6 +2611,19 @@ class N3VQIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
           ResTy, OpTy, IntOp, Commutable,
           [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>;
 
+// Same as N3VQIntnp but with Vd as a src register.
+class N3VQInt3np<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
+                bit op4, Format f, InstrItinClass itin, string OpcodeStr,
+                string Dt, ValueType ResTy, ValueType OpTy,
+                SDPatternOperator IntOp, bit Commutable>
+  : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
+          (outs QPR:$Vd), (ins QPR:$src, QPR:$Vn, QPR:$Vm), f, itin, OpcodeStr,
+          Dt, ResTy, OpTy, IntOp, Commutable,
+          [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src), (OpTy QPR:$Vn),
+                                       (OpTy QPR:$Vm))))]> {
+  let Constraints = "$src = $Vd";
+}
+
 class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
@@ -2842,6 +2874,7 @@ class N3VL<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
         [(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
   let isCommutable = Commutable;
 }
+
 class N3VLSL<bit op24, bits<2> op21_20, bits<4> op11_8,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyQ, ValueType TyD, SDNode OpNode>
@@ -2897,6 +2930,17 @@ class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
         [(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
   let isCommutable = Commutable;
 }
+
+// Same as above, but not predicated.
+class N3VLIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
+                bit op4, InstrItinClass itin, string OpcodeStr,
+                string Dt, ValueType ResTy, ValueType OpTy,
+                SDPatternOperator IntOp, bit Commutable>
+  : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
+          (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, OpcodeStr, Dt,
+          ResTy, OpTy, IntOp, Commutable,
+          [(set QPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;
+
 class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
@@ -4078,12 +4122,18 @@ def : Pat<(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src1),
                                   (SubReg_i32_lane imm:$lane)))>;
 
 //   VMULL    : Vector Multiply Long (integer and polynomial) (Q = D * D)
-defm VMULLs   : N3VL_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
-                         "vmull", "s", NEONvmulls, 1>;
-defm VMULLu   : N3VL_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
-                         "vmull", "u", NEONvmullu, 1>;
-def  VMULLp   : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull", "p8",
-                        v8i16, v8i8, int_arm_neon_vmullp, 1>;
+let PostEncoderMethod = "NEONThumb2DataIPostEncoder",
+    DecoderNamespace = "NEONData" in {
+  defm VMULLs   : N3VL_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
+                           "vmull", "s", NEONvmulls, 1>;
+  defm VMULLu   : N3VL_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
+                           "vmull", "u", NEONvmullu, 1>;
+  def  VMULLp8   :  N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull", "p8",
+                            v8i16, v8i8, int_arm_neon_vmullp, 1>;
+  def  VMULLp64  : N3VLIntnp<0b00101, 0b10, 0b1110, 0, 0, NoItinerary,
+                          "vmull", "p64", v2i64, v1i64, int_arm_neon_vmullp, 1>,
+                    Requires<[HasV8, HasCrypto]>;
+}
 defm VMULLsls : N3VLSL_HS<0, 0b1010, IIC_VMULi16D, "vmull", "s", NEONvmulls>;
 defm VMULLslu : N3VLSL_HS<1, 0b1010, IIC_VMULi16D, "vmull", "u", NEONvmullu>;
 
@@ -5818,6 +5868,49 @@ defm VRINTZN : VRINT_FPI<"z", 0b011, int_arm_neon_vrintz>;
 defm VRINTMN : VRINT_FPI<"m", 0b101, int_arm_neon_vrintm>;
 defm VRINTPN : VRINT_FPI<"p", 0b111, int_arm_neon_vrintp>;
 
+// Cryptography instructions
+let PostEncoderMethod = "NEONThumb2DataIPostEncoder",
+    DecoderNamespace = "v8Crypto" in {
+  class AES<string op, bit op7, bit op6, SDPatternOperator Int>
+    : N2VQIntXnp<0b00, 0b00, 0b011, op6, op7, NoItinerary,
+                 !strconcat("aes", op), "8", v16i8, v16i8, Int>,
+      Requires<[HasV8, HasCrypto]>;
+  class AES2Op<string op, bit op7, bit op6, SDPatternOperator Int>
+    : N2VQIntX2np<0b00, 0b00, 0b011, op6, op7, NoItinerary,
+                 !strconcat("aes", op), "8", v16i8, v16i8, Int>,
+      Requires<[HasV8, HasCrypto]>;
+  class N2SHA<string op, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6,
+              SDPatternOperator Int>
+    : N2VQIntXnp<0b10, op17_16, op10_8, op6, op7, NoItinerary,
+                 !strconcat("sha", op), "32", v4i32, v4i32, Int>,
+      Requires<[HasV8, HasCrypto]>;
+  class N2SHA2Op<string op, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6,
+              SDPatternOperator Int>
+    : N2VQIntX2np<0b10, op17_16, op10_8, op6, op7, NoItinerary,
+                 !strconcat("sha", op), "32", v4i32, v4i32, Int>,
+      Requires<[HasV8, HasCrypto]>;
+  class N3SHA3Op<string op, bits<5> op27_23, bits<2> op21_20, SDPatternOperator Int>
+    : N3VQInt3np<op27_23, op21_20, 0b1100, 1, 0, N3RegFrm, NoItinerary,
+                !strconcat("sha", op), "32", v4i32, v4i32, Int, 0>,
+      Requires<[HasV8, HasCrypto]>;
+}
+
+def AESD : AES2Op<"d", 0, 1, int_arm_neon_aesd>;
+def AESE : AES2Op<"e", 0, 0, int_arm_neon_aese>;
+def AESIMC : AES<"imc", 1, 1, int_arm_neon_aesimc>;
+def AESMC : AES<"mc", 1, 0, int_arm_neon_aesmc>;
+
+def SHA1H : N2SHA<"1h", 0b01, 0b010, 1, 1, int_arm_neon_sha1h>;
+def SHA1SU1 : N2SHA2Op<"1su1", 0b10, 0b011, 1, 0, int_arm_neon_sha1su1>;
+def SHA256SU0 : N2SHA2Op<"256su0", 0b10, 0b011, 1, 1, int_arm_neon_sha256su0>;
+def SHA1C : N3SHA3Op<"1c", 0b00100, 0b00, int_arm_neon_sha1c>;
+def SHA1M : N3SHA3Op<"1m", 0b00100, 0b10, int_arm_neon_sha1m>;
+def SHA1P : N3SHA3Op<"1p", 0b00100, 0b01, int_arm_neon_sha1p>;
+def SHA1SU0 : N3SHA3Op<"1su0", 0b00100, 0b11, int_arm_neon_sha1su0>;
+def SHA256H : N3SHA3Op<"256h", 0b00110, 0b00, int_arm_neon_sha256h>;
+def SHA256H2 : N3SHA3Op<"256h2", 0b00110, 0b01, int_arm_neon_sha256h2>;
+def SHA256SU1 : N3SHA3Op<"256su1", 0b00110, 0b10, int_arm_neon_sha256su1>;
+
 //===----------------------------------------------------------------------===//
 // NEON instructions for single-precision FP math
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index a227718e30..0692fbd1c1 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -110,6 +110,7 @@ void ARMSubtarget::initializeEnvironment() {
   FPOnlySP = false;
   HasPerfMon = false;
   HasTrustZone = false;
+  HasCrypto = false;
   AllowsUnalignedMem = false;
   Thumb2DSP = false;
   UseNaClTrap = false;
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index 65278a5846..a3b701a15a 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -159,6 +159,9 @@ protected:
   /// HasTrustZone - if true, processor supports TrustZone security extensions
   bool HasTrustZone;
 
+  /// HasCrypto - if true, processor supports Cryptography extensions
+  bool HasCrypto;
+
   /// AllowsUnalignedMem - If true, the subtarget allows unaligned memory
   /// accesses for some types.  For details, see
   /// ARMTargetLowering::allowsUnalignedMemoryAccesses().
@@ -248,6 +251,7 @@ public:
   bool hasVFP4() const { return HasVFPv4; }
   bool hasFPARMv8() const { return HasFPARMv8; }
   bool hasNEON() const { return HasNEON;  }
+  bool hasCrypto() const { return HasCrypto; }
   bool useNEONForSinglePrecisionFP() const {
     return hasNEON() && UseNEONForSinglePrecisionFP; }
 
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index ed9c8c4e00..edb7ccdc29 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -141,7 +141,8 @@ class ARMAsmParser : public MCTargetAsmParser {
   StringRef splitMnemonic(StringRef Mnemonic, unsigned &PredicationCode,
                           bool &CarrySetting, unsigned &ProcessorIMod,
                           StringRef &ITMask);
-  void getMnemonicAcceptInfo(StringRef Mnemonic, bool &CanAcceptCarrySet,
+  void getMnemonicAcceptInfo(StringRef Mnemonic, StringRef FullInst,
+                             bool &CanAcceptCarrySet,
                              bool &CanAcceptPredicationCode);
 
   bool isThumb() const {
@@ -4784,8 +4785,8 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic,
 //
 // FIXME: It would be nice to autogen this.
 void ARMAsmParser::
-getMnemonicAcceptInfo(StringRef Mnemonic, bool &CanAcceptCarrySet,
-                      bool &CanAcceptPredicationCode) {
+getMnemonicAcceptInfo(StringRef Mnemonic, StringRef FullInst,
+                     bool &CanAcceptCarrySet, bool &CanAcceptPredicationCode) {
   if (Mnemonic == "and" || Mnemonic == "lsl" || Mnemonic == "lsr" ||
       Mnemonic == "rrx" || Mnemonic == "ror" || Mnemonic == "sub" ||
       Mnemonic == "add" || Mnemonic == "adc" ||
@@ -4808,7 +4809,9 @@ getMnemonicAcceptInfo(StringRef Mnemonic, bool &CanAcceptCarrySet,
       Mnemonic == "vmaxnm" || Mnemonic == "vminnm" || Mnemonic == "vcvta" ||
       Mnemonic == "vcvtn" || Mnemonic == "vcvtp" || Mnemonic == "vcvtm" ||
       Mnemonic == "vrinta" || Mnemonic == "vrintn" || Mnemonic == "vrintp" ||
-      Mnemonic == "vrintm") {
+      Mnemonic == "vrintm" || Mnemonic.startswith("aes") ||
+      Mnemonic.startswith("sha1") || Mnemonic.startswith("sha256") ||
+      (FullInst.startswith("vmull") && FullInst.endswith(".p64"))) {
     // These mnemonics are never predicable
     CanAcceptPredicationCode = false;
   } else if (!isThumb()) {
@@ -5068,7 +5071,7 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
   // the matcher deal with finding the right instruction or generating an
   // appropriate error.
   bool CanAcceptCarrySet, CanAcceptPredicationCode;
-  getMnemonicAcceptInfo(Mnemonic, CanAcceptCarrySet, CanAcceptPredicationCode);
+  getMnemonicAcceptInfo(Mnemonic, Name, CanAcceptCarrySet, CanAcceptPredicationCode);
 
   // If we had a carry-set on an instruction that can't do that, issue an
   // error.
diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index 8a066643f2..7f53240f21 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -506,6 +506,14 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
     return result;
   }
 
+  MI.clear();
+  result = decodeInstruction(DecoderTablev8Crypto32, MI, insn, Address,
+                             this, STI);
+  if (result != MCDisassembler::Fail) {
+    Size = 4;
+    return result;
+  }
+
   MI.clear();
   Size = 0;
   return MCDisassembler::Fail;
@@ -825,6 +833,18 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
     }
   }
 
+  MI.clear();
+  uint32_t NEONCryptoInsn = insn32;
+  NEONCryptoInsn &= 0xF0FFFFFF; // Clear bits 27-24
+  NEONCryptoInsn |= (NEONCryptoInsn & 0x10000000) >> 4; // Move bit 28 to bit 24
+  NEONCryptoInsn |= 0x12000000; // Set bits 28 and 25
+  result = decodeInstruction(DecoderTablev8Crypto32, MI, NEONCryptoInsn,
+                             Address, this, STI);
+  if (result != MCDisassembler::Fail) {
+    Size = 4;
+    return result;
+  }
+
   MI.clear();
   uint32_t NEONv8Insn = insn32;
   NEONv8Insn &= 0xF3FFFFFF; // Clear bits 27-26
diff --git a/test/CodeGen/ARM/intrinsics-crypto.ll b/test/CodeGen/ARM/intrinsics-crypto.ll
new file mode 100644
index 0000000000..c038fe6da8
--- /dev/null
+++ b/test/CodeGen/ARM/intrinsics-crypto.ll
@@ -0,0 +1,57 @@
+; RUN: llc < %s -mtriple=armv8 -mattr=+crypto | FileCheck %s
+
+define arm_aapcs_vfpcc <16 x i8> @test_aesde(<16 x i8>* %a, <16 x i8> *%b) {
+  %tmp = load <16 x i8>* %a
+  %tmp2 = load <16 x i8>* %b
+  %tmp3 = call <16 x i8> @llvm.arm.neon.aesd.v16i8(<16 x i8> %tmp, <16 x i8> %tmp2)
+  ; CHECK: aesd.8 q{{[0-9]+}}, q{{[0-9]+}}
+  %tmp4 = call <16 x i8> @llvm.arm.neon.aese.v16i8(<16 x i8> %tmp3, <16 x i8> %tmp2)
+  ; CHECK: aese.8 q{{[0-9]+}}, q{{[0-9]+}}
+  %tmp5 = call <16 x i8> @llvm.arm.neon.aesimc.v16i8(<16 x i8> %tmp4)
+  ; CHECK: aesimc.8 q{{[0-9]+}}, q{{[0-9]+}}
+  %tmp6 = call <16 x i8> @llvm.arm.neon.aesmc.v16i8(<16 x i8> %tmp5)
+  ; CHECK: aesmc.8 q{{[0-9]+}}, q{{[0-9]+}}
+  ret <16 x i8> %tmp6
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_sha(<4 x i32> *%a, <4 x i32> *%b, <4 x i32> *%c) {
+  %tmp = load <4 x i32>* %a
+  %tmp2 = load <4 x i32>* %b
+  %tmp3 = load <4 x i32>* %c
+  %res1 = call <4 x i32> @llvm.arm.neon.sha1h.v4i32(<4 x i32> %tmp)
+  ; CHECK: sha1h.32 q{{[0-9]+}}, q{{[0-9]+}}
+  %res2 = call <4 x i32> @llvm.arm.neon.sha1c.v4i32(<4 x i32> %tmp2, <4 x i32> %tmp3, <4 x i32> %res1)
+  ; CHECK: sha1c.32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+  %res3 = call <4 x i32> @llvm.arm.neon.sha1m.v4i32(<4 x i32> %res2, <4 x i32> %tmp3, <4 x i32> %res1)
+  ; CHECK: sha1m.32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+  %res4 = call <4 x i32> @llvm.arm.neon.sha1p.v4i32(<4 x i32> %res3, <4 x i32> %tmp3, <4 x i32> %res1)
+  ; CHECK: sha1p.32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+  %res5 = call <4 x i32> @llvm.arm.neon.sha1su0.v4i32(<4 x i32> %res4, <4 x i32> %tmp3, <4 x i32> %res1)
+  ; CHECK: sha1su0.32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+  %res6 = call <4 x i32> @llvm.arm.neon.sha1su1.v4i32(<4 x i32> %res5, <4 x i32> %res1)
+  ; CHECK: sha1su1.32 q{{[0-9]+}}, q{{[0-9]+}}
+  %res7 = call <4 x i32> @llvm.arm.neon.sha256h.v4i32(<4 x i32> %res6, <4 x i32> %tmp3, <4 x i32> %res1)
+  ; CHECK: sha256h.32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+  %res8 = call <4 x i32> @llvm.arm.neon.sha256h2.v4i32(<4 x i32> %res7, <4 x i32> %tmp3, <4 x i32> %res1)
+  ; CHECK: sha256h2.32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+  %res9 = call <4 x i32> @llvm.arm.neon.sha256su1.v4i32(<4 x i32> %res8, <4 x i32> %tmp3, <4 x i32> %res1)
+  ; CHECK: sha256su1.32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+  %res10 = call <4 x i32> @llvm.arm.neon.sha256su0.v4i32(<4 x i32> %res9, <4 x i32> %tmp3)
+  ; CHECK: sha256su0.32 q{{[0-9]+}}, q{{[0-9]+}}
+  ret <4 x i32> %res10
+}
+
+declare <16 x i8> @llvm.arm.neon.aesd.v16i8(<16 x i8>, <16 x i8>)
+declare <16 x i8> @llvm.arm.neon.aese.v16i8(<16 x i8>, <16 x i8>)
+declare <16 x i8> @llvm.arm.neon.aesimc.v16i8(<16 x i8>)
+declare <16 x i8> @llvm.arm.neon.aesmc.v16i8(<16 x i8>)
+declare <4 x i32> @llvm.arm.neon.sha1h.v4i32(<4 x i32>)
+declare <4 x i32> @llvm.arm.neon.sha1c.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.arm.neon.sha1m.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.arm.neon.sha1p.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.arm.neon.sha1su0.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.arm.neon.sha256h.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.arm.neon.sha256h2.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.arm.neon.sha256su1.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.arm.neon.sha256su0.v4i32(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.arm.neon.sha1su1.v4i32(<4 x i32>, <4 x i32>)
diff --git a/test/MC/ARM/invalid-neon-v8.s b/test/MC/ARM/invalid-neon-v8.s
index 06406f3f9c..7aaf2778a8 100644
--- a/test/MC/ARM/invalid-neon-v8.s
+++ b/test/MC/ARM/invalid-neon-v8.s
@@ -1,4 +1,4 @@
-@ RUN: not llvm-mc -triple armv8 -mattr=+neon -show-encoding < %s 2>&1 | FileCheck %s
+@ RUN: not llvm-mc -triple armv8 -mattr=+neon,+crypto -show-encoding < %s 2>&1 | FileCheck %s
 
 vmaxnm.f32 s4, d5, q1
 @ CHECK: error: invalid operand for instruction
@@ -24,3 +24,47 @@ vrintz.f32 d3, q12
 @ CHECK: error: invalid operand for instruction
 vrintmge.f32.f32 d3, d4
 @ CHECK: error: instruction 'vrintm' is not predicable, but condition code specified
+
+aesd.8  q0, s1
+@ CHECK: error: invalid operand for instruction
+aese.8  s0, q1
+@ CHECK: error: invalid operand for instruction
+aesimc.8  s0, q1
+@ CHECK: error: invalid operand for instruction
+aesmc.8  q0, d1
+@ CHECK: error: invalid operand for instruction
+aesdge.8 q0, q1
+@ CHECK: error: instruction 'aesd' is not predicable, but condition code specified
+
+sha1h.32  d0, q1
+@ CHECK: error: invalid operand for instruction
+sha1su1.32  q0, s1
+@ CHECK: error: invalid operand for instruction
+sha256su0.32  s0, q1
+@ CHECK: error: invalid operand for instruction
+sha1heq.32  q0, q1
+@ CHECK: error: instruction 'sha1h' is not predicable, but condition code specified
+
+sha1c.32  s0, d1, q2
+@ CHECK: error: invalid operand for instruction
+sha1m.32  q0, s1, q2
+@ CHECK: error: invalid operand for instruction
+sha1p.32  s0, q1, q2
+@ CHECK: error: invalid operand for instruction
+sha1su0.32  d0, q1, q2
+@ CHECK: error: invalid operand for instruction
+sha256h.32  q0, s1, q2
+@ CHECK: error: invalid operand for instruction
+sha256h2.32  q0, q1, s2
+@ CHECK: error: invalid operand for instruction
+sha256su1.32  s0, d1, q2
+@ CHECK: error: invalid operand for instruction
+sha256su1lt.32  q0, d1, q2
+@ CHECK: error: instruction 'sha256su1' is not predicable, but condition code specified
+
+vmull.p64 q0, s1, s3
+@ CHECK: error: invalid operand for instruction
+vmull.p64 s1, d2, d3
+@ CHECK: error: invalid operand for instruction
+vmullge.p64 q0, d16, d17
+@ CHECK: error: instruction 'vmull' is not predicable, but condition code specified
diff --git a/test/MC/ARM/neon-crypto.s b/test/MC/ARM/neon-crypto.s
new file mode 100644
index 0000000000..92d24da6c6
--- /dev/null
+++ b/test/MC/ARM/neon-crypto.s
@@ -0,0 +1,51 @@
+@ RUN: llvm-mc -triple armv8 -mattr=+neon,+crypto -show-encoding < %s | FileCheck %s
+@ RUN: not llvm-mc -triple=armv7 -show-encoding < %s 2>&1 | FileCheck %s --check-prefix=CHECK-V7
+
+aesd.8  q0, q1
+aese.8  q0, q1
+aesimc.8  q0, q1
+aesmc.8  q0, q1
+@ CHECK: aesd.8 q0, q1          @ encoding: [0x42,0x03,0xb0,0xf3]
+@ CHECK: aese.8 q0, q1          @ encoding: [0x02,0x03,0xb0,0xf3]
+@ CHECK: aesimc.8 q0, q1        @ encoding: [0xc2,0x03,0xb0,0xf3]
+@ CHECK: aesmc.8 q0, q1         @ encoding: [0x82,0x03,0xb0,0xf3]
+@ CHECK-V7: instruction requires: crypto armv8
+@ CHECK-V7: instruction requires: crypto armv8
+@ CHECK-V7: instruction requires: crypto armv8
+@ CHECK-V7: instruction requires: crypto armv8
+
+sha1h.32  q0, q1
+sha1su1.32  q0, q1
+sha256su0.32  q0, q1
+@ CHECK: sha1h.32  q0, q1       @ encoding: [0xc2,0x02,0xb9,0xf3]
+@ CHECK: sha1su1.32 q0, q1      @ encoding: [0x82,0x03,0xba,0xf3]
+@ CHECK: sha256su0.32 q0, q1    @ encoding: [0xc2,0x03,0xba,0xf3]
+@ CHECK-V7: instruction requires: crypto armv8
+@ CHECK-V7: instruction requires: crypto armv8
+@ CHECK-V7: instruction requires: crypto armv8
+
+sha1c.32  q0, q1, q2
+sha1m.32  q0, q1, q2
+sha1p.32  q0, q1, q2
+sha1su0.32  q0, q1, q2
+sha256h.32  q0, q1, q2
+sha256h2.32  q0, q1, q2
+sha256su1.32  q0, q1, q2
+@ CHECK: sha1c.32  q0, q1, q2   @ encoding: [0x44,0x0c,0x02,0xf2]
+@ CHECK: sha1m.32  q0, q1, q2   @ encoding: [0x44,0x0c,0x22,0xf2]
+@ CHECK: sha1p.32 q0, q1, q2    @ encoding: [0x44,0x0c,0x12,0xf2]
+@ CHECK: sha1su0.32  q0, q1, q2      @ encoding: [0x44,0x0c,0x32,0xf2]
+@ CHECK: sha256h.32  q0, q1, q2      @ encoding: [0x44,0x0c,0x02,0xf3]
+@ CHECK: sha256h2.32 q0, q1, q2      @ encoding: [0x44,0x0c,0x12,0xf3]
+@ CHECK: sha256su1.32 q0, q1, q2     @ encoding: [0x44,0x0c,0x22,0xf3]
+@ CHECK-V7: instruction requires: crypto armv8
+@ CHECK-V7: instruction requires: crypto armv8
+@ CHECK-V7: instruction requires: crypto armv8
+@ CHECK-V7: instruction requires: crypto armv8
+@ CHECK-V7: instruction requires: crypto armv8
+@ CHECK-V7: instruction requires: crypto armv8
+@ CHECK-V7: instruction requires: crypto armv8
+
+vmull.p64 q8, d16, d17
+@ CHECK: vmull.p64  q8, d16, d17    @ encoding: [0xa1,0x0e,0xe0,0xf2]
+@ CHECK-V7: instruction requires: crypto armv8
diff --git a/test/MC/ARM/thumb-invalid-crypto.txt b/test/MC/ARM/thumb-invalid-crypto.txt
new file mode 100644
index 0000000000..a5f9a19690
--- /dev/null
+++ b/test/MC/ARM/thumb-invalid-crypto.txt
@@ -0,0 +1,42 @@
+@ RUN: not llvm-mc -triple thumbv8 -mattr=+neon,+crypto -show-encoding < %s 2>&1 | FileCheck %s
+
+iteee lo
+aesdlo.8  q0, q1
+@ CHECK: error: instruction 'aesd' is not predicable, but condition code specified
+aesimchs.8  q0, q1
+@ CHECK: error: instruction 'aesimc' is not predicable, but condition code specified
+aesmchs.8  q0, q1
+@ CHECK: error: instruction 'aesmc' is not predicable, but condition code specified
+aesehs.8 q0, q1
+@ CHECK: error: instruction 'aese' is not predicable, but condition code specified
+
+itee hs
+sha1hhs.32  q0, q1
+@ CHECK: error: instruction 'sha1h' is not predicable, but condition code specified
+sha1su1lo.32  q0, q1
+@ CHECK: error: instruction 'sha1su1' is not predicable, but condition code specified
+sha256su0lo.32  q0, q1
+@ CHECK: error: instruction 'sha256su0' is not predicable, but condition code specified
+
+iteee lo
+sha1clo.32  s0, d1, q2
+@ CHECK: error: instruction 'sha1c' is not predicable, but condition code specified
+sha1mhs.32  q0, s1, q2
+@ CHECK: error: instruction 'sha1m' is not predicable, but condition code specified
+sha1phs.32  s0, q1, q2
+@ CHECK: error: instruction 'sha1p' is not predicable, but condition code specified
+sha1su0hs.32  d0, q1, q2
+@ CHECK: error: instruction 'sha1su0' is not predicable, but condition code specified
+itee hs
+sha256hhs.32  q0, s1, q2
+@ CHECK: error: instruction 'sha256h' is not predicable, but condition code specified
+sha256h2lo.32  q0, q1, s2
+@ CHECK: error: instruction 'sha256h2' is not predicable, but condition code specified
+sha256su1lo.32  s0, d1, q2
+@ CHECK: error: instruction 'sha256su1' is not predicable, but condition code specified
+
+ite lo
+vmulllo.p64 q0, s1, s3
+@ CHECK: error: instruction 'vmull' is not predicable, but condition code specified
+vmullhs.p64 q0, d16, d17
+@ CHECK: error: instruction 'vmull' is not predicable, but condition code specified
diff --git a/test/MC/ARM/thumb-neon-crypto.s b/test/MC/ARM/thumb-neon-crypto.s
new file mode 100644
index 0000000000..096e9e81b1
--- /dev/null
+++ b/test/MC/ARM/thumb-neon-crypto.s
@@ -0,0 +1,35 @@
+@ RUN: llvm-mc -triple thumbv8 -mattr=+neon,+crypto -show-encoding < %s | FileCheck %s
+
+aesd.8  q0, q1
+@ CHECK: aesd.8  q0, q1         @ encoding: [0xb0,0xff,0x42,0x03]
+aese.8  q0, q1
+@ CHECK: aese.8 q0, q1          @ encoding: [0xb0,0xff,0x02,0x03]
+aesimc.8  q0, q1
+@ CHECK: aesimc.8 q0, q1        @ encoding: [0xb0,0xff,0xc2,0x03]
+aesmc.8  q0, q1
+@ CHECK: aesmc.8 q0, q1         @ encoding: [0xb0,0xff,0x82,0x03]
+
+sha1h.32  q0, q1
+@ CHECK: sha1h.32  q0, q1       @ encoding: [0xb9,0xff,0xc2,0x02]
+sha1su1.32  q0, q1
+@ CHECK: sha1su1.32 q0, q1      @ encoding: [0xba,0xff,0x82,0x03]
+sha256su0.32  q0, q1
+@ CHECK: sha256su0.32 q0, q1    @ encoding: [0xba,0xff,0xc2,0x03]
+
+sha1c.32  q0, q1, q2
+@ CHECK: sha1c.32  q0, q1, q2   @ encoding: [0x02,0xef,0x44,0x0c]
+sha1m.32  q0, q1, q2
+@ CHECK: sha1m.32  q0, q1, q2   @ encoding: [0x22,0xef,0x44,0x0c]
+sha1p.32  q0, q1, q2
+@ CHECK: sha1p.32 q0, q1, q2    @ encoding: [0x12,0xef,0x44,0x0c]
+sha1su0.32  q0, q1, q2
+@ CHECK: sha1su0.32  q0, q1, q2      @ encoding: [0x32,0xef,0x44,0x0c]
+sha256h.32  q0, q1, q2
+@ CHECK: sha256h.32  q0, q1, q2      @ encoding: [0x02,0xff,0x44,0x0c]
+sha256h2.32  q0, q1, q2
+@ CHECK: sha256h2.32 q0, q1, q2      @ encoding: [0x12,0xff,0x44,0x0c]
+sha256su1.32  q0, q1, q2
+@ CHECK: sha256su1.32 q0, q1, q2     @ encoding: [0x22,0xff,0x44,0x0c]
+
+vmull.p64 q8, d16, d17
+@ CHECK: vmull.p64  q8, d16, d17    @ encoding: [0xe0,0xef,0xa1,0x0e]
diff --git a/test/MC/Disassembler/ARM/neon-crypto.txt b/test/MC/Disassembler/ARM/neon-crypto.txt
new file mode 100644
index 0000000000..086c781991
--- /dev/null
+++ b/test/MC/Disassembler/ARM/neon-crypto.txt
@@ -0,0 +1,35 @@
+# RUN: llvm-mc -triple armv8-unknown-unknown -mattr=+neon,+crypto -disassemble < %s | FileCheck %s
+
+0x42,0x03,0xb0,0xf3
+# CHECK: aesd.8 q0, q1
+0x02,0x03,0xb0,0xf3
+# CHECK: aese.8 q0, q1
+0xc2,0x03,0xb0,0xf3
+# CHECK: aesimc.8 q0, q1
+0x82,0x03,0xb0,0xf3
+# CHECK: aesmc.8 q0, q1
+
+0xc2,0x02,0xb9,0xf3
+# CHECK: sha1h.32  q0, q1
+0x82,0x03,0xba,0xf3
+# CHECK: sha1su1.32 q0, q1
+0xc2,0x03,0xba,0xf3
+# CHECK: sha256su0.32 q0, q1
+
+0x44,0x0c,0x02,0xf2
+# CHECK: sha1c.32  q0, q1, q2
+0x44,0x0c,0x22,0xf2
+# CHECK: sha1m.32  q0, q1, q2
+0x44,0x0c,0x12,0xf2
+# CHECK: sha1p.32 q0, q1, q2
+0x44,0x0c,0x32,0xf2
+# CHECK: sha1su0.32  q0, q1, q2
+0x44,0x0c,0x02,0xf3
+# CHECK: sha256h.32  q0, q1, q2
+0x44,0x0c,0x12,0xf3
+# CHECK: sha256h2.32 q0, q1, q2
+0x44,0x0c,0x22,0xf3
+# CHECK: sha256su1.32 q0, q1, q2
+
+0xa1,0x0e,0xe0,0xf2
+# CHECK: vmull.p64  q8, d16, d17
diff --git a/test/MC/Disassembler/ARM/thumb-neon-crypto.txt b/test/MC/Disassembler/ARM/thumb-neon-crypto.txt
new file mode 100644
index 0000000000..30b5fb6695
--- /dev/null
+++ b/test/MC/Disassembler/ARM/thumb-neon-crypto.txt
@@ -0,0 +1,35 @@
+# RUN: llvm-mc -triple thumbv8-unknown-unknown -mattr=+neon,+crypto -disassemble < %s | FileCheck %s
+
+0xb0 0xff 0x42 0x03
+# CHECK: aesd.8  q0, q1
+0xb0 0xff 0x02 0x03
+# CHECK: aese.8 q0, q1
+0xb0 0xff 0xc2 0x03
+# CHECK: aesimc.8 q0, q1
+0xb0 0xff 0x82 0x03
+# CHECK: aesmc.8 q0, q1
+
+0xb9 0xff 0xc2 0x02
+# CHECK: sha1h.32  q0, q1
+0xba 0xff 0x82 0x03
+# CHECK: sha1su1.32 q0, q1
+0xba 0xff 0xc2 0x03
+# CHECK: sha256su0.32 q0, q1
+
+0x02 0xef 0x44 0x0c
+# CHECK: sha1c.32  q0, q1, q2
+0x22 0xef 0x44 0x0c
+# CHECK: sha1m.32  q0, q1, q2
+0x12 0xef 0x44 0x0c
+# CHECK: sha1p.32 q0, q1, q2
+0x32 0xef 0x44 0x0c
+# CHECK: sha1su0.32  q0, q1, q2
+0x02 0xff 0x44 0x0c
+# CHECK: sha256h.32  q0, q1, q2
+0x12 0xff 0x44 0x0c
+# CHECK: sha256h2.32 q0, q1, q2
+0x22 0xff 0x44 0x0c
+# CHECK: sha256su1.32 q0, q1, q2
+
+0xe0 0xef 0xa1 0x0e
+# CHECK: vmull.p64  q8, d16, d17
-- 
cgit v1.2.3