-rwxr-xr-x  autoconf/config.sub | 4
-rw-r--r--  autoconf/configure.ac | 5
-rwxr-xr-x  configure | 10
-rw-r--r--  docs/CompilerWriterInfo.rst | 7
-rw-r--r--  include/llvm/ADT/Triple.h | 1
-rw-r--r--  include/llvm/MC/MCExpr.h | 2
-rw-r--r--  include/llvm/MC/MCObjectWriter.h | 1
-rw-r--r--  include/llvm/Object/ELF.h | 85
-rw-r--r--  include/llvm/Support/ELF.h | 91
-rw-r--r--  lib/MC/MCELFStreamer.cpp | 4
-rw-r--r--  lib/MC/MCObjectFileInfo.cpp | 19
-rw-r--r--  lib/Support/Triple.cpp | 8
-rw-r--r--  lib/Target/AArch64/AArch64.h | 42
-rw-r--r--  lib/Target/AArch64/AArch64.td | 68
-rw-r--r--  lib/Target/AArch64/AArch64AsmPrinter.cpp | 361
-rw-r--r--  lib/Target/AArch64/AArch64AsmPrinter.h | 85
-rw-r--r--  lib/Target/AArch64/AArch64CallingConv.td | 196
-rw-r--r--  lib/Target/AArch64/AArch64ConstantIslandPass.cpp | 1420
-rw-r--r--  lib/Target/AArch64/AArch64FrameLowering.cpp | 644
-rw-r--r--  lib/Target/AArch64/AArch64FrameLowering.h | 103
-rw-r--r--  lib/Target/AArch64/AArch64ISelDAGToDAG.cpp | 422
-rw-r--r--  lib/Target/AArch64/AArch64ISelLowering.cpp | 2957
-rw-r--r--  lib/Target/AArch64/AArch64ISelLowering.h | 247
-rw-r--r--  lib/Target/AArch64/AArch64InstrFormats.td | 1011
-rw-r--r--  lib/Target/AArch64/AArch64InstrInfo.cpp | 805
-rw-r--r--  lib/Target/AArch64/AArch64InstrInfo.h | 110
-rw-r--r--  lib/Target/AArch64/AArch64InstrInfo.td | 5298
-rw-r--r--  lib/Target/AArch64/AArch64MCInstLower.cpp | 140
-rw-r--r--  lib/Target/AArch64/AArch64MachineFunctionInfo.cpp | 14
-rw-r--r--  lib/Target/AArch64/AArch64MachineFunctionInfo.h | 158
-rw-r--r--  lib/Target/AArch64/AArch64RegisterInfo.cpp | 211
-rw-r--r--  lib/Target/AArch64/AArch64RegisterInfo.h | 79
-rw-r--r--  lib/Target/AArch64/AArch64RegisterInfo.td | 205
-rw-r--r--  lib/Target/AArch64/AArch64Schedule.td | 10
-rw-r--r--  lib/Target/AArch64/AArch64SelectionDAGInfo.cpp | 25
-rw-r--r--  lib/Target/AArch64/AArch64SelectionDAGInfo.h | 32
-rw-r--r--  lib/Target/AArch64/AArch64Subtarget.cpp | 43
-rw-r--r--  lib/Target/AArch64/AArch64Subtarget.h | 54
-rw-r--r--  lib/Target/AArch64/AArch64TargetMachine.cpp | 78
-rw-r--r--  lib/Target/AArch64/AArch64TargetMachine.h | 69
-rw-r--r--  lib/Target/AArch64/AArch64TargetObjectFile.cpp | 19
-rw-r--r--  lib/Target/AArch64/AArch64TargetObjectFile.h | 27
-rw-r--r--  lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp | 2025
-rw-r--r--  lib/Target/AArch64/AsmParser/CMakeLists.txt | 7
-rw-r--r--  lib/Target/AArch64/AsmParser/LLVMBuild.txt | 24
-rw-r--r--  lib/Target/AArch64/AsmParser/Makefile | 15
-rw-r--r--  lib/Target/AArch64/CMakeLists.txt | 35
-rw-r--r--  lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp | 791
-rw-r--r--  lib/Target/AArch64/Disassembler/CMakeLists.txt | 7
-rw-r--r--  lib/Target/AArch64/Disassembler/LLVMBuild.txt | 24
-rw-r--r--  lib/Target/AArch64/Disassembler/Makefile | 16
-rw-r--r--  lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp | 408
-rw-r--r--  lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h | 171
-rw-r--r--  lib/Target/AArch64/InstPrinter/CMakeLists.txt | 8
-rw-r--r--  lib/Target/AArch64/InstPrinter/LLVMBuild.txt | 24
-rw-r--r--  lib/Target/AArch64/InstPrinter/Makefile | 15
-rw-r--r--  lib/Target/AArch64/LLVMBuild.txt | 36
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp | 580
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64BaseInfo.h | 779
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp | 287
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp | 160
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h | 27
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h | 108
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp | 41
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h | 27
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp | 517
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp | 173
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h | 161
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp | 991
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h | 65
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/CMakeLists.txt | 13
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/LLVMBuild.txt | 24
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/Makefile | 16
-rw-r--r--  lib/Target/AArch64/Makefile | 30
-rw-r--r--  lib/Target/AArch64/README.txt | 2
-rw-r--r--  lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp | 20
-rw-r--r--  lib/Target/AArch64/TargetInfo/CMakeLists.txt | 7
-rw-r--r--  lib/Target/AArch64/TargetInfo/LLVMBuild.txt | 24
-rw-r--r--  lib/Target/AArch64/TargetInfo/Makefile | 15
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMCExpr.h | 3
-rw-r--r--  lib/Target/LLVMBuild.txt | 2
-rwxr-xr-x  projects/sample/autoconf/config.sub | 4
-rw-r--r--  projects/sample/autoconf/configure.ac | 6
-rwxr-xr-x  projects/sample/configure | 9
-rw-r--r--  test/CodeGen/AArch64/adc.ll | 54
-rw-r--r--  test/CodeGen/AArch64/addsub-shifted.ll | 295
-rw-r--r--  test/CodeGen/AArch64/addsub.ll | 127
-rw-r--r--  test/CodeGen/AArch64/addsub_ext.ll | 189
-rw-r--r--  test/CodeGen/AArch64/adrp-relocation.ll | 35
-rw-r--r--  test/CodeGen/AArch64/alloca.ll | 134
-rw-r--r--  test/CodeGen/AArch64/analyze-branch.ll | 231
-rw-r--r--  test/CodeGen/AArch64/atomic-ops-not-barriers.ll | 24
-rw-r--r--  test/CodeGen/AArch64/atomic-ops.ll | 1099
-rw-r--r--  test/CodeGen/AArch64/basic-pic.ll | 70
-rw-r--r--  test/CodeGen/AArch64/bitfield-insert-0.ll | 19
-rw-r--r--  test/CodeGen/AArch64/bitfield-insert.ll | 193
-rw-r--r--  test/CodeGen/AArch64/bitfield.ll | 218
-rw-r--r--  test/CodeGen/AArch64/blockaddress.ll | 18
-rw-r--r--  test/CodeGen/AArch64/bool-loads.ll | 55
-rw-r--r--  test/CodeGen/AArch64/breg.ll | 17
-rw-r--r--  test/CodeGen/AArch64/callee-save.ll | 86
-rw-r--r--  test/CodeGen/AArch64/compare-branch.ll | 38
-rw-r--r--  test/CodeGen/AArch64/cond-sel.ll | 213
-rw-r--r--  test/CodeGen/AArch64/directcond.ll | 84
-rw-r--r--  test/CodeGen/AArch64/dp-3source.ll | 163
-rw-r--r--  test/CodeGen/AArch64/dp1.ll | 152
-rw-r--r--  test/CodeGen/AArch64/dp2.ll | 169
-rw-r--r--  test/CodeGen/AArch64/elf-extern.ll | 21
-rw-r--r--  test/CodeGen/AArch64/extract.ll | 57
-rw-r--r--  test/CodeGen/AArch64/fastcc-reserved.ll | 58
-rw-r--r--  test/CodeGen/AArch64/fastcc.ll | 123
-rw-r--r--  test/CodeGen/AArch64/fcmp.ll | 81
-rw-r--r--  test/CodeGen/AArch64/fcvt-fixed.ll | 191
-rw-r--r--  test/CodeGen/AArch64/fcvt-int.ll | 151
-rw-r--r--  test/CodeGen/AArch64/flags-multiuse.ll | 35
-rw-r--r--  test/CodeGen/AArch64/floatdp_1source.ll | 138
-rw-r--r--  test/CodeGen/AArch64/floatdp_2source.ll | 60
-rw-r--r--  test/CodeGen/AArch64/fp-cond-sel.ll | 26
-rw-r--r--  test/CodeGen/AArch64/fp-dp3.ll | 102
-rw-r--r--  test/CodeGen/AArch64/fp128-folding.ll | 17
-rw-r--r--  test/CodeGen/AArch64/fp128.ll | 280
-rw-r--r--  test/CodeGen/AArch64/fpimm.ll | 34
-rw-r--r--  test/CodeGen/AArch64/func-argpassing.ll | 192
-rw-r--r--  test/CodeGen/AArch64/func-calls.ll | 140
-rw-r--r--  test/CodeGen/AArch64/global-alignment.ll | 69
-rw-r--r--  test/CodeGen/AArch64/got-abuse.ll | 23
-rw-r--r--  test/CodeGen/AArch64/i128-align.ll | 29
-rw-r--r--  test/CodeGen/AArch64/illegal-float-ops.ll | 221
-rw-r--r--  test/CodeGen/AArch64/init-array.ll | 9
-rw-r--r--  test/CodeGen/AArch64/inline-asm-constraints-badI.ll | 7
-rw-r--r--  test/CodeGen/AArch64/inline-asm-constraints-badK.ll | 7
-rw-r--r--  test/CodeGen/AArch64/inline-asm-constraints-badK2.ll | 7
-rw-r--r--  test/CodeGen/AArch64/inline-asm-constraints-badL.ll | 7
-rw-r--r--  test/CodeGen/AArch64/inline-asm-constraints.ll | 117
-rw-r--r--  test/CodeGen/AArch64/inline-asm-modifiers.ll | 125
-rw-r--r--  test/CodeGen/AArch64/jump-table.ll | 56
-rw-r--r--  test/CodeGen/AArch64/large-frame.ll | 117
-rw-r--r--  test/CodeGen/AArch64/ldst-regoffset.ll | 333
-rw-r--r--  test/CodeGen/AArch64/ldst-unscaledimm.ll | 218
-rw-r--r--  test/CodeGen/AArch64/ldst-unsignedimm.ll | 251
-rw-r--r--  test/CodeGen/AArch64/lit.local.cfg | 6
-rw-r--r--  test/CodeGen/AArch64/literal_pools.ll | 49
-rw-r--r--  test/CodeGen/AArch64/local_vars.ll | 57
-rw-r--r--  test/CodeGen/AArch64/logical-imm.ll | 84
-rw-r--r--  test/CodeGen/AArch64/logical_shifted_reg.ll | 224
-rw-r--r--  test/CodeGen/AArch64/logical_shifted_reg.s | 208
-rw-r--r--  test/CodeGen/AArch64/movw-consts.ll | 124
-rw-r--r--  test/CodeGen/AArch64/pic-eh-stubs.ll | 60
-rw-r--r--  test/CodeGen/AArch64/regress-bitcast-formals.ll | 11
-rw-r--r--  test/CodeGen/AArch64/regress-f128csel-flags.ll | 27
-rw-r--r--  test/CodeGen/AArch64/regress-tail-livereg.ll | 19
-rw-r--r--  test/CodeGen/AArch64/regress-tblgen-chains.ll | 36
-rw-r--r--  test/CodeGen/AArch64/regress-w29-reserved-with-fp.ll | 37
-rw-r--r--  test/CodeGen/AArch64/regress-wzr-allocatable.ll | 41
-rw-r--r--  test/CodeGen/AArch64/setcc-takes-i32.ll | 22
-rw-r--r--  test/CodeGen/AArch64/sibling-call.ll | 97
-rw-r--r--  test/CodeGen/AArch64/tail-call.ll | 94
-rw-r--r--  test/CodeGen/AArch64/tls-dynamic-together.ll | 18
-rw-r--r--  test/CodeGen/AArch64/tls-dynamics.ll | 121
-rw-r--r--  test/CodeGen/AArch64/tls-execs.ll | 63
-rw-r--r--  test/CodeGen/AArch64/tst-br.ll | 48
-rw-r--r--  test/CodeGen/AArch64/variadic.ll | 144
-rw-r--r--  test/CodeGen/AArch64/zero-reg.ll | 31
-rw-r--r--  test/DebugInfo/AArch64/cfi-frame.ll | 58
-rw-r--r--  test/DebugInfo/AArch64/eh_frame.ll | 51
-rw-r--r--  test/DebugInfo/AArch64/eh_frame_personality.ll | 46
-rw-r--r--  test/DebugInfo/AArch64/lit.local.cfg | 6
-rw-r--r--  test/DebugInfo/AArch64/variable-loc.ll | 87
-rw-r--r--  test/MC/AArch64/basic-a64-diagnostics.s | 3709
-rw-r--r--  test/MC/AArch64/basic-a64-instructions.s | 4790
-rw-r--r--  test/MC/AArch64/elf-globaladdress.ll | 111
-rw-r--r--  test/MC/AArch64/elf-objdump.s | 5
-rw-r--r--  test/MC/AArch64/elf-reloc-addsubimm.s | 13
-rw-r--r--  test/MC/AArch64/elf-reloc-condbr.s | 13
-rw-r--r--  test/MC/AArch64/elf-reloc-ldrlit.s | 28
-rw-r--r--  test/MC/AArch64/elf-reloc-ldstunsimm.s | 34
-rw-r--r--  test/MC/AArch64/elf-reloc-movw.s | 98
-rw-r--r--  test/MC/AArch64/elf-reloc-pcreladdressing.s | 29
-rw-r--r--  test/MC/AArch64/elf-reloc-tstb.s | 18
-rw-r--r--  test/MC/AArch64/elf-reloc-uncondbrimm.s | 18
-rw-r--r--  test/MC/AArch64/lit.local.cfg | 5
-rw-r--r--  test/MC/AArch64/mapping-across-sections.s | 28
-rw-r--r--  test/MC/AArch64/mapping-within-section.s | 23
-rw-r--r--  test/MC/AArch64/tls-relocs.s | 662
-rw-r--r--  test/MC/Disassembler/AArch64/basic-a64-instructions.txt | 4145
-rw-r--r--  test/MC/Disassembler/AArch64/basic-a64-undefined.txt | 43
-rw-r--r--  test/MC/Disassembler/AArch64/basic-a64-unpredictable.txt | 96
-rw-r--r--  test/MC/Disassembler/AArch64/ldp-offset-predictable.txt | 7
-rw-r--r--  test/MC/Disassembler/AArch64/ldp-postind.predictable.txt | 17
-rw-r--r--  test/MC/Disassembler/AArch64/ldp-preind.predictable.txt | 17
-rw-r--r--  test/MC/Disassembler/AArch64/lit.local.cfg | 6
-rw-r--r--  utils/TableGen/DisassemblerEmitter.cpp | 5
192 files changed, 45628 insertions, 13 deletions
diff --git a/autoconf/config.sub b/autoconf/config.sub
index 9942491533..a8d85281f9 100755
--- a/autoconf/config.sub
+++ b/autoconf/config.sub
@@ -251,7 +251,8 @@ case $basic_machine in
| alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] | alpha64pca5[67] \
| am33_2.0 \
| arc | arm | arm[bl]e | arme[lb] | armv[2345] | armv[345][lb] | avr | avr32 \
- | be32 | be64 \
+ | aarch64 \
+ | be32 | be64 \
| bfin \
| c4x | clipper \
| d10v | d30v | dlx | dsp16xx \
@@ -359,6 +360,7 @@ case $basic_machine in
| alpha64-* | alpha64ev[4-8]-* | alpha64ev56-* | alpha64ev6[78]-* \
| alphapca5[67]-* | alpha64pca5[67]-* | arc-* \
| arm-* | armbe-* | armle-* | armeb-* | armv*-* \
+ | aarch64-* \
| avr-* | avr32-* \
| be32-* | be64-* \
| bfin-* | bs2000-* \
diff --git a/autoconf/configure.ac b/autoconf/configure.ac
index 7d0f9f1aa1..21c3eeba53 100644
--- a/autoconf/configure.ac
+++ b/autoconf/configure.ac
@@ -389,6 +389,7 @@ AC_CACHE_CHECK([target architecture],[llvm_cv_target_arch],
sparc*-*) llvm_cv_target_arch="Sparc" ;;
powerpc*-*) llvm_cv_target_arch="PowerPC" ;;
arm*-*) llvm_cv_target_arch="ARM" ;;
+ aarch64*-*) llvm_cv_target_arch="AArch64" ;;
mips-* | mips64-*) llvm_cv_target_arch="Mips" ;;
mipsel-* | mips64el-*) llvm_cv_target_arch="Mips" ;;
xcore-*) llvm_cv_target_arch="XCore" ;;
@@ -422,6 +423,7 @@ case $host in
sparc*-*) host_arch="Sparc" ;;
powerpc*-*) host_arch="PowerPC" ;;
arm*-*) host_arch="ARM" ;;
+ aarch64*-*) host_arch="AArch64" ;;
mips-* | mips64-*) host_arch="Mips" ;;
mipsel-* | mips64el-*) host_arch="Mips" ;;
xcore-*) host_arch="XCore" ;;
@@ -640,6 +642,7 @@ else
PowerPC) AC_SUBST(TARGET_HAS_JIT,1) ;;
x86_64) AC_SUBST(TARGET_HAS_JIT,1) ;;
ARM) AC_SUBST(TARGET_HAS_JIT,1) ;;
+ AArch64) AC_SUBST(TARGET_HAS_JIT,0) ;;
Mips) AC_SUBST(TARGET_HAS_JIT,1) ;;
XCore) AC_SUBST(TARGET_HAS_JIT,0) ;;
MSP430) AC_SUBST(TARGET_HAS_JIT,0) ;;
@@ -771,7 +774,7 @@ dnl Allow specific targets to be specified for building (or not)
TARGETS_TO_BUILD=""
AC_ARG_ENABLE([targets],AS_HELP_STRING([--enable-targets],
[Build specific host targets: all or target1,target2,... Valid targets are:
- host, x86, x86_64, sparc, powerpc, arm, mips, hexagon,
+ host, x86, x86_64, sparc, powerpc, arm, aarch64, mips, hexagon,
xcore, msp430, nvptx, and cpp (default=all)]),,
enableval=all)
if test "$enableval" = host-only ; then
diff --git a/configure b/configure
index 73326a773d..9e74ab61b6 100755
--- a/configure
+++ b/configure
@@ -1438,8 +1438,8 @@ Optional Features:
YES)
--enable-targets Build specific host targets: all or
target1,target2,... Valid targets are: host, x86,
- x86_64, sparc, powerpc, arm, mips, hexagon, xcore,
- msp430, nvptx, and cpp (default=all)
+ x86_64, sparc, powerpc, arm, aarch64, mips, hexagon,
+ xcore, msp430, nvptx, and cpp (default=all)
--enable-experimental-targets
Build experimental host targets: disable or
target1,target2,... (default=disable)
@@ -4008,6 +4008,7 @@ else
sparc*-*) llvm_cv_target_arch="Sparc" ;;
powerpc*-*) llvm_cv_target_arch="PowerPC" ;;
arm*-*) llvm_cv_target_arch="ARM" ;;
+ aarch64*-*) llvm_cv_target_arch="AArch64" ;;
mips-* | mips64-*) llvm_cv_target_arch="Mips" ;;
mipsel-* | mips64el-*) llvm_cv_target_arch="Mips" ;;
xcore-*) llvm_cv_target_arch="XCore" ;;
@@ -4041,6 +4042,7 @@ case $host in
sparc*-*) host_arch="Sparc" ;;
powerpc*-*) host_arch="PowerPC" ;;
arm*-*) host_arch="ARM" ;;
+ aarch64*-*) host_arch="AArch64" ;;
mips-* | mips64-*) host_arch="Mips" ;;
mipsel-* | mips64el-*) host_arch="Mips" ;;
xcore-*) host_arch="XCore" ;;
@@ -5373,6 +5375,8 @@ else
;;
ARM) TARGET_HAS_JIT=1
;;
+ AArch64) TARGET_HAS_JIT=0
+ ;;
Mips) TARGET_HAS_JIT=1
;;
XCore) TARGET_HAS_JIT=0
@@ -10489,7 +10493,7 @@ else
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
lt_status=$lt_dlunknown
cat > conftest.$ac_ext <<EOF
-#line 10492 "configure"
+#line 10496 "configure"
#include "confdefs.h"
#if HAVE_DLFCN_H
diff --git a/docs/CompilerWriterInfo.rst b/docs/CompilerWriterInfo.rst
index 0744656f03..40434fcb1f 100644
--- a/docs/CompilerWriterInfo.rst
+++ b/docs/CompilerWriterInfo.rst
@@ -22,6 +22,11 @@ ARM
* `ABI <http://www.arm.com/products/DevTools/ABI.html>`_
+AArch64
+-------
+
+* `ARMv8 Instruction Set Overview <http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.genc010197a/index.html>`_
+
Itanium (ia64)
--------------
@@ -99,6 +104,8 @@ Linux
-----
* `PowerPC 64-bit ELF ABI Supplement <http://www.linuxbase.org/spec/ELF/ppc64/>`_
+* `Procedure Call Standard for the AArch64 Architecture <http://infocenter.arm.com/help/topic/com.arm.doc.ihi0055a/IHI0055A_aapcs64.pdf>`_
+* `ELF for the ARM 64-bit Architecture (AArch64) <http://infocenter.arm.com/help/topic/com.arm.doc.ihi0056a/IHI0056A_aaelf64.pdf>`_
OS X
----
diff --git a/include/llvm/ADT/Triple.h b/include/llvm/ADT/Triple.h
index e987ffc8de..8fac222c13 100644
--- a/include/llvm/ADT/Triple.h
+++ b/include/llvm/ADT/Triple.h
@@ -44,6 +44,7 @@ public:
UnknownArch,
arm, // ARM; arm, armv.*, xscale
+ aarch64, // AArch64: aarch64
hexagon, // Hexagon: hexagon
mips, // MIPS: mips, mipsallegrex
mipsel, // MIPSEL: mipsel, mipsallegrexel
diff --git a/include/llvm/MC/MCExpr.h b/include/llvm/MC/MCExpr.h
index 475981fa90..b5bfed18ec 100644
--- a/include/llvm/MC/MCExpr.h
+++ b/include/llvm/MC/MCExpr.h
@@ -472,6 +472,8 @@ public:
virtual void AddValueSymbols(MCAssembler *) const = 0;
virtual const MCSection *FindAssociatedSection() const = 0;
+ virtual void fixELFSymbolsInTLSFixups(MCAssembler &) const = 0;
+
static bool classof(const MCExpr *E) {
return E->getKind() == MCExpr::Target;
}
diff --git a/include/llvm/MC/MCObjectWriter.h b/include/llvm/MC/MCObjectWriter.h
index 9d5c1a785e..00483c0278 100644
--- a/include/llvm/MC/MCObjectWriter.h
+++ b/include/llvm/MC/MCObjectWriter.h
@@ -10,6 +10,7 @@
#ifndef LLVM_MC_MCOBJECTWRITER_H
#define LLVM_MC_MCOBJECTWRITER_H
+#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/include/llvm/Object/ELF.h b/include/llvm/Object/ELF.h
index 1e84831c51..919c229232 100644
--- a/include/llvm/Object/ELF.h
+++ b/include/llvm/Object/ELF.h
@@ -1624,6 +1624,86 @@ error_code ELFObjectFile<ELFT>::getRelocationTypeName(
res = "Unknown";
}
break;
+ case ELF::EM_AARCH64:
+ switch (type) {
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_NONE);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_ABS64);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_ABS32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_ABS16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_PREL64);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_PREL32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_PREL16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_MOVW_UABS_G0);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_MOVW_UABS_G0_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_MOVW_UABS_G1);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_MOVW_UABS_G1_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_MOVW_UABS_G2);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_MOVW_UABS_G2_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_MOVW_UABS_G3);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_MOVW_SABS_G0);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_MOVW_SABS_G1);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_MOVW_SABS_G2);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_LD_PREL_LO19);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_ADR_PREL_LO21);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_ADR_PREL_PG_HI21);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_ADD_ABS_LO12_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_LDST8_ABS_LO12_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TSTBR14);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_CONDBR19);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_JUMP26);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_CALL26);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_LDST16_ABS_LO12_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_LDST32_ABS_LO12_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_LDST64_ABS_LO12_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_LDST128_ABS_LO12_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_ADR_GOT_PAGE);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_LD64_GOT_LO12_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_MOVW_DTPREL_G2);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_MOVW_DTPREL_G1);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_MOVW_DTPREL_G0);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_ADD_DTPREL_HI12);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_ADD_DTPREL_LO12);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_LDST8_DTPREL_LO12);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_LDST8_DTPREL_LO12_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_LDST16_DTPREL_LO12);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_LDST16_DTPREL_LO12_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_LDST32_DTPREL_LO12);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_LDST32_DTPREL_LO12_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_LDST64_DTPREL_LO12);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_LDST64_DTPREL_LO12_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSIE_MOVW_GOTTPREL_G1);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSIE_LD_GOTTPREL_PREL19);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_MOVW_TPREL_G2);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_MOVW_TPREL_G1);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_MOVW_TPREL_G1_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_MOVW_TPREL_G0);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_MOVW_TPREL_G0_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_ADD_TPREL_HI12);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_ADD_TPREL_LO12);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_ADD_TPREL_LO12_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_LDST8_TPREL_LO12);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_LDST16_TPREL_LO12);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_LDST32_TPREL_LO12);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_LDST64_TPREL_LO12);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSDESC_ADR_PAGE);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSDESC_LD64_LO12_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSDESC_ADD_LO12_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSDESC_CALL);
+
+ default:
+ res = "Unknown";
+ }
+ break;
case ELF::EM_ARM:
switch (type) {
LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_NONE);
@@ -1937,6 +2017,7 @@ error_code ELFObjectFile<ELFT>::getRelocationValueString(
res = "Unknown";
}
break;
+ case ELF::EM_AARCH64:
case ELF::EM_ARM:
case ELF::EM_HEXAGON:
res = symname;
@@ -2356,6 +2437,8 @@ StringRef ELFObjectFile<ELFT>::getFileFormatName() const {
return "ELF64-i386";
case ELF::EM_X86_64:
return "ELF64-x86-64";
+ case ELF::EM_AARCH64:
+ return "ELF64-aarch64";
case ELF::EM_PPC64:
return "ELF64-ppc64";
default:
@@ -2374,6 +2457,8 @@ unsigned ELFObjectFile<ELFT>::getArch() const {
return Triple::x86;
case ELF::EM_X86_64:
return Triple::x86_64;
+ case ELF::EM_AARCH64:
+ return Triple::aarch64;
case ELF::EM_ARM:
return Triple::arm;
case ELF::EM_HEXAGON:
diff --git a/include/llvm/Support/ELF.h b/include/llvm/Support/ELF.h
index 746ea317bd..a1b8f54448 100644
--- a/include/llvm/Support/ELF.h
+++ b/include/llvm/Support/ELF.h
@@ -271,6 +271,7 @@ enum {
EM_SLE9X = 179, // Infineon Technologies SLE9X core
EM_L10M = 180, // Intel L10M
EM_K10M = 181, // Intel K10M
+ EM_AARCH64 = 183, // ARM AArch64
EM_AVR32 = 185, // Atmel Corporation 32-bit microprocessor family
EM_STM8 = 186, // STMicroeletronics STM8 8-bit microcontroller
EM_TILE64 = 187, // Tilera TILE64 multicore architecture family
@@ -494,6 +495,96 @@ enum {
R_PPC64_TLSLD = 108
};
+// ELF Relocation types for AArch64
+
+enum {
+ R_AARCH64_NONE = 0x100,
+
+ R_AARCH64_ABS64 = 0x101,
+ R_AARCH64_ABS32 = 0x102,
+ R_AARCH64_ABS16 = 0x103,
+ R_AARCH64_PREL64 = 0x104,
+ R_AARCH64_PREL32 = 0x105,
+ R_AARCH64_PREL16 = 0x106,
+
+ R_AARCH64_MOVW_UABS_G0 = 0x107,
+ R_AARCH64_MOVW_UABS_G0_NC = 0x108,
+ R_AARCH64_MOVW_UABS_G1 = 0x109,
+ R_AARCH64_MOVW_UABS_G1_NC = 0x10a,
+ R_AARCH64_MOVW_UABS_G2 = 0x10b,
+ R_AARCH64_MOVW_UABS_G2_NC = 0x10c,
+ R_AARCH64_MOVW_UABS_G3 = 0x10d,
+ R_AARCH64_MOVW_SABS_G0 = 0x10e,
+ R_AARCH64_MOVW_SABS_G1 = 0x10f,
+ R_AARCH64_MOVW_SABS_G2 = 0x110,
+
+ R_AARCH64_LD_PREL_LO19 = 0x111,
+ R_AARCH64_ADR_PREL_LO21 = 0x112,
+ R_AARCH64_ADR_PREL_PG_HI21 = 0x113,
+ R_AARCH64_ADD_ABS_LO12_NC = 0x115,
+ R_AARCH64_LDST8_ABS_LO12_NC = 0x116,
+
+ R_AARCH64_TSTBR14 = 0x117,
+ R_AARCH64_CONDBR19 = 0x118,
+ R_AARCH64_JUMP26 = 0x11a,
+ R_AARCH64_CALL26 = 0x11b,
+
+ R_AARCH64_LDST16_ABS_LO12_NC = 0x11c,
+ R_AARCH64_LDST32_ABS_LO12_NC = 0x11d,
+ R_AARCH64_LDST64_ABS_LO12_NC = 0x11e,
+
+ R_AARCH64_LDST128_ABS_LO12_NC = 0x12b,
+
+ R_AARCH64_ADR_GOT_PAGE = 0x137,
+ R_AARCH64_LD64_GOT_LO12_NC = 0x138,
+
+ R_AARCH64_TLSLD_MOVW_DTPREL_G2 = 0x20b,
+ R_AARCH64_TLSLD_MOVW_DTPREL_G1 = 0x20c,
+ R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC = 0x20d,
+ R_AARCH64_TLSLD_MOVW_DTPREL_G0 = 0x20e,
+ R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC = 0x20f,
+ R_AARCH64_TLSLD_ADD_DTPREL_HI12 = 0x210,
+ R_AARCH64_TLSLD_ADD_DTPREL_LO12 = 0x211,
+ R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC = 0x212,
+ R_AARCH64_TLSLD_LDST8_DTPREL_LO12 = 0x213,
+ R_AARCH64_TLSLD_LDST8_DTPREL_LO12_NC = 0x214,
+ R_AARCH64_TLSLD_LDST16_DTPREL_LO12 = 0x215,
+ R_AARCH64_TLSLD_LDST16_DTPREL_LO12_NC = 0x216,
+ R_AARCH64_TLSLD_LDST32_DTPREL_LO12 = 0x217,
+ R_AARCH64_TLSLD_LDST32_DTPREL_LO12_NC = 0x218,
+ R_AARCH64_TLSLD_LDST64_DTPREL_LO12 = 0x219,
+ R_AARCH64_TLSLD_LDST64_DTPREL_LO12_NC = 0x21a,
+
+ R_AARCH64_TLSIE_MOVW_GOTTPREL_G1 = 0x21b,
+ R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC = 0x21c,
+ R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21 = 0x21d,
+ R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC = 0x21e,
+ R_AARCH64_TLSIE_LD_GOTTPREL_PREL19 = 0x21f,
+
+ R_AARCH64_TLSLE_MOVW_TPREL_G2 = 0x220,
+ R_AARCH64_TLSLE_MOVW_TPREL_G1 = 0x221,
+ R_AARCH64_TLSLE_MOVW_TPREL_G1_NC = 0x222,
+ R_AARCH64_TLSLE_MOVW_TPREL_G0 = 0x223,
+ R_AARCH64_TLSLE_MOVW_TPREL_G0_NC = 0x224,
+ R_AARCH64_TLSLE_ADD_TPREL_HI12 = 0x225,
+ R_AARCH64_TLSLE_ADD_TPREL_LO12 = 0x226,
+ R_AARCH64_TLSLE_ADD_TPREL_LO12_NC = 0x227,
+ R_AARCH64_TLSLE_LDST8_TPREL_LO12 = 0x228,
+ R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC = 0x229,
+ R_AARCH64_TLSLE_LDST16_TPREL_LO12 = 0x22a,
+ R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC = 0x22b,
+ R_AARCH64_TLSLE_LDST32_TPREL_LO12 = 0x22c,
+ R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC = 0x22d,
+ R_AARCH64_TLSLE_LDST64_TPREL_LO12 = 0x22e,
+ R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC = 0x22f,
+
+ R_AARCH64_TLSDESC_ADR_PAGE = 0x232,
+ R_AARCH64_TLSDESC_LD64_LO12_NC = 0x233,
+ R_AARCH64_TLSDESC_ADD_LO12_NC = 0x234,
+
+ R_AARCH64_TLSDESC_CALL = 0x239
+};
+
// ARM Specific e_flags
enum {
EF_ARM_EABI_UNKNOWN = 0x00000000U,
diff --git a/lib/MC/MCELFStreamer.cpp b/lib/MC/MCELFStreamer.cpp
index e5b749e28b..c4c8e6e4e7 100644
--- a/lib/MC/MCELFStreamer.cpp
+++ b/lib/MC/MCELFStreamer.cpp
@@ -300,7 +300,9 @@ void MCELFStreamer::EmitFileDirective(StringRef Filename) {
void MCELFStreamer::fixSymbolsInTLSFixups(const MCExpr *expr) {
switch (expr->getKind()) {
- case MCExpr::Target: llvm_unreachable("Can't handle target exprs yet!");
+ case MCExpr::Target:
+ cast<MCTargetExpr>(expr)->fixELFSymbolsInTLSFixups(getAssembler());
+ break;
case MCExpr::Constant:
break;
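
The hunk above relies on the new pure-virtual MCTargetExpr::fixELFSymbolsInTLSFixups() declared in the include/llvm/MC/MCExpr.h change earlier in this patch: instead of asserting on target expressions, the ELF streamer now lets the target expression mark its own symbols. As an illustrative sketch only (not taken from this patch; it assumes the target expression wraps a plain sub-expression), an override of that hook typically retypes every referenced symbol as STT_TLS using the same MCELF and MCAssembler APIs the streamer already uses for non-target nodes:

    // Illustrative helper a target's fixELFSymbolsInTLSFixups() override might
    // call when its expression carries a TLS modifier. Assumed headers:
    // llvm/MC/MCAssembler.h, llvm/MC/MCELF.h, llvm/MC/MCExpr.h and
    // llvm/Support/ELF.h (exact locations vary slightly across tree revisions).
    static void markReferencedSymbolsAsTLS(const MCExpr *Expr, MCAssembler &Asm) {
      switch (Expr->getKind()) {
      case MCExpr::SymbolRef: {
        // Give the referenced symbol the STT_TLS type so the ELF object writer
        // emits it correctly.
        const MCSymbolRefExpr &Ref = cast<MCSymbolRefExpr>(*Expr);
        MCELF::SetType(Asm.getOrCreateSymbolData(Ref.getSymbol()), ELF::STT_TLS);
        break;
      }
      case MCExpr::Binary: {
        const MCBinaryExpr &BE = cast<MCBinaryExpr>(*Expr);
        markReferencedSymbolsAsTLS(BE.getLHS(), Asm);
        markReferencedSymbolsAsTLS(BE.getRHS(), Asm);
        break;
      }
      case MCExpr::Unary:
        markReferencedSymbolsAsTLS(cast<MCUnaryExpr>(Expr)->getSubExpr(), Asm);
        break;
      default:
        break; // constants need no retyping
      }
    }
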
diff --git a/lib/MC/MCObjectFileInfo.cpp b/lib/MC/MCObjectFileInfo.cpp
index 1f5548f779..ae0abde86a 100644
--- a/lib/MC/MCObjectFileInfo.cpp
+++ b/lib/MC/MCObjectFileInfo.cpp
@@ -256,6 +256,25 @@ void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) {
TTypeEncoding = (CMModel == CodeModel::Small)
? dwarf::DW_EH_PE_udata4 : dwarf::DW_EH_PE_absptr;
}
+ } else if (T.getArch() == Triple::aarch64) {
+ FDECFIEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
+
+ // The small model guarantees static code/data size < 4GB, but not where it
+ // will be in memory. Most of these could end up >2GB away so even a signed
+ // pc-relative 32-bit address is insufficient, theoretically.
+ if (RelocM == Reloc::PIC_) {
+ PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ dwarf::DW_EH_PE_sdata8;
+ LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata8;
+ FDEEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
+ TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ dwarf::DW_EH_PE_sdata8;
+ } else {
+ PersonalityEncoding = dwarf::DW_EH_PE_absptr;
+ LSDAEncoding = dwarf::DW_EH_PE_absptr;
+ FDEEncoding = dwarf::DW_EH_PE_udata4;
+ TTypeEncoding = dwarf::DW_EH_PE_absptr;
+ }
} else if (T.getArch() == Triple::ppc64) {
PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
dwarf::DW_EH_PE_udata8;
diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp
index ad7b18942e..d2508ac1ef 100644
--- a/lib/Support/Triple.cpp
+++ b/lib/Support/Triple.cpp
@@ -19,6 +19,7 @@ const char *Triple::getArchTypeName(ArchType Kind) {
switch (Kind) {
case UnknownArch: return "unknown";
+ case aarch64: return "aarch64";
case arm: return "arm";
case hexagon: return "hexagon";
case mips: return "mips";
@@ -53,6 +54,8 @@ const char *Triple::getArchTypePrefix(ArchType Kind) {
default:
return 0;
+ case aarch64: return "aarch64";
+
case arm:
case thumb: return "arm";
@@ -152,6 +155,7 @@ const char *Triple::getEnvironmentTypeName(EnvironmentType Kind) {
Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) {
return StringSwitch<Triple::ArchType>(Name)
+ .Case("aarch64", aarch64)
.Case("arm", arm)
.Case("mips", mips)
.Case("mipsel", mipsel)
@@ -215,6 +219,7 @@ static Triple::ArchType parseArch(StringRef ArchName) {
.Case("powerpc", Triple::ppc)
.Cases("powerpc64", "ppu", Triple::ppc64)
.Case("mblaze", Triple::mblaze)
+ .Case("aarch64", Triple::aarch64)
.Cases("arm", "xscale", Triple::arm)
// FIXME: It would be good to replace these with explicit names for all the
// various suffixes supported.
@@ -676,6 +681,7 @@ static unsigned getArchPointerBitWidth(llvm::Triple::ArchType Arch) {
case llvm::Triple::spir:
return 32;
+ case llvm::Triple::aarch64:
case llvm::Triple::mips64:
case llvm::Triple::mips64el:
case llvm::Triple::nvptx64:
@@ -704,6 +710,7 @@ Triple Triple::get32BitArchVariant() const {
Triple T(*this);
switch (getArch()) {
case Triple::UnknownArch:
+ case Triple::aarch64:
case Triple::msp430:
T.setArch(UnknownArch);
break;
@@ -755,6 +762,7 @@ Triple Triple::get64BitArchVariant() const {
T.setArch(UnknownArch);
break;
+ case Triple::aarch64:
case Triple::spir64:
case Triple::mips64:
case Triple::mips64el:
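
Taken together, the Triple changes make "aarch64" parse, report a 64-bit pointer width, and refuse to produce a 32-bit variant. A standalone check along these lines (not part of the patch; it only exercises behaviour visible in the hunks above) would pass:

    #include "llvm/ADT/Triple.h"
    #include <cassert>

    int main() {
      llvm::Triple T("aarch64-none-linux-gnu");

      // parseArch() now maps "aarch64" to the new enumerator...
      assert(T.getArch() == llvm::Triple::aarch64);
      // ...getArchPointerBitWidth() reports 64 bits for it...
      assert(T.isArch64Bit());
      // ...and there is deliberately no 32-bit variant of the architecture.
      assert(T.get32BitArchVariant().getArch() == llvm::Triple::UnknownArch);
      return 0;
    }
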
diff --git a/lib/Target/AArch64/AArch64.h b/lib/Target/AArch64/AArch64.h
new file mode 100644
index 0000000000..622814ddfd
--- /dev/null
+++ b/lib/Target/AArch64/AArch64.h
@@ -0,0 +1,42 @@
+//==-- AArch64.h - Top-level interface for AArch64 representation -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the LLVM
+// AArch64 back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_AARCH64_H
+#define LLVM_TARGET_AARCH64_H
+
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+
+class AArch64AsmPrinter;
+class FunctionPass;
+class AArch64TargetMachine;
+class MachineInstr;
+class MCInst;
+
+FunctionPass *createAArch64ISelDAG(AArch64TargetMachine &TM,
+ CodeGenOpt::Level OptLevel);
+
+FunctionPass *createAArch64ConstantIslandPass();
+
+FunctionPass *createAArch64CleanupLocalDynamicTLSPass();
+
+void LowerAArch64MachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
+ AArch64AsmPrinter &AP);
+
+
+}
+
+#endif
diff --git a/lib/Target/AArch64/AArch64.td b/lib/Target/AArch64/AArch64.td
new file mode 100644
index 0000000000..750fec7931
--- /dev/null
+++ b/lib/Target/AArch64/AArch64.td
@@ -0,0 +1,68 @@
+//===- AArch64.td - Describe the AArch64 Target Machine ---------*- tblgen -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This is the top level entry point for the AArch64 target.
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// AArch64 Subtarget features.
+//
+
+def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true",
+ "Enable Advanced SIMD instructions">;
+
+def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true",
+ "Enable cryptographic instructions">;
+
+//===----------------------------------------------------------------------===//
+// AArch64 Processors
+//
+
+include "AArch64Schedule.td"
+
+def : Processor<"generic", GenericItineraries, [FeatureNEON, FeatureCrypto]>;
+
+//===----------------------------------------------------------------------===//
+// Register File Description
+//===----------------------------------------------------------------------===//
+
+include "AArch64RegisterInfo.td"
+
+include "AArch64CallingConv.td"
+
+//===----------------------------------------------------------------------===//
+// Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+include "AArch64InstrInfo.td"
+
+def AArch64InstrInfo : InstrInfo;
+
+//===----------------------------------------------------------------------===//
+// Assembly printer
+//===----------------------------------------------------------------------===//
+
+def A64InstPrinter : AsmWriter {
+ string AsmWriterClassName = "InstPrinter";
+ bit isMCAsmWriter = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// Declare the target which we are implementing
+//===----------------------------------------------------------------------===//
+
+def AArch64 : Target {
+ let InstructionSet = AArch64InstrInfo;
+ let AssemblyWriters = [A64InstPrinter];
+}
diff --git a/lib/Target/AArch64/AArch64AsmPrinter.cpp b/lib/Target/AArch64/AArch64AsmPrinter.cpp
new file mode 100644
index 0000000000..63cc88f815
--- /dev/null
+++ b/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -0,0 +1,361 @@
+//===-- AArch64AsmPrinter.cpp - Print machine code to an AArch64 .s file --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to GAS-format AArch64 assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "AArch64AsmPrinter.h"
+#include "InstPrinter/AArch64InstPrinter.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Target/Mangler.h"
+
+using namespace llvm;
+
+MachineLocation
+AArch64AsmPrinter::getDebugValueLocation(const MachineInstr *MI) const {
+ // See emitFrameIndexDebugValue in InstrInfo for where this instruction is
+ // expected to be created.
+ assert(MI->getNumOperands() == 4 && MI->getOperand(0).isReg()
+ && MI->getOperand(1).isImm() && "unexpected custom DBG_VALUE");
+ return MachineLocation(MI->getOperand(0).getReg(), MI->getOperand(1).getImm());
+}
+
+/// Try to print a floating-point register as if it belonged to a specified
+/// register-class. For example the inline asm operand modifier "b" requires its
+/// argument to be printed as "bN".
+static bool printModifiedFPRAsmOperand(const MachineOperand &MO,
+ const TargetRegisterInfo *TRI,
+ const TargetRegisterClass &RegClass,
+ raw_ostream &O) {
+ if (!MO.isReg())
+ return true;
+
+ for (MCRegAliasIterator AR(MO.getReg(), TRI, true); AR.isValid(); ++AR) {
+ if (RegClass.contains(*AR)) {
+ O << AArch64InstPrinter::getRegisterName(*AR);
+ return false;
+ }
+ }
+ return true;
+}
+
+/// Implements the 'w' and 'x' inline asm operand modifiers, which print a GPR
+/// with the obvious type and an immediate 0 as either wzr or xzr.
+static bool printModifiedGPRAsmOperand(const MachineOperand &MO,
+ const TargetRegisterInfo *TRI,
+ const TargetRegisterClass &RegClass,
+ raw_ostream &O) {
+ char Prefix = &RegClass == &AArch64::GPR32RegClass ? 'w' : 'x';
+
+ if (MO.isImm() && MO.getImm() == 0) {
+ O << Prefix << "zr";
+ return false;
+ } else if (MO.isReg()) {
+ if (MO.getReg() == AArch64::XSP || MO.getReg() == AArch64::WSP) {
+ O << (Prefix == 'x' ? "sp" : "wsp");
+ return false;
+ }
+
+ for (MCRegAliasIterator AR(MO.getReg(), TRI, true); AR.isValid(); ++AR) {
+ if (RegClass.contains(*AR)) {
+ O << AArch64InstPrinter::getRegisterName(*AR);
+ return false;
+ }
+ }
+ }
+
+ return true;
+}
+
+bool AArch64AsmPrinter::printSymbolicAddress(const MachineOperand &MO,
+ bool PrintImmediatePrefix,
+ StringRef Suffix, raw_ostream &O) {
+ StringRef Name;
+ StringRef Modifier;
+ switch (MO.getType()) {
+ default: llvm_unreachable("Unexpected operand for symbolic address constraint");
+ case MachineOperand::MO_GlobalAddress:
+ Name = Mang->getSymbol(MO.getGlobal())->getName();
+
+ // Global variables may be accessed either via a GOT or in various fun and
+ // interesting TLS-model specific ways. Set the prefix modifier as
+ // appropriate here.
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(MO.getGlobal())) {
+ Reloc::Model RelocM = TM.getRelocationModel();
+ if (GV->isThreadLocal()) {
+ switch (TM.getTLSModel(GV)) {
+ case TLSModel::GeneralDynamic:
+ Modifier = "tlsdesc";
+ break;
+ case TLSModel::LocalDynamic:
+ Modifier = "dtprel";
+ break;
+ case TLSModel::InitialExec:
+ Modifier = "gottprel";
+ break;
+ case TLSModel::LocalExec:
+ Modifier = "tprel";
+ break;
+ }
+ } else if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) {
+ Modifier = "got";
+ }
+ }
+ break;
+ case MachineOperand::MO_BlockAddress:
+ Name = GetBlockAddressSymbol(MO.getBlockAddress())->getName();
+ break;
+ case MachineOperand::MO_ExternalSymbol:
+ Name = MO.getSymbolName();
+ break;
+ case MachineOperand::MO_ConstantPoolIndex:
+ Name = GetCPISymbol(MO.getIndex())->getName();
+ break;
+ }
+
+ // Some instructions (notably ADRP) don't take the # prefix for
+ // immediates. Only print it if asked to.
+ if (PrintImmediatePrefix)
+ O << '#';
+
+ // Only need the joining "_" if both the prefix and the suffix are
+ // non-null. This little block simply takes care of the four possible
+ // combinations involved there.
+ if (Modifier == "" && Suffix == "")
+ O << Name;
+ else if (Modifier == "" && Suffix != "")
+ O << ":" << Suffix << ':' << Name;
+ else if (Modifier != "" && Suffix == "")
+ O << ":" << Modifier << ':' << Name;
+ else
+ O << ":" << Modifier << '_' << Suffix << ':' << Name;
+
+ return false;
+}
+
+bool AArch64AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
+ unsigned AsmVariant,
+ const char *ExtraCode, raw_ostream &O) {
+ const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
+ if (!ExtraCode || !ExtraCode[0]) {
+ // There's actually no operand modifier, which leads to a slightly eclectic
+ // set of behaviour which we have to handle here.
+ const MachineOperand &MO = MI->getOperand(OpNum);
+ switch (MO.getType()) {
+ default:
+ llvm_unreachable("Unexpected operand for inline assembly");
+ case MachineOperand::MO_Register:
+ // GCC prints the unmodified operand of a 'w' constraint as the vector
+ // register. Technically, we could allocate the argument as a VPR128, but
+ // that leads to extremely dodgy copies being generated to get the data
+ // there.
+ if (printModifiedFPRAsmOperand(MO, TRI, AArch64::VPR128RegClass, O))
+ O << AArch64InstPrinter::getRegisterName(MO.getReg());
+ break;
+ case MachineOperand::MO_Immediate:
+ O << '#' << MO.getImm();
+ break;
+ case MachineOperand::MO_FPImmediate:
+ assert(MO.getFPImm()->isExactlyValue(0.0) && "Only FP 0.0 expected");
+ O << "#0.0";
+ break;
+ case MachineOperand::MO_BlockAddress:
+ case MachineOperand::MO_ConstantPoolIndex:
+ case MachineOperand::MO_GlobalAddress:
+ case MachineOperand::MO_ExternalSymbol:
+ return printSymbolicAddress(MO, false, "", O);
+ }
+ return false;
+ }
+
+ // We have a real modifier to handle.
+ switch(ExtraCode[0]) {
+ default:
+ // See if this is a generic operand
+ return AsmPrinter::PrintAsmOperand(MI, OpNum, AsmVariant, ExtraCode, O);
+ case 'c': // Don't print "#" before an immediate operand.
+ if (!MI->getOperand(OpNum).isImm())
+ return true;
+ O << MI->getOperand(OpNum).getImm();
+ return false;
+ case 'w':
+ // Output 32-bit general register operand, constant zero as wzr, or stack
+ // pointer as wsp. Ignored when used with other operand types.
+ return printModifiedGPRAsmOperand(MI->getOperand(OpNum), TRI,
+ AArch64::GPR32RegClass, O);
+ case 'x':
+ // Output 64-bit general register operand, constant zero as xzr, or stack
+ // pointer as sp. Ignored when used with other operand types.
+ return printModifiedGPRAsmOperand(MI->getOperand(OpNum), TRI,
+ AArch64::GPR64RegClass, O);
+ case 'H':
+ // Output higher numbered of a 64-bit general register pair
+ case 'Q':
+ // Output least significant register of a 64-bit general register pair
+ case 'R':
+ // Output most significant register of a 64-bit general register pair
+
+ // FIXME note: these three operand modifiers will require, to some extent,
+ // adding a paired GPR64 register class. Initial investigation suggests that
+ // assertions are hit unless it has a type and is made legal for that type
+ // in ISelLowering. After that step is made, the number of modifications
+ // needed explodes (operation legality, calling conventions, stores, reg
+ // copies ...).
+ llvm_unreachable("FIXME: Unimplemented register pairs");
+ case 'b':
+ // Output 8-bit FP/SIMD scalar register operand, prefixed with b.
+ return printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI,
+ AArch64::FPR8RegClass, O);
+ case 'h':
+ // Output 16-bit FP/SIMD scalar register operand, prefixed with h.
+ return printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI,
+ AArch64::FPR16RegClass, O);
+ case 's':
+ // Output 32-bit FP/SIMD scalar register operand, prefixed with s.
+ return printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI,
+ AArch64::FPR32RegClass, O);
+ case 'd':
+ // Output 64-bit FP/SIMD scalar register operand, prefixed with d.
+ return printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI,
+ AArch64::FPR64RegClass, O);
+ case 'q':
+ // Output 128-bit FP/SIMD scalar register operand, prefixed with q.
+ return printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI,
+ AArch64::FPR128RegClass, O);
+ case 'A':
+ // Output symbolic address with appropriate relocation modifier (also
+ // suitable for ADRP).
+ return printSymbolicAddress(MI->getOperand(OpNum), false, "", O);
+ case 'L':
+ // Output bits 11:0 of symbolic address with appropriate :lo12: relocation
+ // modifier.
+ return printSymbolicAddress(MI->getOperand(OpNum), true, "lo12", O);
+ case 'G':
+ // Output bits 23:12 of symbolic address with appropriate :hi12: relocation
+ // modifier (currently only for TLS local exec).
+ return printSymbolicAddress(MI->getOperand(OpNum), true, "hi12", O);
+ }
+
+
+}
+
+bool AArch64AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
+ unsigned OpNum,
+ unsigned AsmVariant,
+ const char *ExtraCode,
+ raw_ostream &O) {
+ // Currently both the memory constraints (m and Q) behave the same and amount
+ // to the address as a single register. In future, we may allow "m" to provide
+ // both a base and an offset.
+ const MachineOperand &MO = MI->getOperand(OpNum);
+ assert(MO.isReg() && "unexpected inline assembly memory operand");
+ O << '[' << AArch64InstPrinter::getRegisterName(MO.getReg()) << ']';
+ return false;
+}
+
+void AArch64AsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
+ raw_ostream &OS) {
+ unsigned NOps = MI->getNumOperands();
+ assert(NOps==4);
+ OS << '\t' << MAI->getCommentString() << "DEBUG_VALUE: ";
+ // cast away const; DIetc do not take const operands for some reason.
+ DIVariable V(const_cast<MDNode *>(MI->getOperand(NOps-1).getMetadata()));
+ OS << V.getName();
+ OS << " <- ";
+ // Frame address. Currently handles register +- offset only.
+ assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm());
+ OS << '[' << AArch64InstPrinter::getRegisterName(MI->getOperand(0).getReg());
+ OS << '+' << MI->getOperand(1).getImm();
+ OS << ']';
+ OS << "+" << MI->getOperand(NOps - 2).getImm();
+}
+
+
+#include "AArch64GenMCPseudoLowering.inc"
+
+void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) {
+ // Do any auto-generated pseudo lowerings.
+ if (emitPseudoExpansionLowering(OutStreamer, MI))
+ return;
+
+ switch (MI->getOpcode()) {
+ case AArch64::CONSTPOOL_ENTRY: {
+ unsigned LabelId = (unsigned)MI->getOperand(0).getImm();
+ unsigned CPIdx = (unsigned)MI->getOperand(1).getIndex();
+
+ OutStreamer.EmitLabel(GetCPISymbol(LabelId));
+
+ const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPIdx];
+ if (MCPE.isMachineConstantPoolEntry())
+ EmitMachineConstantPoolValue(MCPE.Val.MachineCPVal);
+ else
+ EmitGlobalConstant(MCPE.Val.ConstVal);
+
+ return;
+ }
+ case AArch64::DBG_VALUE: {
+ if (isVerbose() && OutStreamer.hasRawTextSupport()) {
+ SmallString<128> TmpStr;
+ raw_svector_ostream OS(TmpStr);
+ PrintDebugValueComment(MI, OS);
+ OutStreamer.EmitRawText(StringRef(OS.str()));
+ }
+ return;
+ }
+ }
+
+ MCInst TmpInst;
+ LowerAArch64MachineInstrToMCInst(MI, TmpInst, *this);
+ OutStreamer.EmitInstruction(TmpInst);
+}
+
+void AArch64AsmPrinter::EmitEndOfAsmFile(Module &M) {
+ if (Subtarget->isTargetELF()) {
+ const TargetLoweringObjectFileELF &TLOFELF =
+ static_cast<const TargetLoweringObjectFileELF &>(getObjFileLowering());
+
+ MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>();
+
+ // Output stubs for external and common global variables.
+ MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList();
+ if (!Stubs.empty()) {
+ OutStreamer.SwitchSection(TLOFELF.getDataRelSection());
+ const DataLayout *TD = TM.getDataLayout();
+
+ for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
+ OutStreamer.EmitLabel(Stubs[i].first);
+ OutStreamer.EmitSymbolValue(Stubs[i].second.getPointer(),
+ TD->getPointerSize(0), 0);
+ }
+ Stubs.clear();
+ }
+ }
+}
+
+bool AArch64AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+ MCP = MF.getConstantPool();
+ return AsmPrinter::runOnMachineFunction(MF);
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeAArch64AsmPrinter() {
+ RegisterAsmPrinter<AArch64AsmPrinter> X(TheAArch64Target);
+}
+
diff --git a/lib/Target/AArch64/AArch64AsmPrinter.h b/lib/Target/AArch64/AArch64AsmPrinter.h
new file mode 100644
index 0000000000..492be66fbd
--- /dev/null
+++ b/lib/Target/AArch64/AArch64AsmPrinter.h
@@ -0,0 +1,85 @@
+// AArch64AsmPrinter.h - Print machine code to an AArch64 .s file -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// AArch64 Assembly printer class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_AARCH64ASMPRINTER_H
+#define LLVM_AARCH64ASMPRINTER_H
+
+#include "AArch64.h"
+#include "AArch64TargetMachine.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+
+class MCOperand;
+
+class LLVM_LIBRARY_VISIBILITY AArch64AsmPrinter : public AsmPrinter {
+
+ /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
+ /// make the right decision when printing asm code for different targets.
+ const AArch64Subtarget *Subtarget;
+ const MachineConstantPool *MCP;
+
+ // emitPseudoExpansionLowering - tblgen'erated.
+ bool emitPseudoExpansionLowering(MCStreamer &OutStreamer,
+ const MachineInstr *MI);
+
+ public:
+ explicit AArch64AsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+ : AsmPrinter(TM, Streamer) {
+ Subtarget = &TM.getSubtarget<AArch64Subtarget>();
+ }
+
+ bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp) const;
+
+ MCOperand lowerSymbolOperand(const MachineOperand &MO,
+ const MCSymbol *Sym) const;
+
+ void EmitInstruction(const MachineInstr *MI);
+ void EmitEndOfAsmFile(Module &M);
+
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O);
+ bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O);
+
+ void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS);
+
+ /// printSymbolicAddress - Given some kind of reasonably bare symbolic
+ /// reference, print out the appropriate asm string to represent it. If
+ /// appropriate, a relocation-specifier will be produced, composed of a
+ /// general class derived from the MO parameter and an instruction-specific
+ /// suffix, provided in Suffix. E.g. ":got_lo12:" if a Suffix of "lo12" is
+ /// given.
+ bool printSymbolicAddress(const MachineOperand &MO,
+ bool PrintImmediatePrefix,
+ StringRef Suffix, raw_ostream &O);
+
+ MachineLocation getDebugValueLocation(const MachineInstr *MI) const;
+
+ virtual const char *getPassName() const {
+ return "AArch64 Assembly Printer";
+ }
+
+ /// A no-op on AArch64 because we emit our constant pool entries inline with
+ /// the function.
+ virtual void EmitConstantPool() {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+};
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/AArch64/AArch64CallingConv.td b/lib/Target/AArch64/AArch64CallingConv.td
new file mode 100644
index 0000000000..b880d8373d
--- /dev/null
+++ b/lib/Target/AArch64/AArch64CallingConv.td
@@ -0,0 +1,196 @@
+//==-- AArch64CallingConv.td - Calling Conventions for ARM ----*- tblgen -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This describes the calling conventions for AArch64 architecture.
+//===----------------------------------------------------------------------===//
+
+
+// The AArch64 Procedure Call Standard is unfortunately specified at a slightly
+// higher level of abstraction than LLVM's target interface presents. In
+// particular, it refers (like other ABIs, in fact) directly to
+// structs. However, generic LLVM code takes the liberty of lowering structure
+// arguments to the component fields before we see them.
+//
+// As a result, the obvious direct map from LLVM IR to PCS concepts can't be
+// implemented, so the goals of this calling convention are, in decreasing
+// priority order:
+// 1. Expose *some* way to express the concepts required to implement the
+// generic PCS from a front-end.
+// 2. Provide a sane ABI for pure LLVM.
+// 3. Follow the generic PCS as closely as is naturally possible.
+//
+// The suggested front-end implementation of PCS features is:
+// * Integer, float and vector arguments of all sizes which end up in
+// registers are passed and returned via the natural LLVM type.
+// * Structure arguments with size <= 16 bytes are passed and returned in
+// registers as similar integer or composite types. For example:
+// [1 x i64], [2 x i64] or [1 x i128] (if alignment 16 needed).
+// * HFAs in registers follow rules similar to small structs: appropriate
+// composite types.
+// * Structure arguments with size > 16 bytes are passed via a pointer,
+// handled completely by the front-end.
+// * Structure return values > 16 bytes via an sret pointer argument.
+// * Other stack-based arguments (not large structs) are passed using byval
+// pointers. Padding arguments are added beforehand to guarantee a large
+// struct doesn't later use integer registers.
+//
+// N.b. this means that it is the front-end's responsibility (if it cares about
+// PCS compliance) to check whether enough registers are available for an
+// argument when deciding how to pass it.
+
+class CCIfAlign<int Align, CCAction A>:
+ CCIf<"ArgFlags.getOrigAlign() == " # Align, A>;
+
+def CC_A64_APCS : CallingConv<[
+ // SRet is an LLVM-specific concept, so it takes precedence over general ABI
+ // concerns. However, this rule will be used by C/C++ frontends to implement
+ // structure return.
+ CCIfSRet<CCAssignToReg<[X8]>>,
+
+ // Put ByVal arguments directly on the stack. Minimum size and alignment of a
+ // slot is 64-bit.
+ CCIfByVal<CCPassByVal<8, 8>>,
+
+ // Canonicalise the various types that live in different floating-point
+ // registers. This makes sense because the PCS does not distinguish Short
+ // Vectors and Floating-point types.
+ CCIfType<[v2i8], CCBitConvertToType<f16>>,
+ CCIfType<[v4i8, v2i16], CCBitConvertToType<f32>>,
+ CCIfType<[v8i8, v4i16, v2i32, v2f32], CCBitConvertToType<f64>>,
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCBitConvertToType<f128>>,
+
+ // PCS: "C.1: If the argument is a Half-, Single-, Double- or Quad- precision
+ // Floating-point or Short Vector Type and the NSRN is less than 8, then the
+ // argument is allocated to the least significant bits of register
+ // v[NSRN]. The NSRN is incremented by one. The argument has now been
+ // allocated."
+ CCIfType<[f16], CCAssignToReg<[B0, B1, B2, B3, B4, B5, B6, B7]>>,
+ CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7]>>,
+ CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
+ CCIfType<[f128], CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
+
+ // PCS: "C.2: If the argument is an HFA and there are sufficient unallocated
+ // SIMD and Floating-point registers (NSRN - number of elements < 8), then the
+ // argument is allocated to SIMD and Floating-point registers (with one
+ // register per element of the HFA). The NSRN is incremented by the number of
+ // registers used. The argument has now been allocated."
+ //
+ // N.b. As above, this rule is the responsibility of the front-end.
+
+ // "C.3: If the argument is an HFA then the NSRN is set to 8 and the size of
+ // the argument is rounded up to the nearest multiple of 8 bytes."
+ //
+ // "C.4: If the argument is an HFA, a Quad-precision Floating-point or Short
+ // Vector Type then the NSAA is rounded up to the larger of 8 or the Natural
+ // Alignment of the Argument's type."
+ //
+ // It is expected that these will be satisfied by adding dummy arguments to
+ // the prototype.
+
+ // PCS: "C.5: If the argument is a Half- or Single- precision Floating-point
+ // type then the size of the argument is set to 8 bytes. The effect is as if
+ // the argument had been copied to the least significant bits of a 64-bit
+ // register and the remaining bits filled with unspecified values."
+ CCIfType<[f16, f32], CCPromoteToType<f64>>,
+
+ // PCS: "C.6: If the argument is an HFA, a Half-, Single-, Double- or Quad-
+ // precision Floating-point or Short Vector Type, then the argument is copied
+ // to memory at the adjusted NSAA. The NSAA is incremented by the size of the
+ // argument. The argument has now been allocated."
+ CCIfType<[f64], CCAssignToStack<8, 8>>,
+ CCIfType<[f128], CCAssignToStack<16, 16>>,
+
+ // PCS: "C.7: If the argument is an Integral Type, the size of the argument is
+ // less than or equal to 8 bytes and the NGRN is less than 8, the argument is
+ // copied to the least significant bits of x[NGRN]. The NGRN is incremented by
+ // one. The argument has now been allocated."
+
+ // First we implement C.8 and C.9 (128-bit types get even registers). i128 is
+ // represented as two i64s, the first one being split. If we delayed this
+ // operation C.8 would never be reached.
+ CCIfType<[i64],
+ CCIfSplit<CCAssignToRegWithShadow<[X0, X2, X4, X6], [X0, X1, X3, X5]>>>,
+
+ // Note: the promotion also implements C.14.
+ CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,
+
+ // And now the real implementation of C.7
+ CCIfType<[i64], CCAssignToReg<[X0, X1, X2, X3, X4, X5, X6, X7]>>,
+
+ // PCS: "C.8: If the argument has an alignment of 16 then the NGRN is rounded
+ // up to the next even number."
+ //
+ // "C.9: If the argument is an Integral Type, the size of the argument is
+ // equal to 16 and the NGRN is less than 7, the argument is copied to x[NGRN]
+ // and x[NGRN+1], x[NGRN] shall contain the lower addressed double-word of the
+ // memory representation of the argument. The NGRN is incremented by two. The
+ // argument has now been allocated."
+ //
+ // Subtlety here: what if alignment is 16 but it is not an integral type? All
+ // floating-point types have been allocated already, which leaves composite
+ // types: this is why a front-end may need to produce i128 for a struct <= 16
+ // bytes.
+
+ // PCS: "C.10 If the argument is a Composite Type and the size in double-words
+ // of the argument is not more than 8 minus NGRN, then the argument is copied
+ // into consecutive general-purpose registers, starting at x[NGRN]. The
+ // argument is passed as though it had been loaded into the registers from a
+ // double-word aligned address with an appropriate sequence of LDR
+ // instructions loading consecutive registers from memory (the contents of any
+ // unused parts of the registers are unspecified by this standard). The NGRN
+ // is incremented by the number of registers used. The argument has now been
+ // allocated."
+ //
+ // Another one that's the responsibility of the front-end (sigh).
+
+ // PCS: "C.11: The NGRN is set to 8."
+ CCCustom<"CC_AArch64NoMoreRegs">,
+
+ // PCS: "C.12: The NSAA is rounded up to the larger of 8 or the Natural
+ // Alignment of the argument's type."
+ //
+ // PCS: "C.13: If the argument is a composite type then the argument is copied
+ // to memory at the adjusted NSAA. The NSAA is incremented by the size of
+ // the argument. The argument has now been allocated."
+ //
+ // Note that the effect of this corresponds to a memcpy rather than register
+ // stores so that the struct ends up correctly addressable at the adjusted
+ // NSAA.
+
+ // PCS: "C.14: If the size of the argument is less than 8 bytes then the size
+ // of the argument is set to 8 bytes. The effect is as if the argument was
+ // copied to the least significant bits of a 64-bit register and the remaining
+ // bits filled with unspecified values."
+ //
+ // Integer types were widened above. Floating-point and composite types have
+ // already been allocated completely. Nothing to do.
+
+ // PCS: "C.15: The argument is copied to memory at the adjusted NSAA. The NSAA
+ // is incremented by the size of the argument. The argument has now been
+ // allocated."
+ CCIfType<[i64], CCIfSplit<CCAssignToStack<8, 16>>>,
+ CCIfType<[i64], CCAssignToStack<8, 8>>
+
+]>;
+
+// According to the PCS, X19-X30 are callee-saved; however, only the low 64
+// bits of the vector registers (v8-v15) are callee-saved. The order here is
+// picked up by PrologEpilogInserter.cpp to allocate stack slots, starting from
+// the top of the stack upon entry. This gives the customary layout of x30 at
+// [sp-8], x29 at [sp-16], ...
+def CSR_PCS : CalleeSavedRegs<(add (sequence "X%u", 30, 19),
+ (sequence "D%u", 15, 8))>;
+
+
+// TLS descriptor calls are extremely restricted in their changes, to allow
+// optimisations in the (hopefully) more common fast path where no real action
+// is needed. They actually have to preserve all registers, except for the
+// unavoidable X30 and the return register X0.
+def TLSDesc : CalleeSavedRegs<(add (sequence "X%u", 29, 1),
+ (sequence "Q%u", 31, 0))>;
diff --git a/lib/Target/AArch64/AArch64ConstantIslandPass.cpp b/lib/Target/AArch64/AArch64ConstantIslandPass.cpp
new file mode 100644
index 0000000000..7734866689
--- /dev/null
+++ b/lib/Target/AArch64/AArch64ConstantIslandPass.cpp
@@ -0,0 +1,1420 @@
+//===-- AArch64ConstantIslandPass.cpp - AArch64 constant islands ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that splits the constant pool up into 'islands'
+// which are scattered throughout the function. This is required due to the
+// limited pc-relative displacements that AArch64 has.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "aarch64-cp-islands"
+#include "AArch64.h"
+#include "AArch64InstrInfo.h"
+#include "AArch64MachineFunctionInfo.h"
+#include "AArch64Subtarget.h"
+#include "AArch64MachineFunctionInfo.h"
+#include "MCTargetDesc/AArch64BaseInfo.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumCPEs, "Number of constpool entries");
+STATISTIC(NumSplit, "Number of uncond branches inserted");
+STATISTIC(NumCBrFixed, "Number of cond branches fixed");
+
+// FIXME: This option should be removed once it has received sufficient testing.
+static cl::opt<bool>
+AlignConstantIslands("aarch64-align-constant-islands", cl::Hidden,
+ cl::init(true), cl::desc("Align constant islands in code"));
+
+/// Return the worst case padding that could result from unknown offset bits.
+/// This does not include alignment padding caused by known offset bits.
+///
+/// @param LogAlign log2(alignment)
+/// @param KnownBits Number of known low offset bits.
+static inline unsigned UnknownPadding(unsigned LogAlign, unsigned KnownBits) {
+ if (KnownBits < LogAlign)
+ return (1u << LogAlign) - (1u << KnownBits);
+ return 0;
+}
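+// For example (illustrative numbers only): with LogAlign == 3 (8-byte
+// alignment) and KnownBits == 2, the worst-case padding is
+// (1 << 3) - (1 << 2) == 4 bytes; once at least LogAlign low bits are known,
+// no extra padding can be required.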
+
+namespace {
+ /// Due to limited PC-relative displacements, AArch64 requires constant pool
+ /// entries to be scattered among the instructions inside a function. To do
+ /// this, it completely ignores the normal LLVM constant pool; instead, it
+ /// places constants wherever it feels like with special instructions.
+ ///
+ /// The terminology used in this pass includes:
+ /// Islands - Clumps of constants placed in the function.
+ /// Water - Potential places where an island could be formed.
+ /// CPE - A constant pool entry that has been placed somewhere, which
+ /// tracks a list of users.
+ class AArch64ConstantIslands : public MachineFunctionPass {
+ /// Information about the offset and size of a single basic block.
+ struct BasicBlockInfo {
+ /// Distance from the beginning of the function to the beginning of this
+ /// basic block.
+ ///
+ /// Offsets are computed assuming worst case padding before an aligned
+ /// block. This means that subtracting basic block offsets always gives a
+ /// conservative estimate of the real distance which may be smaller.
+ ///
+ /// Because worst case padding is used, the computed offset of an aligned
+ /// block may not actually be aligned.
+ unsigned Offset;
+
+ /// Size of the basic block in bytes. If the block contains inline
+ /// assembly, this is a worst case estimate.
+ ///
+ /// The size does not include any alignment padding whether from the
+ /// beginning of the block, or from an aligned jump table at the end.
+ unsigned Size;
+
+ /// The number of low bits in Offset that are known to be exact. The
+ /// remaining bits of Offset are an upper bound.
+ uint8_t KnownBits;
+
+ /// When non-zero, the block contains instructions (inline asm) of unknown
+ /// size. The real size may be smaller than Size bytes by a multiple of 1
+ /// << Unalign.
+ uint8_t Unalign;
+
+ BasicBlockInfo() : Offset(0), Size(0), KnownBits(0), Unalign(0) {}
+
+ /// Compute the number of known offset bits internally to this block.
+ /// This number should be used to predict worst case padding when
+ /// splitting the block.
+ unsigned internalKnownBits() const {
+ unsigned Bits = Unalign ? Unalign : KnownBits;
+ // If the block size isn't a multiple of the known bits, assume the
+ // worst case padding.
+ if (Size & ((1u << Bits) - 1))
+ Bits = CountTrailingZeros_32(Size);
+ return Bits;
+ }
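+
+ // Example (illustrative numbers): with Unalign == 0, KnownBits == 4 and
+ // Size == 0x14, the size is not a multiple of 16, so only
+ // CountTrailingZeros_32(0x14) == 2 low offset bits can be relied upon.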
+
+ /// Compute the offset immediately following this block. If LogAlign is
+ /// specified, return the offset the successor block will get if it has
+ /// this alignment.
+ unsigned postOffset(unsigned LogAlign = 0) const {
+ unsigned PO = Offset + Size;
+ if (!LogAlign)
+ return PO;
+ // Add alignment padding from the terminator.
+ return PO + UnknownPadding(LogAlign, internalKnownBits());
+ }
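+
+ // Example (illustrative numbers): with Offset == 0x100, Size == 0x24 and
+ // internalKnownBits() == 2, postOffset(4) is 0x124 plus a worst-case
+ // padding of 12 bytes, i.e. 0x130, for a successor requiring 16-byte
+ // alignment.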
+
+ /// Compute the number of known low bits of postOffset. If this block
+ /// contains inline asm, the number of known bits drops to the
+ /// instruction alignment. An aligned terminator may increase the number
+ /// of known bits.
+ /// If LogAlign is given, also consider the alignment of the next block.
+ unsigned postKnownBits(unsigned LogAlign = 0) const {
+ return std::max(LogAlign, internalKnownBits());
+ }
+ };
+
+ std::vector<BasicBlockInfo> BBInfo;
+
+ /// A sorted list of basic blocks where islands could be placed (i.e. blocks
+ /// that don't fall through to the following block, due to a return,
+ /// unreachable, or unconditional branch).
+ std::vector<MachineBasicBlock*> WaterList;
+
+ /// The subset of WaterList that was created since the previous iteration by
+ /// inserting unconditional branches.
+ SmallSet<MachineBasicBlock*, 4> NewWaterList;
+
+ typedef std::vector<MachineBasicBlock*>::iterator water_iterator;
+
+ /// One user of a constant pool, keeping the machine instruction pointer,
+ /// the constant pool being referenced, and the number of bits used by the
+ /// instruction for displacement. The HighWaterMark records the highest
+ /// basic block where a new CPEntry can be placed. To ensure this pass
+ /// terminates, the CP entries are initially placed at the end of the
+ /// function and then move monotonically to lower addresses. The exception
+ /// to this rule is when the current CP entry for a particular CPUser is out
+ /// of range, but there is another CP entry for the same constant value in
+ /// range. We want to use the existing in-range CP entry, but if it later
+ /// moves out of range, the search for new water should resume where it left
+ /// off. The HighWaterMark is used to record that point.
+ struct CPUser {
+ MachineInstr *MI;
+ MachineInstr *CPEMI;
+ MachineBasicBlock *HighWaterMark;
+ private:
+ unsigned OffsetBits;
+ public:
+ CPUser(MachineInstr *mi, MachineInstr *cpemi, unsigned offsetbits)
+ : MI(mi), CPEMI(cpemi), OffsetBits(offsetbits) {
+ HighWaterMark = CPEMI->getParent();
+ }
+ /// Returns the number of bits used to specify the offset.
+ unsigned getOffsetBits() const {
+ return OffsetBits;
+ }
+
+ /// Returns the maximum positive displacement possible from this CPUser
+ /// (essentially INT<N>_MAX * 4).
+ unsigned getMaxPosDisp() const {
+ return (1 << (OffsetBits - 1)) - 1;
+ }
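+
+ // For instance (illustrative), a load-literal user records
+ // OffsetBits == 19 + 2, so getMaxPosDisp() is (1 << 20) - 1, i.e. just
+ // under 1MiB of forward reach.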
+ };
+
+ /// Keep track of all of the machine instructions that use various constant
+ /// pools and their max displacement.
+ std::vector<CPUser> CPUsers;
+
+ /// One per constant pool entry, keeping the machine instruction pointer,
+ /// the constpool index, and the number of CPUser's which reference this
+ /// entry.
+ struct CPEntry {
+ MachineInstr *CPEMI;
+ unsigned CPI;
+ unsigned RefCount;
+ CPEntry(MachineInstr *cpemi, unsigned cpi, unsigned rc = 0)
+ : CPEMI(cpemi), CPI(cpi), RefCount(rc) {}
+ };
+
+ /// Keep track of all of the constant pool entry machine instructions. For
+ /// each original constpool index (i.e. those that existed upon entry to
+ /// this pass), it keeps a vector of entries. Original elements are cloned
+ /// as we go along; the clones are put in the vector of the original
+ /// element, but have distinct CPIs.
+ std::vector<std::vector<CPEntry> > CPEntries;
+
+ /// One per immediate branch, keeping the machine instruction pointer,
+ /// conditional or unconditional, the max displacement, and (if IsCond is
+ /// true) the corresponding inverted branch opcode.
+ struct ImmBranch {
+ MachineInstr *MI;
+ unsigned OffsetBits : 31;
+ bool IsCond : 1;
+ ImmBranch(MachineInstr *mi, unsigned offsetbits, bool cond)
+ : MI(mi), OffsetBits(offsetbits), IsCond(cond) {}
+ };
+
+ /// Keep track of all the immediate branch instructions.
+ ///
+ std::vector<ImmBranch> ImmBranches;
+
+ MachineFunction *MF;
+ MachineConstantPool *MCP;
+ const AArch64InstrInfo *TII;
+ const AArch64Subtarget *STI;
+ AArch64MachineFunctionInfo *AFI;
+ public:
+ static char ID;
+ AArch64ConstantIslands() : MachineFunctionPass(ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual const char *getPassName() const {
+ return "AArch64 constant island placement pass";
+ }
+
+ private:
+ void doInitialPlacement(std::vector<MachineInstr*> &CPEMIs);
+ CPEntry *findConstPoolEntry(unsigned CPI, const MachineInstr *CPEMI);
+ unsigned getCPELogAlign(const MachineInstr *CPEMI);
+ void scanFunctionJumpTables();
+ void initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs);
+ MachineBasicBlock *splitBlockBeforeInstr(MachineInstr *MI);
+ void updateForInsertedWaterBlock(MachineBasicBlock *NewBB);
+ void adjustBBOffsetsAfter(MachineBasicBlock *BB);
+ bool decrementCPEReferenceCount(unsigned CPI, MachineInstr* CPEMI);
+ int findInRangeCPEntry(CPUser& U, unsigned UserOffset);
+ bool findAvailableWater(CPUser&U, unsigned UserOffset,
+ water_iterator &WaterIter);
+ void createNewWater(unsigned CPUserIndex, unsigned UserOffset,
+ MachineBasicBlock *&NewMBB);
+ bool handleConstantPoolUser(unsigned CPUserIndex);
+ void removeDeadCPEMI(MachineInstr *CPEMI);
+ bool removeUnusedCPEntries();
+ bool isCPEntryInRange(MachineInstr *MI, unsigned UserOffset,
+ MachineInstr *CPEMI, unsigned OffsetBits,
+ bool DoDump = false);
+ bool isWaterInRange(unsigned UserOffset, MachineBasicBlock *Water,
+ CPUser &U, unsigned &Growth);
+ bool isBBInRange(MachineInstr *MI, MachineBasicBlock *BB,
+ unsigned OffsetBits);
+ bool fixupImmediateBr(ImmBranch &Br);
+ bool fixupConditionalBr(ImmBranch &Br);
+
+ void computeBlockSize(MachineBasicBlock *MBB);
+ unsigned getOffsetOf(MachineInstr *MI) const;
+ unsigned getUserOffset(CPUser&) const;
+ void dumpBBs();
+ void verify();
+
+ bool isOffsetInRange(unsigned UserOffset, unsigned TrialOffset,
+ unsigned BitsAvailable);
+ bool isOffsetInRange(unsigned UserOffset, unsigned TrialOffset,
+ const CPUser &U) {
+ return isOffsetInRange(UserOffset, TrialOffset, U.getOffsetBits());
+ }
+ };
+ char AArch64ConstantIslands::ID = 0;
+}
+
+/// check BBOffsets, BBSizes, alignment of islands
+void AArch64ConstantIslands::verify() {
+#ifndef NDEBUG
+ for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
+ MBBI != E; ++MBBI) {
+ MachineBasicBlock *MBB = MBBI;
+ unsigned MBBId = MBB->getNumber();
+ assert(!MBBId || BBInfo[MBBId - 1].postOffset() <= BBInfo[MBBId].Offset);
+ }
+ DEBUG(dbgs() << "Verifying " << CPUsers.size() << " CP users.\n");
+ for (unsigned i = 0, e = CPUsers.size(); i != e; ++i) {
+ CPUser &U = CPUsers[i];
+ unsigned UserOffset = getUserOffset(U);
+ // Verify offset using the real max displacement without the safety
+ // adjustment.
+ if (isCPEntryInRange(U.MI, UserOffset, U.CPEMI, U.getOffsetBits(),
+ /* DoDump = */ true)) {
+ DEBUG(dbgs() << "OK\n");
+ continue;
+ }
+ DEBUG(dbgs() << "Out of range.\n");
+ dumpBBs();
+ DEBUG(MF->dump());
+ llvm_unreachable("Constant pool entry out of range!");
+ }
+#endif
+}
+
+/// print block size and offset information - debugging
+void AArch64ConstantIslands::dumpBBs() {
+ DEBUG({
+ for (unsigned J = 0, E = BBInfo.size(); J != E; ++J) {
+ const BasicBlockInfo &BBI = BBInfo[J];
+ dbgs() << format("%08x BB#%u\t", BBI.Offset, J)
+ << " kb=" << unsigned(BBI.KnownBits)
+ << " ua=" << unsigned(BBI.Unalign)
+ << format(" size=%#x\n", BBInfo[J].Size);
+ }
+ });
+}
+
+/// Returns an instance of the constpool island pass.
+FunctionPass *llvm::createAArch64ConstantIslandPass() {
+ return new AArch64ConstantIslands();
+}
+
+bool AArch64ConstantIslands::runOnMachineFunction(MachineFunction &mf) {
+ MF = &mf;
+ MCP = mf.getConstantPool();
+
+ DEBUG(dbgs() << "***** AArch64ConstantIslands: "
+ << MCP->getConstants().size() << " CP entries, aligned to "
+ << MCP->getConstantPoolAlignment() << " bytes *****\n");
+
+ TII = (const AArch64InstrInfo*)MF->getTarget().getInstrInfo();
+ AFI = MF->getInfo<AArch64MachineFunctionInfo>();
+ STI = &MF->getTarget().getSubtarget<AArch64Subtarget>();
+
+ // This pass invalidates liveness information when it splits basic blocks.
+ MF->getRegInfo().invalidateLiveness();
+
+ // Renumber all of the machine basic blocks in the function, guaranteeing that
+ // the numbers agree with the position of the block in the function.
+ MF->RenumberBlocks();
+
+ // Perform the initial placement of the constant pool entries. To start with,
+ // we put them all at the end of the function.
+ std::vector<MachineInstr*> CPEMIs;
+ if (!MCP->isEmpty())
+ doInitialPlacement(CPEMIs);
+
+ // The next UID to take is the first unused one.
+ AFI->initPICLabelUId(CPEMIs.size());
+
+ // Do the initial scan of the function, building up information about the
+ // sizes of each block, the location of all the water, and finding all of the
+ // constant pool users.
+ initializeFunctionInfo(CPEMIs);
+ CPEMIs.clear();
+ DEBUG(dumpBBs());
+
+ // Remove dead constant pool entries.
+ bool MadeChange = removeUnusedCPEntries();
+
+ // Iteratively place constant pool entries and fix up branches until there
+ // is no change.
+ unsigned NoCPIters = 0, NoBRIters = 0;
+ while (true) {
+ DEBUG(dbgs() << "Beginning CP iteration #" << NoCPIters << '\n');
+ bool CPChange = false;
+ for (unsigned i = 0, e = CPUsers.size(); i != e; ++i)
+ CPChange |= handleConstantPoolUser(i);
+ if (CPChange && ++NoCPIters > 30)
+ report_fatal_error("Constant Island pass failed to converge!");
+ DEBUG(dumpBBs());
+
+ // Clear NewWaterList now. If we split a block for branches, it should
+ // appear as "new water" for the next iteration of constant pool placement.
+ NewWaterList.clear();
+
+ DEBUG(dbgs() << "Beginning BR iteration #" << NoBRIters << '\n');
+ bool BRChange = false;
+ for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i)
+ BRChange |= fixupImmediateBr(ImmBranches[i]);
+ if (BRChange && ++NoBRIters > 30)
+ report_fatal_error("Branch Fix Up pass failed to converge!");
+ DEBUG(dumpBBs());
+
+ if (!CPChange && !BRChange)
+ break;
+ MadeChange = true;
+ }
+
+ // After a while, this might be made debug-only, but it is not expensive.
+ verify();
+
+ DEBUG(dbgs() << '\n'; dumpBBs());
+
+ BBInfo.clear();
+ WaterList.clear();
+ CPUsers.clear();
+ CPEntries.clear();
+ ImmBranches.clear();
+
+ return MadeChange;
+}
+
+/// Perform the initial placement of the constant pool entries. To start with,
+/// we put them all at the end of the function.
+void
+AArch64ConstantIslands::doInitialPlacement(std::vector<MachineInstr*> &CPEMIs) {
+ // Create the basic block to hold the CPE's.
+ MachineBasicBlock *BB = MF->CreateMachineBasicBlock();
+ MF->push_back(BB);
+
+ // MachineConstantPool measures alignment in bytes. We measure in log2(bytes).
+ unsigned MaxAlign = Log2_32(MCP->getConstantPoolAlignment());
+
+ // Mark the basic block as required by the const-pool.
+ // If AlignConstantIslands isn't set, use 4-byte alignment for everything.
+ BB->setAlignment(AlignConstantIslands ? MaxAlign : 2);
+
+ // The function needs to be as aligned as the basic blocks. The linker may
+ // move functions around based on their alignment.
+ MF->ensureAlignment(BB->getAlignment());
+
+ // Order the entries in BB by descending alignment. That ensures correct
+ // alignment of all entries as long as BB is sufficiently aligned. Keep
+ // track of the insertion point for each alignment. We are going to bucket
+ // sort the entries as they are created.
+ SmallVector<MachineBasicBlock::iterator, 8> InsPoint(MaxAlign + 1, BB->end());
+
+ // Add all of the constants from the constant pool to the end block, use an
+ // identity mapping of CPI's to CPE's.
+ const std::vector<MachineConstantPoolEntry> &CPs = MCP->getConstants();
+
+ const DataLayout &TD = *MF->getTarget().getDataLayout();
+ for (unsigned i = 0, e = CPs.size(); i != e; ++i) {
+ unsigned Size = TD.getTypeAllocSize(CPs[i].getType());
+ assert(Size >= 4 && "Too small constant pool entry");
+ unsigned Align = CPs[i].getAlignment();
+ assert(isPowerOf2_32(Align) && "Invalid alignment");
+ // Verify that all constant pool entries are a multiple of their alignment.
+ // If not, we would have to pad them out so that instructions stay aligned.
+ assert((Size % Align) == 0 && "CP Entry not multiple of 4 bytes!");
+
+ // Insert CONSTPOOL_ENTRY before entries with a smaller alignment.
+ unsigned LogAlign = Log2_32(Align);
+ MachineBasicBlock::iterator InsAt = InsPoint[LogAlign];
+ MachineInstr *CPEMI =
+ BuildMI(*BB, InsAt, DebugLoc(), TII->get(AArch64::CONSTPOOL_ENTRY))
+ .addImm(i).addConstantPoolIndex(i).addImm(Size);
+ CPEMIs.push_back(CPEMI);
+
+ // Ensure that future entries with higher alignment get inserted before
+ // CPEMI. This is bucket sort with iterators.
+ for (unsigned a = LogAlign + 1; a <= MaxAlign; ++a)
+ if (InsPoint[a] == InsAt)
+ InsPoint[a] = CPEMI;
+
+ // Add a new CPEntry, but no corresponding CPUser yet.
+ std::vector<CPEntry> CPEs;
+ CPEs.push_back(CPEntry(CPEMI, i));
+ CPEntries.push_back(CPEs);
+ ++NumCPEs;
+ DEBUG(dbgs() << "Moved CPI#" << i << " to end of function, size = "
+ << Size << ", align = " << Align <<'\n');
+ }
+ DEBUG(BB->dump());
+}
+
+/// Return true if the specified basic block can fall through into the block
+/// immediately after it.
+static bool BBHasFallthrough(MachineBasicBlock *MBB) {
+ // Get the next machine basic block in the function.
+ MachineFunction::iterator MBBI = MBB;
+ // Can't fall off end of function.
+ if (llvm::next(MBBI) == MBB->getParent()->end())
+ return false;
+
+ MachineBasicBlock *NextBB = llvm::next(MBBI);
+ for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
+ E = MBB->succ_end(); I != E; ++I)
+ if (*I == NextBB)
+ return true;
+
+ return false;
+}
+
+/// Given the constpool index and CONSTPOOL_ENTRY MI, look up the corresponding
+/// CPEntry.
+AArch64ConstantIslands::CPEntry
+*AArch64ConstantIslands::findConstPoolEntry(unsigned CPI,
+ const MachineInstr *CPEMI) {
+ std::vector<CPEntry> &CPEs = CPEntries[CPI];
+ // Number of entries per constpool index should be small, just do a
+ // linear search.
+ for (unsigned i = 0, e = CPEs.size(); i != e; ++i) {
+ if (CPEs[i].CPEMI == CPEMI)
+ return &CPEs[i];
+ }
+ return NULL;
+}
+
+/// Returns the required alignment of the constant pool entry represented by
+/// CPEMI. Alignment is measured in log2(bytes) units.
+unsigned AArch64ConstantIslands::getCPELogAlign(const MachineInstr *CPEMI) {
+ assert(CPEMI && CPEMI->getOpcode() == AArch64::CONSTPOOL_ENTRY);
+
+ // Everything is 4-byte aligned unless AlignConstantIslands is set.
+ if (!AlignConstantIslands)
+ return 2;
+
+ unsigned CPI = CPEMI->getOperand(1).getIndex();
+ assert(CPI < MCP->getConstants().size() && "Invalid constant pool index.");
+ unsigned Align = MCP->getConstants()[CPI].getAlignment();
+ assert(isPowerOf2_32(Align) && "Invalid CPE alignment");
+ return Log2_32(Align);
+}
+
+/// Do the initial scan of the function, building up information about the sizes
+/// of each block, the location of all the water, and finding all of the
+/// constant pool users.
+void AArch64ConstantIslands::
+initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) {
+ BBInfo.clear();
+ BBInfo.resize(MF->getNumBlockIDs());
+
+ // First thing, compute the size of all basic blocks, and see if the function
+ // has any inline assembly in it. If so, we have to be conservative about
+ // alignment assumptions, as we don't know for sure the size of any
+ // instructions in the inline assembly.
+ for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I)
+ computeBlockSize(I);
+
+ // The known bits of the entry block offset are determined by the function
+ // alignment.
+ BBInfo.front().KnownBits = MF->getAlignment();
+
+ // Compute block offsets and known bits.
+ adjustBBOffsetsAfter(MF->begin());
+
+ // Now go back through the instructions and build up our data structures.
+ for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
+ MBBI != E; ++MBBI) {
+ MachineBasicBlock &MBB = *MBBI;
+
+ // If this block doesn't fall through into the next MBB, then this is
+ // 'water' where a constant pool island could be placed.
+ if (!BBHasFallthrough(&MBB))
+ WaterList.push_back(&MBB);
+
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ I != E; ++I) {
+ if (I->isDebugValue())
+ continue;
+
+ int Opc = I->getOpcode();
+ if (I->isBranch()) {
+ bool IsCond = false;
+
+ // The offsets encoded in instructions here scale by the instruction
+ // size (4 bytes), effectively increasing their range by 2 bits.
+ unsigned Bits = 0;
+ switch (Opc) {
+ default:
+ continue; // Ignore other JT branches
+ case AArch64::TBZxii:
+ case AArch64::TBZwii:
+ case AArch64::TBNZxii:
+ case AArch64::TBNZwii:
+ IsCond = true;
+ Bits = 14 + 2;
+ break;
+ case AArch64::Bcc:
+ case AArch64::CBZx:
+ case AArch64::CBZw:
+ case AArch64::CBNZx:
+ case AArch64::CBNZw:
+ IsCond = true;
+ Bits = 19 + 2;
+ break;
+ case AArch64::Bimm:
+ Bits = 26 + 2;
+ break;
+ }
+
+ // Record this immediate branch.
+ ImmBranches.push_back(ImmBranch(I, Bits, IsCond));
+ }
+
+ if (Opc == AArch64::CONSTPOOL_ENTRY)
+ continue;
+
+ // Scan the instructions for constant pool operands.
+ for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op)
+ if (I->getOperand(op).isCPI()) {
+ // We found one. The addressing mode tells us the max displacement
+ // from the PC that this instruction permits.
+
+ // The offsets encoded in instructions here scale by the instruction
+ // size (4 bytes), effectively increasing their range by 2 bits.
+ unsigned Bits = 0;
+
+ switch (Opc) {
+ default:
+ llvm_unreachable("Unknown addressing mode for CP reference!");
+
+ case AArch64::LDRw_lit:
+ case AArch64::LDRx_lit:
+ case AArch64::LDRs_lit:
+ case AArch64::LDRd_lit:
+ case AArch64::LDRq_lit:
+ case AArch64::LDRSWx_lit:
+ case AArch64::PRFM_lit:
+ Bits = 19 + 2;
+ }
+
+ // Remember that this is a user of a CP entry.
+ unsigned CPI = I->getOperand(op).getIndex();
+ MachineInstr *CPEMI = CPEMIs[CPI];
+ CPUsers.push_back(CPUser(I, CPEMI, Bits));
+
+ // Increment corresponding CPEntry reference count.
+ CPEntry *CPE = findConstPoolEntry(CPI, CPEMI);
+ assert(CPE && "Cannot find a corresponding CPEntry!");
+ CPE->RefCount++;
+
+ // Instructions can only use one CP entry, don't bother scanning the
+ // rest of the operands.
+ break;
+ }
+ }
+ }
+}
+
+/// Compute the size and some alignment information for MBB. This function
+/// updates BBInfo directly.
+void AArch64ConstantIslands::computeBlockSize(MachineBasicBlock *MBB) {
+ BasicBlockInfo &BBI = BBInfo[MBB->getNumber()];
+ BBI.Size = 0;
+ BBI.Unalign = 0;
+
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
+ ++I) {
+ BBI.Size += TII->getInstSizeInBytes(*I);
+ // For inline asm, getInstSizeInBytes returns a conservative estimate.
+ // The actual size may be smaller, but still a multiple of the instr size.
+ if (I->isInlineAsm())
+ BBI.Unalign = 2;
+ }
+}
+
+/// Return the current offset of the specified machine instruction from the
+/// start of the function. This offset changes as stuff is moved around inside
+/// the function.
+unsigned AArch64ConstantIslands::getOffsetOf(MachineInstr *MI) const {
+ MachineBasicBlock *MBB = MI->getParent();
+
+ // The offset is composed of two things: the sum of the sizes of all MBB's
+ // before this instruction's block, and the offset from the start of the block
+ // it is in.
+ unsigned Offset = BBInfo[MBB->getNumber()].Offset;
+
+ // Sum instructions before MI in MBB.
+ for (MachineBasicBlock::iterator I = MBB->begin(); &*I != MI; ++I) {
+ assert(I != MBB->end() && "Didn't find MI in its own basic block?");
+ Offset += TII->getInstSizeInBytes(*I);
+ }
+ return Offset;
+}
+
+/// Little predicate function to sort the WaterList by MBB ID.
+static bool CompareMBBNumbers(const MachineBasicBlock *LHS,
+ const MachineBasicBlock *RHS) {
+ return LHS->getNumber() < RHS->getNumber();
+}
+
+/// When a block is newly inserted into the machine function, it upsets all of
+/// the block numbers. Renumber the blocks and update the arrays that parallel
+/// this numbering.
+void AArch64ConstantIslands::
+updateForInsertedWaterBlock(MachineBasicBlock *NewBB) {
+ // Renumber the MBB's to keep them consecutive.
+ NewBB->getParent()->RenumberBlocks(NewBB);
+
+ // Insert an entry into BBInfo to align it properly with the (newly
+ // renumbered) block numbers.
+ BBInfo.insert(BBInfo.begin() + NewBB->getNumber(), BasicBlockInfo());
+
+ // Next, update WaterList. Specifically, we need to add NewBB as having
+ // available water after it.
+ water_iterator IP =
+ std::lower_bound(WaterList.begin(), WaterList.end(), NewBB,
+ CompareMBBNumbers);
+ WaterList.insert(IP, NewBB);
+}
+
+
+/// Split the basic block containing MI into two blocks, which are joined by
+/// an unconditional branch. Update data structures and renumber blocks to
+/// account for this change and returns the newly created block.
+MachineBasicBlock *
+AArch64ConstantIslands::splitBlockBeforeInstr(MachineInstr *MI) {
+ MachineBasicBlock *OrigBB = MI->getParent();
+
+ // Create a new MBB for the code after the OrigBB.
+ MachineBasicBlock *NewBB =
+ MF->CreateMachineBasicBlock(OrigBB->getBasicBlock());
+ MachineFunction::iterator MBBI = OrigBB; ++MBBI;
+ MF->insert(MBBI, NewBB);
+
+ // Splice the instructions starting with MI over to NewBB.
+ NewBB->splice(NewBB->end(), OrigBB, MI, OrigBB->end());
+
+ // Add an unconditional branch from OrigBB to NewBB.
+ // Note the new unconditional branch is not being recorded.
+ // There doesn't seem to be meaningful DebugInfo available; this doesn't
+ // correspond to anything in the source.
+ BuildMI(OrigBB, DebugLoc(), TII->get(AArch64::Bimm)).addMBB(NewBB);
+ ++NumSplit;
+
+ // Update the CFG. All succs of OrigBB are now succs of NewBB.
+ NewBB->transferSuccessors(OrigBB);
+
+ // OrigBB branches to NewBB.
+ OrigBB->addSuccessor(NewBB);
+
+ // Update internal data structures to account for the newly inserted MBB.
+ // This is almost the same as updateForInsertedWaterBlock, except that
+ // the Water goes after OrigBB, not NewBB.
+ MF->RenumberBlocks(NewBB);
+
+ // Insert an entry into BBInfo to align it properly with the (newly
+ // renumbered) block numbers.
+ BBInfo.insert(BBInfo.begin() + NewBB->getNumber(), BasicBlockInfo());
+
+ // Next, update WaterList. Specifically, we need to add OrigBB as having
+ // available water after it (but not if it's already there, which happens
+ // when splitting before a conditional branch that is followed by an
+ // unconditional branch - in that case we want to insert NewBB).
+ water_iterator IP =
+ std::lower_bound(WaterList.begin(), WaterList.end(), OrigBB,
+ CompareMBBNumbers);
+ MachineBasicBlock* WaterBB = *IP;
+ if (WaterBB == OrigBB)
+ WaterList.insert(llvm::next(IP), NewBB);
+ else
+ WaterList.insert(IP, OrigBB);
+ NewWaterList.insert(OrigBB);
+
+ // Figure out how large the OrigBB is. As the first half of the original
+ // block, it cannot contain a tablejump. The size includes
+ // the new jump we added. (It should be possible to do this without
+ // recounting everything, but it's very confusing, and this is rarely
+ // executed.)
+ computeBlockSize(OrigBB);
+
+ // Figure out how large the NewMBB is. As the second half of the original
+ // block, it may contain a tablejump.
+ computeBlockSize(NewBB);
+
+ // All BBOffsets following these blocks must be modified.
+ adjustBBOffsetsAfter(OrigBB);
+
+ return NewBB;
+}
+
+/// Compute the offset of U.MI as seen by the hardware displacement computation.
+unsigned AArch64ConstantIslands::getUserOffset(CPUser &U) const {
+ return getOffsetOf(U.MI);
+}
+
+/// Checks whether UserOffset (the location of a constant pool reference) is
+/// within OffsetBits of TrialOffset (a proposed location of a constant pool
+/// entry).
+bool AArch64ConstantIslands::isOffsetInRange(unsigned UserOffset,
+ unsigned TrialOffset,
+ unsigned OffsetBits) {
+ return isIntN(OffsetBits, static_cast<int64_t>(TrialOffset) - UserOffset);
+}
+
+/// Returns true if a CPE placed after the specified Water (a basic block) will
+/// be in range for the specific MI.
+///
+/// Compute how much the function will grow by inserting a CPE after Water.
+bool AArch64ConstantIslands::isWaterInRange(unsigned UserOffset,
+ MachineBasicBlock* Water, CPUser &U,
+ unsigned &Growth) {
+ unsigned CPELogAlign = getCPELogAlign(U.CPEMI);
+ unsigned CPEOffset = BBInfo[Water->getNumber()].postOffset(CPELogAlign);
+ unsigned NextBlockOffset, NextBlockAlignment;
+ MachineFunction::const_iterator NextBlock = Water;
+ if (++NextBlock == MF->end()) {
+ NextBlockOffset = BBInfo[Water->getNumber()].postOffset();
+ NextBlockAlignment = 0;
+ } else {
+ NextBlockOffset = BBInfo[NextBlock->getNumber()].Offset;
+ NextBlockAlignment = NextBlock->getAlignment();
+ }
+ unsigned Size = U.CPEMI->getOperand(2).getImm();
+ unsigned CPEEnd = CPEOffset + Size;
+
+ // The CPE may be able to hide in the alignment padding before the next
+ // block. It may also cause more padding to be required if it is more aligned
+ // than the next block.
+ if (CPEEnd > NextBlockOffset) {
+ Growth = CPEEnd - NextBlockOffset;
+ // Compute the padding that would go at the end of the CPE to align the next
+ // block.
+ Growth += OffsetToAlignment(CPEEnd, 1u << NextBlockAlignment);
+
+ // If the CPE is to be inserted before the instruction, that will raise
+ // the offset of the instruction. Also account for unknown alignment padding
+ // in blocks between CPE and the user.
+ if (CPEOffset < UserOffset)
+ UserOffset += Growth + UnknownPadding(MF->getAlignment(), CPELogAlign);
+ } else
+ // CPE fits in existing padding.
+ Growth = 0;
+
+ return isOffsetInRange(UserOffset, CPEOffset, U);
+}
+
+/// Returns true if the distance between the specified MI and the specified
+/// constant pool entry instruction can fit in MI's displacement field.
+bool AArch64ConstantIslands::isCPEntryInRange(MachineInstr *MI,
+ unsigned UserOffset,
+ MachineInstr *CPEMI,
+ unsigned OffsetBits, bool DoDump) {
+ unsigned CPEOffset = getOffsetOf(CPEMI);
+
+ if (DoDump) {
+ DEBUG({
+ unsigned Block = MI->getParent()->getNumber();
+ const BasicBlockInfo &BBI = BBInfo[Block];
+ dbgs() << "User of CPE#" << CPEMI->getOperand(0).getImm()
+ << " bits available=" << OffsetBits
+ << format(" insn address=%#x", UserOffset)
+ << " in BB#" << Block << ": "
+ << format("%#x-%x\t", BBI.Offset, BBI.postOffset()) << *MI
+ << format("CPE address=%#x offset=%+d: ", CPEOffset,
+ int(CPEOffset-UserOffset));
+ });
+ }
+
+ return isOffsetInRange(UserOffset, CPEOffset, OffsetBits);
+}
+
+#ifndef NDEBUG
+/// Return true if the specified basic block's only predecessor unconditionally
+/// branches to its only successor.
+static bool BBIsJumpedOver(MachineBasicBlock *MBB) {
+ if (MBB->pred_size() != 1 || MBB->succ_size() != 1)
+ return false;
+
+ MachineBasicBlock *Succ = *MBB->succ_begin();
+ MachineBasicBlock *Pred = *MBB->pred_begin();
+ MachineInstr *PredMI = &Pred->back();
+ if (PredMI->getOpcode() == AArch64::Bimm)
+ return PredMI->getOperand(0).getMBB() == Succ;
+ return false;
+}
+#endif // NDEBUG
+
+void AArch64ConstantIslands::adjustBBOffsetsAfter(MachineBasicBlock *BB) {
+ unsigned BBNum = BB->getNumber();
+ for(unsigned i = BBNum + 1, e = MF->getNumBlockIDs(); i < e; ++i) {
+ // Get the offset and known bits at the end of the layout predecessor.
+ // Include the alignment of the current block.
+ unsigned LogAlign = MF->getBlockNumbered(i)->getAlignment();
+ unsigned Offset = BBInfo[i - 1].postOffset(LogAlign);
+ unsigned KnownBits = BBInfo[i - 1].postKnownBits(LogAlign);
+
+ // This is where block i begins. Stop if the offset is already correct,
+ // and we have updated 2 blocks. This is the maximum number of blocks
+ // changed before calling this function.
+ if (i > BBNum + 2 &&
+ BBInfo[i].Offset == Offset &&
+ BBInfo[i].KnownBits == KnownBits)
+ break;
+
+ BBInfo[i].Offset = Offset;
+ BBInfo[i].KnownBits = KnownBits;
+ }
+}
+
+/// Find the constant pool entry with index CPI and instruction CPEMI, and
+/// decrement its refcount. If the refcount becomes 0 remove the entry and
+/// instruction. Returns true if we removed the entry, false if we didn't.
+bool AArch64ConstantIslands::decrementCPEReferenceCount(unsigned CPI,
+ MachineInstr *CPEMI) {
+ // Find the old entry. Eliminate it if it is no longer used.
+ CPEntry *CPE = findConstPoolEntry(CPI, CPEMI);
+ assert(CPE && "Unexpected!");
+ if (--CPE->RefCount == 0) {
+ removeDeadCPEMI(CPEMI);
+ CPE->CPEMI = NULL;
+ --NumCPEs;
+ return true;
+ }
+ return false;
+}
+
+/// See if the currently referenced CPE is in range; if not, see if an in-range
+/// clone of the CPE is in range, and if so, change the data structures so the
+/// user references the clone. Returns:
+/// 0 = no existing entry found
+/// 1 = entry found, and there were no code insertions or deletions
+/// 2 = entry found, and there were code insertions or deletions
+int AArch64ConstantIslands::findInRangeCPEntry(CPUser& U, unsigned UserOffset)
+{
+ MachineInstr *UserMI = U.MI;
+ MachineInstr *CPEMI = U.CPEMI;
+
+ // Check to see if the CPE is already in-range.
+ if (isCPEntryInRange(UserMI, UserOffset, CPEMI, U.getOffsetBits(), true)) {
+ DEBUG(dbgs() << "In range\n");
+ return 1;
+ }
+
+ // No. Look for previously created clones of the CPE that are in range.
+ unsigned CPI = CPEMI->getOperand(1).getIndex();
+ std::vector<CPEntry> &CPEs = CPEntries[CPI];
+ for (unsigned i = 0, e = CPEs.size(); i != e; ++i) {
+ // We already tried this one
+ if (CPEs[i].CPEMI == CPEMI)
+ continue;
+ // Removing CPEs can leave empty entries, skip
+ if (CPEs[i].CPEMI == NULL)
+ continue;
+ if (isCPEntryInRange(UserMI, UserOffset, CPEs[i].CPEMI, U.getOffsetBits())) {
+ DEBUG(dbgs() << "Replacing CPE#" << CPI << " with CPE#"
+ << CPEs[i].CPI << "\n");
+ // Point the CPUser node to the replacement
+ U.CPEMI = CPEs[i].CPEMI;
+ // Change the CPI in the instruction operand to refer to the clone.
+ for (unsigned j = 0, e = UserMI->getNumOperands(); j != e; ++j)
+ if (UserMI->getOperand(j).isCPI()) {
+ UserMI->getOperand(j).setIndex(CPEs[i].CPI);
+ break;
+ }
+ // Adjust the refcount of the clone...
+ CPEs[i].RefCount++;
+ // ...and the original. If we didn't remove the old entry, none of the
+ // addresses changed, so we don't need another pass.
+ return decrementCPEReferenceCount(CPI, CPEMI) ? 2 : 1;
+ }
+ }
+ return 0;
+}
+
+/// Look for an existing entry in the WaterList in which we can place the CPE
+/// referenced from U so it's within range of U's MI. Returns true if found,
+/// false if not. If it returns true, WaterIter is set to the WaterList
+/// entry. To ensure that this pass terminates, the CPE location for a
+/// particular CPUser is only allowed to move to a lower address, so search
+/// backward from the end of the list and prefer the first water that is in
+/// range.
+bool AArch64ConstantIslands::findAvailableWater(CPUser &U, unsigned UserOffset,
+ water_iterator &WaterIter) {
+ if (WaterList.empty())
+ return false;
+
+ unsigned BestGrowth = ~0u;
+ for (water_iterator IP = prior(WaterList.end()), B = WaterList.begin();;
+ --IP) {
+ MachineBasicBlock* WaterBB = *IP;
+ // Check if water is in range and is either at a lower address than the
+ // current "high water mark" or a new water block that was created since
+ // the previous iteration by inserting an unconditional branch. In the
+ // latter case, we want to allow resetting the high water mark back to
+ // this new water since we haven't seen it before. Inserting branches
+ // should be relatively uncommon and when it does happen, we want to be
+ // sure to take advantage of it for all the CPEs near that block, so that
+ // we don't insert more branches than necessary.
+ unsigned Growth;
+ if (isWaterInRange(UserOffset, WaterBB, U, Growth) &&
+ (WaterBB->getNumber() < U.HighWaterMark->getNumber() ||
+ NewWaterList.count(WaterBB)) && Growth < BestGrowth) {
+ // This is the least amount of required padding seen so far.
+ BestGrowth = Growth;
+ WaterIter = IP;
+ DEBUG(dbgs() << "Found water after BB#" << WaterBB->getNumber()
+ << " Growth=" << Growth << '\n');
+
+ // Keep looking unless it is perfect.
+ if (BestGrowth == 0)
+ return true;
+ }
+ if (IP == B)
+ break;
+ }
+ return BestGrowth != ~0u;
+}
+
+/// No existing WaterList entry will work for CPUsers[CPUserIndex], so create a
+/// place to put the CPE. The end of the block is used if in range, and the
+/// conditional branch munged so control flow is correct. Otherwise the block
+/// is split to create a hole with an unconditional branch around it. In either
+/// case NewMBB is set to a block following which the new island can be inserted
+/// (the WaterList is not adjusted).
+void AArch64ConstantIslands::createNewWater(unsigned CPUserIndex,
+ unsigned UserOffset,
+ MachineBasicBlock *&NewMBB) {
+ CPUser &U = CPUsers[CPUserIndex];
+ MachineInstr *UserMI = U.MI;
+ MachineInstr *CPEMI = U.CPEMI;
+ unsigned CPELogAlign = getCPELogAlign(CPEMI);
+ MachineBasicBlock *UserMBB = UserMI->getParent();
+ const BasicBlockInfo &UserBBI = BBInfo[UserMBB->getNumber()];
+
+ // If the block does not end in an unconditional branch already, and if the
+ // end of the block is within range, make new water there.
+ if (BBHasFallthrough(UserMBB)) {
+ // Size of branch to insert.
+ unsigned InstrSize = 4;
+ // Compute the offset where the CPE will begin.
+ unsigned CPEOffset = UserBBI.postOffset(CPELogAlign) + InstrSize;
+
+ if (isOffsetInRange(UserOffset, CPEOffset, U)) {
+ DEBUG(dbgs() << "Split at end of BB#" << UserMBB->getNumber()
+ << format(", expected CPE offset %#x\n", CPEOffset));
+ NewMBB = llvm::next(MachineFunction::iterator(UserMBB));
+ // Add an unconditional branch from UserMBB to fallthrough block. Record
+ // it for branch lengthening; this new branch will not get out of range,
+ // but if the preceding conditional branch is out of range, the targets
+ // will be exchanged, and the altered branch may be out of range, so the
+ // machinery has to know about it.
+ BuildMI(UserMBB, DebugLoc(), TII->get(AArch64::Bimm)).addMBB(NewMBB);
+
+ // 26 bits written down, specifying a multiple of 4.
+ unsigned OffsetBits = 26 + 2;
+ ImmBranches.push_back(ImmBranch(&UserMBB->back(), OffsetBits, false));
+ BBInfo[UserMBB->getNumber()].Size += InstrSize;
+ adjustBBOffsetsAfter(UserMBB);
+ return;
+ }
+ }
+
+ // What a big block. Find a place within the block to split it. We make a
+ // first guess, then walk through the instructions between the one currently
+ // being looked at and the possible insertion point, and make sure any other
+ // instructions that reference CPEs will be able to use the same island area;
+ // if not, we back up the insertion point.
+
+ // Try to split the block so it's fully aligned. Compute the latest split
+ // point where we can add a 4-byte branch instruction, and then align to
+ // LogAlign which is the largest possible alignment in the function.
+ unsigned LogAlign = MF->getAlignment();
+ assert(LogAlign >= CPELogAlign && "Over-aligned constant pool entry");
+ unsigned KnownBits = UserBBI.internalKnownBits();
+ unsigned UPad = UnknownPadding(LogAlign, KnownBits);
+ unsigned BaseInsertOffset = UserOffset + U.getMaxPosDisp() - UPad;
+ DEBUG(dbgs() << format("Split in middle of big block before %#x",
+ BaseInsertOffset));
+
+ // The 4 in the following is for the unconditional branch we'll be inserting.
+ // Alignment of the island is handled inside isOffsetInRange.
+ BaseInsertOffset -= 4;
+
+ DEBUG(dbgs() << format(", adjusted to %#x", BaseInsertOffset)
+ << " la=" << LogAlign
+ << " kb=" << KnownBits
+ << " up=" << UPad << '\n');
+
+ // This could point off the end of the block if we've already got constant
+ // pool entries following this block; only the last one is in the water list.
+ // Back past any possible branches (allow for a conditional and a maximally
+ // long unconditional).
+ if (BaseInsertOffset + 8 >= UserBBI.postOffset()) {
+ BaseInsertOffset = UserBBI.postOffset() - UPad - 8;
+ DEBUG(dbgs() << format("Move inside block: %#x\n", BaseInsertOffset));
+ }
+ unsigned EndInsertOffset = BaseInsertOffset + 4 + UPad +
+ CPEMI->getOperand(2).getImm();
+ MachineBasicBlock::iterator MI = UserMI;
+ ++MI;
+ unsigned CPUIndex = CPUserIndex+1;
+ unsigned NumCPUsers = CPUsers.size();
+ for (unsigned Offset = UserOffset+TII->getInstSizeInBytes(*UserMI);
+ Offset < BaseInsertOffset;
+ Offset += TII->getInstSizeInBytes(*MI),
+ MI = llvm::next(MI)) {
+ assert(MI != UserMBB->end() && "Fell off end of block");
+ if (CPUIndex < NumCPUsers && CPUsers[CPUIndex].MI == MI) {
+ CPUser &U = CPUsers[CPUIndex];
+ if (!isOffsetInRange(Offset, EndInsertOffset, U)) {
+ // Shift the insertion point by one unit of alignment so it is within reach.
+ BaseInsertOffset -= 1u << LogAlign;
+ EndInsertOffset -= 1u << LogAlign;
+ }
+ // This is overly conservative, as we don't account for CPEMIs being
+ // reused within the block, but it doesn't matter much. Also assume CPEs
+ // are added in order with alignment padding. We may eventually be able
+ // to pack the aligned CPEs better.
+ EndInsertOffset += U.CPEMI->getOperand(2).getImm();
+ CPUIndex++;
+ }
+ }
+
+ --MI;
+ NewMBB = splitBlockBeforeInstr(MI);
+}
+
+/// Analyze the specified user, checking to see if it is out-of-range. If so,
+/// pick up the constant pool value and move it some place in-range. Return
+/// true if we changed any addresses, false otherwise.
+bool AArch64ConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) {
+ CPUser &U = CPUsers[CPUserIndex];
+ MachineInstr *UserMI = U.MI;
+ MachineInstr *CPEMI = U.CPEMI;
+ unsigned CPI = CPEMI->getOperand(1).getIndex();
+ unsigned Size = CPEMI->getOperand(2).getImm();
+ // Compute this only once, it's expensive.
+ unsigned UserOffset = getUserOffset(U);
+
+ // See if the current entry is within range, or there is a clone of it
+ // in range.
+ int result = findInRangeCPEntry(U, UserOffset);
+ if (result == 1) return false;
+ else if (result == 2) return true;
+
+ // No existing clone of this CPE is within range.
+ // We will be generating a new clone. Get a UID for it.
+ unsigned ID = AFI->createPICLabelUId();
+
+ // Look for water where we can place this CPE.
+ MachineBasicBlock *NewIsland = MF->CreateMachineBasicBlock();
+ MachineBasicBlock *NewMBB;
+ water_iterator IP;
+ if (findAvailableWater(U, UserOffset, IP)) {
+ DEBUG(dbgs() << "Found water in range\n");
+ MachineBasicBlock *WaterBB = *IP;
+
+ // If the original WaterList entry was "new water" on this iteration,
+ // propagate that to the new island. This is just keeping NewWaterList
+ // updated to match the WaterList, which will be updated below.
+ if (NewWaterList.count(WaterBB)) {
+ NewWaterList.erase(WaterBB);
+ NewWaterList.insert(NewIsland);
+ }
+ // The new CPE goes before the following block (NewMBB).
+ NewMBB = llvm::next(MachineFunction::iterator(WaterBB));
+
+ } else {
+ // No water found.
+ DEBUG(dbgs() << "No water found\n");
+ createNewWater(CPUserIndex, UserOffset, NewMBB);
+
+ // splitBlockBeforeInstr adds to WaterList, which is important when it is
+ // called while handling branches so that the water will be seen on the
+ // next iteration for constant pools, but in this context, we don't want
+ // it. Check for this so it will be removed from the WaterList.
+ // Also remove any entry from NewWaterList.
+ MachineBasicBlock *WaterBB = prior(MachineFunction::iterator(NewMBB));
+ IP = std::find(WaterList.begin(), WaterList.end(), WaterBB);
+ if (IP != WaterList.end())
+ NewWaterList.erase(WaterBB);
+
+ // We are adding new water. Update NewWaterList.
+ NewWaterList.insert(NewIsland);
+ }
+
+ // Remove the original WaterList entry; we want subsequent insertions in
+ // this vicinity to go after the one we're about to insert. This
+ // considerably reduces the number of times we have to move the same CPE
+ // more than once and is also important to ensure the algorithm terminates.
+ if (IP != WaterList.end())
+ WaterList.erase(IP);
+
+ // Okay, we know we can put an island before NewMBB now, do it!
+ MF->insert(NewMBB, NewIsland);
+
+ // Update internal data structures to account for the newly inserted MBB.
+ updateForInsertedWaterBlock(NewIsland);
+
+ // Decrement the old entry, and remove it if refcount becomes 0.
+ decrementCPEReferenceCount(CPI, CPEMI);
+
+ // Now that we have an island to add the CPE to, clone the original CPE and
+ // add it to the island.
+ U.HighWaterMark = NewIsland;
+ U.CPEMI = BuildMI(NewIsland, DebugLoc(), TII->get(AArch64::CONSTPOOL_ENTRY))
+ .addImm(ID).addConstantPoolIndex(CPI).addImm(Size);
+ CPEntries[CPI].push_back(CPEntry(U.CPEMI, ID, 1));
+ ++NumCPEs;
+
+ // Mark the basic block as aligned as required by the const-pool entry.
+ NewIsland->setAlignment(getCPELogAlign(U.CPEMI));
+
+ // Increase the size of the island block to account for the new entry.
+ BBInfo[NewIsland->getNumber()].Size += Size;
+ adjustBBOffsetsAfter(llvm::prior(MachineFunction::iterator(NewIsland)));
+
+ // Finally, change the CPI in the instruction operand to be ID.
+ for (unsigned i = 0, e = UserMI->getNumOperands(); i != e; ++i)
+ if (UserMI->getOperand(i).isCPI()) {
+ UserMI->getOperand(i).setIndex(ID);
+ break;
+ }
+
+ DEBUG(dbgs() << " Moved CPE to #" << ID << " CPI=" << CPI
+ << format(" offset=%#x\n", BBInfo[NewIsland->getNumber()].Offset));
+
+ return true;
+}
+
+/// Remove a dead constant pool entry instruction. Update sizes and offsets of
+/// impacted basic blocks.
+void AArch64ConstantIslands::removeDeadCPEMI(MachineInstr *CPEMI) {
+ MachineBasicBlock *CPEBB = CPEMI->getParent();
+ unsigned Size = CPEMI->getOperand(2).getImm();
+ CPEMI->eraseFromParent();
+ BBInfo[CPEBB->getNumber()].Size -= Size;
+ // All succeeding offsets have the current size value added in, fix this.
+ if (CPEBB->empty()) {
+ BBInfo[CPEBB->getNumber()].Size = 0;
+
+ // This block no longer needs to be aligned. <rdar://problem/10534709>.
+ CPEBB->setAlignment(0);
+ } else
+ // Entries are sorted by descending alignment, so realign from the front.
+ CPEBB->setAlignment(getCPELogAlign(CPEBB->begin()));
+
+ adjustBBOffsetsAfter(CPEBB);
+ // An island has only one predecessor BB and one successor BB. Check if
+ // this BB's predecessor jumps directly to this BB's successor. This
+ // shouldn't happen currently.
+ assert(!BBIsJumpedOver(CPEBB) && "How did this happen?");
+ // FIXME: remove the empty blocks after all the work is done?
+}
+
+/// Remove constant pool entries whose refcounts are zero.
+bool AArch64ConstantIslands::removeUnusedCPEntries() {
+ bool MadeChange = false;
+ for (unsigned i = 0, e = CPEntries.size(); i != e; ++i) {
+ std::vector<CPEntry> &CPEs = CPEntries[i];
+ for (unsigned j = 0, ee = CPEs.size(); j != ee; ++j) {
+ if (CPEs[j].RefCount == 0 && CPEs[j].CPEMI) {
+ removeDeadCPEMI(CPEs[j].CPEMI);
+ CPEs[j].CPEMI = NULL;
+ MadeChange = true;
+ }
+ }
+ }
+ return MadeChange;
+}
+
+/// Returns true if the distance between the specified MI and the specified BB
+/// can fit in MI's displacement field.
+bool AArch64ConstantIslands::isBBInRange(MachineInstr *MI,
+ MachineBasicBlock *DestBB,
+ unsigned OffsetBits) {
+ int64_t BrOffset = getOffsetOf(MI);
+ int64_t DestOffset = BBInfo[DestBB->getNumber()].Offset;
+
+ DEBUG(dbgs() << "Branch of destination BB#" << DestBB->getNumber()
+ << " from BB#" << MI->getParent()->getNumber()
+ << " bits available=" << OffsetBits
+ << " from " << getOffsetOf(MI) << " to " << DestOffset
+ << " offset " << int(DestOffset-BrOffset) << "\t" << *MI);
+
+ return isIntN(OffsetBits, DestOffset - BrOffset);
+}
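+
+// As a rough guide (illustrative, derived from the Bits values recorded in
+// initializeFunctionInfo): TBZ/TBNZ reach roughly +/-32KiB (14 + 2 bits),
+// Bcc/CBZ/CBNZ roughly +/-1MiB (19 + 2 bits), and an unconditional B roughly
+// +/-128MiB (26 + 2 bits).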
+
+/// Fix up an immediate branch whose destination is too far away to fit in its
+/// displacement field.
+bool AArch64ConstantIslands::fixupImmediateBr(ImmBranch &Br) {
+ MachineInstr *MI = Br.MI;
+ MachineBasicBlock *DestBB = 0;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ if (MI->getOperand(i).isMBB()) {
+ DestBB = MI->getOperand(i).getMBB();
+ break;
+ }
+ }
+ assert(DestBB && "Branch with no destination BB?");
+
+ // Check to see if the DestBB is already in-range.
+ if (isBBInRange(MI, DestBB, Br.OffsetBits))
+ return false;
+
+ assert(Br.IsCond && "Only conditional branches should need fixup");
+ return fixupConditionalBr(Br);
+}
+
+/// Fix up a conditional branch whose destination is too far away to fit in its
+/// displacement field. It is converted to an inverse conditional branch + an
+/// unconditional branch to the destination.
+bool
+AArch64ConstantIslands::fixupConditionalBr(ImmBranch &Br) {
+ MachineInstr *MI = Br.MI;
+ MachineBasicBlock *MBB = MI->getParent();
+ unsigned CondBrMBBOperand = 0;
+
+ // The general idea is to add an unconditional branch to the destination and
+ // invert the conditional branch to jump over it. Complications occur around
+ // fallthrough and unreachable ends to the block.
+ // b.lt L1
+ // =>
+ // b.ge L2
+ // b L1
+ // L2:
+
+ // First we invert the conditional branch, by creating a replacement if
+ // necessary. This if statement contains all the special handling of different
+ // branch types.
+ if (MI->getOpcode() == AArch64::Bcc) {
+ // The basic block is operand number 1 for Bcc
+ CondBrMBBOperand = 1;
+
+ A64CC::CondCodes CC = (A64CC::CondCodes)MI->getOperand(0).getImm();
+ CC = A64InvertCondCode(CC);
+ MI->getOperand(0).setImm(CC);
+ } else {
+ MachineInstrBuilder InvertedMI;
+ int InvertedOpcode;
+ switch (MI->getOpcode()) {
+ default: llvm_unreachable("Unknown branch type");
+ case AArch64::TBZxii: InvertedOpcode = AArch64::TBNZxii; break;
+ case AArch64::TBZwii: InvertedOpcode = AArch64::TBNZwii; break;
+ case AArch64::TBNZxii: InvertedOpcode = AArch64::TBZxii; break;
+ case AArch64::TBNZwii: InvertedOpcode = AArch64::TBZwii; break;
+ case AArch64::CBZx: InvertedOpcode = AArch64::CBNZx; break;
+ case AArch64::CBZw: InvertedOpcode = AArch64::CBNZw; break;
+ case AArch64::CBNZx: InvertedOpcode = AArch64::CBZx; break;
+ case AArch64::CBNZw: InvertedOpcode = AArch64::CBZw; break;
+ }
+
+ InvertedMI = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(InvertedOpcode));
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ InvertedMI.addOperand(MI->getOperand(i));
+ if (MI->getOperand(i).isMBB())
+ CondBrMBBOperand = i;
+ }
+
+ MI->eraseFromParent();
+ MI = Br.MI = InvertedMI;
+ }
+
+ // If the branch is at the end of its MBB and that has a fall-through block,
+ // direct the updated conditional branch to the fall-through
+ // block. Otherwise, split the MBB before the next instruction.
+ MachineInstr *BMI = &MBB->back();
+ bool NeedSplit = (BMI != MI) || !BBHasFallthrough(MBB);
+
+ ++NumCBrFixed;
+ if (BMI != MI) {
+ if (llvm::next(MachineBasicBlock::iterator(MI)) == prior(MBB->end()) &&
+ BMI->getOpcode() == AArch64::Bimm) {
+ // Last MI in the BB is an unconditional branch. We can swap destinations:
+ // b.eq L1 (temporarily b.ne L1 after first change)
+ // b L2
+ // =>
+ // b.ne L2
+ // b L1
+ MachineBasicBlock *NewDest = BMI->getOperand(0).getMBB();
+ if (isBBInRange(MI, NewDest, Br.OffsetBits)) {
+ DEBUG(dbgs() << " Invert Bcc condition and swap its destination with "
+ << *BMI);
+ MachineBasicBlock *DestBB = MI->getOperand(CondBrMBBOperand).getMBB();
+ BMI->getOperand(0).setMBB(DestBB);
+ MI->getOperand(CondBrMBBOperand).setMBB(NewDest);
+ return true;
+ }
+ }
+ }
+
+ if (NeedSplit) {
+ MachineBasicBlock::iterator MBBI = MI; ++MBBI;
+ splitBlockBeforeInstr(MBBI);
+ // No need for the branch to the next block. We're adding an unconditional
+ // branch to the destination.
+ int delta = TII->getInstSizeInBytes(MBB->back());
+ BBInfo[MBB->getNumber()].Size -= delta;
+ MBB->back().eraseFromParent();
+ // BBInfo[SplitBB].Offset is wrong temporarily, fixed below
+ }
+
+ // After splitting and removing the unconditional branch from the original BB,
+ // the structure is now:
+ // oldbb:
+ // [things]
+ // b.invertedCC L1
+ // splitbb/fallthroughbb:
+ // [old b L2/real continuation]
+ //
+ // We now have to change the conditional branch to point to splitbb and add an
+ // unconditional branch after it to L1, giving the final structure:
+ // oldbb:
+ // [things]
+ // b.invertedCC splitbb
+ // b L1
+ // splitbb/fallthroughbb:
+ // [old b L2/real continuation]
+ MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(MBB));
+
+ DEBUG(dbgs() << " Insert B to BB#"
+ << MI->getOperand(CondBrMBBOperand).getMBB()->getNumber()
+ << " also invert condition and change dest. to BB#"
+ << NextBB->getNumber() << "\n");
+
+ // Insert a new unconditional branch and fixup the destination of the
+ // conditional one. Also update the ImmBranch as well as adding a new entry
+ // for the new branch.
+ BuildMI(MBB, DebugLoc(), TII->get(AArch64::Bimm))
+ .addMBB(MI->getOperand(CondBrMBBOperand).getMBB());
+ MI->getOperand(CondBrMBBOperand).setMBB(NextBB);
+
+ BBInfo[MBB->getNumber()].Size += TII->getInstSizeInBytes(MBB->back());
+
+ // 26 bits written down in Bimm, specifying a multiple of 4.
+ unsigned OffsetBits = 26 + 2;
+ ImmBranches.push_back(ImmBranch(&MBB->back(), OffsetBits, false));
+
+ adjustBBOffsetsAfter(MBB);
+ return true;
+}
diff --git a/lib/Target/AArch64/AArch64FrameLowering.cpp b/lib/Target/AArch64/AArch64FrameLowering.cpp
new file mode 100644
index 0000000000..2301114219
--- /dev/null
+++ b/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -0,0 +1,644 @@
+//===- AArch64FrameLowering.cpp - AArch64 Frame Information ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the AArch64 implementation of the TargetFrameLowering
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64.h"
+#include "AArch64FrameLowering.h"
+#include "AArch64MachineFunctionInfo.h"
+#include "AArch64InstrInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/IR/Function.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+
+void AArch64FrameLowering::splitSPAdjustments(uint64_t Total,
+ uint64_t &Initial,
+ uint64_t &Residual) const {
+ // 0x1f0 here is a pessimistic (i.e. realistic) boundary: x-register LDP
+ // instructions have a 7-bit signed immediate scaled by 8, giving a reach of
+ // 0x1f8, but stack adjustment should always be a multiple of 16.
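+  // (The largest positive 7-bit signed value is 63, and 63 * 8 = 0x1f8; the
+  // biggest 16-byte-aligned offset not exceeding that is 0x1f0.)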
+ if (Total <= 0x1f0) {
+ Initial = Total;
+ Residual = 0;
+ } else {
+ Initial = 0x1f0;
+ Residual = Total - Initial;
+ }
+}
+
+void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const {
+ AArch64MachineFunctionInfo *FuncInfo =
+ MF.getInfo<AArch64MachineFunctionInfo>();
+ MachineBasicBlock &MBB = MF.front();
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+
+ MachineModuleInfo &MMI = MF.getMMI();
+ std::vector<MachineMove> &Moves = MMI.getFrameMoves();
+ bool NeedsFrameMoves = MMI.hasDebugInfo()
+ || MF.getFunction()->needsUnwindTableEntry();
+
+ uint64_t NumInitialBytes, NumResidualBytes;
+
+ // Currently we expect the stack to be laid out by
+ // sub sp, sp, #initial
+ // stp x29, x30, [sp, #offset]
+ // ...
+ // str xxx, [sp, #offset]
+ // sub sp, sp, #rest (possibly via extra instructions).
+ if (MFI->getCalleeSavedInfo().size()) {
+    // If there are callee-saved registers, we want to store them efficiently
+    // as a block, but virtual base assignment happens too early to do it for
+    // us, so we adjust the stack in two phases: first just for the
+    // callee-saved registers, then to allocate the rest of the frame.
+ splitSPAdjustments(MFI->getStackSize(), NumInitialBytes, NumResidualBytes);
+ } else {
+    // If there aren't any callee-saved registers, a two-phase adjustment is
+    // just inefficient. It's also necessary to make the whole adjustment with
+    // NumInitialBytes because, when we're in a "callee pops argument space"
+    // situation, that pop must be tacked onto Initial for correctness.
+ NumInitialBytes = MFI->getStackSize();
+ NumResidualBytes = 0;
+ }
+
+ // Tell everyone else how much adjustment we're expecting them to use. In
+ // particular if an adjustment is required for a tail call the epilogue could
+ // have a different view of things.
+ FuncInfo->setInitialStackAdjust(NumInitialBytes);
+
+ emitSPUpdate(MBB, MBBI, DL, TII, AArch64::X16, -NumInitialBytes,
+ MachineInstr::FrameSetup);
+
+ if (NeedsFrameMoves && NumInitialBytes) {
+ // We emit this update even if the CFA is set from a frame pointer later so
+ // that the CFA is valid in the interim.
+ MCSymbol *SPLabel = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::PROLOG_LABEL))
+ .addSym(SPLabel);
+
+ MachineLocation Dst(MachineLocation::VirtualFP);
+ MachineLocation Src(AArch64::XSP, NumInitialBytes);
+ Moves.push_back(MachineMove(SPLabel, Dst, Src));
+ }
+
+ // Otherwise we need to set the frame pointer and/or add a second stack
+ // adjustment.
+
+ bool FPNeedsSetting = hasFP(MF);
+ for (; MBBI != MBB.end(); ++MBBI) {
+    // Note that this search makes strong assumptions about the operation used
+    // to store the frame-pointer: it must be "STP x29, x30, ...". This could
+    // change in the future, but until then there's no point in implementing
+    // more generic, untestable cases.
+ if (FPNeedsSetting && MBBI->getOpcode() == AArch64::LSPair64_STR
+ && MBBI->getOperand(0).getReg() == AArch64::X29) {
+ int64_t X29FrameIdx = MBBI->getOperand(2).getIndex();
+ FuncInfo->setFramePointerOffset(MFI->getObjectOffset(X29FrameIdx));
+
+ ++MBBI;
+ emitRegUpdate(MBB, MBBI, DL, TII, AArch64::X29, AArch64::XSP,
+ AArch64::X29,
+ NumInitialBytes + MFI->getObjectOffset(X29FrameIdx),
+ MachineInstr::FrameSetup);
+
+ // The offset adjustment used when emitting debugging locations relative
+ // to whatever frame base is set. AArch64 uses the default frame base (FP
+ // or SP) and this adjusts the calculations to be correct.
+ MFI->setOffsetAdjustment(- MFI->getObjectOffset(X29FrameIdx)
+ - MFI->getStackSize());
+
+ if (NeedsFrameMoves) {
+ MCSymbol *FPLabel = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::PROLOG_LABEL))
+ .addSym(FPLabel);
+ MachineLocation Dst(MachineLocation::VirtualFP);
+ MachineLocation Src(AArch64::X29, -MFI->getObjectOffset(X29FrameIdx));
+ Moves.push_back(MachineMove(FPLabel, Dst, Src));
+ }
+
+ FPNeedsSetting = false;
+ }
+
+ if (!MBBI->getFlag(MachineInstr::FrameSetup))
+ break;
+ }
+
+ assert(!FPNeedsSetting && "Frame pointer couldn't be set");
+
+ emitSPUpdate(MBB, MBBI, DL, TII, AArch64::X16, -NumResidualBytes,
+ MachineInstr::FrameSetup);
+
+ // Now we emit the rest of the frame setup information, if necessary: we've
+ // already noted the FP and initial SP moves so we're left with the prologue's
+ // final SP update and callee-saved register locations.
+ if (!NeedsFrameMoves)
+ return;
+
+ // Reuse the label if appropriate, so create it in this outer scope.
+ MCSymbol *CSLabel = 0;
+
+ // The rest of the stack adjustment
+ if (!hasFP(MF) && NumResidualBytes) {
+ CSLabel = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::PROLOG_LABEL))
+ .addSym(CSLabel);
+
+ MachineLocation Dst(MachineLocation::VirtualFP);
+ MachineLocation Src(AArch64::XSP, NumResidualBytes + NumInitialBytes);
+ Moves.push_back(MachineMove(CSLabel, Dst, Src));
+ }
+
+  // And any callee-saved registers (it's fine to leave them to the end here,
+  // because the old values are still valid at this point).
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ if (CSI.size()) {
+ if (!CSLabel) {
+ CSLabel = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::PROLOG_LABEL))
+ .addSym(CSLabel);
+ }
+
+ for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(),
+ E = CSI.end(); I != E; ++I) {
+      MachineLocation Dst(MachineLocation::VirtualFP,
+                          MFI->getObjectOffset(I->getFrameIdx()));
+ MachineLocation Src(I->getReg());
+ Moves.push_back(MachineMove(CSLabel, Dst, Src));
+ }
+ }
+}
+
+void
+AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ AArch64MachineFunctionInfo *FuncInfo =
+ MF.getInfo<AArch64MachineFunctionInfo>();
+
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ DebugLoc DL = MBBI->getDebugLoc();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ unsigned RetOpcode = MBBI->getOpcode();
+
+  // Initial and residual are named for consistency with the prologue. Note
+  // that in the epilogue, the residual adjustment is executed first.
+ uint64_t NumInitialBytes = FuncInfo->getInitialStackAdjust();
+ uint64_t NumResidualBytes = MFI.getStackSize() - NumInitialBytes;
+ uint64_t ArgumentPopSize = 0;
+ if (RetOpcode == AArch64::TC_RETURNdi ||
+ RetOpcode == AArch64::TC_RETURNxi) {
+ MachineOperand &JumpTarget = MBBI->getOperand(0);
+ MachineOperand &StackAdjust = MBBI->getOperand(1);
+
+ MachineInstrBuilder MIB;
+ if (RetOpcode == AArch64::TC_RETURNdi) {
+ MIB = BuildMI(MBB, MBBI, DL, TII.get(AArch64::TAIL_Bimm));
+ if (JumpTarget.isGlobal()) {
+ MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
+ JumpTarget.getTargetFlags());
+ } else {
+ assert(JumpTarget.isSymbol() && "unexpected tail call destination");
+ MIB.addExternalSymbol(JumpTarget.getSymbolName(),
+ JumpTarget.getTargetFlags());
+ }
+ } else {
+ assert(RetOpcode == AArch64::TC_RETURNxi && JumpTarget.isReg()
+ && "Unexpected tail call");
+
+ MIB = BuildMI(MBB, MBBI, DL, TII.get(AArch64::TAIL_BRx));
+ MIB.addReg(JumpTarget.getReg(), RegState::Kill);
+ }
+
+ // Add the extra operands onto the new tail call instruction even though
+ // they're not used directly (so that liveness is tracked properly etc).
+ for (unsigned i = 2, e = MBBI->getNumOperands(); i != e; ++i)
+ MIB->addOperand(MBBI->getOperand(i));
+
+
+ // Delete the pseudo instruction TC_RETURN.
+ MachineInstr *NewMI = prior(MBBI);
+ MBB.erase(MBBI);
+ MBBI = NewMI;
+
+    // For a tail-call in a callee-pops-arguments environment, some or all of
+    // the stack may actually be in use for the call's arguments; this is
+    // calculated during LowerCall and consumed here...
+ ArgumentPopSize = StackAdjust.getImm();
+ } else {
+ // ... otherwise the amount to pop is *all* of the argument space,
+ // conveniently stored in the MachineFunctionInfo by
+ // LowerFormalArguments. This will, of course, be zero for the C calling
+ // convention.
+ ArgumentPopSize = FuncInfo->getArgumentStackToRestore();
+ }
+
+ assert(NumInitialBytes % 16 == 0 && NumResidualBytes % 16 == 0
+ && "refusing to adjust stack by misaligned amt");
+
+ // We may need to address callee-saved registers differently, so find out the
+ // bound on the frame indices.
+ const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
+ int MinCSFI = 0;
+ int MaxCSFI = -1;
+
+ if (CSI.size()) {
+ MinCSFI = CSI[0].getFrameIdx();
+ MaxCSFI = CSI[CSI.size() - 1].getFrameIdx();
+ }
+
+ // The "residual" stack update comes first from this direction and guarantees
+ // that SP is NumInitialBytes below its value on function entry, either by a
+ // direct update or restoring it from the frame pointer.
+ if (NumInitialBytes + ArgumentPopSize != 0) {
+ emitSPUpdate(MBB, MBBI, DL, TII, AArch64::X16,
+ NumInitialBytes + ArgumentPopSize);
+ --MBBI;
+ }
+
+
+ // MBBI now points to the instruction just past the last callee-saved
+ // restoration (either RET/B if NumInitialBytes == 0, or the "ADD sp, sp"
+ // otherwise).
+
+ // Now we need to find out where to put the bulk of the stack adjustment
+ MachineBasicBlock::iterator FirstEpilogue = MBBI;
+ while (MBBI != MBB.begin()) {
+ --MBBI;
+
+ unsigned FrameOp;
+ for (FrameOp = 0; FrameOp < MBBI->getNumOperands(); ++FrameOp) {
+ if (MBBI->getOperand(FrameOp).isFI())
+ break;
+ }
+
+ // If this instruction doesn't have a frame index we've reached the end of
+ // the callee-save restoration.
+ if (FrameOp == MBBI->getNumOperands())
+ break;
+
+ // Likewise if it *is* a local reference, but not to a callee-saved object.
+ int FrameIdx = MBBI->getOperand(FrameOp).getIndex();
+ if (FrameIdx < MinCSFI || FrameIdx > MaxCSFI)
+ break;
+
+ FirstEpilogue = MBBI;
+ }
+
+ if (MF.getFrameInfo()->hasVarSizedObjects()) {
+ int64_t StaticFrameBase;
+ StaticFrameBase = -(NumInitialBytes + FuncInfo->getFramePointerOffset());
+ emitRegUpdate(MBB, FirstEpilogue, DL, TII,
+ AArch64::XSP, AArch64::X29, AArch64::NoRegister,
+ StaticFrameBase);
+ } else {
+    emitSPUpdate(MBB, FirstEpilogue, DL, TII, AArch64::X16, NumResidualBytes);
+ }
+}
+
+int64_t
+AArch64FrameLowering::resolveFrameIndexReference(MachineFunction &MF,
+ int FrameIndex,
+ unsigned &FrameReg,
+ int SPAdj,
+ bool IsCalleeSaveOp) const {
+ AArch64MachineFunctionInfo *FuncInfo =
+ MF.getInfo<AArch64MachineFunctionInfo>();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ int64_t TopOfFrameOffset = MFI->getObjectOffset(FrameIndex);
+
+ assert(!(IsCalleeSaveOp && FuncInfo->getInitialStackAdjust() == 0)
+ && "callee-saved register in unexpected place");
+
+ // If the frame for this function is particularly large, we adjust the stack
+ // in two phases which means the callee-save related operations see a
+ // different (intermediate) stack size.
+ int64_t FrameRegPos;
+ if (IsCalleeSaveOp) {
+ FrameReg = AArch64::XSP;
+ FrameRegPos = -static_cast<int64_t>(FuncInfo->getInitialStackAdjust());
+ } else if (useFPForAddressing(MF)) {
+ // Have to use the frame pointer since we have no idea where SP is.
+ FrameReg = AArch64::X29;
+ FrameRegPos = FuncInfo->getFramePointerOffset();
+ } else {
+ FrameReg = AArch64::XSP;
+ FrameRegPos = -static_cast<int64_t>(MFI->getStackSize()) + SPAdj;
+ }
+
+ return TopOfFrameOffset - FrameRegPos;
+}
+
+/// Estimate and return the size of the frame.
+static unsigned estimateStackSize(MachineFunction &MF) {
+ // FIXME: Make generic? Really consider after upstreaming. This code is now
+ // shared between PEI, ARM *and* here.
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
+ unsigned MaxAlign = MFI->getMaxAlignment();
+ int Offset = 0;
+
+ // This code is very, very similar to PEI::calculateFrameObjectOffsets().
+ // It really should be refactored to share code. Until then, changes
+ // should keep in mind that there's tight coupling between the two.
+
+ for (int i = MFI->getObjectIndexBegin(); i != 0; ++i) {
+ int FixedOff = -MFI->getObjectOffset(i);
+ if (FixedOff > Offset) Offset = FixedOff;
+ }
+ for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
+ if (MFI->isDeadObjectIndex(i))
+ continue;
+ Offset += MFI->getObjectSize(i);
+ unsigned Align = MFI->getObjectAlignment(i);
+ // Adjust to alignment boundary
+ Offset = (Offset+Align-1)/Align*Align;
+
+ MaxAlign = std::max(Align, MaxAlign);
+ }
+
+ if (MFI->adjustsStack() && TFI->hasReservedCallFrame(MF))
+ Offset += MFI->getMaxCallFrameSize();
+
+ // Round up the size to a multiple of the alignment. If the function has
+ // any calls or alloca's, align to the target's StackAlignment value to
+ // ensure that the callee's frame or the alloca data is suitably aligned;
+ // otherwise, for leaf functions, align to the TransientStackAlignment
+ // value.
+ unsigned StackAlign;
+ if (MFI->adjustsStack() || MFI->hasVarSizedObjects() ||
+ (RegInfo->needsStackRealignment(MF) && MFI->getObjectIndexEnd() != 0))
+ StackAlign = TFI->getStackAlignment();
+ else
+ StackAlign = TFI->getTransientStackAlignment();
+
+ // If the frame pointer is eliminated, all frame offsets will be relative to
+ // SP not FP. Align to MaxAlign so this works.
+ StackAlign = std::max(StackAlign, MaxAlign);
+ unsigned AlignMask = StackAlign - 1;
+ Offset = (Offset + AlignMask) & ~uint64_t(AlignMask);
+
+ return (unsigned)Offset;
+}
+
+void
+AArch64FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const {
+ const AArch64RegisterInfo *RegInfo =
+ static_cast<const AArch64RegisterInfo *>(MF.getTarget().getRegisterInfo());
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const AArch64InstrInfo &TII =
+ *static_cast<const AArch64InstrInfo *>(MF.getTarget().getInstrInfo());
+
+ if (hasFP(MF)) {
+ MF.getRegInfo().setPhysRegUsed(AArch64::X29);
+ MF.getRegInfo().setPhysRegUsed(AArch64::X30);
+ }
+
+  // If addressing of local variables is going to be more complicated than
+  // shoving a base register and an offset into the instruction, then we may
+  // well need to scavenge registers. We should either specifically add a
+  // callee-saved register for this purpose or allocate an extra spill slot.
+
+ bool BigStack =
+ (RS && estimateStackSize(MF) >= TII.estimateRSStackLimit(MF))
+ || MFI->hasVarSizedObjects() // Access will be from X29: messes things up
+ || (MFI->adjustsStack() && !hasReservedCallFrame(MF));
+
+ if (!BigStack)
+ return;
+
+ // We certainly need some slack space for the scavenger, preferably an extra
+ // register.
+ const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs();
+ uint16_t ExtraReg = AArch64::NoRegister;
+
+ for (unsigned i = 0; CSRegs[i]; ++i) {
+ if (AArch64::GPR64RegClass.contains(CSRegs[i]) &&
+ !MF.getRegInfo().isPhysRegUsed(CSRegs[i])) {
+ ExtraReg = CSRegs[i];
+ break;
+ }
+ }
+
+ if (ExtraReg != 0) {
+ MF.getRegInfo().setPhysRegUsed(ExtraReg);
+ } else {
+    // Create a stack slot for scavenging purposes. PrologEpilogInserter
+    // helpfully places it near either SP or FP for us to avoid an infinite
+    // regress during scavenging.
+ const TargetRegisterClass *RC = &AArch64::GPR64RegClass;
+ RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
+ RC->getAlignment(),
+ false));
+ }
+}
+
+bool AArch64FrameLowering::determinePrologueDeath(MachineBasicBlock &MBB,
+ unsigned Reg) const {
+ // If @llvm.returnaddress is called then it will refer to X30 by some means;
+ // the prologue store does not kill the register.
+ if (Reg == AArch64::X30) {
+ if (MBB.getParent()->getFrameInfo()->isReturnAddressTaken()
+ && MBB.getParent()->getRegInfo().isLiveIn(Reg))
+ return false;
+ }
+
+ // In all other cases, physical registers are dead after they've been saved
+ // but live at the beginning of the prologue block.
+ MBB.addLiveIn(Reg);
+ return true;
+}
+
+void
+AArch64FrameLowering::emitFrameMemOps(bool isPrologue, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI,
+ LoadStoreMethod PossClasses[],
+ unsigned NumClasses) const {
+ DebugLoc DL = MBB.findDebugLoc(MBBI);
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+
+ // A certain amount of implicit contract is present here. The actual stack
+ // offsets haven't been allocated officially yet, so for strictly correct code
+ // we rely on the fact that the elements of CSI are allocated in order
+ // starting at SP, purely as dictated by size and alignment. In practice since
+ // this function handles the only accesses to those slots it's not quite so
+ // important.
+ //
+ // We have also ordered the Callee-saved register list in AArch64CallingConv
+ // so that the above scheme puts registers in order: in particular we want
+ // &X30 to be &X29+8 for an ABI-correct frame record (PCS 5.2.2)
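+  // In practice this means the frame record ends up being stored with a single
+  // paired access, e.g. "stp x29, x30, [sp, #offset]".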
+ for (unsigned i = 0, e = CSI.size(); i < e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+
+    // First we need to find out which register class the register belongs to
+    // so that we can use the correct load/store instructions.
+ unsigned ClassIdx;
+ for (ClassIdx = 0; ClassIdx < NumClasses; ++ClassIdx) {
+ if (PossClasses[ClassIdx].RegClass->contains(Reg))
+ break;
+ }
+ assert(ClassIdx != NumClasses
+ && "Asked to store register in unexpected class");
+ const TargetRegisterClass &TheClass = *PossClasses[ClassIdx].RegClass;
+
+ // Now we need to decide whether it's possible to emit a paired instruction:
+ // for this we want the next register to be in the same class.
+ MachineInstrBuilder NewMI;
+ bool Pair = false;
+ if (i + 1 < CSI.size() && TheClass.contains(CSI[i+1].getReg())) {
+ Pair = true;
+ unsigned StLow = 0, StHigh = 0;
+ if (isPrologue) {
+ // Most of these registers will be live-in to the MBB and killed by our
+ // store, though there are exceptions (see determinePrologueDeath).
+ StLow = getKillRegState(determinePrologueDeath(MBB, CSI[i+1].getReg()));
+ StHigh = getKillRegState(determinePrologueDeath(MBB, CSI[i].getReg()));
+ } else {
+ StLow = RegState::Define;
+ StHigh = RegState::Define;
+ }
+
+ NewMI = BuildMI(MBB, MBBI, DL, TII.get(PossClasses[ClassIdx].PairOpcode))
+ .addReg(CSI[i+1].getReg(), StLow)
+ .addReg(CSI[i].getReg(), StHigh);
+
+ // If it's a paired op, we've consumed two registers
+ ++i;
+ } else {
+ unsigned State;
+ if (isPrologue) {
+ State = getKillRegState(determinePrologueDeath(MBB, CSI[i].getReg()));
+ } else {
+ State = RegState::Define;
+ }
+
+ NewMI = BuildMI(MBB, MBBI, DL, TII.get(PossClasses[ClassIdx].SingleOpcode))
+ .addReg(CSI[i].getReg(), State);
+ }
+
+ // Note that the FrameIdx refers to the second register in a pair: it will
+ // be allocated the smaller numeric address and so is the one an LDP/STP
+ // address must use.
+ int FrameIdx = CSI[i].getFrameIdx();
+ MachineMemOperand::MemOperandFlags Flags;
+ Flags = isPrologue ? MachineMemOperand::MOStore : MachineMemOperand::MOLoad;
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
+ Flags,
+ Pair ? TheClass.getSize() * 2 : TheClass.getSize(),
+ MFI.getObjectAlignment(FrameIdx));
+
+ NewMI.addFrameIndex(FrameIdx)
+ .addImm(0) // address-register offset
+ .addMemOperand(MMO);
+
+ if (isPrologue)
+ NewMI.setMIFlags(MachineInstr::FrameSetup);
+
+ // For aesthetic reasons, during an epilogue we want to emit complementary
+ // operations to the prologue, but in the opposite order. So we still
+ // iterate through the CalleeSavedInfo list in order, but we put the
+ // instructions successively earlier in the MBB.
+ if (!isPrologue)
+ --MBBI;
+ }
+}
+
+bool
+AArch64FrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ if (CSI.empty())
+ return false;
+
+ static LoadStoreMethod PossibleClasses[] = {
+ {&AArch64::GPR64RegClass, AArch64::LSPair64_STR, AArch64::LS64_STR},
+ {&AArch64::FPR64RegClass, AArch64::LSFPPair64_STR, AArch64::LSFP64_STR},
+ };
+ unsigned NumClasses = llvm::array_lengthof(PossibleClasses);
+
+ emitFrameMemOps(/* isPrologue = */ true, MBB, MBBI, CSI, TRI,
+ PossibleClasses, NumClasses);
+
+ return true;
+}
+
+bool
+AArch64FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+
+ if (CSI.empty())
+ return false;
+
+ static LoadStoreMethod PossibleClasses[] = {
+ {&AArch64::GPR64RegClass, AArch64::LSPair64_LDR, AArch64::LS64_LDR},
+ {&AArch64::FPR64RegClass, AArch64::LSFPPair64_LDR, AArch64::LSFP64_LDR},
+ };
+ unsigned NumClasses = llvm::array_lengthof(PossibleClasses);
+
+ emitFrameMemOps(/* isPrologue = */ false, MBB, MBBI, CSI, TRI,
+ PossibleClasses, NumClasses);
+
+ return true;
+}
+
+bool
+AArch64FrameLowering::hasFP(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const TargetRegisterInfo *RI = MF.getTarget().getRegisterInfo();
+
+  // This is a decision about ABI compliance. The AArch64 PCS gives various
+  // options for conformance, and even at the most stringent level more or less
+  // permits elimination for leaf functions because there's no loss of
+  // functionality (for debugging etc).
+ if (MF.getTarget().Options.DisableFramePointerElim(MF) && MFI->hasCalls())
+ return true;
+
+  // The following are hard limits: incorrect code will be generated if we try
+  // to omit the frame pointer.
+ return (RI->needsStackRealignment(MF) ||
+ MFI->hasVarSizedObjects() ||
+ MFI->isFrameAddressTaken());
+}
+
+bool
+AArch64FrameLowering::useFPForAddressing(const MachineFunction &MF) const {
+ return MF.getFrameInfo()->hasVarSizedObjects();
+}
+
+bool
+AArch64FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ // Of the various reasons for having a frame pointer, it's actually only
+ // variable-sized objects that prevent reservation of a call frame.
+ return !(hasFP(MF) && MFI->hasVarSizedObjects());
+}
diff --git a/lib/Target/AArch64/AArch64FrameLowering.h b/lib/Target/AArch64/AArch64FrameLowering.h
new file mode 100644
index 0000000000..dfa66ec236
--- /dev/null
+++ b/lib/Target/AArch64/AArch64FrameLowering.h
@@ -0,0 +1,103 @@
+//==- AArch64FrameLowering.h - Define frame lowering for AArch64 -*- C++ -*--=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the AArch64 implementation of the TargetFrameLowering
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_AARCH64_FRAMEINFO_H
+#define LLVM_AARCH64_FRAMEINFO_H
+
+#include "AArch64Subtarget.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+class AArch64Subtarget;
+
+class AArch64FrameLowering : public TargetFrameLowering {
+private:
+ // In order to unify the spilling and restoring of callee-saved registers into
+ // emitFrameMemOps, we need to be able to specify which instructions to use
+ // for the relevant memory operations on each register class. An array of the
+ // following struct is populated and passed in to achieve this.
+ struct LoadStoreMethod {
+ const TargetRegisterClass *RegClass; // E.g. GPR64RegClass
+
+ // The preferred instruction.
+ unsigned PairOpcode; // E.g. LSPair64_STR
+
+ // Sometimes only a single register can be handled at once.
+ unsigned SingleOpcode; // E.g. LS64_STR
+ };
+protected:
+ const AArch64Subtarget &STI;
+
+public:
+ explicit AArch64FrameLowering(const AArch64Subtarget &sti)
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 16, 0, 16),
+ STI(sti) {
+ }
+
+ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+ /// the function.
+ virtual void emitPrologue(MachineFunction &MF) const;
+ virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ /// Decides how much stack adjustment to perform in each phase of the prologue
+ /// and epilogue.
+ void splitSPAdjustments(uint64_t Total, uint64_t &Initial,
+ uint64_t &Residual) const;
+
+ int64_t resolveFrameIndexReference(MachineFunction &MF, int FrameIndex,
+ unsigned &FrameReg, int SPAdj,
+ bool IsCalleeSaveOp) const;
+
+ virtual void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const;
+
+ virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+ virtual bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+
+ /// If the register is X30 (i.e. LR) and the return address is used in the
+ /// function then the callee-save store doesn't actually kill the register,
+ /// otherwise it does.
+ bool determinePrologueDeath(MachineBasicBlock &MBB, unsigned Reg) const;
+
+ /// This function emits the loads or stores required during prologue and
+ /// epilogue as efficiently as possible.
+ ///
+ /// The operations involved in setting up and tearing down the frame are
+ /// similar enough to warrant a shared function, particularly as discrepancies
+ /// between the two would be disastrous.
+  void emitFrameMemOps(bool isPrologue, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI,
+ LoadStoreMethod PossibleClasses[],
+ unsigned NumClasses) const;
+
+
+ virtual bool hasFP(const MachineFunction &MF) const;
+
+ virtual bool useFPForAddressing(const MachineFunction &MF) const;
+
+  /// On AArch64, the only thing that prevents reserving a call frame is the
+  /// presence of variable-sized objects.
+ virtual bool hasReservedCallFrame(const MachineFunction &MF) const;
+
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
new file mode 100644
index 0000000000..9be8ba1096
--- /dev/null
+++ b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -0,0 +1,422 @@
+//===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the AArch64 target.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "aarch64-isel"
+#include "AArch64.h"
+#include "AArch64InstrInfo.h"
+#include "AArch64Subtarget.h"
+#include "AArch64TargetMachine.h"
+#include "MCTargetDesc/AArch64BaseInfo.h"
+#include "llvm/ADT/APSInt.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+//===--------------------------------------------------------------------===//
+/// AArch64 specific code to select AArch64 machine instructions for
+/// SelectionDAG operations.
+///
+namespace {
+
+class AArch64DAGToDAGISel : public SelectionDAGISel {
+ AArch64TargetMachine &TM;
+ const AArch64InstrInfo *TII;
+
+ /// Keep a pointer to the AArch64Subtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const AArch64Subtarget *Subtarget;
+
+public:
+ explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
+ CodeGenOpt::Level OptLevel)
+ : SelectionDAGISel(tm, OptLevel), TM(tm),
+ TII(static_cast<const AArch64InstrInfo*>(TM.getInstrInfo())),
+ Subtarget(&TM.getSubtarget<AArch64Subtarget>()) {
+ }
+
+ virtual const char *getPassName() const {
+ return "AArch64 Instruction Selection";
+ }
+
+ // Include the pieces autogenerated from the target description.
+#include "AArch64GenDAGISel.inc"
+
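+  /// Match a constant usable as an unsigned, scaled 12-bit offset for a
+  /// MemSize-byte access: it must be a multiple of MemSize and no larger than
+  /// MemSize * 0xfff.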
+ template<unsigned MemSize>
+ bool SelectOffsetUImm12(SDValue N, SDValue &UImm12) {
+ const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
+ if (!CN || CN->getZExtValue() % MemSize != 0
+ || CN->getZExtValue() / MemSize > 0xfff)
+ return false;
+
+ UImm12 = CurDAG->getTargetConstant(CN->getZExtValue() / MemSize, MVT::i64);
+ return true;
+ }
+
+ template<unsigned RegWidth>
+ bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
+ return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
+ }
+
+ bool SelectFPZeroOperand(SDValue N, SDValue &Dummy);
+
+ bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned RegWidth);
+
+ bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+ char ConstraintCode,
+ std::vector<SDValue> &OutOps);
+
+ bool SelectLogicalImm(SDValue N, SDValue &Imm);
+
+ template<unsigned RegWidth>
+ bool SelectTSTBOperand(SDValue N, SDValue &FixedPos) {
+ return SelectTSTBOperand(N, FixedPos, RegWidth);
+ }
+
+ bool SelectTSTBOperand(SDValue N, SDValue &FixedPos, unsigned RegWidth);
+
+ SDNode *TrySelectToMoveImm(SDNode *N);
+ SDNode *SelectToLitPool(SDNode *N);
+ SDNode *SelectToFPLitPool(SDNode *N);
+
+  SDNode *Select(SDNode *Node);
+};
+}
+
+bool
+AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
+ unsigned RegWidth) {
+ const ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N);
+ if (!CN) return false;
+
+ // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits
+ // is between 1 and 32 for a destination w-register, or 1 and 64 for an
+ // x-register.
+ //
+ // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we
+ // want THIS_NODE to be 2^fbits. This is much easier to deal with using
+ // integers.
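+  // For example, (fp_to_sint (fmul Val, 8.0)) corresponds to fbits == 3.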
+ bool IsExact;
+
+ // fbits is between 1 and 64 in the worst-case, which means the fmul
+ // could have 2^64 as an actual operand. Need 65 bits of precision.
+ APSInt IntVal(65, true);
+ CN->getValueAPF().convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact);
+
+ // N.b. isPowerOf2 also checks for > 0.
+ if (!IsExact || !IntVal.isPowerOf2()) return false;
+ unsigned FBits = IntVal.logBase2();
+
+ // Checks above should have guaranteed that we haven't lost information in
+ // finding FBits, but it must still be in range.
+ if (FBits == 0 || FBits > RegWidth) return false;
+
+ FixedPos = CurDAG->getTargetConstant(64 - FBits, MVT::i32);
+ return true;
+}
+
+bool
+AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(const SDValue &Op,
+ char ConstraintCode,
+ std::vector<SDValue> &OutOps) {
+ switch (ConstraintCode) {
+ default: llvm_unreachable("Unrecognised AArch64 memory constraint");
+ case 'm':
+ // FIXME: more freedom is actually permitted for 'm'. We can go
+ // hunting for a base and an offset if we want. Of course, since
+ // we don't really know how the operand is going to be used we're
+ // probably restricted to the load/store pair's simm7 as an offset
+ // range anyway.
+ case 'Q':
+ OutOps.push_back(Op);
+ }
+
+ return false;
+}
+
+bool
+AArch64DAGToDAGISel::SelectFPZeroOperand(SDValue N, SDValue &Dummy) {
+ ConstantFPSDNode *Imm = dyn_cast<ConstantFPSDNode>(N);
+ if (!Imm || !Imm->getValueAPF().isPosZero())
+ return false;
+
+ // Doesn't actually carry any information, but keeps TableGen quiet.
+ Dummy = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+}
+
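+// Match a constant that is encodable as an AArch64 logical (bitmask) immediate
+// for the operand's register width; Imm receives the encoded form.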
+bool AArch64DAGToDAGISel::SelectLogicalImm(SDValue N, SDValue &Imm) {
+ uint32_t Bits;
+ uint32_t RegWidth = N.getValueType().getSizeInBits();
+
+ ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
+ if (!CN) return false;
+
+ if (!A64Imms::isLogicalImm(RegWidth, CN->getZExtValue(), Bits))
+ return false;
+
+ Imm = CurDAG->getTargetConstant(Bits, MVT::i32);
+ return true;
+}
+
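+// Try to materialise the constant in Node with a single MOVZ, MOVN or
+// ORR-with-logical-immediate instruction, returning NULL if none of them can
+// encode the value on its own.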
+SDNode *AArch64DAGToDAGISel::TrySelectToMoveImm(SDNode *Node) {
+ SDNode *ResNode;
+ DebugLoc dl = Node->getDebugLoc();
+ EVT DestType = Node->getValueType(0);
+ unsigned DestWidth = DestType.getSizeInBits();
+
+ unsigned MOVOpcode;
+ EVT MOVType;
+ int UImm16, Shift;
+ uint32_t LogicalBits;
+
+ uint64_t BitPat = cast<ConstantSDNode>(Node)->getZExtValue();
+ if (A64Imms::isMOVZImm(DestWidth, BitPat, UImm16, Shift)) {
+ MOVType = DestType;
+ MOVOpcode = DestWidth == 64 ? AArch64::MOVZxii : AArch64::MOVZwii;
+ } else if (A64Imms::isMOVNImm(DestWidth, BitPat, UImm16, Shift)) {
+ MOVType = DestType;
+ MOVOpcode = DestWidth == 64 ? AArch64::MOVNxii : AArch64::MOVNwii;
+ } else if (DestWidth == 64 && A64Imms::isMOVNImm(32, BitPat, UImm16, Shift)) {
+    // To get something like 0x0000_0000_ffff_1234 into a 64-bit register we
+    // can use a 32-bit instruction: "movn w0, #0xedcb".
+ MOVType = MVT::i32;
+ MOVOpcode = AArch64::MOVNwii;
+ } else if (A64Imms::isLogicalImm(DestWidth, BitPat, LogicalBits)) {
+ MOVOpcode = DestWidth == 64 ? AArch64::ORRxxi : AArch64::ORRwwi;
+ uint16_t ZR = DestWidth == 64 ? AArch64::XZR : AArch64::WZR;
+
+ return CurDAG->getMachineNode(MOVOpcode, dl, DestType,
+ CurDAG->getRegister(ZR, DestType),
+ CurDAG->getTargetConstant(LogicalBits, MVT::i32));
+ } else {
+ // Can't handle it in one instruction. There's scope for permitting two (or
+ // more) instructions, but that'll need more thought.
+ return NULL;
+ }
+
+ ResNode = CurDAG->getMachineNode(MOVOpcode, dl, MOVType,
+ CurDAG->getTargetConstant(UImm16, MVT::i32),
+ CurDAG->getTargetConstant(Shift, MVT::i32));
+
+ if (MOVType != DestType) {
+ ResNode = CurDAG->getMachineNode(TargetOpcode::SUBREG_TO_REG, dl,
+ MVT::i64, MVT::i32, MVT::Other,
+ CurDAG->getTargetConstant(0, MVT::i64),
+ SDValue(ResNode, 0),
+ CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32));
+ }
+
+ return ResNode;
+}
+
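+// Materialise the constant via a literal-pool load, using the narrowest entry
+// that preserves the value: a 32-bit LDR (zero-extended), LDRSW, or a full
+// 64-bit LDR.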
+SDNode *AArch64DAGToDAGISel::SelectToLitPool(SDNode *Node) {
+ DebugLoc dl = Node->getDebugLoc();
+ uint64_t UnsignedVal = cast<ConstantSDNode>(Node)->getZExtValue();
+ int64_t SignedVal = cast<ConstantSDNode>(Node)->getSExtValue();
+ EVT DestType = Node->getValueType(0);
+
+ // Since we may end up loading a 64-bit constant from a 32-bit entry the
+ // constant in the pool may have a different type to the eventual node.
+ SDValue PoolEntry;
+ EVT LoadType;
+ unsigned LoadInst;
+
+ assert((DestType == MVT::i64 || DestType == MVT::i32)
+ && "Only expect integer constants at the moment");
+
+ if (DestType == MVT::i32 || UnsignedVal <= UINT32_MAX) {
+ // LDR w3, lbl
+ LoadInst = AArch64::LDRw_lit;
+ LoadType = MVT::i32;
+
+ PoolEntry = CurDAG->getTargetConstantPool(
+ ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), UnsignedVal),
+ MVT::i32);
+ } else if (SignedVal >= INT32_MIN && SignedVal <= INT32_MAX) {
+ // We can use a sign-extending 32-bit load: LDRSW x3, lbl
+ LoadInst = AArch64::LDRSWx_lit;
+ LoadType = MVT::i64;
+
+ PoolEntry = CurDAG->getTargetConstantPool(
+ ConstantInt::getSigned(Type::getInt32Ty(*CurDAG->getContext()),
+ SignedVal),
+ MVT::i32);
+ } else {
+ // Full 64-bit load needed: LDR x3, lbl
+ LoadInst = AArch64::LDRx_lit;
+ LoadType = MVT::i64;
+
+ PoolEntry = CurDAG->getTargetConstantPool(
+ ConstantInt::get(Type::getInt64Ty(*CurDAG->getContext()), UnsignedVal),
+ MVT::i64);
+ }
+
+ SDNode *ResNode = CurDAG->getMachineNode(LoadInst, dl,
+ LoadType, MVT::Other,
+ PoolEntry, CurDAG->getEntryNode());
+
+ if (DestType != LoadType) {
+    // We used the implicit zero-extension of "LDR w3, lbl", so tell LLVM this
+    // fact.
+ assert(DestType == MVT::i64 && LoadType == MVT::i32
+ && "Unexpected load combination");
+
+ ResNode = CurDAG->getMachineNode(TargetOpcode::SUBREG_TO_REG, dl,
+ MVT::i64, MVT::i32, MVT::Other,
+ CurDAG->getTargetConstant(0, MVT::i64),
+ SDValue(ResNode, 0),
+ CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32));
+ }
+
+ return ResNode;
+}
+
+SDNode *AArch64DAGToDAGISel::SelectToFPLitPool(SDNode *Node) {
+ DebugLoc dl = Node->getDebugLoc();
+ const ConstantFP *FV = cast<ConstantFPSDNode>(Node)->getConstantFPValue();
+ EVT DestType = Node->getValueType(0);
+
+ unsigned LoadInst;
+ switch (DestType.getSizeInBits()) {
+ case 32:
+ LoadInst = AArch64::LDRs_lit;
+ break;
+ case 64:
+ LoadInst = AArch64::LDRd_lit;
+ break;
+ case 128:
+ LoadInst = AArch64::LDRq_lit;
+ break;
+ default: llvm_unreachable("cannot select floating-point litpool");
+ }
+
+ SDValue PoolEntry = CurDAG->getTargetConstantPool(FV, DestType);
+ SDNode *ResNode = CurDAG->getMachineNode(LoadInst, dl,
+ DestType, MVT::Other,
+ PoolEntry, CurDAG->getEntryNode());
+
+ return ResNode;
+}
+
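+// Match an immediate with exactly one bit set (e.g. 0x80 gives bit 7);
+// FixedPos becomes the index of that bit, which must lie within RegWidth.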
+bool
+AArch64DAGToDAGISel::SelectTSTBOperand(SDValue N, SDValue &FixedPos,
+ unsigned RegWidth) {
+ const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
+ if (!CN) return false;
+
+ uint64_t Val = CN->getZExtValue();
+
+ if (!isPowerOf2_64(Val)) return false;
+
+ unsigned TestedBit = Log2_64(Val);
+ // Checks above should have guaranteed that we haven't lost information in
+ // finding TestedBit, but it must still be in range.
+ if (TestedBit >= RegWidth) return false;
+
+ FixedPos = CurDAG->getTargetConstant(TestedBit, MVT::i64);
+ return true;
+}
+
+SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
+ // Dump information about the Node being selected
+ DEBUG(dbgs() << "Selecting: "; Node->dump(CurDAG); dbgs() << "\n");
+
+ if (Node->isMachineOpcode()) {
+ DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n");
+ return NULL;
+ }
+
+ switch (Node->getOpcode()) {
+ case ISD::FrameIndex: {
+ int FI = cast<FrameIndexSDNode>(Node)->getIndex();
+ EVT PtrTy = TLI.getPointerTy();
+ SDValue TFI = CurDAG->getTargetFrameIndex(FI, PtrTy);
+ return CurDAG->SelectNodeTo(Node, AArch64::ADDxxi_lsl0_s, PtrTy,
+ TFI, CurDAG->getTargetConstant(0, PtrTy));
+ }
+ case ISD::ConstantPool: {
+    // Constant pools are fine; just create a Target entry.
+ ConstantPoolSDNode *CN = cast<ConstantPoolSDNode>(Node);
+ const Constant *C = CN->getConstVal();
+ SDValue CP = CurDAG->getTargetConstantPool(C, CN->getValueType(0));
+
+ ReplaceUses(SDValue(Node, 0), CP);
+ return NULL;
+ }
+ case ISD::Constant: {
+ SDNode *ResNode = 0;
+ if (cast<ConstantSDNode>(Node)->getZExtValue() == 0) {
+ // XZR and WZR are probably even better than an actual move: most of the
+ // time they can be folded into another instruction with *no* cost.
+
+ EVT Ty = Node->getValueType(0);
+ assert((Ty == MVT::i32 || Ty == MVT::i64) && "unexpected type");
+ uint16_t Register = Ty == MVT::i32 ? AArch64::WZR : AArch64::XZR;
+ ResNode = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
+ Node->getDebugLoc(),
+ Register, Ty).getNode();
+ }
+
+ // Next best option is a move-immediate, see if we can do that.
+ if (!ResNode) {
+ ResNode = TrySelectToMoveImm(Node);
+ }
+
+ // If even that fails we fall back to a lit-pool entry at the moment. Future
+ // tuning or restrictions like non-readable code-sections may mandate a
+ // sequence of MOVZ/MOVN/MOVK instructions.
+ if (!ResNode) {
+ ResNode = SelectToLitPool(Node);
+ }
+
+ assert(ResNode && "We need *some* way to materialise a constant");
+
+ ReplaceUses(SDValue(Node, 0), SDValue(ResNode, 0));
+ return NULL;
+ }
+ case ISD::ConstantFP: {
+ if (A64Imms::isFPImm(cast<ConstantFPSDNode>(Node)->getValueAPF())) {
+ // FMOV will take care of it from TableGen
+ break;
+ }
+
+ SDNode *ResNode = SelectToFPLitPool(Node);
+ ReplaceUses(SDValue(Node, 0), SDValue(ResNode, 0));
+ return NULL;
+ }
+ default:
+ break; // Let generic code handle it
+ }
+
+ SDNode *ResNode = SelectCode(Node);
+
+ DEBUG(dbgs() << "=> ";
+ if (ResNode == NULL || ResNode == Node)
+ Node->dump(CurDAG);
+ else
+ ResNode->dump(CurDAG);
+ dbgs() << "\n");
+
+ return ResNode;
+}
+
+/// This pass converts a legalized DAG into an AArch64-specific DAG, ready for
+/// instruction scheduling.
+FunctionPass *llvm::createAArch64ISelDAG(AArch64TargetMachine &TM,
+ CodeGenOpt::Level OptLevel) {
+ return new AArch64DAGToDAGISel(TM, OptLevel);
+}
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
new file mode 100644
index 0000000000..42e8f090ce
--- /dev/null
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -0,0 +1,2957 @@
+//===-- AArch64ISelLowering.cpp - AArch64 DAG Lowering Implementation -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that AArch64 uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "aarch64-isel"
+#include "AArch64.h"
+#include "AArch64ISelLowering.h"
+#include "AArch64MachineFunctionInfo.h"
+#include "AArch64TargetMachine.h"
+#include "AArch64TargetObjectFile.h"
+#include "MCTargetDesc/AArch64BaseInfo.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/IR/CallingConv.h"
+
+using namespace llvm;
+
+static TargetLoweringObjectFile *createTLOF(AArch64TargetMachine &TM) {
+ const AArch64Subtarget *Subtarget = &TM.getSubtarget<AArch64Subtarget>();
+
+ if (Subtarget->isTargetLinux())
+ return new AArch64LinuxTargetObjectFile();
+ if (Subtarget->isTargetELF())
+ return new TargetLoweringObjectFileELF();
+ llvm_unreachable("unknown subtarget type");
+}
+
+
+AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM)
+ : TargetLowering(TM, createTLOF(TM)),
+ Subtarget(&TM.getSubtarget<AArch64Subtarget>()),
+ RegInfo(TM.getRegisterInfo()),
+ Itins(TM.getInstrItineraryData()) {
+
+ // SIMD compares set the entire lane's bits to 1
+ setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
+
+ // Scalar register <-> type mapping
+ addRegisterClass(MVT::i32, &AArch64::GPR32RegClass);
+ addRegisterClass(MVT::i64, &AArch64::GPR64RegClass);
+ addRegisterClass(MVT::f16, &AArch64::FPR16RegClass);
+ addRegisterClass(MVT::f32, &AArch64::FPR32RegClass);
+ addRegisterClass(MVT::f64, &AArch64::FPR64RegClass);
+ addRegisterClass(MVT::f128, &AArch64::FPR128RegClass);
+
+ // And the vectors
+ addRegisterClass(MVT::v8i8, &AArch64::VPR64RegClass);
+ addRegisterClass(MVT::v4i16, &AArch64::VPR64RegClass);
+ addRegisterClass(MVT::v2i32, &AArch64::VPR64RegClass);
+ addRegisterClass(MVT::v2f32, &AArch64::VPR64RegClass);
+ addRegisterClass(MVT::v16i8, &AArch64::VPR128RegClass);
+ addRegisterClass(MVT::v8i16, &AArch64::VPR128RegClass);
+ addRegisterClass(MVT::v4i32, &AArch64::VPR128RegClass);
+ addRegisterClass(MVT::v4f32, &AArch64::VPR128RegClass);
+ addRegisterClass(MVT::v2f64, &AArch64::VPR128RegClass);
+
+ computeRegisterProperties();
+
+ // Some atomic operations can be folded into load-acquire or store-release
+ // instructions on AArch64. It's marginally simpler to let LLVM expand
+ // everything out to a barrier and then recombine the (few) barriers we can.
+ setInsertFencesForAtomic(true);
+ setTargetDAGCombine(ISD::ATOMIC_FENCE);
+ setTargetDAGCombine(ISD::ATOMIC_STORE);
+
+ // We combine OR nodes for bitfield and NEON BSL operations.
+ setTargetDAGCombine(ISD::OR);
+
+ setTargetDAGCombine(ISD::AND);
+ setTargetDAGCombine(ISD::SRA);
+
+ // AArch64 does not have i1 loads, or much of anything for i1 really.
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
+
+ setStackPointerRegisterToSaveRestore(AArch64::XSP);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
+ setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
+ setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
+
+ // We'll lower globals to wrappers for selection.
+ setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
+
+ // A64 instructions have the comparison predicate attached to the user of the
+ // result, but having a separate comparison is valuable for matching.
+ setOperationAction(ISD::BR_CC, MVT::i32, Custom);
+ setOperationAction(ISD::BR_CC, MVT::i64, Custom);
+ setOperationAction(ISD::BR_CC, MVT::f32, Custom);
+ setOperationAction(ISD::BR_CC, MVT::f64, Custom);
+
+ setOperationAction(ISD::SELECT, MVT::i32, Custom);
+ setOperationAction(ISD::SELECT, MVT::i64, Custom);
+ setOperationAction(ISD::SELECT, MVT::f32, Custom);
+ setOperationAction(ISD::SELECT, MVT::f64, Custom);
+
+ setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
+
+ setOperationAction(ISD::BRCOND, MVT::Other, Custom);
+
+ setOperationAction(ISD::SETCC, MVT::i32, Custom);
+ setOperationAction(ISD::SETCC, MVT::i64, Custom);
+ setOperationAction(ISD::SETCC, MVT::f32, Custom);
+ setOperationAction(ISD::SETCC, MVT::f64, Custom);
+
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+ setOperationAction(ISD::JumpTable, MVT::i32, Custom);
+ setOperationAction(ISD::JumpTable, MVT::i64, Custom);
+
+ setOperationAction(ISD::VASTART, MVT::Other, Custom);
+ setOperationAction(ISD::VACOPY, MVT::Other, Custom);
+ setOperationAction(ISD::VAEND, MVT::Other, Expand);
+ setOperationAction(ISD::VAARG, MVT::Other, Expand);
+
+ setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
+
+ setOperationAction(ISD::ROTL, MVT::i32, Expand);
+ setOperationAction(ISD::ROTL, MVT::i64, Expand);
+
+ setOperationAction(ISD::UREM, MVT::i32, Expand);
+ setOperationAction(ISD::UREM, MVT::i64, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
+
+ setOperationAction(ISD::SREM, MVT::i32, Expand);
+ setOperationAction(ISD::SREM, MVT::i64, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
+
+ setOperationAction(ISD::CTPOP, MVT::i32, Expand);
+ setOperationAction(ISD::CTPOP, MVT::i64, Expand);
+
+ // Legal floating-point operations.
+ setOperationAction(ISD::FABS, MVT::f32, Legal);
+ setOperationAction(ISD::FABS, MVT::f64, Legal);
+
+ setOperationAction(ISD::FCEIL, MVT::f32, Legal);
+ setOperationAction(ISD::FCEIL, MVT::f64, Legal);
+
+ setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
+ setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
+
+ setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
+
+ setOperationAction(ISD::FNEG, MVT::f32, Legal);
+ setOperationAction(ISD::FNEG, MVT::f64, Legal);
+
+ setOperationAction(ISD::FRINT, MVT::f32, Legal);
+ setOperationAction(ISD::FRINT, MVT::f64, Legal);
+
+ setOperationAction(ISD::FSQRT, MVT::f32, Legal);
+ setOperationAction(ISD::FSQRT, MVT::f64, Legal);
+
+ setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
+
+ setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
+ setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
+ setOperationAction(ISD::ConstantFP, MVT::f128, Legal);
+
+ // Illegal floating-point operations.
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+
+ setOperationAction(ISD::FCOS, MVT::f32, Expand);
+ setOperationAction(ISD::FCOS, MVT::f64, Expand);
+
+ setOperationAction(ISD::FEXP, MVT::f32, Expand);
+ setOperationAction(ISD::FEXP, MVT::f64, Expand);
+
+ setOperationAction(ISD::FEXP2, MVT::f32, Expand);
+ setOperationAction(ISD::FEXP2, MVT::f64, Expand);
+
+ setOperationAction(ISD::FLOG, MVT::f32, Expand);
+ setOperationAction(ISD::FLOG, MVT::f64, Expand);
+
+ setOperationAction(ISD::FLOG2, MVT::f32, Expand);
+ setOperationAction(ISD::FLOG2, MVT::f64, Expand);
+
+ setOperationAction(ISD::FLOG10, MVT::f32, Expand);
+ setOperationAction(ISD::FLOG10, MVT::f64, Expand);
+
+ setOperationAction(ISD::FPOW, MVT::f32, Expand);
+ setOperationAction(ISD::FPOW, MVT::f64, Expand);
+
+ setOperationAction(ISD::FPOWI, MVT::f32, Expand);
+ setOperationAction(ISD::FPOWI, MVT::f64, Expand);
+
+ setOperationAction(ISD::FREM, MVT::f32, Expand);
+ setOperationAction(ISD::FREM, MVT::f64, Expand);
+
+ setOperationAction(ISD::FSIN, MVT::f32, Expand);
+ setOperationAction(ISD::FSIN, MVT::f64, Expand);
+
+
+ // Virtually no operation on f128 is legal, but LLVM can't expand them when
+ // there's a valid register class, so we need custom operations in most cases.
+ setOperationAction(ISD::FABS, MVT::f128, Expand);
+ setOperationAction(ISD::FADD, MVT::f128, Custom);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand);
+ setOperationAction(ISD::FCOS, MVT::f128, Expand);
+ setOperationAction(ISD::FDIV, MVT::f128, Custom);
+ setOperationAction(ISD::FMA, MVT::f128, Expand);
+ setOperationAction(ISD::FMUL, MVT::f128, Custom);
+ setOperationAction(ISD::FNEG, MVT::f128, Expand);
+ setOperationAction(ISD::FP_EXTEND, MVT::f128, Expand);
+ setOperationAction(ISD::FP_ROUND, MVT::f128, Expand);
+ setOperationAction(ISD::FPOW, MVT::f128, Expand);
+ setOperationAction(ISD::FREM, MVT::f128, Expand);
+ setOperationAction(ISD::FRINT, MVT::f128, Expand);
+ setOperationAction(ISD::FSIN, MVT::f128, Expand);
+ setOperationAction(ISD::FSQRT, MVT::f128, Expand);
+ setOperationAction(ISD::FSUB, MVT::f128, Custom);
+ setOperationAction(ISD::FTRUNC, MVT::f128, Expand);
+ setOperationAction(ISD::SETCC, MVT::f128, Custom);
+ setOperationAction(ISD::BR_CC, MVT::f128, Custom);
+ setOperationAction(ISD::SELECT, MVT::f128, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::f128, Custom);
+ setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom);
+
+ // Lowering for many of the conversions is actually specified by the non-f128
+ // type. The LowerXXX function will be trivial when f128 isn't involved.
+ setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i128, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i128, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i128, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i128, Custom);
+ setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
+ setOperationAction(ISD::FP_ROUND, MVT::f64, Custom);
+
+  // This prevents LLVM from trying to compress double constants into a float
+  // constant-pool entry and load from there. It's of doubtful benefit for A64:
+  // we'd need an LDR followed by an FCVT, I believe.
+ setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f16, Expand);
+
+ setTruncStoreAction(MVT::f128, MVT::f64, Expand);
+ setTruncStoreAction(MVT::f128, MVT::f32, Expand);
+ setTruncStoreAction(MVT::f128, MVT::f16, Expand);
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+ setTruncStoreAction(MVT::f64, MVT::f16, Expand);
+ setTruncStoreAction(MVT::f32, MVT::f16, Expand);
+
+ setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand);
+ setOperationAction(ISD::EHSELECTION, MVT::i64, Expand);
+
+ setExceptionPointerRegister(AArch64::X0);
+ setExceptionSelectorRegister(AArch64::X1);
+}
+
+EVT AArch64TargetLowering::getSetCCResultType(EVT VT) const {
+ // It's reasonably important that this value matches the "natural" legal
+ // promotion from i1 for scalar types. Otherwise LegalizeTypes can get itself
+ // in a twist (e.g. inserting an any_extend which then becomes i64 -> i64).
+ if (!VT.isVector()) return MVT::i32;
+ return VT.changeVectorElementTypeToInteger();
+}
+
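+// Pick the matching load-exclusive and store-exclusive opcodes for an atomic
+// access of Size bytes.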
+static void getExclusiveOperation(unsigned Size, unsigned &ldrOpc,
+ unsigned &strOpc) {
+ switch (Size) {
+ default: llvm_unreachable("unsupported size for atomic binary op!");
+ case 1:
+ ldrOpc = AArch64::LDXR_byte;
+ strOpc = AArch64::STXR_byte;
+ break;
+ case 2:
+ ldrOpc = AArch64::LDXR_hword;
+ strOpc = AArch64::STXR_hword;
+ break;
+ case 4:
+ ldrOpc = AArch64::LDXR_word;
+ strOpc = AArch64::STXR_word;
+ break;
+ case 8:
+ ldrOpc = AArch64::LDXR_dword;
+ strOpc = AArch64::STXR_dword;
+ break;
+ }
+}
+
+MachineBasicBlock *
+AArch64TargetLowering::emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
+ unsigned Size,
+ unsigned BinOpcode) const {
+ // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction *MF = BB->getParent();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ unsigned dest = MI->getOperand(0).getReg();
+ unsigned ptr = MI->getOperand(1).getReg();
+ unsigned incr = MI->getOperand(2).getReg();
+ DebugLoc dl = MI->getDebugLoc();
+
+ MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+
+ unsigned ldrOpc, strOpc;
+ getExclusiveOperation(Size, ldrOpc, strOpc);
+
+ MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MF->insert(It, loopMBB);
+ MF->insert(It, exitMBB);
+
+ // Transfer the remainder of BB and its successor edges to exitMBB.
+ exitMBB->splice(exitMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ const TargetRegisterClass *TRC
+ = Size == 8 ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
+ unsigned scratch = (!BinOpcode) ? incr : MRI.createVirtualRegister(TRC);
+
+ // thisMBB:
+ // ...
+ // fallthrough --> loopMBB
+ BB->addSuccessor(loopMBB);
+
+ // loopMBB:
+ // ldxr dest, ptr
+ // <binop> scratch, dest, incr
+ // stxr stxr_status, scratch, ptr
+ // cmp stxr_status, #0
+ // b.ne loopMBB
+ // fallthrough --> exitMBB
+ BB = loopMBB;
+ BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr);
+ if (BinOpcode) {
+ // All arithmetic operations we'll be creating are designed to take an extra
+ // shift or extend operand, which we can conveniently set to zero.
+
+ // Operand order needs to go the other way for NAND.
+ if (BinOpcode == AArch64::BICwww_lsl || BinOpcode == AArch64::BICxxx_lsl)
+ BuildMI(BB, dl, TII->get(BinOpcode), scratch)
+ .addReg(incr).addReg(dest).addImm(0);
+ else
+ BuildMI(BB, dl, TII->get(BinOpcode), scratch)
+ .addReg(dest).addReg(incr).addImm(0);
+ }
+
+ // From the stxr, the register is GPR32; from the cmp it's GPR32wsp
+ unsigned stxr_status = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
+ MRI.constrainRegClass(stxr_status, &AArch64::GPR32wspRegClass);
+
+ BuildMI(BB, dl, TII->get(strOpc), stxr_status).addReg(scratch).addReg(ptr);
+ BuildMI(BB, dl, TII->get(AArch64::SUBwwi_lsl0_cmp))
+ .addReg(stxr_status).addImm(0);
+ BuildMI(BB, dl, TII->get(AArch64::Bcc))
+ .addImm(A64CC::NE).addMBB(loopMBB);
+
+ BB->addSuccessor(loopMBB);
+ BB->addSuccessor(exitMBB);
+
+ // exitMBB:
+ // ...
+ BB = exitMBB;
+
+ MI->eraseFromParent(); // The instruction is gone now.
+
+ return BB;
+}
+
+MachineBasicBlock *
+AArch64TargetLowering::emitAtomicBinaryMinMax(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ unsigned Size,
+ unsigned CmpOp,
+ A64CC::CondCodes Cond) const {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction *MF = BB->getParent();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ unsigned dest = MI->getOperand(0).getReg();
+ unsigned ptr = MI->getOperand(1).getReg();
+ unsigned incr = MI->getOperand(2).getReg();
+ unsigned oldval = dest;
+ DebugLoc dl = MI->getDebugLoc();
+
+ MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+ const TargetRegisterClass *TRC, *TRCsp;
+ if (Size == 8) {
+ TRC = &AArch64::GPR64RegClass;
+ TRCsp = &AArch64::GPR64xspRegClass;
+ } else {
+ TRC = &AArch64::GPR32RegClass;
+ TRCsp = &AArch64::GPR32wspRegClass;
+ }
+
+ unsigned ldrOpc, strOpc;
+ getExclusiveOperation(Size, ldrOpc, strOpc);
+
+ MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MF->insert(It, loopMBB);
+ MF->insert(It, exitMBB);
+
+ // Transfer the remainder of BB and its successor edges to exitMBB.
+ exitMBB->splice(exitMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ unsigned scratch = MRI.createVirtualRegister(TRC);
+ MRI.constrainRegClass(scratch, TRCsp);
+
+ // thisMBB:
+ // ...
+ // fallthrough --> loopMBB
+ BB->addSuccessor(loopMBB);
+
+ // loopMBB:
+ // ldxr dest, ptr
+ // cmp incr, dest (, sign extend if necessary)
+ // csel scratch, dest, incr, cond
+ // stxr stxr_status, scratch, ptr
+ // cmp stxr_status, #0
+ // b.ne loopMBB
+ // fallthrough --> exitMBB
+ BB = loopMBB;
+ BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr);
+
+ // Build compare and cmov instructions.
+ MRI.constrainRegClass(incr, TRCsp);
+ BuildMI(BB, dl, TII->get(CmpOp))
+ .addReg(incr).addReg(oldval).addImm(0);
+
+ BuildMI(BB, dl, TII->get(Size == 8 ? AArch64::CSELxxxc : AArch64::CSELwwwc),
+ scratch)
+ .addReg(oldval).addReg(incr).addImm(Cond);
+
+ unsigned stxr_status = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
+ MRI.constrainRegClass(stxr_status, &AArch64::GPR32wspRegClass);
+
+ BuildMI(BB, dl, TII->get(strOpc), stxr_status)
+ .addReg(scratch).addReg(ptr);
+ BuildMI(BB, dl, TII->get(AArch64::SUBwwi_lsl0_cmp))
+ .addReg(stxr_status).addImm(0);
+ BuildMI(BB, dl, TII->get(AArch64::Bcc))
+ .addImm(A64CC::NE).addMBB(loopMBB);
+
+ BB->addSuccessor(loopMBB);
+ BB->addSuccessor(exitMBB);
+
+ // exitMBB:
+ // ...
+ BB = exitMBB;
+
+ MI->eraseFromParent(); // The instruction is gone now.
+
+ return BB;
+}
+
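+// Expand ATOMIC_CMP_SWAP into two loop blocks: the first does the exclusive
+// load and bails out to exitMBB if the value doesn't match oldval, the second
+// attempts the exclusive store and retries from the load if it fails.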
+MachineBasicBlock *
+AArch64TargetLowering::emitAtomicCmpSwap(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ unsigned Size) const {
+ unsigned dest = MI->getOperand(0).getReg();
+ unsigned ptr = MI->getOperand(1).getReg();
+ unsigned oldval = MI->getOperand(2).getReg();
+ unsigned newval = MI->getOperand(3).getReg();
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc dl = MI->getDebugLoc();
+
+ MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+ const TargetRegisterClass *TRCsp;
+ TRCsp = Size == 8 ? &AArch64::GPR64xspRegClass : &AArch64::GPR32wspRegClass;
+
+ unsigned ldrOpc, strOpc;
+ getExclusiveOperation(Size, ldrOpc, strOpc);
+
+ MachineFunction *MF = BB->getParent();
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = BB;
+ ++It; // insert the new blocks after the current block
+
+ MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MF->insert(It, loop1MBB);
+ MF->insert(It, loop2MBB);
+ MF->insert(It, exitMBB);
+
+ // Transfer the remainder of BB and its successor edges to exitMBB.
+ exitMBB->splice(exitMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // thisMBB:
+ // ...
+ // fallthrough --> loop1MBB
+ BB->addSuccessor(loop1MBB);
+
+ // loop1MBB:
+ // ldxr dest, [ptr]
+ // cmp dest, oldval
+ // b.ne exitMBB
+ BB = loop1MBB;
+ BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr);
+
+ unsigned CmpOp = Size == 8 ? AArch64::CMPxx_lsl : AArch64::CMPww_lsl;
+ MRI.constrainRegClass(dest, TRCsp);
+ BuildMI(BB, dl, TII->get(CmpOp))
+ .addReg(dest).addReg(oldval).addImm(0);
+ BuildMI(BB, dl, TII->get(AArch64::Bcc))
+ .addImm(A64CC::NE).addMBB(exitMBB);
+ BB->addSuccessor(loop2MBB);
+ BB->addSuccessor(exitMBB);
+
+ // loop2MBB:
+ // strex stxr_status, newval, [ptr]
+ // cmp stxr_status, #0
+ // b.ne loop1MBB
+ BB = loop2MBB;
+ unsigned stxr_status = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
+ MRI.constrainRegClass(stxr_status, &AArch64::GPR32wspRegClass);
+
+ BuildMI(BB, dl, TII->get(strOpc), stxr_status).addReg(newval).addReg(ptr);
+ BuildMI(BB, dl, TII->get(AArch64::SUBwwi_lsl0_cmp))
+ .addReg(stxr_status).addImm(0);
+ BuildMI(BB, dl, TII->get(AArch64::Bcc))
+ .addImm(A64CC::NE).addMBB(loop1MBB);
+ BB->addSuccessor(loop1MBB);
+ BB->addSuccessor(exitMBB);
+
+ // exitMBB:
+ // ...
+ BB = exitMBB;
+
+ MI->eraseFromParent(); // The instruction is gone now.
+
+ return BB;
+}
+
+MachineBasicBlock *
+AArch64TargetLowering::EmitF128CSEL(MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
+ // We materialise the F128CSEL pseudo-instruction using conditional branches
+ // and loads, giving an instruction sequence like:
+ // str q0, [sp]
+ // b.ne IfTrue
+ // b Finish
+ // IfTrue:
+ // str q1, [sp]
+ // Finish:
+ // ldr q0, [sp]
+ //
+ // Using virtual registers would probably not be beneficial since COPY
+ // instructions are expensive for f128 (there's no actual instruction to
+ // implement them).
+ //
+ // An alternative would be to do an integer-CSEL on some address. E.g.:
+ // mov x0, sp
+ // add x1, sp, #16
+ // str q0, [x0]
+ // str q1, [x1]
+ // csel x0, x0, x1, ne
+ // ldr q0, [x0]
+ //
+ // It's unclear which approach is actually optimal.
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ MachineFunction *MF = MBB->getParent();
+ const BasicBlock *LLVM_BB = MBB->getBasicBlock();
+ DebugLoc DL = MI->getDebugLoc();
+ MachineFunction::iterator It = MBB;
+ ++It;
+
+ unsigned DestReg = MI->getOperand(0).getReg();
+ unsigned IfTrueReg = MI->getOperand(1).getReg();
+ unsigned IfFalseReg = MI->getOperand(2).getReg();
+ unsigned CondCode = MI->getOperand(3).getImm();
+ bool NZCVKilled = MI->getOperand(4).isKill();
+
+ MachineBasicBlock *TrueBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *EndBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MF->insert(It, TrueBB);
+ MF->insert(It, EndBB);
+
+ // Transfer rest of current basic-block to EndBB
+ EndBB->splice(EndBB->begin(), MBB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ MBB->end());
+ EndBB->transferSuccessorsAndUpdatePHIs(MBB);
+
+ // We need somewhere to store the f128 value needed.
+ int ScratchFI = MF->getFrameInfo()->CreateSpillStackObject(16, 16);
+
+ // [... start of incoming MBB ...]
+ // str qIFFALSE, [sp]
+ // b.cc IfTrue
+ // b Done
+ BuildMI(MBB, DL, TII->get(AArch64::LSFP128_STR))
+ .addReg(IfFalseReg)
+ .addFrameIndex(ScratchFI)
+ .addImm(0);
+ BuildMI(MBB, DL, TII->get(AArch64::Bcc))
+ .addImm(CondCode)
+ .addMBB(TrueBB);
+ BuildMI(MBB, DL, TII->get(AArch64::Bimm))
+ .addMBB(EndBB);
+ MBB->addSuccessor(TrueBB);
+ MBB->addSuccessor(EndBB);
+
+ // IfTrue:
+ // str qIFTRUE, [sp]
+ BuildMI(TrueBB, DL, TII->get(AArch64::LSFP128_STR))
+ .addReg(IfTrueReg)
+ .addFrameIndex(ScratchFI)
+ .addImm(0);
+
+ // Note: fallthrough. We can rely on LLVM adding a branch if it reorders the
+ // blocks.
+ TrueBB->addSuccessor(EndBB);
+
+ // Done:
+ // ldr qDEST, [sp]
+ // [... rest of incoming MBB ...]
+ if (!NZCVKilled)
+ EndBB->addLiveIn(AArch64::NZCV);
+ MachineInstr *StartOfEnd = EndBB->begin();
+ BuildMI(*EndBB, StartOfEnd, DL, TII->get(AArch64::LSFP128_LDR), DestReg)
+ .addFrameIndex(ScratchFI)
+ .addImm(0);
+
+ MI->eraseFromParent();
+ return EndBB;
+}
+
+MachineBasicBlock *
+AArch64TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
+ switch (MI->getOpcode()) {
+ default: llvm_unreachable("Unhandled instruction with custom inserter");
+ case AArch64::F128CSEL:
+ return EmitF128CSEL(MI, MBB);
+ case AArch64::ATOMIC_LOAD_ADD_I8:
+ return emitAtomicBinary(MI, MBB, 1, AArch64::ADDwww_lsl);
+ case AArch64::ATOMIC_LOAD_ADD_I16:
+ return emitAtomicBinary(MI, MBB, 2, AArch64::ADDwww_lsl);
+ case AArch64::ATOMIC_LOAD_ADD_I32:
+ return emitAtomicBinary(MI, MBB, 4, AArch64::ADDwww_lsl);
+ case AArch64::ATOMIC_LOAD_ADD_I64:
+ return emitAtomicBinary(MI, MBB, 8, AArch64::ADDxxx_lsl);
+
+ case AArch64::ATOMIC_LOAD_SUB_I8:
+ return emitAtomicBinary(MI, MBB, 1, AArch64::SUBwww_lsl);
+ case AArch64::ATOMIC_LOAD_SUB_I16:
+ return emitAtomicBinary(MI, MBB, 2, AArch64::SUBwww_lsl);
+ case AArch64::ATOMIC_LOAD_SUB_I32:
+ return emitAtomicBinary(MI, MBB, 4, AArch64::SUBwww_lsl);
+ case AArch64::ATOMIC_LOAD_SUB_I64:
+ return emitAtomicBinary(MI, MBB, 8, AArch64::SUBxxx_lsl);
+
+ case AArch64::ATOMIC_LOAD_AND_I8:
+ return emitAtomicBinary(MI, MBB, 1, AArch64::ANDwww_lsl);
+ case AArch64::ATOMIC_LOAD_AND_I16:
+ return emitAtomicBinary(MI, MBB, 2, AArch64::ANDwww_lsl);
+ case AArch64::ATOMIC_LOAD_AND_I32:
+ return emitAtomicBinary(MI, MBB, 4, AArch64::ANDwww_lsl);
+ case AArch64::ATOMIC_LOAD_AND_I64:
+ return emitAtomicBinary(MI, MBB, 8, AArch64::ANDxxx_lsl);
+
+ case AArch64::ATOMIC_LOAD_OR_I8:
+ return emitAtomicBinary(MI, MBB, 1, AArch64::ORRwww_lsl);
+ case AArch64::ATOMIC_LOAD_OR_I16:
+ return emitAtomicBinary(MI, MBB, 2, AArch64::ORRwww_lsl);
+ case AArch64::ATOMIC_LOAD_OR_I32:
+ return emitAtomicBinary(MI, MBB, 4, AArch64::ORRwww_lsl);
+ case AArch64::ATOMIC_LOAD_OR_I64:
+ return emitAtomicBinary(MI, MBB, 8, AArch64::ORRxxx_lsl);
+
+ case AArch64::ATOMIC_LOAD_XOR_I8:
+ return emitAtomicBinary(MI, MBB, 1, AArch64::EORwww_lsl);
+ case AArch64::ATOMIC_LOAD_XOR_I16:
+ return emitAtomicBinary(MI, MBB, 2, AArch64::EORwww_lsl);
+ case AArch64::ATOMIC_LOAD_XOR_I32:
+ return emitAtomicBinary(MI, MBB, 4, AArch64::EORwww_lsl);
+ case AArch64::ATOMIC_LOAD_XOR_I64:
+ return emitAtomicBinary(MI, MBB, 8, AArch64::EORxxx_lsl);
+
+ case AArch64::ATOMIC_LOAD_NAND_I8:
+ return emitAtomicBinary(MI, MBB, 1, AArch64::BICwww_lsl);
+ case AArch64::ATOMIC_LOAD_NAND_I16:
+ return emitAtomicBinary(MI, MBB, 2, AArch64::BICwww_lsl);
+ case AArch64::ATOMIC_LOAD_NAND_I32:
+ return emitAtomicBinary(MI, MBB, 4, AArch64::BICwww_lsl);
+ case AArch64::ATOMIC_LOAD_NAND_I64:
+ return emitAtomicBinary(MI, MBB, 8, AArch64::BICxxx_lsl);
+
+ case AArch64::ATOMIC_LOAD_MIN_I8:
+ return emitAtomicBinaryMinMax(MI, MBB, 1, AArch64::CMPww_sxtb, A64CC::GT);
+ case AArch64::ATOMIC_LOAD_MIN_I16:
+ return emitAtomicBinaryMinMax(MI, MBB, 2, AArch64::CMPww_sxth, A64CC::GT);
+ case AArch64::ATOMIC_LOAD_MIN_I32:
+ return emitAtomicBinaryMinMax(MI, MBB, 4, AArch64::CMPww_lsl, A64CC::GT);
+ case AArch64::ATOMIC_LOAD_MIN_I64:
+ return emitAtomicBinaryMinMax(MI, MBB, 8, AArch64::CMPxx_lsl, A64CC::GT);
+
+ case AArch64::ATOMIC_LOAD_MAX_I8:
+ return emitAtomicBinaryMinMax(MI, MBB, 1, AArch64::CMPww_sxtb, A64CC::LT);
+ case AArch64::ATOMIC_LOAD_MAX_I16:
+ return emitAtomicBinaryMinMax(MI, MBB, 2, AArch64::CMPww_sxth, A64CC::LT);
+ case AArch64::ATOMIC_LOAD_MAX_I32:
+ return emitAtomicBinaryMinMax(MI, MBB, 4, AArch64::CMPww_lsl, A64CC::LT);
+ case AArch64::ATOMIC_LOAD_MAX_I64:
+ return emitAtomicBinaryMinMax(MI, MBB, 8, AArch64::CMPxx_lsl, A64CC::LT);
+
+ case AArch64::ATOMIC_LOAD_UMIN_I8:
+ return emitAtomicBinaryMinMax(MI, MBB, 1, AArch64::CMPww_uxtb, A64CC::HI);
+ case AArch64::ATOMIC_LOAD_UMIN_I16:
+ return emitAtomicBinaryMinMax(MI, MBB, 2, AArch64::CMPww_uxth, A64CC::HI);
+ case AArch64::ATOMIC_LOAD_UMIN_I32:
+ return emitAtomicBinaryMinMax(MI, MBB, 4, AArch64::CMPww_lsl, A64CC::HI);
+ case AArch64::ATOMIC_LOAD_UMIN_I64:
+ return emitAtomicBinaryMinMax(MI, MBB, 8, AArch64::CMPxx_lsl, A64CC::HI);
+
+ case AArch64::ATOMIC_LOAD_UMAX_I8:
+ return emitAtomicBinaryMinMax(MI, MBB, 1, AArch64::CMPww_uxtb, A64CC::LO);
+ case AArch64::ATOMIC_LOAD_UMAX_I16:
+ return emitAtomicBinaryMinMax(MI, MBB, 2, AArch64::CMPww_uxth, A64CC::LO);
+ case AArch64::ATOMIC_LOAD_UMAX_I32:
+ return emitAtomicBinaryMinMax(MI, MBB, 4, AArch64::CMPww_lsl, A64CC::LO);
+ case AArch64::ATOMIC_LOAD_UMAX_I64:
+ return emitAtomicBinaryMinMax(MI, MBB, 8, AArch64::CMPxx_lsl, A64CC::LO);
+
+ case AArch64::ATOMIC_SWAP_I8:
+ return emitAtomicBinary(MI, MBB, 1, 0);
+ case AArch64::ATOMIC_SWAP_I16:
+ return emitAtomicBinary(MI, MBB, 2, 0);
+ case AArch64::ATOMIC_SWAP_I32:
+ return emitAtomicBinary(MI, MBB, 4, 0);
+ case AArch64::ATOMIC_SWAP_I64:
+ return emitAtomicBinary(MI, MBB, 8, 0);
+
+ case AArch64::ATOMIC_CMP_SWAP_I8:
+ return emitAtomicCmpSwap(MI, MBB, 1);
+ case AArch64::ATOMIC_CMP_SWAP_I16:
+ return emitAtomicCmpSwap(MI, MBB, 2);
+ case AArch64::ATOMIC_CMP_SWAP_I32:
+ return emitAtomicCmpSwap(MI, MBB, 4);
+ case AArch64::ATOMIC_CMP_SWAP_I64:
+ return emitAtomicCmpSwap(MI, MBB, 8);
+ }
+}
+
+
+const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ case AArch64ISD::BR_CC: return "AArch64ISD::BR_CC";
+ case AArch64ISD::Call: return "AArch64ISD::Call";
+ case AArch64ISD::FPMOV: return "AArch64ISD::FPMOV";
+ case AArch64ISD::GOTLoad: return "AArch64ISD::GOTLoad";
+ case AArch64ISD::BFI: return "AArch64ISD::BFI";
+ case AArch64ISD::EXTR: return "AArch64ISD::EXTR";
+ case AArch64ISD::Ret: return "AArch64ISD::Ret";
+ case AArch64ISD::SBFX: return "AArch64ISD::SBFX";
+ case AArch64ISD::SELECT_CC: return "AArch64ISD::SELECT_CC";
+ case AArch64ISD::SETCC: return "AArch64ISD::SETCC";
+ case AArch64ISD::TC_RETURN: return "AArch64ISD::TC_RETURN";
+ case AArch64ISD::THREAD_POINTER: return "AArch64ISD::THREAD_POINTER";
+ case AArch64ISD::TLSDESCCALL: return "AArch64ISD::TLSDESCCALL";
+ case AArch64ISD::WrapperSmall: return "AArch64ISD::WrapperSmall";
+
+ default: return NULL;
+ }
+}
+
+static const uint16_t AArch64FPRArgRegs[] = {
+ AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3,
+ AArch64::Q4, AArch64::Q5, AArch64::Q6, AArch64::Q7
+};
+static const unsigned NumFPRArgRegs = llvm::array_lengthof(AArch64FPRArgRegs);
+
+static const uint16_t AArch64ArgRegs[] = {
+ AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3,
+ AArch64::X4, AArch64::X5, AArch64::X6, AArch64::X7
+};
+static const unsigned NumArgRegs = llvm::array_lengthof(AArch64ArgRegs);
+
+static bool CC_AArch64NoMoreRegs(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State) {
+ // Mark all remaining general purpose registers as allocated. We don't
+ // backtrack: if (for example) an i128 gets put on the stack, no subsequent
+ // i64 will go in registers (C.11).
+ for (unsigned i = 0; i < NumArgRegs; ++i)
+ State.AllocateReg(AArch64ArgRegs[i]);
+
+ return false;
+}
+
+#include "AArch64GenCallingConv.inc"
+
+CCAssignFn *AArch64TargetLowering::CCAssignFnForNode(CallingConv::ID CC) const {
+
+ switch(CC) {
+ default: llvm_unreachable("Unsupported calling convention");
+ case CallingConv::Fast:
+ case CallingConv::C:
+ return CC_A64_APCS;
+ }
+}
+
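+// For variadic functions, spill any argument registers not used by the named
+// arguments (X0-X7 into 8-byte slots, Q0-Q7 into 16-byte slots) and record the
+// resulting frame indices so that va_start/va_arg lowering can find them.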
+void
+AArch64TargetLowering::SaveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG,
+ DebugLoc DL, SDValue &Chain) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ AArch64MachineFunctionInfo *FuncInfo = MF.getInfo<AArch64MachineFunctionInfo>();
+
+ SmallVector<SDValue, 8> MemOps;
+
+ unsigned FirstVariadicGPR = CCInfo.getFirstUnallocated(AArch64ArgRegs,
+ NumArgRegs);
+ unsigned FirstVariadicFPR = CCInfo.getFirstUnallocated(AArch64FPRArgRegs,
+ NumFPRArgRegs);
+
+ unsigned GPRSaveSize = 8 * (NumArgRegs - FirstVariadicGPR);
+ int GPRIdx = 0;
+ if (GPRSaveSize != 0) {
+ GPRIdx = MFI->CreateStackObject(GPRSaveSize, 8, false);
+
+ SDValue FIN = DAG.getFrameIndex(GPRIdx, getPointerTy());
+
+ for (unsigned i = FirstVariadicGPR; i < NumArgRegs; ++i) {
+ unsigned VReg = MF.addLiveIn(AArch64ArgRegs[i], &AArch64::GPR64RegClass);
+ SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64);
+ SDValue Store = DAG.getStore(Val.getValue(1), DL, Val, FIN,
+ MachinePointerInfo::getStack(i * 8),
+ false, false, 0);
+ MemOps.push_back(Store);
+ FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), FIN,
+ DAG.getConstant(8, getPointerTy()));
+ }
+ }
+
+ unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR);
+ int FPRIdx = 0;
+ if (FPRSaveSize != 0) {
+ FPRIdx = MFI->CreateStackObject(FPRSaveSize, 16, false);
+
+ SDValue FIN = DAG.getFrameIndex(FPRIdx, getPointerTy());
+
+ for (unsigned i = FirstVariadicFPR; i < NumFPRArgRegs; ++i) {
+ unsigned VReg = MF.addLiveIn(AArch64FPRArgRegs[i],
+ &AArch64::FPR128RegClass);
+ SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f128);
+ SDValue Store = DAG.getStore(Val.getValue(1), DL, Val, FIN,
+ MachinePointerInfo::getStack(i * 16),
+ false, false, 0);
+ MemOps.push_back(Store);
+ FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), FIN,
+ DAG.getConstant(16, getPointerTy()));
+ }
+ }
+
+ int StackIdx = MFI->CreateFixedObject(8, CCInfo.getNextStackOffset(), true);
+
+ FuncInfo->setVariadicStackIdx(StackIdx);
+ FuncInfo->setVariadicGPRIdx(GPRIdx);
+ FuncInfo->setVariadicGPRSize(GPRSaveSize);
+ FuncInfo->setVariadicFPRIdx(FPRIdx);
+ FuncInfo->setVariadicFPRSize(FPRSaveSize);
+
+ if (!MemOps.empty()) {
+ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &MemOps[0],
+ MemOps.size());
+ }
+}
+
+
+SDValue
+AArch64TargetLowering::LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ AArch64MachineFunctionInfo *FuncInfo
+ = MF.getInfo<AArch64MachineFunctionInfo>();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
+
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+ CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForNode(CallConv));
+
+ SmallVector<SDValue, 16> ArgValues;
+
+ SDValue ArgValue;
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ ISD::ArgFlagsTy Flags = Ins[i].Flags;
+
+ if (Flags.isByVal()) {
+ // Byval is used for small structs and HFAs in the PCS, but the system
+ // should work in a non-compliant manner for larger structs.
+ EVT PtrTy = getPointerTy();
+ int Size = Flags.getByValSize();
+ unsigned NumRegs = (Size + 7) / 8;
+
+ unsigned FrameIdx = MFI->CreateFixedObject(8 * NumRegs,
+ VA.getLocMemOffset(),
+ false);
+ SDValue FrameIdxN = DAG.getFrameIndex(FrameIdx, PtrTy);
+ InVals.push_back(FrameIdxN);
+
+ continue;
+ } else if (VA.isRegLoc()) {
+ MVT RegVT = VA.getLocVT();
+ const TargetRegisterClass *RC = getRegClassFor(RegVT);
+ unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
+
+ ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
+ } else { // VA.isRegLoc()
+ assert(VA.isMemLoc());
+
+ int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
+ VA.getLocMemOffset(), true);
+
+ SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
+ ArgValue = DAG.getLoad(VA.getLocVT(), dl, Chain, FIN,
+ MachinePointerInfo::getFixedStack(FI),
+ false, false, false, 0);
+
+
+ }
+
+ switch (VA.getLocInfo()) {
+ default: llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::BCvt:
+ ArgValue = DAG.getNode(ISD::BITCAST,dl, VA.getValVT(), ArgValue);
+ break;
+ case CCValAssign::SExt:
+ case CCValAssign::ZExt:
+ case CCValAssign::AExt: {
+ unsigned DestSize = VA.getValVT().getSizeInBits();
+ unsigned DestSubReg;
+
+ switch (DestSize) {
+ case 8: DestSubReg = AArch64::sub_8; break;
+ case 16: DestSubReg = AArch64::sub_16; break;
+ case 32: DestSubReg = AArch64::sub_32; break;
+ case 64: DestSubReg = AArch64::sub_64; break;
+ default: llvm_unreachable("Unexpected argument promotion");
+ }
+
+ ArgValue = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl,
+ VA.getValVT(), ArgValue,
+ DAG.getTargetConstant(DestSubReg, MVT::i32)),
+ 0);
+ break;
+ }
+ }
+
+ InVals.push_back(ArgValue);
+ }
+
+ if (isVarArg)
+ SaveVarArgRegisters(CCInfo, DAG, dl, Chain);
+
+ unsigned StackArgSize = CCInfo.getNextStackOffset();
+ if (DoesCalleeRestoreStack(CallConv, TailCallOpt)) {
+ // This is a non-standard ABI so by fiat I say we're allowed to make full
+ // use of the stack area to be popped, which must be aligned to 16 bytes in
+ // any case:
+ StackArgSize = RoundUpToAlignment(StackArgSize, 16);
+
+ // If we're expected to restore the stack (e.g. fastcc) then we'll be adding
+ // a multiple of 16.
+ FuncInfo->setArgumentStackToRestore(StackArgSize);
+
+ // This realignment carries over to the available bytes below. Our own
+ // callers will guarantee the space is free by giving an aligned value to
+ // CALLSEQ_START.
+ }
+ // Even if we're not expected to free up the space, it's useful to know how
+ // much is there while considering tail calls (because we can reuse it).
+ FuncInfo->setBytesInStackArgArea(StackArgSize);
+
+ return Chain;
+}
+
+SDValue
+AArch64TargetLowering::LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const {
+ // CCValAssign - represent the assignment of the return value to a location.
+ SmallVector<CCValAssign, 16> RVLocs;
+
+ // CCState - Info about the registers and stack slots.
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+
+ // Analyze outgoing return values.
+ CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv));
+
+ // If this is the first return lowered for this function, add
+ // the regs to the liveout set for the function.
+ if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
+ for (unsigned i = 0; i != RVLocs.size(); ++i)
+ if (RVLocs[i].isRegLoc())
+ DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
+ }
+
+ SDValue Flag;
+
+ for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
+ // PCS: "If the type, T, of the result of a function is such that void func(T
+ // arg) would require that arg be passed as a value in a register (or set of
+ // registers) according to the rules in 5.4, then the result is returned in
+ // the same registers as would be used for such an argument.
+ //
+ // Otherwise, the caller shall reserve a block of memory of sufficient
+ // size and alignment to hold the result. The address of the memory block
+ // shall be passed as an additional argument to the function in x8."
+ //
+ // This is implemented in two places. The register-return values are dealt
+ // with here, more complex returns are passed as an sret parameter, which
+ // means we don't have to worry about it during actual return.
+ CCValAssign &VA = RVLocs[i];
+ assert(VA.isRegLoc() && "Only register-returns should be created by PCS");
+
+
+ SDValue Arg = OutVals[i];
+
+ // There's no convenient note in the ABI about this as there is for normal
+ // arguments, but it says return values are passed in the same registers as
+ // an argument would be. I believe that includes the comments about
+ // unspecified higher bits, putting the burden of widening on the *caller*
+ // for return values.
+ switch (VA.getLocInfo()) {
+ default: llvm_unreachable("Unknown loc info");
+ case CCValAssign::Full: break;
+ case CCValAssign::SExt:
+ case CCValAssign::ZExt:
+ case CCValAssign::AExt:
+ // Floating-point values should only be extended when they're going into
+ // memory, which can't happen here so an integer extend is acceptable.
+ Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::BCvt:
+ Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
+ break;
+ }
+
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
+ Flag = Chain.getValue(1);
+ }
+
+ if (Flag.getNode()) {
+ return DAG.getNode(AArch64ISD::Ret, dl, MVT::Other, Chain, Flag);
+ } else {
+ return DAG.getNode(AArch64ISD::Ret, dl, MVT::Other, Chain);
+ }
+}
+
+SDValue
+AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
+ SmallVectorImpl<SDValue> &InVals) const {
+ SelectionDAG &DAG = CLI.DAG;
+ DebugLoc &dl = CLI.DL;
+ SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
+ SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
+ SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
+ SDValue Chain = CLI.Chain;
+ SDValue Callee = CLI.Callee;
+ bool &IsTailCall = CLI.IsTailCall;
+ CallingConv::ID CallConv = CLI.CallConv;
+ bool IsVarArg = CLI.IsVarArg;
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ AArch64MachineFunctionInfo *FuncInfo
+ = MF.getInfo<AArch64MachineFunctionInfo>();
+ bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
+ bool IsStructRet = !Outs.empty() && Outs[0].Flags.isSRet();
+ bool IsSibCall = false;
+
+ if (IsTailCall) {
+ IsTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
+ IsVarArg, IsStructRet, MF.getFunction()->hasStructRetAttr(),
+ Outs, OutVals, Ins, DAG);
+
+ // A sibling call is one where we're under the usual C ABI and not planning
+ // to change that but can still do a tail call:
+ if (!TailCallOpt && IsTailCall)
+ IsSibCall = true;
+ }
+
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+ CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CallConv));
+
+ // On AArch64 (and all other architectures I'm aware of) the most this has to
+ // do is adjust the stack pointer.
+ unsigned NumBytes = RoundUpToAlignment(CCInfo.getNextStackOffset(), 16);
+ if (IsSibCall) {
+ // Since we're not changing the ABI to make this a tail call, the memory
+ // operands are already available in the caller's incoming argument space.
+ NumBytes = 0;
+ }
+
+ // FPDiff is the byte offset of the call's argument area from the callee's.
+ // Stores to callee stack arguments will be placed in FixedStackSlots offset
+ // by this amount for a tail call. In a sibling call it must be 0 because the
+ // caller will deallocate the entire stack and the callee still expects its
+ // arguments to begin at SP+0. Completely unused for non-tail calls.
+ int FPDiff = 0;
+
+ if (IsTailCall && !IsSibCall) {
+ unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();
+
+ // FPDiff will be negative if this tail call requires more space than we
+ // would automatically have in our incoming argument space. Positive if we
+ // can actually shrink the stack.
+ FPDiff = NumReusableBytes - NumBytes;
+
+ // The stack pointer must be 16-byte aligned at all times it's used for a
+ // memory operation, which in practice means at *all* times and in
+ // particular across call boundaries. Therefore our own arguments started at
+ // a 16-byte aligned SP and the delta applied for the tail call should
+ // satisfy the same constraint.
+ assert(FPDiff % 16 == 0 && "unaligned stack on tail call");
+ }
+
+ if (!IsSibCall)
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
+
+ SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, AArch64::XSP, getPointerTy());
+
+ SmallVector<SDValue, 8> MemOpChains;
+ SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
+
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ ISD::ArgFlagsTy Flags = Outs[i].Flags;
+ SDValue Arg = OutVals[i];
+
+ // Callee does the actual widening, so all extensions just use an implicit
+ // definition of the rest of the Loc. Aesthetically, this would be nicer as
+ // an ANY_EXTEND, but that isn't valid for floating-point types and this
+ // alternative works on integer types too.
+ switch (VA.getLocInfo()) {
+ default: llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::SExt:
+ case CCValAssign::ZExt:
+ case CCValAssign::AExt: {
+ unsigned SrcSize = VA.getValVT().getSizeInBits();
+ unsigned SrcSubReg;
+
+ switch (SrcSize) {
+ case 8: SrcSubReg = AArch64::sub_8; break;
+ case 16: SrcSubReg = AArch64::sub_16; break;
+ case 32: SrcSubReg = AArch64::sub_32; break;
+ case 64: SrcSubReg = AArch64::sub_64; break;
+ default: llvm_unreachable("Unexpected argument promotion");
+ }
+
+ Arg = SDValue(DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, dl,
+ VA.getLocVT(),
+ DAG.getUNDEF(VA.getLocVT()),
+ Arg,
+ DAG.getTargetConstant(SrcSubReg, MVT::i32)),
+ 0);
+
+ break;
+ }
+ case CCValAssign::BCvt:
+ Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
+ break;
+ }
+
+ if (VA.isRegLoc()) {
+ // A normal register (sub-) argument. For now we just note it down because
+ // we want to copy things into registers as late as possible to avoid
+ // register-pressure (and possibly worse).
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ continue;
+ }
+
+ assert(VA.isMemLoc() && "unexpected argument location");
+
+ SDValue DstAddr;
+ MachinePointerInfo DstInfo;
+ if (IsTailCall) {
+ uint32_t OpSize = Flags.isByVal() ? Flags.getByValSize() :
+ VA.getLocVT().getSizeInBits();
+ OpSize = (OpSize + 7) / 8;
+ int32_t Offset = VA.getLocMemOffset() + FPDiff;
+ int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true);
+
+ DstAddr = DAG.getFrameIndex(FI, getPointerTy());
+ DstInfo = MachinePointerInfo::getFixedStack(FI);
+
+ // Make sure any stack arguments overlapping with where we're storing are
+ // loaded before this eventual operation. Otherwise they'll be clobbered.
+ Chain = addTokenForArgument(Chain, DAG, MF.getFrameInfo(), FI);
+ } else {
+ SDValue PtrOff = DAG.getIntPtrConstant(VA.getLocMemOffset());
+
+ DstAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
+ DstInfo = MachinePointerInfo::getStack(VA.getLocMemOffset());
+ }
+
+ if (Flags.isByVal()) {
+ SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i64);
+ SDValue Cpy = DAG.getMemcpy(Chain, dl, DstAddr, Arg, SizeNode,
+ Flags.getByValAlign(),
+ /*isVolatile = */ false,
+ /*alwaysInline = */ false,
+ DstInfo, MachinePointerInfo(0));
+ MemOpChains.push_back(Cpy);
+ } else {
+ // Normal stack argument, put it where it's needed.
+ SDValue Store = DAG.getStore(Chain, dl, Arg, DstAddr, DstInfo,
+ false, false, 0);
+ MemOpChains.push_back(Store);
+ }
+ }
+
+ // The loads and stores generated above shouldn't clash with each
+ // other. Combining them with this TokenFactor notes that fact for the rest of
+ // the backend.
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOpChains[0], MemOpChains.size());
+
+ // Most of the rest of the instructions need to be glued together; we don't
+ // want assignments to actual registers used by a call to be rearranged by a
+ // well-meaning scheduler.
+ SDValue InFlag;
+
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ // The linker is responsible for inserting veneers when necessary to put a
+ // function call destination in range, so we don't need to bother with a
+ // wrapper here.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ const GlobalValue *GV = G->getGlobal();
+ Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy());
+ } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ const char *Sym = S->getSymbol();
+ Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy());
+ }
+
+ // We don't usually want to end the call-sequence here because we would tidy
+ // the frame up *after* the call, however in the ABI-changing tail-call case
+ // we've carefully laid out the parameters so that when sp is reset they'll be
+ // in the correct location.
+ if (IsTailCall && !IsSibCall) {
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
+ DAG.getIntPtrConstant(0, true), InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ // We produce the following DAG scheme for the actual call instruction:
+ // (AArch64Call Chain, Callee, reg1, ..., regn, preserveMask, inflag?)
+ //
+ // Most arguments aren't going to be used and just keep the values live as
+ // far as LLVM is concerned. It's expected to be selected as simply "bl
+ // callee" (for a direct, non-tail call).
+ std::vector<SDValue> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+
+ if (IsTailCall) {
+ // Each tail call may have to adjust the stack by a different amount, so
+ // this information must travel along with the operation for eventual
+ // consumption by emitEpilogue.
+ Ops.push_back(DAG.getTargetConstant(FPDiff, MVT::i32));
+ }
+
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+ Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+
+
+ // Add a register mask operand representing the call-preserved registers. This
+ // is used later in codegen to constrain register-allocation.
+ const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+ const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
+ assert(Mask && "Missing call preserved mask for calling convention");
+ Ops.push_back(DAG.getRegisterMask(Mask));
+
+ // If we needed glue, put it in as the last argument.
+ if (InFlag.getNode())
+ Ops.push_back(InFlag);
+
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+
+ if (IsTailCall) {
+ return DAG.getNode(AArch64ISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size());
+ }
+
+ Chain = DAG.getNode(AArch64ISD::Call, dl, NodeTys, &Ops[0], Ops.size());
+ InFlag = Chain.getValue(1);
+
+ // Now we can reclaim the stack; we might as well do it before working out
+ // where our return value is.
+ if (!IsSibCall) {
+ uint64_t CalleePopBytes
+ = DoesCalleeRestoreStack(CallConv, TailCallOpt) ? NumBytes : 0;
+
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
+ DAG.getIntPtrConstant(CalleePopBytes, true),
+ InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ return LowerCallResult(Chain, InFlag, CallConv,
+ IsVarArg, Ins, dl, DAG, InVals);
+}
+
+SDValue
+AArch64TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool IsVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ // Assign locations to each value returned by this call.
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+ CCInfo.AnalyzeCallResult(Ins, CCAssignFnForNode(CallConv));
+
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign VA = RVLocs[i];
+
+ // Return values that are too big to fit into registers should use an sret
+ // pointer, so this can be a lot simpler than the main argument code.
+ assert(VA.isRegLoc() && "Memory locations not expected for call return");
+
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
+ InFlag);
+ Chain = Val.getValue(1);
+ InFlag = Val.getValue(2);
+
+ switch (VA.getLocInfo()) {
+ default: llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::BCvt:
+ Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);
+ break;
+ case CCValAssign::ZExt:
+ case CCValAssign::SExt:
+ case CCValAssign::AExt:
+ // Floating-point arguments only get extended/truncated if they're going
+ // in memory, so using the integer operation is acceptable here.
+ Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
+ break;
+ }
+
+ InVals.push_back(Val);
+ }
+
+ return Chain;
+}
+
+bool
+AArch64TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
+ CallingConv::ID CalleeCC,
+ bool IsVarArg,
+ bool IsCalleeStructRet,
+ bool IsCallerStructRet,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SelectionDAG& DAG) const {
+
+ // For CallingConv::C this function knows whether the ABI needs
+ // changing. That's not true for other conventions so they will have to opt in
+ // manually.
+ if (!IsTailCallConvention(CalleeCC) && CalleeCC != CallingConv::C)
+ return false;
+
+ const MachineFunction &MF = DAG.getMachineFunction();
+ const Function *CallerF = MF.getFunction();
+ CallingConv::ID CallerCC = CallerF->getCallingConv();
+ bool CCMatch = CallerCC == CalleeCC;
+
+ // Byval parameters hand the function a pointer directly into the stack area
+ // we want to reuse during a tail call. Working around this *is* possible (see
+ // X86) but less efficient and uglier in LowerCall.
+ for (Function::const_arg_iterator i = CallerF->arg_begin(),
+ e = CallerF->arg_end(); i != e; ++i)
+ if (i->hasByValAttr())
+ return false;
+
+ if (getTargetMachine().Options.GuaranteedTailCallOpt) {
+ if (IsTailCallConvention(CalleeCC) && CCMatch)
+ return true;
+ return false;
+ }
+
+ // Now we search for cases where we can use a tail call without changing the
+ // ABI. Sibcall is used in some places (particularly gcc) to refer to this
+ // concept.
+
+ // I want anyone implementing a new calling convention to think long and hard
+ // about this assert.
+ assert((!IsVarArg || CalleeCC == CallingConv::C)
+ && "Unexpected variadic calling convention");
+
+ if (IsVarArg && !Outs.empty()) {
+ // At least two cases here: if caller is fastcc then we can't have any
+ // memory arguments (we'd be expected to clean up the stack afterwards). If
+ // caller is C then we could potentially use its argument area.
+
+ // FIXME: for now we take the most conservative of these in both cases:
+ // disallow all variadic memory operands.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CalleeCC, IsVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+
+ CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CalleeCC));
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i)
+ if (!ArgLocs[i].isRegLoc())
+ return false;
+ }
+
+ // If the calling conventions do not match, then we'd better make sure the
+ // results are returned in the same way as what the caller expects.
+ if (!CCMatch) {
+ SmallVector<CCValAssign, 16> RVLocs1;
+ CCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs1, *DAG.getContext());
+ CCInfo1.AnalyzeCallResult(Ins, CCAssignFnForNode(CalleeCC));
+
+ SmallVector<CCValAssign, 16> RVLocs2;
+ CCState CCInfo2(CallerCC, false, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs2, *DAG.getContext());
+ CCInfo2.AnalyzeCallResult(Ins, CCAssignFnForNode(CallerCC));
+
+ if (RVLocs1.size() != RVLocs2.size())
+ return false;
+ for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) {
+ if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc())
+ return false;
+ if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo())
+ return false;
+ if (RVLocs1[i].isRegLoc()) {
+ if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg())
+ return false;
+ } else {
+ if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset())
+ return false;
+ }
+ }
+ }
+
+ // Nothing more to check if the callee is taking no arguments
+ if (Outs.empty())
+ return true;
+
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CalleeCC, IsVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+
+ CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CalleeCC));
+
+ const AArch64MachineFunctionInfo *FuncInfo
+ = MF.getInfo<AArch64MachineFunctionInfo>();
+
+ // If the stack arguments for this call would fit into our own save area then
+ // the call can be made tail.
+ return CCInfo.getNextStackOffset() <= FuncInfo->getBytesInStackArgArea();
+}
+
+bool AArch64TargetLowering::DoesCalleeRestoreStack(CallingConv::ID CallCC,
+ bool TailCallOpt) const {
+ return CallCC == CallingConv::Fast && TailCallOpt;
+}
+
+bool AArch64TargetLowering::IsTailCallConvention(CallingConv::ID CallCC) const {
+ return CallCC == CallingConv::Fast;
+}
+
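+// A tail call stores its arguments over part of the incoming argument area, so
+// any load from a fixed stack object that overlaps the clobbered bytes must
+// happen first; this builds a TokenFactor expressing that ordering.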
+SDValue AArch64TargetLowering::addTokenForArgument(SDValue Chain,
+ SelectionDAG &DAG,
+ MachineFrameInfo *MFI,
+ int ClobberedFI) const {
+ SmallVector<SDValue, 8> ArgChains;
+ int64_t FirstByte = MFI->getObjectOffset(ClobberedFI);
+ int64_t LastByte = FirstByte + MFI->getObjectSize(ClobberedFI) - 1;
+
+ // Include the original chain at the beginning of the list. When this is
+ // used by target LowerCall hooks, this helps legalize find the
+ // CALLSEQ_BEGIN node.
+ ArgChains.push_back(Chain);
+
+ // Add a chain value for each load from the incoming argument area that
+ // overlaps the clobbered range.
+ for (SDNode::use_iterator U = DAG.getEntryNode().getNode()->use_begin(),
+ UE = DAG.getEntryNode().getNode()->use_end(); U != UE; ++U)
+ if (LoadSDNode *L = dyn_cast<LoadSDNode>(*U))
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr()))
+ if (FI->getIndex() < 0) {
+ int64_t InFirstByte = MFI->getObjectOffset(FI->getIndex());
+ int64_t InLastByte = InFirstByte;
+ InLastByte += MFI->getObjectSize(FI->getIndex()) - 1;
+
+ if ((InFirstByte <= FirstByte && FirstByte <= InLastByte) ||
+ (FirstByte <= InFirstByte && InFirstByte <= LastByte))
+ ArgChains.push_back(SDValue(L, 1));
+ }
+
+ // Build a tokenfactor for all the chains.
+ return DAG.getNode(ISD::TokenFactor, Chain.getDebugLoc(), MVT::Other,
+ &ArgChains[0], ArgChains.size());
+}
+
+static A64CC::CondCodes IntCCToA64CC(ISD::CondCode CC) {
+ switch (CC) {
+ case ISD::SETEQ: return A64CC::EQ;
+ case ISD::SETGT: return A64CC::GT;
+ case ISD::SETGE: return A64CC::GE;
+ case ISD::SETLT: return A64CC::LT;
+ case ISD::SETLE: return A64CC::LE;
+ case ISD::SETNE: return A64CC::NE;
+ case ISD::SETUGT: return A64CC::HI;
+ case ISD::SETUGE: return A64CC::HS;
+ case ISD::SETULT: return A64CC::LO;
+ case ISD::SETULE: return A64CC::LS;
+ default: llvm_unreachable("Unexpected condition code");
+ }
+}
+
+bool AArch64TargetLowering::isLegalICmpImmediate(int64_t Val) const {
+ // icmp is implemented using adds/subs immediate, which take an unsigned
+ // 12-bit immediate, optionally shifted left by 12 bits.
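+ // For example (illustrative): 4095 (0xfff) and 0x7ff000 are encodable, while
+ // 4097 (0x1001) needs set bits in both ranges and is not.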
+
+ // Symmetric by using adds/subs
+ if (Val < 0)
+ Val = -Val;
+
+ return (Val & ~0xfff) == 0 || (Val & ~0xfff000) == 0;
+}
+
+SDValue AArch64TargetLowering::getSelectableIntSetCC(SDValue LHS, SDValue RHS,
+ ISD::CondCode CC, SDValue &A64cc,
+ SelectionDAG &DAG, DebugLoc &dl) const {
+ if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
+ int64_t C = 0;
+ EVT VT = RHSC->getValueType(0);
+ bool knownInvalid = false;
+
+ // I'm not convinced the rest of LLVM handles these edge cases properly, but
+ // we can at least get it right.
+ if (isSignedIntSetCC(CC)) {
+ C = RHSC->getSExtValue();
+ } else if (RHSC->getZExtValue() > INT64_MAX) {
+ // A 64-bit constant not representable by a signed 64-bit integer is far
+ // too big to fit into a SUBS immediate anyway.
+ knownInvalid = true;
+ } else {
+ C = RHSC->getZExtValue();
+ }
+
+ if (!knownInvalid && !isLegalICmpImmediate(C)) {
+ // Constant does not fit, try adjusting it by one?
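+ // For example (illustrative): "x s< 4097" is not directly encodable, but it
+ // is equivalent to "x s<= 4096", which is.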
+ switch (CC) {
+ default: break;
+ case ISD::SETLT:
+ case ISD::SETGE:
+ if (isLegalICmpImmediate(C-1)) {
+ CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
+ RHS = DAG.getConstant(C-1, VT);
+ }
+ break;
+ case ISD::SETULT:
+ case ISD::SETUGE:
+ if (isLegalICmpImmediate(C-1)) {
+ CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
+ RHS = DAG.getConstant(C-1, VT);
+ }
+ break;
+ case ISD::SETLE:
+ case ISD::SETGT:
+ if (isLegalICmpImmediate(C+1)) {
+ CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
+ RHS = DAG.getConstant(C+1, VT);
+ }
+ break;
+ case ISD::SETULE:
+ case ISD::SETUGT:
+ if (isLegalICmpImmediate(C+1)) {
+ CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
+ RHS = DAG.getConstant(C+1, VT);
+ }
+ break;
+ }
+ }
+ }
+
+ A64CC::CondCodes CondCode = IntCCToA64CC(CC);
+ A64cc = DAG.getConstant(CondCode, MVT::i32);
+ return DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, LHS, RHS,
+ DAG.getCondCode(CC));
+}
+
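+// Map an LLVM floating-point condition onto AArch64 condition codes. Some
+// conditions (e.g. SETONE, SETUEQ) can't be tested with a single AArch64
+// condition, so Alternative is also set and the caller emits a second check.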
+static A64CC::CondCodes FPCCToA64CC(ISD::CondCode CC,
+ A64CC::CondCodes &Alternative) {
+ A64CC::CondCodes CondCode = A64CC::Invalid;
+ Alternative = A64CC::Invalid;
+
+ switch (CC) {
+ default: llvm_unreachable("Unknown FP condition!");
+ case ISD::SETEQ:
+ case ISD::SETOEQ: CondCode = A64CC::EQ; break;
+ case ISD::SETGT:
+ case ISD::SETOGT: CondCode = A64CC::GT; break;
+ case ISD::SETGE:
+ case ISD::SETOGE: CondCode = A64CC::GE; break;
+ case ISD::SETOLT: CondCode = A64CC::MI; break;
+ case ISD::SETOLE: CondCode = A64CC::LS; break;
+ case ISD::SETONE: CondCode = A64CC::MI; Alternative = A64CC::GT; break;
+ case ISD::SETO: CondCode = A64CC::VC; break;
+ case ISD::SETUO: CondCode = A64CC::VS; break;
+ case ISD::SETUEQ: CondCode = A64CC::EQ; Alternative = A64CC::VS; break;
+ case ISD::SETUGT: CondCode = A64CC::HI; break;
+ case ISD::SETUGE: CondCode = A64CC::PL; break;
+ case ISD::SETLT:
+ case ISD::SETULT: CondCode = A64CC::LT; break;
+ case ISD::SETLE:
+ case ISD::SETULE: CondCode = A64CC::LE; break;
+ case ISD::SETNE:
+ case ISD::SETUNE: CondCode = A64CC::NE; break;
+ }
+ return CondCode;
+}
+
+SDValue
+AArch64TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ EVT PtrVT = getPointerTy();
+ const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
+
+ assert(getTargetMachine().getCodeModel() == CodeModel::Small
+ && "Only small code model supported at the moment");
+
+ // The most efficient code is PC-relative anyway for the small memory model,
+ // so we don't need to worry about the relocation model.
+ return DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT,
+ DAG.getTargetBlockAddress(BA, PtrVT, 0,
+ AArch64II::MO_NO_FLAG),
+ DAG.getTargetBlockAddress(BA, PtrVT, 0,
+ AArch64II::MO_LO12),
+ DAG.getConstant(/*Alignment=*/ 4, MVT::i32));
+}
+
+
+// (BRCOND chain, val, dest)
+SDValue
+AArch64TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue Chain = Op.getOperand(0);
+ SDValue TheBit = Op.getOperand(1);
+ SDValue DestBB = Op.getOperand(2);
+
+ // AArch64 BooleanContents is the default UndefinedBooleanContent, which means
+ // that as the consumer we are responsible for ignoring rubbish in higher
+ // bits.
+ TheBit = DAG.getNode(ISD::AND, dl, MVT::i32, TheBit,
+ DAG.getConstant(1, MVT::i32));
+
+ SDValue A64CMP = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, TheBit,
+ DAG.getConstant(0, TheBit.getValueType()),
+ DAG.getCondCode(ISD::SETNE));
+
+ return DAG.getNode(AArch64ISD::BR_CC, dl, MVT::Other, Chain,
+ A64CMP, DAG.getConstant(A64CC::NE, MVT::i32),
+ DestBB);
+}
+
+// (BR_CC chain, condcode, lhs, rhs, dest)
+SDValue
+AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue Chain = Op.getOperand(0);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
+ SDValue LHS = Op.getOperand(2);
+ SDValue RHS = Op.getOperand(3);
+ SDValue DestBB = Op.getOperand(4);
+
+ if (LHS.getValueType() == MVT::f128) {
+ // f128 comparisons are lowered to runtime calls by a routine which sets
+ // LHS, RHS and CC appropriately for the rest of this function to continue.
+ softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl);
+
+ // If softenSetCCOperands returned a scalar, we need to compare the result
+ // against zero to select between true and false values.
+ if (RHS.getNode() == 0) {
+ RHS = DAG.getConstant(0, LHS.getValueType());
+ CC = ISD::SETNE;
+ }
+ }
+
+ if (LHS.getValueType().isInteger()) {
+ SDValue A64cc;
+
+ // Integers are handled in a separate function because the combinations of
+ // immediates and tests can get hairy and we may want to fiddle things.
+ SDValue CmpOp = getSelectableIntSetCC(LHS, RHS, CC, A64cc, DAG, dl);
+
+ return DAG.getNode(AArch64ISD::BR_CC, dl, MVT::Other,
+ Chain, CmpOp, A64cc, DestBB);
+ }
+
+ // Note that some LLVM floating-point CondCodes can't be lowered to a single
+ // conditional branch, hence FPCCToA64CC can set a second condition; the
+ // branch is taken if either condition passes.
+ A64CC::CondCodes CondCode, Alternative = A64CC::Invalid;
+ CondCode = FPCCToA64CC(CC, Alternative);
+ SDValue A64cc = DAG.getConstant(CondCode, MVT::i32);
+ SDValue SetCC = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, LHS, RHS,
+ DAG.getCondCode(CC));
+ SDValue A64BR_CC = DAG.getNode(AArch64ISD::BR_CC, dl, MVT::Other,
+ Chain, SetCC, A64cc, DestBB);
+
+ if (Alternative != A64CC::Invalid) {
+ A64cc = DAG.getConstant(Alternative, MVT::i32);
+ A64BR_CC = DAG.getNode(AArch64ISD::BR_CC, dl, MVT::Other,
+ A64BR_CC, SetCC, A64cc, DestBB);
+
+ }
+
+ return A64BR_CC;
+}
+
+SDValue
+AArch64TargetLowering::LowerF128ToCall(SDValue Op, SelectionDAG &DAG,
+ RTLIB::Libcall Call) const {
+ ArgListTy Args;
+ ArgListEntry Entry;
+ for (unsigned i = 0, e = Op->getNumOperands(); i != e; ++i) {
+ EVT ArgVT = Op.getOperand(i).getValueType();
+ Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+ Entry.Node = Op.getOperand(i); Entry.Ty = ArgTy;
+ Entry.isSExt = false;
+ Entry.isZExt = false;
+ Args.push_back(Entry);
+ }
+ SDValue Callee = DAG.getExternalSymbol(getLibcallName(Call), getPointerTy());
+
+ Type *RetTy = Op.getValueType().getTypeForEVT(*DAG.getContext());
+
+ // By default, the input chain to this libcall is the entry node of the
+ // function. If the libcall is going to be emitted as a tail call then
+ // isUsedByReturnOnly will change it to the right chain if the return
+ // node which is being folded has a non-entry input chain.
+ SDValue InChain = DAG.getEntryNode();
+
+ // isTailCall may be true since the callee does not reference the caller's
+ // stack frame. Check that the call is actually in a tail-call position.
+ SDValue TCChain = InChain;
+ bool isTailCall = isInTailCallPosition(DAG, Op.getNode(), TCChain);
+ if (isTailCall)
+ InChain = TCChain;
+
+ TargetLowering::
+ CallLoweringInfo CLI(InChain, RetTy, false, false, false, false,
+ 0, getLibcallCallingConv(Call), isTailCall,
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
+ Callee, Args, DAG, Op->getDebugLoc());
+ std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
+
+ if (!CallInfo.second.getNode())
+ // It's a tailcall, return the chain (which is the DAG root).
+ return DAG.getRoot();
+
+ return CallInfo.first;
+}
+
+SDValue
+AArch64TargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
+ if (Op.getOperand(0).getValueType() != MVT::f128) {
+ // It's legal except when f128 is involved
+ return Op;
+ }
+
+ RTLIB::Libcall LC;
+ LC = RTLIB::getFPROUND(Op.getOperand(0).getValueType(), Op.getValueType());
+
+ SDValue SrcVal = Op.getOperand(0);
+ return makeLibCall(DAG, LC, Op.getValueType(), &SrcVal, 1,
+ /*isSigned*/ false, Op.getDebugLoc());
+}
+
+SDValue
+AArch64TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
+ assert(Op.getValueType() == MVT::f128 && "Unexpected lowering");
+
+ RTLIB::Libcall LC;
+ LC = RTLIB::getFPEXT(Op.getOperand(0).getValueType(), Op.getValueType());
+
+ return LowerF128ToCall(Op, DAG, LC);
+}
+
+SDValue
+AArch64TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
+ bool IsSigned) const {
+ if (Op.getOperand(0).getValueType() != MVT::f128) {
+ // It's legal except when f128 is involved
+ return Op;
+ }
+
+ RTLIB::Libcall LC;
+ if (IsSigned)
+ LC = RTLIB::getFPTOSINT(Op.getOperand(0).getValueType(), Op.getValueType());
+ else
+ LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(), Op.getValueType());
+
+ return LowerF128ToCall(Op, DAG, LC);
+}
+
+SDValue
+AArch64TargetLowering::LowerGlobalAddressELF(SDValue Op,
+ SelectionDAG &DAG) const {
+ // TableGen doesn't have easy access to the CodeModel or RelocationModel, so
+ // we make that distinction here.
+
+ // We support the static, small memory model for now.
+ assert(getTargetMachine().getCodeModel() == CodeModel::Small);
+
+ EVT PtrVT = getPointerTy();
+ DebugLoc dl = Op.getDebugLoc();
+ const GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(Op);
+ const GlobalValue *GV = GN->getGlobal();
+ unsigned Alignment = GV->getAlignment();
+
+ if (Alignment == 0) {
+ const PointerType *GVPtrTy = cast<PointerType>(GV->getType());
+ if (GVPtrTy->getElementType()->isSized())
+ Alignment = getDataLayout()->getABITypeAlignment(GVPtrTy->getElementType());
+ else {
+ // Be conservative if we can't guess, not that it really matters:
+ // functions and labels aren't valid for loads, and the methods used to
+ // actually calculate an address work with any alignment.
+ Alignment = 1;
+ }
+ }
+
+ unsigned char HiFixup, LoFixup;
+ Reloc::Model RelocM = getTargetMachine().getRelocationModel();
+ bool UseGOT = Subtarget->GVIsIndirectSymbol(GV, RelocM);
+
+ if (UseGOT) {
+ HiFixup = AArch64II::MO_GOT;
+ LoFixup = AArch64II::MO_GOT_LO12;
+ Alignment = 8;
+ } else {
+ HiFixup = AArch64II::MO_NO_FLAG;
+ LoFixup = AArch64II::MO_LO12;
+ }
+
+ // AArch64's small model demands the following sequence:
+ // ADRP x0, somewhere
+ // ADD x0, x0, #:lo12:somewhere ; (or LDR directly).
+ SDValue GlobalRef = DAG.getNode(AArch64ISD::WrapperSmall, dl, PtrVT,
+ DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
+ HiFixup),
+ DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
+ LoFixup),
+ DAG.getConstant(Alignment, MVT::i32));
+
+ if (UseGOT) {
+ GlobalRef = DAG.getNode(AArch64ISD::GOTLoad, dl, PtrVT, DAG.getEntryNode(),
+ GlobalRef);
+ }
+
+ if (GN->getOffset() != 0)
+ return DAG.getNode(ISD::ADD, dl, PtrVT, GlobalRef,
+ DAG.getConstant(GN->getOffset(), PtrVT));
+
+ return GlobalRef;
+}
+
+SDValue AArch64TargetLowering::LowerTLSDescCall(SDValue SymAddr,
+ SDValue DescAddr,
+ DebugLoc DL,
+ SelectionDAG &DAG) const {
+ EVT PtrVT = getPointerTy();
+
+ // The function we need to call is simply the first entry in the GOT for this
+ // descriptor, load it in preparation.
+ SDValue Func, Chain;
+ Func = DAG.getNode(AArch64ISD::GOTLoad, DL, PtrVT, DAG.getEntryNode(),
+ DescAddr);
+
+ // The function takes only one argument: the address of the descriptor itself
+ // in X0.
+ SDValue Glue;
+ Chain = DAG.getCopyToReg(DAG.getEntryNode(), DL, AArch64::X0, DescAddr, Glue);
+ Glue = Chain.getValue(1);
+
+ // Finally, there's a special calling-convention which means that the lookup
+ // must preserve all registers (except X0, obviously).
+ const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+ const AArch64RegisterInfo *A64RI
+ = static_cast<const AArch64RegisterInfo *>(TRI);
+ const uint32_t *Mask = A64RI->getTLSDescCallPreservedMask();
+
+ // We're now ready to populate the argument list, as with a normal call:
+ std::vector<SDValue> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(Func);
+ Ops.push_back(SymAddr);
+ Ops.push_back(DAG.getRegister(AArch64::X0, PtrVT));
+ Ops.push_back(DAG.getRegisterMask(Mask));
+ Ops.push_back(Glue);
+
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ Chain = DAG.getNode(AArch64ISD::TLSDESCCALL, DL, NodeTys, &Ops[0], Ops.size());
+ Glue = Chain.getValue(1);
+
+ // After the call, the offset from TPIDR_EL0 is in X0, copy it out and pass it
+ // back to the generic handling code.
+ return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Glue);
+}
+
+SDValue
+AArch64TargetLowering::LowerGlobalTLSAddress(SDValue Op,
+ SelectionDAG &DAG) const {
+ assert(Subtarget->isTargetELF() &&
+ "TLS not implemented for non-ELF targets");
+ const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
+
+ TLSModel::Model Model = getTargetMachine().getTLSModel(GA->getGlobal());
+
+ SDValue TPOff;
+ EVT PtrVT = getPointerTy();
+ DebugLoc DL = Op.getDebugLoc();
+ const GlobalValue *GV = GA->getGlobal();
+
+ SDValue ThreadBase = DAG.getNode(AArch64ISD::THREAD_POINTER, DL, PtrVT);
+
+ if (Model == TLSModel::InitialExec) {
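+ // Initial-exec: the variable's offset from the thread pointer is loaded
+ // from a GOT entry via :gottprel: relocations.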
+ TPOff = DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT,
+ DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
+ AArch64II::MO_GOTTPREL),
+ DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
+ AArch64II::MO_GOTTPREL_LO12),
+ DAG.getConstant(8, MVT::i32));
+ TPOff = DAG.getNode(AArch64ISD::GOTLoad, DL, PtrVT, DAG.getEntryNode(),
+ TPOff);
+ } else if (Model == TLSModel::LocalExec) {
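+ // Local-exec: the offset is known when the executable is linked, so it can
+ // be materialised directly with a MOVZ/MOVK pair using :tprel: relocations.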
+ SDValue HiVar = DAG.getTargetGlobalAddress(GV, DL, MVT::i64, 0,
+ AArch64II::MO_TPREL_G1);
+ SDValue LoVar = DAG.getTargetGlobalAddress(GV, DL, MVT::i64, 0,
+ AArch64II::MO_TPREL_G0_NC);
+
+ TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZxii, DL, PtrVT, HiVar,
+ DAG.getTargetConstant(0, MVT::i32)), 0);
+ TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKxii, DL, PtrVT, TPOff, LoVar,
+ DAG.getTargetConstant(0, MVT::i32)), 0);
+ } else if (Model == TLSModel::GeneralDynamic) {
+ // Accesses used in this sequence go via the TLS descriptor which lives in
+ // the GOT. Prepare an address we can use to handle this.
+ SDValue HiDesc = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
+ AArch64II::MO_TLSDESC);
+ SDValue LoDesc = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
+ AArch64II::MO_TLSDESC_LO12);
+ SDValue DescAddr = DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT,
+ HiDesc, LoDesc, DAG.getConstant(8, MVT::i32));
+ SDValue SymAddr = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0);
+
+ TPOff = LowerTLSDescCall(SymAddr, DescAddr, DL, DAG);
+ } else if (Model == TLSModel::LocalDynamic) {
+    // Local-dynamic accesses proceed in two phases: first a general-dynamic
+    // TLS descriptor call against the special symbol _TLS_MODULE_BASE_ to
+    // calculate the beginning of the module's TLS region, followed by a
+    // DTPREL offset calculation to find the variable within that region.
+
+ // These accesses will need deduplicating if there's more than one.
+ AArch64MachineFunctionInfo* MFI = DAG.getMachineFunction()
+ .getInfo<AArch64MachineFunctionInfo>();
+ MFI->incNumLocalDynamicTLSAccesses();
+
+
+ // Get the location of _TLS_MODULE_BASE_:
+ SDValue HiDesc = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT,
+ AArch64II::MO_TLSDESC);
+ SDValue LoDesc = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT,
+ AArch64II::MO_TLSDESC_LO12);
+ SDValue DescAddr = DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT,
+ HiDesc, LoDesc, DAG.getConstant(8, MVT::i32));
+ SDValue SymAddr = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT);
+
+ ThreadBase = LowerTLSDescCall(SymAddr, DescAddr, DL, DAG);
+
+ // Get the variable's offset from _TLS_MODULE_BASE_
+ SDValue HiVar = DAG.getTargetGlobalAddress(GV, DL, MVT::i64, 0,
+ AArch64II::MO_DTPREL_G1);
+ SDValue LoVar = DAG.getTargetGlobalAddress(GV, DL, MVT::i64, 0,
+ AArch64II::MO_DTPREL_G0_NC);
+
+ TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZxii, DL, PtrVT, HiVar,
+ DAG.getTargetConstant(0, MVT::i32)), 0);
+ TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKxii, DL, PtrVT, TPOff, LoVar,
+ DAG.getTargetConstant(0, MVT::i32)), 0);
+ } else
+ llvm_unreachable("Unsupported TLS access model");
+
+
+ return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadBase, TPOff);
+}
+
+SDValue
+AArch64TargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG,
+ bool IsSigned) const {
+ if (Op.getValueType() != MVT::f128) {
+ // Legal for everything except f128.
+ return Op;
+ }
+
+ RTLIB::Libcall LC;
+ if (IsSigned)
+ LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType());
+ else
+ LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType());
+
+ return LowerF128ToCall(Op, DAG, LC);
+}
+
+
+SDValue
+AArch64TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
+ JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
+ DebugLoc dl = JT->getDebugLoc();
+
+  // When compiling PIC, jump tables get put in the code section, so a static
+  // relocation style is acceptable for both the PIC and non-PIC cases.
+ return DAG.getNode(AArch64ISD::WrapperSmall, dl, getPointerTy(),
+ DAG.getTargetJumpTable(JT->getIndex(), getPointerTy()),
+ DAG.getTargetJumpTable(JT->getIndex(), getPointerTy(),
+ AArch64II::MO_LO12),
+ DAG.getConstant(1, MVT::i32));
+}
+
+// (SELECT_CC lhs, rhs, iftrue, iffalse, condcode)
+SDValue
+AArch64TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ SDValue IfTrue = Op.getOperand(2);
+ SDValue IfFalse = Op.getOperand(3);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
+
+ if (LHS.getValueType() == MVT::f128) {
+ // f128 comparisons are lowered to libcalls, but slot in nicely here
+ // afterwards.
+ softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl);
+
+ // If softenSetCCOperands returned a scalar, we need to compare the result
+ // against zero to select between true and false values.
+ if (RHS.getNode() == 0) {
+ RHS = DAG.getConstant(0, LHS.getValueType());
+ CC = ISD::SETNE;
+ }
+ }
+
+ if (LHS.getValueType().isInteger()) {
+ SDValue A64cc;
+
+ // Integers are handled in a separate function because the combinations of
+ // immediates and tests can get hairy and we may want to fiddle things.
+ SDValue CmpOp = getSelectableIntSetCC(LHS, RHS, CC, A64cc, DAG, dl);
+
+ return DAG.getNode(AArch64ISD::SELECT_CC, dl, Op.getValueType(),
+ CmpOp, IfTrue, IfFalse, A64cc);
+ }
+
+ // Note that some LLVM floating-point CondCodes can't be lowered to a single
+ // conditional branch, hence FPCCToA64CC can set a second test, where either
+ // passing is sufficient.
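+  // (For example, an unordered-or-equal comparison has no single A64
+  // condition code; it is checked as EQ with VS as the alternative.)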
+ A64CC::CondCodes CondCode, Alternative = A64CC::Invalid;
+ CondCode = FPCCToA64CC(CC, Alternative);
+ SDValue A64cc = DAG.getConstant(CondCode, MVT::i32);
+ SDValue SetCC = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, LHS, RHS,
+ DAG.getCondCode(CC));
+ SDValue A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl, Op.getValueType(),
+ SetCC, IfTrue, IfFalse, A64cc);
+
+ if (Alternative != A64CC::Invalid) {
+ A64cc = DAG.getConstant(Alternative, MVT::i32);
+ A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl, Op.getValueType(),
+ SetCC, IfTrue, A64SELECT_CC, A64cc);
+
+ }
+
+ return A64SELECT_CC;
+}
+
+// (SELECT testbit, iftrue, iffalse)
+SDValue
+AArch64TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue TheBit = Op.getOperand(0);
+ SDValue IfTrue = Op.getOperand(1);
+ SDValue IfFalse = Op.getOperand(2);
+
+ // AArch64 BooleanContents is the default UndefinedBooleanContent, which means
+ // that as the consumer we are responsible for ignoring rubbish in higher
+ // bits.
+ TheBit = DAG.getNode(ISD::AND, dl, MVT::i32, TheBit,
+ DAG.getConstant(1, MVT::i32));
+ SDValue A64CMP = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, TheBit,
+ DAG.getConstant(0, TheBit.getValueType()),
+ DAG.getCondCode(ISD::SETNE));
+
+ return DAG.getNode(AArch64ISD::SELECT_CC, dl, Op.getValueType(),
+ A64CMP, IfTrue, IfFalse,
+ DAG.getConstant(A64CC::NE, MVT::i32));
+}
+
+// (SETCC lhs, rhs, condcode)
+SDValue
+AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+ EVT VT = Op.getValueType();
+
+ if (LHS.getValueType() == MVT::f128) {
+ // f128 comparisons will be lowered to libcalls giving a valid LHS and RHS
+ // for the rest of the function (some i32 or i64 values).
+ softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl);
+
+ // If softenSetCCOperands returned a scalar, use it.
+ if (RHS.getNode() == 0) {
+ assert(LHS.getValueType() == Op.getValueType() &&
+ "Unexpected setcc expansion!");
+ return LHS;
+ }
+ }
+
+ if (LHS.getValueType().isInteger()) {
+ SDValue A64cc;
+
+ // Integers are handled in a separate function because the combinations of
+ // immediates and tests can get hairy and we may want to fiddle things.
+ SDValue CmpOp = getSelectableIntSetCC(LHS, RHS, CC, A64cc, DAG, dl);
+
+ return DAG.getNode(AArch64ISD::SELECT_CC, dl, VT,
+ CmpOp, DAG.getConstant(1, VT), DAG.getConstant(0, VT),
+ A64cc);
+ }
+
+ // Note that some LLVM floating-point CondCodes can't be lowered to a single
+ // conditional branch, hence FPCCToA64CC can set a second test, where either
+ // passing is sufficient.
+ A64CC::CondCodes CondCode, Alternative = A64CC::Invalid;
+ CondCode = FPCCToA64CC(CC, Alternative);
+ SDValue A64cc = DAG.getConstant(CondCode, MVT::i32);
+ SDValue CmpOp = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, LHS, RHS,
+ DAG.getCondCode(CC));
+ SDValue A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl, VT,
+ CmpOp, DAG.getConstant(1, VT),
+ DAG.getConstant(0, VT), A64cc);
+
+ if (Alternative != A64CC::Invalid) {
+ A64cc = DAG.getConstant(Alternative, MVT::i32);
+ A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl, VT, CmpOp,
+ DAG.getConstant(1, VT), A64SELECT_CC, A64cc);
+ }
+
+ return A64SELECT_CC;
+}
+
+SDValue
+AArch64TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
+  const Value *DestSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
+  const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
+
+ // We have to make sure we copy the entire structure: 8+8+8+4+4 = 32 bytes
+ // rather than just 8.
+ return DAG.getMemcpy(Op.getOperand(0), Op.getDebugLoc(),
+ Op.getOperand(1), Op.getOperand(2),
+ DAG.getConstant(32, MVT::i32), 8, false, false,
+ MachinePointerInfo(DestSV), MachinePointerInfo(SrcSV));
+}
+
+SDValue
+AArch64TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
+ // The layout of the va_list struct is specified in the AArch64 Procedure Call
+ // Standard, section B.3.
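+  // As a reminder, that layout is roughly:
+  //   struct va_list {
+  //     void *__stack;    // offset 0
+  //     void *__gr_top;   // offset 8
+  //     void *__vr_top;   // offset 16
+  //     int   __gr_offs;  // offset 24
+  //     int   __vr_offs;  // offset 28
+  //   };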
+ MachineFunction &MF = DAG.getMachineFunction();
+ AArch64MachineFunctionInfo *FuncInfo = MF.getInfo<AArch64MachineFunctionInfo>();
+ DebugLoc DL = Op.getDebugLoc();
+
+ SDValue Chain = Op.getOperand(0);
+ SDValue VAList = Op.getOperand(1);
+ const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+ SmallVector<SDValue, 4> MemOps;
+
+ // void *__stack at offset 0
+ SDValue Stack = DAG.getFrameIndex(FuncInfo->getVariadicStackIdx(),
+ getPointerTy());
+ MemOps.push_back(DAG.getStore(Chain, DL, Stack, VAList,
+ MachinePointerInfo(SV), false, false, 0));
+
+ // void *__gr_top at offset 8
+ int GPRSize = FuncInfo->getVariadicGPRSize();
+ if (GPRSize > 0) {
+ SDValue GRTop, GRTopAddr;
+
+ GRTopAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
+ DAG.getConstant(8, getPointerTy()));
+
+ GRTop = DAG.getFrameIndex(FuncInfo->getVariadicGPRIdx(), getPointerTy());
+ GRTop = DAG.getNode(ISD::ADD, DL, getPointerTy(), GRTop,
+ DAG.getConstant(GPRSize, getPointerTy()));
+
+ MemOps.push_back(DAG.getStore(Chain, DL, GRTop, GRTopAddr,
+ MachinePointerInfo(SV, 8),
+ false, false, 0));
+ }
+
+ // void *__vr_top at offset 16
+ int FPRSize = FuncInfo->getVariadicFPRSize();
+ if (FPRSize > 0) {
+ SDValue VRTop, VRTopAddr;
+ VRTopAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
+ DAG.getConstant(16, getPointerTy()));
+
+ VRTop = DAG.getFrameIndex(FuncInfo->getVariadicFPRIdx(), getPointerTy());
+ VRTop = DAG.getNode(ISD::ADD, DL, getPointerTy(), VRTop,
+ DAG.getConstant(FPRSize, getPointerTy()));
+
+ MemOps.push_back(DAG.getStore(Chain, DL, VRTop, VRTopAddr,
+ MachinePointerInfo(SV, 16),
+ false, false, 0));
+ }
+
+ // int __gr_offs at offset 24
+ SDValue GROffsAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
+ DAG.getConstant(24, getPointerTy()));
+ MemOps.push_back(DAG.getStore(Chain, DL, DAG.getConstant(-GPRSize, MVT::i32),
+ GROffsAddr, MachinePointerInfo(SV, 24),
+ false, false, 0));
+
+ // int __vr_offs at offset 28
+ SDValue VROffsAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
+ DAG.getConstant(28, getPointerTy()));
+ MemOps.push_back(DAG.getStore(Chain, DL, DAG.getConstant(-FPRSize, MVT::i32),
+ VROffsAddr, MachinePointerInfo(SV, 28),
+ false, false, 0));
+
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &MemOps[0],
+ MemOps.size());
+}
+
+SDValue
+AArch64TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
+ switch (Op.getOpcode()) {
+ default: llvm_unreachable("Don't know how to custom lower this!");
+ case ISD::FADD: return LowerF128ToCall(Op, DAG, RTLIB::ADD_F128);
+ case ISD::FSUB: return LowerF128ToCall(Op, DAG, RTLIB::SUB_F128);
+ case ISD::FMUL: return LowerF128ToCall(Op, DAG, RTLIB::MUL_F128);
+ case ISD::FDIV: return LowerF128ToCall(Op, DAG, RTLIB::DIV_F128);
+ case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, true);
+ case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG, false);
+ case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG, true);
+ case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG, false);
+ case ISD::FP_ROUND: return LowerFP_ROUND(Op, DAG);
+ case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
+
+ case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
+ case ISD::BRCOND: return LowerBRCOND(Op, DAG);
+ case ISD::BR_CC: return LowerBR_CC(Op, DAG);
+ case ISD::GlobalAddress: return LowerGlobalAddressELF(Op, DAG);
+ case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
+ case ISD::JumpTable: return LowerJumpTable(Op, DAG);
+ case ISD::SELECT: return LowerSELECT(Op, DAG);
+ case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
+ case ISD::SETCC: return LowerSETCC(Op, DAG);
+ case ISD::VACOPY: return LowerVACOPY(Op, DAG);
+ case ISD::VASTART: return LowerVASTART(Op, DAG);
+ }
+
+ return SDValue();
+}
+
+static SDValue PerformANDCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+
+ SelectionDAG &DAG = DCI.DAG;
+ DebugLoc DL = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+
+  // We're looking for an SRL/AND pair which forms a UBFX.
+
+ if (VT != MVT::i32 && VT != MVT::i64)
+ return SDValue();
+
+ if (!isa<ConstantSDNode>(N->getOperand(1)))
+ return SDValue();
+
+ uint64_t TruncMask = N->getConstantOperandVal(1);
+ if (!isMask_64(TruncMask))
+ return SDValue();
+
+ uint64_t Width = CountPopulation_64(TruncMask);
+ SDValue Shift = N->getOperand(0);
+
+ if (Shift.getOpcode() != ISD::SRL)
+ return SDValue();
+
+ if (!isa<ConstantSDNode>(Shift->getOperand(1)))
+ return SDValue();
+ uint64_t LSB = Shift->getConstantOperandVal(1);
+
+ if (LSB > VT.getSizeInBits() || Width > VT.getSizeInBits())
+ return SDValue();
+
+ return DAG.getNode(AArch64ISD::UBFX, DL, VT, Shift.getOperand(0),
+ DAG.getConstant(LSB, MVT::i64),
+ DAG.getConstant(LSB + Width - 1, MVT::i64));
+}
+
+static SDValue PerformATOMIC_FENCECombine(SDNode *FenceNode,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ // An atomic operation followed by an acquiring atomic fence can be reduced to
+ // an acquiring load. The atomic operation provides a convenient pointer to
+ // load from. If the original operation was a load anyway we can actually
+ // combine the two operations into an acquiring load.
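+  // For example, a monotonic atomic load followed by an acquire fence can be
+  // rewritten as a single load-acquire, and the standalone fence disappears.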
+ SelectionDAG &DAG = DCI.DAG;
+ SDValue AtomicOp = FenceNode->getOperand(0);
+ AtomicSDNode *AtomicNode = dyn_cast<AtomicSDNode>(AtomicOp);
+
+ // A fence on its own can't be optimised
+ if (!AtomicNode)
+ return SDValue();
+
+ uint64_t FenceOrder = FenceNode->getConstantOperandVal(1);
+ uint64_t FenceScope = FenceNode->getConstantOperandVal(2);
+
+ if (FenceOrder != Acquire || FenceScope != AtomicNode->getSynchScope())
+ return SDValue();
+
+ // If the original operation was an ATOMIC_LOAD then we'll be replacing it, so
+ // the chain we use should be its input, otherwise we'll put our store after
+ // it so we use its output chain.
+ SDValue Chain = AtomicNode->getOpcode() == ISD::ATOMIC_LOAD ?
+ AtomicNode->getChain() : AtomicOp;
+
+ // We have an acquire fence with a handy atomic operation nearby, we can
+ // convert the fence into a load-acquire, discarding the result.
+ DebugLoc DL = FenceNode->getDebugLoc();
+ SDValue Op = DAG.getAtomic(ISD::ATOMIC_LOAD, DL, AtomicNode->getMemoryVT(),
+ AtomicNode->getValueType(0),
+ Chain, // Chain
+ AtomicOp.getOperand(1), // Pointer
+ AtomicNode->getMemOperand(), Acquire,
+ static_cast<SynchronizationScope>(FenceScope));
+
+ if (AtomicNode->getOpcode() == ISD::ATOMIC_LOAD)
+ DAG.ReplaceAllUsesWith(AtomicNode, Op.getNode());
+
+ return Op.getValue(1);
+}
+
+static SDValue PerformATOMIC_STORECombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ // A releasing atomic fence followed by an atomic store can be combined into a
+ // single store operation.
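+  // For example, a release fence followed by a monotonic atomic store can be
+  // rewritten as a single store-release.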
+ SelectionDAG &DAG = DCI.DAG;
+ AtomicSDNode *AtomicNode = cast<AtomicSDNode>(N);
+ SDValue FenceOp = AtomicNode->getOperand(0);
+
+ if (FenceOp.getOpcode() != ISD::ATOMIC_FENCE)
+ return SDValue();
+
+ uint64_t FenceOrder
+ = cast<ConstantSDNode>(FenceOp.getOperand(1))->getZExtValue();
+ uint64_t FenceScope
+ = cast<ConstantSDNode>(FenceOp.getOperand(2))->getZExtValue();
+
+ if (FenceOrder != Release || FenceScope != AtomicNode->getSynchScope())
+ return SDValue();
+
+ DebugLoc DL = AtomicNode->getDebugLoc();
+ return DAG.getAtomic(ISD::ATOMIC_STORE, DL, AtomicNode->getMemoryVT(),
+ FenceOp.getOperand(0), // Chain
+ AtomicNode->getOperand(1), // Pointer
+ AtomicNode->getOperand(2), // Value
+ AtomicNode->getMemOperand(), Release,
+ static_cast<SynchronizationScope>(FenceScope));
+}
+
+/// For a true bitfield insert, the bits getting into that contiguous mask
+/// should come from the low part of an existing value: they must be formed from
+/// a compatible SHL operation (unless they're already low). This function
+/// checks that condition and returns the intended least-significant bit.
+/// If the operation is not a field preparation, -1 is returned.
+static int32_t getLSBForBFI(SelectionDAG &DAG, DebugLoc DL, EVT VT,
+ SDValue &MaskedVal, uint64_t Mask) {
+ if (!isShiftedMask_64(Mask))
+ return -1;
+
+ // Now we need to alter MaskedVal so that it is an appropriate input for a BFI
+ // instruction. BFI will do a left-shift by LSB before applying the mask we've
+ // spotted, so in general we should pre-emptively "undo" that by making sure
+ // the incoming bits have had a right-shift applied to them.
+ //
+ // This right shift, however, will combine with existing left/right shifts. In
+ // the simplest case of a completely straight bitfield operation, it will be
+ // expected to completely cancel out with an existing SHL. More complicated
+ // cases (e.g. bitfield to bitfield copy) may still need a real shift before
+ // the BFI.
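+  // For example, inserting (val << 8) under the mask 0x0000ff00 needs no
+  // residual shift at all: the existing SHL by 8 exactly cancels the
+  // right-shift by LSB (8) that the BFI implies.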
+
+ uint64_t LSB = CountTrailingZeros_64(Mask);
+ int64_t ShiftRightRequired = LSB;
+ if (MaskedVal.getOpcode() == ISD::SHL &&
+ isa<ConstantSDNode>(MaskedVal.getOperand(1))) {
+ ShiftRightRequired -= MaskedVal.getConstantOperandVal(1);
+ MaskedVal = MaskedVal.getOperand(0);
+ } else if (MaskedVal.getOpcode() == ISD::SRL &&
+ isa<ConstantSDNode>(MaskedVal.getOperand(1))) {
+ ShiftRightRequired += MaskedVal.getConstantOperandVal(1);
+ MaskedVal = MaskedVal.getOperand(0);
+ }
+
+ if (ShiftRightRequired > 0)
+ MaskedVal = DAG.getNode(ISD::SRL, DL, VT, MaskedVal,
+ DAG.getConstant(ShiftRightRequired, MVT::i64));
+ else if (ShiftRightRequired < 0) {
+ // We could actually end up with a residual left shift, for example with
+ // "struc.bitfield = val << 1".
+ MaskedVal = DAG.getNode(ISD::SHL, DL, VT, MaskedVal,
+ DAG.getConstant(-ShiftRightRequired, MVT::i64));
+ }
+
+ return LSB;
+}
+
+/// Searches from N for an existing AArch64ISD::BFI node, possibly surrounded by
+/// a mask and an extension. Returns true if a BFI was found and provides
+/// information on its surroundings.
+static bool findMaskedBFI(SDValue N, SDValue &BFI, uint64_t &Mask,
+ bool &Extended) {
+ Extended = false;
+ if (N.getOpcode() == ISD::ZERO_EXTEND) {
+ Extended = true;
+ N = N.getOperand(0);
+ }
+
+ if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1))) {
+ Mask = N->getConstantOperandVal(1);
+ N = N.getOperand(0);
+ } else {
+ // Mask is the whole width.
+ Mask = (1ULL << N.getValueType().getSizeInBits()) - 1;
+ }
+
+ if (N.getOpcode() == AArch64ISD::BFI) {
+ BFI = N;
+ return true;
+ }
+
+ return false;
+}
+
+/// Try to combine a subtree (rooted at an OR) into a "masked BFI" node, which
+/// is roughly equivalent to (and (BFI ...), mask). This form is used because it
+/// can often be further combined with a larger mask. Ultimately, we want mask
+/// to be 2^32-1 or 2^64-1 so the AND can be skipped.
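+///
+/// For example, on i32, (or (and a, 0xffff0000), (and b, 0x0000ffff)) can
+/// become a single BFI with no residual AND, since the two masks together
+/// cover all 32 bits.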
+static SDValue tryCombineToBFI(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const AArch64Subtarget *Subtarget) {
+ SelectionDAG &DAG = DCI.DAG;
+ DebugLoc DL = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+
+ assert(N->getOpcode() == ISD::OR && "Unexpected root");
+
+ // We need the LHS to be (and SOMETHING, MASK). Find out what that mask is or
+ // abandon the effort.
+ SDValue LHS = N->getOperand(0);
+ if (LHS.getOpcode() != ISD::AND)
+ return SDValue();
+
+ uint64_t LHSMask;
+ if (isa<ConstantSDNode>(LHS.getOperand(1)))
+ LHSMask = LHS->getConstantOperandVal(1);
+ else
+ return SDValue();
+
+ // We also need the RHS to be (and SOMETHING, MASK). Find out what that mask
+ // is or abandon the effort.
+ SDValue RHS = N->getOperand(1);
+ if (RHS.getOpcode() != ISD::AND)
+ return SDValue();
+
+ uint64_t RHSMask;
+ if (isa<ConstantSDNode>(RHS.getOperand(1)))
+ RHSMask = RHS->getConstantOperandVal(1);
+ else
+ return SDValue();
+
+ // Can't do anything if the masks are incompatible.
+ if (LHSMask & RHSMask)
+ return SDValue();
+
+ // Now we need one of the masks to be a contiguous field. Without loss of
+ // generality that should be the RHS one.
+ SDValue Bitfield = LHS.getOperand(0);
+ if (getLSBForBFI(DAG, DL, VT, Bitfield, LHSMask) != -1) {
+ // We know that LHS is a candidate new value, and RHS isn't already a better
+ // one.
+ std::swap(LHS, RHS);
+ std::swap(LHSMask, RHSMask);
+ }
+
+ // We've done our best to put the right operands in the right places, all we
+ // can do now is check whether a BFI exists.
+ Bitfield = RHS.getOperand(0);
+ int32_t LSB = getLSBForBFI(DAG, DL, VT, Bitfield, RHSMask);
+ if (LSB == -1)
+ return SDValue();
+
+ uint32_t Width = CountPopulation_64(RHSMask);
+ assert(Width && "Expected non-zero bitfield width");
+
+ SDValue BFI = DAG.getNode(AArch64ISD::BFI, DL, VT,
+ LHS.getOperand(0), Bitfield,
+ DAG.getConstant(LSB, MVT::i64),
+ DAG.getConstant(Width, MVT::i64));
+
+ // Mask is trivial
+ if ((LHSMask | RHSMask) == (1ULL << VT.getSizeInBits()) - 1)
+ return BFI;
+
+ return DAG.getNode(ISD::AND, DL, VT, BFI,
+ DAG.getConstant(LHSMask | RHSMask, VT));
+}
+
+/// Search for the bitwise combining (with careful masks) of a MaskedBFI and its
+/// original input. This is surprisingly common because SROA splits things up
+/// into i8 chunks, so the originally detected MaskedBFI may actually only act
+/// on the low (say) byte of a word. This is then ORed into the rest of the
+/// word afterwards.
+///
+/// Basic input: (or (and OLDFIELD, MASK1), (MaskedBFI MASK2, OLDFIELD, ...)).
+///
+/// If MASK1 and MASK2 are compatible, we can fold the whole thing into the
+/// MaskedBFI. We can also deal with a certain amount of extend/truncate being
+/// involved.
+static SDValue tryCombineToLargerBFI(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const AArch64Subtarget *Subtarget) {
+ SelectionDAG &DAG = DCI.DAG;
+ DebugLoc DL = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+
+ // First job is to hunt for a MaskedBFI on either the left or right. Swap
+ // operands if it's actually on the right.
+ SDValue BFI;
+ SDValue PossExtraMask;
+ uint64_t ExistingMask = 0;
+ bool Extended = false;
+ if (findMaskedBFI(N->getOperand(0), BFI, ExistingMask, Extended))
+ PossExtraMask = N->getOperand(1);
+ else if (findMaskedBFI(N->getOperand(1), BFI, ExistingMask, Extended))
+ PossExtraMask = N->getOperand(0);
+ else
+ return SDValue();
+
+ // We can only combine a BFI with another compatible mask.
+ if (PossExtraMask.getOpcode() != ISD::AND ||
+ !isa<ConstantSDNode>(PossExtraMask.getOperand(1)))
+ return SDValue();
+
+ uint64_t ExtraMask = PossExtraMask->getConstantOperandVal(1);
+
+ // Masks must be compatible.
+ if (ExtraMask & ExistingMask)
+ return SDValue();
+
+ SDValue OldBFIVal = BFI.getOperand(0);
+ SDValue NewBFIVal = BFI.getOperand(1);
+ if (Extended) {
+ // We skipped a ZERO_EXTEND above, so the input to the MaskedBFIs should be
+ // 32-bit and we'll be forming a 64-bit MaskedBFI. The MaskedBFI arguments
+ // need to be made compatible.
+ assert(VT == MVT::i64 && BFI.getValueType() == MVT::i32
+ && "Invalid types for BFI");
+ OldBFIVal = DAG.getNode(ISD::ANY_EXTEND, DL, VT, OldBFIVal);
+ NewBFIVal = DAG.getNode(ISD::ANY_EXTEND, DL, VT, NewBFIVal);
+ }
+
+ // We need the MaskedBFI to be combined with a mask of the *same* value.
+ if (PossExtraMask.getOperand(0) != OldBFIVal)
+ return SDValue();
+
+ BFI = DAG.getNode(AArch64ISD::BFI, DL, VT,
+ OldBFIVal, NewBFIVal,
+ BFI.getOperand(2), BFI.getOperand(3));
+
+ // If the masking is trivial, we don't need to create it.
+ if ((ExtraMask | ExistingMask) == (1ULL << VT.getSizeInBits()) - 1)
+ return BFI;
+
+ return DAG.getNode(ISD::AND, DL, VT, BFI,
+ DAG.getConstant(ExtraMask | ExistingMask, VT));
+}
+
+/// An EXTR instruction is made up of two shifts, ORed together. This helper
+/// searches for and classifies those shifts.
+static bool findEXTRHalf(SDValue N, SDValue &Src, uint32_t &ShiftAmount,
+ bool &FromHi) {
+ if (N.getOpcode() == ISD::SHL)
+ FromHi = false;
+ else if (N.getOpcode() == ISD::SRL)
+ FromHi = true;
+ else
+ return false;
+
+ if (!isa<ConstantSDNode>(N.getOperand(1)))
+ return false;
+
+ ShiftAmount = N->getConstantOperandVal(1);
+ Src = N->getOperand(0);
+ return true;
+}
+
+/// An EXTR instruction extracts a contiguous chunk of bits from two existing
+/// registers viewed as a high/low pair. This function looks for the pattern:
+/// (or (shl VAL1, #N), (srl VAL2, #RegWidth-N)) and replaces it with an
+/// EXTR. This can't quite be done in TableGen because the two immediates
+/// aren't independent.
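+///
+/// For example, on i32, (or (shl a, #24), (srl b, #8)) becomes
+/// (EXTR a, b, #8).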
+static SDValue tryCombineToEXTR(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ SelectionDAG &DAG = DCI.DAG;
+ DebugLoc DL = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+
+ assert(N->getOpcode() == ISD::OR && "Unexpected root");
+
+ if (VT != MVT::i32 && VT != MVT::i64)
+ return SDValue();
+
+ SDValue LHS;
+ uint32_t ShiftLHS = 0;
+  bool LHSFromHi = false;
+ if (!findEXTRHalf(N->getOperand(0), LHS, ShiftLHS, LHSFromHi))
+ return SDValue();
+
+ SDValue RHS;
+ uint32_t ShiftRHS = 0;
+  bool RHSFromHi = false;
+ if (!findEXTRHalf(N->getOperand(1), RHS, ShiftRHS, RHSFromHi))
+ return SDValue();
+
+ // If they're both trying to come from the high part of the register, they're
+ // not really an EXTR.
+ if (LHSFromHi == RHSFromHi)
+ return SDValue();
+
+ if (ShiftLHS + ShiftRHS != VT.getSizeInBits())
+ return SDValue();
+
+ if (LHSFromHi) {
+ std::swap(LHS, RHS);
+ std::swap(ShiftLHS, ShiftRHS);
+ }
+
+ return DAG.getNode(AArch64ISD::EXTR, DL, VT,
+ LHS, RHS,
+ DAG.getConstant(ShiftRHS, MVT::i64));
+}
+
+/// Target-specific dag combine xforms for ISD::OR
+static SDValue PerformORCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const AArch64Subtarget *Subtarget) {
+
+ SelectionDAG &DAG = DCI.DAG;
+ EVT VT = N->getValueType(0);
+
+  if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
+ return SDValue();
+
+ // Attempt to recognise bitfield-insert operations.
+ SDValue Res = tryCombineToBFI(N, DCI, Subtarget);
+ if (Res.getNode())
+ return Res;
+
+ // Attempt to combine an existing MaskedBFI operation into one with a larger
+ // mask.
+ Res = tryCombineToLargerBFI(N, DCI, Subtarget);
+ if (Res.getNode())
+ return Res;
+
+ Res = tryCombineToEXTR(N, DCI);
+ if (Res.getNode())
+ return Res;
+
+ return SDValue();
+}
+
+/// Target-specific dag combine xforms for ISD::SRA
+static SDValue PerformSRACombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+
+ SelectionDAG &DAG = DCI.DAG;
+ DebugLoc DL = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+
+ // We're looking for an SRA/SHL pair which form an SBFX.
+
+ if (VT != MVT::i32 && VT != MVT::i64)
+ return SDValue();
+
+ if (!isa<ConstantSDNode>(N->getOperand(1)))
+ return SDValue();
+
+ uint64_t ExtraSignBits = N->getConstantOperandVal(1);
+ SDValue Shift = N->getOperand(0);
+
+ if (Shift.getOpcode() != ISD::SHL)
+ return SDValue();
+
+ if (!isa<ConstantSDNode>(Shift->getOperand(1)))
+ return SDValue();
+
+ uint64_t BitsOnLeft = Shift->getConstantOperandVal(1);
+ uint64_t Width = VT.getSizeInBits() - ExtraSignBits;
+ uint64_t LSB = VT.getSizeInBits() - Width - BitsOnLeft;
+
+ if (LSB > VT.getSizeInBits() || Width > VT.getSizeInBits())
+ return SDValue();
+
+ return DAG.getNode(AArch64ISD::SBFX, DL, VT, Shift.getOperand(0),
+ DAG.getConstant(LSB, MVT::i64),
+ DAG.getConstant(LSB + Width - 1, MVT::i64));
+}
+
+
+SDValue
+AArch64TargetLowering::PerformDAGCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::AND: return PerformANDCombine(N, DCI);
+ case ISD::ATOMIC_FENCE: return PerformATOMIC_FENCECombine(N, DCI);
+ case ISD::ATOMIC_STORE: return PerformATOMIC_STORECombine(N, DCI);
+ case ISD::OR: return PerformORCombine(N, DCI, Subtarget);
+ case ISD::SRA: return PerformSRACombine(N, DCI);
+ }
+ return SDValue();
+}
+
+AArch64TargetLowering::ConstraintType
+AArch64TargetLowering::getConstraintType(const std::string &Constraint) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ default: break;
+ case 'w': // An FP/SIMD vector register
+ return C_RegisterClass;
+ case 'I': // Constant that can be used with an ADD instruction
+ case 'J': // Constant that can be used with a SUB instruction
+ case 'K': // Constant that can be used with a 32-bit logical instruction
+ case 'L': // Constant that can be used with a 64-bit logical instruction
+ case 'M': // Constant that can be used as a 32-bit MOV immediate
+ case 'N': // Constant that can be used as a 64-bit MOV immediate
+ case 'Y': // Floating point constant zero
+ case 'Z': // Integer constant zero
+ return C_Other;
+ case 'Q': // A memory reference with base register and no offset
+ return C_Memory;
+ case 'S': // A symbolic address
+ return C_Other;
+ }
+ }
+
+ // FIXME: Ump, Utf, Usa, Ush
+  // Ump: A memory address suitable for ldp/stp in SI, DI, SF and DF modes,
+  //      whatever they may be
+ // Utf: A memory address suitable for ldp/stp in TF mode, whatever it may be
+ // Usa: An absolute symbolic address
+ // Ush: The high part (bits 32:12) of a pc-relative symbolic address
+ assert(Constraint != "Ump" && Constraint != "Utf" && Constraint != "Usa"
+ && Constraint != "Ush" && "Unimplemented constraints");
+
+ return TargetLowering::getConstraintType(Constraint);
+}
+
+TargetLowering::ConstraintWeight
+AArch64TargetLowering::getSingleConstraintMatchWeight(AsmOperandInfo &Info,
+ const char *Constraint) const {
+
+ llvm_unreachable("Constraint weight unimplemented");
+}
+
+void
+AArch64TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
+ std::string &Constraint,
+ std::vector<SDValue> &Ops,
+ SelectionDAG &DAG) const {
+ SDValue Result(0, 0);
+
+ // Only length 1 constraints are C_Other.
+ if (Constraint.size() != 1) return;
+
+  // Only C_Other constraints get lowered like this. For us that means
+  // constants, so return early if there's no hope the constraint can be
+  // lowered.
+
+ switch(Constraint[0]) {
+ default: break;
+ case 'I': case 'J': case 'K': case 'L':
+ case 'M': case 'N': case 'Z': {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
+ if (!C)
+ return;
+
+ uint64_t CVal = C->getZExtValue();
+ uint32_t Bits;
+
+ switch (Constraint[0]) {
+ default:
+ // FIXME: 'M' and 'N' are MOV pseudo-insts -- unsupported in assembly. 'J'
+ // is a peculiarly useless SUB constraint.
+ llvm_unreachable("Unimplemented C_Other constraint");
+ case 'I':
+ if (CVal <= 0xfff)
+ break;
+ return;
+ case 'K':
+ if (A64Imms::isLogicalImm(32, CVal, Bits))
+ break;
+ return;
+ case 'L':
+ if (A64Imms::isLogicalImm(64, CVal, Bits))
+ break;
+ return;
+ case 'Z':
+ if (CVal == 0)
+ break;
+ return;
+ }
+
+ Result = DAG.getTargetConstant(CVal, Op.getValueType());
+ break;
+ }
+ case 'S': {
+ // An absolute symbolic address or label reference.
+ if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
+ Result = DAG.getTargetGlobalAddress(GA->getGlobal(), Op.getDebugLoc(),
+ GA->getValueType(0));
+ } else if (const BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(Op)) {
+ Result = DAG.getTargetBlockAddress(BA->getBlockAddress(),
+ BA->getValueType(0));
+ } else if (const ExternalSymbolSDNode *ES
+ = dyn_cast<ExternalSymbolSDNode>(Op)) {
+ Result = DAG.getTargetExternalSymbol(ES->getSymbol(),
+ ES->getValueType(0));
+ } else
+ return;
+ break;
+ }
+ case 'Y':
+ if (const ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op)) {
+ if (CFP->isExactlyValue(0.0)) {
+ Result = DAG.getTargetConstantFP(0.0, CFP->getValueType(0));
+ break;
+ }
+ }
+ return;
+ }
+
+ if (Result.getNode()) {
+ Ops.push_back(Result);
+ return;
+ }
+
+ // It's an unknown constraint for us. Let generic code have a go.
+ TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
+}
+
+std::pair<unsigned, const TargetRegisterClass*>
+AArch64TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ case 'r':
+ if (VT.getSizeInBits() <= 32)
+ return std::make_pair(0U, &AArch64::GPR32RegClass);
+ else if (VT == MVT::i64)
+ return std::make_pair(0U, &AArch64::GPR64RegClass);
+ break;
+ case 'w':
+ if (VT == MVT::f16)
+ return std::make_pair(0U, &AArch64::FPR16RegClass);
+ else if (VT == MVT::f32)
+ return std::make_pair(0U, &AArch64::FPR32RegClass);
+ else if (VT == MVT::f64)
+ return std::make_pair(0U, &AArch64::FPR64RegClass);
+ else if (VT.getSizeInBits() == 64)
+ return std::make_pair(0U, &AArch64::VPR64RegClass);
+ else if (VT == MVT::f128)
+ return std::make_pair(0U, &AArch64::FPR128RegClass);
+ else if (VT.getSizeInBits() == 128)
+ return std::make_pair(0U, &AArch64::VPR128RegClass);
+ break;
+ }
+ }
+
+ // Use the default implementation in TargetLowering to convert the register
+ // constraint into a member of a register class.
+ return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+}
diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h
new file mode 100644
index 0000000000..66985c1d2d
--- /dev/null
+++ b/lib/Target/AArch64/AArch64ISelLowering.h
@@ -0,0 +1,247 @@
+//==-- AArch64ISelLowering.h - AArch64 DAG Lowering Interface ----*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that AArch64 uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_AARCH64_ISELLOWERING_H
+#define LLVM_TARGET_AARCH64_ISELLOWERING_H
+
+#include "MCTargetDesc/AArch64BaseInfo.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetLowering.h"
+
+
+namespace llvm {
+namespace AArch64ISD {
+ enum NodeType {
+ // Start the numbering from where ISD NodeType finishes.
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+
+ // This is a conditional branch which also notes the flag needed
+ // (eq/sgt/...). A64 puts this information on the branches rather than
+ // compares as LLVM does.
+ BR_CC,
+
+ // A node to be selected to an actual call operation: either BL or BLR in
+ // the absence of tail calls.
+ Call,
+
+ // Indicates a floating-point immediate which fits into the format required
+ // by the FMOV instructions. First (and only) operand is the 8-bit encoded
+ // value of that immediate.
+ FPMOV,
+
+ // Corresponds directly to an EXTR instruction. Operands are an LHS an RHS
+ // and an LSB.
+ EXTR,
+
+ // Wraps a load from the GOT, which should always be performed with a 64-bit
+ // load instruction. This prevents the DAG combiner folding a truncate to
+ // form a smaller memory access.
+ GOTLoad,
+
+ // Performs a bitfield insert. Arguments are: the value being inserted into;
+ // the value being inserted; least significant bit changed; width of the
+ // field.
+ BFI,
+
+ // Simply a convenient node inserted during ISelLowering to represent
+ // procedure return. Will almost certainly be selected to "RET".
+ Ret,
+
+ /// Extracts a field of contiguous bits from the source and sign extends
+ /// them into a single register. Arguments are: source; immr; imms. Note
+ /// these are pre-encoded since DAG matching can't cope with combining LSB
+ /// and Width into these values itself.
+ SBFX,
+
+ /// This is an A64-ification of the standard LLVM SELECT_CC operation. The
+ /// main difference is that it only has the values and an A64 condition,
+ /// which will be produced by a setcc instruction.
+ SELECT_CC,
+
+ /// This serves most of the functions of the LLVM SETCC instruction, for two
+ /// purposes. First, it prevents optimisations from fiddling with the
+ /// compare after we've moved the CondCode information onto the SELECT_CC or
+ /// BR_CC instructions. Second, it gives a legal instruction for the actual
+ /// comparison.
+ ///
+ /// It keeps a record of the condition flags asked for because certain
+ /// instructions are only valid for a subset of condition codes.
+ SETCC,
+
+ // Designates a node which is a tail call: both a call and a return
+  // instruction as far as selection is concerned. It should be selected to an
+ // unconditional branch. Has the usual plethora of call operands, but: 1st
+ // is callee, 2nd is stack adjustment required immediately before branch.
+ TC_RETURN,
+
+ // Designates a call used to support the TLS descriptor ABI. The call itself
+ // will be indirect ("BLR xN") but a relocation-specifier (".tlsdesccall
+ // var") must be attached somehow during code generation. It takes two
+ // operands: the callee and the symbol to be relocated against.
+ TLSDESCCALL,
+
+ // Leaf node which will be lowered to an appropriate MRS to obtain the
+ // thread pointer: TPIDR_EL0.
+ THREAD_POINTER,
+
+ /// Extracts a field of contiguous bits from the source and zero extends
+ /// them into a single register. Arguments are: source; immr; imms. Note
+ /// these are pre-encoded since DAG matching can't cope with combining LSB
+ /// and Width into these values itself.
+ UBFX,
+
+ // Wraps an address which the ISelLowering phase has decided should be
+ // created using the small absolute memory model: i.e. adrp/add or
+ // adrp/mem-op. This exists to prevent bare TargetAddresses which may never
+ // get selected.
+ WrapperSmall
+ };
+}
+
+
+class AArch64Subtarget;
+class AArch64TargetMachine;
+
+class AArch64TargetLowering : public TargetLowering {
+public:
+ explicit AArch64TargetLowering(AArch64TargetMachine &TM);
+
+ const char *getTargetNodeName(unsigned Opcode) const;
+
+ CCAssignFn *CCAssignFnForNode(CallingConv::ID CC) const;
+
+ SDValue LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ SDValue LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const;
+
+ SDValue LowerCall(CallLoweringInfo &CLI,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool IsVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ void SaveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG,
+ DebugLoc DL, SDValue &Chain) const;
+
+
+ /// IsEligibleForTailCallOptimization - Check whether the call is eligible
+ /// for tail call optimization. Targets which want to do tail call
+ /// optimization should implement this function.
+ bool IsEligibleForTailCallOptimization(SDValue Callee,
+ CallingConv::ID CalleeCC,
+ bool IsVarArg,
+ bool IsCalleeStructRet,
+ bool IsCallerStructRet,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SelectionDAG& DAG) const;
+
+ /// Finds the incoming stack arguments which overlap the given fixed stack
+ /// object and incorporates their load into the current chain. This prevents an
+ /// upcoming store from clobbering the stack argument before it's used.
+ SDValue addTokenForArgument(SDValue Chain, SelectionDAG &DAG,
+ MachineFrameInfo *MFI, int ClobberedFI) const;
+
+ EVT getSetCCResultType(EVT VT) const;
+
+ bool DoesCalleeRestoreStack(CallingConv::ID CallCC, bool TailCallOpt) const;
+
+ bool IsTailCallConvention(CallingConv::ID CallCC) const;
+
+ SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+
+ bool isLegalICmpImmediate(int64_t Val) const;
+ SDValue getSelectableIntSetCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
+ SDValue &A64cc, SelectionDAG &DAG, DebugLoc &dl) const;
+
+ virtual MachineBasicBlock *
+ EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const;
+
+ MachineBasicBlock *
+ emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *MBB,
+ unsigned Size, unsigned Opcode) const;
+
+ MachineBasicBlock *
+ emitAtomicBinaryMinMax(MachineInstr *MI, MachineBasicBlock *BB,
+ unsigned Size, unsigned CmpOp,
+ A64CC::CondCodes Cond) const;
+ MachineBasicBlock *
+ emitAtomicCmpSwap(MachineInstr *MI, MachineBasicBlock *BB,
+ unsigned Size) const;
+
+ MachineBasicBlock *
+ EmitF128CSEL(MachineInstr *MI, MachineBasicBlock *MBB) const;
+
+ SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerF128ToCall(SDValue Op, SelectionDAG &DAG,
+ RTLIB::Libcall Call) const;
+ SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, bool IsSigned) const;
+ SDValue LowerGlobalAddressELF(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerTLSDescCall(SDValue SymAddr, SDValue DescAddr, DebugLoc DL,
+ SelectionDAG &DAG) const;
+ SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG, bool IsSigned) const;
+ SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
+
+ virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+
+ /// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than
+ /// a pair of mul and add instructions. fmuladd intrinsics will be expanded to
+ /// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd
+ /// is expanded to mul + add.
+ virtual bool isFMAFasterThanMulAndAdd(EVT) const { return true; }
+
+ ConstraintType getConstraintType(const std::string &Constraint) const;
+
+ ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &Info,
+ const char *Constraint) const;
+ void LowerAsmOperandForConstraint(SDValue Op,
+ std::string &Constraint,
+ std::vector<SDValue> &Ops,
+ SelectionDAG &DAG) const;
+
+ std::pair<unsigned, const TargetRegisterClass*>
+ getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const;
+private:
+ const AArch64Subtarget *Subtarget;
+ const TargetRegisterInfo *RegInfo;
+ const InstrItineraryData *Itins;
+};
+} // namespace llvm
+
+#endif // LLVM_TARGET_AARCH64_ISELLOWERING_H
diff --git a/lib/Target/AArch64/AArch64InstrFormats.td b/lib/Target/AArch64/AArch64InstrFormats.td
new file mode 100644
index 0000000000..ce663960d4
--- /dev/null
+++ b/lib/Target/AArch64/AArch64InstrFormats.td
@@ -0,0 +1,1011 @@
+//===- AArch64InstrFormats.td - AArch64 Instruction Formats --*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//
+// A64 Instruction Format Definitions.
+//
+
+// A64 is currently the only instruction set supported by the AArch64
+// architecture.
+class A64Inst<dag outs, dag ins, string asmstr, list<dag> patterns,
+ InstrItinClass itin>
+ : Instruction
+{
+ // All A64 instructions are 32-bit. This field will be filled in
+  // gradually going down the hierarchy.
+ field bits<32> Inst;
+
+ field bits<32> Unpredictable = 0;
+ // SoftFail is the generic name for this field, but we alias it so
+ // as to make it more obvious what it means in ARM-land.
+ field bits<32> SoftFail = Unpredictable;
+
+ // LLVM-level model of the AArch64/A64 distinction.
+ let Namespace = "AArch64";
+ let DecoderNamespace = "A64";
+ let Size = 4;
+
+ // Set the templated fields
+ let OutOperandList = outs;
+ let InOperandList = ins;
+ let AsmString = asmstr;
+ let Pattern = patterns;
+ let Itinerary = itin;
+}
+
+class PseudoInst<dag outs, dag ins, list<dag> patterns> : Instruction
+{
+ let Namespace = "AArch64";
+
+ let OutOperandList = outs;
+ let InOperandList= ins;
+ let Pattern = patterns;
+ let isCodeGenOnly = 1;
+ let isPseudo = 1;
+}
+
+// Represents a pseudo-instruction that, for whatever reason, stands for a
+// single A64 instruction: the eventual result will be a 32-bit real
+// instruction.
+class A64PseudoInst<dag outs, dag ins, list<dag> patterns>
+ : PseudoInst<outs, ins, patterns>
+{
+ let Size = 4;
+}
+
+// As above, this will be a single A64 instruction, but we can actually give the
+// expansion in TableGen.
+class A64PseudoExpand<dag outs, dag ins, list<dag> patterns, dag Result>
+ : A64PseudoInst<outs, ins, patterns>,
+ PseudoInstExpansion<Result>;
+
+
+// First, some common cross-hierarchy register formats.
+
+class A64InstRd<dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64Inst<outs, ins, asmstr, patterns, itin>
+{
+ bits<5> Rd;
+
+ let Inst{4-0} = Rd;
+}
+
+class A64InstRt<dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64Inst<outs, ins, asmstr, patterns, itin>
+{
+ bits<5> Rt;
+
+ let Inst{4-0} = Rt;
+}
+
+
+class A64InstRdn<dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRd<outs, ins, asmstr, patterns, itin>
+{
+  // Inherit Rd in 4-0
+ bits<5> Rn;
+
+ let Inst{9-5} = Rn;
+}
+
+class A64InstRtn<dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRt<outs, ins, asmstr, patterns, itin>
+{
+  // Inherit Rt in 4-0
+ bits<5> Rn;
+
+ let Inst{9-5} = Rn;
+}
+
+// Instructions taking Rt,Rt2,Rn
+class A64InstRtt2n<dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRtn<outs, ins, asmstr, patterns, itin>
+{
+ bits<5> Rt2;
+
+ let Inst{14-10} = Rt2;
+}
+
+class A64InstRdnm<dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdn<outs, ins, asmstr, patterns, itin>
+{
+ bits<5> Rm;
+
+ let Inst{20-16} = Rm;
+}
+
+//===----------------------------------------------------------------------===//
+//
+// Actual A64 Instruction Formats
+//
+
+// Format for Add-subtract (extended register) instructions.
+class A64I_addsubext<bit sf, bit op, bit S, bits<2> opt, bits<3> option,
+ dag outs, dag ins, string asmstr, list<dag> patterns,
+ InstrItinClass itin>
+ : A64InstRdnm<outs, ins, asmstr, patterns, itin>
+{
+ bits<3> Imm3;
+
+ let Inst{31} = sf;
+ let Inst{30} = op;
+ let Inst{29} = S;
+ let Inst{28-24} = 0b01011;
+ let Inst{23-22} = opt;
+ let Inst{21} = 0b1;
+ // Rm inherited in 20-16
+ let Inst{15-13} = option;
+ let Inst{12-10} = Imm3;
+ // Rn inherited in 9-5
+ // Rd inherited in 4-0
+}
+
+// Format for Add-subtract (immediate) instructions.
+class A64I_addsubimm<bit sf, bit op, bit S, bits<2> shift,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdn<outs, ins, asmstr, patterns, itin>
+{
+ bits<12> Imm12;
+
+ let Inst{31} = sf;
+ let Inst{30} = op;
+ let Inst{29} = S;
+ let Inst{28-24} = 0b10001;
+ let Inst{23-22} = shift;
+ let Inst{21-10} = Imm12;
+}
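+
+// For example, a 64-bit "add x2, x3, #4" would be encoded with sf=1, op=0,
+// S=0, shift=0b00, Imm12=4, Rn=3 and Rd=2 in this format.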
+
+// Format for Add-subtract (shifted register) instructions.
+class A64I_addsubshift<bit sf, bit op, bit S, bits<2> shift,
+ dag outs, dag ins, string asmstr, list<dag> patterns,
+ InstrItinClass itin>
+ : A64InstRdnm<outs, ins, asmstr, patterns, itin>
+{
+ bits<6> Imm6;
+
+ let Inst{31} = sf;
+ let Inst{30} = op;
+ let Inst{29} = S;
+ let Inst{28-24} = 0b01011;
+ let Inst{23-22} = shift;
+ let Inst{21} = 0b0;
+ // Rm inherited in 20-16
+ let Inst{15-10} = Imm6;
+ // Rn inherited in 9-5
+ // Rd inherited in 4-0
+}
+
+// Format for Add-subtract (with carry) instructions.
+class A64I_addsubcarry<bit sf, bit op, bit S, bits<6> opcode2,
+ dag outs, dag ins, string asmstr, list<dag> patterns,
+ InstrItinClass itin>
+ : A64InstRdnm<outs, ins, asmstr, patterns, itin>
+{
+ let Inst{31} = sf;
+ let Inst{30} = op;
+ let Inst{29} = S;
+ let Inst{28-21} = 0b11010000;
+ // Rm inherited in 20-16
+ let Inst{15-10} = opcode2;
+ // Rn inherited in 9-5
+ // Rd inherited in 4-0
+}
+
+
+// Format for Bitfield instructions
+class A64I_bitfield<bit sf, bits<2> opc, bit n,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdn<outs, ins, asmstr, patterns, itin>
+{
+ bits<6> ImmR;
+ bits<6> ImmS;
+
+ let Inst{31} = sf;
+ let Inst{30-29} = opc;
+ let Inst{28-23} = 0b100110;
+ let Inst{22} = n;
+ let Inst{21-16} = ImmR;
+ let Inst{15-10} = ImmS;
+ // Inherit Rn in 9-5
+ // Inherit Rd in 4-0
+}
+
+// Format for compare and branch (immediate) instructions.
+class A64I_cmpbr<bit sf, bit op,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRt<outs, ins, asmstr, patterns, itin>
+{
+ bits<19> Label;
+
+ let Inst{31} = sf;
+ let Inst{30-25} = 0b011010;
+ let Inst{24} = op;
+ let Inst{23-5} = Label;
+ // Inherit Rt in 4-0
+}
+
+// Format for conditional branch (immediate) instructions.
+class A64I_condbr<bit o1, bit o0,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64Inst<outs, ins, asmstr, patterns, itin>
+{
+ bits<19> Label;
+ bits<4> Cond;
+
+ let Inst{31-25} = 0b0101010;
+ let Inst{24} = o1;
+ let Inst{23-5} = Label;
+ let Inst{4} = o0;
+ let Inst{3-0} = Cond;
+}
+
+// Format for conditional compare (immediate) instructions.
+class A64I_condcmpimm<bit sf, bit op, bit o2, bit o3, bit s,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64Inst<outs, ins, asmstr, patterns, itin>
+{
+ bits<5> Rn;
+ bits<5> UImm5;
+ bits<4> NZCVImm;
+ bits<4> Cond;
+
+ let Inst{31} = sf;
+ let Inst{30} = op;
+ let Inst{29} = s;
+ let Inst{28-21} = 0b11010010;
+ let Inst{20-16} = UImm5;
+ let Inst{15-12} = Cond;
+ let Inst{11} = 0b1;
+ let Inst{10} = o2;
+ let Inst{9-5} = Rn;
+ let Inst{4} = o3;
+ let Inst{3-0} = NZCVImm;
+}
+
+// Format for conditional compare (register) instructions.
+class A64I_condcmpreg<bit sf, bit op, bit o2, bit o3, bit s,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64Inst<outs, ins, asmstr, patterns, itin>
+{
+ bits<5> Rn;
+ bits<5> Rm;
+ bits<4> NZCVImm;
+ bits<4> Cond;
+
+
+ let Inst{31} = sf;
+ let Inst{30} = op;
+ let Inst{29} = s;
+ let Inst{28-21} = 0b11010010;
+ let Inst{20-16} = Rm;
+ let Inst{15-12} = Cond;
+ let Inst{11} = 0b0;
+ let Inst{10} = o2;
+ let Inst{9-5} = Rn;
+ let Inst{4} = o3;
+ let Inst{3-0} = NZCVImm;
+}
+
+// Format for conditional select instructions.
+class A64I_condsel<bit sf, bit op, bit s, bits<2> op2,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdnm<outs, ins, asmstr, patterns, itin>
+{
+ bits<4> Cond;
+
+ let Inst{31} = sf;
+ let Inst{30} = op;
+ let Inst{29} = s;
+ let Inst{28-21} = 0b11010100;
+ // Inherit Rm in 20-16
+ let Inst{15-12} = Cond;
+ let Inst{11-10} = op2;
+ // Inherit Rn in 9-5
+ // Inherit Rd in 4-0
+}
+
+// Format for data processing (1 source) instructions
+class A64I_dp_1src<bit sf, bit S, bits<5> opcode2, bits<6> opcode,
+ string asmstr, dag outs, dag ins,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdn<outs, ins, asmstr, patterns, itin>
+{
+ let Inst{31} = sf;
+ let Inst{30} = 0b1;
+ let Inst{29} = S;
+ let Inst{28-21} = 0b11010110;
+ let Inst{20-16} = opcode2;
+ let Inst{15-10} = opcode;
+}
+
+// Format for data processing (2 source) instructions
+class A64I_dp_2src<bit sf, bits<6> opcode, bit S,
+ string asmstr, dag outs, dag ins,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdnm<outs, ins, asmstr, patterns, itin>
+{
+ let Inst{31} = sf;
+ let Inst{30} = 0b0;
+ let Inst{29} = S;
+ let Inst{28-21} = 0b11010110;
+ let Inst{15-10} = opcode;
+}
+
+// Format for data-processing (3 source) instructions
+
+class A64I_dp3<bit sf, bits<6> opcode,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdnm<outs, ins, asmstr, patterns, itin>
+{
+ bits<5> Ra;
+
+ let Inst{31} = sf;
+ let Inst{30-29} = opcode{5-4};
+ let Inst{28-24} = 0b11011;
+ let Inst{23-21} = opcode{3-1};
+ // Inherits Rm in 20-16
+ let Inst{15} = opcode{0};
+ let Inst{14-10} = Ra;
+ // Inherits Rn in 9-5
+ // Inherits Rd in 4-0
+}
+
+// Format for exception generation instructions
+class A64I_exception<bits<3> opc, bits<3> op2, bits<2> ll,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64Inst<outs, ins, asmstr, patterns, itin>
+{
+ bits<16> UImm16;
+
+ let Inst{31-24} = 0b11010100;
+ let Inst{23-21} = opc;
+ let Inst{20-5} = UImm16;
+ let Inst{4-2} = op2;
+ let Inst{1-0} = ll;
+}
+
+// Format for extract (immediate) instructions
+class A64I_extract<bit sf, bits<3> op, bit n,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdnm<outs, ins, asmstr, patterns, itin>
+{
+ bits<6> LSB;
+
+ let Inst{31} = sf;
+ let Inst{30-29} = op{2-1};
+ let Inst{28-23} = 0b100111;
+ let Inst{22} = n;
+ let Inst{21} = op{0};
+ // Inherits Rm in bits 20-16
+ let Inst{15-10} = LSB;
+ // Inherits Rn in 9-5
+ // Inherits Rd in 4-0
+}
+
+// Format for floating-point compare instructions.
+class A64I_fpcmp<bit m, bit s, bits<2> type, bits<2> op, bits<5> opcode2,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64Inst<outs, ins, asmstr, patterns, itin>
+{
+ bits<5> Rn;
+ bits<5> Rm;
+
+ let Inst{31} = m;
+ let Inst{30} = 0b0;
+ let Inst{29} = s;
+ let Inst{28-24} = 0b11110;
+ let Inst{23-22} = type;
+ let Inst{21} = 0b1;
+ let Inst{20-16} = Rm;
+ let Inst{15-14} = op;
+ let Inst{13-10} = 0b1000;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = opcode2;
+}
+
+// Format for floating-point conditional compare instructions.
+class A64I_fpccmp<bit m, bit s, bits<2> type, bit op,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdn<outs, ins, asmstr, patterns, itin>
+{
+ bits<5> Rn;
+ bits<5> Rm;
+ bits<4> NZCVImm;
+ bits<4> Cond;
+
+ let Inst{31} = m;
+ let Inst{30} = 0b0;
+ let Inst{29} = s;
+ let Inst{28-24} = 0b11110;
+ let Inst{23-22} = type;
+ let Inst{21} = 0b1;
+ let Inst{20-16} = Rm;
+ let Inst{15-12} = Cond;
+ let Inst{11-10} = 0b01;
+ let Inst{9-5} = Rn;
+ let Inst{4} = op;
+ let Inst{3-0} = NZCVImm;
+}
+
+// Format for floating-point conditional select instructions.
+class A64I_fpcondsel<bit m, bit s, bits<2> type,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdnm<outs, ins, asmstr, patterns, itin>
+{
+ bits<4> Cond;
+
+ let Inst{31} = m;
+ let Inst{30} = 0b0;
+ let Inst{29} = s;
+ let Inst{28-24} = 0b11110;
+ let Inst{23-22} = type;
+ let Inst{21} = 0b1;
+ // Inherit Rm in 20-16
+ let Inst{15-12} = Cond;
+ let Inst{11-10} = 0b11;
+ // Inherit Rn in 9-5
+ // Inherit Rd in 4-0
+}
+
+
+// Format for floating-point data-processing (1 source) instructions.
+class A64I_fpdp1<bit m, bit s, bits<2> type, bits<6> opcode,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdn<outs, ins, asmstr, patterns, itin>
+{
+ let Inst{31} = m;
+ let Inst{30} = 0b0;
+ let Inst{29} = s;
+ let Inst{28-24} = 0b11110;
+ let Inst{23-22} = type;
+ let Inst{21} = 0b1;
+ let Inst{20-15} = opcode;
+ let Inst{14-10} = 0b10000;
+ // Inherit Rn in 9-5
+ // Inherit Rd in 4-0
+}
+
+// Format for floating-point data-processing (2 sources) instructions.
+class A64I_fpdp2<bit m, bit s, bits<2> type, bits<4> opcode,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdnm<outs, ins, asmstr, patterns, itin>
+{
+ let Inst{31} = m;
+ let Inst{30} = 0b0;
+ let Inst{29} = s;
+ let Inst{28-24} = 0b11110;
+ let Inst{23-22} = type;
+ let Inst{21} = 0b1;
+ // Inherit Rm in 20-16
+ let Inst{15-12} = opcode;
+ let Inst{11-10} = 0b10;
+ // Inherit Rn in 9-5
+ // Inherit Rd in 4-0
+}
+
+// Format for floating-point data-processing (3 sources) instructions.
+class A64I_fpdp3<bit m, bit s, bits<2> type, bit o1, bit o0,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdnm<outs, ins, asmstr, patterns, itin>
+{
+ bits<5> Ra;
+
+ let Inst{31} = m;
+ let Inst{30} = 0b0;
+ let Inst{29} = s;
+ let Inst{28-24} = 0b11111;
+ let Inst{23-22} = type;
+ let Inst{21} = o1;
+ // Inherit Rm in 20-16
+ let Inst{15} = o0;
+ let Inst{14-10} = Ra;
+ // Inherit Rn in 9-5
+ // Inherit Rd in 4-0
+}
+
+// Format for floating-point <-> fixed-point conversion instructions.
+class A64I_fpfixed<bit sf, bit s, bits<2> type, bits<2> mode, bits<3> opcode,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdn<outs, ins, asmstr, patterns, itin>
+{
+ bits<6> Scale;
+
+ let Inst{31} = sf;
+ let Inst{30} = 0b0;
+ let Inst{29} = s;
+ let Inst{28-24} = 0b11110;
+ let Inst{23-22} = type;
+ let Inst{21} = 0b0;
+ let Inst{20-19} = mode;
+ let Inst{18-16} = opcode;
+ let Inst{15-10} = Scale;
+ // Inherit Rn in 9-5
+ // Inherit Rd in 4-0
+}
+
+// Format for floating-point <-> integer conversion instructions.
+class A64I_fpint<bit sf, bit s, bits<2> type, bits<2> rmode, bits<3> opcode,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdn<outs, ins, asmstr, patterns, itin>
+{
+ let Inst{31} = sf;
+ let Inst{30} = 0b0;
+ let Inst{29} = s;
+ let Inst{28-24} = 0b11110;
+ let Inst{23-22} = type;
+ let Inst{21} = 0b1;
+ let Inst{20-19} = rmode;
+ let Inst{18-16} = opcode;
+ let Inst{15-10} = 0b000000;
+ // Inherit Rn in 9-5
+ // Inherit Rd in 4-0
+}
+
+
+// Format for floating-point immediate instructions.
+class A64I_fpimm<bit m, bit s, bits<2> type, bits<5> imm5,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRd<outs, ins, asmstr, patterns, itin>
+{
+ bits<8> Imm8;
+
+ let Inst{31} = m;
+ let Inst{30} = 0b0;
+ let Inst{29} = s;
+ let Inst{28-24} = 0b11110;
+ let Inst{23-22} = type;
+ let Inst{21} = 0b1;
+ let Inst{20-13} = Imm8;
+ let Inst{12-10} = 0b100;
+ let Inst{9-5} = imm5;
+ // Inherit Rd in 4-0
+}
+
+// Format for load-register (literal) instructions.
+class A64I_LDRlit<bits<2> opc, bit v,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRt<outs, ins, asmstr, patterns, itin>
+{
+ bits<19> Imm19;
+
+ let Inst{31-30} = opc;
+ let Inst{29-27} = 0b011;
+ let Inst{26} = v;
+ let Inst{25-24} = 0b00;
+ let Inst{23-5} = Imm19;
+ // Inherit Rt in 4-0
+}
+
+// Format for load-store exclusive instructions.
+class A64I_LDSTex_tn<bits<2> size, bit o2, bit L, bit o1, bit o0,
+ dag outs, dag ins, string asmstr,
+ list <dag> patterns, InstrItinClass itin>
+ : A64InstRtn<outs, ins, asmstr, patterns, itin>
+{
+ let Inst{31-30} = size;
+ let Inst{29-24} = 0b001000;
+ let Inst{23} = o2;
+ let Inst{22} = L;
+ let Inst{21} = o1;
+ let Inst{15} = o0;
+}
+
+class A64I_LDSTex_tt2n<bits<2> size, bit o2, bit L, bit o1, bit o0,
+ dag outs, dag ins, string asmstr,
+ list <dag> patterns, InstrItinClass itin>:
+ A64I_LDSTex_tn<size, o2, L, o1, o0, outs, ins, asmstr, patterns, itin>{
+ bits<5> Rt2;
+ let Inst{14-10} = Rt2;
+}
+
+class A64I_LDSTex_stn<bits<2> size, bit o2, bit L, bit o1, bit o0,
+ dag outs, dag ins, string asmstr,
+ list <dag> patterns, InstrItinClass itin>:
+ A64I_LDSTex_tn<size, o2, L, o1, o0, outs, ins, asmstr, patterns, itin>{
+ bits<5> Rs;
+ let Inst{20-16} = Rs;
+}
+
+class A64I_LDSTex_stt2n<bits<2> size, bit o2, bit L, bit o1, bit o0,
+ dag outs, dag ins, string asmstr,
+ list <dag> patterns, InstrItinClass itin>:
+ A64I_LDSTex_stn<size, o2, L, o1, o0, outs, ins, asmstr, patterns, itin>{
+ bits<5> Rt2;
+ let Inst{14-10} = Rt2;
+}
+
+// Format for load-store register (immediate post-indexed) instructions
+class A64I_LSpostind<bits<2> size, bit v, bits<2> opc,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRtn<outs, ins, asmstr, patterns, itin>
+{
+ bits<9> SImm9;
+
+ let Inst{31-30} = size;
+ let Inst{29-27} = 0b111;
+ let Inst{26} = v;
+ let Inst{25-24} = 0b00;
+ let Inst{23-22} = opc;
+ let Inst{21} = 0b0;
+ let Inst{20-12} = SImm9;
+ let Inst{11-10} = 0b01;
+ // Inherit Rn in 9-5
+ // Inherit Rt in 4-0
+}
+
+// Format for load-store register (immediate pre-indexed) instructions
+class A64I_LSpreind<bits<2> size, bit v, bits<2> opc,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRtn<outs, ins, asmstr, patterns, itin>
+{
+ bits<9> SImm9;
+
+ let Inst{31-30} = size;
+ let Inst{29-27} = 0b111;
+ let Inst{26} = v;
+ let Inst{25-24} = 0b00;
+ let Inst{23-22} = opc;
+ let Inst{21} = 0b0;
+ let Inst{20-12} = SImm9;
+ let Inst{11-10} = 0b11;
+ // Inherit Rn in 9-5
+ // Inherit Rt in 4-0
+}
+
+// Format for load-store register (unprivileged) instructions
+class A64I_LSunpriv<bits<2> size, bit v, bits<2> opc,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRtn<outs, ins, asmstr, patterns, itin>
+{
+ bits<9> SImm9;
+
+ let Inst{31-30} = size;
+ let Inst{29-27} = 0b111;
+ let Inst{26} = v;
+ let Inst{25-24} = 0b00;
+ let Inst{23-22} = opc;
+ let Inst{21} = 0b0;
+ let Inst{20-12} = SImm9;
+ let Inst{11-10} = 0b10;
+ // Inherit Rn in 9-5
+ // Inherit Rt in 4-0
+}
+
+// Format for load-store (unscaled immediate) instructions.
+class A64I_LSunalimm<bits<2> size, bit v, bits<2> opc,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRtn<outs, ins, asmstr, patterns, itin>
+{
+ bits<9> SImm9;
+
+ let Inst{31-30} = size;
+ let Inst{29-27} = 0b111;
+ let Inst{26} = v;
+ let Inst{25-24} = 0b00;
+ let Inst{23-22} = opc;
+ let Inst{21} = 0b0;
+ let Inst{20-12} = SImm9;
+ let Inst{11-10} = 0b00;
+ // Inherit Rn in 9-5
+ // Inherit Rt in 4-0
+}
+
+
+// Format for load-store (unsigned immediate) instructions.
+class A64I_LSunsigimm<bits<2> size, bit v, bits<2> opc,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRtn<outs, ins, asmstr, patterns, itin>
+{
+ bits<12> UImm12;
+
+ let Inst{31-30} = size;
+ let Inst{29-27} = 0b111;
+ let Inst{26} = v;
+ let Inst{25-24} = 0b01;
+ let Inst{23-22} = opc;
+ let Inst{21-10} = UImm12;
+}
+
+// Format for load-store register (register offset) instructions.
+class A64I_LSregoff<bits<2> size, bit v, bits<2> opc, bit optionlo,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRtn<outs, ins, asmstr, patterns, itin>
+{
+ bits<5> Rm;
+
+ // Complex operand selection needed for these instructions, so they
+ // need an "addr" field for encoding/decoding to be generated.
+ bits<3> Ext;
+ // OptionHi = Ext{2-1}
+ // S = Ext{0}
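+ // For example, Ext = 0b111 encodes OptionHi = 0b11 and S = 1, i.e. a
+ // sign-extended register offset scaled by the access size.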
+
+ let Inst{31-30} = size;
+ let Inst{29-27} = 0b111;
+ let Inst{26} = v;
+ let Inst{25-24} = 0b00;
+ let Inst{23-22} = opc;
+ let Inst{21} = 0b1;
+ let Inst{20-16} = Rm;
+ let Inst{15-14} = Ext{2-1};
+ let Inst{13} = optionlo;
+ let Inst{12} = Ext{0};
+ let Inst{11-10} = 0b10;
+ // Inherits Rn in 9-5
+ // Inherits Rt in 4-0
+
+ let AddedComplexity = 50;
+}
+
+// Format for Load-store register pair (offset) instructions
+class A64I_LSPoffset<bits<2> opc, bit v, bit l,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRtt2n<outs, ins, asmstr, patterns, itin>
+{
+ bits<7> SImm7;
+
+ let Inst{31-30} = opc;
+ let Inst{29-27} = 0b101;
+ let Inst{26} = v;
+ let Inst{25-23} = 0b010;
+ let Inst{22} = l;
+ let Inst{21-15} = SImm7;
+ // Inherit Rt2 in 14-10
+ // Inherit Rn in 9-5
+ // Inherit Rt in 4-0
+}
+
+// Format for Load-store register pair (post-indexed) instructions
+class A64I_LSPpostind<bits<2> opc, bit v, bit l,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRtt2n<outs, ins, asmstr, patterns, itin>
+{
+ bits<7> SImm7;
+
+ let Inst{31-30} = opc;
+ let Inst{29-27} = 0b101;
+ let Inst{26} = v;
+ let Inst{25-23} = 0b001;
+ let Inst{22} = l;
+ let Inst{21-15} = SImm7;
+ // Inherit Rt2 in 14-10
+ // Inherit Rn in 9-5
+ // Inherit Rt in 4-0
+}
+
+// Format for Load-store register pair (pre-indexed) instructions
+class A64I_LSPpreind<bits<2> opc, bit v, bit l,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRtt2n<outs, ins, asmstr, patterns, itin>
+{
+ bits<7> SImm7;
+
+ let Inst{31-30} = opc;
+ let Inst{29-27} = 0b101;
+ let Inst{26} = v;
+ let Inst{25-23} = 0b011;
+ let Inst{22} = l;
+ let Inst{21-15} = SImm7;
+ // Inherit Rt2 in 14-10
+ // Inherit Rn in 9-5
+ // Inherit Rt in 4-0
+}
+
+// Format for Load-store non-temporal register pair (offset) instructions
+class A64I_LSPnontemp<bits<2> opc, bit v, bit l,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRtt2n<outs, ins, asmstr, patterns, itin>
+{
+ bits<7> SImm7;
+
+ let Inst{31-30} = opc;
+ let Inst{29-27} = 0b101;
+ let Inst{26} = v;
+ let Inst{25-23} = 0b000;
+ let Inst{22} = l;
+ let Inst{21-15} = SImm7;
+ // Inherit Rt2 in 14-10
+ // Inherit Rn in 9-5
+ // Inherit Rt in 4-0
+}
+
+// Format for Logical (immediate) instructions
+class A64I_logicalimm<bit sf, bits<2> opc,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdn<outs, ins, asmstr, patterns, itin>
+{
+ bit N;
+ bits<6> ImmR;
+ bits<6> ImmS;
+
+ // N, ImmR and ImmS have no separate existence in any assembly syntax (or for
+ // selection), so we'll combine them into a single field here.
+ bits<13> Imm;
+ // N = Imm{12};
+ // ImmR = Imm{11-6};
+ // ImmS = Imm{5-0};
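+ // For example, the 64-bit bitmask 0x00000000000000ff (eight set bits in a
+ // 64-bit element, no rotation) has N = 1, ImmR = 0b000000, ImmS = 0b000111,
+ // i.e. Imm = 0b1_000000_000111.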
+
+ let Inst{31} = sf;
+ let Inst{30-29} = opc;
+ let Inst{28-23} = 0b100100;
+ let Inst{22} = Imm{12};
+ let Inst{21-16} = Imm{11-6};
+ let Inst{15-10} = Imm{5-0};
+ // Rn inherited in 9-5
+ // Rd inherited in 4-0
+}
+
+// Format for Logical (shifted register) instructions
+class A64I_logicalshift<bit sf, bits<2> opc, bits<2> shift, bit N,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdnm<outs, ins, asmstr, patterns, itin>
+{
+ bits<6> Imm6;
+
+ let Inst{31} = sf;
+ let Inst{30-29} = opc;
+ let Inst{28-24} = 0b01010;
+ let Inst{23-22} = shift;
+ let Inst{21} = N;
+ // Rm inherited
+ let Inst{15-10} = Imm6;
+ // Rn inherited
+ // Rd inherited
+}
+
+// Format for Move wide (immediate)
+class A64I_movw<bit sf, bits<2> opc,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRd<outs, ins, asmstr, patterns, itin>
+{
+ bits<16> UImm16;
+ bits<2> Shift; // Called "hw" officially
+
+ let Inst{31} = sf;
+ let Inst{30-29} = opc;
+ let Inst{28-23} = 0b100101;
+ let Inst{22-21} = Shift;
+ let Inst{20-5} = UImm16;
+ // Inherits Rd in 4-0
+}
+
+// Format for PC-relative addressing instructions, ADR and ADRP.
+class A64I_PCADR<bit op,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRd<outs, ins, asmstr, patterns, itin>
+{
+ bits<21> Label;
+
+ let Inst{31} = op;
+ let Inst{30-29} = Label{1-0};
+ let Inst{28-24} = 0b10000;
+ let Inst{23-5} = Label{20-2};
+}
+
+// Format for system instructions
+class A64I_system<bit l,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64Inst<outs, ins, asmstr, patterns, itin>
+{
+ bits<2> Op0;
+ bits<3> Op1;
+ bits<4> CRn;
+ bits<4> CRm;
+ bits<3> Op2;
+ bits<5> Rt;
+
+ let Inst{31-22} = 0b1101010100;
+ let Inst{21} = l;
+ let Inst{20-19} = Op0;
+ let Inst{18-16} = Op1;
+ let Inst{15-12} = CRn;
+ let Inst{11-8} = CRm;
+ let Inst{7-5} = Op2;
+ let Inst{4-0} = Rt;
+
+ // These instructions can do horrible things.
+ let hasSideEffects = 1;
+}
+
+// Format for unconditional branch (immediate) instructions
+class A64I_Bimm<bit op,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64Inst<outs, ins, asmstr, patterns, itin>
+{
+ // Doubly special in not even sharing register fields with other
+ // instructions; the only operand is the 26-bit branch offset.
+ bits<26> Label;
+
+ let Inst{31} = op;
+ let Inst{30-26} = 0b00101;
+ let Inst{25-0} = Label;
+}
+
+// Format for Test & branch (immediate) instructions
+class A64I_TBimm<bit op,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRt<outs, ins, asmstr, patterns, itin>
+{
+ // Doubly special: the tested bit number is split across the instruction
+ // (Imm{5} goes in bit 31, Imm{4-0} in bits 23-19) alongside the 14-bit
+ // branch offset.
+ bits<6> Imm;
+ bits<14> Label;
+
+ let Inst{31} = Imm{5};
+ let Inst{30-25} = 0b011011;
+ let Inst{24} = op;
+ let Inst{23-19} = Imm{4-0};
+ let Inst{18-5} = Label;
+ // Inherit Rt in 4-0
+}
+
+// Format for Unconditional branch (register) instructions, including
+// RET. Shares no fields with instructions further up the hierarchy
+// so top-level.
+class A64I_Breg<bits<4> opc, bits<5> op2, bits<6> op3, bits<5> op4,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64Inst<outs, ins, asmstr, patterns, itin>
+{
+ // Doubly special in not even sharing register fields with other
+ // instructions, so we create our own Rn here.
+ bits<5> Rn;
+
+ let Inst{31-25} = 0b1101011;
+ let Inst{24-21} = opc;
+ let Inst{20-16} = op2;
+ let Inst{15-10} = op3;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = op4;
+}
+
diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp
new file mode 100644
index 0000000000..967960c1e5
--- /dev/null
+++ b/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -0,0 +1,805 @@
+//===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the AArch64 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64.h"
+#include "AArch64InstrInfo.h"
+#include "AArch64MachineFunctionInfo.h"
+#include "AArch64TargetMachine.h"
+#include "MCTargetDesc/AArch64BaseInfo.h"
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+
+#include <algorithm>
+
+#define GET_INSTRINFO_CTOR
+#include "AArch64GenInstrInfo.inc"
+
+using namespace llvm;
+
+AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
+ : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP),
+ RI(*this, STI), Subtarget(STI) {}
+
+void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ unsigned Opc = 0;
+ unsigned ZeroReg = 0;
+ if (DestReg == AArch64::XSP || SrcReg == AArch64::XSP) {
+ // E.g. ADD xDst, xsp, #0 (, lsl #0)
+ BuildMI(MBB, I, DL, get(AArch64::ADDxxi_lsl0_s), DestReg)
+ .addReg(SrcReg)
+ .addImm(0);
+ return;
+ } else if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
+ // E.g. ADD wDST, wsp, #0 (, lsl #0)
+ BuildMI(MBB, I, DL, get(AArch64::ADDwwi_lsl0_s), DestReg)
+ .addReg(SrcReg)
+ .addImm(0);
+ return;
+ } else if (DestReg == AArch64::NZCV) {
+ assert(AArch64::GPR64RegClass.contains(SrcReg));
+ // E.g. MSR NZCV, xDST
+ BuildMI(MBB, I, DL, get(AArch64::MSRix))
+ .addImm(A64SysReg::NZCV)
+ .addReg(SrcReg);
+ return;
+ } else if (SrcReg == AArch64::NZCV) {
+ assert(AArch64::GPR64RegClass.contains(DestReg));
+ // E.g. MRS xDST, NZCV
+ BuildMI(MBB, I, DL, get(AArch64::MRSxi), DestReg)
+ .addImm(A64SysReg::NZCV);
+ return;
+ } else if (AArch64::GPR64RegClass.contains(DestReg)) {
+ assert(AArch64::GPR64RegClass.contains(SrcReg));
+ Opc = AArch64::ORRxxx_lsl;
+ ZeroReg = AArch64::XZR;
+ } else if (AArch64::GPR32RegClass.contains(DestReg)) {
+ assert(AArch64::GPR32RegClass.contains(SrcReg));
+ Opc = AArch64::ORRwww_lsl;
+ ZeroReg = AArch64::WZR;
+ } else if (AArch64::FPR32RegClass.contains(DestReg)) {
+ assert(AArch64::FPR32RegClass.contains(SrcReg));
+ BuildMI(MBB, I, DL, get(AArch64::FMOVss), DestReg)
+ .addReg(SrcReg);
+ return;
+ } else if (AArch64::FPR64RegClass.contains(DestReg)) {
+ assert(AArch64::FPR64RegClass.contains(SrcReg));
+ BuildMI(MBB, I, DL, get(AArch64::FMOVdd), DestReg)
+ .addReg(SrcReg);
+ return;
+ } else if (AArch64::FPR128RegClass.contains(DestReg)) {
+ assert(AArch64::FPR128RegClass.contains(SrcReg));
+
+ // FIXME: there's no good way to do this, at least without NEON:
+ // + There's no single move instruction for q-registers
+ // + We can't create a spill slot and use normal STR/LDR because stack
+ // allocation has already happened
+ // + We can't go via X-registers with FMOV because register allocation has
+ // already happened.
+ // This may not be efficient, but at least it works.
+ BuildMI(MBB, I, DL, get(AArch64::LSFP128_PreInd_STR), AArch64::XSP)
+ .addReg(SrcReg)
+ .addReg(AArch64::XSP)
+ .addImm(0x1ff & -16);
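+ // (0x1ff & -16 is simply -16 truncated to the store's 9-bit pre-index
+ // immediate field.)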
+
+ BuildMI(MBB, I, DL, get(AArch64::LSFP128_PostInd_LDR), DestReg)
+ .addReg(AArch64::XSP, RegState::Define)
+ .addReg(AArch64::XSP)
+ .addImm(16);
+ return;
+ } else {
+ llvm_unreachable("Unknown register class in copyPhysReg");
+ }
+
+ // E.g. ORR xDst, xzr, xSrc, lsl #0
+ BuildMI(MBB, I, DL, get(Opc), DestReg)
+ .addReg(ZeroReg)
+ .addReg(SrcReg)
+ .addImm(0);
+}
+
+MachineInstr *
+AArch64InstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx,
+ uint64_t Offset, const MDNode *MDPtr,
+ DebugLoc DL) const {
+ MachineInstrBuilder MIB = BuildMI(MF, DL, get(AArch64::DBG_VALUE))
+ .addFrameIndex(FrameIx).addImm(0)
+ .addImm(Offset)
+ .addMetadata(MDPtr);
+ return &*MIB;
+}
+
+/// Does the Opcode represent a conditional branch that we can remove and re-add
+/// at the end of a basic block?
+static bool isCondBranch(unsigned Opc) {
+ return Opc == AArch64::Bcc || Opc == AArch64::CBZw || Opc == AArch64::CBZx ||
+ Opc == AArch64::CBNZw || Opc == AArch64::CBNZx ||
+ Opc == AArch64::TBZwii || Opc == AArch64::TBZxii ||
+ Opc == AArch64::TBNZwii || Opc == AArch64::TBNZxii;
+}
+
+/// Takes apart a given conditional branch MachineInstr (see isCondBranch),
+/// setting TBB to the destination basic block and populating the Cond vector
+/// with data necessary to recreate the conditional branch at a later
+/// date. First element will be the opcode, and subsequent ones define the
+/// conditions being branched on in an instruction-specific manner.
+static void classifyCondBranch(MachineInstr *I, MachineBasicBlock *&TBB,
+ SmallVectorImpl<MachineOperand> &Cond) {
+ switch(I->getOpcode()) {
+ case AArch64::Bcc:
+ case AArch64::CBZw:
+ case AArch64::CBZx:
+ case AArch64::CBNZw:
+ case AArch64::CBNZx:
+ // These instructions just have one predicate operand in position 0 (either
+ // a condition code or a register being compared).
+ Cond.push_back(MachineOperand::CreateImm(I->getOpcode()));
+ Cond.push_back(I->getOperand(0));
+ TBB = I->getOperand(1).getMBB();
+ return;
+ case AArch64::TBZwii:
+ case AArch64::TBZxii:
+ case AArch64::TBNZwii:
+ case AArch64::TBNZxii:
+ // These have two predicate operands: a register and a bit position.
+ Cond.push_back(MachineOperand::CreateImm(I->getOpcode()));
+ Cond.push_back(I->getOperand(0));
+ Cond.push_back(I->getOperand(1));
+ TBB = I->getOperand(2).getMBB();
+ return;
+ default:
+ llvm_unreachable("Unknown conditional branch to classify");
+ }
+}
+
+
+bool
+AArch64InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
+ // If the block has no terminators, it just falls into the block after it.
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin())
+ return false;
+ --I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return false;
+ --I;
+ }
+ if (!isUnpredicatedTerminator(I))
+ return false;
+
+ // Get the last instruction in the block.
+ MachineInstr *LastInst = I;
+
+ // If there is only one terminator instruction, process it.
+ unsigned LastOpc = LastInst->getOpcode();
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
+ if (LastOpc == AArch64::Bimm) {
+ TBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+ if (isCondBranch(LastOpc)) {
+ classifyCondBranch(LastInst, TBB, Cond);
+ return false;
+ }
+ return true; // Can't handle indirect branch.
+ }
+
+ // Get the instruction before it if it is a terminator.
+ MachineInstr *SecondLastInst = I;
+ unsigned SecondLastOpc = SecondLastInst->getOpcode();
+
+ // If AllowModify is true and the block ends with two or more unconditional
+ // branches, delete all but the first unconditional branch.
+ if (AllowModify && LastOpc == AArch64::Bimm) {
+ while (SecondLastOpc == AArch64::Bimm) {
+ LastInst->eraseFromParent();
+ LastInst = SecondLastInst;
+ LastOpc = LastInst->getOpcode();
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
+ // Return now; the only terminator is an unconditional branch.
+ TBB = LastInst->getOperand(0).getMBB();
+ return false;
+ } else {
+ SecondLastInst = I;
+ SecondLastOpc = SecondLastInst->getOpcode();
+ }
+ }
+ }
+
+ // If there are three terminators, we don't know what sort of block this is.
+ if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I))
+ return true;
+
+ // If the block ends with a B and a Bcc, handle it.
+ if (LastOpc == AArch64::Bimm) {
+ if (SecondLastOpc == AArch64::Bcc) {
+ TBB = SecondLastInst->getOperand(1).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(AArch64::Bcc));
+ Cond.push_back(SecondLastInst->getOperand(0));
+ FBB = LastInst->getOperand(0).getMBB();
+ return false;
+ } else if (isCondBranch(SecondLastOpc)) {
+ classifyCondBranch(SecondLastInst, TBB, Cond);
+ FBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+ }
+
+ // If the block ends with two unconditional branches, handle it. The second
+ // one is not executed, so remove it.
+ if (SecondLastOpc == AArch64::Bimm && LastOpc == AArch64::Bimm) {
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ I = LastInst;
+ if (AllowModify)
+ I->eraseFromParent();
+ return false;
+ }
+
+ // Otherwise, can't handle this.
+ return true;
+}
+
+bool AArch64InstrInfo::ReverseBranchCondition(
+ SmallVectorImpl<MachineOperand> &Cond) const {
+ switch (Cond[0].getImm()) {
+ case AArch64::Bcc: {
+ A64CC::CondCodes CC = static_cast<A64CC::CondCodes>(Cond[1].getImm());
+ CC = A64InvertCondCode(CC);
+ Cond[1].setImm(CC);
+ return false;
+ }
+ case AArch64::CBZw:
+ Cond[0].setImm(AArch64::CBNZw);
+ return false;
+ case AArch64::CBZx:
+ Cond[0].setImm(AArch64::CBNZx);
+ return false;
+ case AArch64::CBNZw:
+ Cond[0].setImm(AArch64::CBZw);
+ return false;
+ case AArch64::CBNZx:
+ Cond[0].setImm(AArch64::CBZx);
+ return false;
+ case AArch64::TBZwii:
+ Cond[0].setImm(AArch64::TBNZwii);
+ return false;
+ case AArch64::TBZxii:
+ Cond[0].setImm(AArch64::TBNZxii);
+ return false;
+ case AArch64::TBNZwii:
+ Cond[0].setImm(AArch64::TBZwii);
+ return false;
+ case AArch64::TBNZxii:
+ Cond[0].setImm(AArch64::TBZxii);
+ return false;
+ default:
+ llvm_unreachable("Unknown branch type");
+ }
+}
+
+
+unsigned
+AArch64InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const {
+ if (FBB == 0 && Cond.empty()) {
+ BuildMI(&MBB, DL, get(AArch64::Bimm)).addMBB(TBB);
+ return 1;
+ } else if (FBB == 0) {
+ MachineInstrBuilder MIB = BuildMI(&MBB, DL, get(Cond[0].getImm()));
+ for (int i = 1, e = Cond.size(); i != e; ++i)
+ MIB.addOperand(Cond[i]);
+ MIB.addMBB(TBB);
+ return 1;
+ }
+
+ MachineInstrBuilder MIB = BuildMI(&MBB, DL, get(Cond[0].getImm()));
+ for (int i = 1, e = Cond.size(); i != e; ++i)
+ MIB.addOperand(Cond[i]);
+ MIB.addMBB(TBB);
+
+ BuildMI(&MBB, DL, get(AArch64::Bimm)).addMBB(FBB);
+ return 2;
+}
+
+unsigned AArch64InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin()) return 0;
+ --I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return 0;
+ --I;
+ }
+ if (I->getOpcode() != AArch64::Bimm && !isCondBranch(I->getOpcode()))
+ return 0;
+
+ // Remove the branch.
+ I->eraseFromParent();
+
+ I = MBB.end();
+
+ if (I == MBB.begin()) return 1;
+ --I;
+ if (!isCondBranch(I->getOpcode()))
+ return 1;
+
+ // Remove the branch.
+ I->eraseFromParent();
+ return 2;
+}
+
+bool
+AArch64InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MBBI) const {
+ MachineInstr &MI = *MBBI;
+ MachineBasicBlock &MBB = *MI.getParent();
+
+ unsigned Opcode = MI.getOpcode();
+ switch (Opcode) {
+ case AArch64::TLSDESC_BLRx: {
+ MachineInstr *NewMI =
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), get(AArch64::TLSDESCCALL))
+ .addOperand(MI.getOperand(1));
+ MI.setDesc(get(AArch64::BLRx));
+
+ llvm::finalizeBundle(MBB, NewMI, *++MBBI);
+ return true;
+ }
+ default:
+ return false;
+ }
+
+ return false;
+}
+
+void
+AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill,
+ int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ DebugLoc DL = MBB.findDebugLoc(MBBI);
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ unsigned Align = MFI.getObjectAlignment(FrameIdx);
+
+ MachineMemOperand *MMO
+ = MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
+ MachineMemOperand::MOStore,
+ MFI.getObjectSize(FrameIdx),
+ Align);
+
+ unsigned StoreOp = 0;
+ if (RC->hasType(MVT::i64) || RC->hasType(MVT::i32)) {
+ switch(RC->getSize()) {
+ case 4: StoreOp = AArch64::LS32_STR; break;
+ case 8: StoreOp = AArch64::LS64_STR; break;
+ default:
+ llvm_unreachable("Unknown size for regclass");
+ }
+ } else {
+ assert((RC->hasType(MVT::f32) || RC->hasType(MVT::f64) ||
+ RC->hasType(MVT::f128))
+ && "Expected integer or floating type for store");
+ switch (RC->getSize()) {
+ case 4: StoreOp = AArch64::LSFP32_STR; break;
+ case 8: StoreOp = AArch64::LSFP64_STR; break;
+ case 16: StoreOp = AArch64::LSFP128_STR; break;
+ default:
+ llvm_unreachable("Unknown size for regclass");
+ }
+ }
+
+ MachineInstrBuilder NewMI = BuildMI(MBB, MBBI, DL, get(StoreOp));
+ NewMI.addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FrameIdx)
+ .addImm(0)
+ .addMemOperand(MMO);
+
+}
+
+void
+AArch64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ DebugLoc DL = MBB.findDebugLoc(MBBI);
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ unsigned Align = MFI.getObjectAlignment(FrameIdx);
+
+ MachineMemOperand *MMO
+ = MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
+ MachineMemOperand::MOLoad,
+ MFI.getObjectSize(FrameIdx),
+ Align);
+
+ unsigned LoadOp = 0;
+ if (RC->hasType(MVT::i64) || RC->hasType(MVT::i32)) {
+ switch(RC->getSize()) {
+ case 4: LoadOp = AArch64::LS32_LDR; break;
+ case 8: LoadOp = AArch64::LS64_LDR; break;
+ default:
+ llvm_unreachable("Unknown size for regclass");
+ }
+ } else {
+ assert((RC->hasType(MVT::f32) || RC->hasType(MVT::f64)
+ || RC->hasType(MVT::f128))
+ && "Expected integer or floating type for store");
+ switch (RC->getSize()) {
+ case 4: LoadOp = AArch64::LSFP32_LDR; break;
+ case 8: LoadOp = AArch64::LSFP64_LDR; break;
+ case 16: LoadOp = AArch64::LSFP128_LDR; break;
+ default:
+ llvm_unreachable("Unknown size for regclass");
+ }
+ }
+
+ MachineInstrBuilder NewMI = BuildMI(MBB, MBBI, DL, get(LoadOp), DestReg);
+ NewMI.addFrameIndex(FrameIdx)
+ .addImm(0)
+ .addMemOperand(MMO);
+}
+
+unsigned AArch64InstrInfo::estimateRSStackLimit(MachineFunction &MF) const {
+ unsigned Limit = (1 << 16) - 1;
+ for (MachineFunction::iterator BB = MF.begin(),E = MF.end(); BB != E; ++BB) {
+ for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end();
+ I != E; ++I) {
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+ if (!I->getOperand(i).isFI()) continue;
+
+ // When using ADDxxi_lsl0_s to get the address of a stack object, 0xfff
+ // is the largest offset guaranteed to fit in the immediate offset.
+ if (I->getOpcode() == AArch64::ADDxxi_lsl0_s) {
+ Limit = std::min(Limit, 0xfffu);
+ break;
+ }
+
+ int AccessScale, MinOffset, MaxOffset;
+ getAddressConstraints(*I, AccessScale, MinOffset, MaxOffset);
+ Limit = std::min(Limit, static_cast<unsigned>(MaxOffset));
+
+ break; // At most one FI per instruction
+ }
+ }
+ }
+
+ return Limit;
+}
+void AArch64InstrInfo::getAddressConstraints(const MachineInstr &MI,
+ int &AccessScale, int &MinOffset,
+ int &MaxOffset) const {
+ switch (MI.getOpcode()) {
+ default: llvm_unreachable("Unknown load/store kind");
+ case TargetOpcode::DBG_VALUE:
+ AccessScale = 1;
+ MinOffset = INT_MIN;
+ MaxOffset = INT_MAX;
+ return;
+ case AArch64::LS8_LDR: case AArch64::LS8_STR:
+ case AArch64::LSFP8_LDR: case AArch64::LSFP8_STR:
+ case AArch64::LDRSBw:
+ case AArch64::LDRSBx:
+ AccessScale = 1;
+ MinOffset = 0;
+ MaxOffset = 0xfff;
+ return;
+ case AArch64::LS16_LDR: case AArch64::LS16_STR:
+ case AArch64::LSFP16_LDR: case AArch64::LSFP16_STR:
+ case AArch64::LDRSHw:
+ case AArch64::LDRSHx:
+ AccessScale = 2;
+ MinOffset = 0;
+ MaxOffset = 0xfff * AccessScale;
+ return;
+ case AArch64::LS32_LDR: case AArch64::LS32_STR:
+ case AArch64::LSFP32_LDR: case AArch64::LSFP32_STR:
+ case AArch64::LDRSWx:
+ case AArch64::LDPSWx:
+ AccessScale = 4;
+ MinOffset = 0;
+ MaxOffset = 0xfff * AccessScale;
+ return;
+ case AArch64::LS64_LDR: case AArch64::LS64_STR:
+ case AArch64::LSFP64_LDR: case AArch64::LSFP64_STR:
+ case AArch64::PRFM:
+ AccessScale = 8;
+ MinOffset = 0;
+ MaxOffset = 0xfff * AccessScale;
+ return;
+ case AArch64::LSFP128_LDR: case AArch64::LSFP128_STR:
+ AccessScale = 16;
+ MinOffset = 0;
+ MaxOffset = 0xfff * AccessScale;
+ return;
+ case AArch64::LSPair32_LDR: case AArch64::LSPair32_STR:
+ case AArch64::LSFPPair32_LDR: case AArch64::LSFPPair32_STR:
+ AccessScale = 4;
+ MinOffset = -0x40 * AccessScale;
+ MaxOffset = 0x3f * AccessScale;
+ return;
+ case AArch64::LSPair64_LDR: case AArch64::LSPair64_STR:
+ case AArch64::LSFPPair64_LDR: case AArch64::LSFPPair64_STR:
+ AccessScale = 8;
+ MinOffset = -0x40 * AccessScale;
+ MaxOffset = 0x3f * AccessScale;
+ return;
+ case AArch64::LSFPPair128_LDR: case AArch64::LSFPPair128_STR:
+ AccessScale = 16;
+ MinOffset = -0x40 * AccessScale;
+ MaxOffset = 0x3f * AccessScale;
+ return;
+ }
+}
+
+unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
+ const MCInstrDesc &MCID = MI.getDesc();
+ const MachineBasicBlock &MBB = *MI.getParent();
+ const MachineFunction &MF = *MBB.getParent();
+ const MCAsmInfo &MAI = *MF.getTarget().getMCAsmInfo();
+
+ if (MCID.getSize())
+ return MCID.getSize();
+
+ if (MI.getOpcode() == AArch64::INLINEASM)
+ return getInlineAsmLength(MI.getOperand(0).getSymbolName(), MAI);
+
+ if (MI.isLabel())
+ return 0;
+
+ switch (MI.getOpcode()) {
+ case TargetOpcode::BUNDLE:
+ return getInstBundleLength(MI);
+ case TargetOpcode::IMPLICIT_DEF:
+ case TargetOpcode::KILL:
+ case TargetOpcode::PROLOG_LABEL:
+ case TargetOpcode::EH_LABEL:
+ case TargetOpcode::DBG_VALUE:
+ return 0;
+ case AArch64::CONSTPOOL_ENTRY:
+ return MI.getOperand(2).getImm();
+ case AArch64::TLSDESCCALL:
+ return 0;
+ default:
+ llvm_unreachable("Unknown instruction class");
+ }
+}
+
+unsigned AArch64InstrInfo::getInstBundleLength(const MachineInstr &MI) const {
+ unsigned Size = 0;
+ MachineBasicBlock::const_instr_iterator I = MI;
+ MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
+ while (++I != E && I->isInsideBundle()) {
+ assert(!I->isBundle() && "No nested bundle!");
+ Size += getInstSizeInBytes(*I);
+ }
+ return Size;
+}
+
+bool llvm::rewriteA64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
+ unsigned FrameReg, int &Offset,
+ const AArch64InstrInfo &TII) {
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+
+ MFI.getObjectOffset(FrameRegIdx);
+ llvm_unreachable("Unimplemented rewriteFrameIndex");
+}
+
+void llvm::emitRegUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ DebugLoc dl, const TargetInstrInfo &TII,
+ unsigned DstReg, unsigned SrcReg, unsigned ScratchReg,
+ int64_t NumBytes, MachineInstr::MIFlag MIFlags) {
+ if (NumBytes == 0 && DstReg == SrcReg)
+ return;
+ else if (abs(NumBytes) & ~0xffffff) {
+ // Generically, we have to materialize the offset into a temporary register
+ // and subtract it. There are a couple of ways this could be done, for now
+ // we'll go for a literal-pool load.
+ MachineFunction &MF = *MBB.getParent();
+ MachineConstantPool *MCP = MF.getConstantPool();
+ const Constant *C
+ = ConstantInt::get(Type::getInt64Ty(MF.getFunction()->getContext()),
+ abs(NumBytes));
+ unsigned CPI = MCP->getConstantPoolIndex(C, 8);
+
+ // LDR xTMP, .LITPOOL
+ BuildMI(MBB, MBBI, dl, TII.get(AArch64::LDRx_lit), ScratchReg)
+ .addConstantPoolIndex(CPI)
+ .setMIFlag(MIFlags);
+
+ // ADD DST, SRC, xTMP (, lsl #0)
+ unsigned AddOp = NumBytes > 0 ? AArch64::ADDxxx_uxtx : AArch64::SUBxxx_uxtx;
+ BuildMI(MBB, MBBI, dl, TII.get(AddOp), DstReg)
+ .addReg(SrcReg, RegState::Kill)
+ .addReg(ScratchReg, RegState::Kill)
+ .addImm(0)
+ .setMIFlag(MIFlags);
+ return;
+ }
+
+ // Now we know that the adjustment can be done in at most two add/sub
+ // (immediate) instructions, which is always more efficient than a
+ // literal-pool load, or even a hypothetical movz/movk/add sequence
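+ // For example, NumBytes == 0x12345 is applied as an add of 0x345 followed
+ // by an add of 0x12 shifted left by 12 bits.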
+
+ // Decide whether we're doing addition or subtraction
+ unsigned LowOp, HighOp;
+ if (NumBytes >= 0) {
+ LowOp = AArch64::ADDxxi_lsl0_s;
+ HighOp = AArch64::ADDxxi_lsl12_s;
+ } else {
+ LowOp = AArch64::SUBxxi_lsl0_s;
+ HighOp = AArch64::SUBxxi_lsl12_s;
+ NumBytes = abs(NumBytes);
+ }
+
+ // If we're here, at the very least a move needs to be produced, which just
+ // happens to be materializable by an ADD.
+ if ((NumBytes & 0xfff) || NumBytes == 0) {
+ BuildMI(MBB, MBBI, dl, TII.get(LowOp), DstReg)
+ .addReg(SrcReg, RegState::Kill)
+ .addImm(NumBytes & 0xfff)
+ .setMIFlag(MIFlags);
+
+ // Next update should use the register we've just defined.
+ SrcReg = DstReg;
+ }
+
+ if (NumBytes & 0xfff000) {
+ BuildMI(MBB, MBBI, dl, TII.get(HighOp), DstReg)
+ .addReg(SrcReg, RegState::Kill)
+ .addImm(NumBytes >> 12)
+ .setMIFlag(MIFlags);
+ }
+}
+
+void llvm::emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ DebugLoc dl, const TargetInstrInfo &TII,
+ unsigned ScratchReg, int64_t NumBytes,
+ MachineInstr::MIFlag MIFlags) {
+ emitRegUpdate(MBB, MI, dl, TII, AArch64::XSP, AArch64::XSP, AArch64::X16,
+ NumBytes, MIFlags);
+}
+
+
+namespace {
+ struct LDTLSCleanup : public MachineFunctionPass {
+ static char ID;
+ LDTLSCleanup() : MachineFunctionPass(ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF) {
+ AArch64MachineFunctionInfo* MFI = MF.getInfo<AArch64MachineFunctionInfo>();
+ if (MFI->getNumLocalDynamicTLSAccesses() < 2) {
+ // No point folding accesses if there aren't at least two.
+ return false;
+ }
+
+ MachineDominatorTree *DT = &getAnalysis<MachineDominatorTree>();
+ return VisitNode(DT->getRootNode(), 0);
+ }
+
+ // Visit the dominator subtree rooted at Node in pre-order.
+ // If TLSBaseAddrReg is non-null, then use that to replace any
+ // TLS_base_addr instructions. Otherwise, create the register
+ // when the first such instruction is seen, and then use it
+ // as we encounter more instructions.
+ bool VisitNode(MachineDomTreeNode *Node, unsigned TLSBaseAddrReg) {
+ MachineBasicBlock *BB = Node->getBlock();
+ bool Changed = false;
+
+ // Traverse the current block.
+ for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;
+ ++I) {
+ switch (I->getOpcode()) {
+ case AArch64::TLSDESC_BLRx:
+ // Make sure it's a local dynamic access.
+ if (!I->getOperand(1).isSymbol() ||
+ strcmp(I->getOperand(1).getSymbolName(), "_TLS_MODULE_BASE_"))
+ break;
+
+ if (TLSBaseAddrReg)
+ I = ReplaceTLSBaseAddrCall(I, TLSBaseAddrReg);
+ else
+ I = SetRegister(I, &TLSBaseAddrReg);
+ Changed = true;
+ break;
+ default:
+ break;
+ }
+ }
+
+ // Visit the children of this block in the dominator tree.
+ for (MachineDomTreeNode::iterator I = Node->begin(), E = Node->end();
+ I != E; ++I) {
+ Changed |= VisitNode(*I, TLSBaseAddrReg);
+ }
+
+ return Changed;
+ }
+
+ // Replace the TLS_base_addr instruction I with a copy from
+ // TLSBaseAddrReg, returning the new instruction.
+ MachineInstr *ReplaceTLSBaseAddrCall(MachineInstr *I,
+ unsigned TLSBaseAddrReg) {
+ MachineFunction *MF = I->getParent()->getParent();
+ const AArch64TargetMachine *TM =
+ static_cast<const AArch64TargetMachine *>(&MF->getTarget());
+ const AArch64InstrInfo *TII = TM->getInstrInfo();
+
+ // Insert a Copy from TLSBaseAddrReg to x0, which is where the rest of the
+ // code sequence assumes the address will be.
+ MachineInstr *Copy = BuildMI(*I->getParent(), I, I->getDebugLoc(),
+ TII->get(TargetOpcode::COPY),
+ AArch64::X0)
+ .addReg(TLSBaseAddrReg);
+
+ // Erase the TLS_base_addr instruction.
+ I->eraseFromParent();
+
+ return Copy;
+ }
+
+ // Create a virtual register in *TLSBaseAddrReg, and populate it by
+ // inserting a copy instruction after I. Returns the new instruction.
+ MachineInstr *SetRegister(MachineInstr *I, unsigned *TLSBaseAddrReg) {
+ MachineFunction *MF = I->getParent()->getParent();
+ const AArch64TargetMachine *TM =
+ static_cast<const AArch64TargetMachine *>(&MF->getTarget());
+ const AArch64InstrInfo *TII = TM->getInstrInfo();
+
+ // Create a virtual register for the TLS base address.
+ MachineRegisterInfo &RegInfo = MF->getRegInfo();
+ *TLSBaseAddrReg = RegInfo.createVirtualRegister(&AArch64::GPR64RegClass);
+
+ // Insert a copy from X0 to TLSBaseAddrReg for later.
+ MachineInstr *Next = I->getNextNode();
+ MachineInstr *Copy = BuildMI(*I->getParent(), Next, I->getDebugLoc(),
+ TII->get(TargetOpcode::COPY),
+ *TLSBaseAddrReg)
+ .addReg(AArch64::X0);
+
+ return Copy;
+ }
+
+ virtual const char *getPassName() const {
+ return "Local Dynamic TLS Access Clean-up";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<MachineDominatorTree>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ };
+}
+
+char LDTLSCleanup::ID = 0;
+FunctionPass*
+llvm::createAArch64CleanupLocalDynamicTLSPass() { return new LDTLSCleanup(); }
diff --git a/lib/Target/AArch64/AArch64InstrInfo.h b/lib/Target/AArch64/AArch64InstrInfo.h
new file mode 100644
index 0000000000..8084f78179
--- /dev/null
+++ b/lib/Target/AArch64/AArch64InstrInfo.h
@@ -0,0 +1,110 @@
+//===- AArch64InstrInfo.h - AArch64 Instruction Information -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the AArch64 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_AARCH64INSTRINFO_H
+#define LLVM_TARGET_AARCH64INSTRINFO_H
+
+#include "llvm/Target/TargetInstrInfo.h"
+#include "AArch64RegisterInfo.h"
+
+#define GET_INSTRINFO_HEADER
+#include "AArch64GenInstrInfo.inc"
+
+namespace llvm {
+
+class AArch64Subtarget;
+
+class AArch64InstrInfo : public AArch64GenInstrInfo {
+ const AArch64RegisterInfo RI;
+ const AArch64Subtarget &Subtarget;
+public:
+ explicit AArch64InstrInfo(const AArch64Subtarget &STI);
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ const TargetRegisterInfo &getRegisterInfo() const { return RI; }
+
+ const AArch64Subtarget &getSubTarget() const { return Subtarget; }
+
+ void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
+
+ MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx,
+ uint64_t Offset, const MDNode *MDPtr,
+ DebugLoc DL) const;
+
+ void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+ void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify = false) const;
+ unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const;
+ unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+ bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
+
+ bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const;
+
+ /// Look through the instructions in this function and work out the largest
+ /// size the stack frame can be while still leaving all local slots
+ /// addressable without extra complexity.
+ unsigned estimateRSStackLimit(MachineFunction &MF) const;
+
+ /// getAddressConstraints - For loads and stores (and PRFMs) taking an
+ /// immediate offset, this function determines the constraints required for
+ /// the immediate. It must satisfy:
+ /// + MinOffset <= imm <= MaxOffset
+ /// + imm % AccessScale == 0
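+ /// For example, a 32-bit store (LS32_STR) gives AccessScale == 4,
+ /// MinOffset == 0 and MaxOffset == 0xfff * 4.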
+ void getAddressConstraints(const MachineInstr &MI, int &AccessScale,
+ int &MinOffset, int &MaxOffset) const;
+
+ unsigned getInstSizeInBytes(const MachineInstr &MI) const;
+
+ unsigned getInstBundleLength(const MachineInstr &MI) const;
+};
+
+bool rewriteA64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
+ unsigned FrameReg, int &Offset,
+ const AArch64InstrInfo &TII);
+
+
+void emitRegUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ DebugLoc dl, const TargetInstrInfo &TII,
+ unsigned DstReg, unsigned SrcReg, unsigned ScratchReg,
+ int64_t NumBytes,
+ MachineInstr::MIFlag MIFlags = MachineInstr::NoFlags);
+
+void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ DebugLoc dl, const TargetInstrInfo &TII,
+ unsigned ScratchReg, int64_t NumBytes,
+ MachineInstr::MIFlag MIFlags = MachineInstr::NoFlags);
+
+}
+
+#endif
diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td
new file mode 100644
index 0000000000..3c15200cc4
--- /dev/null
+++ b/lib/Target/AArch64/AArch64InstrInfo.td
@@ -0,0 +1,5298 @@
+include "AArch64InstrFormats.td"
+
+//===----------------------------------------------------------------------===//
+// Target-specific ISD nodes and profiles
+//===----------------------------------------------------------------------===//
+
+def SDT_A64ret : SDTypeProfile<0, 0, []>;
+def A64ret : SDNode<"AArch64ISD::Ret", SDT_A64ret, [SDNPHasChain,
+ SDNPOptInGlue]>;
+
+// (ins NZCV, Condition, Dest)
+def SDT_A64br_cc : SDTypeProfile<0, 3, [SDTCisVT<0, i32>]>;
+def A64br_cc : SDNode<"AArch64ISD::BR_CC", SDT_A64br_cc, [SDNPHasChain]>;
+
+// (outs Result), (ins NZCV, IfTrue, IfFalse, Condition)
+def SDT_A64select_cc : SDTypeProfile<1, 4, [SDTCisVT<1, i32>,
+ SDTCisSameAs<0, 2>,
+ SDTCisSameAs<2, 3>]>;
+def A64select_cc : SDNode<"AArch64ISD::SELECT_CC", SDT_A64select_cc>;
+
+// (outs NZCV), (ins LHS, RHS, Condition)
+def SDT_A64setcc : SDTypeProfile<1, 3, [SDTCisVT<0, i32>,
+ SDTCisSameAs<1, 2>]>;
+def A64setcc : SDNode<"AArch64ISD::SETCC", SDT_A64setcc>;
+
+
+// (outs GPR64), (ins)
+def A64threadpointer : SDNode<"AArch64ISD::THREAD_POINTER", SDTPtrLeaf>;
+
+// A64 compares don't care about the cond really (they set all flags) so a
+// simple binary operator is useful.
+def A64cmp : PatFrag<(ops node:$lhs, node:$rhs),
+ (A64setcc node:$lhs, node:$rhs, cond)>;
+
+
+// When matching a notional (CMP op1, (sub 0, op2)), we'd like to use a CMN
+// instruction on the grounds that "op1 - (-op2) == op1 + op2". However, the C
+// and V flags can be set differently by this operation. It comes down to
+// whether "SInt(~op2)+1 == SInt(~op2+1)" (and the same for UInt). If they are
+// then everything is fine. If not then the optimization is wrong. Thus general
+// comparisons are only valid if op2 != 0.
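+// Concretely, with op1 == 5 and op2 == 0: "cmp" (SUBS) computes 5 - 0 and
+// sets C = 1, whereas the rewritten "cmn" (ADDS) computes 5 + 0 and leaves
+// C = 0, so any unsigned comparison depending on C would be miscompiled.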
+
+// So, finally, the only LLVM-native comparisons that don't mention C and V are
+// SETEQ and SETNE. They're the only ones we can safely use CMN for in the
+// absence of information about op2.
+def equality_cond : PatLeaf<(cond), [{
+ return N->get() == ISD::SETEQ || N->get() == ISD::SETNE;
+}]>;
+
+def A64cmn : PatFrag<(ops node:$lhs, node:$rhs),
+ (A64setcc node:$lhs, (sub 0, node:$rhs), equality_cond)>;
+
+// There are two layers of indirection here, driven by the following
+// considerations.
+// + TableGen does not know CodeModel or Reloc so that decision should be
+// made for a variable/address at ISelLowering.
+// + The output of ISelLowering should be selectable (hence the Wrapper,
+// rather than a bare target opcode)
+def SDTAArch64Wrapper : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>,
+ SDTCisSameAs<1, 2>,
+ SDTCisVT<3, i32>,
+ SDTCisPtrTy<0>]>;
+
+def A64WrapperSmall : SDNode<"AArch64ISD::WrapperSmall", SDTAArch64Wrapper>;
+
+
+def SDTAArch64GOTLoad : SDTypeProfile<1, 1, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>;
+def A64GOTLoad : SDNode<"AArch64ISD::GOTLoad", SDTAArch64GOTLoad,
+ [SDNPHasChain]>;
+
+
+// (A64BFI LHS, RHS, LSB, Width)
+def SDTA64BFI : SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>,
+ SDTCisSameAs<1, 2>,
+ SDTCisVT<3, i64>,
+ SDTCisVT<4, i64>]>;
+
+def A64Bfi : SDNode<"AArch64ISD::BFI", SDTA64BFI>;
+
+// (A64EXTR HiReg, LoReg, LSB)
+def SDTA64EXTR : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>,
+ SDTCisVT<3, i64>]>;
+def A64Extr : SDNode<"AArch64ISD::EXTR", SDTA64EXTR>;
+
+// (A64[SU]BFX Field, ImmR, ImmS).
+//
+// Note that ImmR and ImmS are already encoded for the actual instructions. The
+// more natural LSB and Width mix together to form ImmR and ImmS, something
+// which TableGen can't handle.
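+// (For UBFX/SBFX, ImmR is the LSB and ImmS is LSB + Width - 1.)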
+def SDTA64BFX : SDTypeProfile<1, 3, [SDTCisVT<2, i64>, SDTCisVT<3, i64>]>;
+def A64Sbfx : SDNode<"AArch64ISD::SBFX", SDTA64BFX>;
+
+def A64Ubfx : SDNode<"AArch64ISD::UBFX", SDTA64BFX>;
+
+//===----------------------------------------------------------------------===//
+// Call sequence pseudo-instructions
+//===----------------------------------------------------------------------===//
+
+
+def SDT_AArch64Call : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>;
+def AArch64Call : SDNode<"AArch64ISD::Call", SDT_AArch64Call,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>;
+
+def AArch64tcret : SDNode<"AArch64ISD::TC_RETURN", SDT_AArch64Call,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+
+// The TLSDESCCALL node is a variant call which goes to an indirectly calculated
+// destination but needs a relocation against a fixed symbol. As such it has
+// two fixed operands: the callee and the relocated variable.
+//
+// The TLS ABI only allows it to be selected to a BLR instruction (with
+// appropriate relocation).
+def SDTTLSDescCall : SDTypeProfile<0, -2, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>;
+
+def A64tlsdesc_blr : SDNode<"AArch64ISD::TLSDESCCALL", SDTTLSDescCall,
+ [SDNPInGlue, SDNPOutGlue, SDNPHasChain, SDNPVariadic]>;
+
+
+def SDT_AArch64CallSeqStart : SDCallSeqStart<[ SDTCisPtrTy<0> ]>;
+def AArch64callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_AArch64CallSeqStart,
+ [SDNPHasChain, SDNPOutGlue]>;
+
+def SDT_AArch64CallSeqEnd : SDCallSeqEnd<[ SDTCisPtrTy<0>, SDTCisPtrTy<1> ]>;
+def AArch64callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_AArch64CallSeqEnd,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
+
+
+// These pseudo-instructions have special semantics by virtue of being passed to
+// the InstrInfo constructor. CALLSEQ_START/CALLSEQ_END are produced by
+// LowerCall to (in our case) tell the back-end about stack adjustments for
+// arguments passed on the stack. Here we select those markers to
+// pseudo-instructions which explicitly set the stack, and finally in the
+// RegisterInfo we convert them to a true stack adjustment.
+let Defs = [XSP], Uses = [XSP] in
+{
+ def ADJCALLSTACKDOWN : PseudoInst<(outs), (ins i64imm:$amt),
+ [(AArch64callseq_start timm:$amt)]>;
+
+ def ADJCALLSTACKUP : PseudoInst<(outs), (ins i64imm:$amt1, i64imm:$amt2),
+ [(AArch64callseq_end timm:$amt1, timm:$amt2)]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Atomic operation pseudo-instructions
+//===----------------------------------------------------------------------===//
+
+let usesCustomInserter = 1, Defs = [NZCV] in {
+multiclass AtomicSizes<string opname>
+{
+ def _I8 : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$incr),
+ [(set GPR32:$dst, (!cast<SDNode>(opname # "_8") GPR64:$ptr, GPR32:$incr))]>;
+ def _I16 : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$incr),
+ [(set GPR32:$dst, (!cast<SDNode>(opname # "_16") GPR64:$ptr, GPR32:$incr))]>;
+ def _I32 : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$incr),
+ [(set GPR32:$dst, (!cast<SDNode>(opname # "_32") GPR64:$ptr, GPR32:$incr))]>;
+ def _I64 : PseudoInst<(outs GPR64:$dst), (ins GPR64:$ptr, GPR64:$incr),
+ [(set GPR64:$dst, (!cast<SDNode>(opname # "_64") GPR64:$ptr, GPR64:$incr))]>;
+}
+}
+
+defm ATOMIC_LOAD_ADD : AtomicSizes<"atomic_load_add">;
+defm ATOMIC_LOAD_SUB : AtomicSizes<"atomic_load_sub">;
+defm ATOMIC_LOAD_AND : AtomicSizes<"atomic_load_and">;
+defm ATOMIC_LOAD_OR : AtomicSizes<"atomic_load_or">;
+defm ATOMIC_LOAD_XOR : AtomicSizes<"atomic_load_xor">;
+defm ATOMIC_LOAD_NAND : AtomicSizes<"atomic_load_nand">;
+defm ATOMIC_LOAD_MIN : AtomicSizes<"atomic_load_min">;
+defm ATOMIC_LOAD_MAX : AtomicSizes<"atomic_load_max">;
+defm ATOMIC_LOAD_UMIN : AtomicSizes<"atomic_load_umin">;
+defm ATOMIC_LOAD_UMAX : AtomicSizes<"atomic_load_umax">;
+defm ATOMIC_SWAP : AtomicSizes<"atomic_swap">;
+
+let usesCustomInserter = 1, Defs = [NZCV] in {
+def ATOMIC_CMP_SWAP_I8
+ : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$old, GPR32:$new),
+ [(set GPR32:$dst,
+ (atomic_cmp_swap_8 GPR64:$ptr, GPR32:$old, GPR32:$new))]>;
+def ATOMIC_CMP_SWAP_I16
+ : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$old, GPR32:$new),
+ [(set GPR32:$dst,
+ (atomic_cmp_swap_16 GPR64:$ptr, GPR32:$old, GPR32:$new))]>;
+def ATOMIC_CMP_SWAP_I32
+ : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$old, GPR32:$new),
+ [(set GPR32:$dst,
+ (atomic_cmp_swap_32 GPR64:$ptr, GPR32:$old, GPR32:$new))]>;
+def ATOMIC_CMP_SWAP_I64
+ : PseudoInst<(outs GPR64:$dst), (ins GPR64:$ptr, GPR64:$old, GPR64:$new),
+ [(set GPR64:$dst,
+ (atomic_cmp_swap_64 GPR64:$ptr, GPR64:$old, GPR64:$new))]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Add-subtract (extended register) instructions
+//===----------------------------------------------------------------------===//
+// Contains: ADD, ADDS, SUB, SUBS + aliases CMN, CMP
+
+// The RHS of these operations is conceptually a sign/zero-extended
+// register, optionally shifted left by 1-4. The extension can be a
+// NOP (e.g. "sxtx" sign-extending a 64-bit register to 64-bits) but
+// must be specified with one exception:
+
+// If one of the registers is sp/wsp then LSL is an alias for UXTW in
+// 32-bit instructions and UXTX in 64-bit versions, the shift amount
+// is not optional in that case (but can explicitly be 0), and the
+// entire suffix can be skipped (e.g. "add sp, x3, x2").
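+// For example, "add w0, w1, w2, sxth #1" sign-extends the low halfword of w2
+// and shifts it left by 1 before the addition, and "add sp, x3, x2" is
+// equivalent to "add sp, x3, x2, uxtx #0".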
+
+multiclass extend_operands<string PREFIX>
+{
+ def _asmoperand : AsmOperandClass
+ {
+ let Name = PREFIX;
+ let RenderMethod = "addRegExtendOperands";
+ let PredicateMethod = "isRegExtend<A64SE::" # PREFIX # ">";
+ }
+
+ def _operand : Operand<i64>, ImmLeaf<i64, [{ return Imm >= 0 && Imm <= 4; }]>
+ {
+ let PrintMethod = "printRegExtendOperand<A64SE::" # PREFIX # ">";
+ let DecoderMethod = "DecodeRegExtendOperand";
+ let ParserMatchClass = !cast<AsmOperandClass>(PREFIX # "_asmoperand");
+ }
+}
+
+defm UXTB : extend_operands<"UXTB">;
+defm UXTH : extend_operands<"UXTH">;
+defm UXTW : extend_operands<"UXTW">;
+defm UXTX : extend_operands<"UXTX">;
+defm SXTB : extend_operands<"SXTB">;
+defm SXTH : extend_operands<"SXTH">;
+defm SXTW : extend_operands<"SXTW">;
+defm SXTX : extend_operands<"SXTX">;
+
+def LSL_extasmoperand : AsmOperandClass
+{
+ let Name = "RegExtendLSL";
+ let RenderMethod = "addRegExtendOperands";
+}
+
+def LSL_extoperand : Operand<i64>
+{
+ let ParserMatchClass = LSL_extasmoperand;
+}
+
+
+// The patterns for various sign-extensions are a little ugly and
+// non-uniform because everything has already been promoted to the
+// legal i64 and i32 types. We'll wrap the various variants up in a
+// class for use later.
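+// For example, after legalization an i8 -> i64 zero-extension of a value in a
+// W register appears as (and (anyext GPR32:$Rm), 255), which becomes the uxtb
+// field of extends_to_i64 below.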
+class extend_types
+{
+ dag uxtb; dag uxth; dag uxtw; dag uxtx;
+ dag sxtb; dag sxth; dag sxtw; dag sxtx;
+}
+
+def extends_to_i64 : extend_types
+{
+ let uxtb = (and (anyext GPR32:$Rm), 255);
+ let uxth = (and (anyext GPR32:$Rm), 65535);
+ let uxtw = (zext GPR32:$Rm);
+ let uxtx = (i64 GPR64:$Rm);
+
+ let sxtb = (sext_inreg (anyext GPR32:$Rm), i8);
+ let sxth = (sext_inreg (anyext GPR32:$Rm), i16);
+ let sxtw = (sext GPR32:$Rm);
+ let sxtx = (i64 GPR64:$Rm);
+}
+
+
+def extends_to_i32 : extend_types
+{
+ let uxtb = (and GPR32:$Rm, 255);
+ let uxth = (and GPR32:$Rm, 65535);
+ let uxtw = (i32 GPR32:$Rm);
+ let uxtx = (i32 GPR32:$Rm);
+
+ let sxtb = (sext_inreg GPR32:$Rm, i8);
+ let sxth = (sext_inreg GPR32:$Rm, i16);
+ let sxtw = (i32 GPR32:$Rm);
+ let sxtx = (i32 GPR32:$Rm);
+}
+
+// Now, six of the extensions supported are easy and uniform: if the source size
+// is 32-bits or less, then Rm is always a 32-bit register. We'll instantiate
+// those instructions in one block.
+
+// The uxtx/sxtx could potentially be merged in, but three facts dissuaded me:
+// + It would break the naming scheme: either ADDxx_uxtx or ADDww_uxtx would
+// be impossible.
+// + Patterns are very different as well.
+// + Passing different registers would be ugly (more fields in extend_types
+// would probably be the best option).
+multiclass addsub_exts<bit sf, bit op, bit S, string asmop, SDPatternOperator opfrag,
+ dag outs, extend_types exts, RegisterClass GPRsp>
+{
+ def w_uxtb : A64I_addsubext<sf, op, S, 0b00, 0b000,
+ outs,
+ (ins GPRsp:$Rn, GPR32:$Rm, UXTB_operand:$Imm3),
+ !strconcat(asmop, "$Rn, $Rm, $Imm3"),
+ [(opfrag GPRsp:$Rn, (shl exts.uxtb, UXTB_operand:$Imm3))],
+ NoItinerary>;
+ def w_uxth : A64I_addsubext<sf, op, S, 0b00, 0b001,
+ outs,
+ (ins GPRsp:$Rn, GPR32:$Rm, UXTH_operand:$Imm3),
+ !strconcat(asmop, "$Rn, $Rm, $Imm3"),
+ [(opfrag GPRsp:$Rn, (shl exts.uxth, UXTH_operand:$Imm3))],
+ NoItinerary>;
+ def w_uxtw : A64I_addsubext<sf, op, S, 0b00, 0b010,
+ outs,
+ (ins GPRsp:$Rn, GPR32:$Rm, UXTW_operand:$Imm3),
+ !strconcat(asmop, "$Rn, $Rm, $Imm3"),
+ [(opfrag GPRsp:$Rn, (shl exts.uxtw, UXTW_operand:$Imm3))],
+ NoItinerary>;
+
+ def w_sxtb : A64I_addsubext<sf, op, S, 0b00, 0b100,
+ outs,
+ (ins GPRsp:$Rn, GPR32:$Rm, SXTB_operand:$Imm3),
+ !strconcat(asmop, "$Rn, $Rm, $Imm3"),
+ [(opfrag GPRsp:$Rn, (shl exts.sxtb, SXTB_operand:$Imm3))],
+ NoItinerary>;
+ def w_sxth : A64I_addsubext<sf, op, S, 0b00, 0b101,
+ outs,
+ (ins GPRsp:$Rn, GPR32:$Rm, SXTH_operand:$Imm3),
+ !strconcat(asmop, "$Rn, $Rm, $Imm3"),
+ [(opfrag GPRsp:$Rn, (shl exts.sxth, SXTH_operand:$Imm3))],
+ NoItinerary>;
+ def w_sxtw : A64I_addsubext<sf, op, S, 0b00, 0b110,
+ outs,
+ (ins GPRsp:$Rn, GPR32:$Rm, SXTW_operand:$Imm3),
+ !strconcat(asmop, "$Rn, $Rm, $Imm3"),
+ [(opfrag GPRsp:$Rn, (shl exts.sxtw, SXTW_operand:$Imm3))],
+ NoItinerary>;
+}
+
+// These two could be merged in with the above, but their patterns aren't really
+// necessary and the naming-scheme would necessarily break:
+multiclass addsub_xxtx<bit op, bit S, string asmop, SDPatternOperator opfrag, dag outs>
+{
+ def x_uxtx : A64I_addsubext<0b1, op, S, 0b00, 0b011,
+ outs,
+ (ins GPR64xsp:$Rn, GPR64:$Rm, UXTX_operand:$Imm3),
+ !strconcat(asmop, "$Rn, $Rm, $Imm3"),
+ [(opfrag GPR64xsp:$Rn, (shl GPR64:$Rm, UXTX_operand:$Imm3))],
+ NoItinerary>;
+
+ def x_sxtx : A64I_addsubext<0b1, op, S, 0b00, 0b111,
+ outs,
+ (ins GPR64xsp:$Rn, GPR64:$Rm, SXTX_operand:$Imm3),
+ !strconcat(asmop, "$Rn, $Rm, $Imm3"),
+ [/* No Pattern: same as uxtx */],
+ NoItinerary>;
+}
+
+multiclass addsub_wxtx<bit op, bit S, string asmop, dag outs>
+{
+ def w_uxtx : A64I_addsubext<0b0, op, S, 0b00, 0b011,
+ outs,
+ (ins GPR32wsp:$Rn, GPR32:$Rm, UXTX_operand:$Imm3),
+ !strconcat(asmop, "$Rn, $Rm, $Imm3"),
+ [/* No pattern: probably same as uxtw */],
+ NoItinerary>;
+
+ def w_sxtx : A64I_addsubext<0b0, op, S, 0b00, 0b111,
+ outs,
+ (ins GPR32wsp:$Rn, GPR32:$Rm, SXTX_operand:$Imm3),
+ !strconcat(asmop, "$Rn, $Rm, $Imm3"),
+ [/* No Pattern: probably same as uxtw */],
+ NoItinerary>;
+}
+
+class SetRD<RegisterClass RC, SDPatternOperator op>
+ : PatFrag<(ops node:$lhs, node:$rhs), (set RC:$Rd, (op node:$lhs, node:$rhs))>;
+class SetNZCV<SDPatternOperator op>
+ : PatFrag<(ops node:$lhs, node:$rhs), (set NZCV, (op node:$lhs, node:$rhs))>;
+
+defm ADDxx :addsub_exts<0b1, 0b0, 0b0, "add\t$Rd, ", SetRD<GPR64xsp, add>,
+ (outs GPR64xsp:$Rd), extends_to_i64, GPR64xsp>,
+ addsub_xxtx< 0b0, 0b0, "add\t$Rd, ", SetRD<GPR64xsp, add>,
+ (outs GPR64xsp:$Rd)>;
+defm ADDww :addsub_exts<0b0, 0b0, 0b0, "add\t$Rd, ", SetRD<GPR32wsp, add>,
+ (outs GPR32wsp:$Rd), extends_to_i32, GPR32wsp>,
+ addsub_wxtx< 0b0, 0b0, "add\t$Rd, ",
+ (outs GPR32wsp:$Rd)>;
+defm SUBxx :addsub_exts<0b1, 0b1, 0b0, "sub\t$Rd, ", SetRD<GPR64xsp, sub>,
+ (outs GPR64xsp:$Rd), extends_to_i64, GPR64xsp>,
+ addsub_xxtx< 0b1, 0b0, "sub\t$Rd, ", SetRD<GPR64xsp, sub>,
+ (outs GPR64xsp:$Rd)>;
+defm SUBww :addsub_exts<0b0, 0b1, 0b0, "sub\t$Rd, ", SetRD<GPR32wsp, sub>,
+ (outs GPR32wsp:$Rd), extends_to_i32, GPR32wsp>,
+ addsub_wxtx< 0b1, 0b0, "sub\t$Rd, ",
+ (outs GPR32wsp:$Rd)>;
+
+let Defs = [NZCV] in {
+defm ADDSxx :addsub_exts<0b1, 0b0, 0b1, "adds\t$Rd, ", SetRD<GPR64, addc>,
+ (outs GPR64:$Rd), extends_to_i64, GPR64xsp>,
+ addsub_xxtx< 0b0, 0b1, "adds\t$Rd, ", SetRD<GPR64, addc>,
+ (outs GPR64:$Rd)>;
+defm ADDSww :addsub_exts<0b0, 0b0, 0b1, "adds\t$Rd, ", SetRD<GPR32, addc>,
+ (outs GPR32:$Rd), extends_to_i32, GPR32wsp>,
+ addsub_wxtx< 0b0, 0b1, "adds\t$Rd, ",
+ (outs GPR32:$Rd)>;
+defm SUBSxx :addsub_exts<0b1, 0b1, 0b1, "subs\t$Rd, ", SetRD<GPR64, subc>,
+ (outs GPR64:$Rd), extends_to_i64, GPR64xsp>,
+ addsub_xxtx< 0b1, 0b1, "subs\t$Rd, ", SetRD<GPR64, subc>,
+ (outs GPR64:$Rd)>;
+defm SUBSww :addsub_exts<0b0, 0b1, 0b1, "subs\t$Rd, ", SetRD<GPR32, subc>,
+ (outs GPR32:$Rd), extends_to_i32, GPR32wsp>,
+ addsub_wxtx< 0b1, 0b1, "subs\t$Rd, ",
+ (outs GPR32:$Rd)>;
+
+
+let Rd = 0b11111, isCompare = 1 in {
+defm CMNx : addsub_exts<0b1, 0b0, 0b1, "cmn\t", SetNZCV<A64cmn>,
+ (outs), extends_to_i64, GPR64xsp>,
+ addsub_xxtx< 0b0, 0b1, "cmn\t", SetNZCV<A64cmn>, (outs)>;
+defm CMNw : addsub_exts<0b0, 0b0, 0b1, "cmn\t", SetNZCV<A64cmn>,
+ (outs), extends_to_i32, GPR32wsp>,
+ addsub_wxtx< 0b0, 0b1, "cmn\t", (outs)>;
+defm CMPx : addsub_exts<0b1, 0b1, 0b1, "cmp\t", SetNZCV<A64cmp>,
+ (outs), extends_to_i64, GPR64xsp>,
+ addsub_xxtx< 0b1, 0b1, "cmp\t", SetNZCV<A64cmp>, (outs)>;
+defm CMPw : addsub_exts<0b0, 0b1, 0b1, "cmp\t", SetNZCV<A64cmp>,
+ (outs), extends_to_i32, GPR32wsp>,
+ addsub_wxtx< 0b1, 0b1, "cmp\t", (outs)>;
+}
+}
+
+// Now patterns for the operation without a shift being needed. No patterns are
+// created for uxtx/sxtx since they're non-uniform and it's expected that
+// add/sub (shifted register) will handle those cases anyway.
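+// For example, an i64 add of a sign-extended i32 value is matched to the sxtw
+// form with a zero shift amount (i.e. "add x0, x1, w2, sxtw #0").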
+multiclass addsubext_noshift_patterns<string prefix, SDPatternOperator nodeop,
+ RegisterClass GPRsp, extend_types exts>
+{
+ def : Pat<(nodeop GPRsp:$Rn, exts.uxtb),
+ (!cast<Instruction>(prefix # "w_uxtb") GPRsp:$Rn, GPR32:$Rm, 0)>;
+ def : Pat<(nodeop GPRsp:$Rn, exts.uxth),
+ (!cast<Instruction>(prefix # "w_uxth") GPRsp:$Rn, GPR32:$Rm, 0)>;
+ def : Pat<(nodeop GPRsp:$Rn, exts.uxtw),
+ (!cast<Instruction>(prefix # "w_uxtw") GPRsp:$Rn, GPR32:$Rm, 0)>;
+
+ def : Pat<(nodeop GPRsp:$Rn, exts.sxtb),
+ (!cast<Instruction>(prefix # "w_sxtb") GPRsp:$Rn, GPR32:$Rm, 0)>;
+ def : Pat<(nodeop GPRsp:$Rn, exts.sxth),
+ (!cast<Instruction>(prefix # "w_sxth") GPRsp:$Rn, GPR32:$Rm, 0)>;
+ def : Pat<(nodeop GPRsp:$Rn, exts.sxtw),
+ (!cast<Instruction>(prefix # "w_sxtw") GPRsp:$Rn, GPR32:$Rm, 0)>;
+}
+
+defm : addsubext_noshift_patterns<"ADDxx", add, GPR64xsp, extends_to_i64>;
+defm : addsubext_noshift_patterns<"ADDww", add, GPR32wsp, extends_to_i32>;
+defm : addsubext_noshift_patterns<"SUBxx", sub, GPR64xsp, extends_to_i64>;
+defm : addsubext_noshift_patterns<"SUBww", sub, GPR32wsp, extends_to_i32>;
+
+defm : addsubext_noshift_patterns<"CMNx", A64cmn, GPR64xsp, extends_to_i64>;
+defm : addsubext_noshift_patterns<"CMNw", A64cmn, GPR32wsp, extends_to_i32>;
+defm : addsubext_noshift_patterns<"CMPx", A64cmp, GPR64xsp, extends_to_i64>;
+defm : addsubext_noshift_patterns<"CMPw", A64cmp, GPR32wsp, extends_to_i32>;
+
+// An extend of "lsl #imm" is valid if and only if one of Rn and Rd is
+// sp/wsp. It is synonymous with uxtx/uxtw depending on the size of the
+// operation. Also permitted in this case is complete omission of the argument,
+// which implies "lsl #0".
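+// For example, "add sp, sp, x0" is equivalent to "add sp, sp, x0, uxtx #0",
+// and "add wsp, wsp, w0, lsl #2" to "add wsp, wsp, w0, uxtw #2".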
+multiclass lsl_aliases<string asmop, Instruction inst, RegisterClass GPR_Rd,
+ RegisterClass GPR_Rn, RegisterClass GPR_Rm>
+{
+ def : InstAlias<!strconcat(asmop, " $Rd, $Rn, $Rm"),
+ (inst GPR_Rd:$Rd, GPR_Rn:$Rn, GPR_Rm:$Rm, 0)>;
+
+ def : InstAlias<!strconcat(asmop, " $Rd, $Rn, $Rm, $LSL"),
+ (inst GPR_Rd:$Rd, GPR_Rn:$Rn, GPR_Rm:$Rm, LSL_extoperand:$LSL)>;
+
+}
+
+defm : lsl_aliases<"add", ADDxxx_uxtx, Rxsp, GPR64xsp, GPR64>;
+defm : lsl_aliases<"add", ADDxxx_uxtx, GPR64xsp, Rxsp, GPR64>;
+defm : lsl_aliases<"add", ADDwww_uxtw, Rwsp, GPR32wsp, GPR32>;
+defm : lsl_aliases<"add", ADDwww_uxtw, GPR32wsp, Rwsp, GPR32>;
+defm : lsl_aliases<"sub", SUBxxx_uxtx, Rxsp, GPR64xsp, GPR64>;
+defm : lsl_aliases<"sub", SUBxxx_uxtx, GPR64xsp, Rxsp, GPR64>;
+defm : lsl_aliases<"sub", SUBwww_uxtw, Rwsp, GPR32wsp, GPR32>;
+defm : lsl_aliases<"sub", SUBwww_uxtw, GPR32wsp, Rwsp, GPR32>;
+
+// Rd cannot be sp for flag-setting variants so only half of the aliases are
+// needed.
+defm : lsl_aliases<"adds", ADDSxxx_uxtx, GPR64, Rxsp, GPR64>;
+defm : lsl_aliases<"adds", ADDSwww_uxtw, GPR32, Rwsp, GPR32>;
+defm : lsl_aliases<"subs", SUBSxxx_uxtx, GPR64, Rxsp, GPR64>;
+defm : lsl_aliases<"subs", SUBSwww_uxtw, GPR32, Rwsp, GPR32>;
+
+// CMP unfortunately has to be different because the instruction doesn't have a
+// dest register.
+multiclass cmp_lsl_aliases<string asmop, Instruction inst,
+ RegisterClass GPR_Rn, RegisterClass GPR_Rm>
+{
+ def : InstAlias<!strconcat(asmop, " $Rn, $Rm"),
+ (inst GPR_Rn:$Rn, GPR_Rm:$Rm, 0)>;
+
+ def : InstAlias<!strconcat(asmop, " $Rn, $Rm, $LSL"),
+ (inst GPR_Rn:$Rn, GPR_Rm:$Rm, LSL_extoperand:$LSL)>;
+}
+
+defm : cmp_lsl_aliases<"cmp", CMPxx_uxtx, Rxsp, GPR64>;
+defm : cmp_lsl_aliases<"cmp", CMPww_uxtw, Rwsp, GPR32>;
+defm : cmp_lsl_aliases<"cmn", CMNxx_uxtx, Rxsp, GPR64>;
+defm : cmp_lsl_aliases<"cmn", CMNww_uxtw, Rwsp, GPR32>;
+
+//===----------------------------------------------------------------------===//
+// Add-subtract (immediate) instructions
+//===----------------------------------------------------------------------===//
+// Contains: ADD, ADDS, SUB, SUBS + aliases CMN, CMP, MOV
+
+// These instructions accept a 12-bit unsigned immediate, optionally shifted
+// left by 12 bits. The official assembly format specifies a 12-bit immediate with
+// one of "", "LSL #0", "LSL #12" supplementary operands.
+
+// There are surprisingly few ways to make this work with TableGen, so this
+// implementation has separate instructions for the "LSL #0" and "LSL #12"
+// variants.
+
+// If the MCInst retained a single combined immediate (which could be 0x123000,
+// for example) then both components (imm & shift) would have to be delegated to
+// a single assembly operand. This would entail a separate operand parser
+// (because the LSL would have to live in the same AArch64Operand as the
+// immediate to be accessible); assembly parsing is rather complex and
+// error-prone C++ code.
+//
+// By splitting the immediate, we can delegate handling this optional operand to
+// an InstAlias. Supporting functions to generate the correct MCInst are still
+// required, but these are essentially trivial and parsing can remain generic.
+//
+// Rejected plans with rationale:
+// ------------------------------
+//
+// In an ideal world you'd have two first-class immediate operands (in
+// InOperandList, specifying imm12 and shift). Unfortunately this is not
+// selectable by any means I could discover.
+//
+// An Instruction with two MCOperands hidden behind a single entry in
+// InOperandList (expanded by ComplexPatterns and MIOperandInfo) was functional,
+// but required more C++ code to handle encoding/decoding. Parsing (the intended
+// main beneficiary) ended up equally complex because of the optional nature of
+// "LSL #0".
+//
+// Attempting to circumvent the need for a custom OperandParser above by giving
+// InstAliases without the "lsl #0" failed. add/sub could be accommodated but
+// the cmp/cmn aliases didn't use the MIOperandInfo to determine how operands
+// should be parsed: there was no way to accommodate an "lsl #12".
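+//
+// For example, the constant 0x123000 can only be encoded with the "lsl #12"
+// variant (imm12 = 0x123), while 0x123 itself uses the "lsl #0" variant.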
+
+let ParserMethod = "ParseImmWithLSLOperand",
+ RenderMethod = "addImmWithLSLOperands" in
+{
+ // Derived PredicateMethod fields are different for each
+ def addsubimm_lsl0_asmoperand : AsmOperandClass
+ {
+ let Name = "AddSubImmLSL0";
+ }
+
+ def addsubimm_lsl12_asmoperand : AsmOperandClass
+ {
+ let Name = "AddSubImmLSL12";
+ }
+}
+
+def shr_12_XFORM : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getSExtValue() >> 12, MVT::i32);
+}]>;
+
+def shr_12_neg_XFORM : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant((-N->getSExtValue()) >> 12, MVT::i32);
+}]>;
+
+def neg_XFORM : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(-N->getSExtValue(), MVT::i32);
+}]>;
+
+
+multiclass addsub_imm_operands<ValueType ty>
+{
+ let PrintMethod = "printAddSubImmLSL0Operand",
+ EncoderMethod = "getAddSubImmOpValue",
+ ParserMatchClass = addsubimm_lsl0_asmoperand in
+ {
+ def _posimm_lsl0 : Operand<ty>,
+ ImmLeaf<ty, [{ return Imm >= 0 && (Imm & ~0xfff) == 0; }]>;
+ def _negimm_lsl0 : Operand<ty>,
+ ImmLeaf<ty, [{ return Imm < 0 && (-Imm & ~0xfff) == 0; }],
+ neg_XFORM>;
+ }
+
+ let PrintMethod = "printAddSubImmLSL12Operand",
+ EncoderMethod = "getAddSubImmOpValue",
+ ParserMatchClass = addsubimm_lsl12_asmoperand in
+ {
+ def _posimm_lsl12 : Operand<ty>,
+ ImmLeaf<ty, [{ return Imm >= 0 && (Imm & ~0xfff000) == 0; }],
+ shr_12_XFORM>;
+
+ def _negimm_lsl12 : Operand<ty>,
+ ImmLeaf<ty, [{ return Imm < 0 && (-Imm & ~0xfff000) == 0; }],
+ shr_12_neg_XFORM>;
+ }
+}
+
+// The add operands don't need any transformation
+defm addsubimm_operand_i32 : addsub_imm_operands<i32>;
+defm addsubimm_operand_i64 : addsub_imm_operands<i64>;
+
+multiclass addsubimm_varieties<string prefix, bit sf, bit op, bits<2> shift,
+ string asmop, string cmpasmop,
+ Operand imm_operand, Operand cmp_imm_operand,
+ RegisterClass GPR, RegisterClass GPRsp,
+ AArch64Reg ZR>
+{
+ // All registers for non-S variants allow SP
+ def _s : A64I_addsubimm<sf, op, 0b0, shift,
+ (outs GPRsp:$Rd),
+ (ins GPRsp:$Rn, imm_operand:$Imm12),
+ !strconcat(asmop, "\t$Rd, $Rn, $Imm12"),
+ [(set GPRsp:$Rd,
+ (add GPRsp:$Rn, imm_operand:$Imm12))],
+ NoItinerary>;
+
+
+ // S variants can read SP but would write to ZR
+ def _S : A64I_addsubimm<sf, op, 0b1, shift,
+ (outs GPR:$Rd),
+ (ins GPRsp:$Rn, imm_operand:$Imm12),
+ !strconcat(asmop, "s\t$Rd, $Rn, $Imm12"),
+ [(set GPR:$Rd, (addc GPRsp:$Rn, imm_operand:$Imm12))],
+ NoItinerary> {
+ let Defs = [NZCV];
+ }
+
+ // Note that the pattern here for ADDS is subtle. Canonically CMP
+  // a, b becomes SUBS a, b; when b < 0 this is equivalent to
+  // ADDS a, (-b), but the equivalence does not hold in general.
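+  // For example, a comparison of x0 against -4 sets the same flags as
+  // "cmn x0, #4" (ADDS XZR, x0, #4), which is how it is selected here.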
+ def _cmp : A64I_addsubimm<sf, op, 0b1, shift,
+ (outs), (ins GPRsp:$Rn, imm_operand:$Imm12),
+ !strconcat(cmpasmop, " $Rn, $Imm12"),
+ [(set NZCV,
+ (A64cmp GPRsp:$Rn, cmp_imm_operand:$Imm12))],
+ NoItinerary>
+ {
+ let Rd = 0b11111;
+ let Defs = [NZCV];
+ let isCompare = 1;
+ }
+}
+
+
+multiclass addsubimm_shifts<string prefix, bit sf, bit op,
+ string asmop, string cmpasmop, string operand, string cmpoperand,
+ RegisterClass GPR, RegisterClass GPRsp, AArch64Reg ZR>
+{
+ defm _lsl0 : addsubimm_varieties<prefix # "_lsl0", sf, op, 0b00,
+ asmop, cmpasmop,
+ !cast<Operand>(operand # "_lsl0"),
+ !cast<Operand>(cmpoperand # "_lsl0"),
+ GPR, GPRsp, ZR>;
+
+ defm _lsl12 : addsubimm_varieties<prefix # "_lsl12", sf, op, 0b01,
+ asmop, cmpasmop,
+ !cast<Operand>(operand # "_lsl12"),
+ !cast<Operand>(cmpoperand # "_lsl12"),
+ GPR, GPRsp, ZR>;
+}
+
+defm ADDwwi : addsubimm_shifts<"ADDwi", 0b0, 0b0, "add", "cmn",
+ "addsubimm_operand_i32_posimm",
+ "addsubimm_operand_i32_negimm",
+ GPR32, GPR32wsp, WZR>;
+defm ADDxxi : addsubimm_shifts<"ADDxi", 0b1, 0b0, "add", "cmn",
+ "addsubimm_operand_i64_posimm",
+ "addsubimm_operand_i64_negimm",
+ GPR64, GPR64xsp, XZR>;
+defm SUBwwi : addsubimm_shifts<"SUBwi", 0b0, 0b1, "sub", "cmp",
+ "addsubimm_operand_i32_negimm",
+ "addsubimm_operand_i32_posimm",
+ GPR32, GPR32wsp, WZR>;
+defm SUBxxi : addsubimm_shifts<"SUBxi", 0b1, 0b1, "sub", "cmp",
+ "addsubimm_operand_i64_negimm",
+ "addsubimm_operand_i64_posimm",
+ GPR64, GPR64xsp, XZR>;
+
+multiclass MOVsp<RegisterClass GPRsp, RegisterClass SP, Instruction addop>
+{
+ def _fromsp : InstAlias<"mov $Rd, $Rn",
+ (addop GPRsp:$Rd, SP:$Rn, 0),
+ 0b1>;
+
+ def _tosp : InstAlias<"mov $Rd, $Rn",
+ (addop SP:$Rd, GPRsp:$Rn, 0),
+ 0b1>;
+}
+
+// Recall Rxsp is a RegisterClass containing *just* xsp.
+defm MOVxx : MOVsp<GPR64xsp, Rxsp, ADDxxi_lsl0_s>;
+defm MOVww : MOVsp<GPR32wsp, Rwsp, ADDwwi_lsl0_s>;
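+// For example, "mov x0, sp" is equivalent to "add x0, sp, #0", and
+// "mov sp, x0" to "add sp, x0, #0".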
+
+//===----------------------------------------------------------------------===//
+// Add-subtract (shifted register) instructions
+//===----------------------------------------------------------------------===//
+// Contains: ADD, ADDS, SUB, SUBS + aliases CMN, CMP, NEG, NEGS
+
+//===-------------------------------
+// 1. The "shifed register" operands. Shared with logical insts.
+//===-------------------------------
+
+multiclass shift_operands<string prefix, string form>
+{
+ def _asmoperand_i32 : AsmOperandClass
+ {
+ let Name = "Shift" # form # "i32";
+ let RenderMethod = "addShiftOperands";
+ let PredicateMethod
+ = "isShift<A64SE::" # form # ", false>";
+ }
+
+ // Note that the operand type is intentionally i64 because the DAGCombiner
+ // puts these into a canonical form.
+ def _i32 : Operand<i64>, ImmLeaf<i64, [{ return Imm >= 0 && Imm <= 31; }]>
+ {
+ let ParserMatchClass
+ = !cast<AsmOperandClass>(prefix # "_asmoperand_i32");
+ let PrintMethod = "printShiftOperand<A64SE::" # form # ">";
+ let DecoderMethod = "Decode32BitShiftOperand";
+ }
+
+ def _asmoperand_i64 : AsmOperandClass
+ {
+ let Name = "Shift" # form # "i64";
+ let RenderMethod = "addShiftOperands";
+ let PredicateMethod
+ = "isShift<A64SE::" # form # ", true>";
+ }
+
+ def _i64 : Operand<i64>, ImmLeaf<i64, [{ return Imm >= 0 && Imm <= 63; }]>
+ {
+ let ParserMatchClass
+ = !cast<AsmOperandClass>(prefix # "_asmoperand_i64");
+ let PrintMethod = "printShiftOperand<A64SE::" # form # ">";
+ }
+}
+
+defm lsl_operand : shift_operands<"lsl_operand", "LSL">;
+defm lsr_operand : shift_operands<"lsr_operand", "LSR">;
+defm asr_operand : shift_operands<"asr_operand", "ASR">;
+
+// Not used for add/sub, but defined here for completeness. The "logical
+// (shifted register)" instructions *do* have an ROR variant.
+defm ror_operand : shift_operands<"ror_operand", "ROR">;
+
+//===-------------------------------
+// 2. The basic 3.5-operand ADD/SUB/ADDS/SUBS instructions.
+//===-------------------------------
+
+// N.b. the commutable parameter is just !N. It will be first against the wall
+// when the revolution comes.
+multiclass addsub_shifts<string prefix, bit sf, bit op, bit s, bit commutable,
+ string asmop, SDPatternOperator opfrag, string sty,
+ RegisterClass GPR, list<Register> defs>
+{
+ let isCommutable = commutable, Defs = defs in {
+ def _lsl : A64I_addsubshift<sf, op, s, 0b00,
+ (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm,
+ !cast<Operand>("lsl_operand_" # sty):$Imm6),
+ !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"),
+ [(set GPR:$Rd, (opfrag GPR:$Rn, (shl GPR:$Rm,
+ !cast<Operand>("lsl_operand_" # sty):$Imm6))
+ )],
+ NoItinerary>;
+
+ def _lsr : A64I_addsubshift<sf, op, s, 0b01,
+ (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm,
+ !cast<Operand>("lsr_operand_" # sty):$Imm6),
+ !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"),
+ [(set GPR:$Rd, (opfrag GPR:$Rn, (srl GPR:$Rm,
+ !cast<Operand>("lsr_operand_" # sty):$Imm6))
+ )],
+ NoItinerary>;
+
+ def _asr : A64I_addsubshift<sf, op, s, 0b10,
+ (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm,
+ !cast<Operand>("asr_operand_" # sty):$Imm6),
+ !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"),
+ [(set GPR:$Rd, (opfrag GPR:$Rn, (sra GPR:$Rm,
+ !cast<Operand>("asr_operand_" # sty):$Imm6))
+ )],
+ NoItinerary>;
+ }
+
+ def _noshift
+ : InstAlias<!strconcat(asmop, " $Rd, $Rn, $Rm"),
+ (!cast<Instruction>(prefix # "_lsl") GPR:$Rd, GPR:$Rn,
+ GPR:$Rm, 0)>;
+
+ def : Pat<(opfrag GPR:$Rn, GPR:$Rm),
+ (!cast<Instruction>(prefix # "_lsl") GPR:$Rn, GPR:$Rm, 0)>;
+}
+
+multiclass addsub_sizes<string prefix, bit op, bit s, bit commutable,
+ string asmop, SDPatternOperator opfrag,
+ list<Register> defs>
+{
+ defm xxx : addsub_shifts<prefix # "xxx", 0b1, op, s,
+ commutable, asmop, opfrag, "i64", GPR64, defs>;
+ defm www : addsub_shifts<prefix # "www", 0b0, op, s,
+ commutable, asmop, opfrag, "i32", GPR32, defs>;
+}
+
+
+defm ADD : addsub_sizes<"ADD", 0b0, 0b0, 0b1, "add", add, []>;
+defm SUB : addsub_sizes<"SUB", 0b1, 0b0, 0b0, "sub", sub, []>;
+
+defm ADDS : addsub_sizes<"ADDS", 0b0, 0b1, 0b1, "adds", addc, [NZCV]>;
+defm SUBS : addsub_sizes<"SUBS", 0b1, 0b1, 0b0, "subs", subc, [NZCV]>;
+
+//===-------------------------------
+// 3. The NEG/NEGS aliases
+//===-------------------------------
+
+multiclass neg_alias<Instruction INST, RegisterClass GPR,
+ Register ZR, Operand shift_operand, SDNode shiftop>
+{
+ def : InstAlias<"neg $Rd, $Rm, $Imm6",
+ (INST GPR:$Rd, ZR, GPR:$Rm, shift_operand:$Imm6)>;
+
+ def : Pat<(sub 0, (shiftop GPR:$Rm, shift_operand:$Imm6)),
+ (INST ZR, GPR:$Rm, shift_operand:$Imm6)>;
+}
+
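+// For example, "neg w0, w1, lsl #2" is accepted as "sub w0, wzr, w1, lsl #2",
+// and (sub 0, (shl w1, 2)) is selected to the same instruction.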
+defm : neg_alias<SUBwww_lsl, GPR32, WZR, lsl_operand_i32, shl>;
+defm : neg_alias<SUBwww_lsr, GPR32, WZR, lsr_operand_i32, srl>;
+defm : neg_alias<SUBwww_asr, GPR32, WZR, asr_operand_i32, sra>;
+def : InstAlias<"neg $Rd, $Rm", (SUBwww_lsl GPR32:$Rd, WZR, GPR32:$Rm, 0)>;
+def : Pat<(sub 0, GPR32:$Rm), (SUBwww_lsl WZR, GPR32:$Rm, 0)>;
+
+defm : neg_alias<SUBxxx_lsl, GPR64, XZR, lsl_operand_i64, shl>;
+defm : neg_alias<SUBxxx_lsr, GPR64, XZR, lsr_operand_i64, srl>;
+defm : neg_alias<SUBxxx_asr, GPR64, XZR, asr_operand_i64, sra>;
+def : InstAlias<"neg $Rd, $Rm", (SUBxxx_lsl GPR64:$Rd, XZR, GPR64:$Rm, 0)>;
+def : Pat<(sub 0, GPR64:$Rm), (SUBxxx_lsl XZR, GPR64:$Rm, 0)>;
+
+// NEGS doesn't get any patterns yet: defining multiple outputs means C++ has to
+// be involved.
+class negs_alias<Instruction INST, RegisterClass GPR,
+ Register ZR, Operand shift_operand, SDNode shiftop>
+ : InstAlias<"negs $Rd, $Rm, $Imm6",
+ (INST GPR:$Rd, ZR, GPR:$Rm, shift_operand:$Imm6)>;
+
+def : negs_alias<SUBSwww_lsl, GPR32, WZR, lsl_operand_i32, shl>;
+def : negs_alias<SUBSwww_lsr, GPR32, WZR, lsr_operand_i32, srl>;
+def : negs_alias<SUBSwww_asr, GPR32, WZR, asr_operand_i32, sra>;
+def : InstAlias<"negs $Rd, $Rm", (SUBSwww_lsl GPR32:$Rd, WZR, GPR32:$Rm, 0)>;
+
+def : negs_alias<SUBSxxx_lsl, GPR64, XZR, lsl_operand_i64, shl>;
+def : negs_alias<SUBSxxx_lsr, GPR64, XZR, lsr_operand_i64, srl>;
+def : negs_alias<SUBSxxx_asr, GPR64, XZR, asr_operand_i64, sra>;
+def : InstAlias<"negs $Rd, $Rm", (SUBSxxx_lsl GPR64:$Rd, XZR, GPR64:$Rm, 0)>;
+
+//===-------------------------------
+// 4. The CMP/CMN aliases
+//===-------------------------------
+
+multiclass cmp_shifts<string prefix, bit sf, bit op, bit commutable,
+ string asmop, SDPatternOperator opfrag, string sty,
+ RegisterClass GPR>
+{
+ let isCommutable = commutable, Rd = 0b11111, Defs = [NZCV] in {
+ def _lsl : A64I_addsubshift<sf, op, 0b1, 0b00,
+ (outs),
+ (ins GPR:$Rn, GPR:$Rm,
+ !cast<Operand>("lsl_operand_" # sty):$Imm6),
+ !strconcat(asmop, "\t$Rn, $Rm, $Imm6"),
+ [(set NZCV, (opfrag GPR:$Rn, (shl GPR:$Rm,
+ !cast<Operand>("lsl_operand_" # sty):$Imm6))
+ )],
+ NoItinerary>;
+
+ def _lsr : A64I_addsubshift<sf, op, 0b1, 0b01,
+ (outs),
+ (ins GPR:$Rn, GPR:$Rm,
+ !cast<Operand>("lsr_operand_" # sty):$Imm6),
+ !strconcat(asmop, "\t$Rn, $Rm, $Imm6"),
+ [(set NZCV, (opfrag GPR:$Rn, (srl GPR:$Rm,
+ !cast<Operand>("lsr_operand_" # sty):$Imm6))
+ )],
+ NoItinerary>;
+
+ def _asr : A64I_addsubshift<sf, op, 0b1, 0b10,
+ (outs),
+ (ins GPR:$Rn, GPR:$Rm,
+ !cast<Operand>("asr_operand_" # sty):$Imm6),
+ !strconcat(asmop, "\t$Rn, $Rm, $Imm6"),
+ [(set NZCV, (opfrag GPR:$Rn, (sra GPR:$Rm,
+ !cast<Operand>("asr_operand_" # sty):$Imm6))
+ )],
+ NoItinerary>;
+ }
+
+ def _noshift
+ : InstAlias<!strconcat(asmop, " $Rn, $Rm"),
+ (!cast<Instruction>(prefix # "_lsl") GPR:$Rn, GPR:$Rm, 0)>;
+
+ def : Pat<(opfrag GPR:$Rn, GPR:$Rm),
+ (!cast<Instruction>(prefix # "_lsl") GPR:$Rn, GPR:$Rm, 0)>;
+}
+
+defm CMPww : cmp_shifts<"CMPww", 0b0, 0b1, 0b0, "cmp", A64cmp, "i32", GPR32>;
+defm CMPxx : cmp_shifts<"CMPxx", 0b1, 0b1, 0b0, "cmp", A64cmp, "i64", GPR64>;
+
+defm CMNww : cmp_shifts<"CMNww", 0b0, 0b0, 0b1, "cmn", A64cmn, "i32", GPR32>;
+defm CMNxx : cmp_shifts<"CMNxx", 0b1, 0b0, 0b1, "cmn", A64cmn, "i64", GPR64>;
+
+//===----------------------------------------------------------------------===//
+// Add-subtract (with carry) instructions
+//===----------------------------------------------------------------------===//
+// Contains: ADC, ADCS, SBC, SBCS + aliases NGC, NGCS
+
+multiclass A64I_addsubcarrySizes<bit op, bit s, string asmop>
+{
+ let Uses = [NZCV] in
+ {
+ def www : A64I_addsubcarry<0b0, op, s, 0b000000,
+ (outs GPR32:$Rd), (ins GPR32:$Rn, GPR32:$Rm),
+ !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
+ [], NoItinerary>;
+
+ def xxx : A64I_addsubcarry<0b1, op, s, 0b000000,
+ (outs GPR64:$Rd), (ins GPR64:$Rn, GPR64:$Rm),
+ !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
+ [], NoItinerary>;
+ }
+}
+
+let isCommutable = 1 in
+{
+ defm ADC : A64I_addsubcarrySizes<0b0, 0b0, "adc">;
+}
+
+defm SBC : A64I_addsubcarrySizes<0b1, 0b0, "sbc">;
+
+let Defs = [NZCV] in
+{
+ let isCommutable = 1 in
+ {
+ defm ADCS : A64I_addsubcarrySizes<0b0, 0b1, "adcs">;
+ }
+
+ defm SBCS : A64I_addsubcarrySizes<0b1, 0b1, "sbcs">;
+}
+
+def : InstAlias<"ngc $Rd, $Rm", (SBCwww GPR32:$Rd, WZR, GPR32:$Rm)>;
+def : InstAlias<"ngc $Rd, $Rm", (SBCxxx GPR64:$Rd, XZR, GPR64:$Rm)>;
+def : InstAlias<"ngcs $Rd, $Rm", (SBCSwww GPR32:$Rd, WZR, GPR32:$Rm)>;
+def : InstAlias<"ngcs $Rd, $Rm", (SBCSxxx GPR64:$Rd, XZR, GPR64:$Rm)>;
+
+// Note that adde and sube can form a chain longer than two (e.g. for 256-bit
+// addition). So the flag-setting instructions are appropriate.
+def : Pat<(adde GPR32:$Rn, GPR32:$Rm), (ADCSwww GPR32:$Rn, GPR32:$Rm)>;
+def : Pat<(adde GPR64:$Rn, GPR64:$Rm), (ADCSxxx GPR64:$Rn, GPR64:$Rm)>;
+def : Pat<(sube GPR32:$Rn, GPR32:$Rm), (SBCSwww GPR32:$Rn, GPR32:$Rm)>;
+def : Pat<(sube GPR64:$Rn, GPR64:$Rm), (SBCSxxx GPR64:$Rn, GPR64:$Rm)>;
+
+//===----------------------------------------------------------------------===//
+// Bitfield
+//===----------------------------------------------------------------------===//
+// Contains: SBFM, BFM, UBFM, [SU]XT[BHW], ASR, LSR, LSL, SBFI[ZX], BFI, BFXIL,
+// UBFIZ, UBFX
+
+// Because of the rather complicated nearly-overlapping aliases, the decoding of
+// this range of instructions is handled manually. The architectural
+// instructions are BFM, SBFM and UBFM but a disassembler should never produce
+// these.
+//
+// In the end, the best option was to use BFM instructions for decoding under
+// almost all circumstances, but to create aliasing *Instructions* for each of
+// the canonical forms and specify a completely custom decoder which would
+// substitute the correct MCInst as needed.
+//
+// This also simplifies instruction selection, parsing etc because the MCInsts
+// have a shape that's closer to their use in code.
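+//
+// For example, "sbfx x0, x1, #4, #8" assembles to the same encoding as
+// "sbfm x0, x1, #4, #11" (ImmS = lsb + width - 1).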
+
+//===-------------------------------
+// 1. The architectural BFM instructions
+//===-------------------------------
+
+def uimm5_asmoperand : AsmOperandClass
+{
+ let Name = "UImm5";
+ let PredicateMethod = "isUImm<5>";
+ let RenderMethod = "addImmOperands";
+}
+
+def uimm6_asmoperand : AsmOperandClass
+{
+ let Name = "UImm6";
+ let PredicateMethod = "isUImm<6>";
+ let RenderMethod = "addImmOperands";
+}
+
+def bitfield32_imm : Operand<i64>,
+ ImmLeaf<i64, [{ return Imm >= 0 && Imm < 32; }]>
+{
+ let ParserMatchClass = uimm5_asmoperand;
+
+ let DecoderMethod = "DecodeBitfield32ImmOperand";
+}
+
+
+def bitfield64_imm : Operand<i64>,
+ ImmLeaf<i64, [{ return Imm >= 0 && Imm < 64; }]>
+{
+ let ParserMatchClass = uimm6_asmoperand;
+
+ // Default decoder works in 64-bit case: the 6-bit field can take any value.
+}
+
+multiclass A64I_bitfieldSizes<bits<2> opc, string asmop>
+{
+ def wwii : A64I_bitfield<0b0, opc, 0b0, (outs GPR32:$Rd),
+ (ins GPR32:$Rn, bitfield32_imm:$ImmR, bitfield32_imm:$ImmS),
+ !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"),
+ [], NoItinerary>
+ {
+ let DecoderMethod = "DecodeBitfieldInstruction";
+ }
+
+ def xxii : A64I_bitfield<0b1, opc, 0b1, (outs GPR64:$Rd),
+ (ins GPR64:$Rn, bitfield64_imm:$ImmR, bitfield64_imm:$ImmS),
+ !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"),
+ [], NoItinerary>
+ {
+ let DecoderMethod = "DecodeBitfieldInstruction";
+ }
+}
+
+defm SBFM : A64I_bitfieldSizes<0b00, "sbfm">;
+defm UBFM : A64I_bitfieldSizes<0b10, "ubfm">;
+
+// BFM instructions modify the destination register rather than defining it
+// completely.
+def BFMwwii :
+ A64I_bitfield<0b0, 0b01, 0b0, (outs GPR32:$Rd),
+ (ins GPR32:$src, GPR32:$Rn, bitfield32_imm:$ImmR, bitfield32_imm:$ImmS),
+ "bfm\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary>
+{
+ let DecoderMethod = "DecodeBitfieldInstruction";
+ let Constraints = "$src = $Rd";
+}
+
+def BFMxxii :
+ A64I_bitfield<0b1, 0b01, 0b1, (outs GPR64:$Rd),
+ (ins GPR64:$src, GPR64:$Rn, bitfield64_imm:$ImmR, bitfield64_imm:$ImmS),
+ "bfm\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary>
+{
+ let DecoderMethod = "DecodeBitfieldInstruction";
+ let Constraints = "$src = $Rd";
+}
+
+
+//===-------------------------------
+// 2. Extend aliases to 64-bit dest
+//===-------------------------------
+
+// Unfortunately the extensions that end up as 64-bits cannot be handled by an
+// instruction alias: their syntax is (for example) "SXTB x0, w0", which needs
+// to be mapped to "SBFM x0, x0, #0, #7" (changing the class of Rn). InstAlias is
+// not capable of such a map as far as I'm aware.
+
+// Note that these instructions are strictly more specific than the
+// BFM ones (in ImmR) so they can handle their own decoding.
+class A64I_bf_ext<bit sf, bits<2> opc, RegisterClass GPRDest, string asmop,
+ bits<6> imms, dag pattern>
+ : A64I_bitfield<sf, opc, sf,
+ (outs GPRDest:$Rd), (ins GPR32:$Rn),
+ !strconcat(asmop, "\t$Rd, $Rn"),
+ [(set GPRDest:$Rd, pattern)], NoItinerary>
+{
+ let ImmR = 0b000000;
+ let ImmS = imms;
+}
+
+// Signed extensions
+def SXTBxw : A64I_bf_ext<0b1, 0b00, GPR64, "sxtb", 7,
+ (sext_inreg (anyext GPR32:$Rn), i8)>;
+def SXTBww : A64I_bf_ext<0b0, 0b00, GPR32, "sxtb", 7,
+ (sext_inreg GPR32:$Rn, i8)>;
+def SXTHxw : A64I_bf_ext<0b1, 0b00, GPR64, "sxth", 15,
+ (sext_inreg (anyext GPR32:$Rn), i16)>;
+def SXTHww : A64I_bf_ext<0b0, 0b00, GPR32, "sxth", 15,
+ (sext_inreg GPR32:$Rn, i16)>;
+def SXTWxw : A64I_bf_ext<0b1, 0b00, GPR64, "sxtw", 31, (sext GPR32:$Rn)>;
+
+// Unsigned extensions
+def UXTBww : A64I_bf_ext<0b0, 0b10, GPR32, "uxtb", 7,
+ (and GPR32:$Rn, 255)>;
+def UXTHww : A64I_bf_ext<0b0, 0b10, GPR32, "uxth", 15,
+ (and GPR32:$Rn, 65535)>;
+
+// The 64-bit unsigned variants are not strictly architectural but recommended
+// for consistency.
+let isAsmParserOnly = 1 in
+{
+ def UXTBxw : A64I_bf_ext<0b0, 0b10, GPR64, "uxtb", 7,
+ (and (anyext GPR32:$Rn), 255)>;
+ def UXTHxw : A64I_bf_ext<0b0, 0b10, GPR64, "uxth", 15,
+ (and (anyext GPR32:$Rn), 65535)>;
+}
+
+// Extra patterns for when the source register is actually 64-bits
+// too. There's no architectural difference here, it's just LLVM
+// shenanigans. There's no need for equivalent zero-extension patterns
+// because they'll already be caught by logical (immediate) matching.
+def : Pat<(sext_inreg GPR64:$Rn, i8),
+ (SXTBxw (EXTRACT_SUBREG GPR64:$Rn, sub_32))>;
+def : Pat<(sext_inreg GPR64:$Rn, i16),
+ (SXTHxw (EXTRACT_SUBREG GPR64:$Rn, sub_32))>;
+def : Pat<(sext_inreg GPR64:$Rn, i32),
+ (SXTWxw (EXTRACT_SUBREG GPR64:$Rn, sub_32))>;
+
+
+//===-------------------------------
+// 3. Aliases for ASR and LSR (the simple shifts)
+//===-------------------------------
+
+// These also handle their own decoding because ImmS being set makes
+// them take precedence over BFM.
+multiclass A64I_shift<bits<2> opc, string asmop, SDNode opnode>
+{
+ def wwi : A64I_bitfield<0b0, opc, 0b0,
+ (outs GPR32:$Rd), (ins GPR32:$Rn, bitfield32_imm:$ImmR),
+ !strconcat(asmop, "\t$Rd, $Rn, $ImmR"),
+ [(set GPR32:$Rd, (opnode GPR32:$Rn, bitfield32_imm:$ImmR))],
+ NoItinerary>
+ {
+ let ImmS = 31;
+ }
+
+ def xxi : A64I_bitfield<0b1, opc, 0b1,
+ (outs GPR64:$Rd), (ins GPR64:$Rn, bitfield64_imm:$ImmR),
+ !strconcat(asmop, "\t$Rd, $Rn, $ImmR"),
+ [(set GPR64:$Rd, (opnode GPR64:$Rn, bitfield64_imm:$ImmR))],
+ NoItinerary>
+ {
+ let ImmS = 63;
+ }
+
+}
+
+defm ASR : A64I_shift<0b00, "asr", sra>;
+defm LSR : A64I_shift<0b10, "lsr", srl>;
+
+//===-------------------------------
+// 4. Aliases for LSL
+//===-------------------------------
+
+// Unfortunately LSL and subsequent aliases are much more complicated. We need
+// to be able to say that certain output instruction fields depend in a complex
+// manner on combinations of input assembly fields.
+//
+// MIOperandInfo *might* have been able to do it, but at the cost of
+// significantly more C++ code.
+
+// N.b. contrary to usual practice these operands store the shift rather than
+// the machine bits in an MCInst. The complexity overhead of consistency
+// outweighed the benefits in this case (custom asmparser, printer and selection
+// vs custom encoder).
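+//
+// For example, "lsl x0, x1, #3" is equivalent to "ubfm x0, x1, #61, #60"
+// (ImmR = -shift MOD 64, ImmS = 63 - shift); the EncoderMethods below perform
+// that conversion from the stored shift amount.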
+def bitfield32_lsl_imm : Operand<i64>,
+ ImmLeaf<i64, [{ return Imm >= 0 && Imm <= 31; }]>
+{
+ let ParserMatchClass = uimm5_asmoperand;
+ let EncoderMethod = "getBitfield32LSLOpValue";
+}
+
+def bitfield64_lsl_imm : Operand<i64>,
+ ImmLeaf<i64, [{ return Imm >= 0 && Imm <= 63; }]>
+{
+ let ParserMatchClass = uimm6_asmoperand;
+ let EncoderMethod = "getBitfield64LSLOpValue";
+}
+
+class A64I_bitfield_lsl<bit sf, RegisterClass GPR, Operand operand>
+ : A64I_bitfield<sf, 0b10, sf, (outs GPR:$Rd), (ins GPR:$Rn, operand:$FullImm),
+ "lsl\t$Rd, $Rn, $FullImm",
+ [(set GPR:$Rd, (shl GPR:$Rn, operand:$FullImm))],
+ NoItinerary>
+{
+ bits<12> FullImm;
+ let ImmR = FullImm{5-0};
+ let ImmS = FullImm{11-6};
+
+ // No disassembler allowed because it would overlap with BFM which does the
+ // actual work.
+ let isAsmParserOnly = 1;
+}
+
+def LSLwwi : A64I_bitfield_lsl<0b0, GPR32, bitfield32_lsl_imm>;
+def LSLxxi : A64I_bitfield_lsl<0b1, GPR64, bitfield64_lsl_imm>;
+
+//===-------------------------------
+// 5. Aliases for bitfield extract instructions
+//===-------------------------------
+
+def bfx32_width_asmoperand : AsmOperandClass
+{
+ let Name = "BFX32Width";
+ let PredicateMethod = "isBitfieldWidth<32>";
+ let RenderMethod = "addBFXWidthOperands";
+}
+
+def bfx32_width : Operand<i64>, ImmLeaf<i64, [{ return true; }]>
+{
+ let PrintMethod = "printBFXWidthOperand";
+ let ParserMatchClass = bfx32_width_asmoperand;
+}
+
+def bfx64_width_asmoperand : AsmOperandClass
+{
+ let Name = "BFX64Width";
+ let PredicateMethod = "isBitfieldWidth<64>";
+ let RenderMethod = "addBFXWidthOperands";
+}
+
+def bfx64_width : Operand<i64>
+{
+ let PrintMethod = "printBFXWidthOperand";
+ let ParserMatchClass = bfx64_width_asmoperand;
+}
+
+
+multiclass A64I_bitfield_extract<bits<2> opc, string asmop, SDNode op>
+{
+ def wwii : A64I_bitfield<0b0, opc, 0b0, (outs GPR32:$Rd),
+ (ins GPR32:$Rn, bitfield32_imm:$ImmR, bfx32_width:$ImmS),
+ !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"),
+ [(set GPR32:$Rd, (op GPR32:$Rn, imm:$ImmR, imm:$ImmS))],
+ NoItinerary>
+ {
+ // As above, no disassembler allowed.
+ let isAsmParserOnly = 1;
+ }
+
+ def xxii : A64I_bitfield<0b1, opc, 0b1, (outs GPR64:$Rd),
+ (ins GPR64:$Rn, bitfield64_imm:$ImmR, bfx64_width:$ImmS),
+ !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"),
+ [(set GPR64:$Rd, (op GPR64:$Rn, imm:$ImmR, imm:$ImmS))],
+ NoItinerary>
+ {
+ // As above, no disassembler allowed.
+ let isAsmParserOnly = 1;
+ }
+}
+
+defm SBFX : A64I_bitfield_extract<0b00, "sbfx", A64Sbfx>;
+defm UBFX : A64I_bitfield_extract<0b10, "ubfx", A64Ubfx>;
+
+// Again, variants based on BFM modify Rd so need it as an input too.
+def BFXILwwii : A64I_bitfield<0b0, 0b01, 0b0, (outs GPR32:$Rd),
+ (ins GPR32:$src, GPR32:$Rn, bitfield32_imm:$ImmR, bfx32_width:$ImmS),
+ "bfxil\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary>
+{
+ // As above, no disassembler allowed.
+ let isAsmParserOnly = 1;
+ let Constraints = "$src = $Rd";
+}
+
+def BFXILxxii : A64I_bitfield<0b1, 0b01, 0b1, (outs GPR64:$Rd),
+ (ins GPR64:$src, GPR64:$Rn, bitfield64_imm:$ImmR, bfx64_width:$ImmS),
+ "bfxil\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary>
+{
+ // As above, no disassembler allowed.
+ let isAsmParserOnly = 1;
+ let Constraints = "$src = $Rd";
+}
+
+// SBFX instructions can do a 1-instruction sign-extension of boolean values.
+def : Pat<(sext_inreg GPR64:$Rn, i1), (SBFXxxii GPR64:$Rn, 0, 0)>;
+def : Pat<(sext_inreg GPR32:$Rn, i1), (SBFXwwii GPR32:$Rn, 0, 0)>;
+def : Pat<(i64 (sext_inreg (anyext GPR32:$Rn), i1)),
+ (SBFXxxii (SUBREG_TO_REG (i64 0), GPR32:$Rn, sub_32), 0, 0)>;
+
+// UBFX makes sense as an implementation of a 64-bit zero-extension too. Could
+// use either 64-bit or 32-bit variant, but 32-bit might be more efficient.
+def : Pat<(zext GPR32:$Rn), (SUBREG_TO_REG (i64 0), (UBFXwwii GPR32:$Rn, 0, 31), sub_32)>;
+
+//===-------------------------------
+// 6. Aliases for bitfield insert instructions
+//===-------------------------------
+
+def bfi32_lsb_asmoperand : AsmOperandClass
+{
+ let Name = "BFI32LSB";
+ let PredicateMethod = "isUImm<5>";
+ let RenderMethod = "addBFILSBOperands<32>";
+}
+
+def bfi32_lsb : Operand<i64>, ImmLeaf<i64, [{ return Imm >= 0 && Imm <= 31; }]>
+{
+ let PrintMethod = "printBFILSBOperand<32>";
+ let ParserMatchClass = bfi32_lsb_asmoperand;
+}
+
+def bfi64_lsb_asmoperand : AsmOperandClass
+{
+ let Name = "BFI64LSB";
+ let PredicateMethod = "isUImm<6>";
+ let RenderMethod = "addBFILSBOperands<64>";
+}
+
+def bfi64_lsb : Operand<i64>, ImmLeaf<i64, [{ return Imm >= 0 && Imm <= 63; }]>
+{
+ let PrintMethod = "printBFILSBOperand<64>";
+ let ParserMatchClass = bfi64_lsb_asmoperand;
+}
+
+// Width verification is performed during conversion so the width operand can be
+// shared between 32/64-bit cases. Still needed for the print method though
+// because ImmS encodes "width - 1".
+def bfi32_width_asmoperand : AsmOperandClass
+{
+ let Name = "BFI32Width";
+ let PredicateMethod = "isBitfieldWidth<32>";
+ let RenderMethod = "addBFIWidthOperands";
+}
+
+def bfi32_width : Operand<i64>,
+ ImmLeaf<i64, [{ return Imm >= 1 && Imm <= 32; }]>
+{
+ let PrintMethod = "printBFIWidthOperand";
+ let ParserMatchClass = bfi32_width_asmoperand;
+}
+
+def bfi64_width_asmoperand : AsmOperandClass
+{
+ let Name = "BFI64Width";
+ let PredicateMethod = "isBitfieldWidth<64>";
+ let RenderMethod = "addBFIWidthOperands";
+}
+
+def bfi64_width : Operand<i64>,
+ ImmLeaf<i64, [{ return Imm >= 1 && Imm <= 64; }]>
+{
+ let PrintMethod = "printBFIWidthOperand";
+ let ParserMatchClass = bfi64_width_asmoperand;
+}
+
+multiclass A64I_bitfield_insert<bits<2> opc, string asmop>
+{
+ def wwii : A64I_bitfield<0b0, opc, 0b0, (outs GPR32:$Rd),
+ (ins GPR32:$Rn, bfi32_lsb:$ImmR, bfi32_width:$ImmS),
+ !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"),
+ [], NoItinerary>
+ {
+ // As above, no disassembler allowed.
+ let isAsmParserOnly = 1;
+ }
+
+ def xxii : A64I_bitfield<0b1, opc, 0b1, (outs GPR64:$Rd),
+ (ins GPR64:$Rn, bfi64_lsb:$ImmR, bfi64_width:$ImmS),
+ !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"),
+ [], NoItinerary>
+ {
+ // As above, no disassembler allowed.
+ let isAsmParserOnly = 1;
+ }
+
+}
+
+defm SBFIZ : A64I_bitfield_insert<0b00, "sbfiz">;
+defm UBFIZ : A64I_bitfield_insert<0b10, "ubfiz">;
+
+
+def BFIwwii : A64I_bitfield<0b0, 0b01, 0b0, (outs GPR32:$Rd),
+ (ins GPR32:$src, GPR32:$Rn, bfi32_lsb:$ImmR, bfi32_width:$ImmS),
+ "bfi\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary>
+{
+ // As above, no disassembler allowed.
+ let isAsmParserOnly = 1;
+ let Constraints = "$src = $Rd";
+}
+
+def BFIxxii : A64I_bitfield<0b1, 0b01, 0b1, (outs GPR64:$Rd),
+ (ins GPR64:$src, GPR64:$Rn, bfi64_lsb:$ImmR, bfi64_width:$ImmS),
+ "bfi\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary>
+{
+ // As above, no disassembler allowed.
+ let isAsmParserOnly = 1;
+ let Constraints = "$src = $Rd";
+}
+
+//===----------------------------------------------------------------------===//
+// Compare and branch (immediate)
+//===----------------------------------------------------------------------===//
+// Contains: CBZ, CBNZ
+
+class label_asmoperand<int width, int scale> : AsmOperandClass
+{
+ let Name = "Label" # width # "_" # scale;
+ let PredicateMethod = "isLabel<" # width # "," # scale # ">";
+ let RenderMethod = "addLabelOperands<" # width # ", " # scale # ">";
+}
+
+def label_wid19_scal4_asmoperand : label_asmoperand<19, 4>;
+
+// All conditional immediate branches are the same really: 19 signed bits scaled
+// by the instruction-size (4).
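+// This gives a PC-relative branch range of +/- 1MiB.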
+def bcc_target : Operand<OtherVT>
+{
+ // This label is a 19-bit offset from PC, scaled by the instruction-width: 4.
+ let ParserMatchClass = label_wid19_scal4_asmoperand;
+ let PrintMethod = "printLabelOperand<19, 4>";
+ let EncoderMethod = "getLabelOpValue<AArch64::fixup_a64_condbr>";
+ let OperandType = "OPERAND_PCREL";
+}
+
+multiclass cmpbr_sizes<bit op, string asmop, ImmLeaf SETOP>
+{
+ let isBranch = 1, isTerminator = 1 in {
+ def x : A64I_cmpbr<0b1, op,
+ (outs),
+ (ins GPR64:$Rt, bcc_target:$Label),
+ !strconcat(asmop,"\t$Rt, $Label"),
+ [(A64br_cc (A64cmp GPR64:$Rt, 0), SETOP, bb:$Label)],
+ NoItinerary>;
+
+ def w : A64I_cmpbr<0b0, op,
+ (outs),
+ (ins GPR32:$Rt, bcc_target:$Label),
+ !strconcat(asmop,"\t$Rt, $Label"),
+ [(A64br_cc (A64cmp GPR32:$Rt, 0), SETOP, bb:$Label)],
+ NoItinerary>;
+ }
+}
+
+defm CBZ : cmpbr_sizes<0b0, "cbz", ImmLeaf<i32, [{
+ return Imm == A64CC::EQ;
+}]> >;
+defm CBNZ : cmpbr_sizes<0b1, "cbnz", ImmLeaf<i32, [{
+ return Imm == A64CC::NE;
+}]> >;
+
+//===----------------------------------------------------------------------===//
+// Conditional branch (immediate) instructions
+//===----------------------------------------------------------------------===//
+// Contains: B.cc
+
+def cond_code_asmoperand : AsmOperandClass
+{
+ let Name = "CondCode";
+}
+
+def cond_code : Operand<i32>, ImmLeaf<i32, [{
+ return Imm >= 0 && Imm <= 15;
+}]>
+{
+ let PrintMethod = "printCondCodeOperand";
+ let ParserMatchClass = cond_code_asmoperand;
+}
+
+def Bcc : A64I_condbr<0b0, 0b0, (outs),
+ (ins cond_code:$Cond, bcc_target:$Label),
+ "b.$Cond $Label", [(A64br_cc NZCV, (i32 imm:$Cond), bb:$Label)],
+ NoItinerary>
+{
+ let Uses = [NZCV];
+ let isBranch = 1;
+ let isTerminator = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// Conditional compare (immediate) instructions
+//===----------------------------------------------------------------------===//
+// Contains: CCMN, CCMP
+
+def uimm4_asmoperand : AsmOperandClass
+{
+ let Name = "UImm4";
+ let PredicateMethod = "isUImm<4>";
+ let RenderMethod = "addImmOperands";
+}
+
+def uimm4 : Operand<i32>
+{
+ let ParserMatchClass = uimm4_asmoperand;
+}
+
+def uimm5 : Operand<i32>
+{
+ let ParserMatchClass = uimm5_asmoperand;
+}
+
+// The only difference between this operand and the one for instructions like
+// B.cc is that it's parsed manually. The other gets parsed implicitly as part of
+// the mnemonic handling.
+def cond_code_op_asmoperand : AsmOperandClass
+{
+ let Name = "CondCodeOp";
+ let RenderMethod = "addCondCodeOperands";
+ let PredicateMethod = "isCondCode";
+ let ParserMethod = "ParseCondCodeOperand";
+}
+
+def cond_code_op : Operand<i32>
+{
+ let PrintMethod = "printCondCodeOperand";
+ let ParserMatchClass = cond_code_op_asmoperand;
+}
+
+class A64I_condcmpimmImpl<bit sf, bit op, RegisterClass GPR, string asmop>
+ : A64I_condcmpimm<sf, op, 0b0, 0b0, 0b1, (outs),
+ (ins GPR:$Rn, uimm5:$UImm5, uimm4:$NZCVImm, cond_code_op:$Cond),
+ !strconcat(asmop, "\t$Rn, $UImm5, $NZCVImm, $Cond"),
+ [], NoItinerary>
+{
+ let Defs = [NZCV];
+}
+
+def CCMNwi : A64I_condcmpimmImpl<0b0, 0b0, GPR32, "ccmn">;
+def CCMNxi : A64I_condcmpimmImpl<0b1, 0b0, GPR64, "ccmn">;
+def CCMPwi : A64I_condcmpimmImpl<0b0, 0b1, GPR32, "ccmp">;
+def CCMPxi : A64I_condcmpimmImpl<0b1, 0b1, GPR64, "ccmp">;
+
+//===----------------------------------------------------------------------===//
+// Conditional compare (register) instructions
+//===----------------------------------------------------------------------===//
+// Contains: CCMN, CCMP
+
+class A64I_condcmpregImpl<bit sf, bit op, RegisterClass GPR, string asmop>
+ : A64I_condcmpreg<sf, op, 0b0, 0b0, 0b1,
+ (outs),
+ (ins GPR:$Rn, GPR:$Rm, uimm4:$NZCVImm, cond_code_op:$Cond),
+ !strconcat(asmop, "\t$Rn, $Rm, $NZCVImm, $Cond"),
+ [], NoItinerary>
+{
+ let Defs = [NZCV];
+}
+
+def CCMNww : A64I_condcmpregImpl<0b0, 0b0, GPR32, "ccmn">;
+def CCMNxx : A64I_condcmpregImpl<0b1, 0b0, GPR64, "ccmn">;
+def CCMPww : A64I_condcmpregImpl<0b0, 0b1, GPR32, "ccmp">;
+def CCMPxx : A64I_condcmpregImpl<0b1, 0b1, GPR64, "ccmp">;
+
+//===----------------------------------------------------------------------===//
+// Conditional select instructions
+//===----------------------------------------------------------------------===//
+// Contains: CSEL, CSINC, CSINV, CSNEG + aliases CSET, CSETM, CINC, CINV, CNEG
+
+// A condition code which is encoded in the instruction as its inversion
+// (semantic rather than bitwise).
+def inv_cond_code_op_asmoperand : AsmOperandClass
+{
+ let Name = "InvCondCodeOp";
+ let RenderMethod = "addInvCondCodeOperands";
+ let PredicateMethod = "isCondCode";
+ let ParserMethod = "ParseCondCodeOperand";
+}
+
+def inv_cond_code_op : Operand<i32>
+{
+ let ParserMatchClass = inv_cond_code_op_asmoperand;
+}
+
+// Having a separate operand for the selectable use-case is debatable, but gives
+// consistency with cond_code.
+def inv_cond_XFORM : SDNodeXForm<imm, [{
+ A64CC::CondCodes CC = static_cast<A64CC::CondCodes>(N->getZExtValue());
+ return CurDAG->getTargetConstant(A64InvertCondCode(CC), MVT::i32);
+}]>;
+
+def inv_cond_code
+ : ImmLeaf<i32, [{ return Imm >= 0 && Imm <= 15; }], inv_cond_XFORM>;
+
+
+multiclass A64I_condselSizes<bit op, bits<2> op2, string asmop,
+ SDPatternOperator select>
+{
+ let Uses = [NZCV] in
+ {
+ def wwwc : A64I_condsel<0b0, op, 0b0, op2,
+ (outs GPR32:$Rd),
+ (ins GPR32:$Rn, GPR32:$Rm, cond_code_op:$Cond),
+ !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Cond"),
+ [(set GPR32:$Rd, (select GPR32:$Rn, GPR32:$Rm))],
+ NoItinerary>;
+
+
+ def xxxc : A64I_condsel<0b1, op, 0b0, op2,
+ (outs GPR64:$Rd),
+ (ins GPR64:$Rn, GPR64:$Rm, cond_code_op:$Cond),
+ !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Cond"),
+ [(set GPR64:$Rd, (select GPR64:$Rn, GPR64:$Rm))],
+ NoItinerary>;
+ }
+}
+
+def simple_select
+ : PatFrag<(ops node:$lhs, node:$rhs),
+ (A64select_cc NZCV, node:$lhs, node:$rhs, (i32 imm:$Cond))>;
+
+class complex_select<SDPatternOperator opnode>
+ : PatFrag<(ops node:$lhs, node:$rhs),
+ (A64select_cc NZCV, node:$lhs, (opnode node:$rhs), (i32 imm:$Cond))>;
+
+
+defm CSEL : A64I_condselSizes<0b0, 0b00, "csel", simple_select>;
+defm CSINC : A64I_condselSizes<0b0, 0b01, "csinc",
+ complex_select<PatFrag<(ops node:$val),
+ (add node:$val, 1)>>>;
+defm CSINV : A64I_condselSizes<0b1, 0b00, "csinv", complex_select<not>>;
+defm CSNEG : A64I_condselSizes<0b1, 0b01, "csneg", complex_select<ineg>>;
+
+// Now the instruction aliases, which fit nicely into LLVM's model:
+
+def : InstAlias<"cset $Rd, $Cond",
+ (CSINCwwwc GPR32:$Rd, WZR, WZR, inv_cond_code_op:$Cond)>;
+def : InstAlias<"cset $Rd, $Cond",
+ (CSINCxxxc GPR64:$Rd, XZR, XZR, inv_cond_code_op:$Cond)>;
+def : InstAlias<"csetm $Rd, $Cond",
+ (CSINVwwwc GPR32:$Rd, WZR, WZR, inv_cond_code_op:$Cond)>;
+def : InstAlias<"csetm $Rd, $Cond",
+ (CSINVxxxc GPR64:$Rd, XZR, XZR, inv_cond_code_op:$Cond)>;
+def : InstAlias<"cinc $Rd, $Rn, $Cond",
+ (CSINCwwwc GPR32:$Rd, GPR32:$Rn, GPR32:$Rn, inv_cond_code_op:$Cond)>;
+def : InstAlias<"cinc $Rd, $Rn, $Cond",
+ (CSINCxxxc GPR64:$Rd, GPR64:$Rn, GPR64:$Rn, inv_cond_code_op:$Cond)>;
+def : InstAlias<"cinv $Rd, $Rn, $Cond",
+ (CSINVwwwc GPR32:$Rd, GPR32:$Rn, GPR32:$Rn, inv_cond_code_op:$Cond)>;
+def : InstAlias<"cinv $Rd, $Rn, $Cond",
+ (CSINVxxxc GPR64:$Rd, GPR64:$Rn, GPR64:$Rn, inv_cond_code_op:$Cond)>;
+def : InstAlias<"cneg $Rd, $Rn, $Cond",
+ (CSNEGwwwc GPR32:$Rd, GPR32:$Rn, GPR32:$Rn, inv_cond_code_op:$Cond)>;
+def : InstAlias<"cneg $Rd, $Rn, $Cond",
+ (CSNEGxxxc GPR64:$Rd, GPR64:$Rn, GPR64:$Rn, inv_cond_code_op:$Cond)>;
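+
+// For example, "cset w0, eq" is equivalent to "csinc w0, wzr, wzr, ne": the
+// inv_cond_code_op operand holds the semantically inverted condition.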
+
+// Finally some helper patterns.
+
+// For CSET (a.k.a. zero-extension of icmp)
+def : Pat<(A64select_cc NZCV, 0, 1, cond_code:$Cond),
+ (CSINCwwwc WZR, WZR, cond_code:$Cond)>;
+def : Pat<(A64select_cc NZCV, 1, 0, inv_cond_code:$Cond),
+ (CSINCwwwc WZR, WZR, inv_cond_code:$Cond)>;
+
+def : Pat<(A64select_cc NZCV, 0, 1, cond_code:$Cond),
+ (CSINCxxxc XZR, XZR, cond_code:$Cond)>;
+def : Pat<(A64select_cc NZCV, 1, 0, inv_cond_code:$Cond),
+ (CSINCxxxc XZR, XZR, inv_cond_code:$Cond)>;
+
+// For CSETM (a.k.a. sign-extension of icmp)
+def : Pat<(A64select_cc NZCV, 0, -1, cond_code:$Cond),
+ (CSINVwwwc WZR, WZR, cond_code:$Cond)>;
+def : Pat<(A64select_cc NZCV, -1, 0, inv_cond_code:$Cond),
+ (CSINVwwwc WZR, WZR, inv_cond_code:$Cond)>;
+
+def : Pat<(A64select_cc NZCV, 0, -1, cond_code:$Cond),
+ (CSINVxxxc XZR, XZR, cond_code:$Cond)>;
+def : Pat<(A64select_cc NZCV, -1, 0, inv_cond_code:$Cond),
+ (CSINVxxxc XZR, XZR, inv_cond_code:$Cond)>;
+
+// CINC, CINV and CNEG get dealt with automatically, which leaves the issue of
+// commutativity. The instructions are too complex for isCommutable to be used,
+// so we have to create the patterns manually:
+
+// No commutable pattern for CSEL since the commuted version is isomorphic.
+
+// CSINC
+def :Pat<(A64select_cc NZCV, (add GPR32:$Rm, 1), GPR32:$Rn, inv_cond_code:$Cond),
+ (CSINCwwwc GPR32:$Rn, GPR32:$Rm, inv_cond_code:$Cond)>;
+def :Pat<(A64select_cc NZCV, (add GPR64:$Rm, 1), GPR64:$Rn, inv_cond_code:$Cond),
+ (CSINCxxxc GPR64:$Rn, GPR64:$Rm, inv_cond_code:$Cond)>;
+
+// CSINV
+def :Pat<(A64select_cc NZCV, (not GPR32:$Rm), GPR32:$Rn, inv_cond_code:$Cond),
+ (CSINVwwwc GPR32:$Rn, GPR32:$Rm, inv_cond_code:$Cond)>;
+def :Pat<(A64select_cc NZCV, (not GPR64:$Rm), GPR64:$Rn, inv_cond_code:$Cond),
+ (CSINVxxxc GPR64:$Rn, GPR64:$Rm, inv_cond_code:$Cond)>;
+
+// CSNEG
+def :Pat<(A64select_cc NZCV, (ineg GPR32:$Rm), GPR32:$Rn, inv_cond_code:$Cond),
+ (CSNEGwwwc GPR32:$Rn, GPR32:$Rm, inv_cond_code:$Cond)>;
+def :Pat<(A64select_cc NZCV, (ineg GPR64:$Rm), GPR64:$Rn, inv_cond_code:$Cond),
+ (CSNEGxxxc GPR64:$Rn, GPR64:$Rm, inv_cond_code:$Cond)>;
+
+//===----------------------------------------------------------------------===//
+// Data Processing (1 source) instructions
+//===----------------------------------------------------------------------===//
+// Contains: RBIT, REV16, REV, REV32, CLZ, CLS.
+
+// We define a unary operator which always fails. We will use this to
+// define unary operators that cannot be matched.
+
+class A64I_dp_1src_impl<bit sf, bits<6> opcode, string asmop,
+ list<dag> patterns, RegisterClass GPRrc,
+ InstrItinClass itin>:
+ A64I_dp_1src<sf,
+ 0,
+ 0b00000,
+ opcode,
+ !strconcat(asmop, "\t$Rd, $Rn"),
+ (outs GPRrc:$Rd),
+ (ins GPRrc:$Rn),
+ patterns,
+ itin>;
+
+multiclass A64I_dp_1src <bits<6> opcode, string asmop> {
+ let neverHasSideEffects = 1 in {
+ def ww : A64I_dp_1src_impl<0b0, opcode, asmop, [], GPR32, NoItinerary>;
+ def xx : A64I_dp_1src_impl<0b1, opcode, asmop, [], GPR64, NoItinerary>;
+ }
+}
+
+defm RBIT : A64I_dp_1src<0b000000, "rbit">;
+defm CLS : A64I_dp_1src<0b000101, "cls">;
+defm CLZ : A64I_dp_1src<0b000100, "clz">;
+
+def : Pat<(ctlz GPR32:$Rn), (CLZww GPR32:$Rn)>;
+def : Pat<(ctlz GPR64:$Rn), (CLZxx GPR64:$Rn)>;
+def : Pat<(ctlz_zero_undef GPR32:$Rn), (CLZww GPR32:$Rn)>;
+def : Pat<(ctlz_zero_undef GPR64:$Rn), (CLZxx GPR64:$Rn)>;
+
+def : Pat<(cttz GPR32:$Rn), (CLZww (RBITww GPR32:$Rn))>;
+def : Pat<(cttz GPR64:$Rn), (CLZxx (RBITxx GPR64:$Rn))>;
+def : Pat<(cttz_zero_undef GPR32:$Rn), (CLZww (RBITww GPR32:$Rn))>;
+def : Pat<(cttz_zero_undef GPR64:$Rn), (CLZxx (RBITxx GPR64:$Rn))>;
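+// (A64 has no count-trailing-zeros instruction, so cttz is implemented as a
+// bit-reverse followed by a count of leading zeros, as above.)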
+
+
+def REVww : A64I_dp_1src_impl<0b0, 0b000010, "rev",
+ [(set GPR32:$Rd, (bswap GPR32:$Rn))],
+ GPR32, NoItinerary>;
+def REVxx : A64I_dp_1src_impl<0b1, 0b000011, "rev",
+ [(set GPR64:$Rd, (bswap GPR64:$Rn))],
+ GPR64, NoItinerary>;
+def REV32xx : A64I_dp_1src_impl<0b1, 0b000010, "rev32",
+ [(set GPR64:$Rd, (bswap (rotr GPR64:$Rn, (i64 32))))],
+ GPR64, NoItinerary>;
+def REV16ww : A64I_dp_1src_impl<0b0, 0b000001, "rev16",
+ [(set GPR32:$Rd, (bswap (rotr GPR32:$Rn, (i64 16))))],
+ GPR32,
+ NoItinerary>;
+def REV16xx : A64I_dp_1src_impl<0b1, 0b000001, "rev16", [], GPR64, NoItinerary>;
+
+//===----------------------------------------------------------------------===//
+// Data Processing (2 sources) instructions
+//===----------------------------------------------------------------------===//
+// Contains: UDIV, SDIV, LSLV, LSRV, ASRV, RORV + aliases LSL, LSR, ASR, ROR
+
+class dp_2src_impl<bit sf, bits<6> opcode, string asmop, list<dag> patterns,
+ RegisterClass GPRsp,
+ InstrItinClass itin>:
+ A64I_dp_2src<sf,
+ opcode,
+ 0,
+ !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
+ (outs GPRsp:$Rd),
+ (ins GPRsp:$Rn, GPRsp:$Rm),
+ patterns,
+ itin>;
+
+multiclass dp_2src_zext <bits<6> opcode, string asmop, SDPatternOperator op> {
+ def www : dp_2src_impl<0b0,
+ opcode,
+ asmop,
+ [(set GPR32:$Rd, (op GPR32:$Rn, (i64 (zext GPR32:$Rm))))],
+ GPR32,
+ NoItinerary>;
+ def xxx : dp_2src_impl<0b1,
+ opcode,
+ asmop,
+ [(set GPR64:$Rd, (op GPR64:$Rn, GPR64:$Rm))],
+ GPR64,
+ NoItinerary>;
+}
+
+
+multiclass dp_2src <bits<6> opcode, string asmop, SDPatternOperator op> {
+ def www : dp_2src_impl<0b0,
+ opcode,
+ asmop,
+ [(set GPR32:$Rd, (op GPR32:$Rn, GPR32:$Rm))],
+ GPR32,
+ NoItinerary>;
+ def xxx : dp_2src_impl<0b1,
+ opcode,
+ asmop,
+ [(set GPR64:$Rd, (op GPR64:$Rn, GPR64:$Rm))],
+ GPR64,
+ NoItinerary>;
+}
+
+// Here we define the data processing 2 source instructions.
+defm UDIV : dp_2src<0b000010, "udiv", udiv>;
+defm SDIV : dp_2src<0b000011, "sdiv", sdiv>;
+
+defm LSLV : dp_2src_zext<0b001000, "lsl", shl>;
+defm LSRV : dp_2src_zext<0b001001, "lsr", srl>;
+defm ASRV : dp_2src_zext<0b001010, "asr", sra>;
+defm RORV : dp_2src_zext<0b001011, "ror", rotr>;
+
+// Extra patterns for an incoming 64-bit value for a 32-bit
+// operation. Since the LLVM operations are undefined (as in C) if the
+// RHS is out of range, it's perfectly permissible to discard the high
+// bits of the GPR64.
+def : Pat<(shl GPR32:$Rn, GPR64:$Rm),
+ (LSLVwww GPR32:$Rn, (EXTRACT_SUBREG GPR64:$Rm, sub_32))>;
+def : Pat<(srl GPR32:$Rn, GPR64:$Rm),
+ (LSRVwww GPR32:$Rn, (EXTRACT_SUBREG GPR64:$Rm, sub_32))>;
+def : Pat<(sra GPR32:$Rn, GPR64:$Rm),
+ (ASRVwww GPR32:$Rn, (EXTRACT_SUBREG GPR64:$Rm, sub_32))>;
+def : Pat<(rotr GPR32:$Rn, GPR64:$Rm),
+ (RORVwww GPR32:$Rn, (EXTRACT_SUBREG GPR64:$Rm, sub_32))>;
+
+// Here we define the aliases for the data processing 2 source instructions.
+def LSL_mnemonic : MnemonicAlias<"lslv", "lsl">;
+def LSR_mnemonic : MnemonicAlias<"lsrv", "lsr">;
+def ASR_mnemonic : MnemonicAlias<"asrv", "asr">;
+def ROR_mnemonic : MnemonicAlias<"rorv", "ror">;
+
+//===----------------------------------------------------------------------===//
+// Data Processing (3 sources) instructions
+//===----------------------------------------------------------------------===//
+// Contains: MADD, MSUB, SMADDL, SMSUBL, SMULH, UMADDL, UMSUBL, UMULH
+// + aliases MUL, MNEG, SMULL, SMNEGL, UMULL, UMNEGL
+
+class A64I_dp3_4operand<bit sf, bits<6> opcode, RegisterClass AccReg,
+ RegisterClass SrcReg, string asmop, dag pattern>
+ : A64I_dp3<sf, opcode,
+ (outs AccReg:$Rd), (ins SrcReg:$Rn, SrcReg:$Rm, AccReg:$Ra),
+ !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Ra"),
+ [(set AccReg:$Rd, pattern)], NoItinerary>
+{
+ RegisterClass AccGPR = AccReg;
+ RegisterClass SrcGPR = SrcReg;
+}
+
+def MADDwwww : A64I_dp3_4operand<0b0, 0b000000, GPR32, GPR32, "madd",
+ (add GPR32:$Ra, (mul GPR32:$Rn, GPR32:$Rm))>;
+def MADDxxxx : A64I_dp3_4operand<0b1, 0b000000, GPR64, GPR64, "madd",
+ (add GPR64:$Ra, (mul GPR64:$Rn, GPR64:$Rm))>;
+
+def MSUBwwww : A64I_dp3_4operand<0b0, 0b000001, GPR32, GPR32, "msub",
+ (sub GPR32:$Ra, (mul GPR32:$Rn, GPR32:$Rm))>;
+def MSUBxxxx : A64I_dp3_4operand<0b1, 0b000001, GPR64, GPR64, "msub",
+ (sub GPR64:$Ra, (mul GPR64:$Rn, GPR64:$Rm))>;
+
+def SMADDLxwwx : A64I_dp3_4operand<0b1, 0b000010, GPR64, GPR32, "smaddl",
+ (add GPR64:$Ra, (mul (i64 (sext GPR32:$Rn)), (sext GPR32:$Rm)))>;
+def SMSUBLxwwx : A64I_dp3_4operand<0b1, 0b000011, GPR64, GPR32, "smsubl",
+ (sub GPR64:$Ra, (mul (i64 (sext GPR32:$Rn)), (sext GPR32:$Rm)))>;
+
+def UMADDLxwwx : A64I_dp3_4operand<0b1, 0b001010, GPR64, GPR32, "umaddl",
+ (add GPR64:$Ra, (mul (i64 (zext GPR32:$Rn)), (zext GPR32:$Rm)))>;
+def UMSUBLxwwx : A64I_dp3_4operand<0b1, 0b001011, GPR64, GPR32, "umsubl",
+ (sub GPR64:$Ra, (mul (i64 (zext GPR32:$Rn)), (zext GPR32:$Rm)))>;
+
+let isCommutable = 1, PostEncoderMethod = "fixMulHigh" in
+{
+ def UMULHxxx : A64I_dp3<0b1, 0b001100, (outs GPR64:$Rd),
+ (ins GPR64:$Rn, GPR64:$Rm),
+ "umulh\t$Rd, $Rn, $Rm",
+ [(set GPR64:$Rd, (mulhu GPR64:$Rn, GPR64:$Rm))],
+ NoItinerary>;
+
+ def SMULHxxx : A64I_dp3<0b1, 0b000100, (outs GPR64:$Rd),
+ (ins GPR64:$Rn, GPR64:$Rm),
+ "smulh\t$Rd, $Rn, $Rm",
+ [(set GPR64:$Rd, (mulhs GPR64:$Rn, GPR64:$Rm))],
+ NoItinerary>;
+}
+
+multiclass A64I_dp3_3operand<string asmop, A64I_dp3_4operand INST,
+ Register ZR, dag pattern>
+{
+ def : InstAlias<asmop # " $Rd, $Rn, $Rm",
+ (INST INST.AccGPR:$Rd, INST.SrcGPR:$Rn, INST.SrcGPR:$Rm, ZR)>;
+
+ def : Pat<pattern, (INST INST.SrcGPR:$Rn, INST.SrcGPR:$Rm, ZR)>;
+}
+
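+// For example, "mul x0, x1, x2" is accepted as "madd x0, x1, x2, xzr", and a
+// plain (mul ...) node selects to MADD with the zero register as accumulator.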
+defm : A64I_dp3_3operand<"mul", MADDwwww, WZR, (mul GPR32:$Rn, GPR32:$Rm)>;
+defm : A64I_dp3_3operand<"mul", MADDxxxx, XZR, (mul GPR64:$Rn, GPR64:$Rm)>;
+
+defm : A64I_dp3_3operand<"mneg", MSUBwwww, WZR,
+ (sub 0, (mul GPR32:$Rn, GPR32:$Rm))>;
+defm : A64I_dp3_3operand<"mneg", MSUBxxxx, XZR,
+ (sub 0, (mul GPR64:$Rn, GPR64:$Rm))>;
+
+defm : A64I_dp3_3operand<"smull", SMADDLxwwx, XZR,
+ (mul (i64 (sext GPR32:$Rn)), (sext GPR32:$Rm))>;
+defm : A64I_dp3_3operand<"smnegl", SMSUBLxwwx, XZR,
+ (sub 0, (mul (i64 (sext GPR32:$Rn)), (sext GPR32:$Rm)))>;
+
+defm : A64I_dp3_3operand<"umull", UMADDLxwwx, XZR,
+ (mul (i64 (zext GPR32:$Rn)), (zext GPR32:$Rm))>;
+defm : A64I_dp3_3operand<"umnegl", UMSUBLxwwx, XZR,
+ (sub 0, (mul (i64 (zext GPR32:$Rn)), (zext GPR32:$Rm)))>;
+
+
+//===----------------------------------------------------------------------===//
+// Exception generation
+//===----------------------------------------------------------------------===//
+// Contains: SVC, HVC, SMC, BRK, HLT, DCPS1, DCPS2, DCPS3
+
+def uimm16_asmoperand : AsmOperandClass
+{
+ let Name = "UImm16";
+ let PredicateMethod = "isUImm<16>";
+ let RenderMethod = "addImmOperands";
+}
+
+def uimm16 : Operand<i32>
+{
+ let ParserMatchClass = uimm16_asmoperand;
+}
+
+class A64I_exceptImpl<bits<3> opc, bits<2> ll, string asmop>
+ : A64I_exception<opc, 0b000, ll, (outs), (ins uimm16:$UImm16),
+ !strconcat(asmop, "\t$UImm16"), [], NoItinerary>
+{
+ let isBranch = 1;
+ let isTerminator = 1;
+}
+
+def SVCi : A64I_exceptImpl<0b000, 0b01, "svc">;
+def HVCi : A64I_exceptImpl<0b000, 0b10, "hvc">;
+def SMCi : A64I_exceptImpl<0b000, 0b11, "smc">;
+def BRKi : A64I_exceptImpl<0b001, 0b00, "brk">;
+def HLTi : A64I_exceptImpl<0b010, 0b00, "hlt">;
+
+def DCPS1i : A64I_exceptImpl<0b101, 0b01, "dcps1">;
+def DCPS2i : A64I_exceptImpl<0b101, 0b10, "dcps2">;
+def DCPS3i : A64I_exceptImpl<0b101, 0b11, "dcps3">;
+
+// The immediate is optional for the DCPS instructions, defaulting to 0.
+def : InstAlias<"dcps1", (DCPS1i 0)>;
+def : InstAlias<"dcps2", (DCPS2i 0)>;
+def : InstAlias<"dcps3", (DCPS3i 0)>;
+
+//===----------------------------------------------------------------------===//
+// Extract (immediate)
+//===----------------------------------------------------------------------===//
+// Contains: EXTR + alias ROR
+
+def EXTRwwwi : A64I_extract<0b0, 0b000, 0b0,
+ (outs GPR32:$Rd),
+ (ins GPR32:$Rn, GPR32:$Rm, bitfield32_imm:$LSB),
+ "extr\t$Rd, $Rn, $Rm, $LSB",
+ [(set GPR32:$Rd,
+ (A64Extr GPR32:$Rn, GPR32:$Rm, imm:$LSB))],
+ NoItinerary>;
+def EXTRxxxi : A64I_extract<0b1, 0b000, 0b1,
+ (outs GPR64:$Rd),
+ (ins GPR64:$Rn, GPR64:$Rm, bitfield64_imm:$LSB),
+ "extr\t$Rd, $Rn, $Rm, $LSB",
+ [(set GPR64:$Rd,
+ (A64Extr GPR64:$Rn, GPR64:$Rm, imm:$LSB))],
+ NoItinerary>;
+
+def : InstAlias<"ror $Rd, $Rs, $LSB",
+ (EXTRwwwi GPR32:$Rd, GPR32:$Rs, GPR32:$Rs, bitfield32_imm:$LSB)>;
+def : InstAlias<"ror $Rd, $Rs, $LSB",
+ (EXTRxxxi GPR64:$Rd, GPR64:$Rs, GPR64:$Rs, bitfield64_imm:$LSB)>;
+
+def : Pat<(rotr GPR32:$Rn, bitfield32_imm:$LSB),
+ (EXTRwwwi GPR32:$Rn, GPR32:$Rn, bitfield32_imm:$LSB)>;
+def : Pat<(rotr GPR64:$Rn, bitfield64_imm:$LSB),
+ (EXTRxxxi GPR64:$Rn, GPR64:$Rn, bitfield64_imm:$LSB)>;
+
+//===----------------------------------------------------------------------===//
+// Floating-point compare instructions
+//===----------------------------------------------------------------------===//
+// Contains: FCMP, FCMPE
+
+def fpzero_asmoperand : AsmOperandClass
+{
+ let Name = "FPZero";
+ let ParserMethod = "ParseFPImmOperand";
+}
+
+def fpz32 : Operand<f32>, ComplexPattern<f32, 1, "SelectFPZeroOperand", [fpimm]>
+{
+ let ParserMatchClass = fpzero_asmoperand;
+ let PrintMethod = "printFPZeroOperand";
+}
+
+def fpz64 : Operand<f64>, ComplexPattern<f64, 1, "SelectFPZeroOperand", [fpimm]>
+{
+ let ParserMatchClass = fpzero_asmoperand;
+ let PrintMethod = "printFPZeroOperand";
+}
+
+multiclass A64I_fpcmpSignal<bits<2> type, bit imm, dag ins, string asmop2,
+ dag pattern>
+{
+ def _quiet : A64I_fpcmp<0b0, 0b0, type, 0b00, {0b0, imm, 0b0, 0b0, 0b0},
+ (outs), ins, !strconcat("fcmp\t$Rn, ", asmop2),
+ [pattern], NoItinerary>
+ {
+ let Defs = [NZCV];
+ }
+
+ def _sig : A64I_fpcmp<0b0, 0b0, type, 0b00, {0b1, imm, 0b0, 0b0, 0b0},
+ (outs), ins, !strconcat("fcmpe\t$Rn, ", asmop2),
+ [], NoItinerary>
+ {
+ let Defs = [NZCV];
+ }
+}
+
+defm FCMPss : A64I_fpcmpSignal<0b00, 0b0, (ins FPR32:$Rn, FPR32:$Rm), "$Rm",
+ (set NZCV, (A64cmp (f32 FPR32:$Rn), FPR32:$Rm))>;
+defm FCMPdd : A64I_fpcmpSignal<0b01, 0b0, (ins FPR64:$Rn, FPR64:$Rm), "$Rm",
+ (set NZCV, (A64cmp (f64 FPR64:$Rn), FPR64:$Rm))>;
+
+// The field that would hold Rm should be written as 0, but any value there is
+// accepted on disassembly, so we can't fix the bits in the instruction
+// definition; a post-encoder method zeroes them instead.
+let PostEncoderMethod = "fixFCMPImm" in
+{
+ defm FCMPsi : A64I_fpcmpSignal<0b00, 0b1, (ins FPR32:$Rn, fpz32:$Imm), "$Imm",
+ (set NZCV, (A64cmp (f32 FPR32:$Rn), fpz32:$Imm))>;
+
+ defm FCMPdi : A64I_fpcmpSignal<0b01, 0b1, (ins FPR64:$Rn, fpz64:$Imm), "$Imm",
+ (set NZCV, (A64cmp (f64 FPR64:$Rn), fpz64:$Imm))>;
+}
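A minimal C++ sketch of the clean-up such a post-encoder method performs, assuming the "should-be-zero" Rm field sits in bits [20:16] of the 32-bit encoding (helper name and signature are illustrative, not the backend's actual fixFCMPImm):

    // Force the unused Rm field of an FCMP/FCMPE-with-#0.0 encoding to 0b00000
    // after the rest of the instruction has been encoded.
    static unsigned zeroFCMPRmField(unsigned EncodedValue) {
      return EncodedValue & ~(0x1Fu << 16);
    }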
+
+
+//===----------------------------------------------------------------------===//
+// Floating-point conditional compare instructions
+//===----------------------------------------------------------------------===//
+// Contains: FCCMP, FCCMPE
+
+class A64I_fpccmpImpl<bits<2> type, bit op, RegisterClass FPR, string asmop>
+ : A64I_fpccmp<0b0, 0b0, type, op,
+ (outs),
+ (ins FPR:$Rn, FPR:$Rm, uimm4:$NZCVImm, cond_code_op:$Cond),
+ !strconcat(asmop, "\t$Rn, $Rm, $NZCVImm, $Cond"),
+ [], NoItinerary>
+{
+ let Defs = [NZCV];
+}
+
+def FCCMPss : A64I_fpccmpImpl<0b00, 0b0, FPR32, "fccmp">;
+def FCCMPEss : A64I_fpccmpImpl<0b00, 0b1, FPR32, "fccmpe">;
+def FCCMPdd : A64I_fpccmpImpl<0b01, 0b0, FPR64, "fccmp">;
+def FCCMPEdd : A64I_fpccmpImpl<0b01, 0b1, FPR64, "fccmpe">;
+
+//===----------------------------------------------------------------------===//
+// Floating-point conditional select instructions
+//===----------------------------------------------------------------------===//
+// Contains: FCSEL
+
+let Uses = [NZCV] in
+{
+ def FCSELsssc : A64I_fpcondsel<0b0, 0b0, 0b00, (outs FPR32:$Rd),
+ (ins FPR32:$Rn, FPR32:$Rm, cond_code_op:$Cond),
+ "fcsel\t$Rd, $Rn, $Rm, $Cond",
+ [(set FPR32:$Rd,
+ (simple_select (f32 FPR32:$Rn),
+ FPR32:$Rm))],
+ NoItinerary>;
+
+
+ def FCSELdddc : A64I_fpcondsel<0b0, 0b0, 0b01, (outs FPR64:$Rd),
+ (ins FPR64:$Rn, FPR64:$Rm, cond_code_op:$Cond),
+ "fcsel\t$Rd, $Rn, $Rm, $Cond",
+ [(set FPR64:$Rd,
+ (simple_select (f64 FPR64:$Rn),
+ FPR64:$Rm))],
+ NoItinerary>;
+}
+
+//===----------------------------------------------------------------------===//
+// Floating-point data-processing (1 source)
+//===----------------------------------------------------------------------===//
+// Contains: FMOV, FABS, FNEG, FSQRT, FCVT, FRINT[NPMZAXI].
+
+def FPNoUnop : PatFrag<(ops node:$val), (fneg node:$val),
+ [{ (void)N; return false; }]>;
+
+// First we do the fairly trivial bunch with uniform "OP s, s" and "OP d, d"
+// syntax. Default to no pattern because most are odd enough not to have one.
+multiclass A64I_fpdp1sizes<bits<6> opcode, string asmstr,
+ SDPatternOperator opnode = FPNoUnop>
+{
+ def ss : A64I_fpdp1<0b0, 0b0, 0b00, opcode, (outs FPR32:$Rd), (ins FPR32:$Rn),
+ !strconcat(asmstr, "\t$Rd, $Rn"),
+ [(set (f32 FPR32:$Rd), (opnode FPR32:$Rn))],
+ NoItinerary>;
+
+ def dd : A64I_fpdp1<0b0, 0b0, 0b01, opcode, (outs FPR64:$Rd), (ins FPR64:$Rn),
+ !strconcat(asmstr, "\t$Rd, $Rn"),
+ [(set (f64 FPR64:$Rd), (opnode FPR64:$Rn))],
+ NoItinerary>;
+}
+
+defm FMOV : A64I_fpdp1sizes<0b000000, "fmov">;
+defm FABS : A64I_fpdp1sizes<0b000001, "fabs", fabs>;
+defm FNEG : A64I_fpdp1sizes<0b000010, "fneg", fneg>;
+defm FSQRT : A64I_fpdp1sizes<0b000011, "fsqrt", fsqrt>;
+
+defm FRINTN : A64I_fpdp1sizes<0b001000, "frintn">;
+defm FRINTP : A64I_fpdp1sizes<0b001001, "frintp", fceil>;
+defm FRINTM : A64I_fpdp1sizes<0b001010, "frintm", ffloor>;
+defm FRINTZ : A64I_fpdp1sizes<0b001011, "frintz", ftrunc>;
+defm FRINTA : A64I_fpdp1sizes<0b001100, "frinta">;
+defm FRINTX : A64I_fpdp1sizes<0b001110, "frintx", frint>;
+defm FRINTI : A64I_fpdp1sizes<0b001111, "frinti", fnearbyint>;
+
+// The FCVT instructions have different source and destination register-types,
+// but the fields are uniform everywhere a D-register (say) crops up. Package
+// this information in a Record.
+class FCVTRegType<RegisterClass rc, bits<2> fld, ValueType vt>
+{
+ RegisterClass Class = rc;
+ ValueType VT = vt;
+ bit t1 = fld{1};
+ bit t0 = fld{0};
+}
+
+def FCVT16 : FCVTRegType<FPR16, 0b11, f16>;
+def FCVT32 : FCVTRegType<FPR32, 0b00, f32>;
+def FCVT64 : FCVTRegType<FPR64, 0b01, f64>;
+
+class A64I_fpdp1_fcvt<FCVTRegType DestReg, FCVTRegType SrcReg, SDNode opnode>
+ : A64I_fpdp1<0b0, 0b0, {SrcReg.t1, SrcReg.t0},
+ {0,0,0,1, DestReg.t1, DestReg.t0},
+ (outs DestReg.Class:$Rd), (ins SrcReg.Class:$Rn),
+ "fcvt\t$Rd, $Rn",
+ [(set (DestReg.VT DestReg.Class:$Rd),
+ (opnode (SrcReg.VT SrcReg.Class:$Rn)))], NoItinerary>;
+
+def FCVTds : A64I_fpdp1_fcvt<FCVT64, FCVT32, fextend>;
+def FCVThs : A64I_fpdp1_fcvt<FCVT16, FCVT32, fround>;
+def FCVTsd : A64I_fpdp1_fcvt<FCVT32, FCVT64, fround>;
+def FCVThd : A64I_fpdp1_fcvt<FCVT16, FCVT64, fround>;
+def FCVTsh : A64I_fpdp1_fcvt<FCVT32, FCVT16, fextend>;
+def FCVTdh : A64I_fpdp1_fcvt<FCVT64, FCVT16, fextend>;
+
+
+//===----------------------------------------------------------------------===//
+// Floating-point data-processing (2 sources) instructions
+//===----------------------------------------------------------------------===//
+// Contains: FMUL, FDIV, FADD, FSUB, FMAX, FMIN, FMAXNM, FMINNM, FNMUL
+
+def FPNoBinop : PatFrag<(ops node:$lhs, node:$rhs), (fadd node:$lhs, node:$rhs),
+ [{ (void)N; return false; }]>;
+
+multiclass A64I_fpdp2sizes<bits<4> opcode, string asmstr,
+ SDPatternOperator opnode>
+{
+ def sss : A64I_fpdp2<0b0, 0b0, 0b00, opcode,
+ (outs FPR32:$Rd),
+ (ins FPR32:$Rn, FPR32:$Rm),
+ !strconcat(asmstr, "\t$Rd, $Rn, $Rm"),
+ [(set (f32 FPR32:$Rd), (opnode FPR32:$Rn, FPR32:$Rm))],
+ NoItinerary>;
+
+ def ddd : A64I_fpdp2<0b0, 0b0, 0b01, opcode,
+ (outs FPR64:$Rd),
+ (ins FPR64:$Rn, FPR64:$Rm),
+ !strconcat(asmstr, "\t$Rd, $Rn, $Rm"),
+ [(set (f64 FPR64:$Rd), (opnode FPR64:$Rn, FPR64:$Rm))],
+ NoItinerary>;
+}
+
+let isCommutable = 1 in {
+ defm FMUL : A64I_fpdp2sizes<0b0000, "fmul", fmul>;
+ defm FADD : A64I_fpdp2sizes<0b0010, "fadd", fadd>;
+
+ // No patterns for these.
+ defm FMAX : A64I_fpdp2sizes<0b0100, "fmax", FPNoBinop>;
+ defm FMIN : A64I_fpdp2sizes<0b0101, "fmin", FPNoBinop>;
+ defm FMAXNM : A64I_fpdp2sizes<0b0110, "fmaxnm", FPNoBinop>;
+ defm FMINNM : A64I_fpdp2sizes<0b0111, "fminnm", FPNoBinop>;
+
+ defm FNMUL : A64I_fpdp2sizes<0b1000, "fnmul",
+ PatFrag<(ops node:$lhs, node:$rhs),
+ (fneg (fmul node:$lhs, node:$rhs))> >;
+}
+
+defm FDIV : A64I_fpdp2sizes<0b0001, "fdiv", fdiv>;
+defm FSUB : A64I_fpdp2sizes<0b0011, "fsub", fsub>;
+
+//===----------------------------------------------------------------------===//
+// Floating-point data-processing (3 sources) instructions
+//===----------------------------------------------------------------------===//
+// Contains: FMADD, FMSUB, FNMADD, FNMSUB
+
+def fmsub : PatFrag<(ops node:$Rn, node:$Rm, node:$Ra),
+ (fma (fneg node:$Rn), node:$Rm, node:$Ra)>;
+def fnmadd : PatFrag<(ops node:$Rn, node:$Rm, node:$Ra),
+ (fma node:$Rn, node:$Rm, (fneg node:$Ra))>;
+def fnmsub : PatFrag<(ops node:$Rn, node:$Rm, node:$Ra),
+ (fma (fneg node:$Rn), node:$Rm, (fneg node:$Ra))>;
+
+class A64I_fpdp3Impl<string asmop, RegisterClass FPR, ValueType VT,
+ bits<2> type, bit o1, bit o0, SDPatternOperator fmakind>
+ : A64I_fpdp3<0b0, 0b0, type, o1, o0, (outs FPR:$Rd),
+ (ins FPR:$Rn, FPR:$Rm, FPR:$Ra),
+ !strconcat(asmop,"\t$Rd, $Rn, $Rm, $Ra"),
+ [(set FPR:$Rd, (fmakind (VT FPR:$Rn), FPR:$Rm, FPR:$Ra))],
+ NoItinerary>;
+
+def FMADDssss : A64I_fpdp3Impl<"fmadd", FPR32, f32, 0b00, 0b0, 0b0, fma>;
+def FMSUBssss : A64I_fpdp3Impl<"fmsub", FPR32, f32, 0b00, 0b0, 0b1, fmsub>;
+def FNMADDssss : A64I_fpdp3Impl<"fnmadd", FPR32, f32, 0b00, 0b1, 0b0, fnmadd>;
+def FNMSUBssss : A64I_fpdp3Impl<"fnmsub", FPR32, f32, 0b00, 0b1, 0b1, fnmsub>;
+
+def FMADDdddd : A64I_fpdp3Impl<"fmadd", FPR64, f64, 0b01, 0b0, 0b0, fma>;
+def FMSUBdddd : A64I_fpdp3Impl<"fmsub", FPR64, f64, 0b01, 0b0, 0b1, fmsub>;
+def FNMADDdddd : A64I_fpdp3Impl<"fnmadd", FPR64, f64, 0b01, 0b1, 0b0, fnmadd>;
+def FNMSUBdddd : A64I_fpdp3Impl<"fnmsub", FPR64, f64, 0b01, 0b1, 0b1, fnmsub>;
+
+//===----------------------------------------------------------------------===//
+// Floating-point <-> fixed-point conversion instructions
+//===----------------------------------------------------------------------===//
+// Contains: FCVTZS, FCVTZU, SCVTF, UCVTF
+
+// #1-#32 allowed, encoded as "64 - <specified imm>".
+def fixedpos_asmoperand_i32 : AsmOperandClass
+{
+ let Name = "CVTFixedPos32";
+ let RenderMethod = "addCVTFixedPosOperands";
+ let PredicateMethod = "isCVTFixedPos<32>";
+}
+
+// Also encoded as "64 - <specified imm>" but #1-#64 allowed.
+def fixedpos_asmoperand_i64 : AsmOperandClass
+{
+ let Name = "CVTFixedPos64";
+ let RenderMethod = "addCVTFixedPosOperands";
+ let PredicateMethod = "isCVTFixedPos<64>";
+}
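A sketch of the relationship both operand classes rely on: the assembly immediate is the number of fraction bits, and the Scale field stores 64 minus that value (helper names are illustrative):

    // #<fbits> must lie in 1..MaxBits; the encoded Scale field is 64 - fbits,
    // so e.g. "fcvtzs w0, s0, #8" carries Scale = 56.
    template <unsigned MaxBits>
    static bool isValidFixedPointFBits(unsigned FBits) {
      return FBits >= 1 && FBits <= MaxBits;
    }
    static unsigned encodeFixedPointScale(unsigned FBits) { return 64 - FBits; }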
+
+// We need the cartesian product of the f32/f64 and i32/i64 operands for these
+// conversions:
+// + Selection needs to use operands of correct floating type
+// + Assembly parsing and decoding depend on integer width
+class cvtfix_i32_op<ValueType FloatVT>
+ : Operand<FloatVT>,
+ ComplexPattern<FloatVT, 1, "SelectCVTFixedPosOperand<32>", [fpimm]>
+{
+ let ParserMatchClass = fixedpos_asmoperand_i32;
+ let DecoderMethod = "DecodeCVT32FixedPosOperand";
+ let PrintMethod = "printCVTFixedPosOperand";
+}
+
+class cvtfix_i64_op<ValueType FloatVT>
+ : Operand<FloatVT>,
+ ComplexPattern<FloatVT, 1, "SelectCVTFixedPosOperand<64>", [fpimm]>
+{
+ let ParserMatchClass = fixedpos_asmoperand_i64;
+ let PrintMethod = "printCVTFixedPosOperand";
+}
+
+// Because of the proliferation of weird operands, it's not really
+// worth going for a multiclass here. Oh well.
+
+class A64I_fptofix<bit sf, bits<2> type, bits<3> opcode,
+ RegisterClass GPR, RegisterClass FPR, Operand scale_op,
+ string asmop, SDNode cvtop>
+ : A64I_fpfixed<sf, 0b0, type, 0b11, opcode,
+ (outs GPR:$Rd), (ins FPR:$Rn, scale_op:$Scale),
+ !strconcat(asmop, "\t$Rd, $Rn, $Scale"),
+ [(set GPR:$Rd, (cvtop (fmul FPR:$Rn, scale_op:$Scale)))],
+ NoItinerary>;
+
+def FCVTZSwsi : A64I_fptofix<0b0, 0b00, 0b000, GPR32, FPR32,
+ cvtfix_i32_op<f32>, "fcvtzs", fp_to_sint>;
+def FCVTZSxsi : A64I_fptofix<0b1, 0b00, 0b000, GPR64, FPR32,
+ cvtfix_i64_op<f32>, "fcvtzs", fp_to_sint>;
+def FCVTZUwsi : A64I_fptofix<0b0, 0b00, 0b001, GPR32, FPR32,
+ cvtfix_i32_op<f32>, "fcvtzu", fp_to_uint>;
+def FCVTZUxsi : A64I_fptofix<0b1, 0b00, 0b001, GPR64, FPR32,
+ cvtfix_i64_op<f32>, "fcvtzu", fp_to_uint>;
+
+def FCVTZSwdi : A64I_fptofix<0b0, 0b01, 0b000, GPR32, FPR64,
+ cvtfix_i32_op<f64>, "fcvtzs", fp_to_sint>;
+def FCVTZSxdi : A64I_fptofix<0b1, 0b01, 0b000, GPR64, FPR64,
+ cvtfix_i64_op<f64>, "fcvtzs", fp_to_sint>;
+def FCVTZUwdi : A64I_fptofix<0b0, 0b01, 0b001, GPR32, FPR64,
+ cvtfix_i32_op<f64>, "fcvtzu", fp_to_uint>;
+def FCVTZUxdi : A64I_fptofix<0b1, 0b01, 0b001, GPR64, FPR64,
+ cvtfix_i64_op<f64>, "fcvtzu", fp_to_uint>;
+
+
+class A64I_fixtofp<bit sf, bits<2> type, bits<3> opcode,
+ RegisterClass FPR, RegisterClass GPR, Operand scale_op,
+ string asmop, SDNode cvtop>
+ : A64I_fpfixed<sf, 0b0, type, 0b00, opcode,
+ (outs FPR:$Rd), (ins GPR:$Rn, scale_op:$Scale),
+ !strconcat(asmop, "\t$Rd, $Rn, $Scale"),
+ [(set FPR:$Rd, (fdiv (cvtop GPR:$Rn), scale_op:$Scale))],
+ NoItinerary>;
+
+def SCVTFswi : A64I_fixtofp<0b0, 0b00, 0b010, FPR32, GPR32,
+ cvtfix_i32_op<f32>, "scvtf", sint_to_fp>;
+def SCVTFsxi : A64I_fixtofp<0b1, 0b00, 0b010, FPR32, GPR64,
+ cvtfix_i64_op<f32>, "scvtf", sint_to_fp>;
+def UCVTFswi : A64I_fixtofp<0b0, 0b00, 0b011, FPR32, GPR32,
+ cvtfix_i32_op<f32>, "ucvtf", uint_to_fp>;
+def UCVTFsxi : A64I_fixtofp<0b1, 0b00, 0b011, FPR32, GPR64,
+ cvtfix_i64_op<f32>, "ucvtf", uint_to_fp>;
+def SCVTFdwi : A64I_fixtofp<0b0, 0b01, 0b010, FPR64, GPR32,
+ cvtfix_i32_op<f64>, "scvtf", sint_to_fp>;
+def SCVTFdxi : A64I_fixtofp<0b1, 0b01, 0b010, FPR64, GPR64,
+ cvtfix_i64_op<f64>, "scvtf", sint_to_fp>;
+def UCVTFdwi : A64I_fixtofp<0b0, 0b01, 0b011, FPR64, GPR32,
+ cvtfix_i32_op<f64>, "ucvtf", uint_to_fp>;
+def UCVTFdxi : A64I_fixtofp<0b1, 0b01, 0b011, FPR64, GPR64,
+ cvtfix_i64_op<f64>, "ucvtf", uint_to_fp>;
+
+//===----------------------------------------------------------------------===//
+// Floating-point <-> integer conversion instructions
+//===----------------------------------------------------------------------===//
+// Contains: FCVTZS, FCVTZU, SCVTF, UCVTF
+
+class A64I_fpintI<bit sf, bits<2> type, bits<2> rmode, bits<3> opcode,
+ RegisterClass DestPR, RegisterClass SrcPR, string asmop>
+ : A64I_fpint<sf, 0b0, type, rmode, opcode, (outs DestPR:$Rd), (ins SrcPR:$Rn),
+ !strconcat(asmop, "\t$Rd, $Rn"), [], NoItinerary>;
+
+multiclass A64I_fptointRM<bits<2> rmode, bit o2, string asmop>
+{
+ def Sws : A64I_fpintI<0b0, 0b00, rmode, {o2, 0, 0}, GPR32, FPR32, asmop # "s">;
+ def Sxs : A64I_fpintI<0b1, 0b00, rmode, {o2, 0, 0}, GPR64, FPR32, asmop # "s">;
+ def Uws : A64I_fpintI<0b0, 0b00, rmode, {o2, 0, 1}, GPR32, FPR32, asmop # "u">;
+ def Uxs : A64I_fpintI<0b1, 0b00, rmode, {o2, 0, 1}, GPR64, FPR32, asmop # "u">;
+
+ def Swd : A64I_fpintI<0b0, 0b01, rmode, {o2, 0, 0}, GPR32, FPR64, asmop # "s">;
+ def Sxd : A64I_fpintI<0b1, 0b01, rmode, {o2, 0, 0}, GPR64, FPR64, asmop # "s">;
+ def Uwd : A64I_fpintI<0b0, 0b01, rmode, {o2, 0, 1}, GPR32, FPR64, asmop # "u">;
+ def Uxd : A64I_fpintI<0b1, 0b01, rmode, {o2, 0, 1}, GPR64, FPR64, asmop # "u">;
+}
+
+defm FCVTN : A64I_fptointRM<0b00, 0b0, "fcvtn">;
+defm FCVTP : A64I_fptointRM<0b01, 0b0, "fcvtp">;
+defm FCVTM : A64I_fptointRM<0b10, 0b0, "fcvtm">;
+defm FCVTZ : A64I_fptointRM<0b11, 0b0, "fcvtz">;
+defm FCVTA : A64I_fptointRM<0b00, 0b1, "fcvta">;
+
+def : Pat<(i32 (fp_to_sint FPR32:$Rn)), (FCVTZSws FPR32:$Rn)>;
+def : Pat<(i64 (fp_to_sint FPR32:$Rn)), (FCVTZSxs FPR32:$Rn)>;
+def : Pat<(i32 (fp_to_uint FPR32:$Rn)), (FCVTZUws FPR32:$Rn)>;
+def : Pat<(i64 (fp_to_uint FPR32:$Rn)), (FCVTZUxs FPR32:$Rn)>;
+def : Pat<(i32 (fp_to_sint (f64 FPR64:$Rn))), (FCVTZSwd FPR64:$Rn)>;
+def : Pat<(i64 (fp_to_sint (f64 FPR64:$Rn))), (FCVTZSxd FPR64:$Rn)>;
+def : Pat<(i32 (fp_to_uint (f64 FPR64:$Rn))), (FCVTZUwd FPR64:$Rn)>;
+def : Pat<(i64 (fp_to_uint (f64 FPR64:$Rn))), (FCVTZUxd FPR64:$Rn)>;
+
+multiclass A64I_inttofp<bit o0, string asmop>
+{
+ def CVTFsw : A64I_fpintI<0b0, 0b00, 0b00, {0, 1, o0}, FPR32, GPR32, asmop>;
+ def CVTFsx : A64I_fpintI<0b1, 0b00, 0b00, {0, 1, o0}, FPR32, GPR64, asmop>;
+ def CVTFdw : A64I_fpintI<0b0, 0b01, 0b00, {0, 1, o0}, FPR64, GPR32, asmop>;
+ def CVTFdx : A64I_fpintI<0b1, 0b01, 0b00, {0, 1, o0}, FPR64, GPR64, asmop>;
+}
+
+defm S : A64I_inttofp<0b0, "scvtf">;
+defm U : A64I_inttofp<0b1, "ucvtf">;
+
+def : Pat<(f32 (sint_to_fp GPR32:$Rn)), (SCVTFsw GPR32:$Rn)>;
+def : Pat<(f32 (sint_to_fp GPR64:$Rn)), (SCVTFsx GPR64:$Rn)>;
+def : Pat<(f64 (sint_to_fp GPR32:$Rn)), (SCVTFdw GPR32:$Rn)>;
+def : Pat<(f64 (sint_to_fp GPR64:$Rn)), (SCVTFdx GPR64:$Rn)>;
+def : Pat<(f32 (uint_to_fp GPR32:$Rn)), (UCVTFsw GPR32:$Rn)>;
+def : Pat<(f32 (uint_to_fp GPR64:$Rn)), (UCVTFsx GPR64:$Rn)>;
+def : Pat<(f64 (uint_to_fp GPR32:$Rn)), (UCVTFdw GPR32:$Rn)>;
+def : Pat<(f64 (uint_to_fp GPR64:$Rn)), (UCVTFdx GPR64:$Rn)>;
+
+def FMOVws : A64I_fpintI<0b0, 0b00, 0b00, 0b110, GPR32, FPR32, "fmov">;
+def FMOVsw : A64I_fpintI<0b0, 0b00, 0b00, 0b111, FPR32, GPR32, "fmov">;
+def FMOVxd : A64I_fpintI<0b1, 0b01, 0b00, 0b110, GPR64, FPR64, "fmov">;
+def FMOVdx : A64I_fpintI<0b1, 0b01, 0b00, 0b111, FPR64, GPR64, "fmov">;
+
+def : Pat<(i32 (bitconvert (f32 FPR32:$Rn))), (FMOVws FPR32:$Rn)>;
+def : Pat<(f32 (bitconvert (i32 GPR32:$Rn))), (FMOVsw GPR32:$Rn)>;
+def : Pat<(i64 (bitconvert (f64 FPR64:$Rn))), (FMOVxd FPR64:$Rn)>;
+def : Pat<(f64 (bitconvert (i64 GPR64:$Rn))), (FMOVdx GPR64:$Rn)>;
+
+def lane1_asmoperand : AsmOperandClass
+{
+ let Name = "Lane1";
+ let RenderMethod = "addImmOperands";
+}
+
+def lane1 : Operand<i32>
+{
+ let ParserMatchClass = lane1_asmoperand;
+ let PrintMethod = "printBareImmOperand";
+}
+
+let DecoderMethod = "DecodeFMOVLaneInstruction" in
+{
+ def FMOVxv : A64I_fpint<0b1, 0b0, 0b10, 0b01, 0b110,
+ (outs GPR64:$Rd), (ins VPR128:$Rn, lane1:$Lane),
+ "fmov\t$Rd, $Rn.d[$Lane]", [], NoItinerary>;
+
+ def FMOVvx : A64I_fpint<0b1, 0b0, 0b10, 0b01, 0b111,
+ (outs VPR128:$Rd), (ins GPR64:$Rn, lane1:$Lane),
+ "fmov\t$Rd.d[$Lane], $Rn", [], NoItinerary>;
+}
+
+def : InstAlias<"fmov $Rd, $Rn.2d[$Lane]",
+ (FMOVxv GPR64:$Rd, VPR128:$Rn, lane1:$Lane), 0b0>;
+
+def : InstAlias<"fmov $Rd.2d[$Lane], $Rn",
+ (FMOVvx VPR128:$Rd, GPR64:$Rn, lane1:$Lane), 0b0>;
+
+//===----------------------------------------------------------------------===//
+// Floating-point immediate instructions
+//===----------------------------------------------------------------------===//
+// Contains: FMOV
+
+def fpimm_asmoperand : AsmOperandClass
+{
+ let Name = "FMOVImm";
+ let ParserMethod = "ParseFPImmOperand";
+}
+
+// The MCOperand for these instructions is the encoded 8-bit immediate value.
+def SDXF_fpimm : SDNodeXForm<fpimm, [{
+ uint32_t Imm8;
+ A64Imms::isFPImm(N->getValueAPF(), Imm8);
+ return CurDAG->getTargetConstant(Imm8, MVT::i32);
+}]>;
+
+class fmov_operand<ValueType FT>
+ : Operand<i32>,
+ PatLeaf<(FT fpimm), [{ return A64Imms::isFPImm(N->getValueAPF()); }],
+ SDXF_fpimm>
+{
+ let PrintMethod = "printFPImmOperand";
+ let ParserMatchClass = fpimm_asmoperand;
+}
+
+def fmov32_operand : fmov_operand<f32>;
+def fmov64_operand : fmov_operand<f64>;
+
+class A64I_fpimm_impl<bits<2> type, RegisterClass Reg, ValueType VT,
+ Operand fmov_operand>
+ : A64I_fpimm<0b0, 0b0, type, 0b00000,
+ (outs Reg:$Rd),
+ (ins fmov_operand:$Imm8),
+ "fmov\t$Rd, $Imm8",
+ [(set (VT Reg:$Rd), fmov_operand:$Imm8)],
+ NoItinerary>;
+
+def FMOVsi : A64I_fpimm_impl<0b00, FPR32, f32, fmov32_operand>;
+def FMOVdi : A64I_fpimm_impl<0b01, FPR64, f64, fmov64_operand>;
+
+//===----------------------------------------------------------------------===//
+// Load-register (literal) instructions
+//===----------------------------------------------------------------------===//
+// Contains: LDR, LDRSW, PRFM
+
+def ldrlit_label_asmoperand : AsmOperandClass
+{
+ let Name = "LoadLitLabel";
+ let RenderMethod = "addLabelOperands<19, 4>";
+}
+
+def ldrlit_label : Operand<i64>
+{
+ let EncoderMethod = "getLoadLitLabelOpValue";
+
+ // This label is a 19-bit offset from PC, scaled by the instruction-width: 4.
+ let PrintMethod = "printLabelOperand<19, 4>";
+ let ParserMatchClass = ldrlit_label_asmoperand;
+ let OperandType = "OPERAND_PCREL";
+}
+
+// Various instructions take an immediate value for which a plain integer is
+// always accepted, but where certain values also have a symbolic name to make
+// assembly easier to read. These operands and the associated functions abstract
+// away the difference between the two forms.
+multiclass namedimm<string prefix, string mapper>
+{
+ def _asmoperand : AsmOperandClass
+ {
+ let Name = "NamedImm" # prefix;
+ let PredicateMethod = "isUImm";
+ let RenderMethod = "addImmOperands";
+ let ParserMethod = "ParseNamedImmOperand<" # mapper # ">";
+ }
+
+ def _op : Operand<i32>
+ {
+ let ParserMatchClass = !cast<AsmOperandClass>(prefix # "_asmoperand");
+ let PrintMethod = "printNamedImmOperand<" # mapper # ">";
+ let DecoderMethod = "DecodeNamedImmOperand<" # mapper # ">";
+ }
+}
+
+defm prefetch : namedimm<"prefetch", "A64PRFM::PRFMMapper">;
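As a concrete example of what such a mapper provides for the prefetch operand, the symbolic names decompose into a 5-bit <type:2><target:2><policy:1> value; a sketch covering a few of the architectural names (helper is illustrative and deliberately incomplete):

    #include <string>

    // A bare integer 0-31 is always accepted alongside the symbolic names.
    static bool prefetchNameToImm(const std::string &Name, unsigned &Imm) {
      if (Name == "pldl1keep") { Imm = 0;  return true; } // PLD, L1, KEEP
      if (Name == "pldl1strm") { Imm = 1;  return true; } // PLD, L1, STRM
      if (Name == "pstl1keep") { Imm = 16; return true; } // PST, L1, KEEP
      return false; // remaining names omitted from this sketch
    }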
+
+class A64I_LDRlitSimple<bits<2> opc, bit v, RegisterClass OutReg,
+ list<dag> patterns = []>
+ : A64I_LDRlit<opc, v, (outs OutReg:$Rt), (ins ldrlit_label:$Imm19),
+ "ldr\t$Rt, $Imm19", patterns, NoItinerary>;
+
+let mayLoad = 1 in
+{
+ def LDRw_lit : A64I_LDRlitSimple<0b00, 0b0, GPR32>;
+ def LDRx_lit : A64I_LDRlitSimple<0b01, 0b0, GPR64>;
+}
+
+def LDRs_lit : A64I_LDRlitSimple<0b00, 0b1, FPR32,
+ [(set (f32 FPR32:$Rt), (load constpool:$Imm19))]>;
+def LDRd_lit : A64I_LDRlitSimple<0b01, 0b1, FPR64,
+ [(set (f64 FPR64:$Rt), (load constpool:$Imm19))]>;
+
+let mayLoad = 1 in
+{
+ def LDRq_lit : A64I_LDRlitSimple<0b10, 0b1, FPR128>;
+
+
+ def LDRSWx_lit : A64I_LDRlit<0b10, 0b0,
+ (outs GPR64:$Rt),
+ (ins ldrlit_label:$Imm19),
+ "ldrsw\t$Rt, $Imm19",
+ [], NoItinerary>;
+
+ def PRFM_lit : A64I_LDRlit<0b11, 0b0,
+ (outs), (ins prefetch_op:$Rt, ldrlit_label:$Imm19),
+ "prfm\t$Rt, $Imm19",
+ [], NoItinerary>;
+}
+
+//===----------------------------------------------------------------------===//
+// Load-store exclusive instructions
+//===----------------------------------------------------------------------===//
+// Contains: STXRB, STXRH, STXR, LDXRB, LDXRH, LDXR, STXP, LDXP, STLXRB,
+// STLXRH, STLXR, LDAXRB, LDAXRH, LDAXR, STLXP, LDAXP, STLRB,
+// STLRH, STLR, LDARB, LDARH, LDAR
+
+// Since these instructions have the undefined register bits set to 1 in
+// their canonical form, we need a post encoder method to set those bits
+// to 1 when encoding these instructions. We do this using the
+// fixLoadStoreExclusive function. This function has template parameters:
+//
+// fixLoadStoreExclusive<int hasRs, int hasRt2>
+//
+// hasRs indicates that the instruction uses the Rs field, so we won't set
+// it to 1 (and the same for Rt2). We don't need template parameters for
+// the other register fields since Rt and Rn are always used.
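A minimal C++ sketch of that fix-up, assuming the usual AArch64 exclusive-access layout with Rs in bits [20:16] and Rt2 in bits [14:10] (names are illustrative; this is not the backend's actual implementation):

    // Set any register field the instruction doesn't use to the canonical
    // all-ones value after encoding.
    template <int hasRs, int hasRt2>
    static unsigned fixExclusiveEncoding(unsigned EncodedValue) {
      if (!hasRs)  EncodedValue |= 0x1Fu << 16; // Rs  := 0b11111
      if (!hasRt2) EncodedValue |= 0x1Fu << 10; // Rt2 := 0b11111
      return EncodedValue;
    }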
+
+// This operand parses a GPR64xsp register, followed by an optional immediate
+// #0.
+def GPR64xsp0_asmoperand : AsmOperandClass
+{
+ let Name = "GPR64xsp0";
+ let PredicateMethod = "isWrappedReg";
+ let RenderMethod = "addRegOperands";
+ let ParserMethod = "ParseLSXAddressOperand";
+}
+
+def GPR64xsp0 : RegisterOperand<GPR64xsp>
+{
+ let ParserMatchClass = GPR64xsp0_asmoperand;
+}
+
+//===----------------------------------
+// Store-exclusive (releasing & normal)
+//===----------------------------------
+
+class A64I_SRexs_impl<bits<2> size, bits<3> opcode, string asm, dag outs,
+ dag ins, list<dag> pat,
+ InstrItinClass itin> :
+ A64I_LDSTex_stn <size,
+ opcode{2}, 0, opcode{1}, opcode{0},
+ outs, ins,
+ !strconcat(asm, "\t$Rs, $Rt, [$Rn]"),
+ pat, itin> {
+ let mayStore = 1;
+ let PostEncoderMethod = "fixLoadStoreExclusive<1,0>";
+}
+
+multiclass A64I_SRex<string asmstr, bits<3> opcode, string prefix> {
+ def _byte: A64I_SRexs_impl<0b00, opcode, !strconcat(asmstr, "b"),
+ (outs GPR32:$Rs), (ins GPR32:$Rt, GPR64xsp0:$Rn),
+ [], NoItinerary>;
+
+ def _hword: A64I_SRexs_impl<0b01, opcode, !strconcat(asmstr, "h"),
+ (outs GPR32:$Rs), (ins GPR32:$Rt, GPR64xsp0:$Rn),
+ [],NoItinerary>;
+
+ def _word: A64I_SRexs_impl<0b10, opcode, asmstr,
+ (outs GPR32:$Rs), (ins GPR32:$Rt, GPR64xsp0:$Rn),
+ [], NoItinerary>;
+
+ def _dword: A64I_SRexs_impl<0b11, opcode, asmstr,
+ (outs GPR32:$Rs), (ins GPR64:$Rt, GPR64xsp0:$Rn),
+ [], NoItinerary>;
+}
+
+defm STXR : A64I_SRex<"stxr", 0b000, "STXR">;
+defm STLXR : A64I_SRex<"stlxr", 0b001, "STLXR">;
+
+//===----------------------------------
+// Loads
+//===----------------------------------
+
+class A64I_LRexs_impl<bits<2> size, bits<3> opcode, string asm, dag outs,
+ dag ins, list<dag> pat,
+ InstrItinClass itin> :
+ A64I_LDSTex_tn <size,
+ opcode{2}, 1, opcode{1}, opcode{0},
+ outs, ins,
+ !strconcat(asm, "\t$Rt, [$Rn]"),
+ pat, itin> {
+ let mayLoad = 1;
+ let PostEncoderMethod = "fixLoadStoreExclusive<0,0>";
+}
+
+multiclass A64I_LRex<string asmstr, bits<3> opcode> {
+ def _byte: A64I_LRexs_impl<0b00, opcode, !strconcat(asmstr, "b"),
+ (outs GPR32:$Rt), (ins GPR64xsp0:$Rn),
+ [], NoItinerary>;
+
+ def _hword: A64I_LRexs_impl<0b01, opcode, !strconcat(asmstr, "h"),
+ (outs GPR32:$Rt), (ins GPR64xsp0:$Rn),
+ [], NoItinerary>;
+
+ def _word: A64I_LRexs_impl<0b10, opcode, asmstr,
+ (outs GPR32:$Rt), (ins GPR64xsp0:$Rn),
+ [], NoItinerary>;
+
+ def _dword: A64I_LRexs_impl<0b11, opcode, asmstr,
+ (outs GPR64:$Rt), (ins GPR64xsp0:$Rn),
+ [], NoItinerary>;
+}
+
+defm LDXR : A64I_LRex<"ldxr", 0b000>;
+defm LDAXR : A64I_LRex<"ldaxr", 0b001>;
+defm LDAR : A64I_LRex<"ldar", 0b101>;
+
+class acquiring_load<PatFrag base>
+ : PatFrag<(ops node:$ptr), (base node:$ptr), [{
+ return cast<AtomicSDNode>(N)->getOrdering() == Acquire;
+}]>;
+
+def atomic_load_acquire_8 : acquiring_load<atomic_load_8>;
+def atomic_load_acquire_16 : acquiring_load<atomic_load_16>;
+def atomic_load_acquire_32 : acquiring_load<atomic_load_32>;
+def atomic_load_acquire_64 : acquiring_load<atomic_load_64>;
+
+def : Pat<(atomic_load_acquire_8 GPR64xsp:$Rn), (LDAR_byte GPR64xsp0:$Rn)>;
+def : Pat<(atomic_load_acquire_16 GPR64xsp:$Rn), (LDAR_hword GPR64xsp0:$Rn)>;
+def : Pat<(atomic_load_acquire_32 GPR64xsp:$Rn), (LDAR_word GPR64xsp0:$Rn)>;
+def : Pat<(atomic_load_acquire_64 GPR64xsp:$Rn), (LDAR_dword GPR64xsp0:$Rn)>;
+
+//===----------------------------------
+// Store-release (no exclusivity)
+//===----------------------------------
+
+class A64I_SLexs_impl<bits<2> size, bits<3> opcode, string asm, dag outs,
+ dag ins, list<dag> pat,
+ InstrItinClass itin> :
+ A64I_LDSTex_tn <size,
+ opcode{2}, 0, opcode{1}, opcode{0},
+ outs, ins,
+ !strconcat(asm, "\t$Rt, [$Rn]"),
+ pat, itin> {
+ let mayStore = 1;
+ let PostEncoderMethod = "fixLoadStoreExclusive<0,0>";
+}
+
+class releasing_store<PatFrag base>
+ : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{
+ return cast<AtomicSDNode>(N)->getOrdering() == Release;
+}]>;
+
+def atomic_store_release_8 : releasing_store<atomic_store_8>;
+def atomic_store_release_16 : releasing_store<atomic_store_16>;
+def atomic_store_release_32 : releasing_store<atomic_store_32>;
+def atomic_store_release_64 : releasing_store<atomic_store_64>;
+
+multiclass A64I_SLex<string asmstr, bits<3> opcode, string prefix> {
+ def _byte: A64I_SLexs_impl<0b00, opcode, !strconcat(asmstr, "b"),
+ (outs), (ins GPR32:$Rt, GPR64xsp0:$Rn),
+ [(atomic_store_release_8 GPR64xsp0:$Rn, GPR32:$Rt)],
+ NoItinerary>;
+
+ def _hword: A64I_SLexs_impl<0b01, opcode, !strconcat(asmstr, "h"),
+ (outs), (ins GPR32:$Rt, GPR64xsp0:$Rn),
+ [(atomic_store_release_16 GPR64xsp0:$Rn, GPR32:$Rt)],
+ NoItinerary>;
+
+ def _word: A64I_SLexs_impl<0b10, opcode, asmstr,
+ (outs), (ins GPR32:$Rt, GPR64xsp0:$Rn),
+ [(atomic_store_release_32 GPR64xsp0:$Rn, GPR32:$Rt)],
+ NoItinerary>;
+
+ def _dword: A64I_SLexs_impl<0b11, opcode, asmstr,
+ (outs), (ins GPR64:$Rt, GPR64xsp0:$Rn),
+ [(atomic_store_release_64 GPR64xsp0:$Rn, GPR64:$Rt)],
+ NoItinerary>;
+}
+
+defm STLR : A64I_SLex<"stlr", 0b101, "STLR">;
+
+//===----------------------------------
+// Store-exclusive pair (releasing & normal)
+//===----------------------------------
+
+class A64I_SPexs_impl<bits<2> size, bits<3> opcode, string asm, dag outs,
+ dag ins, list<dag> pat,
+ InstrItinClass itin> :
+ A64I_LDSTex_stt2n <size,
+ opcode{2}, 0, opcode{1}, opcode{0},
+ outs, ins,
+ !strconcat(asm, "\t$Rs, $Rt, $Rt2, [$Rn]"),
+ pat, itin>
+{
+ let mayStore = 1;
+}
+
+
+multiclass A64I_SPex<string asmstr, bits<3> opcode> {
+ def _word: A64I_SPexs_impl<0b10, opcode, asmstr, (outs),
+ (ins GPR32:$Rs, GPR32:$Rt, GPR32:$Rt2,
+ GPR64xsp0:$Rn),
+ [], NoItinerary>;
+
+ def _dword: A64I_SPexs_impl<0b11, opcode, asmstr, (outs),
+ (ins GPR32:$Rs, GPR64:$Rt, GPR64:$Rt2,
+ GPR64xsp0:$Rn),
+ [], NoItinerary>;
+}
+
+defm STXP : A64I_SPex<"stxp", 0b010>;
+defm STLXP : A64I_SPex<"stlxp", 0b011>;
+
+//===----------------------------------
+// Load-exclusive pair (acquiring & normal)
+//===----------------------------------
+
+class A64I_LPexs_impl<bits<2> size, bits<3> opcode, string asm, dag outs,
+ dag ins, list<dag> pat,
+ InstrItinClass itin> :
+ A64I_LDSTex_tt2n <size,
+ opcode{2}, 1, opcode{1}, opcode{0},
+ outs, ins,
+ !strconcat(asm, "\t$Rt, $Rt2, [$Rn]"),
+ pat, itin>{
+ let mayLoad = 1;
+ let DecoderMethod = "DecodeLoadPairExclusiveInstruction";
+ let PostEncoderMethod = "fixLoadStoreExclusive<0,1>";
+}
+
+multiclass A64I_LPex<string asmstr, bits<3> opcode> {
+ def _word: A64I_LPexs_impl<0b10, opcode, asmstr,
+ (outs GPR32:$Rt, GPR32:$Rt2),
+ (ins GPR64xsp0:$Rn),
+ [], NoItinerary>;
+
+ def _dword: A64I_LPexs_impl<0b11, opcode, asmstr,
+ (outs GPR64:$Rt, GPR64:$Rt2),
+ (ins GPR64xsp0:$Rn),
+ [], NoItinerary>;
+}
+
+defm LDXP : A64I_LPex<"ldxp", 0b010>;
+defm LDAXP : A64I_LPex<"ldaxp", 0b011>;
+
+//===----------------------------------------------------------------------===//
+// Load-store register (unscaled immediate) instructions
+//===----------------------------------------------------------------------===//
+// Contains: LDURB, LDURH, LDURSB, LDURSH, LDURSW, STUR, STURB, STURH and PRFUM
+//
+// and
+//
+//===----------------------------------------------------------------------===//
+// Load-store register (register offset) instructions
+//===----------------------------------------------------------------------===//
+// Contains: LDRB, LDRH, LDRSB, LDRSH, LDRSW, STR, STRB, STRH and PRFM
+//
+// and
+//
+//===----------------------------------------------------------------------===//
+// Load-store register (unsigned immediate) instructions
+//===----------------------------------------------------------------------===//
+// Contains: LDRB, LDRH, LDRSB, LDRSH, LDRSW, STR, STRB, STRH and PRFM
+//
+// and
+//
+//===----------------------------------------------------------------------===//
+// Load-store register (immediate post-indexed) instructions
+//===----------------------------------------------------------------------===//
+// Contains: STRB, STRH, STR, LDRB, LDRH, LDR, LDRSB, LDRSH, LDRSW
+//
+// and
+//
+//===----------------------------------------------------------------------===//
+// Load-store register (immediate pre-indexed) instructions
+//===----------------------------------------------------------------------===//
+// Contains: STRB, STRH, STR, LDRB, LDRH, LDR, LDRSB, LDRSH, LDRSW
+
+// Note that patterns are much later on in a completely separate section (they
+// need ADRPxi to be defined).
+
+//===-------------------------------
+// 1. Various operands needed
+//===-------------------------------
+
+//===-------------------------------
+// 1.1 Unsigned 12-bit immediate operands
+//===-------------------------------
+// The addressing mode for these instructions consists of an unsigned 12-bit
+// immediate which is scaled by the size of the memory access.
+//
+// We represent this in the MC layer by two operands:
+// 1. A base register.
+// 2. A 12-bit immediate: not multiplied by access size, so "LDR x0,[x0,#8]"
+// would have '1' in this field.
+// This means that separate functions are needed for converting representations
+// which *are* aware of the intended access size.
+
+// Anything that creates an MCInst (Decoding, selection and AsmParsing) has to
+// know the access size via some means. An isolated operand does not have this
+// information unless told from here, which means we need separate tablegen
+// Operands for each access size. This multiclass takes care of instantiating
+// the correct template functions in the rest of the backend.
+
+multiclass offsets_uimm12<int MemSize, string prefix>
+{
+ def uimm12_asmoperand : AsmOperandClass
+ {
+ let Name = "OffsetUImm12_" # MemSize;
+ let PredicateMethod = "isOffsetUImm12<" # MemSize # ">";
+ let RenderMethod = "addOffsetUImm12Operands<" # MemSize # ">";
+ }
+
+ // Pattern is really no more than an ImmLeaf, but predicated on MemSize which
+ // complicates things beyond TableGen's ken.
+ def uimm12 : Operand<i64>,
+ ComplexPattern<i64, 1, "SelectOffsetUImm12<" # MemSize # ">">
+ {
+ let ParserMatchClass
+ = !cast<AsmOperandClass>(prefix # uimm12_asmoperand);
+
+ let PrintMethod = "printOffsetUImm12Operand<" # MemSize # ">";
+ let EncoderMethod = "getOffsetUImm12OpValue<" # MemSize # ">";
+ }
+}
+
+defm byte_ : offsets_uimm12<1, "byte_">;
+defm hword_ : offsets_uimm12<2, "hword_">;
+defm word_ : offsets_uimm12<4, "word_">;
+defm dword_ : offsets_uimm12<8, "dword_">;
+defm qword_ : offsets_uimm12<16, "qword_">;
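A sketch of the constraint every one of these operands enforces: the byte offset written in assembly must be a non-negative multiple of the access size, and the value actually stored in the instruction is the offset divided by that size (template name is illustrative):

    // For an 8-byte access, "ldr x0, [x0, #8]" yields a field value of 1,
    // while #12 is not encodable in this form (it isn't a multiple of 8).
    template <int MemSize>
    static bool isEncodableUImm12Offset(long long ByteOffset, unsigned &Field) {
      if (ByteOffset < 0 || ByteOffset % MemSize != 0)
        return false;
      long long Scaled = ByteOffset / MemSize;
      if (Scaled >= 4096) // 12-bit unsigned field
        return false;
      Field = static_cast<unsigned>(Scaled);
      return true;
    }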
+
+//===-------------------------------
+// 1.2 Signed 9-bit immediate operands
+//===-------------------------------
+
+// The MCInst is expected to store the bit-wise encoding of the value,
+// which amounts to lopping off the extended sign bits.
+def SDXF_simm9 : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getZExtValue() & 0x1ff, MVT::i32);
+}]>;
+
+def simm9_asmoperand : AsmOperandClass
+{
+ let Name = "SImm9";
+ let PredicateMethod = "isSImm<9>";
+ let RenderMethod = "addSImmOperands<9>";
+}
+
+def simm9 : Operand<i64>,
+ ImmLeaf<i64, [{ return Imm >= -0x100 && Imm <= 0xff; }],
+ SDXF_simm9>
+{
+ let PrintMethod = "printOffsetSImm9Operand";
+ let ParserMatchClass = simm9_asmoperand;
+}
+
+
+//===-------------------------------
+// 1.3 Register offset extensions
+//===-------------------------------
+
+// The assembly-syntax for these addressing-modes is:
+// [<Xn|SP>, <R><m> {, <extend> {<amount>}}]
+//
+// The essential semantics are:
+// + <amount> is a shift: #<log(transfer size)> or #0
+// + <R> can be W or X.
+// + If <R> is W, <extend> can be UXTW or SXTW
+// + If <R> is X, <extend> can be LSL or SXTX
+//
+// The trickiest of those constraints is that Rm can be either GPR32 or GPR64,
+// which will need separate instructions for LLVM type-consistency. We'll also
+// need separate operands, of course.
+multiclass regexts<int MemSize, int RmSize, RegisterClass GPR,
+ string Rm, string prefix>
+{
+ def regext_asmoperand : AsmOperandClass
+ {
+ let Name = "AddrRegExtend_" # MemSize # "_" # Rm;
+ let PredicateMethod = "isAddrRegExtend<" # MemSize # "," # RmSize # ">";
+ let RenderMethod = "addAddrRegExtendOperands<" # MemSize # ">";
+ }
+
+ def regext : Operand<i64>
+ {
+ let PrintMethod
+ = "printAddrRegExtendOperand<" # MemSize # ", " # RmSize # ">";
+
+ let DecoderMethod = "DecodeAddrRegExtendOperand";
+ let ParserMatchClass
+ = !cast<AsmOperandClass>(prefix # regext_asmoperand);
+ }
+}
+
+multiclass regexts_wx<int MemSize, string prefix>
+{
+ // Rm is an X-register if LSL or SXTX are specified as the shift.
+ defm Xm_ : regexts<MemSize, 64, GPR64, "Xm", prefix # "Xm_">;
+
+ // Rm is a W-register if UXTW or SXTW are specified as the shift.
+ defm Wm_ : regexts<MemSize, 32, GPR32, "Wm", prefix # "Wm_">;
+}
+
+defm byte_ : regexts_wx<1, "byte_">;
+defm hword_ : regexts_wx<2, "hword_">;
+defm word_ : regexts_wx<4, "word_">;
+defm dword_ : regexts_wx<8, "dword_">;
+defm qword_ : regexts_wx<16, "qword_">;
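A sketch of the shift-amount rule from the addressing-mode summary above: for a MemSize-byte access the optional amount may only be 0 or log2(MemSize) (helper is illustrative; the real predicates also check the extend kind against the register width):

    // e.g. an 8-byte "ldr x0, [x1, x2, lsl #3]" is fine, "lsl #2" is not.
    static bool isLegalRegOffsetShift(unsigned MemSize, unsigned Amount) {
      unsigned Log2 = 0;
      while ((1u << Log2) < MemSize)
        ++Log2;
      return Amount == 0 || Amount == Log2;
    }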
+
+
+//===------------------------------
+// 2. The instructions themselves.
+//===------------------------------
+
+// We have the following instructions to implement:
+// | | B | H | W | X |
+// |-----------------+-------+-------+-------+--------|
+// | unsigned str | STRB | STRH | STR | STR |
+// | unsigned ldr | LDRB | LDRH | LDR | LDR |
+// | signed ldr to W | LDRSB | LDRSH | - | - |
+// | signed ldr to X | LDRSB | LDRSH | LDRSW | (PRFM) |
+
+// This will instantiate the LDR/STR instructions you'd expect to use for an
+// unsigned datatype (first two rows above) or floating-point register, which is
+// reasonably uniform across all access sizes.
+
+
+//===------------------------------
+// 2.1 Regular instructions
+//===------------------------------
+
+// This class covers the basic unsigned or irrelevantly-signed loads and stores
+// to general-purpose and floating-point registers.
+
+class AddrParams<string prefix>
+{
+ Operand uimm12 = !cast<Operand>(prefix # "_uimm12");
+
+ Operand regextWm = !cast<Operand>(prefix # "_Wm_regext");
+ Operand regextXm = !cast<Operand>(prefix # "_Xm_regext");
+}
+
+def byte_addrparams : AddrParams<"byte">;
+def hword_addrparams : AddrParams<"hword">;
+def word_addrparams : AddrParams<"word">;
+def dword_addrparams : AddrParams<"dword">;
+def qword_addrparams : AddrParams<"qword">;
+
+multiclass A64I_LDRSTR_unsigned<string prefix, bits<2> size, bit v,
+ bit high_opc, string asmsuffix,
+ RegisterClass GPR, AddrParams params>
+{
+ // Unsigned immediate
+ def _STR : A64I_LSunsigimm<size, v, {high_opc, 0b0},
+ (outs), (ins GPR:$Rt, GPR64xsp:$Rn, params.uimm12:$UImm12),
+ "str" # asmsuffix # "\t$Rt, [$Rn, $UImm12]",
+ [], NoItinerary>
+ {
+ let mayStore = 1;
+ }
+ def : InstAlias<"str" # asmsuffix # " $Rt, [$Rn]",
+ (!cast<Instruction>(prefix # "_STR") GPR:$Rt, GPR64xsp:$Rn, 0)>;
+
+ def _LDR : A64I_LSunsigimm<size, v, {high_opc, 0b1},
+ (outs GPR:$Rt), (ins GPR64xsp:$Rn, params.uimm12:$UImm12),
+ "ldr" # asmsuffix # "\t$Rt, [$Rn, $UImm12]",
+ [], NoItinerary>
+ {
+ let mayLoad = 1;
+ }
+ def : InstAlias<"ldr" # asmsuffix # " $Rt, [$Rn]",
+ (!cast<Instruction>(prefix # "_LDR") GPR:$Rt, GPR64xsp:$Rn, 0)>;
+
+ // Register offset (four of these: load/store and Wm/Xm).
+ let mayLoad = 1 in
+ {
+ def _Wm_RegOffset_LDR : A64I_LSregoff<size, v, {high_opc, 0b1}, 0b0,
+ (outs GPR:$Rt),
+ (ins GPR64xsp:$Rn, GPR32:$Rm, params.regextWm:$Ext),
+ "ldr" # asmsuffix # "\t$Rt, [$Rn, $Rm, $Ext]",
+ [], NoItinerary>;
+
+ def _Xm_RegOffset_LDR : A64I_LSregoff<size, v, {high_opc, 0b1}, 0b1,
+ (outs GPR:$Rt),
+ (ins GPR64xsp:$Rn, GPR64:$Rm, params.regextXm:$Ext),
+ "ldr" # asmsuffix # "\t$Rt, [$Rn, $Rm, $Ext]",
+ [], NoItinerary>;
+ }
+ def : InstAlias<"ldr" # asmsuffix # " $Rt, [$Rn, $Rm]",
+ (!cast<Instruction>(prefix # "_Xm_RegOffset_LDR") GPR:$Rt, GPR64xsp:$Rn,
+ GPR64:$Rm, 2)>;
+
+ let mayStore = 1 in
+ {
+ def _Wm_RegOffset_STR : A64I_LSregoff<size, v, {high_opc, 0b0}, 0b0,
+ (outs), (ins GPR:$Rt, GPR64xsp:$Rn, GPR32:$Rm,
+ params.regextWm:$Ext),
+ "str" # asmsuffix # "\t$Rt, [$Rn, $Rm, $Ext]",
+ [], NoItinerary>;
+
+ def _Xm_RegOffset_STR : A64I_LSregoff<size, v, {high_opc, 0b0}, 0b1,
+ (outs), (ins GPR:$Rt, GPR64xsp:$Rn, GPR64:$Rm,
+ params.regextXm:$Ext),
+ "str" # asmsuffix # "\t$Rt, [$Rn, $Rm, $Ext]",
+ [], NoItinerary>;
+ }
+ def : InstAlias<"str" # asmsuffix # " $Rt, [$Rn, $Rm]",
+ (!cast<Instruction>(prefix # "_Xm_RegOffset_STR") GPR:$Rt, GPR64xsp:$Rn,
+ GPR64:$Rm, 2)>;
+
+ // Unaligned immediate
+ def _STUR : A64I_LSunalimm<size, v, {high_opc, 0b0},
+ (outs), (ins GPR:$Rt, GPR64xsp:$Rn, simm9:$SImm9),
+ "stur" # asmsuffix # "\t$Rt, [$Rn, $SImm9]",
+ [], NoItinerary>
+ {
+ let mayStore = 1;
+ }
+ def : InstAlias<"stur" # asmsuffix # " $Rt, [$Rn]",
+ (!cast<Instruction>(prefix # "_STUR") GPR:$Rt, GPR64xsp:$Rn, 0)>;
+
+ def _LDUR : A64I_LSunalimm<size, v, {high_opc, 0b1},
+ (outs GPR:$Rt), (ins GPR64xsp:$Rn, simm9:$SImm9),
+ "ldur" # asmsuffix # "\t$Rt, [$Rn, $SImm9]",
+ [], NoItinerary>
+ {
+ let mayLoad = 1;
+ }
+ def : InstAlias<"ldur" # asmsuffix # " $Rt, [$Rn]",
+ (!cast<Instruction>(prefix # "_LDUR") GPR:$Rt, GPR64xsp:$Rn, 0)>;
+
+ // Post-indexed
+ def _PostInd_STR : A64I_LSpostind<size, v, {high_opc, 0b0},
+ (outs GPR64xsp:$Rn_wb),
+ (ins GPR:$Rt, GPR64xsp:$Rn, simm9:$SImm9),
+ "str" # asmsuffix # "\t$Rt, [$Rn], $SImm9",
+ [], NoItinerary>
+ {
+ let Constraints = "$Rn = $Rn_wb";
+ let mayStore = 1;
+
+ // Decoder only needed for unpredictability checking (FIXME).
+ let DecoderMethod = "DecodeSingleIndexedInstruction";
+ }
+
+ def _PostInd_LDR : A64I_LSpostind<size, v, {high_opc, 0b1},
+ (outs GPR:$Rt, GPR64xsp:$Rn_wb),
+ (ins GPR64xsp:$Rn, simm9:$SImm9),
+ "ldr" # asmsuffix # "\t$Rt, [$Rn], $SImm9",
+ [], NoItinerary>
+ {
+ let mayLoad = 1;
+ let Constraints = "$Rn = $Rn_wb";
+ let DecoderMethod = "DecodeSingleIndexedInstruction";
+ }
+
+ // Pre-indexed
+ def _PreInd_STR : A64I_LSpreind<size, v, {high_opc, 0b0},
+ (outs GPR64xsp:$Rn_wb),
+ (ins GPR:$Rt, GPR64xsp:$Rn, simm9:$SImm9),
+ "str" # asmsuffix # "\t$Rt, [$Rn, $SImm9]!",
+ [], NoItinerary>
+ {
+ let Constraints = "$Rn = $Rn_wb";
+ let mayStore = 1;
+
+ // Decoder only needed for unpredictability checking (FIXME).
+ let DecoderMethod = "DecodeSingleIndexedInstruction";
+ }
+
+ def _PreInd_LDR : A64I_LSpreind<size, v, {high_opc, 0b1},
+ (outs GPR:$Rt, GPR64xsp:$Rn_wb),
+ (ins GPR64xsp:$Rn, simm9:$SImm9),
+ "ldr" # asmsuffix # "\t$Rt, [$Rn, $SImm9]!",
+ [], NoItinerary>
+ {
+ let mayLoad = 1;
+ let Constraints = "$Rn = $Rn_wb";
+ let DecoderMethod = "DecodeSingleIndexedInstruction";
+ }
+
+}
+
+// STRB/LDRB: First define the instructions
+defm LS8
+ : A64I_LDRSTR_unsigned<"LS8", 0b00, 0b0, 0b0, "b", GPR32, byte_addrparams>;
+
+// STRH/LDRH
+defm LS16
+ : A64I_LDRSTR_unsigned<"LS16", 0b01, 0b0, 0b0, "h", GPR32, hword_addrparams>;
+
+
+// STR/LDR to/from a W register
+defm LS32
+ : A64I_LDRSTR_unsigned<"LS32", 0b10, 0b0, 0b0, "", GPR32, word_addrparams>;
+
+// STR/LDR to/from an X register
+defm LS64
+ : A64I_LDRSTR_unsigned<"LS64", 0b11, 0b0, 0b0, "", GPR64, dword_addrparams>;
+
+// STR/LDR to/from a B register
+defm LSFP8
+ : A64I_LDRSTR_unsigned<"LSFP8", 0b00, 0b1, 0b0, "", FPR8, byte_addrparams>;
+
+// STR/LDR to/from an H register
+defm LSFP16
+ : A64I_LDRSTR_unsigned<"LSFP16", 0b01, 0b1, 0b0, "", FPR16, hword_addrparams>;
+
+// STR/LDR to/from an S register
+defm LSFP32
+ : A64I_LDRSTR_unsigned<"LSFP32", 0b10, 0b1, 0b0, "", FPR32, word_addrparams>;
+// STR/LDR to/from a D register
+defm LSFP64
+ : A64I_LDRSTR_unsigned<"LSFP64", 0b11, 0b1, 0b0, "", FPR64, dword_addrparams>;
+// STR/LDR to/from a Q register
+defm LSFP128
+ : A64I_LDRSTR_unsigned<"LSFP128", 0b00, 0b1, 0b1, "", FPR128, qword_addrparams>;
+
+//===------------------------------
+// 2.3 Signed loads
+//===------------------------------
+
+// Byte and half-word signed loads can both go into either an X or a W register,
+// so it's worth factoring them out into one multiclass. Signed word loads don't
+// fit because there is no W version.
+multiclass A64I_LDR_signed<bits<2> size, string asmopcode, AddrParams params,
+ string prefix>
+{
+ // Unsigned offset
+ def w : A64I_LSunsigimm<size, 0b0, 0b11,
+ (outs GPR32:$Rt),
+ (ins GPR64xsp:$Rn, params.uimm12:$UImm12),
+ "ldrs" # asmopcode # "\t$Rt, [$Rn, $UImm12]",
+ [], NoItinerary>
+ {
+ let mayLoad = 1;
+ }
+ def : InstAlias<"ldrs" # asmopcode # " $Rt, [$Rn]",
+ (!cast<Instruction>(prefix # w) GPR32:$Rt, GPR64xsp:$Rn, 0)>;
+
+ def x : A64I_LSunsigimm<size, 0b0, 0b10,
+ (outs GPR64:$Rt),
+ (ins GPR64xsp:$Rn, params.uimm12:$UImm12),
+ "ldrs" # asmopcode # "\t$Rt, [$Rn, $UImm12]",
+ [], NoItinerary>
+ {
+ let mayLoad = 1;
+ }
+ def : InstAlias<"ldrs" # asmopcode # " $Rt, [$Rn]",
+ (!cast<Instruction>(prefix # x) GPR64:$Rt, GPR64xsp:$Rn, 0)>;
+
+ // Register offset
+ let mayLoad = 1 in
+ {
+ def w_Wm_RegOffset : A64I_LSregoff<size, 0b0, 0b11, 0b0,
+ (outs GPR32:$Rt),
+ (ins GPR64xsp:$Rn, GPR32:$Rm, params.regextWm:$Ext),
+ "ldrs" # asmopcode # "\t$Rt, [$Rn, $Rm, $Ext]",
+ [], NoItinerary>;
+
+ def w_Xm_RegOffset : A64I_LSregoff<size, 0b0, 0b11, 0b1,
+ (outs GPR32:$Rt),
+ (ins GPR64xsp:$Rn, GPR64:$Rm, params.regextXm:$Ext),
+ "ldrs" # asmopcode # "\t$Rt, [$Rn, $Rm, $Ext]",
+ [], NoItinerary>;
+
+ def x_Wm_RegOffset : A64I_LSregoff<size, 0b0, 0b10, 0b0,
+ (outs GPR64:$Rt),
+ (ins GPR64xsp:$Rn, GPR32:$Rm, params.regextWm:$Ext),
+ "ldrs" # asmopcode # "\t$Rt, [$Rn, $Rm, $Ext]",
+ [], NoItinerary>;
+
+ def x_Xm_RegOffset : A64I_LSregoff<size, 0b0, 0b10, 0b1,
+ (outs GPR64:$Rt),
+ (ins GPR64xsp:$Rn, GPR64:$Rm, params.regextXm:$Ext),
+ "ldrs" # asmopcode # "\t$Rt, [$Rn, $Rm, $Ext]",
+ [], NoItinerary>;
+ }
+ def : InstAlias<"ldrs" # asmopcode # " $Rt, [$Rn, $Rm]",
+ (!cast<Instruction>(prefix # "w_Xm_RegOffset") GPR32:$Rt, GPR64xsp:$Rn,
+ GPR64:$Rm, 2)>;
+
+ def : InstAlias<"ldrs" # asmopcode # " $Rt, [$Rn, $Rm]",
+ (!cast<Instruction>(prefix # "x_Xm_RegOffset") GPR64:$Rt, GPR64xsp:$Rn,
+ GPR64:$Rm, 2)>;
+
+
+ let mayLoad = 1 in
+ {
+ // Unaligned offset
+ def w_U : A64I_LSunalimm<size, 0b0, 0b11,
+ (outs GPR32:$Rt),
+ (ins GPR64xsp:$Rn, simm9:$SImm9),
+ "ldurs" # asmopcode # "\t$Rt, [$Rn, $SImm9]",
+ [], NoItinerary>;
+
+ def x_U : A64I_LSunalimm<size, 0b0, 0b10,
+ (outs GPR64:$Rt),
+ (ins GPR64xsp:$Rn, simm9:$SImm9),
+ "ldurs" # asmopcode # "\t$Rt, [$Rn, $SImm9]",
+ [], NoItinerary>;
+
+
+ // Post-indexed
+ def w_PostInd : A64I_LSpostind<size, 0b0, 0b11,
+ (outs GPR32:$Rt, GPR64xsp:$Rn_wb),
+ (ins GPR64xsp:$Rn, simm9:$SImm9),
+ "ldrs" # asmopcode # "\t$Rt, [$Rn], $SImm9",
+ [], NoItinerary>
+ {
+ let Constraints = "$Rn = $Rn_wb";
+ let DecoderMethod = "DecodeSingleIndexedInstruction";
+ }
+
+ def x_PostInd : A64I_LSpostind<size, 0b0, 0b10,
+ (outs GPR64:$Rt, GPR64xsp:$Rn_wb),
+ (ins GPR64xsp:$Rn, simm9:$SImm9),
+ "ldrs" # asmopcode # "\t$Rt, [$Rn], $SImm9",
+ [], NoItinerary>
+ {
+ let Constraints = "$Rn = $Rn_wb";
+ let DecoderMethod = "DecodeSingleIndexedInstruction";
+ }
+
+ // Pre-indexed
+ def w_PreInd : A64I_LSpreind<size, 0b0, 0b11,
+ (outs GPR32:$Rt, GPR64xsp:$Rn_wb),
+ (ins GPR64xsp:$Rn, simm9:$SImm9),
+ "ldrs" # asmopcode # "\t$Rt, [$Rn, $SImm9]!",
+ [], NoItinerary>
+ {
+ let Constraints = "$Rn = $Rn_wb";
+ let DecoderMethod = "DecodeSingleIndexedInstruction";
+ }
+
+ def x_PreInd : A64I_LSpreind<size, 0b0, 0b10,
+ (outs GPR64:$Rt, GPR64xsp:$Rn_wb),
+ (ins GPR64xsp:$Rn, simm9:$SImm9),
+ "ldrs" # asmopcode # "\t$Rt, [$Rn, $SImm9]!",
+ [], NoItinerary>
+ {
+ let Constraints = "$Rn = $Rn_wb";
+ let DecoderMethod = "DecodeSingleIndexedInstruction";
+ }
+ } // let mayLoad = 1
+}
+
+// LDRSB
+defm LDRSB : A64I_LDR_signed<0b00, "b", byte_addrparams, "LDRSB">;
+// LDRSH
+defm LDRSH : A64I_LDR_signed<0b01, "h", hword_addrparams, "LDRSH">;
+
+// LDRSW: load a 32-bit register, sign-extending to 64-bits.
+def LDRSWx
+ : A64I_LSunsigimm<0b10, 0b0, 0b10,
+ (outs GPR64:$Rt),
+ (ins GPR64xsp:$Rn, word_uimm12:$UImm12),
+ "ldrsw\t$Rt, [$Rn, $UImm12]",
+ [], NoItinerary>
+{
+ let mayLoad = 1;
+}
+def : InstAlias<"ldrsw $Rt, [$Rn]", (LDRSWx GPR64:$Rt, GPR64xsp:$Rn, 0)>;
+
+let mayLoad = 1 in
+{
+ def LDRSWx_Wm_RegOffset : A64I_LSregoff<0b10, 0b0, 0b10, 0b0,
+ (outs GPR64:$Rt),
+ (ins GPR64xsp:$Rn, GPR32:$Rm, word_Wm_regext:$Ext),
+ "ldrsw\t$Rt, [$Rn, $Rm, $Ext]",
+ [], NoItinerary>;
+
+ def LDRSWx_Xm_RegOffset : A64I_LSregoff<0b10, 0b0, 0b10, 0b1,
+ (outs GPR64:$Rt),
+ (ins GPR64xsp:$Rn, GPR64:$Rm, word_Xm_regext:$Ext),
+ "ldrsw\t$Rt, [$Rn, $Rm, $Ext]",
+ [], NoItinerary>;
+}
+def : InstAlias<"ldrsw $Rt, [$Rn, $Rm]",
+ (LDRSWx_Xm_RegOffset GPR64:$Rt, GPR64xsp:$Rn, GPR64:$Rm, 2)>;
+
+
+def LDURSWx
+ : A64I_LSunalimm<0b10, 0b0, 0b10,
+ (outs GPR64:$Rt),
+ (ins GPR64xsp:$Rn, simm9:$SImm9),
+ "ldursw\t$Rt, [$Rn, $SImm9]",
+ [], NoItinerary>
+{
+ let mayLoad = 1;
+}
+def : InstAlias<"ldursw $Rt, [$Rn]", (LDURSWx GPR64:$Rt, GPR64xsp:$Rn, 0)>;
+
+def LDRSWx_PostInd
+ : A64I_LSpostind<0b10, 0b0, 0b10,
+ (outs GPR64:$Rt, GPR64xsp:$Rn_wb),
+ (ins GPR64xsp:$Rn, simm9:$SImm9),
+ "ldrsw\t$Rt, [$Rn], $SImm9",
+ [], NoItinerary>
+{
+ let mayLoad = 1;
+ let Constraints = "$Rn = $Rn_wb";
+ let DecoderMethod = "DecodeSingleIndexedInstruction";
+}
+
+def LDRSWx_PreInd : A64I_LSpreind<0b10, 0b0, 0b10,
+ (outs GPR64:$Rt, GPR64xsp:$Rn_wb),
+ (ins GPR64xsp:$Rn, simm9:$SImm9),
+ "ldrsw\t$Rt, [$Rn, $SImm9]!",
+ [], NoItinerary>
+{
+ let mayLoad = 1;
+ let Constraints = "$Rn = $Rn_wb";
+ let DecoderMethod = "DecodeSingleIndexedInstruction";
+}
+
+//===------------------------------
+// 2.4 Prefetch operations
+//===------------------------------
+
+def PRFM : A64I_LSunsigimm<0b11, 0b0, 0b10, (outs),
+ (ins prefetch_op:$Rt, GPR64xsp:$Rn, dword_uimm12:$UImm12),
+ "prfm\t$Rt, [$Rn, $UImm12]",
+ [], NoItinerary>
+{
+ let mayLoad = 1;
+}
+def : InstAlias<"prfm $Rt, [$Rn]",
+ (PRFM prefetch_op:$Rt, GPR64xsp:$Rn, 0)>;
+
+let mayLoad = 1 in
+{
+ def PRFM_Wm_RegOffset : A64I_LSregoff<0b11, 0b0, 0b10, 0b0, (outs),
+ (ins prefetch_op:$Rt, GPR64xsp:$Rn,
+ GPR32:$Rm, dword_Wm_regext:$Ext),
+ "prfm\t$Rt, [$Rn, $Rm, $Ext]",
+ [], NoItinerary>;
+ def PRFM_Xm_RegOffset : A64I_LSregoff<0b11, 0b0, 0b10, 0b1, (outs),
+ (ins prefetch_op:$Rt, GPR64xsp:$Rn,
+ GPR64:$Rm, dword_Xm_regext:$Ext),
+ "prfm\t$Rt, [$Rn, $Rm, $Ext]",
+ [], NoItinerary>;
+}
+
+def : InstAlias<"prfm $Rt, [$Rn, $Rm]",
+ (PRFM_Xm_RegOffset prefetch_op:$Rt, GPR64xsp:$Rn,
+ GPR64:$Rm, 2)>;
+
+
+def PRFUM : A64I_LSunalimm<0b11, 0b0, 0b10, (outs),
+ (ins prefetch_op:$Rt, GPR64xsp:$Rn, simm9:$SImm9),
+ "prfum\t$Rt, [$Rn, $SImm9]",
+ [], NoItinerary>
+{
+ let mayLoad = 1;
+}
+def : InstAlias<"prfum $Rt, [$Rn]",
+ (PRFUM prefetch_op:$Rt, GPR64xsp:$Rn, 0)>;
+
+//===----------------------------------------------------------------------===//
+// Load-store register (unprivileged) instructions
+//===----------------------------------------------------------------------===//
+// Contains: LDTRB, LDTRH, LDTRSB, LDTRSH, LDTRSW, STTR, STTRB and STTRH
+
+// These instructions very much mirror the "unscaled immediate" loads, but since
+// there are no floating-point variants we need to split them out into their own
+// section to avoid instantiation of "ldtr d0, [sp]" etc.
+
+multiclass A64I_LDTRSTTR<bits<2> size, string asmsuffix, RegisterClass GPR,
+ string prefix>
+{
+ def _UnPriv_STR : A64I_LSunpriv<size, 0b0, 0b00,
+ (outs), (ins GPR:$Rt, GPR64xsp:$Rn, simm9:$SImm9),
+ "sttr" # asmsuffix # "\t$Rt, [$Rn, $SImm9]",
+ [], NoItinerary>
+ {
+ let mayStore = 1;
+ }
+
+ def : InstAlias<"sttr" # asmsuffix # " $Rt, [$Rn]",
+ (!cast<Instruction>(prefix # "_UnPriv_STR") GPR:$Rt, GPR64xsp:$Rn, 0)>;
+
+ def _UnPriv_LDR : A64I_LSunpriv<size, 0b0, 0b01,
+ (outs GPR:$Rt), (ins GPR64xsp:$Rn, simm9:$SImm9),
+ "ldtr" # asmsuffix # "\t$Rt, [$Rn, $SImm9]",
+ [], NoItinerary>
+ {
+ let mayLoad = 1;
+ }
+
+ def : InstAlias<"ldtr" # asmsuffix # " $Rt, [$Rn]",
+ (!cast<Instruction>(prefix # "_UnPriv_LDR") GPR:$Rt, GPR64xsp:$Rn, 0)>;
+
+}
+
+// STTRB/LDTRB: First define the instructions
+defm LS8 : A64I_LDTRSTTR<0b00, "b", GPR32, "LS8">;
+
+// STTRH/LDTRH
+defm LS16 : A64I_LDTRSTTR<0b01, "h", GPR32, "LS16">;
+
+// STTR/LDTR to/from a W register
+defm LS32 : A64I_LDTRSTTR<0b10, "", GPR32, "LS32">;
+
+// STTR/LDTR to/from an X register
+defm LS64 : A64I_LDTRSTTR<0b11, "", GPR64, "LS64">;
+
+// Now a multiclass for the signed loads that can go into either a 32-bit or a
+// 64-bit register...
+multiclass A64I_LDTR_signed<bits<2> size, string asmopcode, string prefix>
+{
+ let mayLoad = 1 in
+ {
+ def w : A64I_LSunpriv<size, 0b0, 0b11,
+ (outs GPR32:$Rt),
+ (ins GPR64xsp:$Rn, simm9:$SImm9),
+ "ldtrs" # asmopcode # "\t$Rt, [$Rn, $SImm9]",
+ [], NoItinerary>;
+
+ def x : A64I_LSunpriv<size, 0b0, 0b10,
+ (outs GPR64:$Rt),
+ (ins GPR64xsp:$Rn, simm9:$SImm9),
+ "ldtrs" # asmopcode # "\t$Rt, [$Rn, $SImm9]",
+ [], NoItinerary>;
+ }
+
+ def : InstAlias<"ldtrs" # asmopcode # " $Rt, [$Rn]",
+ (!cast<Instruction>(prefix # "w") GPR32:$Rt, GPR64xsp:$Rn, 0)>;
+
+ def : InstAlias<"ldtrs" # asmopcode # " $Rt, [$Rn]",
+ (!cast<Instruction>(prefix # "x") GPR64:$Rt, GPR64xsp:$Rn, 0)>;
+
+}
+
+// LDTRSB
+defm LDTRSB : A64I_LDTR_signed<0b00, "b", "LDTRSB">;
+// LDTRSH
+defm LDTRSH : A64I_LDTR_signed<0b01, "h", "LDTRSH">;
+
+// And finally LDTRSW which only goes to 64 bits.
+def LDTRSWx : A64I_LSunpriv<0b10, 0b0, 0b10,
+ (outs GPR64:$Rt),
+ (ins GPR64xsp:$Rn, simm9:$SImm9),
+ "ldtrsw\t$Rt, [$Rn, $SImm9]",
+ [], NoItinerary>
+{
+ let mayLoad = 1;
+}
+def : InstAlias<"ldtrsw $Rt, [$Rn]", (LDTRSWx GPR64:$Rt, GPR64xsp:$Rn, 0)>;
+
+//===----------------------------------------------------------------------===//
+// Load-store register pair (offset) instructions
+//===----------------------------------------------------------------------===//
+//
+// and
+//
+//===----------------------------------------------------------------------===//
+// Load-store register pair (post-indexed) instructions
+//===----------------------------------------------------------------------===//
+// Contains: STP, LDP, LDPSW
+//
+// and
+//
+//===----------------------------------------------------------------------===//
+// Load-store register pair (pre-indexed) instructions
+//===----------------------------------------------------------------------===//
+// Contains: STP, LDP, LDPSW
+//
+// and
+//
+//===----------------------------------------------------------------------===//
+// Load-store non-temporal register pair (offset) instructions
+//===----------------------------------------------------------------------===//
+// Contains: STNP, LDNP
+
+
+// Anything that creates an MCInst (Decoding, selection and AsmParsing) has to
+// know the access size via some means. An isolated operand does not have this
+// information unless told from here, which means we need separate tablegen
+// Operands for each access size. This multiclass takes care of instantiating
+// the correct template functions in the rest of the backend.
+
+multiclass offsets_simm7<string MemSize, string prefix>
+{
+ // The bare signed 7-bit immediate is used in post-indexed instructions, but
+ // because of the scaling performed a generic "simm7" operand isn't
+ // appropriate here either.
+ def simm7_asmoperand : AsmOperandClass
+ {
+ let Name = "SImm7_Scaled" # MemSize;
+ let PredicateMethod = "isSImm7Scaled<" # MemSize # ">";
+ let RenderMethod = "addSImm7ScaledOperands<" # MemSize # ">";
+ }
+
+ def simm7 : Operand<i64>
+ {
+ let PrintMethod = "printSImm7ScaledOperand<" # MemSize # ">";
+ let ParserMatchClass = !cast<AsmOperandClass>(prefix # "simm7_asmoperand");
+ }
+}
+
+defm word_ : offsets_simm7<"4", "word_">;
+defm dword_ : offsets_simm7<"8", "dword_">;
+defm qword_ : offsets_simm7<"16", "qword_">;
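A sketch of the scaling these pair operands perform: the written byte offset must be a multiple of the access size and the scaled value must fit in a signed 7-bit field (template name is illustrative):

    // For an 8-byte access, "ldp x0, x1, [sp, #16]" scales to 2, while #512
    // is rejected because 512 / 8 = 64 lies outside -64..63.
    template <int MemSize>
    static bool isEncodableSImm7Offset(long long ByteOffset, int &Field) {
      if (ByteOffset % MemSize != 0)
        return false;
      long long Scaled = ByteOffset / MemSize;
      if (Scaled < -64 || Scaled > 63) // signed 7-bit field
        return false;
      Field = static_cast<int>(Scaled);
      return true;
    }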
+
+multiclass A64I_LSPsimple<bits<2> opc, bit v, RegisterClass SomeReg,
+ Operand simm7, string prefix>
+{
+ def _STR : A64I_LSPoffset<opc, v, 0b0, (outs),
+ (ins SomeReg:$Rt, SomeReg:$Rt2, GPR64xsp:$Rn, simm7:$SImm7),
+ "stp\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary>
+ {
+ let mayStore = 1;
+ let DecoderMethod = "DecodeLDSTPairInstruction";
+ }
+ def : InstAlias<"stp $Rt, $Rt2, [$Rn]",
+ (!cast<Instruction>(prefix # "_STR") SomeReg:$Rt,
+ SomeReg:$Rt2, GPR64xsp:$Rn, 0)>;
+
+ def _LDR : A64I_LSPoffset<opc, v, 0b1,
+ (outs SomeReg:$Rt, SomeReg:$Rt2),
+ (ins GPR64xsp:$Rn, simm7:$SImm7),
+ "ldp\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary>
+ {
+ let mayLoad = 1;
+ let DecoderMethod = "DecodeLDSTPairInstruction";
+ }
+ def : InstAlias<"ldp $Rt, $Rt2, [$Rn]",
+ (!cast<Instruction>(prefix # "_LDR") SomeReg:$Rt,
+ SomeReg:$Rt2, GPR64xsp:$Rn, 0)>;
+
+ def _PostInd_STR : A64I_LSPpostind<opc, v, 0b0,
+ (outs GPR64xsp:$Rn_wb),
+ (ins SomeReg:$Rt, SomeReg:$Rt2,
+ GPR64xsp:$Rn,
+ simm7:$SImm7),
+ "stp\t$Rt, $Rt2, [$Rn], $SImm7",
+ [], NoItinerary>
+ {
+ let mayStore = 1;
+ let Constraints = "$Rn = $Rn_wb";
+
+ // Decoder only needed for unpredictability checking (FIXME).
+ let DecoderMethod = "DecodeLDSTPairInstruction";
+ }
+
+ def _PostInd_LDR : A64I_LSPpostind<opc, v, 0b1,
+ (outs SomeReg:$Rt, SomeReg:$Rt2, GPR64xsp:$Rn_wb),
+ (ins GPR64xsp:$Rn, simm7:$SImm7),
+ "ldp\t$Rt, $Rt2, [$Rn], $SImm7",
+ [], NoItinerary>
+ {
+ let mayLoad = 1;
+ let Constraints = "$Rn = $Rn_wb";
+ let DecoderMethod = "DecodeLDSTPairInstruction";
+ }
+
+ def _PreInd_STR : A64I_LSPpreind<opc, v, 0b0, (outs GPR64xsp:$Rn_wb),
+ (ins SomeReg:$Rt, SomeReg:$Rt2, GPR64xsp:$Rn, simm7:$SImm7),
+ "stp\t$Rt, $Rt2, [$Rn, $SImm7]!",
+ [], NoItinerary>
+ {
+ let mayStore = 1;
+ let Constraints = "$Rn = $Rn_wb";
+ let DecoderMethod = "DecodeLDSTPairInstruction";
+ }
+
+ def _PreInd_LDR : A64I_LSPpreind<opc, v, 0b1,
+ (outs SomeReg:$Rt, SomeReg:$Rt2, GPR64xsp:$Rn_wb),
+ (ins GPR64xsp:$Rn, simm7:$SImm7),
+ "ldp\t$Rt, $Rt2, [$Rn, $SImm7]!",
+ [], NoItinerary>
+ {
+ let mayLoad = 1;
+ let Constraints = "$Rn = $Rn_wb";
+ let DecoderMethod = "DecodeLDSTPairInstruction";
+ }
+
+ def _NonTemp_STR : A64I_LSPnontemp<opc, v, 0b0, (outs),
+ (ins SomeReg:$Rt, SomeReg:$Rt2, GPR64xsp:$Rn, simm7:$SImm7),
+ "stnp\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary>
+ {
+ let mayStore = 1;
+ let DecoderMethod = "DecodeLDSTPairInstruction";
+ }
+ def : InstAlias<"stnp $Rt, $Rt2, [$Rn]",
+ (!cast<Instruction>(prefix # "_NonTemp_STR") SomeReg:$Rt,
+ SomeReg:$Rt2, GPR64xsp:$Rn, 0)>;
+
+ def _NonTemp_LDR : A64I_LSPnontemp<opc, v, 0b1,
+ (outs SomeReg:$Rt, SomeReg:$Rt2),
+ (ins GPR64xsp:$Rn, simm7:$SImm7),
+ "ldnp\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary>
+ {
+ let mayLoad = 1;
+ let DecoderMethod = "DecodeLDSTPairInstruction";
+ }
+ def : InstAlias<"ldnp $Rt, $Rt2, [$Rn]",
+ (!cast<Instruction>(prefix # "_NonTemp_LDR") SomeReg:$Rt,
+ SomeReg:$Rt2, GPR64xsp:$Rn, 0)>;
+
+}
+
+
+defm LSPair32 : A64I_LSPsimple<0b00, 0b0, GPR32, word_simm7, "LSPair32">;
+defm LSPair64 : A64I_LSPsimple<0b10, 0b0, GPR64, dword_simm7, "LSPair64">;
+defm LSFPPair32 : A64I_LSPsimple<0b00, 0b1, FPR32, word_simm7, "LSFPPair32">;
+defm LSFPPair64 : A64I_LSPsimple<0b01, 0b1, FPR64, dword_simm7, "LSFPPair64">;
+defm LSFPPair128 : A64I_LSPsimple<0b10, 0b1, FPR128, qword_simm7, "LSFPPair128">;
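+// Note the opc/V combinations above: with V=0 (GPR pairs) opc=00/10 select the
+// 32-bit and 64-bit forms, while with V=1 (FP/SIMD pairs) opc=00/01/10 select
+// the 32-bit, 64-bit and 128-bit forms respectively.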
+
+
+def LDPSWx : A64I_LSPoffset<0b01, 0b0, 0b1,
+ (outs GPR64:$Rt, GPR64:$Rt2),
+ (ins GPR64xsp:$Rn, word_simm7:$SImm7),
+ "ldpsw\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary>
+{
+ let mayLoad = 1;
+ let DecoderMethod = "DecodeLDSTPairInstruction";
+}
+def : InstAlias<"ldpsw $Rt, $Rt2, [$Rn]",
+ (LDPSWx GPR64:$Rt, GPR64:$Rt2, GPR64xsp:$Rn, 0)>;
+
+def LDPSWx_PostInd : A64I_LSPpostind<0b01, 0b0, 0b1,
+ (outs GPR64:$Rt, GPR64:$Rt2, GPR64:$Rn_wb),
+ (ins GPR64xsp:$Rn, word_simm7:$SImm7),
+ "ldpsw\t$Rt, $Rt2, [$Rn], $SImm7",
+ [], NoItinerary>
+{
+ let mayLoad = 1;
+ let Constraints = "$Rn = $Rn_wb";
+ let DecoderMethod = "DecodeLDSTPairInstruction";
+}
+
+def LDPSWx_PreInd : A64I_LSPpreind<0b01, 0b0, 0b1,
+ (outs GPR64:$Rt, GPR64:$Rt2, GPR64:$Rn_wb),
+ (ins GPR64xsp:$Rn, word_simm7:$SImm7),
+ "ldpsw\t$Rt, $Rt2, [$Rn, $SImm7]!",
+ [], NoItinerary>
+{
+ let mayLoad = 1;
+ let Constraints = "$Rn = $Rn_wb";
+ let DecoderMethod = "DecodeLDSTPairInstruction";
+}
+
+//===----------------------------------------------------------------------===//
+// Logical (immediate) instructions
+//===----------------------------------------------------------------------===//
+// Contains: AND, ORR, EOR, ANDS, + aliases TST, MOV
+
+multiclass logical_imm_operands<string prefix, string note,
+ int size, ValueType VT>
+{
+ def _asmoperand : AsmOperandClass
+ {
+ let Name = "LogicalImm" # note # size;
+ let PredicateMethod = "isLogicalImm" # note # "<" # size # ">";
+ let RenderMethod = "addLogicalImmOperands<" # size # ">";
+ }
+
+ def _operand
+ : Operand<VT>, ComplexPattern<VT, 1, "SelectLogicalImm", [imm]>
+ {
+ let ParserMatchClass = !cast<AsmOperandClass>(prefix # "_asmoperand");
+ let PrintMethod = "printLogicalImmOperand<" # size # ">";
+ let DecoderMethod = "DecodeLogicalImmOperand<" # size # ">";
+ }
+}
+
+defm logical_imm32 : logical_imm_operands<"logical_imm32", "", 32, i32>;
+defm logical_imm64 : logical_imm_operands<"logical_imm64", "", 64, i64>;
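+// As a reminder, these operands accept the usual AArch64 bitmask immediates:
+// repeating runs of ones such as 0x00ff00ff00ff00ff or 0x0000ffff0000ffff are
+// encodable, while an arbitrary value like 0x12345678 is not.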
+
+// The mov versions only differ in assembly parsing, where they
+// exclude values representable with either MOVZ or MOVN.
+defm logical_imm32_mov
+ : logical_imm_operands<"logical_imm32_mov", "MOV", 32, i32>;
+defm logical_imm64_mov
+ : logical_imm_operands<"logical_imm64_mov", "MOV", 64, i64>;
+
+
+multiclass A64I_logimmSizes<bits<2> opc, string asmop, SDNode opnode>
+{
+ def wwi : A64I_logicalimm<0b0, opc, (outs GPR32wsp:$Rd),
+ (ins GPR32:$Rn, logical_imm32_operand:$Imm),
+ !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
+ [(set GPR32wsp:$Rd,
+ (opnode GPR32:$Rn, logical_imm32_operand:$Imm))],
+ NoItinerary>;
+
+ def xxi : A64I_logicalimm<0b1, opc, (outs GPR64xsp:$Rd),
+ (ins GPR64:$Rn, logical_imm64_operand:$Imm),
+ !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
+ [(set GPR64xsp:$Rd,
+ (opnode GPR64:$Rn, logical_imm64_operand:$Imm))],
+ NoItinerary>;
+}
+
+defm AND : A64I_logimmSizes<0b00, "and", and>;
+defm ORR : A64I_logimmSizes<0b01, "orr", or>;
+defm EOR : A64I_logimmSizes<0b10, "eor", xor>;
+
+let Defs = [NZCV] in
+{
+ def ANDSwwi : A64I_logicalimm<0b0, 0b11, (outs GPR32:$Rd),
+ (ins GPR32:$Rn, logical_imm32_operand:$Imm),
+ "ands\t$Rd, $Rn, $Imm",
+ [], NoItinerary>;
+
+ def ANDSxxi : A64I_logicalimm<0b1, 0b11, (outs GPR64:$Rd),
+ (ins GPR64:$Rn, logical_imm64_operand:$Imm),
+ "ands\t$Rd, $Rn, $Imm",
+ [], NoItinerary>;
+}
+
+
+def : InstAlias<"tst $Rn, $Imm",
+ (ANDSwwi WZR, GPR32:$Rn, logical_imm32_operand:$Imm)>;
+def : InstAlias<"tst $Rn, $Imm",
+ (ANDSxxi XZR, GPR64:$Rn, logical_imm64_operand:$Imm)>;
+def : InstAlias<"mov $Rd, $Imm",
+ (ORRwwi GPR32wsp:$Rd, WZR, logical_imm32_mov_operand:$Imm)>;
+def : InstAlias<"mov $Rd, $Imm",
+ (ORRxxi GPR64xsp:$Rd, XZR, logical_imm64_mov_operand:$Imm)>;
+
+//===----------------------------------------------------------------------===//
+// Logical (shifted register) instructions
+//===----------------------------------------------------------------------===//
+// Contains: AND, BIC, ORR, ORN, EOR, EON, ANDS, BICS + aliases TST, MVN, MOV
+
+// Operand for optimizing (icmp (and LHS, RHS), 0, SomeCode). In theory "ANDS"
+// behaves differently for unsigned comparisons, so we defensively only allow
+// signed or n/a as the operand. In practice "unsigned greater than 0" is "not
+// equal to 0" and LLVM gives us this.
+def signed_cond : PatLeaf<(cond), [{
+ return !isUnsignedIntSetCC(N->get());
+}]>;
+
+
+// These instructions share their "shift" operands with add/sub (shifted
+// register instructions). They are defined there.
+
+// N.b. the commutable parameter is just !N. It will be first against the wall
+// when the revolution comes.
+multiclass logical_shifts<string prefix, bit sf, bits<2> opc,
+ bit N, bit commutable,
+ string asmop, SDPatternOperator opfrag, string sty,
+ RegisterClass GPR, list<Register> defs>
+{
+ let isCommutable = commutable, Defs = defs in {
+ def _lsl : A64I_logicalshift<sf, opc, 0b00, N,
+ (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm,
+ !cast<Operand>("lsl_operand_" # sty):$Imm6),
+ !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"),
+ [(set GPR:$Rd, (opfrag GPR:$Rn, (shl GPR:$Rm,
+ !cast<Operand>("lsl_operand_" # sty):$Imm6))
+ )],
+ NoItinerary>;
+
+ def _lsr : A64I_logicalshift<sf, opc, 0b01, N,
+ (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm,
+ !cast<Operand>("lsr_operand_" # sty):$Imm6),
+ !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"),
+ [(set GPR:$Rd, (opfrag GPR:$Rn, (srl GPR:$Rm,
+ !cast<Operand>("lsr_operand_" # sty):$Imm6))
+ )],
+ NoItinerary>;
+
+ def _asr : A64I_logicalshift<sf, opc, 0b10, N,
+ (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm,
+ !cast<Operand>("asr_operand_" # sty):$Imm6),
+ !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"),
+ [(set GPR:$Rd, (opfrag GPR:$Rn, (sra GPR:$Rm,
+ !cast<Operand>("asr_operand_" # sty):$Imm6))
+ )],
+ NoItinerary>;
+
+ def _ror : A64I_logicalshift<sf, opc, 0b11, N,
+ (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm,
+ !cast<Operand>("ror_operand_" # sty):$Imm6),
+ !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"),
+ [(set GPR:$Rd, (opfrag GPR:$Rn, (rotr GPR:$Rm,
+ !cast<Operand>("ror_operand_" # sty):$Imm6))
+ )],
+ NoItinerary>;
+ }
+
+ def _noshift
+ : InstAlias<!strconcat(asmop, " $Rd, $Rn, $Rm"),
+ (!cast<Instruction>(prefix # "_lsl") GPR:$Rd, GPR:$Rn,
+ GPR:$Rm, 0)>;
+
+ def : Pat<(opfrag GPR:$Rn, GPR:$Rm),
+ (!cast<Instruction>(prefix # "_lsl") GPR:$Rn, GPR:$Rm, 0)>;
+}
+
+multiclass logical_sizes<string prefix, bits<2> opc, bit N, bit commutable,
+ string asmop, SDPatternOperator opfrag,
+ list<Register> defs>
+{
+ defm xxx : logical_shifts<prefix # "xxx", 0b1, opc, N,
+ commutable, asmop, opfrag, "i64", GPR64, defs>;
+ defm www : logical_shifts<prefix # "www", 0b0, opc, N,
+ commutable, asmop, opfrag, "i32", GPR32, defs>;
+}
+
+
+defm AND : logical_sizes<"AND", 0b00, 0b0, 0b1, "and", and, []>;
+defm ORR : logical_sizes<"ORR", 0b01, 0b0, 0b1, "orr", or, []>;
+defm EOR : logical_sizes<"EOR", 0b10, 0b0, 0b1, "eor", xor, []>;
+defm ANDS : logical_sizes<"ANDS", 0b11, 0b0, 0b1, "ands",
+ PatFrag<(ops node:$lhs, node:$rhs), (and node:$lhs, node:$rhs),
+ [{ (void)N; return false; }]>,
+ [NZCV]>;
+
+defm BIC : logical_sizes<"BIC", 0b00, 0b1, 0b0, "bic",
+ PatFrag<(ops node:$lhs, node:$rhs),
+ (and node:$lhs, (not node:$rhs))>, []>;
+defm ORN : logical_sizes<"ORN", 0b01, 0b1, 0b0, "orn",
+ PatFrag<(ops node:$lhs, node:$rhs),
+ (or node:$lhs, (not node:$rhs))>, []>;
+defm EON : logical_sizes<"EON", 0b10, 0b1, 0b0, "eon",
+ PatFrag<(ops node:$lhs, node:$rhs),
+ (xor node:$lhs, (not node:$rhs))>, []>;
+defm BICS : logical_sizes<"BICS", 0b11, 0b1, 0b0, "bics",
+ PatFrag<(ops node:$lhs, node:$rhs),
+ (and node:$lhs, (not node:$rhs)),
+ [{ (void)N; return false; }]>,
+ [NZCV]>;
+
+multiclass tst_shifts<string prefix, bit sf, string sty, RegisterClass GPR>
+{
+ let isCommutable = 1, Rd = 0b11111, Defs = [NZCV] in {
+ def _lsl : A64I_logicalshift<sf, 0b11, 0b00, 0b0,
+ (outs),
+ (ins GPR:$Rn, GPR:$Rm,
+ !cast<Operand>("lsl_operand_" # sty):$Imm6),
+ "tst\t$Rn, $Rm, $Imm6",
+ [(set NZCV, (A64setcc (and GPR:$Rn, (shl GPR:$Rm,
+ !cast<Operand>("lsl_operand_" # sty):$Imm6)),
+ 0, signed_cond))],
+ NoItinerary>;
+
+
+ def _lsr : A64I_logicalshift<sf, 0b11, 0b01, 0b0,
+ (outs),
+ (ins GPR:$Rn, GPR:$Rm,
+ !cast<Operand>("lsr_operand_" # sty):$Imm6),
+ "tst\t$Rn, $Rm, $Imm6",
+ [(set NZCV, (A64setcc (and GPR:$Rn, (srl GPR:$Rm,
+ !cast<Operand>("lsr_operand_" # sty):$Imm6)),
+ 0, signed_cond))],
+ NoItinerary>;
+
+ def _asr : A64I_logicalshift<sf, 0b11, 0b10, 0b0,
+ (outs),
+ (ins GPR:$Rn, GPR:$Rm,
+ !cast<Operand>("asr_operand_" # sty):$Imm6),
+ "tst\t$Rn, $Rm, $Imm6",
+ [(set NZCV, (A64setcc (and GPR:$Rn, (sra GPR:$Rm,
+ !cast<Operand>("asr_operand_" # sty):$Imm6)),
+ 0, signed_cond))],
+ NoItinerary>;
+
+ def _ror : A64I_logicalshift<sf, 0b11, 0b11, 0b0,
+ (outs),
+ (ins GPR:$Rn, GPR:$Rm,
+ !cast<Operand>("ror_operand_" # sty):$Imm6),
+ "tst\t$Rn, $Rm, $Imm6",
+ [(set NZCV, (A64setcc (and GPR:$Rn, (rotr GPR:$Rm,
+ !cast<Operand>("ror_operand_" # sty):$Imm6)),
+ 0, signed_cond))],
+ NoItinerary>;
+ }
+
+ def _noshift : InstAlias<"tst $Rn, $Rm",
+ (!cast<Instruction>(prefix # "_lsl") GPR:$Rn, GPR:$Rm, 0)>;
+
+ def : Pat<(A64setcc (and GPR:$Rn, GPR:$Rm), 0, signed_cond),
+ (!cast<Instruction>(prefix # "_lsl") GPR:$Rn, GPR:$Rm, 0)>;
+}
+
+defm TSTxx : tst_shifts<"TSTxx", 0b1, "i64", GPR64>;
+defm TSTww : tst_shifts<"TSTww", 0b0, "i32", GPR32>;
+
+
+multiclass mvn_shifts<string prefix, bit sf, string sty, RegisterClass GPR>
+{
+ let isCommutable = 0, Rn = 0b11111 in {
+ def _lsl : A64I_logicalshift<sf, 0b01, 0b00, 0b1,
+ (outs GPR:$Rd),
+ (ins GPR:$Rm,
+ !cast<Operand>("lsl_operand_" # sty):$Imm6),
+ "mvn\t$Rd, $Rm, $Imm6",
+ [(set GPR:$Rd, (not (shl GPR:$Rm,
+ !cast<Operand>("lsl_operand_" # sty):$Imm6)))],
+ NoItinerary>;
+
+
+ def _lsr : A64I_logicalshift<sf, 0b01, 0b01, 0b1,
+ (outs GPR:$Rd),
+ (ins GPR:$Rm,
+ !cast<Operand>("lsr_operand_" # sty):$Imm6),
+ "mvn\t$Rd, $Rm, $Imm6",
+ [(set GPR:$Rd, (not (srl GPR:$Rm,
+ !cast<Operand>("lsr_operand_" # sty):$Imm6)))],
+ NoItinerary>;
+
+ def _asr : A64I_logicalshift<sf, 0b01, 0b10, 0b1,
+ (outs GPR:$Rd),
+ (ins GPR:$Rm,
+ !cast<Operand>("asr_operand_" # sty):$Imm6),
+ "mvn\t$Rd, $Rm, $Imm6",
+ [(set GPR:$Rd, (not (sra GPR:$Rm,
+ !cast<Operand>("asr_operand_" # sty):$Imm6)))],
+ NoItinerary>;
+
+ def _ror : A64I_logicalshift<sf, 0b01, 0b11, 0b1,
+ (outs GPR:$Rd),
+ (ins GPR:$Rm,
+ !cast<Operand>("ror_operand_" # sty):$Imm6),
+ "mvn\t$Rd, $Rm, $Imm6",
+ [(set GPR:$Rd, (not (rotr GPR:$Rm,
+ !cast<Operand>("ror_operand_" # sty):$Imm6)))],
+ NoItinerary>;
+ }
+
+ def _noshift : InstAlias<"mvn $Rn, $Rm",
+ (!cast<Instruction>(prefix # "_lsl") GPR:$Rn, GPR:$Rm, 0)>;
+
+ def : Pat<(not GPR:$Rm),
+ (!cast<Instruction>(prefix # "_lsl") GPR:$Rm, 0)>;
+}
+
+defm MVNxx : mvn_shifts<"MVNxx", 0b1, "i64", GPR64>;
+defm MVNww : mvn_shifts<"MVNww", 0b0, "i32", GPR32>;
+
+def MOVxx :InstAlias<"mov $Rd, $Rm", (ORRxxx_lsl GPR64:$Rd, XZR, GPR64:$Rm, 0)>;
+def MOVww :InstAlias<"mov $Rd, $Rm", (ORRwww_lsl GPR32:$Rd, WZR, GPR32:$Rm, 0)>;
+
+//===----------------------------------------------------------------------===//
+// Move wide (immediate) instructions
+//===----------------------------------------------------------------------===//
+// Contains: MOVN, MOVZ, MOVK + MOV aliases
+
+// A wide variety of relocations is needed for the variants of these
+// instructions, so it turns out that we need a different operand for each of
+// them.
+multiclass movw_operands<string prefix, string instname, int width>
+{
+ def _imm_asmoperand : AsmOperandClass
+ {
+ let Name = instname # width # "Imm";
+ let PredicateMethod = "is" # instname # width # "Imm";
+ let RenderMethod = "addMoveWideImmOperands";
+
+ let ParserMethod = "ParseImmWithLSLOperand";
+ }
+
+ def _imm : Operand<i32>
+ {
+ let ParserMatchClass = !cast<AsmOperandClass>(prefix # "_imm_asmoperand");
+ let PrintMethod = "printMoveWideImmOperand";
+ let EncoderMethod = "getMoveWideImmOpValue";
+ let DecoderMethod = "DecodeMoveWideImmOperand<" # width # ">";
+
+ let MIOperandInfo = (ops uimm16:$UImm16, imm:$Shift);
+ }
+}
+
+defm movn32 : movw_operands<"movn32", "MOVN", 32>;
+defm movn64 : movw_operands<"movn64", "MOVN", 64>;
+defm movz32 : movw_operands<"movz32", "MOVZ", 32>;
+defm movz64 : movw_operands<"movz64", "MOVZ", 64>;
+defm movk32 : movw_operands<"movk32", "MOVK", 32>;
+defm movk64 : movw_operands<"movk64", "MOVK", 64>;
+
+multiclass A64I_movwSizes<bits<2> opc, string asmop, dag ins32bit, dag ins64bit>
+{
+
+ def wii : A64I_movw<0b0, opc, (outs GPR32:$Rd), ins32bit,
+ !strconcat(asmop, "\t$Rd, $FullImm"),
+ [], NoItinerary>
+ {
+ bits<18> FullImm;
+ let UImm16 = FullImm{15-0};
+ let Shift = FullImm{17-16};
+ }
+
+ def xii : A64I_movw<0b1, opc, (outs GPR64:$Rd), ins64bit,
+ !strconcat(asmop, "\t$Rd, $FullImm"),
+ [], NoItinerary>
+ {
+ bits<18> FullImm;
+ let UImm16 = FullImm{15-0};
+ let Shift = FullImm{17-16};
+ }
+}
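+// As a rough illustration of the FullImm packing above: "movz x0, #0x1234,
+// lsl #16" would be parsed into UImm16 = 0x1234 and Shift = 0b01, which the
+// let statements then slot into the encoding.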
+
+let isMoveImm = 1, isReMaterializable = 1,
+ isAsCheapAsAMove = 1, neverHasSideEffects = 1 in
+{
+ defm MOVN : A64I_movwSizes<0b00, "movn",
+ (ins movn32_imm:$FullImm),
+ (ins movn64_imm:$FullImm)>;
+
+ // Some relocations are able to convert between a MOVZ and a MOVN. If one of
+ // these is applied, the instruction must be emitted with the corresponding
+ // bits as 0, which means a MOVZ needs to override that bit from the default.
+ let PostEncoderMethod = "fixMOVZ" in
+ defm MOVZ : A64I_movwSizes<0b10, "movz",
+ (ins movz32_imm:$FullImm),
+ (ins movz64_imm:$FullImm)>;
+}
+
+let Constraints = "$src = $Rd" in
+defm MOVK : A64I_movwSizes<0b11, "movk",
+ (ins GPR32:$src, movk32_imm:$FullImm),
+ (ins GPR64:$src, movk64_imm:$FullImm)>;
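+// MOVK only replaces one 16-bit chunk of the destination and leaves the rest
+// intact, which is why it reads the previous value through the tied $src
+// operand above.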
+
+
+// And now the "MOV" aliases. These also need their own operands because what
+// they accept is completely different to what the base instructions accept.
+multiclass movalias_operand<string prefix, string basename,
+ string immpredicate, int width>
+{
+ def _asmoperand : AsmOperandClass
+ {
+ let Name = basename # width # "MovAlias";
+ let PredicateMethod
+ = "isMoveWideMovAlias<" # width # ", A64Imms::" # immpredicate # ">";
+ let RenderMethod
+ = "addMoveWideMovAliasOperands<" # width # ", "
+ # "A64Imms::" # immpredicate # ">";
+ }
+
+ def _movimm : Operand<i32>
+ {
+ let ParserMatchClass = !cast<AsmOperandClass>(prefix # "_asmoperand");
+
+ let MIOperandInfo = (ops uimm16:$UImm16, imm:$Shift);
+ }
+}
+
+defm movz32 : movalias_operand<"movz32", "MOVZ", "isMOVZImm", 32>;
+defm movz64 : movalias_operand<"movz64", "MOVZ", "isMOVZImm", 64>;
+defm movn32 : movalias_operand<"movn32", "MOVN", "isOnlyMOVNImm", 32>;
+defm movn64 : movalias_operand<"movn64", "MOVN", "isOnlyMOVNImm", 64>;
+
+// FIXME: these are officially canonical aliases, but TableGen is too limited to
+// print them at the moment. I believe in this case an "AliasPredicate" method
+// will need to be implemented to allow it, as well as the more generally
+// useful handling of non-register, non-constant operands.
+class movalias<Instruction INST, RegisterClass GPR, Operand operand>
+ : InstAlias<"mov $Rd, $FullImm", (INST GPR:$Rd, operand:$FullImm)>;
+
+def : movalias<MOVZwii, GPR32, movz32_movimm>;
+def : movalias<MOVZxii, GPR64, movz64_movimm>;
+def : movalias<MOVNwii, GPR32, movn32_movimm>;
+def : movalias<MOVNxii, GPR64, movn64_movimm>;
+
+//===----------------------------------------------------------------------===//
+// PC-relative addressing instructions
+//===----------------------------------------------------------------------===//
+// Contains: ADR, ADRP
+
+def adr_label : Operand<i64> {
+ let EncoderMethod = "getLabelOpValue<AArch64::fixup_a64_adr_prel>";
+
+ // This label is a 21-bit offset from PC, unscaled
+ let PrintMethod = "printLabelOperand<21, 1>";
+ let ParserMatchClass = label_asmoperand<21, 1>;
+ let OperandType = "OPERAND_PCREL";
+}
+
+def adrp_label_asmoperand : AsmOperandClass
+{
+ let Name = "AdrpLabel";
+ let RenderMethod = "addLabelOperands<21, 4096>";
+}
+
+def adrp_label : Operand<i64>
+{
+ let EncoderMethod = "getAdrpLabelOpValue";
+
+ // This label is a 21-bit offset from PC, scaled by the page-size: 4096.
+ let PrintMethod = "printLabelOperand<21, 4096>";
+ let ParserMatchClass = adrp_label_asmoperand;
+ let OperandType = "OPERAND_PCREL";
+}
+
+let neverHasSideEffects = 1 in
+{
+ def ADRxi : A64I_PCADR<0b0, (outs GPR64:$Rd), (ins adr_label:$Label),
+ "adr\t$Rd, $Label", [], NoItinerary>;
+
+ def ADRPxi : A64I_PCADR<0b1, (outs GPR64:$Rd), (ins adrp_label:$Label),
+ "adrp\t$Rd, $Label", [], NoItinerary>;
+}
+
+//===----------------------------------------------------------------------===//
+// System instructions
+//===----------------------------------------------------------------------===//
+// Contains: HINT, CLREX, DSB, DMB, ISB, MSR, SYS, SYSL, MRS
+// + aliases IC, DC, AT, TLBI, NOP, YIELD, WFE, WFI, SEV, SEVL
+
+// Op1 and Op2 fields are sometimes simple 3-bit unsigned immediate values.
+def uimm3_asmoperand : AsmOperandClass
+{
+ let Name = "UImm3";
+ let PredicateMethod = "isUImm<3>";
+ let RenderMethod = "addImmOperands";
+}
+
+def uimm3 : Operand<i32>
+{
+ let ParserMatchClass = uimm3_asmoperand;
+}
+
+// The HINT alias can accept a simple unsigned 7-bit immediate.
+def uimm7_asmoperand : AsmOperandClass
+{
+ let Name = "UImm7";
+ let PredicateMethod = "isUImm<7>";
+ let RenderMethod = "addImmOperands";
+}
+
+def uimm7 : Operand<i32>
+{
+ let ParserMatchClass = uimm7_asmoperand;
+}
+
+// The namedimm multiclass is defined alongside the prefetch operands. Most of
+// these operands fit the NamedImmMapper scheme well: they accept either a named
+// operand or any immediate below a particular value (which may be 0, implying
+// no immediate is allowed).
+defm dbarrier : namedimm<"dbarrier", "A64DB::DBarrierMapper">;
+defm isb : namedimm<"isb", "A64ISB::ISBMapper">;
+defm ic : namedimm<"ic", "A64IC::ICMapper">;
+defm dc : namedimm<"dc", "A64DC::DCMapper">;
+defm at : namedimm<"at", "A64AT::ATMapper">;
+defm tlbi : namedimm<"tlbi", "A64TLBI::TLBIMapper">;
+
+// However, MRS and MSR are more complicated for a few reasons:
+// * There are ~1000 generic names S3_<op1>_<CRn>_<CRm>_<Op2> which have an
+// implementation-defined effect
+// * Most registers are shared, but some are read-only or write-only.
+// * There is a variant of MSR which accepts the same register name (SPSel), but
+// which would have a different encoding.
+
+// In principle these could be resolved with more complicated subclasses of
+// NamedImmMapper, but that would impose an overhead on the other "named
+// immediates", both in concrete terms (virtual tables) and in unnecessary
+// abstraction.
+
+// The solution adopted here is to take the MRS/MSR Mappers out of the usual
+// hierarchy (they're not derived from NamedImmMapper) and to add logic for
+// their special situation.
+def mrs_asmoperand : AsmOperandClass
+{
+ let Name = "MRS";
+ let ParserMethod = "ParseSysRegOperand";
+}
+
+def mrs_op : Operand<i32>
+{
+ let ParserMatchClass = mrs_asmoperand;
+ let PrintMethod = "printMRSOperand";
+ let DecoderMethod = "DecodeMRSOperand";
+}
+
+def msr_asmoperand : AsmOperandClass
+{
+ let Name = "MSRWithReg";
+
+ // Note that SPSel is valid for both this and the pstate operands, but with
+ // different immediate encodings. This is why these operands provide a string
+ // AArch64Operand rather than an immediate. The overlap is small enough that
+ // it could be resolved with hackery now, but who can say in future?
+ let ParserMethod = "ParseSysRegOperand";
+}
+
+def msr_op : Operand<i32>
+{
+ let ParserMatchClass = msr_asmoperand;
+ let PrintMethod = "printMSROperand";
+ let DecoderMethod = "DecodeMSROperand";
+}
+
+def pstate_asmoperand : AsmOperandClass
+{
+ let Name = "MSRPState";
+ // See comment above about parser.
+ let ParserMethod = "ParseSysRegOperand";
+}
+
+def pstate_op : Operand<i32>
+{
+ let ParserMatchClass = pstate_asmoperand;
+ let PrintMethod = "printNamedImmOperand<A64PState::PStateMapper>";
+ let DecoderMethod = "DecodeNamedImmOperand<A64PState::PStateMapper>";
+}
+
+// When <CRn> is specified, an assembler should accept something like "C4", not
+// the usual "#4" immediate.
+def CRx_asmoperand : AsmOperandClass
+{
+ let Name = "CRx";
+ let PredicateMethod = "isUImm<4>";
+ let RenderMethod = "addImmOperands";
+ let ParserMethod = "ParseCRxOperand";
+}
+
+def CRx : Operand<i32>
+{
+ let ParserMatchClass = CRx_asmoperand;
+ let PrintMethod = "printCRxOperand";
+}
+
+
+// Finally, we can start defining the instructions.
+
+// HINT is straightforward, with a few aliases.
+def HINTi : A64I_system<0b0, (outs), (ins uimm7:$UImm7), "hint\t$UImm7",
+ [], NoItinerary>
+{
+ bits<7> UImm7;
+ let CRm = UImm7{6-3};
+ let Op2 = UImm7{2-0};
+
+ let Op0 = 0b00;
+ let Op1 = 0b011;
+ let CRn = 0b0010;
+ let Rt = 0b11111;
+}
+
+def : InstAlias<"nop", (HINTi 0)>;
+def : InstAlias<"yield", (HINTi 1)>;
+def : InstAlias<"wfe", (HINTi 2)>;
+def : InstAlias<"wfi", (HINTi 3)>;
+def : InstAlias<"sev", (HINTi 4)>;
+def : InstAlias<"sevl", (HINTi 5)>;
+
+// Quite a few instructions then follow a similar pattern of fixing common
+// fields in the bitpattern, so we'll define a helper class for them.
+class simple_sys<bits<2> op0, bits<3> op1, bits<4> crn, bits<3> op2,
+ Operand operand, string asmop>
+ : A64I_system<0b0, (outs), (ins operand:$CRm), !strconcat(asmop, "\t$CRm"),
+ [], NoItinerary>
+{
+ let Op0 = op0;
+ let Op1 = op1;
+ let CRn = crn;
+ let Op2 = op2;
+ let Rt = 0b11111;
+}
+
+
+def CLREXi : simple_sys<0b00, 0b011, 0b0011, 0b010, uimm4, "clrex">;
+def DSBi : simple_sys<0b00, 0b011, 0b0011, 0b100, dbarrier_op, "dsb">;
+def DMBi : simple_sys<0b00, 0b011, 0b0011, 0b101, dbarrier_op, "dmb">;
+def ISBi : simple_sys<0b00, 0b011, 0b0011, 0b110, isb_op, "isb">;
+
+def : InstAlias<"clrex", (CLREXi 0b1111)>;
+def : InstAlias<"isb", (ISBi 0b1111)>;
+
+// (DMBi 0xb) is a "DMB ISH" instruction, appropriate for Linux SMP
+// configurations at least.
+def : Pat<(atomic_fence imm, imm), (DMBi 0xb)>;
+
+// Any SYS bitpattern can be represented with a complex and opaque "SYS"
+// instruction.
+def SYSiccix : A64I_system<0b0, (outs),
+ (ins uimm3:$Op1, CRx:$CRn, CRx:$CRm,
+ uimm3:$Op2, GPR64:$Rt),
+ "sys\t$Op1, $CRn, $CRm, $Op2, $Rt",
+ [], NoItinerary>
+{
+ let Op0 = 0b01;
+}
+
+// The Xt argument may be omitted from the generic SYS instruction, whether or
+// not that makes sense for the particular operation.
+def : InstAlias<"sys $Op1, $CRn, $CRm, $Op2",
+ (SYSiccix uimm3:$Op1, CRx:$CRn, CRx:$CRm, uimm3:$Op2, XZR)>;
+
+
+// But many SYS operations have aliases (IC, DC, AT, TLBI), which obviously
+// don't fit into the generic form above, so they get a class of their own.
+class SYSalias<dag ins, string asmstring>
+ : A64I_system<0b0, (outs), ins, asmstring, [], NoItinerary>
+{
+ let isAsmParserOnly = 1;
+
+ bits<14> SysOp;
+ let Op0 = 0b01;
+ let Op1 = SysOp{13-11};
+ let CRn = SysOp{10-7};
+ let CRm = SysOp{6-3};
+ let Op2 = SysOp{2-0};
+}
+
+def ICix : SYSalias<(ins ic_op:$SysOp, GPR64:$Rt), "ic\t$SysOp, $Rt">;
+
+def ICi : SYSalias<(ins ic_op:$SysOp), "ic\t$SysOp">
+{
+ let Rt = 0b11111;
+}
+
+def DCix : SYSalias<(ins dc_op:$SysOp, GPR64:$Rt), "dc\t$SysOp, $Rt">;
+def ATix : SYSalias<(ins at_op:$SysOp, GPR64:$Rt), "at\t$SysOp, $Rt">;
+
+def TLBIix : SYSalias<(ins tlbi_op:$SysOp, GPR64:$Rt), "tlbi\t$SysOp, $Rt">;
+
+def TLBIi : SYSalias<(ins tlbi_op:$SysOp), "tlbi\t$SysOp">
+{
+ let Rt = 0b11111;
+}
+
+
+def SYSLxicci : A64I_system<0b1, (outs GPR64:$Rt),
+ (ins uimm3:$Op1, CRx:$CRn, CRx:$CRm, uimm3:$Op2),
+ "sysl\t$Rt, $Op1, $CRn, $CRm, $Op2",
+ [], NoItinerary>
+{
+ let Op0 = 0b01;
+}
+
+// The instructions themselves are rather simple for MSR and MRS.
+def MSRix : A64I_system<0b0, (outs), (ins msr_op:$SysReg, GPR64:$Rt),
+ "msr\t$SysReg, $Rt", [], NoItinerary>
+{
+ bits<16> SysReg;
+ let Op0 = SysReg{15-14};
+ let Op1 = SysReg{13-11};
+ let CRn = SysReg{10-7};
+ let CRm = SysReg{6-3};
+ let Op2 = SysReg{2-0};
+}
+
+def MRSxi : A64I_system<0b1, (outs GPR64:$Rt), (ins mrs_op:$SysReg),
+ "mrs\t$Rt, $SysReg", [], NoItinerary>
+{
+ bits<16> SysReg;
+ let Op0 = SysReg{15-14};
+ let Op1 = SysReg{13-11};
+ let CRn = SysReg{10-7};
+ let CRm = SysReg{6-3};
+ let Op2 = SysReg{2-0};
+}
+
+def MSRii : A64I_system<0b0, (outs), (ins pstate_op:$PState, uimm4:$CRm),
+ "msr\t$PState, $CRm", [], NoItinerary>
+{
+ bits<6> PState;
+
+ let Op0 = 0b00;
+ let Op1 = PState{5-3};
+ let CRn = 0b0100;
+ let Op2 = PState{2-0};
+ let Rt = 0b11111;
+}
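+// (This is the MSR-immediate form, which architecturally writes PSTATE fields
+// such as SPSel, DAIFSet and DAIFClr; the 4-bit CRm operand is the value to
+// write.)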
+
+//===----------------------------------------------------------------------===//
+// Test & branch (immediate) instructions
+//===----------------------------------------------------------------------===//
+// Contains: TBZ, TBNZ
+
+// The bit to test is a simple unsigned 6-bit immediate in the X-register
+// versions.
+def uimm6 : Operand<i64>
+{
+ let ParserMatchClass = uimm6_asmoperand;
+}
+
+def label_wid14_scal4_asmoperand : label_asmoperand<14, 4>;
+
+def tbimm_target : Operand<OtherVT>
+{
+ let EncoderMethod = "getLabelOpValue<AArch64::fixup_a64_tstbr>";
+
+ // This label is a 14-bit offset from PC, scaled by the instruction-width: 4.
+ let PrintMethod = "printLabelOperand<14, 4>";
+ let ParserMatchClass = label_wid14_scal4_asmoperand;
+
+ let OperandType = "OPERAND_PCREL";
+}
+
+def A64eq : ImmLeaf<i32, [{ return Imm == A64CC::EQ; }]>;
+def A64ne : ImmLeaf<i32, [{ return Imm == A64CC::NE; }]>;
+
+// These instructions correspond to patterns involving "and" with a power of
+// two, which we need to be able to select.
+def tstb64_pat : ComplexPattern<i64, 1, "SelectTSTBOperand<64>">;
+def tstb32_pat : ComplexPattern<i32, 1, "SelectTSTBOperand<32>">;
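+// SelectTSTBOperand is expected to turn such a power-of-two mask into its bit
+// index, so e.g. "(and x0, 8)" compared against zero can become
+// "tbz/tbnz x0, #3, label".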
+
+let isBranch = 1, isTerminator = 1 in
+{
+ def TBZxii : A64I_TBimm<0b0, (outs),
+ (ins GPR64:$Rt, uimm6:$Imm, tbimm_target:$Label),
+ "tbz\t$Rt, $Imm, $Label",
+ [(A64br_cc (A64cmp (and GPR64:$Rt, tstb64_pat:$Imm), 0),
+ A64eq, bb:$Label)],
+ NoItinerary>;
+
+ def TBNZxii : A64I_TBimm<0b1, (outs),
+ (ins GPR64:$Rt, uimm6:$Imm, tbimm_target:$Label),
+ "tbnz\t$Rt, $Imm, $Label",
+ [(A64br_cc (A64cmp (and GPR64:$Rt, tstb64_pat:$Imm), 0),
+ A64ne, bb:$Label)],
+ NoItinerary>;
+
+
+ // Note, these instructions overlap with the above 64-bit patterns. This is
+ // intentional: "tbz x3, #1, somewhere" and "tbz w3, #1, somewhere" would both
+ // do the same thing and are both permitted assembly. They also both have
+ // sensible DAG patterns.
+ def TBZwii : A64I_TBimm<0b0, (outs),
+ (ins GPR32:$Rt, uimm5:$Imm, tbimm_target:$Label),
+ "tbz\t$Rt, $Imm, $Label",
+ [(A64br_cc (A64cmp (and GPR32:$Rt, tstb32_pat:$Imm), 0),
+ A64eq, bb:$Label)],
+ NoItinerary>
+ {
+ let Imm{5} = 0b0;
+ }
+
+ def TBNZwii : A64I_TBimm<0b1, (outs),
+ (ins GPR32:$Rt, uimm5:$Imm, tbimm_target:$Label),
+ "tbnz\t$Rt, $Imm, $Label",
+ [(A64br_cc (A64cmp (and GPR32:$Rt, tstb32_pat:$Imm), 0),
+ A64ne, bb:$Label)],
+ NoItinerary>
+ {
+ let Imm{5} = 0b0;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Unconditional branch (immediate) instructions
+//===----------------------------------------------------------------------===//
+// Contains: B, BL
+
+def label_wid26_scal4_asmoperand : label_asmoperand<26, 4>;
+
+def bimm_target : Operand<OtherVT>
+{
+ let EncoderMethod = "getLabelOpValue<AArch64::fixup_a64_uncondbr>";
+
+ // This label is a 26-bit offset from PC, scaled by the instruction-width: 4.
+ let PrintMethod = "printLabelOperand<26, 4>";
+ let ParserMatchClass = label_wid26_scal4_asmoperand;
+
+ let OperandType = "OPERAND_PCREL";
+}
+
+def blimm_target : Operand<i64>
+{
+ let EncoderMethod = "getLabelOpValue<AArch64::fixup_a64_call>";
+
+ // This label is a 26-bit offset from PC, scaled by the instruction-width: 4.
+ let PrintMethod = "printLabelOperand<26, 4>";
+ let ParserMatchClass = label_wid26_scal4_asmoperand;
+
+ let OperandType = "OPERAND_PCREL";
+}
+
+class A64I_BimmImpl<bit op, string asmop, list<dag> patterns, Operand lbl_type>
+ : A64I_Bimm<op, (outs), (ins lbl_type:$Label),
+ !strconcat(asmop, "\t$Label"), patterns,
+ NoItinerary>;
+
+let isBranch = 1 in {
+ def Bimm : A64I_BimmImpl<0b0, "b", [(br bb:$Label)], bimm_target>
+ {
+ let isTerminator = 1;
+ let isBarrier = 1;
+ }
+
+ def BLimm : A64I_BimmImpl<0b1, "bl",
+ [(AArch64Call tglobaladdr:$Label)], blimm_target>
+ {
+ let isCall = 1;
+ let Defs = [X30];
+ }
+}
+
+def : Pat<(AArch64Call texternalsym:$Label), (BLimm texternalsym:$Label)>;
+
+//===----------------------------------------------------------------------===//
+// Unconditional branch (register) instructions
+//===----------------------------------------------------------------------===//
+// Contains: BR, BLR, RET, ERET, DRPS.
+
+// Most of the notional opcode fields in the A64I_Breg format are fixed in A64
+// at the moment.
+class A64I_BregImpl<bits<4> opc,
+ dag outs, dag ins, string asmstr, list<dag> patterns,
+ InstrItinClass itin = NoItinerary>
+ : A64I_Breg<opc, 0b11111, 0b000000, 0b00000,
+ outs, ins, asmstr, patterns, itin>
+{
+ let isBranch = 1;
+ let isIndirectBranch = 1;
+}
+
+// Note that these are not marked isCall or isReturn because as far as LLVM is
+// concerned they're not. "ret" is just another jump unless it has been selected
+// by LLVM as the function's return.
+
+let isBranch = 1 in {
+ def BRx : A64I_BregImpl<0b0000,(outs), (ins GPR64:$Rn),
+ "br\t$Rn", [(brind GPR64:$Rn)]>
+ {
+ let isBarrier = 1;
+ let isTerminator = 1;
+ }
+
+ def BLRx : A64I_BregImpl<0b0001, (outs), (ins GPR64:$Rn),
+ "blr\t$Rn", [(AArch64Call GPR64:$Rn)]>
+ {
+ let isBarrier = 0;
+ let isCall = 1;
+ let Defs = [X30];
+ }
+
+ def RETx : A64I_BregImpl<0b0010, (outs), (ins GPR64:$Rn),
+ "ret\t$Rn", []>
+ {
+ let isBarrier = 1;
+ let isTerminator = 1;
+ let isReturn = 1;
+ }
+
+ // Create a separate pseudo-instruction for codegen to use so that we don't
+ // flag x30 as used in every function. It'll be restored before the RET by the
+ // epilogue if it's legitimately used.
+ def RET : A64PseudoExpand<(outs), (ins), [(A64ret)], (RETx (ops X30))>
+ {
+ let isTerminator = 1;
+ let isBarrier = 1;
+ let isReturn = 1;
+ }
+
+ def ERET : A64I_BregImpl<0b0100, (outs), (ins), "eret", []>
+ {
+ let Rn = 0b11111;
+ let isBarrier = 1;
+ let isTerminator = 1;
+ let isReturn = 1;
+ }
+
+ def DRPS : A64I_BregImpl<0b0101, (outs), (ins), "drps", []>
+ {
+ let Rn = 0b11111;
+ let isBarrier = 1;
+ }
+}
+
+def RETAlias : InstAlias<"ret", (RETx X30)>;
+
+
+//===----------------------------------------------------------------------===//
+// Address generation patterns
+//===----------------------------------------------------------------------===//
+
+// Primary method of address generation for the small/absolute memory model is
+// an ADRP/ADD pair:
+// ADRP x0, some_variable
+// ADD x0, x0, #:lo12:some_variable
+//
+// The load/store elision of the ADD is accomplished when selecting
+// addressing-modes. This just mops up the cases where that doesn't work and we
+// really need an address in some register.
+
+// This wrapper applies a LO12 modifier to the address. Otherwise we could just
+// use the same address.
+
+class ADRP_ADD<SDNode Wrapper, SDNode addrop>
+ : Pat<(Wrapper addrop:$Hi, addrop:$Lo12, (i32 imm)),
+ (ADDxxi_lsl0_s (ADRPxi addrop:$Hi), addrop:$Lo12)>;
+
+def : ADRP_ADD<A64WrapperSmall, tblockaddress>;
+def : ADRP_ADD<A64WrapperSmall, texternalsym>;
+def : ADRP_ADD<A64WrapperSmall, tglobaladdr>;
+def : ADRP_ADD<A64WrapperSmall, tglobaltlsaddr>;
+def : ADRP_ADD<A64WrapperSmall, tjumptable>;
+
+//===----------------------------------------------------------------------===//
+// GOT access patterns
+//===----------------------------------------------------------------------===//
+
+// FIXME: Wibble
+
+class GOTLoadSmall<SDNode addrfrag>
+ : Pat<(A64GOTLoad (A64WrapperSmall addrfrag:$Hi, addrfrag:$Lo12, 8)),
+ (LS64_LDR (ADRPxi addrfrag:$Hi), addrfrag:$Lo12)>;
+
+def : GOTLoadSmall<texternalsym>;
+def : GOTLoadSmall<tglobaladdr>;
+def : GOTLoadSmall<tglobaltlsaddr>;
+
+//===----------------------------------------------------------------------===//
+// Tail call handling
+//===----------------------------------------------------------------------===//
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [XSP] in
+{
+ def TC_RETURNdi
+ : PseudoInst<(outs), (ins i64imm:$dst, i32imm:$FPDiff),
+ [(AArch64tcret tglobaladdr:$dst, (i32 timm:$FPDiff))]>;
+
+ def TC_RETURNxi
+ : PseudoInst<(outs), (ins tcGPR64:$dst, i32imm:$FPDiff),
+ [(AArch64tcret tcGPR64:$dst, (i32 timm:$FPDiff))]>;
+}
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
+ Uses = [XSP] in
+{
+ def TAIL_Bimm : A64PseudoExpand<(outs), (ins bimm_target:$Label), [],
+ (Bimm bimm_target:$Label)>;
+
+ def TAIL_BRx : A64PseudoExpand<(outs), (ins tcGPR64:$Rd), [],
+ (BRx GPR64:$Rd)>;
+}
+
+
+def : Pat<(AArch64tcret texternalsym:$dst, (i32 timm:$FPDiff)),
+ (TC_RETURNdi texternalsym:$dst, imm:$FPDiff)>;
+
+//===----------------------------------------------------------------------===//
+// Thread local storage
+//===----------------------------------------------------------------------===//
+
+// This is a pseudo-instruction representing the ".tlsdesccall" directive in
+// assembly. Its effect is to insert an R_AARCH64_TLSDESC_CALL relocation at the
+// current location. It should always be immediately followed by a BLR
+// instruction, and is intended solely for relaxation by the linker.
+
+def : Pat<(A64threadpointer), (MRSxi 0xde82)>;
+
+def TLSDESCCALL : PseudoInst<(outs), (ins i64imm:$Lbl), []>
+{
+ let hasSideEffects = 1;
+}
+
+def TLSDESC_BLRx : PseudoInst<(outs), (ins GPR64:$Rn, i64imm:$Var),
+ [(A64tlsdesc_blr GPR64:$Rn, tglobaltlsaddr:$Var)]>
+{
+ let isCall = 1;
+ let Defs = [X30];
+}
+
+def : Pat<(A64tlsdesc_blr GPR64:$Rn, texternalsym:$Var),
+ (TLSDESC_BLRx GPR64:$Rn, texternalsym:$Var)>;
+
+//===----------------------------------------------------------------------===//
+// Bitfield patterns
+//===----------------------------------------------------------------------===//
+
+def bfi32_lsb_to_immr : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant((32 - N->getZExtValue()) % 32, MVT::i64);
+}]>;
+
+def bfi64_lsb_to_immr : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant((64 - N->getZExtValue()) % 64, MVT::i64);
+}]>;
+
+def bfi_width_to_imms : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getZExtValue() - 1, MVT::i64);
+}]>;
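+// Worked example of the transforms above, for a 32-bit BFI inserting an 8-bit
+// field at lsb 16: ImmR = (32 - 16) % 32 = 16 and ImmS = 8 - 1 = 7.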
+
+
+// The simpler patterns deal with cases where no AND mask is actually needed
+// (either all bits are used or the low 32 bits are used).
+let AddedComplexity = 10 in {
+
+def : Pat<(A64Bfi GPR64:$src, GPR64:$Rn, imm:$ImmR, imm:$ImmS),
+ (BFIxxii GPR64:$src, GPR64:$Rn,
+ (bfi64_lsb_to_immr (i64 imm:$ImmR)),
+ (bfi_width_to_imms (i64 imm:$ImmS)))>;
+
+def : Pat<(A64Bfi GPR32:$src, GPR32:$Rn, imm:$ImmR, imm:$ImmS),
+ (BFIwwii GPR32:$src, GPR32:$Rn,
+ (bfi32_lsb_to_immr (i64 imm:$ImmR)),
+ (bfi_width_to_imms (i64 imm:$ImmS)))>;
+
+
+def : Pat<(and (A64Bfi GPR64:$src, GPR64:$Rn, imm:$ImmR, imm:$ImmS),
+ (i64 4294967295)),
+ (SUBREG_TO_REG (i64 0),
+ (BFIwwii (EXTRACT_SUBREG GPR64:$src, sub_32),
+ (EXTRACT_SUBREG GPR64:$Rn, sub_32),
+ (bfi32_lsb_to_immr (i64 imm:$ImmR)),
+ (bfi_width_to_imms (i64 imm:$ImmS))),
+ sub_32)>;
+
+}
+
+//===----------------------------------------------------------------------===//
+// Constant island entries
+//===----------------------------------------------------------------------===//
+
+// The constant island pass needs to create "instructions" in the middle of the
+// instruction stream to represent its constants.
+
+def cpinst_operand : Operand<i32>;
+
+def CONSTPOOL_ENTRY : PseudoInst<(outs), (ins cpinst_operand:$instid,
+ cpinst_operand:$cpidx,
+ i32imm:$size), []>
+{
+ let neverHasSideEffects = 1;
+ let isNotDuplicable = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// Miscellaneous patterns
+//===----------------------------------------------------------------------===//
+
+// Truncation from 64 bits to 32 bits just involves renaming the register.
+def : Pat<(i32 (trunc (i64 GPR64:$val))), (EXTRACT_SUBREG GPR64:$val, sub_32)>;
+
+// Similarly, extension where we don't care about the high bits is
+// just a rename.
+def : Pat<(i64 (anyext (i32 GPR32:$val))),
+ (INSERT_SUBREG (IMPLICIT_DEF), GPR32:$val, sub_32)>;
+
+// SELECT instructions providing f128 types need to be handled by a
+// pseudo-instruction since the eventual code will need to introduce basic
+// blocks and control flow.
+def F128CSEL : PseudoInst<(outs FPR128:$Rd),
+ (ins FPR128:$Rn, FPR128:$Rm, cond_code_op:$Cond),
+ [(set FPR128:$Rd, (simple_select (f128 FPR128:$Rn),
+ FPR128:$Rm))]>
+{
+ let Uses = [NZCV];
+ let usesCustomInserter = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// Load/store patterns
+//===----------------------------------------------------------------------===//
+
+// There are lots of patterns here, because we need to allow at least three
+// parameters to vary independently.
+// 1. Instruction: "ldrb w9, [sp]", "ldrh w9, [sp]", ...
+// 2. LLVM source: zextloadi8, anyextloadi8, ...
+// 3. Address-generation: A64Wrapper, (add BASE, OFFSET), ...
+//
+// The biggest problem turns out to be the address-generation variable. At the
+// point of instantiation we need to produce two DAGs, one for the pattern and
+// one for the instruction. Doing this at the lowest level of classes doesn't
+// work.
+//
+// Consider the simple uimm12 addressing mode, and the desire to match both (add
+// GPR64xsp:$Rn, uimm12:$Offset) and GPR64xsp:$Rn, particularly on the
+// instruction side. We'd need to insert either "GPR64xsp" and "uimm12" or
+// "GPR64xsp" and "0" into an unknown dag. !subst is not capable of this
+// operation, and PatFrags are for selection not output.
+//
+// As a result, the address-generation patterns are the final
+// instantiations. However, we do still need to vary the operand for the address
+// further down (at the point where we decide on A64WrapperSmall, we don't know
+// the memory width of the operation).
+
+//===------------------------------
+// 1. Basic infrastructural defs
+//===------------------------------
+
+// First, some simple classes for !foreach and !subst to use:
+class Decls
+{
+ dag pattern;
+}
+
+def decls : Decls;
+def ALIGN;
+def INST;
+def OFFSET;
+def SHIFT;
+
+// You can't use !subst on an actual immediate, but you *can* use it on an
+// operand record that happens to match a single immediate. So we do.
+def imm_eq0 : ImmLeaf<i64, [{ return Imm == 0; }]>;
+def imm_eq1 : ImmLeaf<i64, [{ return Imm == 1; }]>;
+def imm_eq2 : ImmLeaf<i64, [{ return Imm == 2; }]>;
+def imm_eq3 : ImmLeaf<i64, [{ return Imm == 3; }]>;
+def imm_eq4 : ImmLeaf<i64, [{ return Imm == 4; }]>;
+
+// If the low bits of a pointer are known to be 0 then an "or" is just as good
+// as addition for computing an offset. This fragment forwards that check for
+// TableGen's use.
+def add_like_or : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),
+[{
+ return CurDAG->isBaseWithConstantOffset(SDValue(N, 0));
+}]>;
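+// For example, if a pointer is known to be 8-byte aligned the IR may compute a
+// small offset from it with "(or ptr, 4)"; this fragment lets that be selected
+// exactly as if it had been "(add ptr, 4)".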
+
+// Load/store (unsigned immediate) operations with relocations against global
+// symbols (for lo12) are only valid if those symbols have correct alignment
+// (since the immediate offset is divided by the access scale, it can't have a
+// remainder).
+//
+// The guaranteed alignment is provided as part of the WrapperSmall
+// operation, and checked against one of these.
+def any_align : ImmLeaf<i32, [{ (void)Imm; return true; }]>;
+def min_align2 : ImmLeaf<i32, [{ return Imm >= 2; }]>;
+def min_align4 : ImmLeaf<i32, [{ return Imm >= 4; }]>;
+def min_align8 : ImmLeaf<i32, [{ return Imm >= 8; }]>;
+def min_align16 : ImmLeaf<i32, [{ return Imm >= 16; }]>;
+
+// "Normal" load/store instructions can be used on atomic operations, provided
+// the ordering parameter is at most "monotonic". Anything above that needs
+// special handling with acquire/release instructions.
+class simple_load<PatFrag base>
+ : PatFrag<(ops node:$ptr), (base node:$ptr), [{
+ return cast<AtomicSDNode>(N)->getOrdering() <= Monotonic;
+}]>;
+
+def atomic_load_simple_i8 : simple_load<atomic_load_8>;
+def atomic_load_simple_i16 : simple_load<atomic_load_16>;
+def atomic_load_simple_i32 : simple_load<atomic_load_32>;
+def atomic_load_simple_i64 : simple_load<atomic_load_64>;
+
+class simple_store<PatFrag base>
+ : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{
+ return cast<AtomicSDNode>(N)->getOrdering() <= Monotonic;
+}]>;
+
+def atomic_store_simple_i8 : simple_store<atomic_store_8>;
+def atomic_store_simple_i16 : simple_store<atomic_store_16>;
+def atomic_store_simple_i32 : simple_store<atomic_store_32>;
+def atomic_store_simple_i64 : simple_store<atomic_store_64>;
+
+//===------------------------------
+// 2. UImm12 and SImm9
+//===------------------------------
+
+// These instructions have two operands providing the address so they can be
+// treated similarly for most purposes.
+
+//===------------------------------
+// 2.1 Base patterns covering extend/truncate semantics
+//===------------------------------
+
+// Atomic patterns can be shared between integer operations of all sizes; a
+// quick multiclass here allows reuse.
+multiclass ls_atomic_pats<Instruction LOAD, Instruction STORE, dag Base,
+ dag Offset, dag address, RegisterClass TPR,
+ ValueType sty>
+{
+ def : Pat<(!cast<PatFrag>("atomic_load_simple_" # sty) address),
+ (LOAD Base, Offset)>;
+
+ def : Pat<(!cast<PatFrag>("atomic_store_simple_" # sty) address, TPR:$Rt),
+ (STORE TPR:$Rt, Base, Offset)>;
+}
+
+// Instructions accessing a memory chunk smaller than a register (or, in a
+// pinch, the same size) have a characteristic set of patterns they want to
+// match: extending loads and truncating stores. This class deals with the
+// sign-neutral version of those patterns.
+//
+// It will be instantiated across multiple addressing-modes.
+multiclass ls_small_pats<Instruction LOAD, Instruction STORE,
+ dag Base, dag Offset,
+ dag address, ValueType sty>
+ : ls_atomic_pats<LOAD, STORE, Base, Offset, address, GPR32, sty>
+{
+ def : Pat<(!cast<SDNode>(zextload # sty) address), (LOAD Base, Offset)>;
+
+ def : Pat<(!cast<SDNode>(extload # sty) address), (LOAD Base, Offset)>;
+
+ // For zero-extension to 64-bits we have to tell LLVM that the whole 64-bit
+ // register was actually set.
+ def : Pat<(i64 (!cast<SDNode>(zextload # sty) address)),
+ (SUBREG_TO_REG (i64 0), (LOAD Base, Offset), sub_32)>;
+
+ def : Pat<(i64 (!cast<SDNode>(extload # sty) address)),
+ (SUBREG_TO_REG (i64 0), (LOAD Base, Offset), sub_32)>;
+
+ def : Pat<(!cast<SDNode>(truncstore # sty) GPR32:$Rt, address),
+ (STORE GPR32:$Rt, Base, Offset)>;
+
+ // For truncating store from 64-bits, we have to manually tell LLVM to
+ // ignore the high bits of the x register.
+ def : Pat<(!cast<SDNode>(truncstore # sty) GPR64:$Rt, address),
+ (STORE (EXTRACT_SUBREG GPR64:$Rt, sub_32), Base, Offset)>;
+}
+
+// Next come patterns for sign-extending loads.
+multiclass load_signed_pats<string T, string U, dag Base, dag Offset,
+ dag address, ValueType sty>
+{
+ def : Pat<(i32 (!cast<SDNode>("sextload" # sty) address)),
+ (!cast<Instruction>("LDRS" # T # "w" # U) Base, Offset)>;
+
+ def : Pat<(i64 (!cast<SDNode>("sextload" # sty) address)),
+ (!cast<Instruction>("LDRS" # T # "x" # U) Base, Offset)>;
+
+}
+
+// And finally, the "natural-width" loads and stores.
+multiclass ls_neutral_pats<Instruction LOAD, Instruction STORE, dag Base,
+ dag Offset, dag address, RegisterClass TPR,
+ ValueType sty>
+{
+ def : Pat<(sty (load address)), (LOAD Base, Offset)>;
+ def : Pat<(store (sty TPR:$Rt), address), (STORE TPR:$Rt, Base, Offset)>;
+}
+
+// Integer operations also get atomic instructions to select for.
+multiclass ls_int_neutral_pats<Instruction LOAD, Instruction STORE, dag Base,
+ dag Offset, dag address, RegisterClass TPR,
+ ValueType sty>
+ : ls_neutral_pats<LOAD, STORE, Base, Offset, address, TPR, sty>,
+ ls_atomic_pats<LOAD, STORE, Base, Offset, address, TPR, sty>;
+
+//===------------------------------
+// 2.2. Addressing-mode instantiations
+//===------------------------------
+
+multiclass uimm12_pats<dag address, dag Base, dag Offset>
+{
+ defm : ls_small_pats<LS8_LDR, LS8_STR, Base,
+ !foreach(decls.pattern, Offset,
+ !subst(OFFSET, byte_uimm12, decls.pattern)),
+ !foreach(decls.pattern, address,
+ !subst(OFFSET, byte_uimm12,
+ !subst(ALIGN, any_align, decls.pattern))),
+ i8>;
+ defm : ls_small_pats<LS16_LDR, LS16_STR, Base,
+ !foreach(decls.pattern, Offset,
+ !subst(OFFSET, hword_uimm12, decls.pattern)),
+ !foreach(decls.pattern, address,
+ !subst(OFFSET, hword_uimm12,
+ !subst(ALIGN, min_align2, decls.pattern))),
+ i16>;
+ defm : ls_small_pats<LS32_LDR, LS32_STR, Base,
+ !foreach(decls.pattern, Offset,
+ !subst(OFFSET, word_uimm12, decls.pattern)),
+ !foreach(decls.pattern, address,
+ !subst(OFFSET, word_uimm12,
+ !subst(ALIGN, min_align4, decls.pattern))),
+ i32>;
+
+ defm : ls_int_neutral_pats<LS32_LDR, LS32_STR, Base,
+ !foreach(decls.pattern, Offset,
+ !subst(OFFSET, word_uimm12, decls.pattern)),
+ !foreach(decls.pattern, address,
+ !subst(OFFSET, word_uimm12,
+ !subst(ALIGN, min_align4, decls.pattern))),
+ GPR32, i32>;
+
+ defm : ls_int_neutral_pats<LS64_LDR, LS64_STR, Base,
+ !foreach(decls.pattern, Offset,
+ !subst(OFFSET, dword_uimm12, decls.pattern)),
+ !foreach(decls.pattern, address,
+ !subst(OFFSET, dword_uimm12,
+ !subst(ALIGN, min_align8, decls.pattern))),
+ GPR64, i64>;
+
+ defm : ls_neutral_pats<LSFP16_LDR, LSFP16_STR, Base,
+ !foreach(decls.pattern, Offset,
+ !subst(OFFSET, hword_uimm12, decls.pattern)),
+ !foreach(decls.pattern, address,
+ !subst(OFFSET, hword_uimm12,
+ !subst(ALIGN, min_align2, decls.pattern))),
+ FPR16, f16>;
+
+ defm : ls_neutral_pats<LSFP32_LDR, LSFP32_STR, Base,
+ !foreach(decls.pattern, Offset,
+ !subst(OFFSET, word_uimm12, decls.pattern)),
+ !foreach(decls.pattern, address,
+ !subst(OFFSET, word_uimm12,
+ !subst(ALIGN, min_align4, decls.pattern))),
+ FPR32, f32>;
+
+ defm : ls_neutral_pats<LSFP64_LDR, LSFP64_STR, Base,
+ !foreach(decls.pattern, Offset,
+ !subst(OFFSET, dword_uimm12, decls.pattern)),
+ !foreach(decls.pattern, address,
+ !subst(OFFSET, dword_uimm12,
+ !subst(ALIGN, min_align8, decls.pattern))),
+ FPR64, f64>;
+
+ defm : ls_neutral_pats<LSFP128_LDR, LSFP128_STR, Base,
+ !foreach(decls.pattern, Offset,
+ !subst(OFFSET, qword_uimm12, decls.pattern)),
+ !foreach(decls.pattern, address,
+ !subst(OFFSET, qword_uimm12,
+ !subst(ALIGN, min_align16, decls.pattern))),
+ FPR128, f128>;
+
+ defm : load_signed_pats<"B", "", Base,
+ !foreach(decls.pattern, Offset,
+ !subst(OFFSET, byte_uimm12, decls.pattern)),
+ !foreach(decls.pattern, address,
+ !subst(OFFSET, byte_uimm12,
+ !subst(ALIGN, any_align, decls.pattern))),
+ i8>;
+
+ defm : load_signed_pats<"H", "", Base,
+ !foreach(decls.pattern, Offset,
+ !subst(OFFSET, hword_uimm12, decls.pattern)),
+ !foreach(decls.pattern, address,
+ !subst(OFFSET, hword_uimm12,
+ !subst(ALIGN, min_align2, decls.pattern))),
+ i16>;
+
+ def : Pat<(sextloadi32 !foreach(decls.pattern, address,
+ !subst(OFFSET, word_uimm12,
+ !subst(ALIGN, min_align4, decls.pattern)))),
+ (LDRSWx Base, !foreach(decls.pattern, Offset,
+ !subst(OFFSET, word_uimm12, decls.pattern)))>;
+}
+
+// Straightforward patterns of last resort: a pointer with or without an
+// appropriate offset.
+defm : uimm12_pats<(i64 GPR64xsp:$Rn), (i64 GPR64xsp:$Rn), (i64 0)>;
+defm : uimm12_pats<(add GPR64xsp:$Rn, OFFSET:$UImm12),
+ (i64 GPR64xsp:$Rn), (i64 OFFSET:$UImm12)>;
+
+// The offset could be hidden behind an "or", of course:
+defm : uimm12_pats<(add_like_or GPR64xsp:$Rn, OFFSET:$UImm12),
+ (i64 GPR64xsp:$Rn), (i64 OFFSET:$UImm12)>;
+
+// Global addresses under the small-absolute model should use these
+// instructions. There are ELF relocations specifically for it.
+defm : uimm12_pats<(A64WrapperSmall tglobaladdr:$Hi, tglobaladdr:$Lo12, ALIGN),
+ (ADRPxi tglobaladdr:$Hi), (i64 tglobaladdr:$Lo12)>;
+
+defm : uimm12_pats<(A64WrapperSmall tglobaltlsaddr:$Hi, tglobaltlsaddr:$Lo12, ALIGN),
+ (ADRPxi tglobaltlsaddr:$Hi), (i64 tglobaltlsaddr:$Lo12)>;
+
+// External symbols that make it this far should also get standard relocations.
+defm : uimm12_pats<(A64WrapperSmall texternalsym:$Hi, texternalsym:$Lo12, ALIGN),
+ (ADRPxi texternalsym:$Hi), (i64 texternalsym:$Lo12)>;
+
+
+// We also want to use uimm12 instructions for local variables at the moment.
+def tframeindex_XFORM : SDNodeXForm<frameindex, [{
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ return CurDAG->getTargetFrameIndex(FI, MVT::i64);
+}]>;
+
+defm : uimm12_pats<(i64 frameindex:$Rn),
+ (tframeindex_XFORM tframeindex:$Rn), (i64 0)>;
+
+// These can be much simpler than uimm12 because we don't need to change the
+// operand type (e.g. LDURB and LDURH take the same operands).
+multiclass simm9_pats<dag address, dag Base, dag Offset>
+{
+ defm : ls_small_pats<LS8_LDUR, LS8_STUR, Base, Offset, address, i8>;
+ defm : ls_small_pats<LS16_LDUR, LS16_STUR, Base, Offset, address, i16>;
+
+ defm : ls_int_neutral_pats<LS32_LDUR, LS32_STUR, Base, Offset, address,
+ GPR32, i32>;
+ defm : ls_int_neutral_pats<LS64_LDUR, LS64_STUR, Base, Offset, address,
+ GPR64, i64>;
+
+ defm : ls_neutral_pats<LSFP16_LDUR, LSFP16_STUR, Base, Offset, address,
+ FPR16, f16>;
+ defm : ls_neutral_pats<LSFP32_LDUR, LSFP32_STUR, Base, Offset, address,
+ FPR32, f32>;
+ defm : ls_neutral_pats<LSFP64_LDUR, LSFP64_STUR, Base, Offset, address,
+ FPR64, f64>;
+ defm : ls_neutral_pats<LSFP128_LDUR, LSFP128_STUR, Base, Offset, address,
+ FPR128, f128>;
+
+ def : Pat<(i64 (zextloadi32 address)),
+ (SUBREG_TO_REG (i64 0), (LS32_LDUR Base, Offset), sub_32)>;
+
+ def : Pat<(truncstorei32 GPR64:$Rt, address),
+ (LS32_STUR (EXTRACT_SUBREG GPR64:$Rt, sub_32), Base, Offset)>;
+
+ defm : load_signed_pats<"B", "_U", Base, Offset, address, i8>;
+ defm : load_signed_pats<"H", "_U", Base, Offset, address, i16>;
+ def : Pat<(sextloadi32 address), (LDURSWx Base, Offset)>;
+}
+
+defm : simm9_pats<(add GPR64xsp:$Rn, simm9:$SImm9),
+ (i64 GPR64xsp:$Rn), (SDXF_simm9 simm9:$SImm9)>;
+
+defm : simm9_pats<(add_like_or GPR64xsp:$Rn, simm9:$SImm9),
+ (i64 GPR64xsp:$Rn), (SDXF_simm9 simm9:$SImm9)>;
+
+
+//===------------------------------
+// 3. Register offset patterns
+//===------------------------------
+
+// Atomic patterns can be shared between integer operations of all sizes; a
+// quick multiclass here allows reuse.
+multiclass ro_atomic_pats<Instruction LOAD, Instruction STORE, dag Base,
+ dag Offset, dag Extend, dag address,
+ RegisterClass TPR, ValueType sty>
+{
+ def : Pat<(!cast<PatFrag>("atomic_load_simple_" # sty) address),
+ (LOAD Base, Offset, Extend)>;
+
+ def : Pat<(!cast<PatFrag>("atomic_store_simple_" # sty) address, TPR:$Rt),
+ (STORE TPR:$Rt, Base, Offset, Extend)>;
+}
+
+// The register-offset instructions take three operands to give the address,
+// and have an annoying split between instructions where Rm is 32-bit and
+// 64-bit. So we need a special hierarchy to describe them. Other than that the
+// same operations should be supported as for simm9 and uimm12 addressing.
+
+multiclass ro_small_pats<Instruction LOAD, Instruction STORE,
+ dag Base, dag Offset, dag Extend,
+ dag address, ValueType sty>
+ : ro_atomic_pats<LOAD, STORE, Base, Offset, Extend, address, GPR32, sty>
+{
+ def : Pat<(!cast<SDNode>(zextload # sty) address),
+ (LOAD Base, Offset, Extend)>;
+
+ def : Pat<(!cast<SDNode>(extload # sty) address),
+ (LOAD Base, Offset, Extend)>;
+
+ // For zero-extension to 64-bits we have to tell LLVM that the whole 64-bit
+ // register was actually set.
+ def : Pat<(i64 (!cast<SDNode>(zextload # sty) address)),
+ (SUBREG_TO_REG (i64 0), (LOAD Base, Offset, Extend), sub_32)>;
+
+ def : Pat<(i64 (!cast<SDNode>(extload # sty) address)),
+ (SUBREG_TO_REG (i64 0), (LOAD Base, Offset, Extend), sub_32)>;
+
+ def : Pat<(!cast<SDNode>(truncstore # sty) GPR32:$Rt, address),
+ (STORE GPR32:$Rt, Base, Offset, Extend)>;
+
+ // For truncating store from 64-bits, we have to manually tell LLVM to
+ // ignore the high bits of the x register.
+ def : Pat<(!cast<SDNode>(truncstore # sty) GPR64:$Rt, address),
+ (STORE (EXTRACT_SUBREG GPR64:$Rt, sub_32), Base, Offset, Extend)>;
+
+}
+
+// Next come patterns for sign-extending loads.
+multiclass ro_signed_pats<string T, string Rm, dag Base, dag Offset, dag Extend,
+ dag address, ValueType sty>
+{
+ def : Pat<(i32 (!cast<SDNode>("sextload" # sty) address)),
+ (!cast<Instruction>("LDRS" # T # "w_" # Rm # "_RegOffset")
+ Base, Offset, Extend)>;
+
+ def : Pat<(i64 (!cast<SDNode>("sextload" # sty) address)),
+ (!cast<Instruction>("LDRS" # T # "x_" # Rm # "_RegOffset")
+ Base, Offset, Extend)>;
+}
+
+// And finally, the "natural-width" loads and stores.
+multiclass ro_neutral_pats<Instruction LOAD, Instruction STORE,
+ dag Base, dag Offset, dag Extend, dag address,
+ RegisterClass TPR, ValueType sty>
+{
+ def : Pat<(sty (load address)), (LOAD Base, Offset, Extend)>;
+ def : Pat<(store (sty TPR:$Rt), address),
+ (STORE TPR:$Rt, Base, Offset, Extend)>;
+}
+
+multiclass ro_int_neutral_pats<Instruction LOAD, Instruction STORE,
+ dag Base, dag Offset, dag Extend, dag address,
+ RegisterClass TPR, ValueType sty>
+ : ro_neutral_pats<LOAD, STORE, Base, Offset, Extend, address, TPR, sty>,
+ ro_atomic_pats<LOAD, STORE, Base, Offset, Extend, address, TPR, sty>;
+
+multiclass regoff_pats<string Rm, dag address, dag Base, dag Offset, dag Extend>
+{
+ defm : ro_small_pats<!cast<Instruction>("LS8_" # Rm # "_RegOffset_LDR"),
+ !cast<Instruction>("LS8_" # Rm # "_RegOffset_STR"),
+ Base, Offset, Extend,
+ !foreach(decls.pattern, address,
+ !subst(SHIFT, imm_eq0, decls.pattern)),
+ i8>;
+ defm : ro_small_pats<!cast<Instruction>("LS16_" # Rm # "_RegOffset_LDR"),
+ !cast<Instruction>("LS16_" # Rm # "_RegOffset_STR"),
+ Base, Offset, Extend,
+ !foreach(decls.pattern, address,
+ !subst(SHIFT, imm_eq1, decls.pattern)),
+ i16>;
+ defm : ro_small_pats<!cast<Instruction>("LS32_" # Rm # "_RegOffset_LDR"),
+ !cast<Instruction>("LS32_" # Rm # "_RegOffset_STR"),
+ Base, Offset, Extend,
+ !foreach(decls.pattern, address,
+ !subst(SHIFT, imm_eq2, decls.pattern)),
+ i32>;
+
+ defm : ro_int_neutral_pats<!cast<Instruction>("LS32_" # Rm # "_RegOffset_LDR"),
+ !cast<Instruction>("LS32_" # Rm # "_RegOffset_STR"),
+ Base, Offset, Extend,
+ !foreach(decls.pattern, address,
+ !subst(SHIFT, imm_eq2, decls.pattern)),
+ GPR32, i32>;
+
+ defm : ro_int_neutral_pats<!cast<Instruction>("LS64_" # Rm # "_RegOffset_LDR"),
+ !cast<Instruction>("LS64_" # Rm # "_RegOffset_STR"),
+ Base, Offset, Extend,
+ !foreach(decls.pattern, address,
+ !subst(SHIFT, imm_eq3, decls.pattern)),
+ GPR64, i64>;
+
+ defm : ro_neutral_pats<!cast<Instruction>("LSFP16_" # Rm # "_RegOffset_LDR"),
+ !cast<Instruction>("LSFP16_" # Rm # "_RegOffset_STR"),
+ Base, Offset, Extend,
+ !foreach(decls.pattern, address,
+ !subst(SHIFT, imm_eq1, decls.pattern)),
+ FPR16, f16>;
+
+ defm : ro_neutral_pats<!cast<Instruction>("LSFP32_" # Rm # "_RegOffset_LDR"),
+ !cast<Instruction>("LSFP32_" # Rm # "_RegOffset_STR"),
+ Base, Offset, Extend,
+ !foreach(decls.pattern, address,
+ !subst(SHIFT, imm_eq2, decls.pattern)),
+ FPR32, f32>;
+
+ defm : ro_neutral_pats<!cast<Instruction>("LSFP64_" # Rm # "_RegOffset_LDR"),
+ !cast<Instruction>("LSFP64_" # Rm # "_RegOffset_STR"),
+ Base, Offset, Extend,
+ !foreach(decls.pattern, address,
+ !subst(SHIFT, imm_eq3, decls.pattern)),
+ FPR64, f64>;
+
+ defm : ro_neutral_pats<!cast<Instruction>("LSFP128_" # Rm # "_RegOffset_LDR"),
+ !cast<Instruction>("LSFP128_" # Rm # "_RegOffset_STR"),
+ Base, Offset, Extend,
+ !foreach(decls.pattern, address,
+ !subst(SHIFT, imm_eq4, decls.pattern)),
+ FPR128, f128>;
+
+ defm : ro_signed_pats<"B", Rm, Base, Offset, Extend,
+ !foreach(decls.pattern, address,
+ !subst(SHIFT, imm_eq0, decls.pattern)),
+ i8>;
+
+ defm : ro_signed_pats<"H", Rm, Base, Offset, Extend,
+ !foreach(decls.pattern, address,
+ !subst(SHIFT, imm_eq1, decls.pattern)),
+ i16>;
+
+ def : Pat<(sextloadi32 !foreach(decls.pattern, address,
+ !subst(SHIFT, imm_eq2, decls.pattern))),
+ (!cast<Instruction>("LDRSWx_" # Rm # "_RegOffset")
+ Base, Offset, Extend)>;
+}
+
+
+// Finally we're in a position to tell LLVM exactly what addresses are reachable
+// using register-offset instructions. Essentially a base plus a possibly
+// extended, possibly shifted (by access size) offset.
+
+defm : regoff_pats<"Wm", (add GPR64xsp:$Rn, (sext GPR32:$Rm)),
+ (i64 GPR64xsp:$Rn), (i32 GPR32:$Rm), (i64 6)>;
+
+defm : regoff_pats<"Wm", (add GPR64xsp:$Rn, (shl (sext GPR32:$Rm), SHIFT)),
+ (i64 GPR64xsp:$Rn), (i32 GPR32:$Rm), (i64 7)>;
+
+defm : regoff_pats<"Wm", (add GPR64xsp:$Rn, (zext GPR32:$Rm)),
+ (i64 GPR64xsp:$Rn), (i32 GPR32:$Rm), (i64 2)>;
+
+defm : regoff_pats<"Wm", (add GPR64xsp:$Rn, (shl (zext GPR32:$Rm), SHIFT)),
+ (i64 GPR64xsp:$Rn), (i32 GPR32:$Rm), (i64 3)>;
+
+defm : regoff_pats<"Xm", (add GPR64xsp:$Rn, GPR64:$Rm),
+ (i64 GPR64xsp:$Rn), (i64 GPR64:$Rm), (i64 2)>;
+
+defm : regoff_pats<"Xm", (add GPR64xsp:$Rn, (shl GPR64:$Rm, SHIFT)),
+ (i64 GPR64xsp:$Rn), (i64 GPR64:$Rm), (i64 3)>;
diff --git a/lib/Target/AArch64/AArch64MCInstLower.cpp b/lib/Target/AArch64/AArch64MCInstLower.cpp
new file mode 100644
index 0000000000..0603574306
--- /dev/null
+++ b/lib/Target/AArch64/AArch64MCInstLower.cpp
@@ -0,0 +1,140 @@
+//===-- AArch64MCInstLower.cpp - Convert AArch64 MachineInstr to an MCInst -==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains code to lower AArch64 MachineInstrs to their corresponding
+// MCInst records.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64AsmPrinter.h"
+#include "AArch64TargetMachine.h"
+#include "MCTargetDesc/AArch64BaseInfo.h"
+#include "MCTargetDesc/AArch64MCExpr.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Target/Mangler.h"
+
+using namespace llvm;
+
+MCOperand
+AArch64AsmPrinter::lowerSymbolOperand(const MachineOperand &MO,
+ const MCSymbol *Sym) const {
+ const MCExpr *Expr = 0;
+
+ Expr = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None, OutContext);
+
+ switch (MO.getTargetFlags()) {
+ case AArch64II::MO_GOT:
+ Expr = AArch64MCExpr::CreateGOT(Expr, OutContext);
+ break;
+ case AArch64II::MO_GOT_LO12:
+ Expr = AArch64MCExpr::CreateGOTLo12(Expr, OutContext);
+ break;
+ case AArch64II::MO_LO12:
+ Expr = AArch64MCExpr::CreateLo12(Expr, OutContext);
+ break;
+ case AArch64II::MO_DTPREL_G1:
+ Expr = AArch64MCExpr::CreateDTPREL_G1(Expr, OutContext);
+ break;
+ case AArch64II::MO_DTPREL_G0_NC:
+ Expr = AArch64MCExpr::CreateDTPREL_G0_NC(Expr, OutContext);
+ break;
+ case AArch64II::MO_GOTTPREL:
+ Expr = AArch64MCExpr::CreateGOTTPREL(Expr, OutContext);
+ break;
+ case AArch64II::MO_GOTTPREL_LO12:
+ Expr = AArch64MCExpr::CreateGOTTPRELLo12(Expr, OutContext);
+ break;
+ case AArch64II::MO_TLSDESC:
+ Expr = AArch64MCExpr::CreateTLSDesc(Expr, OutContext);
+ break;
+ case AArch64II::MO_TLSDESC_LO12:
+ Expr = AArch64MCExpr::CreateTLSDescLo12(Expr, OutContext);
+ break;
+ case AArch64II::MO_TPREL_G1:
+ Expr = AArch64MCExpr::CreateTPREL_G1(Expr, OutContext);
+ break;
+ case AArch64II::MO_TPREL_G0_NC:
+ Expr = AArch64MCExpr::CreateTPREL_G0_NC(Expr, OutContext);
+ break;
+ case AArch64II::MO_NO_FLAG:
+ // Expr is already correct
+ break;
+ default:
+ llvm_unreachable("Unexpected MachineOperand flag");
+ }
+
+ if (!MO.isJTI() && MO.getOffset())
+ Expr = MCBinaryExpr::CreateAdd(Expr,
+ MCConstantExpr::Create(MO.getOffset(),
+ OutContext),
+ OutContext);
+
+ return MCOperand::CreateExpr(Expr);
+}
+
+bool AArch64AsmPrinter::lowerOperand(const MachineOperand &MO,
+ MCOperand &MCOp) const {
+ switch (MO.getType()) {
+ default: llvm_unreachable("unknown operand type");
+ case MachineOperand::MO_Register:
+ if (MO.isImplicit())
+ return false;
+ assert(!MO.getSubReg() && "Subregs should be eliminated!");
+ MCOp = MCOperand::CreateReg(MO.getReg());
+ break;
+ case MachineOperand::MO_Immediate:
+ MCOp = MCOperand::CreateImm(MO.getImm());
+ break;
+ case MachineOperand::MO_BlockAddress:
+ MCOp = lowerSymbolOperand(MO, GetBlockAddressSymbol(MO.getBlockAddress()));
+ break;
+ case MachineOperand::MO_ExternalSymbol:
+ MCOp = lowerSymbolOperand(MO, GetExternalSymbolSymbol(MO.getSymbolName()));
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ MCOp = lowerSymbolOperand(MO, Mang->getSymbol(MO.getGlobal()));
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(
+ MO.getMBB()->getSymbol(), OutContext));
+ break;
+ case MachineOperand::MO_JumpTableIndex:
+ MCOp = lowerSymbolOperand(MO, GetJTISymbol(MO.getIndex()));
+ break;
+ case MachineOperand::MO_ConstantPoolIndex:
+ MCOp = lowerSymbolOperand(MO, GetCPISymbol(MO.getIndex()));
+ break;
+ case MachineOperand::MO_RegisterMask:
+ // Ignore call clobbers
+ return false;
+
+ }
+
+ return true;
+}
+
+void llvm::LowerAArch64MachineInstrToMCInst(const MachineInstr *MI,
+ MCInst &OutMI,
+ AArch64AsmPrinter &AP) {
+ OutMI.setOpcode(MI->getOpcode());
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+
+ MCOperand MCOp;
+ if (AP.lowerOperand(MO, MCOp))
+ OutMI.addOperand(MCOp);
+ }
+}
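For context, a hedged sketch of how an asm printer would typically drive this lowering; the EmitInstruction override and OutStreamer member follow the usual LLVM AsmPrinter pattern and are assumptions here rather than part of this file:

  void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) {
    MCInst TmpInst;
    // Lower the MachineInstr, then hand the resulting MCInst to the streamer.
    LowerAArch64MachineInstrToMCInst(MI, TmpInst, *this);
    OutStreamer.EmitInstruction(TmpInst);
  }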
diff --git a/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp b/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp
new file mode 100644
index 0000000000..012a4f8a68
--- /dev/null
+++ b/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp
@@ -0,0 +1,14 @@
+//===-- AArch64MachineFunctionInfo.cpp - AArch64 machine function info ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64MachineFunctionInfo.h"
+
+using namespace llvm;
+
+void AArch64MachineFunctionInfo::anchor() { }
diff --git a/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/lib/Target/AArch64/AArch64MachineFunctionInfo.h
new file mode 100644
index 0000000000..bf5cadf430
--- /dev/null
+++ b/lib/Target/AArch64/AArch64MachineFunctionInfo.h
@@ -0,0 +1,158 @@
+//=- AArch64MachineFunctionInfo.h - AArch64 machine function info -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares AArch64-specific per-machine-function information.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AARCH64MACHINEFUNCTIONINFO_H
+#define AARCH64MACHINEFUNCTIONINFO_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+
+/// This class is derived from MachineFunctionInfo and contains private AArch64
+/// target-specific information for each MachineFunction.
+class AArch64MachineFunctionInfo : public MachineFunctionInfo {
+ virtual void anchor();
+
+ /// Number of bytes of arguments this function has on the stack. If the callee
+ /// is expected to restore the argument stack this should be a multiple of 16,
+ /// all usable during a tail call.
+ ///
+ /// The alternative would forbid tail call optimisation in some cases: if we
+ /// want to transfer control from a function with 8 bytes of stack-argument
+ /// space to one with 16 bytes then the misalignment of this value would
+ /// make a stack adjustment necessary, which could not be undone by the
+ /// callee.
+ unsigned BytesInStackArgArea;
+
+ /// The number of bytes to restore to deallocate space for incoming
+ /// arguments. Canonically 0 in the C calling convention, but non-zero when
+ /// callee is expected to pop the args.
+ unsigned ArgumentStackToRestore;
+
+ /// If the stack needs to be adjusted on frame entry in two stages, this
+ /// records the size of the first adjustment just prior to storing
+ /// callee-saved registers. The callee-saved slots are addressed assuming
+ /// SP == <incoming-SP> - InitialStackAdjust.
+ unsigned InitialStackAdjust;
+
+ /// Number of local-dynamic TLS accesses.
+ unsigned NumLocalDynamics;
+
+ /// Keep track of the next label to be created within this function to
+ /// represent a cloned constant pool entry. Used by constant islands pass.
+ unsigned PICLabelUId;
+
+ /// @see AArch64 Procedure Call Standard, B.3
+ ///
+ /// The Frame index of the area where LowerFormalArguments puts the
+ /// general-purpose registers that might contain variadic parameters.
+ int VariadicGPRIdx;
+
+ /// @see AArch64 Procedure Call Standard, B.3
+ ///
+ /// The size of the frame object used to store the general-purpose registers
+ /// which might contain variadic arguments. This is the offset from
+ /// VariadicGPRIdx to what's stored in __gr_top.
+ unsigned VariadicGPRSize;
+
+ /// @see AArch64 Procedure Call Standard, B.3
+ ///
+ /// The Frame index of the area where LowerFormalArguments puts the
+ /// floating-point registers that might contain variadic parameters.
+ int VariadicFPRIdx;
+
+ /// @see AArch64 Procedure Call Standard, B.3
+ ///
+ /// The size of the frame object used to store the floating-point registers
+ /// which might contain variadic arguments. This is the offset from
+ /// VariadicFPRIdx to what's stored in __vr_top.
+ unsigned VariadicFPRSize;
+
+ /// @see AArch64 Procedure Call Standard, B.3
+ ///
+ /// The Frame index of an object pointing just past the last known stacked
+ /// argument on entry to a variadic function. This goes into the __stack field
+ /// of the va_list type.
+ int VariadicStackIdx;
+
+ /// The offset of the frame pointer from the stack pointer on function
+ /// entry. This is expected to be negative.
+ int FramePointerOffset;
+
+public:
+ AArch64MachineFunctionInfo()
+ : BytesInStackArgArea(0),
+ ArgumentStackToRestore(0),
+ InitialStackAdjust(0),
+ NumLocalDynamics(0),
+ PICLabelUId(0),
+ VariadicGPRIdx(0),
+ VariadicGPRSize(0),
+ VariadicFPRIdx(0),
+ VariadicFPRSize(0),
+ VariadicStackIdx(0),
+ FramePointerOffset(0) {}
+
+ explicit AArch64MachineFunctionInfo(MachineFunction &MF)
+ : BytesInStackArgArea(0),
+ ArgumentStackToRestore(0),
+ InitialStackAdjust(0),
+ NumLocalDynamics(0),
+ PICLabelUId(0),
+ VariadicGPRIdx(0),
+ VariadicGPRSize(0),
+ VariadicFPRIdx(0),
+ VariadicFPRSize(0),
+ VariadicStackIdx(0),
+ FramePointerOffset(0) {}
+
+ unsigned getBytesInStackArgArea() const { return BytesInStackArgArea; }
+ void setBytesInStackArgArea(unsigned bytes) { BytesInStackArgArea = bytes; }
+
+ unsigned getArgumentStackToRestore() const { return ArgumentStackToRestore; }
+ void setArgumentStackToRestore(unsigned bytes) { ArgumentStackToRestore = bytes; }
+
+ unsigned getInitialStackAdjust() const { return InitialStackAdjust; }
+ void setInitialStackAdjust(unsigned bytes) { InitialStackAdjust = bytes; }
+
+ unsigned getNumLocalDynamicTLSAccesses() const { return NumLocalDynamics; }
+ void incNumLocalDynamicTLSAccesses() { ++NumLocalDynamics; }
+
+ void initPICLabelUId(unsigned UId) { PICLabelUId = UId; }
+ unsigned getNumPICLabels() const { return PICLabelUId; }
+ unsigned createPICLabelUId() { return PICLabelUId++; }
+
+ int getVariadicGPRIdx() const { return VariadicGPRIdx; }
+ void setVariadicGPRIdx(int Idx) { VariadicGPRIdx = Idx; }
+
+ unsigned getVariadicGPRSize() const { return VariadicGPRSize; }
+ void setVariadicGPRSize(unsigned Size) { VariadicGPRSize = Size; }
+
+ int getVariadicFPRIdx() const { return VariadicFPRIdx; }
+ void setVariadicFPRIdx(int Idx) { VariadicFPRIdx = Idx; }
+
+ unsigned getVariadicFPRSize() const { return VariadicFPRSize; }
+ void setVariadicFPRSize(unsigned Size) { VariadicFPRSize = Size; }
+
+ int getVariadicStackIdx() const { return VariadicStackIdx; }
+ void setVariadicStackIdx(int Idx) { VariadicStackIdx = Idx; }
+
+ int getFramePointerOffset() const { return FramePointerOffset; }
+ void setFramePointerOffset(int Idx) { FramePointerOffset = Idx; }
+
+};
+
+} // End llvm namespace
+
+#endif
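A hedged sketch of how calling-convention lowering might record the variadic general-purpose register save area through these accessors; the helper itself and its arguments are illustrative assumptions, only the getInfo<> call and the setters come from the code above:

  static void recordVariadicGPRArea(MachineFunction &MF, int FrameIdx,
                                    unsigned Bytes) {
    AArch64MachineFunctionInfo *FuncInfo =
        MF.getInfo<AArch64MachineFunctionInfo>();
    FuncInfo->setVariadicGPRIdx(FrameIdx); // frame object holding X0-X7 spills
    FuncInfo->setVariadicGPRSize(Bytes);   // offset used to derive __gr_top
  }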
diff --git a/lib/Target/AArch64/AArch64RegisterInfo.cpp b/lib/Target/AArch64/AArch64RegisterInfo.cpp
new file mode 100644
index 0000000000..ce665048c3
--- /dev/null
+++ b/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -0,0 +1,211 @@
+//===- AArch64RegisterInfo.cpp - AArch64 Register Information -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the AArch64 implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "AArch64RegisterInfo.h"
+#include "AArch64FrameLowering.h"
+#include "AArch64MachineFunctionInfo.h"
+#include "AArch64TargetMachine.h"
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/ADT/BitVector.h"
+
+#define GET_REGINFO_TARGET_DESC
+#include "AArch64GenRegisterInfo.inc"
+
+using namespace llvm;
+
+AArch64RegisterInfo::AArch64RegisterInfo(const AArch64InstrInfo &tii,
+ const AArch64Subtarget &sti)
+ : AArch64GenRegisterInfo(AArch64::X30), TII(tii) {
+}
+
+const uint16_t *
+AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+ return CSR_PCS_SaveList;
+}
+
+const uint32_t*
+AArch64RegisterInfo::getCallPreservedMask(CallingConv::ID) const {
+ return CSR_PCS_RegMask;
+}
+
+const uint32_t *AArch64RegisterInfo::getTLSDescCallPreservedMask() const {
+ return TLSDesc_RegMask;
+}
+
+const TargetRegisterClass *
+AArch64RegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
+ if (RC == &AArch64::FlagClassRegClass)
+ return &AArch64::GPR64RegClass;
+
+ return RC;
+}
+
+
+
+BitVector
+AArch64RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+ BitVector Reserved(getNumRegs());
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ Reserved.set(AArch64::XSP);
+ Reserved.set(AArch64::WSP);
+
+ Reserved.set(AArch64::XZR);
+ Reserved.set(AArch64::WZR);
+
+ if (TFI->hasFP(MF)) {
+ Reserved.set(AArch64::X29);
+ Reserved.set(AArch64::W29);
+ }
+
+ return Reserved;
+}
+
+void
+AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MBBI,
+ int SPAdj, RegScavenger *RS) const {
+ assert(SPAdj == 0 && "Cannot deal with nonzero SPAdj yet");
+ MachineInstr &MI = *MBBI;
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const AArch64FrameLowering *TFI =
+ static_cast<const AArch64FrameLowering *>(MF.getTarget().getFrameLowering());
+
+ unsigned i = 0;
+ while (!MI.getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI.getNumOperands() && "Instr doesn't have a FrameIndex Operand");
+ }
+
+ // In order to work out the base and offset for addressing, the FrameLowering
+ // code needs to know (sometimes) whether the instruction is storing/loading a
+ // callee-saved register, or whether it's a more generic
+ // operation. Fortunately the frame indices are used *only* for that purpose
+ // and are contiguous, so we can check here.
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ int MinCSFI = 0;
+ int MaxCSFI = -1;
+
+ if (CSI.size()) {
+ MinCSFI = CSI[0].getFrameIdx();
+ MaxCSFI = CSI[CSI.size() - 1].getFrameIdx();
+ }
+
+ int FrameIndex = MI.getOperand(i).getIndex();
+ bool IsCalleeSaveOp = FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI;
+
+ unsigned FrameReg;
+ int64_t Offset;
+ Offset = TFI->resolveFrameIndexReference(MF, FrameIndex, FrameReg, SPAdj,
+ IsCalleeSaveOp);
+
+ Offset += MI.getOperand(i+1).getImm();
+
+ // DBG_VALUE instructions have no real restrictions so they can be handled
+ // easily.
+ if (MI.isDebugValue()) {
+ MI.getOperand(i).ChangeToRegister(FrameReg, /*isDef=*/ false);
+ MI.getOperand(i+1).ChangeToImmediate(Offset);
+ return;
+ }
+
+ int MinOffset, MaxOffset, OffsetScale;
+ if (MI.getOpcode() == AArch64::ADDxxi_lsl0_s) {
+ MinOffset = 0;
+ MaxOffset = 0xfff;
+ OffsetScale = 1;
+ } else {
+ // Load/store of a stack object
+ TII.getAddressConstraints(MI, OffsetScale, MinOffset, MaxOffset);
+ }
+
+ // The frame lowering has told us a base and offset it thinks we should use to
+ // access this variable, but it's still up to us to make sure the values are
+ // legal for the instruction in question.
+ if (Offset % OffsetScale != 0 || Offset < MinOffset || Offset > MaxOffset) {
+ unsigned BaseReg =
+ MF.getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass);
+ emitRegUpdate(MBB, MBBI, MBBI->getDebugLoc(), TII,
+ BaseReg, FrameReg, BaseReg, Offset);
+ FrameReg = BaseReg;
+ Offset = 0;
+ }
+
+ // Negative offsets are expected if we address from FP, but for
+ // now this checks nothing has gone horribly wrong.
+ assert(Offset >= 0 && "Unexpected negative offset from SP");
+
+ MI.getOperand(i).ChangeToRegister(FrameReg, false, false, true);
+ MI.getOperand(i+1).ChangeToImmediate(Offset / OffsetScale);
+}
+
+void
+AArch64RegisterInfo::eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ DebugLoc dl = MI->getDebugLoc();
+ int Opcode = MI->getOpcode();
+ bool IsDestroy = Opcode == TII.getCallFrameDestroyOpcode();
+ uint64_t CalleePopAmount = IsDestroy ? MI->getOperand(1).getImm() : 0;
+
+ if (!TFI->hasReservedCallFrame(MF)) {
+ unsigned Align = TFI->getStackAlignment();
+
+ uint64_t Amount = MI->getOperand(0).getImm();
+ Amount = (Amount + Align - 1)/Align * Align;
+ if (!IsDestroy) Amount = -Amount;
+
+ // N.b. if CalleePopAmount is valid but zero (i.e. callee would pop, but it
+ // doesn't have to pop anything), then the first operand will be zero too so
+ // this adjustment is a no-op.
+ if (CalleePopAmount == 0) {
+ // FIXME: in-function stack adjustment for calls is limited to 12-bits
+ // because there's no guaranteed temporary register available. Mostly call
+ // frames will be allocated at the start of a function so this is OK, but
+ // it is a limitation that needs dealing with.
+ assert(abs(Amount) < 0xfff && "call frame too large");
+ emitSPUpdate(MBB, MI, dl, TII, AArch64::NoRegister, Amount);
+ }
+ } else if (CalleePopAmount != 0) {
+ // If the calling convention demands that the callee pops arguments from the
+ // stack, we want to add it back if we have a reserved call frame.
+ assert(CalleePopAmount < 0xfff && "call frame too large");
+ emitSPUpdate(MBB, MI, dl, TII, AArch64::NoRegister, -CalleePopAmount);
+ }
+
+ MBB.erase(MI);
+}
+
+unsigned
+AArch64RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ if (TFI->hasFP(MF))
+ return AArch64::X29;
+ else
+ return AArch64::XSP;
+}
+
+bool
+AArch64RegisterInfo::useFPForScavengingIndex(const MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ const AArch64FrameLowering *AFI = static_cast<const AArch64FrameLowering*>(TFI);
+ return AFI->useFPForAddressing(MF);
+}
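A small worked example (illustrative only, assuming a 16-byte stack alignment) of the rounding eliminateCallFramePseudoInstr applies to outgoing call-frame sizes before emitting the SP update:

  #include <cstdint>
  // Same formula as "(Amount + Align - 1)/Align * Align" above.
  constexpr uint64_t roundToAlign(uint64_t Amount, unsigned Align) {
    return (Amount + Align - 1) / Align * Align;
  }
  static_assert(roundToAlign(20, 16) == 32, "20 bytes of arguments reserve 32");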
diff --git a/lib/Target/AArch64/AArch64RegisterInfo.h b/lib/Target/AArch64/AArch64RegisterInfo.h
new file mode 100644
index 0000000000..ea538e2bd3
--- /dev/null
+++ b/lib/Target/AArch64/AArch64RegisterInfo.h
@@ -0,0 +1,79 @@
+//==- AArch64RegisterInfo.h - AArch64 Register Information Impl -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the AArch64 implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_AARCH64REGISTERINFO_H
+#define LLVM_TARGET_AARCH64REGISTERINFO_H
+
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#define GET_REGINFO_HEADER
+#include "AArch64GenRegisterInfo.inc"
+
+namespace llvm {
+
+class AArch64InstrInfo;
+class AArch64Subtarget;
+
+struct AArch64RegisterInfo : public AArch64GenRegisterInfo {
+private:
+ const AArch64InstrInfo &TII;
+
+public:
+ AArch64RegisterInfo(const AArch64InstrInfo &tii,
+ const AArch64Subtarget &sti);
+
+ const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
+ const uint32_t *getCallPreservedMask(CallingConv::ID) const;
+
+ const uint32_t *getTLSDescCallPreservedMask() const;
+
+ BitVector getReservedRegs(const MachineFunction &MF) const;
+ unsigned getFrameRegister(const MachineFunction &MF) const;
+
+ void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
+ RegScavenger *Rs = NULL) const;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const;
+
+ /// getCrossCopyRegClass - Returns a legal register class to copy a register
+ /// in the specified class to or from. Returns original class if it is
+ /// possible to copy between two registers of the specified class.
+ const TargetRegisterClass *
+ getCrossCopyRegClass(const TargetRegisterClass *RC) const;
+
+ /// getLargestLegalSuperClass - Returns the largest super class of RC that is
+ /// legal to use in the current sub-target and has the same spill size.
+ const TargetRegisterClass*
+ getLargestLegalSuperClass(const TargetRegisterClass *RC) const {
+ if (RC == &AArch64::tcGPR64RegClass)
+ return &AArch64::GPR64RegClass;
+
+ return RC;
+ }
+
+ bool requiresRegisterScavenging(const MachineFunction &MF) const {
+ return true;
+ }
+
+ bool requiresFrameIndexScavenging(const MachineFunction &MF) const {
+ return true;
+ }
+
+ bool useFPForScavengingIndex(const MachineFunction &MF) const;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_TARGET_AARCH64REGISTERINFO_H
diff --git a/lib/Target/AArch64/AArch64RegisterInfo.td b/lib/Target/AArch64/AArch64RegisterInfo.td
new file mode 100644
index 0000000000..f1f7fd1610
--- /dev/null
+++ b/lib/Target/AArch64/AArch64RegisterInfo.td
@@ -0,0 +1,205 @@
+//===- AArch64RegisterInfo.td - AArch64 Register defs ------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Declarations that describe the AArch64 register file
+//===----------------------------------------------------------------------===//
+
+let Namespace = "AArch64" in {
+def sub_128 : SubRegIndex;
+def sub_64 : SubRegIndex;
+def sub_32 : SubRegIndex;
+def sub_16 : SubRegIndex;
+def sub_8 : SubRegIndex;
+
+// The FPR registers are handled as sub-registers of their VPR equivalents,
+// but they're really the same thing. We give this concept a special index.
+def sub_alias : SubRegIndex;
+}
+
+// Registers are identified with 5-bit ID numbers.
+class AArch64Reg<bits<16> enc, string n> : Register<n> {
+ let HWEncoding = enc;
+ let Namespace = "AArch64";
+}
+
+class AArch64RegWithSubs<bits<16> enc, string n, list<Register> subregs = [],
+ list<SubRegIndex> inds = []>
+ : AArch64Reg<enc, n> {
+ let SubRegs = subregs;
+ let SubRegIndices = inds;
+}
+
+//===----------------------------------------------------------------------===//
+// Integer registers: w0-w30, wzr, wsp, x0-x30, xzr, sp
+//===----------------------------------------------------------------------===//
+
+foreach Index = 0-30 in {
+ def W#Index : AArch64Reg< Index, "w"#Index>, DwarfRegNum<[Index]>;
+}
+
+def WSP : AArch64Reg<31, "wsp">, DwarfRegNum<[31]>;
+def WZR : AArch64Reg<31, "wzr">;
+
+// Could be combined with previous loop, but this way leaves w and x registers
+// consecutive as LLVM register numbers, which makes for easier debugging.
+foreach Index = 0-30 in {
+ def X#Index : AArch64RegWithSubs<Index, "x"#Index,
+ [!cast<Register>("W"#Index)], [sub_32]>,
+ DwarfRegNum<[Index]>;
+}
+
+def XSP : AArch64RegWithSubs<31, "sp", [WSP], [sub_32]>, DwarfRegNum<[31]>;
+def XZR : AArch64RegWithSubs<31, "xzr", [WZR], [sub_32]>;
+
+// Most instructions treat register 31 as zero for reads and a black-hole for
+// writes.
+
+// Note that the order of registers is important for the Disassembler here:
+// tablegen uses it to form MCRegisterClass::getRegister, which we assume can
+// take an encoding value.
+def GPR32 : RegisterClass<"AArch64", [i32], 32,
+ (add (sequence "W%u", 0, 30), WZR)> {
+}
+
+def GPR64 : RegisterClass<"AArch64", [i64], 64,
+ (add (sequence "X%u", 0, 30), XZR)> {
+}
+
+def GPR32nowzr : RegisterClass<"AArch64", [i32], 32,
+ (sequence "W%u", 0, 30)> {
+}
+
+def GPR64noxzr : RegisterClass<"AArch64", [i64], 64,
+ (sequence "X%u", 0, 30)> {
+}
+
+// For tail calls, we can't use callee-saved registers or the structure-return
+// register, as they are supposed to be live across function calls and may be
+// clobbered by the epilogue.
+def tcGPR64 : RegisterClass<"AArch64", [i64], 64,
+ (add (sequence "X%u", 0, 7),
+ (sequence "X%u", 9, 18))> {
+}
+
+
+// Certain addressing-useful instructions accept sp directly. Again the order of
+// registers is important to the Disassembler.
+def GPR32wsp : RegisterClass<"AArch64", [i32], 32,
+ (add (sequence "W%u", 0, 30), WSP)> {
+}
+
+def GPR64xsp : RegisterClass<"AArch64", [i64], 64,
+ (add (sequence "X%u", 0, 30), XSP)> {
+}
+
+// Some aliases *only* apply to SP (e.g. MOV uses different encoding for SP and
+// non-SP variants). We can't use a bare register in those patterns because
+// TableGen doesn't like it, so we need a class containing just stack registers
+def Rxsp : RegisterClass<"AArch64", [i64], 64,
+ (add XSP)> {
+}
+
+def Rwsp : RegisterClass<"AArch64", [i32], 32,
+ (add WSP)> {
+}
+
+//===----------------------------------------------------------------------===//
+// Scalar registers in the vector unit:
+// b0-b31, h0-h31, s0-s31, d0-d31, q0-q31
+//===----------------------------------------------------------------------===//
+
+foreach Index = 0-31 in {
+ def B # Index : AArch64Reg< Index, "b" # Index>,
+ DwarfRegNum<[!add(Index, 64)]>;
+
+ def H # Index : AArch64RegWithSubs<Index, "h" # Index,
+ [!cast<Register>("B" # Index)], [sub_8]>,
+ DwarfRegNum<[!add(Index, 64)]>;
+
+ def S # Index : AArch64RegWithSubs<Index, "s" # Index,
+ [!cast<Register>("H" # Index)], [sub_16]>,
+ DwarfRegNum<[!add(Index, 64)]>;
+
+ def D # Index : AArch64RegWithSubs<Index, "d" # Index,
+ [!cast<Register>("S" # Index)], [sub_32]>,
+ DwarfRegNum<[!add(Index, 64)]>;
+
+ def Q # Index : AArch64RegWithSubs<Index, "q" # Index,
+ [!cast<Register>("D" # Index)], [sub_64]>,
+ DwarfRegNum<[!add(Index, 64)]>;
+}
+
+
+def FPR8 : RegisterClass<"AArch64", [i8], 8,
+ (sequence "B%u", 0, 31)> {
+}
+
+def FPR16 : RegisterClass<"AArch64", [f16], 16,
+ (sequence "H%u", 0, 31)> {
+}
+
+def FPR32 : RegisterClass<"AArch64", [f32], 32,
+ (sequence "S%u", 0, 31)> {
+}
+
+def FPR64 : RegisterClass<"AArch64", [f64], 64,
+ (sequence "D%u", 0, 31)> {
+}
+
+def FPR128 : RegisterClass<"AArch64", [f128], 128,
+ (sequence "Q%u", 0, 31)> {
+}
+
+
+//===----------------------------------------------------------------------===//
+// Vector registers:
+//===----------------------------------------------------------------------===//
+
+// NEON registers simply specify the overall vector, and it's expected that
+// Instructions will individually specify the acceptable data layout. In
+// principle this leaves two approaches open:
+// + An operand, giving a single ADDvvv instruction (for example). This turns
+// out to be unworkable in the assembly parser (without every Instruction
+// having a "cvt" function, at least) because the constraints can't be
+// properly enforced. It also complicates specifying patterns since each
+// instruction will accept many types.
+// + A bare token (e.g. ".2d"). This means the AsmParser has to know specific
+// details about NEON registers, but simplifies most other details.
+//
+// The second approach was taken.
+
+foreach Index = 0-31 in {
+ def V # Index : AArch64RegWithSubs<Index, "v" # Index,
+ [!cast<Register>("Q" # Index)],
+ [sub_alias]>,
+ DwarfRegNum<[!add(Index, 64)]>;
+}
+
+// These two classes contain the same registers. That should be fine for MC
+// and allocation purposes, but it allows them to be treated separately for
+// things like stack spilling.
+def VPR64 : RegisterClass<"AArch64", [v2f32, v2i32, v4i16, v8i8], 64,
+ (sequence "V%u", 0, 31)>;
+
+def VPR128 : RegisterClass<"AArch64",
+ [v2f64, v2i64, v4f32, v4i32, v8i16, v16i8], 128,
+ (sequence "V%u", 0, 31)>;
+
+// Flags register
+def NZCV : Register<"nzcv">
+{
+ let Namespace = "AArch64";
+}
+
+def FlagClass : RegisterClass<"AArch64", [i32], 32, (add NZCV)>
+{
+ let CopyCost = -1;
+ let isAllocatable = 0;
+}
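A hedged sketch of why the register ordering noted above matters to the disassembler: a 5-bit encoding can be turned straight into a register by indexing the MC register class, which only works if the class order follows the encoding. The helper and the GPR32RegClassID constant are illustrative assumptions about the TableGen-generated names.

  unsigned decodeGPR32(unsigned Encoding, const MCRegisterInfo &MRI) {
    const MCRegisterClass &RC = MRI.getRegClass(AArch64::GPR32RegClassID);
    return RC.getRegister(Encoding); // W0..W30, then WZR for encoding 31
  }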
diff --git a/lib/Target/AArch64/AArch64Schedule.td b/lib/Target/AArch64/AArch64Schedule.td
new file mode 100644
index 0000000000..e17cdaa1f6
--- /dev/null
+++ b/lib/Target/AArch64/AArch64Schedule.td
@@ -0,0 +1,10 @@
+//===- AArch64Schedule.td - AArch64 Scheduling Definitions -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+def GenericItineraries : ProcessorItineraries<[], [], []>;
diff --git a/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp b/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
new file mode 100644
index 0000000000..6bbe075a1b
--- /dev/null
+++ b/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
@@ -0,0 +1,25 @@
+//===-- AArch64SelectionDAGInfo.cpp - AArch64 SelectionDAG Info -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AArch64SelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "arm-selectiondag-info"
+#include "AArch64TargetMachine.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+using namespace llvm;
+
+AArch64SelectionDAGInfo::AArch64SelectionDAGInfo(const AArch64TargetMachine &TM)
+ : TargetSelectionDAGInfo(TM),
+ Subtarget(&TM.getSubtarget<AArch64Subtarget>()) {
+}
+
+AArch64SelectionDAGInfo::~AArch64SelectionDAGInfo() {
+}
diff --git a/lib/Target/AArch64/AArch64SelectionDAGInfo.h b/lib/Target/AArch64/AArch64SelectionDAGInfo.h
new file mode 100644
index 0000000000..8d3889eef4
--- /dev/null
+++ b/lib/Target/AArch64/AArch64SelectionDAGInfo.h
@@ -0,0 +1,32 @@
+//===-- AArch64SelectionDAGInfo.h - AArch64 SelectionDAG Info -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the AArch64 subclass for TargetSelectionDAGInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_AARCH64SELECTIONDAGINFO_H
+#define LLVM_AARCH64SELECTIONDAGINFO_H
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+
+class AArch64TargetMachine;
+class AArch64Subtarget;
+
+class AArch64SelectionDAGInfo : public TargetSelectionDAGInfo {
+ const AArch64Subtarget *Subtarget;
+public:
+ explicit AArch64SelectionDAGInfo(const AArch64TargetMachine &TM);
+ ~AArch64SelectionDAGInfo();
+};
+
+}
+
+#endif
diff --git a/lib/Target/AArch64/AArch64Subtarget.cpp b/lib/Target/AArch64/AArch64Subtarget.cpp
new file mode 100644
index 0000000000..d17b738209
--- /dev/null
+++ b/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -0,0 +1,43 @@
+//===-- AArch64Subtarget.cpp - AArch64 Subtarget Information --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AArch64 specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64Subtarget.h"
+#include "AArch64RegisterInfo.h"
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/ADT/SmallVector.h"
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "AArch64GenSubtargetInfo.inc"
+
+using namespace llvm;
+
+AArch64Subtarget::AArch64Subtarget(StringRef TT, StringRef CPU, StringRef FS)
+ : AArch64GenSubtargetInfo(TT, CPU, FS)
+ , HasNEON(true)
+ , HasCrypto(true)
+ , TargetTriple(TT) {
+
+ ParseSubtargetFeatures(CPU, FS);
+}
+
+bool AArch64Subtarget::GVIsIndirectSymbol(const GlobalValue *GV,
+ Reloc::Model RelocM) const {
+ if (RelocM == Reloc::Static)
+ return false;
+
+ return !GV->hasLocalLinkage() && !GV->hasHiddenVisibility();
+}
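A sketch of a typical caller of GVIsIndirectSymbol, deciding whether a global must be referenced through the GOT. The helper is an illustrative assumption; the MO_GOT and MO_NO_FLAG target flags are the ones consumed by the MCInst lowering earlier in this patch.

  static unsigned char classifyGlobalReference(const AArch64Subtarget &ST,
                                               const GlobalValue *GV,
                                               Reloc::Model RM) {
    return ST.GVIsIndirectSymbol(GV, RM) ? AArch64II::MO_GOT
                                         : AArch64II::MO_NO_FLAG;
  }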
diff --git a/lib/Target/AArch64/AArch64Subtarget.h b/lib/Target/AArch64/AArch64Subtarget.h
new file mode 100644
index 0000000000..2e9205fc99
--- /dev/null
+++ b/lib/Target/AArch64/AArch64Subtarget.h
@@ -0,0 +1,54 @@
+//==-- AArch64Subtarget.h - Define Subtarget for the AArch64 ---*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the AArch64 specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_AARCH64_SUBTARGET_H
+#define LLVM_TARGET_AARCH64_SUBTARGET_H
+
+#include "llvm/ADT/Triple.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+#define GET_SUBTARGETINFO_HEADER
+#include "AArch64GenSubtargetInfo.inc"
+
+#include <string>
+
+namespace llvm {
+class StringRef;
+class GlobalValue;
+
+class AArch64Subtarget : public AArch64GenSubtargetInfo {
+protected:
+ bool HasNEON;
+ bool HasCrypto;
+
+ /// TargetTriple - What processor and OS we're targeting.
+ Triple TargetTriple;
+public:
+ /// This constructor initializes the data members to match that
+ /// of the specified triple.
+ ///
+ AArch64Subtarget(StringRef TT, StringRef CPU, StringRef FS);
+
+ /// ParseSubtargetFeatures - Parses features string setting specified
+ /// subtarget options. Definition of function is auto generated by tblgen.
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+
+ bool GVIsIndirectSymbol(const GlobalValue *GV, Reloc::Model RelocM) const;
+
+ bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
+ bool isTargetLinux() const { return TargetTriple.getOS() == Triple::Linux; }
+
+};
+} // End llvm namespace
+
+#endif // LLVM_TARGET_AARCH64_SUBTARGET_H
diff --git a/lib/Target/AArch64/AArch64TargetMachine.cpp b/lib/Target/AArch64/AArch64TargetMachine.cpp
new file mode 100644
index 0000000000..68e3643d8e
--- /dev/null
+++ b/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -0,0 +1,78 @@
+//===-- AArch64TargetMachine.cpp - Define TargetMachine for AArch64 -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AArch64 specific subclass of TargetMachine.
+//===----------------------------------------------------------------------===//
+
+#include "AArch64.h"
+#include "AArch64TargetMachine.h"
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "llvm/PassManager.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/TargetRegistry.h"
+
+using namespace llvm;
+
+extern "C" void LLVMInitializeAArch64Target() {
+ RegisterTargetMachine<AArch64TargetMachine> X(TheAArch64Target);
+}
+
+AArch64TargetMachine::AArch64TargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ Subtarget(TT, CPU, FS),
+ InstrInfo(Subtarget),
+ DL("e-p:64:64-i64:64:64-i128:128:128-s0:32:32-f128:128:128-n32:64-S128"),
+ TLInfo(*this),
+ TSInfo(*this),
+ FrameLowering(Subtarget) {
+}
+
+namespace {
+/// AArch64 Code Generator Pass Configuration Options.
+class AArch64PassConfig : public TargetPassConfig {
+public:
+ AArch64PassConfig(AArch64TargetMachine *TM, PassManagerBase &PM)
+ : TargetPassConfig(TM, PM) {}
+
+ AArch64TargetMachine &getAArch64TargetMachine() const {
+ return getTM<AArch64TargetMachine>();
+ }
+
+ const AArch64Subtarget &getAArch64Subtarget() const {
+ return *getAArch64TargetMachine().getSubtargetImpl();
+ }
+
+ virtual bool addInstSelector();
+ virtual bool addPreEmitPass();
+};
+} // namespace
+
+TargetPassConfig *AArch64TargetMachine::createPassConfig(PassManagerBase &PM) {
+ return new AArch64PassConfig(this, PM);
+}
+
+bool AArch64PassConfig::addPreEmitPass() {
+ addPass(&UnpackMachineBundlesID);
+ addPass(createAArch64ConstantIslandPass());
+ return true;
+}
+
+bool AArch64PassConfig::addInstSelector() {
+ addPass(createAArch64ISelDAG(getAArch64TargetMachine(), getOptLevel()));
+
+ // For ELF, cleanup any local-dynamic TLS accesses.
+ if (getAArch64Subtarget().isTargetELF() && getOptLevel() != CodeGenOpt::None)
+ addPass(createAArch64CleanupLocalDynamicTLSPass());
+
+ return false;
+}
diff --git a/lib/Target/AArch64/AArch64TargetMachine.h b/lib/Target/AArch64/AArch64TargetMachine.h
new file mode 100644
index 0000000000..c1f47c2e53
--- /dev/null
+++ b/lib/Target/AArch64/AArch64TargetMachine.h
@@ -0,0 +1,69 @@
+//=== AArch64TargetMachine.h - Define TargetMachine for AArch64 -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the AArch64 specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_AARCH64TARGETMACHINE_H
+#define LLVM_AARCH64TARGETMACHINE_H
+
+#include "AArch64FrameLowering.h"
+#include "AArch64ISelLowering.h"
+#include "AArch64InstrInfo.h"
+#include "AArch64SelectionDAGInfo.h"
+#include "AArch64Subtarget.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+
+class AArch64TargetMachine : public LLVMTargetMachine {
+ AArch64Subtarget Subtarget;
+ AArch64InstrInfo InstrInfo;
+ const DataLayout DL;
+ AArch64TargetLowering TLInfo;
+ AArch64SelectionDAGInfo TSInfo;
+ AArch64FrameLowering FrameLowering;
+
+public:
+ AArch64TargetMachine(const Target &T, StringRef TT, StringRef CPU,
+ StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
+
+ const AArch64InstrInfo *getInstrInfo() const {
+ return &InstrInfo;
+ }
+
+ const AArch64FrameLowering *getFrameLowering() const {
+ return &FrameLowering;
+ }
+
+ const AArch64TargetLowering *getTargetLowering() const {
+ return &TLInfo;
+ }
+
+ const AArch64SelectionDAGInfo *getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
+
+ const AArch64Subtarget *getSubtargetImpl() const { return &Subtarget; }
+
+ const DataLayout *getDataLayout() const { return &DL; }
+
+ const TargetRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo();
+ }
+ TargetPassConfig *createPassConfig(PassManagerBase &PM);
+};
+
+}
+
+#endif
diff --git a/lib/Target/AArch64/AArch64TargetObjectFile.cpp b/lib/Target/AArch64/AArch64TargetObjectFile.cpp
new file mode 100644
index 0000000000..3bb961a4b8
--- /dev/null
+++ b/lib/Target/AArch64/AArch64TargetObjectFile.cpp
@@ -0,0 +1,19 @@
+//===-- AArch64TargetObjectFile.cpp - AArch64 Object Info ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64TargetObjectFile.h"
+
+using namespace llvm;
+
+void
+AArch64LinuxTargetObjectFile::Initialize(MCContext &Ctx,
+ const TargetMachine &TM) {
+ TargetLoweringObjectFileELF::Initialize(Ctx, TM);
+ InitializeELF(TM.Options.UseInitArray);
+}
diff --git a/lib/Target/AArch64/AArch64TargetObjectFile.h b/lib/Target/AArch64/AArch64TargetObjectFile.h
new file mode 100644
index 0000000000..07caac1d93
--- /dev/null
+++ b/lib/Target/AArch64/AArch64TargetObjectFile.h
@@ -0,0 +1,27 @@
+//===-- AArch64TargetObjectFile.h - AArch64 Object Info ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_AARCH64_TARGETOBJECTFILE_H
+#define LLVM_TARGET_AARCH64_TARGETOBJECTFILE_H
+
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+
+namespace llvm {
+
+ /// AArch64LinuxTargetObjectFile - This implementation is used for linux
+ /// AArch64.
+ class AArch64LinuxTargetObjectFile : public TargetLoweringObjectFileELF {
+ virtual void Initialize(MCContext &Ctx, const TargetMachine &TM);
+ };
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
new file mode 100644
index 0000000000..34026344b0
--- /dev/null
+++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -0,0 +1,2025 @@
+//==- AArch64AsmParser.cpp - Parse AArch64 assembly to MCInst instructions -==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "MCTargetDesc/AArch64BaseInfo.h"
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "MCTargetDesc/AArch64MCExpr.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCTargetAsmParser.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/TargetRegistry.h"
+
+using namespace llvm;
+
+namespace {
+
+class AArch64Operand;
+
+class AArch64AsmParser : public MCTargetAsmParser {
+ MCSubtargetInfo &STI;
+ MCAsmParser &Parser;
+
+#define GET_ASSEMBLER_HEADER
+#include "AArch64GenAsmMatcher.inc"
+
+public:
+ AArch64AsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser)
+ : MCTargetAsmParser(), STI(_STI), Parser(_Parser) {
+ MCAsmParserExtension::Initialize(_Parser);
+
+ // Initialize the set of available features.
+ setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
+ }
+
+ // These are the public interface of the MCTargetAsmParser
+ bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
+ bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+ SMLoc NameLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ bool ParseDirective(AsmToken DirectiveID);
+ bool ParseDirectiveTLSDescCall(SMLoc L);
+ bool ParseDirectiveWord(unsigned Size, SMLoc L);
+
+ bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ MCStreamer&Out, unsigned &ErrorInfo,
+ bool MatchingInlineAsm);
+
+ // The rest of the sub-parsers have more freedom over interface: they return
+ // an OperandMatchResultTy because it's less ambiguous than true/false or
+ // -1/0/1 even if it is more verbose
+ OperandMatchResultTy
+ ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ StringRef Mnemonic);
+
+ OperandMatchResultTy ParseImmediate(const MCExpr *&ExprVal);
+
+ OperandMatchResultTy ParseRelocPrefix(AArch64MCExpr::VariantKind &RefKind);
+
+ OperandMatchResultTy
+ ParseNEONLane(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ uint32_t NumLanes);
+
+ OperandMatchResultTy
+ ParseRegister(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ uint32_t &NumLanes);
+
+ OperandMatchResultTy
+ ParseImmWithLSLOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ OperandMatchResultTy
+ ParseCondCodeOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ OperandMatchResultTy
+ ParseCRxOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ OperandMatchResultTy
+ ParseFPImmOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ template<typename SomeNamedImmMapper> OperandMatchResultTy
+ ParseNamedImmOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ return ParseNamedImmOperand(SomeNamedImmMapper(), Operands);
+ }
+
+ OperandMatchResultTy
+ ParseNamedImmOperand(const NamedImmMapper &Mapper,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ OperandMatchResultTy
+ ParseLSXAddressOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ OperandMatchResultTy
+ ParseShiftExtend(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ OperandMatchResultTy
+ ParseSysRegOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ bool validateInstruction(MCInst &Inst,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ /// Scan the next token (which had better be an identifier) and determine
+ /// whether it represents a general-purpose or vector register. It returns
+ /// true if an identifier was found and populates its reference arguments. It
+ /// does not consume the token.
+ bool
+ IdentifyRegister(unsigned &RegNum, SMLoc &RegEndLoc, StringRef &LayoutSpec,
+ SMLoc &LayoutLoc) const;
+
+};
+
+}
+
+namespace {
+
+/// Instances of this class represent a parsed AArch64 machine instruction.
+class AArch64Operand : public MCParsedAsmOperand {
+private:
+ enum KindTy {
+ k_ImmWithLSL, // #uimm {, LSL #amt }
+ k_CondCode, // eq/ne/...
+ k_FPImmediate, // Limited-precision floating-point imm
+ k_Immediate, // Including expressions referencing symbols
+ k_Register,
+ k_ShiftExtend,
+ k_SysReg, // The register operand of MRS and MSR instructions
+ k_Token, // The mnemonic; other raw tokens the auto-generated matcher needs
+ k_WrappedRegister // Load/store exclusive instructions permit a wrapped register.
+ } Kind;
+
+ SMLoc StartLoc, EndLoc;
+
+ union {
+ struct {
+ const MCExpr *Val;
+ unsigned ShiftAmount;
+ bool ImplicitAmount;
+ } ImmWithLSL;
+
+ struct {
+ A64CC::CondCodes Code;
+ } CondCode;
+
+ struct {
+ double Val;
+ } FPImm;
+
+ struct {
+ const MCExpr *Val;
+ } Imm;
+
+ struct {
+ unsigned RegNum;
+ } Reg;
+
+ struct {
+ A64SE::ShiftExtSpecifiers ShiftType;
+ unsigned Amount;
+ bool ImplicitAmount;
+ } ShiftExtend;
+
+ struct {
+ const char *Data;
+ unsigned Length;
+ } SysReg;
+
+ struct {
+ const char *Data;
+ unsigned Length;
+ } Tok;
+ };
+
+ AArch64Operand(KindTy K, SMLoc S, SMLoc E)
+ : MCParsedAsmOperand(), Kind(K), StartLoc(S), EndLoc(E) {}
+
+public:
+ AArch64Operand(const AArch64Operand &o) : MCParsedAsmOperand() {
+ }
+
+ SMLoc getStartLoc() const { return StartLoc; }
+ SMLoc getEndLoc() const { return EndLoc; }
+ void print(raw_ostream&) const;
+ void dump() const;
+
+ StringRef getToken() const {
+ assert(Kind == k_Token && "Invalid access!");
+ return StringRef(Tok.Data, Tok.Length);
+ }
+
+ unsigned getReg() const {
+ assert((Kind == k_Register || Kind == k_WrappedRegister)
+ && "Invalid access!");
+ return Reg.RegNum;
+ }
+
+ const MCExpr *getImm() const {
+ assert(Kind == k_Immediate && "Invalid access!");
+ return Imm.Val;
+ }
+
+ A64CC::CondCodes getCondCode() const {
+ assert(Kind == k_CondCode && "Invalid access!");
+ return CondCode.Code;
+ }
+
+ static bool isNonConstantExpr(const MCExpr *E,
+ AArch64MCExpr::VariantKind &Variant) {
+ if (const AArch64MCExpr *A64E = dyn_cast<AArch64MCExpr>(E)) {
+ Variant = A64E->getKind();
+ return true;
+ } else if (!isa<MCConstantExpr>(E)) {
+ Variant = AArch64MCExpr::VK_AARCH64_None;
+ return true;
+ }
+
+ return false;
+ }
+
+ bool isCondCode() const { return Kind == k_CondCode; }
+ bool isToken() const { return Kind == k_Token; }
+ bool isReg() const { return Kind == k_Register; }
+ bool isImm() const { return Kind == k_Immediate; }
+ bool isMem() const { return false; }
+ bool isFPImm() const { return Kind == k_FPImmediate; }
+ bool isShiftOrExtend() const { return Kind == k_ShiftExtend; }
+ bool isSysReg() const { return Kind == k_SysReg; }
+ bool isImmWithLSL() const { return Kind == k_ImmWithLSL; }
+ bool isWrappedReg() const { return Kind == k_WrappedRegister; }
+
+ bool isAddSubImmLSL0() const {
+ if (!isImmWithLSL()) return false;
+ if (ImmWithLSL.ShiftAmount != 0) return false;
+
+ AArch64MCExpr::VariantKind Variant;
+ if (isNonConstantExpr(ImmWithLSL.Val, Variant)) {
+ return Variant == AArch64MCExpr::VK_AARCH64_LO12
+ || Variant == AArch64MCExpr::VK_AARCH64_DTPREL_LO12
+ || Variant == AArch64MCExpr::VK_AARCH64_DTPREL_LO12_NC
+ || Variant == AArch64MCExpr::VK_AARCH64_TPREL_LO12
+ || Variant == AArch64MCExpr::VK_AARCH64_TPREL_LO12_NC
+ || Variant == AArch64MCExpr::VK_AARCH64_TLSDESC_LO12;
+ }
+
+ // Otherwise it should be a real immediate in range:
+ const MCConstantExpr *CE = cast<MCConstantExpr>(ImmWithLSL.Val);
+ return CE->getValue() >= 0 && CE->getValue() <= 0xfff;
+ }
+
+ bool isAddSubImmLSL12() const {
+ if (!isImmWithLSL()) return false;
+ if (ImmWithLSL.ShiftAmount != 12) return false;
+
+ AArch64MCExpr::VariantKind Variant;
+ if (isNonConstantExpr(ImmWithLSL.Val, Variant)) {
+ return Variant == AArch64MCExpr::VK_AARCH64_DTPREL_HI12
+ || Variant == AArch64MCExpr::VK_AARCH64_TPREL_HI12;
+ }
+
+ // Otherwise it should be a real immediate in range:
+ const MCConstantExpr *CE = cast<MCConstantExpr>(ImmWithLSL.Val);
+ return CE->getValue() >= 0 && CE->getValue() <= 0xfff;
+ }
+
+ template<unsigned MemSize, unsigned RmSize> bool isAddrRegExtend() const {
+ if (!isShiftOrExtend()) return false;
+
+ A64SE::ShiftExtSpecifiers Ext = ShiftExtend.ShiftType;
+ if (RmSize == 32 && !(Ext == A64SE::UXTW || Ext == A64SE::SXTW))
+ return false;
+
+ if (RmSize == 64 && !(Ext == A64SE::LSL || Ext == A64SE::SXTX))
+ return false;
+
+ return ShiftExtend.Amount == Log2_32(MemSize) || ShiftExtend.Amount == 0;
+ }
+
+ bool isAdrpLabel() const {
+ if (!isImm()) return false;
+
+ AArch64MCExpr::VariantKind Variant;
+ if (isNonConstantExpr(getImm(), Variant)) {
+ return Variant == AArch64MCExpr::VK_AARCH64_None
+ || Variant == AArch64MCExpr::VK_AARCH64_GOT
+ || Variant == AArch64MCExpr::VK_AARCH64_GOTTPREL
+ || Variant == AArch64MCExpr::VK_AARCH64_TLSDESC;
+ }
+
+ return isLabel<21, 4096>();
+ }
+
+ template<unsigned RegWidth> bool isBitfieldWidth() const {
+ if (!isImm()) return false;
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+
+ return CE->getValue() >= 1 && CE->getValue() <= RegWidth;
+ }
+
+ template<int RegWidth>
+ bool isCVTFixedPos() const {
+ if (!isImm()) return false;
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+
+ return CE->getValue() >= 1 && CE->getValue() <= RegWidth;
+ }
+
+ bool isFMOVImm() const {
+ if (!isFPImm()) return false;
+
+ APFloat RealVal(FPImm.Val);
+ uint32_t ImmVal;
+ return A64Imms::isFPImm(RealVal, ImmVal);
+ }
+
+ bool isFPZero() const {
+ if (!isFPImm()) return false;
+
+ APFloat RealVal(FPImm.Val);
+ return RealVal.isPosZero();
+ }
+
+ template<unsigned field_width, unsigned scale>
+ bool isLabel() const {
+ if (!isImm()) return false;
+
+ if (dyn_cast<MCSymbolRefExpr>(Imm.Val)) {
+ return true;
+ } else if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Imm.Val)) {
+ int64_t Val = CE->getValue();
+ int64_t Min = - (scale * (1LL << (field_width - 1)));
+ int64_t Max = scale * ((1LL << (field_width - 1)) - 1);
+ return (Val % scale) == 0 && Val >= Min && Val <= Max;
+ }
+
+ // N.b. this disallows explicit relocation specifications via an
+ // AArch64MCExpr; users needing that behaviour are not handled here.
+ return false;
+ }
+
+ bool isLane1() const {
+ if (!isImm()) return false;
+
+ // Because it's come through custom assembly parsing, it must always be a
+ // constant expression.
+ return cast<MCConstantExpr>(getImm())->getValue() == 1;
+ }
+
+ bool isLoadLitLabel() const {
+ if (!isImm()) return false;
+
+ AArch64MCExpr::VariantKind Variant;
+ if (isNonConstantExpr(getImm(), Variant)) {
+ return Variant == AArch64MCExpr::VK_AARCH64_None
+ || Variant == AArch64MCExpr::VK_AARCH64_GOTTPREL;
+ }
+
+ return isLabel<19, 4>();
+ }
+
+ template<unsigned RegWidth> bool isLogicalImm() const {
+ if (!isImm()) return false;
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Imm.Val);
+ if (!CE) return false;
+
+ uint32_t Bits;
+ return A64Imms::isLogicalImm(RegWidth, CE->getValue(), Bits);
+ }
+
+ template<unsigned RegWidth> bool isLogicalImmMOV() const {
+ if (!isLogicalImm<RegWidth>()) return false;
+
+ const MCConstantExpr *CE = cast<MCConstantExpr>(Imm.Val);
+
+ // The move alias for ORR is only valid if the immediate cannot be
+ // represented with a move (immediate) instruction; they take priority.
+ int UImm16, Shift;
+ return !A64Imms::isMOVZImm(RegWidth, CE->getValue(), UImm16, Shift)
+ && !A64Imms::isMOVNImm(RegWidth, CE->getValue(), UImm16, Shift);
+ }
+
+ template<int MemSize>
+ bool isOffsetUImm12() const {
+ if (!isImm()) return false;
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+
+ // Assume they know what they're doing for now if they've given us a
+ // non-constant expression. In principle we could check for ridiculous
+ // things that can't possibly work or relocations that would almost
+ // certainly break resulting code.
+ if (!CE)
+ return true;
+
+ int64_t Val = CE->getValue();
+
+ // Must be a multiple of the access size in bytes.
+ if ((Val & (MemSize - 1)) != 0) return false;
+
+ // Must be 12-bit unsigned
+ return Val >= 0 && Val <= 0xfff * MemSize;
+ }
+
+ template<A64SE::ShiftExtSpecifiers SHKind, bool is64Bit>
+ bool isShift() const {
+ if (!isShiftOrExtend()) return false;
+
+ if (ShiftExtend.ShiftType != SHKind)
+ return false;
+
+ return is64Bit ? ShiftExtend.Amount <= 63 : ShiftExtend.Amount <= 31;
+ }
+
+ bool isMOVN32Imm() const {
+ static AArch64MCExpr::VariantKind PermittedModifiers[] = {
+ AArch64MCExpr::VK_AARCH64_SABS_G0,
+ AArch64MCExpr::VK_AARCH64_SABS_G1,
+ AArch64MCExpr::VK_AARCH64_DTPREL_G1,
+ AArch64MCExpr::VK_AARCH64_DTPREL_G0,
+ AArch64MCExpr::VK_AARCH64_GOTTPREL_G1,
+ AArch64MCExpr::VK_AARCH64_TPREL_G1,
+ AArch64MCExpr::VK_AARCH64_TPREL_G0,
+ };
+ unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers);
+
+ return isMoveWideImm(32, PermittedModifiers, NumModifiers);
+ }
+
+ bool isMOVN64Imm() const {
+ static AArch64MCExpr::VariantKind PermittedModifiers[] = {
+ AArch64MCExpr::VK_AARCH64_SABS_G0,
+ AArch64MCExpr::VK_AARCH64_SABS_G1,
+ AArch64MCExpr::VK_AARCH64_SABS_G2,
+ AArch64MCExpr::VK_AARCH64_DTPREL_G2,
+ AArch64MCExpr::VK_AARCH64_DTPREL_G1,
+ AArch64MCExpr::VK_AARCH64_DTPREL_G0,
+ AArch64MCExpr::VK_AARCH64_GOTTPREL_G1,
+ AArch64MCExpr::VK_AARCH64_TPREL_G2,
+ AArch64MCExpr::VK_AARCH64_TPREL_G1,
+ AArch64MCExpr::VK_AARCH64_TPREL_G0,
+ };
+ unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers);
+
+ return isMoveWideImm(64, PermittedModifiers, NumModifiers);
+ }
+
+
+ bool isMOVZ32Imm() const {
+ static AArch64MCExpr::VariantKind PermittedModifiers[] = {
+ AArch64MCExpr::VK_AARCH64_ABS_G0,
+ AArch64MCExpr::VK_AARCH64_ABS_G1,
+ AArch64MCExpr::VK_AARCH64_SABS_G0,
+ AArch64MCExpr::VK_AARCH64_SABS_G1,
+ AArch64MCExpr::VK_AARCH64_DTPREL_G1,
+ AArch64MCExpr::VK_AARCH64_DTPREL_G0,
+ AArch64MCExpr::VK_AARCH64_GOTTPREL_G1,
+ AArch64MCExpr::VK_AARCH64_TPREL_G1,
+ AArch64MCExpr::VK_AARCH64_TPREL_G0,
+ };
+ unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers);
+
+ return isMoveWideImm(32, PermittedModifiers, NumModifiers);
+ }
+
+ bool isMOVZ64Imm() const {
+ static AArch64MCExpr::VariantKind PermittedModifiers[] = {
+ AArch64MCExpr::VK_AARCH64_ABS_G0,
+ AArch64MCExpr::VK_AARCH64_ABS_G1,
+ AArch64MCExpr::VK_AARCH64_ABS_G2,
+ AArch64MCExpr::VK_AARCH64_ABS_G3,
+ AArch64MCExpr::VK_AARCH64_SABS_G0,
+ AArch64MCExpr::VK_AARCH64_SABS_G1,
+ AArch64MCExpr::VK_AARCH64_SABS_G2,
+ AArch64MCExpr::VK_AARCH64_DTPREL_G2,
+ AArch64MCExpr::VK_AARCH64_DTPREL_G1,
+ AArch64MCExpr::VK_AARCH64_DTPREL_G0,
+ AArch64MCExpr::VK_AARCH64_GOTTPREL_G1,
+ AArch64MCExpr::VK_AARCH64_TPREL_G2,
+ AArch64MCExpr::VK_AARCH64_TPREL_G1,
+ AArch64MCExpr::VK_AARCH64_TPREL_G0,
+ };
+ unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers);
+
+ return isMoveWideImm(64, PermittedModifiers, NumModifiers);
+ }
+
+ bool isMOVK32Imm() const {
+ static AArch64MCExpr::VariantKind PermittedModifiers[] = {
+ AArch64MCExpr::VK_AARCH64_ABS_G0_NC,
+ AArch64MCExpr::VK_AARCH64_ABS_G1_NC,
+ AArch64MCExpr::VK_AARCH64_DTPREL_G1_NC,
+ AArch64MCExpr::VK_AARCH64_DTPREL_G0_NC,
+ AArch64MCExpr::VK_AARCH64_GOTTPREL_G0_NC,
+ AArch64MCExpr::VK_AARCH64_TPREL_G1_NC,
+ AArch64MCExpr::VK_AARCH64_TPREL_G0_NC,
+ };
+ unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers);
+
+ return isMoveWideImm(32, PermittedModifiers, NumModifiers);
+ }
+
+ bool isMOVK64Imm() const {
+ static AArch64MCExpr::VariantKind PermittedModifiers[] = {
+ AArch64MCExpr::VK_AARCH64_ABS_G0_NC,
+ AArch64MCExpr::VK_AARCH64_ABS_G1_NC,
+ AArch64MCExpr::VK_AARCH64_ABS_G2_NC,
+ AArch64MCExpr::VK_AARCH64_ABS_G3,
+ AArch64MCExpr::VK_AARCH64_DTPREL_G1_NC,
+ AArch64MCExpr::VK_AARCH64_DTPREL_G0_NC,
+ AArch64MCExpr::VK_AARCH64_GOTTPREL_G0_NC,
+ AArch64MCExpr::VK_AARCH64_TPREL_G1_NC,
+ AArch64MCExpr::VK_AARCH64_TPREL_G0_NC,
+ };
+ unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers);
+
+ return isMoveWideImm(64, PermittedModifiers, NumModifiers);
+ }
+
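+  // A valid move-wide immediate is either a plain constant in the range
+  // [0, 0xffff] with an optional left shift by a multiple of 16 within the
+  // register width, or one of the permitted relocation modifiers, which must
+  // not carry an explicit shift.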
+ bool isMoveWideImm(unsigned RegWidth,
+ AArch64MCExpr::VariantKind *PermittedModifiers,
+ unsigned NumModifiers) const {
+ if (!isImmWithLSL()) return false;
+
+ if (ImmWithLSL.ShiftAmount % 16 != 0) return false;
+ if (ImmWithLSL.ShiftAmount >= RegWidth) return false;
+
+ AArch64MCExpr::VariantKind Modifier;
+ if (isNonConstantExpr(ImmWithLSL.Val, Modifier)) {
+ // E.g. "#:abs_g0:sym, lsl #16" makes no sense.
+ if (!ImmWithLSL.ImplicitAmount) return false;
+
+ for (unsigned i = 0; i < NumModifiers; ++i)
+ if (PermittedModifiers[i] == Modifier) return true;
+
+ return false;
+ }
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ImmWithLSL.Val);
+ return CE && CE->getValue() >= 0 && CE->getValue() <= 0xffff;
+ }
+
+ template<int RegWidth, bool (*isValidImm)(int, uint64_t, int&, int&)>
+ bool isMoveWideMovAlias() const {
+ if (!isImm()) return false;
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+
+ int UImm16, Shift;
+ uint64_t Value = CE->getValue();
+
+    // If this is a 32-bit instruction then all bits above bit 31 should be
+    // the same: all-zero and all-one upper halves are both acceptable, since
+    // signed and unsigned 32-bit values must be permitted.
+ if (RegWidth == 32) {
+ if ((Value >> 32) != 0 && (Value >> 32) != 0xffffffff)
+ return false;
+
+ Value &= 0xffffffffULL;
+ }
+
+ return isValidImm(RegWidth, Value, UImm16, Shift);
+ }
+
+ bool isMSRWithReg() const {
+ if (!isSysReg()) return false;
+
+ bool IsKnownRegister;
+ StringRef Name(SysReg.Data, SysReg.Length);
+ A64SysReg::MSRMapper().fromString(Name, IsKnownRegister);
+
+ return IsKnownRegister;
+ }
+
+ bool isMSRPState() const {
+ if (!isSysReg()) return false;
+
+ bool IsKnownRegister;
+ StringRef Name(SysReg.Data, SysReg.Length);
+ A64PState::PStateMapper().fromString(Name, IsKnownRegister);
+
+ return IsKnownRegister;
+ }
+
+ bool isMRS() const {
+ if (!isSysReg()) return false;
+
+    // Check against the system registers readable via MRS.
+ bool IsKnownRegister;
+ StringRef Name(SysReg.Data, SysReg.Length);
+ A64SysReg::MRSMapper().fromString(Name, IsKnownRegister);
+
+ return IsKnownRegister;
+ }
+
+ bool isPRFM() const {
+ if (!isImm()) return false;
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+
+ if (!CE)
+ return false;
+
+ return CE->getValue() >= 0 && CE->getValue() <= 31;
+ }
+
+ template<A64SE::ShiftExtSpecifiers SHKind> bool isRegExtend() const {
+ if (!isShiftOrExtend()) return false;
+
+ if (ShiftExtend.ShiftType != SHKind)
+ return false;
+
+ return ShiftExtend.Amount <= 4;
+ }
+
+ bool isRegExtendLSL() const {
+ if (!isShiftOrExtend()) return false;
+
+ if (ShiftExtend.ShiftType != A64SE::LSL)
+ return false;
+
+ return !ShiftExtend.ImplicitAmount && ShiftExtend.Amount <= 4;
+ }
+
+ template<int MemSize> bool isSImm7Scaled() const {
+ if (!isImm()) return false;
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+
+ int64_t Val = CE->getValue();
+ if (Val % MemSize != 0) return false;
+
+ Val /= MemSize;
+
+ return Val >= -64 && Val < 64;
+ }
+
+ template<int BitWidth>
+ bool isSImm() const {
+ if (!isImm()) return false;
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+
+ return CE->getValue() >= -(1LL << (BitWidth - 1))
+ && CE->getValue() < (1LL << (BitWidth - 1));
+ }
+
+ template<int bitWidth>
+ bool isUImm() const {
+ if (!isImm()) return false;
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+
+ return CE->getValue() >= 0 && CE->getValue() < (1LL << bitWidth);
+ }
+
+ bool isUImm() const {
+ if (!isImm()) return false;
+
+ return isa<MCConstantExpr>(getImm());
+ }
+
+ static AArch64Operand *CreateImmWithLSL(const MCExpr *Val,
+ unsigned ShiftAmount,
+ bool ImplicitAmount,
+ SMLoc S, SMLoc E) {
+ AArch64Operand *Op = new AArch64Operand(k_ImmWithLSL, S, E);
+ Op->ImmWithLSL.Val = Val;
+ Op->ImmWithLSL.ShiftAmount = ShiftAmount;
+ Op->ImmWithLSL.ImplicitAmount = ImplicitAmount;
+ return Op;
+ }
+
+ static AArch64Operand *CreateCondCode(A64CC::CondCodes Code,
+ SMLoc S, SMLoc E) {
+ AArch64Operand *Op = new AArch64Operand(k_CondCode, S, E);
+ Op->CondCode.Code = Code;
+ return Op;
+ }
+
+ static AArch64Operand *CreateFPImm(double Val,
+ SMLoc S, SMLoc E) {
+ AArch64Operand *Op = new AArch64Operand(k_FPImmediate, S, E);
+ Op->FPImm.Val = Val;
+ return Op;
+ }
+
+ static AArch64Operand *CreateImm(const MCExpr *Val, SMLoc S, SMLoc E) {
+ AArch64Operand *Op = new AArch64Operand(k_Immediate, S, E);
+ Op->Imm.Val = Val;
+ return Op;
+ }
+
+ static AArch64Operand *CreateReg(unsigned RegNum, SMLoc S, SMLoc E) {
+ AArch64Operand *Op = new AArch64Operand(k_Register, S, E);
+ Op->Reg.RegNum = RegNum;
+ return Op;
+ }
+
+ static AArch64Operand *CreateWrappedReg(unsigned RegNum, SMLoc S, SMLoc E) {
+ AArch64Operand *Op = new AArch64Operand(k_WrappedRegister, S, E);
+ Op->Reg.RegNum = RegNum;
+ return Op;
+ }
+
+ static AArch64Operand *CreateShiftExtend(A64SE::ShiftExtSpecifiers ShiftTyp,
+ unsigned Amount,
+ bool ImplicitAmount,
+ SMLoc S, SMLoc E) {
+ AArch64Operand *Op = new AArch64Operand(k_ShiftExtend, S, E);
+ Op->ShiftExtend.ShiftType = ShiftTyp;
+ Op->ShiftExtend.Amount = Amount;
+ Op->ShiftExtend.ImplicitAmount = ImplicitAmount;
+ return Op;
+ }
+
+ static AArch64Operand *CreateSysReg(StringRef Str, SMLoc S) {
+ AArch64Operand *Op = new AArch64Operand(k_SysReg, S, S);
+ Op->Tok.Data = Str.data();
+ Op->Tok.Length = Str.size();
+ return Op;
+ }
+
+ static AArch64Operand *CreateToken(StringRef Str, SMLoc S) {
+ AArch64Operand *Op = new AArch64Operand(k_Token, S, S);
+ Op->Tok.Data = Str.data();
+ Op->Tok.Length = Str.size();
+ return Op;
+ }
+
+
+ void addExpr(MCInst &Inst, const MCExpr *Expr) const {
+ // Add as immediates when possible.
+ if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
+ Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
+ else
+ Inst.addOperand(MCOperand::CreateExpr(Expr));
+ }
+
+ template<unsigned RegWidth>
+ void addBFILSBOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
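+    // The BFI aliases encode their LSB operand as (RegWidth - LSB) % RegWidth
+    // (the immr field).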
+ unsigned EncodedVal = (RegWidth - CE->getValue()) % RegWidth;
+ Inst.addOperand(MCOperand::CreateImm(EncodedVal));
+ }
+
+ void addBFIWidthOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::CreateImm(CE->getValue() - 1));
+ }
+
+ void addBFXWidthOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+
+ uint64_t LSB = Inst.getOperand(Inst.getNumOperands()-1).getImm();
+ const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
+
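+    // The width operand is encoded as the index of the last bit involved:
+    // lsb + width - 1.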
+ Inst.addOperand(MCOperand::CreateImm(LSB + CE->getValue() - 1));
+ }
+
+ void addCondCodeOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm(getCondCode()));
+ }
+
+ void addCVTFixedPosOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+
+ const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
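+    // The scale field is encoded as 64 minus the number of fractional bits.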
+ Inst.addOperand(MCOperand::CreateImm(64 - CE->getValue()));
+ }
+
+ void addFMOVImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+
+ APFloat RealVal(FPImm.Val);
+ uint32_t ImmVal;
+ A64Imms::isFPImm(RealVal, ImmVal);
+
+ Inst.addOperand(MCOperand::CreateImm(ImmVal));
+ }
+
+ void addFPZeroOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands");
+ Inst.addOperand(MCOperand::CreateImm(0));
+ }
+
+ void addInvCondCodeOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ unsigned Encoded = A64InvertCondCode(getCondCode());
+ Inst.addOperand(MCOperand::CreateImm(Encoded));
+ }
+
+ void addRegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(getReg()));
+ }
+
+ void addImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ addExpr(Inst, getImm());
+ }
+
+ template<int MemSize>
+ void addSImm7ScaledOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+
+ const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
+ uint64_t Val = CE->getValue() / MemSize;
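+    // Truncate to the 7-bit field; a negative offset keeps its
+    // two's-complement encoding.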
+ Inst.addOperand(MCOperand::CreateImm(Val & 0x7f));
+ }
+
+ template<int BitWidth>
+ void addSImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+
+ const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
+ uint64_t Val = CE->getValue();
+ Inst.addOperand(MCOperand::CreateImm(Val & ((1ULL << BitWidth) - 1)));
+ }
+
+ void addImmWithLSLOperands(MCInst &Inst, unsigned N) const {
+ assert (N == 1 && "Invalid number of operands!");
+
+ addExpr(Inst, ImmWithLSL.Val);
+ }
+
+ template<unsigned field_width, unsigned scale>
+ void addLabelOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Imm.Val);
+
+ if (!CE) {
+ addExpr(Inst, Imm.Val);
+ return;
+ }
+
+ int64_t Val = CE->getValue();
+ assert(Val % scale == 0 && "Unaligned immediate in instruction");
+ Val /= scale;
+
+ Inst.addOperand(MCOperand::CreateImm(Val & ((1LL << field_width) - 1)));
+ }
+
+ template<int MemSize>
+ void addOffsetUImm12Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+
+ if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm())) {
+ Inst.addOperand(MCOperand::CreateImm(CE->getValue() / MemSize));
+ } else {
+ Inst.addOperand(MCOperand::CreateExpr(getImm()));
+ }
+ }
+
+ template<unsigned RegWidth>
+ void addLogicalImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands");
+ const MCConstantExpr *CE = cast<MCConstantExpr>(Imm.Val);
+
+ uint32_t Bits;
+ A64Imms::isLogicalImm(RegWidth, CE->getValue(), Bits);
+
+ Inst.addOperand(MCOperand::CreateImm(Bits));
+ }
+
+ void addMRSOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+
+ bool Valid;
+ StringRef Name(SysReg.Data, SysReg.Length);
+ uint32_t Bits = A64SysReg::MRSMapper().fromString(Name, Valid);
+
+ Inst.addOperand(MCOperand::CreateImm(Bits));
+ }
+
+ void addMSRWithRegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+
+ bool Valid;
+ StringRef Name(SysReg.Data, SysReg.Length);
+ uint32_t Bits = A64SysReg::MSRMapper().fromString(Name, Valid);
+
+ Inst.addOperand(MCOperand::CreateImm(Bits));
+ }
+
+ void addMSRPStateOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+
+ bool Valid;
+ StringRef Name(SysReg.Data, SysReg.Length);
+ uint32_t Bits = A64PState::PStateMapper().fromString(Name, Valid);
+
+ Inst.addOperand(MCOperand::CreateImm(Bits));
+ }
+
+ void addMoveWideImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+
+ addExpr(Inst, ImmWithLSL.Val);
+
+ AArch64MCExpr::VariantKind Variant;
+ if (!isNonConstantExpr(ImmWithLSL.Val, Variant)) {
+ Inst.addOperand(MCOperand::CreateImm(ImmWithLSL.ShiftAmount / 16));
+ return;
+ }
+
+ // We know it's relocated
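+    // The modifier tells us which 16-bit chunk of the value the relocation
+    // targets; that chunk index becomes the shift operand.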
+ switch (Variant) {
+ case AArch64MCExpr::VK_AARCH64_ABS_G0:
+ case AArch64MCExpr::VK_AARCH64_ABS_G0_NC:
+ case AArch64MCExpr::VK_AARCH64_SABS_G0:
+ case AArch64MCExpr::VK_AARCH64_DTPREL_G0:
+ case AArch64MCExpr::VK_AARCH64_DTPREL_G0_NC:
+ case AArch64MCExpr::VK_AARCH64_GOTTPREL_G0_NC:
+ case AArch64MCExpr::VK_AARCH64_TPREL_G0:
+ case AArch64MCExpr::VK_AARCH64_TPREL_G0_NC:
+ Inst.addOperand(MCOperand::CreateImm(0));
+ break;
+ case AArch64MCExpr::VK_AARCH64_ABS_G1:
+ case AArch64MCExpr::VK_AARCH64_ABS_G1_NC:
+ case AArch64MCExpr::VK_AARCH64_SABS_G1:
+ case AArch64MCExpr::VK_AARCH64_DTPREL_G1:
+ case AArch64MCExpr::VK_AARCH64_DTPREL_G1_NC:
+ case AArch64MCExpr::VK_AARCH64_GOTTPREL_G1:
+ case AArch64MCExpr::VK_AARCH64_TPREL_G1:
+ case AArch64MCExpr::VK_AARCH64_TPREL_G1_NC:
+ Inst.addOperand(MCOperand::CreateImm(1));
+ break;
+ case AArch64MCExpr::VK_AARCH64_ABS_G2:
+ case AArch64MCExpr::VK_AARCH64_ABS_G2_NC:
+ case AArch64MCExpr::VK_AARCH64_SABS_G2:
+ case AArch64MCExpr::VK_AARCH64_DTPREL_G2:
+ case AArch64MCExpr::VK_AARCH64_TPREL_G2:
+ Inst.addOperand(MCOperand::CreateImm(2));
+ break;
+ case AArch64MCExpr::VK_AARCH64_ABS_G3:
+ Inst.addOperand(MCOperand::CreateImm(3));
+ break;
+ default: llvm_unreachable("Inappropriate move wide relocation");
+ }
+ }
+
+ template<int RegWidth, bool isValidImm(int, uint64_t, int&, int&)>
+ void addMoveWideMovAliasOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ int UImm16, Shift;
+
+ const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
+ uint64_t Value = CE->getValue();
+
+ if (RegWidth == 32) {
+ Value &= 0xffffffffULL;
+ }
+
+ bool Valid = isValidImm(RegWidth, Value, UImm16, Shift);
+ (void)Valid;
+ assert(Valid && "Invalid immediates should have been weeded out by now");
+
+ Inst.addOperand(MCOperand::CreateImm(UImm16));
+ Inst.addOperand(MCOperand::CreateImm(Shift));
+ }
+
+ void addPRFMOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+
+ const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
+ assert(CE->getValue() >= 0 && CE->getValue() <= 31
+ && "PRFM operand should be 5-bits");
+
+ Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
+ }
+
+ // For Add-sub (extended register) operands.
+ void addRegExtendOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+
+ Inst.addOperand(MCOperand::CreateImm(ShiftExtend.Amount));
+ }
+
+ // For the extend in load-store (register offset) instructions.
+ template<unsigned MemSize>
+ void addAddrRegExtendOperands(MCInst &Inst, unsigned N) const {
+ addAddrRegExtendOperands(Inst, N, MemSize);
+ }
+
+ void addAddrRegExtendOperands(MCInst &Inst, unsigned N,
+ unsigned MemSize) const {
+ assert(N == 1 && "Invalid number of operands!");
+
+    // The lowest bit of the Option field is set by the instruction classes;
+    // the high two bits are determined here:
+ unsigned OptionHi = 0;
+ switch (ShiftExtend.ShiftType) {
+ case A64SE::UXTW:
+ case A64SE::LSL:
+ OptionHi = 1;
+ break;
+ case A64SE::SXTW:
+ case A64SE::SXTX:
+ OptionHi = 3;
+ break;
+ default:
+ llvm_unreachable("Invalid extend type for register offset");
+ }
+
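+    // The S bit selects the scaled form of the register offset: for byte
+    // accesses an explicit "#0" amount sets it, for wider accesses any
+    // non-zero amount does.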
+ unsigned S = 0;
+ if (MemSize == 1 && !ShiftExtend.ImplicitAmount)
+ S = 1;
+ else if (MemSize != 1 && ShiftExtend.Amount != 0)
+ S = 1;
+
+ Inst.addOperand(MCOperand::CreateImm((OptionHi << 1) | S));
+ }
+ void addShiftOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+
+ Inst.addOperand(MCOperand::CreateImm(ShiftExtend.Amount));
+ }
+};
+
+} // end anonymous namespace.
+
+AArch64AsmParser::OperandMatchResultTy
+AArch64AsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ StringRef Mnemonic) {
+
+ // See if the operand has a custom parser
+ OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
+
+ // It could either succeed, fail or just not care.
+ if (ResTy != MatchOperand_NoMatch)
+ return ResTy;
+
+ switch (getLexer().getKind()) {
+ default:
+ Error(Parser.getTok().getLoc(), "unexpected token in operand");
+ return MatchOperand_ParseFail;
+ case AsmToken::Identifier: {
+ // It might be in the LSL/UXTB family ...
+ OperandMatchResultTy GotShift = ParseShiftExtend(Operands);
+
+ // We can only continue if no tokens were eaten.
+ if (GotShift != MatchOperand_NoMatch)
+ return GotShift;
+
+ // ... or it might be a register ...
+ uint32_t NumLanes = 0;
+ OperandMatchResultTy GotReg = ParseRegister(Operands, NumLanes);
+ assert(GotReg != MatchOperand_ParseFail
+ && "register parsing shouldn't partially succeed");
+
+ if (GotReg == MatchOperand_Success) {
+ if (Parser.getTok().is(AsmToken::LBrac))
+ return ParseNEONLane(Operands, NumLanes);
+ else
+ return MatchOperand_Success;
+ }
+
+ // ... or it might be a symbolish thing
+ }
+ // Fall through
+ case AsmToken::LParen: // E.g. (strcmp-4)
+ case AsmToken::Integer: // 1f, 2b labels
+ case AsmToken::String: // quoted labels
+ case AsmToken::Dot: // . is Current location
+ case AsmToken::Dollar: // $ is PC
+ case AsmToken::Colon: {
+ SMLoc StartLoc = Parser.getTok().getLoc();
+ SMLoc EndLoc;
+ const MCExpr *ImmVal = 0;
+
+ if (ParseImmediate(ImmVal) != MatchOperand_Success)
+ return MatchOperand_ParseFail;
+
+ EndLoc = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+ Operands.push_back(AArch64Operand::CreateImm(ImmVal, StartLoc, EndLoc));
+ return MatchOperand_Success;
+ }
+ case AsmToken::Hash: { // Immediates
+ SMLoc StartLoc = Parser.getTok().getLoc();
+ SMLoc EndLoc;
+ const MCExpr *ImmVal = 0;
+ Parser.Lex();
+
+ if (ParseImmediate(ImmVal) != MatchOperand_Success)
+ return MatchOperand_ParseFail;
+
+ EndLoc = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+ Operands.push_back(AArch64Operand::CreateImm(ImmVal, StartLoc, EndLoc));
+ return MatchOperand_Success;
+ }
+ case AsmToken::LBrac: {
+ SMLoc Loc = Parser.getTok().getLoc();
+ Operands.push_back(AArch64Operand::CreateToken("[", Loc));
+ Parser.Lex(); // Eat '['
+
+ // There's no comma after a '[', so we can parse the next operand
+ // immediately.
+ return ParseOperand(Operands, Mnemonic);
+ }
+ // The following will likely be useful later, but not in very early cases
+ case AsmToken::LCurly: // Weird SIMD lists
+ llvm_unreachable("Don't know how to deal with '{' in operand");
+ return MatchOperand_ParseFail;
+ }
+}
+
+AArch64AsmParser::OperandMatchResultTy
+AArch64AsmParser::ParseImmediate(const MCExpr *&ExprVal) {
+ if (getLexer().is(AsmToken::Colon)) {
+ AArch64MCExpr::VariantKind RefKind;
+
+ OperandMatchResultTy ResTy = ParseRelocPrefix(RefKind);
+ if (ResTy != MatchOperand_Success)
+ return ResTy;
+
+ const MCExpr *SubExprVal;
+ if (getParser().ParseExpression(SubExprVal))
+ return MatchOperand_ParseFail;
+
+ ExprVal = AArch64MCExpr::Create(RefKind, SubExprVal, getContext());
+ return MatchOperand_Success;
+ }
+
+ // No weird AArch64MCExpr prefix
+ return getParser().ParseExpression(ExprVal)
+ ? MatchOperand_ParseFail : MatchOperand_Success;
+}
+
+// A lane attached to a NEON register, written "[N]", should yield three
+// tokens: '[', N, ']'. A hash is not allowed to precede the immediate here.
+AArch64AsmParser::OperandMatchResultTy
+AArch64AsmParser::ParseNEONLane(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ uint32_t NumLanes) {
+ SMLoc Loc = Parser.getTok().getLoc();
+
+ assert(Parser.getTok().is(AsmToken::LBrac) && "inappropriate operand");
+ Operands.push_back(AArch64Operand::CreateToken("[", Loc));
+ Parser.Lex(); // Eat '['
+
+ if (Parser.getTok().isNot(AsmToken::Integer)) {
+ Error(Parser.getTok().getLoc(), "expected lane number");
+ return MatchOperand_ParseFail;
+ }
+
+ if (Parser.getTok().getIntVal() >= NumLanes) {
+ Error(Parser.getTok().getLoc(), "lane number incompatible with layout");
+ return MatchOperand_ParseFail;
+ }
+
+ const MCExpr *Lane = MCConstantExpr::Create(Parser.getTok().getIntVal(),
+ getContext());
+ SMLoc S = Parser.getTok().getLoc();
+ Parser.Lex(); // Eat actual lane
+ SMLoc E = Parser.getTok().getLoc();
+ Operands.push_back(AArch64Operand::CreateImm(Lane, S, E));
+
+
+ if (Parser.getTok().isNot(AsmToken::RBrac)) {
+ Error(Parser.getTok().getLoc(), "expected ']' after lane");
+ return MatchOperand_ParseFail;
+ }
+
+ Operands.push_back(AArch64Operand::CreateToken("]", Loc));
+ Parser.Lex(); // Eat ']'
+
+ return MatchOperand_Success;
+}
+
+AArch64AsmParser::OperandMatchResultTy
+AArch64AsmParser::ParseRelocPrefix(AArch64MCExpr::VariantKind &RefKind) {
+ assert(getLexer().is(AsmToken::Colon) && "expected a ':'");
+ Parser.Lex();
+
+ if (getLexer().isNot(AsmToken::Identifier)) {
+ Error(Parser.getTok().getLoc(),
+ "expected relocation specifier in operand after ':'");
+ return MatchOperand_ParseFail;
+ }
+
+  std::string lowerCase = Parser.getTok().getIdentifier().lower();
+ RefKind = StringSwitch<AArch64MCExpr::VariantKind>(lowerCase)
+ .Case("got", AArch64MCExpr::VK_AARCH64_GOT)
+ .Case("got_lo12", AArch64MCExpr::VK_AARCH64_GOT_LO12)
+ .Case("lo12", AArch64MCExpr::VK_AARCH64_LO12)
+ .Case("abs_g0", AArch64MCExpr::VK_AARCH64_ABS_G0)
+ .Case("abs_g0_nc", AArch64MCExpr::VK_AARCH64_ABS_G0_NC)
+ .Case("abs_g1", AArch64MCExpr::VK_AARCH64_ABS_G1)
+ .Case("abs_g1_nc", AArch64MCExpr::VK_AARCH64_ABS_G1_NC)
+ .Case("abs_g2", AArch64MCExpr::VK_AARCH64_ABS_G2)
+ .Case("abs_g2_nc", AArch64MCExpr::VK_AARCH64_ABS_G2_NC)
+ .Case("abs_g3", AArch64MCExpr::VK_AARCH64_ABS_G3)
+ .Case("abs_g0_s", AArch64MCExpr::VK_AARCH64_SABS_G0)
+ .Case("abs_g1_s", AArch64MCExpr::VK_AARCH64_SABS_G1)
+ .Case("abs_g2_s", AArch64MCExpr::VK_AARCH64_SABS_G2)
+ .Case("dtprel_g2", AArch64MCExpr::VK_AARCH64_DTPREL_G2)
+ .Case("dtprel_g1", AArch64MCExpr::VK_AARCH64_DTPREL_G1)
+ .Case("dtprel_g1_nc", AArch64MCExpr::VK_AARCH64_DTPREL_G1_NC)
+ .Case("dtprel_g0", AArch64MCExpr::VK_AARCH64_DTPREL_G0)
+ .Case("dtprel_g0_nc", AArch64MCExpr::VK_AARCH64_DTPREL_G0_NC)
+ .Case("dtprel_hi12", AArch64MCExpr::VK_AARCH64_DTPREL_HI12)
+ .Case("dtprel_lo12", AArch64MCExpr::VK_AARCH64_DTPREL_LO12)
+ .Case("dtprel_lo12_nc", AArch64MCExpr::VK_AARCH64_DTPREL_LO12_NC)
+ .Case("gottprel_g1", AArch64MCExpr::VK_AARCH64_GOTTPREL_G1)
+ .Case("gottprel_g0_nc", AArch64MCExpr::VK_AARCH64_GOTTPREL_G0_NC)
+ .Case("gottprel", AArch64MCExpr::VK_AARCH64_GOTTPREL)
+ .Case("gottprel_lo12", AArch64MCExpr::VK_AARCH64_GOTTPREL_LO12)
+ .Case("tprel_g2", AArch64MCExpr::VK_AARCH64_TPREL_G2)
+ .Case("tprel_g1", AArch64MCExpr::VK_AARCH64_TPREL_G1)
+ .Case("tprel_g1_nc", AArch64MCExpr::VK_AARCH64_TPREL_G1_NC)
+ .Case("tprel_g0", AArch64MCExpr::VK_AARCH64_TPREL_G0)
+ .Case("tprel_g0_nc", AArch64MCExpr::VK_AARCH64_TPREL_G0_NC)
+ .Case("tprel_hi12", AArch64MCExpr::VK_AARCH64_TPREL_HI12)
+ .Case("tprel_lo12", AArch64MCExpr::VK_AARCH64_TPREL_LO12)
+ .Case("tprel_lo12_nc", AArch64MCExpr::VK_AARCH64_TPREL_LO12_NC)
+ .Case("tlsdesc", AArch64MCExpr::VK_AARCH64_TLSDESC)
+ .Case("tlsdesc_lo12", AArch64MCExpr::VK_AARCH64_TLSDESC_LO12)
+ .Default(AArch64MCExpr::VK_AARCH64_None);
+
+ if (RefKind == AArch64MCExpr::VK_AARCH64_None) {
+ Error(Parser.getTok().getLoc(),
+ "expected relocation specifier in operand after ':'");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex(); // Eat identifier
+
+ if (getLexer().isNot(AsmToken::Colon)) {
+ Error(Parser.getTok().getLoc(),
+ "expected ':' after relocation specifier");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex();
+ return MatchOperand_Success;
+}
+
+AArch64AsmParser::OperandMatchResultTy
+AArch64AsmParser::ParseImmWithLSLOperand(
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // FIXME?: I want to live in a world where immediates must start with
+ // #. Please don't dash my hopes (well, do if you have a good reason).
+ if (Parser.getTok().isNot(AsmToken::Hash)) return MatchOperand_NoMatch;
+
+ SMLoc S = Parser.getTok().getLoc();
+ Parser.Lex(); // Eat '#'
+
+ const MCExpr *Imm;
+ if (ParseImmediate(Imm) != MatchOperand_Success)
+ return MatchOperand_ParseFail;
+ else if (Parser.getTok().isNot(AsmToken::Comma)) {
+ SMLoc E = Parser.getTok().getLoc();
+ Operands.push_back(AArch64Operand::CreateImmWithLSL(Imm, 0, true, S, E));
+ return MatchOperand_Success;
+ }
+
+ // Eat ','
+ Parser.Lex();
+
+ // The optional operand must be "lsl #N" where N is non-negative.
+ if (Parser.getTok().is(AsmToken::Identifier)
+ && Parser.getTok().getIdentifier().lower() == "lsl") {
+ Parser.Lex();
+
+ if (Parser.getTok().is(AsmToken::Hash)) {
+ Parser.Lex();
+
+ if (Parser.getTok().isNot(AsmToken::Integer)) {
+ Error(Parser.getTok().getLoc(), "only 'lsl #+N' valid after immediate");
+ return MatchOperand_ParseFail;
+ }
+ }
+ }
+
+ int64_t ShiftAmount = Parser.getTok().getIntVal();
+
+ if (ShiftAmount < 0) {
+ Error(Parser.getTok().getLoc(), "positive shift amount required");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex(); // Eat the number
+
+ SMLoc E = Parser.getTok().getLoc();
+ Operands.push_back(AArch64Operand::CreateImmWithLSL(Imm, ShiftAmount,
+ false, S, E));
+ return MatchOperand_Success;
+}
+
+
+AArch64AsmParser::OperandMatchResultTy
+AArch64AsmParser::ParseCondCodeOperand(
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ if (Parser.getTok().isNot(AsmToken::Identifier))
+ return MatchOperand_NoMatch;
+
+ StringRef Tok = Parser.getTok().getIdentifier();
+ A64CC::CondCodes CondCode = A64StringToCondCode(Tok);
+
+ if (CondCode == A64CC::Invalid)
+ return MatchOperand_NoMatch;
+
+ SMLoc S = Parser.getTok().getLoc();
+ Parser.Lex(); // Eat condition code
+ SMLoc E = Parser.getTok().getLoc();
+
+ Operands.push_back(AArch64Operand::CreateCondCode(CondCode, S, E));
+ return MatchOperand_Success;
+}
+
+AArch64AsmParser::OperandMatchResultTy
+AArch64AsmParser::ParseCRxOperand(
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+ if (Parser.getTok().isNot(AsmToken::Identifier)) {
+ Error(S, "Expected cN operand where 0 <= N <= 15");
+ return MatchOperand_ParseFail;
+ }
+
+  std::string LowerTok = Parser.getTok().getIdentifier().lower();
+  StringRef Tok(LowerTok);
+ if (Tok[0] != 'c') {
+ Error(S, "Expected cN operand where 0 <= N <= 15");
+ return MatchOperand_ParseFail;
+ }
+
+ uint32_t CRNum;
+ bool BadNum = Tok.drop_front().getAsInteger(10, CRNum);
+ if (BadNum || CRNum > 15) {
+ Error(S, "Expected cN operand where 0 <= N <= 15");
+ return MatchOperand_ParseFail;
+ }
+
+ const MCExpr *CRImm = MCConstantExpr::Create(CRNum, getContext());
+
+ Parser.Lex();
+ SMLoc E = Parser.getTok().getLoc();
+
+ Operands.push_back(AArch64Operand::CreateImm(CRImm, S, E));
+ return MatchOperand_Success;
+}
+
+AArch64AsmParser::OperandMatchResultTy
+AArch64AsmParser::ParseFPImmOperand(
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+
+ // FIXME?: I want to live in a world where immediates must start with
+ // #. Please don't dash my hopes (well, do if you have a good reason).
+ if (Parser.getTok().isNot(AsmToken::Hash)) return MatchOperand_NoMatch;
+
+ SMLoc S = Parser.getTok().getLoc();
+ Parser.Lex(); // Eat '#'
+
+ bool Negative = false;
+ if (Parser.getTok().is(AsmToken::Minus)) {
+ Negative = true;
+ Parser.Lex(); // Eat '-'
+ } else if (Parser.getTok().is(AsmToken::Plus)) {
+ Parser.Lex(); // Eat '+'
+ }
+
+ if (Parser.getTok().isNot(AsmToken::Real)) {
+ Error(S, "Expected floating-point immediate");
+ return MatchOperand_ParseFail;
+ }
+
+ APFloat RealVal(APFloat::IEEEdouble, Parser.getTok().getString());
+ if (Negative) RealVal.changeSign();
+ double DblVal = RealVal.convertToDouble();
+
+ Parser.Lex(); // Eat real number
+ SMLoc E = Parser.getTok().getLoc();
+
+ Operands.push_back(AArch64Operand::CreateFPImm(DblVal, S, E));
+ return MatchOperand_Success;
+}
+
+
+// Automatically generated
+static unsigned MatchRegisterName(StringRef Name);
+
+bool
+AArch64AsmParser::IdentifyRegister(unsigned &RegNum, SMLoc &RegEndLoc,
+ StringRef &Layout,
+ SMLoc &LayoutLoc) const {
+ const AsmToken &Tok = Parser.getTok();
+
+ if (Tok.isNot(AsmToken::Identifier))
+ return false;
+
+ std::string LowerReg = Tok.getString().lower();
+ size_t DotPos = LowerReg.find('.');
+
+ RegNum = MatchRegisterName(LowerReg.substr(0, DotPos));
+ if (RegNum == AArch64::NoRegister) {
+ RegNum = StringSwitch<unsigned>(LowerReg.substr(0, DotPos))
+ .Case("ip0", AArch64::X16)
+ .Case("ip1", AArch64::X17)
+ .Case("fp", AArch64::X29)
+ .Case("lr", AArch64::X30)
+ .Default(AArch64::NoRegister);
+ }
+ if (RegNum == AArch64::NoRegister)
+ return false;
+
+ SMLoc S = Tok.getLoc();
+ RegEndLoc = SMLoc::getFromPointer(S.getPointer() + DotPos);
+
+ if (DotPos == StringRef::npos) {
+ Layout = StringRef();
+ } else {
+ // Everything afterwards needs to be a literal token, expected to be
+ // '.2d','.b' etc for vector registers.
+
+ // This StringSwitch validates the input and (perhaps more importantly)
+ // gives us a permanent string to use in the token (a pointer into LowerReg
+ // would go out of scope when we return).
+ LayoutLoc = SMLoc::getFromPointer(S.getPointer() + DotPos + 1);
+ Layout = LowerReg.substr(DotPos, StringRef::npos);
+ Layout = StringSwitch<const char *>(Layout)
+ .Case(".d", ".d").Case(".1d", ".1d").Case(".2d", ".2d")
+ .Case(".s", ".s").Case(".2s", ".2s").Case(".4s", ".4s")
+ .Case(".h", ".h").Case(".4h", ".4h").Case(".8h", ".8h")
+ .Case(".b", ".b").Case(".8b", ".8b").Case(".16b", ".16b")
+ .Default("");
+
+ if (Layout.size() == 0) {
+ // Malformed register
+ return false;
+ }
+ }
+
+ return true;
+}
+
+AArch64AsmParser::OperandMatchResultTy
+AArch64AsmParser::ParseRegister(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ uint32_t &NumLanes) {
+ unsigned RegNum;
+ StringRef Layout;
+ SMLoc RegEndLoc, LayoutLoc;
+ SMLoc S = Parser.getTok().getLoc();
+
+ if (!IdentifyRegister(RegNum, RegEndLoc, Layout, LayoutLoc))
+ return MatchOperand_NoMatch;
+
+ Operands.push_back(AArch64Operand::CreateReg(RegNum, S, RegEndLoc));
+
+ if (Layout.size() != 0) {
+ unsigned long long TmpLanes = 0;
+ llvm::getAsUnsignedInteger(Layout.substr(1), 10, TmpLanes);
+ if (TmpLanes != 0) {
+ NumLanes = TmpLanes;
+ } else {
+ // If the number of lanes isn't specified explicitly, a valid instruction
+ // will have an element specifier and be capable of acting on the entire
+ // vector register.
+ switch (Layout.back()) {
+ default: llvm_unreachable("Invalid layout specifier");
+ case 'b': NumLanes = 16; break;
+ case 'h': NumLanes = 8; break;
+ case 's': NumLanes = 4; break;
+ case 'd': NumLanes = 2; break;
+ }
+ }
+
+ Operands.push_back(AArch64Operand::CreateToken(Layout, LayoutLoc));
+ }
+
+ Parser.Lex();
+ return MatchOperand_Success;
+}
+
+bool
+AArch64AsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
+ SMLoc &EndLoc) {
+ // This callback is used for things like DWARF frame directives in
+ // assembly. They don't care about things like NEON layouts or lanes, they
+ // just want to be able to produce the DWARF register number.
+ StringRef LayoutSpec;
+ SMLoc RegEndLoc, LayoutLoc;
+ StartLoc = Parser.getTok().getLoc();
+
+ if (!IdentifyRegister(RegNo, RegEndLoc, LayoutSpec, LayoutLoc))
+ return true;
+
+ Parser.Lex();
+ EndLoc = Parser.getTok().getLoc();
+
+ return false;
+}
+
+AArch64AsmParser::OperandMatchResultTy
+AArch64AsmParser::ParseNamedImmOperand(const NamedImmMapper &Mapper,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Since these operands occur in very limited circumstances, without
+ // alternatives, we actually signal an error if there is no match. If relaxing
+ // this, beware of unintended consequences: an immediate will be accepted
+ // during matching, no matter how it gets into the AArch64Operand.
+ const AsmToken &Tok = Parser.getTok();
+ SMLoc S = Tok.getLoc();
+
+ if (Tok.is(AsmToken::Identifier)) {
+ bool ValidName;
+ uint32_t Code = Mapper.fromString(Tok.getString().lower(), ValidName);
+
+ if (!ValidName) {
+ Error(S, "operand specifier not recognised");
+ return MatchOperand_ParseFail;
+ }
+
+ Parser.Lex(); // We're done with the identifier. Eat it
+
+ SMLoc E = Parser.getTok().getLoc();
+ const MCExpr *Imm = MCConstantExpr::Create(Code, getContext());
+ Operands.push_back(AArch64Operand::CreateImm(Imm, S, E));
+ return MatchOperand_Success;
+ } else if (Tok.is(AsmToken::Hash)) {
+ Parser.Lex();
+
+ const MCExpr *ImmVal;
+ if (ParseImmediate(ImmVal) != MatchOperand_Success)
+ return MatchOperand_ParseFail;
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ImmVal);
+ if (!CE || CE->getValue() < 0 || !Mapper.validImm(CE->getValue())) {
+ Error(S, "Invalid immediate for instruction");
+ return MatchOperand_ParseFail;
+ }
+
+ SMLoc E = Parser.getTok().getLoc();
+ Operands.push_back(AArch64Operand::CreateImm(ImmVal, S, E));
+ return MatchOperand_Success;
+ }
+
+ Error(S, "unexpected operand for instruction");
+ return MatchOperand_ParseFail;
+}
+
+AArch64AsmParser::OperandMatchResultTy
+AArch64AsmParser::ParseSysRegOperand(
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ const AsmToken &Tok = Parser.getTok();
+
+ // Any MSR/MRS operand will be an identifier, and we want to store it as some
+ // kind of string: SPSel is valid for two different forms of MSR with two
+ // different encodings. There's no collision at the moment, but the potential
+ // is there.
+ if (!Tok.is(AsmToken::Identifier)) {
+ return MatchOperand_NoMatch;
+ }
+
+ SMLoc S = Tok.getLoc();
+ Operands.push_back(AArch64Operand::CreateSysReg(Tok.getString(), S));
+ Parser.Lex(); // Eat identifier
+
+ return MatchOperand_Success;
+}
+
+AArch64AsmParser::OperandMatchResultTy
+AArch64AsmParser::ParseLSXAddressOperand(
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+
+ unsigned RegNum;
+ SMLoc RegEndLoc, LayoutLoc;
+ StringRef Layout;
+ if(!IdentifyRegister(RegNum, RegEndLoc, Layout, LayoutLoc)
+ || !AArch64MCRegisterClasses[AArch64::GPR64xspRegClassID].contains(RegNum)
+ || Layout.size() != 0) {
+ // Check Layout.size because we don't want to let "x3.4s" or similar
+ // through.
+ return MatchOperand_NoMatch;
+ }
+ Parser.Lex(); // Eat register
+
+ if (Parser.getTok().is(AsmToken::RBrac)) {
+ // We're done
+ SMLoc E = Parser.getTok().getLoc();
+ Operands.push_back(AArch64Operand::CreateWrappedReg(RegNum, S, E));
+ return MatchOperand_Success;
+ }
+
+ // Otherwise, only ", #0" is valid
+
+ if (Parser.getTok().isNot(AsmToken::Comma)) {
+ Error(Parser.getTok().getLoc(), "expected ',' or ']' after register");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex(); // Eat ','
+
+ if (Parser.getTok().isNot(AsmToken::Hash)) {
+ Error(Parser.getTok().getLoc(), "expected '#0'");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex(); // Eat '#'
+
+ if (Parser.getTok().isNot(AsmToken::Integer)
+ || Parser.getTok().getIntVal() != 0 ) {
+ Error(Parser.getTok().getLoc(), "expected '#0'");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex(); // Eat '0'
+
+ SMLoc E = Parser.getTok().getLoc();
+ Operands.push_back(AArch64Operand::CreateWrappedReg(RegNum, S, E));
+ return MatchOperand_Success;
+}
+
+AArch64AsmParser::OperandMatchResultTy
+AArch64AsmParser::ParseShiftExtend(
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ StringRef IDVal = Parser.getTok().getIdentifier();
+ std::string LowerID = IDVal.lower();
+
+ A64SE::ShiftExtSpecifiers Spec =
+ StringSwitch<A64SE::ShiftExtSpecifiers>(LowerID)
+ .Case("lsl", A64SE::LSL)
+ .Case("lsr", A64SE::LSR)
+ .Case("asr", A64SE::ASR)
+ .Case("ror", A64SE::ROR)
+ .Case("uxtb", A64SE::UXTB)
+ .Case("uxth", A64SE::UXTH)
+ .Case("uxtw", A64SE::UXTW)
+ .Case("uxtx", A64SE::UXTX)
+ .Case("sxtb", A64SE::SXTB)
+ .Case("sxth", A64SE::SXTH)
+ .Case("sxtw", A64SE::SXTW)
+ .Case("sxtx", A64SE::SXTX)
+ .Default(A64SE::Invalid);
+
+ if (Spec == A64SE::Invalid)
+ return MatchOperand_NoMatch;
+
+ // Eat the shift
+ SMLoc S, E;
+ S = Parser.getTok().getLoc();
+ Parser.Lex();
+
+ if (Spec != A64SE::LSL && Spec != A64SE::LSR &&
+ Spec != A64SE::ASR && Spec != A64SE::ROR) {
+ // The shift amount can be omitted for the extending versions, but not real
+ // shifts:
+ // add x0, x0, x0, uxtb
+ // is valid, and equivalent to
+ // add x0, x0, x0, uxtb #0
+
+ if (Parser.getTok().is(AsmToken::Comma) ||
+ Parser.getTok().is(AsmToken::EndOfStatement) ||
+ Parser.getTok().is(AsmToken::RBrac)) {
+ Operands.push_back(AArch64Operand::CreateShiftExtend(Spec, 0, true, S, E));
+ return MatchOperand_Success;
+ }
+ }
+
+ // Eat # at beginning of immediate
+ if (!Parser.getTok().is(AsmToken::Hash)) {
+ Error(Parser.getTok().getLoc(),
+ "expected #imm after shift specifier");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex();
+
+ // Make sure we do actually have a number
+ if (!Parser.getTok().is(AsmToken::Integer)) {
+ Error(Parser.getTok().getLoc(),
+ "expected integer shift amount");
+ return MatchOperand_ParseFail;
+ }
+ unsigned Amount = Parser.getTok().getIntVal();
+ Parser.Lex();
+ E = Parser.getTok().getLoc();
+
+ Operands.push_back(AArch64Operand::CreateShiftExtend(Spec, Amount, false, S, E));
+
+ return MatchOperand_Success;
+}
+
+// FIXME: We would really like to be able to tablegen'erate this.
+bool AArch64AsmParser::
+validateInstruction(MCInst &Inst,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ switch (Inst.getOpcode()) {
+ case AArch64::BFIwwii:
+ case AArch64::BFIxxii:
+ case AArch64::SBFIZwwii:
+ case AArch64::SBFIZxxii:
+ case AArch64::UBFIZwwii:
+ case AArch64::UBFIZxxii: {
+ unsigned ImmOps = Inst.getNumOperands() - 2;
+ int64_t ImmR = Inst.getOperand(ImmOps).getImm();
+ int64_t ImmS = Inst.getOperand(ImmOps+1).getImm();
+
+ if (ImmR == 0) {
+ // Bitfield inserts are preferred disassembly if ImmS < ImmR. However,
+ // there is this one case where insert is valid syntax but the bfx
+ // disassembly should be used: e.g. "sbfiz w0, w0, #0, #1".
+ return false;
+ } else if (ImmS >= ImmR) {
+ return Error(Operands[4]->getStartLoc(),
+ "requested insert overflows register");
+ }
+ return false;
+ }
+ case AArch64::BFXILwwii:
+ case AArch64::BFXILxxii:
+ case AArch64::SBFXwwii:
+ case AArch64::SBFXxxii:
+ case AArch64::UBFXwwii:
+ case AArch64::UBFXxxii: {
+ unsigned ImmOps = Inst.getNumOperands() - 2;
+ int64_t ImmR = Inst.getOperand(ImmOps).getImm();
+ int64_t ImmS = Inst.getOperand(ImmOps+1).getImm();
+ int64_t RegWidth = 0;
+ switch (Inst.getOpcode()) {
+ case AArch64::SBFXxxii: case AArch64::UBFXxxii: case AArch64::BFXILxxii:
+ RegWidth = 64;
+ break;
+ case AArch64::SBFXwwii: case AArch64::UBFXwwii: case AArch64::BFXILwwii:
+ RegWidth = 32;
+ break;
+ }
+
+ if (ImmS >= RegWidth || ImmS < ImmR) {
+ return Error(Operands[4]->getStartLoc(),
+ "requested extract overflows register");
+ }
+ return false;
+ }
+ case AArch64::ICix: {
+ int64_t ImmVal = Inst.getOperand(0).getImm();
+ A64IC::ICValues ICOp = static_cast<A64IC::ICValues>(ImmVal);
+ if (!A64IC::NeedsRegister(ICOp)) {
+ return Error(Operands[1]->getStartLoc(),
+ "specified IC op does not use a register");
+ }
+ return false;
+ }
+ case AArch64::ICi: {
+ int64_t ImmVal = Inst.getOperand(0).getImm();
+ A64IC::ICValues ICOp = static_cast<A64IC::ICValues>(ImmVal);
+ if (A64IC::NeedsRegister(ICOp)) {
+ return Error(Operands[1]->getStartLoc(),
+ "specified IC op requires a register");
+ }
+ return false;
+ }
+ case AArch64::TLBIix: {
+ int64_t ImmVal = Inst.getOperand(0).getImm();
+ A64TLBI::TLBIValues TLBIOp = static_cast<A64TLBI::TLBIValues>(ImmVal);
+ if (!A64TLBI::NeedsRegister(TLBIOp)) {
+ return Error(Operands[1]->getStartLoc(),
+ "specified TLBI op does not use a register");
+ }
+ return false;
+ }
+ case AArch64::TLBIi: {
+ int64_t ImmVal = Inst.getOperand(0).getImm();
+ A64TLBI::TLBIValues TLBIOp = static_cast<A64TLBI::TLBIValues>(ImmVal);
+ if (A64TLBI::NeedsRegister(TLBIOp)) {
+ return Error(Operands[1]->getStartLoc(),
+ "specified TLBI op requires a register");
+ }
+ return false;
+ }
+ }
+
+ return false;
+}
+
+
+// Parses the instruction *together with* all operands, appending each parsed
+// operand to the "Operands" list
+bool AArch64AsmParser::ParseInstruction(ParseInstructionInfo &Info,
+ StringRef Name, SMLoc NameLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ size_t CondCodePos = Name.find('.');
+
+ StringRef Mnemonic = Name.substr(0, CondCodePos);
+ Operands.push_back(AArch64Operand::CreateToken(Mnemonic, NameLoc));
+
+ if (CondCodePos != StringRef::npos) {
+ // We have a condition code
+ SMLoc S = SMLoc::getFromPointer(NameLoc.getPointer() + CondCodePos + 1);
+ StringRef CondStr = Name.substr(CondCodePos + 1, StringRef::npos);
+ A64CC::CondCodes Code;
+
+ Code = A64StringToCondCode(CondStr);
+
+ if (Code == A64CC::Invalid) {
+ Error(S, "invalid condition code");
+ Parser.EatToEndOfStatement();
+ return true;
+ }
+
+ SMLoc DotL = SMLoc::getFromPointer(NameLoc.getPointer() + CondCodePos);
+
+ Operands.push_back(AArch64Operand::CreateToken(".", DotL));
+ SMLoc E = SMLoc::getFromPointer(NameLoc.getPointer() + CondCodePos + 3);
+ Operands.push_back(AArch64Operand::CreateCondCode(Code, S, E));
+ }
+
+ // Now we parse the operands of this instruction
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ // Read the first operand.
+ if (ParseOperand(Operands, Mnemonic)) {
+ Parser.EatToEndOfStatement();
+ return true;
+ }
+
+ while (getLexer().is(AsmToken::Comma)) {
+ Parser.Lex(); // Eat the comma.
+
+ // Parse and remember the operand.
+ if (ParseOperand(Operands, Mnemonic)) {
+ Parser.EatToEndOfStatement();
+ return true;
+ }
+
+
+ // After successfully parsing some operands there are two special cases to
+ // consider (i.e. notional operands not separated by commas). Both are due
+ // to memory specifiers:
+ // + An RBrac will end an address for load/store/prefetch
+ // + An '!' will indicate a pre-indexed operation.
+ //
+ // It's someone else's responsibility to make sure these tokens are sane
+ // in the given context!
+ if (Parser.getTok().is(AsmToken::RBrac)) {
+ SMLoc Loc = Parser.getTok().getLoc();
+ Operands.push_back(AArch64Operand::CreateToken("]", Loc));
+ Parser.Lex();
+ }
+
+ if (Parser.getTok().is(AsmToken::Exclaim)) {
+ SMLoc Loc = Parser.getTok().getLoc();
+ Operands.push_back(AArch64Operand::CreateToken("!", Loc));
+ Parser.Lex();
+ }
+ }
+ }
+
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ SMLoc Loc = getLexer().getLoc();
+ Parser.EatToEndOfStatement();
+ return Error(Loc, "");
+ }
+
+ // Eat the EndOfStatement
+ Parser.Lex();
+
+ return false;
+}
+
+bool AArch64AsmParser::ParseDirective(AsmToken DirectiveID) {
+ StringRef IDVal = DirectiveID.getIdentifier();
+ if (IDVal == ".hword")
+ return ParseDirectiveWord(2, DirectiveID.getLoc());
+ else if (IDVal == ".word")
+ return ParseDirectiveWord(4, DirectiveID.getLoc());
+ else if (IDVal == ".xword")
+ return ParseDirectiveWord(8, DirectiveID.getLoc());
+ else if (IDVal == ".tlsdesccall")
+ return ParseDirectiveTLSDescCall(DirectiveID.getLoc());
+
+ return true;
+}
+
+/// parseDirectiveWord
+/// ::= .word [ expression (, expression)* ]
+bool AArch64AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ for (;;) {
+ const MCExpr *Value;
+ if (getParser().ParseExpression(Value))
+ return true;
+
+ getParser().getStreamer().EmitValue(Value, Size, 0/*addrspace*/);
+
+ if (getLexer().is(AsmToken::EndOfStatement))
+ break;
+
+ // FIXME: Improve diagnostic.
+ if (getLexer().isNot(AsmToken::Comma))
+ return Error(L, "unexpected token in directive");
+ Parser.Lex();
+ }
+ }
+
+ Parser.Lex();
+ return false;
+}
+
+// parseDirectiveTLSDescCall:
+// ::= .tlsdesccall symbol
+bool AArch64AsmParser::ParseDirectiveTLSDescCall(SMLoc L) {
+ StringRef Name;
+ if (getParser().ParseIdentifier(Name))
+ return Error(L, "expected symbol after directive");
+
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
+ const MCSymbolRefExpr *Expr = MCSymbolRefExpr::Create(Sym, getContext());
+
+ MCInst Inst;
+ Inst.setOpcode(AArch64::TLSDESCCALL);
+ Inst.addOperand(MCOperand::CreateExpr(Expr));
+
+ getParser().getStreamer().EmitInstruction(Inst);
+ return false;
+}
+
+
+bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ MCStreamer &Out, unsigned &ErrorInfo,
+ bool MatchingInlineAsm) {
+ MCInst Inst;
+ unsigned MatchResult;
+ MatchResult = MatchInstructionImpl(Operands, Inst, ErrorInfo,
+ MatchingInlineAsm);
+ switch (MatchResult) {
+ default: break;
+ case Match_Success:
+ if (validateInstruction(Inst, Operands))
+ return true;
+
+ Out.EmitInstruction(Inst);
+ return false;
+ case Match_MissingFeature:
+ Error(IDLoc, "instruction requires a CPU feature not currently enabled");
+ return true;
+ case Match_InvalidOperand: {
+ SMLoc ErrorLoc = IDLoc;
+ if (ErrorInfo != ~0U) {
+ if (ErrorInfo >= Operands.size())
+ return Error(IDLoc, "too few operands for instruction");
+
+ ErrorLoc = ((AArch64Operand*)Operands[ErrorInfo])->getStartLoc();
+ if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
+ }
+
+ return Error(ErrorLoc, "invalid operand for instruction");
+ }
+ case Match_MnemonicFail:
+ return Error(IDLoc, "invalid instruction");
+ }
+
+ llvm_unreachable("Implement any new match types added!");
+ return true;
+}
+
+void AArch64Operand::print(raw_ostream &OS) const {
+ switch (Kind) {
+ case k_CondCode:
+ OS << "<CondCode: " << CondCode.Code << ">";
+ break;
+ case k_FPImmediate:
+ OS << "<fpimm: " << FPImm.Val << ">";
+ break;
+ case k_ImmWithLSL:
+ OS << "<immwithlsl: imm=" << ImmWithLSL.Val
+ << ", shift=" << ImmWithLSL.ShiftAmount << ">";
+ break;
+ case k_Immediate:
+ getImm()->print(OS);
+ break;
+ case k_Register:
+ OS << "<register " << getReg() << '>';
+ break;
+ case k_Token:
+ OS << '\'' << getToken() << '\'';
+ break;
+ case k_ShiftExtend:
+ OS << "<shift: type=" << ShiftExtend.ShiftType
+ << ", amount=" << ShiftExtend.Amount << ">";
+ break;
+ case k_SysReg: {
+ StringRef Name(SysReg.Data, SysReg.Length);
+ OS << "<sysreg: " << Name << '>';
+ break;
+ }
+ default:
+ llvm_unreachable("No idea how to print this kind of operand");
+ break;
+ }
+}
+
+void AArch64Operand::dump() const {
+ print(errs());
+}
+
+
+/// Force static initialization.
+extern "C" void LLVMInitializeAArch64AsmParser() {
+ RegisterMCAsmParser<AArch64AsmParser> X(TheAArch64Target);
+}
+
+#define GET_REGISTER_MATCHER
+#define GET_MATCHER_IMPLEMENTATION
+#include "AArch64GenAsmMatcher.inc"
diff --git a/lib/Target/AArch64/AsmParser/CMakeLists.txt b/lib/Target/AArch64/AsmParser/CMakeLists.txt
new file mode 100644
index 0000000000..a018a0aa7b
--- /dev/null
+++ b/lib/Target/AArch64/AsmParser/CMakeLists.txt
@@ -0,0 +1,7 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMAArch64AsmParser
+ AArch64AsmParser.cpp
+ )
+
+add_dependencies(LLVMAArch64AsmParser AArch64CommonTableGen)
diff --git a/lib/Target/AArch64/AsmParser/LLVMBuild.txt b/lib/Target/AArch64/AsmParser/LLVMBuild.txt
new file mode 100644
index 0000000000..bd1fcaf1ff
--- /dev/null
+++ b/lib/Target/AArch64/AsmParser/LLVMBuild.txt
@@ -0,0 +1,24 @@
+;===- ./lib/Target/AArch64/AsmParser/LLVMBuild.txt -------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = AArch64AsmParser
+parent = AArch64
+required_libraries = AArch64Desc AArch64Info MC MCParser Support
+add_to_library_groups = AArch64
+
diff --git a/lib/Target/AArch64/AsmParser/Makefile b/lib/Target/AArch64/AsmParser/Makefile
new file mode 100644
index 0000000000..56c9ef52ea
--- /dev/null
+++ b/lib/Target/AArch64/AsmParser/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/AArch64/AsmParser/Makefile ---------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMAArch64AsmParser
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/AArch64/CMakeLists.txt b/lib/Target/AArch64/CMakeLists.txt
new file mode 100644
index 0000000000..a89861fe53
--- /dev/null
+++ b/lib/Target/AArch64/CMakeLists.txt
@@ -0,0 +1,35 @@
+set(LLVM_TARGET_DEFINITIONS AArch64.td)
+
+tablegen(LLVM AArch64GenAsmMatcher.inc -gen-asm-matcher)
+tablegen(LLVM AArch64GenAsmWriter.inc -gen-asm-writer)
+tablegen(LLVM AArch64GenCallingConv.inc -gen-callingconv)
+tablegen(LLVM AArch64GenDisassemblerTables.inc -gen-disassembler)
+tablegen(LLVM AArch64GenInstrInfo.inc -gen-instr-info)
+tablegen(LLVM AArch64GenMCCodeEmitter.inc -gen-emitter -mc-emitter)
+tablegen(LLVM AArch64GenMCPseudoLowering.inc -gen-pseudo-lowering)
+tablegen(LLVM AArch64GenRegisterInfo.inc -gen-register-info)
+tablegen(LLVM AArch64GenDAGISel.inc -gen-dag-isel)
+tablegen(LLVM AArch64GenSubtargetInfo.inc -gen-subtarget)
+add_public_tablegen_target(AArch64CommonTableGen)
+
+add_llvm_target(AArch64CodeGen
+ AArch64AsmPrinter.cpp
+ AArch64ConstantIslandPass.cpp
+ AArch64FrameLowering.cpp
+ AArch64ISelDAGToDAG.cpp
+ AArch64ISelLowering.cpp
+ AArch64InstrInfo.cpp
+ AArch64MachineFunctionInfo.cpp
+ AArch64MCInstLower.cpp
+ AArch64RegisterInfo.cpp
+ AArch64SelectionDAGInfo.cpp
+ AArch64Subtarget.cpp
+ AArch64TargetMachine.cpp
+ AArch64TargetObjectFile.cpp
+ )
+
+add_subdirectory(AsmParser)
+add_subdirectory(Disassembler)
+add_subdirectory(InstPrinter)
+add_subdirectory(MCTargetDesc)
+add_subdirectory(TargetInfo)
diff --git a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
new file mode 100644
index 0000000000..e98285bc5c
--- /dev/null
+++ b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
@@ -0,0 +1,791 @@
+//===- AArch64Disassembler.cpp - Disassembler for the AArch64 ISA ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "aarch64-disassembler"
+
+#include "AArch64.h"
+#include "AArch64RegisterInfo.h"
+#include "AArch64Subtarget.h"
+#include "MCTargetDesc/AArch64BaseInfo.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCFixedLenDisassembler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MemoryObject.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+typedef MCDisassembler::DecodeStatus DecodeStatus;
+
+namespace {
+/// AArch64 disassembler for all AArch64 platforms.
+class AArch64Disassembler : public MCDisassembler {
+ const MCRegisterInfo *RegInfo;
+public:
+ /// Initializes the disassembler.
+ ///
+ AArch64Disassembler(const MCSubtargetInfo &STI, const MCRegisterInfo *Info)
+ : MCDisassembler(STI), RegInfo(Info) {
+ }
+
+ ~AArch64Disassembler() {
+ }
+
+ /// See MCDisassembler.
+ DecodeStatus getInstruction(MCInst &instr,
+ uint64_t &size,
+ const MemoryObject &region,
+ uint64_t address,
+ raw_ostream &vStream,
+ raw_ostream &cStream) const;
+
+ const MCRegisterInfo *getRegInfo() const { return RegInfo; }
+};
+
+}
+
+// Forward-declarations used in the auto-generated files.
+static DecodeStatus DecodeGPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus
+DecodeGPR64xspRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+
+static DecodeStatus DecodeGPR32RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus
+DecodeGPR32wspRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+
+static DecodeStatus DecodeFPR8RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeFPR16RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeFPR32RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeFPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeFPR128RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVPR128RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+
+static DecodeStatus DecodeAddrRegExtendOperand(llvm::MCInst &Inst,
+ unsigned OptionHiS,
+ uint64_t Address,
+ const void *Decoder);
+
+
+static DecodeStatus DecodeBitfield32ImmOperand(llvm::MCInst &Inst,
+ unsigned Imm6Bits,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeCVT32FixedPosOperand(llvm::MCInst &Inst,
+ unsigned Imm6Bits,
+ uint64_t Address,
+ const void *Decoder);
+
+template<int RegWidth>
+static DecodeStatus DecodeMoveWideImmOperand(llvm::MCInst &Inst,
+ unsigned FullImm,
+ uint64_t Address,
+ const void *Decoder);
+
+template<int RegWidth>
+static DecodeStatus DecodeLogicalImmOperand(llvm::MCInst &Inst,
+ unsigned Bits,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeRegExtendOperand(llvm::MCInst &Inst,
+ unsigned ShiftAmount,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus Decode32BitShiftOperand(llvm::MCInst &Inst,
+ unsigned ShiftAmount,
+ uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeBitfieldInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeFMOVLaneInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeLDSTPairInstruction(llvm::MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeLoadPairExclusiveInstruction(llvm::MCInst &Inst,
+ unsigned Val,
+ uint64_t Address,
+ const void *Decoder);
+
+template<typename SomeNamedImmMapper>
+static DecodeStatus DecodeNamedImmOperand(llvm::MCInst &Inst,
+ unsigned Val,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeSysRegOperand(const A64SysReg::SysRegMapper &InstMapper,
+ llvm::MCInst &Inst,
+ unsigned Val,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeMRSOperand(llvm::MCInst &Inst,
+ unsigned Val,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeMSROperand(llvm::MCInst &Inst,
+ unsigned Val,
+ uint64_t Address,
+ const void *Decoder);
+
+
+static DecodeStatus DecodeSingleIndexedInstruction(llvm::MCInst &Inst,
+ unsigned Val,
+ uint64_t Address,
+ const void *Decoder);
+
+
+static bool Check(DecodeStatus &Out, DecodeStatus In);
+
+#include "AArch64GenDisassemblerTables.inc"
+#include "AArch64GenInstrInfo.inc"
+
+static bool Check(DecodeStatus &Out, DecodeStatus In) {
+ switch (In) {
+ case MCDisassembler::Success:
+ // Out stays the same.
+ return true;
+ case MCDisassembler::SoftFail:
+ Out = In;
+ return true;
+ case MCDisassembler::Fail:
+ Out = In;
+ return false;
+ }
+ llvm_unreachable("Invalid DecodeStatus!");
+}
+
+DecodeStatus AArch64Disassembler::getInstruction(MCInst &MI, uint64_t &Size,
+ const MemoryObject &Region,
+ uint64_t Address,
+ raw_ostream &os,
+ raw_ostream &cs) const {
+ CommentStream = &cs;
+
+ uint8_t bytes[4];
+
+ // We want to read exactly 4 bytes of data.
+ if (Region.readBytes(Address, 4, (uint8_t*)bytes, NULL) == -1) {
+ Size = 0;
+ return MCDisassembler::Fail;
+ }
+
+ // Encoded as a little-endian 32-bit word in the stream.
+ uint32_t insn = (bytes[3] << 24) |
+ (bytes[2] << 16) |
+ (bytes[1] << 8) |
+ (bytes[0] << 0);
+
+ // Calling the auto-generated decoder function.
+ DecodeStatus result = decodeInstruction(DecoderTableA6432, MI, insn, Address,
+ this, STI);
+ if (result != MCDisassembler::Fail) {
+ Size = 4;
+ return result;
+ }
+
+ MI.clear();
+ Size = 0;
+ return MCDisassembler::Fail;
+}
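// Illustrative sketch (not from the patch): the byte assembly above, worked
// for an A64 NOP (0xd503201f) as its bytes appear in a little-endian stream.
static uint32_t AssembleNopWordSketch() {
  uint8_t bytes[4] = { 0x1f, 0x20, 0x03, 0xd5 }; // least-significant byte first
  return (bytes[3] << 24) | (bytes[2] << 16) |
         (bytes[1] << 8)  |  (bytes[0] << 0);    // == 0xd503201f
}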
+
+static unsigned getReg(const void *D, unsigned RC, unsigned RegNo) {
+ const AArch64Disassembler *Dis = static_cast<const AArch64Disassembler*>(D);
+ return Dis->getRegInfo()->getRegClass(RC).getRegister(RegNo);
+}
+
+static DecodeStatus DecodeGPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo > 31)
+ return MCDisassembler::Fail;
+
+ uint16_t Register = getReg(Decoder, AArch64::GPR64RegClassID, RegNo);
+ Inst.addOperand(MCOperand::CreateReg(Register));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus
+DecodeGPR64xspRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo > 31)
+ return MCDisassembler::Fail;
+
+ uint16_t Register = getReg(Decoder, AArch64::GPR64xspRegClassID, RegNo);
+ Inst.addOperand(MCOperand::CreateReg(Register));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeGPR32RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo > 31)
+ return MCDisassembler::Fail;
+
+ uint16_t Register = getReg(Decoder, AArch64::GPR32RegClassID, RegNo);
+ Inst.addOperand(MCOperand::CreateReg(Register));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus
+DecodeGPR32wspRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo > 31)
+ return MCDisassembler::Fail;
+
+ uint16_t Register = getReg(Decoder, AArch64::GPR32wspRegClassID, RegNo);
+ Inst.addOperand(MCOperand::CreateReg(Register));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus
+DecodeFPR8RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo > 31)
+ return MCDisassembler::Fail;
+
+ uint16_t Register = getReg(Decoder, AArch64::FPR8RegClassID, RegNo);
+ Inst.addOperand(MCOperand::CreateReg(Register));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus
+DecodeFPR16RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo > 31)
+ return MCDisassembler::Fail;
+
+ uint16_t Register = getReg(Decoder, AArch64::FPR16RegClassID, RegNo);
+ Inst.addOperand(MCOperand::CreateReg(Register));
+ return MCDisassembler::Success;
+}
+
+
+static DecodeStatus
+DecodeFPR32RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo > 31)
+ return MCDisassembler::Fail;
+
+ uint16_t Register = getReg(Decoder, AArch64::FPR32RegClassID, RegNo);
+ Inst.addOperand(MCOperand::CreateReg(Register));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus
+DecodeFPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo > 31)
+ return MCDisassembler::Fail;
+
+ uint16_t Register = getReg(Decoder, AArch64::FPR64RegClassID, RegNo);
+ Inst.addOperand(MCOperand::CreateReg(Register));
+ return MCDisassembler::Success;
+}
+
+
+static DecodeStatus
+DecodeFPR128RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo > 31)
+ return MCDisassembler::Fail;
+
+ uint16_t Register = getReg(Decoder, AArch64::FPR128RegClassID, RegNo);
+ Inst.addOperand(MCOperand::CreateReg(Register));
+ return MCDisassembler::Success;
+}
+
+
+static DecodeStatus
+DecodeVPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo > 31)
+ return MCDisassembler::Fail;
+
+ uint16_t Register = getReg(Decoder, AArch64::VPR64RegClassID, RegNo);
+ Inst.addOperand(MCOperand::CreateReg(Register));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus
+DecodeVPR128RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo > 31)
+ return MCDisassembler::Fail;
+
+ uint16_t Register = getReg(Decoder, AArch64::VPR128RegClassID, RegNo);
+ Inst.addOperand(MCOperand::CreateReg(Register));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeAddrRegExtendOperand(llvm::MCInst &Inst,
+ unsigned OptionHiS,
+ uint64_t Address,
+ const void *Decoder) {
+ // Option{1} must be 1. OptionHiS is made up of {Option{2}, Option{1},
+ // S}. Hence we want to check bit 1.
+ if (!(OptionHiS & 2))
+ return MCDisassembler::Fail;
+
+ Inst.addOperand(MCOperand::CreateImm(OptionHiS));
+ return MCDisassembler::Success;
+}
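// Illustrative sketch (not from the patch): for a "uxtw #<shift>" offset the
// ISA's Option field is 0b010 and S = 1, so the packed value is
//   OptionHiS = {Option<2>, Option<1>, S} = 0b011,
// whose bit 1 is set, so the operand is accepted above. Values with
// Option<1> clear are rejected, as the comment in the decoder requires.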
+
+static DecodeStatus DecodeBitfield32ImmOperand(llvm::MCInst &Inst,
+ unsigned Imm6Bits,
+ uint64_t Address,
+ const void *Decoder) {
+ // In the 32-bit variant, bit 6 must be zero. I.e. the immediate must be
+ // between 0 and 31.
+ if (Imm6Bits > 31)
+ return MCDisassembler::Fail;
+
+ Inst.addOperand(MCOperand::CreateImm(Imm6Bits));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeCVT32FixedPosOperand(llvm::MCInst &Inst,
+ unsigned Imm6Bits,
+ uint64_t Address,
+ const void *Decoder) {
+ // 1 <= Imm <= 32. Encoded as 64 - Imm so: 63 >= Encoded >= 32.
+ if (Imm6Bits < 32)
+ return MCDisassembler::Fail;
+
+ Inst.addOperand(MCOperand::CreateImm(Imm6Bits));
+ return MCDisassembler::Success;
+}
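// Illustrative sketch (not from the patch): a 32-bit fixed-point conversion
// with #3 fractional bits encodes Scale = 64 - 3 = 61, accepted above because
// 61 >= 32; printCVTFixedPosOperand later recovers the value as 64 - 61 = 3.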
+
+
+template<int RegWidth>
+static DecodeStatus DecodeMoveWideImmOperand(llvm::MCInst &Inst,
+ unsigned FullImm,
+ uint64_t Address,
+ const void *Decoder) {
+ unsigned Imm16 = FullImm & 0xffff;
+ unsigned Shift = FullImm >> 16;
+
+ if (RegWidth == 32 && Shift > 1) return MCDisassembler::Fail;
+
+ Inst.addOperand(MCOperand::CreateImm(Imm16));
+ Inst.addOperand(MCOperand::CreateImm(Shift));
+ return MCDisassembler::Success;
+}
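// Illustrative sketch (not from the patch): FullImm is the instruction's
// hw:imm16 concatenation. FullImm = 0x2abcd splits into Imm16 = 0xabcd and
// Shift = 2 (i.e. "lsl #32"), which is rejected above for 32-bit registers
// because only shifts 0 and 1 are encodable there.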
+
+template<int RegWidth>
+static DecodeStatus DecodeLogicalImmOperand(llvm::MCInst &Inst,
+ unsigned Bits,
+ uint64_t Address,
+ const void *Decoder) {
+ uint64_t Imm;
+ if (!A64Imms::isLogicalImmBits(RegWidth, Bits, Imm))
+ return MCDisassembler::Fail;
+
+ Inst.addOperand(MCOperand::CreateImm(Bits));
+ return MCDisassembler::Success;
+}
+
+
+static DecodeStatus DecodeRegExtendOperand(llvm::MCInst &Inst,
+ unsigned ShiftAmount,
+ uint64_t Address,
+ const void *Decoder) {
+ // Only values 0-4 are valid for this 3-bit field
+ if (ShiftAmount > 4)
+ return MCDisassembler::Fail;
+
+ Inst.addOperand(MCOperand::CreateImm(ShiftAmount));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus Decode32BitShiftOperand(llvm::MCInst &Inst,
+ unsigned ShiftAmount,
+ uint64_t Address,
+ const void *Decoder) {
+ // Only values below 32 are valid for a 32-bit register
+ if (ShiftAmount > 31)
+ return MCDisassembler::Fail;
+
+ Inst.addOperand(MCOperand::CreateImm(ShiftAmount));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeBitfieldInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const void *Decoder) {
+ unsigned Rd = fieldFromInstruction(Insn, 0, 5);
+ unsigned Rn = fieldFromInstruction(Insn, 5, 5);
+ unsigned ImmS = fieldFromInstruction(Insn, 10, 6);
+ unsigned ImmR = fieldFromInstruction(Insn, 16, 6);
+ unsigned SF = fieldFromInstruction(Insn, 31, 1);
+
+ // Undef for 0b11 just in case it occurs. Don't want the compiler to optimise
+ // out assertions that it thinks should never be hit.
+ enum OpcTypes { SBFM = 0, BFM, UBFM, Undef } Opc;
+ Opc = (OpcTypes)fieldFromInstruction(Insn, 29, 2);
+
+ if (!SF) {
+ // ImmR and ImmS must be between 0 and 31 for 32-bit instructions.
+ if (ImmR > 31 || ImmS > 31)
+ return MCDisassembler::Fail;
+ }
+
+ if (SF) {
+ DecodeGPR64RegisterClass(Inst, Rd, Address, Decoder);
+ // BFM MCInsts use Rd as a source too.
+ if (Opc == BFM) DecodeGPR64RegisterClass(Inst, Rd, Address, Decoder);
+ DecodeGPR64RegisterClass(Inst, Rn, Address, Decoder);
+ } else {
+ DecodeGPR32RegisterClass(Inst, Rd, Address, Decoder);
+ // BFM MCInsts use Rd as a source too.
+ if (Opc == BFM) DecodeGPR32RegisterClass(Inst, Rd, Address, Decoder);
+ DecodeGPR32RegisterClass(Inst, Rn, Address, Decoder);
+ }
+
+ // ASR and LSR have more specific patterns so they won't get here:
+ assert(!(ImmS == 31 && !SF && Opc != BFM) && "shift should have used auto decode");
+ assert(!(ImmS == 63 && SF && Opc != BFM) && "shift should have used auto decode");
+
+ // Extension instructions similarly:
+ if (Opc == SBFM && ImmR == 0) {
+ assert((ImmS != 7 && ImmS != 15) && "extension got here");
+ assert((ImmS != 31 || SF == 0) && "extension got here");
+ } else if (Opc == UBFM && ImmR == 0) {
+ assert((SF != 0 || (ImmS != 7 && ImmS != 15)) && "extension got here");
+ }
+
+ if (Opc == UBFM) {
+ // It might be a LSL instruction, which actually takes the shift amount
+ // itself as an MCInst operand.
+ if (SF && (ImmS + 1) % 64 == ImmR) {
+ Inst.setOpcode(AArch64::LSLxxi);
+ Inst.addOperand(MCOperand::CreateImm(63 - ImmS));
+ return MCDisassembler::Success;
+ } else if (!SF && (ImmS + 1) % 32 == ImmR) {
+ Inst.setOpcode(AArch64::LSLwwi);
+ Inst.addOperand(MCOperand::CreateImm(31 - ImmS));
+ return MCDisassembler::Success;
+ }
+ }
+
+ // Otherwise it's definitely either an extract or an insert depending on which
+ // of ImmR or ImmS is larger.
+ unsigned ExtractOp, InsertOp;
+ switch (Opc) {
+ default: llvm_unreachable("unexpected instruction trying to decode bitfield");
+ case SBFM:
+ ExtractOp = SF ? AArch64::SBFXxxii : AArch64::SBFXwwii;
+ InsertOp = SF ? AArch64::SBFIZxxii : AArch64::SBFIZwwii;
+ break;
+ case BFM:
+ ExtractOp = SF ? AArch64::BFXILxxii : AArch64::BFXILwwii;
+ InsertOp = SF ? AArch64::BFIxxii : AArch64::BFIwwii;
+ break;
+ case UBFM:
+ ExtractOp = SF ? AArch64::UBFXxxii : AArch64::UBFXwwii;
+ InsertOp = SF ? AArch64::UBFIZxxii : AArch64::UBFIZwwii;
+ break;
+ }
+
+ // Otherwise it's a boring insert or extract
+ Inst.addOperand(MCOperand::CreateImm(ImmR));
+ Inst.addOperand(MCOperand::CreateImm(ImmS));
+
+
+ if (ImmS < ImmR)
+ Inst.setOpcode(InsertOp);
+ else
+ Inst.setOpcode(ExtractOp);
+
+ return MCDisassembler::Success;
+}
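// Illustrative sketch (not from the patch): "lsl x0, x1, #3" is the alias of
// UBFM x0, x1, #61, #60. With SF = 1, (ImmS + 1) % 64 == 61 == ImmR, so the
// branch above selects LSLxxi and adds the shift amount 63 - ImmS = 3.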
+
+static DecodeStatus DecodeFMOVLaneInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const void *Decoder) {
+ // This decoder exists to add the dummy Lane operand to the MCInst, which must
+ // be 1 in assembly but has no other real manifestation.
+ unsigned Rd = fieldFromInstruction(Insn, 0, 5);
+ unsigned Rn = fieldFromInstruction(Insn, 5, 5);
+ unsigned IsToVec = fieldFromInstruction(Insn, 16, 1);
+
+ if (IsToVec) {
+ DecodeVPR128RegisterClass(Inst, Rd, Address, Decoder);
+ DecodeGPR64RegisterClass(Inst, Rn, Address, Decoder);
+ } else {
+ DecodeGPR64RegisterClass(Inst, Rd, Address, Decoder);
+ DecodeVPR128RegisterClass(Inst, Rn, Address, Decoder);
+ }
+
+ // Add the lane
+ Inst.addOperand(MCOperand::CreateImm(1));
+
+ return MCDisassembler::Success;
+}
+
+
+static DecodeStatus DecodeLDSTPairInstruction(llvm::MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder) {
+ DecodeStatus Result = MCDisassembler::Success;
+ unsigned Rt = fieldFromInstruction(Insn, 0, 5);
+ unsigned Rn = fieldFromInstruction(Insn, 5, 5);
+ unsigned Rt2 = fieldFromInstruction(Insn, 10, 5);
+ unsigned SImm7 = fieldFromInstruction(Insn, 15, 7);
+ unsigned L = fieldFromInstruction(Insn, 22, 1);
+ unsigned V = fieldFromInstruction(Insn, 26, 1);
+ unsigned Opc = fieldFromInstruction(Insn, 30, 2);
+
+ // Not an official name, but it turns out that bit 23 distinguishes indexed
+ // from non-indexed operations.
+ unsigned Indexed = fieldFromInstruction(Insn, 23, 1);
+
+ if (Indexed && L == 0) {
+ // The MCInst for an indexed store has an out operand and 4 ins:
+ // Rn_wb, Rt, Rt2, Rn, Imm
+ DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder);
+ }
+
+ // You shouldn't load to the same register twice in an instruction...
+ if (L && Rt == Rt2)
+ Result = MCDisassembler::SoftFail;
+
+ // ... or do any operation that writes-back to a transfer register. But note
+ // that "stp xzr, xzr, [sp], #4" is fine because xzr and sp are different.
+ if (Indexed && V == 0 && Rn != 31 && (Rt == Rn || Rt2 == Rn))
+ Result = MCDisassembler::SoftFail;
+
+ // Exactly how we decode the MCInst's registers depends on the Opc and V
+ // fields of the instruction. These also obviously determine the size of the
+ // operation so we can fill in that information while we're at it.
+ if (V) {
+ // The instruction operates on the FP/SIMD registers
+ switch (Opc) {
+ default: return MCDisassembler::Fail;
+ case 0:
+ DecodeFPR32RegisterClass(Inst, Rt, Address, Decoder);
+ DecodeFPR32RegisterClass(Inst, Rt2, Address, Decoder);
+ break;
+ case 1:
+ DecodeFPR64RegisterClass(Inst, Rt, Address, Decoder);
+ DecodeFPR64RegisterClass(Inst, Rt2, Address, Decoder);
+ break;
+ case 2:
+ DecodeFPR128RegisterClass(Inst, Rt, Address, Decoder);
+ DecodeFPR128RegisterClass(Inst, Rt2, Address, Decoder);
+ break;
+ }
+ } else {
+ switch (Opc) {
+ default: return MCDisassembler::Fail;
+ case 0:
+ DecodeGPR32RegisterClass(Inst, Rt, Address, Decoder);
+ DecodeGPR32RegisterClass(Inst, Rt2, Address, Decoder);
+ break;
+ case 1:
+ assert(L && "unexpected \"store signed\" attempt");
+ DecodeGPR64RegisterClass(Inst, Rt, Address, Decoder);
+ DecodeGPR64RegisterClass(Inst, Rt2, Address, Decoder);
+ break;
+ case 2:
+ DecodeGPR64RegisterClass(Inst, Rt, Address, Decoder);
+ DecodeGPR64RegisterClass(Inst, Rt2, Address, Decoder);
+ break;
+ }
+ }
+
+ if (Indexed && L == 1) {
+ // The MCInst for an indexed load has 3 out operands and 2 ins:
+ // Rt, Rt2, Rn_wb, Rn, Imm
+ DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder);
+ }
+
+
+ DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder);
+ Inst.addOperand(MCOperand::CreateImm(SImm7));
+
+ return Result;
+}
+
+static DecodeStatus DecodeLoadPairExclusiveInstruction(llvm::MCInst &Inst,
+ uint32_t Val,
+ uint64_t Address,
+ const void *Decoder) {
+ unsigned Rt = fieldFromInstruction(Val, 0, 5);
+ unsigned Rn = fieldFromInstruction(Val, 5, 5);
+ unsigned Rt2 = fieldFromInstruction(Val, 10, 5);
+ unsigned MemSize = fieldFromInstruction(Val, 30, 2);
+
+ DecodeStatus S = MCDisassembler::Success;
+ if (Rt == Rt2) S = MCDisassembler::SoftFail;
+
+ switch (MemSize) {
+ case 2:
+ if (!Check(S, DecodeGPR32RegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPR32RegisterClass(Inst, Rt2, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ case 3:
+ if (!Check(S, DecodeGPR64RegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPR64RegisterClass(Inst, Rt2, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ llvm_unreachable("Invalid MemSize in DecodeLoadPairExclusiveInstruction");
+ }
+
+ if (!Check(S, DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+template<typename SomeNamedImmMapper>
+static DecodeStatus DecodeNamedImmOperand(llvm::MCInst &Inst,
+ unsigned Val,
+ uint64_t Address,
+ const void *Decoder) {
+ SomeNamedImmMapper Mapper;
+ bool ValidNamed;
+ Mapper.toString(Val, ValidNamed);
+ if (ValidNamed || Mapper.validImm(Val)) {
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ return MCDisassembler::Success;
+ }
+
+ return MCDisassembler::Fail;
+}
+
+static DecodeStatus DecodeSysRegOperand(const A64SysReg::SysRegMapper &Mapper,
+ llvm::MCInst &Inst,
+ unsigned Val,
+ uint64_t Address,
+ const void *Decoder) {
+ bool ValidNamed;
+ Mapper.toString(Val, ValidNamed);
+
+ Inst.addOperand(MCOperand::CreateImm(Val));
+
+ return ValidNamed ? MCDisassembler::Success : MCDisassembler::Fail;
+}
+
+static DecodeStatus DecodeMRSOperand(llvm::MCInst &Inst,
+ unsigned Val,
+ uint64_t Address,
+ const void *Decoder) {
+ return DecodeSysRegOperand(A64SysReg::MRSMapper(), Inst, Val, Address,
+ Decoder);
+}
+
+static DecodeStatus DecodeMSROperand(llvm::MCInst &Inst,
+ unsigned Val,
+ uint64_t Address,
+ const void *Decoder) {
+ return DecodeSysRegOperand(A64SysReg::MSRMapper(), Inst, Val, Address,
+ Decoder);
+}
+
+static DecodeStatus DecodeSingleIndexedInstruction(llvm::MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder) {
+ unsigned Rt = fieldFromInstruction(Insn, 0, 5);
+ unsigned Rn = fieldFromInstruction(Insn, 5, 5);
+ unsigned Imm9 = fieldFromInstruction(Insn, 12, 9);
+
+ unsigned Opc = fieldFromInstruction(Insn, 22, 2);
+ unsigned V = fieldFromInstruction(Insn, 26, 1);
+ unsigned Size = fieldFromInstruction(Insn, 30, 2);
+
+ if (Opc == 0 || (V == 1 && Opc == 2)) {
+ // It's a store, the MCInst gets: Rn_wb, Rt, Rn, Imm
+ DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder);
+ }
+
+ if (V == 0 && (Opc == 2 || Size == 3)) {
+ DecodeGPR64RegisterClass(Inst, Rt, Address, Decoder);
+ } else if (V == 0) {
+ DecodeGPR32RegisterClass(Inst, Rt, Address, Decoder);
+ } else if (V == 1 && (Opc & 2)) {
+ DecodeFPR128RegisterClass(Inst, Rt, Address, Decoder);
+ } else {
+ switch (Size) {
+ case 0:
+ DecodeFPR8RegisterClass(Inst, Rt, Address, Decoder);
+ break;
+ case 1:
+ DecodeFPR16RegisterClass(Inst, Rt, Address, Decoder);
+ break;
+ case 2:
+ DecodeFPR32RegisterClass(Inst, Rt, Address, Decoder);
+ break;
+ case 3:
+ DecodeFPR64RegisterClass(Inst, Rt, Address, Decoder);
+ break;
+ }
+ }
+
+ if (Opc != 0 && (V != 1 || Opc != 2)) {
+ // It's a load, the MCInst gets: Rt, Rn_wb, Rn, Imm
+ DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder);
+ }
+
+ DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder);
+
+ Inst.addOperand(MCOperand::CreateImm(Imm9));
+
+ // N.b. The official documentation says unpredictable if Rt == Rn, but this
+ // takes place at the architectural rather than encoding level:
+ //
+ // "STR xzr, [sp], #4" is perfectly valid.
+ if (V == 0 && Rt == Rn && Rn != 31)
+ return MCDisassembler::SoftFail;
+ else
+ return MCDisassembler::Success;
+}
+
+static MCDisassembler *createAArch64Disassembler(const Target &T,
+ const MCSubtargetInfo &STI) {
+ return new AArch64Disassembler(STI, T.createMCRegInfo(""));
+}
+
+extern "C" void LLVMInitializeAArch64Disassembler() {
+ TargetRegistry::RegisterMCDisassembler(TheAArch64Target,
+ createAArch64Disassembler);
+}
+
+
diff --git a/lib/Target/AArch64/Disassembler/CMakeLists.txt b/lib/Target/AArch64/Disassembler/CMakeLists.txt
new file mode 100644
index 0000000000..d4bd163dad
--- /dev/null
+++ b/lib/Target/AArch64/Disassembler/CMakeLists.txt
@@ -0,0 +1,7 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMAArch64Disassembler
+ AArch64Disassembler.cpp
+ )
+
+add_dependencies(LLVMAArch64Disassembler AArch64CommonTableGen)
diff --git a/lib/Target/AArch64/Disassembler/LLVMBuild.txt b/lib/Target/AArch64/Disassembler/LLVMBuild.txt
new file mode 100644
index 0000000000..123eb3e504
--- /dev/null
+++ b/lib/Target/AArch64/Disassembler/LLVMBuild.txt
@@ -0,0 +1,24 @@
+;===- ./lib/Target/AArch64/Disassembler/LLVMBuild.txt ----------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = AArch64Disassembler
+parent = AArch64
+required_libraries = AArch64CodeGen AArch64Desc AArch64Info MC Support
+add_to_library_groups = AArch64
+
diff --git a/lib/Target/AArch64/Disassembler/Makefile b/lib/Target/AArch64/Disassembler/Makefile
new file mode 100644
index 0000000000..5c861207f8
--- /dev/null
+++ b/lib/Target/AArch64/Disassembler/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/AArch64/Disassembler/Makefile ------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMAArch64Disassembler
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
new file mode 100644
index 0000000000..909810f4d9
--- /dev/null
+++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
@@ -0,0 +1,408 @@
+//==-- AArch64InstPrinter.cpp - Convert AArch64 MCInst to assembly syntax --==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints an AArch64 MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "AArch64InstPrinter.h"
+#include "MCTargetDesc/AArch64BaseInfo.h"
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define GET_INSTRUCTION_NAME
+#define PRINT_ALIAS_INSTR
+#include "AArch64GenAsmWriter.inc"
+
+static int64_t unpackSignedImm(int BitWidth, uint64_t Value) {
+ assert(!(Value & ~((1ULL << BitWidth)-1)) && "immediate not n-bit");
+ if (Value & (1ULL << (BitWidth - 1)))
+ return static_cast<int64_t>(Value) - (1LL << BitWidth);
+ else
+ return Value;
+}
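// Illustrative sketch (not from the patch): unpackSignedImm treats the top
// bit of the field as the sign, so with BitWidth = 9:
//   unpackSignedImm(9, 0x0ff) ==  255
//   unpackSignedImm(9, 0x100) == -256   (0x100 - 2^9)
//   unpackSignedImm(9, 0x1ff) ==   -1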
+
+AArch64InstPrinter::AArch64InstPrinter(const MCAsmInfo &MAI,
+ const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI,
+ const MCSubtargetInfo &STI) :
+ MCInstPrinter(MAI, MII, MRI) {
+ // Initialize the set of available features.
+ setAvailableFeatures(STI.getFeatureBits());
+}
+
+void AArch64InstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
+ OS << getRegisterName(RegNo);
+}
+
+void
+AArch64InstPrinter::printOffsetSImm9Operand(const MCInst *MI,
+ unsigned OpNum, raw_ostream &O) {
+ const MCOperand &MOImm = MI->getOperand(OpNum);
+ int32_t Imm = unpackSignedImm(9, MOImm.getImm());
+
+ O << '#' << Imm;
+}
+
+void
+AArch64InstPrinter::printAddrRegExtendOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O, unsigned MemSize,
+ unsigned RmSize) {
+ unsigned ExtImm = MI->getOperand(OpNum).getImm();
+ unsigned OptionHi = ExtImm >> 1;
+ unsigned S = ExtImm & 1;
+ bool IsLSL = OptionHi == 1 && RmSize == 64;
+
+ const char *Ext;
+ switch (OptionHi) {
+ case 1:
+ Ext = (RmSize == 32) ? "uxtw" : "lsl";
+ break;
+ case 3:
+ Ext = (RmSize == 32) ? "sxtw" : "sxtx";
+ break;
+ default:
+ llvm_unreachable("Incorrect Option on load/store (reg offset)");
+ }
+ O << Ext;
+
+ if (S) {
+ unsigned ShiftAmt = Log2_32(MemSize);
+ O << " #" << ShiftAmt;
+ } else if (IsLSL) {
+ O << " #0";
+ }
+}
+
+void
+AArch64InstPrinter::printAddSubImmLSL0Operand(const MCInst *MI,
+ unsigned OpNum, raw_ostream &O) {
+ const MCOperand &Imm12Op = MI->getOperand(OpNum);
+
+ if (Imm12Op.isImm()) {
+ int64_t Imm12 = Imm12Op.getImm();
+ assert(Imm12 >= 0 && "Invalid immediate for add/sub imm");
+ O << "#" << Imm12;
+ } else {
+ assert(Imm12Op.isExpr() && "Unexpected shift operand type");
+ O << "#" << *Imm12Op.getExpr();
+ }
+}
+
+void
+AArch64InstPrinter::printAddSubImmLSL12Operand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+
+ printAddSubImmLSL0Operand(MI, OpNum, O);
+
+ O << ", lsl #12";
+}
+
+void
+AArch64InstPrinter::printBareImmOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(OpNum);
+ O << MO.getImm();
+}
+
+template<unsigned RegWidth> void
+AArch64InstPrinter::printBFILSBOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &ImmROp = MI->getOperand(OpNum);
+ unsigned LSB = ImmROp.getImm() == 0 ? 0 : RegWidth - ImmROp.getImm();
+
+ O << '#' << LSB;
+}
+
+void AArch64InstPrinter::printBFIWidthOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &ImmSOp = MI->getOperand(OpNum);
+ unsigned Width = ImmSOp.getImm() + 1;
+
+ O << '#' << Width;
+}
+
+void
+AArch64InstPrinter::printBFXWidthOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &ImmSOp = MI->getOperand(OpNum);
+ const MCOperand &ImmROp = MI->getOperand(OpNum - 1);
+
+ unsigned ImmR = ImmROp.getImm();
+ unsigned ImmS = ImmSOp.getImm();
+
+ assert(ImmS >= ImmR && "Invalid ImmR, ImmS combination for bitfield extract");
+
+ O << '#' << (ImmS - ImmR + 1);
+}
+
+void
+AArch64InstPrinter::printCRxOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &CRx = MI->getOperand(OpNum);
+
+ O << 'c' << CRx.getImm();
+}
+
+
+void
+AArch64InstPrinter::printCVTFixedPosOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &ScaleOp = MI->getOperand(OpNum);
+
+ O << '#' << (64 - ScaleOp.getImm());
+}
+
+
+void AArch64InstPrinter::printFPImmOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &o) {
+ const MCOperand &MOImm8 = MI->getOperand(OpNum);
+
+ assert(MOImm8.isImm()
+ && "Immediate operand required for floating-point immediate inst");
+
+ uint32_t Imm8 = MOImm8.getImm();
+ uint32_t Fraction = Imm8 & 0xf;
+ uint32_t Exponent = (Imm8 >> 4) & 0x7;
+ uint32_t Negative = (Imm8 >> 7) & 0x1;
+
+ float Val = 1.0f + Fraction / 16.0f;
+
+ // That is:
+ // 000 -> 2^1, 001 -> 2^2, 010 -> 2^3, 011 -> 2^4,
+ // 100 -> 2^-3, 101 -> 2^-2, 110 -> 2^-1, 111 -> 2^0
+ if (Exponent & 0x4) {
+ Val /= 1 << (7 - Exponent);
+ } else {
+ Val *= 1 << (Exponent + 1);
+ }
+
+ Val = Negative ? -Val : Val;
+
+ o << '#' << format("%.8f", Val);
+}
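// Illustrative sketch (not from the patch): imm8 = 0x70 has Negative = 0,
// Exponent = 0b111 and Fraction = 0, so Val = (1.0 + 0/16) / (1 << (7 - 7))
// = 1.0 and the operand prints as "#1.00000000" -- the encoding produced by
// "fmov d0, #1.0".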
+
+void AArch64InstPrinter::printFPZeroOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &o) {
+ o << "#0.0";
+}
+
+void
+AArch64InstPrinter::printCondCodeOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(OpNum);
+
+ O << A64CondCodeToString(static_cast<A64CC::CondCodes>(MO.getImm()));
+}
+
+template <unsigned field_width, unsigned scale> void
+AArch64InstPrinter::printLabelOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(OpNum);
+
+ if (!MO.isImm()) {
+ printOperand(MI, OpNum, O);
+ return;
+ }
+
+ // The immediate of an LDR (literal) instruction is a signed 19-bit value which
+ // is multiplied by 4 (because all A64 instructions are 32 bits wide).
+ uint64_t UImm = MO.getImm();
+ uint64_t Sign = UImm & (1LL << (field_width - 1));
+ int64_t SImm = scale * ((UImm & ~Sign) - Sign);
+
+ O << "#" << SImm;
+}
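// Illustrative sketch (not from the patch): with field_width = 19 and
// scale = 4 (the LDR-literal / conditional-branch case), an encoded value of
// 0x7ffff gives Sign = 0x40000 and SImm = 4 * (0x3ffff - 0x40000) = -4, i.e.
// a target 4 bytes backwards, while an encoded 0x00001 gives +4.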
+
+template<unsigned RegWidth> void
+AArch64InstPrinter::printLogicalImmOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(OpNum);
+ uint64_t Val;
+ A64Imms::isLogicalImmBits(RegWidth, MO.getImm(), Val);
+ O << "#0x";
+ O.write_hex(Val);
+}
+
+void
+AArch64InstPrinter::printOffsetUImm12Operand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O, int MemSize) {
+ const MCOperand &MOImm = MI->getOperand(OpNum);
+
+ if (MOImm.isImm()) {
+ uint32_t Imm = MOImm.getImm() * MemSize;
+
+ O << "#" << Imm;
+ } else {
+ O << "#" << *MOImm.getExpr();
+ }
+}
+
+void
+AArch64InstPrinter::printShiftOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O,
+ A64SE::ShiftExtSpecifiers Shift) {
+ const MCOperand &MO = MI->getOperand(OpNum);
+
+ // LSL #0 is not printed
+ if (Shift == A64SE::LSL && MO.isImm() && MO.getImm() == 0)
+ return;
+
+ switch (Shift) {
+ case A64SE::LSL: O << "lsl"; break;
+ case A64SE::LSR: O << "lsr"; break;
+ case A64SE::ASR: O << "asr"; break;
+ case A64SE::ROR: O << "ror"; break;
+ default: llvm_unreachable("Invalid shift specifier in logical instruction");
+ }
+
+ O << " #" << MO.getImm();
+}
+
+void
+AArch64InstPrinter::printMoveWideImmOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &UImm16MO = MI->getOperand(OpNum);
+ const MCOperand &ShiftMO = MI->getOperand(OpNum + 1);
+
+ if (UImm16MO.isImm()) {
+ O << '#' << UImm16MO.getImm();
+
+ if (ShiftMO.getImm() != 0)
+ O << ", lsl #" << (ShiftMO.getImm() * 16);
+
+ return;
+ }
+
+ O << "#" << *UImm16MO.getExpr();
+}
+
+void AArch64InstPrinter::printNamedImmOperand(const NamedImmMapper &Mapper,
+ const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ bool ValidName;
+ const MCOperand &MO = MI->getOperand(OpNum);
+ StringRef Name = Mapper.toString(MO.getImm(), ValidName);
+
+ if (ValidName)
+ O << Name;
+ else
+ O << '#' << MO.getImm();
+}
+
+void
+AArch64InstPrinter::printSysRegOperand(const A64SysReg::SysRegMapper &Mapper,
+ const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(OpNum);
+
+ bool ValidName;
+ std::string Name = Mapper.toString(MO.getImm(), ValidName);
+ if (ValidName) {
+ O << Name;
+ return;
+ }
+}
+
+
+void AArch64InstPrinter::printRegExtendOperand(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O,
+ A64SE::ShiftExtSpecifiers Ext) {
+ // FIXME: In principle TableGen should be able to detect this itself far more
+ // easily. We will only accumulate more of these hacks.
+ unsigned Reg0 = MI->getOperand(0).getReg();
+ unsigned Reg1 = MI->getOperand(1).getReg();
+
+ if (isStackReg(Reg0) || isStackReg(Reg1)) {
+ A64SE::ShiftExtSpecifiers LSLEquiv;
+
+ if (Reg0 == AArch64::XSP || Reg1 == AArch64::XSP)
+ LSLEquiv = A64SE::UXTX;
+ else
+ LSLEquiv = A64SE::UXTW;
+
+ if (Ext == LSLEquiv) {
+ O << "lsl #" << MI->getOperand(OpNum).getImm();
+ return;
+ }
+ }
+
+ switch (Ext) {
+ case A64SE::UXTB: O << "uxtb"; break;
+ case A64SE::UXTH: O << "uxth"; break;
+ case A64SE::UXTW: O << "uxtw"; break;
+ case A64SE::UXTX: O << "uxtx"; break;
+ case A64SE::SXTB: O << "sxtb"; break;
+ case A64SE::SXTH: O << "sxth"; break;
+ case A64SE::SXTW: O << "sxtw"; break;
+ case A64SE::SXTX: O << "sxtx"; break;
+ default: llvm_unreachable("Unexpected shift type for printing");
+ }
+
+ const MCOperand &MO = MI->getOperand(OpNum);
+ if (MO.getImm() != 0)
+ O << " #" << MO.getImm();
+}
+
+template<int MemScale> void
+AArch64InstPrinter::printSImm7ScaledOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MOImm = MI->getOperand(OpNum);
+ int32_t Imm = unpackSignedImm(7, MOImm.getImm());
+
+ O << "#" << (Imm * MemScale);
+}
+
+void AArch64InstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Op.isReg()) {
+ unsigned Reg = Op.getReg();
+ O << getRegisterName(Reg);
+ } else if (Op.isImm()) {
+ O << '#' << Op.getImm();
+ } else {
+ assert(Op.isExpr() && "unknown operand kind in printOperand");
+ // If a symbolic branch target was added as a constant expression then print
+ // that address in hex.
+ const MCConstantExpr *BranchTarget = dyn_cast<MCConstantExpr>(Op.getExpr());
+ int64_t Address;
+ if (BranchTarget && BranchTarget->EvaluateAsAbsolute(Address)) {
+ O << "0x";
+ O.write_hex(Address);
+ }
+ else {
+ // Otherwise, just print the expression.
+ O << *Op.getExpr();
+ }
+ }
+}
+
+
+void AArch64InstPrinter::printInst(const MCInst *MI, raw_ostream &O,
+ StringRef Annot) {
+ if (MI->getOpcode() == AArch64::TLSDESCCALL) {
+ // This is a special assembler directive which applies an
+ // R_AARCH64_TLSDESC_CALL to the following (BLR) instruction. It has a fixed
+ // form outside the normal TableGenerated scheme.
+ O << "\t.tlsdesccall " << *MI->getOperand(0).getExpr();
+ } else if (!printAliasInstr(MI, O))
+ printInstruction(MI, O);
+
+ printAnnotation(O, Annot);
+}
diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
new file mode 100644
index 0000000000..1890082491
--- /dev/null
+++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
@@ -0,0 +1,171 @@
+//===-- AArch64InstPrinter.h - Convert AArch64 MCInst to assembly syntax --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints an AArch64 MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_AARCH64INSTPRINTER_H
+#define LLVM_AARCH64INSTPRINTER_H
+
+#include "MCTargetDesc/AArch64BaseInfo.h"
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+
+namespace llvm {
+
+class MCOperand;
+
+class AArch64InstPrinter : public MCInstPrinter {
+public:
+ AArch64InstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI, const MCSubtargetInfo &STI);
+
+ // Autogenerated by tblgen
+ void printInstruction(const MCInst *MI, raw_ostream &O);
+ bool printAliasInstr(const MCInst *MI, raw_ostream &O);
+ static const char *getRegisterName(unsigned RegNo);
+ static const char *getInstructionName(unsigned Opcode);
+
+ void printRegName(raw_ostream &O, unsigned RegNum) const;
+
+ template<unsigned MemSize, unsigned RmSize>
+ void printAddrRegExtendOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ printAddrRegExtendOperand(MI, OpNum, O, MemSize, RmSize);
+ }
+
+
+ void printAddrRegExtendOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O, unsigned MemSize,
+ unsigned RmSize);
+
+ void printAddSubImmLSL0Operand(const MCInst *MI,
+ unsigned OpNum, raw_ostream &O);
+ void printAddSubImmLSL12Operand(const MCInst *MI,
+ unsigned OpNum, raw_ostream &O);
+
+ void printBareImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+
+ template<unsigned RegWidth>
+ void printBFILSBOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printBFIWidthOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printBFXWidthOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+
+
+ void printCondCodeOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+
+ void printCRxOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+
+ void printCVTFixedPosOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+
+ void printFPImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &o);
+
+ void printFPZeroOperand(const MCInst *MI, unsigned OpNum, raw_ostream &o);
+
+ template<int MemScale>
+ void printOffsetUImm12Operand(const MCInst *MI,
+ unsigned OpNum, raw_ostream &o) {
+ printOffsetUImm12Operand(MI, OpNum, o, MemScale);
+ }
+
+ void printOffsetUImm12Operand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &o, int MemScale);
+
+ template<unsigned field_width, unsigned scale>
+ void printLabelOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+
+ template<unsigned RegWidth>
+ void printLogicalImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+
+ template<typename SomeNamedImmMapper>
+ void printNamedImmOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ printNamedImmOperand(SomeNamedImmMapper(), MI, OpNum, O);
+ }
+
+ void printNamedImmOperand(const NamedImmMapper &Mapper,
+ const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+
+ void printSysRegOperand(const A64SysReg::SysRegMapper &Mapper,
+ const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+
+ void printMRSOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ printSysRegOperand(A64SysReg::MRSMapper(), MI, OpNum, O);
+ }
+
+ void printMSROperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ printSysRegOperand(A64SysReg::MSRMapper(), MI, OpNum, O);
+ }
+
+ void printShiftOperand(const char *name, const MCInst *MI,
+ unsigned OpIdx, raw_ostream &O);
+
+ void printLSLOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+
+ void printLSROperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) {
+ printShiftOperand("lsr", MI, OpNum, O);
+ }
+ void printASROperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) {
+ printShiftOperand("asr", MI, OpNum, O);
+ }
+ void printROROperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) {
+ printShiftOperand("ror", MI, OpNum, O);
+ }
+
+ template<A64SE::ShiftExtSpecifiers Shift>
+ void printShiftOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) {
+ printShiftOperand(MI, OpNum, O, Shift);
+ }
+
+ void printShiftOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O, A64SE::ShiftExtSpecifiers Sh);
+
+
+ void printMoveWideImmOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+
+ template<int MemSize> void
+ printSImm7ScaledOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+
+ void printOffsetSImm9Operand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+
+ void printPRFMOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+
+ template<A64SE::ShiftExtSpecifiers EXT>
+ void printRegExtendOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ printRegExtendOperand(MI, OpNum, O, EXT);
+ }
+
+ void printRegExtendOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O, A64SE::ShiftExtSpecifiers Ext);
+
+ void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
+
+ bool isStackReg(unsigned RegNo) {
+ return RegNo == AArch64::XSP || RegNo == AArch64::WSP;
+ }
+
+
+};
+
+}
+
+#endif
diff --git a/lib/Target/AArch64/InstPrinter/CMakeLists.txt b/lib/Target/AArch64/InstPrinter/CMakeLists.txt
new file mode 100644
index 0000000000..d4b980a94d
--- /dev/null
+++ b/lib/Target/AArch64/InstPrinter/CMakeLists.txt
@@ -0,0 +1,8 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMAArch64AsmPrinter
+ AArch64InstPrinter.cpp
+ )
+
+add_dependencies(LLVMAArch64AsmPrinter AArch64CommonTableGen)
+
diff --git a/lib/Target/AArch64/InstPrinter/LLVMBuild.txt b/lib/Target/AArch64/InstPrinter/LLVMBuild.txt
new file mode 100644
index 0000000000..40fdc558b9
--- /dev/null
+++ b/lib/Target/AArch64/InstPrinter/LLVMBuild.txt
@@ -0,0 +1,24 @@
+;===- ./lib/Target/AArch64/InstPrinter/LLVMBuild.txt -----------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = AArch64AsmPrinter
+parent = AArch64
+required_libraries = MC Support
+add_to_library_groups = AArch64
+
diff --git a/lib/Target/AArch64/InstPrinter/Makefile b/lib/Target/AArch64/InstPrinter/Makefile
new file mode 100644
index 0000000000..1c36a8dea7
--- /dev/null
+++ b/lib/Target/AArch64/InstPrinter/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/AArch64/InstPrinter/Makefile ------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMAArch64AsmPrinter
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/AArch64/LLVMBuild.txt b/lib/Target/AArch64/LLVMBuild.txt
new file mode 100644
index 0000000000..09c7448e7e
--- /dev/null
+++ b/lib/Target/AArch64/LLVMBuild.txt
@@ -0,0 +1,36 @@
+;===- ./lib/Target/AArch64/LLVMBuild.txt -----------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[common]
+subdirectories = AsmParser Disassembler InstPrinter MCTargetDesc TargetInfo
+
+[component_0]
+type = TargetGroup
+name = AArch64
+parent = Target
+has_asmparser = 1
+has_asmprinter = 1
+has_disassembler = 1
+;has_jit = 1
+
+[component_1]
+type = Library
+name = AArch64CodeGen
+parent = AArch64
+required_libraries = AArch64AsmPrinter AArch64Desc AArch64Info AsmPrinter CodeGen Core MC SelectionDAG Support Target
+add_to_library_groups = AArch64
+
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
new file mode 100644
index 0000000000..1c093693a8
--- /dev/null
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
@@ -0,0 +1,580 @@
+//===-- AArch64AsmBackend.cpp - AArch64 Assembler Backend -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/AArch64FixupKinds.h"
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+class AArch64AsmBackend : public MCAsmBackend {
+ const MCSubtargetInfo* STI;
+public:
+ AArch64AsmBackend(const Target &T, const StringRef TT)
+ : MCAsmBackend(),
+ STI(AArch64_MC::createAArch64MCSubtargetInfo(TT, "", ""))
+ {}
+
+
+ ~AArch64AsmBackend() {
+ delete STI;
+ }
+
+ bool writeNopData(uint64_t Count, MCObjectWriter *OW) const;
+
+ virtual void processFixupValue(const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFixup &Fixup, const MCFragment *DF,
+ MCValue &Target, uint64_t &Value,
+ bool &IsResolved);
+};
+} // end anonymous namespace
+
+void AArch64AsmBackend::processFixupValue(const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFixup &Fixup,
+ const MCFragment *DF,
+ MCValue &Target, uint64_t &Value,
+ bool &IsResolved) {
+ // The ADRP instruction adds some multiple of 0x1000 to the current PC &
+ // ~0xfff. This means that the required offset to reach a symbol can vary by
+ // up to one step depending on where the ADRP is in memory. For example:
+ //
+ // ADRP x0, there
+ // there:
+ //
+ // If the ADRP occurs at address 0xffc then "there" will be at 0x1000 and
+ // we'll need that as an offset. At any other address "there" will be in the
+ // same page as the ADRP and the instruction should encode 0x0. Assuming the
+ // section isn't 0x1000-aligned, we therefore need to delegate this decision
+ // to the linker -- a relocation!
+ if ((uint32_t)Fixup.getKind() == AArch64::fixup_a64_adr_prel_page ||
+ (uint32_t)Fixup.getKind() == AArch64::fixup_a64_adr_prel_got_page ||
+ (uint32_t)Fixup.getKind() == AArch64::fixup_a64_adr_gottprel_page ||
+ (uint32_t)Fixup.getKind() == AArch64::fixup_a64_tlsdesc_adr_page)
+ IsResolved = false;
+}
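// Illustrative sketch (not from the patch) of the arithmetic above: with the
// ADRP at 0xffc and "there" at 0x1000, the page bases are 0x0 and 0x1000, so
// the ADRP immediate must be 1; the same ADRP placed at 0x1000 would need an
// immediate of 0. Only the linker knows the final placement, hence IsResolved
// is forced to false for the page-computing fixups.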
+
+
+static uint64_t adjustFixupValue(unsigned Kind, uint64_t Value);
+
+namespace {
+
+class ELFAArch64AsmBackend : public AArch64AsmBackend {
+public:
+ uint8_t OSABI;
+ ELFAArch64AsmBackend(const Target &T, const StringRef TT,
+ uint8_t _OSABI)
+ : AArch64AsmBackend(T, TT), OSABI(_OSABI) { }
+
+ bool fixupNeedsRelaxation(const MCFixup &Fixup,
+ uint64_t Value,
+ const MCRelaxableFragment *DF,
+ const MCAsmLayout &Layout) const;
+
+ unsigned int getNumFixupKinds() const {
+ return AArch64::NumTargetFixupKinds;
+ }
+
+ const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const {
+ const static MCFixupKindInfo Infos[AArch64::NumTargetFixupKinds] = {
+// This table *must* be in the order that the fixup_* kinds are defined in
+// AArch64FixupKinds.h.
+//
+// Name Offset (bits) Size (bits) Flags
+ { "fixup_a64_ld_prel", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_a64_adr_prel", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_a64_adr_prel_page", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_a64_add_lo12", 0, 32, 0 },
+ { "fixup_a64_ldst8_lo12", 0, 32, 0 },
+ { "fixup_a64_ldst16_lo12", 0, 32, 0 },
+ { "fixup_a64_ldst32_lo12", 0, 32, 0 },
+ { "fixup_a64_ldst64_lo12", 0, 32, 0 },
+ { "fixup_a64_ldst128_lo12", 0, 32, 0 },
+ { "fixup_a64_tstbr", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_a64_condbr", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_a64_uncondbr", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_a64_call", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_a64_movw_uabs_g0", 0, 32, 0 },
+ { "fixup_a64_movw_uabs_g0_nc", 0, 32, 0 },
+ { "fixup_a64_movw_uabs_g1", 0, 32, 0 },
+ { "fixup_a64_movw_uabs_g1_nc", 0, 32, 0 },
+ { "fixup_a64_movw_uabs_g2", 0, 32, 0 },
+ { "fixup_a64_movw_uabs_g2_nc", 0, 32, 0 },
+ { "fixup_a64_movw_uabs_g3", 0, 32, 0 },
+ { "fixup_a64_movw_sabs_g0", 0, 32, 0 },
+ { "fixup_a64_movw_sabs_g1", 0, 32, 0 },
+ { "fixup_a64_movw_sabs_g2", 0, 32, 0 },
+ { "fixup_a64_adr_prel_got_page", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_a64_ld64_got_lo12_nc", 0, 32, 0 },
+ { "fixup_a64_movw_dtprel_g2", 0, 32, 0 },
+ { "fixup_a64_movw_dtprel_g1", 0, 32, 0 },
+ { "fixup_a64_movw_dtprel_g1_nc", 0, 32, 0 },
+ { "fixup_a64_movw_dtprel_g0", 0, 32, 0 },
+ { "fixup_a64_movw_dtprel_g0_nc", 0, 32, 0 },
+ { "fixup_a64_add_dtprel_hi12", 0, 32, 0 },
+ { "fixup_a64_add_dtprel_lo12", 0, 32, 0 },
+ { "fixup_a64_add_dtprel_lo12_nc", 0, 32, 0 },
+ { "fixup_a64_ldst8_dtprel_lo12", 0, 32, 0 },
+ { "fixup_a64_ldst8_dtprel_lo12_nc", 0, 32, 0 },
+ { "fixup_a64_ldst16_dtprel_lo12", 0, 32, 0 },
+ { "fixup_a64_ldst16_dtprel_lo12_nc", 0, 32, 0 },
+ { "fixup_a64_ldst32_dtprel_lo12", 0, 32, 0 },
+ { "fixup_a64_ldst32_dtprel_lo12_nc", 0, 32, 0 },
+ { "fixup_a64_ldst64_dtprel_lo12", 0, 32, 0 },
+ { "fixup_a64_ldst64_dtprel_lo12_nc", 0, 32, 0 },
+ { "fixup_a64_movw_gottprel_g1", 0, 32, 0 },
+ { "fixup_a64_movw_gottprel_g0_nc", 0, 32, 0 },
+ { "fixup_a64_adr_gottprel_page", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_a64_ld64_gottprel_lo12_nc", 0, 32, 0 },
+ { "fixup_a64_ld_gottprel_prel19", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_a64_movw_tprel_g2", 0, 32, 0 },
+ { "fixup_a64_movw_tprel_g1", 0, 32, 0 },
+ { "fixup_a64_movw_tprel_g1_nc", 0, 32, 0 },
+ { "fixup_a64_movw_tprel_g0", 0, 32, 0 },
+ { "fixup_a64_movw_tprel_g0_nc", 0, 32, 0 },
+ { "fixup_a64_add_tprel_hi12", 0, 32, 0 },
+ { "fixup_a64_add_tprel_lo12", 0, 32, 0 },
+ { "fixup_a64_add_tprel_lo12_nc", 0, 32, 0 },
+ { "fixup_a64_ldst8_tprel_lo12", 0, 32, 0 },
+ { "fixup_a64_ldst8_tprel_lo12_nc", 0, 32, 0 },
+ { "fixup_a64_ldst16_tprel_lo12", 0, 32, 0 },
+ { "fixup_a64_ldst16_tprel_lo12_nc", 0, 32, 0 },
+ { "fixup_a64_ldst32_tprel_lo12", 0, 32, 0 },
+ { "fixup_a64_ldst32_tprel_lo12_nc", 0, 32, 0 },
+ { "fixup_a64_ldst64_tprel_lo12", 0, 32, 0 },
+ { "fixup_a64_ldst64_tprel_lo12_nc", 0, 32, 0 },
+ { "fixup_a64_tlsdesc_adr_page", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_a64_tlsdesc_ld64_lo12_nc", 0, 32, 0 },
+ { "fixup_a64_tlsdesc_add_lo12_nc", 0, 32, 0 },
+ { "fixup_a64_tlsdesc_call", 0, 0, 0 }
+ };
+ if (Kind < FirstTargetFixupKind)
+ return MCAsmBackend::getFixupKindInfo(Kind);
+
+ assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
+ "Invalid kind!");
+ return Infos[Kind - FirstTargetFixupKind];
+ }
+
+ void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+ uint64_t Value) const {
+ unsigned NumBytes = getFixupKindInfo(Fixup.getKind()).TargetSize / 8;
+ Value = adjustFixupValue(Fixup.getKind(), Value);
+ if (!Value) return; // Doesn't change encoding.
+
+ unsigned Offset = Fixup.getOffset();
+ assert(Offset + NumBytes <= DataSize && "Invalid fixup offset!");
+
+ // For each byte of the fragment that the fixup touches, mask in the bits
+ // from the fixup value.
+ for (unsigned i = 0; i != NumBytes; ++i) {
+ Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff);
+ }
+ }
+
+ bool mayNeedRelaxation(const MCInst&) const {
+ return false;
+ }
+
+ void relaxInstruction(const MCInst&, llvm::MCInst&) const {
+ llvm_unreachable("Cannot relax instructions");
+ }
+
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+ return createAArch64ELFObjectWriter(OS, OSABI);
+ }
+};
+
+} // end anonymous namespace
+
+bool
+ELFAArch64AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
+ uint64_t Value,
+ const MCRelaxableFragment *DF,
+ const MCAsmLayout &Layout) const {
+ // Correct for now. With every instruction 32 bits wide, only very low-level
+ // considerations could lead to selecting an encoding that later fails to fit.
+ return false;
+}
+
+
+bool AArch64AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
+ // Can't emit NOPs when the requested size isn't a multiple of 32 bits.
+ if (Count % 4 != 0)
+ return false;
+
+ uint64_t NumNops = Count / 4;
+ for (uint64_t i = 0; i != NumNops; ++i)
+ OW->Write32(0xd503201f);
+
+ return true;
+}
+
+static unsigned ADRImmBits(unsigned Value) {
+ unsigned lo2 = Value & 0x3;
+ unsigned hi19 = (Value & 0x1fffff) >> 2;
+
+ return (hi19 << 5) | (lo2 << 29);
+}
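// Illustrative sketch (not from the patch): the 21-bit ADR/ADRP immediate is
// split into immlo (instruction bits 30:29) and immhi (bits 23:5).
static unsigned ADRImmBitsSketch() {
  return ADRImmBits(0x1fffff); // (0x7ffff << 5) | (3 << 29) == 0x60ffffe0
}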
+
+static uint64_t adjustFixupValue(unsigned Kind, uint64_t Value) {
+ switch (Kind) {
+ default:
+ llvm_unreachable("Unknown fixup kind!");
+ case FK_Data_2:
+ assert((int64_t)Value >= -32768 &&
+ (int64_t)Value <= 65535 &&
+ "Out of range ABS16 fixup");
+ return Value;
+ case FK_Data_4:
+ assert((int64_t)Value >= -(1LL << 31) &&
+ (int64_t)Value <= (1LL << 32) - 1 &&
+ "Out of range ABS32 fixup");
+ return Value;
+ case FK_Data_8:
+ return Value;
+
+ case AArch64::fixup_a64_ld_gottprel_prel19:
+ // R_AARCH64_LD_GOTTPREL_PREL19: Set a load-literal immediate to bits 1F
+ // FFFC of G(TPREL(S+A)) - P; check -2^20 <= X < 2^20.
+ case AArch64::fixup_a64_ld_prel:
+ // R_AARCH64_LD_PREL_LO19: Sets a load-literal (immediate) value to bits
+ // 1F FFFC of S+A-P, checking that -2^20 <= S+A-P < 2^20.
+ assert((int64_t)Value >= -(1LL << 20) &&
+ (int64_t)Value < (1LL << 20) && "Out of range LDR (lit) fixup");
+ return (Value & 0x1ffffc) << 3;
+
+ case AArch64::fixup_a64_adr_prel:
+ // R_AARCH64_ADR_PREL_LO21: Sets an ADR immediate value to bits 1F FFFF of
+ // the result of S+A-P, checking that -2^20 <= S+A-P < 2^20.
+ assert((int64_t)Value >= -(1LL << 20) &&
+ (int64_t)Value < (1LL << 20) && "Out of range ADR fixup");
+ return ADRImmBits(Value & 0x1fffff);
+
+ case AArch64::fixup_a64_adr_prel_page:
+ // R_AARCH64_ADR_PREL_PG_HI21: Sets an ADRP immediate value to bits 1 FFFF
+ // F000 of the result of the operation, checking that -2^32 <= result <
+ // 2^32.
+ assert((int64_t)Value >= -(1LL << 32) &&
+ (int64_t)Value < (1LL << 32) && "Out of range ADRP fixup");
+ return ADRImmBits((Value & 0x1fffff000ULL) >> 12);
+
+ case AArch64::fixup_a64_add_dtprel_hi12:
+ // R_AARCH64_TLSLD_ADD_DTPREL_HI12: Set an ADD immediate field to bits
+ // FF F000 of DTPREL(S+A), check 0 <= X < 2^24.
+ case AArch64::fixup_a64_add_tprel_hi12:
+ // R_AARCH64_TLSLE_ADD_TPREL_HI12: Set an ADD immediate field to bits
+ // FF F000 of TPREL(S+A), check 0 <= X < 2^24.
+ assert((int64_t)Value >= 0 &&
+ (int64_t)Value < (1LL << 24) && "Out of range ADD fixup");
+ return (Value & 0xfff000) >> 2;
+
+ case AArch64::fixup_a64_add_dtprel_lo12:
+ // R_AARCH64_TLSLD_ADD_DTPREL_LO12: Set an ADD immediate field to bits
+ // FFF of DTPREL(S+A), check 0 <= X < 2^12.
+ case AArch64::fixup_a64_add_tprel_lo12:
+ // R_AARCH64_TLSLD_ADD_TPREL_LO12: Set an ADD immediate field to bits
+ // FFF of TPREL(S+A), check 0 <= X < 2^12.
+ assert((int64_t)Value >= 0 &&
+ (int64_t)Value < (1LL << 12) && "Out of range ADD fixup");
+ // ... fallthrough to no-checking versions ...
+ case AArch64::fixup_a64_add_dtprel_lo12_nc:
+ // R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC: Set an ADD immediate field to bits
+ // FFF of DTPREL(S+A) with no overflow check.
+ case AArch64::fixup_a64_add_tprel_lo12_nc:
+ // R_AARCH64_TLSLD_ADD_TPREL_LO12_NC: Set an ADD immediate field to bits
+ // FFF of TPREL(S+A) with no overflow check.
+ case AArch64::fixup_a64_tlsdesc_add_lo12_nc:
+ // R_AARCH64_TLSDESC_ADD_LO12_NC: Set an ADD immediate field to bits
+ // FFF of G(TLSDESC(S+A)), with no overflow check.
+ case AArch64::fixup_a64_add_lo12:
+ // R_AARCH64_ADD_ABS_LO12_NC: Sets an ADD immediate value to bits FFF of
+ // S+A, with no overflow check.
+ return (Value & 0xfff) << 10;
+
+ case AArch64::fixup_a64_ldst8_dtprel_lo12:
+ // R_AARCH64_TLSLD_LDST8_DTPREL_LO12: Set an LD/ST offset field to bits FFF
+ // of DTPREL(S+A), check 0 <= X < 2^12.
+ case AArch64::fixup_a64_ldst8_tprel_lo12:
+ // R_AARCH64_TLSLE_LDST8_TPREL_LO12: Set an LD/ST offset field to bits FFF
+ // of TPREL(S+A), check 0 <= X < 2^12.
+ assert((int64_t) Value >= 0 &&
+ (int64_t) Value < (1LL << 12) && "Out of range LD/ST fixup");
+ // ... fallthrough to no-checking versions ...
+ case AArch64::fixup_a64_ldst8_dtprel_lo12_nc:
+ // R_AARCH64_TLSLD_LDST8_DTPREL_LO12: Set an LD/ST offset field to bits FFF
+ // of DTPREL(S+A), with no overflow check.
+ case AArch64::fixup_a64_ldst8_tprel_lo12_nc:
+ // R_AARCH64_TLSLD_LDST8_TPREL_LO12: Set an LD/ST offset field to bits FFF
+ // of TPREL(S+A), with no overflow check.
+ case AArch64::fixup_a64_ldst8_lo12:
+ // R_AARCH64_LDST8_ABS_LO12_NC: Sets an LD/ST immediate value to bits FFF
+ // of S+A, with no overflow check.
+ return (Value & 0xfff) << 10;
+
+ case AArch64::fixup_a64_ldst16_dtprel_lo12:
+ // R_AARCH64_TLSLD_LDST16_DTPREL_LO12: Set an LD/ST offset field to bits FFE
+ // of DTPREL(S+A), check 0 <= X < 2^12.
+ case AArch64::fixup_a64_ldst16_tprel_lo12:
+ // R_AARCH64_TLSLE_LDST16_TPREL_LO12: Set an LD/ST offset field to bits FFE
+ // of TPREL(S+A), check 0 <= X < 2^12.
+ assert((int64_t) Value >= 0 &&
+ (int64_t) Value < (1LL << 12) && "Out of range LD/ST fixup");
+ // ... fallthrough to no-checking versions ...
+ case AArch64::fixup_a64_ldst16_dtprel_lo12_nc:
+ // R_AARCH64_TLSLD_LDST16_DTPREL_LO12: Set an LD/ST offset field to bits FFE
+ // of DTPREL(S+A), with no overflow check.
+ case AArch64::fixup_a64_ldst16_tprel_lo12_nc:
+ // R_AARCH64_TLSLD_LDST16_TPREL_LO12: Set an LD/ST offset field to bits FFE
+ // of TPREL(S+A), with no overflow check.
+ case AArch64::fixup_a64_ldst16_lo12:
+ // R_AARCH64_LDST16_ABS_LO12_NC: Sets an LD/ST immediate value to bits FFE
+ // of S+A, with no overflow check.
+ return (Value & 0xffe) << 9;
+
+ case AArch64::fixup_a64_ldst32_dtprel_lo12:
+ // R_AARCH64_TLSLD_LDST32_DTPREL_LO12: Set an LD/ST offset field to bits FFC
+ // of DTPREL(S+A), check 0 <= X < 2^12.
+ case AArch64::fixup_a64_ldst32_tprel_lo12:
+ // R_AARCH64_TLSLE_LDST32_TPREL_LO12: Set an LD/ST offset field to bits FFC
+ // of TPREL(S+A), check 0 <= X < 2^12.
+ assert((int64_t) Value >= 0 &&
+ (int64_t) Value < (1LL << 12) && "Out of range LD/ST fixup");
+ // ... fallthrough to no-checking versions ...
+ case AArch64::fixup_a64_ldst32_dtprel_lo12_nc:
+ // R_AARCH64_TLSLD_LDST32_DTPREL_LO12: Set an LD/ST offset field to bits FFC
+ // of DTPREL(S+A), with no overflow check.
+ case AArch64::fixup_a64_ldst32_tprel_lo12_nc:
+ // R_AARCH64_TLSLD_LDST32_TPREL_LO12: Set an LD/ST offset field to bits FFC
+ // of TPREL(S+A), with no overflow check.
+ case AArch64::fixup_a64_ldst32_lo12:
+ // R_AARCH64_LDST32_ABS_LO12_NC: Sets an LD/ST immediate value to bits FFC
+ // of S+A, with no overflow check.
+ return (Value & 0xffc) << 8;
+
+ case AArch64::fixup_a64_ldst64_dtprel_lo12:
+ // R_AARCH64_TLSLD_LDST64_DTPREL_LO12: Set an LD/ST offset field to bits FF8
+ // of DTPREL(S+A), check 0 <= X < 2^12.
+ case AArch64::fixup_a64_ldst64_tprel_lo12:
+ // R_AARCH64_TLSLE_LDST64_TPREL_LO12: Set an LD/ST offset field to bits FF8
+ // of TPREL(S+A), check 0 <= X < 2^12.
+ assert((int64_t) Value >= 0 &&
+ (int64_t) Value < (1LL << 12) && "Out of range LD/ST fixup");
+ // ... fallthrough to no-checking versions ...
+ case AArch64::fixup_a64_ldst64_dtprel_lo12_nc:
+ // R_AARCH64_TLSLD_LDST64_DTPREL_LO12_NC: Set an LD/ST offset field to bits
+ // FF8 of DTPREL(S+A), with no overflow check.
+ case AArch64::fixup_a64_ldst64_tprel_lo12_nc:
+ // R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC: Set an LD/ST offset field to bits
+ // FF8 of TPREL(S+A), with no overflow check.
+ case AArch64::fixup_a64_ldst64_lo12:
+ // R_AARCH64_LDST64_ABS_LO12_NC: Sets an LD/ST immediate value to bits FF8
+ // of S+A, with no overflow check.
+ return (Value & 0xff8) << 7;
+
+ case AArch64::fixup_a64_ldst128_lo12:
+ // R_AARCH64_LDST128_ABS_LO12_NC: Sets an LD/ST immediate value to bits FF0
+ // of S+A, with no overflow check.
+ return (Value & 0xff0) << 6;
+
+ case AArch64::fixup_a64_movw_uabs_g0:
+ // R_AARCH64_MOVW_UABS_G0: Sets a MOVZ immediate field to bits FFFF of S+A
+ // with a check that S+A < 2^16
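+ // The 16-bit immediate of MOVZ/MOVK occupies bits [20:5] of the
+ // instruction, hence the shift by 5 in the MOVW cases.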
+ assert(Value <= 0xffff && "Out of range move wide fixup");
+ return (Value & 0xffff) << 5;
+
+ case AArch64::fixup_a64_movw_dtprel_g0_nc:
+ // R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC: Sets a MOVK immediate field to bits
+ // FFFF of DTPREL(S+A) with no overflow check.
+ case AArch64::fixup_a64_movw_gottprel_g0_nc:
+ // R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC: Sets a MOVK immediate field to bits
+ // FFFF of G(TPREL(S+A)) - GOT with no overflow check.
+ case AArch64::fixup_a64_movw_tprel_g0_nc:
+ // R_AARCH64_TLSLE_MOVW_TPREL_G0_NC: Sets a MOVK immediate field to bits
+ // FFFF of TPREL(S+A) with no overflow check.
+ case AArch64::fixup_a64_movw_uabs_g0_nc:
+ // R_AARCH64_MOVW_UABS_G0_NC: Sets a MOVK immediate field to bits FFFF of
+ // S+A with no overflow check.
+ return (Value & 0xffff) << 5;
+
+ case AArch64::fixup_a64_movw_uabs_g1:
+ // R_AARCH64_MOVW_UABS_G1: Sets a MOVZ immediate field to bits FFFF0000 of
+ // S+A with a check that S+A < 2^32
+ assert(Value <= 0xffffffffull && "Out of range move wide fixup");
+ return ((Value >> 16) & 0xffff) << 5;
+
+ case AArch64::fixup_a64_movw_dtprel_g1_nc:
+ // R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC: Set a MOVK immediate field
+ // to bits FFFF0000 of DTPREL(S+A), with no overflow check.
+ case AArch64::fixup_a64_movw_tprel_g1_nc:
+ // R_AARCH64_TLSLE_MOVW_TPREL_G1_NC: Set a MOVK immediate field
+ // to bits FFFF0000 of TPREL(S+A), with no overflow check.
+ case AArch64::fixup_a64_movw_uabs_g1_nc:
+ // R_AARCH64_MOVW_UABS_G1_NC: Sets a MOVK immediate field to bits
+ // FFFF0000 of S+A with no overflow check.
+ return ((Value >> 16) & 0xffff) << 5;
+
+ case AArch64::fixup_a64_movw_uabs_g2:
+ // R_AARCH64_MOVW_UABS_G2: Sets a MOVZ immediate field to bits FFFF 0000
+ // 0000 of S+A with a check that S+A < 2^48
+ assert(Value <= 0xffffffffffffull && "Out of range move wide fixup");
+ return ((Value >> 32) & 0xffff) << 5;
+
+ case AArch64::fixup_a64_movw_uabs_g2_nc:
+ // R_AARCH64_MOVW_UABS_G2_NC: Sets a MOVK immediate field to bits FFFF 0000
+ // 0000 of S+A with no overflow check.
+ return ((Value >> 32) & 0xffff) << 5;
+
+ case AArch64::fixup_a64_movw_uabs_g3:
+ // R_AARCH64_MOVW_UABS_G3: Sets a MOVZ immediate field to bits FFFF 0000
+ // 0000 0000 of S+A (no overflow check needed)
+ return ((Value >> 48) & 0xffff) << 5;
+
+ case AArch64::fixup_a64_movw_dtprel_g0:
+ // R_AARCH64_TLSLD_MOVW_DTPREL_G0: Set a MOV[NZ] immediate field
+ // to bits FFFF of DTPREL(S+A).
+ case AArch64::fixup_a64_movw_tprel_g0:
+ // R_AARCH64_TLSLE_MOVW_TPREL_G0: Set a MOV[NZ] immediate field to
+ // bits FFFF of TPREL(S+A).
+ case AArch64::fixup_a64_movw_sabs_g0: {
+ // R_AARCH64_MOVW_SABS_G0: Sets MOV[NZ] immediate field using bits FFFF of
+ // S+A (see notes below); check -2^16 <= S+A < 2^16. (notes say that we
+ // should convert between MOVN and MOVZ to achieve our goals).
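+ // A negative value is materialised as a MOVN of the bitwise NOT, so we
+ // encode ~Value and leave the instruction as a MOVN; for non-negative
+ // values we flip bit 30 to turn the emitted MOVN template into a MOVZ.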
+ int64_t Signed = Value;
+ assert(Signed >= -(1LL << 16) && Signed < (1LL << 16)
+ && "Out of range move wide fixup");
+ if (Signed >= 0) {
+ Value = (Value & 0xffff) << 5;
+ // Bit 30 converts the MOVN encoding into a MOVZ
+ Value |= 1 << 30;
+ } else {
+ // MCCodeEmitter should have encoded a MOVN, which is fine.
+ Value = (~Value & 0xffff) << 5;
+ }
+ return Value;
+ }
+
+ case AArch64::fixup_a64_movw_dtprel_g1:
+ // R_AARCH64_TLSLD_MOVW_DTPREL_G1: Set a MOV[NZ] immediate field
+ // to bits FFFF0000 of DTPREL(S+A).
+ case AArch64::fixup_a64_movw_gottprel_g1:
+ // R_AARCH64_TLSIE_MOVW_GOTTPREL_G1: Set a MOV[NZ] immediate field
+ // to bits FFFF0000 of G(TPREL(S+A)) - GOT.
+ case AArch64::fixup_a64_movw_tprel_g1:
+ // R_AARCH64_TLSLE_MOVW_TPREL_G1: Set a MOV[NZ] immediate field to
+ // bits FFFF0000 of TPREL(S+A).
+ case AArch64::fixup_a64_movw_sabs_g1: {
+ // R_AARCH64_MOVW_SABS_G1: Sets MOV[NZ] immediate field using bits FFFF 0000
+ // of S+A (see notes below); check -2^32 <= S+A < 2^32. (notes say that we
+ // should convert between MOVN and MOVZ to achieve our goals).
+ int64_t Signed = Value;
+ assert(Signed >= -(1LL << 32) && Signed < (1LL << 32)
+ && "Out of range move wide fixup");
+ if (Signed >= 0) {
+ Value = ((Value >> 16) & 0xffff) << 5;
+ // Bit 30 converts the MOVN encoding into a MOVZ
+ Value |= 1 << 30;
+ } else {
+ Value = ((~Value >> 16) & 0xffff) << 5;
+ }
+ return Value;
+ }
+
+ case AArch64::fixup_a64_movw_dtprel_g2:
+ // R_AARCH64_TLSLD_MOVW_DTPREL_G2: Set a MOV[NZ] immediate field
+ // to bits FFFF 0000 0000 of DTPREL(S+A).
+ case AArch64::fixup_a64_movw_tprel_g2:
+ // R_AARCH64_TLSLE_MOVW_TPREL_G2: Set a MOV[NZ] immediate field to
+ // bits FFFF 0000 0000 of TPREL(S+A).
+ case AArch64::fixup_a64_movw_sabs_g2: {
+ // R_AARCH64_MOVW_SABS_G2: Sets MOV[NZ] immediate field using bits FFFF 0000
+ // 0000 of S+A (see notes below); check -2^48 <= S+A < 2^48. (notes say that
+ // we should convert between MOVN and MOVZ to achieve our goals).
+ int64_t Signed = Value;
+ assert(Signed >= -(1LL << 48) && Signed < (1LL << 48)
+ && "Out of range move wide fixup");
+ if (Signed >= 0) {
+ Value = ((Value >> 32) & 0xffff) << 5;
+ // Bit 30 converts the MOVN encoding into a MOVZ
+ Value |= 1 << 30;
+ } else {
+ Value = ((~Value >> 32) & 0xffff) << 5;
+ }
+ return Value;
+ }
+
+ case AArch64::fixup_a64_tstbr:
+ // R_AARCH64_TSTBR14: Sets the immediate field of a TBZ/TBNZ instruction to
+ // bits FFFC of S+A-P, checking -2^15 <= S+A-P < 2^15.
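+ // The TBZ/TBNZ offset is encoded in words in the imm14 field at bits
+ // [18:5], hence the combined shift by (5 - 2).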
+ assert((int64_t)Value >= -(1LL << 15) &&
+ (int64_t)Value < (1LL << 15) && "Out of range TBZ/TBNZ fixup");
+ return (Value & 0xfffc) << (5 - 2);
+
+ case AArch64::fixup_a64_condbr:
+ // R_AARCH64_CONDBR19: Sets the immediate field of a conditional branch
+ // instruction to bits 1FFFFC of S+A-P, checking -2^20 <= S+A-P < 2^20.
+ assert((int64_t)Value >= -(1LL << 20) &&
+ (int64_t)Value < (1LL << 20) && "Out of range B.cond fixup");
+ return (Value & 0x1ffffc) << (5 - 2);
+
+ case AArch64::fixup_a64_uncondbr:
+ // R_AARCH64_JUMP26: handled identically to R_AARCH64_CALL26 below; the
+ // distinction only matters to the linker.
+ case AArch64::fixup_a64_call:
+ // R_AARCH64_CALL26: Sets a CALL immediate field to bits FFFFFFC of S+A-P,
+ // checking that -2^27 <= S+A-P < 2^27.
+ assert((int64_t)Value >= -(1LL << 27) &&
+ (int64_t)Value < (1LL << 27) && "Out of range branch fixup");
+ return (Value & 0xffffffc) >> 2;
+
+ case AArch64::fixup_a64_adr_gottprel_page:
+ // R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21: Set an ADRP immediate field to bits
+ // 1FFFFF000 of Page(G(TPREL(S+A))) - Page(P); check -2^32 <= X < 2^32.
+ case AArch64::fixup_a64_tlsdesc_adr_page:
+ // R_AARCH64_TLSDESC_ADR_PAGE: Set an ADRP immediate field to bits 1FFFFF000
+ // of Page(G(TLSDESC(S+A))) - Page(P); check -2^32 <= X < 2^32.
+ case AArch64::fixup_a64_adr_prel_got_page:
+ // R_AARCH64_ADR_GOT_PAGE: Sets the immediate value of an ADRP to bits
+ // 1FFFFF000 of Page(G(S)) - Page(P), checking that
+ // -2^32 <= Page(G(S)) - Page(P) < 2^32.
+ assert((int64_t)Value >= -(1LL << 32) &&
+ (int64_t)Value < (1LL << 32) && "Out of range ADRP fixup");
+ return ADRImmBits((Value & 0x1fffff000) >> 12);
+
+ case AArch64::fixup_a64_ld64_gottprel_lo12_nc:
+ // R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC: Set an LD offset field to bits FF8
+ // of X, with no overflow check. Check that X & 7 == 0.
+ case AArch64::fixup_a64_tlsdesc_ld64_lo12_nc:
+ // R_AARCH64_TLSDESC_LD64_LO12_NC: Set an LD offset field to bits FF8 of
+ // G(TLSDESC(S+A)), with no overflow check. Check that X & 7 == 0.
+ case AArch64::fixup_a64_ld64_got_lo12_nc:
+ // R_AARCH64_LD64_GOT_LO12_NC: Sets the LD/ST immediate field to bits FF8 of
+ // G(S) with no overflow check. Check X & 7 == 0
+ assert(((int64_t)Value & 7) == 0 && "Misaligned fixup");
+ return (Value & 0xff8) << 7;
+
+ case AArch64::fixup_a64_tlsdesc_call:
+ // R_AARCH64_TLSDESC_CALL: For relaxation only.
+ return 0;
+ }
+}
+
+MCAsmBackend *
+llvm::createAArch64AsmBackend(const Target &T, StringRef TT, StringRef CPU) {
+ Triple TheTriple(TT);
+
+ return new ELFAArch64AsmBackend(T, TT, TheTriple.getOS());
+}
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64BaseInfo.h b/lib/Target/AArch64/MCTargetDesc/AArch64BaseInfo.h
new file mode 100644
index 0000000000..b71eb0da54
--- /dev/null
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64BaseInfo.h
@@ -0,0 +1,779 @@
+//===-- AArch64BaseInfo.h - Top level definitions for AArch64 --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains small standalone helper functions and enum definitions for
+// the AArch64 target useful for the compiler back-end and the MC libraries.
+// As such, it deliberately does not include references to LLVM core
+// code gen types, passes, etc..
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_AARCH64_BASEINFO_H
+#define LLVM_AARCH64_BASEINFO_H
+
+#include "AArch64MCTargetDesc.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/ErrorHandling.h"
+
+namespace llvm {
+
+// Enums corresponding to AArch64 condition codes
+namespace A64CC {
+ // The CondCodes constants map directly to the 4-bit encoding of the
+ // condition field for predicated instructions.
+ enum CondCodes { // Meaning (integer) Meaning (floating-point)
+ EQ = 0, // Equal Equal
+ NE, // Not equal Not equal, or unordered
+ HS, // Unsigned higher or same >, ==, or unordered
+ LO, // Unsigned lower Less than
+ MI, // Minus, negative Less than
+ PL, // Plus, positive or zero >, ==, or unordered
+ VS, // Overflow Unordered
+ VC, // No overflow Ordered
+ HI, // Unsigned higher Greater than, or unordered
+ LS, // Unsigned lower or same Less than or equal
+ GE, // Greater than or equal Greater than or equal
+ LT, // Less than Less than, or unordered
+ GT, // Signed greater than Greater than
+ LE, // Signed less than or equal <, ==, or unordered
+ AL, // Always (unconditional) Always (unconditional)
+ NV, // Always (unconditional) Always (unconditional)
+ // Note the NV exists purely to disassemble 0b1111. Execution
+ // is "always".
+ Invalid
+ };
+
+} // namespace A64CC
+
+inline static const char *A64CondCodeToString(A64CC::CondCodes CC) {
+ switch (CC) {
+ default: llvm_unreachable("Unknown condition code");
+ case A64CC::EQ: return "eq";
+ case A64CC::NE: return "ne";
+ case A64CC::HS: return "hs";
+ case A64CC::LO: return "lo";
+ case A64CC::MI: return "mi";
+ case A64CC::PL: return "pl";
+ case A64CC::VS: return "vs";
+ case A64CC::VC: return "vc";
+ case A64CC::HI: return "hi";
+ case A64CC::LS: return "ls";
+ case A64CC::GE: return "ge";
+ case A64CC::LT: return "lt";
+ case A64CC::GT: return "gt";
+ case A64CC::LE: return "le";
+ case A64CC::AL: return "al";
+ case A64CC::NV: return "nv";
+ }
+}
+
+inline static A64CC::CondCodes A64StringToCondCode(StringRef CondStr) {
+ return StringSwitch<A64CC::CondCodes>(CondStr.lower())
+ .Case("eq", A64CC::EQ)
+ .Case("ne", A64CC::NE)
+ .Case("ne", A64CC::NE)
+ .Case("hs", A64CC::HS)
+ .Case("cs", A64CC::HS)
+ .Case("lo", A64CC::LO)
+ .Case("cc", A64CC::LO)
+ .Case("mi", A64CC::MI)
+ .Case("pl", A64CC::PL)
+ .Case("vs", A64CC::VS)
+ .Case("vc", A64CC::VC)
+ .Case("hi", A64CC::HI)
+ .Case("ls", A64CC::LS)
+ .Case("ge", A64CC::GE)
+ .Case("lt", A64CC::LT)
+ .Case("gt", A64CC::GT)
+ .Case("le", A64CC::LE)
+ .Case("al", A64CC::AL)
+ .Case("nv", A64CC::NV)
+ .Default(A64CC::Invalid);
+}
+
+inline static A64CC::CondCodes A64InvertCondCode(A64CC::CondCodes CC) {
+ // The condition codes are designed so that reversing a condition's meaning
+ // only requires inverting its low bit:
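+ // For example EQ (0b0000) <-> NE (0b0001) and GE (0b1010) <-> LT (0b1011).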
+
+ return static_cast<A64CC::CondCodes>(static_cast<unsigned>(CC) ^ 0x1);
+}
+
+/// Instances of this class can perform bidirectional mapping from random
+/// identifier strings to operand encodings. For example "MSR" takes a named
+/// system-register which must be encoded somehow and decoded for printing. This
+/// central location means that the information for those transformations is not
+/// duplicated and remains in sync.
+///
+/// FIXME: currently the algorithm is a completely unoptimised linear
+/// search. Obviously this could be improved, but we would probably want to work
+/// out just how often these instructions are emitted before working on it. It
+/// might even be optimal to just reorder the tables for the common instructions
+/// rather than changing the algorithm.
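+///
+/// Illustrative use (not from this patch; the actual mapping tables live in
+/// the corresponding .cpp file): with "bool Valid;", one would expect
+/// A64DB::DBarrierMapper().fromString("ish", Valid) to yield 0xb
+/// (A64DB::ISH), and toString(0xb, Valid) to yield "ish".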
+struct NamedImmMapper {
+ struct Mapping {
+ const char *Name;
+ uint32_t Value;
+ };
+
+ template<int N>
+ NamedImmMapper(const Mapping (&Pairs)[N], uint32_t TooBigImm)
+ : Pairs(&Pairs[0]), NumPairs(N), TooBigImm(TooBigImm) {}
+
+ StringRef toString(uint32_t Value, bool &Valid) const;
+ uint32_t fromString(StringRef Name, bool &Valid) const;
+
+ /// Many of the instructions allow an alternative assembly form consisting of
+ /// a simple immediate. Currently the only valid forms are the ranges [0, N),
+ /// where N == 0 means that no immediate syntax-form is allowed.
+ bool validImm(uint32_t Value) const;
+protected:
+ const Mapping *Pairs;
+ size_t NumPairs;
+ uint32_t TooBigImm;
+};
+
+namespace A64AT {
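+ // Each value packs the fields shown in the comments (Op0, Op1, CRn, CRm,
+ // Op2) as Op0<<14 | Op1<<11 | CRn<<7 | CRm<<3 | Op2; e.g. S1E1R below is
+ // 0b01'000'0111'1000'000 == 0x43c0.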
+ enum ATValues {
+ Invalid = -1, // Op0 Op1 CRn CRm Op2
+ S1E1R = 0x43c0, // 01 000 0111 1000 000
+ S1E2R = 0x63c0, // 01 100 0111 1000 000
+ S1E3R = 0x73c0, // 01 110 0111 1000 000
+ S1E1W = 0x43c1, // 01 000 0111 1000 001
+ S1E2W = 0x63c1, // 01 100 0111 1000 001
+ S1E3W = 0x73c1, // 01 110 0111 1000 001
+ S1E0R = 0x43c2, // 01 000 0111 1000 010
+ S1E0W = 0x43c3, // 01 000 0111 1000 011
+ S12E1R = 0x63c4, // 01 100 0111 1000 100
+ S12E1W = 0x63c5, // 01 100 0111 1000 101
+ S12E0R = 0x63c6, // 01 100 0111 1000 110
+ S12E0W = 0x63c7 // 01 100 0111 1000 111
+ };
+
+ struct ATMapper : NamedImmMapper {
+ const static Mapping ATPairs[];
+
+ ATMapper();
+ };
+
+}
+namespace A64DB {
+ enum DBValues {
+ Invalid = -1,
+ OSHLD = 0x1,
+ OSHST = 0x2,
+ OSH = 0x3,
+ NSHLD = 0x5,
+ NSHST = 0x6,
+ NSH = 0x7,
+ ISHLD = 0x9,
+ ISHST = 0xa,
+ ISH = 0xb,
+ LD = 0xd,
+ ST = 0xe,
+ SY = 0xf
+ };
+
+ struct DBarrierMapper : NamedImmMapper {
+ const static Mapping DBarrierPairs[];
+
+ DBarrierMapper();
+ };
+}
+
+namespace A64DC {
+ enum DCValues {
+ Invalid = -1, // Op0 Op1 CRn CRm Op2
+ ZVA = 0x5ba1, // 01 011 0111 0100 001
+ IVAC = 0x43b1, // 01 000 0111 0110 001
+ ISW = 0x43b2, // 01 000 0111 0110 010
+ CVAC = 0x5bd1, // 01 011 0111 1010 001
+ CSW = 0x43d2, // 01 000 0111 1010 010
+ CVAU = 0x5bd9, // 01 011 0111 1011 001
+ CIVAC = 0x5bf1, // 01 011 0111 1110 001
+ CISW = 0x43f2 // 01 000 0111 1110 010
+ };
+
+ struct DCMapper : NamedImmMapper {
+ const static Mapping DCPairs[];
+
+ DCMapper();
+ };
+
+}
+
+namespace A64IC {
+ enum ICValues {
+ Invalid = -1, // Op1 CRn CRm Op2
+ IALLUIS = 0x0388, // 000 0111 0001 000
+ IALLU = 0x03a8, // 000 0111 0101 000
+ IVAU = 0x1ba9 // 011 0111 0101 001
+ };
+
+
+ struct ICMapper : NamedImmMapper {
+ const static Mapping ICPairs[];
+
+ ICMapper();
+ };
+
+ static inline bool NeedsRegister(ICValues Val) {
+ return Val == IVAU;
+ }
+}
+
+namespace A64ISB {
+ enum ISBValues {
+ Invalid = -1,
+ SY = 0xf
+ };
+ struct ISBMapper : NamedImmMapper {
+ const static Mapping ISBPairs[];
+
+ ISBMapper();
+ };
+}
+
+namespace A64PRFM {
+ enum PRFMValues {
+ Invalid = -1,
+ PLDL1KEEP = 0x00,
+ PLDL1STRM = 0x01,
+ PLDL2KEEP = 0x02,
+ PLDL2STRM = 0x03,
+ PLDL3KEEP = 0x04,
+ PLDL3STRM = 0x05,
+ PSTL1KEEP = 0x10,
+ PSTL1STRM = 0x11,
+ PSTL2KEEP = 0x12,
+ PSTL2STRM = 0x13,
+ PSTL3KEEP = 0x14,
+ PSTL3STRM = 0x15
+ };
+
+ struct PRFMMapper : NamedImmMapper {
+ const static Mapping PRFMPairs[];
+
+ PRFMMapper();
+ };
+}
+
+namespace A64PState {
+ enum PStateValues {
+ Invalid = -1,
+ SPSel = 0x05,
+ DAIFSet = 0x1e,
+ DAIFClr = 0x1f
+ };
+
+ struct PStateMapper : NamedImmMapper {
+ const static Mapping PStatePairs[];
+
+ PStateMapper();
+ };
+
+}
+
+namespace A64SE {
+ enum ShiftExtSpecifiers {
+ Invalid = -1,
+ LSL,
+ LSR,
+ ASR,
+ ROR,
+
+ UXTB,
+ UXTH,
+ UXTW,
+ UXTX,
+
+ SXTB,
+ SXTH,
+ SXTW,
+ SXTX
+ };
+}
+
+namespace A64SysReg {
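+ // System register values pack the (Op0, Op1, CRn, CRm, Op2) fields of the
+ // MRS/MSR encoding as Op0<<14 | Op1<<11 | CRn<<7 | CRm<<3 | Op2; for
+ // example NZCV below is 0b11'011'0100'0010'000 == 0xda10.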
+ enum SysRegROValues {
+ MDCCSR_EL0 = 0x9808, // 10 011 0000 0001 000
+ DBGDTRRX_EL0 = 0x9828, // 10 011 0000 0101 000
+ MDRAR_EL1 = 0x8080, // 10 000 0001 0000 000
+ OSLSR_EL1 = 0x808c, // 10 000 0001 0001 100
+ DBGAUTHSTATUS_EL1 = 0x83f6, // 10 000 0111 1110 110
+ PMCEID0_EL0 = 0xdce6, // 11 011 1001 1100 110
+ PMCEID1_EL0 = 0xdce7, // 11 011 1001 1100 111
+ MIDR_EL1 = 0xc000, // 11 000 0000 0000 000
+ CCSIDR_EL1 = 0xc800, // 11 001 0000 0000 000
+ CLIDR_EL1 = 0xc801, // 11 001 0000 0000 001
+ CTR_EL0 = 0xd801, // 11 011 0000 0000 001
+ MPIDR_EL1 = 0xc005, // 11 000 0000 0000 101
+ REVIDR_EL1 = 0xc006, // 11 000 0000 0000 110
+ AIDR_EL1 = 0xc807, // 11 001 0000 0000 111
+ DCZID_EL0 = 0xd807, // 11 011 0000 0000 111
+ ID_PFR0_EL1 = 0xc008, // 11 000 0000 0001 000
+ ID_PFR1_EL1 = 0xc009, // 11 000 0000 0001 001
+ ID_DFR0_EL1 = 0xc00a, // 11 000 0000 0001 010
+ ID_AFR0_EL1 = 0xc00b, // 11 000 0000 0001 011
+ ID_MMFR0_EL1 = 0xc00c, // 11 000 0000 0001 100
+ ID_MMFR1_EL1 = 0xc00d, // 11 000 0000 0001 101
+ ID_MMFR2_EL1 = 0xc00e, // 11 000 0000 0001 110
+ ID_MMFR3_EL1 = 0xc00f, // 11 000 0000 0001 111
+ ID_ISAR0_EL1 = 0xc010, // 11 000 0000 0010 000
+ ID_ISAR1_EL1 = 0xc011, // 11 000 0000 0010 001
+ ID_ISAR2_EL1 = 0xc012, // 11 000 0000 0010 010
+ ID_ISAR3_EL1 = 0xc013, // 11 000 0000 0010 011
+ ID_ISAR4_EL1 = 0xc014, // 11 000 0000 0010 100
+ ID_ISAR5_EL1 = 0xc015, // 11 000 0000 0010 101
+ ID_AA64PFR0_EL1 = 0xc020, // 11 000 0000 0100 000
+ ID_AA64PFR1_EL1 = 0xc021, // 11 000 0000 0100 001
+ ID_AA64DFR0_EL1 = 0xc028, // 11 000 0000 0101 000
+ ID_AA64DFR1_EL1 = 0xc029, // 11 000 0000 0101 001
+ ID_AA64AFR0_EL1 = 0xc02c, // 11 000 0000 0101 100
+ ID_AA64AFR1_EL1 = 0xc02d, // 11 000 0000 0101 101
+ ID_AA64ISAR0_EL1 = 0xc030, // 11 000 0000 0110 000
+ ID_AA64ISAR1_EL1 = 0xc031, // 11 000 0000 0110 001
+ ID_AA64MMFR0_EL1 = 0xc038, // 11 000 0000 0111 000
+ ID_AA64MMFR1_EL1 = 0xc039, // 11 000 0000 0111 001
+ MVFR0_EL1 = 0xc018, // 11 000 0000 0011 000
+ MVFR1_EL1 = 0xc019, // 11 000 0000 0011 001
+ MVFR2_EL1 = 0xc01a, // 11 000 0000 0011 010
+ RVBAR_EL1 = 0xc601, // 11 000 1100 0000 001
+ RVBAR_EL2 = 0xe601, // 11 100 1100 0000 001
+ RVBAR_EL3 = 0xf601, // 11 110 1100 0000 001
+ ISR_EL1 = 0xc608, // 11 000 1100 0001 000
+ CNTPCT_EL0 = 0xdf01, // 11 011 1110 0000 001
+ CNTVCT_EL0 = 0xdf02 // 11 011 1110 0000 010
+ };
+
+ enum SysRegWOValues {
+ DBGDTRTX_EL0 = 0x9828, // 10 011 0000 0101 000
+ OSLAR_EL1 = 0x8084, // 10 000 0001 0000 100
+ PMSWINC_EL0 = 0xdce4 // 11 011 1001 1100 100
+ };
+
+ enum SysRegValues {
+ Invalid = -1, // Op0 Op1 CRn CRm Op2
+ OSDTRRX_EL1 = 0x8002, // 10 000 0000 0000 010
+ OSDTRTX_EL1 = 0x801a, // 10 000 0000 0011 010
+ TEECR32_EL1 = 0x9000, // 10 010 0000 0000 000
+ MDCCINT_EL1 = 0x8010, // 10 000 0000 0010 000
+ MDSCR_EL1 = 0x8012, // 10 000 0000 0010 010
+ DBGDTR_EL0 = 0x9820, // 10 011 0000 0100 000
+ OSECCR_EL1 = 0x8032, // 10 000 0000 0110 010
+ DBGVCR32_EL2 = 0xa038, // 10 100 0000 0111 000
+ DBGBVR0_EL1 = 0x8004, // 10 000 0000 0000 100
+ DBGBVR1_EL1 = 0x800c, // 10 000 0000 0001 100
+ DBGBVR2_EL1 = 0x8014, // 10 000 0000 0010 100
+ DBGBVR3_EL1 = 0x801c, // 10 000 0000 0011 100
+ DBGBVR4_EL1 = 0x8024, // 10 000 0000 0100 100
+ DBGBVR5_EL1 = 0x802c, // 10 000 0000 0101 100
+ DBGBVR6_EL1 = 0x8034, // 10 000 0000 0110 100
+ DBGBVR7_EL1 = 0x803c, // 10 000 0000 0111 100
+ DBGBVR8_EL1 = 0x8044, // 10 000 0000 1000 100
+ DBGBVR9_EL1 = 0x804c, // 10 000 0000 1001 100
+ DBGBVR10_EL1 = 0x8054, // 10 000 0000 1010 100
+ DBGBVR11_EL1 = 0x805c, // 10 000 0000 1011 100
+ DBGBVR12_EL1 = 0x8064, // 10 000 0000 1100 100
+ DBGBVR13_EL1 = 0x806c, // 10 000 0000 1101 100
+ DBGBVR14_EL1 = 0x8074, // 10 000 0000 1110 100
+ DBGBVR15_EL1 = 0x807c, // 10 000 0000 1111 100
+ DBGBCR0_EL1 = 0x8005, // 10 000 0000 0000 101
+ DBGBCR1_EL1 = 0x800d, // 10 000 0000 0001 101
+ DBGBCR2_EL1 = 0x8015, // 10 000 0000 0010 101
+ DBGBCR3_EL1 = 0x801d, // 10 000 0000 0011 101
+ DBGBCR4_EL1 = 0x8025, // 10 000 0000 0100 101
+ DBGBCR5_EL1 = 0x802d, // 10 000 0000 0101 101
+ DBGBCR6_EL1 = 0x8035, // 10 000 0000 0110 101
+ DBGBCR7_EL1 = 0x803d, // 10 000 0000 0111 101
+ DBGBCR8_EL1 = 0x8045, // 10 000 0000 1000 101
+ DBGBCR9_EL1 = 0x804d, // 10 000 0000 1001 101
+ DBGBCR10_EL1 = 0x8055, // 10 000 0000 1010 101
+ DBGBCR11_EL1 = 0x805d, // 10 000 0000 1011 101
+ DBGBCR12_EL1 = 0x8065, // 10 000 0000 1100 101
+ DBGBCR13_EL1 = 0x806d, // 10 000 0000 1101 101
+ DBGBCR14_EL1 = 0x8075, // 10 000 0000 1110 101
+ DBGBCR15_EL1 = 0x807d, // 10 000 0000 1111 101
+ DBGWVR0_EL1 = 0x8006, // 10 000 0000 0000 110
+ DBGWVR1_EL1 = 0x800e, // 10 000 0000 0001 110
+ DBGWVR2_EL1 = 0x8016, // 10 000 0000 0010 110
+ DBGWVR3_EL1 = 0x801e, // 10 000 0000 0011 110
+ DBGWVR4_EL1 = 0x8026, // 10 000 0000 0100 110
+ DBGWVR5_EL1 = 0x802e, // 10 000 0000 0101 110
+ DBGWVR6_EL1 = 0x8036, // 10 000 0000 0110 110
+ DBGWVR7_EL1 = 0x803e, // 10 000 0000 0111 110
+ DBGWVR8_EL1 = 0x8046, // 10 000 0000 1000 110
+ DBGWVR9_EL1 = 0x804e, // 10 000 0000 1001 110
+ DBGWVR10_EL1 = 0x8056, // 10 000 0000 1010 110
+ DBGWVR11_EL1 = 0x805e, // 10 000 0000 1011 110
+ DBGWVR12_EL1 = 0x8066, // 10 000 0000 1100 110
+ DBGWVR13_EL1 = 0x806e, // 10 000 0000 1101 110
+ DBGWVR14_EL1 = 0x8076, // 10 000 0000 1110 110
+ DBGWVR15_EL1 = 0x807e, // 10 000 0000 1111 110
+ DBGWCR0_EL1 = 0x8007, // 10 000 0000 0000 111
+ DBGWCR1_EL1 = 0x800f, // 10 000 0000 0001 111
+ DBGWCR2_EL1 = 0x8017, // 10 000 0000 0010 111
+ DBGWCR3_EL1 = 0x801f, // 10 000 0000 0011 111
+ DBGWCR4_EL1 = 0x8027, // 10 000 0000 0100 111
+ DBGWCR5_EL1 = 0x802f, // 10 000 0000 0101 111
+ DBGWCR6_EL1 = 0x8037, // 10 000 0000 0110 111
+ DBGWCR7_EL1 = 0x803f, // 10 000 0000 0111 111
+ DBGWCR8_EL1 = 0x8047, // 10 000 0000 1000 111
+ DBGWCR9_EL1 = 0x804f, // 10 000 0000 1001 111
+ DBGWCR10_EL1 = 0x8057, // 10 000 0000 1010 111
+ DBGWCR11_EL1 = 0x805f, // 10 000 0000 1011 111
+ DBGWCR12_EL1 = 0x8067, // 10 000 0000 1100 111
+ DBGWCR13_EL1 = 0x806f, // 10 000 0000 1101 111
+ DBGWCR14_EL1 = 0x8077, // 10 000 0000 1110 111
+ DBGWCR15_EL1 = 0x807f, // 10 000 0000 1111 111
+ TEEHBR32_EL1 = 0x9080, // 10 010 0001 0000 000
+ OSDLR_EL1 = 0x809c, // 10 000 0001 0011 100
+ DBGPRCR_EL1 = 0x80a4, // 10 000 0001 0100 100
+ DBGCLAIMSET_EL1 = 0x83c6, // 10 000 0111 1000 110
+ DBGCLAIMCLR_EL1 = 0x83ce, // 10 000 0111 1001 110
+ CSSELR_EL1 = 0xd000, // 11 010 0000 0000 000
+ VPIDR_EL2 = 0xe000, // 11 100 0000 0000 000
+ VMPIDR_EL2 = 0xe005, // 11 100 0000 0000 101
+ CPACR_EL1 = 0xc082, // 11 000 0001 0000 010
+ SCTLR_EL1 = 0xc080, // 11 000 0001 0000 000
+ SCTLR_EL2 = 0xe080, // 11 100 0001 0000 000
+ SCTLR_EL3 = 0xf080, // 11 110 0001 0000 000
+ ACTLR_EL1 = 0xc081, // 11 000 0001 0000 001
+ ACTLR_EL2 = 0xe081, // 11 100 0001 0000 001
+ ACTLR_EL3 = 0xf081, // 11 110 0001 0000 001
+ HCR_EL2 = 0xe088, // 11 100 0001 0001 000
+ SCR_EL3 = 0xf088, // 11 110 0001 0001 000
+ MDCR_EL2 = 0xe089, // 11 100 0001 0001 001
+ SDER32_EL3 = 0xf089, // 11 110 0001 0001 001
+ CPTR_EL2 = 0xe08a, // 11 100 0001 0001 010
+ CPTR_EL3 = 0xf08a, // 11 110 0001 0001 010
+ HSTR_EL2 = 0xe08b, // 11 100 0001 0001 011
+ HACR_EL2 = 0xe08f, // 11 100 0001 0001 111
+ MDCR_EL3 = 0xf099, // 11 110 0001 0011 001
+ TTBR0_EL1 = 0xc100, // 11 000 0010 0000 000
+ TTBR0_EL2 = 0xe100, // 11 100 0010 0000 000
+ TTBR0_EL3 = 0xf100, // 11 110 0010 0000 000
+ TTBR1_EL1 = 0xc101, // 11 000 0010 0000 001
+ TCR_EL1 = 0xc102, // 11 000 0010 0000 010
+ TCR_EL2 = 0xe102, // 11 100 0010 0000 010
+ TCR_EL3 = 0xf102, // 11 110 0010 0000 010
+ VTTBR_EL2 = 0xe108, // 11 100 0010 0001 000
+ VTCR_EL2 = 0xe10a, // 11 100 0010 0001 010
+ DACR32_EL2 = 0xe180, // 11 100 0011 0000 000
+ SPSR_EL1 = 0xc200, // 11 000 0100 0000 000
+ SPSR_EL2 = 0xe200, // 11 100 0100 0000 000
+ SPSR_EL3 = 0xf200, // 11 110 0100 0000 000
+ ELR_EL1 = 0xc201, // 11 000 0100 0000 001
+ ELR_EL2 = 0xe201, // 11 100 0100 0000 001
+ ELR_EL3 = 0xf201, // 11 110 0100 0000 001
+ SP_EL0 = 0xc208, // 11 000 0100 0001 000
+ SP_EL1 = 0xe208, // 11 100 0100 0001 000
+ SP_EL2 = 0xf208, // 11 110 0100 0001 000
+ SPSel = 0xc210, // 11 000 0100 0010 000
+ NZCV = 0xda10, // 11 011 0100 0010 000
+ DAIF = 0xda11, // 11 011 0100 0010 001
+ CurrentEL = 0xc212, // 11 000 0100 0010 010
+ SPSR_irq = 0xe218, // 11 100 0100 0011 000
+ SPSR_abt = 0xe219, // 11 100 0100 0011 001
+ SPSR_und = 0xe21a, // 11 100 0100 0011 010
+ SPSR_fiq = 0xe21b, // 11 100 0100 0011 011
+ FPCR = 0xda20, // 11 011 0100 0100 000
+ FPSR = 0xda21, // 11 011 0100 0100 001
+ DSPSR_EL0 = 0xda28, // 11 011 0100 0101 000
+ DLR_EL0 = 0xda29, // 11 011 0100 0101 001
+ IFSR32_EL2 = 0xe281, // 11 100 0101 0000 001
+ AFSR0_EL1 = 0xc288, // 11 000 0101 0001 000
+ AFSR0_EL2 = 0xe288, // 11 100 0101 0001 000
+ AFSR0_EL3 = 0xf288, // 11 110 0101 0001 000
+ AFSR1_EL1 = 0xc289, // 11 000 0101 0001 001
+ AFSR1_EL2 = 0xe289, // 11 100 0101 0001 001
+ AFSR1_EL3 = 0xf289, // 11 110 0101 0001 001
+ ESR_EL1 = 0xc290, // 11 000 0101 0010 000
+ ESR_EL2 = 0xe290, // 11 100 0101 0010 000
+ ESR_EL3 = 0xf290, // 11 110 0101 0010 000
+ FPEXC32_EL2 = 0xe298, // 11 100 0101 0011 000
+ FAR_EL1 = 0xc300, // 11 000 0110 0000 000
+ FAR_EL2 = 0xe300, // 11 100 0110 0000 000
+ FAR_EL3 = 0xf300, // 11 110 0110 0000 000
+ HPFAR_EL2 = 0xe304, // 11 100 0110 0000 100
+ PAR_EL1 = 0xc3a0, // 11 000 0111 0100 000
+ PMCR_EL0 = 0xdce0, // 11 011 1001 1100 000
+ PMCNTENSET_EL0 = 0xdce1, // 11 011 1001 1100 001
+ PMCNTENCLR_EL0 = 0xdce2, // 11 011 1001 1100 010
+ PMOVSCLR_EL0 = 0xdce3, // 11 011 1001 1100 011
+ PMSELR_EL0 = 0xdce5, // 11 011 1001 1100 101
+ PMCCNTR_EL0 = 0xdce8, // 11 011 1001 1101 000
+ PMXEVTYPER_EL0 = 0xdce9, // 11 011 1001 1101 001
+ PMXEVCNTR_EL0 = 0xdcea, // 11 011 1001 1101 010
+ PMUSERENR_EL0 = 0xdcf0, // 11 011 1001 1110 000
+ PMINTENSET_EL1 = 0xc4f1, // 11 000 1001 1110 001
+ PMINTENCLR_EL1 = 0xc4f2, // 11 000 1001 1110 010
+ PMOVSSET_EL0 = 0xdcf3, // 11 011 1001 1110 011
+ MAIR_EL1 = 0xc510, // 11 000 1010 0010 000
+ MAIR_EL2 = 0xe510, // 11 100 1010 0010 000
+ MAIR_EL3 = 0xf510, // 11 110 1010 0010 000
+ AMAIR_EL1 = 0xc518, // 11 000 1010 0011 000
+ AMAIR_EL2 = 0xe518, // 11 100 1010 0011 000
+ AMAIR_EL3 = 0xf518, // 11 110 1010 0011 000
+ VBAR_EL1 = 0xc600, // 11 000 1100 0000 000
+ VBAR_EL2 = 0xe600, // 11 100 1100 0000 000
+ VBAR_EL3 = 0xf600, // 11 110 1100 0000 000
+ RMR_EL1 = 0xc602, // 11 000 1100 0000 010
+ RMR_EL2 = 0xe602, // 11 100 1100 0000 010
+ RMR_EL3 = 0xf602, // 11 110 1100 0000 010
+ CONTEXTIDR_EL1 = 0xc681, // 11 000 1101 0000 001
+ TPIDR_EL0 = 0xde82, // 11 011 1101 0000 010
+ TPIDR_EL2 = 0xe682, // 11 100 1101 0000 010
+ TPIDR_EL3 = 0xf682, // 11 110 1101 0000 010
+ TPIDRRO_EL0 = 0xde83, // 11 011 1101 0000 011
+ TPIDR_EL1 = 0xc684, // 11 000 1101 0000 100
+ CNTFRQ_EL0 = 0xdf00, // 11 011 1110 0000 000
+ CNTVOFF_EL2 = 0xe703, // 11 100 1110 0000 011
+ CNTKCTL_EL1 = 0xc708, // 11 000 1110 0001 000
+ CNTHCTL_EL2 = 0xe708, // 11 100 1110 0001 000
+ CNTP_TVAL_EL0 = 0xdf10, // 11 011 1110 0010 000
+ CNTHP_TVAL_EL2 = 0xe710, // 11 100 1110 0010 000
+ CNTPS_TVAL_EL1 = 0xff10, // 11 111 1110 0010 000
+ CNTP_CTL_EL0 = 0xdf11, // 11 011 1110 0010 001
+ CNTHP_CTL_EL2 = 0xe711, // 11 100 1110 0010 001
+ CNTPS_CTL_EL1 = 0xff11, // 11 111 1110 0010 001
+ CNTP_CVAL_EL0 = 0xdf12, // 11 011 1110 0010 010
+ CNTHP_CVAL_EL2 = 0xe712, // 11 100 1110 0010 010
+ CNTPS_CVAL_EL1 = 0xff12, // 11 111 1110 0010 010
+ CNTV_TVAL_EL0 = 0xdf18, // 11 011 1110 0011 000
+ CNTV_CTL_EL0 = 0xdf19, // 11 011 1110 0011 001
+ CNTV_CVAL_EL0 = 0xdf1a, // 11 011 1110 0011 010
+ PMEVCNTR0_EL0 = 0xdf40, // 11 011 1110 1000 000
+ PMEVCNTR1_EL0 = 0xdf41, // 11 011 1110 1000 001
+ PMEVCNTR2_EL0 = 0xdf42, // 11 011 1110 1000 010
+ PMEVCNTR3_EL0 = 0xdf43, // 11 011 1110 1000 011
+ PMEVCNTR4_EL0 = 0xdf44, // 11 011 1110 1000 100
+ PMEVCNTR5_EL0 = 0xdf45, // 11 011 1110 1000 101
+ PMEVCNTR6_EL0 = 0xdf46, // 11 011 1110 1000 110
+ PMEVCNTR7_EL0 = 0xdf47, // 11 011 1110 1000 111
+ PMEVCNTR8_EL0 = 0xdf48, // 11 011 1110 1001 000
+ PMEVCNTR9_EL0 = 0xdf49, // 11 011 1110 1001 001
+ PMEVCNTR10_EL0 = 0xdf4a, // 11 011 1110 1001 010
+ PMEVCNTR11_EL0 = 0xdf4b, // 11 011 1110 1001 011
+ PMEVCNTR12_EL0 = 0xdf4c, // 11 011 1110 1001 100
+ PMEVCNTR13_EL0 = 0xdf4d, // 11 011 1110 1001 101
+ PMEVCNTR14_EL0 = 0xdf4e, // 11 011 1110 1001 110
+ PMEVCNTR15_EL0 = 0xdf4f, // 11 011 1110 1001 111
+ PMEVCNTR16_EL0 = 0xdf50, // 11 011 1110 1010 000
+ PMEVCNTR17_EL0 = 0xdf51, // 11 011 1110 1010 001
+ PMEVCNTR18_EL0 = 0xdf52, // 11 011 1110 1010 010
+ PMEVCNTR19_EL0 = 0xdf53, // 11 011 1110 1010 011
+ PMEVCNTR20_EL0 = 0xdf54, // 11 011 1110 1010 100
+ PMEVCNTR21_EL0 = 0xdf55, // 11 011 1110 1010 101
+ PMEVCNTR22_EL0 = 0xdf56, // 11 011 1110 1010 110
+ PMEVCNTR23_EL0 = 0xdf57, // 11 011 1110 1010 111
+ PMEVCNTR24_EL0 = 0xdf58, // 11 011 1110 1011 000
+ PMEVCNTR25_EL0 = 0xdf59, // 11 011 1110 1011 001
+ PMEVCNTR26_EL0 = 0xdf5a, // 11 011 1110 1011 010
+ PMEVCNTR27_EL0 = 0xdf5b, // 11 011 1110 1011 011
+ PMEVCNTR28_EL0 = 0xdf5c, // 11 011 1110 1011 100
+ PMEVCNTR29_EL0 = 0xdf5d, // 11 011 1110 1011 101
+ PMEVCNTR30_EL0 = 0xdf5e, // 11 011 1110 1011 110
+ PMCCFILTR_EL0 = 0xdf7f, // 11 011 1110 1111 111
+ PMEVTYPER0_EL0 = 0xdf60, // 11 011 1110 1100 000
+ PMEVTYPER1_EL0 = 0xdf61, // 11 011 1110 1100 001
+ PMEVTYPER2_EL0 = 0xdf62, // 11 011 1110 1100 010
+ PMEVTYPER3_EL0 = 0xdf63, // 11 011 1110 1100 011
+ PMEVTYPER4_EL0 = 0xdf64, // 11 011 1110 1100 100
+ PMEVTYPER5_EL0 = 0xdf65, // 11 011 1110 1100 101
+ PMEVTYPER6_EL0 = 0xdf66, // 11 011 1110 1100 110
+ PMEVTYPER7_EL0 = 0xdf67, // 11 011 1110 1100 111
+ PMEVTYPER8_EL0 = 0xdf68, // 11 011 1110 1101 000
+ PMEVTYPER9_EL0 = 0xdf69, // 11 011 1110 1101 001
+ PMEVTYPER10_EL0 = 0xdf6a, // 11 011 1110 1101 010
+ PMEVTYPER11_EL0 = 0xdf6b, // 11 011 1110 1101 011
+ PMEVTYPER12_EL0 = 0xdf6c, // 11 011 1110 1101 100
+ PMEVTYPER13_EL0 = 0xdf6d, // 11 011 1110 1101 101
+ PMEVTYPER14_EL0 = 0xdf6e, // 11 011 1110 1101 110
+ PMEVTYPER15_EL0 = 0xdf6f, // 11 011 1110 1101 111
+ PMEVTYPER16_EL0 = 0xdf70, // 11 011 1110 1110 000
+ PMEVTYPER17_EL0 = 0xdf71, // 11 011 1110 1110 001
+ PMEVTYPER18_EL0 = 0xdf72, // 11 011 1110 1110 010
+ PMEVTYPER19_EL0 = 0xdf73, // 11 011 1110 1110 011
+ PMEVTYPER20_EL0 = 0xdf74, // 11 011 1110 1110 100
+ PMEVTYPER21_EL0 = 0xdf75, // 11 011 1110 1110 101
+ PMEVTYPER22_EL0 = 0xdf76, // 11 011 1110 1110 110
+ PMEVTYPER23_EL0 = 0xdf77, // 11 011 1110 1110 111
+ PMEVTYPER24_EL0 = 0xdf78, // 11 011 1110 1111 000
+ PMEVTYPER25_EL0 = 0xdf79, // 11 011 1110 1111 001
+ PMEVTYPER26_EL0 = 0xdf7a, // 11 011 1110 1111 010
+ PMEVTYPER27_EL0 = 0xdf7b, // 11 011 1110 1111 011
+ PMEVTYPER28_EL0 = 0xdf7c, // 11 011 1110 1111 100
+ PMEVTYPER29_EL0 = 0xdf7d, // 11 011 1110 1111 101
+ PMEVTYPER30_EL0 = 0xdf7e // 11 011 1110 1111 110
+ };
+
+ // Note that these do not inherit from NamedImmMapper. This class is
+ // sufficiently different in its behaviour that I don't believe it's worth
+ // burdening the common NamedImmMapper with abstractions only needed in
+ // this one case.
+ struct SysRegMapper {
+ static const NamedImmMapper::Mapping SysRegPairs[];
+
+ const NamedImmMapper::Mapping *InstPairs;
+ size_t NumInstPairs;
+
+ SysRegMapper() {}
+ uint32_t fromString(StringRef Name, bool &Valid) const;
+ std::string toString(uint32_t Bits, bool &Valid) const;
+ };
+
+ struct MSRMapper : SysRegMapper {
+ static const NamedImmMapper::Mapping MSRPairs[];
+ MSRMapper();
+ };
+
+ struct MRSMapper : SysRegMapper {
+ static const NamedImmMapper::Mapping MRSPairs[];
+ MRSMapper();
+ };
+
+ uint32_t ParseGenericRegister(StringRef Name, bool &Valid);
+}
+
+namespace A64TLBI {
+ enum TLBIValues {
+ Invalid = -1, // Op0 Op1 CRn CRm Op2
+ IPAS2E1IS = 0x6401, // 01 100 1000 0000 001
+ IPAS2LE1IS = 0x6405, // 01 100 1000 0000 101
+ VMALLE1IS = 0x4418, // 01 000 1000 0011 000
+ ALLE2IS = 0x6418, // 01 100 1000 0011 000
+ ALLE3IS = 0x7418, // 01 110 1000 0011 000
+ VAE1IS = 0x4419, // 01 000 1000 0011 001
+ VAE2IS = 0x6419, // 01 100 1000 0011 001
+ VAE3IS = 0x7419, // 01 110 1000 0011 001
+ ASIDE1IS = 0x441a, // 01 000 1000 0011 010
+ VAAE1IS = 0x441b, // 01 000 1000 0011 011
+ ALLE1IS = 0x641c, // 01 100 1000 0011 100
+ VALE1IS = 0x441d, // 01 000 1000 0011 101
+ VALE2IS = 0x641d, // 01 100 1000 0011 101
+ VALE3IS = 0x741d, // 01 110 1000 0011 101
+ VMALLS12E1IS = 0x641e, // 01 100 1000 0011 110
+ VAALE1IS = 0x441f, // 01 000 1000 0011 111
+ IPAS2E1 = 0x6421, // 01 100 1000 0100 001
+ IPAS2LE1 = 0x6425, // 01 100 1000 0100 101
+ VMALLE1 = 0x4438, // 01 000 1000 0111 000
+ ALLE2 = 0x6438, // 01 100 1000 0111 000
+ ALLE3 = 0x7438, // 01 110 1000 0111 000
+ VAE1 = 0x4439, // 01 000 1000 0111 001
+ VAE2 = 0x6439, // 01 100 1000 0111 001
+ VAE3 = 0x7439, // 01 110 1000 0111 001
+ ASIDE1 = 0x443a, // 01 000 1000 0111 010
+ VAAE1 = 0x443b, // 01 000 1000 0111 011
+ ALLE1 = 0x643c, // 01 100 1000 0111 100
+ VALE1 = 0x443d, // 01 000 1000 0111 101
+ VALE2 = 0x643d, // 01 100 1000 0111 101
+ VALE3 = 0x743d, // 01 110 1000 0111 101
+ VMALLS12E1 = 0x643e, // 01 100 1000 0111 110
+ VAALE1 = 0x443f // 01 000 1000 0111 111
+ };
+
+ struct TLBIMapper : NamedImmMapper {
+ const static Mapping TLBIPairs[];
+
+ TLBIMapper();
+ };
+
+ static inline bool NeedsRegister(TLBIValues Val) {
+ switch (Val) {
+ case VMALLE1IS:
+ case ALLE2IS:
+ case ALLE3IS:
+ case ALLE1IS:
+ case VMALLS12E1IS:
+ case VMALLE1:
+ case ALLE2:
+ case ALLE3:
+ case ALLE1:
+ case VMALLS12E1:
+ return false;
+ default:
+ return true;
+ }
+ }
+}
+
+namespace AArch64II {
+
+ enum TOF {
+ //===--------------------------------------------------------------===//
+ // AArch64 Specific MachineOperand flags.
+
+ MO_NO_FLAG,
+
+ // MO_GOT - Represents a relocation referring to the GOT entry of a given
+ // symbol. Used in adrp.
+ MO_GOT,
+
+ // MO_GOT_LO12 - Represents a relocation referring to the low 12 bits of the
+ // GOT entry of a given symbol. Used in ldr only.
+ MO_GOT_LO12,
+
+ // MO_DTPREL_* - Represents a relocation referring to the offset from a
+ // module's dynamic thread pointer. Used in the local-dynamic TLS access
+ // model.
+ MO_DTPREL_G1,
+ MO_DTPREL_G0_NC,
+
+ // MO_GOTTPREL_* - Represents a relocation referring to a GOT entry
+ // providing the offset of a variable from the thread-pointer. Used in
+ // initial-exec TLS model where this offset is assigned in the static thread
+ // block and thus known by the dynamic linker.
+ MO_GOTTPREL,
+ MO_GOTTPREL_LO12,
+
+ // MO_TLSDESC_* - Represents a relocation referring to a GOT entry providing
+ // a TLS descriptor chosen by the dynamic linker. Used for the
+ // general-dynamic and local-dynamic TLS access models where very little is
+ // known at link-time.
+ MO_TLSDESC,
+ MO_TLSDESC_LO12,
+
+ // MO_TPREL_* - Represents a relocation referring to the offset of a
+ // variable from the thread pointer itself. Used in the local-exec TLS
+ // access model.
+ MO_TPREL_G1,
+ MO_TPREL_G0_NC,
+
+ // MO_LO12 - On a symbol operand, this represents a relocation containing
+ // lower 12 bits of the address. Used in add/sub/ldr/str.
+ MO_LO12
+ };
+}
+
+class APFloat;
+
+namespace A64Imms {
+ bool isFPImm(const APFloat &Val, uint32_t &Imm8Bits);
+
+ inline bool isFPImm(const APFloat &Val) {
+ uint32_t Imm8;
+ return isFPImm(Val, Imm8);
+ }
+
+ bool isLogicalImm(unsigned RegWidth, uint64_t Imm, uint32_t &Bits);
+ bool isLogicalImmBits(unsigned RegWidth, uint32_t Bits, uint64_t &Imm);
+
+ bool isMOVZImm(int RegWidth, uint64_t Value, int &UImm16, int &Shift);
+ bool isMOVNImm(int RegWidth, uint64_t Value, int &UImm16, int &Shift);
+
+ // We sometimes want to know whether the immediate is representable with a
+ // MOVN but *not* with a MOVZ (because that would take priority).
+ bool isOnlyMOVNImm(int RegWidth, uint64_t Value, int &UImm16, int &Shift);
+
+}
+
+} // end namespace llvm;
+
+#endif
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
new file mode 100644
index 0000000000..476b94e70f
--- /dev/null
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
@@ -0,0 +1,287 @@
+//===-- AArch64ELFObjectWriter.cpp - AArch64 ELF Writer -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/AArch64FixupKinds.h"
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+
+namespace {
+class AArch64ELFObjectWriter : public MCELFObjectTargetWriter {
+public:
+ AArch64ELFObjectWriter(uint8_t OSABI);
+
+ virtual ~AArch64ELFObjectWriter();
+
+protected:
+ virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
+ bool IsPCRel, bool IsRelocWithSymbol,
+ int64_t Addend) const;
+private:
+};
+}
+
+AArch64ELFObjectWriter::AArch64ELFObjectWriter(uint8_t OSABI)
+ : MCELFObjectTargetWriter(/*Is64Bit*/ true, OSABI, ELF::EM_AARCH64,
+ /*HasRelocationAddend*/ true)
+{}
+
+AArch64ELFObjectWriter::~AArch64ELFObjectWriter()
+{}
+
+unsigned AArch64ELFObjectWriter::GetRelocType(const MCValue &Target,
+ const MCFixup &Fixup,
+ bool IsPCRel,
+ bool IsRelocWithSymbol,
+ int64_t Addend) const {
+ unsigned Type;
+ if (IsPCRel) {
+ switch ((unsigned)Fixup.getKind()) {
+ default:
+ llvm_unreachable("Unimplemented fixup -> relocation");
+ case FK_Data_8:
+ return ELF::R_AARCH64_PREL64;
+ case FK_Data_4:
+ return ELF::R_AARCH64_PREL32;
+ case FK_Data_2:
+ return ELF::R_AARCH64_PREL16;
+ case AArch64::fixup_a64_ld_prel:
+ Type = ELF::R_AARCH64_LD_PREL_LO19;
+ break;
+ case AArch64::fixup_a64_adr_prel:
+ Type = ELF::R_AARCH64_ADR_PREL_LO21;
+ break;
+ case AArch64::fixup_a64_adr_prel_page:
+ Type = ELF::R_AARCH64_ADR_PREL_PG_HI21;
+ break;
+ case AArch64::fixup_a64_adr_prel_got_page:
+ Type = ELF::R_AARCH64_ADR_GOT_PAGE;
+ break;
+ case AArch64::fixup_a64_tstbr:
+ Type = ELF::R_AARCH64_TSTBR14;
+ break;
+ case AArch64::fixup_a64_condbr:
+ Type = ELF::R_AARCH64_CONDBR19;
+ break;
+ case AArch64::fixup_a64_uncondbr:
+ Type = ELF::R_AARCH64_JUMP26;
+ break;
+ case AArch64::fixup_a64_call:
+ Type = ELF::R_AARCH64_CALL26;
+ break;
+ case AArch64::fixup_a64_adr_gottprel_page:
+ Type = ELF::R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21;
+ break;
+ case AArch64::fixup_a64_ld_gottprel_prel19:
+ Type = ELF::R_AARCH64_TLSIE_LD_GOTTPREL_PREL19;
+ break;
+ case AArch64::fixup_a64_tlsdesc_adr_page:
+ Type = ELF::R_AARCH64_TLSDESC_ADR_PAGE;
+ break;
+ }
+ } else {
+ switch ((unsigned)Fixup.getKind()) {
+ default:
+ llvm_unreachable("Unimplemented fixup -> relocation");
+ case FK_Data_8:
+ return ELF::R_AARCH64_ABS64;
+ case FK_Data_4:
+ return ELF::R_AARCH64_ABS32;
+ case FK_Data_2:
+ return ELF::R_AARCH64_ABS16;
+ case AArch64::fixup_a64_add_lo12:
+ Type = ELF::R_AARCH64_ADD_ABS_LO12_NC;
+ break;
+ case AArch64::fixup_a64_ld64_got_lo12_nc:
+ Type = ELF::R_AARCH64_LD64_GOT_LO12_NC;
+ break;
+ case AArch64::fixup_a64_ldst8_lo12:
+ Type = ELF::R_AARCH64_LDST8_ABS_LO12_NC;
+ break;
+ case AArch64::fixup_a64_ldst16_lo12:
+ Type = ELF::R_AARCH64_LDST16_ABS_LO12_NC;
+ break;
+ case AArch64::fixup_a64_ldst32_lo12:
+ Type = ELF::R_AARCH64_LDST32_ABS_LO12_NC;
+ break;
+ case AArch64::fixup_a64_ldst64_lo12:
+ Type = ELF::R_AARCH64_LDST64_ABS_LO12_NC;
+ break;
+ case AArch64::fixup_a64_ldst128_lo12:
+ Type = ELF::R_AARCH64_LDST128_ABS_LO12_NC;
+ break;
+ case AArch64::fixup_a64_movw_uabs_g0:
+ Type = ELF::R_AARCH64_MOVW_UABS_G0;
+ break;
+ case AArch64::fixup_a64_movw_uabs_g0_nc:
+ Type = ELF::R_AARCH64_MOVW_UABS_G0_NC;
+ break;
+ case AArch64::fixup_a64_movw_uabs_g1:
+ Type = ELF::R_AARCH64_MOVW_UABS_G1;
+ break;
+ case AArch64::fixup_a64_movw_uabs_g1_nc:
+ Type = ELF::R_AARCH64_MOVW_UABS_G1_NC;
+ break;
+ case AArch64::fixup_a64_movw_uabs_g2:
+ Type = ELF::R_AARCH64_MOVW_UABS_G2;
+ break;
+ case AArch64::fixup_a64_movw_uabs_g2_nc:
+ Type = ELF::R_AARCH64_MOVW_UABS_G2_NC;
+ break;
+ case AArch64::fixup_a64_movw_uabs_g3:
+ Type = ELF::R_AARCH64_MOVW_UABS_G3;
+ break;
+ case AArch64::fixup_a64_movw_sabs_g0:
+ Type = ELF::R_AARCH64_MOVW_SABS_G0;
+ break;
+ case AArch64::fixup_a64_movw_sabs_g1:
+ Type = ELF::R_AARCH64_MOVW_SABS_G1;
+ break;
+ case AArch64::fixup_a64_movw_sabs_g2:
+ Type = ELF::R_AARCH64_MOVW_SABS_G2;
+ break;
+
+ // TLS Local-dynamic block
+ case AArch64::fixup_a64_movw_dtprel_g2:
+ Type = ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G2;
+ break;
+ case AArch64::fixup_a64_movw_dtprel_g1:
+ Type = ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G1;
+ break;
+ case AArch64::fixup_a64_movw_dtprel_g1_nc:
+ Type = ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC;
+ break;
+ case AArch64::fixup_a64_movw_dtprel_g0:
+ Type = ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G0;
+ break;
+ case AArch64::fixup_a64_movw_dtprel_g0_nc:
+ Type = ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC;
+ break;
+ case AArch64::fixup_a64_add_dtprel_hi12:
+ Type = ELF::R_AARCH64_TLSLD_ADD_DTPREL_HI12;
+ break;
+ case AArch64::fixup_a64_add_dtprel_lo12:
+ Type = ELF::R_AARCH64_TLSLD_ADD_DTPREL_LO12;
+ break;
+ case AArch64::fixup_a64_add_dtprel_lo12_nc:
+ Type = ELF::R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC;
+ break;
+ case AArch64::fixup_a64_ldst8_dtprel_lo12:
+ Type = ELF::R_AARCH64_TLSLD_LDST8_DTPREL_LO12;
+ break;
+ case AArch64::fixup_a64_ldst8_dtprel_lo12_nc:
+ Type = ELF::R_AARCH64_TLSLD_LDST8_DTPREL_LO12_NC;
+ break;
+ case AArch64::fixup_a64_ldst16_dtprel_lo12:
+ Type = ELF::R_AARCH64_TLSLD_LDST16_DTPREL_LO12;
+ break;
+ case AArch64::fixup_a64_ldst16_dtprel_lo12_nc:
+ Type = ELF::R_AARCH64_TLSLD_LDST16_DTPREL_LO12_NC;
+ break;
+ case AArch64::fixup_a64_ldst32_dtprel_lo12:
+ Type = ELF::R_AARCH64_TLSLD_LDST32_DTPREL_LO12;
+ break;
+ case AArch64::fixup_a64_ldst32_dtprel_lo12_nc:
+ Type = ELF::R_AARCH64_TLSLD_LDST32_DTPREL_LO12_NC;
+ break;
+ case AArch64::fixup_a64_ldst64_dtprel_lo12:
+ Type = ELF::R_AARCH64_TLSLD_LDST64_DTPREL_LO12;
+ break;
+ case AArch64::fixup_a64_ldst64_dtprel_lo12_nc:
+ Type = ELF::R_AARCH64_TLSLD_LDST64_DTPREL_LO12_NC;
+ break;
+
+ // TLS initial-exec block
+ case AArch64::fixup_a64_movw_gottprel_g1:
+ Type = ELF::R_AARCH64_TLSIE_MOVW_GOTTPREL_G1;
+ break;
+ case AArch64::fixup_a64_movw_gottprel_g0_nc:
+ Type = ELF::R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC;
+ break;
+ case AArch64::fixup_a64_ld64_gottprel_lo12_nc:
+ Type = ELF::R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC;
+ break;
+
+ // TLS local-exec block
+ case AArch64::fixup_a64_movw_tprel_g2:
+ Type = ELF::R_AARCH64_TLSLE_MOVW_TPREL_G2;
+ break;
+ case AArch64::fixup_a64_movw_tprel_g1:
+ Type = ELF::R_AARCH64_TLSLE_MOVW_TPREL_G1;
+ break;
+ case AArch64::fixup_a64_movw_tprel_g1_nc:
+ Type = ELF::R_AARCH64_TLSLE_MOVW_TPREL_G1_NC;
+ break;
+ case AArch64::fixup_a64_movw_tprel_g0:
+ Type = ELF::R_AARCH64_TLSLE_MOVW_TPREL_G0;
+ break;
+ case AArch64::fixup_a64_movw_tprel_g0_nc:
+ Type = ELF::R_AARCH64_TLSLE_MOVW_TPREL_G0_NC;
+ break;
+ case AArch64::fixup_a64_add_tprel_hi12:
+ Type = ELF::R_AARCH64_TLSLE_ADD_TPREL_HI12;
+ break;
+ case AArch64::fixup_a64_add_tprel_lo12:
+ Type = ELF::R_AARCH64_TLSLE_ADD_TPREL_LO12;
+ break;
+ case AArch64::fixup_a64_add_tprel_lo12_nc:
+ Type = ELF::R_AARCH64_TLSLE_ADD_TPREL_LO12_NC;
+ break;
+ case AArch64::fixup_a64_ldst8_tprel_lo12:
+ Type = ELF::R_AARCH64_TLSLE_LDST8_TPREL_LO12;
+ break;
+ case AArch64::fixup_a64_ldst8_tprel_lo12_nc:
+ Type = ELF::R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC;
+ break;
+ case AArch64::fixup_a64_ldst16_tprel_lo12:
+ Type = ELF::R_AARCH64_TLSLE_LDST16_TPREL_LO12;
+ break;
+ case AArch64::fixup_a64_ldst16_tprel_lo12_nc:
+ Type = ELF::R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC;
+ break;
+ case AArch64::fixup_a64_ldst32_tprel_lo12:
+ Type = ELF::R_AARCH64_TLSLE_LDST32_TPREL_LO12;
+ break;
+ case AArch64::fixup_a64_ldst32_tprel_lo12_nc:
+ Type = ELF::R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC;
+ break;
+ case AArch64::fixup_a64_ldst64_tprel_lo12:
+ Type = ELF::R_AARCH64_TLSLE_LDST64_TPREL_LO12;
+ break;
+ case AArch64::fixup_a64_ldst64_tprel_lo12_nc:
+ Type = ELF::R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC;
+ break;
+
+ // TLS general-dynamic block
+ case AArch64::fixup_a64_tlsdesc_adr_page:
+ Type = ELF::R_AARCH64_TLSDESC_ADR_PAGE;
+ break;
+ case AArch64::fixup_a64_tlsdesc_ld64_lo12_nc:
+ Type = ELF::R_AARCH64_TLSDESC_LD64_LO12_NC;
+ break;
+ case AArch64::fixup_a64_tlsdesc_add_lo12_nc:
+ Type = ELF::R_AARCH64_TLSDESC_ADD_LO12_NC;
+ break;
+ case AArch64::fixup_a64_tlsdesc_call:
+ Type = ELF::R_AARCH64_TLSDESC_CALL;
+ break;
+ }
+ }
+
+ return Type;
+}
+
+MCObjectWriter *llvm::createAArch64ELFObjectWriter(raw_ostream &OS,
+ uint8_t OSABI) {
+ MCELFObjectTargetWriter *MOTW = new AArch64ELFObjectWriter(OSABI);
+ return createELFObjectWriter(MOTW, OS, /*IsLittleEndian=*/true);
+}
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
new file mode 100644
index 0000000000..b83577af45
--- /dev/null
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
@@ -0,0 +1,160 @@
+//===- AArch64ELFStreamer.cpp - ELF Object Output for AArch64 -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file assembles .s files and emits AArch64 ELF .o object files. It
+// differs from the generic ELF streamer in that it emits mapping symbols ($x
+// and $d) to delimit regions of data and code.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCELFStreamer.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCELF.h"
+#include "llvm/MC/MCELFStreamer.h"
+#include "llvm/MC/MCELFSymbolFlags.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCObjectStreamer.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+
+/// Extend the generic ELFStreamer class so that it can emit mapping symbols at
+/// the appropriate points in the object files. These symbols are defined in the
+/// AArch64 ELF ABI:
+/// infocenter.arm.com/help/topic/com.arm.doc.ihi0056a/IHI0056A_aaelf64.pdf
+///
+/// In brief: $x or $d should be emitted at the start of each contiguous region
+/// of A64 code or data in a section. In practice, this emission does not rely
+/// on explicit assembler directives but on inherent properties of the
+/// directives doing the emission (e.g. ".byte" is data, "add x0, x0, x0" an
+/// instruction).
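+/// For example, a function body followed by a ".word"-built jump table in the
+/// same section would get a $x at the start of the code and a $d at the start
+/// of the table.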
+///
+/// As a result this system is orthogonal to the DataRegion infrastructure used
+/// by MachO. Beware!
+class AArch64ELFStreamer : public MCELFStreamer {
+public:
+ AArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB,
+ raw_ostream &OS, MCCodeEmitter *Emitter)
+ : MCELFStreamer(Context, TAB, OS, Emitter),
+ MappingSymbolCounter(0), LastEMS(EMS_None) {
+ }
+
+ ~AArch64ELFStreamer() {}
+
+ virtual void ChangeSection(const MCSection *Section) {
+ // We have to keep track of the mapping symbol state of any sections we
+ // use. Each one should start off as EMS_None, which is what
+ // DenseMap::lookup returns for a section it has not seen before.
+ LastMappingSymbols[getPreviousSection()] = LastEMS;
+ LastEMS = LastMappingSymbols.lookup(Section);
+
+ MCELFStreamer::ChangeSection(Section);
+ }
+
+ /// This function is the one used to emit instruction data into the ELF
+ /// streamer. We override it to add the appropriate mapping symbol if
+ /// necessary.
+ virtual void EmitInstruction(const MCInst& Inst) {
+ EmitA64MappingSymbol();
+ MCELFStreamer::EmitInstruction(Inst);
+ }
+
+ /// This is one of the functions used to emit data into an ELF section, so the
+ /// AArch64 streamer overrides it to add the appropriate mapping symbol ($d)
+ /// if necessary.
+ virtual void EmitBytes(StringRef Data, unsigned AddrSpace) {
+ EmitDataMappingSymbol();
+ MCELFStreamer::EmitBytes(Data, AddrSpace);
+ }
+
+ /// This is one of the functions used to emit data into an ELF section, so the
+ /// AArch64 streamer overrides it to add the appropriate mapping symbol ($d)
+ /// if necessary.
+ virtual void EmitValueImpl(const MCExpr *Value, unsigned Size,
+ unsigned AddrSpace) {
+ EmitDataMappingSymbol();
+ MCELFStreamer::EmitValueImpl(Value, Size, AddrSpace);
+ }
+
+private:
+ enum ElfMappingSymbol {
+ EMS_None,
+ EMS_A64,
+ EMS_Data
+ };
+
+ void EmitDataMappingSymbol() {
+ if (LastEMS == EMS_Data) return;
+ EmitMappingSymbol("$d");
+ LastEMS = EMS_Data;
+ }
+
+ void EmitA64MappingSymbol() {
+ if (LastEMS == EMS_A64) return;
+ EmitMappingSymbol("$x");
+ LastEMS = EMS_A64;
+ }
+
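+ // Create the mapping symbol ($x.N or $d.N) as a local, untyped symbol
+ // whose value is a temporary label placed at the current position in the
+ // section.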
+ void EmitMappingSymbol(StringRef Name) {
+ MCSymbol *Start = getContext().CreateTempSymbol();
+ EmitLabel(Start);
+
+ MCSymbol *Symbol =
+ getContext().GetOrCreateSymbol(Name + "." +
+ Twine(MappingSymbolCounter++));
+
+ MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+ MCELF::SetType(SD, ELF::STT_NOTYPE);
+ MCELF::SetBinding(SD, ELF::STB_LOCAL);
+ SD.setExternal(false);
+ Symbol->setSection(*getCurrentSection());
+
+ const MCExpr *Value = MCSymbolRefExpr::Create(Start, getContext());
+ Symbol->setVariableValue(Value);
+ }
+
+ int64_t MappingSymbolCounter;
+
+ DenseMap<const MCSection *, ElfMappingSymbol> LastMappingSymbols;
+ ElfMappingSymbol LastEMS;
+
+ /// @}
+};
+}
+
+namespace llvm {
+ MCELFStreamer* createAArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB,
+ raw_ostream &OS, MCCodeEmitter *Emitter,
+ bool RelaxAll, bool NoExecStack) {
+ AArch64ELFStreamer *S = new AArch64ELFStreamer(Context, TAB, OS, Emitter);
+ if (RelaxAll)
+ S->getAssembler().setRelaxAll(true);
+ if (NoExecStack)
+ S->getAssembler().setNoExecStack(true);
+ return S;
+ }
+}
+
+
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h
new file mode 100644
index 0000000000..5a89ca50ce
--- /dev/null
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h
@@ -0,0 +1,27 @@
+//===-- AArch64ELFStreamer.h - ELF Streamer for AArch64 ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements ELF streamer information for the AArch64 backend.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_AARCH64_ELF_STREAMER_H
+#define LLVM_AARCH64_ELF_STREAMER_H
+
+#include "llvm/MC/MCELFStreamer.h"
+
+namespace llvm {
+
+ MCELFStreamer* createAArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB,
+ raw_ostream &OS,
+ MCCodeEmitter *Emitter,
+ bool RelaxAll, bool NoExecStack);
+}
+
+#endif // LLVM_AARCH64_ELF_STREAMER_H
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h b/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h
new file mode 100644
index 0000000000..15e088667c
--- /dev/null
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h
@@ -0,0 +1,108 @@
+//=- AArch64/AArch64FixupKinds.h - AArch64 Specific Fixup Entries -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_AARCH64_AARCH64FIXUPKINDS_H
+#define LLVM_AARCH64_AARCH64FIXUPKINDS_H
+
+#include "llvm/MC/MCFixup.h"
+
+namespace llvm {
+ namespace AArch64 {
+ enum Fixups {
+ fixup_a64_ld_prel = FirstTargetFixupKind,
+ fixup_a64_adr_prel,
+ fixup_a64_adr_prel_page,
+
+ fixup_a64_add_lo12,
+
+ fixup_a64_ldst8_lo12,
+ fixup_a64_ldst16_lo12,
+ fixup_a64_ldst32_lo12,
+ fixup_a64_ldst64_lo12,
+ fixup_a64_ldst128_lo12,
+
+ fixup_a64_tstbr,
+ fixup_a64_condbr,
+ fixup_a64_uncondbr,
+ fixup_a64_call,
+
+ fixup_a64_movw_uabs_g0,
+ fixup_a64_movw_uabs_g0_nc,
+ fixup_a64_movw_uabs_g1,
+ fixup_a64_movw_uabs_g1_nc,
+ fixup_a64_movw_uabs_g2,
+ fixup_a64_movw_uabs_g2_nc,
+ fixup_a64_movw_uabs_g3,
+
+ fixup_a64_movw_sabs_g0,
+ fixup_a64_movw_sabs_g1,
+ fixup_a64_movw_sabs_g2,
+
+ fixup_a64_adr_prel_got_page,
+ fixup_a64_ld64_got_lo12_nc,
+
+ // Produce offsets relative to the module's dynamic TLS area.
+ fixup_a64_movw_dtprel_g2,
+ fixup_a64_movw_dtprel_g1,
+ fixup_a64_movw_dtprel_g1_nc,
+ fixup_a64_movw_dtprel_g0,
+ fixup_a64_movw_dtprel_g0_nc,
+ fixup_a64_add_dtprel_hi12,
+ fixup_a64_add_dtprel_lo12,
+ fixup_a64_add_dtprel_lo12_nc,
+ fixup_a64_ldst8_dtprel_lo12,
+ fixup_a64_ldst8_dtprel_lo12_nc,
+ fixup_a64_ldst16_dtprel_lo12,
+ fixup_a64_ldst16_dtprel_lo12_nc,
+ fixup_a64_ldst32_dtprel_lo12,
+ fixup_a64_ldst32_dtprel_lo12_nc,
+ fixup_a64_ldst64_dtprel_lo12,
+ fixup_a64_ldst64_dtprel_lo12_nc,
+
+ // Produce the GOT entry containing a variable's address in TLS's
+ // initial-exec mode.
+ fixup_a64_movw_gottprel_g1,
+ fixup_a64_movw_gottprel_g0_nc,
+ fixup_a64_adr_gottprel_page,
+ fixup_a64_ld64_gottprel_lo12_nc,
+ fixup_a64_ld_gottprel_prel19,
+
+ // Produce offsets relative to the thread pointer: TPIDR_EL0.
+ fixup_a64_movw_tprel_g2,
+ fixup_a64_movw_tprel_g1,
+ fixup_a64_movw_tprel_g1_nc,
+ fixup_a64_movw_tprel_g0,
+ fixup_a64_movw_tprel_g0_nc,
+ fixup_a64_add_tprel_hi12,
+ fixup_a64_add_tprel_lo12,
+ fixup_a64_add_tprel_lo12_nc,
+ fixup_a64_ldst8_tprel_lo12,
+ fixup_a64_ldst8_tprel_lo12_nc,
+ fixup_a64_ldst16_tprel_lo12,
+ fixup_a64_ldst16_tprel_lo12_nc,
+ fixup_a64_ldst32_tprel_lo12,
+ fixup_a64_ldst32_tprel_lo12_nc,
+ fixup_a64_ldst64_tprel_lo12,
+ fixup_a64_ldst64_tprel_lo12_nc,
+
+ // Produce the special fixups used by the general-dynamic TLS model.
+ fixup_a64_tlsdesc_adr_page,
+ fixup_a64_tlsdesc_ld64_lo12_nc,
+ fixup_a64_tlsdesc_add_lo12_nc,
+ fixup_a64_tlsdesc_call,
+
+
+ // Marker
+ LastTargetFixupKind,
+ NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
+ };
+ }
+}
+
+#endif
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
new file mode 100644
index 0000000000..8ec8cbf1c5
--- /dev/null
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
@@ -0,0 +1,41 @@
+//===-- AArch64MCAsmInfo.cpp - AArch64 asm properties ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the AArch64MCAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64MCAsmInfo.h"
+
+using namespace llvm;
+
+AArch64ELFMCAsmInfo::AArch64ELFMCAsmInfo() {
+ PointerSize = 8;
+
+ // ".comm align is in bytes but .align is pow-2."
+ AlignmentIsInBytes = false;
+
+ CommentString = "//";
+ PrivateGlobalPrefix = ".L";
+ Code32Directive = ".code\t32";
+
+ Data16bitsDirective = "\t.hword\t";
+ Data32bitsDirective = "\t.word\t";
+ Data64bitsDirective = "\t.xword\t";
+
+ UseDataRegionDirectives = true;
+
+ WeakRefDirective = "\t.weak\t";
+
+ HasLEB128 = true;
+ SupportsDebugInformation = true;
+
+ // Exception handling
+ ExceptionsType = ExceptionHandling::DwarfCFI;
+}
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h
new file mode 100644
index 0000000000..a20bc471c2
--- /dev/null
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h
@@ -0,0 +1,27 @@
+//==-- AArch64MCAsmInfo.h - AArch64 asm properties -------------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the AArch64MCAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_AARCH64TARGETASMINFO_H
+#define LLVM_AARCH64TARGETASMINFO_H
+
+#include "llvm/MC/MCAsmInfo.h"
+
+namespace llvm {
+
+ struct AArch64ELFMCAsmInfo : public MCAsmInfo {
+ explicit AArch64ELFMCAsmInfo();
+ };
+
+} // namespace llvm
+
+#endif
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
new file mode 100644
index 0000000000..f2bbd85209
--- /dev/null
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
@@ -0,0 +1,517 @@
+//=- AArch64/AArch64MCCodeEmitter.cpp - Convert AArch64 code to machine code =//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AArch64MCCodeEmitter class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mccodeemitter"
+#include "MCTargetDesc/AArch64BaseInfo.h"
+#include "MCTargetDesc/AArch64FixupKinds.h"
+#include "MCTargetDesc/AArch64MCExpr.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+class AArch64MCCodeEmitter : public MCCodeEmitter {
+ AArch64MCCodeEmitter(const AArch64MCCodeEmitter &); // DO NOT IMPLEMENT
+ void operator=(const AArch64MCCodeEmitter &); // DO NOT IMPLEMENT
+ const MCInstrInfo &MCII;
+ const MCSubtargetInfo &STI;
+ MCContext &Ctx;
+
+public:
+ AArch64MCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti,
+ MCContext &ctx)
+ : MCII(mcii), STI(sti), Ctx(ctx) {
+ }
+
+ ~AArch64MCCodeEmitter() {}
+
+ unsigned getAddSubImmOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ unsigned getAdrpLabelOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ template<int MemSize>
+ unsigned getOffsetUImm12OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ return getOffsetUImm12OpValue(MI, OpIdx, Fixups, MemSize);
+ }
+
+ unsigned getOffsetUImm12OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ int MemSize) const;
+
+ unsigned getBitfield32LSLOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getBitfield64LSLOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+
+ // Labels are handled mostly the same way: a symbol is needed, and
+ // just gets some fixup attached.
+ template<AArch64::Fixups fixupDesired>
+ unsigned getLabelOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ unsigned getLoadLitLabelOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+
+ unsigned getMoveWideImmOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+
+ unsigned getAddressWithFixup(const MCOperand &MO,
+ unsigned FixupKind,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+
+ // getBinaryCodeForInstr - TableGen'erated function for getting the
+ // binary encoding for an instruction.
+ uint64_t getBinaryCodeForInstr(const MCInst &MI,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getMachineOpValue - Return binary encoding of operand. If the machine
+ /// operand requires relocation, record the relocation and return zero.
+  unsigned getMachineOpValue(const MCInst &MI, const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+
+ void EmitByte(unsigned char C, raw_ostream &OS) const {
+ OS << (char)C;
+ }
+
+ void EmitInstruction(uint32_t Val, raw_ostream &OS) const {
+ // Output the constant in little endian byte order.
+ for (unsigned i = 0; i != 4; ++i) {
+ EmitByte(Val & 0xff, OS);
+ Val >>= 8;
+ }
+ }
+
+
+ void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ unsigned fixFCMPImm(const MCInst &MI, unsigned EncodedValue) const;
+
+ template<int hasRs, int hasRt2> unsigned
+ fixLoadStoreExclusive(const MCInst &MI, unsigned EncodedValue) const;
+
+ unsigned fixMOVZ(const MCInst &MI, unsigned EncodedValue) const;
+
+ unsigned fixMulHigh(const MCInst &MI, unsigned EncodedValue) const;
+
+
+};
+
+} // end anonymous namespace
+
+unsigned AArch64MCCodeEmitter::getAddressWithFixup(const MCOperand &MO,
+ unsigned FixupKind,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ if (!MO.isExpr()) {
+ // This can occur for manually decoded or constructed MCInsts, but neither
+ // the assembly-parser nor instruction selection will currently produce an
+ // MCInst that's not a symbol reference.
+ assert(MO.isImm() && "Unexpected address requested");
+ return MO.getImm();
+ }
+
+ const MCExpr *Expr = MO.getExpr();
+ MCFixupKind Kind = MCFixupKind(FixupKind);
+ Fixups.push_back(MCFixup::Create(0, Expr, Kind));
+
+ return 0;
+}
+
+unsigned AArch64MCCodeEmitter::
+getOffsetUImm12OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ int MemSize) const {
+ const MCOperand &ImmOp = MI.getOperand(OpIdx);
+ if (ImmOp.isImm())
+ return ImmOp.getImm();
+
+ assert(ImmOp.isExpr() && "Unexpected operand type");
+ const AArch64MCExpr *Expr = cast<AArch64MCExpr>(ImmOp.getExpr());
+ unsigned FixupKind;
+
+
+ switch (Expr->getKind()) {
+ default: llvm_unreachable("Unexpected operand modifier");
+ case AArch64MCExpr::VK_AARCH64_LO12: {
+ unsigned FixupsBySize[] = { AArch64::fixup_a64_ldst8_lo12,
+ AArch64::fixup_a64_ldst16_lo12,
+ AArch64::fixup_a64_ldst32_lo12,
+ AArch64::fixup_a64_ldst64_lo12,
+ AArch64::fixup_a64_ldst128_lo12 };
+ assert(MemSize <= 16 && "Invalid fixup for operation");
+ FixupKind = FixupsBySize[Log2_32(MemSize)];
+ break;
+ }
+ case AArch64MCExpr::VK_AARCH64_GOT_LO12:
+ assert(MemSize == 8 && "Invalid fixup for operation");
+ FixupKind = AArch64::fixup_a64_ld64_got_lo12_nc;
+ break;
+ case AArch64MCExpr::VK_AARCH64_DTPREL_LO12: {
+ unsigned FixupsBySize[] = { AArch64::fixup_a64_ldst8_dtprel_lo12,
+ AArch64::fixup_a64_ldst16_dtprel_lo12,
+ AArch64::fixup_a64_ldst32_dtprel_lo12,
+ AArch64::fixup_a64_ldst64_dtprel_lo12 };
+ assert(MemSize <= 8 && "Invalid fixup for operation");
+ FixupKind = FixupsBySize[Log2_32(MemSize)];
+ break;
+ }
+ case AArch64MCExpr::VK_AARCH64_DTPREL_LO12_NC: {
+ unsigned FixupsBySize[] = { AArch64::fixup_a64_ldst8_dtprel_lo12_nc,
+ AArch64::fixup_a64_ldst16_dtprel_lo12_nc,
+ AArch64::fixup_a64_ldst32_dtprel_lo12_nc,
+ AArch64::fixup_a64_ldst64_dtprel_lo12_nc };
+ assert(MemSize <= 8 && "Invalid fixup for operation");
+ FixupKind = FixupsBySize[Log2_32(MemSize)];
+ break;
+ }
+ case AArch64MCExpr::VK_AARCH64_GOTTPREL_LO12:
+ assert(MemSize == 8 && "Invalid fixup for operation");
+ FixupKind = AArch64::fixup_a64_ld64_gottprel_lo12_nc;
+ break;
+  case AArch64MCExpr::VK_AARCH64_TPREL_LO12: {
+ unsigned FixupsBySize[] = { AArch64::fixup_a64_ldst8_tprel_lo12,
+ AArch64::fixup_a64_ldst16_tprel_lo12,
+ AArch64::fixup_a64_ldst32_tprel_lo12,
+ AArch64::fixup_a64_ldst64_tprel_lo12 };
+ assert(MemSize <= 8 && "Invalid fixup for operation");
+ FixupKind = FixupsBySize[Log2_32(MemSize)];
+ break;
+ }
+ case AArch64MCExpr::VK_AARCH64_TPREL_LO12_NC: {
+ unsigned FixupsBySize[] = { AArch64::fixup_a64_ldst8_tprel_lo12_nc,
+ AArch64::fixup_a64_ldst16_tprel_lo12_nc,
+ AArch64::fixup_a64_ldst32_tprel_lo12_nc,
+ AArch64::fixup_a64_ldst64_tprel_lo12_nc };
+ assert(MemSize <= 8 && "Invalid fixup for operation");
+ FixupKind = FixupsBySize[Log2_32(MemSize)];
+ break;
+ }
+ case AArch64MCExpr::VK_AARCH64_TLSDESC_LO12:
+ assert(MemSize == 8 && "Invalid fixup for operation");
+ FixupKind = AArch64::fixup_a64_tlsdesc_ld64_lo12_nc;
+ break;
+ }
+
+ return getAddressWithFixup(ImmOp, FixupKind, Fixups);
+}
+
+unsigned
+AArch64MCCodeEmitter::getAddSubImmOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ if (MO.isImm())
+ return static_cast<unsigned>(MO.getImm());
+
+ assert(MO.isExpr());
+
+ unsigned FixupKind = 0;
+  switch (cast<AArch64MCExpr>(MO.getExpr())->getKind()) {
+ default: llvm_unreachable("Invalid expression modifier");
+ case AArch64MCExpr::VK_AARCH64_LO12:
+ FixupKind = AArch64::fixup_a64_add_lo12; break;
+ case AArch64MCExpr::VK_AARCH64_DTPREL_HI12:
+ FixupKind = AArch64::fixup_a64_add_dtprel_hi12; break;
+ case AArch64MCExpr::VK_AARCH64_DTPREL_LO12:
+ FixupKind = AArch64::fixup_a64_add_dtprel_lo12; break;
+ case AArch64MCExpr::VK_AARCH64_DTPREL_LO12_NC:
+ FixupKind = AArch64::fixup_a64_add_dtprel_lo12_nc; break;
+ case AArch64MCExpr::VK_AARCH64_TPREL_HI12:
+ FixupKind = AArch64::fixup_a64_add_tprel_hi12; break;
+ case AArch64MCExpr::VK_AARCH64_TPREL_LO12:
+ FixupKind = AArch64::fixup_a64_add_tprel_lo12; break;
+ case AArch64MCExpr::VK_AARCH64_TPREL_LO12_NC:
+ FixupKind = AArch64::fixup_a64_add_tprel_lo12_nc; break;
+ case AArch64MCExpr::VK_AARCH64_TLSDESC_LO12:
+ FixupKind = AArch64::fixup_a64_tlsdesc_add_lo12_nc; break;
+ }
+
+ return getAddressWithFixup(MO, FixupKind, Fixups);
+}
+
+unsigned
+AArch64MCCodeEmitter::getAdrpLabelOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ if (MO.isImm())
+ return static_cast<unsigned>(MO.getImm());
+
+ assert(MO.isExpr());
+
+ unsigned Modifier = AArch64MCExpr::VK_AARCH64_None;
+ if (const AArch64MCExpr *Expr = dyn_cast<AArch64MCExpr>(MO.getExpr()))
+ Modifier = Expr->getKind();
+
+ unsigned FixupKind = 0;
+  switch (Modifier) {
+ case AArch64MCExpr::VK_AARCH64_None:
+ FixupKind = AArch64::fixup_a64_adr_prel_page;
+ break;
+ case AArch64MCExpr::VK_AARCH64_GOT:
+ FixupKind = AArch64::fixup_a64_adr_prel_got_page;
+ break;
+ case AArch64MCExpr::VK_AARCH64_GOTTPREL:
+ FixupKind = AArch64::fixup_a64_adr_gottprel_page;
+ break;
+ case AArch64MCExpr::VK_AARCH64_TLSDESC:
+ FixupKind = AArch64::fixup_a64_tlsdesc_adr_page;
+ break;
+ default:
+ llvm_unreachable("Unknown symbol reference kind for ADRP instruction");
+ }
+
+ return getAddressWithFixup(MO, FixupKind, Fixups);
+}
+
+unsigned
+AArch64MCCodeEmitter::getBitfield32LSLOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ assert(MO.isImm() && "Only immediate expected for shift");
+
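+  // LSL #imm is an alias of UBFM with immr = (32 - imm) % 32 and
+  // imms = 31 - imm; the returned operand value carries immr in bits 0-5 and
+  // imms in bits 6-11.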
+ return ((32 - MO.getImm()) & 0x1f) | (31 - MO.getImm()) << 6;
+}
+
+unsigned
+AArch64MCCodeEmitter::getBitfield64LSLOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ assert(MO.isImm() && "Only immediate expected for shift");
+
+ return ((64 - MO.getImm()) & 0x3f) | (63 - MO.getImm()) << 6;
+}
+
+
+template<AArch64::Fixups fixupDesired> unsigned
+AArch64MCCodeEmitter::getLabelOpValue(const MCInst &MI,
+ unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &MO = MI.getOperand(OpIdx);
+
+ if (MO.isExpr())
+ return getAddressWithFixup(MO, fixupDesired, Fixups);
+
+ assert(MO.isImm());
+ return MO.getImm();
+}
+
+unsigned
+AArch64MCCodeEmitter::getLoadLitLabelOpValue(const MCInst &MI,
+ unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &MO = MI.getOperand(OpIdx);
+
+ if (MO.isImm())
+ return MO.getImm();
+
+ assert(MO.isExpr());
+
+ unsigned FixupKind;
+ if (isa<AArch64MCExpr>(MO.getExpr())) {
+    assert(cast<AArch64MCExpr>(MO.getExpr())->getKind()
+ == AArch64MCExpr::VK_AARCH64_GOTTPREL
+ && "Invalid symbol modifier for literal load");
+ FixupKind = AArch64::fixup_a64_ld_gottprel_prel19;
+ } else {
+ FixupKind = AArch64::fixup_a64_ld_prel;
+ }
+
+ return getAddressWithFixup(MO, FixupKind, Fixups);
+}
+
+
+unsigned
+AArch64MCCodeEmitter::getMachineOpValue(const MCInst &MI,
+ const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ if (MO.isReg()) {
+ return Ctx.getRegisterInfo().getEncodingValue(MO.getReg());
+ } else if (MO.isImm()) {
+ return static_cast<unsigned>(MO.getImm());
+ }
+
+ llvm_unreachable("Unable to encode MCOperand!");
+ return 0;
+}
+
+unsigned
+AArch64MCCodeEmitter::getMoveWideImmOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &UImm16MO = MI.getOperand(OpIdx);
+ const MCOperand &ShiftMO = MI.getOperand(OpIdx + 1);
+
+ unsigned Result = static_cast<unsigned>(ShiftMO.getImm()) << 16;
+
+ if (UImm16MO.isImm()) {
+ Result |= UImm16MO.getImm();
+ return Result;
+ }
+
+ const AArch64MCExpr *A64E = cast<AArch64MCExpr>(UImm16MO.getExpr());
+ AArch64::Fixups requestedFixup;
+ switch (A64E->getKind()) {
+ default: llvm_unreachable("unexpected expression modifier");
+ case AArch64MCExpr::VK_AARCH64_ABS_G0:
+ requestedFixup = AArch64::fixup_a64_movw_uabs_g0; break;
+ case AArch64MCExpr::VK_AARCH64_ABS_G0_NC:
+ requestedFixup = AArch64::fixup_a64_movw_uabs_g0_nc; break;
+ case AArch64MCExpr::VK_AARCH64_ABS_G1:
+ requestedFixup = AArch64::fixup_a64_movw_uabs_g1; break;
+ case AArch64MCExpr::VK_AARCH64_ABS_G1_NC:
+ requestedFixup = AArch64::fixup_a64_movw_uabs_g1_nc; break;
+ case AArch64MCExpr::VK_AARCH64_ABS_G2:
+ requestedFixup = AArch64::fixup_a64_movw_uabs_g2; break;
+ case AArch64MCExpr::VK_AARCH64_ABS_G2_NC:
+ requestedFixup = AArch64::fixup_a64_movw_uabs_g2_nc; break;
+ case AArch64MCExpr::VK_AARCH64_ABS_G3:
+ requestedFixup = AArch64::fixup_a64_movw_uabs_g3; break;
+ case AArch64MCExpr::VK_AARCH64_SABS_G0:
+ requestedFixup = AArch64::fixup_a64_movw_sabs_g0; break;
+ case AArch64MCExpr::VK_AARCH64_SABS_G1:
+ requestedFixup = AArch64::fixup_a64_movw_sabs_g1; break;
+ case AArch64MCExpr::VK_AARCH64_SABS_G2:
+ requestedFixup = AArch64::fixup_a64_movw_sabs_g2; break;
+ case AArch64MCExpr::VK_AARCH64_DTPREL_G2:
+ requestedFixup = AArch64::fixup_a64_movw_dtprel_g2; break;
+ case AArch64MCExpr::VK_AARCH64_DTPREL_G1:
+ requestedFixup = AArch64::fixup_a64_movw_dtprel_g1; break;
+ case AArch64MCExpr::VK_AARCH64_DTPREL_G1_NC:
+ requestedFixup = AArch64::fixup_a64_movw_dtprel_g1_nc; break;
+ case AArch64MCExpr::VK_AARCH64_DTPREL_G0:
+ requestedFixup = AArch64::fixup_a64_movw_dtprel_g0; break;
+ case AArch64MCExpr::VK_AARCH64_DTPREL_G0_NC:
+ requestedFixup = AArch64::fixup_a64_movw_dtprel_g0_nc; break;
+ case AArch64MCExpr::VK_AARCH64_GOTTPREL_G1:
+ requestedFixup = AArch64::fixup_a64_movw_gottprel_g1; break;
+ case AArch64MCExpr::VK_AARCH64_GOTTPREL_G0_NC:
+ requestedFixup = AArch64::fixup_a64_movw_gottprel_g0_nc; break;
+ case AArch64MCExpr::VK_AARCH64_TPREL_G2:
+ requestedFixup = AArch64::fixup_a64_movw_tprel_g2; break;
+ case AArch64MCExpr::VK_AARCH64_TPREL_G1:
+ requestedFixup = AArch64::fixup_a64_movw_tprel_g1; break;
+ case AArch64MCExpr::VK_AARCH64_TPREL_G1_NC:
+ requestedFixup = AArch64::fixup_a64_movw_tprel_g1_nc; break;
+ case AArch64MCExpr::VK_AARCH64_TPREL_G0:
+ requestedFixup = AArch64::fixup_a64_movw_tprel_g0; break;
+ case AArch64MCExpr::VK_AARCH64_TPREL_G0_NC:
+ requestedFixup = AArch64::fixup_a64_movw_tprel_g0_nc; break;
+ }
+
+ return Result | getAddressWithFixup(UImm16MO, requestedFixup, Fixups);
+}
+
+unsigned AArch64MCCodeEmitter::fixFCMPImm(const MCInst &MI,
+ unsigned EncodedValue) const {
+  // For FCMP[E] Rn, #0.0, the Rm field is architecturally ignored; its
+  // canonical encoding is all zeros.
+ EncodedValue &= ~0x1f0000u;
+
+ return EncodedValue;
+}
+
+template<int hasRs, int hasRt2> unsigned
+AArch64MCCodeEmitter::fixLoadStoreExclusive(const MCInst &MI,
+ unsigned EncodedValue) const {
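+  // The unused Rs (bits 20-16) and Rt2 (bits 14-10) fields must be encoded as
+  // all ones, so set them when the instruction doesn't provide those operands.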
+ if (!hasRs) EncodedValue |= 0x001F0000;
+ if (!hasRt2) EncodedValue |= 0x00007C00;
+
+ return EncodedValue;
+}
+
+unsigned
+AArch64MCCodeEmitter::fixMOVZ(const MCInst &MI, unsigned EncodedValue) const {
+ // If one of the signed fixup kinds is applied to a MOVZ instruction, the
+ // eventual result could be either a MOVZ or a MOVN. It's the MCCodeEmitter's
+ // job to ensure that any bits possibly affected by this are 0. This means we
+ // must zero out bit 30 (essentially emitting a MOVN).
+ MCOperand UImm16MO = MI.getOperand(1);
+
+ // Nothing to do if there's no fixup.
+ if (UImm16MO.isImm())
+ return EncodedValue;
+
+ const AArch64MCExpr *A64E = cast<AArch64MCExpr>(UImm16MO.getExpr());
+ switch (A64E->getKind()) {
+ case AArch64MCExpr::VK_AARCH64_SABS_G0:
+ case AArch64MCExpr::VK_AARCH64_SABS_G1:
+ case AArch64MCExpr::VK_AARCH64_SABS_G2:
+ case AArch64MCExpr::VK_AARCH64_DTPREL_G2:
+ case AArch64MCExpr::VK_AARCH64_DTPREL_G1:
+ case AArch64MCExpr::VK_AARCH64_DTPREL_G0:
+ case AArch64MCExpr::VK_AARCH64_GOTTPREL_G1:
+ case AArch64MCExpr::VK_AARCH64_TPREL_G2:
+ case AArch64MCExpr::VK_AARCH64_TPREL_G1:
+ case AArch64MCExpr::VK_AARCH64_TPREL_G0:
+ return EncodedValue & ~(1u << 30);
+ default:
+ // Nothing to do for an unsigned fixup.
+ return EncodedValue;
+ }
+
+ llvm_unreachable("Should have returned by now");
+}
+
+unsigned
+AArch64MCCodeEmitter::fixMulHigh(const MCInst &MI,
+ unsigned EncodedValue) const {
+ // The Ra field of SMULH and UMULH is unused: it should be assembled as 31
+ // (i.e. all bits 1) but is ignored by the processor.
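+  // Ra occupies bits 14-10 of the encoding.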
+ EncodedValue |= 0x1f << 10;
+ return EncodedValue;
+}
+
+MCCodeEmitter *llvm::createAArch64MCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx) {
+ return new AArch64MCCodeEmitter(MCII, STI, Ctx);
+}
+
+void AArch64MCCodeEmitter::
+EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ if (MI.getOpcode() == AArch64::TLSDESCCALL) {
+ // This is a directive which applies an R_AARCH64_TLSDESC_CALL to the
+ // following (BLR) instruction. It doesn't emit any code itself so it
+ // doesn't go through the normal TableGenerated channels.
+ MCFixupKind Fixup = MCFixupKind(AArch64::fixup_a64_tlsdesc_call);
+ const MCExpr *Expr;
+ Expr = AArch64MCExpr::CreateTLSDesc(MI.getOperand(0).getExpr(), Ctx);
+ Fixups.push_back(MCFixup::Create(0, Expr, Fixup));
+ return;
+ }
+
+ uint32_t Binary = getBinaryCodeForInstr(MI, Fixups);
+
+ EmitInstruction(Binary, OS);
+}
+
+
+#include "AArch64GenMCCodeEmitter.inc"
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
new file mode 100644
index 0000000000..e86e04aa33
--- /dev/null
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
@@ -0,0 +1,173 @@
+//===-- AArch64MCExpr.cpp - AArch64 specific MC expression classes --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "aarch64mcexpr"
+#include "AArch64MCExpr.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCELF.h"
+#include "llvm/Object/ELF.h"
+
+using namespace llvm;
+
+const AArch64MCExpr*
+AArch64MCExpr::Create(VariantKind Kind, const MCExpr *Expr,
+ MCContext &Ctx) {
+ return new (Ctx) AArch64MCExpr(Kind, Expr);
+}
+
+void AArch64MCExpr::PrintImpl(raw_ostream &OS) const {
+ switch (Kind) {
+ default: llvm_unreachable("Invalid kind!");
+ case VK_AARCH64_GOT: OS << ":got:"; break;
+ case VK_AARCH64_GOT_LO12: OS << ":got_lo12:"; break;
+ case VK_AARCH64_LO12: OS << ":lo12:"; break;
+ case VK_AARCH64_ABS_G0: OS << ":abs_g0:"; break;
+ case VK_AARCH64_ABS_G0_NC: OS << ":abs_g0_nc:"; break;
+ case VK_AARCH64_ABS_G1: OS << ":abs_g1:"; break;
+ case VK_AARCH64_ABS_G1_NC: OS << ":abs_g1_nc:"; break;
+ case VK_AARCH64_ABS_G2: OS << ":abs_g2:"; break;
+ case VK_AARCH64_ABS_G2_NC: OS << ":abs_g2_nc:"; break;
+ case VK_AARCH64_ABS_G3: OS << ":abs_g3:"; break;
+ case VK_AARCH64_SABS_G0: OS << ":abs_g0_s:"; break;
+ case VK_AARCH64_SABS_G1: OS << ":abs_g1_s:"; break;
+ case VK_AARCH64_SABS_G2: OS << ":abs_g2_s:"; break;
+ case VK_AARCH64_DTPREL_G2: OS << ":dtprel_g2:"; break;
+ case VK_AARCH64_DTPREL_G1: OS << ":dtprel_g1:"; break;
+ case VK_AARCH64_DTPREL_G1_NC: OS << ":dtprel_g1_nc:"; break;
+ case VK_AARCH64_DTPREL_G0: OS << ":dtprel_g0:"; break;
+ case VK_AARCH64_DTPREL_G0_NC: OS << ":dtprel_g0_nc:"; break;
+ case VK_AARCH64_DTPREL_HI12: OS << ":dtprel_hi12:"; break;
+ case VK_AARCH64_DTPREL_LO12: OS << ":dtprel_lo12:"; break;
+ case VK_AARCH64_DTPREL_LO12_NC: OS << ":dtprel_lo12_nc:"; break;
+ case VK_AARCH64_GOTTPREL_G1: OS << ":gottprel_g1:"; break;
+ case VK_AARCH64_GOTTPREL_G0_NC: OS << ":gottprel_g0_nc:"; break;
+ case VK_AARCH64_GOTTPREL: OS << ":gottprel:"; break;
+ case VK_AARCH64_GOTTPREL_LO12: OS << ":gottprel_lo12:"; break;
+ case VK_AARCH64_TPREL_G2: OS << ":tprel_g2:"; break;
+ case VK_AARCH64_TPREL_G1: OS << ":tprel_g1:"; break;
+ case VK_AARCH64_TPREL_G1_NC: OS << ":tprel_g1_nc:"; break;
+ case VK_AARCH64_TPREL_G0: OS << ":tprel_g0:"; break;
+ case VK_AARCH64_TPREL_G0_NC: OS << ":tprel_g0_nc:"; break;
+ case VK_AARCH64_TPREL_HI12: OS << ":tprel_hi12:"; break;
+ case VK_AARCH64_TPREL_LO12: OS << ":tprel_lo12:"; break;
+ case VK_AARCH64_TPREL_LO12_NC: OS << ":tprel_lo12_nc:"; break;
+ case VK_AARCH64_TLSDESC: OS << ":tlsdesc:"; break;
+ case VK_AARCH64_TLSDESC_LO12: OS << ":tlsdesc_lo12:"; break;
+
+ }
+
+ const MCExpr *Expr = getSubExpr();
+ if (Expr->getKind() != MCExpr::SymbolRef)
+ OS << '(';
+ Expr->print(OS);
+ if (Expr->getKind() != MCExpr::SymbolRef)
+ OS << ')';
+}
+
+bool
+AArch64MCExpr::EvaluateAsRelocatableImpl(MCValue &Res,
+ const MCAsmLayout *Layout) const {
+ return getSubExpr()->EvaluateAsRelocatable(Res, *Layout);
+}
+
+static void fixELFSymbolsInTLSFixupsImpl(const MCExpr *Expr, MCAssembler &Asm) {
+ switch (Expr->getKind()) {
+ case MCExpr::Target:
+ llvm_unreachable("Can't handle nested target expression");
+ break;
+ case MCExpr::Constant:
+ break;
+
+ case MCExpr::Binary: {
+ const MCBinaryExpr *BE = cast<MCBinaryExpr>(Expr);
+ fixELFSymbolsInTLSFixupsImpl(BE->getLHS(), Asm);
+ fixELFSymbolsInTLSFixupsImpl(BE->getRHS(), Asm);
+ break;
+ }
+
+ case MCExpr::SymbolRef: {
+ // We're known to be under a TLS fixup, so any symbol should be
+ // modified. There should be only one.
+ const MCSymbolRefExpr &SymRef = *cast<MCSymbolRefExpr>(Expr);
+ MCSymbolData &SD = Asm.getOrCreateSymbolData(SymRef.getSymbol());
+ MCELF::SetType(SD, ELF::STT_TLS);
+ break;
+ }
+
+ case MCExpr::Unary:
+ fixELFSymbolsInTLSFixupsImpl(cast<MCUnaryExpr>(Expr)->getSubExpr(), Asm);
+ break;
+ }
+}
+
+void AArch64MCExpr::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const {
+ switch (getKind()) {
+ default:
+ return;
+ case VK_AARCH64_DTPREL_G2:
+ case VK_AARCH64_DTPREL_G1:
+ case VK_AARCH64_DTPREL_G1_NC:
+ case VK_AARCH64_DTPREL_G0:
+ case VK_AARCH64_DTPREL_G0_NC:
+ case VK_AARCH64_DTPREL_HI12:
+ case VK_AARCH64_DTPREL_LO12:
+ case VK_AARCH64_DTPREL_LO12_NC:
+ case VK_AARCH64_GOTTPREL_G1:
+ case VK_AARCH64_GOTTPREL_G0_NC:
+ case VK_AARCH64_GOTTPREL:
+ case VK_AARCH64_GOTTPREL_LO12:
+ case VK_AARCH64_TPREL_G2:
+ case VK_AARCH64_TPREL_G1:
+ case VK_AARCH64_TPREL_G1_NC:
+ case VK_AARCH64_TPREL_G0:
+ case VK_AARCH64_TPREL_G0_NC:
+ case VK_AARCH64_TPREL_HI12:
+ case VK_AARCH64_TPREL_LO12:
+ case VK_AARCH64_TPREL_LO12_NC:
+ case VK_AARCH64_TLSDESC:
+ case VK_AARCH64_TLSDESC_LO12:
+ break;
+ }
+
+ fixELFSymbolsInTLSFixupsImpl(getSubExpr(), Asm);
+}
+
+// FIXME: This basically copies MCObjectStreamer::AddValueSymbols. That method
+// should probably be made public now that two backends are using it.
+static void AddValueSymbolsImpl(const MCExpr *Value, MCAssembler *Asm) {
+ switch (Value->getKind()) {
+ case MCExpr::Target:
+ llvm_unreachable("Can't handle nested target expr!");
+ break;
+
+ case MCExpr::Constant:
+ break;
+
+ case MCExpr::Binary: {
+ const MCBinaryExpr *BE = cast<MCBinaryExpr>(Value);
+ AddValueSymbolsImpl(BE->getLHS(), Asm);
+ AddValueSymbolsImpl(BE->getRHS(), Asm);
+ break;
+ }
+
+ case MCExpr::SymbolRef:
+ Asm->getOrCreateSymbolData(cast<MCSymbolRefExpr>(Value)->getSymbol());
+ break;
+
+ case MCExpr::Unary:
+ AddValueSymbolsImpl(cast<MCUnaryExpr>(Value)->getSubExpr(), Asm);
+ break;
+ }
+}
+
+void AArch64MCExpr::AddValueSymbols(MCAssembler *Asm) const {
+ AddValueSymbolsImpl(getSubExpr(), Asm);
+}
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h
new file mode 100644
index 0000000000..20adc0c6b4
--- /dev/null
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h
@@ -0,0 +1,161 @@
+//==- AArch64MCExpr.h - AArch64 specific MC expression classes --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_AARCH64MCEXPR_H
+#define LLVM_AARCH64MCEXPR_H
+
+#include "llvm/MC/MCExpr.h"
+
+namespace llvm {
+
+class AArch64MCExpr : public MCTargetExpr {
+public:
+ enum VariantKind {
+ VK_AARCH64_None,
+ VK_AARCH64_GOT, // :got: modifier in assembly
+ VK_AARCH64_GOT_LO12, // :got_lo12:
+ VK_AARCH64_LO12, // :lo12:
+
+ VK_AARCH64_ABS_G0, // :abs_g0:
+ VK_AARCH64_ABS_G0_NC, // :abs_g0_nc:
+ VK_AARCH64_ABS_G1,
+ VK_AARCH64_ABS_G1_NC,
+ VK_AARCH64_ABS_G2,
+ VK_AARCH64_ABS_G2_NC,
+ VK_AARCH64_ABS_G3,
+
+ VK_AARCH64_SABS_G0, // :abs_g0_s:
+ VK_AARCH64_SABS_G1,
+ VK_AARCH64_SABS_G2,
+
+ VK_AARCH64_DTPREL_G2, // :dtprel_g2:
+ VK_AARCH64_DTPREL_G1,
+ VK_AARCH64_DTPREL_G1_NC,
+ VK_AARCH64_DTPREL_G0,
+ VK_AARCH64_DTPREL_G0_NC,
+ VK_AARCH64_DTPREL_HI12,
+ VK_AARCH64_DTPREL_LO12,
+ VK_AARCH64_DTPREL_LO12_NC,
+
+    VK_AARCH64_GOTTPREL_G1, // :gottprel_g1:
+ VK_AARCH64_GOTTPREL_G0_NC,
+ VK_AARCH64_GOTTPREL,
+ VK_AARCH64_GOTTPREL_LO12,
+
+    VK_AARCH64_TPREL_G2, // :tprel_g2:
+ VK_AARCH64_TPREL_G1,
+ VK_AARCH64_TPREL_G1_NC,
+ VK_AARCH64_TPREL_G0,
+ VK_AARCH64_TPREL_G0_NC,
+ VK_AARCH64_TPREL_HI12,
+ VK_AARCH64_TPREL_LO12,
+ VK_AARCH64_TPREL_LO12_NC,
+
+ VK_AARCH64_TLSDESC, // :tlsdesc:
+ VK_AARCH64_TLSDESC_LO12
+ };
+
+private:
+ const VariantKind Kind;
+ const MCExpr *Expr;
+
+ explicit AArch64MCExpr(VariantKind _Kind, const MCExpr *_Expr)
+ : Kind(_Kind), Expr(_Expr) {}
+
+public:
+ /// @name Construction
+ /// @{
+
+ static const AArch64MCExpr *Create(VariantKind Kind, const MCExpr *Expr,
+ MCContext &Ctx);
+
+ static const AArch64MCExpr *CreateLo12(const MCExpr *Expr, MCContext &Ctx) {
+ return Create(VK_AARCH64_LO12, Expr, Ctx);
+ }
+
+ static const AArch64MCExpr *CreateGOT(const MCExpr *Expr, MCContext &Ctx) {
+ return Create(VK_AARCH64_GOT, Expr, Ctx);
+ }
+
+ static const AArch64MCExpr *CreateGOTLo12(const MCExpr *Expr, MCContext &Ctx) {
+ return Create(VK_AARCH64_GOT_LO12, Expr, Ctx);
+ }
+
+ static const AArch64MCExpr *CreateDTPREL_G1(const MCExpr *Expr,
+ MCContext &Ctx) {
+ return Create(VK_AARCH64_DTPREL_G1, Expr, Ctx);
+ }
+
+ static const AArch64MCExpr *CreateDTPREL_G0_NC(const MCExpr *Expr,
+ MCContext &Ctx) {
+ return Create(VK_AARCH64_DTPREL_G0_NC, Expr, Ctx);
+ }
+
+ static const AArch64MCExpr *CreateGOTTPREL(const MCExpr *Expr,
+ MCContext &Ctx) {
+ return Create(VK_AARCH64_GOTTPREL, Expr, Ctx);
+ }
+
+ static const AArch64MCExpr *CreateGOTTPRELLo12(const MCExpr *Expr,
+ MCContext &Ctx) {
+ return Create(VK_AARCH64_GOTTPREL_LO12, Expr, Ctx);
+ }
+
+ static const AArch64MCExpr *CreateTLSDesc(const MCExpr *Expr,
+ MCContext &Ctx) {
+ return Create(VK_AARCH64_TLSDESC, Expr, Ctx);
+ }
+
+ static const AArch64MCExpr *CreateTLSDescLo12(const MCExpr *Expr,
+ MCContext &Ctx) {
+ return Create(VK_AARCH64_TLSDESC_LO12, Expr, Ctx);
+ }
+
+ static const AArch64MCExpr *CreateTPREL_G1(const MCExpr *Expr,
+ MCContext &Ctx) {
+ return Create(VK_AARCH64_TPREL_G1, Expr, Ctx);
+ }
+
+ static const AArch64MCExpr *CreateTPREL_G0_NC(const MCExpr *Expr,
+ MCContext &Ctx) {
+ return Create(VK_AARCH64_TPREL_G0_NC, Expr, Ctx);
+ }
+
+ /// @}
+ /// @name Accessors
+ /// @{
+
+  /// getKind - Get the kind of this expression.
+ VariantKind getKind() const { return Kind; }
+
+ /// getSubExpr - Get the child of this expression.
+ const MCExpr *getSubExpr() const { return Expr; }
+
+ /// @}
+
+ void PrintImpl(raw_ostream &OS) const;
+ bool EvaluateAsRelocatableImpl(MCValue &Res,
+ const MCAsmLayout *Layout) const;
+ void AddValueSymbols(MCAssembler *) const;
+ const MCSection *FindAssociatedSection() const {
+ return getSubExpr()->FindAssociatedSection();
+ }
+
+ void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const;
+
+ static bool classof(const MCExpr *E) {
+ return E->getKind() == MCExpr::Target;
+ }
+
+ static bool classof(const AArch64MCExpr *) { return true; }
+
+};
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
new file mode 100644
index 0000000000..0d2855fc0a
--- /dev/null
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
@@ -0,0 +1,991 @@
+//===-- AArch64MCTargetDesc.cpp - AArch64 Target Descriptions -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides AArch64 specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64MCTargetDesc.h"
+#include "AArch64BaseInfo.h"
+#include "AArch64ELFStreamer.h"
+#include "AArch64MCAsmInfo.h"
+#include "InstPrinter/AArch64InstPrinter.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/MC/MCCodeGenInfo.h"
+#include "llvm/MC/MCInstrAnalysis.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/Regex.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/ErrorHandling.h"
+
+#define GET_REGINFO_MC_DESC
+#include "AArch64GenRegisterInfo.inc"
+
+#define GET_INSTRINFO_MC_DESC
+#include "AArch64GenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "AArch64GenSubtargetInfo.inc"
+
+using namespace llvm;
+
+StringRef NamedImmMapper::toString(uint32_t Value, bool &Valid) const {
+ for (unsigned i = 0; i < NumPairs; ++i) {
+ if (Pairs[i].Value == Value) {
+ Valid = true;
+ return Pairs[i].Name;
+ }
+ }
+
+ Valid = false;
+ return StringRef();
+}
+
+uint32_t NamedImmMapper::fromString(StringRef Name, bool &Valid) const {
+ std::string LowerCaseName = Name.lower();
+ for (unsigned i = 0; i < NumPairs; ++i) {
+ if (Pairs[i].Name == LowerCaseName) {
+ Valid = true;
+ return Pairs[i].Value;
+ }
+ }
+
+ Valid = false;
+ return -1;
+}
+
+bool NamedImmMapper::validImm(uint32_t Value) const {
+ return Value < TooBigImm;
+}
+
+const NamedImmMapper::Mapping A64AT::ATMapper::ATPairs[] = {
+ {"s1e1r", S1E1R},
+ {"s1e2r", S1E2R},
+ {"s1e3r", S1E3R},
+ {"s1e1w", S1E1W},
+ {"s1e2w", S1E2W},
+ {"s1e3w", S1E3W},
+ {"s1e0r", S1E0R},
+ {"s1e0w", S1E0W},
+ {"s12e1r", S12E1R},
+ {"s12e1w", S12E1W},
+ {"s12e0r", S12E0R},
+ {"s12e0w", S12E0W},
+};
+
+A64AT::ATMapper::ATMapper()
+ : NamedImmMapper(ATPairs, 0) {}
+
+const NamedImmMapper::Mapping A64DB::DBarrierMapper::DBarrierPairs[] = {
+ {"oshld", OSHLD},
+ {"oshst", OSHST},
+ {"osh", OSH},
+ {"nshld", NSHLD},
+ {"nshst", NSHST},
+ {"nsh", NSH},
+ {"ishld", ISHLD},
+ {"ishst", ISHST},
+ {"ish", ISH},
+ {"ld", LD},
+ {"st", ST},
+ {"sy", SY}
+};
+
+A64DB::DBarrierMapper::DBarrierMapper()
+ : NamedImmMapper(DBarrierPairs, 16u) {}
+
+const NamedImmMapper::Mapping A64DC::DCMapper::DCPairs[] = {
+ {"zva", ZVA},
+ {"ivac", IVAC},
+ {"isw", ISW},
+ {"cvac", CVAC},
+ {"csw", CSW},
+ {"cvau", CVAU},
+ {"civac", CIVAC},
+ {"cisw", CISW}
+};
+
+A64DC::DCMapper::DCMapper()
+ : NamedImmMapper(DCPairs, 0) {}
+
+const NamedImmMapper::Mapping A64IC::ICMapper::ICPairs[] = {
+ {"ialluis", IALLUIS},
+ {"iallu", IALLU},
+ {"ivau", IVAU}
+};
+
+A64IC::ICMapper::ICMapper()
+ : NamedImmMapper(ICPairs, 0) {}
+
+const NamedImmMapper::Mapping A64ISB::ISBMapper::ISBPairs[] = {
+ {"sy", SY},
+};
+
+A64ISB::ISBMapper::ISBMapper()
+ : NamedImmMapper(ISBPairs, 16) {}
+
+const NamedImmMapper::Mapping A64PRFM::PRFMMapper::PRFMPairs[] = {
+ {"pldl1keep", PLDL1KEEP},
+ {"pldl1strm", PLDL1STRM},
+ {"pldl2keep", PLDL2KEEP},
+ {"pldl2strm", PLDL2STRM},
+ {"pldl3keep", PLDL3KEEP},
+ {"pldl3strm", PLDL3STRM},
+ {"pstl1keep", PSTL1KEEP},
+ {"pstl1strm", PSTL1STRM},
+ {"pstl2keep", PSTL2KEEP},
+ {"pstl2strm", PSTL2STRM},
+ {"pstl3keep", PSTL3KEEP},
+ {"pstl3strm", PSTL3STRM}
+};
+
+A64PRFM::PRFMMapper::PRFMMapper()
+ : NamedImmMapper(PRFMPairs, 32) {}
+
+const NamedImmMapper::Mapping A64PState::PStateMapper::PStatePairs[] = {
+ {"spsel", SPSel},
+ {"daifset", DAIFSet},
+ {"daifclr", DAIFClr}
+};
+
+A64PState::PStateMapper::PStateMapper()
+ : NamedImmMapper(PStatePairs, 0) {}
+
+const NamedImmMapper::Mapping A64SysReg::MRSMapper::MRSPairs[] = {
+ {"mdccsr_el0", MDCCSR_EL0},
+ {"dbgdtrrx_el0", DBGDTRRX_EL0},
+ {"mdrar_el1", MDRAR_EL1},
+ {"oslsr_el1", OSLSR_EL1},
+ {"dbgauthstatus_el1", DBGAUTHSTATUS_EL1},
+ {"pmceid0_el0", PMCEID0_EL0},
+ {"pmceid1_el0", PMCEID1_EL0},
+ {"midr_el1", MIDR_EL1},
+ {"ccsidr_el1", CCSIDR_EL1},
+ {"clidr_el1", CLIDR_EL1},
+ {"ctr_el0", CTR_EL0},
+ {"mpidr_el1", MPIDR_EL1},
+ {"revidr_el1", REVIDR_EL1},
+ {"aidr_el1", AIDR_EL1},
+ {"dczid_el0", DCZID_EL0},
+ {"id_pfr0_el1", ID_PFR0_EL1},
+ {"id_pfr1_el1", ID_PFR1_EL1},
+ {"id_dfr0_el1", ID_DFR0_EL1},
+ {"id_afr0_el1", ID_AFR0_EL1},
+ {"id_mmfr0_el1", ID_MMFR0_EL1},
+ {"id_mmfr1_el1", ID_MMFR1_EL1},
+ {"id_mmfr2_el1", ID_MMFR2_EL1},
+ {"id_mmfr3_el1", ID_MMFR3_EL1},
+ {"id_isar0_el1", ID_ISAR0_EL1},
+ {"id_isar1_el1", ID_ISAR1_EL1},
+ {"id_isar2_el1", ID_ISAR2_EL1},
+ {"id_isar3_el1", ID_ISAR3_EL1},
+ {"id_isar4_el1", ID_ISAR4_EL1},
+ {"id_isar5_el1", ID_ISAR5_EL1},
+ {"id_aa64pfr0_el1", ID_AA64PFR0_EL1},
+ {"id_aa64pfr1_el1", ID_AA64PFR1_EL1},
+ {"id_aa64dfr0_el1", ID_AA64DFR0_EL1},
+ {"id_aa64dfr1_el1", ID_AA64DFR1_EL1},
+ {"id_aa64afr0_el1", ID_AA64AFR0_EL1},
+ {"id_aa64afr1_el1", ID_AA64AFR1_EL1},
+ {"id_aa64isar0_el1", ID_AA64ISAR0_EL1},
+ {"id_aa64isar1_el1", ID_AA64ISAR1_EL1},
+ {"id_aa64mmfr0_el1", ID_AA64MMFR0_EL1},
+ {"id_aa64mmfr1_el1", ID_AA64MMFR1_EL1},
+ {"mvfr0_el1", MVFR0_EL1},
+ {"mvfr1_el1", MVFR1_EL1},
+ {"mvfr2_el1", MVFR2_EL1},
+ {"rvbar_el1", RVBAR_EL1},
+ {"rvbar_el2", RVBAR_EL2},
+ {"rvbar_el3", RVBAR_EL3},
+ {"isr_el1", ISR_EL1},
+ {"cntpct_el0", CNTPCT_EL0},
+ {"cntvct_el0", CNTVCT_EL0}
+};
+
+A64SysReg::MRSMapper::MRSMapper() {
+ InstPairs = &MRSPairs[0];
+ NumInstPairs = llvm::array_lengthof(MRSPairs);
+}
+
+const NamedImmMapper::Mapping A64SysReg::MSRMapper::MSRPairs[] = {
+ {"dbgdtrtx_el0", DBGDTRTX_EL0},
+ {"oslar_el1", OSLAR_EL1},
+ {"pmswinc_el0", PMSWINC_EL0}
+};
+
+A64SysReg::MSRMapper::MSRMapper() {
+ InstPairs = &MSRPairs[0];
+ NumInstPairs = llvm::array_lengthof(MSRPairs);
+}
+
+
+const NamedImmMapper::Mapping A64SysReg::SysRegMapper::SysRegPairs[] = {
+ {"osdtrrx_el1", OSDTRRX_EL1},
+ {"osdtrtx_el1", OSDTRTX_EL1},
+ {"teecr32_el1", TEECR32_EL1},
+ {"mdccint_el1", MDCCINT_EL1},
+ {"mdscr_el1", MDSCR_EL1},
+ {"dbgdtr_el0", DBGDTR_EL0},
+ {"oseccr_el1", OSECCR_EL1},
+ {"dbgvcr32_el2", DBGVCR32_EL2},
+ {"dbgbvr0_el1", DBGBVR0_EL1},
+ {"dbgbvr1_el1", DBGBVR1_EL1},
+ {"dbgbvr2_el1", DBGBVR2_EL1},
+ {"dbgbvr3_el1", DBGBVR3_EL1},
+ {"dbgbvr4_el1", DBGBVR4_EL1},
+ {"dbgbvr5_el1", DBGBVR5_EL1},
+ {"dbgbvr6_el1", DBGBVR6_EL1},
+ {"dbgbvr7_el1", DBGBVR7_EL1},
+ {"dbgbvr8_el1", DBGBVR8_EL1},
+ {"dbgbvr9_el1", DBGBVR9_EL1},
+ {"dbgbvr10_el1", DBGBVR10_EL1},
+ {"dbgbvr11_el1", DBGBVR11_EL1},
+ {"dbgbvr12_el1", DBGBVR12_EL1},
+ {"dbgbvr13_el1", DBGBVR13_EL1},
+ {"dbgbvr14_el1", DBGBVR14_EL1},
+ {"dbgbvr15_el1", DBGBVR15_EL1},
+ {"dbgbcr0_el1", DBGBCR0_EL1},
+ {"dbgbcr1_el1", DBGBCR1_EL1},
+ {"dbgbcr2_el1", DBGBCR2_EL1},
+ {"dbgbcr3_el1", DBGBCR3_EL1},
+ {"dbgbcr4_el1", DBGBCR4_EL1},
+ {"dbgbcr5_el1", DBGBCR5_EL1},
+ {"dbgbcr6_el1", DBGBCR6_EL1},
+ {"dbgbcr7_el1", DBGBCR7_EL1},
+ {"dbgbcr8_el1", DBGBCR8_EL1},
+ {"dbgbcr9_el1", DBGBCR9_EL1},
+ {"dbgbcr10_el1", DBGBCR10_EL1},
+ {"dbgbcr11_el1", DBGBCR11_EL1},
+ {"dbgbcr12_el1", DBGBCR12_EL1},
+ {"dbgbcr13_el1", DBGBCR13_EL1},
+ {"dbgbcr14_el1", DBGBCR14_EL1},
+ {"dbgbcr15_el1", DBGBCR15_EL1},
+ {"dbgwvr0_el1", DBGWVR0_EL1},
+ {"dbgwvr1_el1", DBGWVR1_EL1},
+ {"dbgwvr2_el1", DBGWVR2_EL1},
+ {"dbgwvr3_el1", DBGWVR3_EL1},
+ {"dbgwvr4_el1", DBGWVR4_EL1},
+ {"dbgwvr5_el1", DBGWVR5_EL1},
+ {"dbgwvr6_el1", DBGWVR6_EL1},
+ {"dbgwvr7_el1", DBGWVR7_EL1},
+ {"dbgwvr8_el1", DBGWVR8_EL1},
+ {"dbgwvr9_el1", DBGWVR9_EL1},
+ {"dbgwvr10_el1", DBGWVR10_EL1},
+ {"dbgwvr11_el1", DBGWVR11_EL1},
+ {"dbgwvr12_el1", DBGWVR12_EL1},
+ {"dbgwvr13_el1", DBGWVR13_EL1},
+ {"dbgwvr14_el1", DBGWVR14_EL1},
+ {"dbgwvr15_el1", DBGWVR15_EL1},
+ {"dbgwcr0_el1", DBGWCR0_EL1},
+ {"dbgwcr1_el1", DBGWCR1_EL1},
+ {"dbgwcr2_el1", DBGWCR2_EL1},
+ {"dbgwcr3_el1", DBGWCR3_EL1},
+ {"dbgwcr4_el1", DBGWCR4_EL1},
+ {"dbgwcr5_el1", DBGWCR5_EL1},
+ {"dbgwcr6_el1", DBGWCR6_EL1},
+ {"dbgwcr7_el1", DBGWCR7_EL1},
+ {"dbgwcr8_el1", DBGWCR8_EL1},
+ {"dbgwcr9_el1", DBGWCR9_EL1},
+ {"dbgwcr10_el1", DBGWCR10_EL1},
+ {"dbgwcr11_el1", DBGWCR11_EL1},
+ {"dbgwcr12_el1", DBGWCR12_EL1},
+ {"dbgwcr13_el1", DBGWCR13_EL1},
+ {"dbgwcr14_el1", DBGWCR14_EL1},
+ {"dbgwcr15_el1", DBGWCR15_EL1},
+ {"teehbr32_el1", TEEHBR32_EL1},
+ {"osdlr_el1", OSDLR_EL1},
+ {"dbgprcr_el1", DBGPRCR_EL1},
+ {"dbgclaimset_el1", DBGCLAIMSET_EL1},
+ {"dbgclaimclr_el1", DBGCLAIMCLR_EL1},
+ {"csselr_el1", CSSELR_EL1},
+ {"vpidr_el2", VPIDR_EL2},
+ {"vmpidr_el2", VMPIDR_EL2},
+ {"sctlr_el1", SCTLR_EL1},
+ {"sctlr_el2", SCTLR_EL2},
+ {"sctlr_el3", SCTLR_EL3},
+ {"actlr_el1", ACTLR_EL1},
+ {"actlr_el2", ACTLR_EL2},
+ {"actlr_el3", ACTLR_EL3},
+ {"cpacr_el1", CPACR_EL1},
+ {"hcr_el2", HCR_EL2},
+ {"scr_el3", SCR_EL3},
+ {"mdcr_el2", MDCR_EL2},
+ {"sder32_el3", SDER32_EL3},
+ {"cptr_el2", CPTR_EL2},
+ {"cptr_el3", CPTR_EL3},
+ {"hstr_el2", HSTR_EL2},
+ {"hacr_el2", HACR_EL2},
+ {"mdcr_el3", MDCR_EL3},
+ {"ttbr0_el1", TTBR0_EL1},
+ {"ttbr0_el2", TTBR0_EL2},
+ {"ttbr0_el3", TTBR0_EL3},
+ {"ttbr1_el1", TTBR1_EL1},
+ {"tcr_el1", TCR_EL1},
+ {"tcr_el2", TCR_EL2},
+ {"tcr_el3", TCR_EL3},
+ {"vttbr_el2", VTTBR_EL2},
+ {"vtcr_el2", VTCR_EL2},
+ {"dacr32_el2", DACR32_EL2},
+ {"spsr_el1", SPSR_EL1},
+ {"spsr_el2", SPSR_EL2},
+ {"spsr_el3", SPSR_EL3},
+ {"elr_el1", ELR_EL1},
+ {"elr_el2", ELR_EL2},
+ {"elr_el3", ELR_EL3},
+ {"sp_el0", SP_EL0},
+ {"sp_el1", SP_EL1},
+ {"sp_el2", SP_EL2},
+ {"spsel", SPSel},
+ {"nzcv", NZCV},
+ {"daif", DAIF},
+ {"currentel", CurrentEL},
+ {"spsr_irq", SPSR_irq},
+ {"spsr_abt", SPSR_abt},
+ {"spsr_und", SPSR_und},
+ {"spsr_fiq", SPSR_fiq},
+ {"fpcr", FPCR},
+ {"fpsr", FPSR},
+ {"dspsr_el0", DSPSR_EL0},
+ {"dlr_el0", DLR_EL0},
+ {"ifsr32_el2", IFSR32_EL2},
+ {"afsr0_el1", AFSR0_EL1},
+ {"afsr0_el2", AFSR0_EL2},
+ {"afsr0_el3", AFSR0_EL3},
+ {"afsr1_el1", AFSR1_EL1},
+ {"afsr1_el2", AFSR1_EL2},
+ {"afsr1_el3", AFSR1_EL3},
+ {"esr_el1", ESR_EL1},
+ {"esr_el2", ESR_EL2},
+ {"esr_el3", ESR_EL3},
+ {"fpexc32_el2", FPEXC32_EL2},
+ {"far_el1", FAR_EL1},
+ {"far_el2", FAR_EL2},
+ {"far_el3", FAR_EL3},
+ {"hpfar_el2", HPFAR_EL2},
+ {"par_el1", PAR_EL1},
+ {"pmcr_el0", PMCR_EL0},
+ {"pmcntenset_el0", PMCNTENSET_EL0},
+ {"pmcntenclr_el0", PMCNTENCLR_EL0},
+ {"pmovsclr_el0", PMOVSCLR_EL0},
+ {"pmselr_el0", PMSELR_EL0},
+ {"pmccntr_el0", PMCCNTR_EL0},
+ {"pmxevtyper_el0", PMXEVTYPER_EL0},
+ {"pmxevcntr_el0", PMXEVCNTR_EL0},
+ {"pmuserenr_el0", PMUSERENR_EL0},
+ {"pmintenset_el1", PMINTENSET_EL1},
+ {"pmintenclr_el1", PMINTENCLR_EL1},
+ {"pmovsset_el0", PMOVSSET_EL0},
+ {"mair_el1", MAIR_EL1},
+ {"mair_el2", MAIR_EL2},
+ {"mair_el3", MAIR_EL3},
+ {"amair_el1", AMAIR_EL1},
+ {"amair_el2", AMAIR_EL2},
+ {"amair_el3", AMAIR_EL3},
+ {"vbar_el1", VBAR_EL1},
+ {"vbar_el2", VBAR_EL2},
+ {"vbar_el3", VBAR_EL3},
+ {"rmr_el1", RMR_EL1},
+ {"rmr_el2", RMR_EL2},
+ {"rmr_el3", RMR_EL3},
+ {"contextidr_el1", CONTEXTIDR_EL1},
+ {"tpidr_el0", TPIDR_EL0},
+ {"tpidr_el2", TPIDR_EL2},
+ {"tpidr_el3", TPIDR_EL3},
+ {"tpidrro_el0", TPIDRRO_EL0},
+ {"tpidr_el1", TPIDR_EL1},
+ {"cntfrq_el0", CNTFRQ_EL0},
+ {"cntvoff_el2", CNTVOFF_EL2},
+ {"cntkctl_el1", CNTKCTL_EL1},
+ {"cnthctl_el2", CNTHCTL_EL2},
+ {"cntp_tval_el0", CNTP_TVAL_EL0},
+ {"cnthp_tval_el2", CNTHP_TVAL_EL2},
+ {"cntps_tval_el1", CNTPS_TVAL_EL1},
+ {"cntp_ctl_el0", CNTP_CTL_EL0},
+ {"cnthp_ctl_el2", CNTHP_CTL_EL2},
+ {"cntps_ctl_el1", CNTPS_CTL_EL1},
+ {"cntp_cval_el0", CNTP_CVAL_EL0},
+ {"cnthp_cval_el2", CNTHP_CVAL_EL2},
+ {"cntps_cval_el1", CNTPS_CVAL_EL1},
+ {"cntv_tval_el0", CNTV_TVAL_EL0},
+ {"cntv_ctl_el0", CNTV_CTL_EL0},
+ {"cntv_cval_el0", CNTV_CVAL_EL0},
+ {"pmevcntr0_el0", PMEVCNTR0_EL0},
+ {"pmevcntr1_el0", PMEVCNTR1_EL0},
+ {"pmevcntr2_el0", PMEVCNTR2_EL0},
+ {"pmevcntr3_el0", PMEVCNTR3_EL0},
+ {"pmevcntr4_el0", PMEVCNTR4_EL0},
+ {"pmevcntr5_el0", PMEVCNTR5_EL0},
+ {"pmevcntr6_el0", PMEVCNTR6_EL0},
+ {"pmevcntr7_el0", PMEVCNTR7_EL0},
+ {"pmevcntr8_el0", PMEVCNTR8_EL0},
+ {"pmevcntr9_el0", PMEVCNTR9_EL0},
+ {"pmevcntr10_el0", PMEVCNTR10_EL0},
+ {"pmevcntr11_el0", PMEVCNTR11_EL0},
+ {"pmevcntr12_el0", PMEVCNTR12_EL0},
+ {"pmevcntr13_el0", PMEVCNTR13_EL0},
+ {"pmevcntr14_el0", PMEVCNTR14_EL0},
+ {"pmevcntr15_el0", PMEVCNTR15_EL0},
+ {"pmevcntr16_el0", PMEVCNTR16_EL0},
+ {"pmevcntr17_el0", PMEVCNTR17_EL0},
+ {"pmevcntr18_el0", PMEVCNTR18_EL0},
+ {"pmevcntr19_el0", PMEVCNTR19_EL0},
+ {"pmevcntr20_el0", PMEVCNTR20_EL0},
+ {"pmevcntr21_el0", PMEVCNTR21_EL0},
+ {"pmevcntr22_el0", PMEVCNTR22_EL0},
+ {"pmevcntr23_el0", PMEVCNTR23_EL0},
+ {"pmevcntr24_el0", PMEVCNTR24_EL0},
+ {"pmevcntr25_el0", PMEVCNTR25_EL0},
+ {"pmevcntr26_el0", PMEVCNTR26_EL0},
+ {"pmevcntr27_el0", PMEVCNTR27_EL0},
+ {"pmevcntr28_el0", PMEVCNTR28_EL0},
+ {"pmevcntr29_el0", PMEVCNTR29_EL0},
+ {"pmevcntr30_el0", PMEVCNTR30_EL0},
+ {"pmccfiltr_el0", PMCCFILTR_EL0},
+ {"pmevtyper0_el0", PMEVTYPER0_EL0},
+ {"pmevtyper1_el0", PMEVTYPER1_EL0},
+ {"pmevtyper2_el0", PMEVTYPER2_EL0},
+ {"pmevtyper3_el0", PMEVTYPER3_EL0},
+ {"pmevtyper4_el0", PMEVTYPER4_EL0},
+ {"pmevtyper5_el0", PMEVTYPER5_EL0},
+ {"pmevtyper6_el0", PMEVTYPER6_EL0},
+ {"pmevtyper7_el0", PMEVTYPER7_EL0},
+ {"pmevtyper8_el0", PMEVTYPER8_EL0},
+ {"pmevtyper9_el0", PMEVTYPER9_EL0},
+ {"pmevtyper10_el0", PMEVTYPER10_EL0},
+ {"pmevtyper11_el0", PMEVTYPER11_EL0},
+ {"pmevtyper12_el0", PMEVTYPER12_EL0},
+ {"pmevtyper13_el0", PMEVTYPER13_EL0},
+ {"pmevtyper14_el0", PMEVTYPER14_EL0},
+ {"pmevtyper15_el0", PMEVTYPER15_EL0},
+ {"pmevtyper16_el0", PMEVTYPER16_EL0},
+ {"pmevtyper17_el0", PMEVTYPER17_EL0},
+ {"pmevtyper18_el0", PMEVTYPER18_EL0},
+ {"pmevtyper19_el0", PMEVTYPER19_EL0},
+ {"pmevtyper20_el0", PMEVTYPER20_EL0},
+ {"pmevtyper21_el0", PMEVTYPER21_EL0},
+ {"pmevtyper22_el0", PMEVTYPER22_EL0},
+ {"pmevtyper23_el0", PMEVTYPER23_EL0},
+ {"pmevtyper24_el0", PMEVTYPER24_EL0},
+ {"pmevtyper25_el0", PMEVTYPER25_EL0},
+ {"pmevtyper26_el0", PMEVTYPER26_EL0},
+ {"pmevtyper27_el0", PMEVTYPER27_EL0},
+ {"pmevtyper28_el0", PMEVTYPER28_EL0},
+ {"pmevtyper29_el0", PMEVTYPER29_EL0},
+ {"pmevtyper30_el0", PMEVTYPER30_EL0},
+};
+
+uint32_t
+A64SysReg::SysRegMapper::fromString(StringRef Name, bool &Valid) const {
+  // First search the registers shared by all instructions (accessible to both
+  // MRS and MSR).
+ std::string NameLower = Name.lower();
+ for (unsigned i = 0; i < array_lengthof(SysRegPairs); ++i) {
+ if (SysRegPairs[i].Name == NameLower) {
+ Valid = true;
+ return SysRegPairs[i].Value;
+ }
+ }
+
+ // Now try the instruction-specific registers (either read-only or
+ // write-only).
+ for (unsigned i = 0; i < NumInstPairs; ++i) {
+ if (InstPairs[i].Name == NameLower) {
+ Valid = true;
+ return InstPairs[i].Value;
+ }
+ }
+
+ // Try to parse an S<op0>_<op1>_<Cn>_<Cm>_<op2> register name, where the bits
+ // are: 11 xxx 1x11 xxxx xxx
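+  // For example, "s3_0_c15_c0_0" encodes Op0=3, Op1=0, CRn=15, CRm=0, Op2=0,
+  // i.e. (3 << 14) | (15 << 7) == 0xc780.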
+ Regex GenericRegPattern("^s3_([0-7])_c(1[15])_c([0-9]|1[0-5])_([0-7])$");
+
+ SmallVector<StringRef, 4> Ops;
+ if (!GenericRegPattern.match(NameLower, &Ops)) {
+ Valid = false;
+ return -1;
+ }
+
+ uint32_t Op0 = 3, Op1 = 0, CRn = 0, CRm = 0, Op2 = 0;
+ uint32_t Bits;
+ Ops[1].getAsInteger(10, Op1);
+ Ops[2].getAsInteger(10, CRn);
+ Ops[3].getAsInteger(10, CRm);
+ Ops[4].getAsInteger(10, Op2);
+ Bits = (Op0 << 14) | (Op1 << 11) | (CRn << 7) | (CRm << 3) | Op2;
+
+ Valid = true;
+ return Bits;
+}
+
+std::string
+A64SysReg::SysRegMapper::toString(uint32_t Bits, bool &Valid) const {
+ for (unsigned i = 0; i < array_lengthof(SysRegPairs); ++i) {
+ if (SysRegPairs[i].Value == Bits) {
+ Valid = true;
+ return SysRegPairs[i].Name;
+ }
+ }
+
+ for (unsigned i = 0; i < NumInstPairs; ++i) {
+ if (InstPairs[i].Value == Bits) {
+ Valid = true;
+ return InstPairs[i].Name;
+ }
+ }
+
+ uint32_t Op0 = (Bits >> 14) & 0x3;
+ uint32_t Op1 = (Bits >> 11) & 0x7;
+ uint32_t CRn = (Bits >> 7) & 0xf;
+ uint32_t CRm = (Bits >> 3) & 0xf;
+ uint32_t Op2 = Bits & 0x7;
+
+ // Only combinations matching: 11 xxx 1x11 xxxx xxx are valid for a generic
+ // name.
+ if (Op0 != 3 || (CRn != 11 && CRn != 15)) {
+ Valid = false;
+ return "";
+ }
+
+ assert(Op0 == 3 && (CRn == 11 || CRn == 15) && "Invalid generic sysreg");
+
+ Valid = true;
+ return "s3_" + utostr(Op1) + "_c" + utostr(CRn)
+ + "_c" + utostr(CRm) + "_" + utostr(Op2);
+}
+
+const NamedImmMapper::Mapping A64TLBI::TLBIMapper::TLBIPairs[] = {
+ {"ipas2e1is", IPAS2E1IS},
+ {"ipas2le1is", IPAS2LE1IS},
+ {"vmalle1is", VMALLE1IS},
+ {"alle2is", ALLE2IS},
+ {"alle3is", ALLE3IS},
+ {"vae1is", VAE1IS},
+ {"vae2is", VAE2IS},
+ {"vae3is", VAE3IS},
+ {"aside1is", ASIDE1IS},
+ {"vaae1is", VAAE1IS},
+ {"alle1is", ALLE1IS},
+ {"vale1is", VALE1IS},
+ {"vale2is", VALE2IS},
+ {"vale3is", VALE3IS},
+ {"vmalls12e1is", VMALLS12E1IS},
+ {"vaale1is", VAALE1IS},
+ {"ipas2e1", IPAS2E1},
+ {"ipas2le1", IPAS2LE1},
+ {"vmalle1", VMALLE1},
+ {"alle2", ALLE2},
+ {"alle3", ALLE3},
+ {"vae1", VAE1},
+ {"vae2", VAE2},
+ {"vae3", VAE3},
+ {"aside1", ASIDE1},
+ {"vaae1", VAAE1},
+ {"alle1", ALLE1},
+ {"vale1", VALE1},
+ {"vale2", VALE2},
+ {"vale3", VALE3},
+ {"vmalls12e1", VMALLS12E1},
+ {"vaale1", VAALE1}
+};
+
+A64TLBI::TLBIMapper::TLBIMapper()
+ : NamedImmMapper(TLBIPairs, 0) {}
+
+bool A64Imms::isFPImm(const APFloat &Val, uint32_t &Imm8Bits) {
+ const fltSemantics &Sem = Val.getSemantics();
+ unsigned FracBits = APFloat::semanticsPrecision(Sem) - 1;
+
+ uint32_t ExpMask;
+ switch (FracBits) {
+ case 10: // IEEE half-precision
+ ExpMask = 0x1f;
+ break;
+ case 23: // IEEE single-precision
+ ExpMask = 0xff;
+ break;
+ case 52: // IEEE double-precision
+ ExpMask = 0x7ff;
+ break;
+ case 112: // IEEE quad-precision
+    // No immediates are valid for quad precision.
+ return false;
+ default:
+ llvm_unreachable("Only half, single and double precision supported");
+ }
+
+ uint32_t ExpStart = FracBits;
+ uint64_t FracMask = (1ULL << FracBits) - 1;
+
+ uint32_t Sign = Val.isNegative();
+
+  uint64_t Bits = Val.bitcastToAPInt().getLimitedValue();
+ uint64_t Fraction = Bits & FracMask;
+ int32_t Exponent = ((Bits >> ExpStart) & ExpMask);
+ Exponent -= ExpMask >> 1;
+
+ // S[d] = imm8<7>:NOT(imm8<6>):Replicate(imm8<6>, 5):imm8<5:0>:Zeros(19)
+ // D[d] = imm8<7>:NOT(imm8<6>):Replicate(imm8<6>, 8):imm8<5:0>:Zeros(48)
+ // This translates to: only 4 bits of fraction; -3 <= exp <= 4.
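+  // For example, 1.0f packs to imm8 == 0x70 and 2.0f packs to imm8 == 0x00.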
+ uint64_t A64FracStart = FracBits - 4;
+ uint64_t A64FracMask = 0xf;
+
+ // Are there too many fraction bits?
+ if (Fraction & ~(A64FracMask << A64FracStart))
+ return false;
+
+ if (Exponent < -3 || Exponent > 4)
+ return false;
+
+ uint32_t PackedFraction = (Fraction >> A64FracStart) & A64FracMask;
+ uint32_t PackedExp = (Exponent + 7) & 0x7;
+
+ Imm8Bits = (Sign << 7) | (PackedExp << 4) | PackedFraction;
+ return true;
+}
+
+// Encoding of the immediate for logical (immediate) instructions:
+//
+// | N | imms | immr | size | R | S |
+// |---+--------+--------+------+--------------+--------------|
+// | 1 | ssssss | rrrrrr | 64 | UInt(rrrrrr) | UInt(ssssss) |
+// | 0 | 0sssss | xrrrrr | 32 | UInt(rrrrr) | UInt(sssss) |
+// | 0 | 10ssss | xxrrrr | 16 | UInt(rrrr) | UInt(ssss) |
+// | 0 | 110sss | xxxrrr | 8 | UInt(rrr) | UInt(sss) |
+// | 0 | 1110ss | xxxxrr | 4 | UInt(rr) | UInt(ss) |
+// | 0 | 11110s | xxxxxr | 2 | UInt(r) | UInt(s) |
+// | 0 | 11111x | - | | UNALLOCATED | |
+//
+// Columns 'R', 'S' and 'size' specify a "bitmask immediate" of size bits in
+// which the lower S+1 bits are ones and the remaining bits are zero, then
+// rotated right by R bits, which is then replicated across the datapath.
+//
+// + Values of 'N', 'imms' and 'immr' which do not match the above table are
+// RESERVED.
+// + If all 's' bits in the imms field are set then the instruction is
+// RESERVED.
+// + The 'x' bits in the 'immr' field are IGNORED.
+
+bool A64Imms::isLogicalImm(unsigned RegWidth, uint64_t Imm, uint32_t &Bits) {
+ int RepeatWidth;
+ int Rotation = 0;
+ int Num1s = 0;
+
+ // Because there are S+1 ones in the replicated mask, an immediate of all
+ // zeros is not allowed. Filtering it here is probably more efficient.
+ if (Imm == 0) return false;
+
+ for (RepeatWidth = RegWidth; RepeatWidth > 1; RepeatWidth /= 2) {
+ uint64_t RepeatMask = RepeatWidth == 64 ? -1 : (1ULL << RepeatWidth) - 1;
+ uint64_t ReplicatedMask = Imm & RepeatMask;
+
+ if (ReplicatedMask == 0) continue;
+
+ // First we have to make sure the mask is actually repeated in each slot for
+ // this width-specifier.
+ bool IsReplicatedMask = true;
+ for (unsigned i = RepeatWidth; i < RegWidth; i += RepeatWidth) {
+ if (((Imm >> i) & RepeatMask) != ReplicatedMask) {
+ IsReplicatedMask = false;
+ break;
+ }
+ }
+ if (!IsReplicatedMask) continue;
+
+ // Now we have to work out the amount of rotation needed. The first part of
+ // this calculation is actually independent of RepeatWidth, but the complex
+ // case will depend on it.
+ Rotation = CountTrailingZeros_64(Imm);
+ if (Rotation == 0) {
+      // There were no trailing zeros, which means it's either in place or
+      // there are 1s at each end (e.g. 0x8003 needs rotating).
+ Rotation = RegWidth == 64 ? CountLeadingOnes_64(Imm)
+ : CountLeadingOnes_32(Imm);
+ Rotation = RepeatWidth - Rotation;
+ }
+
+ uint64_t ReplicatedOnes = (ReplicatedMask >> Rotation)
+ | ((ReplicatedMask << (RepeatWidth - Rotation)) & RepeatMask);
+ // Of course, they may not actually be ones, so we have to check that:
+ if (!isMask_64(ReplicatedOnes))
+ continue;
+
+ Num1s = CountTrailingOnes_64(ReplicatedOnes);
+
+ // We know we've got an almost valid encoding (certainly, if this is invalid
+ // no other parameters would work).
+ break;
+ }
+
+ // The encodings which would produce all 1s are RESERVED.
+ if (RepeatWidth == 1 || Num1s == RepeatWidth) return false;
+
+ uint32_t N = RepeatWidth == 64;
+ uint32_t ImmR = RepeatWidth - Rotation;
+ uint32_t ImmS = Num1s - 1;
+
+ switch (RepeatWidth) {
+  default: break; // No ImmS marker bits needed for 32- or 64-bit widths.
+ case 16: ImmS |= 0x20; break; // 10ssss
+ case 8: ImmS |= 0x30; break; // 110sss
+ case 4: ImmS |= 0x38; break; // 1110ss
+ case 2: ImmS |= 0x3c; break; // 11110s
+ }
+
+ Bits = ImmS | (ImmR << 6) | (N << 12);
+
+ return true;
+}
+
+
+bool A64Imms::isLogicalImmBits(unsigned RegWidth, uint32_t Bits, uint64_t &Imm) {
+ uint32_t N = Bits >> 12;
+ uint32_t ImmR = (Bits >> 6) & 0x3f;
+ uint32_t ImmS = Bits & 0x3f;
+
+ // N=1 encodes a 64-bit replication and is invalid for the 32-bit
+ // instructions.
+ if (RegWidth == 32 && N != 0) return false;
+
+ int Width = 0;
+ if (N == 1)
+ Width = 64;
+ else if ((ImmS & 0x20) == 0)
+ Width = 32;
+ else if ((ImmS & 0x10) == 0)
+ Width = 16;
+ else if ((ImmS & 0x08) == 0)
+ Width = 8;
+ else if ((ImmS & 0x04) == 0)
+ Width = 4;
+ else if ((ImmS & 0x02) == 0)
+ Width = 2;
+ else {
+ // ImmS is 0b11111x: UNALLOCATED
+ return false;
+ }
+
+ int Num1s = (ImmS & (Width - 1)) + 1;
+
+ // All encodings which would map to -1 (signed) are RESERVED.
+ if (Num1s == Width) return false;
+
+ int Rotation = (ImmR & (Width - 1));
+ uint64_t Mask = (1ULL << Num1s) - 1;
+ uint64_t WidthMask = Width == 64 ? -1 : (1ULL << Width) - 1;
+ Mask = (Mask >> Rotation)
+ | ((Mask << (Width - Rotation)) & WidthMask);
+
+ Imm = 0;
+ for (unsigned i = 0; i < RegWidth / Width; ++i) {
+ Imm |= Mask;
+ Mask <<= Width;
+ }
+
+ return true;
+}
+
+bool A64Imms::isMOVZImm(int RegWidth, uint64_t Value, int &UImm16, int &Shift) {
+ // If high bits are set then a 32-bit MOVZ can't possibly work.
+ if (RegWidth == 32 && (Value & ~0xffffffffULL))
+ return false;
+
+ for (int i = 0; i < RegWidth; i += 16) {
+ // If the value is 0 when we mask out all the bits that could be set with
+ // the current LSL value then it's representable.
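+    // For example, 0x12340000 passes for i == 16, giving UImm16 = 0x1234 and
+    // Shift = 1.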
+ if ((Value & ~(0xffffULL << i)) == 0) {
+ Shift = i / 16;
+ UImm16 = (Value >> i) & 0xffff;
+ return true;
+ }
+ }
+ return false;
+}
+
+bool A64Imms::isMOVNImm(int RegWidth, uint64_t Value, int &UImm16, int &Shift) {
+ // MOVN is defined to set its register to NOT(LSL(imm16, shift)).
+
+ // We have to be a little careful about a 32-bit register: 0xffff_1234 *is*
+ // representable, but ~0xffff_1234 == 0xffff_ffff_0000_edcb which is not
+ // a valid input for isMOVZImm.
+ if (RegWidth == 32 && (Value & ~0xffffffffULL))
+ return false;
+
+ uint64_t MOVZEquivalent = RegWidth == 32 ? ~Value & 0xffffffff : ~Value;
+
+ return isMOVZImm(RegWidth, MOVZEquivalent, UImm16, Shift);
+}
+
+bool A64Imms::isOnlyMOVNImm(int RegWidth, uint64_t Value,
+ int &UImm16, int &Shift) {
+ if (isMOVZImm(RegWidth, Value, UImm16, Shift))
+ return false;
+
+ return isMOVNImm(RegWidth, Value, UImm16, Shift);
+}
+
+MCSubtargetInfo *AArch64_MC::createAArch64MCSubtargetInfo(StringRef TT,
+ StringRef CPU,
+ StringRef FS) {
+ MCSubtargetInfo *X = new MCSubtargetInfo();
+ InitAArch64MCSubtargetInfo(X, TT, CPU, "");
+ return X;
+}
+
+
+static MCInstrInfo *createAArch64MCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitAArch64MCInstrInfo(X);
+ return X;
+}
+
+static MCRegisterInfo *createAArch64MCRegisterInfo(StringRef Triple) {
+ MCRegisterInfo *X = new MCRegisterInfo();
+ InitAArch64MCRegisterInfo(X, AArch64::X30);
+ return X;
+}
+
+static MCAsmInfo *createAArch64MCAsmInfo(const Target &T, StringRef TT) {
+ Triple TheTriple(TT);
+
+ MCAsmInfo *MAI = new AArch64ELFMCAsmInfo();
+ MachineLocation Dst(MachineLocation::VirtualFP);
+ MachineLocation Src(AArch64::XSP, 0);
+ MAI->addInitialFrameState(0, Dst, Src);
+
+ return MAI;
+}
+
+static MCCodeGenInfo *createAArch64MCCodeGenInfo(StringRef TT, Reloc::Model RM,
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL) {
+ MCCodeGenInfo *X = new MCCodeGenInfo();
+ if (RM == Reloc::Default || RM == Reloc::DynamicNoPIC) {
+    // On ELF platforms, the linker is smart enough under the static relocation
+    // model to cope with referencing external symbols defined in a shared
+    // library. Hence DynamicNoPIC doesn't need to be promoted to PIC.
+ RM = Reloc::Static;
+ }
+
+ if (CM == CodeModel::Default)
+ CM = CodeModel::Small;
+
+ X->InitMCCodeGenInfo(RM, CM, OL);
+ return X;
+}
+
+static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
+ MCContext &Ctx, MCAsmBackend &MAB,
+ raw_ostream &OS,
+ MCCodeEmitter *Emitter,
+ bool RelaxAll,
+ bool NoExecStack) {
+ Triple TheTriple(TT);
+
+ return createAArch64ELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll, NoExecStack);
+}
+
+
+static MCInstPrinter *createAArch64MCInstPrinter(const Target &T,
+ unsigned SyntaxVariant,
+ const MCAsmInfo &MAI,
+ const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI,
+ const MCSubtargetInfo &STI) {
+ if (SyntaxVariant == 0)
+ return new AArch64InstPrinter(MAI, MII, MRI, STI);
+ return 0;
+}
+
+namespace {
+
+class AArch64MCInstrAnalysis : public MCInstrAnalysis {
+public:
+ AArch64MCInstrAnalysis(const MCInstrInfo *Info) : MCInstrAnalysis(Info) {}
+
+ virtual bool isUnconditionalBranch(const MCInst &Inst) const {
+ if (Inst.getOpcode() == AArch64::Bcc
+ && Inst.getOperand(0).getImm() == A64CC::AL)
+ return true;
+ return MCInstrAnalysis::isUnconditionalBranch(Inst);
+ }
+
+ virtual bool isConditionalBranch(const MCInst &Inst) const {
+ if (Inst.getOpcode() == AArch64::Bcc
+ && Inst.getOperand(0).getImm() == A64CC::AL)
+ return false;
+ return MCInstrAnalysis::isConditionalBranch(Inst);
+ }
+
+ uint64_t evaluateBranch(const MCInst &Inst, uint64_t Addr,
+ uint64_t Size) const {
+ unsigned LblOperand = Inst.getOpcode() == AArch64::Bcc ? 1 : 0;
+ // FIXME: We only handle PCRel branches for now.
+ if (Info->get(Inst.getOpcode()).OpInfo[LblOperand].OperandType
+ != MCOI::OPERAND_PCREL)
+ return -1ULL;
+
+ int64_t Imm = Inst.getOperand(LblOperand).getImm();
+
+ return Addr + Imm;
+ }
+};
+
+}
+
+static MCInstrAnalysis *createAArch64MCInstrAnalysis(const MCInstrInfo *Info) {
+ return new AArch64MCInstrAnalysis(Info);
+}
+
+
+
+extern "C" void LLVMInitializeAArch64TargetMC() {
+ // Register the MC asm info.
+ RegisterMCAsmInfoFn A(TheAArch64Target, createAArch64MCAsmInfo);
+
+ // Register the MC codegen info.
+ TargetRegistry::RegisterMCCodeGenInfo(TheAArch64Target,
+ createAArch64MCCodeGenInfo);
+
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(TheAArch64Target,
+ createAArch64MCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(TheAArch64Target,
+ createAArch64MCRegisterInfo);
+
+ // Register the MC subtarget info.
+ using AArch64_MC::createAArch64MCSubtargetInfo;
+ TargetRegistry::RegisterMCSubtargetInfo(TheAArch64Target,
+ createAArch64MCSubtargetInfo);
+
+ // Register the MC instruction analyzer.
+ TargetRegistry::RegisterMCInstrAnalysis(TheAArch64Target,
+ createAArch64MCInstrAnalysis);
+
+ // Register the MC Code Emitter
+ TargetRegistry::RegisterMCCodeEmitter(TheAArch64Target,
+ createAArch64MCCodeEmitter);
+
+ // Register the asm backend.
+ TargetRegistry::RegisterMCAsmBackend(TheAArch64Target,
+ createAArch64AsmBackend);
+
+ // Register the object streamer.
+ TargetRegistry::RegisterMCObjectStreamer(TheAArch64Target,
+ createMCStreamer);
+
+ // Register the MCInstPrinter.
+ TargetRegistry::RegisterMCInstPrinter(TheAArch64Target,
+ createAArch64MCInstPrinter);
+}
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
new file mode 100644
index 0000000000..3849fe3795
--- /dev/null
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
@@ -0,0 +1,65 @@
+//===-- AArch64MCTargetDesc.h - AArch64 Target Descriptions -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides AArch64 specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_AARCH64MCTARGETDESC_H
+#define LLVM_AARCH64MCTARGETDESC_H
+
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+class MCAsmBackend;
+class MCCodeEmitter;
+class MCContext;
+class MCInstrInfo;
+class MCObjectWriter;
+class MCRegisterInfo;
+class MCSubtargetInfo;
+class StringRef;
+class Target;
+class raw_ostream;
+
+extern Target TheAArch64Target;
+
+namespace AArch64_MC {
+ MCSubtargetInfo *createAArch64MCSubtargetInfo(StringRef TT, StringRef CPU,
+ StringRef FS);
+}
+
+MCCodeEmitter *createAArch64MCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx);
+
+MCObjectWriter *createAArch64ELFObjectWriter(raw_ostream &OS,
+ uint8_t OSABI);
+
+MCAsmBackend *createAArch64AsmBackend(const Target &T, StringRef TT,
+ StringRef CPU);
+
+} // End llvm namespace
+
+// Defines symbolic names for AArch64 registers. This defines a mapping from
+// register name to register number.
+//
+#define GET_REGINFO_ENUM
+#include "AArch64GenRegisterInfo.inc"
+
+// Defines symbolic names for the AArch64 instructions.
+//
+#define GET_INSTRINFO_ENUM
+#include "AArch64GenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "AArch64GenSubtargetInfo.inc"
+
+#endif
diff --git a/lib/Target/AArch64/MCTargetDesc/CMakeLists.txt b/lib/Target/AArch64/MCTargetDesc/CMakeLists.txt
new file mode 100644
index 0000000000..44c66a224e
--- /dev/null
+++ b/lib/Target/AArch64/MCTargetDesc/CMakeLists.txt
@@ -0,0 +1,13 @@
+add_llvm_library(LLVMAArch64Desc
+ AArch64AsmBackend.cpp
+ AArch64ELFObjectWriter.cpp
+ AArch64ELFStreamer.cpp
+ AArch64MCAsmInfo.cpp
+ AArch64MCCodeEmitter.cpp
+ AArch64MCExpr.cpp
+ AArch64MCTargetDesc.cpp
+ )
+add_dependencies(LLVMAArch64Desc AArch64CommonTableGen)
+
+# Hack: we need to include 'main' target directory to grab private headers
+include_directories(${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_BINARY_DIR}/..)
diff --git a/lib/Target/AArch64/MCTargetDesc/LLVMBuild.txt b/lib/Target/AArch64/MCTargetDesc/LLVMBuild.txt
new file mode 100644
index 0000000000..5a2f46704a
--- /dev/null
+++ b/lib/Target/AArch64/MCTargetDesc/LLVMBuild.txt
@@ -0,0 +1,24 @@
+;===- ./lib/Target/AArch64/MCTargetDesc/LLVMBuild.txt ----------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = AArch64Desc
+parent = AArch64
+required_libraries = AArch64AsmPrinter MC Support
+add_to_library_groups = AArch64
+
diff --git a/lib/Target/AArch64/MCTargetDesc/Makefile b/lib/Target/AArch64/MCTargetDesc/Makefile
new file mode 100644
index 0000000000..5779ac5ac6
--- /dev/null
+++ b/lib/Target/AArch64/MCTargetDesc/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/AArch64/MCTargetDesc/Makefile ------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMAArch64Desc
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/AArch64/Makefile b/lib/Target/AArch64/Makefile
new file mode 100644
index 0000000000..b2ca2787a4
--- /dev/null
+++ b/lib/Target/AArch64/Makefile
@@ -0,0 +1,30 @@
+##===- lib/Target/AArch64/Makefile -------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../..
+LIBRARYNAME = LLVMAArch64CodeGen
+TARGET = AArch64
+
+# Make sure that tblgen is run, first thing.
+BUILT_SOURCES = AArch64GenAsmMatcher.inc \
+ AArch64GenAsmWriter.inc \
+ AArch64GenCallingConv.inc \
+ AArch64GenDAGISel.inc \
+ AArch64GenDisassemblerTables.inc \
+ AArch64GenInstrInfo.inc \
+ AArch64GenMCCodeEmitter.inc \
+ AArch64GenMCPseudoLowering.inc \
+ AArch64GenRegisterInfo.inc \
+ AArch64GenSubtargetInfo.inc
+
+DIRS = InstPrinter AsmParser Disassembler TargetInfo MCTargetDesc
+
+include $(LEVEL)/Makefile.common
+
+
diff --git a/lib/Target/AArch64/README.txt b/lib/Target/AArch64/README.txt
new file mode 100644
index 0000000000..601990f17d
--- /dev/null
+++ b/lib/Target/AArch64/README.txt
@@ -0,0 +1,2 @@
+This file will contain changes that need to be made before AArch64 can become an
+officially supported target. Currently a placeholder.
diff --git a/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp b/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp
new file mode 100644
index 0000000000..fa07d492ff
--- /dev/null
+++ b/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp
@@ -0,0 +1,20 @@
+//===-- AArch64TargetInfo.cpp - AArch64 Target Implementation ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
+Target llvm::TheAArch64Target;
+
+extern "C" void LLVMInitializeAArch64TargetInfo() {
+ RegisterTarget<Triple::aarch64>
+ X(TheAArch64Target, "aarch64", "AArch64");
+}
diff --git a/lib/Target/AArch64/TargetInfo/CMakeLists.txt b/lib/Target/AArch64/TargetInfo/CMakeLists.txt
new file mode 100644
index 0000000000..e236eed00b
--- /dev/null
+++ b/lib/Target/AArch64/TargetInfo/CMakeLists.txt
@@ -0,0 +1,7 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMAArch64Info
+ AArch64TargetInfo.cpp
+ )
+
+add_dependencies(LLVMAArch64Info AArch64CommonTableGen)
diff --git a/lib/Target/AArch64/TargetInfo/LLVMBuild.txt b/lib/Target/AArch64/TargetInfo/LLVMBuild.txt
new file mode 100644
index 0000000000..5b003f0122
--- /dev/null
+++ b/lib/Target/AArch64/TargetInfo/LLVMBuild.txt
@@ -0,0 +1,24 @@
+;===- ./lib/Target/AArch64/TargetInfo/LLVMBuild.txt ------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = AArch64Info
+parent = AArch64
+required_libraries = MC Support Target
+add_to_library_groups = AArch64
+
diff --git a/lib/Target/AArch64/TargetInfo/Makefile b/lib/Target/AArch64/TargetInfo/Makefile
new file mode 100644
index 0000000000..9dc9aa4bcc
--- /dev/null
+++ b/lib/Target/AArch64/TargetInfo/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/AArch64/TargetInfo/Makefile --------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMAArch64Info
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
index b404e6c6e0..cd4067a529 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
@@ -64,6 +64,9 @@ public:
return getSubExpr()->FindAssociatedSection();
}
+ // There are no TLS ARMMCExprs at the moment.
+ void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const {}
+
static bool classof(const MCExpr *E) {
return E->getKind() == MCExpr::Target;
}
diff --git a/lib/Target/LLVMBuild.txt b/lib/Target/LLVMBuild.txt
index f3a9c1c3e7..c06e8bc3cd 100644
--- a/lib/Target/LLVMBuild.txt
+++ b/lib/Target/LLVMBuild.txt
@@ -16,7 +16,7 @@
;===------------------------------------------------------------------------===;
[common]
-subdirectories = ARM CppBackend Hexagon MBlaze MSP430 NVPTX Mips PowerPC R600 Sparc X86 XCore
+subdirectories = AArch64 ARM CppBackend Hexagon MBlaze MSP430 NVPTX Mips PowerPC R600 Sparc X86 XCore
; This is a special group whose required libraries are extended (by llvm-build)
; with the best execution engine (the native JIT, if available, or the
diff --git a/projects/sample/autoconf/config.sub b/projects/sample/autoconf/config.sub
index 9942491533..9d22c1e52e 100755
--- a/projects/sample/autoconf/config.sub
+++ b/projects/sample/autoconf/config.sub
@@ -251,7 +251,8 @@ case $basic_machine in
| alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] | alpha64pca5[67] \
| am33_2.0 \
| arc | arm | arm[bl]e | arme[lb] | armv[2345] | armv[345][lb] | avr | avr32 \
- | be32 | be64 \
+ | be32 | be64 \
+ | aarch64 \
| bfin \
| c4x | clipper \
| d10v | d30v | dlx | dsp16xx \
@@ -359,6 +360,7 @@ case $basic_machine in
| alpha64-* | alpha64ev[4-8]-* | alpha64ev56-* | alpha64ev6[78]-* \
| alphapca5[67]-* | alpha64pca5[67]-* | arc-* \
| arm-* | armbe-* | armle-* | armeb-* | armv*-* \
+ | aarch64-* \
| avr-* | avr32-* \
| be32-* | be64-* \
| bfin-* | bs2000-* \
diff --git a/projects/sample/autoconf/configure.ac b/projects/sample/autoconf/configure.ac
index 1763ea2696..283bc12bb3 100644
--- a/projects/sample/autoconf/configure.ac
+++ b/projects/sample/autoconf/configure.ac
@@ -304,6 +304,7 @@ AC_CACHE_CHECK([target architecture],[llvm_cv_target_arch],
sparc*-*) llvm_cv_target_arch="Sparc" ;;
powerpc*-*) llvm_cv_target_arch="PowerPC" ;;
arm*-*) llvm_cv_target_arch="ARM" ;;
+ aarch64*-*) llvm_cv_target_arch="AArch64" ;;
mips-* | mips64-*) llvm_cv_target_arch="Mips" ;;
mipsel-* | mips64el-*) llvm_cv_target_arch="Mips" ;;
xcore-*) llvm_cv_target_arch="XCore" ;;
@@ -474,6 +475,7 @@ else
PowerPC) AC_SUBST(TARGET_HAS_JIT,1) ;;
x86_64) AC_SUBST(TARGET_HAS_JIT,1) ;;
ARM) AC_SUBST(TARGET_HAS_JIT,1) ;;
+ AArch64) AC_SUBST(TARGET_HAS_JIT,0) ;;
Mips) AC_SUBST(TARGET_HAS_JIT,1) ;;
XCore) AC_SUBST(TARGET_HAS_JIT,0) ;;
MSP430) AC_SUBST(TARGET_HAS_JIT,0) ;;
@@ -596,7 +598,7 @@ if test "$enableval" = host-only ; then
enableval=host
fi
case "$enableval" in
- all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM Mips XCore MSP430 Hexagon CppBackend MBlaze NVPTX" ;;
+ all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM AArch64 Mips XCore MSP430 Hexagon CppBackend MBlaze NVPTX" ;;
*)for a_target in `echo $enableval|sed -e 's/,/ /g' ` ; do
case "$a_target" in
x86) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
@@ -604,6 +606,7 @@ case "$enableval" in
sparc) TARGETS_TO_BUILD="Sparc $TARGETS_TO_BUILD" ;;
powerpc) TARGETS_TO_BUILD="PowerPC $TARGETS_TO_BUILD" ;;
arm) TARGETS_TO_BUILD="ARM $TARGETS_TO_BUILD" ;;
+ aarch64) TARGETS_TO_BUILD="AArch64 $TARGETS_TO_BUILD" ;;
mips) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;
xcore) TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;;
msp430) TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;;
@@ -617,6 +620,7 @@ case "$enableval" in
Sparc) TARGETS_TO_BUILD="Sparc $TARGETS_TO_BUILD" ;;
PowerPC) TARGETS_TO_BUILD="PowerPC $TARGETS_TO_BUILD" ;;
ARM) TARGETS_TO_BUILD="ARM $TARGETS_TO_BUILD" ;;
+ AArch64) TARGETS_TO_BUILD="AArch64 $TARGETS_TO_BUILD" ;;
Mips) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;
MBlaze) TARGETS_TO_BUILD="MBlaze $TARGETS_TO_BUILD" ;;
XCore) TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;;
diff --git a/projects/sample/configure b/projects/sample/configure
index 94e931fbdc..a8fc4bff7e 100755
--- a/projects/sample/configure
+++ b/projects/sample/configure
@@ -3844,6 +3844,7 @@ else
sparc*-*) llvm_cv_target_arch="Sparc" ;;
powerpc*-*) llvm_cv_target_arch="PowerPC" ;;
arm*-*) llvm_cv_target_arch="ARM" ;;
+ aarch64*-*) llvm_cv_target_arch="AArch64" ;;
mips-* | mips64-*) llvm_cv_target_arch="Mips" ;;
mipsel-* | mips64el-*) llvm_cv_target_arch="Mips" ;;
xcore-*) llvm_cv_target_arch="XCore" ;;
@@ -5101,6 +5102,8 @@ else
;;
ARM) TARGET_HAS_JIT=1
;;
+ AArch64) TARGET_HAS_JIT=0
+ ;;
Mips) TARGET_HAS_JIT=1
;;
XCore) TARGET_HAS_JIT=0
@@ -5297,7 +5300,7 @@ if test "$enableval" = host-only ; then
enableval=host
fi
case "$enableval" in
- all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM Mips XCore MSP430 Hexagon CppBackend MBlaze NVPTX" ;;
+ all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM AArch64 Mips XCore MSP430 Hexagon CppBackend MBlaze NVPTX" ;;
*)for a_target in `echo $enableval|sed -e 's/,/ /g' ` ; do
case "$a_target" in
x86) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
@@ -5305,6 +5308,7 @@ case "$enableval" in
sparc) TARGETS_TO_BUILD="Sparc $TARGETS_TO_BUILD" ;;
powerpc) TARGETS_TO_BUILD="PowerPC $TARGETS_TO_BUILD" ;;
arm) TARGETS_TO_BUILD="ARM $TARGETS_TO_BUILD" ;;
+ aarch64) TARGETS_TO_BUILD="AArch64 $TARGETS_TO_BUILD" ;;
mips) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;
xcore) TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;;
msp430) TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;;
@@ -5318,6 +5322,7 @@ case "$enableval" in
Sparc) TARGETS_TO_BUILD="Sparc $TARGETS_TO_BUILD" ;;
PowerPC) TARGETS_TO_BUILD="PowerPC $TARGETS_TO_BUILD" ;;
ARM) TARGETS_TO_BUILD="ARM $TARGETS_TO_BUILD" ;;
+ AArch64) TARGETS_TO_BUILD="AArch64 $TARGETS_TO_BUILD" ;;
Mips) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;
MBlaze) TARGETS_TO_BUILD="MBlaze $TARGETS_TO_BUILD" ;;
XCore) TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;;
@@ -10348,7 +10353,7 @@ else
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
lt_status=$lt_dlunknown
cat > conftest.$ac_ext <<EOF
-#line 10351 "configure"
+#line 10356 "configure"
#include "confdefs.h"
#if HAVE_DLFCN_H
diff --git a/test/CodeGen/AArch64/adc.ll b/test/CodeGen/AArch64/adc.ll
new file mode 100644
index 0000000000..45bf07928f
--- /dev/null
+++ b/test/CodeGen/AArch64/adc.ll
@@ -0,0 +1,54 @@
+; RUN: llc -verify-machineinstrs < %s -march=aarch64 | FileCheck %s
+
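+; Rough idea of what these tests expect (a sketch, not the exact register
+; allocation): i128 values are legalised into a 64-bit pair, so an i128 add
+; becomes adds (low half) followed by adcs (high half), and an i128 sub
+; becomes subs followed by sbcs.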
+define i128 @test_simple(i128 %a, i128 %b, i128 %c) {
+; CHECK: test_simple:
+
+ %valadd = add i128 %a, %b
+; CHECK: adds [[ADDLO:x[0-9]+]], x0, x2
+; CHECK-NEXT: adcs [[ADDHI:x[0-9]+]], x1, x3
+
+ %valsub = sub i128 %valadd, %c
+; CHECK: subs x0, [[ADDLO]], x4
+; CHECK: sbcs x1, [[ADDHI]], x5
+
+ ret i128 %valsub
+; CHECK: ret
+}
+
+define i128 @test_imm(i128 %a) {
+; CHECK: test_imm:
+
+ %val = add i128 %a, 12
+; CHECK: adds x0, x0, #12
+; CHECK: adcs x1, x1, {{x[0-9]|xzr}}
+
+ ret i128 %val
+; CHECK: ret
+}
+
+define i128 @test_shifted(i128 %a, i128 %b) {
+; CHECK: test_shifted:
+
+ %rhs = shl i128 %b, 45
+
+ %val = add i128 %a, %rhs
+; CHECK: adds x0, x0, x2, lsl #45
+; CHECK: adcs x1, x1, {{x[0-9]}}
+
+ ret i128 %val
+; CHECK: ret
+}
+
+define i128 @test_extended(i128 %a, i16 %b) {
+; CHECK: test_extended:
+
+ %ext = sext i16 %b to i128
+ %rhs = shl i128 %ext, 3
+
+ %val = add i128 %a, %rhs
+; CHECK: adds x0, x0, w2, sxth #3
+; CHECK: adcs x1, x1, {{x[0-9]}}
+
+ ret i128 %val
+; CHECK: ret
+}
diff --git a/test/CodeGen/AArch64/addsub-shifted.ll b/test/CodeGen/AArch64/addsub-shifted.ll
new file mode 100644
index 0000000000..ed8ef0d59a
--- /dev/null
+++ b/test/CodeGen/AArch64/addsub-shifted.ll
@@ -0,0 +1,295 @@
+; RUN: llc -verify-machineinstrs < %s -march=aarch64 | FileCheck %s
+
+@var32 = global i32 0
+@var64 = global i64 0
+
+define void @test_lsl_arith(i32 %lhs32, i32 %rhs32, i64 %lhs64, i64 %rhs64) {
+; CHECK: test_lsl_arith:
+
+ %rhs1 = load volatile i32* @var32
+ %shift1 = shl i32 %rhs1, 18
+ %val1 = add i32 %lhs32, %shift1
+ store volatile i32 %val1, i32* @var32
+; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsl #18
+
+ %rhs2 = load volatile i32* @var32
+ %shift2 = shl i32 %rhs2, 31
+ %val2 = add i32 %shift2, %lhs32
+ store volatile i32 %val2, i32* @var32
+; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsl #31
+
+ %rhs3 = load volatile i32* @var32
+ %shift3 = shl i32 %rhs3, 5
+ %val3 = sub i32 %lhs32, %shift3
+ store volatile i32 %val3, i32* @var32
+; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsl #5
+
+; Subtraction is not commutative!
+ %rhs4 = load volatile i32* @var32
+ %shift4 = shl i32 %rhs4, 19
+ %val4 = sub i32 %shift4, %lhs32
+ store volatile i32 %val4, i32* @var32
+; CHECK-NOT: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsl #19
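+; (Only the second source operand of an AArch64 add/sub (shifted register)
+; instruction can carry the shift, so "shift - lhs" cannot fold the lsl into
+; the sub; hence the CHECK-NOT above.)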
+
+ %lhs4a = load volatile i32* @var32
+ %shift4a = shl i32 %lhs4a, 15
+ %val4a = sub i32 0, %shift4a
+ store volatile i32 %val4a, i32* @var32
+; CHECK: sub {{w[0-9]+}}, wzr, {{w[0-9]+}}, lsl #15
+
+ %rhs5 = load volatile i64* @var64
+ %shift5 = shl i64 %rhs5, 18
+ %val5 = add i64 %lhs64, %shift5
+ store volatile i64 %val5, i64* @var64
+; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsl #18
+
+ %rhs6 = load volatile i64* @var64
+ %shift6 = shl i64 %rhs6, 31
+ %val6 = add i64 %shift6, %lhs64
+ store volatile i64 %val6, i64* @var64
+; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsl #31
+
+ %rhs7 = load volatile i64* @var64
+ %shift7 = shl i64 %rhs7, 5
+ %val7 = sub i64 %lhs64, %shift7
+ store volatile i64 %val7, i64* @var64
+; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsl #5
+
+; Subtraction is not commutative!
+ %rhs8 = load volatile i64* @var64
+ %shift8 = shl i64 %rhs8, 19
+ %val8 = sub i64 %shift8, %lhs64
+ store volatile i64 %val8, i64* @var64
+; CHECK-NOT: sub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsl #19
+
+ %lhs8a = load volatile i64* @var64
+ %shift8a = shl i64 %lhs8a, 60
+ %val8a = sub i64 0, %shift8a
+ store volatile i64 %val8a, i64* @var64
+; CHECK: sub {{x[0-9]+}}, xzr, {{x[0-9]+}}, lsl #60
+
+ ret void
+; CHECK: ret
+}
+
+define void @test_lsr_arith(i32 %lhs32, i32 %rhs32, i64 %lhs64, i64 %rhs64) {
+; CHECK: test_lsr_arith:
+
+ %shift1 = lshr i32 %rhs32, 18
+ %val1 = add i32 %lhs32, %shift1
+ store volatile i32 %val1, i32* @var32
+; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsr #18
+
+ %shift2 = lshr i32 %rhs32, 31
+ %val2 = add i32 %shift2, %lhs32
+ store volatile i32 %val2, i32* @var32
+; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsr #31
+
+ %shift3 = lshr i32 %rhs32, 5
+ %val3 = sub i32 %lhs32, %shift3
+ store volatile i32 %val3, i32* @var32
+; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsr #5
+
+; Subtraction is not commutative!
+ %shift4 = lshr i32 %rhs32, 19
+ %val4 = sub i32 %shift4, %lhs32
+ store volatile i32 %val4, i32* @var32
+; CHECK-NOT: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsr #19
+
+ %shift4a = lshr i32 %lhs32, 15
+ %val4a = sub i32 0, %shift4a
+ store volatile i32 %val4a, i32* @var32
+; CHECK: sub {{w[0-9]+}}, wzr, {{w[0-9]+}}, lsr #15
+
+ %shift5 = lshr i64 %rhs64, 18
+ %val5 = add i64 %lhs64, %shift5
+ store volatile i64 %val5, i64* @var64
+; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsr #18
+
+ %shift6 = lshr i64 %rhs64, 31
+ %val6 = add i64 %shift6, %lhs64
+ store volatile i64 %val6, i64* @var64
+; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsr #31
+
+ %shift7 = lshr i64 %rhs64, 5
+ %val7 = sub i64 %lhs64, %shift7
+ store volatile i64 %val7, i64* @var64
+; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsr #5
+
+; Subtraction is not commutative!
+ %shift8 = lshr i64 %rhs64, 19
+ %val8 = sub i64 %shift8, %lhs64
+ store volatile i64 %val8, i64* @var64
+; CHECK-NOT: sub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsr #19
+
+ %shift8a = lshr i64 %lhs64, 45
+ %val8a = sub i64 0, %shift8a
+ store volatile i64 %val8a, i64* @var64
+; CHECK: sub {{x[0-9]+}}, xzr, {{x[0-9]+}}, lsr #45
+
+ ret void
+; CHECK: ret
+}
+
+define void @test_asr_arith(i32 %lhs32, i32 %rhs32, i64 %lhs64, i64 %rhs64) {
+; CHECK: test_asr_arith:
+
+ %shift1 = ashr i32 %rhs32, 18
+ %val1 = add i32 %lhs32, %shift1
+ store volatile i32 %val1, i32* @var32
+; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, asr #18
+
+ %shift2 = ashr i32 %rhs32, 31
+ %val2 = add i32 %shift2, %lhs32
+ store volatile i32 %val2, i32* @var32
+; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, asr #31
+
+ %shift3 = ashr i32 %rhs32, 5
+ %val3 = sub i32 %lhs32, %shift3
+ store volatile i32 %val3, i32* @var32
+; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, asr #5
+
+; Subtraction is not commutative!
+ %shift4 = ashr i32 %rhs32, 19
+ %val4 = sub i32 %shift4, %lhs32
+ store volatile i32 %val4, i32* @var32
+; CHECK-NOT: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, asr #19
+
+ %shift4a = ashr i32 %lhs32, 15
+ %val4a = sub i32 0, %shift4a
+ store volatile i32 %val4a, i32* @var32
+; CHECK: sub {{w[0-9]+}}, wzr, {{w[0-9]+}}, asr #15
+
+ %shift5 = ashr i64 %rhs64, 18
+ %val5 = add i64 %lhs64, %shift5
+ store volatile i64 %val5, i64* @var64
+; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, asr #18
+
+ %shift6 = ashr i64 %rhs64, 31
+ %val6 = add i64 %shift6, %lhs64
+ store volatile i64 %val6, i64* @var64
+; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, asr #31
+
+ %shift7 = ashr i64 %rhs64, 5
+ %val7 = sub i64 %lhs64, %shift7
+ store volatile i64 %val7, i64* @var64
+; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, asr #5
+
+; Subtraction is not commutative!
+ %shift8 = ashr i64 %rhs64, 19
+ %val8 = sub i64 %shift8, %lhs64
+ store volatile i64 %val8, i64* @var64
+; CHECK-NOT: sub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, asr #19
+
+ %shift8a = ashr i64 %lhs64, 45
+ %val8a = sub i64 0, %shift8a
+ store volatile i64 %val8a, i64* @var64
+; CHECK: sub {{x[0-9]+}}, xzr, {{x[0-9]+}}, asr #45
+
+ ret void
+; CHECK: ret
+}
+
+define i32 @test_cmp(i32 %lhs32, i32 %rhs32, i64 %lhs64, i64 %rhs64) {
+; CHECK: test_cmp:
+
+ %shift1 = shl i32 %rhs32, 13
+ %tst1 = icmp uge i32 %lhs32, %shift1
+ br i1 %tst1, label %t2, label %end
+; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}, lsl #13
+
+t2:
+ %shift2 = lshr i32 %rhs32, 20
+ %tst2 = icmp ne i32 %lhs32, %shift2
+ br i1 %tst2, label %t3, label %end
+; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}, lsr #20
+
+t3:
+ %shift3 = ashr i32 %rhs32, 9
+ %tst3 = icmp ne i32 %lhs32, %shift3
+ br i1 %tst3, label %t4, label %end
+; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}, asr #9
+
+t4:
+ %shift4 = shl i64 %rhs64, 43
+ %tst4 = icmp uge i64 %lhs64, %shift4
+ br i1 %tst4, label %t5, label %end
+; CHECK: cmp {{x[0-9]+}}, {{x[0-9]+}}, lsl #43
+
+t5:
+ %shift5 = lshr i64 %rhs64, 20
+ %tst5 = icmp ne i64 %lhs64, %shift5
+ br i1 %tst5, label %t6, label %end
+; CHECK: cmp {{x[0-9]+}}, {{x[0-9]+}}, lsr #20
+
+t6:
+ %shift6 = ashr i64 %rhs64, 59
+ %tst6 = icmp ne i64 %lhs64, %shift6
+ br i1 %tst6, label %t7, label %end
+; CHECK: cmp {{x[0-9]+}}, {{x[0-9]+}}, asr #59
+
+t7:
+ ret i32 1
+end:
+
+ ret i32 0
+; CHECK: ret
+}
+
+define i32 @test_cmn(i32 %lhs32, i32 %rhs32, i64 %lhs64, i64 %rhs64) {
+; CHECK: test_cmn:
+
+ %shift1 = shl i32 %rhs32, 13
+ %val1 = sub i32 0, %shift1
+ %tst1 = icmp uge i32 %lhs32, %val1
+ br i1 %tst1, label %t2, label %end
+ ; Important that this isn't lowered to a cmn instruction because if %rhs32 ==
+ ; 0 then the results will differ.
+; CHECK: sub [[RHS:w[0-9]+]], wzr, {{w[0-9]+}}, lsl #13
+; CHECK: cmp {{w[0-9]+}}, [[RHS]]
+
+t2:
+ %shift2 = lshr i32 %rhs32, 20
+ %val2 = sub i32 0, %shift2
+ %tst2 = icmp ne i32 %lhs32, %val2
+ br i1 %tst2, label %t3, label %end
+; CHECK: cmn {{w[0-9]+}}, {{w[0-9]+}}, lsr #20
+
+t3:
+ %shift3 = ashr i32 %rhs32, 9
+ %val3 = sub i32 0, %shift3
+ %tst3 = icmp eq i32 %lhs32, %val3
+ br i1 %tst3, label %t4, label %end
+; CHECK: cmn {{w[0-9]+}}, {{w[0-9]+}}, asr #9
+
+t4:
+ %shift4 = shl i64 %rhs64, 43
+ %val4 = sub i64 0, %shift4
+ %tst4 = icmp slt i64 %lhs64, %val4
+ br i1 %tst4, label %t5, label %end
+ ; Again, it's important that cmn isn't used here in case %rhs64 == 0.
+; CHECK: sub [[RHS:x[0-9]+]], xzr, {{x[0-9]+}}, lsl #43
+; CHECK: cmp {{x[0-9]+}}, [[RHS]]
+
+t5:
+ %shift5 = lshr i64 %rhs64, 20
+ %val5 = sub i64 0, %shift5
+ %tst5 = icmp ne i64 %lhs64, %val5
+ br i1 %tst5, label %t6, label %end
+; CHECK: cmn {{x[0-9]+}}, {{x[0-9]+}}, lsr #20
+
+t6:
+ %shift6 = ashr i64 %rhs64, 59
+ %val6 = sub i64 0, %shift6
+ %tst6 = icmp ne i64 %lhs64, %val6
+ br i1 %tst6, label %t7, label %end
+; CHECK: cmn {{x[0-9]+}}, {{x[0-9]+}}, asr #59
+
+t7:
+ ret i32 1
+end:
+
+ ret i32 0
+; CHECK: ret
+}
+
diff --git a/test/CodeGen/AArch64/addsub.ll b/test/CodeGen/AArch64/addsub.ll
new file mode 100644
index 0000000000..ccfb1c8f4a
--- /dev/null
+++ b/test/CodeGen/AArch64/addsub.ll
@@ -0,0 +1,127 @@
+; RUN: llc -verify-machineinstrs < %s -march=aarch64 | FileCheck %s
+
+; Note that this should be refactored (for efficiency if nothing else)
+; when the PCS is implemented so we don't have to worry about the
+; loads and stores.
+
+@var_i32 = global i32 42
+@var_i64 = global i64 0
+
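+; The add/sub immediate instructions encode a 12-bit unsigned value with an
+; optional "lsl #12", which is why the tests below come in #4095 and
+; "#..., lsl #12" flavours.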
+; Add pure 12-bit immediates:
+define void @add_small() {
+; CHECK: add_small:
+
+; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, #4095
+ %val32 = load i32* @var_i32
+ %newval32 = add i32 %val32, 4095
+ store i32 %newval32, i32* @var_i32
+
+; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, #52
+ %val64 = load i64* @var_i64
+ %newval64 = add i64 %val64, 52
+ store i64 %newval64, i64* @var_i64
+
+ ret void
+}
+
+; Add 12-bit immediates, shifted left by 12 bits
+define void @add_med() {
+; CHECK: add_med:
+
+; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, #3567, lsl #12
+ %val32 = load i32* @var_i32
+ %newval32 = add i32 %val32, 14610432 ; =0xdef000
+ store i32 %newval32, i32* @var_i32
+
+; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, #4095, lsl #12
+ %val64 = load i64* @var_i64
+ %newval64 = add i64 %val64, 16773120 ; =0xfff000
+ store i64 %newval64, i64* @var_i64
+
+ ret void
+}
+
+; Subtract 12-bit immediates
+define void @sub_small() {
+; CHECK: sub_small:
+
+; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, #4095
+ %val32 = load i32* @var_i32
+ %newval32 = sub i32 %val32, 4095
+ store i32 %newval32, i32* @var_i32
+
+; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, #52
+ %val64 = load i64* @var_i64
+ %newval64 = sub i64 %val64, 52
+ store i64 %newval64, i64* @var_i64
+
+ ret void
+}
+
+; Subtract 12-bit immediates, shifted left by 12 bits
+define void @sub_med() {
+; CHECK: sub_med:
+
+; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, #3567, lsl #12
+ %val32 = load i32* @var_i32
+ %newval32 = sub i32 %val32, 14610432 ; =0xdef000
+ store i32 %newval32, i32* @var_i32
+
+; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, #4095, lsl #12
+ %val64 = load i64* @var_i64
+ %newval64 = sub i64 %val64, 16773120 ; =0xfff000
+ store i64 %newval64, i64* @var_i64
+
+ ret void
+}
+
+define void @testing() {
+; CHECK: testing:
+ %val = load i32* @var_i32
+
+; CHECK: cmp {{w[0-9]+}}, #4095
+; CHECK: b.ne .LBB4_6
+ %cmp_pos_small = icmp ne i32 %val, 4095
+ br i1 %cmp_pos_small, label %ret, label %test2
+
+test2:
+; CHECK: cmp {{w[0-9]+}}, #3567, lsl #12
+; CHECK: b.lo .LBB4_6
+ %newval2 = add i32 %val, 1
+ store i32 %newval2, i32* @var_i32
+ %cmp_pos_big = icmp ult i32 %val, 14610432
+ br i1 %cmp_pos_big, label %ret, label %test3
+
+test3:
+; CHECK: cmp {{w[0-9]+}}, #123
+; CHECK: b.lt .LBB4_6
+ %newval3 = add i32 %val, 2
+ store i32 %newval3, i32* @var_i32
+ %cmp_pos_slt = icmp slt i32 %val, 123
+ br i1 %cmp_pos_slt, label %ret, label %test4
+
+test4:
+; CHECK: cmp {{w[0-9]+}}, #321
+; CHECK: b.gt .LBB4_6
+ %newval4 = add i32 %val, 3
+ store i32 %newval4, i32* @var_i32
+ %cmp_pos_sgt = icmp sgt i32 %val, 321
+ br i1 %cmp_pos_sgt, label %ret, label %test5
+
+test5:
+; CHECK: cmn {{w[0-9]+}}, #444
+; CHECK: b.gt .LBB4_6
+ %newval5 = add i32 %val, 4
+ store i32 %newval5, i32* @var_i32
+ %cmp_neg_uge = icmp sgt i32 %val, -444
+ br i1 %cmp_neg_uge, label %ret, label %test6
+
+test6:
+ %newval6 = add i32 %val, 5
+ store i32 %newval6, i32* @var_i32
+ ret void
+
+ret:
+ ret void
+}
+; TODO: adds/subs
diff --git a/test/CodeGen/AArch64/addsub_ext.ll b/test/CodeGen/AArch64/addsub_ext.ll
new file mode 100644
index 0000000000..e9e3cf2c67
--- /dev/null
+++ b/test/CodeGen/AArch64/addsub_ext.ll
@@ -0,0 +1,189 @@
+; RUN: llc -verify-machineinstrs < %s -march=aarch64 | FileCheck %s
+
+@var8 = global i8 0
+@var16 = global i16 0
+@var32 = global i32 0
+@var64 = global i64 0
+
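+; These functions exercise the add/sub (extended register) forms: the RHS is
+; a narrower value extended with {u,s}xt{b,h,w} and optionally shifted left
+; by up to 4 bits.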
+define void @addsub_i8rhs() {
+; CHECK: addsub_i8rhs:
+ %val8_tmp = load i8* @var8
+ %lhs32 = load i32* @var32
+ %lhs64 = load i64* @var64
+
+ ; Need this to prevent extension upon load and give a vanilla i8 operand.
+ %val8 = add i8 %val8_tmp, 123
+
+
+; Zero-extending to 32-bits
+ %rhs32_zext = zext i8 %val8 to i32
+ %res32_zext = add i32 %lhs32, %rhs32_zext
+ store volatile i32 %res32_zext, i32* @var32
+; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxtb
+
+ %rhs32_zext_shift = shl i32 %rhs32_zext, 3
+ %res32_zext_shift = add i32 %lhs32, %rhs32_zext_shift
+ store volatile i32 %res32_zext_shift, i32* @var32
+; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxtb #3
+
+
+; Zero-extending to 64-bits
+ %rhs64_zext = zext i8 %val8 to i64
+ %res64_zext = add i64 %lhs64, %rhs64_zext
+ store volatile i64 %res64_zext, i64* @var64
+; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtb
+
+ %rhs64_zext_shift = shl i64 %rhs64_zext, 1
+ %res64_zext_shift = add i64 %lhs64, %rhs64_zext_shift
+ store volatile i64 %res64_zext_shift, i64* @var64
+; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtb #1
+
+; Sign-extending to 32-bits
+ %rhs32_sext = sext i8 %val8 to i32
+ %res32_sext = add i32 %lhs32, %rhs32_sext
+ store volatile i32 %res32_sext, i32* @var32
+; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, sxtb
+
+ %rhs32_sext_shift = shl i32 %rhs32_sext, 1
+ %res32_sext_shift = add i32 %lhs32, %rhs32_sext_shift
+ store volatile i32 %res32_sext_shift, i32* @var32
+; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, sxtb #1
+
+; Sign-extending to 64-bits
+ %rhs64_sext = sext i8 %val8 to i64
+ %res64_sext = add i64 %lhs64, %rhs64_sext
+ store volatile i64 %res64_sext, i64* @var64
+; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxtb
+
+ %rhs64_sext_shift = shl i64 %rhs64_sext, 4
+ %res64_sext_shift = add i64 %lhs64, %rhs64_sext_shift
+ store volatile i64 %res64_sext_shift, i64* @var64
+; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxtb #4
+
+
+; CMP variants
+ %tst = icmp slt i32 %lhs32, %rhs32_zext
+ br i1 %tst, label %end, label %test2
+; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}, uxtb
+
+test2:
+ %cmp_sext = sext i8 %val8 to i64
+ %tst2 = icmp eq i64 %lhs64, %cmp_sext
+ br i1 %tst2, label %other, label %end
+; CHECK: cmp {{x[0-9]+}}, {{w[0-9]+}}, sxtb
+
+other:
+ store volatile i32 %lhs32, i32* @var32
+ ret void
+
+end:
+ ret void
+}
+
+define void @addsub_i16rhs() {
+; CHECK: addsub_i16rhs:
+ %val16_tmp = load i16* @var16
+ %lhs32 = load i32* @var32
+ %lhs64 = load i64* @var64
+
+ ; Need this to prevent extension upon load and give a vanilla i16 operand.
+ %val16 = add i16 %val16_tmp, 123
+
+
+; Zero-extending to 32-bits
+ %rhs32_zext = zext i16 %val16 to i32
+ %res32_zext = add i32 %lhs32, %rhs32_zext
+ store volatile i32 %res32_zext, i32* @var32
+; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxth
+
+ %rhs32_zext_shift = shl i32 %rhs32_zext, 3
+ %res32_zext_shift = add i32 %lhs32, %rhs32_zext_shift
+ store volatile i32 %res32_zext_shift, i32* @var32
+; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxth #3
+
+
+; Zero-extending to 64-bits
+ %rhs64_zext = zext i16 %val16 to i64
+ %res64_zext = add i64 %lhs64, %rhs64_zext
+ store volatile i64 %res64_zext, i64* @var64
+; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxth
+
+ %rhs64_zext_shift = shl i64 %rhs64_zext, 1
+ %res64_zext_shift = add i64 %lhs64, %rhs64_zext_shift
+ store volatile i64 %res64_zext_shift, i64* @var64
+; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxth #1
+
+; Sign-extending to 32-bits
+ %rhs32_sext = sext i16 %val16 to i32
+ %res32_sext = add i32 %lhs32, %rhs32_sext
+ store volatile i32 %res32_sext, i32* @var32
+; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, sxth
+
+ %rhs32_sext_shift = shl i32 %rhs32_sext, 1
+ %res32_sext_shift = add i32 %lhs32, %rhs32_sext_shift
+ store volatile i32 %res32_sext_shift, i32* @var32
+; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, sxth #1
+
+; Sign-extending to 64-bits
+ %rhs64_sext = sext i16 %val16 to i64
+ %res64_sext = add i64 %lhs64, %rhs64_sext
+ store volatile i64 %res64_sext, i64* @var64
+; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxth
+
+ %rhs64_sext_shift = shl i64 %rhs64_sext, 4
+ %res64_sext_shift = add i64 %lhs64, %rhs64_sext_shift
+ store volatile i64 %res64_sext_shift, i64* @var64
+; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxth #4
+
+
+; CMP variants
+ %tst = icmp slt i32 %lhs32, %rhs32_zext
+ br i1 %tst, label %end, label %test2
+; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}, uxth
+
+test2:
+ %cmp_sext = sext i16 %val16 to i64
+ %tst2 = icmp eq i64 %lhs64, %cmp_sext
+ br i1 %tst2, label %other, label %end
+; CHECK: cmp {{x[0-9]+}}, {{w[0-9]+}}, sxth
+
+other:
+ store volatile i32 %lhs32, i32* @var32
+ ret void
+
+end:
+ ret void
+}
+
+; N.b. we could probably check more here ("add w2, w3, w1, uxtw" for
+; example), but the remaining instructions are probably not idiomatic
+; in the face of "add/sub (shifted register)" so I don't intend to.
+define void @addsub_i32rhs() {
+; CHECK: addsub_i32rhs:
+ %val32_tmp = load i32* @var32
+ %lhs64 = load i64* @var64
+
+ %val32 = add i32 %val32_tmp, 123
+
+ %rhs64_zext = zext i32 %val32 to i64
+ %res64_zext = add i64 %lhs64, %rhs64_zext
+ store volatile i64 %res64_zext, i64* @var64
+; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtw
+
+ %rhs64_zext_shift = shl i64 %rhs64_zext, 2
+ %res64_zext_shift = add i64 %lhs64, %rhs64_zext_shift
+ store volatile i64 %res64_zext_shift, i64* @var64
+; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtw #2
+
+ %rhs64_sext = sext i32 %val32 to i64
+ %res64_sext = add i64 %lhs64, %rhs64_sext
+ store volatile i64 %res64_sext, i64* @var64
+; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxtw
+
+ %rhs64_sext_shift = shl i64 %rhs64_sext, 2
+ %res64_sext_shift = add i64 %lhs64, %rhs64_sext_shift
+ store volatile i64 %res64_sext_shift, i64* @var64
+; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxtw #2
+
+ ret void
+}
\ No newline at end of file
diff --git a/test/CodeGen/AArch64/adrp-relocation.ll b/test/CodeGen/AArch64/adrp-relocation.ll
new file mode 100644
index 0000000000..ee600f0092
--- /dev/null
+++ b/test/CodeGen/AArch64/adrp-relocation.ll
@@ -0,0 +1,35 @@
+; RUN: llc -march=aarch64 -verify-machineinstrs -filetype=obj < %s | elf-dump | FileCheck %s
+
+define fp128 @testfn() nounwind {
+entry:
+ ret fp128 0xL00000000000000004004500000000000
+}
+
+define fp128 @foo() nounwind {
+entry:
+ %bar = alloca fp128 ()*, align 8
+ store fp128 ()* @testfn, fp128 ()** %bar, align 8
+ %call = call fp128 @testfn()
+ ret fp128 %call
+}
+
+; The above should produce an ADRP/ADD pair to calculate the address of
+; testfn. The important point is that LLVM shouldn't think it can deal with the
+; relocation on the ADRP itself (even though it knows everything about the
+; relative offsets of testfn and foo) because its value depends on where this
+; object file's .text section gets relocated in memory.
+
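+; A sketch of the sequence being checked (register choice is illustrative,
+; relocation numbers assume the standard AArch64 ELF ABI):
+;   adrp x0, testfn             // R_AARCH64_ADR_PREL_PG_HI21 == 0x113
+;   add  x0, x0, #:lo12:testfn  // R_AARCH64_ADD_ABS_LO12_NC  == 0x115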
+; CHECK: .rela.text
+
+; CHECK: # Relocation 0
+; CHECK-NEXT: (('r_offset', 0x0000000000000028)
+; CHECK-NEXT: ('r_sym', 0x00000009)
+; CHECK-NEXT: ('r_type', 0x00000113)
+; CHECK-NEXT: ('r_addend', 0x0000000000000000)
+; CHECK-NEXT: ),
+; CHECK-NEXT: Relocation 1
+; CHECK-NEXT: (('r_offset', 0x000000000000002c)
+; CHECK-NEXT: ('r_sym', 0x00000009)
+; CHECK-NEXT: ('r_type', 0x00000115)
+; CHECK-NEXT: ('r_addend', 0x0000000000000000)
+; CHECK-NEXT: ),
diff --git a/test/CodeGen/AArch64/alloca.ll b/test/CodeGen/AArch64/alloca.ll
new file mode 100644
index 0000000000..0e8c14d7d2
--- /dev/null
+++ b/test/CodeGen/AArch64/alloca.ll
@@ -0,0 +1,134 @@
+; RUN: llc -march=aarch64 -verify-machineinstrs < %s | FileCheck %s
+
+declare void @use_addr(i8*)
+
+define void @test_simple_alloca(i64 %n) {
+; CHECK: test_simple_alloca:
+
+ %buf = alloca i8, i64 %n
+ ; Make sure we align the stack change to 16 bytes:
+; CHECK: add [[SPDELTA:x[0-9]+]], x0, #15
+; CHECK: and x0, [[SPDELTA]], #0xfffffffffffffff0
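+ ; i.e. SPDELTA = (%n + 15) & ~15, rounding the allocation up to a 16-byte
+ ; multiple before it is subtracted from sp.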
+
+ ; Make sure we change SP. It would be surprising if anything but x0 were used
+ ; for the final sp, but it could be if it was then moved into x0.
+; CHECK: mov [[TMP:x[0-9]+]], sp
+; CHECK: sub x0, [[TMP]], [[SPDELTA]]
+; CHECK: mov sp, x0
+
+ call void @use_addr(i8* %buf)
+; CHECK: bl use_addr
+
+ ret void
+ ; Make sure epilogue restores sp from fp
+; CHECK: sub sp, x29, #16
+; CHECK: ldp x29, x30, [sp, #16]
+; CHECK: add sp, sp, #32
+; CHECK: ret
+}
+
+declare void @use_addr_loc(i8*, i64*)
+
+define i64 @test_alloca_with_local(i64 %n) {
+; CHECK: test_alloca_with_local:
+; CHECK: sub sp, sp, #32
+; CHECK: stp x29, x30, [sp, #16]
+
+ %loc = alloca i64
+ %buf = alloca i8, i64 %n
+ ; Make sure we align the stack change to 16 bytes:
+; CHECK: add [[SPDELTA:x[0-9]+]], x0, #15
+; CHECK: and x0, [[SPDELTA]], #0xfffffffffffffff0
+
+ ; Make sure we change SP. It would be surprising if anything but x0 were used
+ ; for the final sp, but it could be if it was then moved into x0.
+; CHECK: mov [[TMP:x[0-9]+]], sp
+; CHECK: sub x0, [[TMP]], [[SPDELTA]]
+; CHECK: mov sp, x0
+
+ ; Obviously suboptimal code here, but it is needed to get &local in x1
+; CHECK: sub [[TMP:x[0-9]+]], x29, [[LOC_FROM_FP:#[0-9]+]]
+; CHECK: add x1, [[TMP]], #0
+
+ call void @use_addr_loc(i8* %buf, i64* %loc)
+; CHECK: bl use_addr_loc
+
+ %val = load i64* %loc
+; CHECK: sub x[[TMP:[0-9]+]], x29, [[LOC_FROM_FP]]
+; CHECK: ldr x0, [x[[TMP]]]
+
+ ret i64 %val
+ ; Make sure epilogue restores sp from fp
+; CHECK: sub sp, x29, #16
+; CHECK: ldp x29, x30, [sp, #16]
+; CHECK: add sp, sp, #32
+; CHECK: ret
+}
+
+define void @test_variadic_alloca(i64 %n, ...) {
+; CHECK: test_variadic_alloca:
+
+; CHECK: sub sp, sp, #208
+; CHECK: stp x29, x30, [sp, #192]
+; CHECK: add x29, sp, #192
+; CHECK: sub x9, x29, #192
+; CHECK: add x8, x9, #0
+; CHECK: str q7, [x8, #112]
+; [...]
+; CHECK: str q1, [x8, #16]
+
+ %addr = alloca i8, i64 %n
+
+ call void @use_addr(i8* %addr)
+; CHECK: bl use_addr
+
+ ret void
+; CHECK: sub sp, x29, #192
+; CHECK: ldp x29, x30, [sp, #192]
+; CHECK: add sp, sp, #208
+}
+
+define void @test_alloca_large_frame(i64 %n) {
+; CHECK: test_alloca_large_frame:
+
+; CHECK: sub sp, sp, #496
+; CHECK: stp x29, x30, [sp, #480]
+; CHECK: add x29, sp, #480
+; CHECK: sub sp, sp, #48
+; CHECK: sub sp, sp, #1953, lsl #12
+
+ %addr1 = alloca i8, i64 %n
+ %addr2 = alloca i64, i64 1000000
+
+ call void @use_addr_loc(i8* %addr1, i64* %addr2)
+
+ ret void
+; CHECK: sub sp, x29, #480
+; CHECK: ldp x29, x30, [sp, #480]
+; CHECK: add sp, sp, #496
+}
+
+declare i8* @llvm.stacksave()
+declare void @llvm.stackrestore(i8*)
+
+define void @test_scoped_alloca(i64 %n) {
+; CHECK: test_scoped_alloca:
+; CHECK: sub sp, sp, #32
+
+ %sp = call i8* @llvm.stacksave()
+; CHECK: mov [[SAVED_SP:x[0-9]+]], sp
+
+ %addr = alloca i8, i64 %n
+; CHECK: and [[SPDELTA:x[0-9]+]], {{x[0-9]+}}, #0xfffffffffffffff0
+; CHECK: mov [[OLDSP:x[0-9]+]], sp
+; CHECK: sub [[NEWSP:x[0-9]+]], [[OLDSP]], [[SPDELTA]]
+; CHECK: mov sp, [[NEWSP]]
+
+ call void @use_addr(i8* %addr)
+; CHECK: bl use_addr
+
+ call void @llvm.stackrestore(i8* %sp)
+; CHECK: mov sp, [[SAVED_SP]]
+
+ ret void
+}
\ No newline at end of file
diff --git a/test/CodeGen/AArch64/analyze-branch.ll b/test/CodeGen/AArch64/analyze-branch.ll
new file mode 100644
index 0000000000..1f6e96f1d5
--- /dev/null
+++ b/test/CodeGen/AArch64/analyze-branch.ll
@@ -0,0 +1,231 @@
+; RUN: llc -march=aarch64 < %s | FileCheck %s
+
+; This test checks that LLVM can do basic stripping and reapplying of branches
+; to basic blocks.
+
+declare void @test_true()
+declare void @test_false()
+
+; !0 corresponds to a branch being taken, !1 to not being taken.
+!0 = metadata !{metadata !"branch_weights", i32 64, i32 4}
+!1 = metadata !{metadata !"branch_weights", i32 4, i32 64}
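+; With !0 the 'true' destination is hot, so the expected layout falls through
+; into it and branches to the cold block; !1 is the mirror image.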
+
+define void @test_Bcc_fallthrough_taken(i32 %in) nounwind {
+; CHECK: test_Bcc_fallthrough_taken:
+ %tst = icmp eq i32 %in, 42
+ br i1 %tst, label %true, label %false, !prof !0
+
+; CHECK: cmp {{w[0-9]+}}, #42
+
+; CHECK: b.ne [[FALSE:.LBB[0-9]+_[0-9]+]]
+; CHECK-NEXT: // BB#
+; CHECK-NEXT: bl test_true
+
+; CHECK: [[FALSE]]:
+; CHECK: bl test_false
+
+true:
+ call void @test_true()
+ ret void
+
+false:
+ call void @test_false()
+ ret void
+}
+
+define void @test_Bcc_fallthrough_nottaken(i32 %in) nounwind {
+; CHECK: test_Bcc_fallthrough_nottaken:
+ %tst = icmp eq i32 %in, 42
+ br i1 %tst, label %true, label %false, !prof !1
+
+; CHECK: cmp {{w[0-9]+}}, #42
+
+; CHECK: b.eq [[TRUE:.LBB[0-9]+_[0-9]+]]
+; CHECK-NEXT: // BB#
+; CHECK-NEXT: bl test_false
+
+; CHECK: [[TRUE]]:
+; CHECK: bl test_true
+
+true:
+ call void @test_true()
+ ret void
+
+false:
+ call void @test_false()
+ ret void
+}
+
+define void @test_CBZ_fallthrough_taken(i32 %in) nounwind {
+; CHECK: test_CBZ_fallthrough_taken:
+ %tst = icmp eq i32 %in, 0
+ br i1 %tst, label %true, label %false, !prof !0
+
+; CHECK: cbnz {{w[0-9]+}}, [[FALSE:.LBB[0-9]+_[0-9]+]]
+; CHECK-NEXT: // BB#
+; CHECK-NEXT: bl test_true
+
+; CHECK: [[FALSE]]:
+; CHECK: bl test_false
+
+true:
+ call void @test_true()
+ ret void
+
+false:
+ call void @test_false()
+ ret void
+}
+
+define void @test_CBZ_fallthrough_nottaken(i64 %in) nounwind {
+; CHECK: test_CBZ_fallthrough_nottaken:
+ %tst = icmp eq i64 %in, 0
+ br i1 %tst, label %true, label %false, !prof !1
+
+; CHECK: cbz {{x[0-9]+}}, [[TRUE:.LBB[0-9]+_[0-9]+]]
+; CHECK-NEXT: // BB#
+; CHECK-NEXT: bl test_false
+
+; CHECK: [[TRUE]]:
+; CHECK: bl test_true
+
+true:
+ call void @test_true()
+ ret void
+
+false:
+ call void @test_false()
+ ret void
+}
+
+define void @test_CBNZ_fallthrough_taken(i32 %in) nounwind {
+; CHECK: test_CBNZ_fallthrough_taken:
+ %tst = icmp ne i32 %in, 0
+ br i1 %tst, label %true, label %false, !prof !0
+
+; CHECK: cbz {{w[0-9]+}}, [[FALSE:.LBB[0-9]+_[0-9]+]]
+; CHECK-NEXT: // BB#
+; CHECK-NEXT: bl test_true
+
+; CHECK: [[FALSE]]:
+; CHECK: bl test_false
+
+true:
+ call void @test_true()
+ ret void
+
+false:
+ call void @test_false()
+ ret void
+}
+
+define void @test_CBNZ_fallthrough_nottaken(i64 %in) nounwind {
+; CHECK: test_CBNZ_fallthrough_nottaken:
+ %tst = icmp ne i64 %in, 0
+ br i1 %tst, label %true, label %false, !prof !1
+
+; CHECK: cbnz {{x[0-9]+}}, [[TRUE:.LBB[0-9]+_[0-9]+]]
+; CHECK-NEXT: // BB#
+; CHECK-NEXT: bl test_false
+
+; CHECK: [[TRUE]]:
+; CHECK: bl test_true
+
+true:
+ call void @test_true()
+ ret void
+
+false:
+ call void @test_false()
+ ret void
+}
+
+define void @test_TBZ_fallthrough_taken(i32 %in) nounwind {
+; CHECK: test_TBZ_fallthrough_taken:
+ %bit = and i32 %in, 32768
+ %tst = icmp eq i32 %bit, 0
+ br i1 %tst, label %true, label %false, !prof !0
+
+; CHECK: tbnz {{w[0-9]+}}, #15, [[FALSE:.LBB[0-9]+_[0-9]+]]
+; CHECK-NEXT: // BB#
+; CHECK-NEXT: bl test_true
+
+; CHECK: [[FALSE]]:
+; CHECK: bl test_false
+
+true:
+ call void @test_true()
+ ret void
+
+false:
+ call void @test_false()
+ ret void
+}
+
+define void @test_TBZ_fallthrough_nottaken(i64 %in) nounwind {
+; CHECK: test_TBZ_fallthrough_nottaken:
+ %bit = and i64 %in, 32768
+ %tst = icmp eq i64 %bit, 0
+ br i1 %tst, label %true, label %false, !prof !1
+
+; CHECK: tbz {{x[0-9]+}}, #15, [[TRUE:.LBB[0-9]+_[0-9]+]]
+; CHECK-NEXT: // BB#
+; CHECK-NEXT: bl test_false
+
+; CHECK: [[TRUE]]:
+; CHECK: bl test_true
+
+true:
+ call void @test_true()
+ ret void
+
+false:
+ call void @test_false()
+ ret void
+}
+
+
+define void @test_TBNZ_fallthrough_taken(i32 %in) nounwind {
+; CHECK: test_TBNZ_fallthrough_taken:
+ %bit = and i32 %in, 32768
+ %tst = icmp ne i32 %bit, 0
+ br i1 %tst, label %true, label %false, !prof !0
+
+; CHECK: tbz {{w[0-9]+}}, #15, [[FALSE:.LBB[0-9]+_[0-9]+]]
+; CHECK-NEXT: // BB#
+; CHECK-NEXT: bl test_true
+
+; CHECK: [[FALSE]]:
+; CHECK: bl test_false
+
+true:
+ call void @test_true()
+ ret void
+
+false:
+ call void @test_false()
+ ret void
+}
+
+define void @test_TBNZ_fallthrough_nottaken(i64 %in) nounwind {
+; CHECK: test_TBNZ_fallthrough_nottaken:
+ %bit = and i64 %in, 32768
+ %tst = icmp ne i64 %bit, 0
+ br i1 %tst, label %true, label %false, !prof !1
+
+; CHECK: tbnz {{x[0-9]+}}, #15, [[TRUE:.LBB[0-9]+_[0-9]+]]
+; CHECK-NEXT: // BB#
+; CHECK-NEXT: bl test_false
+
+; CHECK: [[TRUE]]:
+; CHECK: bl test_true
+
+true:
+ call void @test_true()
+ ret void
+
+false:
+ call void @test_false()
+ ret void
+}
+
diff --git a/test/CodeGen/AArch64/atomic-ops-not-barriers.ll b/test/CodeGen/AArch64/atomic-ops-not-barriers.ll
new file mode 100644
index 0000000000..f383d76b74
--- /dev/null
+++ b/test/CodeGen/AArch64/atomic-ops-not-barriers.ll
@@ -0,0 +1,24 @@
+; RUN: llc -march=aarch64 < %s | FileCheck %s
+
+define i32 @foo(i32* %var, i1 %cond) {
+; CHECK: foo:
+ br i1 %cond, label %atomic_ver, label %simple_ver
+simple_ver:
+ %oldval = load i32* %var
+ %newval = add nsw i32 %oldval, -1
+ store i32 %newval, i32* %var
+ br label %somewhere
+atomic_ver:
+ %val = atomicrmw add i32* %var, i32 -1 seq_cst
+ br label %somewhere
+; CHECK: dmb
+; CHECK: ldxr
+; CHECK: dmb
+ ; The key point here is that the second dmb isn't immediately followed by the
+ ; simple_ver basic block, which LLVM attempted to do when DMB had been marked
+ ; with isBarrier. For now, look for something that looks like "somewhere".
+; CHECK-NEXT: mov
+somewhere:
+ %combined = phi i32 [ %val, %atomic_ver ], [ %newval, %simple_ver]
+ ret i32 %combined
+}
diff --git a/test/CodeGen/AArch64/atomic-ops.ll b/test/CodeGen/AArch64/atomic-ops.ll
new file mode 100644
index 0000000000..8a1e97626d
--- /dev/null
+++ b/test/CodeGen/AArch64/atomic-ops.ll
@@ -0,0 +1,1099 @@
+; RUN: llc -march=aarch64 -verify-machineinstrs < %s | FileCheck %s
+
+@var8 = global i8 0
+@var16 = global i16 0
+@var32 = global i32 0
+@var64 = global i64 0
+
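+; Each atomicrmw below is expected to expand to the same shape: a leading
+; "dmb ish", an ldxr*/op/stxr* retry loop keyed on the store-exclusive status
+; register, and a trailing "dmb ish".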
+define i8 @test_atomic_load_add_i8(i8 %offset) nounwind {
+; CHECK: test_atomic_load_add_i8:
+ %old = atomicrmw add i8* @var8, i8 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
+ ; w0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: add [[NEW:w[0-9]+]], w[[OLD]], w0
+; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: b.ne .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+ ret i8 %old
+}
+
+define i16 @test_atomic_load_add_i16(i16 %offset) nounwind {
+; CHECK: test_atomic_load_add_i16:
+ %old = atomicrmw add i16* @var16, i16 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]]
+ ; w0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: add [[NEW:w[0-9]+]], w[[OLD]], w0
+; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: b.ne .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+ ret i16 %old
+}
+
+define i32 @test_atomic_load_add_i32(i32 %offset) nounwind {
+; CHECK: test_atomic_load_add_i32:
+ %old = atomicrmw add i32* @var32, i32 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]]
+ ; w0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: add [[NEW:w[0-9]+]], w[[OLD]], w0
+; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: b.ne .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+ ret i32 %old
+}
+
+define i64 @test_atomic_load_add_i64(i64 %offset) nounwind {
+; CHECK: test_atomic_load_add_i64:
+ %old = atomicrmw add i64* @var64, i64 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]]
+ ; x0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: add [[NEW:x[0-9]+]], x[[OLD]], x0
+; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: b.ne .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+ ret i64 %old
+}
+
+define i8 @test_atomic_load_sub_i8(i8 %offset) nounwind {
+; CHECK: test_atomic_load_sub_i8:
+ %old = atomicrmw sub i8* @var8, i8 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
+ ; w0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: sub [[NEW:w[0-9]+]], w[[OLD]], w0
+; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: b.ne .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+ ret i8 %old
+}
+
+define i16 @test_atomic_load_sub_i16(i16 %offset) nounwind {
+; CHECK: test_atomic_load_sub_i16:
+ %old = atomicrmw sub i16* @var16, i16 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]]
+ ; w0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: sub [[NEW:w[0-9]+]], w[[OLD]], w0
+; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: b.ne .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+ ret i16 %old
+}
+
+define i32 @test_atomic_load_sub_i32(i32 %offset) nounwind {
+; CHECK: test_atomic_load_sub_i32:
+ %old = atomicrmw sub i32* @var32, i32 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]]
+ ; w0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: sub [[NEW:w[0-9]+]], w[[OLD]], w0
+; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: b.ne .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+ ret i32 %old
+}
+
+define i64 @test_atomic_load_sub_i64(i64 %offset) nounwind {
+; CHECK: test_atomic_load_sub_i64:
+ %old = atomicrmw sub i64* @var64, i64 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]]
+ ; x0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: sub [[NEW:x[0-9]+]], x[[OLD]], x0
+; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: b.ne .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+ ret i64 %old
+}
+
+define i8 @test_atomic_load_and_i8(i8 %offset) nounwind {
+; CHECK: test_atomic_load_and_i8:
+ %old = atomicrmw and i8* @var8, i8 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
+ ; w0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: and [[NEW:w[0-9]+]], w[[OLD]], w0
+; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: b.ne .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+ ret i8 %old
+}
+
+define i16 @test_atomic_load_and_i16(i16 %offset) nounwind {
+; CHECK: test_atomic_load_and_i16:
+ %old = atomicrmw and i16* @var16, i16 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]]
+ ; w0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: and [[NEW:w[0-9]+]], w[[OLD]], w0
+; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: b.ne .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+ ret i16 %old
+}
+
+define i32 @test_atomic_load_and_i32(i32 %offset) nounwind {
+; CHECK: test_atomic_load_and_i32:
+ %old = atomicrmw and i32* @var32, i32 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]]
+ ; w0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: and [[NEW:w[0-9]+]], w[[OLD]], w0
+; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: b.ne .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+ ret i32 %old
+}
+
+define i64 @test_atomic_load_and_i64(i64 %offset) nounwind {
+; CHECK: test_atomic_load_and_i64:
+ %old = atomicrmw and i64* @var64, i64 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]]
+ ; x0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: and [[NEW:x[0-9]+]], x[[OLD]], x0
+; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: b.ne .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+ ret i64 %old
+}
+
+define i8 @test_atomic_load_or_i8(i8 %offset) nounwind {
+; CHECK: test_atomic_load_or_i8:
+ %old = atomicrmw or i8* @var8, i8 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
+ ; w0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: orr [[NEW:w[0-9]+]], w[[OLD]], w0
+; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: b.ne .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+ ret i8 %old
+}
+
+define i16 @test_atomic_load_or_i16(i16 %offset) nounwind {
+; CHECK: test_atomic_load_or_i16:
+ %old = atomicrmw or i16* @var16, i16 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]]
+ ; w0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: orr [[NEW:w[0-9]+]], w[[OLD]], w0
+; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: b.ne .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+ ret i16 %old
+}
+
+define i32 @test_atomic_load_or_i32(i32 %offset) nounwind {
+; CHECK: test_atomic_load_or_i32:
+ %old = atomicrmw or i32* @var32, i32 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]]
+ ; w0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: orr [[NEW:w[0-9]+]], w[[OLD]], w0
+; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: b.ne .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+ ret i32 %old
+}
+
+define i64 @test_atomic_load_or_i64(i64 %offset) nounwind {
+; CHECK: test_atomic_load_or_i64:
+ %old = atomicrmw or i64* @var64, i64 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]]
+ ; x0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: orr [[NEW:x[0-9]+]], x[[OLD]], x0
+; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: b.ne .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+ ret i64 %old
+}
+
+define i8 @test_atomic_load_xor_i8(i8 %offset) nounwind {
+; CHECK: test_atomic_load_xor_i8:
+ %old = atomicrmw xor i8* @var8, i8 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
+ ; w0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: eor [[NEW:w[0-9]+]], w[[OLD]], w0
+; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: b.ne .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+ ret i8 %old
+}
+
+define i16 @test_atomic_load_xor_i16(i16 %offset) nounwind {
+; CHECK: test_atomic_load_xor_i16:
+ %old = atomicrmw xor i16* @var16, i16 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]]
+ ; w0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: eor [[NEW:w[0-9]+]], w[[OLD]], w0
+; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: b.ne .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+ ret i16 %old
+}
+
+define i32 @test_atomic_load_xor_i32(i32 %offset) nounwind {
+; CHECK: test_atomic_load_xor_i32:
+ %old = atomicrmw xor i32* @var32, i32 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]]
+ ; w0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: eor [[NEW:w[0-9]+]], w[[OLD]], w0
+; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: b.ne .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+ ret i32 %old
+}
+
+define i64 @test_atomic_load_xor_i64(i64 %offset) nounwind {
+; CHECK: test_atomic_load_xor_i64:
+ %old = atomicrmw xor i64* @var64, i64 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]]
+ ; x0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: eor [[NEW:x[0-9]+]], x[[OLD]], x0
+; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: b.ne .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+ ret i64 %old
+}
+
+define i8 @test_atomic_load_xchg_i8(i8 %offset) nounwind {
+; CHECK: test_atomic_load_xchg_i8:
+ %old = atomicrmw xchg i8* @var8, i8 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
+ ; w0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], w0, [x[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: b.ne .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+ ret i8 %old
+}
+
+define i16 @test_atomic_load_xchg_i16(i16 %offset) nounwind {
+; CHECK: test_atomic_load_xchg_i16:
+ %old = atomicrmw xchg i16* @var16, i16 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]]
+ ; w0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], w0, [x[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: b.ne .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+ ret i16 %old
+}
+
+define i32 @test_atomic_load_xchg_i32(i32 %offset) nounwind {
+; CHECK: test_atomic_load_xchg_i32:
+ %old = atomicrmw xchg i32* @var32, i32 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]]
+ ; w0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], w0, [x[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: b.ne .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+ ret i32 %old
+}
+
+define i64 @test_atomic_load_xchg_i64(i64 %offset) nounwind {
+; CHECK: test_atomic_load_xchg_i64:
+ %old = atomicrmw xchg i64* @var64, i64 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]]
+ ; x0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], x0, [x[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: b.ne .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+ ret i64 %old
+}
+
+
+define i8 @test_atomic_load_min_i8(i8 %offset) nounwind {
+; CHECK: test_atomic_load_min_i8:
+ %old = atomicrmw min i8* @var8, i8 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
+ ; w0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: cmp w0, w[[OLD]], sxtb
+; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, gt
+; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: b.ne .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+ ret i8 %old
+}
+
+define i16 @test_atomic_load_min_i16(i16 %offset) nounwind {
+; CHECK: test_atomic_load_min_i16:
+ %old = atomicrmw min i16* @var16, i16 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]]
+ ; w0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: cmp w0, w[[OLD]], sxth
+; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, gt
+; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: b.ne .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+ ret i16 %old
+}
+
+define i32 @test_atomic_load_min_i32(i32 %offset) nounwind {
+; CHECK: test_atomic_load_min_i32:
+ %old = atomicrmw min i32* @var32, i32 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]]
+ ; w0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: cmp w0, w[[OLD]]
+; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, gt
+; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: b.ne .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+ ret i32 %old
+}
+
+define i64 @test_atomic_load_min_i64(i64 %offset) nounwind {
+; CHECK: test_atomic_load_min_i64:
+ %old = atomicrmw min i64* @var64, i64 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]]
+ ; x0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: cmp x0, x[[OLD]]
+; CHECK-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, gt
+; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: b.ne .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+ ret i64 %old
+}
+
+define i8 @test_atomic_load_max_i8(i8 %offset) nounwind {
+; CHECK: test_atomic_load_max_i8:
+ %old = atomicrmw max i8* @var8, i8 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
+ ; w0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: cmp w0, w[[OLD]], sxtb
+; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lt
+; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: b.ne .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+ ret i8 %old
+}
+
+define i16 @test_atomic_load_max_i16(i16 %offset) nounwind {
+; CHECK: test_atomic_load_max_i16:
+ %old = atomicrmw max i16* @var16, i16 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]]
+ ; w0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: cmp w0, w[[OLD]], sxth
+; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lt
+; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: b.ne .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+ ret i16 %old
+}
+
+define i32 @test_atomic_load_max_i32(i32 %offset) nounwind {
+; CHECK: test_atomic_load_max_i32:
+ %old = atomicrmw max i32* @var32, i32 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]]
+ ; w0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: cmp w0, w[[OLD]]
+; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lt
+; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: b.ne .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+ ret i32 %old
+}
+
+define i64 @test_atomic_load_max_i64(i64 %offset) nounwind {
+; CHECK: test_atomic_load_max_i64:
+ %old = atomicrmw max i64* @var64, i64 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]]
+ ; x0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: cmp x0, x[[OLD]]
+; CHECK-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, lt
+; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: b.ne .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+ ret i64 %old
+}
+
+define i8 @test_atomic_load_umin_i8(i8 %offset) nounwind {
+; CHECK: test_atomic_load_umin_i8:
+ %old = atomicrmw umin i8* @var8, i8 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
+ ; w0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: cmp w0, w[[OLD]], uxtb
+; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, hi
+; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: b.ne .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+ ret i8 %old
+}
+
+define i16 @test_atomic_load_umin_i16(i16 %offset) nounwind {
+; CHECK: test_atomic_load_umin_i16:
+ %old = atomicrmw umin i16* @var16, i16 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]]
+ ; w0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: cmp w0, w[[OLD]], uxth
+; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, hi
+; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: b.ne .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+ ret i16 %old
+}
+
+define i32 @test_atomic_load_umin_i32(i32 %offset) nounwind {
+; CHECK: test_atomic_load_umin_i32:
+ %old = atomicrmw umin i32* @var32, i32 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]]
+ ; w0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: cmp w0, w[[OLD]]
+; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, hi
+; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: b.ne .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+ ret i32 %old
+}
+
+define i64 @test_atomic_load_umin_i64(i64 %offset) nounwind {
+; CHECK: test_atomic_load_umin_i64:
+ %old = atomicrmw umin i64* @var64, i64 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]]
+ ; x0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: cmp x0, x[[OLD]]
+; CHECK-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, hi
+; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: b.ne .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+ ret i64 %old
+}
+
+define i8 @test_atomic_load_umax_i8(i8 %offset) nounwind {
+; CHECK: test_atomic_load_umax_i8:
+ %old = atomicrmw umax i8* @var8, i8 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
+ ; w0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: cmp w0, w[[OLD]], uxtb
+; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lo
+; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: b.ne .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+ ret i8 %old
+}
+
+define i16 @test_atomic_load_umax_i16(i16 %offset) nounwind {
+; CHECK: test_atomic_load_umax_i16:
+ %old = atomicrmw umax i16* @var16, i16 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]]
+ ; w0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: cmp w0, w[[OLD]], uxth
+; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lo
+; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: b.ne .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+ ret i16 %old
+}
+
+define i32 @test_atomic_load_umax_i32(i32 %offset) nounwind {
+; CHECK: test_atomic_load_umax_i32:
+ %old = atomicrmw umax i32* @var32, i32 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]]
+ ; w0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: cmp w0, w[[OLD]]
+; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lo
+; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: b.ne .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+ ret i32 %old
+}
+
+define i64 @test_atomic_load_umax_i64(i64 %offset) nounwind {
+; CHECK: test_atomic_load_umax_i64:
+ %old = atomicrmw umax i64* @var64, i64 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]]
+ ; x0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: cmp x0, x[[OLD]]
+; CHECK-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, lo
+; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: b.ne .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+ ret i64 %old
+}
+
+define i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind {
+; CHECK: test_atomic_cmpxchg_i8:
+ %old = cmpxchg i8* @var8, i8 %wanted, i8 %new seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8
+
+; CHECK: [[STARTAGAIN:.LBB[0-9]+_[0-9]+]]:
+; CHECK-NEXT: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
+ ; w0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: cmp w[[OLD]], w0
+; CHECK-NEXT: b.ne [[GET_OUT:.LBB[0-9]+_[0-9]+]]
+ ; As above, w1 is a reasonable guess.
+; CHECK: stxrb [[STATUS:w[0-9]+]], w1, [x[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: b.ne [[STARTAGAIN]]
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+ ret i8 %old
+}
+
+define i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind {
+; CHECK: test_atomic_cmpxchg_i16:
+ %old = cmpxchg i16* @var16, i16 %wanted, i16 %new seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16
+
+; CHECK: [[STARTAGAIN:.LBB[0-9]+_[0-9]+]]:
+; CHECK-NEXT: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]]
+ ; w0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: cmp w[[OLD]], w0
+; CHECK-NEXT: b.ne [[GET_OUT:.LBB[0-9]+_[0-9]+]]
+ ; As above, w1 is a reasonable guess.
+; CHECK: stxrh [[STATUS:w[0-9]+]], w1, [x[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: b.ne [[STARTAGAIN]]
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+ ret i16 %old
+}
+
+define i32 @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind {
+; CHECK: test_atomic_cmpxchg_i32:
+ %old = cmpxchg i32* @var32, i32 %wanted, i32 %new seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32
+
+; CHECK: [[STARTAGAIN:.LBB[0-9]+_[0-9]+]]:
+; CHECK-NEXT: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]]
+ ; w0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: cmp w[[OLD]], w0
+; CHECK-NEXT: b.ne [[GET_OUT:.LBB[0-9]+_[0-9]+]]
+ ; As above, w1 is a reasonable guess.
+; CHECK: stxr [[STATUS:w[0-9]+]], w1, [x[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: b.ne [[STARTAGAIN]]
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+ ret i32 %old
+}
+
+define i64 @test_atomic_cmpxchg_i64(i64 %wanted, i64 %new) nounwind {
+; CHECK: test_atomic_cmpxchg_i64:
+ %old = cmpxchg i64* @var64, i64 %wanted, i64 %new seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64
+
+; CHECK: [[STARTAGAIN:.LBB[0-9]+_[0-9]+]]:
+; CHECK-NEXT: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]]
+ ; x0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: cmp x[[OLD]], x0
+; CHECK-NEXT: b.ne [[GET_OUT:.LBB[0-9]+_[0-9]+]]
+ ; As above, x1 is a reasonable guess.
+; CHECK: stxr [[STATUS:w[0-9]+]], x1, [x[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: b.ne [[STARTAGAIN]]
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+ ret i64 %old
+}
+
+define i8 @test_atomic_load_monotonic_i8() nounwind {
+; CHECK: test_atomic_load_monotonic_i8:
+ %val = load atomic i8* @var8 monotonic, align 1
+; CHECK-NOT: dmb
+; CHECK: adrp x[[HIADDR:[0-9]+]], var8
+; CHECK: ldrb w0, [x[[HIADDR]], #:lo12:var8]
+; CHECK-NOT: dmb
+
+ ret i8 %val
+}
+
+define i8 @test_atomic_load_monotonic_regoff_i8(i64 %base, i64 %off) nounwind {
+; CHECK: test_atomic_load_monotonic_regoff_i8:
+ %addr_int = add i64 %base, %off
+ %addr = inttoptr i64 %addr_int to i8*
+
+ %val = load atomic i8* %addr monotonic, align 1
+; CHECK-NOT: dmb
+; CHECK: ldrb w0, [x0, x1]
+; CHECK-NOT: dmb
+
+ ret i8 %val
+}
+
+define i8 @test_atomic_load_acquire_i8() nounwind {
+; CHECK: test_atomic_load_acquire_i8:
+ %val = load atomic i8* @var8 acquire, align 1
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8
+
+; CHECK: ldarb w0, [x[[ADDR]]]
+ ret i8 %val
+}
+
+define i8 @test_atomic_load_seq_cst_i8() nounwind {
+; CHECK: test_atomic_load_seq_cst_i8:
+ %val = load atomic i8* @var8 seq_cst, align 1
+; CHECK: adrp x[[HIADDR:[0-9]+]], var8
+; CHECK: ldrb w0, [x[[HIADDR]], #:lo12:var8]
+; CHECK: dmb ish
+ ret i8 %val
+}
+
+define i16 @test_atomic_load_monotonic_i16() nounwind {
+; CHECK: test_atomic_load_monotonic_i16:
+ %val = load atomic i16* @var16 monotonic, align 2
+; CHECK-NOT: dmb
+; CHECK: adrp x[[HIADDR:[0-9]+]], var16
+; CHECK: ldrh w0, [x[[HIADDR]], #:lo12:var16]
+; CHECK-NOT: dmb
+
+ ret i16 %val
+}
+
+define i32 @test_atomic_load_monotonic_regoff_i32(i64 %base, i64 %off) nounwind {
+; CHECK: test_atomic_load_monotonic_regoff_i32:
+ %addr_int = add i64 %base, %off
+ %addr = inttoptr i64 %addr_int to i32*
+
+ %val = load atomic i32* %addr monotonic, align 4
+; CHECK-NOT: dmb
+; CHECK: ldr w0, [x0, x1]
+; CHECK-NOT: dmb
+
+ ret i32 %val
+}
+
+define i64 @test_atomic_load_seq_cst_i64() nounwind {
+; CHECK: test_atomic_load_seq_cst_i64:
+ %val = load atomic i64* @var64 seq_cst, align 8
+; CHECK: adrp x[[HIADDR:[0-9]+]], var64
+; CHECK: ldr x0, [x[[HIADDR]], #:lo12:var64]
+; CHECK: dmb ish
+ ret i64 %val
+}
+
+define void @test_atomic_store_monotonic_i8(i8 %val) nounwind {
+; CHECK: test_atomic_store_monotonic_i8:
+ store atomic i8 %val, i8* @var8 monotonic, align 1
+; CHECK: adrp x[[HIADDR:[0-9]+]], var8
+; CHECK: strb w0, [x[[HIADDR]], #:lo12:var8]
+
+ ret void
+}
+
+define void @test_atomic_store_monotonic_regoff_i8(i64 %base, i64 %off, i8 %val) nounwind {
+; CHECK: test_atomic_store_monotonic_regoff_i8:
+
+ %addr_int = add i64 %base, %off
+ %addr = inttoptr i64 %addr_int to i8*
+
+ store atomic i8 %val, i8* %addr monotonic, align 1
+; CHECK: strb w2, [x0, x1]
+
+ ret void
+}
+define void @test_atomic_store_release_i8(i8 %val) nounwind {
+; CHECK: test_atomic_store_release_i8:
+ store atomic i8 %val, i8* @var8 release, align 1
+; CHECK: adrp [[HIADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[HIADDR]], #:lo12:var8
+; CHECK: stlrb w0, [x[[ADDR]]]
+
+ ret void
+}
+
+define void @test_atomic_store_seq_cst_i8(i8 %val) nounwind {
+; CHECK: test_atomic_store_seq_cst_i8:
+ store atomic i8 %val, i8* @var8 seq_cst, align 1
+; CHECK: adrp [[HIADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[HIADDR]], #:lo12:var8
+; CHECK: stlrb w0, [x[[ADDR]]]
+; CHECK: dmb ish
+
+ ret void
+}
+
+define void @test_atomic_store_monotonic_i16(i16 %val) nounwind {
+; CHECK: test_atomic_store_monotonic_i16:
+ store atomic i16 %val, i16* @var16 monotonic, align 2
+; CHECK: adrp x[[HIADDR:[0-9]+]], var16
+; CHECK: strh w0, [x[[HIADDR]], #:lo12:var16]
+
+ ret void
+}
+
+define void @test_atomic_store_monotonic_regoff_i32(i64 %base, i64 %off, i32 %val) nounwind {
+; CHECK: test_atomic_store_monotonic_regoff_i32:
+
+ %addr_int = add i64 %base, %off
+ %addr = inttoptr i64 %addr_int to i32*
+
+ store atomic i32 %val, i32* %addr monotonic, align 4
+; CHECK: str w2, [x0, x1]
+
+ ret void
+}
+
+define void @test_atomic_store_release_i64(i64 %val) nounwind {
+; CHECK: test_atomic_store_release_i64:
+ store atomic i64 %val, i64* @var64 release, align 8
+; CHECK: adrp [[HIADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[HIADDR]], #:lo12:var64
+; CHECK: stlr x0, [x[[ADDR]]]
+
+ ret void
+}
diff --git a/test/CodeGen/AArch64/basic-pic.ll b/test/CodeGen/AArch64/basic-pic.ll
new file mode 100644
index 0000000000..da94041c95
--- /dev/null
+++ b/test/CodeGen/AArch64/basic-pic.ll
@@ -0,0 +1,70 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -relocation-model=pic %s -o - | FileCheck %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -relocation-model=pic -filetype=obj %s -o - | llvm-objdump -r - | FileCheck --check-prefix=CHECK-ELF %s
+
+@var = global i32 0
+
+; CHECK-ELF: RELOCATION RECORDS FOR [.text]
+
+define i32 @get_globalvar() {
+; CHECK: get_globalvar:
+
+ %val = load i32* @var
+; CHECK: adrp x[[GOTHI:[0-9]+]], :got:var
+; CHECK: ldr x[[GOTLOC:[0-9]+]], [x[[GOTHI]], #:got_lo12:var]
+; CHECK: ldr w0, [x[[GOTLOC]]]
+
+; CHECK-ELF: R_AARCH64_ADR_GOT_PAGE var
+; CHECK-ELF: R_AARCH64_LD64_GOT_LO12_NC var
+ ret i32 %val
+}
+
+define i32* @get_globalvaraddr() {
+; CHECK: get_globalvaraddr:
+
+ %val = load i32* @var
+; CHECK: adrp x[[GOTHI:[0-9]+]], :got:var
+; CHECK: ldr x0, [x[[GOTHI]], #:got_lo12:var]
+
+; CHECK-ELF: R_AARCH64_ADR_GOT_PAGE var
+; CHECK-ELF: R_AARCH64_LD64_GOT_LO12_NC var
+ ret i32* @var
+}
+
+@hiddenvar = hidden global i32 0
+
+define i32 @get_hiddenvar() {
+; CHECK: get_hiddenvar:
+
+ %val = load i32* @hiddenvar
+; CHECK: adrp x[[HI:[0-9]+]], hiddenvar
+; CHECK: ldr w0, [x[[HI]], #:lo12:hiddenvar]
+
+; CHECK-ELF: R_AARCH64_ADR_PREL_PG_HI21 hiddenvar
+; CHECK-ELF: R_AARCH64_LDST32_ABS_LO12_NC hiddenvar
+ ret i32 %val
+}
+
+define i32* @get_hiddenvaraddr() {
+; CHECK: get_hiddenvaraddr:
+
+ %val = load i32* @hiddenvar
+; CHECK: adrp [[HI:x[0-9]+]], hiddenvar
+; CHECK: add x0, [[HI]], #:lo12:hiddenvar
+
+; CHECK-ELF: R_AARCH64_ADR_PREL_PG_HI21 hiddenvar
+; CHECK-ELF: R_AARCH64_ADD_ABS_LO12_NC hiddenvar
+ ret i32* @hiddenvar
+}
+
+define void()* @get_func() {
+; CHECK: get_func:
+
+ ret void()* bitcast(void()*()* @get_func to void()*)
+; CHECK: adrp x[[GOTHI:[0-9]+]], :got:get_func
+; CHECK: ldr x0, [x[[GOTHI]], #:got_lo12:get_func]
+
+ ; Particularly important that the ADRP gets a relocation: LLVM tends to think
+ ; it can relax it because it knows where get_func is. It can't!
+; CHECK-ELF: R_AARCH64_ADR_GOT_PAGE get_func
+; CHECK-ELF: R_AARCH64_LD64_GOT_LO12_NC get_func
+}
\ No newline at end of file
diff --git a/test/CodeGen/AArch64/bitfield-insert-0.ll b/test/CodeGen/AArch64/bitfield-insert-0.ll
new file mode 100644
index 0000000000..1343ec7449
--- /dev/null
+++ b/test/CodeGen/AArch64/bitfield-insert-0.ll
@@ -0,0 +1,19 @@
+; RUN: llc -march=aarch64 -filetype=obj < %s | llvm-objdump -disassemble - | FileCheck %s
+
+; The encoding of lsb -> immr in the CGed bitfield instructions was wrong at one
+; point, in the edge case where lsb = 0. Just make sure.
+
+define void @test_bfi0(i32* %existing, i32* %new) {
+; CHECK: bfxil {{w[0-9]+}}, {{w[0-9]+}}, #0, #18
+
+ %oldval = load volatile i32* %existing
+ %oldval_keep = and i32 %oldval, 4294705152 ; 0xfffc_0000
+
+ %newval = load volatile i32* %new
+ %newval_masked = and i32 %newval, 262143 ; = 0x0003_ffff
+
+ %combined = or i32 %newval_masked, %oldval_keep
+ store volatile i32 %combined, i32* %existing
+
+ ret void
+}
\ No newline at end of file
diff --git a/test/CodeGen/AArch64/bitfield-insert.ll b/test/CodeGen/AArch64/bitfield-insert.ll
new file mode 100644
index 0000000000..d874a12240
--- /dev/null
+++ b/test/CodeGen/AArch64/bitfield-insert.ll
@@ -0,0 +1,193 @@
+; RUN: llc -march=aarch64 < %s | FileCheck %s
+
+; First, a simple example from Clang. The registers could plausibly be
+; different, but probably won't be.
+
+%struct.foo = type { i8, [2 x i8], i8 }
+
+define [1 x i64] @from_clang([1 x i64] %f.coerce, i32 %n) nounwind readnone {
+; CHECK: from_clang:
+; CHECK: bfi w0, w1, #3, #4
+; CHECK-NEXT: ret
+
+entry:
+ %f.coerce.fca.0.extract = extractvalue [1 x i64] %f.coerce, 0
+ %tmp.sroa.0.0.extract.trunc = trunc i64 %f.coerce.fca.0.extract to i32
+ %bf.value = shl i32 %n, 3
+ %0 = and i32 %bf.value, 120
+ %f.sroa.0.0.insert.ext.masked = and i32 %tmp.sroa.0.0.extract.trunc, 135
+ %1 = or i32 %f.sroa.0.0.insert.ext.masked, %0
+ %f.sroa.0.0.extract.trunc = zext i32 %1 to i64
+ %tmp1.sroa.1.1.insert.insert = and i64 %f.coerce.fca.0.extract, 4294967040
+ %tmp1.sroa.0.0.insert.insert = or i64 %f.sroa.0.0.extract.trunc, %tmp1.sroa.1.1.insert.insert
+ %.fca.0.insert = insertvalue [1 x i64] undef, i64 %tmp1.sroa.0.0.insert.insert, 0
+ ret [1 x i64] %.fca.0.insert
+}
+
+define void @test_whole32(i32* %existing, i32* %new) {
+; CHECK: test_whole32:
+; CHECK: bfi {{w[0-9]+}}, {{w[0-9]+}}, #26, #5
+
+ %oldval = load volatile i32* %existing
+ %oldval_keep = and i32 %oldval, 2214592511 ; =0x83ffffff
+
+ %newval = load volatile i32* %new
+ %newval_shifted = shl i32 %newval, 26
+ %newval_masked = and i32 %newval_shifted, 2080374784 ; = 0x7c000000
+
+ %combined = or i32 %oldval_keep, %newval_masked
+ store volatile i32 %combined, i32* %existing
+
+ ret void
+}
+
+define void @test_whole64(i64* %existing, i64* %new) {
+; CHECK: test_whole64:
+; CHECK: bfi {{x[0-9]+}}, {{x[0-9]+}}, #26, #14
+; CHECK-NOT: and
+; CHECK: ret
+
+ %oldval = load volatile i64* %existing
+ %oldval_keep = and i64 %oldval, 18446742974265032703 ; = 0xffffff0003ffffffL
+
+ %newval = load volatile i64* %new
+ %newval_shifted = shl i64 %newval, 26
+ %newval_masked = and i64 %newval_shifted, 1099444518912 ; = 0xfffc000000
+
+ %combined = or i64 %oldval_keep, %newval_masked
+ store volatile i64 %combined, i64* %existing
+
+ ret void
+}
+
+define void @test_whole32_from64(i64* %existing, i64* %new) {
+; CHECK: test_whole32_from64:
+; CHECK: bfi {{w[0-9]+}}, {{w[0-9]+}}, #{{0|16}}, #16
+; CHECK-NOT: and
+; CHECK: ret
+
+ %oldval = load volatile i64* %existing
+ %oldval_keep = and i64 %oldval, 4294901760 ; = 0xffff0000
+
+ %newval = load volatile i64* %new
+ %newval_masked = and i64 %newval, 65535 ; = 0xffff
+
+ %combined = or i64 %oldval_keep, %newval_masked
+ store volatile i64 %combined, i64* %existing
+
+ ret void
+}
+
+define void @test_32bit_masked(i32 *%existing, i32 *%new) {
+; CHECK: test_32bit_masked:
+; CHECK: bfi [[INSERT:w[0-9]+]], {{w[0-9]+}}, #3, #4
+; CHECK: and {{w[0-9]+}}, [[INSERT]], #0xff
+
+ %oldval = load volatile i32* %existing
+ %oldval_keep = and i32 %oldval, 135 ; = 0x87
+
+ %newval = load volatile i32* %new
+ %newval_shifted = shl i32 %newval, 3
+ %newval_masked = and i32 %newval_shifted, 120 ; = 0x78
+
+ %combined = or i32 %oldval_keep, %newval_masked
+ store volatile i32 %combined, i32* %existing
+
+ ret void
+}
+
+define void @test_64bit_masked(i64 *%existing, i64 *%new) {
+; CHECK: test_64bit_masked:
+; CHECK: bfi [[INSERT:x[0-9]+]], {{x[0-9]+}}, #40, #8
+; CHECK: and {{x[0-9]+}}, [[INSERT]], #0xffff00000000
+
+ %oldval = load volatile i64* %existing
+ %oldval_keep = and i64 %oldval, 1095216660480 ; = 0xff_0000_0000
+
+ %newval = load volatile i64* %new
+ %newval_shifted = shl i64 %newval, 40
+ %newval_masked = and i64 %newval_shifted, 280375465082880 ; = 0xff00_0000_0000
+
+ %combined = or i64 %newval_masked, %oldval_keep
+ store volatile i64 %combined, i64* %existing
+
+ ret void
+}
+
+; Mask is too complicated for a literal ANDwwi; make sure other avenues are tried.
+define void @test_32bit_complexmask(i32 *%existing, i32 *%new) {
+; CHECK: test_32bit_complexmask:
+; CHECK: bfi {{w[0-9]+}}, {{w[0-9]+}}, #3, #4
+; CHECK: and {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+
+ %oldval = load volatile i32* %existing
+ %oldval_keep = and i32 %oldval, 647 ; = 0x287
+
+ %newval = load volatile i32* %new
+ %newval_shifted = shl i32 %newval, 3
+ %newval_masked = and i32 %newval_shifted, 120 ; = 0x78
+
+ %combined = or i32 %oldval_keep, %newval_masked
+ store volatile i32 %combined, i32* %existing
+
+ ret void
+}
+
+; Neither mask is a contiguous set of 1s. BFI can't be used.
+define void @test_32bit_badmask(i32 *%existing, i32 *%new) {
+; CHECK: test_32bit_badmask:
+; CHECK-NOT: bfi
+; CHECK: ret
+
+ %oldval = load volatile i32* %existing
+ %oldval_keep = and i32 %oldval, 135 ; = 0x87
+
+ %newval = load volatile i32* %new
+ %newval_shifted = shl i32 %newval, 3
+ %newval_masked = and i32 %newval_shifted, 632 ; = 0x278
+
+ %combined = or i32 %oldval_keep, %newval_masked
+ store volatile i32 %combined, i32* %existing
+
+ ret void
+}
+
+; Ditto
+define void @test_64bit_badmask(i64 *%existing, i64 *%new) {
+; CHECK: test_64bit_badmask:
+; CHECK-NOT: bfi
+; CHECK: ret
+
+ %oldval = load volatile i64* %existing
+ %oldval_keep = and i64 %oldval, 135 ; = 0x87
+
+ %newval = load volatile i64* %new
+ %newval_shifted = shl i64 %newval, 3
+ %newval_masked = and i64 %newval_shifted, 664 ; = 0x298
+
+ %combined = or i64 %oldval_keep, %newval_masked
+ store volatile i64 %combined, i64* %existing
+
+ ret void
+}
+
+; Bitfield insert where there's a left-over shr needed at the beginning
+; (e.g. result of str.bf1 = str.bf2)
+define void @test_32bit_with_shr(i32* %existing, i32* %new) {
+; CHECK: test_32bit_with_shr:
+
+ %oldval = load volatile i32* %existing
+ %oldval_keep = and i32 %oldval, 2214592511 ; =0x83ffffff
+
+ %newval = load i32* %new
+ %newval_shifted = shl i32 %newval, 12
+ %newval_masked = and i32 %newval_shifted, 2080374784 ; = 0x7c000000
+
+ %combined = or i32 %oldval_keep, %newval_masked
+ store volatile i32 %combined, i32* %existing
+; CHECK: lsr [[BIT:w[0-9]+]], {{w[0-9]+}}, #14
+; CHECK: bfi {{w[0-9]+}}, [[BIT]], #26, #5
+
+ ret void
+}
+
diff --git a/test/CodeGen/AArch64/bitfield.ll b/test/CodeGen/AArch64/bitfield.ll
new file mode 100644
index 0000000000..06a296ef27
--- /dev/null
+++ b/test/CodeGen/AArch64/bitfield.ll
@@ -0,0 +1,218 @@
+
+; RUN: llc -verify-machineinstrs < %s -march=aarch64 | FileCheck %s
+
+@var32 = global i32 0
+@var64 = global i64 0
+
+define void @test_extendb(i8 %var) {
+; CHECK: test_extendb:
+
+ %sxt32 = sext i8 %var to i32
+ store volatile i32 %sxt32, i32* @var32
+; CHECK: sxtb {{w[0-9]+}}, {{w[0-9]+}}
+
+ %sxt64 = sext i8 %var to i64
+ store volatile i64 %sxt64, i64* @var64
+; CHECK: sxtb {{x[0-9]+}}, {{w[0-9]+}}
+
+; N.b. this doesn't actually produce a bitfield instruction at the
+; moment, but it's still a good test to have and the semantics are
+; correct.
+ %uxt32 = zext i8 %var to i32
+ store volatile i32 %uxt32, i32* @var32
+; CHECK: and {{w[0-9]+}}, {{w[0-9]+}}, #0xff
+
+ %uxt64 = zext i8 %var to i64
+ store volatile i64 %uxt64, i64* @var64
+; CHECK: uxtb {{x[0-9]+}}, {{w[0-9]+}}
+ ret void
+}
+
+define void @test_extendh(i16 %var) {
+; CHECK: test_extendh:
+
+ %sxt32 = sext i16 %var to i32
+ store volatile i32 %sxt32, i32* @var32
+; CHECK: sxth {{w[0-9]+}}, {{w[0-9]+}}
+
+ %sxt64 = sext i16 %var to i64
+ store volatile i64 %sxt64, i64* @var64
+; CHECK: sxth {{x[0-9]+}}, {{w[0-9]+}}
+
+; N.b. this doesn't actually produce a bitfield instruction at the
+; moment, but it's still a good test to have and the semantics are
+; correct.
+ %uxt32 = zext i16 %var to i32
+ store volatile i32 %uxt32, i32* @var32
+; CHECK: and {{w[0-9]+}}, {{w[0-9]+}}, #0xffff
+
+ %uxt64 = zext i16 %var to i64
+ store volatile i64 %uxt64, i64* @var64
+; CHECK: uxth {{x[0-9]+}}, {{w[0-9]+}}
+ ret void
+}
+
+define void @test_extendw(i32 %var) {
+; CHECK: test_extendw:
+
+ %sxt64 = sext i32 %var to i64
+ store volatile i64 %sxt64, i64* @var64
+; CHECK: sxtw {{x[0-9]+}}, {{w[0-9]+}}
+
+ %uxt64 = zext i32 %var to i64
+ store volatile i64 %uxt64, i64* @var64
+; CHECK: ubfx {{w[0-9]+}}, {{w[0-9]+}}, #0, #32
+ ret void
+}
+
+define void @test_shifts(i32 %val32, i64 %val64) {
+; CHECK: test_shifts:
+
+ %shift1 = ashr i32 %val32, 31
+ store volatile i32 %shift1, i32* @var32
+; CHECK: asr {{w[0-9]+}}, {{w[0-9]+}}, #31
+
+ %shift2 = lshr i32 %val32, 8
+ store volatile i32 %shift2, i32* @var32
+; CHECK: lsr {{w[0-9]+}}, {{w[0-9]+}}, #8
+
+ %shift3 = shl i32 %val32, 1
+ store volatile i32 %shift3, i32* @var32
+; CHECK: lsl {{w[0-9]+}}, {{w[0-9]+}}, #1
+
+ %shift4 = ashr i64 %val64, 31
+ store volatile i64 %shift4, i64* @var64
+; CHECK: asr {{x[0-9]+}}, {{x[0-9]+}}, #31
+
+ %shift5 = lshr i64 %val64, 8
+ store volatile i64 %shift5, i64* @var64
+; CHECK: lsr {{x[0-9]+}}, {{x[0-9]+}}, #8
+
+ %shift6 = shl i64 %val64, 63
+ store volatile i64 %shift6, i64* @var64
+; CHECK: lsl {{x[0-9]+}}, {{x[0-9]+}}, #63
+
+ %shift7 = ashr i64 %val64, 63
+ store volatile i64 %shift7, i64* @var64
+; CHECK: asr {{x[0-9]+}}, {{x[0-9]+}}, #63
+
+ %shift8 = lshr i64 %val64, 63
+ store volatile i64 %shift8, i64* @var64
+; CHECK: lsr {{x[0-9]+}}, {{x[0-9]+}}, #63
+
+ %shift9 = lshr i32 %val32, 31
+ store volatile i32 %shift9, i32* @var32
+; CHECK: lsr {{w[0-9]+}}, {{w[0-9]+}}, #31
+
+ %shift10 = shl i32 %val32, 31
+ store volatile i32 %shift10, i32* @var32
+; CHECK: lsl {{w[0-9]+}}, {{w[0-9]+}}, #31
+
+ ret void
+}
+
+; LLVM can produce in-register extensions taking place entirely with
+; 64-bit registers too.
+define void @test_sext_inreg_64(i64 %in) {
+; CHECK: test_sext_inreg_64:
+
+; i1 doesn't have an official alias, but it crops up and is handled by
+; the bitfield ops.
+ %trunc_i1 = trunc i64 %in to i1
+ %sext_i1 = sext i1 %trunc_i1 to i64
+ store volatile i64 %sext_i1, i64* @var64
+; CHECK: sbfx {{x[0-9]+}}, {{x[0-9]+}}, #0, #1
+
+ %trunc_i8 = trunc i64 %in to i8
+ %sext_i8 = sext i8 %trunc_i8 to i64
+ store volatile i64 %sext_i8, i64* @var64
+; CHECK: sxtb {{x[0-9]+}}, {{w[0-9]+}}
+
+ %trunc_i16 = trunc i64 %in to i16
+ %sext_i16 = sext i16 %trunc_i16 to i64
+ store volatile i64 %sext_i16, i64* @var64
+; CHECK: sxth {{x[0-9]+}}, {{w[0-9]+}}
+
+ %trunc_i32 = trunc i64 %in to i32
+ %sext_i32 = sext i32 %trunc_i32 to i64
+ store volatile i64 %sext_i32, i64* @var64
+; CHECK: sxtw {{x[0-9]+}}, {{w[0-9]+}}
+ ret void
+}
+
+; These instructions don't actually select to official bitfield
+; operations, but it's important that we select them somehow:
+define void @test_zext_inreg_64(i64 %in) {
+; CHECK: test_zext_inreg_64:
+
+ %trunc_i8 = trunc i64 %in to i8
+ %zext_i8 = zext i8 %trunc_i8 to i64
+ store volatile i64 %zext_i8, i64* @var64
+; CHECK: and {{x[0-9]+}}, {{x[0-9]+}}, #0xff
+
+ %trunc_i16 = trunc i64 %in to i16
+ %zext_i16 = zext i16 %trunc_i16 to i64
+ store volatile i64 %zext_i16, i64* @var64
+; CHECK: and {{x[0-9]+}}, {{x[0-9]+}}, #0xffff
+
+ %trunc_i32 = trunc i64 %in to i32
+ %zext_i32 = zext i32 %trunc_i32 to i64
+ store volatile i64 %zext_i32, i64* @var64
+; CHECK: and {{x[0-9]+}}, {{x[0-9]+}}, #0xffffffff
+
+ ret void
+}
+
+define i64 @test_sext_inreg_from_32(i32 %in) {
+; CHECK: test_sext_inreg_from_32:
+
+ %small = trunc i32 %in to i1
+ %ext = sext i1 %small to i64
+
+ ; Different registers are, of course, possible, though suboptimal. This is
+ ; making sure that a 64-bit "(sext_inreg (anyext GPR32), i1)" uses the 64-bit
+ ; sbfx rather than just 32-bits.
+; CHECK: sbfx x0, x0, #0, #1
+ ret i64 %ext
+}
+
+
+define i32 @test_ubfx32(i32* %addr) {
+; CHECK: test_ubfx32:
+; CHECK: ubfx {{w[0-9]+}}, {{w[0-9]+}}, #23, #3
+
+ %fields = load i32* %addr
+ %shifted = lshr i32 %fields, 23
+ %masked = and i32 %shifted, 7
+ ret i32 %masked
+}
+
+define i64 @test_ubfx64(i64* %addr) {
+; CHECK: test_ubfx64:
+; CHECK: ubfx {{x[0-9]+}}, {{x[0-9]+}}, #25, #10
+
+ %fields = load i64* %addr
+ %shifted = lshr i64 %fields, 25
+ %masked = and i64 %shifted, 1023
+ ret i64 %masked
+}
+
+define i32 @test_sbfx32(i32* %addr) {
+; CHECK: test_sbfx32:
+; CHECK: sbfx {{w[0-9]+}}, {{w[0-9]+}}, #6, #3
+
+ %fields = load i32* %addr
+ %shifted = shl i32 %fields, 23
+ %extended = ashr i32 %shifted, 29
+ ret i32 %extended
+}
+
+define i64 @test_sbfx64(i64* %addr) {
+; CHECK: test_sbfx64:
+; CHECK: sbfx {{x[0-9]+}}, {{x[0-9]+}}, #0, #63
+
+ %fields = load i64* %addr
+ %shifted = shl i64 %fields, 1
+ %extended = ashr i64 %shifted, 1
+ ret i64 %extended
+}
diff --git a/test/CodeGen/AArch64/blockaddress.ll b/test/CodeGen/AArch64/blockaddress.ll
new file mode 100644
index 0000000000..a7de51d458
--- /dev/null
+++ b/test/CodeGen/AArch64/blockaddress.ll
@@ -0,0 +1,18 @@
+; RUN: llc -march=aarch64 -verify-machineinstrs < %s | FileCheck %s
+
+@addr = global i8* null
+
+define void @test_blockaddress() {
+; CHECK: test_blockaddress:
+ store volatile i8* blockaddress(@test_blockaddress, %block), i8** @addr
+ %val = load volatile i8** @addr
+ indirectbr i8* %val, [label %block]
+; CHECK: adrp [[DEST_HI:x[0-9]+]], [[DEST_LBL:.Ltmp[0-9]+]]
+; CHECK: add [[DEST:x[0-9]+]], [[DEST_HI]], #:lo12:[[DEST_LBL]]
+; CHECK: str [[DEST]],
+; CHECK: ldr [[NEWDEST:x[0-9]+]]
+; CHECK: br [[NEWDEST]]
+
+block:
+ ret void
+}
diff --git a/test/CodeGen/AArch64/bool-loads.ll b/test/CodeGen/AArch64/bool-loads.ll
new file mode 100644
index 0000000000..43d030f80d
--- /dev/null
+++ b/test/CodeGen/AArch64/bool-loads.ll
@@ -0,0 +1,55 @@
+; RUN: llc -march=aarch64 < %s | FileCheck %s
+
+@var = global i1 0
+
+define i32 @test_sextloadi32() {
+; CHECK: test_sextloadi32
+
+ %val = load i1* @var
+ %ret = sext i1 %val to i32
+; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var]
+; CHECK: sbfx {{x[0-9]+}}, {{x[0-9]+}}, #0, #1
+
+ ret i32 %ret
+; CHECK: ret
+}
+
+define i64 @test_sextloadi64() {
+; CHECK: test_sextloadi64
+
+ %val = load i1* @var
+ %ret = sext i1 %val to i64
+; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var]
+; CHECK: sbfx {{x[0-9]+}}, {{x[0-9]+}}, #0, #1
+
+ ret i64 %ret
+; CHECK: ret
+}
+
+define i32 @test_zextloadi32() {
+; CHECK: test_zextloadi32
+
+; It's not actually necessary that "ret" is next, but as far as LLVM
+; is concerned only 0 or 1 should be loadable so no extension is
+; necessary.
+ %val = load i1* @var
+ %ret = zext i1 %val to i32
+; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var]
+
+ ret i32 %ret
+; CHECK-NEXT: ret
+}
+
+define i64 @test_zextloadi64() {
+; CHECK: test_zextloadi64
+
+; It's not actually necessary that "ret" is next, but as far as LLVM
+; is concerned only 0 or 1 should be loadable so no extension is
+; necessary.
+ %val = load i1* @var
+ %ret = zext i1 %val to i64
+; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var]
+
+ ret i64 %ret
+; CHECK-NEXT: ret
+}
diff --git a/test/CodeGen/AArch64/breg.ll b/test/CodeGen/AArch64/breg.ll
new file mode 100644
index 0000000000..fc490610e0
--- /dev/null
+++ b/test/CodeGen/AArch64/breg.ll
@@ -0,0 +1,17 @@
+; RUN: llc -verify-machineinstrs < %s -march=aarch64 | FileCheck %s
+
+@stored_label = global i8* null
+
+define void @foo() {
+; CHECK: foo:
+ %lab = load i8** @stored_label
+ indirectbr i8* %lab, [label %otherlab, label %retlab]
+; CHECK: adrp {{x[0-9]+}}, stored_label
+; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, #:lo12:stored_label]
+; CHECK: br {{x[0-9]+}}
+
+otherlab:
+ ret void
+retlab:
+ ret void
+}
diff --git a/test/CodeGen/AArch64/callee-save.ll b/test/CodeGen/AArch64/callee-save.ll
new file mode 100644
index 0000000000..953dbc433f
--- /dev/null
+++ b/test/CodeGen/AArch64/callee-save.ll
@@ -0,0 +1,86 @@
+; RUN: llc -verify-machineinstrs < %s -march=aarch64 | FileCheck %s
+
+@var = global float 0.0
+
+define void @foo() {
+; CHECK: foo:
+
+; CHECK: stp d14, d15, [sp
+; CHECK: stp d12, d13, [sp
+; CHECK: stp d10, d11, [sp
+; CHECK: stp d8, d9, [sp
+
+ ; Create lots of live variables to exhaust the supply of
+ ; caller-saved registers
+ %val1 = load volatile float* @var
+ %val2 = load volatile float* @var
+ %val3 = load volatile float* @var
+ %val4 = load volatile float* @var
+ %val5 = load volatile float* @var
+ %val6 = load volatile float* @var
+ %val7 = load volatile float* @var
+ %val8 = load volatile float* @var
+ %val9 = load volatile float* @var
+ %val10 = load volatile float* @var
+ %val11 = load volatile float* @var
+ %val12 = load volatile float* @var
+ %val13 = load volatile float* @var
+ %val14 = load volatile float* @var
+ %val15 = load volatile float* @var
+ %val16 = load volatile float* @var
+ %val17 = load volatile float* @var
+ %val18 = load volatile float* @var
+ %val19 = load volatile float* @var
+ %val20 = load volatile float* @var
+ %val21 = load volatile float* @var
+ %val22 = load volatile float* @var
+ %val23 = load volatile float* @var
+ %val24 = load volatile float* @var
+ %val25 = load volatile float* @var
+ %val26 = load volatile float* @var
+ %val27 = load volatile float* @var
+ %val28 = load volatile float* @var
+ %val29 = load volatile float* @var
+ %val30 = load volatile float* @var
+ %val31 = load volatile float* @var
+ %val32 = load volatile float* @var
+
+ store volatile float %val1, float* @var
+ store volatile float %val2, float* @var
+ store volatile float %val3, float* @var
+ store volatile float %val4, float* @var
+ store volatile float %val5, float* @var
+ store volatile float %val6, float* @var
+ store volatile float %val7, float* @var
+ store volatile float %val8, float* @var
+ store volatile float %val9, float* @var
+ store volatile float %val10, float* @var
+ store volatile float %val11, float* @var
+ store volatile float %val12, float* @var
+ store volatile float %val13, float* @var
+ store volatile float %val14, float* @var
+ store volatile float %val15, float* @var
+ store volatile float %val16, float* @var
+ store volatile float %val17, float* @var
+ store volatile float %val18, float* @var
+ store volatile float %val19, float* @var
+ store volatile float %val20, float* @var
+ store volatile float %val21, float* @var
+ store volatile float %val22, float* @var
+ store volatile float %val23, float* @var
+ store volatile float %val24, float* @var
+ store volatile float %val25, float* @var
+ store volatile float %val26, float* @var
+ store volatile float %val27, float* @var
+ store volatile float %val28, float* @var
+ store volatile float %val29, float* @var
+ store volatile float %val30, float* @var
+ store volatile float %val31, float* @var
+ store volatile float %val32, float* @var
+
+; CHECK: ldp d8, d9, [sp
+; CHECK: ldp d10, d11, [sp
+; CHECK: ldp d12, d13, [sp
+; CHECK: ldp d14, d15, [sp
+ ret void
+}
diff --git a/test/CodeGen/AArch64/compare-branch.ll b/test/CodeGen/AArch64/compare-branch.ll
new file mode 100644
index 0000000000..52a0d5d92a
--- /dev/null
+++ b/test/CodeGen/AArch64/compare-branch.ll
@@ -0,0 +1,38 @@
+; RUN: llc -verify-machineinstrs < %s -march=aarch64 | FileCheck %s
+
+@var32 = global i32 0
+@var64 = global i64 0
+
+define void @foo() {
+; CHECK: foo:
+
+ %val1 = load volatile i32* @var32
+ %tst1 = icmp eq i32 %val1, 0
+ br i1 %tst1, label %end, label %test2
+; CHECK: cbz {{w[0-9]+}}, .LBB
+
+test2:
+ %val2 = load volatile i32* @var32
+ %tst2 = icmp ne i32 %val2, 0
+ br i1 %tst2, label %end, label %test3
+; CHECK: cbnz {{w[0-9]+}}, .LBB
+
+test3:
+ %val3 = load volatile i64* @var64
+ %tst3 = icmp eq i64 %val3, 0
+ br i1 %tst3, label %end, label %test4
+; CHECK: cbz {{x[0-9]+}}, .LBB
+
+test4:
+ %val4 = load volatile i64* @var64
+ %tst4 = icmp ne i64 %val4, 0
+ br i1 %tst4, label %end, label %test5
+; CHECK: cbnz {{x[0-9]+}}, .LBB
+
+test5:
+ store volatile i64 %val4, i64* @var64
+ ret void
+
+end:
+ ret void
+}
\ No newline at end of file
diff --git a/test/CodeGen/AArch64/cond-sel.ll b/test/CodeGen/AArch64/cond-sel.ll
new file mode 100644
index 0000000000..9ca7997a14
--- /dev/null
+++ b/test/CodeGen/AArch64/cond-sel.ll
@@ -0,0 +1,213 @@
+; RUN: llc -verify-machineinstrs < %s -march=aarch64 | FileCheck %s
+
+@var32 = global i32 0
+@var64 = global i64 0
+
+define void @test_csel(i32 %lhs32, i32 %rhs32, i64 %lhs64) {
+; CHECK: test_csel:
+
+ %tst1 = icmp ugt i32 %lhs32, %rhs32
+ %val1 = select i1 %tst1, i32 42, i32 52
+ store i32 %val1, i32* @var32
+; CHECK: movz [[W52:w[0-9]+]], #52
+; CHECK: movz [[W42:w[0-9]+]], #42
+; CHECK: csel {{w[0-9]+}}, [[W42]], [[W52]], hi
+
+ %rhs64 = sext i32 %rhs32 to i64
+ %tst2 = icmp sle i64 %lhs64, %rhs64
+ %val2 = select i1 %tst2, i64 %lhs64, i64 %rhs64
+ store i64 %val2, i64* @var64
+; CHECK: cmp [[LHS:x[0-9]+]], [[RHS:w[0-9]+]], sxtw
+; CHECK: sxtw [[EXT_RHS:x[0-9]+]], [[RHS]]
+; CHECK: csel {{x[0-9]+}}, [[LHS]], [[EXT_RHS]], le
+
+ ret void
+; CHECK: ret
+}
+
+define void @test_floatcsel(float %lhs32, float %rhs32, double %lhs64, double %rhs64) {
+; CHECK: test_floatcsel:
+
+ %tst1 = fcmp one float %lhs32, %rhs32
+; CHECK: fcmp {{s[0-9]+}}, {{s[0-9]+}}
+ %val1 = select i1 %tst1, i32 42, i32 52
+ store i32 %val1, i32* @var32
+; CHECK: movz [[W52:w[0-9]+]], #52
+; CHECK: movz [[W42:w[0-9]+]], #42
+; CHECK: csel [[MAYBETRUE:w[0-9]+]], [[W42]], [[W52]], mi
+; CHECK: csel {{w[0-9]+}}, [[W42]], [[MAYBETRUE]], gt
+
+
+ %tst2 = fcmp ueq double %lhs64, %rhs64
+; CHECK: fcmp {{d[0-9]+}}, {{d[0-9]+}}
+ %val2 = select i1 %tst2, i64 9, i64 15
+ store i64 %val2, i64* @var64
+; CHECK: movz [[CONST15:x[0-9]+]], #15
+; CHECK: movz [[CONST9:x[0-9]+]], #9
+; CHECK: csel [[MAYBETRUE:x[0-9]+]], [[CONST9]], [[CONST15]], eq
+; CHECK: csel {{x[0-9]+}}, [[CONST9]], [[MAYBETRUE]], vs
+
+ ret void
+; CHECK: ret
+}
+
+
+define void @test_csinc(i32 %lhs32, i32 %rhs32, i64 %lhs64) {
+; CHECK: test_csinc:
+
+; Note that commuting rhs and lhs in the select changes ugt to ule (i.e. hi to ls).
+ %tst1 = icmp ugt i32 %lhs32, %rhs32
+ %inc1 = add i32 %rhs32, 1
+ %val1 = select i1 %tst1, i32 %inc1, i32 %lhs32
+ store volatile i32 %val1, i32* @var32
+; CHECK: cmp [[LHS:w[0-9]+]], [[RHS:w[0-9]+]]
+; CHECK: csinc {{w[0-9]+}}, [[LHS]], [[RHS]], ls
+
+ %rhs2 = add i32 %rhs32, 42
+ %tst2 = icmp sle i32 %lhs32, %rhs2
+ %inc2 = add i32 %rhs32, 1
+ %val2 = select i1 %tst2, i32 %lhs32, i32 %inc2
+ store volatile i32 %val2, i32* @var32
+; CHECK: cmp [[LHS:w[0-9]+]], {{w[0-9]+}}
+; CHECK: csinc {{w[0-9]+}}, [[LHS]], {{w[0-9]+}}, le
+
+; Note that commuting rhs and lhs in the select changes ugt to ule (i.e. hi to ls).
+ %rhs3 = sext i32 %rhs32 to i64
+ %tst3 = icmp ugt i64 %lhs64, %rhs3
+ %inc3 = add i64 %rhs3, 1
+ %val3 = select i1 %tst3, i64 %inc3, i64 %lhs64
+ store volatile i64 %val3, i64* @var64
+; CHECK: cmp [[LHS:x[0-9]+]], {{w[0-9]+}}
+; CHECK: csinc {{x[0-9]+}}, [[LHS]], {{x[0-9]+}}, ls
+
+ %rhs4 = zext i32 %rhs32 to i64
+ %tst4 = icmp sle i64 %lhs64, %rhs4
+ %inc4 = add i64 %rhs4, 1
+ %val4 = select i1 %tst4, i64 %lhs64, i64 %inc4
+ store volatile i64 %val4, i64* @var64
+; CHECK: cmp [[LHS:x[0-9]+]], {{w[0-9]+}}
+; CHECK: csinc {{x[0-9]+}}, [[LHS]], {{x[0-9]+}}, le
+
+ ret void
+; CHECK: ret
+}
+
+define void @test_csinv(i32 %lhs32, i32 %rhs32, i64 %lhs64) {
+; CHECK: test_csinv:
+
+; Note that commuting rhs and lhs in the select changes ugt to ule (i.e. hi to ls).
+ %tst1 = icmp ugt i32 %lhs32, %rhs32
+ %inc1 = xor i32 -1, %rhs32
+ %val1 = select i1 %tst1, i32 %inc1, i32 %lhs32
+ store volatile i32 %val1, i32* @var32
+; CHECK: cmp [[LHS:w[0-9]+]], [[RHS:w[0-9]+]]
+; CHECK: csinv {{w[0-9]+}}, [[LHS]], [[RHS]], ls
+
+ %rhs2 = add i32 %rhs32, 42
+ %tst2 = icmp sle i32 %lhs32, %rhs2
+ %inc2 = xor i32 -1, %rhs32
+ %val2 = select i1 %tst2, i32 %lhs32, i32 %inc2
+ store volatile i32 %val2, i32* @var32
+; CHECK: cmp [[LHS:w[0-9]+]], {{w[0-9]+}}
+; CHECK: csinv {{w[0-9]+}}, [[LHS]], {{w[0-9]+}}, le
+
+; Note that commuting rhs and lhs in the select changes ugt to ule (i.e. hi to ls).
+ %rhs3 = sext i32 %rhs32 to i64
+ %tst3 = icmp ugt i64 %lhs64, %rhs3
+ %inc3 = xor i64 -1, %rhs3
+ %val3 = select i1 %tst3, i64 %inc3, i64 %lhs64
+ store volatile i64 %val3, i64* @var64
+; CHECK: cmp [[LHS:x[0-9]+]], {{w[0-9]+}}
+; CHECK: csinv {{x[0-9]+}}, [[LHS]], {{x[0-9]+}}, ls
+
+ %rhs4 = zext i32 %rhs32 to i64
+ %tst4 = icmp sle i64 %lhs64, %rhs4
+ %inc4 = xor i64 -1, %rhs4
+ %val4 = select i1 %tst4, i64 %lhs64, i64 %inc4
+ store volatile i64 %val4, i64* @var64
+; CHECK: cmp [[LHS:x[0-9]+]], {{w[0-9]+}}
+; CHECK: csinv {{x[0-9]+}}, [[LHS]], {{x[0-9]+}}, le
+
+ ret void
+; CHECK: ret
+}
+
+define void @test_csneg(i32 %lhs32, i32 %rhs32, i64 %lhs64) {
+; CHECK: test_csneg:
+
+; Note that commuting rhs and lhs in the select changes ugt to ule (i.e. hi to ls).
+ %tst1 = icmp ugt i32 %lhs32, %rhs32
+ %inc1 = sub i32 0, %rhs32
+ %val1 = select i1 %tst1, i32 %inc1, i32 %lhs32
+ store volatile i32 %val1, i32* @var32
+; CHECK: cmp [[LHS:w[0-9]+]], [[RHS:w[0-9]+]]
+; CHECK: csneg {{w[0-9]+}}, [[LHS]], [[RHS]], ls
+
+ %rhs2 = add i32 %rhs32, 42
+ %tst2 = icmp sle i32 %lhs32, %rhs2
+ %inc2 = sub i32 0, %rhs32
+ %val2 = select i1 %tst2, i32 %lhs32, i32 %inc2
+ store volatile i32 %val2, i32* @var32
+; CHECK: cmp [[LHS:w[0-9]+]], {{w[0-9]+}}
+; CHECK: csneg {{w[0-9]+}}, [[LHS]], {{w[0-9]+}}, le
+
+; Note that commuting rhs and lhs in the select changes ugt to ule (i.e. hi to ls).
+ %rhs3 = sext i32 %rhs32 to i64
+ %tst3 = icmp ugt i64 %lhs64, %rhs3
+ %inc3 = sub i64 0, %rhs3
+ %val3 = select i1 %tst3, i64 %inc3, i64 %lhs64
+ store volatile i64 %val3, i64* @var64
+; CHECK: cmp [[LHS:x[0-9]+]], {{w[0-9]+}}
+; CHECK: csneg {{x[0-9]+}}, [[LHS]], {{x[0-9]+}}, ls
+
+ %rhs4 = zext i32 %rhs32 to i64
+ %tst4 = icmp sle i64 %lhs64, %rhs4
+ %inc4 = sub i64 0, %rhs4
+ %val4 = select i1 %tst4, i64 %lhs64, i64 %inc4
+ store volatile i64 %val4, i64* @var64
+; CHECK: cmp [[LHS:x[0-9]+]], {{w[0-9]+}}
+; CHECK: csneg {{x[0-9]+}}, [[LHS]], {{x[0-9]+}}, le
+
+ ret void
+; CHECK: ret
+}
+
+define void @test_cset(i32 %lhs, i32 %rhs, i64 %lhs64) {
+; CHECK: test_cset:
+
+; N.b. code is not optimal here (32-bit csinc would be better) but
+; incoming DAG is too complex
+ %tst1 = icmp eq i32 %lhs, %rhs
+ %val1 = zext i1 %tst1 to i32
+ store i32 %val1, i32* @var32
+; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: csinc {{w[0-9]+}}, wzr, wzr, ne
+
+ %rhs64 = sext i32 %rhs to i64
+ %tst2 = icmp ule i64 %lhs64, %rhs64
+ %val2 = zext i1 %tst2 to i64
+ store i64 %val2, i64* @var64
+; CHECK: csinc {{w[0-9]+}}, wzr, wzr, hi
+
+ ret void
+; CHECK: ret
+}
+
+define void @test_csetm(i32 %lhs, i32 %rhs, i64 %lhs64) {
+; CHECK: test_csetm:
+
+ %tst1 = icmp eq i32 %lhs, %rhs
+ %val1 = sext i1 %tst1 to i32
+ store i32 %val1, i32* @var32
+; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: csinv {{w[0-9]+}}, wzr, wzr, ne
+
+ %rhs64 = sext i32 %rhs to i64
+ %tst2 = icmp ule i64 %lhs64, %rhs64
+ %val2 = sext i1 %tst2 to i64
+ store i64 %val2, i64* @var64
+; CHECK: csinv {{x[0-9]+}}, xzr, xzr, hi
+
+ ret void
+; CHECK: ret
+}
diff --git a/test/CodeGen/AArch64/directcond.ll b/test/CodeGen/AArch64/directcond.ll
new file mode 100644
index 0000000000..3741011e38
--- /dev/null
+++ b/test/CodeGen/AArch64/directcond.ll
@@ -0,0 +1,84 @@
+; RUN: llc -verify-machineinstrs < %s -march=aarch64 | FileCheck %s
+
+define i32 @test_select_i32(i1 %bit, i32 %a, i32 %b) {
+; CHECK: test_select_i32:
+ %val = select i1 %bit, i32 %a, i32 %b
+; CHECK: movz [[ONE:w[0-9]+]], #1
+; CHECK: tst w0, [[ONE]]
+; CHECK-NEXT: csel w0, w1, w2, ne
+
+ ret i32 %val
+}
+
+define i64 @test_select_i64(i1 %bit, i64 %a, i64 %b) {
+; CHECK: test_select_i64:
+ %val = select i1 %bit, i64 %a, i64 %b
+; CHECK: movz [[ONE:w[0-9]+]], #1
+; CHECK: tst w0, [[ONE]]
+; CHECK-NEXT: csel x0, x1, x2, ne
+
+ ret i64 %val
+}
+
+define float @test_select_float(i1 %bit, float %a, float %b) {
+; CHECK: test_select_float:
+ %val = select i1 %bit, float %a, float %b
+; CHECK: movz [[ONE:w[0-9]+]], #1
+; CHECK: tst w0, [[ONE]]
+; CHECK-NEXT: fcsel s0, s0, s1, ne
+
+ ret float %val
+}
+
+define double @test_select_double(i1 %bit, double %a, double %b) {
+; CHECK: test_select_double:
+ %val = select i1 %bit, double %a, double %b
+; CHECK: movz [[ONE:w[0-9]+]], #1
+; CHECK: tst w0, [[ONE]]
+; CHECK-NEXT: fcsel d0, d0, d1, ne
+
+ ret double %val
+}
+
+define i32 @test_brcond(i1 %bit) {
+; CHECK: test_brcond:
+ br i1 %bit, label %true, label %false
+; CHECK: tbz {{w[0-9]+}}, #0, .LBB
+
+true:
+ ret i32 0
+false:
+ ret i32 42
+}
+
+define i1 @test_setcc_float(float %lhs, float %rhs) {
+; CHECK: test_setcc_float
+ %val = fcmp oeq float %lhs, %rhs
+; CHECK: fcmp s0, s1
+; CHECK: csinc w0, wzr, wzr, ne
+ ret i1 %val
+}
+
+define i1 @test_setcc_double(double %lhs, double %rhs) {
+; CHECK: test_setcc_double
+ %val = fcmp oeq double %lhs, %rhs
+; CHECK: fcmp d0, d1
+; CHECK: csinc w0, wzr, wzr, ne
+ ret i1 %val
+}
+
+define i1 @test_setcc_i32(i32 %lhs, i32 %rhs) {
+; CHECK: test_setcc_i32
+ %val = icmp ugt i32 %lhs, %rhs
+; CHECK: cmp w0, w1
+; CHECK: csinc w0, wzr, wzr, ls
+ ret i1 %val
+}
+
+define i1 @test_setcc_i64(i64 %lhs, i64 %rhs) {
+; CHECK: test_setcc_i64
+ %val = icmp ne i64 %lhs, %rhs
+; CHECK: cmp x0, x1
+; CHECK: csinc w0, wzr, wzr, eq
+ ret i1 %val
+}
diff --git a/test/CodeGen/AArch64/dp-3source.ll b/test/CodeGen/AArch64/dp-3source.ll
new file mode 100644
index 0000000000..1553cc08c5
--- /dev/null
+++ b/test/CodeGen/AArch64/dp-3source.ll
@@ -0,0 +1,163 @@
+; RUN: llc -verify-machineinstrs < %s -march=aarch64 | FileCheck %s
+
+define i32 @test_madd32(i32 %val0, i32 %val1, i32 %val2) {
+; CHECK: test_madd32:
+ %mid = mul i32 %val1, %val2
+ %res = add i32 %val0, %mid
+; CHECK: madd {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+ ret i32 %res
+}
+
+define i64 @test_madd64(i64 %val0, i64 %val1, i64 %val2) {
+; CHECK: test_madd64:
+ %mid = mul i64 %val1, %val2
+ %res = add i64 %val0, %mid
+; CHECK: madd {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
+ ret i64 %res
+}
+
+define i32 @test_msub32(i32 %val0, i32 %val1, i32 %val2) {
+; CHECK: test_msub32:
+ %mid = mul i32 %val1, %val2
+ %res = sub i32 %val0, %mid
+; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+ ret i32 %res
+}
+
+define i64 @test_msub64(i64 %val0, i64 %val1, i64 %val2) {
+; CHECK: test_msub64:
+ %mid = mul i64 %val1, %val2
+ %res = sub i64 %val0, %mid
+; CHECK: msub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
+ ret i64 %res
+}
+
+define i64 @test_smaddl(i64 %acc, i32 %val1, i32 %val2) {
+; CHECK: test_smaddl:
+ %ext1 = sext i32 %val1 to i64
+ %ext2 = sext i32 %val2 to i64
+ %prod = mul i64 %ext1, %ext2
+ %res = add i64 %acc, %prod
+; CHECK: smaddl {{x[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{x[0-9]+}}
+ ret i64 %res
+}
+
+define i64 @test_smsubl(i64 %acc, i32 %val1, i32 %val2) {
+; CHECK: test_smsubl:
+ %ext1 = sext i32 %val1 to i64
+ %ext2 = sext i32 %val2 to i64
+ %prod = mul i64 %ext1, %ext2
+ %res = sub i64 %acc, %prod
+; CHECK: smsubl {{x[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{x[0-9]+}}
+ ret i64 %res
+}
+
+define i64 @test_umaddl(i64 %acc, i32 %val1, i32 %val2) {
+; CHECK: test_umaddl:
+ %ext1 = zext i32 %val1 to i64
+ %ext2 = zext i32 %val2 to i64
+ %prod = mul i64 %ext1, %ext2
+ %res = add i64 %acc, %prod
+; CHECK: umaddl {{x[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{x[0-9]+}}
+ ret i64 %res
+}
+
+define i64 @test_umsubl(i64 %acc, i32 %val1, i32 %val2) {
+; CHECK: test_umsubl:
+ %ext1 = zext i32 %val1 to i64
+ %ext2 = zext i32 %val2 to i64
+ %prod = mul i64 %ext1, %ext2
+ %res = sub i64 %acc, %prod
+; CHECK: umsubl {{x[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{x[0-9]+}}
+ ret i64 %res
+}
+
+define i64 @test_smulh(i64 %lhs, i64 %rhs) {
+; CHECK: test_smulh:
+ %ext1 = sext i64 %lhs to i128
+ %ext2 = sext i64 %rhs to i128
+ %res = mul i128 %ext1, %ext2
+ %high = lshr i128 %res, 64
+ %val = trunc i128 %high to i64
+; CHECK: smulh {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
+ ret i64 %val
+}
+
+define i64 @test_umulh(i64 %lhs, i64 %rhs) {
+; CHECK: test_umulh:
+ %ext1 = zext i64 %lhs to i128
+ %ext2 = zext i64 %rhs to i128
+ %res = mul i128 %ext1, %ext2
+ %high = lshr i128 %res, 64
+ %val = trunc i128 %high to i64
+; CHECK: umulh {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
+ ret i64 %val
+}
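As a reading note on the two functions above: test_smulh and test_umulh express the high half of a widening 64x64 multiply through i128 arithmetic, and since smulh/umulh return exactly bits [127:64] of the 128-bit product, the whole widen-multiply-shift-truncate sequence is expected to collapse to the single instruction the CHECK lines match.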
+
+define i32 @test_mul32(i32 %lhs, i32 %rhs) {
+; CHECK: test_mul32:
+ %res = mul i32 %lhs, %rhs
+; CHECK: mul {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+ ret i32 %res
+}
+
+define i64 @test_mul64(i64 %lhs, i64 %rhs) {
+; CHECK: test_mul64:
+ %res = mul i64 %lhs, %rhs
+; CHECK: mul {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
+ ret i64 %res
+}
+
+define i32 @test_mneg32(i32 %lhs, i32 %rhs) {
+; CHECK: test_mneg32:
+ %prod = mul i32 %lhs, %rhs
+ %res = sub i32 0, %prod
+; CHECK: mneg {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+ ret i32 %res
+}
+
+define i64 @test_mneg64(i64 %lhs, i64 %rhs) {
+; CHECK: test_mneg64:
+ %prod = mul i64 %lhs, %rhs
+ %res = sub i64 0, %prod
+; CHECK: mneg {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
+ ret i64 %res
+}
+
+define i64 @test_smull(i32 %lhs, i32 %rhs) {
+; CHECK: test_smull:
+ %ext1 = sext i32 %lhs to i64
+ %ext2 = sext i32 %rhs to i64
+ %res = mul i64 %ext1, %ext2
+; CHECK: smull {{x[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+ ret i64 %res
+}
+
+define i64 @test_umull(i32 %lhs, i32 %rhs) {
+; CHECK: test_umull:
+ %ext1 = zext i32 %lhs to i64
+ %ext2 = zext i32 %rhs to i64
+ %res = mul i64 %ext1, %ext2
+; CHECK: umull {{x[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+ ret i64 %res
+}
+
+define i64 @test_smnegl(i32 %lhs, i32 %rhs) {
+; CHECK: test_smnegl:
+ %ext1 = sext i32 %lhs to i64
+ %ext2 = sext i32 %rhs to i64
+ %prod = mul i64 %ext1, %ext2
+ %res = sub i64 0, %prod
+; CHECK: smnegl {{x[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+ ret i64 %res
+}
+
+define i64 @test_umnegl(i32 %lhs, i32 %rhs) {
+; CHECK: test_umnegl:
+ %ext1 = zext i32 %lhs to i64
+ %ext2 = zext i32 %rhs to i64
+ %prod = mul i64 %ext1, %ext2
+ %res = sub i64 0, %prod
+; CHECK: umnegl {{x[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+ ret i64 %res
+}
diff --git a/test/CodeGen/AArch64/dp1.ll b/test/CodeGen/AArch64/dp1.ll
new file mode 100644
index 0000000000..cfa82af7d4
--- /dev/null
+++ b/test/CodeGen/AArch64/dp1.ll
@@ -0,0 +1,152 @@
+; RUN: llc -verify-machineinstrs < %s -march=aarch64 | FileCheck %s
+
+@var32 = global i32 0
+@var64 = global i64 0
+
+define void @rev_i32() {
+; CHECK: rev_i32:
+ %val0_tmp = load i32* @var32
+ %val1_tmp = call i32 @llvm.bswap.i32(i32 %val0_tmp)
+; CHECK: rev {{w[0-9]+}}, {{w[0-9]+}}
+ store volatile i32 %val1_tmp, i32* @var32
+ ret void
+}
+
+define void @rev_i64() {
+; CHECK: rev_i64:
+ %val0_tmp = load i64* @var64
+ %val1_tmp = call i64 @llvm.bswap.i64(i64 %val0_tmp)
+; CHECK: rev {{x[0-9]+}}, {{x[0-9]+}}
+ store volatile i64 %val1_tmp, i64* @var64
+ ret void
+}
+
+define void @rev32_i64() {
+; CHECK: rev32_i64:
+ %val0_tmp = load i64* @var64
+ %val1_tmp = shl i64 %val0_tmp, 32
+ %val5_tmp = sub i64 64, 32
+ %val2_tmp = lshr i64 %val0_tmp, %val5_tmp
+ %val3_tmp = or i64 %val1_tmp, %val2_tmp
+ %val4_tmp = call i64 @llvm.bswap.i64(i64 %val3_tmp)
+; CHECK: rev32 {{x[0-9]+}}, {{x[0-9]+}}
+ store volatile i64 %val4_tmp, i64* @var64
+ ret void
+}
+
+define void @rev16_i32() {
+; CHECK: rev16_i32:
+ %val0_tmp = load i32* @var32
+ %val1_tmp = shl i32 %val0_tmp, 16
+ %val2_tmp = lshr i32 %val0_tmp, 16
+ %val3_tmp = or i32 %val1_tmp, %val2_tmp
+ %val4_tmp = call i32 @llvm.bswap.i32(i32 %val3_tmp)
+; CHECK: rev16 {{w[0-9]+}}, {{w[0-9]+}}
+ store volatile i32 %val4_tmp, i32* @var32
+ ret void
+}
+
+define void @clz_zerodef_i32() {
+; CHECK: clz_zerodef_i32:
+ %val0_tmp = load i32* @var32
+ %val4_tmp = call i32 @llvm.ctlz.i32(i32 %val0_tmp, i1 0)
+; CHECK: clz {{w[0-9]+}}, {{w[0-9]+}}
+ store volatile i32 %val4_tmp, i32* @var32
+ ret void
+}
+
+define void @clz_zerodef_i64() {
+; CHECK: clz_zerodef_i64:
+ %val0_tmp = load i64* @var64
+ %val4_tmp = call i64 @llvm.ctlz.i64(i64 %val0_tmp, i1 0)
+; CHECK: clz {{x[0-9]+}}, {{x[0-9]+}}
+ store volatile i64 %val4_tmp, i64* @var64
+ ret void
+}
+
+define void @clz_zeroundef_i32() {
+; CHECK: clz_zeroundef_i32:
+ %val0_tmp = load i32* @var32
+ %val4_tmp = call i32 @llvm.ctlz.i32(i32 %val0_tmp, i1 1)
+; CHECK: clz {{w[0-9]+}}, {{w[0-9]+}}
+ store volatile i32 %val4_tmp, i32* @var32
+ ret void
+}
+
+define void @clz_zeroundef_i64() {
+; CHECK: clz_zeroundef_i64:
+ %val0_tmp = load i64* @var64
+ %val4_tmp = call i64 @llvm.ctlz.i64(i64 %val0_tmp, i1 1)
+; CHECK: clz {{x[0-9]+}}, {{x[0-9]+}}
+ store volatile i64 %val4_tmp, i64* @var64
+ ret void
+}
+
+define void @cttz_zerodef_i32() {
+; CHECK: cttz_zerodef_i32:
+ %val0_tmp = load i32* @var32
+ %val4_tmp = call i32 @llvm.cttz.i32(i32 %val0_tmp, i1 0)
+; CHECK: rbit [[REVERSED:w[0-9]+]], {{w[0-9]+}}
+; CHECK: clz {{w[0-9]+}}, [[REVERSED]]
+ store volatile i32 %val4_tmp, i32* @var32
+ ret void
+}
+
+define void @cttz_zerodef_i64() {
+; CHECK: cttz_zerodef_i64:
+ %val0_tmp = load i64* @var64
+ %val4_tmp = call i64 @llvm.cttz.i64(i64 %val0_tmp, i1 0)
+; CHECK: rbit [[REVERSED:x[0-9]+]], {{x[0-9]+}}
+; CHECK: clz {{x[0-9]+}}, [[REVERSED]]
+ store volatile i64 %val4_tmp, i64* @var64
+ ret void
+}
+
+define void @cttz_zeroundef_i32() {
+; CHECK: cttz_zeroundef_i32:
+ %val0_tmp = load i32* @var32
+ %val4_tmp = call i32 @llvm.cttz.i32(i32 %val0_tmp, i1 1)
+; CHECK: rbit [[REVERSED:w[0-9]+]], {{w[0-9]+}}
+; CHECK: clz {{w[0-9]+}}, [[REVERSED]]
+ store volatile i32 %val4_tmp, i32* @var32
+ ret void
+}
+
+define void @cttz_zeroundef_i64() {
+; CHECK: cttz_zeroundef_i64:
+ %val0_tmp = load i64* @var64
+ %val4_tmp = call i64 @llvm.cttz.i64(i64 %val0_tmp, i1 1)
+; CHECK: rbit [[REVERSED:x[0-9]+]], {{x[0-9]+}}
+; CHECK: clz {{x[0-9]+}}, [[REVERSED]]
+ store volatile i64 %val4_tmp, i64* @var64
+ ret void
+}
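As the rbit/clz pairs above show, AArch64 has no dedicated count-trailing-zeros instruction, so both the zero-defined and zero-undef cttz variants are lowered as a bit-reverse followed by a count-leading-zeros, i.e. cttz(x) == clz(rbit(x)).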
+
+; These two are just compilation tests really: the operation's set to Expand in
+; ISelLowering.
+define void @ctpop_i32() {
+; CHECK: ctpop_i32:
+ %val0_tmp = load i32* @var32
+ %val4_tmp = call i32 @llvm.ctpop.i32(i32 %val0_tmp)
+ store volatile i32 %val4_tmp, i32* @var32
+ ret void
+}
+
+define void @ctpop_i64() {
+; CHECK: ctpop_i64:
+ %val0_tmp = load i64* @var64
+ %val4_tmp = call i64 @llvm.ctpop.i64(i64 %val0_tmp)
+ store volatile i64 %val4_tmp, i64* @var64
+ ret void
+}
+
+
+declare i32 @llvm.bswap.i32(i32)
+declare i64 @llvm.bswap.i64(i64)
+declare i32 @llvm.ctlz.i32 (i32, i1)
+declare i64 @llvm.ctlz.i64 (i64, i1)
+declare i32 @llvm.cttz.i32 (i32, i1)
+declare i64 @llvm.cttz.i64 (i64, i1)
+declare i32 @llvm.ctpop.i32 (i32)
+declare i64 @llvm.ctpop.i64 (i64)
+
diff --git a/test/CodeGen/AArch64/dp2.ll b/test/CodeGen/AArch64/dp2.ll
new file mode 100644
index 0000000000..97f89be8d2
--- /dev/null
+++ b/test/CodeGen/AArch64/dp2.ll
@@ -0,0 +1,169 @@
+; RUN: llc -verify-machineinstrs < %s -march=aarch64 | FileCheck %s
+
+@var32_0 = global i32 0
+@var32_1 = global i32 0
+@var64_0 = global i64 0
+@var64_1 = global i64 0
+
+define void @rorv_i64() {
+; CHECK: rorv_i64:
+ %val0_tmp = load i64* @var64_0
+ %val1_tmp = load i64* @var64_1
+ %val2_tmp = sub i64 64, %val1_tmp
+ %val3_tmp = shl i64 %val0_tmp, %val2_tmp
+ %val4_tmp = lshr i64 %val0_tmp, %val1_tmp
+ %val5_tmp = or i64 %val3_tmp, %val4_tmp
+; CHECK: ror {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
+ store volatile i64 %val5_tmp, i64* @var64_0
+ ret void
+}
+
+define void @asrv_i64() {
+; CHECK: asrv_i64:
+ %val0_tmp = load i64* @var64_0
+ %val1_tmp = load i64* @var64_1
+ %val4_tmp = ashr i64 %val0_tmp, %val1_tmp
+; CHECK: asr {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
+ store volatile i64 %val4_tmp, i64* @var64_1
+ ret void
+}
+
+define void @lsrv_i64() {
+; CHECK: lsrv_i64:
+ %val0_tmp = load i64* @var64_0
+ %val1_tmp = load i64* @var64_1
+ %val4_tmp = lshr i64 %val0_tmp, %val1_tmp
+; CHECK: lsr {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
+ store volatile i64 %val4_tmp, i64* @var64_0
+ ret void
+}
+
+define void @lslv_i64() {
+; CHECK: lslv_i64:
+ %val0_tmp = load i64* @var64_0
+ %val1_tmp = load i64* @var64_1
+ %val4_tmp = shl i64 %val0_tmp, %val1_tmp
+; CHECK: lsl {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
+ store volatile i64 %val4_tmp, i64* @var64_1
+ ret void
+}
+
+define void @udiv_i64() {
+; CHECK: udiv_i64:
+ %val0_tmp = load i64* @var64_0
+ %val1_tmp = load i64* @var64_1
+ %val4_tmp = udiv i64 %val0_tmp, %val1_tmp
+; CHECK: udiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
+ store volatile i64 %val4_tmp, i64* @var64_0
+ ret void
+}
+
+define void @sdiv_i64() {
+; CHECK: sdiv_i64:
+ %val0_tmp = load i64* @var64_0
+ %val1_tmp = load i64* @var64_1
+ %val4_tmp = sdiv i64 %val0_tmp, %val1_tmp
+; CHECK: sdiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
+ store volatile i64 %val4_tmp, i64* @var64_1
+ ret void
+}
+
+
+define void @lsrv_i32() {
+; CHECK: lsrv_i32:
+ %val0_tmp = load i32* @var32_0
+ %val1_tmp = load i32* @var32_1
+ %val2_tmp = add i32 1, %val1_tmp
+ %val4_tmp = lshr i32 %val0_tmp, %val2_tmp
+; CHECK: lsr {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+ store volatile i32 %val4_tmp, i32* @var32_0
+ ret void
+}
+
+define void @lslv_i32() {
+; CHECK: lslv_i32:
+ %val0_tmp = load i32* @var32_0
+ %val1_tmp = load i32* @var32_1
+ %val2_tmp = add i32 1, %val1_tmp
+ %val4_tmp = shl i32 %val0_tmp, %val2_tmp
+; CHECK: lsl {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+ store volatile i32 %val4_tmp, i32* @var32_1
+ ret void
+}
+
+define void @rorv_i32() {
+; CHECK: rorv_i32:
+ %val0_tmp = load i32* @var32_0
+ %val6_tmp = load i32* @var32_1
+ %val1_tmp = add i32 1, %val6_tmp
+ %val2_tmp = sub i32 32, %val1_tmp
+ %val3_tmp = shl i32 %val0_tmp, %val2_tmp
+ %val4_tmp = lshr i32 %val0_tmp, %val1_tmp
+ %val5_tmp = or i32 %val3_tmp, %val4_tmp
+; CHECK: ror {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+ store volatile i32 %val5_tmp, i32* @var32_0
+ ret void
+}
+
+define void @asrv_i32() {
+; CHECK: asrv_i32:
+ %val0_tmp = load i32* @var32_0
+ %val1_tmp = load i32* @var32_1
+ %val2_tmp = add i32 1, %val1_tmp
+ %val4_tmp = ashr i32 %val0_tmp, %val2_tmp
+; CHECK: asr {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+ store volatile i32 %val4_tmp, i32* @var32_1
+ ret void
+}
+
+define void @sdiv_i32() {
+; CHECK: sdiv_i32:
+ %val0_tmp = load i32* @var32_0
+ %val1_tmp = load i32* @var32_1
+ %val4_tmp = sdiv i32 %val0_tmp, %val1_tmp
+; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+ store volatile i32 %val4_tmp, i32* @var32_1
+ ret void
+}
+
+define void @udiv_i32() {
+; CHECK: udiv_i32:
+ %val0_tmp = load i32* @var32_0
+ %val1_tmp = load i32* @var32_1
+ %val4_tmp = udiv i32 %val0_tmp, %val1_tmp
+; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+ store volatile i32 %val4_tmp, i32* @var32_0
+ ret void
+}
+
+; The point of this test is that we may not actually see (shl GPR32:$Val, (zext GPR32:$Val2))
+; in the DAG (the RHS may be natively 64-bit), but we should still use the lsl instructions.
+define i32 @test_lsl32() {
+; CHECK: test_lsl32:
+
+ %val = load i32* @var32_0
+ %ret = shl i32 1, %val
+; CHECK: lsl {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+
+ ret i32 %ret
+}
+
+define i32 @test_lsr32() {
+; CHECK: test_lsr32:
+
+ %val = load i32* @var32_0
+ %ret = lshr i32 1, %val
+; CHECK: lsr {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+
+ ret i32 %ret
+}
+
+define i32 @test_asr32(i32 %in) {
+; CHECK: test_asr32:
+
+ %val = load i32* @var32_0
+ %ret = ashr i32 %in, %val
+; CHECK: asr {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+
+ ret i32 %ret
+}
diff --git a/test/CodeGen/AArch64/elf-extern.ll b/test/CodeGen/AArch64/elf-extern.ll
new file mode 100644
index 0000000000..544b9e7a76
--- /dev/null
+++ b/test/CodeGen/AArch64/elf-extern.ll
@@ -0,0 +1,21 @@
+; RUN: llc -verify-machineinstrs < %s -march=aarch64 -filetype=obj | elf-dump | FileCheck %s
+
+; External symbols are a different concept to global variables but should still
+; get relocations and so on when used.
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1)
+
+define i32 @check_extern() {
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* undef, i8* undef, i32 undef, i32 4, i1 0)
+ ret i32 0
+}
+
+; CHECK: .rela.text
+; CHECK: ('r_sym', 0x00000009)
+; CHECK-NEXT: ('r_type', 0x0000011b)
+
+; CHECK: .symtab
+; CHECK: Symbol 9
+; CHECK-NEXT: memcpy
+
+
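Background note (not something the test matches textually): r_type 0x11b is decimal 283, i.e. R_AARCH64_CALL26, the 26-bit PC-relative relocation expected for the bl to memcpy, and r_sym 9 simply points at the memcpy entry checked in the .symtab dump.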
diff --git a/test/CodeGen/AArch64/extract.ll b/test/CodeGen/AArch64/extract.ll
new file mode 100644
index 0000000000..0efd4472e0
--- /dev/null
+++ b/test/CodeGen/AArch64/extract.ll
@@ -0,0 +1,57 @@
+; RUN: llc -verify-machineinstrs < %s -march=aarch64 | FileCheck %s
+
+define i64 @ror_i64(i64 %in) {
+; CHECK: ror_i64:
+ %left = shl i64 %in, 19
+ %right = lshr i64 %in, 45
+ %val5 = or i64 %left, %right
+; CHECK: extr {{x[0-9]+}}, x0, x0, #45
+ ret i64 %val5
+}
+
+define i32 @ror_i32(i32 %in) {
+; CHECK: ror_i32:
+ %left = shl i32 %in, 9
+ %right = lshr i32 %in, 23
+ %val5 = or i32 %left, %right
+; CHECK: extr {{w[0-9]+}}, w0, w0, #23
+ ret i32 %val5
+}
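The two rotate functions above rely on the shift amounts summing to the register width (19 + 45 = 64 and 9 + 23 = 32), which is what makes the or-of-shifts a rotate; a rotate right by N is then matched as EXTR with both source operands equal and #N as the extraction point, as the CHECK lines show.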
+
+define i32 @extr_i32(i32 %lhs, i32 %rhs) {
+; CHECK: extr_i32:
+ %left = shl i32 %lhs, 6
+ %right = lshr i32 %rhs, 26
+ %val = or i32 %left, %right
+ ; Order of lhs and rhs matters here. Regalloc would have to be very odd to use
+ ; something other than w0 and w1.
+; CHECK: extr {{w[0-9]+}}, w0, w1, #26
+
+ ret i32 %val
+}
+
+define i64 @extr_i64(i64 %lhs, i64 %rhs) {
+; CHECK: extr_i64:
+ %right = lshr i64 %rhs, 40
+ %left = shl i64 %lhs, 24
+ %val = or i64 %right, %left
+ ; Order of lhs and rhs matters here. Regalloc would have to be very odd to use
+ ; something other than w0 and w1.
+; CHECK: extr {{x[0-9]+}}, x0, x1, #40
+
+ ret i64 %val
+}
+
+; Regression test: a bad experimental pattern crept into git which optimised
+; this pattern to a single EXTR.
+define i32 @extr_regress(i32 %a, i32 %b) {
+; CHECK: extr_regress:
+
+ %sh1 = shl i32 %a, 14
+ %sh2 = lshr i32 %b, 14
+ %val = or i32 %sh2, %sh1
+; CHECK-NOT: extr {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, #{{[0-9]+}}
+
+ ret i32 %val
+; CHECK: ret
+}
diff --git a/test/CodeGen/AArch64/fastcc-reserved.ll b/test/CodeGen/AArch64/fastcc-reserved.ll
new file mode 100644
index 0000000000..b0bc1c908d
--- /dev/null
+++ b/test/CodeGen/AArch64/fastcc-reserved.ll
@@ -0,0 +1,58 @@
+; RUN: llc -verify-machineinstrs < %s -march=aarch64 -tailcallopt | FileCheck %s
+
+; This test is designed to be run in the situation where the
+; call-frame is not reserved (hence disable-fp-elim), but where
+; callee-pop can occur (hence tailcallopt).
+
+declare fastcc void @will_pop([8 x i32], i32 %val)
+
+define fastcc void @foo(i32 %in) {
+; CHECK: foo:
+
+ %addr = alloca i8, i32 %in
+
+; Normal frame setup stuff:
+; CHECK: sub sp, sp,
+; CHECK stp x29, x30
+
+; Reserve space for call-frame:
+; CHECK: sub sp, sp, #16
+
+ call fastcc void @will_pop([8 x i32] undef, i32 42)
+; CHECK: bl will_pop
+
+; Since @will_pop is fastcc with tailcallopt, it will put the stack
+; back where it needs to be, we shouldn't duplicate that
+; CHECK-NOT: sub sp, sp, #16
+; CHECK-NOT: add sp, sp,
+
+; CHECK: ldp x29, x30
+; CHECK: add sp, sp,
+ ret void
+}
+
+declare void @wont_pop([8 x i32], i32 %val)
+
+define void @foo1(i32 %in) {
+; CHECK: foo1:
+
+ %addr = alloca i8, i32 %in
+; Normal frame setup again
+; CHECK sub sp, sp,
+; CHECK stp x29, x30
+
+; Reserve space for call-frame
+; CHECK sub sp, sp, #16
+
+ call void @wont_pop([8 x i32] undef, i32 42)
+; CHECK bl wont_pop
+
+; This time we *do* need to unreserve the call-frame
+; CHECK add sp, sp, #16
+
+; Check for epilogue (primarily to make sure sp spotted above wasn't
+; part of it).
+; CHECK: ldp x29, x30
+; CHECK: add sp, sp,
+ ret void
+}
diff --git a/test/CodeGen/AArch64/fastcc.ll b/test/CodeGen/AArch64/fastcc.ll
new file mode 100644
index 0000000000..8f9b9feb58
--- /dev/null
+++ b/test/CodeGen/AArch64/fastcc.ll
@@ -0,0 +1,123 @@
+; RUN: llc -verify-machineinstrs < %s -march=aarch64 -tailcallopt | FileCheck %s -check-prefix CHECK-TAIL
+; RUN: llc -verify-machineinstrs < %s -march=aarch64 | FileCheck %s
+
+; Without tailcallopt fastcc still means the caller cleans up the
+; stack, so try to make sure this is respected.
+
+define fastcc void @func_stack0() {
+; CHECK: func_stack0:
+; CHECK: sub sp, sp, #48
+
+; CHECK-TAIL: func_stack0:
+; CHECK-TAIL: sub sp, sp, #48
+
+
+ call fastcc void @func_stack8([8 x i32] undef, i32 42)
+; CHECK: bl func_stack8
+; CHECK-NOT: sub sp, sp,
+
+; CHECK-TAIL: bl func_stack8
+; CHECK-TAIL: sub sp, sp, #16
+
+
+ call fastcc void @func_stack32([8 x i32] undef, i128 0, i128 9)
+; CHECK: bl func_stack32
+; CHECK-NOT: sub sp, sp,
+
+; CHECK-TAIL: bl func_stack32
+; CHECK-TAIL: sub sp, sp, #32
+
+
+ call fastcc void @func_stack0()
+; CHECK: bl func_stack0
+; CHECK-NOT: sub sp, sp
+
+; CHECK-TAIL: bl func_stack0
+; CHECK-TAIL-NOT: sub sp, sp
+
+ ret void
+; CHECK: add sp, sp, #48
+; CHECK-NEXT: ret
+
+; CHECK-TAIL: add sp, sp, #48
+; CHECK-TAIL-NEXT: ret
+
+}
+
+define fastcc void @func_stack8([8 x i32], i32 %stacked) {
+; CHECK: func_stack8:
+; CHECK: sub sp, sp, #48
+
+; CHECK-TAIL: func_stack8:
+; CHECK-TAIL: sub sp, sp, #48
+
+
+ call fastcc void @func_stack8([8 x i32] undef, i32 42)
+; CHECK: bl func_stack8
+; CHECK-NOT: sub sp, sp,
+
+; CHECK-TAIL: bl func_stack8
+; CHECK-TAIL: sub sp, sp, #16
+
+
+ call fastcc void @func_stack32([8 x i32] undef, i128 0, i128 9)
+; CHECK: bl func_stack32
+; CHECK-NOT: sub sp, sp,
+
+; CHECK-TAIL: bl func_stack32
+; CHECK-TAIL: sub sp, sp, #32
+
+
+ call fastcc void @func_stack0()
+; CHECK: bl func_stack0
+; CHECK-NOT: sub sp, sp
+
+; CHECK-TAIL: bl func_stack0
+; CHECK-TAIL-NOT: sub sp, sp
+
+ ret void
+; CHECK: add sp, sp, #48
+; CHECK-NEXT: ret
+
+; CHECK-TAIL: add sp, sp, #64
+; CHECK-TAIL-NEXT: ret
+}
+
+define fastcc void @func_stack32([8 x i32], i128 %stacked0, i128 %stacked1) {
+; CHECK: func_stack32:
+; CHECK: sub sp, sp, #48
+
+; CHECK-TAIL: func_stack32:
+; CHECK-TAIL: sub sp, sp, #48
+
+
+ call fastcc void @func_stack8([8 x i32] undef, i32 42)
+; CHECK: bl func_stack8
+; CHECK-NOT: sub sp, sp,
+
+; CHECK-TAIL: bl func_stack8
+; CHECK-TAIL: sub sp, sp, #16
+
+
+ call fastcc void @func_stack32([8 x i32] undef, i128 0, i128 9)
+; CHECK: bl func_stack32
+; CHECK-NOT: sub sp, sp,
+
+; CHECK-TAIL: bl func_stack32
+; CHECK-TAIL: sub sp, sp, #32
+
+
+ call fastcc void @func_stack0()
+; CHECK: bl func_stack0
+; CHECK-NOT: sub sp, sp
+
+; CHECK-TAIL: bl func_stack0
+; CHECK-TAIL-NOT: sub sp, sp
+
+ ret void
+; CHECK: add sp, sp, #48
+; CHECK-NEXT: ret
+
+; CHECK-TAIL: add sp, sp, #80
+; CHECK-TAIL-NEXT: ret
+}
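The differing epilogue immediates above are just the callee-pop accounting: each function keeps a 48-byte local frame, and under -tailcallopt a fastcc callee also pops its incoming stack arguments on return, so func_stack8 restores 48 + 16 = 64 bytes and func_stack32 restores 48 + 32 = 80, while without -tailcallopt every epilogue restores only its own 48.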
diff --git a/test/CodeGen/AArch64/fcmp.ll b/test/CodeGen/AArch64/fcmp.ll
new file mode 100644
index 0000000000..d254e8c051
--- /dev/null
+++ b/test/CodeGen/AArch64/fcmp.ll
@@ -0,0 +1,81 @@
+; RUN: llc -verify-machineinstrs < %s -march=aarch64 | FileCheck %s
+
+declare void @bar(i32)
+
+define void @test_float(float %a, float %b) {
+; CHECK: test_float:
+
+ %tst1 = fcmp oeq float %a, %b
+ br i1 %tst1, label %end, label %t2
+; CHECK: fcmp {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK: b.eq .L
+
+t2:
+ %tst2 = fcmp une float %b, 0.0
+ br i1 %tst2, label %t3, label %end
+; CHECK: fcmp {{s[0-9]+}}, #0.0
+; CHECK: b.eq .L
+
+
+t3:
+; This test can't be implemented with just one A64 conditional
+; branch. LLVM converts "ordered and not equal" to "unordered or
+; equal" before instruction selection, which is what we currently
+; test. Obviously, other sequences are valid.
+ %tst3 = fcmp one float %a, %b
+ br i1 %tst3, label %t4, label %end
+; CHECK: fcmp {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK-NEXT: b.eq .[[T4:LBB[0-9]+_[0-9]+]]
+; CHECK-NEXT: b.vs .[[T4]]
+t4:
+ %tst4 = fcmp uge float %a, -0.0
+ br i1 %tst4, label %t5, label %end
+; CHECK-NOT: fcmp {{s[0-9]+}}, #0.0
+; CHECK: b.mi .LBB
+
+t5:
+ call void @bar(i32 0)
+ ret void
+end:
+ ret void
+
+}
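The b.eq/b.vs pair checked above exists because ordered-and-not-equal ("one") has no single A64 condition code: the branch is emitted in terms of the inverse, unordered-or-equal, with b.eq covering equality and b.vs covering the unordered case (fcmp sets the V flag when an operand is a NaN), both targeting the same label.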
+
+define void @test_double(double %a, double %b) {
+; CHECK: test_double:
+
+ %tst1 = fcmp oeq double %a, %b
+ br i1 %tst1, label %end, label %t2
+; CHECK: fcmp {{d[0-9]+}}, {{d[0-9]+}}
+; CHECK: b.eq .L
+
+t2:
+ %tst2 = fcmp une double %b, 0.0
+ br i1 %tst2, label %t3, label %end
+; CHECK: fcmp {{d[0-9]+}}, #0.0
+; CHECK: b.eq .L
+
+
+t3:
+; This test can't be implemented with just one A64 conditional
+; branch. LLVM converts "ordered and not equal" to "unordered or
+; equal" before instruction selection, which is what we currently
+; test. Obviously, other sequences are valid.
+ %tst3 = fcmp one double %a, %b
+ br i1 %tst3, label %t4, label %end
+; CHECK: fcmp {{d[0-9]+}}, {{d[0-9]+}}
+; CHECK-NEXT: b.eq .[[T4:LBB[0-9]+_[0-9]+]]
+; CHECK-NEXT: b.vs .[[T4]]
+t4:
+ %tst4 = fcmp uge double %a, -0.0
+ br i1 %tst4, label %t5, label %end
+; CHECK-NOT: fcmp {{d[0-9]+}}, #0.0
+; CHECK: b.mi .LBB
+
+t5:
+ call void @bar(i32 0)
+ ret void
+end:
+ ret void
+
+}
diff --git a/test/CodeGen/AArch64/fcvt-fixed.ll b/test/CodeGen/AArch64/fcvt-fixed.ll
new file mode 100644
index 0000000000..c76a84ac77
--- /dev/null
+++ b/test/CodeGen/AArch64/fcvt-fixed.ll
@@ -0,0 +1,191 @@
+; RUN: llc -verify-machineinstrs < %s -march=aarch64 -O0 | FileCheck %s
+
+@var32 = global i32 0
+@var64 = global i64 0
+
+define void @test_fcvtzs(float %flt, double %dbl) {
+; CHECK: test_fcvtzs:
+
+ %fix1 = fmul float %flt, 128.0
+ %cvt1 = fptosi float %fix1 to i32
+; CHECK: fcvtzs {{w[0-9]+}}, {{s[0-9]+}}, #7
+ store volatile i32 %cvt1, i32* @var32
+
+ %fix2 = fmul float %flt, 4294967296.0
+ %cvt2 = fptosi float %fix2 to i32
+; CHECK: fcvtzs {{w[0-9]+}}, {{s[0-9]+}}, #32
+ store volatile i32 %cvt2, i32* @var32
+
+ %fix3 = fmul float %flt, 128.0
+ %cvt3 = fptosi float %fix3 to i64
+; CHECK: fcvtzs {{x[0-9]+}}, {{s[0-9]+}}, #7
+ store volatile i64 %cvt3, i64* @var64
+
+ %fix4 = fmul float %flt, 18446744073709551616.0
+ %cvt4 = fptosi float %fix4 to i64
+; CHECK: fcvtzs {{x[0-9]+}}, {{s[0-9]+}}, #64
+ store volatile i64 %cvt4, i64* @var64
+
+ %fix5 = fmul double %dbl, 128.0
+ %cvt5 = fptosi double %fix5 to i32
+; CHECK: fcvtzs {{w[0-9]+}}, {{d[0-9]+}}, #7
+ store volatile i32 %cvt5, i32* @var32
+
+ %fix6 = fmul double %dbl, 4294967296.0
+ %cvt6 = fptosi double %fix6 to i32
+; CHECK: fcvtzs {{w[0-9]+}}, {{d[0-9]+}}, #32
+ store volatile i32 %cvt6, i32* @var32
+
+ %fix7 = fmul double %dbl, 128.0
+ %cvt7 = fptosi double %fix7 to i64
+; CHECK: fcvtzs {{x[0-9]+}}, {{d[0-9]+}}, #7
+ store volatile i64 %cvt7, i64* @var64
+
+ %fix8 = fmul double %dbl, 18446744073709551616.0
+ %cvt8 = fptosi double %fix8 to i64
+; CHECK: fcvtzs {{x[0-9]+}}, {{d[0-9]+}}, #64
+ store volatile i64 %cvt8, i64* @var64
+
+ ret void
+}
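The fixed-point immediates matched above are the base-2 logarithms of the constant scale factors folded into the conversion: 128.0 = 2^7 gives #7, 4294967296.0 = 2^32 gives #32, and 18446744073709551616.0 = 2^64 gives #64; the same mapping applies to the fcvtzu, scvtf and ucvtf functions below.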
+
+define void @test_fcvtzu(float %flt, double %dbl) {
+; CHECK: test_fcvtzu:
+
+ %fix1 = fmul float %flt, 128.0
+ %cvt1 = fptoui float %fix1 to i32
+; CHECK: fcvtzu {{w[0-9]+}}, {{s[0-9]+}}, #7
+ store volatile i32 %cvt1, i32* @var32
+
+ %fix2 = fmul float %flt, 4294967296.0
+ %cvt2 = fptoui float %fix2 to i32
+; CHECK: fcvtzu {{w[0-9]+}}, {{s[0-9]+}}, #32
+ store volatile i32 %cvt2, i32* @var32
+
+ %fix3 = fmul float %flt, 128.0
+ %cvt3 = fptoui float %fix3 to i64
+; CHECK: fcvtzu {{x[0-9]+}}, {{s[0-9]+}}, #7
+ store volatile i64 %cvt3, i64* @var64
+
+ %fix4 = fmul float %flt, 18446744073709551616.0
+ %cvt4 = fptoui float %fix4 to i64
+; CHECK: fcvtzu {{x[0-9]+}}, {{s[0-9]+}}, #64
+ store volatile i64 %cvt4, i64* @var64
+
+ %fix5 = fmul double %dbl, 128.0
+ %cvt5 = fptoui double %fix5 to i32
+; CHECK: fcvtzu {{w[0-9]+}}, {{d[0-9]+}}, #7
+ store volatile i32 %cvt5, i32* @var32
+
+ %fix6 = fmul double %dbl, 4294967296.0
+ %cvt6 = fptoui double %fix6 to i32
+; CHECK: fcvtzu {{w[0-9]+}}, {{d[0-9]+}}, #32
+ store volatile i32 %cvt6, i32* @var32
+
+ %fix7 = fmul double %dbl, 128.0
+ %cvt7 = fptoui double %fix7 to i64
+; CHECK: fcvtzu {{x[0-9]+}}, {{d[0-9]+}}, #7
+ store volatile i64 %cvt7, i64* @var64
+
+ %fix8 = fmul double %dbl, 18446744073709551616.0
+ %cvt8 = fptoui double %fix8 to i64
+; CHECK: fcvtzu {{x[0-9]+}}, {{d[0-9]+}}, #64
+ store volatile i64 %cvt8, i64* @var64
+
+ ret void
+}
+
+@varfloat = global float 0.0
+@vardouble = global double 0.0
+
+define void @test_scvtf(i32 %int, i64 %long) {
+; CHECK: test_scvtf:
+
+ %cvt1 = sitofp i32 %int to float
+ %fix1 = fdiv float %cvt1, 128.0
+; CHECK: scvtf {{s[0-9]+}}, {{w[0-9]+}}, #7
+ store volatile float %fix1, float* @varfloat
+
+ %cvt2 = sitofp i32 %int to float
+ %fix2 = fdiv float %cvt2, 4294967296.0
+; CHECK: scvtf {{s[0-9]+}}, {{w[0-9]+}}, #32
+ store volatile float %fix2, float* @varfloat
+
+ %cvt3 = sitofp i64 %long to float
+ %fix3 = fdiv float %cvt3, 128.0
+; CHECK: scvtf {{s[0-9]+}}, {{x[0-9]+}}, #7
+ store volatile float %fix3, float* @varfloat
+
+ %cvt4 = sitofp i64 %long to float
+ %fix4 = fdiv float %cvt4, 18446744073709551616.0
+; CHECK: scvtf {{s[0-9]+}}, {{x[0-9]+}}, #64
+ store volatile float %fix4, float* @varfloat
+
+ %cvt5 = sitofp i32 %int to double
+ %fix5 = fdiv double %cvt5, 128.0
+; CHECK: scvtf {{d[0-9]+}}, {{w[0-9]+}}, #7
+ store volatile double %fix5, double* @vardouble
+
+ %cvt6 = sitofp i32 %int to double
+ %fix6 = fdiv double %cvt6, 4294967296.0
+; CHECK: scvtf {{d[0-9]+}}, {{w[0-9]+}}, #32
+ store volatile double %fix6, double* @vardouble
+
+ %cvt7 = sitofp i64 %long to double
+ %fix7 = fdiv double %cvt7, 128.0
+; CHECK: scvtf {{d[0-9]+}}, {{x[0-9]+}}, #7
+ store volatile double %fix7, double* @vardouble
+
+ %cvt8 = sitofp i64 %long to double
+ %fix8 = fdiv double %cvt8, 18446744073709551616.0
+; CHECK: scvtf {{d[0-9]+}}, {{x[0-9]+}}, #64
+ store volatile double %fix8, double* @vardouble
+
+ ret void
+}
+
+define void @test_ucvtf(i32 %int, i64 %long) {
+; CHECK: test_ucvtf:
+
+ %cvt1 = uitofp i32 %int to float
+ %fix1 = fdiv float %cvt1, 128.0
+; CHECK: ucvtf {{s[0-9]+}}, {{w[0-9]+}}, #7
+ store volatile float %fix1, float* @varfloat
+
+ %cvt2 = uitofp i32 %int to float
+ %fix2 = fdiv float %cvt2, 4294967296.0
+; CHECK: ucvtf {{s[0-9]+}}, {{w[0-9]+}}, #32
+ store volatile float %fix2, float* @varfloat
+
+ %cvt3 = uitofp i64 %long to float
+ %fix3 = fdiv float %cvt3, 128.0
+; CHECK: ucvtf {{s[0-9]+}}, {{x[0-9]+}}, #7
+ store volatile float %fix3, float* @varfloat
+
+ %cvt4 = uitofp i64 %long to float
+ %fix4 = fdiv float %cvt4, 18446744073709551616.0
+; CHECK: ucvtf {{s[0-9]+}}, {{x[0-9]+}}, #64
+ store volatile float %fix4, float* @varfloat
+
+ %cvt5 = uitofp i32 %int to double
+ %fix5 = fdiv double %cvt5, 128.0
+; CHECK: ucvtf {{d[0-9]+}}, {{w[0-9]+}}, #7
+ store volatile double %fix5, double* @vardouble
+
+ %cvt6 = uitofp i32 %int to double
+ %fix6 = fdiv double %cvt6, 4294967296.0
+; CHECK: ucvtf {{d[0-9]+}}, {{w[0-9]+}}, #32
+ store volatile double %fix6, double* @vardouble
+
+ %cvt7 = uitofp i64 %long to double
+ %fix7 = fdiv double %cvt7, 128.0
+; CHECK: ucvtf {{d[0-9]+}}, {{x[0-9]+}}, #7
+ store volatile double %fix7, double* @vardouble
+
+ %cvt8 = uitofp i64 %long to double
+ %fix8 = fdiv double %cvt8, 18446744073709551616.0
+; CHECK: ucvtf {{d[0-9]+}}, {{x[0-9]+}}, #64
+ store volatile double %fix8, double* @vardouble
+
+ ret void
+}
diff --git a/test/CodeGen/AArch64/fcvt-int.ll b/test/CodeGen/AArch64/fcvt-int.ll
new file mode 100644
index 0000000000..c4bcaac206
--- /dev/null
+++ b/test/CodeGen/AArch64/fcvt-int.ll
@@ -0,0 +1,151 @@
+; RUN: llc -verify-machineinstrs < %s -march=aarch64 | FileCheck %s
+
+define i32 @test_floattoi32(float %in) {
+; CHECK: test_floattoi32:
+
+ %signed = fptosi float %in to i32
+ %unsigned = fptoui float %in to i32
+; CHECK: fcvtzu [[UNSIG:w[0-9]+]], {{s[0-9]+}}
+; CHECK: fcvtzs [[SIG:w[0-9]+]], {{s[0-9]+}}
+
+ %res = sub i32 %signed, %unsigned
+; CHECK: sub {{w[0-9]+}}, [[SIG]], [[UNSIG]]
+
+ ret i32 %res
+; CHECK: ret
+}
+
+define i32 @test_doubletoi32(double %in) {
+; CHECK: test_doubletoi32:
+
+ %signed = fptosi double %in to i32
+ %unsigned = fptoui double %in to i32
+; CHECK: fcvtzu [[UNSIG:w[0-9]+]], {{d[0-9]+}}
+; CHECK: fcvtzs [[SIG:w[0-9]+]], {{d[0-9]+}}
+
+ %res = sub i32 %signed, %unsigned
+; CHECK: sub {{w[0-9]+}}, [[SIG]], [[UNSIG]]
+
+ ret i32 %res
+; CHECK: ret
+}
+
+define i64 @test_floattoi64(float %in) {
+; CHECK: test_floattoi64:
+
+ %signed = fptosi float %in to i64
+ %unsigned = fptoui float %in to i64
+; CHECK: fcvtzu [[UNSIG:x[0-9]+]], {{s[0-9]+}}
+; CHECK: fcvtzs [[SIG:x[0-9]+]], {{s[0-9]+}}
+
+ %res = sub i64 %signed, %unsigned
+; CHECK: sub {{x[0-9]+}}, [[SIG]], [[UNSIG]]
+
+ ret i64 %res
+; CHECK: ret
+}
+
+define i64 @test_doubletoi64(double %in) {
+; CHECK: test_doubletoi64:
+
+ %signed = fptosi double %in to i64
+ %unsigned = fptoui double %in to i64
+; CHECK: fcvtzu [[UNSIG:x[0-9]+]], {{d[0-9]+}}
+; CHECK: fcvtzs [[SIG:x[0-9]+]], {{d[0-9]+}}
+
+ %res = sub i64 %signed, %unsigned
+; CHECK: sub {{x[0-9]+}}, [[SIG]], [[UNSIG]]
+
+ ret i64 %res
+; CHECK: ret
+}
+
+define float @test_i32tofloat(i32 %in) {
+; CHECK: test_i32tofloat:
+
+ %signed = sitofp i32 %in to float
+ %unsigned = uitofp i32 %in to float
+; CHECK: ucvtf [[UNSIG:s[0-9]+]], {{w[0-9]+}}
+; CHECK: scvtf [[SIG:s[0-9]+]], {{w[0-9]+}}
+
+ %res = fsub float %signed, %unsigned
+; CHECK: fsub {{s[0-9]+}}, [[SIG]], [[UNSIG]]
+ ret float %res
+; CHECK: ret
+}
+
+define double @test_i32todouble(i32 %in) {
+; CHECK: test_i32todouble:
+
+ %signed = sitofp i32 %in to double
+ %unsigned = uitofp i32 %in to double
+; CHECK: ucvtf [[UNSIG:d[0-9]+]], {{w[0-9]+}}
+; CHECK: scvtf [[SIG:d[0-9]+]], {{w[0-9]+}}
+
+ %res = fsub double %signed, %unsigned
+; CHECK: fsub {{d[0-9]+}}, [[SIG]], [[UNSIG]]
+ ret double %res
+; CHECK: ret
+}
+
+define float @test_i64tofloat(i64 %in) {
+; CHECK: test_i64tofloat:
+
+ %signed = sitofp i64 %in to float
+ %unsigned = uitofp i64 %in to float
+; CHECK: ucvtf [[UNSIG:s[0-9]+]], {{x[0-9]+}}
+; CHECK: scvtf [[SIG:s[0-9]+]], {{x[0-9]+}}
+
+ %res = fsub float %signed, %unsigned
+; CHECK: fsub {{s[0-9]+}}, [[SIG]], [[UNSIG]]
+ ret float %res
+; CHECK: ret
+}
+
+define double @test_i64todouble(i64 %in) {
+; CHECK: test_i64todouble:
+
+ %signed = sitofp i64 %in to double
+ %unsigned = uitofp i64 %in to double
+; CHECK: ucvtf [[UNSIG:d[0-9]+]], {{x[0-9]+}}
+; CHECK: scvtf [[SIG:d[0-9]+]], {{x[0-9]+}}
+
+ %res = fsub double %signed, %unsigned
+; CHECK: sub {{d[0-9]+}}, [[SIG]], [[UNSIG]]
+ ret double %res
+; CHECK: ret
+}
+
+define i32 @test_bitcastfloattoi32(float %in) {
+; CHECK: test_bitcastfloattoi32:
+
+ %res = bitcast float %in to i32
+; CHECK: fmov {{w[0-9]+}}, {{s[0-9]+}}
+ ret i32 %res
+}
+
+define i64 @test_bitcastdoubletoi64(double %in) {
+; CHECK: test_bitcastdoubletoi64:
+
+ %res = bitcast double %in to i64
+; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
+ ret i64 %res
+}
+
+define float @test_bitcasti32tofloat(i32 %in) {
+; CHECK: test_bitcasti32tofloat:
+
+ %res = bitcast i32 %in to float
+; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
+ ret float %res
+
+}
+
+define double @test_bitcasti64todouble(i64 %in) {
+; CHECK: test_bitcasti64todouble:
+
+ %res = bitcast i64 %in to double
+; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
+ ret double %res
+
+}
diff --git a/test/CodeGen/AArch64/flags-multiuse.ll b/test/CodeGen/AArch64/flags-multiuse.ll
new file mode 100644
index 0000000000..80be052cd9
--- /dev/null
+++ b/test/CodeGen/AArch64/flags-multiuse.ll
@@ -0,0 +1,35 @@
+; RUN: llc -march=aarch64 -verify-machineinstrs < %s | FileCheck %s
+
+; LLVM should be able to cope with multiple uses of the same flag-setting
+; instruction at different points of a routine. Either by rematerializing the
+; compare or by saving and restoring the flag register.
+
+declare void @bar()
+
+@var = global i32 0
+
+define i32 @test_multiflag(i32 %n, i32 %m, i32 %o) {
+; CHECK: test_multiflag:
+
+ %test = icmp ne i32 %n, %m
+; CHECK: cmp [[LHS:w[0-9]+]], [[RHS:w[0-9]+]]
+
+ %val = zext i1 %test to i32
+; CHECK: csinc {{[xw][0-9]+}}, {{xzr|wzr}}, {{xzr|wzr}}, eq
+
+ store i32 %val, i32* @var
+
+ call void @bar()
+; CHECK: bl bar
+
+ ; Currently, the comparison is emitted again. An MSR/MRS pair would also be
+ ; acceptable, but assuming the call preserves NZCV is not.
+ br i1 %test, label %iftrue, label %iffalse
+; CHECK: cmp [[LHS]], [[RHS]]
+; CHECK: b.eq
+
+iftrue:
+ ret i32 42
+iffalse:
+ ret i32 0
+}
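A minimal sketch of the save/restore alternative the comment mentions, using a hypothetical scratch register (this is not what the test currently expects, which is the re-emitted cmp):

    mrs x9, NZCV      // save the flags before the call clobbers them
    bl  bar
    msr NZCV, x9      // restore the flags for the later b.eq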
diff --git a/test/CodeGen/AArch64/floatdp_1source.ll b/test/CodeGen/AArch64/floatdp_1source.ll
new file mode 100644
index 0000000000..0f10b42c0b
--- /dev/null
+++ b/test/CodeGen/AArch64/floatdp_1source.ll
@@ -0,0 +1,138 @@
+; RUN: llc -verify-machineinstrs < %s -march=aarch64 | FileCheck %s
+
+@varhalf = global half 0.0
+@varfloat = global float 0.0
+@vardouble = global double 0.0
+
+declare float @fabsf(float) readonly
+declare double @fabs(double) readonly
+
+declare float @llvm.sqrt.f32(float %Val)
+declare double @llvm.sqrt.f64(double %Val)
+
+declare float @ceilf(float) readonly
+declare double @ceil(double) readonly
+
+declare float @floorf(float) readonly
+declare double @floor(double) readonly
+
+declare float @truncf(float) readonly
+declare double @trunc(double) readonly
+
+declare float @rintf(float) readonly
+declare double @rint(double) readonly
+
+declare float @nearbyintf(float) readonly
+declare double @nearbyint(double) readonly
+
+define void @simple_float() {
+; CHECK: simple_float:
+ %val1 = load volatile float* @varfloat
+
+ %valabs = call float @fabsf(float %val1)
+ store volatile float %valabs, float* @varfloat
+; CHECK: fabs {{s[0-9]+}}, {{s[0-9]+}}
+
+ %valneg = fsub float -0.0, %val1
+ store volatile float %valneg, float* @varfloat
+; CHECK: fneg {{s[0-9]+}}, {{s[0-9]+}}
+
+ %valsqrt = call float @llvm.sqrt.f32(float %val1)
+ store volatile float %valsqrt, float* @varfloat
+; CHECK: fsqrt {{s[0-9]+}}, {{s[0-9]+}}
+
+ %valceil = call float @ceilf(float %val1)
+ store volatile float %valceil, float* @varfloat
+; CHECK: frintp {{s[0-9]+}}, {{s[0-9]+}}
+
+ %valfloor = call float @floorf(float %val1)
+ store volatile float %valfloor, float* @varfloat
+; CHECK: frintm {{s[0-9]+}}, {{s[0-9]+}}
+
+ %valtrunc = call float @truncf(float %val1)
+ store volatile float %valtrunc, float* @varfloat
+; CHECK: frintz {{s[0-9]+}}, {{s[0-9]+}}
+
+ %valrint = call float @rintf(float %val1)
+ store volatile float %valrint, float* @varfloat
+; CHECK: frintx {{s[0-9]+}}, {{s[0-9]+}}
+
+ %valnearbyint = call float @nearbyintf(float %val1)
+ store volatile float %valnearbyint, float* @varfloat
+; CHECK: frinti {{s[0-9]+}}, {{s[0-9]+}}
+
+ ret void
+}
+
+define void @simple_double() {
+; CHECK: simple_double:
+ %val1 = load volatile double* @vardouble
+
+ %valabs = call double @fabs(double %val1)
+ store volatile double %valabs, double* @vardouble
+; CHECK: fabs {{d[0-9]+}}, {{d[0-9]+}}
+
+ %valneg = fsub double -0.0, %val1
+ store volatile double %valneg, double* @vardouble
+; CHECK: fneg {{d[0-9]+}}, {{d[0-9]+}}
+
+ %valsqrt = call double @llvm.sqrt.f64(double %val1)
+ store volatile double %valsqrt, double* @vardouble
+; CHECK: fsqrt {{d[0-9]+}}, {{d[0-9]+}}
+
+ %valceil = call double @ceil(double %val1)
+ store volatile double %valceil, double* @vardouble
+; CHECK: frintp {{d[0-9]+}}, {{d[0-9]+}}
+
+ %valfloor = call double @floor(double %val1)
+ store volatile double %valfloor, double* @vardouble
+; CHECK: frintm {{d[0-9]+}}, {{d[0-9]+}}
+
+ %valtrunc = call double @trunc(double %val1)
+ store volatile double %valtrunc, double* @vardouble
+; CHECK: frintz {{d[0-9]+}}, {{d[0-9]+}}
+
+ %valrint = call double @rint(double %val1)
+ store volatile double %valrint, double* @vardouble
+; CHECK: frintx {{d[0-9]+}}, {{d[0-9]+}}
+
+ %valnearbyint = call double @nearbyint(double %val1)
+ store volatile double %valnearbyint, double* @vardouble
+; CHECK: frinti {{d[0-9]+}}, {{d[0-9]+}}
+
+ ret void
+}
+
+define void @converts() {
+; CHECK: converts:
+
+ %val16 = load volatile half* @varhalf
+ %val32 = load volatile float* @varfloat
+ %val64 = load volatile double* @vardouble
+
+ %val16to32 = fpext half %val16 to float
+ store volatile float %val16to32, float* @varfloat
+; CHECK: fcvt {{s[0-9]+}}, {{h[0-9]+}}
+
+ %val16to64 = fpext half %val16 to double
+ store volatile double %val16to64, double* @vardouble
+; CHECK: fcvt {{d[0-9]+}}, {{h[0-9]+}}
+
+ %val32to16 = fptrunc float %val32 to half
+ store volatile half %val32to16, half* @varhalf
+; CHECK: fcvt {{h[0-9]+}}, {{s[0-9]+}}
+
+ %val32to64 = fpext float %val32 to double
+ store volatile double %val32to64, double* @vardouble
+; CHECK: fcvt {{d[0-9]+}}, {{s[0-9]+}}
+
+ %val64to16 = fptrunc double %val64 to half
+ store volatile half %val64to16, half* @varhalf
+; CHECK: fcvt {{h[0-9]+}}, {{d[0-9]+}}
+
+ %val64to32 = fptrunc double %val64 to float
+ store volatile float %val64to32, float* @varfloat
+; CHECK: fcvt {{s[0-9]+}}, {{d[0-9]+}}
+
+ ret void
+}
diff --git a/test/CodeGen/AArch64/floatdp_2source.ll b/test/CodeGen/AArch64/floatdp_2source.ll
new file mode 100644
index 0000000000..2cbcca723a
--- /dev/null
+++ b/test/CodeGen/AArch64/floatdp_2source.ll
@@ -0,0 +1,60 @@
+; RUN: llc -verify-machineinstrs < %s -march=aarch64 | FileCheck %s
+
+@varfloat = global float 0.0
+@vardouble = global double 0.0
+
+define void @testfloat() {
+; CHECK: testfloat:
+ %val1 = load float* @varfloat
+
+ %val2 = fadd float %val1, %val1
+; CHECK: fadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+
+ %val3 = fmul float %val2, %val1
+; CHECK: fmul {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+
+ %val4 = fdiv float %val3, %val1
+; CHECK: fdiv {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+
+ %val5 = fsub float %val4, %val2
+; CHECK: fsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+
+ store volatile float %val5, float* @varfloat
+
+; These will be enabled with the implementation of floating-point litpool entries.
+ %val6 = fmul float %val1, %val2
+ %val7 = fsub float -0.0, %val6
+; CHECK: fnmul {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+
+ store volatile float %val7, float* @varfloat
+
+ ret void
+}
+
+define void @testdouble() {
+; CHECK: testdouble:
+ %val1 = load double* @vardouble
+
+ %val2 = fadd double %val1, %val1
+; CHECK: fadd {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+
+ %val3 = fmul double %val2, %val1
+; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+
+ %val4 = fdiv double %val3, %val1
+; CHECK: fdiv {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+
+ %val5 = fsub double %val4, %val2
+; CHECK: fsub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+
+ store volatile double %val5, double* @vardouble
+
+; These will be enabled with the implementation of floating-point litpool entries.
+ %val6 = fmul double %val1, %val2
+ %val7 = fsub double -0.0, %val6
+; CHECK: fnmul {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+
+ store volatile double %val7, double* @vardouble
+
+ ret void
+}
diff --git a/test/CodeGen/AArch64/fp-cond-sel.ll b/test/CodeGen/AArch64/fp-cond-sel.ll
new file mode 100644
index 0000000000..c64927b34e
--- /dev/null
+++ b/test/CodeGen/AArch64/fp-cond-sel.ll
@@ -0,0 +1,26 @@
+; RUN: llc -verify-machineinstrs < %s -march=aarch64 | FileCheck %s
+
+@varfloat = global float 0.0
+@vardouble = global double 0.0
+
+define void @test_csel(i32 %lhs32, i32 %rhs32, i64 %lhs64) {
+; CHECK: test_csel:
+
+ %tst1 = icmp ugt i32 %lhs32, %rhs32
+ %val1 = select i1 %tst1, float 0.0, float 1.0
+ store float %val1, float* @varfloat
+; CHECK: fmov [[FLT1:s[0-9]+]], #1.0
+; CHECK: ldr [[FLT0:s[0-9]+]], .LCPI
+; CHECK: fcsel {{s[0-9]+}}, [[FLT0]], [[FLT1]], hi
+
+ %rhs64 = sext i32 %rhs32 to i64
+ %tst2 = icmp sle i64 %lhs64, %rhs64
+ %val2 = select i1 %tst2, double 1.0, double 0.0
+ store double %val2, double* @vardouble
+; CHECK: ldr [[FLT0:d[0-9]+]], .LCPI
+; CHECK: fmov [[FLT1:d[0-9]+]], #1.0
+; CHECK: fcsel {{d[0-9]+}}, [[FLT1]], [[FLT0]], le
+
+ ret void
+; CHECK: ret
+}
diff --git a/test/CodeGen/AArch64/fp-dp3.ll b/test/CodeGen/AArch64/fp-dp3.ll
new file mode 100644
index 0000000000..84f34567b1
--- /dev/null
+++ b/test/CodeGen/AArch64/fp-dp3.ll
@@ -0,0 +1,102 @@
+; RUN: llc -verify-machineinstrs < %s -march=aarch64 -fp-contract=fast | FileCheck %s
+
+declare float @llvm.fma.f32(float, float, float)
+declare double @llvm.fma.f64(double, double, double)
+
+define float @test_fmadd(float %a, float %b, float %c) {
+; CHECK: test_fmadd:
+ %val = call float @llvm.fma.f32(float %a, float %b, float %c)
+; CHECK: fmadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+ ret float %val
+}
+
+define float @test_fmsub(float %a, float %b, float %c) {
+; CHECK: test_fmsub:
+ %nega = fsub float -0.0, %a
+ %val = call float @llvm.fma.f32(float %nega, float %b, float %c)
+; CHECK: fmsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+ ret float %val
+}
+
+define float @test_fnmadd(float %a, float %b, float %c) {
+; CHECK: test_fnmadd:
+ %negc = fsub float -0.0, %c
+ %val = call float @llvm.fma.f32(float %a, float %b, float %negc)
+; CHECK: fnmadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+ ret float %val
+}
+
+define float @test_fnmsub(float %a, float %b, float %c) {
+; CHECK: test_fnmsub:
+ %nega = fsub float -0.0, %a
+ %negc = fsub float -0.0, %c
+ %val = call float @llvm.fma.f32(float %nega, float %b, float %negc)
+; CHECK: fnmsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+ ret float %val
+}
+
+define double @testd_fmadd(double %a, double %b, double %c) {
+; CHECK: testd_fmadd:
+ %val = call double @llvm.fma.f64(double %a, double %b, double %c)
+; CHECK: fmadd {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+ ret double %val
+}
+
+define double @testd_fmsub(double %a, double %b, double %c) {
+; CHECK: testd_fmsub:
+ %nega = fsub double -0.0, %a
+ %val = call double @llvm.fma.f64(double %nega, double %b, double %c)
+; CHECK: fmsub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+ ret double %val
+}
+
+define double @testd_fnmadd(double %a, double %b, double %c) {
+; CHECK: testd_fnmadd:
+ %negc = fsub double -0.0, %c
+ %val = call double @llvm.fma.f64(double %a, double %b, double %negc)
+; CHECK: fnmadd {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+ ret double %val
+}
+
+define double @testd_fnmsub(double %a, double %b, double %c) {
+; CHECK: testd_fnmsub:
+ %nega = fsub double -0.0, %a
+ %negc = fsub double -0.0, %c
+ %val = call double @llvm.fma.f64(double %nega, double %b, double %negc)
+; CHECK: fnmsub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+ ret double %val
+}
+
+define float @test_fmadd_unfused(float %a, float %b, float %c) {
+; CHECK: test_fmadd_unfused:
+ %prod = fmul float %b, %c
+ %sum = fadd float %a, %prod
+; CHECK: fmadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+ ret float %sum
+}
+
+define float @test_fmsub_unfused(float %a, float %b, float %c) {
+; CHECK: test_fmsub_unfused:
+ %prod = fmul float %b, %c
+ %diff = fsub float %a, %prod
+; CHECK: fmsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+ ret float %diff
+}
+
+define float @test_fnmadd_unfused(float %a, float %b, float %c) {
+; CHECK: test_fnmadd_unfused:
+ %nega = fsub float -0.0, %a
+ %prod = fmul float %b, %c
+ %sum = fadd float %nega, %prod
+; CHECK: fnmadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+ ret float %sum
+}
+
+define float @test_fnmsub_unfused(float %a, float %b, float %c) {
+; CHECK: test_fnmsub_unfused:
+ %nega = fsub float -0.0, %a
+ %prod = fmul float %b, %c
+ %diff = fsub float %nega, %prod
+; CHECK: fnmsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+ ret float %diff
+}
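The *_unfused functions above rely on -fp-contract=fast in the RUN line: with contraction enabled, a separate fmul feeding an fadd or fsub may be contracted into the fused forms, which is why the unfused IR is still expected to produce fmadd, fmsub, fnmadd and fnmsub.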
diff --git a/test/CodeGen/AArch64/fp128-folding.ll b/test/CodeGen/AArch64/fp128-folding.ll
new file mode 100644
index 0000000000..891755fece
--- /dev/null
+++ b/test/CodeGen/AArch64/fp128-folding.ll
@@ -0,0 +1,17 @@
+; RUN: llc -march=aarch64 -verify-machineinstrs < %s | FileCheck %s
+declare void @bar(i8*, i8*, i32*)
+
+; SelectionDAG used to try to fold some fp128 operations using the ppc128 type,
+; which is not supported.
+
+define fp128 @test_folding() {
+; CHECK: test_folding:
+ %l = alloca i32
+ store i32 42, i32* %l
+ %val = load i32* %l
+ %fpval = sitofp i32 %val to fp128
+ ; If the value is loaded from a constant pool into an fp128, it's been folded
+ ; successfully.
+; CHECK: ldr {{q[0-9]+}}, .LCPI
+ ret fp128 %fpval
+} \ No newline at end of file
diff --git a/test/CodeGen/AArch64/fp128.ll b/test/CodeGen/AArch64/fp128.ll
new file mode 100644
index 0000000000..afba292402
--- /dev/null
+++ b/test/CodeGen/AArch64/fp128.ll
@@ -0,0 +1,280 @@
+; RUN: llc -march=aarch64 -verify-machineinstrs < %s | FileCheck %s
+
+@lhs = global fp128 zeroinitializer
+@rhs = global fp128 zeroinitializer
+
+define fp128 @test_add() {
+; CHECK: test_add:
+
+ %lhs = load fp128* @lhs
+ %rhs = load fp128* @rhs
+; CHECK: ldr q0, [{{x[0-9]+}}, #:lo12:lhs]
+; CHECK: ldr q1, [{{x[0-9]+}}, #:lo12:rhs]
+
+ %val = fadd fp128 %lhs, %rhs
+; CHECK: bl __addtf3
+ ret fp128 %val
+}
+
+define fp128 @test_sub() {
+; CHECK: test_sub:
+
+ %lhs = load fp128* @lhs
+ %rhs = load fp128* @rhs
+; CHECK: ldr q0, [{{x[0-9]+}}, #:lo12:lhs]
+; CHECK: ldr q1, [{{x[0-9]+}}, #:lo12:rhs]
+
+ %val = fsub fp128 %lhs, %rhs
+; CHECK: bl __subtf3
+ ret fp128 %val
+}
+
+define fp128 @test_mul() {
+; CHECK: test_mul:
+
+ %lhs = load fp128* @lhs
+ %rhs = load fp128* @rhs
+; CHECK: ldr q0, [{{x[0-9]+}}, #:lo12:lhs]
+; CHECK: ldr q1, [{{x[0-9]+}}, #:lo12:rhs]
+
+ %val = fmul fp128 %lhs, %rhs
+; CHECK: bl __multf3
+ ret fp128 %val
+}
+
+define fp128 @test_div() {
+; CHECK: test_div:
+
+ %lhs = load fp128* @lhs
+ %rhs = load fp128* @rhs
+; CHECK: ldr q0, [{{x[0-9]+}}, #:lo12:lhs]
+; CHECK: ldr q1, [{{x[0-9]+}}, #:lo12:rhs]
+
+ %val = fdiv fp128 %lhs, %rhs
+; CHECK: bl __divtf3
+ ret fp128 %val
+}
+
+@var32 = global i32 0
+@var64 = global i64 0
+
+define void @test_fptosi() {
+; CHECK: test_fptosi:
+ %val = load fp128* @lhs
+
+ %val32 = fptosi fp128 %val to i32
+ store i32 %val32, i32* @var32
+; CHECK: bl __fixtfsi
+
+ %val64 = fptosi fp128 %val to i64
+ store i64 %val64, i64* @var64
+; CHECK: bl __fixtfdi
+
+ ret void
+}
+
+define void @test_fptoui() {
+; CHECK: test_fptoui:
+ %val = load fp128* @lhs
+
+ %val32 = fptoui fp128 %val to i32
+ store i32 %val32, i32* @var32
+; CHECK: bl __fixunstfsi
+
+ %val64 = fptoui fp128 %val to i64
+ store i64 %val64, i64* @var64
+; CHECK: bl __fixunstfdi
+
+ ret void
+}
+
+define void @test_sitofp() {
+; CHECK: test_sitofp:
+
+ %src32 = load i32* @var32
+ %val32 = sitofp i32 %src32 to fp128
+ store volatile fp128 %val32, fp128* @lhs
+; CHECK: bl __floatsitf
+
+ %src64 = load i64* @var64
+ %val64 = sitofp i64 %src64 to fp128
+ store volatile fp128 %val64, fp128* @lhs
+; CHECK: bl __floatditf
+
+ ret void
+}
+
+define void @test_uitofp() {
+; CHECK: test_uitofp:
+
+ %src32 = load i32* @var32
+ %val32 = uitofp i32 %src32 to fp128
+ store volatile fp128 %val32, fp128* @lhs
+; CHECK: bl __floatunsitf
+
+ %src64 = load i64* @var64
+ %val64 = uitofp i64 %src64 to fp128
+ store volatile fp128 %val64, fp128* @lhs
+; CHECK: bl __floatunditf
+
+ ret void
+}
+
+define i1 @test_setcc1() {
+; CHECK: test_setcc1:
+
+ %lhs = load fp128* @lhs
+ %rhs = load fp128* @rhs
+; CHECK: ldr q0, [{{x[0-9]+}}, #:lo12:lhs]
+; CHECK: ldr q1, [{{x[0-9]+}}, #:lo12:rhs]
+
+; Technically, everything after the call to __letf2 is redundant, but we'll let
+; LLVM have its fun for now.
+ %val = fcmp ole fp128 %lhs, %rhs
+; CHECK: bl __letf2
+; CHECK: cmp w0, #0
+; CHECK: csinc w0, wzr, wzr, gt
+
+ ret i1 %val
+; CHECK: ret
+}
+
+define i1 @test_setcc2() {
+; CHECK: test_setcc2:
+
+ %lhs = load fp128* @lhs
+ %rhs = load fp128* @rhs
+; CHECK: ldr q0, [{{x[0-9]+}}, #:lo12:lhs]
+; CHECK: ldr q1, [{{x[0-9]+}}, #:lo12:rhs]
+
+; Technically, everything after the calls to __unordtf2 and __gttf2 is redundant, but we'll let
+; LLVM have its fun for now.
+ %val = fcmp ugt fp128 %lhs, %rhs
+; CHECK: bl __unordtf2
+; CHECK: mov x[[UNORDERED:[0-9]+]], x0
+
+; CHECK: bl __gttf2
+; CHECK: cmp w0, #0
+; CHECK: csinc [[GT:w[0-9]+]], wzr, wzr, le
+; CHECK: cmp w[[UNORDERED]], #0
+; CHECK: csinc [[UNORDERED:w[0-9]+]], wzr, wzr, eq
+; CHECK: orr w0, [[UNORDERED]], [[GT]]
+
+ ret i1 %val
+; CHECK: ret
+}
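What test_setcc2 matches is the standard soft-float decomposition of an unordered-or-greater compare into two libcalls: __unordtf2 returns nonzero when either operand is a NaN, __gttf2 returns a value greater than zero for an ordered greater-than, and the two materialized i1 results are combined by the final orr.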
+
+define i32 @test_br_cc() {
+; CHECK: test_br_cc:
+
+ %lhs = load fp128* @lhs
+ %rhs = load fp128* @rhs
+; CHECK: ldr q0, [{{x[0-9]+}}, #:lo12:lhs]
+; CHECK: ldr q1, [{{x[0-9]+}}, #:lo12:rhs]
+
+  ; olt == !uge, which is what LLVM unfortunately "optimizes" this to.
+ %cond = fcmp olt fp128 %lhs, %rhs
+; CHECK: bl __unordtf2
+; CHECK: mov x[[UNORDERED:[0-9]+]], x0
+
+; CHECK: bl __getf2
+; CHECK: cmp w0, #0
+
+; CHECK: csinc [[OGE:w[0-9]+]], wzr, wzr, lt
+; CHECK: cmp w[[UNORDERED]], #0
+; CHECK: csinc [[UNORDERED:w[0-9]+]], wzr, wzr, eq
+; CHECK: orr [[UGE:w[0-9]+]], [[UNORDERED]], [[OGE]]
+; CHECK: cbnz [[UGE]], [[RET29:.LBB[0-9]+_[0-9]+]]
+ br i1 %cond, label %iftrue, label %iffalse
+
+iftrue:
+ ret i32 42
+; CHECK-NEXT: BB#
+; CHECK-NEXT: movz x0, #42
+; CHECK-NEXT: b [[REALRET:.LBB[0-9]+_[0-9]+]]
+
+iffalse:
+ ret i32 29
+; CHECK: [[RET29]]:
+; CHECK-NEXT: movz x0, #29
+; CHECK-NEXT: [[REALRET]]:
+; CHECK: ret
+}
+
+define void @test_select(i1 %cond, fp128 %lhs, fp128 %rhs) {
+; CHECK: test_select:
+
+ %val = select i1 %cond, fp128 %lhs, fp128 %rhs
+ store fp128 %val, fp128* @lhs
+; CHECK: cmp w0, #0
+; CHECK: str q1, [sp]
+; CHECK-NEXT: b.eq [[IFFALSE:.LBB[0-9]+_[0-9]+]]
+; CHECK-NEXT: BB#
+; CHECK-NEXT: str q0, [sp]
+; CHECK-NEXT: [[IFFALSE]]:
+; CHECK-NEXT: ldr q0, [sp]
+; CHECK: str q0, [{{x[0-9]+}}, #:lo12:lhs]
+ ret void
+; CHECK: ret
+}
+
+@varfloat = global float 0.0
+@vardouble = global double 0.0
+
+define void @test_round() {
+; CHECK: test_round:
+
+ %val = load fp128* @lhs
+
+ %float = fptrunc fp128 %val to float
+ store float %float, float* @varfloat
+; CHECK: bl __trunctfsf2
+; CHECK: str s0, [{{x[0-9]+}}, #:lo12:varfloat]
+
+ %double = fptrunc fp128 %val to double
+ store double %double, double* @vardouble
+; CHECK: bl __trunctfdf2
+; CHECK: str d0, [{{x[0-9]+}}, #:lo12:vardouble]
+
+ ret void
+}
+
+define void @test_extend() {
+; CHECK: test_extend:
+
+ %val = load fp128* @lhs
+
+ %float = load float* @varfloat
+ %fromfloat = fpext float %float to fp128
+ store volatile fp128 %fromfloat, fp128* @lhs
+; CHECK: bl __extendsftf2
+; CHECK: str q0, [{{x[0-9]+}}, #:lo12:lhs]
+
+ %double = load double* @vardouble
+ %fromdouble = fpext double %double to fp128
+ store volatile fp128 %fromdouble, fp128* @lhs
+; CHECK: bl __extenddftf2
+; CHECK: str q0, [{{x[0-9]+}}, #:lo12:lhs]
+
+ ret void
+; CHECK: ret
+}
+
+define fp128 @test_neg(fp128 %in) {
+; CHECK: test_neg:
+
+  ; Could in principle be optimized to fneg, which we can't select; this makes
+  ; sure that doesn't happen.
+ %ret = fsub fp128 0xL00000000000000008000000000000000, %in
+; CHECK: str q0, [sp, #-16]
+; CHECK-NEXT: ldr q1, [sp], #16
+; CHECK: ldr q0, [[MINUS0:.LCPI[0-9]+_0]]
+; CHECK: bl __subtf3
+
+ ret fp128 %ret
+; CHECK: ret
+
+; CHECK: [[MINUS0]]:
+; Make sure the weird hex constant below *is* -0.0
+; CHECK-NEXT: fp128 -0
+}
diff --git a/test/CodeGen/AArch64/fpimm.ll b/test/CodeGen/AArch64/fpimm.ll
new file mode 100644
index 0000000000..64c512126c
--- /dev/null
+++ b/test/CodeGen/AArch64/fpimm.ll
@@ -0,0 +1,34 @@
+; RUN: llc -verify-machineinstrs < %s -march=aarch64 | FileCheck %s
+
+@varf32 = global float 0.0
+@varf64 = global double 0.0
+
+define void @check_float() {
+; CHECK: check_float:
+
+ %val = load float* @varf32
+ %newval1 = fadd float %val, 8.5
+ store volatile float %newval1, float* @varf32
+; CHECK: fmov {{s[0-9]+}}, #8.5
+
+ %newval2 = fadd float %val, 128.0
+ store volatile float %newval2, float* @varf32
+; CHECK: ldr {{s[0-9]+}}, .LCPI0_0
+
+ ret void
+}
+
+define void @check_double() {
+; CHECK: check_double:
+
+ %val = load double* @varf64
+ %newval1 = fadd double %val, 8.5
+ store volatile double %newval1, double* @varf64
+; CHECK: fmov {{d[0-9]+}}, #8.5
+
+ %newval2 = fadd double %val, 128.0
+ store volatile double %newval2, double* @varf64
+; CHECK: ldr {{d[0-9]+}}, .LCPI1_0
+
+ ret void
+}
diff --git a/test/CodeGen/AArch64/func-argpassing.ll b/test/CodeGen/AArch64/func-argpassing.ll
new file mode 100644
index 0000000000..2c4dd03911
--- /dev/null
+++ b/test/CodeGen/AArch64/func-argpassing.ll
@@ -0,0 +1,192 @@
+; RUN: llc -verify-machineinstrs < %s -march=aarch64 | FileCheck %s
+
+%myStruct = type { i64 , i8, i32 }
+
+@var8 = global i8 0
+@var32 = global i32 0
+@var64 = global i64 0
+@var128 = global i128 0
+@varfloat = global float 0.0
+@vardouble = global double 0.0
+@varstruct = global %myStruct zeroinitializer
+
+define void @take_i8s(i8 %val1, i8 %val2) {
+; CHECK: take_i8s:
+ store i8 %val2, i8* @var8
+ ; Not using w1 may be technically allowed, but it would indicate a
+ ; problem in itself.
+; CHECK: strb w1, [{{x[0-9]+}}, #:lo12:var8]
+ ret void
+}
+
+define void @add_floats(float %val1, float %val2) {
+; CHECK: add_floats:
+ %newval = fadd float %val1, %val2
+; CHECK: fadd [[ADDRES:s[0-9]+]], s0, s1
+ store float %newval, float* @varfloat
+; CHECK: str [[ADDRES]], [{{x[0-9]+}}, #:lo12:varfloat]
+ ret void
+}
+
+; byval pointers should be allocated to the stack and copied as if
+; with memcpy.
+define void @take_struct(%myStruct* byval %structval) {
+; CHECK: take_struct:
+ %addr0 = getelementptr %myStruct* %structval, i64 0, i32 2
+ %addr1 = getelementptr %myStruct* %structval, i64 0, i32 0
+
+ %val0 = load i32* %addr0
+ ; Some weird move means x0 is used for one access
+; CHECK: ldr [[REG32:w[0-9]+]], [{{x[0-9]+|sp}}, #12]
+ store i32 %val0, i32* @var32
+; CHECK: str [[REG32]], [{{x[0-9]+}}, #:lo12:var32]
+
+ %val1 = load i64* %addr1
+; CHECK: ldr [[REG64:x[0-9]+]], [{{x[0-9]+|sp}}]
+ store i64 %val1, i64* @var64
+; CHECK: str [[REG64]], [{{x[0-9]+}}, #:lo12:var64]
+
+ ret void
+}
+
+; %structval should be at sp + 16
+define void @check_byval_align(i32* byval %ignore, %myStruct* byval align 16 %structval) {
+; CHECK: check_byval_align:
+
+ %addr0 = getelementptr %myStruct* %structval, i64 0, i32 2
+ %addr1 = getelementptr %myStruct* %structval, i64 0, i32 0
+
+ %val0 = load i32* %addr0
+ ; Some weird move means x0 is used for one access
+; CHECK: add x[[STRUCTVAL_ADDR:[0-9]+]], sp, #16
+; CHECK: ldr [[REG32:w[0-9]+]], [x[[STRUCTVAL_ADDR]], #12]
+ store i32 %val0, i32* @var32
+; CHECK: str [[REG32]], [{{x[0-9]+}}, #:lo12:var32]
+
+ %val1 = load i64* %addr1
+; CHECK: ldr [[REG64:x[0-9]+]], [sp, #16]
+ store i64 %val1, i64* @var64
+; CHECK: str [[REG64]], [{{x[0-9]+}}, #:lo12:var64]
+
+ ret void
+}
+
+define i32 @return_int() {
+; CHECK: return_int:
+ %val = load i32* @var32
+ ret i32 %val
+; CHECK: ldr w0, [{{x[0-9]+}}, #:lo12:var32]
+ ; Make sure epilogue follows
+; CHECK-NEXT: ret
+}
+
+define double @return_double() {
+; CHECK: return_double:
+ ret double 3.14
+; CHECK: ldr d0, .LCPI
+}
+
+; This is the kind of IR clang will produce for returning a struct
+; small enough to go into registers. Not all that pretty, but it
+; works.
+define [2 x i64] @return_struct() {
+; CHECK: return_struct:
+ %addr = bitcast %myStruct* @varstruct to [2 x i64]*
+ %val = load [2 x i64]* %addr
+ ret [2 x i64] %val
+; CHECK: ldr x0, [{{x[0-9]+}}, #:lo12:varstruct]
+ ; Odd register regex below disallows x0 which we want to be live now.
+; CHECK: add {{x[1-9][0-9]*}}, {{x[1-9][0-9]*}}, #:lo12:varstruct
+; CHECK-NEXT: ldr x1, [{{x[1-9][0-9]*}}, #8]
+ ; Make sure epilogue immediately follows
+; CHECK-NEXT: ret
+}
+
+; Large structs are passed by reference (with storage allocated by the caller
+; to preserve value semantics), via x8. Strictly this only applies to structs
+; larger than 16 bytes, but C semantics can still be provided if LLVM does it
+; for %myStruct too, so this is the simplest check.
+define void @return_large_struct(%myStruct* sret %retval) {
+; CHECK: return_large_struct:
+ %addr0 = getelementptr %myStruct* %retval, i64 0, i32 0
+ %addr1 = getelementptr %myStruct* %retval, i64 0, i32 1
+ %addr2 = getelementptr %myStruct* %retval, i64 0, i32 2
+
+ store i64 42, i64* %addr0
+ store i8 2, i8* %addr1
+ store i32 9, i32* %addr2
+; CHECK: str {{x[0-9]+}}, [x8]
+; CHECK: strb {{w[0-9]+}}, [x8, #8]
+; CHECK: str {{w[0-9]+}}, [x8, #12]
+
+ ret void
+}
+
+; This struct arrives too late in the argument list to go into registers (only
+; x7 is available, but it needs two). Also make sure that %stacked doesn't
+; sneak into x7 behind it.
+define i32 @struct_on_stack(i8 %var0, i16 %var1, i32 %var2, i64 %var3, i128 %var45,
+ i32* %var6, %myStruct* byval %struct, i32* byval %stacked,
+ double %notstacked) {
+; CHECK: struct_on_stack:
+ %addr = getelementptr %myStruct* %struct, i64 0, i32 0
+ %val64 = load i64* %addr
+ store i64 %val64, i64* @var64
+ ; Currently nothing on local stack, so struct should be at sp
+; CHECK: ldr [[VAL64:x[0-9]+]], [sp]
+; CHECK: str [[VAL64]], [{{x[0-9]+}}, #:lo12:var64]
+
+ store double %notstacked, double* @vardouble
+; CHECK-NOT: ldr d0
+; CHECK: str d0, [{{x[0-9]+}}, #:lo12:vardouble
+
+ %retval = load i32* %stacked
+ ret i32 %retval
+; CHECK: ldr w0, [sp, #16]
+}
+
+define void @stacked_fpu(float %var0, double %var1, float %var2, float %var3,
+ float %var4, float %var5, float %var6, float %var7,
+ float %var8) {
+; CHECK: stacked_fpu:
+ store float %var8, float* @varfloat
+  ; Beware: the offset would be different on big-endian
+ ; machines if the first ldr were changed to use s-registers.
+; CHECK: ldr d[[VALFLOAT:[0-9]+]], [sp]
+; CHECK: str s[[VALFLOAT]], [{{x[0-9]+}}, #:lo12:varfloat]
+
+ ret void
+}
+
+; 128-bit integer types should be passed in xEVEN, xODD rather than
+; the reverse. In this case x2 and x3. Nothing should use x1.
+define i32 @check_i128_regalign(i32 %val0, i128 %val1, i32 %val2) {
+; CHECK: check_i128_regalign
+ store i128 %val1, i128* @var128
+; CHECK: str x2, [{{x[0-9]+}}, #:lo12:var128]
+; CHECK: str x3, [{{x[0-9]+}}, #8]
+
+ ret i32 %val2
+; CHECK: mov x0, x4
+}
+
+define void @check_i128_stackalign(i32 %val0, i32 %val1, i32 %val2, i32 %val3,
+ i32 %val4, i32 %val5, i32 %val6, i32 %val7,
+ i32 %stack1, i128 %stack2) {
+; CHECK: check_i128_stackalign
+ store i128 %stack2, i128* @var128
+  ; Nothing is on the local stack in current codegen; %stack1 takes the first
+  ; incoming slot and the 16-byte-aligned i128 therefore starts at sp+16.
+; CHECK: ldr {{x[0-9]+}}, [sp, #16]
+  ; The important point is that the second doubleword is addressed at sp+24.
+; CHECK: ldr {{x[0-9]+}}, [sp, #24]
+ ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1)
+
+define i32 @test_extern() {
+; CHECK: test_extern:
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* undef, i8* undef, i32 undef, i32 4, i1 0)
+; CHECK: bl memcpy
+ ret i32 0
+}
diff --git a/test/CodeGen/AArch64/func-calls.ll b/test/CodeGen/AArch64/func-calls.ll
new file mode 100644
index 0000000000..f96564d3a1
--- /dev/null
+++ b/test/CodeGen/AArch64/func-calls.ll
@@ -0,0 +1,140 @@
+; RUN: llc -verify-machineinstrs < %s -march=aarch64 | FileCheck %s
+
+%myStruct = type { i64 , i8, i32 }
+
+@var8 = global i8 0
+@var8_2 = global i8 0
+@var32 = global i32 0
+@var64 = global i64 0
+@var128 = global i128 0
+@varfloat = global float 0.0
+@varfloat_2 = global float 0.0
+@vardouble = global double 0.0
+@varstruct = global %myStruct zeroinitializer
+@varsmallstruct = global [2 x i64] zeroinitializer
+
+declare void @take_i8s(i8 %val1, i8 %val2)
+declare void @take_floats(float %val1, float %val2)
+
+define void @simple_args() {
+; CHECK: simple_args:
+ %char1 = load i8* @var8
+ %char2 = load i8* @var8_2
+ call void @take_i8s(i8 %char1, i8 %char2)
+; CHECK: ldrb w0, [{{x[0-9]+}}, #:lo12:var8]
+; CHECK: ldrb w1, [{{x[0-9]+}}, #:lo12:var8_2]
+; CHECK: bl take_i8s
+
+ %float1 = load float* @varfloat
+ %float2 = load float* @varfloat_2
+ call void @take_floats(float %float1, float %float2)
+; CHECK: ldr s1, [{{x[0-9]+}}, #:lo12:varfloat_2]
+; CHECK: ldr s0, [{{x[0-9]+}}, #:lo12:varfloat]
+; CHECK: bl take_floats
+
+ ret void
+}
+
+declare i32 @return_int()
+declare double @return_double()
+declare [2 x i64] @return_smallstruct()
+declare void @return_large_struct(%myStruct* sret %retval)
+
+define void @simple_rets() {
+; CHECK: simple_rets:
+
+ %int = call i32 @return_int()
+ store i32 %int, i32* @var32
+; CHECK: bl return_int
+; CHECK: str w0, [{{x[0-9]+}}, #:lo12:var32]
+
+ %dbl = call double @return_double()
+ store double %dbl, double* @vardouble
+; CHECK: bl return_double
+; CHECK: str d0, [{{x[0-9]+}}, #:lo12:vardouble]
+
+ %arr = call [2 x i64] @return_smallstruct()
+ store [2 x i64] %arr, [2 x i64]* @varsmallstruct
+; CHECK: bl return_smallstruct
+; CHECK: str x1, [{{x[0-9]+}}, #8]
+; CHECK: str x0, [{{x[0-9]+}}, #:lo12:varsmallstruct]
+
+ call void @return_large_struct(%myStruct* sret @varstruct)
+; CHECK: add x8, {{x[0-9]+}}, #:lo12:varstruct
+; CHECK: bl return_large_struct
+
+ ret void
+}
+
+
+declare i32 @struct_on_stack(i8 %var0, i16 %var1, i32 %var2, i64 %var3, i128 %var45,
+ i32* %var6, %myStruct* byval %struct, i32 %stacked,
+ double %notstacked)
+declare void @stacked_fpu(float %var0, double %var1, float %var2, float %var3,
+ float %var4, float %var5, float %var6, float %var7,
+ float %var8)
+
+define void @check_stack_args() {
+ call i32 @struct_on_stack(i8 0, i16 12, i32 42, i64 99, i128 1,
+ i32* @var32, %myStruct* byval @varstruct,
+ i32 999, double 1.0)
+ ; Want to check that the final double is passed in registers and
+ ; that varstruct is passed on the stack. Rather dependent on how a
+ ; memcpy gets created, but the following works for now.
+; CHECK: mov x0, sp
+; CHECK: str {{w[0-9]+}}, [x0]
+; CHECK: str {{w[0-9]+}}, [x0, #12]
+; CHECK: fmov d0,
+; CHECK: bl struct_on_stack
+
+ call void @stacked_fpu(float -1.0, double 1.0, float 4.0, float 2.0,
+ float -2.0, float -8.0, float 16.0, float 1.0,
+ float 64.0)
+; CHECK: ldr s[[STACKEDREG:[0-9]+]], .LCPI
+; CHECK: mov x0, sp
+; CHECK: str d[[STACKEDREG]], [x0]
+; CHECK: bl stacked_fpu
+ ret void
+}
+
+
+declare void @check_i128_stackalign(i32 %val0, i32 %val1, i32 %val2, i32 %val3,
+ i32 %val4, i32 %val5, i32 %val6, i32 %val7,
+ i32 %stack1, i128 %stack2)
+
+declare void @check_i128_regalign(i32 %val0, i128 %val1)
+
+
+define void @check_i128_align() {
+; CHECK: check_i128_align:
+ %val = load i128* @var128
+ call void @check_i128_stackalign(i32 0, i32 1, i32 2, i32 3,
+ i32 4, i32 5, i32 6, i32 7,
+ i32 42, i128 %val)
+; CHECK: ldr [[I128LO:x[0-9]+]], [{{x[0-9]+}}, #:lo12:var128]
+; CHECK: ldr [[I128HI:x[0-9]+]], [{{x[0-9]+}}, #8]
+; CHECK: mov x[[SPREG:[0-9]+]], sp
+; CHECK: str [[I128HI]], [x[[SPREG]], #24]
+; CHECK: str [[I128LO]], [x[[SPREG]], #16]
+; CHECK: bl check_i128_stackalign
+
+ call void @check_i128_regalign(i32 0, i128 42)
+; CHECK-NOT: mov x1
+; CHECK: movz x2, #42
+; CHECK: mov x3, xzr
+; CHECK: bl check_i128_regalign
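+; (The i128 must start in an even-numbered register, so it lands in x2/x3 and
+; x1 is left unused.)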
+
+ ret void
+}
+
+@fptr = global void()* null
+
+define void @check_indirect_call() {
+; CHECK: check_indirect_call:
+ %func = load void()** @fptr
+ call void %func()
+; CHECK: ldr [[FPTR:x[0-9]+]], [{{x[0-9]+}}, #:lo12:fptr]
+; CHECK: blr [[FPTR]]
+
+ ret void
+}
diff --git a/test/CodeGen/AArch64/global-alignment.ll b/test/CodeGen/AArch64/global-alignment.ll
new file mode 100644
index 0000000000..afd70e08c7
--- /dev/null
+++ b/test/CodeGen/AArch64/global-alignment.ll
@@ -0,0 +1,69 @@
+; RUN: llc -march=aarch64 -verify-machineinstrs < %s | FileCheck %s
+
+@var32 = global [3 x i32] zeroinitializer
+@var64 = global [3 x i64] zeroinitializer
+@var32_align64 = global [3 x i32] zeroinitializer, align 8
+
+define i64 @test_align32() {
+; CHECK: test_align32:
+ %addr = bitcast [3 x i32]* @var32 to i64*
+
+ ; Since @var32 is only guaranteed to be aligned to 32-bits, it's invalid to
+  ; emit an "LDR x0, [x0, #:lo12:var32]" instruction to implement this load.
+ %val = load i64* %addr
+; CHECK: adrp [[HIBITS:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[HIBITS]], #:lo12:var32
+; CHECK: ldr x0, [x[[ADDR]]]
+
+ ret i64 %val
+}
+
+define i64 @test_align64() {
+; CHECK: test_align64:
+ %addr = bitcast [3 x i64]* @var64 to i64*
+
+ ; However, var64 *is* properly aligned and emitting an adrp/add/ldr would be
+ ; inefficient.
+ %val = load i64* %addr
+; CHECK: adrp x[[HIBITS:[0-9]+]], var64
+; CHECK-NOT: add x[[HIBITS]]
+; CHECK: ldr x0, [x[[HIBITS]], #:lo12:var64]
+
+ ret i64 %val
+}
+
+define i64 @test_var32_align64() {
+; CHECK: test_var32_align64:
+ %addr = bitcast [3 x i32]* @var32_align64 to i64*
+
+  ; @var32_align64, by contrast, is explicitly given 8-byte alignment, so it is
+  ; safe to fold the #:lo12: offset straight into the load.
+ %val = load i64* %addr
+; CHECK: adrp x[[HIBITS:[0-9]+]], var32_align64
+; CHECK-NOT: add x[[HIBITS]]
+; CHECK: ldr x0, [x[[HIBITS]], #:lo12:var32_align64]
+
+ ret i64 %val
+}
+
+@yet_another_var = external global {i32, i32}
+
+define i64 @test_yet_another_var() {
+; CHECK: test_yet_another_var:
+
+ ; @yet_another_var has a preferred alignment of 8, but that's not enough if
+ ; we're going to be linking against other things. Its ABI alignment is only 4
+ ; so we can't fold the load.
+ %val = load i64* bitcast({i32, i32}* @yet_another_var to i64*)
+; CHECK: adrp [[HIBITS:x[0-9]+]], yet_another_var
+; CHECK: add x[[ADDR:[0-9]+]], [[HIBITS]], #:lo12:yet_another_var
+; CHECK: ldr x0, [x[[ADDR]]]
+ ret i64 %val
+}
+
+define i64()* @test_functions() {
+; CHECK: test_functions:
+ ret i64()* @test_yet_another_var
+; CHECK: adrp [[HIBITS:x[0-9]+]], test_yet_another_var
+; CHECK: add x0, [[HIBITS]], #:lo12:test_yet_another_var
+}
diff --git a/test/CodeGen/AArch64/got-abuse.ll b/test/CodeGen/AArch64/got-abuse.ll
new file mode 100644
index 0000000000..b233697aa2
--- /dev/null
+++ b/test/CodeGen/AArch64/got-abuse.ll
@@ -0,0 +1,23 @@
+; RUN: llc -march=aarch64 -relocation-model=pic < %s | FileCheck %s
+; RUN: llc -march=aarch64 -relocation-model=pic -filetype=obj < %s
+
+; LLVM gives well-defined semantics to this horrible construct (though C says
+; it's undefined). Regardless, we shouldn't crash. The important feature here is
+; that in general the only way to access a GOT symbol is via a 64-bit
+; load. Neither of these alternatives has the ELF relocations required to
+; support it:
+; + ldr wD, [xN, #:got_lo12:func]
+; + add xD, xN, #:got_lo12:func
+
+declare void @consume(i32)
+declare void @func()
+
+define void @foo() nounwind {
+; CHECK: foo:
+entry:
+ call void @consume(i32 ptrtoint (void ()* @func to i32))
+; CHECK: adrp x[[ADDRHI:[0-9]+]], :got:func
+; CHECK: ldr {{x[0-9]+}}, [x[[ADDRHI]], #:got_lo12:func]
+ ret void
+}
+
diff --git a/test/CodeGen/AArch64/i128-align.ll b/test/CodeGen/AArch64/i128-align.ll
new file mode 100644
index 0000000000..2b6d2cdb43
--- /dev/null
+++ b/test/CodeGen/AArch64/i128-align.ll
@@ -0,0 +1,29 @@
+; RUN: llc -march=aarch64 -verify-machineinstrs < %s | FileCheck %s
+
+%struct = type { i32, i128, i8 }
+
+@var = global %struct zeroinitializer
+
+define i64 @check_size() {
+; CHECK: check_size:
+ %starti = ptrtoint %struct* @var to i64
+
+ %endp = getelementptr %struct* @var, i64 1
+ %endi = ptrtoint %struct* %endp to i64
+
+ %diff = sub i64 %endi, %starti
+ ret i64 %diff
+; CHECK: movz x0, #48
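+; (Layout: i32 at offset 0, then padding so the i128 sits at its 16-byte
+; alignment at offset 16, the i8 at offset 32, and padding up to a total size
+; of 48.)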
+}
+
+define i64 @check_field() {
+; CHECK: check_field:
+ %starti = ptrtoint %struct* @var to i64
+
+ %endp = getelementptr %struct* @var, i64 0, i32 1
+ %endi = ptrtoint i128* %endp to i64
+
+ %diff = sub i64 %endi, %starti
+ ret i64 %diff
+; CHECK: movz x0, #16
+} \ No newline at end of file
diff --git a/test/CodeGen/AArch64/illegal-float-ops.ll b/test/CodeGen/AArch64/illegal-float-ops.ll
new file mode 100644
index 0000000000..8c735dda3e
--- /dev/null
+++ b/test/CodeGen/AArch64/illegal-float-ops.ll
@@ -0,0 +1,221 @@
+; RUN: llc -march=aarch64 -verify-machineinstrs < %s | FileCheck %s
+
+@varfloat = global float 0.0
+@vardouble = global double 0.0
+@varfp128 = global fp128 zeroinitializer
+
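+; None of these operations can be selected directly for these types, so each
+; one should be lowered to a call into libm or the runtime support library
+; (e.g. cosf/cos/cosl, __powisf2).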
+declare float @llvm.cos.f32(float)
+declare double @llvm.cos.f64(double)
+declare fp128 @llvm.cos.f128(fp128)
+
+define void @test_cos(float %float, double %double, fp128 %fp128) {
+; CHECK: test_cos:
+
+ %cosfloat = call float @llvm.cos.f32(float %float)
+ store float %cosfloat, float* @varfloat
+; CHECK: bl cosf
+
+ %cosdouble = call double @llvm.cos.f64(double %double)
+ store double %cosdouble, double* @vardouble
+; CHECK: bl cos
+
+ %cosfp128 = call fp128 @llvm.cos.f128(fp128 %fp128)
+ store fp128 %cosfp128, fp128* @varfp128
+; CHECK: bl cosl
+
+ ret void
+}
+
+declare float @llvm.exp.f32(float)
+declare double @llvm.exp.f64(double)
+declare fp128 @llvm.exp.f128(fp128)
+
+define void @test_exp(float %float, double %double, fp128 %fp128) {
+; CHECK: test_exp:
+
+ %expfloat = call float @llvm.exp.f32(float %float)
+ store float %expfloat, float* @varfloat
+; CHECK: bl expf
+
+ %expdouble = call double @llvm.exp.f64(double %double)
+ store double %expdouble, double* @vardouble
+; CHECK: bl exp
+
+ %expfp128 = call fp128 @llvm.exp.f128(fp128 %fp128)
+ store fp128 %expfp128, fp128* @varfp128
+; CHECK: bl expl
+
+ ret void
+}
+
+declare float @llvm.exp2.f32(float)
+declare double @llvm.exp2.f64(double)
+declare fp128 @llvm.exp2.f128(fp128)
+
+define void @test_exp2(float %float, double %double, fp128 %fp128) {
+; CHECK: test_exp2:
+
+ %exp2float = call float @llvm.exp2.f32(float %float)
+ store float %exp2float, float* @varfloat
+; CHECK: bl exp2f
+
+ %exp2double = call double @llvm.exp2.f64(double %double)
+ store double %exp2double, double* @vardouble
+; CHECK: bl exp2
+
+ %exp2fp128 = call fp128 @llvm.exp2.f128(fp128 %fp128)
+ store fp128 %exp2fp128, fp128* @varfp128
+; CHECK: bl exp2l
+ ret void
+
+}
+
+declare float @llvm.log.f32(float)
+declare double @llvm.log.f64(double)
+declare fp128 @llvm.log.f128(fp128)
+
+define void @test_log(float %float, double %double, fp128 %fp128) {
+; CHECK: test_log:
+
+ %logfloat = call float @llvm.log.f32(float %float)
+ store float %logfloat, float* @varfloat
+; CHECK: bl logf
+
+ %logdouble = call double @llvm.log.f64(double %double)
+ store double %logdouble, double* @vardouble
+; CHECK: bl log
+
+ %logfp128 = call fp128 @llvm.log.f128(fp128 %fp128)
+ store fp128 %logfp128, fp128* @varfp128
+; CHECK: bl logl
+
+ ret void
+}
+
+declare float @llvm.log2.f32(float)
+declare double @llvm.log2.f64(double)
+declare fp128 @llvm.log2.f128(fp128)
+
+define void @test_log2(float %float, double %double, fp128 %fp128) {
+; CHECK: test_log2:
+
+ %log2float = call float @llvm.log2.f32(float %float)
+ store float %log2float, float* @varfloat
+; CHECK: bl log2f
+
+ %log2double = call double @llvm.log2.f64(double %double)
+ store double %log2double, double* @vardouble
+; CHECK: bl log2
+
+ %log2fp128 = call fp128 @llvm.log2.f128(fp128 %fp128)
+ store fp128 %log2fp128, fp128* @varfp128
+; CHECK: bl log2l
+ ret void
+
+}
+
+declare float @llvm.log10.f32(float)
+declare double @llvm.log10.f64(double)
+declare fp128 @llvm.log10.f128(fp128)
+
+define void @test_log10(float %float, double %double, fp128 %fp128) {
+; CHECK: test_log10:
+
+ %log10float = call float @llvm.log10.f32(float %float)
+ store float %log10float, float* @varfloat
+; CHECK: bl log10f
+
+ %log10double = call double @llvm.log10.f64(double %double)
+ store double %log10double, double* @vardouble
+; CHECK: bl log10
+
+ %log10fp128 = call fp128 @llvm.log10.f128(fp128 %fp128)
+ store fp128 %log10fp128, fp128* @varfp128
+; CHECK: bl log10l
+
+ ret void
+}
+
+declare float @llvm.sin.f32(float)
+declare double @llvm.sin.f64(double)
+declare fp128 @llvm.sin.f128(fp128)
+
+define void @test_sin(float %float, double %double, fp128 %fp128) {
+; CHECK: test_sin:
+
+ %sinfloat = call float @llvm.sin.f32(float %float)
+ store float %sinfloat, float* @varfloat
+; CHECK: bl sinf
+
+ %sindouble = call double @llvm.sin.f64(double %double)
+ store double %sindouble, double* @vardouble
+; CHECK: bl sin
+
+ %sinfp128 = call fp128 @llvm.sin.f128(fp128 %fp128)
+ store fp128 %sinfp128, fp128* @varfp128
+; CHECK: bl sinl
+ ret void
+
+}
+
+declare float @llvm.pow.f32(float, float)
+declare double @llvm.pow.f64(double, double)
+declare fp128 @llvm.pow.f128(fp128, fp128)
+
+define void @test_pow(float %float, double %double, fp128 %fp128) {
+; CHECK: test_pow:
+
+ %powfloat = call float @llvm.pow.f32(float %float, float %float)
+ store float %powfloat, float* @varfloat
+; CHECK: bl powf
+
+ %powdouble = call double @llvm.pow.f64(double %double, double %double)
+ store double %powdouble, double* @vardouble
+; CHECK: bl pow
+
+ %powfp128 = call fp128 @llvm.pow.f128(fp128 %fp128, fp128 %fp128)
+ store fp128 %powfp128, fp128* @varfp128
+; CHECK: bl powl
+
+ ret void
+}
+
+declare float @llvm.powi.f32(float, i32)
+declare double @llvm.powi.f64(double, i32)
+declare fp128 @llvm.powi.f128(fp128, i32)
+
+define void @test_powi(float %float, double %double, i32 %exponent, fp128 %fp128) {
+; CHECK: test_powi:
+
+ %powifloat = call float @llvm.powi.f32(float %float, i32 %exponent)
+ store float %powifloat, float* @varfloat
+; CHECK: bl __powisf2
+
+ %powidouble = call double @llvm.powi.f64(double %double, i32 %exponent)
+ store double %powidouble, double* @vardouble
+; CHECK: bl __powidf2
+
+ %powifp128 = call fp128 @llvm.powi.f128(fp128 %fp128, i32 %exponent)
+ store fp128 %powifp128, fp128* @varfp128
+; CHECK: bl __powitf2
+ ret void
+
+}
+
+define void @test_frem(float %float, double %double, fp128 %fp128) {
+; CHECK: test_frem:
+
+ %fremfloat = frem float %float, %float
+ store float %fremfloat, float* @varfloat
+; CHECK: bl fmodf
+
+ %fremdouble = frem double %double, %double
+ store double %fremdouble, double* @vardouble
+; CHECK: bl fmod
+
+ %fremfp128 = frem fp128 %fp128, %fp128
+ store fp128 %fremfp128, fp128* @varfp128
+; CHECK: bl fmodl
+
+ ret void
+}
diff --git a/test/CodeGen/AArch64/init-array.ll b/test/CodeGen/AArch64/init-array.ll
new file mode 100644
index 0000000000..d80be8f3a6
--- /dev/null
+++ b/test/CodeGen/AArch64/init-array.ll
@@ -0,0 +1,9 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -use-init-array < %s | FileCheck %s
+
+define internal void @_GLOBAL__I_a() section ".text.startup" {
+ ret void
+}
+
+@llvm.global_ctors = appending global [1 x { i32, void ()* }] [{ i32, void ()* } { i32 65535, void ()* @_GLOBAL__I_a }]
+
+; CHECK: .section .init_array \ No newline at end of file
diff --git a/test/CodeGen/AArch64/inline-asm-constraints-badI.ll b/test/CodeGen/AArch64/inline-asm-constraints-badI.ll
new file mode 100644
index 0000000000..c300482d10
--- /dev/null
+++ b/test/CodeGen/AArch64/inline-asm-constraints-badI.ll
@@ -0,0 +1,7 @@
+; RUN: not llc -march=aarch64 < %s
+
+define void @foo() {
+ ; Out of range immediate for I.
+ call void asm sideeffect "add x0, x0, $0", "I"(i32 4096)
+ ret void
+} \ No newline at end of file
diff --git a/test/CodeGen/AArch64/inline-asm-constraints-badK.ll b/test/CodeGen/AArch64/inline-asm-constraints-badK.ll
new file mode 100644
index 0000000000..2b5229cd9c
--- /dev/null
+++ b/test/CodeGen/AArch64/inline-asm-constraints-badK.ll
@@ -0,0 +1,7 @@
+; RUN: not llc -march=aarch64 < %s
+
+define void @foo() {
+ ; 32-bit bitpattern ending in 1101 can't be produced.
+ call void asm sideeffect "and w0, w0, $0", "K"(i32 13)
+ ret void
+} \ No newline at end of file
diff --git a/test/CodeGen/AArch64/inline-asm-constraints-badK2.ll b/test/CodeGen/AArch64/inline-asm-constraints-badK2.ll
new file mode 100644
index 0000000000..f0ad87a39b
--- /dev/null
+++ b/test/CodeGen/AArch64/inline-asm-constraints-badK2.ll
@@ -0,0 +1,7 @@
+; RUN: not llc -march=aarch64 < %s
+
+define void @foo() {
+  ; The K constraint only accepts valid 32-bit logical immediates; this value needs 64 bits.
+ call void asm sideeffect "and w0, w0, $0", "K"(i64 4294967296)
+ ret void
+} \ No newline at end of file
diff --git a/test/CodeGen/AArch64/inline-asm-constraints-badL.ll b/test/CodeGen/AArch64/inline-asm-constraints-badL.ll
new file mode 100644
index 0000000000..90da9ba128
--- /dev/null
+++ b/test/CodeGen/AArch64/inline-asm-constraints-badL.ll
@@ -0,0 +1,7 @@
+; RUN: not llc -march=aarch64 < %s
+
+define void @foo() {
+  ; 13 (0b1101) is not a valid logical immediate, so the 64-bit L constraint can't be satisfied.
+ call void asm sideeffect "and x0, x0, $0", "L"(i32 13)
+ ret void
+} \ No newline at end of file
diff --git a/test/CodeGen/AArch64/inline-asm-constraints.ll b/test/CodeGen/AArch64/inline-asm-constraints.ll
new file mode 100644
index 0000000000..fb3e392187
--- /dev/null
+++ b/test/CodeGen/AArch64/inline-asm-constraints.ll
@@ -0,0 +1,117 @@
+; RUN: llc -march=aarch64 < %s | FileCheck %s
+
+define i64 @test_inline_constraint_r(i64 %base, i32 %offset) {
+; CHECK: test_inline_constraint_r:
+ %val = call i64 asm "add $0, $1, $2, sxtw", "=r,r,r"(i64 %base, i32 %offset)
+; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxtw
+ ret i64 %val
+}
+
+define i16 @test_small_reg(i16 %lhs, i16 %rhs) {
+; CHECK: test_small_reg:
+ %val = call i16 asm sideeffect "add $0, $1, $2, sxth", "=r,r,r"(i16 %lhs, i16 %rhs)
+; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, sxth
+ ret i16 %val
+}
+
+define i64 @test_inline_constraint_r_imm(i64 %base, i32 %offset) {
+; CHECK: test_inline_constraint_r_imm:
+ %val = call i64 asm "add $0, $1, $2, sxtw", "=r,r,r"(i64 4, i32 12)
+; CHECK: movz [[FOUR:x[0-9]+]], #4
+; CHECK: movz [[TWELVE:w[0-9]+]], #12
+; CHECK: add {{x[0-9]+}}, [[FOUR]], [[TWELVE]], sxtw
+ ret i64 %val
+}
+
+; m is permitted to have a base/offset form. We don't do that
+; currently though.
+define i32 @test_inline_constraint_m(i32 *%ptr) {
+; CHECK: test_inline_constraint_m:
+ %val = call i32 asm "ldr $0, $1", "=r,m"(i32 *%ptr)
+; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}]
+ ret i32 %val
+}
+
+@arr = global [8 x i32] zeroinitializer
+
+; Q should *never* have base/offset form even if given the chance.
+define i32 @test_inline_constraint_Q(i32 *%ptr) {
+; CHECK: test_inline_constraint_Q:
+ %val = call i32 asm "ldr $0, $1", "=r,Q"(i32* getelementptr([8 x i32]* @arr, i32 0, i32 1))
+; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}]
+ ret i32 %val
+}
+
+@dump = global fp128 zeroinitializer
+
+define void @test_inline_constraint_I() {
+; CHECK: test_inline_constraint_I:
+ call void asm sideeffect "add x0, x0, $0", "I"(i32 0)
+ call void asm sideeffect "add x0, x0, $0", "I"(i64 4095)
+; CHECK: add x0, x0, #0
+; CHECK: add x0, x0, #4095
+
+ ret void
+}
+
+; Skip J because it's useless
+
+define void @test_inline_constraint_K() {
+; CHECK: test_inline_constraint_K:
+ call void asm sideeffect "and w0, w0, $0", "K"(i32 2863311530) ; = 0xaaaaaaaa
+ call void asm sideeffect "and w0, w0, $0", "K"(i32 65535)
+; CHECK: and w0, w0, #-1431655766
+; CHECK: and w0, w0, #65535
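+; (2863311530 is 0xaaaaaaaa, which the printer shows as the signed 32-bit
+; value -1431655766.)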
+
+ ret void
+}
+
+define void @test_inline_constraint_L() {
+; CHECK: test_inline_constraint_L:
+  call void asm sideeffect "and x0, x0, $0", "L"(i64 4294967296) ; = 0x100000000
+ call void asm sideeffect "and x0, x0, $0", "L"(i64 65535)
+; CHECK: and x0, x0, #4294967296
+; CHECK: and x0, x0, #65535
+
+ ret void
+}
+
+; Skip M and N because we don't support MOV pseudo-instructions yet.
+
+@var = global i32 0
+
+define void @test_inline_constraint_S() {
+; CHECK: test_inline_constraint_S:
+ call void asm sideeffect "adrp x0, $0", "S"(i32* @var)
+ call void asm sideeffect "adrp x0, ${0:A}", "S"(i32* @var)
+ call void asm sideeffect "add x0, x0, ${0:L}", "S"(i32* @var)
+; CHECK: adrp x0, var
+; CHECK: adrp x0, var
+; CHECK: add x0, x0, #:lo12:var
+ ret void
+}
+
+define i32 @test_inline_constraint_S_label(i1 %in) {
+; CHECK: test_inline_constraint_S_label:
+ call void asm sideeffect "adr x0, $0", "S"(i8* blockaddress(@test_inline_constraint_S_label, %loc))
+; CHECK: adr x0, .Ltmp{{[0-9]+}}
+ br i1 %in, label %loc, label %loc2
+loc:
+ ret i32 0
+loc2:
+ ret i32 42
+}
+
+define void @test_inline_constraint_Y() {
+; CHECK: test_inline_constraint_Y:
+ call void asm sideeffect "fcmp s0, $0", "Y"(float 0.0)
+; CHECK: fcmp s0, #0.0
+ ret void
+}
+
+define void @test_inline_constraint_Z() {
+; CHECK: test_inline_constraint_Z:
+ call void asm sideeffect "cmp w0, $0", "Z"(i32 0)
+; CHECK: cmp w0, #0
+ ret void
+} \ No newline at end of file
diff --git a/test/CodeGen/AArch64/inline-asm-modifiers.ll b/test/CodeGen/AArch64/inline-asm-modifiers.ll
new file mode 100644
index 0000000000..5b485aac3f
--- /dev/null
+++ b/test/CodeGen/AArch64/inline-asm-modifiers.ll
@@ -0,0 +1,125 @@
+; RUN: llc -march=aarch64 -relocation-model=pic < %s | FileCheck %s
+; RUN: llc -march=aarch64 -relocation-model=pic -filetype=obj < %s | llvm-objdump -r - | FileCheck --check-prefix=CHECK-ELF %s
+
+@var_simple = hidden global i32 0
+@var_got = global i32 0
+@var_tlsgd = thread_local global i32 0
+@var_tlsld = thread_local(localdynamic) global i32 0
+@var_tlsie = thread_local(initialexec) global i32 0
+@var_tlsle = thread_local(localexec) global i32 0
+
+define void @test_inline_modifier_L() nounwind {
+; CHECK: test_inline_modifier_L:
+ call void asm sideeffect "add x0, x0, ${0:L}", "S,~{x0}"(i32* @var_simple)
+ call void asm sideeffect "ldr x0, [x0, ${0:L}]", "S,~{x0}"(i32* @var_got)
+ call void asm sideeffect "add x0, x0, ${0:L}", "S,~{x0}"(i32* @var_tlsgd)
+ call void asm sideeffect "add x0, x0, ${0:L}", "S,~{x0}"(i32* @var_tlsld)
+ call void asm sideeffect "ldr x0, [x0, ${0:L}]", "S,~{x0}"(i32* @var_tlsie)
+ call void asm sideeffect "add x0, x0, ${0:L}", "S,~{x0}"(i32* @var_tlsle)
+; CHECK: add x0, x0, #:lo12:var_simple
+; CHECK: ldr x0, [x0, #:got_lo12:var_got]
+; CHECK: add x0, x0, #:tlsdesc_lo12:var_tlsgd
+; CHECK: add x0, x0, #:dtprel_lo12:var_tlsld
+; CHECK: ldr x0, [x0, #:gottprel_lo12:var_tlsie]
+; CHECK: add x0, x0, #:tprel_lo12:var_tlsle
+
+; CHECK-ELF: R_AARCH64_ADD_ABS_LO12_NC var_simple
+; CHECK-ELF: R_AARCH64_LD64_GOT_LO12_NC var_got
+; CHECK-ELF: R_AARCH64_TLSDESC_ADD_LO12_NC var_tlsgd
+; CHECK-ELF: R_AARCH64_TLSLD_ADD_DTPREL_LO12 var_tlsld
+; CHECK-ELF: R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC var_tlsie
+; CHECK-ELF: R_AARCH64_TLSLE_ADD_TPREL_LO12 var_tlsle
+
+ ret void
+}
+
+define void @test_inline_modifier_G() nounwind {
+; CHECK: test_inline_modifier_G:
+ call void asm sideeffect "add x0, x0, ${0:G}, lsl #12", "S,~{x0}"(i32* @var_tlsld)
+ call void asm sideeffect "add x0, x0, ${0:G}, lsl #12", "S,~{x0}"(i32* @var_tlsle)
+; CHECK: add x0, x0, #:dtprel_hi12:var_tlsld, lsl #12
+; CHECK: add x0, x0, #:tprel_hi12:var_tlsle, lsl #12
+
+; CHECK-ELF: R_AARCH64_TLSLD_ADD_DTPREL_HI12 var_tlsld
+; CHECK-ELF: R_AARCH64_TLSLE_ADD_TPREL_HI12 var_tlsle
+
+ ret void
+}
+
+define void @test_inline_modifier_A() nounwind {
+; CHECK: test_inline_modifier_A:
+ call void asm sideeffect "adrp x0, ${0:A}", "S,~{x0}"(i32* @var_simple)
+ call void asm sideeffect "adrp x0, ${0:A}", "S,~{x0}"(i32* @var_got)
+ call void asm sideeffect "adrp x0, ${0:A}", "S,~{x0}"(i32* @var_tlsgd)
+ call void asm sideeffect "adrp x0, ${0:A}", "S,~{x0}"(i32* @var_tlsie)
+  ; N.b. tprel and dtprel relocs always carry a modifier (lo12 or hi12 granules), so there is no plain :A form for them.
+; CHECK: adrp x0, var_simple
+; CHECK: adrp x0, :got:var_got
+; CHECK: adrp x0, :tlsdesc:var_tlsgd
+; CHECK: adrp x0, :gottprel:var_tlsie
+
+; CHECK-ELF: R_AARCH64_ADR_PREL_PG_HI21 var_simple
+; CHECK-ELF: R_AARCH64_ADR_GOT_PAGE var_got
+; CHECK-ELF: R_AARCH64_TLSDESC_ADR_PAGE var_tlsgd
+; CHECK-ELF: R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21 var_tlsie
+
+ ret void
+}
+
+define void @test_inline_modifier_wx(i32 %small, i64 %big) nounwind {
+; CHECK: test_inline_modifier_wx:
+ call i32 asm sideeffect "add $0, $0, $0", "=r,0"(i32 %small)
+ call i32 asm sideeffect "add ${0:w}, ${0:w}, ${0:w}", "=r,0"(i32 %small)
+ call i32 asm sideeffect "add ${0:x}, ${0:x}, ${0:x}", "=r,0"(i32 %small)
+; CHECK: //APP
+; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
+
+ call i64 asm sideeffect "add $0, $0, $0", "=r,0"(i64 %big)
+ call i64 asm sideeffect "add ${0:w}, ${0:w}, ${0:w}", "=r,0"(i64 %big)
+ call i64 asm sideeffect "add ${0:x}, ${0:x}, ${0:x}", "=r,0"(i64 %big)
+; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
+; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
+
+ call i32 asm sideeffect "add ${0:w}, ${1:w}, ${1:w}", "=r,r"(i32 0)
+ call i32 asm sideeffect "add ${0:x}, ${1:x}, ${1:x}", "=r,r"(i32 0)
+; CHECK: add {{w[0-9]+}}, wzr, wzr
+; CHECK: add {{x[0-9]+}}, xzr, xzr
+ ret void
+}
+
+define void @test_inline_modifier_bhsdq() nounwind {
+; CHECK: test_inline_modifier_bhsdq:
+ call float asm sideeffect "ldr ${0:b}, [sp]", "=w"()
+ call float asm sideeffect "ldr ${0:h}, [sp]", "=w"()
+ call float asm sideeffect "ldr ${0:s}, [sp]", "=w"()
+ call float asm sideeffect "ldr ${0:d}, [sp]", "=w"()
+ call float asm sideeffect "ldr ${0:q}, [sp]", "=w"()
+; CHECK: ldr b0, [sp]
+; CHECK: ldr h0, [sp]
+; CHECK: ldr s0, [sp]
+; CHECK: ldr d0, [sp]
+; CHECK: ldr q0, [sp]
+
+ call double asm sideeffect "ldr ${0:b}, [sp]", "=w"()
+ call double asm sideeffect "ldr ${0:h}, [sp]", "=w"()
+ call double asm sideeffect "ldr ${0:s}, [sp]", "=w"()
+ call double asm sideeffect "ldr ${0:d}, [sp]", "=w"()
+ call double asm sideeffect "ldr ${0:q}, [sp]", "=w"()
+; CHECK: ldr b0, [sp]
+; CHECK: ldr h0, [sp]
+; CHECK: ldr s0, [sp]
+; CHECK: ldr d0, [sp]
+; CHECK: ldr q0, [sp]
+ ret void
+}
+
+define void @test_inline_modifier_c() nounwind {
+; CHECK: test_inline_modifier_c:
+ call void asm sideeffect "adr x0, ${0:c}", "i"(i32 3)
+; CHECK: adr x0, 3
+
+ ret void
+} \ No newline at end of file
diff --git a/test/CodeGen/AArch64/jump-table.ll b/test/CodeGen/AArch64/jump-table.ll
new file mode 100644
index 0000000000..037813398b
--- /dev/null
+++ b/test/CodeGen/AArch64/jump-table.ll
@@ -0,0 +1,56 @@
+; RUN: llc -verify-machineinstrs < %s -march=aarch64 | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s -march=aarch64 -filetype=obj | elf-dump | FileCheck %s -check-prefix=CHECK-ELF
+
+define i32 @test_jumptable(i32 %in) {
+; CHECK: test_jumptable
+
+ switch i32 %in, label %def [
+ i32 0, label %lbl1
+ i32 1, label %lbl2
+ i32 2, label %lbl3
+ i32 4, label %lbl4
+ ]
+; CHECK: adrp [[JTPAGE:x[0-9]+]], .LJTI0_0
+; CHECK: add x[[JT:[0-9]+]], [[JTPAGE]], #:lo12:.LJTI0_0
+; CHECK: ldr [[DEST:x[0-9]+]], [x[[JT]], {{x[0-9]+}}, lsl #3]
+; CHECK: br [[DEST]]
+
+def:
+ ret i32 0
+
+lbl1:
+ ret i32 1
+
+lbl2:
+ ret i32 2
+
+lbl3:
+ ret i32 4
+
+lbl4:
+ ret i32 8
+
+}
+
+; CHECK: .rodata
+
+; CHECK: .LJTI0_0:
+; CHECK-NEXT: .xword
+; CHECK-NEXT: .xword
+; CHECK-NEXT: .xword
+; CHECK-NEXT: .xword
+; CHECK-NEXT: .xword
+
+; ELF tests:
+
+; First make sure we get a page/lo12 pair in .text to pick up the jump-table
+; CHECK-ELF: .rela.text
+; CHECK-ELF: ('r_sym', 0x00000008)
+; CHECK-ELF-NEXT: ('r_type', 0x00000113)
+; CHECK-ELF: ('r_sym', 0x00000008)
+; CHECK-ELF-NEXT: ('r_type', 0x00000115)
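+; (0x113 is R_AARCH64_ADR_PREL_PG_HI21 and 0x115 is R_AARCH64_ADD_ABS_LO12_NC;
+; the 0x101 in .rodata below is R_AARCH64_ABS64.)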
+
+; Also check the targets in .rodata are relocated
+; CHECK-ELF: .rela.rodata
+; CHECK-ELF: ('r_sym', 0x00000005)
+; CHECK-ELF-NEXT: ('r_type', 0x00000101) \ No newline at end of file
diff --git a/test/CodeGen/AArch64/large-frame.ll b/test/CodeGen/AArch64/large-frame.ll
new file mode 100644
index 0000000000..e924461f87
--- /dev/null
+++ b/test/CodeGen/AArch64/large-frame.ll
@@ -0,0 +1,117 @@
+; RUN: llc -verify-machineinstrs -march=aarch64 < %s | FileCheck %s
+declare void @use_addr(i8*)
+
+@addr = global i8* null
+
+define void @test_bigframe() {
+; CHECK: test_bigframe:
+
+ %var1 = alloca i8, i32 20000000
+ %var2 = alloca i8, i32 16
+ %var3 = alloca i8, i32 20000000
+; CHECK: sub sp, sp, #496
+; CHECK: str x30, [sp, #488]
+; CHECK: ldr [[FRAMEOFFSET:x[0-9]+]], [[FRAMEOFFSET_CPI:.LCPI0_[0-9]+]]
+; CHECK: sub sp, sp, [[FRAMEOFFSET]]
+
+; CHECK: ldr [[VAR1OFFSET:x[0-9]+]], [[VAR1LOC_CPI:.LCPI0_[0-9]+]]
+; CHECK: add {{x[0-9]+}}, sp, [[VAR1OFFSET]]
+ store volatile i8* %var1, i8** @addr
+
+ %var1plus2 = getelementptr i8* %var1, i32 2
+ store volatile i8* %var1plus2, i8** @addr
+
+; CHECK: ldr [[VAR2OFFSET:x[0-9]+]], [[VAR2LOC_CPI:.LCPI0_[0-9]+]]
+; CHECK: add {{x[0-9]+}}, sp, [[VAR2OFFSET]]
+ store volatile i8* %var2, i8** @addr
+
+ %var2plus2 = getelementptr i8* %var2, i32 2
+ store volatile i8* %var2plus2, i8** @addr
+
+ store volatile i8* %var3, i8** @addr
+
+ %var3plus2 = getelementptr i8* %var3, i32 2
+ store volatile i8* %var3plus2, i8** @addr
+
+; CHECK: ldr [[FRAMEOFFSET:x[0-9]+]], [[FRAMEOFFSET_CPI]]
+; CHECK: add sp, sp, [[FRAMEOFFSET]]
+ ret void
+
+; CHECK: [[FRAMEOFFSET_CPI]]:
+; CHECK-NEXT: 39999536
+
+; CHECK: [[VAR1LOC_CPI]]:
+; CHECK-NEXT: 20000024
+
+; CHECK: [[VAR2LOC_CPI]]:
+; CHECK-NEXT: 20000008
+}
+
+define void @test_mediumframe() {
+; CHECK: test_mediumframe:
+ %var1 = alloca i8, i32 1000000
+ %var2 = alloca i8, i32 16
+ %var3 = alloca i8, i32 1000000
+; CHECK: sub sp, sp, #496
+; CHECK: str x30, [sp, #488]
+; CHECK: sub sp, sp, #688
+; CHECK-NEXT: sub sp, sp, #488, lsl #12
+
+ store volatile i8* %var1, i8** @addr
+; CHECK: add [[VAR1ADDR:x[0-9]+]], sp, #600
+; CHECK: add [[VAR1ADDR]], [[VAR1ADDR]], #244, lsl #12
+
+ %var1plus2 = getelementptr i8* %var1, i32 2
+ store volatile i8* %var1plus2, i8** @addr
+; CHECK: add [[VAR1PLUS2:x[0-9]+]], {{x[0-9]+}}, #2
+
+ store volatile i8* %var2, i8** @addr
+; CHECK: add [[VAR2ADDR:x[0-9]+]], sp, #584
+; CHECK: add [[VAR2ADDR]], [[VAR2ADDR]], #244, lsl #12
+
+ %var2plus2 = getelementptr i8* %var2, i32 2
+ store volatile i8* %var2plus2, i8** @addr
+; CHECK: add [[VAR2PLUS2:x[0-9]+]], {{x[0-9]+}}, #2
+
+ store volatile i8* %var3, i8** @addr
+
+ %var3plus2 = getelementptr i8* %var3, i32 2
+ store volatile i8* %var3plus2, i8** @addr
+
+; CHECK: add sp, sp, #688
+; CHECK: add sp, sp, #488, lsl #12
+; CHECK: ldr x30, [sp, #488]
+; CHECK: add sp, sp, #496
+ ret void
+}
+
+
+@bigspace = global [8 x i64] zeroinitializer
+
+; If temporary registers are allocated for adjustment, they should *not* clobber
+; argument registers.
+define void @test_tempallocation([8 x i64] %val) nounwind {
+; CHECK: test_tempallocation:
+ %var = alloca i8, i32 1000000
+; CHECK: sub sp, sp,
+
+; Make sure the prologue is reasonably efficient
+; CHECK-NEXT: stp x29, x30, [sp,
+; CHECK-NEXT: stp x25, x26, [sp,
+; CHECK-NEXT: stp x23, x24, [sp,
+; CHECK-NEXT: stp x21, x22, [sp,
+; CHECK-NEXT: stp x19, x20, [sp,
+
+; Make sure we don't trash an argument register
+; CHECK-NOT: ldr {{x[0-7]}}, .LCPI1
+; CHECK: sub sp, sp,
+
+; CHECK-NOT: ldr {{x[0-7]}}, .LCPI1
+
+; CHECK: bl use_addr
+ call void @use_addr(i8* %var)
+
+ store [8 x i64] %val, [8 x i64]* @bigspace
+ ret void
+; CHECK: ret
+}
diff --git a/test/CodeGen/AArch64/ldst-regoffset.ll b/test/CodeGen/AArch64/ldst-regoffset.ll
new file mode 100644
index 0000000000..13c682c655
--- /dev/null
+++ b/test/CodeGen/AArch64/ldst-regoffset.ll
@@ -0,0 +1,333 @@
+; RUN: llc -verify-machineinstrs < %s -march=aarch64 | FileCheck %s
+
+@var_8bit = global i8 0
+@var_16bit = global i16 0
+@var_32bit = global i32 0
+@var_64bit = global i64 0
+
+@var_float = global float 0.0
+@var_double = global double 0.0
+
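+; Each access below should use a register-offset addressing mode: a 64-bit
+; index (optionally shifted left by log2 of the access size), or a 32-bit index
+; extended with sxtw/uxtw.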
+define void @ldst_8bit(i8* %base, i32 %off32, i64 %off64) {
+; CHECK: ldst_8bit:
+
+ %addr8_sxtw = getelementptr i8* %base, i32 %off32
+ %val8_sxtw = load volatile i8* %addr8_sxtw
+ %val32_signed = sext i8 %val8_sxtw to i32
+ store volatile i32 %val32_signed, i32* @var_32bit
+; CHECK: ldrsb {{w[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw]
+
+ %addr_lsl = getelementptr i8* %base, i64 %off64
+ %val8_lsl = load volatile i8* %addr_lsl
+ %val32_unsigned = zext i8 %val8_lsl to i32
+ store volatile i32 %val32_unsigned, i32* @var_32bit
+; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}]
+
+ %addrint_uxtw = ptrtoint i8* %base to i64
+ %offset_uxtw = zext i32 %off32 to i64
+ %addrint1_uxtw = add i64 %addrint_uxtw, %offset_uxtw
+ %addr_uxtw = inttoptr i64 %addrint1_uxtw to i8*
+ %val8_uxtw = load volatile i8* %addr_uxtw
+ %newval8 = add i8 %val8_uxtw, 1
+ store volatile i8 %newval8, i8* @var_8bit
+; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw]
+
+ ret void
+}
+
+
+define void @ldst_16bit(i16* %base, i32 %off32, i64 %off64) {
+; CHECK: ldst_16bit:
+
+ %addr8_sxtwN = getelementptr i16* %base, i32 %off32
+ %val8_sxtwN = load volatile i16* %addr8_sxtwN
+ %val32_signed = sext i16 %val8_sxtwN to i32
+ store volatile i32 %val32_signed, i32* @var_32bit
+; CHECK: ldrsh {{w[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw #1]
+
+ %addr_lslN = getelementptr i16* %base, i64 %off64
+ %val8_lslN = load volatile i16* %addr_lslN
+ %val32_unsigned = zext i16 %val8_lslN to i32
+ store volatile i32 %val32_unsigned, i32* @var_32bit
+; CHECK: ldrh {{w[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}, lsl #1]
+
+ %addrint_uxtw = ptrtoint i16* %base to i64
+ %offset_uxtw = zext i32 %off32 to i64
+ %addrint1_uxtw = add i64 %addrint_uxtw, %offset_uxtw
+ %addr_uxtw = inttoptr i64 %addrint1_uxtw to i16*
+ %val8_uxtw = load volatile i16* %addr_uxtw
+ %newval8 = add i16 %val8_uxtw, 1
+ store volatile i16 %newval8, i16* @var_16bit
+; CHECK: ldrh {{w[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw]
+
+ %base_sxtw = ptrtoint i16* %base to i64
+ %offset_sxtw = sext i32 %off32 to i64
+ %addrint_sxtw = add i64 %base_sxtw, %offset_sxtw
+ %addr_sxtw = inttoptr i64 %addrint_sxtw to i16*
+ %val16_sxtw = load volatile i16* %addr_sxtw
+ %val64_signed = sext i16 %val16_sxtw to i64
+ store volatile i64 %val64_signed, i64* @var_64bit
+; CHECK: ldrsh {{x[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw]
+
+
+ %base_lsl = ptrtoint i16* %base to i64
+ %addrint_lsl = add i64 %base_lsl, %off64
+ %addr_lsl = inttoptr i64 %addrint_lsl to i16*
+ %val16_lsl = load volatile i16* %addr_lsl
+ %val64_unsigned = zext i16 %val16_lsl to i64
+ store volatile i64 %val64_unsigned, i64* @var_64bit
+; CHECK: ldrh {{w[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}]
+
+ %base_uxtwN = ptrtoint i16* %base to i64
+ %offset_uxtwN = zext i32 %off32 to i64
+ %offset2_uxtwN = shl i64 %offset_uxtwN, 1
+ %addrint_uxtwN = add i64 %base_uxtwN, %offset2_uxtwN
+ %addr_uxtwN = inttoptr i64 %addrint_uxtwN to i16*
+ %val32 = load volatile i32* @var_32bit
+ %val16_trunc32 = trunc i32 %val32 to i16
+ store volatile i16 %val16_trunc32, i16* %addr_uxtwN
+; CHECK: strh {{w[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw #1]
+ ret void
+}
+
+define void @ldst_32bit(i32* %base, i32 %off32, i64 %off64) {
+; CHECK: ldst_32bit:
+
+ %addr_sxtwN = getelementptr i32* %base, i32 %off32
+ %val_sxtwN = load volatile i32* %addr_sxtwN
+ store volatile i32 %val_sxtwN, i32* @var_32bit
+; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw #2]
+
+ %addr_lslN = getelementptr i32* %base, i64 %off64
+ %val_lslN = load volatile i32* %addr_lslN
+ store volatile i32 %val_lslN, i32* @var_32bit
+; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}, lsl #2]
+
+ %addrint_uxtw = ptrtoint i32* %base to i64
+ %offset_uxtw = zext i32 %off32 to i64
+ %addrint1_uxtw = add i64 %addrint_uxtw, %offset_uxtw
+ %addr_uxtw = inttoptr i64 %addrint1_uxtw to i32*
+ %val_uxtw = load volatile i32* %addr_uxtw
+ %newval8 = add i32 %val_uxtw, 1
+ store volatile i32 %newval8, i32* @var_32bit
+; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw]
+
+
+ %base_sxtw = ptrtoint i32* %base to i64
+ %offset_sxtw = sext i32 %off32 to i64
+ %addrint_sxtw = add i64 %base_sxtw, %offset_sxtw
+ %addr_sxtw = inttoptr i64 %addrint_sxtw to i32*
+ %val16_sxtw = load volatile i32* %addr_sxtw
+ %val64_signed = sext i32 %val16_sxtw to i64
+ store volatile i64 %val64_signed, i64* @var_64bit
+; CHECK: ldrsw {{x[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw]
+
+
+ %base_lsl = ptrtoint i32* %base to i64
+ %addrint_lsl = add i64 %base_lsl, %off64
+ %addr_lsl = inttoptr i64 %addrint_lsl to i32*
+ %val16_lsl = load volatile i32* %addr_lsl
+ %val64_unsigned = zext i32 %val16_lsl to i64
+ store volatile i64 %val64_unsigned, i64* @var_64bit
+; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}]
+
+ %base_uxtwN = ptrtoint i32* %base to i64
+ %offset_uxtwN = zext i32 %off32 to i64
+ %offset2_uxtwN = shl i64 %offset_uxtwN, 2
+ %addrint_uxtwN = add i64 %base_uxtwN, %offset2_uxtwN
+ %addr_uxtwN = inttoptr i64 %addrint_uxtwN to i32*
+ %val32 = load volatile i32* @var_32bit
+ store volatile i32 %val32, i32* %addr_uxtwN
+; CHECK: str {{w[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw #2]
+ ret void
+}
+
+define void @ldst_64bit(i64* %base, i32 %off32, i64 %off64) {
+; CHECK: ldst_64bit:
+
+ %addr_sxtwN = getelementptr i64* %base, i32 %off32
+ %val_sxtwN = load volatile i64* %addr_sxtwN
+ store volatile i64 %val_sxtwN, i64* @var_64bit
+; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw #3]
+
+ %addr_lslN = getelementptr i64* %base, i64 %off64
+ %val_lslN = load volatile i64* %addr_lslN
+ store volatile i64 %val_lslN, i64* @var_64bit
+; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}, lsl #3]
+
+ %addrint_uxtw = ptrtoint i64* %base to i64
+ %offset_uxtw = zext i32 %off32 to i64
+ %addrint1_uxtw = add i64 %addrint_uxtw, %offset_uxtw
+ %addr_uxtw = inttoptr i64 %addrint1_uxtw to i64*
+ %val8_uxtw = load volatile i64* %addr_uxtw
+ %newval8 = add i64 %val8_uxtw, 1
+ store volatile i64 %newval8, i64* @var_64bit
+; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw]
+
+ %base_sxtw = ptrtoint i64* %base to i64
+ %offset_sxtw = sext i32 %off32 to i64
+ %addrint_sxtw = add i64 %base_sxtw, %offset_sxtw
+ %addr_sxtw = inttoptr i64 %addrint_sxtw to i64*
+ %val64_sxtw = load volatile i64* %addr_sxtw
+ store volatile i64 %val64_sxtw, i64* @var_64bit
+; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw]
+
+ %base_lsl = ptrtoint i64* %base to i64
+ %addrint_lsl = add i64 %base_lsl, %off64
+ %addr_lsl = inttoptr i64 %addrint_lsl to i64*
+ %val64_lsl = load volatile i64* %addr_lsl
+ store volatile i64 %val64_lsl, i64* @var_64bit
+; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}]
+
+ %base_uxtwN = ptrtoint i64* %base to i64
+ %offset_uxtwN = zext i32 %off32 to i64
+ %offset2_uxtwN = shl i64 %offset_uxtwN, 3
+ %addrint_uxtwN = add i64 %base_uxtwN, %offset2_uxtwN
+ %addr_uxtwN = inttoptr i64 %addrint_uxtwN to i64*
+ %val64 = load volatile i64* @var_64bit
+ store volatile i64 %val64, i64* %addr_uxtwN
+; CHECK: str {{x[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw #3]
+ ret void
+}
+
+define void @ldst_float(float* %base, i32 %off32, i64 %off64) {
+; CHECK: ldst_float:
+
+ %addr_sxtwN = getelementptr float* %base, i32 %off32
+ %val_sxtwN = load volatile float* %addr_sxtwN
+ store volatile float %val_sxtwN, float* @var_float
+; CHECK: ldr {{s[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw #2]
+
+ %addr_lslN = getelementptr float* %base, i64 %off64
+ %val_lslN = load volatile float* %addr_lslN
+ store volatile float %val_lslN, float* @var_float
+; CHECK: ldr {{s[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}, lsl #2]
+
+ %addrint_uxtw = ptrtoint float* %base to i64
+ %offset_uxtw = zext i32 %off32 to i64
+ %addrint1_uxtw = add i64 %addrint_uxtw, %offset_uxtw
+ %addr_uxtw = inttoptr i64 %addrint1_uxtw to float*
+ %val_uxtw = load volatile float* %addr_uxtw
+ store volatile float %val_uxtw, float* @var_float
+; CHECK: ldr {{s[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw]
+
+ %base_sxtw = ptrtoint float* %base to i64
+ %offset_sxtw = sext i32 %off32 to i64
+ %addrint_sxtw = add i64 %base_sxtw, %offset_sxtw
+ %addr_sxtw = inttoptr i64 %addrint_sxtw to float*
+ %val64_sxtw = load volatile float* %addr_sxtw
+ store volatile float %val64_sxtw, float* @var_float
+; CHECK: ldr {{s[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw]
+
+ %base_lsl = ptrtoint float* %base to i64
+ %addrint_lsl = add i64 %base_lsl, %off64
+ %addr_lsl = inttoptr i64 %addrint_lsl to float*
+ %val64_lsl = load volatile float* %addr_lsl
+ store volatile float %val64_lsl, float* @var_float
+; CHECK: ldr {{s[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}]
+
+ %base_uxtwN = ptrtoint float* %base to i64
+ %offset_uxtwN = zext i32 %off32 to i64
+ %offset2_uxtwN = shl i64 %offset_uxtwN, 2
+ %addrint_uxtwN = add i64 %base_uxtwN, %offset2_uxtwN
+ %addr_uxtwN = inttoptr i64 %addrint_uxtwN to float*
+ %val64 = load volatile float* @var_float
+ store volatile float %val64, float* %addr_uxtwN
+; CHECK: str {{s[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw #2]
+ ret void
+}
+
+define void @ldst_double(double* %base, i32 %off32, i64 %off64) {
+; CHECK: ldst_double:
+
+ %addr_sxtwN = getelementptr double* %base, i32 %off32
+ %val_sxtwN = load volatile double* %addr_sxtwN
+ store volatile double %val_sxtwN, double* @var_double
+; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw #3]
+
+ %addr_lslN = getelementptr double* %base, i64 %off64
+ %val_lslN = load volatile double* %addr_lslN
+ store volatile double %val_lslN, double* @var_double
+; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}, lsl #3]
+
+ %addrint_uxtw = ptrtoint double* %base to i64
+ %offset_uxtw = zext i32 %off32 to i64
+ %addrint1_uxtw = add i64 %addrint_uxtw, %offset_uxtw
+ %addr_uxtw = inttoptr i64 %addrint1_uxtw to double*
+ %val_uxtw = load volatile double* %addr_uxtw
+ store volatile double %val_uxtw, double* @var_double
+; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw]
+
+ %base_sxtw = ptrtoint double* %base to i64
+ %offset_sxtw = sext i32 %off32 to i64
+ %addrint_sxtw = add i64 %base_sxtw, %offset_sxtw
+ %addr_sxtw = inttoptr i64 %addrint_sxtw to double*
+ %val64_sxtw = load volatile double* %addr_sxtw
+ store volatile double %val64_sxtw, double* @var_double
+; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw]
+
+ %base_lsl = ptrtoint double* %base to i64
+ %addrint_lsl = add i64 %base_lsl, %off64
+ %addr_lsl = inttoptr i64 %addrint_lsl to double*
+ %val64_lsl = load volatile double* %addr_lsl
+ store volatile double %val64_lsl, double* @var_double
+; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}]
+
+ %base_uxtwN = ptrtoint double* %base to i64
+ %offset_uxtwN = zext i32 %off32 to i64
+ %offset2_uxtwN = shl i64 %offset_uxtwN, 3
+ %addrint_uxtwN = add i64 %base_uxtwN, %offset2_uxtwN
+ %addr_uxtwN = inttoptr i64 %addrint_uxtwN to double*
+ %val64 = load volatile double* @var_double
+ store volatile double %val64, double* %addr_uxtwN
+; CHECK: str {{d[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw #3]
+ ret void
+}
+
+
+define void @ldst_128bit(fp128* %base, i32 %off32, i64 %off64) {
+; CHECK: ldst_128bit:
+
+ %addr_sxtwN = getelementptr fp128* %base, i32 %off32
+ %val_sxtwN = load volatile fp128* %addr_sxtwN
+ store volatile fp128 %val_sxtwN, fp128* %base
+; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw #4]
+
+ %addr_lslN = getelementptr fp128* %base, i64 %off64
+ %val_lslN = load volatile fp128* %addr_lslN
+ store volatile fp128 %val_lslN, fp128* %base
+; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}, lsl #4]
+
+ %addrint_uxtw = ptrtoint fp128* %base to i64
+ %offset_uxtw = zext i32 %off32 to i64
+ %addrint1_uxtw = add i64 %addrint_uxtw, %offset_uxtw
+ %addr_uxtw = inttoptr i64 %addrint1_uxtw to fp128*
+ %val_uxtw = load volatile fp128* %addr_uxtw
+ store volatile fp128 %val_uxtw, fp128* %base
+; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw]
+
+ %base_sxtw = ptrtoint fp128* %base to i64
+ %offset_sxtw = sext i32 %off32 to i64
+ %addrint_sxtw = add i64 %base_sxtw, %offset_sxtw
+ %addr_sxtw = inttoptr i64 %addrint_sxtw to fp128*
+ %val64_sxtw = load volatile fp128* %addr_sxtw
+ store volatile fp128 %val64_sxtw, fp128* %base
+; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw]
+
+ %base_lsl = ptrtoint fp128* %base to i64
+ %addrint_lsl = add i64 %base_lsl, %off64
+ %addr_lsl = inttoptr i64 %addrint_lsl to fp128*
+ %val64_lsl = load volatile fp128* %addr_lsl
+ store volatile fp128 %val64_lsl, fp128* %base
+; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}]
+
+ %base_uxtwN = ptrtoint fp128* %base to i64
+ %offset_uxtwN = zext i32 %off32 to i64
+ %offset2_uxtwN = shl i64 %offset_uxtwN, 4
+ %addrint_uxtwN = add i64 %base_uxtwN, %offset2_uxtwN
+ %addr_uxtwN = inttoptr i64 %addrint_uxtwN to fp128*
+ %val64 = load volatile fp128* %base
+ store volatile fp128 %val64, fp128* %addr_uxtwN
+; CHECK: str {{q[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw #4]
+ ret void
+}
diff --git a/test/CodeGen/AArch64/ldst-unscaledimm.ll b/test/CodeGen/AArch64/ldst-unscaledimm.ll
new file mode 100644
index 0000000000..dcc50ae11f
--- /dev/null
+++ b/test/CodeGen/AArch64/ldst-unscaledimm.ll
@@ -0,0 +1,218 @@
+; RUN: llc -verify-machineinstrs < %s -march=aarch64 | FileCheck %s
+
+@var_8bit = global i8 0
+@var_16bit = global i16 0
+@var_32bit = global i32 0
+@var_64bit = global i64 0
+
+@var_float = global float 0.0
+@var_double = global double 0.0
+
+@varptr = global i8* null
+
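+; The negative and unaligned offsets below can't be encoded as scaled unsigned
+; immediates, so they should select the unscaled LDUR/STUR forms, which take a
+; signed 9-bit byte offset (-256 to 255).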
+define void @ldst_8bit() {
+; CHECK: ldst_8bit:
+
+; No architectural support for loads to 16-bit or 8-bit since we
+; promote i8 during lowering.
+ %addr_8bit = load i8** @varptr
+
+; match a sign-extending load 8-bit -> 32-bit
+ %addr_sext32 = getelementptr i8* %addr_8bit, i64 -256
+ %val8_sext32 = load volatile i8* %addr_sext32
+ %val32_signed = sext i8 %val8_sext32 to i32
+ store volatile i32 %val32_signed, i32* @var_32bit
+; CHECK: ldursb {{w[0-9]+}}, [{{x[0-9]+}}, #-256]
+
+; match a zero-extending load volatile 8-bit -> 32-bit
+ %addr_zext32 = getelementptr i8* %addr_8bit, i64 -12
+ %val8_zext32 = load volatile i8* %addr_zext32
+ %val32_unsigned = zext i8 %val8_zext32 to i32
+ store volatile i32 %val32_unsigned, i32* @var_32bit
+; CHECK: ldurb {{w[0-9]+}}, [{{x[0-9]+}}, #-12]
+
+; match an any-extending load volatile 8-bit -> 32-bit
+ %addr_anyext = getelementptr i8* %addr_8bit, i64 -1
+ %val8_anyext = load volatile i8* %addr_anyext
+ %newval8 = add i8 %val8_anyext, 1
+ store volatile i8 %newval8, i8* @var_8bit
+; CHECK: ldurb {{w[0-9]+}}, [{{x[0-9]+}}, #-1]
+
+; match a sign-extending load volatile 8-bit -> 64-bit
+ %addr_sext64 = getelementptr i8* %addr_8bit, i64 -5
+ %val8_sext64 = load volatile i8* %addr_sext64
+ %val64_signed = sext i8 %val8_sext64 to i64
+ store volatile i64 %val64_signed, i64* @var_64bit
+; CHECK: ldursb {{x[0-9]+}}, [{{x[0-9]+}}, #-5]
+
+; match a zero-extending load volatile 8-bit -> 64-bit.
+; This uses the fact that ldrb w0, [x0] will zero out the high 32-bits
+; of x0 so it's identical to loading to 32-bits.
+ %addr_zext64 = getelementptr i8* %addr_8bit, i64 -9
+ %val8_zext64 = load volatile i8* %addr_zext64
+ %val64_unsigned = zext i8 %val8_zext64 to i64
+ store volatile i64 %val64_unsigned, i64* @var_64bit
+; CHECK: ldurb {{w[0-9]+}}, [{{x[0-9]+}}, #-9]
+
+; truncating store volatile 32-bits to 8-bits
+ %addr_trunc32 = getelementptr i8* %addr_8bit, i64 -256
+ %val32 = load volatile i32* @var_32bit
+ %val8_trunc32 = trunc i32 %val32 to i8
+ store volatile i8 %val8_trunc32, i8* %addr_trunc32
+; CHECK: sturb {{w[0-9]+}}, [{{x[0-9]+}}, #-256]
+
+; truncating store volatile 64-bits to 8-bits
+ %addr_trunc64 = getelementptr i8* %addr_8bit, i64 -1
+ %val64 = load volatile i64* @var_64bit
+ %val8_trunc64 = trunc i64 %val64 to i8
+ store volatile i8 %val8_trunc64, i8* %addr_trunc64
+; CHECK: sturb {{w[0-9]+}}, [{{x[0-9]+}}, #-1]
+
+ ret void
+}
+
+define void @ldst_16bit() {
+; CHECK: ldst_16bit:
+
+; No architectural support for loads to 8-bit or 16-bit since we
+; promote i16 during lowering.
+ %addr_8bit = load i8** @varptr
+
+; match a sign-extending load 16-bit -> 32-bit
+ %addr8_sext32 = getelementptr i8* %addr_8bit, i64 -256
+ %addr_sext32 = bitcast i8* %addr8_sext32 to i16*
+ %val16_sext32 = load volatile i16* %addr_sext32
+ %val32_signed = sext i16 %val16_sext32 to i32
+ store volatile i32 %val32_signed, i32* @var_32bit
+; CHECK: ldursh {{w[0-9]+}}, [{{x[0-9]+}}, #-256]
+
+; match a zero-extending load volatile 16-bit -> 32-bit, with an offset that would be unaligned for a scaled access.
+ %addr8_zext32 = getelementptr i8* %addr_8bit, i64 15
+ %addr_zext32 = bitcast i8* %addr8_zext32 to i16*
+ %val16_zext32 = load volatile i16* %addr_zext32
+ %val32_unsigned = zext i16 %val16_zext32 to i32
+ store volatile i32 %val32_unsigned, i32* @var_32bit
+; CHECK: ldurh {{w[0-9]+}}, [{{x[0-9]+}}, #15]
+
+; match an any-extending load volatile 16-bit -> 32-bit
+ %addr8_anyext = getelementptr i8* %addr_8bit, i64 -1
+ %addr_anyext = bitcast i8* %addr8_anyext to i16*
+ %val16_anyext = load volatile i16* %addr_anyext
+ %newval16 = add i16 %val16_anyext, 1
+ store volatile i16 %newval16, i16* @var_16bit
+; CHECK: ldurh {{w[0-9]+}}, [{{x[0-9]+}}, #-1]
+
+; match a sign-extending load volatile 16-bit -> 64-bit
+ %addr8_sext64 = getelementptr i8* %addr_8bit, i64 -5
+ %addr_sext64 = bitcast i8* %addr8_sext64 to i16*
+ %val16_sext64 = load volatile i16* %addr_sext64
+ %val64_signed = sext i16 %val16_sext64 to i64
+ store volatile i64 %val64_signed, i64* @var_64bit
+; CHECK: ldursh {{x[0-9]+}}, [{{x[0-9]+}}, #-5]
+
+; match a zero-extending load volatile 16-bit -> 64-bit.
+; This uses the fact that ldrh w0, [x0] will zero out the high 32-bits
+; of x0 so it's identical to loading to 32-bits.
+ %addr8_zext64 = getelementptr i8* %addr_8bit, i64 9
+ %addr_zext64 = bitcast i8* %addr8_zext64 to i16*
+ %val16_zext64 = load volatile i16* %addr_zext64
+ %val64_unsigned = zext i16 %val16_zext64 to i64
+ store volatile i64 %val64_unsigned, i64* @var_64bit
+; CHECK: ldurh {{w[0-9]+}}, [{{x[0-9]+}}, #9]
+
+; truncating store volatile 32-bits to 16-bits
+ %addr8_trunc32 = getelementptr i8* %addr_8bit, i64 -256
+ %addr_trunc32 = bitcast i8* %addr8_trunc32 to i16*
+ %val32 = load volatile i32* @var_32bit
+ %val16_trunc32 = trunc i32 %val32 to i16
+ store volatile i16 %val16_trunc32, i16* %addr_trunc32
+; CHECK: sturh {{w[0-9]+}}, [{{x[0-9]+}}, #-256]
+
+; truncating store volatile 64-bits to 16-bits
+ %addr8_trunc64 = getelementptr i8* %addr_8bit, i64 -1
+ %addr_trunc64 = bitcast i8* %addr8_trunc64 to i16*
+ %val64 = load volatile i64* @var_64bit
+ %val16_trunc64 = trunc i64 %val64 to i16
+ store volatile i16 %val16_trunc64, i16* %addr_trunc64
+; CHECK: sturh {{w[0-9]+}}, [{{x[0-9]+}}, #-1]
+
+ ret void
+}
+
+define void @ldst_32bit() {
+; CHECK: ldst_32bit:
+
+ %addr_8bit = load i8** @varptr
+
+; Straight 32-bit load/store
+ %addr32_8_noext = getelementptr i8* %addr_8bit, i64 1
+ %addr32_noext = bitcast i8* %addr32_8_noext to i32*
+ %val32_noext = load volatile i32* %addr32_noext
+ store volatile i32 %val32_noext, i32* %addr32_noext
+; CHECK: ldur {{w[0-9]+}}, [{{x[0-9]+}}, #1]
+; CHECK: stur {{w[0-9]+}}, [{{x[0-9]+}}, #1]
+
+; Zero-extension to 64-bits
+ %addr32_8_zext = getelementptr i8* %addr_8bit, i64 -256
+ %addr32_zext = bitcast i8* %addr32_8_zext to i32*
+ %val32_zext = load volatile i32* %addr32_zext
+ %val64_unsigned = zext i32 %val32_zext to i64
+ store volatile i64 %val64_unsigned, i64* @var_64bit
+; CHECK: ldur {{w[0-9]+}}, [{{x[0-9]+}}, #-256]
+; CHECK: str {{x[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_64bit]
+
+; Sign-extension to 64-bits
+ %addr32_8_sext = getelementptr i8* %addr_8bit, i64 -12
+ %addr32_sext = bitcast i8* %addr32_8_sext to i32*
+ %val32_sext = load volatile i32* %addr32_sext
+ %val64_signed = sext i32 %val32_sext to i64
+ store volatile i64 %val64_signed, i64* @var_64bit
+; CHECK: ldursw {{x[0-9]+}}, [{{x[0-9]+}}, #-12]
+; CHECK: str {{x[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_64bit]
+
+; Truncation from 64-bits
+ %addr64_8_trunc = getelementptr i8* %addr_8bit, i64 255
+ %addr64_trunc = bitcast i8* %addr64_8_trunc to i64*
+ %addr32_8_trunc = getelementptr i8* %addr_8bit, i64 -20
+ %addr32_trunc = bitcast i8* %addr32_8_trunc to i32*
+
+ %val64_trunc = load volatile i64* %addr64_trunc
+ %val32_trunc = trunc i64 %val64_trunc to i32
+ store volatile i32 %val32_trunc, i32* %addr32_trunc
+; CHECK: ldur {{x[0-9]+}}, [{{x[0-9]+}}, #255]
+; CHECK: stur {{w[0-9]+}}, [{{x[0-9]+}}, #-20]
+
+ ret void
+}
+
+define void @ldst_float() {
+; CHECK: ldst_float:
+
+ %addr_8bit = load i8** @varptr
+ %addrfp_8 = getelementptr i8* %addr_8bit, i64 -5
+ %addrfp = bitcast i8* %addrfp_8 to float*
+
+ %valfp = load volatile float* %addrfp
+; CHECK: ldur {{s[0-9]+}}, [{{x[0-9]+}}, #-5]
+
+ store volatile float %valfp, float* %addrfp
+; CHECK: stur {{s[0-9]+}}, [{{x[0-9]+}}, #-5]
+
+ ret void
+}
+
+define void @ldst_double() {
+; CHECK: ldst_double:
+
+ %addr_8bit = load i8** @varptr
+ %addrfp_8 = getelementptr i8* %addr_8bit, i64 4
+ %addrfp = bitcast i8* %addrfp_8 to double*
+
+ %valfp = load volatile double* %addrfp
+; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #4]
+
+ store volatile double %valfp, double* %addrfp
+; CHECK: stur {{d[0-9]+}}, [{{x[0-9]+}}, #4]
+
+ ret void
+}
diff --git a/test/CodeGen/AArch64/ldst-unsignedimm.ll b/test/CodeGen/AArch64/ldst-unsignedimm.ll
new file mode 100644
index 0000000000..aa513f507f
--- /dev/null
+++ b/test/CodeGen/AArch64/ldst-unsignedimm.ll
@@ -0,0 +1,251 @@
+; RUN: llc -verify-machineinstrs < %s -march=aarch64 | FileCheck %s
+
+@var_8bit = global i8 0
+@var_16bit = global i16 0
+@var_32bit = global i32 0
+@var_64bit = global i64 0
+
+@var_float = global float 0.0
+@var_double = global double 0.0
+
+define void @ldst_8bit() {
+; CHECK: ldst_8bit:
+
+; No architectural support for loads to 16-bit or 8-bit since we
+; promote i8 during lowering.
+
+; match a sign-extending load 8-bit -> 32-bit
+ %val8_sext32 = load volatile i8* @var_8bit
+ %val32_signed = sext i8 %val8_sext32 to i32
+ store volatile i32 %val32_signed, i32* @var_32bit
+; CHECK: adrp {{x[0-9]+}}, var_8bit
+; CHECK: ldrsb {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_8bit]
+
+; match a zero-extending load volatile 8-bit -> 32-bit
+ %val8_zext32 = load volatile i8* @var_8bit
+ %val32_unsigned = zext i8 %val8_zext32 to i32
+ store volatile i32 %val32_unsigned, i32* @var_32bit
+; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_8bit]
+
+; match an any-extending load volatile 8-bit -> 32-bit
+ %val8_anyext = load volatile i8* @var_8bit
+ %newval8 = add i8 %val8_anyext, 1
+ store volatile i8 %newval8, i8* @var_8bit
+; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_8bit]
+
+; match a sign-extending load volatile 8-bit -> 64-bit
+ %val8_sext64 = load volatile i8* @var_8bit
+ %val64_signed = sext i8 %val8_sext64 to i64
+ store volatile i64 %val64_signed, i64* @var_64bit
+; CHECK: ldrsb {{x[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_8bit]
+
+; match a zero-extending load volatile 8-bit -> 64-bit.
+; This uses the fact that ldrb w0, [x0] will zero out the high 32 bits
+; of x0, so it's identical to loading to 32 bits.
+ %val8_zext64 = load volatile i8* @var_8bit
+ %val64_unsigned = zext i8 %val8_zext64 to i64
+ store volatile i64 %val64_unsigned, i64* @var_64bit
+; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_8bit]
+
+; truncating store volatile 32-bits to 8-bits
+ %val32 = load volatile i32* @var_32bit
+ %val8_trunc32 = trunc i32 %val32 to i8
+ store volatile i8 %val8_trunc32, i8* @var_8bit
+; CHECK: strb {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_8bit]
+
+; truncating store volatile 64-bits to 8-bits
+ %val64 = load volatile i64* @var_64bit
+ %val8_trunc64 = trunc i64 %val64 to i8
+ store volatile i8 %val8_trunc64, i8* @var_8bit
+; CHECK: strb {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_8bit]
+
+ ret void
+}
+
+define void @ldst_16bit() {
+; CHECK: ldst_16bit:
+
+; No architectural support for loads to 16-bit since we promote i16 during
+; lowering.
+
+; match a sign-extending load volatile 16-bit -> 32-bit
+ %val16_sext32 = load volatile i16* @var_16bit
+ %val32_signed = sext i16 %val16_sext32 to i32
+ store volatile i32 %val32_signed, i32* @var_32bit
+; CHECK: adrp {{x[0-9]+}}, var_16bit
+; CHECK: ldrsh {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_16bit]
+
+; match a zero-extending load volatile 16-bit -> 32-bit
+ %val16_zext32 = load volatile i16* @var_16bit
+ %val32_unsigned = zext i16 %val16_zext32 to i32
+ store volatile i32 %val32_unsigned, i32* @var_32bit
+; CHECK: ldrh {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_16bit]
+
+; match an any-extending load volatile 16-bit -> 32-bit
+ %val16_anyext = load volatile i16* @var_16bit
+ %newval16 = add i16 %val16_anyext, 1
+ store volatile i16 %newval16, i16* @var_16bit
+; CHECK: ldrh {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_16bit]
+
+; match a sign-extending load volatile 16-bit -> 64-bit
+ %val16_sext64 = load volatile i16* @var_16bit
+ %val64_signed = sext i16 %val16_sext64 to i64
+ store volatile i64 %val64_signed, i64* @var_64bit
+; CHECK: ldrsh {{x[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_16bit]
+
+; match a zero-extending load volatile 16-bit -> 64-bit.
+; This uses the fact that ldrh w0, [x0] will zero out the high 32 bits
+; of x0, so it's identical to loading to 32 bits.
+ %val16_zext64 = load volatile i16* @var_16bit
+ %val64_unsigned = zext i16 %val16_zext64 to i64
+ store volatile i64 %val64_unsigned, i64* @var_64bit
+; CHECK: ldrh {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_16bit]
+
+; truncating store volatile 32-bits to 16-bits
+ %val32 = load volatile i32* @var_32bit
+ %val16_trunc32 = trunc i32 %val32 to i16
+ store volatile i16 %val16_trunc32, i16* @var_16bit
+; CHECK: strh {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_16bit]
+
+; truncating store volatile 64-bits to 16-bits
+ %val64 = load volatile i64* @var_64bit
+ %val16_trunc64 = trunc i64 %val64 to i16
+ store volatile i16 %val16_trunc64, i16* @var_16bit
+; CHECK: strh {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_16bit]
+
+ ret void
+}
+
+define void @ldst_32bit() {
+; CHECK: ldst_32bit:
+
+; Straight 32-bit load/store
+ %val32_noext = load volatile i32* @var_32bit
+ store volatile i32 %val32_noext, i32* @var_32bit
+; CHECK: adrp {{x[0-9]+}}, var_32bit
+; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_32bit]
+; CHECK: str {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_32bit]
+
+; Zero-extension to 64-bits
+ %val32_zext = load volatile i32* @var_32bit
+ %val64_unsigned = zext i32 %val32_zext to i64
+ store volatile i64 %val64_unsigned, i64* @var_64bit
+; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_32bit]
+; CHECK: str {{x[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_64bit]
+
+; Sign-extension to 64-bits
+ %val32_sext = load volatile i32* @var_32bit
+ %val64_signed = sext i32 %val32_sext to i64
+ store volatile i64 %val64_signed, i64* @var_64bit
+; CHECK: ldrsw {{x[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_32bit]
+; CHECK: str {{x[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_64bit]
+
+; Truncation from 64-bits
+ %val64_trunc = load volatile i64* @var_64bit
+ %val32_trunc = trunc i64 %val64_trunc to i32
+ store volatile i32 %val32_trunc, i32* @var_32bit
+; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_64bit]
+; CHECK: str {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_32bit]
+
+ ret void
+}
+
+@arr8 = global i8* null
+@arr16 = global i16* null
+@arr32 = global i32* null
+@arr64 = global i64* null
+
+; Now check that our selection copes with accesses more complex than a
+; single symbol. Permitted offsets should be folded into the loads and
+; stores. Since all forms use the same Operand it's only necessary to
+; check the various access-sizes involved.
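+; (The unsigned-immediate forms scale the offset by the access size, so an
+; element index of 4095 shows up below as byte offsets of 4095, 8190, 16380
+; and 32760 for the 8-, 16-, 32- and 64-bit accesses respectively.)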
+
+define void @ldst_complex_offsets() {
+; CHECK: ldst_complex_offsets
+ %arr8_addr = load volatile i8** @arr8
+; CHECK: adrp {{x[0-9]+}}, arr8
+; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, #:lo12:arr8]
+
+ %arr8_sub1_addr = getelementptr i8* %arr8_addr, i64 1
+ %arr8_sub1 = load volatile i8* %arr8_sub1_addr
+ store volatile i8 %arr8_sub1, i8* @var_8bit
+; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, #1]
+
+ %arr8_sub4095_addr = getelementptr i8* %arr8_addr, i64 4095
+ %arr8_sub4095 = load volatile i8* %arr8_sub4095_addr
+ store volatile i8 %arr8_sub4095, i8* @var_8bit
+; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, #4095]
+
+
+ %arr16_addr = load volatile i16** @arr16
+; CHECK: adrp {{x[0-9]+}}, arr16
+; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, #:lo12:arr16]
+
+ %arr16_sub1_addr = getelementptr i16* %arr16_addr, i64 1
+ %arr16_sub1 = load volatile i16* %arr16_sub1_addr
+ store volatile i16 %arr16_sub1, i16* @var_16bit
+; CHECK: ldrh {{w[0-9]+}}, [{{x[0-9]+}}, #2]
+
+ %arr16_sub4095_addr = getelementptr i16* %arr16_addr, i64 4095
+ %arr16_sub4095 = load volatile i16* %arr16_sub4095_addr
+ store volatile i16 %arr16_sub4095, i16* @var_16bit
+; CHECK: ldrh {{w[0-9]+}}, [{{x[0-9]+}}, #8190]
+
+
+ %arr32_addr = load volatile i32** @arr32
+; CHECK: adrp {{x[0-9]+}}, arr32
+; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, #:lo12:arr32]
+
+ %arr32_sub1_addr = getelementptr i32* %arr32_addr, i64 1
+ %arr32_sub1 = load volatile i32* %arr32_sub1_addr
+ store volatile i32 %arr32_sub1, i32* @var_32bit
+; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}, #4]
+
+ %arr32_sub4095_addr = getelementptr i32* %arr32_addr, i64 4095
+ %arr32_sub4095 = load volatile i32* %arr32_sub4095_addr
+ store volatile i32 %arr32_sub4095, i32* @var_32bit
+; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}, #16380]
+
+
+ %arr64_addr = load volatile i64** @arr64
+; CHECK: adrp {{x[0-9]+}}, arr64
+; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, #:lo12:arr64]
+
+ %arr64_sub1_addr = getelementptr i64* %arr64_addr, i64 1
+ %arr64_sub1 = load volatile i64* %arr64_sub1_addr
+ store volatile i64 %arr64_sub1, i64* @var_64bit
+; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, #8]
+
+ %arr64_sub4095_addr = getelementptr i64* %arr64_addr, i64 4095
+ %arr64_sub4095 = load volatile i64* %arr64_sub4095_addr
+ store volatile i64 %arr64_sub4095, i64* @var_64bit
+; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, #32760]
+
+ ret void
+}
+
+define void @ldst_float() {
+; CHECK: ldst_float:
+
+ %valfp = load volatile float* @var_float
+; CHECK: adrp {{x[0-9]+}}, var_float
+; CHECK: ldr {{s[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_float]
+
+ store volatile float %valfp, float* @var_float
+; CHECK: str {{s[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_float]
+
+ ret void
+}
+
+define void @ldst_double() {
+; CHECK: ldst_double:
+
+ %valfp = load volatile double* @var_double
+; CHECK: adrp {{x[0-9]+}}, var_double
+; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_double]
+
+ store volatile double %valfp, double* @var_double
+; CHECK: str {{d[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_double]
+
+ ret void
+}
diff --git a/test/CodeGen/AArch64/lit.local.cfg b/test/CodeGen/AArch64/lit.local.cfg
new file mode 100644
index 0000000000..c5ce2411ed
--- /dev/null
+++ b/test/CodeGen/AArch64/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+targets = set(config.root.targets_to_build.split())
+if 'AArch64' not in targets:
+ config.unsupported = True
+
diff --git a/test/CodeGen/AArch64/literal_pools.ll b/test/CodeGen/AArch64/literal_pools.ll
new file mode 100644
index 0000000000..370d65cdf6
--- /dev/null
+++ b/test/CodeGen/AArch64/literal_pools.ll
@@ -0,0 +1,49 @@
+; RUN: llc -verify-machineinstrs < %s -march=aarch64 | FileCheck %s
+
+@var32 = global i32 0
+@var64 = global i64 0
+
+define void @foo() {
+; CHECK: foo:
+ %val32 = load i32* @var32
+ %val64 = load i64* @var64
+
+ %val32_lit32 = and i32 %val32, 123456785
+ store volatile i32 %val32_lit32, i32* @var32
+; CHECK: ldr {{w[0-9]+}}, .LCPI0
+
+ %val64_lit32 = and i64 %val64, 305402420
+ store volatile i64 %val64_lit32, i64* @var64
+; CHECK: ldr {{w[0-9]+}}, .LCPI0
+
+ %val64_lit32signed = and i64 %val64, -12345678
+ store volatile i64 %val64_lit32signed, i64* @var64
+; CHECK: ldrsw {{x[0-9]+}}, .LCPI0
+
+ %val64_lit64 = and i64 %val64, 1234567898765432
+ store volatile i64 %val64_lit64, i64* @var64
+; CHECK: ldr {{x[0-9]+}}, .LCPI0
+
+ ret void
+}
+
+@varfloat = global float 0.0
+@vardouble = global double 0.0
+
+define void @floating_lits() {
+; CHECK: floating_lits:
+
+ %floatval = load float* @varfloat
+ %newfloat = fadd float %floatval, 128.0
+; CHECK: ldr {{s[0-9]+}}, .LCPI1
+; CHECK: fadd
+ store float %newfloat, float* @varfloat
+
+ %doubleval = load double* @vardouble
+ %newdouble = fadd double %doubleval, 129.0
+; CHECK: ldr {{d[0-9]+}}, .LCPI1
+; CHECK: fadd
+ store double %newdouble, double* @vardouble
+
+ ret void
+}
diff --git a/test/CodeGen/AArch64/local_vars.ll b/test/CodeGen/AArch64/local_vars.ll
new file mode 100644
index 0000000000..c9826053b0
--- /dev/null
+++ b/test/CodeGen/AArch64/local_vars.ll
@@ -0,0 +1,57 @@
+; RUN: llc -verify-machineinstrs < %s -march=aarch64 -O0 | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s -march=aarch64 -O0 -disable-fp-elim | FileCheck -check-prefix CHECK-WITHFP %s
+
+; Make sure a reasonably sane prologue and epilogue are
+; generated. This test is not robust in the face of the frame-handling
+; code evolving, but it still has value as a sanity check for unrelated
+; changes, I believe.
+;
+; In particular, it will fail when ldp/stp are used for frame setup,
+; when FP-elim is implemented, and when addressing from FP is
+; implemented.
+
+@var = global i64 0
+@local_addr = global i64* null
+
+declare void @foo()
+
+define void @trivial_func() nounwind {
+; CHECK: trivial_func: // @trivial_func
+; CHECK-NEXT: // BB#0
+; CHECK-NEXT: ret
+
+ ret void
+}
+
+define void @trivial_fp_func() {
+; CHECK-WITHFP: trivial_fp_func:
+
+; CHECK-WITHFP: sub sp, sp, #16
+; CHECK-WITHFP: stp x29, x30, [sp]
+; CHECK-WITHFP-NEXT: mov x29, sp
+
+; Don't really care, but it would be a Bad Thing if this came after the epilogue.
+; CHECK: bl foo
+ call void @foo()
+ ret void
+
+; CHECK-WITHFP: ldp x29, x30, [sp]
+; CHECK-WITHFP: add sp, sp, #16
+
+; CHECK-WITHFP: ret
+}
+
+define void @stack_local() {
+ %local_var = alloca i64
+; CHECK: stack_local:
+; CHECK: sub sp, sp, #16
+
+ %val = load i64* @var
+ store i64 %val, i64* %local_var
+; CHECK: str {{x[0-9]+}}, [sp, #{{[0-9]+}}]
+
+ store i64* %local_var, i64** @local_addr
+; CHECK: add {{x[0-9]+}}, sp, #{{[0-9]+}}
+
+ ret void
+}
diff --git a/test/CodeGen/AArch64/logical-imm.ll b/test/CodeGen/AArch64/logical-imm.ll
new file mode 100644
index 0000000000..54c14dcd00
--- /dev/null
+++ b/test/CodeGen/AArch64/logical-imm.ll
@@ -0,0 +1,84 @@
+; RUN: llc -verify-machineinstrs < %s -march=aarch64 | FileCheck %s
+
+@var32 = global i32 0
+@var64 = global i64 0
+
+define void @test_and(i32 %in32, i64 %in64) {
+; CHECK: test_and:
+
+ %val0 = and i32 %in32, 2863311530
+ store volatile i32 %val0, i32* @var32
+; CHECK: and {{w[0-9]+}}, {{w[0-9]+}}, #0xaaaaaaaa
+
+ %val1 = and i32 %in32, 4293984240
+ store volatile i32 %val1, i32* @var32
+; CHECK: and {{w[0-9]+}}, {{w[0-9]+}}, #0xfff0fff0
+
+ %val2 = and i64 %in64, 9331882296111890817
+ store volatile i64 %val2, i64* @var64
+; CHECK: and {{x[0-9]+}}, {{x[0-9]+}}, #0x8181818181818181
+
+ %val3 = and i64 %in64, 18429855317404942275
+ store volatile i64 %val3, i64* @var64
+; CHECK: and {{x[0-9]+}}, {{x[0-9]+}}, #0xffc3ffc3ffc3ffc3
+
+ ret void
+}
+
+define void @test_orr(i32 %in32, i64 %in64) {
+; CHECK: test_orr:
+
+ %val0 = or i32 %in32, 2863311530
+ store volatile i32 %val0, i32* @var32
+; CHECK: orr {{w[0-9]+}}, {{w[0-9]+}}, #0xaaaaaaaa
+
+ %val1 = or i32 %in32, 4293984240
+ store volatile i32 %val1, i32* @var32
+; CHECK: orr {{w[0-9]+}}, {{w[0-9]+}}, #0xfff0fff0
+
+ %val2 = or i64 %in64, 9331882296111890817
+ store volatile i64 %val2, i64* @var64
+; CHECK: orr {{x[0-9]+}}, {{x[0-9]+}}, #0x8181818181818181
+
+ %val3 = or i64 %in64, 18429855317404942275
+ store volatile i64 %val3, i64* @var64
+; CHECK: orr {{x[0-9]+}}, {{x[0-9]+}}, #0xffc3ffc3ffc3ffc3
+
+ ret void
+}
+
+define void @test_eor(i32 %in32, i64 %in64) {
+; CHECK: test_eor:
+
+ %val0 = xor i32 %in32, 2863311530
+ store volatile i32 %val0, i32* @var32
+; CHECK: eor {{w[0-9]+}}, {{w[0-9]+}}, #0xaaaaaaaa
+
+ %val1 = xor i32 %in32, 4293984240
+ store volatile i32 %val1, i32* @var32
+; CHECK: eor {{w[0-9]+}}, {{w[0-9]+}}, #0xfff0fff0
+
+ %val2 = xor i64 %in64, 9331882296111890817
+ store volatile i64 %val2, i64* @var64
+; CHECK: eor {{x[0-9]+}}, {{x[0-9]+}}, #0x8181818181818181
+
+ %val3 = xor i64 %in64, 18429855317404942275
+ store volatile i64 %val3, i64* @var64
+; CHECK: eor {{x[0-9]+}}, {{x[0-9]+}}, #0xffc3ffc3ffc3ffc3
+
+ ret void
+}
+
+define void @test_mov(i32 %in32, i64 %in64) {
+; CHECK: test_mov:
+ %val0 = add i32 %in32, 2863311530
+ store i32 %val0, i32* @var32
+; CHECK: orr {{w[0-9]+}}, wzr, #0xaaaaaaaa
+
+ %val1 = add i64 %in64, 11068046444225730969
+ store i64 %val1, i64* @var64
+; CHECK: orr {{x[0-9]+}}, xzr, #0x9999999999999999
+
+ ret void
+; CHECK: ret
+}
diff --git a/test/CodeGen/AArch64/logical_shifted_reg.ll b/test/CodeGen/AArch64/logical_shifted_reg.ll
new file mode 100644
index 0000000000..739381d344
--- /dev/null
+++ b/test/CodeGen/AArch64/logical_shifted_reg.ll
@@ -0,0 +1,224 @@
+; RUN: llc -verify-machineinstrs < %s -march=aarch64 -O0 | FileCheck %s
+
+@var1_32 = global i32 0
+@var2_32 = global i32 0
+
+@var1_64 = global i64 0
+@var2_64 = global i64 0
+
+define void @logical_32bit() {
+; CHECK: logical_32bit:
+ %val1 = load i32* @var1_32
+ %val2 = load i32* @var2_32
+
+ ; First check basic and/bic/or/orn/eor/eon patterns with no shift
+ %neg_val2 = xor i32 -1, %val2
+
+ %and_noshift = and i32 %val1, %val2
+; CHECK: and {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+ store volatile i32 %and_noshift, i32* @var1_32
+ %bic_noshift = and i32 %neg_val2, %val1
+; CHECK: bic {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+ store volatile i32 %bic_noshift, i32* @var1_32
+
+ %or_noshift = or i32 %val1, %val2
+; CHECK: orr {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+ store volatile i32 %or_noshift, i32* @var1_32
+ %orn_noshift = or i32 %neg_val2, %val1
+; CHECK: orn {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+ store volatile i32 %orn_noshift, i32* @var1_32
+
+ %xor_noshift = xor i32 %val1, %val2
+; CHECK: eor {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+ store volatile i32 %xor_noshift, i32* @var1_32
+ %xorn_noshift = xor i32 %neg_val2, %val1
+; CHECK: eon {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+ store volatile i32 %xorn_noshift, i32* @var1_32
+
+ ; Check the maximum shift on each
+ %operand_lsl31 = shl i32 %val2, 31
+ %neg_operand_lsl31 = xor i32 -1, %operand_lsl31
+
+ %and_lsl31 = and i32 %val1, %operand_lsl31
+; CHECK: and {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsl #31
+ store volatile i32 %and_lsl31, i32* @var1_32
+ %bic_lsl31 = and i32 %val1, %neg_operand_lsl31
+; CHECK: bic {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsl #31
+ store volatile i32 %bic_lsl31, i32* @var1_32
+
+ %or_lsl31 = or i32 %val1, %operand_lsl31
+; CHECK: orr {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsl #31
+ store volatile i32 %or_lsl31, i32* @var1_32
+ %orn_lsl31 = or i32 %val1, %neg_operand_lsl31
+; CHECK: orn {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsl #31
+ store volatile i32 %orn_lsl31, i32* @var1_32
+
+ %xor_lsl31 = xor i32 %val1, %operand_lsl31
+; CHECK: eor {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsl #31
+ store volatile i32 %xor_lsl31, i32* @var1_32
+ %xorn_lsl31 = xor i32 %val1, %neg_operand_lsl31
+; CHECK: eon {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsl #31
+ store volatile i32 %xorn_lsl31, i32* @var1_32
+
+ ; Check other shifts on a subset
+ %operand_asr10 = ashr i32 %val2, 10
+ %neg_operand_asr10 = xor i32 -1, %operand_asr10
+
+ %bic_asr10 = and i32 %val1, %neg_operand_asr10
+; CHECK: bic {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, asr #10
+ store volatile i32 %bic_asr10, i32* @var1_32
+ %xor_asr10 = xor i32 %val1, %operand_asr10
+; CHECK: eor {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, asr #10
+ store volatile i32 %xor_asr10, i32* @var1_32
+
+ %operand_lsr1 = lshr i32 %val2, 1
+ %neg_operand_lsr1 = xor i32 -1, %operand_lsr1
+
+ %orn_lsr1 = or i32 %val1, %neg_operand_lsr1
+; CHECK: orn {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsr #1
+ store volatile i32 %orn_lsr1, i32* @var1_32
+ %xor_lsr1 = xor i32 %val1, %operand_lsr1
+; CHECK: eor {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsr #1
+ store volatile i32 %xor_lsr1, i32* @var1_32
+
+ %operand_ror20_big = shl i32 %val2, 12
+ %operand_ror20_small = lshr i32 %val2, 20
+ %operand_ror20 = or i32 %operand_ror20_big, %operand_ror20_small
+ %neg_operand_ror20 = xor i32 -1, %operand_ror20
+
+ %xorn_ror20 = xor i32 %val1, %neg_operand_ror20
+; CHECK: eon {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, ror #20
+ store volatile i32 %xorn_ror20, i32* @var1_32
+ %and_ror20 = and i32 %val1, %operand_ror20
+; CHECK: and {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, ror #20
+ store volatile i32 %and_ror20, i32* @var1_32
+
+ ret void
+}
+
+define void @logical_64bit() {
+; CHECK: logical_64bit:
+ %val1 = load i64* @var1_64
+ %val2 = load i64* @var2_64
+
+ ; First check basic and/bic/or/orn/eor/eon patterns with no shift
+ %neg_val2 = xor i64 -1, %val2
+
+ %and_noshift = and i64 %val1, %val2
+; CHECK: and {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
+ store volatile i64 %and_noshift, i64* @var1_64
+ %bic_noshift = and i64 %neg_val2, %val1
+; CHECK: bic {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
+ store volatile i64 %bic_noshift, i64* @var1_64
+
+ %or_noshift = or i64 %val1, %val2
+; CHECK: orr {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
+ store volatile i64 %or_noshift, i64* @var1_64
+ %orn_noshift = or i64 %neg_val2, %val1
+; CHECK: orn {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
+ store volatile i64 %orn_noshift, i64* @var1_64
+
+ %xor_noshift = xor i64 %val1, %val2
+; CHECK: eor {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
+ store volatile i64 %xor_noshift, i64* @var1_64
+ %xorn_noshift = xor i64 %neg_val2, %val1
+; CHECK: eon {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
+ store volatile i64 %xorn_noshift, i64* @var1_64
+
+ ; Check the maximum shift on each
+ %operand_lsl63 = shl i64 %val2, 63
+ %neg_operand_lsl63 = xor i64 -1, %operand_lsl63
+
+ %and_lsl63 = and i64 %val1, %operand_lsl63
+; CHECK: and {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsl #63
+ store volatile i64 %and_lsl63, i64* @var1_64
+ %bic_lsl63 = and i64 %val1, %neg_operand_lsl63
+; CHECK: bic {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsl #63
+ store volatile i64 %bic_lsl63, i64* @var1_64
+
+ %or_lsl63 = or i64 %val1, %operand_lsl63
+; CHECK: orr {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsl #63
+ store volatile i64 %or_lsl63, i64* @var1_64
+ %orn_lsl63 = or i64 %val1, %neg_operand_lsl63
+; CHECK: orn {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsl #63
+ store volatile i64 %orn_lsl63, i64* @var1_64
+
+ %xor_lsl63 = xor i64 %val1, %operand_lsl63
+; CHECK: eor {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsl #63
+ store volatile i64 %xor_lsl63, i64* @var1_64
+ %xorn_lsl63 = xor i64 %val1, %neg_operand_lsl63
+; CHECK: eon {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsl #63
+ store volatile i64 %xorn_lsl63, i64* @var1_64
+
+ ; Check other shifts on a subset
+ %operand_asr10 = ashr i64 %val2, 10
+ %neg_operand_asr10 = xor i64 -1, %operand_asr10
+
+ %bic_asr10 = and i64 %val1, %neg_operand_asr10
+; CHECK: bic {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, asr #10
+ store volatile i64 %bic_asr10, i64* @var1_64
+ %xor_asr10 = xor i64 %val1, %operand_asr10
+; CHECK: eor {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, asr #10
+ store volatile i64 %xor_asr10, i64* @var1_64
+
+ %operand_lsr1 = lshr i64 %val2, 1
+ %neg_operand_lsr1 = xor i64 -1, %operand_lsr1
+
+ %orn_lsr1 = or i64 %val1, %neg_operand_lsr1
+; CHECK: orn {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsr #1
+ store volatile i64 %orn_lsr1, i64* @var1_64
+ %xor_lsr1 = xor i64 %val1, %operand_lsr1
+; CHECK: eor {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsr #1
+ store volatile i64 %xor_lsr1, i64* @var1_64
+
+  ; Construct a rotate-right from a bunch of other logical
+  ; operations. DAGCombiner should ensure we get the ROTR during
+  ; selection.
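+  ; (For reference: for a 64-bit value, (x shl 44) | (x lshr 20) is exactly a
+  ; rotate right by 20, which is the "ror #20" form checked below.)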
+ %operand_ror20_big = shl i64 %val2, 44
+ %operand_ror20_small = lshr i64 %val2, 20
+ %operand_ror20 = or i64 %operand_ror20_big, %operand_ror20_small
+ %neg_operand_ror20 = xor i64 -1, %operand_ror20
+
+ %xorn_ror20 = xor i64 %val1, %neg_operand_ror20
+; CHECK: eon {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, ror #20
+ store volatile i64 %xorn_ror20, i64* @var1_64
+ %and_ror20 = and i64 %val1, %operand_ror20
+; CHECK: and {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, ror #20
+ store volatile i64 %and_ror20, i64* @var1_64
+
+ ret void
+}
+
+define void @flag_setting() {
+; CHECK: flag_setting:
+ %val1 = load i64* @var1_64
+ %val2 = load i64* @var2_64
+
+; CHECK: tst {{x[0-9]+}}, {{x[0-9]+}}
+; CHECK: b.gt .L
+ %simple_and = and i64 %val1, %val2
+ %tst1 = icmp sgt i64 %simple_and, 0
+ br i1 %tst1, label %ret, label %test2
+
+test2:
+; CHECK: tst {{x[0-9]+}}, {{x[0-9]+}}, lsl #63
+; CHECK: b.lt .L
+ %shifted_op = shl i64 %val2, 63
+ %shifted_and = and i64 %val1, %shifted_op
+ %tst2 = icmp slt i64 %shifted_and, 0
+ br i1 %tst2, label %ret, label %test3
+
+test3:
+; CHECK: tst {{x[0-9]+}}, {{x[0-9]+}}, asr #12
+; CHECK: b.gt .L
+ %asr_op = ashr i64 %val2, 12
+ %asr_and = and i64 %asr_op, %val1
+ %tst3 = icmp sgt i64 %asr_and, 0
+ br i1 %tst3, label %ret, label %other_exit
+
+other_exit:
+ store volatile i64 %val1, i64* @var1_64
+ ret void
+ret:
+ ret void
+}
diff --git a/test/CodeGen/AArch64/logical_shifted_reg.s b/test/CodeGen/AArch64/logical_shifted_reg.s
new file mode 100644
index 0000000000..89aea58011
--- /dev/null
+++ b/test/CodeGen/AArch64/logical_shifted_reg.s
@@ -0,0 +1,208 @@
+ .file "/home/timnor01/a64-trunk/llvm/test/CodeGen/AArch64/logical_shifted_reg.ll"
+ .text
+ .globl logical_32bit
+ .type logical_32bit,@function
+logical_32bit: // @logical_32bit
+ .cfi_startproc
+// BB#0:
+ adrp x0, var1_32
+ ldr w1, [x0, #:lo12:var1_32]
+ adrp x0, var2_32
+ ldr w2, [x0, #:lo12:var2_32]
+ and w3, w1, w2
+ adrp x0, var1_32
+ str w3, [x0, #:lo12:var1_32]
+ bic w3, w1, w2
+ adrp x0, var1_32
+ str w3, [x0, #:lo12:var1_32]
+ orr w3, w1, w2
+ adrp x0, var1_32
+ str w3, [x0, #:lo12:var1_32]
+ orn w3, w1, w2
+ adrp x0, var1_32
+ str w3, [x0, #:lo12:var1_32]
+ eor w3, w1, w2
+ adrp x0, var1_32
+ str w3, [x0, #:lo12:var1_32]
+ eon w3, w2, w1
+ adrp x0, var1_32
+ str w3, [x0, #:lo12:var1_32]
+ and w3, w1, w2, lsl #31
+ adrp x0, var1_32
+ str w3, [x0, #:lo12:var1_32]
+ bic w3, w1, w2, lsl #31
+ adrp x0, var1_32
+ str w3, [x0, #:lo12:var1_32]
+ orr w3, w1, w2, lsl #31
+ adrp x0, var1_32
+ str w3, [x0, #:lo12:var1_32]
+ orn w3, w1, w2, lsl #31
+ adrp x0, var1_32
+ str w3, [x0, #:lo12:var1_32]
+ eor w3, w1, w2, lsl #31
+ adrp x0, var1_32
+ str w3, [x0, #:lo12:var1_32]
+ eon w3, w1, w2, lsl #31
+ adrp x0, var1_32
+ str w3, [x0, #:lo12:var1_32]
+ bic w3, w1, w2, asr #10
+ adrp x0, var1_32
+ str w3, [x0, #:lo12:var1_32]
+ eor w3, w1, w2, asr #10
+ adrp x0, var1_32
+ str w3, [x0, #:lo12:var1_32]
+ orn w3, w1, w2, lsr #1
+ adrp x0, var1_32
+ str w3, [x0, #:lo12:var1_32]
+ eor w3, w1, w2, lsr #1
+ adrp x0, var1_32
+ str w3, [x0, #:lo12:var1_32]
+ eon w3, w1, w2, ror #20
+ adrp x0, var1_32
+ str w3, [x0, #:lo12:var1_32]
+ and w1, w1, w2, ror #20
+ adrp x0, var1_32
+ str w1, [x0, #:lo12:var1_32]
+ ret
+.Ltmp0:
+ .size logical_32bit, .Ltmp0-logical_32bit
+ .cfi_endproc
+
+ .globl logical_64bit
+ .type logical_64bit,@function
+logical_64bit: // @logical_64bit
+ .cfi_startproc
+// BB#0:
+ adrp x0, var1_64
+ ldr x0, [x0, #:lo12:var1_64]
+ adrp x1, var2_64
+ ldr x1, [x1, #:lo12:var2_64]
+ and x2, x0, x1
+ adrp x3, var1_64
+ str x2, [x3, #:lo12:var1_64]
+ bic x2, x0, x1
+ adrp x3, var1_64
+ str x2, [x3, #:lo12:var1_64]
+ orr x2, x0, x1
+ adrp x3, var1_64
+ str x2, [x3, #:lo12:var1_64]
+ orn x2, x0, x1
+ adrp x3, var1_64
+ str x2, [x3, #:lo12:var1_64]
+ eor x2, x0, x1
+ adrp x3, var1_64
+ str x2, [x3, #:lo12:var1_64]
+ eon x2, x1, x0
+ adrp x3, var1_64
+ str x2, [x3, #:lo12:var1_64]
+ and x2, x0, x1, lsl #63
+ adrp x3, var1_64
+ str x2, [x3, #:lo12:var1_64]
+ bic x2, x0, x1, lsl #63
+ adrp x3, var1_64
+ str x2, [x3, #:lo12:var1_64]
+ orr x2, x0, x1, lsl #63
+ adrp x3, var1_64
+ str x2, [x3, #:lo12:var1_64]
+ orn x2, x0, x1, lsl #63
+ adrp x3, var1_64
+ str x2, [x3, #:lo12:var1_64]
+ eor x2, x0, x1, lsl #63
+ adrp x3, var1_64
+ str x2, [x3, #:lo12:var1_64]
+ eon x2, x0, x1, lsl #63
+ adrp x3, var1_64
+ str x2, [x3, #:lo12:var1_64]
+ bic x2, x0, x1, asr #10
+ adrp x3, var1_64
+ str x2, [x3, #:lo12:var1_64]
+ eor x2, x0, x1, asr #10
+ adrp x3, var1_64
+ str x2, [x3, #:lo12:var1_64]
+ orn x2, x0, x1, lsr #1
+ adrp x3, var1_64
+ str x2, [x3, #:lo12:var1_64]
+ eor x2, x0, x1, lsr #1
+ adrp x3, var1_64
+ str x2, [x3, #:lo12:var1_64]
+ eon x2, x0, x1, ror #20
+ adrp x3, var1_64
+ str x2, [x3, #:lo12:var1_64]
+ and x0, x0, x1, ror #20
+ adrp x1, var1_64
+ str x0, [x1, #:lo12:var1_64]
+ ret
+.Ltmp1:
+ .size logical_64bit, .Ltmp1-logical_64bit
+ .cfi_endproc
+
+ .globl flag_setting
+ .type flag_setting,@function
+flag_setting: // @flag_setting
+ .cfi_startproc
+// BB#0:
+ sub sp, sp, #16
+ adrp x0, var1_64
+ ldr x0, [x0, #:lo12:var1_64]
+ adrp x1, var2_64
+ ldr x1, [x1, #:lo12:var2_64]
+ tst x0, x1
+ str x0, [sp, #8] // 8-byte Folded Spill
+ str x1, [sp] // 8-byte Folded Spill
+ b.gt .LBB2_4
+ b .LBB2_1
+.LBB2_1: // %test2
+ ldr x0, [sp, #8] // 8-byte Folded Reload
+ ldr x1, [sp] // 8-byte Folded Reload
+ tst x0, x1, lsl #63
+ b.lt .LBB2_4
+ b .LBB2_2
+.LBB2_2: // %test3
+ ldr x0, [sp, #8] // 8-byte Folded Reload
+ ldr x1, [sp] // 8-byte Folded Reload
+ tst x0, x1, asr #12
+ b.gt .LBB2_4
+ b .LBB2_3
+.LBB2_3: // %other_exit
+ adrp x0, var1_64
+ ldr x1, [sp, #8] // 8-byte Folded Reload
+ str x1, [x0, #:lo12:var1_64]
+ add sp, sp, #16
+ ret
+.LBB2_4: // %ret
+ add sp, sp, #16
+ ret
+.Ltmp2:
+ .size flag_setting, .Ltmp2-flag_setting
+ .cfi_endproc
+
+ .type var1_32,@object // @var1_32
+ .bss
+ .globl var1_32
+ .align 2
+var1_32:
+ .word 0 // 0x0
+ .size var1_32, 4
+
+ .type var2_32,@object // @var2_32
+ .globl var2_32
+ .align 2
+var2_32:
+ .word 0 // 0x0
+ .size var2_32, 4
+
+ .type var1_64,@object // @var1_64
+ .globl var1_64
+ .align 3
+var1_64:
+ .xword 0 // 0x0
+ .size var1_64, 8
+
+ .type var2_64,@object // @var2_64
+ .globl var2_64
+ .align 3
+var2_64:
+ .xword 0 // 0x0
+ .size var2_64, 8
+
+
diff --git a/test/CodeGen/AArch64/movw-consts.ll b/test/CodeGen/AArch64/movw-consts.ll
new file mode 100644
index 0000000000..421043645f
--- /dev/null
+++ b/test/CodeGen/AArch64/movw-consts.ll
@@ -0,0 +1,124 @@
+; RUN: llc -verify-machineinstrs -O0 < %s -march=aarch64 | FileCheck %s
+
+define i64 @test0() {
+; CHECK: test0:
+; Not produced by move wide instructions, but good to make sure we can return 0 anyway:
+; CHECK: mov x0, xzr
+ ret i64 0
+}
+
+define i64 @test1() {
+; CHECK: test1:
+; CHECK: movz x0, #1
+ ret i64 1
+}
+
+define i64 @test2() {
+; CHECK: test2:
+; CHECK: movz x0, #65535
+ ret i64 65535
+}
+
+define i64 @test3() {
+; CHECK: test3:
+; CHECK: movz x0, #1, lsl #16
+ ret i64 65536
+}
+
+define i64 @test4() {
+; CHECK: test4:
+; CHECK: movz x0, #65535, lsl #16
+ ret i64 4294901760
+}
+
+define i64 @test5() {
+; CHECK: test5:
+; CHECK: movz x0, #1, lsl #32
+ ret i64 4294967296
+}
+
+define i64 @test6() {
+; CHECK: test6:
+; CHECK: movz x0, #65535, lsl #32
+ ret i64 281470681743360
+}
+
+define i64 @test7() {
+; CHECK: test7:
+; CHECK: movz x0, #1, lsl #48
+ ret i64 281474976710656
+}
+
+; A 32-bit MOVN can generate some 64-bit patterns that a 64-bit one
+; couldn't. Useful even for i64.
+define i64 @test8() {
+; CHECK: test8:
+; CHECK: movn w0, #60875
+ ret i64 4294906420
+}
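+; (For reference: 60875 is 0xedcb, so "movn w0, #60875" gives w0 = 0xffff1234,
+; and writing w0 zeroes bits 63:32, leaving x0 = 0x00000000ffff1234, i.e.
+; 4294906420 -- a value no single 64-bit MOVZ/MOVN can materialize.)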
+
+define i64 @test9() {
+; CHECK: test9:
+; CHECK: movn x0, #0
+ ret i64 -1
+}
+
+define i64 @test10() {
+; CHECK: test10:
+; CHECK: movn x0, #60875, lsl #16
+ ret i64 18446744069720047615
+}
+
+; For reasonably legitimate reasons, returning an i32 results in the
+; selection of an i64 constant, so we need a different idiom to test that
+; selection.
+@var32 = global i32 0
+
+define void @test11() {
+; CHECK: test11:
+; CHECK: movz {{w[0-9]+}}, #0
+ store i32 0, i32* @var32
+ ret void
+}
+
+define void @test12() {
+; CHECK: test12:
+; CHECK: movz {{w[0-9]+}}, #1
+ store i32 1, i32* @var32
+ ret void
+}
+
+define void @test13() {
+; CHECK: test13:
+; CHECK: movz {{w[0-9]+}}, #65535
+ store i32 65535, i32* @var32
+ ret void
+}
+
+define void @test14() {
+; CHECK: test14:
+; CHECK: movz {{w[0-9]+}}, #1, lsl #16
+ store i32 65536, i32* @var32
+ ret void
+}
+
+define void @test15() {
+; CHECK: test15:
+; CHECK: movz {{w[0-9]+}}, #65535, lsl #16
+ store i32 4294901760, i32* @var32
+ ret void
+}
+
+define void @test16() {
+; CHECK: test16:
+; CHECK: movn {{w[0-9]+}}, #0
+ store i32 -1, i32* @var32
+ ret void
+}
+
+define i64 @test17() {
+; CHECK: test17:
+
+  ; Mustn't use MOVN on w0 here.
+; CHECK: movn x0, #2
+ ret i64 -3
+}
diff --git a/test/CodeGen/AArch64/pic-eh-stubs.ll b/test/CodeGen/AArch64/pic-eh-stubs.ll
new file mode 100644
index 0000000000..77bf691cbc
--- /dev/null
+++ b/test/CodeGen/AArch64/pic-eh-stubs.ll
@@ -0,0 +1,60 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -relocation-model=pic -o - %s | FileCheck %s
+
+; Make sure exception-handling PIC code can be linked correctly. An alternative
+; to the sequence described below would have .gcc_except_table itself writable
+; and not use the indirection, but this isn't what LLVM does right now.
+
+ ; There should be a read-only .gcc_except_table section...
+; CHECK: .section .gcc_except_table,"a"
+
+ ; ... referring indirectly to stubs for its typeinfo ...
+; CHECK: // @TType Encoding = indirect pcrel sdata8
+ ; ... one of which is "int"'s typeinfo
+; CHECK: .Ltmp9:
+; CHECK-NEXT: .xword .L_ZTIi.DW.stub-.Ltmp9
+
+ ; .. and which is properly defined (in a writable section for the dynamic loader) later.
+; CHECK: .section .data.rel,"aw"
+; CHECK: .L_ZTIi.DW.stub:
+; CHECK-NEXT: .xword _ZTIi
+
+@_ZTIi = external constant i8*
+
+define i32 @_Z3barv() {
+entry:
+ invoke void @_Z3foov()
+ to label %return unwind label %lpad
+
+lpad: ; preds = %entry
+ %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ catch i8* bitcast (i8** @_ZTIi to i8*)
+ %1 = extractvalue { i8*, i32 } %0, 1
+ %2 = tail call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*)) nounwind
+ %matches = icmp eq i32 %1, %2
+ br i1 %matches, label %catch, label %eh.resume
+
+catch: ; preds = %lpad
+ %3 = extractvalue { i8*, i32 } %0, 0
+ %4 = tail call i8* @__cxa_begin_catch(i8* %3) nounwind
+ %5 = bitcast i8* %4 to i32*
+ %exn.scalar = load i32* %5, align 4
+ tail call void @__cxa_end_catch() nounwind
+ br label %return
+
+return: ; preds = %entry, %catch
+ %retval.0 = phi i32 [ %exn.scalar, %catch ], [ 42, %entry ]
+ ret i32 %retval.0
+
+eh.resume: ; preds = %lpad
+ resume { i8*, i32 } %0
+}
+
+declare void @_Z3foov()
+
+declare i32 @__gxx_personality_v0(...)
+
+declare i32 @llvm.eh.typeid.for(i8*) nounwind readnone
+
+declare i8* @__cxa_begin_catch(i8*)
+
+declare void @__cxa_end_catch() \ No newline at end of file
diff --git a/test/CodeGen/AArch64/regress-bitcast-formals.ll b/test/CodeGen/AArch64/regress-bitcast-formals.ll
new file mode 100644
index 0000000000..774b0fdee2
--- /dev/null
+++ b/test/CodeGen/AArch64/regress-bitcast-formals.ll
@@ -0,0 +1,11 @@
+; RUN: llc -march=aarch64 -verify-machineinstrs < %s | FileCheck %s
+
+; CallingConv.td requires a bitcast for vector arguments. Make sure we're
+; actually capable of that (the test was omitted from LowerFormalArguments).
+
+define void @test_bitcast_lower(<2 x i32> %a) {
+; CHECK: test_bitcast_lower:
+
+ ret void
+; CHECK: ret
+}
diff --git a/test/CodeGen/AArch64/regress-f128csel-flags.ll b/test/CodeGen/AArch64/regress-f128csel-flags.ll
new file mode 100644
index 0000000000..a1ffb09178
--- /dev/null
+++ b/test/CodeGen/AArch64/regress-f128csel-flags.ll
@@ -0,0 +1,27 @@
+; RUN: llc -march=aarch64 -verify-machineinstrs < %s | FileCheck %s
+
+; We used to not mark NZCV as being used in the continuation basic-block
+; when lowering a 128-bit "select" to branches. This meant a subsequent use
+; of the same flags gave an internal fault here.
+
+declare void @foo(fp128)
+
+define double @test_f128csel_flags(i32 %lhs, fp128 %a, fp128 %b) nounwind {
+; CHECK: test_f128csel_flags
+
+ %tst = icmp ne i32 %lhs, 42
+ %val = select i1 %tst, fp128 %a, fp128 %b
+; CHECK: cmp w0, #42
+; CHECK: b.eq .LBB0
+
+ call void @foo(fp128 %val)
+ %retval = select i1 %tst, double 4.0, double 5.0
+
+ ; It's also reasonably important that the actual fcsel comes before the
+  ; function call since bl may corrupt NZCV. We were doing the right thing
+  ; anyway, but we might as well test it while we're here.
+; CHECK: fcsel {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, ne
+; CHECK: bl foo
+
+ ret double %retval
+}
diff --git a/test/CodeGen/AArch64/regress-tail-livereg.ll b/test/CodeGen/AArch64/regress-tail-livereg.ll
new file mode 100644
index 0000000000..0c7f8cbffa
--- /dev/null
+++ b/test/CodeGen/AArch64/regress-tail-livereg.ll
@@ -0,0 +1,19 @@
+; RUN: llc -verify-machineinstrs -march=aarch64 < %s | FileCheck %s
+@var = global void()* zeroinitializer
+
+declare void @bar()
+
+define void @foo() {
+; CHECK: foo:
+ %func = load void()** @var
+
+ ; Calling a function encourages @foo to use a callee-saved register,
+ ; which makes it a natural choice for the tail call itself. But we don't
+ ; want that: the final "br xN" has to use a temporary or argument
+ ; register.
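+  ; (Presumably because callee-saved registers are restored by the epilogue
+  ; before the branch, so a function pointer kept in one would already have
+  ; been overwritten by the time "br xN" executes.)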
+ call void @bar()
+
+ tail call void %func()
+; CHECK: br {{x([0-79]|1[0-8])}}
+ ret void
+} \ No newline at end of file
diff --git a/test/CodeGen/AArch64/regress-tblgen-chains.ll b/test/CodeGen/AArch64/regress-tblgen-chains.ll
new file mode 100644
index 0000000000..0c53f83b66
--- /dev/null
+++ b/test/CodeGen/AArch64/regress-tblgen-chains.ll
@@ -0,0 +1,36 @@
+; RUN: llc -verify-machineinstrs -march=aarch64 < %s | FileCheck %s
+
+; When generating DAG selection tables, TableGen used to only flag an
+; instruction as needing a chain on its own account if it had a built-in pattern
+; which used the chain. This meant that the AArch64 load/stores weren't
+; recognised and so both loads from %locvar below were coalesced into a single
+; LS8_LDR instruction (same operands other than the non-existent chain) and the
+; increment was lost at return.
+
+; This was obviously a Bad Thing.
+
+declare void @bar(i8*)
+
+define i64 @test_chains() {
+; CHECK: test_chains:
+
+ %locvar = alloca i8
+
+ call void @bar(i8* %locvar)
+; CHECK: bl bar
+
+ %inc.1 = load i8* %locvar
+ %inc.2 = zext i8 %inc.1 to i64
+ %inc.3 = add i64 %inc.2, 1
+ %inc.4 = trunc i64 %inc.3 to i8
+ store i8 %inc.4, i8* %locvar
+; CHECK: ldrb {{w[0-9]+}}, [sp, [[LOCADDR:#[0-9]+]]]
+; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, #1
+; CHECK: strb {{w[0-9]+}}, [sp, [[LOCADDR]]]
+; CHECK: ldrb {{w[0-9]+}}, [sp, [[LOCADDR]]]
+
+ %ret.1 = load i8* %locvar
+ %ret.2 = zext i8 %ret.1 to i64
+ ret i64 %ret.2
+; CHECK: ret
+}
diff --git a/test/CodeGen/AArch64/regress-w29-reserved-with-fp.ll b/test/CodeGen/AArch64/regress-w29-reserved-with-fp.ll
new file mode 100644
index 0000000000..98bd92b06c
--- /dev/null
+++ b/test/CodeGen/AArch64/regress-w29-reserved-with-fp.ll
@@ -0,0 +1,37 @@
+; RUN: llc -march=aarch64 -disable-fp-elim < %s | FileCheck %s
+@var = global i32 0
+
+declare void @bar()
+
+define void @test_w29_reserved() {
+; CHECK: test_w29_reserved:
+; CHECK: add x29, sp, #{{[0-9]+}}
+
+ %val1 = load volatile i32* @var
+ %val2 = load volatile i32* @var
+ %val3 = load volatile i32* @var
+ %val4 = load volatile i32* @var
+ %val5 = load volatile i32* @var
+ %val6 = load volatile i32* @var
+ %val7 = load volatile i32* @var
+ %val8 = load volatile i32* @var
+ %val9 = load volatile i32* @var
+
+; CHECK-NOT: ldr w29,
+
+ ; Call to prevent fp-elim that occurs regardless in leaf functions.
+ call void @bar()
+
+ store volatile i32 %val1, i32* @var
+ store volatile i32 %val2, i32* @var
+ store volatile i32 %val3, i32* @var
+ store volatile i32 %val4, i32* @var
+ store volatile i32 %val5, i32* @var
+ store volatile i32 %val6, i32* @var
+ store volatile i32 %val7, i32* @var
+ store volatile i32 %val8, i32* @var
+ store volatile i32 %val9, i32* @var
+
+ ret void
+; CHECK: ret
+}
diff --git a/test/CodeGen/AArch64/regress-wzr-allocatable.ll b/test/CodeGen/AArch64/regress-wzr-allocatable.ll
new file mode 100644
index 0000000000..a587d83bd8
--- /dev/null
+++ b/test/CodeGen/AArch64/regress-wzr-allocatable.ll
@@ -0,0 +1,41 @@
+; RUN: llc -verify-machineinstrs < %s -march=aarch64 -O0
+
+; When WZR wasn't marked as reserved, this function tried to allocate
+; it at O0 and then generated an internal fault (mostly incidentally)
+; when it discovered that it was already in use for a multiplication.
+
+; I'm not really convinced this is a good test since it could easily
+; stop testing what it does now with no-one any the wiser. However, I
+; can't think of a better way to force the allocator to use WZR
+; specifically.
+
+define void @test() nounwind {
+entry:
+ br label %for.cond
+
+for.cond: ; preds = %for.body, %entry
+ br i1 undef, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ br label %for.cond6
+
+for.cond6: ; preds = %for.body9, %for.end
+ br i1 undef, label %for.body9, label %while.cond30
+
+for.body9: ; preds = %for.cond6
+ store i16 0, i16* undef, align 2
+ %0 = load i32* undef, align 4
+ %1 = load i32* undef, align 4
+ %mul15 = mul i32 %0, %1
+ %add16 = add i32 %mul15, 32768
+ %div = udiv i32 %add16, 65535
+ %add17 = add i32 %div, 1
+ store i32 %add17, i32* undef, align 4
+ br label %for.cond6
+
+while.cond30: ; preds = %for.cond6
+ ret void
+}
diff --git a/test/CodeGen/AArch64/setcc-takes-i32.ll b/test/CodeGen/AArch64/setcc-takes-i32.ll
new file mode 100644
index 0000000000..795747af41
--- /dev/null
+++ b/test/CodeGen/AArch64/setcc-takes-i32.ll
@@ -0,0 +1,22 @@
+; RUN: llc -verify-machineinstrs -march=aarch64 < %s | FileCheck %s
+
+; Most important point here is that the promotion of the i1 works
+; correctly. Previously LLVM thought that i64 was the appropriate SetCC output,
+; which meant it proceeded in two steps and produced an i64 -> i64 any_ext which
+; couldn't be selected and faulted.
+
+; It was expecting the smallest legal promotion of i1 to be the preferred SetCC
+; type, so we'll satisfy it (this actually arguably gives better code anyway,
+; with flag-manipulation operations allowed to use W-registers).
+
+declare {i64, i1} @llvm.umul.with.overflow.i64(i64, i64)
+
+define i64 @test_select(i64 %lhs, i64 %rhs) {
+; CHECK: test_select:
+
+ %res = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %lhs, i64 %rhs)
+ %flag = extractvalue {i64, i1} %res, 1
+ %retval = select i1 %flag, i64 %lhs, i64 %rhs
+ ret i64 %retval
+; CHECK: ret
+} \ No newline at end of file
diff --git a/test/CodeGen/AArch64/sibling-call.ll b/test/CodeGen/AArch64/sibling-call.ll
new file mode 100644
index 0000000000..a4ea064d12
--- /dev/null
+++ b/test/CodeGen/AArch64/sibling-call.ll
@@ -0,0 +1,97 @@
+; RUN: llc -verify-machineinstrs < %s -march=aarch64 | FileCheck %s
+
+declare void @callee_stack0()
+declare void @callee_stack8([8 x i32], i64)
+declare void @callee_stack16([8 x i32], i64, i64)
+
+define void @caller_to0_from0() nounwind {
+; CHECK: caller_to0_from0:
+; CHECK-NEXT: // BB
+ tail call void @callee_stack0()
+ ret void
+; CHECK-NEXT: b callee_stack0
+}
+
+define void @caller_to0_from8([8 x i32], i64) nounwind{
+; CHECK: caller_to0_from8:
+; CHECK-NEXT: // BB
+
+ tail call void @callee_stack0()
+ ret void
+; CHECK-NEXT: b callee_stack0
+}
+
+define void @caller_to8_from0() {
+; CHECK: caller_to8_from0:
+
+; Caller isn't going to clean up any extra stack we allocate, so it
+; can't be a tail call.
+ tail call void @callee_stack8([8 x i32] undef, i64 42)
+ ret void
+; CHECK: bl callee_stack8
+}
+
+define void @caller_to8_from8([8 x i32], i64 %a) {
+; CHECK: caller_to8_from8:
+; CHECK-NOT: sub sp, sp,
+
+; This should reuse our stack area for the 42
+ tail call void @callee_stack8([8 x i32] undef, i64 42)
+ ret void
+; CHECK: str {{x[0-9]+}}, [sp]
+; CHECK-NEXT: b callee_stack8
+}
+
+define void @caller_to16_from8([8 x i32], i64 %a) {
+; CHECK: caller_to16_from8:
+
+; Shouldn't be a tail call: we can't use SP+8 because our caller might
+; have something there. This may sound obvious but the implementation does
+; some funky aligning.
+ tail call void @callee_stack16([8 x i32] undef, i64 undef, i64 undef)
+; CHECK: bl callee_stack16
+ ret void
+}
+
+define void @caller_to8_from24([8 x i32], i64 %a, i64 %b, i64 %c) {
+; CHECK: caller_to8_from24:
+; CHECK-NOT: sub sp, sp
+
+; Reuse our area, putting "42" at incoming sp
+ tail call void @callee_stack8([8 x i32] undef, i64 42)
+ ret void
+; CHECK: str {{x[0-9]+}}, [sp]
+; CHECK-NEXT: b callee_stack8
+}
+
+define void @caller_to16_from16([8 x i32], i64 %a, i64 %b) {
+; CHECK: caller_to16_from16:
+; CHECK-NOT: sub sp, sp,
+
+; Here we want to make sure that both loads happen before the stores:
+; otherwise either %a or %b will be wrongly clobbered.
+ tail call void @callee_stack16([8 x i32] undef, i64 %b, i64 %a)
+ ret void
+
+; CHECK: ldr x0,
+; CHECK: ldr x1,
+; CHECK: str x1,
+; CHECK: str x0,
+
+; CHECK-NOT: add sp, sp,
+; CHECK: b callee_stack16
+}
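+; (The call passes %b and %a swapped into each other's incoming stack slots,
+; so a store issued before both loads would clobber one of the original
+; values.)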
+
+@func = global void(i32)* null
+
+define void @indirect_tail() {
+; CHECK: indirect_tail:
+; CHECK-NOT: sub sp, sp
+
+ %fptr = load void(i32)** @func
+ tail call void %fptr(i32 42)
+ ret void
+; CHECK: movz w0, #42
+; CHECK: ldr [[FPTR:x[1-9]+]], [{{x[0-9]+}}, #:lo12:func]
+; CHECK: br [[FPTR]]
+} \ No newline at end of file
diff --git a/test/CodeGen/AArch64/tail-call.ll b/test/CodeGen/AArch64/tail-call.ll
new file mode 100644
index 0000000000..eed6ae5f04
--- /dev/null
+++ b/test/CodeGen/AArch64/tail-call.ll
@@ -0,0 +1,94 @@
+; RUN: llc -verify-machineinstrs < %s -march=aarch64 -tailcallopt | FileCheck %s
+
+declare fastcc void @callee_stack0()
+declare fastcc void @callee_stack8([8 x i32], i64)
+declare fastcc void @callee_stack16([8 x i32], i64, i64)
+
+define fastcc void @caller_to0_from0() nounwind {
+; CHECK: caller_to0_from0:
+; CHECK-NEXT: // BB
+ tail call fastcc void @callee_stack0()
+ ret void
+; CHECK-NEXT: b callee_stack0
+}
+
+define fastcc void @caller_to0_from8([8 x i32], i64) {
+; CHECK: caller_to0_from8:
+
+ tail call fastcc void @callee_stack0()
+ ret void
+; CHECK: add sp, sp, #16
+; CHECK-NEXT: b callee_stack0
+}
+
+define fastcc void @caller_to8_from0() {
+; CHECK: caller_to8_from0:
+; CHECK: sub sp, sp, #32
+
+; Key point is that the "42" should go #16 below incoming stack
+; pointer (we didn't have arg space to reuse).
+ tail call fastcc void @callee_stack8([8 x i32] undef, i64 42)
+ ret void
+; CHECK: str {{x[0-9]+}}, [sp, #16]
+; CHECK-NEXT: add sp, sp, #16
+; CHECK-NEXT: b callee_stack8
+}
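+; (A sketch of why this works, assuming the usual -tailcallopt convention:
+; after the "add sp, sp, #16" the 42 sits exactly at the callee's incoming SP,
+; and the fastcc callee is responsible for popping that 16-byte area itself.)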
+
+define fastcc void @caller_to8_from8([8 x i32], i64 %a) {
+; CHECK: caller_to8_from8:
+; CHECK: sub sp, sp, #16
+
+; Key point is that the "%a" should go where at SP on entry.
+ tail call fastcc void @callee_stack8([8 x i32] undef, i64 42)
+ ret void
+; CHECK: str {{x[0-9]+}}, [sp, #16]
+; CHECK-NEXT: add sp, sp, #16
+; CHECK-NEXT: b callee_stack8
+}
+
+define fastcc void @caller_to16_from8([8 x i32], i64 %a) {
+; CHECK: caller_to16_from8:
+; CHECK: sub sp, sp, #16
+
+; Important point is that the call reuses the "dead" argument space
+; above %a on the stack. If it tries to go below incoming-SP then the
+; callee will not deallocate the space, even in fastcc.
+ tail call fastcc void @callee_stack16([8 x i32] undef, i64 42, i64 2)
+; CHECK: str {{x[0-9]+}}, [sp, #24]
+; CHECK: str {{x[0-9]+}}, [sp, #16]
+; CHECK: add sp, sp, #16
+; CHECK: b callee_stack16
+ ret void
+}
+
+
+define fastcc void @caller_to8_from24([8 x i32], i64 %a, i64 %b, i64 %c) {
+; CHECK: caller_to8_from24:
+; CHECK: sub sp, sp, #16
+
+; Key point is that the "%a" should go where at #16 above SP on entry.
+ tail call fastcc void @callee_stack8([8 x i32] undef, i64 42)
+ ret void
+; CHECK: str {{x[0-9]+}}, [sp, #32]
+; CHECK-NEXT: add sp, sp, #32
+; CHECK-NEXT: b callee_stack8
+}
+
+
+define fastcc void @caller_to16_from16([8 x i32], i64 %a, i64 %b) {
+; CHECK: caller_to16_from16:
+; CHECK: sub sp, sp, #16
+
+; Here we want to make sure that both loads happen before the stores:
+; otherwise either %a or %b will be wrongly clobbered.
+ tail call fastcc void @callee_stack16([8 x i32] undef, i64 %b, i64 %a)
+ ret void
+
+; CHECK: ldr x0,
+; CHECK: ldr x1,
+; CHECK: str x1,
+; CHECK: str x0,
+
+; CHECK: add sp, sp, #16
+; CHECK: b callee_stack16
+}
diff --git a/test/CodeGen/AArch64/tls-dynamic-together.ll b/test/CodeGen/AArch64/tls-dynamic-together.ll
new file mode 100644
index 0000000000..bad2298c8a
--- /dev/null
+++ b/test/CodeGen/AArch64/tls-dynamic-together.ll
@@ -0,0 +1,18 @@
+; RUN: llc -O0 -mtriple=aarch64-none-linux-gnu -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s
+
+; If the .tlsdesccall and blr parts are emitted completely separately (even with
+; glue) then LLVM will separate them quite happily (with a spill at O0, hence
+; the option). This is definitely wrong, so we make sure they are emitted
+; together.
+
+@general_dynamic_var = external thread_local global i32
+
+define i32 @test_generaldynamic() {
+; CHECK: test_generaldynamic:
+
+ %val = load i32* @general_dynamic_var
+ ret i32 %val
+
+; CHECK: .tlsdesccall general_dynamic_var
+; CHECK-NEXT: blr {{x[0-9]+}}
+}
diff --git a/test/CodeGen/AArch64/tls-dynamics.ll b/test/CodeGen/AArch64/tls-dynamics.ll
new file mode 100644
index 0000000000..cdfd11783c
--- /dev/null
+++ b/test/CodeGen/AArch64/tls-dynamics.ll
@@ -0,0 +1,121 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu -relocation-model=pic -filetype=obj < %s | llvm-objdump -r - | FileCheck --check-prefix=CHECK-RELOC %s
+
+@general_dynamic_var = external thread_local global i32
+
+define i32 @test_generaldynamic() {
+; CHECK: test_generaldynamic:
+
+ %val = load i32* @general_dynamic_var
+ ret i32 %val
+
+; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:general_dynamic_var
+; CHECK: add x0, x[[TLSDESC_HI]], #:tlsdesc_lo12:general_dynamic_var
+; CHECK: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], #:tlsdesc_lo12:general_dynamic_var]
+; CHECK: .tlsdesccall general_dynamic_var
+; CHECK-NEXT: blr [[CALLEE]]
+
+; CHECK: mrs x[[TP:[0-9]+]], tpidr_el0
+; CHECK: ldr w0, [x[[TP]], x0]
+
+; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE
+; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC
+; CHECK-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC
+; CHECK-RELOC: R_AARCH64_TLSDESC_CALL
+
+}
+
+define i32* @test_generaldynamic_addr() {
+; CHECK: test_generaldynamic_addr:
+
+ ret i32* @general_dynamic_var
+
+; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:general_dynamic_var
+; CHECK: add x0, x[[TLSDESC_HI]], #:tlsdesc_lo12:general_dynamic_var
+; CHECK: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], #:tlsdesc_lo12:general_dynamic_var]
+; CHECK: .tlsdesccall general_dynamic_var
+; CHECK-NEXT: blr [[CALLEE]]
+
+; CHECK: mrs [[TP:x[0-9]+]], tpidr_el0
+; CHECK: add x0, [[TP]], x0
+
+; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE
+; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC
+; CHECK-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC
+; CHECK-RELOC: R_AARCH64_TLSDESC_CALL
+
+}
+
+@local_dynamic_var = external thread_local(localdynamic) global i32
+
+define i32 @test_localdynamic() {
+; CHECK: test_localdynamic:
+
+ %val = load i32* @local_dynamic_var
+ ret i32 %val
+
+; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:_TLS_MODULE_BASE_
+; CHECK: add x0, x[[TLSDESC_HI]], #:tlsdesc_lo12:_TLS_MODULE_BASE_
+; CHECK: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], #:tlsdesc_lo12:_TLS_MODULE_BASE_]
+; CHECK: .tlsdesccall _TLS_MODULE_BASE_
+; CHECK-NEXT: blr [[CALLEE]]
+
+; CHECK: movz [[DTP_OFFSET:x[0-9]+]], #:dtprel_g1:local_dynamic_var
+; CHECK: movk [[DTP_OFFSET]], #:dtprel_g0_nc:local_dynamic_var
+
+; CHECK: ldr w0, [x0, [[DTP_OFFSET]]]
+
+; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE
+; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC
+; CHECK-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC
+; CHECK-RELOC: R_AARCH64_TLSDESC_CALL
+
+}
+
+define i32* @test_localdynamic_addr() {
+; CHECK: test_localdynamic_addr:
+
+ ret i32* @local_dynamic_var
+
+; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:_TLS_MODULE_BASE_
+; CHECK: add x0, x[[TLSDESC_HI]], #:tlsdesc_lo12:_TLS_MODULE_BASE_
+; CHECK: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], #:tlsdesc_lo12:_TLS_MODULE_BASE_]
+; CHECK: .tlsdesccall _TLS_MODULE_BASE_
+; CHECK-NEXT: blr [[CALLEE]]
+
+; CHECK: movz [[DTP_OFFSET:x[0-9]+]], #:dtprel_g1:local_dynamic_var
+; CHECK: movk [[DTP_OFFSET]], #:dtprel_g0_nc:local_dynamic_var
+
+; CHECK: add x0, x0, [[DTP_OFFSET]]
+
+; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE
+; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC
+; CHECK-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC
+; CHECK-RELOC: R_AARCH64_TLSDESC_CALL
+
+}
+
+; The entire point of the local-dynamic access model is to have a single call to
+; the expensive resolver. Make sure we achieve that goal.
+
+@local_dynamic_var2 = external thread_local(localdynamic) global i32
+
+define i32 @test_localdynamic_deduplicate() {
+; CHECK: test_localdynamic_deduplicate:
+
+ %val = load i32* @local_dynamic_var
+ %val2 = load i32* @local_dynamic_var2
+
+ %sum = add i32 %val, %val2
+ ret i32 %sum
+
+; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:_TLS_MODULE_BASE_
+; CHECK: add x0, x[[TLSDESC_HI]], #:tlsdesc_lo12:_TLS_MODULE_BASE_
+; CHECK: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], #:tlsdesc_lo12:_TLS_MODULE_BASE_]
+; CHECK: .tlsdesccall _TLS_MODULE_BASE_
+; CHECK-NEXT: blr [[CALLEE]]
+
+; CHECK-NOT: _TLS_MODULE_BASE_
+
+; CHECK: ret
+}
diff --git a/test/CodeGen/AArch64/tls-execs.ll b/test/CodeGen/AArch64/tls-execs.ll
new file mode 100644
index 0000000000..a665884227
--- /dev/null
+++ b/test/CodeGen/AArch64/tls-execs.ll
@@ -0,0 +1,63 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu -filetype=obj < %s | llvm-objdump -r - | FileCheck --check-prefix=CHECK-RELOC %s
+
+@initial_exec_var = external thread_local(initialexec) global i32
+
+define i32 @test_initial_exec() {
+; CHECK: test_initial_exec:
+ %val = load i32* @initial_exec_var
+
+; CHECK: adrp x[[GOTADDR:[0-9]+]], :gottprel:initial_exec_var
+; CHECK: ldr x[[TP_OFFSET:[0-9]+]], [x[[GOTADDR]], #:gottprel_lo12:initial_exec_var]
+; CHECK: mrs x[[TP:[0-9]+]], tpidr_el0
+; CHECK: ldr w0, [x[[TP]], x[[TP_OFFSET]]]
+
+; CHECK-RELOC: R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21
+; CHECK-RELOC: R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC
+
+ ret i32 %val
+}
+
+define i32* @test_initial_exec_addr() {
+; CHECK: test_initial_exec_addr:
+ ret i32* @initial_exec_var
+
+; CHECK: adrp x[[GOTADDR:[0-9]+]], :gottprel:initial_exec_var
+; CHECK: ldr [[TP_OFFSET:x[0-9]+]], [x[[GOTADDR]], #:gottprel_lo12:initial_exec_var]
+; CHECK: mrs [[TP:x[0-9]+]], tpidr_el0
+; CHECK: add x0, [[TP]], [[TP_OFFSET]]
+
+; CHECK-RELOC: R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21
+; CHECK-RELOC: R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC
+
+}
+
+@local_exec_var = thread_local(initialexec) global i32 0
+
+define i32 @test_local_exec() {
+; CHECK: test_local_exec:
+ %val = load i32* @local_exec_var
+
+; CHECK: movz [[TP_OFFSET:x[0-9]+]], #:tprel_g1:local_exec_var
+; CHECK: movk [[TP_OFFSET]], #:tprel_g0_nc:local_exec_var
+; CHECK: mrs x[[TP:[0-9]+]], tpidr_el0
+; CHECK: ldr w0, [x[[TP]], [[TP_OFFSET]]]
+
+; CHECK-RELOC: R_AARCH64_TLSLE_MOVW_TPREL_G1
+; CHECK-RELOC: R_AARCH64_TLSLE_MOVW_TPREL_G0_NC
+
+ ret i32 %val
+}
+
+define i32* @test_local_exec_addr() {
+; CHECK: test_local_exec_addr:
+ ret i32* @local_exec_var
+
+; CHECK: movz [[TP_OFFSET:x[0-9]+]], #:tprel_g1:local_exec_var
+; CHECK: movk [[TP_OFFSET]], #:tprel_g0_nc:local_exec_var
+; CHECK: mrs [[TP:x[0-9]+]], tpidr_el0
+; CHECK: add x0, [[TP]], [[TP_OFFSET]]
+
+; CHECK-RELOC: R_AARCH64_TLSLE_MOVW_TPREL_G1
+; CHECK-RELOC: R_AARCH64_TLSLE_MOVW_TPREL_G0_NC
+}
diff --git a/test/CodeGen/AArch64/tst-br.ll b/test/CodeGen/AArch64/tst-br.ll
new file mode 100644
index 0000000000..17a328fe4d
--- /dev/null
+++ b/test/CodeGen/AArch64/tst-br.ll
@@ -0,0 +1,48 @@
+; RUN: llc -verify-machineinstrs < %s -march=aarch64 | FileCheck %s
+
+; We've got the usual issues with LLVM reordering blocks here. The
+; tests are correct for the current order, but who knows when that
+; will change. Beware!
+@var32 = global i32 0
+@var64 = global i64 0
+
+define i32 @test_tbz() {
+; CHECK: test_tbz:
+
+ %val = load i32* @var32
+ %val64 = load i64* @var64
+
+ %tbit0 = and i32 %val, 32768
+ %tst0 = icmp ne i32 %tbit0, 0
+ br i1 %tst0, label %test1, label %end1
+; CHECK: tbz {{w[0-9]+}}, #15, [[LBL_end1:.LBB0_[0-9]+]]
+
+test1:
+ %tbit1 = and i32 %val, 4096
+ %tst1 = icmp ne i32 %tbit1, 0
+ br i1 %tst1, label %test2, label %end1
+; CHECK: tbz {{w[0-9]+}}, #12, [[LBL_end1]]
+
+test2:
+ %tbit2 = and i64 %val64, 32768
+ %tst2 = icmp ne i64 %tbit2, 0
+ br i1 %tst2, label %test3, label %end1
+; CHECK: tbz {{x[0-9]+}}, #15, [[LBL_end1]]
+
+test3:
+ %tbit3 = and i64 %val64, 4096
+ %tst3 = icmp ne i64 %tbit3, 0
+ br i1 %tst3, label %end2, label %end1
+; CHECK: tbz {{x[0-9]+}}, #12, [[LBL_end1]]
+
+end2:
+; CHECK: movz x0, #1
+; CHECK-NEXT: ret
+ ret i32 1
+
+end1:
+; CHECK: [[LBL_end1]]:
+; CHECK-NEXT: mov x0, xzr
+; CHECK-NEXT: ret
+ ret i32 0
+}
diff --git a/test/CodeGen/AArch64/variadic.ll b/test/CodeGen/AArch64/variadic.ll
new file mode 100644
index 0000000000..f601d4731e
--- /dev/null
+++ b/test/CodeGen/AArch64/variadic.ll
@@ -0,0 +1,144 @@
+; RUN: llc -verify-machineinstrs -march=aarch64 < %s | FileCheck %s
+
+%va_list = type {i8*, i8*, i8*, i32, i32}
+
+@var = global %va_list zeroinitializer
+
+declare void @llvm.va_start(i8*)
+
+define void @test_simple(i32 %n, ...) {
+; CHECK: test_simple:
+; CHECK: sub sp, sp, #[[STACKSIZE:[0-9]+]]
+; CHECK: mov x[[FPRBASE:[0-9]+]], sp
+; CHECK: str q7, [x[[FPRBASE]], #112]
+; CHECK: add x[[GPRBASE:[0-9]+]], sp, #[[GPRFROMSP:[0-9]+]]
+; CHECK: str x7, [x[[GPRBASE]], #48]
+
+; Omit the middle ones
+
+; CHECK: str q0, [sp]
+; CHECK: str x1, [sp, #[[GPRFROMSP]]]
+
+ %addr = bitcast %va_list* @var to i8*
+ call void @llvm.va_start(i8* %addr)
+; CHECK: add x[[VA_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:var
+; CHECK: movn [[VR_OFFS:w[0-9]+]], #127
+; CHECK: str [[VR_OFFS]], [x[[VA_LIST]], #28]
+; CHECK: movn [[GR_OFFS:w[0-9]+]], #55
+; CHECK: str [[GR_OFFS]], [x[[VA_LIST]], #24]
+; CHECK: add [[VR_TOP:x[0-9]+]], x[[FPRBASE]], #128
+; CHECK: str [[VR_TOP]], [x[[VA_LIST]], #16]
+; CHECK: add [[GR_TOP:x[0-9]+]], x[[GPRBASE]], #56
+; CHECK: str [[GR_TOP]], [x[[VA_LIST]], #8]
+; CHECK: add [[STACK:x[0-9]+]], sp, #[[STACKSIZE]]
+; CHECK: str [[STACK]], [{{x[0-9]+}}, #:lo12:var]
+
+ ret void
+}
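
The offsets stored above (#8, #16, #24, #28, plus the :lo12: store at offset 0) follow the AAPCS64 va_list layout, i.e. the %va_list = {i8*, i8*, i8*, i32, i32} type at the top of this file. A C sketch of that layout, for orientation only:

    /* AAPCS64 va_list as exercised by the checks above. */
    struct aapcs64_va_list {
      void *__stack;   /* offset  0: next overflow argument on the stack    */
      void *__gr_top;  /* offset  8: end of the general-register save area  */
      void *__vr_top;  /* offset 16: end of the FP/SIMD register save area  */
      int   __gr_offs; /* offset 24: negative offset from __gr_top to the
                          next unused saved GP register (movn #55 == -56)   */
      int   __vr_offs; /* offset 28: same for the FP/SIMD save area
                          (movn #127 == -128)                               */
    };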
+
+define void @test_fewargs(i32 %n, i32 %n1, i32 %n2, float %m, ...) {
+; CHECK: test_fewargs:
+; CHECK: sub sp, sp, #[[STACKSIZE:[0-9]+]]
+; CHECK: mov x[[FPRBASE:[0-9]+]], sp
+; CHECK: str q7, [x[[FPRBASE]], #96]
+; CHECK: add x[[GPRBASE:[0-9]+]], sp, #[[GPRFROMSP:[0-9]+]]
+; CHECK: str x7, [x[[GPRBASE]], #32]
+
+; Omit the middle ones
+
+; CHECK: str q1, [sp]
+; CHECK: str x3, [sp, #[[GPRFROMSP]]]
+
+ %addr = bitcast %va_list* @var to i8*
+ call void @llvm.va_start(i8* %addr)
+; CHECK: add x[[VA_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:var
+; CHECK: movn [[VR_OFFS:w[0-9]+]], #111
+; CHECK: str [[VR_OFFS]], [x[[VA_LIST]], #28]
+; CHECK: movn [[GR_OFFS:w[0-9]+]], #39
+; CHECK: str [[GR_OFFS]], [x[[VA_LIST]], #24]
+; CHECK: add [[VR_TOP:x[0-9]+]], x[[FPRBASE]], #112
+; CHECK: str [[VR_TOP]], [x[[VA_LIST]], #16]
+; CHECK: add [[GR_TOP:x[0-9]+]], x[[GPRBASE]], #40
+; CHECK: str [[GR_TOP]], [x[[VA_LIST]], #8]
+; CHECK: add [[STACK:x[0-9]+]], sp, #[[STACKSIZE]]
+; CHECK: str [[STACK]], [{{x[0-9]+}}, #:lo12:var]
+
+ ret void
+}
+
+define void @test_nospare([8 x i64], [8 x float], ...) {
+; CHECK: test_nospare:
+
+ %addr = bitcast %va_list* @var to i8*
+ call void @llvm.va_start(i8* %addr)
+; CHECK-NOT: sub sp, sp
+; CHECK: mov [[STACK:x[0-9]+]], sp
+; CHECK: str [[STACK]], [{{x[0-9]+}}, #:lo12:var]
+
+ ret void
+}
+
+; If there are non-variadic arguments on the stack (here two i64s) then the
+; __stack field should point just past them.
+define void @test_offsetstack([10 x i64], [3 x float], ...) {
+; CHECK: test_offsetstack:
+; CHECK: sub sp, sp, #80
+; CHECK: mov x[[FPRBASE:[0-9]+]], sp
+; CHECK: str q7, [x[[FPRBASE]], #64]
+
+; CHECK-NOT: str x{{[0-9]+}},
+; Omit the middle ones
+
+; CHECK: str q3, [sp]
+
+ %addr = bitcast %va_list* @var to i8*
+ call void @llvm.va_start(i8* %addr)
+; CHECK: add x[[VA_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:var
+; CHECK: movn [[VR_OFFS:w[0-9]+]], #79
+; CHECK: str [[VR_OFFS]], [x[[VA_LIST]], #28]
+; CHECK: str wzr, [x[[VA_LIST]], #24]
+; CHECK: add [[VR_TOP:x[0-9]+]], x[[FPRBASE]], #80
+; CHECK: str [[VR_TOP]], [x[[VA_LIST]], #16]
+; CHECK: add [[STACK:x[0-9]+]], sp, #96
+; CHECK: str [[STACK]], [{{x[0-9]+}}, #:lo12:var]
+
+ ret void
+}
+
+declare void @llvm.va_end(i8*)
+
+define void @test_va_end() nounwind {
+; CHECK: test_va_end:
+; CHECK-NEXT: BB#0
+
+ %addr = bitcast %va_list* @var to i8*
+ call void @llvm.va_end(i8* %addr)
+
+ ret void
+; CHECK-NEXT: ret
+}
+
+declare void @llvm.va_copy(i8* %dest, i8* %src)
+
+@second_list = global %va_list zeroinitializer
+
+define void @test_va_copy() {
+; CHECK: test_va_copy:
+ %srcaddr = bitcast %va_list* @var to i8*
+ %dstaddr = bitcast %va_list* @second_list to i8*
+ call void @llvm.va_copy(i8* %dstaddr, i8* %srcaddr)
+
+; Check beginning and end again:
+
+; CHECK: ldr [[BLOCK:x[0-9]+]], [{{x[0-9]+}}, #:lo12:var]
+; CHECK: str [[BLOCK]], [{{x[0-9]+}}, #:lo12:second_list]
+
+; CHECK: add x[[DEST_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:second_list
+; CHECK: add x[[SRC_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:var
+
+; CHECK: ldr [[BLOCK:x[0-9]+]], [x[[SRC_LIST]], #24]
+; CHECK: str [[BLOCK]], [x[[DEST_LIST]], #24]
+
+ ret void
+; CHECK: ret
+}
diff --git a/test/CodeGen/AArch64/zero-reg.ll b/test/CodeGen/AArch64/zero-reg.ll
new file mode 100644
index 0000000000..f4f76bef21
--- /dev/null
+++ b/test/CodeGen/AArch64/zero-reg.ll
@@ -0,0 +1,31 @@
+; RUN: llc -verify-machineinstrs < %s -march=aarch64 | FileCheck %s
+
+@var32 = global i32 0
+@var64 = global i64 0
+
+define void @test_zr() {
+; CHECK: test_zr:
+
+ store i32 0, i32* @var32
+; CHECK: str wzr, [{{x[0-9]+}}, #:lo12:var32]
+ store i64 0, i64* @var64
+; CHECK: str xzr, [{{x[0-9]+}}, #:lo12:var64]
+
+ ret void
+; CHECK: ret
+}
+
+define void @test_sp(i32 %val) {
+; CHECK: test_sp:
+
+; Important correctness point here is that LLVM doesn't try to use xzr
+; as an addressing register: "str w0, [xzr]" is not a valid A64
+; instruction (0b11111 in the Rn field would mean "sp").
+ %addr = getelementptr i32* null, i64 0
+ store i32 %val, i32* %addr
+; CHECK: mov x[[NULL:[0-9]+]], xzr
+; CHECK: str {{w[0-9]+}}, [x[[NULL]]]
+
+ ret void
+; CHECK: ret
+}
\ No newline at end of file
diff --git a/test/DebugInfo/AArch64/cfi-frame.ll b/test/DebugInfo/AArch64/cfi-frame.ll
new file mode 100644
index 0000000000..4217925b98
--- /dev/null
+++ b/test/DebugInfo/AArch64/cfi-frame.ll
@@ -0,0 +1,58 @@
+; RUN: llc -verify-machineinstrs -march aarch64 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -march aarch64 -disable-fp-elim < %s | FileCheck %s --check-prefix=CHECK-WITH-FP
+
+@bigspace = global [8 x i64] zeroinitializer
+
+declare void @use_addr(i8*)
+
+define void @test_frame([8 x i64] %val) {
+; CHECK: test_frame:
+; CHECK: .cfi_startproc
+
+ %var = alloca i8, i32 1000000
+; CHECK: sub sp, sp, #[[SP_INIT_ADJ:[0-9]+]]
+; CHECK-NEXT: .Ltmp
+; CHECK-NEXT: .cfi_def_cfa sp, [[SP_INIT_ADJ]]
+
+; Make sure the prologue is reasonably efficient
+; CHECK-NEXT: stp x29, x30, [sp,
+; CHECK-NEXT: stp x25, x26, [sp,
+; CHECK-NEXT: stp x23, x24, [sp,
+; CHECK-NEXT: stp x21, x22, [sp,
+; CHECK-NEXT: stp x19, x20, [sp,
+; CHECK-NEXT: sub sp, sp, #160
+; CHECK-NEXT: sub sp, sp, #244, lsl #12
+; CHECK-NEXT: .Ltmp
+; CHECK-NEXT: .cfi_def_cfa sp, 1000080
+; CHECK-NEXT: .Ltmp
+; CHECK-NEXT: .cfi_offset x30, -8
+; CHECK-NEXT: .Ltmp
+; CHECK-NEXT: .cfi_offset x29, -16
+; [...]
+; CHECK: .cfi_offset x19, -80
+
+; CHECK: bl use_addr
+ call void @use_addr(i8* %var)
+
+ store [8 x i64] %val, [8 x i64]* @bigspace
+ ret void
+; CHECK: ret
+; CHECK: .cfi_endproc
+}
+
+; CHECK-WITH-FP: test_frame:
+
+; CHECK-WITH-FP: sub sp, sp, #[[SP_INIT_ADJ:[0-9]+]]
+; CHECK-WITH-FP-NEXT: .Ltmp
+; CHECK-WITH-FP-NEXT: .cfi_def_cfa sp, [[SP_INIT_ADJ]]
+
+; CHECK-WITH-FP: stp x29, x30, [sp, [[OFFSET:#[0-9]+]]]
+; CHECK-WITH-FP-NEXT: add x29, sp, [[OFFSET]]
+; CHECK-WITH-FP-NEXT: .Ltmp
+; CHECK-WITH-FP-NEXT: .cfi_def_cfa x29, 16
+
+ ; We shouldn't emit any kind of update for the second stack adjustment if the
+ ; FP is in use.
+; CHECK-WITH-FP-NOT: .cfi_def_cfa_offset
+
+; CHECK-WITH-FP: bl use_addr
diff --git a/test/DebugInfo/AArch64/eh_frame.ll b/test/DebugInfo/AArch64/eh_frame.ll
new file mode 100644
index 0000000000..13436596a5
--- /dev/null
+++ b/test/DebugInfo/AArch64/eh_frame.ll
@@ -0,0 +1,51 @@
+; RUN: llc -verify-machineinstrs -march=aarch64 %s -filetype=obj -o %t
+; RUN: llvm-objdump -s %t | FileCheck %s
+@var = global i32 0
+
+declare void @bar()
+
+define i64 @check_largest_class(i32 %in) {
+ %res = load i32* @var
+ call void @bar()
+ %ext = zext i32 %res to i64
+ ret i64 %ext
+}
+
+; The really key points we're checking here are:
+; * Return register is x30.
+; * Pointer format is 0x1b (GNU doesn't appear to understand others).
+
+; The rest is largely incidental, but not expected to change regularly.
+
+; Output is:
+
+; CHECK: Contents of section .eh_frame:
+; CHECK-NEXT: 0000 10000000 00000000 017a5200 017c1e01 .........zR..|..
+; CHECK-NEXT: 0010 1b0c1f00 18000000 18000000 00000000 ................
+
+
+; Won't check the rest; it's rather incidental.
+; 0020 24000000 00440c1f 10449e02 93040000 $....D...D......
+
+
+; The first CIE:
+; -------------------
+; 10000000: length of first CIE = 0x10
+; 00000000: This is a CIE
+; 01: version = 0x1
+; 7a 52 00: augmentation string "zR" -- pointer format is specified
+; 01: code alignment factor 1
+; 7c: data alignment factor -4
+; 1e: return address register 30 (== x30).
+; 01: 1 byte of augmentation
+; 1b: pointer format 1b: DW_EH_PE_pcrel | DW_EH_PE_sdata4
+; 0c 1f 00: initial instructions: "DW_CFA_def_cfa x31 ofs 0" in this case
+
+; Next the FDE:
+; -------------
+; 18000000: FDE length 0x18
+; 18000000: Uses CIE 0x18 backwards (only coincidentally same as above)
+; 00000000: PC begin for this FDE is at 00000000 (relocation is applied here)
+; 24000000: FDE applies up to PC begin+0x24
+; 00: Augmentation string length 0 for this FDE
+; Rest: call frame instructions
diff --git a/test/DebugInfo/AArch64/eh_frame_personality.ll b/test/DebugInfo/AArch64/eh_frame_personality.ll
new file mode 100644
index 0000000000..ab06d211fd
--- /dev/null
+++ b/test/DebugInfo/AArch64/eh_frame_personality.ll
@@ -0,0 +1,46 @@
+; RUN: llc -verify-machineinstrs -march=aarch64 %s -filetype=obj -o %t
+; RUN: llvm-objdump -s %t | FileCheck %s
+
+declare i32 @__gxx_personality_v0(...)
+
+declare void @bar()
+
+define i64 @foo(i64 %lhs, i64 %rhs) {
+ invoke void @bar() to label %end unwind label %clean
+end:
+ ret i64 0
+
+clean:
+ %tst = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) cleanup
+ ret i64 42
+}
+
+; CHECK: Contents of section .eh_frame:
+; CHECK: 0000 1c000000 00000000 017a504c 5200017c .........zPLR..|
+; CHECK: 0010 1e0b0000 00000000 00000000 1b0c1f00 ................
+
+; Don't really care about the rest:
+
+; 0020 1c000000 24000000 00000000 24000000 ....$.......$...
+; 0030 08000000 00000000 00440c1f 10449e02 .........D...D..
+
+; The key test here is that the personality routine is sanely encoded (under the
+; small memory model it must be an 8-byte value for full generality: code+data <
+; 4GB, but you might need both +4GB and -4GB depending on where things end
+; up). However, for completeness:
+
+; First CIE:
+; ----------
+; 1c000000: Length = 0x1c
+; 00000000: This is a CIE
+; 01: Version 1
+; 7a 50 4c 52 00: Augmentation string "zPLR" (personality routine, language-specific data, pointer format)
+; 01: Code alignment factor 1
+; 7c: Data alignment factor: -4
+; 1e: Return address in x30
+; 0b: Augmentation data 0xb bytes (this is key!)
+; 00: Personality encoding is DW_EH_PE_absptr
+; 00 00 00 00 00 00 00 00: First part of aug (personality routine). Relocated, obviously
+; 00: Second part of aug (language-specific data): absolute pointer format used
+; 1b: pointer format: pc-relative signed 4-byte. Just like GNU.
+; 0c 1f 00: Initial instructions ("DW_CFA_def_cfa x31 ofs 0" in this case)
diff --git a/test/DebugInfo/AArch64/lit.local.cfg b/test/DebugInfo/AArch64/lit.local.cfg
new file mode 100644
index 0000000000..c5ce2411ed
--- /dev/null
+++ b/test/DebugInfo/AArch64/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+targets = set(config.root.targets_to_build.split())
+if not 'AArch64' in targets:
+ config.unsupported = True
+
diff --git a/test/DebugInfo/AArch64/variable-loc.ll b/test/DebugInfo/AArch64/variable-loc.ll
new file mode 100644
index 0000000000..32772046ef
--- /dev/null
+++ b/test/DebugInfo/AArch64/variable-loc.ll
@@ -0,0 +1,87 @@
+; RUN: llc -march=aarch64 -disable-fp-elim < %s | FileCheck %s
+
+; This is a regression test making sure the location of variables is correct in
+; debugging information, even if they're addressed via the frame pointer.
+
+ ; First make sure main_arr is where we expect it: sp + 12 == x29 - 420:
+; CHECK: main:
+; CHECK: sub sp, sp, #448
+; CHECK: stp x29, x30, [sp, #432]
+; CHECK: add x29, sp, #432
+; CHECK: add {{x[0-9]+}}, sp, #12
+
+ ; Now check the debugging information reflects this:
+; CHECK: DW_TAG_variable
+; CHECK-NEXT: .word .Linfo_string7
+
+  ; Rather hard-coded, but 145 (0x91) is DW_OP_fbreg and the .ascii is the SLEB128 encoding of -420.
+; CHECK: DW_AT_location
+; CHECK-NEXT: .byte 145
+; CHECK-NEXT: .ascii "\334|"
+
+; CHECK: .Linfo_string7:
+; CHECK-NEXT: main_arr
+
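
To see where "\334|" comes from, here is a minimal SLEB128 encoder (a sketch for illustration, not LLVM's own implementation) that reproduces the two bytes 0xdc 0x7c encoding -420:

    #include <stdint.h>
    #include <stdio.h>

    /* Emit the signed LEB128 encoding of value into out; returns the number
       of bytes written.  Relies on >> being an arithmetic shift for negative
       values, as it is on the usual compilers. */
    static int sleb128_encode(int64_t value, uint8_t *out) {
      int n = 0;
      for (;;) {
        uint8_t byte = value & 0x7f;
        value >>= 7;
        int done = (value == 0 && !(byte & 0x40)) ||
                   (value == -1 && (byte & 0x40));
        if (!done)
          byte |= 0x80;              /* continuation bit */
        out[n++] = byte;
        if (done)
          return n;
      }
    }

    int main(void) {
      uint8_t buf[8];
      int n = sleb128_encode(-420, buf);
      for (int i = 0; i < n; ++i)
        printf("0x%02x ", buf[i]);   /* prints: 0xdc 0x7c */
      printf("\n");
      return 0;
    }

Octal \334 is 0xdc and '|' is 0x7c, which is exactly the .ascii payload the CHECK lines expect.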
+
+target datalayout = "e-p:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128-f32:32:32-f64:64:64-f128:128:128-n32:64-S128"
+target triple = "aarch64-none-linux-gnu"
+
+@.str = private unnamed_addr constant [13 x i8] c"Total is %d\0A\00", align 1
+
+declare void @populate_array(i32*, i32) nounwind
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+declare i32 @sum_array(i32*, i32) nounwind
+
+define i32 @main() nounwind {
+entry:
+ %retval = alloca i32, align 4
+ %main_arr = alloca [100 x i32], align 4
+ %val = alloca i32, align 4
+ store i32 0, i32* %retval
+ call void @llvm.dbg.declare(metadata !{[100 x i32]* %main_arr}, metadata !17), !dbg !22
+ call void @llvm.dbg.declare(metadata !{i32* %val}, metadata !23), !dbg !24
+ %arraydecay = getelementptr inbounds [100 x i32]* %main_arr, i32 0, i32 0, !dbg !25
+ call void @populate_array(i32* %arraydecay, i32 100), !dbg !25
+ %arraydecay1 = getelementptr inbounds [100 x i32]* %main_arr, i32 0, i32 0, !dbg !26
+ %call = call i32 @sum_array(i32* %arraydecay1, i32 100), !dbg !26
+ store i32 %call, i32* %val, align 4, !dbg !26
+ %0 = load i32* %val, align 4, !dbg !27
+ %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([13 x i8]* @.str, i32 0, i32 0), i32 %0), !dbg !27
+ ret i32 0, !dbg !28
+}
+
+declare i32 @printf(i8*, ...)
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 786449, i32 0, i32 12, metadata !"simple.c", metadata !"/home/timnor01/a64-trunk/build", metadata !"clang version 3.2 ", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ] [/home/timnor01/a64-trunk/build/simple.c] [DW_LANG_C99]
+!1 = metadata !{metadata !2}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{metadata !5, metadata !11, metadata !14}
+!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"populate_array", metadata !"populate_array", metadata !"", metadata !6, i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32*, i32)* @populate_array, null, null, metadata !1, i32 4} ; [ DW_TAG_subprogram ] [line 4] [def] [populate_array]
+!6 = metadata !{i32 786473, metadata !"simple.c", metadata !"/home/timnor01/a64-trunk/build", null} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = metadata !{null, metadata !9, metadata !10}
+!9 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from int]
+!10 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!11 = metadata !{i32 786478, i32 0, metadata !6, metadata !"sum_array", metadata !"sum_array", metadata !"", metadata !6, i32 9, metadata !12, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32*, i32)* @sum_array, null, null, metadata !1, i32 9} ; [ DW_TAG_subprogram ] [line 9] [def] [sum_array]
+!12 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !13, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!13 = metadata !{metadata !10, metadata !9, metadata !10}
+!14 = metadata !{i32 786478, i32 0, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 18, metadata !15, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main, null, null, metadata !1, i32 18} ; [ DW_TAG_subprogram ] [line 18] [def] [main]
+!15 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !16, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!16 = metadata !{metadata !10}
+!17 = metadata !{i32 786688, metadata !18, metadata !"main_arr", metadata !6, i32 19, metadata !19, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [main_arr] [line 19]
+!18 = metadata !{i32 786443, metadata !14, i32 18, i32 16, metadata !6, i32 4} ; [ DW_TAG_lexical_block ] [/home/timnor01/a64-trunk/build/simple.c]
+!19 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 3200, i64 32, i32 0, i32 0, metadata !10, metadata !20, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 3200, align 32, offset 0] [from int]
+!20 = metadata !{metadata !21}
+!21 = metadata !{i32 786465, i64 0, i64 99} ; [ DW_TAG_subrange_type ] [0, 99]
+!22 = metadata !{i32 19, i32 7, metadata !18, null}
+!23 = metadata !{i32 786688, metadata !18, metadata !"val", metadata !6, i32 20, metadata !10, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [val] [line 20]
+!24 = metadata !{i32 20, i32 7, metadata !18, null}
+!25 = metadata !{i32 22, i32 3, metadata !18, null}
+!26 = metadata !{i32 23, i32 9, metadata !18, null}
+!27 = metadata !{i32 24, i32 3, metadata !18, null}
+!28 = metadata !{i32 26, i32 3, metadata !18, null}
diff --git a/test/MC/AArch64/basic-a64-diagnostics.s b/test/MC/AArch64/basic-a64-diagnostics.s
new file mode 100644
index 0000000000..eb13aa60b6
--- /dev/null
+++ b/test/MC/AArch64/basic-a64-diagnostics.s
@@ -0,0 +1,3709 @@
+// RUN: not llvm-mc -triple=aarch64 < %s 2> %t
+// RUN: FileCheck --check-prefix=CHECK-ERROR < %t %s
+
+//------------------------------------------------------------------------------
+// Add/sub (extended register)
+//------------------------------------------------------------------------------
+
+ // Mismatched final register and extend
+ add x2, x3, x5, sxtb
+ add x2, x4, w2, uxtx
+ add w5, w7, x9, sxtx
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: add x2, x3, x5, sxtb
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: add x2, x4, w2, uxtx
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: add w5, w7, x9, sxtx
+// CHECK-ERROR: ^
+
+ // Out of range extends
+ add x9, x10, w11, uxtb #-1
+ add x3, x5, w7, uxtb #5
+ sub x9, x15, x2, uxth #5
+// CHECK-ERROR: error: expected integer shift amount
+// CHECK-ERROR: add x9, x10, w11, uxtb #-1
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: add x3, x5, w7, uxtb #5
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: sub x9, x15, x2, uxth #5
+// CHECK-ERROR: ^
+
+ // Wrong registers on normal variants
+ add xzr, x3, x5, uxtx
+ sub x3, xzr, w9, sxth #1
+ add x1, x2, sp, uxtx
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: add xzr, x3, x5, uxtx
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: sub x3, xzr, w9, sxth #1
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: add x1, x2, sp, uxtx
+// CHECK-ERROR: ^
+
+ // Wrong registers on flag-setting variants
+ adds sp, x3, w2, uxtb
+ adds x3, xzr, x9, uxtx
+ subs x2, x1, sp, uxtx
+ adds x2, x1, sp, uxtb #2
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: adds sp, x3, w2, uxtb
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: adds x3, xzr, x9, uxtx
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: subs x2, x1, sp, uxtx
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: adds x2, x1, sp, uxtb #2
+// CHECK-ERROR: ^
+
+ // Amount not optional if lsl valid and used
+ add sp, x5, x7, lsl
+// CHECK-ERROR: error: expected #imm after shift specifier
+// CHECK-ERROR: add sp, x5, x7, lsl
+// CHECK-ERROR: ^
+
+//------------------------------------------------------------------------------
+// Add/sub (immediate)
+//------------------------------------------------------------------------------
+
+// Out of range immediates: < 0 or more than 12 bits
+ add w4, w5, #-1
+ add w5, w6, #0x1000
+ add w4, w5, #-1, lsl #12
+ add w5, w6, #0x1000, lsl #12
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: add w4, w5, #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: add w5, w6, #0x1000
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: add w4, w5, #-1, lsl #12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: add w5, w6, #0x1000, lsl #12
+// CHECK-ERROR-NEXT: ^
+
+// Only lsl #0 and lsl #12 are allowed
+ add w2, w3, #0x1, lsl #1
+ add w5, w17, #0xfff, lsl #13
+ add w17, w20, #0x1000, lsl #12
+ sub xsp, x34, #0x100, lsl #-1
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: add w2, w3, #0x1, lsl #1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: add w5, w17, #0xfff, lsl #13
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: add w17, w20, #0x1000, lsl #12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: only 'lsl #+N' valid after immediate
+// CHECK-ERROR-NEXT: sub xsp, x34, #0x100, lsl #-1
+// CHECK-ERROR-NEXT: ^
+
+// Incorrect registers (w31 doesn't exist at all, and 31 decodes to sp for these).
+ add w31, w20, #1234
+ add wzr, w20, #0x123
+ add w20, wzr, #0x321
+ add wzr, wzr, #0xfff
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: add w31, w20, #1234
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: add wzr, w20, #0x123
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: add w20, wzr, #0x321
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: add wzr, wzr, #0xfff
+// CHECK-ERROR-NEXT: ^
+
+// Mixed register classes
+ add xsp, w2, #123
+ sub w2, x30, #32
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: add xsp, w2, #123
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: sub w2, x30, #32
+// CHECK-ERROR-NEXT: ^
+
+// Out of range immediate
+ adds w0, w5, #0x10000
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: adds w0, w5, #0x10000
+// CHECK-ERROR-NEXT: ^
+
+// Wn|WSP should be in second place
+ adds w4, wzr, #0x123
+// ...but in the destination slot, register 31 encodes wzr (not wsp)
+ subs wsp, w5, #123
+ subs x5, xzr, #0x456, lsl #12
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: adds w4, wzr, #0x123
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: subs wsp, w5, #123
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: subs x5, xzr, #0x456, lsl #12
+// CHECK-ERROR-NEXT: ^
+
+ // MOV alias should not accept any fiddling
+ mov x2, xsp, #123
+ mov wsp, w27, #0xfff, lsl #12
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: mov x2, xsp, #123
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: mov wsp, w27, #0xfff, lsl #12
+// CHECK-ERROR-NEXT: ^
+
+ // A relocation should be provided for symbols
+ add x3, x9, #variable
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: add x3, x9, #variable
+// CHECK-ERROR-NEXT: ^
+
+
+//------------------------------------------------------------------------------
+// Add-subtract (shifted register)
+//------------------------------------------------------------------------------
+
+ add wsp, w1, w2, lsr #3
+ add x4, sp, x9, asr #5
+ add x9, x10, x5, ror #3
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: add wsp, w1, w2, lsr #3
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: add x4, sp, x9, asr #5
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: add x9, x10, x5, ror #3
+// CHECK-ERROR-NEXT: ^
+
+ add w1, w2, w3, lsl #-1
+ add w1, w2, w3, lsl #32
+ add w1, w2, w3, lsr #-1
+ add w1, w2, w3, lsr #32
+ add w1, w2, w3, asr #-1
+ add w1, w2, w3, asr #32
+ add x1, x2, x3, lsl #-1
+ add x1, x2, x3, lsl #64
+ add x1, x2, x3, lsr #-1
+ add x1, x2, x3, lsr #64
+ add x1, x2, x3, asr #-1
+ add x1, x2, x3, asr #64
+// CHECK-ERROR: error: expected integer shift amount
+// CHECK-ERROR-NEXT: add w1, w2, w3, lsl #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: add w1, w2, w3, lsl #32
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT: add w1, w2, w3, lsr #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: add w1, w2, w3, lsr #32
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT: add w1, w2, w3, asr #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: add w1, w2, w3, asr #32
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT: add x1, x2, x3, lsl #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: add x1, x2, x3, lsl #64
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT: add x1, x2, x3, lsr #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: add x1, x2, x3, lsr #64
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT: add x1, x2, x3, asr #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: add x1, x2, x3, asr #64
+// CHECK-ERROR-NEXT: ^
+
+ adds w1, w2, w3, lsl #-1
+ adds w1, w2, w3, lsl #32
+ adds w1, w2, w3, lsr #-1
+ adds w1, w2, w3, lsr #32
+ adds w1, w2, w3, asr #-1
+ adds w1, w2, w3, asr #32
+ adds x1, x2, x3, lsl #-1
+ adds x1, x2, x3, lsl #64
+ adds x1, x2, x3, lsr #-1
+ adds x1, x2, x3, lsr #64
+ adds x1, x2, x3, asr #-1
+ adds x1, x2, x3, asr #64
+// CHECK-ERROR: error: expected integer shift amount
+// CHECK-ERROR-NEXT: adds w1, w2, w3, lsl #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: adds w1, w2, w3, lsl #32
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT: adds w1, w2, w3, lsr #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: adds w1, w2, w3, lsr #32
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT: adds w1, w2, w3, asr #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: adds w1, w2, w3, asr #32
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT: adds x1, x2, x3, lsl #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: adds x1, x2, x3, lsl #64
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT: adds x1, x2, x3, lsr #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: adds x1, x2, x3, lsr #64
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT: adds x1, x2, x3, asr #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: adds x1, x2, x3, asr #64
+// CHECK-ERROR-NEXT: ^
+
+ sub w1, w2, w3, lsl #-1
+ sub w1, w2, w3, lsl #32
+ sub w1, w2, w3, lsr #-1
+ sub w1, w2, w3, lsr #32
+ sub w1, w2, w3, asr #-1
+ sub w1, w2, w3, asr #32
+ sub x1, x2, x3, lsl #-1
+ sub x1, x2, x3, lsl #64
+ sub x1, x2, x3, lsr #-1
+ sub x1, x2, x3, lsr #64
+ sub x1, x2, x3, asr #-1
+ sub x1, x2, x3, asr #64
+// CHECK-ERROR: error: expected integer shift amount
+// CHECK-ERROR-NEXT: sub w1, w2, w3, lsl #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: sub w1, w2, w3, lsl #32
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT: sub w1, w2, w3, lsr #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: sub w1, w2, w3, lsr #32
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT: sub w1, w2, w3, asr #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: sub w1, w2, w3, asr #32
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT: sub x1, x2, x3, lsl #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: sub x1, x2, x3, lsl #64
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT: sub x1, x2, x3, lsr #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: sub x1, x2, x3, lsr #64
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT: sub x1, x2, x3, asr #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: sub x1, x2, x3, asr #64
+// CHECK-ERROR-NEXT: ^
+
+ subs w1, w2, w3, lsl #-1
+ subs w1, w2, w3, lsl #32
+ subs w1, w2, w3, lsr #-1
+ subs w1, w2, w3, lsr #32
+ subs w1, w2, w3, asr #-1
+ subs w1, w2, w3, asr #32
+ subs x1, x2, x3, lsl #-1
+ subs x1, x2, x3, lsl #64
+ subs x1, x2, x3, lsr #-1
+ subs x1, x2, x3, lsr #64
+ subs x1, x2, x3, asr #-1
+ subs x1, x2, x3, asr #64
+// CHECK-ERROR: error: expected integer shift amount
+// CHECK-ERROR-NEXT: subs w1, w2, w3, lsl #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: subs w1, w2, w3, lsl #32
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT: subs w1, w2, w3, lsr #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: subs w1, w2, w3, lsr #32
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT: subs w1, w2, w3, asr #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: subs w1, w2, w3, asr #32
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT: subs x1, x2, x3, lsl #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: subs x1, x2, x3, lsl #64
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT: subs x1, x2, x3, lsr #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: subs x1, x2, x3, lsr #64
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT: subs x1, x2, x3, asr #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: subs x1, x2, x3, asr #64
+// CHECK-ERROR-NEXT: ^
+
+ cmn w9, w10, lsl #-1
+ cmn w9, w10, lsl #32
+ cmn w11, w12, lsr #-1
+ cmn w11, w12, lsr #32
+ cmn w19, wzr, asr #-1
+ cmn wzr, wzr, asr #32
+ cmn x9, x10, lsl #-1
+ cmn x9, x10, lsl #64
+ cmn x11, x12, lsr #-1
+ cmn x11, x12, lsr #64
+ cmn x19, xzr, asr #-1
+ cmn xzr, xzr, asr #64
+// CHECK-ERROR: error: expected integer shift amount
+// CHECK-ERROR-NEXT: cmn w9, w10, lsl #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: cmn w9, w10, lsl #32
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT: cmn w11, w12, lsr #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: cmn w11, w12, lsr #32
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT: cmn w19, wzr, asr #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: cmn wzr, wzr, asr #32
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT: cmn x9, x10, lsl #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: cmn x9, x10, lsl #64
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT: cmn x11, x12, lsr #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: cmn x11, x12, lsr #64
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT: cmn x19, xzr, asr #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: cmn xzr, xzr, asr #64
+// CHECK-ERROR-NEXT: ^
+
+ cmp w9, w10, lsl #-1
+ cmp w9, w10, lsl #32
+ cmp w11, w12, lsr #-1
+ cmp w11, w12, lsr #32
+ cmp w19, wzr, asr #-1
+ cmp wzr, wzr, asr #32
+ cmp x9, x10, lsl #-1
+ cmp x9, x10, lsl #64
+ cmp x11, x12, lsr #-1
+ cmp x11, x12, lsr #64
+ cmp x19, xzr, asr #-1
+ cmp xzr, xzr, asr #64
+// CHECK-ERROR: error: expected integer shift amount
+// CHECK-ERROR-NEXT: cmp w9, w10, lsl #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: cmp w9, w10, lsl #32
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT: cmp w11, w12, lsr #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: cmp w11, w12, lsr #32
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT: cmp w19, wzr, asr #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: cmp wzr, wzr, asr #32
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT: cmp x9, x10, lsl #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: cmp x9, x10, lsl #64
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT: cmp x11, x12, lsr #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: cmp x11, x12, lsr #64
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT: cmp x19, xzr, asr #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: cmp xzr, xzr, asr #64
+// CHECK-ERROR-NEXT: ^
+
+ neg w9, w10, lsl #-1
+ neg w9, w10, lsl #32
+ neg w11, w12, lsr #-1
+ neg w11, w12, lsr #32
+ neg w19, wzr, asr #-1
+ neg wzr, wzr, asr #32
+ neg x9, x10, lsl #-1
+ neg x9, x10, lsl #64
+ neg x11, x12, lsr #-1
+ neg x11, x12, lsr #64
+ neg x19, xzr, asr #-1
+ neg xzr, xzr, asr #64
+// CHECK-ERROR: error: expected integer shift amount
+// CHECK-ERROR-NEXT: neg w9, w10, lsl #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: neg w9, w10, lsl #32
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT: neg w11, w12, lsr #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: neg w11, w12, lsr #32
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT: neg w19, wzr, asr #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: neg wzr, wzr, asr #32
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT: neg x9, x10, lsl #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: neg x9, x10, lsl #64
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT: neg x11, x12, lsr #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: neg x11, x12, lsr #64
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT: neg x19, xzr, asr #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: neg xzr, xzr, asr #64
+// CHECK-ERROR-NEXT: ^
+
+ negs w9, w10, lsl #-1
+ negs w9, w10, lsl #32
+ negs w11, w12, lsr #-1
+ negs w11, w12, lsr #32
+ negs w19, wzr, asr #-1
+ negs wzr, wzr, asr #32
+ negs x9, x10, lsl #-1
+ negs x9, x10, lsl #64
+ negs x11, x12, lsr #-1
+ negs x11, x12, lsr #64
+ negs x19, xzr, asr #-1
+ negs xzr, xzr, asr #64
+// CHECK-ERROR: error: expected integer shift amount
+// CHECK-ERROR-NEXT: negs w9, w10, lsl #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: negs w9, w10, lsl #32
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT: negs w11, w12, lsr #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: negs w11, w12, lsr #32
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT: negs w19, wzr, asr #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: negs wzr, wzr, asr #32
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT: negs x9, x10, lsl #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: negs x9, x10, lsl #64
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT: negs x11, x12, lsr #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: negs x11, x12, lsr #64
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT: negs x19, xzr, asr #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: negs xzr, xzr, asr #64
+// CHECK-ERROR-NEXT: ^
+
+//------------------------------------------------------------------------------
+// Add-subtract (with carry)
+//------------------------------------------------------------------------------
+
+ adc wsp, w3, w5
+ adc w1, wsp, w2
+ adc w0, w10, wsp
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: adc wsp, w3, w5
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: adc w1, wsp, w2
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: adc w0, w10, wsp
+// CHECK-ERROR-NEXT: ^
+
+ adc sp, x3, x5
+ adc x1, sp, x2
+ adc x0, x10, sp
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: adc sp, x3, x5
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: adc x1, sp, x2
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: adc x0, x10, sp
+// CHECK-ERROR-NEXT: ^
+
+ adcs wsp, w3, w5
+ adcs w1, wsp, w2
+ adcs w0, w10, wsp
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: adcs wsp, w3, w5
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: adcs w1, wsp, w2
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: adcs w0, w10, wsp
+// CHECK-ERROR-NEXT: ^
+
+ adcs sp, x3, x5
+ adcs x1, sp, x2
+ adcs x0, x10, sp
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: adcs sp, x3, x5
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: adcs x1, sp, x2
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: adcs x0, x10, sp
+// CHECK-ERROR-NEXT: ^
+
+ sbc wsp, w3, w5
+ sbc w1, wsp, w2
+ sbc w0, w10, wsp
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: sbc wsp, w3, w5
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: sbc w1, wsp, w2
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: sbc w0, w10, wsp
+// CHECK-ERROR-NEXT: ^
+
+ sbc sp, x3, x5
+ sbc x1, sp, x2
+ sbc x0, x10, sp
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: sbc sp, x3, x5
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: sbc x1, sp, x2
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: sbc x0, x10, sp
+// CHECK-ERROR-NEXT: ^
+
+ sbcs wsp, w3, w5
+ sbcs w1, wsp, w2
+ sbcs w0, w10, wsp
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: sbcs wsp, w3, w5
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: sbcs w1, wsp, w2
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: sbcs w0, w10, wsp
+// CHECK-ERROR-NEXT: ^
+
+ sbcs sp, x3, x5
+ sbcs x1, sp, x2
+ sbcs x0, x10, sp
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: sbcs sp, x3, x5
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: sbcs x1, sp, x2
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: sbcs x0, x10, sp
+// CHECK-ERROR-NEXT: ^
+
+ ngc wsp, w3
+ ngc w9, wsp
+ ngc sp, x9
+ ngc x2, sp
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ngc wsp, w3
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ngc w9, wsp
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ngc sp, x9
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ngc x2, sp
+// CHECK-ERROR-NEXT: ^
+
+ ngcs wsp, w3
+ ngcs w9, wsp
+ ngcs sp, x9
+ ngcs x2, sp
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ngcs wsp, w3
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ngcs w9, wsp
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ngcs sp, x9
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ngcs x2, sp
+// CHECK-ERROR-NEXT: ^
+
+//------------------------------------------------------------------------------
+// Bitfield
+//------------------------------------------------------------------------------
+
+ sbfm x3, w13, #0, #0
+ sbfm w12, x9, #0, #0
+ sbfm sp, x3, #3, #5
+ sbfm w3, wsp, #1, #9
+ sbfm x9, x5, #-1, #0
+ sbfm x9, x5, #0, #-1
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: sbfm x3, w13, #0, #0
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: sbfm w12, x9, #0, #0
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: sbfm sp, x3, #3, #5
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: sbfm w3, wsp, #1, #9
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: sbfm x9, x5, #-1, #0
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: sbfm x9, x5, #0, #-1
+// CHECK-ERROR-NEXT: ^
+
+ sbfm w3, w5, #32, #1
+ sbfm w7, w11, #19, #32
+ sbfm x29, x30, #64, #0
+ sbfm x10, x20, #63, #64
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: sbfm w3, w5, #32, #1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: sbfm w7, w11, #19, #32
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: sbfm x29, x30, #64, #0
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: sbfm x10, x20, #63, #64
+// CHECK-ERROR-NEXT: ^
+
+ ubfm w3, w5, #32, #1
+ ubfm w7, w11, #19, #32
+ ubfm x29, x30, #64, #0
+ ubfm x10, x20, #63, #64
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ubfm w3, w5, #32, #1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ubfm w7, w11, #19, #32
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ubfm x29, x30, #64, #0
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ubfm x10, x20, #63, #64
+// CHECK-ERROR-NEXT: ^
+
+ bfm w3, w5, #32, #1
+ bfm w7, w11, #19, #32
+ bfm x29, x30, #64, #0
+ bfm x10, x20, #63, #64
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: bfm w3, w5, #32, #1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: bfm w7, w11, #19, #32
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: bfm x29, x30, #64, #0
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: bfm x10, x20, #63, #64
+// CHECK-ERROR-NEXT: ^
+
+ sxtb x3, x2
+ sxth xzr, xzr
+ sxtw x3, x5
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: sxtb x3, x2
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: sxth xzr, xzr
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: sxtw x3, x5
+// CHECK-ERROR-NEXT: ^
+
+ uxtb x3, x12
+ uxth x5, x9
+ uxtw x3, x5
+ uxtb x2, sp
+ uxtb sp, xzr
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: uxtb x3, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: uxth x5, x9
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid instruction
+// CHECK-ERROR-NEXT: uxtw x3, x5
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: uxtb x2, sp
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: uxtb sp, xzr
+// CHECK-ERROR-NEXT: ^
+
+ asr x3, w2, #1
+ asr sp, x2, #1
+ asr x25, x26, #-1
+ asr x25, x26, #64
+ asr w9, w8, #32
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: asr x3, w2, #1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: asr sp, x2, #1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: asr x25, x26, #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: asr x25, x26, #64
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: asr w9, w8, #32
+// CHECK-ERROR-NEXT: ^
+
+ sbfiz w1, w2, #0, #0
+ sbfiz wsp, w9, #0, #1
+ sbfiz w9, w10, #32, #1
+ sbfiz w11, w12, #32, #0
+ sbfiz w9, w10, #10, #23
+ sbfiz x3, x5, #12, #53
+ sbfiz sp, x3, #5, #6
+ sbfiz w3, wsp, #7, #8
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: sbfiz w1, w2, #0, #0
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: sbfiz wsp, w9, #0, #1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: sbfiz w9, w10, #32, #1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: sbfiz w11, w12, #32, #0
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: requested insert overflows register
+// CHECK-ERROR-NEXT: sbfiz w9, w10, #10, #23
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: requested insert overflows register
+// CHECK-ERROR-NEXT: sbfiz x3, x5, #12, #53
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: sbfiz sp, x3, #5, #6
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: sbfiz w3, wsp, #7, #8
+// CHECK-ERROR-NEXT: ^
+
+ sbfx w1, w2, #0, #0
+ sbfx wsp, w9, #0, #1
+ sbfx w9, w10, #32, #1
+ sbfx w11, w12, #32, #0
+ sbfx w9, w10, #10, #23
+ sbfx x3, x5, #12, #53
+ sbfx sp, x3, #5, #6
+ sbfx w3, wsp, #7, #8
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: sbfx w1, w2, #0, #0
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: sbfx wsp, w9, #0, #1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: sbfx w9, w10, #32, #1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: sbfx w11, w12, #32, #0
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: requested extract overflows register
+// CHECK-ERROR-NEXT: sbfx w9, w10, #10, #23
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: requested extract overflows register
+// CHECK-ERROR-NEXT: sbfx x3, x5, #12, #53
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: sbfx sp, x3, #5, #6
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: sbfx w3, wsp, #7, #8
+// CHECK-ERROR-NEXT: ^
+
+ bfi w1, w2, #0, #0
+ bfi wsp, w9, #0, #1
+ bfi w9, w10, #32, #1
+ bfi w11, w12, #32, #0
+ bfi w9, w10, #10, #23
+ bfi x3, x5, #12, #53
+ bfi sp, x3, #5, #6
+ bfi w3, wsp, #7, #8
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: bfi w1, w2, #0, #0
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: bfi wsp, w9, #0, #1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: bfi w9, w10, #32, #1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: bfi w11, w12, #32, #0
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: requested insert overflows register
+// CHECK-ERROR-NEXT: bfi w9, w10, #10, #23
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: requested insert overflows register
+// CHECK-ERROR-NEXT: bfi x3, x5, #12, #53
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: bfi sp, x3, #5, #6
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: bfi w3, wsp, #7, #8
+// CHECK-ERROR-NEXT: ^
+
+ bfxil w1, w2, #0, #0
+ bfxil wsp, w9, #0, #1
+ bfxil w9, w10, #32, #1
+ bfxil w11, w12, #32, #0
+ bfxil w9, w10, #10, #23
+ bfxil x3, x5, #12, #53
+ bfxil sp, x3, #5, #6
+ bfxil w3, wsp, #7, #8
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: bfxil w1, w2, #0, #0
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: bfxil wsp, w9, #0, #1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: bfxil w9, w10, #32, #1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: bfxil w11, w12, #32, #0
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: requested extract overflows register
+// CHECK-ERROR-NEXT: bfxil w9, w10, #10, #23
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: requested extract overflows register
+// CHECK-ERROR-NEXT: bfxil x3, x5, #12, #53
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: bfxil sp, x3, #5, #6
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: bfxil w3, wsp, #7, #8
+// CHECK-ERROR-NEXT: ^
+
+ ubfiz w1, w2, #0, #0
+ ubfiz wsp, w9, #0, #1
+ ubfiz w9, w10, #32, #1
+ ubfiz w11, w12, #32, #0
+ ubfiz w9, w10, #10, #23
+ ubfiz x3, x5, #12, #53
+ ubfiz sp, x3, #5, #6
+ ubfiz w3, wsp, #7, #8
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ubfiz w1, w2, #0, #0
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ubfiz wsp, w9, #0, #1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ubfiz w9, w10, #32, #1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ubfiz w11, w12, #32, #0
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: requested insert overflows register
+// CHECK-ERROR-NEXT: ubfiz w9, w10, #10, #23
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: requested insert overflows register
+// CHECK-ERROR-NEXT: ubfiz x3, x5, #12, #53
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ubfiz sp, x3, #5, #6
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ubfiz w3, wsp, #7, #8
+// CHECK-ERROR-NEXT: ^
+
+ ubfx w1, w2, #0, #0
+ ubfx wsp, w9, #0, #1
+ ubfx w9, w10, #32, #1
+ ubfx w11, w12, #32, #0
+ ubfx w9, w10, #10, #23
+ ubfx x3, x5, #12, #53
+ ubfx sp, x3, #5, #6
+ ubfx w3, wsp, #7, #8
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ubfx w1, w2, #0, #0
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ubfx wsp, w9, #0, #1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ubfx w9, w10, #32, #1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ubfx w11, w12, #32, #0
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: requested extract overflows register
+// CHECK-ERROR-NEXT: ubfx w9, w10, #10, #23
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: requested extract overflows register
+// CHECK-ERROR-NEXT: ubfx x3, x5, #12, #53
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ubfx sp, x3, #5, #6
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ubfx w3, wsp, #7, #8
+// CHECK-ERROR-NEXT: ^
+
+//------------------------------------------------------------------------------
+// Compare & branch (immediate)
+//------------------------------------------------------------------------------
+
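+// The branch target must be a label or a word-aligned immediate within +/-1MiB,
+// and the register being tested cannot be wsp/sp.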
+ cbnz wsp, lbl
+ cbz sp, lbl
+ cbz x3, x5
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: cbnz wsp, lbl
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: cbz sp, lbl
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: cbz x3, x5
+// CHECK-ERROR-NEXT: ^
+
+ cbz w20, #1048576
+ cbnz xzr, #-1048580
+ cbz x29, #1
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: cbz w20, #1048576
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: cbnz xzr, #-1048580
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: cbz x29, #1
+// CHECK-ERROR-NEXT: ^
+
+//------------------------------------------------------------------------------
+// Conditional branch (immediate)
+//------------------------------------------------------------------------------
+
+ b.zf lbl
+// CHECK-ERROR: error: invalid condition code
+// CHECK-ERROR-NEXT: b.zf lbl
+// CHECK-ERROR-NEXT: ^
+
+ b.eq #1048576
+ b.ge #-1048580
+ b.cc #1
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: b.eq #1048576
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: b.ge #-1048580
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: b.cc #1
+// CHECK-ERROR-NEXT: ^
+
+//------------------------------------------------------------------------------
+// Conditional compare (immediate)
+//------------------------------------------------------------------------------
+
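+// The compared immediate must be in the range 0-31, the nzcv flags value in
+// 0-15, and the first operand cannot be wsp/sp.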
+ ccmp wsp, #4, #2, ne
+ ccmp w25, #-1, #15, hs
+ ccmp w3, #32, #0, ge
+ ccmp w19, #5, #-1, lt
+ ccmp w20, #7, #16, hs
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ccmp wsp, #4, #2, ne
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ccmp w25, #-1, #15, hs
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ccmp w3, #32, #0, ge
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ccmp w19, #5, #-1, lt
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ccmp w20, #7, #16, hs
+// CHECK-ERROR-NEXT: ^
+
+ ccmp sp, #4, #2, ne
+ ccmp x25, #-1, #15, hs
+ ccmp x3, #32, #0, ge
+ ccmp x19, #5, #-1, lt
+ ccmp x20, #7, #16, hs
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ccmp sp, #4, #2, ne
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ccmp x25, #-1, #15, hs
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ccmp x3, #32, #0, ge
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ccmp x19, #5, #-1, lt
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ccmp x20, #7, #16, hs
+// CHECK-ERROR-NEXT: ^
+
+ ccmn wsp, #4, #2, ne
+ ccmn w25, #-1, #15, hs
+ ccmn w3, #32, #0, ge
+ ccmn w19, #5, #-1, lt
+ ccmn w20, #7, #16, hs
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ccmn wsp, #4, #2, ne
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ccmn w25, #-1, #15, hs
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ccmn w3, #32, #0, ge
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ccmn w19, #5, #-1, lt
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ccmn w20, #7, #16, hs
+// CHECK-ERROR-NEXT: ^
+
+ ccmn sp, #4, #2, ne
+ ccmn x25, #-1, #15, hs
+ ccmn x3, #32, #0, ge
+ ccmn x19, #5, #-1, lt
+ ccmn x20, #7, #16, hs
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ccmn sp, #4, #2, ne
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ccmn x25, #-1, #15, hs
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ccmn x3, #32, #0, ge
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ccmn x19, #5, #-1, lt
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ccmn x20, #7, #16, hs
+// CHECK-ERROR-NEXT: ^
+
+//------------------------------------------------------------------------------
+// Conditional compare (register)
+//------------------------------------------------------------------------------
+
+ ccmp wsp, w4, #2, ne
+ ccmp w3, wsp, #0, ge
+ ccmp w19, w5, #-1, lt
+ ccmp w20, w7, #16, hs
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ccmp wsp, w4, #2, ne
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ccmp w3, wsp, #0, ge
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ccmp w19, w5, #-1, lt
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ccmp w20, w7, #16, hs
+// CHECK-ERROR-NEXT: ^
+
+ ccmp sp, x4, #2, ne
+ ccmp x25, sp, #15, hs
+ ccmp x19, x5, #-1, lt
+ ccmp x20, x7, #16, hs
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ccmp sp, x4, #2, ne
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ccmp x25, sp, #15, hs
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ccmp x19, x5, #-1, lt
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ccmp x20, x7, #16, hs
+// CHECK-ERROR-NEXT: ^
+
+ ccmn wsp, w4, #2, ne
+ ccmn w25, wsp, #15, hs
+ ccmn w19, w5, #-1, lt
+ ccmn w20, w7, #16, hs
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ccmn wsp, w4, #2, ne
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ccmn w25, wsp, #15, hs
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ccmn w19, w5, #-1, lt
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ccmn w20, w7, #16, hs
+// CHECK-ERROR-NEXT: ^
+
+ ccmn sp, x4, #2, ne
+ ccmn x25, sp, #15, hs
+ ccmn x19, x5, #-1, lt
+ ccmn x20, x7, #16, hs
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ccmn sp, x4, #2, ne
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ccmn x25, sp, #15, hs
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ccmn x19, x5, #-1, lt
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ccmn x20, x7, #16, hs
+// CHECK-ERROR-NEXT: ^
+
+//------------------------------------------------------------------------------
+// Conditional select
+//------------------------------------------------------------------------------
+
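+// None of the operands may be wsp/sp, and the final operand must be a condition
+// code rather than an immediate.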
+ csel w4, wsp, w9, eq
+ csel wsp, w2, w3, ne
+ csel w10, w11, wsp, ge
+ csel w1, w2, w3, #3
+ csel x4, sp, x9, eq
+ csel sp, x2, x3, ne
+ csel x10, x11, sp, ge
+ csel x1, x2, x3, #3
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: csel w4, wsp, w9, eq
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: csel wsp, w2, w3, ne
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: csel w10, w11, wsp, ge
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: csel w1, w2, w3, #3
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: csel x4, sp, x9, eq
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: csel sp, x2, x3, ne
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: csel x10, x11, sp, ge
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: csel x1, x2, x3, #3
+// CHECK-ERROR-NEXT: ^
+
+ csinc w20, w21, wsp, mi
+ csinc sp, x30, x29, eq
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: csinc w20, w21, wsp, mi
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: csinc sp, x30, x29, eq
+// CHECK-ERROR-NEXT: ^
+
+ csinv w20, wsp, wsp, mi
+ csinv sp, x30, x29, le
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: csinv w20, wsp, wsp, mi
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: csinv sp, x30, x29, le
+// CHECK-ERROR-NEXT: ^
+
+ csneg w20, w21, wsp, mi
+ csneg x0, sp, x29, le
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: csneg w20, w21, wsp, mi
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: csneg x0, sp, x29, le
+// CHECK-ERROR-NEXT: ^
+
+ cset wsp, lt
+ csetm sp, ge
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: cset wsp, lt
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: csetm sp, ge
+// CHECK-ERROR-NEXT: ^
+
+ cinc w3, wsp, ne
+ cinc sp, x9, eq
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: cinc w3, wsp, ne
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: cinc sp, x9, eq
+// CHECK-ERROR-NEXT: ^
+
+ cinv w3, wsp, ne
+ cinv sp, x9, eq
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: cinv w3, wsp, ne
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: cinv sp, x9, eq
+// CHECK-ERROR-NEXT: ^
+
+ cneg w3, wsp, ne
+ cneg sp, x9, eq
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: cneg w3, wsp, ne
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: cneg sp, x9, eq
+// CHECK-ERROR-NEXT: ^
+
+//------------------------------------------------------------------------------
+// Data Processing (1 source)
+//------------------------------------------------------------------------------
+ rbit x23, w2
+//CHECK-ERROR: error: invalid operand for instruction
+//CHECK-ERROR-NEXT: rbit x23, w2
+
+ cls sp, x2
+//CHECK-ERROR: error: invalid operand for instruction
+//CHECK-ERROR-NEXT: cls sp, x2
+
+ clz wsp, w3
+//CHECK-ERROR: error: invalid operand for instruction
+//CHECK-ERROR-NEXT: clz wsp, w3
+
+//------------------------------------------------------------------------------
+// Data Processing (2 sources)
+//------------------------------------------------------------------------------
+ udiv x23, w2, x18
+//CHECK-ERROR: error: invalid operand for instruction
+//CHECK-ERROR-NEXT: udiv x23, w2, x18
+
+ lsl sp, x2, x4
+//CHECK-ERROR: error: invalid operand for instruction
+//CHECK-ERROR-NEXT: lsl sp, x2, x4
+
+ asr wsp, w3, w9
+//CHECK-ERROR: error: invalid operand for instruction
+//CHECK-ERROR-NEXT: asr wsp, w3, w9
+
+//------------------------------------------------------------------------------
+// Data Processing (3 sources)
+//------------------------------------------------------------------------------
+
+ madd sp, x3, x9, x10
+//CHECK-ERROR: error: invalid operand for instruction
+//CHECK-ERROR-NEXT: madd sp, x3, x9, x10
+
+//------------------------------------------------------------------------------
+// Exception generation
+//------------------------------------------------------------------------------
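+// svc/hlt take an unsigned 16-bit immediate (0-65535); dcps4 does not exist
+// (only dcps1, dcps2 and dcps3 are defined).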
+ svc #-1
+ hlt #65536
+ dcps4 #43
+ dcps4
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: svc #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: hlt #65536
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid instruction
+// CHECK-ERROR-NEXT: dcps4 #43
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid instruction
+// CHECK-ERROR-NEXT: dcps4
+// CHECK-ERROR-NEXT: ^
+
+//------------------------------------------------------------------------------
+// Extract (immediate)
+//------------------------------------------------------------------------------
+
+ extr w2, w20, w30, #-1
+ extr w9, w19, w20, #32
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: extr w2, w20, w30, #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: extr w9, w19, w20, #32
+// CHECK-ERROR-NEXT: ^
+
+ extr x10, x15, x20, #-1
+ extr x20, x25, x30, #64
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: extr x10, x15, x20, #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: extr x20, x25, x30, #64
+// CHECK-ERROR-NEXT: ^
+
+ ror w9, w10, #32
+ ror x10, x11, #64
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ror w9, w10, #32
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ror x10, x11, #64
+// CHECK-ERROR-NEXT: ^
+
+//------------------------------------------------------------------------------
+// Floating-point compare
+//------------------------------------------------------------------------------
+
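+// Both operands must have the same precision, and the only immediate accepted
+// is #0.0.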
+ fcmp s3, d2
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: fcmp s3, d2
+// CHECK-ERROR-NEXT: ^
+
+ fcmp s9, #-0.0
+ fcmp d3, #-0.0
+ fcmp s1, #1.0
+ fcmpe s30, #-0.0
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: fcmp s9, #-0.0
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: fcmp d3, #-0.0
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: fcmp s1, #1.0
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: fcmpe s30, #-0.0
+// CHECK-ERROR-NEXT: ^
+
+//------------------------------------------------------------------------------
+// Floating-point conditional compare
+//------------------------------------------------------------------------------
+
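+// The nzcv flags immediate must be in the range 0-15.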
+ fccmp s19, s5, #-1, lt
+ fccmp s20, s7, #16, hs
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: fccmp s19, s5, #-1, lt
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: fccmp s20, s7, #16, hs
+// CHECK-ERROR-NEXT: ^
+
+ fccmp d19, d5, #-1, lt
+ fccmp d20, d7, #16, hs
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: fccmp d19, d5, #-1, lt
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: fccmp d20, d7, #16, hs
+// CHECK-ERROR-NEXT: ^
+
+ fccmpe s19, s5, #-1, lt
+ fccmpe s20, s7, #16, hs
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: fccmpe s19, s5, #-1, lt
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: fccmpe s20, s7, #16, hs
+// CHECK-ERROR-NEXT: ^
+
+ fccmpe d19, d5, #-1, lt
+ fccmpe d20, d7, #16, hs
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: fccmpe d19, d5, #-1, lt
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: fccmpe d20, d7, #16, hs
+// CHECK-ERROR-NEXT: ^
+
+//------------------------------------------------------------------------------
+// Floating-point conditional select
+//------------------------------------------------------------------------------
+
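+// fcsel is only defined for single- and double-precision registers.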
+ fcsel q3, q20, q9, pl
+ fcsel h9, h10, h11, mi
+ fcsel b9, b10, b11, mi
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: fcsel q3, q20, q9, pl
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: fcsel h9, h10, h11, mi
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: fcsel b9, b10, b11, mi
+// CHECK-ERROR-NEXT: ^
+
+//------------------------------------------------------------------------------
+// Floating-point data-processing (1 source)
+//------------------------------------------------------------------------------
+
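+// fmov (register) cannot change precision (that is fcvt's job), and fcvt
+// requires source and destination of different precisions.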
+ fmov d0, s3
+ fcvt d0, d1
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: fmov d0, s3
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: fcvt d0, d1
+// CHECK-ERROR-NEXT: ^
+
+//------------------------------------------------------------------------------
+// Floating-point data-processing (2 sources)
+//------------------------------------------------------------------------------
+
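+// All three operands must have the same precision.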
+ fadd s0, d3, d7
+ fmaxnm d3, s19, d12
+ fnmul d1, d9, s18
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: fadd s0, d3, d7
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: fmaxnm d3, s19, d12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: fnmul d1, d9, s18
+// CHECK-ERROR-NEXT: ^
+
+//------------------------------------------------------------------------------
+// Floating-point data-processing (3 sources)
+//------------------------------------------------------------------------------
+
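+// These are only defined for single- and double-precision registers, all of
+// the same precision.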
+ fmadd b3, b4, b5, b6
+ fmsub h1, h2, h3, h4
+ fnmadd q3, q5, q6, q7
+ fnmsub s2, s4, d5, h9
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: fmadd b3, b4, b5, b6
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: fmsub h1, h2, h3, h4
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: fnmadd q3, q5, q6, q7
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: fnmsub s2, s4, d5, h9
+// CHECK-ERROR-NEXT: ^
+
+//------------------------------------------------------------------------------
+// Floating-point <-> fixed-point conversion
+//------------------------------------------------------------------------------
+
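+// The fbits scale must be 1-32 for W registers and 1-64 for X registers, and
+// wsp/sp is not a valid integer operand.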
+ fcvtzs w13, s31, #0
+ fcvtzs w19, s20, #33
+ fcvtzs wsp, s19, #14
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: fcvtzs w13, s31, #0
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: fcvtzs w19, s20, #33
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: fcvtzs wsp, s19, #14
+// CHECK-ERROR-NEXT: ^
+
+ fcvtzs x13, s31, #0
+ fcvtzs x19, s20, #65
+ fcvtzs sp, s19, #14
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: fcvtzs x13, s31, #0
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: fcvtzs x19, s20, #65
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: fcvtzs sp, s19, #14
+// CHECK-ERROR-NEXT: ^
+
+ fcvtzu w13, s31, #0
+ fcvtzu w19, s20, #33
+ fcvtzu wsp, s19, #14
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: fcvtzu w13, s31, #0
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: fcvtzu w19, s20, #33
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: fcvtzu wsp, s19, #14
+// CHECK-ERROR-NEXT: ^
+
+ fcvtzu x13, s31, #0
+ fcvtzu x19, s20, #65
+ fcvtzu sp, s19, #14
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: fcvtzu x13, s31, #0
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: fcvtzu x19, s20, #65
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: fcvtzu sp, s19, #14
+// CHECK-ERROR-NEXT: ^
+
+ scvtf w13, s31, #0
+ scvtf w19, s20, #33
+ scvtf wsp, s19, #14
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: scvtf w13, s31, #0
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: scvtf w19, s20, #33
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: scvtf wsp, s19, #14
+// CHECK-ERROR-NEXT: ^
+
+ scvtf x13, s31, #0
+ scvtf x19, s20, #65
+ scvtf sp, s19, #14
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: scvtf x13, s31, #0
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: scvtf x19, s20, #65
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: scvtf sp, s19, #14
+// CHECK-ERROR-NEXT: ^
+
+ ucvtf w13, s31, #0
+ ucvtf w19, s20, #33
+ ucvtf wsp, s19, #14
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ucvtf w13, s31, #0
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ucvtf w19, s20, #33
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ucvtf wsp, s19, #14
+// CHECK-ERROR-NEXT: ^
+
+ ucvtf x13, s31, #0
+ ucvtf x19, s20, #65
+ ucvtf sp, s19, #14
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ucvtf x13, s31, #0
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ucvtf x19, s20, #65
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ucvtf sp, s19, #14
+// CHECK-ERROR-NEXT: ^
+
+//------------------------------------------------------------------------------
+// Floating-point immediate
+//------------------------------------------------------------------------------
+ ;; Exponent too large
+ fmov d3, #0.0625
+ fmov s2, #32.0
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: fmov d3, #0.0625
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: fmov s2, #32.0
+// CHECK-ERROR-NEXT: ^
+
+ ;; Fraction too precise
+ fmov s9, #1.03125
+ fmov s28, #1.96875
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: fmov s9, #1.03125
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: fmov s28, #1.96875
+// CHECK-ERROR-NEXT: ^
+
+ ;; No particular reason, but a striking omission
+ fmov d0, #0.0
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: fmov d0, #0.0
+// CHECK-ERROR-NEXT: ^
+
+//------------------------------------------------------------------------------
+// Floating-point <-> integer conversion
+//------------------------------------------------------------------------------
+
+ fmov x3, v0.d[0]
+ fmov v29.1d[1], x2
+ fmov x7, v0.d[2]
+ fcvtns sp, s5
+ scvtf s6, wsp
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: fmov x3, v0.d[0]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: lane number incompatible with layout
+// CHECK-ERROR-NEXT: fmov v29.1d[1], x2
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: lane number incompatible with layout
+// CHECK-ERROR-NEXT: fmov x7, v0.d[2]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: fcvtns sp, s5
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: scvtf s6, wsp
+// CHECK-ERROR-NEXT: ^
+
+//------------------------------------------------------------------------------
+// Load-register (literal)
+//------------------------------------------------------------------------------
+
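+// Literal loads take a label or a word-aligned offset within +/-1MiB; the
+// destination cannot be sp, and ldrsw requires an X destination register.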
+ ldr sp, some_label
+ ldrsw w3, somewhere
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldr sp, some_label
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldrsw w3, somewhere
+// CHECK-ERROR-NEXT: ^
+
+ ldrsw x2, #1048576
+ ldr q0, #-1048580
+ ldr x0, #2
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldrsw x2, #1048576
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldr q0, #-1048580
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldr x0, #2
+// CHECK-ERROR-NEXT: ^
+
+//------------------------------------------------------------------------------
+// Load/store exclusive
+//------------------------------------------------------------------------------
+
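+// The exclusive status register must be a W register, the transferred value
+// cannot be wsp/sp, the only accepted offset is #0, and both registers of a
+// pair must be the same width.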
+ stxrb w2, x3, [x4, #20]
+ stlxrh w10, w11, [w2]
+// CHECK-ERROR: error: expected '#0'
+// CHECK-ERROR-NEXT: stxrb w2, x3, [x4, #20]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: stlxrh w10, w11, [w2]
+// CHECK-ERROR-NEXT: ^
+
+ stlxr x20, w21, [sp]
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: stlxr x20, w21, [sp]
+// CHECK-ERROR-NEXT: ^
+
+ ldxr sp, [sp]
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldxr sp, [sp]
+// CHECK-ERROR-NEXT: ^
+
+ stxp x1, x2, x3, [x4]
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: stxp x1, x2, x3, [x4]
+// CHECK-ERROR-NEXT: ^
+
+ stlxp w5, x1, w4, [x5]
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: stlxp w5, x1, w4, [x5]
+// CHECK-ERROR-NEXT: ^
+
+ stlxp w17, w6, x7, [x22]
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: stlxp w17, w6, x7, [x22]
+// CHECK-ERROR-NEXT: ^
+
+//------------------------------------------------------------------------------
+// Load/store (unscaled immediate)
+//------------------------------------------------------------------------------
+
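+// Unscaled offsets are a signed 9-bit immediate, i.e. -256 to +255.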
+ ldurb w2, [sp, #256]
+ sturh w17, [x1, #256]
+ ldursw x20, [x1, #256]
+ ldur x12, [sp, #256]
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldurb w2, [sp, #256]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: sturh w17, [x1, #256]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldursw x20, [x1, #256]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldur x12, [sp, #256]
+// CHECK-ERROR-NEXT: ^
+
+ stur h2, [x2, #-257]
+ stur b2, [x2, #-257]
+ ldursb x9, [sp, #-257]
+ ldur w2, [x30, #-257]
+ stur q9, [x20, #-257]
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: stur h2, [x2, #-257]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: stur b2, [x2, #-257]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldursb x9, [sp, #-257]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldur w2, [x30, #-257]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: stur q9, [x20, #-257]
+// CHECK-ERROR-NEXT: ^
+
+ prfum pstl3strm, [xzr]
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: prfum pstl3strm, [xzr]
+// CHECK-ERROR-NEXT: ^
+
+//------------------------------------------------------------------------------
+// Load-store register (immediate post-indexed)
+//------------------------------------------------------------------------------
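+// Post-indexed addressing takes a bare base register followed by a signed
+// 9-bit immediate (-256 to +255).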
+ ldr x3, [x4, #25], #0
+ ldr x4, [x9, #0], #4
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldr x3, [x4, #25], #0
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldr x4, [x9, #0], #4
+// CHECK-ERROR-NEXT: ^
+
+ strb w1, [x19], #256
+ strb w9, [sp], #-257
+ strh w1, [x19], #256
+ strh w9, [sp], #-257
+ str w1, [x19], #256
+ str w9, [sp], #-257
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: strb w1, [x19], #256
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: strb w9, [sp], #-257
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: strh w1, [x19], #256
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: strh w9, [sp], #-257
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: str w1, [x19], #256
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: str w9, [sp], #-257
+// CHECK-ERROR-NEXT: ^
+
+ ldrb w1, [x19], #256
+ ldrb w9, [sp], #-257
+ ldrh w1, [x19], #256
+ ldrh w9, [sp], #-257
+ ldr w1, [x19], #256
+ ldr w9, [sp], #-257
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldrb w1, [x19], #256
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldrb w9, [sp], #-257
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldrh w1, [x19], #256
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldrh w9, [sp], #-257
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldr w1, [x19], #256
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldr w9, [sp], #-257
+// CHECK-ERROR-NEXT: ^
+
+ ldrsb x2, [x3], #256
+ ldrsb x22, [x13], #-257
+ ldrsh x2, [x3], #256
+ ldrsh x22, [x13], #-257
+ ldrsw x2, [x3], #256
+ ldrsw x22, [x13], #-257
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldrsb x2, [x3], #256
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldrsb x22, [x13], #-257
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldrsh x2, [x3], #256
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldrsh x22, [x13], #-257
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldrsw x2, [x3], #256
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldrsw x22, [x13], #-257
+// CHECK-ERROR-NEXT: ^
+
+ ldrsb w2, [x3], #256
+ ldrsb w22, [x13], #-257
+ ldrsh w2, [x3], #256
+ ldrsh w22, [x13], #-257
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldrsb w2, [x3], #256
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldrsb w22, [x13], #-257
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldrsh w2, [x3], #256
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldrsh w22, [x13], #-257
+// CHECK-ERROR-NEXT: ^
+
+ str b3, [x3], #256
+ str b3, [x13], #-257
+ str h3, [x3], #256
+ str h3, [x13], #-257
+ str s3, [x3], #256
+ str s3, [x13], #-257
+ str d3, [x3], #256
+ str d3, [x13], #-257
+ str q3, [x3], #256
+ str q3, [x13], #-257
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: str b3, [x3], #256
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: str b3, [x13], #-257
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: str h3, [x3], #256
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: str h3, [x13], #-257
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: str s3, [x3], #256
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: str s3, [x13], #-257
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: str d3, [x3], #256
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: str d3, [x13], #-257
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: str q3, [x3], #256
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: str q3, [x13], #-257
+// CHECK-ERROR-NEXT: ^
+
+ ldr b3, [x3], #256
+ ldr b3, [x13], #-257
+ ldr h3, [x3], #256
+ ldr h3, [x13], #-257
+ ldr s3, [x3], #256
+ ldr s3, [x13], #-257
+ ldr d3, [x3], #256
+ ldr d3, [x13], #-257
+ ldr q3, [x3], #256
+ ldr q3, [x13], #-257
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldr b3, [x3], #256
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldr b3, [x13], #-257
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldr h3, [x3], #256
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldr h3, [x13], #-257
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldr s3, [x3], #256
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldr s3, [x13], #-257
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldr d3, [x3], #256
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldr d3, [x13], #-257
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldr q3, [x3], #256
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldr q3, [x13], #-257
+// CHECK-ERROR-NEXT: ^
+
+//------------------------------------------------------------------------------
+// Load-store register (immediate pre-indexed)
+//------------------------------------------------------------------------------
+
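+// Pre-indexed addressing requires an explicit signed 9-bit immediate (-256 to
+// +255) before the '!'.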
+ ldr x3, [x4]!
+// CHECK-ERROR: error:
+// CHECK-ERROR-NEXT: ldr x3, [x4]!
+// CHECK-ERROR-NEXT: ^
+
+ strb w1, [x19, #256]!
+ strb w9, [sp, #-257]!
+ strh w1, [x19, #256]!
+ strh w9, [sp, #-257]!
+ str w1, [x19, #256]!
+ str w9, [sp, #-257]!
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: strb w1, [x19, #256]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: strb w9, [sp, #-257]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: strh w1, [x19, #256]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: strh w9, [sp, #-257]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: str w1, [x19, #256]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: str w9, [sp, #-257]!
+// CHECK-ERROR-NEXT: ^
+
+ ldrb w1, [x19, #256]!
+ ldrb w9, [sp, #-257]!
+ ldrh w1, [x19, #256]!
+ ldrh w9, [sp, #-257]!
+ ldr w1, [x19, #256]!
+ ldr w9, [sp, #-257]!
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldrb w1, [x19, #256]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldrb w9, [sp, #-257]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldrh w1, [x19, #256]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldrh w9, [sp, #-257]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldr w1, [x19, #256]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldr w9, [sp, #-257]!
+// CHECK-ERROR-NEXT: ^
+
+ ldrsb x2, [x3, #256]!
+ ldrsb x22, [x13, #-257]!
+ ldrsh x2, [x3, #256]!
+ ldrsh x22, [x13, #-257]!
+ ldrsw x2, [x3, #256]!
+ ldrsw x22, [x13, #-257]!
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldrsb x2, [x3, #256]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldrsb x22, [x13, #-257]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldrsh x2, [x3, #256]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldrsh x22, [x13, #-257]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldrsw x2, [x3, #256]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldrsw x22, [x13, #-257]!
+// CHECK-ERROR-NEXT: ^
+
+ ldrsb w2, [x3, #256]!
+ ldrsb w22, [x13, #-257]!
+ ldrsh w2, [x3, #256]!
+ ldrsh w22, [x13, #-257]!
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldrsb w2, [x3, #256]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldrsb w22, [x13, #-257]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldrsh w2, [x3, #256]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldrsh w22, [x13, #-257]!
+// CHECK-ERROR-NEXT: ^
+
+ str b3, [x3, #256]!
+ str b3, [x13, #-257]!
+ str h3, [x3, #256]!
+ str h3, [x13, #-257]!
+ str s3, [x3, #256]!
+ str s3, [x13, #-257]!
+ str d3, [x3, #256]!
+ str d3, [x13, #-257]!
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: str b3, [x3, #256]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: str b3, [x13, #-257]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: str h3, [x3, #256]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: str h3, [x13, #-257]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: str s3, [x3, #256]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: str s3, [x13, #-257]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: str d3, [x3, #256]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: str d3, [x13, #-257]!
+// CHECK-ERROR-NEXT: ^
+
+ ldr b3, [x3, #256]!
+ ldr b3, [x13, #-257]!
+ ldr h3, [x3, #256]!
+ ldr h3, [x13, #-257]!
+ ldr s3, [x3, #256]!
+ ldr s3, [x13, #-257]!
+ ldr d3, [x3, #256]!
+ ldr d3, [x13, #-257]!
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldr b3, [x3, #256]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldr b3, [x13, #-257]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldr h3, [x3, #256]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldr h3, [x13, #-257]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldr s3, [x3, #256]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldr s3, [x13, #-257]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldr d3, [x3, #256]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldr d3, [x13, #-257]!
+// CHECK-ERROR-NEXT: ^
+
+//------------------------------------------------------------------------------
+// Load/store (unprivileged)
+//------------------------------------------------------------------------------
+
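+// ldtr/sttr also take an unscaled signed 9-bit offset (-256 to +255).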
+ ldtrb w2, [sp, #256]
+ sttrh w17, [x1, #256]
+ ldtrsw x20, [x1, #256]
+ ldtr x12, [sp, #256]
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldtrb w2, [sp, #256]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: sttrh w17, [x1, #256]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldtrsw x20, [x1, #256]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldtr x12, [sp, #256]
+// CHECK-ERROR-NEXT: ^
+
+ sttr h2, [x2, #-257]
+ sttr b2, [x2, #-257]
+ ldtrsb x9, [sp, #-257]
+ ldtr w2, [x30, #-257]
+ sttr q9, [x20, #-257]
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: sttr h2, [x2, #-257]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: sttr b2, [x2, #-257]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldtrsb x9, [sp, #-257]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldtr w2, [x30, #-257]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: sttr q9, [x20, #-257]
+// CHECK-ERROR-NEXT: ^
+
+//------------------------------------------------------------------------------
+// Load/store (unsigned immediate)
+//------------------------------------------------------------------------------
+
+//// Out of range immediates
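+//// (each offset is one access-size step beyond the maximum scaled unsigned
+//// 12-bit immediate)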
+ ldr q0, [x11, #65536]
+ ldr x0, [sp, #32768]
+ ldr w0, [x4, #16384]
+ ldrh w2, [x21, #8192]
+ ldrb w3, [x12, #4096]
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldr q0, [x11, #65536]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldr x0, [sp, #32768]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldr w0, [x4, #16384]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldrh w2, [x21, #8192]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldrb w3, [x12, #4096]
+// CHECK-ERROR-NEXT: ^
+
+//// Misaligned addresses
+ ldr w0, [x0, #2]
+ ldrsh w2, [x0, #123]
+ str q0, [x0, #8]
+// CHECK-ERROR: error: too few operands for instruction
+// CHECK-ERROR-NEXT: ldr w0, [x0, #2]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: too few operands for instruction
+// CHECK-ERROR-NEXT: ldrsh w2, [x0, #123]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: too few operands for instruction
+// CHECK-ERROR-NEXT: str q0, [x0, #8]
+// CHECK-ERROR-NEXT: ^
+
+//// 32-bit addresses
+ ldr w0, [w20]
+ ldrsh x3, [wsp]
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldr w0, [w20]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldrsh x3, [wsp]
+// CHECK-ERROR-NEXT: ^
+
+//// Store things
+ strb w0, [wsp]
+ strh w31, [x23, #1]
+ str x5, [x22, #12]
+ str w7, [x12, #16384]
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: strb w0, [wsp]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: strh w31, [x23, #1]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: too few operands for instruction
+// CHECK-ERROR-NEXT: str x5, [x22, #12]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: str w7, [x12, #16384]
+// CHECK-ERROR-NEXT: ^
+
+//// Bad PRFMs
+ prfm #-1, [sp]
+ prfm #32, [sp, #8]
+ prfm pldl1strm, [w3, #8]
+ prfm wibble, [sp]
+// CHECK-ERROR: error: Invalid immediate for instruction
+// CHECK-ERROR-NEXT: prfm #-1, [sp]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: Invalid immediate for instruction
+// CHECK-ERROR-NEXT: prfm #32, [sp, #8]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: prfm pldl1strm, [w3, #8]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: operand specifier not recognised
+// CHECK-ERROR-NEXT: prfm wibble, [sp]
+// CHECK-ERROR-NEXT: ^
+
+//------------------------------------------------------------------------------
+// Load/store register (register offset)
+//------------------------------------------------------------------------------
+
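+// The base must be an X register or sp (not xzr); a W offset register needs
+// uxtw/sxtw, an X offset register needs lsl/sxtx, and any shift amount must
+// be 0 or log2 of the access size.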
+ ldr w3, [xzr, x3]
+ ldr w4, [x0, x4, lsl]
+ ldr w9, [x5, x5, uxtw]
+ ldr w10, [x6, x9, sxtw #2]
+ ldr w11, [x7, w2, lsl #2]
+ ldr w12, [x8, w1, sxtx]
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldr w3, [xzr, x3]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected #imm after shift specifier
+// CHECK-ERROR-NEXT: ldr w4, [x0, x4, lsl]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldr w9, [x5, x5, uxtw]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldr w10, [x6, x9, sxtw #2]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldr w11, [x7, w2, lsl #2]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldr w12, [x8, w1, sxtx]
+// CHECK-ERROR-NEXT: ^
+
+ ldrsb w9, [x4, x2, lsl #-1]
+ strb w9, [x4, x2, lsl #1]
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT: ldrsb w9, [x4, x2, lsl #-1]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: strb w9, [x4, x2, lsl #1]
+// CHECK-ERROR-NEXT: ^
+
+ ldrsh w9, [x4, x2, lsl #-1]
+ ldr h13, [x4, w2, uxtw #2]
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT: ldrsh w9, [x4, x2, lsl #-1]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldr h13, [x4, w2, uxtw #2]
+// CHECK-ERROR-NEXT: ^
+
+ str w9, [x5, w9, sxtw #-1]
+ str s3, [sp, w9, uxtw #1]
+ ldrsw x9, [x15, x4, sxtx #3]
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT: str w9, [x5, w9, sxtw #-1]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: str s3, [sp, w9, uxtw #1]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldrsw x9, [x15, x4, sxtx #3]
+// CHECK-ERROR-NEXT: ^
+
+ str xzr, [x5, x9, sxtx #-1]
+ prfm pldl3keep, [sp, x20, lsl #2]
+ ldr d3, [x20, wzr, uxtw #4]
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT: str xzr, [x5, x9, sxtx #-1]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: prfm pldl3keep, [sp, x20, lsl #2]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldr d3, [x20, wzr, uxtw #4]
+// CHECK-ERROR-NEXT: ^
+
+ ldr q5, [sp, x2, lsl #-1]
+ ldr q10, [x20, w4, uxtw #2]
+ str q21, [x20, w4, uxtw #5]
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT: ldr q5, [sp, x2, lsl #-1]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldr q10, [x20, w4, uxtw #2]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: str q21, [x20, w4, uxtw #5]
+// CHECK-ERROR-NEXT: ^
+
+//------------------------------------------------------------------------------
+// Load/store register pair (offset)
+//------------------------------------------------------------------------------
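+// Pair offsets are a signed 7-bit immediate scaled by the access size:
+// multiples of 4 in [-256, 252] for W/S, 8 in [-512, 504] for X/D and 16 in
+// [-1024, 1008] for Q registers; wsp/sp cannot be a transfer register.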
+ ldp w3, w2, [x4, #1]
+ stp w1, w2, [x3, #253]
+ stp w9, w10, [x5, #256]
+ ldp w11, w12, [x9, #-260]
+ stp wsp, w9, [sp]
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldp w3, w2, [x4, #1]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: stp w1, w2, [x3, #253]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: stp w9, w10, [x5, #256]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldp w11, w12, [x9, #-260]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: stp wsp, w9, [sp]
+// CHECK-ERROR-NEXT: ^
+
+ ldpsw x9, x2, [sp, #2]
+ ldpsw x1, x2, [x10, #256]
+ ldpsw x3, x4, [x11, #-260]
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldpsw x9, x2, [sp, #2]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldpsw x1, x2, [x10, #256]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldpsw x3, x4, [x11, #-260]
+// CHECK-ERROR-NEXT: ^
+
+ ldp x2, x5, [sp, #4]
+ ldp x5, x6, [x9, #512]
+ stp x7, x8, [x10, #-520]
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldp x2, x5, [sp, #4]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldp x5, x6, [x9, #512]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: stp x7, x8, [x10, #-520]
+// CHECK-ERROR-NEXT: ^
+
+ ldp sp, x3, [x10]
+ stp x3, sp, [x9]
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldp sp, x3, [x10]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: stp x3, sp, [x9]
+// CHECK-ERROR-NEXT: ^
+
+ stp s3, s5, [sp, #-2]
+ ldp s6, s26, [x4, #-260]
+ stp s13, s19, [x5, #256]
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: stp s3, s5, [sp, #-2]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldp s6, s26, [x4, #-260]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: stp s13, s19, [x5, #256]
+// CHECK-ERROR-NEXT: ^
+
+ ldp d3, d4, [xzr]
+ ldp d5, d6, [x0, #512]
+ stp d7, d8, [x0, #-520]
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldp d3, d4, [xzr]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldp d5, d6, [x0, #512]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: stp d7, d8, [x0, #-520]
+// CHECK-ERROR-NEXT: ^
+
+ ldp d3, q2, [sp]
+ ldp q3, q5, [sp, #8]
+ stp q20, q25, [x5, #1024]
+ ldp q30, q15, [x23, #-1040]
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldp d3, q2, [sp]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldp q3, q5, [sp, #8]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: stp q20, q25, [x5, #1024]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldp q30, q15, [x23, #-1040]
+// CHECK-ERROR-NEXT: ^
+
+//------------------------------------------------------------------------------
+// Load/store register pair (post-indexed)
+//------------------------------------------------------------------------------
+
+ ldp w3, w2, [x4], #1
+ stp w1, w2, [x3], #253
+ stp w9, w10, [x5], #256
+ ldp w11, w12, [x9], #-260
+ stp wsp, w9, [sp], #0
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldp w3, w2, [x4], #1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: stp w1, w2, [x3], #253
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: stp w9, w10, [x5], #256
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldp w11, w12, [x9], #-260
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: stp wsp, w9, [sp], #0
+// CHECK-ERROR-NEXT: ^
+
+ ldpsw x9, x2, [sp], #2
+ ldpsw x1, x2, [x10], #256
+ ldpsw x3, x4, [x11], #-260
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldpsw x9, x2, [sp], #2
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldpsw x1, x2, [x10], #256
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldpsw x3, x4, [x11], #-260
+// CHECK-ERROR-NEXT: ^
+
+ ldp x2, x5, [sp], #4
+ ldp x5, x6, [x9], #512
+ stp x7, x8, [x10], #-520
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldp x2, x5, [sp], #4
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldp x5, x6, [x9], #512
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: stp x7, x8, [x10], #-520
+// CHECK-ERROR-NEXT: ^
+
+ ldp sp, x3, [x10], #0
+ stp x3, sp, [x9], #0
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldp sp, x3, [x10], #0
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: stp x3, sp, [x9], #0
+// CHECK-ERROR-NEXT: ^
+
+ stp s3, s5, [sp], #-2
+ ldp s6, s26, [x4], #-260
+ stp s13, s19, [x5], #256
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: stp s3, s5, [sp], #-2
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldp s6, s26, [x4], #-260
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: stp s13, s19, [x5], #256
+// CHECK-ERROR-NEXT: ^
+
+ ldp d3, d4, [xzr], #0
+ ldp d5, d6, [x0], #512
+ stp d7, d8, [x0], #-520
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldp d3, d4, [xzr], #0
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldp d5, d6, [x0], #512
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: stp d7, d8, [x0], #-520
+// CHECK-ERROR-NEXT: ^
+
+ ldp d3, q2, [sp], #0
+ ldp q3, q5, [sp], #8
+ stp q20, q25, [x5], #1024
+ ldp q30, q15, [x23], #-1040
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldp d3, q2, [sp], #0
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldp q3, q5, [sp], #8
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: stp q20, q25, [x5], #1024
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldp q30, q15, [x23], #-1040
+// CHECK-ERROR-NEXT: ^
+
+//------------------------------------------------------------------------------
+// Load/store register pair (pre-indexed)
+//------------------------------------------------------------------------------
+
+ ldp w3, w2, [x4, #1]!
+ stp w1, w2, [x3, #253]!
+ stp w9, w10, [x5, #256]!
+ ldp w11, w12, [x9, #-260]!
+ stp wsp, w9, [sp, #0]!
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldp w3, w2, [x4, #1]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: stp w1, w2, [x3, #253]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: stp w9, w10, [x5, #256]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldp w11, w12, [x9, #-260]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: stp wsp, w9, [sp, #0]!
+// CHECK-ERROR-NEXT: ^
+
+ ldpsw x9, x2, [sp, #2]!
+ ldpsw x1, x2, [x10, #256]!
+ ldpsw x3, x4, [x11, #-260]!
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldpsw x9, x2, [sp, #2]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldpsw x1, x2, [x10, #256]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldpsw x3, x4, [x11, #-260]!
+// CHECK-ERROR-NEXT: ^
+
+ ldp x2, x5, [sp, #4]!
+ ldp x5, x6, [x9, #512]!
+ stp x7, x8, [x10, #-520]!
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldp x2, x5, [sp, #4]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldp x5, x6, [x9, #512]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: stp x7, x8, [x10, #-520]!
+// CHECK-ERROR-NEXT: ^
+
+ ldp sp, x3, [x10, #0]!
+ stp x3, sp, [x9, #0]!
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldp sp, x3, [x10, #0]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: stp x3, sp, [x9, #0]!
+// CHECK-ERROR-NEXT: ^
+
+ stp s3, s5, [sp, #-2]!
+ ldp s6, s26, [x4, #-260]!
+ stp s13, s19, [x5, #256]!
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: stp s3, s5, [sp, #-2]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldp s6, s26, [x4, #-260]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: stp s13, s19, [x5, #256]!
+// CHECK-ERROR-NEXT: ^
+
+ ldp d3, d4, [xzr, #0]!
+ ldp d5, d6, [x0, #512]!
+ stp d7, d8, [x0, #-520]!
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldp d3, d4, [xzr, #0]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldp d5, d6, [x0, #512]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: stp d7, d8, [x0, #-520]!
+// CHECK-ERROR-NEXT: ^
+
+ ldp d3, q2, [sp, #0]!
+ ldp q3, q5, [sp, #8]!
+ stp q20, q25, [x5, #1024]!
+ ldp q30, q15, [x23, #-1040]!
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldp d3, q2, [sp, #0]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldp q3, q5, [sp, #8]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: stp q20, q25, [x5, #1024]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldp q30, q15, [x23, #-1040]!
+// CHECK-ERROR-NEXT: ^
+
+//------------------------------------------------------------------------------
+// Load/store non-temporal register pair (offset)
+//------------------------------------------------------------------------------
+ ldnp w3, w2, [x4, #1]
+ stnp w1, w2, [x3, #253]
+ stnp w9, w10, [x5, #256]
+ ldnp w11, w12, [x9, #-260]
+ stnp wsp, w9, [sp]
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldnp w3, w2, [x4, #1]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: stnp w1, w2, [x3, #253]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: stnp w9, w10, [x5, #256]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldnp w11, w12, [x9, #-260]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: stnp wsp, w9, [sp]
+// CHECK-ERROR-NEXT: ^
+
+ ldnp x2, x5, [sp, #4]
+ ldnp x5, x6, [x9, #512]
+ stnp x7, x8, [x10, #-520]
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldnp x2, x5, [sp, #4]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldnp x5, x6, [x9, #512]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: stnp x7, x8, [x10, #-520]
+// CHECK-ERROR-NEXT: ^
+
+ ldnp sp, x3, [x10]
+ stnp x3, sp, [x9]
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldnp sp, x3, [x10]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: stnp x3, sp, [x9]
+// CHECK-ERROR-NEXT: ^
+
+ stnp s3, s5, [sp, #-2]
+ ldnp s6, s26, [x4, #-260]
+ stnp s13, s19, [x5, #256]
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: stnp s3, s5, [sp, #-2]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldnp s6, s26, [x4, #-260]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: stnp s13, s19, [x5, #256]
+// CHECK-ERROR-NEXT: ^
+
+ ldnp d3, d4, [xzr]
+ ldnp d5, d6, [x0, #512]
+ stnp d7, d8, [x0, #-520]
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldnp d3, d4, [xzr]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldnp d5, d6, [x0, #512]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: stnp d7, d8, [x0, #-520]
+// CHECK-ERROR-NEXT: ^
+
+ ldnp d3, q2, [sp]
+ ldnp q3, q5, [sp, #8]
+ stnp q20, q25, [x5, #1024]
+ ldnp q30, q15, [x23, #-1040]
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldnp d3, q2, [sp]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldnp q3, q5, [sp, #8]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: stnp q20, q25, [x5, #1024]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldnp q30, q15, [x23, #-1040]
+// CHECK-ERROR-NEXT: ^
+
+//------------------------------------------------------------------------------
+// Logical (immediate)
+//------------------------------------------------------------------------------
+ orr w0, w1, #0xffffffff
+ and x3, x5, #0xffffffffffffffff
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: orr w0, w1, #0xffffffff
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: and x3, x5, #0xffffffffffffffff
+// CHECK-ERROR-NEXT: ^
+
+ ands w3, w9, #0x0
+ eor x2, x0, #0x0
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ands w3, w9, #0x0
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: eor x2, x0, #0x0
+// CHECK-ERROR-NEXT: ^
+
+ eor w3, w5, #0x83
+ eor x9, x20, #0x1234
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: eor w3, w5, #0x83
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: eor x9, x20, #0x1234
+// CHECK-ERROR-NEXT: ^
+
+ and wzr, w4, 0xffff0000
+ eor xzr, x9, #0xffff0000ffff0000
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: and wzr, w4, 0xffff0000
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: eor xzr, x9, #0xffff0000ffff0000
+// CHECK-ERROR-NEXT: ^
+
+ orr w3, wsp, #0xf0f0f0f0
+ ands x3, sp, #0xaaaaaaaaaaaaaaaa
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: orr w3, wsp, #0xf0f0f0f0
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ands x3, sp, #0xaaaaaaaaaaaaaaaa
+// CHECK-ERROR-NEXT: ^
+
+ tst sp, #0xe0e0e0e0e0e0e0e0
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: tst sp, #0xe0e0e0e0e0e0e0e0
+// CHECK-ERROR-NEXT: ^
+
+ // movi has been removed from the specification. Make sure it's really gone.
+ movi wzr, #0x44444444
+ movi w3, #0xffff
+ movi x9, #0x0000ffff00000000
+// CHECK-ERROR: error: invalid instruction
+// CHECK-ERROR-NEXT: movi wzr, #0x44444444
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR: error: invalid instruction
+// CHECK-ERROR-NEXT: movi w3, #0xffff
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR: error: invalid instruction
+// CHECK-ERROR-NEXT: movi x9, #0x0000ffff00000000
+// CHECK-ERROR-NEXT: ^
+
+//------------------------------------------------------------------------------
+// Logical (shifted register)
+//------------------------------------------------------------------------------
+
+ //// Out of range shifts
+ and w2, w24, w6, lsl #-1
+ and w4, w6, w12, lsl #32
+ and x4, x6, x12, lsl #64
+ and x2, x5, x11, asr
+// CHECK-ERROR: error: expected integer shift amount
+// CHECK-ERROR-NEXT: and w2, w24, w6, lsl #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: and w4, w6, w12, lsl #32
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: and x4, x6, x12, lsl #64
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected #imm after shift specifier
+// CHECK-ERROR-NEXT: and x2, x5, x11, asr
+// CHECK-ERROR-NEXT: ^
+
+ //// sp not allowed
+ orn wsp, w3, w5
+ bics x20, sp, x9, lsr #0
+ orn x2, x6, sp, lsl #3
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: orn wsp, w3, w5
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: bics x20, sp, x9, lsr #0
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: orn x2, x6, sp, lsl #3
+// CHECK-ERROR-NEXT: ^
+
+ //// Mismatched registers
+ and x3, w2, w1
+ ands w1, x12, w2
+ and x4, x5, w6, lsl #12
+ orr w2, w5, x7, asr #0
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: and x3, w2, w1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ands w1, x12, w2
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: and x4, x5, w6, lsl #12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: orr w2, w5, x7, asr #0
+// CHECK-ERROR-NEXT: ^
+
+ //// Shifts should not be allowed on mov
+ mov w3, w7, lsl #13
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: mov w3, w7, lsl #13
+// CHECK-ERROR-NEXT: ^
+
+//------------------------------------------------------------------------------
+// Move wide (immediate)
+//------------------------------------------------------------------------------
+
+ movz w3, #65536, lsl #0
+ movz w4, #65536
+ movn w1, #2, lsl #1
+ movk w3, #0, lsl #-1
+ movn w2, #-1, lsl #0
+ movz x3, #-1
+ movk w3, #1, lsl #32
+ movn x2, #12, lsl #64
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: movz w3, #65536, lsl #0
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: movz w4, #65536
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: movn w1, #2, lsl #1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: only 'lsl #+N' valid after immediate
+// CHECK-ERROR-NEXT: movk w3, #0, lsl #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: movn w2, #-1, lsl #0
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: movz x3, #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: movk w3, #1, lsl #32
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: movn x2, #12, lsl #64
+// CHECK-ERROR-NEXT: ^
+
+ movz x12, #:abs_g0:sym, lsl #16
+ movz x12, #:abs_g0:sym, lsl #0
+ movn x2, #:abs_g0:sym
+ movk w3, #:abs_g0:sym
+ movz x3, #:abs_g0_nc:sym
+ movn x4, #:abs_g0_nc:sym
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: movz x12, #:abs_g0:sym, lsl #16
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: movz x12, #:abs_g0:sym, lsl #0
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: movn x2, #:abs_g0:sym
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: movk w3, #:abs_g0:sym
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: movz x3, #:abs_g0_nc:sym
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: movn x4, #:abs_g0_nc:sym
+// CHECK-ERROR-NEXT: ^
+
+ movn x2, #:abs_g1:sym
+ movk w3, #:abs_g1:sym
+ movz x3, #:abs_g1_nc:sym
+ movn x4, #:abs_g1_nc:sym
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: movn x2, #:abs_g1:sym
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: movk w3, #:abs_g1:sym
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: movz x3, #:abs_g1_nc:sym
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: movn x4, #:abs_g1_nc:sym
+// CHECK-ERROR-NEXT: ^
+
+ movz w12, #:abs_g2:sym
+ movn x12, #:abs_g2:sym
+ movk x13, #:abs_g2:sym
+ movk w3, #:abs_g2_nc:sym
+ movz x13, #:abs_g2_nc:sym
+ movn x24, #:abs_g2_nc:sym
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: movz w12, #:abs_g2:sym
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: movn x12, #:abs_g2:sym
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: movk x13, #:abs_g2:sym
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: movk w3, #:abs_g2_nc:sym
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: movz x13, #:abs_g2_nc:sym
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: movn x24, #:abs_g2_nc:sym
+// CHECK-ERROR-NEXT: ^
+
+ movn x19, #:abs_g3:sym
+ movz w20, #:abs_g3:sym
+ movk w21, #:abs_g3:sym
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: movn x19, #:abs_g3:sym
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: movz w20, #:abs_g3:sym
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: movk w21, #:abs_g3:sym
+// CHECK-ERROR-NEXT: ^
+
+ movk x19, #:abs_g0_s:sym
+ movk w23, #:abs_g0_s:sym
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: movk x19, #:abs_g0_s:sym
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: movk w23, #:abs_g0_s:sym
+// CHECK-ERROR-NEXT: ^
+
+ movk x19, #:abs_g1_s:sym
+ movk w23, #:abs_g1_s:sym
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: movk x19, #:abs_g1_s:sym
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: movk w23, #:abs_g1_s:sym
+// CHECK-ERROR-NEXT: ^
+
+ movz w2, #:abs_g2_s:sym
+ movn w29, #:abs_g2_s:sym
+ movk x19, #:abs_g2_s:sym
+ movk w23, #:abs_g2_s:sym
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: movz w2, #:abs_g2_s:sym
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: movn w29, #:abs_g2_s:sym
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: movk x19, #:abs_g2_s:sym
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: movk w23, #:abs_g2_s:sym
+// CHECK-ERROR-NEXT: ^
+
+//------------------------------------------------------------------------------
+// PC-relative addressing
+//------------------------------------------------------------------------------
+
+        adr sp, loc                        // register 31 here is xzr, so sp is not accepted
+ adrp x3, #20 // Immediate unaligned
+ adrp w2, loc // 64-bit register needed
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: adr sp, loc
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: adrp x3, #20
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: adrp w2, loc
+// CHECK-ERROR-NEXT: ^
+
+ adr x9, #1048576
+ adr x2, #-1048577
+ adrp x9, #4294967296
+ adrp x20, #-4294971392
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: adr x9, #1048576
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: adr x2, #-1048577
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: adrp x9, #4294967296
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: adrp x20, #-4294971392
+// CHECK-ERROR-NEXT: ^
+
+//------------------------------------------------------------------------------
+// System
+//------------------------------------------------------------------------------
+
+ hint #-1
+ hint #128
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: hint #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: hint #128
+// CHECK-ERROR-NEXT: ^
+
+ clrex #-1
+ clrex #16
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: clrex #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: clrex #16
+// CHECK-ERROR-NEXT: ^
+
+ dsb #-1
+ dsb #16
+ dmb #-1
+ dmb #16
+// CHECK-ERROR-NEXT: error: Invalid immediate for instruction
+// CHECK-ERROR-NEXT: dsb #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: Invalid immediate for instruction
+// CHECK-ERROR-NEXT: dsb #16
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: Invalid immediate for instruction
+// CHECK-ERROR-NEXT: dmb #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: Invalid immediate for instruction
+// CHECK-ERROR-NEXT: dmb #16
+// CHECK-ERROR-NEXT: ^
+
+ isb #-1
+ isb #16
+// CHECK-ERROR-NEXT: error: Invalid immediate for instruction
+// CHECK-ERROR-NEXT: isb #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: Invalid immediate for instruction
+// CHECK-ERROR-NEXT: isb #16
+// CHECK-ERROR-NEXT: ^
+
+ msr daifset, x4
+ msr spsel #-1
+ msr daifclr, #16
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: msr daifset, x4
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error:
+// CHECK-ERROR-NEXT: msr spsel #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: msr daifclr, #16
+// CHECK-ERROR-NEXT: ^
+
+ sys #8, c1, c2, #7, x9
+ sys #3, c16, c2, #3, x10
+ sys #2, c11, c16, #5
+ sys #4, c9, c8, #8, xzr
+ sysl x11, #8, c1, c2, #7
+ sysl x13, #3, c16, c2, #3
+ sysl x9, #2, c11, c16, #5
+ sysl x4, #4, c9, c8, #8
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: sys #8, c1, c2, #7, x9
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: Expected cN operand where 0 <= N <= 15
+// CHECK-ERROR-NEXT: sys #3, c16, c2, #3, x10
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: Expected cN operand where 0 <= N <= 15
+// CHECK-ERROR-NEXT: sys #2, c11, c16, #5
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: sys #4, c9, c8, #8, xzr
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: sysl x11, #8, c1, c2, #7
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: Expected cN operand where 0 <= N <= 15
+// CHECK-ERROR-NEXT: sysl x13, #3, c16, c2, #3
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: Expected cN operand where 0 <= N <= 15
+// CHECK-ERROR-NEXT: sysl x9, #2, c11, c16, #5
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: sysl x4, #4, c9, c8, #8
+// CHECK-ERROR-NEXT: ^
+
+ ic ialluis, x2
+ ic allu, x7
+ ic ivau
+// CHECK-ERROR-NEXT: error: specified IC op does not use a register
+// CHECK-ERROR-NEXT: ic ialluis, x2
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: operand specifier not recognised
+// CHECK-ERROR-NEXT: ic allu, x7
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: specified IC op requires a register
+// CHECK-ERROR-NEXT: ic ivau
+// CHECK-ERROR-NEXT: ^
+
+ tlbi IPAS2E1IS
+ tlbi IPAS2LE1IS
+ tlbi VMALLE1IS, x12
+ tlbi ALLE2IS, x11
+ tlbi ALLE3IS, x20
+ tlbi VAE1IS
+ tlbi VAE2IS
+ tlbi VAE3IS
+ tlbi ASIDE1IS
+ tlbi VAAE1IS
+ tlbi ALLE1IS, x0
+ tlbi VALE1IS
+ tlbi VALE2IS
+ tlbi VALE3IS
+ tlbi VMALLS12E1IS, xzr
+ tlbi VAALE1IS
+ tlbi IPAS2E1
+ tlbi IPAS2LE1
+ tlbi VMALLE1, x9
+ tlbi ALLE2, x10
+ tlbi ALLE3, x11
+ tlbi VAE1
+ tlbi VAE2
+ tlbi VAE3
+ tlbi ASIDE1
+ tlbi VAAE1
+ tlbi ALLE1, x25
+ tlbi VALE1
+ tlbi VALE2
+ tlbi VALE3
+ tlbi VMALLS12E1, x15
+ tlbi VAALE1
+// CHECK-ERROR-NEXT: error: specified TLBI op requires a register
+// CHECK-ERROR-NEXT: tlbi IPAS2E1IS
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: specified TLBI op requires a register
+// CHECK-ERROR-NEXT: tlbi IPAS2LE1IS
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: specified TLBI op does not use a register
+// CHECK-ERROR-NEXT: tlbi VMALLE1IS, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: specified TLBI op does not use a register
+// CHECK-ERROR-NEXT: tlbi ALLE2IS, x11
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: specified TLBI op does not use a register
+// CHECK-ERROR-NEXT: tlbi ALLE3IS, x20
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: specified TLBI op requires a register
+// CHECK-ERROR-NEXT: tlbi VAE1IS
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: specified TLBI op requires a register
+// CHECK-ERROR-NEXT: tlbi VAE2IS
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: specified TLBI op requires a register
+// CHECK-ERROR-NEXT: tlbi VAE3IS
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: specified TLBI op requires a register
+// CHECK-ERROR-NEXT: tlbi ASIDE1IS
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: specified TLBI op requires a register
+// CHECK-ERROR-NEXT: tlbi VAAE1IS
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: specified TLBI op does not use a register
+// CHECK-ERROR-NEXT: tlbi ALLE1IS, x0
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: specified TLBI op requires a register
+// CHECK-ERROR-NEXT: tlbi VALE1IS
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: specified TLBI op requires a register
+// CHECK-ERROR-NEXT: tlbi VALE2IS
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: specified TLBI op requires a register
+// CHECK-ERROR-NEXT: tlbi VALE3IS
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: specified TLBI op does not use a register
+// CHECK-ERROR-NEXT: tlbi VMALLS12E1IS, xzr
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: specified TLBI op requires a register
+// CHECK-ERROR-NEXT: tlbi VAALE1IS
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: specified TLBI op requires a register
+// CHECK-ERROR-NEXT: tlbi IPAS2E1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: specified TLBI op requires a register
+// CHECK-ERROR-NEXT: tlbi IPAS2LE1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: specified TLBI op does not use a register
+// CHECK-ERROR-NEXT: tlbi VMALLE1, x9
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: specified TLBI op does not use a register
+// CHECK-ERROR-NEXT: tlbi ALLE2, x10
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: specified TLBI op does not use a register
+// CHECK-ERROR-NEXT: tlbi ALLE3, x11
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: specified TLBI op requires a register
+// CHECK-ERROR-NEXT: tlbi VAE1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: specified TLBI op requires a register
+// CHECK-ERROR-NEXT: tlbi VAE2
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: specified TLBI op requires a register
+// CHECK-ERROR-NEXT: tlbi VAE3
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: specified TLBI op requires a register
+// CHECK-ERROR-NEXT: tlbi ASIDE1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: specified TLBI op requires a register
+// CHECK-ERROR-NEXT: tlbi VAAE1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: specified TLBI op does not use a register
+// CHECK-ERROR-NEXT: tlbi ALLE1, x25
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: specified TLBI op requires a register
+// CHECK-ERROR-NEXT: tlbi VALE1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: specified TLBI op requires a register
+// CHECK-ERROR-NEXT: tlbi VALE2
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: specified TLBI op requires a register
+// CHECK-ERROR-NEXT: tlbi VALE3
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: specified TLBI op does not use a register
+// CHECK-ERROR-NEXT: tlbi VMALLS12E1, x15
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: specified TLBI op requires a register
+// CHECK-ERROR-NEXT: tlbi VAALE1
+// CHECK-ERROR-NEXT: ^
+
+// For the MSR/MRS instructions, first make sure that read-only and
+// write-only registers really are treated as such.
+ msr MDCCSR_EL0, x12
+ msr DBGDTRRX_EL0, x12
+ msr MDRAR_EL1, x12
+ msr OSLSR_EL1, x12
+ msr DBGAUTHSTATUS_EL1, x12
+ msr MIDR_EL1, x12
+ msr CCSIDR_EL1, x12
+ msr CLIDR_EL1, x12
+ msr CTR_EL0, x12
+ msr MPIDR_EL1, x12
+ msr REVIDR_EL1, x12
+ msr AIDR_EL1, x12
+ msr DCZID_EL0, x12
+ msr ID_PFR0_EL1, x12
+ msr ID_PFR1_EL1, x12
+ msr ID_DFR0_EL1, x12
+ msr ID_AFR0_EL1, x12
+ msr ID_MMFR0_EL1, x12
+ msr ID_MMFR1_EL1, x12
+ msr ID_MMFR2_EL1, x12
+ msr ID_MMFR3_EL1, x12
+ msr ID_ISAR0_EL1, x12
+ msr ID_ISAR1_EL1, x12
+ msr ID_ISAR2_EL1, x12
+ msr ID_ISAR3_EL1, x12
+ msr ID_ISAR4_EL1, x12
+ msr ID_ISAR5_EL1, x12
+ msr MVFR0_EL1, x12
+ msr MVFR1_EL1, x12
+ msr MVFR2_EL1, x12
+ msr ID_AA64PFR0_EL1, x12
+ msr ID_AA64PFR1_EL1, x12
+ msr ID_AA64DFR0_EL1, x12
+ msr ID_AA64DFR1_EL1, x12
+ msr ID_AA64AFR0_EL1, x12
+ msr ID_AA64AFR1_EL1, x12
+ msr ID_AA64ISAR0_EL1, x12
+ msr ID_AA64ISAR1_EL1, x12
+ msr ID_AA64MMFR0_EL1, x12
+ msr ID_AA64MMFR1_EL1, x12
+ msr PMCEID0_EL0, x12
+ msr PMCEID1_EL0, x12
+ msr RVBAR_EL1, x12
+ msr RVBAR_EL2, x12
+ msr RVBAR_EL3, x12
+ msr ISR_EL1, x12
+ msr CNTPCT_EL0, x12
+ msr CNTVCT_EL0, x12
+ msr PMEVCNTR31_EL0, x12
+ msr PMEVTYPER31_EL0, x12
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: msr MDCCSR_EL0, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: msr DBGDTRRX_EL0, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: msr MDRAR_EL1, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: msr OSLSR_EL1, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: msr DBGAUTHSTATUS_EL1, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: msr MIDR_EL1, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: msr CCSIDR_EL1, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: msr CLIDR_EL1, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: msr CTR_EL0, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: msr MPIDR_EL1, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: msr REVIDR_EL1, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: msr AIDR_EL1, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: msr DCZID_EL0, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: msr ID_PFR0_EL1, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: msr ID_PFR1_EL1, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: msr ID_DFR0_EL1, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: msr ID_AFR0_EL1, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: msr ID_MMFR0_EL1, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: msr ID_MMFR1_EL1, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: msr ID_MMFR2_EL1, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: msr ID_MMFR3_EL1, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: msr ID_ISAR0_EL1, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: msr ID_ISAR1_EL1, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: msr ID_ISAR2_EL1, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: msr ID_ISAR3_EL1, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: msr ID_ISAR4_EL1, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: msr ID_ISAR5_EL1, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: msr MVFR0_EL1, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: msr MVFR1_EL1, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: msr MVFR2_EL1, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: msr ID_AA64PFR0_EL1, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: msr ID_AA64PFR1_EL1, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: msr ID_AA64DFR0_EL1, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: msr ID_AA64DFR1_EL1, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: msr ID_AA64AFR0_EL1, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: msr ID_AA64AFR1_EL1, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: msr ID_AA64ISAR0_EL1, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: msr ID_AA64ISAR1_EL1, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: msr ID_AA64MMFR0_EL1, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: msr ID_AA64MMFR1_EL1, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: msr PMCEID0_EL0, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: msr PMCEID1_EL0, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: msr RVBAR_EL1, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: msr RVBAR_EL2, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: msr RVBAR_EL3, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: msr ISR_EL1, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: msr CNTPCT_EL0, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: msr CNTVCT_EL0, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: msr PMEVCNTR31_EL0, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: msr PMEVTYPER31_EL0, x12
+// CHECK-ERROR-NEXT: ^
+
+ mrs x9, DBGDTRTX_EL0
+ mrs x9, OSLAR_EL1
+ mrs x9, PMSWINC_EL0
+ mrs x9, PMEVCNTR31_EL0
+ mrs x9, PMEVTYPER31_EL0
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: mrs x9, DBGDTRTX_EL0
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: mrs x9, OSLAR_EL1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: mrs x9, PMSWINC_EL0
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: mrs x9, PMEVCNTR31_EL0
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: mrs x9, PMEVTYPER31_EL0
+// CHECK-ERROR-NEXT: ^
+
+// Now check some invalid generic names
+ mrs xzr, s2_5_c11_c13_2
+ mrs x12, s3_8_c11_c13_2
+ mrs x13, s3_3_c12_c13_2
+ mrs x19, s3_2_c15_c16_2
+ mrs x30, s3_2_c15_c1_8
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: mrs xzr, s2_5_c11_c13_2
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: mrs x12, s3_8_c11_c13_2
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: mrs x13, s3_3_c12_c13_2
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: mrs x19, s3_2_c15_c16_2
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: mrs x30, s3_2_c15_c1_8
+// CHECK-ERROR-NEXT: ^
+
+//------------------------------------------------------------------------------
+// Test and branch (immediate)
+//------------------------------------------------------------------------------
+
+ tbz w3, #-1, addr
+ tbz w3, #32, nowhere
+ tbz x9, #-1, there
+ tbz x20, #64, dont
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: tbz w3, #-1, addr
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: tbz w3, #32, nowhere
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: tbz x9, #-1, there
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: tbz x20, #64, dont
+// CHECK-ERROR-NEXT: ^
+
+ tbnz w3, #-1, addr
+ tbnz w3, #32, nowhere
+ tbnz x9, #-1, there
+ tbnz x20, #64, dont
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: tbnz w3, #-1, addr
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: tbnz w3, #32, nowhere
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: tbnz x9, #-1, there
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: tbnz x20, #64, dont
+
+//------------------------------------------------------------------------------
+// Unconditional branch (immediate)
+//------------------------------------------------------------------------------
+
+ b #134217728
+ b #-134217732
+ b #1
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: b #134217728
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: b #-134217732
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: b #1
+// CHECK-ERROR-NEXT: ^
+
+//------------------------------------------------------------------------------
+// Unconditional branch (register)
+//------------------------------------------------------------------------------
+
+ br w2
+ br sp
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: br w2
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: br sp
+// CHECK-ERROR-NEXT: ^
+
+ //// These ones shouldn't allow any registers
+ eret x2
+ drps x2
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: eret x2
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: drps x2
+// CHECK-ERROR-NEXT: ^
+
diff --git a/test/MC/AArch64/basic-a64-instructions.s b/test/MC/AArch64/basic-a64-instructions.s
new file mode 100644
index 0000000000..e16b2ea724
--- /dev/null
+++ b/test/MC/AArch64/basic-a64-instructions.s
@@ -0,0 +1,4790 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding < %s | FileCheck %s
+ .globl _func
+
+// Check that the assembler can handle the documented syntax from the ARM ARM.
+// For complex constructs like shifter operands, check them thoroughly once,
+// then spot-check that subsequent instructions accept the form generally.
+// This gives us good coverage while keeping the overall size of the test
+// more reasonable.
+
+
+_func:
+// CHECK: _func
+
+//------------------------------------------------------------------------------
+// Add/sub (extended register)
+//------------------------------------------------------------------------------
+ // Basic extends 64-bit ops
+ add x2, x4, w5, uxtb
+ add x20, sp, w19, uxth
+ add x12, x1, w20, uxtw
+ add x20, x3, x13, uxtx
+ add x17, x25, w20, sxtb
+ add x18, x13, w19, sxth
+ add sp, x2, w3, sxtw
+ add x3, x5, x9, sxtx
+// CHECK: add x2, x4, w5, uxtb // encoding: [0x82,0x00,0x25,0x8b]
+// CHECK: add x20, sp, w19, uxth // encoding: [0xf4,0x23,0x33,0x8b]
+// CHECK: add x12, x1, w20, uxtw // encoding: [0x2c,0x40,0x34,0x8b]
+// CHECK: add x20, x3, x13, uxtx // encoding: [0x74,0x60,0x2d,0x8b]
+// CHECK: add x17, x25, w20, sxtb // encoding: [0x31,0x83,0x34,0x8b]
+// CHECK: add x18, x13, w19, sxth // encoding: [0xb2,0xa1,0x33,0x8b]
+// CHECK: add sp, x2, w3, sxtw // encoding: [0x5f,0xc0,0x23,0x8b]
+// CHECK: add x3, x5, x9, sxtx // encoding: [0xa3,0xe0,0x29,0x8b]
+
+ // Basic extends, 32-bit ops
+ add w2, w5, w7, uxtb
+ add w21, w15, w17, uxth
+ add w30, w29, wzr, uxtw
+ add w19, w17, w1, uxtx // Goodness knows what this means
+ add w2, w5, w1, sxtb
+ add w26, w17, w19, sxth
+ add w0, w2, w3, sxtw
+ add w2, w3, w5, sxtx
+// CHECK: add w2, w5, w7, uxtb // encoding: [0xa2,0x00,0x27,0x0b]
+// CHECK: add w21, w15, w17, uxth // encoding: [0xf5,0x21,0x31,0x0b]
+// CHECK: add w30, w29, wzr, uxtw // encoding: [0xbe,0x43,0x3f,0x0b]
+// CHECK: add w19, w17, w1, uxtx // encoding: [0x33,0x62,0x21,0x0b]
+// CHECK: add w2, w5, w1, sxtb // encoding: [0xa2,0x80,0x21,0x0b]
+// CHECK: add w26, w17, w19, sxth // encoding: [0x3a,0xa2,0x33,0x0b]
+// CHECK: add w0, w2, w3, sxtw // encoding: [0x40,0xc0,0x23,0x0b]
+// CHECK: add w2, w3, w5, sxtx // encoding: [0x62,0xe0,0x25,0x0b]
+
+ // Nonzero shift amounts
+ add x2, x3, w5, sxtb #0
+ add x7, x11, w13, uxth #4
+ add w17, w19, w23, uxtw #2
+ add w29, w23, w17, uxtx #1
+// CHECK: add x2, x3, w5, sxtb // encoding: [0x62,0x80,0x25,0x8b]
+// CHECK: add x7, x11, w13, uxth #4 // encoding: [0x67,0x31,0x2d,0x8b]
+// CHECK: add w17, w19, w23, uxtw #2 // encoding: [0x71,0x4a,0x37,0x0b]
+// CHECK: add w29, w23, w17, uxtx #1 // encoding: [0xfd,0x66,0x31,0x0b]
+
+ // Sub
+ sub x2, x4, w5, uxtb #2
+ sub x20, sp, w19, uxth #4
+ sub x12, x1, w20, uxtw
+ sub x20, x3, x13, uxtx #0
+ sub x17, x25, w20, sxtb
+ sub x18, x13, w19, sxth
+ sub sp, x2, w3, sxtw
+ sub x3, x5, x9, sxtx
+// CHECK: sub x2, x4, w5, uxtb #2 // encoding: [0x82,0x08,0x25,0xcb]
+// CHECK: sub x20, sp, w19, uxth #4 // encoding: [0xf4,0x33,0x33,0xcb]
+// CHECK: sub x12, x1, w20, uxtw // encoding: [0x2c,0x40,0x34,0xcb]
+// CHECK: sub x20, x3, x13, uxtx // encoding: [0x74,0x60,0x2d,0xcb]
+// CHECK: sub x17, x25, w20, sxtb // encoding: [0x31,0x83,0x34,0xcb]
+// CHECK: sub x18, x13, w19, sxth // encoding: [0xb2,0xa1,0x33,0xcb]
+// CHECK: sub sp, x2, w3, sxtw // encoding: [0x5f,0xc0,0x23,0xcb]
+// CHECK: sub x3, x5, x9, sxtx // encoding: [0xa3,0xe0,0x29,0xcb]
+
+ sub w2, w5, w7, uxtb
+ sub w21, w15, w17, uxth
+ sub w30, w29, wzr, uxtw
+ sub w19, w17, w1, uxtx // Goodness knows what this means
+ sub w2, w5, w1, sxtb
+ sub w26, wsp, w19, sxth
+ sub wsp, w2, w3, sxtw
+ sub w2, w3, w5, sxtx
+// CHECK: sub w2, w5, w7, uxtb // encoding: [0xa2,0x00,0x27,0x4b]
+// CHECK: sub w21, w15, w17, uxth // encoding: [0xf5,0x21,0x31,0x4b]
+// CHECK: sub w30, w29, wzr, uxtw // encoding: [0xbe,0x43,0x3f,0x4b]
+// CHECK: sub w19, w17, w1, uxtx // encoding: [0x33,0x62,0x21,0x4b]
+// CHECK: sub w2, w5, w1, sxtb // encoding: [0xa2,0x80,0x21,0x4b]
+// CHECK: sub w26, wsp, w19, sxth // encoding: [0xfa,0xa3,0x33,0x4b]
+// CHECK: sub wsp, w2, w3, sxtw // encoding: [0x5f,0xc0,0x23,0x4b]
+// CHECK: sub w2, w3, w5, sxtx // encoding: [0x62,0xe0,0x25,0x4b]
+
+ // Adds
+ adds x2, x4, w5, uxtb #2
+ adds x20, sp, w19, uxth #4
+ adds x12, x1, w20, uxtw
+ adds x20, x3, x13, uxtx #0
+ adds xzr, x25, w20, sxtb #3
+ adds x18, sp, w19, sxth
+ adds xzr, x2, w3, sxtw
+ adds x3, x5, x9, sxtx #2
+// CHECK: adds x2, x4, w5, uxtb #2 // encoding: [0x82,0x08,0x25,0xab]
+// CHECK: adds x20, sp, w19, uxth #4 // encoding: [0xf4,0x33,0x33,0xab]
+// CHECK: adds x12, x1, w20, uxtw // encoding: [0x2c,0x40,0x34,0xab]
+// CHECK: adds x20, x3, x13, uxtx // encoding: [0x74,0x60,0x2d,0xab]
+// CHECK: adds xzr, x25, w20, sxtb #3 // encoding: [0x3f,0x8f,0x34,0xab]
+// CHECK: adds x18, sp, w19, sxth // encoding: [0xf2,0xa3,0x33,0xab]
+// CHECK: adds xzr, x2, w3, sxtw // encoding: [0x5f,0xc0,0x23,0xab]
+// CHECK: adds x3, x5, x9, sxtx #2 // encoding: [0xa3,0xe8,0x29,0xab]
+
+ adds w2, w5, w7, uxtb
+ adds w21, w15, w17, uxth
+ adds w30, w29, wzr, uxtw
+ adds w19, w17, w1, uxtx // Goodness knows what this means
+ adds w2, w5, w1, sxtb #1
+ adds w26, wsp, w19, sxth
+ adds wzr, w2, w3, sxtw
+ adds w2, w3, w5, sxtx
+// CHECK: adds w2, w5, w7, uxtb // encoding: [0xa2,0x00,0x27,0x2b]
+// CHECK: adds w21, w15, w17, uxth // encoding: [0xf5,0x21,0x31,0x2b]
+// CHECK: adds w30, w29, wzr, uxtw // encoding: [0xbe,0x43,0x3f,0x2b]
+// CHECK: adds w19, w17, w1, uxtx // encoding: [0x33,0x62,0x21,0x2b]
+// CHECK: adds w2, w5, w1, sxtb #1 // encoding: [0xa2,0x84,0x21,0x2b]
+// CHECK: adds w26, wsp, w19, sxth // encoding: [0xfa,0xa3,0x33,0x2b]
+// CHECK: adds wzr, w2, w3, sxtw // encoding: [0x5f,0xc0,0x23,0x2b]
+// CHECK: adds w2, w3, w5, sxtx // encoding: [0x62,0xe0,0x25,0x2b]
+
+ // subs
+ subs x2, x4, w5, uxtb #2
+ subs x20, sp, w19, uxth #4
+ subs x12, x1, w20, uxtw
+ subs x20, x3, x13, uxtx #0
+ subs xzr, x25, w20, sxtb #3
+ subs x18, sp, w19, sxth
+ subs xzr, x2, w3, sxtw
+ subs x3, x5, x9, sxtx #2
+// CHECK: subs x2, x4, w5, uxtb #2 // encoding: [0x82,0x08,0x25,0xeb]
+// CHECK: subs x20, sp, w19, uxth #4 // encoding: [0xf4,0x33,0x33,0xeb]
+// CHECK: subs x12, x1, w20, uxtw // encoding: [0x2c,0x40,0x34,0xeb]
+// CHECK: subs x20, x3, x13, uxtx // encoding: [0x74,0x60,0x2d,0xeb]
+// CHECK: subs xzr, x25, w20, sxtb #3 // encoding: [0x3f,0x8f,0x34,0xeb]
+// CHECK: subs x18, sp, w19, sxth // encoding: [0xf2,0xa3,0x33,0xeb]
+// CHECK: subs xzr, x2, w3, sxtw // encoding: [0x5f,0xc0,0x23,0xeb]
+// CHECK: subs x3, x5, x9, sxtx #2 // encoding: [0xa3,0xe8,0x29,0xeb]
+
+ subs w2, w5, w7, uxtb
+ subs w21, w15, w17, uxth
+ subs w30, w29, wzr, uxtw
+ subs w19, w17, w1, uxtx // Goodness knows what this means
+ subs w2, w5, w1, sxtb #1
+ subs w26, wsp, w19, sxth
+ subs wzr, w2, w3, sxtw
+ subs w2, w3, w5, sxtx
+// CHECK: subs w2, w5, w7, uxtb // encoding: [0xa2,0x00,0x27,0x6b]
+// CHECK: subs w21, w15, w17, uxth // encoding: [0xf5,0x21,0x31,0x6b]
+// CHECK: subs w30, w29, wzr, uxtw // encoding: [0xbe,0x43,0x3f,0x6b]
+// CHECK: subs w19, w17, w1, uxtx // encoding: [0x33,0x62,0x21,0x6b]
+// CHECK: subs w2, w5, w1, sxtb #1 // encoding: [0xa2,0x84,0x21,0x6b]
+// CHECK: subs w26, wsp, w19, sxth // encoding: [0xfa,0xa3,0x33,0x6b]
+// CHECK: subs wzr, w2, w3, sxtw // encoding: [0x5f,0xc0,0x23,0x6b]
+// CHECK: subs w2, w3, w5, sxtx // encoding: [0x62,0xe0,0x25,0x6b]
+
+ // cmp
+ cmp x4, w5, uxtb #2
+ cmp sp, w19, uxth #4
+ cmp x1, w20, uxtw
+ cmp x3, x13, uxtx #0
+ cmp x25, w20, sxtb #3
+ cmp sp, w19, sxth
+ cmp x2, w3, sxtw
+ cmp x5, x9, sxtx #2
+// CHECK: cmp x4, w5, uxtb #2 // encoding: [0x9f,0x08,0x25,0xeb]
+// CHECK: cmp sp, w19, uxth #4 // encoding: [0xff,0x33,0x33,0xeb]
+// CHECK: cmp x1, w20, uxtw // encoding: [0x3f,0x40,0x34,0xeb]
+// CHECK: cmp x3, x13, uxtx // encoding: [0x7f,0x60,0x2d,0xeb]
+// CHECK: cmp x25, w20, sxtb #3 // encoding: [0x3f,0x8f,0x34,0xeb]
+// CHECK: cmp sp, w19, sxth // encoding: [0xff,0xa3,0x33,0xeb]
+// CHECK: cmp x2, w3, sxtw // encoding: [0x5f,0xc0,0x23,0xeb]
+// CHECK: cmp x5, x9, sxtx #2 // encoding: [0xbf,0xe8,0x29,0xeb]
+
+ cmp w5, w7, uxtb
+ cmp w15, w17, uxth
+ cmp w29, wzr, uxtw
+ cmp w17, w1, uxtx // Goodness knows what this means
+ cmp w5, w1, sxtb #1
+ cmp wsp, w19, sxth
+ cmp w2, w3, sxtw
+ cmp w3, w5, sxtx
+// CHECK: cmp w5, w7, uxtb // encoding: [0xbf,0x00,0x27,0x6b]
+// CHECK: cmp w15, w17, uxth // encoding: [0xff,0x21,0x31,0x6b]
+// CHECK: cmp w29, wzr, uxtw // encoding: [0xbf,0x43,0x3f,0x6b]
+// CHECK: cmp w17, w1, uxtx // encoding: [0x3f,0x62,0x21,0x6b]
+// CHECK: cmp w5, w1, sxtb #1 // encoding: [0xbf,0x84,0x21,0x6b]
+// CHECK: cmp wsp, w19, sxth // encoding: [0xff,0xa3,0x33,0x6b]
+// CHECK: cmp w2, w3, sxtw // encoding: [0x5f,0xc0,0x23,0x6b]
+// CHECK: cmp w3, w5, sxtx // encoding: [0x7f,0xe0,0x25,0x6b]
+
+
+ // cmn
+ cmn x4, w5, uxtb #2
+ cmn sp, w19, uxth #4
+ cmn x1, w20, uxtw
+ cmn x3, x13, uxtx #0
+ cmn x25, w20, sxtb #3
+ cmn sp, w19, sxth
+ cmn x2, w3, sxtw
+ cmn x5, x9, sxtx #2
+// CHECK: cmn x4, w5, uxtb #2 // encoding: [0x9f,0x08,0x25,0xab]
+// CHECK: cmn sp, w19, uxth #4 // encoding: [0xff,0x33,0x33,0xab]
+// CHECK: cmn x1, w20, uxtw // encoding: [0x3f,0x40,0x34,0xab]
+// CHECK: cmn x3, x13, uxtx // encoding: [0x7f,0x60,0x2d,0xab]
+// CHECK: cmn x25, w20, sxtb #3 // encoding: [0x3f,0x8f,0x34,0xab]
+// CHECK: cmn sp, w19, sxth // encoding: [0xff,0xa3,0x33,0xab]
+// CHECK: cmn x2, w3, sxtw // encoding: [0x5f,0xc0,0x23,0xab]
+// CHECK: cmn x5, x9, sxtx #2 // encoding: [0xbf,0xe8,0x29,0xab]
+
+ cmn w5, w7, uxtb
+ cmn w15, w17, uxth
+ cmn w29, wzr, uxtw
+ cmn w17, w1, uxtx // Goodness knows what this means
+ cmn w5, w1, sxtb #1
+ cmn wsp, w19, sxth
+ cmn w2, w3, sxtw
+ cmn w3, w5, sxtx
+// CHECK: cmn w5, w7, uxtb // encoding: [0xbf,0x00,0x27,0x2b]
+// CHECK: cmn w15, w17, uxth // encoding: [0xff,0x21,0x31,0x2b]
+// CHECK: cmn w29, wzr, uxtw // encoding: [0xbf,0x43,0x3f,0x2b]
+// CHECK: cmn w17, w1, uxtx // encoding: [0x3f,0x62,0x21,0x2b]
+// CHECK: cmn w5, w1, sxtb #1 // encoding: [0xbf,0x84,0x21,0x2b]
+// CHECK: cmn wsp, w19, sxth // encoding: [0xff,0xa3,0x33,0x2b]
+// CHECK: cmn w2, w3, sxtw // encoding: [0x5f,0xc0,0x23,0x2b]
+// CHECK: cmn w3, w5, sxtx // encoding: [0x7f,0xe0,0x25,0x2b]
+
+ // operands for cmp
+ cmp x20, w29, uxtb #3
+ cmp x12, x13, uxtx #4
+ cmp wsp, w1, uxtb
+ cmn wsp, wzr, sxtw
+// CHECK: cmp x20, w29, uxtb #3 // encoding: [0x9f,0x0e,0x3d,0xeb]
+// CHECK: cmp x12, x13, uxtx #4 // encoding: [0x9f,0x71,0x2d,0xeb]
+// CHECK: cmp wsp, w1, uxtb // encoding: [0xff,0x03,0x21,0x6b]
+// CHECK: cmn wsp, wzr, sxtw // encoding: [0xff,0xc3,0x3f,0x2b]
+
+        // LSL variant when sp is involved
+ sub sp, x3, x7, lsl #4
+ add w2, wsp, w3, lsl #1
+ cmp wsp, w9, lsl #0
+ adds wzr, wsp, w3, lsl #4
+ subs x3, sp, x9, lsl #2
+// CHECK: sub sp, x3, x7, lsl #4 // encoding: [0x7f,0x70,0x27,0xcb]
+// CHECK: add w2, wsp, w3, lsl #1 // encoding: [0xe2,0x47,0x23,0x0b]
+// CHECK: cmp wsp, w9 // encoding: [0xff,0x43,0x29,0x6b]
+// CHECK: adds wzr, wsp, w3, lsl #4 // encoding: [0xff,0x53,0x23,0x2b]
+// CHECK: subs x3, sp, x9, lsl #2 // encoding: [0xe3,0x6b,0x29,0xeb]
+
+//------------------------------------------------------------------------------
+// Add/sub (immediate)
+//------------------------------------------------------------------------------
+
+// Check basic immediate values: an unsigned 12-bit immediate, optionally
+// shifted left by 12 bits.
+ add w4, w5, #0x0
+ add w2, w3, #4095
+ add w30, w29, #1, lsl #12
+ add w13, w5, #4095, lsl #12
+ add x5, x7, #1638
+// CHECK: add w4, w5, #0 // encoding: [0xa4,0x00,0x00,0x11]
+// CHECK: add w2, w3, #4095 // encoding: [0x62,0xfc,0x3f,0x11]
+// CHECK: add w30, w29, #1, lsl #12 // encoding: [0xbe,0x07,0x40,0x11]
+// CHECK: add w13, w5, #4095, lsl #12 // encoding: [0xad,0xfc,0x7f,0x11]
+// CHECK: add x5, x7, #1638 // encoding: [0xe5,0x98,0x19,0x91]
+
+// In the non-S variants, register number 31 encodes sp rather than zr
+ add w20, wsp, #801, lsl #0
+ add wsp, wsp, #1104
+ add wsp, w30, #4084
+// CHECK: add w20, wsp, #801 // encoding: [0xf4,0x87,0x0c,0x11]
+// CHECK: add wsp, wsp, #1104 // encoding: [0xff,0x43,0x11,0x11]
+// CHECK: add wsp, w30, #4084 // encoding: [0xdf,0xd3,0x3f,0x11]
+
+// A few checks on the sanity of 64-bit versions
+ add x0, x24, #291
+ add x3, x24, #4095, lsl #12
+ add x8, sp, #1074
+ add sp, x29, #3816
+// CHECK: add x0, x24, #291 // encoding: [0x00,0x8f,0x04,0x91]
+// CHECK: add x3, x24, #4095, lsl #12 // encoding: [0x03,0xff,0x7f,0x91]
+// CHECK: add x8, sp, #1074 // encoding: [0xe8,0xcb,0x10,0x91]
+// CHECK: add sp, x29, #3816 // encoding: [0xbf,0xa3,0x3b,0x91]
+
+// And on sub
+ sub w0, wsp, #4077
+ sub w4, w20, #546, lsl #12
+ sub sp, sp, #288
+ sub wsp, w19, #16
+// CHECK: sub w0, wsp, #4077 // encoding: [0xe0,0xb7,0x3f,0x51]
+// CHECK: sub w4, w20, #546, lsl #12 // encoding: [0x84,0x8a,0x48,0x51]
+// CHECK: sub sp, sp, #288 // encoding: [0xff,0x83,0x04,0xd1]
+// CHECK: sub wsp, w19, #16 // encoding: [0x7f,0x42,0x00,0x51]
+
+// ADDS/SUBS accept zr in the Rd position but sp in the Rn position
+ adds w13, w23, #291, lsl #12
+ adds wzr, w2, #4095 // FIXME: canonically should be cmn
+ adds w20, wsp, #0x0
+ adds xzr, x3, #0x1, lsl #12 // FIXME: canonically should be cmn
+// CHECK: adds w13, w23, #291, lsl #12 // encoding: [0xed,0x8e,0x44,0x31]
+// CHECK: adds wzr, w2, #4095 // encoding: [0x5f,0xfc,0x3f,0x31]
+// CHECK: adds w20, wsp, #0 // encoding: [0xf4,0x03,0x00,0x31]
+// CHECK: adds xzr, x3, #1, lsl #12 // encoding: [0x7f,0x04,0x40,0xb1]
+
+// Checks for subs
+ subs xzr, sp, #20, lsl #12 // FIXME: canonically should be cmp
+ subs xzr, x30, #4095, lsl #0 // FIXME: canonically should be cmp
+ subs x4, sp, #3822
+// CHECK: subs xzr, sp, #20, lsl #12 // encoding: [0xff,0x53,0x40,0xf1]
+// CHECK: subs xzr, x30, #4095 // encoding: [0xdf,0xff,0x3f,0xf1]
+// CHECK: subs x4, sp, #3822 // encoding: [0xe4,0xbb,0x3b,0xf1]
+
+// cmn is an alias for adds zr, ...
+ cmn w3, #291, lsl #12
+ cmn wsp, #1365, lsl #0
+ cmn sp, #1092, lsl #12
+// CHECK: cmn w3, #291, lsl #12 // encoding: [0x7f,0x8c,0x44,0x31]
+// CHECK: cmn wsp, #1365 // encoding: [0xff,0x57,0x15,0x31]
+// CHECK: cmn sp, #1092, lsl #12 // encoding: [0xff,0x13,0x51,0xb1]
+
+// cmp is an alias for subs zr, ... (FIXME: should always disassemble as such too).
+ cmp x4, #300, lsl #12
+ cmp wsp, #500
+ cmp sp, #200, lsl #0
+// CHECK: cmp x4, #300, lsl #12 // encoding: [0x9f,0xb0,0x44,0xf1]
+// CHECK: cmp wsp, #500 // encoding: [0xff,0xd3,0x07,0x71]
+// CHECK: cmp sp, #200 // encoding: [0xff,0x23,0x03,0xf1]
+
+// A "MOV" involving sp is encoded in this manner: add Reg, Reg, #0
+ mov sp, x30
+ mov wsp, w20
+ mov x11, sp
+ mov w24, wsp
+// CHECK: mov sp, x30 // encoding: [0xdf,0x03,0x00,0x91]
+// CHECK: mov wsp, w20 // encoding: [0x9f,0x02,0x00,0x11]
+// CHECK: mov x11, sp // encoding: [0xeb,0x03,0x00,0x91]
+// CHECK: mov w24, wsp // encoding: [0xf8,0x03,0x00,0x11]
+
+// A relocation check (default to lo12, which is the only sane relocation anyway)
+ add x0, x4, #:lo12:var
+// CHECK: add x0, x4, #:lo12:var // encoding: [0x80'A',A,A,0x91'A']
+// CHECK: // fixup A - offset: 0, value: :lo12:var, kind: fixup_a64_add_lo12
+
+//------------------------------------------------------------------------------
+// Add-sub (shifted register)
+//------------------------------------------------------------------------------
+
+// As usual, we don't print the canonical forms of many instructions.
+
+ add w3, w5, w7
+ add wzr, w3, w5
+ add w20, wzr, w4
+ add w4, w6, wzr
+// CHECK: add w3, w5, w7 // encoding: [0xa3,0x00,0x07,0x0b]
+// CHECK: add wzr, w3, w5 // encoding: [0x7f,0x00,0x05,0x0b]
+// CHECK: add w20, wzr, w4 // encoding: [0xf4,0x03,0x04,0x0b]
+// CHECK: add w4, w6, wzr // encoding: [0xc4,0x00,0x1f,0x0b]
+
+ add w11, w13, w15, lsl #0
+ add w9, w3, wzr, lsl #10
+ add w17, w29, w20, lsl #31
+// CHECK: add w11, w13, w15 // encoding: [0xab,0x01,0x0f,0x0b]
+// CHECK: add w9, w3, wzr, lsl #10 // encoding: [0x69,0x28,0x1f,0x0b]
+// CHECK: add w17, w29, w20, lsl #31 // encoding: [0xb1,0x7f,0x14,0x0b]
+
+ add w21, w22, w23, lsr #0
+ add w24, w25, w26, lsr #18
+ add w27, w28, w29, lsr #31
+// CHECK: add w21, w22, w23, lsr #0 // encoding: [0xd5,0x02,0x57,0x0b]
+// CHECK: add w24, w25, w26, lsr #18 // encoding: [0x38,0x4b,0x5a,0x0b]
+// CHECK: add w27, w28, w29, lsr #31 // encoding: [0x9b,0x7f,0x5d,0x0b]
+
+ add w2, w3, w4, asr #0
+ add w5, w6, w7, asr #21
+ add w8, w9, w10, asr #31
+// CHECK: add w2, w3, w4, asr #0 // encoding: [0x62,0x00,0x84,0x0b]
+// CHECK: add w5, w6, w7, asr #21 // encoding: [0xc5,0x54,0x87,0x0b]
+// CHECK: add w8, w9, w10, asr #31 // encoding: [0x28,0x7d,0x8a,0x0b]
+
+ add x3, x5, x7
+ add xzr, x3, x5
+ add x20, xzr, x4
+ add x4, x6, xzr
+// CHECK: add x3, x5, x7 // encoding: [0xa3,0x00,0x07,0x8b]
+// CHECK: add xzr, x3, x5 // encoding: [0x7f,0x00,0x05,0x8b]
+// CHECK: add x20, xzr, x4 // encoding: [0xf4,0x03,0x04,0x8b]
+// CHECK: add x4, x6, xzr // encoding: [0xc4,0x00,0x1f,0x8b]
+
+ add x11, x13, x15, lsl #0
+ add x9, x3, xzr, lsl #10
+ add x17, x29, x20, lsl #63
+// CHECK: add x11, x13, x15 // encoding: [0xab,0x01,0x0f,0x8b]
+// CHECK: add x9, x3, xzr, lsl #10 // encoding: [0x69,0x28,0x1f,0x8b]
+// CHECK: add x17, x29, x20, lsl #63 // encoding: [0xb1,0xff,0x14,0x8b]
+
+ add x21, x22, x23, lsr #0
+ add x24, x25, x26, lsr #18
+ add x27, x28, x29, lsr #63
+// CHECK: add x21, x22, x23, lsr #0 // encoding: [0xd5,0x02,0x57,0x8b]
+// CHECK: add x24, x25, x26, lsr #18 // encoding: [0x38,0x4b,0x5a,0x8b]
+// CHECK: add x27, x28, x29, lsr #63 // encoding: [0x9b,0xff,0x5d,0x8b]
+
+ add x2, x3, x4, asr #0
+ add x5, x6, x7, asr #21
+ add x8, x9, x10, asr #63
+// CHECK: add x2, x3, x4, asr #0 // encoding: [0x62,0x00,0x84,0x8b]
+// CHECK: add x5, x6, x7, asr #21 // encoding: [0xc5,0x54,0x87,0x8b]
+// CHECK: add x8, x9, x10, asr #63 // encoding: [0x28,0xfd,0x8a,0x8b]
+
+ adds w3, w5, w7
+ adds wzr, w3, w5
+ adds w20, wzr, w4
+ adds w4, w6, wzr
+// CHECK: adds w3, w5, w7 // encoding: [0xa3,0x00,0x07,0x2b]
+// CHECK: adds wzr, w3, w5 // encoding: [0x7f,0x00,0x05,0x2b]
+// CHECK: adds w20, wzr, w4 // encoding: [0xf4,0x03,0x04,0x2b]
+// CHECK: adds w4, w6, wzr // encoding: [0xc4,0x00,0x1f,0x2b]
+
+ adds w11, w13, w15, lsl #0
+ adds w9, w3, wzr, lsl #10
+ adds w17, w29, w20, lsl #31
+// CHECK: adds w11, w13, w15 // encoding: [0xab,0x01,0x0f,0x2b]
+// CHECK: adds w9, w3, wzr, lsl #10 // encoding: [0x69,0x28,0x1f,0x2b]
+// CHECK: adds w17, w29, w20, lsl #31 // encoding: [0xb1,0x7f,0x14,0x2b]
+
+ adds w21, w22, w23, lsr #0
+ adds w24, w25, w26, lsr #18
+ adds w27, w28, w29, lsr #31
+// CHECK: adds w21, w22, w23, lsr #0 // encoding: [0xd5,0x02,0x57,0x2b]
+// CHECK: adds w24, w25, w26, lsr #18 // encoding: [0x38,0x4b,0x5a,0x2b]
+// CHECK: adds w27, w28, w29, lsr #31 // encoding: [0x9b,0x7f,0x5d,0x2b]
+
+ adds w2, w3, w4, asr #0
+ adds w5, w6, w7, asr #21
+ adds w8, w9, w10, asr #31
+// CHECK: adds w2, w3, w4, asr #0 // encoding: [0x62,0x00,0x84,0x2b]
+// CHECK: adds w5, w6, w7, asr #21 // encoding: [0xc5,0x54,0x87,0x2b]
+// CHECK: adds w8, w9, w10, asr #31 // encoding: [0x28,0x7d,0x8a,0x2b]
+
+ adds x3, x5, x7
+ adds xzr, x3, x5
+ adds x20, xzr, x4
+ adds x4, x6, xzr
+// CHECK: adds x3, x5, x7 // encoding: [0xa3,0x00,0x07,0xab]
+// CHECK: adds xzr, x3, x5 // encoding: [0x7f,0x00,0x05,0xab]
+// CHECK: adds x20, xzr, x4 // encoding: [0xf4,0x03,0x04,0xab]
+// CHECK: adds x4, x6, xzr // encoding: [0xc4,0x00,0x1f,0xab]
+
+ adds x11, x13, x15, lsl #0
+ adds x9, x3, xzr, lsl #10
+ adds x17, x29, x20, lsl #63
+// CHECK: adds x11, x13, x15 // encoding: [0xab,0x01,0x0f,0xab]
+// CHECK: adds x9, x3, xzr, lsl #10 // encoding: [0x69,0x28,0x1f,0xab]
+// CHECK: adds x17, x29, x20, lsl #63 // encoding: [0xb1,0xff,0x14,0xab]
+
+ adds x21, x22, x23, lsr #0
+ adds x24, x25, x26, lsr #18
+ adds x27, x28, x29, lsr #63
+// CHECK: adds x21, x22, x23, lsr #0 // encoding: [0xd5,0x02,0x57,0xab]
+// CHECK: adds x24, x25, x26, lsr #18 // encoding: [0x38,0x4b,0x5a,0xab]
+// CHECK: adds x27, x28, x29, lsr #63 // encoding: [0x9b,0xff,0x5d,0xab]
+
+ adds x2, x3, x4, asr #0
+ adds x5, x6, x7, asr #21
+ adds x8, x9, x10, asr #63
+// CHECK: adds x2, x3, x4, asr #0 // encoding: [0x62,0x00,0x84,0xab]
+// CHECK: adds x5, x6, x7, asr #21 // encoding: [0xc5,0x54,0x87,0xab]
+// CHECK: adds x8, x9, x10, asr #63 // encoding: [0x28,0xfd,0x8a,0xab]
+
+ sub w3, w5, w7
+ sub wzr, w3, w5
+ sub w20, wzr, w4
+ sub w4, w6, wzr
+// CHECK: sub w3, w5, w7 // encoding: [0xa3,0x00,0x07,0x4b]
+// CHECK: sub wzr, w3, w5 // encoding: [0x7f,0x00,0x05,0x4b]
+// CHECK: sub w20, wzr, w4 // encoding: [0xf4,0x03,0x04,0x4b]
+// CHECK: sub w4, w6, wzr // encoding: [0xc4,0x00,0x1f,0x4b]
+
+ sub w11, w13, w15, lsl #0
+ sub w9, w3, wzr, lsl #10
+ sub w17, w29, w20, lsl #31
+// CHECK: sub w11, w13, w15 // encoding: [0xab,0x01,0x0f,0x4b]
+// CHECK: sub w9, w3, wzr, lsl #10 // encoding: [0x69,0x28,0x1f,0x4b]
+// CHECK: sub w17, w29, w20, lsl #31 // encoding: [0xb1,0x7f,0x14,0x4b]
+
+ sub w21, w22, w23, lsr #0
+ sub w24, w25, w26, lsr #18
+ sub w27, w28, w29, lsr #31
+// CHECK: sub w21, w22, w23, lsr #0 // encoding: [0xd5,0x02,0x57,0x4b]
+// CHECK: sub w24, w25, w26, lsr #18 // encoding: [0x38,0x4b,0x5a,0x4b]
+// CHECK: sub w27, w28, w29, lsr #31 // encoding: [0x9b,0x7f,0x5d,0x4b]
+
+ sub w2, w3, w4, asr #0
+ sub w5, w6, w7, asr #21
+ sub w8, w9, w10, asr #31
+// CHECK: sub w2, w3, w4, asr #0 // encoding: [0x62,0x00,0x84,0x4b]
+// CHECK: sub w5, w6, w7, asr #21 // encoding: [0xc5,0x54,0x87,0x4b]
+// CHECK: sub w8, w9, w10, asr #31 // encoding: [0x28,0x7d,0x8a,0x4b]
+
+ sub x3, x5, x7
+ sub xzr, x3, x5
+ sub x20, xzr, x4
+ sub x4, x6, xzr
+// CHECK: sub x3, x5, x7 // encoding: [0xa3,0x00,0x07,0xcb]
+// CHECK: sub xzr, x3, x5 // encoding: [0x7f,0x00,0x05,0xcb]
+// CHECK: sub x20, xzr, x4 // encoding: [0xf4,0x03,0x04,0xcb]
+// CHECK: sub x4, x6, xzr // encoding: [0xc4,0x00,0x1f,0xcb]
+
+ sub x11, x13, x15, lsl #0
+ sub x9, x3, xzr, lsl #10
+ sub x17, x29, x20, lsl #63
+// CHECK: sub x11, x13, x15 // encoding: [0xab,0x01,0x0f,0xcb]
+// CHECK: sub x9, x3, xzr, lsl #10 // encoding: [0x69,0x28,0x1f,0xcb]
+// CHECK: sub x17, x29, x20, lsl #63 // encoding: [0xb1,0xff,0x14,0xcb]
+
+ sub x21, x22, x23, lsr #0
+ sub x24, x25, x26, lsr #18
+ sub x27, x28, x29, lsr #63
+// CHECK: sub x21, x22, x23, lsr #0 // encoding: [0xd5,0x02,0x57,0xcb]
+// CHECK: sub x24, x25, x26, lsr #18 // encoding: [0x38,0x4b,0x5a,0xcb]
+// CHECK: sub x27, x28, x29, lsr #63 // encoding: [0x9b,0xff,0x5d,0xcb]
+
+ sub x2, x3, x4, asr #0
+ sub x5, x6, x7, asr #21
+ sub x8, x9, x10, asr #63
+// CHECK: sub x2, x3, x4, asr #0 // encoding: [0x62,0x00,0x84,0xcb]
+// CHECK: sub x5, x6, x7, asr #21 // encoding: [0xc5,0x54,0x87,0xcb]
+// CHECK: sub x8, x9, x10, asr #63 // encoding: [0x28,0xfd,0x8a,0xcb]
+
+ subs w3, w5, w7
+ subs wzr, w3, w5
+ subs w20, wzr, w4
+ subs w4, w6, wzr
+// CHECK: subs w3, w5, w7 // encoding: [0xa3,0x00,0x07,0x6b]
+// CHECK: subs wzr, w3, w5 // encoding: [0x7f,0x00,0x05,0x6b]
+// CHECK: subs w20, wzr, w4 // encoding: [0xf4,0x03,0x04,0x6b]
+// CHECK: subs w4, w6, wzr // encoding: [0xc4,0x00,0x1f,0x6b]
+
+ subs w11, w13, w15, lsl #0
+ subs w9, w3, wzr, lsl #10
+ subs w17, w29, w20, lsl #31
+// CHECK: subs w11, w13, w15 // encoding: [0xab,0x01,0x0f,0x6b]
+// CHECK: subs w9, w3, wzr, lsl #10 // encoding: [0x69,0x28,0x1f,0x6b]
+// CHECK: subs w17, w29, w20, lsl #31 // encoding: [0xb1,0x7f,0x14,0x6b]
+
+ subs w21, w22, w23, lsr #0
+ subs w24, w25, w26, lsr #18
+ subs w27, w28, w29, lsr #31
+// CHECK: subs w21, w22, w23, lsr #0 // encoding: [0xd5,0x02,0x57,0x6b]
+// CHECK: subs w24, w25, w26, lsr #18 // encoding: [0x38,0x4b,0x5a,0x6b]
+// CHECK: subs w27, w28, w29, lsr #31 // encoding: [0x9b,0x7f,0x5d,0x6b]
+
+ subs w2, w3, w4, asr #0
+ subs w5, w6, w7, asr #21
+ subs w8, w9, w10, asr #31
+// CHECK: subs w2, w3, w4, asr #0 // encoding: [0x62,0x00,0x84,0x6b]
+// CHECK: subs w5, w6, w7, asr #21 // encoding: [0xc5,0x54,0x87,0x6b]
+// CHECK: subs w8, w9, w10, asr #31 // encoding: [0x28,0x7d,0x8a,0x6b]
+
+ subs x3, x5, x7
+ subs xzr, x3, x5
+ subs x20, xzr, x4
+ subs x4, x6, xzr
+// CHECK: subs x3, x5, x7 // encoding: [0xa3,0x00,0x07,0xeb]
+// CHECK: subs xzr, x3, x5 // encoding: [0x7f,0x00,0x05,0xeb]
+// CHECK: subs x20, xzr, x4 // encoding: [0xf4,0x03,0x04,0xeb]
+// CHECK: subs x4, x6, xzr // encoding: [0xc4,0x00,0x1f,0xeb]
+
+ subs x11, x13, x15, lsl #0
+ subs x9, x3, xzr, lsl #10
+ subs x17, x29, x20, lsl #63
+// CHECK: subs x11, x13, x15 // encoding: [0xab,0x01,0x0f,0xeb]
+// CHECK: subs x9, x3, xzr, lsl #10 // encoding: [0x69,0x28,0x1f,0xeb]
+// CHECK: subs x17, x29, x20, lsl #63 // encoding: [0xb1,0xff,0x14,0xeb]
+
+ subs x21, x22, x23, lsr #0
+ subs x24, x25, x26, lsr #18
+ subs x27, x28, x29, lsr #63
+// CHECK: subs x21, x22, x23, lsr #0 // encoding: [0xd5,0x02,0x57,0xeb]
+// CHECK: subs x24, x25, x26, lsr #18 // encoding: [0x38,0x4b,0x5a,0xeb]
+// CHECK: subs x27, x28, x29, lsr #63 // encoding: [0x9b,0xff,0x5d,0xeb]
+
+ subs x2, x3, x4, asr #0
+ subs x5, x6, x7, asr #21
+ subs x8, x9, x10, asr #63
+// CHECK: subs x2, x3, x4, asr #0 // encoding: [0x62,0x00,0x84,0xeb]
+// CHECK: subs x5, x6, x7, asr #21 // encoding: [0xc5,0x54,0x87,0xeb]
+// CHECK: subs x8, x9, x10, asr #63 // encoding: [0x28,0xfd,0x8a,0xeb]
+
+ cmn w0, w3
+ cmn wzr, w4
+ cmn w5, wzr
+// CHECK: cmn w0, w3 // encoding: [0x1f,0x00,0x03,0x2b]
+// CHECK: cmn wzr, w4 // encoding: [0xff,0x03,0x04,0x2b]
+// CHECK: cmn w5, wzr // encoding: [0xbf,0x00,0x1f,0x2b]
+
+ cmn w6, w7, lsl #0
+ cmn w8, w9, lsl #15
+ cmn w10, w11, lsl #31
+// CHECK: cmn w6, w7 // encoding: [0xdf,0x00,0x07,0x2b]
+// CHECK: cmn w8, w9, lsl #15 // encoding: [0x1f,0x3d,0x09,0x2b]
+// CHECK: cmn w10, w11, lsl #31 // encoding: [0x5f,0x7d,0x0b,0x2b]
+
+ cmn w12, w13, lsr #0
+ cmn w14, w15, lsr #21
+ cmn w16, w17, lsr #31
+// CHECK: cmn w12, w13, lsr #0 // encoding: [0x9f,0x01,0x4d,0x2b]
+// CHECK: cmn w14, w15, lsr #21 // encoding: [0xdf,0x55,0x4f,0x2b]
+// CHECK: cmn w16, w17, lsr #31 // encoding: [0x1f,0x7e,0x51,0x2b]
+
+ cmn w18, w19, asr #0
+ cmn w20, w21, asr #22
+ cmn w22, w23, asr #31
+// CHECK: cmn w18, w19, asr #0 // encoding: [0x5f,0x02,0x93,0x2b]
+// CHECK: cmn w20, w21, asr #22 // encoding: [0x9f,0x5a,0x95,0x2b]
+// CHECK: cmn w22, w23, asr #31 // encoding: [0xdf,0x7e,0x97,0x2b]
+
+ cmn x0, x3
+ cmn xzr, x4
+ cmn x5, xzr
+// CHECK: cmn x0, x3 // encoding: [0x1f,0x00,0x03,0xab]
+// CHECK: cmn xzr, x4 // encoding: [0xff,0x03,0x04,0xab]
+// CHECK: cmn x5, xzr // encoding: [0xbf,0x00,0x1f,0xab]
+
+ cmn x6, x7, lsl #0
+ cmn x8, x9, lsl #15
+ cmn x10, x11, lsl #63
+// CHECK: cmn x6, x7 // encoding: [0xdf,0x00,0x07,0xab]
+// CHECK: cmn x8, x9, lsl #15 // encoding: [0x1f,0x3d,0x09,0xab]
+// CHECK: cmn x10, x11, lsl #63 // encoding: [0x5f,0xfd,0x0b,0xab]
+
+ cmn x12, x13, lsr #0
+ cmn x14, x15, lsr #41
+ cmn x16, x17, lsr #63
+// CHECK: cmn x12, x13, lsr #0 // encoding: [0x9f,0x01,0x4d,0xab]
+// CHECK: cmn x14, x15, lsr #41 // encoding: [0xdf,0xa5,0x4f,0xab]
+// CHECK: cmn x16, x17, lsr #63 // encoding: [0x1f,0xfe,0x51,0xab]
+
+ cmn x18, x19, asr #0
+ cmn x20, x21, asr #55
+ cmn x22, x23, asr #63
+// CHECK: cmn x18, x19, asr #0 // encoding: [0x5f,0x02,0x93,0xab]
+// CHECK: cmn x20, x21, asr #55 // encoding: [0x9f,0xde,0x95,0xab]
+// CHECK: cmn x22, x23, asr #63 // encoding: [0xdf,0xfe,0x97,0xab]
+
+ cmp w0, w3
+ cmp wzr, w4
+ cmp w5, wzr
+// CHECK: cmp w0, w3 // encoding: [0x1f,0x00,0x03,0x6b]
+// CHECK: cmp wzr, w4 // encoding: [0xff,0x03,0x04,0x6b]
+// CHECK: cmp w5, wzr // encoding: [0xbf,0x00,0x1f,0x6b]
+
+ cmp w6, w7, lsl #0
+ cmp w8, w9, lsl #15
+ cmp w10, w11, lsl #31
+// CHECK: cmp w6, w7 // encoding: [0xdf,0x00,0x07,0x6b]
+// CHECK: cmp w8, w9, lsl #15 // encoding: [0x1f,0x3d,0x09,0x6b]
+// CHECK: cmp w10, w11, lsl #31 // encoding: [0x5f,0x7d,0x0b,0x6b]
+
+ cmp w12, w13, lsr #0
+ cmp w14, w15, lsr #21
+ cmp w16, w17, lsr #31
+// CHECK: cmp w12, w13, lsr #0 // encoding: [0x9f,0x01,0x4d,0x6b]
+// CHECK: cmp w14, w15, lsr #21 // encoding: [0xdf,0x55,0x4f,0x6b]
+// CHECK: cmp w16, w17, lsr #31 // encoding: [0x1f,0x7e,0x51,0x6b]
+
+ cmp w18, w19, asr #0
+ cmp w20, w21, asr #22
+ cmp w22, w23, asr #31
+// CHECK: cmp w18, w19, asr #0 // encoding: [0x5f,0x02,0x93,0x6b]
+// CHECK: cmp w20, w21, asr #22 // encoding: [0x9f,0x5a,0x95,0x6b]
+// CHECK: cmp w22, w23, asr #31 // encoding: [0xdf,0x7e,0x97,0x6b]
+
+ cmp x0, x3
+ cmp xzr, x4
+ cmp x5, xzr
+// CHECK: cmp x0, x3 // encoding: [0x1f,0x00,0x03,0xeb]
+// CHECK: cmp xzr, x4 // encoding: [0xff,0x03,0x04,0xeb]
+// CHECK: cmp x5, xzr // encoding: [0xbf,0x00,0x1f,0xeb]
+
+ cmp x6, x7, lsl #0
+ cmp x8, x9, lsl #15
+ cmp x10, x11, lsl #63
+// CHECK: cmp x6, x7 // encoding: [0xdf,0x00,0x07,0xeb]
+// CHECK: cmp x8, x9, lsl #15 // encoding: [0x1f,0x3d,0x09,0xeb]
+// CHECK: cmp x10, x11, lsl #63 // encoding: [0x5f,0xfd,0x0b,0xeb]
+
+ cmp x12, x13, lsr #0
+ cmp x14, x15, lsr #41
+ cmp x16, x17, lsr #63
+// CHECK: cmp x12, x13, lsr #0 // encoding: [0x9f,0x01,0x4d,0xeb]
+// CHECK: cmp x14, x15, lsr #41 // encoding: [0xdf,0xa5,0x4f,0xeb]
+// CHECK: cmp x16, x17, lsr #63 // encoding: [0x1f,0xfe,0x51,0xeb]
+
+ cmp x18, x19, asr #0
+ cmp x20, x21, asr #55
+ cmp x22, x23, asr #63
+// CHECK: cmp x18, x19, asr #0 // encoding: [0x5f,0x02,0x93,0xeb]
+// CHECK: cmp x20, x21, asr #55 // encoding: [0x9f,0xde,0x95,0xeb]
+// CHECK: cmp x22, x23, asr #63 // encoding: [0xdf,0xfe,0x97,0xeb]
+
+ neg w29, w30
+ neg w30, wzr
+ neg wzr, w0
+// CHECK: sub w29, wzr, w30 // encoding: [0xfd,0x03,0x1e,0x4b]
+// CHECK: sub w30, wzr, wzr // encoding: [0xfe,0x03,0x1f,0x4b]
+// CHECK: sub wzr, wzr, w0 // encoding: [0xff,0x03,0x00,0x4b]
+
+ neg w28, w27, lsl #0
+ neg w26, w25, lsl #29
+ neg w24, w23, lsl #31
+// CHECK: sub w28, wzr, w27 // encoding: [0xfc,0x03,0x1b,0x4b]
+// CHECK: sub w26, wzr, w25, lsl #29 // encoding: [0xfa,0x77,0x19,0x4b]
+// CHECK: sub w24, wzr, w23, lsl #31 // encoding: [0xf8,0x7f,0x17,0x4b]
+
+ neg w22, w21, lsr #0
+ neg w20, w19, lsr #1
+ neg w18, w17, lsr #31
+// CHECK: sub w22, wzr, w21, lsr #0 // encoding: [0xf6,0x03,0x55,0x4b]
+// CHECK: sub w20, wzr, w19, lsr #1 // encoding: [0xf4,0x07,0x53,0x4b]
+// CHECK: sub w18, wzr, w17, lsr #31 // encoding: [0xf2,0x7f,0x51,0x4b]
+
+ neg w16, w15, asr #0
+ neg w14, w13, asr #12
+ neg w12, w11, asr #31
+// CHECK: sub w16, wzr, w15, asr #0 // encoding: [0xf0,0x03,0x8f,0x4b]
+// CHECK: sub w14, wzr, w13, asr #12 // encoding: [0xee,0x33,0x8d,0x4b]
+// CHECK: sub w12, wzr, w11, asr #31 // encoding: [0xec,0x7f,0x8b,0x4b]
+
+ neg x29, x30
+ neg x30, xzr
+ neg xzr, x0
+// CHECK: sub x29, xzr, x30 // encoding: [0xfd,0x03,0x1e,0xcb]
+// CHECK: sub x30, xzr, xzr // encoding: [0xfe,0x03,0x1f,0xcb]
+// CHECK: sub xzr, xzr, x0 // encoding: [0xff,0x03,0x00,0xcb]
+
+ neg x28, x27, lsl #0
+ neg x26, x25, lsl #29
+ neg x24, x23, lsl #31
+// CHECK: sub x28, xzr, x27 // encoding: [0xfc,0x03,0x1b,0xcb]
+// CHECK: sub x26, xzr, x25, lsl #29 // encoding: [0xfa,0x77,0x19,0xcb]
+// CHECK: sub x24, xzr, x23, lsl #31 // encoding: [0xf8,0x7f,0x17,0xcb]
+
+ neg x22, x21, lsr #0
+ neg x20, x19, lsr #1
+ neg x18, x17, lsr #31
+// CHECK: sub x22, xzr, x21, lsr #0 // encoding: [0xf6,0x03,0x55,0xcb]
+// CHECK: sub x20, xzr, x19, lsr #1 // encoding: [0xf4,0x07,0x53,0xcb]
+// CHECK: sub x18, xzr, x17, lsr #31 // encoding: [0xf2,0x7f,0x51,0xcb]
+
+ neg x16, x15, asr #0
+ neg x14, x13, asr #12
+ neg x12, x11, asr #31
+// CHECK: sub x16, xzr, x15, asr #0 // encoding: [0xf0,0x03,0x8f,0xcb]
+// CHECK: sub x14, xzr, x13, asr #12 // encoding: [0xee,0x33,0x8d,0xcb]
+// CHECK: sub x12, xzr, x11, asr #31 // encoding: [0xec,0x7f,0x8b,0xcb]
+
+ negs w29, w30
+ negs w30, wzr
+ negs wzr, w0
+// CHECK: subs w29, wzr, w30 // encoding: [0xfd,0x03,0x1e,0x6b]
+// CHECK: subs w30, wzr, wzr // encoding: [0xfe,0x03,0x1f,0x6b]
+// CHECK: subs wzr, wzr, w0 // encoding: [0xff,0x03,0x00,0x6b]
+
+ negs w28, w27, lsl #0
+ negs w26, w25, lsl #29
+ negs w24, w23, lsl #31
+// CHECK: subs w28, wzr, w27 // encoding: [0xfc,0x03,0x1b,0x6b]
+// CHECK: subs w26, wzr, w25, lsl #29 // encoding: [0xfa,0x77,0x19,0x6b]
+// CHECK: subs w24, wzr, w23, lsl #31 // encoding: [0xf8,0x7f,0x17,0x6b]
+
+ negs w22, w21, lsr #0
+ negs w20, w19, lsr #1
+ negs w18, w17, lsr #31
+// CHECK: subs w22, wzr, w21, lsr #0 // encoding: [0xf6,0x03,0x55,0x6b]
+// CHECK: subs w20, wzr, w19, lsr #1 // encoding: [0xf4,0x07,0x53,0x6b]
+// CHECK: subs w18, wzr, w17, lsr #31 // encoding: [0xf2,0x7f,0x51,0x6b]
+
+ negs w16, w15, asr #0
+ negs w14, w13, asr #12
+ negs w12, w11, asr #31
+// CHECK: subs w16, wzr, w15, asr #0 // encoding: [0xf0,0x03,0x8f,0x6b]
+// CHECK: subs w14, wzr, w13, asr #12 // encoding: [0xee,0x33,0x8d,0x6b]
+// CHECK: subs w12, wzr, w11, asr #31 // encoding: [0xec,0x7f,0x8b,0x6b]
+
+ negs x29, x30
+ negs x30, xzr
+ negs xzr, x0
+// CHECK: subs x29, xzr, x30 // encoding: [0xfd,0x03,0x1e,0xeb]
+// CHECK: subs x30, xzr, xzr // encoding: [0xfe,0x03,0x1f,0xeb]
+// CHECK: subs xzr, xzr, x0 // encoding: [0xff,0x03,0x00,0xeb]
+
+ negs x28, x27, lsl #0
+ negs x26, x25, lsl #29
+ negs x24, x23, lsl #31
+// CHECK: subs x28, xzr, x27 // encoding: [0xfc,0x03,0x1b,0xeb]
+// CHECK: subs x26, xzr, x25, lsl #29 // encoding: [0xfa,0x77,0x19,0xeb]
+// CHECK: subs x24, xzr, x23, lsl #31 // encoding: [0xf8,0x7f,0x17,0xeb]
+
+ negs x22, x21, lsr #0
+ negs x20, x19, lsr #1
+ negs x18, x17, lsr #31
+// CHECK: subs x22, xzr, x21, lsr #0 // encoding: [0xf6,0x03,0x55,0xeb]
+// CHECK: subs x20, xzr, x19, lsr #1 // encoding: [0xf4,0x07,0x53,0xeb]
+// CHECK: subs x18, xzr, x17, lsr #31 // encoding: [0xf2,0x7f,0x51,0xeb]
+
+ negs x16, x15, asr #0
+ negs x14, x13, asr #12
+ negs x12, x11, asr #31
+// CHECK: subs x16, xzr, x15, asr #0 // encoding: [0xf0,0x03,0x8f,0xeb]
+// CHECK: subs x14, xzr, x13, asr #12 // encoding: [0xee,0x33,0x8d,0xeb]
+// CHECK: subs x12, xzr, x11, asr #31 // encoding: [0xec,0x7f,0x8b,0xeb]
+
+//------------------------------------------------------------------------------
+// Add-sub (with carry)
+//------------------------------------------------------------------------------
+ adc w29, w27, w25
+ adc wzr, w3, w4
+ adc w9, wzr, w10
+ adc w20, w0, wzr
+// CHECK: adc w29, w27, w25 // encoding: [0x7d,0x03,0x19,0x1a]
+// CHECK: adc wzr, w3, w4 // encoding: [0x7f,0x00,0x04,0x1a]
+// CHECK: adc w9, wzr, w10 // encoding: [0xe9,0x03,0x0a,0x1a]
+// CHECK: adc w20, w0, wzr // encoding: [0x14,0x00,0x1f,0x1a]
+
+ adc x29, x27, x25
+ adc xzr, x3, x4
+ adc x9, xzr, x10
+ adc x20, x0, xzr
+// CHECK: adc x29, x27, x25 // encoding: [0x7d,0x03,0x19,0x9a]
+// CHECK: adc xzr, x3, x4 // encoding: [0x7f,0x00,0x04,0x9a]
+// CHECK: adc x9, xzr, x10 // encoding: [0xe9,0x03,0x0a,0x9a]
+// CHECK: adc x20, x0, xzr // encoding: [0x14,0x00,0x1f,0x9a]
+
+ adcs w29, w27, w25
+ adcs wzr, w3, w4
+ adcs w9, wzr, w10
+ adcs w20, w0, wzr
+// CHECK: adcs w29, w27, w25 // encoding: [0x7d,0x03,0x19,0x3a]
+// CHECK: adcs wzr, w3, w4 // encoding: [0x7f,0x00,0x04,0x3a]
+// CHECK: adcs w9, wzr, w10 // encoding: [0xe9,0x03,0x0a,0x3a]
+// CHECK: adcs w20, w0, wzr // encoding: [0x14,0x00,0x1f,0x3a]
+
+ adcs x29, x27, x25
+ adcs xzr, x3, x4
+ adcs x9, xzr, x10
+ adcs x20, x0, xzr
+// CHECK: adcs x29, x27, x25 // encoding: [0x7d,0x03,0x19,0xba]
+// CHECK: adcs xzr, x3, x4 // encoding: [0x7f,0x00,0x04,0xba]
+// CHECK: adcs x9, xzr, x10 // encoding: [0xe9,0x03,0x0a,0xba]
+// CHECK: adcs x20, x0, xzr // encoding: [0x14,0x00,0x1f,0xba]
+
+ sbc w29, w27, w25
+ sbc wzr, w3, w4
+ sbc w9, wzr, w10
+ sbc w20, w0, wzr
+// CHECK: sbc w29, w27, w25 // encoding: [0x7d,0x03,0x19,0x5a]
+// CHECK: sbc wzr, w3, w4 // encoding: [0x7f,0x00,0x04,0x5a]
+// CHECK: ngc w9, w10 // encoding: [0xe9,0x03,0x0a,0x5a]
+// CHECK: sbc w20, w0, wzr // encoding: [0x14,0x00,0x1f,0x5a]
+
+ sbc x29, x27, x25
+ sbc xzr, x3, x4
+ sbc x9, xzr, x10
+ sbc x20, x0, xzr
+// CHECK: sbc x29, x27, x25 // encoding: [0x7d,0x03,0x19,0xda]
+// CHECK: sbc xzr, x3, x4 // encoding: [0x7f,0x00,0x04,0xda]
+// CHECK: ngc x9, x10 // encoding: [0xe9,0x03,0x0a,0xda]
+// CHECK: sbc x20, x0, xzr // encoding: [0x14,0x00,0x1f,0xda]
+
+ sbcs w29, w27, w25
+ sbcs wzr, w3, w4
+ sbcs w9, wzr, w10
+ sbcs w20, w0, wzr
+// CHECK: sbcs w29, w27, w25 // encoding: [0x7d,0x03,0x19,0x7a]
+// CHECK: sbcs wzr, w3, w4 // encoding: [0x7f,0x00,0x04,0x7a]
+// CHECK: ngcs w9, w10 // encoding: [0xe9,0x03,0x0a,0x7a]
+// CHECK: sbcs w20, w0, wzr // encoding: [0x14,0x00,0x1f,0x7a]
+
+ sbcs x29, x27, x25
+ sbcs xzr, x3, x4
+ sbcs x9, xzr, x10
+ sbcs x20, x0, xzr
+// CHECK: sbcs x29, x27, x25 // encoding: [0x7d,0x03,0x19,0xfa]
+// CHECK: sbcs xzr, x3, x4 // encoding: [0x7f,0x00,0x04,0xfa]
+// CHECK: ngcs x9, x10 // encoding: [0xe9,0x03,0x0a,0xfa]
+// CHECK: sbcs x20, x0, xzr // encoding: [0x14,0x00,0x1f,0xfa]
+
+ ngc w3, w12
+ ngc wzr, w9
+ ngc w23, wzr
+// CHECK: ngc w3, w12 // encoding: [0xe3,0x03,0x0c,0x5a]
+// CHECK: ngc wzr, w9 // encoding: [0xff,0x03,0x09,0x5a]
+// CHECK: ngc w23, wzr // encoding: [0xf7,0x03,0x1f,0x5a]
+
+ ngc x29, x30
+ ngc xzr, x0
+ ngc x0, xzr
+// CHECK: ngc x29, x30 // encoding: [0xfd,0x03,0x1e,0xda]
+// CHECK: ngc xzr, x0 // encoding: [0xff,0x03,0x00,0xda]
+// CHECK: ngc x0, xzr // encoding: [0xe0,0x03,0x1f,0xda]
+
+ ngcs w3, w12
+ ngcs wzr, w9
+ ngcs w23, wzr
+// CHECK: ngcs w3, w12 // encoding: [0xe3,0x03,0x0c,0x7a]
+// CHECK: ngcs wzr, w9 // encoding: [0xff,0x03,0x09,0x7a]
+// CHECK: ngcs w23, wzr // encoding: [0xf7,0x03,0x1f,0x7a]
+
+ ngcs x29, x30
+ ngcs xzr, x0
+ ngcs x0, xzr
+// CHECK: ngcs x29, x30 // encoding: [0xfd,0x03,0x1e,0xfa]
+// CHECK: ngcs xzr, x0 // encoding: [0xff,0x03,0x00,0xfa]
+// CHECK: ngcs x0, xzr // encoding: [0xe0,0x03,0x1f,0xfa]
+
+//------------------------------------------------------------------------------
+// Bitfield
+//------------------------------------------------------------------------------
+
+ sbfm x1, x2, #3, #4
+ sbfm x3, x4, #63, #63
+ sbfm wzr, wzr, #31, #31
+ sbfm w12, w9, #0, #0
+// CHECK: sbfm x1, x2, #3, #4 // encoding: [0x41,0x10,0x43,0x93]
+// CHECK: sbfm x3, x4, #63, #63 // encoding: [0x83,0xfc,0x7f,0x93]
+// CHECK: sbfm wzr, wzr, #31, #31 // encoding: [0xff,0x7f,0x1f,0x13]
+// CHECK: sbfm w12, w9, #0, #0 // encoding: [0x2c,0x01,0x00,0x13]
+
+ ubfm x4, x5, #12, #10
+ ubfm xzr, x4, #0, #0
+ ubfm x4, xzr, #63, #5
+ ubfm x5, x6, #12, #63
+// CHECK: ubfm x4, x5, #12, #10 // encoding: [0xa4,0x28,0x4c,0xd3]
+// CHECK: ubfm xzr, x4, #0, #0 // encoding: [0x9f,0x00,0x40,0xd3]
+// CHECK: ubfm x4, xzr, #63, #5 // encoding: [0xe4,0x17,0x7f,0xd3]
+// CHECK: ubfm x5, x6, #12, #63 // encoding: [0xc5,0xfc,0x4c,0xd3]
+
+ bfm x4, x5, #12, #10
+ bfm xzr, x4, #0, #0
+ bfm x4, xzr, #63, #5
+ bfm x5, x6, #12, #63
+// CHECK: bfm x4, x5, #12, #10 // encoding: [0xa4,0x28,0x4c,0xb3]
+// CHECK: bfm xzr, x4, #0, #0 // encoding: [0x9f,0x00,0x40,0xb3]
+// CHECK: bfm x4, xzr, #63, #5 // encoding: [0xe4,0x17,0x7f,0xb3]
+// CHECK: bfm x5, x6, #12, #63 // encoding: [0xc5,0xfc,0x4c,0xb3]
+
+ sxtb w1, w2
+ sxtb xzr, w3
+ sxth w9, w10
+ sxth x0, w1
+ sxtw x3, w30
+// CHECK: sxtb w1, w2 // encoding: [0x41,0x1c,0x00,0x13]
+// CHECK: sxtb xzr, w3 // encoding: [0x7f,0x1c,0x40,0x93]
+// CHECK: sxth w9, w10 // encoding: [0x49,0x3d,0x00,0x13]
+// CHECK: sxth x0, w1 // encoding: [0x20,0x3c,0x40,0x93]
+// CHECK: sxtw x3, w30 // encoding: [0xc3,0x7f,0x40,0x93]
+
+ uxtb w1, w2
+ uxtb xzr, w3
+ uxth w9, w10
+ uxth x0, w1
+// CHECK: uxtb w1, w2 // encoding: [0x41,0x1c,0x00,0x53]
+// CHECK: uxtb xzr, w3 // encoding: [0x7f,0x1c,0x00,0x53]
+// CHECK: uxth w9, w10 // encoding: [0x49,0x3d,0x00,0x53]
+// CHECK: uxth x0, w1 // encoding: [0x20,0x3c,0x00,0x53]
+
+ asr w3, w2, #0
+ asr w9, w10, #31
+ asr x20, x21, #63
+ asr w1, wzr, #3
+// CHECK: asr w3, w2, #0 // encoding: [0x43,0x7c,0x00,0x13]
+// CHECK: asr w9, w10, #31 // encoding: [0x49,0x7d,0x1f,0x13]
+// CHECK: asr x20, x21, #63 // encoding: [0xb4,0xfe,0x7f,0x93]
+// CHECK: asr w1, wzr, #3 // encoding: [0xe1,0x7f,0x03,0x13]
+
+ lsr w3, w2, #0
+ lsr w9, w10, #31
+ lsr x20, x21, #63
+ lsr wzr, wzr, #3
+// CHECK: lsr w3, w2, #0 // encoding: [0x43,0x7c,0x00,0x53]
+// CHECK: lsr w9, w10, #31 // encoding: [0x49,0x7d,0x1f,0x53]
+// CHECK: lsr x20, x21, #63 // encoding: [0xb4,0xfe,0x7f,0xd3]
+// CHECK: lsr wzr, wzr, #3 // encoding: [0xff,0x7f,0x03,0x53]
+
+ lsl w3, w2, #0
+ lsl w9, w10, #31
+ lsl x20, x21, #63
+ lsl w1, wzr, #3
+// CHECK: lsl w3, w2, #0 // encoding: [0x43,0x7c,0x00,0x53]
+// CHECK: lsl w9, w10, #31 // encoding: [0x49,0x01,0x01,0x53]
+// CHECK: lsl x20, x21, #63 // encoding: [0xb4,0x02,0x41,0xd3]
+// CHECK: lsl w1, wzr, #3 // encoding: [0xe1,0x73,0x1d,0x53]
+
+ sbfiz w9, w10, #0, #1
+ sbfiz x2, x3, #63, #1
+ sbfiz x19, x20, #0, #64
+ sbfiz x9, x10, #5, #59
+ sbfiz w9, w10, #0, #32
+ sbfiz w11, w12, #31, #1
+ sbfiz w13, w14, #29, #3
+ sbfiz xzr, xzr, #10, #11
+// CHECK: sbfiz w9, w10, #0, #1 // encoding: [0x49,0x01,0x00,0x13]
+// CHECK: sbfiz x2, x3, #63, #1 // encoding: [0x62,0x00,0x41,0x93]
+// CHECK: sbfiz x19, x20, #0, #64 // encoding: [0x93,0xfe,0x40,0x93]
+// CHECK: sbfiz x9, x10, #5, #59 // encoding: [0x49,0xe9,0x7b,0x93]
+// CHECK: sbfiz w9, w10, #0, #32 // encoding: [0x49,0x7d,0x00,0x13]
+// CHECK: sbfiz w11, w12, #31, #1 // encoding: [0x8b,0x01,0x01,0x13]
+// CHECK: sbfiz w13, w14, #29, #3 // encoding: [0xcd,0x09,0x03,0x13]
+// CHECK: sbfiz xzr, xzr, #10, #11 // encoding: [0xff,0x2b,0x76,0x93]
+
+ sbfx w9, w10, #0, #1
+ sbfx x2, x3, #63, #1
+ sbfx x19, x20, #0, #64
+ sbfx x9, x10, #5, #59
+ sbfx w9, w10, #0, #32
+ sbfx w11, w12, #31, #1
+ sbfx w13, w14, #29, #3
+ sbfx xzr, xzr, #10, #11
+// CHECK: sbfx w9, w10, #0, #1 // encoding: [0x49,0x01,0x00,0x13]
+// CHECK: sbfx x2, x3, #63, #1 // encoding: [0x62,0xfc,0x7f,0x93]
+// CHECK: sbfx x19, x20, #0, #64 // encoding: [0x93,0xfe,0x40,0x93]
+// CHECK: sbfx x9, x10, #5, #59 // encoding: [0x49,0xfd,0x45,0x93]
+// CHECK: sbfx w9, w10, #0, #32 // encoding: [0x49,0x7d,0x00,0x13]
+// CHECK: sbfx w11, w12, #31, #1 // encoding: [0x8b,0x7d,0x1f,0x13]
+// CHECK: sbfx w13, w14, #29, #3 // encoding: [0xcd,0x7d,0x1d,0x13]
+// CHECK: sbfx xzr, xzr, #10, #11 // encoding: [0xff,0x53,0x4a,0x93]
+
+ bfi w9, w10, #0, #1
+ bfi x2, x3, #63, #1
+ bfi x19, x20, #0, #64
+ bfi x9, x10, #5, #59
+ bfi w9, w10, #0, #32
+ bfi w11, w12, #31, #1
+ bfi w13, w14, #29, #3
+ bfi xzr, xzr, #10, #11
+// CHECK: bfi w9, w10, #0, #1 // encoding: [0x49,0x01,0x00,0x33]
+// CHECK: bfi x2, x3, #63, #1 // encoding: [0x62,0x00,0x41,0xb3]
+// CHECK: bfi x19, x20, #0, #64 // encoding: [0x93,0xfe,0x40,0xb3]
+// CHECK: bfi x9, x10, #5, #59 // encoding: [0x49,0xe9,0x7b,0xb3]
+// CHECK: bfi w9, w10, #0, #32 // encoding: [0x49,0x7d,0x00,0x33]
+// CHECK: bfi w11, w12, #31, #1 // encoding: [0x8b,0x01,0x01,0x33]
+// CHECK: bfi w13, w14, #29, #3 // encoding: [0xcd,0x09,0x03,0x33]
+// CHECK: bfi xzr, xzr, #10, #11 // encoding: [0xff,0x2b,0x76,0xb3]
+
+ bfxil w9, w10, #0, #1
+ bfxil x2, x3, #63, #1
+ bfxil x19, x20, #0, #64
+ bfxil x9, x10, #5, #59
+ bfxil w9, w10, #0, #32
+ bfxil w11, w12, #31, #1
+ bfxil w13, w14, #29, #3
+ bfxil xzr, xzr, #10, #11
+// CHECK: bfxil w9, w10, #0, #1 // encoding: [0x49,0x01,0x00,0x33]
+// CHECK: bfxil x2, x3, #63, #1 // encoding: [0x62,0xfc,0x7f,0xb3]
+// CHECK: bfxil x19, x20, #0, #64 // encoding: [0x93,0xfe,0x40,0xb3]
+// CHECK: bfxil x9, x10, #5, #59 // encoding: [0x49,0xfd,0x45,0xb3]
+// CHECK: bfxil w9, w10, #0, #32 // encoding: [0x49,0x7d,0x00,0x33]
+// CHECK: bfxil w11, w12, #31, #1 // encoding: [0x8b,0x7d,0x1f,0x33]
+// CHECK: bfxil w13, w14, #29, #3 // encoding: [0xcd,0x7d,0x1d,0x33]
+// CHECK: bfxil xzr, xzr, #10, #11 // encoding: [0xff,0x53,0x4a,0xb3]
+
+ ubfiz w9, w10, #0, #1
+ ubfiz x2, x3, #63, #1
+ ubfiz x19, x20, #0, #64
+ ubfiz x9, x10, #5, #59
+ ubfiz w9, w10, #0, #32
+ ubfiz w11, w12, #31, #1
+ ubfiz w13, w14, #29, #3
+ ubfiz xzr, xzr, #10, #11
+// CHECK: ubfiz w9, w10, #0, #1 // encoding: [0x49,0x01,0x00,0x53]
+// CHECK: ubfiz x2, x3, #63, #1 // encoding: [0x62,0x00,0x41,0xd3]
+// CHECK: ubfiz x19, x20, #0, #64 // encoding: [0x93,0xfe,0x40,0xd3]
+// CHECK: ubfiz x9, x10, #5, #59 // encoding: [0x49,0xe9,0x7b,0xd3]
+// CHECK: ubfiz w9, w10, #0, #32 // encoding: [0x49,0x7d,0x00,0x53]
+// CHECK: ubfiz w11, w12, #31, #1 // encoding: [0x8b,0x01,0x01,0x53]
+// CHECK: ubfiz w13, w14, #29, #3 // encoding: [0xcd,0x09,0x03,0x53]
+// CHECK: ubfiz xzr, xzr, #10, #11 // encoding: [0xff,0x2b,0x76,0xd3]
+
+ ubfx w9, w10, #0, #1
+ ubfx x2, x3, #63, #1
+ ubfx x19, x20, #0, #64
+ ubfx x9, x10, #5, #59
+ ubfx w9, w10, #0, #32
+ ubfx w11, w12, #31, #1
+ ubfx w13, w14, #29, #3
+ ubfx xzr, xzr, #10, #11
+// CHECK: ubfx w9, w10, #0, #1 // encoding: [0x49,0x01,0x00,0x53]
+// CHECK: ubfx x2, x3, #63, #1 // encoding: [0x62,0xfc,0x7f,0xd3]
+// CHECK: ubfx x19, x20, #0, #64 // encoding: [0x93,0xfe,0x40,0xd3]
+// CHECK: ubfx x9, x10, #5, #59 // encoding: [0x49,0xfd,0x45,0xd3]
+// CHECK: ubfx w9, w10, #0, #32 // encoding: [0x49,0x7d,0x00,0x53]
+// CHECK: ubfx w11, w12, #31, #1 // encoding: [0x8b,0x7d,0x1f,0x53]
+// CHECK: ubfx w13, w14, #29, #3 // encoding: [0xcd,0x7d,0x1d,0x53]
+// CHECK: ubfx xzr, xzr, #10, #11 // encoding: [0xff,0x53,0x4a,0xd3]
+
+//------------------------------------------------------------------------------
+// Compare & branch (immediate)
+//------------------------------------------------------------------------------
+
+ cbz w5, lbl
+ cbz x5, lbl
+ cbnz x2, lbl
+ cbnz x26, lbl
+// CHECK: cbz w5, lbl // encoding: [0x05'A',A,A,0x34'A']
+// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
+// CHECK: cbz x5, lbl // encoding: [0x05'A',A,A,0xb4'A']
+// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
+// CHECK: cbnz x2, lbl // encoding: [0x02'A',A,A,0xb5'A']
+// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
+// CHECK: cbnz x26, lbl // encoding: [0x1a'A',A,A,0xb5'A']
+// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
+
+ cbz wzr, lbl
+ cbnz xzr, lbl
+// CHECK: cbz wzr, lbl // encoding: [0x1f'A',A,A,0x34'A']
+// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
+// CHECK: cbnz xzr, lbl // encoding: [0x1f'A',A,A,0xb5'A']
+// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
+
+ cbz w5, #0
+ cbnz x3, #-4
+ cbz w20, #1048572
+ cbnz xzr, #-1048576
+// CHECK: cbz w5, #0 // encoding: [0x05,0x00,0x00,0x34]
+// CHECK: cbnz x3, #-4 // encoding: [0xe3,0xff,0xff,0xb5]
+// CHECK: cbz w20, #1048572 // encoding: [0xf4,0xff,0x7f,0x34]
+// CHECK: cbnz xzr, #-1048576 // encoding: [0x1f,0x00,0x80,0xb5]
+
+//------------------------------------------------------------------------------
+// Conditional branch (immediate)
+//------------------------------------------------------------------------------
+
+ b.eq lbl
+ b.ne lbl
+ b.cs lbl
+ b.hs lbl
+ b.lo lbl
+ b.cc lbl
+ b.mi lbl
+ b.pl lbl
+ b.vs lbl
+ b.vc lbl
+ b.hi lbl
+ b.ls lbl
+ b.ge lbl
+ b.lt lbl
+ b.gt lbl
+ b.le lbl
+ b.al lbl
+// CHECK: b.eq lbl // encoding: [A,A,A,0x54'A']
+// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
+// CHECK: b.ne lbl // encoding: [0x01'A',A,A,0x54'A']
+// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
+// CHECK: b.hs lbl // encoding: [0x02'A',A,A,0x54'A']
+// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
+// CHECK: b.hs lbl // encoding: [0x02'A',A,A,0x54'A']
+// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
+// CHECK: b.lo lbl // encoding: [0x03'A',A,A,0x54'A']
+// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
+// CHECK: b.lo lbl // encoding: [0x03'A',A,A,0x54'A']
+// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
+// CHECK: b.mi lbl // encoding: [0x04'A',A,A,0x54'A']
+// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
+// CHECK: b.pl lbl // encoding: [0x05'A',A,A,0x54'A']
+// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
+// CHECK: b.vs lbl // encoding: [0x06'A',A,A,0x54'A']
+// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
+// CHECK: b.vc lbl // encoding: [0x07'A',A,A,0x54'A']
+// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
+// CHECK: b.hi lbl // encoding: [0x08'A',A,A,0x54'A']
+// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
+// CHECK: b.ls lbl // encoding: [0x09'A',A,A,0x54'A']
+// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
+// CHECK: b.ge lbl // encoding: [0x0a'A',A,A,0x54'A']
+// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
+// CHECK: b.lt lbl // encoding: [0x0b'A',A,A,0x54'A']
+// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
+// CHECK: b.gt lbl // encoding: [0x0c'A',A,A,0x54'A']
+// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
+// CHECK: b.le lbl // encoding: [0x0d'A',A,A,0x54'A']
+// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
+// CHECK: b.al lbl // encoding: [0x0e'A',A,A,0x54'A']
+// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
+
+ b.eq #0
+ b.lt #-4
+ b.cc #1048572
+// CHECK: b.eq #0 // encoding: [0x00,0x00,0x00,0x54]
+// CHECK: b.lt #-4 // encoding: [0xeb,0xff,0xff,0x54]
+// CHECK: b.lo #1048572 // encoding: [0xe3,0xff,0x7f,0x54]
+
+//------------------------------------------------------------------------------
+// Conditional compare (immediate)
+//------------------------------------------------------------------------------
+
+ ccmp w1, #31, #0, eq
+ ccmp w3, #0, #15, hs
+ ccmp wzr, #15, #13, cs
+// CHECK: ccmp w1, #31, #0, eq // encoding: [0x20,0x08,0x5f,0x7a]
+// CHECK: ccmp w3, #0, #15, hs // encoding: [0x6f,0x28,0x40,0x7a]
+// CHECK: ccmp wzr, #15, #13, hs // encoding: [0xed,0x2b,0x4f,0x7a]
+
+ ccmp x9, #31, #0, le
+ ccmp x3, #0, #15, gt
+ ccmp xzr, #5, #7, ne
+// CHECK: ccmp x9, #31, #0, le // encoding: [0x20,0xd9,0x5f,0xfa]
+// CHECK: ccmp x3, #0, #15, gt // encoding: [0x6f,0xc8,0x40,0xfa]
+// CHECK: ccmp xzr, #5, #7, ne // encoding: [0xe7,0x1b,0x45,0xfa]
+
+ ccmn w1, #31, #0, eq
+ ccmn w3, #0, #15, hs
+ ccmn wzr, #15, #13, cs
+// CHECK: ccmn w1, #31, #0, eq // encoding: [0x20,0x08,0x5f,0x3a]
+// CHECK: ccmn w3, #0, #15, hs // encoding: [0x6f,0x28,0x40,0x3a]
+// CHECK: ccmn wzr, #15, #13, hs // encoding: [0xed,0x2b,0x4f,0x3a]
+
+ ccmn x9, #31, #0, le
+ ccmn x3, #0, #15, gt
+ ccmn xzr, #5, #7, ne
+// CHECK: ccmn x9, #31, #0, le // encoding: [0x20,0xd9,0x5f,0xba]
+// CHECK: ccmn x3, #0, #15, gt // encoding: [0x6f,0xc8,0x40,0xba]
+// CHECK: ccmn xzr, #5, #7, ne // encoding: [0xe7,0x1b,0x45,0xba]
+
+//------------------------------------------------------------------------------
+// Conditional compare (register)
+//------------------------------------------------------------------------------
+
+ ccmp w1, wzr, #0, eq
+ ccmp w3, w0, #15, hs
+ ccmp wzr, w15, #13, cs
+// CHECK: ccmp w1, wzr, #0, eq // encoding: [0x20,0x00,0x5f,0x7a]
+// CHECK: ccmp w3, w0, #15, hs // encoding: [0x6f,0x20,0x40,0x7a]
+// CHECK: ccmp wzr, w15, #13, hs // encoding: [0xed,0x23,0x4f,0x7a]
+
+ ccmp x9, xzr, #0, le
+ ccmp x3, x0, #15, gt
+ ccmp xzr, x5, #7, ne
+// CHECK: ccmp x9, xzr, #0, le // encoding: [0x20,0xd1,0x5f,0xfa]
+// CHECK: ccmp x3, x0, #15, gt // encoding: [0x6f,0xc0,0x40,0xfa]
+// CHECK: ccmp xzr, x5, #7, ne // encoding: [0xe7,0x13,0x45,0xfa]
+
+ ccmn w1, wzr, #0, eq
+ ccmn w3, w0, #15, hs
+ ccmn wzr, w15, #13, cs
+// CHECK: ccmn w1, wzr, #0, eq // encoding: [0x20,0x00,0x5f,0x3a]
+// CHECK: ccmn w3, w0, #15, hs // encoding: [0x6f,0x20,0x40,0x3a]
+// CHECK: ccmn wzr, w15, #13, hs // encoding: [0xed,0x23,0x4f,0x3a]
+
+ ccmn x9, xzr, #0, le
+ ccmn x3, x0, #15, gt
+ ccmn xzr, x5, #7, ne
+// CHECK: ccmn x9, xzr, #0, le // encoding: [0x20,0xd1,0x5f,0xba]
+// CHECK: ccmn x3, x0, #15, gt // encoding: [0x6f,0xc0,0x40,0xba]
+// CHECK: ccmn xzr, x5, #7, ne // encoding: [0xe7,0x13,0x45,0xba]
+
+//------------------------------------------------------------------------------
+// Conditional select
+//------------------------------------------------------------------------------
+ csel w1, w0, w19, ne
+ csel wzr, w5, w9, eq
+ csel w9, wzr, w30, gt
+ csel w1, w28, wzr, mi
+// CHECK: csel w1, w0, w19, ne // encoding: [0x01,0x10,0x93,0x1a]
+// CHECK: csel wzr, w5, w9, eq // encoding: [0xbf,0x00,0x89,0x1a]
+// CHECK: csel w9, wzr, w30, gt // encoding: [0xe9,0xc3,0x9e,0x1a]
+// CHECK: csel w1, w28, wzr, mi // encoding: [0x81,0x43,0x9f,0x1a]
+
+ csel x19, x23, x29, lt
+ csel xzr, x3, x4, ge
+ csel x5, xzr, x6, cs
+ csel x7, x8, xzr, cc
+// CHECK: csel x19, x23, x29, lt // encoding: [0xf3,0xb2,0x9d,0x9a]
+// CHECK: csel xzr, x3, x4, ge // encoding: [0x7f,0xa0,0x84,0x9a]
+// CHECK: csel x5, xzr, x6, hs // encoding: [0xe5,0x23,0x86,0x9a]
+// CHECK: csel x7, x8, xzr, lo // encoding: [0x07,0x31,0x9f,0x9a]
+
+ csinc w1, w0, w19, ne
+ csinc wzr, w5, w9, eq
+ csinc w9, wzr, w30, gt
+ csinc w1, w28, wzr, mi
+// CHECK: csinc w1, w0, w19, ne // encoding: [0x01,0x14,0x93,0x1a]
+// CHECK: csinc wzr, w5, w9, eq // encoding: [0xbf,0x04,0x89,0x1a]
+// CHECK: csinc w9, wzr, w30, gt // encoding: [0xe9,0xc7,0x9e,0x1a]
+// CHECK: csinc w1, w28, wzr, mi // encoding: [0x81,0x47,0x9f,0x1a]
+
+ csinc x19, x23, x29, lt
+ csinc xzr, x3, x4, ge
+ csinc x5, xzr, x6, cs
+ csinc x7, x8, xzr, cc
+// CHECK: csinc x19, x23, x29, lt // encoding: [0xf3,0xb6,0x9d,0x9a]
+// CHECK: csinc xzr, x3, x4, ge // encoding: [0x7f,0xa4,0x84,0x9a]
+// CHECK: csinc x5, xzr, x6, hs // encoding: [0xe5,0x27,0x86,0x9a]
+// CHECK: csinc x7, x8, xzr, lo // encoding: [0x07,0x35,0x9f,0x9a]
+
+ csinv w1, w0, w19, ne
+ csinv wzr, w5, w9, eq
+ csinv w9, wzr, w30, gt
+ csinv w1, w28, wzr, mi
+// CHECK: csinv w1, w0, w19, ne // encoding: [0x01,0x10,0x93,0x5a]
+// CHECK: csinv wzr, w5, w9, eq // encoding: [0xbf,0x00,0x89,0x5a]
+// CHECK: csinv w9, wzr, w30, gt // encoding: [0xe9,0xc3,0x9e,0x5a]
+// CHECK: csinv w1, w28, wzr, mi // encoding: [0x81,0x43,0x9f,0x5a]
+
+ csinv x19, x23, x29, lt
+ csinv xzr, x3, x4, ge
+ csinv x5, xzr, x6, cs
+ csinv x7, x8, xzr, cc
+// CHECK: csinv x19, x23, x29, lt // encoding: [0xf3,0xb2,0x9d,0xda]
+// CHECK: csinv xzr, x3, x4, ge // encoding: [0x7f,0xa0,0x84,0xda]
+// CHECK: csinv x5, xzr, x6, hs // encoding: [0xe5,0x23,0x86,0xda]
+// CHECK: csinv x7, x8, xzr, lo // encoding: [0x07,0x31,0x9f,0xda]
+
+ csneg w1, w0, w19, ne
+ csneg wzr, w5, w9, eq
+ csneg w9, wzr, w30, gt
+ csneg w1, w28, wzr, mi
+// CHECK: csneg w1, w0, w19, ne // encoding: [0x01,0x14,0x93,0x5a]
+// CHECK: csneg wzr, w5, w9, eq // encoding: [0xbf,0x04,0x89,0x5a]
+// CHECK: csneg w9, wzr, w30, gt // encoding: [0xe9,0xc7,0x9e,0x5a]
+// CHECK: csneg w1, w28, wzr, mi // encoding: [0x81,0x47,0x9f,0x5a]
+
+ csneg x19, x23, x29, lt
+ csneg xzr, x3, x4, ge
+ csneg x5, xzr, x6, cs
+ csneg x7, x8, xzr, cc
+// CHECK: csneg x19, x23, x29, lt // encoding: [0xf3,0xb6,0x9d,0xda]
+// CHECK: csneg xzr, x3, x4, ge // encoding: [0x7f,0xa4,0x84,0xda]
+// CHECK: csneg x5, xzr, x6, hs // encoding: [0xe5,0x27,0x86,0xda]
+// CHECK: csneg x7, x8, xzr, lo // encoding: [0x07,0x35,0x9f,0xda]
+
+ cset w3, eq
+ cset x9, pl
+// CHECK: csinc w3, wzr, wzr, ne // encoding: [0xe3,0x17,0x9f,0x1a]
+// CHECK: csinc x9, xzr, xzr, mi // encoding: [0xe9,0x47,0x9f,0x9a]
+
+ csetm w20, ne
+ csetm x30, ge
+// CHECK: csinv w20, wzr, wzr, eq // encoding: [0xf4,0x03,0x9f,0x5a]
+// CHECK: csinv x30, xzr, xzr, lt // encoding: [0xfe,0xb3,0x9f,0xda]
+
+ cinc w3, w5, gt
+ cinc wzr, w4, le
+ cinc w9, wzr, lt
+// CHECK: csinc w3, w5, w5, le // encoding: [0xa3,0xd4,0x85,0x1a]
+// CHECK: csinc wzr, w4, w4, gt // encoding: [0x9f,0xc4,0x84,0x1a]
+// CHECK: csinc w9, wzr, wzr, ge // encoding: [0xe9,0xa7,0x9f,0x1a]
+
+ cinc x3, x5, gt
+ cinc xzr, x4, le
+ cinc x9, xzr, lt
+// CHECK: csinc x3, x5, x5, le // encoding: [0xa3,0xd4,0x85,0x9a]
+// CHECK: csinc xzr, x4, x4, gt // encoding: [0x9f,0xc4,0x84,0x9a]
+// CHECK: csinc x9, xzr, xzr, ge // encoding: [0xe9,0xa7,0x9f,0x9a]
+
+ cinv w3, w5, gt
+ cinv wzr, w4, le
+ cinv w9, wzr, lt
+// CHECK: csinv w3, w5, w5, le // encoding: [0xa3,0xd0,0x85,0x5a]
+// CHECK: csinv wzr, w4, w4, gt // encoding: [0x9f,0xc0,0x84,0x5a]
+// CHECK: csinv w9, wzr, wzr, ge // encoding: [0xe9,0xa3,0x9f,0x5a]
+
+ cinv x3, x5, gt
+ cinv xzr, x4, le
+ cinv x9, xzr, lt
+// CHECK: csinv x3, x5, x5, le // encoding: [0xa3,0xd0,0x85,0xda]
+// CHECK: csinv xzr, x4, x4, gt // encoding: [0x9f,0xc0,0x84,0xda]
+// CHECK: csinv x9, xzr, xzr, ge // encoding: [0xe9,0xa3,0x9f,0xda]
+
+ cneg w3, w5, gt
+ cneg wzr, w4, le
+ cneg w9, wzr, lt
+// CHECK: csneg w3, w5, w5, le // encoding: [0xa3,0xd4,0x85,0x5a]
+// CHECK: csneg wzr, w4, w4, gt // encoding: [0x9f,0xc4,0x84,0x5a]
+// CHECK: csneg w9, wzr, wzr, ge // encoding: [0xe9,0xa7,0x9f,0x5a]
+
+ cneg x3, x5, gt
+ cneg xzr, x4, le
+ cneg x9, xzr, lt
+// CHECK: csneg x3, x5, x5, le // encoding: [0xa3,0xd4,0x85,0xda]
+// CHECK: csneg xzr, x4, x4, gt // encoding: [0x9f,0xc4,0x84,0xda]
+// CHECK: csneg x9, xzr, xzr, ge // encoding: [0xe9,0xa7,0x9f,0xda]
+
+//------------------------------------------------------------------------------
+// Data-processing (1 source)
+//------------------------------------------------------------------------------
+
+ rbit w0, w7
+ rbit x18, x3
+ rev16 w17, w1
+ rev16 x5, x2
+ rev w18, w0
+ rev32 x20, x1
+ rev32 x20, xzr
+// CHECK: rbit w0, w7 // encoding: [0xe0,0x00,0xc0,0x5a]
+// CHECK: rbit x18, x3 // encoding: [0x72,0x00,0xc0,0xda]
+// CHECK: rev16 w17, w1 // encoding: [0x31,0x04,0xc0,0x5a]
+// CHECK: rev16 x5, x2 // encoding: [0x45,0x04,0xc0,0xda]
+// CHECK: rev w18, w0 // encoding: [0x12,0x08,0xc0,0x5a]
+// CHECK: rev32 x20, x1 // encoding: [0x34,0x08,0xc0,0xda]
+// CHECK: rev32 x20, xzr // encoding: [0xf4,0x0b,0xc0,0xda]
+
+ rev x22, x2
+ rev x18, xzr
+ rev w7, wzr
+ clz w24, w3
+ clz x26, x4
+ cls w3, w5
+ cls x20, x5
+// CHECK: rev x22, x2 // encoding: [0x56,0x0c,0xc0,0xda]
+// CHECK: rev x18, xzr // encoding: [0xf2,0x0f,0xc0,0xda]
+// CHECK: rev w7, wzr // encoding: [0xe7,0x0b,0xc0,0x5a]
+// CHECK: clz w24, w3 // encoding: [0x78,0x10,0xc0,0x5a]
+// CHECK: clz x26, x4 // encoding: [0x9a,0x10,0xc0,0xda]
+// CHECK: cls w3, w5 // encoding: [0xa3,0x14,0xc0,0x5a]
+// CHECK: cls x20, x5 // encoding: [0xb4,0x14,0xc0,0xda]
+
+ clz w24, wzr
+ rev x22, xzr
+// CHECK: clz w24, wzr // encoding: [0xf8,0x13,0xc0,0x5a]
+// CHECK: rev x22, xzr // encoding: [0xf6,0x0f,0xc0,0xda]
+
+//------------------------------------------------------------------------------
+// Data-processing (2 source)
+//------------------------------------------------------------------------------
+
+ udiv w0, w7, w10
+ udiv x9, x22, x4
+ sdiv w12, w21, w0
+ sdiv x13, x2, x1
+ lslv w11, w12, w13
+ lslv x14, x15, x16
+ lsrv w17, w18, w19
+ lsrv x20, x21, x22
+ asrv w23, w24, w25
+ asrv x26, x27, x28
+ rorv w0, w1, w2
+ rorv x3, x4, x5
+
+
+// CHECK: udiv w0, w7, w10 // encoding: [0xe0,0x08,0xca,0x1a]
+// CHECK: udiv x9, x22, x4 // encoding: [0xc9,0x0a,0xc4,0x9a]
+// CHECK: sdiv w12, w21, w0 // encoding: [0xac,0x0e,0xc0,0x1a]
+// CHECK: sdiv x13, x2, x1 // encoding: [0x4d,0x0c,0xc1,0x9a]
+// CHECK: lsl w11, w12, w13 // encoding: [0x8b,0x21,0xcd,0x1a]
+// CHECK: lsl x14, x15, x16 // encoding: [0xee,0x21,0xd0,0x9a]
+// CHECK: lsr w17, w18, w19 // encoding: [0x51,0x26,0xd3,0x1a]
+// CHECK: lsr x20, x21, x22 // encoding: [0xb4,0x26,0xd6,0x9a]
+// CHECK: asr w23, w24, w25 // encoding: [0x17,0x2b,0xd9,0x1a]
+// CHECK: asr x26, x27, x28 // encoding: [0x7a,0x2b,0xdc,0x9a]
+// CHECK: ror w0, w1, w2 // encoding: [0x20,0x2c,0xc2,0x1a]
+// CHECK: ror x3, x4, x5 // encoding: [0x83,0x2c,0xc5,0x9a]
+
+
+ lsl w6, w7, w8
+ lsl x9, x10, x11
+ lsr w12, w13, w14
+ lsr x15, x16, x17
+ asr w18, w19, w20
+ asr x21, x22, x23
+ ror w24, w25, w26
+ ror x27, x28, x29
+// CHECK: lsl w6, w7, w8 // encoding: [0xe6,0x20,0xc8,0x1a]
+// CHECK: lsl x9, x10, x11 // encoding: [0x49,0x21,0xcb,0x9a]
+// CHECK: lsr w12, w13, w14 // encoding: [0xac,0x25,0xce,0x1a]
+// CHECK: lsr x15, x16, x17 // encoding: [0x0f,0x26,0xd1,0x9a]
+// CHECK: asr w18, w19, w20 // encoding: [0x72,0x2a,0xd4,0x1a]
+// CHECK: asr x21, x22, x23 // encoding: [0xd5,0x2a,0xd7,0x9a]
+// CHECK: ror w24, w25, w26 // encoding: [0x38,0x2f,0xda,0x1a]
+// CHECK: ror x27, x28, x29 // encoding: [0x9b,0x2f,0xdd,0x9a]
+
+ madd w1, w3, w7, w4
+ madd wzr, w0, w9, w11
+ madd w13, wzr, w4, w4
+ madd w19, w30, wzr, w29
+ madd w4, w5, w6, wzr
+// CHECK: madd w1, w3, w7, w4 // encoding: [0x61,0x10,0x07,0x1b]
+// CHECK: madd wzr, w0, w9, w11 // encoding: [0x1f,0x2c,0x09,0x1b]
+// CHECK: madd w13, wzr, w4, w4 // encoding: [0xed,0x13,0x04,0x1b]
+// CHECK: madd w19, w30, wzr, w29 // encoding: [0xd3,0x77,0x1f,0x1b]
+// CHECK: mul w4, w5, w6 // encoding: [0xa4,0x7c,0x06,0x1b]
+
+ madd x1, x3, x7, x4
+ madd xzr, x0, x9, x11
+ madd x13, xzr, x4, x4
+ madd x19, x30, xzr, x29
+ madd x4, x5, x6, xzr
+// CHECK: madd x1, x3, x7, x4 // encoding: [0x61,0x10,0x07,0x9b]
+// CHECK: madd xzr, x0, x9, x11 // encoding: [0x1f,0x2c,0x09,0x9b]
+// CHECK: madd x13, xzr, x4, x4 // encoding: [0xed,0x13,0x04,0x9b]
+// CHECK: madd x19, x30, xzr, x29 // encoding: [0xd3,0x77,0x1f,0x9b]
+// CHECK: mul x4, x5, x6 // encoding: [0xa4,0x7c,0x06,0x9b]
+
+ msub w1, w3, w7, w4
+ msub wzr, w0, w9, w11
+ msub w13, wzr, w4, w4
+ msub w19, w30, wzr, w29
+ msub w4, w5, w6, wzr
+// CHECK: msub w1, w3, w7, w4 // encoding: [0x61,0x90,0x07,0x1b]
+// CHECK: msub wzr, w0, w9, w11 // encoding: [0x1f,0xac,0x09,0x1b]
+// CHECK: msub w13, wzr, w4, w4 // encoding: [0xed,0x93,0x04,0x1b]
+// CHECK: msub w19, w30, wzr, w29 // encoding: [0xd3,0xf7,0x1f,0x1b]
+// CHECK: mneg w4, w5, w6 // encoding: [0xa4,0xfc,0x06,0x1b]
+
+ msub x1, x3, x7, x4
+ msub xzr, x0, x9, x11
+ msub x13, xzr, x4, x4
+ msub x19, x30, xzr, x29
+ msub x4, x5, x6, xzr
+// CHECK: msub x1, x3, x7, x4 // encoding: [0x61,0x90,0x07,0x9b]
+// CHECK: msub xzr, x0, x9, x11 // encoding: [0x1f,0xac,0x09,0x9b]
+// CHECK: msub x13, xzr, x4, x4 // encoding: [0xed,0x93,0x04,0x9b]
+// CHECK: msub x19, x30, xzr, x29 // encoding: [0xd3,0xf7,0x1f,0x9b]
+// CHECK: mneg x4, x5, x6 // encoding: [0xa4,0xfc,0x06,0x9b]
+
+ smaddl x3, w5, w2, x9
+ smaddl xzr, w10, w11, x12
+ smaddl x13, wzr, w14, x15
+ smaddl x16, w17, wzr, x18
+ smaddl x19, w20, w21, xzr
+// CHECK: smaddl x3, w5, w2, x9 // encoding: [0xa3,0x24,0x22,0x9b]
+// CHECK: smaddl xzr, w10, w11, x12 // encoding: [0x5f,0x31,0x2b,0x9b]
+// CHECK: smaddl x13, wzr, w14, x15 // encoding: [0xed,0x3f,0x2e,0x9b]
+// CHECK: smaddl x16, w17, wzr, x18 // encoding: [0x30,0x4a,0x3f,0x9b]
+// CHECK: smull x19, w20, w21 // encoding: [0x93,0x7e,0x35,0x9b]
+
+ smsubl x3, w5, w2, x9
+ smsubl xzr, w10, w11, x12
+ smsubl x13, wzr, w14, x15
+ smsubl x16, w17, wzr, x18
+ smsubl x19, w20, w21, xzr
+// CHECK: smsubl x3, w5, w2, x9 // encoding: [0xa3,0xa4,0x22,0x9b]
+// CHECK: smsubl xzr, w10, w11, x12 // encoding: [0x5f,0xb1,0x2b,0x9b]
+// CHECK: smsubl x13, wzr, w14, x15 // encoding: [0xed,0xbf,0x2e,0x9b]
+// CHECK: smsubl x16, w17, wzr, x18 // encoding: [0x30,0xca,0x3f,0x9b]
+// CHECK: smnegl x19, w20, w21 // encoding: [0x93,0xfe,0x35,0x9b]
+
+ umaddl x3, w5, w2, x9
+ umaddl xzr, w10, w11, x12
+ umaddl x13, wzr, w14, x15
+ umaddl x16, w17, wzr, x18
+ umaddl x19, w20, w21, xzr
+// CHECK: umaddl x3, w5, w2, x9 // encoding: [0xa3,0x24,0xa2,0x9b]
+// CHECK: umaddl xzr, w10, w11, x12 // encoding: [0x5f,0x31,0xab,0x9b]
+// CHECK: umaddl x13, wzr, w14, x15 // encoding: [0xed,0x3f,0xae,0x9b]
+// CHECK: umaddl x16, w17, wzr, x18 // encoding: [0x30,0x4a,0xbf,0x9b]
+// CHECK: umull x19, w20, w21 // encoding: [0x93,0x7e,0xb5,0x9b]
+
+
+
+ umsubl x3, w5, w2, x9
+ umsubl xzr, w10, w11, x12
+ umsubl x13, wzr, w14, x15
+ umsubl x16, w17, wzr, x18
+ umsubl x19, w20, w21, xzr
+// CHECK: umsubl x3, w5, w2, x9 // encoding: [0xa3,0xa4,0xa2,0x9b]
+// CHECK: umsubl xzr, w10, w11, x12 // encoding: [0x5f,0xb1,0xab,0x9b]
+// CHECK: umsubl x13, wzr, w14, x15 // encoding: [0xed,0xbf,0xae,0x9b]
+// CHECK: umsubl x16, w17, wzr, x18 // encoding: [0x30,0xca,0xbf,0x9b]
+// CHECK: umnegl x19, w20, w21 // encoding: [0x93,0xfe,0xb5,0x9b]
+
+ smulh x30, x29, x28
+ smulh xzr, x27, x26
+ smulh x25, xzr, x24
+ smulh x23, x22, xzr
+// CHECK: smulh x30, x29, x28 // encoding: [0xbe,0x7f,0x5c,0x9b]
+// CHECK: smulh xzr, x27, x26 // encoding: [0x7f,0x7f,0x5a,0x9b]
+// CHECK: smulh x25, xzr, x24 // encoding: [0xf9,0x7f,0x58,0x9b]
+// CHECK: smulh x23, x22, xzr // encoding: [0xd7,0x7e,0x5f,0x9b]
+
+ umulh x30, x29, x28
+ umulh xzr, x27, x26
+ umulh x25, xzr, x24
+ umulh x23, x22, xzr
+// CHECK: umulh x30, x29, x28 // encoding: [0xbe,0x7f,0xdc,0x9b]
+// CHECK: umulh xzr, x27, x26 // encoding: [0x7f,0x7f,0xda,0x9b]
+// CHECK: umulh x25, xzr, x24 // encoding: [0xf9,0x7f,0xd8,0x9b]
+// CHECK: umulh x23, x22, xzr // encoding: [0xd7,0x7e,0xdf,0x9b]
+
+ mul w3, w4, w5
+ mul wzr, w6, w7
+ mul w8, wzr, w9
+ mul w10, w11, wzr
+
+ mul x12, x13, x14
+ mul xzr, x15, x16
+ mul x17, xzr, x18
+ mul x19, x20, xzr
+
+ mneg w21, w22, w23
+ mneg wzr, w24, w25
+ mneg w26, wzr, w27
+ mneg w28, w29, wzr
+
+ smull x11, w13, w17
+ umull x11, w13, w17
+ smnegl x11, w13, w17
+ umnegl x11, w13, w17
+// CHECK: mul w3, w4, w5 // encoding: [0x83,0x7c,0x05,0x1b]
+// CHECK: mul wzr, w6, w7 // encoding: [0xdf,0x7c,0x07,0x1b]
+// CHECK: mul w8, wzr, w9 // encoding: [0xe8,0x7f,0x09,0x1b]
+// CHECK: mul w10, w11, wzr // encoding: [0x6a,0x7d,0x1f,0x1b]
+// CHECK: mul x12, x13, x14 // encoding: [0xac,0x7d,0x0e,0x9b]
+// CHECK: mul xzr, x15, x16 // encoding: [0xff,0x7d,0x10,0x9b]
+// CHECK: mul x17, xzr, x18 // encoding: [0xf1,0x7f,0x12,0x9b]
+// CHECK: mul x19, x20, xzr // encoding: [0x93,0x7e,0x1f,0x9b]
+// CHECK: mneg w21, w22, w23 // encoding: [0xd5,0xfe,0x17,0x1b]
+// CHECK: mneg wzr, w24, w25 // encoding: [0x1f,0xff,0x19,0x1b]
+// CHECK: mneg w26, wzr, w27 // encoding: [0xfa,0xff,0x1b,0x1b]
+// CHECK: mneg w28, w29, wzr // encoding: [0xbc,0xff,0x1f,0x1b]
+// CHECK: smull x11, w13, w17 // encoding: [0xab,0x7d,0x31,0x9b]
+// CHECK: umull x11, w13, w17 // encoding: [0xab,0x7d,0xb1,0x9b]
+// CHECK: smnegl x11, w13, w17 // encoding: [0xab,0xfd,0x31,0x9b]
+// CHECK: umnegl x11, w13, w17 // encoding: [0xab,0xfd,0xb1,0x9b]
+
+//------------------------------------------------------------------------------
+// Exception generation
+//------------------------------------------------------------------------------
+ svc #0
+ svc #65535
+// CHECK: svc #0 // encoding: [0x01,0x00,0x00,0xd4]
+// CHECK: svc #65535 // encoding: [0xe1,0xff,0x1f,0xd4]
+
+ hvc #1
+ smc #12000
+ brk #12
+ hlt #123
+// CHECK: hvc #1 // encoding: [0x22,0x00,0x00,0xd4]
+// CHECK: smc #12000 // encoding: [0x03,0xdc,0x05,0xd4]
+// CHECK: brk #12 // encoding: [0x80,0x01,0x20,0xd4]
+// CHECK: hlt #123 // encoding: [0x60,0x0f,0x40,0xd4]
+
+ dcps1 #42
+ dcps2 #9
+ dcps3 #1000
+// CHECK: dcps1 #42 // encoding: [0x41,0x05,0xa0,0xd4]
+// CHECK: dcps2 #9 // encoding: [0x22,0x01,0xa0,0xd4]
+// CHECK: dcps3 #1000 // encoding: [0x03,0x7d,0xa0,0xd4]
+
+ dcps1
+ dcps2
+ dcps3
+// CHECK: dcps1 // encoding: [0x01,0x00,0xa0,0xd4]
+// CHECK: dcps2 // encoding: [0x02,0x00,0xa0,0xd4]
+// CHECK: dcps3 // encoding: [0x03,0x00,0xa0,0xd4]
+
+//------------------------------------------------------------------------------
+// Extract (immediate)
+//------------------------------------------------------------------------------
+
+ extr w3, w5, w7, #0
+ extr w11, w13, w17, #31
+// CHECK: extr w3, w5, w7, #0 // encoding: [0xa3,0x00,0x87,0x13]
+// CHECK: extr w11, w13, w17, #31 // encoding: [0xab,0x7d,0x91,0x13]
+
+ extr x3, x5, x7, #15
+ extr x11, x13, x17, #63
+// CHECK: extr x3, x5, x7, #15 // encoding: [0xa3,0x3c,0xc7,0x93]
+// CHECK: extr x11, x13, x17, #63 // encoding: [0xab,0xfd,0xd1,0x93]
+
+ ror x19, x23, #24
+ ror x29, xzr, #63
+// CHECK: extr x19, x23, x23, #24 // encoding: [0xf3,0x62,0xd7,0x93]
+// CHECK: extr x29, xzr, xzr, #63 // encoding: [0xfd,0xff,0xdf,0x93]
+
+ ror w9, w13, #31
+// CHECK: extr w9, w13, w13, #31 // encoding: [0xa9,0x7d,0x8d,0x13]
+
+//------------------------------------------------------------------------------
+// Floating-point compare
+//------------------------------------------------------------------------------
+
+ fcmp s3, s5
+ fcmp s31, #0.0
+// CHECK: fcmp s3, s5 // encoding: [0x60,0x20,0x25,0x1e]
+// CHECK: fcmp s31, #0.0 // encoding: [0xe8,0x23,0x20,0x1e]
+
+ fcmpe s29, s30
+ fcmpe s15, #0.0
+// CHECK: fcmpe s29, s30 // encoding: [0xb0,0x23,0x3e,0x1e]
+// CHECK: fcmpe s15, #0.0 // encoding: [0xf8,0x21,0x20,0x1e]
+
+ fcmp d4, d12
+ fcmp d23, #0.0
+// CHECK: fcmp d4, d12 // encoding: [0x80,0x20,0x6c,0x1e]
+// CHECK: fcmp d23, #0.0 // encoding: [0xe8,0x22,0x60,0x1e]
+
+ fcmpe d26, d22
+ fcmpe d29, #0.0
+// CHECK: fcmpe d26, d22 // encoding: [0x50,0x23,0x76,0x1e]
+// CHECK: fcmpe d29, #0.0 // encoding: [0xb8,0x23,0x60,0x1e]
+
+//------------------------------------------------------------------------------
+// Floating-point conditional compare
+//------------------------------------------------------------------------------
+
+ fccmp s1, s31, #0, eq
+ fccmp s3, s0, #15, hs
+ fccmp s31, s15, #13, cs
+// CHECK: fccmp s1, s31, #0, eq // encoding: [0x20,0x04,0x3f,0x1e]
+// CHECK: fccmp s3, s0, #15, hs // encoding: [0x6f,0x24,0x20,0x1e]
+// CHECK: fccmp s31, s15, #13, hs // encoding: [0xed,0x27,0x2f,0x1e]
+
+ fccmp d9, d31, #0, le
+ fccmp d3, d0, #15, gt
+ fccmp d31, d5, #7, ne
+// CHECK: fccmp d9, d31, #0, le // encoding: [0x20,0xd5,0x7f,0x1e]
+// CHECK: fccmp d3, d0, #15, gt // encoding: [0x6f,0xc4,0x60,0x1e]
+// CHECK: fccmp d31, d5, #7, ne // encoding: [0xe7,0x17,0x65,0x1e]
+
+ fccmpe s1, s31, #0, eq
+ fccmpe s3, s0, #15, hs
+ fccmpe s31, s15, #13, cs
+// CHECK: fccmpe s1, s31, #0, eq // encoding: [0x30,0x04,0x3f,0x1e]
+// CHECK: fccmpe s3, s0, #15, hs // encoding: [0x7f,0x24,0x20,0x1e]
+// CHECK: fccmpe s31, s15, #13, hs // encoding: [0xfd,0x27,0x2f,0x1e]
+
+ fccmpe d9, d31, #0, le
+ fccmpe d3, d0, #15, gt
+ fccmpe d31, d5, #7, ne
+// CHECK: fccmpe d9, d31, #0, le // encoding: [0x30,0xd5,0x7f,0x1e]
+// CHECK: fccmpe d3, d0, #15, gt // encoding: [0x7f,0xc4,0x60,0x1e]
+// CHECK: fccmpe d31, d5, #7, ne // encoding: [0xf7,0x17,0x65,0x1e]
+
+//------------------------------------------------------------------------------
+// Floating-point conditional select
+//------------------------------------------------------------------------------
+
+ fcsel s3, s20, s9, pl
+ fcsel d9, d10, d11, mi
+// CHECK: fcsel s3, s20, s9, pl // encoding: [0x83,0x5e,0x29,0x1e]
+// CHECK: fcsel d9, d10, d11, mi // encoding: [0x49,0x4d,0x6b,0x1e]
+
+//------------------------------------------------------------------------------
+// Floating-point data-processing (1 source)
+//------------------------------------------------------------------------------
+
+ fmov s0, s1
+ fabs s2, s3
+ fneg s4, s5
+ fsqrt s6, s7
+ fcvt d8, s9
+ fcvt h10, s11
+ frintn s12, s13
+ frintp s14, s15
+ frintm s16, s17
+ frintz s18, s19
+ frinta s20, s21
+ frintx s22, s23
+ frinti s24, s25
+// CHECK: fmov s0, s1 // encoding: [0x20,0x40,0x20,0x1e]
+// CHECK: fabs s2, s3 // encoding: [0x62,0xc0,0x20,0x1e]
+// CHECK: fneg s4, s5 // encoding: [0xa4,0x40,0x21,0x1e]
+// CHECK: fsqrt s6, s7 // encoding: [0xe6,0xc0,0x21,0x1e]
+// CHECK: fcvt d8, s9 // encoding: [0x28,0xc1,0x22,0x1e]
+// CHECK: fcvt h10, s11 // encoding: [0x6a,0xc1,0x23,0x1e]
+// CHECK: frintn s12, s13 // encoding: [0xac,0x41,0x24,0x1e]
+// CHECK: frintp s14, s15 // encoding: [0xee,0xc1,0x24,0x1e]
+// CHECK: frintm s16, s17 // encoding: [0x30,0x42,0x25,0x1e]
+// CHECK: frintz s18, s19 // encoding: [0x72,0xc2,0x25,0x1e]
+// CHECK: frinta s20, s21 // encoding: [0xb4,0x42,0x26,0x1e]
+// CHECK: frintx s22, s23 // encoding: [0xf6,0x42,0x27,0x1e]
+// CHECK: frinti s24, s25 // encoding: [0x38,0xc3,0x27,0x1e]
+
+ fmov d0, d1
+ fabs d2, d3
+ fneg d4, d5
+ fsqrt d6, d7
+ fcvt s8, d9
+ fcvt h10, d11
+ frintn d12, d13
+ frintp d14, d15
+ frintm d16, d17
+ frintz d18, d19
+ frinta d20, d21
+ frintx d22, d23
+ frinti d24, d25
+// CHECK: fmov d0, d1 // encoding: [0x20,0x40,0x60,0x1e]
+// CHECK: fabs d2, d3 // encoding: [0x62,0xc0,0x60,0x1e]
+// CHECK: fneg d4, d5 // encoding: [0xa4,0x40,0x61,0x1e]
+// CHECK: fsqrt d6, d7 // encoding: [0xe6,0xc0,0x61,0x1e]
+// CHECK: fcvt s8, d9 // encoding: [0x28,0x41,0x62,0x1e]
+// CHECK: fcvt h10, d11 // encoding: [0x6a,0xc1,0x63,0x1e]
+// CHECK: frintn d12, d13 // encoding: [0xac,0x41,0x64,0x1e]
+// CHECK: frintp d14, d15 // encoding: [0xee,0xc1,0x64,0x1e]
+// CHECK: frintm d16, d17 // encoding: [0x30,0x42,0x65,0x1e]
+// CHECK: frintz d18, d19 // encoding: [0x72,0xc2,0x65,0x1e]
+// CHECK: frinta d20, d21 // encoding: [0xb4,0x42,0x66,0x1e]
+// CHECK: frintx d22, d23 // encoding: [0xf6,0x42,0x67,0x1e]
+// CHECK: frinti d24, d25 // encoding: [0x38,0xc3,0x67,0x1e]
+
+ fcvt s26, h27
+ fcvt d28, h29
+// CHECK: fcvt s26, h27 // encoding: [0x7a,0x43,0xe2,0x1e]
+// CHECK: fcvt d28, h29 // encoding: [0xbc,0xc3,0xe2,0x1e]
+
+//------------------------------------------------------------------------------
+// Floating-point data-processing (2 sources)
+//------------------------------------------------------------------------------
+
+ fmul s20, s19, s17
+ fdiv s1, s2, s3
+ fadd s4, s5, s6
+ fsub s7, s8, s9
+ fmax s10, s11, s12
+ fmin s13, s14, s15
+ fmaxnm s16, s17, s18
+ fminnm s19, s20, s21
+ fnmul s22, s23, s24
+// CHECK: fmul s20, s19, s17 // encoding: [0x74,0x0a,0x31,0x1e]
+// CHECK: fdiv s1, s2, s3 // encoding: [0x41,0x18,0x23,0x1e]
+// CHECK: fadd s4, s5, s6 // encoding: [0xa4,0x28,0x26,0x1e]
+// CHECK: fsub s7, s8, s9 // encoding: [0x07,0x39,0x29,0x1e]
+// CHECK: fmax s10, s11, s12 // encoding: [0x6a,0x49,0x2c,0x1e]
+// CHECK: fmin s13, s14, s15 // encoding: [0xcd,0x59,0x2f,0x1e]
+// CHECK: fmaxnm s16, s17, s18 // encoding: [0x30,0x6a,0x32,0x1e]
+// CHECK: fminnm s19, s20, s21 // encoding: [0x93,0x7a,0x35,0x1e]
+// CHECK: fnmul s22, s23, s24 // encoding: [0xf6,0x8a,0x38,0x1e]
+
+ fmul d20, d19, d17
+ fdiv d1, d2, d3
+ fadd d4, d5, d6
+ fsub d7, d8, d9
+ fmax d10, d11, d12
+ fmin d13, d14, d15
+ fmaxnm d16, d17, d18
+ fminnm d19, d20, d21
+ fnmul d22, d23, d24
+// CHECK: fmul d20, d19, d17 // encoding: [0x74,0x0a,0x71,0x1e]
+// CHECK: fdiv d1, d2, d3 // encoding: [0x41,0x18,0x63,0x1e]
+// CHECK: fadd d4, d5, d6 // encoding: [0xa4,0x28,0x66,0x1e]
+// CHECK: fsub d7, d8, d9 // encoding: [0x07,0x39,0x69,0x1e]
+// CHECK: fmax d10, d11, d12 // encoding: [0x6a,0x49,0x6c,0x1e]
+// CHECK: fmin d13, d14, d15 // encoding: [0xcd,0x59,0x6f,0x1e]
+// CHECK: fmaxnm d16, d17, d18 // encoding: [0x30,0x6a,0x72,0x1e]
+// CHECK: fminnm d19, d20, d21 // encoding: [0x93,0x7a,0x75,0x1e]
+// CHECK: fnmul d22, d23, d24 // encoding: [0xf6,0x8a,0x78,0x1e]
+
+//------------------------------------------------------------------------------
+// Floating-point data-processing (3 sources)
+//------------------------------------------------------------------------------
+
+ fmadd s3, s5, s6, s31
+ fmadd d3, d13, d0, d23
+ fmsub s3, s5, s6, s31
+ fmsub d3, d13, d0, d23
+ fnmadd s3, s5, s6, s31
+ fnmadd d3, d13, d0, d23
+ fnmsub s3, s5, s6, s31
+ fnmsub d3, d13, d0, d23
+// CHECK: fmadd s3, s5, s6, s31 // encoding: [0xa3,0x7c,0x06,0x1f]
+// CHECK: fmadd d3, d13, d0, d23 // encoding: [0xa3,0x5d,0x40,0x1f]
+// CHECK: fmsub s3, s5, s6, s31 // encoding: [0xa3,0xfc,0x06,0x1f]
+// CHECK: fmsub d3, d13, d0, d23 // encoding: [0xa3,0xdd,0x40,0x1f]
+// CHECK: fnmadd s3, s5, s6, s31 // encoding: [0xa3,0x7c,0x26,0x1f]
+// CHECK: fnmadd d3, d13, d0, d23 // encoding: [0xa3,0x5d,0x60,0x1f]
+// CHECK: fnmsub s3, s5, s6, s31 // encoding: [0xa3,0xfc,0x26,0x1f]
+// CHECK: fnmsub d3, d13, d0, d23 // encoding: [0xa3,0xdd,0x60,0x1f]
+
+//------------------------------------------------------------------------------
+// Floating-point <-> fixed-point conversion
+//------------------------------------------------------------------------------
+
+ fcvtzs w3, s5, #1
+ fcvtzs wzr, s20, #13
+ fcvtzs w19, s0, #32
+// CHECK: fcvtzs w3, s5, #1 // encoding: [0xa3,0xfc,0x18,0x1e]
+// CHECK: fcvtzs wzr, s20, #13 // encoding: [0x9f,0xce,0x18,0x1e]
+// CHECK: fcvtzs w19, s0, #32 // encoding: [0x13,0x80,0x18,0x1e]
+
+ fcvtzs x3, s5, #1
+ fcvtzs x12, s30, #45
+ fcvtzs x19, s0, #64
+// CHECK: fcvtzs x3, s5, #1 // encoding: [0xa3,0xfc,0x18,0x9e]
+// CHECK: fcvtzs x12, s30, #45 // encoding: [0xcc,0x4f,0x18,0x9e]
+// CHECK: fcvtzs x19, s0, #64 // encoding: [0x13,0x00,0x18,0x9e]
+
+ fcvtzs w3, d5, #1
+ fcvtzs wzr, d20, #13
+ fcvtzs w19, d0, #32
+// CHECK: fcvtzs w3, d5, #1 // encoding: [0xa3,0xfc,0x58,0x1e]
+// CHECK: fcvtzs wzr, d20, #13 // encoding: [0x9f,0xce,0x58,0x1e]
+// CHECK: fcvtzs w19, d0, #32 // encoding: [0x13,0x80,0x58,0x1e]
+
+ fcvtzs x3, d5, #1
+ fcvtzs x12, d30, #45
+ fcvtzs x19, d0, #64
+// CHECK: fcvtzs x3, d5, #1 // encoding: [0xa3,0xfc,0x58,0x9e]
+// CHECK: fcvtzs x12, d30, #45 // encoding: [0xcc,0x4f,0x58,0x9e]
+// CHECK: fcvtzs x19, d0, #64 // encoding: [0x13,0x00,0x58,0x9e]
+
+ fcvtzu w3, s5, #1
+ fcvtzu wzr, s20, #13
+ fcvtzu w19, s0, #32
+// CHECK: fcvtzu w3, s5, #1 // encoding: [0xa3,0xfc,0x19,0x1e]
+// CHECK: fcvtzu wzr, s20, #13 // encoding: [0x9f,0xce,0x19,0x1e]
+// CHECK: fcvtzu w19, s0, #32 // encoding: [0x13,0x80,0x19,0x1e]
+
+ fcvtzu x3, s5, #1
+ fcvtzu x12, s30, #45
+ fcvtzu x19, s0, #64
+// CHECK: fcvtzu x3, s5, #1 // encoding: [0xa3,0xfc,0x19,0x9e]
+// CHECK: fcvtzu x12, s30, #45 // encoding: [0xcc,0x4f,0x19,0x9e]
+// CHECK: fcvtzu x19, s0, #64 // encoding: [0x13,0x00,0x19,0x9e]
+
+ fcvtzu w3, d5, #1
+ fcvtzu wzr, d20, #13
+ fcvtzu w19, d0, #32
+// CHECK: fcvtzu w3, d5, #1 // encoding: [0xa3,0xfc,0x59,0x1e]
+// CHECK: fcvtzu wzr, d20, #13 // encoding: [0x9f,0xce,0x59,0x1e]
+// CHECK: fcvtzu w19, d0, #32 // encoding: [0x13,0x80,0x59,0x1e]
+
+ fcvtzu x3, d5, #1
+ fcvtzu x12, d30, #45
+ fcvtzu x19, d0, #64
+// CHECK: fcvtzu x3, d5, #1 // encoding: [0xa3,0xfc,0x59,0x9e]
+// CHECK: fcvtzu x12, d30, #45 // encoding: [0xcc,0x4f,0x59,0x9e]
+// CHECK: fcvtzu x19, d0, #64 // encoding: [0x13,0x00,0x59,0x9e]
+
+ scvtf s23, w19, #1
+ scvtf s31, wzr, #20
+ scvtf s14, w0, #32
+// CHECK: scvtf s23, w19, #1 // encoding: [0x77,0xfe,0x02,0x1e]
+// CHECK: scvtf s31, wzr, #20 // encoding: [0xff,0xb3,0x02,0x1e]
+// CHECK: scvtf s14, w0, #32 // encoding: [0x0e,0x80,0x02,0x1e]
+
+ scvtf s23, x19, #1
+ scvtf s31, xzr, #20
+ scvtf s14, x0, #64
+// CHECK: scvtf s23, x19, #1 // encoding: [0x77,0xfe,0x02,0x9e]
+// CHECK: scvtf s31, xzr, #20 // encoding: [0xff,0xb3,0x02,0x9e]
+// CHECK: scvtf s14, x0, #64 // encoding: [0x0e,0x00,0x02,0x9e]
+
+ scvtf d23, w19, #1
+ scvtf d31, wzr, #20
+ scvtf d14, w0, #32
+// CHECK: scvtf d23, w19, #1 // encoding: [0x77,0xfe,0x42,0x1e]
+// CHECK: scvtf d31, wzr, #20 // encoding: [0xff,0xb3,0x42,0x1e]
+// CHECK: scvtf d14, w0, #32 // encoding: [0x0e,0x80,0x42,0x1e]
+
+ scvtf d23, x19, #1
+ scvtf d31, xzr, #20
+ scvtf d14, x0, #64
+// CHECK: scvtf d23, x19, #1 // encoding: [0x77,0xfe,0x42,0x9e]
+// CHECK: scvtf d31, xzr, #20 // encoding: [0xff,0xb3,0x42,0x9e]
+// CHECK: scvtf d14, x0, #64 // encoding: [0x0e,0x00,0x42,0x9e]
+
+ ucvtf s23, w19, #1
+ ucvtf s31, wzr, #20
+ ucvtf s14, w0, #32
+// CHECK: ucvtf s23, w19, #1 // encoding: [0x77,0xfe,0x03,0x1e]
+// CHECK: ucvtf s31, wzr, #20 // encoding: [0xff,0xb3,0x03,0x1e]
+// CHECK: ucvtf s14, w0, #32 // encoding: [0x0e,0x80,0x03,0x1e]
+
+ ucvtf s23, x19, #1
+ ucvtf s31, xzr, #20
+ ucvtf s14, x0, #64
+// CHECK: ucvtf s23, x19, #1 // encoding: [0x77,0xfe,0x03,0x9e]
+// CHECK: ucvtf s31, xzr, #20 // encoding: [0xff,0xb3,0x03,0x9e]
+// CHECK: ucvtf s14, x0, #64 // encoding: [0x0e,0x00,0x03,0x9e]
+
+ ucvtf d23, w19, #1
+ ucvtf d31, wzr, #20
+ ucvtf d14, w0, #32
+// CHECK: ucvtf d23, w19, #1 // encoding: [0x77,0xfe,0x43,0x1e]
+// CHECK: ucvtf d31, wzr, #20 // encoding: [0xff,0xb3,0x43,0x1e]
+// CHECK: ucvtf d14, w0, #32 // encoding: [0x0e,0x80,0x43,0x1e]
+
+ ucvtf d23, x19, #1
+ ucvtf d31, xzr, #20
+ ucvtf d14, x0, #64
+// CHECK: ucvtf d23, x19, #1 // encoding: [0x77,0xfe,0x43,0x9e]
+// CHECK: ucvtf d31, xzr, #20 // encoding: [0xff,0xb3,0x43,0x9e]
+// CHECK: ucvtf d14, x0, #64 // encoding: [0x0e,0x00,0x43,0x9e]
+
+//------------------------------------------------------------------------------
+// Floating-point <-> integer conversion
+//------------------------------------------------------------------------------
+ fcvtns w3, s31
+ fcvtns xzr, s12
+ fcvtnu wzr, s12
+ fcvtnu x0, s0
+// CHECK: fcvtns w3, s31 // encoding: [0xe3,0x03,0x20,0x1e]
+// CHECK: fcvtns xzr, s12 // encoding: [0x9f,0x01,0x20,0x9e]
+// CHECK: fcvtnu wzr, s12 // encoding: [0x9f,0x01,0x21,0x1e]
+// CHECK: fcvtnu x0, s0 // encoding: [0x00,0x00,0x21,0x9e]
+
+ fcvtps wzr, s9
+ fcvtps x12, s20
+ fcvtpu w30, s23
+ fcvtpu x29, s3
+// CHECK: fcvtps wzr, s9 // encoding: [0x3f,0x01,0x28,0x1e]
+// CHECK: fcvtps x12, s20 // encoding: [0x8c,0x02,0x28,0x9e]
+// CHECK: fcvtpu w30, s23 // encoding: [0xfe,0x02,0x29,0x1e]
+// CHECK: fcvtpu x29, s3 // encoding: [0x7d,0x00,0x29,0x9e]
+
+ fcvtms w2, s3
+ fcvtms x4, s5
+ fcvtmu w6, s7
+ fcvtmu x8, s9
+// CHECK: fcvtms w2, s3 // encoding: [0x62,0x00,0x30,0x1e]
+// CHECK: fcvtms x4, s5 // encoding: [0xa4,0x00,0x30,0x9e]
+// CHECK: fcvtmu w6, s7 // encoding: [0xe6,0x00,0x31,0x1e]
+// CHECK: fcvtmu x8, s9 // encoding: [0x28,0x01,0x31,0x9e]
+
+ fcvtzs w10, s11
+ fcvtzs x12, s13
+ fcvtzu w14, s15
+ fcvtzu x15, s16
+// CHECK: fcvtzs w10, s11 // encoding: [0x6a,0x01,0x38,0x1e]
+// CHECK: fcvtzs x12, s13 // encoding: [0xac,0x01,0x38,0x9e]
+// CHECK: fcvtzu w14, s15 // encoding: [0xee,0x01,0x39,0x1e]
+// CHECK: fcvtzu x15, s16 // encoding: [0x0f,0x02,0x39,0x9e]
+
+ scvtf s17, w18
+ scvtf s19, x20
+ ucvtf s21, w22
+ scvtf s23, x24
+// CHECK: scvtf s17, w18 // encoding: [0x51,0x02,0x22,0x1e]
+// CHECK: scvtf s19, x20 // encoding: [0x93,0x02,0x22,0x9e]
+// CHECK: ucvtf s21, w22 // encoding: [0xd5,0x02,0x23,0x1e]
+// CHECK: scvtf s23, x24 // encoding: [0x17,0x03,0x22,0x9e]
+
+ fcvtas w25, s26
+ fcvtas x27, s28
+ fcvtau w29, s30
+ fcvtau xzr, s0
+// CHECK: fcvtas w25, s26 // encoding: [0x59,0x03,0x24,0x1e]
+// CHECK: fcvtas x27, s28 // encoding: [0x9b,0x03,0x24,0x9e]
+// CHECK: fcvtau w29, s30 // encoding: [0xdd,0x03,0x25,0x1e]
+// CHECK: fcvtau xzr, s0 // encoding: [0x1f,0x00,0x25,0x9e]
+
+ fcvtns w3, d31
+ fcvtns xzr, d12
+ fcvtnu wzr, d12
+ fcvtnu x0, d0
+// CHECK: fcvtns w3, d31 // encoding: [0xe3,0x03,0x60,0x1e]
+// CHECK: fcvtns xzr, d12 // encoding: [0x9f,0x01,0x60,0x9e]
+// CHECK: fcvtnu wzr, d12 // encoding: [0x9f,0x01,0x61,0x1e]
+// CHECK: fcvtnu x0, d0 // encoding: [0x00,0x00,0x61,0x9e]
+
+ fcvtps wzr, d9
+ fcvtps x12, d20
+ fcvtpu w30, d23
+ fcvtpu x29, d3
+// CHECK: fcvtps wzr, d9 // encoding: [0x3f,0x01,0x68,0x1e]
+// CHECK: fcvtps x12, d20 // encoding: [0x8c,0x02,0x68,0x9e]
+// CHECK: fcvtpu w30, d23 // encoding: [0xfe,0x02,0x69,0x1e]
+// CHECK: fcvtpu x29, d3 // encoding: [0x7d,0x00,0x69,0x9e]
+
+ fcvtms w2, d3
+ fcvtms x4, d5
+ fcvtmu w6, d7
+ fcvtmu x8, d9
+// CHECK: fcvtms w2, d3 // encoding: [0x62,0x00,0x70,0x1e]
+// CHECK: fcvtms x4, d5 // encoding: [0xa4,0x00,0x70,0x9e]
+// CHECK: fcvtmu w6, d7 // encoding: [0xe6,0x00,0x71,0x1e]
+// CHECK: fcvtmu x8, d9 // encoding: [0x28,0x01,0x71,0x9e]
+
+ fcvtzs w10, d11
+ fcvtzs x12, d13
+ fcvtzu w14, d15
+ fcvtzu x15, d16
+// CHECK: fcvtzs w10, d11 // encoding: [0x6a,0x01,0x78,0x1e]
+// CHECK: fcvtzs x12, d13 // encoding: [0xac,0x01,0x78,0x9e]
+// CHECK: fcvtzu w14, d15 // encoding: [0xee,0x01,0x79,0x1e]
+// CHECK: fcvtzu x15, d16 // encoding: [0x0f,0x02,0x79,0x9e]
+
+ scvtf d17, w18
+ scvtf d19, x20
+ ucvtf d21, w22
+ ucvtf d23, x24
+// CHECK: scvtf d17, w18 // encoding: [0x51,0x02,0x62,0x1e]
+// CHECK: scvtf d19, x20 // encoding: [0x93,0x02,0x62,0x9e]
+// CHECK: ucvtf d21, w22 // encoding: [0xd5,0x02,0x63,0x1e]
+// CHECK: ucvtf d23, x24 // encoding: [0x17,0x03,0x63,0x9e]
+
+ fcvtas w25, d26
+ fcvtas x27, d28
+ fcvtau w29, d30
+ fcvtau xzr, d0
+// CHECK: fcvtas w25, d26 // encoding: [0x59,0x03,0x64,0x1e]
+// CHECK: fcvtas x27, d28 // encoding: [0x9b,0x03,0x64,0x9e]
+// CHECK: fcvtau w29, d30 // encoding: [0xdd,0x03,0x65,0x1e]
+// CHECK: fcvtau xzr, d0 // encoding: [0x1f,0x00,0x65,0x9e]
+
+ fmov w3, s9
+ fmov s9, w3
+// CHECK: fmov w3, s9 // encoding: [0x23,0x01,0x26,0x1e]
+// CHECK: fmov s9, w3 // encoding: [0x69,0x00,0x27,0x1e]
+
+ fmov x20, d31
+ fmov d1, x15
+// CHECK: fmov x20, d31 // encoding: [0xf4,0x03,0x66,0x9e]
+// CHECK: fmov d1, x15 // encoding: [0xe1,0x01,0x67,0x9e]
+
+ fmov x3, v12.d[1]
+ fmov v1.d[1], x19
+ fmov v3.2d[1], xzr
+// CHECK: fmov x3, v12.d[1] // encoding: [0x83,0x01,0xae,0x9e]
+// CHECK: fmov v1.d[1], x19 // encoding: [0x61,0x02,0xaf,0x9e]
+// CHECK: fmov v3.d[1], xzr // encoding: [0xe3,0x03,0xaf,0x9e]
+
+//------------------------------------------------------------------------------
+// Floating-point immediate
+//------------------------------------------------------------------------------
+
+ fmov s2, #0.125
+ fmov s3, #1.0
+ fmov d30, #16.0
+// CHECK: fmov s2, #0.12500000 // encoding: [0x02,0x10,0x28,0x1e]
+// CHECK: fmov s3, #1.00000000 // encoding: [0x03,0x10,0x2e,0x1e]
+// CHECK: fmov d30, #16.00000000 // encoding: [0x1e,0x10,0x66,0x1e]
+
+ fmov s4, #1.0625
+ fmov d10, #1.9375
+// CHECK: fmov s4, #1.06250000 // encoding: [0x04,0x30,0x2e,0x1e]
+// CHECK: fmov d10, #1.93750000 // encoding: [0x0a,0xf0,0x6f,0x1e]
+
+ fmov s12, #-1.0
+// CHECK: fmov s12, #-1.00000000 // encoding: [0x0c,0x10,0x3e,0x1e]
+
+ fmov d16, #8.5
+// CHECK: fmov d16, #8.50000000 // encoding: [0x10,0x30,0x64,0x1e]
+
+//------------------------------------------------------------------------------
+// Load-register (literal)
+//------------------------------------------------------------------------------
+ ldr w3, here
+ ldr x29, there
+ ldrsw xzr, everywhere
+// CHECK: ldr w3, here // encoding: [0x03'A',A,A,0x18'A']
+// CHECK: // fixup A - offset: 0, value: here, kind: fixup_a64_ld_prel
+// CHECK: ldr x29, there // encoding: [0x1d'A',A,A,0x58'A']
+// CHECK: // fixup A - offset: 0, value: there, kind: fixup_a64_ld_prel
+// CHECK: ldrsw xzr, everywhere // encoding: [0x1f'A',A,A,0x98'A']
+// CHECK: // fixup A - offset: 0, value: everywhere, kind: fixup_a64_ld_prel
+
+ ldr s0, who_knows
+ ldr d0, i_dont
+ ldr q0, there_must_be_a_better_way
+// CHECK: ldr s0, who_knows // encoding: [A,A,A,0x1c'A']
+// CHECK: // fixup A - offset: 0, value: who_knows, kind: fixup_a64_ld_prel
+// CHECK: ldr d0, i_dont // encoding: [A,A,A,0x5c'A']
+// CHECK: // fixup A - offset: 0, value: i_dont, kind: fixup_a64_ld_prel
+// CHECK: ldr q0, there_must_be_a_better_way // encoding: [A,A,A,0x9c'A']
+// CHECK: // fixup A - offset: 0, value: there_must_be_a_better_way, kind: fixup_a64_ld_prel
+
+ ldr w0, #1048572
+ ldr x10, #-1048576
+// CHECK: ldr w0, #1048572 // encoding: [0xe0,0xff,0x7f,0x18]
+// CHECK: ldr x10, #-1048576 // encoding: [0x0a,0x00,0x80,0x58]
+
+ prfm pldl1strm, nowhere
+ prfm #22, somewhere
+// CHECK: prfm pldl1strm, nowhere // encoding: [0x01'A',A,A,0xd8'A']
+// CHECK: // fixup A - offset: 0, value: nowhere, kind: fixup_a64_ld_prel
+// CHECK: prfm #22, somewhere // encoding: [0x16'A',A,A,0xd8'A']
+// CHECK: // fixup A - offset: 0, value: somewhere, kind: fixup_a64_ld_prel
+
+//------------------------------------------------------------------------------
+// Load/store exclusive
+//------------------------------------------------------------------------------
+
+ stxrb w1, w2, [x3, #0]
+ stxrh w2, w3, [x4]
+ stxr wzr, w4, [sp]
+ stxr w5, x6, [x7]
+// CHECK: stxrb w1, w2, [x3] // encoding: [0x62,0x7c,0x01,0x08]
+// CHECK: stxrh w2, w3, [x4] // encoding: [0x83,0x7c,0x02,0x48]
+// CHECK: stxr wzr, w4, [sp] // encoding: [0xe4,0x7f,0x1f,0x88]
+// CHECK: stxr w5, x6, [x7] // encoding: [0xe6,0x7c,0x05,0xc8]
+
+ ldxrb w7, [x9]
+ ldxrh wzr, [x10]
+ ldxr w9, [sp]
+ ldxr x10, [x11]
+// CHECK: ldxrb w7, [x9] // encoding: [0x27,0x7d,0x5f,0x08]
+// CHECK: ldxrh wzr, [x10] // encoding: [0x5f,0x7d,0x5f,0x48]
+// CHECK: ldxr w9, [sp] // encoding: [0xe9,0x7f,0x5f,0x88]
+// CHECK: ldxr x10, [x11] // encoding: [0x6a,0x7d,0x5f,0xc8]
+
+ stxp w11, w12, w13, [x14]
+ stxp wzr, x23, x14, [x15]
+// CHECK: stxp w11, w12, w13, [x14] // encoding: [0xcc,0x35,0x2b,0x88]
+// CHECK: stxp wzr, x23, x14, [x15] // encoding: [0xf7,0x39,0x3f,0xc8]
+
+ ldxp w12, wzr, [sp]
+ ldxp x13, x14, [x15]
+// CHECK: ldxp w12, wzr, [sp] // encoding: [0xec,0x7f,0x7f,0x88]
+// CHECK: ldxp x13, x14, [x15] // encoding: [0xed,0x39,0x7f,0xc8]
+
+ stlxrb w14, w15, [x16]
+ stlxrh w15, w16, [x17,#0]
+ stlxr wzr, w17, [sp]
+ stlxr w18, x19, [x20]
+// CHECK: stlxrb w14, w15, [x16] // encoding: [0x0f,0xfe,0x0e,0x08]
+// CHECK: stlxrh w15, w16, [x17] // encoding: [0x30,0xfe,0x0f,0x48]
+// CHECK: stlxr wzr, w17, [sp] // encoding: [0xf1,0xff,0x1f,0x88]
+// CHECK: stlxr w18, x19, [x20] // encoding: [0x93,0xfe,0x12,0xc8]
+
+ ldaxrb w19, [x21]
+ ldaxrh w20, [sp]
+ ldaxr wzr, [x22]
+ ldaxr x21, [x23]
+// CHECK: ldaxrb w19, [x21] // encoding: [0xb3,0xfe,0x5f,0x08]
+// CHECK: ldaxrh w20, [sp] // encoding: [0xf4,0xff,0x5f,0x48]
+// CHECK: ldaxr wzr, [x22] // encoding: [0xdf,0xfe,0x5f,0x88]
+// CHECK: ldaxr x21, [x23] // encoding: [0xf5,0xfe,0x5f,0xc8]
+
+ stlxp wzr, w22, w23, [x24]
+ stlxp w25, x26, x27, [sp]
+// CHECK: stlxp wzr, w22, w23, [x24] // encoding: [0x16,0xdf,0x3f,0x88]
+// CHECK: stlxp w25, x26, x27, [sp] // encoding: [0xfa,0xef,0x39,0xc8]
+
+ ldaxp w26, wzr, [sp]
+ ldaxp x27, x28, [x30]
+// CHECK: ldaxp w26, wzr, [sp] // encoding: [0xfa,0xff,0x7f,0x88]
+// CHECK: ldaxp x27, x28, [x30] // encoding: [0xdb,0xf3,0x7f,0xc8]
+
+ stlrb w27, [sp]
+ stlrh w28, [x0]
+ stlr wzr, [x1]
+ stlr x30, [x2]
+// CHECK: stlrb w27, [sp] // encoding: [0xfb,0xff,0x9f,0x08]
+// CHECK: stlrh w28, [x0] // encoding: [0x1c,0xfc,0x9f,0x48]
+// CHECK: stlr wzr, [x1] // encoding: [0x3f,0xfc,0x9f,0x88]
+// CHECK: stlr x30, [x2] // encoding: [0x5e,0xfc,0x9f,0xc8]
+
+ ldarb w29, [sp]
+ ldarh w30, [x0]
+ ldar wzr, [x1]
+ ldar x1, [x2]
+// CHECK: ldarb w29, [sp] // encoding: [0xfd,0xff,0xdf,0x08]
+// CHECK: ldarh w30, [x0] // encoding: [0x1e,0xfc,0xdf,0x48]
+// CHECK: ldar wzr, [x1] // encoding: [0x3f,0xfc,0xdf,0x88]
+// CHECK: ldar x1, [x2] // encoding: [0x41,0xfc,0xdf,0xc8]
+
+ stlxp wzr, w22, w23, [x24,#0]
+// CHECK: stlxp wzr, w22, w23, [x24] // encoding: [0x16,0xdf,0x3f,0x88]
+
+//------------------------------------------------------------------------------
+// Load/store (unscaled immediate)
+//------------------------------------------------------------------------------
+
+ sturb w9, [sp, #0]
+ sturh wzr, [x12, #255]
+ stur w16, [x0, #-256]
+ stur x28, [x14, #1]
+// CHECK: sturb w9, [sp] // encoding: [0xe9,0x03,0x00,0x38]
+// CHECK: sturh wzr, [x12, #255] // encoding: [0x9f,0xf1,0x0f,0x78]
+// CHECK: stur w16, [x0, #-256] // encoding: [0x10,0x00,0x10,0xb8]
+// CHECK: stur x28, [x14, #1] // encoding: [0xdc,0x11,0x00,0xf8]
+
+ ldurb w1, [x20, #255]
+ ldurh w20, [x1, #255]
+ ldur w12, [sp, #255]
+ ldur xzr, [x12, #255]
+// CHECK: ldurb w1, [x20, #255] // encoding: [0x81,0xf2,0x4f,0x38]
+// CHECK: ldurh w20, [x1, #255] // encoding: [0x34,0xf0,0x4f,0x78]
+// CHECK: ldur w12, [sp, #255] // encoding: [0xec,0xf3,0x4f,0xb8]
+// CHECK: ldur xzr, [x12, #255] // encoding: [0x9f,0xf1,0x4f,0xf8]
+
+ ldursb x9, [x7, #-256]
+ ldursh x17, [x19, #-256]
+ ldursw x20, [x15, #-256]
+ ldursw x13, [x2]
+ prfum pldl2keep, [sp, #-256]
+ ldursb w19, [x1, #-256]
+ ldursh w15, [x21, #-256]
+// CHECK: ldursb x9, [x7, #-256] // encoding: [0xe9,0x00,0x90,0x38]
+// CHECK: ldursh x17, [x19, #-256] // encoding: [0x71,0x02,0x90,0x78]
+// CHECK: ldursw x20, [x15, #-256] // encoding: [0xf4,0x01,0x90,0xb8]
+// CHECK: ldursw x13, [x2] // encoding: [0x4d,0x00,0x80,0xb8]
+// CHECK: prfum pldl2keep, [sp, #-256] // encoding: [0xe2,0x03,0x90,0xf8]
+// CHECK: ldursb w19, [x1, #-256] // encoding: [0x33,0x00,0xd0,0x38]
+// CHECK: ldursh w15, [x21, #-256] // encoding: [0xaf,0x02,0xd0,0x78]
+
+ stur b0, [sp, #1]
+ stur h12, [x12, #-1]
+ stur s15, [x0, #255]
+ stur d31, [x5, #25]
+ stur q9, [x5]
+// CHECK: stur b0, [sp, #1] // encoding: [0xe0,0x13,0x00,0x3c]
+// CHECK: stur h12, [x12, #-1] // encoding: [0x8c,0xf1,0x1f,0x7c]
+// CHECK: stur s15, [x0, #255] // encoding: [0x0f,0xf0,0x0f,0xbc]
+// CHECK: stur d31, [x5, #25] // encoding: [0xbf,0x90,0x01,0xfc]
+// CHECK: stur q9, [x5] // encoding: [0xa9,0x00,0x80,0x3c]
+
+ ldur b3, [sp]
+ ldur h5, [x4, #-256]
+ ldur s7, [x12, #-1]
+ ldur d11, [x19, #4]
+ ldur q13, [x1, #2]
+// CHECK: ldur b3, [sp] // encoding: [0xe3,0x03,0x40,0x3c]
+// CHECK: ldur h5, [x4, #-256] // encoding: [0x85,0x00,0x50,0x7c]
+// CHECK: ldur s7, [x12, #-1] // encoding: [0x87,0xf1,0x5f,0xbc]
+// CHECK: ldur d11, [x19, #4] // encoding: [0x6b,0x42,0x40,0xfc]
+// CHECK: ldur q13, [x1, #2] // encoding: [0x2d,0x20,0xc0,0x3c]
+
+//------------------------------------------------------------------------------
+// Load/store (unsigned immediate)
+//------------------------------------------------------------------------------
+
+//// Basic addressing mode limits: 8 byte access
+ ldr x0, [x0]
+ ldr x4, [x29, #0]
+ ldr x30, [x12, #32760]
+ ldr x20, [sp, #8]
+// CHECK: ldr x0, [x0] // encoding: [0x00,0x00,0x40,0xf9]
+// CHECK: ldr x4, [x29] // encoding: [0xa4,0x03,0x40,0xf9]
+// CHECK: ldr x30, [x12, #32760] // encoding: [0x9e,0xfd,0x7f,0xf9]
+// CHECK: ldr x20, [sp, #8] // encoding: [0xf4,0x07,0x40,0xf9]
+
+//// Rt treats 31 as zero-register
+ ldr xzr, [sp]
+// CHECK: ldr xzr, [sp] // encoding: [0xff,0x03,0x40,0xf9]
+
+//// 4-byte load, check address still 64-bit, limits
+ ldr w2, [sp]
+ ldr w17, [sp, #16380]
+ ldr w13, [x2, #4]
+// CHECK: ldr w2, [sp] // encoding: [0xe2,0x03,0x40,0xb9]
+// CHECK: ldr w17, [sp, #16380] // encoding: [0xf1,0xff,0x7f,0xb9]
+// CHECK: ldr w13, [x2, #4] // encoding: [0x4d,0x04,0x40,0xb9]
+
+//// Signed 4-byte load. Limits.
+ ldrsw x2, [x5,#4]
+ ldrsw x23, [sp, #16380]
+// CHECK: ldrsw x2, [x5, #4] // encoding: [0xa2,0x04,0x80,0xb9]
+// CHECK: ldrsw x23, [sp, #16380] // encoding: [0xf7,0xff,0xbf,0xb9]
+
+//// 2-byte loads
+ ldrh w2, [x4]
+ ldrsh w23, [x6, #8190]
+ ldrsh wzr, [sp, #2]
+ ldrsh x29, [x2, #2]
+// CHECK: ldrh w2, [x4] // encoding: [0x82,0x00,0x40,0x79]
+// CHECK: ldrsh w23, [x6, #8190] // encoding: [0xd7,0xfc,0xff,0x79]
+// CHECK: ldrsh wzr, [sp, #2] // encoding: [0xff,0x07,0xc0,0x79]
+// CHECK: ldrsh x29, [x2, #2] // encoding: [0x5d,0x04,0x80,0x79]
+
+//// 1-byte loads
+ ldrb w26, [x3, #121]
+ ldrb w12, [x2, #0]
+ ldrsb w27, [sp, #4095]
+ ldrsb xzr, [x15]
+// CHECK: ldrb w26, [x3, #121] // encoding: [0x7a,0xe4,0x41,0x39]
+// CHECK: ldrb w12, [x2] // encoding: [0x4c,0x00,0x40,0x39]
+// CHECK: ldrsb w27, [sp, #4095] // encoding: [0xfb,0xff,0xff,0x39]
+// CHECK: ldrsb xzr, [x15] // encoding: [0xff,0x01,0x80,0x39]
+
+//// Stores
+ str x30, [sp]
+ str w20, [x4, #16380]
+ strh w20, [x10, #14]
+ strh w17, [sp, #8190]
+ strb w23, [x3, #4095]
+ strb wzr, [x2]
+// CHECK: str x30, [sp] // encoding: [0xfe,0x03,0x00,0xf9]
+// CHECK: str w20, [x4, #16380] // encoding: [0x94,0xfc,0x3f,0xb9]
+// CHECK: strh w20, [x10, #14] // encoding: [0x54,0x1d,0x00,0x79]
+// CHECK: strh w17, [sp, #8190] // encoding: [0xf1,0xff,0x3f,0x79]
+// CHECK: strb w23, [x3, #4095] // encoding: [0x77,0xfc,0x3f,0x39]
+// CHECK: strb wzr, [x2] // encoding: [0x5f,0x00,0x00,0x39]
+
+//// Relocations
+ str x15, [x5, #:lo12:sym]
+ ldrb w15, [x5, #:lo12:sym]
+ ldrsh x15, [x5, #:lo12:sym]
+ ldrsw x15, [x5, #:lo12:sym]
+ ldr x15, [x5, #:lo12:sym]
+ ldr q3, [x2, #:lo12:sym]
+// CHECK: str x15, [x5, #:lo12:sym] // encoding: [0xaf'A',A,A,0xf9'A']
+// CHECK: // fixup A - offset: 0, value: :lo12:sym, kind: fixup_a64_ldst64_lo12
+// CHECK: ldrb w15, [x5, #:lo12:sym] // encoding: [0xaf'A',A,0x40'A',0x39'A']
+// CHECK: // fixup A - offset: 0, value: :lo12:sym, kind: fixup_a64_ldst8_lo12
+// CHECK: ldrsh x15, [x5, #:lo12:sym] // encoding: [0xaf'A',A,0x80'A',0x79'A']
+// CHECK: // fixup A - offset: 0, value: :lo12:sym, kind: fixup_a64_ldst16_lo12
+// CHECK: ldrsw x15, [x5, #:lo12:sym] // encoding: [0xaf'A',A,0x80'A',0xb9'A']
+// CHECK: // fixup A - offset: 0, value: :lo12:sym, kind: fixup_a64_ldst32_lo12
+// CHECK: ldr x15, [x5, #:lo12:sym] // encoding: [0xaf'A',A,0x40'A',0xf9'A']
+// CHECK: // fixup A - offset: 0, value: :lo12:sym, kind: fixup_a64_ldst64_lo12
+// CHECK: ldr q3, [x2, #:lo12:sym] // encoding: [0x43'A',A,0xc0'A',0x3d'A']
+// CHECK: // fixup A - offset: 0, value: :lo12:sym, kind: fixup_a64_ldst128_lo12
+
+ prfm pldl1keep, [sp, #8]
+ prfm pldl1strm, [x3]
+ prfm pldl2keep, [x5,#16]
+ prfm pldl2strm, [x2]
+ prfm pldl3keep, [x5]
+ prfm pldl3strm, [x6]
+ prfm pstl1keep, [sp, #8]
+ prfm pstl1strm, [x3]
+ prfm pstl2keep, [x5,#16]
+ prfm pstl2strm, [x2]
+ prfm pstl3keep, [x5]
+ prfm pstl3strm, [x6]
+ prfm #15, [sp]
+// CHECK: prfm pldl1keep, [sp, #8] // encoding: [0xe0,0x07,0x80,0xf9]
+// CHECK: prfm pldl1strm, [x3, #0] // encoding: [0x61,0x00,0x80,0xf9]
+// CHECK: prfm pldl2keep, [x5, #16] // encoding: [0xa2,0x08,0x80,0xf9]
+// CHECK: prfm pldl2strm, [x2, #0] // encoding: [0x43,0x00,0x80,0xf9]
+// CHECK: prfm pldl3keep, [x5, #0] // encoding: [0xa4,0x00,0x80,0xf9]
+// CHECK: prfm pldl3strm, [x6, #0] // encoding: [0xc5,0x00,0x80,0xf9]
+// CHECK: prfm pstl1keep, [sp, #8] // encoding: [0xf0,0x07,0x80,0xf9]
+// CHECK: prfm pstl1strm, [x3, #0] // encoding: [0x71,0x00,0x80,0xf9]
+// CHECK: prfm pstl2keep, [x5, #16] // encoding: [0xb2,0x08,0x80,0xf9]
+// CHECK: prfm pstl2strm, [x2, #0] // encoding: [0x53,0x00,0x80,0xf9]
+// CHECK: prfm pstl3keep, [x5, #0] // encoding: [0xb4,0x00,0x80,0xf9]
+// CHECK: prfm pstl3strm, [x6, #0] // encoding: [0xd5,0x00,0x80,0xf9]
+// CHECK: prfm #15, [sp, #0] // encoding: [0xef,0x03,0x80,0xf9]
+
+//// Floating-point versions
+
+ ldr b31, [sp, #4095]
+ ldr h20, [x2, #8190]
+ ldr s10, [x19, #16380]
+ ldr d3, [x10, #32760]
+ str q12, [sp, #65520]
+// CHECK: ldr b31, [sp, #4095] // encoding: [0xff,0xff,0x7f,0x3d]
+// CHECK: ldr h20, [x2, #8190] // encoding: [0x54,0xfc,0x7f,0x7d]
+// CHECK: ldr s10, [x19, #16380] // encoding: [0x6a,0xfe,0x7f,0xbd]
+// CHECK: ldr d3, [x10, #32760] // encoding: [0x43,0xfd,0x7f,0xfd]
+// CHECK: str q12, [sp, #65520] // encoding: [0xec,0xff,0xbf,0x3d]
+
+//------------------------------------------------------------------------------
+// Load/store register (register offset)
+//------------------------------------------------------------------------------
+
+ ldrb w3, [sp, x5]
+ ldrb w9, [x27, x6, lsl #0]
+ ldrsb w10, [x30, x7]
+ ldrb w11, [x29, x3, sxtx]
+ strb w12, [x28, xzr, sxtx #0]
+ ldrb w14, [x26, w6, uxtw]
+ ldrsb w15, [x25, w7, uxtw #0]
+ ldrb w17, [x23, w9, sxtw]
+ ldrsb x18, [x22, w10, sxtw #0]
+// CHECK: ldrb w3, [sp, x5] // encoding: [0xe3,0x6b,0x65,0x38]
+// CHECK: ldrb w9, [x27, x6, lsl #0] // encoding: [0x69,0x7b,0x66,0x38]
+// CHECK: ldrsb w10, [x30, x7] // encoding: [0xca,0x6b,0xe7,0x38]
+// CHECK: ldrb w11, [x29, x3, sxtx] // encoding: [0xab,0xeb,0x63,0x38]
+// CHECK: strb w12, [x28, xzr, sxtx #0] // encoding: [0x8c,0xfb,0x3f,0x38]
+// CHECK: ldrb w14, [x26, w6, uxtw] // encoding: [0x4e,0x4b,0x66,0x38]
+// CHECK: ldrsb w15, [x25, w7, uxtw #0] // encoding: [0x2f,0x5b,0xe7,0x38]
+// CHECK: ldrb w17, [x23, w9, sxtw] // encoding: [0xf1,0xca,0x69,0x38]
+// CHECK: ldrsb x18, [x22, w10, sxtw #0] // encoding: [0xd2,0xda,0xaa,0x38]
+
+ ldrsh w3, [sp, x5]
+ ldrsh w9, [x27, x6, lsl #0]
+ ldrh w10, [x30, x7, lsl #1]
+ strh w11, [x29, x3, sxtx]
+ ldrh w12, [x28, xzr, sxtx #0]
+ ldrsh x13, [x27, x5, sxtx #1]
+ ldrh w14, [x26, w6, uxtw]
+ ldrh w15, [x25, w7, uxtw #0]
+ ldrsh w16, [x24, w8, uxtw #1]
+ ldrh w17, [x23, w9, sxtw]
+ ldrh w18, [x22, w10, sxtw #0]
+ strh w19, [x21, wzr, sxtw #1]
+// CHECK: ldrsh w3, [sp, x5] // encoding: [0xe3,0x6b,0xe5,0x78]
+// CHECK: ldrsh w9, [x27, x6] // encoding: [0x69,0x6b,0xe6,0x78]
+// CHECK: ldrh w10, [x30, x7, lsl #1] // encoding: [0xca,0x7b,0x67,0x78]
+// CHECK: strh w11, [x29, x3, sxtx] // encoding: [0xab,0xeb,0x23,0x78]
+// CHECK: ldrh w12, [x28, xzr, sxtx] // encoding: [0x8c,0xeb,0x7f,0x78]
+// CHECK: ldrsh x13, [x27, x5, sxtx #1] // encoding: [0x6d,0xfb,0xa5,0x78]
+// CHECK: ldrh w14, [x26, w6, uxtw] // encoding: [0x4e,0x4b,0x66,0x78]
+// CHECK: ldrh w15, [x25, w7, uxtw] // encoding: [0x2f,0x4b,0x67,0x78]
+// CHECK: ldrsh w16, [x24, w8, uxtw #1] // encoding: [0x10,0x5b,0xe8,0x78]
+// CHECK: ldrh w17, [x23, w9, sxtw] // encoding: [0xf1,0xca,0x69,0x78]
+// CHECK: ldrh w18, [x22, w10, sxtw] // encoding: [0xd2,0xca,0x6a,0x78]
+// CHECK: strh w19, [x21, wzr, sxtw #1] // encoding: [0xb3,0xda,0x3f,0x78]
+
+ ldr w3, [sp, x5]
+ ldr s9, [x27, x6, lsl #0]
+ ldr w10, [x30, x7, lsl #2]
+ ldr w11, [x29, x3, sxtx]
+ str s12, [x28, xzr, sxtx #0]
+ str w13, [x27, x5, sxtx #2]
+ str w14, [x26, w6, uxtw]
+ ldr w15, [x25, w7, uxtw #0]
+ ldr w16, [x24, w8, uxtw #2]
+ ldrsw x17, [x23, w9, sxtw]
+ ldr w18, [x22, w10, sxtw #0]
+ ldrsw x19, [x21, wzr, sxtw #2]
+// CHECK: ldr w3, [sp, x5] // encoding: [0xe3,0x6b,0x65,0xb8]
+// CHECK: ldr s9, [x27, x6] // encoding: [0x69,0x6b,0x66,0xbc]
+// CHECK: ldr w10, [x30, x7, lsl #2] // encoding: [0xca,0x7b,0x67,0xb8]
+// CHECK: ldr w11, [x29, x3, sxtx] // encoding: [0xab,0xeb,0x63,0xb8]
+// CHECK: str s12, [x28, xzr, sxtx] // encoding: [0x8c,0xeb,0x3f,0xbc]
+// CHECK: str w13, [x27, x5, sxtx #2] // encoding: [0x6d,0xfb,0x25,0xb8]
+// CHECK: str w14, [x26, w6, uxtw] // encoding: [0x4e,0x4b,0x26,0xb8]
+// CHECK: ldr w15, [x25, w7, uxtw] // encoding: [0x2f,0x4b,0x67,0xb8]
+// CHECK: ldr w16, [x24, w8, uxtw #2] // encoding: [0x10,0x5b,0x68,0xb8]
+// CHECK: ldrsw x17, [x23, w9, sxtw] // encoding: [0xf1,0xca,0xa9,0xb8]
+// CHECK: ldr w18, [x22, w10, sxtw] // encoding: [0xd2,0xca,0x6a,0xb8]
+// CHECK: ldrsw x19, [x21, wzr, sxtw #2] // encoding: [0xb3,0xda,0xbf,0xb8]
+
+ ldr x3, [sp, x5]
+ str x9, [x27, x6, lsl #0]
+ ldr d10, [x30, x7, lsl #3]
+ str x11, [x29, x3, sxtx]
+ ldr x12, [x28, xzr, sxtx #0]
+ ldr x13, [x27, x5, sxtx #3]
+ prfm pldl1keep, [x26, w6, uxtw]
+ ldr x15, [x25, w7, uxtw #0]
+ ldr x16, [x24, w8, uxtw #3]
+ ldr x17, [x23, w9, sxtw]
+ ldr x18, [x22, w10, sxtw #0]
+ str d19, [x21, wzr, sxtw #3]
+ prfm #6, [x0, x5]
+// CHECK: ldr x3, [sp, x5] // encoding: [0xe3,0x6b,0x65,0xf8]
+// CHECK: str x9, [x27, x6] // encoding: [0x69,0x6b,0x26,0xf8]
+// CHECK: ldr d10, [x30, x7, lsl #3] // encoding: [0xca,0x7b,0x67,0xfc]
+// CHECK: str x11, [x29, x3, sxtx] // encoding: [0xab,0xeb,0x23,0xf8]
+// CHECK: ldr x12, [x28, xzr, sxtx] // encoding: [0x8c,0xeb,0x7f,0xf8]
+// CHECK: ldr x13, [x27, x5, sxtx #3] // encoding: [0x6d,0xfb,0x65,0xf8]
+// CHECK: prfm pldl1keep, [x26, w6, uxtw] // encoding: [0x40,0x4b,0xa6,0xf8]
+// CHECK: ldr x15, [x25, w7, uxtw] // encoding: [0x2f,0x4b,0x67,0xf8]
+// CHECK: ldr x16, [x24, w8, uxtw #3] // encoding: [0x10,0x5b,0x68,0xf8]
+// CHECK: ldr x17, [x23, w9, sxtw] // encoding: [0xf1,0xca,0x69,0xf8]
+// CHECK: ldr x18, [x22, w10, sxtw] // encoding: [0xd2,0xca,0x6a,0xf8]
+// CHECK: str d19, [x21, wzr, sxtw #3] // encoding: [0xb3,0xda,0x3f,0xfc]
+// CHECK: prfm #6, [x0, x5, lsl #0] // encoding: [0x06,0x68,0xa5,0xf8]
+
+ ldr q3, [sp, x5]
+ ldr q9, [x27, x6, lsl #0]
+ ldr q10, [x30, x7, lsl #4]
+ str q11, [x29, x3, sxtx]
+ str q12, [x28, xzr, sxtx #0]
+ str q13, [x27, x5, sxtx #4]
+ ldr q14, [x26, w6, uxtw]
+ ldr q15, [x25, w7, uxtw #0]
+ ldr q16, [x24, w8, uxtw #4]
+ ldr q17, [x23, w9, sxtw]
+ str q18, [x22, w10, sxtw #0]
+ ldr q19, [x21, wzr, sxtw #4]
+// CHECK: ldr q3, [sp, x5] // encoding: [0xe3,0x6b,0xe5,0x3c]
+// CHECK: ldr q9, [x27, x6] // encoding: [0x69,0x6b,0xe6,0x3c]
+// CHECK: ldr q10, [x30, x7, lsl #4] // encoding: [0xca,0x7b,0xe7,0x3c]
+// CHECK: str q11, [x29, x3, sxtx] // encoding: [0xab,0xeb,0xa3,0x3c]
+// CHECK: str q12, [x28, xzr, sxtx] // encoding: [0x8c,0xeb,0xbf,0x3c]
+// CHECK: str q13, [x27, x5, sxtx #4] // encoding: [0x6d,0xfb,0xa5,0x3c]
+// CHECK: ldr q14, [x26, w6, uxtw] // encoding: [0x4e,0x4b,0xe6,0x3c]
+// CHECK: ldr q15, [x25, w7, uxtw] // encoding: [0x2f,0x4b,0xe7,0x3c]
+// CHECK: ldr q16, [x24, w8, uxtw #4] // encoding: [0x10,0x5b,0xe8,0x3c]
+// CHECK: ldr q17, [x23, w9, sxtw] // encoding: [0xf1,0xca,0xe9,0x3c]
+// CHECK: str q18, [x22, w10, sxtw] // encoding: [0xd2,0xca,0xaa,0x3c]
+// CHECK: ldr q19, [x21, wzr, sxtw #4] // encoding: [0xb3,0xda,0xff,0x3c]
+
+//------------------------------------------------------------------------------
+// Load/store register (immediate post-indexed)
+//------------------------------------------------------------------------------
+
+ strb w9, [x2], #255
+ strb w10, [x3], #1
+ strb w10, [x3], #-256
+ strh w9, [x2], #255
+ strh w9, [x2], #1
+ strh w10, [x3], #-256
+// CHECK: strb w9, [x2], #255 // encoding: [0x49,0xf4,0x0f,0x38]
+// CHECK: strb w10, [x3], #1 // encoding: [0x6a,0x14,0x00,0x38]
+// CHECK: strb w10, [x3], #-256 // encoding: [0x6a,0x04,0x10,0x38]
+// CHECK: strh w9, [x2], #255 // encoding: [0x49,0xf4,0x0f,0x78]
+// CHECK: strh w9, [x2], #1 // encoding: [0x49,0x14,0x00,0x78]
+// CHECK: strh w10, [x3], #-256 // encoding: [0x6a,0x04,0x10,0x78]
+
+ str w19, [sp], #255
+ str w20, [x30], #1
+ str w21, [x12], #-256
+ str xzr, [x9], #255
+ str x2, [x3], #1
+ str x19, [x12], #-256
+// CHECK: str w19, [sp], #255 // encoding: [0xf3,0xf7,0x0f,0xb8]
+// CHECK: str w20, [x30], #1 // encoding: [0xd4,0x17,0x00,0xb8]
+// CHECK: str w21, [x12], #-256 // encoding: [0x95,0x05,0x10,0xb8]
+// CHECK: str xzr, [x9], #255 // encoding: [0x3f,0xf5,0x0f,0xf8]
+// CHECK: str x2, [x3], #1 // encoding: [0x62,0x14,0x00,0xf8]
+// CHECK: str x19, [x12], #-256 // encoding: [0x93,0x05,0x10,0xf8]
+
+ ldrb w9, [x2], #255
+ ldrb w10, [x3], #1
+ ldrb w10, [x3], #-256
+ ldrh w9, [x2], #255
+ ldrh w9, [x2], #1
+ ldrh w10, [x3], #-256
+// CHECK: ldrb w9, [x2], #255 // encoding: [0x49,0xf4,0x4f,0x38]
+// CHECK: ldrb w10, [x3], #1 // encoding: [0x6a,0x14,0x40,0x38]
+// CHECK: ldrb w10, [x3], #-256 // encoding: [0x6a,0x04,0x50,0x38]
+// CHECK: ldrh w9, [x2], #255 // encoding: [0x49,0xf4,0x4f,0x78]
+// CHECK: ldrh w9, [x2], #1 // encoding: [0x49,0x14,0x40,0x78]
+// CHECK: ldrh w10, [x3], #-256 // encoding: [0x6a,0x04,0x50,0x78]
+
+ ldr w19, [sp], #255
+ ldr w20, [x30], #1
+ ldr w21, [x12], #-256
+ ldr xzr, [x9], #255
+ ldr x2, [x3], #1
+ ldr x19, [x12], #-256
+// CHECK: ldr w19, [sp], #255 // encoding: [0xf3,0xf7,0x4f,0xb8]
+// CHECK: ldr w20, [x30], #1 // encoding: [0xd4,0x17,0x40,0xb8]
+// CHECK: ldr w21, [x12], #-256 // encoding: [0x95,0x05,0x50,0xb8]
+// CHECK: ldr xzr, [x9], #255 // encoding: [0x3f,0xf5,0x4f,0xf8]
+// CHECK: ldr x2, [x3], #1 // encoding: [0x62,0x14,0x40,0xf8]
+// CHECK: ldr x19, [x12], #-256 // encoding: [0x93,0x05,0x50,0xf8]
+
+ ldrsb xzr, [x9], #255
+ ldrsb x2, [x3], #1
+ ldrsb x19, [x12], #-256
+ ldrsh xzr, [x9], #255
+ ldrsh x2, [x3], #1
+ ldrsh x19, [x12], #-256
+ ldrsw xzr, [x9], #255
+ ldrsw x2, [x3], #1
+ ldrsw x19, [x12], #-256
+// CHECK: ldrsb xzr, [x9], #255 // encoding: [0x3f,0xf5,0x8f,0x38]
+// CHECK: ldrsb x2, [x3], #1 // encoding: [0x62,0x14,0x80,0x38]
+// CHECK: ldrsb x19, [x12], #-256 // encoding: [0x93,0x05,0x90,0x38]
+// CHECK: ldrsh xzr, [x9], #255 // encoding: [0x3f,0xf5,0x8f,0x78]
+// CHECK: ldrsh x2, [x3], #1 // encoding: [0x62,0x14,0x80,0x78]
+// CHECK: ldrsh x19, [x12], #-256 // encoding: [0x93,0x05,0x90,0x78]
+// CHECK: ldrsw xzr, [x9], #255 // encoding: [0x3f,0xf5,0x8f,0xb8]
+// CHECK: ldrsw x2, [x3], #1 // encoding: [0x62,0x14,0x80,0xb8]
+// CHECK: ldrsw x19, [x12], #-256 // encoding: [0x93,0x05,0x90,0xb8]
+
+ ldrsb wzr, [x9], #255
+ ldrsb w2, [x3], #1
+ ldrsb w19, [x12], #-256
+ ldrsh wzr, [x9], #255
+ ldrsh w2, [x3], #1
+ ldrsh w19, [x12], #-256
+// CHECK: ldrsb wzr, [x9], #255 // encoding: [0x3f,0xf5,0xcf,0x38]
+// CHECK: ldrsb w2, [x3], #1 // encoding: [0x62,0x14,0xc0,0x38]
+// CHECK: ldrsb w19, [x12], #-256 // encoding: [0x93,0x05,0xd0,0x38]
+// CHECK: ldrsh wzr, [x9], #255 // encoding: [0x3f,0xf5,0xcf,0x78]
+// CHECK: ldrsh w2, [x3], #1 // encoding: [0x62,0x14,0xc0,0x78]
+// CHECK: ldrsh w19, [x12], #-256 // encoding: [0x93,0x05,0xd0,0x78]
+
+ str b0, [x0], #255
+ str b3, [x3], #1
+ str b5, [sp], #-256
+ str h10, [x10], #255
+ str h13, [x23], #1
+ str h15, [sp], #-256
+ str s20, [x20], #255
+ str s23, [x23], #1
+ str s25, [x0], #-256
+ str d20, [x20], #255
+ str d23, [x23], #1
+ str d25, [x0], #-256
+// CHECK: str b0, [x0], #255 // encoding: [0x00,0xf4,0x0f,0x3c]
+// CHECK: str b3, [x3], #1 // encoding: [0x63,0x14,0x00,0x3c]
+// CHECK: str b5, [sp], #-256 // encoding: [0xe5,0x07,0x10,0x3c]
+// CHECK: str h10, [x10], #255 // encoding: [0x4a,0xf5,0x0f,0x7c]
+// CHECK: str h13, [x23], #1 // encoding: [0xed,0x16,0x00,0x7c]
+// CHECK: str h15, [sp], #-256 // encoding: [0xef,0x07,0x10,0x7c]
+// CHECK: str s20, [x20], #255 // encoding: [0x94,0xf6,0x0f,0xbc]
+// CHECK: str s23, [x23], #1 // encoding: [0xf7,0x16,0x00,0xbc]
+// CHECK: str s25, [x0], #-256 // encoding: [0x19,0x04,0x10,0xbc]
+// CHECK: str d20, [x20], #255 // encoding: [0x94,0xf6,0x0f,0xfc]
+// CHECK: str d23, [x23], #1 // encoding: [0xf7,0x16,0x00,0xfc]
+// CHECK: str d25, [x0], #-256 // encoding: [0x19,0x04,0x10,0xfc]
+
+ ldr b0, [x0], #255
+ ldr b3, [x3], #1
+ ldr b5, [sp], #-256
+ ldr h10, [x10], #255
+ ldr h13, [x23], #1
+ ldr h15, [sp], #-256
+ ldr s20, [x20], #255
+ ldr s23, [x23], #1
+ ldr s25, [x0], #-256
+ ldr d20, [x20], #255
+ ldr d23, [x23], #1
+ ldr d25, [x0], #-256
+// CHECK: ldr b0, [x0], #255 // encoding: [0x00,0xf4,0x4f,0x3c]
+// CHECK: ldr b3, [x3], #1 // encoding: [0x63,0x14,0x40,0x3c]
+// CHECK: ldr b5, [sp], #-256 // encoding: [0xe5,0x07,0x50,0x3c]
+// CHECK: ldr h10, [x10], #255 // encoding: [0x4a,0xf5,0x4f,0x7c]
+// CHECK: ldr h13, [x23], #1 // encoding: [0xed,0x16,0x40,0x7c]
+// CHECK: ldr h15, [sp], #-256 // encoding: [0xef,0x07,0x50,0x7c]
+// CHECK: ldr s20, [x20], #255 // encoding: [0x94,0xf6,0x4f,0xbc]
+// CHECK: ldr s23, [x23], #1 // encoding: [0xf7,0x16,0x40,0xbc]
+// CHECK: ldr s25, [x0], #-256 // encoding: [0x19,0x04,0x50,0xbc]
+// CHECK: ldr d20, [x20], #255 // encoding: [0x94,0xf6,0x4f,0xfc]
+// CHECK: ldr d23, [x23], #1 // encoding: [0xf7,0x16,0x40,0xfc]
+// CHECK: ldr d25, [x0], #-256 // encoding: [0x19,0x04,0x50,0xfc]
+
+ ldr q20, [x1], #255
+ ldr q23, [x9], #1
+ ldr q25, [x20], #-256
+ str q10, [x1], #255
+ str q22, [sp], #1
+ str q21, [x20], #-256
+// CHECK: ldr q20, [x1], #255 // encoding: [0x34,0xf4,0xcf,0x3c]
+// CHECK: ldr q23, [x9], #1 // encoding: [0x37,0x15,0xc0,0x3c]
+// CHECK: ldr q25, [x20], #-256 // encoding: [0x99,0x06,0xd0,0x3c]
+// CHECK: str q10, [x1], #255 // encoding: [0x2a,0xf4,0x8f,0x3c]
+// CHECK: str q22, [sp], #1 // encoding: [0xf6,0x17,0x80,0x3c]
+// CHECK: str q21, [x20], #-256 // encoding: [0x95,0x06,0x90,0x3c]
+
+//------------------------------------------------------------------------------
+// Load/store register (immediate pre-indexed)
+//------------------------------------------------------------------------------
+
+ ldr x3, [x4, #0]!
+ ldr xzr, [sp, #0]!
+// CHECK: ldr x3, [x4, #0]! // encoding: [0x83,0x0c,0x40,0xf8]
+// CHECK: ldr xzr, [sp, #0]! // encoding: [0xff,0x0f,0x40,0xf8]
+
+ strb w9, [x2, #255]!
+ strb w10, [x3, #1]!
+ strb w10, [x3, #-256]!
+ strh w9, [x2, #255]!
+ strh w9, [x2, #1]!
+ strh w10, [x3, #-256]!
+// CHECK: strb w9, [x2, #255]! // encoding: [0x49,0xfc,0x0f,0x38]
+// CHECK: strb w10, [x3, #1]! // encoding: [0x6a,0x1c,0x00,0x38]
+// CHECK: strb w10, [x3, #-256]! // encoding: [0x6a,0x0c,0x10,0x38]
+// CHECK: strh w9, [x2, #255]! // encoding: [0x49,0xfc,0x0f,0x78]
+// CHECK: strh w9, [x2, #1]! // encoding: [0x49,0x1c,0x00,0x78]
+// CHECK: strh w10, [x3, #-256]! // encoding: [0x6a,0x0c,0x10,0x78]
+
+ str w19, [sp, #255]!
+ str w20, [x30, #1]!
+ str w21, [x12, #-256]!
+ str xzr, [x9, #255]!
+ str x2, [x3, #1]!
+ str x19, [x12, #-256]!
+// CHECK: str w19, [sp, #255]! // encoding: [0xf3,0xff,0x0f,0xb8]
+// CHECK: str w20, [x30, #1]! // encoding: [0xd4,0x1f,0x00,0xb8]
+// CHECK: str w21, [x12, #-256]! // encoding: [0x95,0x0d,0x10,0xb8]
+// CHECK: str xzr, [x9, #255]! // encoding: [0x3f,0xfd,0x0f,0xf8]
+// CHECK: str x2, [x3, #1]! // encoding: [0x62,0x1c,0x00,0xf8]
+// CHECK: str x19, [x12, #-256]! // encoding: [0x93,0x0d,0x10,0xf8]
+
+ ldrb w9, [x2, #255]!
+ ldrb w10, [x3, #1]!
+ ldrb w10, [x3, #-256]!
+ ldrh w9, [x2, #255]!
+ ldrh w9, [x2, #1]!
+ ldrh w10, [x3, #-256]!
+// CHECK: ldrb w9, [x2, #255]! // encoding: [0x49,0xfc,0x4f,0x38]
+// CHECK: ldrb w10, [x3, #1]! // encoding: [0x6a,0x1c,0x40,0x38]
+// CHECK: ldrb w10, [x3, #-256]! // encoding: [0x6a,0x0c,0x50,0x38]
+// CHECK: ldrh w9, [x2, #255]! // encoding: [0x49,0xfc,0x4f,0x78]
+// CHECK: ldrh w9, [x2, #1]! // encoding: [0x49,0x1c,0x40,0x78]
+// CHECK: ldrh w10, [x3, #-256]! // encoding: [0x6a,0x0c,0x50,0x78]
+
+ ldr w19, [sp, #255]!
+ ldr w20, [x30, #1]!
+ ldr w21, [x12, #-256]!
+ ldr xzr, [x9, #255]!
+ ldr x2, [x3, #1]!
+ ldr x19, [x12, #-256]!
+// CHECK: ldr w19, [sp, #255]! // encoding: [0xf3,0xff,0x4f,0xb8]
+// CHECK: ldr w20, [x30, #1]! // encoding: [0xd4,0x1f,0x40,0xb8]
+// CHECK: ldr w21, [x12, #-256]! // encoding: [0x95,0x0d,0x50,0xb8]
+// CHECK: ldr xzr, [x9, #255]! // encoding: [0x3f,0xfd,0x4f,0xf8]
+// CHECK: ldr x2, [x3, #1]! // encoding: [0x62,0x1c,0x40,0xf8]
+// CHECK: ldr x19, [x12, #-256]! // encoding: [0x93,0x0d,0x50,0xf8]
+
+ ldrsb xzr, [x9, #255]!
+ ldrsb x2, [x3, #1]!
+ ldrsb x19, [x12, #-256]!
+ ldrsh xzr, [x9, #255]!
+ ldrsh x2, [x3, #1]!
+ ldrsh x19, [x12, #-256]!
+ ldrsw xzr, [x9, #255]!
+ ldrsw x2, [x3, #1]!
+ ldrsw x19, [x12, #-256]!
+// CHECK: ldrsb xzr, [x9, #255]! // encoding: [0x3f,0xfd,0x8f,0x38]
+// CHECK: ldrsb x2, [x3, #1]! // encoding: [0x62,0x1c,0x80,0x38]
+// CHECK: ldrsb x19, [x12, #-256]! // encoding: [0x93,0x0d,0x90,0x38]
+// CHECK: ldrsh xzr, [x9, #255]! // encoding: [0x3f,0xfd,0x8f,0x78]
+// CHECK: ldrsh x2, [x3, #1]! // encoding: [0x62,0x1c,0x80,0x78]
+// CHECK: ldrsh x19, [x12, #-256]! // encoding: [0x93,0x0d,0x90,0x78]
+// CHECK: ldrsw xzr, [x9, #255]! // encoding: [0x3f,0xfd,0x8f,0xb8]
+// CHECK: ldrsw x2, [x3, #1]! // encoding: [0x62,0x1c,0x80,0xb8]
+// CHECK: ldrsw x19, [x12, #-256]! // encoding: [0x93,0x0d,0x90,0xb8]
+
+ ldrsb wzr, [x9, #255]!
+ ldrsb w2, [x3, #1]!
+ ldrsb w19, [x12, #-256]!
+ ldrsh wzr, [x9, #255]!
+ ldrsh w2, [x3, #1]!
+ ldrsh w19, [x12, #-256]!
+// CHECK: ldrsb wzr, [x9, #255]! // encoding: [0x3f,0xfd,0xcf,0x38]
+// CHECK: ldrsb w2, [x3, #1]! // encoding: [0x62,0x1c,0xc0,0x38]
+// CHECK: ldrsb w19, [x12, #-256]! // encoding: [0x93,0x0d,0xd0,0x38]
+// CHECK: ldrsh wzr, [x9, #255]! // encoding: [0x3f,0xfd,0xcf,0x78]
+// CHECK: ldrsh w2, [x3, #1]! // encoding: [0x62,0x1c,0xc0,0x78]
+// CHECK: ldrsh w19, [x12, #-256]! // encoding: [0x93,0x0d,0xd0,0x78]
+
+ str b0, [x0, #255]!
+ str b3, [x3, #1]!
+ str b5, [sp, #-256]!
+ str h10, [x10, #255]!
+ str h13, [x23, #1]!
+ str h15, [sp, #-256]!
+ str s20, [x20, #255]!
+ str s23, [x23, #1]!
+ str s25, [x0, #-256]!
+ str d20, [x20, #255]!
+ str d23, [x23, #1]!
+ str d25, [x0, #-256]!
+// CHECK: str b0, [x0, #255]! // encoding: [0x00,0xfc,0x0f,0x3c]
+// CHECK: str b3, [x3, #1]! // encoding: [0x63,0x1c,0x00,0x3c]
+// CHECK: str b5, [sp, #-256]! // encoding: [0xe5,0x0f,0x10,0x3c]
+// CHECK: str h10, [x10, #255]! // encoding: [0x4a,0xfd,0x0f,0x7c]
+// CHECK: str h13, [x23, #1]! // encoding: [0xed,0x1e,0x00,0x7c]
+// CHECK: str h15, [sp, #-256]! // encoding: [0xef,0x0f,0x10,0x7c]
+// CHECK: str s20, [x20, #255]! // encoding: [0x94,0xfe,0x0f,0xbc]
+// CHECK: str s23, [x23, #1]! // encoding: [0xf7,0x1e,0x00,0xbc]
+// CHECK: str s25, [x0, #-256]! // encoding: [0x19,0x0c,0x10,0xbc]
+// CHECK: str d20, [x20, #255]! // encoding: [0x94,0xfe,0x0f,0xfc]
+// CHECK: str d23, [x23, #1]! // encoding: [0xf7,0x1e,0x00,0xfc]
+// CHECK: str d25, [x0, #-256]! // encoding: [0x19,0x0c,0x10,0xfc]
+
+ ldr b0, [x0, #255]!
+ ldr b3, [x3, #1]!
+ ldr b5, [sp, #-256]!
+ ldr h10, [x10, #255]!
+ ldr h13, [x23, #1]!
+ ldr h15, [sp, #-256]!
+ ldr s20, [x20, #255]!
+ ldr s23, [x23, #1]!
+ ldr s25, [x0, #-256]!
+ ldr d20, [x20, #255]!
+ ldr d23, [x23, #1]!
+ ldr d25, [x0, #-256]!
+// CHECK: ldr b0, [x0, #255]! // encoding: [0x00,0xfc,0x4f,0x3c]
+// CHECK: ldr b3, [x3, #1]! // encoding: [0x63,0x1c,0x40,0x3c]
+// CHECK: ldr b5, [sp, #-256]! // encoding: [0xe5,0x0f,0x50,0x3c]
+// CHECK: ldr h10, [x10, #255]! // encoding: [0x4a,0xfd,0x4f,0x7c]
+// CHECK: ldr h13, [x23, #1]! // encoding: [0xed,0x1e,0x40,0x7c]
+// CHECK: ldr h15, [sp, #-256]! // encoding: [0xef,0x0f,0x50,0x7c]
+// CHECK: ldr s20, [x20, #255]! // encoding: [0x94,0xfe,0x4f,0xbc]
+// CHECK: ldr s23, [x23, #1]! // encoding: [0xf7,0x1e,0x40,0xbc]
+// CHECK: ldr s25, [x0, #-256]! // encoding: [0x19,0x0c,0x50,0xbc]
+// CHECK: ldr d20, [x20, #255]! // encoding: [0x94,0xfe,0x4f,0xfc]
+// CHECK: ldr d23, [x23, #1]! // encoding: [0xf7,0x1e,0x40,0xfc]
+// CHECK: ldr d25, [x0, #-256]! // encoding: [0x19,0x0c,0x50,0xfc]
+
+ ldr q20, [x1, #255]!
+ ldr q23, [x9, #1]!
+ ldr q25, [x20, #-256]!
+ str q10, [x1, #255]!
+ str q22, [sp, #1]!
+ str q21, [x20, #-256]!
+// CHECK: ldr q20, [x1, #255]! // encoding: [0x34,0xfc,0xcf,0x3c]
+// CHECK: ldr q23, [x9, #1]! // encoding: [0x37,0x1d,0xc0,0x3c]
+// CHECK: ldr q25, [x20, #-256]! // encoding: [0x99,0x0e,0xd0,0x3c]
+// CHECK: str q10, [x1, #255]! // encoding: [0x2a,0xfc,0x8f,0x3c]
+// CHECK: str q22, [sp, #1]! // encoding: [0xf6,0x1f,0x80,0x3c]
+// CHECK: str q21, [x20, #-256]! // encoding: [0x95,0x0e,0x90,0x3c]
+
+//------------------------------------------------------------------------------
+// Load/store (unprivileged)
+//------------------------------------------------------------------------------
+
+ sttrb w9, [sp, #0]
+ sttrh wzr, [x12, #255]
+ sttr w16, [x0, #-256]
+ sttr x28, [x14, #1]
+// CHECK: sttrb w9, [sp] // encoding: [0xe9,0x0b,0x00,0x38]
+// CHECK: sttrh wzr, [x12, #255] // encoding: [0x9f,0xf9,0x0f,0x78]
+// CHECK: sttr w16, [x0, #-256] // encoding: [0x10,0x08,0x10,0xb8]
+// CHECK: sttr x28, [x14, #1] // encoding: [0xdc,0x19,0x00,0xf8]
+
+ ldtrb w1, [x20, #255]
+ ldtrh w20, [x1, #255]
+ ldtr w12, [sp, #255]
+ ldtr xzr, [x12, #255]
+// CHECK: ldtrb w1, [x20, #255] // encoding: [0x81,0xfa,0x4f,0x38]
+// CHECK: ldtrh w20, [x1, #255] // encoding: [0x34,0xf8,0x4f,0x78]
+// CHECK: ldtr w12, [sp, #255] // encoding: [0xec,0xfb,0x4f,0xb8]
+// CHECK: ldtr xzr, [x12, #255] // encoding: [0x9f,0xf9,0x4f,0xf8]
+
+ ldtrsb x9, [x7, #-256]
+ ldtrsh x17, [x19, #-256]
+ ldtrsw x20, [x15, #-256]
+ ldtrsb w19, [x1, #-256]
+ ldtrsh w15, [x21, #-256]
+// CHECK: ldtrsb x9, [x7, #-256] // encoding: [0xe9,0x08,0x90,0x38]
+// CHECK: ldtrsh x17, [x19, #-256] // encoding: [0x71,0x0a,0x90,0x78]
+// CHECK: ldtrsw x20, [x15, #-256] // encoding: [0xf4,0x09,0x90,0xb8]
+// CHECK: ldtrsb w19, [x1, #-256] // encoding: [0x33,0x08,0xd0,0x38]
+// CHECK: ldtrsh w15, [x21, #-256] // encoding: [0xaf,0x0a,0xd0,0x78]
+
+//------------------------------------------------------------------------------
+// Load/store register pair (offset)
+//------------------------------------------------------------------------------
+
+ ldp w3, w5, [sp]
+ stp wzr, w9, [sp, #252]
+ ldp w2, wzr, [sp, #-256]
+ ldp w9, w10, [sp, #4]
+// CHECK: ldp w3, w5, [sp] // encoding: [0xe3,0x17,0x40,0x29]
+// CHECK: stp wzr, w9, [sp, #252] // encoding: [0xff,0xa7,0x1f,0x29]
+// CHECK: ldp w2, wzr, [sp, #-256] // encoding: [0xe2,0x7f,0x60,0x29]
+// CHECK: ldp w9, w10, [sp, #4] // encoding: [0xe9,0xab,0x40,0x29]
+
+ ldpsw x9, x10, [sp, #4]
+ ldpsw x9, x10, [x2, #-256]
+ ldpsw x20, x30, [sp, #252]
+// CHECK: ldpsw x9, x10, [sp, #4] // encoding: [0xe9,0xab,0x40,0x69]
+// CHECK: ldpsw x9, x10, [x2, #-256] // encoding: [0x49,0x28,0x60,0x69]
+// CHECK: ldpsw x20, x30, [sp, #252] // encoding: [0xf4,0xfb,0x5f,0x69]
+
+ ldp x21, x29, [x2, #504]
+ ldp x22, x23, [x3, #-512]
+ ldp x24, x25, [x4, #8]
+// CHECK: ldp x21, x29, [x2, #504] // encoding: [0x55,0xf4,0x5f,0xa9]
+// CHECK: ldp x22, x23, [x3, #-512] // encoding: [0x76,0x5c,0x60,0xa9]
+// CHECK: ldp x24, x25, [x4, #8] // encoding: [0x98,0xe4,0x40,0xa9]
+
+ ldp s29, s28, [sp, #252]
+ stp s27, s26, [sp, #-256]
+ ldp s1, s2, [x3, #44]
+// CHECK: ldp s29, s28, [sp, #252] // encoding: [0xfd,0xf3,0x5f,0x2d]
+// CHECK: stp s27, s26, [sp, #-256] // encoding: [0xfb,0x6b,0x20,0x2d]
+// CHECK: ldp s1, s2, [x3, #44] // encoding: [0x61,0x88,0x45,0x2d]
+
+ stp d3, d5, [x9, #504]
+ stp d7, d11, [x10, #-512]
+ ldp d2, d3, [x30, #-8]
+// CHECK: stp d3, d5, [x9, #504] // encoding: [0x23,0x95,0x1f,0x6d]
+// CHECK: stp d7, d11, [x10, #-512] // encoding: [0x47,0x2d,0x20,0x6d]
+// CHECK: ldp d2, d3, [x30, #-8] // encoding: [0xc2,0x8f,0x7f,0x6d]
+
+ stp q3, q5, [sp]
+ stp q17, q19, [sp, #1008]
+ ldp q23, q29, [x1, #-1024]
+// CHECK: stp q3, q5, [sp] // encoding: [0xe3,0x17,0x00,0xad]
+// CHECK: stp q17, q19, [sp, #1008] // encoding: [0xf1,0xcf,0x1f,0xad]
+// CHECK: ldp q23, q29, [x1, #-1024] // encoding: [0x37,0x74,0x60,0xad]
+
+//------------------------------------------------------------------------------
+// Load/store register pair (post-indexed)
+//------------------------------------------------------------------------------
+
+ ldp w3, w5, [sp], #0
+ stp wzr, w9, [sp], #252
+ ldp w2, wzr, [sp], #-256
+ ldp w9, w10, [sp], #4
+// CHECK: ldp w3, w5, [sp], #0 // encoding: [0xe3,0x17,0xc0,0x28]
+// CHECK: stp wzr, w9, [sp], #252 // encoding: [0xff,0xa7,0x9f,0x28]
+// CHECK: ldp w2, wzr, [sp], #-256 // encoding: [0xe2,0x7f,0xe0,0x28]
+// CHECK: ldp w9, w10, [sp], #4 // encoding: [0xe9,0xab,0xc0,0x28]
+
+ ldpsw x9, x10, [sp], #4
+ ldpsw x9, x10, [x2], #-256
+ ldpsw x20, x30, [sp], #252
+// CHECK: ldpsw x9, x10, [sp], #4 // encoding: [0xe9,0xab,0xc0,0x68]
+// CHECK: ldpsw x9, x10, [x2], #-256 // encoding: [0x49,0x28,0xe0,0x68]
+// CHECK: ldpsw x20, x30, [sp], #252 // encoding: [0xf4,0xfb,0xdf,0x68]
+
+ ldp x21, x29, [x2], #504
+ ldp x22, x23, [x3], #-512
+ ldp x24, x25, [x4], #8
+// CHECK: ldp x21, x29, [x2], #504 // encoding: [0x55,0xf4,0xdf,0xa8]
+// CHECK: ldp x22, x23, [x3], #-512 // encoding: [0x76,0x5c,0xe0,0xa8]
+// CHECK: ldp x24, x25, [x4], #8 // encoding: [0x98,0xe4,0xc0,0xa8]
+
+ ldp s29, s28, [sp], #252
+ stp s27, s26, [sp], #-256
+ ldp s1, s2, [x3], #44
+// CHECK: ldp s29, s28, [sp], #252 // encoding: [0xfd,0xf3,0xdf,0x2c]
+// CHECK: stp s27, s26, [sp], #-256 // encoding: [0xfb,0x6b,0xa0,0x2c]
+// CHECK: ldp s1, s2, [x3], #44 // encoding: [0x61,0x88,0xc5,0x2c]
+
+ stp d3, d5, [x9], #504
+ stp d7, d11, [x10], #-512
+ ldp d2, d3, [x30], #-8
+// CHECK: stp d3, d5, [x9], #504 // encoding: [0x23,0x95,0x9f,0x6c]
+// CHECK: stp d7, d11, [x10], #-512 // encoding: [0x47,0x2d,0xa0,0x6c]
+// CHECK: ldp d2, d3, [x30], #-8 // encoding: [0xc2,0x8f,0xff,0x6c]
+
+ stp q3, q5, [sp], #0
+ stp q17, q19, [sp], #1008
+ ldp q23, q29, [x1], #-1024
+// CHECK: stp q3, q5, [sp], #0 // encoding: [0xe3,0x17,0x80,0xac]
+// CHECK: stp q17, q19, [sp], #1008 // encoding: [0xf1,0xcf,0x9f,0xac]
+// CHECK: ldp q23, q29, [x1], #-1024 // encoding: [0x37,0x74,0xe0,0xac]
+
+//------------------------------------------------------------------------------
+// Load/store register pair (pre-indexed)
+//------------------------------------------------------------------------------
+ ldp w3, w5, [sp, #0]!
+ stp wzr, w9, [sp, #252]!
+ ldp w2, wzr, [sp, #-256]!
+ ldp w9, w10, [sp, #4]!
+// CHECK: ldp w3, w5, [sp, #0]! // encoding: [0xe3,0x17,0xc0,0x29]
+// CHECK: stp wzr, w9, [sp, #252]! // encoding: [0xff,0xa7,0x9f,0x29]
+// CHECK: ldp w2, wzr, [sp, #-256]! // encoding: [0xe2,0x7f,0xe0,0x29]
+// CHECK: ldp w9, w10, [sp, #4]! // encoding: [0xe9,0xab,0xc0,0x29]
+
+ ldpsw x9, x10, [sp, #4]!
+ ldpsw x9, x10, [x2, #-256]!
+ ldpsw x20, x30, [sp, #252]!
+// CHECK: ldpsw x9, x10, [sp, #4]! // encoding: [0xe9,0xab,0xc0,0x69]
+// CHECK: ldpsw x9, x10, [x2, #-256]! // encoding: [0x49,0x28,0xe0,0x69]
+// CHECK: ldpsw x20, x30, [sp, #252]! // encoding: [0xf4,0xfb,0xdf,0x69]
+
+ ldp x21, x29, [x2, #504]!
+ ldp x22, x23, [x3, #-512]!
+ ldp x24, x25, [x4, #8]!
+// CHECK: ldp x21, x29, [x2, #504]! // encoding: [0x55,0xf4,0xdf,0xa9]
+// CHECK: ldp x22, x23, [x3, #-512]! // encoding: [0x76,0x5c,0xe0,0xa9]
+// CHECK: ldp x24, x25, [x4, #8]! // encoding: [0x98,0xe4,0xc0,0xa9]
+
+ ldp s29, s28, [sp, #252]!
+ stp s27, s26, [sp, #-256]!
+ ldp s1, s2, [x3, #44]!
+// CHECK: ldp s29, s28, [sp, #252]! // encoding: [0xfd,0xf3,0xdf,0x2d]
+// CHECK: stp s27, s26, [sp, #-256]! // encoding: [0xfb,0x6b,0xa0,0x2d]
+// CHECK: ldp s1, s2, [x3, #44]! // encoding: [0x61,0x88,0xc5,0x2d]
+
+ stp d3, d5, [x9, #504]!
+ stp d7, d11, [x10, #-512]!
+ ldp d2, d3, [x30, #-8]!
+// CHECK: stp d3, d5, [x9, #504]! // encoding: [0x23,0x95,0x9f,0x6d]
+// CHECK: stp d7, d11, [x10, #-512]! // encoding: [0x47,0x2d,0xa0,0x6d]
+// CHECK: ldp d2, d3, [x30, #-8]! // encoding: [0xc2,0x8f,0xff,0x6d]
+
+ stp q3, q5, [sp, #0]!
+ stp q17, q19, [sp, #1008]!
+ ldp q23, q29, [x1, #-1024]!
+// CHECK: stp q3, q5, [sp, #0]! // encoding: [0xe3,0x17,0x80,0xad]
+// CHECK: stp q17, q19, [sp, #1008]! // encoding: [0xf1,0xcf,0x9f,0xad]
+// CHECK: ldp q23, q29, [x1, #-1024]! // encoding: [0x37,0x74,0xe0,0xad]
+
+//------------------------------------------------------------------------------
+// Load/store non-temporal register pair (offset)
+//------------------------------------------------------------------------------
+
+ ldnp w3, w5, [sp]
+ stnp wzr, w9, [sp, #252]
+ ldnp w2, wzr, [sp, #-256]
+ ldnp w9, w10, [sp, #4]
+// CHECK: ldnp w3, w5, [sp] // encoding: [0xe3,0x17,0x40,0x28]
+// CHECK: stnp wzr, w9, [sp, #252] // encoding: [0xff,0xa7,0x1f,0x28]
+// CHECK: ldnp w2, wzr, [sp, #-256] // encoding: [0xe2,0x7f,0x60,0x28]
+// CHECK: ldnp w9, w10, [sp, #4] // encoding: [0xe9,0xab,0x40,0x28]
+
+ ldnp x21, x29, [x2, #504]
+ ldnp x22, x23, [x3, #-512]
+ ldnp x24, x25, [x4, #8]
+// CHECK: ldnp x21, x29, [x2, #504] // encoding: [0x55,0xf4,0x5f,0xa8]
+// CHECK: ldnp x22, x23, [x3, #-512] // encoding: [0x76,0x5c,0x60,0xa8]
+// CHECK: ldnp x24, x25, [x4, #8] // encoding: [0x98,0xe4,0x40,0xa8]
+
+ ldnp s29, s28, [sp, #252]
+ stnp s27, s26, [sp, #-256]
+ ldnp s1, s2, [x3, #44]
+// CHECK: ldnp s29, s28, [sp, #252] // encoding: [0xfd,0xf3,0x5f,0x2c]
+// CHECK: stnp s27, s26, [sp, #-256] // encoding: [0xfb,0x6b,0x20,0x2c]
+// CHECK: ldnp s1, s2, [x3, #44] // encoding: [0x61,0x88,0x45,0x2c]
+
+ stnp d3, d5, [x9, #504]
+ stnp d7, d11, [x10, #-512]
+ ldnp d2, d3, [x30, #-8]
+// CHECK: stnp d3, d5, [x9, #504] // encoding: [0x23,0x95,0x1f,0x6c]
+// CHECK: stnp d7, d11, [x10, #-512] // encoding: [0x47,0x2d,0x20,0x6c]
+// CHECK: ldnp d2, d3, [x30, #-8] // encoding: [0xc2,0x8f,0x7f,0x6c]
+
+ stnp q3, q5, [sp]
+ stnp q17, q19, [sp, #1008]
+ ldnp q23, q29, [x1, #-1024]
+// CHECK: stnp q3, q5, [sp] // encoding: [0xe3,0x17,0x00,0xac]
+// CHECK: stnp q17, q19, [sp, #1008] // encoding: [0xf1,0xcf,0x1f,0xac]
+// CHECK: ldnp q23, q29, [x1, #-1024] // encoding: [0x37,0x74,0x60,0xac]
+
+//------------------------------------------------------------------------------
+// Logical (immediate)
+//------------------------------------------------------------------------------
+ // 32 bit replication-width
+ orr w3, w9, #0xffff0000
+ orr wsp, w10, #0xe00000ff
+ orr w9, w10, #0x000003ff
+// CHECK: orr w3, w9, #0xffff0000 // encoding: [0x23,0x3d,0x10,0x32]
+// CHECK: orr wsp, w10, #0xe00000ff // encoding: [0x5f,0x29,0x03,0x32]
+// CHECK: orr w9, w10, #0x3ff // encoding: [0x49,0x25,0x00,0x32]
+
+ // 16 bit replication width
+ and w14, w15, #0x80008000
+ and w12, w13, #0xffc3ffc3
+ and w11, wzr, #0x00030003
+// CHECK: and w14, w15, #0x80008000 // encoding: [0xee,0x81,0x01,0x12]
+// CHECK: and w12, w13, #0xffc3ffc3 // encoding: [0xac,0xad,0x0a,0x12]
+// CHECK: and w11, wzr, #0x30003 // encoding: [0xeb,0x87,0x00,0x12]
+
+ // 8 bit replication width
+ eor w3, w6, #0xe0e0e0e0
+ eor wsp, wzr, #0x03030303
+ eor w16, w17, #0x81818181
+// CHECK: eor w3, w6, #0xe0e0e0e0 // encoding: [0xc3,0xc8,0x03,0x52]
+// CHECK: eor wsp, wzr, #0x3030303 // encoding: [0xff,0xc7,0x00,0x52]
+// CHECK: eor w16, w17, #0x81818181 // encoding: [0x30,0xc6,0x01,0x52]
+
+ // 4 bit replication width
+ ands wzr, w18, #0xcccccccc
+ ands w19, w20, #0x33333333
+ ands w21, w22, #0x99999999
+// CHECK: ands wzr, w18, #0xcccccccc // encoding: [0x5f,0xe6,0x02,0x72]
+// CHECK: ands w19, w20, #0x33333333 // encoding: [0x93,0xe6,0x00,0x72]
+// CHECK: ands w21, w22, #0x99999999 // encoding: [0xd5,0xe6,0x01,0x72]
+
+ // 2 bit replication width
+ tst w3, #0xaaaaaaaa
+ tst wzr, #0x55555555
+// CHECK: ands wzr, w3, #0xaaaaaaaa // encoding: [0x7f,0xf0,0x01,0x72]
+// CHECK: ands wzr, wzr, #0x55555555 // encoding: [0xff,0xf3,0x00,0x72]
+
+ // 64 bit replication-width
+ eor x3, x5, #0xffffffffc000000
+ and x9, x10, #0x00007fffffffffff
+ orr x11, x12, #0x8000000000000fff
+// CHECK: eor x3, x5, #0xffffffffc000000 // encoding: [0xa3,0x84,0x66,0xd2]
+// CHECK: and x9, x10, #0x7fffffffffff // encoding: [0x49,0xb9,0x40,0x92]
+// CHECK: orr x11, x12, #0x8000000000000fff // encoding: [0x8b,0x31,0x41,0xb2]
+
+ // 32 bit replication-width
+ orr x3, x9, #0xffff0000ffff0000
+ orr sp, x10, #0xe00000ffe00000ff
+ orr x9, x10, #0x000003ff000003ff
+// CHECK: orr x3, x9, #0xffff0000ffff0000 // encoding: [0x23,0x3d,0x10,0xb2]
+// CHECK: orr sp, x10, #0xe00000ffe00000ff // encoding: [0x5f,0x29,0x03,0xb2]
+// CHECK: orr x9, x10, #0x3ff000003ff // encoding: [0x49,0x25,0x00,0xb2]
+
+ // 16 bit replication-width
+ and x14, x15, #0x8000800080008000
+ and x12, x13, #0xffc3ffc3ffc3ffc3
+ and x11, xzr, #0x0003000300030003
+// CHECK: and x14, x15, #0x8000800080008000 // encoding: [0xee,0x81,0x01,0x92]
+// CHECK: and x12, x13, #0xffc3ffc3ffc3ffc3 // encoding: [0xac,0xad,0x0a,0x92]
+// CHECK: and x11, xzr, #0x3000300030003 // encoding: [0xeb,0x87,0x00,0x92]
+
+ // 8 bit replication-width
+ eor x3, x6, #0xe0e0e0e0e0e0e0e0
+ eor sp, xzr, #0x0303030303030303
+ eor x16, x17, #0x8181818181818181
+// CHECK: eor x3, x6, #0xe0e0e0e0e0e0e0e0 // encoding: [0xc3,0xc8,0x03,0xd2]
+// CHECK: eor sp, xzr, #0x303030303030303 // encoding: [0xff,0xc7,0x00,0xd2]
+// CHECK: eor x16, x17, #0x8181818181818181 // encoding: [0x30,0xc6,0x01,0xd2]
+
+ // 4 bit replication-width
+ ands xzr, x18, #0xcccccccccccccccc
+ ands x19, x20, #0x3333333333333333
+ ands x21, x22, #0x9999999999999999
+// CHECK: ands xzr, x18, #0xcccccccccccccccc // encoding: [0x5f,0xe6,0x02,0xf2]
+// CHECK: ands x19, x20, #0x3333333333333333 // encoding: [0x93,0xe6,0x00,0xf2]
+// CHECK: ands x21, x22, #0x9999999999999999 // encoding: [0xd5,0xe6,0x01,0xf2]
+
+ // 2 bit replication-width
+ tst x3, #0xaaaaaaaaaaaaaaaa
+ tst xzr, #0x5555555555555555
+// CHECK: ands xzr, x3, #0xaaaaaaaaaaaaaaaa // encoding: [0x7f,0xf0,0x01,0xf2]
+// CHECK: ands xzr, xzr, #0x5555555555555555 // encoding: [0xff,0xf3,0x00,0xf2]
+
+ mov w3, #0xf000f
+ mov x10, #0xaaaaaaaaaaaaaaaa
+// CHECK: orr w3, wzr, #0xf000f // encoding: [0xe3,0x8f,0x00,0x32]
+// CHECK: orr x10, xzr, #0xaaaaaaaaaaaaaaaa // encoding: [0xea,0xf3,0x01,0xb2]
+
+//------------------------------------------------------------------------------
+// Logical (shifted register)
+//------------------------------------------------------------------------------
+
+ and w12, w23, w21
+ and w16, w15, w1, lsl #1
+ and w9, w4, w10, lsl #31
+ and w3, w30, w11, lsl #0
+ and x3, x5, x7, lsl #63
+// CHECK: and w12, w23, w21 // encoding: [0xec,0x02,0x15,0x0a]
+// CHECK: and w16, w15, w1, lsl #1 // encoding: [0xf0,0x05,0x01,0x0a]
+// CHECK: and w9, w4, w10, lsl #31 // encoding: [0x89,0x7c,0x0a,0x0a]
+// CHECK: and w3, w30, w11 // encoding: [0xc3,0x03,0x0b,0x0a]
+// CHECK: and x3, x5, x7, lsl #63 // encoding: [0xa3,0xfc,0x07,0x8a]
+
+ and x5, x14, x19, asr #4
+ and w3, w17, w19, ror #31
+ and w0, w2, wzr, lsr #17
+ and w3, w30, w11, asr #0
+// CHECK: and x5, x14, x19, asr #4 // encoding: [0xc5,0x11,0x93,0x8a]
+// CHECK: and w3, w17, w19, ror #31 // encoding: [0x23,0x7e,0xd3,0x0a]
+// CHECK: and w0, w2, wzr, lsr #17 // encoding: [0x40,0x44,0x5f,0x0a]
+// CHECK: and w3, w30, w11, asr #0 // encoding: [0xc3,0x03,0x8b,0x0a]
+
+ and xzr, x4, x26, lsl #0
+ and w3, wzr, w20, ror #0
+ and x7, x20, xzr, asr #63
+// CHECK: and xzr, x4, x26 // encoding: [0x9f,0x00,0x1a,0x8a]
+// CHECK: and w3, wzr, w20, ror #0 // encoding: [0xe3,0x03,0xd4,0x0a]
+// CHECK: and x7, x20, xzr, asr #63 // encoding: [0x87,0xfe,0x9f,0x8a]
+
+ bic x13, x20, x14, lsl #47
+ bic w2, w7, w9
+ orr w2, w7, w0, asr #31
+ orr x8, x9, x10, lsl #12
+ orn x3, x5, x7, asr #0
+ orn w2, w5, w29
+// CHECK: bic x13, x20, x14, lsl #47 // encoding: [0x8d,0xbe,0x2e,0x8a]
+// CHECK: bic w2, w7, w9 // encoding: [0xe2,0x00,0x29,0x0a]
+// CHECK: orr w2, w7, w0, asr #31 // encoding: [0xe2,0x7c,0x80,0x2a]
+// CHECK: orr x8, x9, x10, lsl #12 // encoding: [0x28,0x31,0x0a,0xaa]
+// CHECK: orn x3, x5, x7, asr #0 // encoding: [0xa3,0x00,0xa7,0xaa]
+// CHECK: orn w2, w5, w29 // encoding: [0xa2,0x00,0x3d,0x2a]
+
+ ands w7, wzr, w9, lsl #1
+ ands x3, x5, x20, ror #63
+ bics w3, w5, w7, lsl #0
+ bics x3, xzr, x3, lsl #1
+// CHECK: ands w7, wzr, w9, lsl #1 // encoding: [0xe7,0x07,0x09,0x6a]
+// CHECK: ands x3, x5, x20, ror #63 // encoding: [0xa3,0xfc,0xd4,0xea]
+// CHECK: bics w3, w5, w7 // encoding: [0xa3,0x00,0x27,0x6a]
+// CHECK: bics x3, xzr, x3, lsl #1 // encoding: [0xe3,0x07,0x23,0xea]
+
+ tst w3, w7, lsl #31
+ tst x2, x20, asr #0
+// CHECK: tst w3, w7, lsl #31 // encoding: [0x7f,0x7c,0x07,0x6a]
+// CHECK: tst x2, x20, asr #0 // encoding: [0x5f,0x00,0x94,0xea]
+
+ mov x3, x6
+ mov x3, xzr
+ mov wzr, w2
+ mov w3, w5
+// CHECK: mov x3, x6 // encoding: [0xe3,0x03,0x06,0xaa]
+// CHECK: mov x3, xzr // encoding: [0xe3,0x03,0x1f,0xaa]
+// CHECK: mov wzr, w2 // encoding: [0xff,0x03,0x02,0x2a]
+// CHECK: mov w3, w5 // encoding: [0xe3,0x03,0x05,0x2a]
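+
+// Note: register-to-register "mov" is an alias for "orr Rd, zr, Rm" (logical
+// shifted-register with a zero shift), so the encodings above are ORR forms.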
+
+//------------------------------------------------------------------------------
+// Move wide (immediate)
+//------------------------------------------------------------------------------
+
+ movz w1, #65535, lsl #0
+ movz w2, #0, lsl #16
+ movn w2, #1234, lsl #0
+// CHECK: movz w1, #65535 // encoding: [0xe1,0xff,0x9f,0x52]
+// CHECK: movz w2, #0, lsl #16 // encoding: [0x02,0x00,0xa0,0x52]
+// CHECK: movn w2, #1234 // encoding: [0x42,0x9a,0x80,0x12]
+
+ movz x2, #1234, lsl #32
+ movk xzr, #4321, lsl #48
+// CHECK: movz x2, #1234, lsl #32 // encoding: [0x42,0x9a,0xc0,0xd2]
+// CHECK: movk xzr, #4321, lsl #48 // encoding: [0x3f,0x1c,0xe2,0xf2]
+
+ movz x2, #:abs_g0:sym
+ movk w3, #:abs_g0_nc:sym
+// CHECK: movz x2, #:abs_g0:sym // encoding: [0x02'A',A,0x80'A',0xd2'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g0:sym, kind: fixup_a64_movw_uabs_g0
+// CHECK: movk w3, #:abs_g0_nc:sym // encoding: [0x03'A',A,0x80'A',0x72'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g0_nc:sym, kind: fixup_a64_movw_uabs_g0_nc
+
+ movz x4, #:abs_g1:sym
+ movk w5, #:abs_g1_nc:sym
+// CHECK: movz x4, #:abs_g1:sym // encoding: [0x04'A',A,0xa0'A',0xd2'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g1:sym, kind: fixup_a64_movw_uabs_g1
+// CHECK: movk w5, #:abs_g1_nc:sym // encoding: [0x05'A',A,0xa0'A',0x72'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g1_nc:sym, kind: fixup_a64_movw_uabs_g1_nc
+
+ movz x6, #:abs_g2:sym
+ movk x7, #:abs_g2_nc:sym
+// CHECK: movz x6, #:abs_g2:sym // encoding: [0x06'A',A,0xc0'A',0xd2'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g2:sym, kind: fixup_a64_movw_uabs_g2
+// CHECK: movk x7, #:abs_g2_nc:sym // encoding: [0x07'A',A,0xc0'A',0xf2'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g2_nc:sym, kind: fixup_a64_movw_uabs_g2_nc
+
+ movz x8, #:abs_g3:sym
+ movk x9, #:abs_g3:sym
+// CHECK: movz x8, #:abs_g3:sym // encoding: [0x08'A',A,0xe0'A',0xd2'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g3:sym, kind: fixup_a64_movw_uabs_g3
+// CHECK: movk x9, #:abs_g3:sym // encoding: [0x09'A',A,0xe0'A',0xf2'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g3:sym, kind: fixup_a64_movw_uabs_g3
+
+ movn x30, #:abs_g0_s:sym
+ movz x19, #:abs_g0_s:sym
+ movn w10, #:abs_g0_s:sym
+ movz w25, #:abs_g0_s:sym
+// CHECK: movn x30, #:abs_g0_s:sym // encoding: [0x1e'A',A,0x80'A',0x92'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g0_s:sym, kind: fixup_a64_movw_sabs_g0
+// CHECK: movz x19, #:abs_g0_s:sym // encoding: [0x13'A',A,0x80'A',0x92'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g0_s:sym, kind: fixup_a64_movw_sabs_g0
+// CHECK: movn w10, #:abs_g0_s:sym // encoding: [0x0a'A',A,0x80'A',0x12'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g0_s:sym, kind: fixup_a64_movw_sabs_g0
+// CHECK: movz w25, #:abs_g0_s:sym // encoding: [0x19'A',A,0x80'A',0x12'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g0_s:sym, kind: fixup_a64_movw_sabs_g0
+
+ movn x30, #:abs_g1_s:sym
+ movz x19, #:abs_g1_s:sym
+ movn w10, #:abs_g1_s:sym
+ movz w25, #:abs_g1_s:sym
+// CHECK: movn x30, #:abs_g1_s:sym // encoding: [0x1e'A',A,0xa0'A',0x92'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g1_s:sym, kind: fixup_a64_movw_sabs_g1
+// CHECK: movz x19, #:abs_g1_s:sym // encoding: [0x13'A',A,0xa0'A',0x92'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g1_s:sym, kind: fixup_a64_movw_sabs_g1
+// CHECK: movn w10, #:abs_g1_s:sym // encoding: [0x0a'A',A,0xa0'A',0x12'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g1_s:sym, kind: fixup_a64_movw_sabs_g1
+// CHECK: movz w25, #:abs_g1_s:sym // encoding: [0x19'A',A,0xa0'A',0x12'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g1_s:sym, kind: fixup_a64_movw_sabs_g1
+
+ movn x30, #:abs_g2_s:sym
+ movz x19, #:abs_g2_s:sym
+// CHECK: movn x30, #:abs_g2_s:sym // encoding: [0x1e'A',A,0xc0'A',0x92'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g2_s:sym, kind: fixup_a64_movw_sabs_g2
+// CHECK: movz x19, #:abs_g2_s:sym // encoding: [0x13'A',A,0xc0'A',0x92'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g2_s:sym, kind: fixup_a64_movw_sabs_g2
+
+//------------------------------------------------------------------------------
+// PC-relative addressing
+//------------------------------------------------------------------------------
+
+ adr x2, loc
+ adr xzr, loc
+// CHECK: adr x2, loc // encoding: [0x02'A',A,A,0x10'A']
+// CHECK: // fixup A - offset: 0, value: loc, kind: fixup_a64_adr_prel
+// CHECK: adr xzr, loc // encoding: [0x1f'A',A,A,0x10'A']
+// CHECK: // fixup A - offset: 0, value: loc, kind: fixup_a64_adr_prel
+
+ adrp x29, loc
+// CHECK: adrp x29, loc // encoding: [0x1d'A',A,A,0x90'A']
+// CHECK: // fixup A - offset: 0, value: loc, kind: fixup_a64_adr_prel_page
+
+ adrp x30, #4096
+ adr x20, #0
+ adr x9, #-1
+ adr x5, #1048575
+// CHECK: adrp x30, #4096 // encoding: [0x1e,0x00,0x00,0xb0]
+// CHECK: adr x20, #0 // encoding: [0x14,0x00,0x00,0x10]
+// CHECK: adr x9, #-1 // encoding: [0xe9,0xff,0xff,0x70]
+// CHECK: adr x5, #1048575 // encoding: [0xe5,0xff,0x7f,0x70]
+
+ adr x9, #1048575
+ adr x2, #-1048576
+ adrp x9, #4294963200
+ adrp x20, #-4294967296
+// CHECK: adr x9, #1048575 // encoding: [0xe9,0xff,0x7f,0x70]
+// CHECK: adr x2, #-1048576 // encoding: [0x02,0x00,0x80,0x10]
+// CHECK: adrp x9, #4294963200 // encoding: [0xe9,0xff,0x7f,0xf0]
+// CHECK: adrp x20, #-4294967296 // encoding: [0x14,0x00,0x80,0x90]
+
+//------------------------------------------------------------------------------
+// System
+//------------------------------------------------------------------------------
+
+ hint #0
+ hint #127
+// CHECK: nop // encoding: [0x1f,0x20,0x03,0xd5]
+// CHECK: hint #127 // encoding: [0xff,0x2f,0x03,0xd5]
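+
+// Note: hint numbers 0-5 have the architectural aliases nop, yield, wfe, wfi,
+// sev and sevl (exercised below); other hint values print as "hint #n".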
+
+ nop
+ yield
+ wfe
+ wfi
+ sev
+ sevl
+// CHECK: nop // encoding: [0x1f,0x20,0x03,0xd5]
+// CHECK: yield // encoding: [0x3f,0x20,0x03,0xd5]
+// CHECK: wfe // encoding: [0x5f,0x20,0x03,0xd5]
+// CHECK: wfi // encoding: [0x7f,0x20,0x03,0xd5]
+// CHECK: sev // encoding: [0x9f,0x20,0x03,0xd5]
+// CHECK: sevl // encoding: [0xbf,0x20,0x03,0xd5]
+
+ clrex
+ clrex #0
+ clrex #7
+ clrex #15
+// CHECK: clrex // encoding: [0x5f,0x3f,0x03,0xd5]
+// CHECK: clrex #0 // encoding: [0x5f,0x30,0x03,0xd5]
+// CHECK: clrex #7 // encoding: [0x5f,0x37,0x03,0xd5]
+// CHECK: clrex // encoding: [0x5f,0x3f,0x03,0xd5]
+
+ dsb #0
+ dsb #12
+ dsb #15
+ dsb oshld
+ dsb oshst
+ dsb osh
+ dsb nshld
+ dsb nshst
+ dsb nsh
+ dsb ishld
+ dsb ishst
+ dsb ish
+ dsb ld
+ dsb st
+ dsb sy
+// CHECK: dsb #0 // encoding: [0x9f,0x30,0x03,0xd5]
+// CHECK: dsb #12 // encoding: [0x9f,0x3c,0x03,0xd5]
+// CHECK: dsb sy // encoding: [0x9f,0x3f,0x03,0xd5]
+// CHECK: dsb oshld // encoding: [0x9f,0x31,0x03,0xd5]
+// CHECK: dsb oshst // encoding: [0x9f,0x32,0x03,0xd5]
+// CHECK: dsb osh // encoding: [0x9f,0x33,0x03,0xd5]
+// CHECK: dsb nshld // encoding: [0x9f,0x35,0x03,0xd5]
+// CHECK: dsb nshst // encoding: [0x9f,0x36,0x03,0xd5]
+// CHECK: dsb nsh // encoding: [0x9f,0x37,0x03,0xd5]
+// CHECK: dsb ishld // encoding: [0x9f,0x39,0x03,0xd5]
+// CHECK: dsb ishst // encoding: [0x9f,0x3a,0x03,0xd5]
+// CHECK: dsb ish // encoding: [0x9f,0x3b,0x03,0xd5]
+// CHECK: dsb ld // encoding: [0x9f,0x3d,0x03,0xd5]
+// CHECK: dsb st // encoding: [0x9f,0x3e,0x03,0xd5]
+// CHECK: dsb sy // encoding: [0x9f,0x3f,0x03,0xd5]
+
+ dmb #0
+ dmb #12
+ dmb #15
+ dmb oshld
+ dmb oshst
+ dmb osh
+ dmb nshld
+ dmb nshst
+ dmb nsh
+ dmb ishld
+ dmb ishst
+ dmb ish
+ dmb ld
+ dmb st
+ dmb sy
+// CHECK: dmb #0 // encoding: [0xbf,0x30,0x03,0xd5]
+// CHECK: dmb #12 // encoding: [0xbf,0x3c,0x03,0xd5]
+// CHECK: dmb sy // encoding: [0xbf,0x3f,0x03,0xd5]
+// CHECK: dmb oshld // encoding: [0xbf,0x31,0x03,0xd5]
+// CHECK: dmb oshst // encoding: [0xbf,0x32,0x03,0xd5]
+// CHECK: dmb osh // encoding: [0xbf,0x33,0x03,0xd5]
+// CHECK: dmb nshld // encoding: [0xbf,0x35,0x03,0xd5]
+// CHECK: dmb nshst // encoding: [0xbf,0x36,0x03,0xd5]
+// CHECK: dmb nsh // encoding: [0xbf,0x37,0x03,0xd5]
+// CHECK: dmb ishld // encoding: [0xbf,0x39,0x03,0xd5]
+// CHECK: dmb ishst // encoding: [0xbf,0x3a,0x03,0xd5]
+// CHECK: dmb ish // encoding: [0xbf,0x3b,0x03,0xd5]
+// CHECK: dmb ld // encoding: [0xbf,0x3d,0x03,0xd5]
+// CHECK: dmb st // encoding: [0xbf,0x3e,0x03,0xd5]
+// CHECK: dmb sy // encoding: [0xbf,0x3f,0x03,0xd5]
+
+ isb sy
+ isb
+ isb #12
+// CHECK: isb // encoding: [0xdf,0x3f,0x03,0xd5]
+// CHECK: isb // encoding: [0xdf,0x3f,0x03,0xd5]
+// CHECK: isb #12 // encoding: [0xdf,0x3c,0x03,0xd5]
+
+ msr spsel, #0
+ msr daifset, #15
+ msr daifclr, #12
+// CHECK: msr spsel, #0 // encoding: [0xbf,0x40,0x00,0xd5]
+// CHECK: msr daifset, #15 // encoding: [0xdf,0x4f,0x03,0xd5]
+// CHECK: msr daifclr, #12 // encoding: [0xff,0x4c,0x03,0xd5]
+
+ sys #7, c5, c9, #7, x5
+ sys #0, c15, c15, #2
+// CHECK: sys #7, c5, c9, #7, x5 // encoding: [0xe5,0x59,0x0f,0xd5]
+// CHECK: sys #0, c15, c15, #2, xzr // encoding: [0x5f,0xff,0x08,0xd5]
+
+ sysl x9, #7, c5, c9, #7
+ sysl x1, #0, c15, c15, #2
+// CHECK: sysl x9, #7, c5, c9, #7 // encoding: [0xe9,0x59,0x2f,0xd5]
+// CHECK: sysl x1, #0, c15, c15, #2 // encoding: [0x41,0xff,0x28,0xd5]
+
+ ic ialluis
+ ic iallu
+ ic ivau, x9
+// CHECK: ic ialluis // encoding: [0x1f,0x71,0x08,0xd5]
+// CHECK: ic iallu // encoding: [0x1f,0x75,0x08,0xd5]
+// CHECK: ic ivau, x9 // encoding: [0x29,0x75,0x0b,0xd5]
+
+ dc zva, x12
+ dc ivac, xzr
+ dc isw, x2
+ dc cvac, x9
+ dc csw, x10
+ dc cvau, x0
+ dc civac, x3
+ dc cisw, x30
+// CHECK: dc zva, x12 // encoding: [0x2c,0x74,0x0b,0xd5]
+// CHECK: dc ivac, xzr // encoding: [0x3f,0x76,0x08,0xd5]
+// CHECK: dc isw, x2 // encoding: [0x42,0x76,0x08,0xd5]
+// CHECK: dc cvac, x9 // encoding: [0x29,0x7a,0x0b,0xd5]
+// CHECK: dc csw, x10 // encoding: [0x4a,0x7a,0x08,0xd5]
+// CHECK: dc cvau, x0 // encoding: [0x20,0x7b,0x0b,0xd5]
+// CHECK: dc civac, x3 // encoding: [0x23,0x7e,0x0b,0xd5]
+// CHECK: dc cisw, x30 // encoding: [0x5e,0x7e,0x08,0xd5]
+
+ at S1E1R, x19
+ at S1E2R, x19
+ at S1E3R, x19
+ at S1E1W, x19
+ at S1E2W, x19
+ at S1E3W, x19
+ at S1E0R, x19
+ at S1E0W, x19
+ at S12E1R, x20
+ at S12E1W, x20
+ at S12E0R, x20
+ at S12E0W, x20
+// CHECK: at s1e1r, x19 // encoding: [0x13,0x78,0x08,0xd5]
+// CHECK: at s1e2r, x19 // encoding: [0x13,0x78,0x0c,0xd5]
+// CHECK: at s1e3r, x19 // encoding: [0x13,0x78,0x0e,0xd5]
+// CHECK: at s1e1w, x19 // encoding: [0x33,0x78,0x08,0xd5]
+// CHECK: at s1e2w, x19 // encoding: [0x33,0x78,0x0c,0xd5]
+// CHECK: at s1e3w, x19 // encoding: [0x33,0x78,0x0e,0xd5]
+// CHECK: at s1e0r, x19 // encoding: [0x53,0x78,0x08,0xd5]
+// CHECK: at s1e0w, x19 // encoding: [0x73,0x78,0x08,0xd5]
+// CHECK: at s12e1r, x20 // encoding: [0x94,0x78,0x0c,0xd5]
+// CHECK: at s12e1w, x20 // encoding: [0xb4,0x78,0x0c,0xd5]
+// CHECK: at s12e0r, x20 // encoding: [0xd4,0x78,0x0c,0xd5]
+// CHECK: at s12e0w, x20 // encoding: [0xf4,0x78,0x0c,0xd5]
+
+ tlbi IPAS2E1IS, x4
+ tlbi IPAS2LE1IS, x9
+ tlbi VMALLE1IS
+ tlbi ALLE2IS
+ tlbi ALLE3IS
+ tlbi VAE1IS, x1
+ tlbi VAE2IS, x2
+ tlbi VAE3IS, x3
+ tlbi ASIDE1IS, x5
+ tlbi VAAE1IS, x9
+ tlbi ALLE1IS
+ tlbi VALE1IS, x10
+ tlbi VALE2IS, x11
+ tlbi VALE3IS, x13
+ tlbi VMALLS12E1IS
+ tlbi VAALE1IS, x14
+ tlbi IPAS2E1, x15
+ tlbi IPAS2LE1, x16
+ tlbi VMALLE1
+ tlbi ALLE2
+ tlbi ALLE3
+ tlbi VAE1, x17
+ tlbi VAE2, x18
+ tlbi VAE3, x19
+ tlbi ASIDE1, x20
+ tlbi VAAE1, x21
+ tlbi ALLE1
+ tlbi VALE1, x22
+ tlbi VALE2, x23
+ tlbi VALE3, x24
+ tlbi VMALLS12E1
+ tlbi VAALE1, x25
+// CHECK: tlbi ipas2e1is, x4 // encoding: [0x24,0x80,0x0c,0xd5]
+// CHECK: tlbi ipas2le1is, x9 // encoding: [0xa9,0x80,0x0c,0xd5]
+// CHECK: tlbi vmalle1is // encoding: [0x1f,0x83,0x08,0xd5]
+// CHECK: tlbi alle2is // encoding: [0x1f,0x83,0x0c,0xd5]
+// CHECK: tlbi alle3is // encoding: [0x1f,0x83,0x0e,0xd5]
+// CHECK: tlbi vae1is, x1 // encoding: [0x21,0x83,0x08,0xd5]
+// CHECK: tlbi vae2is, x2 // encoding: [0x22,0x83,0x0c,0xd5]
+// CHECK: tlbi vae3is, x3 // encoding: [0x23,0x83,0x0e,0xd5]
+// CHECK: tlbi aside1is, x5 // encoding: [0x45,0x83,0x08,0xd5]
+// CHECK: tlbi vaae1is, x9 // encoding: [0x69,0x83,0x08,0xd5]
+// CHECK: tlbi alle1is // encoding: [0x9f,0x83,0x0c,0xd5]
+// CHECK: tlbi vale1is, x10 // encoding: [0xaa,0x83,0x08,0xd5]
+// CHECK: tlbi vale2is, x11 // encoding: [0xab,0x83,0x0c,0xd5]
+// CHECK: tlbi vale3is, x13 // encoding: [0xad,0x83,0x0e,0xd5]
+// CHECK: tlbi vmalls12e1is // encoding: [0xdf,0x83,0x0c,0xd5]
+// CHECK: tlbi vaale1is, x14 // encoding: [0xee,0x83,0x08,0xd5]
+// CHECK: tlbi ipas2e1, x15 // encoding: [0x2f,0x84,0x0c,0xd5]
+// CHECK: tlbi ipas2le1, x16 // encoding: [0xb0,0x84,0x0c,0xd5]
+// CHECK: tlbi vmalle1 // encoding: [0x1f,0x87,0x08,0xd5]
+// CHECK: tlbi alle2 // encoding: [0x1f,0x87,0x0c,0xd5]
+// CHECK: tlbi alle3 // encoding: [0x1f,0x87,0x0e,0xd5]
+// CHECK: tlbi vae1, x17 // encoding: [0x31,0x87,0x08,0xd5]
+// CHECK: tlbi vae2, x18 // encoding: [0x32,0x87,0x0c,0xd5]
+// CHECK: tlbi vae3, x19 // encoding: [0x33,0x87,0x0e,0xd5]
+// CHECK: tlbi aside1, x20 // encoding: [0x54,0x87,0x08,0xd5]
+// CHECK: tlbi vaae1, x21 // encoding: [0x75,0x87,0x08,0xd5]
+// CHECK: tlbi alle1 // encoding: [0x9f,0x87,0x0c,0xd5]
+// CHECK: tlbi vale1, x22 // encoding: [0xb6,0x87,0x08,0xd5]
+// CHECK: tlbi vale2, x23 // encoding: [0xb7,0x87,0x0c,0xd5]
+// CHECK: tlbi vale3, x24 // encoding: [0xb8,0x87,0x0e,0xd5]
+// CHECK: tlbi vmalls12e1 // encoding: [0xdf,0x87,0x0c,0xd5]
+// CHECK: tlbi vaale1, x25 // encoding: [0xf9,0x87,0x08,0xd5]
+
+ msr TEECR32_EL1, x12
+ msr OSDTRRX_EL1, x12
+ msr MDCCINT_EL1, x12
+ msr MDSCR_EL1, x12
+ msr OSDTRTX_EL1, x12
+ msr DBGDTR_EL0, x12
+ msr DBGDTRTX_EL0, x12
+ msr OSECCR_EL1, x12
+ msr DBGVCR32_EL2, x12
+ msr DBGBVR0_EL1, x12
+ msr DBGBVR1_EL1, x12
+ msr DBGBVR2_EL1, x12
+ msr DBGBVR3_EL1, x12
+ msr DBGBVR4_EL1, x12
+ msr DBGBVR5_EL1, x12
+ msr DBGBVR6_EL1, x12
+ msr DBGBVR7_EL1, x12
+ msr DBGBVR8_EL1, x12
+ msr DBGBVR9_EL1, x12
+ msr DBGBVR10_EL1, x12
+ msr DBGBVR11_EL1, x12
+ msr DBGBVR12_EL1, x12
+ msr DBGBVR13_EL1, x12
+ msr DBGBVR14_EL1, x12
+ msr DBGBVR15_EL1, x12
+ msr DBGBCR0_EL1, x12
+ msr DBGBCR1_EL1, x12
+ msr DBGBCR2_EL1, x12
+ msr DBGBCR3_EL1, x12
+ msr DBGBCR4_EL1, x12
+ msr DBGBCR5_EL1, x12
+ msr DBGBCR6_EL1, x12
+ msr DBGBCR7_EL1, x12
+ msr DBGBCR8_EL1, x12
+ msr DBGBCR9_EL1, x12
+ msr DBGBCR10_EL1, x12
+ msr DBGBCR11_EL1, x12
+ msr DBGBCR12_EL1, x12
+ msr DBGBCR13_EL1, x12
+ msr DBGBCR14_EL1, x12
+ msr DBGBCR15_EL1, x12
+ msr DBGWVR0_EL1, x12
+ msr DBGWVR1_EL1, x12
+ msr DBGWVR2_EL1, x12
+ msr DBGWVR3_EL1, x12
+ msr DBGWVR4_EL1, x12
+ msr DBGWVR5_EL1, x12
+ msr DBGWVR6_EL1, x12
+ msr DBGWVR7_EL1, x12
+ msr DBGWVR8_EL1, x12
+ msr DBGWVR9_EL1, x12
+ msr DBGWVR10_EL1, x12
+ msr DBGWVR11_EL1, x12
+ msr DBGWVR12_EL1, x12
+ msr DBGWVR13_EL1, x12
+ msr DBGWVR14_EL1, x12
+ msr DBGWVR15_EL1, x12
+ msr DBGWCR0_EL1, x12
+ msr DBGWCR1_EL1, x12
+ msr DBGWCR2_EL1, x12
+ msr DBGWCR3_EL1, x12
+ msr DBGWCR4_EL1, x12
+ msr DBGWCR5_EL1, x12
+ msr DBGWCR6_EL1, x12
+ msr DBGWCR7_EL1, x12
+ msr DBGWCR8_EL1, x12
+ msr DBGWCR9_EL1, x12
+ msr DBGWCR10_EL1, x12
+ msr DBGWCR11_EL1, x12
+ msr DBGWCR12_EL1, x12
+ msr DBGWCR13_EL1, x12
+ msr DBGWCR14_EL1, x12
+ msr DBGWCR15_EL1, x12
+ msr TEEHBR32_EL1, x12
+ msr OSLAR_EL1, x12
+ msr OSDLR_EL1, x12
+ msr DBGPRCR_EL1, x12
+ msr DBGCLAIMSET_EL1, x12
+ msr DBGCLAIMCLR_EL1, x12
+ msr CSSELR_EL1, x12
+ msr VPIDR_EL2, x12
+ msr VMPIDR_EL2, x12
+ msr SCTLR_EL1, x12
+ msr SCTLR_EL2, x12
+ msr SCTLR_EL3, x12
+ msr ACTLR_EL1, x12
+ msr ACTLR_EL2, x12
+ msr ACTLR_EL3, x12
+ msr CPACR_EL1, x12
+ msr HCR_EL2, x12
+ msr SCR_EL3, x12
+ msr MDCR_EL2, x12
+ msr SDER32_EL3, x12
+ msr CPTR_EL2, x12
+ msr CPTR_EL3, x12
+ msr HSTR_EL2, x12
+ msr HACR_EL2, x12
+ msr MDCR_EL3, x12
+ msr TTBR0_EL1, x12
+ msr TTBR0_EL2, x12
+ msr TTBR0_EL3, x12
+ msr TTBR1_EL1, x12
+ msr TCR_EL1, x12
+ msr TCR_EL2, x12
+ msr TCR_EL3, x12
+ msr VTTBR_EL2, x12
+ msr VTCR_EL2, x12
+ msr DACR32_EL2, x12
+ msr SPSR_EL1, x12
+ msr SPSR_EL2, x12
+ msr SPSR_EL3, x12
+ msr ELR_EL1, x12
+ msr ELR_EL2, x12
+ msr ELR_EL3, x12
+ msr SP_EL0, x12
+ msr SP_EL1, x12
+ msr SP_EL2, x12
+ msr SPSel, x12
+ msr NZCV, x12
+ msr DAIF, x12
+ msr CurrentEL, x12
+ msr SPSR_irq, x12
+ msr SPSR_abt, x12
+ msr SPSR_und, x12
+ msr SPSR_fiq, x12
+ msr FPCR, x12
+ msr FPSR, x12
+ msr DSPSR_EL0, x12
+ msr DLR_EL0, x12
+ msr IFSR32_EL2, x12
+ msr AFSR0_EL1, x12
+ msr AFSR0_EL2, x12
+ msr AFSR0_EL3, x12
+ msr AFSR1_EL1, x12
+ msr AFSR1_EL2, x12
+ msr AFSR1_EL3, x12
+ msr ESR_EL1, x12
+ msr ESR_EL2, x12
+ msr ESR_EL3, x12
+ msr FPEXC32_EL2, x12
+ msr FAR_EL1, x12
+ msr FAR_EL2, x12
+ msr FAR_EL3, x12
+ msr HPFAR_EL2, x12
+ msr PAR_EL1, x12
+ msr PMCR_EL0, x12
+ msr PMCNTENSET_EL0, x12
+ msr PMCNTENCLR_EL0, x12
+ msr PMOVSCLR_EL0, x12
+ msr PMSELR_EL0, x12
+ msr PMCCNTR_EL0, x12
+ msr PMXEVTYPER_EL0, x12
+ msr PMXEVCNTR_EL0, x12
+ msr PMUSERENR_EL0, x12
+ msr PMINTENSET_EL1, x12
+ msr PMINTENCLR_EL1, x12
+ msr PMOVSSET_EL0, x12
+ msr MAIR_EL1, x12
+ msr MAIR_EL2, x12
+ msr MAIR_EL3, x12
+ msr AMAIR_EL1, x12
+ msr AMAIR_EL2, x12
+ msr AMAIR_EL3, x12
+ msr VBAR_EL1, x12
+ msr VBAR_EL2, x12
+ msr VBAR_EL3, x12
+ msr RMR_EL1, x12
+ msr RMR_EL2, x12
+ msr RMR_EL3, x12
+ msr CONTEXTIDR_EL1, x12
+ msr TPIDR_EL0, x12
+ msr TPIDR_EL2, x12
+ msr TPIDR_EL3, x12
+ msr TPIDRRO_EL0, x12
+ msr TPIDR_EL1, x12
+ msr CNTFRQ_EL0, x12
+ msr CNTVOFF_EL2, x12
+ msr CNTKCTL_EL1, x12
+ msr CNTHCTL_EL2, x12
+ msr CNTP_TVAL_EL0, x12
+ msr CNTHP_TVAL_EL2, x12
+ msr CNTPS_TVAL_EL1, x12
+ msr CNTP_CTL_EL0, x12
+ msr CNTHP_CTL_EL2, x12
+ msr CNTPS_CTL_EL1, x12
+ msr CNTP_CVAL_EL0, x12
+ msr CNTHP_CVAL_EL2, x12
+ msr CNTPS_CVAL_EL1, x12
+ msr CNTV_TVAL_EL0, x12
+ msr CNTV_CTL_EL0, x12
+ msr CNTV_CVAL_EL0, x12
+ msr PMEVCNTR0_EL0, x12
+ msr PMEVCNTR1_EL0, x12
+ msr PMEVCNTR2_EL0, x12
+ msr PMEVCNTR3_EL0, x12
+ msr PMEVCNTR4_EL0, x12
+ msr PMEVCNTR5_EL0, x12
+ msr PMEVCNTR6_EL0, x12
+ msr PMEVCNTR7_EL0, x12
+ msr PMEVCNTR8_EL0, x12
+ msr PMEVCNTR9_EL0, x12
+ msr PMEVCNTR10_EL0, x12
+ msr PMEVCNTR11_EL0, x12
+ msr PMEVCNTR12_EL0, x12
+ msr PMEVCNTR13_EL0, x12
+ msr PMEVCNTR14_EL0, x12
+ msr PMEVCNTR15_EL0, x12
+ msr PMEVCNTR16_EL0, x12
+ msr PMEVCNTR17_EL0, x12
+ msr PMEVCNTR18_EL0, x12
+ msr PMEVCNTR19_EL0, x12
+ msr PMEVCNTR20_EL0, x12
+ msr PMEVCNTR21_EL0, x12
+ msr PMEVCNTR22_EL0, x12
+ msr PMEVCNTR23_EL0, x12
+ msr PMEVCNTR24_EL0, x12
+ msr PMEVCNTR25_EL0, x12
+ msr PMEVCNTR26_EL0, x12
+ msr PMEVCNTR27_EL0, x12
+ msr PMEVCNTR28_EL0, x12
+ msr PMEVCNTR29_EL0, x12
+ msr PMEVCNTR30_EL0, x12
+ msr PMCCFILTR_EL0, x12
+ msr PMEVTYPER0_EL0, x12
+ msr PMEVTYPER1_EL0, x12
+ msr PMEVTYPER2_EL0, x12
+ msr PMEVTYPER3_EL0, x12
+ msr PMEVTYPER4_EL0, x12
+ msr PMEVTYPER5_EL0, x12
+ msr PMEVTYPER6_EL0, x12
+ msr PMEVTYPER7_EL0, x12
+ msr PMEVTYPER8_EL0, x12
+ msr PMEVTYPER9_EL0, x12
+ msr PMEVTYPER10_EL0, x12
+ msr PMEVTYPER11_EL0, x12
+ msr PMEVTYPER12_EL0, x12
+ msr PMEVTYPER13_EL0, x12
+ msr PMEVTYPER14_EL0, x12
+ msr PMEVTYPER15_EL0, x12
+ msr PMEVTYPER16_EL0, x12
+ msr PMEVTYPER17_EL0, x12
+ msr PMEVTYPER18_EL0, x12
+ msr PMEVTYPER19_EL0, x12
+ msr PMEVTYPER20_EL0, x12
+ msr PMEVTYPER21_EL0, x12
+ msr PMEVTYPER22_EL0, x12
+ msr PMEVTYPER23_EL0, x12
+ msr PMEVTYPER24_EL0, x12
+ msr PMEVTYPER25_EL0, x12
+ msr PMEVTYPER26_EL0, x12
+ msr PMEVTYPER27_EL0, x12
+ msr PMEVTYPER28_EL0, x12
+ msr PMEVTYPER29_EL0, x12
+ msr PMEVTYPER30_EL0, x12
+// CHECK: msr teecr32_el1, x12 // encoding: [0x0c,0x00,0x12,0xd5]
+// CHECK: msr osdtrrx_el1, x12 // encoding: [0x4c,0x00,0x10,0xd5]
+// CHECK: msr mdccint_el1, x12 // encoding: [0x0c,0x02,0x10,0xd5]
+// CHECK: msr mdscr_el1, x12 // encoding: [0x4c,0x02,0x10,0xd5]
+// CHECK: msr osdtrtx_el1, x12 // encoding: [0x4c,0x03,0x10,0xd5]
+// CHECK: msr dbgdtr_el0, x12 // encoding: [0x0c,0x04,0x13,0xd5]
+// CHECK: msr dbgdtrtx_el0, x12 // encoding: [0x0c,0x05,0x13,0xd5]
+// CHECK: msr oseccr_el1, x12 // encoding: [0x4c,0x06,0x10,0xd5]
+// CHECK: msr dbgvcr32_el2, x12 // encoding: [0x0c,0x07,0x14,0xd5]
+// CHECK: msr dbgbvr0_el1, x12 // encoding: [0x8c,0x00,0x10,0xd5]
+// CHECK: msr dbgbvr1_el1, x12 // encoding: [0x8c,0x01,0x10,0xd5]
+// CHECK: msr dbgbvr2_el1, x12 // encoding: [0x8c,0x02,0x10,0xd5]
+// CHECK: msr dbgbvr3_el1, x12 // encoding: [0x8c,0x03,0x10,0xd5]
+// CHECK: msr dbgbvr4_el1, x12 // encoding: [0x8c,0x04,0x10,0xd5]
+// CHECK: msr dbgbvr5_el1, x12 // encoding: [0x8c,0x05,0x10,0xd5]
+// CHECK: msr dbgbvr6_el1, x12 // encoding: [0x8c,0x06,0x10,0xd5]
+// CHECK: msr dbgbvr7_el1, x12 // encoding: [0x8c,0x07,0x10,0xd5]
+// CHECK: msr dbgbvr8_el1, x12 // encoding: [0x8c,0x08,0x10,0xd5]
+// CHECK: msr dbgbvr9_el1, x12 // encoding: [0x8c,0x09,0x10,0xd5]
+// CHECK: msr dbgbvr10_el1, x12 // encoding: [0x8c,0x0a,0x10,0xd5]
+// CHECK: msr dbgbvr11_el1, x12 // encoding: [0x8c,0x0b,0x10,0xd5]
+// CHECK: msr dbgbvr12_el1, x12 // encoding: [0x8c,0x0c,0x10,0xd5]
+// CHECK: msr dbgbvr13_el1, x12 // encoding: [0x8c,0x0d,0x10,0xd5]
+// CHECK: msr dbgbvr14_el1, x12 // encoding: [0x8c,0x0e,0x10,0xd5]
+// CHECK: msr dbgbvr15_el1, x12 // encoding: [0x8c,0x0f,0x10,0xd5]
+// CHECK: msr dbgbcr0_el1, x12 // encoding: [0xac,0x00,0x10,0xd5]
+// CHECK: msr dbgbcr1_el1, x12 // encoding: [0xac,0x01,0x10,0xd5]
+// CHECK: msr dbgbcr2_el1, x12 // encoding: [0xac,0x02,0x10,0xd5]
+// CHECK: msr dbgbcr3_el1, x12 // encoding: [0xac,0x03,0x10,0xd5]
+// CHECK: msr dbgbcr4_el1, x12 // encoding: [0xac,0x04,0x10,0xd5]
+// CHECK: msr dbgbcr5_el1, x12 // encoding: [0xac,0x05,0x10,0xd5]
+// CHECK: msr dbgbcr6_el1, x12 // encoding: [0xac,0x06,0x10,0xd5]
+// CHECK: msr dbgbcr7_el1, x12 // encoding: [0xac,0x07,0x10,0xd5]
+// CHECK: msr dbgbcr8_el1, x12 // encoding: [0xac,0x08,0x10,0xd5]
+// CHECK: msr dbgbcr9_el1, x12 // encoding: [0xac,0x09,0x10,0xd5]
+// CHECK: msr dbgbcr10_el1, x12 // encoding: [0xac,0x0a,0x10,0xd5]
+// CHECK: msr dbgbcr11_el1, x12 // encoding: [0xac,0x0b,0x10,0xd5]
+// CHECK: msr dbgbcr12_el1, x12 // encoding: [0xac,0x0c,0x10,0xd5]
+// CHECK: msr dbgbcr13_el1, x12 // encoding: [0xac,0x0d,0x10,0xd5]
+// CHECK: msr dbgbcr14_el1, x12 // encoding: [0xac,0x0e,0x10,0xd5]
+// CHECK: msr dbgbcr15_el1, x12 // encoding: [0xac,0x0f,0x10,0xd5]
+// CHECK: msr dbgwvr0_el1, x12 // encoding: [0xcc,0x00,0x10,0xd5]
+// CHECK: msr dbgwvr1_el1, x12 // encoding: [0xcc,0x01,0x10,0xd5]
+// CHECK: msr dbgwvr2_el1, x12 // encoding: [0xcc,0x02,0x10,0xd5]
+// CHECK: msr dbgwvr3_el1, x12 // encoding: [0xcc,0x03,0x10,0xd5]
+// CHECK: msr dbgwvr4_el1, x12 // encoding: [0xcc,0x04,0x10,0xd5]
+// CHECK: msr dbgwvr5_el1, x12 // encoding: [0xcc,0x05,0x10,0xd5]
+// CHECK: msr dbgwvr6_el1, x12 // encoding: [0xcc,0x06,0x10,0xd5]
+// CHECK: msr dbgwvr7_el1, x12 // encoding: [0xcc,0x07,0x10,0xd5]
+// CHECK: msr dbgwvr8_el1, x12 // encoding: [0xcc,0x08,0x10,0xd5]
+// CHECK: msr dbgwvr9_el1, x12 // encoding: [0xcc,0x09,0x10,0xd5]
+// CHECK: msr dbgwvr10_el1, x12 // encoding: [0xcc,0x0a,0x10,0xd5]
+// CHECK: msr dbgwvr11_el1, x12 // encoding: [0xcc,0x0b,0x10,0xd5]
+// CHECK: msr dbgwvr12_el1, x12 // encoding: [0xcc,0x0c,0x10,0xd5]
+// CHECK: msr dbgwvr13_el1, x12 // encoding: [0xcc,0x0d,0x10,0xd5]
+// CHECK: msr dbgwvr14_el1, x12 // encoding: [0xcc,0x0e,0x10,0xd5]
+// CHECK: msr dbgwvr15_el1, x12 // encoding: [0xcc,0x0f,0x10,0xd5]
+// CHECK: msr dbgwcr0_el1, x12 // encoding: [0xec,0x00,0x10,0xd5]
+// CHECK: msr dbgwcr1_el1, x12 // encoding: [0xec,0x01,0x10,0xd5]
+// CHECK: msr dbgwcr2_el1, x12 // encoding: [0xec,0x02,0x10,0xd5]
+// CHECK: msr dbgwcr3_el1, x12 // encoding: [0xec,0x03,0x10,0xd5]
+// CHECK: msr dbgwcr4_el1, x12 // encoding: [0xec,0x04,0x10,0xd5]
+// CHECK: msr dbgwcr5_el1, x12 // encoding: [0xec,0x05,0x10,0xd5]
+// CHECK: msr dbgwcr6_el1, x12 // encoding: [0xec,0x06,0x10,0xd5]
+// CHECK: msr dbgwcr7_el1, x12 // encoding: [0xec,0x07,0x10,0xd5]
+// CHECK: msr dbgwcr8_el1, x12 // encoding: [0xec,0x08,0x10,0xd5]
+// CHECK: msr dbgwcr9_el1, x12 // encoding: [0xec,0x09,0x10,0xd5]
+// CHECK: msr dbgwcr10_el1, x12 // encoding: [0xec,0x0a,0x10,0xd5]
+// CHECK: msr dbgwcr11_el1, x12 // encoding: [0xec,0x0b,0x10,0xd5]
+// CHECK: msr dbgwcr12_el1, x12 // encoding: [0xec,0x0c,0x10,0xd5]
+// CHECK: msr dbgwcr13_el1, x12 // encoding: [0xec,0x0d,0x10,0xd5]
+// CHECK: msr dbgwcr14_el1, x12 // encoding: [0xec,0x0e,0x10,0xd5]
+// CHECK: msr dbgwcr15_el1, x12 // encoding: [0xec,0x0f,0x10,0xd5]
+// CHECK: msr teehbr32_el1, x12 // encoding: [0x0c,0x10,0x12,0xd5]
+// CHECK: msr oslar_el1, x12 // encoding: [0x8c,0x10,0x10,0xd5]
+// CHECK: msr osdlr_el1, x12 // encoding: [0x8c,0x13,0x10,0xd5]
+// CHECK: msr dbgprcr_el1, x12 // encoding: [0x8c,0x14,0x10,0xd5]
+// CHECK: msr dbgclaimset_el1, x12 // encoding: [0xcc,0x78,0x10,0xd5]
+// CHECK: msr dbgclaimclr_el1, x12 // encoding: [0xcc,0x79,0x10,0xd5]
+// CHECK: msr csselr_el1, x12 // encoding: [0x0c,0x00,0x1a,0xd5]
+// CHECK: msr vpidr_el2, x12 // encoding: [0x0c,0x00,0x1c,0xd5]
+// CHECK: msr vmpidr_el2, x12 // encoding: [0xac,0x00,0x1c,0xd5]
+// CHECK: msr sctlr_el1, x12 // encoding: [0x0c,0x10,0x18,0xd5]
+// CHECK: msr sctlr_el2, x12 // encoding: [0x0c,0x10,0x1c,0xd5]
+// CHECK: msr sctlr_el3, x12 // encoding: [0x0c,0x10,0x1e,0xd5]
+// CHECK: msr actlr_el1, x12 // encoding: [0x2c,0x10,0x18,0xd5]
+// CHECK: msr actlr_el2, x12 // encoding: [0x2c,0x10,0x1c,0xd5]
+// CHECK: msr actlr_el3, x12 // encoding: [0x2c,0x10,0x1e,0xd5]
+// CHECK: msr cpacr_el1, x12 // encoding: [0x4c,0x10,0x18,0xd5]
+// CHECK: msr hcr_el2, x12 // encoding: [0x0c,0x11,0x1c,0xd5]
+// CHECK: msr scr_el3, x12 // encoding: [0x0c,0x11,0x1e,0xd5]
+// CHECK: msr mdcr_el2, x12 // encoding: [0x2c,0x11,0x1c,0xd5]
+// CHECK: msr sder32_el3, x12 // encoding: [0x2c,0x11,0x1e,0xd5]
+// CHECK: msr cptr_el2, x12 // encoding: [0x4c,0x11,0x1c,0xd5]
+// CHECK: msr cptr_el3, x12 // encoding: [0x4c,0x11,0x1e,0xd5]
+// CHECK: msr hstr_el2, x12 // encoding: [0x6c,0x11,0x1c,0xd5]
+// CHECK: msr hacr_el2, x12 // encoding: [0xec,0x11,0x1c,0xd5]
+// CHECK: msr mdcr_el3, x12 // encoding: [0x2c,0x13,0x1e,0xd5]
+// CHECK: msr ttbr0_el1, x12 // encoding: [0x0c,0x20,0x18,0xd5]
+// CHECK: msr ttbr0_el2, x12 // encoding: [0x0c,0x20,0x1c,0xd5]
+// CHECK: msr ttbr0_el3, x12 // encoding: [0x0c,0x20,0x1e,0xd5]
+// CHECK: msr ttbr1_el1, x12 // encoding: [0x2c,0x20,0x18,0xd5]
+// CHECK: msr tcr_el1, x12 // encoding: [0x4c,0x20,0x18,0xd5]
+// CHECK: msr tcr_el2, x12 // encoding: [0x4c,0x20,0x1c,0xd5]
+// CHECK: msr tcr_el3, x12 // encoding: [0x4c,0x20,0x1e,0xd5]
+// CHECK: msr vttbr_el2, x12 // encoding: [0x0c,0x21,0x1c,0xd5]
+// CHECK: msr vtcr_el2, x12 // encoding: [0x4c,0x21,0x1c,0xd5]
+// CHECK: msr dacr32_el2, x12 // encoding: [0x0c,0x30,0x1c,0xd5]
+// CHECK: msr spsr_el1, x12 // encoding: [0x0c,0x40,0x18,0xd5]
+// CHECK: msr spsr_el2, x12 // encoding: [0x0c,0x40,0x1c,0xd5]
+// CHECK: msr spsr_el3, x12 // encoding: [0x0c,0x40,0x1e,0xd5]
+// CHECK: msr elr_el1, x12 // encoding: [0x2c,0x40,0x18,0xd5]
+// CHECK: msr elr_el2, x12 // encoding: [0x2c,0x40,0x1c,0xd5]
+// CHECK: msr elr_el3, x12 // encoding: [0x2c,0x40,0x1e,0xd5]
+// CHECK: msr sp_el0, x12 // encoding: [0x0c,0x41,0x18,0xd5]
+// CHECK: msr sp_el1, x12 // encoding: [0x0c,0x41,0x1c,0xd5]
+// CHECK: msr sp_el2, x12 // encoding: [0x0c,0x41,0x1e,0xd5]
+// CHECK: msr spsel, x12 // encoding: [0x0c,0x42,0x18,0xd5]
+// CHECK: msr nzcv, x12 // encoding: [0x0c,0x42,0x1b,0xd5]
+// CHECK: msr daif, x12 // encoding: [0x2c,0x42,0x1b,0xd5]
+// CHECK: msr currentel, x12 // encoding: [0x4c,0x42,0x18,0xd5]
+// CHECK: msr spsr_irq, x12 // encoding: [0x0c,0x43,0x1c,0xd5]
+// CHECK: msr spsr_abt, x12 // encoding: [0x2c,0x43,0x1c,0xd5]
+// CHECK: msr spsr_und, x12 // encoding: [0x4c,0x43,0x1c,0xd5]
+// CHECK: msr spsr_fiq, x12 // encoding: [0x6c,0x43,0x1c,0xd5]
+// CHECK: msr fpcr, x12 // encoding: [0x0c,0x44,0x1b,0xd5]
+// CHECK: msr fpsr, x12 // encoding: [0x2c,0x44,0x1b,0xd5]
+// CHECK: msr dspsr_el0, x12 // encoding: [0x0c,0x45,0x1b,0xd5]
+// CHECK: msr dlr_el0, x12 // encoding: [0x2c,0x45,0x1b,0xd5]
+// CHECK: msr ifsr32_el2, x12 // encoding: [0x2c,0x50,0x1c,0xd5]
+// CHECK: msr afsr0_el1, x12 // encoding: [0x0c,0x51,0x18,0xd5]
+// CHECK: msr afsr0_el2, x12 // encoding: [0x0c,0x51,0x1c,0xd5]
+// CHECK: msr afsr0_el3, x12 // encoding: [0x0c,0x51,0x1e,0xd5]
+// CHECK: msr afsr1_el1, x12 // encoding: [0x2c,0x51,0x18,0xd5]
+// CHECK: msr afsr1_el2, x12 // encoding: [0x2c,0x51,0x1c,0xd5]
+// CHECK: msr afsr1_el3, x12 // encoding: [0x2c,0x51,0x1e,0xd5]
+// CHECK: msr esr_el1, x12 // encoding: [0x0c,0x52,0x18,0xd5]
+// CHECK: msr esr_el2, x12 // encoding: [0x0c,0x52,0x1c,0xd5]
+// CHECK: msr esr_el3, x12 // encoding: [0x0c,0x52,0x1e,0xd5]
+// CHECK: msr fpexc32_el2, x12 // encoding: [0x0c,0x53,0x1c,0xd5]
+// CHECK: msr far_el1, x12 // encoding: [0x0c,0x60,0x18,0xd5]
+// CHECK: msr far_el2, x12 // encoding: [0x0c,0x60,0x1c,0xd5]
+// CHECK: msr far_el3, x12 // encoding: [0x0c,0x60,0x1e,0xd5]
+// CHECK: msr hpfar_el2, x12 // encoding: [0x8c,0x60,0x1c,0xd5]
+// CHECK: msr par_el1, x12 // encoding: [0x0c,0x74,0x18,0xd5]
+// CHECK: msr pmcr_el0, x12 // encoding: [0x0c,0x9c,0x1b,0xd5]
+// CHECK: msr pmcntenset_el0, x12 // encoding: [0x2c,0x9c,0x1b,0xd5]
+// CHECK: msr pmcntenclr_el0, x12 // encoding: [0x4c,0x9c,0x1b,0xd5]
+// CHECK: msr pmovsclr_el0, x12 // encoding: [0x6c,0x9c,0x1b,0xd5]
+// CHECK: msr pmselr_el0, x12 // encoding: [0xac,0x9c,0x1b,0xd5]
+// CHECK: msr pmccntr_el0, x12 // encoding: [0x0c,0x9d,0x1b,0xd5]
+// CHECK: msr pmxevtyper_el0, x12 // encoding: [0x2c,0x9d,0x1b,0xd5]
+// CHECK: msr pmxevcntr_el0, x12 // encoding: [0x4c,0x9d,0x1b,0xd5]
+// CHECK: msr pmuserenr_el0, x12 // encoding: [0x0c,0x9e,0x1b,0xd5]
+// CHECK: msr pmintenset_el1, x12 // encoding: [0x2c,0x9e,0x18,0xd5]
+// CHECK: msr pmintenclr_el1, x12 // encoding: [0x4c,0x9e,0x18,0xd5]
+// CHECK: msr pmovsset_el0, x12 // encoding: [0x6c,0x9e,0x1b,0xd5]
+// CHECK: msr mair_el1, x12 // encoding: [0x0c,0xa2,0x18,0xd5]
+// CHECK: msr mair_el2, x12 // encoding: [0x0c,0xa2,0x1c,0xd5]
+// CHECK: msr mair_el3, x12 // encoding: [0x0c,0xa2,0x1e,0xd5]
+// CHECK: msr amair_el1, x12 // encoding: [0x0c,0xa3,0x18,0xd5]
+// CHECK: msr amair_el2, x12 // encoding: [0x0c,0xa3,0x1c,0xd5]
+// CHECK: msr amair_el3, x12 // encoding: [0x0c,0xa3,0x1e,0xd5]
+// CHECK: msr vbar_el1, x12 // encoding: [0x0c,0xc0,0x18,0xd5]
+// CHECK: msr vbar_el2, x12 // encoding: [0x0c,0xc0,0x1c,0xd5]
+// CHECK: msr vbar_el3, x12 // encoding: [0x0c,0xc0,0x1e,0xd5]
+// CHECK: msr rmr_el1, x12 // encoding: [0x4c,0xc0,0x18,0xd5]
+// CHECK: msr rmr_el2, x12 // encoding: [0x4c,0xc0,0x1c,0xd5]
+// CHECK: msr rmr_el3, x12 // encoding: [0x4c,0xc0,0x1e,0xd5]
+// CHECK: msr contextidr_el1, x12 // encoding: [0x2c,0xd0,0x18,0xd5]
+// CHECK: msr tpidr_el0, x12 // encoding: [0x4c,0xd0,0x1b,0xd5]
+// CHECK: msr tpidr_el2, x12 // encoding: [0x4c,0xd0,0x1c,0xd5]
+// CHECK: msr tpidr_el3, x12 // encoding: [0x4c,0xd0,0x1e,0xd5]
+// CHECK: msr tpidrro_el0, x12 // encoding: [0x6c,0xd0,0x1b,0xd5]
+// CHECK: msr tpidr_el1, x12 // encoding: [0x8c,0xd0,0x18,0xd5]
+// CHECK: msr cntfrq_el0, x12 // encoding: [0x0c,0xe0,0x1b,0xd5]
+// CHECK: msr cntvoff_el2, x12 // encoding: [0x6c,0xe0,0x1c,0xd5]
+// CHECK: msr cntkctl_el1, x12 // encoding: [0x0c,0xe1,0x18,0xd5]
+// CHECK: msr cnthctl_el2, x12 // encoding: [0x0c,0xe1,0x1c,0xd5]
+// CHECK: msr cntp_tval_el0, x12 // encoding: [0x0c,0xe2,0x1b,0xd5]
+// CHECK: msr cnthp_tval_el2, x12 // encoding: [0x0c,0xe2,0x1c,0xd5]
+// CHECK: msr cntps_tval_el1, x12 // encoding: [0x0c,0xe2,0x1f,0xd5]
+// CHECK: msr cntp_ctl_el0, x12 // encoding: [0x2c,0xe2,0x1b,0xd5]
+// CHECK: msr cnthp_ctl_el2, x12 // encoding: [0x2c,0xe2,0x1c,0xd5]
+// CHECK: msr cntps_ctl_el1, x12 // encoding: [0x2c,0xe2,0x1f,0xd5]
+// CHECK: msr cntp_cval_el0, x12 // encoding: [0x4c,0xe2,0x1b,0xd5]
+// CHECK: msr cnthp_cval_el2, x12 // encoding: [0x4c,0xe2,0x1c,0xd5]
+// CHECK: msr cntps_cval_el1, x12 // encoding: [0x4c,0xe2,0x1f,0xd5]
+// CHECK: msr cntv_tval_el0, x12 // encoding: [0x0c,0xe3,0x1b,0xd5]
+// CHECK: msr cntv_ctl_el0, x12 // encoding: [0x2c,0xe3,0x1b,0xd5]
+// CHECK: msr cntv_cval_el0, x12 // encoding: [0x4c,0xe3,0x1b,0xd5]
+// CHECK: msr pmevcntr0_el0, x12 // encoding: [0x0c,0xe8,0x1b,0xd5]
+// CHECK: msr pmevcntr1_el0, x12 // encoding: [0x2c,0xe8,0x1b,0xd5]
+// CHECK: msr pmevcntr2_el0, x12 // encoding: [0x4c,0xe8,0x1b,0xd5]
+// CHECK: msr pmevcntr3_el0, x12 // encoding: [0x6c,0xe8,0x1b,0xd5]
+// CHECK: msr pmevcntr4_el0, x12 // encoding: [0x8c,0xe8,0x1b,0xd5]
+// CHECK: msr pmevcntr5_el0, x12 // encoding: [0xac,0xe8,0x1b,0xd5]
+// CHECK: msr pmevcntr6_el0, x12 // encoding: [0xcc,0xe8,0x1b,0xd5]
+// CHECK: msr pmevcntr7_el0, x12 // encoding: [0xec,0xe8,0x1b,0xd5]
+// CHECK: msr pmevcntr8_el0, x12 // encoding: [0x0c,0xe9,0x1b,0xd5]
+// CHECK: msr pmevcntr9_el0, x12 // encoding: [0x2c,0xe9,0x1b,0xd5]
+// CHECK: msr pmevcntr10_el0, x12 // encoding: [0x4c,0xe9,0x1b,0xd5]
+// CHECK: msr pmevcntr11_el0, x12 // encoding: [0x6c,0xe9,0x1b,0xd5]
+// CHECK: msr pmevcntr12_el0, x12 // encoding: [0x8c,0xe9,0x1b,0xd5]
+// CHECK: msr pmevcntr13_el0, x12 // encoding: [0xac,0xe9,0x1b,0xd5]
+// CHECK: msr pmevcntr14_el0, x12 // encoding: [0xcc,0xe9,0x1b,0xd5]
+// CHECK: msr pmevcntr15_el0, x12 // encoding: [0xec,0xe9,0x1b,0xd5]
+// CHECK: msr pmevcntr16_el0, x12 // encoding: [0x0c,0xea,0x1b,0xd5]
+// CHECK: msr pmevcntr17_el0, x12 // encoding: [0x2c,0xea,0x1b,0xd5]
+// CHECK: msr pmevcntr18_el0, x12 // encoding: [0x4c,0xea,0x1b,0xd5]
+// CHECK: msr pmevcntr19_el0, x12 // encoding: [0x6c,0xea,0x1b,0xd5]
+// CHECK: msr pmevcntr20_el0, x12 // encoding: [0x8c,0xea,0x1b,0xd5]
+// CHECK: msr pmevcntr21_el0, x12 // encoding: [0xac,0xea,0x1b,0xd5]
+// CHECK: msr pmevcntr22_el0, x12 // encoding: [0xcc,0xea,0x1b,0xd5]
+// CHECK: msr pmevcntr23_el0, x12 // encoding: [0xec,0xea,0x1b,0xd5]
+// CHECK: msr pmevcntr24_el0, x12 // encoding: [0x0c,0xeb,0x1b,0xd5]
+// CHECK: msr pmevcntr25_el0, x12 // encoding: [0x2c,0xeb,0x1b,0xd5]
+// CHECK: msr pmevcntr26_el0, x12 // encoding: [0x4c,0xeb,0x1b,0xd5]
+// CHECK: msr pmevcntr27_el0, x12 // encoding: [0x6c,0xeb,0x1b,0xd5]
+// CHECK: msr pmevcntr28_el0, x12 // encoding: [0x8c,0xeb,0x1b,0xd5]
+// CHECK: msr pmevcntr29_el0, x12 // encoding: [0xac,0xeb,0x1b,0xd5]
+// CHECK: msr pmevcntr30_el0, x12 // encoding: [0xcc,0xeb,0x1b,0xd5]
+// CHECK: msr pmccfiltr_el0, x12 // encoding: [0xec,0xef,0x1b,0xd5]
+// CHECK: msr pmevtyper0_el0, x12 // encoding: [0x0c,0xec,0x1b,0xd5]
+// CHECK: msr pmevtyper1_el0, x12 // encoding: [0x2c,0xec,0x1b,0xd5]
+// CHECK: msr pmevtyper2_el0, x12 // encoding: [0x4c,0xec,0x1b,0xd5]
+// CHECK: msr pmevtyper3_el0, x12 // encoding: [0x6c,0xec,0x1b,0xd5]
+// CHECK: msr pmevtyper4_el0, x12 // encoding: [0x8c,0xec,0x1b,0xd5]
+// CHECK: msr pmevtyper5_el0, x12 // encoding: [0xac,0xec,0x1b,0xd5]
+// CHECK: msr pmevtyper6_el0, x12 // encoding: [0xcc,0xec,0x1b,0xd5]
+// CHECK: msr pmevtyper7_el0, x12 // encoding: [0xec,0xec,0x1b,0xd5]
+// CHECK: msr pmevtyper8_el0, x12 // encoding: [0x0c,0xed,0x1b,0xd5]
+// CHECK: msr pmevtyper9_el0, x12 // encoding: [0x2c,0xed,0x1b,0xd5]
+// CHECK: msr pmevtyper10_el0, x12 // encoding: [0x4c,0xed,0x1b,0xd5]
+// CHECK: msr pmevtyper11_el0, x12 // encoding: [0x6c,0xed,0x1b,0xd5]
+// CHECK: msr pmevtyper12_el0, x12 // encoding: [0x8c,0xed,0x1b,0xd5]
+// CHECK: msr pmevtyper13_el0, x12 // encoding: [0xac,0xed,0x1b,0xd5]
+// CHECK: msr pmevtyper14_el0, x12 // encoding: [0xcc,0xed,0x1b,0xd5]
+// CHECK: msr pmevtyper15_el0, x12 // encoding: [0xec,0xed,0x1b,0xd5]
+// CHECK: msr pmevtyper16_el0, x12 // encoding: [0x0c,0xee,0x1b,0xd5]
+// CHECK: msr pmevtyper17_el0, x12 // encoding: [0x2c,0xee,0x1b,0xd5]
+// CHECK: msr pmevtyper18_el0, x12 // encoding: [0x4c,0xee,0x1b,0xd5]
+// CHECK: msr pmevtyper19_el0, x12 // encoding: [0x6c,0xee,0x1b,0xd5]
+// CHECK: msr pmevtyper20_el0, x12 // encoding: [0x8c,0xee,0x1b,0xd5]
+// CHECK: msr pmevtyper21_el0, x12 // encoding: [0xac,0xee,0x1b,0xd5]
+// CHECK: msr pmevtyper22_el0, x12 // encoding: [0xcc,0xee,0x1b,0xd5]
+// CHECK: msr pmevtyper23_el0, x12 // encoding: [0xec,0xee,0x1b,0xd5]
+// CHECK: msr pmevtyper24_el0, x12 // encoding: [0x0c,0xef,0x1b,0xd5]
+// CHECK: msr pmevtyper25_el0, x12 // encoding: [0x2c,0xef,0x1b,0xd5]
+// CHECK: msr pmevtyper26_el0, x12 // encoding: [0x4c,0xef,0x1b,0xd5]
+// CHECK: msr pmevtyper27_el0, x12 // encoding: [0x6c,0xef,0x1b,0xd5]
+// CHECK: msr pmevtyper28_el0, x12 // encoding: [0x8c,0xef,0x1b,0xd5]
+// CHECK: msr pmevtyper29_el0, x12 // encoding: [0xac,0xef,0x1b,0xd5]
+// CHECK: msr pmevtyper30_el0, x12 // encoding: [0xcc,0xef,0x1b,0xd5]
+
+ mrs x9, TEECR32_EL1
+ mrs x9, OSDTRRX_EL1
+ mrs x9, MDCCSR_EL0
+ mrs x9, MDCCINT_EL1
+ mrs x9, MDSCR_EL1
+ mrs x9, OSDTRTX_EL1
+ mrs x9, DBGDTR_EL0
+ mrs x9, DBGDTRRX_EL0
+ mrs x9, OSECCR_EL1
+ mrs x9, DBGVCR32_EL2
+ mrs x9, DBGBVR0_EL1
+ mrs x9, DBGBVR1_EL1
+ mrs x9, DBGBVR2_EL1
+ mrs x9, DBGBVR3_EL1
+ mrs x9, DBGBVR4_EL1
+ mrs x9, DBGBVR5_EL1
+ mrs x9, DBGBVR6_EL1
+ mrs x9, DBGBVR7_EL1
+ mrs x9, DBGBVR8_EL1
+ mrs x9, DBGBVR9_EL1
+ mrs x9, DBGBVR10_EL1
+ mrs x9, DBGBVR11_EL1
+ mrs x9, DBGBVR12_EL1
+ mrs x9, DBGBVR13_EL1
+ mrs x9, DBGBVR14_EL1
+ mrs x9, DBGBVR15_EL1
+ mrs x9, DBGBCR0_EL1
+ mrs x9, DBGBCR1_EL1
+ mrs x9, DBGBCR2_EL1
+ mrs x9, DBGBCR3_EL1
+ mrs x9, DBGBCR4_EL1
+ mrs x9, DBGBCR5_EL1
+ mrs x9, DBGBCR6_EL1
+ mrs x9, DBGBCR7_EL1
+ mrs x9, DBGBCR8_EL1
+ mrs x9, DBGBCR9_EL1
+ mrs x9, DBGBCR10_EL1
+ mrs x9, DBGBCR11_EL1
+ mrs x9, DBGBCR12_EL1
+ mrs x9, DBGBCR13_EL1
+ mrs x9, DBGBCR14_EL1
+ mrs x9, DBGBCR15_EL1
+ mrs x9, DBGWVR0_EL1
+ mrs x9, DBGWVR1_EL1
+ mrs x9, DBGWVR2_EL1
+ mrs x9, DBGWVR3_EL1
+ mrs x9, DBGWVR4_EL1
+ mrs x9, DBGWVR5_EL1
+ mrs x9, DBGWVR6_EL1
+ mrs x9, DBGWVR7_EL1
+ mrs x9, DBGWVR8_EL1
+ mrs x9, DBGWVR9_EL1
+ mrs x9, DBGWVR10_EL1
+ mrs x9, DBGWVR11_EL1
+ mrs x9, DBGWVR12_EL1
+ mrs x9, DBGWVR13_EL1
+ mrs x9, DBGWVR14_EL1
+ mrs x9, DBGWVR15_EL1
+ mrs x9, DBGWCR0_EL1
+ mrs x9, DBGWCR1_EL1
+ mrs x9, DBGWCR2_EL1
+ mrs x9, DBGWCR3_EL1
+ mrs x9, DBGWCR4_EL1
+ mrs x9, DBGWCR5_EL1
+ mrs x9, DBGWCR6_EL1
+ mrs x9, DBGWCR7_EL1
+ mrs x9, DBGWCR8_EL1
+ mrs x9, DBGWCR9_EL1
+ mrs x9, DBGWCR10_EL1
+ mrs x9, DBGWCR11_EL1
+ mrs x9, DBGWCR12_EL1
+ mrs x9, DBGWCR13_EL1
+ mrs x9, DBGWCR14_EL1
+ mrs x9, DBGWCR15_EL1
+ mrs x9, MDRAR_EL1
+ mrs x9, TEEHBR32_EL1
+ mrs x9, OSLSR_EL1
+ mrs x9, OSDLR_EL1
+ mrs x9, DBGPRCR_EL1
+ mrs x9, DBGCLAIMSET_EL1
+ mrs x9, DBGCLAIMCLR_EL1
+ mrs x9, DBGAUTHSTATUS_EL1
+ mrs x9, MIDR_EL1
+ mrs x9, CCSIDR_EL1
+ mrs x9, CSSELR_EL1
+ mrs x9, VPIDR_EL2
+ mrs x9, CLIDR_EL1
+ mrs x9, CTR_EL0
+ mrs x9, MPIDR_EL1
+ mrs x9, VMPIDR_EL2
+ mrs x9, REVIDR_EL1
+ mrs x9, AIDR_EL1
+ mrs x9, DCZID_EL0
+ mrs x9, ID_PFR0_EL1
+ mrs x9, ID_PFR1_EL1
+ mrs x9, ID_DFR0_EL1
+ mrs x9, ID_AFR0_EL1
+ mrs x9, ID_MMFR0_EL1
+ mrs x9, ID_MMFR1_EL1
+ mrs x9, ID_MMFR2_EL1
+ mrs x9, ID_MMFR3_EL1
+ mrs x9, ID_ISAR0_EL1
+ mrs x9, ID_ISAR1_EL1
+ mrs x9, ID_ISAR2_EL1
+ mrs x9, ID_ISAR3_EL1
+ mrs x9, ID_ISAR4_EL1
+ mrs x9, ID_ISAR5_EL1
+ mrs x9, MVFR0_EL1
+ mrs x9, MVFR1_EL1
+ mrs x9, MVFR2_EL1
+ mrs x9, ID_AA64PFR0_EL1
+ mrs x9, ID_AA64PFR1_EL1
+ mrs x9, ID_AA64DFR0_EL1
+ mrs x9, ID_AA64DFR1_EL1
+ mrs x9, ID_AA64AFR0_EL1
+ mrs x9, ID_AA64AFR1_EL1
+ mrs x9, ID_AA64ISAR0_EL1
+ mrs x9, ID_AA64ISAR1_EL1
+ mrs x9, ID_AA64MMFR0_EL1
+ mrs x9, ID_AA64MMFR1_EL1
+ mrs x9, SCTLR_EL1
+ mrs x9, SCTLR_EL2
+ mrs x9, SCTLR_EL3
+ mrs x9, ACTLR_EL1
+ mrs x9, ACTLR_EL2
+ mrs x9, ACTLR_EL3
+ mrs x9, CPACR_EL1
+ mrs x9, HCR_EL2
+ mrs x9, SCR_EL3
+ mrs x9, MDCR_EL2
+ mrs x9, SDER32_EL3
+ mrs x9, CPTR_EL2
+ mrs x9, CPTR_EL3
+ mrs x9, HSTR_EL2
+ mrs x9, HACR_EL2
+ mrs x9, MDCR_EL3
+ mrs x9, TTBR0_EL1
+ mrs x9, TTBR0_EL2
+ mrs x9, TTBR0_EL3
+ mrs x9, TTBR1_EL1
+ mrs x9, TCR_EL1
+ mrs x9, TCR_EL2
+ mrs x9, TCR_EL3
+ mrs x9, VTTBR_EL2
+ mrs x9, VTCR_EL2
+ mrs x9, DACR32_EL2
+ mrs x9, SPSR_EL1
+ mrs x9, SPSR_EL2
+ mrs x9, SPSR_EL3
+ mrs x9, ELR_EL1
+ mrs x9, ELR_EL2
+ mrs x9, ELR_EL3
+ mrs x9, SP_EL0
+ mrs x9, SP_EL1
+ mrs x9, SP_EL2
+ mrs x9, SPSel
+ mrs x9, NZCV
+ mrs x9, DAIF
+ mrs x9, CurrentEL
+ mrs x9, SPSR_irq
+ mrs x9, SPSR_abt
+ mrs x9, SPSR_und
+ mrs x9, SPSR_fiq
+ mrs x9, FPCR
+ mrs x9, FPSR
+ mrs x9, DSPSR_EL0
+ mrs x9, DLR_EL0
+ mrs x9, IFSR32_EL2
+ mrs x9, AFSR0_EL1
+ mrs x9, AFSR0_EL2
+ mrs x9, AFSR0_EL3
+ mrs x9, AFSR1_EL1
+ mrs x9, AFSR1_EL2
+ mrs x9, AFSR1_EL3
+ mrs x9, ESR_EL1
+ mrs x9, ESR_EL2
+ mrs x9, ESR_EL3
+ mrs x9, FPEXC32_EL2
+ mrs x9, FAR_EL1
+ mrs x9, FAR_EL2
+ mrs x9, FAR_EL3
+ mrs x9, HPFAR_EL2
+ mrs x9, PAR_EL1
+ mrs x9, PMCR_EL0
+ mrs x9, PMCNTENSET_EL0
+ mrs x9, PMCNTENCLR_EL0
+ mrs x9, PMOVSCLR_EL0
+ mrs x9, PMSELR_EL0
+ mrs x9, PMCEID0_EL0
+ mrs x9, PMCEID1_EL0
+ mrs x9, PMCCNTR_EL0
+ mrs x9, PMXEVTYPER_EL0
+ mrs x9, PMXEVCNTR_EL0
+ mrs x9, PMUSERENR_EL0
+ mrs x9, PMINTENSET_EL1
+ mrs x9, PMINTENCLR_EL1
+ mrs x9, PMOVSSET_EL0
+ mrs x9, MAIR_EL1
+ mrs x9, MAIR_EL2
+ mrs x9, MAIR_EL3
+ mrs x9, AMAIR_EL1
+ mrs x9, AMAIR_EL2
+ mrs x9, AMAIR_EL3
+ mrs x9, VBAR_EL1
+ mrs x9, VBAR_EL2
+ mrs x9, VBAR_EL3
+ mrs x9, RVBAR_EL1
+ mrs x9, RVBAR_EL2
+ mrs x9, RVBAR_EL3
+ mrs x9, RMR_EL1
+ mrs x9, RMR_EL2
+ mrs x9, RMR_EL3
+ mrs x9, ISR_EL1
+ mrs x9, CONTEXTIDR_EL1
+ mrs x9, TPIDR_EL0
+ mrs x9, TPIDR_EL2
+ mrs x9, TPIDR_EL3
+ mrs x9, TPIDRRO_EL0
+ mrs x9, TPIDR_EL1
+ mrs x9, CNTFRQ_EL0
+ mrs x9, CNTPCT_EL0
+ mrs x9, CNTVCT_EL0
+ mrs x9, CNTVOFF_EL2
+ mrs x9, CNTKCTL_EL1
+ mrs x9, CNTHCTL_EL2
+ mrs x9, CNTP_TVAL_EL0
+ mrs x9, CNTHP_TVAL_EL2
+ mrs x9, CNTPS_TVAL_EL1
+ mrs x9, CNTP_CTL_EL0
+ mrs x9, CNTHP_CTL_EL2
+ mrs x9, CNTPS_CTL_EL1
+ mrs x9, CNTP_CVAL_EL0
+ mrs x9, CNTHP_CVAL_EL2
+ mrs x9, CNTPS_CVAL_EL1
+ mrs x9, CNTV_TVAL_EL0
+ mrs x9, CNTV_CTL_EL0
+ mrs x9, CNTV_CVAL_EL0
+ mrs x9, PMEVCNTR0_EL0
+ mrs x9, PMEVCNTR1_EL0
+ mrs x9, PMEVCNTR2_EL0
+ mrs x9, PMEVCNTR3_EL0
+ mrs x9, PMEVCNTR4_EL0
+ mrs x9, PMEVCNTR5_EL0
+ mrs x9, PMEVCNTR6_EL0
+ mrs x9, PMEVCNTR7_EL0
+ mrs x9, PMEVCNTR8_EL0
+ mrs x9, PMEVCNTR9_EL0
+ mrs x9, PMEVCNTR10_EL0
+ mrs x9, PMEVCNTR11_EL0
+ mrs x9, PMEVCNTR12_EL0
+ mrs x9, PMEVCNTR13_EL0
+ mrs x9, PMEVCNTR14_EL0
+ mrs x9, PMEVCNTR15_EL0
+ mrs x9, PMEVCNTR16_EL0
+ mrs x9, PMEVCNTR17_EL0
+ mrs x9, PMEVCNTR18_EL0
+ mrs x9, PMEVCNTR19_EL0
+ mrs x9, PMEVCNTR20_EL0
+ mrs x9, PMEVCNTR21_EL0
+ mrs x9, PMEVCNTR22_EL0
+ mrs x9, PMEVCNTR23_EL0
+ mrs x9, PMEVCNTR24_EL0
+ mrs x9, PMEVCNTR25_EL0
+ mrs x9, PMEVCNTR26_EL0
+ mrs x9, PMEVCNTR27_EL0
+ mrs x9, PMEVCNTR28_EL0
+ mrs x9, PMEVCNTR29_EL0
+ mrs x9, PMEVCNTR30_EL0
+ mrs x9, PMCCFILTR_EL0
+ mrs x9, PMEVTYPER0_EL0
+ mrs x9, PMEVTYPER1_EL0
+ mrs x9, PMEVTYPER2_EL0
+ mrs x9, PMEVTYPER3_EL0
+ mrs x9, PMEVTYPER4_EL0
+ mrs x9, PMEVTYPER5_EL0
+ mrs x9, PMEVTYPER6_EL0
+ mrs x9, PMEVTYPER7_EL0
+ mrs x9, PMEVTYPER8_EL0
+ mrs x9, PMEVTYPER9_EL0
+ mrs x9, PMEVTYPER10_EL0
+ mrs x9, PMEVTYPER11_EL0
+ mrs x9, PMEVTYPER12_EL0
+ mrs x9, PMEVTYPER13_EL0
+ mrs x9, PMEVTYPER14_EL0
+ mrs x9, PMEVTYPER15_EL0
+ mrs x9, PMEVTYPER16_EL0
+ mrs x9, PMEVTYPER17_EL0
+ mrs x9, PMEVTYPER18_EL0
+ mrs x9, PMEVTYPER19_EL0
+ mrs x9, PMEVTYPER20_EL0
+ mrs x9, PMEVTYPER21_EL0
+ mrs x9, PMEVTYPER22_EL0
+ mrs x9, PMEVTYPER23_EL0
+ mrs x9, PMEVTYPER24_EL0
+ mrs x9, PMEVTYPER25_EL0
+ mrs x9, PMEVTYPER26_EL0
+ mrs x9, PMEVTYPER27_EL0
+ mrs x9, PMEVTYPER28_EL0
+ mrs x9, PMEVTYPER29_EL0
+ mrs x9, PMEVTYPER30_EL0
+// CHECK: mrs x9, teecr32_el1 // encoding: [0x09,0x00,0x32,0xd5]
+// CHECK: mrs x9, osdtrrx_el1 // encoding: [0x49,0x00,0x30,0xd5]
+// CHECK: mrs x9, mdccsr_el0 // encoding: [0x09,0x01,0x33,0xd5]
+// CHECK: mrs x9, mdccint_el1 // encoding: [0x09,0x02,0x30,0xd5]
+// CHECK: mrs x9, mdscr_el1 // encoding: [0x49,0x02,0x30,0xd5]
+// CHECK: mrs x9, osdtrtx_el1 // encoding: [0x49,0x03,0x30,0xd5]
+// CHECK: mrs x9, dbgdtr_el0 // encoding: [0x09,0x04,0x33,0xd5]
+// CHECK: mrs x9, dbgdtrrx_el0 // encoding: [0x09,0x05,0x33,0xd5]
+// CHECK: mrs x9, oseccr_el1 // encoding: [0x49,0x06,0x30,0xd5]
+// CHECK: mrs x9, dbgvcr32_el2 // encoding: [0x09,0x07,0x34,0xd5]
+// CHECK: mrs x9, dbgbvr0_el1 // encoding: [0x89,0x00,0x30,0xd5]
+// CHECK: mrs x9, dbgbvr1_el1 // encoding: [0x89,0x01,0x30,0xd5]
+// CHECK: mrs x9, dbgbvr2_el1 // encoding: [0x89,0x02,0x30,0xd5]
+// CHECK: mrs x9, dbgbvr3_el1 // encoding: [0x89,0x03,0x30,0xd5]
+// CHECK: mrs x9, dbgbvr4_el1 // encoding: [0x89,0x04,0x30,0xd5]
+// CHECK: mrs x9, dbgbvr5_el1 // encoding: [0x89,0x05,0x30,0xd5]
+// CHECK: mrs x9, dbgbvr6_el1 // encoding: [0x89,0x06,0x30,0xd5]
+// CHECK: mrs x9, dbgbvr7_el1 // encoding: [0x89,0x07,0x30,0xd5]
+// CHECK: mrs x9, dbgbvr8_el1 // encoding: [0x89,0x08,0x30,0xd5]
+// CHECK: mrs x9, dbgbvr9_el1 // encoding: [0x89,0x09,0x30,0xd5]
+// CHECK: mrs x9, dbgbvr10_el1 // encoding: [0x89,0x0a,0x30,0xd5]
+// CHECK: mrs x9, dbgbvr11_el1 // encoding: [0x89,0x0b,0x30,0xd5]
+// CHECK: mrs x9, dbgbvr12_el1 // encoding: [0x89,0x0c,0x30,0xd5]
+// CHECK: mrs x9, dbgbvr13_el1 // encoding: [0x89,0x0d,0x30,0xd5]
+// CHECK: mrs x9, dbgbvr14_el1 // encoding: [0x89,0x0e,0x30,0xd5]
+// CHECK: mrs x9, dbgbvr15_el1 // encoding: [0x89,0x0f,0x30,0xd5]
+// CHECK: mrs x9, dbgbcr0_el1 // encoding: [0xa9,0x00,0x30,0xd5]
+// CHECK: mrs x9, dbgbcr1_el1 // encoding: [0xa9,0x01,0x30,0xd5]
+// CHECK: mrs x9, dbgbcr2_el1 // encoding: [0xa9,0x02,0x30,0xd5]
+// CHECK: mrs x9, dbgbcr3_el1 // encoding: [0xa9,0x03,0x30,0xd5]
+// CHECK: mrs x9, dbgbcr4_el1 // encoding: [0xa9,0x04,0x30,0xd5]
+// CHECK: mrs x9, dbgbcr5_el1 // encoding: [0xa9,0x05,0x30,0xd5]
+// CHECK: mrs x9, dbgbcr6_el1 // encoding: [0xa9,0x06,0x30,0xd5]
+// CHECK: mrs x9, dbgbcr7_el1 // encoding: [0xa9,0x07,0x30,0xd5]
+// CHECK: mrs x9, dbgbcr8_el1 // encoding: [0xa9,0x08,0x30,0xd5]
+// CHECK: mrs x9, dbgbcr9_el1 // encoding: [0xa9,0x09,0x30,0xd5]
+// CHECK: mrs x9, dbgbcr10_el1 // encoding: [0xa9,0x0a,0x30,0xd5]
+// CHECK: mrs x9, dbgbcr11_el1 // encoding: [0xa9,0x0b,0x30,0xd5]
+// CHECK: mrs x9, dbgbcr12_el1 // encoding: [0xa9,0x0c,0x30,0xd5]
+// CHECK: mrs x9, dbgbcr13_el1 // encoding: [0xa9,0x0d,0x30,0xd5]
+// CHECK: mrs x9, dbgbcr14_el1 // encoding: [0xa9,0x0e,0x30,0xd5]
+// CHECK: mrs x9, dbgbcr15_el1 // encoding: [0xa9,0x0f,0x30,0xd5]
+// CHECK: mrs x9, dbgwvr0_el1 // encoding: [0xc9,0x00,0x30,0xd5]
+// CHECK: mrs x9, dbgwvr1_el1 // encoding: [0xc9,0x01,0x30,0xd5]
+// CHECK: mrs x9, dbgwvr2_el1 // encoding: [0xc9,0x02,0x30,0xd5]
+// CHECK: mrs x9, dbgwvr3_el1 // encoding: [0xc9,0x03,0x30,0xd5]
+// CHECK: mrs x9, dbgwvr4_el1 // encoding: [0xc9,0x04,0x30,0xd5]
+// CHECK: mrs x9, dbgwvr5_el1 // encoding: [0xc9,0x05,0x30,0xd5]
+// CHECK: mrs x9, dbgwvr6_el1 // encoding: [0xc9,0x06,0x30,0xd5]
+// CHECK: mrs x9, dbgwvr7_el1 // encoding: [0xc9,0x07,0x30,0xd5]
+// CHECK: mrs x9, dbgwvr8_el1 // encoding: [0xc9,0x08,0x30,0xd5]
+// CHECK: mrs x9, dbgwvr9_el1 // encoding: [0xc9,0x09,0x30,0xd5]
+// CHECK: mrs x9, dbgwvr10_el1 // encoding: [0xc9,0x0a,0x30,0xd5]
+// CHECK: mrs x9, dbgwvr11_el1 // encoding: [0xc9,0x0b,0x30,0xd5]
+// CHECK: mrs x9, dbgwvr12_el1 // encoding: [0xc9,0x0c,0x30,0xd5]
+// CHECK: mrs x9, dbgwvr13_el1 // encoding: [0xc9,0x0d,0x30,0xd5]
+// CHECK: mrs x9, dbgwvr14_el1 // encoding: [0xc9,0x0e,0x30,0xd5]
+// CHECK: mrs x9, dbgwvr15_el1 // encoding: [0xc9,0x0f,0x30,0xd5]
+// CHECK: mrs x9, dbgwcr0_el1 // encoding: [0xe9,0x00,0x30,0xd5]
+// CHECK: mrs x9, dbgwcr1_el1 // encoding: [0xe9,0x01,0x30,0xd5]
+// CHECK: mrs x9, dbgwcr2_el1 // encoding: [0xe9,0x02,0x30,0xd5]
+// CHECK: mrs x9, dbgwcr3_el1 // encoding: [0xe9,0x03,0x30,0xd5]
+// CHECK: mrs x9, dbgwcr4_el1 // encoding: [0xe9,0x04,0x30,0xd5]
+// CHECK: mrs x9, dbgwcr5_el1 // encoding: [0xe9,0x05,0x30,0xd5]
+// CHECK: mrs x9, dbgwcr6_el1 // encoding: [0xe9,0x06,0x30,0xd5]
+// CHECK: mrs x9, dbgwcr7_el1 // encoding: [0xe9,0x07,0x30,0xd5]
+// CHECK: mrs x9, dbgwcr8_el1 // encoding: [0xe9,0x08,0x30,0xd5]
+// CHECK: mrs x9, dbgwcr9_el1 // encoding: [0xe9,0x09,0x30,0xd5]
+// CHECK: mrs x9, dbgwcr10_el1 // encoding: [0xe9,0x0a,0x30,0xd5]
+// CHECK: mrs x9, dbgwcr11_el1 // encoding: [0xe9,0x0b,0x30,0xd5]
+// CHECK: mrs x9, dbgwcr12_el1 // encoding: [0xe9,0x0c,0x30,0xd5]
+// CHECK: mrs x9, dbgwcr13_el1 // encoding: [0xe9,0x0d,0x30,0xd5]
+// CHECK: mrs x9, dbgwcr14_el1 // encoding: [0xe9,0x0e,0x30,0xd5]
+// CHECK: mrs x9, dbgwcr15_el1 // encoding: [0xe9,0x0f,0x30,0xd5]
+// CHECK: mrs x9, mdrar_el1 // encoding: [0x09,0x10,0x30,0xd5]
+// CHECK: mrs x9, teehbr32_el1 // encoding: [0x09,0x10,0x32,0xd5]
+// CHECK: mrs x9, oslsr_el1 // encoding: [0x89,0x11,0x30,0xd5]
+// CHECK: mrs x9, osdlr_el1 // encoding: [0x89,0x13,0x30,0xd5]
+// CHECK: mrs x9, dbgprcr_el1 // encoding: [0x89,0x14,0x30,0xd5]
+// CHECK: mrs x9, dbgclaimset_el1 // encoding: [0xc9,0x78,0x30,0xd5]
+// CHECK: mrs x9, dbgclaimclr_el1 // encoding: [0xc9,0x79,0x30,0xd5]
+// CHECK: mrs x9, dbgauthstatus_el1 // encoding: [0xc9,0x7e,0x30,0xd5]
+// CHECK: mrs x9, midr_el1 // encoding: [0x09,0x00,0x38,0xd5]
+// CHECK: mrs x9, ccsidr_el1 // encoding: [0x09,0x00,0x39,0xd5]
+// CHECK: mrs x9, csselr_el1 // encoding: [0x09,0x00,0x3a,0xd5]
+// CHECK: mrs x9, vpidr_el2 // encoding: [0x09,0x00,0x3c,0xd5]
+// CHECK: mrs x9, clidr_el1 // encoding: [0x29,0x00,0x39,0xd5]
+// CHECK: mrs x9, ctr_el0 // encoding: [0x29,0x00,0x3b,0xd5]
+// CHECK: mrs x9, mpidr_el1 // encoding: [0xa9,0x00,0x38,0xd5]
+// CHECK: mrs x9, vmpidr_el2 // encoding: [0xa9,0x00,0x3c,0xd5]
+// CHECK: mrs x9, revidr_el1 // encoding: [0xc9,0x00,0x38,0xd5]
+// CHECK: mrs x9, aidr_el1 // encoding: [0xe9,0x00,0x39,0xd5]
+// CHECK: mrs x9, dczid_el0 // encoding: [0xe9,0x00,0x3b,0xd5]
+// CHECK: mrs x9, id_pfr0_el1 // encoding: [0x09,0x01,0x38,0xd5]
+// CHECK: mrs x9, id_pfr1_el1 // encoding: [0x29,0x01,0x38,0xd5]
+// CHECK: mrs x9, id_dfr0_el1 // encoding: [0x49,0x01,0x38,0xd5]
+// CHECK: mrs x9, id_afr0_el1 // encoding: [0x69,0x01,0x38,0xd5]
+// CHECK: mrs x9, id_mmfr0_el1 // encoding: [0x89,0x01,0x38,0xd5]
+// CHECK: mrs x9, id_mmfr1_el1 // encoding: [0xa9,0x01,0x38,0xd5]
+// CHECK: mrs x9, id_mmfr2_el1 // encoding: [0xc9,0x01,0x38,0xd5]
+// CHECK: mrs x9, id_mmfr3_el1 // encoding: [0xe9,0x01,0x38,0xd5]
+// CHECK: mrs x9, id_isar0_el1 // encoding: [0x09,0x02,0x38,0xd5]
+// CHECK: mrs x9, id_isar1_el1 // encoding: [0x29,0x02,0x38,0xd5]
+// CHECK: mrs x9, id_isar2_el1 // encoding: [0x49,0x02,0x38,0xd5]
+// CHECK: mrs x9, id_isar3_el1 // encoding: [0x69,0x02,0x38,0xd5]
+// CHECK: mrs x9, id_isar4_el1 // encoding: [0x89,0x02,0x38,0xd5]
+// CHECK: mrs x9, id_isar5_el1 // encoding: [0xa9,0x02,0x38,0xd5]
+// CHECK: mrs x9, mvfr0_el1 // encoding: [0x09,0x03,0x38,0xd5]
+// CHECK: mrs x9, mvfr1_el1 // encoding: [0x29,0x03,0x38,0xd5]
+// CHECK: mrs x9, mvfr2_el1 // encoding: [0x49,0x03,0x38,0xd5]
+// CHECK: mrs x9, id_aa64pfr0_el1 // encoding: [0x09,0x04,0x38,0xd5]
+// CHECK: mrs x9, id_aa64pfr1_el1 // encoding: [0x29,0x04,0x38,0xd5]
+// CHECK: mrs x9, id_aa64dfr0_el1 // encoding: [0x09,0x05,0x38,0xd5]
+// CHECK: mrs x9, id_aa64dfr1_el1 // encoding: [0x29,0x05,0x38,0xd5]
+// CHECK: mrs x9, id_aa64afr0_el1 // encoding: [0x89,0x05,0x38,0xd5]
+// CHECK: mrs x9, id_aa64afr1_el1 // encoding: [0xa9,0x05,0x38,0xd5]
+// CHECK: mrs x9, id_aa64isar0_el1 // encoding: [0x09,0x06,0x38,0xd5]
+// CHECK: mrs x9, id_aa64isar1_el1 // encoding: [0x29,0x06,0x38,0xd5]
+// CHECK: mrs x9, id_aa64mmfr0_el1 // encoding: [0x09,0x07,0x38,0xd5]
+// CHECK: mrs x9, id_aa64mmfr1_el1 // encoding: [0x29,0x07,0x38,0xd5]
+// CHECK: mrs x9, sctlr_el1 // encoding: [0x09,0x10,0x38,0xd5]
+// CHECK: mrs x9, sctlr_el2 // encoding: [0x09,0x10,0x3c,0xd5]
+// CHECK: mrs x9, sctlr_el3 // encoding: [0x09,0x10,0x3e,0xd5]
+// CHECK: mrs x9, actlr_el1 // encoding: [0x29,0x10,0x38,0xd5]
+// CHECK: mrs x9, actlr_el2 // encoding: [0x29,0x10,0x3c,0xd5]
+// CHECK: mrs x9, actlr_el3 // encoding: [0x29,0x10,0x3e,0xd5]
+// CHECK: mrs x9, cpacr_el1 // encoding: [0x49,0x10,0x38,0xd5]
+// CHECK: mrs x9, hcr_el2 // encoding: [0x09,0x11,0x3c,0xd5]
+// CHECK: mrs x9, scr_el3 // encoding: [0x09,0x11,0x3e,0xd5]
+// CHECK: mrs x9, mdcr_el2 // encoding: [0x29,0x11,0x3c,0xd5]
+// CHECK: mrs x9, sder32_el3 // encoding: [0x29,0x11,0x3e,0xd5]
+// CHECK: mrs x9, cptr_el2 // encoding: [0x49,0x11,0x3c,0xd5]
+// CHECK: mrs x9, cptr_el3 // encoding: [0x49,0x11,0x3e,0xd5]
+// CHECK: mrs x9, hstr_el2 // encoding: [0x69,0x11,0x3c,0xd5]
+// CHECK: mrs x9, hacr_el2 // encoding: [0xe9,0x11,0x3c,0xd5]
+// CHECK: mrs x9, mdcr_el3 // encoding: [0x29,0x13,0x3e,0xd5]
+// CHECK: mrs x9, ttbr0_el1 // encoding: [0x09,0x20,0x38,0xd5]
+// CHECK: mrs x9, ttbr0_el2 // encoding: [0x09,0x20,0x3c,0xd5]
+// CHECK: mrs x9, ttbr0_el3 // encoding: [0x09,0x20,0x3e,0xd5]
+// CHECK: mrs x9, ttbr1_el1 // encoding: [0x29,0x20,0x38,0xd5]
+// CHECK: mrs x9, tcr_el1 // encoding: [0x49,0x20,0x38,0xd5]
+// CHECK: mrs x9, tcr_el2 // encoding: [0x49,0x20,0x3c,0xd5]
+// CHECK: mrs x9, tcr_el3 // encoding: [0x49,0x20,0x3e,0xd5]
+// CHECK: mrs x9, vttbr_el2 // encoding: [0x09,0x21,0x3c,0xd5]
+// CHECK: mrs x9, vtcr_el2 // encoding: [0x49,0x21,0x3c,0xd5]
+// CHECK: mrs x9, dacr32_el2 // encoding: [0x09,0x30,0x3c,0xd5]
+// CHECK: mrs x9, spsr_el1 // encoding: [0x09,0x40,0x38,0xd5]
+// CHECK: mrs x9, spsr_el2 // encoding: [0x09,0x40,0x3c,0xd5]
+// CHECK: mrs x9, spsr_el3 // encoding: [0x09,0x40,0x3e,0xd5]
+// CHECK: mrs x9, elr_el1 // encoding: [0x29,0x40,0x38,0xd5]
+// CHECK: mrs x9, elr_el2 // encoding: [0x29,0x40,0x3c,0xd5]
+// CHECK: mrs x9, elr_el3 // encoding: [0x29,0x40,0x3e,0xd5]
+// CHECK: mrs x9, sp_el0 // encoding: [0x09,0x41,0x38,0xd5]
+// CHECK: mrs x9, sp_el1 // encoding: [0x09,0x41,0x3c,0xd5]
+// CHECK: mrs x9, sp_el2 // encoding: [0x09,0x41,0x3e,0xd5]
+// CHECK: mrs x9, spsel // encoding: [0x09,0x42,0x38,0xd5]
+// CHECK: mrs x9, nzcv // encoding: [0x09,0x42,0x3b,0xd5]
+// CHECK: mrs x9, daif // encoding: [0x29,0x42,0x3b,0xd5]
+// CHECK: mrs x9, currentel // encoding: [0x49,0x42,0x38,0xd5]
+// CHECK: mrs x9, spsr_irq // encoding: [0x09,0x43,0x3c,0xd5]
+// CHECK: mrs x9, spsr_abt // encoding: [0x29,0x43,0x3c,0xd5]
+// CHECK: mrs x9, spsr_und // encoding: [0x49,0x43,0x3c,0xd5]
+// CHECK: mrs x9, spsr_fiq // encoding: [0x69,0x43,0x3c,0xd5]
+// CHECK: mrs x9, fpcr // encoding: [0x09,0x44,0x3b,0xd5]
+// CHECK: mrs x9, fpsr // encoding: [0x29,0x44,0x3b,0xd5]
+// CHECK: mrs x9, dspsr_el0 // encoding: [0x09,0x45,0x3b,0xd5]
+// CHECK: mrs x9, dlr_el0 // encoding: [0x29,0x45,0x3b,0xd5]
+// CHECK: mrs x9, ifsr32_el2 // encoding: [0x29,0x50,0x3c,0xd5]
+// CHECK: mrs x9, afsr0_el1 // encoding: [0x09,0x51,0x38,0xd5]
+// CHECK: mrs x9, afsr0_el2 // encoding: [0x09,0x51,0x3c,0xd5]
+// CHECK: mrs x9, afsr0_el3 // encoding: [0x09,0x51,0x3e,0xd5]
+// CHECK: mrs x9, afsr1_el1 // encoding: [0x29,0x51,0x38,0xd5]
+// CHECK: mrs x9, afsr1_el2 // encoding: [0x29,0x51,0x3c,0xd5]
+// CHECK: mrs x9, afsr1_el3 // encoding: [0x29,0x51,0x3e,0xd5]
+// CHECK: mrs x9, esr_el1 // encoding: [0x09,0x52,0x38,0xd5]
+// CHECK: mrs x9, esr_el2 // encoding: [0x09,0x52,0x3c,0xd5]
+// CHECK: mrs x9, esr_el3 // encoding: [0x09,0x52,0x3e,0xd5]
+// CHECK: mrs x9, fpexc32_el2 // encoding: [0x09,0x53,0x3c,0xd5]
+// CHECK: mrs x9, far_el1 // encoding: [0x09,0x60,0x38,0xd5]
+// CHECK: mrs x9, far_el2 // encoding: [0x09,0x60,0x3c,0xd5]
+// CHECK: mrs x9, far_el3 // encoding: [0x09,0x60,0x3e,0xd5]
+// CHECK: mrs x9, hpfar_el2 // encoding: [0x89,0x60,0x3c,0xd5]
+// CHECK: mrs x9, par_el1 // encoding: [0x09,0x74,0x38,0xd5]
+// CHECK: mrs x9, pmcr_el0 // encoding: [0x09,0x9c,0x3b,0xd5]
+// CHECK: mrs x9, pmcntenset_el0 // encoding: [0x29,0x9c,0x3b,0xd5]
+// CHECK: mrs x9, pmcntenclr_el0 // encoding: [0x49,0x9c,0x3b,0xd5]
+// CHECK: mrs x9, pmovsclr_el0 // encoding: [0x69,0x9c,0x3b,0xd5]
+// CHECK: mrs x9, pmselr_el0 // encoding: [0xa9,0x9c,0x3b,0xd5]
+// CHECK: mrs x9, pmceid0_el0 // encoding: [0xc9,0x9c,0x3b,0xd5]
+// CHECK: mrs x9, pmceid1_el0 // encoding: [0xe9,0x9c,0x3b,0xd5]
+// CHECK: mrs x9, pmccntr_el0 // encoding: [0x09,0x9d,0x3b,0xd5]
+// CHECK: mrs x9, pmxevtyper_el0 // encoding: [0x29,0x9d,0x3b,0xd5]
+// CHECK: mrs x9, pmxevcntr_el0 // encoding: [0x49,0x9d,0x3b,0xd5]
+// CHECK: mrs x9, pmuserenr_el0 // encoding: [0x09,0x9e,0x3b,0xd5]
+// CHECK: mrs x9, pmintenset_el1 // encoding: [0x29,0x9e,0x38,0xd5]
+// CHECK: mrs x9, pmintenclr_el1 // encoding: [0x49,0x9e,0x38,0xd5]
+// CHECK: mrs x9, pmovsset_el0 // encoding: [0x69,0x9e,0x3b,0xd5]
+// CHECK: mrs x9, mair_el1 // encoding: [0x09,0xa2,0x38,0xd5]
+// CHECK: mrs x9, mair_el2 // encoding: [0x09,0xa2,0x3c,0xd5]
+// CHECK: mrs x9, mair_el3 // encoding: [0x09,0xa2,0x3e,0xd5]
+// CHECK: mrs x9, amair_el1 // encoding: [0x09,0xa3,0x38,0xd5]
+// CHECK: mrs x9, amair_el2 // encoding: [0x09,0xa3,0x3c,0xd5]
+// CHECK: mrs x9, amair_el3 // encoding: [0x09,0xa3,0x3e,0xd5]
+// CHECK: mrs x9, vbar_el1 // encoding: [0x09,0xc0,0x38,0xd5]
+// CHECK: mrs x9, vbar_el2 // encoding: [0x09,0xc0,0x3c,0xd5]
+// CHECK: mrs x9, vbar_el3 // encoding: [0x09,0xc0,0x3e,0xd5]
+// CHECK: mrs x9, rvbar_el1 // encoding: [0x29,0xc0,0x38,0xd5]
+// CHECK: mrs x9, rvbar_el2 // encoding: [0x29,0xc0,0x3c,0xd5]
+// CHECK: mrs x9, rvbar_el3 // encoding: [0x29,0xc0,0x3e,0xd5]
+// CHECK: mrs x9, rmr_el1 // encoding: [0x49,0xc0,0x38,0xd5]
+// CHECK: mrs x9, rmr_el2 // encoding: [0x49,0xc0,0x3c,0xd5]
+// CHECK: mrs x9, rmr_el3 // encoding: [0x49,0xc0,0x3e,0xd5]
+// CHECK: mrs x9, isr_el1 // encoding: [0x09,0xc1,0x38,0xd5]
+// CHECK: mrs x9, contextidr_el1 // encoding: [0x29,0xd0,0x38,0xd5]
+// CHECK: mrs x9, tpidr_el0 // encoding: [0x49,0xd0,0x3b,0xd5]
+// CHECK: mrs x9, tpidr_el2 // encoding: [0x49,0xd0,0x3c,0xd5]
+// CHECK: mrs x9, tpidr_el3 // encoding: [0x49,0xd0,0x3e,0xd5]
+// CHECK: mrs x9, tpidrro_el0 // encoding: [0x69,0xd0,0x3b,0xd5]
+// CHECK: mrs x9, tpidr_el1 // encoding: [0x89,0xd0,0x38,0xd5]
+// CHECK: mrs x9, cntfrq_el0 // encoding: [0x09,0xe0,0x3b,0xd5]
+// CHECK: mrs x9, cntpct_el0 // encoding: [0x29,0xe0,0x3b,0xd5]
+// CHECK: mrs x9, cntvct_el0 // encoding: [0x49,0xe0,0x3b,0xd5]
+// CHECK: mrs x9, cntvoff_el2 // encoding: [0x69,0xe0,0x3c,0xd5]
+// CHECK: mrs x9, cntkctl_el1 // encoding: [0x09,0xe1,0x38,0xd5]
+// CHECK: mrs x9, cnthctl_el2 // encoding: [0x09,0xe1,0x3c,0xd5]
+// CHECK: mrs x9, cntp_tval_el0 // encoding: [0x09,0xe2,0x3b,0xd5]
+// CHECK: mrs x9, cnthp_tval_el2 // encoding: [0x09,0xe2,0x3c,0xd5]
+// CHECK: mrs x9, cntps_tval_el1 // encoding: [0x09,0xe2,0x3f,0xd5]
+// CHECK: mrs x9, cntp_ctl_el0 // encoding: [0x29,0xe2,0x3b,0xd5]
+// CHECK: mrs x9, cnthp_ctl_el2 // encoding: [0x29,0xe2,0x3c,0xd5]
+// CHECK: mrs x9, cntps_ctl_el1 // encoding: [0x29,0xe2,0x3f,0xd5]
+// CHECK: mrs x9, cntp_cval_el0 // encoding: [0x49,0xe2,0x3b,0xd5]
+// CHECK: mrs x9, cnthp_cval_el2 // encoding: [0x49,0xe2,0x3c,0xd5]
+// CHECK: mrs x9, cntps_cval_el1 // encoding: [0x49,0xe2,0x3f,0xd5]
+// CHECK: mrs x9, cntv_tval_el0 // encoding: [0x09,0xe3,0x3b,0xd5]
+// CHECK: mrs x9, cntv_ctl_el0 // encoding: [0x29,0xe3,0x3b,0xd5]
+// CHECK: mrs x9, cntv_cval_el0 // encoding: [0x49,0xe3,0x3b,0xd5]
+// CHECK: mrs x9, pmevcntr0_el0 // encoding: [0x09,0xe8,0x3b,0xd5]
+// CHECK: mrs x9, pmevcntr1_el0 // encoding: [0x29,0xe8,0x3b,0xd5]
+// CHECK: mrs x9, pmevcntr2_el0 // encoding: [0x49,0xe8,0x3b,0xd5]
+// CHECK: mrs x9, pmevcntr3_el0 // encoding: [0x69,0xe8,0x3b,0xd5]
+// CHECK: mrs x9, pmevcntr4_el0 // encoding: [0x89,0xe8,0x3b,0xd5]
+// CHECK: mrs x9, pmevcntr5_el0 // encoding: [0xa9,0xe8,0x3b,0xd5]
+// CHECK: mrs x9, pmevcntr6_el0 // encoding: [0xc9,0xe8,0x3b,0xd5]
+// CHECK: mrs x9, pmevcntr7_el0 // encoding: [0xe9,0xe8,0x3b,0xd5]
+// CHECK: mrs x9, pmevcntr8_el0 // encoding: [0x09,0xe9,0x3b,0xd5]
+// CHECK: mrs x9, pmevcntr9_el0 // encoding: [0x29,0xe9,0x3b,0xd5]
+// CHECK: mrs x9, pmevcntr10_el0 // encoding: [0x49,0xe9,0x3b,0xd5]
+// CHECK: mrs x9, pmevcntr11_el0 // encoding: [0x69,0xe9,0x3b,0xd5]
+// CHECK: mrs x9, pmevcntr12_el0 // encoding: [0x89,0xe9,0x3b,0xd5]
+// CHECK: mrs x9, pmevcntr13_el0 // encoding: [0xa9,0xe9,0x3b,0xd5]
+// CHECK: mrs x9, pmevcntr14_el0 // encoding: [0xc9,0xe9,0x3b,0xd5]
+// CHECK: mrs x9, pmevcntr15_el0 // encoding: [0xe9,0xe9,0x3b,0xd5]
+// CHECK: mrs x9, pmevcntr16_el0 // encoding: [0x09,0xea,0x3b,0xd5]
+// CHECK: mrs x9, pmevcntr17_el0 // encoding: [0x29,0xea,0x3b,0xd5]
+// CHECK: mrs x9, pmevcntr18_el0 // encoding: [0x49,0xea,0x3b,0xd5]
+// CHECK: mrs x9, pmevcntr19_el0 // encoding: [0x69,0xea,0x3b,0xd5]
+// CHECK: mrs x9, pmevcntr20_el0 // encoding: [0x89,0xea,0x3b,0xd5]
+// CHECK: mrs x9, pmevcntr21_el0 // encoding: [0xa9,0xea,0x3b,0xd5]
+// CHECK: mrs x9, pmevcntr22_el0 // encoding: [0xc9,0xea,0x3b,0xd5]
+// CHECK: mrs x9, pmevcntr23_el0 // encoding: [0xe9,0xea,0x3b,0xd5]
+// CHECK: mrs x9, pmevcntr24_el0 // encoding: [0x09,0xeb,0x3b,0xd5]
+// CHECK: mrs x9, pmevcntr25_el0 // encoding: [0x29,0xeb,0x3b,0xd5]
+// CHECK: mrs x9, pmevcntr26_el0 // encoding: [0x49,0xeb,0x3b,0xd5]
+// CHECK: mrs x9, pmevcntr27_el0 // encoding: [0x69,0xeb,0x3b,0xd5]
+// CHECK: mrs x9, pmevcntr28_el0 // encoding: [0x89,0xeb,0x3b,0xd5]
+// CHECK: mrs x9, pmevcntr29_el0 // encoding: [0xa9,0xeb,0x3b,0xd5]
+// CHECK: mrs x9, pmevcntr30_el0 // encoding: [0xc9,0xeb,0x3b,0xd5]
+// CHECK: mrs x9, pmccfiltr_el0 // encoding: [0xe9,0xef,0x3b,0xd5]
+// CHECK: mrs x9, pmevtyper0_el0 // encoding: [0x09,0xec,0x3b,0xd5]
+// CHECK: mrs x9, pmevtyper1_el0 // encoding: [0x29,0xec,0x3b,0xd5]
+// CHECK: mrs x9, pmevtyper2_el0 // encoding: [0x49,0xec,0x3b,0xd5]
+// CHECK: mrs x9, pmevtyper3_el0 // encoding: [0x69,0xec,0x3b,0xd5]
+// CHECK: mrs x9, pmevtyper4_el0 // encoding: [0x89,0xec,0x3b,0xd5]
+// CHECK: mrs x9, pmevtyper5_el0 // encoding: [0xa9,0xec,0x3b,0xd5]
+// CHECK: mrs x9, pmevtyper6_el0 // encoding: [0xc9,0xec,0x3b,0xd5]
+// CHECK: mrs x9, pmevtyper7_el0 // encoding: [0xe9,0xec,0x3b,0xd5]
+// CHECK: mrs x9, pmevtyper8_el0 // encoding: [0x09,0xed,0x3b,0xd5]
+// CHECK: mrs x9, pmevtyper9_el0 // encoding: [0x29,0xed,0x3b,0xd5]
+// CHECK: mrs x9, pmevtyper10_el0 // encoding: [0x49,0xed,0x3b,0xd5]
+// CHECK: mrs x9, pmevtyper11_el0 // encoding: [0x69,0xed,0x3b,0xd5]
+// CHECK: mrs x9, pmevtyper12_el0 // encoding: [0x89,0xed,0x3b,0xd5]
+// CHECK: mrs x9, pmevtyper13_el0 // encoding: [0xa9,0xed,0x3b,0xd5]
+// CHECK: mrs x9, pmevtyper14_el0 // encoding: [0xc9,0xed,0x3b,0xd5]
+// CHECK: mrs x9, pmevtyper15_el0 // encoding: [0xe9,0xed,0x3b,0xd5]
+// CHECK: mrs x9, pmevtyper16_el0 // encoding: [0x09,0xee,0x3b,0xd5]
+// CHECK: mrs x9, pmevtyper17_el0 // encoding: [0x29,0xee,0x3b,0xd5]
+// CHECK: mrs x9, pmevtyper18_el0 // encoding: [0x49,0xee,0x3b,0xd5]
+// CHECK: mrs x9, pmevtyper19_el0 // encoding: [0x69,0xee,0x3b,0xd5]
+// CHECK: mrs x9, pmevtyper20_el0 // encoding: [0x89,0xee,0x3b,0xd5]
+// CHECK: mrs x9, pmevtyper21_el0 // encoding: [0xa9,0xee,0x3b,0xd5]
+// CHECK: mrs x9, pmevtyper22_el0 // encoding: [0xc9,0xee,0x3b,0xd5]
+// CHECK: mrs x9, pmevtyper23_el0 // encoding: [0xe9,0xee,0x3b,0xd5]
+// CHECK: mrs x9, pmevtyper24_el0 // encoding: [0x09,0xef,0x3b,0xd5]
+// CHECK: mrs x9, pmevtyper25_el0 // encoding: [0x29,0xef,0x3b,0xd5]
+// CHECK: mrs x9, pmevtyper26_el0 // encoding: [0x49,0xef,0x3b,0xd5]
+// CHECK: mrs x9, pmevtyper27_el0 // encoding: [0x69,0xef,0x3b,0xd5]
+// CHECK: mrs x9, pmevtyper28_el0 // encoding: [0x89,0xef,0x3b,0xd5]
+// CHECK: mrs x9, pmevtyper29_el0 // encoding: [0xa9,0xef,0x3b,0xd5]
+// CHECK: mrs x9, pmevtyper30_el0 // encoding: [0xc9,0xef,0x3b,0xd5]
+
+ mrs x12, s3_7_c15_c1_5
+ mrs x13, s3_2_c11_c15_7
+ msr s3_0_c15_c0_0, x12
+ msr s3_7_c11_c13_7, x5
+// CHECK: mrs x12, s3_7_c15_c1_5 // encoding: [0xac,0xf1,0x3f,0xd5]
+// CHECK: mrs x13, s3_2_c11_c15_7 // encoding: [0xed,0xbf,0x3a,0xd5]
+// CHECK: msr s3_0_c15_c0_0, x12 // encoding: [0x0c,0xf0,0x18,0xd5]
+// CHECK: msr s3_7_c11_c13_7, x5 // encoding: [0xe5,0xbd,0x1f,0xd5]
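The s<op0>_<op1>_c<n>_c<m>_<op2> forms above map directly onto the MRS/MSR system-instruction fields, so the CHECK encodings can be reproduced by hand. A minimal Python sketch (editorial, not part of the patch; mrs_msr is a made-up helper name) assuming the usual A64 layout with op0 in {2,3} and little-endian byte order, as printed by -show-encoding:

import struct

def mrs_msr(op0, op1, crn, crm, op2, rt, is_mrs):
    # MRS/MSR (register): 1101 0101 00 L 1 o0 | op1 | CRn | CRm | op2 | Rt,
    # where L = 1 for MRS and o0 = op0 - 2.
    word = 0xD5100000 | ((1 << 21) if is_mrs else 0)
    word |= (op0 - 2) << 19 | op1 << 16 | crn << 12 | crm << 8 | op2 << 5 | rt
    return list(struct.pack("<I", word))

# mrs x12, s3_7_c15_c1_5  -> [0xac,0xf1,0x3f,0xd5] (as in the CHECK line above)
assert mrs_msr(3, 7, 15, 1, 5, 12, True) == [0xAC, 0xF1, 0x3F, 0xD5]
# msr s3_0_c15_c0_0, x12  -> [0x0c,0xf0,0x18,0xd5]
assert mrs_msr(3, 0, 15, 0, 0, 12, False) == [0x0C, 0xF0, 0x18, 0xD5]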
+
+//------------------------------------------------------------------------------
+// Test & branch (immediate)
+//------------------------------------------------------------------------------
+
+ tbz x5, #0, somewhere
+ tbz xzr, #63, elsewhere
+ tbnz x5, #45, nowhere
+// CHECK: tbz x5, #0, somewhere // encoding: [0x05'A',A,A,0x36'A']
+// CHECK: // fixup A - offset: 0, value: somewhere, kind: fixup_a64_tstbr
+// CHECK: tbz xzr, #63, elsewhere // encoding: [0x1f'A',A,0xf8'A',0xb6'A']
+// CHECK: // fixup A - offset: 0, value: elsewhere, kind: fixup_a64_tstbr
+// CHECK: tbnz x5, #45, nowhere // encoding: [0x05'A',A,0x68'A',0xb7'A']
+// CHECK: // fixup A - offset: 0, value: nowhere, kind: fixup_a64_tstbr
+
+ tbnz w3, #2, there
+ tbnz wzr, #31, nowhere
+ tbz w5, #12, anywhere
+// CHECK: tbnz w3, #2, there // encoding: [0x03'A',A,0x10'A',0x37'A']
+// CHECK: // fixup A - offset: 0, value: there, kind: fixup_a64_tstbr
+// CHECK: tbnz wzr, #31, nowhere // encoding: [0x1f'A',A,0xf8'A',0x37'A']
+// CHECK: // fixup A - offset: 0, value: nowhere, kind: fixup_a64_tstbr
+// CHECK: tbz w5, #12, anywhere // encoding: [0x05'A',A,0x60'A',0x36'A']
+// CHECK: // fixup A - offset: 0, value: anywhere, kind: fixup_a64_tstbr
+
+//------------------------------------------------------------------------------
+// Unconditional branch (immediate)
+//------------------------------------------------------------------------------
+
+ b somewhere
+ bl elsewhere
+// CHECK: b somewhere // encoding: [A,A,A,0x14'A']
+// CHECK: // fixup A - offset: 0, value: somewhere, kind: fixup_a64_uncondbr
+// CHECK: bl elsewhere // encoding: [A,A,A,0x94'A']
+// CHECK: // fixup A - offset: 0, value: elsewhere, kind: fixup_a64_call
+
+ b #4
+ bl #0
+ b #134217724
+ bl #-134217728
+// CHECK: b #4 // encoding: [0x01,0x00,0x00,0x14]
+// CHECK: bl #0 // encoding: [0x00,0x00,0x00,0x94]
+// CHECK: b #134217724 // encoding: [0xff,0xff,0xff,0x15]
+// CHECK: bl #-134217728 // encoding: [0x00,0x00,0x00,0x96]
+
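As a cross-check on the literal forms above: B and BL store the word offset (byte offset divided by 4) as a 26-bit two's-complement immediate under opcodes 0x14000000 and 0x94000000. A small sketch (editorial, not part of the patch) reproducing the three non-trivial encodings:

import struct

def branch_imm(byte_offset, link=False):
    imm26 = (byte_offset >> 2) & 0x03FFFFFF     # 26-bit two's-complement word offset
    word = (0x94000000 if link else 0x14000000) | imm26
    return list(struct.pack("<I", word))

assert branch_imm(4) == [0x01, 0x00, 0x00, 0x14]                       # b #4
assert branch_imm(134217724) == [0xFF, 0xFF, 0xFF, 0x15]               # b #134217724 (largest forward)
assert branch_imm(-134217728, link=True) == [0x00, 0x00, 0x00, 0x96]   # bl #-134217728 (largest backward)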
+//------------------------------------------------------------------------------
+// Unconditional branch (register)
+//------------------------------------------------------------------------------
+
+ br x20
+ blr xzr
+ ret x10
+// CHECK: br x20 // encoding: [0x80,0x02,0x1f,0xd6]
+// CHECK: blr xzr // encoding: [0xe0,0x03,0x3f,0xd6]
+// CHECK: ret x10 // encoding: [0x40,0x01,0x5f,0xd6]
+
+ ret
+ eret
+ drps
+// CHECK: ret // encoding: [0xc0,0x03,0x5f,0xd6]
+// CHECK: eret // encoding: [0xe0,0x03,0x9f,0xd6]
+// CHECK: drps // encoding: [0xe0,0x03,0xbf,0xd6]
+
diff --git a/test/MC/AArch64/elf-globaladdress.ll b/test/MC/AArch64/elf-globaladdress.ll
new file mode 100644
index 0000000000..3ca361d9f2
--- /dev/null
+++ b/test/MC/AArch64/elf-globaladdress.ll
@@ -0,0 +1,111 @@
+;; RUN: llc -march=aarch64 -filetype=obj %s -o - | \
+;; RUN: elf-dump | FileCheck -check-prefix=OBJ %s
+
+; Also take it on a round-trip through llvm-mc to stretch assembly-parsing's legs:
+;; RUN: llc -march=aarch64 %s -o - | \
+;; RUN: llvm-mc -arch=aarch64 -filetype=obj -o - | \
+;; RUN: elf-dump | FileCheck -check-prefix=OBJ %s
+
+@var8 = global i8 0
+@var16 = global i16 0
+@var32 = global i32 0
+@var64 = global i64 0
+
+define void @loadstore() {
+ %val8 = load i8* @var8
+ store volatile i8 %val8, i8* @var8
+
+ %val16 = load i16* @var16
+ store volatile i16 %val16, i16* @var16
+
+ %val32 = load i32* @var32
+ store volatile i32 %val32, i32* @var32
+
+ %val64 = load i64* @var64
+ store volatile i64 %val64, i64* @var64
+
+ ret void
+}
+
+@globaddr = global i64* null
+
+define void @address() {
+ store i64* @var64, i64** @globaddr
+ ret void
+}
+
+; Check we're using EM_AARCH64
+; OBJ: 'e_machine', 0x00b7
+
+; OBJ: .rela.text
+
+; var8
+; R_AARCH64_ADR_PREL_PG_HI21 against var8
+; OBJ: 'r_sym', 0x0000000f
+; OBJ-NEXT: 'r_type', 0x00000113
+
+; R_AARCH64_LDST8_ABS_LO12_NC against var8
+; OBJ: 'r_sym', 0x0000000f
+; OBJ-NEXT: 'r_type', 0x00000116
+
+
+; var16
+; R_AARCH64_ADR_PREL_PG_HI21 against var16
+; OBJ: 'r_sym', 0x0000000c
+; OBJ-NEXT: 'r_type', 0x00000113
+
+; R_AARCH64_LDST16_ABS_LO12_NC against var16
+; OBJ: 'r_sym', 0x0000000c
+; OBJ-NEXT: 'r_type', 0x0000011c
+
+
+; var32
+; R_AARCH64_ADR_PREL_PG_HI21 against var32
+; OBJ: 'r_sym', 0x0000000d
+; OBJ-NEXT: 'r_type', 0x00000113
+
+; R_AARCH64_LDST32_ABS_LO12_NC against var32
+; OBJ: 'r_sym', 0x0000000d
+; OBJ-NEXT: 'r_type', 0x0000011d
+
+
+; var64
+; R_AARCH64_ADR_PREL_PG_HI21 against var64
+; OBJ: 'r_sym', 0x0000000e
+; OBJ-NEXT: 'r_type', 0x00000113
+
+; R_AARCH64_LDST64_ABS_LO12_NC against var64
+; OBJ: 'r_sym', 0x0000000e
+; OBJ-NEXT: 'r_type', 0x0000011e
+
+; This is on the store, so not really important, but it stops the next
+; match working.
+; R_AARCH64_LDST64_ABS_LO12_NC against var64
+; OBJ: 'r_sym', 0x0000000e
+; OBJ-NEXT: 'r_type', 0x0000011e
+
+
+; Pure address-calculation against var64
+; R_AARCH64_ADR_PREL_PG_HI21 against var64
+; OBJ: 'r_sym', 0x0000000e
+; OBJ-NEXT: 'r_type', 0x00000113
+
+; R_AARCH64_ADD_ABS_LO12_NC against var64
+; OBJ: 'r_sym', 0x0000000e
+; OBJ-NEXT: 'r_type', 0x00000115
+
+
+; Make sure the symbols don't move around, otherwise relocation info
+; will be wrong:
+
+; OBJ: Symbol 12
+; OBJ-NEXT: var16
+
+; OBJ: Symbol 13
+; OBJ-NEXT: var32
+
+; OBJ: Symbol 14
+; OBJ-NEXT: var64
+
+; OBJ: Symbol 15
+; OBJ-NEXT: var8
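For reference, the r_type values checked above are the relocation codes from the ELF for the ARM 64-bit Architecture ABI, printed in hex by elf-dump. A small editorial lookup table (not part of the patch) for the codes this test relies on:

# AArch64 ELF relocation codes used by the OBJ checks above.
RELOC_CODES = {
    "R_AARCH64_ADR_PREL_PG_HI21":   0x113,  # adrp page address
    "R_AARCH64_ADD_ABS_LO12_NC":    0x115,  # add #:lo12:
    "R_AARCH64_LDST8_ABS_LO12_NC":  0x116,
    "R_AARCH64_LDST16_ABS_LO12_NC": 0x11C,
    "R_AARCH64_LDST32_ABS_LO12_NC": 0x11D,
    "R_AARCH64_LDST64_ABS_LO12_NC": 0x11E,
}
for name, code in sorted(RELOC_CODES.items(), key=lambda kv: kv[1]):
    print("0x%08x  %s" % (code, name))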
diff --git a/test/MC/AArch64/elf-objdump.s b/test/MC/AArch64/elf-objdump.s
new file mode 100644
index 0000000000..c5aa5b1989
--- /dev/null
+++ b/test/MC/AArch64/elf-objdump.s
@@ -0,0 +1,5 @@
+// 64 bit little endian
+// RUN: llvm-mc -filetype=obj -arch=aarch64 -triple aarch64-none-linux-gnu %s -o - | llvm-objdump -d
+
+// We just want to see if llvm-objdump works at all.
+// CHECK: .text
diff --git a/test/MC/AArch64/elf-reloc-addsubimm.s b/test/MC/AArch64/elf-reloc-addsubimm.s
new file mode 100644
index 0000000000..7fa6e90b5d
--- /dev/null
+++ b/test/MC/AArch64/elf-reloc-addsubimm.s
@@ -0,0 +1,13 @@
+// RUN: llvm-mc -arch=aarch64 -filetype=obj %s -o - | \
+// RUN: elf-dump | FileCheck -check-prefix=OBJ %s
+
+ add x2, x3, #:lo12:some_label
+// OBJ: .rela.text
+
+// OBJ: 'r_offset', 0x0000000000000000
+// OBJ-NEXT: 'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x00000115
+
+// OBJ: .symtab
+// OBJ: Symbol 5
+// OBJ-NEXT: some_label
\ No newline at end of file
diff --git a/test/MC/AArch64/elf-reloc-condbr.s b/test/MC/AArch64/elf-reloc-condbr.s
new file mode 100644
index 0000000000..283d3b95d0
--- /dev/null
+++ b/test/MC/AArch64/elf-reloc-condbr.s
@@ -0,0 +1,13 @@
+// RUN: llvm-mc -arch=aarch64 -filetype=obj %s -o - | \
+// RUN: elf-dump | FileCheck -check-prefix=OBJ %s
+
+ b.eq somewhere
+// OBJ: .rela.text
+
+// OBJ: 'r_offset', 0x0000000000000000
+// OBJ-NEXT: 'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x00000118
+
+// OBJ: .symtab
+// OBJ: Symbol 5
+// OBJ-NEXT: somewhere
\ No newline at end of file
diff --git a/test/MC/AArch64/elf-reloc-ldrlit.s b/test/MC/AArch64/elf-reloc-ldrlit.s
new file mode 100644
index 0000000000..ce9ff49db4
--- /dev/null
+++ b/test/MC/AArch64/elf-reloc-ldrlit.s
@@ -0,0 +1,28 @@
+// RUN: llvm-mc -arch=aarch64 -filetype=obj %s -o - | \
+// RUN: elf-dump | FileCheck -check-prefix=OBJ %s
+
+ ldr x0, some_label
+ ldr w3, some_label
+ ldrsw x9, some_label
+ prfm pldl3keep, some_label
+// OBJ: .rela.text
+
+// OBJ: 'r_offset', 0x0000000000000000
+// OBJ: 'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x00000111
+
+// OBJ: 'r_offset', 0x0000000000000004
+// OBJ: 'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x00000111
+
+// OBJ: 'r_offset', 0x0000000000000008
+// OBJ: 'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x00000111
+
+// OBJ: 'r_offset', 0x000000000000000c
+// OBJ: 'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x00000111
+
+// OBJ: .symtab
+// OBJ: Symbol 5
+// OBJ-NEXT: some_label
\ No newline at end of file
diff --git a/test/MC/AArch64/elf-reloc-ldstunsimm.s b/test/MC/AArch64/elf-reloc-ldstunsimm.s
new file mode 100644
index 0000000000..345fc8247d
--- /dev/null
+++ b/test/MC/AArch64/elf-reloc-ldstunsimm.s
@@ -0,0 +1,34 @@
+// RUN: llvm-mc -arch=aarch64 -filetype=obj %s -o - | \
+// RUN: elf-dump | FileCheck -check-prefix=OBJ %s
+
+ ldrb w0, [sp, #:lo12:some_label]
+ ldrh w0, [sp, #:lo12:some_label]
+ ldr w0, [sp, #:lo12:some_label]
+ ldr x0, [sp, #:lo12:some_label]
+ str q0, [sp, #:lo12:some_label]
+
+// OBJ: .rela.text
+
+// OBJ: 'r_offset', 0x0000000000000000
+// OBJ-NEXT: 'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x00000116
+
+// OBJ: 'r_offset', 0x0000000000000004
+// OBJ-NEXT: 'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x0000011c
+
+// OBJ: 'r_offset', 0x0000000000000008
+// OBJ-NEXT: 'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x0000011d
+
+// OBJ: 'r_offset', 0x000000000000000c
+// OBJ-NEXT: 'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x0000011e
+
+// OBJ: 'r_offset', 0x0000000000000010
+// OBJ-NEXT: 'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x0000012b
+
+// OBJ: .symtab
+// OBJ: Symbol 5
+// OBJ-NEXT: some_label
diff --git a/test/MC/AArch64/elf-reloc-movw.s b/test/MC/AArch64/elf-reloc-movw.s
new file mode 100644
index 0000000000..cb7dc6768e
--- /dev/null
+++ b/test/MC/AArch64/elf-reloc-movw.s
@@ -0,0 +1,98 @@
+// RUN: llvm-mc -arch=aarch64 -filetype=obj %s -o - | \
+// RUN: elf-dump | FileCheck -check-prefix=OBJ %s
+
+ movz x0, #:abs_g0:some_label
+ movk x0, #:abs_g0_nc:some_label
+
+ movz x3, #:abs_g1:some_label
+ movk x5, #:abs_g1_nc:some_label
+
+ movz x3, #:abs_g2:some_label
+ movk x5, #:abs_g2_nc:some_label
+
+ movz x7, #:abs_g3:some_label
+ movk x11, #:abs_g3:some_label
+
+ movz x13, #:abs_g0_s:some_label
+ movn x17, #:abs_g0_s:some_label
+
+ movz x19, #:abs_g1_s:some_label
+ movn x19, #:abs_g1_s:some_label
+
+ movz x19, #:abs_g2_s:some_label
+ movn x19, #:abs_g2_s:some_label
+// OBJ: .rela.text
+
+// :abs_g0: => R_AARCH64_MOVW_UABS_G0
+// OBJ: 'r_offset', 0x0000000000000000
+// OBJ: 'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x00000107
+
+// :abs_g0_nc: => R_AARCH64_MOVW_UABS_G0_NC
+// OBJ: 'r_offset', 0x0000000000000004
+// OBJ: 'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x00000108
+
+// :abs_g1: => R_AARCH64_MOVW_UABS_G1
+// OBJ: 'r_offset', 0x0000000000000008
+// OBJ: 'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x00000109
+
+// :abs_g1_nc: => R_AARCH64_MOVW_UABS_G1_NC
+// OBJ: 'r_offset', 0x000000000000000c
+// OBJ: 'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x0000010a
+
+// :abs_g2: => R_AARCH64_MOVW_UABS_G2
+// OBJ: 'r_offset', 0x0000000000000010
+// OBJ: 'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x0000010b
+
+// :abs_g2_nc: => R_AARCH64_MOVW_UABS_G2_NC
+// OBJ: 'r_offset', 0x0000000000000014
+// OBJ: 'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x0000010c
+
+// :abs_g3: => R_AARCH64_MOVW_UABS_G3
+// OBJ: 'r_offset', 0x0000000000000018
+// OBJ: 'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x0000010d
+
+// :abs_g3: => R_AARCH64_MOVW_UABS_G3
+// OBJ: 'r_offset', 0x000000000000001c
+// OBJ: 'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x0000010d
+
+// :abs_g0_s: => R_AARCH64_MOVW_SABS_G0
+// OBJ: 'r_offset', 0x0000000000000020
+// OBJ: 'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x0000010e
+
+// :abs_g0_s: => R_AARCH64_MOVW_SABS_G0
+// OBJ: 'r_offset', 0x0000000000000024
+// OBJ: 'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x0000010e
+
+// :abs_g1_s: => R_AARCH64_MOVW_SABS_G1
+// OBJ: 'r_offset', 0x0000000000000028
+// OBJ: 'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x0000010f
+
+// :abs_g1_s: => R_AARCH64_MOVW_SABS_G1
+// OBJ: 'r_offset', 0x000000000000002c
+// OBJ: 'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x0000010f
+
+// :abs_g2_s: => R_AARCH64_MOVW_SABS_G2
+// OBJ: 'r_offset', 0x0000000000000030
+// OBJ: 'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x00000110
+
+// :abs_g2_s: => R_AARCH64_MOVW_SABS_G2
+// OBJ: 'r_offset', 0x0000000000000034
+// OBJ: 'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x00000110
+
+// OBJ: .symtab
+// OBJ: Symbol 5
+// OBJ-NEXT: some_label
diff --git a/test/MC/AArch64/elf-reloc-pcreladdressing.s b/test/MC/AArch64/elf-reloc-pcreladdressing.s
new file mode 100644
index 0000000000..39a8ba9402
--- /dev/null
+++ b/test/MC/AArch64/elf-reloc-pcreladdressing.s
@@ -0,0 +1,29 @@
+// RUN: llvm-mc -arch=aarch64 -filetype=obj %s -o - | \
+// RUN: elf-dump | FileCheck -check-prefix=OBJ %s
+
+ adr x2, some_label
+ adrp x5, some_label
+
+ adrp x5, :got:some_label
+ ldr x0, [x5, #:got_lo12:some_label]
+// OBJ: .rela.text
+
+// OBJ: 'r_offset', 0x0000000000000000
+// OBJ-NEXT: 'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x00000112
+
+// OBJ: 'r_offset', 0x0000000000000004
+// OBJ-NEXT: 'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x00000113
+
+// OBJ: 'r_offset', 0x0000000000000008
+// OBJ-NEXT: 'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x00000137
+
+// OBJ: 'r_offset', 0x000000000000000c
+// OBJ-NEXT: 'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x00000138
+
+// OBJ: .symtab
+// OBJ: Symbol 5
+// OBJ-NEXT: some_label
\ No newline at end of file
diff --git a/test/MC/AArch64/elf-reloc-tstb.s b/test/MC/AArch64/elf-reloc-tstb.s
new file mode 100644
index 0000000000..c5e2981a22
--- /dev/null
+++ b/test/MC/AArch64/elf-reloc-tstb.s
@@ -0,0 +1,18 @@
+// RUN: llvm-mc -arch=aarch64 -filetype=obj %s -o - | \
+// RUN: elf-dump | FileCheck -check-prefix=OBJ %s
+
+ tbz x6, #45, somewhere
+ tbnz w3, #15, somewhere
+// OBJ: .rela.text
+
+// OBJ: 'r_offset', 0x0000000000000000
+// OBJ-NEXT: 'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x00000117
+
+// OBJ: 'r_offset', 0x0000000000000004
+// OBJ-NEXT: 'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x00000117
+
+// OBJ: .symtab
+// OBJ: Symbol 5
+// OBJ-NEXT: somewhere
diff --git a/test/MC/AArch64/elf-reloc-uncondbrimm.s b/test/MC/AArch64/elf-reloc-uncondbrimm.s
new file mode 100644
index 0000000000..0e97bc6669
--- /dev/null
+++ b/test/MC/AArch64/elf-reloc-uncondbrimm.s
@@ -0,0 +1,18 @@
+// RUN: llvm-mc -arch=aarch64 -filetype=obj %s -o - | \
+// RUN: elf-dump | FileCheck -check-prefix=OBJ %s
+
+ b somewhere
+ bl somewhere
+// OBJ: .rela.text
+
+// OBJ: 'r_offset', 0x0000000000000000
+// OBJ-NEXT: 'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x0000011a
+
+// OBJ: 'r_offset', 0x0000000000000004
+// OBJ-NEXT: 'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x0000011b
+
+// OBJ: .symtab
+// OBJ: Symbol 5
+// OBJ-NEXT: somewhere
\ No newline at end of file
diff --git a/test/MC/AArch64/lit.local.cfg b/test/MC/AArch64/lit.local.cfg
new file mode 100644
index 0000000000..cc02173c8e
--- /dev/null
+++ b/test/MC/AArch64/lit.local.cfg
@@ -0,0 +1,5 @@
+config.suffixes = ['.ll', '.c', '.cpp', '.s']
+
+targets = set(config.root.targets_to_build.split())
+if 'AArch64' not in targets:
+    config.unsupported = True
\ No newline at end of file
diff --git a/test/MC/AArch64/mapping-across-sections.s b/test/MC/AArch64/mapping-across-sections.s
new file mode 100644
index 0000000000..3d32c1dfb4
--- /dev/null
+++ b/test/MC/AArch64/mapping-across-sections.s
@@ -0,0 +1,28 @@
+// RUN: llvm-mc -triple=aarch64-none-linux-gnu -filetype=obj < %s | llvm-objdump -t - | FileCheck %s
+
+ .text
+ add w0, w0, w0
+
+// .wibble should *not* inherit .text's mapping symbol. It's a completely different section.
+ .section .wibble
+ add w0, w0, w0
+
+// A section should be able to start with a $d
+ .section .starts_data
+ .word 42
+
+// Changing back to .text should not emit a redundant $x
+ .text
+ add w0, w0, w0
+
+// With all those constraints, we want:
+// + .text to have $x at 0 and no others
+// + .wibble to have $x at 0
+// + .starts_data to have $d at 0
+
+
+// CHECK: 00000000 .starts_data 00000000 $d
+// CHECK-NEXT: 00000000 .text 00000000 $x
+// CHECK-NEXT: 00000000 .wibble 00000000 $x
+// CHECK-NOT: ${{[adtx]}}
+
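The same expectations can be eyeballed outside FileCheck by filtering the symbol table for mapping symbols. A rough editorial helper (not part of the patch; it assumes llvm-objdump -t prints one symbol per line with the name last, as in the CHECK lines above):

import subprocess, sys

def mapping_symbols(obj_path):
    # Keep only the $x/$d mapping-symbol rows of the symbol table.
    out = subprocess.check_output(["llvm-objdump", "-t", obj_path]).decode()
    return [line for line in out.splitlines()
            if line.rstrip().endswith(("$x", "$d"))]

if __name__ == "__main__":
    print("\n".join(mapping_symbols(sys.argv[1])))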
diff --git a/test/MC/AArch64/mapping-within-section.s b/test/MC/AArch64/mapping-within-section.s
new file mode 100644
index 0000000000..c8bd804fa0
--- /dev/null
+++ b/test/MC/AArch64/mapping-within-section.s
@@ -0,0 +1,23 @@
+// RUN: llvm-mc -triple=aarch64-none-linux-gnu -filetype=obj < %s | llvm-objdump -t - | FileCheck %s
+
+ .text
+// $x at 0x0000
+ add w0, w0, w0
+// $d at 0x0004
+ .ascii "012"
+ .byte 1
+ .hword 2
+ .word 4
+ .xword 8
+ .single 4.0
+ .double 8.0
+ .space 10
+ .zero 3
+ .fill 10, 2, 42
+ .org 100, 12
+// $x at 0x0064
+ add x0, x0, x0
+
+// CHECK: 00000004 .text 00000000 $d
+// CHECK-NEXT: 00000000 .text 00000000 $x
+// CHECK-NEXT: 00000064 .text 00000000 $x
diff --git a/test/MC/AArch64/tls-relocs.s b/test/MC/AArch64/tls-relocs.s
new file mode 100644
index 0000000000..690fa8c009
--- /dev/null
+++ b/test/MC/AArch64/tls-relocs.s
@@ -0,0 +1,662 @@
+// RUN: llvm-mc -arch=aarch64 -show-encoding < %s | FileCheck %s
+// RUN: llvm-mc -arch=aarch64 -filetype=obj < %s -o %t
+// RUN: elf-dump %t | FileCheck --check-prefix=CHECK-ELF %s
+// RUN: llvm-objdump -r %t | FileCheck --check-prefix=CHECK-ELF-NAMES %s
+
+// CHECK-ELF: .rela.text
+
+ // TLS local-dynamic forms
+ movz x1, #:dtprel_g2:var
+ movn x2, #:dtprel_g2:var
+ movz x3, #:dtprel_g2:var
+ movn x4, #:dtprel_g2:var
+// CHECK: movz x1, #:dtprel_g2:var // encoding: [0x01'A',A,0xc0'A',0x92'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g2:var, kind: fixup_a64_movw_dtprel_g2
+// CHECK-NEXT: movn x2, #:dtprel_g2:var // encoding: [0x02'A',A,0xc0'A',0x92'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g2:var, kind: fixup_a64_movw_dtprel_g2
+// CHECK-NEXT: movz x3, #:dtprel_g2:var // encoding: [0x03'A',A,0xc0'A',0x92'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g2:var, kind: fixup_a64_movw_dtprel_g2
+// CHECK-NEXT: movn x4, #:dtprel_g2:var // encoding: [0x04'A',A,0xc0'A',0x92'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g2:var, kind: fixup_a64_movw_dtprel_g2
+
+// CHECK-ELF: # Relocation 0
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000000)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM:0x[0-9a-f]+]])
+// CHECK-ELF-NEXT: ('r_type', 0x0000020b)
+// CHECK-ELF: # Relocation 1
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000004)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x0000020b)
+// CHECK-ELF: # Relocation 2
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000008)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x0000020b)
+// CHECK-ELF: # Relocation 3
+// CHECK-ELF-NEXT: (('r_offset', 0x000000000000000c)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x0000020b)
+
+// CHECK-ELF-NAMES: 0 R_AARCH64_TLSLD_MOVW_DTPREL_G2
+// CHECK-ELF-NAMES: 4 R_AARCH64_TLSLD_MOVW_DTPREL_G2
+// CHECK-ELF-NAMES: 8 R_AARCH64_TLSLD_MOVW_DTPREL_G2
+// CHECK-ELF-NAMES: 12 R_AARCH64_TLSLD_MOVW_DTPREL_G2
+
+ movz x5, #:dtprel_g1:var
+ movn x6, #:dtprel_g1:var
+ movz w7, #:dtprel_g1:var
+ movn w8, #:dtprel_g1:var
+// CHECK: movz x5, #:dtprel_g1:var // encoding: [0x05'A',A,0xa0'A',0x92'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g1:var, kind: fixup_a64_movw_dtprel_g1
+// CHECK-NEXT: movn x6, #:dtprel_g1:var // encoding: [0x06'A',A,0xa0'A',0x92'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g1:var, kind: fixup_a64_movw_dtprel_g1
+// CHECK-NEXT: movz w7, #:dtprel_g1:var // encoding: [0x07'A',A,0xa0'A',0x12'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g1:var, kind: fixup_a64_movw_dtprel_g1
+// CHECK-NEXT: movn w8, #:dtprel_g1:var // encoding: [0x08'A',A,0xa0'A',0x12'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g1:var, kind: fixup_a64_movw_dtprel_g1
+
+// CHECK-ELF: # Relocation 4
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000010)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x0000020c)
+// CHECK-ELF: # Relocation 5
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000014)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x0000020c)
+// CHECK-ELF: # Relocation 6
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000018)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x0000020c)
+// CHECK-ELF: # Relocation 7
+// CHECK-ELF-NEXT: (('r_offset', 0x000000000000001c)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x0000020c)
+
+// CHECK-ELF-NAMES: 16 R_AARCH64_TLSLD_MOVW_DTPREL_G1
+// CHECK-ELF-NAMES: 20 R_AARCH64_TLSLD_MOVW_DTPREL_G1
+// CHECK-ELF-NAMES: 24 R_AARCH64_TLSLD_MOVW_DTPREL_G1
+// CHECK-ELF-NAMES: 28 R_AARCH64_TLSLD_MOVW_DTPREL_G1
+
+ movk x9, #:dtprel_g1_nc:var
+ movk w10, #:dtprel_g1_nc:var
+// CHECK: movk x9, #:dtprel_g1_nc:var // encoding: [0x09'A',A,0xa0'A',0xf2'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g1_nc:var, kind: fixup_a64_movw_dtprel_g1_nc
+// CHECK-NEXT: movk w10, #:dtprel_g1_nc:var // encoding: [0x0a'A',A,0xa0'A',0x72'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g1_nc:var, kind: fixup_a64_movw_dtprel_g1_nc
+
+// CHECK-ELF: # Relocation 8
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000020)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x0000020d)
+// CHECK-ELF: # Relocation 9
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000024)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x0000020d)
+
+// CHECK-ELF-NAMES: 32 R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC
+// CHECK-ELF-NAMES: 36 R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC
+
+ movz x11, #:dtprel_g0:var
+ movn x12, #:dtprel_g0:var
+ movz w13, #:dtprel_g0:var
+ movn w14, #:dtprel_g0:var
+// CHECK: movz x11, #:dtprel_g0:var // encoding: [0x0b'A',A,0x80'A',0x92'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g0:var, kind: fixup_a64_movw_dtprel_g0
+// CHECK-NEXT: movn x12, #:dtprel_g0:var // encoding: [0x0c'A',A,0x80'A',0x92'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g0:var, kind: fixup_a64_movw_dtprel_g0
+// CHECK-NEXT: movz w13, #:dtprel_g0:var // encoding: [0x0d'A',A,0x80'A',0x12'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g0:var, kind: fixup_a64_movw_dtprel_g0
+// CHECK-NEXT: movn w14, #:dtprel_g0:var // encoding: [0x0e'A',A,0x80'A',0x12'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g0:var, kind: fixup_a64_movw_dtprel_g0
+
+
+// CHECK-ELF: # Relocation 10
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000028)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x0000020e)
+// CHECK-ELF: # Relocation 11
+// CHECK-ELF-NEXT: (('r_offset', 0x000000000000002c)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x0000020e)
+// CHECK-ELF: # Relocation 12
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000030)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x0000020e)
+// CHECK-ELF: # Relocation 13
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000034)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x0000020e)
+
+// CHECK-ELF-NAMES: 40 R_AARCH64_TLSLD_MOVW_DTPREL_G0
+// CHECK-ELF-NAMES: 44 R_AARCH64_TLSLD_MOVW_DTPREL_G0
+// CHECK-ELF-NAMES: 48 R_AARCH64_TLSLD_MOVW_DTPREL_G0
+// CHECK-ELF-NAMES: 52 R_AARCH64_TLSLD_MOVW_DTPREL_G0
+
+
+ movk x15, #:dtprel_g0_nc:var
+ movk w16, #:dtprel_g0_nc:var
+// CHECK: movk x15, #:dtprel_g0_nc:var // encoding: [0x0f'A',A,0x80'A',0xf2'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g0_nc:var, kind: fixup_a64_movw_dtprel_g0_nc
+// CHECK-NEXT: movk w16, #:dtprel_g0_nc:var // encoding: [0x10'A',A,0x80'A',0x72'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g0_nc:var, kind: fixup_a64_movw_dtprel_g0_nc
+
+// CHECK-ELF: # Relocation 14
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000038)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x0000020f)
+// CHECK-ELF: # Relocation 15
+// CHECK-ELF-NEXT: (('r_offset', 0x000000000000003c)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x0000020f)
+
+// CHECK-ELF-NAMES: 56 R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC
+// CHECK-ELF-NAMES: 60 R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC
+
+ add x17, x18, #:dtprel_hi12:var, lsl #12
+ add w19, w20, #:dtprel_hi12:var, lsl #12
+// CHECK: add x17, x18, #:dtprel_hi12:var, lsl #12 // encoding: [0x51'A',0x02'A',0x40'A',0x91'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_hi12:var, kind: fixup_a64_add_dtprel_hi12
+// CHECK-NEXT: add w19, w20, #:dtprel_hi12:var, lsl #12 // encoding: [0x93'A',0x02'A',0x40'A',0x11'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_hi12:var, kind: fixup_a64_add_dtprel_hi12
+
+// CHECK-ELF: # Relocation 16
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000040)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x00000210)
+// CHECK-ELF: # Relocation 17
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000044)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x00000210)
+
+// CHECK-ELF-NAMES: 64 R_AARCH64_TLSLD_ADD_DTPREL_HI12
+// CHECK-ELF-NAMES: 68 R_AARCH64_TLSLD_ADD_DTPREL_HI12
+
+
+ add x21, x22, #:dtprel_lo12:var
+ add w23, w24, #:dtprel_lo12:var
+// CHECK: add x21, x22, #:dtprel_lo12:var // encoding: [0xd5'A',0x02'A',A,0x91'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_a64_add_dtprel_lo12
+// CHECK-NEXT: add w23, w24, #:dtprel_lo12:var // encoding: [0x17'A',0x03'A',A,0x11'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_a64_add_dtprel_lo12
+
+// CHECK-ELF: # Relocation 18
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000048)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x00000211)
+// CHECK-ELF: # Relocation 19
+// CHECK-ELF-NEXT: (('r_offset', 0x000000000000004c)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x00000211)
+
+// CHECK-ELF-NAMES: 72 R_AARCH64_TLSLD_ADD_DTPREL_LO12
+// CHECK-ELF-NAMES: 76 R_AARCH64_TLSLD_ADD_DTPREL_LO12
+
+ add x25, x26, #:dtprel_lo12_nc:var
+ add w27, w28, #:dtprel_lo12_nc:var
+// CHECK: add x25, x26, #:dtprel_lo12_nc:var // encoding: [0x59'A',0x03'A',A,0x91'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_a64_add_dtprel_lo12_nc
+// CHECK-NEXT: add w27, w28, #:dtprel_lo12_nc:var // encoding: [0x9b'A',0x03'A',A,0x11'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_a64_add_dtprel_lo12_nc
+
+// CHECK-ELF: # Relocation 20
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000050)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x00000212)
+// CHECK-ELF: # Relocation 21
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000054)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x00000212)
+
+// CHECK-ELF-NAMES: 80 R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC
+// CHECK-ELF-NAMES: 84 R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC
+
+ ldrb w29, [x30, #:dtprel_lo12:var]
+ ldrsb x29, [x28, #:dtprel_lo12_nc:var]
+// CHECK: ldrb w29, [x30, #:dtprel_lo12:var] // encoding: [0xdd'A',0x03'A',0x40'A',0x39'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_a64_ldst8_dtprel_lo12
+// CHECK-NEXT: ldrsb x29, [x28, #:dtprel_lo12_nc:var] // encoding: [0x9d'A',0x03'A',0x80'A',0x39'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_a64_ldst8_dtprel_lo12_nc
+
+// CHECK-ELF: # Relocation 22
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000058)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x00000213)
+// CHECK-ELF: # Relocation 23
+// CHECK-ELF-NEXT: (('r_offset', 0x000000000000005c)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x00000214)
+
+// CHECK-ELF-NAMES: 88 R_AARCH64_TLSLD_LDST8_DTPREL_LO12
+// CHECK-ELF-NAMES: 92 R_AARCH64_TLSLD_LDST8_DTPREL_LO12_NC
+
+ strh w27, [x26, #:dtprel_lo12:var]
+ ldrsh x25, [x24, #:dtprel_lo12_nc:var]
+// CHECK: strh w27, [x26, #:dtprel_lo12:var] // encoding: [0x5b'A',0x03'A',A,0x79'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_a64_ldst16_dtprel_lo12
+// CHECK-NEXT: ldrsh x25, [x24, #:dtprel_lo12_nc:var] // encoding: [0x19'A',0x03'A',0x80'A',0x79'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_a64_ldst16_dtprel_lo12_nc
+
+// CHECK-ELF: # Relocation 24
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000060)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x00000215)
+// CHECK-ELF: # Relocation 25
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000064)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x00000216)
+
+// CHECK-ELF-NAMES: 96 R_AARCH64_TLSLD_LDST16_DTPREL_LO12
+// CHECK-ELF-NAMES: 100 R_AARCH64_TLSLD_LDST16_DTPREL_LO12_NC
+
+ ldr w23, [x22, #:dtprel_lo12:var]
+ ldrsw x21, [x20, #:dtprel_lo12_nc:var]
+// CHECK: ldr w23, [x22, #:dtprel_lo12:var] // encoding: [0xd7'A',0x02'A',0x40'A',0xb9'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_a64_ldst32_dtprel_lo12
+// CHECK-NEXT: ldrsw x21, [x20, #:dtprel_lo12_nc:var] // encoding: [0x95'A',0x02'A',0x80'A',0xb9'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_a64_ldst32_dtprel_lo12_nc
+
+// CHECK-ELF: # Relocation 26
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000068)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x00000217)
+// CHECK-ELF: # Relocation 27
+// CHECK-ELF-NEXT: (('r_offset', 0x000000000000006c)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x00000218)
+
+// CHECK-ELF-NAMES: 104 R_AARCH64_TLSLD_LDST32_DTPREL_LO12
+// CHECK-ELF-NAMES: 108 R_AARCH64_TLSLD_LDST32_DTPREL_LO12_NC
+
+ ldr x19, [x18, #:dtprel_lo12:var]
+ str x17, [x16, #:dtprel_lo12_nc:var]
+// CHECK: ldr x19, [x18, #:dtprel_lo12:var] // encoding: [0x53'A',0x02'A',0x40'A',0xf9'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_a64_ldst64_dtprel_lo12
+// CHECK-NEXT: str x17, [x16, #:dtprel_lo12_nc:var] // encoding: [0x11'A',0x02'A',A,0xf9'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_a64_ldst64_dtprel_lo12_nc
+
+
+// CHECK-ELF: # Relocation 28
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000070)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x00000219)
+// CHECK-ELF: # Relocation 29
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000074)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x0000021a)
+
+// CHECK-ELF-NAMES: 112 R_AARCH64_TLSLD_LDST64_DTPREL_LO12
+// CHECK-ELF-NAMES: 116 R_AARCH64_TLSLD_LDST64_DTPREL_LO12_NC
+
+ // TLS initial-exec forms
+ movz x15, #:gottprel_g1:var
+ movz w14, #:gottprel_g1:var
+// CHECK: movz x15, #:gottprel_g1:var // encoding: [0x0f'A',A,0xa0'A',0x92'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :gottprel_g1:var, kind: fixup_a64_movw_gottprel_g1
+// CHECK-NEXT: movz w14, #:gottprel_g1:var // encoding: [0x0e'A',A,0xa0'A',0x12'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :gottprel_g1:var, kind: fixup_a64_movw_gottprel_g1
+
+// CHECK-ELF: # Relocation 30
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000078)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x0000021b)
+// CHECK-ELF: # Relocation 31
+// CHECK-ELF-NEXT: (('r_offset', 0x000000000000007c)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x0000021b)
+
+// CHECK-ELF-NAMES: 120 R_AARCH64_TLSIE_MOVW_GOTTPREL_G1
+// CHECK-ELF-NAMES: 124 R_AARCH64_TLSIE_MOVW_GOTTPREL_G1
+
+ movk x13, #:gottprel_g0_nc:var
+ movk w12, #:gottprel_g0_nc:var
+// CHECK: movk x13, #:gottprel_g0_nc:var // encoding: [0x0d'A',A,0x80'A',0xf2'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :gottprel_g0_nc:var, kind: fixup_a64_movw_gottprel_g0_nc
+// CHECK-NEXT: movk w12, #:gottprel_g0_nc:var // encoding: [0x0c'A',A,0x80'A',0x72'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :gottprel_g0_nc:var, kind: fixup_a64_movw_gottprel_g0_nc
+
+// CHECK-ELF: # Relocation 32
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000080)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x0000021c)
+// CHECK-ELF: # Relocation 33
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000084)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x0000021c)
+
+// CHECK-ELF-NAMES: 128 R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC
+// CHECK-ELF-NAMES: 132 R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC
+
+ adrp x11, :gottprel:var
+ ldr x10, [x0, #:gottprel_lo12:var]
+ ldr x9, :gottprel:var
+// CHECK: adrp x11, :gottprel:var // encoding: [0x0b'A',A,A,0x90'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :gottprel:var, kind: fixup_a64_adr_gottprel_page
+// CHECK-NEXT: ldr x10, [x0, #:gottprel_lo12:var] // encoding: [0x0a'A',A,0x40'A',0xf9'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :gottprel_lo12:var, kind: fixup_a64_ld64_gottprel_lo12_nc
+// CHECK-NEXT: ldr x9, :gottprel:var // encoding: [0x09'A',A,A,0x58'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :gottprel:var, kind: fixup_a64_ld_gottprel_prel19
+
+// CHECK-ELF: # Relocation 34
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000088)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x0000021d)
+// CHECK-ELF: # Relocation 35
+// CHECK-ELF-NEXT: (('r_offset', 0x000000000000008c)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x0000021e)
+// CHECK-ELF: # Relocation 36
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000090)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x0000021f)
+
+// CHECK-ELF-NAMES: 136 R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE
+// CHECK-ELF-NAMES: 140 R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC
+// CHECK-ELF-NAMES: 144 R_AARCH64_TLSIE_LD_GOTTPREL_PREL19
+
+ // TLS local-exec forms
+ movz x3, #:tprel_g2:var
+ movn x4, #:tprel_g2:var
+// CHECK: movz x3, #:tprel_g2:var // encoding: [0x03'A',A,0xc0'A',0x92'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g2:var, kind: fixup_a64_movw_tprel_g2
+// CHECK-NEXT: movn x4, #:tprel_g2:var // encoding: [0x04'A',A,0xc0'A',0x92'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g2:var, kind: fixup_a64_movw_tprel_g2
+
+// CHECK-ELF: # Relocation 37
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000094)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x00000220)
+// CHECK-ELF: # Relocation 38
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000098)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x00000220)
+
+// CHECK-ELF-NAMES: 148 R_AARCH64_TLSLE_MOVW_TPREL_G2
+// CHECK-ELF-NAMES: 152 R_AARCH64_TLSLE_MOVW_TPREL_G2
+
+ movz x5, #:tprel_g1:var
+ movn x6, #:tprel_g1:var
+ movz w7, #:tprel_g1:var
+ movn w8, #:tprel_g1:var
+// CHECK: movz x5, #:tprel_g1:var // encoding: [0x05'A',A,0xa0'A',0x92'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g1:var, kind: fixup_a64_movw_tprel_g1
+// CHECK-NEXT: movn x6, #:tprel_g1:var // encoding: [0x06'A',A,0xa0'A',0x92'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g1:var, kind: fixup_a64_movw_tprel_g1
+// CHECK-NEXT: movz w7, #:tprel_g1:var // encoding: [0x07'A',A,0xa0'A',0x12'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g1:var, kind: fixup_a64_movw_tprel_g1
+// CHECK-NEXT: movn w8, #:tprel_g1:var // encoding: [0x08'A',A,0xa0'A',0x12'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g1:var, kind: fixup_a64_movw_tprel_g1
+
+// CHECK-ELF: # Relocation 39
+// CHECK-ELF-NEXT: (('r_offset', 0x000000000000009c)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x00000221)
+// CHECK-ELF: # Relocation 40
+// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000a0)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x00000221)
+// CHECK-ELF: # Relocation 41
+// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000a4)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x00000221)
+// CHECK-ELF: # Relocation 42
+// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000a8)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x00000221)
+
+// CHECK-ELF-NAMES: 156 R_AARCH64_TLSLE_MOVW_TPREL_G1
+// CHECK-ELF-NAMES: 160 R_AARCH64_TLSLE_MOVW_TPREL_G1
+// CHECK-ELF-NAMES: 164 R_AARCH64_TLSLE_MOVW_TPREL_G1
+// CHECK-ELF-NAMES: 168 R_AARCH64_TLSLE_MOVW_TPREL_G1
+
+ movk x9, #:tprel_g1_nc:var
+ movk w10, #:tprel_g1_nc:var
+// CHECK: movk x9, #:tprel_g1_nc:var // encoding: [0x09'A',A,0xa0'A',0xf2'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g1_nc:var, kind: fixup_a64_movw_tprel_g1_nc
+// CHECK-NEXT: movk w10, #:tprel_g1_nc:var // encoding: [0x0a'A',A,0xa0'A',0x72'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g1_nc:var, kind: fixup_a64_movw_tprel_g1_nc
+
+// CHECK-ELF: # Relocation 43
+// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000ac)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x00000222)
+// CHECK-ELF: # Relocation 44
+// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000b0)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x00000222)
+
+// CHECK-ELF-NAMES: 172 R_AARCH64_TLSLE_MOVW_TPREL_G1_NC
+// CHECK-ELF-NAMES: 176 R_AARCH64_TLSLE_MOVW_TPREL_G1_NC
+
+ movz x11, #:tprel_g0:var
+ movn x12, #:tprel_g0:var
+ movz w13, #:tprel_g0:var
+ movn w14, #:tprel_g0:var
+// CHECK: movz x11, #:tprel_g0:var // encoding: [0x0b'A',A,0x80'A',0x92'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g0:var, kind: fixup_a64_movw_tprel_g0
+// CHECK-NEXT: movn x12, #:tprel_g0:var // encoding: [0x0c'A',A,0x80'A',0x92'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g0:var, kind: fixup_a64_movw_tprel_g0
+// CHECK-NEXT: movz w13, #:tprel_g0:var // encoding: [0x0d'A',A,0x80'A',0x12'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g0:var, kind: fixup_a64_movw_tprel_g0
+// CHECK-NEXT: movn w14, #:tprel_g0:var // encoding: [0x0e'A',A,0x80'A',0x12'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g0:var, kind: fixup_a64_movw_tprel_g0
+
+// CHECK-ELF: # Relocation 45
+// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000b4)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x00000223)
+// CHECK-ELF: # Relocation 46
+// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000b8)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x00000223)
+// CHECK-ELF: # Relocation 47
+// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000bc)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x00000223)
+// CHECK-ELF: # Relocation 48
+// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000c0)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x00000223)
+
+// CHECK-ELF-NAMES: 180 R_AARCH64_TLSLE_MOVW_TPREL_G0
+// CHECK-ELF-NAMES: 184 R_AARCH64_TLSLE_MOVW_TPREL_G0
+// CHECK-ELF-NAMES: 188 R_AARCH64_TLSLE_MOVW_TPREL_G0
+// CHECK-ELF-NAMES: 192 R_AARCH64_TLSLE_MOVW_TPREL_G0
+
+ movk x15, #:tprel_g0_nc:var
+ movk w16, #:tprel_g0_nc:var
+// CHECK: movk x15, #:tprel_g0_nc:var // encoding: [0x0f'A',A,0x80'A',0xf2'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g0_nc:var, kind: fixup_a64_movw_tprel_g0_nc
+// CHECK-NEXT: movk w16, #:tprel_g0_nc:var // encoding: [0x10'A',A,0x80'A',0x72'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g0_nc:var, kind: fixup_a64_movw_tprel_g0_nc
+
+// CHECK-ELF: # Relocation 49
+// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000c4)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x00000224)
+// CHECK-ELF: # Relocation 50
+// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000c8)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x00000224)
+
+// CHECK-ELF-NAMES: 196 R_AARCH64_TLSLE_MOVW_TPREL_G0_NC
+// CHECK-ELF-NAMES: 200 R_AARCH64_TLSLE_MOVW_TPREL_G0_NC
+
+ add x17, x18, #:tprel_hi12:var, lsl #12
+ add w19, w20, #:tprel_hi12:var, lsl #12
+// CHECK: add x17, x18, #:tprel_hi12:var, lsl #12 // encoding: [0x51'A',0x02'A',0x40'A',0x91'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_hi12:var, kind: fixup_a64_add_tprel_hi12
+// CHECK-NEXT: add w19, w20, #:tprel_hi12:var, lsl #12 // encoding: [0x93'A',0x02'A',0x40'A',0x11'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_hi12:var, kind: fixup_a64_add_tprel_hi12
+
+// CHECK-ELF: # Relocation 51
+// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000cc)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x00000225)
+// CHECK-ELF: # Relocation 52
+// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000d0)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x00000225)
+
+// CHECK-ELF-NAMES: 204 R_AARCH64_TLSLE_ADD_TPREL_HI12
+// CHECK-ELF-NAMES: 208 R_AARCH64_TLSLE_ADD_TPREL_HI12
+
+ add x21, x22, #:tprel_lo12:var
+ add w23, w24, #:tprel_lo12:var
+// CHECK: add x21, x22, #:tprel_lo12:var // encoding: [0xd5'A',0x02'A',A,0x91'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_a64_add_tprel_lo12
+// CHECK-NEXT: add w23, w24, #:tprel_lo12:var // encoding: [0x17'A',0x03'A',A,0x11'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_a64_add_tprel_lo12
+
+// CHECK-ELF: # Relocation 53
+// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000d4)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x00000226)
+// CHECK-ELF: # Relocation 54
+// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000d8)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x00000226)
+
+// CHECK-ELF-NAMES: 212 R_AARCH64_TLSLE_ADD_TPREL_LO12
+// CHECK-ELF-NAMES: 216 R_AARCH64_TLSLE_ADD_TPREL_LO12
+
+ add x25, x26, #:tprel_lo12_nc:var
+ add w27, w28, #:tprel_lo12_nc:var
+// CHECK: add x25, x26, #:tprel_lo12_nc:var // encoding: [0x59'A',0x03'A',A,0x91'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_a64_add_tprel_lo12_nc
+// CHECK-NEXT: add w27, w28, #:tprel_lo12_nc:var // encoding: [0x9b'A',0x03'A',A,0x11'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_a64_add_tprel_lo12_nc
+
+// CHECK-ELF: # Relocation 55
+// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000dc)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x00000227)
+// CHECK-ELF: # Relocation 56
+// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000e0)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x00000227)
+
+
+// CHECK-ELF-NAMES: 220 R_AARCH64_TLSLE_ADD_TPREL_LO12_NC
+// CHECK-ELF-NAMES: 224 R_AARCH64_TLSLE_ADD_TPREL_LO12_NC
+
+ ldrb w29, [x30, #:tprel_lo12:var]
+ ldrsb x29, [x28, #:tprel_lo12_nc:var]
+// CHECK: ldrb w29, [x30, #:tprel_lo12:var] // encoding: [0xdd'A',0x03'A',0x40'A',0x39'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_a64_ldst8_tprel_lo12
+// CHECK-NEXT: ldrsb x29, [x28, #:tprel_lo12_nc:var] // encoding: [0x9d'A',0x03'A',0x80'A',0x39'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_a64_ldst8_tprel_lo12_nc
+
+// CHECK-ELF: # Relocation 57
+// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000e4)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x00000228)
+// CHECK-ELF: # Relocation 58
+// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000e8)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x00000229)
+
+// CHECK-ELF-NAMES: 228 R_AARCH64_TLSLE_LDST8_TPREL_LO12
+// CHECK-ELF-NAMES: 232 R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC
+
+ strh w27, [x26, #:tprel_lo12:var]
+ ldrsh x25, [x24, #:tprel_lo12_nc:var]
+// CHECK: strh w27, [x26, #:tprel_lo12:var] // encoding: [0x5b'A',0x03'A',A,0x79'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_a64_ldst16_tprel_lo12
+// CHECK-NEXT: ldrsh x25, [x24, #:tprel_lo12_nc:var] // encoding: [0x19'A',0x03'A',0x80'A',0x79'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_a64_ldst16_tprel_lo12_nc
+
+// CHECK-ELF: # Relocation 59
+// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000ec)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x0000022a)
+// CHECK-ELF: # Relocation 60
+// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000f0)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x0000022b)
+
+// CHECK-ELF-NAMES: 236 R_AARCH64_TLSLE_LDST16_TPREL_LO12
+// CHECK-ELF-NAMES: 240 R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC
+
+ ldr w23, [x22, #:tprel_lo12:var]
+ ldrsw x21, [x20, #:tprel_lo12_nc:var]
+// CHECK: ldr w23, [x22, #:tprel_lo12:var] // encoding: [0xd7'A',0x02'A',0x40'A',0xb9'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_a64_ldst32_tprel_lo12
+// CHECK-NEXT: ldrsw x21, [x20, #:tprel_lo12_nc:var] // encoding: [0x95'A',0x02'A',0x80'A',0xb9'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_a64_ldst32_tprel_lo12_nc
+
+// CHECK-ELF: # Relocation 61
+// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000f4)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x0000022c)
+// CHECK-ELF: # Relocation 62
+// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000f8)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x0000022d)
+
+// CHECK-ELF-NAMES: 244 R_AARCH64_TLSLE_LDST32_TPREL_LO12
+// CHECK-ELF-NAMES: 248 R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC
+
+ ldr x19, [x18, #:tprel_lo12:var]
+ str x17, [x16, #:tprel_lo12_nc:var]
+// CHECK: ldr x19, [x18, #:tprel_lo12:var] // encoding: [0x53'A',0x02'A',0x40'A',0xf9'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_a64_ldst64_tprel_lo12
+// CHECK-NEXT: str x17, [x16, #:tprel_lo12_nc:var] // encoding: [0x11'A',0x02'A',A,0xf9'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_a64_ldst64_tprel_lo12_nc
+
+// CHECK-ELF: # Relocation 63
+// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000fc)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x0000022e)
+// CHECK-ELF: # Relocation 64
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000100)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x0000022f)
+
+// CHECK-ELF-NAMES: 252 R_AARCH64_TLSLE_LDST64_TPREL_LO12
+// CHECK-ELF-NAMES: 256 R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC
+
+ // TLS descriptor forms
+ adrp x8, :tlsdesc:var
+ ldr x7, [x6, :tlsdesc_lo12:var]
+ add x5, x4, #:tlsdesc_lo12:var
+ .tlsdesccall var
+ blr x3
+
+// CHECK: adrp x8, :tlsdesc:var // encoding: [0x08'A',A,A,0x90'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :tlsdesc:var, kind: fixup_a64_tlsdesc_adr_page
+// CHECK-NEXT: ldr x7, [x6, #:tlsdesc_lo12:var] // encoding: [0xc7'A',A,0x40'A',0xf9'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :tlsdesc_lo12:var, kind: fixup_a64_tlsdesc_ld64_lo12_nc
+// CHECK-NEXT: add x5, x4, #:tlsdesc_lo12:var // encoding: [0x85'A',A,A,0x91'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :tlsdesc_lo12:var, kind: fixup_a64_tlsdesc_add_lo12_nc
+// CHECK-NEXT: .tlsdesccall var // encoding: []
+// CHECK-NEXT: // fixup A - offset: 0, value: :tlsdesc:var, kind: fixup_a64_tlsdesc_call
+// CHECK: blr x3 // encoding: [0x60,0x00,0x3f,0xd6]
+
+
+// CHECK-ELF: # Relocation 65
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000104)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x00000232)
+// CHECK-ELF: # Relocation 66
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000108)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x00000233)
+// CHECK-ELF: # Relocation 67
+// CHECK-ELF-NEXT: (('r_offset', 0x000000000000010c)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x00000234)
+// CHECK-ELF: # Relocation 68
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000110)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x00000239)
+
+// CHECK-ELF-NAMES: 260 R_AARCH64_TLSDESC_ADR_PAGE
+// CHECK-ELF-NAMES: 264 R_AARCH64_TLSDESC_LD64_LO12_NC
+// CHECK-ELF-NAMES: 268 R_AARCH64_TLSDESC_ADD_LO12_NC
+// CHECK-ELF-NAMES: 272 R_AARCH64_TLSDESC_CALL
+
+
+// Make sure symbol 5 has type STT_TLS:
+
+// CHECK-ELF: # Symbol 5
+// CHECK-ELF-NEXT: (('st_name', 0x00000006) # 'var'
+// CHECK-ELF-NEXT: ('st_bind', 0x1)
+// CHECK-ELF-NEXT: ('st_type', 0x6)
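The r_type values in this test sit in the TLS block of the AArch64 relocation code space, so each hex code pairs with the name the CHECK-ELF-NAMES lines expect. A short editorial spot-check (not part of the patch) of a few of the pairings used above:

TLS_RELOC_CODES = {
    0x20B: "R_AARCH64_TLSLD_MOVW_DTPREL_G2",
    0x211: "R_AARCH64_TLSLD_ADD_DTPREL_LO12",
    0x21B: "R_AARCH64_TLSIE_MOVW_GOTTPREL_G1",
    0x220: "R_AARCH64_TLSLE_MOVW_TPREL_G2",
    0x232: "R_AARCH64_TLSDESC_ADR_PAGE",
    0x239: "R_AARCH64_TLSDESC_CALL",
}
for code, name in sorted(TLS_RELOC_CODES.items()):
    print("0x%04x (%3d)  %s" % (code, code, name))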
diff --git a/test/MC/Disassembler/AArch64/basic-a64-instructions.txt b/test/MC/Disassembler/AArch64/basic-a64-instructions.txt
new file mode 100644
index 0000000000..9c5a5ebcf7
--- /dev/null
+++ b/test/MC/Disassembler/AArch64/basic-a64-instructions.txt
@@ -0,0 +1,4145 @@
+# RUN: llvm-mc -triple=aarch64 -disassemble < %s | FileCheck %s
+
+#------------------------------------------------------------------------------
+# Add/sub (immediate)
+#------------------------------------------------------------------------------
+# CHECK: add w4, w5, #0
+# CHECK: add w2, w3, #4095
+# CHECK: add w30, w29, #1, lsl #12
+# CHECK: add w13, w5, #4095, lsl #12
+# CHECK: add x5, x7, #1638
+0xa4 0x0 0x0 0x11
+0x62 0xfc 0x3f 0x11
+0xbe 0x7 0x40 0x11
+0xad 0xfc 0x7f 0x11
+0xe5 0x98 0x19 0x91
+
+# CHECK: add w20, wsp, #801
+# CHECK: add wsp, wsp, #1104
+# CHECK: add wsp, w30, #4084
+0xf4 0x87 0xc 0x11
+0xff 0x43 0x11 0x11
+0xdf 0xd3 0x3f 0x11
+
+# CHECK: add x0, x24, #291
+# CHECK: add x3, x24, #4095, lsl #12
+# CHECK: add x8, sp, #1074
+# CHECK: add sp, x29, #3816
+0x0 0x8f 0x4 0x91
+0x3 0xff 0x7f 0x91
+0xe8 0xcb 0x10 0x91
+0xbf 0xa3 0x3b 0x91
+
+# CHECK: sub w0, wsp, #4077
+# CHECK: sub w4, w20, #546, lsl #12
+# CHECK: sub sp, sp, #288
+# CHECK: sub wsp, w19, #16
+0xe0 0xb7 0x3f 0x51
+0x84 0x8a 0x48 0x51
+0xff 0x83 0x4 0xd1
+0x7f 0x42 0x0 0x51
+
+
+# CHECK: adds w13, w23, #291, lsl #12
+# CHECK: cmn w2, #4095
+# CHECK: adds w20, wsp, #0
+# CHECK: cmn x3, #1, lsl #12
+0xed 0x8e 0x44 0x31
+0x5f 0xfc 0x3f 0x31
+0xf4 0x3 0x0 0x31
+0x7f 0x4 0x40 0xb1
+
+# CHECK: cmp sp, #20, lsl #12
+# CHECK: cmp x30, #4095
+# CHECK: subs x4, sp, #3822
+0xff 0x53 0x40 0xf1
+0xdf 0xff 0x3f 0xf1
+0xe4 0xbb 0x3b 0xf1
+
+# These should really be CMN
+# CHECK: cmn w3, #291, lsl #12
+# CHECK: cmn wsp, #1365
+# CHECK: cmn sp, #1092, lsl #12
+0x7f 0x8c 0x44 0x31
+0xff 0x57 0x15 0x31
+0xff 0x13 0x51 0xb1
+
+# CHECK: mov sp, x30
+# CHECK: mov wsp, w20
+# CHECK: mov x11, sp
+# CHECK: mov w24, wsp
+0xdf 0x3 0x0 0x91
+0x9f 0x2 0x0 0x11
+0xeb 0x3 0x0 0x91
+0xf8 0x3 0x0 0x11
+
+#------------------------------------------------------------------------------
+# Add-subtract (shifted register)
+#------------------------------------------------------------------------------
+
+# CHECK: add w3, w5, w7
+# CHECK: add wzr, w3, w5
+# CHECK: add w20, wzr, w4
+# CHECK: add w4, w6, wzr
+# CHECK: add w11, w13, w15
+# CHECK: add w9, w3, wzr, lsl #10
+# CHECK: add w17, w29, w20, lsl #31
+# CHECK: add w21, w22, w23, lsr #0
+# CHECK: add w24, w25, w26, lsr #18
+# CHECK: add w27, w28, w29, lsr #31
+# CHECK: add w2, w3, w4, asr #0
+# CHECK: add w5, w6, w7, asr #21
+# CHECK: add w8, w9, w10, asr #31
+0xa3 0x0 0x7 0xb
+0x7f 0x0 0x5 0xb
+0xf4 0x3 0x4 0xb
+0xc4 0x0 0x1f 0xb
+0xab 0x1 0xf 0xb
+0x69 0x28 0x1f 0xb
+0xb1 0x7f 0x14 0xb
+0xd5 0x2 0x57 0xb
+0x38 0x4b 0x5a 0xb
+0x9b 0x7f 0x5d 0xb
+0x62 0x0 0x84 0xb
+0xc5 0x54 0x87 0xb
+0x28 0x7d 0x8a 0xb
+
+# CHECK: add x3, x5, x7
+# CHECK: add xzr, x3, x5
+# CHECK: add x20, xzr, x4
+# CHECK: add x4, x6, xzr
+# CHECK: add x11, x13, x15
+# CHECK: add x9, x3, xzr, lsl #10
+# CHECK: add x17, x29, x20, lsl #63
+# CHECK: add x21, x22, x23, lsr #0
+# CHECK: add x24, x25, x26, lsr #18
+# CHECK: add x27, x28, x29, lsr #63
+# CHECK: add x2, x3, x4, asr #0
+# CHECK: add x5, x6, x7, asr #21
+# CHECK: add x8, x9, x10, asr #63
+0xa3 0x0 0x7 0x8b
+0x7f 0x0 0x5 0x8b
+0xf4 0x3 0x4 0x8b
+0xc4 0x0 0x1f 0x8b
+0xab 0x1 0xf 0x8b
+0x69 0x28 0x1f 0x8b
+0xb1 0xff 0x14 0x8b
+0xd5 0x2 0x57 0x8b
+0x38 0x4b 0x5a 0x8b
+0x9b 0xff 0x5d 0x8b
+0x62 0x0 0x84 0x8b
+0xc5 0x54 0x87 0x8b
+0x28 0xfd 0x8a 0x8b
+
+# CHECK: adds w3, w5, w7
+# CHECK: cmn w3, w5
+# CHECK: adds w20, wzr, w4
+# CHECK: adds w4, w6, wzr
+# CHECK: adds w11, w13, w15
+# CHECK: adds w9, w3, wzr, lsl #10
+# CHECK: adds w17, w29, w20, lsl #31
+# CHECK: adds w21, w22, w23, lsr #0
+# CHECK: adds w24, w25, w26, lsr #18
+# CHECK: adds w27, w28, w29, lsr #31
+# CHECK: adds w2, w3, w4, asr #0
+# CHECK: adds w5, w6, w7, asr #21
+# CHECK: adds w8, w9, w10, asr #31
+0xa3 0x0 0x7 0x2b
+0x7f 0x0 0x5 0x2b
+0xf4 0x3 0x4 0x2b
+0xc4 0x0 0x1f 0x2b
+0xab 0x1 0xf 0x2b
+0x69 0x28 0x1f 0x2b
+0xb1 0x7f 0x14 0x2b
+0xd5 0x2 0x57 0x2b
+0x38 0x4b 0x5a 0x2b
+0x9b 0x7f 0x5d 0x2b
+0x62 0x0 0x84 0x2b
+0xc5 0x54 0x87 0x2b
+0x28 0x7d 0x8a 0x2b
+
+# CHECK: adds x3, x5, x7
+# CHECK: cmn x3, x5
+# CHECK: adds x20, xzr, x4
+# CHECK: adds x4, x6, xzr
+# CHECK: adds x11, x13, x15
+# CHECK: adds x9, x3, xzr, lsl #10
+# CHECK: adds x17, x29, x20, lsl #63
+# CHECK: adds x21, x22, x23, lsr #0
+# CHECK: adds x24, x25, x26, lsr #18
+# CHECK: adds x27, x28, x29, lsr #63
+# CHECK: adds x2, x3, x4, asr #0
+# CHECK: adds x5, x6, x7, asr #21
+# CHECK: adds x8, x9, x10, asr #63
+0xa3 0x0 0x7 0xab
+0x7f 0x0 0x5 0xab
+0xf4 0x3 0x4 0xab
+0xc4 0x0 0x1f 0xab
+0xab 0x1 0xf 0xab
+0x69 0x28 0x1f 0xab
+0xb1 0xff 0x14 0xab
+0xd5 0x2 0x57 0xab
+0x38 0x4b 0x5a 0xab
+0x9b 0xff 0x5d 0xab
+0x62 0x0 0x84 0xab
+0xc5 0x54 0x87 0xab
+0x28 0xfd 0x8a 0xab
+
+# CHECK: sub w3, w5, w7
+# CHECK: sub wzr, w3, w5
+# CHECK: sub w20, wzr, w4
+# CHECK: sub w4, w6, wzr
+# CHECK: sub w11, w13, w15
+# CHECK: sub w9, w3, wzr, lsl #10
+# CHECK: sub w17, w29, w20, lsl #31
+# CHECK: sub w21, w22, w23, lsr #0
+# CHECK: sub w24, w25, w26, lsr #18
+# CHECK: sub w27, w28, w29, lsr #31
+# CHECK: sub w2, w3, w4, asr #0
+# CHECK: sub w5, w6, w7, asr #21
+# CHECK: sub w8, w9, w10, asr #31
+0xa3 0x0 0x7 0x4b
+0x7f 0x0 0x5 0x4b
+0xf4 0x3 0x4 0x4b
+0xc4 0x0 0x1f 0x4b
+0xab 0x1 0xf 0x4b
+0x69 0x28 0x1f 0x4b
+0xb1 0x7f 0x14 0x4b
+0xd5 0x2 0x57 0x4b
+0x38 0x4b 0x5a 0x4b
+0x9b 0x7f 0x5d 0x4b
+0x62 0x0 0x84 0x4b
+0xc5 0x54 0x87 0x4b
+0x28 0x7d 0x8a 0x4b
+
+# CHECK: sub x3, x5, x7
+# CHECK: sub xzr, x3, x5
+# CHECK: sub x20, xzr, x4
+# CHECK: sub x4, x6, xzr
+# CHECK: sub x11, x13, x15
+# CHECK: sub x9, x3, xzr, lsl #10
+# CHECK: sub x17, x29, x20, lsl #63
+# CHECK: sub x21, x22, x23, lsr #0
+# CHECK: sub x24, x25, x26, lsr #18
+# CHECK: sub x27, x28, x29, lsr #63
+# CHECK: sub x2, x3, x4, asr #0
+# CHECK: sub x5, x6, x7, asr #21
+# CHECK: sub x8, x9, x10, asr #63
+0xa3 0x0 0x7 0xcb
+0x7f 0x0 0x5 0xcb
+0xf4 0x3 0x4 0xcb
+0xc4 0x0 0x1f 0xcb
+0xab 0x1 0xf 0xcb
+0x69 0x28 0x1f 0xcb
+0xb1 0xff 0x14 0xcb
+0xd5 0x2 0x57 0xcb
+0x38 0x4b 0x5a 0xcb
+0x9b 0xff 0x5d 0xcb
+0x62 0x0 0x84 0xcb
+0xc5 0x54 0x87 0xcb
+0x28 0xfd 0x8a 0xcb
+
+# CHECK: subs w3, w5, w7
+# CHECK: cmp w3, w5
+# CHECK: subs w20, wzr, w4
+# CHECK: subs w4, w6, wzr
+# CHECK: subs w11, w13, w15
+# CHECK: subs w9, w3, wzr, lsl #10
+# CHECK: subs w17, w29, w20, lsl #31
+# CHECK: subs w21, w22, w23, lsr #0
+# CHECK: subs w24, w25, w26, lsr #18
+# CHECK: subs w27, w28, w29, lsr #31
+# CHECK: subs w2, w3, w4, asr #0
+# CHECK: subs w5, w6, w7, asr #21
+# CHECK: subs w8, w9, w10, asr #31
+0xa3 0x0 0x7 0x6b
+0x7f 0x0 0x5 0x6b
+0xf4 0x3 0x4 0x6b
+0xc4 0x0 0x1f 0x6b
+0xab 0x1 0xf 0x6b
+0x69 0x28 0x1f 0x6b
+0xb1 0x7f 0x14 0x6b
+0xd5 0x2 0x57 0x6b
+0x38 0x4b 0x5a 0x6b
+0x9b 0x7f 0x5d 0x6b
+0x62 0x0 0x84 0x6b
+0xc5 0x54 0x87 0x6b
+0x28 0x7d 0x8a 0x6b
+
+# CHECK: subs x3, x5, x7
+# CHECK: cmp x3, x5
+# CHECK: subs x20, xzr, x4
+# CHECK: subs x4, x6, xzr
+# CHECK: subs x11, x13, x15
+# CHECK: subs x9, x3, xzr, lsl #10
+# CHECK: subs x17, x29, x20, lsl #63
+# CHECK: subs x21, x22, x23, lsr #0
+# CHECK: subs x24, x25, x26, lsr #18
+# CHECK: subs x27, x28, x29, lsr #63
+# CHECK: subs x2, x3, x4, asr #0
+# CHECK: subs x5, x6, x7, asr #21
+# CHECK: subs x8, x9, x10, asr #63
+0xa3 0x0 0x7 0xeb
+0x7f 0x0 0x5 0xeb
+0xf4 0x3 0x4 0xeb
+0xc4 0x0 0x1f 0xeb
+0xab 0x1 0xf 0xeb
+0x69 0x28 0x1f 0xeb
+0xb1 0xff 0x14 0xeb
+0xd5 0x2 0x57 0xeb
+0x38 0x4b 0x5a 0xeb
+0x9b 0xff 0x5d 0xeb
+0x62 0x0 0x84 0xeb
+0xc5 0x54 0x87 0xeb
+0x28 0xfd 0x8a 0xeb
+
+# CHECK: cmn w0, w3
+# CHECK: cmn wzr, w4
+# CHECK: cmn w5, wzr
+# CHECK: cmn w6, w7
+# CHECK: cmn w8, w9, lsl #15
+# CHECK: cmn w10, w11, lsl #31
+# CHECK: cmn w12, w13, lsr #0
+# CHECK: cmn w14, w15, lsr #21
+# CHECK: cmn w16, w17, lsr #31
+# CHECK: cmn w18, w19, asr #0
+# CHECK: cmn w20, w21, asr #22
+# CHECK: cmn w22, w23, asr #31
+0x1f 0x0 0x3 0x2b
+0xff 0x3 0x4 0x2b
+0xbf 0x0 0x1f 0x2b
+0xdf 0x0 0x7 0x2b
+0x1f 0x3d 0x9 0x2b
+0x5f 0x7d 0xb 0x2b
+0x9f 0x1 0x4d 0x2b
+0xdf 0x55 0x4f 0x2b
+0x1f 0x7e 0x51 0x2b
+0x5f 0x2 0x93 0x2b
+0x9f 0x5a 0x95 0x2b
+0xdf 0x7e 0x97 0x2b
+
+# CHECK: cmn x0, x3
+# CHECK: cmn xzr, x4
+# CHECK: cmn x5, xzr
+# CHECK: cmn x6, x7
+# CHECK: cmn x8, x9, lsl #15
+# CHECK: cmn x10, x11, lsl #63
+# CHECK: cmn x12, x13, lsr #0
+# CHECK: cmn x14, x15, lsr #41
+# CHECK: cmn x16, x17, lsr #63
+# CHECK: cmn x18, x19, asr #0
+# CHECK: cmn x20, x21, asr #55
+# CHECK: cmn x22, x23, asr #63
+0x1f 0x0 0x3 0xab
+0xff 0x3 0x4 0xab
+0xbf 0x0 0x1f 0xab
+0xdf 0x0 0x7 0xab
+0x1f 0x3d 0x9 0xab
+0x5f 0xfd 0xb 0xab
+0x9f 0x1 0x4d 0xab
+0xdf 0xa5 0x4f 0xab
+0x1f 0xfe 0x51 0xab
+0x5f 0x2 0x93 0xab
+0x9f 0xde 0x95 0xab
+0xdf 0xfe 0x97 0xab
+
+# CHECK: cmp w0, w3
+# CHECK: cmp wzr, w4
+# CHECK: cmp w5, wzr
+# CHECK: cmp w6, w7
+# CHECK: cmp w8, w9, lsl #15
+# CHECK: cmp w10, w11, lsl #31
+# CHECK: cmp w12, w13, lsr #0
+# CHECK: cmp w14, w15, lsr #21
+# CHECK: cmp w16, w17, lsr #31
+# CHECK: cmp w18, w19, asr #0
+# CHECK: cmp w20, w21, asr #22
+# CHECK: cmp w22, w23, asr #31
+0x1f 0x0 0x3 0x6b
+0xff 0x3 0x4 0x6b
+0xbf 0x0 0x1f 0x6b
+0xdf 0x0 0x7 0x6b
+0x1f 0x3d 0x9 0x6b
+0x5f 0x7d 0xb 0x6b
+0x9f 0x1 0x4d 0x6b
+0xdf 0x55 0x4f 0x6b
+0x1f 0x7e 0x51 0x6b
+0x5f 0x2 0x93 0x6b
+0x9f 0x5a 0x95 0x6b
+0xdf 0x7e 0x97 0x6b
+
+# CHECK: cmp x0, x3
+# CHECK: cmp xzr, x4
+# CHECK: cmp x5, xzr
+# CHECK: cmp x6, x7
+# CHECK: cmp x8, x9, lsl #15
+# CHECK: cmp x10, x11, lsl #63
+# CHECK: cmp x12, x13, lsr #0
+# CHECK: cmp x14, x15, lsr #41
+# CHECK: cmp x16, x17, lsr #63
+# CHECK: cmp x18, x19, asr #0
+# CHECK: cmp x20, x21, asr #55
+# CHECK: cmp x22, x23, asr #63
+0x1f 0x0 0x3 0xeb
+0xff 0x3 0x4 0xeb
+0xbf 0x0 0x1f 0xeb
+0xdf 0x0 0x7 0xeb
+0x1f 0x3d 0x9 0xeb
+0x5f 0xfd 0xb 0xeb
+0x9f 0x1 0x4d 0xeb
+0xdf 0xa5 0x4f 0xeb
+0x1f 0xfe 0x51 0xeb
+0x5f 0x2 0x93 0xeb
+0x9f 0xde 0x95 0xeb
+0xdf 0xfe 0x97 0xeb
+
+# CHECK: sub w29, wzr, w30
+# CHECK: sub w30, wzr, wzr
+# CHECK: sub wzr, wzr, w0
+# CHECK: sub w28, wzr, w27
+# CHECK: sub w26, wzr, w25, lsl #29
+# CHECK: sub w24, wzr, w23, lsl #31
+# CHECK: sub w22, wzr, w21, lsr #0
+# CHECK: sub w20, wzr, w19, lsr #1
+# CHECK: sub w18, wzr, w17, lsr #31
+# CHECK: sub w16, wzr, w15, asr #0
+# CHECK: sub w14, wzr, w13, asr #12
+# CHECK: sub w12, wzr, w11, asr #31
+0xfd 0x3 0x1e 0x4b
+0xfe 0x3 0x1f 0x4b
+0xff 0x3 0x0 0x4b
+0xfc 0x3 0x1b 0x4b
+0xfa 0x77 0x19 0x4b
+0xf8 0x7f 0x17 0x4b
+0xf6 0x3 0x55 0x4b
+0xf4 0x7 0x53 0x4b
+0xf2 0x7f 0x51 0x4b
+0xf0 0x3 0x8f 0x4b
+0xee 0x33 0x8d 0x4b
+0xec 0x7f 0x8b 0x4b
+
+# CHECK: sub x29, xzr, x30
+# CHECK: sub x30, xzr, xzr
+# CHECK: sub xzr, xzr, x0
+# CHECK: sub x28, xzr, x27
+# CHECK: sub x26, xzr, x25, lsl #29
+# CHECK: sub x24, xzr, x23, lsl #31
+# CHECK: sub x22, xzr, x21, lsr #0
+# CHECK: sub x20, xzr, x19, lsr #1
+# CHECK: sub x18, xzr, x17, lsr #31
+# CHECK: sub x16, xzr, x15, asr #0
+# CHECK: sub x14, xzr, x13, asr #12
+# CHECK: sub x12, xzr, x11, asr #31
+0xfd 0x3 0x1e 0xcb
+0xfe 0x3 0x1f 0xcb
+0xff 0x3 0x0 0xcb
+0xfc 0x3 0x1b 0xcb
+0xfa 0x77 0x19 0xcb
+0xf8 0x7f 0x17 0xcb
+0xf6 0x3 0x55 0xcb
+0xf4 0x7 0x53 0xcb
+0xf2 0x7f 0x51 0xcb
+0xf0 0x3 0x8f 0xcb
+0xee 0x33 0x8d 0xcb
+0xec 0x7f 0x8b 0xcb
+
+# CHECK: subs w29, wzr, w30
+# CHECK: subs w30, wzr, wzr
+# CHECK: cmp wzr, w0
+# CHECK: subs w28, wzr, w27
+# CHECK: subs w26, wzr, w25, lsl #29
+# CHECK: subs w24, wzr, w23, lsl #31
+# CHECK: subs w22, wzr, w21, lsr #0
+# CHECK: subs w20, wzr, w19, lsr #1
+# CHECK: subs w18, wzr, w17, lsr #31
+# CHECK: subs w16, wzr, w15, asr #0
+# CHECK: subs w14, wzr, w13, asr #12
+# CHECK: subs w12, wzr, w11, asr #31
+0xfd 0x3 0x1e 0x6b
+0xfe 0x3 0x1f 0x6b
+0xff 0x3 0x0 0x6b
+0xfc 0x3 0x1b 0x6b
+0xfa 0x77 0x19 0x6b
+0xf8 0x7f 0x17 0x6b
+0xf6 0x3 0x55 0x6b
+0xf4 0x7 0x53 0x6b
+0xf2 0x7f 0x51 0x6b
+0xf0 0x3 0x8f 0x6b
+0xee 0x33 0x8d 0x6b
+0xec 0x7f 0x8b 0x6b
+
+# CHECK: subs x29, xzr, x30
+# CHECK: subs x30, xzr, xzr
+# CHECK: cmp xzr, x0
+# CHECK: subs x28, xzr, x27
+# CHECK: subs x26, xzr, x25, lsl #29
+# CHECK: subs x24, xzr, x23, lsl #31
+# CHECK: subs x22, xzr, x21, lsr #0
+# CHECK: subs x20, xzr, x19, lsr #1
+# CHECK: subs x18, xzr, x17, lsr #31
+# CHECK: subs x16, xzr, x15, asr #0
+# CHECK: subs x14, xzr, x13, asr #12
+# CHECK: subs x12, xzr, x11, asr #31
+0xfd 0x3 0x1e 0xeb
+0xfe 0x3 0x1f 0xeb
+0xff 0x3 0x0 0xeb
+0xfc 0x3 0x1b 0xeb
+0xfa 0x77 0x19 0xeb
+0xf8 0x7f 0x17 0xeb
+0xf6 0x3 0x55 0xeb
+0xf4 0x7 0x53 0xeb
+0xf2 0x7f 0x51 0xeb
+0xf0 0x3 0x8f 0xeb
+0xee 0x33 0x8d 0xeb
+0xec 0x7f 0x8b 0xeb
+
+#------------------------------------------------------------------------------
+# Add-subtract (with carry)
+#------------------------------------------------------------------------------
+
+# CHECK: adc w29, w27, w25
+# CHECK: adc wzr, w3, w4
+# CHECK: adc w9, wzr, w10
+# CHECK: adc w20, w0, wzr
+0x7d 0x3 0x19 0x1a
+0x7f 0x0 0x4 0x1a
+0xe9 0x3 0xa 0x1a
+0x14 0x0 0x1f 0x1a
+
+# CHECK: adc x29, x27, x25
+# CHECK: adc xzr, x3, x4
+# CHECK: adc x9, xzr, x10
+# CHECK: adc x20, x0, xzr
+0x7d 0x3 0x19 0x9a
+0x7f 0x0 0x4 0x9a
+0xe9 0x3 0xa 0x9a
+0x14 0x0 0x1f 0x9a
+
+# CHECK: adcs w29, w27, w25
+# CHECK: adcs wzr, w3, w4
+# CHECK: adcs w9, wzr, w10
+# CHECK: adcs w20, w0, wzr
+0x7d 0x3 0x19 0x3a
+0x7f 0x0 0x4 0x3a
+0xe9 0x3 0xa 0x3a
+0x14 0x0 0x1f 0x3a
+
+# CHECK: adcs x29, x27, x25
+# CHECK: adcs xzr, x3, x4
+# CHECK: adcs x9, xzr, x10
+# CHECK: adcs x20, x0, xzr
+0x7d 0x3 0x19 0xba
+0x7f 0x0 0x4 0xba
+0xe9 0x3 0xa 0xba
+0x14 0x0 0x1f 0xba
+
+# CHECK: sbc w29, w27, w25
+# CHECK: sbc wzr, w3, w4
+# CHECK: ngc w9, w10
+# CHECK: sbc w20, w0, wzr
+0x7d 0x3 0x19 0x5a
+0x7f 0x0 0x4 0x5a
+0xe9 0x3 0xa 0x5a
+0x14 0x0 0x1f 0x5a
+
+# CHECK: sbc x29, x27, x25
+# CHECK: sbc xzr, x3, x4
+# CHECK: ngc x9, x10
+# CHECK: sbc x20, x0, xzr
+0x7d 0x3 0x19 0xda
+0x7f 0x0 0x4 0xda
+0xe9 0x3 0xa 0xda
+0x14 0x0 0x1f 0xda
+
+# CHECK: sbcs w29, w27, w25
+# CHECK: sbcs wzr, w3, w4
+# CHECK: ngcs w9, w10
+# CHECK: sbcs w20, w0, wzr
+0x7d 0x3 0x19 0x7a
+0x7f 0x0 0x4 0x7a
+0xe9 0x3 0xa 0x7a
+0x14 0x0 0x1f 0x7a
+
+# CHECK: sbcs x29, x27, x25
+# CHECK: sbcs xzr, x3, x4
+# CHECK: ngcs x9, x10
+# CHECK: sbcs x20, x0, xzr
+0x7d 0x3 0x19 0xfa
+0x7f 0x0 0x4 0xfa
+0xe9 0x3 0xa 0xfa
+0x14 0x0 0x1f 0xfa
+
+# CHECK: ngc w3, w12
+# CHECK: ngc wzr, w9
+# CHECK: ngc w23, wzr
+0xe3 0x3 0xc 0x5a
+0xff 0x3 0x9 0x5a
+0xf7 0x3 0x1f 0x5a
+
+# CHECK: ngc x29, x30
+# CHECK: ngc xzr, x0
+# CHECK: ngc x0, xzr
+0xfd 0x3 0x1e 0xda
+0xff 0x3 0x0 0xda
+0xe0 0x3 0x1f 0xda
+
+# CHECK: ngcs w3, w12
+# CHECK: ngcs wzr, w9
+# CHECK: ngcs w23, wzr
+0xe3 0x3 0xc 0x7a
+0xff 0x3 0x9 0x7a
+0xf7 0x3 0x1f 0x7a
+
+# CHECK: ngcs x29, x30
+# CHECK: ngcs xzr, x0
+# CHECK: ngcs x0, xzr
+0xfd 0x3 0x1e 0xfa
+0xff 0x3 0x0 0xfa
+0xe0 0x3 0x1f 0xfa
+
+#------------------------------------------------------------------------------
+# Bitfield
+#------------------------------------------------------------------------------
+
+# CHECK: sbfx x1, x2, #3, #2
+# CHECK: asr x3, x4, #63
+# CHECK: asr wzr, wzr, #31
+# CHECK: sbfx w12, w9, #0, #1
+0x41 0x10 0x43 0x93
+0x83 0xfc 0x7f 0x93
+0xff 0x7f 0x1f 0x13
+0x2c 0x1 0x0 0x13
+
+# CHECK: ubfiz x4, x5, #52, #11
+# CHECK: ubfx xzr, x4, #0, #1
+# CHECK: ubfiz x4, xzr, #1, #6
+# CHECK: lsr x5, x6, #12
+0xa4 0x28 0x4c 0xd3
+0x9f 0x0 0x40 0xd3
+0xe4 0x17 0x7f 0xd3
+0xc5 0xfc 0x4c 0xd3
+
+# CHECK: bfi x4, x5, #52, #11
+# CHECK: bfxil xzr, x4, #0, #1
+# CHECK: bfi x4, xzr, #1, #6
+# CHECK: bfxil x5, x6, #12, #52
+0xa4 0x28 0x4c 0xb3
+0x9f 0x0 0x40 0xb3
+0xe4 0x17 0x7f 0xb3
+0xc5 0xfc 0x4c 0xb3
+
+# CHECK: sxtb w1, w2
+# CHECK: sxtb xzr, w3
+# CHECK: sxth w9, w10
+# CHECK: sxth x0, w1
+# CHECK: sxtw x3, w30
+0x41 0x1c 0x0 0x13
+0x7f 0x1c 0x40 0x93
+0x49 0x3d 0x0 0x13
+0x20 0x3c 0x40 0x93
+0xc3 0x7f 0x40 0x93
+
+# CHECK: uxtb w1, w2
+# CHECK: uxth w9, w10
+# CHECK: ubfx x3, x30, #0, #32
+0x41 0x1c 0x0 0x53
+0x49 0x3d 0x0 0x53
+0xc3 0x7f 0x40 0xd3
+
+# CHECK: asr w3, w2, #0
+# CHECK: asr w9, w10, #31
+# CHECK: asr x20, x21, #63
+# CHECK: asr w1, wzr, #3
+0x43 0x7c 0x0 0x13
+0x49 0x7d 0x1f 0x13
+0xb4 0xfe 0x7f 0x93
+0xe1 0x7f 0x3 0x13
+
+# CHECK: lsr w3, w2, #0
+# CHECK: lsr w9, w10, #31
+# CHECK: lsr x20, x21, #63
+# CHECK: lsr wzr, wzr, #3
+0x43 0x7c 0x0 0x53
+0x49 0x7d 0x1f 0x53
+0xb4 0xfe 0x7f 0xd3
+0xff 0x7f 0x3 0x53
+
+# CHECK: lsr w3, w2, #0
+# CHECK: lsl w9, w10, #31
+# CHECK: lsl x20, x21, #63
+# CHECK: lsl w1, wzr, #3
+0x43 0x7c 0x0 0x53
+0x49 0x1 0x1 0x53
+0xb4 0x2 0x41 0xd3
+0xe1 0x73 0x1d 0x53
+
+# CHECK: sbfx w9, w10, #0, #1
+# CHECK: sbfiz x2, x3, #63, #1
+# CHECK: asr x19, x20, #0
+# CHECK: sbfiz x9, x10, #5, #59
+# CHECK: asr w9, w10, #0
+# CHECK: sbfiz w11, w12, #31, #1
+# CHECK: sbfiz w13, w14, #29, #3
+# CHECK: sbfiz xzr, xzr, #10, #11
+0x49 0x1 0x0 0x13
+0x62 0x0 0x41 0x93
+0x93 0xfe 0x40 0x93
+0x49 0xe9 0x7b 0x93
+0x49 0x7d 0x0 0x13
+0x8b 0x1 0x1 0x13
+0xcd 0x9 0x3 0x13
+0xff 0x2b 0x76 0x93
+
+# CHECK: sbfx w9, w10, #0, #1
+# CHECK: asr x2, x3, #63
+# CHECK: asr x19, x20, #0
+# CHECK: asr x9, x10, #5
+# CHECK: asr w9, w10, #0
+# CHECK: asr w11, w12, #31
+# CHECK: asr w13, w14, #29
+# CHECK: sbfx xzr, xzr, #10, #11
+0x49 0x1 0x0 0x13
+0x62 0xfc 0x7f 0x93
+0x93 0xfe 0x40 0x93
+0x49 0xfd 0x45 0x93
+0x49 0x7d 0x0 0x13
+0x8b 0x7d 0x1f 0x13
+0xcd 0x7d 0x1d 0x13
+0xff 0x53 0x4a 0x93
+
+# CHECK: bfxil w9, w10, #0, #1
+# CHECK: bfi x2, x3, #63, #1
+# CHECK: bfxil x19, x20, #0, #64
+# CHECK: bfi x9, x10, #5, #59
+# CHECK: bfxil w9, w10, #0, #32
+# CHECK: bfi w11, w12, #31, #1
+# CHECK: bfi w13, w14, #29, #3
+# CHECK: bfi xzr, xzr, #10, #11
+0x49 0x1 0x0 0x33
+0x62 0x0 0x41 0xb3
+0x93 0xfe 0x40 0xb3
+0x49 0xe9 0x7b 0xb3
+0x49 0x7d 0x0 0x33
+0x8b 0x1 0x1 0x33
+0xcd 0x9 0x3 0x33
+0xff 0x2b 0x76 0xb3
+
+# CHECK: bfxil w9, w10, #0, #1
+# CHECK: bfxil x2, x3, #63, #1
+# CHECK: bfxil x19, x20, #0, #64
+# CHECK: bfxil x9, x10, #5, #59
+# CHECK: bfxil w9, w10, #0, #32
+# CHECK: bfxil w11, w12, #31, #1
+# CHECK: bfxil w13, w14, #29, #3
+# CHECK: bfxil xzr, xzr, #10, #11
+0x49 0x1 0x0 0x33
+0x62 0xfc 0x7f 0xb3
+0x93 0xfe 0x40 0xb3
+0x49 0xfd 0x45 0xb3
+0x49 0x7d 0x0 0x33
+0x8b 0x7d 0x1f 0x33
+0xcd 0x7d 0x1d 0x33
+0xff 0x53 0x4a 0xb3
+
+# CHECK: ubfx w9, w10, #0, #1
+# CHECK: lsl x2, x3, #63
+# CHECK: lsr x19, x20, #0
+# CHECK: lsl x9, x10, #5
+# CHECK: lsr w9, w10, #0
+# CHECK: lsl w11, w12, #31
+# CHECK: lsl w13, w14, #29
+# CHECK: ubfiz xzr, xzr, #10, #11
+0x49 0x1 0x0 0x53
+0x62 0x0 0x41 0xd3
+0x93 0xfe 0x40 0xd3
+0x49 0xe9 0x7b 0xd3
+0x49 0x7d 0x0 0x53
+0x8b 0x1 0x1 0x53
+0xcd 0x9 0x3 0x53
+0xff 0x2b 0x76 0xd3
+
+# CHECK: ubfx w9, w10, #0, #1
+# CHECK: lsr x2, x3, #63
+# CHECK: lsr x19, x20, #0
+# CHECK: lsr x9, x10, #5
+# CHECK: lsr w9, w10, #0
+# CHECK: lsr w11, w12, #31
+# CHECK: lsr w13, w14, #29
+# CHECK: ubfx xzr, xzr, #10, #11
+0x49 0x1 0x0 0x53
+0x62 0xfc 0x7f 0xd3
+0x93 0xfe 0x40 0xd3
+0x49 0xfd 0x45 0xd3
+0x49 0x7d 0x0 0x53
+0x8b 0x7d 0x1f 0x53
+0xcd 0x7d 0x1d 0x53
+0xff 0x53 0x4a 0xd3
+
+
+#------------------------------------------------------------------------------
+# Compare and branch (immediate)
+#------------------------------------------------------------------------------
+
+# CHECK: cbz w5, #4
+# CHECK: cbz x5, #0
+# CHECK: cbnz x2, #-4
+# CHECK: cbnz x26, #1048572
+0x25 0x0 0x0 0x34
+0x05 0x0 0x0 0xb4
+0xe2 0xff 0xff 0xb5
+0xfa 0xff 0x7f 0xb5
+
+# CHECK: cbz wzr, #0
+# CHECK: cbnz xzr, #0
+0x1f 0x0 0x0 0x34
+0x1f 0x0 0x0 0xb5
+
+#------------------------------------------------------------------------------
+# Conditional branch (immediate)
+#------------------------------------------------------------------------------
+
+# CHECK: b.ne #4
+# CHECK: b.ge #1048572
+# CHECK: b.ge #-4
+0x21 0x00 0x00 0x54
+0xea 0xff 0x7f 0x54
+0xea 0xff 0xff 0x54
+
+#------------------------------------------------------------------------------
+# Conditional compare (immediate)
+#------------------------------------------------------------------------------
+
+# CHECK: ccmp w1, #31, #0, eq
+# CHECK: ccmp w3, #0, #15, hs
+# CHECK: ccmp wzr, #15, #13, hs
+0x20 0x08 0x5f 0x7a
+0x6f 0x28 0x40 0x7a
+0xed 0x2b 0x4f 0x7a
+
+# CHECK: ccmp x9, #31, #0, le
+# CHECK: ccmp x3, #0, #15, gt
+# CHECK: ccmp xzr, #5, #7, ne
+0x20 0xd9 0x5f 0xfa
+0x6f 0xc8 0x40 0xfa
+0xe7 0x1b 0x45 0xfa
+
+# CHECK: ccmn w1, #31, #0, eq
+# CHECK: ccmn w3, #0, #15, hs
+# CHECK: ccmn wzr, #15, #13, hs
+0x20 0x08 0x5f 0x3a
+0x6f 0x28 0x40 0x3a
+0xed 0x2b 0x4f 0x3a
+
+# CHECK: ccmn x9, #31, #0, le
+# CHECK: ccmn x3, #0, #15, gt
+# CHECK: ccmn xzr, #5, #7, ne
+0x20 0xd9 0x5f 0xba
+0x6f 0xc8 0x40 0xba
+0xe7 0x1b 0x45 0xba
+
+#------------------------------------------------------------------------------
+# Conditional compare (register)
+#------------------------------------------------------------------------------
+
+# CHECK: ccmp w1, wzr, #0, eq
+# CHECK: ccmp w3, w0, #15, hs
+# CHECK: ccmp wzr, w15, #13, hs
+0x20 0x00 0x5f 0x7a
+0x6f 0x20 0x40 0x7a
+0xed 0x23 0x4f 0x7a
+
+# CHECK: ccmp x9, xzr, #0, le
+# CHECK: ccmp x3, x0, #15, gt
+# CHECK: ccmp xzr, x5, #7, ne
+0x20 0xd1 0x5f 0xfa
+0x6f 0xc0 0x40 0xfa
+0xe7 0x13 0x45 0xfa
+
+# CHECK: ccmn w1, wzr, #0, eq
+# CHECK: ccmn w3, w0, #15, hs
+# CHECK: ccmn wzr, w15, #13, hs
+0x20 0x00 0x5f 0x3a
+0x6f 0x20 0x40 0x3a
+0xed 0x23 0x4f 0x3a
+
+# CHECK: ccmn x9, xzr, #0, le
+# CHECK: ccmn x3, x0, #15, gt
+# CHECK: ccmn xzr, x5, #7, ne
+0x20 0xd1 0x5f 0xba
+0x6f 0xc0 0x40 0xba
+0xe7 0x13 0x45 0xba
+
+#------------------------------------------------------------------------------
+# Conditional select
+#------------------------------------------------------------------------------
+# CHECK: csel w1, w0, w19, ne
+# CHECK: csel wzr, w5, w9, eq
+# CHECK: csel w9, wzr, w30, gt
+# CHECK: csel w1, w28, wzr, mi
+# CHECK: csel x19, x23, x29, lt
+# CHECK: csel xzr, x3, x4, ge
+# CHECK: csel x5, xzr, x6, hs
+# CHECK: csel x7, x8, xzr, lo
+0x1 0x10 0x93 0x1a
+0xbf 0x0 0x89 0x1a
+0xe9 0xc3 0x9e 0x1a
+0x81 0x43 0x9f 0x1a
+0xf3 0xb2 0x9d 0x9a
+0x7f 0xa0 0x84 0x9a
+0xe5 0x23 0x86 0x9a
+0x7 0x31 0x9f 0x9a
+
+# CHECK: csinc w1, w0, w19, ne
+# CHECK: csinc wzr, w5, w9, eq
+# CHECK: csinc w9, wzr, w30, gt
+# CHECK: csinc w1, w28, wzr, mi
+# CHECK: csinc x19, x23, x29, lt
+# CHECK: csinc xzr, x3, x4, ge
+# CHECK: csinc x5, xzr, x6, hs
+# CHECK: csinc x7, x8, xzr, lo
+0x1 0x14 0x93 0x1a
+0xbf 0x4 0x89 0x1a
+0xe9 0xc7 0x9e 0x1a
+0x81 0x47 0x9f 0x1a
+0xf3 0xb6 0x9d 0x9a
+0x7f 0xa4 0x84 0x9a
+0xe5 0x27 0x86 0x9a
+0x7 0x35 0x9f 0x9a
+
+# CHECK: csinv w1, w0, w19, ne
+# CHECK: csinv wzr, w5, w9, eq
+# CHECK: csinv w9, wzr, w30, gt
+# CHECK: csinv w1, w28, wzr, mi
+# CHECK: csinv x19, x23, x29, lt
+# CHECK: csinv xzr, x3, x4, ge
+# CHECK: csinv x5, xzr, x6, hs
+# CHECK: csinv x7, x8, xzr, lo
+0x1 0x10 0x93 0x5a
+0xbf 0x0 0x89 0x5a
+0xe9 0xc3 0x9e 0x5a
+0x81 0x43 0x9f 0x5a
+0xf3 0xb2 0x9d 0xda
+0x7f 0xa0 0x84 0xda
+0xe5 0x23 0x86 0xda
+0x7 0x31 0x9f 0xda
+
+# CHECK: csneg w1, w0, w19, ne
+# CHECK: csneg wzr, w5, w9, eq
+# CHECK: csneg w9, wzr, w30, gt
+# CHECK: csneg w1, w28, wzr, mi
+# CHECK: csneg x19, x23, x29, lt
+# CHECK: csneg xzr, x3, x4, ge
+# CHECK: csneg x5, xzr, x6, hs
+# CHECK: csneg x7, x8, xzr, lo
+0x1 0x14 0x93 0x5a
+0xbf 0x4 0x89 0x5a
+0xe9 0xc7 0x9e 0x5a
+0x81 0x47 0x9f 0x5a
+0xf3 0xb6 0x9d 0xda
+0x7f 0xa4 0x84 0xda
+0xe5 0x27 0x86 0xda
+0x7 0x35 0x9f 0xda
+
+# CHECK: csinc w3, wzr, wzr, ne
+# CHECK: csinc x9, xzr, xzr, mi
+# CHECK: csinv w20, wzr, wzr, eq
+# CHECK: csinv x30, xzr, xzr, lt
+0xe3 0x17 0x9f 0x1a
+0xe9 0x47 0x9f 0x9a
+0xf4 0x3 0x9f 0x5a
+0xfe 0xb3 0x9f 0xda
+
+# CHECK: csinc w3, w5, w5, le
+# CHECK: csinc wzr, w4, w4, gt
+# CHECK: csinc w9, wzr, wzr, ge
+# CHECK: csinc x3, x5, x5, le
+# CHECK: csinc xzr, x4, x4, gt
+# CHECK: csinc x9, xzr, xzr, ge
+0xa3 0xd4 0x85 0x1a
+0x9f 0xc4 0x84 0x1a
+0xe9 0xa7 0x9f 0x1a
+0xa3 0xd4 0x85 0x9a
+0x9f 0xc4 0x84 0x9a
+0xe9 0xa7 0x9f 0x9a
+
+# CHECK: csinv w3, w5, w5, le
+# CHECK: csinv wzr, w4, w4, gt
+# CHECK: csinv w9, wzr, wzr, ge
+# CHECK: csinv x3, x5, x5, le
+# CHECK: csinv xzr, x4, x4, gt
+# CHECK: csinv x9, xzr, xzr, ge
+0xa3 0xd0 0x85 0x5a
+0x9f 0xc0 0x84 0x5a
+0xe9 0xa3 0x9f 0x5a
+0xa3 0xd0 0x85 0xda
+0x9f 0xc0 0x84 0xda
+0xe9 0xa3 0x9f 0xda
+
+# CHECK: csneg w3, w5, w5, le
+# CHECK: csneg wzr, w4, w4, gt
+# CHECK: csneg w9, wzr, wzr, ge
+# CHECK: csneg x3, x5, x5, le
+# CHECK: csneg xzr, x4, x4, gt
+# CHECK: csneg x9, xzr, xzr, ge
+0xa3 0xd4 0x85 0x5a
+0x9f 0xc4 0x84 0x5a
+0xe9 0xa7 0x9f 0x5a
+0xa3 0xd4 0x85 0xda
+0x9f 0xc4 0x84 0xda
+0xe9 0xa7 0x9f 0xda
+
+#------------------------------------------------------------------------------
+# Data-processing (1 source)
+#------------------------------------------------------------------------------
+
+# CHECK: rbit w0, w7
+# CHECK: rbit x18, x3
+# CHECK: rev16 w17, w1
+# CHECK: rev16 x5, x2
+# CHECK: rev w18, w0
+# CHECK: rev32 x20, x1
+0xe0 0x00 0xc0 0x5a
+0x72 0x00 0xc0 0xda
+0x31 0x04 0xc0 0x5a
+0x45 0x04 0xc0 0xda
+0x12 0x08 0xc0 0x5a
+0x34 0x08 0xc0 0xda
+
+# CHECK: rev x22, x2
+# CHECK: clz w24, w3
+# CHECK: clz x26, x4
+# CHECK: cls w3, w5
+# CHECK: cls x20, x5
+0x56 0x0c 0xc0 0xda
+0x78 0x10 0xc0 0x5a
+0x9a 0x10 0xc0 0xda
+0xa3 0x14 0xc0 0x5a
+0xb4 0x14 0xc0 0xda
+
+#------------------------------------------------------------------------------
+# Data-processing (2 source)
+#------------------------------------------------------------------------------
+
+# CHECK: udiv w0, w7, w10
+# CHECK: udiv x9, x22, x4
+# CHECK: sdiv w12, w21, w0
+# CHECK: sdiv x13, x2, x1
+# CHECK: lsl w11, w12, w13
+# CHECK: lsl x14, x15, x16
+# CHECK: lsr w17, w18, w19
+# CHECK: lsr x20, x21, x22
+# CHECK: asr w23, w24, w25
+# CHECK: asr x26, x27, x28
+# CHECK: ror w0, w1, w2
+# CHECK: ror x3, x4, x5
+0xe0 0x08 0xca 0x1a
+0xc9 0x0a 0xc4 0x9a
+0xac 0x0e 0xc0 0x1a
+0x4d 0x0c 0xc1 0x9a
+0x8b 0x21 0xcd 0x1a
+0xee 0x21 0xd0 0x9a
+0x51 0x26 0xd3 0x1a
+0xb4 0x26 0xd6 0x9a
+0x17 0x2b 0xd9 0x1a
+0x7a 0x2b 0xdc 0x9a
+0x20 0x2c 0xc2 0x1a
+0x83 0x2c 0xc5 0x9a
+
+# CHECK: lsl w6, w7, w8
+# CHECK: lsl x9, x10, x11
+# CHECK: lsr w12, w13, w14
+# CHECK: lsr x15, x16, x17
+# CHECK: asr w18, w19, w20
+# CHECK: asr x21, x22, x23
+# CHECK: ror w24, w25, w26
+# CHECK: ror x27, x28, x29
+0xe6 0x20 0xc8 0x1a
+0x49 0x21 0xcb 0x9a
+0xac 0x25 0xce 0x1a
+0x0f 0x26 0xd1 0x9a
+0x72 0x2a 0xd4 0x1a
+0xd5 0x2a 0xd7 0x9a
+0x38 0x2f 0xda 0x1a
+0x9b 0x2f 0xdd 0x9a
+
+#------------------------------------------------------------------------------
+# Data-processing (3 sources)
+#------------------------------------------------------------------------------
+
+# First check some non-canonical encodings where Ra is not 0b11111 (only umulh
+# and smulh have them).
+
+# CHECK: smulh x30, x29, x28
+# CHECK: smulh xzr, x27, x26
+# CHECK: umulh x30, x29, x28
+# CHECK: umulh x23, x30, xzr
+0xbe 0x73 0x5c 0x9b
+0x7f 0x2f 0x5a 0x9b
+0xbe 0x3f 0xdc 0x9b
+0xd7 0x77 0xdf 0x9b
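+# (The Ra field, bits [14:10], is ignored by SMULH and UMULH, so the
+# non-canonical values encoded above must not change the decoded result.)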
+
+# Now onto the boilerplate stuff
+
+# CHECK: madd w1, w3, w7, w4
+# CHECK: madd wzr, w0, w9, w11
+# CHECK: madd w13, wzr, w4, w4
+# CHECK: madd w19, w30, wzr, w29
+# CHECK: mul w4, w5, w6
+0x61 0x10 0x7 0x1b
+0x1f 0x2c 0x9 0x1b
+0xed 0x13 0x4 0x1b
+0xd3 0x77 0x1f 0x1b
+0xa4 0x7c 0x6 0x1b
+
+# CHECK: madd x1, x3, x7, x4
+# CHECK: madd xzr, x0, x9, x11
+# CHECK: madd x13, xzr, x4, x4
+# CHECK: madd x19, x30, xzr, x29
+# CHECK: mul x4, x5, x6
+0x61 0x10 0x7 0x9b
+0x1f 0x2c 0x9 0x9b
+0xed 0x13 0x4 0x9b
+0xd3 0x77 0x1f 0x9b
+0xa4 0x7c 0x6 0x9b
+
+# CHECK: msub w1, w3, w7, w4
+# CHECK: msub wzr, w0, w9, w11
+# CHECK: msub w13, wzr, w4, w4
+# CHECK: msub w19, w30, wzr, w29
+# CHECK: mneg w4, w5, w6
+0x61 0x90 0x7 0x1b
+0x1f 0xac 0x9 0x1b
+0xed 0x93 0x4 0x1b
+0xd3 0xf7 0x1f 0x1b
+0xa4 0xfc 0x6 0x1b
+
+# CHECK: msub x1, x3, x7, x4
+# CHECK: msub xzr, x0, x9, x11
+# CHECK: msub x13, xzr, x4, x4
+# CHECK: msub x19, x30, xzr, x29
+# CHECK: mneg x4, x5, x6
+0x61 0x90 0x7 0x9b
+0x1f 0xac 0x9 0x9b
+0xed 0x93 0x4 0x9b
+0xd3 0xf7 0x1f 0x9b
+0xa4 0xfc 0x6 0x9b
+
+# CHECK: smaddl x3, w5, w2, x9
+# CHECK: smaddl xzr, w10, w11, x12
+# CHECK: smaddl x13, wzr, w14, x15
+# CHECK: smaddl x16, w17, wzr, x18
+# CHECK: smull x19, w20, w21
+0xa3 0x24 0x22 0x9b
+0x5f 0x31 0x2b 0x9b
+0xed 0x3f 0x2e 0x9b
+0x30 0x4a 0x3f 0x9b
+0x93 0x7e 0x35 0x9b
+
+# CHECK: smsubl x3, w5, w2, x9
+# CHECK: smsubl xzr, w10, w11, x12
+# CHECK: smsubl x13, wzr, w14, x15
+# CHECK: smsubl x16, w17, wzr, x18
+# CHECK: smnegl x19, w20, w21
+0xa3 0xa4 0x22 0x9b
+0x5f 0xb1 0x2b 0x9b
+0xed 0xbf 0x2e 0x9b
+0x30 0xca 0x3f 0x9b
+0x93 0xfe 0x35 0x9b
+
+# CHECK: umaddl x3, w5, w2, x9
+# CHECK: umaddl xzr, w10, w11, x12
+# CHECK: umaddl x13, wzr, w14, x15
+# CHECK: umaddl x16, w17, wzr, x18
+# CHECK: umull x19, w20, w21
+0xa3 0x24 0xa2 0x9b
+0x5f 0x31 0xab 0x9b
+0xed 0x3f 0xae 0x9b
+0x30 0x4a 0xbf 0x9b
+0x93 0x7e 0xb5 0x9b
+
+# CHECK: umsubl x3, w5, w2, x9
+# CHECK: umsubl xzr, w10, w11, x12
+# CHECK: umsubl x13, wzr, w14, x15
+# CHECK: umsubl x16, w17, wzr, x18
+# CHECK: umnegl x19, w20, w21
+0xa3 0xa4 0xa2 0x9b
+0x5f 0xb1 0xab 0x9b
+0xed 0xbf 0xae 0x9b
+0x30 0xca 0xbf 0x9b
+0x93 0xfe 0xb5 0x9b
+
+# CHECK: smulh x30, x29, x28
+# CHECK: smulh xzr, x27, x26
+# CHECK: smulh x25, xzr, x24
+# CHECK: smulh x23, x22, xzr
+0xbe 0x7f 0x5c 0x9b
+0x7f 0x7f 0x5a 0x9b
+0xf9 0x7f 0x58 0x9b
+0xd7 0x7e 0x5f 0x9b
+
+# CHECK: umulh x30, x29, x28
+# CHECK: umulh xzr, x27, x26
+# CHECK: umulh x25, xzr, x24
+# CHECK: umulh x23, x22, xzr
+0xbe 0x7f 0xdc 0x9b
+0x7f 0x7f 0xda 0x9b
+0xf9 0x7f 0xd8 0x9b
+0xd7 0x7e 0xdf 0x9b
+
+# CHECK: mul w3, w4, w5
+# CHECK: mul wzr, w6, w7
+# CHECK: mul w8, wzr, w9
+# CHECK: mul w10, w11, wzr
+# CHECK: mul x12, x13, x14
+# CHECK: mul xzr, x15, x16
+# CHECK: mul x17, xzr, x18
+# CHECK: mul x19, x20, xzr
+0x83 0x7c 0x5 0x1b
+0xdf 0x7c 0x7 0x1b
+0xe8 0x7f 0x9 0x1b
+0x6a 0x7d 0x1f 0x1b
+0xac 0x7d 0xe 0x9b
+0xff 0x7d 0x10 0x9b
+0xf1 0x7f 0x12 0x9b
+0x93 0x7e 0x1f 0x9b
+
+# CHECK: mneg w21, w22, w23
+# CHECK: mneg wzr, w24, w25
+# CHECK: mneg w26, wzr, w27
+# CHECK: mneg w28, w29, wzr
+0xd5 0xfe 0x17 0x1b
+0x1f 0xff 0x19 0x1b
+0xfa 0xff 0x1b 0x1b
+0xbc 0xff 0x1f 0x1b
+
+# CHECK: smull x11, w13, w17
+# CHECK: umull x11, w13, w17
+# CHECK: smnegl x11, w13, w17
+# CHECK: umnegl x11, w13, w17
+0xab 0x7d 0x31 0x9b
+0xab 0x7d 0xb1 0x9b
+0xab 0xfd 0x31 0x9b
+0xab 0xfd 0xb1 0x9b
+
+#------------------------------------------------------------------------------
+# Exception generation
+#------------------------------------------------------------------------------
+
+# CHECK: svc #0
+# CHECK: svc #65535
+0x1 0x0 0x0 0xd4
+0xe1 0xff 0x1f 0xd4
+
+# CHECK: hvc #1
+# CHECK: smc #12000
+# CHECK: brk #12
+# CHECK: hlt #123
+0x22 0x0 0x0 0xd4
+0x3 0xdc 0x5 0xd4
+0x80 0x1 0x20 0xd4
+0x60 0xf 0x40 0xd4
+
+# CHECK: dcps1 #42
+# CHECK: dcps2 #9
+# CHECK: dcps3 #1000
+0x41 0x5 0xa0 0xd4
+0x22 0x1 0xa0 0xd4
+0x3 0x7d 0xa0 0xd4
+
+# CHECK: dcps1
+# CHECK: dcps2
+# CHECK: dcps3
+0x1 0x0 0xa0 0xd4
+0x2 0x0 0xa0 0xd4
+0x3 0x0 0xa0 0xd4
+
+#------------------------------------------------------------------------------
+# Extract (immediate)
+#------------------------------------------------------------------------------
+
+# CHECK: extr w3, w5, w7, #0
+# CHECK: extr w11, w13, w17, #31
+0xa3 0x0 0x87 0x13
+0xab 0x7d 0x91 0x13
+
+# CHECK: extr x3, x5, x7, #15
+# CHECK: extr x11, x13, x17, #63
+0xa3 0x3c 0xc7 0x93
+0xab 0xfd 0xd1 0x93
+
+# CHECK: extr x19, x23, x23, #24
+# CHECK: extr x29, xzr, xzr, #63
+# CHECK: extr w9, w13, w13, #31
+0xf3 0x62 0xd7 0x93
+0xfd 0xff 0xdf 0x93
+0xa9 0x7d 0x8d 0x13
+
+#------------------------------------------------------------------------------
+# Floating-point compare
+#------------------------------------------------------------------------------
+
+# CHECK: fcmp s3, s5
+# CHECK: fcmp s31, #0.0
+# CHECK: fcmp s31, #0.0
+0x60 0x20 0x25 0x1e
+0xe8 0x23 0x20 0x1e
+0xe8 0x23 0x3f 0x1e
+
+# CHECK: fcmpe s29, s30
+# CHECK: fcmpe s15, #0.0
+# CHECK: fcmpe s15, #0.0
+0xb0 0x23 0x3e 0x1e
+0xf8 0x21 0x20 0x1e
+0xf8 0x21 0x2f 0x1e
+
+# CHECK: fcmp d4, d12
+# CHECK: fcmp d23, #0.0
+# CHECK: fcmp d23, #0.0
+0x80 0x20 0x6c 0x1e
+0xe8 0x22 0x60 0x1e
+0xe8 0x22 0x77 0x1e
+
+# CHECK: fcmpe d26, d22
+# CHECK: fcmpe d29, #0.0
+# CHECK: fcmpe d29, #0.0
+0x50 0x23 0x76 0x1e
+0xb8 0x23 0x60 0x1e
+0xb8 0x23 0x6d 0x1e
+
+#------------------------------------------------------------------------------
+# Floating-point conditional compare
+#------------------------------------------------------------------------------
+
+# CHECK: fccmp s1, s31, #0, eq
+# CHECK: fccmp s3, s0, #15, hs
+# CHECK: fccmp s31, s15, #13, hs
+0x20 0x04 0x3f 0x1e
+0x6f 0x24 0x20 0x1e
+0xed 0x27 0x2f 0x1e
+
+# CHECK: fccmp d9, d31, #0, le
+# CHECK: fccmp d3, d0, #15, gt
+# CHECK: fccmp d31, d5, #7, ne
+0x20 0xd5 0x7f 0x1e
+0x6f 0xc4 0x60 0x1e
+0xe7 0x17 0x65 0x1e
+
+# CHECK: fccmpe s1, s31, #0, eq
+# CHECK: fccmpe s3, s0, #15, hs
+# CHECK: fccmpe s31, s15, #13, hs
+0x30 0x04 0x3f 0x1e
+0x7f 0x24 0x20 0x1e
+0xfd 0x27 0x2f 0x1e
+
+# CHECK: fccmpe d9, d31, #0, le
+# CHECK: fccmpe d3, d0, #15, gt
+# CHECK: fccmpe d31, d5, #7, ne
+0x30 0xd5 0x7f 0x1e
+0x7f 0xc4 0x60 0x1e
+0xf7 0x17 0x65 0x1e
+
+#-------------------------------------------------------------------------------
+# Floating-point conditional select
+#-------------------------------------------------------------------------------
+
+# CHECK: fcsel s3, s20, s9, pl
+# CHECK: fcsel d9, d10, d11, mi
+0x83 0x5e 0x29 0x1e
+0x49 0x4d 0x6b 0x1e
+
+#------------------------------------------------------------------------------
+# Floating-point data-processing (1 source)
+#------------------------------------------------------------------------------
+
+# CHECK: fmov s0, s1
+# CHECK: fabs s2, s3
+# CHECK: fneg s4, s5
+# CHECK: fsqrt s6, s7
+# CHECK: fcvt d8, s9
+# CHECK: fcvt h10, s11
+# CHECK: frintn s12, s13
+# CHECK: frintp s14, s15
+# CHECK: frintm s16, s17
+# CHECK: frintz s18, s19
+# CHECK: frinta s20, s21
+# CHECK: frintx s22, s23
+# CHECK: frinti s24, s25
+0x20 0x40 0x20 0x1e
+0x62 0xc0 0x20 0x1e
+0xa4 0x40 0x21 0x1e
+0xe6 0xc0 0x21 0x1e
+0x28 0xc1 0x22 0x1e
+0x6a 0xc1 0x23 0x1e
+0xac 0x41 0x24 0x1e
+0xee 0xc1 0x24 0x1e
+0x30 0x42 0x25 0x1e
+0x72 0xc2 0x25 0x1e
+0xb4 0x42 0x26 0x1e
+0xf6 0x42 0x27 0x1e
+0x38 0xc3 0x27 0x1e
+
+# CHECK: fmov d0, d1
+# CHECK: fabs d2, d3
+# CHECK: fneg d4, d5
+# CHECK: fsqrt d6, d7
+# CHECK: fcvt s8, d9
+# CHECK: fcvt h10, d11
+# CHECK: frintn d12, d13
+# CHECK: frintp d14, d15
+# CHECK: frintm d16, d17
+# CHECK: frintz d18, d19
+# CHECK: frinta d20, d21
+# CHECK: frintx d22, d23
+# CHECK: frinti d24, d25
+0x20 0x40 0x60 0x1e
+0x62 0xc0 0x60 0x1e
+0xa4 0x40 0x61 0x1e
+0xe6 0xc0 0x61 0x1e
+0x28 0x41 0x62 0x1e
+0x6a 0xc1 0x63 0x1e
+0xac 0x41 0x64 0x1e
+0xee 0xc1 0x64 0x1e
+0x30 0x42 0x65 0x1e
+0x72 0xc2 0x65 0x1e
+0xb4 0x42 0x66 0x1e
+0xf6 0x42 0x67 0x1e
+0x38 0xc3 0x67 0x1e
+
+# CHECK: fcvt s26, h27
+# CHECK: fcvt d28, h29
+0x7a 0x43 0xe2 0x1e
+0xbc 0xc3 0xe2 0x1e
+
+#------------------------------------------------------------------------------
+# Floating-point data-processing (2 sources)
+#------------------------------------------------------------------------------
+
+# CHECK: fmul s20, s19, s17
+# CHECK: fdiv s1, s2, s3
+# CHECK: fadd s4, s5, s6
+# CHECK: fsub s7, s8, s9
+# CHECK: fmax s10, s11, s12
+# CHECK: fmin s13, s14, s15
+# CHECK: fmaxnm s16, s17, s18
+# CHECK: fminnm s19, s20, s21
+# CHECK: fnmul s22, s23, s24
+0x74 0xa 0x31 0x1e
+0x41 0x18 0x23 0x1e
+0xa4 0x28 0x26 0x1e
+0x7 0x39 0x29 0x1e
+0x6a 0x49 0x2c 0x1e
+0xcd 0x59 0x2f 0x1e
+0x30 0x6a 0x32 0x1e
+0x93 0x7a 0x35 0x1e
+0xf6 0x8a 0x38 0x1e
+
+
+# CHECK: fmul d20, d19, d17
+# CHECK: fdiv d1, d2, d3
+# CHECK: fadd d4, d5, d6
+# CHECK: fsub d7, d8, d9
+# CHECK: fmax d10, d11, d12
+# CHECK: fmin d13, d14, d15
+# CHECK: fmaxnm d16, d17, d18
+# CHECK: fminnm d19, d20, d21
+# CHECK: fnmul d22, d23, d24
+0x74 0xa 0x71 0x1e
+0x41 0x18 0x63 0x1e
+0xa4 0x28 0x66 0x1e
+0x7 0x39 0x69 0x1e
+0x6a 0x49 0x6c 0x1e
+0xcd 0x59 0x6f 0x1e
+0x30 0x6a 0x72 0x1e
+0x93 0x7a 0x75 0x1e
+0xf6 0x8a 0x78 0x1e
+
+#------------------------------------------------------------------------------
+# Floating-point data-processing (3 sources)
+#------------------------------------------------------------------------------
+
+# CHECK: fmadd s3, s5, s6, s31
+# CHECK: fmadd d3, d13, d0, d23
+# CHECK: fmsub s3, s5, s6, s31
+# CHECK: fmsub d3, d13, d0, d23
+# CHECK: fnmadd s3, s5, s6, s31
+# CHECK: fnmadd d3, d13, d0, d23
+# CHECK: fnmsub s3, s5, s6, s31
+# CHECK: fnmsub d3, d13, d0, d23
+0xa3 0x7c 0x06 0x1f
+0xa3 0x5d 0x40 0x1f
+0xa3 0xfc 0x06 0x1f
+0xa3 0xdd 0x40 0x1f
+0xa3 0x7c 0x26 0x1f
+0xa3 0x5d 0x60 0x1f
+0xa3 0xfc 0x26 0x1f
+0xa3 0xdd 0x60 0x1f
+
+#------------------------------------------------------------------------------
+# Floating-point <-> fixed-point conversion
+#------------------------------------------------------------------------------
+
+# CHECK: fcvtzs w3, s5, #1
+# CHECK: fcvtzs wzr, s20, #13
+# CHECK: fcvtzs w19, s0, #32
+0xa3 0xfc 0x18 0x1e
+0x9f 0xce 0x18 0x1e
+0x13 0x80 0x18 0x1e
+
+# CHECK: fcvtzs x3, s5, #1
+# CHECK: fcvtzs x12, s30, #45
+# CHECK: fcvtzs x19, s0, #64
+0xa3 0xfc 0x18 0x9e
+0xcc 0x4f 0x18 0x9e
+0x13 0x00 0x18 0x9e
+
+# CHECK: fcvtzs w3, d5, #1
+# CHECK: fcvtzs wzr, d20, #13
+# CHECK: fcvtzs w19, d0, #32
+0xa3 0xfc 0x58 0x1e
+0x9f 0xce 0x58 0x1e
+0x13 0x80 0x58 0x1e
+
+# CHECK: fcvtzs x3, d5, #1
+# CHECK: fcvtzs x12, d30, #45
+# CHECK: fcvtzs x19, d0, #64
+0xa3 0xfc 0x58 0x9e
+0xcc 0x4f 0x58 0x9e
+0x13 0x00 0x58 0x9e
+
+# CHECK: fcvtzu w3, s5, #1
+# CHECK: fcvtzu wzr, s20, #13
+# CHECK: fcvtzu w19, s0, #32
+0xa3 0xfc 0x19 0x1e
+0x9f 0xce 0x19 0x1e
+0x13 0x80 0x19 0x1e
+
+# CHECK: fcvtzu x3, s5, #1
+# CHECK: fcvtzu x12, s30, #45
+# CHECK: fcvtzu x19, s0, #64
+0xa3 0xfc 0x19 0x9e
+0xcc 0x4f 0x19 0x9e
+0x13 0x00 0x19 0x9e
+
+# CHECK: fcvtzu w3, d5, #1
+# CHECK: fcvtzu wzr, d20, #13
+# CHECK: fcvtzu w19, d0, #32
+0xa3 0xfc 0x59 0x1e
+0x9f 0xce 0x59 0x1e
+0x13 0x80 0x59 0x1e
+
+# CHECK: fcvtzu x3, d5, #1
+# CHECK: fcvtzu x12, d30, #45
+# CHECK: fcvtzu x19, d0, #64
+0xa3 0xfc 0x59 0x9e
+0xcc 0x4f 0x59 0x9e
+0x13 0x00 0x59 0x9e
+
+# CHECK: scvtf s23, w19, #1
+# CHECK: scvtf s31, wzr, #20
+# CHECK: scvtf s14, w0, #32
+0x77 0xfe 0x02 0x1e
+0xff 0xb3 0x02 0x1e
+0x0e 0x80 0x02 0x1e
+
+# CHECK: scvtf s23, x19, #1
+# CHECK: scvtf s31, xzr, #20
+# CHECK: scvtf s14, x0, #64
+0x77 0xfe 0x02 0x9e
+0xff 0xb3 0x02 0x9e
+0x0e 0x00 0x02 0x9e
+
+# CHECK: scvtf d23, w19, #1
+# CHECK: scvtf d31, wzr, #20
+# CHECK: scvtf d14, w0, #32
+0x77 0xfe 0x42 0x1e
+0xff 0xb3 0x42 0x1e
+0x0e 0x80 0x42 0x1e
+
+# CHECK: scvtf d23, x19, #1
+# CHECK: scvtf d31, xzr, #20
+# CHECK: scvtf d14, x0, #64
+0x77 0xfe 0x42 0x9e
+0xff 0xb3 0x42 0x9e
+0x0e 0x00 0x42 0x9e
+
+# CHECK: ucvtf s23, w19, #1
+# CHECK: ucvtf s31, wzr, #20
+# CHECK: ucvtf s14, w0, #32
+0x77 0xfe 0x03 0x1e
+0xff 0xb3 0x03 0x1e
+0x0e 0x80 0x03 0x1e
+
+# CHECK: ucvtf s23, x19, #1
+# CHECK: ucvtf s31, xzr, #20
+# CHECK: ucvtf s14, x0, #64
+0x77 0xfe 0x03 0x9e
+0xff 0xb3 0x03 0x9e
+0x0e 0x00 0x03 0x9e
+
+# CHECK: ucvtf d23, w19, #1
+# CHECK: ucvtf d31, wzr, #20
+# CHECK: ucvtf d14, w0, #32
+0x77 0xfe 0x43 0x1e
+0xff 0xb3 0x43 0x1e
+0x0e 0x80 0x43 0x1e
+
+# CHECK: ucvtf d23, x19, #1
+# CHECK: ucvtf d31, xzr, #20
+# CHECK: ucvtf d14, x0, #64
+0x77 0xfe 0x43 0x9e
+0xff 0xb3 0x43 0x9e
+0x0e 0x00 0x43 0x9e
+
+#------------------------------------------------------------------------------
+# Floating-point <-> integer conversion
+#------------------------------------------------------------------------------
+# CHECK: fcvtns w3, s31
+# CHECK: fcvtns xzr, s12
+# CHECK: fcvtnu wzr, s12
+# CHECK: fcvtnu x0, s0
+0xe3 0x3 0x20 0x1e
+0x9f 0x1 0x20 0x9e
+0x9f 0x1 0x21 0x1e
+0x0 0x0 0x21 0x9e
+
+# CHECK: fcvtps wzr, s9
+# CHECK: fcvtps x12, s20
+# CHECK: fcvtpu w30, s23
+# CHECK: fcvtpu x29, s3
+0x3f 0x1 0x28 0x1e
+0x8c 0x2 0x28 0x9e
+0xfe 0x2 0x29 0x1e
+0x7d 0x0 0x29 0x9e
+
+# CHECK: fcvtms w2, s3
+# CHECK: fcvtms x4, s5
+# CHECK: fcvtmu w6, s7
+# CHECK: fcvtmu x8, s9
+0x62 0x0 0x30 0x1e
+0xa4 0x0 0x30 0x9e
+0xe6 0x0 0x31 0x1e
+0x28 0x1 0x31 0x9e
+
+# CHECK: fcvtzs w10, s11
+# CHECK: fcvtzs x12, s13
+# CHECK: fcvtzu w14, s15
+# CHECK: fcvtzu x15, s16
+0x6a 0x1 0x38 0x1e
+0xac 0x1 0x38 0x9e
+0xee 0x1 0x39 0x1e
+0xf 0x2 0x39 0x9e
+
+# CHECK: scvtf s17, w18
+# CHECK: scvtf s19, x20
+# CHECK: ucvtf s21, w22
+# CHECK: scvtf s23, x24
+0x51 0x2 0x22 0x1e
+0x93 0x2 0x22 0x9e
+0xd5 0x2 0x23 0x1e
+0x17 0x3 0x22 0x9e
+
+# CHECK: fcvtas w25, s26
+# CHECK: fcvtas x27, s28
+# CHECK: fcvtau w29, s30
+# CHECK: fcvtau xzr, s0
+0x59 0x3 0x24 0x1e
+0x9b 0x3 0x24 0x9e
+0xdd 0x3 0x25 0x1e
+0x1f 0x0 0x25 0x9e
+
+# CHECK: fcvtns w3, d31
+# CHECK: fcvtns xzr, d12
+# CHECK: fcvtnu wzr, d12
+# CHECK: fcvtnu x0, d0
+0xe3 0x3 0x60 0x1e
+0x9f 0x1 0x60 0x9e
+0x9f 0x1 0x61 0x1e
+0x0 0x0 0x61 0x9e
+
+# CHECK: fcvtps wzr, d9
+# CHECK: fcvtps x12, d20
+# CHECK: fcvtpu w30, d23
+# CHECK: fcvtpu x29, d3
+0x3f 0x1 0x68 0x1e
+0x8c 0x2 0x68 0x9e
+0xfe 0x2 0x69 0x1e
+0x7d 0x0 0x69 0x9e
+
+# CHECK: fcvtms w2, d3
+# CHECK: fcvtms x4, d5
+# CHECK: fcvtmu w6, d7
+# CHECK: fcvtmu x8, d9
+0x62 0x0 0x70 0x1e
+0xa4 0x0 0x70 0x9e
+0xe6 0x0 0x71 0x1e
+0x28 0x1 0x71 0x9e
+
+# CHECK: fcvtzs w10, d11
+# CHECK: fcvtzs x12, d13
+# CHECK: fcvtzu w14, d15
+# CHECK: fcvtzu x15, d16
+0x6a 0x1 0x78 0x1e
+0xac 0x1 0x78 0x9e
+0xee 0x1 0x79 0x1e
+0xf 0x2 0x79 0x9e
+
+# CHECK: scvtf d17, w18
+# CHECK: scvtf d19, x20
+# CHECK: ucvtf d21, w22
+# CHECK: ucvtf d23, x24
+0x51 0x2 0x62 0x1e
+0x93 0x2 0x62 0x9e
+0xd5 0x2 0x63 0x1e
+0x17 0x3 0x63 0x9e
+
+# CHECK: fcvtas w25, d26
+# CHECK: fcvtas x27, d28
+# CHECK: fcvtau w29, d30
+# CHECK: fcvtau xzr, d0
+0x59 0x3 0x64 0x1e
+0x9b 0x3 0x64 0x9e
+0xdd 0x3 0x65 0x1e
+0x1f 0x0 0x65 0x9e
+
+# CHECK: fmov w3, s9
+# CHECK: fmov s9, w3
+0x23 0x1 0x26 0x1e
+0x69 0x0 0x27 0x1e
+
+# CHECK: fmov x20, d31
+# CHECK: fmov d1, x15
+0xf4 0x3 0x66 0x9e
+0xe1 0x1 0x67 0x9e
+
+# CHECK: fmov x3, v12.d[1]
+# CHECK: fmov v1.d[1], x19
+0x83 0x1 0xae 0x9e
+0x61 0x2 0xaf 0x9e
+
+#------------------------------------------------------------------------------
+# Floating-point immediate
+#------------------------------------------------------------------------------
+
+# CHECK: fmov s2, #0.12500000
+# CHECK: fmov s3, #1.00000000
+# CHECK: fmov d30, #16.00000000
+0x2 0x10 0x28 0x1e
+0x3 0x10 0x2e 0x1e
+0x1e 0x10 0x66 0x1e
+
+# CHECK: fmov s4, #1.06250000
+# CHECK: fmov d10, #1.93750000
+0x4 0x30 0x2e 0x1e
+0xa 0xf0 0x6f 0x1e
+
+# CHECK: fmov s12, #-1.00000000
+0xc 0x10 0x3e 0x1e
+
+# CHECK: fmov d16, #8.50000000
+0x10 0x30 0x64 0x1e
+
+#------------------------------------------------------------------------------
+# Load-register (literal)
+#------------------------------------------------------------------------------
+
+# CHECK: ldr w3, #0
+# CHECK: ldr x29, #4
+# CHECK: ldrsw xzr, #-4
+0x03 0x00 0x00 0x18
+0x3d 0x00 0x00 0x58
+0xff 0xff 0xff 0x98
+
+# CHECK: ldr s0, #8
+# CHECK: ldr d0, #1048572
+# CHECK: ldr q0, #-1048576
+0x40 0x00 0x00 0x1c
+0xe0 0xff 0x7f 0x5c
+0x00 0x00 0x80 0x9c
+
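+# (prfop 22 below has no named form, so it is printed as a plain immediate.)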
+# CHECK: prfm pldl1strm, #0
+# CHECK: prfm #22, #0
+0x01 0x00 0x00 0xd8
+0x16 0x00 0x00 0xd8
+
+#------------------------------------------------------------------------------
+# Load/store exclusive
+#------------------------------------------------------------------------------
+
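+# Some expected lines in this section repeat: the corresponding encodings differ
+# only in the ignored Rs/Rt2 fields and must all decode to the same instruction.
+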
+#CHECK: stxrb w18, w8, [sp]
+#CHECK: stxrh w24, w15, [x16]
+#CHECK: stxr w5, w6, [x17]
+#CHECK: stxr w1, x10, [x21]
+#CHECK: stxr w1, x10, [x21]
+0xe8 0x7f 0x12 0x08
+0x0f 0x7e 0x18 0x48
+0x26 0x7e 0x05 0x88
+0xaa 0x7e 0x01 0xc8
+0xaa 0x7a 0x01 0xc8
+
+#CHECK: ldxrb w30, [x0]
+#CHECK: ldxrh w17, [x4]
+#CHECK: ldxr w22, [sp]
+#CHECK: ldxr x11, [x29]
+#CHECK: ldxr x11, [x29]
+#CHECK: ldxr x11, [x29]
+0x1e 0x7c 0x5f 0x08
+0x91 0x7c 0x5f 0x48
+0xf6 0x7f 0x5f 0x88
+0xab 0x7f 0x5f 0xc8
+0xab 0x6f 0x5f 0xc8
+0xab 0x7f 0x5e 0xc8
+
+#CHECK: stxp w12, w11, w10, [sp]
+#CHECK: stxp wzr, x27, x9, [x12]
+0xeb 0x2b 0x2c 0x88
+0x9b 0x25 0x3f 0xc8
+
+#CHECK: ldxp w0, wzr, [sp]
+#CHECK: ldxp x17, x0, [x18]
+#CHECK: ldxp x17, x0, [x18]
+0xe0 0x7f 0x7f 0x88
+0x51 0x02 0x7f 0xc8
+0x51 0x02 0x7e 0xc8
+
+#CHECK: stlxrb w12, w22, [x0]
+#CHECK: stlxrh w10, w1, [x1]
+#CHECK: stlxr w9, w2, [x2]
+#CHECK: stlxr w9, x3, [sp]
+0x16 0xfc 0x0c 0x08
+0x21 0xfc 0x0a 0x48
+0x42 0xfc 0x09 0x88
+0xe3 0xff 0x09 0xc8
+
+#CHECK: ldaxrb w8, [x4]
+#CHECK: ldaxrh w7, [x5]
+#CHECK: ldaxr w6, [sp]
+#CHECK: ldaxr x5, [x6]
+#CHECK: ldaxr x5, [x6]
+#CHECK: ldaxr x5, [x6]
+0x88 0xfc 0x5f 0x08
+0xa7 0xfc 0x5f 0x48
+0xe6 0xff 0x5f 0x88
+0xc5 0xfc 0x5f 0xc8
+0xc5 0xec 0x5f 0xc8
+0xc5 0xfc 0x5e 0xc8
+
+#CHECK: stlxp w4, w5, w6, [sp]
+#CHECK: stlxp wzr, x6, x7, [x1]
+0xe5 0x9b 0x24 0x88
+0x26 0x9c 0x3f 0xc8
+
+#CHECK: ldaxp w5, w18, [sp]
+#CHECK: ldaxp x6, x19, [x22]
+#CHECK: ldaxp x6, x19, [x22]
+0xe5 0xcb 0x7f 0x88
+0xc6 0xce 0x7f 0xc8
+0xc6 0xce 0x7e 0xc8
+
+#CHECK: stlrb w24, [sp]
+#CHECK: stlrh w25, [x30]
+#CHECK: stlr w26, [x29]
+#CHECK: stlr x27, [x28]
+#CHECK: stlr x27, [x28]
+#CHECK: stlr x27, [x28]
+0xf8 0xff 0x9f 0x08
+0xd9 0xff 0x9f 0x48
+0xba 0xff 0x9f 0x88
+0x9b 0xff 0x9f 0xc8
+0x9b 0xef 0x9f 0xc8
+0x9b 0xff 0x9e 0xc8
+
+#CHECK: ldarb w23, [sp]
+#CHECK: ldarh w22, [x30]
+#CHECK: ldar wzr, [x29]
+#CHECK: ldar x21, [x28]
+#CHECK: ldar x21, [x28]
+#CHECK: ldar x21, [x28]
+0xf7 0xff 0xdf 0x08
+0xd6 0xff 0xdf 0x48
+0xbf 0xff 0xdf 0x88
+0x95 0xff 0xdf 0xc8
+0x95 0xef 0xdf 0xc8
+0x95 0xff 0xde 0xc8
+
+#------------------------------------------------------------------------------
+# Load/store (unscaled immediate)
+#------------------------------------------------------------------------------
+
+# CHECK: sturb w9, [sp]
+# CHECK: sturh wzr, [x12, #255]
+# CHECK: stur w16, [x0, #-256]
+# CHECK: stur x28, [x14, #1]
+0xe9 0x3 0x0 0x38
+0x9f 0xf1 0xf 0x78
+0x10 0x0 0x10 0xb8
+0xdc 0x11 0x0 0xf8
+
+# CHECK: ldurb w1, [x20, #255]
+# CHECK: ldurh w20, [x1, #255]
+# CHECK: ldur w12, [sp, #255]
+# CHECK: ldur xzr, [x12, #255]
+0x81 0xf2 0x4f 0x38
+0x34 0xf0 0x4f 0x78
+0xec 0xf3 0x4f 0xb8
+0x9f 0xf1 0x4f 0xf8
+
+# CHECK: ldursb x9, [x7, #-256]
+# CHECK: ldursh x17, [x19, #-256]
+# CHECK: ldursw x20, [x15, #-256]
+# CHECK: prfum pldl2keep, [sp, #-256]
+# CHECK: ldursb w19, [x1, #-256]
+# CHECK: ldursh w15, [x21, #-256]
+0xe9 0x0 0x90 0x38
+0x71 0x2 0x90 0x78
+0xf4 0x1 0x90 0xb8
+0xe2 0x3 0x90 0xf8
+0x33 0x0 0xd0 0x38
+0xaf 0x2 0xd0 0x78
+
+# CHECK: stur b0, [sp, #1]
+# CHECK: stur h12, [x12, #-1]
+# CHECK: stur s15, [x0, #255]
+# CHECK: stur d31, [x5, #25]
+# CHECK: stur q9, [x5]
+0xe0 0x13 0x0 0x3c
+0x8c 0xf1 0x1f 0x7c
+0xf 0xf0 0xf 0xbc
+0xbf 0x90 0x1 0xfc
+0xa9 0x0 0x80 0x3c
+
+# CHECK: ldur b3, [sp]
+# CHECK: ldur h5, [x4, #-256]
+# CHECK: ldur s7, [x12, #-1]
+# CHECK: ldur d11, [x19, #4]
+# CHECK: ldur q13, [x1, #2]
+0xe3 0x3 0x40 0x3c
+0x85 0x0 0x50 0x7c
+0x87 0xf1 0x5f 0xbc
+0x6b 0x42 0x40 0xfc
+0x2d 0x20 0xc0 0x3c
+
+#------------------------------------------------------------------------------
+# Load/store (immediate post-indexed)
+#------------------------------------------------------------------------------
+
+# Writeback with Rt == Rn is only unpredictable when Rn != 31; here the base is
+# sp and the transfer register is the zero register, so no warning is expected.
+# CHECK-NOT: warning: potentially undefined instruction encoding
+0xff 0x47 0x40 0xb8
+
+# CHECK: strb w9, [x2], #255
+# CHECK: strb w10, [x3], #1
+# CHECK: strb w10, [x3], #-256
+# CHECK: strh w9, [x2], #255
+# CHECK: strh w9, [x2], #1
+# CHECK: strh w10, [x3], #-256
+0x49 0xf4 0xf 0x38
+0x6a 0x14 0x0 0x38
+0x6a 0x4 0x10 0x38
+0x49 0xf4 0xf 0x78
+0x49 0x14 0x0 0x78
+0x6a 0x4 0x10 0x78
+
+# CHECK: str w19, [sp], #255
+# CHECK: str w20, [x30], #1
+# CHECK: str w21, [x12], #-256
+# CHECK: str xzr, [x9], #255
+# CHECK: str x2, [x3], #1
+# CHECK: str x19, [x12], #-256
+0xf3 0xf7 0xf 0xb8
+0xd4 0x17 0x0 0xb8
+0x95 0x5 0x10 0xb8
+0x3f 0xf5 0xf 0xf8
+0x62 0x14 0x0 0xf8
+0x93 0x5 0x10 0xf8
+
+# CHECK: ldrb w9, [x2], #255
+# CHECK: ldrb w10, [x3], #1
+# CHECK: ldrb w10, [x3], #-256
+# CHECK: ldrh w9, [x2], #255
+# CHECK: ldrh w9, [x2], #1
+# CHECK: ldrh w10, [x3], #-256
+0x49 0xf4 0x4f 0x38
+0x6a 0x14 0x40 0x38
+0x6a 0x4 0x50 0x38
+0x49 0xf4 0x4f 0x78
+0x49 0x14 0x40 0x78
+0x6a 0x4 0x50 0x78
+
+# CHECK: ldr w19, [sp], #255
+# CHECK: ldr w20, [x30], #1
+# CHECK: ldr w21, [x12], #-256
+# CHECK: ldr xzr, [x9], #255
+# CHECK: ldr x2, [x3], #1
+# CHECK: ldr x19, [x12], #-256
+0xf3 0xf7 0x4f 0xb8
+0xd4 0x17 0x40 0xb8
+0x95 0x5 0x50 0xb8
+0x3f 0xf5 0x4f 0xf8
+0x62 0x14 0x40 0xf8
+0x93 0x5 0x50 0xf8
+
+# CHECK: ldrsb xzr, [x9], #255
+# CHECK: ldrsb x2, [x3], #1
+# CHECK: ldrsb x19, [x12], #-256
+# CHECK: ldrsh xzr, [x9], #255
+# CHECK: ldrsh x2, [x3], #1
+# CHECK: ldrsh x19, [x12], #-256
+# CHECK: ldrsw xzr, [x9], #255
+# CHECK: ldrsw x2, [x3], #1
+# CHECK: ldrsw x19, [x12], #-256
+0x3f 0xf5 0x8f 0x38
+0x62 0x14 0x80 0x38
+0x93 0x5 0x90 0x38
+0x3f 0xf5 0x8f 0x78
+0x62 0x14 0x80 0x78
+0x93 0x5 0x90 0x78
+0x3f 0xf5 0x8f 0xb8
+0x62 0x14 0x80 0xb8
+0x93 0x5 0x90 0xb8
+
+# CHECK: ldrsb wzr, [x9], #255
+# CHECK: ldrsb w2, [x3], #1
+# CHECK: ldrsb w19, [x12], #-256
+# CHECK: ldrsh wzr, [x9], #255
+# CHECK: ldrsh w2, [x3], #1
+# CHECK: ldrsh w19, [x12], #-256
+0x3f 0xf5 0xcf 0x38
+0x62 0x14 0xc0 0x38
+0x93 0x5 0xd0 0x38
+0x3f 0xf5 0xcf 0x78
+0x62 0x14 0xc0 0x78
+0x93 0x5 0xd0 0x78
+
+# CHECK: str b0, [x0], #255
+# CHECK: str b3, [x3], #1
+# CHECK: str b5, [sp], #-256
+# CHECK: str h10, [x10], #255
+# CHECK: str h13, [x23], #1
+# CHECK: str h15, [sp], #-256
+# CHECK: str s20, [x20], #255
+# CHECK: str s23, [x23], #1
+# CHECK: str s25, [x0], #-256
+# CHECK: str d20, [x20], #255
+# CHECK: str d23, [x23], #1
+# CHECK: str d25, [x0], #-256
+0x0 0xf4 0xf 0x3c
+0x63 0x14 0x0 0x3c
+0xe5 0x7 0x10 0x3c
+0x4a 0xf5 0xf 0x7c
+0xed 0x16 0x0 0x7c
+0xef 0x7 0x10 0x7c
+0x94 0xf6 0xf 0xbc
+0xf7 0x16 0x0 0xbc
+0x19 0x4 0x10 0xbc
+0x94 0xf6 0xf 0xfc
+0xf7 0x16 0x0 0xfc
+0x19 0x4 0x10 0xfc
+
+# CHECK: ldr b0, [x0], #255
+# CHECK: ldr b3, [x3], #1
+# CHECK: ldr b5, [sp], #-256
+# CHECK: ldr h10, [x10], #255
+# CHECK: ldr h13, [x23], #1
+# CHECK: ldr h15, [sp], #-256
+# CHECK: ldr s20, [x20], #255
+# CHECK: ldr s23, [x23], #1
+# CHECK: ldr s25, [x0], #-256
+# CHECK: ldr d20, [x20], #255
+# CHECK: ldr d23, [x23], #1
+# CHECK: ldr d25, [x0], #-256
+0x0 0xf4 0x4f 0x3c
+0x63 0x14 0x40 0x3c
+0xe5 0x7 0x50 0x3c
+0x4a 0xf5 0x4f 0x7c
+0xed 0x16 0x40 0x7c
+0xef 0x7 0x50 0x7c
+0x94 0xf6 0x4f 0xbc
+0xf7 0x16 0x40 0xbc
+0x19 0x4 0x50 0xbc
+0x94 0xf6 0x4f 0xfc
+0xf7 0x16 0x40 0xfc
+0x19 0x4 0x50 0xfc
+
+# CHECK: ldr q20, [x1], #255
+# CHECK: ldr q23, [x9], #1
+# CHECK: ldr q25, [x20], #-256
+# CHECK: str q10, [x1], #255
+# CHECK: str q22, [sp], #1
+# CHECK: str q21, [x20], #-256
+0x34 0xf4 0xcf 0x3c
+0x37 0x15 0xc0 0x3c
+0x99 0x6 0xd0 0x3c
+0x2a 0xf4 0x8f 0x3c
+0xf6 0x17 0x80 0x3c
+0x95 0x6 0x90 0x3c
+
+#-------------------------------------------------------------------------------
+# Load/store (immediate pre-indexed)
+#-------------------------------------------------------------------------------
+
+# Likewise for the pre-indexed form below: Rt == Rn == 31 (zero register with an
+# sp base) is *not* unpredictable.
+# CHECK-NOT: warning: potentially undefined instruction encoding
+0xff 0xf 0x40 0xf8
+
+# CHECK: ldr x3, [x4, #0]!
+0x83 0xc 0x40 0xf8
+
+# CHECK: strb w9, [x2, #255]!
+# CHECK: strb w10, [x3, #1]!
+# CHECK: strb w10, [x3, #-256]!
+# CHECK: strh w9, [x2, #255]!
+# CHECK: strh w9, [x2, #1]!
+# CHECK: strh w10, [x3, #-256]!
+0x49 0xfc 0xf 0x38
+0x6a 0x1c 0x0 0x38
+0x6a 0xc 0x10 0x38
+0x49 0xfc 0xf 0x78
+0x49 0x1c 0x0 0x78
+0x6a 0xc 0x10 0x78
+
+# CHECK: str w19, [sp, #255]!
+# CHECK: str w20, [x30, #1]!
+# CHECK: str w21, [x12, #-256]!
+# CHECK: str xzr, [x9, #255]!
+# CHECK: str x2, [x3, #1]!
+# CHECK: str x19, [x12, #-256]!
+0xf3 0xff 0xf 0xb8
+0xd4 0x1f 0x0 0xb8
+0x95 0xd 0x10 0xb8
+0x3f 0xfd 0xf 0xf8
+0x62 0x1c 0x0 0xf8
+0x93 0xd 0x10 0xf8
+
+# CHECK: ldrb w9, [x2, #255]!
+# CHECK: ldrb w10, [x3, #1]!
+# CHECK: ldrb w10, [x3, #-256]!
+# CHECK: ldrh w9, [x2, #255]!
+# CHECK: ldrh w9, [x2, #1]!
+# CHECK: ldrh w10, [x3, #-256]!
+0x49 0xfc 0x4f 0x38
+0x6a 0x1c 0x40 0x38
+0x6a 0xc 0x50 0x38
+0x49 0xfc 0x4f 0x78
+0x49 0x1c 0x40 0x78
+0x6a 0xc 0x50 0x78
+
+# CHECK: ldr w19, [sp, #255]!
+# CHECK: ldr w20, [x30, #1]!
+# CHECK: ldr w21, [x12, #-256]!
+# CHECK: ldr xzr, [x9, #255]!
+# CHECK: ldr x2, [x3, #1]!
+# CHECK: ldr x19, [x12, #-256]!
+0xf3 0xff 0x4f 0xb8
+0xd4 0x1f 0x40 0xb8
+0x95 0xd 0x50 0xb8
+0x3f 0xfd 0x4f 0xf8
+0x62 0x1c 0x40 0xf8
+0x93 0xd 0x50 0xf8
+
+# CHECK: ldrsb xzr, [x9, #255]!
+# CHECK: ldrsb x2, [x3, #1]!
+# CHECK: ldrsb x19, [x12, #-256]!
+# CHECK: ldrsh xzr, [x9, #255]!
+# CHECK: ldrsh x2, [x3, #1]!
+# CHECK: ldrsh x19, [x12, #-256]!
+# CHECK: ldrsw xzr, [x9, #255]!
+# CHECK: ldrsw x2, [x3, #1]!
+# CHECK: ldrsw x19, [x12, #-256]!
+0x3f 0xfd 0x8f 0x38
+0x62 0x1c 0x80 0x38
+0x93 0xd 0x90 0x38
+0x3f 0xfd 0x8f 0x78
+0x62 0x1c 0x80 0x78
+0x93 0xd 0x90 0x78
+0x3f 0xfd 0x8f 0xb8
+0x62 0x1c 0x80 0xb8
+0x93 0xd 0x90 0xb8
+
+# CHECK: ldrsb wzr, [x9, #255]!
+# CHECK: ldrsb w2, [x3, #1]!
+# CHECK: ldrsb w19, [x12, #-256]!
+# CHECK: ldrsh wzr, [x9, #255]!
+# CHECK: ldrsh w2, [x3, #1]!
+# CHECK: ldrsh w19, [x12, #-256]!
+0x3f 0xfd 0xcf 0x38
+0x62 0x1c 0xc0 0x38
+0x93 0xd 0xd0 0x38
+0x3f 0xfd 0xcf 0x78
+0x62 0x1c 0xc0 0x78
+0x93 0xd 0xd0 0x78
+
+# CHECK: str b0, [x0, #255]!
+# CHECK: str b3, [x3, #1]!
+# CHECK: str b5, [sp, #-256]!
+# CHECK: str h10, [x10, #255]!
+# CHECK: str h13, [x23, #1]!
+# CHECK: str h15, [sp, #-256]!
+# CHECK: str s20, [x20, #255]!
+# CHECK: str s23, [x23, #1]!
+# CHECK: str s25, [x0, #-256]!
+# CHECK: str d20, [x20, #255]!
+# CHECK: str d23, [x23, #1]!
+# CHECK: str d25, [x0, #-256]!
+0x0 0xfc 0xf 0x3c
+0x63 0x1c 0x0 0x3c
+0xe5 0xf 0x10 0x3c
+0x4a 0xfd 0xf 0x7c
+0xed 0x1e 0x0 0x7c
+0xef 0xf 0x10 0x7c
+0x94 0xfe 0xf 0xbc
+0xf7 0x1e 0x0 0xbc
+0x19 0xc 0x10 0xbc
+0x94 0xfe 0xf 0xfc
+0xf7 0x1e 0x0 0xfc
+0x19 0xc 0x10 0xfc
+
+# CHECK: ldr b0, [x0, #255]!
+# CHECK: ldr b3, [x3, #1]!
+# CHECK: ldr b5, [sp, #-256]!
+# CHECK: ldr h10, [x10, #255]!
+# CHECK: ldr h13, [x23, #1]!
+# CHECK: ldr h15, [sp, #-256]!
+# CHECK: ldr s20, [x20, #255]!
+# CHECK: ldr s23, [x23, #1]!
+# CHECK: ldr s25, [x0, #-256]!
+# CHECK: ldr d20, [x20, #255]!
+# CHECK: ldr d23, [x23, #1]!
+# CHECK: ldr d25, [x0, #-256]!
+0x0 0xfc 0x4f 0x3c
+0x63 0x1c 0x40 0x3c
+0xe5 0xf 0x50 0x3c
+0x4a 0xfd 0x4f 0x7c
+0xed 0x1e 0x40 0x7c
+0xef 0xf 0x50 0x7c
+0x94 0xfe 0x4f 0xbc
+0xf7 0x1e 0x40 0xbc
+0x19 0xc 0x50 0xbc
+0x94 0xfe 0x4f 0xfc
+0xf7 0x1e 0x40 0xfc
+0x19 0xc 0x50 0xfc
+
+# CHECK: ldr q20, [x1, #255]!
+# CHECK: ldr q23, [x9, #1]!
+# CHECK: ldr q25, [x20, #-256]!
+# CHECK: str q10, [x1, #255]!
+# CHECK: str q22, [sp, #1]!
+# CHECK: str q21, [x20, #-256]!
+0x34 0xfc 0xcf 0x3c
+0x37 0x1d 0xc0 0x3c
+0x99 0xe 0xd0 0x3c
+0x2a 0xfc 0x8f 0x3c
+0xf6 0x1f 0x80 0x3c
+0x95 0xe 0x90 0x3c
+
+#------------------------------------------------------------------------------
+# Load/store (unprivileged)
+#------------------------------------------------------------------------------
+
+# CHECK: sttrb w9, [sp]
+# CHECK: sttrh wzr, [x12, #255]
+# CHECK: sttr w16, [x0, #-256]
+# CHECK: sttr x28, [x14, #1]
+0xe9 0x0b 0x0 0x38
+0x9f 0xf9 0xf 0x78
+0x10 0x08 0x10 0xb8
+0xdc 0x19 0x0 0xf8
+
+# CHECK: ldtrb w1, [x20, #255]
+# CHECK: ldtrh w20, [x1, #255]
+# CHECK: ldtr w12, [sp, #255]
+# CHECK: ldtr xzr, [x12, #255]
+0x81 0xfa 0x4f 0x38
+0x34 0xf8 0x4f 0x78
+0xec 0xfb 0x4f 0xb8
+0x9f 0xf9 0x4f 0xf8
+
+# CHECK: ldtrsb x9, [x7, #-256]
+# CHECK: ldtrsh x17, [x19, #-256]
+# CHECK: ldtrsw x20, [x15, #-256]
+# CHECK: ldtrsb w19, [x1, #-256]
+# CHECK: ldtrsh w15, [x21, #-256]
+0xe9 0x08 0x90 0x38
+0x71 0x0a 0x90 0x78
+0xf4 0x09 0x90 0xb8
+0x33 0x08 0xd0 0x38
+0xaf 0x0a 0xd0 0x78
+
+#------------------------------------------------------------------------------
+# Load/store (unsigned immediate)
+#------------------------------------------------------------------------------
+
+# CHECK: ldr x0, [x0]
+# CHECK: ldr x4, [x29]
+# CHECK: ldr x30, [x12, #32760]
+# CHECK: ldr x20, [sp, #8]
+0x0 0x0 0x40 0xf9
+0xa4 0x3 0x40 0xf9
+0x9e 0xfd 0x7f 0xf9
+0xf4 0x7 0x40 0xf9
+
+# CHECK: ldr xzr, [sp]
+0xff 0x3 0x40 0xf9
+
+# CHECK: ldr w2, [sp]
+# CHECK: ldr w17, [sp, #16380]
+# CHECK: ldr w13, [x2, #4]
+0xe2 0x3 0x40 0xb9
+0xf1 0xff 0x7f 0xb9
+0x4d 0x4 0x40 0xb9
+
+# CHECK: ldrsw x2, [x5, #4]
+# CHECK: ldrsw x23, [sp, #16380]
+0xa2 0x4 0x80 0xb9
+0xf7 0xff 0xbf 0xb9
+
+# CHECK: ldrh w2, [x4]
+# CHECK: ldrsh w23, [x6, #8190]
+# CHECK: ldrsh wzr, [sp, #2]
+# CHECK: ldrsh x29, [x2, #2]
+0x82 0x0 0x40 0x79
+0xd7 0xfc 0xff 0x79
+0xff 0x7 0xc0 0x79
+0x5d 0x4 0x80 0x79
+
+# CHECK: ldrb w26, [x3, #121]
+# CHECK: ldrb w12, [x2]
+# CHECK: ldrsb w27, [sp, #4095]
+# CHECK: ldrsb xzr, [x15]
+0x7a 0xe4 0x41 0x39
+0x4c 0x0 0x40 0x39
+0xfb 0xff 0xff 0x39
+0xff 0x1 0x80 0x39
+
+# CHECK: str x30, [sp]
+# CHECK: str w20, [x4, #16380]
+# CHECK: strh w20, [x10, #14]
+# CHECK: strh w17, [sp, #8190]
+# CHECK: strb w23, [x3, #4095]
+# CHECK: strb wzr, [x2]
+0xfe 0x3 0x0 0xf9
+0x94 0xfc 0x3f 0xb9
+0x54 0x1d 0x0 0x79
+0xf1 0xff 0x3f 0x79
+0x77 0xfc 0x3f 0x39
+0x5f 0x0 0x0 0x39
+
+# CHECK: ldr b31, [sp, #4095]
+# CHECK: ldr h20, [x2, #8190]
+# CHECK: ldr s10, [x19, #16380]
+# CHECK: ldr d3, [x10, #32760]
+# CHECK: str q12, [sp, #65520]
+0xff 0xff 0x7f 0x3d
+0x54 0xfc 0x7f 0x7d
+0x6a 0xfe 0x7f 0xbd
+0x43 0xfd 0x7f 0xfd
+0xec 0xff 0xbf 0x3d
+
+#------------------------------------------------------------------------------
+# Load/store (register offset)
+#------------------------------------------------------------------------------
+
+# CHECK: ldrb w3, [sp, x5]
+# CHECK: ldrb w9, [x27, x6]
+# CHECK: ldrsb w10, [x30, x7]
+# CHECK: ldrb w11, [x29, x3, sxtx]
+# CHECK: strb w12, [x28, xzr, sxtx]
+# CHECK: ldrb w14, [x26, w6, uxtw]
+# CHECK: ldrsb w15, [x25, w7, uxtw]
+# CHECK: ldrb w17, [x23, w9, sxtw]
+# CHECK: ldrsb x18, [x22, w10, sxtw]
+0xe3 0x6b 0x65 0x38
+0x69 0x6b 0x66 0x38
+0xca 0x6b 0xe7 0x38
+0xab 0xeb 0x63 0x38
+0x8c 0xeb 0x3f 0x38
+0x4e 0x4b 0x66 0x38
+0x2f 0x4b 0xe7 0x38
+0xf1 0xca 0x69 0x38
+0xd2 0xca 0xaa 0x38
+
+# CHECK: ldrsh w3, [sp, x5]
+# CHECK: ldrsh w9, [x27, x6]
+# CHECK: ldrh w10, [x30, x7, lsl #1]
+# CHECK: strh w11, [x29, x3, sxtx]
+# CHECK: ldrh w12, [x28, xzr, sxtx]
+# CHECK: ldrsh x13, [x27, x5, sxtx #1]
+# CHECK: ldrh w14, [x26, w6, uxtw]
+# CHECK: ldrh w15, [x25, w7, uxtw]
+# CHECK: ldrsh w16, [x24, w8, uxtw #1]
+# CHECK: ldrh w17, [x23, w9, sxtw]
+# CHECK: ldrh w18, [x22, w10, sxtw]
+# CHECK: strh w19, [x21, wzr, sxtw #1]
+0xe3 0x6b 0xe5 0x78
+0x69 0x6b 0xe6 0x78
+0xca 0x7b 0x67 0x78
+0xab 0xeb 0x23 0x78
+0x8c 0xeb 0x7f 0x78
+0x6d 0xfb 0xa5 0x78
+0x4e 0x4b 0x66 0x78
+0x2f 0x4b 0x67 0x78
+0x10 0x5b 0xe8 0x78
+0xf1 0xca 0x69 0x78
+0xd2 0xca 0x6a 0x78
+0xb3 0xda 0x3f 0x78
+
+# CHECK: ldr w3, [sp, x5]
+# CHECK: ldr s9, [x27, x6]
+# CHECK: ldr w10, [x30, x7, lsl #2]
+# CHECK: ldr w11, [x29, x3, sxtx]
+# CHECK: str s12, [x28, xzr, sxtx]
+# CHECK: str w13, [x27, x5, sxtx #2]
+# CHECK: str w14, [x26, w6, uxtw]
+# CHECK: ldr w15, [x25, w7, uxtw]
+# CHECK: ldr w16, [x24, w8, uxtw #2]
+# CHECK: ldrsw x17, [x23, w9, sxtw]
+# CHECK: ldr w18, [x22, w10, sxtw]
+# CHECK: ldrsw x19, [x21, wzr, sxtw #2]
+0xe3 0x6b 0x65 0xb8
+0x69 0x6b 0x66 0xbc
+0xca 0x7b 0x67 0xb8
+0xab 0xeb 0x63 0xb8
+0x8c 0xeb 0x3f 0xbc
+0x6d 0xfb 0x25 0xb8
+0x4e 0x4b 0x26 0xb8
+0x2f 0x4b 0x67 0xb8
+0x10 0x5b 0x68 0xb8
+0xf1 0xca 0xa9 0xb8
+0xd2 0xca 0x6a 0xb8
+0xb3 0xda 0xbf 0xb8
+
+# CHECK: ldr x3, [sp, x5]
+# CHECK: str x9, [x27, x6]
+# CHECK: ldr d10, [x30, x7, lsl #3]
+# CHECK: str x11, [x29, x3, sxtx]
+# CHECK: ldr x12, [x28, xzr, sxtx]
+# CHECK: ldr x13, [x27, x5, sxtx #3]
+# CHECK: prfm pldl1keep, [x26, w6, uxtw]
+# CHECK: ldr x15, [x25, w7, uxtw]
+# CHECK: ldr x16, [x24, w8, uxtw #3]
+# CHECK: ldr x17, [x23, w9, sxtw]
+# CHECK: ldr x18, [x22, w10, sxtw]
+# CHECK: str d19, [x21, wzr, sxtw #3]
+0xe3 0x6b 0x65 0xf8
+0x69 0x6b 0x26 0xf8
+0xca 0x7b 0x67 0xfc
+0xab 0xeb 0x23 0xf8
+0x8c 0xeb 0x7f 0xf8
+0x6d 0xfb 0x65 0xf8
+0x40 0x4b 0xa6 0xf8
+0x2f 0x4b 0x67 0xf8
+0x10 0x5b 0x68 0xf8
+0xf1 0xca 0x69 0xf8
+0xd2 0xca 0x6a 0xf8
+0xb3 0xda 0x3f 0xfc
+
+# CHECK: ldr q3, [sp, x5]
+# CHECK: ldr q9, [x27, x6]
+# CHECK: ldr q10, [x30, x7, lsl #4]
+# CHECK: str q11, [x29, x3, sxtx]
+# CHECK: str q12, [x28, xzr, sxtx]
+# CHECK: str q13, [x27, x5, sxtx #4]
+# CHECK: ldr q14, [x26, w6, uxtw]
+# CHECK: ldr q15, [x25, w7, uxtw]
+# CHECK: ldr q16, [x24, w8, uxtw #4]
+# CHECK: ldr q17, [x23, w9, sxtw]
+# CHECK: str q18, [x22, w10, sxtw]
+# CHECK: ldr q19, [x21, wzr, sxtw #4]
+0xe3 0x6b 0xe5 0x3c
+0x69 0x6b 0xe6 0x3c
+0xca 0x7b 0xe7 0x3c
+0xab 0xeb 0xa3 0x3c
+0x8c 0xeb 0xbf 0x3c
+0x6d 0xfb 0xa5 0x3c
+0x4e 0x4b 0xe6 0x3c
+0x2f 0x4b 0xe7 0x3c
+0x10 0x5b 0xe8 0x3c
+0xf1 0xca 0xe9 0x3c
+0xd2 0xca 0xaa 0x3c
+0xb3 0xda 0xff 0x3c
+
+#------------------------------------------------------------------------------
+# Load/store register pair (offset)
+#------------------------------------------------------------------------------
+
+# CHECK: ldp w3, w5, [sp]
+# CHECK: stp wzr, w9, [sp, #252]
+# CHECK: ldp w2, wzr, [sp, #-256]
+# CHECK: ldp w9, w10, [sp, #4]
+0xe3 0x17 0x40 0x29
+0xff 0xa7 0x1f 0x29
+0xe2 0x7f 0x60 0x29
+0xe9 0xab 0x40 0x29
+
+# CHECK: ldpsw x9, x10, [sp, #4]
+# CHECK: ldpsw x9, x10, [x2, #-256]
+# CHECK: ldpsw x20, x30, [sp, #252]
+0xe9 0xab 0x40 0x69
+0x49 0x28 0x60 0x69
+0xf4 0xfb 0x5f 0x69
+
+# CHECK: ldp x21, x29, [x2, #504]
+# CHECK: ldp x22, x23, [x3, #-512]
+# CHECK: ldp x24, x25, [x4, #8]
+0x55 0xf4 0x5f 0xa9
+0x76 0x5c 0x60 0xa9
+0x98 0xe4 0x40 0xa9
+
+# CHECK: ldp s29, s28, [sp, #252]
+# CHECK: stp s27, s26, [sp, #-256]
+# CHECK: ldp s1, s2, [x3, #44]
+0xfd 0xf3 0x5f 0x2d
+0xfb 0x6b 0x20 0x2d
+0x61 0x88 0x45 0x2d
+
+# CHECK: stp d3, d5, [x9, #504]
+# CHECK: stp d7, d11, [x10, #-512]
+# CHECK: ldp d2, d3, [x30, #-8]
+0x23 0x95 0x1f 0x6d
+0x47 0x2d 0x20 0x6d
+0xc2 0x8f 0x7f 0x6d
+
+# CHECK: stp q3, q5, [sp]
+# CHECK: stp q17, q19, [sp, #1008]
+# CHECK: ldp q23, q29, [x1, #-1024]
+0xe3 0x17 0x0 0xad
+0xf1 0xcf 0x1f 0xad
+0x37 0x74 0x60 0xad
+
+#------------------------------------------------------------------------------
+# Load/store register pair (post-indexed)
+#------------------------------------------------------------------------------
+
+# CHECK: ldp w3, w5, [sp], #0
+# CHECK: stp wzr, w9, [sp], #252
+# CHECK: ldp w2, wzr, [sp], #-256
+# CHECK: ldp w9, w10, [sp], #4
+0xe3 0x17 0xc0 0x28
+0xff 0xa7 0x9f 0x28
+0xe2 0x7f 0xe0 0x28
+0xe9 0xab 0xc0 0x28
+
+# CHECK: ldpsw x9, x10, [sp], #4
+# CHECK: ldpsw x9, x10, [x2], #-256
+# CHECK: ldpsw x20, x30, [sp], #252
+0xe9 0xab 0xc0 0x68
+0x49 0x28 0xe0 0x68
+0xf4 0xfb 0xdf 0x68
+
+# CHECK: ldp x21, x29, [x2], #504
+# CHECK: ldp x22, x23, [x3], #-512
+# CHECK: ldp x24, x25, [x4], #8
+0x55 0xf4 0xdf 0xa8
+0x76 0x5c 0xe0 0xa8
+0x98 0xe4 0xc0 0xa8
+
+# CHECK: ldp s29, s28, [sp], #252
+# CHECK: stp s27, s26, [sp], #-256
+# CHECK: ldp s1, s2, [x3], #44
+0xfd 0xf3 0xdf 0x2c
+0xfb 0x6b 0xa0 0x2c
+0x61 0x88 0xc5 0x2c
+
+# CHECK: stp d3, d5, [x9], #504
+# CHECK: stp d7, d11, [x10], #-512
+# CHECK: ldp d2, d3, [x30], #-8
+0x23 0x95 0x9f 0x6c
+0x47 0x2d 0xa0 0x6c
+0xc2 0x8f 0xff 0x6c
+
+# CHECK: stp q3, q5, [sp], #0
+# CHECK: stp q17, q19, [sp], #1008
+# CHECK: ldp q23, q29, [x1], #-1024
+0xe3 0x17 0x80 0xac
+0xf1 0xcf 0x9f 0xac
+0x37 0x74 0xe0 0xac
+
+#------------------------------------------------------------------------------
+# Load/store register pair (pre-indexed)
+#------------------------------------------------------------------------------
+
+# CHECK: ldp w3, w5, [sp, #0]!
+# CHECK: stp wzr, w9, [sp, #252]!
+# CHECK: ldp w2, wzr, [sp, #-256]!
+# CHECK: ldp w9, w10, [sp, #4]!
+0xe3 0x17 0xc0 0x29
+0xff 0xa7 0x9f 0x29
+0xe2 0x7f 0xe0 0x29
+0xe9 0xab 0xc0 0x29
+
+# CHECK: ldpsw x9, x10, [sp, #4]!
+# CHECK: ldpsw x9, x10, [x2, #-256]!
+# CHECK: ldpsw x20, x30, [sp, #252]!
+0xe9 0xab 0xc0 0x69
+0x49 0x28 0xe0 0x69
+0xf4 0xfb 0xdf 0x69
+
+# CHECK: ldp x21, x29, [x2, #504]!
+# CHECK: ldp x22, x23, [x3, #-512]!
+# CHECK: ldp x24, x25, [x4, #8]!
+0x55 0xf4 0xdf 0xa9
+0x76 0x5c 0xe0 0xa9
+0x98 0xe4 0xc0 0xa9
+
+# CHECK: ldp s29, s28, [sp, #252]!
+# CHECK: stp s27, s26, [sp, #-256]!
+# CHECK: ldp s1, s2, [x3, #44]!
+0xfd 0xf3 0xdf 0x2d
+0xfb 0x6b 0xa0 0x2d
+0x61 0x88 0xc5 0x2d
+
+# CHECK: stp d3, d5, [x9, #504]!
+# CHECK: stp d7, d11, [x10, #-512]!
+# CHECK: ldp d2, d3, [x30, #-8]!
+0x23 0x95 0x9f 0x6d
+0x47 0x2d 0xa0 0x6d
+0xc2 0x8f 0xff 0x6d
+
+# CHECK: stp q3, q5, [sp, #0]!
+# CHECK: stp q17, q19, [sp, #1008]!
+# CHECK: ldp q23, q29, [x1, #-1024]!
+0xe3 0x17 0x80 0xad
+0xf1 0xcf 0x9f 0xad
+0x37 0x74 0xe0 0xad
+
+#------------------------------------------------------------------------------
+# Load/store non-temporal register pair (offset)
+#------------------------------------------------------------------------------
+
+# CHECK: ldnp w3, w5, [sp]
+# CHECK: stnp wzr, w9, [sp, #252]
+# CHECK: ldnp w2, wzr, [sp, #-256]
+# CHECK: ldnp w9, w10, [sp, #4]
+0xe3 0x17 0x40 0x28
+0xff 0xa7 0x1f 0x28
+0xe2 0x7f 0x60 0x28
+0xe9 0xab 0x40 0x28
+
+# CHECK: ldnp x21, x29, [x2, #504]
+# CHECK: ldnp x22, x23, [x3, #-512]
+# CHECK: ldnp x24, x25, [x4, #8]
+0x55 0xf4 0x5f 0xa8
+0x76 0x5c 0x60 0xa8
+0x98 0xe4 0x40 0xa8
+
+# CHECK: ldnp s29, s28, [sp, #252]
+# CHECK: stnp s27, s26, [sp, #-256]
+# CHECK: ldnp s1, s2, [x3, #44]
+0xfd 0xf3 0x5f 0x2c
+0xfb 0x6b 0x20 0x2c
+0x61 0x88 0x45 0x2c
+
+# CHECK: stnp d3, d5, [x9, #504]
+# CHECK: stnp d7, d11, [x10, #-512]
+# CHECK: ldnp d2, d3, [x30, #-8]
+0x23 0x95 0x1f 0x6c
+0x47 0x2d 0x20 0x6c
+0xc2 0x8f 0x7f 0x6c
+
+# CHECK: stnp q3, q5, [sp]
+# CHECK: stnp q17, q19, [sp, #1008]
+# CHECK: ldnp q23, q29, [x1, #-1024]
+0xe3 0x17 0x0 0xac
+0xf1 0xcf 0x1f 0xac
+0x37 0x74 0x60 0xac
+
+#------------------------------------------------------------------------------
+# Logical (immediate)
+#------------------------------------------------------------------------------
+# CHECK: orr w3, w9, #0xffff0000
+# CHECK: orr wsp, w10, #0xe00000ff
+# CHECK: orr w9, w10, #0x3ff
+0x23 0x3d 0x10 0x32
+0x5f 0x29 0x3 0x32
+0x49 0x25 0x0 0x32
+
+# CHECK: and w14, w15, #0x80008000
+# CHECK: and w12, w13, #0xffc3ffc3
+# CHECK: and w11, wzr, #0x30003
+0xee 0x81 0x1 0x12
+0xac 0xad 0xa 0x12
+0xeb 0x87 0x0 0x12
+
+# CHECK: eor w3, w6, #0xe0e0e0e0
+# CHECK: eor wsp, wzr, #0x3030303
+# CHECK: eor w16, w17, #0x81818181
+0xc3 0xc8 0x3 0x52
+0xff 0xc7 0x0 0x52
+0x30 0xc6 0x1 0x52
+
+# CHECK: ands wzr, w18, #0xcccccccc
+# CHECK: ands w19, w20, #0x33333333
+# CHECK: ands w21, w22, #0x99999999
+0x5f 0xe6 0x2 0x72
+0x93 0xe6 0x0 0x72
+0xd5 0xe6 0x1 0x72
+
+# CHECK: ands wzr, w3, #0xaaaaaaaa
+# CHECK: ands wzr, wzr, #0x55555555
+0x7f 0xf0 0x1 0x72
+0xff 0xf3 0x0 0x72
+
+# CHECK: eor x3, x5, #0xffffffffc000000
+# CHECK: and x9, x10, #0x7fffffffffff
+# CHECK: orr x11, x12, #0x8000000000000fff
+0xa3 0x84 0x66 0xd2
+0x49 0xb9 0x40 0x92
+0x8b 0x31 0x41 0xb2
+
+# CHECK: orr x3, x9, #0xffff0000ffff0000
+# CHECK: orr sp, x10, #0xe00000ffe00000ff
+# CHECK: orr x9, x10, #0x3ff000003ff
+0x23 0x3d 0x10 0xb2
+0x5f 0x29 0x3 0xb2
+0x49 0x25 0x0 0xb2
+
+# CHECK: and x14, x15, #0x8000800080008000
+# CHECK: and x12, x13, #0xffc3ffc3ffc3ffc3
+# CHECK: and x11, xzr, #0x3000300030003
+0xee 0x81 0x1 0x92
+0xac 0xad 0xa 0x92
+0xeb 0x87 0x0 0x92
+
+# CHECK: eor x3, x6, #0xe0e0e0e0e0e0e0e0
+# CHECK: eor sp, xzr, #0x303030303030303
+# CHECK: eor x16, x17, #0x8181818181818181
+0xc3 0xc8 0x3 0xd2
+0xff 0xc7 0x0 0xd2
+0x30 0xc6 0x1 0xd2
+
+# CHECK: ands xzr, x18, #0xcccccccccccccccc
+# CHECK: ands x19, x20, #0x3333333333333333
+# CHECK: ands x21, x22, #0x9999999999999999
+0x5f 0xe6 0x2 0xf2
+0x93 0xe6 0x0 0xf2
+0xd5 0xe6 0x1 0xf2
+
+# CHECK: ands xzr, x3, #0xaaaaaaaaaaaaaaaa
+# CHECK: ands xzr, xzr, #0x5555555555555555
+0x7f 0xf0 0x1 0xf2
+0xff 0xf3 0x0 0xf2
+
+# CHECK: orr w3, wzr, #0xf000f
+# CHECK: orr x10, xzr, #0xaaaaaaaaaaaaaaaa
+0xe3 0x8f 0x0 0x32
+0xea 0xf3 0x1 0xb2
+
+# CHECK: orr w3, wzr, #0xffff
+# CHECK: orr x9, xzr, #0xffff00000000
+0xe3 0x3f 0x0 0x32
+0xe9 0x3f 0x60 0xb2
+
+#------------------------------------------------------------------------------
+# Logical (shifted register)
+#------------------------------------------------------------------------------
+
+# CHECK: and w12, w23, w21
+# CHECK: and w16, w15, w1, lsl #1
+# CHECK: and w9, w4, w10, lsl #31
+# CHECK: and w3, w30, w11
+# CHECK: and x3, x5, x7, lsl #63
+0xec 0x2 0x15 0xa
+0xf0 0x5 0x1 0xa
+0x89 0x7c 0xa 0xa
+0xc3 0x3 0xb 0xa
+0xa3 0xfc 0x7 0x8a
+
+# CHECK: and x5, x14, x19, asr #4
+# CHECK: and w3, w17, w19, ror #31
+# CHECK: and w0, w2, wzr, lsr #17
+# CHECK: and w3, w30, w11, asr
+0xc5 0x11 0x93 0x8a
+0x23 0x7e 0xd3 0xa
+0x40 0x44 0x5f 0xa
+0xc3 0x3 0x8b 0xa
+
+# CHECK: and xzr, x4, x26
+# CHECK: and w3, wzr, w20, ror
+# CHECK: and x7, x20, xzr, asr #63
+0x9f 0x0 0x1a 0x8a
+0xe3 0x3 0xd4 0xa
+0x87 0xfe 0x9f 0x8a
+
+# CHECK: bic x13, x20, x14, lsl #47
+# CHECK: bic w2, w7, w9
+# CHECK: orr w2, w7, w0, asr #31
+# CHECK: orr x8, x9, x10, lsl #12
+# CHECK: orn x3, x5, x7, asr
+# CHECK: orn w2, w5, w29
+0x8d 0xbe 0x2e 0x8a
+0xe2 0x0 0x29 0xa
+0xe2 0x7c 0x80 0x2a
+0x28 0x31 0xa 0xaa
+0xa3 0x0 0xa7 0xaa
+0xa2 0x0 0x3d 0x2a
+
+# CHECK: ands w7, wzr, w9, lsl #1
+# CHECK: ands x3, x5, x20, ror #63
+# CHECK: bics w3, w5, w7
+# CHECK: bics x3, xzr, x3, lsl #1
+# CHECK: tst w3, w7, lsl #31
+# CHECK: tst x2, x20, asr
+0xe7 0x7 0x9 0x6a
+0xa3 0xfc 0xd4 0xea
+0xa3 0x0 0x27 0x6a
+0xe3 0x7 0x23 0xea
+0x7f 0x7c 0x7 0x6a
+0x5f 0x0 0x94 0xea
+
+# CHECK: mov x3, x6
+# CHECK: mov x3, xzr
+# CHECK: mov wzr, w2
+# CHECK: mov w3, w5
+0xe3 0x3 0x6 0xaa
+0xe3 0x3 0x1f 0xaa
+0xff 0x3 0x2 0x2a
+0xe3 0x3 0x5 0x2a
+
+#------------------------------------------------------------------------------
+# Move wide (immediate)
+#------------------------------------------------------------------------------
+
+# N.b. (FIXME) canonical aliases aren't produced here because of a
+# limitation in InstAlias. Lots of the "mov[nz]" instructions should
+# be "mov".
+
+# CHECK: movz w1, #65535
+# CHECK: movz w2, #0, lsl #16
+# CHECK: movn w2, #1234
+0xe1 0xff 0x9f 0x52
+0x2 0x0 0xa0 0x52
+0x42 0x9a 0x80 0x12
+
+# CHECK: movz x2, #1234, lsl #32
+# CHECK: movk xzr, #4321, lsl #48
+0x42 0x9a 0xc0 0xd2
+0x3f 0x1c 0xe2 0xf2
+
+# CHECK: movz x2, #0
+# CHECK: movk w3, #0
+# CHECK: movz x4, #0, lsl #16
+# CHECK: movk w5, #0, lsl #16
+# CHECK: movz x6, #0, lsl #32
+# CHECK: movk x7, #0, lsl #32
+# CHECK: movz x8, #0, lsl #48
+# CHECK: movk x9, #0, lsl #48
+0x2 0x0 0x80 0xd2
+0x3 0x0 0x80 0x72
+0x4 0x0 0xa0 0xd2
+0x5 0x0 0xa0 0x72
+0x6 0x0 0xc0 0xd2
+0x7 0x0 0xc0 0xf2
+0x8 0x0 0xe0 0xd2
+0x9 0x0 0xe0 0xf2
+
+#------------------------------------------------------------------------------
+# PC-relative addressing
+#------------------------------------------------------------------------------
+
+# It's slightly dodgy using immediates here, but harmless enough when
+# it's all that's available.
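+# (For illustration: the first word below, 0x10003202, encodes adr with
+# immhi:immlo = 400:0b00, i.e. a byte offset of 400 * 4 + 0 = 1600.)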
+
+# CHECK: adr x2, #1600
+# CHECK: adrp x21, #6553600
+# CHECK: adr x0, #262144
+0x02 0x32 0x00 0x10
+0x15 0x32 0x00 0x90
+0x00 0x00 0x20 0x10
+
+#------------------------------------------------------------------------------
+# System
+#------------------------------------------------------------------------------
+
+# CHECK: nop
+# CHECK: hint #127
+# CHECK: nop
+# CHECK: yield
+# CHECK: wfe
+# CHECK: wfi
+# CHECK: sev
+# CHECK: sevl
+0x1f 0x20 0x3 0xd5
+0xff 0x2f 0x3 0xd5
+0x1f 0x20 0x3 0xd5
+0x3f 0x20 0x3 0xd5
+0x5f 0x20 0x3 0xd5
+0x7f 0x20 0x3 0xd5
+0x9f 0x20 0x3 0xd5
+0xbf 0x20 0x3 0xd5
+
+# CHECK: clrex
+# CHECK: clrex #0
+# CHECK: clrex #7
+# CHECK: clrex
+0x5f 0x3f 0x3 0xd5
+0x5f 0x30 0x3 0xd5
+0x5f 0x37 0x3 0xd5
+0x5f 0x3f 0x3 0xd5
+
+# CHECK: dsb #0
+# CHECK: dsb #12
+# CHECK: dsb sy
+# CHECK: dsb oshld
+# CHECK: dsb oshst
+# CHECK: dsb osh
+# CHECK: dsb nshld
+# CHECK: dsb nshst
+# CHECK: dsb nsh
+# CHECK: dsb ishld
+# CHECK: dsb ishst
+# CHECK: dsb ish
+# CHECK: dsb ld
+# CHECK: dsb st
+# CHECK: dsb sy
+0x9f 0x30 0x3 0xd5
+0x9f 0x3c 0x3 0xd5
+0x9f 0x3f 0x3 0xd5
+0x9f 0x31 0x3 0xd5
+0x9f 0x32 0x3 0xd5
+0x9f 0x33 0x3 0xd5
+0x9f 0x35 0x3 0xd5
+0x9f 0x36 0x3 0xd5
+0x9f 0x37 0x3 0xd5
+0x9f 0x39 0x3 0xd5
+0x9f 0x3a 0x3 0xd5
+0x9f 0x3b 0x3 0xd5
+0x9f 0x3d 0x3 0xd5
+0x9f 0x3e 0x3 0xd5
+0x9f 0x3f 0x3 0xd5
+
+# CHECK: dmb #0
+# CHECK: dmb #12
+# CHECK: dmb sy
+# CHECK: dmb oshld
+# CHECK: dmb oshst
+# CHECK: dmb osh
+# CHECK: dmb nshld
+# CHECK: dmb nshst
+# CHECK: dmb nsh
+# CHECK: dmb ishld
+# CHECK: dmb ishst
+# CHECK: dmb ish
+# CHECK: dmb ld
+# CHECK: dmb st
+# CHECK: dmb sy
+0xbf 0x30 0x3 0xd5
+0xbf 0x3c 0x3 0xd5
+0xbf 0x3f 0x3 0xd5
+0xbf 0x31 0x3 0xd5
+0xbf 0x32 0x3 0xd5
+0xbf 0x33 0x3 0xd5
+0xbf 0x35 0x3 0xd5
+0xbf 0x36 0x3 0xd5
+0xbf 0x37 0x3 0xd5
+0xbf 0x39 0x3 0xd5
+0xbf 0x3a 0x3 0xd5
+0xbf 0x3b 0x3 0xd5
+0xbf 0x3d 0x3 0xd5
+0xbf 0x3e 0x3 0xd5
+0xbf 0x3f 0x3 0xd5
+
+# CHECK: isb
+# CHECK: isb #12
+0xdf 0x3f 0x3 0xd5
+0xdf 0x3c 0x3 0xd5
+
+# CHECK: msr spsel, #0
+# CHECK: msr daifset, #15
+# CHECK: msr daifclr, #12
+0xbf 0x40 0x0 0xd5
+0xdf 0x4f 0x3 0xd5
+0xff 0x4c 0x3 0xd5
+
+# CHECK: sys #7, c5, c9, #7, x5
+# CHECK: sys #0, c15, c15, #2
+# CHECK: sysl x9, #7, c5, c9, #7
+# CHECK: sysl x1, #0, c15, c15, #2
+0xe5 0x59 0xf 0xd5
+0x5f 0xff 0x8 0xd5
+0xe9 0x59 0x2f 0xd5
+0x41 0xff 0x28 0xd5
+
+# CHECK: sys #0, c7, c1, #0, xzr
+# CHECK: sys #0, c7, c5, #0, xzr
+# CHECK: sys #3, c7, c5, #1, x9
+0x1f 0x71 0x8 0xd5
+0x1f 0x75 0x8 0xd5
+0x29 0x75 0xb 0xd5
+
+# CHECK: sys #3, c7, c4, #1, x12
+# CHECK: sys #0, c7, c6, #1, xzr
+# CHECK: sys #0, c7, c6, #2, x2
+# CHECK: sys #3, c7, c10, #1, x9
+# CHECK: sys #0, c7, c10, #2, x10
+# CHECK: sys #3, c7, c11, #1, x0
+# CHECK: sys #3, c7, c14, #1, x3
+# CHECK: sys #0, c7, c14, #2, x30
+0x2c 0x74 0xb 0xd5
+0x3f 0x76 0x8 0xd5
+0x42 0x76 0x8 0xd5
+0x29 0x7a 0xb 0xd5
+0x4a 0x7a 0x8 0xd5
+0x20 0x7b 0xb 0xd5
+0x23 0x7e 0xb 0xd5
+0x5e 0x7e 0x8 0xd5
+
+
+# CHECK: msr teecr32_el1, x12
+# CHECK: msr osdtrrx_el1, x12
+# CHECK: msr mdccint_el1, x12
+# CHECK: msr mdscr_el1, x12
+# CHECK: msr osdtrtx_el1, x12
+# CHECK: msr dbgdtr_el0, x12
+# CHECK: msr dbgdtrtx_el0, x12
+# CHECK: msr oseccr_el1, x12
+# CHECK: msr dbgvcr32_el2, x12
+# CHECK: msr dbgbvr0_el1, x12
+# CHECK: msr dbgbvr1_el1, x12
+# CHECK: msr dbgbvr2_el1, x12
+# CHECK: msr dbgbvr3_el1, x12
+# CHECK: msr dbgbvr4_el1, x12
+# CHECK: msr dbgbvr5_el1, x12
+# CHECK: msr dbgbvr6_el1, x12
+# CHECK: msr dbgbvr7_el1, x12
+# CHECK: msr dbgbvr8_el1, x12
+# CHECK: msr dbgbvr9_el1, x12
+# CHECK: msr dbgbvr10_el1, x12
+# CHECK: msr dbgbvr11_el1, x12
+# CHECK: msr dbgbvr12_el1, x12
+# CHECK: msr dbgbvr13_el1, x12
+# CHECK: msr dbgbvr14_el1, x12
+# CHECK: msr dbgbvr15_el1, x12
+# CHECK: msr dbgbcr0_el1, x12
+# CHECK: msr dbgbcr1_el1, x12
+# CHECK: msr dbgbcr2_el1, x12
+# CHECK: msr dbgbcr3_el1, x12
+# CHECK: msr dbgbcr4_el1, x12
+# CHECK: msr dbgbcr5_el1, x12
+# CHECK: msr dbgbcr6_el1, x12
+# CHECK: msr dbgbcr7_el1, x12
+# CHECK: msr dbgbcr8_el1, x12
+# CHECK: msr dbgbcr9_el1, x12
+# CHECK: msr dbgbcr10_el1, x12
+# CHECK: msr dbgbcr11_el1, x12
+# CHECK: msr dbgbcr12_el1, x12
+# CHECK: msr dbgbcr13_el1, x12
+# CHECK: msr dbgbcr14_el1, x12
+# CHECK: msr dbgbcr15_el1, x12
+# CHECK: msr dbgwvr0_el1, x12
+# CHECK: msr dbgwvr1_el1, x12
+# CHECK: msr dbgwvr2_el1, x12
+# CHECK: msr dbgwvr3_el1, x12
+# CHECK: msr dbgwvr4_el1, x12
+# CHECK: msr dbgwvr5_el1, x12
+# CHECK: msr dbgwvr6_el1, x12
+# CHECK: msr dbgwvr7_el1, x12
+# CHECK: msr dbgwvr8_el1, x12
+# CHECK: msr dbgwvr9_el1, x12
+# CHECK: msr dbgwvr10_el1, x12
+# CHECK: msr dbgwvr11_el1, x12
+# CHECK: msr dbgwvr12_el1, x12
+# CHECK: msr dbgwvr13_el1, x12
+# CHECK: msr dbgwvr14_el1, x12
+# CHECK: msr dbgwvr15_el1, x12
+# CHECK: msr dbgwcr0_el1, x12
+# CHECK: msr dbgwcr1_el1, x12
+# CHECK: msr dbgwcr2_el1, x12
+# CHECK: msr dbgwcr3_el1, x12
+# CHECK: msr dbgwcr4_el1, x12
+# CHECK: msr dbgwcr5_el1, x12
+# CHECK: msr dbgwcr6_el1, x12
+# CHECK: msr dbgwcr7_el1, x12
+# CHECK: msr dbgwcr8_el1, x12
+# CHECK: msr dbgwcr9_el1, x12
+# CHECK: msr dbgwcr10_el1, x12
+# CHECK: msr dbgwcr11_el1, x12
+# CHECK: msr dbgwcr12_el1, x12
+# CHECK: msr dbgwcr13_el1, x12
+# CHECK: msr dbgwcr14_el1, x12
+# CHECK: msr dbgwcr15_el1, x12
+# CHECK: msr teehbr32_el1, x12
+# CHECK: msr oslar_el1, x12
+# CHECK: msr osdlr_el1, x12
+# CHECK: msr dbgprcr_el1, x12
+# CHECK: msr dbgclaimset_el1, x12
+# CHECK: msr dbgclaimclr_el1, x12
+# CHECK: msr csselr_el1, x12
+# CHECK: msr vpidr_el2, x12
+# CHECK: msr vmpidr_el2, x12
+# CHECK: msr sctlr_el1, x12
+# CHECK: msr sctlr_el2, x12
+# CHECK: msr sctlr_el3, x12
+# CHECK: msr actlr_el1, x12
+# CHECK: msr actlr_el2, x12
+# CHECK: msr actlr_el3, x12
+# CHECK: msr cpacr_el1, x12
+# CHECK: msr hcr_el2, x12
+# CHECK: msr scr_el3, x12
+# CHECK: msr mdcr_el2, x12
+# CHECK: msr sder32_el3, x12
+# CHECK: msr cptr_el2, x12
+# CHECK: msr cptr_el3, x12
+# CHECK: msr hstr_el2, x12
+# CHECK: msr hacr_el2, x12
+# CHECK: msr mdcr_el3, x12
+# CHECK: msr ttbr0_el1, x12
+# CHECK: msr ttbr0_el2, x12
+# CHECK: msr ttbr0_el3, x12
+# CHECK: msr ttbr1_el1, x12
+# CHECK: msr tcr_el1, x12
+# CHECK: msr tcr_el2, x12
+# CHECK: msr tcr_el3, x12
+# CHECK: msr vttbr_el2, x12
+# CHECK: msr vtcr_el2, x12
+# CHECK: msr dacr32_el2, x12
+# CHECK: msr spsr_el1, x12
+# CHECK: msr spsr_el2, x12
+# CHECK: msr spsr_el3, x12
+# CHECK: msr elr_el1, x12
+# CHECK: msr elr_el2, x12
+# CHECK: msr elr_el3, x12
+# CHECK: msr sp_el0, x12
+# CHECK: msr sp_el1, x12
+# CHECK: msr sp_el2, x12
+# CHECK: msr spsel, x12
+# CHECK: msr nzcv, x12
+# CHECK: msr daif, x12
+# CHECK: msr currentel, x12
+# CHECK: msr spsr_irq, x12
+# CHECK: msr spsr_abt, x12
+# CHECK: msr spsr_und, x12
+# CHECK: msr spsr_fiq, x12
+# CHECK: msr fpcr, x12
+# CHECK: msr fpsr, x12
+# CHECK: msr dspsr_el0, x12
+# CHECK: msr dlr_el0, x12
+# CHECK: msr ifsr32_el2, x12
+# CHECK: msr afsr0_el1, x12
+# CHECK: msr afsr0_el2, x12
+# CHECK: msr afsr0_el3, x12
+# CHECK: msr afsr1_el1, x12
+# CHECK: msr afsr1_el2, x12
+# CHECK: msr afsr1_el3, x12
+# CHECK: msr esr_el1, x12
+# CHECK: msr esr_el2, x12
+# CHECK: msr esr_el3, x12
+# CHECK: msr fpexc32_el2, x12
+# CHECK: msr far_el1, x12
+# CHECK: msr far_el2, x12
+# CHECK: msr far_el3, x12
+# CHECK: msr hpfar_el2, x12
+# CHECK: msr par_el1, x12
+# CHECK: msr pmcr_el0, x12
+# CHECK: msr pmcntenset_el0, x12
+# CHECK: msr pmcntenclr_el0, x12
+# CHECK: msr pmovsclr_el0, x12
+# CHECK: msr pmselr_el0, x12
+# CHECK: msr pmccntr_el0, x12
+# CHECK: msr pmxevtyper_el0, x12
+# CHECK: msr pmxevcntr_el0, x12
+# CHECK: msr pmuserenr_el0, x12
+# CHECK: msr pmintenset_el1, x12
+# CHECK: msr pmintenclr_el1, x12
+# CHECK: msr pmovsset_el0, x12
+# CHECK: msr mair_el1, x12
+# CHECK: msr mair_el2, x12
+# CHECK: msr mair_el3, x12
+# CHECK: msr amair_el1, x12
+# CHECK: msr amair_el2, x12
+# CHECK: msr amair_el3, x12
+# CHECK: msr vbar_el1, x12
+# CHECK: msr vbar_el2, x12
+# CHECK: msr vbar_el3, x12
+# CHECK: msr rmr_el1, x12
+# CHECK: msr rmr_el2, x12
+# CHECK: msr rmr_el3, x12
+# CHECK: msr tpidr_el0, x12
+# CHECK: msr tpidr_el2, x12
+# CHECK: msr tpidr_el3, x12
+# CHECK: msr tpidrro_el0, x12
+# CHECK: msr tpidr_el1, x12
+# CHECK: msr cntfrq_el0, x12
+# CHECK: msr cntvoff_el2, x12
+# CHECK: msr cntkctl_el1, x12
+# CHECK: msr cnthctl_el2, x12
+# CHECK: msr cntp_tval_el0, x12
+# CHECK: msr cnthp_tval_el2, x12
+# CHECK: msr cntps_tval_el1, x12
+# CHECK: msr cntp_ctl_el0, x12
+# CHECK: msr cnthp_ctl_el2, x12
+# CHECK: msr cntps_ctl_el1, x12
+# CHECK: msr cntp_cval_el0, x12
+# CHECK: msr cnthp_cval_el2, x12
+# CHECK: msr cntps_cval_el1, x12
+# CHECK: msr cntv_tval_el0, x12
+# CHECK: msr cntv_ctl_el0, x12
+# CHECK: msr cntv_cval_el0, x12
+# CHECK: msr pmevcntr0_el0, x12
+# CHECK: msr pmevcntr1_el0, x12
+# CHECK: msr pmevcntr2_el0, x12
+# CHECK: msr pmevcntr3_el0, x12
+# CHECK: msr pmevcntr4_el0, x12
+# CHECK: msr pmevcntr5_el0, x12
+# CHECK: msr pmevcntr6_el0, x12
+# CHECK: msr pmevcntr7_el0, x12
+# CHECK: msr pmevcntr8_el0, x12
+# CHECK: msr pmevcntr9_el0, x12
+# CHECK: msr pmevcntr10_el0, x12
+# CHECK: msr pmevcntr11_el0, x12
+# CHECK: msr pmevcntr12_el0, x12
+# CHECK: msr pmevcntr13_el0, x12
+# CHECK: msr pmevcntr14_el0, x12
+# CHECK: msr pmevcntr15_el0, x12
+# CHECK: msr pmevcntr16_el0, x12
+# CHECK: msr pmevcntr17_el0, x12
+# CHECK: msr pmevcntr18_el0, x12
+# CHECK: msr pmevcntr19_el0, x12
+# CHECK: msr pmevcntr20_el0, x12
+# CHECK: msr pmevcntr21_el0, x12
+# CHECK: msr pmevcntr22_el0, x12
+# CHECK: msr pmevcntr23_el0, x12
+# CHECK: msr pmevcntr24_el0, x12
+# CHECK: msr pmevcntr25_el0, x12
+# CHECK: msr pmevcntr26_el0, x12
+# CHECK: msr pmevcntr27_el0, x12
+# CHECK: msr pmevcntr28_el0, x12
+# CHECK: msr pmevcntr29_el0, x12
+# CHECK: msr pmevcntr30_el0, x12
+# CHECK: msr pmccfiltr_el0, x12
+# CHECK: msr pmevtyper0_el0, x12
+# CHECK: msr pmevtyper1_el0, x12
+# CHECK: msr pmevtyper2_el0, x12
+# CHECK: msr pmevtyper3_el0, x12
+# CHECK: msr pmevtyper4_el0, x12
+# CHECK: msr pmevtyper5_el0, x12
+# CHECK: msr pmevtyper6_el0, x12
+# CHECK: msr pmevtyper7_el0, x12
+# CHECK: msr pmevtyper8_el0, x12
+# CHECK: msr pmevtyper9_el0, x12
+# CHECK: msr pmevtyper10_el0, x12
+# CHECK: msr pmevtyper11_el0, x12
+# CHECK: msr pmevtyper12_el0, x12
+# CHECK: msr pmevtyper13_el0, x12
+# CHECK: msr pmevtyper14_el0, x12
+# CHECK: msr pmevtyper15_el0, x12
+# CHECK: msr pmevtyper16_el0, x12
+# CHECK: msr pmevtyper17_el0, x12
+# CHECK: msr pmevtyper18_el0, x12
+# CHECK: msr pmevtyper19_el0, x12
+# CHECK: msr pmevtyper20_el0, x12
+# CHECK: msr pmevtyper21_el0, x12
+# CHECK: msr pmevtyper22_el0, x12
+# CHECK: msr pmevtyper23_el0, x12
+# CHECK: msr pmevtyper24_el0, x12
+# CHECK: msr pmevtyper25_el0, x12
+# CHECK: msr pmevtyper26_el0, x12
+# CHECK: msr pmevtyper27_el0, x12
+# CHECK: msr pmevtyper28_el0, x12
+# CHECK: msr pmevtyper29_el0, x12
+# CHECK: msr pmevtyper30_el0, x12
+# CHECK: mrs x9, teecr32_el1
+# CHECK: mrs x9, osdtrrx_el1
+# CHECK: mrs x9, mdccsr_el0
+# CHECK: mrs x9, mdccint_el1
+# CHECK: mrs x9, mdscr_el1
+# CHECK: mrs x9, osdtrtx_el1
+# CHECK: mrs x9, dbgdtr_el0
+# CHECK: mrs x9, dbgdtrrx_el0
+# CHECK: mrs x9, oseccr_el1
+# CHECK: mrs x9, dbgvcr32_el2
+# CHECK: mrs x9, dbgbvr0_el1
+# CHECK: mrs x9, dbgbvr1_el1
+# CHECK: mrs x9, dbgbvr2_el1
+# CHECK: mrs x9, dbgbvr3_el1
+# CHECK: mrs x9, dbgbvr4_el1
+# CHECK: mrs x9, dbgbvr5_el1
+# CHECK: mrs x9, dbgbvr6_el1
+# CHECK: mrs x9, dbgbvr7_el1
+# CHECK: mrs x9, dbgbvr8_el1
+# CHECK: mrs x9, dbgbvr9_el1
+# CHECK: mrs x9, dbgbvr10_el1
+# CHECK: mrs x9, dbgbvr11_el1
+# CHECK: mrs x9, dbgbvr12_el1
+# CHECK: mrs x9, dbgbvr13_el1
+# CHECK: mrs x9, dbgbvr14_el1
+# CHECK: mrs x9, dbgbvr15_el1
+# CHECK: mrs x9, dbgbcr0_el1
+# CHECK: mrs x9, dbgbcr1_el1
+# CHECK: mrs x9, dbgbcr2_el1
+# CHECK: mrs x9, dbgbcr3_el1
+# CHECK: mrs x9, dbgbcr4_el1
+# CHECK: mrs x9, dbgbcr5_el1
+# CHECK: mrs x9, dbgbcr6_el1
+# CHECK: mrs x9, dbgbcr7_el1
+# CHECK: mrs x9, dbgbcr8_el1
+# CHECK: mrs x9, dbgbcr9_el1
+# CHECK: mrs x9, dbgbcr10_el1
+# CHECK: mrs x9, dbgbcr11_el1
+# CHECK: mrs x9, dbgbcr12_el1
+# CHECK: mrs x9, dbgbcr13_el1
+# CHECK: mrs x9, dbgbcr14_el1
+# CHECK: mrs x9, dbgbcr15_el1
+# CHECK: mrs x9, dbgwvr0_el1
+# CHECK: mrs x9, dbgwvr1_el1
+# CHECK: mrs x9, dbgwvr2_el1
+# CHECK: mrs x9, dbgwvr3_el1
+# CHECK: mrs x9, dbgwvr4_el1
+# CHECK: mrs x9, dbgwvr5_el1
+# CHECK: mrs x9, dbgwvr6_el1
+# CHECK: mrs x9, dbgwvr7_el1
+# CHECK: mrs x9, dbgwvr8_el1
+# CHECK: mrs x9, dbgwvr9_el1
+# CHECK: mrs x9, dbgwvr10_el1
+# CHECK: mrs x9, dbgwvr11_el1
+# CHECK: mrs x9, dbgwvr12_el1
+# CHECK: mrs x9, dbgwvr13_el1
+# CHECK: mrs x9, dbgwvr14_el1
+# CHECK: mrs x9, dbgwvr15_el1
+# CHECK: mrs x9, dbgwcr0_el1
+# CHECK: mrs x9, dbgwcr1_el1
+# CHECK: mrs x9, dbgwcr2_el1
+# CHECK: mrs x9, dbgwcr3_el1
+# CHECK: mrs x9, dbgwcr4_el1
+# CHECK: mrs x9, dbgwcr5_el1
+# CHECK: mrs x9, dbgwcr6_el1
+# CHECK: mrs x9, dbgwcr7_el1
+# CHECK: mrs x9, dbgwcr8_el1
+# CHECK: mrs x9, dbgwcr9_el1
+# CHECK: mrs x9, dbgwcr10_el1
+# CHECK: mrs x9, dbgwcr11_el1
+# CHECK: mrs x9, dbgwcr12_el1
+# CHECK: mrs x9, dbgwcr13_el1
+# CHECK: mrs x9, dbgwcr14_el1
+# CHECK: mrs x9, dbgwcr15_el1
+# CHECK: mrs x9, mdrar_el1
+# CHECK: mrs x9, teehbr32_el1
+# CHECK: mrs x9, oslsr_el1
+# CHECK: mrs x9, osdlr_el1
+# CHECK: mrs x9, dbgprcr_el1
+# CHECK: mrs x9, dbgclaimset_el1
+# CHECK: mrs x9, dbgclaimclr_el1
+# CHECK: mrs x9, dbgauthstatus_el1
+# CHECK: mrs x9, midr_el1
+# CHECK: mrs x9, ccsidr_el1
+# CHECK: mrs x9, csselr_el1
+# CHECK: mrs x9, vpidr_el2
+# CHECK: mrs x9, clidr_el1
+# CHECK: mrs x9, ctr_el0
+# CHECK: mrs x9, mpidr_el1
+# CHECK: mrs x9, vmpidr_el2
+# CHECK: mrs x9, revidr_el1
+# CHECK: mrs x9, aidr_el1
+# CHECK: mrs x9, dczid_el0
+# CHECK: mrs x9, id_pfr0_el1
+# CHECK: mrs x9, id_pfr1_el1
+# CHECK: mrs x9, id_dfr0_el1
+# CHECK: mrs x9, id_afr0_el1
+# CHECK: mrs x9, id_mmfr0_el1
+# CHECK: mrs x9, id_mmfr1_el1
+# CHECK: mrs x9, id_mmfr2_el1
+# CHECK: mrs x9, id_mmfr3_el1
+# CHECK: mrs x9, id_isar0_el1
+# CHECK: mrs x9, id_isar1_el1
+# CHECK: mrs x9, id_isar2_el1
+# CHECK: mrs x9, id_isar3_el1
+# CHECK: mrs x9, id_isar4_el1
+# CHECK: mrs x9, id_isar5_el1
+# CHECK: mrs x9, mvfr0_el1
+# CHECK: mrs x9, mvfr1_el1
+# CHECK: mrs x9, mvfr2_el1
+# CHECK: mrs x9, id_aa64pfr0_el1
+# CHECK: mrs x9, id_aa64pfr1_el1
+# CHECK: mrs x9, id_aa64dfr0_el1
+# CHECK: mrs x9, id_aa64dfr1_el1
+# CHECK: mrs x9, id_aa64afr0_el1
+# CHECK: mrs x9, id_aa64afr1_el1
+# CHECK: mrs x9, id_aa64isar0_el1
+# CHECK: mrs x9, id_aa64isar1_el1
+# CHECK: mrs x9, id_aa64mmfr0_el1
+# CHECK: mrs x9, id_aa64mmfr1_el1
+# CHECK: mrs x9, sctlr_el1
+# CHECK: mrs x9, sctlr_el2
+# CHECK: mrs x9, sctlr_el3
+# CHECK: mrs x9, actlr_el1
+# CHECK: mrs x9, actlr_el2
+# CHECK: mrs x9, actlr_el3
+# CHECK: mrs x9, cpacr_el1
+# CHECK: mrs x9, hcr_el2
+# CHECK: mrs x9, scr_el3
+# CHECK: mrs x9, mdcr_el2
+# CHECK: mrs x9, sder32_el3
+# CHECK: mrs x9, cptr_el2
+# CHECK: mrs x9, cptr_el3
+# CHECK: mrs x9, hstr_el2
+# CHECK: mrs x9, hacr_el2
+# CHECK: mrs x9, mdcr_el3
+# CHECK: mrs x9, ttbr0_el1
+# CHECK: mrs x9, ttbr0_el2
+# CHECK: mrs x9, ttbr0_el3
+# CHECK: mrs x9, ttbr1_el1
+# CHECK: mrs x9, tcr_el1
+# CHECK: mrs x9, tcr_el2
+# CHECK: mrs x9, tcr_el3
+# CHECK: mrs x9, vttbr_el2
+# CHECK: mrs x9, vtcr_el2
+# CHECK: mrs x9, dacr32_el2
+# CHECK: mrs x9, spsr_el1
+# CHECK: mrs x9, spsr_el2
+# CHECK: mrs x9, spsr_el3
+# CHECK: mrs x9, elr_el1
+# CHECK: mrs x9, elr_el2
+# CHECK: mrs x9, elr_el3
+# CHECK: mrs x9, sp_el0
+# CHECK: mrs x9, sp_el1
+# CHECK: mrs x9, sp_el2
+# CHECK: mrs x9, spsel
+# CHECK: mrs x9, nzcv
+# CHECK: mrs x9, daif
+# CHECK: mrs x9, currentel
+# CHECK: mrs x9, spsr_irq
+# CHECK: mrs x9, spsr_abt
+# CHECK: mrs x9, spsr_und
+# CHECK: mrs x9, spsr_fiq
+# CHECK: mrs x9, fpcr
+# CHECK: mrs x9, fpsr
+# CHECK: mrs x9, dspsr_el0
+# CHECK: mrs x9, dlr_el0
+# CHECK: mrs x9, ifsr32_el2
+# CHECK: mrs x9, afsr0_el1
+# CHECK: mrs x9, afsr0_el2
+# CHECK: mrs x9, afsr0_el3
+# CHECK: mrs x9, afsr1_el1
+# CHECK: mrs x9, afsr1_el2
+# CHECK: mrs x9, afsr1_el3
+# CHECK: mrs x9, esr_el1
+# CHECK: mrs x9, esr_el2
+# CHECK: mrs x9, esr_el3
+# CHECK: mrs x9, fpexc32_el2
+# CHECK: mrs x9, far_el1
+# CHECK: mrs x9, far_el2
+# CHECK: mrs x9, far_el3
+# CHECK: mrs x9, hpfar_el2
+# CHECK: mrs x9, par_el1
+# CHECK: mrs x9, pmcr_el0
+# CHECK: mrs x9, pmcntenset_el0
+# CHECK: mrs x9, pmcntenclr_el0
+# CHECK: mrs x9, pmovsclr_el0
+# CHECK: mrs x9, pmselr_el0
+# CHECK: mrs x9, pmceid0_el0
+# CHECK: mrs x9, pmceid1_el0
+# CHECK: mrs x9, pmccntr_el0
+# CHECK: mrs x9, pmxevtyper_el0
+# CHECK: mrs x9, pmxevcntr_el0
+# CHECK: mrs x9, pmuserenr_el0
+# CHECK: mrs x9, pmintenset_el1
+# CHECK: mrs x9, pmintenclr_el1
+# CHECK: mrs x9, pmovsset_el0
+# CHECK: mrs x9, mair_el1
+# CHECK: mrs x9, mair_el2
+# CHECK: mrs x9, mair_el3
+# CHECK: mrs x9, amair_el1
+# CHECK: mrs x9, amair_el2
+# CHECK: mrs x9, amair_el3
+# CHECK: mrs x9, vbar_el1
+# CHECK: mrs x9, vbar_el2
+# CHECK: mrs x9, vbar_el3
+# CHECK: mrs x9, rvbar_el1
+# CHECK: mrs x9, rvbar_el2
+# CHECK: mrs x9, rvbar_el3
+# CHECK: mrs x9, rmr_el1
+# CHECK: mrs x9, rmr_el2
+# CHECK: mrs x9, rmr_el3
+# CHECK: mrs x9, isr_el1
+# CHECK: mrs x9, contextidr_el1
+# CHECK: mrs x9, tpidr_el0
+# CHECK: mrs x9, tpidr_el2
+# CHECK: mrs x9, tpidr_el3
+# CHECK: mrs x9, tpidrro_el0
+# CHECK: mrs x9, tpidr_el1
+# CHECK: mrs x9, cntfrq_el0
+# CHECK: mrs x9, cntpct_el0
+# CHECK: mrs x9, cntvct_el0
+# CHECK: mrs x9, cntvoff_el2
+# CHECK: mrs x9, cntkctl_el1
+# CHECK: mrs x9, cnthctl_el2
+# CHECK: mrs x9, cntp_tval_el0
+# CHECK: mrs x9, cnthp_tval_el2
+# CHECK: mrs x9, cntps_tval_el1
+# CHECK: mrs x9, cntp_ctl_el0
+# CHECK: mrs x9, cnthp_ctl_el2
+# CHECK: mrs x9, cntps_ctl_el1
+# CHECK: mrs x9, cntp_cval_el0
+# CHECK: mrs x9, cnthp_cval_el2
+# CHECK: mrs x9, cntps_cval_el1
+# CHECK: mrs x9, cntv_tval_el0
+# CHECK: mrs x9, cntv_ctl_el0
+# CHECK: mrs x9, cntv_cval_el0
+# CHECK: mrs x9, pmevcntr0_el0
+# CHECK: mrs x9, pmevcntr1_el0
+# CHECK: mrs x9, pmevcntr2_el0
+# CHECK: mrs x9, pmevcntr3_el0
+# CHECK: mrs x9, pmevcntr4_el0
+# CHECK: mrs x9, pmevcntr5_el0
+# CHECK: mrs x9, pmevcntr6_el0
+# CHECK: mrs x9, pmevcntr7_el0
+# CHECK: mrs x9, pmevcntr8_el0
+# CHECK: mrs x9, pmevcntr9_el0
+# CHECK: mrs x9, pmevcntr10_el0
+# CHECK: mrs x9, pmevcntr11_el0
+# CHECK: mrs x9, pmevcntr12_el0
+# CHECK: mrs x9, pmevcntr13_el0
+# CHECK: mrs x9, pmevcntr14_el0
+# CHECK: mrs x9, pmevcntr15_el0
+# CHECK: mrs x9, pmevcntr16_el0
+# CHECK: mrs x9, pmevcntr17_el0
+# CHECK: mrs x9, pmevcntr18_el0
+# CHECK: mrs x9, pmevcntr19_el0
+# CHECK: mrs x9, pmevcntr20_el0
+# CHECK: mrs x9, pmevcntr21_el0
+# CHECK: mrs x9, pmevcntr22_el0
+# CHECK: mrs x9, pmevcntr23_el0
+# CHECK: mrs x9, pmevcntr24_el0
+# CHECK: mrs x9, pmevcntr25_el0
+# CHECK: mrs x9, pmevcntr26_el0
+# CHECK: mrs x9, pmevcntr27_el0
+# CHECK: mrs x9, pmevcntr28_el0
+# CHECK: mrs x9, pmevcntr29_el0
+# CHECK: mrs x9, pmevcntr30_el0
+# CHECK: mrs x9, pmccfiltr_el0
+# CHECK: mrs x9, pmevtyper0_el0
+# CHECK: mrs x9, pmevtyper1_el0
+# CHECK: mrs x9, pmevtyper2_el0
+# CHECK: mrs x9, pmevtyper3_el0
+# CHECK: mrs x9, pmevtyper4_el0
+# CHECK: mrs x9, pmevtyper5_el0
+# CHECK: mrs x9, pmevtyper6_el0
+# CHECK: mrs x9, pmevtyper7_el0
+# CHECK: mrs x9, pmevtyper8_el0
+# CHECK: mrs x9, pmevtyper9_el0
+# CHECK: mrs x9, pmevtyper10_el0
+# CHECK: mrs x9, pmevtyper11_el0
+# CHECK: mrs x9, pmevtyper12_el0
+# CHECK: mrs x9, pmevtyper13_el0
+# CHECK: mrs x9, pmevtyper14_el0
+# CHECK: mrs x9, pmevtyper15_el0
+# CHECK: mrs x9, pmevtyper16_el0
+# CHECK: mrs x9, pmevtyper17_el0
+# CHECK: mrs x9, pmevtyper18_el0
+# CHECK: mrs x9, pmevtyper19_el0
+# CHECK: mrs x9, pmevtyper20_el0
+# CHECK: mrs x9, pmevtyper21_el0
+# CHECK: mrs x9, pmevtyper22_el0
+# CHECK: mrs x9, pmevtyper23_el0
+# CHECK: mrs x9, pmevtyper24_el0
+# CHECK: mrs x9, pmevtyper25_el0
+# CHECK: mrs x9, pmevtyper26_el0
+# CHECK: mrs x9, pmevtyper27_el0
+# CHECK: mrs x9, pmevtyper28_el0
+# CHECK: mrs x9, pmevtyper29_el0
+# CHECK: mrs x9, pmevtyper30_el0
+
+0xc 0x0 0x12 0xd5
+0x4c 0x0 0x10 0xd5
+0xc 0x2 0x10 0xd5
+0x4c 0x2 0x10 0xd5
+0x4c 0x3 0x10 0xd5
+0xc 0x4 0x13 0xd5
+0xc 0x5 0x13 0xd5
+0x4c 0x6 0x10 0xd5
+0xc 0x7 0x14 0xd5
+0x8c 0x0 0x10 0xd5
+0x8c 0x1 0x10 0xd5
+0x8c 0x2 0x10 0xd5
+0x8c 0x3 0x10 0xd5
+0x8c 0x4 0x10 0xd5
+0x8c 0x5 0x10 0xd5
+0x8c 0x6 0x10 0xd5
+0x8c 0x7 0x10 0xd5
+0x8c 0x8 0x10 0xd5
+0x8c 0x9 0x10 0xd5
+0x8c 0xa 0x10 0xd5
+0x8c 0xb 0x10 0xd5
+0x8c 0xc 0x10 0xd5
+0x8c 0xd 0x10 0xd5
+0x8c 0xe 0x10 0xd5
+0x8c 0xf 0x10 0xd5
+0xac 0x0 0x10 0xd5
+0xac 0x1 0x10 0xd5
+0xac 0x2 0x10 0xd5
+0xac 0x3 0x10 0xd5
+0xac 0x4 0x10 0xd5
+0xac 0x5 0x10 0xd5
+0xac 0x6 0x10 0xd5
+0xac 0x7 0x10 0xd5
+0xac 0x8 0x10 0xd5
+0xac 0x9 0x10 0xd5
+0xac 0xa 0x10 0xd5
+0xac 0xb 0x10 0xd5
+0xac 0xc 0x10 0xd5
+0xac 0xd 0x10 0xd5
+0xac 0xe 0x10 0xd5
+0xac 0xf 0x10 0xd5
+0xcc 0x0 0x10 0xd5
+0xcc 0x1 0x10 0xd5
+0xcc 0x2 0x10 0xd5
+0xcc 0x3 0x10 0xd5
+0xcc 0x4 0x10 0xd5
+0xcc 0x5 0x10 0xd5
+0xcc 0x6 0x10 0xd5
+0xcc 0x7 0x10 0xd5
+0xcc 0x8 0x10 0xd5
+0xcc 0x9 0x10 0xd5
+0xcc 0xa 0x10 0xd5
+0xcc 0xb 0x10 0xd5
+0xcc 0xc 0x10 0xd5
+0xcc 0xd 0x10 0xd5
+0xcc 0xe 0x10 0xd5
+0xcc 0xf 0x10 0xd5
+0xec 0x0 0x10 0xd5
+0xec 0x1 0x10 0xd5
+0xec 0x2 0x10 0xd5
+0xec 0x3 0x10 0xd5
+0xec 0x4 0x10 0xd5
+0xec 0x5 0x10 0xd5
+0xec 0x6 0x10 0xd5
+0xec 0x7 0x10 0xd5
+0xec 0x8 0x10 0xd5
+0xec 0x9 0x10 0xd5
+0xec 0xa 0x10 0xd5
+0xec 0xb 0x10 0xd5
+0xec 0xc 0x10 0xd5
+0xec 0xd 0x10 0xd5
+0xec 0xe 0x10 0xd5
+0xec 0xf 0x10 0xd5
+0xc 0x10 0x12 0xd5
+0x8c 0x10 0x10 0xd5
+0x8c 0x13 0x10 0xd5
+0x8c 0x14 0x10 0xd5
+0xcc 0x78 0x10 0xd5
+0xcc 0x79 0x10 0xd5
+0xc 0x0 0x1a 0xd5
+0xc 0x0 0x1c 0xd5
+0xac 0x0 0x1c 0xd5
+0xc 0x10 0x18 0xd5
+0xc 0x10 0x1c 0xd5
+0xc 0x10 0x1e 0xd5
+0x2c 0x10 0x18 0xd5
+0x2c 0x10 0x1c 0xd5
+0x2c 0x10 0x1e 0xd5
+0x4c 0x10 0x18 0xd5
+0xc 0x11 0x1c 0xd5
+0xc 0x11 0x1e 0xd5
+0x2c 0x11 0x1c 0xd5
+0x2c 0x11 0x1e 0xd5
+0x4c 0x11 0x1c 0xd5
+0x4c 0x11 0x1e 0xd5
+0x6c 0x11 0x1c 0xd5
+0xec 0x11 0x1c 0xd5
+0x2c 0x13 0x1e 0xd5
+0xc 0x20 0x18 0xd5
+0xc 0x20 0x1c 0xd5
+0xc 0x20 0x1e 0xd5
+0x2c 0x20 0x18 0xd5
+0x4c 0x20 0x18 0xd5
+0x4c 0x20 0x1c 0xd5
+0x4c 0x20 0x1e 0xd5
+0xc 0x21 0x1c 0xd5
+0x4c 0x21 0x1c 0xd5
+0xc 0x30 0x1c 0xd5
+0xc 0x40 0x18 0xd5
+0xc 0x40 0x1c 0xd5
+0xc 0x40 0x1e 0xd5
+0x2c 0x40 0x18 0xd5
+0x2c 0x40 0x1c 0xd5
+0x2c 0x40 0x1e 0xd5
+0xc 0x41 0x18 0xd5
+0xc 0x41 0x1c 0xd5
+0xc 0x41 0x1e 0xd5
+0xc 0x42 0x18 0xd5
+0xc 0x42 0x1b 0xd5
+0x2c 0x42 0x1b 0xd5
+0x4c 0x42 0x18 0xd5
+0xc 0x43 0x1c 0xd5
+0x2c 0x43 0x1c 0xd5
+0x4c 0x43 0x1c 0xd5
+0x6c 0x43 0x1c 0xd5
+0xc 0x44 0x1b 0xd5
+0x2c 0x44 0x1b 0xd5
+0xc 0x45 0x1b 0xd5
+0x2c 0x45 0x1b 0xd5
+0x2c 0x50 0x1c 0xd5
+0xc 0x51 0x18 0xd5
+0xc 0x51 0x1c 0xd5
+0xc 0x51 0x1e 0xd5
+0x2c 0x51 0x18 0xd5
+0x2c 0x51 0x1c 0xd5
+0x2c 0x51 0x1e 0xd5
+0xc 0x52 0x18 0xd5
+0xc 0x52 0x1c 0xd5
+0xc 0x52 0x1e 0xd5
+0xc 0x53 0x1c 0xd5
+0xc 0x60 0x18 0xd5
+0xc 0x60 0x1c 0xd5
+0xc 0x60 0x1e 0xd5
+0x8c 0x60 0x1c 0xd5
+0xc 0x74 0x18 0xd5
+0xc 0x9c 0x1b 0xd5
+0x2c 0x9c 0x1b 0xd5
+0x4c 0x9c 0x1b 0xd5
+0x6c 0x9c 0x1b 0xd5
+0xac 0x9c 0x1b 0xd5
+0xc 0x9d 0x1b 0xd5
+0x2c 0x9d 0x1b 0xd5
+0x4c 0x9d 0x1b 0xd5
+0xc 0x9e 0x1b 0xd5
+0x2c 0x9e 0x18 0xd5
+0x4c 0x9e 0x18 0xd5
+0x6c 0x9e 0x1b 0xd5
+0xc 0xa2 0x18 0xd5
+0xc 0xa2 0x1c 0xd5
+0xc 0xa2 0x1e 0xd5
+0xc 0xa3 0x18 0xd5
+0xc 0xa3 0x1c 0xd5
+0xc 0xa3 0x1e 0xd5
+0xc 0xc0 0x18 0xd5
+0xc 0xc0 0x1c 0xd5
+0xc 0xc0 0x1e 0xd5
+0x4c 0xc0 0x18 0xd5
+0x4c 0xc0 0x1c 0xd5
+0x4c 0xc0 0x1e 0xd5
+0x4c 0xd0 0x1b 0xd5
+0x4c 0xd0 0x1c 0xd5
+0x4c 0xd0 0x1e 0xd5
+0x6c 0xd0 0x1b 0xd5
+0x8c 0xd0 0x18 0xd5
+0xc 0xe0 0x1b 0xd5
+0x6c 0xe0 0x1c 0xd5
+0xc 0xe1 0x18 0xd5
+0xc 0xe1 0x1c 0xd5
+0xc 0xe2 0x1b 0xd5
+0xc 0xe2 0x1c 0xd5
+0xc 0xe2 0x1f 0xd5
+0x2c 0xe2 0x1b 0xd5
+0x2c 0xe2 0x1c 0xd5
+0x2c 0xe2 0x1f 0xd5
+0x4c 0xe2 0x1b 0xd5
+0x4c 0xe2 0x1c 0xd5
+0x4c 0xe2 0x1f 0xd5
+0xc 0xe3 0x1b 0xd5
+0x2c 0xe3 0x1b 0xd5
+0x4c 0xe3 0x1b 0xd5
+0xc 0xe8 0x1b 0xd5
+0x2c 0xe8 0x1b 0xd5
+0x4c 0xe8 0x1b 0xd5
+0x6c 0xe8 0x1b 0xd5
+0x8c 0xe8 0x1b 0xd5
+0xac 0xe8 0x1b 0xd5
+0xcc 0xe8 0x1b 0xd5
+0xec 0xe8 0x1b 0xd5
+0xc 0xe9 0x1b 0xd5
+0x2c 0xe9 0x1b 0xd5
+0x4c 0xe9 0x1b 0xd5
+0x6c 0xe9 0x1b 0xd5
+0x8c 0xe9 0x1b 0xd5
+0xac 0xe9 0x1b 0xd5
+0xcc 0xe9 0x1b 0xd5
+0xec 0xe9 0x1b 0xd5
+0xc 0xea 0x1b 0xd5
+0x2c 0xea 0x1b 0xd5
+0x4c 0xea 0x1b 0xd5
+0x6c 0xea 0x1b 0xd5
+0x8c 0xea 0x1b 0xd5
+0xac 0xea 0x1b 0xd5
+0xcc 0xea 0x1b 0xd5
+0xec 0xea 0x1b 0xd5
+0xc 0xeb 0x1b 0xd5
+0x2c 0xeb 0x1b 0xd5
+0x4c 0xeb 0x1b 0xd5
+0x6c 0xeb 0x1b 0xd5
+0x8c 0xeb 0x1b 0xd5
+0xac 0xeb 0x1b 0xd5
+0xcc 0xeb 0x1b 0xd5
+0xec 0xef 0x1b 0xd5
+0xc 0xec 0x1b 0xd5
+0x2c 0xec 0x1b 0xd5
+0x4c 0xec 0x1b 0xd5
+0x6c 0xec 0x1b 0xd5
+0x8c 0xec 0x1b 0xd5
+0xac 0xec 0x1b 0xd5
+0xcc 0xec 0x1b 0xd5
+0xec 0xec 0x1b 0xd5
+0xc 0xed 0x1b 0xd5
+0x2c 0xed 0x1b 0xd5
+0x4c 0xed 0x1b 0xd5
+0x6c 0xed 0x1b 0xd5
+0x8c 0xed 0x1b 0xd5
+0xac 0xed 0x1b 0xd5
+0xcc 0xed 0x1b 0xd5
+0xec 0xed 0x1b 0xd5
+0xc 0xee 0x1b 0xd5
+0x2c 0xee 0x1b 0xd5
+0x4c 0xee 0x1b 0xd5
+0x6c 0xee 0x1b 0xd5
+0x8c 0xee 0x1b 0xd5
+0xac 0xee 0x1b 0xd5
+0xcc 0xee 0x1b 0xd5
+0xec 0xee 0x1b 0xd5
+0xc 0xef 0x1b 0xd5
+0x2c 0xef 0x1b 0xd5
+0x4c 0xef 0x1b 0xd5
+0x6c 0xef 0x1b 0xd5
+0x8c 0xef 0x1b 0xd5
+0xac 0xef 0x1b 0xd5
+0xcc 0xef 0x1b 0xd5
+0x9 0x0 0x32 0xd5
+0x49 0x0 0x30 0xd5
+0x9 0x1 0x33 0xd5
+0x9 0x2 0x30 0xd5
+0x49 0x2 0x30 0xd5
+0x49 0x3 0x30 0xd5
+0x9 0x4 0x33 0xd5
+0x9 0x5 0x33 0xd5
+0x49 0x6 0x30 0xd5
+0x9 0x7 0x34 0xd5
+0x89 0x0 0x30 0xd5
+0x89 0x1 0x30 0xd5
+0x89 0x2 0x30 0xd5
+0x89 0x3 0x30 0xd5
+0x89 0x4 0x30 0xd5
+0x89 0x5 0x30 0xd5
+0x89 0x6 0x30 0xd5
+0x89 0x7 0x30 0xd5
+0x89 0x8 0x30 0xd5
+0x89 0x9 0x30 0xd5
+0x89 0xa 0x30 0xd5
+0x89 0xb 0x30 0xd5
+0x89 0xc 0x30 0xd5
+0x89 0xd 0x30 0xd5
+0x89 0xe 0x30 0xd5
+0x89 0xf 0x30 0xd5
+0xa9 0x0 0x30 0xd5
+0xa9 0x1 0x30 0xd5
+0xa9 0x2 0x30 0xd5
+0xa9 0x3 0x30 0xd5
+0xa9 0x4 0x30 0xd5
+0xa9 0x5 0x30 0xd5
+0xa9 0x6 0x30 0xd5
+0xa9 0x7 0x30 0xd5
+0xa9 0x8 0x30 0xd5
+0xa9 0x9 0x30 0xd5
+0xa9 0xa 0x30 0xd5
+0xa9 0xb 0x30 0xd5
+0xa9 0xc 0x30 0xd5
+0xa9 0xd 0x30 0xd5
+0xa9 0xe 0x30 0xd5
+0xa9 0xf 0x30 0xd5
+0xc9 0x0 0x30 0xd5
+0xc9 0x1 0x30 0xd5
+0xc9 0x2 0x30 0xd5
+0xc9 0x3 0x30 0xd5
+0xc9 0x4 0x30 0xd5
+0xc9 0x5 0x30 0xd5
+0xc9 0x6 0x30 0xd5
+0xc9 0x7 0x30 0xd5
+0xc9 0x8 0x30 0xd5
+0xc9 0x9 0x30 0xd5
+0xc9 0xa 0x30 0xd5
+0xc9 0xb 0x30 0xd5
+0xc9 0xc 0x30 0xd5
+0xc9 0xd 0x30 0xd5
+0xc9 0xe 0x30 0xd5
+0xc9 0xf 0x30 0xd5
+0xe9 0x0 0x30 0xd5
+0xe9 0x1 0x30 0xd5
+0xe9 0x2 0x30 0xd5
+0xe9 0x3 0x30 0xd5
+0xe9 0x4 0x30 0xd5
+0xe9 0x5 0x30 0xd5
+0xe9 0x6 0x30 0xd5
+0xe9 0x7 0x30 0xd5
+0xe9 0x8 0x30 0xd5
+0xe9 0x9 0x30 0xd5
+0xe9 0xa 0x30 0xd5
+0xe9 0xb 0x30 0xd5
+0xe9 0xc 0x30 0xd5
+0xe9 0xd 0x30 0xd5
+0xe9 0xe 0x30 0xd5
+0xe9 0xf 0x30 0xd5
+0x9 0x10 0x30 0xd5
+0x9 0x10 0x32 0xd5
+0x89 0x11 0x30 0xd5
+0x89 0x13 0x30 0xd5
+0x89 0x14 0x30 0xd5
+0xc9 0x78 0x30 0xd5
+0xc9 0x79 0x30 0xd5
+0xc9 0x7e 0x30 0xd5
+0x9 0x0 0x38 0xd5
+0x9 0x0 0x39 0xd5
+0x9 0x0 0x3a 0xd5
+0x9 0x0 0x3c 0xd5
+0x29 0x0 0x39 0xd5
+0x29 0x0 0x3b 0xd5
+0xa9 0x0 0x38 0xd5
+0xa9 0x0 0x3c 0xd5
+0xc9 0x0 0x38 0xd5
+0xe9 0x0 0x39 0xd5
+0xe9 0x0 0x3b 0xd5
+0x9 0x1 0x38 0xd5
+0x29 0x1 0x38 0xd5
+0x49 0x1 0x38 0xd5
+0x69 0x1 0x38 0xd5
+0x89 0x1 0x38 0xd5
+0xa9 0x1 0x38 0xd5
+0xc9 0x1 0x38 0xd5
+0xe9 0x1 0x38 0xd5
+0x9 0x2 0x38 0xd5
+0x29 0x2 0x38 0xd5
+0x49 0x2 0x38 0xd5
+0x69 0x2 0x38 0xd5
+0x89 0x2 0x38 0xd5
+0xa9 0x2 0x38 0xd5
+0x9 0x3 0x38 0xd5
+0x29 0x3 0x38 0xd5
+0x49 0x3 0x38 0xd5
+0x9 0x4 0x38 0xd5
+0x29 0x4 0x38 0xd5
+0x9 0x5 0x38 0xd5
+0x29 0x5 0x38 0xd5
+0x89 0x5 0x38 0xd5
+0xa9 0x5 0x38 0xd5
+0x9 0x6 0x38 0xd5
+0x29 0x6 0x38 0xd5
+0x9 0x7 0x38 0xd5
+0x29 0x7 0x38 0xd5
+0x9 0x10 0x38 0xd5
+0x9 0x10 0x3c 0xd5
+0x9 0x10 0x3e 0xd5
+0x29 0x10 0x38 0xd5
+0x29 0x10 0x3c 0xd5
+0x29 0x10 0x3e 0xd5
+0x49 0x10 0x38 0xd5
+0x9 0x11 0x3c 0xd5
+0x9 0x11 0x3e 0xd5
+0x29 0x11 0x3c 0xd5
+0x29 0x11 0x3e 0xd5
+0x49 0x11 0x3c 0xd5
+0x49 0x11 0x3e 0xd5
+0x69 0x11 0x3c 0xd5
+0xe9 0x11 0x3c 0xd5
+0x29 0x13 0x3e 0xd5
+0x9 0x20 0x38 0xd5
+0x9 0x20 0x3c 0xd5
+0x9 0x20 0x3e 0xd5
+0x29 0x20 0x38 0xd5
+0x49 0x20 0x38 0xd5
+0x49 0x20 0x3c 0xd5
+0x49 0x20 0x3e 0xd5
+0x9 0x21 0x3c 0xd5
+0x49 0x21 0x3c 0xd5
+0x9 0x30 0x3c 0xd5
+0x9 0x40 0x38 0xd5
+0x9 0x40 0x3c 0xd5
+0x9 0x40 0x3e 0xd5
+0x29 0x40 0x38 0xd5
+0x29 0x40 0x3c 0xd5
+0x29 0x40 0x3e 0xd5
+0x9 0x41 0x38 0xd5
+0x9 0x41 0x3c 0xd5
+0x9 0x41 0x3e 0xd5
+0x9 0x42 0x38 0xd5
+0x9 0x42 0x3b 0xd5
+0x29 0x42 0x3b 0xd5
+0x49 0x42 0x38 0xd5
+0x9 0x43 0x3c 0xd5
+0x29 0x43 0x3c 0xd5
+0x49 0x43 0x3c 0xd5
+0x69 0x43 0x3c 0xd5
+0x9 0x44 0x3b 0xd5
+0x29 0x44 0x3b 0xd5
+0x9 0x45 0x3b 0xd5
+0x29 0x45 0x3b 0xd5
+0x29 0x50 0x3c 0xd5
+0x9 0x51 0x38 0xd5
+0x9 0x51 0x3c 0xd5
+0x9 0x51 0x3e 0xd5
+0x29 0x51 0x38 0xd5
+0x29 0x51 0x3c 0xd5
+0x29 0x51 0x3e 0xd5
+0x9 0x52 0x38 0xd5
+0x9 0x52 0x3c 0xd5
+0x9 0x52 0x3e 0xd5
+0x9 0x53 0x3c 0xd5
+0x9 0x60 0x38 0xd5
+0x9 0x60 0x3c 0xd5
+0x9 0x60 0x3e 0xd5
+0x89 0x60 0x3c 0xd5
+0x9 0x74 0x38 0xd5
+0x9 0x9c 0x3b 0xd5
+0x29 0x9c 0x3b 0xd5
+0x49 0x9c 0x3b 0xd5
+0x69 0x9c 0x3b 0xd5
+0xa9 0x9c 0x3b 0xd5
+0xc9 0x9c 0x3b 0xd5
+0xe9 0x9c 0x3b 0xd5
+0x9 0x9d 0x3b 0xd5
+0x29 0x9d 0x3b 0xd5
+0x49 0x9d 0x3b 0xd5
+0x9 0x9e 0x3b 0xd5
+0x29 0x9e 0x38 0xd5
+0x49 0x9e 0x38 0xd5
+0x69 0x9e 0x3b 0xd5
+0x9 0xa2 0x38 0xd5
+0x9 0xa2 0x3c 0xd5
+0x9 0xa2 0x3e 0xd5
+0x9 0xa3 0x38 0xd5
+0x9 0xa3 0x3c 0xd5
+0x9 0xa3 0x3e 0xd5
+0x9 0xc0 0x38 0xd5
+0x9 0xc0 0x3c 0xd5
+0x9 0xc0 0x3e 0xd5
+0x29 0xc0 0x38 0xd5
+0x29 0xc0 0x3c 0xd5
+0x29 0xc0 0x3e 0xd5
+0x49 0xc0 0x38 0xd5
+0x49 0xc0 0x3c 0xd5
+0x49 0xc0 0x3e 0xd5
+0x9 0xc1 0x38 0xd5
+0x29 0xd0 0x38 0xd5
+0x49 0xd0 0x3b 0xd5
+0x49 0xd0 0x3c 0xd5
+0x49 0xd0 0x3e 0xd5
+0x69 0xd0 0x3b 0xd5
+0x89 0xd0 0x38 0xd5
+0x9 0xe0 0x3b 0xd5
+0x29 0xe0 0x3b 0xd5
+0x49 0xe0 0x3b 0xd5
+0x69 0xe0 0x3c 0xd5
+0x9 0xe1 0x38 0xd5
+0x9 0xe1 0x3c 0xd5
+0x9 0xe2 0x3b 0xd5
+0x9 0xe2 0x3c 0xd5
+0x9 0xe2 0x3f 0xd5
+0x29 0xe2 0x3b 0xd5
+0x29 0xe2 0x3c 0xd5
+0x29 0xe2 0x3f 0xd5
+0x49 0xe2 0x3b 0xd5
+0x49 0xe2 0x3c 0xd5
+0x49 0xe2 0x3f 0xd5
+0x9 0xe3 0x3b 0xd5
+0x29 0xe3 0x3b 0xd5
+0x49 0xe3 0x3b 0xd5
+0x9 0xe8 0x3b 0xd5
+0x29 0xe8 0x3b 0xd5
+0x49 0xe8 0x3b 0xd5
+0x69 0xe8 0x3b 0xd5
+0x89 0xe8 0x3b 0xd5
+0xa9 0xe8 0x3b 0xd5
+0xc9 0xe8 0x3b 0xd5
+0xe9 0xe8 0x3b 0xd5
+0x9 0xe9 0x3b 0xd5
+0x29 0xe9 0x3b 0xd5
+0x49 0xe9 0x3b 0xd5
+0x69 0xe9 0x3b 0xd5
+0x89 0xe9 0x3b 0xd5
+0xa9 0xe9 0x3b 0xd5
+0xc9 0xe9 0x3b 0xd5
+0xe9 0xe9 0x3b 0xd5
+0x9 0xea 0x3b 0xd5
+0x29 0xea 0x3b 0xd5
+0x49 0xea 0x3b 0xd5
+0x69 0xea 0x3b 0xd5
+0x89 0xea 0x3b 0xd5
+0xa9 0xea 0x3b 0xd5
+0xc9 0xea 0x3b 0xd5
+0xe9 0xea 0x3b 0xd5
+0x9 0xeb 0x3b 0xd5
+0x29 0xeb 0x3b 0xd5
+0x49 0xeb 0x3b 0xd5
+0x69 0xeb 0x3b 0xd5
+0x89 0xeb 0x3b 0xd5
+0xa9 0xeb 0x3b 0xd5
+0xc9 0xeb 0x3b 0xd5
+0xe9 0xef 0x3b 0xd5
+0x9 0xec 0x3b 0xd5
+0x29 0xec 0x3b 0xd5
+0x49 0xec 0x3b 0xd5
+0x69 0xec 0x3b 0xd5
+0x89 0xec 0x3b 0xd5
+0xa9 0xec 0x3b 0xd5
+0xc9 0xec 0x3b 0xd5
+0xe9 0xec 0x3b 0xd5
+0x9 0xed 0x3b 0xd5
+0x29 0xed 0x3b 0xd5
+0x49 0xed 0x3b 0xd5
+0x69 0xed 0x3b 0xd5
+0x89 0xed 0x3b 0xd5
+0xa9 0xed 0x3b 0xd5
+0xc9 0xed 0x3b 0xd5
+0xe9 0xed 0x3b 0xd5
+0x9 0xee 0x3b 0xd5
+0x29 0xee 0x3b 0xd5
+0x49 0xee 0x3b 0xd5
+0x69 0xee 0x3b 0xd5
+0x89 0xee 0x3b 0xd5
+0xa9 0xee 0x3b 0xd5
+0xc9 0xee 0x3b 0xd5
+0xe9 0xee 0x3b 0xd5
+0x9 0xef 0x3b 0xd5
+0x29 0xef 0x3b 0xd5
+0x49 0xef 0x3b 0xd5
+0x69 0xef 0x3b 0xd5
+0x89 0xef 0x3b 0xd5
+0xa9 0xef 0x3b 0xd5
+0xc9 0xef 0x3b 0xd5
+
+# CHECK: mrs x12, s3_7_c15_c1_5
+# CHECK: mrs x13, s3_2_c11_c15_7
+# CHECK: msr s3_0_c15_c0_0, x12
+# CHECK: msr s3_7_c11_c13_7, x5
+0xac 0xf1 0x3f 0xd5
+0xed 0xbf 0x3a 0xd5
+0x0c 0xf0 0x18 0xd5
+0xe5 0xbd 0x1f 0xd5
+
+#------------------------------------------------------------------------------
+# Test and branch (immediate)
+#------------------------------------------------------------------------------
+
+# CHECK: tbz x12, #62, #0
+# CHECK: tbz x12, #62, #4
+# CHECK: tbz x12, #62, #-32768
+# CHECK: tbnz x12, #60, #32764
+0x0c 0x00 0xf0 0xb6
+0x2c 0x00 0xf0 0xb6
+0x0c 0x00 0xf4 0xb6
+0xec 0xff 0xe3 0xb7
+
+#------------------------------------------------------------------------------
+# Unconditional branch (immediate)
+#------------------------------------------------------------------------------
+
+# CHECK: b #4
+# CHECK: b #-4
+# CHECK: b #134217724
+0x01 0x00 0x00 0x14
+0xff 0xff 0xff 0x17
+0xff 0xff 0xff 0x15
+
+#------------------------------------------------------------------------------
+# Unconditional branch (register)
+#------------------------------------------------------------------------------
+
+# CHECK: br x20
+# CHECK: blr xzr
+# CHECK: ret x10
+0x80 0x2 0x1f 0xd6
+0xe0 0x3 0x3f 0xd6
+0x40 0x1 0x5f 0xd6
+
+# CHECK: ret
+# CHECK: eret
+# CHECK: drps
+0xc0 0x3 0x5f 0xd6
+0xe0 0x3 0x9f 0xd6
+0xe0 0x3 0xbf 0xd6
+
diff --git a/test/MC/Disassembler/AArch64/basic-a64-undefined.txt b/test/MC/Disassembler/AArch64/basic-a64-undefined.txt
new file mode 100644
index 0000000000..a17579cb16
--- /dev/null
+++ b/test/MC/Disassembler/AArch64/basic-a64-undefined.txt
@@ -0,0 +1,43 @@
+# These spawn another process so they're rather expensive. Not many.
+
+# Instructions notionally in the add/sub (extended register) sheet, but with an
+# invalid shift amount or "opt" field.
+# RUN: echo "0x00 0x10 0xa0 0x0b" | llvm-mc -triple=aarch64 -disassemble 2>&1 | FileCheck %s
+# RUN: echo "0x00 0x10 0x60 0x0b" | llvm-mc -triple=aarch64 -disassemble 2>&1 | FileCheck %s
+# RUN: echo "0x00 0x14 0x20 0x0b" | llvm-mc -triple=aarch64 -disassemble 2>&1 | FileCheck %s
+
+# Instructions notionally in the add/sub (immediate) sheet, but with an
+# invalid "shift" field.
+# RUN: echo "0xdf 0x3 0x80 0x91" | llvm-mc -triple=aarch64 -disassemble 2>&1 | FileCheck %s
+# RUN: echo "0xed 0x8e 0xc4 0x31" | llvm-mc -triple=aarch64 -disassemble 2>&1 | FileCheck %s
+# RUN: echo "0x62 0xfc 0xbf 0x11" | llvm-mc -triple=aarch64 -disassemble 2>&1 | FileCheck %s
+# RUN: echo "0x3 0xff 0xff 0x91" | llvm-mc -triple=aarch64 -disassemble 2>&1 | FileCheck %s
+
+# Instructions notionally in the load/store (unsigned immediate) sheet.
+# The only unallocated (int-register) variants are opc=0b11 with size=0b10 or 0b11.
+# RUN: echo "0xd7 0xfc 0xff 0xb9" | llvm-mc -triple=aarch64 -disassemble 2>&1 | FileCheck %s
+# RUN: echo "0xd7 0xfc 0xcf 0xf9" | llvm-mc -triple=aarch64 -disassemble 2>&1 | FileCheck %s
+
+# Instructions notionally in the floating-point <-> fixed-point conversion sheet.
+# The scale field is 64-<imm>, and <imm> should be 1-32 for a 32-bit int register.
+# RUN: echo "0x23 0x01 0x18 0x1e" | llvm-mc -triple=aarch64 -disassemble 2>&1 | FileCheck %s
+# RUN: echo "0x23 0x25 0x42 0x1e" | llvm-mc -triple=aarch64 -disassemble 2>&1 | FileCheck %s
+
+# Instructions notionally in the logical (shifted register) sheet, but with an
+# out-of-range shift: w-registers can only have 0-31.
+# RUN: echo "0x00 0x80 0x00 0x0a" | llvm-mc -triple=aarch64 -disassemble 2>&1 | FileCheck %s
+
+# Instructions notionally in the move wide (immediate) sheet, but with an
+# out-of-range shift: w-registers can only have 0 or 16.
+# RUN: echo "0x00 0x00 0xc0 0x12" | llvm-mc -triple=aarch64 -disassemble 2>&1 | FileCheck %s
+# RUN: echo "0x12 0x34 0xe0 0x52" | llvm-mc -triple=aarch64 -disassemble 2>&1 | FileCheck %s
+
+# Data-processing instructions are undefined when S=1 and for the 0b0000111 value in opcode:sf
+# RUN: echo "0x00 0x00 0xc0 0x5f" | llvm-mc -triple=aarch64 -disassemble 2>&1 | FileCheck %s
+# RUN: echo "0x56 0x0c 0xc0 0x5a" | llvm-mc -triple=aarch64 -disassemble 2>&1 | FileCheck %s
+
+# Data-processing instructions (2 source) are undefined for a value of 0001xx:0:x or 0011xx:0:x for opcode:S:sf
+# RUN: echo "0x00 0x30 0xc1 0x1a" | llvm-mc -triple=aarch64 -disassemble 2>&1 | FileCheck %s
+# RUN: echo "0x00 0x10 0xc1 0x1a" | llvm-mc -triple=aarch64 -disassemble 2>&1 | FileCheck %s
+
+# CHECK: invalid instruction encoding
diff --git a/test/MC/Disassembler/AArch64/basic-a64-unpredictable.txt b/test/MC/Disassembler/AArch64/basic-a64-unpredictable.txt
new file mode 100644
index 0000000000..adb8f75ed9
--- /dev/null
+++ b/test/MC/Disassembler/AArch64/basic-a64-unpredictable.txt
@@ -0,0 +1,96 @@
+# RUN: llvm-mc -triple=aarch64 -disassemble < %s 2>&1 | FileCheck %s
+
+#------------------------------------------------------------------------------
+# Load-store exclusive
+#------------------------------------------------------------------------------
+
+#ldxp x14, x14, [sp]
+0xee 0x3b 0x7f 0xc8
+#CHECK: warning: potentially undefined instruction encoding
+#CHECK-NEXT: 0xee 0x3b 0x7f 0xc8
+
+#ldaxp w19, w19, [x1]
+0x33 0xcc 0x7f 0x88
+#CHECK: warning: potentially undefined instruction encoding
+#CHECK-NEXT: 0x33 0xcc 0x7f 0x88
+
+#------------------------------------------------------------------------------
+# Load-store register (immediate post-indexed)
+#------------------------------------------------------------------------------
+
+0x63 0x44 0x40 0xf8
+#CHECK: warning: potentially undefined instruction encoding
+#CHECK-NEXT: 0x63 0x44 0x40 0xf8
+
+0x42 0x14 0xc0 0x38
+#CHECK: warning: potentially undefined instruction encoding
+#CHECK-NEXT: 0x42 0x14 0xc0 0x38
+
+#------------------------------------------------------------------------------
+# Load-store register (immediate pre-indexed)
+#------------------------------------------------------------------------------
+
+0x63 0x4c 0x40 0xf8
+#CHECK: warning: potentially undefined instruction encoding
+#CHECK-NEXT: 0x63 0x4c 0x40 0xf8
+
+0x42 0x1c 0xc0 0x38
+#CHECK: warning: potentially undefined instruction encoding
+#CHECK-NEXT: 0x42 0x1c 0xc0 0x38
+
+#------------------------------------------------------------------------------
+# Load-store register pair (offset)
+#------------------------------------------------------------------------------
+
+# Unpredictable if Rt == Rt2 on a load.
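+# (For illustration: the first encoding below, 0xe3 0x0f 0x40 0xa9, is the word
+# 0xa9400fe3, with Rt = bits 4-0 = 3 and Rt2 = bits 14-10 = 3, i.e. an attempt
+# at "ldp x3, x3, [sp]".)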
+
+0xe3 0x0f 0x40 0xa9
+# CHECK: warning: potentially undefined instruction encoding
+# CHECK-NEXT: 0xe3 0x0f 0x40 0xa9
+# CHECK-NEXT: ^
+
+0xe2 0x8b 0x41 0x69
+# CHECK: warning: potentially undefined instruction encoding
+# CHECK-NEXT: 0xe2 0x8b 0x41 0x69
+# CHECK-NEXT: ^
+
+0x82 0x88 0x40 0x2d
+# CHECK: warning: potentially undefined instruction encoding
+# CHECK-NEXT: 0x82 0x88 0x40 0x2d
+# CHECK-NEXT: ^
+
+#------------------------------------------------------------------------------
+# Load-store register pair (post-indexed)
+#------------------------------------------------------------------------------
+
+# Unpredictable if Rt == Rt2 on a load.
+
+0xe3 0x0f 0xc0 0xa8
+# CHECK: warning: potentially undefined instruction encoding
+# CHECK-NEXT: 0xe3 0x0f 0xc0 0xa8
+# CHECK-NEXT: ^
+
+0xe2 0x8b 0xc1 0x68
+# CHECK: warning: potentially undefined instruction encoding
+# CHECK-NEXT: 0xe2 0x8b 0xc1 0x68
+# CHECK-NEXT: ^
+
+0x82 0x88 0xc0 0x2c
+# CHECK: warning: potentially undefined instruction encoding
+# CHECK-NEXT: 0x82 0x88 0xc0 0x2c
+# CHECK-NEXT: ^
+
+# Also unpredictable if writeback clashes with either transfer register
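+# (For illustration: 0x63 0x94 0xc0 0xa8 is the word 0xa8c09463, with
+# Rt = bits 4-0 = 3 and Rn = bits 9-5 = 3, i.e. "ldp x3, x5, [x3], #8", so the
+# writeback base clashes with the first transfer register.)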
+
+0x63 0x94 0xc0 0xa8
+# CHECK: warning: potentially undefined instruction encoding
+# CHECK-NEXT: 0x63 0x94 0xc0 0xa8
+
+0x69 0x2d 0x81 0xa8
+# CHECK: warning: potentially undefined instruction encoding
+# CHECK-NEXT: 0x69 0x2d 0x81 0xa8
+
+0x29 0xad 0xc0 0x28
+# CHECK: warning: potentially undefined instruction encoding
+# CHECK-NEXT: 0x29 0xad 0xc0 0x28
+
diff --git a/test/MC/Disassembler/AArch64/ldp-offset-predictable.txt b/test/MC/Disassembler/AArch64/ldp-offset-predictable.txt
new file mode 100644
index 0000000000..7ff495f499
--- /dev/null
+++ b/test/MC/Disassembler/AArch64/ldp-offset-predictable.txt
@@ -0,0 +1,7 @@
+# RUN: llvm-mc -triple=aarch64 -disassemble < %s 2>&1 | FileCheck %s
+
+# Stores are OK.
+0xe0 0x83 0x00 0xa9
+# CHECK-NOT: potentially undefined instruction encoding
+# CHECK: stp x0, x0, [sp, #8]
+
diff --git a/test/MC/Disassembler/AArch64/ldp-postind.predictable.txt b/test/MC/Disassembler/AArch64/ldp-postind.predictable.txt
new file mode 100644
index 0000000000..775660bba8
--- /dev/null
+++ b/test/MC/Disassembler/AArch64/ldp-postind.predictable.txt
@@ -0,0 +1,17 @@
+# RUN: llvm-mc -triple=aarch64 -disassemble < %s 2>&1 | FileCheck %s
+
+# None of these instructions should be classified as unpredictable:
+
+# CHECK-NOT: potentially undefined instruction encoding
+
+# Stores from duplicated registers should be fine.
+0xe3 0x0f 0x80 0xa8
+# CHECK: stp x3, x3, [sp], #0
+
+# d5 != x5 so "ldp d5, d6, [x5], #24" is fine.
+0xa5 0x98 0xc1 0x6c
+# CHECK: ldp d5, d6, [x5], #24
+
+# xzr != sp so "stp xzr, xzr, [sp], #8" is fine.
+0xff 0xff 0x80 0xa8
+# CHECK: stp xzr, xzr, [sp], #8
diff --git a/test/MC/Disassembler/AArch64/ldp-preind.predictable.txt b/test/MC/Disassembler/AArch64/ldp-preind.predictable.txt
new file mode 100644
index 0000000000..48ea8170ba
--- /dev/null
+++ b/test/MC/Disassembler/AArch64/ldp-preind.predictable.txt
@@ -0,0 +1,17 @@
+# RUN: llvm-mc -triple=aarch64 -disassemble < %s 2>&1 | FileCheck %s
+
+# None of these instructions should be classified as unpredictable:
+
+# CHECK-NOT: potentially undefined instruction encoding
+
+# Stores from duplicated registers should be fine.
+0xe3 0x0f 0x80 0xa9
+# CHECK: stp x3, x3, [sp, #0]!
+
+# d5 != x5 so "ldp d5, d6, [x5, #24]!" is fine.
+0xa5 0x98 0xc1 0x6d
+# CHECK: ldp d5, d6, [x5, #24]!
+
+# xzr != sp so "stp xzr, xzr, [sp, #8]!" is fine.
+0xff 0xff 0x80 0xa9
+# CHECK: stp xzr, xzr, [sp, #8]!
diff --git a/test/MC/Disassembler/AArch64/lit.local.cfg b/test/MC/Disassembler/AArch64/lit.local.cfg
new file mode 100644
index 0000000000..f9df30e4d3
--- /dev/null
+++ b/test/MC/Disassembler/AArch64/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.txt']
+
+targets = set(config.root.targets_to_build.split())
+if 'AArch64' not in targets:
+ config.unsupported = True
+
diff --git a/utils/TableGen/DisassemblerEmitter.cpp b/utils/TableGen/DisassemblerEmitter.cpp
index 2d11d2480d..5a2a41b456 100644
--- a/utils/TableGen/DisassemblerEmitter.cpp
+++ b/utils/TableGen/DisassemblerEmitter.cpp
@@ -127,8 +127,9 @@ void EmitDisassembler(RecordKeeper &Records, raw_ostream &OS) {
// ARM and Thumb have a CHECK() macro to deal with DecodeStatuses.
if (Target.getName() == "ARM" ||
- Target.getName() == "Thumb") {
- EmitFixedLenDecoder(Records, OS, "ARM",
+ Target.getName() == "Thumb" ||
+ Target.getName() == "AArch64") {
+ EmitFixedLenDecoder(Records, OS, Target.getName() == "AArch64" ? "AArch64" : "ARM",
"if (!Check(S, ", ")) return MCDisassembler::Fail;",
"S", "MCDisassembler::Fail",
" MCDisassembler::DecodeStatus S = "