Table-driven disassembler for the X86 architecture (16-, 32-, and 64-bit

incarnations), integrated into the MC framework. The disassembler is table-driven, using a custom TableGen backend to generate hierarchical tables optimized for fast decode. The disassembler consumes MemoryObjects and produces arrays of MCInsts, adhering to the abstract base class MCDisassembler (llvm/MC/MCDisassembler.h). The disassembler is documented in detail in - lib/Target/X86/Disassembler/X86Disassembler.cpp (disassembler runtime) - utils/TableGen/DisassemblerEmitter.cpp (table emitter) You can test the disassembler by running llvm-mc -disassemble for i386 or x86_64 targets. Please let me know if you encounter any problems with it. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@91749 91177308-0d34-0410-b5e6-96231b3b80d8
author: Sean Callanan <scallanan@apple.com> 2009-12-19 02:59:52 +0000
committer: Sean Callanan <scallanan@apple.com> 2009-12-19 02:59:52 +0000
commit: 8ed9f51663bc5533f36ca62e5668ae08e9a1313f (patch)
tree: 3054645839caee367e9403507d8487538819ed5b /lib/Target
parent: e9ec6ad1ba5fd9ad70f5d0c059c5a5aa44f501f7 (diff)
download: llvm-8ed9f51663bc5533f36ca62e5668ae08e9a1313f.tar.gz
llvm-8ed9f51663bc5533f36ca62e5668ae08e9a1313f.tar.bz2
llvm-8ed9f51663bc5533f36ca62e5668ae08e9a1313f.tar.xz
9 files changed, 2823 insertions, 5 deletions
diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt
index 3ad65fbedc..4186fecf4e 100644
--- a/lib/Target/X86/CMakeLists.txt
+++ b/lib/Target/X86/CMakeLists.txt
@@ -3,6 +3,7 @@ set(LLVM_TARGET_DEFINITIONS X86.td)
 tablegen(X86GenRegisterInfo.h.inc -gen-register-desc-header)
 tablegen(X86GenRegisterNames.inc -gen-register-enums)
 tablegen(X86GenRegisterInfo.inc -gen-register-desc)
+tablegen(X86GenDisassemblerTables.inc -gen-disassembler)
 tablegen(X86GenInstrNames.inc -gen-instr-enums)
 tablegen(X86GenInstrInfo.inc -gen-instr-desc)
 tablegen(X86GenAsmWriter.inc -gen-asm-writer)
diff --git a/lib/Target/X86/Disassembler/CMakeLists.txt b/lib/Target/X86/Disassembler/CMakeLists.txt
index b329e897b9..2a83a9c268 100644
--- a/lib/Target/X86/Disassembler/CMakeLists.txt
+++ b/lib/Target/X86/Disassembler/CMakeLists.txt
@@ -2,5 +2,6 @@ include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/
 
 add_llvm_library(LLVMX86Disassembler
   X86Disassembler.cpp
+  X86DisassemblerDecoder.c
   )
 add_dependencies(LLVMX86Disassembler X86CodeGenTable_gen)
diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp
index 2ebbc9bdbd..99617e7a40 100644
--- a/lib/Target/X86/Disassembler/X86Disassembler.cpp
+++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -6,18 +6,450 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
+//
+// This file is part of the X86 Disassembler.
+// It contains code to translate the data produced by the decoder into
+//  MCInsts.
+// Documentation for the disassembler can be found in X86Disassembler.h.
+//
+//===----------------------------------------------------------------------===//
 
+#include "X86Disassembler.h"
+#include "X86DisassemblerDecoder.h"
+#include "X86InstrInfo.h"
+
+#include "llvm/MC/MCDisassembler.h"
 #include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCInst.h"
 #include "llvm/Target/TargetRegistry.h"
-#include "X86.h"
+#include "llvm/Support/MemoryObject.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
 using namespace llvm;
+using namespace llvm::X86Disassembler;
+
+namespace llvm {  
+  
+// Fill-ins to make the compiler happy.  These constants are never actually
+//   assigned; they are just filler to make an automatically-generated switch
+//   statement work.
+namespace X86 {
+  enum {
+    BX_SI = 500,
+    BX_DI = 501,
+    BP_SI = 502,
+    BP_DI = 503,
+    sib   = 504,
+    sib64 = 505
+  };
+}
+
+}
+
+static void translateInstruction(MCInst &target,
+                                 InternalInstruction &source);
+
+X86GenericDisassembler::X86GenericDisassembler(DisassemblerMode mode) :
+    MCDisassembler(),
+    fMode(mode) {
+}
+
+X86GenericDisassembler::~X86GenericDisassembler() {
+}
+
+/// regionReader - a callback function that wraps the readByte method from
+///   MemoryObject.
+///
+/// @param arg      - The generic callback parameter.  In this case, this should
+///                   be a pointer to a MemoryObject.
+/// @param byte     - A pointer to the byte to be read.
+/// @param address  - The address to be read.
+static int regionReader(void* arg, uint8_t* byte, uint64_t address) {
+  MemoryObject* region = static_cast<MemoryObject*>(arg);
+  return region->readByte(address, byte);
+}
+
+/// logger - a callback function that wraps the operator<< method from
+///   raw_ostream.
+///
+/// @param arg      - The generic callback parameter.  This should be a pointe
+///                   to a raw_ostream.
+/// @param log      - A string to be logged.  logger() adds a newline.
+static void logger(void* arg, const char* log) {
+  if (!arg)
+    return;
+  
+  raw_ostream &vStream = *(static_cast<raw_ostream*>(arg));
+  vStream << log << "\n";
+}  
+  
+//
+// Public interface for the disassembler
+//
+
+bool X86GenericDisassembler::getInstruction(MCInst &instr,
+                                            uint64_t &size,
+                                            const MemoryObject &region,
+                                            uint64_t address,
+                                            raw_ostream &vStream) const {
+  InternalInstruction internalInstr;
+  
+  int ret = decodeInstruction(&internalInstr,
+                              regionReader,
+                              (void*)&region,
+                              logger,
+                              (void*)&vStream,
+                              address,
+                              fMode);
+
+  if(ret) {
+    size = internalInstr.readerCursor - address;
+    return false;
+  }
+  else {
+    size = internalInstr.length;
+    translateInstruction(instr, internalInstr);
+    return true;
+  }
+}
+
+//
+// Private code that translates from struct InternalInstructions to MCInsts.
+//
+
+/// translateRegister - Translates an internal register to the appropriate LLVM
+///   register, and appends it as an operand to an MCInst.
+///
+/// @param mcInst     - The MCInst to append to.
+/// @param reg        - The Reg to append.
+static void translateRegister(MCInst &mcInst, Reg reg) {
+#define ENTRY(x) X86::x,
+  uint8_t llvmRegnums[] = {
+    ALL_REGS
+    0
+  };
+#undef ENTRY
+
+  uint8_t llvmRegnum = llvmRegnums[reg];
+  mcInst.addOperand(MCOperand::CreateReg(llvmRegnum));
+}
+
+/// translateImmediate  - Appends an immediate operand to an MCInst.
+///
+/// @param mcInst       - The MCInst to append to.
+/// @param immediate    - The immediate value to append.
+static void translateImmediate(MCInst &mcInst, uint64_t immediate) {
+  mcInst.addOperand(MCOperand::CreateImm(immediate));
+}
+
+/// translateRMRegister - Translates a register stored in the R/M field of the
+///   ModR/M byte to its LLVM equivalent and appends it to an MCInst.
+/// @param mcInst       - The MCInst to append to.
+/// @param insn         - The internal instruction to extract the R/M field
+///                       from.
+static void translateRMRegister(MCInst &mcInst,
+                                InternalInstruction &insn) {
+  assert(insn.eaBase != EA_BASE_sib && insn.eaBase != EA_BASE_sib64 && 
+         "A R/M register operand may not have a SIB byte");
+  
+  switch (insn.eaBase) {
+  case EA_BASE_NONE:
+    llvm_unreachable("EA_BASE_NONE for ModR/M base");
+    break;
+#define ENTRY(x) case EA_BASE_##x:
+  ALL_EA_BASES
+#undef ENTRY
+    llvm_unreachable("A R/M register operand may not have a base; "
+                     "the operand must be a register.");
+    break;
+#define ENTRY(x)                                                        \
+  case EA_REG_##x:                                                    \
+    mcInst.addOperand(MCOperand::CreateReg(X86::x)); break;
+  ALL_REGS
+#undef ENTRY
+  default:
+    llvm_unreachable("Unexpected EA base register");
+  }
+}
+
+/// translateRMMemory - Translates a memory operand stored in the Mod and R/M
+///   fields of an internal instruction (and possibly its SIB byte) to a memory
+///   operand in LLVM's format, and appends it to an MCInst.
+///
+/// @param mcInst       - The MCInst to append to.
+/// @param insn         - The instruction to extract Mod, R/M, and SIB fields
+///                       from.
+static void translateRMMemory(MCInst &mcInst,
+                              InternalInstruction &insn) {
+  // Addresses in an MCInst are represented as five operands:
+  //   1. basereg       (register)  The R/M base, or (if there is a SIB) the 
+  //                                SIB base
+  //   2. scaleamount   (immediate) 1, or (if there is a SIB) the specified 
+  //                                scale amount
+  //   3. indexreg      (register)  x86_registerNONE, or (if there is a SIB)
+  //                                the index (which is multiplied by the 
+  //                                scale amount)
+  //   4. displacement  (immediate) 0, or the displacement if there is one
+  //   5. segmentreg    (register)  x86_registerNONE for now, but could be set
+  //                                if we have segment overrides
+  
+  MCOperand baseReg;
+  MCOperand scaleAmount;
+  MCOperand indexReg;
+  MCOperand displacement;
+  MCOperand segmentReg;
+  
+  if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) {
+    if (insn.sibBase != SIB_BASE_NONE) {
+      switch (insn.sibBase) {
+      default:
+        llvm_unreachable("Unexpected sibBase");
+#define ENTRY(x)                                          \
+      case SIB_BASE_##x:                                \
+        baseReg = MCOperand::CreateReg(X86::x); break;
+      ALL_SIB_BASES
+#undef ENTRY
+      }
+    } else {
+      baseReg = MCOperand::CreateReg(0);
+    }
+    
+    if (insn.sibIndex != SIB_INDEX_NONE) {
+      switch (insn.sibIndex) {
+      default:
+        llvm_unreachable("Unexpected sibIndex");
+#define ENTRY(x)                                            \
+      case SIB_INDEX_##x:                                 \
+        indexReg = MCOperand::CreateReg(X86::x); break;
+      EA_BASES_32BIT
+      EA_BASES_64BIT
+#undef ENTRY
+      }
+    } else {
+      indexReg = MCOperand::CreateReg(0);
+    }
+    
+    scaleAmount = MCOperand::CreateImm(insn.sibScale);
+  } else {
+    switch (insn.eaBase) {
+    case EA_BASE_NONE:
+      assert(insn.eaDisplacement != EA_DISP_NONE && 
+             "EA_BASE_NONE and EA_DISP_NONE for ModR/M base");
+      
+      if (insn.mode == MODE_64BIT)
+        baseReg = MCOperand::CreateReg(X86::RIP); // Section 2.2.1.6
+      else
+        baseReg = MCOperand::CreateReg(0);
+      
+      indexReg = MCOperand::CreateReg(0);
+      break;
+    case EA_BASE_BX_SI:
+      baseReg = MCOperand::CreateReg(X86::BX);
+      indexReg = MCOperand::CreateReg(X86::SI);
+      break;
+    case EA_BASE_BX_DI:
+      baseReg = MCOperand::CreateReg(X86::BX);
+      indexReg = MCOperand::CreateReg(X86::DI);
+      break;
+    case EA_BASE_BP_SI:
+      baseReg = MCOperand::CreateReg(X86::BP);
+      indexReg = MCOperand::CreateReg(X86::SI);
+      break;
+    case EA_BASE_BP_DI:
+      baseReg = MCOperand::CreateReg(X86::BP);
+      indexReg = MCOperand::CreateReg(X86::DI);
+      break;
+    default:
+      indexReg = MCOperand::CreateReg(0);
+      switch (insn.eaBase) {
+      default:
+        llvm_unreachable("Unexpected eaBase");
+        break;
+        // Here, we will use the fill-ins defined above.  However,
+        //   BX_SI, BX_DI, BP_SI, and BP_DI are all handled above and
+        //   sib and sib64 were handled in the top-level if, so they're only
+        //   placeholders to keep the compiler happy.
+#define ENTRY(x)                                        \
+      case EA_BASE_##x:                                 \
+        baseReg = MCOperand::CreateReg(X86::x); break; 
+      ALL_EA_BASES
+#undef ENTRY
+#define ENTRY(x) case EA_REG_##x:
+      ALL_REGS
+#undef ENTRY
+        llvm_unreachable("A R/M memory operand may not be a register; "
+                         "the base field must be a base.");
+            break;
+      }
+    }
+  }
+  
+  displacement = MCOperand::CreateImm(insn.displacement);
+  
+  static const uint8_t segmentRegnums[SEG_OVERRIDE_max] = {
+    0,        // SEG_OVERRIDE_NONE
+    X86::CS,
+    X86::SS,
+    X86::DS,
+    X86::ES,
+    X86::FS,
+    X86::GS
+  };
+  
+  segmentReg = MCOperand::CreateReg(segmentRegnums[insn.segmentOverride]);
+  
+  mcInst.addOperand(baseReg);
+  mcInst.addOperand(scaleAmount);
+  mcInst.addOperand(indexReg);
+  mcInst.addOperand(displacement);
+  mcInst.addOperand(segmentReg);
+}
+
+/// translateRM - Translates an operand stored in the R/M (and possibly SIB)
+///   byte of an instruction to LLVM form, and appends it to an MCInst.
+///
+/// @param mcInst       - The MCInst to append to.
+/// @param operand      - The operand, as stored in the descriptor table.
+/// @param insn         - The instruction to extract Mod, R/M, and SIB fields
+///                       from.
+static void translateRM(MCInst &mcInst,
+                        OperandSpecifier &operand,
+                        InternalInstruction &insn) {
+  switch (operand.type) {
+  default:
+    llvm_unreachable("Unexpected type for a R/M operand");
+  case TYPE_R8:
+  case TYPE_R16:
+  case TYPE_R32:
+  case TYPE_R64:
+  case TYPE_Rv:
+  case TYPE_MM:
+  case TYPE_MM32:
+  case TYPE_MM64:
+  case TYPE_XMM:
+  case TYPE_XMM32:
+  case TYPE_XMM64:
+  case TYPE_XMM128:
+  case TYPE_DEBUGREG:
+  case TYPE_CR32:
+  case TYPE_CR64:
+    translateRMRegister(mcInst, insn);
+    break;
+  case TYPE_M:
+  case TYPE_M8:
+  case TYPE_M16:
+  case TYPE_M32:
+  case TYPE_M64:
+  case TYPE_M128:
+  case TYPE_M512:
+  case TYPE_Mv:
+  case TYPE_M32FP:
+  case TYPE_M64FP:
+  case TYPE_M80FP:
+  case TYPE_M16INT:
+  case TYPE_M32INT:
+  case TYPE_M64INT:
+  case TYPE_M1616:
+  case TYPE_M1632:
+  case TYPE_M1664:
+    translateRMMemory(mcInst, insn);
+    break;
+  }
+}
+  
+/// translateFPRegister - Translates a stack position on the FPU stack to its
+///   LLVM form, and appends it to an MCInst.
+///
+/// @param mcInst       - The MCInst to append to.
+/// @param stackPos     - The stack position to translate.
+static void translateFPRegister(MCInst &mcInst,
+                                uint8_t stackPos) {
+  assert(stackPos < 8 && "Invalid FP stack position");
+  
+  mcInst.addOperand(MCOperand::CreateReg(X86::ST0 + stackPos));
+}
+
+/// translateOperand - Translates an operand stored in an internal instruction 
+///   to LLVM's format and appends it to an MCInst.
+///
+/// @param mcInst       - The MCInst to append to.
+/// @param operand      - The operand, as stored in the descriptor table.
+/// @param insn         - The internal instruction.
+static void translateOperand(MCInst &mcInst,
+                             OperandSpecifier &operand,
+                             InternalInstruction &insn) {
+  switch (operand.encoding) {
+  default:
+    llvm_unreachable("Unhandled operand encoding during translation");
+  case ENCODING_REG:
+    translateRegister(mcInst, insn.reg);
+    break;
+  case ENCODING_RM:
+    translateRM(mcInst, operand, insn);
+    break;
+  case ENCODING_CB:
+  case ENCODING_CW:
+  case ENCODING_CD:
+  case ENCODING_CP:
+  case ENCODING_CO:
+  case ENCODING_CT:
+    llvm_unreachable("Translation of code offsets isn't supported.");
+  case ENCODING_IB:
+  case ENCODING_IW:
+  case ENCODING_ID:
+  case ENCODING_IO:
+  case ENCODING_Iv:
+  case ENCODING_Ia:
+    translateImmediate(mcInst, 
+                       insn.immediates[insn.numImmediatesTranslated++]);
+    break;
+  case ENCODING_RB:
+  case ENCODING_RW:
+  case ENCODING_RD:
+  case ENCODING_RO:
+    translateRegister(mcInst, insn.opcodeRegister);
+    break;
+  case ENCODING_I:
+    translateFPRegister(mcInst, insn.opcodeModifier);
+    break;
+  case ENCODING_Rv:
+    translateRegister(mcInst, insn.opcodeRegister);
+    break;
+  case ENCODING_DUP:
+    translateOperand(mcInst,
+                     insn.spec->operands[operand.type - TYPE_DUP0],
+                     insn);
+    break;
+  }
+}
+  
+/// translateInstruction - Translates an internal instruction and all its
+///   operands to an MCInst.
+///
+/// @param mcInst       - The MCInst to populate with the instruction's data.
+/// @param insn         - The internal instruction.
+static void translateInstruction(MCInst &mcInst,
+                                 InternalInstruction &insn) {  
+  assert(insn.spec);
+  
+  mcInst.setOpcode(insn.instructionID);
+  
+  int index;
+  
+  insn.numImmediatesTranslated = 0;
+  
+  for (index = 0; index < X86_MAX_OPERANDS; ++index) {
+    if (insn.spec->operands[index].encoding != ENCODING_NONE)                
+      translateOperand(mcInst, insn.spec->operands[index], insn);
+  }
+}
 
 static const MCDisassembler *createX86_32Disassembler(const Target &T) {
-  return 0;
+  return new X86Disassembler::X86_32Disassembler;
 }
 
 static const MCDisassembler *createX86_64Disassembler(const Target &T) {
-  return 0; 
+  return new X86Disassembler::X86_64Disassembler;
 }
 
 extern "C" void LLVMInitializeX86Disassembler() { 
diff --git a/lib/Target/X86/Disassembler/X86Disassembler.h b/lib/Target/X86/Disassembler/X86Disassembler.h
new file mode 100644
index 0000000000..0e6e0b0e51
--- /dev/null
+++ b/lib/Target/X86/Disassembler/X86Disassembler.h
@@ -0,0 +1,150 @@
+//===- X86Disassembler.h - Disassembler for x86 and x86_64 ------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The X86 disassembler is a table-driven disassembler for the 16-, 32-, and
+// 64-bit X86 instruction sets.  The main decode sequence for an assembly
+// instruction in this disassembler is:
+//
+// 1. Read the prefix bytes and determine the attributes of the instruction.
+//    These attributes, recorded in enum attributeBits
+//    (X86DisassemblerDecoderCommon.h), form a bitmask.  The table CONTEXTS_SYM
+//    provides a mapping from bitmasks to contexts, which are represented by
+//    enum InstructionContext (ibid.).
+//
+// 2. Read the opcode, and determine what kind of opcode it is.  The
+//    disassembler distinguishes four kinds of opcodes, which are enumerated in
+//    OpcodeType (X86DisassemblerDecoderCommon.h): one-byte (0xnn), two-byte
+//    (0x0f 0xnn), three-byte-38 (0x0f 0x38 0xnn), or three-byte-3a 
+//    (0x0f 0x3a 0xnn).  Mandatory prefixes are treated as part of the context.
+//
+// 3. Depending on the opcode type, look in one of four ClassDecision structures
+//    (X86DisassemblerDecoderCommon.h).  Use the opcode class to determine which
+//    OpcodeDecision (ibid.) to look the opcode in.  Look up the opcode, to get
+//    a ModRMDecision (ibid.).
+//
+// 4. Some instructions, such as escape opcodes or extended opcodes, or even
+//    instructions that have ModRM*Reg / ModRM*Mem forms in LLVM, need the
+//    ModR/M byte to complete decode.  The ModRMDecision's type is an entry from
+//    ModRMDecisionType (X86DisassemblerDecoderCommon.h) that indicates if the
+//    ModR/M byte is required and how to interpret it.
+//
+// 5. After resolving the ModRMDecision, the disassembler has a unique ID
+//    of type InstrUID (X86DisassemblerDecoderCommon.h).  Looking this ID up in
+//    INSTRUCTIONS_SYM yields the name of the instruction and the encodings and
+//    meanings of its operands.
+//
+// 6. For each operand, its encoding is an entry from OperandEncoding
+//    (X86DisassemblerDecoderCommon.h) and its type is an entry from
+//    OperandType (ibid.).  The encoding indicates how to read it from the
+//    instruction; the type indicates how to interpret the value once it has
+//    been read.  For example, a register operand could be stored in the R/M
+//    field of the ModR/M byte, the REG field of the ModR/M byte, or added to
+//    the main opcode.  This is orthogonal from its meaning (an GPR or an XMM
+//    register, for instance).  Given this information, the operands can be
+//    extracted and interpreted.
+//
+// 7. As the last step, the disassembler translates the instruction information
+//    and operands into a format understandable by the client - in this case, an
+//    MCInst for use by the MC infrastructure.
+//
+// The disassembler is broken broadly into two parts: the table emitter that
+// emits the instruction decode tables discussed above during compilation, and
+// the disassembler itself.  The table emitter is documented in more detail in
+// utils/TableGen/X86DisassemblerEmitter.h.
+//
+// X86Disassembler.h contains the public interface for the disassembler,
+//   adhering to the MCDisassembler interface.
+// X86Disassembler.cpp contains the code responsible for step 7, and for
+//   invoking the decoder to execute steps 1-6.
+// X86DisassemblerDecoderCommon.h contains the definitions needed by both the
+//   table emitter and the disassembler.
+// X86DisassemblerDecoder.h contains the public interface of the decoder,
+//   factored out into C for possible use by other projects.
+// X86DisassemblerDecoder.c contains the source code of the decoder, which is
+//   responsible for steps 1-6.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86DISASSEMBLER_H
+#define X86DISASSEMBLER_H
+
+#define INSTRUCTION_SPECIFIER_FIELDS  \
+  const char*             name;
+
+#define INSTRUCTION_IDS               \
+  InstrUID*  instructionIDs;
+
+#include "X86DisassemblerDecoderCommon.h"
+
+#undef INSTRUCTION_SPECIFIER_FIELDS
+#undef INSTRUCTION_IDS
+
+#include "llvm/MC/MCDisassembler.h"
+
+struct InternalInstruction;
+
+namespace llvm {
+  
+class MCInst;
+class MemoryObject;
+class raw_ostream;
+  
+namespace X86Disassembler {
+
+/// X86GenericDisassembler - Generic disassembler for all X86 platforms.
+///   All each platform class should have to do is subclass the constructor, and
+///   provide a different disassemblerMode value.
+class X86GenericDisassembler : public MCDisassembler {
+protected:
+  /// Constructor     - Initializes the disassembler.
+  ///
+  /// @param mode     - The X86 architecture mode to decode for.
+  X86GenericDisassembler(DisassemblerMode mode);
+public:
+  ~X86GenericDisassembler();
+
+  /// getInstruction - See MCDisassembler.
+  bool getInstruction(MCInst &instr,
+                      uint64_t &size,
+                      const MemoryObject &region,
+                      uint64_t address,
+                      raw_ostream &vStream) const;
+private:
+  DisassemblerMode              fMode;
+};
+
+/// X86_16Disassembler - 16-bit X86 disassembler.
+class X86_16Disassembler : public X86GenericDisassembler {
+public:
+  X86_16Disassembler() :
+    X86GenericDisassembler(MODE_16BIT) {
+  }
+};  
+
+/// X86_16Disassembler - 32-bit X86 disassembler.
+class X86_32Disassembler : public X86GenericDisassembler {
+public:
+  X86_32Disassembler() :
+    X86GenericDisassembler(MODE_32BIT) {
+  }
+};
+
+/// X86_16Disassembler - 64-bit X86 disassembler.
+class X86_64Disassembler : public X86GenericDisassembler {
+public:
+  X86_64Disassembler() :
+    X86GenericDisassembler(MODE_64BIT) {
+  }
+};
+
+} // namespace X86Disassembler
+  
+} // namespace llvm
+  
+#endif
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
new file mode 100644
index 0000000000..99ae9cdd0b
--- /dev/null
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
@@ -0,0 +1,1361 @@
+/*===- X86DisassemblerDecoder.c - Disassembler decoder -------------*- C -*-==*
+ *
+ *                     The LLVM Compiler Infrastructure
+ *
+ * This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===*
+ *
+ * This file is part of the X86 Disassembler.
+ * It contains the implementation of the instruction decoder.
+ * Documentation for the disassembler can be found in X86Disassembler.h.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include <assert.h>   /* for assert()     */
+#include <stdarg.h>   /* for va_*()       */
+#include <stdio.h>    /* for vsnprintf()  */
+#include <stdlib.h>   /* for exit()       */
+#include <string.h>   /* for bzero()      */
+
+#include "X86DisassemblerDecoder.h"
+
+#include "X86GenDisassemblerTables.inc"
+
+#define TRUE  1
+#define FALSE 0
+
+#ifdef __GNUC__
+#define NORETURN __attribute__((noreturn))
+#else
+#define NORETURN
+#endif
+
+#define unreachable(s)                                      \
+  do {                                                      \
+    fprintf(stderr, "%s:%d: %s\n", __FILE__, __LINE__, s);  \
+    exit(-1);                                               \
+  } while (0);
+
+/*
+ * contextForAttrs - Client for the instruction context table.  Takes a set of
+ *   attributes and returns the appropriate decode context.
+ *
+ * @param attrMask  - Attributes, from the enumeration attributeBits.
+ * @return          - The InstructionContext to use when looking up an
+ *                    an instruction with these attributes.
+ */
+static inline InstructionContext contextForAttrs(uint8_t attrMask) {
+  return CONTEXTS_SYM[attrMask];
+}
+
+/*
+ * modRMRequired - Reads the appropriate instruction table to determine whether
+ *   the ModR/M byte is required to decode a particular instruction.
+ *
+ * @param type        - The opcode type (i.e., how many bytes it has).
+ * @param insnContext - The context for the instruction, as returned by
+ *                      contextForAttrs.
+ * @param opcode      - The last byte of the instruction's opcode, not counting
+ *                      ModR/M extensions and escapes.
+ * @return            - TRUE if the ModR/M byte is required, FALSE otherwise.
+ */
+static inline int modRMRequired(OpcodeType type,
+                                InstructionContext insnContext,
+                                uint8_t opcode) {
+  const struct ContextDecision* decision;
+  
+  switch (type) {
+  case ONEBYTE:
+    decision = &ONEBYTE_SYM;
+    break;
+  case TWOBYTE:
+    decision = &TWOBYTE_SYM;
+    break;
+  case THREEBYTE_38:
+    decision = &THREEBYTE38_SYM;
+    break;
+  case THREEBYTE_3A:
+    decision = &THREEBYTE3A_SYM;
+    break;
+  }
+  
+  return decision->opcodeDecisions[insnContext].modRMDecisions[opcode].
+    modrm_type != MODRM_ONEENTRY;
+  
+  unreachable("Unknown opcode type");
+  return 0;
+}
+
+/*
+ * decode - Reads the appropriate instruction table to obtain the unique ID of
+ *   an instruction.
+ *
+ * @param type        - See modRMRequired().
+ * @param insnContext - See modRMRequired().
+ * @param opcode      - See modRMRequired().
+ * @param modRM       - The ModR/M byte if required, or any value if not.
+ */
+static inline InstrUID decode(OpcodeType type,
+                               InstructionContext insnContext,
+                               uint8_t opcode,
+                               uint8_t modRM) {
+  struct ModRMDecision* dec;
+  
+  switch (type) {
+  default:
+    unreachable("Unknown opcode type");
+  case ONEBYTE:
+    dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
+    break;
+  case TWOBYTE:
+    dec = &TWOBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
+    break;
+  case THREEBYTE_38:
+    dec = &THREEBYTE38_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
+    break;
+  case THREEBYTE_3A:
+    dec = &THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
+    break;
+  }
+  
+  switch (dec->modrm_type) {
+  default:
+    unreachable("Corrupt table!  Unknown modrm_type");
+  case MODRM_ONEENTRY:
+    return dec->instructionIDs[0];
+  case MODRM_SPLITRM:
+    if (modFromModRM(modRM) == 0x3)
+      return dec->instructionIDs[1];
+    else
+      return dec->instructionIDs[0];
+  case MODRM_FULL:
+    return dec->instructionIDs[modRM];
+  }
+  
+  return 0;
+}
+
+/*
+ * specifierForUID - Given a UID, returns the name and operand specification for
+ *   that instruction.
+ *
+ * @param uid - The unique ID for the instruction.  This should be returned by
+ *              decode(); specifierForUID will not check bounds.
+ * @return    - A pointer to the specification for that instruction.
+ */
+static inline struct InstructionSpecifier* specifierForUID(InstrUID uid) {
+  return &INSTRUCTIONS_SYM[uid];
+}
+
+/*
+ * consumeByte - Uses the reader function provided by the user to consume one
+ *   byte from the instruction's memory and advance the cursor.
+ *
+ * @param insn  - The instruction with the reader function to use.  The cursor
+ *                for this instruction is advanced.
+ * @param byte  - A pointer to a pre-allocated memory buffer to be populated
+ *                with the data read.
+ * @return      - 0 if the read was successful; nonzero otherwise.
+ */
+static inline int consumeByte(struct InternalInstruction* insn, uint8_t* byte) {
+  int ret = insn->reader(insn->readerArg, byte, insn->readerCursor);
+  
+  if (!ret)
+    ++(insn->readerCursor);
+  
+  return ret;
+}
+
+/*
+ * lookAtByte - Like consumeByte, but does not advance the cursor.
+ *
+ * @param insn  - See consumeByte().
+ * @param byte  - See consumeByte().
+ * @return      - See consumeByte().
+ */
+static inline int lookAtByte(struct InternalInstruction* insn, uint8_t* byte) {
+  return insn->reader(insn->readerArg, byte, insn->readerCursor);
+}
+
+static inline void unconsumeByte(struct InternalInstruction* insn) {
+  insn->readerCursor--;
+}
+
+#define CONSUME_FUNC(name, type)                                          \
+  static inline int name(struct InternalInstruction* insn, type* ptr) {   \
+    type combined = 0;                                                    \
+    unsigned offset;                                                      \
+    for (offset = 0; offset < sizeof(type); ++offset) {                   \
+      uint8_t byte;                                                       \
+      int ret = insn->reader(insn->readerArg,                             \
+                             &byte,                                       \
+                             insn->readerCursor + offset);                \
+      if (ret)                                                            \
+        return ret;                                                       \
+      combined = combined | ((type)byte << ((type)offset * 8));           \
+    }                                                                     \
+    *ptr = combined;                                                      \
+    insn->readerCursor += sizeof(type);                                   \
+    return 0;                                                             \
+  }
+
+/*
+ * consume* - Use the reader function provided by the user to consume data
+ *   values of various sizes from the instruction's memory and advance the
+ *   cursor appropriately.  These readers perform endian conversion.
+ *
+ * @param insn    - See consumeByte().
+ * @param ptr     - A pointer to a pre-allocated memory of appropriate size to
+ *                  be populated with the data read.
+ * @return        - See consumeByte().
+ */
+CONSUME_FUNC(consumeInt8, int8_t)
+CONSUME_FUNC(consumeInt16, int16_t)
+CONSUME_FUNC(consumeInt32, int32_t)
+CONSUME_FUNC(consumeUInt16, uint16_t)
+CONSUME_FUNC(consumeUInt32, uint32_t)
+CONSUME_FUNC(consumeUInt64, uint64_t)
+
+/*
+ * dprintf - Uses the logging function provided by the user to log a single
+ *   message, typically without a carriage-return.
+ *
+ * @param insn    - The instruction containing the logging function.
+ * @param format  - See printf().
+ * @param ...     - See printf().
+ */
+static inline void dprintf(struct InternalInstruction* insn,
+                           const char* format,
+                           ...) {  
+  char buffer[256];
+  va_list ap;
+  
+  if (!insn->dlog)
+    return;
+    
+  va_start(ap, format);
+  (void)vsnprintf(buffer, sizeof(buffer), format, ap);
+  va_end(ap);
+  
+  insn->dlog(insn->dlogArg, buffer);
+  
+  return;
+}
+
+/*
+ * setPrefixPresent - Marks that a particular prefix is present at a particular
+ *   location.
+ *
+ * @param insn      - The instruction to be marked as having the prefix.
+ * @param prefix    - The prefix that is present.
+ * @param location  - The location where the prefix is located (in the address
+ *                    space of the instruction's reader).
+ */
+static inline void setPrefixPresent(struct InternalInstruction* insn,
+                                    uint8_t prefix,
+                                    uint64_t location)
+{
+  insn->prefixPresent[prefix] = 1;
+  insn->prefixLocations[prefix] = location;
+}
+
+/*
+ * isPrefixAtLocation - Queries an instruction to determine whether a prefix is
+ *   present at a given location.
+ *
+ * @param insn      - The instruction to be queried.
+ * @param prefix    - The prefix.
+ * @param location  - The location to query.
+ * @return          - Whether the prefix is at that location.
+ */
+static inline BOOL isPrefixAtLocation(struct InternalInstruction* insn,
+                                      uint8_t prefix,
+                                      uint64_t location)
+{
+  if (insn->prefixPresent[prefix] == 1 &&
+     insn->prefixLocations[prefix] == location)
+    return TRUE;
+  else
+    return FALSE;
+}
+
+/*
+ * readPrefixes - Consumes all of an instruction's prefix bytes, and marks the
+ *   instruction as having them.  Also sets the instruction's default operand,
+ *   address, and other relevant data sizes to report operands correctly.
+ *
+ * @param insn  - The instruction whose prefixes are to be read.
+ * @return      - 0 if the instruction could be read until the end of the prefix
+ *                bytes, and no prefixes conflicted; nonzero otherwise.
+ */
+static int readPrefixes(struct InternalInstruction* insn) {
+  BOOL isPrefix = TRUE;
+  BOOL prefixGroups[4] = { FALSE };
+  uint64_t prefixLocation;
+  uint8_t byte;
+  
+  BOOL hasAdSize = FALSE;
+  BOOL hasOpSize = FALSE;
+  
+  dprintf(insn, "readPrefixes()");
+    
+  while (isPrefix) {
+    prefixLocation = insn->readerCursor;
+    
+    if (consumeByte(insn, &byte))
+      return -1;
+    
+    switch (byte) {
+    case 0xf0:  /* LOCK */
+    case 0xf2:  /* REPNE/REPNZ */
+    case 0xf3:  /* REP or REPE/REPZ */
+      if (prefixGroups[0])
+        dprintf(insn, "Redundant Group 1 prefix");
+      prefixGroups[0] = TRUE;
+      setPrefixPresent(insn, byte, prefixLocation);
+      break;
+    case 0x2e:  /* CS segment override -OR- Branch not taken */
+    case 0x36:  /* SS segment override -OR- Branch taken */
+    case 0x3e:  /* DS segment override */
+    case 0x26:  /* ES segment override */
+    case 0x64:  /* FS segment override */
+    case 0x65:  /* GS segment override */
+      switch (byte) {
+      case 0x2e:
+        insn->segmentOverride = SEG_OVERRIDE_CS;
+        break;
+      case 0x36:
+        insn->segmentOverride = SEG_OVERRIDE_SS;
+        break;
+      case 0x3e:
+        insn->segmentOverride = SEG_OVERRIDE_DS;
+        break;
+      case 0x26:
+        insn->segmentOverride = SEG_OVERRIDE_ES;
+        break;
+      case 0x64:
+        insn->segmentOverride = SEG_OVERRIDE_FS;
+        break;
+      case 0x65:
+        insn->segmentOverride = SEG_OVERRIDE_GS;
+        break;
+      default:
+        unreachable("Unhandled override");
+      }
+      if (prefixGroups[1])
+        dprintf(insn, "Redundant Group 2 prefix");
+      prefixGroups[1] = TRUE;
+      setPrefixPresent(insn, byte, prefixLocation);
+      break;
+    case 0x66:  /* Operand-size override */
+      if (prefixGroups[2])
+        dprintf(insn, "Redundant Group 3 prefix");
+      prefixGroups[2] = TRUE;
+      hasOpSize = TRUE;
+      setPrefixPresent(insn, byte, prefixLocation);
+      break;
+    case 0x67:  /* Address-size override */
+      if (prefixGroups[3])
+        dprintf(insn, "Redundant Group 4 prefix");
+      prefixGroups[3] = TRUE;
+      hasAdSize = TRUE;
+      setPrefixPresent(insn, byte, prefixLocation);
+      break;
+    default:    /* Not a prefix byte */
+      isPrefix = FALSE;
+      break;
+    }
+    
+    if (isPrefix)
+      dprintf(insn, "Found prefix 0x%hhx", byte);
+  }
+  
+  if (insn->mode == MODE_64BIT) {
+    if ((byte & 0xf0) == 0x40) {
+      uint8_t opcodeByte;
+      
+      if(lookAtByte(insn, &opcodeByte) || ((opcodeByte & 0xf0) == 0x40)) {
+        dprintf(insn, "Redundant REX prefix");
+        return -1;
+      }
+      
+      insn->rexPrefix = byte;
+      insn->necessaryPrefixLocation = insn->readerCursor - 2;
+      
+      dprintf(insn, "Found REX prefix 0x%hhx", byte);
+    } else {                
+      unconsumeByte(insn);
+      insn->necessaryPrefixLocation = insn->readerCursor - 1;
+    }
+  } else {
+    unconsumeByte(insn);
+  }
+  
+  if (insn->mode == MODE_16BIT) {
+    insn->registerSize       = (hasOpSize ? 4 : 2);
+    insn->addressSize        = (hasAdSize ? 4 : 2);
+    insn->displacementSize   = (hasAdSize ? 4 : 2);
+    insn->immediateSize      = (hasOpSize ? 4 : 2);
+  } else if (insn->mode == MODE_32BIT) {
+    insn->registerSize       = (hasOpSize ? 2 : 4);
+    insn->addressSize        = (hasAdSize ? 2 : 4);
+    insn->displacementSize   = (hasAdSize ? 2 : 4);
+    insn->immediateSize      = (hasAdSize ? 2 : 4);
+  } else if (insn->mode == MODE_64BIT) {
+    if (insn->rexPrefix && wFromREX(insn->rexPrefix)) {
+      insn->registerSize       = 8;
+      insn->addressSize        = (hasAdSize ? 4 : 8);
+      insn->displacementSize   = 4;
+      insn->immediateSize      = 4;
+    } else if (insn->rexPrefix) {
+      insn->registerSize       = (hasOpSize ? 2 : 4);
+      insn->addressSize        = (hasAdSize ? 4 : 8);
+      insn->displacementSize   = (hasOpSize ? 2 : 4);
+      insn->immediateSize      = (hasOpSize ? 2 : 4);
+    } else {
+      insn->registerSize       = (hasOpSize ? 2 : 4);
+      insn->addressSize        = (hasAdSize ? 4 : 8);
+      insn->displacementSize   = (hasOpSize ? 2 : 4);
+      insn->immediateSize      = (hasOpSize ? 2 : 4);
+    }
+  }
+  
+  return 0;
+}
+
+/*
+ * readOpcode - Reads the opcode (excepting the ModR/M byte in the case of
+ *   extended or escape opcodes).
+ *
+ * @param insn  - The instruction whose opcode is to be read.
+ * @return      - 0 if the opcode could be read successfully; nonzero otherwise.
+ */
+static int readOpcode(struct InternalInstruction* insn) {  
+  /* Determine the length of the primary opcode */
+  
+  uint8_t current;
+  
+  dprintf(insn, "readOpcode()");
+  
+  insn->opcodeType = ONEBYTE;
+  if (consumeByte(insn, &current))
+    return -1;
+  
+  if (current == 0x0f) {
+    dprintf(insn, "Found a two-byte escape prefix (0x%hhx)", current);
+    
+    insn->twoByteEscape = current;
+    
+    if (consumeByte(insn, &current))
+      return -1;
+    
+    if (current == 0x38) {
+      dprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
+      
+      insn->threeByteEscape = current;
+      
+      if (consumeByte(insn, &current))
+        return -1;
+      
+      insn->opcodeType = THREEBYTE_38;
+    } else if (current == 0x3a) {
+      dprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
+      
+      insn->threeByteEscape = current;
+      
+      if (consumeByte(insn, &current))
+        return -1;
+      
+      insn->opcodeType = THREEBYTE_3A;
+    } else {
+      dprintf(insn, "Didn't find a three-byte escape prefix");
+      
+      insn->opcodeType = TWOBYTE;
+    }
+  }
+  
+  /*
+   * At this point we have consumed the full opcode.
+   * Anything we consume from here on must be unconsumed.
+   */
+  
+  insn->opcode = current;
+  
+  return 0;
+}
+
+static int readModRM(struct InternalInstruction* insn);
+
+/*
+ * getIDWithAttrMask - Determines the ID of an instruction, consuming
+ *   the ModR/M byte as appropriate for extended and escape opcodes,
+ *   and using a supplied attribute mask.
+ *
+ * @param instructionID - A pointer whose target is filled in with the ID of the
+ *                        instruction.
+ * @param insn          - The instruction whose ID is to be determined.
+ * @param attrMask      - The attribute mask to search.
+ * @return              - 0 if the ModR/M could be read when needed or was not
+ *                        needed; nonzero otherwise.
+ */
+static int getIDWithAttrMask(uint16_t* instructionID,
+                             struct InternalInstruction* insn,
+                             uint8_t attrMask) {
+  BOOL hasModRMExtension;
+  
+  uint8_t instructionClass;
+
+  instructionClass = contextForAttrs(attrMask);
+  
+  hasModRMExtension = modRMRequired(insn->opcodeType,
+                                    instructionClass,
+                                    insn->opcode);
+  
+  if (hasModRMExtension) {
+    readModRM(insn);
+    
+    *instructionID = decode(insn->opcodeType,
+                            instructionClass,
+                            insn->opcode,
+                            insn->modRM);
+  } else {
+    *instructionID = decode(insn->opcodeType,
+                            instructionClass,
+                            insn->opcode,
+                            0);
+  }
+      
+  return 0;
+}
+
+/*
+ * is16BitEquivalent - Determines whether two instruction names refer to
+ * equivalent instructions but one is 16-bit whereas the other is not.
+ *
+ * @param orig  - The instruction that is not 16-bit
+ * @param equiv - The instruction that is 16-bit
+ */
+static BOOL is16BitEquvalent(const char* orig, const char* equiv) {
+  off_t i;
+  
+  for(i = 0;; i++) {
+    if(orig[i] == '\0' && equiv[i] == '\0')
+      return TRUE;
+    if(orig[i] == '\0' || equiv[i] == '\0')
+      return FALSE;
+    if(orig[i] != equiv[i]) {
+      if((orig[i] == 'Q' || orig[i] == 'L') && equiv[i] == 'W')
+        continue;
+      if((orig[i] == '6' || orig[i] == '3') && equiv[i] == '1')
+        continue;
+      if((orig[i] == '4' || orig[i] == '2') && equiv[i] == '6')
+        continue;
+      return FALSE;
+    }
+  }
+}
+
+/*
+ * is64BitEquivalent - Determines whether two instruction names refer to
+ * equivalent instructions but one is 64-bit whereas the other is not.
+ *
+ * @param orig  - The instruction that is not 64-bit
+ * @param equiv - The instruction that is 64-bit
+ */
+static BOOL is64BitEquivalent(const char* orig, const char* equiv) {
+  off_t i;
+  
+  for(i = 0;; i++) {
+    if(orig[i] == '\0' && equiv[i] == '\0')
+      return TRUE;
+    if(orig[i] == '\0' || equiv[i] == '\0')
+      return FALSE;
+    if(orig[i] != equiv[i]) {
+      if((orig[i] == 'W' || orig[i] == 'L') && equiv[i] == 'Q')
+        continue;
+      if((orig[i] == '1' || orig[i] == '3') && equiv[i] == '6')
+        continue;
+      if((orig[i] == '6' || orig[i] == '2') && equiv[i] == '4')
+        continue;
+      return FALSE;
+    }
+  }
+}
+
+
+/*
+ * getID - Determines the ID of an instruction, consuming the ModR/M byte as 
+ *   appropriate for extended and escape opcodes.  Determines the attributes and 
+ *   context for the instruction before doing so.
+ *
+ * @param insn  - The instruction whose ID is to be determined.
+ * @return      - 0 if the ModR/M could be read when needed or was not needed;
+ *                nonzero otherwise.
+ */
+static int getID(struct InternalInstruction* insn) {  
+  uint8_t attrMask;
+  uint16_t instructionID;
+  
+  dprintf(insn, "getID()");
+    
+  attrMask = ATTR_NONE;
+  
+  if (insn->mode == MODE_64BIT)
+    attrMask |= ATTR_64BIT;
+  
+  if (insn->rexPrefix & 0x08)
+    attrMask |= ATTR_REXW;
+  
+  if (isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation))
+    attrMask |= ATTR_OPSIZE;
+  else if (isPrefixAtLocation(insn, 0xf3, insn->necessaryPrefixLocation))
+    attrMask |= ATTR_XS;
+  else if (isPrefixAtLocation(insn, 0xf2, insn->necessaryPrefixLocation))
+    attrMask |= ATTR_XD;
+  
+  if(getIDWithAttrMask(&instructionID, insn, attrMask))
+    return -1;
+  
+  /* The following clauses compensate for limitations of the tables. */
+  
+  if ((attrMask & ATTR_XD) && (attrMask & ATTR_REXW)) {
+    /*
+     * Although for SSE instructions it is usually necessary to treat REX.W+F2
+     * as F2 for decode (in the absence of a 64BIT_REXW_XD category) there is
+     * an occasional instruction where F2 is incidental and REX.W is the more
+     * significant.  If the decoded instruction is 32-bit and adding REX.W
+     * instead of F2 changes a 32 to a 64, we adopt the new encoding.
+     */
+    
+    struct InstructionSpecifier* spec;
+    uint16_t instructionIDWithREXw;
+    struct InstructionSpecifier* specWithREXw;
+    
+    spec = specifierForUID(instructionID);
+    
+    if (getIDWithAttrMask(&instructionIDWithREXw,
+                          insn,
+                          attrMask & (~ATTR_XD))) {
+      /*
+       * Decoding with REX.w would yield nothing; give up and return original
+       * decode.
+       */
+      
+      insn->instructionID = instructionID;
+      insn->spec = spec;
+      return 0;
+    }
+    
+    specWithREXw = specifierForUID(instructionIDWithREXw);
+    
+    if (is64BitEquivalent(spec->name, specWithREXw->name)) {
+      insn->instructionID = instructionIDWithREXw;
+      insn->spec = specWithREXw;
+    } else {
+      insn->instructionID = instructionID;
+      insn->spec = spec;
+    }
+    return 0;
+  }
+  
+  if (insn->prefixPresent[0x66] && !(attrMask & ATTR_OPSIZE)) {
+    /*
+     * The instruction tables make no distinction between instructions that
+     * allow OpSize anywhere (i.e., 16-bit operations) and that need it in a
+     * particular spot (i.e., many MMX operations).  In general we're
+     * conservative, but in the specific case where OpSize is present but not
+     * in the right place we check if there's a 16-bit operation.
+     */
+    
+    struct InstructionSpecifier* spec;
+    uint16_t instructionIDWithOpsize;
+    struct InstructionSpecifier* specWithOpsize;
+    
+    spec = specifierForUID(instructionID);
+    
+    if (getIDWithAttrMask(&instructionIDWithOpsize,
+                          insn,
+                          attrMask | ATTR_OPSIZE)) {
+      /* 
+       * ModRM required with OpSize but not present; give up and return version
+       * without OpSize set
+       */
+      
+      insn->instructionID = instructionID;
+      insn->spec = spec;
+      return 0;
+    }
+    
+    specWithOpsize = specifierForUID(instructionIDWithOpsize);
+    
+    if (is16BitEquvalent(spec->name, specWithOpsize->name)) {
+      insn->instructionID = instructionIDWithOpsize;
+      insn->spec = specWithOpsize;
+    } else {
+      insn->instructionID = instructionID;
+      insn->spec = spec;
+    }
+    return 0;
+  }
+  
+  insn->instructionID = instructionID;
+  insn->spec = specifierForUID(insn->instructionID);
+  
+  return 0;
+}
+
+/*
+ * readSIB - Consumes the SIB byte to determine addressing information for an
+ *   instruction.
+ *
+ * @param insn  - The instruction whose SIB byte is to be read.
+ * @return      - 0 if the SIB byte was successfully read; nonzero otherwise.
+ */
+static int readSIB(struct InternalInstruction* insn) {
+  SIBIndex sibIndexBase;
+  SIBBase sibBaseBase;
+  uint8_t index, base;
+  
+  dprintf(insn, "readSIB()");
+  
+  if (insn->consumedSIB)
+    return 0;
+  
+  insn->consumedSIB = TRUE;
+  
+  switch (insn->addressSize) {
+  case 2:
+    dprintf(insn, "SIB-based addressing doesn't work in 16-bit mode");
+    return -1;
+    break;
+  case 4:
+    sibIndexBase = SIB_INDEX_EAX;
+    sibBaseBase = SIB_BASE_EAX;
+    break;
+  case 8:
+    sibIndexBase = SIB_INDEX_RAX;
+    sibBaseBase = SIB_BASE_RAX;
+    break;
+  }
+
+  if (consumeByte(insn, &insn->sib))
+    return -1;
+  
+  index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3);
+  
+  switch (index) {
+  case 0x4:
+    insn->sibIndex = SIB_INDEX_NONE;
+    break;
+  default:
+    insn->sibIndex = (EABase)(sibIndexBase + index);
+    if (insn->sibIndex == SIB_INDEX_sib ||
+        insn->sibIndex == SIB_INDEX_sib64)
+      insn->sibIndex = SIB_INDEX_NONE;
+    break;
+  }
+  
+  switch (scaleFromSIB(insn->sib)) {
+  case 0:
+    insn->sibScale = 1;
+    break;
+  case 1:
+    insn->sibScale = 2;
+    break;
+  case 2:
+    insn->sibScale = 4;
+    break;
+  case 3:
+    insn->sibScale = 8;
+    break;
+  }
+  
+  base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3);
+  
+  switch (base) {
+  case 0x5:
+    switch (modFromModRM(insn->modRM)) {
+    case 0x0:
+      insn->eaDisplacement = EA_DISP_32;
+      insn->sibBase = SIB_BASE_NONE;
+      break;
+    case 0x1:
+      insn->eaDisplacement = EA_DISP_8;
+      insn->sibBase = (insn->addressSize == 4 ? 
+                       SIB_BASE_EBP : SIB_BASE_RBP);
+      break;
+    case 0x2:
+      insn->eaDisplacement = EA_DISP_32;
+      insn->sibBase = (insn->addressSize == 4 ? 
+                       SIB_BASE_EBP : SIB_BASE_RBP);
+      break;
+    case 0x3:
+      unreachable("Cannot have Mod = 0b11 and a SIB byte");
+    }
+    break;
+  default:
+    insn->sibBase = (EABase)(sibBaseBase + base);
+    break;
+  }
+  
+  return 0;
+}
+
+/*
+ * readDisplacement - Consumes the displacement of an instruction.
+ *
+ * @param insn  - The instruction whose displacement is to be read.
+ * @return      - 0 if the displacement byte was successfully read; nonzero 
+ *                otherwise.
+ */
+static int readDisplacement(struct InternalInstruction* insn) {  
+  int8_t d8;
+  int16_t d16;
+  int32_t d32;
+  
+  dprintf(insn, "readDisplacement()");
+  
+  if (insn->consumedDisplacement)
+    return 0;
+  
+  insn->consumedDisplacement = TRUE;
+  
+  switch (insn->eaDisplacement) {
+  case EA_DISP_NONE:
+    insn->consumedDisplacement = FALSE;
+    break;
+  case EA_DISP_8:
+    if (consumeInt8(insn, &d8))
+      return -1;
+    insn->displacement = d8;
+    break;
+  case EA_DISP_16:
+    if (consumeInt16(insn, &d16))
+      return -1;
+    insn->displacement = d16;
+    break;
+  case EA_DISP_32:
+    if (consumeInt32(insn, &d32))
+      return -1;
+    insn->displacement = d32;
+    break;
+  }
+  
+  insn->consumedDisplacement = TRUE;
+  return 0;
+}
+
+/*
+ * readModRM - Consumes all addressing information (ModR/M byte, SIB byte, and
+ *   displacement) for an instruction and interprets it.
+ *
+ * @param insn  - The instruction whose addressing information is to be read.
+ * @return      - 0 if the information was successfully read; nonzero otherwise.
+ */
+static int readModRM(struct InternalInstruction* insn) {  
+  uint8_t mod, rm, reg;
+  
+  dprintf(insn, "readModRM()");
+  
+  if (insn->consumedModRM)
+    return 0;
+  
+  consumeByte(insn, &insn->modRM);
+  insn->consumedModRM = TRUE;
+  
+  mod     = modFromModRM(insn->modRM);
+  rm      = rmFromModRM(insn->modRM);
+  reg     = regFromModRM(insn->modRM);
+  
+  /*
+   * This goes by insn->registerSize to pick the correct register, which messes
+   * up if we're using (say) XMM or 8-bit register operands.  That gets fixed in
+   * fixupReg().
+   */
+  switch (insn->registerSize) {
+  case 2:
+    insn->regBase = REG_AX;
+    insn->eaRegBase = EA_REG_AX;
+    break;
+  case 4:
+    insn->regBase = REG_EAX;
+    insn->eaRegBase = EA_REG_EAX;
+    break;
+  case 8:
+    insn->regBase = REG_RAX;
+    insn->eaRegBase = EA_REG_RAX;
+    break;
+  }
+  
+  reg |= rFromREX(insn->rexPrefix) << 3;
+  rm  |= bFromREX(insn->rexPrefix) << 3;
+  
+  insn->reg = (Reg)(insn->regBase + reg);
+  
+  switch (insn->addressSize) {
+  case 2:
+    insn->eaBaseBase = EA_BASE_BX_SI;
+     
+    switch (mod) {
+    case 0x0:
+      if (rm == 0x6) {
+        insn->eaBase = EA_BASE_NONE;
+        insn->eaDisplacement = EA_DISP_16;
+        if(readDisplacement(insn))
+          return -1;
+      } else {
+        insn->eaBase = (EABase)(insn->eaBaseBase + rm);
+        insn->eaDisplacement = EA_DISP_NONE;
+      }
+      break;
+    case 0x1:
+      insn->eaBase = (EABase)(insn->eaBaseBase + rm);
+      insn->eaDisplacement = EA_DISP_8;
+      if(readDisplacement(insn))
+        return -1;
+      break;
+    case 0x2:
+      insn->eaBase = (EABase)(insn->eaBaseBase + rm);
+      insn->eaDisplacement = EA_DISP_16;
+      if(readDisplacement(insn))
+        return -1;
+      break;
+    case 0x3:
+      insn->eaBase = (EABase)(insn->eaRegBase + rm);
+      if(readDisplacement(insn))
+        return -1;
+      break;
+    }
+    break;
+  case 4:
+  case 8:
+    insn->eaBaseBase = (insn->addressSize == 4 ? EA_BASE_EAX : EA_BASE_RAX);
+    
+    switch (mod) {
+    case 0x0:
+      insn->eaDisplacement = EA_DISP_NONE; /* readSIB may override this */
+      switch (rm) {
+      case 0x4:
+      case 0xc:   /* in case REXW.b is set */
+        insn->eaBase = (insn->addressSize == 4 ? 
+                        EA_BASE_sib : EA_BASE_sib64);
+        readSIB(insn);
+        if(readDisplacement(insn))
+          return -1;
+        break;
+      case 0x5:
+        insn->eaBase = EA_BASE_NONE;
+        insn->eaDisplacement = EA_DISP_32;
+        if(readDisplacement(insn))
+          return -1;
+        break;
+      default:
+        insn->eaBase = (EABase)(insn->eaBaseBase + rm);
+        break;
+      }
+      break;
+    case 0x1:
+    case 0x2:
+      insn->eaDisplacement = (mod == 0x1 ? EA_DISP_8 : EA_DISP_32);
+      switch (rm) {
+      case 0x4:
+      case 0xc:   /* in case REXW.b is set */
+        insn->eaBase = EA_BASE_sib;
+        readSIB(insn);
+        if(readDisplacement(insn))
+          return -1;
+        break;
+      default:
+        insn->eaBase = (EABase)(insn->eaBaseBase + rm);
+        if(readDisplacement(insn))
+          return -1;
+        break;
+      }
+      break;
+    case 0x3:
+      insn->eaDisplacement = EA_DISP_NONE;
+      insn->eaBase = (EABase)(insn->eaRegBase + rm);
+      break;
+    }
+    break;
+  } /* switch (insn->addressSize) */
+  
+  return 0;
+}
+
+#define GENERIC_FIXUP_FUNC(name, base, prefix)            \
+  static uint8_t name(struct InternalInstruction *insn,   \
+                      OperandType type,                   \
+                      uint8_t index,                      \
+                      uint8_t *valid) {                   \
+    *valid = 1;                                           \
+    switch (type) {                                       \
+    default:                                              \
+      unreachable("Unhandled register type");             \
+    case TYPE_Rv:                                         \
+      return base + index;                                \
+    case TYPE_R8:                                         \
+      if(insn->rexPrefix &&                               \
+         index >= 4 && index <= 7) {                      \
+        return prefix##_SPL + (index - 4);                \
+      } else {                                            \
+        return prefix##_AL + index;                       \
+      }                                                   \
+    case TYPE_R16:                                        \
+      return prefix##_AX + index;                         \
+    case TYPE_R32:                                        \
+      return prefix##_EAX + index;                        \
+    case TYPE_R64:                                        \
+      return prefix##_RAX + index;                        \
+    case TYPE_XMM128:                                     \
+    case TYPE_XMM64:                                      \
+    case TYPE_XMM32:                                      \
+    case TYPE_XMM:                                        \
+      return prefix##_XMM0 + index;                       \
+    case TYPE_MM64:                                       \
+    case TYPE_MM32:                                       \
+    case TYPE_MM:                                         \
+      if(index > 7)                                       \
+        *valid = 0;                                       \
+      return prefix##_MM0 + index;                        \
+    case TYPE_SEGMENTREG:                                 \
+      if(index > 5)                                       \
+        *valid = 0;                                       \
+      return prefix##_ES + index;                         \
+    case TYPE_DEBUGREG:                                   \
+      if(index > 7)                                       \
+        *valid = 0;                                       \
+      return prefix##_DR0 + index;                        \
+    case TYPE_CR32:                                       \
+      if(index > 7)                                       \
+        *valid = 0;                                       \
+      return prefix##_ECR0 + index;                       \
+    case TYPE_CR64:                                       \
+      if(index > 8)                                       \
+        *valid = 0;                                       \
+      return prefix##_RCR0 + index;                       \
+    }                                                     \
+  }
+
+/*
+ * fixup*Value - Consults an operand type to determine the meaning of the
+ *   reg or R/M field.  If the operand is an XMM operand, for example, an
+ *   operand would be XMM0 instead of AX, which readModRM() would otherwise
+ *   misinterpret it as.
+ *
+ * @param insn  - The instruction containing the operand.
+ * @param type  - The operand type.
+ * @param index - The existing value of the field as reported by readModRM().
+ * @param valid - The address of a uint8_t.  The target is set to 1 if the
+ *                field is valid for the register class; 0 if not.
+ */
+GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase,    REG)
+GENERIC_FIXUP_FUNC(fixupRMValue,  insn->eaRegBase,  EA_REG)
+
+/*
+ * fixupReg - Consults an operand specifier to determine which of the
+ *   fixup*Value functions to use in correcting readModRM()'ss interpretation.
+ *
+ * @param insn  - See fixup*Value().
+ * @param op    - The operand specifier.
+ * @return      - 0 if fixup was successful; -1 if the register returned was
+ *                invalid for its class.
+ */
+static int fixupReg(struct InternalInstruction *insn, 
+                    struct OperandSpecifier *op) {
+  uint8_t valid;
+  
+  dprintf(insn, "fixupReg()");
+  
+  switch ((OperandEncoding)op->encoding) {
+  default:
+    unreachable("Expected a REG or R/M encoding in fixupReg");
+  case ENCODING_REG:
+    insn->reg = (Reg)fixupRegValue(insn,
+                                   (OperandType)op->type,
+                                   insn->reg - insn->regBase,
+                                   &valid);
+    if (!valid)
+      return -1;
+    break;
+  case ENCODING_RM:
+    if (insn->eaBase >= insn->eaRegBase) {
+      insn->eaBase = (EABase)fixupRMValue(insn,
+                                          (OperandType)op->type,
+                                          insn->eaBase - insn->eaRegBase,
+                                          &valid);
+      if (!valid)
+        return -1;
+    }
+    break;
+  }
+  
+  return 0;
+}
+
+/*
+ * readOpcodeModifier - Reads an operand from the opcode field of an 
+ *   instruction.  Handles AddRegFrm instructions.
+ *
+ * @param insn    - The instruction whose opcode field is to be read.
+ * @param inModRM - Indicates that the opcode field is to be read from the
+ *                  ModR/M extension; useful for escape opcodes
+ */
+static void readOpcodeModifier(struct InternalInstruction* insn) {
+  dprintf(insn, "readOpcodeModifier()");
+  
+  if (insn->consumedOpcodeModifier)
+    return;
+  
+  insn->consumedOpcodeModifier = TRUE;
+  
+  switch(insn->spec->modifierType) {
+  default:
+    unreachable("Unknown modifier type.");
+  case MODIFIER_NONE:
+    unreachable("No modifier but an operand expects one.");
+  case MODIFIER_OPCODE:
+    insn->opcodeModifier = insn->opcode - insn->spec->modifierBase;
+    break;
+  case MODIFIER_MODRM:
+    insn->opcodeModifier = insn->modRM - insn->spec->modifierBase;
+    break;
+  }  
+}
+
+/*
+ * readOpcodeRegister - Reads an operand from the opcode field of an 
+ *   instruction and interprets it appropriately given the operand width.
+ *   Handles AddRegFrm instructions.
+ *
+ * @param insn  - See readOpcodeModifier().
+ * @param size  - The width (in bytes) of the register being specified.
+ *                1 means AL and friends, 2 means AX, 4 means EAX, and 8 means
+ *                RAX.
+ */
+static void readOpcodeRegister(struct InternalInstruction* insn, uint8_t size) {
+  dprintf(insn, "readOpcodeRegister()");
+
+  readOpcodeModifier(insn);
+  
+  if (size == 0)
+    size = insn->registerSize;
+  
+  switch (size) {
+  case 1:
+    insn->opcodeRegister = (Reg)(REG_AL + ((bFromREX(insn->rexPrefix) << 3) 
+                                           | insn->opcodeModifier));
+    if(insn->rexPrefix && 
+       insn->opcodeRegister >= REG_AL + 0x4 &&
+       insn->opcodeRegister < REG_AL + 0x8) {
+      insn->opcodeRegister = (Reg)(REG_SPL + (insn->opcodeRegister - REG_AL - 4));
+    }
+      
+    break;
+  case 2:
+    insn->opcodeRegister = (Reg)(REG_AX + ((bFromREX(insn->rexPrefix) << 3) 
+                                            | insn->opcodeModifier));
+    break;
+  case 4:
+    insn->opcodeRegister = (Reg)(REG_EAX + ((bFromREX(insn->rexPrefix) << 3) 
+                                             | insn->opcodeModifier));
+    break;
+  case 8:
+    insn->opcodeRegister = (Reg)(REG_RAX + ((bFromREX(insn->rexPrefix) << 3) 
+                                             |insn->opcodeModifier));
+    break;
+  }
+}
+
+/*
+ * readImmediate - Consumes an immediate operand from an instruction, given the
+ *   desired operand size.
+ *
+ * @param insn  - The instruction whose operand is to be read.
+ * @param size  - The width (in bytes) of the operand.
+ * @return      - 0 if the immediate was successfully consumed; nonzero
+ *                otherwise.
+ */
+static int readImmediate(struct InternalInstruction* insn, uint8_t size) {
+  uint8_t imm8;
+  uint16_t imm16;
+  uint32_t imm32;
+  uint64_t imm64;
+  
+  dprintf(insn, "readImmediate()");
+  
+  if (insn->numImmediatesConsumed == 2)
+    unreachable("Already consumed two immediates");
+  
+  if (size == 0)
+    size = insn->immediateSize;
+  else
+    insn->immediateSize = size;
+  
+  switch (size) {
+  case 1:
+    if (consumeByte(insn, &imm8))
+      return -1;
+    insn->immediates[insn->numImmediatesConsumed] = imm8;
+    break;
+  case 2:
+    if (consumeUInt16(insn, &imm16))
+      return -1;
+    insn->immediates[insn->numImmediatesConsumed] = imm16;
+    break;
+  case 4:
+    if (consumeUInt32(insn, &imm32))
+      return -1;
+    insn->immediates[insn->numImmediatesConsumed] = imm32;
+    break;
+  case 8:
+    if (consumeUInt64(insn, &imm64))
+      return -1;
+    insn->immediates[insn->numImmediatesConsumed] = imm64;
+    break;
+  }
+  
+  insn->numImmediatesConsumed++;
+  
+  return 0;
+}
+
+/*
+ * readOperands - Consults the specifier for an instruction and consumes all
+ *   operands for that instruction, interpreting them as it goes.
+ *
+ * @param insn  - The instruction whose operands are to be read and interpreted.
+ * @return      - 0 if all operands could be read; nonzero otherwise.
+ */
+static int readOperands(struct InternalInstruction* insn) {
+  int index;
+  
+  dprintf(insn, "readOperands()");
+  
+  for (index = 0; index < X86_MAX_OPERANDS; ++index) {
+    switch (insn->spec->operands[index].encoding) {
+    case ENCODING_NONE:
+      break;
+    case ENCODING_REG:
+    case ENCODING_RM:
+      if (readModRM(insn))
+        return -1;
+      if (fixupReg(insn, &insn->spec->operands[index]))
+        return -1;
+      break;
+    case ENCODING_CB:
+    case ENCODING_CW:
+    case ENCODING_CD:
+    case ENCODING_CP:
+    case ENCODING_CO:
+    case ENCODING_CT:
+      dprintf(insn, "We currently don't hande code-offset encodings");
+      return -1;
+    case ENCODING_IB:
+      if (readImmediate(insn, 1))
+        return -1;
+      break;
+    case ENCODING_IW:
+      if (readImmediate(insn, 2))
+        return -1;
+      break;
+    case ENCODING_ID:
+      if (readImmediate(insn, 4))
+        return -1;
+      break;
+    case ENCODING_IO:
+      if (readImmediate(insn, 8))
+        return -1;
+      break;
+    case ENCODING_Iv:
+      readImmediate(insn, insn->immediateSize);
+      break;
+    case ENCODING_Ia:
+      readImmediate(insn, insn->addressSize);
+      break;
+    case ENCODING_RB:
+      readOpcodeRegister(insn, 1);
+      break;
+    case ENCODING_RW:
+      readOpcodeRegister(insn, 2);
+      break;
+    case ENCODING_RD:
+      readOpcodeRegister(insn, 4);
+      break;
+    case ENCODING_RO:
+      readOpcodeRegister(insn, 8);
+      break;
+    case ENCODING_Rv:
+      readOpcodeRegister(insn, 0);
+      break;
+    case ENCODING_I:
+      readOpcodeModifier(insn);
+      break;
+    case ENCODING_DUP:
+      break;
+    default:
+      dprintf(insn, "Encountered an operand with an unknown encoding.");
+      return -1;
+    }
+  }
+  
+  return 0;
+}
+
+/*
+ * decodeInstruction - Reads and interprets a full instruction provided by the
+ *   user.
+ *
+ * @param insn      - A pointer to the instruction to be populated.  Must be 
+ *                    pre-allocated.
+ * @param reader    - The function to be used to read the instruction's bytes.
+ * @param readerArg - A generic argument to be passed to the reader to store
+ *                    any internal state.
+ * @param logger    - If non-NULL, the function to be used to write log messages
+ *                    and warnings.
+ * @param loggerArg - A generic argument to be passed to the logger to store
+ *                    any internal state.
+ * @param startLoc  - The address (in the reader's address space) of the first
+ *                    byte in the instruction.
+ * @param mode      - The mode (real mode, IA-32e, or IA-32e in 64-bit mode) to
+ *                    decode the instruction in.
+ * @return          - 0 if the instruction's memory could be read; nonzero if
+ *                    not.
+ */
+int decodeInstruction(struct InternalInstruction* insn,
+                      byteReader_t reader,
+                      void* readerArg,
+                      dlog_t logger,
+                      void* loggerArg,
+                      uint64_t startLoc,
+                      DisassemblerMode mode) {
+  bzero(insn, sizeof(struct InternalInstruction));
+    
+  insn->reader = reader;
+  insn->readerArg = readerArg;
+  insn->dlog = logger;
+  insn->dlogArg = loggerArg;
+  insn->startLocation = startLoc;
+  insn->readerCursor = startLoc;
+  insn->mode = mode;
+  insn->numImmediatesConsumed = 0;
+  
+  if (readPrefixes(insn)       ||
+      readOpcode(insn)         ||
+      getID(insn)              ||
+      insn->instructionID == 0 ||
+      readOperands(insn))
+    return -1;
+  
+  insn->length = insn->readerCursor - insn->startLocation;
+  
+  dprintf(insn, "Read from 0x%llx to 0x%llx: length %llu",
+          startLoc, insn->readerCursor, insn->length);
+    
+  if (insn->length > 15)
+    dprintf(insn, "Instruction exceeds 15-byte limit");
+  
+  return 0;
+}
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
new file mode 100644
index 0000000000..f548c65dad
--- /dev/null
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
@@ -0,0 +1,515 @@
+/*===- X86DisassemblerDecoderInternal.h - Disassembler decoder -----*- C -*-==*
+ *
+ *                     The LLVM Compiler Infrastructure
+ *
+ * This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===*
+ *
+ * This file is part of the X86 Disassembler.
+ * It contains the public interface of the instruction decoder.
+ * Documentation for the disassembler can be found in X86Disassembler.h.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#ifndef X86DISASSEMBLERDECODER_H
+#define X86DISASSEMBLERDECODER_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+  
+#define INSTRUCTION_SPECIFIER_FIELDS  \
+  const char*             name;
+
+#define INSTRUCTION_IDS     \
+  InstrUID*  instructionIDs;
+
+#include "X86DisassemblerDecoderCommon.h"
+  
+#undef INSTRUCTION_SPECIFIER_FIELDS
+#undef INSTRUCTION_IDS
+  
+/*
+ * Accessor functions for various fields of an Intel instruction
+ */
+static inline uint8_t modFromModRM(uint8_t modRM){ return (modRM & 0xc0) >> 6; }
+static inline uint8_t regFromModRM(uint8_t modRM){ return (modRM & 0x38) >> 3; }
+static inline uint8_t rmFromModRM(uint8_t modRM) { return (modRM & 0x7);       }
+static inline uint8_t scaleFromSIB(uint8_t sib)  { return (sib & 0xc0) >> 6;   }
+static inline uint8_t indexFromSIB(uint8_t sib)  { return (sib & 0x38) >> 3;   }
+static inline uint8_t baseFromSIB(uint8_t sib)   { return (sib & 0x7);         }
+static inline uint8_t wFromREX(uint8_t rex)      { return (rex & 0x8) >> 3;    }
+static inline uint8_t rFromREX(uint8_t rex)      { return (rex & 0x4) >> 2;    }
+static inline uint8_t xFromREX(uint8_t rex)      { return (rex & 0x2) >> 1;    }
+static inline uint8_t bFromREX(uint8_t rex)      { return (rex & 0x1);         }
+
+/*
+ * These enums represent Intel registers for use by the decoder.
+ */
+
+#define REGS_8BIT     \
+  ENTRY(AL)           \
+  ENTRY(CL)           \
+  ENTRY(DL)           \
+  ENTRY(BL)           \
+  ENTRY(AH)           \
+  ENTRY(CH)           \
+  ENTRY(DH)           \
+  ENTRY(BH)           \
+  ENTRY(R8B)          \
+  ENTRY(R9B)          \
+  ENTRY(R10B)         \
+  ENTRY(R11B)         \
+  ENTRY(R12B)         \
+  ENTRY(R13B)         \
+  ENTRY(R14B)         \
+  ENTRY(R15B)         \
+  ENTRY(SPL)          \
+  ENTRY(BPL)          \
+  ENTRY(SIL)          \
+  ENTRY(DIL)
+
+#define EA_BASES_16BIT  \
+  ENTRY(BX_SI)          \
+  ENTRY(BX_DI)          \
+  ENTRY(BP_SI)          \
+  ENTRY(BP_DI)          \
+  ENTRY(SI)             \
+  ENTRY(DI)             \
+  ENTRY(BP)             \
+  ENTRY(BX)             \
+  ENTRY(R8W)            \
+  ENTRY(R9W)            \
+  ENTRY(R10W)           \
+  ENTRY(R11W)           \
+  ENTRY(R12W)           \
+  ENTRY(R13W)           \
+  ENTRY(R14W)           \
+  ENTRY(R15W)
+
+#define REGS_16BIT    \
+  ENTRY(AX)           \
+  ENTRY(CX)           \
+  ENTRY(DX)           \
+  ENTRY(BX)           \
+  ENTRY(SP)           \
+  ENTRY(BP)           \
+  ENTRY(SI)           \
+  ENTRY(DI)           \
+  ENTRY(R8W)          \
+  ENTRY(R9W)          \
+  ENTRY(R10W)         \
+  ENTRY(R11W)         \
+  ENTRY(R12W)         \
+  ENTRY(R13W)         \
+  ENTRY(R14W)         \
+  ENTRY(R15W)
+
+#define EA_BASES_32BIT  \
+  ENTRY(EAX)            \
+  ENTRY(ECX)            \
+  ENTRY(EDX)            \
+  ENTRY(EBX)            \
+  ENTRY(sib)            \
+  ENTRY(EBP)            \
+  ENTRY(ESI)            \
+  ENTRY(EDI)            \
+  ENTRY(R8D)            \
+  ENTRY(R9D)            \
+  ENTRY(R10D)           \
+  ENTRY(R11D)           \
+  ENTRY(R12D)           \
+  ENTRY(R13D)           \
+  ENTRY(R14D)           \
+  ENTRY(R15D)
+
+#define REGS_32BIT  \
+  ENTRY(EAX)        \
+  ENTRY(ECX)        \
+  ENTRY(EDX)        \
+  ENTRY(EBX)        \
+  ENTRY(ESP)        \
+  ENTRY(EBP)        \
+  ENTRY(ESI)        \
+  ENTRY(EDI)        \
+  ENTRY(R8D)        \
+  ENTRY(R9D)        \
+  ENTRY(R10D)       \
+  ENTRY(R11D)       \
+  ENTRY(R12D)       \
+  ENTRY(R13D)       \
+  ENTRY(R14D)       \
+  ENTRY(R15D)
+
+#define EA_BASES_64BIT  \
+  ENTRY(RAX)            \
+  ENTRY(RCX)            \
+  ENTRY(RDX)            \
+  ENTRY(RBX)            \
+  ENTRY(sib64)          \
+  ENTRY(RBP)            \
+  ENTRY(RSI)            \
+  ENTRY(RDI)            \
+  ENTRY(R8)             \
+  ENTRY(R9)             \
+  ENTRY(R10)            \
+  ENTRY(R11)            \
+  ENTRY(R12)            \
+  ENTRY(R13)            \
+  ENTRY(R14)            \
+  ENTRY(R15)
+
+#define REGS_64BIT  \
+  ENTRY(RAX)        \
+  ENTRY(RCX)        \
+  ENTRY(RDX)        \
+  ENTRY(RBX)        \
+  ENTRY(RSP)        \
+  ENTRY(RBP)        \
+  ENTRY(RSI)        \
+  ENTRY(RDI)        \
+  ENTRY(R8)         \
+  ENTRY(R9)         \
+  ENTRY(R10)        \
+  ENTRY(R11)        \
+  ENTRY(R12)        \
+  ENTRY(R13)        \
+  ENTRY(R14)        \
+  ENTRY(R15)
+
+#define REGS_MMX  \
+  ENTRY(MM0)      \
+  ENTRY(MM1)      \
+  ENTRY(MM2)      \
+  ENTRY(MM3)      \
+  ENTRY(MM4)      \
+  ENTRY(MM5)      \
+  ENTRY(MM6)      \
+  ENTRY(MM7)
+
+#define REGS_XMM  \
+  ENTRY(XMM0)     \
+  ENTRY(XMM1)     \
+  ENTRY(XMM2)     \
+  ENTRY(XMM3)     \
+  ENTRY(XMM4)     \
+  ENTRY(XMM5)     \
+  ENTRY(XMM6)     \
+  ENTRY(XMM7)     \
+  ENTRY(XMM8)     \
+  ENTRY(XMM9)     \
+  ENTRY(XMM10)    \
+  ENTRY(XMM11)    \
+  ENTRY(XMM12)    \
+  ENTRY(XMM13)    \
+  ENTRY(XMM14)    \
+  ENTRY(XMM15)
+  
+#define REGS_SEGMENT \
+  ENTRY(ES)          \
+  ENTRY(CS)          \
+  ENTRY(SS)          \
+  ENTRY(DS)          \
+  ENTRY(FS)          \
+  ENTRY(GS)
+  
+#define REGS_DEBUG  \
+  ENTRY(DR0)        \
+  ENTRY(DR1)        \
+  ENTRY(DR2)        \
+  ENTRY(DR3)        \
+  ENTRY(DR4)        \
+  ENTRY(DR5)        \
+  ENTRY(DR6)        \
+  ENTRY(DR7)
+
+#define REGS_CONTROL_32BIT  \
+  ENTRY(ECR0)               \
+  ENTRY(ECR1)               \
+  ENTRY(ECR2)               \
+  ENTRY(ECR3)               \
+  ENTRY(ECR4)               \
+  ENTRY(ECR5)               \
+  ENTRY(ECR6)               \
+  ENTRY(ECR7)
+
+#define REGS_CONTROL_64BIT  \
+  ENTRY(RCR0)               \
+  ENTRY(RCR1)               \
+  ENTRY(RCR2)               \
+  ENTRY(RCR3)               \
+  ENTRY(RCR4)               \
+  ENTRY(RCR5)               \
+  ENTRY(RCR6)               \
+  ENTRY(RCR7)               \
+  ENTRY(RCR8)
+  
+#define ALL_EA_BASES  \
+  EA_BASES_16BIT      \
+  EA_BASES_32BIT      \
+  EA_BASES_64BIT
+  
+#define ALL_SIB_BASES \
+  REGS_32BIT          \
+  REGS_64BIT
+
+#define ALL_REGS      \
+  REGS_8BIT           \
+  REGS_16BIT          \
+  REGS_32BIT          \
+  REGS_64BIT          \
+  REGS_MMX            \
+  REGS_XMM            \
+  REGS_SEGMENT        \
+  REGS_DEBUG          \
+  REGS_CONTROL_32BIT  \
+  REGS_CONTROL_64BIT  \
+  ENTRY(RIP)
+
+/*
+ * EABase - All possible values of the base field for effective-address 
+ *   computations, a.k.a. the Mod and R/M fields of the ModR/M byte.  We
+ *   distinguish between bases (EA_BASE_*) and registers that just happen to be
+ *   referred to when Mod == 0b11 (EA_REG_*).
+ */
+typedef enum {
+  EA_BASE_NONE,
+#define ENTRY(x) EA_BASE_##x,
+  ALL_EA_BASES
+#undef ENTRY
+#define ENTRY(x) EA_REG_##x,
+  ALL_REGS
+#undef ENTRY
+  EA_max
+} EABase;
+  
+/* 
+ * SIBIndex - All possible values of the SIB index field.
+ *   Borrows entries from ALL_EA_BASES with the special case that
+ *   sib is synonymous with NONE.
+ */
+typedef enum {
+  SIB_INDEX_NONE,
+#define ENTRY(x) SIB_INDEX_##x,
+  ALL_EA_BASES
+#undef ENTRY
+  SIB_INDEX_max
+} SIBIndex;
+  
+/*
+ * SIBBase - All possible values of the SIB base field.
+ */
+typedef enum {
+  SIB_BASE_NONE,
+#define ENTRY(x) SIB_BASE_##x,
+  ALL_SIB_BASES
+#undef ENTRY
+  SIB_BASE_max
+} SIBBase;
+
+/*
+ * EADisplacement - Possible displacement types for effective-address
+ *   computations.
+ */
+typedef enum {
+  EA_DISP_NONE,
+  EA_DISP_8,
+  EA_DISP_16,
+  EA_DISP_32
+} EADisplacement;
+
+/*
+ * Reg - All possible values of the reg field in the ModR/M byte.
+ */
+typedef enum {
+#define ENTRY(x) REG_##x,
+  ALL_REGS
+#undef ENTRY
+  REG_max
+} Reg;
+  
+/*
+ * SegmentOverride - All possible segment overrides.
+ */
+typedef enum {
+  SEG_OVERRIDE_NONE,
+  SEG_OVERRIDE_CS,
+  SEG_OVERRIDE_SS,
+  SEG_OVERRIDE_DS,
+  SEG_OVERRIDE_ES,
+  SEG_OVERRIDE_FS,
+  SEG_OVERRIDE_GS,
+  SEG_OVERRIDE_max
+} SegmentOverride;
+
+typedef uint8_t BOOL;
+
+/*
+ * byteReader_t - Type for the byte reader that the consumer must provide to
+ *   the decoder.  Reads a single byte from the instruction's address space.
+ * @param arg     - A baton that the consumer can associate with any internal
+ *                  state that it needs.
+ * @param byte    - A pointer to a single byte in memory that should be set to
+ *                  contain the value at address.
+ * @param address - The address in the instruction's address space that should
+ *                  be read from.
+ * @return        - -1 if the byte cannot be read for any reason; 0 otherwise.
+ */
+typedef int (*byteReader_t)(void* arg, uint8_t* byte, uint64_t address);
+
+/*
+ * dlog_t - Type for the logging function that the consumer can provide to
+ *   get debugging output from the decoder.
+ * @param arg     - A baton that the consumer can associate with any internal
+ *                  state that it needs.
+ * @param log     - A string that contains the message.  Will be reused after
+ *                  the logger returns.
+ */
+typedef void (*dlog_t)(void* arg, const char *log);
+
+/*
+ * The x86 internal instruction, which is produced by the decoder.
+ */
+struct InternalInstruction {
+  /* Reader interface (C) */
+  byteReader_t reader;
+  /* Opaque value passed to the reader */
+  void* readerArg;
+  /* The address of the next byte to read via the reader */
+  uint64_t readerCursor;
+
+  /* Logger interface (C) */
+  dlog_t dlog;
+  /* Opaque value passed to the logger */
+  void* dlogArg;
+
+  /* General instruction information */
+  
+  /* The mode to disassemble for (64-bit, protected, real) */
+  DisassemblerMode mode;
+  /* The start of the instruction, usable with the reader */
+  uint64_t startLocation;
+  /* The length of the instruction, in bytes */
+  size_t length;
+  
+  /* Prefix state */
+  
+  /* 1 if the prefix byte corresponding to the entry is present; 0 if not */
+  uint8_t prefixPresent[0x100];
+  /* contains the location (for use with the reader) of the prefix byte */
+  uint64_t prefixLocations[0x100];
+  /* The value of the REX prefix, if present */
+  uint8_t rexPrefix;
+  /* The location of the REX prefix */
+  uint64_t rexLocation;
+  /* The location where a mandatory prefix would have to be (i.e., right before
+     the opcode, or right before the REX prefix if one is present) */
+  uint64_t necessaryPrefixLocation;
+  /* The segment override type */
+  SegmentOverride segmentOverride;
+  
+  /* Sizes of various critical pieces of data */
+  uint8_t registerSize;
+  uint8_t addressSize;
+  uint8_t displacementSize;
+  uint8_t immediateSize;
+  
+  /* opcode state */
+  
+  /* The value of the two-byte escape prefix (usually 0x0f) */
+  uint8_t twoByteEscape;
+  /* The value of the three-byte escape prefix (usually 0x38 or 0x3a) */
+  uint8_t threeByteEscape;
+  /* The last byte of the opcode, not counting any ModR/M extension */
+  uint8_t opcode;
+  /* The ModR/M byte of the instruction, if it is an opcode extension */
+  uint8_t modRMExtension;
+  
+  /* decode state */
+  
+  /* The type of opcode, used for indexing into the array of decode tables */
+  OpcodeType opcodeType;
+  /* The instruction ID, extracted from the decode table */
+  uint16_t instructionID;
+  /* The specifier for the instruction, from the instruction info table */
+  struct InstructionSpecifier* spec;
+  
+  /* state for additional bytes, consumed during operand decode.  Pattern:
+     consumed___ indicates that the byte was already consumed and does not
+     need to be consumed again */
+  
+  /* The ModR/M byte, which contains most register operands and some portion of
+     all memory operands */
+  BOOL                          consumedModRM;
+  uint8_t                       modRM;
+  
+  /* The SIB byte, used for more complex 32- or 64-bit memory operands */
+  BOOL                          consumedSIB;
+  uint8_t                       sib;
+
+  /* The displacement, used for memory operands */
+  BOOL                          consumedDisplacement;
+  int32_t                       displacement;
+  
+  /* Immediates.  There can be two in some cases */
+  uint8_t                       numImmediatesConsumed;
+  uint8_t                       numImmediatesTranslated;
+  uint64_t                      immediates[2];
+  
+  /* A register or immediate operand encoded into the opcode */
+  BOOL                          consumedOpcodeModifier;
+  uint8_t                       opcodeModifier;
+  Reg                           opcodeRegister;
+  
+  /* Portions of the ModR/M byte */
+  
+  /* These fields determine the allowable values for the ModR/M fields, which
+     depend on operand and address widths */
+  EABase                        eaBaseBase;
+  EABase                        eaRegBase;
+  Reg                           regBase;
+
+  /* The Mod and R/M fields can encode a base for an effective address, or a
+     register.  These are separated into two fields here */
+  EABase                        eaBase;
+  EADisplacement                eaDisplacement;
+  /* The reg field always encodes a register */
+  Reg                           reg;
+  
+  /* SIB state */
+  SIBIndex                      sibIndex;
+  uint8_t                       sibScale;
+  SIBBase                       sibBase;
+};
+
+/* decodeInstruction - Decode one instruction and store the decoding results in
+ *   a buffer provided by the consumer.
+ * @param insn      - The buffer to store the instruction in.  Allocated by the
+ *                    consumer.
+ * @param reader    - The byteReader_t for the bytes to be read.
+ * @param readerArg - An argument to pass to the reader for storing context
+ *                    specific to the consumer.  May be NULL.
+ * @param logger    - The dlog_t to be used in printing status messages from the
+ *                    disassembler.  May be NULL.
+ * @param loggerArg - An argument to pass to the logger for storing context
+ *                    specific to the logger.  May be NULL.
+ * @param startLoc  - The address (in the reader's address space) of the first
+ *                    byte in the instruction.
+ * @param mode      - The mode (16-bit, 32-bit, 64-bit) to decode in.
+ * @return          - Nonzero if there was an error during decode, 0 otherwise.
+ */
+int decodeInstruction(struct InternalInstruction* insn,
+                      byteReader_t reader,
+                      void* readerArg,
+                      dlog_t logger,
+                      void* loggerArg,
+                      uint64_t startLoc,
+                      DisassemblerMode mode);
+
+#ifdef __cplusplus 
+}
+#endif
+  
+#endif
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
new file mode 100644
index 0000000000..b226257c1e
--- /dev/null
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
@@ -0,0 +1,354 @@
+/*===- X86DisassemblerDecoderCommon.h - Disassembler decoder -------*- C -*-==*
+ *
+ *                     The LLVM Compiler Infrastructure
+ *
+ * This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===*
+ *
+ * This file is part of the X86 Disassembler.
+ * It contains common definitions used by both the disassembler and the table
+ *  generator.
+ * Documentation for the disassembler can be found in X86Disassembler.h.
+ *
+ *===----------------------------------------------------------------------===*/
+
+/*
+ * This header file provides those definitions that need to be shared between
+ * the decoder and the table generator in a C-friendly manner.
+ */
+
+#ifndef X86DISASSEMBLERDECODERCOMMON_H
+#define X86DISASSEMBLERDECODERCOMMON_H
+
+#include "llvm/System/DataTypes.h"
+
+#define INSTRUCTIONS_SYM  x86DisassemblerInstrSpecifiers
+#define CONTEXTS_SYM      x86DisassemblerContexts
+#define ONEBYTE_SYM       x86DisassemblerOneByteOpcodes
+#define TWOBYTE_SYM       x86DisassemblerTwoByteOpcodes
+#define THREEBYTE38_SYM   x86DisassemblerThreeByte38Opcodes
+#define THREEBYTE3A_SYM   x86DisassemblerThreeByte3AOpcodes
+
+#define INSTRUCTIONS_STR  "x86DisassemblerInstrSpecifiers"
+#define CONTEXTS_STR      "x86DisassemblerContexts"
+#define ONEBYTE_STR       "x86DisassemblerOneByteOpcodes"
+#define TWOBYTE_STR       "x86DisassemblerTwoByteOpcodes"
+#define THREEBYTE38_STR   "x86DisassemblerThreeByte38Opcodes"
+#define THREEBYTE3A_STR   "x86DisassemblerThreeByte3AOpcodes"
+
+/*
+ * Attributes of an instruction that must be known before the opcode can be
+ * processed correctly.  Most of these indicate the presence of particular
+ * prefixes, but ATTR_64BIT is simply an attribute of the decoding context.
+ */
+#define ATTRIBUTE_BITS          \
+  ENUM_ENTRY(ATTR_NONE,   0x00) \
+  ENUM_ENTRY(ATTR_64BIT,  0x01) \
+  ENUM_ENTRY(ATTR_XS,     0x02) \
+  ENUM_ENTRY(ATTR_XD,     0x04) \
+  ENUM_ENTRY(ATTR_REXW,   0x08) \
+  ENUM_ENTRY(ATTR_OPSIZE, 0x10)
+
+#define ENUM_ENTRY(n, v) n = v,
+enum attributeBits {
+  ATTRIBUTE_BITS
+  ATTR_max
+};
+#undef ENUM_ENTRY
+
+/*
+ * Combinations of the above attributes that are relevant to instruction
+ * decode.  Although other combinations are possible, they can be reduced to
+ * these without affecting the ultimately decoded instruction.
+ */
+
+/*           Class name           Rank  Rationale for rank assignment         */
+#define INSTRUCTION_CONTEXTS                                                   \
+  ENUM_ENTRY(IC,                    0,  "says nothing about the instruction")  \
+  ENUM_ENTRY(IC_64BIT,              1,  "says the instruction applies in "     \
+                                        "64-bit mode but no more")             \
+  ENUM_ENTRY(IC_OPSIZE,             3,  "requires an OPSIZE prefix, so "       \
+                                        "operands change width")               \
+  ENUM_ENTRY(IC_XD,                 2,  "may say something about the opcode "  \
+                                        "but not the operands")                \
+  ENUM_ENTRY(IC_XS,                 2,  "may say something about the opcode "  \
+                                        "but not the operands")                \
+  ENUM_ENTRY(IC_64BIT_REXW,         4,  "requires a REX.W prefix, so operands "\
+                                        "change width; overrides IC_OPSIZE")   \
+  ENUM_ENTRY(IC_64BIT_OPSIZE,       3,  "Just as meaningful as IC_OPSIZE")     \
+  ENUM_ENTRY(IC_64BIT_XD,           5,  "XD instructions are SSE; REX.W is "   \
+                                        "secondary")                           \
+  ENUM_ENTRY(IC_64BIT_XS,           5,  "Just as meaningful as IC_64BIT_XD")   \
+  ENUM_ENTRY(IC_64BIT_REXW_XS,      6,  "OPSIZE could mean a different "       \
+                                        "opcode")                              \
+  ENUM_ENTRY(IC_64BIT_REXW_XD,      6,  "Just as meaningful as "               \
+                                        "IC_64BIT_REXW_XS")                    \
+  ENUM_ENTRY(IC_64BIT_REXW_OPSIZE,  7,  "The Dynamic Duo!  Prefer over all "   \
+                                        "else because this changes most "      \
+                                        "operands' meaning")
+
+#define ENUM_ENTRY(n, r, d) n,    
+typedef enum {
+  INSTRUCTION_CONTEXTS
+  IC_max
+} InstructionContext;
+#undef ENUM_ENTRY
+
+/*
+ * Opcode types, which determine which decode table to use, both in the Intel
+ * manual and also for the decoder.
+ */
+typedef enum {
+  ONEBYTE       = 0,
+  TWOBYTE       = 1,
+  THREEBYTE_38  = 2,
+  THREEBYTE_3A  = 3
+} OpcodeType;
+
+/*
+ * The following structs are used for the hierarchical decode table.  After
+ * determining the instruction's class (i.e., which IC_* constant applies to
+ * it), the decoder reads the opcode.  Some instructions require specific
+ * values of the ModR/M byte, so the ModR/M byte indexes into the final table.
+ *
+ * If a ModR/M byte is not required, "required" is left unset, and the values
+ * for each instructionID are identical.
+ */
+ 
+typedef uint16_t InstrUID;
+
+/*
+ * ModRMDecisionType - describes the type of ModR/M decision, allowing the 
+ * consumer to determine the number of entries in it.
+ *
+ * MODRM_ONEENTRY - No matter what the value of the ModR/M byte is, the decoded
+ *                  instruction is the same.
+ * MODRM_SPLITRM  - If the ModR/M byte is between 0x00 and 0xbf, the opcode
+ *                  corresponds to one instruction; otherwise, it corresponds to
+ *                  a different instruction.
+ * MODRM_FULL     - Potentially, each value of the ModR/M byte could correspond
+ *                  to a different instruction.
+ */
+
+#define MODRMTYPES            \
+  ENUM_ENTRY(MODRM_ONEENTRY)  \
+  ENUM_ENTRY(MODRM_SPLITRM)   \
+  ENUM_ENTRY(MODRM_FULL)
+
+#define ENUM_ENTRY(n) n,    
+typedef enum {
+  MODRMTYPES
+  MODRM_max
+} ModRMDecisionType;
+#undef ENUM_ENTRY
+
+/*
+ * ModRMDecision - Specifies whether a ModR/M byte is needed and (if so) which 
+ *  instruction each possible value of the ModR/M byte corresponds to.  Once
+ *  this information is known, we have narrowed down to a single instruction.
+ */
+struct ModRMDecision {
+  uint8_t     modrm_type;
+  
+  /* The macro below must be defined wherever this file is included. */
+  INSTRUCTION_IDS
+};
+
+/*
+ * OpcodeDecision - Specifies which set of ModR/M->instruction tables to look at
+ *   given a particular opcode.
+ */
+struct OpcodeDecision {
+  struct ModRMDecision modRMDecisions[256];
+};
+
+/*
+ * ContextDecision - Specifies which opcode->instruction tables to look at given
+ *   a particular context (set of attributes).  Since there are many possible
+ *   contexts, the decoder first uses CONTEXTS_SYM to determine which context
+ *   applies given a specific set of attributes.  Hence there are only IC_max
+ *   entries in this table, rather than 2^(ATTR_max).
+ */
+struct ContextDecision {
+  struct OpcodeDecision opcodeDecisions[IC_max];
+};
+
+/* 
+ * Physical encodings of instruction operands.
+ */
+
+#define ENCODINGS                                                              \
+  ENUM_ENTRY(ENCODING_NONE,   "")                                              \
+  ENUM_ENTRY(ENCODING_REG,    "Register operand in ModR/M byte.")              \
+  ENUM_ENTRY(ENCODING_RM,     "R/M operand in ModR/M byte.")                   \
+  ENUM_ENTRY(ENCODING_CB,     "1-byte code offset (possible new CS value)")    \
+  ENUM_ENTRY(ENCODING_CW,     "2-byte")                                        \
+  ENUM_ENTRY(ENCODING_CD,     "4-byte")                                        \
+  ENUM_ENTRY(ENCODING_CP,     "6-byte")                                        \
+  ENUM_ENTRY(ENCODING_CO,     "8-byte")                                        \
+  ENUM_ENTRY(ENCODING_CT,     "10-byte")                                       \
+  ENUM_ENTRY(ENCODING_IB,     "1-byte immediate")                              \
+  ENUM_ENTRY(ENCODING_IW,     "2-byte")                                        \
+  ENUM_ENTRY(ENCODING_ID,     "4-byte")                                        \
+  ENUM_ENTRY(ENCODING_IO,     "8-byte")                                        \
+  ENUM_ENTRY(ENCODING_RB,     "(AL..DIL, R8L..R15L) Register code added to "   \
+                              "the opcode byte")                               \
+  ENUM_ENTRY(ENCODING_RW,     "(AX..DI, R8W..R15W)")                           \
+  ENUM_ENTRY(ENCODING_RD,     "(EAX..EDI, R8D..R15D)")                         \
+  ENUM_ENTRY(ENCODING_RO,     "(RAX..RDI, R8..R15)")                           \
+  ENUM_ENTRY(ENCODING_I,      "Position on floating-point stack added to the " \
+                              "opcode byte")                                   \
+                                                                               \
+  ENUM_ENTRY(ENCODING_Iv,     "Immediate of operand size")                     \
+  ENUM_ENTRY(ENCODING_Ia,     "Immediate of address size")                     \
+  ENUM_ENTRY(ENCODING_Rv,     "Register code of operand size added to the "    \
+                              "opcode byte")                                   \
+  ENUM_ENTRY(ENCODING_DUP,    "Duplicate of another operand; ID is encoded "   \
+                              "in type")
+
+#define ENUM_ENTRY(n, d) n,    
+  typedef enum {
+    ENCODINGS
+    ENCODING_max
+  } OperandEncoding;
+#undef ENUM_ENTRY
+
+/* 
+ * Semantic interpretations of instruction operands.
+ */
+
+#define TYPES                                                                  \
+  ENUM_ENTRY(TYPE_NONE,       "")                                              \
+  ENUM_ENTRY(TYPE_REL8,       "1-byte immediate address")                      \
+  ENUM_ENTRY(TYPE_REL16,      "2-byte")                                        \
+  ENUM_ENTRY(TYPE_REL32,      "4-byte")                                        \
+  ENUM_ENTRY(TYPE_REL64,      "8-byte")                                        \
+  ENUM_ENTRY(TYPE_PTR1616,    "2+2-byte segment+offset address")               \
+  ENUM_ENTRY(TYPE_PTR1632,    "2+4-byte")                                      \
+  ENUM_ENTRY(TYPE_PTR1664,    "2+8-byte")                                      \
+  ENUM_ENTRY(TYPE_R8,         "1-byte register operand")                       \
+  ENUM_ENTRY(TYPE_R16,        "2-byte")                                        \
+  ENUM_ENTRY(TYPE_R32,        "4-byte")                                        \
+  ENUM_ENTRY(TYPE_R64,        "8-byte")                                        \
+  ENUM_ENTRY(TYPE_IMM8,       "1-byte immediate operand")                      \
+  ENUM_ENTRY(TYPE_IMM16,      "2-byte")                                        \
+  ENUM_ENTRY(TYPE_IMM32,      "4-byte")                                        \
+  ENUM_ENTRY(TYPE_IMM64,      "8-byte")                                        \
+  ENUM_ENTRY(TYPE_RM8,        "1-byte register or memory operand")             \
+  ENUM_ENTRY(TYPE_RM16,       "2-byte")                                        \
+  ENUM_ENTRY(TYPE_RM32,       "4-byte")                                        \
+  ENUM_ENTRY(TYPE_RM64,       "8-byte")                                        \
+  ENUM_ENTRY(TYPE_M,          "Memory operand")                                \
+  ENUM_ENTRY(TYPE_M8,         "1-byte")                                        \
+  ENUM_ENTRY(TYPE_M16,        "2-byte")                                        \
+  ENUM_ENTRY(TYPE_M32,        "4-byte")                                        \
+  ENUM_ENTRY(TYPE_M64,        "8-byte")                                        \
+  ENUM_ENTRY(TYPE_M128,       "16-byte (SSE/SSE2)")                            \
+  ENUM_ENTRY(TYPE_M1616,      "2+2-byte segment+offset address")               \
+  ENUM_ENTRY(TYPE_M1632,      "2+4-byte")                                      \
+  ENUM_ENTRY(TYPE_M1664,      "2+8-byte")                                      \
+  ENUM_ENTRY(TYPE_M16_32,     "2+4-byte two-part memory operand (LIDT, LGDT)") \
+  ENUM_ENTRY(TYPE_M16_16,     "2+2-byte (BOUND)")                              \
+  ENUM_ENTRY(TYPE_M32_32,     "4+4-byte (BOUND)")                              \
+  ENUM_ENTRY(TYPE_M16_64,     "2+8-byte (LIDT, LGDT)")                         \
+  ENUM_ENTRY(TYPE_MOFFS8,     "1-byte memory offset (relative to segment "     \
+                              "base)")                                         \
+  ENUM_ENTRY(TYPE_MOFFS16,    "2-byte")                                        \
+  ENUM_ENTRY(TYPE_MOFFS32,    "4-byte")                                        \
+  ENUM_ENTRY(TYPE_MOFFS64,    "8-byte")                                        \
+  ENUM_ENTRY(TYPE_SREG,       "Byte with single bit set: 0 = ES, 1 = CS, "     \
+                              "2 = SS, 3 = DS, 4 = FS, 5 = GS")                \
+  ENUM_ENTRY(TYPE_M32FP,      "32-bit IEE754 memory floating-point operand")   \
+  ENUM_ENTRY(TYPE_M64FP,      "64-bit")                                        \
+  ENUM_ENTRY(TYPE_M80FP,      "80-bit extended")                               \
+  ENUM_ENTRY(TYPE_M16INT,     "2-byte memory integer operand for use in "      \
+                              "floating-point instructions")                   \
+  ENUM_ENTRY(TYPE_M32INT,     "4-byte")                                        \
+  ENUM_ENTRY(TYPE_M64INT,     "8-byte")                                        \
+  ENUM_ENTRY(TYPE_ST,         "Position on the floating-point stack")          \
+  ENUM_ENTRY(TYPE_MM,         "MMX register operand")                          \
+  ENUM_ENTRY(TYPE_MM32,       "4-byte MMX register or memory operand")         \
+  ENUM_ENTRY(TYPE_MM64,       "8-byte")                                        \
+  ENUM_ENTRY(TYPE_XMM,        "XMM register operand")                          \
+  ENUM_ENTRY(TYPE_XMM32,      "4-byte XMM register or memory operand")         \
+  ENUM_ENTRY(TYPE_XMM64,      "8-byte")                                        \
+  ENUM_ENTRY(TYPE_XMM128,     "16-byte")                                       \
+  ENUM_ENTRY(TYPE_XMM0,       "Implicit use of XMM0")                          \
+  ENUM_ENTRY(TYPE_SEGMENTREG, "Segment register operand")                      \
+  ENUM_ENTRY(TYPE_DEBUGREG,   "Debug register operand")                        \
+  ENUM_ENTRY(TYPE_CR32,       "4-byte control register operand")               \
+  ENUM_ENTRY(TYPE_CR64,       "8-byte")                                        \
+                                                                               \
+  ENUM_ENTRY(TYPE_Mv,         "Memory operand of operand size")                \
+  ENUM_ENTRY(TYPE_Rv,         "Register operand of operand size")              \
+  ENUM_ENTRY(TYPE_IMMv,       "Immediate operand of operand size")             \
+  ENUM_ENTRY(TYPE_RELv,       "Immediate address of operand size")             \
+  ENUM_ENTRY(TYPE_DUP0,       "Duplicate of operand 0")                        \
+  ENUM_ENTRY(TYPE_DUP1,       "operand 1")                                     \
+  ENUM_ENTRY(TYPE_DUP2,       "operand 2")                                     \
+  ENUM_ENTRY(TYPE_DUP3,       "operand 3")                                     \
+  ENUM_ENTRY(TYPE_DUP4,       "operand 4")                                     \
+  ENUM_ENTRY(TYPE_M512,       "512-bit FPU/MMX/XMM/MXCSR state")
+
+#define ENUM_ENTRY(n, d) n,    
+typedef enum {
+  TYPES
+  TYPE_max
+} OperandType;
+#undef ENUM_ENTRY
+
+/* 
+ * OperandSpecifier - The specification for how to extract and interpret one
+ *   operand.
+ */
+struct OperandSpecifier {
+  OperandEncoding  encoding;
+  OperandType      type;
+};
+
+/*
+ * Indicates where the opcode modifier (if any) is to be found.  Extended
+ * opcodes with AddRegFrm have the opcode modifier in the ModR/M byte.
+ */
+
+#define MODIFIER_TYPES        \
+  ENUM_ENTRY(MODIFIER_NONE)   \
+  ENUM_ENTRY(MODIFIER_OPCODE) \
+  ENUM_ENTRY(MODIFIER_MODRM)
+
+#define ENUM_ENTRY(n) n,
+typedef enum {
+  MODIFIER_TYPES
+  MODIFIER_max
+} ModifierType;
+#undef ENUM_ENTRY
+
+#define X86_MAX_OPERANDS 5
+
+/*
+ * The specification for how to extract and interpret a full instruction and
+ * its operands.
+ */
+struct InstructionSpecifier {
+  ModifierType modifierType;
+  uint8_t modifierBase;
+  struct OperandSpecifier operands[X86_MAX_OPERANDS];
+  
+  /* The macro below must be defined wherever this file is included. */
+  INSTRUCTION_SPECIFIER_FIELDS
+};
+
+/*
+ * Decoding mode for the Intel disassembler.  16-bit, 32-bit, and 64-bit mode
+ * are supported, and represent real mode, IA-32e, and IA-32e in 64-bit mode,
+ * respectively.
+ */
+typedef enum {
+  MODE_16BIT,
+  MODE_32BIT,
+  MODE_64BIT
+} DisassemblerMode;
+
+#endif
diff --git a/lib/Target/X86/Makefile b/lib/Target/X86/Makefile
index b311a6ed86..6098dbf5dd 100644
--- a/lib/Target/X86/Makefile
+++ b/lib/Target/X86/Makefile
@@ -15,8 +15,8 @@ BUILT_SOURCES = X86GenRegisterInfo.h.inc X86GenRegisterNames.inc \
                 X86GenRegisterInfo.inc X86GenInstrNames.inc \
                 X86GenInstrInfo.inc X86GenAsmWriter.inc X86GenAsmMatcher.inc \
                 X86GenAsmWriter1.inc X86GenDAGISel.inc  \
-                X86GenFastISel.inc \
-                X86GenCallingConv.inc X86GenSubtarget.inc
+                X86GenDisassemblerTables.inc X86GenFastISel.inc \
+                X86GenCallingConv.inc X86GenSubtarget.inc \
 
 DIRS = AsmPrinter AsmParser Disassembler TargetInfo
 
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index 0152121e53..90d9083d78 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -38,6 +38,8 @@ static const MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
   }
 }
 
+extern "C" void LLVMInitializeX86Disassembler();
+
 extern "C" void LLVMInitializeX86Target() { 
   // Register the target.
   RegisterTargetMachine<X86_32TargetMachine> X(TheX86_32Target);
@@ -47,6 +49,8 @@ extern "C" void LLVMInitializeX86Target() {
   RegisterAsmInfoFn A(TheX86_32Target, createMCAsmInfo);
   RegisterAsmInfoFn B(TheX86_64Target, createMCAsmInfo);
 
+  LLVMInitializeX86Disassembler();
+
   // Register the code emitter.
   TargetRegistry::RegisterCodeEmitter(TheX86_32Target, createX86MCCodeEmitter);
   TargetRegistry::RegisterCodeEmitter(TheX86_64Target, createX86MCCodeEmitter);
author	Sean Callanan <scallanan@apple.com>	2009-12-19 02:59:52 +0000
committer	Sean Callanan <scallanan@apple.com>	2009-12-19 02:59:52 +0000
commit	8ed9f51663bc5533f36ca62e5668ae08e9a1313f (patch)
tree	3054645839caee367e9403507d8487538819ed5b /lib/Target
parent	e9ec6ad1ba5fd9ad70f5d0c059c5a5aa44f501f7 (diff)
download	llvm-8ed9f51663bc5533f36ca62e5668ae08e9a1313f.tar.gz llvm-8ed9f51663bc5533f36ca62e5668ae08e9a1313f.tar.bz2 llvm-8ed9f51663bc5533f36ca62e5668ae08e9a1313f.tar.xz