summaryrefslogtreecommitdiff
path: root/tools/edis
diff options
context:
space:
mode:
authorSean Callanan <scallanan@apple.com>2010-02-02 20:11:23 +0000
committerSean Callanan <scallanan@apple.com>2010-02-02 20:11:23 +0000
commit59b9c8879bebdbf0f745af0129abb51284867729 (patch)
treecc5e26b9643a2c9e0d1a2fcfc89f62ab4ca36777 /tools/edis
parent4db361395b762b1de6059827a6fabb1952373f98 (diff)
downloadllvm-59b9c8879bebdbf0f745af0129abb51284867729.tar.gz
llvm-59b9c8879bebdbf0f745af0129abb51284867729.tar.bz2
llvm-59b9c8879bebdbf0f745af0129abb51284867729.tar.xz
Renamed the ed directory to edis, as suggested
yesterday. This eliminates possible confusion about what exactly is in this directory; the name is still short, though. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@95118 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'tools/edis')
-rw-r--r--tools/edis/EDDisassembler.cpp386
-rw-r--r--tools/edis/EDDisassembler.h248
-rw-r--r--tools/edis/EDInst.cpp205
-rw-r--r--tools/edis/EDInst.h171
-rw-r--r--tools/edis/EDMain.cpp265
-rw-r--r--tools/edis/EDOperand.cpp148
-rw-r--r--tools/edis/EDOperand.h65
-rw-r--r--tools/edis/EDToken.cpp185
-rw-r--r--tools/edis/EDToken.h135
-rw-r--r--tools/edis/EnhancedDisassembly.exports31
-rw-r--r--tools/edis/Makefile55
11 files changed, 1894 insertions, 0 deletions
diff --git a/tools/edis/EDDisassembler.cpp b/tools/edis/EDDisassembler.cpp
new file mode 100644
index 0000000000..99864fb322
--- /dev/null
+++ b/tools/edis/EDDisassembler.cpp
@@ -0,0 +1,386 @@
+//===-EDDisassembler.cpp - LLVM Enhanced Disassembler ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Enhanced Disassembly library's disassembler class.
+// The disassembler is responsible for vending individual instructions according
+// to a given architecture and disassembly syntax.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCParser/AsmLexer.h"
+#include "llvm/MC/MCParser/AsmParser.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/MemoryObject.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Target/TargetAsmLexer.h"
+#include "llvm/Target/TargetAsmParser.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSelect.h"
+
+#include "EDDisassembler.h"
+#include "EDInst.h"
+
+#include "../../lib/Target/X86/X86GenEDInfo.inc"
+
+using namespace llvm;
+
+bool EDDisassembler::sInitialized = false;
+EDDisassembler::DisassemblerMap_t EDDisassembler::sDisassemblers;
+
+/// InfoMap - Associates an architecture with the triple string used to look
+/// up its LLVM target and with the instruction information table to use.
+struct InfoMap {
+  /// The architecture this entry describes
+  Triple::ArchType Arch;
+  /// The triple string used for target lookup; NULL in the terminating entry
+  const char *String;
+  /// The instruction information table for the architecture
+  const InstInfo *Info;
+};
+
+/// infomap - The table of supported architectures.  Nothing modifies it, so
+/// it is declared const.  The last entry (String == NULL) ends the table.
+static const struct InfoMap infomap[] = {
+  { Triple::x86, "i386-unknown-unknown", instInfoX86 },
+  { Triple::x86_64, "x86_64-unknown-unknown", instInfoX86 },
+  { Triple::InvalidArch, NULL, NULL }
+};
+
+/// infoFromArch - Looks up the InfoMap entry for an architecture, returning
+/// NULL if the architecture is not in the table
+///
+/// @arg arch - The Triple::ArchType for the desired architecture
+static const InfoMap *infoFromArch(Triple::ArchType arch) {
+  // Walk the table until the terminating entry, whose String is NULL.
+  for (const InfoMap *entry = infomap; entry->String != NULL; ++entry) {
+    if (entry->Arch == arch)
+      return entry;
+  }
+
+  return NULL;
+}
+
+/// getLLVMSyntaxVariant - Maps an EDAssemblySyntax_t constant to the syntax
+/// variant number suitable for passing to Target::createMCInstPrinter(), or
+/// returns -1 if the syntax is unknown or does not apply to the architecture
+///
+/// @arg arch - The target architecture
+/// @arg syntax - The assembly syntax, as an EDAssemblySyntax_t constant
+static int getLLVMSyntaxVariant(Triple::ArchType arch,
+                                EDAssemblySyntax_t syntax) {
+  const bool isX86 = (arch == Triple::x86 || arch == Triple::x86_64);
+
+  // Variant numbers below follow the mappings in X86AsmPrinter.cpp
+  switch (syntax) {
+  case kEDAssemblySyntaxX86ATT:
+    return isX86 ? 0 : -1;
+  case kEDAssemblySyntaxX86Intel:
+    return isX86 ? 1 : -1;
+  default:
+    return -1;
+  }
+}
+
+// BRINGUP_TARGET - Expands to the five LLVMInitialize<tgt>* calls (TargetInfo,
+// Target, AsmPrinter, AsmParser, Disassembler) for one target.
+#define BRINGUP_TARGET(tgt) \
+  LLVMInitialize##tgt##TargetInfo(); \
+  LLVMInitialize##tgt##Target(); \
+  LLVMInitialize##tgt##AsmPrinter(); \
+  LLVMInitialize##tgt##AsmParser(); \
+  LLVMInitialize##tgt##Disassembler();
+
+/// initialize - Brings up the X86 backend the first time it is called; later
+/// calls return without doing anything.
+void EDDisassembler::initialize() {
+  if (!sInitialized) {
+    // The flag is set before the bring-up calls run.
+    sInitialized = true;
+
+    BRINGUP_TARGET(X86)
+  }
+}
+
+#undef BRINGUP_TARGET
+
+/// getDisassembler - Returns the registered disassembler for an
+/// architecture/syntax pair, creating and registering it on first use.
+/// Returns NULL if a valid disassembler cannot be constructed for the pair;
+/// failed attempts are not added to the registry.
+///
+/// @arg arch - The desired architecture
+/// @arg syntax - The desired disassembly syntax
+EDDisassembler *EDDisassembler::getDisassembler(Triple::ArchType arch,
+                                                EDAssemblySyntax_t syntax) {
+  CPUKey key;
+  key.Arch = arch;
+  key.Syntax = syntax;
+
+  EDDisassembler::DisassemblerMap_t::iterator i = sDisassemblers.find(key);
+
+  if (i != sDisassemblers.end())
+    return i->second;
+
+  EDDisassembler *sdd = new EDDisassembler(key);
+
+  // A disassembler whose constructor failed is discarded, not registered.
+  if (!sdd->valid()) {
+    delete sdd;
+    return NULL;
+  }
+
+  sDisassemblers[key] = sdd;
+
+  return sdd;
+}
+
+/// getDisassembler - Parses a triple string and forwards its architecture,
+/// together with the syntax, to the ArchType overload.
+///
+/// @arg str - The string representation of the architecture triple
+/// @arg syntax - The disassembly syntax for the required disassembler
+EDDisassembler *EDDisassembler::getDisassembler(StringRef str,
+                                                EDAssemblySyntax_t syntax) {
+  return getDisassembler(Triple(str).getArch(), syntax);
+}
+
+/// Constructor - Creates the LLVM objects needed to disassemble, print, and
+/// tokenize instructions for one architecture/syntax pair.  Valid stays false
+/// if any step fails, so callers must check valid() before using the object.
+///
+/// @arg key - The architecture and disassembly syntax for the disassembler
+EDDisassembler::EDDisassembler(CPUKey &key) :
+  Valid(false), ErrorString(), ErrorStream(ErrorString), Key(key),
+  Tgt(NULL), InstInfos(NULL), LLVMSyntaxVariant(-1) {
+  const InfoMap *infoMap = infoFromArch(key.Arch);
+
+  if (!infoMap)
+    return;
+
+  // Store the variant in the member rather than a local: llvmSyntaxVariant()
+  // returns the member, which would otherwise never be assigned.
+  LLVMSyntaxVariant = getLLVMSyntaxVariant(key.Arch, key.Syntax);
+
+  if (LLVMSyntaxVariant < 0)
+    return;
+
+  std::string tripleString(infoMap->String);
+  std::string errorString;
+
+  Tgt = TargetRegistry::lookupTarget(tripleString,
+                                     errorString);
+
+  if (!Tgt)
+    return;
+
+  std::string featureString;
+
+  OwningPtr<const TargetMachine>
+    targetMachine(Tgt->createTargetMachine(tripleString,
+                                           featureString));
+
+  // Check the factory result before dereferencing it.
+  if (!targetMachine)
+    return;
+
+  const TargetRegisterInfo *registerInfo = targetMachine->getRegisterInfo();
+
+  if (!registerInfo)
+    return;
+
+  AsmInfo.reset(Tgt->createAsmInfo(tripleString));
+
+  if (!AsmInfo)
+    return;
+
+  Disassembler.reset(Tgt->createMCDisassembler());
+
+  if (!Disassembler)
+    return;
+
+  // The printer writes into InstStream, which appends to InstString.
+  InstString.reset(new std::string);
+  InstStream.reset(new raw_string_ostream(*InstString));
+
+  InstPrinter.reset(Tgt->createMCInstPrinter(LLVMSyntaxVariant,
+                                             *AsmInfo,
+                                             *InstStream));
+
+  if (!InstPrinter)
+    return;
+
+  GenericAsmLexer.reset(new AsmLexer(*AsmInfo));
+  SpecificAsmLexer.reset(Tgt->createAsmLexer(*AsmInfo));
+
+  // Check the factory result before dereferencing it.
+  if (!SpecificAsmLexer)
+    return;
+
+  SpecificAsmLexer->InstallLexer(*GenericAsmLexer);
+
+  InstInfos = infoMap->Info;
+
+  initMaps(*registerInfo);
+
+  Valid = true;
+}
+
+/// Destructor - Has no explicit cleanup to perform; the body is empty and
+/// members are destroyed by their own destructors.
+EDDisassembler::~EDDisassembler() {
+}
+
+namespace {
+  /// EDMemoryObject - A MemoryObject whose bytes are supplied by an
+  /// EDByteReaderCallback and its opaque argument.  See MemoryObject.
+  class EDMemoryObject : public llvm::MemoryObject {
+  private:
+    EDByteReaderCallback Callback;
+    void *Arg;
+  public:
+    EDMemoryObject(EDByteReaderCallback callback, void *arg)
+      : Callback(callback), Arg(arg) { }
+    ~EDMemoryObject() { }
+    /// The object reports base 0 and extent (uint64_t)-1
+    uint64_t getBase() const { return 0x0; }
+    uint64_t getExtent() const { return (uint64_t)-1; }
+    /// readByte - Fetches one byte through the callback.  Returns 0 on
+    /// success, or -1 if there is no callback or the callback returns nonzero
+    int readByte(uint64_t address, uint8_t *ptr) const {
+      const bool ok = Callback && !Callback(ptr, address, Arg);
+      return ok ? 0 : -1;
+    }
+  };
+}
+
+/// createInst - Decodes one instruction at the given address and wraps it in
+/// a new EDInst, or returns NULL if decoding fails
+///
+/// @arg byteReader - A callback function that provides machine code bytes
+/// @arg address - The address of the first byte of the instruction
+/// @arg arg - An opaque argument for byteReader
+EDInst *EDDisassembler::createInst(EDByteReaderCallback byteReader,
+                                   uint64_t address,
+                                   void *arg) {
+  EDMemoryObject memoryObject(byteReader, arg);
+
+  MCInst *inst = new MCInst;
+  uint64_t byteSize;
+
+  const bool decoded = Disassembler->getInstruction(*inst,
+                                                    byteSize,
+                                                    memoryObject,
+                                                    address,
+                                                    ErrorStream);
+
+  if (!decoded) {
+    delete inst;
+    return NULL;
+  }
+
+  // The opcode indexes the instruction information table.
+  const InstInfo *thisInstInfo = &InstInfos[inst->getOpcode()];
+
+  // The EDInst deletes inst in its destructor.
+  return new EDInst(inst, byteSize, *this, thisInstInfo);
+}
+
+/// initMaps - Fills RegVec and RegRMap from the register info and, for x86
+/// and x86_64, records the stack-pointer and program-counter register IDs
+///
+/// @arg registerInfo - the register information to use as a source
+void EDDisassembler::initMaps(const TargetRegisterInfo &registerInfo) {
+  const unsigned numRegisters = registerInfo.getNumRegs();
+
+  for (unsigned id = 0; id != numRegisters; ++id) {
+    const char *registerName = registerInfo.get(id).Name;
+
+    // RegVec maps ID -> name; RegRMap maps name -> ID.
+    RegVec.push_back(registerName);
+    RegRMap[registerName] = id;
+  }
+
+  const bool isX86 = (Key.Arch == Triple::x86 || Key.Arch == Triple::x86_64);
+
+  if (!isX86)
+    return;
+
+  stackPointers.insert(registerIDWithName("SP"));
+  stackPointers.insert(registerIDWithName("ESP"));
+  stackPointers.insert(registerIDWithName("RSP"));
+
+  programCounters.insert(registerIDWithName("IP"));
+  programCounters.insert(registerIDWithName("EIP"));
+  programCounters.insert(registerIDWithName("RIP"));
+}
+
+/// nameWithRegisterID - Returns the name stored in RegVec for a register ID,
+/// or NULL if the ID is out of range
+const char *EDDisassembler::nameWithRegisterID(unsigned registerID) const {
+  return (registerID < RegVec.size()) ? RegVec[registerID].c_str() : NULL;
+}
+
+/// registerIDWithName - Returns the ID recorded in RegRMap for a register
+/// name, or 0 if the name is not present
+unsigned EDDisassembler::registerIDWithName(const char *name) const {
+  const std::string key(name);
+  regrmap_t::const_iterator found = RegRMap.find(key);
+
+  return (found != RegRMap.end()) ? found->second : 0;
+}
+
+/// registerIsStackPointer - Reports whether the ID is in the stackPointers set
+bool EDDisassembler::registerIsStackPointer(unsigned registerID) {
+  return stackPointers.count(registerID) != 0;
+}
+
+/// registerIsProgramCounter - Reports whether the ID is in the programCounters
+/// set
+bool EDDisassembler::registerIsProgramCounter(unsigned registerID) {
+  return programCounters.count(registerID) != 0;
+}
+
+/// printInst - Prints an MCInst into str using the shared instruction printer.
+/// This implementation always returns 0.
+///
+/// @arg str - Receives the printed text of the instruction
+/// @arg inst - The MCInst to print
+int EDDisassembler::printInst(std::string& str,
+ MCInst& inst) {
+ // InstPrinter, InstStream and InstString are shared by all callers, so the
+ // whole print/copy/clear sequence runs with PrinterMutex held.
+ PrinterMutex.acquire();
+
+ InstPrinter->printInst(&inst);
+ // Flush so the printed text reaches InstString before it is copied out.
+ InstStream->flush();
+ str = *InstString;
+ // Empty the shared buffer so the next call starts clean.
+ InstString->clear();
+
+ PrinterMutex.release();
+
+ return 0;
+}
+
+/// parseInst - Parses the text of one instruction twice: once with a fresh
+/// parser to collect its operands, then with the shared lexers to collect its
+/// tokens.  Returns 0 on success, or -1 otherwise.
+///
+/// @arg operands - Filled in with the parsed operands
+/// @arg tokens - Filled in with the lexed tokens
+/// @arg str - The string representation of the instruction
+///
+/// NOTE(review): operands appended before a later failure are left in the
+/// vector; confirm that callers clean them up.
+int EDDisassembler::parseInst(SmallVectorImpl<MCParsedAsmOperand*> &operands,
+ SmallVectorImpl<AsmToken> &tokens,
+ const std::string &str) {
+ int ret = 0;
+
+ // The buffer length comes from strlen(), so it ends at the first NUL.
+ const char *cStr = str.c_str();
+ MemoryBuffer *buf = MemoryBuffer::getMemBuffer(cStr, cStr + strlen(cStr));
+
+ StringRef instName;
+ SMLoc instLoc;
+
+ // First pass: build a throwaway parser stack over the buffer.
+ SourceMgr sourceMgr;
+ sourceMgr.AddNewSourceBuffer(buf, SMLoc()); // ownership of buf handed over
+ MCContext context;
+ OwningPtr<MCStreamer> streamer
+ (createNullStreamer(context));
+ AsmParser genericParser(sourceMgr, context, *streamer, *AsmInfo);
+ OwningPtr<TargetAsmParser> specificParser
+ (Tgt->createAsmParser(genericParser));
+
+ // The first token must be an identifier; it is passed on as the
+ // instruction name.
+ AsmToken OpcodeToken = genericParser.Lex();
+
+ if(OpcodeToken.is(AsmToken::Identifier)) {
+ instName = OpcodeToken.getString();
+ instLoc = OpcodeToken.getLoc();
+ // A nonzero result from ParseInstruction is treated as failure.
+ if (specificParser->ParseInstruction(instName, instLoc, operands))
+ ret = -1;
+ }
+ else {
+ ret = -1;
+ }
+
+ // Second pass: the lexers are shared members, so they are used only with
+ // ParserMutex held.
+ ParserMutex.acquire();
+
+ if (!ret) {
+ // Point the shared lexer at the same buffer, which sourceMgr still owns.
+ GenericAsmLexer->setBuffer(buf);
+
+ // The comma expression lexes the next token before each loop test; the
+ // loop stops at end of file or end of statement.
+ while (SpecificAsmLexer->Lex(),
+ SpecificAsmLexer->isNot(AsmToken::Eof) &&
+ SpecificAsmLexer->isNot(AsmToken::EndOfStatement)) {
+ if (SpecificAsmLexer->is(AsmToken::Error)) {
+ ret = -1;
+ break;
+ }
+ tokens.push_back(SpecificAsmLexer->getTok());
+ }
+ }
+
+ ParserMutex.release();
+
+ return ret;
+}
+
+/// llvmSyntaxVariant - Returns the value of the LLVMSyntaxVariant member
+int EDDisassembler::llvmSyntaxVariant() const {
+ return LLVMSyntaxVariant;
+}
diff --git a/tools/edis/EDDisassembler.h b/tools/edis/EDDisassembler.h
new file mode 100644
index 0000000000..6be9152fac
--- /dev/null
+++ b/tools/edis/EDDisassembler.h
@@ -0,0 +1,248 @@
+//===-EDDisassembler.h - LLVM Enhanced Disassembler -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interface for the Enhanced Disassembly library's
+// disassembler class. The disassembler is responsible for vending individual
+// instructions according to a given architecture and disassembly syntax.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef EDDisassembler_
+#define EDDisassembler_
+
+#include "EDInfo.inc"
+
+#include "llvm-c/EnhancedDisassembly.h"
+
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/System/Mutex.h"
+
+#include <map>
+#include <set>
+#include <string>
+#include <vector>
+
+namespace llvm {
+class AsmLexer;
+class AsmToken;
+class MCContext;
+class MCAsmInfo;
+class MCAsmLexer;
+class AsmParser;
+class TargetAsmLexer;
+class TargetAsmParser;
+class MCDisassembler;
+class MCInstPrinter;
+class MCInst;
+class MCParsedAsmOperand;
+class MCStreamer;
+template <typename T> class SmallVectorImpl;
+class SourceMgr;
+class Target;
+class TargetRegisterInfo;
+}
+
+/// EDDisassembler - Encapsulates a disassembler for a single architecture and
+/// disassembly syntax. Also manages the static disassembler registry.
+struct EDDisassembler {
+ ////////////////////
+ // Static members //
+ ////////////////////
+
+  /// CPUKey - Encapsulates the descriptor of an architecture/disassembly-syntax
+  /// pair
+  struct CPUKey {
+    /// The architecture type
+    llvm::Triple::ArchType Arch;
+
+    /// The assembly syntax
+    EDAssemblySyntax_t Syntax;
+
+    /// operator== - Equality operator
+    bool operator==(const CPUKey &key) const {
+      return (Arch == key.Arch &&
+              Syntax == key.Syntax);
+    }
+
+    /// operator< - Less-than operator.  Orders keys lexicographically by
+    /// (Arch, Syntax).  CPUKey is the key type of a std::map, which needs a
+    /// strict weak ordering.  Testing the two fields independently does not
+    /// give one: a key with a smaller Arch but a larger-or-equal Syntax
+    /// compares neither less nor greater than the other key, so two distinct
+    /// keys could be treated as the same map entry.
+    bool operator<(const CPUKey &key) const {
+      if (Arch != key.Arch)
+        return Arch < key.Arch;
+      return Syntax < key.Syntax;
+    }
+  };
+
+ typedef std::map<CPUKey, EDDisassembler*> DisassemblerMap_t;
+
+ /// True if the disassembler registry has been initialized; false if not
+ static bool sInitialized;
+ /// A map from disassembler specifications to disassemblers. Populated
+ /// lazily.
+ static DisassemblerMap_t sDisassemblers;
+
+ /// getDisassembler - Returns the specified disassembler, or NULL on failure
+ ///
+ /// @arg arch - The desired architecture
+ /// @arg syntax - The desired disassembly syntax
+ static EDDisassembler *getDisassembler(llvm::Triple::ArchType arch,
+ EDAssemblySyntax_t syntax);
+
+ /// getDisassembler - Returns the disassembler for a given combination of
+ /// architecture triple and assembly syntax, or NULL on failure
+ ///
+ /// @arg str - The string representation of the architecture triple, e.g.,
+ /// "x86_64-apple-darwin"
+ /// @arg syntax - The disassembly syntax for the required disassembler
+ static EDDisassembler *getDisassembler(llvm::StringRef str,
+ EDAssemblySyntax_t syntax);
+
+ /// initialize - Initializes the disassembler registry and the LLVM backend
+ static void initialize();
+
+ ////////////////////////
+ // Per-object members //
+ ////////////////////////
+
+ /// True only if the object has been fully and successfully initialized
+ bool Valid;
+
+ /// The string that stores disassembler errors from the backend
+ std::string ErrorString;
+ /// The stream that wraps the ErrorString
+ llvm::raw_string_ostream ErrorStream;
+
+ /// The architecture/syntax pair for the current architecture
+ CPUKey Key;
+ /// The LLVM target corresponding to the disassembler
+ const llvm::Target *Tgt;
+ /// The assembly information for the target architecture
+ llvm::OwningPtr<const llvm::MCAsmInfo> AsmInfo;
+ /// The disassembler for the target architecture
+ llvm::OwningPtr<const llvm::MCDisassembler> Disassembler;
+ /// The output string for the instruction printer; must be guarded with
+ /// PrinterMutex
+ llvm::OwningPtr<std::string> InstString;
+ /// The output stream for the disassembler; must be guarded with
+ /// PrinterMutex
+ llvm::OwningPtr<llvm::raw_string_ostream> InstStream;
+ /// The instruction printer for the target architecture; must be guarded with
+ /// PrinterMutex when printing
+ llvm::OwningPtr<llvm::MCInstPrinter> InstPrinter;
+ /// The mutex that guards the instruction printer's printing functions, which
+ /// use a shared stream
+ llvm::sys::Mutex PrinterMutex;
+ /// The array of instruction information provided by the TableGen backend for
+ /// the target architecture
+ const InstInfo *InstInfos;
+ /// The target-specific lexer for use in tokenizing strings, in
+ /// target-independent and target-specific portions
+ llvm::OwningPtr<llvm::AsmLexer> GenericAsmLexer;
+ llvm::OwningPtr<llvm::TargetAsmLexer> SpecificAsmLexer;
+ /// The guard for the above
+ llvm::sys::Mutex ParserMutex;
+ /// The LLVM number used for the target disassembly syntax variant
+ int LLVMSyntaxVariant;
+
+ typedef std::vector<std::string> regvec_t;
+ typedef std::map<std::string, unsigned> regrmap_t;
+
+ /// A vector of registers for quick mapping from LLVM register IDs to names
+ regvec_t RegVec;
+ /// A map of registers for quick mapping from register names to LLVM IDs
+ regrmap_t RegRMap;
+
+ /// A set of register IDs for aliases of the stack pointer for the current
+ /// architecture
+ std::set<unsigned> stackPointers;
+ /// A set of register IDs for aliases of the program counter for the current
+ /// architecture
+ std::set<unsigned> programCounters;
+
+ /// Constructor - initializes a disassembler, creating all the necessary
+ /// objects itself; call valid() afterwards to find out whether it succeeded
+ ///
+ /// @arg key - the architecture and disassembly syntax for the
+ /// disassembler
+ EDDisassembler(CPUKey& key);
+
+ /// valid - reports whether there was a failure in the constructor.
+ bool valid() {
+ return Valid;
+ }
+
+ ~EDDisassembler();
+
+ /// createInst - creates and returns an instruction given a callback and
+ /// memory address, or NULL on failure
+ ///
+ /// @arg byteReader - A callback function that provides machine code bytes
+ /// @arg address - The address of the first byte of the instruction,
+ /// suitable for passing to byteReader
+ /// @arg arg - An opaque argument for byteReader
+ EDInst *createInst(EDByteReaderCallback byteReader,
+ uint64_t address,
+ void *arg);
+
+ /// initMaps - initializes RegVec, RegRMap, stackPointers and programCounters
+ /// using the provided register info
+ ///
+ /// @arg registerInfo - the register information to use as a source
+ void initMaps(const llvm::TargetRegisterInfo &registerInfo);
+ /// nameWithRegisterID - Returns the name (owned by the EDDisassembler) of a
+ /// register for a given register ID, or NULL on failure
+ ///
+ /// @arg registerID - the ID of the register to be queried
+ const char *nameWithRegisterID(unsigned registerID) const;
+ /// registerIDWithName - Returns the ID of a register for a given register
+ /// name, or 0 if the name is not known
+ ///
+ /// @arg name - The name of the register
+ unsigned registerIDWithName(const char *name) const;
+
+ /// registerIsStackPointer - reports whether a register ID is an alias for the
+ /// stack pointer register
+ ///
+ /// @arg registerID - The LLVM register ID
+ bool registerIsStackPointer(unsigned registerID);
+ /// registerIsProgramCounter - reports whether a register ID is an alias for
+ /// the program counter register
+ ///
+ /// @arg registerID - The LLVM register ID
+ bool registerIsProgramCounter(unsigned registerID);
+
+ /// printInst - prints an MCInst to a string, returning 0 on success, or -1
+ /// otherwise
+ ///
+ /// @arg str - A reference to a string which is filled in with the string
+ /// representation of the instruction
+ /// @arg inst - A reference to the MCInst to be printed
+ int printInst(std::string& str,
+ llvm::MCInst& inst);
+
+ /// parseInst - extracts operands and tokens from a string for use in
+ /// tokenizing the string. Returns 0 on success, or -1 otherwise.
+ ///
+ /// @arg operands - A reference to a vector that will be filled in with the
+ /// parsed operands
+ /// @arg tokens - A reference to a vector that will be filled in with the
+ /// tokens
+ /// @arg str - The string representation of the instruction
+ int parseInst(llvm::SmallVectorImpl<llvm::MCParsedAsmOperand*> &operands,
+ llvm::SmallVectorImpl<llvm::AsmToken> &tokens,
+ const std::string &str);
+
+ /// llvmSyntaxVariant - returns the LLVM syntax variant for this disassembler
+ int llvmSyntaxVariant() const;
+};
+
+#endif
diff --git a/tools/edis/EDInst.cpp b/tools/edis/EDInst.cpp
new file mode 100644
index 0000000000..9ed27002ad
--- /dev/null
+++ b/tools/edis/EDInst.cpp
@@ -0,0 +1,205 @@
+//===-EDInst.cpp - LLVM Enhanced Disassembler -----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Enhanced Disassembly library's instruction class.
+// The instruction is responsible for vending the string representation,
+// individual tokens, and operands for a single instruction.
+//
+//===----------------------------------------------------------------------===//
+
+#include "EDDisassembler.h"
+#include "EDInst.h"
+#include "EDOperand.h"
+#include "EDToken.h"
+
+#include "llvm/MC/MCInst.h"
+
+using namespace llvm;
+
+/// Constructor - Records the decoded instruction and its metadata.  The
+/// operand-role indices start at -1 until parseOperands() assigns them.
+///
+/// @arg inst - The MCInst; it is deleted by this object's destructor
+/// @arg byteSize - The size of the consumed instruction, in bytes
+/// @arg disassembler - The parent disassembler
+/// @arg info - The instruction information for this instruction
+EDInst::EDInst(llvm::MCInst *inst,
+               uint64_t byteSize,
+               EDDisassembler &disassembler,
+               const InstInfo *info) :
+  Disassembler(disassembler),
+  Inst(inst),
+  ThisInstInfo(info),
+  ByteSize(byteSize),
+  // Start as a null pointer so the member is never indeterminate before
+  // stringify() assigns it.
+  OperandOrder(0),
+  BranchTarget(-1),
+  MoveSource(-1),
+  MoveTarget(-1) {
+}
+
+/// Destructor - Deletes every operand and token held by the instruction, and
+/// then the MCInst itself.
+EDInst::~EDInst() {
+  for (opvec_t::iterator op = Operands.begin(); op != Operands.end(); ++op)
+    delete *op;
+
+  for (tokvec_t::iterator tok = Tokens.begin(); tok != Tokens.end(); ++tok)
+    delete *tok;
+
+  delete Inst;
+}
+
+/// byteSize - Returns the ByteSize value given to the constructor
+uint64_t EDInst::byteSize() {
+ return ByteSize;
+}
+
+/// stringify - Populates String and OperandOrder, returning 0 on success or
+/// -1 otherwise.  The result is cached, so the work is done at most once.
+int EDInst::stringify() {
+  if (StringifyResult.valid())
+    return StringifyResult.result();
+
+  // Without instruction information there is no operand order to look up.
+  // Fail here, as parseOperands() does, instead of dereferencing NULL below.
+  if (!ThisInstInfo)
+    return StringifyResult.setResult(-1);
+
+  if (Disassembler.printInst(String, *Inst))
+    return StringifyResult.setResult(-1);
+
+  // The operand order depends on the syntax variant in use.
+  OperandOrder = ThisInstInfo->operandOrders[Disassembler.llvmSyntaxVariant()];
+
+  return StringifyResult.setResult(0);
+}
+
+/// getString - Points str at the instruction's string representation, which
+/// is held in the String member.  Returns 0 on success, or -1 if stringify()
+/// fails.
+int EDInst::getString(const char*& str) {
+  const int status = stringify();
+
+  if (status != 0)
+    return -1;
+
+  str = String.c_str();
+  return 0;
+}
+
+/// instID - Returns the opcode of the wrapped MCInst
+unsigned EDInst::instID() {
+ return Inst->getOpcode();
+}
+
+/// isBranch - Returns true if instruction information is available and its
+/// flags include kInstructionFlagBranch
+bool EDInst::isBranch() {
+  return ThisInstInfo &&
+         (ThisInstInfo->instructionFlags & kInstructionFlagBranch);
+}
+
+/// isMove - Returns true if instruction information is available and its
+/// flags include kInstructionFlagMove
+bool EDInst::isMove() {
+  return ThisInstInfo &&
+         (ThisInstInfo->instructionFlags & kInstructionFlagMove);
+}
+
+/// parseOperands - Creates one EDOperand per entry in the instruction
+/// information and records which operand indices are the branch target, move
+/// source and move target.  Returns 0 on success, or -1 if there is no
+/// instruction information.  The result is cached.
+int EDInst::parseOperands() {
+ if (ParseResult.valid())
+ return ParseResult.result();
+
+ if (!ThisInstInfo)
+ return ParseResult.setResult(-1);
+
+ unsigned int opIndex;
+ // NOTE(review): mcOpIndex is never changed in this function; presumably the
+ // EDOperand constructor takes it by reference and advances it - confirm.
+ unsigned int mcOpIndex = 0;
+
+ for (opIndex = 0; opIndex < ThisInstInfo->numOperands; ++opIndex) {
+ // The move case is only reached when the branch-target test above it fails.
+ if (isBranch() &&
+ (ThisInstInfo->operandFlags[opIndex] & kOperandFlagTarget)) {
+ BranchTarget = opIndex;
+ }
+ else if (isMove()) {
+ if (ThisInstInfo->operandFlags[opIndex] & kOperandFlagSource)
+ MoveSource = opIndex;
+ else if (ThisInstInfo->operandFlags[opIndex] & kOperandFlagTarget)
+ MoveTarget = opIndex;
+ }
+
+ // The operand is deleted by this instruction's destructor.
+ EDOperand *operand = new EDOperand(Disassembler, *this, opIndex, mcOpIndex);
+
+ Operands.push_back(operand);
+ }
+
+ return ParseResult.setResult(0);
+}
+
+/// branchTargetID - Returns BranchTarget, or -1 if parseOperands() fails
+int EDInst::branchTargetID() {
+  return parseOperands() ? -1 : BranchTarget;
+}
+
+/// moveSourceID - Returns MoveSource, or -1 if parseOperands() fails
+int EDInst::moveSourceID() {
+  return parseOperands() ? -1 : MoveSource;
+}
+
+/// moveTargetID - Returns MoveTarget, or -1 if parseOperands() fails
+int EDInst::moveTargetID() {
+  return parseOperands() ? -1 : MoveTarget;
+}
+
+/// numOperands - Returns the number of parsed operands, or -1 if
+/// parseOperands() fails
+int EDInst::numOperands() {
+  return parseOperands() ? -1 : static_cast<int>(Operands.size());
+}
+
+/// getOperand - Points operand at the operand with the given index.  Returns
+/// 0 on success, or -1 if parsing fails or the index is out of range.  The
+/// operand is deleted by the EDInst destructor.
+int EDInst::getOperand(EDOperand *&operand, unsigned int index) {
+  // Parsing runs first; the index is only checked if it succeeds.
+  if (parseOperands() || index >= Operands.size())
+    return -1;
+
+  operand = Operands[index];
+  return 0;
+}
+
+/// tokenize - Populates Tokens from the instruction's string, returning 0 on
+/// success or -1 otherwise.  The result is cached.
+int EDInst::tokenize() {
+  if (TokenizeResult.valid())
+    return TokenizeResult.result();
+
+  // Stays -1 unless stringify() succeeds and the tokenizer supplies a result.
+  int status = -1;
+
+  if (!stringify())
+    status = EDToken::tokenize(Tokens,
+                               String,
+                               OperandOrder,
+                               Disassembler);
+
+  return TokenizeResult.setResult(status);
+}
+
+/// numTokens - Returns the number of tokens, or -1 if tokenize() fails
+int EDInst::numTokens() {
+  return tokenize() ? -1 : static_cast<int>(Tokens.size());
+}
+
+/// getToken - Points token at the token with the given index.  Returns 0 on
+/// success, or -1 if tokenizing fails or the index is out of range.  The
+/// token is deleted by the EDInst destructor.
+///
+/// @arg token - Set to the requested token on success
+/// @arg index - The index of the token in the instruction
+int EDInst::getToken(EDToken *&token, unsigned int index) {
+  if (tokenize())
+    return -1;
+
+  // Reject out-of-range indices, as getOperand() does, rather than reading
+  // past the end of Tokens.
+  if (index >= Tokens.size())
+    return -1;
+
+  token = Tokens[index];
+  return 0;
+}
+
+#ifdef __BLOCKS__
+/// visitTokens - Applies the visitor to each token in order.  The visitor
+/// returns 0 to continue or 1 to stop early; both count as success.  Any other
+/// value aborts the walk with -1, as does a tokenize() failure.
+int EDInst::visitTokens(EDTokenVisitor_t visitor) {
+  if (tokenize())
+    return -1;
+
+  tokvec_t::iterator iter = Tokens.begin();
+
+  while (iter != Tokens.end()) {
+    const int ret = visitor(*iter);
+
+    if (ret == 1)
+      return 0;
+    if (ret != 0)
+      return -1;
+
+    ++iter;
+  }
+
+  return 0;
+}
+#endif
diff --git a/tools/edis/EDInst.h b/tools/edis/EDInst.h
new file mode 100644
index 0000000000..db03a7852e
--- /dev/null
+++ b/tools/edis/EDInst.h
@@ -0,0 +1,171 @@
+//===-EDInst.h - LLVM Enhanced Disassembler ---------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interface for the Enhanced Disassembly library's
+// instruction class. The instruction is responsible for vending the string
+// representation, individual tokens and operands for a single instruction.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef EDInst_
+#define EDInst_
+
+#include "llvm-c/EnhancedDisassembly.h"
+
+#include "llvm/ADT/SmallVector.h"
+
+#include <string>
+#include <vector>
+
+/// CachedResult - Encapsulates the result of a function along with the validity
+/// of that result, so that slow functions don't need to run twice
+struct CachedResult {
+  /// True if the result has been obtained by executing the function
+  bool Valid;
+  /// The result last obtained from the function
+  int Result;
+
+  /// Constructor - Initializes an invalid result.  Result starts at 0 so it
+  /// is never indeterminate, but it is only meaningful once valid() is true.
+  CachedResult() : Valid(false), Result(0) { }
+  /// valid - Returns true if the result has been obtained by executing the
+  /// function and false otherwise
+  bool valid() const { return Valid; }
+  /// result - Returns the result of the function; callers should not rely on
+  /// the value unless valid() is true
+  int result() const { return Result; }
+  /// setResult - Sets the result of the function and declares it valid
+  /// returning the result (so that setResult() can be called from inside a
+  /// return statement)
+  /// @arg result - The result of the function
+  int setResult(int result) { Result = result; Valid = true; return result; }
+};
+
+/// EDInst - Encapsulates a single instruction, which can be queried for its
+/// string representation, as well as its operands and tokens
+struct EDInst {
+ /// The parent disassembler
+ EDDisassembler &Disassembler;
+ /// The containing MCInst
+ llvm::MCInst *Inst;
+ /// The instruction information provided by TableGen for this instruction
+ const InstInfo *ThisInstInfo;
+ /// The number of bytes for the machine code representation of the instruction
+ uint64_t ByteSize;
+
+ /// The result of the stringify() function
+ CachedResult StringifyResult;
+ /// The string representation of the instruction
+ std::string String;
+ /// The order in which operands from the InstInfo's operand information appear
+ /// in String
+ const char* OperandOrder;
+
+ /// The result of the parseOperands() function
+ CachedResult ParseResult;
+ typedef llvm::SmallVector<EDOperand*, 5> opvec_t;
+ /// The instruction's operands
+ opvec_t Operands;
+ /// The operand corresponding to the target, if the instruction is a branch
+ int BranchTarget;
+ /// The operand corresponding to the source, if the instruction is a move
+ int MoveSource;
+ /// The operand corresponding to the target, if the instruction is a move
+ int MoveTarget;
+
+ /// The result of the tokenize() function
+ CachedResult TokenizeResult;
+ typedef std::vector<EDToken*> tokvec_t;
+ /// The instruction's tokens
+ tokvec_t Tokens;
+
+ /// Constructor - initializes an instruction given the output of the LLVM
+ /// C++ disassembler
+ ///
+ /// @arg inst - The MCInst, which will now be owned by this object
+ /// @arg byteSize - The size of the consumed instruction, in bytes
+ /// @arg disassembler - The parent disassembler
+ /// @arg instInfo - The instruction information produced by the table
+ /// generator for this instruction
+ EDInst(llvm::MCInst *inst,
+ uint64_t byteSize,
+ EDDisassembler &disassembler,
+ const InstInfo *instInfo);
+ ~EDInst();
+
+ /// byteSize - returns the number of bytes consumed by the machine code
+ /// representation of the instruction
+ uint64_t byteSize();
+ /// instID - returns the LLVM instruction ID of the instruction
+ unsigned instID();
+
+ /// stringify - populates the String and OperandOrder members of the
+ /// instruction, returning 0 on success or -1 otherwise
+ int stringify();
+ /// getString - retrieves a pointer to the string representation of the
+ /// instruction, returning 0 on success or -1 otherwise
+ ///
+ /// @arg str - A reference to a pointer that, on success, is set to point to
+ /// the string representation of the instruction; this string is still owned
+ /// by the instruction and will be deleted when it is
+ int getString(const char *&str);
+
+ /// isBranch - Returns true if the instruction is a branch
+ bool isBranch();
+ /// isMove - Returns true if the instruction is a move
+ bool isMove();
+
+ /// parseOperands - populates the Operands member of the instruction,
+ /// returning 0 on success or -1 otherwise
+ int parseOperands();
+ /// branchTargetID - returns the ID (suitable for use with getOperand()) of
+ /// the target operand if the instruction is a branch, or -1 otherwise
+ int branchTargetID();
+ /// moveSourceID - returns the ID of the source operand if the instruction
+ /// is a move, or -1 otherwise
+ int moveSourceID();
+ /// moveTargetID - returns the ID of the target operand if the instruction
+ /// is a move, or -1 otherwise
+ int moveTargetID();
+
+ /// numOperands - returns the number of operands available to retrieve, or -1
+ /// on error
+ int numOperands();
+ /// getOperand - retrieves an operand from the instruction's operand list by
+ /// index, returning 0 on success or -1 on error
+ ///
+ /// @arg operand - A reference whose target is pointed at the operand on
+ /// success, although the operand is still owned by the EDInst
+ /// @arg index - The index of the operand in the instruction
+ int getOperand(EDOperand *&operand, unsigned int index);
+
+ /// tokenize - populates the Tokens member of the instruction, returning 0 on
+ /// success or -1 otherwise
+ int tokenize();
+ /// numTokens - returns the number of tokens in the instruction, or -1 on
+ /// error
+ int numTokens();
+ /// getToken - retrieves a token from the instruction's token list by index,
+ /// returning 0 on success or -1 on error
+ ///
+ /// @arg token - A reference whose target is pointed at the token on success,
+ /// although the token is still owned by the EDInst
+ /// @arg index - The index of the token in the instruction
+ int getToken(EDToken *&token, unsigned int index);
+
+#ifdef __BLOCKS__
+ /// visitTokens - Visits each token in turn and applies a block to it,
+ /// returning 0 if all tokens are visited and/or the block signals
+ /// termination by returning 1; returns -1 on error
+ ///
+ /// @arg visitor - The visitor block to apply to all tokens.
+ int visitTokens(EDTokenVisitor_t visitor);
+#endif
+};
+
+#endif
diff --git a/tools/edis/EDMain.cpp b/tools/edis/EDMain.cpp
new file mode 100644
index 0000000000..c2c179693e
--- /dev/null
+++ b/tools/edis/EDMain.cpp
@@ -0,0 +1,265 @@
+//===-EDMain.cpp - LLVM Enhanced Disassembly C API ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the enhanced disassembler's public C API.
+//
+//===----------------------------------------------------------------------===//
+
+#include "EDDisassembler.h"
+#include "EDInst.h"
+#include "EDOperand.h"
+#include "EDToken.h"
+
+#include "llvm-c/EnhancedDisassembly.h"
+
+/// EDGetDisassembler - Initializes the disassembler subsystem and looks up
+/// the disassembler for the given triple and assembly syntax.  On success the
+/// result is stored in *disassembler and 0 is returned; if no disassembler is
+/// found, *disassembler is left untouched and -1 is returned.
+int EDGetDisassembler(EDDisassemblerRef *disassembler,
+                      const char *triple,
+                      EDAssemblySyntax_t syntax) {
+  EDDisassembler::initialize();
+
+  EDDisassemblerRef found = EDDisassembler::getDisassembler(triple, syntax);
+
+  if (!found)
+    return -1;
+
+  *disassembler = found;
+  return 0;
+}
+
+/// EDGetRegisterName - Asks the disassembler for the name of a register.
+/// Stores the name in *regName and returns 0 when the lookup yields a
+/// non-null string; otherwise returns -1 without writing to *regName.
+int EDGetRegisterName(const char** regName,
+                      EDDisassemblerRef disassembler,
+                      unsigned regID) {
+  if (const char *found = disassembler->nameWithRegisterID(regID)) {
+    *regName = found;
+    return 0;
+  }
+
+  return -1;
+}
+
+/// EDRegisterIsStackPointer - Returns 1 if the disassembler reports regID as
+/// a stack pointer, 0 otherwise.
+int EDRegisterIsStackPointer(EDDisassemblerRef disassembler,
+                             unsigned regID) {
+  if (disassembler->registerIsStackPointer(regID))
+    return 1;
+
+  return 0;
+}
+
+/// EDRegisterIsProgramCounter - Returns 1 if the disassembler reports regID
+/// as a program counter, 0 otherwise.
+int EDRegisterIsProgramCounter(EDDisassemblerRef disassembler,
+                               unsigned regID) {
+  if (disassembler->registerIsProgramCounter(regID))
+    return 1;
+
+  return 0;
+}
+
+/// EDCreateInsts - Creates up to count consecutive instructions, starting at
+/// address and advancing by each instruction's byteSize().  Each instruction
+/// is stored in insts in order.  Creation stops at the first instruction the
+/// disassembler fails to create.
+///
+/// Returns the number of instructions stored in insts.
+unsigned int EDCreateInsts(EDInstRef *insts,
+                           unsigned int count,
+                           EDDisassemblerRef disassembler,
+                           EDByteReaderCallback byteReader,
+                           uint64_t address,
+                           void *arg) {
+  unsigned int created = 0;
+
+  while (created < count) {
+    EDInst *next = disassembler->createInst(byteReader, address, arg);
+
+    if (!next)
+      break;
+
+    insts[created] = next;
+    ++created;
+    address += next->byteSize();
+  }
+
+  return created;
+}
+
+/// EDReleaseInst - Deletes the given instruction object.
+void EDReleaseInst(EDInstRef inst) {
+ delete inst;
+}
+
+/// EDInstByteSize - Returns the result of inst->byteSize().
+int EDInstByteSize(EDInstRef inst) {
+ return inst->byteSize();
+}
+
+/// EDGetInstString - Forwards to inst->getString() with *buf as the output
+/// pointer and returns its result.
+int EDGetInstString(const char **buf,
+ EDInstRef inst) {
+ return inst->getString(*buf);
+}
+
+/// EDInstID - Stores inst->instID() in *instID; always returns 0.
+int EDInstID(unsigned *instID, EDInstRef inst) {
+ *instID = inst->instID();
+ return 0;
+}
+
+/// EDInstIsBranch - Returns the result of inst->isBranch().
+int EDInstIsBranch(EDInstRef inst) {
+ return inst->isBranch();
+}
+
+/// EDInstIsMove - Returns the result of inst->isMove().
+int EDInstIsMove(EDInstRef inst) {
+ return inst->isMove();
+}
+
+/// EDBranchTargetID - Returns the ID of the target operand if the instruction
+/// is a branch, or -1 otherwise (see EDInst::branchTargetID()).
+int EDBranchTargetID(EDInstRef inst) {
+ return inst->branchTargetID();
+}
+
+/// EDMoveSourceID - Returns the ID of the source operand if the instruction
+/// is a move, or -1 otherwise (see EDInst::moveSourceID()).
+int EDMoveSourceID(EDInstRef inst) {
+ return inst->moveSourceID();
+}
+
+/// EDMoveTargetID - Returns the ID of the target operand if the instruction
+/// is a move, or -1 otherwise (see EDInst::moveTargetID()).
+int EDMoveTargetID(EDInstRef inst) {
+ return inst->moveTargetID();
+}
+
+/// EDNumTokens - Returns the number of tokens in the instruction, or -1 on
+/// error (see EDInst::numTokens()).
+int EDNumTokens(EDInstRef inst) {
+ return inst->numTokens();
+}
+
+/// EDGetToken - Retrieves a token from the instruction's token list by index,
+/// returning 0 on success or -1 on error.  On success *token points at the
+/// token, which is still owned by the instruction.
+///
+/// A negative index is rejected with -1.
+int EDGetToken(EDTokenRef *token,
+               EDInstRef inst,
+               int index) {
+  // EDInst::getToken() takes an unsigned index; reject negative values here
+  // instead of letting them wrap around to a huge unsigned index.
+  if (index < 0)
+    return -1;
+
+  return inst->getToken(*token, static_cast<unsigned int>(index));
+}
+
+/// EDGetTokenString - Points *buf at the token's string, which remains owned
+/// by the token, and returns the result of EDToken::getString().
+int EDGetTokenString(const char **buf,
+ EDTokenRef token) {
+ return token->getString(*buf);
+}
+
+/// EDOperandIndexForToken - Returns the operand ID recorded on the token;
+/// the ID is -1 unless one was assigned with EDToken::setOperandID().
+int EDOperandIndexForToken(EDTokenRef token) {
+ return token->operandID();
+}
+
+/// EDTokenIsWhitespace - Returns 1 if the token's type is kTokenWhitespace,
+/// 0 otherwise.
+int EDTokenIsWhitespace(EDTokenRef token) {
+  return token->type() == EDToken::kTokenWhitespace ? 1 : 0;
+}
+
+/// EDTokenIsPunctuation - Returns 1 if the token's type is kTokenPunctuation,
+/// 0 otherwise.
+int EDTokenIsPunctuation(EDTokenRef token) {
+  return token->type() == EDToken::kTokenPunctuation ? 1 : 0;
+}
+
+/// EDTokenIsOpcode - Returns 1 if the token's type is kTokenOpcode,
+/// 0 otherwise.
+int EDTokenIsOpcode(EDTokenRef token) {
+  return token->type() == EDToken::kTokenOpcode ? 1 : 0;
+}
+
+/// EDTokenIsLiteral - Returns 1 if the token's type is kTokenLiteral,
+/// 0 otherwise.
+int EDTokenIsLiteral(EDTokenRef token) {
+  return token->type() == EDToken::kTokenLiteral ? 1 : 0;
+}
+
+/// EDTokenIsRegister - Returns 1 if the token's type is kTokenRegister,
+/// 0 otherwise.
+int EDTokenIsRegister(EDTokenRef token) {
+  return token->type() == EDToken::kTokenRegister ? 1 : 0;
+}
+
+/// EDTokenIsNegativeLiteral - Returns -1 if the token is not a literal;
+/// otherwise returns the token's literal sign (1 if negative, 0 otherwise).
+int EDTokenIsNegativeLiteral(EDTokenRef token) {
+  const bool isLiteral = token->type() == EDToken::kTokenLiteral;
+
+  return isLiteral ? token->literalSign() : -1;
+}
+
+/// EDLiteralTokenAbsoluteValue - Retrieves the absolute value of a literal
+/// token into *value.  Returns -1 without touching *value if the token is not
+/// a literal; otherwise returns the result of EDToken::literalAbsoluteValue().
+int EDLiteralTokenAbsoluteValue(uint64_t *value,
+                                EDTokenRef token) {
+  const bool isLiteral = token->type() == EDToken::kTokenLiteral;
+
+  return isLiteral ? token->literalAbsoluteValue(*value) : -1;
+}
+
+/// EDRegisterTokenValue - Retrieves the register ID of a register token into
+/// *registerID.  Returns -1 without touching *registerID if the token is not
+/// a register; otherwise returns the result of EDToken::registerID().
+int EDRegisterTokenValue(unsigned *registerID,
+                         EDTokenRef token) {
+  const bool isRegister = token->type() == EDToken::kTokenRegister;
+
+  return isRegister ? token->registerID(*registerID) : -1;
+}
+
+/// EDNumOperands - Returns the number of operands available to retrieve, or
+/// -1 on error (see EDInst::numOperands()).
+int EDNumOperands(EDInstRef inst) {
+ return inst->numOperands();
+}
+
+/// EDGetOperand - Retrieves an operand from the instruction's operand list by
+/// index, returning 0 on success or -1 on error.  On success *operand points
+/// at the operand, which is still owned by the instruction.
+///
+/// A negative index is rejected with -1.
+int EDGetOperand(EDOperandRef *operand,
+                 EDInstRef inst,
+                 int index) {
+  // EDInst::getOperand() takes an unsigned index; reject negative values here
+  // instead of letting them wrap around to a huge unsigned index.
+  if (index < 0)
+    return -1;
+
+  return inst->getOperand(*operand, static_cast<unsigned int>(index));
+}
+
+/// EDEvaluateOperand - Evaluates an operand into *result, using regReader
+/// (called with arg) to obtain register values; returns the result of
+/// EDOperand::evaluate().
+int EDEvaluateOperand(uint64_t *result,
+ EDOperandRef operand,
+ EDRegisterReaderCallback regReader,
+ void *arg) {
+ return operand->evaluate(*result, regReader, arg);
+}
+
+#ifdef __BLOCKS__
+
+/// ByteReaderWrapper - Carries a byte-reader block through the void*
+/// argument of a byte-reader callback.
+struct ByteReaderWrapper {
+ EDByteBlock_t byteBlock;
+};
+
+/// readerWrapperCallback - Byte-reader callback that treats arg as a
+/// ByteReaderWrapper and invokes the block stored in it, returning the
+/// block's result.
+static int readerWrapperCallback(uint8_t *byte,
+                                 uint64_t address,
+                                 void *arg) {
+  ByteReaderWrapper *const state = static_cast<ByteReaderWrapper *>(arg);
+
+  return state->byteBlock(byte, address);
+}
+
+/// EDBlockCreateInsts - Like EDCreateInsts(), but reads bytes through a
+/// block instead of a callback/argument pair.
+///
+/// Returns the number of instructions stored in insts; a count of zero or
+/// less yields 0 without touching insts.
+unsigned int EDBlockCreateInsts(EDInstRef *insts,
+                                int count,
+                                EDDisassemblerRef disassembler,
+                                EDByteBlock_t byteBlock,
+                                uint64_t address) {
+  // count is signed here but unsigned in EDCreateInsts(); a negative value
+  // would wrap to a huge count and let the loop there run past the end of
+  // insts.
+  if (count <= 0)
+    return 0;
+
+  struct ByteReaderWrapper wrapper;
+  wrapper.byteBlock = byteBlock;
+
+  return EDCreateInsts(insts,
+                       static_cast<unsigned int>(count),
+                       disassembler,
+                       readerWrapperCallback,
+                       address,
+                       (void*)&wrapper);
+}
+
+/// EDBlockEvaluateOperand - Evaluates an operand into *result, using a block
+/// to obtain register values; returns the result of the block-taking
+/// EDOperand::evaluate().
+int EDBlockEvaluateOperand(uint64_t *result,
+ EDOperandRef operand,
+ EDRegisterBlock_t regBlock) {
+ return operand->evaluate(*result, regBlock);
+}
+
+/// EDBlockVisitTokens - Applies the visitor block to the instruction's tokens
+/// and returns the result of EDInst::visitTokens().
+int EDBlockVisitTokens(EDInstRef inst,
+ EDTokenVisitor_t visitor) {
+ return inst->visitTokens(visitor);
+}
+
+#else
+
+/// EDBlockCreateInsts - Stub compiled when __BLOCKS__ is not defined; creates
+/// nothing and returns 0.
+// NOTE(review): presumably kept so the symbol listed in
+// EnhancedDisassembly.exports still exists without blocks support - confirm.
+extern "C" unsigned int EDBlockCreateInsts() {
+ return 0;
+}
+
+/// EDBlockEvaluateOperand - Stub compiled when __BLOCKS__ is not defined;
+/// always returns -1.
+extern "C" int EDBlockEvaluateOperand() {
+ return -1;
+}
+
+/// EDBlockVisitTokens - Stub compiled when __BLOCKS__ is not defined; always
+/// returns -1.
+extern "C" int EDBlockVisitTokens() {
+ return -1;
+}
+
+#endif
diff --git a/tools/edis/EDOperand.cpp b/tools/edis/EDOperand.cpp
new file mode 100644
index 0000000000..c15860affd
--- /dev/null
+++ b/tools/edis/EDOperand.cpp
@@ -0,0 +1,148 @@
+//===-EDOperand.cpp - LLVM Enhanced Disassembler --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Enhanced Disassembly library's operand class. The
+// operand is responsible for allowing evaluation given a particular register
+// context.
+//
+//===----------------------------------------------------------------------===//
+
+#include "EDDisassembler.h"
+#include "EDInst.h"
+#include "EDOperand.h"
+
+#include "llvm/MC/MCInst.h"
+
+using namespace llvm;
+
+/// Constructor - Records the operand's position and advances mcOpIndex past
+/// the MCInst operands that this operand occupies.
+///
+/// On x86 and x86-64 an immediate or register occupies one MCInst operand, a
+/// memory operand occupies one (PC-relative) or five, and an effective
+/// address occupies four.  On any other architecture, or when none of these
+/// flags is set, mcOpIndex is left unchanged.
+EDOperand::EDOperand(const EDDisassembler &disassembler,
+                     const EDInst &inst,
+                     unsigned int opIndex,
+                     unsigned int &mcOpIndex) :
+  Disassembler(disassembler),
+  Inst(inst),
+  OpIndex(opIndex),
+  MCOpIndex(mcOpIndex) {
+  const bool isX86 = Disassembler.Key.Arch == Triple::x86 ||
+                     Disassembler.Key.Arch == Triple::x86_64;
+
+  unsigned int consumed = 0;
+
+  if (isX86) {
+    const uint8_t flags = inst.ThisInstInfo->operandFlags[opIndex];
+
+    if (flags & (kOperandFlagImmediate | kOperandFlagRegister))
+      consumed = 1;
+    else if (flags & kOperandFlagMemory)
+      consumed = (flags & kOperandFlagPCRelative) ? 1 : 5;
+    else if (flags & kOperandFlagEffectiveAddress)
+      consumed = 4;
+  }
+
+  mcOpIndex += consumed;
+}
+
+/// Destructor - Has nothing to release.
+EDOperand::~EDOperand() {
+}
+
+/// evaluate - Computes the numeric value of the operand, reading registers
+/// through callback (which is passed arg).
+///
+/// Only x86 and x86-64 operands are handled:
+///   - an immediate evaluates to its value;
+///   - a register evaluates to whatever callback reports, and callback's
+///     return value is returned directly;
+///   - a PC-relative memory or effective-address operand evaluates to the
+///     value of the register named "RIP" plus the displacement;
+///   - any other memory or effective-address operand evaluates to
+///     base + scale * index + displacement, where a base or index register
+///     ID of 0 contributes nothing.  The segment register is not consulted.
+///
+/// Returns 0 on success and -1 if the operand cannot be evaluated or a
+/// register read fails.
+int EDOperand::evaluate(uint64_t &result,
+                        EDRegisterReaderCallback callback,
+                        void *arg) {
+  if (Disassembler.Key.Arch != Triple::x86 &&
+      Disassembler.Key.Arch != Triple::x86_64)
+    return -1;
+
+  const uint8_t flags = Inst.ThisInstInfo->operandFlags[OpIndex];
+
+  if (flags & kOperandFlagImmediate) {
+    result = Inst.Inst->getOperand(MCOpIndex).getImm();
+    return 0;
+  }
+
+  if (flags & kOperandFlagRegister)
+    return callback(&result, Inst.Inst->getOperand(MCOpIndex).getReg(), arg);
+
+  if (!(flags & (kOperandFlagMemory | kOperandFlagEffectiveAddress)))
+    return -1;
+
+  if (flags & kOperandFlagPCRelative) {
+    const int64_t offset = Inst.Inst->getOperand(MCOpIndex).getImm();
+
+    uint64_t pc;
+
+    // TODO fix how we do this
+
+    if (callback(&pc, Disassembler.registerIDWithName("RIP"), arg))
+      return -1;
+
+    result = pc + offset;
+    return 0;
+  }
+
+  // Fetch every component before invoking the callback.
+  const unsigned base = Inst.Inst->getOperand(MCOpIndex).getReg();
+  const uint64_t scale = Inst.Inst->getOperand(MCOpIndex + 1).getImm();
+  const unsigned index = Inst.Inst->getOperand(MCOpIndex + 2).getReg();
+  const int64_t offset = Inst.Inst->getOperand(MCOpIndex + 3).getImm();
+
+  uint64_t address = 0;
+
+  if (base) {
+    uint64_t value;
+    if (callback(&value, base, arg))
+      return -1;
+    address += value;
+  }
+
+  if (index) {
+    uint64_t value;
+    if (callback(&value, index, arg))
+      return -1;
+    address += scale * value;
+  }
+
+  result = address + offset;
+  return 0;
+}
+
+#ifdef __BLOCKS__
+/// RegisterReaderWrapper - Carries a register-reader block through the void*
+/// argument of a register-reader callback.
+struct RegisterReaderWrapper {
+ EDRegisterBlock_t regBlock;
+};
+
+/// readerWrapperCallback - Register-reader callback that treats arg as a
+/// RegisterReaderWrapper and invokes the block stored in it, returning the
+/// block's result.
+///
+/// Declared static: it is only used within this file, and a generically
+/// named helper should not have external linkage.
+static int readerWrapperCallback(uint64_t *value,
+                                 unsigned regID,
+                                 void *arg) {
+  struct RegisterReaderWrapper *wrapper = (struct RegisterReaderWrapper *)arg;
+  return wrapper->regBlock(value, regID);
+}
+
+/// evaluate - Like the callback-taking evaluate(), but obtains register
+/// values from a block.  The block is wrapped in a RegisterReaderWrapper on
+/// the stack and handed to the callback-taking overload.
+int EDOperand::evaluate(uint64_t &result,
+                        EDRegisterBlock_t regBlock) {
+  struct RegisterReaderWrapper state = { regBlock };
+
+  return evaluate(result, readerWrapperCallback, static_cast<void*>(&state));
+}
+#endif
diff --git a/tools/edis/EDOperand.h b/tools/edis/EDOperand.h
new file mode 100644
index 0000000000..32d3a5ef83
--- /dev/null
+++ b/tools/edis/EDOperand.h
@@ -0,0 +1,65 @@
+//===-EDOperand.h - LLVM Enhanced Disassembler ------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interface for the Enhanced Disassembly library's
+// operand class. The operand is responsible for allowing evaluation given a
+// particular register context.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef EDOperand_
+#define EDOperand_
+
+#include "llvm-c/EnhancedDisassembly.h"
+
+/// EDOperand - Encapsulates a single operand, which can be evaluated by the
+/// client
+struct EDOperand {
+ /// The parent disassembler
+ const EDDisassembler &Disassembler;
+ /// The parent instruction
+ const EDInst &Inst;
+
+ /// The index of the operand in the EDInst
+ unsigned int OpIndex;
+ /// The index of the first component of the operand in the MCInst
+ unsigned int MCOpIndex;
+
+ /// Constructor - Initializes an EDOperand
+ ///
+ /// @arg disassembler - The disassembler responsible for the operand
+ /// @arg inst - The instruction containing this operand
+ /// @arg opIndex - The index of the operand in inst
+ /// @arg mcOpIndex - The index of the operand's first component in the
+ /// original MCInst; the constructor advances it past
+ /// the MCInst operands this operand occupies
+ EDOperand(const EDDisassembler &disassembler,
+ const EDInst &inst,
+ unsigned int opIndex,
+ unsigned int &mcOpIndex);
+ ~EDOperand();
+
+ /// evaluate - Returns the numeric value of an operand to the extent possible,
+ /// returning 0 on success or -1 if there was some problem (such as a
+ /// register not being readable). Only x86 and x86-64 operands are
+ /// evaluated; any other architecture yields -1.
+ ///
+ /// @arg result - A reference whose target is filled in with the value of
+ /// the operand (the address if it is a memory operand)
+ /// @arg callback - A function to call to obtain register values
+ /// @arg arg - An opaque argument to pass to callback
+ int evaluate(uint64_t &result,
+ EDRegisterReaderCallback callback,
+ void *arg);
+
+#ifdef __BLOCKS__
+ /// evaluate - Like evaluate for a callback, but uses a block instead
+ int evaluate(uint64_t &result,
+ EDRegisterBlock_t regBlock);
+#endif
+};
+
+#endif
diff --git a/tools/edis/EDToken.cpp b/tools/edis/EDToken.cpp
new file mode 100644
index 0000000000..6b67e624e8
--- /dev/null
+++ b/tools/edis/EDToken.cpp
@@ -0,0 +1,185 @@
+//===-EDToken.cpp - LLVM Enhanced Disassembler ----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Enhanced Disassembler library's token class. The
+// token is responsible for vending information about the token, such as its
+// type and logical value.
+//
+//===----------------------------------------------------------------------===//
+
+#include "EDDisassembler.h"
+#include "EDToken.h"
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+
+using namespace llvm;
+
+/// Constructor - Initializes the information common to all tokens.  The
+/// token starts out unlinked from any operand (operand ID -1).
+///
+/// The literal and register fields are given defined defaults (non-negative,
+/// zero, zero) so they never hold indeterminate values before makeLiteral()
+/// or makeRegister() is called.
+EDToken::EDToken(StringRef str,
+                 enum tokenType type,
+                 uint64_t localType,
+                 EDDisassembler &disassembler) :
+  Disassembler(disassembler),
+  Str(str),
+  Type(type),
+  LocalType(localType),
+  OperandID(-1),
+  LiteralSign(false),
+  LiteralAbsoluteValue(0),
+  RegisterID(0) {
+}
+
+/// Destructor - Has nothing to release explicitly.
+EDToken::~EDToken() {
+}
+
+/// makeLiteral - Marks the token as a literal and records its sign and
+/// absolute value.
+void EDToken::makeLiteral(bool sign, uint64_t absoluteValue) {
+ Type = kTokenLiteral;
+ LiteralSign = sign;
+ LiteralAbsoluteValue = absoluteValue;
+}
+
+/// makeRegister - Marks the token as a register and records its register ID.
+void EDToken::makeRegister(unsigned registerID) {
+ Type = kTokenRegister;
+ RegisterID = registerID;
+}
+
+/// setOperandID - Records the ID of the operand this token belongs to.
+void EDToken::setOperandID(int operandID) {
+ OperandID = operandID;
+}
+
+/// type - Returns the token's public type.
+enum EDToken::tokenType EDToken::type() const {
+ return Type;
+}
+
+/// localType - Returns the token's type as recorded by the tokenizer.
+uint64_t EDToken::localType() const {
+ return LocalType;
+}
+
+/// string - Returns the token's string as the StringRef it was built from.
+StringRef EDToken::string() const {
+ return Str;
+}
+
+/// operandID - Returns the operand ID recorded for the token, or -1 if none
+/// has been set.
+int EDToken::operandID() const {
+ return OperandID;
+}
+
+/// literalSign - Returns 1 if the literal is negative, 0 if it is not, or -1
+/// if the token is not a literal.
+int EDToken::literalSign() const {
+  if (Type == kTokenLiteral)
+    return LiteralSign ? 1 : 0;
+
+  return -1;
+}
+
+/// literalAbsoluteValue - Stores the literal's absolute value in value and
+/// returns 0; returns -1 and leaves value untouched if the token is not a
+/// literal.
+int EDToken::literalAbsoluteValue(uint64_t &value) const {
+  if (Type == kTokenLiteral) {
+    value = LiteralAbsoluteValue;
+    return 0;
+  }
+
+  return -1;
+}
+
+/// registerID - Stores the token's register ID in registerID and returns 0;
+/// returns -1 and leaves registerID untouched if the token is not a register.
+int EDToken::registerID(unsigned &registerID) const {
+  if (Type == kTokenRegister) {
+    registerID = RegisterID;
+    return 0;
+  }
+
+  return -1;
+}
+
+/// tokenize - Parses an instruction string and appends one newly allocated
+/// EDToken per lexed token to tokens.  Returns 0.
+///
+/// The first identifier becomes the opcode token; integers become literal
+/// tokens (sign and absolute value recorded separately); registers become
+/// register tokens; everything else, including any later identifier, becomes
+/// punctuation.  A token that lies within the source range of a parsed
+/// operand is tagged with the corresponding entry of operandOrder.
+///
+/// @arg tokens       - Receives pointers to the allocated tokens
+/// @arg str          - The instruction string to tokenize
+/// @arg operandOrder - Maps the position of a parsed operand in str to the
+///                     operand ID assigned to its tokens
+/// @arg disassembler - The disassembler used to parse str
+int EDToken::tokenize(std::vector<EDToken*> &tokens,
+                      std::string &str,
+                      const char *operandOrder,
+                      EDDisassembler &disassembler) {
+  SmallVector<MCParsedAsmOperand*, 5> parsedOperands;
+  SmallVector<AsmToken, 10> asmTokens;
+
+  // NOTE(review): any status reported by parseInst() is not checked here, and
+  // the parsed operands are not released in this function - confirm who owns
+  // them and whether a parse failure should make tokenize() return -1.
+  disassembler.parseInst(parsedOperands, asmTokens, str);
+
+  SmallVectorImpl<MCParsedAsmOperand*>::iterator operandIterator;
+  unsigned int operandIndex;
+  SmallVectorImpl<AsmToken>::iterator tokenIterator;
+
+  operandIterator = parsedOperands.begin();
+  operandIndex = 0;
+
+  bool readOpcode = false;
+
+  for (tokenIterator = asmTokens.begin();
+       tokenIterator != asmTokens.end();
+       ++tokenIterator) {
+    SMLoc tokenLoc = tokenIterator->getLoc();
+
+    // Skip past every operand that ends before this token starts.
+    while (operandIterator != parsedOperands.end() &&
+           tokenLoc.getPointer() >
+           (*operandIterator)->getEndLoc().getPointer()) {
+      ++operandIterator;
+      ++operandIndex;
+    }
+
+    EDToken *token;
+
+    switch (tokenIterator->getKind()) {
+    case AsmToken::Identifier:
+      if (!readOpcode) {
+        token = new EDToken(tokenIterator->getString(),
+                            EDToken::kTokenOpcode,
+                            (uint64_t)tokenIterator->getKind(),
+                            disassembler);
+        readOpcode = true;
+        break;
+      }
+      // any identifier that isn't an opcode is mere punctuation; so we fall
+      // through
+    default:
+      token = new EDToken(tokenIterator->getString(),
+                          EDToken::kTokenPunctuation,
+                          (uint64_t)tokenIterator->getKind(),
+                          disassembler);
+      break;
+    case AsmToken::Integer:
+    {
+      token = new EDToken(tokenIterator->getString(),
+                          EDToken::kTokenLiteral,
+                          (uint64_t)tokenIterator->getKind(),
+                          disassembler);
+
+      int64_t intVal = tokenIterator->getIntVal();
+
+      // Negate in unsigned arithmetic: -intVal overflows (undefined
+      // behavior) when intVal is the most negative int64_t.
+      if (intVal < 0)
+        token->makeLiteral(true, 0 - (uint64_t)intVal);
+      else
+        token->makeLiteral(false, (uint64_t)intVal);
+      break;
+    }
+    case AsmToken::Register:
+    {
+      // Constructed as a register token directly; makeRegister() then
+      // records the register ID.
+      token = new EDToken(tokenIterator->getString(),
+                          EDToken::kTokenRegister,
+                          (uint64_t)tokenIterator->getKind(),
+                          disassembler);
+
+      token->makeRegister((unsigned)tokenIterator->getRegVal());
+      break;
+    }
+    }
+
+    // The token belongs to the current operand if it starts at or after the
+    // operand's start (it is already known not to lie beyond its end).
+    if (operandIterator != parsedOperands.end() &&
+        tokenLoc.getPointer() >=
+        (*operandIterator)->getStartLoc().getPointer()) {
+      token->setOperandID(operandOrder[operandIndex]);
+    }
+
+    tokens.push_back(token);
+  }
+
+  return 0;
+}
+
+/// getString - Points buf at a C string holding the token's text and returns
+/// 0.  The text is copied from Str into PermStr whenever PermStr is empty, so
+/// the returned pointer refers to storage owned by the token.
+int EDToken::getString(const char*& buf) {
+  if (PermStr.empty())
+    PermStr = Str.str();
+
+  buf = PermStr.c_str();
+  return 0;
+}
diff --git a/tools/edis/EDToken.h b/tools/edis/EDToken.h
new file mode 100644
index 0000000000..e4ae91f7ec
--- /dev/null
+++ b/tools/edis/EDToken.h
@@ -0,0 +1,135 @@
+//===-EDToken.h - LLVM Enhanced Disassembler --------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interface for the Enhanced Disassembly library's token
+// class. The token is responsible for vending information about the token,
+// such as its type and logical value.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef EDToken_
+#define EDToken_
+
+#include "llvm-c/EnhancedDisassembly.h"
+#include "llvm/ADT/StringRef.h"
+
+#include <string>
+#include <vector>
+
+/// EDToken - Encapsulates a single token, which can provide a string
+/// representation of itself or interpret itself in various ways, depending
+/// on the token type.
+struct EDToken {
+ enum tokenType {
+ kTokenWhitespace,
+ kTokenOpcode,
+ kTokenLiteral,
+ kTokenRegister,
+ kTokenPunctuation
+ };
+
+ /// The parent disassembler
+ EDDisassembler &Disassembler;
+
+ /// The token's string representation
+ llvm::StringRef Str;
+ /// The token's string representation, but in a form suitable for export
+ std::string PermStr;
+ /// The type of the token, as exposed through the external API
+ enum tokenType Type;
+ /// The type of the token, as recorded by the syntax-specific tokenizer
+ uint64_t LocalType;
+ /// The operand corresponding to the token, or -1 if not part of an
+ /// operand.
+ int OperandID;
+
+ /// The sign if the token is a literal (1 if negative, 0 otherwise)
+ bool LiteralSign;
+ /// The absolute value if the token is a literal
+ uint64_t LiteralAbsoluteValue;
+ /// The LLVM register ID if the token is a register name
+ unsigned RegisterID;
+
+ /// Constructor - Initializes an EDToken with the information common to all
+ /// tokens
+ ///
+ /// @arg str - The string corresponding to the token
+ /// @arg type - The token's type as exposed through the public API
+ /// @arg localType - The token's type as recorded by the tokenizer
+ /// @arg disassembler - The disassembler responsible for the token
+ EDToken(llvm::StringRef str,
+ enum tokenType type,
+ uint64_t localType,
+ EDDisassembler &disassembler);
+
+ /// makeLiteral - Adds the information specific to a literal
+ ///
+ /// @arg sign - The sign of the literal (1 if negative, 0
+ /// otherwise)
+ /// @arg absoluteValue - The absolute value of the literal
+ void makeLiteral(bool sign, uint64_t absoluteValue);
+ /// makeRegister - Adds the information specific to a register
+ ///
+ /// @arg registerID - The LLVM register ID
+ void makeRegister(unsigned registerID);
+
+ /// setOperandID - Links the token to a numbered operand
+ ///
+ /// @arg operandID - The operand ID to link to
+ void setOperandID(int operandID);
+
+ ~EDToken();
+
+ /// type - Returns the public type of the token
+ enum tokenType type() const;
+ /// localType - Returns the tokenizer-specific type of the token
+ uint64_t localType() const;
+ /// string - Returns the string representation of the token
+ llvm::StringRef string() const;
+ /// operandID - Returns the operand ID of the token
+ int operandID() const;
+
+ /// literalSign - Returns the sign of the token
+ /// (1 if negative, 0 if positive or unsigned, -1 if it is not a literal)
+ int literalSign() const;
+ /// literalAbsoluteValue - Retrieves the absolute value of the token, and
+ /// returns -1 if the token is not a literal
+ /// @arg value - A reference to a value that is filled in with the absolute
+ /// value, if it is valid
+ int literalAbsoluteValue(uint64_t &value) const;
+ /// registerID - Retrieves the register ID of the token, and returns -1 if the
+ /// token is not a register
+ ///
+ /// @arg registerID - A reference to a value that is filled in with the
+ /// register ID, if it is valid
+ int registerID(unsigned &registerID) const;
+
+ /// tokenize - Tokenizes a string using the platform- and syntax-specific
+ /// tokenizer, and returns 0 on success (-1 on failure)
+ ///
+ /// @arg tokens - A vector that will be filled in with pointers to
+ /// allocated tokens
+ /// @arg str - The string, as outputted by the AsmPrinter
+ /// @arg operandOrder - The order of the operands from the operandFlags array
+ /// as they appear in str
+ /// @arg disassembler - The disassembler for the desired target and
+ /// assembly syntax
+ static int tokenize(std::vector<EDToken*> &tokens,
+ std::string &str,
+ const char *operandOrder,
+ EDDisassembler &disassembler);
+
+ /// getString - Directs a character pointer to the string, returning 0 on
+ /// success (-1 on failure)
+ /// @arg buf - A reference to a pointer that is set to point to the string.
+ /// The string is still owned by the token.
+ int getString(const char*& buf);
+};
+
+#endif
diff --git a/tools/edis/EnhancedDisassembly.exports b/tools/edis/EnhancedDisassembly.exports
new file mode 100644
index 0000000000..63738e5366
--- /dev/null
+++ b/tools/edis/EnhancedDisassembly.exports
@@ -0,0 +1,31 @@
+_EDGetDisassembler
+_EDGetRegisterName
+_EDRegisterIsStackPointer
+_EDRegisterIsProgramCounter
+_EDCreateInsts
+_EDReleaseInst
+_EDInstByteSize
+_EDGetInstString
+_EDInstIsBranch
+_EDInstIsMove
+_EDBranchTargetID
+_EDMoveSourceID
+_EDMoveTargetID
+_EDNumTokens
+_EDGetToken
+_EDGetTokenString
+_EDOperandIndexForToken
+_EDTokenIsWhitespace
+_EDTokenIsPunctuation
+_EDTokenIsOpcode
+_EDTokenIsLiteral
+_EDTokenIsRegister
+_EDTokenIsNegativeLiteral
+_EDLiteralTokenAbsoluteValue
+_EDRegisterTokenValue
+_EDNumOperands
+_EDGetOperand
+_EDEvaluateOperand
+_EDBlockCreateInsts
+_EDBlockEvaluateOperand
+_EDBlockVisitTokens
diff --git a/tools/edis/Makefile b/tools/edis/Makefile
new file mode 100644
index 0000000000..a3c5879668
--- /dev/null
+++ b/tools/edis/Makefile
@@ -0,0 +1,55 @@
+##===- tools/edis/Makefile ---------------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../..
+LIBRARYNAME = EnhancedDisassembly
+
+BUILT_SOURCES = EDInfo.inc
+
+# Include this here so we can get the configuration of the targets
+# that have been configured for construction. We have to do this
+# early so we can set up LINK_COMPONENTS before including Makefile.rules
+include $(LEVEL)/Makefile.config
+
+LINK_LIBS_IN_SHARED = 1
+SHARED_LIBRARY = 1
+
+LINK_COMPONENTS := $(TARGETS_TO_BUILD) x86asmprinter x86disassembler
+
+include $(LEVEL)/Makefile.common
+
+ifeq ($(HOST_OS),Darwin)
+ # set dylib internal version number to llvmCore submission number
+ ifdef LLVM_SUBMIT_VERSION
+ LLVMLibsOptions := $(LLVMLibsOptions) -Wl,-current_version \
+ -Wl,$(LLVM_SUBMIT_VERSION).$(LLVM_SUBMIT_SUBVERSION) \
+ -Wl,-compatibility_version -Wl,1
+ endif
+ # extra options to override libtool defaults
+ LLVMLibsOptions := $(LLVMLibsOptions) \
+ -avoid-version \
+ -Wl,-exported_symbols_list -Wl,$(PROJ_SRC_DIR)/EnhancedDisassembly.exports \
+ -Wl,-dead_strip \
+ -Wl,-seg1addr -Wl,0xE0000000
+
+ # Mac OS X 10.4 and earlier tools do not allow a second -install_name on command line
+ DARWIN_VERS := $(shell echo $(TARGET_TRIPLE) | sed 's/.*darwin\([0-9]*\).*/\1/')
+ ifneq ($(DARWIN_VERS),8)
+ LLVMLibsOptions := $(LLVMLibsOptions) \
+ -no-undefined -Wl,-install_name \
+ -Wl,"@executable_path/../lib/lib$(LIBRARYNAME)$(SHLIBEXT)"
+ endif
+endif
+
+# Generate EDInfo.inc by running TableGen's -gen-enhanced-disassembly-header
+# backend with /dev/null as its input; regenerated whenever $(TBLGEN) changes.
+EDInfo.inc: $(TBLGEN)
+ $(Echo) "Building semantic information header"
+ $(Verb) $(TableGen) -o $(call SYSPATH, $@) -gen-enhanced-disassembly-header /dev/null
+
+clean::
+ -$(Verb) $(RM) -f EDInfo.inc