diff options
-rw-r--r-- | include/llvm/MC/MCAtom.h | 174 | ||||
-rw-r--r-- | include/llvm/MC/MCFunction.h | 122 | ||||
-rw-r--r-- | include/llvm/MC/MCInstrAnalysis.h | 11 | ||||
-rw-r--r-- | include/llvm/MC/MCModule.h | 93 | ||||
-rw-r--r-- | include/llvm/MC/MCObjectDisassembler.h | 69 | ||||
-rw-r--r-- | include/llvm/Support/StringRefMemoryObject.h | 42 | ||||
-rw-r--r-- | lib/MC/CMakeLists.txt | 2 | ||||
-rw-r--r-- | lib/MC/MCAtom.cpp | 121 | ||||
-rw-r--r-- | lib/MC/MCFunction.cpp | 55 | ||||
-rw-r--r-- | lib/MC/MCInstrAnalysis.cpp | 9 | ||||
-rw-r--r-- | lib/MC/MCModule.cpp | 79 | ||||
-rw-r--r-- | lib/MC/MCObjectDisassembler.cpp | 216 | ||||
-rw-r--r-- | lib/Support/CMakeLists.txt | 1 | ||||
-rw-r--r-- | lib/Support/StringRefMemoryObject.cpp | 34 | ||||
-rw-r--r-- | lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp | 10 | ||||
-rw-r--r-- | lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp | 9 | ||||
-rw-r--r-- | tools/llvm-objdump/CMakeLists.txt | 1 | ||||
-rw-r--r-- | tools/llvm-objdump/MCFunction.cpp | 138 | ||||
-rw-r--r-- | tools/llvm-objdump/MCFunction.h | 100 | ||||
-rw-r--r-- | tools/llvm-objdump/MachODump.cpp | 334 | ||||
-rw-r--r-- | tools/llvm-objdump/llvm-objdump.cpp | 89 | ||||
-rw-r--r-- | tools/llvm-objdump/llvm-objdump.h | 21 |
22 files changed, 1047 insertions, 683 deletions
diff --git a/include/llvm/MC/MCAtom.h b/include/llvm/MC/MCAtom.h index ae5bf0bc20..6a937986fd 100644 --- a/include/llvm/MC/MCAtom.h +++ b/include/llvm/MC/MCAtom.h @@ -1,4 +1,4 @@ -//===-- llvm/MC/MCAtom.h - MCAtom class ---------------------*- C++ -*-===// +//===-- llvm/MC/MCAtom.h ----------------------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -9,7 +9,7 @@ // // This file contains the declaration of the MCAtom class, which is used to // represent a contiguous region in a decoded object that is uniformly data or -// instructions; +// instructions. // //===----------------------------------------------------------------------===// @@ -24,45 +24,169 @@ namespace llvm { class MCModule; -/// MCData - An entry in a data MCAtom. -// NOTE: This may change to a more complex type in the future. -typedef uint8_t MCData; +class MCAtom; +class MCTextAtom; +class MCDataAtom; /// MCAtom - Represents a contiguous range of either instructions (a TextAtom) /// or data (a DataAtom). Address ranges are expressed as _closed_ intervals. class MCAtom { - friend class MCModule; - typedef enum { TextAtom, DataAtom } AtomType; - - AtomType Type; +public: + virtual ~MCAtom() {} + + enum AtomKind { TextAtom, DataAtom }; + AtomKind getKind() const { return Kind; } + + /// \brief Get the start address of the atom. + uint64_t getBeginAddr() const { return Begin; } + /// \brief Get the end address, i.e. the last one inside the atom. + uint64_t getEndAddr() const { return End; } + + /// \name Atom modification methods: + /// When modifying a TextAtom, keep instruction boundaries in mind. + /// For instance, split must me given the start address of an instruction. + /// @{ + + /// \brief Splits the atom in two at a given address. + /// \param SplitPt Address at which to start a new atom, splitting this one. + /// \returns The newly created atom starting at \p SplitPt. + virtual MCAtom *split(uint64_t SplitPt) = 0; + + /// \brief Truncates an atom, discarding everything after \p TruncPt. + /// \param TruncPt Last byte address to be contained in this atom. + virtual void truncate(uint64_t TruncPt) = 0; + /// @} + + /// \name Naming: + /// + /// This is mostly for display purposes, and may contain anything that hints + /// at what the atom contains: section or symbol name, BB start address, .. + /// @{ + StringRef getName() const { return Name; } + void setName(StringRef NewName) { Name = NewName.str(); } + /// @} + +protected: + const AtomKind Kind; + std::string Name; MCModule *Parent; uint64_t Begin, End; - std::vector<std::pair<uint64_t, MCInst> > Text; - std::vector<MCData> Data; + friend class MCModule; + MCAtom(AtomKind K, MCModule *P, uint64_t B, uint64_t E) + : Kind(K), Name("(unknown)"), Parent(P), Begin(B), End(E) { } + + /// \name Atom remapping helpers + /// @{ + + /// \brief Remap the atom, using the given range, updating Begin/End. + /// One or both of the bounds can remain the same, but overlapping with other + /// atoms in the module is still forbidden. + void remap(uint64_t NewBegin, uint64_t NewEnd); + + /// \brief Remap the atom to prepare for a truncation at TruncPt. + /// Equivalent to: + /// \code + /// // Bound checks + /// remap(Begin, TruncPt); + /// \endcode + void remapForTruncate(uint64_t TruncPt); + + /// \brief Remap the atom to prepare for a split at SplitPt. + /// The bounds for the resulting atoms are returned in {L,R}{Begin,End}. + /// The current atom is truncated to \p LEnd. + void remapForSplit(uint64_t SplitPt, + uint64_t &LBegin, uint64_t &LEnd, + uint64_t &RBegin, uint64_t &REnd); + /// @} +}; - // Private constructor - only callable by MCModule - MCAtom(AtomType T, MCModule *P, uint64_t B, uint64_t E) - : Type(T), Parent(P), Begin(B), End(E) { } +/// \name Text atom +/// @{ + +/// \brief An entry in an MCTextAtom: a disassembled instruction. +/// NOTE: Both the Address and Size field are actually redundant when taken in +/// the context of the text atom, and may better be exposed in an iterator +/// instead of stored in the atom, which would replace this class. +class MCDecodedInst { +public: + MCInst Inst; + uint64_t Address; + uint64_t Size; + MCDecodedInst(const MCInst &Inst, uint64_t Address, uint64_t Size) + : Inst(Inst), Address(Address), Size(Size) {} +}; + +/// \brief An atom consisting of disassembled instructions. +class MCTextAtom : public MCAtom { +private: + typedef std::vector<MCDecodedInst> InstListTy; + InstListTy Insts; + /// \brief The address of the next appended instruction, i.e., the + /// address immediately after the last instruction in the atom. + uint64_t NextInstAddress; public: - bool isTextAtom() const { return Type == TextAtom; } - bool isDataAtom() const { return Type == DataAtom; } + /// Append an instruction, expanding the atom if necessary. + void addInst(const MCInst &Inst, uint64_t Size); + + /// \name Instruction list access + /// @{ + typedef InstListTy::const_iterator const_iterator; + const_iterator begin() const { return Insts.begin(); } + const_iterator end() const { return Insts.end(); } + + const MCDecodedInst &back() const { return Insts.back(); } + const MCDecodedInst &at(size_t n) const { return Insts.at(n); } + uint64_t size() const { return Insts.size(); } + /// @} + + /// \name Atom type specific split/truncate logic. + /// @{ + MCTextAtom *split(uint64_t SplitPt) LLVM_OVERRIDE; + void truncate(uint64_t TruncPt) LLVM_OVERRIDE; + /// @} + + // Class hierarchy. + static bool classof(const MCAtom *A) { return A->getKind() == TextAtom; } +private: + friend class MCModule; + // Private constructor - only callable by MCModule + MCTextAtom(MCModule *P, uint64_t Begin, uint64_t End) + : MCAtom(TextAtom, P, Begin, End), NextInstAddress(Begin) {} +}; +/// @} + +/// \name Data atom +/// @{ + +/// \brief An entry in an MCDataAtom. +// NOTE: This may change to a more complex type in the future. +typedef uint8_t MCData; - void addInst(const MCInst &I, uint64_t Address, unsigned Size); +/// \brief An atom consising of a sequence of bytes. +class MCDataAtom : public MCAtom { + std::vector<MCData> Data; + +public: + /// Append a data entry, expanding the atom if necessary. void addData(const MCData &D); - /// split - Splits the atom in two at a given address, which must align with - /// and instruction boundary if this is a TextAtom. Returns the newly created - /// atom representing the high part of the split. - MCAtom *split(uint64_t SplitPt); + /// \name Atom type specific split/truncate logic. + /// @{ + MCDataAtom *split(uint64_t SplitPt) LLVM_OVERRIDE; + void truncate(uint64_t TruncPt) LLVM_OVERRIDE; + /// @} - /// truncate - Truncates an atom so that TruncPt is the last byte address - /// contained in the atom. - void truncate(uint64_t TruncPt); + // Class hierarchy. + static bool classof(const MCAtom *A) { return A->getKind() == DataAtom; } +private: + friend class MCModule; + // Private constructor - only callable by MCModule + MCDataAtom(MCModule *P, uint64_t Begin, uint64_t End) + : MCAtom(DataAtom, P, Begin, End), Data(End - Begin) {} }; } #endif - diff --git a/include/llvm/MC/MCFunction.h b/include/llvm/MC/MCFunction.h new file mode 100644 index 0000000000..b85011eda7 --- /dev/null +++ b/include/llvm/MC/MCFunction.h @@ -0,0 +1,122 @@ +//===-- llvm/MC/MCFunction.h ------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the data structures to hold a CFG reconstructed from +// machine code. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_MC_MCFUNCTION_H +#define LLVM_MC_MCFUNCTION_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/MC/MCInst.h" +#include <string> +#include <vector> + +namespace llvm { + +class MCFunction; +class MCModule; +class MCTextAtom; + +/// \brief Basic block containing a sequence of disassembled instructions. +/// The basic block is backed by an MCTextAtom, which holds the instructions, +/// and the address range it covers. +/// Create a basic block using MCFunction::createBlock. +class MCBasicBlock { + const MCTextAtom *Insts; + + // MCFunction owns the basic block. + MCFunction *Parent; + friend class MCFunction; + MCBasicBlock(const MCTextAtom &Insts, MCFunction *Parent); + + /// \name Predecessors/Successors, to represent the CFG. + /// @{ + typedef std::vector<const MCBasicBlock *> BasicBlockListTy; + BasicBlockListTy Successors; + BasicBlockListTy Predecessors; + /// @} +public: + + /// \brief Get the backing MCTextAtom, containing the instruction sequence. + const MCTextAtom *getInsts() const { return Insts; } + + /// \name Get the owning MCFunction. + /// @{ + const MCFunction *getParent() const { return Parent; } + MCFunction *getParent() { return Parent; } + /// @} + + /// MC CFG access: Predecessors/Successors. + /// @{ + typedef BasicBlockListTy::const_iterator succ_const_iterator; + succ_const_iterator succ_begin() const { return Successors.begin(); } + succ_const_iterator succ_end() const { return Successors.end(); } + + typedef BasicBlockListTy::const_iterator pred_const_iterator; + pred_const_iterator pred_begin() const { return Predecessors.begin(); } + pred_const_iterator pred_end() const { return Predecessors.end(); } + + void addSuccessor(const MCBasicBlock *MCBB); + bool isSuccessor(const MCBasicBlock *MCBB) const; + + void addPredecessor(const MCBasicBlock *MCBB); + bool isPredecessor(const MCBasicBlock *MCBB) const; + /// @} +}; + +/// \brief Represents a function in machine code, containing MCBasicBlocks. +/// MCFunctions are created using MCModule::createFunction. +class MCFunction { + MCFunction (const MCFunction&) LLVM_DELETED_FUNCTION; + MCFunction& operator=(const MCFunction&) LLVM_DELETED_FUNCTION; + + std::string Name; + typedef std::vector<MCBasicBlock*> BasicBlockListTy; + BasicBlockListTy Blocks; + + // MCModule owns the function. + friend class MCModule; + MCFunction(StringRef Name); +public: + ~MCFunction(); + + /// \brief Create an MCBasicBlock backed by Insts and add it to this function. + /// \param Insts Sequence of straight-line code backing the basic block. + /// \returns The newly created basic block. + MCBasicBlock &createBlock(const MCTextAtom &Insts); + + StringRef getName() const { return Name; } + + /// \name Access to the function's basic blocks. No ordering is enforced. + /// @{ + /// \brief Get the entry point basic block. + const MCBasicBlock *getEntryBlock() const { return front(); } + MCBasicBlock *getEntryBlock() { return front(); } + + // NOTE: Dereferencing iterators gives pointers, so maybe a list is best here. + typedef BasicBlockListTy::const_iterator const_iterator; + typedef BasicBlockListTy:: iterator iterator; + const_iterator begin() const { return Blocks.begin(); } + iterator begin() { return Blocks.begin(); } + const_iterator end() const { return Blocks.end(); } + iterator end() { return Blocks.end(); } + + const MCBasicBlock* front() const { return Blocks.front(); } + MCBasicBlock* front() { return Blocks.front(); } + const MCBasicBlock* back() const { return Blocks.back(); } + MCBasicBlock* back() { return Blocks.back(); } + /// @} +}; + +} + +#endif diff --git a/include/llvm/MC/MCInstrAnalysis.h b/include/llvm/MC/MCInstrAnalysis.h index acad6336ac..17bfd1582a 100644 --- a/include/llvm/MC/MCInstrAnalysis.h +++ b/include/llvm/MC/MCInstrAnalysis.h @@ -52,10 +52,15 @@ public: return Info->get(Inst.getOpcode()).isReturn(); } + virtual bool isTerminator(const MCInst &Inst) const { + return Info->get(Inst.getOpcode()).isTerminator(); + } + /// evaluateBranch - Given a branch instruction try to get the address the - /// branch targets. Otherwise return -1. - virtual uint64_t - evaluateBranch(const MCInst &Inst, uint64_t Addr, uint64_t Size) const; + /// branch targets. Return true on success, and the address in Target. + virtual bool + evaluateBranch(const MCInst &Inst, uint64_t Addr, uint64_t Size, + uint64_t &Target) const; }; } diff --git a/include/llvm/MC/MCModule.h b/include/llvm/MC/MCModule.h index 755fa025fb..02f8ca05b4 100644 --- a/include/llvm/MC/MCModule.h +++ b/include/llvm/MC/MCModule.h @@ -15,44 +15,93 @@ #ifndef LLVM_MC_MCMODULE_H #define LLVM_MC_MCMODULE_H -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/IntervalMap.h" -#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/DataTypes.h" +#include <vector> namespace llvm { class MCAtom; +class MCDataAtom; +class MCFunction; +class MCObjectDisassembler; +class MCTextAtom; -/// MCModule - This class represent a completely disassembled object file or -/// executable. It comprises a list of MCAtom's, and a branch target table. -/// Each atom represents a contiguous range of either instructions or data. +/// \brief A completely disassembled object file or executable. +/// It comprises a list of MCAtom's, each representing a contiguous range of +/// either instructions or data. +/// An MCModule is created using MCObjectDisassembler::buildModule. class MCModule { - /// AtomAllocationTracker - An MCModule owns its component MCAtom's, so it - /// must track them in order to ensure they are properly freed as atoms are - /// merged or otherwise manipulated. - SmallPtrSet<MCAtom*, 8> AtomAllocationTracker; + /// \name Atom tracking + /// @{ - /// OffsetMap - Efficiently maps offset ranges to MCAtom's. - IntervalMap<uint64_t, MCAtom*> OffsetMap; - - /// BranchTargetMap - Maps offsets that are determined to be branches and - /// can be statically resolved to their target offsets. - DenseMap<uint64_t, MCAtom*> BranchTargetMap; + /// \brief Atoms in this module, sorted by begin address. + /// FIXME: This doesn't handle overlapping atoms (which happen when a basic + /// block starts in the middle of an instruction of another basic block.) + typedef std::vector<MCAtom*> AtomListTy; + AtomListTy Atoms; friend class MCAtom; - - /// remap - Update the interval mapping for an MCAtom. + /// \brief Remap \p Atom to the given range, and update its Begin/End fields. + /// \param Atom An atom belonging to this module. + /// An atom should always use this method to update its bounds, because this + /// enables the owning MCModule to keep track of its atoms. void remap(MCAtom *Atom, uint64_t NewBegin, uint64_t NewEnd); + /// \brief Insert an atom in the module, using its Begin and End addresses. + void map(MCAtom *NewAtom); + /// @} + + /// \name Function tracking + /// @{ + typedef std::vector<MCFunction*> FunctionListTy; + FunctionListTy Functions; + /// @} + + MCModule (const MCModule &) LLVM_DELETED_FUNCTION; + MCModule& operator=(const MCModule &) LLVM_DELETED_FUNCTION; + + // MCObjectDisassembler creates MCModules. + friend class MCObjectDisassembler; + MCModule() : Atoms() { } + public: - MCModule(IntervalMap<uint64_t, MCAtom*>::Allocator &A) : OffsetMap(A) { } + ~MCModule(); - /// createAtom - Creates a new MCAtom covering the specified offset range. - MCAtom *createAtom(MCAtom::AtomType Type, uint64_t Begin, uint64_t End); + /// \name Create a new MCAtom covering the specified offset range. + /// @{ + MCTextAtom *createTextAtom(uint64_t Begin, uint64_t End); + MCDataAtom *createDataAtom(uint64_t Begin, uint64_t End); + /// @} + + /// \name Access to the owned atom list, ordered by begin address. + /// @{ + const MCAtom *findAtomContaining(uint64_t Addr) const; + MCAtom *findAtomContaining(uint64_t Addr); + + typedef AtomListTy::const_iterator const_atom_iterator; + typedef AtomListTy:: iterator atom_iterator; + const_atom_iterator atom_begin() const { return Atoms.begin(); } + atom_iterator atom_begin() { return Atoms.begin(); } + const_atom_iterator atom_end() const { return Atoms.end(); } + atom_iterator atom_end() { return Atoms.end(); } + /// @} + + /// \name Create a new MCFunction. + MCFunction *createFunction(const StringRef &Name); + + /// \name Access to the owned function list. + /// @{ + typedef FunctionListTy::const_iterator const_func_iterator; + typedef FunctionListTy:: iterator func_iterator; + const_func_iterator func_begin() const { return Functions.begin(); } + func_iterator func_begin() { return Functions.begin(); } + const_func_iterator func_end() const { return Functions.end(); } + func_iterator func_end() { return Functions.end(); } + /// @} }; } #endif - diff --git a/include/llvm/MC/MCObjectDisassembler.h b/include/llvm/MC/MCObjectDisassembler.h new file mode 100644 index 0000000000..749a54e7f4 --- /dev/null +++ b/include/llvm/MC/MCObjectDisassembler.h @@ -0,0 +1,69 @@ +//===-- llvm/MC/MCObjectDisassembler.h --------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declaration of the MCObjectDisassembler class, which +// can be used to construct an MCModule and an MC CFG from an ObjectFile. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_MC_MCOBJECTDISASSEMBLER_H +#define LLVM_MC_MCOBJECTDISASSEMBLER_H + +namespace llvm { + +namespace object { + class ObjectFile; +} + +class MCBasicBlock; +class MCDisassembler; +class MCFunction; +class MCInstrAnalysis; +class MCModule; + +/// \brief Disassemble an ObjectFile to an MCModule and MCFunctions. +/// This class builds on MCDisassembler to disassemble whole sections, creating +/// MCAtom (MCTextAtom for disassembled sections and MCDataAtom for raw data). +/// It can also be used to create a control flow graph consisting of MCFunctions +/// and MCBasicBlocks. +class MCObjectDisassembler { + const object::ObjectFile &Obj; + const MCDisassembler &Dis; + const MCInstrAnalysis &MIA; + +public: + MCObjectDisassembler(const object::ObjectFile &Obj, + const MCDisassembler &Dis, + const MCInstrAnalysis &MIA); + + /// \brief Build an MCModule, creating atoms and optionally functions. + /// \param withCFG Also build a CFG by adding MCFunctions to the Module. + /// If withCFG is false, the MCModule built only contains atoms, representing + /// what was found in the object file. If withCFG is true, MCFunctions are + /// created, containing MCBasicBlocks. All text atoms are split to form basic + /// block atoms, which then each back an MCBasicBlock. + MCModule *buildModule(bool withCFG = false); + +private: + /// \brief Fill \p Module by creating an atom for each section. + /// This could be made much smarter, using information like symbols, but also + /// format-specific features, like mach-o function_start or data_in_code LCs. + void buildSectionAtoms(MCModule *Module); + + /// \brief Enrich \p Module with a CFG consisting of MCFunctions. + /// \param Module An MCModule returned by buildModule, with no CFG. + /// NOTE: Each MCBasicBlock in a MCFunction is backed by a single MCTextAtom. + /// When the CFG is built, contiguous instructions that were previously in a + /// single MCTextAtom will be split in multiple basic block atoms. + void buildCFG(MCModule *Module); +}; + +} + +#endif diff --git a/include/llvm/Support/StringRefMemoryObject.h b/include/llvm/Support/StringRefMemoryObject.h new file mode 100644 index 0000000000..a0ef35a9e1 --- /dev/null +++ b/include/llvm/Support/StringRefMemoryObject.h @@ -0,0 +1,42 @@ +//===- llvm/Support/StringRefMemoryObject.h ---------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declaration of the StringRefMemObject class, a simple +// wrapper around StringRef implementing the MemoryObject interface. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_STRINGREFMEMORYOBJECT_H +#define LLVM_SUPPORT_STRINGREFMEMORYOBJECT_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/MemoryObject.h" + +namespace llvm { + +/// StringRefMemoryObject - Simple StringRef-backed MemoryObject +class StringRefMemoryObject : public MemoryObject { + StringRef Bytes; + uint64_t Base; +public: + StringRefMemoryObject(StringRef Bytes, uint64_t Base = 0) + : Bytes(Bytes), Base(Base) {} + + uint64_t getBase() const { return Base; } + uint64_t getExtent() const { return Bytes.size(); } + + int readByte(uint64_t Addr, uint8_t *Byte) const; + int readBytes(uint64_t Addr, uint64_t Size, + uint8_t *Buf, uint64_t *Copied) const; + +}; + +} + +#endif diff --git a/lib/MC/CMakeLists.txt b/lib/MC/CMakeLists.txt index 5377c5c8d8..89e2aaf48b 100644 --- a/lib/MC/CMakeLists.txt +++ b/lib/MC/CMakeLists.txt @@ -15,6 +15,7 @@ add_llvm_library(LLVMMC MCELF.cpp MCELFObjectTargetWriter.cpp MCELFStreamer.cpp + MCFunction.cpp MCExpr.cpp MCExternalSymbolizer.cpp MCInst.cpp @@ -26,6 +27,7 @@ add_llvm_library(LLVMMC MCModule.cpp MCNullStreamer.cpp MCObjectFileInfo.cpp + MCObjectDisassembler.cpp MCObjectStreamer.cpp MCObjectSymbolizer.cpp MCObjectWriter.cpp diff --git a/lib/MC/MCAtom.cpp b/lib/MC/MCAtom.cpp index d71444324f..2626b39db4 100644 --- a/lib/MC/MCAtom.cpp +++ b/lib/MC/MCAtom.cpp @@ -10,88 +10,101 @@ #include "llvm/MC/MCAtom.h" #include "llvm/MC/MCModule.h" #include "llvm/Support/ErrorHandling.h" +#include <iterator> using namespace llvm; -void MCAtom::addInst(const MCInst &I, uint64_t Address, unsigned Size) { - assert(Type == TextAtom && "Trying to add MCInst to a non-text atom!"); - - assert(Address < End+Size && - "Instruction not contiguous with end of atom!"); - if (Address > End) - Parent->remap(this, Begin, End+Size); - - Text.push_back(std::make_pair(Address, I)); +void MCAtom::remap(uint64_t NewBegin, uint64_t NewEnd) { + Parent->remap(this, NewBegin, NewEnd); } -void MCAtom::addData(const MCData &D) { - assert(Type == DataAtom && "Trying to add MCData to a non-data atom!"); - Parent->remap(this, Begin, End+1); - - Data.push_back(D); +void MCAtom::remapForTruncate(uint64_t TruncPt) { + assert((TruncPt >= Begin && TruncPt < End) && + "Truncation point not contained in atom!"); + remap(Begin, TruncPt); } -MCAtom *MCAtom::split(uint64_t SplitPt) { +void MCAtom::remapForSplit(uint64_t SplitPt, + uint64_t &LBegin, uint64_t &LEnd, + uint64_t &RBegin, uint64_t &REnd) { assert((SplitPt > Begin && SplitPt <= End) && "Splitting at point not contained in atom!"); // Compute the new begin/end points. - uint64_t LeftBegin = Begin; - uint64_t LeftEnd = SplitPt - 1; - uint64_t RightBegin = SplitPt; - uint64_t RightEnd = End; + LBegin = Begin; + LEnd = SplitPt - 1; + RBegin = SplitPt; + REnd = End; // Remap this atom to become the lower of the two new ones. - Parent->remap(this, LeftBegin, LeftEnd); + remap(LBegin, LEnd); +} - // Create a new atom for the higher atom. - MCAtom *RightAtom = Parent->createAtom(Type, RightBegin, RightEnd); +// MCDataAtom - // Split the contents of the original atom between it and the new one. The - // precise method depends on whether this is a data or a text atom. - if (isDataAtom()) { - std::vector<MCData>::iterator I = Data.begin() + (RightBegin - LeftBegin); +void MCDataAtom::addData(const MCData &D) { + Data.push_back(D); + if (Data.size() > Begin - End) + remap(Begin, End + 1); +} - assert(I != Data.end() && "Split point not found in range!"); +void MCDataAtom::truncate(uint64_t TruncPt) { + remapForTruncate(TruncPt); - std::copy(I, Data.end(), RightAtom->Data.end()); - Data.erase(I, Data.end()); - } else if (isTextAtom()) { - std::vector<std::pair<uint64_t, MCInst> >::iterator I = Text.begin(); + Data.resize(TruncPt - Begin + 1); +} - while (I != Text.end() && I->first < SplitPt) ++I; +MCDataAtom *MCDataAtom::split(uint64_t SplitPt) { + uint64_t LBegin, LEnd, RBegin, REnd; + remapForSplit(SplitPt, LBegin, LEnd, RBegin, REnd); - assert(I != Text.end() && "Split point not found in disassembly!"); - assert(I->first == SplitPt && - "Split point does not fall on instruction boundary!"); + MCDataAtom *RightAtom = Parent->createDataAtom(RBegin, REnd); + RightAtom->setName(getName()); - std::copy(I, Text.end(), RightAtom->Text.end()); - Text.erase(I, Text.end()); - } else - llvm_unreachable("Unknown atom type!"); + std::vector<MCData>::iterator I = Data.begin() + (RBegin - LBegin); + assert(I != Data.end() && "Split point not found in range!"); + std::copy(I, Data.end(), std::back_inserter(RightAtom->Data)); + Data.erase(I, Data.end()); return RightAtom; } -void MCAtom::truncate(uint64_t TruncPt) { - assert((TruncPt >= Begin && TruncPt < End) && - "Truncation point not contained in atom!"); +// MCTextAtom - Parent->remap(this, Begin, TruncPt); +void MCTextAtom::addInst(const MCInst &I, uint64_t Size) { + if (NextInstAddress > End) + remap(Begin, NextInstAddress); + Insts.push_back(MCDecodedInst(I, NextInstAddress, Size)); + NextInstAddress += Size; +} - if (isDataAtom()) { - Data.resize(TruncPt - Begin + 1); - } else if (isTextAtom()) { - std::vector<std::pair<uint64_t, MCInst> >::iterator I = Text.begin(); +void MCTextAtom::truncate(uint64_t TruncPt) { + remapForTruncate(TruncPt); - while (I != Text.end() && I->first <= TruncPt) ++I; + InstListTy::iterator I = Insts.begin(); + while (I != Insts.end() && I->Address <= TruncPt) ++I; - assert(I != Text.end() && "Truncation point not found in disassembly!"); - assert(I->first == TruncPt+1 && - "Truncation point does not fall on instruction boundary"); + assert(I != Insts.end() && "Truncation point not found in disassembly!"); + assert(I->Address == TruncPt + 1 && + "Truncation point does not fall on instruction boundary"); - Text.erase(I, Text.end()); - } else - llvm_unreachable("Unknown atom type!"); + Insts.erase(I, Insts.end()); } +MCTextAtom *MCTextAtom::split(uint64_t SplitPt) { + uint64_t LBegin, LEnd, RBegin, REnd; + remapForSplit(SplitPt, LBegin, LEnd, RBegin, REnd); + + MCTextAtom *RightAtom = Parent->createTextAtom(RBegin, REnd); + RightAtom->setName(getName()); + + InstListTy::iterator I = Insts.begin(); + while (I != Insts.end() && I->Address < SplitPt) ++I; + assert(I != Insts.end() && "Split point not found in disassembly!"); + assert(I->Address == SplitPt && + "Split point does not fall on instruction boundary!"); + + std::copy(I, Insts.end(), std::back_inserter(RightAtom->Insts)); + Insts.erase(I, Insts.end()); + return RightAtom; +} diff --git a/lib/MC/MCFunction.cpp b/lib/MC/MCFunction.cpp new file mode 100644 index 0000000000..2665d3e167 --- /dev/null +++ b/lib/MC/MCFunction.cpp @@ -0,0 +1,55 @@ +//===-- lib/MC/MCFunction.cpp -----------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCFunction.h" +#include "llvm/MC/MCAtom.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> + +using namespace llvm; + +// MCFunction + +MCFunction::MCFunction(StringRef Name) + : Name(Name) +{} + +MCFunction::~MCFunction() { + for (iterator I = begin(), E = end(); I != E; ++I) + delete *I; +} + +MCBasicBlock &MCFunction::createBlock(const MCTextAtom &TA) { + Blocks.push_back(new MCBasicBlock(TA, this)); + return *Blocks.back(); +} + +// MCBasicBlock + +MCBasicBlock::MCBasicBlock(const MCTextAtom &Insts, MCFunction *Parent) + : Insts(&Insts), Parent(Parent) +{} + +void MCBasicBlock::addSuccessor(const MCBasicBlock *MCBB) { + Successors.push_back(MCBB); +} + +bool MCBasicBlock::isSuccessor(const MCBasicBlock *MCBB) const { + return std::find(Successors.begin(), Successors.end(), + MCBB) != Successors.end(); +} + +void MCBasicBlock::addPredecessor(const MCBasicBlock *MCBB) { + Predecessors.push_back(MCBB); +} + +bool MCBasicBlock::isPredecessor(const MCBasicBlock *MCBB) const { + return std::find(Predecessors.begin(), Predecessors.end(), + MCBB) != Predecessors.end(); +} diff --git a/lib/MC/MCInstrAnalysis.cpp b/lib/MC/MCInstrAnalysis.cpp index 7736702f35..2d8336d77a 100644 --- a/lib/MC/MCInstrAnalysis.cpp +++ b/lib/MC/MCInstrAnalysis.cpp @@ -10,12 +10,13 @@ #include "llvm/MC/MCInstrAnalysis.h" using namespace llvm; -uint64_t MCInstrAnalysis::evaluateBranch(const MCInst &Inst, uint64_t Addr, - uint64_t Size) const { +bool MCInstrAnalysis::evaluateBranch(const MCInst &Inst, uint64_t Addr, + uint64_t Size, uint64_t &Target) const { if (Inst.getNumOperands() == 0 || Info->get(Inst.getOpcode()).OpInfo[0].OperandType != MCOI::OPERAND_PCREL) - return -1ULL; + return false; int64_t Imm = Inst.getOperand(0).getImm(); - return Addr+Size+Imm; + Target = Addr+Size+Imm; + return true; } diff --git a/lib/MC/MCModule.cpp b/lib/MC/MCModule.cpp index f563160833..50bac476fa 100644 --- a/lib/MC/MCModule.cpp +++ b/lib/MC/MCModule.cpp @@ -7,39 +7,92 @@ // //===----------------------------------------------------------------------===// -#include "llvm/MC/MCAtom.h" #include "llvm/MC/MCModule.h" +#include "llvm/MC/MCAtom.h" +#include "llvm/MC/MCFunction.h" +#include <algorithm> using namespace llvm; -MCAtom *MCModule::createAtom(MCAtom::AtomType Type, - uint64_t Begin, uint64_t End) { +static bool AtomComp(const MCAtom *L, uint64_t Addr) { + return L->getEndAddr() < Addr; +} + +void MCModule::map(MCAtom *NewAtom) { + uint64_t Begin = NewAtom->Begin, + End = NewAtom->End; + assert(Begin < End && "Creating MCAtom with endpoints reversed?"); // Check for atoms already covering this range. - IntervalMap<uint64_t, MCAtom*>::iterator I = OffsetMap.find(Begin); - assert((!I.valid() || I.start() < End) && "Offset range already occupied!"); + AtomListTy::iterator I = std::lower_bound(atom_begin(), atom_end(), + Begin, AtomComp); + assert((I == atom_end() || (*I)->getBeginAddr() > End) + && "Offset range already occupied!"); - // Create the new atom and add it to our maps. - MCAtom *NewAtom = new MCAtom(Type, this, Begin, End); - AtomAllocationTracker.insert(NewAtom); - OffsetMap.insert(Begin, End, NewAtom); + // Insert the new atom to the list. + Atoms.insert(I, NewAtom); +} + +MCTextAtom *MCModule::createTextAtom(uint64_t Begin, uint64_t End) { + MCTextAtom *NewAtom = new MCTextAtom(this, Begin, End); + map(NewAtom); + return NewAtom; +} + +MCDataAtom *MCModule::createDataAtom(uint64_t Begin, uint64_t End) { + MCDataAtom *NewAtom = new MCDataAtom(this, Begin, End); + map(NewAtom); return NewAtom; } // remap - Update the interval mapping for an atom. void MCModule::remap(MCAtom *Atom, uint64_t NewBegin, uint64_t NewEnd) { // Find and erase the old mapping. - IntervalMap<uint64_t, MCAtom*>::iterator I = OffsetMap.find(Atom->Begin); - assert(I.valid() && "Atom offset not found in module!"); + AtomListTy::iterator I = std::lower_bound(atom_begin(), atom_end(), + Atom->Begin, AtomComp); + assert(I != atom_end() && "Atom offset not found in module!"); assert(*I == Atom && "Previous atom mapping was invalid!"); - I.erase(); + Atoms.erase(I); // Insert the new mapping. - OffsetMap.insert(NewBegin, NewEnd, Atom); + AtomListTy::iterator NewI = std::lower_bound(atom_begin(), atom_end(), + NewBegin, AtomComp); + Atoms.insert(NewI, Atom); // Update the atom internal bounds. Atom->Begin = NewBegin; Atom->End = NewEnd; } +const MCAtom *MCModule::findAtomContaining(uint64_t Addr) const { + AtomListTy::const_iterator I = std::lower_bound(atom_begin(), atom_end(), + Addr, AtomComp); + if (I != atom_end() && (*I)->getBeginAddr() <= Addr) + return *I; + return 0; +} + +MCAtom *MCModule::findAtomContaining(uint64_t Addr) { + AtomListTy::iterator I = std::lower_bound(atom_begin(), atom_end(), + Addr, AtomComp); + if (I != atom_end() && (*I)->getBeginAddr() <= Addr) + return *I; + return 0; +} + +MCFunction *MCModule::createFunction(const StringRef &Name) { + Functions.push_back(new MCFunction(Name)); + return Functions.back(); +} + +MCModule::~MCModule() { + for (AtomListTy::iterator AI = atom_begin(), + AE = atom_end(); + AI != AE; ++AI) + delete *AI; + for (FunctionListTy::iterator FI = func_begin(), + FE = func_end(); + FI != FE; ++FI) + delete *FI; +} diff --git a/lib/MC/MCObjectDisassembler.cpp b/lib/MC/MCObjectDisassembler.cpp new file mode 100644 index 0000000000..bb3de1779e --- /dev/null +++ b/lib/MC/MCObjectDisassembler.cpp @@ -0,0 +1,216 @@ +//===- lib/MC/MCObjectDisassembler.cpp ------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCObjectDisassembler.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" +#include "llvm/MC/MCAtom.h" +#include "llvm/MC/MCDisassembler.h" +#include "llvm/MC/MCFunction.h" +#include "llvm/MC/MCInstrAnalysis.h" +#include "llvm/MC/MCModule.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Support/MemoryObject.h" +#include "llvm/Support/StringRefMemoryObject.h" +#include "llvm/Support/raw_ostream.h" +#include <map> +#include <set> + +using namespace llvm; +using namespace object; + +MCObjectDisassembler::MCObjectDisassembler(const ObjectFile &Obj, + const MCDisassembler &Dis, + const MCInstrAnalysis &MIA) + : Obj(Obj), Dis(Dis), MIA(MIA) {} + +MCModule *MCObjectDisassembler::buildModule(bool withCFG) { + MCModule *Module = new MCModule; + buildSectionAtoms(Module); + if (withCFG) + buildCFG(Module); + return Module; +} + +void MCObjectDisassembler::buildSectionAtoms(MCModule *Module) { + error_code ec; + for (section_iterator SI = Obj.begin_sections(), + SE = Obj.end_sections(); + SI != SE; + SI.increment(ec)) { + if (ec) break; + + bool isText; SI->isText(isText); + bool isData; SI->isData(isData); + if (!isData && !isText) + continue; + + uint64_t StartAddr; SI->getAddress(StartAddr); + uint64_t SecSize; SI->getSize(SecSize); + if (StartAddr == UnknownAddressOrSize || SecSize == UnknownAddressOrSize) + continue; + + StringRef Contents; SI->getContents(Contents); + StringRefMemoryObject memoryObject(Contents); + + // We don't care about things like non-file-backed sections yet. + if (Contents.size() != SecSize || !SecSize) + continue; + uint64_t EndAddr = StartAddr + SecSize - 1; + + StringRef SecName; SI->getName(SecName); + + if (isText) { + MCTextAtom *Text = Module->createTextAtom(StartAddr, EndAddr); + Text->setName(SecName); + uint64_t InstSize; + for (uint64_t Index = 0; Index < SecSize; Index += InstSize) { + MCInst Inst; + if (Dis.getInstruction(Inst, InstSize, memoryObject, Index, + nulls(), nulls())) + Text->addInst(Inst, InstSize); + else + // We don't care about splitting mixed atoms either. + llvm_unreachable("Couldn't disassemble instruction in atom."); + } + + } else { + MCDataAtom *Data = Module->createDataAtom(StartAddr, EndAddr); + Data->setName(SecName); + for (uint64_t Index = 0; Index < SecSize; ++Index) + Data->addData(Contents[Index]); + } + } +} + +namespace { + struct BBInfo; + typedef std::set<BBInfo*> BBInfoSetTy; + + struct BBInfo { + MCTextAtom *Atom; + MCBasicBlock *BB; + BBInfoSetTy Succs; + BBInfoSetTy Preds; + + void addSucc(BBInfo &Succ) { + Succs.insert(&Succ); + Succ.Preds.insert(this); + } + }; +} + +void MCObjectDisassembler::buildCFG(MCModule *Module) { + typedef std::map<uint64_t, BBInfo> BBInfoByAddrTy; + BBInfoByAddrTy BBInfos; + typedef std::set<uint64_t> AddressSetTy; + AddressSetTy Splits; + AddressSetTy Calls; + + assert(Module->func_begin() == Module->func_end() + && "Module already has a CFG!"); + + // First, determine the basic block boundaries and call targets. + for (MCModule::atom_iterator AI = Module->atom_begin(), + AE = Module->atom_end(); + AI != AE; ++AI) { + MCTextAtom *TA = dyn_cast<MCTextAtom>(*AI); + if (!TA) continue; + Calls.insert(TA->getBeginAddr()); + for (MCTextAtom::const_iterator II = TA->begin(), IE = TA->end(); + II != IE; ++II) { + if (MIA.isTerminator(II->Inst)) + Splits.insert(II->Address + II->Size); + uint64_t Target; + if (MIA.evaluateBranch(II->Inst, II->Address, II->Size, Target)) { + if (MIA.isCall(II->Inst)) + Calls.insert(Target); + Splits.insert(Target); + } + } + } + + // Split text atoms into basic block atoms. + for (AddressSetTy::const_iterator SI = Splits.begin(), SE = Splits.end(); + SI != SE; ++SI) { + MCAtom *A = Module->findAtomContaining(*SI); + if (!A) continue; + MCTextAtom *TA = cast<MCTextAtom>(A); + BBInfos[TA->getBeginAddr()].Atom = TA; + if (TA->getBeginAddr() == *SI) + continue; + MCTextAtom *NewAtom = TA->split(*SI); + BBInfos[NewAtom->getBeginAddr()].Atom = NewAtom; + StringRef BBName = TA->getName(); + BBName = BBName.substr(0, BBName.find_last_of(':')); + NewAtom->setName((BBName + ":" + utohexstr(*SI)).str()); + } + + // Compute succs/preds. + for (MCModule::atom_iterator AI = Module->atom_begin(), + AE = Module->atom_end(); + AI != AE; ++AI) { + MCTextAtom *TA = dyn_cast<MCTextAtom>(*AI); + if (!TA) continue; + BBInfo &CurBB = BBInfos[TA->getBeginAddr()]; + const MCDecodedInst &LI = TA->back(); + if (MIA.isBranch(LI.Inst)) { + uint64_t Target; + if (MIA.evaluateBranch(LI.Inst, LI.Address, LI.Size, Target)) + CurBB.addSucc(BBInfos[Target]); + if (MIA.isConditionalBranch(LI.Inst)) + CurBB.addSucc(BBInfos[LI.Address + LI.Size]); + } else if (!MIA.isTerminator(LI.Inst)) + CurBB.addSucc(BBInfos[LI.Address + LI.Size]); + } + + + // Create functions and basic blocks. + for (AddressSetTy::const_iterator CI = Calls.begin(), CE = Calls.end(); + CI != CE; ++CI) { + BBInfo &BBI = BBInfos[*CI]; + if (!BBI.Atom) continue; + + MCFunction &MCFN = *Module->createFunction(BBI.Atom->getName()); + + // Create MCBBs. + SmallSetVector<BBInfo*, 16> Worklist; + Worklist.insert(&BBI); + for (size_t WI = 0; WI < Worklist.size(); ++WI) { + BBInfo *BBI = Worklist[WI]; + if (!BBI->Atom) + continue; + BBI->BB = &MCFN.createBlock(*BBI->Atom); + // Add all predecessors and successors to the worklist. + for (BBInfoSetTy::iterator SI = BBI->Succs.begin(), SE = BBI->Succs.end(); + SI != SE; ++SI) + Worklist.insert(*SI); + for (BBInfoSetTy::iterator PI = BBI->Preds.begin(), PE = BBI->Preds.end(); + PI != PE; ++PI) + Worklist.insert(*PI); + } + + // Set preds/succs. + for (size_t WI = 0; WI < Worklist.size(); ++WI) { + BBInfo *BBI = Worklist[WI]; + MCBasicBlock *MCBB = BBI->BB; + if (!MCBB) + continue; + for (BBInfoSetTy::iterator SI = BBI->Succs.begin(), SE = BBI->Succs.end(); + SI != SE; ++SI) + MCBB->addSuccessor((*SI)->BB); + for (BBInfoSetTy::iterator PI = BBI->Preds.begin(), PE = BBI->Preds.end(); + PI != PE; ++PI) + MCBB->addPredecessor((*PI)->BB); + } + } +} diff --git a/lib/Support/CMakeLists.txt b/lib/Support/CMakeLists.txt index fbc38183bc..6cc8ab236a 100644 --- a/lib/Support/CMakeLists.txt +++ b/lib/Support/CMakeLists.txt @@ -48,6 +48,7 @@ add_llvm_library(LLVMSupport StringMap.cpp StringPool.cpp StringRef.cpp + StringRefMemoryObject.cpp SystemUtils.cpp Timer.cpp ToolOutputFile.cpp diff --git a/lib/Support/StringRefMemoryObject.cpp b/lib/Support/StringRefMemoryObject.cpp new file mode 100644 index 0000000000..5db11e918c --- /dev/null +++ b/lib/Support/StringRefMemoryObject.cpp @@ -0,0 +1,34 @@ +//===- lib/Support/StringRefMemoryObject.cpp --------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/StringRefMemoryObject.h" + +using namespace llvm; + +int StringRefMemoryObject::readByte(uint64_t Addr, uint8_t *Byte) const { + if (Addr >= Base + getExtent() || Addr < Base) + return -1; + *Byte = Bytes[Addr - Base]; + return 0; +} + +int StringRefMemoryObject::readBytes(uint64_t Addr, + uint64_t Size, + uint8_t *Buf, + uint64_t *Copied) const { + if (Addr >= Base + getExtent() || Addr < Base) + return -1; + uint64_t Offset = Addr - Base; + if (Size > getExtent() - Offset) + Size = getExtent() - Offset; + memcpy(Buf, Bytes.data() + Offset, Size); + if (Copied) + *Copied = Size; + return 0; +} diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp index eeec608820..48d48190fd 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp @@ -136,17 +136,17 @@ public: return MCInstrAnalysis::isConditionalBranch(Inst); } - uint64_t evaluateBranch(const MCInst &Inst, uint64_t Addr, - uint64_t Size) const { + bool evaluateBranch(const MCInst &Inst, uint64_t Addr, + uint64_t Size, uint64_t &Target) const { unsigned LblOperand = Inst.getOpcode() == AArch64::Bcc ? 1 : 0; // FIXME: We only handle PCRel branches for now. if (Info->get(Inst.getOpcode()).OpInfo[LblOperand].OperandType != MCOI::OPERAND_PCREL) - return -1ULL; + return false; int64_t Imm = Inst.getOperand(LblOperand).getImm(); - - return Addr + Imm; + Target = Addr + Imm; + return true; } }; diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp index 52fc28d11d..c092801a67 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp @@ -240,15 +240,16 @@ public: return MCInstrAnalysis::isConditionalBranch(Inst); } - uint64_t evaluateBranch(const MCInst &Inst, uint64_t Addr, - uint64_t Size) const { + bool evaluateBranch(const MCInst &Inst, uint64_t Addr, + uint64_t Size, uint64_t &Target) const { // We only handle PCRel branches for now. if (Info->get(Inst.getOpcode()).OpInfo[0].OperandType!=MCOI::OPERAND_PCREL) - return -1ULL; + return false; int64_t Imm = Inst.getOperand(0).getImm(); // FIXME: This is not right for thumb. - return Addr+Imm+8; // In ARM mode the PC is always off by 8 bytes. + Target = Addr+Imm+8; // In ARM mode the PC is always off by 8 bytes. + return true; } }; diff --git a/tools/llvm-objdump/CMakeLists.txt b/tools/llvm-objdump/CMakeLists.txt index 0c49d0b457..e983ec92fb 100644 --- a/tools/llvm-objdump/CMakeLists.txt +++ b/tools/llvm-objdump/CMakeLists.txt @@ -12,5 +12,4 @@ add_llvm_tool(llvm-objdump COFFDump.cpp ELFDump.cpp MachODump.cpp - MCFunction.cpp ) diff --git a/tools/llvm-objdump/MCFunction.cpp b/tools/llvm-objdump/MCFunction.cpp deleted file mode 100644 index 5c67f1b70a..0000000000 --- a/tools/llvm-objdump/MCFunction.cpp +++ /dev/null @@ -1,138 +0,0 @@ -//===-- MCFunction.cpp ----------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the algorithm to break down a region of machine code -// into basic blocks and try to reconstruct a CFG from it. -// -//===----------------------------------------------------------------------===// - -#include "MCFunction.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/MC/MCDisassembler.h" -#include "llvm/MC/MCInst.h" -#include "llvm/MC/MCInstPrinter.h" -#include "llvm/MC/MCInstrAnalysis.h" -#include "llvm/MC/MCInstrDesc.h" -#include "llvm/MC/MCInstrInfo.h" -#include "llvm/Support/MemoryObject.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Support/system_error.h" -#include <set> -using namespace llvm; - -MCFunction -MCFunction::createFunctionFromMC(StringRef Name, const MCDisassembler *DisAsm, - const MemoryObject &Region, uint64_t Start, - uint64_t End, const MCInstrAnalysis *Ana, - raw_ostream &DebugOut, - SmallVectorImpl<uint64_t> &Calls) { - std::vector<MCDecodedInst> Instructions; - std::set<uint64_t> Splits; - Splits.insert(Start); - uint64_t Size; - - MCFunction f(Name); - - { - DenseSet<uint64_t> VisitedInsts; - SmallVector<uint64_t, 16> WorkList; - WorkList.push_back(Start); - // Disassemble code and gather basic block split points. - while (!WorkList.empty()) { - uint64_t Index = WorkList.pop_back_val(); - if (VisitedInsts.find(Index) != VisitedInsts.end()) - continue; // Already visited this location. - - for (;Index < End; Index += Size) { - VisitedInsts.insert(Index); - - MCInst Inst; - if (DisAsm->getInstruction(Inst, Size, Region, Index, DebugOut, nulls())){ - Instructions.push_back(MCDecodedInst(Index, Size, Inst)); - if (Ana->isBranch(Inst)) { - uint64_t targ = Ana->evaluateBranch(Inst, Index, Size); - if (targ != -1ULL && targ == Index+Size) - continue; // Skip nop jumps. - - // If we could determine the branch target, make a note to start a - // new basic block there and add the target to the worklist. - if (targ != -1ULL) { - Splits.insert(targ); - WorkList.push_back(targ); - WorkList.push_back(Index+Size); - } - Splits.insert(Index+Size); - break; - } else if (Ana->isReturn(Inst)) { - // Return instruction. This basic block ends here. - Splits.insert(Index+Size); - break; - } else if (Ana->isCall(Inst)) { - uint64_t targ = Ana->evaluateBranch(Inst, Index, Size); - // Add the call to the call list if the destination is known. - if (targ != -1ULL && targ != Index+Size) - Calls.push_back(targ); - } - } else { - errs().write_hex(Index) << ": warning: invalid instruction encoding\n"; - if (Size == 0) - Size = 1; // skip illegible bytes - } - } - } - } - - // Make sure the instruction list is sorted. - std::sort(Instructions.begin(), Instructions.end()); - - // Create basic blocks. - unsigned ii = 0, ie = Instructions.size(); - for (std::set<uint64_t>::iterator spi = Splits.begin(), - spe = llvm::prior(Splits.end()); spi != spe; ++spi) { - MCBasicBlock BB; - uint64_t BlockEnd = *llvm::next(spi); - // Add instructions to the BB. - for (; ii != ie; ++ii) { - if (Instructions[ii].Address < *spi || - Instructions[ii].Address >= BlockEnd) - break; - BB.addInst(Instructions[ii]); - } - f.addBlock(*spi, BB); - } - - std::sort(f.Blocks.begin(), f.Blocks.end()); - - // Calculate successors of each block. - for (MCFunction::iterator i = f.begin(), e = f.end(); i != e; ++i) { - MCBasicBlock &BB = const_cast<MCBasicBlock&>(i->second); - if (BB.getInsts().empty()) continue; - const MCDecodedInst &Inst = BB.getInsts().back(); - - if (Ana->isBranch(Inst.Inst)) { - uint64_t targ = Ana->evaluateBranch(Inst.Inst, Inst.Address, Inst.Size); - if (targ == -1ULL) { - // Indirect branch. Bail and add all blocks of the function as a - // successor. - for (MCFunction::iterator i = f.begin(), e = f.end(); i != e; ++i) - BB.addSucc(i->first); - } else if (targ != Inst.Address+Inst.Size) - BB.addSucc(targ); - // Conditional branches can also fall through to the next block. - if (Ana->isConditionalBranch(Inst.Inst) && llvm::next(i) != e) - BB.addSucc(llvm::next(i)->first); - } else { - // No branch. Fall through to the next block. - if (!Ana->isReturn(Inst.Inst) && llvm::next(i) != e) - BB.addSucc(llvm::next(i)->first); - } - } - - return f; -} diff --git a/tools/llvm-objdump/MCFunction.h b/tools/llvm-objdump/MCFunction.h deleted file mode 100644 index 6d3a548d48..0000000000 --- a/tools/llvm-objdump/MCFunction.h +++ /dev/null @@ -1,100 +0,0 @@ -//===-- MCFunction.h ------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the data structures to hold a CFG reconstructed from -// machine code. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_OBJECTDUMP_MCFUNCTION_H -#define LLVM_OBJECTDUMP_MCFUNCTION_H - -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/DenseSet.h" -#include "llvm/MC/MCInst.h" -#include <map> - -namespace llvm { - -class MCDisassembler; -class MCInstrAnalysis; -class MemoryObject; -class raw_ostream; - -/// MCDecodedInst - Small container to hold an MCInst and associated info like -/// address and size. -struct MCDecodedInst { - uint64_t Address; - uint64_t Size; - MCInst Inst; - - MCDecodedInst() {} - MCDecodedInst(uint64_t Address, uint64_t Size, MCInst Inst) - : Address(Address), Size(Size), Inst(Inst) {} - - bool operator<(const MCDecodedInst &RHS) const { - return Address < RHS.Address; - } -}; - -/// MCBasicBlock - Consists of multiple MCDecodedInsts and a list of successing -/// MCBasicBlocks. -class MCBasicBlock { - std::vector<MCDecodedInst> Insts; - typedef DenseSet<uint64_t> SetTy; - SetTy Succs; -public: - ArrayRef<MCDecodedInst> getInsts() const { return Insts; } - - typedef SetTy::const_iterator succ_iterator; - succ_iterator succ_begin() const { return Succs.begin(); } - succ_iterator succ_end() const { return Succs.end(); } - - bool contains(uint64_t Addr) const { return Succs.count(Addr); } - - void addInst(const MCDecodedInst &Inst) { Insts.push_back(Inst); } - void addSucc(uint64_t Addr) { Succs.insert(Addr); } - - bool operator<(const MCBasicBlock &RHS) const { - return Insts.size() < RHS.Insts.size(); - } -}; - -/// MCFunction - Represents a named function in machine code, containing -/// multiple MCBasicBlocks. -class MCFunction { - const StringRef Name; - // Keep BBs sorted by address. - typedef std::vector<std::pair<uint64_t, MCBasicBlock> > MapTy; - MapTy Blocks; -public: - MCFunction(StringRef Name) : Name(Name) {} - - // Create an MCFunction from a region of binary machine code. - static MCFunction - createFunctionFromMC(StringRef Name, const MCDisassembler *DisAsm, - const MemoryObject &Region, uint64_t Start, uint64_t End, - const MCInstrAnalysis *Ana, raw_ostream &DebugOut, - SmallVectorImpl<uint64_t> &Calls); - - typedef MapTy::const_iterator iterator; - iterator begin() const { return Blocks.begin(); } - iterator end() const { return Blocks.end(); } - - StringRef getName() const { return Name; } - - MCBasicBlock &addBlock(uint64_t Address, const MCBasicBlock &BB) { - Blocks.push_back(std::make_pair(Address, BB)); - return Blocks.back().second; - } -}; - -} - -#endif diff --git a/tools/llvm-objdump/MachODump.cpp b/tools/llvm-objdump/MachODump.cpp index c7e5cc1ede..03a383eb12 100644 --- a/tools/llvm-objdump/MachODump.cpp +++ b/tools/llvm-objdump/MachODump.cpp @@ -12,9 +12,9 @@ //===----------------------------------------------------------------------===// #include "llvm-objdump.h" -#include "MCFunction.h" #include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Triple.h" #include "llvm/DebugInfo/DIContext.h" #include "llvm/MC/MCAsmInfo.h" @@ -44,10 +44,6 @@ using namespace llvm; using namespace object; static cl::opt<bool> - CFG("cfg", cl::desc("Create a CFG for every symbol in the object file and" - " write it to a graphviz file (MachO-only)")); - -static cl::opt<bool> UseDbg("g", cl::desc("Print line information from debug info if available")); static cl::opt<std::string> @@ -91,99 +87,6 @@ struct SymbolSorter { } }; -// Print additional information about an address, if available. -static void DumpAddress(uint64_t Address, ArrayRef<SectionRef> Sections, - const MachOObjectFile *MachOObj, raw_ostream &OS) { - for (unsigned i = 0; i != Sections.size(); ++i) { - uint64_t SectAddr = 0, SectSize = 0; - Sections[i].getAddress(SectAddr); - Sections[i].getSize(SectSize); - uint64_t addr = SectAddr; - if (SectAddr <= Address && - SectAddr + SectSize > Address) { - StringRef bytes, name; - Sections[i].getContents(bytes); - Sections[i].getName(name); - // Print constant strings. - if (!name.compare("__cstring")) - OS << '"' << bytes.substr(addr, bytes.find('\0', addr)) << '"'; - // Print constant CFStrings. - if (!name.compare("__cfstring")) - OS << "@\"" << bytes.substr(addr, bytes.find('\0', addr)) << '"'; - } - } -} - -typedef std::map<uint64_t, MCFunction*> FunctionMapTy; -typedef SmallVector<MCFunction, 16> FunctionListTy; -static void createMCFunctionAndSaveCalls(StringRef Name, - const MCDisassembler *DisAsm, - MemoryObject &Object, uint64_t Start, - uint64_t End, - MCInstrAnalysis *InstrAnalysis, - uint64_t Address, - raw_ostream &DebugOut, - FunctionMapTy &FunctionMap, - FunctionListTy &Functions) { - SmallVector<uint64_t, 16> Calls; - MCFunction f = - MCFunction::createFunctionFromMC(Name, DisAsm, Object, Start, End, - InstrAnalysis, DebugOut, Calls); - Functions.push_back(f); - FunctionMap[Address] = &Functions.back(); - - // Add the gathered callees to the map. - for (unsigned i = 0, e = Calls.size(); i != e; ++i) - FunctionMap.insert(std::make_pair(Calls[i], (MCFunction*)0)); -} - -// Write a graphviz file for the CFG inside an MCFunction. -static void emitDOTFile(const char *FileName, const MCFunction &f, - MCInstPrinter *IP) { - // Start a new dot file. - std::string Error; - raw_fd_ostream Out(FileName, Error); - if (!Error.empty()) { - errs() << "llvm-objdump: warning: " << Error << '\n'; - return; - } - - Out << "digraph " << f.getName() << " {\n"; - Out << "graph [ rankdir = \"LR\" ];\n"; - for (MCFunction::iterator i = f.begin(), e = f.end(); i != e; ++i) { - bool hasPreds = false; - // Only print blocks that have predecessors. - // FIXME: Slow. - for (MCFunction::iterator pi = f.begin(), pe = f.end(); pi != pe; - ++pi) - if (pi->second.contains(i->first)) { - hasPreds = true; - break; - } - - if (!hasPreds && i != f.begin()) - continue; - - Out << '"' << i->first << "\" [ label=\"<a>"; - // Print instructions. - for (unsigned ii = 0, ie = i->second.getInsts().size(); ii != ie; - ++ii) { - // Escape special chars and print the instruction in mnemonic form. - std::string Str; - raw_string_ostream OS(Str); - IP->printInst(&i->second.getInsts()[ii].Inst, OS, ""); - Out << DOT::EscapeString(OS.str()) << '|'; - } - Out << "<o>\" shape=\"record\" ];\n"; - - // Add edges. - for (MCBasicBlock::succ_iterator si = i->second.succ_begin(), - se = i->second.succ_end(); si != se; ++si) - Out << i->first << ":o -> " << *si <<":a\n"; - } - Out << "}\n"; -} - static void getSectionsAndSymbols(const macho::Header Header, MachOObjectFile *MachOObj, @@ -272,6 +175,12 @@ static void DisassembleInputMachO2(StringRef Filename, macho::Header Header = MachOOF->getHeader(); + // FIXME: FoundFns isn't used anymore. Using symbols/LC_FUNCTION_STARTS to + // determine function locations will eventually go in MCObjectDisassembler. + // FIXME: Using the -cfg command line option, this code used to be able to + // annotate relocations with the referenced symbol's name, and if this was + // inside a __[cf]string section, the data it points to. This is now replaced + // by the upcoming MCSymbolizer, which needs the appropriate setup done above. std::vector<SectionRef> Sections; std::vector<SymbolRef> Symbols; SmallVector<uint64_t, 8> FoundFns; @@ -308,31 +217,24 @@ static void DisassembleInputMachO2(StringRef Filename, diContext.reset(DIContext::getDWARFContext(DbgObj)); } - FunctionMapTy FunctionMap; - FunctionListTy Functions; - for (unsigned SectIdx = 0; SectIdx != Sections.size(); SectIdx++) { + + bool SectIsText = false; + Sections[SectIdx].isText(SectIsText); + if (SectIsText == false) + continue; + StringRef SectName; if (Sections[SectIdx].getName(SectName) || SectName != "__text") continue; // Skip non-text sections DataRefImpl DR = Sections[SectIdx].getRawDataRefImpl(); + StringRef SegmentName = MachOOF->getSectionFinalSegmentName(DR); if (SegmentName != "__TEXT") continue; - // Insert the functions from the function starts segment into our map. - uint64_t VMAddr; - Sections[SectIdx].getAddress(VMAddr); - for (unsigned i = 0, e = FoundFns.size(); i != e; ++i) { - StringRef SectBegin; - Sections[SectIdx].getContents(SectBegin); - uint64_t Offset = (uint64_t)SectBegin.data(); - FunctionMap.insert(std::make_pair(VMAddr + FoundFns[i]-Offset, - (MCFunction*)0)); - } - StringRef Bytes; Sections[SectIdx].getContents(Bytes); StringRefMemoryObject memoryObject(Bytes); @@ -403,52 +305,39 @@ static void DisassembleInputMachO2(StringRef Filename, symbolTableWorked = true; - if (!CFG) { - // Normal disassembly, print addresses, bytes and mnemonic form. - StringRef SymName; - Symbols[SymIdx].getName(SymName); - - outs() << SymName << ":\n"; - DILineInfo lastLine; - for (uint64_t Index = Start; Index < End; Index += Size) { - MCInst Inst; - - if (DisAsm->getInstruction(Inst, Size, memoryObject, Index, - DebugOut, nulls())) { - uint64_t SectAddress = 0; - Sections[SectIdx].getAddress(SectAddress); - outs() << format("%8" PRIx64 ":\t", SectAddress + Index); - - DumpBytes(StringRef(Bytes.data() + Index, Size)); - IP->printInst(&Inst, outs(), ""); - - // Print debug info. - if (diContext) { - DILineInfo dli = - diContext->getLineInfoForAddress(SectAddress + Index); - // Print valid line info if it changed. - if (dli != lastLine && dli.getLine() != 0) - outs() << "\t## " << dli.getFileName() << ':' - << dli.getLine() << ':' << dli.getColumn(); - lastLine = dli; - } - outs() << "\n"; - } else { - errs() << "llvm-objdump: warning: invalid instruction encoding\n"; - if (Size == 0) - Size = 1; // skip illegible bytes + outs() << SymName << ":\n"; + DILineInfo lastLine; + for (uint64_t Index = Start; Index < End; Index += Size) { + MCInst Inst; + + if (DisAsm->getInstruction(Inst, Size, memoryObject, Index, + DebugOut, nulls())) { + uint64_t SectAddress = 0; + Sections[SectIdx].getAddress(SectAddress); + outs() << format("%8" PRIx64 ":\t", SectAddress + Index); + + DumpBytes(StringRef(Bytes.data() + Index, Size)); + IP->printInst(&Inst, outs(), ""); + + // Print debug info. + if (diContext) { + DILineInfo dli = + diContext->getLineInfoForAddress(SectAddress + Index); + // Print valid line info if it changed. + if (dli != lastLine && dli.getLine() != 0) + outs() << "\t## " << dli.getFileName() << ':' + << dli.getLine() << ':' << dli.getColumn(); + lastLine = dli; } + outs() << "\n"; + } else { + errs() << "llvm-objdump: warning: invalid instruction encoding\n"; + if (Size == 0) + Size = 1; // skip illegible bytes } - } else { - // Create CFG and use it for disassembly. - StringRef SymName; - Symbols[SymIdx].getName(SymName); - createMCFunctionAndSaveCalls( - SymName, DisAsm.get(), memoryObject, Start, End, - InstrAnalysis.get(), Start, DebugOut, FunctionMap, Functions); } } - if (!CFG && !symbolTableWorked) { + if (!symbolTableWorked) { // Reading the symbol table didn't work, disassemble the whole section. uint64_t SectAddress; Sections[SectIdx].getAddress(SectAddress); @@ -471,142 +360,5 @@ static void DisassembleInputMachO2(StringRef Filename, } } } - - if (CFG) { - if (!symbolTableWorked) { - // Reading the symbol table didn't work, create a big __TEXT symbol. - uint64_t SectSize = 0, SectAddress = 0; - Sections[SectIdx].getSize(SectSize); - Sections[SectIdx].getAddress(SectAddress); - createMCFunctionAndSaveCalls("__TEXT", DisAsm.get(), memoryObject, - 0, SectSize, - InstrAnalysis.get(), - SectAddress, DebugOut, - FunctionMap, Functions); - } - for (std::map<uint64_t, MCFunction*>::iterator mi = FunctionMap.begin(), - me = FunctionMap.end(); mi != me; ++mi) - if (mi->second == 0) { - // Create functions for the remaining callees we have gathered, - // but we didn't find a name for them. - uint64_t SectSize = 0; - Sections[SectIdx].getSize(SectSize); - - SmallVector<uint64_t, 16> Calls; - MCFunction f = - MCFunction::createFunctionFromMC("unknown", DisAsm.get(), - memoryObject, mi->first, - SectSize, - InstrAnalysis.get(), DebugOut, - Calls); - Functions.push_back(f); - mi->second = &Functions.back(); - for (unsigned i = 0, e = Calls.size(); i != e; ++i) { - std::pair<uint64_t, MCFunction*> p(Calls[i], (MCFunction*)0); - if (FunctionMap.insert(p).second) - mi = FunctionMap.begin(); - } - } - - DenseSet<uint64_t> PrintedBlocks; - for (unsigned ffi = 0, ffe = Functions.size(); ffi != ffe; ++ffi) { - MCFunction &f = Functions[ffi]; - for (MCFunction::iterator fi = f.begin(), fe = f.end(); fi != fe; ++fi){ - if (!PrintedBlocks.insert(fi->first).second) - continue; // We already printed this block. - - // We assume a block has predecessors when it's the first block after - // a symbol. - bool hasPreds = FunctionMap.find(fi->first) != FunctionMap.end(); - - // See if this block has predecessors. - // FIXME: Slow. - for (MCFunction::iterator pi = f.begin(), pe = f.end(); pi != pe; - ++pi) - if (pi->second.contains(fi->first)) { - hasPreds = true; - break; - } - - uint64_t SectSize = 0, SectAddress; - Sections[SectIdx].getSize(SectSize); - Sections[SectIdx].getAddress(SectAddress); - - // No predecessors, this is a data block. Print as .byte directives. - if (!hasPreds) { - uint64_t End = llvm::next(fi) == fe ? SectSize : - llvm::next(fi)->first; - outs() << "# " << End-fi->first << " bytes of data:\n"; - for (unsigned pos = fi->first; pos != End; ++pos) { - outs() << format("%8x:\t", SectAddress + pos); - DumpBytes(StringRef(Bytes.data() + pos, 1)); - outs() << format("\t.byte 0x%02x\n", (uint8_t)Bytes[pos]); - } - continue; - } - - if (fi->second.contains(fi->first)) // Print a header for simple loops - outs() << "# Loop begin:\n"; - - DILineInfo lastLine; - // Walk over the instructions and print them. - for (unsigned ii = 0, ie = fi->second.getInsts().size(); ii != ie; - ++ii) { - const MCDecodedInst &Inst = fi->second.getInsts()[ii]; - - // If there's a symbol at this address, print its name. - if (FunctionMap.find(SectAddress + Inst.Address) != - FunctionMap.end()) - outs() << FunctionMap[SectAddress + Inst.Address]-> getName() - << ":\n"; - - outs() << format("%8" PRIx64 ":\t", SectAddress + Inst.Address); - DumpBytes(StringRef(Bytes.data() + Inst.Address, Inst.Size)); - - if (fi->second.contains(fi->first)) // Indent simple loops. - outs() << '\t'; - - IP->printInst(&Inst.Inst, outs(), ""); - - // Look for relocations inside this instructions, if there is one - // print its target and additional information if available. - for (unsigned j = 0; j != Relocs.size(); ++j) - if (Relocs[j].first >= SectAddress + Inst.Address && - Relocs[j].first < SectAddress + Inst.Address + Inst.Size) { - StringRef SymName; - uint64_t Addr; - Relocs[j].second.getAddress(Addr); - Relocs[j].second.getName(SymName); - - outs() << "\t# " << SymName << ' '; - DumpAddress(Addr, Sections, MachOOF, outs()); - } - - // If this instructions contains an address, see if we can evaluate - // it and print additional information. - uint64_t targ = InstrAnalysis->evaluateBranch(Inst.Inst, - Inst.Address, - Inst.Size); - if (targ != -1ULL) - DumpAddress(targ, Sections, MachOOF, outs()); - - // Print debug info. - if (diContext) { - DILineInfo dli = - diContext->getLineInfoForAddress(SectAddress + Inst.Address); - // Print valid line info if it changed. - if (dli != lastLine && dli.getLine() != 0) - outs() << "\t## " << dli.getFileName() << ':' - << dli.getLine() << ':' << dli.getColumn(); - lastLine = dli; - } - - outs() << '\n'; - } - } - - emitDOTFile((f.getName().str() + ".dot").c_str(), f, IP.get()); - } - } } } diff --git a/tools/llvm-objdump/llvm-objdump.cpp b/tools/llvm-objdump/llvm-objdump.cpp index 570ec7ed6f..d8611d8b3d 100644 --- a/tools/llvm-objdump/llvm-objdump.cpp +++ b/tools/llvm-objdump/llvm-objdump.cpp @@ -17,22 +17,26 @@ //===----------------------------------------------------------------------===// #include "llvm-objdump.h" -#include "MCFunction.h" #include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Triple.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCAtom.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDisassembler.h" +#include "llvm/MC/MCFunction.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstPrinter.h" +#include "llvm/MC/MCInstrAnalysis.h" #include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCModule.h" +#include "llvm/MC/MCObjectDisassembler.h" #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCObjectSymbolizer.h" #include "llvm/MC/MCRegisterInfo.h" -#include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCRelocationInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Object/Archive.h" #include "llvm/Object/COFF.h" #include "llvm/Object/MachO.h" @@ -131,6 +135,10 @@ static cl::opt<bool> Symbolize("symbolize", cl::desc("When disassembling instructions, " "try to symbolize operands.")); +static cl::opt<bool> +CFG("cfg", cl::desc("Create a CFG for every function found in the object" + " and write it to a graphviz file")); + static StringRef ToolName; bool llvm::error(error_code ec) { @@ -169,7 +177,51 @@ static const Target *getTarget(const ObjectFile *Obj = NULL) { return TheTarget; } -void llvm::StringRefMemoryObject::anchor() { } +// Write a graphviz file for the CFG inside an MCFunction. +static void emitDOTFile(const char *FileName, const MCFunction &f, + MCInstPrinter *IP) { + // Start a new dot file. + std::string Error; + raw_fd_ostream Out(FileName, Error); + if (!Error.empty()) { + errs() << "llvm-objdump: warning: " << Error << '\n'; + return; + } + + Out << "digraph \"" << f.getName() << "\" {\n"; + Out << "graph [ rankdir = \"LR\" ];\n"; + for (MCFunction::const_iterator i = f.begin(), e = f.end(); i != e; ++i) { + // Only print blocks that have predecessors. + bool hasPreds = (*i)->pred_begin() != (*i)->pred_end(); + + if (!hasPreds && i != f.begin()) + continue; + + Out << '"' << (*i)->getInsts()->getBeginAddr() << "\" [ label=\"<a>"; + // Print instructions. + for (unsigned ii = 0, ie = (*i)->getInsts()->size(); ii != ie; + ++ii) { + if (ii != 0) // Not the first line, start a new row. + Out << '|'; + if (ii + 1 == ie) // Last line, add an end id. + Out << "<o>"; + + // Escape special chars and print the instruction in mnemonic form. + std::string Str; + raw_string_ostream OS(Str); + IP->printInst(&(*i)->getInsts()->at(ii).Inst, OS, ""); + Out << DOT::EscapeString(OS.str()); + } + Out << "\" shape=\"record\" ];\n"; + + // Add edges. + for (MCBasicBlock::succ_const_iterator si = (*i)->succ_begin(), + se = (*i)->succ_end(); si != se; ++si) + Out << (*i)->getInsts()->getBeginAddr() << ":o -> " + << (*si)->getInsts()->getBeginAddr() << ":a\n"; + } + Out << "}\n"; +} void llvm::DumpBytes(StringRef bytes) { static const char hex_rep[] = "0123456789abcdef"; @@ -269,6 +321,9 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) { } } + OwningPtr<const MCInstrAnalysis> + MIA(TheTarget->createMCInstrAnalysis(MII.get())); + int AsmPrinterVariant = AsmInfo->getAssemblerDialect(); OwningPtr<MCInstPrinter> IP(TheTarget->createMCInstPrinter( AsmPrinterVariant, *AsmInfo, *MII, *MRI, *STI)); @@ -278,6 +333,34 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) { return; } + if (CFG) { + OwningPtr<MCObjectDisassembler> OD( + new MCObjectDisassembler(*Obj, *DisAsm, *MIA)); + OwningPtr<MCModule> Mod(OD->buildModule(/* withCFG */ true)); + for (MCModule::const_atom_iterator AI = Mod->atom_begin(), + AE = Mod->atom_end(); + AI != AE; ++AI) { + outs() << "Atom " << (*AI)->getName() << ": \n"; + if (const MCTextAtom *TA = dyn_cast<MCTextAtom>(*AI)) { + for (MCTextAtom::const_iterator II = TA->begin(), IE = TA->end(); + II != IE; + ++II) { + IP->printInst(&II->Inst, outs(), ""); + outs() << "\n"; + } + } + } + for (MCModule::const_func_iterator FI = Mod->func_begin(), + FE = Mod->func_end(); + FI != FE; ++FI) { + static int filenum = 0; + emitDOTFile((Twine((*FI)->getName()) + "_" + + utostr(filenum++) + ".dot").str().c_str(), + **FI, IP.get()); + } + } + + error_code ec; for (section_iterator i = Obj->begin_sections(), e = Obj->end_sections(); diff --git a/tools/llvm-objdump/llvm-objdump.h b/tools/llvm-objdump/llvm-objdump.h index 3c62240f8f..87f19ba257 100644 --- a/tools/llvm-objdump/llvm-objdump.h +++ b/tools/llvm-objdump/llvm-objdump.h @@ -13,7 +13,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/DataTypes.h" -#include "llvm/Support/MemoryObject.h" +#include "llvm/Support/StringRefMemoryObject.h" namespace llvm { @@ -35,25 +35,6 @@ void DisassembleInputMachO(StringRef Filename); void printCOFFUnwindInfo(const object::COFFObjectFile* o); void printELFFileHeader(const object::ObjectFile *o); -class StringRefMemoryObject : public MemoryObject { - virtual void anchor(); - StringRef Bytes; - uint64_t Base; -public: - StringRefMemoryObject(StringRef bytes, uint64_t Base = 0) - : Bytes(bytes), Base(Base) {} - - uint64_t getBase() const { return Base; } - uint64_t getExtent() const { return Bytes.size(); } - - int readByte(uint64_t Addr, uint8_t *Byte) const { - if (Addr >= Base + getExtent() || Addr < Base) - return -1; - *Byte = Bytes[Addr - Base]; - return 0; - } -}; - } #endif |