summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/llvm/MC/MCAtom.h174
-rw-r--r--include/llvm/MC/MCFunction.h122
-rw-r--r--include/llvm/MC/MCInstrAnalysis.h11
-rw-r--r--include/llvm/MC/MCModule.h93
-rw-r--r--include/llvm/MC/MCObjectDisassembler.h69
-rw-r--r--include/llvm/Support/StringRefMemoryObject.h42
-rw-r--r--lib/MC/CMakeLists.txt2
-rw-r--r--lib/MC/MCAtom.cpp121
-rw-r--r--lib/MC/MCFunction.cpp55
-rw-r--r--lib/MC/MCInstrAnalysis.cpp9
-rw-r--r--lib/MC/MCModule.cpp79
-rw-r--r--lib/MC/MCObjectDisassembler.cpp216
-rw-r--r--lib/Support/CMakeLists.txt1
-rw-r--r--lib/Support/StringRefMemoryObject.cpp34
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp10
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp9
-rw-r--r--tools/llvm-objdump/CMakeLists.txt1
-rw-r--r--tools/llvm-objdump/MCFunction.cpp138
-rw-r--r--tools/llvm-objdump/MCFunction.h100
-rw-r--r--tools/llvm-objdump/MachODump.cpp334
-rw-r--r--tools/llvm-objdump/llvm-objdump.cpp89
-rw-r--r--tools/llvm-objdump/llvm-objdump.h21
22 files changed, 1047 insertions, 683 deletions
diff --git a/include/llvm/MC/MCAtom.h b/include/llvm/MC/MCAtom.h
index ae5bf0bc20..6a937986fd 100644
--- a/include/llvm/MC/MCAtom.h
+++ b/include/llvm/MC/MCAtom.h
@@ -1,4 +1,4 @@
-//===-- llvm/MC/MCAtom.h - MCAtom class ---------------------*- C++ -*-===//
+//===-- llvm/MC/MCAtom.h ----------------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -9,7 +9,7 @@
//
// This file contains the declaration of the MCAtom class, which is used to
// represent a contiguous region in a decoded object that is uniformly data or
-// instructions;
+// instructions.
//
//===----------------------------------------------------------------------===//
@@ -24,45 +24,169 @@ namespace llvm {
class MCModule;
-/// MCData - An entry in a data MCAtom.
-// NOTE: This may change to a more complex type in the future.
-typedef uint8_t MCData;
+class MCAtom;
+class MCTextAtom;
+class MCDataAtom;
/// MCAtom - Represents a contiguous range of either instructions (a TextAtom)
/// or data (a DataAtom). Address ranges are expressed as _closed_ intervals.
class MCAtom {
- friend class MCModule;
- typedef enum { TextAtom, DataAtom } AtomType;
-
- AtomType Type;
+public:
+ virtual ~MCAtom() {}
+
+ enum AtomKind { TextAtom, DataAtom };
+ AtomKind getKind() const { return Kind; }
+
+ /// \brief Get the start address of the atom.
+ uint64_t getBeginAddr() const { return Begin; }
+ /// \brief Get the end address, i.e. the last one inside the atom.
+ uint64_t getEndAddr() const { return End; }
+
+ /// \name Atom modification methods:
+ /// When modifying a TextAtom, keep instruction boundaries in mind.
+ /// For instance, split must be given the start address of an instruction.
+ /// @{
+
+ /// \brief Splits the atom in two at a given address.
+ /// \param SplitPt Address at which to start a new atom, splitting this one.
+ /// \returns The newly created atom starting at \p SplitPt.
+ virtual MCAtom *split(uint64_t SplitPt) = 0;
+
+ /// \brief Truncates an atom, discarding everything after \p TruncPt.
+ /// \param TruncPt Last byte address to be contained in this atom.
+ virtual void truncate(uint64_t TruncPt) = 0;
+ /// @}
+
+ /// \name Naming:
+ ///
+ /// This is mostly for display purposes, and may contain anything that hints
+ /// at what the atom contains: section or symbol name, BB start address, ..
+ /// @{
+ StringRef getName() const { return Name; }
+ void setName(StringRef NewName) { Name = NewName.str(); }
+ /// @}
+
+protected:
+ const AtomKind Kind;
+ std::string Name;
MCModule *Parent;
uint64_t Begin, End;
- std::vector<std::pair<uint64_t, MCInst> > Text;
- std::vector<MCData> Data;
+ friend class MCModule;
+ MCAtom(AtomKind K, MCModule *P, uint64_t B, uint64_t E)
+ : Kind(K), Name("(unknown)"), Parent(P), Begin(B), End(E) { }
+
+ /// \name Atom remapping helpers
+ /// @{
+
+ /// \brief Remap the atom, using the given range, updating Begin/End.
+ /// One or both of the bounds can remain the same, but overlapping with other
+ /// atoms in the module is still forbidden.
+ void remap(uint64_t NewBegin, uint64_t NewEnd);
+
+ /// \brief Remap the atom to prepare for a truncation at TruncPt.
+ /// Equivalent to:
+ /// \code
+ /// // Bound checks
+ /// remap(Begin, TruncPt);
+ /// \endcode
+ void remapForTruncate(uint64_t TruncPt);
+
+ /// \brief Remap the atom to prepare for a split at SplitPt.
+ /// The bounds for the resulting atoms are returned in {L,R}{Begin,End}.
+ /// The current atom is truncated to \p LEnd.
+ void remapForSplit(uint64_t SplitPt,
+ uint64_t &LBegin, uint64_t &LEnd,
+ uint64_t &RBegin, uint64_t &REnd);
+ /// @}
+};
- // Private constructor - only callable by MCModule
- MCAtom(AtomType T, MCModule *P, uint64_t B, uint64_t E)
- : Type(T), Parent(P), Begin(B), End(E) { }
+/// \name Text atom
+/// @{
+
+/// \brief An entry in an MCTextAtom: a disassembled instruction.
+/// NOTE: Both the Address and Size field are actually redundant when taken in
+/// the context of the text atom, and may better be exposed in an iterator
+/// instead of stored in the atom, which would replace this class.
+class MCDecodedInst {
+public:
+  /// The disassembled instruction.
+  MCInst Inst;
+  /// Address of the instruction.
+  uint64_t Address;
+  /// Size of the instruction, in the same units as Address.
+  uint64_t Size;
+
+  /// Bundle an instruction \p I with its address \p Addr and size \p Sz.
+  MCDecodedInst(const MCInst &I, uint64_t Addr, uint64_t Sz)
+      : Inst(I), Address(Addr), Size(Sz) {}
+};
+
+/// \brief An atom consisting of disassembled instructions.
+class MCTextAtom : public MCAtom {
+private:
+ typedef std::vector<MCDecodedInst> InstListTy;
+ InstListTy Insts;
+ /// \brief The address of the next appended instruction, i.e., the
+ /// address immediately after the last instruction in the atom.
+ uint64_t NextInstAddress;
public:
- bool isTextAtom() const { return Type == TextAtom; }
- bool isDataAtom() const { return Type == DataAtom; }
+ /// Append an instruction, expanding the atom if necessary.
+ void addInst(const MCInst &Inst, uint64_t Size);
+
+ /// \name Instruction list access
+ /// @{
+ typedef InstListTy::const_iterator const_iterator;
+ const_iterator begin() const { return Insts.begin(); }
+ const_iterator end() const { return Insts.end(); }
+
+ const MCDecodedInst &back() const { return Insts.back(); }
+ const MCDecodedInst &at(size_t n) const { return Insts.at(n); }
+ uint64_t size() const { return Insts.size(); }
+ /// @}
+
+ /// \name Atom type specific split/truncate logic.
+ /// @{
+ MCTextAtom *split(uint64_t SplitPt) LLVM_OVERRIDE;
+ void truncate(uint64_t TruncPt) LLVM_OVERRIDE;
+ /// @}
+
+ // Class hierarchy.
+ static bool classof(const MCAtom *A) { return A->getKind() == TextAtom; }
+private:
+ friend class MCModule;
+ // Private constructor - only callable by MCModule
+ MCTextAtom(MCModule *P, uint64_t Begin, uint64_t End)
+ : MCAtom(TextAtom, P, Begin, End), NextInstAddress(Begin) {}
+};
+/// @}
+
+/// \name Data atom
+/// @{
+
+/// \brief An entry in an MCDataAtom.
+// NOTE: This may change to a more complex type in the future.
+typedef uint8_t MCData;
- void addInst(const MCInst &I, uint64_t Address, unsigned Size);
+/// \brief An atom consisting of a sequence of bytes.
+class MCDataAtom : public MCAtom {
+ std::vector<MCData> Data;
+
+public:
+ /// Append a data entry, expanding the atom if necessary.
void addData(const MCData &D);
- /// split - Splits the atom in two at a given address, which must align with
- /// and instruction boundary if this is a TextAtom. Returns the newly created
- /// atom representing the high part of the split.
- MCAtom *split(uint64_t SplitPt);
+ /// \name Atom type specific split/truncate logic.
+ /// @{
+ MCDataAtom *split(uint64_t SplitPt) LLVM_OVERRIDE;
+ void truncate(uint64_t TruncPt) LLVM_OVERRIDE;
+ /// @}
- /// truncate - Truncates an atom so that TruncPt is the last byte address
- /// contained in the atom.
- void truncate(uint64_t TruncPt);
+ // Class hierarchy.
+ static bool classof(const MCAtom *A) { return A->getKind() == DataAtom; }
+private:
+ friend class MCModule;
+  // Private constructor - only callable by MCModule.
+  // The atom starts out with no data entries: addData() appends to Data, so
+  // pre-filling the vector would place spurious zero bytes ahead of the real
+  // contents (and split() would append after them as well). Only reserve room
+  // for the closed address range [Begin, End], i.e. End + 1 - Begin entries.
+  MCDataAtom(MCModule *P, uint64_t Begin, uint64_t End)
+      : MCAtom(DataAtom, P, Begin, End) {
+    Data.reserve(End + 1 - Begin);
+  }
};
}
#endif
-
diff --git a/include/llvm/MC/MCFunction.h b/include/llvm/MC/MCFunction.h
new file mode 100644
index 0000000000..b85011eda7
--- /dev/null
+++ b/include/llvm/MC/MCFunction.h
@@ -0,0 +1,122 @@
+//===-- llvm/MC/MCFunction.h ------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the data structures to hold a CFG reconstructed from
+// machine code.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCFUNCTION_H
+#define LLVM_MC_MCFUNCTION_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCInst.h"
+#include <string>
+#include <vector>
+
+namespace llvm {
+
+class MCFunction;
+class MCModule;
+class MCTextAtom;
+
+/// \brief Basic block containing a sequence of disassembled instructions.
+/// The basic block is backed by an MCTextAtom, which holds the instructions,
+/// and the address range it covers.
+/// Create a basic block using MCFunction::createBlock.
+class MCBasicBlock {
+ // Backing text atom. The block stores a pointer to it and declares no
+ // destructor, so the atom is not released by the block.
+ const MCTextAtom *Insts;
+
+ // MCFunction owns the basic block.
+ MCFunction *Parent;
+ friend class MCFunction;
+ // Private: blocks are only created through MCFunction::createBlock.
+ MCBasicBlock(const MCTextAtom &Insts, MCFunction *Parent);
+
+ /// \name Predecessors/Successors, to represent the CFG.
+ /// @{
+ typedef std::vector<const MCBasicBlock *> BasicBlockListTy;
+ BasicBlockListTy Successors;
+ BasicBlockListTy Predecessors;
+ /// @}
+public:
+
+ /// \brief Get the backing MCTextAtom, containing the instruction sequence.
+ const MCTextAtom *getInsts() const { return Insts; }
+
+ /// \name Get the owning MCFunction.
+ /// @{
+ const MCFunction *getParent() const { return Parent; }
+ MCFunction *getParent() { return Parent; }
+ /// @}
+
+ /// \name MC CFG access: Predecessors/Successors.
+ /// @{
+ typedef BasicBlockListTy::const_iterator succ_const_iterator;
+ succ_const_iterator succ_begin() const { return Successors.begin(); }
+ succ_const_iterator succ_end() const { return Successors.end(); }
+
+ typedef BasicBlockListTy::const_iterator pred_const_iterator;
+ pred_const_iterator pred_begin() const { return Predecessors.begin(); }
+ pred_const_iterator pred_end() const { return Predecessors.end(); }
+
+ /// \brief Append \p MCBB to the successor list.
+ /// Only this block's list is updated; \p MCBB's predecessor list is not
+ /// touched, and no duplicate check is performed.
+ void addSuccessor(const MCBasicBlock *MCBB);
+ /// \brief Check whether \p MCBB is in the successor list (linear search).
+ bool isSuccessor(const MCBasicBlock *MCBB) const;
+
+ /// \brief Append \p MCBB to the predecessor list.
+ /// Only this block's list is updated; \p MCBB's successor list is not
+ /// touched, and no duplicate check is performed.
+ void addPredecessor(const MCBasicBlock *MCBB);
+ /// \brief Check whether \p MCBB is in the predecessor list (linear search).
+ bool isPredecessor(const MCBasicBlock *MCBB) const;
+ /// @}
+};
+
+/// \brief Represents a function in machine code, containing MCBasicBlocks.
+/// MCFunctions are created using MCModule::createFunction.
+class MCFunction {
+ // Not copyable: the function owns the blocks it points to.
+ MCFunction (const MCFunction&) LLVM_DELETED_FUNCTION;
+ MCFunction& operator=(const MCFunction&) LLVM_DELETED_FUNCTION;
+
+ std::string Name;
+ // Owned basic blocks, in creation order (createBlock appends).
+ typedef std::vector<MCBasicBlock*> BasicBlockListTy;
+ BasicBlockListTy Blocks;
+
+ // MCModule owns the function.
+ friend class MCModule;
+ MCFunction(StringRef Name);
+public:
+ /// \brief Destroy the function, deleting every basic block it created.
+ ~MCFunction();
+
+ /// \brief Create an MCBasicBlock backed by Insts and add it to this function.
+ /// \param Insts Sequence of straight-line code backing the basic block.
+ /// \returns The newly created basic block.
+ MCBasicBlock &createBlock(const MCTextAtom &Insts);
+
+ StringRef getName() const { return Name; }
+
+ /// \name Access to the function's basic blocks. No ordering is enforced.
+ /// @{
+ /// \brief Get the entry point basic block.
+ /// This is simply front(), i.e. the first block that was created. The
+ /// function must already contain at least one block.
+ const MCBasicBlock *getEntryBlock() const { return front(); }
+ MCBasicBlock *getEntryBlock() { return front(); }
+
+ // NOTE: Dereferencing iterators gives pointers, so maybe a list is best here.
+ typedef BasicBlockListTy::const_iterator const_iterator;
+ typedef BasicBlockListTy:: iterator iterator;
+ const_iterator begin() const { return Blocks.begin(); }
+ iterator begin() { return Blocks.begin(); }
+ const_iterator end() const { return Blocks.end(); }
+ iterator end() { return Blocks.end(); }
+
+ // front()/back() forward to std::vector::front()/back(), which are undefined
+ // on an empty vector: only call them once a block has been created.
+ const MCBasicBlock* front() const { return Blocks.front(); }
+ MCBasicBlock* front() { return Blocks.front(); }
+ const MCBasicBlock* back() const { return Blocks.back(); }
+ MCBasicBlock* back() { return Blocks.back(); }
+ /// @}
+};
+
+}
+
+#endif
diff --git a/include/llvm/MC/MCInstrAnalysis.h b/include/llvm/MC/MCInstrAnalysis.h
index acad6336ac..17bfd1582a 100644
--- a/include/llvm/MC/MCInstrAnalysis.h
+++ b/include/llvm/MC/MCInstrAnalysis.h
@@ -52,10 +52,15 @@ public:
return Info->get(Inst.getOpcode()).isReturn();
}
+ /// isTerminator - Check whether the instruction description looked up from
+ /// \p Inst's opcode flags it as a terminator.
+ virtual bool isTerminator(const MCInst &Inst) const {
+ return Info->get(Inst.getOpcode()).isTerminator();
+ }
+
/// evaluateBranch - Given a branch instruction try to get the address the
- /// branch targets. Otherwise return -1.
- virtual uint64_t
- evaluateBranch(const MCInst &Inst, uint64_t Addr, uint64_t Size) const;
+ /// branch targets. Return true on success, and the address in Target.
+ virtual bool
+ evaluateBranch(const MCInst &Inst, uint64_t Addr, uint64_t Size,
+ uint64_t &Target) const;
};
}
diff --git a/include/llvm/MC/MCModule.h b/include/llvm/MC/MCModule.h
index 755fa025fb..02f8ca05b4 100644
--- a/include/llvm/MC/MCModule.h
+++ b/include/llvm/MC/MCModule.h
@@ -15,44 +15,93 @@
#ifndef LLVM_MC_MCMODULE_H
#define LLVM_MC_MCMODULE_H
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/IntervalMap.h"
-#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/DataTypes.h"
+#include <vector>
namespace llvm {
class MCAtom;
+class MCDataAtom;
+class MCFunction;
+class MCObjectDisassembler;
+class MCTextAtom;
-/// MCModule - This class represent a completely disassembled object file or
-/// executable. It comprises a list of MCAtom's, and a branch target table.
-/// Each atom represents a contiguous range of either instructions or data.
+/// \brief A completely disassembled object file or executable.
+/// It comprises a list of MCAtom's, each representing a contiguous range of
+/// either instructions or data.
+/// An MCModule is created using MCObjectDisassembler::buildModule.
class MCModule {
- /// AtomAllocationTracker - An MCModule owns its component MCAtom's, so it
- /// must track them in order to ensure they are properly freed as atoms are
- /// merged or otherwise manipulated.
- SmallPtrSet<MCAtom*, 8> AtomAllocationTracker;
+ /// \name Atom tracking
+ /// @{
- /// OffsetMap - Efficiently maps offset ranges to MCAtom's.
- IntervalMap<uint64_t, MCAtom*> OffsetMap;
-
- /// BranchTargetMap - Maps offsets that are determined to be branches and
- /// can be statically resolved to their target offsets.
- DenseMap<uint64_t, MCAtom*> BranchTargetMap;
+ /// \brief Atoms in this module, sorted by begin address.
+ /// FIXME: This doesn't handle overlapping atoms (which happen when a basic
+ /// block starts in the middle of an instruction of another basic block.)
+ typedef std::vector<MCAtom*> AtomListTy;
+ AtomListTy Atoms;
friend class MCAtom;
-
- /// remap - Update the interval mapping for an MCAtom.
+ /// \brief Remap \p Atom to the given range, and update its Begin/End fields.
+ /// \param Atom An atom belonging to this module.
+ /// An atom should always use this method to update its bounds, because this
+ /// enables the owning MCModule to keep track of its atoms.
void remap(MCAtom *Atom, uint64_t NewBegin, uint64_t NewEnd);
+ /// \brief Insert an atom in the module, using its Begin and End addresses.
+ void map(MCAtom *NewAtom);
+ /// @}
+
+ /// \name Function tracking
+ /// @{
+ typedef std::vector<MCFunction*> FunctionListTy;
+ FunctionListTy Functions;
+ /// @}
+
+ MCModule (const MCModule &) LLVM_DELETED_FUNCTION;
+ MCModule& operator=(const MCModule &) LLVM_DELETED_FUNCTION;
+
+ // MCObjectDisassembler creates MCModules.
+ friend class MCObjectDisassembler;
+ MCModule() : Atoms() { }
+
public:
- MCModule(IntervalMap<uint64_t, MCAtom*>::Allocator &A) : OffsetMap(A) { }
+ ~MCModule();
- /// createAtom - Creates a new MCAtom covering the specified offset range.
- MCAtom *createAtom(MCAtom::AtomType Type, uint64_t Begin, uint64_t End);
+ /// \name Create a new MCAtom covering the specified offset range.
+ /// @{
+ MCTextAtom *createTextAtom(uint64_t Begin, uint64_t End);
+ MCDataAtom *createDataAtom(uint64_t Begin, uint64_t End);
+ /// @}
+
+ /// \name Access to the owned atom list, ordered by begin address.
+ /// @{
+ const MCAtom *findAtomContaining(uint64_t Addr) const;
+ MCAtom *findAtomContaining(uint64_t Addr);
+
+ typedef AtomListTy::const_iterator const_atom_iterator;
+ typedef AtomListTy:: iterator atom_iterator;
+ const_atom_iterator atom_begin() const { return Atoms.begin(); }
+ atom_iterator atom_begin() { return Atoms.begin(); }
+ const_atom_iterator atom_end() const { return Atoms.end(); }
+ atom_iterator atom_end() { return Atoms.end(); }
+ /// @}
+
+ /// \name Create a new MCFunction.
+ MCFunction *createFunction(const StringRef &Name);
+
+ /// \name Access to the owned function list.
+ /// @{
+ typedef FunctionListTy::const_iterator const_func_iterator;
+ typedef FunctionListTy:: iterator func_iterator;
+ const_func_iterator func_begin() const { return Functions.begin(); }
+ func_iterator func_begin() { return Functions.begin(); }
+ const_func_iterator func_end() const { return Functions.end(); }
+ func_iterator func_end() { return Functions.end(); }
+ /// @}
};
}
#endif
-
diff --git a/include/llvm/MC/MCObjectDisassembler.h b/include/llvm/MC/MCObjectDisassembler.h
new file mode 100644
index 0000000000..749a54e7f4
--- /dev/null
+++ b/include/llvm/MC/MCObjectDisassembler.h
@@ -0,0 +1,69 @@
+//===-- llvm/MC/MCObjectDisassembler.h --------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the MCObjectDisassembler class, which
+// can be used to construct an MCModule and an MC CFG from an ObjectFile.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCOBJECTDISASSEMBLER_H
+#define LLVM_MC_MCOBJECTDISASSEMBLER_H
+
+namespace llvm {
+
+namespace object {
+ class ObjectFile;
+}
+
+class MCBasicBlock;
+class MCDisassembler;
+class MCFunction;
+class MCInstrAnalysis;
+class MCModule;
+
+/// \brief Disassemble an ObjectFile to an MCModule and MCFunctions.
+/// This class builds on MCDisassembler to disassemble whole sections, creating
+/// MCAtom (MCTextAtom for disassembled sections and MCDataAtom for raw data).
+/// It can also be used to create a control flow graph consisting of MCFunctions
+/// and MCBasicBlocks.
+class MCObjectDisassembler {
+ // The three collaborators are held by reference, so they must outlive this
+ // object.
+ /// The object file being disassembled.
+ const object::ObjectFile &Obj;
+ /// The instruction disassembler this class builds on.
+ const MCDisassembler &Dis;
+ /// Instruction analysis; presumably used to find branches and terminators
+ /// when building the CFG (the implementation is not part of this header).
+ const MCInstrAnalysis &MIA;
+
+public:
+ MCObjectDisassembler(const object::ObjectFile &Obj,
+ const MCDisassembler &Dis,
+ const MCInstrAnalysis &MIA);
+
+ /// \brief Build an MCModule, creating atoms and optionally functions.
+ /// \param withCFG Also build a CFG by adding MCFunctions to the Module.
+ /// If withCFG is false, the MCModule built only contains atoms, representing
+ /// what was found in the object file. If withCFG is true, MCFunctions are
+ /// created, containing MCBasicBlocks. All text atoms are split to form basic
+ /// block atoms, which then each back an MCBasicBlock.
+ MCModule *buildModule(bool withCFG = false);
+
+private:
+ /// \brief Fill \p Module by creating an atom for each section.
+ /// This could be made much smarter, using information like symbols, but also
+ /// format-specific features, like mach-o function_start or data_in_code LCs.
+ void buildSectionAtoms(MCModule *Module);
+
+ /// \brief Enrich \p Module with a CFG consisting of MCFunctions.
+ /// \param Module An MCModule returned by buildModule, with no CFG.
+ /// NOTE: Each MCBasicBlock in a MCFunction is backed by a single MCTextAtom.
+ /// When the CFG is built, contiguous instructions that were previously in a
+ /// single MCTextAtom will be split in multiple basic block atoms.
+ void buildCFG(MCModule *Module);
+};
+
+}
+
+#endif
diff --git a/include/llvm/Support/StringRefMemoryObject.h b/include/llvm/Support/StringRefMemoryObject.h
new file mode 100644
index 0000000000..a0ef35a9e1
--- /dev/null
+++ b/include/llvm/Support/StringRefMemoryObject.h
@@ -0,0 +1,42 @@
+//===- llvm/Support/StringRefMemoryObject.h ---------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the StringRefMemoryObject class, a
+// simple wrapper around StringRef implementing the MemoryObject interface.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_STRINGREFMEMORYOBJECT_H
+#define LLVM_SUPPORT_STRINGREFMEMORYOBJECT_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/MemoryObject.h"
+
+namespace llvm {
+
+/// StringRefMemoryObject - Simple StringRef-backed MemoryObject
+class StringRefMemoryObject : public MemoryObject {
+ // The wrapped bytes. A StringRef does not own its storage, so the underlying
+ // buffer must outlive this object.
+ StringRef Bytes;
+ // Address reported by getBase().
+ uint64_t Base;
+public:
+ /// \param Bytes The bytes to expose through the MemoryObject interface.
+ /// \param Base  The value returned by getBase(); defaults to 0.
+ StringRefMemoryObject(StringRef Bytes, uint64_t Base = 0)
+ : Bytes(Bytes), Base(Base) {}
+
+ /// \returns The base address given at construction.
+ uint64_t getBase() const { return Base; }
+ /// \returns The number of wrapped bytes.
+ uint64_t getExtent() const { return Bytes.size(); }
+
+ // Defined out of line. NOTE(review): presumably Addr is interpreted relative
+ // to getBase() and a non-zero result signals failure -- confirm against the
+ // implementation and the MemoryObject interface.
+ int readByte(uint64_t Addr, uint8_t *Byte) const;
+ int readBytes(uint64_t Addr, uint64_t Size,
+ uint8_t *Buf, uint64_t *Copied) const;
+
+};
+
+}
+
+#endif
diff --git a/lib/MC/CMakeLists.txt b/lib/MC/CMakeLists.txt
index 5377c5c8d8..89e2aaf48b 100644
--- a/lib/MC/CMakeLists.txt
+++ b/lib/MC/CMakeLists.txt
@@ -15,6 +15,7 @@ add_llvm_library(LLVMMC
MCELF.cpp
MCELFObjectTargetWriter.cpp
MCELFStreamer.cpp
+ MCFunction.cpp
MCExpr.cpp
MCExternalSymbolizer.cpp
MCInst.cpp
@@ -26,6 +27,7 @@ add_llvm_library(LLVMMC
MCModule.cpp
MCNullStreamer.cpp
MCObjectFileInfo.cpp
+ MCObjectDisassembler.cpp
MCObjectStreamer.cpp
MCObjectSymbolizer.cpp
MCObjectWriter.cpp
diff --git a/lib/MC/MCAtom.cpp b/lib/MC/MCAtom.cpp
index d71444324f..2626b39db4 100644
--- a/lib/MC/MCAtom.cpp
+++ b/lib/MC/MCAtom.cpp
@@ -10,88 +10,101 @@
#include "llvm/MC/MCAtom.h"
#include "llvm/MC/MCModule.h"
#include "llvm/Support/ErrorHandling.h"
+#include <iterator>
using namespace llvm;
-void MCAtom::addInst(const MCInst &I, uint64_t Address, unsigned Size) {
- assert(Type == TextAtom && "Trying to add MCInst to a non-text atom!");
-
- assert(Address < End+Size &&
- "Instruction not contiguous with end of atom!");
- if (Address > End)
- Parent->remap(this, Begin, End+Size);
-
- Text.push_back(std::make_pair(Address, I));
+// Forward to the owning module, which re-inserts the atom at its new position
+// in the atom list and then updates this atom's Begin/End fields.
+void MCAtom::remap(uint64_t NewBegin, uint64_t NewEnd) {
+ Parent->remap(this, NewBegin, NewEnd);
}
-void MCAtom::addData(const MCData &D) {
- assert(Type == DataAtom && "Trying to add MCData to a non-data atom!");
- Parent->remap(this, Begin, End+1);
-
- Data.push_back(D);
+// Shrink the atom so that TruncPt becomes its last address.
+// TruncPt must lie in [Begin, End): truncating at End itself would not remove
+// anything and is rejected by the assert.
+void MCAtom::remapForTruncate(uint64_t TruncPt) {
+ assert((TruncPt >= Begin && TruncPt < End) &&
+ "Truncation point not contained in atom!");
+ remap(Begin, TruncPt);
}
-MCAtom *MCAtom::split(uint64_t SplitPt) {
+void MCAtom::remapForSplit(uint64_t SplitPt,
+ uint64_t &LBegin, uint64_t &LEnd,
+ uint64_t &RBegin, uint64_t &REnd) {
assert((SplitPt > Begin && SplitPt <= End) &&
"Splitting at point not contained in atom!");
// Compute the new begin/end points.
- uint64_t LeftBegin = Begin;
- uint64_t LeftEnd = SplitPt - 1;
- uint64_t RightBegin = SplitPt;
- uint64_t RightEnd = End;
+ LBegin = Begin;
+ LEnd = SplitPt - 1;
+ RBegin = SplitPt;
+ REnd = End;
// Remap this atom to become the lower of the two new ones.
- Parent->remap(this, LeftBegin, LeftEnd);
+ remap(LBegin, LEnd);
+}
- // Create a new atom for the higher atom.
- MCAtom *RightAtom = Parent->createAtom(Type, RightBegin, RightEnd);
+// MCDataAtom
- // Split the contents of the original atom between it and the new one. The
- // precise method depends on whether this is a data or a text atom.
- if (isDataAtom()) {
- std::vector<MCData>::iterator I = Data.begin() + (RightBegin - LeftBegin);
+// Append one data entry, growing the atom by one address when the data no
+// longer fits in the current range.
+void MCDataAtom::addData(const MCData &D) {
+  Data.push_back(D);
+  // Atom bounds form a closed interval, so [Begin, End] has room for
+  // End + 1 - Begin entries. (The operands must not be swapped: Begin - End
+  // wraps around for unsigned values and the atom would never be extended.)
+  if (Data.size() > End + 1 - Begin)
+    remap(Begin, End + 1);
+}
- assert(I != Data.end() && "Split point not found in range!");
+void MCDataAtom::truncate(uint64_t TruncPt) {
+ remapForTruncate(TruncPt);
- std::copy(I, Data.end(), RightAtom->Data.end());
- Data.erase(I, Data.end());
- } else if (isTextAtom()) {
- std::vector<std::pair<uint64_t, MCInst> >::iterator I = Text.begin();
+ Data.resize(TruncPt - Begin + 1);
+}
- while (I != Text.end() && I->first < SplitPt) ++I;
+MCDataAtom *MCDataAtom::split(uint64_t SplitPt) {
+ uint64_t LBegin, LEnd, RBegin, REnd;
+ remapForSplit(SplitPt, LBegin, LEnd, RBegin, REnd);
- assert(I != Text.end() && "Split point not found in disassembly!");
- assert(I->first == SplitPt &&
- "Split point does not fall on instruction boundary!");
+ MCDataAtom *RightAtom = Parent->createDataAtom(RBegin, REnd);
+ RightAtom->setName(getName());
- std::copy(I, Text.end(), RightAtom->Text.end());
- Text.erase(I, Text.end());
- } else
- llvm_unreachable("Unknown atom type!");
+ std::vector<MCData>::iterator I = Data.begin() + (RBegin - LBegin);
+ assert(I != Data.end() && "Split point not found in range!");
+ std::copy(I, Data.end(), std::back_inserter(RightAtom->Data));
+ Data.erase(I, Data.end());
return RightAtom;
}
-void MCAtom::truncate(uint64_t TruncPt) {
- assert((TruncPt >= Begin && TruncPt < End) &&
- "Truncation point not contained in atom!");
+// MCTextAtom
- Parent->remap(this, Begin, TruncPt);
+// Append an instruction of the given size at NextInstAddress, expanding the
+// atom if the instruction does not fit in the current range.
+void MCTextAtom::addInst(const MCInst &I, uint64_t Size) {
+  // The atom covers the closed range [Begin, End], so it has to reach the
+  // *last* byte of the appended instruction, not merely its first byte.
+  // A zero-sized instruction occupies no address and never extends the atom
+  // (this also avoids wrapping NextInstAddress - 1 at address 0).
+  if (Size != 0) {
+    const uint64_t LastByte = NextInstAddress + Size - 1;
+    if (LastByte > End)
+      remap(Begin, LastByte);
+  }
+  Insts.push_back(MCDecodedInst(I, NextInstAddress, Size));
+  NextInstAddress += Size;
+}
- if (isDataAtom()) {
- Data.resize(TruncPt - Begin + 1);
- } else if (isTextAtom()) {
- std::vector<std::pair<uint64_t, MCInst> >::iterator I = Text.begin();
+void MCTextAtom::truncate(uint64_t TruncPt) {
+ remapForTruncate(TruncPt);
- while (I != Text.end() && I->first <= TruncPt) ++I;
+ InstListTy::iterator I = Insts.begin();
+ while (I != Insts.end() && I->Address <= TruncPt) ++I;
- assert(I != Text.end() && "Truncation point not found in disassembly!");
- assert(I->first == TruncPt+1 &&
- "Truncation point does not fall on instruction boundary");
+ assert(I != Insts.end() && "Truncation point not found in disassembly!");
+ assert(I->Address == TruncPt + 1 &&
+ "Truncation point does not fall on instruction boundary");
- Text.erase(I, Text.end());
- } else
- llvm_unreachable("Unknown atom type!");
+ Insts.erase(I, Insts.end());
}
+// Split this atom at SplitPt, which must be the start address of one of its
+// instructions. This atom keeps the instructions below SplitPt; the returned
+// atom (created in the same module, with the same name) receives the rest.
+MCTextAtom *MCTextAtom::split(uint64_t SplitPt) {
+  uint64_t LBegin, LEnd, RBegin, REnd;
+  remapForSplit(SplitPt, LBegin, LEnd, RBegin, REnd);
+
+  MCTextAtom *RightAtom = Parent->createTextAtom(RBegin, REnd);
+  RightAtom->setName(getName());
+
+  // Locate the first instruction that belongs to the right-hand atom.
+  InstListTy::iterator I = Insts.begin();
+  while (I != Insts.end() && I->Address < SplitPt) ++I;
+  assert(I != Insts.end() && "Split point not found in disassembly!");
+  assert(I->Address == SplitPt &&
+         "Split point does not fall on instruction boundary!");
+
+  // Hand the tail of the instruction list over to the new atom.
+  RightAtom->Insts.insert(RightAtom->Insts.end(), I, Insts.end());
+  Insts.erase(I, Insts.end());
+
+  // Keep both append cursors at "the address immediately after the last
+  // instruction": the right atom inherits the old cursor, while this atom now
+  // ends just before SplitPt. Without this, a later addInst() on either half
+  // would record the instruction at a stale address.
+  RightAtom->NextInstAddress = NextInstAddress;
+  NextInstAddress = SplitPt;
+  return RightAtom;
+}
diff --git a/lib/MC/MCFunction.cpp b/lib/MC/MCFunction.cpp
new file mode 100644
index 0000000000..2665d3e167
--- /dev/null
+++ b/lib/MC/MCFunction.cpp
@@ -0,0 +1,55 @@
+//===-- lib/MC/MCFunction.cpp -----------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCFunction.h"
+#include "llvm/MC/MCAtom.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+
+using namespace llvm;
+
+// MCFunction
+
+// Create an empty function called FuncName.
+MCFunction::MCFunction(StringRef FuncName) : Name(FuncName) {}
+
+// The function owns its basic blocks (allocated in createBlock): free them.
+MCFunction::~MCFunction() {
+  for (BasicBlockListTy::size_type Idx = 0, N = Blocks.size(); Idx != N; ++Idx)
+    delete Blocks[Idx];
+}
+
+// Allocate a basic block backed by TA, append it to this function's block
+// list, and return a reference to it.
+MCBasicBlock &MCFunction::createBlock(const MCTextAtom &TA) {
+  MCBasicBlock *NewBlock = new MCBasicBlock(TA, this);
+  Blocks.push_back(NewBlock);
+  return *NewBlock;
+}
+
+// MCBasicBlock
+
+// Record the backing atom (by address) and the owning function.
+MCBasicBlock::MCBasicBlock(const MCTextAtom &Atom, MCFunction *Owner)
+    : Insts(&Atom), Parent(Owner) {}
+
+// Append MCBB to the successor list; duplicates are not filtered.
+void MCBasicBlock::addSuccessor(const MCBasicBlock *MCBB) {
+  Successors.insert(Successors.end(), MCBB);
+}
+
+// Linear search of the successor list for MCBB.
+bool MCBasicBlock::isSuccessor(const MCBasicBlock *MCBB) const {
+  for (succ_const_iterator SI = succ_begin(), SE = succ_end(); SI != SE; ++SI)
+    if (*SI == MCBB)
+      return true;
+  return false;
+}
+
+// Append MCBB to the predecessor list; duplicates are not filtered.
+void MCBasicBlock::addPredecessor(const MCBasicBlock *MCBB) {
+  Predecessors.insert(Predecessors.end(), MCBB);
+}
+
+// Linear search of the predecessor list for MCBB.
+bool MCBasicBlock::isPredecessor(const MCBasicBlock *MCBB) const {
+  for (pred_const_iterator PI = pred_begin(), PE = pred_end(); PI != PE; ++PI)
+    if (*PI == MCBB)
+      return true;
+  return false;
+}
diff --git a/lib/MC/MCInstrAnalysis.cpp b/lib/MC/MCInstrAnalysis.cpp
index 7736702f35..2d8336d77a 100644
--- a/lib/MC/MCInstrAnalysis.cpp
+++ b/lib/MC/MCInstrAnalysis.cpp
@@ -10,12 +10,13 @@
#include "llvm/MC/MCInstrAnalysis.h"
using namespace llvm;
-uint64_t MCInstrAnalysis::evaluateBranch(const MCInst &Inst, uint64_t Addr,
- uint64_t Size) const {
+bool MCInstrAnalysis::evaluateBranch(const MCInst &Inst, uint64_t Addr,
+ uint64_t Size, uint64_t &Target) const {
if (Inst.getNumOperands() == 0 ||
Info->get(Inst.getOpcode()).OpInfo[0].OperandType != MCOI::OPERAND_PCREL)
- return -1ULL;
+ return false;
int64_t Imm = Inst.getOperand(0).getImm();
- return Addr+Size+Imm;
+ Target = Addr+Size+Imm;
+ return true;
}
diff --git a/lib/MC/MCModule.cpp b/lib/MC/MCModule.cpp
index f563160833..50bac476fa 100644
--- a/lib/MC/MCModule.cpp
+++ b/lib/MC/MCModule.cpp
@@ -7,39 +7,92 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/MC/MCAtom.h"
#include "llvm/MC/MCModule.h"
+#include "llvm/MC/MCAtom.h"
+#include "llvm/MC/MCFunction.h"
+#include <algorithm>
using namespace llvm;
-MCAtom *MCModule::createAtom(MCAtom::AtomType Type,
- uint64_t Begin, uint64_t End) {
+// Ordering predicate for std::lower_bound over the atom list: an atom sorts
+// before Addr when it ends strictly below Addr. The search therefore stops at
+// the first atom whose end address is >= Addr.
+static bool AtomComp(const MCAtom *L, uint64_t Addr) {
+  const uint64_t AtomEnd = L->getEndAddr();
+  return AtomEnd < Addr;
+}
+
+void MCModule::map(MCAtom *NewAtom) {
+ uint64_t Begin = NewAtom->Begin,
+ End = NewAtom->End;
+
assert(Begin < End && "Creating MCAtom with endpoints reversed?");
// Check for atoms already covering this range.
- IntervalMap<uint64_t, MCAtom*>::iterator I = OffsetMap.find(Begin);
- assert((!I.valid() || I.start() < End) && "Offset range already occupied!");
+ AtomListTy::iterator I = std::lower_bound(atom_begin(), atom_end(),
+ Begin, AtomComp);
+ assert((I == atom_end() || (*I)->getBeginAddr() > End)
+ && "Offset range already occupied!");
- // Create the new atom and add it to our maps.
- MCAtom *NewAtom = new MCAtom(Type, this, Begin, End);
- AtomAllocationTracker.insert(NewAtom);
- OffsetMap.insert(Begin, End, NewAtom);
+ // Insert the new atom to the list.
+ Atoms.insert(I, NewAtom);
+}
+
+// Allocate a text atom for [Begin, End] and register it in the atom list.
+MCTextAtom *MCModule::createTextAtom(uint64_t Begin, uint64_t End) {
+  MCTextAtom *const Atom = new MCTextAtom(this, Begin, End);
+  map(Atom);
+  return Atom;
+}
+
+// Allocate a data atom for [Begin, End] and register it in the atom list.
+// The module deletes its atoms in its destructor.
+MCDataAtom *MCModule::createDataAtom(uint64_t Begin, uint64_t End) {
+ MCDataAtom *NewAtom = new MCDataAtom(this, Begin, End);
+ map(NewAtom);
return NewAtom;
}
// remap - Update the interval mapping for an atom.
void MCModule::remap(MCAtom *Atom, uint64_t NewBegin, uint64_t NewEnd) {
// Find and erase the old mapping.
- IntervalMap<uint64_t, MCAtom*>::iterator I = OffsetMap.find(Atom->Begin);
- assert(I.valid() && "Atom offset not found in module!");
+ AtomListTy::iterator I = std::lower_bound(atom_begin(), atom_end(),
+ Atom->Begin, AtomComp);
+ assert(I != atom_end() && "Atom offset not found in module!");
assert(*I == Atom && "Previous atom mapping was invalid!");
- I.erase();
+ Atoms.erase(I);
// Insert the new mapping.
- OffsetMap.insert(NewBegin, NewEnd, Atom);
+ AtomListTy::iterator NewI = std::lower_bound(atom_begin(), atom_end(),
+ NewBegin, AtomComp);
+ Atoms.insert(NewI, Atom);
// Update the atom internal bounds.
Atom->Begin = NewBegin;
Atom->End = NewEnd;
}
+/// findAtomContaining - Return the atom whose inclusive [Begin, End] range
+/// covers \p Addr, or 0 if there is none.
+const MCAtom *MCModule::findAtomContaining(uint64_t Addr) const {
+  // First atom whose end address is not below Addr.
+  AtomListTy::const_iterator Pos = std::lower_bound(atom_begin(), atom_end(),
+                                                    Addr, AtomComp);
+  // Either every atom ends below Addr, or the candidate starts past it.
+  if (Pos == atom_end() || (*Pos)->getBeginAddr() > Addr)
+    return 0;
+  return *Pos;
+}
+
+/// findAtomContaining - Mutable overload: return the atom whose inclusive
+/// [Begin, End] range covers \p Addr, or 0 if there is none.
+MCAtom *MCModule::findAtomContaining(uint64_t Addr) {
+  // First atom whose end address is not below Addr.
+  AtomListTy::iterator Pos = std::lower_bound(atom_begin(), atom_end(),
+                                              Addr, AtomComp);
+  // Either every atom ends below Addr, or the candidate starts past it.
+  if (Pos == atom_end() || (*Pos)->getBeginAddr() > Addr)
+    return 0;
+  return *Pos;
+}
+
+/// createFunction - Allocate a new MCFunction called \p Name and append it to
+/// the module's function list. The module keeps ownership: ~MCModule deletes
+/// every function in that list.
+MCFunction *MCModule::createFunction(const StringRef &Name) {
+  MCFunction *NewFn = new MCFunction(Name);
+  Functions.push_back(NewFn);
+  return NewFn;
+}
+
+/// Destroy the module together with every atom and function it holds.
+MCModule::~MCModule() {
+  // Atoms first, then functions.
+  AtomListTy::iterator AtomIt = atom_begin(), AtomEnd = atom_end();
+  while (AtomIt != AtomEnd) {
+    delete *AtomIt;
+    ++AtomIt;
+  }
+
+  FunctionListTy::iterator FnIt = func_begin(), FnEnd = func_end();
+  while (FnIt != FnEnd) {
+    delete *FnIt;
+    ++FnIt;
+  }
+}
diff --git a/lib/MC/MCObjectDisassembler.cpp b/lib/MC/MCObjectDisassembler.cpp
new file mode 100644
index 0000000000..bb3de1779e
--- /dev/null
+++ b/lib/MC/MCObjectDisassembler.cpp
@@ -0,0 +1,216 @@
+//===- lib/MC/MCObjectDisassembler.cpp ------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCObjectDisassembler.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAtom.h"
+#include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCFunction.h"
+#include "llvm/MC/MCInstrAnalysis.h"
+#include "llvm/MC/MCModule.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Support/MemoryObject.h"
+#include "llvm/Support/StringRefMemoryObject.h"
+#include "llvm/Support/raw_ostream.h"
+#include <map>
+#include <set>
+
+using namespace llvm;
+using namespace object;
+
+/// Record the object file to walk, the instruction decoder and the target's
+/// instruction analysis in the Obj, Dis and MIA members for later use by
+/// buildModule.
+MCObjectDisassembler::MCObjectDisassembler(const ObjectFile &Obj,
+                                           const MCDisassembler &Dis,
+                                           const MCInstrAnalysis &MIA)
+    : Obj(Obj),
+      Dis(Dis),
+      MIA(MIA) {
+}
+
+/// buildModule - Allocate a new MCModule, fill it with one atom per section
+/// and, when \p withCFG is set, also reconstruct functions and basic blocks.
+/// The module is allocated with new and returned as a raw pointer.
+MCModule *MCObjectDisassembler::buildModule(bool withCFG) {
+  MCModule *Result = new MCModule;
+  buildSectionAtoms(Result);
+  if (!withCFG)
+    return Result;
+  buildCFG(Result);
+  return Result;
+}
+
+/// buildSectionAtoms - Create one atom per file-backed text or data section of
+/// the object. Text sections are decoded linearly, instruction by instruction,
+/// into an MCTextAtom; data sections are copied byte by byte into an
+/// MCDataAtom. Sections that are neither, that have an unknown address or
+/// size, that are empty, or whose properties cannot be queried are skipped.
+void MCObjectDisassembler::buildSectionAtoms(MCModule *Module) {
+  error_code ec;
+  for (section_iterator SI = Obj.begin_sections(),
+                        SE = Obj.end_sections();
+                        SI != SE;
+                        SI.increment(ec)) {
+    if (ec) break;
+
+    // Skip the section when a query fails instead of reading an out-parameter
+    // that the failed accessor may never have written.
+    bool isText = false;
+    bool isData = false;
+    if (SI->isText(isText) || SI->isData(isData))
+      continue;
+    if (!isData && !isText)
+      continue;
+
+    uint64_t StartAddr = UnknownAddressOrSize;
+    uint64_t SecSize = UnknownAddressOrSize;
+    if (SI->getAddress(StartAddr) || SI->getSize(SecSize))
+      continue;
+    if (StartAddr == UnknownAddressOrSize || SecSize == UnknownAddressOrSize)
+      continue;
+
+    StringRef Contents;
+    if (SI->getContents(Contents))
+      continue;
+
+    // We don't care about things like non-file-backed sections yet.
+    if (Contents.size() != SecSize || !SecSize)
+      continue;
+    StringRefMemoryObject memoryObject(Contents);
+    // The end address passed to the atom is the last byte of the section.
+    uint64_t EndAddr = StartAddr + SecSize - 1;
+
+    // Best effort: the name is only a label, so a failed lookup is not fatal
+    // and the atom gets whatever SecName holds.
+    StringRef SecName; SI->getName(SecName);
+
+    if (isText) {
+      MCTextAtom *Text = Module->createTextAtom(StartAddr, EndAddr);
+      Text->setName(SecName);
+      uint64_t InstSize;
+      // Index is an offset into the section contents, not an address.
+      for (uint64_t Index = 0; Index < SecSize; Index += InstSize) {
+        MCInst Inst;
+        if (Dis.getInstruction(Inst, InstSize, memoryObject, Index,
+                               nulls(), nulls()))
+          Text->addInst(Inst, InstSize);
+        else
+          // We don't care about splitting mixed atoms either.
+          llvm_unreachable("Couldn't disassemble instruction in atom.");
+      }
+
+    } else {
+      MCDataAtom *Data = Module->createDataAtom(StartAddr, EndAddr);
+      Data->setName(SecName);
+      for (uint64_t Index = 0; Index < SecSize; ++Index)
+        Data->addData(Contents[Index]);
+    }
+  }
+}
+
+namespace {
+  struct BBInfo;
+  typedef std::set<BBInfo*> BBInfoSetTy;
+
+  /// BBInfo - Scratch record used while rebuilding the CFG: the text atom
+  /// backing a basic block, the MCBasicBlock created for it (if any), and its
+  /// successor/predecessor edges.
+  struct BBInfo {
+    MCTextAtom *Atom;
+    MCBasicBlock *BB;
+    BBInfoSetTy Succs;
+    BBInfoSetTy Preds;
+
+    // buildCFG tests Atom and BB against null, so make the null state
+    // explicit instead of relying on how the enclosing container happens to
+    // construct its elements.
+    BBInfo() : Atom(0), BB(0) {}
+
+    /// Record the edge this -> Succ in both directions.
+    void addSucc(BBInfo &Succ) {
+      Succs.insert(&Succ);
+      Succ.Preds.insert(this);
+    }
+  };
+}
+
+/// buildCFG - Reconstruct functions and basic blocks for \p Module, which
+/// must not have any function yet. Works in four passes:
+///   1. scan every text atom for block boundaries (Splits) and function
+///      entry points (Calls);
+///   2. split the text atoms at those boundaries;
+///   3. compute successor/predecessor edges between the resulting atoms;
+///   4. create one MCFunction per entry point, holding every block connected
+///      to it through successor or predecessor edges.
+void MCObjectDisassembler::buildCFG(MCModule *Module) {
+  typedef std::map<uint64_t, BBInfo> BBInfoByAddrTy;
+  BBInfoByAddrTy BBInfos;
+  typedef std::set<uint64_t> AddressSetTy;
+  AddressSetTy Splits;
+  AddressSetTy Calls;
+
+  assert(Module->func_begin() == Module->func_end()
+         && "Module already has a CFG!");
+
+  // First, determine the basic block boundaries and call targets.
+  for (MCModule::atom_iterator AI = Module->atom_begin(),
+                               AE = Module->atom_end();
+       AI != AE; ++AI) {
+    MCTextAtom *TA = dyn_cast<MCTextAtom>(*AI);
+    if (!TA) continue;
+    // The start of every text atom is treated as a function entry.
+    Calls.insert(TA->getBeginAddr());
+    for (MCTextAtom::const_iterator II = TA->begin(), IE = TA->end();
+         II != IE; ++II) {
+      // A new block starts right after a terminator...
+      if (MIA.isTerminator(II->Inst))
+        Splits.insert(II->Address + II->Size);
+      // ...and at every statically known branch or call target.
+      uint64_t Target;
+      if (MIA.evaluateBranch(II->Inst, II->Address, II->Size, Target)) {
+        if (MIA.isCall(II->Inst))
+          Calls.insert(Target);
+        Splits.insert(Target);
+      }
+    }
+  }
+
+  // Split text atoms into basic block atoms.
+  for (AddressSetTy::const_iterator SI = Splits.begin(), SE = Splits.end();
+       SI != SE; ++SI) {
+    MCAtom *A = Module->findAtomContaining(*SI);
+    if (!A) continue;
+    // A split address may fall in a data atom (for instance the address just
+    // past a terminator that ends a text section); there is nothing to split
+    // there.
+    MCTextAtom *TA = dyn_cast<MCTextAtom>(A);
+    if (!TA) continue;
+    BBInfos[TA->getBeginAddr()].Atom = TA;
+    if (TA->getBeginAddr() == *SI)
+      continue;
+    MCTextAtom *NewAtom = TA->split(*SI);
+    BBInfos[NewAtom->getBeginAddr()].Atom = NewAtom;
+    // Name the new block "<name before the last ':'>:<hex address>".
+    StringRef BBName = TA->getName();
+    BBName = BBName.substr(0, BBName.find_last_of(':'));
+    NewAtom->setName((BBName + ":" + utohexstr(*SI)).str());
+  }
+
+  // Compute succs/preds.
+  for (MCModule::atom_iterator AI = Module->atom_begin(),
+                               AE = Module->atom_end();
+       AI != AE; ++AI) {
+    MCTextAtom *TA = dyn_cast<MCTextAtom>(*AI);
+    if (!TA) continue;
+    BBInfo &CurBB = BBInfos[TA->getBeginAddr()];
+    // A text atom that contained no split point was never registered by the
+    // loop above; register it here so a function can still be built from it.
+    CurBB.Atom = TA;
+    const MCDecodedInst &LI = TA->back();
+    if (MIA.isBranch(LI.Inst)) {
+      uint64_t Target;
+      if (MIA.evaluateBranch(LI.Inst, LI.Address, LI.Size, Target))
+        CurBB.addSucc(BBInfos[Target]);
+      // A conditional branch may also fall through.
+      if (MIA.isConditionalBranch(LI.Inst))
+        CurBB.addSucc(BBInfos[LI.Address + LI.Size]);
+    } else if (!MIA.isTerminator(LI.Inst))
+      CurBB.addSucc(BBInfos[LI.Address + LI.Size]);
+  }
+
+
+  // Create functions and basic blocks.
+  for (AddressSetTy::const_iterator CI = Calls.begin(), CE = Calls.end();
+       CI != CE; ++CI) {
+    BBInfo &EntryBBI = BBInfos[*CI];
+    // Entry points outside any known text atom can't become functions.
+    if (!EntryBBI.Atom) continue;
+
+    MCFunction &MCFN = *Module->createFunction(EntryBBI.Atom->getName());
+
+    // Create MCBBs.
+    SmallSetVector<BBInfo*, 16> Worklist;
+    Worklist.insert(&EntryBBI);
+    // The worklist grows while it is walked, hence the index-based loop.
+    for (size_t WI = 0; WI < Worklist.size(); ++WI) {
+      BBInfo *Cur = Worklist[WI];
+      if (!Cur->Atom)
+        continue;
+      Cur->BB = &MCFN.createBlock(*Cur->Atom);
+      // Add all predecessors and successors to the worklist.
+      for (BBInfoSetTy::iterator SI = Cur->Succs.begin(), SE = Cur->Succs.end();
+           SI != SE; ++SI)
+        Worklist.insert(*SI);
+      for (BBInfoSetTy::iterator PI = Cur->Preds.begin(), PE = Cur->Preds.end();
+           PI != PE; ++PI)
+        Worklist.insert(*PI);
+    }
+
+    // Set preds/succs.
+    for (size_t WI = 0; WI < Worklist.size(); ++WI) {
+      BBInfo *Cur = Worklist[WI];
+      MCBasicBlock *MCBB = Cur->BB;
+      if (!MCBB)
+        continue;
+      // Edges to addresses with no atom have no MCBasicBlock; skip them
+      // rather than handing a null block to the CFG.
+      for (BBInfoSetTy::iterator SI = Cur->Succs.begin(), SE = Cur->Succs.end();
+           SI != SE; ++SI)
+        if ((*SI)->BB)
+          MCBB->addSuccessor((*SI)->BB);
+      for (BBInfoSetTy::iterator PI = Cur->Preds.begin(), PE = Cur->Preds.end();
+           PI != PE; ++PI)
+        if ((*PI)->BB)
+          MCBB->addPredecessor((*PI)->BB);
+    }
+  }
+}
diff --git a/lib/Support/CMakeLists.txt b/lib/Support/CMakeLists.txt
index fbc38183bc..6cc8ab236a 100644
--- a/lib/Support/CMakeLists.txt
+++ b/lib/Support/CMakeLists.txt
@@ -48,6 +48,7 @@ add_llvm_library(LLVMSupport
StringMap.cpp
StringPool.cpp
StringRef.cpp
+ StringRefMemoryObject.cpp
SystemUtils.cpp
Timer.cpp
ToolOutputFile.cpp
diff --git a/lib/Support/StringRefMemoryObject.cpp b/lib/Support/StringRefMemoryObject.cpp
new file mode 100644
index 0000000000..5db11e918c
--- /dev/null
+++ b/lib/Support/StringRefMemoryObject.cpp
@@ -0,0 +1,34 @@
+//===- lib/Support/StringRefMemoryObject.cpp --------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/StringRefMemoryObject.h"
+
+using namespace llvm;
+
+/// readByte - Store the byte at address \p Addr into \p Byte.
+/// Returns 0 on success, or -1 (leaving \p Byte untouched) when \p Addr lies
+/// outside [Base, Base + getExtent()).
+int StringRefMemoryObject::readByte(uint64_t Addr, uint8_t *Byte) const {
+  const bool InRange = Addr < Base + getExtent() && Addr >= Base;
+  if (!InRange)
+    return -1;
+  // Addresses are relative to Base; the backing string is indexed from 0.
+  *Byte = Bytes[Addr - Base];
+  return 0;
+}
+
+/// readBytes - Copy up to \p Size bytes starting at address \p Addr into
+/// \p Buf. The request is truncated to the bytes available before the end of
+/// the object; the number actually copied is stored in \p Copied when that
+/// pointer is non-null. Returns 0 on success, or -1 (copying nothing) when
+/// \p Addr lies outside [Base, Base + getExtent()).
+int StringRefMemoryObject::readBytes(uint64_t Addr,
+                                     uint64_t Size,
+                                     uint8_t *Buf,
+                                     uint64_t *Copied) const {
+  const bool InRange = Addr < Base + getExtent() && Addr >= Base;
+  if (!InRange)
+    return -1;
+
+  uint64_t Offset = Addr - Base;
+  // Clamp the request to what is left after Offset.
+  uint64_t Available = getExtent() - Offset;
+  if (Size > Available)
+    Size = Available;
+
+  memcpy(Buf, Bytes.data() + Offset, Size);
+  if (Copied)
+    *Copied = Size;
+  return 0;
+}
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
index eeec608820..48d48190fd 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
@@ -136,17 +136,17 @@ public:
return MCInstrAnalysis::isConditionalBranch(Inst);
}
- uint64_t evaluateBranch(const MCInst &Inst, uint64_t Addr,
- uint64_t Size) const {
+ bool evaluateBranch(const MCInst &Inst, uint64_t Addr,
+ uint64_t Size, uint64_t &Target) const {
unsigned LblOperand = Inst.getOpcode() == AArch64::Bcc ? 1 : 0;
// FIXME: We only handle PCRel branches for now.
if (Info->get(Inst.getOpcode()).OpInfo[LblOperand].OperandType
!= MCOI::OPERAND_PCREL)
- return -1ULL;
+ return false;
int64_t Imm = Inst.getOperand(LblOperand).getImm();
-
- return Addr + Imm;
+ Target = Addr + Imm;
+ return true;
}
};
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
index 52fc28d11d..c092801a67 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
@@ -240,15 +240,16 @@ public:
return MCInstrAnalysis::isConditionalBranch(Inst);
}
- uint64_t evaluateBranch(const MCInst &Inst, uint64_t Addr,
- uint64_t Size) const {
+ bool evaluateBranch(const MCInst &Inst, uint64_t Addr,
+ uint64_t Size, uint64_t &Target) const {
// We only handle PCRel branches for now.
if (Info->get(Inst.getOpcode()).OpInfo[0].OperandType!=MCOI::OPERAND_PCREL)
- return -1ULL;
+ return false;
int64_t Imm = Inst.getOperand(0).getImm();
// FIXME: This is not right for thumb.
- return Addr+Imm+8; // In ARM mode the PC is always off by 8 bytes.
+ Target = Addr+Imm+8; // In ARM mode the PC is always off by 8 bytes.
+ return true;
}
};
diff --git a/tools/llvm-objdump/CMakeLists.txt b/tools/llvm-objdump/CMakeLists.txt
index 0c49d0b457..e983ec92fb 100644
--- a/tools/llvm-objdump/CMakeLists.txt
+++ b/tools/llvm-objdump/CMakeLists.txt
@@ -12,5 +12,4 @@ add_llvm_tool(llvm-objdump
COFFDump.cpp
ELFDump.cpp
MachODump.cpp
- MCFunction.cpp
)
diff --git a/tools/llvm-objdump/MCFunction.cpp b/tools/llvm-objdump/MCFunction.cpp
deleted file mode 100644
index 5c67f1b70a..0000000000
--- a/tools/llvm-objdump/MCFunction.cpp
+++ /dev/null
@@ -1,138 +0,0 @@
-//===-- MCFunction.cpp ----------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the algorithm to break down a region of machine code
-// into basic blocks and try to reconstruct a CFG from it.
-//
-//===----------------------------------------------------------------------===//
-
-#include "MCFunction.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/MC/MCDisassembler.h"
-#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCInstPrinter.h"
-#include "llvm/MC/MCInstrAnalysis.h"
-#include "llvm/MC/MCInstrDesc.h"
-#include "llvm/MC/MCInstrInfo.h"
-#include "llvm/Support/MemoryObject.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/system_error.h"
-#include <set>
-using namespace llvm;
-
-MCFunction
-MCFunction::createFunctionFromMC(StringRef Name, const MCDisassembler *DisAsm,
- const MemoryObject &Region, uint64_t Start,
- uint64_t End, const MCInstrAnalysis *Ana,
- raw_ostream &DebugOut,
- SmallVectorImpl<uint64_t> &Calls) {
- std::vector<MCDecodedInst> Instructions;
- std::set<uint64_t> Splits;
- Splits.insert(Start);
- uint64_t Size;
-
- MCFunction f(Name);
-
- {
- DenseSet<uint64_t> VisitedInsts;
- SmallVector<uint64_t, 16> WorkList;
- WorkList.push_back(Start);
- // Disassemble code and gather basic block split points.
- while (!WorkList.empty()) {
- uint64_t Index = WorkList.pop_back_val();
- if (VisitedInsts.find(Index) != VisitedInsts.end())
- continue; // Already visited this location.
-
- for (;Index < End; Index += Size) {
- VisitedInsts.insert(Index);
-
- MCInst Inst;
- if (DisAsm->getInstruction(Inst, Size, Region, Index, DebugOut, nulls())){
- Instructions.push_back(MCDecodedInst(Index, Size, Inst));
- if (Ana->isBranch(Inst)) {
- uint64_t targ = Ana->evaluateBranch(Inst, Index, Size);
- if (targ != -1ULL && targ == Index+Size)
- continue; // Skip nop jumps.
-
- // If we could determine the branch target, make a note to start a
- // new basic block there and add the target to the worklist.
- if (targ != -1ULL) {
- Splits.insert(targ);
- WorkList.push_back(targ);
- WorkList.push_back(Index+Size);
- }
- Splits.insert(Index+Size);
- break;
- } else if (Ana->isReturn(Inst)) {
- // Return instruction. This basic block ends here.
- Splits.insert(Index+Size);
- break;
- } else if (Ana->isCall(Inst)) {
- uint64_t targ = Ana->evaluateBranch(Inst, Index, Size);
- // Add the call to the call list if the destination is known.
- if (targ != -1ULL && targ != Index+Size)
- Calls.push_back(targ);
- }
- } else {
- errs().write_hex(Index) << ": warning: invalid instruction encoding\n";
- if (Size == 0)
- Size = 1; // skip illegible bytes
- }
- }
- }
- }
-
- // Make sure the instruction list is sorted.
- std::sort(Instructions.begin(), Instructions.end());
-
- // Create basic blocks.
- unsigned ii = 0, ie = Instructions.size();
- for (std::set<uint64_t>::iterator spi = Splits.begin(),
- spe = llvm::prior(Splits.end()); spi != spe; ++spi) {
- MCBasicBlock BB;
- uint64_t BlockEnd = *llvm::next(spi);
- // Add instructions to the BB.
- for (; ii != ie; ++ii) {
- if (Instructions[ii].Address < *spi ||
- Instructions[ii].Address >= BlockEnd)
- break;
- BB.addInst(Instructions[ii]);
- }
- f.addBlock(*spi, BB);
- }
-
- std::sort(f.Blocks.begin(), f.Blocks.end());
-
- // Calculate successors of each block.
- for (MCFunction::iterator i = f.begin(), e = f.end(); i != e; ++i) {
- MCBasicBlock &BB = const_cast<MCBasicBlock&>(i->second);
- if (BB.getInsts().empty()) continue;
- const MCDecodedInst &Inst = BB.getInsts().back();
-
- if (Ana->isBranch(Inst.Inst)) {
- uint64_t targ = Ana->evaluateBranch(Inst.Inst, Inst.Address, Inst.Size);
- if (targ == -1ULL) {
- // Indirect branch. Bail and add all blocks of the function as a
- // successor.
- for (MCFunction::iterator i = f.begin(), e = f.end(); i != e; ++i)
- BB.addSucc(i->first);
- } else if (targ != Inst.Address+Inst.Size)
- BB.addSucc(targ);
- // Conditional branches can also fall through to the next block.
- if (Ana->isConditionalBranch(Inst.Inst) && llvm::next(i) != e)
- BB.addSucc(llvm::next(i)->first);
- } else {
- // No branch. Fall through to the next block.
- if (!Ana->isReturn(Inst.Inst) && llvm::next(i) != e)
- BB.addSucc(llvm::next(i)->first);
- }
- }
-
- return f;
-}
diff --git a/tools/llvm-objdump/MCFunction.h b/tools/llvm-objdump/MCFunction.h
deleted file mode 100644
index 6d3a548d48..0000000000
--- a/tools/llvm-objdump/MCFunction.h
+++ /dev/null
@@ -1,100 +0,0 @@
-//===-- MCFunction.h ------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the data structures to hold a CFG reconstructed from
-// machine code.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_OBJECTDUMP_MCFUNCTION_H
-#define LLVM_OBJECTDUMP_MCFUNCTION_H
-
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/DenseSet.h"
-#include "llvm/MC/MCInst.h"
-#include <map>
-
-namespace llvm {
-
-class MCDisassembler;
-class MCInstrAnalysis;
-class MemoryObject;
-class raw_ostream;
-
-/// MCDecodedInst - Small container to hold an MCInst and associated info like
-/// address and size.
-struct MCDecodedInst {
- uint64_t Address;
- uint64_t Size;
- MCInst Inst;
-
- MCDecodedInst() {}
- MCDecodedInst(uint64_t Address, uint64_t Size, MCInst Inst)
- : Address(Address), Size(Size), Inst(Inst) {}
-
- bool operator<(const MCDecodedInst &RHS) const {
- return Address < RHS.Address;
- }
-};
-
-/// MCBasicBlock - Consists of multiple MCDecodedInsts and a list of successing
-/// MCBasicBlocks.
-class MCBasicBlock {
- std::vector<MCDecodedInst> Insts;
- typedef DenseSet<uint64_t> SetTy;
- SetTy Succs;
-public:
- ArrayRef<MCDecodedInst> getInsts() const { return Insts; }
-
- typedef SetTy::const_iterator succ_iterator;
- succ_iterator succ_begin() const { return Succs.begin(); }
- succ_iterator succ_end() const { return Succs.end(); }
-
- bool contains(uint64_t Addr) const { return Succs.count(Addr); }
-
- void addInst(const MCDecodedInst &Inst) { Insts.push_back(Inst); }
- void addSucc(uint64_t Addr) { Succs.insert(Addr); }
-
- bool operator<(const MCBasicBlock &RHS) const {
- return Insts.size() < RHS.Insts.size();
- }
-};
-
-/// MCFunction - Represents a named function in machine code, containing
-/// multiple MCBasicBlocks.
-class MCFunction {
- const StringRef Name;
- // Keep BBs sorted by address.
- typedef std::vector<std::pair<uint64_t, MCBasicBlock> > MapTy;
- MapTy Blocks;
-public:
- MCFunction(StringRef Name) : Name(Name) {}
-
- // Create an MCFunction from a region of binary machine code.
- static MCFunction
- createFunctionFromMC(StringRef Name, const MCDisassembler *DisAsm,
- const MemoryObject &Region, uint64_t Start, uint64_t End,
- const MCInstrAnalysis *Ana, raw_ostream &DebugOut,
- SmallVectorImpl<uint64_t> &Calls);
-
- typedef MapTy::const_iterator iterator;
- iterator begin() const { return Blocks.begin(); }
- iterator end() const { return Blocks.end(); }
-
- StringRef getName() const { return Name; }
-
- MCBasicBlock &addBlock(uint64_t Address, const MCBasicBlock &BB) {
- Blocks.push_back(std::make_pair(Address, BB));
- return Blocks.back().second;
- }
-};
-
-}
-
-#endif
diff --git a/tools/llvm-objdump/MachODump.cpp b/tools/llvm-objdump/MachODump.cpp
index c7e5cc1ede..03a383eb12 100644
--- a/tools/llvm-objdump/MachODump.cpp
+++ b/tools/llvm-objdump/MachODump.cpp
@@ -12,9 +12,9 @@
//===----------------------------------------------------------------------===//
#include "llvm-objdump.h"
-#include "MCFunction.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Triple.h"
#include "llvm/DebugInfo/DIContext.h"
#include "llvm/MC/MCAsmInfo.h"
@@ -44,10 +44,6 @@ using namespace llvm;
using namespace object;
static cl::opt<bool>
- CFG("cfg", cl::desc("Create a CFG for every symbol in the object file and"
- " write it to a graphviz file (MachO-only)"));
-
-static cl::opt<bool>
UseDbg("g", cl::desc("Print line information from debug info if available"));
static cl::opt<std::string>
@@ -91,99 +87,6 @@ struct SymbolSorter {
}
};
-// Print additional information about an address, if available.
-static void DumpAddress(uint64_t Address, ArrayRef<SectionRef> Sections,
- const MachOObjectFile *MachOObj, raw_ostream &OS) {
- for (unsigned i = 0; i != Sections.size(); ++i) {
- uint64_t SectAddr = 0, SectSize = 0;
- Sections[i].getAddress(SectAddr);
- Sections[i].getSize(SectSize);
- uint64_t addr = SectAddr;
- if (SectAddr <= Address &&
- SectAddr + SectSize > Address) {
- StringRef bytes, name;
- Sections[i].getContents(bytes);
- Sections[i].getName(name);
- // Print constant strings.
- if (!name.compare("__cstring"))
- OS << '"' << bytes.substr(addr, bytes.find('\0', addr)) << '"';
- // Print constant CFStrings.
- if (!name.compare("__cfstring"))
- OS << "@\"" << bytes.substr(addr, bytes.find('\0', addr)) << '"';
- }
- }
-}
-
-typedef std::map<uint64_t, MCFunction*> FunctionMapTy;
-typedef SmallVector<MCFunction, 16> FunctionListTy;
-static void createMCFunctionAndSaveCalls(StringRef Name,
- const MCDisassembler *DisAsm,
- MemoryObject &Object, uint64_t Start,
- uint64_t End,
- MCInstrAnalysis *InstrAnalysis,
- uint64_t Address,
- raw_ostream &DebugOut,
- FunctionMapTy &FunctionMap,
- FunctionListTy &Functions) {
- SmallVector<uint64_t, 16> Calls;
- MCFunction f =
- MCFunction::createFunctionFromMC(Name, DisAsm, Object, Start, End,
- InstrAnalysis, DebugOut, Calls);
- Functions.push_back(f);
- FunctionMap[Address] = &Functions.back();
-
- // Add the gathered callees to the map.
- for (unsigned i = 0, e = Calls.size(); i != e; ++i)
- FunctionMap.insert(std::make_pair(Calls[i], (MCFunction*)0));
-}
-
-// Write a graphviz file for the CFG inside an MCFunction.
-static void emitDOTFile(const char *FileName, const MCFunction &f,
- MCInstPrinter *IP) {
- // Start a new dot file.
- std::string Error;
- raw_fd_ostream Out(FileName, Error);
- if (!Error.empty()) {
- errs() << "llvm-objdump: warning: " << Error << '\n';
- return;
- }
-
- Out << "digraph " << f.getName() << " {\n";
- Out << "graph [ rankdir = \"LR\" ];\n";
- for (MCFunction::iterator i = f.begin(), e = f.end(); i != e; ++i) {
- bool hasPreds = false;
- // Only print blocks that have predecessors.
- // FIXME: Slow.
- for (MCFunction::iterator pi = f.begin(), pe = f.end(); pi != pe;
- ++pi)
- if (pi->second.contains(i->first)) {
- hasPreds = true;
- break;
- }
-
- if (!hasPreds && i != f.begin())
- continue;
-
- Out << '"' << i->first << "\" [ label=\"<a>";
- // Print instructions.
- for (unsigned ii = 0, ie = i->second.getInsts().size(); ii != ie;
- ++ii) {
- // Escape special chars and print the instruction in mnemonic form.
- std::string Str;
- raw_string_ostream OS(Str);
- IP->printInst(&i->second.getInsts()[ii].Inst, OS, "");
- Out << DOT::EscapeString(OS.str()) << '|';
- }
- Out << "<o>\" shape=\"record\" ];\n";
-
- // Add edges.
- for (MCBasicBlock::succ_iterator si = i->second.succ_begin(),
- se = i->second.succ_end(); si != se; ++si)
- Out << i->first << ":o -> " << *si <<":a\n";
- }
- Out << "}\n";
-}
-
static void
getSectionsAndSymbols(const macho::Header Header,
MachOObjectFile *MachOObj,
@@ -272,6 +175,12 @@ static void DisassembleInputMachO2(StringRef Filename,
macho::Header Header = MachOOF->getHeader();
+ // FIXME: FoundFns isn't used anymore. Using symbols/LC_FUNCTION_STARTS to
+ // determine function locations will eventually go in MCObjectDisassembler.
+ // FIXME: Using the -cfg command line option, this code used to be able to
+ // annotate relocations with the referenced symbol's name, and if this was
+ // inside a __[cf]string section, the data it points to. This is now replaced
+ // by the upcoming MCSymbolizer, which needs the appropriate setup done above.
std::vector<SectionRef> Sections;
std::vector<SymbolRef> Symbols;
SmallVector<uint64_t, 8> FoundFns;
@@ -308,31 +217,24 @@ static void DisassembleInputMachO2(StringRef Filename,
diContext.reset(DIContext::getDWARFContext(DbgObj));
}
- FunctionMapTy FunctionMap;
- FunctionListTy Functions;
-
for (unsigned SectIdx = 0; SectIdx != Sections.size(); SectIdx++) {
+
+ bool SectIsText = false;
+ Sections[SectIdx].isText(SectIsText);
+ if (SectIsText == false)
+ continue;
+
StringRef SectName;
if (Sections[SectIdx].getName(SectName) ||
SectName != "__text")
continue; // Skip non-text sections
DataRefImpl DR = Sections[SectIdx].getRawDataRefImpl();
+
StringRef SegmentName = MachOOF->getSectionFinalSegmentName(DR);
if (SegmentName != "__TEXT")
continue;
- // Insert the functions from the function starts segment into our map.
- uint64_t VMAddr;
- Sections[SectIdx].getAddress(VMAddr);
- for (unsigned i = 0, e = FoundFns.size(); i != e; ++i) {
- StringRef SectBegin;
- Sections[SectIdx].getContents(SectBegin);
- uint64_t Offset = (uint64_t)SectBegin.data();
- FunctionMap.insert(std::make_pair(VMAddr + FoundFns[i]-Offset,
- (MCFunction*)0));
- }
-
StringRef Bytes;
Sections[SectIdx].getContents(Bytes);
StringRefMemoryObject memoryObject(Bytes);
@@ -403,52 +305,39 @@ static void DisassembleInputMachO2(StringRef Filename,
symbolTableWorked = true;
- if (!CFG) {
- // Normal disassembly, print addresses, bytes and mnemonic form.
- StringRef SymName;
- Symbols[SymIdx].getName(SymName);
-
- outs() << SymName << ":\n";
- DILineInfo lastLine;
- for (uint64_t Index = Start; Index < End; Index += Size) {
- MCInst Inst;
-
- if (DisAsm->getInstruction(Inst, Size, memoryObject, Index,
- DebugOut, nulls())) {
- uint64_t SectAddress = 0;
- Sections[SectIdx].getAddress(SectAddress);
- outs() << format("%8" PRIx64 ":\t", SectAddress + Index);
-
- DumpBytes(StringRef(Bytes.data() + Index, Size));
- IP->printInst(&Inst, outs(), "");
-
- // Print debug info.
- if (diContext) {
- DILineInfo dli =
- diContext->getLineInfoForAddress(SectAddress + Index);
- // Print valid line info if it changed.
- if (dli != lastLine && dli.getLine() != 0)
- outs() << "\t## " << dli.getFileName() << ':'
- << dli.getLine() << ':' << dli.getColumn();
- lastLine = dli;
- }
- outs() << "\n";
- } else {
- errs() << "llvm-objdump: warning: invalid instruction encoding\n";
- if (Size == 0)
- Size = 1; // skip illegible bytes
+ outs() << SymName << ":\n";
+ DILineInfo lastLine;
+ for (uint64_t Index = Start; Index < End; Index += Size) {
+ MCInst Inst;
+
+ if (DisAsm->getInstruction(Inst, Size, memoryObject, Index,
+ DebugOut, nulls())) {
+ uint64_t SectAddress = 0;
+ Sections[SectIdx].getAddress(SectAddress);
+ outs() << format("%8" PRIx64 ":\t", SectAddress + Index);
+
+ DumpBytes(StringRef(Bytes.data() + Index, Size));
+ IP->printInst(&Inst, outs(), "");
+
+ // Print debug info.
+ if (diContext) {
+ DILineInfo dli =
+ diContext->getLineInfoForAddress(SectAddress + Index);
+ // Print valid line info if it changed.
+ if (dli != lastLine && dli.getLine() != 0)
+ outs() << "\t## " << dli.getFileName() << ':'
+ << dli.getLine() << ':' << dli.getColumn();
+ lastLine = dli;
}
+ outs() << "\n";
+ } else {
+ errs() << "llvm-objdump: warning: invalid instruction encoding\n";
+ if (Size == 0)
+ Size = 1; // skip illegible bytes
}
- } else {
- // Create CFG and use it for disassembly.
- StringRef SymName;
- Symbols[SymIdx].getName(SymName);
- createMCFunctionAndSaveCalls(
- SymName, DisAsm.get(), memoryObject, Start, End,
- InstrAnalysis.get(), Start, DebugOut, FunctionMap, Functions);
}
}
- if (!CFG && !symbolTableWorked) {
+ if (!symbolTableWorked) {
// Reading the symbol table didn't work, disassemble the whole section.
uint64_t SectAddress;
Sections[SectIdx].getAddress(SectAddress);
@@ -471,142 +360,5 @@ static void DisassembleInputMachO2(StringRef Filename,
}
}
}
-
- if (CFG) {
- if (!symbolTableWorked) {
- // Reading the symbol table didn't work, create a big __TEXT symbol.
- uint64_t SectSize = 0, SectAddress = 0;
- Sections[SectIdx].getSize(SectSize);
- Sections[SectIdx].getAddress(SectAddress);
- createMCFunctionAndSaveCalls("__TEXT", DisAsm.get(), memoryObject,
- 0, SectSize,
- InstrAnalysis.get(),
- SectAddress, DebugOut,
- FunctionMap, Functions);
- }
- for (std::map<uint64_t, MCFunction*>::iterator mi = FunctionMap.begin(),
- me = FunctionMap.end(); mi != me; ++mi)
- if (mi->second == 0) {
- // Create functions for the remaining callees we have gathered,
- // but we didn't find a name for them.
- uint64_t SectSize = 0;
- Sections[SectIdx].getSize(SectSize);
-
- SmallVector<uint64_t, 16> Calls;
- MCFunction f =
- MCFunction::createFunctionFromMC("unknown", DisAsm.get(),
- memoryObject, mi->first,
- SectSize,
- InstrAnalysis.get(), DebugOut,
- Calls);
- Functions.push_back(f);
- mi->second = &Functions.back();
- for (unsigned i = 0, e = Calls.size(); i != e; ++i) {
- std::pair<uint64_t, MCFunction*> p(Calls[i], (MCFunction*)0);
- if (FunctionMap.insert(p).second)
- mi = FunctionMap.begin();
- }
- }
-
- DenseSet<uint64_t> PrintedBlocks;
- for (unsigned ffi = 0, ffe = Functions.size(); ffi != ffe; ++ffi) {
- MCFunction &f = Functions[ffi];
- for (MCFunction::iterator fi = f.begin(), fe = f.end(); fi != fe; ++fi){
- if (!PrintedBlocks.insert(fi->first).second)
- continue; // We already printed this block.
-
- // We assume a block has predecessors when it's the first block after
- // a symbol.
- bool hasPreds = FunctionMap.find(fi->first) != FunctionMap.end();
-
- // See if this block has predecessors.
- // FIXME: Slow.
- for (MCFunction::iterator pi = f.begin(), pe = f.end(); pi != pe;
- ++pi)
- if (pi->second.contains(fi->first)) {
- hasPreds = true;
- break;
- }
-
- uint64_t SectSize = 0, SectAddress;
- Sections[SectIdx].getSize(SectSize);
- Sections[SectIdx].getAddress(SectAddress);
-
- // No predecessors, this is a data block. Print as .byte directives.
- if (!hasPreds) {
- uint64_t End = llvm::next(fi) == fe ? SectSize :
- llvm::next(fi)->first;
- outs() << "# " << End-fi->first << " bytes of data:\n";
- for (unsigned pos = fi->first; pos != End; ++pos) {
- outs() << format("%8x:\t", SectAddress + pos);
- DumpBytes(StringRef(Bytes.data() + pos, 1));
- outs() << format("\t.byte 0x%02x\n", (uint8_t)Bytes[pos]);
- }
- continue;
- }
-
- if (fi->second.contains(fi->first)) // Print a header for simple loops
- outs() << "# Loop begin:\n";
-
- DILineInfo lastLine;
- // Walk over the instructions and print them.
- for (unsigned ii = 0, ie = fi->second.getInsts().size(); ii != ie;
- ++ii) {
- const MCDecodedInst &Inst = fi->second.getInsts()[ii];
-
- // If there's a symbol at this address, print its name.
- if (FunctionMap.find(SectAddress + Inst.Address) !=
- FunctionMap.end())
- outs() << FunctionMap[SectAddress + Inst.Address]-> getName()
- << ":\n";
-
- outs() << format("%8" PRIx64 ":\t", SectAddress + Inst.Address);
- DumpBytes(StringRef(Bytes.data() + Inst.Address, Inst.Size));
-
- if (fi->second.contains(fi->first)) // Indent simple loops.
- outs() << '\t';
-
- IP->printInst(&Inst.Inst, outs(), "");
-
- // Look for relocations inside this instructions, if there is one
- // print its target and additional information if available.
- for (unsigned j = 0; j != Relocs.size(); ++j)
- if (Relocs[j].first >= SectAddress + Inst.Address &&
- Relocs[j].first < SectAddress + Inst.Address + Inst.Size) {
- StringRef SymName;
- uint64_t Addr;
- Relocs[j].second.getAddress(Addr);
- Relocs[j].second.getName(SymName);
-
- outs() << "\t# " << SymName << ' ';
- DumpAddress(Addr, Sections, MachOOF, outs());
- }
-
- // If this instructions contains an address, see if we can evaluate
- // it and print additional information.
- uint64_t targ = InstrAnalysis->evaluateBranch(Inst.Inst,
- Inst.Address,
- Inst.Size);
- if (targ != -1ULL)
- DumpAddress(targ, Sections, MachOOF, outs());
-
- // Print debug info.
- if (diContext) {
- DILineInfo dli =
- diContext->getLineInfoForAddress(SectAddress + Inst.Address);
- // Print valid line info if it changed.
- if (dli != lastLine && dli.getLine() != 0)
- outs() << "\t## " << dli.getFileName() << ':'
- << dli.getLine() << ':' << dli.getColumn();
- lastLine = dli;
- }
-
- outs() << '\n';
- }
- }
-
- emitDOTFile((f.getName().str() + ".dot").c_str(), f, IP.get());
- }
- }
}
}
diff --git a/tools/llvm-objdump/llvm-objdump.cpp b/tools/llvm-objdump/llvm-objdump.cpp
index 570ec7ed6f..d8611d8b3d 100644
--- a/tools/llvm-objdump/llvm-objdump.cpp
+++ b/tools/llvm-objdump/llvm-objdump.cpp
@@ -17,22 +17,26 @@
//===----------------------------------------------------------------------===//
#include "llvm-objdump.h"
-#include "MCFunction.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Triple.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCAtom.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCFunction.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCInstrAnalysis.h"
#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCModule.h"
+#include "llvm/MC/MCObjectDisassembler.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCObjectSymbolizer.h"
#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCRelocationInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Object/Archive.h"
#include "llvm/Object/COFF.h"
#include "llvm/Object/MachO.h"
@@ -131,6 +135,10 @@ static cl::opt<bool>
Symbolize("symbolize", cl::desc("When disassembling instructions, "
"try to symbolize operands."));
+// -cfg command-line flag. DisassembleObject tests it and, when set, builds a
+// module with CFG information and emits one graphviz file per function.
+static cl::opt<bool>
+CFG("cfg", cl::desc("Create a CFG for every function found in the object"
+ " and write it to a graphviz file"));
+
static StringRef ToolName;
bool llvm::error(error_code ec) {
@@ -169,7 +177,51 @@ static const Target *getTarget(const ObjectFile *Obj = NULL) {
return TheTarget;
}
-void llvm::StringRefMemoryObject::anchor() { }
+// Write a graphviz file for the CFG inside an MCFunction.
+//
+// FileName is the path of the dot file to create, f is the function whose
+// basic blocks and successor edges are emitted, and IP is the printer used to
+// render each instruction as text. If the file cannot be opened, a warning is
+// printed to errs() and the function returns without writing anything.
+static void emitDOTFile(const char *FileName, const MCFunction &f,
+ MCInstPrinter *IP) {
+ // Start a new dot file.
+ std::string Error;
+ raw_fd_ostream Out(FileName, Error);
+ if (!Error.empty()) {
+ errs() << "llvm-objdump: warning: " << Error << '\n';
+ return;
+ }
+
+ // Graph header: the graph is named after the function and laid out
+ // left-to-right.
+ Out << "digraph \"" << f.getName() << "\" {\n";
+ Out << "graph [ rankdir = \"LR\" ];\n";
+ for (MCFunction::const_iterator i = f.begin(), e = f.end(); i != e; ++i) {
+ // Only print blocks that have predecessors. The first block of the
+ // function is always printed, even when it has none.
+ bool hasPreds = (*i)->pred_begin() != (*i)->pred_end();
+
+ if (!hasPreds && i != f.begin())
+ continue;
+
+ // Each block becomes one record node whose id is the begin address of its
+ // instructions; "<a>" is the port that incoming edges attach to.
+ Out << '"' << (*i)->getInsts()->getBeginAddr() << "\" [ label=\"<a>";
+ // Print instructions.
+ for (unsigned ii = 0, ie = (*i)->getInsts()->size(); ii != ie;
+ ++ii) {
+ if (ii != 0) // Not the first line, start a new row.
+ Out << '|';
+ if (ii + 1 == ie) // Last line, add an end id.
+ Out << "<o>";
+
+ // Escape special chars and print the instruction in mnemonic form.
+ std::string Str;
+ raw_string_ostream OS(Str);
+ IP->printInst(&(*i)->getInsts()->at(ii).Inst, OS, "");
+ Out << DOT::EscapeString(OS.str());
+ }
+ Out << "\" shape=\"record\" ];\n";
+
+ // Add edges. Each edge leaves this block's "o" port and enters the
+ // successor's "a" port.
+ // NOTE(review): the "<o>" port is only written inside the instruction loop,
+ // so a block with zero instructions would be referenced through a port it
+ // never declared -- confirm whether empty blocks can occur here.
+ for (MCBasicBlock::succ_const_iterator si = (*i)->succ_begin(),
+ se = (*i)->succ_end(); si != se; ++si)
+ Out << (*i)->getInsts()->getBeginAddr() << ":o -> "
+ << (*si)->getInsts()->getBeginAddr() << ":a\n";
+ }
+ Out << "}\n";
+}
void llvm::DumpBytes(StringRef bytes) {
static const char hex_rep[] = "0123456789abcdef";
@@ -269,6 +321,9 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
}
}
+ OwningPtr<const MCInstrAnalysis>
+ MIA(TheTarget->createMCInstrAnalysis(MII.get()));
+
int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
OwningPtr<MCInstPrinter> IP(TheTarget->createMCInstPrinter(
AsmPrinterVariant, *AsmInfo, *MII, *MRI, *STI));
@@ -278,6 +333,34 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
return;
}
+ if (CFG) {
+ OwningPtr<MCObjectDisassembler> OD(
+ new MCObjectDisassembler(*Obj, *DisAsm, *MIA));
+ OwningPtr<MCModule> Mod(OD->buildModule(/* withCFG */ true));
+ for (MCModule::const_atom_iterator AI = Mod->atom_begin(),
+ AE = Mod->atom_end();
+ AI != AE; ++AI) {
+ outs() << "Atom " << (*AI)->getName() << ": \n";
+ if (const MCTextAtom *TA = dyn_cast<MCTextAtom>(*AI)) {
+ for (MCTextAtom::const_iterator II = TA->begin(), IE = TA->end();
+ II != IE;
+ ++II) {
+ IP->printInst(&II->Inst, outs(), "");
+ outs() << "\n";
+ }
+ }
+ }
+ for (MCModule::const_func_iterator FI = Mod->func_begin(),
+ FE = Mod->func_end();
+ FI != FE; ++FI) {
+ static int filenum = 0;
+ emitDOTFile((Twine((*FI)->getName()) + "_" +
+ utostr(filenum++) + ".dot").str().c_str(),
+ **FI, IP.get());
+ }
+ }
+
+
error_code ec;
for (section_iterator i = Obj->begin_sections(),
e = Obj->end_sections();
diff --git a/tools/llvm-objdump/llvm-objdump.h b/tools/llvm-objdump/llvm-objdump.h
index 3c62240f8f..87f19ba257 100644
--- a/tools/llvm-objdump/llvm-objdump.h
+++ b/tools/llvm-objdump/llvm-objdump.h
@@ -13,7 +13,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/DataTypes.h"
-#include "llvm/Support/MemoryObject.h"
+#include "llvm/Support/StringRefMemoryObject.h"
namespace llvm {
@@ -35,25 +35,6 @@ void DisassembleInputMachO(StringRef Filename);
void printCOFFUnwindInfo(const object::COFFObjectFile* o);
void printELFFileHeader(const object::ObjectFile *o);
-class StringRefMemoryObject : public MemoryObject {
- virtual void anchor();
- StringRef Bytes;
- uint64_t Base;
-public:
- StringRefMemoryObject(StringRef bytes, uint64_t Base = 0)
- : Bytes(bytes), Base(Base) {}
-
- uint64_t getBase() const { return Base; }
- uint64_t getExtent() const { return Bytes.size(); }
-
- int readByte(uint64_t Addr, uint8_t *Byte) const {
- if (Addr >= Base + getExtent() || Addr < Base)
- return -1;
- *Byte = Bytes[Addr - Base];
- return 0;
- }
-};
-
}
#endif