summary refs log tree commit diff
path: root/include/llvm/MC
diff options
context:
space:
mode:
Diffstat (limited to 'include/llvm/MC')
-rw-r--r-- include/llvm/MC/MCAtom.h 174
-rw-r--r-- include/llvm/MC/MCFunction.h 122
-rw-r--r-- include/llvm/MC/MCInstrAnalysis.h 11
-rw-r--r-- include/llvm/MC/MCModule.h 93
-rw-r--r-- include/llvm/MC/MCObjectDisassembler.h 69
5 files changed, 419 insertions, 50 deletions
diff --git a/include/llvm/MC/MCAtom.h b/include/llvm/MC/MCAtom.h
index ae5bf0bc20..6a937986fd 100644
--- a/include/llvm/MC/MCAtom.h
+++ b/include/llvm/MC/MCAtom.h
@@ -1,4 +1,4 @@
-//===-- llvm/MC/MCAtom.h - MCAtom class ---------------------*- C++ -*-===//
+//===-- llvm/MC/MCAtom.h ----------------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -9,7 +9,7 @@
//
// This file contains the declaration of the MCAtom class, which is used to
// represent a contiguous region in a decoded object that is uniformly data or
-// instructions;
+// instructions.
//
//===----------------------------------------------------------------------===//
@@ -24,45 +24,169 @@ namespace llvm {
class MCModule;
-/// MCData - An entry in a data MCAtom.
-// NOTE: This may change to a more complex type in the future.
-typedef uint8_t MCData;
+class MCAtom;
+class MCTextAtom;
+class MCDataAtom;
/// MCAtom - Represents a contiguous range of either instructions (a TextAtom)
/// or data (a DataAtom). Address ranges are expressed as _closed_ intervals.
class MCAtom {
- friend class MCModule;
- typedef enum { TextAtom, DataAtom } AtomType;
-
- AtomType Type;
+public:
+ virtual ~MCAtom() {}
+
+ enum AtomKind { TextAtom, DataAtom };
+ AtomKind getKind() const { return Kind; }
+
+ /// \brief Get the start address of the atom.
+ uint64_t getBeginAddr() const { return Begin; }
+ /// \brief Get the end address, i.e. the last one inside the atom.
+ uint64_t getEndAddr() const { return End; }
+
+ /// \name Atom modification methods:
+ /// When modifying a TextAtom, keep instruction boundaries in mind.
+ /// For instance, split must be given the start address of an instruction.
+ /// @{
+
+ /// \brief Splits the atom in two at a given address.
+ /// \param SplitPt Address at which to start a new atom, splitting this one.
+ /// \returns The newly created atom starting at \p SplitPt.
+ virtual MCAtom *split(uint64_t SplitPt) = 0;
+
+ /// \brief Truncates an atom, discarding everything after \p TruncPt.
+ /// \param TruncPt Last byte address to be contained in this atom.
+ virtual void truncate(uint64_t TruncPt) = 0;
+ /// @}
+
+ /// \name Naming:
+ ///
+ /// This is mostly for display purposes, and may contain anything that hints
+ /// at what the atom contains: section or symbol name, BB start address, ...
+ /// @{
+ StringRef getName() const { return Name; }
+ void setName(StringRef NewName) { Name = NewName.str(); }
+ /// @}
+
+protected:
+ const AtomKind Kind;
+ std::string Name;
MCModule *Parent;
uint64_t Begin, End;
- std::vector<std::pair<uint64_t, MCInst> > Text;
- std::vector<MCData> Data;
+ friend class MCModule;
+ MCAtom(AtomKind K, MCModule *P, uint64_t B, uint64_t E)
+ : Kind(K), Name("(unknown)"), Parent(P), Begin(B), End(E) { }
+
+ /// \name Atom remapping helpers
+ /// @{
+
+ /// \brief Remap the atom, using the given range, updating Begin/End.
+ /// One or both of the bounds can remain the same, but overlapping with other
+ /// atoms in the module is still forbidden.
+ void remap(uint64_t NewBegin, uint64_t NewEnd);
+
+ /// \brief Remap the atom to prepare for a truncation at TruncPt.
+ /// Equivalent to:
+ /// \code
+ /// // Bound checks
+ /// remap(Begin, TruncPt);
+ /// \endcode
+ void remapForTruncate(uint64_t TruncPt);
+
+ /// \brief Remap the atom to prepare for a split at SplitPt.
+ /// The bounds for the resulting atoms are returned in {L,R}{Begin,End}.
+ /// The current atom is truncated to \p LEnd.
+ void remapForSplit(uint64_t SplitPt,
+ uint64_t &LBegin, uint64_t &LEnd,
+ uint64_t &RBegin, uint64_t &REnd);
+ /// @}
+};
- // Private constructor - only callable by MCModule
- MCAtom(AtomType T, MCModule *P, uint64_t B, uint64_t E)
- : Type(T), Parent(P), Begin(B), End(E) { }
+/// \name Text atom
+/// @{
+
+/// \brief An entry in an MCTextAtom: a disassembled instruction.
+/// NOTE: Both the Address and Size fields are actually redundant when taken in
+/// the context of the text atom, and may better be exposed in an iterator
+/// instead of stored in the atom, which would replace this class.
+class MCDecodedInst {
+public:
+ MCInst Inst;
+ uint64_t Address;
+ uint64_t Size;
+ MCDecodedInst(const MCInst &Inst, uint64_t Address, uint64_t Size)
+ : Inst(Inst), Address(Address), Size(Size) {}
+};
+
+/// \brief An atom consisting of disassembled instructions.
+class MCTextAtom : public MCAtom {
+private:
+ typedef std::vector<MCDecodedInst> InstListTy;
+ InstListTy Insts;
+ /// \brief The address of the next appended instruction, i.e., the
+ /// address immediately after the last instruction in the atom.
+ uint64_t NextInstAddress;
public:
- bool isTextAtom() const { return Type == TextAtom; }
- bool isDataAtom() const { return Type == DataAtom; }
+ /// Append an instruction, expanding the atom if necessary.
+ void addInst(const MCInst &Inst, uint64_t Size);
+
+ /// \name Instruction list access
+ /// @{
+ typedef InstListTy::const_iterator const_iterator;
+ const_iterator begin() const { return Insts.begin(); }
+ const_iterator end() const { return Insts.end(); }
+
+ const MCDecodedInst &back() const { return Insts.back(); }
+ const MCDecodedInst &at(size_t n) const { return Insts.at(n); }
+ uint64_t size() const { return Insts.size(); }
+ /// @}
+
+ /// \name Atom type specific split/truncate logic.
+ /// @{
+ MCTextAtom *split(uint64_t SplitPt) LLVM_OVERRIDE;
+ void truncate(uint64_t TruncPt) LLVM_OVERRIDE;
+ /// @}
+
+ // Class hierarchy.
+ static bool classof(const MCAtom *A) { return A->getKind() == TextAtom; }
+private:
+ friend class MCModule;
+ // Private constructor - only callable by MCModule
+ MCTextAtom(MCModule *P, uint64_t Begin, uint64_t End)
+ : MCAtom(TextAtom, P, Begin, End), NextInstAddress(Begin) {}
+};
+/// @}
+
+/// \name Data atom
+/// @{
+
+/// \brief An entry in an MCDataAtom.
+// NOTE: This may change to a more complex type in the future.
+typedef uint8_t MCData;
- void addInst(const MCInst &I, uint64_t Address, unsigned Size);
+/// \brief An atom consisting of a sequence of bytes.
+class MCDataAtom : public MCAtom {
+ std::vector<MCData> Data;
+
+public:
+ /// Append a data entry, expanding the atom if necessary.
void addData(const MCData &D);
- /// split - Splits the atom in two at a given address, which must align with
- /// and instruction boundary if this is a TextAtom. Returns the newly created
- /// atom representing the high part of the split.
- MCAtom *split(uint64_t SplitPt);
+ /// \name Atom type specific split/truncate logic.
+ /// @{
+ MCDataAtom *split(uint64_t SplitPt) LLVM_OVERRIDE;
+ void truncate(uint64_t TruncPt) LLVM_OVERRIDE;
+ /// @}
- /// truncate - Truncates an atom so that TruncPt is the last byte address
- /// contained in the atom.
- void truncate(uint64_t TruncPt);
+ // Class hierarchy.
+ static bool classof(const MCAtom *A) { return A->getKind() == DataAtom; }
+private:
+ friend class MCModule;
+ // Private constructor - only callable by MCModule
+ MCDataAtom(MCModule *P, uint64_t Begin, uint64_t End)
+ : MCAtom(DataAtom, P, Begin, End), Data(End - Begin) {}
};
}
#endif
-
diff --git a/include/llvm/MC/MCFunction.h b/include/llvm/MC/MCFunction.h
new file mode 100644
index 0000000000..b85011eda7
--- /dev/null
+++ b/include/llvm/MC/MCFunction.h
@@ -0,0 +1,122 @@
+//===-- llvm/MC/MCFunction.h ------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the data structures to hold a CFG reconstructed from
+// machine code.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCFUNCTION_H
+#define LLVM_MC_MCFUNCTION_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCInst.h"
+#include <string>
+#include <vector>
+
+namespace llvm {
+
+class MCFunction;
+class MCModule;
+class MCTextAtom;
+
+/// \brief Basic block containing a sequence of disassembled instructions.
+/// The basic block is backed by an MCTextAtom, which holds the instructions,
+/// and the address range it covers.
+/// Create a basic block using MCFunction::createBlock.
+class MCBasicBlock {
+ const MCTextAtom *Insts;
+
+ // MCFunction owns the basic block.
+ MCFunction *Parent;
+ friend class MCFunction;
+ MCBasicBlock(const MCTextAtom &Insts, MCFunction *Parent);
+
+ /// \name Predecessors/Successors, to represent the CFG.
+ /// @{
+ typedef std::vector<const MCBasicBlock *> BasicBlockListTy;
+ BasicBlockListTy Successors;
+ BasicBlockListTy Predecessors;
+ /// @}
+public:
+
+ /// \brief Get the backing MCTextAtom, containing the instruction sequence.
+ const MCTextAtom *getInsts() const { return Insts; }
+
+ /// \name Get the owning MCFunction.
+ /// @{
+ const MCFunction *getParent() const { return Parent; }
+ MCFunction *getParent() { return Parent; }
+ /// @}
+
+ /// MC CFG access: Predecessors/Successors.
+ /// @{
+ typedef BasicBlockListTy::const_iterator succ_const_iterator;
+ succ_const_iterator succ_begin() const { return Successors.begin(); }
+ succ_const_iterator succ_end() const { return Successors.end(); }
+
+ typedef BasicBlockListTy::const_iterator pred_const_iterator;
+ pred_const_iterator pred_begin() const { return Predecessors.begin(); }
+ pred_const_iterator pred_end() const { return Predecessors.end(); }
+
+ void addSuccessor(const MCBasicBlock *MCBB);
+ bool isSuccessor(const MCBasicBlock *MCBB) const;
+
+ void addPredecessor(const MCBasicBlock *MCBB);
+ bool isPredecessor(const MCBasicBlock *MCBB) const;
+ /// @}
+};
+
+/// \brief Represents a function in machine code, containing MCBasicBlocks.
+/// MCFunctions are created using MCModule::createFunction.
+class MCFunction {
+ MCFunction (const MCFunction&) LLVM_DELETED_FUNCTION;
+ MCFunction& operator=(const MCFunction&) LLVM_DELETED_FUNCTION;
+
+ std::string Name;
+ typedef std::vector<MCBasicBlock*> BasicBlockListTy;
+ BasicBlockListTy Blocks;
+
+ // MCModule owns the function.
+ friend class MCModule;
+ MCFunction(StringRef Name);
+public:
+ ~MCFunction();
+
+ /// \brief Create an MCBasicBlock backed by Insts and add it to this function.
+ /// \param Insts Sequence of straight-line code backing the basic block.
+ /// \returns The newly created basic block.
+ MCBasicBlock &createBlock(const MCTextAtom &Insts);
+
+ StringRef getName() const { return Name; }
+
+ /// \name Access to the function's basic blocks. No ordering is enforced.
+ /// @{
+ /// \brief Get the entry point basic block.
+ const MCBasicBlock *getEntryBlock() const { return front(); }
+ MCBasicBlock *getEntryBlock() { return front(); }
+
+ // NOTE: Dereferencing iterators gives pointers, so maybe a list is best here.
+ typedef BasicBlockListTy::const_iterator const_iterator;
+ typedef BasicBlockListTy:: iterator iterator;
+ const_iterator begin() const { return Blocks.begin(); }
+ iterator begin() { return Blocks.begin(); }
+ const_iterator end() const { return Blocks.end(); }
+ iterator end() { return Blocks.end(); }
+
+ const MCBasicBlock* front() const { return Blocks.front(); }
+ MCBasicBlock* front() { return Blocks.front(); }
+ const MCBasicBlock* back() const { return Blocks.back(); }
+ MCBasicBlock* back() { return Blocks.back(); }
+ /// @}
+};
+
+}
+
+#endif
diff --git a/include/llvm/MC/MCInstrAnalysis.h b/include/llvm/MC/MCInstrAnalysis.h
index acad6336ac..17bfd1582a 100644
--- a/include/llvm/MC/MCInstrAnalysis.h
+++ b/include/llvm/MC/MCInstrAnalysis.h
@@ -52,10 +52,15 @@ public:
return Info->get(Inst.getOpcode()).isReturn();
}
+ virtual bool isTerminator(const MCInst &Inst) const {
+ return Info->get(Inst.getOpcode()).isTerminator();
+ }
+
/// evaluateBranch - Given a branch instruction try to get the address the
- /// branch targets. Otherwise return -1.
- virtual uint64_t
- evaluateBranch(const MCInst &Inst, uint64_t Addr, uint64_t Size) const;
+ /// branch targets. Return true on success, and the address in Target.
+ virtual bool
+ evaluateBranch(const MCInst &Inst, uint64_t Addr, uint64_t Size,
+ uint64_t &Target) const;
};
}
diff --git a/include/llvm/MC/MCModule.h b/include/llvm/MC/MCModule.h
index 755fa025fb..02f8ca05b4 100644
--- a/include/llvm/MC/MCModule.h
+++ b/include/llvm/MC/MCModule.h
@@ -15,44 +15,93 @@
#ifndef LLVM_MC_MCMODULE_H
#define LLVM_MC_MCMODULE_H
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/IntervalMap.h"
-#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/DataTypes.h"
+#include <vector>
namespace llvm {
class MCAtom;
+class MCDataAtom;
+class MCFunction;
+class MCObjectDisassembler;
+class MCTextAtom;
-/// MCModule - This class represent a completely disassembled object file or
-/// executable. It comprises a list of MCAtom's, and a branch target table.
-/// Each atom represents a contiguous range of either instructions or data.
+/// \brief A completely disassembled object file or executable.
+/// It comprises a list of MCAtom's, each representing a contiguous range of
+/// either instructions or data.
+/// An MCModule is created using MCObjectDisassembler::buildModule.
class MCModule {
- /// AtomAllocationTracker - An MCModule owns its component MCAtom's, so it
- /// must track them in order to ensure they are properly freed as atoms are
- /// merged or otherwise manipulated.
- SmallPtrSet<MCAtom*, 8> AtomAllocationTracker;
+ /// \name Atom tracking
+ /// @{
- /// OffsetMap - Efficiently maps offset ranges to MCAtom's.
- IntervalMap<uint64_t, MCAtom*> OffsetMap;
-
- /// BranchTargetMap - Maps offsets that are determined to be branches and
- /// can be statically resolved to their target offsets.
- DenseMap<uint64_t, MCAtom*> BranchTargetMap;
+ /// \brief Atoms in this module, sorted by begin address.
+ /// FIXME: This doesn't handle overlapping atoms (which happen when a basic
+ /// block starts in the middle of an instruction of another basic block.)
+ typedef std::vector<MCAtom*> AtomListTy;
+ AtomListTy Atoms;
friend class MCAtom;
-
- /// remap - Update the interval mapping for an MCAtom.
+ /// \brief Remap \p Atom to the given range, and update its Begin/End fields.
+ /// \param Atom An atom belonging to this module.
+ /// An atom should always use this method to update its bounds, because this
+ /// enables the owning MCModule to keep track of its atoms.
void remap(MCAtom *Atom, uint64_t NewBegin, uint64_t NewEnd);
+ /// \brief Insert an atom in the module, using its Begin and End addresses.
+ void map(MCAtom *NewAtom);
+ /// @}
+
+ /// \name Function tracking
+ /// @{
+ typedef std::vector<MCFunction*> FunctionListTy;
+ FunctionListTy Functions;
+ /// @}
+
+ MCModule (const MCModule &) LLVM_DELETED_FUNCTION;
+ MCModule& operator=(const MCModule &) LLVM_DELETED_FUNCTION;
+
+ // MCObjectDisassembler creates MCModules.
+ friend class MCObjectDisassembler;
+ MCModule() : Atoms() { }
+
public:
- MCModule(IntervalMap<uint64_t, MCAtom*>::Allocator &A) : OffsetMap(A) { }
+ ~MCModule();
- /// createAtom - Creates a new MCAtom covering the specified offset range.
- MCAtom *createAtom(MCAtom::AtomType Type, uint64_t Begin, uint64_t End);
+ /// \name Create a new MCAtom covering the specified offset range.
+ /// @{
+ MCTextAtom *createTextAtom(uint64_t Begin, uint64_t End);
+ MCDataAtom *createDataAtom(uint64_t Begin, uint64_t End);
+ /// @}
+
+ /// \name Access to the owned atom list, ordered by begin address.
+ /// @{
+ const MCAtom *findAtomContaining(uint64_t Addr) const;
+ MCAtom *findAtomContaining(uint64_t Addr);
+
+ typedef AtomListTy::const_iterator const_atom_iterator;
+ typedef AtomListTy:: iterator atom_iterator;
+ const_atom_iterator atom_begin() const { return Atoms.begin(); }
+ atom_iterator atom_begin() { return Atoms.begin(); }
+ const_atom_iterator atom_end() const { return Atoms.end(); }
+ atom_iterator atom_end() { return Atoms.end(); }
+ /// @}
+
+ /// \name Create a new MCFunction.
+ MCFunction *createFunction(const StringRef &Name);
+
+ /// \name Access to the owned function list.
+ /// @{
+ typedef FunctionListTy::const_iterator const_func_iterator;
+ typedef FunctionListTy:: iterator func_iterator;
+ const_func_iterator func_begin() const { return Functions.begin(); }
+ func_iterator func_begin() { return Functions.begin(); }
+ const_func_iterator func_end() const { return Functions.end(); }
+ func_iterator func_end() { return Functions.end(); }
+ /// @}
};
}
#endif
-
diff --git a/include/llvm/MC/MCObjectDisassembler.h b/include/llvm/MC/MCObjectDisassembler.h
new file mode 100644
index 0000000000..749a54e7f4
--- /dev/null
+++ b/include/llvm/MC/MCObjectDisassembler.h
@@ -0,0 +1,69 @@
+//===-- llvm/MC/MCObjectDisassembler.h --------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the MCObjectDisassembler class, which
+// can be used to construct an MCModule and an MC CFG from an ObjectFile.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCOBJECTDISASSEMBLER_H
+#define LLVM_MC_MCOBJECTDISASSEMBLER_H
+
+namespace llvm {
+
+namespace object {
+ class ObjectFile;
+}
+
+class MCBasicBlock;
+class MCDisassembler;
+class MCFunction;
+class MCInstrAnalysis;
+class MCModule;
+
+/// \brief Disassemble an ObjectFile to an MCModule and MCFunctions.
+/// This class builds on MCDisassembler to disassemble whole sections, creating
+/// MCAtoms (MCTextAtom for disassembled sections and MCDataAtom for raw data).
+/// It can also be used to create a control flow graph consisting of MCFunctions
+/// and MCBasicBlocks.
+class MCObjectDisassembler {
+ const object::ObjectFile &Obj;
+ const MCDisassembler &Dis;
+ const MCInstrAnalysis &MIA;
+
+public:
+ MCObjectDisassembler(const object::ObjectFile &Obj,
+ const MCDisassembler &Dis,
+ const MCInstrAnalysis &MIA);
+
+ /// \brief Build an MCModule, creating atoms and optionally functions.
+ /// \param withCFG Also build a CFG by adding MCFunctions to the Module.
+ /// If withCFG is false, the MCModule built only contains atoms, representing
+ /// what was found in the object file. If withCFG is true, MCFunctions are
+ /// created, containing MCBasicBlocks. All text atoms are split to form basic
+ /// block atoms, which then each back an MCBasicBlock.
+ MCModule *buildModule(bool withCFG = false);
+
+private:
+ /// \brief Fill \p Module by creating an atom for each section.
+ /// This could be made much smarter, using information like symbols, but also
+ /// format-specific features, like mach-o function_start or data_in_code LCs.
+ void buildSectionAtoms(MCModule *Module);
+
+ /// \brief Enrich \p Module with a CFG consisting of MCFunctions.
+ /// \param Module An MCModule returned by buildModule, with no CFG.
+ /// NOTE: Each MCBasicBlock in an MCFunction is backed by a single MCTextAtom.
+ /// When the CFG is built, contiguous instructions that were previously in a
+ /// single MCTextAtom will be split in multiple basic block atoms.
+ void buildCFG(MCModule *Module);
+};
+
+}
+
+#endif