summaryrefslogtreecommitdiff
path: root/include/llvm/MC/MCAtom.h
diff options
context:
space:
mode:
authorAhmed Bougacha <ahmed.bougacha@gmail.com>2013-05-24 01:07:04 +0000
committerAhmed Bougacha <ahmed.bougacha@gmail.com>2013-05-24 01:07:04 +0000
commitef99356dfebb96f6f90efb912c2877214bad060e (patch)
tree76250a4be7eff5e9bec963f6ff0daef8cb8d84bf /include/llvm/MC/MCAtom.h
parent2c94d0faa0e1c268893d5e04dc77e8a35889db00 (diff)
downloadllvm-ef99356dfebb96f6f90efb912c2877214bad060e.tar.gz
llvm-ef99356dfebb96f6f90efb912c2877214bad060e.tar.bz2
llvm-ef99356dfebb96f6f90efb912c2877214bad060e.tar.xz
MC: Disassembled CFG reconstruction.
This patch builds on some existing code to do CFG reconstruction from a disassembled binary: - MCModule represents the binary, and has a list of MCAtoms. - MCAtom represents either disassembled instructions (MCTextAtom), or contiguous data (MCDataAtom), and covers a specific range of addresses. - MCBasicBlock and MCFunction form the reconstructed CFG. An MCBB is backed by an MCTextAtom, and has the usual successors/predecessors. - MCObjectDisassembler creates a module from an ObjectFile using a disassembler. It first builds an atom for each section. It can also construct the CFG, and this splits the text atoms into basic blocks. MCModule and MCAtom were only sketched out; MCFunction and MCBB were implemented under the experimental "-cfg" llvm-objdump -macho option. This cleans them up for further use; llvm-objdump -d -cfg now generates graphviz files for each function found in the binary. In the future, MCObjectDisassembler may be the right place to do "intelligent" disassembly: for example, handling constant islands is just a matter of splitting the atom, using information that may be available in the ObjectFile. Also, better initial atom formation than just using sections is possible using symbols (and things like Mach-O's function_starts load command). This brings two minor regressions in llvm-objdump -macho -cfg: - The printing of a relocation's referenced symbol. - An annotation on loop BBs, i.e., which are their own successor. Relocation printing is replaced by the MCSymbolizer; the basic CFG annotation will be superseded by more related functionality. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@182628 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'include/llvm/MC/MCAtom.h')
-rw-r--r--include/llvm/MC/MCAtom.h174
1 files changed, 149 insertions, 25 deletions
diff --git a/include/llvm/MC/MCAtom.h b/include/llvm/MC/MCAtom.h
index ae5bf0bc20..6a937986fd 100644
--- a/include/llvm/MC/MCAtom.h
+++ b/include/llvm/MC/MCAtom.h
@@ -1,4 +1,4 @@
-//===-- llvm/MC/MCAtom.h - MCAtom class ---------------------*- C++ -*-===//
+//===-- llvm/MC/MCAtom.h ----------------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -9,7 +9,7 @@
//
// This file contains the declaration of the MCAtom class, which is used to
// represent a contiguous region in a decoded object that is uniformly data or
-// instructions;
+// instructions.
//
//===----------------------------------------------------------------------===//
@@ -24,45 +24,169 @@ namespace llvm {
class MCModule;
-/// MCData - An entry in a data MCAtom.
-// NOTE: This may change to a more complex type in the future.
-typedef uint8_t MCData;
+class MCAtom;
+class MCTextAtom;
+class MCDataAtom;
/// MCAtom - Represents a contiguous range of either instructions (a TextAtom)
/// or data (a DataAtom). Address ranges are expressed as _closed_ intervals.
class MCAtom {
- friend class MCModule;
- typedef enum { TextAtom, DataAtom } AtomType;
-
- AtomType Type;
+public:
+ virtual ~MCAtom() {}
+
+ enum AtomKind { TextAtom, DataAtom };
+ AtomKind getKind() const { return Kind; }
+
+ /// \brief Get the start address of the atom.
+ uint64_t getBeginAddr() const { return Begin; }
+ /// \brief Get the end address, i.e. the last one inside the atom.
+ uint64_t getEndAddr() const { return End; }
+
+ /// \name Atom modification methods:
+ /// When modifying a TextAtom, keep instruction boundaries in mind.
+ /// For instance, split must me given the start address of an instruction.
+ /// @{
+
+ /// \brief Splits the atom in two at a given address.
+ /// \param SplitPt Address at which to start a new atom, splitting this one.
+ /// \returns The newly created atom starting at \p SplitPt.
+ virtual MCAtom *split(uint64_t SplitPt) = 0;
+
+ /// \brief Truncates an atom, discarding everything after \p TruncPt.
+ /// \param TruncPt Last byte address to be contained in this atom.
+ virtual void truncate(uint64_t TruncPt) = 0;
+ /// @}
+
+ /// \name Naming:
+ ///
+ /// This is mostly for display purposes, and may contain anything that hints
+ /// at what the atom contains: section or symbol name, BB start address, ..
+ /// @{
+ StringRef getName() const { return Name; }
+ void setName(StringRef NewName) { Name = NewName.str(); }
+ /// @}
+
+protected:
+ const AtomKind Kind;
+ std::string Name;
MCModule *Parent;
uint64_t Begin, End;
- std::vector<std::pair<uint64_t, MCInst> > Text;
- std::vector<MCData> Data;
+ friend class MCModule;
+ MCAtom(AtomKind K, MCModule *P, uint64_t B, uint64_t E)
+ : Kind(K), Name("(unknown)"), Parent(P), Begin(B), End(E) { }
+
+ /// \name Atom remapping helpers
+ /// @{
+
+ /// \brief Remap the atom, using the given range, updating Begin/End.
+ /// One or both of the bounds can remain the same, but overlapping with other
+ /// atoms in the module is still forbidden.
+ void remap(uint64_t NewBegin, uint64_t NewEnd);
+
+ /// \brief Remap the atom to prepare for a truncation at TruncPt.
+ /// Equivalent to:
+ /// \code
+ /// // Bound checks
+ /// remap(Begin, TruncPt);
+ /// \endcode
+ void remapForTruncate(uint64_t TruncPt);
+
+ /// \brief Remap the atom to prepare for a split at SplitPt.
+ /// The bounds for the resulting atoms are returned in {L,R}{Begin,End}.
+ /// The current atom is truncated to \p LEnd.
+ void remapForSplit(uint64_t SplitPt,
+ uint64_t &LBegin, uint64_t &LEnd,
+ uint64_t &RBegin, uint64_t &REnd);
+ /// @}
+};
- // Private constructor - only callable by MCModule
- MCAtom(AtomType T, MCModule *P, uint64_t B, uint64_t E)
- : Type(T), Parent(P), Begin(B), End(E) { }
+/// \name Text atom
+/// @{
+
+/// \brief An entry in an MCTextAtom: a disassembled instruction.
+/// NOTE: Both the Address and Size field are actually redundant when taken in
+/// the context of the text atom, and may better be exposed in an iterator
+/// instead of stored in the atom, which would replace this class.
+class MCDecodedInst {
+public:
+ MCInst Inst;
+ uint64_t Address;
+ uint64_t Size;
+ MCDecodedInst(const MCInst &Inst, uint64_t Address, uint64_t Size)
+ : Inst(Inst), Address(Address), Size(Size) {}
+};
+
+/// \brief An atom consisting of disassembled instructions.
+class MCTextAtom : public MCAtom {
+private:
+ typedef std::vector<MCDecodedInst> InstListTy;
+ InstListTy Insts;
+ /// \brief The address of the next appended instruction, i.e., the
+ /// address immediately after the last instruction in the atom.
+ uint64_t NextInstAddress;
public:
- bool isTextAtom() const { return Type == TextAtom; }
- bool isDataAtom() const { return Type == DataAtom; }
+ /// Append an instruction, expanding the atom if necessary.
+ void addInst(const MCInst &Inst, uint64_t Size);
+
+ /// \name Instruction list access
+ /// @{
+ typedef InstListTy::const_iterator const_iterator;
+ const_iterator begin() const { return Insts.begin(); }
+ const_iterator end() const { return Insts.end(); }
+
+ const MCDecodedInst &back() const { return Insts.back(); }
+ const MCDecodedInst &at(size_t n) const { return Insts.at(n); }
+ uint64_t size() const { return Insts.size(); }
+ /// @}
+
+ /// \name Atom type specific split/truncate logic.
+ /// @{
+ MCTextAtom *split(uint64_t SplitPt) LLVM_OVERRIDE;
+ void truncate(uint64_t TruncPt) LLVM_OVERRIDE;
+ /// @}
+
+ // Class hierarchy.
+ static bool classof(const MCAtom *A) { return A->getKind() == TextAtom; }
+private:
+ friend class MCModule;
+ // Private constructor - only callable by MCModule
+ MCTextAtom(MCModule *P, uint64_t Begin, uint64_t End)
+ : MCAtom(TextAtom, P, Begin, End), NextInstAddress(Begin) {}
+};
+/// @}
+
+/// \name Data atom
+/// @{
+
+/// \brief An entry in an MCDataAtom.
+// NOTE: This may change to a more complex type in the future.
+typedef uint8_t MCData;
- void addInst(const MCInst &I, uint64_t Address, unsigned Size);
+/// \brief An atom consising of a sequence of bytes.
+class MCDataAtom : public MCAtom {
+ std::vector<MCData> Data;
+
+public:
+ /// Append a data entry, expanding the atom if necessary.
void addData(const MCData &D);
- /// split - Splits the atom in two at a given address, which must align with
- /// and instruction boundary if this is a TextAtom. Returns the newly created
- /// atom representing the high part of the split.
- MCAtom *split(uint64_t SplitPt);
+ /// \name Atom type specific split/truncate logic.
+ /// @{
+ MCDataAtom *split(uint64_t SplitPt) LLVM_OVERRIDE;
+ void truncate(uint64_t TruncPt) LLVM_OVERRIDE;
+ /// @}
- /// truncate - Truncates an atom so that TruncPt is the last byte address
- /// contained in the atom.
- void truncate(uint64_t TruncPt);
+ // Class hierarchy.
+ static bool classof(const MCAtom *A) { return A->getKind() == DataAtom; }
+private:
+ friend class MCModule;
+ // Private constructor - only callable by MCModule
+ MCDataAtom(MCModule *P, uint64_t Begin, uint64_t End)
+ : MCAtom(DataAtom, P, Begin, End), Data(End - Begin) {}
};
}
#endif
-