summaryrefslogtreecommitdiff
path: root/include/llvm/MC/MCAtom.h
blob: eab32d691fa84d96dc1726467139b6010a7713dd (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
//===-- llvm/MC/MCAtom.h ----------------------------------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the declaration of the MCAtom class, which is used to
// represent a contiguous region in a decoded object that is uniformly data or
// instructions.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_MC_MCATOM_H
#define LLVM_MC_MCATOM_H

#include "llvm/ADT/ArrayRef.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/DataTypes.h"
#include <vector>

namespace llvm {

// Forward declarations. Within this header MCModule is only used as a pointer
// type and as a friend, and the atom classes are defined further down.
class MCModule;

class MCAtom;
class MCTextAtom;
class MCDataAtom;

/// \brief A contiguous address range holding either instructions (kind
/// TextAtom) or data (kind DataAtom).
///
/// Ranges are _closed_ intervals: the end address is the last address that is
/// still part of the atom, not one past it.
class MCAtom {
private:
  // Declared here without a body; presumably defined out of line to anchor
  // the vtable, per the usual LLVM convention.
  virtual void anchor();

public:
  virtual ~MCAtom() {}

  /// \brief Tag distinguishing the two kinds of atom.
  enum AtomKind {
    TextAtom,
    DataAtom
  };

  /// \brief Get the kind tag this atom was constructed with.
  AtomKind getKind() const {
    return Kind;
  }

  /// \brief Get the first address of the atom.
  uint64_t getBeginAddr() const {
    return Begin;
  }

  /// \brief Get the last address of the atom (inclusive).
  uint64_t getEndAddr() const {
    return End;
  }

  /// \name Atom modification methods:
  /// Instruction boundaries matter when modifying a TextAtom.
  /// For instance, split must be given the start address of an instruction.
  /// @{

  /// \brief Splits the atom in two at a given address.
  /// \param SplitPt Address at which the new atom starts.
  /// \returns The newly created atom, which begins at \p SplitPt.
  virtual MCAtom *split(uint64_t SplitPt) = 0;

  /// \brief Truncates the atom, dropping everything after \p TruncPt.
  /// \param TruncPt Address of the last byte that stays in this atom.
  virtual void truncate(uint64_t TruncPt) = 0;
  /// @}

  /// \name Naming:
  ///
  /// Mostly for display purposes. The name can be anything hinting at what the
  /// atom contains: a section or symbol name, a BB start address, ...
  /// @{

  /// \brief Get the atom's name (the constructor sets it to "(unknown)").
  StringRef getName() const {
    return Name;
  }

  /// \brief Replace the atom's name with a copy of \p NewName.
  void setName(StringRef NewName) {
    Name = NewName.str();
  }
  /// @}

protected:
  /// Kind tag; fixed for the lifetime of the atom.
  const AtomKind Kind;
  /// Display name, see getName() / setName().
  std::string Name;
  /// Module pointer handed to the constructor.
  MCModule *Parent;
  /// First address of the atom.
  uint64_t Begin;
  /// Last address of the atom (inclusive).
  uint64_t End;

  friend class MCModule;

  /// \brief Build an atom of kind \p TheKind covering the closed range
  /// [\p BeginAddr, \p EndAddr]; the name starts out as "(unknown)".
  MCAtom(AtomKind TheKind, MCModule *Module, uint64_t BeginAddr,
         uint64_t EndAddr)
    : Kind(TheKind),
      Name("(unknown)"),
      Parent(Module),
      Begin(BeginAddr),
      End(EndAddr) {}

  /// \name Atom remapping helpers
  /// @{

  /// \brief Remap the atom onto the given range, updating Begin/End.
  /// Either bound (or both) may be left unchanged, but the new range must
  /// still not overlap any other atom in the module.
  void remap(uint64_t NewBegin, uint64_t NewEnd);

  /// \brief Remap the atom in preparation for a truncation at \p TruncPt.
  /// Equivalent to:
  /// \code
  ///   // Bound checks
  ///   remap(Begin, TruncPt);
  /// \endcode
  void remapForTruncate(uint64_t TruncPt);

  /// \brief Remap the atom in preparation for a split at \p SplitPt.
  /// The bounds of the two resulting atoms are returned through
  /// {L,R}{Begin,End}, and the current atom is truncated to \p LEnd.
  void remapForSplit(uint64_t SplitPt, uint64_t &LBegin, uint64_t &LEnd,
                     uint64_t &RBegin, uint64_t &REnd);
  /// @}
};

/// \name Text atom
/// @{

/// \brief One disassembled instruction, as stored in an MCTextAtom.
///
/// NOTE: Within the context of the enclosing text atom both Address and Size
/// are redundant. They might be better exposed through an iterator rather than
/// stored in the atom, in which case that iterator would replace this class.
class MCDecodedInst {
public:
  /// The decoded instruction.
  MCInst Inst;
  /// Address associated with the instruction.
  uint64_t Address;
  /// Size associated with the instruction (presumably in bytes -- confirm
  /// against MCTextAtom::addInst).
  uint64_t Size;

  /// \brief Bundle a copy of \p I with the given address and size.
  MCDecodedInst(const MCInst &I, uint64_t Addr, uint64_t Sz)
    : Inst(I),
      Address(Addr),
      Size(Sz) {}
};

/// \brief An atom made up of disassembled instructions.
class MCTextAtom : public MCAtom {
private:
  typedef std::vector<MCDecodedInst> InstListTy;

  /// The instructions held by this atom.
  InstListTy Insts;

  /// \brief Address at which the next appended instruction will be placed,
  /// i.e., the address immediately after the last instruction in the atom.
  uint64_t NextInstAddress;

public:
  /// Append an instruction, expanding the atom if necessary.
  void addInst(const MCInst &Inst, uint64_t Size);

  /// \name Instruction list access
  /// Read-only access; these forward directly to the underlying vector.
  /// @{
  typedef InstListTy::const_iterator const_iterator;

  const_iterator begin() const {
    return Insts.begin();
  }

  const_iterator end() const {
    return Insts.end();
  }

  const MCDecodedInst &back() const {
    return Insts.back();
  }

  const MCDecodedInst &at(size_t n) const {
    return Insts.at(n);
  }

  size_t size() const {
    return Insts.size();
  }
  /// @}

  /// \name Atom type specific split/truncate logic.
  /// @{
  MCTextAtom *split(uint64_t SplitPt) LLVM_OVERRIDE;
  void truncate(uint64_t TruncPt) LLVM_OVERRIDE;
  /// @}

  // Class hierarchy: an atom is a text atom exactly when its kind is TextAtom.
  static bool classof(const MCAtom *A) {
    return A->getKind() == TextAtom;
  }

private:
  friend class MCModule;

  // Private constructor - only callable by MCModule.
  // The first appended instruction will be placed at the atom's begin address.
  MCTextAtom(MCModule *P, uint64_t BeginAddr, uint64_t EndAddr)
    : MCAtom(TextAtom, P, BeginAddr, EndAddr),
      NextInstAddress(BeginAddr) {}
};
/// @}

/// \name Data atom
/// @{

/// \brief An entry in an MCDataAtom; currently an 8-bit unsigned value.
// NOTE: This may change to a more complex type in the future.
typedef uint8_t MCData;

/// \brief An atom consisting of a sequence of bytes.
class MCDataAtom : public MCAtom {
private:
  /// The data entries held by this atom.
  std::vector<MCData> Data;

public:
  /// Append a data entry, expanding the atom if necessary.
  void addData(const MCData &D);

  /// Get a read-only view of the data in this atom.
  ArrayRef<MCData> getData() const {
    return Data;
  }

  /// \name Atom type specific split/truncate logic.
  /// @{
  MCDataAtom *split(uint64_t SplitPt) LLVM_OVERRIDE;
  void truncate(uint64_t TruncPt) LLVM_OVERRIDE;
  /// @}

  // Class hierarchy: an atom is a data atom exactly when its kind is DataAtom.
  static bool classof(const MCAtom *A) {
    return A->getKind() == DataAtom;
  }

private:
  friend class MCModule;

  // Private constructor - only callable by MCModule.
  MCDataAtom(MCModule *P, uint64_t BeginAddr, uint64_t EndAddr)
    : MCAtom(DataAtom, P, BeginAddr, EndAddr) {
    // The range is a closed interval, hence the "+ 1" when sizing the
    // up-front reservation.
    Data.reserve(EndAddr + 1 - BeginAddr);
  }
};
/// @}

}

#endif