summaryrefslogtreecommitdiff
path: root/include/llvm/MC/MCObjectDisassembler.h
blob: 10cc04b945bf1ae37b76684e4156e2e83a39a376 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
//===-- llvm/MC/MCObjectDisassembler.h --------------------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the declaration of the MCObjectDisassembler class, which
// can be used to construct an MCModule and an MC CFG from an ObjectFile.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_MC_MCOBJECTDISASSEMBLER_H
#define LLVM_MC_MCOBJECTDISASSEMBLER_H

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/MemoryObject.h"
#include <vector>

namespace llvm {

// Forward declarations only: this header refers to the types below solely
// through references and pointers, so their full definitions are not needed
// here.
namespace object {
  class ObjectFile;
  class MachOObjectFile;
}

// MC-layer types used in the signatures and members declared in this header.
class MCBasicBlock;
class MCDisassembler;
class MCFunction;
class MCInstrAnalysis;
class MCModule;
class MCObjectSymbolizer;

/// \brief Disassemble an ObjectFile to an MCModule and MCFunctions.
/// This class builds on MCDisassembler to disassemble whole sections, creating
/// MCAtoms (MCTextAtom for disassembled sections and MCDataAtom for raw data).
/// It can also be used to create a control flow graph consisting of MCFunctions
/// and MCBasicBlocks.
///
/// Format-specific behavior (entrypoint lookup, static init/exit functions,
/// address translation) is exposed through virtual methods so that subclasses
/// can override it.
class MCObjectDisassembler {
public:
  /// \brief Construct a disassembler for \p Obj.
  /// \param Obj The object file to disassemble.
  /// \param Dis The instruction-level disassembler to build on.
  /// \param MIA The instruction analysis to use.
  /// NOTE(review): the constructor is defined out of line; presumably the
  /// arguments are bound to the reference members of the same names, in which
  /// case they must outlive this object -- confirm against the definition.
  MCObjectDisassembler(const object::ObjectFile &Obj,
                       const MCDisassembler &Dis,
                       const MCInstrAnalysis &MIA);

  /// Virtual, since this class declares virtual methods and is meant to be
  /// subclassed.
  virtual ~MCObjectDisassembler() {}

  /// \brief Build an MCModule, creating atoms and optionally functions.
  /// \param withCFG Also build a CFG by adding MCFunctions to the Module.
  /// If withCFG is false, the MCModule built only contains atoms, representing
  /// what was found in the object file. If withCFG is true, MCFunctions are
  /// created, containing MCBasicBlocks. All text atoms are split to form basic
  /// block atoms, which then each back an MCBasicBlock.
  /// NOTE(review): ownership of the returned pointer is not visible from this
  /// header; presumably the caller owns it -- confirm.
  MCModule *buildModule(bool withCFG = false);

  /// NOTE(review): undocumented in the original. Judging by the name, this
  /// presumably returns a new MCModule without any atoms or functions; confirm
  /// against the out-of-line definition.
  MCModule *buildEmptyModule();

  /// A list of addresses. Despite the name, this is a std::vector, so the type
  /// itself does not enforce uniqueness or ordering.
  typedef std::vector<uint64_t> AddressSetTy;

  /// \brief Create a new MCFunction.
  /// \param Module The module the function is created for.
  /// \param BeginAddr The address at which the function starts.
  /// \param CallTargets Taken by non-const reference; presumably receives the
  /// call targets discovered while building the function -- TODO confirm.
  /// \param TailCallTargets Taken by non-const reference; presumably receives
  /// the tail call targets discovered -- TODO confirm.
  MCFunction *createFunction(MCModule *Module, uint64_t BeginAddr,
                             AddressSetTy &CallTargets,
                             AddressSetTy &TailCallTargets);

  /// \brief Set the region on which to fall back if disassembly was requested
  /// somewhere not accessible in the object file.
  /// This is used for dynamic disassembly (see RawMemoryObject).
  /// \param Region The fallback region. Ownership is transferred to this
  /// object: the pointer is released from \p Region and stored in
  /// FallbackRegion.
  void setFallbackRegion(OwningPtr<MemoryObject> &Region) {
    // release() gives up the caller's pointer; reset() installs it here.
    FallbackRegion.reset(Region.release());
  }

  /// \brief Set the symbolizer to use to get information on external functions.
  /// Note that this isn't used to do instruction-level symbolization (that is,
  /// plugged into MCDisassembler), but to symbolize function call targets.
  /// Only the raw pointer is stored; this class's destructor does not free it.
  void setSymbolizer(MCObjectSymbolizer *ObjectSymbolizer) {
    MOS = ObjectSymbolizer;
  }

  /// \brief Get the effective address of the entrypoint, or 0 if there is none.
  virtual uint64_t getEntrypoint();

  /// \name Get the addresses of static constructors/destructors in the object.
  /// The caller is expected to know how to interpret the addresses;
  /// for example, Mach-O init functions expect 5 arguments, whereas ELF ones
  /// do not.
  /// The addresses are original object file load addresses, not effective.
  /// @{
  virtual ArrayRef<uint64_t> getStaticInitFunctions();
  virtual ArrayRef<uint64_t> getStaticExitFunctions();
  /// @}

  /// \name Translation between effective and objectfile load address.
  /// @{
  /// \brief Compute the effective load address, from an objectfile virtual
  /// address. This is implemented in a format-specific way, to take into
  /// account things like PIE/ASLR when doing dynamic disassembly.
  /// For example, on Mach-O this would be done by adding the VM addr slide,
  /// on glibc ELF by keeping a map between segment load addresses, filled
  /// using dl_iterate_phdr, etc.
  /// In most static situations and in the default impl., this returns \p Addr.
  virtual uint64_t getEffectiveLoadAddr(uint64_t Addr);

  /// \brief Compute the original load address, as specified in the objectfile.
  /// This is the inverse of getEffectiveLoadAddr.
  virtual uint64_t getOriginalLoadAddr(uint64_t EffectiveAddr);
  /// @}

protected:
  /// \brief The object file being disassembled.
  const object::ObjectFile &Obj;
  /// \brief The instruction-level disassembler this class builds on.
  const MCDisassembler &Dis;
  /// \brief The instruction analysis passed in at construction.
  const MCInstrAnalysis &MIA;
  /// \brief The symbolizer installed with setSymbolizer; a raw pointer that
  /// this class's destructor does not free.
  MCObjectSymbolizer *MOS;

  /// \brief The fallback memory region, outside the object file.
  /// Owned by this object once installed through setFallbackRegion.
  OwningPtr<MemoryObject> FallbackRegion;

  /// \brief Return a memory region suitable for reading starting at \p Addr.
  /// In most cases, this returns a StringRefMemoryObject backed by the
  /// containing section. When no section was found, this returns the
  /// FallbackRegion, if it is suitable.
  /// If it is not, or if there is no fallback region, this returns 0.
  MemoryObject *getRegionFor(uint64_t Addr);

private:
  /// \brief Fill \p Module by creating an atom for each section.
  /// This could be made much smarter, using information like symbols, but also
  /// format-specific features, like mach-o function_start or data_in_code LCs.
  void buildSectionAtoms(MCModule *Module);

  /// \brief Enrich \p Module with a CFG consisting of MCFunctions.
  /// \param Module An MCModule returned by buildModule, with no CFG.
  /// NOTE: Each MCBasicBlock in a MCFunction is backed by a single MCTextAtom.
  /// When the CFG is built, contiguous instructions that were previously in a
  /// single MCTextAtom will be split in multiple basic block atoms.
  void buildCFG(MCModule *Module);

  /// NOTE(review): undocumented in the original. Judging by the name and the
  /// signature, this presumably returns the basic block of \p MCFN starting at
  /// \p BeginAddr within \p Module, with \p CallTargets and \p TailCallTargets
  /// playing the same role as in createFunction -- confirm against the
  /// out-of-line definition.
  MCBasicBlock *getBBAt(MCModule *Module, MCFunction *MCFN, uint64_t BeginAddr,
                        AddressSetTy &CallTargets,
                        AddressSetTy &TailCallTargets);
};

class MCMachOObjectDisassembler : public MCObjectDisassembler {
  const object::MachOObjectFile &MOOF;

  uint64_t VMAddrSlide;
  uint64_t HeaderLoadAddress;

  // __DATA;__mod_init_func support.
  llvm::StringRef ModInitContents;
  // __DATA;__mod_exit_func support.
  llvm::StringRef ModExitContents;

public:
  /// \brief Construct a Mach-O specific object disassembler.
  /// \param VMAddrSlide The virtual address slide applied by dyld.
  /// \param HeaderLoadAddress The load address of the mach_header for this
  /// object.
  MCMachOObjectDisassembler(const object::MachOObjectFile &MOOF,
                            const MCDisassembler &Dis,
                            const MCInstrAnalysis &MIA, uint64_t VMAddrSlide,
                            uint64_t HeaderLoadAddress);

protected:
  uint64_t getEffectiveLoadAddr(uint64_t Addr) override;
  uint64_t getOriginalLoadAddr(uint64_t EffectiveAddr) override;
  uint64_t getEntrypoint() override;

  ArrayRef<uint64_t> getStaticInitFunctions() override;
  ArrayRef<uint64_t> getStaticExitFunctions() override;
};

}

#endif