summary | refs | log | tree | commit | diff
path: root/include/llvm/MC/MCObjectDisassembler.h
blob: edaf7dca5a70cd96e180576a7790fcc99ebc1a27 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
//===-- llvm/MC/MCObjectDisassembler.h --------------------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the declaration of the MCObjectDisassembler class, which
// can be used to construct an MCModule and an MC CFG from an ObjectFile.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_MC_MCOBJECTDISASSEMBLER_H
#define LLVM_MC_MCOBJECTDISASSEMBLER_H

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/DataTypes.h"

namespace llvm {

// Forward declarations. This header only uses these types through references
// and pointers (or names them in documentation comments), so the full
// definitions are not required here.
namespace object {
  class ObjectFile;
}

class MCBasicBlock;
class MCDisassembler;
class MCFunction;
class MCInstrAnalysis;
class MCModule;

/// \brief Disassemble an ObjectFile to an MCModule and MCFunctions.
/// This class builds on MCDisassembler to disassemble whole sections, creating
/// MCAtoms (MCTextAtom for disassembled sections and MCDataAtom for raw data).
/// It can also be used to create a control flow graph consisting of MCFunctions
/// and MCBasicBlocks.
///
/// The object file, the disassembler and the instruction analysis are stored
/// as references (not copies), so the referenced objects must outlive this
/// MCObjectDisassembler.
///
/// The address-related queries are virtual and the stored references are
/// protected, so that format-specific subclasses can override them.
class MCObjectDisassembler {
public:
  /// \brief Create a disassembler for \p Obj.
  /// \param Obj The object file whose contents are disassembled.
  /// \param Dis The instruction disassembler this class builds on.
  /// \param MIA Instruction analysis helper.
  /// NOTE(review): presumably \p MIA is what drives branch/target detection
  /// when building the CFG -- confirm against the implementation.
  MCObjectDisassembler(const object::ObjectFile &Obj,
                       const MCDisassembler &Dis,
                       const MCInstrAnalysis &MIA);
  /// \brief Virtual, empty destructor: the class is meant to be subclassed.
  virtual ~MCObjectDisassembler() {}

  /// \brief Build an MCModule, creating atoms and optionally functions.
  /// \param withCFG Also build a CFG by adding MCFunctions to the Module.
  /// If withCFG is false, the MCModule built only contains atoms, representing
  /// what was found in the object file. If withCFG is true, MCFunctions are
  /// created, containing MCBasicBlocks. All text atoms are split to form basic
  /// block atoms, which then each back an MCBasicBlock.
  /// NOTE(review): the module is returned as a raw pointer; ownership is not
  /// expressed in the type. Presumably the caller owns it -- confirm.
  MCModule *buildModule(bool withCFG = false);

  /// \brief Build an MCModule without populating it.
  /// NOTE(review): presumably the returned module contains neither atoms nor
  /// functions, and the caller owns it -- confirm against the implementation.
  MCModule *buildEmptyModule();

  /// \brief Get the effective address of the entrypoint, or 0 if there is none.
  virtual uint64_t getEntrypoint();

  /// \name Get the addresses of static constructors/destructors in the object.
  /// The caller is expected to know how to interpret the addresses;
  /// for example, Mach-O init functions expect 5 arguments, whereas ELF ones
  /// do not.
  /// The addresses are original object file load addresses, not effective.
  /// @{
  virtual ArrayRef<uint64_t> getStaticInitFunctions();
  virtual ArrayRef<uint64_t> getStaticExitFunctions();
  /// @}

  /// \name Translation between effective and objectfile load address.
  /// @{
  /// \brief Compute the effective load address, from an objectfile virtual
  /// address. This is implemented in a format-specific way, to take into
  /// account things like PIE/ASLR when doing dynamic disassembly.
  /// For example, on Mach-O this would be done by adding the VM addr slide,
  /// on glibc ELF by keeping a map between segment load addresses, filled
  /// using dl_iterate_phdr, etc.
  /// In most static situations and in the default impl., this returns \p Addr.
  virtual uint64_t getEffectiveLoadAddr(uint64_t Addr);

  /// \brief Compute the original load address, as specified in the objectfile.
  /// This is the inverse of getEffectiveLoadAddr.
  virtual uint64_t getOriginalLoadAddr(uint64_t EffectiveAddr);
  /// @}

protected:
  // Stored by reference: the referenced objects must outlive this object.
  // Protected so that subclasses can use them.
  const object::ObjectFile &Obj;
  const MCDisassembler &Dis;
  const MCInstrAnalysis &MIA;

private:
  /// \brief Fill \p Module by creating an atom for each section.
  /// This could be made much smarter, using information like symbols, but also
  /// format-specific features, like mach-o function_start or data_in_code LCs.
  void buildSectionAtoms(MCModule *Module);

  /// \brief Enrich \p Module with a CFG consisting of MCFunctions.
  /// \param Module An MCModule returned by buildModule, with no CFG.
  /// NOTE: Each MCBasicBlock in a MCFunction is backed by a single MCTextAtom.
  /// When the CFG is built, contiguous instructions that were previously in a
  /// single MCTextAtom will be split in multiple basic block atoms.
  void buildCFG(MCModule *Module);
};

}

#endif