summaryrefslogtreecommitdiff
path: root/include/llvm/Object/MachOFormat.h
blob: c0f700d3c870ad0125ea3c6f9bf69518aace28b9 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
//===- MachOFormat.h - Mach-O Format Structures And Constants ---*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file declares various structures and constants which are platform
// independent and can be shared by any client which wishes to interact with
// Mach object files.
//
// The definitions here are purposely chosen to match the LLVM style as opposed
// to following the platform specific definition of the format.
//
// On a Mach system, see the <mach-o/...> includes for more information, in
// particular <mach-o/loader.h>.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_OBJECT_MACHOFORMAT_H
#define LLVM_OBJECT_MACHOFORMAT_H

#include "llvm/Support/DataTypes.h"

namespace llvm {
namespace object {

/// General Mach platform information.
namespace mach {
  /// @name CPU Type and Subtype Information
  /// @{

  /// \brief Capability bits used in CPU type encoding.
  ///
  /// These live in the top byte of a CPU type value. CTFM_ArchABI64 is OR'd
  /// into a CPUTypeMachine ID to form the 64-bit variant of that machine.
  enum CPUTypeFlagsMask {
    // Mask selecting the top byte of the CPU type value.
    CTFM_ArchMask =  0xFF000000,
    // Single capability bit inside CTFM_ArchMask marking a 64-bit ABI.
    CTFM_ArchABI64 = 0x01000000
  };

  /// \brief Machine type IDs used in CPU type encoding.
  ///
  /// The 64-bit entries are not independent numbers: each is the matching
  /// 32-bit machine ID with the CTFM_ArchABI64 capability bit OR'd in.
  enum CPUTypeMachine {
    CTM_i386      = 7,
    CTM_x86_64    = CTM_i386 | CTFM_ArchABI64,    // 0x01000007
    CTM_ARM       = 12,
    CTM_SPARC     = 14,
    CTM_PowerPC   = 18,
    CTM_PowerPC64 = CTM_PowerPC | CTFM_ArchABI64  // 0x01000012
  };

  /// \brief Capability bits used in CPU subtype encoding.
  ///
  /// As with the CPU type, the capability bits occupy the top byte of the
  /// subtype value; the per-architecture subtype IDs use the low bits.
  enum CPUSubtypeFlagsMask {
    // Mask selecting the top byte of the CPU subtype value.
    CSFM_SubtypeMask =  0xFF000000,
    // Top bit of the subtype value (lies inside CSFM_SubtypeMask).
    // NOTE(review): presumably marks 64-bit libraries -- confirm against
    // <mach/machine.h>.
    CSFM_SubtypeLib64 = 0x80000000
  };

  /// \brief ARM Machine Subtypes.
  ///
  /// The numbering is not contiguous (1-4 are not defined here) and does not
  /// follow architecture-version order (V6 precedes V5TEJ), so do not rely on
  /// ordering comparisons between these values.
  enum CPUSubtypeARM {
    CSARM_ALL    = 0,
    CSARM_V4T    = 5,
    CSARM_V6     = 6,
    CSARM_V5TEJ  = 7,
    CSARM_XSCALE = 8,
    CSARM_V7     = 9,
    CSARM_V7F    = 10,
    CSARM_V7S    = 11,
    CSARM_V7K    = 12
  };

  /// \brief PowerPC Machine Subtypes.
  ///
  /// Only the catch-all subtype is declared in this header.
  enum CPUSubtypePowerPC {
    CSPPC_ALL = 0
  };

  /// \brief SPARC Machine Subtypes.
  ///
  /// Only the catch-all subtype is declared in this header.
  enum CPUSubtypeSPARC {
    CSSPARC_ALL = 0
  };

  /// \brief x86 Machine Subtypes.
  ///
  /// Only the catch-all subtype is declared in this header. Note that, unlike
  /// the ARM, PowerPC and SPARC catch-all subtypes (all 0), this one is 3.
  enum CPUSubtypeX86 {
    CSX86_ALL = 3
  };

  /// @}

} // end namespace mach

/// Format information for Mach object files.
namespace macho {
  /// \brief Constants for structure sizes.
  ///
  /// Sizes are in bytes. Each value equals the sum of the field sizes of the
  /// corresponding structure declared later in this namespace (noted per
  /// entry), so the two must be kept in sync by hand.
  enum StructureSizes {
    Header32Size = 28,              // Header
    Header64Size = 32,              // Header + Header64Ext
    SegmentLoadCommand32Size = 56,  // SegmentLoadCommand
    SegmentLoadCommand64Size = 72,  // Segment64LoadCommand
    Section32Size = 68,             // Section
    Section64Size = 80,             // Section64
    SymtabLoadCommandSize = 24,     // SymtabLoadCommand
    DysymtabLoadCommandSize = 80,   // DysymtabLoadCommand
    Nlist32Size = 12,               // SymbolTableEntry
    Nlist64Size = 16,               // Symbol64TableEntry
    RelocationInfoSize = 8,         // RelocationEntry
    LinkeditLoadCommandSize = 16    // LinkeditDataLoadCommand
  };

  /// \brief Constants for header magic field.
  ///
  /// The two single-architecture magics differ only in the lowest bit
  /// (...CE for 32-bit, ...CF for 64-bit).
  enum HeaderMagic {
    HM_Object32 = 0xFEEDFACE,  ///< 32-bit mach object file
    HM_Object64 = 0xFEEDFACF,  ///< 64-bit mach object file
    HM_Universal = 0xCAFEBABE  ///< Universal object file
  };

  /// \brief Header common to all Mach object files.
  ///
  /// Seven 32-bit fields, 28 bytes in total (Header32Size). For 64-bit files
  /// this is followed by a Header64Ext, giving Header64Size.
  struct Header {
    uint32_t Magic;               // See HeaderMagic.
    uint32_t CPUType;             // See mach::CPUTypeMachine.
    uint32_t CPUSubtype;          // See the mach::CPUSubtype* enums.
    uint32_t FileType;            // See HeaderFileType.
    uint32_t NumLoadCommands;
    uint32_t SizeOfLoadCommands;
    uint32_t Flags;               // See HeaderFlags.
  };

  /// \brief Extended header for 64-bit object files.
  ///
  /// A single 32-bit word; Header (28 bytes) plus this extension accounts for
  /// Header64Size (32 bytes).
  struct Header64Ext {
    uint32_t Reserved;
  };

  // File type codes; see <mach-o/loader.h>. Only the object-file type is
  // declared in this header.
  // NOTE(review): presumably these are the values of Header::FileType --
  // confirm against <mach-o/loader.h>.
  enum HeaderFileType {
    HFT_Object = 0x1
  };

  // Header flag bits. Only one flag is declared in this header.
  // NOTE(review): presumably these are bits of Header::Flags -- confirm
  // against <mach-o/loader.h>.
  enum HeaderFlags {
    HF_SubsectionsViaSymbols = 0x2000
  };

  // Load command type codes. This is a sparse subset: only the commands that
  // have a matching structure or use in LLVM are listed, so the values are
  // not contiguous.
  // NOTE(review): presumably stored in the Type field that LoadCommand and
  // the *LoadCommand structures share -- confirm against <mach-o/loader.h>.
  enum LoadCommandType {
    LCT_Segment = 0x1,
    LCT_Symtab = 0x2,
    LCT_Dysymtab = 0xb,
    LCT_Segment64 = 0x19,
    LCT_UUID = 0x1b,
    LCT_CodeSignature = 0x1d,
    LCT_SegmentSplitInfo = 0x1e,
    LCT_FunctionStarts = 0x26,
    LCT_DataInCode = 0x29
  };

  /// \brief Load command structure.
  ///
  /// This is the common 8-byte prefix: every specific *LoadCommand structure
  /// in this file begins with the same Type and Size fields.
  struct LoadCommand {
    uint32_t Type;
    uint32_t Size;
  };

  /// @name Load Command Structures
  /// @{

  /// Segment load command using 32-bit address/size/offset fields.
  ///
  /// Starts with the LoadCommand prefix (Type, Size). Total size is
  /// 8 + 16 + 8 * 4 = 56 bytes (SegmentLoadCommand32Size).
  struct SegmentLoadCommand {
    uint32_t Type;
    uint32_t Size;
    char Name[16];  // Fixed-width name field; not a std::string.
    uint32_t VMAddress;
    uint32_t VMSize;
    uint32_t FileOffset;
    uint32_t FileSize;
    uint32_t MaxVMProtection;
    uint32_t InitialVMProtection;
    uint32_t NumSections;
    uint32_t Flags;
  };

  /// Segment load command using 64-bit address/size/offset fields.
  ///
  /// Same field order as SegmentLoadCommand, but VMAddress, VMSize,
  /// FileOffset and FileSize are widened to 64 bits. Total size is
  /// 8 + 16 + 4 * 8 + 4 * 4 = 72 bytes (SegmentLoadCommand64Size).
  struct Segment64LoadCommand {
    uint32_t Type;
    uint32_t Size;
    char Name[16];  // Fixed-width name field; not a std::string.
    uint64_t VMAddress;
    uint64_t VMSize;
    uint64_t FileOffset;
    uint64_t FileSize;
    uint32_t MaxVMProtection;
    uint32_t InitialVMProtection;
    uint32_t NumSections;
    uint32_t Flags;
  };

  /// Symbol table load command.
  ///
  /// Starts with the LoadCommand prefix (Type, Size) and then gives the
  /// location and extent of the symbol table and of the string table.
  /// Six 32-bit fields, 24 bytes in total (SymtabLoadCommandSize).
  struct SymtabLoadCommand {
    uint32_t Type;
    uint32_t Size;
    uint32_t SymbolTableOffset;
    uint32_t NumSymbolTableEntries;
    uint32_t StringTableOffset;
    uint32_t StringTableSize;
  };

  /// Dynamic symbol table load command.
  ///
  /// Starts with the LoadCommand prefix (Type, Size), followed by nine
  /// (start, count) pairs, each describing one table or symbol range. The
  /// first three pairs are index-based (*Index), the remaining six are
  /// offset-based (*Offset). Twenty 32-bit fields, 80 bytes in total
  /// (DysymtabLoadCommandSize).
  struct DysymtabLoadCommand {
    uint32_t Type;
    uint32_t Size;

    uint32_t LocalSymbolsIndex;
    uint32_t NumLocalSymbols;

    uint32_t ExternalSymbolsIndex;
    uint32_t NumExternalSymbols;

    uint32_t UndefinedSymbolsIndex;
    uint32_t NumUndefinedSymbols;

    uint32_t TOCOffset;
    uint32_t NumTOCEntries;

    uint32_t ModuleTableOffset;
    uint32_t NumModuleTableEntries;

    uint32_t ReferenceSymbolTableOffset;
    uint32_t NumReferencedSymbolTableEntries;

    uint32_t IndirectSymbolTableOffset;
    uint32_t NumIndirectSymbolTableEntries;

    uint32_t ExternalRelocationTableOffset;
    uint32_t NumExternalRelocationTableEntries;

    uint32_t LocalRelocationTableOffset;
    uint32_t NumLocalRelocationTableEntries;
  };

  /// Load command that points at a blob of data by offset and size.
  ///
  /// Starts with the LoadCommand prefix (Type, Size). Four 32-bit fields,
  /// 16 bytes in total (LinkeditLoadCommandSize).
  struct LinkeditDataLoadCommand {
    uint32_t Type;
    uint32_t Size;
    uint32_t DataOffset;
    uint32_t DataSize;
  };

  /// @}
  /// @name Section Data
  /// @{

  /// Section record using 32-bit Address and Size fields.
  ///
  /// Two 16-byte name fields followed by nine 32-bit fields:
  /// 32 + 9 * 4 = 68 bytes (Section32Size).
  struct Section {
    char Name[16];         // Fixed-width name field.
    char SegmentName[16];  // Fixed-width name field.
    uint32_t Address;
    uint32_t Size;
    uint32_t Offset;
    uint32_t Align;
    uint32_t RelocationTableOffset;
    uint32_t NumRelocationTableEntries;
    uint32_t Flags;
    uint32_t Reserved1;
    uint32_t Reserved2;
  };
  /// Section record using 64-bit Address and Size fields.
  ///
  /// Differs from Section in two ways: Address and Size are widened to 64
  /// bits, and there is an extra trailing Reserved3 word.
  /// 32 + 2 * 8 + 8 * 4 = 80 bytes (Section64Size).
  struct Section64 {
    char Name[16];         // Fixed-width name field.
    char SegmentName[16];  // Fixed-width name field.
    uint64_t Address;
    uint64_t Size;
    uint32_t Offset;
    uint32_t Align;
    uint32_t RelocationTableOffset;
    uint32_t NumRelocationTableEntries;
    uint32_t Flags;
    uint32_t Reserved1;
    uint32_t Reserved2;
    uint32_t Reserved3;
  };

  /// @}
  /// @name Symbol Table Entries
  /// @{

  /// Symbol table entry with a 32-bit Value.
  ///
  /// 4 + 1 + 1 + 2 + 4 = 12 bytes (Nlist32Size).
  struct SymbolTableEntry {
    uint32_t StringIndex;
    uint8_t Type;          // See SymbolTypeFlags / SymbolTypeType.
    uint8_t SectionIndex;
    uint16_t Flags;
    uint32_t Value;
  };
  // Despite containing a uint64_t, this structure is only 4-byte aligned within
  // a MachO file.
#pragma pack(push)
#pragma pack(4)
  struct Symbol64TableEntry {
    uint32_t StringIndex;
    uint8_t Type;
    uint8_t SectionIndex;
    uint16_t Flags;
    uint64_t Value;
  };
#pragma pack(pop)

  /// @}
  /// @name Data-in-code Table Entry
  /// @{

  // Kinds of data region that a data-in-code table entry can describe.
  // See <mach-o/loader.h>.
  enum DataRegionType {
    Data        = 1,
    JumpTable8  = 2,
    JumpTable16 = 3,
    JumpTable32 = 4
  };
  /// One entry of the data-in-code table: a typed byte range.
  ///
  /// 4 + 2 + 2 = 8 bytes. Length is only 16 bits wide, so a single entry
  /// cannot describe a region longer than 65535 bytes.
  struct DataInCodeTableEntry {
    uint32_t Offset;  /* from mach_header to start of data region */
    uint16_t Length;  /* number of bytes in data region */
    uint16_t Kind;    /* a DataRegionType value  */
  };

  /// @}
  /// @name Indirect Symbol Table
  /// @{

  /// One entry of the indirect symbol table: a single 32-bit word.
  ///
  /// See IndirectSymbolFlags for the special values that can be encoded in
  /// an entry.
  struct IndirectSymbolTableEntry {
    uint32_t Index;
  };

  /// @}
  /// @name Relocation Data
  /// @{

  /// One relocation record, exposed as two raw 32-bit words
  /// (8 bytes, RelocationInfoSize).
  ///
  /// This header does not declare the bit-level layout of the words; clients
  /// decode them themselves.
  /// NOTE(review): RF_Scattered is presumably tested against Word0 -- confirm
  /// against <mach-o/reloc.h>.
  struct RelocationEntry {
    uint32_t Word0;
    uint32_t Word1;
  };

  /// @}

  // Symbol type codes; see <mach-o/nlist.h>.
  // Every value here fits inside STF_TypeMask (0x0e), i.e. these are the
  // values of the masked type bits, not independent flag bits.
  enum SymbolTypeType {
    STT_Undefined = 0x00,
    STT_Absolute  = 0x02,
    STT_Section   = 0x0e
  };

  // Masks and flag bits for a symbol's type byte. Together the four values
  // cover all eight bits; STF_PrivateExtern (0x10) is the only one that is
  // neither in the stabs mask nor in the type mask.
  enum SymbolTypeFlags {
    // If any of these bits are set, then the entry is a stab entry number (see
    // <mach-o/stab.h>). Otherwise the other masks apply.
    STF_StabsEntryMask = 0xe0,

    STF_TypeMask       = 0x0e,  // Selects a SymbolTypeType value.
    STF_External       = 0x01,
    STF_PrivateExtern  = 0x10
  };

  /// IndirectSymbolFlags - Flags for encoding special values in the indirect
  /// symbol entry.
  ///
  /// These are the two highest bits of the 32-bit entry word (see
  /// IndirectSymbolTableEntry).
  enum IndirectSymbolFlags {
    ISF_Local    = 0x80000000,  // Bit 31.
    ISF_Absolute = 0x40000000   // Bit 30.
  };

  /// RelocationFlags - Special flags for addresses.
  ///
  /// RF_Scattered is the top bit (bit 31) of a 32-bit word.
  /// NOTE(review): presumably applied to the first word of a RelocationEntry
  /// to mark a scattered relocation -- confirm against <mach-o/reloc.h>.
  enum RelocationFlags {
    RF_Scattered = 0x80000000
  };

  /// Common relocation info types.
  ///
  /// These occupy values 0-2. RelocationInfoType_Generic and
  /// RelocationInfoTypeARM continue numbering from 3; x86_64 does not share
  /// these and assigns its own meanings to 0-2 (see RelocationInfoTypeX86_64).
  enum RelocationInfoType {
    RIT_Vanilla             = 0,
    RIT_Pair                = 1,
    RIT_Difference          = 2
  };

  /// Generic relocation info types, which are shared by some (but not all)
  /// platforms.
  ///
  /// Numbering continues after the common RelocationInfoType values (0-2).
  /// Beware that ARM assigns 3 and 4 the other way round: here
  /// PreboundLazyPointer is 3 and LocalDifference is 4, whereas
  /// RelocationInfoTypeARM has LocalDifference = 3 and
  /// PreboundLazyPointer = 4.
  enum RelocationInfoType_Generic {
    RIT_Generic_PreboundLazyPointer = 3,
    RIT_Generic_LocalDifference     = 4,
    RIT_Generic_TLV                 = 5
  };

  /// X86_64 uses its own relocation types.
  ///
  /// The values overlap numerically with RelocationInfoType and
  /// RelocationInfoType_Generic but have unrelated meanings, so a relocation
  /// type number can only be interpreted once the target architecture is
  /// known.
  enum RelocationInfoTypeX86_64 {
    // Note that x86_64 doesn't even share the common relocation types.
    RIT_X86_64_Unsigned   = 0,
    RIT_X86_64_Signed     = 1,
    RIT_X86_64_Branch     = 2,
    RIT_X86_64_GOTLoad    = 3,
    RIT_X86_64_GOT        = 4,
    RIT_X86_64_Subtractor = 5,
    RIT_X86_64_Signed1    = 6,
    RIT_X86_64_Signed2    = 7,
    RIT_X86_64_Signed4    = 8,
    RIT_X86_64_TLV        = 9
  };

  /// ARM-specific relocation types.
  ///
  /// Numbering starts at 3, directly after the common RelocationInfoType
  /// values (0-2). Values 3 and 4 are assigned in the opposite order to
  /// RelocationInfoType_Generic (LocalDifference comes first here).
  enum RelocationInfoTypeARM {
    RIT_ARM_LocalDifference     = 3,
    RIT_ARM_PreboundLazyPointer = 4,
    RIT_ARM_Branch24Bit         = 5,
    RIT_ARM_ThumbBranch22Bit    = 6,
    RIT_ARM_ThumbBranch32Bit    = 7,
    RIT_ARM_Half                = 8,
    RIT_ARM_HalfDifference      = 9
  };

} // end namespace macho

} // end namespace object
} // end namespace llvm

#endif