lib/CodeGen/AsmPrinter/DwarfAccelTable.h


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283

//==-- llvm/CodeGen/DwarfAccelTable.h - Dwarf Accelerator Tables -*- C++ -*-==//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains support for writing dwarf accelerator tables.
//
//===----------------------------------------------------------------------===//

#ifndef CODEGEN_ASMPRINTER_DWARFACCELTABLE_H__
#define CODEGEN_ASMPRINTER_DWARFACCELTABLE_H__

#include "DIE.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/FormattedStream.h"
#include <map>
#include <vector>

// The dwarf accelerator tables are an indirect hash table optimized
// for null lookup rather than access to known data. They are output into
// an on-disk format that looks like this:
//
// .-------------.
// |  HEADER     |
// |-------------|
// |  BUCKETS    |
// |-------------|
// |  HASHES     |
// |-------------|
// |  OFFSETS    |
// |-------------|
// |  DATA       |
// `-------------'
//
// where the header contains a magic number, version, type of hash function,
// the number of buckets, total number of hashes, and room for a special
// struct of data and the length of that struct.
//
// The buckets contain an index (e.g. 6) into the hashes array. The hashes
// section contains all of the 32-bit hash values in contiguous memory, and
// the offsets contain the offset into the data area for the particular
// hash.
//
// For a lookup example, we could hash a function name and take it modulo the
// number of buckets giving us our bucket. From there we take the bucket value
// as an index into the hashes table and look at each successive hash as long
// as the hash value is still the same modulo result (bucket value) as earlier.
// If we have a match we look at that same entry in the offsets table and
// grab the offset in the data for our final match.

namespace llvm {

class AsmPrinter;
class DIE;
class DwarfUnits;

// DwarfAccelTable - Collects named DIE entries and holds the state needed to
// write them out in the accelerator table layout described at the top of
// this file (header, buckets, hashes, offsets, data). Only the declarations
// and small inline helpers live here; the emission logic is defined
// out of line.
class DwarfAccelTable {

  // Identifier stored in the on-disk header for the hash function in use.
  enum HashFunctionType {
    eHashFunctionDJB = 0u
  };

  // DJB string hash: start from 5381 and fold in each character as
  // h = h * 33 + c (written as a shift and add). Arithmetic is done in
  // uint32_t, so it wraps modulo 2^32.
  // NOTE(review): Str[i] is presumably a plain char, so bytes >= 0x80 may
  // contribute differently depending on char signedness - confirm against
  // the consumer before changing.
  static uint32_t HashDJB (StringRef Str) {
    uint32_t h = 5381;
    for (unsigned i = 0, e = Str.size(); i != e; ++i)
      h = ((h << 5) + h) + Str[i];
    return h;
  }

  // Helper function to compute the number of buckets needed based on
  // the number of unique hashes.
  void ComputeBucketCount (void);

  // Fixed-size portion of the table header.
  struct TableHeader {
    uint32_t   magic;           // 'HASH' magic value to allow endian detection
    uint16_t   version;         // Version number.
    uint16_t   hash_function;   // The hash function enumeration that was used.
    uint32_t   bucket_count;    // The number of buckets in this hash table.
    uint32_t   hashes_count;    // The total number of unique hash values
                                // and hash data offsets in this table.
    uint32_t   header_data_len; // The bytes to skip to get to the hash
                                // indexes (buckets) for correct alignment.
    // Also written to disk is the implementation specific header data.

    // The bytes 'H' 'A' 'S' 'H' (0x48 0x41 0x53 0x48) packed into one word.
    static const uint32_t MagicHash = 0x48415348;

    // Builds a version 1, DJB-hashed header with zero buckets and hashes;
    // data_len becomes header_data_len.
    TableHeader (uint32_t data_len) :
      magic (MagicHash), version (1), hash_function (eHashFunctionDJB),
      bucket_count (0), hashes_count (0), header_data_len (data_len)
    {}

#ifndef NDEBUG
    // Debug helper: writes every header field to O, one per line.
    void print(raw_ostream &O) const {
      O << "Magic: " << format("0x%x", magic) << "\n"
        << "Version: " << version << "\n"
        << "Hash Function: " << hash_function << "\n"
        << "Bucket Count: " << bucket_count << "\n"
        << "Hashes Count: " << hashes_count << "\n"
        << "Header Data Length: " << header_data_len << "\n";
    }
    void dump() const { print(dbgs()); }
#endif
  };

public:
  // The HeaderData describes the form of each set of data. In general this
  // is a list of atoms (atom_count) where each atom contains a type
  // (AtomType type) of data, and an encoding form (form). In the case of
  // data that is referenced via DW_FORM_ref_* the die_offset_base is
  // used to describe the offset for all forms in the list of atoms.
  // This also serves as a public interface of sorts.
  // When written to disk this will have the form:
  //
  // uint32_t die_offset_base
  // uint32_t atom_count
  // atom_count Atoms
  enum AtomType {
    eAtomTypeNULL       = 0u,
    eAtomTypeDIEOffset  = 1u,   // DIE offset, check form for encoding
    eAtomTypeCUOffset   = 2u,   // DIE offset of the compiler unit header that
                                // contains the item in question
    eAtomTypeTag        = 3u,   // DW_TAG_xxx value, should be encoded as
                                // DW_FORM_data1 (if no tags exceed 255) or
                                // DW_FORM_data2.
    eAtomTypeNameFlags  = 4u,   // Flags from enum NameFlags
    eAtomTypeTypeFlags  = 5u    // Flags from enum TypeFlags
  };

  // Values carried by an eAtomTypeTypeFlags atom.
  enum TypeFlags {
    eTypeFlagClassMask = 0x0000000fu,

    // Always set for C++, only set for ObjC if this is the
    // @implementation for a class.
    eTypeFlagClassIsImplementation  = ( 1u << 1 )
  };

  // Make these public so that they can be used as a general interface to
  // the class.
  // An Atom pairs the kind of datum stored with the DWARF form used to
  // encode it.
  struct Atom {
    AtomType type; // enum AtomType
    uint16_t form; // DWARF DW_FORM_ defines

    Atom(AtomType type, uint16_t form) : type(type), form(form) {}
    // Returns a printable name for the given atom type (defined out of line).
    static const char * AtomTypeString(enum AtomType);
#ifndef NDEBUG
    // Debug helper: writes the symbolic type and form names to O.
    void print(raw_ostream &O) const {
      O << "Type: " << AtomTypeString(type) << "\n"
        << "Form: " << dwarf::FormEncodingString(form) << "\n";
    }
    void dump() const {
      print(dbgs());
    }
#endif
  };

 private:
  // Implementation specific header data: the base for DIE offsets plus the
  // atom list describing each data entry.
  struct TableHeaderData {
    uint32_t die_offset_base;
    SmallVector<Atom, 1> Atoms;

    // Copies AtomList into Atoms; offset defaults to 0.
    TableHeaderData(ArrayRef<Atom> AtomList, uint32_t offset = 0)
      : die_offset_base(offset), Atoms(AtomList.begin(), AtomList.end()) { }

#ifndef NDEBUG
    // Debug helper: writes the offset base followed by every atom.
    void print (raw_ostream &O) const {
      O << "die_offset_base: " << die_offset_base << "\n";
      for (size_t i = 0; i < Atoms.size(); i++)
        Atoms[i].print(O);
    }
    void dump() const {
      print(dbgs());
    }
#endif
  };

  // The data itself consists of a str_offset, a count of the DIEs in the
  // hash and the offsets to the DIEs themselves.
  // On disk each data section is ended with a 0 KeyType as the end of the
  // hash chain.
  // On output this looks like:
  // uint32_t str_offset
  // uint32_t hash_data_count
  // HashData[hash_data_count]
public:
  // One DIE recorded under a name, together with its flags byte.
  struct HashDataContents {
    DIE *Die; // Offsets
    char Flags; // Specific flags to output

    HashDataContents(DIE *D, char Flags) :
      Die(D),
      Flags(Flags) { }
    #ifndef NDEBUG
    // Debug helper: writes the DIE's offset, tag name and flags to O.
    void print(raw_ostream &O) const {
      O << "  Offset: " << Die->getOffset() << "\n";
      O << "  Tag: " << dwarf::TagString(Die->getTag()) << "\n";
      // Flags is a bit set stored in a char; print its numeric value rather
      // than streaming it as a (usually unprintable) character.
      O << "  Flags: "
        << static_cast<unsigned>(static_cast<unsigned char>(Flags)) << "\n";
    }
    #endif
  };
private:
  // All entries recorded for a single name, plus that name's hash.
  struct HashData {
    StringRef Str;
    uint32_t HashValue;
    MCSymbol *Sym; // Null until a symbol is assigned from outside.
    ArrayRef<HashDataContents*> Data; // offsets

    // Hashes S with HashDJB. Sym starts out null so that print() can tell
    // whether a symbol has been assigned instead of reading an
    // uninitialized pointer.
    HashData(StringRef S, ArrayRef<HashDataContents*> Data)
      : Str(S), HashValue(DwarfAccelTable::HashDJB(S)), Sym(0), Data(Data) {
    }
    #ifndef NDEBUG
    // Debug helper: writes the name, hash, symbol (or "<none>") and then
    // every recorded entry to O.
    void print(raw_ostream &O) const {
      O << "Name: " << Str << "\n";
      O << "  Hash Value: " << format("0x%x", HashValue) << "\n";
      O << "  Symbol: " ;
      if (Sym) Sym->print(O);
      else O << "<none>";
      O << "\n";
      for (size_t i = 0; i < Data.size(); i++)
        Data[i]->print(O);
    }
    void dump() const {
      print(dbgs());
    }
    #endif
  };

  // Not copyable.
  DwarfAccelTable(const DwarfAccelTable&) LLVM_DELETED_FUNCTION;
  void operator=(const DwarfAccelTable&) LLVM_DELETED_FUNCTION;

  // Internal Functions
  // One emitter per section of the on-disk layout; defined out of line.
  void EmitHeader(AsmPrinter *);
  void EmitBuckets(AsmPrinter *);
  void EmitHashes(AsmPrinter *);
  void EmitOffsets(AsmPrinter *, MCSymbol *);
  void EmitData(AsmPrinter *, DwarfUnits *D);

  // Allocator for HashData and HashDataContents.
  // NOTE(review): StringEntries is parameterized on BumpPtrAllocator&, so
  // Entries presumably refers to this allocator; keep this member declared
  // before Entries - confirm against the constructor.
  BumpPtrAllocator Allocator;

  // Output Variables
  TableHeader Header;
  TableHeaderData HeaderData;
  std::vector<HashData*> Data;

  // String Data
  // Maps each name to the list of entries recorded under it.
  typedef std::vector<HashDataContents*> DataArray;
  typedef StringMap<DataArray, BumpPtrAllocator&> StringEntries;
  StringEntries Entries;

  // Buckets/Hashes/Offsets
  typedef std::vector<HashData*> HashList;
  typedef std::vector<HashList> BucketList;
  BucketList Buckets;
  HashList Hashes;

  // Public Implementation
 public:
  // Takes the list of atoms describing each data entry.
  DwarfAccelTable(ArrayRef<DwarfAccelTable::Atom>);
  ~DwarfAccelTable();
  // Arguments are presumably the name, the DIE to record under it, and an
  // optional flags byte (default 0) - confirm against the definition.
  void AddName(StringRef, DIE*, char = 0);
  void FinalizeTable(AsmPrinter *, const char *);
  void Emit(AsmPrinter *, MCSymbol *, DwarfUnits *);
#ifndef NDEBUG
  void print(raw_ostream &O);
  void dump() { print(dbgs()); }
#endif
};

}
#endif