summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp250
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfAccelTable.h254
2 files changed, 504 insertions, 0 deletions
diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
new file mode 100644
index 0000000000..b7c8c6ee47
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
@@ -0,0 +1,250 @@
+//=-- llvm/CodeGen/DwarfAccelTable.cpp - Dwarf Accelerator Tables -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing dwarf accelerator tables.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/Debug.h"
+#include "DwarfAccelTable.h"
+#include "DwarfDebug.h"
+#include "DIE.h"
+
+using namespace llvm;
+
+const char *DwarfAccelTable::Atom::AtomTypeString(enum AtomType AT) {
+ switch (AT) {
+ default: llvm_unreachable("invalid AtomType!");
+ case eAtomTypeNULL: return "eAtomTypeNULL";
+ case eAtomTypeDIEOffset: return "eAtomTypeDIEOffset";
+ case eAtomTypeCUOffset: return "eAtomTypeCUOffset";
+ case eAtomTypeTag: return "eAtomTypeTag";
+ case eAtomTypeNameFlags: return "eAtomTypeNameFlags";
+ case eAtomTypeTypeFlags: return "eAtomTypeTypeFlags";
+ }
+}
+
+// The general case would need to have a less hard coded size for the
+// length of the HeaderData, however, if we're constructing based on a
+// single Atom then we know it will always be: 4 + 4 + 2 + 2.
+DwarfAccelTable::DwarfAccelTable(DwarfAccelTable::Atom atom) :
+ Header(12),
+ HeaderData(atom) {
+}
+
+void DwarfAccelTable::AddName(StringRef Name, DIE* die) {
+ // If the string is in the list already then add this die to the list
+ // otherwise add a new one.
+ DIEArray &DIEs = Entries[Name];
+ DIEs.push_back(die);
+}
+
+void DwarfAccelTable::ComputeBucketCount(void) {
+ // First get the number of unique hashes.
+ std::vector<uint32_t> uniques;
+ uniques.resize(Data.size());
+ for (size_t i = 0; i < Data.size(); ++i)
+ uniques[i] = Data[i]->HashValue;
+ std::sort(uniques.begin(), uniques.end());
+ std::vector<uint32_t>::iterator p =
+ std::unique(uniques.begin(), uniques.end());
+ uint32_t num = std::distance(uniques.begin(), p);
+
+ // Then compute the bucket size, minimum of 1 bucket.
+ if (num > 1024) Header.bucket_count = num/4;
+ if (num > 16) Header.bucket_count = num/2;
+ else Header.bucket_count = num > 0 ? num : 1;
+
+ Header.hashes_count = num;
+}
+
+void DwarfAccelTable::FinalizeTable(AsmPrinter *Asm, const char *Prefix) {
+ // Create the individual hash data outputs.
+ for (StringMap<DIEArray>::const_iterator
+ EI = Entries.begin(), EE = Entries.end(); EI != EE; ++EI) {
+ struct HashData *Entry = new HashData((*EI).getKeyData());
+ for (DIEArray::const_iterator DI = (*EI).second.begin(),
+ DE = (*EI).second.end();
+ DI != DE; ++DI)
+ Entry->addOffset((*DI)->getOffset());
+ Data.push_back(Entry);
+ }
+
+ // Figure out how many buckets we need, then compute the bucket
+ // contents and the final ordering. We'll emit the hashes and offsets
+ // by doing a walk during the emission phase. We add temporary
+ // symbols to the data so that we can reference them during the offset
+ // later, we'll emit them when we emit the data.
+ ComputeBucketCount();
+
+ // Compute bucket contents and final ordering.
+ Buckets.resize(Header.bucket_count);
+ for (size_t i = 0; i < Data.size(); ++i) {
+ uint32_t bucket = Data[i]->HashValue % Header.bucket_count;
+ Buckets[bucket].push_back(Data[i]);
+ Data[i]->Sym = Asm->GetTempSymbol(Prefix, i);
+ }
+}
+
+// Emits the header for the table via the AsmPrinter.
+void DwarfAccelTable::EmitHeader(AsmPrinter *Asm) {
+ Asm->OutStreamer.AddComment("Header Magic");
+ Asm->EmitInt32(Header.magic);
+ Asm->OutStreamer.AddComment("Header Version");
+ Asm->EmitInt16(Header.version);
+ Asm->OutStreamer.AddComment("Header Hash Function");
+ Asm->EmitInt16(Header.hash_function);
+ Asm->OutStreamer.AddComment("Header Bucket Count");
+ Asm->EmitInt32(Header.bucket_count);
+ Asm->OutStreamer.AddComment("Header Hash Count");
+ Asm->EmitInt32(Header.hashes_count);
+ Asm->OutStreamer.AddComment("Header Data Length");
+ Asm->EmitInt32(Header.header_data_len);
+ Asm->OutStreamer.AddComment("HeaderData Die Offset Base");
+ Asm->EmitInt32(HeaderData.die_offset_base);
+ Asm->OutStreamer.AddComment("HeaderData Atom Count");
+ Asm->EmitInt32(HeaderData.Atoms.size());
+ for (size_t i = 0; i < HeaderData.Atoms.size(); i++) {
+ Atom A = HeaderData.Atoms[i];
+ Asm->OutStreamer.AddComment(Atom::AtomTypeString(A.type));
+ Asm->EmitInt16(A.type);
+ Asm->OutStreamer.AddComment(dwarf::FormEncodingString(A.form));
+ Asm->EmitInt16(A.form);
+ }
+}
+
+// Walk through and emit the buckets for the table. This will look
+// like a list of numbers of how many elements are in each bucket.
+void DwarfAccelTable::EmitBuckets(AsmPrinter *Asm) {
+ unsigned index = 0;
+ for (size_t i = 0; i < Buckets.size(); ++i) {
+ Twine Comment = Twine("Bucket ") + Twine(i);
+ Asm->OutStreamer.AddComment(Comment);
+ if (Buckets[i].size() != 0)
+ Asm->EmitInt32(index);
+ else
+ Asm->EmitInt32(UINT32_MAX);
+ index += Buckets[i].size();
+ }
+}
+
+// Walk through the buckets and emit the individual hashes for each
+// bucket.
+void DwarfAccelTable::EmitHashes(AsmPrinter *Asm) {
+ for (size_t i = 0; i < Buckets.size(); ++i) {
+ for (HashList::const_iterator HI = Buckets[i].begin(),
+ HE = Buckets[i].end(); HI != HE; ++HI) {
+ Twine Comment = Twine("Hash in Bucket ") + Twine(i);
+ Asm->OutStreamer.AddComment(Comment);
+ Asm->EmitInt32((*HI)->HashValue);
+ }
+ }
+}
+
+// Walk through the buckets and emit the individual offsets for each
+// element in each bucket. This is done via a symbol subtraction from the
+// beginning of the section. The non-section symbol will be output later
+// when we emit the actual data.
+void DwarfAccelTable::EmitOffsets(AsmPrinter *Asm, MCSymbol *SecBegin) {
+ for (size_t i = 0; i < Buckets.size(); ++i) {
+ for (HashList::const_iterator HI = Buckets[i].begin(),
+ HE = Buckets[i].end(); HI != HE; ++HI) {
+ Twine Comment = Twine("Offset in Bucket ") + Twine(i);
+ Asm->OutStreamer.AddComment(Comment);
+ MCContext &Context = Asm->OutStreamer.getContext();
+ const MCExpr *Sub =
+ MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create((*HI)->Sym, Context),
+ MCSymbolRefExpr::Create(SecBegin, Context),
+ Context);
+ Asm->OutStreamer.EmitValue(Sub, sizeof(uint32_t), 0);
+ }
+ }
+}
+
+// Walk through the buckets and emit the full data for each element in
+// the bucket. For the string case emit the dies and the various offsets.
+// Terminate each HashData bucket with 0.
+void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfDebug *D) {
+ uint64_t PrevHash = UINT64_MAX;
+ for (size_t i = 0; i < Buckets.size(); ++i) {
+ for (HashList::const_iterator HI = Buckets[i].begin(),
+ HE = Buckets[i].end(); HI != HE; ++HI) {
+ // Remember to emit the label for our offset.
+ Asm->OutStreamer.EmitLabel((*HI)->Sym);
+ Asm->OutStreamer.AddComment((*HI)->Str);
+ Asm->EmitSectionOffset(D->getStringPoolEntry((*HI)->Str),
+ D->getDwarfStrSectionSym());
+ Asm->OutStreamer.AddComment("Num DIEs");
+ Asm->EmitInt32((*HI)->DIEOffsets.size());
+ for (std::vector<uint32_t>::const_iterator
+ DI = (*HI)->DIEOffsets.begin(), DE = (*HI)->DIEOffsets.end();
+ DI != DE; ++DI) {
+ Asm->EmitInt32((*DI));
+ }
+ // Emit a 0 to terminate the data unless we have a hash collision.
+ if (PrevHash != (*HI)->HashValue)
+ Asm->EmitInt32(0);
+ PrevHash = (*HI)->HashValue;
+ }
+ }
+}
+
+// Emit the entire data structure to the output file.
+void DwarfAccelTable::Emit(AsmPrinter *Asm, MCSymbol *SecBegin,
+ DwarfDebug *D) {
+ // Emit the header.
+ EmitHeader(Asm);
+
+ // Emit the buckets.
+ EmitBuckets(Asm);
+
+ // Emit the hashes.
+ EmitHashes(Asm);
+
+ // Emit the offsets.
+ EmitOffsets(Asm, SecBegin);
+
+ // Emit the hash data.
+ EmitData(Asm, D);
+}
+
+#ifndef NDEBUG
+void DwarfAccelTable::print(raw_ostream &O) {
+
+ Header.print(O);
+ HeaderData.print(O);
+
+ O << "Entries: \n";
+ for (StringMap<DIEArray>::const_iterator
+ EI = Entries.begin(), EE = Entries.end(); EI != EE; ++EI) {
+ O << "Name: " << (*EI).getKeyData() << "\n";
+ for (DIEArray::const_iterator DI = (*EI).second.begin(),
+ DE = (*EI).second.end();
+ DI != DE; ++DI)
+ (*DI)->print(O);
+ }
+
+ O << "Buckets and Hashes: \n";
+ for (size_t i = 0; i < Buckets.size(); ++i)
+ for (HashList::const_iterator HI = Buckets[i].begin(),
+ HE = Buckets[i].end(); HI != HE; ++HI)
+ (*HI)->print(O);
+
+ O << "Data: \n";
+ for (std::vector<HashData*>::const_iterator
+ DI = Data.begin(), DE = Data.end(); DI != DE; ++DI)
+ (*DI)->print(O);
+
+
+}
+#endif
diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.h b/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
new file mode 100644
index 0000000000..242841a509
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
@@ -0,0 +1,254 @@
+//==-- llvm/CodeGen/DwarfAccelTable.h - Dwarf Accelerator Tables -*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing dwarf accelerator tables.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CODEGEN_ASMPRINTER_DWARFACCELTABLE_H__
+#define CODEGEN_ASMPRINTER_DWARFACCELTABLE_H__
+
+#include "llvm/ADT/StringMap.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/FormattedStream.h"
+#include <vector>
+#include <map>
+
+// The apple dwarf accelerator tables are an indirect hash table optimized
+// for null lookup rather than access to known data. They are output into
+// an on-disk format that looks like this:
+//
+// .-------------.
+// | HEADER |
+// |-------------|
+// | BUCKETS |
+// |-------------|
+// | HASHES |
+// |-------------|
+// | OFFSETS |
+// |-------------|
+// | DATA |
+// `-------------'
+//
+// where the header contains a magic number, version, type of hash function,
+// the number of buckets, total number of hashes, and room for a special
+// struct of data and the length of that struct.
+//
+// The buckets contain an index (e.g. 6) into the hashes array. The hashes
+// section contains all of the 32-bit hash values in contiguous memory, and
+// the offsets contain the offset into the data area for the particular
+// hash.
+//
+// For a lookup example, we could hash a function name and take it modulo the
+// number of buckets giving us our bucket. From there we take the bucket value
+// as an index into the hashes table and look at each successive hash as long
+// as the hash value is still the same modulo result (bucket value) as earlier.
+// If we have a match we look at that same entry in the offsets table and
+// grab the offset in the data for our final match.
+
+namespace llvm {
+
+class AsmPrinter;
+class DIE;
+class DwarfDebug;
+
+class DwarfAccelTable {
+
+ enum HashFunctionType {
+ eHashFunctionDJB = 0u
+ };
+
+ static uint32_t HashDJB (const char *s) {
+ uint32_t h = 5381;
+ for (unsigned char c = *s; c; c = *++s)
+ h = ((h << 5) + h) + c;
+ return h;
+ }
+
+ // Helper function to compute the number of buckets needed based on
+ // the number of unique hashes.
+ void ComputeBucketCount (void);
+
+ struct TableHeader {
+ uint32_t magic; // 'HASH' magic value to allow endian detection
+ uint16_t version; // Version number.
+ uint16_t hash_function; // The hash function enumeration that was used.
+ uint32_t bucket_count; // The number of buckets in this hash table.
+ uint32_t hashes_count; // The total number of unique hash values
+ // and hash data offsets in this table.
+ uint32_t header_data_len; // The bytes to skip to get to the hash
+ // indexes (buckets) for correct alignment.
+ // Also written to disk is the implementation specific header data.
+
+ static const uint32_t MagicHash = 0x48415348;
+
+ TableHeader (uint32_t data_len) :
+ magic (MagicHash), version (1), hash_function (eHashFunctionDJB),
+ bucket_count (0), hashes_count (0), header_data_len (data_len)
+ {};
+
+#ifndef NDEBUG
+ void print(raw_ostream &O) {
+ O << "Magic: " << format("0x%x", magic) << "\n"
+ << "Version: " << version << "\n"
+ << "Hash Function: " << hash_function << "\n"
+ << "Bucket Count: " << bucket_count << "\n"
+ << "Header Data Length: " << header_data_len << "\n";
+ }
+ void dump() { print(dbgs()); }
+#endif
+ };
+
+public:
+ // The HeaderData describes the form of each set of data. In general this
+ // is as a list of atoms (atom_count) where each atom contains a type
+ // (AtomType type) of data, and an encoding form (form). In the case of
+ // data that is referenced via DW_FORM_ref_* the die_offset_base is
+ // used to describe the offset for all forms in the list of atoms.
+ // This also serves as a public interface of sorts.
+ // When written to disk this will have the form:
+ //
+ // uint32_t die_offset_base
+ // uint32_t atom_count
+ // atom_count Atoms
+ enum AtomType {
+ eAtomTypeNULL = 0u,
+ eAtomTypeDIEOffset = 1u, // DIE offset, check form for encoding
+ eAtomTypeCUOffset = 2u, // DIE offset of the compiler unit header that
+ // contains the item in question
+ eAtomTypeTag = 3u, // DW_TAG_xxx value, should be encoded as
+ // DW_FORM_data1 (if no tags exceed 255) or
+ // DW_FORM_data2.
+ eAtomTypeNameFlags = 4u, // Flags from enum NameFlags
+ eAtomTypeTypeFlags = 5u // Flags from enum TypeFlags
+ };
+
+ // Make these public so that they can be used as a general interface to
+ // the class.
+ struct Atom {
+ AtomType type; // enum AtomType
+ uint16_t form; // DWARF DW_FORM_ defines
+
+ Atom(AtomType type, uint16_t form) : type(type), form(form) {};
+ static const char * AtomTypeString(enum AtomType);
+#ifndef NDEBUG
+ void print(raw_ostream &O) {
+ O << "Type: " << dwarf::TagString(type) << "\n"
+ << "Form: " << dwarf::FormEncodingString(form) << "\n";
+ }
+ void dump() {
+ print(dbgs());
+ }
+#endif
+ };
+
+ private:
+ struct TableHeaderData {
+
+ uint32_t die_offset_base;
+ std::vector<Atom> Atoms;
+
+ TableHeaderData(DwarfAccelTable::Atom Atom, uint32_t offset = 0)
+ : die_offset_base(offset) {
+ Atoms.push_back(Atom);
+ }
+
+#ifndef NDEBUG
+ void print (raw_ostream &O) {
+ O << "die_offset_base: " << die_offset_base << "\n";
+ for (size_t i = 0; i < Atoms.size(); i++)
+ Atoms[i].print(O);
+ }
+ void dump() {
+ print(dbgs());
+ }
+#endif
+ };
+
+ // The data itself consists of a str_offset (to deal with collisions in
+ // some magical way? this looks like the KeyType from the spec, which
+ // should mean an integer compare on read), a count of the DIEs in the
+ // hash and the offsets to the DIEs themselves.
+ // On disk each data section is ended with a 0 KeyType as the end of the
+ // hash chain.
+ // On output this looks like:
+ // uint32_t str_offset
+ // uint32_t hash_data_count
+ // HashData[hash_data_count]
+ struct HashData {
+ StringRef Str;
+ uint32_t HashValue;
+ MCSymbol *Sym;
+ std::vector<uint32_t> DIEOffsets; // offsets
+ HashData(StringRef S) : Str(S) {
+ HashValue = DwarfAccelTable::HashDJB(S.str().c_str());
+ }
+ void addOffset(uint32_t off) { DIEOffsets.push_back(off); }
+ #ifndef NDEBUG
+ void print(raw_ostream &O) {
+ O << "Name: " << Str << "\n";
+ O << " Hash Value: " << format("0x%x", HashValue) << "\n";
+ O << " Symbol: " ;
+ if (Sym) Sym->print(O);
+ else O << "<none>";
+ O << "\n";
+ for (size_t i = 0; i < DIEOffsets.size(); i++)
+ O << " Offset: " << DIEOffsets[i] << "\n";
+ }
+ void dump() {
+ print(dbgs());
+ }
+ #endif
+ };
+
+ DwarfAccelTable(const DwarfAccelTable&); // DO NOT IMPLEMENT
+ void operator=(const DwarfAccelTable&); // DO NOT IMPLEMENT
+
+ // Internal Functions
+ void EmitHeader(AsmPrinter *);
+ void EmitBuckets(AsmPrinter *);
+ void EmitHashes(AsmPrinter *);
+ void EmitOffsets(AsmPrinter *, MCSymbol *);
+ void EmitData(AsmPrinter *, DwarfDebug *D);
+
+ // Output Variables
+ TableHeader Header;
+ TableHeaderData HeaderData;
+ std::vector<HashData*> Data;
+
+ // String Data
+ typedef std::vector<DIE*> DIEArray;
+ typedef StringMap<DIEArray> StringEntries;
+ StringEntries Entries;
+
+ // Buckets/Hashes/Offsets
+ typedef std::vector<HashData*> HashList;
+ typedef std::vector<HashList> BucketList;
+ BucketList Buckets;
+ HashList Hashes;
+
+ // Public Implementation
+ public:
+ DwarfAccelTable(DwarfAccelTable::Atom Atom);
+ void AddName(StringRef, DIE*);
+ void FinalizeTable(AsmPrinter *, const char *);
+ void Emit(AsmPrinter *, MCSymbol *, DwarfDebug *);
+#ifndef NDEBUG
+ void print(raw_ostream &O);
+ void dump() { print(dbgs()); }
+#endif
+};
+
+}
+#endif