From a8febf2283921157da1539c079cd74a55bf89a5a Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Wed, 30 Apr 2014 16:25:02 +0000 Subject: ELFObjectWriter: deduplicate suffices in strtab We already do this for shstrtab, so might as well do it for strtab. This extracts the string table building code into a separate class. The idea is to use it for other object formats too. I mostly wanted to do this for the general principle, but it does save a little bit on object file size. I tried this on a clang bootstrap and saved 0.54% on the sum of object file sizes (1.14 MB out of 212 MB for a release build). Differential Revision: http://reviews.llvm.org/D3533 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@207670 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/ELFObjectWriter.cpp | 118 +++++++++++--------------------------- lib/Object/CMakeLists.txt | 1 + lib/Object/StringTableBuilder.cpp | 51 ++++++++++++++++ 3 files changed, 84 insertions(+), 86 deletions(-) create mode 100644 lib/Object/StringTableBuilder.cpp (limited to 'lib') diff --git a/lib/MC/ELFObjectWriter.cpp b/lib/MC/ELFObjectWriter.cpp index 2e07e22dc3..ebcc691f0a 100644 --- a/lib/MC/ELFObjectWriter.cpp +++ b/lib/MC/ELFObjectWriter.cpp @@ -28,6 +28,7 @@ #include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCValue.h" +#include "llvm/Object/StringTableBuilder.h" #include "llvm/Support/Compression.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Endian.h" @@ -132,11 +133,11 @@ class ELFObjectWriter : public MCObjectWriter { MCSymbolData *SymbolData; uint64_t StringIndex; uint32_t SectionIndex; + StringRef Name; // Support lexicographic sorting. bool operator<(const ELFSymbolData &RHS) const { - return SymbolData->getSymbol().getName() < - RHS.SymbolData->getSymbol().getName(); + return Name < RHS.Name; } }; @@ -149,13 +150,13 @@ class ELFObjectWriter : public MCObjectWriter { llvm::DenseMap> Relocations; - DenseMap SectionStringTableIndex; + StringTableBuilder ShStrTabBuilder; /// @} /// @name Symbol Table Data /// @{ - SmallString<256> StringTable; + StringTableBuilder StrTabBuilder; std::vector FileSymbolData; std::vector LocalSymbolData; std::vector ExternalSymbolData; @@ -676,7 +677,6 @@ void ELFObjectWriter::WriteSymbolTable(MCDataFragment *SymtabF, SectionIndexMapTy &SectionIndexMap) { // The string table must be emitted first because we need the index // into the string table for all the symbol names. - assert(StringTable.size() && "Missing string table"); // FIXME: Make sure the start of the symbol table is aligned. @@ -1031,27 +1031,6 @@ ELFObjectWriter::computeSymbolTable(MCAssembler &Asm, const MCAsmLayout &Layout, MCELF::SetBinding(Data, ELF::STB_GLOBAL); } - // Index 0 is always the empty string. - StringMap StringIndexMap; - StringTable += '\x00'; - - // FIXME: We could optimize suffixes in strtab in the same way we - // optimize them in shstrtab. - - for (MCAssembler::const_file_name_iterator it = Asm.file_names_begin(), - ie = Asm.file_names_end(); - it != ie; - ++it) { - StringRef Name = *it; - uint64_t &Entry = StringIndexMap[Name]; - if (!Entry) { - Entry = StringTable.size(); - StringTable += Name; - StringTable += '\x00'; - } - FileSymbolData.push_back(Entry); - } - // Add the data for the symbols. for (MCSymbolData &SD : Asm.symbols()) { const MCSymbol &Symbol = SD.getSymbol(); @@ -1102,7 +1081,6 @@ ELFObjectWriter::computeSymbolTable(MCAssembler &Asm, const MCAsmLayout &Layout, // @@ in defined ones. StringRef Name = Symbol.getName(); SmallString<32> Buf; - size_t Pos = Name.find("@@@"); if (Pos != StringRef::npos) { Buf += Name.substr(0, Pos); @@ -1110,14 +1088,8 @@ ELFObjectWriter::computeSymbolTable(MCAssembler &Asm, const MCAsmLayout &Layout, Buf += Name.substr(Pos + Skip); Name = Buf; } + MSD.Name = StrTabBuilder.add(Name); - uint64_t &Entry = StringIndexMap[Name]; - if (!Entry) { - Entry = StringTable.size(); - StringTable += Name; - StringTable += '\x00'; - } - MSD.StringIndex = Entry; if (MSD.SectionIndex == ELF::SHN_UNDEF) UndefinedSymbolData.push_back(MSD); else if (Local) @@ -1126,6 +1098,21 @@ ELFObjectWriter::computeSymbolTable(MCAssembler &Asm, const MCAsmLayout &Layout, ExternalSymbolData.push_back(MSD); } + for (auto i = Asm.file_names_begin(), e = Asm.file_names_end(); i != e; ++i) + StrTabBuilder.add(*i); + + StrTabBuilder.finalize(); + + for (auto i = Asm.file_names_begin(), e = Asm.file_names_end(); i != e; ++i) + FileSymbolData.push_back(StrTabBuilder.getOffset(*i)); + + for (ELFSymbolData& MSD : LocalSymbolData) + MSD.StringIndex = StrTabBuilder.getOffset(MSD.Name); + for (ELFSymbolData& MSD : ExternalSymbolData) + MSD.StringIndex = StrTabBuilder.getOffset(MSD.Name); + for (ELFSymbolData& MSD : UndefinedSymbolData) + MSD.StringIndex = StrTabBuilder.getOffset(MSD.Name); + // Symbols are required to be in lexicographic order. array_pod_sort(LocalSymbolData.begin(), LocalSymbolData.end()); array_pod_sort(ExternalSymbolData.begin(), ExternalSymbolData.end()); @@ -1436,23 +1423,6 @@ void ELFObjectWriter::WriteRelocationsFragment(const MCAssembler &Asm, } } -static int compareBySuffix(const MCSectionELF *const *a, - const MCSectionELF *const *b) { - const StringRef &NameA = (*a)->getSectionName(); - const StringRef &NameB = (*b)->getSectionName(); - const unsigned sizeA = NameA.size(); - const unsigned sizeB = NameB.size(); - const unsigned len = std::min(sizeA, sizeB); - for (unsigned int i = 0; i < len; ++i) { - char ca = NameA[sizeA - i - 1]; - char cb = NameB[sizeB - i - 1]; - if (ca != cb) - return cb - ca; - } - - return sizeB - sizeA; -} - void ELFObjectWriter::CreateMetadataSections(MCAssembler &Asm, MCAsmLayout &Layout, SectionIndexMapTy &SectionIndexMap, @@ -1493,45 +1463,20 @@ void ELFObjectWriter::CreateMetadataSections(MCAssembler &Asm, WriteSymbolTable(F, Asm, Layout, SectionIndexMap); F = new MCDataFragment(&StrtabSD); - F->getContents().append(StringTable.begin(), StringTable.end()); + F->getContents().append(StrTabBuilder.data().begin(), + StrTabBuilder.data().end()); F = new MCDataFragment(&ShstrtabSD); - std::vector Sections; - for (MCAssembler::const_iterator it = Asm.begin(), - ie = Asm.end(); it != ie; ++it) { + // Section header string table. + for (auto it = Asm.begin(), ie = Asm.end(); it != ie; ++it) { const MCSectionELF &Section = static_cast(it->getSection()); - Sections.push_back(&Section); - } - array_pod_sort(Sections.begin(), Sections.end(), compareBySuffix); - - // Section header string table. - // - // The first entry of a string table holds a null character so skip - // section 0. - uint64_t Index = 1; - F->getContents().push_back('\x00'); - - for (unsigned int I = 0, E = Sections.size(); I != E; ++I) { - const MCSectionELF &Section = *Sections[I]; - - StringRef Name = Section.getSectionName(); - if (I != 0) { - StringRef PreviousName = Sections[I - 1]->getSectionName(); - if (PreviousName.endswith(Name)) { - SectionStringTableIndex[&Section] = Index - Name.size() - 1; - continue; - } - } - // Remember the index into the string table so we can write it - // into the sh_name field of the section header table. - SectionStringTableIndex[&Section] = Index; - - Index += Name.size() + 1; - F->getContents().append(Name.begin(), Name.end()); - F->getContents().push_back('\x00'); + ShStrTabBuilder.add(Section.getSectionName()); } + ShStrTabBuilder.finalize(); + F->getContents().append(ShStrTabBuilder.data().begin(), + ShStrTabBuilder.data().end()); } void ELFObjectWriter::CreateIndexedSections(MCAssembler &Asm, @@ -1599,7 +1544,7 @@ void ELFObjectWriter::WriteSection(MCAssembler &Asm, switch(Section.getType()) { case ELF::SHT_DYNAMIC: - sh_link = SectionStringTableIndex[&Section]; + sh_link = ShStrTabBuilder.getOffset(Section.getSectionName()); sh_info = 0; break; @@ -1680,7 +1625,8 @@ void ELFObjectWriter::WriteSection(MCAssembler &Asm, } } - WriteSecHdrEntry(SectionStringTableIndex[&Section], Section.getType(), + WriteSecHdrEntry(ShStrTabBuilder.getOffset(Section.getSectionName()), + Section.getType(), Section.getFlags(), 0, Offset, Size, sh_link, sh_info, Alignment, Section.getEntrySize()); } diff --git a/lib/Object/CMakeLists.txt b/lib/Object/CMakeLists.txt index dc182966ae..cd8c9efe7b 100644 --- a/lib/Object/CMakeLists.txt +++ b/lib/Object/CMakeLists.txt @@ -12,6 +12,7 @@ add_llvm_library(LLVMObject MachOUniversal.cpp Object.cpp ObjectFile.cpp + StringTableBuilder.cpp SymbolicFile.cpp YAML.cpp ) diff --git a/lib/Object/StringTableBuilder.cpp b/lib/Object/StringTableBuilder.cpp new file mode 100644 index 0000000000..9152834a29 --- /dev/null +++ b/lib/Object/StringTableBuilder.cpp @@ -0,0 +1,51 @@ +//===-- StringTableBuilder.cpp - String table building utility ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SmallVector.h" +#include "llvm/Object/StringTableBuilder.h" + +using namespace llvm; + +static bool compareBySuffix(StringRef a, StringRef b) { + size_t sizeA = a.size(); + size_t sizeB = b.size(); + size_t len = std::min(sizeA, sizeB); + for (size_t i = 0; i < len; ++i) { + char ca = a[sizeA - i - 1]; + char cb = b[sizeB - i - 1]; + if (ca != cb) + return ca > cb; + } + return sizeA > sizeB; +} + +void StringTableBuilder::finalize() { + SmallVector Strings; + for (auto i = StringIndexMap.begin(), e = StringIndexMap.end(); i != e; ++i) + Strings.push_back(i->getKey()); + + std::sort(Strings.begin(), Strings.end(), compareBySuffix); + + // FIXME: Starting with a null byte is ELF specific. Generalize this so we + // can use the class with other object formats. + StringTable += '\x00'; + + StringRef Previous; + for (StringRef s : Strings) { + if (Previous.endswith(s)) { + StringIndexMap[s] = StringTable.size() - 1 - s.size(); + continue; + } + + StringIndexMap[s] = StringTable.size(); + StringTable += s; + StringTable += '\x00'; + Previous = s; + } +} -- cgit v1.2.3