From e153fb33e49bd6d44189d3659287338c410bc0ce Mon Sep 17 00:00:00 2001 From: Justin Bogner Date: Fri, 18 Apr 2014 21:48:40 +0000 Subject: ProfileData: Add support for the indexed instrprof format This adds support for an indexed instrumentation based profiling format, which is just a small header and an on disk hash table. This format will be used by clang's -fprofile-instr-use= for PGO. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@206656 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/ProfileData/InstrProf.cpp | 2 + lib/ProfileData/InstrProfIndexed.h | 54 +++++++++++++++++ lib/ProfileData/InstrProfReader.cpp | 118 ++++++++++++++++++++++++++++++++++-- lib/ProfileData/InstrProfWriter.cpp | 88 +++++++++++++++++++++++---- 4 files changed, 245 insertions(+), 17 deletions(-) create mode 100644 lib/ProfileData/InstrProfIndexed.h (limited to 'lib/ProfileData') diff --git a/lib/ProfileData/InstrProf.cpp b/lib/ProfileData/InstrProf.cpp index 850f61354e..2eca8b2045 100644 --- a/lib/ProfileData/InstrProf.cpp +++ b/lib/ProfileData/InstrProf.cpp @@ -33,6 +33,8 @@ class InstrProfErrorCategoryType : public error_category { return "Invalid header"; case instrprof_error::unsupported_version: return "Unsupported format version"; + case instrprof_error::unsupported_hash_type: + return "Unsupported hash function"; case instrprof_error::too_large: return "Too much profile data"; case instrprof_error::truncated: diff --git a/lib/ProfileData/InstrProfIndexed.h b/lib/ProfileData/InstrProfIndexed.h new file mode 100644 index 0000000000..87eb4c3a02 --- /dev/null +++ b/lib/ProfileData/InstrProfIndexed.h @@ -0,0 +1,54 @@ +//=-- InstrProfIndexed.h - Indexed profiling format support -------*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Shared header for the instrumented profile data reader and writer. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_PROFILEDATA_INSTRPROF_INDEXED_H_ +#define LLVM_PROFILEDATA_INSTRPROF_INDEXED_H_ + +#include "llvm/Support/MD5.h" + +namespace llvm { + +namespace IndexedInstrProf { +enum class HashT : uint32_t { + MD5, + + Last = MD5 +}; + +static inline uint64_t MD5Hash(StringRef Str) { + MD5 Hash; + Hash.update(Str); + llvm::MD5::MD5Result Result; + Hash.final(Result); + // Return the least significant 8 bytes. Our MD5 implementation returns the + // result in little endian, so we may need to swap bytes. + using namespace llvm::support; + return endian::read(Result); +} + +uint64_t ComputeHash(HashT Type, StringRef K) { + switch (Type) { + case HashT::MD5: + return IndexedInstrProf::MD5Hash(K); + } + llvm_unreachable("Unhandled hash type"); +} + +const uint64_t Magic = 0x8169666f72706cff; // "\xfflprofi\x81" +const uint64_t Version = 1; +const HashT HashType = HashT::MD5; +} + +} // end namespace llvm + +#endif // LLVM_PROFILEDATA_INSTRPROF_INDEXED_H_ diff --git a/lib/ProfileData/InstrProfReader.cpp b/lib/ProfileData/InstrProfReader.cpp index b07f402777..c8d2c2781c 100644 --- a/lib/ProfileData/InstrProfReader.cpp +++ b/lib/ProfileData/InstrProfReader.cpp @@ -15,30 +15,62 @@ #include "llvm/ProfileData/InstrProfReader.h" #include "llvm/ProfileData/InstrProf.h" +#include "InstrProfIndexed.h" + #include using namespace llvm; -error_code InstrProfReader::create(std::string Path, - std::unique_ptr &Result) { - std::unique_ptr Buffer; +static error_code setupMemoryBuffer(std::string Path, + std::unique_ptr &Buffer) { if (error_code EC = MemoryBuffer::getFileOrSTDIN(Path, Buffer)) return EC; // Sanity check the file. if (Buffer->getBufferSize() > std::numeric_limits::max()) return instrprof_error::too_large; + return instrprof_error::success; +} + +static error_code initializeReader(InstrProfReader &Reader) { + return Reader.readHeader(); +} + +error_code InstrProfReader::create(std::string Path, + std::unique_ptr &Result) { + // Set up the buffer to read. + std::unique_ptr Buffer; + if (error_code EC = setupMemoryBuffer(Path, Buffer)) + return EC; // Create the reader. - if (RawInstrProfReader64::hasFormat(*Buffer)) + if (IndexedInstrProfReader::hasFormat(*Buffer)) + Result.reset(new IndexedInstrProfReader(std::move(Buffer))); + else if (RawInstrProfReader64::hasFormat(*Buffer)) Result.reset(new RawInstrProfReader64(std::move(Buffer))); else if (RawInstrProfReader32::hasFormat(*Buffer)) Result.reset(new RawInstrProfReader32(std::move(Buffer))); else Result.reset(new TextInstrProfReader(std::move(Buffer))); - // Read the header and return the result. - return Result->readHeader(); + // Initialize the reader and return the result. + return initializeReader(*Result); +} + +error_code IndexedInstrProfReader::create( + std::string Path, std::unique_ptr &Result) { + // Set up the buffer to read. + std::unique_ptr Buffer; + if (error_code EC = setupMemoryBuffer(Path, Buffer)) + return EC; + + // Create the reader. + if (!IndexedInstrProfReader::hasFormat(*Buffer)) + return instrprof_error::bad_magic; + Result.reset(new IndexedInstrProfReader(std::move(Buffer))); + + // Initialize the reader and return the result. + return initializeReader(*Result); } void InstrProfIterator::Increment() { @@ -210,3 +242,77 @@ namespace llvm { template class RawInstrProfReader; template class RawInstrProfReader; } + +bool IndexedInstrProfReader::hasFormat(const MemoryBuffer &DataBuffer) { + if (DataBuffer.getBufferSize() < 8) + return false; + using namespace support; + uint64_t Magic = + endian::read(DataBuffer.getBufferStart()); + return Magic == IndexedInstrProf::Magic; +} + +error_code IndexedInstrProfReader::readHeader() { + const unsigned char *Start = (unsigned char *)DataBuffer->getBufferStart(); + const unsigned char *Cur = Start; + if ((unsigned char *)DataBuffer->getBufferEnd() - Cur < 24) + return error(instrprof_error::truncated); + + using namespace support; + + // Check the magic number. + uint64_t Magic = endian::readNext(Cur); + if (Magic != IndexedInstrProf::Magic) + return error(instrprof_error::bad_magic); + + // Read the version. + uint64_t Version = endian::readNext(Cur); + if (Version != IndexedInstrProf::Version) + return error(instrprof_error::unsupported_version); + + // Read the maximal function count. + MaxFunctionCount = endian::readNext(Cur); + + // Read the hash type and start offset. + IndexedInstrProf::HashT HashType = static_cast( + endian::readNext(Cur)); + if (HashType > IndexedInstrProf::HashT::Last) + return error(instrprof_error::unsupported_hash_type); + uint64_t HashOffset = endian::readNext(Cur); + + // The rest of the file is an on disk hash table. + Index.reset(InstrProfReaderIndex::Create(Start + HashOffset, Cur, Start, + InstrProfLookupTrait(HashType))); + // Set up our iterator for readNextRecord. + RecordIterator = Index->data_begin(); + + return success(); +} + +error_code IndexedInstrProfReader::getFunctionCounts( + StringRef FuncName, uint64_t &FuncHash, std::vector &Counts) { + const auto &Iter = Index->find(FuncName); + if (Iter == Index->end()) + return error(instrprof_error::unknown_function); + + // Found it. Make sure it's valid before giving back a result. + const InstrProfRecord &Record = *Iter; + if (Record.Name.empty()) + return error(instrprof_error::malformed); + FuncHash = Record.Hash; + Counts = Record.Counts; + return success(); +} + +error_code IndexedInstrProfReader::readNextRecord(InstrProfRecord &Record) { + // Are we out of records? + if (RecordIterator == Index->data_end()) + return error(instrprof_error::eof); + + // Read the next one. + Record = *RecordIterator; + ++RecordIterator; + if (Record.Name.empty()) + return error(instrprof_error::malformed); + return success(); +} diff --git a/lib/ProfileData/InstrProfWriter.cpp b/lib/ProfileData/InstrProfWriter.cpp index 3024f9676e..1a3eae1db9 100644 --- a/lib/ProfileData/InstrProfWriter.cpp +++ b/lib/ProfileData/InstrProfWriter.cpp @@ -13,10 +13,59 @@ //===----------------------------------------------------------------------===// #include "llvm/ProfileData/InstrProfWriter.h" -#include "llvm/Support/Endian.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Support/EndianStream.h" +#include "llvm/Support/OnDiskHashTable.h" + +#include "InstrProfIndexed.h" using namespace llvm; +namespace { +class InstrProfRecordTrait { +public: + typedef StringRef key_type; + typedef StringRef key_type_ref; + + typedef InstrProfWriter::CounterData data_type; + typedef const InstrProfWriter::CounterData &data_type_ref; + + typedef uint64_t hash_value_type; + typedef uint64_t offset_type; + + static hash_value_type ComputeHash(key_type_ref K) { + return IndexedInstrProf::ComputeHash(IndexedInstrProf::HashType, K); + } + + static std::pair + EmitKeyDataLength(raw_ostream &Out, key_type_ref K, data_type_ref V) { + using namespace llvm::support; + endian::Writer LE(Out); + + unsigned N = K.size(); + LE.write(N); + + unsigned M = (1 + V.Counts.size()) * sizeof(uint64_t); + LE.write(M); + + return std::make_pair(N, M); + } + + static void EmitKey(raw_ostream &Out, key_type_ref K, unsigned N){ + Out.write(K.data(), N); + } + + static void EmitData(raw_ostream &Out, key_type_ref, data_type_ref V, + unsigned) { + using namespace llvm::support; + endian::Writer LE(Out); + LE.write(V.Hash); + for (uint64_t I : V.Counts) + LE.write(I); + } +}; +} + error_code InstrProfWriter::addFunctionCounts(StringRef FunctionName, uint64_t FunctionHash, ArrayRef Counters) { @@ -45,16 +94,33 @@ error_code InstrProfWriter::addFunctionCounts(StringRef FunctionName, return instrprof_error::success; } -void InstrProfWriter::write(raw_ostream &OS) { - // Write out the counts for each function. +void InstrProfWriter::write(raw_fd_ostream &OS) { + OnDiskChainedHashTableGenerator Generator; + uint64_t MaxFunctionCount = 0; + + // Populate the hash table generator. for (const auto &I : FunctionData) { - StringRef Name = I.getKey(); - uint64_t Hash = I.getValue().Hash; - const std::vector &Counts = I.getValue().Counts; - - OS << Name << "\n" << Hash << "\n" << Counts.size() << "\n"; - for (uint64_t Count : Counts) - OS << Count << "\n"; - OS << "\n"; + Generator.insert(I.getKey(), I.getValue()); + if (I.getValue().Counts[0] > MaxFunctionCount) + MaxFunctionCount = I.getValue().Counts[0]; } + + using namespace llvm::support; + endian::Writer LE(OS); + + // Write the header. + LE.write(IndexedInstrProf::Magic); + LE.write(IndexedInstrProf::Version); + LE.write(MaxFunctionCount); + LE.write(static_cast(IndexedInstrProf::HashType)); + + // Save a space to write the hash table start location. + uint64_t HashTableStartLoc = OS.tell(); + LE.write(0); + // Write the hash table. + uint64_t HashTableStart = Generator.Emit(OS); + + // Go back and fill in the hash table start. + OS.seek(HashTableStartLoc); + LE.write(HashTableStart); } -- cgit v1.2.3