summaryrefslogtreecommitdiff
path: root/lib/ProfileData
diff options
context:
space:
mode:
author Justin Bogner <mail@justinbogner.com> 2014-04-18 21:48:40 +0000
committer Justin Bogner <mail@justinbogner.com> 2014-04-18 21:48:40 +0000
commit e153fb33e49bd6d44189d3659287338c410bc0ce (patch)
tree 28b8eb8580f22648394e4d4e629d97cd0d72e4c9 /lib/ProfileData
parent 4c464def6ae721b09ebb7cf202e04339267f761a (diff)
download llvm-e153fb33e49bd6d44189d3659287338c410bc0ce.tar.gz
llvm-e153fb33e49bd6d44189d3659287338c410bc0ce.tar.bz2
llvm-e153fb33e49bd6d44189d3659287338c410bc0ce.tar.xz
ProfileData: Add support for the indexed instrprof format
This adds support for an indexed, instrumentation-based profiling format, which is just a small header followed by an on-disk hash table. This format will be used by clang's -fprofile-instr-use= for PGO.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@206656 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/ProfileData')
-rw-r--r-- lib/ProfileData/InstrProf.cpp 2
-rw-r--r-- lib/ProfileData/InstrProfIndexed.h 54
-rw-r--r-- lib/ProfileData/InstrProfReader.cpp 118
-rw-r--r-- lib/ProfileData/InstrProfWriter.cpp 88
4 files changed, 245 insertions, 17 deletions
diff --git a/lib/ProfileData/InstrProf.cpp b/lib/ProfileData/InstrProf.cpp
index 850f61354e..2eca8b2045 100644
--- a/lib/ProfileData/InstrProf.cpp
+++ b/lib/ProfileData/InstrProf.cpp
@@ -33,6 +33,8 @@ class InstrProfErrorCategoryType : public error_category {
return "Invalid header";
case instrprof_error::unsupported_version:
return "Unsupported format version";
+ case instrprof_error::unsupported_hash_type:
+ return "Unsupported hash function";
case instrprof_error::too_large:
return "Too much profile data";
case instrprof_error::truncated:
diff --git a/lib/ProfileData/InstrProfIndexed.h b/lib/ProfileData/InstrProfIndexed.h
new file mode 100644
index 0000000000..87eb4c3a02
--- /dev/null
+++ b/lib/ProfileData/InstrProfIndexed.h
@@ -0,0 +1,54 @@
+//=-- InstrProfIndexed.h - Indexed profiling format support -------*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Shared header for the instrumented profile data reader and writer.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_PROFILEDATA_INSTRPROF_INDEXED_H_
+#define LLVM_PROFILEDATA_INSTRPROF_INDEXED_H_
+
+#include "llvm/Support/MD5.h"
+
+namespace llvm {
+
+namespace IndexedInstrProf {
+/// Identifies the hash function used to key the on-disk hash table. The value
+/// is stored in the profile header and checked by the reader.
+enum class HashT : uint32_t {
+  MD5,       // Keys are hashed with MD5 (see MD5Hash).
+  Last = MD5 // Alias for the highest valid enumerator; used for range checks.
+};
+
+/// Hash \p Str with MD5 and fold the digest down to a 64-bit value.
+///
+/// \returns the 64-bit integer obtained by reading the start of the MD5 digest
+/// as a little-endian value.
+static inline uint64_t MD5Hash(StringRef Str) {
+  llvm::MD5::MD5Result Digest;
+  MD5 Hasher;
+  Hasher.update(Str);
+  Hasher.final(Digest);
+  // The digest bytes are decoded explicitly as little endian, so the value
+  // returned does not depend on the byte order of the host.
+  using namespace llvm::support;
+  return endian::read<uint64_t, little, unaligned>(Digest);
+}
+
+/// Hash the key \p K with the hash function selected by \p Type.
+///
+/// This is `static inline` because this header is included by more than one
+/// translation unit (both the profile reader and the profile writer). A
+/// non-inline definition with external linkage would be emitted in each of
+/// them and produce duplicate-symbol errors at link time.
+///
+/// \param Type the hash function to apply.
+/// \param K    the key to hash.
+/// \returns the 64-bit hash of \p K.
+static inline uint64_t ComputeHash(HashT Type, StringRef K) {
+  switch (Type) {
+  case HashT::MD5:
+    return IndexedInstrProf::MD5Hash(K);
+  }
+  // Every enumerator is handled above; reaching this point means Type holds a
+  // value outside the enumeration.
+  llvm_unreachable("Unhandled hash type");
+}
+
+// Header constants of the indexed profile format.
+
+// Hash function used for the keys of the on-disk hash table.
+const HashT HashType = HashT::MD5;
+// Format version stored in the header.
+const uint64_t Version = 1;
+// Magic number stored at the very start of the file. Written as a
+// little-endian 64-bit value it yields the bytes "\xfflprofi\x81".
+const uint64_t Magic = 0x8169666f72706cff;
+}
+
+} // end namespace llvm
+
+#endif // LLVM_PROFILEDATA_INSTRPROF_INDEXED_H_
diff --git a/lib/ProfileData/InstrProfReader.cpp b/lib/ProfileData/InstrProfReader.cpp
index b07f402777..c8d2c2781c 100644
--- a/lib/ProfileData/InstrProfReader.cpp
+++ b/lib/ProfileData/InstrProfReader.cpp
@@ -15,30 +15,62 @@
#include "llvm/ProfileData/InstrProfReader.h"
#include "llvm/ProfileData/InstrProf.h"
+#include "InstrProfIndexed.h"
+
#include <cassert>
using namespace llvm;
-error_code InstrProfReader::create(std::string Path,
- std::unique_ptr<InstrProfReader> &Result) {
- std::unique_ptr<MemoryBuffer> Buffer;
+static error_code setupMemoryBuffer(std::string Path,
+ std::unique_ptr<MemoryBuffer> &Buffer) {
if (error_code EC = MemoryBuffer::getFileOrSTDIN(Path, Buffer))
return EC;
// Sanity check the file.
if (Buffer->getBufferSize() > std::numeric_limits<unsigned>::max())
return instrprof_error::too_large;
+ return instrprof_error::success;
+}
+
+/// Prepare a freshly constructed reader for use by having it parse its header.
+/// \returns whatever status the reader's readHeader() reports.
+static error_code initializeReader(InstrProfReader &Reader) {
+  error_code Status = Reader.readHeader();
+  return Status;
+}
+
+error_code InstrProfReader::create(std::string Path,
+ std::unique_ptr<InstrProfReader> &Result) {
+ // Set up the buffer to read.
+ std::unique_ptr<MemoryBuffer> Buffer;
+ if (error_code EC = setupMemoryBuffer(Path, Buffer))
+ return EC;
// Create the reader.
- if (RawInstrProfReader64::hasFormat(*Buffer))
+ if (IndexedInstrProfReader::hasFormat(*Buffer))
+ Result.reset(new IndexedInstrProfReader(std::move(Buffer)));
+ else if (RawInstrProfReader64::hasFormat(*Buffer))
Result.reset(new RawInstrProfReader64(std::move(Buffer)));
else if (RawInstrProfReader32::hasFormat(*Buffer))
Result.reset(new RawInstrProfReader32(std::move(Buffer)));
else
Result.reset(new TextInstrProfReader(std::move(Buffer)));
- // Read the header and return the result.
- return Result->readHeader();
+ // Initialize the reader and return the result.
+ return initializeReader(*Result);
+}
+
+/// Open \p Path as an indexed profile and store the resulting reader in
+/// \p Result.
+///
+/// \returns the error from loading the file if that fails, bad_magic if the
+/// contents are not in the indexed format, and otherwise the status of
+/// initializing the new reader.
+error_code IndexedInstrProfReader::create(
+    std::string Path, std::unique_ptr<IndexedInstrProfReader> &Result) {
+  // Load the file contents.
+  std::unique_ptr<MemoryBuffer> Contents;
+  error_code LoadStatus = setupMemoryBuffer(Path, Contents);
+  if (LoadStatus)
+    return LoadStatus;
+
+  // Unlike InstrProfReader::create, only the indexed format is accepted here.
+  const bool IsIndexed = IndexedInstrProfReader::hasFormat(*Contents);
+  if (!IsIndexed)
+    return instrprof_error::bad_magic;
+
+  // Hand the buffer over to a new reader and let it parse the header.
+  Result.reset(new IndexedInstrProfReader(std::move(Contents)));
+  return initializeReader(*Result);
}
void InstrProfIterator::Increment() {
@@ -210,3 +242,77 @@ namespace llvm {
template class RawInstrProfReader<uint32_t>;
template class RawInstrProfReader<uint64_t>;
}
+
+/// Check whether \p DataBuffer begins with the indexed profile magic number.
+/// \returns true only if the buffer holds at least 8 bytes and its first
+/// 8 bytes, read as a little-endian integer, equal IndexedInstrProf::Magic.
+bool IndexedInstrProfReader::hasFormat(const MemoryBuffer &DataBuffer) {
+  using namespace support;
+  // A buffer shorter than the magic number cannot be an indexed profile.
+  if (DataBuffer.getBufferSize() < sizeof(uint64_t))
+    return false;
+  return endian::read<uint64_t, little, aligned>(
+             DataBuffer.getBufferStart()) == IndexedInstrProf::Magic;
+}
+
+/// Parse and validate the header of an indexed profile, then set up the
+/// on-disk hash table index and the iterator used by readNextRecord.
+///
+/// The header is five little-endian 64-bit fields, read in this order: magic,
+/// format version, maximal function count, hash type, and the offset of the
+/// hash table measured from the start of the buffer.
+///
+/// \returns truncated if the buffer is too small to contain the header,
+/// bad_magic, unsupported_version or unsupported_hash_type if the matching
+/// field holds an unexpected value, malformed if the hash table offset does
+/// not point inside the buffer past the header, and success otherwise.
+error_code IndexedInstrProfReader::readHeader() {
+  using namespace support;
+
+  // Five uint64_t fields are consumed below, so require all of them up front
+  // rather than reading past the end of a short file.
+  const uint64_t HeaderSize = 5 * sizeof(uint64_t);
+  const uint64_t BufferSize = DataBuffer->getBufferSize();
+  if (BufferSize < HeaderSize)
+    return error(instrprof_error::truncated);
+
+  const unsigned char *Start =
+      reinterpret_cast<const unsigned char *>(DataBuffer->getBufferStart());
+  const unsigned char *Cur = Start;
+
+  // Check the magic number.
+  uint64_t Magic = endian::readNext<uint64_t, little, unaligned>(Cur);
+  if (Magic != IndexedInstrProf::Magic)
+    return error(instrprof_error::bad_magic);
+
+  // Read the version.
+  uint64_t Version = endian::readNext<uint64_t, little, unaligned>(Cur);
+  if (Version != IndexedInstrProf::Version)
+    return error(instrprof_error::unsupported_version);
+
+  // Read the maximal function count.
+  MaxFunctionCount = endian::readNext<uint64_t, little, unaligned>(Cur);
+
+  // Read the hash type. Validate the full 64-bit value before narrowing it to
+  // the 32-bit enum; otherwise a value with only high bits set would be
+  // truncated and accepted as a known hash type.
+  uint64_t RawHashType = endian::readNext<uint64_t, little, unaligned>(Cur);
+  if (RawHashType > static_cast<uint64_t>(IndexedInstrProf::HashT::Last))
+    return error(instrprof_error::unsupported_hash_type);
+  IndexedInstrProf::HashT HashType =
+      static_cast<IndexedInstrProf::HashT>(RawHashType);
+
+  // Read the hash table offset. It is turned into a pointer below, so reject
+  // values that fall inside the header or outside the buffer.
+  uint64_t HashOffset = endian::readNext<uint64_t, little, unaligned>(Cur);
+  if (HashOffset < HeaderSize || HashOffset >= BufferSize)
+    return error(instrprof_error::malformed);
+
+  // The rest of the file is an on disk hash table. Cur now points just past
+  // the header.
+  Index.reset(InstrProfReaderIndex::Create(Start + HashOffset, Cur, Start,
+                                           InstrProfLookupTrait(HashType)));
+  // Set up our iterator for readNextRecord.
+  RecordIterator = Index->data_begin();
+
+  return success();
+}
+
+/// Look up the record stored in the index under \p FuncName.
+///
+/// On success \p FuncHash and \p Counts receive the record's hash and
+/// counters. \returns unknown_function if the index has no entry for the
+/// name, malformed if the entry found has an empty name, success otherwise.
+error_code IndexedInstrProfReader::getFunctionCounts(
+    StringRef FuncName, uint64_t &FuncHash, std::vector<uint64_t> &Counts) {
+  const auto &Pos = Index->find(FuncName);
+  if (Pos == Index->end())
+    return error(instrprof_error::unknown_function);
+
+  // An entry exists; only hand its contents back if it looks valid.
+  const InstrProfRecord &Found = *Pos;
+  if (!Found.Name.empty()) {
+    FuncHash = Found.Hash;
+    Counts = Found.Counts;
+    return success();
+  }
+  return error(instrprof_error::malformed);
+}
+
+/// Copy the record at the current iterator position into \p Record and
+/// advance the iterator.
+///
+/// \returns eof once the iterator has reached the end of the index data,
+/// malformed if the record just read has an empty name (the iterator has
+/// already been advanced past it), success otherwise.
+error_code IndexedInstrProfReader::readNextRecord(InstrProfRecord &Record) {
+  // Nothing left to read?
+  if (RecordIterator == Index->data_end())
+    return error(instrprof_error::eof);
+
+  // Fetch the current record and step past it before validating it.
+  Record = *RecordIterator;
+  ++RecordIterator;
+
+  if (!Record.Name.empty())
+    return success();
+  return error(instrprof_error::malformed);
+}
diff --git a/lib/ProfileData/InstrProfWriter.cpp b/lib/ProfileData/InstrProfWriter.cpp
index 3024f9676e..1a3eae1db9 100644
--- a/lib/ProfileData/InstrProfWriter.cpp
+++ b/lib/ProfileData/InstrProfWriter.cpp
@@ -13,10 +13,59 @@
//===----------------------------------------------------------------------===//
#include "llvm/ProfileData/InstrProfWriter.h"
-#include "llvm/Support/Endian.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/EndianStream.h"
+#include "llvm/Support/OnDiskHashTable.h"
+
+#include "InstrProfIndexed.h"
using namespace llvm;
+namespace {
+/// Trait class handed to OnDiskChainedHashTableGenerator: it describes how a
+/// function name (key) and its counter data (value) are hashed and written.
+class InstrProfRecordTrait {
+public:
+  typedef StringRef key_type;
+  typedef StringRef key_type_ref;
+
+  typedef InstrProfWriter::CounterData data_type;
+  typedef const InstrProfWriter::CounterData &data_type_ref;
+
+  typedef uint64_t hash_value_type;
+  typedef uint64_t offset_type;
+
+  /// Hash a key with the hash function the indexed format is configured for.
+  static hash_value_type ComputeHash(key_type_ref K) {
+    return IndexedInstrProf::ComputeHash(IndexedInstrProf::HashType, K);
+  }
+
+  /// Write the byte lengths of the key and of the data as two little-endian
+  /// offset_type values, and return them as a (key length, data length) pair.
+  static std::pair<offset_type, offset_type>
+  EmitKeyDataLength(raw_ostream &Out, key_type_ref K, data_type_ref V) {
+    using namespace llvm::support;
+
+    // The key is emitted as its raw characters.
+    unsigned KeyLen = K.size();
+    // The data is the function hash followed by every counter, each stored
+    // as a uint64_t.
+    unsigned DataLen = (1 + V.Counts.size()) * sizeof(uint64_t);
+
+    endian::Writer<little> LE(Out);
+    LE.write<offset_type>(KeyLen);
+    LE.write<offset_type>(DataLen);
+    return std::make_pair(KeyLen, DataLen);
+  }
+
+  /// Write the first \p N characters of the key.
+  static void EmitKey(raw_ostream &Out, key_type_ref K, unsigned N) {
+    Out.write(K.data(), N);
+  }
+
+  /// Write the function hash followed by each counter, all as little-endian
+  /// uint64_t values. The key and the length argument are not used.
+  static void EmitData(raw_ostream &Out, key_type_ref, data_type_ref V,
+                       unsigned) {
+    using namespace llvm::support;
+    endian::Writer<little> LE(Out);
+    LE.write<uint64_t>(V.Hash);
+    for (size_t Idx = 0, End = V.Counts.size(); Idx != End; ++Idx)
+      LE.write<uint64_t>(V.Counts[Idx]);
+  }
+};
+}
+
error_code InstrProfWriter::addFunctionCounts(StringRef FunctionName,
uint64_t FunctionHash,
ArrayRef<uint64_t> Counters) {
@@ -45,16 +94,33 @@ error_code InstrProfWriter::addFunctionCounts(StringRef FunctionName,
return instrprof_error::success;
}
-void InstrProfWriter::write(raw_ostream &OS) {
- // Write out the counts for each function.
+void InstrProfWriter::write(raw_fd_ostream &OS) {
+ OnDiskChainedHashTableGenerator<InstrProfRecordTrait> Generator;
+ uint64_t MaxFunctionCount = 0;
+
+ // Populate the hash table generator.
for (const auto &I : FunctionData) {
- StringRef Name = I.getKey();
- uint64_t Hash = I.getValue().Hash;
- const std::vector<uint64_t> &Counts = I.getValue().Counts;
-
- OS << Name << "\n" << Hash << "\n" << Counts.size() << "\n";
- for (uint64_t Count : Counts)
- OS << Count << "\n";
- OS << "\n";
+ Generator.insert(I.getKey(), I.getValue());
+ if (I.getValue().Counts[0] > MaxFunctionCount)
+ MaxFunctionCount = I.getValue().Counts[0];
}
+
+ using namespace llvm::support;
+ endian::Writer<little> LE(OS);
+
+ // Write the header.
+ LE.write<uint64_t>(IndexedInstrProf::Magic);
+ LE.write<uint64_t>(IndexedInstrProf::Version);
+ LE.write<uint64_t>(MaxFunctionCount);
+ LE.write<uint64_t>(static_cast<uint64_t>(IndexedInstrProf::HashType));
+
+ // Save a space to write the hash table start location.
+ uint64_t HashTableStartLoc = OS.tell();
+ LE.write<uint64_t>(0);
+ // Write the hash table.
+ uint64_t HashTableStart = Generator.Emit(OS);
+
+ // Go back and fill in the hash table start.
+ OS.seek(HashTableStartLoc);
+ LE.write<uint64_t>(HashTableStart);
}