ProfileData: Add support for the indexed instrprof format

This adds support for an indexed instrumentation-based profiling format, which is just a small header and an on-disk hash table. This format will be used by clang's -fprofile-instr-use= for PGO. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@206656 91177308-0d34-0410-b5e6-96231b3b80d8
author: Justin Bogner <mail@justinbogner.com> 2014-04-18 21:48:40 +0000
committer: Justin Bogner <mail@justinbogner.com> 2014-04-18 21:48:40 +0000
commit: e153fb33e49bd6d44189d3659287338c410bc0ce (patch)
tree: 28b8eb8580f22648394e4d4e629d97cd0d72e4c9 /lib/ProfileData
parent: 4c464def6ae721b09ebb7cf202e04339267f761a (diff)
download: llvm-e153fb33e49bd6d44189d3659287338c410bc0ce.tar.gz
llvm-e153fb33e49bd6d44189d3659287338c410bc0ce.tar.bz2
llvm-e153fb33e49bd6d44189d3659287338c410bc0ce.tar.xz
4 files changed, 245 insertions, 17 deletions
diff --git a/lib/ProfileData/InstrProf.cpp b/lib/ProfileData/InstrProf.cpp
index 850f61354e..2eca8b2045 100644
--- a/lib/ProfileData/InstrProf.cpp
+++ b/lib/ProfileData/InstrProf.cpp
@@ -33,6 +33,8 @@ class InstrProfErrorCategoryType : public error_category {
       return "Invalid header";
     case instrprof_error::unsupported_version:
       return "Unsupported format version";
+    case instrprof_error::unsupported_hash_type:
+      return "Unsupported hash function";
     case instrprof_error::too_large:
       return "Too much profile data";
     case instrprof_error::truncated:
diff --git a/lib/ProfileData/InstrProfIndexed.h b/lib/ProfileData/InstrProfIndexed.h
new file mode 100644
index 0000000000..87eb4c3a02
--- /dev/null
+++ b/lib/ProfileData/InstrProfIndexed.h
@@ -0,0 +1,66 @@
+//=-- InstrProfIndexed.h - Indexed profiling format support -------*- C++ -*-=//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Shared header for the instrumented profile data reader and writer.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_PROFILEDATA_INSTRPROF_INDEXED_H_
+#define LLVM_PROFILEDATA_INSTRPROF_INDEXED_H_
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MD5.h"
+
+namespace llvm {
+
+namespace IndexedInstrProf {
+/// Hash functions that can be named in an indexed profile's header.
+enum class HashT : uint32_t {
+  MD5,
+
+  Last = MD5
+};
+
+/// Hash \p Str with MD5 and return 8 bytes of the digest as a uint64_t.
+static inline uint64_t MD5Hash(StringRef Str) {
+  MD5 Hash;
+  Hash.update(Str);
+  llvm::MD5::MD5Result Result;
+  Hash.final(Result);
+  // Return the least significant 8 bytes. Our MD5 implementation returns the
+  // result in little endian, so we may need to swap bytes.
+  using namespace llvm::support;
+  return endian::read<uint64_t, little, unaligned>(Result);
+}
+
+/// Hash \p K with the hash function selected by \p Type.
+///
+/// Both InstrProfReader.cpp and InstrProfWriter.cpp include this header, so
+/// the definition needs internal linkage, like MD5Hash. With external linkage
+/// each of them would provide its own definition of the same symbol.
+static inline uint64_t ComputeHash(HashT Type, StringRef K) {
+  switch (Type) {
+  case HashT::MD5:
+    return IndexedInstrProf::MD5Hash(K);
+  }
+  llvm_unreachable("Unhandled hash type");
+}
+
+// Header values of the indexed format: file magic, format version, and the
+// hash function used for the table.
+const uint64_t Magic = 0x8169666f72706cff; // "\xfflprofi\x81"
+const uint64_t Version = 1;
+const HashT HashType = HashT::MD5;
+}
+
+} // end namespace llvm
+
+#endif // LLVM_PROFILEDATA_INSTRPROF_INDEXED_H_
diff --git a/lib/ProfileData/InstrProfReader.cpp b/lib/ProfileData/InstrProfReader.cpp
index b07f402777..c8d2c2781c 100644
--- a/lib/ProfileData/InstrProfReader.cpp
+++ b/lib/ProfileData/InstrProfReader.cpp
@@ -15,30 +15,62 @@
 #include "llvm/ProfileData/InstrProfReader.h"
 #include "llvm/ProfileData/InstrProf.h"
 
+#include "InstrProfIndexed.h"
+
 #include <cassert>
 
 using namespace llvm;
 
-error_code InstrProfReader::create(std::string Path,
-                                   std::unique_ptr<InstrProfReader> &Result) {
-  std::unique_ptr<MemoryBuffer> Buffer;
+static error_code setupMemoryBuffer(std::string Path,
+                                    std::unique_ptr<MemoryBuffer> &Buffer) {
   if (error_code EC = MemoryBuffer::getFileOrSTDIN(Path, Buffer))
     return EC;
 
   // Sanity check the file.
   if (Buffer->getBufferSize() > std::numeric_limits<unsigned>::max())
     return instrprof_error::too_large;
+  return instrprof_error::success;
+}
+
+static error_code initializeReader(InstrProfReader &Reader) {
+  return Reader.readHeader();
+}
+
+error_code InstrProfReader::create(std::string Path,
+                                   std::unique_ptr<InstrProfReader> &Result) {
+  // Set up the buffer to read.
+  std::unique_ptr<MemoryBuffer> Buffer;
+  if (error_code EC = setupMemoryBuffer(Path, Buffer))
+    return EC;
 
   // Create the reader.
-  if (RawInstrProfReader64::hasFormat(*Buffer))
+  if (IndexedInstrProfReader::hasFormat(*Buffer))
+    Result.reset(new IndexedInstrProfReader(std::move(Buffer)));
+  else if (RawInstrProfReader64::hasFormat(*Buffer))
     Result.reset(new RawInstrProfReader64(std::move(Buffer)));
   else if (RawInstrProfReader32::hasFormat(*Buffer))
     Result.reset(new RawInstrProfReader32(std::move(Buffer)));
   else
     Result.reset(new TextInstrProfReader(std::move(Buffer)));
 
-  // Read the header and return the result.
-  return Result->readHeader();
+  // Initialize the reader and return the result.
+  return initializeReader(*Result);
+}
+
+error_code IndexedInstrProfReader::create(
+    std::string Path, std::unique_ptr<IndexedInstrProfReader> &Result) {
+  // Set up the buffer to read.
+  std::unique_ptr<MemoryBuffer> Buffer;
+  if (error_code EC = setupMemoryBuffer(Path, Buffer))
+    return EC;
+
+  // Create the reader.
+  if (!IndexedInstrProfReader::hasFormat(*Buffer))
+    return instrprof_error::bad_magic;
+  Result.reset(new IndexedInstrProfReader(std::move(Buffer)));
+
+  // Initialize the reader and return the result.
+  return initializeReader(*Result);
 }
 
 void InstrProfIterator::Increment() {
@@ -210,3 +242,93 @@ namespace llvm {
 template class RawInstrProfReader<uint32_t>;
 template class RawInstrProfReader<uint64_t>;
 }
+
+bool IndexedInstrProfReader::hasFormat(const MemoryBuffer &DataBuffer) {
+  if (DataBuffer.getBufferSize() < 8)
+    return false;
+  using namespace support;
+  uint64_t Magic =
+      endian::read<uint64_t, little, aligned>(DataBuffer.getBufferStart());
+  return Magic == IndexedInstrProf::Magic;
+}
+
+/// Read and validate the header, then set up the on disk hash table index and
+/// the iterator used by readNextRecord.
+error_code IndexedInstrProfReader::readHeader() {
+  const unsigned char *Start = (unsigned char *)DataBuffer->getBufferStart();
+  const unsigned char *End = (unsigned char *)DataBuffer->getBufferEnd();
+  const unsigned char *Cur = Start;
+
+  // The header is five 64-bit fields: magic, version, maximal function count,
+  // hash type, and hash table offset. All five are read below, so all five
+  // have to be present.
+  const uint64_t HeaderSize = 5 * sizeof(uint64_t);
+  if (uint64_t(End - Start) < HeaderSize)
+    return error(instrprof_error::truncated);
+
+  using namespace support;
+
+  // Check the magic number.
+  uint64_t Magic = endian::readNext<uint64_t, little, unaligned>(Cur);
+  if (Magic != IndexedInstrProf::Magic)
+    return error(instrprof_error::bad_magic);
+
+  // Read the version.
+  uint64_t Version = endian::readNext<uint64_t, little, unaligned>(Cur);
+  if (Version != IndexedInstrProf::Version)
+    return error(instrprof_error::unsupported_version);
+
+  // Read the maximal function count.
+  MaxFunctionCount = endian::readNext<uint64_t, little, unaligned>(Cur);
+
+  // Read the hash type. Check the full 64-bit value from the file before
+  // narrowing it to the 32-bit enum, so that a value with high bits set is
+  // rejected instead of being truncated into a valid one.
+  uint64_t HashTypeValue = endian::readNext<uint64_t, little, unaligned>(Cur);
+  if (HashTypeValue > static_cast<uint64_t>(IndexedInstrProf::HashT::Last))
+    return error(instrprof_error::unsupported_hash_type);
+  IndexedInstrProf::HashT HashType =
+      static_cast<IndexedInstrProf::HashT>(HashTypeValue);
+
+  // Read the hash table offset. It comes from the file, so reject a value
+  // that points into the header or past the end of the buffer.
+  uint64_t HashOffset = endian::readNext<uint64_t, little, unaligned>(Cur);
+  if (HashOffset < HeaderSize || HashOffset >= uint64_t(End - Start))
+    return error(instrprof_error::malformed);
+
+  // The rest of the file is an on disk hash table.
+  Index.reset(InstrProfReaderIndex::Create(Start + HashOffset, Cur, Start,
+                                           InstrProfLookupTrait(HashType)));
+  // Set up our iterator for readNextRecord.
+  RecordIterator = Index->data_begin();
+
+  return success();
+}
+
+error_code IndexedInstrProfReader::getFunctionCounts(
+    StringRef FuncName, uint64_t &FuncHash, std::vector<uint64_t> &Counts) {
+  const auto &Iter = Index->find(FuncName);
+  if (Iter == Index->end())
+    return error(instrprof_error::unknown_function);
+
+  // Found it. Make sure it's valid before giving back a result.
+  const InstrProfRecord &Record = *Iter;
+  if (Record.Name.empty())
+    return error(instrprof_error::malformed);
+  FuncHash = Record.Hash;
+  Counts = Record.Counts;
+  return success();
+}
+
+error_code IndexedInstrProfReader::readNextRecord(InstrProfRecord &Record) {
+  // Are we out of records?
+  if (RecordIterator == Index->data_end())
+    return error(instrprof_error::eof);
+
+  // Read the next one.
+  Record = *RecordIterator;
+  ++RecordIterator;
+  if (Record.Name.empty())
+    return error(instrprof_error::malformed);
+  return success();
+}
diff --git a/lib/ProfileData/InstrProfWriter.cpp b/lib/ProfileData/InstrProfWriter.cpp
index 3024f9676e..1a3eae1db9 100644
--- a/lib/ProfileData/InstrProfWriter.cpp
+++ b/lib/ProfileData/InstrProfWriter.cpp
@@ -13,10 +13,59 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/ProfileData/InstrProfWriter.h"
-#include "llvm/Support/Endian.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/EndianStream.h"
+#include "llvm/Support/OnDiskHashTable.h"
+
+#include "InstrProfIndexed.h"
 
 using namespace llvm;
 
+namespace {
+class InstrProfRecordTrait {
+public:
+  typedef StringRef key_type;
+  typedef StringRef key_type_ref;
+
+  typedef InstrProfWriter::CounterData data_type;
+  typedef const InstrProfWriter::CounterData &data_type_ref;
+
+  typedef uint64_t hash_value_type;
+  typedef uint64_t offset_type;
+
+  static hash_value_type ComputeHash(key_type_ref K) {
+    return IndexedInstrProf::ComputeHash(IndexedInstrProf::HashType, K);
+  }
+
+  static std::pair<offset_type, offset_type>
+  EmitKeyDataLength(raw_ostream &Out, key_type_ref K, data_type_ref V) {
+    using namespace llvm::support;
+    endian::Writer<little> LE(Out);
+
+    unsigned N = K.size();
+    LE.write<offset_type>(N);
+
+    unsigned M = (1 + V.Counts.size()) * sizeof(uint64_t);
+    LE.write<offset_type>(M);
+
+    return std::make_pair(N, M);
+  }
+
+  static void EmitKey(raw_ostream &Out, key_type_ref K, unsigned N){
+    Out.write(K.data(), N);
+  }
+
+  static void EmitData(raw_ostream &Out, key_type_ref, data_type_ref V,
+                       unsigned) {
+    using namespace llvm::support;
+    endian::Writer<little> LE(Out);
+    LE.write<uint64_t>(V.Hash);
+    for (uint64_t I : V.Counts)
+      LE.write<uint64_t>(I);
+  }
+};
+}
+
 error_code InstrProfWriter::addFunctionCounts(StringRef FunctionName,
                                               uint64_t FunctionHash,
                                               ArrayRef<uint64_t> Counters) {
@@ -45,16 +94,38 @@ error_code InstrProfWriter::addFunctionCounts(StringRef FunctionName,
   return instrprof_error::success;
 }
 
-void InstrProfWriter::write(raw_ostream &OS) {
-  // Write out the counts for each function.
+/// Write the collected function data to \p OS in the indexed format: a fixed
+/// header followed by an on disk hash table keyed by function name. The hash
+/// table offset in the header is filled in by seeking back once it is known.
+void InstrProfWriter::write(raw_fd_ostream &OS) {
+  OnDiskChainedHashTableGenerator<InstrProfRecordTrait> Generator;
+  uint64_t MaxFunctionCount = 0;
+
+  // Populate the hash table generator.
   for (const auto &I : FunctionData) {
-    StringRef Name = I.getKey();
-    uint64_t Hash = I.getValue().Hash;
-    const std::vector<uint64_t> &Counts = I.getValue().Counts;
-
-    OS << Name << "\n" << Hash << "\n" << Counts.size() << "\n";
-    for (uint64_t Count : Counts)
-      OS << Count << "\n";
-    OS << "\n";
+    Generator.insert(I.getKey(), I.getValue());
+    // The first counter is the one tracked as the function count. A record
+    // without counters has no first element to read, so skip it here.
+    const std::vector<uint64_t> &Counts = I.getValue().Counts;
+    if (!Counts.empty() && Counts[0] > MaxFunctionCount)
+      MaxFunctionCount = Counts[0];
   }
+
+  using namespace llvm::support;
+  endian::Writer<little> LE(OS);
+
+  // Write the header.
+  LE.write<uint64_t>(IndexedInstrProf::Magic);
+  LE.write<uint64_t>(IndexedInstrProf::Version);
+  LE.write<uint64_t>(MaxFunctionCount);
+  LE.write<uint64_t>(static_cast<uint64_t>(IndexedInstrProf::HashType));
+
+  // Save a space to write the hash table start location.
+  uint64_t HashTableStartLoc = OS.tell();
+  LE.write<uint64_t>(0);
+  // Write the hash table.
+  uint64_t HashTableStart = Generator.Emit(OS);
+
+  // Go back and fill in the hash table start.
+  OS.seek(HashTableStartLoc);
+  LE.write<uint64_t>(HashTableStart);
 }
author	Justin Bogner <mail@justinbogner.com>	2014-04-18 21:48:40 +0000
committer	Justin Bogner <mail@justinbogner.com>	2014-04-18 21:48:40 +0000
commit	e153fb33e49bd6d44189d3659287338c410bc0ce (patch)
tree	28b8eb8580f22648394e4d4e629d97cd0d72e4c9 /lib/ProfileData
parent	4c464def6ae721b09ebb7cf202e04339267f761a (diff)
download	llvm-e153fb33e49bd6d44189d3659287338c410bc0ce.tar.gz llvm-e153fb33e49bd6d44189d3659287338c410bc0ce.tar.bz2 llvm-e153fb33e49bd6d44189d3659287338c410bc0ce.tar.xz