summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/llvm/Bytecode/Analyzer.h97
-rw-r--r--include/llvm/Instruction.h7
-rw-r--r--lib/Bytecode/Analyzer/Analyzer.cpp242
-rw-r--r--lib/Bytecode/Analyzer/AnalyzerInternals.h65
-rw-r--r--lib/Bytecode/Analyzer/AnalyzerWrappers.cpp208
-rw-r--r--lib/Bytecode/Analyzer/BytecodeHandler.cpp220
-rw-r--r--lib/Bytecode/Analyzer/BytecodeHandler.h247
-rw-r--r--lib/Bytecode/Analyzer/Dumper.cpp311
-rw-r--r--lib/Bytecode/Analyzer/Makefile13
-rw-r--r--lib/Bytecode/Analyzer/Parser.cpp877
-rw-r--r--lib/Bytecode/Analyzer/Parser.h178
-rw-r--r--lib/Bytecode/Analyzer/ReaderPrimitives.h101
-rw-r--r--lib/Bytecode/Makefile2
-rw-r--r--lib/Bytecode/Reader/Analyzer.cpp242
-rw-r--r--lib/Bytecode/Reader/AnalyzerInternals.h65
-rw-r--r--lib/Bytecode/Reader/AnalyzerWrappers.cpp208
-rw-r--r--lib/Bytecode/Reader/Dumper.cpp311
-rw-r--r--lib/Bytecode/Reader/Parser.cpp877
-rw-r--r--lib/Bytecode/Reader/Parser.h178
-rw-r--r--tools/Makefile3
-rw-r--r--tools/llvm-abcd/Makefile13
-rw-r--r--tools/llvm-abcd/llvm-abcd.cpp115
22 files changed, 4577 insertions, 3 deletions
diff --git a/include/llvm/Bytecode/Analyzer.h b/include/llvm/Bytecode/Analyzer.h
new file mode 100644
index 0000000000..d3e1b95503
--- /dev/null
+++ b/include/llvm/Bytecode/Analyzer.h
@@ -0,0 +1,97 @@
+//===-- llvm/Bytecode/Analyzer.h - Analyzer for bytecode files --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Reid Spencer and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This functionality is implemented by the lib/Bytecode/Analyzer library.
+// This library is used to read VM bytecode files from an iostream and print
+// out a diagnostic analysis of the contents of the file. It is intended for
+// three uses: (a) understanding the bytecode format, (b) ensuring correctness
+// of bytecode format, (c) statistical analysis of generated bytecode files.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_BYTECODE_ANALYZER_H
+#define LLVM_BYTECODE_ANALYZER_H
+
+#include <iosfwd>
+#include <map>
+#include <string>
+
+namespace llvm {
+
+/// This structure is used to contain the output of the Bytecode Analysis
+/// library. It simply contains fields to hold each item of the analysis
+/// results.
+/// @brief Bytecode Analysis results structure
+struct BytecodeAnalysis {
+ unsigned byteSize; ///< The size of the bytecode file in bytes
+ unsigned numTypes; ///< The number of types
+ unsigned numValues; ///< The number of values
+ unsigned numFunctions; ///< The number of functions defined
+ unsigned numConstants; ///< The number of constants
+ unsigned numGlobalVars; ///< The number of global variables
+ unsigned numInstructions; ///< The number of instructions in all functions
+ unsigned numBasicBlocks; ///< The number of BBs in all functions
+ unsigned numOperands; ///< The number of operands in all functions
+ unsigned maxTypeSlot; ///< The maximum slot number for types
+ unsigned maxValueSlot; ///< The maximum slot number for values
+ double density; ///< Density of file (bytes/defs)
+
+ /// A structure that contains various pieces of information related to
+ /// an analysis of a single function.
+ struct BytecodeFunctionInfo {
+ unsigned byteSize; ///< The size of the function in bytecode bytes
+ unsigned numInstructions; ///< The number of instructions in the function
+ unsigned numBasicBlocks; ///< The number of basic blocks in the function
+ unsigned numOperands; ///< The number of operands in the function
+ double density; ///< Density of function
+ double vbrEffectiveness; ///< Effectiveness of variable bit rate encoding.
+ ///< This is the average number of bytes per unsigned value written in the
+ ///< vbr encoding. A "perfect" score of 1.0 means all vbr values were
+ ///< encoded in one byte. A score between 1.0 and 4.0 means that some
+ ///< savings were achieved. A score of 4.0 means vbr didn't help. A score
+ ///< greater than 4.0 means vbr negatively impacted size of the file.
+ };
+
+ /// A mapping of function names to the collected information about the
+ /// function.
+ std::map<std::string,BytecodeFunctionInfo> FunctionInfo;
+
+ /// Flags for what should be done. This is an input to the analysis rather
+ /// than a result: when true, the bytecode is dumped before it is analyzed.
+ bool dumpBytecode;
+};
+
+/// This function is the main entry point into the bytecode analysis library. It
+/// allows you to simply provide a \P filename and storage for the \P Results
+/// that will be filled in with the analysis results.
+/// @brief Analyze contents of a bytecode File
+void AnalyzeBytecodeFile(
+ const std::string& Filename, ///< The name of the bytecode file to read
+ BytecodeAnalysis& Results, ///< The results of the analysis
+ std::string* ErrorStr = 0 ///< Errors, if any.
+ );
+
+/// This function is an alternate entry point into the bytecode analysis
+/// library. It allows you to provide an arbitrary memory buffer which is
+/// assumed to contain a complete bytecode file. The \P Buffer is analyzed and
+/// the \P Results are filled in.
+/// @brief Analyze contents of a bytecode buffer.
+void AnalyzeBytecodeBuffer(
+ const unsigned char* Buffer, ///< Pointer to start of bytecode buffer
+ unsigned BufferSize, ///< Size of the bytecode buffer
+ BytecodeAnalysis& Results, ///< The results of the analysis
+ std::string* ErrorStr = 0 ///< Errors, if any.
+ );
+
+/// This function prints the contents of the BytecodeAnalysis structure in
+/// a human legible form.
+/// @brief Print BytecodeAnalysis structure to an ostream
+void PrintBytecodeAnalysis(BytecodeAnalysis& bca, std::ostream& Out );
+
+} // End llvm namespace
+
+#endif
diff --git a/include/llvm/Instruction.h b/include/llvm/Instruction.h
index 17eec871f7..caba4afd8b 100644
--- a/include/llvm/Instruction.h
+++ b/include/llvm/Instruction.h
@@ -87,9 +87,14 @@ public:
}
static const char* getOpcodeName(unsigned OpCode);
+ static inline bool isTerminator(unsigned OpCode) {
+ return OpCode >= TermOpsBegin && OpCode < TermOpsEnd;
+ }
+
inline bool isTerminator() const { // Instance of TerminatorInst?
- return iType >= TermOpsBegin && iType < TermOpsEnd;
+ return isTerminator(iType);
}
+
inline bool isBinaryOp() const {
return iType >= BinaryOpsBegin && iType < BinaryOpsEnd;
}
diff --git a/lib/Bytecode/Analyzer/Analyzer.cpp b/lib/Bytecode/Analyzer/Analyzer.cpp
new file mode 100644
index 0000000000..99c3e41f9f
--- /dev/null
+++ b/lib/Bytecode/Analyzer/Analyzer.cpp
@@ -0,0 +1,242 @@
+//===-- Analyzer.cpp - Bytecode Analysis Handler ----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Reid Spencer and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the AnalyzerHandler subclass of BytecodeHandler, whose
+// methods get called by the AbstractBytecodeParser when parsing events occur,
+// and implements BytecodeAnalyzer::AnalyzeBytecode on top of it.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AnalyzerInternals.h"
+
+using namespace llvm;
+
+
+namespace {
+
+/// Handler handed to the AbstractBytecodeParser by
+/// BytecodeAnalyzer::AnalyzeBytecode. Every callback is currently a stub:
+/// the void callbacks do nothing and the bool callbacks return false.
+class AnalyzerHandler : public BytecodeHandler {
+public:
+  /// Always returns false; per the BytecodeHandler contract this makes the
+  /// parser throw the error message instead of continuing.
+  bool handleError(const std::string& str) { return false; }
+
+  // --- Parse / module framing -------------------------------------------
+  void handleStart() {}
+  void handleFinish() {}
+  void handleModuleBegin(const std::string& id) {}
+  void handleModuleEnd(const std::string& id) {}
+
+  void handleVersionInfo(unsigned char RevisionNum,       ///< Byte code revision number
+                         Module::Endianness Endianness,   ///< Endianness indicator
+                         Module::PointerSize PointerSize  ///< PointerSize indicator
+                         ) {}
+
+  // --- Module globals block ---------------------------------------------
+  void handleModuleGlobalsBegin() {}
+
+  void handleGlobalVariable(const Type* ElemType,      ///< The type of the global variable
+                            bool isConstant,           ///< Whether the GV is constant or not
+                            GlobalValue::LinkageTypes  ///< The linkage type of the GV
+                            ) {}
+
+  void handleInitializedGV(const Type* ElemType,       ///< The type of the global variable
+                           bool isConstant,            ///< Whether the GV is constant or not
+                           GlobalValue::LinkageTypes,  ///< The linkage type of the GV
+                           unsigned initSlot           ///< Slot number of GV's initializer
+                           ) {}
+
+  virtual void handleType(const Type* Ty) {}
+
+  void handleFunctionDeclaration(const Type* FuncType  ///< The type of the function
+                                 ) {}
+
+  void handleModuleGlobalsEnd() {}
+
+  // --- Compaction table --------------------------------------------------
+  void handleCompactionTableBegin() {}
+  void handleCompactionTablePlane(unsigned Ty, unsigned NumEntries) {}
+  void handleCompactionTableType(unsigned i, unsigned TypSlot, const Type*) {}
+  void handleCompactionTableValue(unsigned i, unsigned ValSlot, const Type*) {}
+  void handleCompactionTableEnd() {}
+
+  // --- Symbol table ------------------------------------------------------
+  void handleSymbolTableBegin() {}
+  void handleSymbolTablePlane(unsigned Ty, unsigned NumEntries,
+                              const Type* Typ) {}
+  void handleSymbolTableType(unsigned i, unsigned slot,
+                             const std::string& name) {}
+  void handleSymbolTableValue(unsigned i, unsigned slot,
+                              const std::string& name) {}
+  void handleSymbolTableEnd() {}
+
+  // --- Functions, basic blocks, instructions ------------------------------
+  void handleFunctionBegin(const Type* FType,
+                           GlobalValue::LinkageTypes linkage) {}
+  void handleFunctionEnd(const Type* FType) {}
+  void handleBasicBlockBegin(unsigned blocknum) {}
+
+  /// Always returns false.
+  bool handleInstruction(unsigned Opcode, const Type* iType,
+                         std::vector<unsigned>& Operands) { return false; }
+
+  void handleBasicBlockEnd(unsigned blocknum) {}
+
+  // --- Global constants block ---------------------------------------------
+  void handleGlobalConstantsBegin() {}
+  void handleConstantExpression(
+      unsigned Opcode, const Type* Typ,
+      std::vector<std::pair<const Type*,unsigned> > ArgVec) {}
+  void handleConstantValue(Constant * c) {}
+  void handleConstantArray(const ArrayType* AT,
+                           std::vector<unsigned>& Elements) {}
+  void handleConstantStruct(const StructType* ST,
+                            std::vector<unsigned>& ElementSlots) {}
+  void handleConstantPointer(const PointerType* PT, unsigned Slot) {}
+  void handleConstantString(const ConstantArray* CA) {}
+  void handleGlobalConstantsEnd() {}
+};
+
+}
+
+/// Runs an AbstractBytecodeParser, wired to an AnalyzerHandler, over the
+/// given buffer: first ParseBytecode(Buf, Length, ModuleID), then
+/// ParseAllFunctionBodies(). Note that \p bca is not touched by this function.
+void llvm::BytecodeAnalyzer::AnalyzeBytecode(const unsigned char *Buf,
+                                             unsigned Length,
+                                             BytecodeAnalysis& bca,
+                                             const std::string &ModuleID) {
+  AnalyzerHandler Handler;
+  AbstractBytecodeParser Parser(&Handler);
+
+  Parser.ParseBytecode(Buf, Length, ModuleID);
+  Parser.ParseAllFunctionBodies();
+}
+
+// vim: sw=2
diff --git a/lib/Bytecode/Analyzer/AnalyzerInternals.h b/lib/Bytecode/Analyzer/AnalyzerInternals.h
new file mode 100644
index 0000000000..d9a2e843d8
--- /dev/null
+++ b/lib/Bytecode/Analyzer/AnalyzerInternals.h
@@ -0,0 +1,65 @@
+//===-- AnalyzerInternals.h - Internals of bytecode analyzer ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This header file defines various stuff that is used by the bytecode analyzer.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ANALYZER_INTERNALS_H
+#define ANALYZER_INTERNALS_H
+
+#include "Parser.h"
+#include "llvm/Bytecode/Analyzer.h"
+
+// Enable to trace to figure out what the heck is going on when parsing fails
+//#define TRACE_LEVEL 10
+//#define DEBUG_OUTPUT
+
+// BCR_TRACE(n, X): when TRACE_LEVEL is defined to a non-zero value, streams X
+// to std::cerr, prefixed by 2*n spaces, for every n below TRACE_LEVEL.
+// Otherwise the macro expands to nothing, so X is never evaluated.
+// NOTE(review): the enabled form is a bare `if` statement; placed directly in
+// front of an `else` it would capture that `else`.
+#if TRACE_LEVEL // ByteCodeReading_TRACEr
+#define BCR_TRACE(n, X) \
+ if (n < TRACE_LEVEL) std::cerr << std::string(n*2, ' ') << X
+#else
+#define BCR_TRACE(n, X)
+#endif
+
+namespace llvm {
+
+/// Common base of the file, buffer and stdin analyzers. It supplies the two
+/// operations they run over an in-memory bytecode image. Not copyable.
+class BytecodeAnalyzer {
+public:
+  BytecodeAnalyzer() {}
+  ~BytecodeAnalyzer() {}
+
+  /// Analyze the \p Length bytes at \p Buf; \p ModuleID names the module.
+  void AnalyzeBytecode(const unsigned char *Buf, unsigned Length,
+                       BytecodeAnalysis& bca, const std::string &ModuleID);
+
+  /// Same parameters as AnalyzeBytecode; invoked by the analyzers when
+  /// bca.dumpBytecode is set.
+  void DumpBytecode(const unsigned char *Buf, unsigned Length,
+                    BytecodeAnalysis& bca, const std::string &ModuleID);
+
+  /// Debugging aid: writes a fixed marker line to std::cerr.
+  void dump() const { std::cerr << "BytecodeParser instance!\n"; }
+
+private:
+  BytecodeAnalyzer(const BytecodeAnalyzer &);  // DO NOT IMPLEMENT
+  void operator=(const BytecodeAnalyzer &);    // DO NOT IMPLEMENT
+
+  BytecodeAnalysis TheAnalysis;
+};
+
+} // End llvm namespace
+
+#endif
+
+// vim: sw=2
diff --git a/lib/Bytecode/Analyzer/AnalyzerWrappers.cpp b/lib/Bytecode/Analyzer/AnalyzerWrappers.cpp
new file mode 100644
index 0000000000..a0e4845a1b
--- /dev/null
+++ b/lib/Bytecode/Analyzer/AnalyzerWrappers.cpp
@@ -0,0 +1,208 @@
+//===- AnalyzerWrappers.cpp - Analyze bytecode from file or buffer -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Reid Spencer and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements loading and analysis of a bytecode file and analyzing a
+// bytecode buffer.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Bytecode/Analyzer.h"
+#include "AnalyzerInternals.h"
+#include "Support/FileUtilities.h"
+#include "Support/StringExtras.h"
+#include "Config/unistd.h"
+#include <cerrno>
+
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// BytecodeFileAnalyzer - Analyze from an mmap'able file descriptor.
+//
+
+namespace {
+ /// BytecodeFileAnalyzer - parses a bytecode file from a file
+ /// Analyzer whose input is a named file. The constructor obtains the
+ /// file's contents and the destructor releases them again.
+ class BytecodeFileAnalyzer : public BytecodeAnalyzer {
+ public:
+   BytecodeFileAnalyzer(const std::string &Filename, BytecodeAnalysis& bca);
+   ~BytecodeFileAnalyzer();
+
+ private:
+   unsigned char *Buffer;  // start of the file image
+   unsigned Length;        // size of the file image in bytes
+
+   BytecodeFileAnalyzer(const BytecodeFileAnalyzer&);  // Do not implement
+   void operator=(const BytecodeFileAnalyzer &BFR);    // Do not implement
+ };
+}
+
+/// Formats an errno value as "<strerror text>, while trying to <descr>".
+static std::string ErrnoMessage (int savedErrNum, std::string descr) {
+  std::string Msg(::strerror(savedErrNum));
+  Msg += ", while trying to ";
+  Msg += descr;
+  return Msg;
+}
+
+/// Reads \p Filename into the address space, optionally dumps it (when
+/// bca.dumpBytecode is set) and then analyzes it. Throws a std::string if the
+/// file cannot be read; any exception from dumping/analysis is re-thrown after
+/// the file image has been released.
+BytecodeFileAnalyzer::BytecodeFileAnalyzer(const std::string &Filename,
+                                           BytecodeAnalysis& bca) {
+  Buffer = (unsigned char*)ReadFileIntoAddressSpace(Filename, Length);
+  if (!Buffer)
+    throw std::string("Error reading file '") + Filename + "'.";
+
+  try {
+    // Work on the image we just mapped in.
+    if (bca.dumpBytecode) {
+      DumpBytecode(Buffer, Length, bca, Filename);
+    }
+    AnalyzeBytecode(Buffer, Length, bca, Filename);
+  } catch (...) {
+    // The destructor does not run when a constructor throws, so release the
+    // image here before propagating.
+    UnmapFileFromAddressSpace(Buffer, Length);
+    throw;
+  }
+}
+
+/// Releases the file image acquired by the constructor.
+BytecodeFileAnalyzer::~BytecodeFileAnalyzer()
+{
+  UnmapFileFromAddressSpace(Buffer, Length);
+}
+
+//===----------------------------------------------------------------------===//
+// BytecodeBufferAnalyzer - Read from a memory buffer
+//
+
+namespace {
+ /// BytecodeBufferAnalyzer - parses a bytecode file from a buffer
+ ///
+ /// Analyzer whose input is a caller-supplied memory buffer.
+ class BytecodeBufferAnalyzer : public BytecodeAnalyzer {
+ public:
+   BytecodeBufferAnalyzer(const unsigned char *Buf, unsigned Length,
+                          BytecodeAnalysis& bca, const std::string &ModuleID);
+   ~BytecodeBufferAnalyzer();
+
+ private:
+   const unsigned char *Buffer;  // caller's buffer, or our own copy of it
+   bool MustDelete;              // true when Buffer is our own copy
+
+   BytecodeBufferAnalyzer(const BytecodeBufferAnalyzer&);  // Do not implement
+   void operator=(const BytecodeBufferAnalyzer &BFR);      // Do not implement
+ };
+}
+
+/// Analyzes (and, when bca.dumpBytecode is set, first dumps) the \p Length
+/// bytes at \p Buf. If \p Buf is not 4-byte aligned the bytes are first copied
+/// to an aligned position inside a freshly allocated buffer, which this object
+/// then owns (MustDelete); otherwise the caller's memory is used directly.
+BytecodeBufferAnalyzer::BytecodeBufferAnalyzer(const unsigned char *Buf,
+ unsigned Length,
+ BytecodeAnalysis& bca,
+ const std::string &ModuleID) {
+ // If not aligned, allocate a new buffer to hold the bytecode...
+ const unsigned char *ParseBegin = 0;
+ if ((intptr_t)Buf & 3) {
+ // Four spare bytes leave room to slide the start up to an aligned address.
+ Buffer = new unsigned char[Length+4];
+ // Offset is in 1..4 (4, not 0, when the allocation is already aligned), so
+ // Offset + Length never exceeds the Length+4 bytes allocated above.
+ unsigned Offset = 4 - ((intptr_t)Buffer & 3); // Make sure it's aligned
+ ParseBegin = Buffer + Offset;
+ memcpy((unsigned char*)ParseBegin, Buf, Length); // Copy it over
+ MustDelete = true;
+ } else {
+ // If we don't need to copy it over, just use the caller's copy
+ ParseBegin = Buffer = Buf;
+ MustDelete = false;
+ }
+ try {
+ if ( bca.dumpBytecode )
+ DumpBytecode(ParseBegin, Length, bca, ModuleID);
+ AnalyzeBytecode(ParseBegin, Length, bca, ModuleID);
+ } catch (...) {
+ // The destructor does not run when a constructor throws, so the private
+ // copy (if any) has to be freed here before re-throwing.
+ if (MustDelete) delete [] Buffer;
+ throw;
+ }
+}
+
+/// Frees the aligned copy made by the constructor, if one was made.
+BytecodeBufferAnalyzer::~BytecodeBufferAnalyzer() {
+  if (!MustDelete)
+    return;  // Buffer is the caller's memory; leave it alone.
+  delete [] Buffer;
+}
+
+//===----------------------------------------------------------------------===//
+// BytecodeStdinAnalyzer - Read bytecode from Standard Input
+//
+
+namespace {
+ /// BytecodeStdinAnalyzer - parses a bytecode file from stdin
+ ///
+ /// Analyzer whose input is everything readable from standard input.
+ class BytecodeStdinAnalyzer : public BytecodeAnalyzer {
+ public:
+   BytecodeStdinAnalyzer(BytecodeAnalysis& bca);
+
+ private:
+   std::vector<unsigned char> FileData;  // all bytes read from stdin
+   unsigned char *FileBuf;               // points at FileData's first byte
+
+   BytecodeStdinAnalyzer(const BytecodeStdinAnalyzer&);  // Do not implement
+   void operator=(const BytecodeStdinAnalyzer &BFR);     // Do not implement
+ };
+}
+
+/// Slurps standard input into FileData, then dumps it (when bca.dumpBytecode
+/// is set) and analyzes it under the module id "<stdin>". Throws a std::string
+/// if a read fails or if standard input yields no data.
+BytecodeStdinAnalyzer::BytecodeStdinAnalyzer(BytecodeAnalysis& bca ) {
+  unsigned char Chunk[4096*4];
+
+  // stdin cannot be mmapped, so accumulate it chunk by chunk until read()
+  // reports end of input by returning 0.
+  for (;;) {
+    int NumRead = ::read(0 /*stdin*/, Chunk, 4096*4);
+    if (NumRead == 0)
+      break;
+    if (NumRead == -1)
+      throw ErrnoMessage(errno, "read from standard input");
+
+    FileData.insert(FileData.end(), Chunk, Chunk+NumRead);
+  }
+
+  if (FileData.empty())
+    throw std::string("Standard Input empty!");
+
+  FileBuf = &FileData[0];
+  if (bca.dumpBytecode) {
+    DumpBytecode(FileBuf, FileData.size(), bca, "<stdin>");
+  }
+  AnalyzeBytecode(FileBuf, FileData.size(), bca, "<stdin>");
+}
+
+//===----------------------------------------------------------------------===//
+// Wrapper functions
+//===----------------------------------------------------------------------===//
+
+// AnalyzeBytecodeFile - analyze one file, or standard input when Filename is
+// "-". A std::string thrown by the analyzer is stored in *ErrorStr when
+// ErrorStr is non-null, and is otherwise dropped.
+void llvm::AnalyzeBytecodeFile(const std::string &Filename,
+                               BytecodeAnalysis& bca,
+                               std::string *ErrorStr) {
+  try {
+    // All the work happens inside the analyzer's constructor.
+    if (Filename == "-") {
+      BytecodeStdinAnalyzer FromStdin(bca);
+    } else {
+      BytecodeFileAnalyzer FromFile(Filename, bca);
+    }
+  } catch (std::string &err) {
+    if (ErrorStr)
+      *ErrorStr = err;
+  }
+}
+
+// AnalyzeBytecodeBuffer - analyze a buffer under the module id "<buffer>".
+// A std::string thrown by the analyzer is stored in *ErrorStr when ErrorStr is
+// non-null, and is otherwise dropped.
+void llvm::AnalyzeBytecodeBuffer(
+    const unsigned char* Buffer, ///< Pointer to start of bytecode buffer
+    unsigned BufferSize,         ///< Size of the bytecode buffer
+    BytecodeAnalysis& Results,   ///< The results of the analysis
+    std::string* ErrorStr        ///< Errors, if any.
+    ) {
+  try {
+    // All the work happens inside the analyzer's constructor.
+    BytecodeBufferAnalyzer FromBuffer(Buffer, BufferSize, Results, "<buffer>");
+  } catch (std::string& err) {
+    if (ErrorStr)
+      *ErrorStr = err;
+  }
+}
+
+
+/// Intended to print the contents of the BytecodeAnalysis structure in a
+/// human legible form. For now it is a placeholder that ignores \p bca and
+/// writes a fixed "not implemented" line to \p Out.
+/// @brief Print BytecodeAnalysis structure to an ostream
+void llvm::PrintBytecodeAnalysis(BytecodeAnalysis& bca, std::ostream& Out ) {
+  Out << "Not Implemented Yet.\n";
+}
+
+// vim: sw=2
diff --git a/lib/Bytecode/Analyzer/BytecodeHandler.cpp b/lib/Bytecode/Analyzer/BytecodeHandler.cpp
new file mode 100644
index 0000000000..2415958867
--- /dev/null
+++ b/lib/Bytecode/Analyzer/BytecodeHandler.cpp
@@ -0,0 +1,220 @@
+//===-- BytecodeHandler.cpp - Parsing Handler -------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Reid Spencer and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the default implementations of the BytecodeHandler
+// methods that get called by the AbstractBytecodeParser when parsing events occur.
+//
+//===----------------------------------------------------------------------===//
+
+#include "BytecodeHandler.h"
+
+using namespace llvm;
+
+// Default implementations of every BytecodeHandler callback. The void
+// callbacks do nothing; the two bool callbacks return false.
+
+bool BytecodeHandler::handleError(const std::string& str) { return false; }
+
+void BytecodeHandler::handleStart() {}
+
+void BytecodeHandler::handleFinish() {}
+
+void BytecodeHandler::handleModuleBegin(const std::string& id) {}
+
+void BytecodeHandler::handleModuleEnd(const std::string& id) {}
+
+void BytecodeHandler::handleVersionInfo(
+    unsigned char RevisionNum,       ///< Byte code revision number
+    Module::Endianness Endianness,   ///< Endianness indicator
+    Module::PointerSize PointerSize  ///< PointerSize indicator
+    ) {}
+
+void BytecodeHandler::handleModuleGlobalsBegin() {}
+
+void BytecodeHandler::handleGlobalVariable(
+    const Type* ElemType,      ///< The type of the global variable
+    bool isConstant,           ///< Whether the GV is constant or not
+    GlobalValue::LinkageTypes  ///< The linkage type of the GV
+    ) {}
+
+void BytecodeHandler::handleInitializedGV(
+    const Type* ElemType,       ///< The type of the global variable
+    bool isConstant,            ///< Whether the GV is constant or not
+    GlobalValue::LinkageTypes,  ///< The linkage type of the GV
+    unsigned initSlot           ///< Slot number of GV's initializer
+    ) {}
+
+void BytecodeHandler::handleType(const Type* Ty) {}
+
+void BytecodeHandler::handleFunctionDeclaration(
+    const Type* FuncType  ///< The type of the function
+    ) {}
+
+void BytecodeHandler::handleModuleGlobalsEnd() {}
+
+void BytecodeHandler::handleCompactionTableBegin() {}
+
+void BytecodeHandler::handleCompactionTablePlane(unsigned Ty,
+                                                 unsigned NumEntries) {}
+
+void BytecodeHandler::handleCompactionTableType(unsigned i, unsigned TypSlot,
+                                                const Type*) {}
+
+void BytecodeHandler::handleCompactionTableValue(unsigned i, unsigned ValSlot,
+                                                 const Type*) {}
+
+void BytecodeHandler::handleCompactionTableEnd() {}
+
+void BytecodeHandler::handleSymbolTableBegin() {}
+
+void BytecodeHandler::handleSymbolTablePlane(unsigned Ty, unsigned NumEntries,
+                                             const Type* Typ) {}
+
+void BytecodeHandler::handleSymbolTableType(unsigned i, unsigned slot,
+                                            const std::string& name) {}
+
+void BytecodeHandler::handleSymbolTableValue(unsigned i, unsigned slot,
+                                             const std::string& name) {}
+
+void BytecodeHandler::handleSymbolTableEnd() {}
+
+void BytecodeHandler::handleFunctionBegin(const Type* FType,
+                                          GlobalValue::LinkageTypes linkage) {}
+
+void BytecodeHandler::handleFunctionEnd(const Type* FType) {}
+
+void BytecodeHandler::handleBasicBlockBegin(unsigned blocknum) {}
+
+bool BytecodeHandler::handleInstruction(unsigned Opcode, const Type* iType,
+                                        std::vector<unsigned>& Operands) {
+  return false;
+}
+
+void BytecodeHandler::handleBasicBlockEnd(unsigned blocknum) {}
+
+void BytecodeHandler::handleGlobalConstantsBegin() {}
+
+void BytecodeHandler::handleConstantExpression(
+    unsigned Opcode, const Type* Typ,
+    std::vector<std::pair<const Type*,unsigned> > ArgVec) {}
+
+void BytecodeHandler::handleConstantValue(Constant * c) {}
+
+void BytecodeHandler::handleConstantArray(const ArrayType* AT,
+                                          std::vector<unsigned>& Elements) {}
+
+void BytecodeHandler::handleConstantStruct(
+    const StructType* ST, std::vector<unsigned>& ElementSlots) {}
+
+void BytecodeHandler::handleConstantPointer(const PointerType* PT,
+                                            unsigned Slot) {}
+
+void BytecodeHandler::handleConstantString(const ConstantArray* CA) {}
+
+void BytecodeHandler::handleGlobalConstantsEnd() {}
+
+// vim: sw=2
diff --git a/lib/Bytecode/Analyzer/BytecodeHandler.h b/lib/Bytecode/Analyzer/BytecodeHandler.h
new file mode 100644
index 0000000000..2b03e2d332
--- /dev/null
+++ b/lib/Bytecode/Analyzer/BytecodeHandler.h
@@ -0,0 +1,247 @@
+//===-- BytecodeHandler.h - Parsing Handler ---------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Reid Spencer and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This header file defines the BytecodeHandler class that gets called by the
+// AbstractBytecodeParser when parsing events occur.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef BYTECODE_HANDLER_H
+#define BYTECODE_HANDLER_H
+
+#include "llvm/Module.h"
+#include "llvm/GlobalValue.h"
+#include <vector>
+
+namespace llvm {
+
+class ArrayType;
+class StructType;
+class PointerType;
+class ConstantArray;
+
+/// This class provides the interface for handling bytecode events during
+/// parsing. The methods on this interface are invoked by the
+/// AbstractBytecodeParser as it discovers the content of a bytecode stream.
+/// This class provides a clear separation of concerns between recognizing
+/// the semantic units of a bytecode file and deciding what to do with them.
+/// The AbstractBytecodeParser recognizes the content of the bytecode file and
+/// calls the BytecodeHandler methods to determine what should be done. This
+/// arrangement allows Bytecode files to be read and handled for a number of
+/// purposes simply by creating a subclass of BytecodeHandler. None of the
+/// parsing details need to be understood, only the meaning of the calls
+/// made on this interface.
+///
+/// Another paradigm that uses this design pattern is the XML SAX Parser. The
+/// ContentHandler for SAX plays the same role as the BytecodeHandler here.
+/// @brief Handle Bytecode Parsing Events
+class BytecodeHandler {
+
+/// @name Constructors And Operators
+/// @{
+public:
+  /// @brief Default constructor (empty)
+  BytecodeHandler() {}
+  /// @brief Virtual destructor (empty)
+  virtual ~BytecodeHandler() {}
+
+private:
+  BytecodeHandler(const BytecodeHandler &);  // DO NOT IMPLEMENT
+  void operator=(const BytecodeHandler &);   // DO NOT IMPLEMENT
+
+/// @}
+/// @name Handler Methods
+/// @{
+public:
+
+  /// This method is called whenever the parser detects an error in the
+  /// bytecode formatting. Returning true will cause the parser to keep
+  /// going, however this is inadvisable in most cases. Returning false will
+  /// cause the parser to throw the message as a std::string.
+  /// @brief Handle parsing errors.
+  virtual bool handleError(const std::string& str );
+
+  /// This method is called at the beginning of a parse before anything is
+  /// read in order to give the handler a chance to initialize.
+  /// @brief Handle the start of a bytecode parse
+  virtual void handleStart();
+
+  /// This method is called at the end of a parse after everything has been
+  /// read in order to give the handler a chance to terminate.
+  /// @brief Handle the end of a bytecode parse
+  virtual void handleFinish();
+
+  /// This method is called at the start of a module to indicate that a
+  /// module is being parsed.
+  /// @brief Handle the start of a module.
+  virtual void handleModuleBegin(const std::string& id);
+
+  /// This method is called at the end of a module to indicate that the module
+  /// previously being parsed has concluded.
+  /// @brief Handle the end of a module.
+  virtual void handleModuleEnd(const std::string& id);
+
+  /// This method is called once the version information has been parsed. It
+  /// provides the information about the version of the bytecode file being
+  /// read.
+  /// @brief Handle the bytecode prolog
+  virtual void handleVersionInfo(
+    unsigned char RevisionNum,        ///< Byte code revision number
+    Module::Endianness Endianness,    ///< Endianness indicator
+    Module::PointerSize PointerSize   ///< PointerSize indicator
+  );
+
+  /// This method is called at the start of a module globals block which
+  /// contains the global variables and the function placeholders
+  /// @brief Handle start of module globals block.
+  virtual void handleModuleGlobalsBegin();
+
+  /// This method is called when a non-initialized global variable is
+  /// recognized. Its type, constness, and linkage type are provided.
+  /// @brief Handle a non-initialized global variable
+  virtual void handleGlobalVariable(
+    const Type* ElemType,     ///< The type of the global variable
+    bool isConstant,          ///< Whether the GV is constant or not
+    GlobalValue::LinkageTypes ///< The linkage type of the GV
+  );
+
+  /// This method is called when an initialized global variable is recognized.
+  /// Its type, constness, linkage type, and the slot number of the initializer
+  /// are provided.
+  /// @brief Handle an initialized global variable.
+  virtual void handleInitializedGV(
+    const Type* ElemType,     ///< The type of the global variable
+    bool isConstant,          ///< Whether the GV is constant or not
+    GlobalValue::LinkageTypes,///< The linkage type of the GV
+    unsigned initSlot         ///< Slot number of GV's initializer
+  );
+
+  /// This method is called when a new type is recognized. The type is
+  /// converted from the bytecode and passed to this method.
+  /// @brief Handle a type
+  virtual void handleType( const Type* Ty );
+
+  /// This method is called when the function prototype for a function is
+  /// encountered in the module globals block.
+  /// @brief Handle a function declaration
+  virtual void handleFunctionDeclaration(
+    const Type* FuncType      ///< The type of the function
+  );
+
+  /// This method is called at the end of the module globals block.
+  /// @brief Handle end of module globals block.
+  virtual void handleModuleGlobalsEnd();
+
+  /// This method is called at the beginning of a compaction table.
+  /// @brief Handle start of compaction table.
+  virtual void handleCompactionTableBegin();
+
+  /// @brief Handle a plane of a compaction table.
+  /// NOTE(review): Ty/NumEntries are presumably the plane's type slot and its
+  /// entry count -- confirm against the parser.
+  virtual void handleCompactionTablePlane(
+    unsigned Ty,
+    unsigned NumEntries
+  );
+
+  /// @brief Handle a type entry of a compaction table.
+  virtual void handleCompactionTableType(
+    unsigned i,
+    unsigned TypSlot,
+    const Type*
+  );
+
+  /// @brief Handle a value entry of a compaction table.
+  virtual void handleCompactionTableValue(
+    unsigned i,
+    unsigned ValSlot,
+    const Type*
+  );
+
+  /// @brief Handle end of compaction table.
+  virtual void handleCompactionTableEnd();
+
+  /// @brief Handle start of symbol table.
+  virtual void handleSymbolTableBegin();
+
+  /// @brief Handle a plane of a symbol table.
+  /// The third parameter is named Typ, as in the out-of-line definition: two
+  /// parameters of one declaration may not share the name Ty.
+  virtual void handleSymbolTablePlane(
+    unsigned Ty,
+    unsigned NumEntries,
+    const Type* Typ
+  );
+
+  /// @brief Handle a named type entry of a symbol table.
+  virtual void handleSymbolTableType(
+    unsigned i,
+    unsigned slot,
+    const std::string& name
+  );
+
+  /// @brief Handle a named value entry of a symbol table.
+  virtual void handleSymbolTableValue(
+    unsigned i,
+    unsigned slot,
+    const std::string& name
+  );
+
+  /// @brief Handle end of symbol table.
+  virtual void handleSymbolTableEnd();
+
+  /// @brief Handle the start of a function body.
+  virtual void handleFunctionBegin(
+    const Type* FType,
+    GlobalValue::LinkageTypes linkage
+  );
+
+  /// @brief Handle the end of a function body.
+  virtual void handleFunctionEnd(
+    const Type* FType
+  );
+
+  /// @brief Handle the start of a basic block.
+  virtual void handleBasicBlockBegin(
+    unsigned blocknum
+  );
+
+  /// This method is called for each instruction that is parsed.
+  /// @returns true if the instruction is a block terminating instruction
+  /// @brief Handle an instruction
+  virtual bool handleInstruction(
+    unsigned Opcode,
+    const Type* iType,
+    std::vector<unsigned>& Operands
+  );
+
+  /// This method is called for each block that is parsed.
+  /// @brief Handle the end of a basic block.
+  virtual void handleBasicBlockEnd(unsigned blocknum);
+
+  /// This method is called at the start of the global constants block.
+  /// @brief Handle start of global constants block.
+  virtual void handleGlobalConstantsBegin();
+
+  /// @brief Handle a constant expression.
+  virtual void handleConstantExpression(
+    unsigned Opcode,
+    const Type* Typ,
+    std::vector<std::pair<const Type*,unsigned> > ArgVec
+  );
+
+  /// @brief Handle a constant array.
+  virtual void handleConstantArray(
+    const ArrayType* AT,
+    std::vector<unsigned>& ElementSlots
+  );
+
+  /// @brief Handle a constant structure.
+  virtual void handleConstantStruct(
+    const StructType* ST,
+    std::vector<unsigned>& ElementSlots
+  );
+
+  /// @brief Handle a constant pointer.
+  virtual void handleConstantPointer(
+    const PointerType* PT,
+    unsigned Slot
+  );
+
+  /// @brief Handle a constant string (a ConstantArray).
+  virtual void handleConstantString(
+    const ConstantArray* CA
+  );
+
+  /// @brief Handle any other constant value.
+  virtual void handleConstantValue( Constant * c );
+
+  /// @brief Handle end of global constants block.
+  virtual void handleGlobalConstantsEnd();
+
+/// @}
+
+};
+
+} // End llvm namespace
+
+#endif
+
+// vim: sw=2
diff --git a/lib/Bytecode/Analyzer/Dumper.cpp b/lib/Bytecode/Analyzer/Dumper.cpp
new file mode 100644
index 0000000000..6ff4ea0c79
--- /dev/null
+++ b/lib/Bytecode/Analyzer/Dumper.cpp
@@ -0,0 +1,311 @@
+//===-- Dumper.cpp - Parsing Handler ----------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Reid Spencer and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the BytecodeDumper class that gets called by the
+// AbstractBytecodeParser when parsing events occur. It merely dumps the
+// information presented to it from the parser.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AnalyzerInternals.h"
+#include "llvm/Constant.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instruction.h"
+#include "llvm/Type.h"
+
+using namespace llvm;
+
+namespace {
+
+class BytecodeDumper : public llvm::BytecodeHandler {
+public:
+
+ virtual bool handleError(const std::string& str )
+ {
+ std::cout << "ERROR: " << str << "\n";
+ return true;
+ }
+
+ virtual void handleStart()
+ {
+ std::cout << "Bytecode {\n";
+ }
+
+ virtual void handleFinish()
+ {
+ std::cout << "} End Bytecode\n";
+ }
+
+ virtual void handleModuleBegin(const std::string& id)
+ {
+ std::cout << " Module " << id << " {\n";
+ }
+
+ virtual void handleModuleEnd(const std::string& id)
+ {
+ std::cout << " } End Module " << id << "\n";
+ }
+
+ virtual void handleVersionInfo(
+ unsigned char RevisionNum, ///< Byte code revision number
+ Module::Endianness Endianness, ///< Endianness indicator
+ Module::PointerSize PointerSize ///< PointerSize indicator
+ )
+ {
+ std::cout << " RevisionNum: " << int(RevisionNum)
+ << " Endianness: " << Endianness
+ << " PointerSize: " << PointerSize << "\n";
+ }
+
+ virtual void handleModuleGlobalsBegin()
+ {
+ std::cout << " BLOCK: ModuleGlobalInfo {\n";
+ }
+
+ virtual void handleGlobalVariable(
+ const Type* ElemType, ///< The type of the global variable
+ bool isConstant, ///< Whether the GV is constant or not
+ GlobalValue::LinkageTypes Linkage ///< The linkage type of the GV
+ )
+ {
+ std::cout << " GV: Uninitialized, "
+ << ( isConstant? "Constant, " : "Variable, ")
+ << " Linkage=" << Linkage << " Type="
+ << ElemType->getDescription() << "\n";
+ }
+
+ virtual void handleInitializedGV(
+ const Type* ElemType, ///< The type of the global variable
+ bool isConstant, ///< Whether the GV is constant or not
+ GlobalValue::LinkageTypes Linkage,///< The linkage type of the GV
+ unsigned initSlot ///< Slot number of GV's initializer
+ )
+ {
+ std::cout << " GV: Initialized, "
+ << ( isConstant? "Constant, " : "Variable, ")
+ << " Linkage=" << Linkage << " Type="
+ << ElemType->getDescription()
+ << " InitializerSlot=" << initSlot << "\n";
+ }
+
+ virtual void handleType( const Type* Ty )
+ {
+ std::cout << " Type: " << Ty->getDescription() << "\n";
+ }
+
+ virtual void handleFunctionDeclaration( const Type* FuncType )
+ {
+ std::cout << " Function: " << FuncType->getDescription() << "\n";
+ }
+
+ virtual void handleModuleGlobalsEnd()
+ {
+ std::cout << " } END BLOCK: ModuleGlobalInfo\n";
+ }
+
+ void handleCompactionTableBegin()
+ {
+ std::cout << " BLOCK: CompactionTable {\n";
+ }
+
+ virtual void handleCompactionTablePlane( unsigned Ty, unsigned NumEntries )
+ {
+ std::cout << " Plane: Ty=" << Ty << " Size=" << NumEntries << "\n";
+ }
+
+ virtual void handleCompactionTableType(
+ unsigned i,
+ unsigned TypSlot,
+ const Type* Ty
+ )
+ {
+ std::cout << " Type: " << i << " Slot:" << TypSlot
+ << " is " << Ty->getDescription() << "\n";
+ }
+
+ virtual void handleCompactionTableValue(
+ unsigned i,
+ unsigned ValSlot,
+ const Type* Ty
+ )
+ {
+ std::cout << " Value: " << i << " Slot:" << ValSlot
+ << " is " << Ty->getDescription() << "\n";
+ }
+
+ virtual void handleCompactionTableEnd()
+ {
+ std::cout << " } END BLOCK: CompactionTable\n";
+ }
+
+ virtual void handleSymbolTableBegin()
+ {
+ std::cout << " BLOCK: SymbolTable {\n";
+ }
+
+ virtual void handleSymbolTablePlane(
+ unsigned Ty,
+ unsigned NumEntries,
+ const Type* Typ
+ )
+ {
+ std::cout << " Plane: Ty=" << Ty << " Size=" << NumEntries
+ << " Type: " << Typ->getDescription() << "\n";
+ }
+
+ virtual void handleSymbolTableType(
+ unsigned i,
+ unsigned slot,
+ const std::string& name
+ )
+ {
+ std::cout << " Type " << i << " Slot=" << slot
+ << " Name: " << name << "\n";
+ }
+
+ virtual void handleSymbolTableValue(
+ unsigned i,
+ unsigned slot,
+ const std::string& name
+ )
+ {
+ std::cout << " Value " << i << " Slot=" << slot
+ << " Name: " << name << "\n";
+ }
+
+ virtual void handleSymbolTableEnd()
+ {
+ std::cout << " } END BLOCK: SymbolTable\n";
+ }
+
+ virtual void handleFunctionBegin(
+ const Type* FType,
+ GlobalValue::LinkageTypes linkage
+ )
+ {
+ std::cout << " BLOCK: Function {\n";
+ std::cout << " Linkage: " << linkage << "\n";
+ std::cout << " Type: " << FType->getDescription() << "\n";
+ }
+
+ virtual void handleFunctionEnd(
+ const Type* FType
+ )
+ {
+ std::cout << " } END BLOCK: Function\n";
+ }
+
+ virtual void handleBasicBlockBegin(
+ unsigned blocknum
+ )
+ {
+ std::cout << " BLOCK: BasicBlock #" << blocknum << "{\n";
+ }
+
+ virtual bool handleInstruction(
+ unsigned Opcode,
+ const Type* iType,
+ std::vector<unsigned>& Operands
+ )
+ {
+ std::cout << " INST: OpCode="
+ << Instruction::getOpcodeName(Opcode) << " Type="
+ << iType->getDescription() << "\n";
+ for ( unsigned i = 0; i < Operands.size(); ++i )
+ std::cout << " Op#" << i << " Slot=" << Operands[i] << "\n";
+
+ return Instruction::isTerminator(Opcode);
+ }
+
+ /// Print the line that closes the dump of basic block number blocknum.
+ virtual void handleBasicBlockEnd(unsigned blocknum)
+ {
+   // The closing line must not re-open a brace; the "{" belongs only to
+   // the line printed by handleBasicBlockBegin.
+   std::cout << " } END BLOCK: BasicBlock #" << blocknum << "\n";
+ }
+
+ virtual void handleGlobalConstantsBegin()
+ {
+ std::cout << " BLOCK: GlobalConstants {\n";
+ }
+
+ virtual void handleConstantExpression(
+ unsigned Opcode,
+ const Type* Typ,
+ std::vector<std::pair<const Type*,unsigned> > ArgVec
+ )
+ {
+ std::cout << " EXPR: " << Instruction::getOpcodeName(Opcode)
+ << " Type=" << Typ->getDescription() << "\n";
+ for ( unsigned i = 0; i < ArgVec.size(); ++i )
+ std::cout << " Arg#" << i << " Type="
+ << ArgVec[i].first->getDescription() << " Slot="
+ << ArgVec[i].second << "\n";
+ }
+
+ virtual void handleConstantValue( Constant * c )
+ {
+ std::cout << " VALUE: ";
+ c->print(std::cout);
+ std::cout << "\n";
+ }
+
+ virtual void handleConstantArray(
+ const ArrayType* AT,
+ std::vector<unsigned>& Elements )
+ {
+ std::cout << " ARRAY: " << AT->getDescription() << "\n";
+ for ( unsigned i = 0; i < Elements.size(); ++i )
+ std::cout << " #" << i << " Slot=" << Elements[i] << "\n";
+ }
+
+ virtual void handleConstantStruct(
+ const StructType* ST,
+ std::vector<unsigned>& Elements)
+ {
+ std::cout << " STRUC: " << ST->getDescription() << "\n";
+ for ( unsigned i = 0; i < Elements.size(); ++i )
+ std::cout << " #" << i << " Slot=" << Elements[i] << "\n";
+ }
+
+ virtual void handleConstantPointer(
+ const PointerType* PT, unsigned Slot)
+ {
+ std::cout << " POINT: " << PT->getDescription()
+ << " Slot=" << Slot << "\n";
+ }
+
+ virtual void handleConstantString( const ConstantArray* CA )
+ {
+ std::cout << " STRNG: ";
+ CA->print(std::cout);
+ std::cout << "\n";
+ }
+
+ virtual void handleGlobalConstantsEnd()
+ {
+ std::cout << " } END BLOCK: GlobalConstants\n";
+ }
+};
+
+}
+
+void BytecodeAnalyzer::DumpBytecode(
+ const unsigned char *Buf,
+ unsigned Length,
+ BytecodeAnalysis& bca, // not referenced anywhere in this function
+ const std::string &ModuleID
+ )
+{
+ BytecodeDumper TheHandler; // handler whose callbacks print each parse event to std::cout
+ AbstractBytecodeParser TheParser(&TheHandler);
+ TheParser.ParseBytecode( Buf, Length, ModuleID );
+ TheParser.ParseAllFunctionBodies(); // ParseBytecode only records function bodies; parse them now
+}
+
+// vim: sw=2
diff --git a/lib/Bytecode/Analyzer/Makefile b/lib/Bytecode/Analyzer/Makefile
new file mode 100644
index 0000000000..f3327cd8d5
--- /dev/null
+++ b/lib/Bytecode/Analyzer/Makefile
@@ -0,0 +1,13 @@
+##===- lib/Bytecode/Analyzer/Makefile ----------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file was developed by the LLVM research group and is distributed under
+# the University of Illinois Open Source License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../..
+LIBRARYNAME = bcanalyzer
+
+include $(LEVEL)/Makefile.common
+
diff --git a/lib/Bytecode/Analyzer/Parser.cpp b/lib/Bytecode/Analyzer/Parser.cpp
new file mode 100644
index 0000000000..d236b64aae
--- /dev/null
+++ b/lib/Bytecode/Analyzer/Parser.cpp
@@ -0,0 +1,877 @@
+//===- Parser.cpp - Code to read bytecode files ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This library implements the functionality defined in llvm/Bytecode/Reader.h
+//
+// Note that this library should be as fast as possible, reentrant, and
+// threadsafe!!
+//
+// TODO: Allow passing in an option to ignore the symbol table
+//
+//===----------------------------------------------------------------------===//
+
+#include "AnalyzerInternals.h"
+#include "llvm/Module.h"
+#include "llvm/Bytecode/Format.h"
+#include "Support/StringExtras.h"
+#include <algorithm>
+#include <iostream>
+#include <sstream>
+
+using namespace llvm;
+
+#define PARSE_ERROR(inserters) \
+ { \
+ std::ostringstream errormsg; \
+ errormsg << inserters; \
+ if ( ! handler->handleError( errormsg.str() ) ) \
+ throw std::string(errormsg.str()); \
+ }
+
+/// Translate a type id number read from the bytecode into a Type.
+/// Ids below Type::FirstDerivedTyID are looked up as primitive types. Other
+/// ids are rebased and resolved through the compaction type table when it is
+/// non-empty, otherwise through the module-level and then the function-level
+/// type tables.
+/// @returns the resolved type, or Type::VoidTy when the id is invalid and the
+/// handler chose to continue after the reported error.
+const Type *AbstractBytecodeParser::getType(unsigned ID) {
+  if (ID < Type::FirstDerivedTyID)
+    if (const Type *T = Type::getPrimitiveType((Type::PrimitiveID)ID))
+      return T;   // Asked for a primitive type...
+
+  // Otherwise, derived types need offset...
+  ID -= Type::FirstDerivedTyID;
+
+  if (!CompactionTypeTable.empty()) {
+    if (ID >= CompactionTypeTable.size()) {
+      PARSE_ERROR("Type ID out of range for compaction table!");
+      // The handler asked to continue: do not index past the table.
+      return Type::VoidTy;
+    }
+    return CompactionTypeTable[ID];
+  }
+
+  // Is it a module-level type?
+  if (ID < ModuleTypes.size())
+    return ModuleTypes[ID].get();
+
+  // Nope, is it a function-level type?
+  ID -= ModuleTypes.size();
+  if (ID < FunctionTypes.size())
+    return FunctionTypes[ID].get();
+
+  PARSE_ERROR("Illegal type reference!");
+  return Type::VoidTy;
+}
+
+bool AbstractBytecodeParser::ParseInstruction(BufPtr& Buf, BufPtr EndBuf,
+ std::vector<unsigned> &Operands) {
+ Operands.clear();
+ unsigned iType = 0;
+ unsigned Opcode = 0;
+ unsigned Op = read(Buf, EndBuf);
+
+ // bits Instruction format: Common to all formats
+ // --------------------------
+ // 01-00: Opcode type, fixed to 1.
+ // 07-02: Opcode
+ Opcode = (Op >> 2) & 63;
+ Operands.resize((Op >> 0) & 03);
+
+ switch (Operands.size()) {
+ case 1:
+ // bits Instruction format:
+ // --------------------------
+ // 19-08: Resulting type plane
+ // 31-20: Operand #1 (if set to (2^12-1), then zero operands)
+ //
+ iType = (Op >> 8) & 4095;
+ Operands[0] = (Op >> 20) & 4095;
+ if (Operands[0] == 4095) // Handle special encoding for 0 operands...
+ Operands.resize(0);
+ break;
+ case 2:
+ // bits Instruction format:
+ // --------------------------
+ // 15-08: Resulting type plane
+ // 23-16: Operand #1
+ // 31-24: Operand #2
+ //
+ iType = (Op >> 8) & 255;
+ Operands[0] = (Op >> 16) & 255;
+ Operands[1] = (Op >> 24) & 255;
+ break;
+ case 3:
+ // bits Instruction format:
+ // --------------------------
+ // 13-08: Resulting type plane
+ // 19-14: Operand #1
+ // 25-20: Operand #2
+ // 31-26: Operand #3
+ //
+ iType = (Op >> 8) & 63;
+ Operands[0] = (Op >> 14) & 63;
+ Operands[1] = (Op >> 20) & 63;
+ Operands[2] = (Op >> 26) & 63;
+ break;
+ case 0:
+ Buf -= 4; // Hrm, try this again...
+ Opcode = read_vbr_uint(Buf, EndBuf);
+ Opcode >>= 2;
+ iType = read_vbr_uint(Buf, EndBuf);
+
+ unsigned NumOperands = read_vbr_uint(Buf, EndBuf);
+ Operands.resize(NumOperands);
+
+ if (NumOperands == 0)
+ PARSE_ERROR("Zero-argument instruction found; this is invalid.");
+
+ for (unsigned i = 0; i != NumOperands; ++i)
+ Operands[i] = read_vbr_uint(Buf, EndBuf);
+ align32(Buf, EndBuf);
+ break;
+ }
+
+ return handler->handleInstruction(Opcode, getType(iType), Operands);
+}
+
+/// ParseBasicBlock - Read one stand-alone basic block packet (the form used
+/// by LLVM 1.0 bytecode files). Every instruction up to EndBuf is parsed, and
+/// an error is reported if the last one parsed was not a terminator.
+void AbstractBytecodeParser::ParseBasicBlock(BufPtr &Buf,
+                                             BufPtr EndBuf,
+                                             unsigned BlockNo) {
+  handler->handleBasicBlockBegin( BlockNo );
+
+  // Operand storage reused for every instruction in the block.
+  std::vector<unsigned> Operands;
+
+  // Remembers whether the most recently parsed instruction was a terminator.
+  bool LastWasTerminator = false;
+  for ( ; Buf < EndBuf; )
+    LastWasTerminator = ParseInstruction(Buf, EndBuf, Operands);
+
+  if ( LastWasTerminator == false ) {
+    PARSE_ERROR(
+      "Failed to recognize instruction as terminating at end of block");
+  }
+
+  handler->handleBasicBlockEnd( BlockNo );
+}
+
+
+/// ParseInstructionList - Parse the instruction list of a function body,
+/// splitting it into basic blocks at each terminator instruction. This is
+/// the post-1.0 encoding, which does not emit basic blocks individually.
+/// @returns the number of basic blocks that were started.
+unsigned AbstractBytecodeParser::ParseInstructionList( BufPtr &Buf, BufPtr EndBuf) {
+  std::vector<unsigned> Operands;
+  unsigned NumBlocks = 0;
+
+  for ( ; Buf < EndBuf; ++NumBlocks) {
+    handler->handleBasicBlockBegin( NumBlocks );
+
+    // Consume instructions until a terminator is seen or the data runs out.
+    // The loop condition above guarantees there is at least one to read.
+    bool SawTerminator = false;
+    do {
+      SawTerminator = ParseInstruction(Buf, EndBuf, Operands);
+    } while (!SawTerminator && Buf < EndBuf);
+
+    if (!SawTerminator) {
+      PARSE_ERROR( "Non-terminated basic block found!");
+    }
+
+    handler->handleBasicBlockEnd( NumBlocks );
+  }
+
+  return NumBlocks;
+}
+
+/// Parse a symbol table block and report each plane and each entry to the
+/// handler. A plane is encoded as [num entries][type id number] followed by
+/// that many [def slot #][name] entries.
+void AbstractBytecodeParser::ParseSymbolTable(BufPtr &Buf, BufPtr EndBuf) {
+  handler->handleSymbolTableBegin();
+
+  for ( ; Buf < EndBuf; ) {
+    // Plane header.
+    const unsigned EntryCount = read_vbr_uint(Buf, EndBuf);
+    const unsigned PlaneID = read_vbr_uint(Buf, EndBuf);
+    const Type *PlaneTy = getType(PlaneID);
+
+    handler->handleSymbolTablePlane( PlaneID, EntryCount, PlaneTy );
+
+    // Entries of the type plane name types; all others name values.
+    const bool IsTypePlane = (PlaneID == Type::TypeTyID);
+
+    for (unsigned Idx = 0; Idx < EntryCount; ++Idx) {
+      unsigned DefSlot = read_vbr_uint(Buf, EndBuf);
+      std::string EntryName = read_str(Buf, EndBuf);
+
+      if (IsTypePlane) {
+        handler->handleSymbolTableType( Idx, DefSlot, EntryName );
+      } else {
+        handler->handleSymbolTableValue( Idx, DefSlot, EntryName );
+      }
+    }
+  }
+
+  if (Buf > EndBuf) {
+    PARSE_ERROR("Tried to read past end of buffer while reading symbol table.");
+  }
+
+  handler->handleSymbolTableEnd();
+}
+
+/// Record the extent of a function block without parsing it. The function's
+/// type is taken from the back of FunctionSignatureList, and the buffer
+/// range is stored in LazyFunctionLoadMap under that type.
+/// NOTE(review): the map is keyed by type, so a second function of the same
+/// type would overwrite the first entry -- confirm this is intended.
+void AbstractBytecodeParser::ParseFunctionLazily(BufPtr &Buf, BufPtr EndBuf) {
+  if (!FunctionSignatureList.empty()) {
+    const Type *Signature = FunctionSignatureList.back();
+    FunctionSignatureList.pop_back();
+
+    // Remember where the body lives so that it can be read later.
+    LazyFunctionInfo &Pending = LazyFunctionLoadMap[Signature];
+    Pending.Buf = Buf;
+    Pending.EndBuf = EndBuf;
+
+    // Skip over the block as though it had been parsed.
+    Buf = EndBuf;
+    return;
+  }
+
+  throw std::string("FunctionSignatureList empty!");
+}
+
+/// Parse the body that was recorded earlier for the function of type FType.
+/// The record is removed from LazyFunctionLoadMap before the body is parsed.
+/// An error is reported, and nothing is parsed, when no record exists.
+void AbstractBytecodeParser::ParseNextFunction(Type* FType) {
+  LazyFunctionMap::iterator Pos = LazyFunctionLoadMap.find(FType);
+
+  if ( Pos == LazyFunctionLoadMap.end() ) {
+    PARSE_ERROR("Unrecognized function of type " << FType->getDescription());
+    return;
+  }
+  assert(Pos->first == FType);
+
+  // Copy the range out first: erasing invalidates the iterator.
+  const LazyFunctionInfo Range = Pos->second;
+  LazyFunctionLoadMap.erase(Pos);
+
+  BufPtr Cursor = Range.Buf;
+  this->ParseFunctionBody( FType, Cursor, Range.EndBuf );
+}
+
+/// Parse the body of a function of type FType from [Buf, EndBuf).
+/// The linkage type is read first and reported with handleFunctionBegin.
+/// Each nested block is then dispatched to its parser; blocks of an unknown
+/// type are skipped. The function-level type tables are cleared at the end.
+void AbstractBytecodeParser::ParseFunctionBody(const Type* FType,
+                                               BufPtr &Buf, BufPtr EndBuf ) {
+
+  GlobalValue::LinkageTypes Linkage = GlobalValue::ExternalLinkage;
+
+  unsigned LinkageType = read_vbr_uint(Buf, EndBuf);
+  switch (LinkageType) {
+  case 0: Linkage = GlobalValue::ExternalLinkage; break;
+  case 1: Linkage = GlobalValue::WeakLinkage; break;
+  case 2: Linkage = GlobalValue::AppendingLinkage; break;
+  case 3: Linkage = GlobalValue::InternalLinkage; break;
+  case 4: Linkage = GlobalValue::LinkOnceLinkage; break;
+  default:
+    PARSE_ERROR("Invalid linkage type for Function.");
+    // Used when the handler chooses to continue after the error.
+    Linkage = GlobalValue::InternalLinkage;
+    break;
+  }
+
+  handler->handleFunctionBegin(FType,Linkage);
+
+  // Keep track of how many basic blocks we have read in...
+  unsigned BlockNum = 0;
+
+  while (Buf < EndBuf) {
+    // Named so that they do not hide the llvm::Type class in this scope.
+    unsigned BlockType, BlockSize;
+    BufPtr OldBuf = Buf;
+    readBlock(Buf, EndBuf, BlockType, BlockSize);
+
+    switch (BlockType) {
+    case BytecodeFormat::ConstantPool:
+      ParseConstantPool(Buf, Buf+BlockSize, FunctionTypes );
+      break;
+
+    case BytecodeFormat::CompactionTable:
+      ParseCompactionTable(Buf, Buf+BlockSize);
+      break;
+
+    case BytecodeFormat::BasicBlock:
+      ParseBasicBlock(Buf, Buf+BlockSize, BlockNum++);
+      break;
+
+    case BytecodeFormat::InstructionList:
+      if (BlockNum) {
+        PARSE_ERROR("InstructionList must come before basic blocks!");
+      }
+      BlockNum = ParseInstructionList(Buf, Buf+BlockSize);
+      break;
+
+    case BytecodeFormat::SymbolTable:
+      ParseSymbolTable(Buf, Buf+BlockSize );
+      break;
+
+    default:
+      // Unknown block: skip it, checking that the pointer did not wrap.
+      Buf += BlockSize;
+      if (OldBuf > Buf) {
+        PARSE_ERROR("Wrapped around reading bytecode");
+      }
+      break;
+    }
+
+    // Malformed bc file if read past end of block.
+    align32(Buf, EndBuf);
+  }
+
+  handler->handleFunctionEnd(FType);
+
+  // Clear out function-level types...
+  FunctionTypes.clear();
+  CompactionTypeTable.clear();
+}
+
+/// Parse every function body that was recorded in LazyFunctionLoadMap, then
+/// empty the map so that no body is parsed a second time.
+void AbstractBytecodeParser::ParseAllFunctionBodies() {
+  LazyFunctionMap::iterator Fi = LazyFunctionLoadMap.begin();
+  LazyFunctionMap::iterator Fe = LazyFunctionLoadMap.end();
+
+  while ( Fi != Fe ) {
+    const Type* FType = Fi->first;
+    this->ParseFunctionBody(FType, Fi->second.Buf, Fi->second.EndBuf);
+    // Advance to the next record; without this the loop never terminates.
+    ++Fi;
+  }
+
+  // All recorded bodies have been consumed.
+  LazyFunctionLoadMap.clear();
+}
+
+/// Parse a compaction table block and report each plane and entry to the
+/// handler. A plane header packs the entry count and the type id into one
+/// value; when its two low bits are both set, the remaining bits hold the
+/// entry count and the type id follows as a separate value.
+void AbstractBytecodeParser::ParseCompactionTable(BufPtr &Buf, BufPtr End) {
+
+  handler->handleCompactionTableBegin();
+
+  while (Buf != End) {
+    const unsigned Header = read_vbr_uint(Buf, End);
+    unsigned NumEntries;
+    unsigned Ty;
+
+    if ((Header & 3) != 3) {
+      // Short form: count in the two low bits, type id in the rest.
+      Ty = Header >> 2;
+      NumEntries = Header & 3;
+    } else {
+      // Long form: count in the upper bits, type id read separately.
+      NumEntries = Header >> 2;
+      Ty = read_vbr_uint(Buf, End);
+    }
+
+    handler->handleCompactionTablePlane( Ty, NumEntries );
+
+    if (Ty != Type::TypeTyID) {
+      // A plane of values: every entry has the plane's type.
+      const Type *PlaneTy = getType(Ty);
+      for (unsigned Idx = 0; Idx < NumEntries; ++Idx) {
+        unsigned ValSlot = read_vbr_uint(Buf, End);
+        handler->handleCompactionTableValue( Idx, ValSlot, PlaneTy );
+      }
+    } else {
+      // The plane of types: every entry is a slot in the global type table.
+      for (unsigned Idx = 0; Idx < NumEntries; ++Idx) {
+        unsigned TypeSlot = read_vbr_uint(Buf,End);
+        const Type *SlotTy = getGlobalTableType(TypeSlot);
+        handler->handleCompactionTableType( Idx, TypeSlot, SlotTy );
+      }
+    }
+  }
+
+  handler->handleCompactionTableEnd();
+}
+
+const Type *AbstractBytecodeParser::ParseTypeConstant(const unsigned char *&Buf,
+ const unsigned char *EndBuf) {
+ unsigned PrimType = read_vbr_uint(Buf, EndBuf);
+
+ const Type *Val = 0;
+ if ((Val = Type::getPrimitiveType((Type::PrimitiveID)PrimType)))
+ return Val;
+
+ switch (PrimType) {
+ case Type::FunctionTyID: {
+ const Type *RetType = getType(read_vbr_uint(Buf, EndBuf));
+
+ unsigned NumParams = read_vbr_uint(Buf, EndBuf);
+
+ std::vector<const Type*> Params;
+ while (NumParams--)
+ Params.push_back(getType(read_vbr_uint(Buf, EndBuf)));
+
+ bool isVarArg = Params.size() && Params.back() == Type::VoidTy;
+ if (isVarArg) Params.pop_back();
+
+ Type* result = FunctionType::get(RetType, Params, isVarArg);
+ handler->handleType( result );
+ return result;
+ }
+ case Type::ArrayTyID: {
+ unsigned ElTyp = read_vbr_uint(Buf, EndBuf);
+ const Type *ElementType = getType(ElTyp);
+
+ unsigned NumElements = read_vbr_uint(Buf, EndBuf);
+
+ BCR_TRACE(5, "Array Type Constant #" << ElTyp << " size="
+ << NumElements << "\n");
+ Type* result = ArrayType::get(ElementType, NumElements);
+ handler->handleType( result );
+ return result;
+ }
+ case Type::StructTyID: {
+ std::vector<const Type*> Elements;
+ unsigned Typ = read_vbr_uint(Buf, EndBuf);
+ while (Typ) { // List is terminated by void/0 typeid
+ Elements.push_back(getType(Typ));
+ Typ = read_vbr_uint(Buf, EndBuf);
+ }
+
+ Type* result = StructType::get(Elements);
+ handler->handleType( result );
+ return result;
+ }
+ case Type::PointerTyID: {
+ unsigned ElTyp = read_vbr_uint(Buf, EndBuf);
+ BCR_TRACE(5, "Pointer Type Constant #" << ElTyp << "\n");
+ Type* result = PointerType::get(getType(ElTyp));
+ handler->handleType( result );
+ return result;
+ }
+
+ case Type::OpaqueTyID: {
+ Type* result = OpaqueType::get();
+ handler->handleType( result );
+ return result;
+ }
+
+ default:
+ PARSE_ERROR("Don't know how to deserialize primitive type" << PrimType << "\n");
+ return Val;
+ }
+}
+
+// ParseTypeConstants - We have to use this weird code to handle recursive
+// types. We know that recursive types will only reference the current slab of
+// values in the type plane, but they can forward reference types before they
+// have been read. For example, Type #0 might be '{ Ty#1 }' and Type #1 might
+// be 'Ty#0*'. When reading Type #0, type number one doesn't exist. To fix
+// this ugly problem, we pessimistically insert an opaque type for each type we
+// are about to read. This means that forward references will resolve to
+// something and when we reread the type later, we can replace the opaque type
+// with a new resolved concrete type.
+//
+void AbstractBytecodeParser::ParseTypeConstants(const unsigned char *&Buf,
+ const unsigned char *EndBuf,
+ TypeListTy &Tab,
+ unsigned NumEntries) {
+ assert(Tab.size() == 0 && "should not have read type constants in before!");
+
+ // Insert a bunch of opaque types to be resolved later...
+ Tab.reserve(NumEntries);
+ for (unsigned i = 0; i != NumEntries; ++i)
+ Tab.push_back(OpaqueType::get());
+
+ // Loop through reading all of the types. Forward types will make use of the
+ // opaque types just inserted.
+ //
+ for (unsigned i = 0; i != NumEntries; ++i) {
+ const Type *NewTy = ParseTypeConstant(Buf, EndBuf), *OldTy = Tab[i].get();
+ if (NewTy == 0) throw std::string("Couldn't parse type!");
+ BCR_TRACE(4, "#" << i << ": Read Type Constant: '" << NewTy <<
+ "' Replacing: " << OldTy << "\n");
+
+ // Don't insertValue the new type... instead we want to replace the opaque
+ // type with the new concrete value...
+ //
+
+ // Refine the abstract type to the new type. This causes all uses of the
+ // abstract type to use NewTy. This also will cause the opaque type to be
+ // deleted...
+ //
+ cast<DerivedType>(const_cast<Type*>(OldTy))->refineAbstractTypeTo(NewTy);
+
+ // This should have replace the old opaque type with the new type in the
+ // value table... or with a preexisting type that was already in the system
+ assert(Tab[i] != OldTy && "refineAbstractType didn't work!");
+ }
+
+ BCR_TRACE(5, "Resulting types:\n");
+ for (unsigned i = 0; i < NumEntries; ++i) {
+ BCR_TRACE(5, (void*)Tab[i].get() << " - " << Tab[i].get() << "\n");
+ }
+}
+
+
+/// Parse one constant whose type has the id number TypeID and report it to
+/// the handler. A constant expression is reported with
+/// handleConstantExpression; otherwise the encoding depends on the type and
+/// the matching handleConstant* callback is invoked.
+void AbstractBytecodeParser::ParseConstantValue(const unsigned char *&Buf,
+                                                const unsigned char *EndBuf,
+                                                unsigned TypeID) {
+
+  // We must check for a ConstantExpr before switching by type because
+  // a ConstantExpr can be of any type, and has no explicit value.
+  //
+  // 0 if not expr; numArgs if is expr
+  unsigned isExprNumArgs = read_vbr_uint(Buf, EndBuf);
+
+  if (isExprNumArgs) {
+    unsigned Opcode = read_vbr_uint(Buf, EndBuf);
+    const Type* Typ = getType(TypeID);
+
+    // FIXME: Encoding of constant exprs could be much more compact!
+    std::vector<std::pair<const Type*,unsigned> > ArgVec;
+    ArgVec.reserve(isExprNumArgs);
+
+    // Read the slot number and types of each of the arguments
+    for (unsigned i = 0; i != isExprNumArgs; ++i) {
+      unsigned ArgValSlot = read_vbr_uint(Buf, EndBuf);
+      unsigned ArgTypeSlot = read_vbr_uint(Buf, EndBuf);
+      BCR_TRACE(4, "CE Arg " << i << ": Type: '" << *getType(ArgTypeSlot)
+                << "' slot: " << ArgValSlot << "\n");
+
+      // Record the argument as a (type, value slot) pair
+      ArgVec.push_back(std::make_pair(getType(ArgTypeSlot), ArgValSlot));
+    }
+
+    handler->handleConstantExpression( Opcode, Typ, ArgVec );
+    return;
+  }
+
+  // Ok, not an ConstantExpr. We now know how to read the given type...
+  const Type *Ty = getType(TypeID);
+  switch (Ty->getPrimitiveID()) {
+  case Type::BoolTyID: {
+    unsigned Val = read_vbr_uint(Buf, EndBuf);
+    if (Val != 0 && Val != 1) {
+      PARSE_ERROR("Invalid boolean value read.");
+    }
+
+    handler->handleConstantValue( ConstantBool::get(Val == 1));
+    break;
+  }
+
+  case Type::UByteTyID:   // Unsigned integer types...
+  case Type::UShortTyID:
+  case Type::UIntTyID: {
+    unsigned Val = read_vbr_uint(Buf, EndBuf);
+    if (!ConstantUInt::isValueValidForType(Ty, Val))
+      throw std::string("Invalid unsigned byte/short/int read.");
+    handler->handleConstantValue( ConstantUInt::get(Ty, Val) );
+    break;
+  }
+
+  case Type::ULongTyID: {
+    handler->handleConstantValue( ConstantUInt::get(Ty, read_vbr_uint64(Buf, EndBuf)) );
+    break;
+  }
+
+  case Type::SByteTyID:   // Signed integer types...
+  case Type::ShortTyID:
+  case Type::IntTyID:
+  case Type::LongTyID: {
+    int64_t Val = read_vbr_int64(Buf, EndBuf);
+    if (!ConstantSInt::isValueValidForType(Ty, Val))
+      throw std::string("Invalid signed byte/short/int/long read.");
+    handler->handleConstantValue( ConstantSInt::get(Ty, Val) );
+    break;
+  }
+
+  case Type::FloatTyID: {
+    float F;
+    input_data(Buf, EndBuf, &F, &F+1);
+    handler->handleConstantValue( ConstantFP::get(Ty, F) );
+    break;
+  }
+
+  case Type::DoubleTyID: {
+    double Val;
+    input_data(Buf, EndBuf, &Val, &Val+1);
+    handler->handleConstantValue( ConstantFP::get(Ty, Val) );
+    break;
+  }
+
+  case Type::TypeTyID:
+    PARSE_ERROR("Type constants shouldn't live in constant table!");
+    break;
+
+  case Type::ArrayTyID: {
+    const ArrayType *AT = cast<ArrayType>(Ty);
+    unsigned NumElements = AT->getNumElements();
+    std::vector<unsigned> Elements;
+    Elements.reserve(NumElements);
+    while (NumElements--)     // Read all of the elements of the constant.
+      Elements.push_back(read_vbr_uint(Buf, EndBuf));
+
+    handler->handleConstantArray( AT, Elements );
+    break;
+  }
+
+  case Type::StructTyID: {
+    const StructType *ST = cast<StructType>(Ty);
+    std::vector<unsigned> Elements;
+    Elements.reserve(ST->getNumElements());
+    for (unsigned i = 0; i != ST->getNumElements(); ++i)
+      Elements.push_back(read_vbr_uint(Buf, EndBuf));
+
+    handler->handleConstantStruct( ST, Elements );
+    // Must not fall into the pointer case: Ty is not a PointerType here.
+    break;
+  }
+
+  case Type::PointerTyID: {  // ConstantPointerRef value...
+    const PointerType *PT = cast<PointerType>(Ty);
+    unsigned Slot = read_vbr_uint(Buf, EndBuf);
+    handler->handleConstantPointer( PT, Slot );
+    // Must not fall into the default case, which reports an error.
+    break;
+  }
+
+  default:
+    PARSE_ERROR("Don't know how to deserialize constant value of type '"+
+                Ty->getDescription());
+    break;
+  }
+}
+
+void AbstractBytecodeParser::ParseGlobalTypes(const unsigned char *&Buf,
+ const unsigned char *EndBuf) {
+ ParseConstantPool(Buf, EndBuf, ModuleTypes); // read as a constant pool; type entries are stored in ModuleTypes
+}
+
+/// Parse NumEntries string constants and report each one to the handler as
+/// a ConstantArray. Each entry is a type id, which must name an array of
+/// sbyte or ubyte, followed by one byte of data per array element.
+/// Throws a std::string if the type is not such an array.
+void AbstractBytecodeParser::ParseStringConstants(const unsigned char *&Buf,
+                                                  const unsigned char *EndBuf,
+                                                  unsigned NumEntries ){
+  for (; NumEntries; --NumEntries) {
+    unsigned Typ = read_vbr_uint(Buf, EndBuf);
+    const Type *Ty = getType(Typ);
+    if (!isa<ArrayType>(Ty))
+      throw std::string("String constant data invalid!");
+
+    const ArrayType *ATy = cast<ArrayType>(Ty);
+    if (ATy->getElementType() != Type::SByteTy &&
+        ATy->getElementType() != Type::UByteTy)
+      throw std::string("String constant data invalid!");
+
+    // Read character data. The type tells us how long the string is.
+    // The length comes from the file, so the bytes are held in a vector
+    // rather than in a variable-length array on the stack.
+    const unsigned NumChars = ATy->getNumElements();
+    std::vector<char> Data(NumChars);
+    char *DataBegin = Data.empty() ? 0 : &Data[0];
+    input_data(Buf, EndBuf, DataBegin, DataBegin + NumChars);
+
+    std::vector<Constant*> Elements(NumChars);
+    if (ATy->getElementType() == Type::SByteTy)
+      for (unsigned i = 0; i != NumChars; ++i)
+        Elements[i] = ConstantSInt::get(Type::SByteTy, (signed char)Data[i]);
+    else
+      for (unsigned i = 0; i != NumChars; ++i)
+        Elements[i] = ConstantUInt::get(Type::UByteTy, (unsigned char)Data[i]);
+
+    // Create the constant, inserting it as needed.
+    ConstantArray *C = cast<ConstantArray>( ConstantArray::get(ATy, Elements) );
+    handler->handleConstantString( C );
+  }
+}
+
+
+/// Parse a constant pool block. The block is a sequence of planes, each
+/// introduced by an entry count and a type id. The type plane is read into
+/// TypeTab, the void plane holds string constants, and every other plane
+/// holds constants of the given type.
+void AbstractBytecodeParser::ParseConstantPool(const unsigned char *&Buf,
+                                               const unsigned char *EndBuf,
+                                               TypeListTy &TypeTab) {
+  for ( ; Buf < EndBuf; ) {
+    const unsigned Count = read_vbr_uint(Buf, EndBuf);
+    const unsigned PlaneID = read_vbr_uint(Buf, EndBuf);
+
+    switch (PlaneID) {
+    case Type::TypeTyID:
+      ParseTypeConstants(Buf, EndBuf, TypeTab, Count);
+      break;
+
+    case Type::VoidTyID:
+      ParseStringConstants(Buf, EndBuf, Count);
+      break;
+
+    default:
+      BCR_TRACE(3, "Type: '" << *getType(PlaneID) << "' NumEntries: "
+                << Count << "\n");
+
+      for (unsigned Remaining = Count; Remaining != 0; --Remaining)
+        ParseConstantValue(Buf, EndBuf, PlaneID);
+      break;
+    }
+  }
+
+  if (Buf > EndBuf) {
+    PARSE_ERROR("Read past end of buffer.");
+  }
+}
+
+/// Parse the ModuleGlobalInfo block: first the list of global variables,
+/// then the list of function declarations. Both lists are terminated by
+/// the void type id. Each global is reported with handleGlobalVariable or
+/// handleInitializedGV, and each function with handleFunctionDeclaration.
+/// The function types are also saved in FunctionSignatureList so that the
+/// function blocks read later can be matched with their types.
+void AbstractBytecodeParser::ParseModuleGlobalInfo(BufPtr &Buf, BufPtr End) {
+
+  handler->handleModuleGlobalsBegin();
+
+  // Read global variables...
+  unsigned VarType = read_vbr_uint(Buf, End);
+  while (VarType != Type::VoidTyID) { // List is terminated by Void
+    // VarType Fields: bit0 = isConstant, bit1 = hasInitializer, bit2,3,4 =
+    // Linkage, bit5+ = slot#
+    unsigned SlotNo = VarType >> 5;
+    unsigned LinkageID = (VarType >> 2) & 7;
+    bool isConstant = VarType & 1;
+    bool hasInitializer = VarType & 2;
+    GlobalValue::LinkageTypes Linkage;
+
+    switch (LinkageID) {
+    case 0: Linkage = GlobalValue::ExternalLinkage; break;
+    case 1: Linkage = GlobalValue::WeakLinkage; break;
+    case 2: Linkage = GlobalValue::AppendingLinkage; break;
+    case 3: Linkage = GlobalValue::InternalLinkage; break;
+    case 4: Linkage = GlobalValue::LinkOnceLinkage; break;
+    default:
+      PARSE_ERROR("Unknown linkage type: " << LinkageID);
+      Linkage = GlobalValue::InternalLinkage;
+      break;
+    }
+
+    // Resolve the element type. ElTy stays null when the type is unusable
+    // and the handler chose to continue after the reported error.
+    const Type *Ty = getType(SlotNo);
+    const Type *ElTy = 0;
+    if ( !Ty ) {
+      PARSE_ERROR("Global has no type! SlotNo=" << SlotNo);
+    } else if ( !isa<PointerType>(Ty)) {
+      PARSE_ERROR("Global not a pointer type! Ty= " << Ty->getDescription());
+    } else {
+      ElTy = cast<PointerType>(Ty)->getElementType();
+    }
+
+    // Report the global variable...
+    if (hasInitializer) {
+      // The initializer's slot number follows only when the flag is set.
+      // It is always consumed so that the rest of the list stays in step.
+      unsigned initSlot = read_vbr_uint(Buf,End);
+      if (ElTy)
+        handler->handleInitializedGV( ElTy, isConstant, Linkage, initSlot );
+    } else if (ElTy) {
+      handler->handleGlobalVariable( ElTy, isConstant, Linkage );
+    }
+
+    // Get next item
+    VarType = read_vbr_uint(Buf, End);
+  }
+
+  // Read the function objects for all of the functions that are coming
+  unsigned FnSignature = read_vbr_uint(Buf, End);
+  while (FnSignature != Type::VoidTyID) { // List is terminated by Void
+    const Type *Ty = getType(FnSignature);
+    if (!isa<PointerType>(Ty) ||
+        !isa<FunctionType>(cast<PointerType>(Ty)->getElementType())) {
+      PARSE_ERROR( "Function not a pointer to function type! Ty = " +
+                   Ty->getDescription());
+      // The handler chose to continue. There is no function type to
+      // record, so skip this entry rather than cast an unsuitable type.
+      FnSignature = read_vbr_uint(Buf, End);
+      continue;
+    }
+
+    // We create functions by passing the underlying FunctionType to create...
+    Ty = cast<PointerType>(Ty)->getElementType();
+
+    // Save this for later so we know type of lazily instantiated functions
+    FunctionSignatureList.push_back(Ty);
+
+    handler->handleFunctionDeclaration(Ty);
+
+    // Get Next function signature
+    FnSignature = read_vbr_uint(Buf, End);
+  }
+
+  // ParseFunctionLazily takes types from the back of the list, so reverse
+  // it to make the first declared function the first one handed out.
+  std::reverse(FunctionSignatureList.begin(), FunctionSignatureList.end());
+
+  if (hasInconsistentModuleGlobalInfo)
+    align32(Buf, End);
+
+  // This is for future proofing... in the future extra fields may be added that
+  // we don't understand, so we transparently ignore them.
+  //
+  Buf = End;
+
+  handler->handleModuleGlobalsEnd();
+}
+
+/// Read the packed version word and report it with handleVersionInfo.
+/// The low four bits are flags (bit0 big endian, bit1 64-bit pointers,
+/// bit2 no endianness, bit3 no pointer size); the remaining bits are the
+/// revision number, which also selects the format compatibility flags.
+void AbstractBytecodeParser::ParseVersionInfo(BufPtr &Buf, BufPtr EndBuf) {
+  const unsigned Packed = read_vbr_uint(Buf, EndBuf);
+
+  RevisionNum = Packed >> 4;
+
+  // Revision 0 (LLVM 1.0, 1.1): the ModuleGlobalInfo block size included its
+  // trailing alignment, and zero values of primitive types were explicit.
+  hasInconsistentModuleGlobalInfo = (RevisionNum == 0);
+  hasExplicitPrimitiveZeros = (RevisionNum == 0);
+
+  // Revisions 0 and 1 (up to LLVM 1.2): GEP indices had to be ubyte
+  // constants for structures and longs for sequential types.
+  hasRestrictedGEPTypes = (RevisionNum <= 1);
+
+  // Revision 2 is the LLVM 1.3 release; anything newer is not understood.
+  if (RevisionNum > 2) {
+    PARSE_ERROR("Unknown bytecode version number: " << RevisionNum);
+  }
+
+  // Decode the target flags; the "no ..." bits take precedence.
+  Module::Endianness Endianness;
+  if (Packed & 4)
+    Endianness = Module::AnyEndianness;
+  else if (Packed & 1)
+    Endianness = Module::BigEndian;
+  else
+    Endianness = Module::LittleEndian;
+
+  Module::PointerSize PointerSize;
+  if (Packed & 8)
+    PointerSize = Module::AnyPointerSize;
+  else if (Packed & 2)
+    PointerSize = Module::Pointer64;
+  else
+    PointerSize = Module::Pointer32;
+
+  handler->handleVersionInfo(RevisionNum, Endianness, PointerSize );
+}
+
+/// Parse the module block that spans [Buf, EndBuf). The block header and
+/// the version information are read first; each nested block is then
+/// dispatched to its parser. Function blocks are only recorded here (see
+/// ParseFunctionLazily), and blocks of an unknown type are skipped.
+void AbstractBytecodeParser::ParseModule(BufPtr &Buf, BufPtr EndBuf ) {
+  // Named so that they do not hide the llvm::Type class in this scope.
+  unsigned BlockType, BlockSize;
+  readBlock(Buf, EndBuf, BlockType, BlockSize);
+  if (BlockType != BytecodeFormat::Module || Buf+BlockSize != EndBuf) {
+    // Hrm, not a class?
+    PARSE_ERROR("Expected Module block! B: " << unsigned(intptr_t(Buf)) <<
+                ", S: " << BlockSize << " E: " << unsigned(intptr_t(EndBuf)));
+  }
+
+  // Read into instance variables...
+  ParseVersionInfo(Buf, EndBuf);
+  align32(Buf, EndBuf);
+
+  bool SeenModuleGlobalInfo = false;
+  bool SeenGlobalTypePlane = false;
+  while (Buf < EndBuf) {
+    BufPtr OldBuf = Buf;
+    readBlock(Buf, EndBuf, BlockType, BlockSize);
+
+    switch (BlockType) {
+
+    case BytecodeFormat::GlobalTypePlane:
+      if ( SeenGlobalTypePlane ) {
+        PARSE_ERROR("Two GlobalTypePlane Blocks Encountered!");
+      }
+
+      ParseGlobalTypes(Buf, Buf+BlockSize);
+      SeenGlobalTypePlane = true;
+      break;
+
+    case BytecodeFormat::ModuleGlobalInfo:
+      if ( SeenModuleGlobalInfo ) {
+        PARSE_ERROR("Two ModuleGlobalInfo Blocks Encountered!");
+      }
+      ParseModuleGlobalInfo(Buf, Buf+BlockSize);
+      SeenModuleGlobalInfo = true;
+      break;
+
+    case BytecodeFormat::ConstantPool:
+      ParseConstantPool(Buf, Buf+BlockSize, ModuleTypes);
+      break;
+
+    case BytecodeFormat::Function:
+      ParseFunctionLazily(Buf, Buf+BlockSize);
+      break;
+
+    case BytecodeFormat::SymbolTable:
+      ParseSymbolTable(Buf, Buf+BlockSize );
+      break;
+
+    default:
+      // Unknown block: skip it. The error is only raised when skipping
+      // moved the pointer backwards, i.e. the address wrapped around.
+      Buf += BlockSize;
+      if (OldBuf > Buf)
+      {
+        PARSE_ERROR("Unexpected Block of Type " << BlockType << " encountered!" );
+      }
+      break;
+    }
+    align32(Buf, EndBuf);
+  }
+}
+
+/// Parse a complete bytecode image of Length bytes starting at Buf. The
+/// four byte signature is checked first, then the module is parsed between
+/// the handler's handleModuleBegin and handleModuleEnd events. The whole
+/// parse is bracketed by handleStart and handleFinish.
+void AbstractBytecodeParser::ParseBytecode(
+       BufPtr Buf, unsigned Length,
+       const std::string &ModuleID) {
+
+  handler->handleStart();
+
+  // The input is never written to, so the end pointer stays const-qualified.
+  BufPtr EndBuf = Buf + Length;
+
+  // Read and check signature...
+  unsigned Sig = read(Buf, EndBuf);
+  if (Sig != ('l' | ('l' << 8) | ('v' << 16) | ('m' << 24))) {
+    PARSE_ERROR("Invalid bytecode signature: " << Sig);
+  }
+
+  handler->handleModuleBegin(ModuleID);
+
+  this->ParseModule(Buf, EndBuf);
+
+  handler->handleModuleEnd(ModuleID);
+
+  handler->handleFinish();
+}
+
+// vim: sw=2
diff --git a/lib/Bytecode/Analyzer/Parser.h b/lib/Bytecode/Analyzer/Parser.h
new file mode 100644
index 0000000000..027047b3f8
--- /dev/null
+++ b/lib/Bytecode/Analyzer/Parser.h
@@ -0,0 +1,178 @@
+//===-- Parser.h - Definitions internal to the reader -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Reid Spencer and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This header file defines the interface to the Bytecode Parser
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef BYTECODE_PARSER_H
+#define BYTECODE_PARSER_H
+
+#include "ReaderPrimitives.h"
+#include "BytecodeHandler.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include <utility>
+#include <vector>
+#include <map>
+
+namespace llvm {
+
+/// LazyFunctionInfo - A pair of byte pointers (begin and end) recorded for a
+/// function whose body is read lazily. Both default to null.
+struct LazyFunctionInfo {
+  const unsigned char *Buf;
+  const unsigned char *EndBuf;
+
+  LazyFunctionInfo(const unsigned char *Begin = 0,
+                   const unsigned char *End = 0) {
+    Buf = Begin;
+    EndBuf = End;
+  }
+};
+
+// LazyFunctionMap - Maps a Type pointer to the LazyFunctionInfo recorded for
+// it. This is the type of AbstractBytecodeParser::LazyFunctionLoadMap.
+typedef std::map<const Type*, LazyFunctionInfo> LazyFunctionMap;
+
+/// AbstractBytecodeParser - Parses a bytecode image and reports what it reads
+/// to a BytecodeHandler supplied at construction time.
+class AbstractBytecodeParser {
+  AbstractBytecodeParser(const AbstractBytecodeParser &); // DO NOT IMPLEMENT
+  void operator=(const AbstractBytecodeParser &); // DO NOT IMPLEMENT
+public:
+  /// Create a parser that reports to \p h. The pointer is stored as-is; the
+  /// destructor does not delete it. The revision number and format flags
+  /// start out as 0/false so they never hold indeterminate values.
+  AbstractBytecodeParser( BytecodeHandler* h )
+    : RevisionNum(0),
+      hasInconsistentModuleGlobalInfo(false),
+      hasExplicitPrimitiveZeros(false),
+      hasRestrictedGEPTypes(false),
+      handler(h) { }
+  ~AbstractBytecodeParser() { }
+
+  /// Parse the \p Length bytes at \p Buf as one module named \p ModuleID.
+  void ParseBytecode(const unsigned char *Buf, unsigned Length,
+                     const std::string &ModuleID);
+
+  void dump() const {
+    std::cerr << "AbstractBytecodeParser instance!\n";
+  }
+
+private:
+  // Information about the module, extracted from the bytecode revision number.
+  unsigned char RevisionNum;        // The rev # itself
+
+  // Flags to distinguish LLVM 1.0 & 1.1 bytecode formats (revision #0)
+
+  // Revision #0 had an explicit alignment of data only for the ModuleGlobalInfo
+  // block. This was fixed to be like all other blocks in 1.2
+  bool hasInconsistentModuleGlobalInfo;
+
+  // Revision #0 also explicitly encoded zero values for primitive types like
+  // int/sbyte/etc.
+  bool hasExplicitPrimitiveZeros;
+
+  // Flags to control features specific to LLVM 1.2 and before (revision #1)
+
+  // LLVM 1.2 and earlier required that getelementptr structure indices were
+  // ubyte constants and that sequential type indices were longs.
+  bool hasRestrictedGEPTypes;
+
+
+  /// CompactionTable - If a compaction table is active in the current function,
+  /// this is the mapping that it contains.
+  std::vector<Type*> CompactionTypeTable;
+
+  // ConstantFwdRefs - This maintains a mapping between <Type, Slot #>'s and
+  // forward references to constants. Such values may be referenced before they
+  // are defined, and if so, the temporary object that they represent is held
+  // here.
+  //
+  typedef std::map<std::pair<const Type*,unsigned>, Constant*> ConstantRefsType;
+  ConstantRefsType ConstantFwdRefs;
+
+  // TypesLoaded - This vector mirrors the Values[TypeTyID] plane. It is used
+  // to deal with forward references to types.
+  //
+  typedef std::vector<PATypeHolder> TypeListTy;
+  TypeListTy ModuleTypes;
+  TypeListTy FunctionTypes;
+
+  // When the ModuleGlobalInfo section is read, we create a FunctionType object
+  // for each function in the module. When the function is loaded, this type is
+  // used to instantiate the actual function object.
+  std::vector<const Type*> FunctionSignatureList;
+
+  // Constant values are read in after global variables. Because of this, we
+  // must defer setting the initializers on global variables until after module
+  // level constants have been read. In the mean time, this list keeps track of
+  // what we must do.
+  //
+  std::vector<std::pair<GlobalVariable*, unsigned> > GlobalInits;
+
+  // For lazy reading-in of functions, we need to save away several pieces of
+  // information about each function: its begin and end pointer in the buffer
+  // and its FunctionSlot.
+  //
+  LazyFunctionMap LazyFunctionLoadMap;
+
+  /// The handler for parsing
+  BytecodeHandler* handler;
+
+private:
+  // Declared without the class-name qualifier: a qualified name is not
+  // permitted on a member declaration inside its own class.
+  const Type *getType(unsigned ID);
+
+  /// getGlobalTableType - This is just like getType, but when a compaction
+  /// table is in use, it is ignored. Also, no forward references or other
+  /// fancy features are supported.
+  /// Throws std::string when Slot is past the end of ModuleTypes.
+  const Type *getGlobalTableType(unsigned Slot) {
+    if (Slot < Type::FirstDerivedTyID) {
+      const Type *Ty = Type::getPrimitiveType((Type::PrimitiveID)Slot);
+      assert(Ty && "Not a primitive type ID?");
+      return Ty;
+    }
+    Slot -= Type::FirstDerivedTyID;
+    if (Slot >= ModuleTypes.size())
+      throw std::string("Illegal compaction table type reference!");
+    return ModuleTypes[Slot];
+  }
+
+  /// getGlobalTableTypeSlot - Inverse of getGlobalTableType: the primitive ID
+  /// for primitive types, otherwise FirstDerivedTyID plus the type's position
+  /// in ModuleTypes. Throws std::string when the type is not in ModuleTypes.
+  unsigned getGlobalTableTypeSlot(const Type *Ty) {
+    if (Ty->isPrimitiveType())
+      return Ty->getPrimitiveID();
+    TypeListTy::iterator I = find(ModuleTypes.begin(),
+                                  ModuleTypes.end(), Ty);
+    if (I == ModuleTypes.end())
+      throw std::string("Didn't find type in ModuleTypes.");
+    return Type::FirstDerivedTyID + (I - ModuleTypes.begin());
+  }
+
+public:
+  typedef const unsigned char* BufPtr;
+  void ParseModule             (BufPtr &Buf, BufPtr End);
+  void ParseNextFunction       (Type* FType) ;
+  void ParseAllFunctionBodies  ();
+
+private:
+  void ParseVersionInfo        (BufPtr &Buf, BufPtr End);
+  void ParseModuleGlobalInfo   (BufPtr &Buf, BufPtr End);
+  void ParseSymbolTable        (BufPtr &Buf, BufPtr End);
+  void ParseFunctionLazily     (BufPtr &Buf, BufPtr End);
+  void ParseFunctionBody       (const Type* FType, BufPtr &Buf, BufPtr EndBuf);
+  void ParseCompactionTable    (BufPtr &Buf, BufPtr End);
+  void ParseGlobalTypes        (BufPtr &Buf, BufPtr End);
+
+  void ParseBasicBlock         (BufPtr &Buf, BufPtr End, unsigned BlockNo);
+  unsigned ParseInstructionList(BufPtr &Buf, BufPtr End);
+
+  bool ParseInstruction        (BufPtr &Buf, BufPtr End,
+                                std::vector<unsigned>& Args);
+
+  void ParseConstantPool       (BufPtr &Buf, BufPtr End, TypeListTy& List);
+  void ParseConstantValue      (BufPtr &Buf, BufPtr End, unsigned TypeID);
+  void ParseTypeConstants      (BufPtr &Buf, BufPtr End, TypeListTy &Tab,
+                                unsigned NumEntries);
+  const Type *ParseTypeConstant(BufPtr &Buf, BufPtr End);
+  void ParseStringConstants    (BufPtr &Buf, BufPtr End, unsigned NumEntries);
+
+};
+
+
+/// readBlock - Read a block header: two consecutive read() values, the first
+/// stored into Type and the second into Size. Buf is advanced by whatever the
+/// two read() calls consume. Type is assigned before the second read() is
+/// attempted.
+static inline void readBlock(const unsigned char *&Buf,
+ const unsigned char *EndBuf,
+ unsigned &Type, unsigned &Size) {
+ Type = read(Buf, EndBuf);
+ Size = read(Buf, EndBuf);
+}
+
+} // End llvm namespace
+
+#endif
+// vim: sw=2
diff --git a/lib/Bytecode/Analyzer/ReaderPrimitives.h b/lib/Bytecode/Analyzer/ReaderPrimitives.h
new file mode 100644
index 0000000000..496ab2a5b1
--- /dev/null
+++ b/lib/Bytecode/Analyzer/ReaderPrimitives.h
@@ -0,0 +1,101 @@
+//===-- ReaderPrimitives.h - Bytecode file format reading prims -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This header defines some basic functions for reading basic primitive types
+// from a bytecode stream.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef READERPRIMITIVES_H
+#define READERPRIMITIVES_H
+
+#include "Support/DataTypes.h"
+#include <string>
+
+namespace llvm {
+
+ /// read - Decode the four bytes at Buf as a little-endian 32-bit unsigned
+ /// value and advance Buf past them.
+ /// Throws std::string("Ran out of data!"), leaving Buf untouched, when fewer
+ /// than four bytes remain before EndBuf.
+ static inline unsigned read(const unsigned char *&Buf,
+                             const unsigned char *EndBuf) {
+   // Compare the remaining distance rather than forming Buf+4, which could
+   // point outside the buffer before the check has a chance to reject it.
+   if (EndBuf - Buf < 4) throw std::string("Ran out of data!");
+
+   // Widen each byte to unsigned before shifting: a promoted int shifted left
+   // by 24 overflows into the sign bit for bytes >= 0x80.
+   const unsigned Value = (unsigned)Buf[0]         |
+                          ((unsigned)Buf[1] << 8)  |
+                          ((unsigned)Buf[2] << 16) |
+                          ((unsigned)Buf[3] << 24);
+   Buf += 4;
+   return Value;
+ }
+
+
+ // read_vbr_uint - Read an unsigned integer encoded in variable bitrate
+ // format: each byte carries 7 payload bits, least significant group first,
+ // and a set high bit (0x80) means another byte follows.
+ //
+ // Throws std::string("Ran out of data!") if EndBuf is reached before a byte
+ // with a clear high bit. Payload bits beyond the width of unsigned are
+ // discarded.
+ static inline unsigned read_vbr_uint(const unsigned char *&Buf,
+                                      const unsigned char *EndBuf) {
+   unsigned Shift = 0;
+   unsigned Result = 0;
+   const unsigned NumBits = sizeof(Result) * 8;
+   unsigned char Byte;
+
+   do {
+     if (Buf == EndBuf) throw std::string("Ran out of data!");
+     Byte = *Buf++;
+     // Shifting by NumBits or more is undefined, so an over-long encoding
+     // must not be allowed to push Shift past the width of the result.
+     if (Shift < NumBits) {
+       Result |= (unsigned)(Byte & 0x7F) << Shift;
+       Shift += 7;
+     }
+   } while (Byte & 0x80);
+   return Result;
+ }
+
+ // read_vbr_uint64 - 64-bit counterpart of read_vbr_uint: 7 payload bits per
+ // byte, least significant group first, high bit set on every byte but the
+ // last.
+ //
+ // Throws std::string("Ran out of data!") if EndBuf is reached before a byte
+ // with a clear high bit. Payload bits beyond 64 are discarded.
+ static inline uint64_t read_vbr_uint64(const unsigned char *&Buf,
+                                        const unsigned char *EndBuf) {
+   unsigned Shift = 0;
+   uint64_t Result = 0;
+   const unsigned NumBits = sizeof(Result) * 8;
+   unsigned char Byte;
+
+   do {
+     if (Buf == EndBuf) throw std::string("Ran out of data!");
+     Byte = *Buf++;
+     // Shifting by NumBits or more is undefined, so an over-long encoding
+     // must not be allowed to push Shift past the width of the result.
+     if (Shift < NumBits) {
+       Result |= (uint64_t)(Byte & 0x7F) << Shift;
+       Shift += 7;
+     }
+   } while (Byte & 0x80);
+   return Result;
+ }
+
+ // read_vbr_int64 - Read a signed 64-bit value. The value is first read with
+ // read_vbr_uint64; its lowest bit is the sign (1 = negative) and the
+ // remaining bits, shifted down by one, are the magnitude.
+ static inline int64_t read_vbr_int64(const unsigned char *&Buf,
+                                      const unsigned char *EndBuf) {
+   const uint64_t Raw = read_vbr_uint64(Buf, EndBuf);
+   const bool IsNegative = (Raw & 1) != 0;
+
+   if (!IsNegative)
+     return (int64_t)(Raw >> 1);
+
+   // There is no such thing as -0 with integers. "-0" really means
+   // 0x8000000000000000.
+   if (Raw == 1)
+     return 1LL << 63;
+
+   return -(int64_t)(Raw >> 1);
+ }
+
+ // align32 - Round Buf up to the next address that is a multiple of 32 bits
+ // (4 bytes); an already aligned Buf is left where it is.
+ // Buf is updated first and checked second, so it has already moved when
+ // std::string("Ran out of data!") is thrown for running past EndBuf.
+ static inline void align32(const unsigned char *&Buf,
+                            const unsigned char *EndBuf) {
+   // Do the rounding in uintptr_t: unsigned long is narrower than a pointer on
+   // LLP64 targets, and casting through it would truncate the address.
+   const uintptr_t Rounded = ((uintptr_t)Buf + 3) & ~(uintptr_t)3;
+   Buf = (const unsigned char *)Rounded;
+   if (Buf > EndBuf) throw std::string("Ran out of data!");
+ }
+
+ // read_str - Read a length-prefixed string: a byte count read with
+ // read_vbr_uint, followed by that many raw bytes. Buf ends up just past the
+ // string.
+ // Throws std::string("Ran out of data reading a string!") when the count
+ // exceeds the bytes left before EndBuf; Buf then still points at the first
+ // byte after the count.
+ static inline std::string read_str(const unsigned char *&Buf,
+                                    const unsigned char *EndBuf) {
+   const unsigned Size = read_vbr_uint(Buf, EndBuf);
+
+   // Validate against the remaining distance before touching Buf: adding an
+   // attacker-chosen Size first could wrap the pointer and slip past a
+   // "Buf > EndBuf" test.
+   if (Buf > EndBuf || Size > (size_t)(EndBuf - Buf))   // Size invalid?
+     throw std::string("Ran out of data reading a string!");
+
+   const unsigned char *Start = Buf;
+   Buf += Size;
+   return std::string((const char*)Start, Size);
+ }
+
+ // input_data - Copy (End - Ptr) bytes from Buf into the memory starting at
+ // Ptr and advance Buf by the same amount.
+ // Throws std::string("Ran out of data!"), copying nothing and leaving Buf
+ // untouched, when fewer bytes than that remain before EndBuf.
+ static inline void input_data(const unsigned char *&Buf,
+                               const unsigned char *EndBuf,
+                               void *Ptr, void *End) {
+   unsigned char *Start = (unsigned char *)Ptr;
+   unsigned Amount = (unsigned char *)End - Start;
+
+   // Compare against the remaining distance instead of forming Buf+Amount,
+   // which may lie outside the buffer (or wrap) before the check runs.
+   if (Buf > EndBuf || Amount > (size_t)(EndBuf - Buf))
+     throw std::string("Ran out of data!");
+
+   std::copy(Buf, Buf+Amount, Start);
+   Buf += Amount;
+ }
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/Bytecode/Makefile b/lib/Bytecode/Makefile
index 92494b2c35..42daa9d2ad 100644
--- a/lib/Bytecode/Makefile
+++ b/lib/Bytecode/Makefile
@@ -7,7 +7,7 @@
#
##===----------------------------------------------------------------------===##
LEVEL = ../..
-DIRS = Reader Writer
+DIRS = Analyzer Reader Writer
include $(LEVEL)/Makefile.common
diff --git a/lib/Bytecode/Reader/Analyzer.cpp b/lib/Bytecode/Reader/Analyzer.cpp
new file mode 100644
index 0000000000..99c3e41f9f
--- /dev/null
+++ b/lib/Bytecode/Reader/Analyzer.cpp
@@ -0,0 +1,242 @@
+//===-- Analyzer.cpp - Bytecode analysis handler ----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Reid Spencer and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the AnalyzerHandler class, the BytecodeHandler that
+// BytecodeAnalyzer::AnalyzeBytecode attaches to an AbstractBytecodeParser.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AnalyzerInternals.h"
+
+using namespace llvm;
+
+
+namespace {
+
+/// AnalyzerHandler - A BytecodeHandler whose callbacks all have empty bodies.
+/// The two callbacks that return a value, handleError and handleInstruction,
+/// return false.
+class AnalyzerHandler : public BytecodeHandler {
+public:
+  // --- Errors and overall framing -----------------------------------------
+  bool handleError(const std::string& /*str*/) { return false; }
+  void handleStart() {}
+  void handleFinish() {}
+  void handleModuleBegin(const std::string& /*id*/) {}
+  void handleModuleEnd(const std::string& /*id*/) {}
+
+  void handleVersionInfo(unsigned char /*RevisionNum*/,
+                         Module::Endianness /*Endianness*/,
+                         Module::PointerSize /*PointerSize*/) {}
+
+  // --- Module global info ---------------------------------------------------
+  void handleModuleGlobalsBegin() {}
+
+  void handleGlobalVariable(const Type* /*ElemType*/,
+                            bool /*isConstant*/,
+                            GlobalValue::LinkageTypes) {}
+
+  void handleInitializedGV(const Type* /*ElemType*/,
+                           bool /*isConstant*/,
+                           GlobalValue::LinkageTypes,
+                           unsigned /*initSlot*/) {}
+
+  virtual void handleType(const Type* /*Ty*/) {}
+
+  void handleFunctionDeclaration(const Type* /*FuncType*/) {}
+
+  void handleModuleGlobalsEnd() {}
+
+  // --- Compaction table -----------------------------------------------------
+  void handleCompactionTableBegin() {}
+  void handleCompactionTablePlane(unsigned /*Ty*/, unsigned /*NumEntries*/) {}
+  void handleCompactionTableType(unsigned /*i*/, unsigned /*TypSlot*/,
+                                 const Type*) {}
+  void handleCompactionTableValue(unsigned /*i*/, unsigned /*ValSlot*/,
+                                  const Type*) {}
+  void handleCompactionTableEnd() {}
+
+  // --- Symbol table ---------------------------------------------------------
+  void handleSymbolTableBegin() {}
+  void handleSymbolTablePlane(unsigned /*Ty*/, unsigned /*NumEntries*/,
+                              const Type* /*Typ*/) {}
+  void handleSymbolTableType(unsigned /*i*/, unsigned /*slot*/,
+                             const std::string& /*name*/) {}
+  void handleSymbolTableValue(unsigned /*i*/, unsigned /*slot*/,
+                              const std::string& /*name*/) {}
+  void handleSymbolTableEnd() {}
+
+  // --- Functions, basic blocks and instructions -----------------------------
+  void handleFunctionBegin(const Type* /*FType*/,
+                           GlobalValue::LinkageTypes /*linkage*/) {}
+  void handleFunctionEnd(const Type* /*FType*/) {}
+
+  void handleBasicBlockBegin(unsigned /*blocknum*/) {}
+
+  bool handleInstruction(unsigned /*Opcode*/,
+                         const Type* /*iType*/,
+                         std::vector<unsigned>& /*Operands*/) {
+    return false;
+  }
+
+  void handleBasicBlockEnd(unsigned /*blocknum*/) {}
+
+  // --- Constants ------------------------------------------------------------
+  void handleGlobalConstantsBegin() {}
+
+  void handleConstantExpression(
+      unsigned /*Opcode*/,
+      const Type* /*Typ*/,
+      std::vector<std::pair<const Type*,unsigned> > /*ArgVec*/) {}
+
+  void handleConstantValue(Constant * /*c*/) {}
+
+  void handleConstantArray(const ArrayType* /*AT*/,
+                           std::vector<unsigned>& /*Elements*/) {}
+
+  void handleConstantStruct(const StructType* /*ST*/,
+                            std::vector<unsigned>& /*ElementSlots*/) {}
+
+  void handleConstantPointer(const PointerType* /*PT*/, unsigned /*Slot*/) {}
+
+  void handleConstantString(const ConstantArray* /*CA*/) {}
+
+  void handleGlobalConstantsEnd() {}
+};
+
+}
+
+/// AnalyzeBytecode - Run an AbstractBytecodeParser, with an AnalyzerHandler
+/// attached, over the Length bytes at Buf: first ParseBytecode, then
+/// ParseAllFunctionBodies. The bca argument is not used in this function.
+void llvm::BytecodeAnalyzer::AnalyzeBytecode(const unsigned char *Buf,
+                                             unsigned Length,
+                                             BytecodeAnalysis& bca,
+                                             const std::string &ModuleID) {
+  AnalyzerHandler Handler;
+  AbstractBytecodeParser Parser(&Handler);
+
+  Parser.ParseBytecode(Buf, Length, ModuleID);
+  Parser.ParseAllFunctionBodies();
+}
+
+// vim: sw=2
diff --git a/lib/Bytecode/Reader/AnalyzerInternals.h b/lib/Bytecode/Reader/AnalyzerInternals.h
new file mode 100644
index 0000000000..d9a2e843d8
--- /dev/null
+++ b/lib/Bytecode/Reader/AnalyzerInternals.h
@@ -0,0 +1,65 @@
+//===-- AnalyzerInternals.h - Bytecode analyzer internals -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This header file defines the BytecodeAnalyzer class and the BCR_TRACE macro
+// that are used by the bytecode analyzer.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ANALYZER_INTERNALS_H
+#define ANALYZER_INTERNALS_H
+
+#include "Parser.h"
+#include "llvm/Bytecode/Analyzer.h"
+
+// Enable to trace to figure out what the heck is going on when parsing fails
+//#define TRACE_LEVEL 10
+//#define DEBUG_OUTPUT
+
+// BCR_TRACE(n, X) - When TRACE_LEVEL is non-zero and n is below it, stream X
+// to std::cerr behind 2*n spaces of indentation; otherwise expand to nothing.
+// The do/while(0) wrapper makes the expansion a single statement, so using the
+// macro as the body of an if that has an else cannot capture that else.
+#if TRACE_LEVEL // ByteCodeReading_TRACEr
+#define BCR_TRACE(n, X) \
+  do { \
+    if ((n) < TRACE_LEVEL) std::cerr << std::string((n)*2, ' ') << X; \
+  } while (0)
+#else
+#define BCR_TRACE(n, X)
+#endif
+
+namespace llvm {
+
+/// BytecodeAnalyzer - Declares the two operations that can be run over an
+/// in-memory bytecode image: AnalyzeBytecode and DumpBytecode. Both take the
+/// image as a pointer plus length, a BytecodeAnalysis and a module identifier.
+/// Instances cannot be copied.
+class BytecodeAnalyzer {
+public:
+  BytecodeAnalyzer() { }
+  ~BytecodeAnalyzer() { }
+
+  void AnalyzeBytecode(const unsigned char *Buf, unsigned Length,
+                       BytecodeAnalysis& bca, const std::string &ModuleID);
+
+  void DumpBytecode(const unsigned char *Buf, unsigned Length,
+                    BytecodeAnalysis& bca, const std::string &ModuleID);
+
+  /// Write a fixed identification line to std::cerr.
+  void dump() const { std::cerr << "BytecodeParser instance!\n"; }
+
+private:
+  BytecodeAnalyzer(const BytecodeAnalyzer &);  // DO NOT IMPLEMENT
+  void operator=(const BytecodeAnalyzer &);    // DO NOT IMPLEMENT
+
+  BytecodeAnalysis TheAnalysis;
+};
+
+} // End llvm namespace
+
+#endif
+
+// vim: sw=2
diff --git a/lib/Bytecode/Reader/AnalyzerWrappers.cpp b/lib/Bytecode/Reader/AnalyzerWrappers.cpp
new file mode 100644
index 0000000000..a0e4845a1b
--- /dev/null
+++ b/lib/Bytecode/Reader/AnalyzerWrappers.cpp
@@ -0,0 +1,208 @@
+//===- AnalyzerWrappers.cpp - Analyze bytecode from file or buffer -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Reid Spencer and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements loading and analysis of a bytecode file and analyzing a
+// bytecode buffer.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Bytecode/Analyzer.h"
+#include "AnalyzerInternals.h"
+#include "Support/FileUtilities.h"
+#include "Support/StringExtras.h"
+#include "Config/unistd.h"
+#include <cerrno>
+
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// BytecodeFileAnalyzer - Analyze from an mmap'able file descriptor.
+//
+
+namespace {
+  /// BytecodeFileAnalyzer - A BytecodeAnalyzer that is constructed from a file
+  /// name. It keeps the pointer and length of the file contents it loaded.
+  class BytecodeFileAnalyzer : public BytecodeAnalyzer {
+    unsigned char *Buffer;   // Start of the loaded file contents
+    unsigned Length;         // Number of bytes at Buffer
+
+    BytecodeFileAnalyzer(const BytecodeFileAnalyzer&); // Do not implement
+    void operator=(const BytecodeFileAnalyzer &BFR); // Do not implement
+
+  public:
+    BytecodeFileAnalyzer(const std::string &Filename, BytecodeAnalysis& bca);
+    ~BytecodeFileAnalyzer();
+  };
+}
+
+/// ErrnoMessage - Build "<strerror text>, while trying to <descr>" for the
+/// saved errno value savedErrNum.
+static std::string ErrnoMessage (int savedErrNum, std::string descr) {
+  std::string Message(::strerror(savedErrNum));
+  Message += ", while trying to ";
+  Message += descr;
+  return Message;
+}
+
+/// Load Filename with ReadFileIntoAddressSpace, dump it first when
+/// bca.dumpBytecode is set, then analyze it.
+/// Throws std::string when the file cannot be read. If dumping or analysis
+/// throws, the file is unmapped before the exception is passed on.
+BytecodeFileAnalyzer::BytecodeFileAnalyzer(const std::string &Filename,
+                                           BytecodeAnalysis& bca) {
+  Buffer = (unsigned char*)ReadFileIntoAddressSpace(Filename, Length);
+  if (!Buffer)
+    throw std::string("Error reading file '") + Filename + "'.";
+
+  try {
+    // Parse the bytecode we mmapped in
+    if (bca.dumpBytecode) {
+      DumpBytecode(Buffer, Length, bca, Filename);
+    }
+    AnalyzeBytecode(Buffer, Length, bca, Filename);
+  } catch (...) {
+    // The destructor does not run for a constructor that throws, so the
+    // mapping has to be released here.
+    UnmapFileFromAddressSpace(Buffer, Length);
+    throw;
+  }
+}
+
+/// Release the file contents held in Buffer/Length.
+BytecodeFileAnalyzer::~BytecodeFileAnalyzer() {
+ // Unmap the bytecode...
+ UnmapFileFromAddressSpace(Buffer, Length);
+}
+
+//===----------------------------------------------------------------------===//
+// BytecodeBufferAnalyzer - Read from a memory buffer
+//
+
+namespace {
+  /// BytecodeBufferAnalyzer - A BytecodeAnalyzer that is constructed from a
+  /// caller-supplied memory buffer.
+  ///
+  class BytecodeBufferAnalyzer : public BytecodeAnalyzer {
+    const unsigned char *Buffer;  // Caller's buffer, or a private copy of it
+    bool MustDelete;              // True when Buffer is a copy we allocated
+
+    BytecodeBufferAnalyzer(const BytecodeBufferAnalyzer&); // Do not implement
+    void operator=(const BytecodeBufferAnalyzer &BFR); // Do not implement
+
+  public:
+    BytecodeBufferAnalyzer(const unsigned char *Buf, unsigned Length,
+                           BytecodeAnalysis& bca, const std::string &ModuleID);
+    ~BytecodeBufferAnalyzer();
+
+  };
+}
+
+/// Analyze (and, when bca.dumpBytecode is set, first dump) the Length bytes at
+/// Buf under the name ModuleID.
+/// A Buf that is not 4-byte aligned is first copied to an aligned position
+/// inside a freshly allocated array of Length+4 bytes; an aligned Buf is used
+/// directly. If dumping or analysis throws, the copy (if any) is freed before
+/// the exception is passed on.
+BytecodeBufferAnalyzer::BytecodeBufferAnalyzer(const unsigned char *Buf,
+                                               unsigned Length,
+                                               BytecodeAnalysis& bca,
+                                               const std::string &ModuleID) {
+  const unsigned char *ParseBegin = 0;
+  const bool IsAligned = ((intptr_t)Buf & 3) == 0;
+
+  if (IsAligned) {
+    // If we don't need to copy it over, just use the caller's copy
+    ParseBegin = Buffer = Buf;
+    MustDelete = false;
+  } else {
+    // Not aligned: allocate a new buffer to hold the bytecode...
+    unsigned char *Copy = new unsigned char[Length+4];
+    Buffer = Copy;
+    unsigned Offset = 4 - ((intptr_t)Copy & 3);   // Make sure it's aligned
+    unsigned char *Aligned = Copy + Offset;
+    memcpy(Aligned, Buf, Length);                 // Copy it over
+    ParseBegin = Aligned;
+    MustDelete = true;
+  }
+
+  try {
+    if (bca.dumpBytecode) {
+      DumpBytecode(ParseBegin, Length, bca, ModuleID);
+    }
+    AnalyzeBytecode(ParseBegin, Length, bca, ModuleID);
+  } catch (...) {
+    if (MustDelete)
+      delete [] Buffer;
+    throw;
+  }
+}
+
+/// Free Buffer, but only when MustDelete says it is a copy that the
+/// constructor allocated; a caller-owned buffer is left alone.
+BytecodeBufferAnalyzer::~BytecodeBufferAnalyzer() {
+ if (MustDelete) delete [] Buffer;
+}
+
+//===----------------------------------------------------------------------===//
+// BytecodeStdinAnalyzer - Read bytecode from Standard Input
+//
+
+namespace {
+  /// BytecodeStdinAnalyzer - A BytecodeAnalyzer that takes its input from
+  /// standard input.
+  ///
+  class BytecodeStdinAnalyzer : public BytecodeAnalyzer {
+    std::vector<unsigned char> FileData;  // Everything read from stdin
+    unsigned char *FileBuf;               // Points at the first byte of FileData
+
+    BytecodeStdinAnalyzer(const BytecodeStdinAnalyzer&); // Do not implement
+    void operator=(const BytecodeStdinAnalyzer &BFR); // Do not implement
+
+  public:
+    BytecodeStdinAnalyzer(BytecodeAnalysis& bca);
+  };
+}
+
+/// Read all of standard input into FileData, then dump it (when
+/// bca.dumpBytecode is set) and analyze it under the module name "<stdin>".
+/// Throws std::string when a read fails or when no data was supplied.
+BytecodeStdinAnalyzer::BytecodeStdinAnalyzer(BytecodeAnalysis& bca ) {
+  int BlockSize;
+  unsigned char Buffer[4096*4];
+
+  // Read in all of the data from stdin, we cannot mmap stdin...
+  while ((BlockSize = ::read(0 /*stdin*/, Buffer, sizeof(Buffer)))) {
+    if (BlockSize == -1) {
+      // A read interrupted by a signal before any data arrived is not a
+      // failure; simply issue it again.
+      if (errno == EINTR)
+        continue;
+      throw ErrnoMessage(errno, "read from standard input");
+    }
+
+    FileData.insert(FileData.end(), Buffer, Buffer+BlockSize);
+  }
+
+  if (FileData.empty())
+    throw std::string("Standard Input empty!");
+
+  FileBuf = &FileData[0];
+  if (bca.dumpBytecode)
+    DumpBytecode(&FileData[0], FileData.size(), bca, "<stdin>");
+  AnalyzeBytecode(FileBuf, FileData.size(), bca, "<stdin>");
+}
+
+//===----------------------------------------------------------------------===//
+// Wrapper functions
+//===----------------------------------------------------------------------===//
+
+// AnalyzeBytecodeFile - Analyze one bytecode file. The name "-" selects
+// standard input; any other name is treated as a file to load. A std::string
+// thrown during the analysis is caught here and copied into *ErrorStr when
+// ErrorStr is non-null.
+void llvm::AnalyzeBytecodeFile(const std::string &Filename,
+                               BytecodeAnalysis& bca,
+                               std::string *ErrorStr)
+{
+  try {
+    // All the work happens in the analyzer constructors.
+    if ( Filename == "-" ) {
+      BytecodeStdinAnalyzer bsa(bca);
+    } else {
+      BytecodeFileAnalyzer bfa(Filename,bca);
+    }
+  } catch (std::string &err) {
+    if (ErrorStr)
+      *ErrorStr = err;
+  }
+}
+
+// AnalyzeBytecodeBuffer - Analyze the bytecode held in a memory buffer, using
+// "<buffer>" as the module identifier. A std::string thrown during the
+// analysis is caught here and copied into *ErrorStr when ErrorStr is non-null.
+void llvm::AnalyzeBytecodeBuffer(
+    const unsigned char* Buffer, ///< Pointer to start of bytecode buffer
+    unsigned BufferSize,         ///< Size of the bytecode buffer
+    BytecodeAnalysis& Results,   ///< The results of the analysis
+    std::string* ErrorStr        ///< Errors, if any.
+  )
+{
+  try {
+    // All the work happens in the analyzer constructor.
+    BytecodeBufferAnalyzer bba(Buffer, BufferSize, Results, "<buffer>");
+  } catch (std::string& err ) {
+    if (ErrorStr)
+      *ErrorStr = err;
+  }
+}
+
+
+/// This function is meant to print the contents of the BytecodeAnalysis
+/// structure in a human-legible form. It is currently a stub: bca is not
+/// examined and a fixed "Not Implemented Yet." line is written to Out.
+/// @brief Print BytecodeAnalysis structure to an ostream
+void llvm::PrintBytecodeAnalysis(BytecodeAnalysis& bca, std::ostream& Out )
+{
+ Out << "Not Implemented Yet.\n";
+}
+
+// vim: sw=2
diff --git a/lib/Bytecode/Reader/Dumper.cpp b/lib/Bytecode/Reader/Dumper.cpp
new file mode 100644
index 0000000000..6ff4ea0c79
--- /dev/null
+++ b/lib/Bytecode/Reader/Dumper.cpp
@@ -0,0 +1,311 @@
+//===-- Dumper.cpp - Bytecode dumping handler -------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Reid Spencer and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the BytecodeDumper class that gets called by the
+// AbstractBytecodeParser when parsing events occur. It merely dumps the
+// information presented to it from the parser.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AnalyzerInternals.h"
+#include "llvm/Constant.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instruction.h"
+#include "llvm/Type.h"
+
+using namespace llvm;
+
+namespace {
+
+/// BytecodeDumper - A BytecodeHandler that writes one or more lines to
+/// std::cout for every event it receives. handleError reports the message and
+/// returns true; handleInstruction returns whether the opcode is a terminator.
+class BytecodeDumper : public llvm::BytecodeHandler {
+public:
+
+  /// Print the error and return true.
+  virtual bool handleError(const std::string& str )
+  {
+    std::cout << "ERROR: " << str << "\n";
+    return true;
+  }
+
+  virtual void handleStart()
+  {
+    std::cout << "Bytecode {\n";
+  }
+
+  virtual void handleFinish()
+  {
+    std::cout << "} End Bytecode\n";
+  }
+
+  virtual void handleModuleBegin(const std::string& id)
+  {
+    std::cout << " Module " << id << " {\n";
+  }
+
+  virtual void handleModuleEnd(const std::string& id)
+  {
+    std::cout << " } End Module " << id << "\n";
+  }
+
+  virtual void handleVersionInfo(
+    unsigned char RevisionNum,        ///< Byte code revision number
+    Module::Endianness Endianness,    ///< Endianness indicator
+    Module::PointerSize PointerSize   ///< PointerSize indicator
+  )
+  {
+    // RevisionNum is widened to int so it prints as a number, not a character.
+    std::cout << " RevisionNum: " << int(RevisionNum)
+              << " Endianness: " << Endianness
+              << " PointerSize: " << PointerSize << "\n";
+  }
+
+  virtual void handleModuleGlobalsBegin()
+  {
+    std::cout << " BLOCK: ModuleGlobalInfo {\n";
+  }
+
+  virtual void handleGlobalVariable(
+    const Type* ElemType,               ///< The type of the global variable
+    bool isConstant,                    ///< Whether the GV is constant or not
+    GlobalValue::LinkageTypes Linkage   ///< The linkage type of the GV
+  )
+  {
+    std::cout << " GV: Uninitialized, "
+              << ( isConstant? "Constant, " : "Variable, ")
+              << " Linkage=" << Linkage << " Type="
+              << ElemType->getDescription() << "\n";
+  }
+
+  virtual void handleInitializedGV(
+    const Type* ElemType,              ///< The type of the global variable
+    bool isConstant,                   ///< Whether the GV is constant or not
+    GlobalValue::LinkageTypes Linkage, ///< The linkage type of the GV
+    unsigned initSlot                  ///< Slot number of GV's initializer
+  )
+  {
+    std::cout << " GV: Initialized, "
+              << ( isConstant? "Constant, " : "Variable, ")
+              << " Linkage=" << Linkage << " Type="
+              << ElemType->getDescription()
+              << " InitializerSlot=" << initSlot << "\n";
+  }
+
+  virtual void handleType( const Type* Ty )
+  {
+    std::cout << " Type: " << Ty->getDescription() << "\n";
+  }
+
+  virtual void handleFunctionDeclaration( const Type* FuncType )
+  {
+    std::cout << " Function: " << FuncType->getDescription() << "\n";
+  }
+
+  virtual void handleModuleGlobalsEnd()
+  {
+    std::cout << " } END BLOCK: ModuleGlobalInfo\n";
+  }
+
+  void handleCompactionTableBegin()
+  {
+    std::cout << " BLOCK: CompactionTable {\n";
+  }
+
+  virtual void handleCompactionTablePlane( unsigned Ty, unsigned NumEntries )
+  {
+    std::cout << " Plane: Ty=" << Ty << " Size=" << NumEntries << "\n";
+  }
+
+  virtual void handleCompactionTableType(
+    unsigned i,
+    unsigned TypSlot,
+    const Type* Ty
+  )
+  {
+    std::cout << " Type: " << i << " Slot:" << TypSlot
+              << " is " << Ty->getDescription() << "\n";
+  }
+
+  virtual void handleCompactionTableValue(
+    unsigned i,
+    unsigned ValSlot,
+    const Type* Ty
+  )
+  {
+    std::cout << " Value: " << i << " Slot:" << ValSlot
+              << " is " << Ty->getDescription() << "\n";
+  }
+
+  virtual void handleCompactionTableEnd()
+  {
+    std::cout << " } END BLOCK: CompactionTable\n";
+  }
+
+  virtual void handleSymbolTableBegin()
+  {
+    std::cout << " BLOCK: SymbolTable {\n";
+  }
+
+  virtual void handleSymbolTablePlane(
+    unsigned Ty,
+    unsigned NumEntries,
+    const Type* Typ
+  )
+  {
+    std::cout << " Plane: Ty=" << Ty << " Size=" << NumEntries
+              << " Type: " << Typ->getDescription() << "\n";
+  }
+
+  virtual void handleSymbolTableType(
+    unsigned i,
+    unsigned slot,
+    const std::string& name
+  )
+  {
+    std::cout << " Type " << i << " Slot=" << slot
+              << " Name: " << name << "\n";
+  }
+
+  virtual void handleSymbolTableValue(
+    unsigned i,
+    unsigned slot,
+    const std::string& name
+  )
+  {
+    std::cout << " Value " << i << " Slot=" << slot
+              << " Name: " << name << "\n";
+  }
+
+  virtual void handleSymbolTableEnd()
+  {
+    std::cout << " } END BLOCK: SymbolTable\n";
+  }
+
+  virtual void handleFunctionBegin(
+    const Type* FType,
+    GlobalValue::LinkageTypes linkage
+  )
+  {
+    std::cout << " BLOCK: Function {\n";
+    std::cout << " Linkage: " << linkage << "\n";
+    std::cout << " Type: " << FType->getDescription() << "\n";
+  }
+
+  virtual void handleFunctionEnd(
+    const Type* FType
+  )
+  {
+    std::cout << " } END BLOCK: Function\n";
+  }
+
+  virtual void handleBasicBlockBegin(
+    unsigned blocknum
+  )
+  {
+    std::cout << " BLOCK: BasicBlock #" << blocknum << "{\n";
+  }
+
+  /// Print the opcode name, the type and one line per operand slot.
+  /// Returns Instruction::isTerminator(Opcode).
+  virtual bool handleInstruction(
+    unsigned Opcode,
+    const Type* iType,
+    std::vector<unsigned>& Operands
+  )
+  {
+    std::cout << " INST: OpCode="
+              << Instruction::getOpcodeName(Opcode) << " Type="
+              << iType->getDescription() << "\n";
+    for ( unsigned i = 0; i < Operands.size(); ++i )
+      std::cout << " Op#" << i << " Slot=" << Operands[i] << "\n";
+
+    return Instruction::isTerminator(Opcode);
+  }
+
+  virtual void handleBasicBlockEnd(unsigned blocknum)
+  {
+    // The closing line must not repeat the "{" of the opening line; every
+    // other END BLOCK line in this class ends right after the block name.
+    std::cout << " } END BLOCK: BasicBlock #" << blocknum << "\n";
+  }
+
+  virtual void handleGlobalConstantsBegin()
+  {
+    std::cout << " BLOCK: GlobalConstants {\n";
+  }
+
+  virtual void handleConstantExpression(
+    unsigned Opcode,
+    const Type* Typ,
+    std::vector<std::pair<const Type*,unsigned> > ArgVec
+  )
+  {
+    std::cout << " EXPR: " << Instruction::getOpcodeName(Opcode)
+              << " Type=" << Typ->getDescription() << "\n";
+    for ( unsigned i = 0; i < ArgVec.size(); ++i )
+      std::cout << " Arg#" << i << " Type="
+                << ArgVec[i].first->getDescription() << " Slot="
+                << ArgVec[i].second << "\n";
+  }
+
+  virtual void handleConstantValue( Constant * c )
+  {
+    std::cout << " VALUE: ";
+    c->print(std::cout);
+    std::cout << "\n";
+  }
+
+  virtual void handleConstantArray(
+    const ArrayType* AT,
+    std::vector<unsigned>& Elements )
+  {
+    std::cout << " ARRAY: " << AT->getDescription() << "\n";
+    for ( unsigned i = 0; i < Elements.size(); ++i )
+      std::cout << " #" << i << " Slot=" << Elements[i] << "\n";
+  }
+
+  virtual void handleConstantStruct(
+    const StructType* ST,
+    std::vector<unsigned>& Elements)
+  {
+    std::cout << " STRUC: " << ST->getDescription() << "\n";
+    for ( unsigned i = 0; i < Elements.size(); ++i )
+      std::cout << " #" << i << " Slot=" << Elements[i] << "\n";
+  }
+
+  virtual void handleConstantPointer(
+    const PointerType* PT, unsigned Slot)
+  {
+    std::cout << " POINT: " << PT->getDescription()
+              << " Slot=" << Slot << "\n";
+  }
+
+  virtual void handleConstantString( const ConstantArray* CA )
+  {
+    std::cout << " STRNG: ";
+    CA->print(std::cout);
+    std::cout << "\n";
+  }
+
+  virtual void handleGlobalConstantsEnd()
+  {
+    std::cout << " } END BLOCK: GlobalConstants\n";
+  }
+};
+
+}
+
+/// DumpBytecode - Run an AbstractBytecodeParser, with a BytecodeDumper
+/// attached, over the Length bytes at Buf: first ParseBytecode, then
+/// ParseAllFunctionBodies. The bca argument is not used in this function.
+void BytecodeAnalyzer::DumpBytecode(const unsigned char *Buf,
+                                    unsigned Length,
+                                    BytecodeAnalysis& bca,
+                                    const std::string &ModuleID) {
+  BytecodeDumper Dumper;
+  AbstractBytecodeParser Parser(&Dumper);
+
+  Parser.ParseBytecode(Buf, Length, ModuleID);
+  Parser.ParseAllFunctionBodies();
+}
+
+// vim: sw=2
diff --git a/lib/Bytecode/Reader/Parser.cpp b/lib/Bytecode/Reader/Parser.cpp
new file mode 100644
index 0000000000..d236b64aae
--- /dev/null
+++ b/lib/Bytecode/Reader/Parser.cpp
@@ -0,0 +1,877 @@
+//===- Parser.cpp - Code to parse bytecode files --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This library implements the functionality defined in llvm/Bytecode/Reader.h
+//
+// Note that this library should be as fast as possible, reentrant, and
+// threadsafe!!
+//
+// TODO: Allow passing in an option to ignore the symbol table
+//
+//===----------------------------------------------------------------------===//
+
+#include "AnalyzerInternals.h"
+#include "llvm/Module.h"
+#include "llvm/Bytecode/Format.h"
+#include "Support/StringExtras.h"
+#include <algorithm>
+#include <iostream>
+#include <sstream>
+
+using namespace llvm;
+
+#define PARSE_ERROR(inserters) \
+ { \
+ std::ostringstream errormsg; \
+ errormsg << inserters; \
+ if ( ! handler->handleError( errormsg.str() ) ) \
+ throw std::string(errormsg.str()); \
+ }
+
+/// getType - Translate a type ID read from the bytecode into a Type.
+///
+/// IDs below Type::FirstDerivedTyID are looked up as primitive types. Any
+/// other ID is rebased by FirstDerivedTyID and then resolved against the
+/// compaction type table when that table is non-empty, or otherwise against
+/// ModuleTypes followed by FunctionTypes.
+///
+/// An unresolvable ID is reported through PARSE_ERROR. If the handler chooses
+/// to continue, Type::VoidTy is returned as a placeholder on every error path
+/// (previously the compaction-table path went on to index out of bounds).
+const Type *AbstractBytecodeParser::getType(unsigned ID) {
+  if (ID < Type::FirstDerivedTyID)
+    if (const Type *T = Type::getPrimitiveType((Type::PrimitiveID)ID))
+      return T;   // Asked for a primitive type...
+
+  // Otherwise, derived types need offset...
+  ID -= Type::FirstDerivedTyID;
+
+  // An active compaction table overrides the module/function tables.
+  if (!CompactionTypeTable.empty()) {
+    if (ID >= CompactionTypeTable.size()) {
+      PARSE_ERROR("Type ID out of range for compaction table!");
+      return Type::VoidTy;   // Handler asked to continue: do not index.
+    }
+    return CompactionTypeTable[ID];
+  }
+
+  // Is it a module-level type?
+  if (ID < ModuleTypes.size())
+    return ModuleTypes[ID].get();
+
+  // Nope, is it a function-level type?
+  ID -= ModuleTypes.size();
+  if (ID < FunctionTypes.size())
+    return FunctionTypes[ID].get();
+
+  PARSE_ERROR("Illegal type reference!");
+  return Type::VoidTy;
+}
+
+/// ParseInstruction - Decode one instruction at Buf, leaving its operand
+/// values in Operands, and report it to the handler. Returns whatever
+/// handler->handleInstruction() returns; the callers in this file treat that
+/// value as "this instruction terminates the block".
+///
+/// The low two bits of the first 32-bit word select the encoding:
+///   1: bits 02-07 opcode, 08-19 type, 20-31 operand (all ones = no operand)
+///   2: bits 02-07 opcode, 08-15 type, 16-23 and 24-31 operands
+///   3: bits 02-07 opcode, 08-13 type, 14-19, 20-25 and 26-31 operands
+///   0: variable-length form, re-read from the start as a series of vbr
+///      values: opcode (shifted left by two), type, operand count, operands,
+///      followed by alignment to a 32-bit boundary.
+bool AbstractBytecodeParser::ParseInstruction(BufPtr& Buf, BufPtr EndBuf,
+                                              std::vector<unsigned> &Operands) {
+  Operands.clear();
+  unsigned ResultTypeID = 0;
+  unsigned Opcode = 0;
+  unsigned Word = read(Buf, EndBuf);
+
+  // The opcode field and the format selector are common to the fixed forms.
+  Opcode = (Word >> 2) & 0x3F;
+  Operands.resize(Word & 3);
+
+  switch (Operands.size()) {
+  case 0: {
+    // Not a packed word after all: back up and decode the long form.
+    Buf -= 4;
+    Opcode = read_vbr_uint(Buf, EndBuf);
+    Opcode >>= 2;
+    ResultTypeID = read_vbr_uint(Buf, EndBuf);
+
+    unsigned Count = read_vbr_uint(Buf, EndBuf);
+    Operands.resize(Count);
+
+    if (Count == 0)
+      PARSE_ERROR("Zero-argument instruction found; this is invalid.");
+
+    for (unsigned Idx = 0; Idx < Count; ++Idx)
+      Operands[Idx] = read_vbr_uint(Buf, EndBuf);
+    align32(Buf, EndBuf);
+    break;
+  }
+  case 1:
+    ResultTypeID = (Word >> 8) & 0xFFF;
+    Operands[0] = (Word >> 20) & 0xFFF;
+    // An all-ones operand field is the special encoding for "no operands".
+    if (Operands[0] == 0xFFF)
+      Operands.resize(0);
+    break;
+  case 2:
+    ResultTypeID = (Word >> 8) & 0xFF;
+    Operands[0] = (Word >> 16) & 0xFF;
+    Operands[1] = (Word >> 24) & 0xFF;
+    break;
+  case 3:
+    ResultTypeID = (Word >> 8) & 0x3F;
+    Operands[0] = (Word >> 14) & 0x3F;
+    Operands[1] = (Word >> 20) & 0x3F;
+    Operands[2] = (Word >> 26) & 0x3F;
+    break;
+  }
+
+  const Type *ResultTy = getType(ResultTypeID);
+  return handler->handleInstruction(Opcode, ResultTy, Operands);
+}
+
+/// ParseBasicBlock - Read one basic block packet, the form LLVM 1.0 bytecode
+/// files used when each basic block was written out separately. Every
+/// instruction up to EndBuf is parsed, and an error is raised unless the last
+/// one parsed was reported as a terminator.
+void AbstractBytecodeParser::ParseBasicBlock(BufPtr &Buf,
+                                             BufPtr EndBuf,
+                                             unsigned BlockNo) {
+  handler->handleBasicBlockBegin( BlockNo );
+
+  std::vector<unsigned> Operands;
+  bool LastWasTerminator = false;
+  for ( ; Buf < EndBuf; )
+    LastWasTerminator = ParseInstruction(Buf, EndBuf, Operands);
+
+  if ( LastWasTerminator == false )
+    PARSE_ERROR(
+      "Failed to recognize instruction as terminating at end of block");
+
+  handler->handleBasicBlockEnd( BlockNo );
+}
+
+
+/// ParseInstructionList - Parse a function body stored as one flat list of
+/// instructions (the post-1.0 layout, which avoids per-basic-block overhead).
+/// A new basic block is started whenever the previous instruction was
+/// reported as a terminator. Returns the number of blocks seen.
+unsigned AbstractBytecodeParser::ParseInstructionList( BufPtr &Buf, BufPtr EndBuf) {
+  std::vector<unsigned> Operands;
+  unsigned NumBlocks = 0;
+
+  for ( ; Buf < EndBuf; ++NumBlocks) {
+    handler->handleBasicBlockBegin( NumBlocks );
+
+    // The loop condition guarantees at least one instruction is available,
+    // so consume instructions until a terminator or the end of the buffer.
+    bool SawTerminator = false;
+    do {
+      SawTerminator = ParseInstruction(Buf, EndBuf, Operands);
+    } while (Buf < EndBuf && !SawTerminator);
+
+    if (SawTerminator == false)
+      PARSE_ERROR( "Non-terminated basic block found!");
+
+    handler->handleBasicBlockEnd( NumBlocks );
+  }
+  return NumBlocks;
+}
+
+/// ParseSymbolTable - Walk a symbol table block. The block is a sequence of
+/// planes, each introduced by [num entries][type id] and followed by that many
+/// [slot #][name] pairs. Entries in the plane whose type id is
+/// Type::TypeTyID are reported as types, all others as values.
+void AbstractBytecodeParser::ParseSymbolTable(BufPtr &Buf, BufPtr EndBuf) {
+  handler->handleSymbolTableBegin();
+
+  while (Buf < EndBuf) {
+    // Plane header.
+    unsigned PlaneSize = read_vbr_uint(Buf, EndBuf);
+    unsigned PlaneTypeID = read_vbr_uint(Buf, EndBuf);
+    const Type *PlaneTy = getType(PlaneTypeID);
+
+    handler->handleSymbolTablePlane( PlaneTypeID, PlaneSize, PlaneTy );
+
+    const bool NamesTypes = (PlaneTypeID == Type::TypeTyID);
+    for (unsigned Entry = 0; Entry < PlaneSize; ++Entry) {
+      unsigned Slot = read_vbr_uint(Buf, EndBuf);
+      std::string Name = read_str(Buf, EndBuf);
+
+      if (!NamesTypes)
+        handler->handleSymbolTableValue( Entry, Slot, Name );
+      else
+        handler->handleSymbolTableType( Entry, Slot, Name );
+    }
+  }
+
+  if (Buf > EndBuf)
+    PARSE_ERROR("Tried to read past end of buffer while reading symbol table.");
+
+  handler->handleSymbolTableEnd();
+}
+
+/// ParseFunctionLazily - Instead of parsing a function block now, remember
+/// where it lives. The function's type is taken from the back of
+/// FunctionSignatureList and used as the key under which the block's begin
+/// and end pointers are stored; Buf is then moved to the end of the block.
+/// Throws a std::string if no signature is left to pair the block with.
+void AbstractBytecodeParser::ParseFunctionLazily(BufPtr &Buf, BufPtr EndBuf) {
+  if (FunctionSignatureList.empty())
+    throw std::string("FunctionSignatureList empty!");
+
+  const Type *Signature = FunctionSignatureList.back();
+  FunctionSignatureList.pop_back();
+
+  // Record the byte range so the body can be parsed on demand later.
+  LazyFunctionInfo &Saved = LazyFunctionLoadMap[Signature];
+  Saved.Buf = Buf;
+  Saved.EndBuf = EndBuf;
+
+  // Skip over the body as if it had been consumed.
+  Buf = EndBuf;
+}
+
+/// ParseNextFunction - Parse the deferred body recorded for FType. The
+/// entry is removed from the lazy-load map before the body is parsed. If no
+/// entry exists for FType an error is reported and nothing is parsed.
+void AbstractBytecodeParser::ParseNextFunction(Type* FType) {
+  LazyFunctionMap::iterator Pos = LazyFunctionLoadMap.find(FType);
+
+  if ( Pos != LazyFunctionLoadMap.end() ) {
+    assert(Pos->first == FType);
+
+    // Copy the range out first: erasing invalidates the iterator.
+    BufPtr Cursor = Pos->second.Buf;
+    BufPtr Limit = Pos->second.EndBuf;
+    LazyFunctionLoadMap.erase(Pos);
+
+    this->ParseFunctionBody( FType, Cursor, Limit );
+    return;
+  }
+
+  PARSE_ERROR("Unrecognized function of type " << FType->getDescription());
+}
+
+/// ParseFunctionBody - Parse one function block spanning [Buf, EndBuf).
+///
+/// The block starts with a vbr linkage code, followed by nested blocks
+/// (constant pool, compaction table, basic blocks or an instruction list,
+/// symbol table). Unknown nested blocks are skipped by their declared size.
+/// The handler is told when the function begins and ends, and the
+/// function-level type table and compaction type table are emptied on exit.
+void AbstractBytecodeParser::ParseFunctionBody(const Type* FType,
+                                               BufPtr &Buf, BufPtr EndBuf ) {
+
+  GlobalValue::LinkageTypes Linkage = GlobalValue::ExternalLinkage;
+
+  unsigned LinkageType = read_vbr_uint(Buf, EndBuf);
+  switch (LinkageType) {
+  case 0: Linkage = GlobalValue::ExternalLinkage; break;
+  case 1: Linkage = GlobalValue::WeakLinkage; break;
+  case 2: Linkage = GlobalValue::AppendingLinkage; break;
+  case 3: Linkage = GlobalValue::InternalLinkage; break;
+  case 4: Linkage = GlobalValue::LinkOnceLinkage; break;
+  default:
+    PARSE_ERROR("Invalid linkage type for Function.");
+    // Fallback used when the handler decides to carry on.
+    Linkage = GlobalValue::InternalLinkage;
+    break;
+  }
+
+  handler->handleFunctionBegin(FType,Linkage);
+
+  // Keep track of how many basic blocks we have read in...
+  unsigned BlockNum = 0;
+
+  while (Buf < EndBuf) {
+    // Named so that they do not hide the Type class inside the loop.
+    unsigned BlockType, BlockSize;
+    BufPtr OldBuf = Buf;
+    readBlock(Buf, EndBuf, BlockType, BlockSize);
+
+    switch (BlockType) {
+    case BytecodeFormat::ConstantPool:
+      ParseConstantPool(Buf, Buf+BlockSize, FunctionTypes );
+      break;
+
+    case BytecodeFormat::CompactionTable:
+      ParseCompactionTable(Buf, Buf+BlockSize);
+      break;
+
+    case BytecodeFormat::BasicBlock:
+      ParseBasicBlock(Buf, Buf+BlockSize, BlockNum++);
+      break;
+
+    case BytecodeFormat::InstructionList:
+      if (BlockNum)
+        PARSE_ERROR("InstructionList must come before basic blocks!");
+      BlockNum = ParseInstructionList(Buf, Buf+BlockSize);
+      break;
+
+    case BytecodeFormat::SymbolTable:
+      ParseSymbolTable(Buf, Buf+BlockSize );
+      break;
+
+    default:
+      // Unknown block: step over it, checking that the pointer did not wrap.
+      Buf += BlockSize;
+      if (OldBuf > Buf)
+        PARSE_ERROR("Wrapped around reading bytecode");
+      break;
+    }
+
+    // Move to the 32-bit boundary that follows the block just handled.
+    align32(Buf, EndBuf);
+  }
+
+  handler->handleFunctionEnd(FType);
+
+  // Clear out function-level types...
+  FunctionTypes.clear();
+  CompactionTypeTable.clear();
+}
+
+/// ParseAllFunctionBodies - Parse every function body that was deferred by
+/// ParseFunctionLazily, in map order, and then forget the deferred entries so
+/// that no body is parsed a second time.
+///
+/// Each body is parsed through a local copy of its start pointer because
+/// ParseFunctionBody advances the pointer it is given.
+void AbstractBytecodeParser::ParseAllFunctionBodies() {
+  for (LazyFunctionMap::iterator Fi = LazyFunctionLoadMap.begin(),
+         Fe = LazyFunctionLoadMap.end(); Fi != Fe; ++Fi) {
+    BufPtr Buf = Fi->second.Buf;
+    this->ParseFunctionBody(Fi->first, Buf, Fi->second.EndBuf);
+  }
+
+  // Same end state as ParseNextFunction, which erases what it has parsed.
+  LazyFunctionLoadMap.clear();
+}
+
+/// ParseCompactionTable - Walk a compaction table block plane by plane.
+///
+/// Each plane starts with a vbr header word. If its low two bits are not both
+/// set, they hold the entry count and the remaining bits hold the type;
+/// otherwise the remaining bits hold the entry count and the type follows as
+/// a separate vbr value. The plane for Type::TypeTyID lists module-level type
+/// slots; every other plane lists value slots of that plane's type.
+void AbstractBytecodeParser::ParseCompactionTable(BufPtr &Buf, BufPtr End) {
+
+  handler->handleCompactionTableBegin();
+
+  while (Buf != End) {
+    const unsigned Header = read_vbr_uint(Buf, End);
+    unsigned NumEntries;
+    unsigned Ty;
+
+    if ((Header & 3) != 3) {
+      // Short form: both fields packed into the header.
+      Ty = Header >> 2;
+      NumEntries = Header & 3;
+    } else {
+      // Long form: count in the header, type read separately.
+      NumEntries = Header >> 2;
+      Ty = read_vbr_uint(Buf, End);
+    }
+
+    handler->handleCompactionTablePlane( Ty, NumEntries );
+
+    if (Ty != Type::TypeTyID) {
+      const Type *PlaneTy = getType(Ty);
+      for (unsigned Entry = 0; Entry < NumEntries; ++Entry) {
+        unsigned ValSlot = read_vbr_uint(Buf, End);
+        handler->handleCompactionTableValue( Entry, ValSlot, PlaneTy );
+      }
+    } else {
+      for (unsigned Entry = 0; Entry < NumEntries; ++Entry) {
+        unsigned TypeSlot = read_vbr_uint(Buf,End);
+        const Type *Resolved = getGlobalTableType(TypeSlot);
+        handler->handleCompactionTableType( Entry, TypeSlot, Resolved );
+      }
+    }
+  }
+  handler->handleCompactionTableEnd();
+}
+
+/// ParseTypeConstant - Read one type definition from the buffer.
+///
+/// The leading vbr value is a primitive type ID. Primitive types are returned
+/// directly; function, array, struct, pointer and opaque types are built from
+/// the fields that follow and reported through handler->handleType() before
+/// being returned. An unknown ID is reported as an error and, if the handler
+/// continues, null is returned.
+const Type *AbstractBytecodeParser::ParseTypeConstant(const unsigned char *&Buf,
+                                                      const unsigned char *EndBuf) {
+  unsigned PrimType = read_vbr_uint(Buf, EndBuf);
+
+  const Type *Val = 0;
+  if ((Val = Type::getPrimitiveType((Type::PrimitiveID)PrimType)))
+    return Val;
+
+  switch (PrimType) {
+  case Type::FunctionTyID: {
+    const Type *RetType = getType(read_vbr_uint(Buf, EndBuf));
+
+    unsigned NumParams = read_vbr_uint(Buf, EndBuf);
+
+    std::vector<const Type*> Params;
+    while (NumParams--)
+      Params.push_back(getType(read_vbr_uint(Buf, EndBuf)));
+
+    // A trailing void parameter marks the function as taking varargs.
+    bool isVarArg = Params.size() && Params.back() == Type::VoidTy;
+    if (isVarArg) Params.pop_back();
+
+    Type* result = FunctionType::get(RetType, Params, isVarArg);
+    handler->handleType( result );
+    return result;
+  }
+  case Type::ArrayTyID: {
+    unsigned ElTyp = read_vbr_uint(Buf, EndBuf);
+    const Type *ElementType = getType(ElTyp);
+
+    unsigned NumElements = read_vbr_uint(Buf, EndBuf);
+
+    BCR_TRACE(5, "Array Type Constant #" << ElTyp << " size="
+              << NumElements << "\n");
+    Type* result = ArrayType::get(ElementType, NumElements);
+    handler->handleType( result );
+    return result;
+  }
+  case Type::StructTyID: {
+    std::vector<const Type*> Elements;
+    unsigned Typ = read_vbr_uint(Buf, EndBuf);
+    while (Typ) {         // List is terminated by void/0 typeid
+      Elements.push_back(getType(Typ));
+      Typ = read_vbr_uint(Buf, EndBuf);
+    }
+
+    Type* result = StructType::get(Elements);
+    handler->handleType( result );
+    return result;
+  }
+  case Type::PointerTyID: {
+    unsigned ElTyp = read_vbr_uint(Buf, EndBuf);
+    BCR_TRACE(5, "Pointer Type Constant #" << ElTyp << "\n");
+    Type* result = PointerType::get(getType(ElTyp));
+    handler->handleType( result );
+    return result;
+  }
+
+  case Type::OpaqueTyID: {
+    Type* result = OpaqueType::get();
+    handler->handleType( result );
+    return result;
+  }
+
+  default:
+    // Separate the number from the text; like the other messages in this
+    // file, no trailing newline.
+    PARSE_ERROR("Don't know how to deserialize primitive type " << PrimType);
+    return 0;
+  }
+}
+
+// ParseTypeConstants - Read NumEntries type definitions into Tab, which must
+// be empty on entry.
+//
+// Types in one slab may refer to each other, including to types that appear
+// later in the slab (Type #0 could be '{ Ty#1 }' while Type #1 is 'Ty#0*').
+// To let such forward references resolve, Tab is first filled with one opaque
+// placeholder per entry. Each definition is then read in turn and its
+// placeholder is refined to the concrete type that was read.
+//
+void AbstractBytecodeParser::ParseTypeConstants(const unsigned char *&Buf,
+                                                const unsigned char *EndBuf,
+                                                TypeListTy &Tab,
+                                                unsigned NumEntries) {
+  assert(Tab.size() == 0 && "should not have read type constants in before!");
+
+  // Step 1: one opaque placeholder per upcoming type.
+  Tab.reserve(NumEntries);
+  for (unsigned Remaining = NumEntries; Remaining != 0; --Remaining)
+    Tab.push_back(OpaqueType::get());
+
+  // Step 2: read each definition and swap it in for its placeholder.
+  for (unsigned Idx = 0; Idx < NumEntries; ++Idx) {
+    const Type *Resolved = ParseTypeConstant(Buf, EndBuf);
+    const Type *Placeholder = Tab[Idx].get();
+    if (Resolved == 0) throw std::string("Couldn't parse type!");
+    BCR_TRACE(4, "#" << Idx << ": Read Type Constant: '" << Resolved <<
+              "' Replacing: " << Placeholder << "\n");
+
+    // Refining redirects every use of the placeholder to Resolved (and lets
+    // the placeholder be deleted); the new type is not inserted separately.
+    cast<DerivedType>(const_cast<Type*>(Placeholder))->refineAbstractTypeTo(Resolved);
+
+    // The table entry should now hold the new type, or a preexisting type
+    // that was already in the system, rather than the placeholder.
+    assert(Tab[Idx] != Placeholder && "refineAbstractType didn't work!");
+  }
+
+  BCR_TRACE(5, "Resulting types:\n");
+  for (unsigned Idx = 0; Idx < NumEntries; ++Idx) {
+    BCR_TRACE(5, (void*)Tab[Idx].get() << " - " << Tab[Idx].get() << "\n");
+  }
+}
+
+
+/// ParseConstantValue - Read one constant of the type identified by TypeID
+/// and report it to the handler.
+///
+/// The first vbr value is zero for a plain constant, or the number of
+/// arguments when the constant is a constant expression. For a constant
+/// expression the opcode and the (value slot, type slot) pair of every
+/// argument follow. For a plain constant the encoding depends on the type:
+/// integers and booleans are vbr values, float and double are raw bytes, and
+/// arrays, structs and pointers are stored as slot numbers.
+///
+/// Out-of-range integer values throw a std::string; other problems go
+/// through PARSE_ERROR.
+void AbstractBytecodeParser::ParseConstantValue(const unsigned char *&Buf,
+                                                const unsigned char *EndBuf,
+                                                unsigned TypeID) {
+
+  // We must check for a ConstantExpr before switching by type because
+  // a ConstantExpr can be of any type, and has no explicit value.
+  //
+  // 0 if not expr; numArgs if is expr
+  unsigned isExprNumArgs = read_vbr_uint(Buf, EndBuf);
+
+  if (isExprNumArgs) {
+    unsigned Opcode = read_vbr_uint(Buf, EndBuf);
+    const Type* Typ = getType(TypeID);
+
+    // FIXME: Encoding of constant exprs could be much more compact!
+    std::vector<std::pair<const Type*,unsigned> > ArgVec;
+    ArgVec.reserve(isExprNumArgs);
+
+    // Read the slot number and types of each of the arguments
+    for (unsigned i = 0; i != isExprNumArgs; ++i) {
+      unsigned ArgValSlot = read_vbr_uint(Buf, EndBuf);
+      unsigned ArgTypeSlot = read_vbr_uint(Buf, EndBuf);
+      BCR_TRACE(4, "CE Arg " << i << ": Type: '" << *getType(ArgTypeSlot)
+                << "' slot: " << ArgValSlot << "\n");
+
+      // Record the argument as a (type, value slot) pair for the handler.
+      ArgVec.push_back(std::make_pair(getType(ArgTypeSlot), ArgValSlot));
+    }
+
+    handler->handleConstantExpression( Opcode, Typ, ArgVec );
+    return;
+  }
+
+  // Ok, not an ConstantExpr. We now know how to read the given type...
+  const Type *Ty = getType(TypeID);
+  switch (Ty->getPrimitiveID()) {
+  case Type::BoolTyID: {
+    unsigned Val = read_vbr_uint(Buf, EndBuf);
+    if (Val != 0 && Val != 1)
+      PARSE_ERROR("Invalid boolean value read.");
+
+    handler->handleConstantValue( ConstantBool::get(Val == 1));
+    break;
+  }
+
+  case Type::UByteTyID:   // Unsigned integer types...
+  case Type::UShortTyID:
+  case Type::UIntTyID: {
+    unsigned Val = read_vbr_uint(Buf, EndBuf);
+    if (!ConstantUInt::isValueValidForType(Ty, Val))
+      throw std::string("Invalid unsigned byte/short/int read.");
+    handler->handleConstantValue( ConstantUInt::get(Ty, Val) );
+    break;
+  }
+
+  case Type::ULongTyID: {
+    handler->handleConstantValue( ConstantUInt::get(Ty, read_vbr_uint64(Buf, EndBuf)) );
+    break;
+  }
+
+  case Type::SByteTyID:   // Signed integer types...
+  case Type::ShortTyID:
+  case Type::IntTyID:
+  case Type::LongTyID: {
+    int64_t Val = read_vbr_int64(Buf, EndBuf);
+    if (!ConstantSInt::isValueValidForType(Ty, Val))
+      throw std::string("Invalid signed byte/short/int/long read.");
+    handler->handleConstantValue( ConstantSInt::get(Ty, Val) );
+    break;
+  }
+
+  case Type::FloatTyID: {
+    float F;
+    input_data(Buf, EndBuf, &F, &F+1);
+    handler->handleConstantValue( ConstantFP::get(Ty, F) );
+    break;
+  }
+
+  case Type::DoubleTyID: {
+    double Val;
+    input_data(Buf, EndBuf, &Val, &Val+1);
+    handler->handleConstantValue( ConstantFP::get(Ty, Val) );
+    break;
+  }
+
+  case Type::TypeTyID:
+    PARSE_ERROR("Type constants shouldn't live in constant table!");
+    break;
+
+  case Type::ArrayTyID: {
+    const ArrayType *AT = cast<ArrayType>(Ty);
+    unsigned NumElements = AT->getNumElements();
+    std::vector<unsigned> Elements;
+    Elements.reserve(NumElements);
+    while (NumElements--)     // Read all of the elements of the constant.
+      Elements.push_back(read_vbr_uint(Buf, EndBuf));
+
+    handler->handleConstantArray( AT, Elements );
+    break;
+  }
+
+  case Type::StructTyID: {
+    const StructType *ST = cast<StructType>(Ty);
+    std::vector<unsigned> Elements;
+    Elements.reserve(ST->getNumElements());
+    for (unsigned i = 0; i != ST->getNumElements(); ++i)
+      Elements.push_back(read_vbr_uint(Buf, EndBuf));
+
+    handler->handleConstantStruct( ST, Elements );
+    break;   // Do not fall into the pointer case: Ty is not a PointerType.
+  }
+
+  case Type::PointerTyID: {  // ConstantPointerRef value...
+    const PointerType *PT = cast<PointerType>(Ty);
+    unsigned Slot = read_vbr_uint(Buf, EndBuf);
+    handler->handleConstantPointer( PT, Slot );
+    break;   // Do not fall into the "unknown type" error below.
+  }
+
+  default:
+    PARSE_ERROR("Don't know how to deserialize constant value of type '" +
+                Ty->getDescription() + "'");
+    break;
+  }
+}
+
+/// ParseGlobalTypes - Parse the global type plane block. The block is read
+/// with the ordinary constant pool parser, with ModuleTypes as the table that
+/// receives any types it defines.
+void AbstractBytecodeParser::ParseGlobalTypes(const unsigned char *&Buf,
+                                              const unsigned char *EndBuf) {
+  TypeListTy &Destination = ModuleTypes;
+  ParseConstantPool(Buf, EndBuf, Destination);
+}
+
+/// ParseStringConstants - Read NumEntries string constants. Each entry is a
+/// vbr type ID, which must name an array of sbyte or ubyte, followed by the
+/// raw character data; the array type supplies the length. Every string is
+/// turned into a ConstantArray and reported via handleConstantString().
+/// Throws a std::string if the type is not a byte array.
+///
+/// The character data is staged in a std::vector rather than a
+/// variable-length array: VLAs are not standard C++, and the length comes
+/// from the file being parsed, so it should not be used to size the stack.
+void AbstractBytecodeParser::ParseStringConstants(const unsigned char *&Buf,
+                                                  const unsigned char *EndBuf,
+                                                  unsigned NumEntries ){
+  for (; NumEntries; --NumEntries) {
+    unsigned Typ = read_vbr_uint(Buf, EndBuf);
+    const Type *Ty = getType(Typ);
+    if (!isa<ArrayType>(Ty))
+      throw std::string("String constant data invalid!");
+
+    const ArrayType *ATy = cast<ArrayType>(Ty);
+    const bool isSigned = ATy->getElementType() == Type::SByteTy;
+    if (!isSigned && ATy->getElementType() != Type::UByteTy)
+      throw std::string("String constant data invalid!");
+
+    // Read character data. The type tells us how long the string is.
+    const unsigned NumChars = ATy->getNumElements();
+    // Keep at least one byte so that &Data[0] is valid for an empty string.
+    std::vector<char> Data(NumChars ? NumChars : 1);
+    char *Chars = &Data[0];
+    input_data(Buf, EndBuf, Chars, Chars+NumChars);
+
+    std::vector<Constant*> Elements(NumChars);
+    if (isSigned)
+      for (unsigned i = 0; i != NumChars; ++i)
+        Elements[i] = ConstantSInt::get(Type::SByteTy, (signed char)Chars[i]);
+    else
+      for (unsigned i = 0; i != NumChars; ++i)
+        Elements[i] = ConstantUInt::get(Type::UByteTy, (unsigned char)Chars[i]);
+
+    // Create the constant, inserting it as needed.
+    ConstantArray *C = cast<ConstantArray>( ConstantArray::get(ATy, Elements) );
+    handler->handleConstantString( C );
+  }
+}
+
+
+/// ParseConstantPool - Walk a constant pool block. The block is a series of
+/// planes, each introduced by [num entries][type id]. The plane tagged
+/// Type::TypeTyID holds type definitions (stored into TypeTab), the plane
+/// tagged Type::VoidTyID holds string constants, and every other plane holds
+/// constants of the given type.
+void AbstractBytecodeParser::ParseConstantPool(const unsigned char *&Buf,
+                                               const unsigned char *EndBuf,
+                                               TypeListTy &TypeTab) {
+  while (Buf < EndBuf) {
+    unsigned Count = read_vbr_uint(Buf, EndBuf);
+    unsigned PlaneTypeID = read_vbr_uint(Buf, EndBuf);
+
+    switch (PlaneTypeID) {
+    case Type::TypeTyID:
+      ParseTypeConstants(Buf, EndBuf, TypeTab, Count);
+      break;
+
+    case Type::VoidTyID:
+      ParseStringConstants(Buf, EndBuf, Count);
+      break;
+
+    default:
+      BCR_TRACE(3, "Type: '" << *getType(PlaneTypeID) << "' NumEntries: "
+                << Count << "\n");
+
+      for (unsigned Done = 0; Done != Count; ++Done)
+        ParseConstantValue(Buf, EndBuf, PlaneTypeID);
+      break;
+    }
+  }
+
+  if (Buf > EndBuf) PARSE_ERROR("Read past end of buffer.");
+}
+
+/// ParseModuleGlobalInfo - Parse the module global info block: a
+/// void-terminated list of global variables followed by a void-terminated
+/// list of function signatures.
+///
+/// Each global variable is one vbr word (bit0 = isConstant,
+/// bit1 = hasInitializer, bits 2-4 = linkage, bits 5+ = type slot). When
+/// hasInitializer is set, the word is followed by the initializer's slot.
+///
+/// Each function signature is a type slot that must name a pointer to a
+/// function type. The function types are saved in FunctionSignatureList for
+/// ParseFunctionLazily, which consumes them from the back; the list is
+/// therefore reversed once complete so that signatures come off in
+/// declaration order.
+/// NOTE(review): this assumes function blocks follow in declaration order --
+/// confirm against the bytecode writer.
+///
+/// Entries whose type is invalid are reported through PARSE_ERROR and, if the
+/// handler continues, skipped.
+void AbstractBytecodeParser::ParseModuleGlobalInfo(BufPtr &Buf, BufPtr End) {
+
+  handler->handleModuleGlobalsBegin();
+
+  // Read global variables...
+  unsigned VarType = read_vbr_uint(Buf, End);
+  while (VarType != Type::VoidTyID) { // List is terminated by Void
+    // VarType Fields: bit0 = isConstant, bit1 = hasInitializer, bit2,3,4 =
+    // Linkage, bit5+ = slot#
+    unsigned SlotNo = VarType >> 5;
+    unsigned LinkageID = (VarType >> 2) & 7;
+    bool isConstant = (VarType & 1) != 0;
+    bool hasInitializer = (VarType & 2) != 0;
+    GlobalValue::LinkageTypes Linkage;
+
+    switch (LinkageID) {
+    case 0: Linkage = GlobalValue::ExternalLinkage; break;
+    case 1: Linkage = GlobalValue::WeakLinkage; break;
+    case 2: Linkage = GlobalValue::AppendingLinkage; break;
+    case 3: Linkage = GlobalValue::InternalLinkage; break;
+    case 4: Linkage = GlobalValue::LinkOnceLinkage; break;
+    default:
+      PARSE_ERROR("Unknown linkage type: " << LinkageID);
+      Linkage = GlobalValue::InternalLinkage;
+      break;
+    }
+
+    const Type *Ty = getType(SlotNo);
+    bool isValidType = true;
+    if ( !Ty ) {
+      PARSE_ERROR("Global has no type! SlotNo=" << SlotNo);
+      isValidType = false;
+    } else if ( !isa<PointerType>(Ty)) {
+      PARSE_ERROR("Global not a pointer type! Ty= " << Ty->getDescription());
+      isValidType = false;
+    }
+
+    // The initializer slot is present exactly when the flag is set. Read it
+    // even for a rejected entry so that the stream stays in step.
+    unsigned initSlot = 0;
+    if (hasInitializer)
+      initSlot = read_vbr_uint(Buf,End);
+
+    if (isValidType) {
+      const Type *ElTy = cast<PointerType>(Ty)->getElementType();
+
+      // Report the global variable...
+      if (hasInitializer)
+        handler->handleInitializedGV( ElTy, isConstant, Linkage, initSlot );
+      else
+        handler->handleGlobalVariable( ElTy, isConstant, Linkage );
+    }
+
+    // Get next item
+    VarType = read_vbr_uint(Buf, End);
+  }
+
+  // Read the function objects for all of the functions that are coming
+  unsigned FnSignature = read_vbr_uint(Buf, End);
+  while (FnSignature != Type::VoidTyID) { // List is terminated by Void
+    const Type *Ty = getType(FnSignature);
+    if (isa<PointerType>(Ty) &&
+        isa<FunctionType>(cast<PointerType>(Ty)->getElementType())) {
+      // Functions are described by the underlying FunctionType...
+      Ty = cast<PointerType>(Ty)->getElementType();
+
+      // Save this for later so we know type of lazily instantiated functions
+      FunctionSignatureList.push_back(Ty);
+
+      handler->handleFunctionDeclaration(Ty);
+    } else {
+      // If the handler continues, the malformed entry is dropped.
+      PARSE_ERROR( "Function not a pointer to function type! Ty = " +
+                   Ty->getDescription());
+    }
+
+    // Get Next function signature
+    FnSignature = read_vbr_uint(Buf, End);
+  }
+
+  // Signatures are removed from the back of the vector as function blocks
+  // arrive, so put the first-declared function last.
+  std::reverse(FunctionSignatureList.begin(), FunctionSignatureList.end());
+
+  if (hasInconsistentModuleGlobalInfo)
+    align32(Buf, End);
+
+  // This is for future proofing... in the future extra fields may be added that
+  // we don't understand, so we transparently ignore them.
+  //
+  Buf = End;
+
+  handler->handleModuleGlobalsEnd();
+}
+
+/// ParseVersionInfo - Read the vbr version word and set the parser's format
+/// flags from it. The low four bits are flags (bit0 = big endian,
+/// bit1 = 64-bit pointers, bit2 = no endianness, bit3 = no pointer size) and
+/// the remaining bits are the bytecode revision number.
+///
+/// The revision is validated as a full unsigned value before it is stored in
+/// the (narrower) RevisionNum member, so a revision such as 256 is reported
+/// as unknown instead of being mistaken for revision 0, and the error
+/// message prints it as a number rather than as a character.
+void AbstractBytecodeParser::ParseVersionInfo(BufPtr &Buf, BufPtr EndBuf) {
+  unsigned Version = read_vbr_uint(Buf, EndBuf);
+
+  // Unpack version number: low four bits are for flags, top bits = version
+  Module::Endianness Endianness;
+  Module::PointerSize PointerSize;
+  Endianness = (Version & 1) ? Module::BigEndian : Module::LittleEndian;
+  PointerSize = (Version & 2) ? Module::Pointer64 : Module::Pointer32;
+
+  bool hasNoEndianness = (Version & 4) != 0;
+  bool hasNoPointerSize = (Version & 8) != 0;
+
+  const unsigned Revision = Version >> 4;
+  RevisionNum = Revision;
+
+  // Default values for the current bytecode version
+  hasInconsistentModuleGlobalInfo = false;
+  hasExplicitPrimitiveZeros = false;
+  hasRestrictedGEPTypes = false;
+
+  switch (Revision) {
+  case 0:   // LLVM 1.0, 1.1 release version
+    // Base LLVM 1.0 bytecode format.
+    hasInconsistentModuleGlobalInfo = true;
+    hasExplicitPrimitiveZeros = true;
+    // FALL THROUGH
+  case 1:   // LLVM 1.2 release version
+    // LLVM 1.2 added explicit support for emitting strings efficiently.
+
+    // Also, it fixed the problem where the size of the ModuleGlobalInfo block
+    // included the size for the alignment at the end, where the rest of the
+    // blocks did not.
+
+    // LLVM 1.2 and before required that GEP indices be ubyte constants for
+    // structures and longs for sequential types.
+    hasRestrictedGEPTypes = true;
+
+    // FALL THROUGH
+  case 2:   // LLVM 1.3 release version
+    break;
+
+  default:
+    PARSE_ERROR("Unknown bytecode version number: " << Revision);
+  }
+
+  if (hasNoEndianness) Endianness = Module::AnyEndianness;
+  if (hasNoPointerSize) PointerSize = Module::AnyPointerSize;
+
+  handler->handleVersionInfo(RevisionNum, Endianness, PointerSize );
+}
+
+/// ParseModule - Parse the outermost Module block, which must span exactly
+/// the rest of the buffer. After the version word, the nested blocks are
+/// dispatched by type: global type plane, module global info, constant pool,
+/// function (recorded for lazy parsing) and symbol table. A second global
+/// type plane or module global info block is reported as an error. Blocks of
+/// any other type are skipped by their declared size.
+void AbstractBytecodeParser::ParseModule(BufPtr &Buf, BufPtr EndBuf ) {
+  unsigned Type, Size;
+  readBlock(Buf, EndBuf, Type, Size);
+  if (Type != BytecodeFormat::Module || Buf+Size != EndBuf)
+    // Hrm, not a class?
+    PARSE_ERROR("Expected Module block! B: " << unsigned(intptr_t(Buf)) <<
+                ", S: " << Size << " E: " << unsigned(intptr_t(EndBuf)));
+
+  // Read into instance variables...
+  ParseVersionInfo(Buf, EndBuf);
+  align32(Buf, EndBuf);
+
+  bool SeenModuleGlobalInfo = false;
+  bool SeenGlobalTypePlane = false;
+  while (Buf < EndBuf) {
+    BufPtr OldBuf = Buf;
+    readBlock(Buf, EndBuf, Type, Size);
+
+    switch (Type) {
+
+    case BytecodeFormat::GlobalTypePlane:
+      if ( SeenGlobalTypePlane )
+        PARSE_ERROR("Two GlobalTypePlane Blocks Encountered!");
+
+      ParseGlobalTypes(Buf, Buf+Size);
+      SeenGlobalTypePlane = true;
+      break;
+
+    case BytecodeFormat::ModuleGlobalInfo:
+      if ( SeenModuleGlobalInfo )
+        PARSE_ERROR("Two ModuleGlobalInfo Blocks Encountered!");
+      ParseModuleGlobalInfo(Buf, Buf+Size);
+      SeenModuleGlobalInfo = true;
+      break;
+
+    case BytecodeFormat::ConstantPool:
+      ParseConstantPool(Buf, Buf+Size, ModuleTypes);
+      break;
+
+    case BytecodeFormat::Function:
+      ParseFunctionLazily(Buf, Buf+Size);
+      break;
+
+    case BytecodeFormat::SymbolTable:
+      ParseSymbolTable(Buf, Buf+Size );
+      break;
+
+    default:
+      // Skip the block; the error fires only if the pointer wrapped around.
+      Buf += Size;
+      if (OldBuf > Buf)
+      {
+        PARSE_ERROR("Unexpected Block of Type " << Type << " encountered!" );
+      }
+      break;
+    }
+    align32(Buf, EndBuf);
+  }
+}
+
+/// ParseBytecode - Entry point for parsing a whole bytecode image held in
+/// memory. Checks the leading four-byte signature ('l','l','v','m' packed
+/// low byte first), then parses the module, notifying the handler at the
+/// start and finish of the run and around the module itself.
+void AbstractBytecodeParser::ParseBytecode(
+    BufPtr Buf, unsigned Length,
+    const std::string &ModuleID) {
+
+  handler->handleStart();
+  BufPtr BufEnd = Buf + Length;
+
+  // Read and check signature...
+  const unsigned Expected = 'l' | ('l' << 8) | ('v' << 16) | ('m' << 24);
+  unsigned Sig = read(Buf, BufEnd);
+  if (Sig != Expected) {
+    PARSE_ERROR("Invalid bytecode signature: " << Sig);
+  }
+
+  handler->handleModuleBegin(ModuleID);
+  this->ParseModule(Buf, BufEnd);
+  handler->handleModuleEnd(ModuleID);
+
+  handler->handleFinish();
+}
+
+// vim: sw=2
diff --git a/lib/Bytecode/Reader/Parser.h b/lib/Bytecode/Reader/Parser.h
new file mode 100644
index 0000000000..027047b3f8
--- /dev/null
+++ b/lib/Bytecode/Reader/Parser.h
@@ -0,0 +1,178 @@
+//===-- Parser.h - Definitions internal to the reader -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Reid Spencer and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This header file defines the interface to the Bytecode Parser
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef BYTECODE_PARSER_H
+#define BYTECODE_PARSER_H
+
+#include "ReaderPrimitives.h"
+#include "BytecodeHandler.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include <utility>
+#include <vector>
+#include <map>
+
+namespace llvm {
+
+/// LazyFunctionInfo - The begin and end pointers of a function body whose
+/// parsing has been put off. Both pointers default to null.
+struct LazyFunctionInfo {
+  const unsigned char *Buf;
+  const unsigned char *EndBuf;
+
+  LazyFunctionInfo(const unsigned char *B = 0, const unsigned char *EB = 0) {
+    Buf = B;
+    EndBuf = EB;
+  }
+};
+
+// LazyFunctionMap - Associates a Type key with the saved buffer range of a
+// function body that has not been parsed yet.
+typedef std::map<const Type*, LazyFunctionInfo> LazyFunctionMap;
+
+/// AbstractBytecodeParser - Walks a bytecode image and reports what it finds
+/// to a BytecodeHandler supplied at construction. The parser stores only the
+/// handler pointer; it does not delete it. Function bodies met while parsing
+/// the module are recorded and parsed later through ParseNextFunction() or
+/// ParseAllFunctionBodies().
+class AbstractBytecodeParser {
+  AbstractBytecodeParser(const AbstractBytecodeParser &);  // DO NOT IMPLEMENT
+  void operator=(const AbstractBytecodeParser &);  // DO NOT IMPLEMENT
+public:
+  /// Create a parser reporting to h. The revision number and format flags
+  /// start out cleared; ParseVersionInfo sets them from the version word.
+  AbstractBytecodeParser( BytecodeHandler* h )
+    : RevisionNum(0),
+      hasInconsistentModuleGlobalInfo(false),
+      hasExplicitPrimitiveZeros(false),
+      hasRestrictedGEPTypes(false),
+      handler(h) { }
+  ~AbstractBytecodeParser() { }
+
+  /// Parse the Length bytes starting at Buf as one bytecode module.
+  void ParseBytecode(const unsigned char *Buf, unsigned Length,
+                     const std::string &ModuleID);
+
+  void dump() const {
+    std::cerr << "AbstractBytecodeParser instance!\n";
+  }
+
+private:
+  // Information about the module, extracted from the bytecode revision number.
+  unsigned char RevisionNum;        // The rev # itself
+
+  // Flags to distinguish LLVM 1.0 & 1.1 bytecode formats (revision #0)
+
+  // Revision #0 had an explicit alignment of data only for the ModuleGlobalInfo
+  // block. This was fixed to be like all other blocks in 1.2
+  bool hasInconsistentModuleGlobalInfo;
+
+  // Revision #0 also explicitly encoded zero values for primitive types like
+  // int/sbyte/etc.
+  bool hasExplicitPrimitiveZeros;
+
+  // Flags to control features specific the LLVM 1.2 and before (revision #1)
+
+  // LLVM 1.2 and earlier required that getelementptr structure indices were
+  // ubyte constants and that sequential type indices were longs.
+  bool hasRestrictedGEPTypes;
+
+
+  /// CompactionTable - If a compaction table is active in the current function,
+  /// this is the mapping that it contains.
+  std::vector<Type*> CompactionTypeTable;
+
+  // ConstantFwdRefs - This maintains a mapping between <Type, Slot #>'s and
+  // forward references to constants. Such values may be referenced before they
+  // are defined, and if so, the temporary object that they represent is held
+  // here.
+  //
+  typedef std::map<std::pair<const Type*,unsigned>, Constant*> ConstantRefsType;
+  ConstantRefsType ConstantFwdRefs;
+
+  // TypesLoaded - This vector mirrors the Values[TypeTyID] plane. It is used
+  // to deal with forward references to types.
+  //
+  typedef std::vector<PATypeHolder> TypeListTy;
+  TypeListTy ModuleTypes;
+  TypeListTy FunctionTypes;
+
+  // When the ModuleGlobalInfo section is read, we create a FunctionType object
+  // for each function in the module. When the function is loaded, this type is
+  // used to instantiate the actual function object.
+  std::vector<const Type*> FunctionSignatureList;
+
+  // Constant values are read in after global variables. Because of this, we
+  // must defer setting the initializers on global variables until after module
+  // level constants have been read. In the mean time, this list keeps track of
+  // what we must do.
+  //
+  std::vector<std::pair<GlobalVariable*, unsigned> > GlobalInits;
+
+  // For lazy reading-in of functions, we need to save away several pieces of
+  // information about each function: its begin and end pointer in the buffer
+  // and its FunctionSlot.
+  //
+  LazyFunctionMap LazyFunctionLoadMap;
+
+  /// The handler for parsing
+  BytecodeHandler* handler;
+
+private:
+  /// getType - Resolve a type ID read from the bytecode.
+  const Type *getType(unsigned ID);
+
+  /// getGlobalTableType - This is just like getType, but when a compaction
+  /// table is in use, it is ignored. Also, no forward references or other
+  /// fancy features are supported.
+  const Type *getGlobalTableType(unsigned Slot) {
+    if (Slot < Type::FirstDerivedTyID) {
+      const Type *Ty = Type::getPrimitiveType((Type::PrimitiveID)Slot);
+      assert(Ty && "Not a primitive type ID?");
+      return Ty;
+    }
+    Slot -= Type::FirstDerivedTyID;
+    if (Slot >= ModuleTypes.size())
+      throw std::string("Illegal compaction table type reference!");
+    return ModuleTypes[Slot];
+  }
+
+  /// getGlobalTableTypeSlot - Inverse of getGlobalTableType: the slot number
+  /// of a primitive type, or of a type held in ModuleTypes. Throws a
+  /// std::string if the type is in neither.
+  unsigned getGlobalTableTypeSlot(const Type *Ty) {
+    if (Ty->isPrimitiveType())
+      return Ty->getPrimitiveID();
+    // Explicit scan; no dependence on <algorithm> or on how find is found.
+    for (unsigned i = 0, e = ModuleTypes.size(); i != e; ++i)
+      if (ModuleTypes[i].get() == Ty)
+        return Type::FirstDerivedTyID + i;
+    throw std::string("Didn't find type in ModuleTypes.");
+  }
+
+public:
+  typedef const unsigned char* BufPtr;
+  void ParseModule             (BufPtr &Buf, BufPtr End);
+  void ParseNextFunction       (Type* FType) ;
+  void ParseAllFunctionBodies  ();
+
+private:
+  void ParseVersionInfo        (BufPtr &Buf, BufPtr End);
+  void ParseModuleGlobalInfo   (BufPtr &Buf, BufPtr End);
+  void ParseSymbolTable        (BufPtr &Buf, BufPtr End);
+  void ParseFunctionLazily     (BufPtr &Buf, BufPtr End);
+  void ParseFunctionBody       (const Type* FType, BufPtr &Buf, BufPtr EndBuf);
+  void ParseCompactionTable    (BufPtr &Buf, BufPtr End);
+  void ParseGlobalTypes        (BufPtr &Buf, BufPtr End);
+
+  void ParseBasicBlock         (BufPtr &Buf, BufPtr End, unsigned BlockNo);
+  unsigned ParseInstructionList(BufPtr &Buf, BufPtr End);
+
+  bool ParseInstruction        (BufPtr &Buf, BufPtr End,
+                                std::vector<unsigned>& Args);
+
+  void ParseConstantPool       (BufPtr &Buf, BufPtr End, TypeListTy& List);
+  void ParseConstantValue      (BufPtr &Buf, BufPtr End, unsigned TypeID);
+  void ParseTypeConstants      (BufPtr &Buf, BufPtr End, TypeListTy &Tab,
+                                unsigned NumEntries);
+  const Type *ParseTypeConstant(BufPtr &Buf, BufPtr End);
+  void ParseStringConstants    (BufPtr &Buf, BufPtr End, unsigned NumEntries);
+
+};
+
+
+/// readBlock - Read the two-value header that introduces a block: the block
+/// type comes first, then the block size. Both are fetched with read(),
+/// which advances Buf.
+static inline void readBlock(const unsigned char *&Buf,
+                             const unsigned char *EndBuf,
+                             unsigned &Type, unsigned &Size) {
+  // Order matters: the type precedes the size in the stream.
+  Type = read(Buf, EndBuf);
+
+  Size = read(Buf, EndBuf);
+}
+
+} // End llvm namespace
+
+#endif
+// vim: sw=2
diff --git a/tools/Makefile b/tools/Makefile
index bc252aa424..dd03498bea 100644
--- a/tools/Makefile
+++ b/tools/Makefile
@@ -9,7 +9,8 @@
LEVEL := ..
PARALLEL_DIRS := llvm-as llvm-dis opt gccas llc llvm-link lli gccld llvm-stub \
- analyze extract bugpoint llvm-nm llvm-prof llvm-db llvm-ar
+ analyze extract bugpoint llvm-nm llvm-prof llvm-db llvm-ar \
+ llvm-abcd
include $(LEVEL)/Makefile.common
diff --git a/tools/llvm-abcd/Makefile b/tools/llvm-abcd/Makefile
new file mode 100644
index 0000000000..f85c2081f3
--- /dev/null
+++ b/tools/llvm-abcd/Makefile
@@ -0,0 +1,13 @@
+##===- tools/llvm-abcd/Makefile ----------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file was developed by Reid Spencer and is distributed under the
+# University of Illinois Open Source License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+# Relative path from this directory (tools/llvm-abcd) to the top of the tree.
+LEVEL = ../..
+
+# Name of the tool built in this directory.
+TOOLNAME = llvm-abcd
+# Libraries the tool depends on; presumably consumed by Makefile.common when
+# linking -- confirm there.
+USEDLIBS = bcanalyzer vmcore support.a
+# Shared build rules from the top of the tree.
+include $(LEVEL)/Makefile.common
diff --git a/tools/llvm-abcd/llvm-abcd.cpp b/tools/llvm-abcd/llvm-abcd.cpp
new file mode 100644
index 0000000000..22ab0b049c
--- /dev/null
+++ b/tools/llvm-abcd/llvm-abcd.cpp
@@ -0,0 +1,115 @@
+//===-- llvm-abcd.cpp - Analysis of ByteCode Dumper -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This utility may be invoked in the following manner:
+// llvm-abcd [options] - Analyze the default input "-" (presumably stdin),
+// write the analysis to stdout
+// llvm-abcd [options] x.bc - Analyze the x.bc bytecode file, write the
+// analysis to the x.abc file.
+// Options:
+// --help - Output information about command line switches
+// -o <filename> - Override output filename ("-" means stdout)
+// -f - Overwrite output files
+// -d - Detailed output
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Bytecode/Analyzer.h"
+#include "Support/CommandLine.h"
+#include "llvm/System/Signals.h"
+#include <fstream>
+#include <iostream>
+
+using namespace llvm;
+
+// Positional argument naming the bytecode input. Initialized to "-" when no
+// input is given on the command line.
+static cl::opt<std::string>
+ InputFilename(cl::Positional, cl::desc("<input bytecode>"), cl::init("-"));
+
+// -o <filename>: explicit output file name. main() treats "-" as stdout and
+// derives a name from the input file when this is left empty.
+static cl::opt<std::string>
+ OutputFilename("o", cl::desc("Override output filename"),
+ cl::value_desc("filename"));
+
+// -f: when set, main() skips its "file exists" check before opening the
+// output file.
+static cl::opt<bool> Force ("f", cl::desc("Overwrite output files"));
+// -d: NOTE(review): this option is declared but never read in the visible
+// part of this file -- confirm whether it should be forwarded to the analysis.
+static cl::opt<bool> Detailed ("d", cl::desc("Detailed output"));
+
+/// Entry point for llvm-abcd.
+///
+/// Parses the command line, runs AnalyzeBytecodeFile on InputFilename, selects
+/// an output stream, and passes the gathered BytecodeAnalysis to
+/// PrintBytecodeAnalysis.
+///
+/// Output selection:
+///  - "-o -"           : write to std::cout.
+///  - "-o <file>"      : write to <file>, unless it already exists and -f was
+///                       not given, in which case output stays on std::cout.
+///  - no -o, input "-" : write to std::cout.
+///  - no -o, input file: write to the input name with a trailing ".bc"
+///                       replaced by ".abc" (or ".abc" appended when there is
+///                       no ".bc" suffix), subject to the same -f check.
+/// If the chosen stream is not usable, output falls back to std::cout.
+///
+/// Returns 1 when the analysis produced an error message, 0 otherwise.
+int
+main(int argc, char **argv)
+{
+  cl::ParseCommandLineOptions(argc, argv,
+    " llvm-abcd Analysis of ByteCode Dumper\n");
+
+  PrintStackTraceOnErrorSignal();
+
+  std::ostream* Out = &std::cout;  // Default to printing to stdout...
+  std::string ErrorMessage;
+  BytecodeAnalysis bca;
+
+  /// Analyze the bytecode file
+  AnalyzeBytecodeFile(InputFilename, bca, &ErrorMessage);
+
+  // If there was an error, print it and stop.
+  if (!ErrorMessage.empty()) {
+    std::cerr << argv[0] << ": " << ErrorMessage << "\n";
+    return 1;
+  }
+
+  // Figure out where the output is going
+  if (OutputFilename != "") {    // Specified an output filename?
+    if (OutputFilename != "-") { // Not stdout?
+      if (!Force && std::ifstream(OutputFilename.c_str())) {
+        // If force is not specified, make sure not to overwrite a file!
+        std::cerr << argv[0] << ": error opening '" << OutputFilename
+                  << "': file exists! Sending to standard output.\n";
+      } else {
+        Out = new std::ofstream(OutputFilename.c_str());
+      }
+    }
+  } else if (InputFilename == "-") {
+    OutputFilename = "-";
+  } else {
+    const std::string IFN = InputFilename;
+    const std::string::size_type Len = IFN.size();
+
+    // Check the length before looking at the suffix: a name shorter than
+    // three characters cannot end in ".bc", and indexing IFN[Len-3] on such a
+    // name would read outside the string.
+    if (Len >= 3 && IFN.compare(Len - 3, 3, ".bc") == 0) {
+      // Source ends in .bc
+      OutputFilename = IFN.substr(0, Len - 3) + ".abc";
+    } else {
+      OutputFilename = IFN + ".abc";
+    }
+
+    if (!Force && std::ifstream(OutputFilename.c_str())) {
+      // If force is not specified, make sure not to overwrite a file!
+      std::cerr << argv[0] << ": error opening '" << OutputFilename
+                << "': file exists! Sending to standard output.\n";
+    } else {
+      Out = new std::ofstream(OutputFilename.c_str());
+
+      // Make sure that the Out file gets unlinked from the disk if we get a
+      // SIGINT
+      RemoveFileOnSignal(OutputFilename);
+    }
+  }
+
+  if (!Out->good()) {
+    std::cerr << argv[0] << ": error opening " << OutputFilename
+              << ": sending to stdout instead!\n";
+    // Release the stream that failed to open before falling back, so it is
+    // not leaked.
+    if (Out != &std::cout)
+      delete Out;
+    Out = &std::cout;
+  }
+
+  // All that abcd does is write the gathered statistics to the output
+  // NOTE(review): dumpBytecode is set after AnalyzeBytecodeFile has already
+  // run; confirm that only the printer consults it.
+  bca.dumpBytecode = true;
+  PrintBytecodeAnalysis(bca, *Out);
+
+  // Destroying the std::ofstream through its base pointer also closes the
+  // file, so no separate close() call is needed.
+  if (Out != &std::cout)
+    delete Out;
+  return 0;
+}
+
+// vim: sw=2