diff options
Diffstat (limited to 'lib/Bytecode/Reader/Parser.cpp')
-rw-r--r-- | lib/Bytecode/Reader/Parser.cpp | 877 |
1 files changed, 877 insertions, 0 deletions
diff --git a/lib/Bytecode/Reader/Parser.cpp b/lib/Bytecode/Reader/Parser.cpp new file mode 100644 index 0000000000..d236b64aae --- /dev/null +++ b/lib/Bytecode/Reader/Parser.cpp @@ -0,0 +1,877 @@ +//===- Reader.cpp - Code to read bytecode files ---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by the LLVM research group and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This library implements the functionality defined in llvm/Bytecode/Reader.h +// +// Note that this library should be as fast as possible, reentrant, and +// threadsafe!! +// +// TODO: Allow passing in an option to ignore the symbol table +// +//===----------------------------------------------------------------------===// + +#include "AnalyzerInternals.h" +#include "llvm/Module.h" +#include "llvm/Bytecode/Format.h" +#include "Support/StringExtras.h" +#include <iostream> +#include <sstream> + +using namespace llvm; + +#define PARSE_ERROR(inserters) \ + { \ + std::ostringstream errormsg; \ + errormsg << inserters; \ + if ( ! handler->handleError( errormsg.str() ) ) \ + throw std::string(errormsg.str()); \ + } + +const Type *AbstractBytecodeParser::getType(unsigned ID) { + //cerr << "Looking up Type ID: " << ID << "\n"; + + if (ID < Type::FirstDerivedTyID) + if (const Type *T = Type::getPrimitiveType((Type::PrimitiveID)ID)) + return T; // Asked for a primitive type... + + // Otherwise, derived types need offset... + ID -= Type::FirstDerivedTyID; + + if (!CompactionTypeTable.empty()) { + if (ID >= CompactionTypeTable.size()) + PARSE_ERROR("Type ID out of range for compaction table!"); + return CompactionTypeTable[ID]; + } + + // Is it a module-level type? + if (ID < ModuleTypes.size()) + return ModuleTypes[ID].get(); + + // Nope, is it a function-level type? + ID -= ModuleTypes.size(); + if (ID < FunctionTypes.size()) + return FunctionTypes[ID].get(); + + PARSE_ERROR("Illegal type reference!"); + return Type::VoidTy; +} + +bool AbstractBytecodeParser::ParseInstruction(BufPtr& Buf, BufPtr EndBuf, + std::vector<unsigned> &Operands) { + Operands.clear(); + unsigned iType = 0; + unsigned Opcode = 0; + unsigned Op = read(Buf, EndBuf); + + // bits Instruction format: Common to all formats + // -------------------------- + // 01-00: Opcode type, fixed to 1. + // 07-02: Opcode + Opcode = (Op >> 2) & 63; + Operands.resize((Op >> 0) & 03); + + switch (Operands.size()) { + case 1: + // bits Instruction format: + // -------------------------- + // 19-08: Resulting type plane + // 31-20: Operand #1 (if set to (2^12-1), then zero operands) + // + iType = (Op >> 8) & 4095; + Operands[0] = (Op >> 20) & 4095; + if (Operands[0] == 4095) // Handle special encoding for 0 operands... + Operands.resize(0); + break; + case 2: + // bits Instruction format: + // -------------------------- + // 15-08: Resulting type plane + // 23-16: Operand #1 + // 31-24: Operand #2 + // + iType = (Op >> 8) & 255; + Operands[0] = (Op >> 16) & 255; + Operands[1] = (Op >> 24) & 255; + break; + case 3: + // bits Instruction format: + // -------------------------- + // 13-08: Resulting type plane + // 19-14: Operand #1 + // 25-20: Operand #2 + // 31-26: Operand #3 + // + iType = (Op >> 8) & 63; + Operands[0] = (Op >> 14) & 63; + Operands[1] = (Op >> 20) & 63; + Operands[2] = (Op >> 26) & 63; + break; + case 0: + Buf -= 4; // Hrm, try this again... + Opcode = read_vbr_uint(Buf, EndBuf); + Opcode >>= 2; + iType = read_vbr_uint(Buf, EndBuf); + + unsigned NumOperands = read_vbr_uint(Buf, EndBuf); + Operands.resize(NumOperands); + + if (NumOperands == 0) + PARSE_ERROR("Zero-argument instruction found; this is invalid."); + + for (unsigned i = 0; i != NumOperands; ++i) + Operands[i] = read_vbr_uint(Buf, EndBuf); + align32(Buf, EndBuf); + break; + } + + return handler->handleInstruction(Opcode, getType(iType), Operands); +} + +/// ParseBasicBlock - In LLVM 1.0 bytecode files, we used to output one +/// basicblock at a time. This method reads in one of the basicblock packets. +void AbstractBytecodeParser::ParseBasicBlock(BufPtr &Buf, + BufPtr EndBuf, + unsigned BlockNo) { + handler->handleBasicBlockBegin( BlockNo ); + + std::vector<unsigned> Args; + bool is_terminating = false; + while (Buf < EndBuf) + is_terminating = ParseInstruction(Buf, EndBuf, Args); + + if ( ! is_terminating ) + PARSE_ERROR( + "Failed to recognize instruction as terminating at end of block"); + + handler->handleBasicBlockEnd( BlockNo ); +} + + +/// ParseInstructionList - Parse all of the BasicBlock's & Instruction's in the +/// body of a function. In post 1.0 bytecode files, we no longer emit basic +/// block individually, in order to avoid per-basic-block overhead. +unsigned AbstractBytecodeParser::ParseInstructionList( BufPtr &Buf, BufPtr EndBuf) { + unsigned BlockNo = 0; + std::vector<unsigned> Args; + + while (Buf < EndBuf) { + handler->handleBasicBlockBegin( BlockNo ); + + // Read instructions into this basic block until we get to a terminator + bool is_terminating = false; + while (Buf < EndBuf && !is_terminating ) + is_terminating = ParseInstruction(Buf, EndBuf, Args ) ; + + if (!is_terminating) + PARSE_ERROR( "Non-terminated basic block found!"); + + handler->handleBasicBlockEnd( BlockNo ); + ++BlockNo; + } + return BlockNo; +} + +void AbstractBytecodeParser::ParseSymbolTable(BufPtr &Buf, BufPtr EndBuf) { + handler->handleSymbolTableBegin(); + + while (Buf < EndBuf) { + // Symtab block header: [num entries][type id number] + unsigned NumEntries = read_vbr_uint(Buf, EndBuf); + unsigned Typ = read_vbr_uint(Buf, EndBuf); + const Type *Ty = getType(Typ); + + handler->handleSymbolTablePlane( Typ, NumEntries, Ty ); + + for (unsigned i = 0; i != NumEntries; ++i) { + // Symtab entry: [def slot #][name] + unsigned slot = read_vbr_uint(Buf, EndBuf); + std::string Name = read_str(Buf, EndBuf); + + if (Typ == Type::TypeTyID) + handler->handleSymbolTableType( i, slot, Name ); + else + handler->handleSymbolTableValue( i, slot, Name ); + } + } + + if (Buf > EndBuf) + PARSE_ERROR("Tried to read past end of buffer while reading symbol table."); + + handler->handleSymbolTableEnd(); +} + +void AbstractBytecodeParser::ParseFunctionLazily(BufPtr &Buf, BufPtr EndBuf) { + if (FunctionSignatureList.empty()) + throw std::string("FunctionSignatureList empty!"); + + const Type *FType = FunctionSignatureList.back(); + FunctionSignatureList.pop_back(); + + // Save the information for future reading of the function + LazyFunctionLoadMap[FType] = LazyFunctionInfo(Buf, EndBuf); + // Pretend we've `parsed' this function + Buf = EndBuf; +} + +void AbstractBytecodeParser::ParseNextFunction(Type* FType) { + // Find {start, end} pointers and slot in the map. If not there, we're done. + LazyFunctionMap::iterator Fi = LazyFunctionLoadMap.find(FType); + + // Make sure we found it + if ( Fi == LazyFunctionLoadMap.end() ) { + PARSE_ERROR("Unrecognized function of type " << FType->getDescription()); + return; + } + + BufPtr Buf = Fi->second.Buf; + BufPtr EndBuf = Fi->second.EndBuf; + assert(Fi->first == FType); + + LazyFunctionLoadMap.erase(Fi); + + this->ParseFunctionBody( FType, Buf, EndBuf ); +} + +void AbstractBytecodeParser::ParseFunctionBody(const Type* FType, + BufPtr &Buf, BufPtr EndBuf ) { + + GlobalValue::LinkageTypes Linkage = GlobalValue::ExternalLinkage; + + unsigned LinkageType = read_vbr_uint(Buf, EndBuf); + switch (LinkageType) { + case 0: Linkage = GlobalValue::ExternalLinkage; break; + case 1: Linkage = GlobalValue::WeakLinkage; break; + case 2: Linkage = GlobalValue::AppendingLinkage; break; + case 3: Linkage = GlobalValue::InternalLinkage; break; + case 4: Linkage = GlobalValue::LinkOnceLinkage; break; + default: + PARSE_ERROR("Invalid linkage type for Function."); + Linkage = GlobalValue::InternalLinkage; + break; + } + + handler->handleFunctionBegin(FType,Linkage); + + // Keep track of how many basic blocks we have read in... + unsigned BlockNum = 0; + bool InsertedArguments = false; + + while (Buf < EndBuf) { + unsigned Type, Size; + BufPtr OldBuf = Buf; + readBlock(Buf, EndBuf, Type, Size); + + switch (Type) { + case BytecodeFormat::ConstantPool: + ParseConstantPool(Buf, Buf+Size, FunctionTypes ); + break; + + case BytecodeFormat::CompactionTable: + ParseCompactionTable(Buf, Buf+Size); + break; + + case BytecodeFormat::BasicBlock: + ParseBasicBlock(Buf, Buf+Size, BlockNum++); + break; + + case BytecodeFormat::InstructionList: + if (BlockNum) + PARSE_ERROR("InstructionList must come before basic blocks!"); + BlockNum = ParseInstructionList(Buf, Buf+Size); + break; + + case BytecodeFormat::SymbolTable: + ParseSymbolTable(Buf, Buf+Size ); + break; + + default: + Buf += Size; + if (OldBuf > Buf) + PARSE_ERROR("Wrapped around reading bytecode"); + break; + } + + // Malformed bc file if read past end of block. + align32(Buf, EndBuf); + } + + handler->handleFunctionEnd(FType); + + // Clear out function-level types... + FunctionTypes.clear(); + CompactionTypeTable.clear(); +} + +void AbstractBytecodeParser::ParseAllFunctionBodies() { + LazyFunctionMap::iterator Fi = LazyFunctionLoadMap.begin(); + LazyFunctionMap::iterator Fe = LazyFunctionLoadMap.end(); + + while ( Fi != Fe ) { + const Type* FType = Fi->first; + this->ParseFunctionBody(FType, Fi->second.Buf, Fi->second.EndBuf); + } +} + +void AbstractBytecodeParser::ParseCompactionTable(BufPtr &Buf, BufPtr End) { + + handler->handleCompactionTableBegin(); + + while (Buf != End) { + unsigned NumEntries = read_vbr_uint(Buf, End); + unsigned Ty; + + if ((NumEntries & 3) == 3) { + NumEntries >>= 2; + Ty = read_vbr_uint(Buf, End); + } else { + Ty = NumEntries >> 2; + NumEntries &= 3; + } + + handler->handleCompactionTablePlane( Ty, NumEntries ); + + if (Ty == Type::TypeTyID) { + for (unsigned i = 0; i != NumEntries; ++i) { + unsigned TypeSlot = read_vbr_uint(Buf,End); + const Type *Typ = getGlobalTableType(TypeSlot); + handler->handleCompactionTableType( i, TypeSlot, Typ ); + } + } else { + const Type *Typ = getType(Ty); + // Push the implicit zero + for (unsigned i = 0; i != NumEntries; ++i) { + unsigned ValSlot = read_vbr_uint(Buf, End); + handler->handleCompactionTableValue( i, ValSlot, Typ ); + } + } + } + handler->handleCompactionTableEnd(); +} + +const Type *AbstractBytecodeParser::ParseTypeConstant(const unsigned char *&Buf, + const unsigned char *EndBuf) { + unsigned PrimType = read_vbr_uint(Buf, EndBuf); + + const Type *Val = 0; + if ((Val = Type::getPrimitiveType((Type::PrimitiveID)PrimType))) + return Val; + + switch (PrimType) { + case Type::FunctionTyID: { + const Type *RetType = getType(read_vbr_uint(Buf, EndBuf)); + + unsigned NumParams = read_vbr_uint(Buf, EndBuf); + + std::vector<const Type*> Params; + while (NumParams--) + Params.push_back(getType(read_vbr_uint(Buf, EndBuf))); + + bool isVarArg = Params.size() && Params.back() == Type::VoidTy; + if (isVarArg) Params.pop_back(); + + Type* result = FunctionType::get(RetType, Params, isVarArg); + handler->handleType( result ); + return result; + } + case Type::ArrayTyID: { + unsigned ElTyp = read_vbr_uint(Buf, EndBuf); + const Type *ElementType = getType(ElTyp); + + unsigned NumElements = read_vbr_uint(Buf, EndBuf); + + BCR_TRACE(5, "Array Type Constant #" << ElTyp << " size=" + << NumElements << "\n"); + Type* result = ArrayType::get(ElementType, NumElements); + handler->handleType( result ); + return result; + } + case Type::StructTyID: { + std::vector<const Type*> Elements; + unsigned Typ = read_vbr_uint(Buf, EndBuf); + while (Typ) { // List is terminated by void/0 typeid + Elements.push_back(getType(Typ)); + Typ = read_vbr_uint(Buf, EndBuf); + } + + Type* result = StructType::get(Elements); + handler->handleType( result ); + return result; + } + case Type::PointerTyID: { + unsigned ElTyp = read_vbr_uint(Buf, EndBuf); + BCR_TRACE(5, "Pointer Type Constant #" << ElTyp << "\n"); + Type* result = PointerType::get(getType(ElTyp)); + handler->handleType( result ); + return result; + } + + case Type::OpaqueTyID: { + Type* result = OpaqueType::get(); + handler->handleType( result ); + return result; + } + + default: + PARSE_ERROR("Don't know how to deserialize primitive type" << PrimType << "\n"); + return Val; + } +} + +// ParseTypeConstants - We have to use this weird code to handle recursive +// types. We know that recursive types will only reference the current slab of +// values in the type plane, but they can forward reference types before they +// have been read. For example, Type #0 might be '{ Ty#1 }' and Type #1 might +// be 'Ty#0*'. When reading Type #0, type number one doesn't exist. To fix +// this ugly problem, we pessimistically insert an opaque type for each type we +// are about to read. This means that forward references will resolve to +// something and when we reread the type later, we can replace the opaque type +// with a new resolved concrete type. +// +void AbstractBytecodeParser::ParseTypeConstants(const unsigned char *&Buf, + const unsigned char *EndBuf, + TypeListTy &Tab, + unsigned NumEntries) { + assert(Tab.size() == 0 && "should not have read type constants in before!"); + + // Insert a bunch of opaque types to be resolved later... + Tab.reserve(NumEntries); + for (unsigned i = 0; i != NumEntries; ++i) + Tab.push_back(OpaqueType::get()); + + // Loop through reading all of the types. Forward types will make use of the + // opaque types just inserted. + // + for (unsigned i = 0; i != NumEntries; ++i) { + const Type *NewTy = ParseTypeConstant(Buf, EndBuf), *OldTy = Tab[i].get(); + if (NewTy == 0) throw std::string("Couldn't parse type!"); + BCR_TRACE(4, "#" << i << ": Read Type Constant: '" << NewTy << + "' Replacing: " << OldTy << "\n"); + + // Don't insertValue the new type... instead we want to replace the opaque + // type with the new concrete value... + // + + // Refine the abstract type to the new type. This causes all uses of the + // abstract type to use NewTy. This also will cause the opaque type to be + // deleted... + // + cast<DerivedType>(const_cast<Type*>(OldTy))->refineAbstractTypeTo(NewTy); + + // This should have replace the old opaque type with the new type in the + // value table... or with a preexisting type that was already in the system + assert(Tab[i] != OldTy && "refineAbstractType didn't work!"); + } + + BCR_TRACE(5, "Resulting types:\n"); + for (unsigned i = 0; i < NumEntries; ++i) { + BCR_TRACE(5, (void*)Tab[i].get() << " - " << Tab[i].get() << "\n"); + } +} + + +void AbstractBytecodeParser::ParseConstantValue(const unsigned char *&Buf, + const unsigned char *EndBuf, + unsigned TypeID) { + + // We must check for a ConstantExpr before switching by type because + // a ConstantExpr can be of any type, and has no explicit value. + // + // 0 if not expr; numArgs if is expr + unsigned isExprNumArgs = read_vbr_uint(Buf, EndBuf); + + if (isExprNumArgs) { + unsigned Opcode = read_vbr_uint(Buf, EndBuf); + const Type* Typ = getType(TypeID); + + // FIXME: Encoding of constant exprs could be much more compact! + std::vector<std::pair<const Type*,unsigned> > ArgVec; + ArgVec.reserve(isExprNumArgs); + + // Read the slot number and types of each of the arguments + for (unsigned i = 0; i != isExprNumArgs; ++i) { + unsigned ArgValSlot = read_vbr_uint(Buf, EndBuf); + unsigned ArgTypeSlot = read_vbr_uint(Buf, EndBuf); + BCR_TRACE(4, "CE Arg " << i << ": Type: '" << *getType(ArgTypeSlot) + << "' slot: " << ArgValSlot << "\n"); + + // Get the arg value from its slot if it exists, otherwise a placeholder + ArgVec.push_back(std::make_pair(getType(ArgTypeSlot), ArgValSlot)); + } + + handler->handleConstantExpression( Opcode, Typ, ArgVec ); + return; + } + + // Ok, not an ConstantExpr. We now know how to read the given type... + const Type *Ty = getType(TypeID); + switch (Ty->getPrimitiveID()) { + case Type::BoolTyID: { + unsigned Val = read_vbr_uint(Buf, EndBuf); + if (Val != 0 && Val != 1) + PARSE_ERROR("Invalid boolean value read."); + + handler->handleConstantValue( ConstantBool::get(Val == 1)); + break; + } + + case Type::UByteTyID: // Unsigned integer types... + case Type::UShortTyID: + case Type::UIntTyID: { + unsigned Val = read_vbr_uint(Buf, EndBuf); + if (!ConstantUInt::isValueValidForType(Ty, Val)) + throw std::string("Invalid unsigned byte/short/int read."); + handler->handleConstantValue( ConstantUInt::get(Ty, Val) ); + break; + } + + case Type::ULongTyID: { + handler->handleConstantValue( ConstantUInt::get(Ty, read_vbr_uint64(Buf, EndBuf)) ); + break; + } + + case Type::SByteTyID: // Signed integer types... + case Type::ShortTyID: + case Type::IntTyID: { + case Type::LongTyID: + int64_t Val = read_vbr_int64(Buf, EndBuf); + if (!ConstantSInt::isValueValidForType(Ty, Val)) + throw std::string("Invalid signed byte/short/int/long read."); + handler->handleConstantValue( ConstantSInt::get(Ty, Val) ); + break; + } + + case Type::FloatTyID: { + float F; + input_data(Buf, EndBuf, &F, &F+1); + handler->handleConstantValue( ConstantFP::get(Ty, F) ); + break; + } + + case Type::DoubleTyID: { + double Val; + input_data(Buf, EndBuf, &Val, &Val+1); + handler->handleConstantValue( ConstantFP::get(Ty, Val) ); + break; + } + + case Type::TypeTyID: + PARSE_ERROR("Type constants shouldn't live in constant table!"); + break; + + case Type::ArrayTyID: { + const ArrayType *AT = cast<ArrayType>(Ty); + unsigned NumElements = AT->getNumElements(); + std::vector<unsigned> Elements; + Elements.reserve(NumElements); + while (NumElements--) // Read all of the elements of the constant. + Elements.push_back(read_vbr_uint(Buf, EndBuf)); + + handler->handleConstantArray( AT, Elements ); + break; + } + + case Type::StructTyID: { + const StructType *ST = cast<StructType>(Ty); + std::vector<unsigned> Elements; + Elements.reserve(ST->getNumElements()); + for (unsigned i = 0; i != ST->getNumElements(); ++i) + Elements.push_back(read_vbr_uint(Buf, EndBuf)); + + handler->handleConstantStruct( ST, Elements ); + } + + case Type::PointerTyID: { // ConstantPointerRef value... + const PointerType *PT = cast<PointerType>(Ty); + unsigned Slot = read_vbr_uint(Buf, EndBuf); + handler->handleConstantPointer( PT, Slot ); + } + + default: + PARSE_ERROR("Don't know how to deserialize constant value of type '"+ + Ty->getDescription()); + } +} + +void AbstractBytecodeParser::ParseGlobalTypes(const unsigned char *&Buf, + const unsigned char *EndBuf) { + ParseConstantPool(Buf, EndBuf, ModuleTypes); +} + +void AbstractBytecodeParser::ParseStringConstants(const unsigned char *&Buf, + const unsigned char *EndBuf, + unsigned NumEntries ){ + for (; NumEntries; --NumEntries) { + unsigned Typ = read_vbr_uint(Buf, EndBuf); + const Type *Ty = getType(Typ); + if (!isa<ArrayType>(Ty)) + throw std::string("String constant data invalid!"); + + const ArrayType *ATy = cast<ArrayType>(Ty); + if (ATy->getElementType() != Type::SByteTy && + ATy->getElementType() != Type::UByteTy) + throw std::string("String constant data invalid!"); + + // Read character data. The type tells us how long the string is. + char Data[ATy->getNumElements()]; + input_data(Buf, EndBuf, Data, Data+ATy->getNumElements()); + + std::vector<Constant*> Elements(ATy->getNumElements()); + if (ATy->getElementType() == Type::SByteTy) + for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) + Elements[i] = ConstantSInt::get(Type::SByteTy, (signed char)Data[i]); + else + for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) + Elements[i] = ConstantUInt::get(Type::UByteTy, (unsigned char)Data[i]); + + // Create the constant, inserting it as needed. + ConstantArray *C = cast<ConstantArray>( ConstantArray::get(ATy, Elements) ); + handler->handleConstantString( C ); + } +} + + +void AbstractBytecodeParser::ParseConstantPool(const unsigned char *&Buf, + const unsigned char *EndBuf, + TypeListTy &TypeTab) { + while (Buf < EndBuf) { + unsigned NumEntries = read_vbr_uint(Buf, EndBuf); + unsigned Typ = read_vbr_uint(Buf, EndBuf); + if (Typ == Type::TypeTyID) { + ParseTypeConstants(Buf, EndBuf, TypeTab, NumEntries); + } else if (Typ == Type::VoidTyID) { + ParseStringConstants(Buf, EndBuf, NumEntries); + } else { + BCR_TRACE(3, "Type: '" << *getType(Typ) << "' NumEntries: " + << NumEntries << "\n"); + + for (unsigned i = 0; i < NumEntries; ++i) { + ParseConstantValue(Buf, EndBuf, Typ); + } + } + } + + if (Buf > EndBuf) PARSE_ERROR("Read past end of buffer."); +} + +void AbstractBytecodeParser::ParseModuleGlobalInfo(BufPtr &Buf, BufPtr End) { + + handler->handleModuleGlobalsBegin(); + + // Read global variables... + unsigned VarType = read_vbr_uint(Buf, End); + while (VarType != Type::VoidTyID) { // List is terminated by Void + // VarType Fields: bit0 = isConstant, bit1 = hasInitializer, bit2,3,4 = + // Linkage, bit4+ = slot# + unsigned SlotNo = VarType >> 5; + unsigned LinkageID = (VarType >> 2) & 7; + bool isConstant = VarType & 1; + bool hasInitializer = VarType & 2; + GlobalValue::LinkageTypes Linkage; + + switch (LinkageID) { + case 0: Linkage = GlobalValue::ExternalLinkage; break; + case 1: Linkage = GlobalValue::WeakLinkage; break; + case 2: Linkage = GlobalValue::AppendingLinkage; break; + case 3: Linkage = GlobalValue::InternalLinkage; break; + case 4: Linkage = GlobalValue::LinkOnceLinkage; break; + default: + PARSE_ERROR("Unknown linkage type: " << LinkageID); + Linkage = GlobalValue::InternalLinkage; + break; + } + + const Type *Ty = getType(SlotNo); + if ( !Ty ) { + PARSE_ERROR("Global has no type! SlotNo=" << SlotNo); + } + + if ( !isa<PointerType>(Ty)) { + PARSE_ERROR("Global not a pointer type! Ty= " << Ty->getDescription()); + } + + const Type *ElTy = cast<PointerType>(Ty)->getElementType(); + + // Create the global variable... + if (hasInitializer) + handler->handleGlobalVariable( ElTy, isConstant, Linkage ); + else { + unsigned initSlot = read_vbr_uint(Buf,End); + handler->handleInitializedGV( ElTy, isConstant, Linkage, initSlot ); + } + + // Get next item + VarType = read_vbr_uint(Buf, End); + } + + // Read the function objects for all of the functions that are coming + unsigned FnSignature = read_vbr_uint(Buf, End); + while (FnSignature != Type::VoidTyID) { // List is terminated by Void + const Type *Ty = getType(FnSignature); + if (!isa<PointerType>(Ty) || + !isa<FunctionType>(cast<PointerType>(Ty)->getElementType())) { + PARSE_ERROR( "Function not a pointer to function type! Ty = " + + Ty->getDescription()); + // FIXME: what should Ty be if handler continues? + } + + // We create functions by passing the underlying FunctionType to create... + Ty = cast<PointerType>(Ty)->getElementType(); + + // Save this for later so we know type of lazily instantiated functions + FunctionSignatureList.push_back(Ty); + + handler->handleFunctionDeclaration(Ty); + + // Get Next function signature + FnSignature = read_vbr_uint(Buf, End); + } + + if (hasInconsistentModuleGlobalInfo) + align32(Buf, End); + + // This is for future proofing... in the future extra fields may be added that + // we don't understand, so we transparently ignore them. + // + Buf = End; + + handler->handleModuleGlobalsEnd(); +} + +void AbstractBytecodeParser::ParseVersionInfo(BufPtr &Buf, BufPtr EndBuf) { + unsigned Version = read_vbr_uint(Buf, EndBuf); + + // Unpack version number: low four bits are for flags, top bits = version + Module::Endianness Endianness; + Module::PointerSize PointerSize; + Endianness = (Version & 1) ? Module::BigEndian : Module::LittleEndian; + PointerSize = (Version & 2) ? Module::Pointer64 : Module::Pointer32; + + bool hasNoEndianness = Version & 4; + bool hasNoPointerSize = Version & 8; + + RevisionNum = Version >> 4; + + // Default values for the current bytecode version + hasInconsistentModuleGlobalInfo = false; + hasExplicitPrimitiveZeros = false; + hasRestrictedGEPTypes = false; + + switch (RevisionNum) { + case 0: // LLVM 1.0, 1.1 release version + // Base LLVM 1.0 bytecode format. + hasInconsistentModuleGlobalInfo = true; + hasExplicitPrimitiveZeros = true; + // FALL THROUGH + case 1: // LLVM 1.2 release version + // LLVM 1.2 added explicit support for emitting strings efficiently. + + // Also, it fixed the problem where the size of the ModuleGlobalInfo block + // included the size for the alignment at the end, where the rest of the + // blocks did not. + + // LLVM 1.2 and before required that GEP indices be ubyte constants for + // structures and longs for sequential types. + hasRestrictedGEPTypes = true; + + // FALL THROUGH + case 2: // LLVM 1.3 release version + break; + + default: + PARSE_ERROR("Unknown bytecode version number: " << RevisionNum); + } + + if (hasNoEndianness) Endianness = Module::AnyEndianness; + if (hasNoPointerSize) PointerSize = Module::AnyPointerSize; + + handler->handleVersionInfo(RevisionNum, Endianness, PointerSize ); +} + +void AbstractBytecodeParser::ParseModule(BufPtr &Buf, BufPtr EndBuf ) { + unsigned Type, Size; + readBlock(Buf, EndBuf, Type, Size); + if (Type != BytecodeFormat::Module || Buf+Size != EndBuf) + // Hrm, not a class? + PARSE_ERROR("Expected Module block! B: " << unsigned(intptr_t(Buf)) << + ", S: " << Size << " E: " << unsigned(intptr_t(EndBuf))); + + // Read into instance variables... + ParseVersionInfo(Buf, EndBuf); + align32(Buf, EndBuf); + + bool SeenModuleGlobalInfo = false; + bool SeenGlobalTypePlane = false; + while (Buf < EndBuf) { + BufPtr OldBuf = Buf; + readBlock(Buf, EndBuf, Type, Size); + + switch (Type) { + + case BytecodeFormat::GlobalTypePlane: + if ( SeenGlobalTypePlane ) + PARSE_ERROR("Two GlobalTypePlane Blocks Encountered!"); + + ParseGlobalTypes(Buf, Buf+Size); + SeenGlobalTypePlane = true; + break; + + case BytecodeFormat::ModuleGlobalInfo: + if ( SeenModuleGlobalInfo ) + PARSE_ERROR("Two ModuleGlobalInfo Blocks Encountered!"); + ParseModuleGlobalInfo(Buf, Buf+Size); + SeenModuleGlobalInfo = true; + break; + + case BytecodeFormat::ConstantPool: + ParseConstantPool(Buf, Buf+Size, ModuleTypes); + break; + + case BytecodeFormat::Function: + ParseFunctionLazily(Buf, Buf+Size); + break; + + case BytecodeFormat::SymbolTable: + ParseSymbolTable(Buf, Buf+Size ); + break; + + default: + Buf += Size; + if (OldBuf > Buf) + { + PARSE_ERROR("Unexpected Block of Type" << Type << "encountered!" ); + } + break; + } + align32(Buf, EndBuf); + } +} + +void AbstractBytecodeParser::ParseBytecode( + BufPtr Buf, unsigned Length, + const std::string &ModuleID) { + + handler->handleStart(); + unsigned char *EndBuf = (unsigned char*)(Buf + Length); + + // Read and check signature... + unsigned Sig = read(Buf, EndBuf); + if (Sig != ('l' | ('l' << 8) | ('v' << 16) | ('m' << 24))) { + PARSE_ERROR("Invalid bytecode signature: " << Sig); + } + + handler->handleModuleBegin(ModuleID); + + this->ParseModule(Buf, EndBuf); + + handler->handleModuleEnd(ModuleID); + + handler->handleFinish(); +} + +// vim: sw=2 |