From c8a1169c935ad9d3dfbdd4f72d80abf8f5acb03c Mon Sep 17 00:00:00 2001 From: David Majnemer Date: Fri, 27 Jun 2014 18:19:56 +0000 Subject: IR: Add COMDATs to the IR This new IR facility allows us to represent the object-file semantic of a COMDAT group. COMDATs allow us to tie together sections and make the inclusion of one dependent on another. This is required to implement features like MS ABI VFTables and optimizing away certain kinds of initialization in C++. This functionality is only representable in COFF and ELF, Mach-O has no similar mechanism. Differential Revision: http://reviews.llvm.org/D4178 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211920 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/AsmParser/LLLexer.cpp | 54 +++++++++++++++++++++---- lib/AsmParser/LLLexer.h | 1 + lib/AsmParser/LLParser.cpp | 98 +++++++++++++++++++++++++++++++++++++++++++++- lib/AsmParser/LLParser.h | 10 +++++ lib/AsmParser/LLToken.h | 10 +++++ 5 files changed, 165 insertions(+), 8 deletions(-) (limited to 'lib/AsmParser') diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp index 1334825a7d..1e5bcdd930 100644 --- a/lib/AsmParser/LLLexer.cpp +++ b/lib/AsmParser/LLLexer.cpp @@ -209,6 +209,7 @@ lltok::Kind LLLexer::LexToken() { return LexToken(); case '+': return LexPositive(); case '@': return LexAt(); + case '$': return LexDollar(); case '%': return LexPercent(); case '"': return LexQuote(); case '.': @@ -222,13 +223,6 @@ lltok::Kind LLLexer::LexToken() { return lltok::dotdotdot; } return lltok::Error; - case '$': - if (const char *Ptr = isLabelTail(CurPtr)) { - CurPtr = Ptr; - StrVal.assign(TokStart, CurPtr-1); - return lltok::LabelStr; - } - return lltok::Error; case ';': SkipLineComment(); return LexToken(); @@ -307,6 +301,43 @@ lltok::Kind LLLexer::LexAt() { return lltok::Error; } +lltok::Kind LLLexer::LexDollar() { + if (const char *Ptr = isLabelTail(TokStart)) { + CurPtr = Ptr; + StrVal.assign(TokStart, CurPtr - 1); + return lltok::LabelStr; + } + + // Handle DollarStringConstant: $\"[^\"]*\" + if (CurPtr[0] == '"') { + ++CurPtr; + + while (1) { + int CurChar = getNextChar(); + + if (CurChar == EOF) { + Error("end of file in COMDAT variable name"); + return lltok::Error; + } + if (CurChar == '"') { + StrVal.assign(TokStart + 2, CurPtr - 1); + UnEscapeLexed(StrVal); + if (StringRef(StrVal).find_first_of(0) != StringRef::npos) { + Error("Null bytes are not allowed in names"); + return lltok::Error; + } + return lltok::ComdatVar; + } + } + } + + // Handle ComdatVarName: $[-a-zA-Z$._][-a-zA-Z$._0-9]* + if (ReadVarName()) + return lltok::ComdatVar; + + return lltok::Error; +} + /// ReadString - Read a string until the closing quote. lltok::Kind LLLexer::ReadString(lltok::Kind kind) { const char *Start = CurPtr; @@ -618,6 +649,15 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(type); KEYWORD(opaque); + KEYWORD(comdat); + + // Comdat types + KEYWORD(any); + KEYWORD(exactmatch); + KEYWORD(largest); + KEYWORD(noduplicates); + KEYWORD(samesize); + KEYWORD(eq); KEYWORD(ne); KEYWORD(slt); KEYWORD(sgt); KEYWORD(sle); KEYWORD(sge); KEYWORD(ult); KEYWORD(ugt); KEYWORD(ule); KEYWORD(uge); KEYWORD(oeq); KEYWORD(one); KEYWORD(olt); KEYWORD(ogt); KEYWORD(ole); diff --git a/lib/AsmParser/LLLexer.h b/lib/AsmParser/LLLexer.h index ad11d49b25..d42de57a3d 100644 --- a/lib/AsmParser/LLLexer.h +++ b/lib/AsmParser/LLLexer.h @@ -81,6 +81,7 @@ namespace llvm { lltok::Kind LexDigitOrNegative(); lltok::Kind LexPositive(); lltok::Kind LexAt(); + lltok::Kind LexDollar(); lltok::Kind LexExclaim(); lltok::Kind LexPercent(); lltok::Kind LexQuote(); diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp index f444206852..be55ac6481 100644 --- a/lib/AsmParser/LLParser.cpp +++ b/lib/AsmParser/LLParser.cpp @@ -163,6 +163,11 @@ bool LLParser::ValidateEndOfModule() { return Error(I->second.second, "use of undefined type named '" + I->getKey() + "'"); + if (!ForwardRefComdats.empty()) + return Error(ForwardRefComdats.begin()->second, + "use of undefined comdat '$" + + ForwardRefComdats.begin()->first + "'"); + if (!ForwardRefVals.empty()) return Error(ForwardRefVals.begin()->second.second, "use of undefined value '@" + ForwardRefVals.begin()->first + @@ -238,6 +243,7 @@ bool LLParser::ParseTopLevelEntities() { case lltok::LocalVar: if (ParseNamedType()) return true; break; case lltok::GlobalID: if (ParseUnnamedGlobal()) return true; break; case lltok::GlobalVar: if (ParseNamedGlobal()) return true; break; + case lltok::ComdatVar: if (parseComdat()) return true; break; case lltok::exclaim: if (ParseStandaloneMetadata()) return true; break; case lltok::MetadataVar:if (ParseNamedMetadata()) return true; break; @@ -513,6 +519,56 @@ bool LLParser::ParseNamedGlobal() { UnnamedAddr); } +bool LLParser::parseComdat() { + assert(Lex.getKind() == lltok::ComdatVar); + std::string Name = Lex.getStrVal(); + LocTy NameLoc = Lex.getLoc(); + Lex.Lex(); + + if (ParseToken(lltok::equal, "expected '=' here")) + return true; + + if (ParseToken(lltok::kw_comdat, "expected comdat keyword")) + return TokError("expected comdat type"); + + Comdat::SelectionKind SK; + switch (Lex.getKind()) { + default: + return TokError("unknown selection kind"); + case lltok::kw_any: + SK = Comdat::Any; + break; + case lltok::kw_exactmatch: + SK = Comdat::ExactMatch; + break; + case lltok::kw_largest: + SK = Comdat::Largest; + break; + case lltok::kw_noduplicates: + SK = Comdat::NoDuplicates; + break; + case lltok::kw_samesize: + SK = Comdat::SameSize; + break; + } + Lex.Lex(); + + // See if the comdat was forward referenced, if so, use the comdat. + Module::ComdatSymTabType &ComdatSymTab = M->getComdatSymbolTable(); + Module::ComdatSymTabType::iterator I = ComdatSymTab.find(Name); + if (I != ComdatSymTab.end() && !ForwardRefComdats.erase(Name)) + return Error(NameLoc, "redefinition of comdat '$" + Name + "'"); + + Comdat *C; + if (I != ComdatSymTab.end()) + C = &I->second; + else + C = M->getOrInsertComdat(Name); + C->setSelectionKind(SK); + + return false; +} + // MDString: // ::= '!' STRINGCONSTANT bool LLParser::ParseMDString(MDString *&Result) { @@ -838,7 +894,13 @@ bool LLParser::ParseGlobal(const std::string &Name, LocTy NameLoc, if (ParseOptionalAlignment(Alignment)) return true; GV->setAlignment(Alignment); } else { - TokError("unknown global variable property!"); + Comdat *C; + if (parseOptionalComdat(C)) + return true; + if (C) + GV->setComdat(C); + else + return TokError("unknown global variable property!"); } } @@ -1096,6 +1158,24 @@ GlobalValue *LLParser::GetGlobalVal(unsigned ID, Type *Ty, LocTy Loc) { } +//===----------------------------------------------------------------------===// +// Comdat Reference/Resolution Routines. +//===----------------------------------------------------------------------===// + +Comdat *LLParser::getComdat(const std::string &Name, LocTy Loc) { + // Look this name up in the comdat symbol table. + Module::ComdatSymTabType &ComdatSymTab = M->getComdatSymbolTable(); + Module::ComdatSymTabType::iterator I = ComdatSymTab.find(Name); + if (I != ComdatSymTab.end()) + return &I->second; + + // Otherwise, create a new forward reference for this value and remember it. + Comdat *C = M->getOrInsertComdat(Name); + ForwardRefComdats[Name] = Loc; + return C; +} + + //===----------------------------------------------------------------------===// // Helper Routines. //===----------------------------------------------------------------------===// @@ -2790,6 +2870,19 @@ bool LLParser::ParseGlobalTypeAndValue(Constant *&V) { ParseGlobalValue(Ty, V); } +bool LLParser::parseOptionalComdat(Comdat *&C) { + C = nullptr; + if (!EatIfPresent(lltok::kw_comdat)) + return false; + if (Lex.getKind() != lltok::ComdatVar) + return TokError("expected comdat variable"); + LocTy Loc = Lex.getLoc(); + StringRef Name = Lex.getStrVal(); + C = getComdat(Name, Loc); + Lex.Lex(); + return false; +} + /// ParseGlobalValueVector /// ::= /*empty*/ /// ::= TypeAndValue (',' TypeAndValue)* @@ -3090,6 +3183,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) { bool UnnamedAddr; LocTy UnnamedAddrLoc; Constant *Prefix = nullptr; + Comdat *C; if (ParseArgumentList(ArgList, isVarArg) || ParseOptionalToken(lltok::kw_unnamed_addr, UnnamedAddr, @@ -3098,6 +3192,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) { BuiltinLoc) || (EatIfPresent(lltok::kw_section) && ParseStringConstant(Section)) || + parseOptionalComdat(C) || ParseOptionalAlignment(Alignment) || (EatIfPresent(lltok::kw_gc) && ParseStringConstant(GC)) || @@ -3200,6 +3295,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) { Fn->setUnnamedAddr(UnnamedAddr); Fn->setAlignment(Alignment); Fn->setSection(Section); + Fn->setComdat(C); if (!GC.empty()) Fn->setGC(GC.c_str()); Fn->setPrefixData(Prefix); ForwardRefAttrGroups[Fn] = FwdRefAttrGrps; diff --git a/lib/AsmParser/LLParser.h b/lib/AsmParser/LLParser.h index f7d69d267d..2efb260d66 100644 --- a/lib/AsmParser/LLParser.h +++ b/lib/AsmParser/LLParser.h @@ -34,6 +34,7 @@ namespace llvm { class Instruction; class Constant; class GlobalValue; + class Comdat; class MDString; class MDNode; class StructType; @@ -122,6 +123,9 @@ namespace llvm { std::map > ForwardRefValIDs; std::vector NumberedVals; + // Comdat forward reference information. + std::map ForwardRefComdats; + // References to blockaddress. The key is the function ValID, the value is // a list of references to blocks in that function. std::map > > @@ -154,6 +158,10 @@ namespace llvm { GlobalValue *GetGlobalVal(const std::string &N, Type *Ty, LocTy Loc); GlobalValue *GetGlobalVal(unsigned ID, Type *Ty, LocTy Loc); + /// Get a Comdat with the specified name, creating a forward reference + /// record if needed. + Comdat *getComdat(const std::string &N, LocTy Loc); + // Helper Routines. bool ParseToken(lltok::Kind T, const char *ErrMsg); bool EatIfPresent(lltok::Kind T) { @@ -247,6 +255,7 @@ namespace llvm { bool ParseAlias(const std::string &Name, LocTy Loc, unsigned Visibility, unsigned DLLStorageClass, GlobalVariable::ThreadLocalMode TLM, bool UnnamedAddr); + bool parseComdat(); bool ParseStandaloneMetadata(); bool ParseNamedMetadata(); bool ParseMDString(MDString *&Result); @@ -358,6 +367,7 @@ namespace llvm { bool ParseGlobalValue(Type *Ty, Constant *&V); bool ParseGlobalTypeAndValue(Constant *&V); bool ParseGlobalValueVector(SmallVectorImpl &Elts); + bool parseOptionalComdat(Comdat *&C); bool ParseMetadataListValue(ValID &ID, PerFunctionState *PFS); bool ParseMetadataValue(ValID &ID, PerFunctionState *PFS); bool ParseMDNodeVector(SmallVectorImpl &, PerFunctionState *PFS); diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h index af8b0da78b..534d82415f 100644 --- a/lib/AsmParser/LLToken.h +++ b/lib/AsmParser/LLToken.h @@ -142,6 +142,15 @@ namespace lltok { kw_type, kw_opaque, + kw_comdat, + + // Comdat types + kw_any, + kw_exactmatch, + kw_largest, + kw_noduplicates, + kw_samesize, + kw_eq, kw_ne, kw_slt, kw_sgt, kw_sle, kw_sge, kw_ult, kw_ugt, kw_ule, kw_uge, kw_oeq, kw_one, kw_olt, kw_ogt, kw_ole, kw_oge, kw_ord, kw_uno, kw_ueq, kw_une, @@ -180,6 +189,7 @@ namespace lltok { // String valued tokens (StrVal). LabelStr, // foo: GlobalVar, // @foo @"foo" + ComdatVar, // $foo LocalVar, // %foo %"foo" MetadataVar, // !foo StringConstant, // "foo" -- cgit v1.2.3