From 7dd4dc88921421cd2a1e6c1711689d5993106767 Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Wed, 31 Oct 2012 23:24:13 +0000 Subject: MC: Simple example parser for MC assembly markup. Nothing fancy, just a simple demonstration parser. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@167181 91177308-0d34-0410-b5e6-96231b3b80d8 --- tools/CMakeLists.txt | 1 + tools/LLVMBuild.txt | 2 +- tools/Makefile | 2 +- tools/llvm-mcmarkup/CMakeLists.txt | 5 + tools/llvm-mcmarkup/LLVMBuild.txt | 22 ++++ tools/llvm-mcmarkup/Makefile | 17 +++ tools/llvm-mcmarkup/llvm-mcmarkup.cpp | 225 ++++++++++++++++++++++++++++++++++ 7 files changed, 272 insertions(+), 2 deletions(-) create mode 100644 tools/llvm-mcmarkup/CMakeLists.txt create mode 100644 tools/llvm-mcmarkup/LLVMBuild.txt create mode 100644 tools/llvm-mcmarkup/Makefile create mode 100644 tools/llvm-mcmarkup/llvm-mcmarkup.cpp (limited to 'tools') diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt index 1bfc2fe3e8..6918285622 100644 --- a/tools/CMakeLists.txt +++ b/tools/CMakeLists.txt @@ -36,6 +36,7 @@ add_subdirectory(bugpoint) add_subdirectory(bugpoint-passes) add_subdirectory(llvm-bcanalyzer) add_subdirectory(llvm-stress) +add_subdirectory(llvm-mcmarkup) if( NOT WIN32 ) add_subdirectory(lto) diff --git a/tools/LLVMBuild.txt b/tools/LLVMBuild.txt index df4aa9ff4e..64164792a7 100644 --- a/tools/LLVMBuild.txt +++ b/tools/LLVMBuild.txt @@ -16,7 +16,7 @@ ;===------------------------------------------------------------------------===; [common] -subdirectories = bugpoint llc lli llvm-ar llvm-as llvm-bcanalyzer llvm-cov llvm-diff llvm-dis llvm-dwarfdump llvm-extract llvm-link llvm-mc llvm-nm llvm-objdump llvm-prof llvm-ranlib llvm-rtdyld llvm-size macho-dump opt +subdirectories = bugpoint llc lli llvm-ar llvm-as llvm-bcanalyzer llvm-cov llvm-diff llvm-dis llvm-dwarfdump llvm-extract llvm-link llvm-mc llvm-nm llvm-objdump llvm-prof llvm-ranlib llvm-rtdyld llvm-size macho-dump opt llvm-mcmarkup [component_0] type = Group diff --git a/tools/Makefile b/tools/Makefile index 2b4b9b7878..a29e49f0a1 100644 --- a/tools/Makefile +++ b/tools/Makefile @@ -34,7 +34,7 @@ PARALLEL_DIRS := opt llvm-as llvm-dis \ bugpoint llvm-bcanalyzer \ llvm-diff macho-dump llvm-objdump llvm-readobj \ llvm-rtdyld llvm-dwarfdump llvm-cov \ - llvm-size llvm-stress + llvm-size llvm-stress llvm-mcmarkup # Let users override the set of tools to build from the command line. ifdef ONLY_TOOLS diff --git a/tools/llvm-mcmarkup/CMakeLists.txt b/tools/llvm-mcmarkup/CMakeLists.txt new file mode 100644 index 0000000000..0a51e99f19 --- /dev/null +++ b/tools/llvm-mcmarkup/CMakeLists.txt @@ -0,0 +1,5 @@ +set(LLVM_LINK_COMPONENTS support) + +add_llvm_tool(llvm-mcmarkup + llvm-mcmarkup.cpp + ) diff --git a/tools/llvm-mcmarkup/LLVMBuild.txt b/tools/llvm-mcmarkup/LLVMBuild.txt new file mode 100644 index 0000000000..6423493a54 --- /dev/null +++ b/tools/llvm-mcmarkup/LLVMBuild.txt @@ -0,0 +1,22 @@ +;===- ./tools/llvm-mcmarkup/LLVMBuild.txt ----------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Tool +name = llvm-mcmarkup +parent = Tools +required_libraries = Support diff --git a/tools/llvm-mcmarkup/Makefile b/tools/llvm-mcmarkup/Makefile new file mode 100644 index 0000000000..5633a9c301 --- /dev/null +++ b/tools/llvm-mcmarkup/Makefile @@ -0,0 +1,17 @@ +##===- tools/llvm-mcmarkup/Makefile ------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL := ../.. +TOOLNAME := llvm-mcmarkup +LINK_COMPONENTS := support + +# This tool has no plugins, optimize startup time. +TOOL_NO_EXPORTS = 1 + +include $(LEVEL)/Makefile.common diff --git a/tools/llvm-mcmarkup/llvm-mcmarkup.cpp b/tools/llvm-mcmarkup/llvm-mcmarkup.cpp new file mode 100644 index 0000000000..888761f10f --- /dev/null +++ b/tools/llvm-mcmarkup/llvm-mcmarkup.cpp @@ -0,0 +1,225 @@ +//===-- llvm-mcmarkup.cpp - Parse the MC assembly markup tags -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Example simple parser implementation for the MC assembly markup language. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/OwningPtr.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/ManagedStatic.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/PrettyStackTrace.h" +#include "llvm/Support/Signals.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/system_error.h" +using namespace llvm; + +static cl::list + InputFilenames(cl::Positional, cl::desc(""), + cl::ZeroOrMore); +static cl::opt +DumpTags("dump-tags", cl::desc("List all tags encountered in input")); + +static StringRef ToolName; + +/// Trivial lexer for the markup parser. Input is always handled a character +/// at a time. The lexer just encapsulates EOF and lookahead handling. +class MarkupLexer { + StringRef::const_iterator Start; + StringRef::const_iterator CurPtr; + StringRef::const_iterator End; +public: + MarkupLexer(StringRef Source) + : Start(Source.begin()), CurPtr(Source.begin()), End(Source.end()) {} + // When processing non-markup, input is consumed a character at a time. + bool isEOF() { return CurPtr == End; } + int getNextChar() { + if (CurPtr == End) return EOF; + return *CurPtr++; + } + int peekNextChar() { + if (CurPtr == End) return EOF; + return *CurPtr; + } + StringRef::const_iterator getPosition() const { return CurPtr; } +}; + +/// A markup tag is a name and a (usually empty) list of modifiers. +class MarkupTag { + StringRef Name; + StringRef Modifiers; + SMLoc StartLoc; +public: + MarkupTag(StringRef n, StringRef m, SMLoc Loc) + : Name(n), Modifiers(m), StartLoc(Loc) {} + StringRef getName() const { return Name; } + StringRef getModifiers() const { return Modifiers; } + SMLoc getLoc() const { return StartLoc; } +}; + +/// A simple parser implementation for creating MarkupTags from input text. +class MarkupParser { + MarkupLexer &Lex; + SourceMgr &SM; +public: + MarkupParser(MarkupLexer &lex, SourceMgr &SrcMgr) : Lex(lex), SM(SrcMgr) {} + /// Create a MarkupTag from the current position in the MarkupLexer. + /// The parseTag() method should be called when the lexer has processed + /// the opening '<' character. Input will be consumed up to and including + /// the ':' which terminates the tag open. + MarkupTag parseTag(); + /// Issue a diagnostic and terminate program execution. + void FatalError(SMLoc Loc, StringRef Msg); +}; + +void MarkupParser::FatalError(SMLoc Loc, StringRef Msg) { + SM.PrintMessage(Loc, SourceMgr::DK_Error, Msg); + exit(1); +} + +// Example handler for when a tag is recognized. +static void processStartTag(MarkupTag &Tag) { + // If we're just printing the tags, do that, otherwise do some simple + // colorization. + if (DumpTags) { + outs() << Tag.getName(); + if (Tag.getModifiers().size()) + outs() << " " << Tag.getModifiers(); + outs() << "\n"; + return; + } + + if (!outs().has_colors()) + return; + // Color registers as red and immediates as cyan. Those don't have nested + // tags, so don't bother keeping a stack of colors to reset to. + if (Tag.getName() == "reg") + outs().changeColor(raw_ostream::RED); + else if (Tag.getName() == "imm") + outs().changeColor(raw_ostream::CYAN); +} + +// Example handler for when the end of a tag is recognized. +static void processEndTag(MarkupTag &Tag) { + // If we're printing the tags, there's nothing more to do here. Otherwise, + // set the color back the normal. + if (DumpTags) + return; + if (!outs().has_colors()) + return; + // Just reset to basic white. + outs().changeColor(raw_ostream::WHITE, false); +} + +MarkupTag MarkupParser::parseTag() { + // First off, extract the tag into it's own StringRef so we can look at it + // outside of the context of consuming input. + StringRef::const_iterator Start = Lex.getPosition(); + SMLoc Loc = SMLoc::getFromPointer(Start - 1); + while(Lex.getNextChar() != ':') { + // EOF is an error. + if (Lex.isEOF()) + FatalError(SMLoc::getFromPointer(Start), "unterminated markup tag"); + } + StringRef RawTag(Start, Lex.getPosition() - Start - 1); + std::pair SplitTag = RawTag.split(' '); + return MarkupTag(SplitTag.first, SplitTag.second, Loc); +} + +static void parseMCMarkup(StringRef Filename) { + OwningPtr BufferPtr; + if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename, BufferPtr)) { + errs() << ToolName << ": " << ec.message() << '\n'; + return; + } + MemoryBuffer *Buffer = BufferPtr.take(); + + SourceMgr SrcMgr; + + // Tell SrcMgr about this buffer, which is what the parser will pick up. + SrcMgr.AddNewSourceBuffer(Buffer, SMLoc()); + + StringRef InputSource = Buffer->getBuffer(); + MarkupLexer Lex(InputSource); + MarkupParser Parser(Lex, SrcMgr); + + SmallVector TagStack; + + for (int CurChar = Lex.getNextChar(); + CurChar != EOF; + CurChar = Lex.getNextChar()) { + switch (CurChar) { + case '<': { + // A "<<" is output as a literal '<' and does not start a markup tag. + if (Lex.peekNextChar() == '<') { + (void)Lex.getNextChar(); + break; + } + // Parse the markup entry. + TagStack.push_back(Parser.parseTag()); + + // Do any special handling for the start of a tag. + processStartTag(TagStack.back()); + continue; + } + case '>': { + SMLoc Loc = SMLoc::getFromPointer(Lex.getPosition() - 1); + // A ">>" is output as a literal '>' and does not end a markup tag. + if (Lex.peekNextChar() == '>') { + (void)Lex.getNextChar(); + break; + } + // Close out the innermost tag. + if (TagStack.empty()) + Parser.FatalError(Loc, "'>' without matching '<'"); + + // Do any special handling for the end of a tag. + processEndTag(TagStack.back()); + + TagStack.pop_back(); + continue; + } + default: + break; + } + // For anything else, just echo the character back out. + if (!DumpTags && CurChar != EOF) + outs() << (char)CurChar; + } + + // If there are any unterminated markup tags, issue diagnostics for them. + while (!TagStack.empty()) { + MarkupTag &Tag = TagStack.back(); + SrcMgr.PrintMessage(Tag.getLoc(), SourceMgr::DK_Error, + "unterminated markup tag"); + TagStack.pop_back(); + } +} + +int main(int argc, char **argv) { + // Print a stack trace if we signal out. + sys::PrintStackTraceOnErrorSignal(); + PrettyStackTraceProgram X(argc, argv); + + llvm_shutdown_obj Y; // Call llvm_shutdown() on exit. + cl::ParseCommandLineOptions(argc, argv, "llvm MC markup parser\n"); + + ToolName = argv[0]; + + // If no input files specified, read from stdin. + if (InputFilenames.size() == 0) + InputFilenames.push_back("-"); + + std::for_each(InputFilenames.begin(), InputFilenames.end(), + parseMCMarkup); + return 0; +} -- cgit v1.2.3