summaryrefslogtreecommitdiff
path: root/tools/llvm-mcmarkup/llvm-mcmarkup.cpp
blob: 7eb14d22a9dbb1c45988a5a9285cefd904ce33ee (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
//===-- llvm-mcmarkup.cpp - Parse the MC assembly markup tags -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Example simple parser implementation for the MC assembly markup language.
//
//===----------------------------------------------------------------------===//

#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/PrettyStackTrace.h"
#include "llvm/Support/Signals.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
#include <system_error>
using namespace llvm;

/// Positional command-line arguments naming the inputs to process. Zero or
/// more may be given; main() substitutes "-" when the list is empty.
static cl::list<std::string>
       InputFilenames(cl::Positional, cl::desc("<input files>"),
                      cl::ZeroOrMore);
/// The "dump-tags" flag. When set, every recognized tag is listed on its own
/// line and the input text itself is not echoed.
static cl::opt<bool>
DumpTags("dump-tags", cl::desc("List all tags encountered in input"));

/// Program name used as the prefix of file-open error messages; assigned
/// from argv[0] in main().
static StringRef ToolName;

/// Trivial lexer for the markup parser. Input is always handled a character
/// at a time. The lexer just encapsulates EOF and lookahead handling.
///
/// The lexer only stores iterators into the text it is given, so the storage
/// behind the StringRef passed to the constructor must stay valid for as long
/// as the lexer is in use.
class MarkupLexer {
  /// The next character that getNextChar() will hand out.
  StringRef::const_iterator CurPtr;
  /// One past the final character of the input.
  StringRef::const_iterator End;
public:
  MarkupLexer(StringRef Source)
    : CurPtr(Source.begin()), End(Source.end()) {}
  // When processing non-markup, input is consumed a character at a time.

  /// Return true once every input character has been consumed.
  bool isEOF() const { return CurPtr == End; }
  /// Consume and return the next character, or EOF at the end of the input.
  ///
  /// The character is widened through unsigned char (the same convention
  /// fgetc() uses) so that a 0xFF byte in the input can never compare equal
  /// to EOF on targets where plain char is signed.
  int getNextChar() {
    if (CurPtr == End) return EOF;
    return static_cast<unsigned char>(*CurPtr++);
  }
  /// Return the next character without consuming it, or EOF at the end of
  /// the input. Uses the same unsigned widening as getNextChar().
  int peekNextChar() const {
    if (CurPtr == End) return EOF;
    return static_cast<unsigned char>(*CurPtr);
  }
  /// Return the current read position (the character that would be returned
  /// by the next getNextChar() call).
  StringRef::const_iterator getPosition() const { return CurPtr; }
};

/// A markup tag is a name and a (usually empty) list of modifiers.
class MarkupTag {
  StringRef Name;
  StringRef Modifiers;
  SMLoc StartLoc;
public:
  MarkupTag(StringRef n, StringRef m, SMLoc Loc)
    : Name(n), Modifiers(m), StartLoc(Loc) {}
  StringRef getName() const { return Name; }
  StringRef getModifiers() const { return Modifiers; }
  SMLoc getLoc() const { return StartLoc; }
};

/// Minimal parser that builds MarkupTag objects out of the input text.
/// It holds references to the lexer and the source manager it is given, so
/// both must outlive the parser.
class MarkupParser {
public:
  MarkupParser(MarkupLexer &lex, SourceMgr &SrcMgr) : Lex(lex), SM(SrcMgr) {}

  /// Build a MarkupTag starting at the lexer's current position.
  ///
  /// Call this right after the lexer has consumed the opening '<'. Input is
  /// consumed through the ':' that ends the tag open, inclusive.
  MarkupTag parseTag();

  /// Print \p Msg as an error diagnostic at \p Loc and terminate the program.
  void FatalError(SMLoc Loc, StringRef Msg);

private:
  MarkupLexer &Lex;
  SourceMgr &SM;
};

/// Emit \p Msg as an error-level diagnostic located at \p Loc via the source
/// manager, then end the process with exit status 1.
void MarkupParser::FatalError(SMLoc Loc, StringRef Msg) {
  const SourceMgr::DiagKind Severity = SourceMgr::DK_Error;
  SM.PrintMessage(Loc, Severity, Msg);
  exit(1);
}

// Example handler invoked each time the start of a tag has been recognized.
//
// In dump-tags mode the tag name (followed by a space and the modifiers, when
// there are any) is printed on its own line. Otherwise, if standard output
// supports colors, the output color is switched according to the tag name.
static void processStartTag(MarkupTag &Tag) {
  raw_ostream &OS = outs();
  const StringRef Name = Tag.getName();

  if (DumpTags) {
    const StringRef Mods = Tag.getModifiers();
    OS << Name;
    if (!Mods.empty())
      OS << " " << Mods;
    OS << "\n";
    return;
  }

  // Registers become red and immediates cyan. Neither contains nested tags,
  // so there is no need to maintain a stack of colors to restore later.
  if (OS.has_colors()) {
    if (Name == "reg")
      OS.changeColor(raw_ostream::RED);
    else if (Name == "imm")
      OS.changeColor(raw_ostream::CYAN);
  }
}

// Example handler for when the end of a tag is recognized.
//
// \p Tag is the tag being closed; it is currently unused because every tag
// is closed the same way.
static void processEndTag(MarkupTag &Tag) {
  // If we're printing the tags, there's nothing more to do here. Otherwise,
  // set the color back to normal.
  if (DumpTags)
    return;
  if (!outs().has_colors())
    return;
  // Restore the terminal's default colors instead of forcing white, so the
  // text after a tag is rendered correctly whatever the user's color scheme.
  outs().resetColor();
}

/// Parse the tag whose opening '<' has just been consumed by the lexer.
///
/// Everything up to the terminating ':' is taken as the raw tag text and is
/// split at the first space into a name and a modifier string. Reaching the
/// end of the input before a ':' is a fatal error.
MarkupTag MarkupParser::parseTag() {
  // Capture the tag text as its own StringRef so it can be inspected
  // independently of consuming input.
  const StringRef::const_iterator TagBegin = Lex.getPosition();
  // The tag's location is the '<' sitting just before the tag text.
  const SMLoc OpenLoc = SMLoc::getFromPointer(TagBegin - 1);

  for (;;) {
    if (Lex.getNextChar() == ':')
      break;
    // Running out of input before the ':' is an error.
    if (Lex.isEOF())
      FatalError(SMLoc::getFromPointer(TagBegin), "unterminated markup tag");
  }

  // The lexer now sits just past the ':'; leave the ':' out of the tag text.
  const StringRef::const_iterator TagEnd = Lex.getPosition() - 1;
  const StringRef RawTag(TagBegin, TagEnd - TagBegin);
  const std::pair<StringRef, StringRef> Parts = RawTag.split(' ');
  return MarkupTag(Parts.first, Parts.second, OpenLoc);
}

/// Process the MC markup contained in one input.
///
/// \param Filename  Name of the input to load through
///                  MemoryBuffer::getFileOrSTDIN(); main() passes "-" when no
///                  files were named on the command line.
///
/// The input is scanned one character at a time. A '<' opens a tag (parsed by
/// MarkupParser::parseTag()) and a '>' closes the innermost open tag, while
/// "<<" and ">>" stand for a literal '<' and '>'. Unless dump-tags mode is
/// active, every non-markup character is echoed to standard output.
///
/// If the input cannot be loaded, a message is printed to standard error and
/// the function returns. A '>' with no open tag is a fatal error. Tags still
/// open at the end of the input are each reported as an error.
static void parseMCMarkup(StringRef Filename) {
  std::unique_ptr<MemoryBuffer> BufferPtr;
  if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename, BufferPtr)) {
    // Name the offending input so the failure can be attributed when several
    // files were given on the command line.
    errs() << ToolName << ": " << Filename << ": " << ec.message() << '\n';
    return;
  }
  // NOTE(review): the raw pointer is released here and handed to SrcMgr
  // below, presumably transferring ownership of the buffer -- confirm against
  // the SourceMgr API in use.
  MemoryBuffer *Buffer = BufferPtr.release();

  SourceMgr SrcMgr;

  // Tell SrcMgr about this buffer, which is what the parser will pick up.
  SrcMgr.AddNewSourceBuffer(Buffer, SMLoc());

  StringRef InputSource = Buffer->getBuffer();
  MarkupLexer Lex(InputSource);
  MarkupParser Parser(Lex, SrcMgr);

  // Tags that have been opened but not yet closed, innermost last.
  SmallVector<MarkupTag, 4> TagStack;

  for (int CurChar = Lex.getNextChar();
       CurChar != EOF;
       CurChar = Lex.getNextChar()) {
    switch (CurChar) {
    case '<': {
      // A "<<" is output as a literal '<' and does not start a markup tag.
      if (Lex.peekNextChar() == '<') {
        (void)Lex.getNextChar();
        break;
      }
      // Parse the markup entry.
      TagStack.push_back(Parser.parseTag());

      // Do any special handling for the start of a tag.
      processStartTag(TagStack.back());
      // The tag text itself is never echoed.
      continue;
    }
    case '>': {
      // Location of the '>' that was just consumed, for diagnostics.
      SMLoc Loc = SMLoc::getFromPointer(Lex.getPosition() - 1);
      // A ">>" is output as a literal '>' and does not end a markup tag.
      if (Lex.peekNextChar() == '>') {
        (void)Lex.getNextChar();
        break;
      }
      // Close out the innermost tag.
      if (TagStack.empty())
        Parser.FatalError(Loc, "'>' without matching '<'");

      // Do any special handling for the end of a tag.
      processEndTag(TagStack.back());

      TagStack.pop_back();
      // The closing '>' is never echoed.
      continue;
    }
    default:
      break;
    }
    // For anything else, just echo the character back out. The loop condition
    // already guarantees that CurChar is not EOF here.
    if (!DumpTags)
      outs() << (char)CurChar;
  }

  // If there are any unterminated markup tags, issue diagnostics for them,
  // innermost tag first.
  while (!TagStack.empty()) {
    MarkupTag &Tag = TagStack.back();
    SrcMgr.PrintMessage(Tag.getLoc(), SourceMgr::DK_Error,
                        "unterminated markup tag");
    TagStack.pop_back();
  }
}

/// Tool entry point: parse the command line, then run parseMCMarkup() on each
/// input file in the order given, or on "-" when no file was named.
///
/// Always returns 0; failures inside parseMCMarkup() either print a
/// diagnostic and carry on or terminate the process themselves.
int main(int argc, char **argv) {
  // Print a stack trace if we signal out.
  sys::PrintStackTraceOnErrorSignal();
  PrettyStackTraceProgram X(argc, argv);

  llvm_shutdown_obj Y;  // Call llvm_shutdown() on exit.
  cl::ParseCommandLineOptions(argc, argv, "llvm MC markup parser\n");

  ToolName = argv[0];

  // If no input files specified, read from stdin.
  if (InputFilenames.empty())
    InputFilenames.push_back("-");

  // A plain range-based loop keeps this file from depending on <algorithm>,
  // which it never includes directly.
  for (const std::string &Filename : InputFilenames)
    parseMCMarkup(Filename);
  return 0;
}