summaryrefslogtreecommitdiff
path: root/tools/llvm-mc/AsmLexer.cpp
blob: 27454ee922cca33940163d2552aa86493b470d20 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
//===- AsmLexer.cpp - Lexer for Assembly Files ----------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This class implements the lexer for assembly files.
//
//===----------------------------------------------------------------------===//

#include "AsmLexer.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Config/config.h"  // for strtoull.
#include <cerrno>
#include <cstdio>
#include <cstdlib>
using namespace llvm;

AsmLexer::AsmLexer(SourceMgr &SM) : SrcMgr(SM)  {
  CurBuffer = 0;
  CurBuf = SrcMgr.getMemoryBuffer(CurBuffer);
  CurPtr = CurBuf->getBufferStart();
  TokStart = 0;
}

AsmLexer::~AsmLexer() {
}

SMLoc AsmLexer::getLoc() const {
  return SMLoc::getFromPointer(TokStart);
}

void AsmLexer::PrintMessage(SMLoc Loc, const std::string &Msg, 
                            const char *Type) const {
  SrcMgr.PrintMessage(Loc, Msg, Type);
}

/// ReturnError - Set the error to the specified string at the specified
/// location.  This is defined to always return AsmToken::Error.
AsmToken AsmLexer::ReturnError(const char *Loc, const std::string &Msg) {
  SrcMgr.PrintMessage(SMLoc::getFromPointer(Loc), Msg, "error");
  return AsmToken(AsmToken::Error, StringRef(Loc, 0));
}

/// EnterIncludeFile - Enter the specified file.  This prints an error and
/// returns true on failure.
bool AsmLexer::EnterIncludeFile(const std::string &Filename) {
  int NewBuf = SrcMgr.AddIncludeFile(Filename, SMLoc::getFromPointer(CurPtr));
  if (NewBuf == -1)
    return true;
  
  // Save the line number and lex buffer of the includer.
  CurBuffer = NewBuf;
  CurBuf = SrcMgr.getMemoryBuffer(CurBuffer);
  CurPtr = CurBuf->getBufferStart();
  return false;
}


int AsmLexer::getNextChar() {
  char CurChar = *CurPtr++;
  switch (CurChar) {
  default:
    return (unsigned char)CurChar;
  case 0: {
    // A nul character in the stream is either the end of the current buffer or
    // a random nul in the file.  Disambiguate that here.
    if (CurPtr-1 != CurBuf->getBufferEnd())
      return 0;  // Just whitespace.
    
    // If this is the end of an included file, pop the parent file off the
    // include stack.
    SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
    if (ParentIncludeLoc != SMLoc()) {
      CurBuffer = SrcMgr.FindBufferContainingLoc(ParentIncludeLoc);
      CurBuf = SrcMgr.getMemoryBuffer(CurBuffer);
      CurPtr = ParentIncludeLoc.getPointer();
      
      // Reset the token start pointer to the start of the new file.
      TokStart = CurPtr;
      
      return getNextChar();
    }
    
    // Otherwise, return end of file.
    --CurPtr;  // Another call to lex will return EOF again.  
    return EOF;
  }
  }
}

/// LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@]*
AsmToken AsmLexer::LexIdentifier() {
  while (isalnum(*CurPtr) || *CurPtr == '_' || *CurPtr == '$' ||
         *CurPtr == '.' || *CurPtr == '@')
    ++CurPtr;
  return AsmToken(AsmToken::Identifier, StringRef(TokStart, CurPtr - TokStart));
}

/// LexPercent: Register: %[a-zA-Z0-9]+
AsmToken AsmLexer::LexPercent() {
  if (!isalnum(*CurPtr))
    return AsmToken(AsmToken::Percent, StringRef(CurPtr, 1));  // Single %.
  
  while (isalnum(*CurPtr))
    ++CurPtr;
  
  return AsmToken(AsmToken::Register, StringRef(TokStart, CurPtr - TokStart));
}

/// LexSlash: Slash: /
///           C-Style Comment: /* ... */
AsmToken AsmLexer::LexSlash() {
  switch (*CurPtr) {
  case '*': break; // C style comment.
  case '/': return ++CurPtr, LexLineComment();
  default:  return AsmToken(AsmToken::Slash, StringRef(CurPtr, 1));
  }

  // C Style comment.
  ++CurPtr;  // skip the star.
  while (1) {
    int CurChar = getNextChar();
    switch (CurChar) {
    case EOF:
      return ReturnError(TokStart, "unterminated comment");
    case '*':
      // End of the comment?
      if (CurPtr[0] != '/') break;
      
      ++CurPtr;   // End the */.
      return LexToken();
    }
  }
}

/// LexLineComment: Comment: #[^\n]*
///                        : //[^\n]*
AsmToken AsmLexer::LexLineComment() {
  // FIXME: This is broken if we happen to a comment at the end of a file, which
  // was .included, and which doesn't end with a newline.
  int CurChar = getNextChar();
  while (CurChar != '\n' && CurChar != '\n' && CurChar != EOF)
    CurChar = getNextChar();
  
  if (CurChar == EOF)
    return AsmToken(AsmToken::Eof, StringRef(CurPtr, 0));
  return AsmToken(AsmToken::EndOfStatement, StringRef(CurPtr, 0));
}


/// LexDigit: First character is [0-9].
///   Local Label: [0-9][:]
///   Forward/Backward Label: [0-9][fb]
///   Binary integer: 0b[01]+
///   Octal integer: 0[0-7]+
///   Hex integer: 0x[0-9a-fA-F]+
///   Decimal integer: [1-9][0-9]*
/// TODO: FP literal.
AsmToken AsmLexer::LexDigit() {
  if (*CurPtr == ':')
    return ReturnError(TokStart, "FIXME: local label not implemented");
  if (*CurPtr == 'f' || *CurPtr == 'b')
    return ReturnError(TokStart, "FIXME: directional label not implemented");
  
  // Decimal integer: [1-9][0-9]*
  if (CurPtr[-1] != '0') {
    while (isdigit(*CurPtr))
      ++CurPtr;
    return AsmToken(AsmToken::Integer, StringRef(TokStart, CurPtr - TokStart), 
                    strtoll(TokStart, 0, 10));
  }
  
  if (*CurPtr == 'b') {
    ++CurPtr;
    const char *NumStart = CurPtr;
    while (CurPtr[0] == '0' || CurPtr[0] == '1')
      ++CurPtr;
    
    // Requires at least one binary digit.
    if (CurPtr == NumStart)
      return ReturnError(CurPtr-2, "Invalid binary number");
    return AsmToken(AsmToken::Integer, StringRef(TokStart, CurPtr - TokStart),
                    strtoll(NumStart, 0, 2));
  }
 
  if (*CurPtr == 'x') {
    ++CurPtr;
    const char *NumStart = CurPtr;
    while (isxdigit(CurPtr[0]))
      ++CurPtr;
    
    // Requires at least one hex digit.
    if (CurPtr == NumStart)
      return ReturnError(CurPtr-2, "Invalid hexadecimal number");
    
    errno = 0;
    if (errno == EINVAL)
      return ReturnError(CurPtr-2, "Invalid hexadecimal number");
    if (errno == ERANGE) {
      errno = 0;
      if (errno == EINVAL)
        return ReturnError(CurPtr-2, "Invalid hexadecimal number");
      if (errno == ERANGE)
        return ReturnError(CurPtr-2, "Hexadecimal number out of range");
    }
    return AsmToken(AsmToken::Integer, StringRef(TokStart, CurPtr - TokStart),
                    (int64_t) strtoull(NumStart, 0, 16));
  }
  
  // Must be an octal number, it starts with 0.
  while (*CurPtr >= '0' && *CurPtr <= '7')
    ++CurPtr;
  return AsmToken(AsmToken::Integer, StringRef(TokStart, CurPtr - TokStart),
                  strtoll(TokStart, 0, 8));
}

/// LexQuote: String: "..."
AsmToken AsmLexer::LexQuote() {
  int CurChar = getNextChar();
  // TODO: does gas allow multiline string constants?
  while (CurChar != '"') {
    if (CurChar == '\\') {
      // Allow \", etc.
      CurChar = getNextChar();
    }
    
    if (CurChar == EOF)
      return ReturnError(TokStart, "unterminated string constant");

    CurChar = getNextChar();
  }
  
  return AsmToken(AsmToken::String, StringRef(TokStart, CurPtr - TokStart));
}

StringRef AsmLexer::LexUntilEndOfStatement() {
  TokStart = CurPtr;

  while (*CurPtr != '#' &&  // Start of line comment.
         *CurPtr != ';' &&  // End of statement marker.
         *CurPtr != '\n' &&
         *CurPtr != '\r' &&
         (*CurPtr != 0 || CurPtr != CurBuf->getBufferEnd()))
    ++CurPtr;
  return StringRef(TokStart, CurPtr-TokStart);
}

AsmToken AsmLexer::LexToken() {
  TokStart = CurPtr;
  // This always consumes at least one character.
  int CurChar = getNextChar();
  
  switch (CurChar) {
  default:
    // Handle identifier: [a-zA-Z_.][a-zA-Z0-9_$.@]*
    if (isalpha(CurChar) || CurChar == '_' || CurChar == '.')
      return LexIdentifier();
    
    // Unknown character, emit an error.
    return ReturnError(TokStart, "invalid character in input");
  case EOF: return AsmToken(AsmToken::Eof, StringRef(TokStart, 0));
  case 0:
  case ' ':
  case '\t':
    // Ignore whitespace.
    return LexToken();
  case '\n': // FALL THROUGH.
  case '\r': // FALL THROUGH.
  case ';': return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1));
  case ':': return AsmToken(AsmToken::Colon, StringRef(TokStart, 1));
  case '+': return AsmToken(AsmToken::Plus, StringRef(TokStart, 1));
  case '-': return AsmToken(AsmToken::Minus, StringRef(TokStart, 1));
  case '~': return AsmToken(AsmToken::Tilde, StringRef(TokStart, 1));
  case '(': return AsmToken(AsmToken::LParen, StringRef(TokStart, 1));
  case ')': return AsmToken(AsmToken::RParen, StringRef(TokStart, 1));
  case '*': return AsmToken(AsmToken::Star, StringRef(TokStart, 1));
  case ',': return AsmToken(AsmToken::Comma, StringRef(TokStart, 1));
  case '$': return AsmToken(AsmToken::Dollar, StringRef(TokStart, 1));
  case '=': 
    if (*CurPtr == '=')
      return ++CurPtr, AsmToken(AsmToken::EqualEqual, StringRef(TokStart, 2));
    return AsmToken(AsmToken::Equal, StringRef(TokStart, 1));
  case '|': 
    if (*CurPtr == '|')
      return ++CurPtr, AsmToken(AsmToken::PipePipe, StringRef(TokStart, 2));
    return AsmToken(AsmToken::Pipe, StringRef(TokStart, 1));
  case '^': return AsmToken(AsmToken::Caret, StringRef(TokStart, 1));
  case '&': 
    if (*CurPtr == '&')
      return ++CurPtr, AsmToken(AsmToken::AmpAmp, StringRef(TokStart, 2));
    return AsmToken(AsmToken::Amp, StringRef(TokStart, 1));
  case '!': 
    if (*CurPtr == '=')
      return ++CurPtr, AsmToken(AsmToken::ExclaimEqual, StringRef(TokStart, 2));
    return AsmToken(AsmToken::Exclaim, StringRef(TokStart, 1));
  case '%': return LexPercent();
  case '/': return LexSlash();
  case '#': return LexLineComment();
  case '"': return LexQuote();
  case '0': case '1': case '2': case '3': case '4':
  case '5': case '6': case '7': case '8': case '9':
    return LexDigit();
  case '<':
    switch (*CurPtr) {
    case '<': return ++CurPtr, AsmToken(AsmToken::LessLess, 
                                        StringRef(TokStart, 2));
    case '=': return ++CurPtr, AsmToken(AsmToken::LessEqual, 
                                        StringRef(TokStart, 2));
    case '>': return ++CurPtr, AsmToken(AsmToken::LessGreater, 
                                        StringRef(TokStart, 2));
    default: return AsmToken(AsmToken::Less, StringRef(TokStart, 1));
    }
  case '>':
    switch (*CurPtr) {
    case '>': return ++CurPtr, AsmToken(AsmToken::GreaterGreater, 
                                        StringRef(TokStart, 2));
    case '=': return ++CurPtr, AsmToken(AsmToken::GreaterEqual, 
                                        StringRef(TokStart, 2));
    default: return AsmToken(AsmToken::Greater, StringRef(TokStart, 1));
    }
      
  // TODO: Quoted identifiers (objc methods etc)
  // local labels: [0-9][:]
  // Forward/backward labels: [0-9][fb]
  // Integers, fp constants, character constants.
  }
}