summaryrefslogtreecommitdiff
path: root/utils/TableGen/TGLexer.cpp
diff options
context:
space:
mode:
authorChris Lattner <sabre@nondot.org>2007-11-18 02:57:27 +0000
committerChris Lattner <sabre@nondot.org>2007-11-18 02:57:27 +0000
commita8058744229a44e80f90c8530bb7fe47cbab1b70 (patch)
treea06234bf6fcaa882d47d7eb597cec3a91b3e2c67 /utils/TableGen/TGLexer.cpp
parentb348d18caf834bc26a80e60aa7a2c9b1748734e8 (diff)
downloadllvm-a8058744229a44e80f90c8530bb7fe47cbab1b70.tar.gz
llvm-a8058744229a44e80f90c8530bb7fe47cbab1b70.tar.bz2
llvm-a8058744229a44e80f90c8530bb7fe47cbab1b70.tar.xz
reimplement the tblgen lexer with a simple hand-written lexer. This eliminates
one dependency on flex and gets rid of two ".cvs" files. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@44210 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'utils/TableGen/TGLexer.cpp')
-rw-r--r--utils/TableGen/TGLexer.cpp439
1 files changed, 439 insertions, 0 deletions
diff --git a/utils/TableGen/TGLexer.cpp b/utils/TableGen/TGLexer.cpp
new file mode 100644
index 0000000000..69d2dc4ced
--- /dev/null
+++ b/utils/TableGen/TGLexer.cpp
@@ -0,0 +1,439 @@
+//===- TGLexer.cpp - Lexer for TableGen -----------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Chris Lattner and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implement the Lexer for TableGen.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Record.h"
+#include "llvm/Support/Streams.h"
+#include "Record.h"
+#include "TGLexer.h"
+#include "llvm/Support/MemoryBuffer.h"
+typedef std::pair<llvm::Record*, std::vector<llvm::Init*>*> SubClassRefTy;
+#include "FileParser.h"
+#include <cctype>
+using namespace llvm;
+
+// FIXME: REMOVE THIS.
+#define YYEOF 0
+#define YYERROR -2
+
+TGLexer::TGLexer(MemoryBuffer *StartBuf) : CurLineNo(1), CurBuf(StartBuf) {
+ CurPtr = CurBuf->getBufferStart();
+}
+
+TGLexer::~TGLexer() {
+ while (!IncludeStack.empty()) {
+ delete IncludeStack.back().Buffer;
+ IncludeStack.pop_back();
+ }
+ delete CurBuf;
+}
+
+
+std::ostream &TGLexer::err() {
+ PrintIncludeStack(*cerr.stream());
+ return *cerr.stream();
+}
+
+
+void TGLexer::PrintIncludeStack(std::ostream &OS) {
+ for (unsigned i = 0, e = IncludeStack.size(); i != e; ++i)
+ OS << "Included from " << IncludeStack[i].Buffer->getBufferIdentifier()
+ << ":" << IncludeStack[i].LineNo << ":\n";
+ OS << "Parsing " << CurBuf->getBufferIdentifier() << ":"
+ << CurLineNo << ": ";
+}
+
+int TGLexer::getNextChar() {
+ char CurChar = *CurPtr++;
+ switch (CurChar) {
+ default:
+ return CurChar;
+ case 0:
+ // A nul character in the stream is either the end of the current buffer or
+ // a random nul in the file. Disambiguate that here.
+ if (CurPtr-1 != CurBuf->getBufferEnd())
+ return 0; // Just whitespace.
+
+ // If this is the end of an included file, pop the parent file off the
+ // include stack.
+ if (!IncludeStack.empty()) {
+ delete CurBuf;
+ CurBuf = IncludeStack.back().Buffer;
+ CurLineNo = IncludeStack.back().LineNo;
+ CurPtr = IncludeStack.back().CurPtr;
+ IncludeStack.pop_back();
+ return getNextChar();
+ }
+
+ // Otherwise, return end of file.
+ --CurPtr; // Another call to lex will return EOF again.
+ return EOF;
+ case '\n':
+ case '\r':
+ // Handle the newline character by ignoring it and incrementing the line
+ // count. However, be careful about 'dos style' files with \n\r in them.
+ // Only treat a \n\r or \r\n as a single line.
+ if ((*CurPtr == '\n' || (*CurPtr == '\r')) &&
+ *CurPtr != CurChar)
+ ++CurPtr; // Each the two char newline sequence.
+
+ ++CurLineNo;
+ return '\n';
+ }
+}
+
+int TGLexer::LexToken() {
+ // This always consumes at least one character.
+ int CurChar = getNextChar();
+
+ switch (CurChar) {
+ default:
+ // Handle letters: [a-zA-Z_]
+ if (isalpha(CurChar) || CurChar == '_')
+ return LexIdentifier();
+
+ // Unknown character, return the char itself.
+ return (unsigned char)CurChar;
+ case EOF: return YYEOF;
+ case 0:
+ case ' ':
+ case '\t':
+ case '\n':
+ case '\r':
+ // Ignore whitespace.
+ return LexToken();
+ case '/':
+ // If this is the start of a // comment, skip until the end of the line or
+ // the end of the buffer.
+ if (*CurPtr == '/')
+ SkipBCPLComment();
+ else if (*CurPtr == '*') {
+ if (SkipCComment())
+ return YYERROR;
+ } else // Otherwise, return this / as a token.
+ return CurChar;
+ return LexToken();
+ case '-': case '+':
+ case '0': case '1': case '2': case '3': case '4': case '5': case '6':
+ case '7': case '8': case '9':
+ return LexNumber();
+ case '"': return LexString();
+ case '$': return LexVarName();
+ case '[': return LexBracket();
+ case '!': return LexExclaim();
+ }
+}
+
+/// LexString - Lex "[^"]*"
+int TGLexer::LexString() {
+ const char *StrStart = CurPtr;
+
+ while (*CurPtr != '"') {
+ // If we hit the end of the buffer, report an error.
+ if (*CurPtr == 0 && CurPtr == CurBuf->getBufferEnd()) {
+ TheError = "End of file in string literal";
+ return YYERROR;
+ } else if (*CurPtr == '\n' || *CurPtr == '\r') {
+ TheError = "End of line in string literal";
+ return YYERROR;
+ }
+
+ ++CurPtr;
+ }
+
+ Filelval.StrVal = new std::string(StrStart, CurPtr);
+ ++CurPtr;
+ return STRVAL;
+}
+
+int TGLexer::LexVarName() {
+ if (!isalpha(CurPtr[0]) && CurPtr[0] != '_')
+ return '$'; // Invalid varname.
+
+ // Otherwise, we're ok, consume the rest of the characters.
+ const char *VarNameStart = CurPtr++;
+
+ while (isalpha(*CurPtr) || isdigit(*CurPtr) || *CurPtr == '_')
+ ++CurPtr;
+
+ Filelval.StrVal = new std::string(VarNameStart, CurPtr);
+ return VARNAME;
+}
+
+
+int TGLexer::LexIdentifier() {
+ // The first letter is [a-zA-Z_].
+ const char *IdentStart = CurPtr-1;
+
+ // Match the rest of the identifier regex: [0-9a-zA-Z_]*
+ while (isalpha(*CurPtr) || isdigit(*CurPtr) || *CurPtr == '_')
+ ++CurPtr;
+
+ // Check to see if this identifier is a keyword.
+ unsigned Len = CurPtr-IdentStart;
+
+ if (Len == 3 && !memcmp(IdentStart, "int", 3)) return INT;
+ if (Len == 3 && !memcmp(IdentStart, "bit", 3)) return BIT;
+ if (Len == 4 && !memcmp(IdentStart, "bits", 4)) return BITS;
+ if (Len == 6 && !memcmp(IdentStart, "string", 6)) return STRING;
+ if (Len == 4 && !memcmp(IdentStart, "list", 4)) return LIST;
+ if (Len == 4 && !memcmp(IdentStart, "code", 4)) return CODE;
+ if (Len == 3 && !memcmp(IdentStart, "dag", 3)) return DAG;
+
+ if (Len == 5 && !memcmp(IdentStart, "class", 5)) return CLASS;
+ if (Len == 3 && !memcmp(IdentStart, "def", 3)) return DEF;
+ if (Len == 4 && !memcmp(IdentStart, "defm", 4)) return DEFM;
+ if (Len == 10 && !memcmp(IdentStart, "multiclass", 10)) return MULTICLASS;
+ if (Len == 5 && !memcmp(IdentStart, "field", 5)) return FIELD;
+ if (Len == 3 && !memcmp(IdentStart, "let", 3)) return LET;
+ if (Len == 2 && !memcmp(IdentStart, "in", 2)) return IN;
+
+ if (Len == 7 && !memcmp(IdentStart, "include", 7)) {
+ if (LexInclude()) return YYERROR;
+ return LexToken();
+ }
+
+ Filelval.StrVal = new std::string(IdentStart, CurPtr);
+ return ID;
+}
+
+/// LexInclude - We just read the "include" token. Get the string token that
+/// comes next and enter the include.
+bool TGLexer::LexInclude() {
+ // The token after the include must be a string.
+ int Tok = LexToken();
+ if (Tok == YYERROR) return true;
+ if (Tok != STRVAL) {
+ TheError = "Expected filename after include";
+ return true;
+ }
+
+ // Get the string.
+ std::string Filename = *Filelval.StrVal;
+ delete Filelval.StrVal;
+
+ // Try to find the file.
+ MemoryBuffer *NewBuf = MemoryBuffer::getFile(&Filename[0], Filename.size());
+
+ // If the file didn't exist directly, see if it's in an include path.
+ for (unsigned i = 0, e = IncludeDirectories.size(); i != e && !NewBuf; ++i) {
+ std::string IncFile = IncludeDirectories[i] + "/" + Filename;
+ NewBuf = MemoryBuffer::getFile(&IncFile[0], IncFile.size());
+ }
+
+ if (NewBuf == 0) {
+ TheError = "Could not find include file '" + Filename + "'";
+ return true;
+ }
+
+ // Save the line number and lex buffer of the includer.
+ IncludeStack.push_back(IncludeRec(CurBuf, CurPtr, CurLineNo));
+
+ CurLineNo = 1; // Reset line numbering.
+ CurBuf = NewBuf;
+ CurPtr = CurBuf->getBufferStart();
+ return false;
+}
+
+void TGLexer::SkipBCPLComment() {
+ ++CurPtr; // skip the second slash.
+ while (1) {
+ switch (*CurPtr) {
+ case '\n':
+ case '\r':
+ return; // Newline is end of comment.
+ case 0:
+ // If this is the end of the buffer, end the comment.
+ if (CurPtr == CurBuf->getBufferEnd())
+ return;
+ break;
+ }
+ // Otherwise, skip the character.
+ ++CurPtr;
+ }
+}
+
+/// SkipCComment - This skips C-style /**/ comments. The only difference from C
+/// is that we allow nesting.
+bool TGLexer::SkipCComment() {
+ ++CurPtr; // skip the star.
+ unsigned CommentDepth = 1;
+
+ while (1) {
+ int CurChar = getNextChar();
+ switch (CurChar) {
+ case EOF:
+ TheError = "Unterminated comment!";
+ return true;
+ case '*':
+ // End of the comment?
+ if (CurPtr[0] != '/') break;
+
+ ++CurPtr; // End the */.
+ if (--CommentDepth == 0)
+ return false;
+ break;
+ case '/':
+ // Start of a nested comment?
+ if (CurPtr[0] != '*') break;
+ ++CurPtr;
+ ++CommentDepth;
+ break;
+ }
+ }
+}
+
+/// LexNumber - Lex:
+/// [-+]?[0-9]+
+/// 0x[0-9a-fA-F]+
+/// 0b[01]+
+int TGLexer::LexNumber() {
+ const char *NumStart = CurPtr-1;
+
+ if (CurPtr[-1] == '0') {
+ if (CurPtr[0] == 'x') {
+ ++CurPtr;
+ NumStart = CurPtr;
+ while (isxdigit(CurPtr[0]))
+ ++CurPtr;
+
+ if (CurPtr == NumStart) {
+ TheError = "Invalid hexadecimal number";
+ return YYERROR;
+ }
+ Filelval.IntVal = strtoll(NumStart, 0, 16);
+ return INTVAL;
+ } else if (CurPtr[0] == 'b') {
+ ++CurPtr;
+ NumStart = CurPtr;
+ while (CurPtr[0] == '0' || CurPtr[0] == '1')
+ ++CurPtr;
+
+ if (CurPtr == NumStart) {
+ TheError = "Invalid binary number";
+ return YYERROR;
+ }
+ Filelval.IntVal = strtoll(NumStart, 0, 2);
+ return INTVAL;
+ }
+ }
+
+ // Check for a sign without a digit.
+ if (CurPtr[-1] == '-' || CurPtr[-1] == '+') {
+ if (!isdigit(CurPtr[0]))
+ return CurPtr[-1];
+ }
+
+ while (isdigit(CurPtr[0]))
+ ++CurPtr;
+ Filelval.IntVal = strtoll(NumStart, 0, 10);
+ return INTVAL;
+}
+
+/// LexBracket - We just read '['. If this is a code block, return it,
+/// otherwise return the bracket. Match: '[' and '[{ ( [^}]+ | }[^]] )* }]'
+int TGLexer::LexBracket() {
+ if (CurPtr[0] != '{')
+ return '[';
+ ++CurPtr;
+ const char *CodeStart = CurPtr;
+ while (1) {
+ int Char = getNextChar();
+ if (Char == EOF) break;
+
+ if (Char != '}') continue;
+
+ Char = getNextChar();
+ if (Char == EOF) break;
+ if (Char == ']') {
+ Filelval.StrVal = new std::string(CodeStart, CurPtr-2);
+ return CODEFRAGMENT;
+ }
+ }
+
+ TheError = "Invalid Code Block";
+ return YYERROR;
+}
+
+/// LexExclaim - Lex '!' and '![a-zA-Z]+'.
+int TGLexer::LexExclaim() {
+ if (!isalpha(*CurPtr))
+ return '!';
+
+ const char *Start = CurPtr++;
+ while (isalpha(*CurPtr))
+ ++CurPtr;
+
+ // Check to see which operator this is.
+ unsigned Len = CurPtr-Start;
+
+ if (Len == 3 && !memcmp(Start, "con", 3)) return CONCATTOK;
+ if (Len == 3 && !memcmp(Start, "sra", 3)) return SRATOK;
+ if (Len == 3 && !memcmp(Start, "srl", 3)) return SRLTOK;
+ if (Len == 3 && !memcmp(Start, "shl", 3)) return SHLTOK;
+ if (Len == 9 && !memcmp(Start, "strconcat", 9)) return STRCONCATTOK;
+
+ TheError = "Unknown operator";
+ return YYERROR;
+}
+
+//===----------------------------------------------------------------------===//
+// Interfaces used by the Bison parser.
+//===----------------------------------------------------------------------===//
+
+int Fileparse();
+static TGLexer *TheLexer;
+
+namespace llvm {
+
+std::ostream &err() {
+ return TheLexer->err();
+}
+
+/// ParseFile - this function begins the parsing of the specified tablegen
+/// file.
+///
+void ParseFile(const std::string &Filename,
+ const std::vector<std::string> &IncludeDirs) {
+ std::string ErrorStr;
+ MemoryBuffer *F = MemoryBuffer::getFileOrSTDIN(&Filename[0], Filename.size(),
+ &ErrorStr);
+ if (F == 0) {
+ cerr << "Could not open input file '" + Filename + "': " << ErrorStr <<"\n";
+ exit(1);
+ }
+
+ assert(!TheLexer && "Lexer isn't reentrant yet!");
+ TheLexer = new TGLexer(F);
+
+ // Record the location of the include directory so that the lexer can find
+ // it later.
+ TheLexer->setIncludeDirs(IncludeDirs);
+
+ Fileparse();
+
+ // Cleanup
+ delete TheLexer;
+ TheLexer = 0;
+}
+} // End llvm namespace
+
+
+int Filelex() {
+ assert(TheLexer && "No lexer setup yet!");
+ int Tok = TheLexer->LexToken();
+ if (Tok == YYERROR) {
+ err() << TheLexer->getError() << "\n";
+ exit(1);
+ }
+ return Tok;
+}