summary refs log tree commit diff
path: root/tools/llvm-mc
diff options
context:
space:
mode:
author Chris Lattner <sabre@nondot.org> 2009-06-21 19:21:25 +0000
committer Chris Lattner <sabre@nondot.org> 2009-06-21 19:21:25 +0000
commit 4651bca31bdad27184fa0d36640bf5ef1d83cf5c (patch)
tree b97455dfc60462b5a65c04d5b95393f838627af2 /tools/llvm-mc
parent 1c3329f7072356c8da84534ed0a7033b10f73062 (diff)
download llvm-4651bca31bdad27184fa0d36640bf5ef1d83cf5c.tar.gz
llvm-4651bca31bdad27184fa0d36640bf5ef1d83cf5c.tar.bz2
llvm-4651bca31bdad27184fa0d36640bf5ef1d83cf5c.tar.xz
implement enough of a lexer to get through Olden/health/Output/health.llc.s
without errors.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@73855 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'tools/llvm-mc')
-rw-r--r-- tools/llvm-mc/AsmLexer.cpp 162
-rw-r--r-- tools/llvm-mc/AsmLexer.h 16
-rw-r--r-- tools/llvm-mc/llvm-mc.cpp 16
3 files changed, 184 insertions, 10 deletions
diff --git a/tools/llvm-mc/AsmLexer.cpp b/tools/llvm-mc/AsmLexer.cpp
index da86465d7f..578eec1852 100644
--- a/tools/llvm-mc/AsmLexer.cpp
+++ b/tools/llvm-mc/AsmLexer.cpp
@@ -14,6 +14,7 @@
#include "AsmLexer.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/MemoryBuffer.h"
+#include <cerrno>
using namespace llvm;
AsmLexer::AsmLexer(SourceMgr &SM) : SrcMgr(SM) {
@@ -23,6 +24,10 @@ AsmLexer::AsmLexer(SourceMgr &SM) : SrcMgr(SM) {
TokStart = 0;
}
+/// getLoc - Return the location of TokStart, the pointer LexToken records
+/// before it starts consuming the characters of a token.
+SMLoc AsmLexer::getLoc() const {
+ return SMLoc::getFromPointer(TokStart);
+}
+
void AsmLexer::PrintError(const char *Loc, const std::string &Msg) const {
SrcMgr.PrintError(SMLoc::getFromPointer(Loc), Msg);
}
@@ -31,6 +36,13 @@ void AsmLexer::PrintError(SMLoc Loc, const std::string &Msg) const {
SrcMgr.PrintError(Loc, Msg);
}
+/// ReturnError - Report the message Msg at the source position Loc by
+/// forwarding both to PrintError. This always returns asmtok::Error, so a
+/// lexing routine can report and bail out in one statement:
+/// `return ReturnError(Loc, "...");`.
+asmtok::TokKind AsmLexer::ReturnError(const char *Loc, const std::string &Msg) {
+ PrintError(Loc, Msg);
+ return asmtok::Error;
+}
+
int AsmLexer::getNextChar() {
char CurChar = *CurPtr++;
switch (CurChar) {
@@ -59,6 +71,129 @@ int AsmLexer::getNextChar() {
}
}
+/// LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@]*
+///
+/// The first character has already been consumed by LexToken; this consumes
+/// the remaining identifier characters, stores the text from TokStart up to
+/// the current position in CurStrVal and returns asmtok::Identifier.
+asmtok::TokKind AsmLexer::LexIdentifier() {
+  for (;;) {
+    const char Ch = *CurPtr;
+    const bool IsIdentChar =
+        isalnum(Ch) || Ch == '_' || Ch == '$' || Ch == '.' || Ch == '@';
+    if (!IsIdentChar)
+      break;
+    ++CurPtr;
+  }
+
+  // The token text is everything in [TokStart, CurPtr).
+  CurStrVal.assign(TokStart, CurPtr);
+  return asmtok::Identifier;
+}
+
+/// LexPercent: Register: %[a-zA-Z0-9]+
+///
+/// Called once the '%' has been consumed. Returns asmtok::Error (without
+/// printing a diagnostic) when no alphanumeric character follows; otherwise
+/// stores the token text in CurStrVal and returns asmtok::Register.
+asmtok::TokKind AsmLexer::LexPercent() {
+  const char *NameBegin = CurPtr;
+  while (isalnum(*CurPtr))
+    ++CurPtr;
+
+  // Must have at least one character after the '%'.
+  if (CurPtr == NameBegin)
+    return asmtok::Error;
+
+  // The text is taken from TokStart, so it keeps the leading '%'.
+  CurStrVal.assign(TokStart, CurPtr);
+  return asmtok::Register;
+}
+
+/// LexSlash - Called once a '/' has been consumed.
+///   Slash: /
+///   C-Style Comment: /* ... */
+///
+/// A lone '/' yields asmtok::Slash. A C-style comment is skipped and the token
+/// that follows it is lexed and returned in its place. Reaching end of input
+/// inside a comment prints "Unterminated comment!" at the comment's start and
+/// returns asmtok::Error.
+asmtok::TokKind AsmLexer::LexSlash() {
+  if (*CurPtr != '*')
+    return asmtok::Slash;
+
+  // C-style comment: step over the opening star, then scan for "*/".
+  ++CurPtr;
+  for (;;) {
+    const int Ch = getNextChar();
+
+    if (Ch == EOF) {
+      PrintError(TokStart, "Unterminated comment!");
+      return asmtok::Error;
+    }
+
+    // A star only closes the comment when a slash comes right after it.
+    if (Ch == '*' && CurPtr[0] == '/') {
+      ++CurPtr; // Consume the '/' of the closing "*/".
+      return LexToken();
+    }
+  }
+}
+
+/// LexHash: Comment: #[^\r\n]*
+///
+/// Called once the '#' has been consumed. Consumes the rest of the line
+/// comment together with the character that ends it. Returns asmtok::Eof when
+/// the input ends inside the comment, otherwise asmtok::EndOfStatement.
+asmtok::TokKind AsmLexer::LexHash() {
+  int CurChar = getNextChar();
+  // Stop at either line terminator. LexToken treats both '\n' and '\r' as
+  // EndOfStatement, so a comment must not run past a bare '\r' (the previous
+  // condition tested '\n' twice and never looked for '\r').
+  while (CurChar != '\n' && CurChar != '\r' && CurChar != EOF)
+    CurChar = getNextChar();
+
+  if (CurChar == EOF)
+    return asmtok::Eof;
+  return asmtok::EndOfStatement;
+}
+
+
+/// LexDigit: First character is [0-9] and has already been consumed.
+///   Local Label: [0-9][:]
+///   Forward/Backward Label: [0-9][fb]
+///   Binary integer: 0b[01]+
+///   Octal integer: 0[0-7]+
+///   Hex integer: 0x[0-9a-fA-F]+
+///   Decimal integer: [1-9][0-9]*
+/// TODO: FP literal.
+///
+/// On success the value is stored in CurIntVal and asmtok::IntVal is
+/// returned. The label forms are not implemented yet and return asmtok::Error
+/// without a diagnostic; malformed or out-of-range hexadecimal literals are
+/// reported through ReturnError.
+asmtok::TokKind AsmLexer::LexDigit() {
+  if (*CurPtr == ':')
+    return asmtok::Error;  // FIXME LOCAL LABEL.
+
+  // "0b" directly followed by a binary digit is a binary integer rather than
+  // a backward reference to label 0. Without this distinction the [fb] check
+  // below swallowed every "0b..." and the binary path was unreachable.
+  // Reading CurPtr[1] is only done when CurPtr[0] is 'b', i.e. not the end of
+  // the buffer.
+  const bool IsBinaryLiteral = CurPtr[-1] == '0' && CurPtr[0] == 'b' &&
+                               (CurPtr[1] == '0' || CurPtr[1] == '1');
+
+  if (!IsBinaryLiteral && (*CurPtr == 'f' || *CurPtr == 'b'))
+    return asmtok::Error;  // FIXME FORWARD/BACKWARD LABEL.
+
+  // Decimal integer: [1-9][0-9]*
+  if (CurPtr[-1] != '0') {
+    while (isdigit(*CurPtr))
+      ++CurPtr;
+    CurIntVal = strtoll(TokStart, 0, 10);
+    return asmtok::IntVal;
+  }
+
+  // Binary integer: 0b[01]+ (at least one digit is guaranteed by the check
+  // that computed IsBinaryLiteral).
+  if (IsBinaryLiteral) {
+    ++CurPtr;  // Skip the 'b'.
+    const char *NumStart = CurPtr;
+    while (CurPtr[0] == '0' || CurPtr[0] == '1')
+      ++CurPtr;
+    CurIntVal = strtoll(NumStart, 0, 2);
+    return asmtok::IntVal;
+  }
+
+  // Hex integer: 0x[0-9a-fA-F]+
+  if (*CurPtr == 'x') {
+    ++CurPtr;
+    const char *NumStart = CurPtr;
+    while (isxdigit(CurPtr[0]))
+      ++CurPtr;
+
+    // All diagnostics point at the start of the literal (TokStart); CurPtr
+    // has moved past the digits by the time range errors are detected.
+
+    // Requires at least one hex digit.
+    if (CurPtr == NumStart)
+      return ReturnError(TokStart, "Invalid hexadecimal number");
+
+    errno = 0;
+    CurIntVal = strtoll(NumStart, 0, 16);
+    if (errno == EINVAL)
+      return ReturnError(TokStart, "Invalid hexadecimal number");
+    if (errno == ERANGE) {
+      // Too large for a signed 64-bit value: retry as unsigned so that
+      // literals such as 0xFFFFFFFFFFFFFFFF are still accepted.
+      errno = 0;
+      CurIntVal = (int64_t)strtoull(NumStart, 0, 16);
+      if (errno == EINVAL)
+        return ReturnError(TokStart, "Invalid hexadecimal number");
+      if (errno == ERANGE)
+        return ReturnError(TokStart, "Hexadecimal number out of range");
+    }
+    return asmtok::IntVal;
+  }
+
+  // Must be an octal number, it starts with 0.
+  while (*CurPtr >= '0' && *CurPtr <= '7')
+    ++CurPtr;
+  CurIntVal = strtoll(TokStart, 0, 8);
+  return asmtok::IntVal;
+}
+
+
asmtok::TokKind AsmLexer::LexToken() {
TokStart = CurPtr;
// This always consumes at least one character.
@@ -66,9 +201,9 @@ asmtok::TokKind AsmLexer::LexToken() {
switch (CurChar) {
default:
- // Handle letters: [a-zA-Z_]
-// if (isalpha(CurChar) || CurChar == '_' || CurChar == '#')
-// return LexIdentifier();
+ // Handle identifier: [a-zA-Z_.][a-zA-Z0-9_$.@]*
+ if (isalpha(CurChar) || CurChar == '_' || CurChar == '.')
+ return LexIdentifier();
// Unknown character, emit an error.
return asmtok::Error;
@@ -76,12 +211,29 @@ asmtok::TokKind AsmLexer::LexToken() {
case 0:
case ' ':
case '\t':
- case '\n':
- case '\r':
// Ignore whitespace.
return LexToken();
+ case '\n': // FALL THROUGH.
+ case '\r': // FALL THROUGH.
+ case ';': return asmtok::EndOfStatement;
case ':': return asmtok::Colon;
case '+': return asmtok::Plus;
case '-': return asmtok::Minus;
+ case '(': return asmtok::LParen;
+ case ')': return asmtok::RParen;
+ case '*': return asmtok::Star;
+ case ',': return asmtok::Comma;
+ case '$': return asmtok::Dollar;
+ case '%': return LexPercent();
+ case '/': return LexSlash();
+ case '#': return LexHash();
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ return LexDigit();
+
+ // TODO: Quoted identifiers (objc methods etc)
+ // local labels: [0-9][:]
+ // Forward/backward labels: [0-9][fb]
+ // Integers, fp constants, character constants.
}
}
\ No newline at end of file
diff --git a/tools/llvm-mc/AsmLexer.h b/tools/llvm-mc/AsmLexer.h
index 08e6f9c6ee..9e694c7a30 100644
--- a/tools/llvm-mc/AsmLexer.h
+++ b/tools/llvm-mc/AsmLexer.h
@@ -29,12 +29,16 @@ namespace asmtok {
Eof, Error,
Identifier,
+ Register,
IntVal,
-
+ EndOfStatement,
Colon,
Plus,
- Minus
+ Minus,
+ Slash, // '/'
+ LParen, RParen,
+ Star, Comma, Dollar
};
}
@@ -66,7 +70,7 @@ public:
asmtok::TokKind getKind() const { return CurKind; }
const std::string &getCurStrVal() const {
- assert(CurKind == asmtok::Identifier &&
+ assert((CurKind == asmtok::Identifier || CurKind == asmtok::Register) &&
"This token doesn't have a string value");
return CurStrVal;
}
@@ -82,9 +86,15 @@ public:
private:
int getNextChar();
+ asmtok::TokKind ReturnError(const char *Loc, const std::string &Msg);
/// LexToken - Read the next token and return its code.
asmtok::TokKind LexToken();
+ asmtok::TokKind LexIdentifier();
+ asmtok::TokKind LexPercent();
+ asmtok::TokKind LexSlash();
+ asmtok::TokKind LexHash();
+ asmtok::TokKind LexDigit();
};
} // end namespace llvm
diff --git a/tools/llvm-mc/llvm-mc.cpp b/tools/llvm-mc/llvm-mc.cpp
index 83642988e3..20f353ca67 100644
--- a/tools/llvm-mc/llvm-mc.cpp
+++ b/tools/llvm-mc/llvm-mc.cpp
@@ -72,17 +72,29 @@ static int AssembleInput(const char *ProgName) {
asmtok::TokKind Tok = Lexer.Lex();
while (Tok != asmtok::Eof) {
switch (Tok) {
- default: outs() << "<<unknown token>>\n"; break;
- case asmtok::Error: outs() << "<<error>>\n"; break;
+ default: Lexer.PrintError(Lexer.getLoc(), "driver: unknown token"); break;
+ case asmtok::Error:
+ Lexer.PrintError(Lexer.getLoc(), "error, bad token");
+ break;
case asmtok::Identifier:
outs() << "identifier: " << Lexer.getCurStrVal() << '\n';
break;
+ case asmtok::Register:
+ outs() << "register: " << Lexer.getCurStrVal() << '\n';
+ break;
case asmtok::IntVal:
outs() << "int: " << Lexer.getCurIntVal() << '\n';
break;
+ case asmtok::EndOfStatement: outs() << "EndOfStatement\n"; break;
case asmtok::Colon: outs() << "Colon\n"; break;
case asmtok::Plus: outs() << "Plus\n"; break;
case asmtok::Minus: outs() << "Minus\n"; break;
+ case asmtok::Slash: outs() << "Slash\n"; break;
+ case asmtok::LParen: outs() << "LParen\n"; break;
+ case asmtok::RParen: outs() << "RParen\n"; break;
+ case asmtok::Star: outs() << "Star\n"; break;
+ case asmtok::Comma: outs() << "Comma\n"; break;
+ case asmtok::Dollar: outs() << "Dollar\n"; break;
}
Tok = Lexer.Lex();