summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Peixotto <dpeixott@codeaurora.org>2013-12-06 20:35:58 +0000
committerDavid Peixotto <dpeixott@codeaurora.org>2013-12-06 20:35:58 +0000
commitd05b93515d19f2462909676db3e32ea08a6e359f (patch)
tree2760a61fb5f77553a1012da407931d0c3085db0d
parent3221b5bd23ed14b381b29652b1756ef7e40e7f02 (diff)
downloadllvm-d05b93515d19f2462909676db3e32ea08a6e359f.tar.gz
llvm-d05b93515d19f2462909676db3e32ea08a6e359f.tar.bz2
llvm-d05b93515d19f2462909676db3e32ea08a6e359f.tar.xz
Integrated assembler incorrectly lexes ARM-style comments
The integrated assembler fails to properly lex ARM comments when they are adjacent to an identifier in the input stream. The reason is that the ARM comment symbol '@' is also used as a symbol variant in other assembly languages, so when lexing an identifier it allows the '@' symbol as part of the identifier. Example: $ cat comment.s foo: add r0, r0@got to parse this as a comment $ llvm-mc -triple armv7 comment.s comment.s:4:18: error: unexpected token in argument list add r0, r0@got to parse this as a comment ^ This should be parsed correctly as `add r0, r0`. This commit modifies the assembly lexer to not include the '@' symbol in identifiers when lexing for targets that use '@' for comments. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@196607 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/MC/MCParser/AsmLexer.cpp11
-rw-r--r--test/MC/ARM/comment.s24
2 files changed, 31 insertions, 4 deletions
diff --git a/lib/MC/MCParser/AsmLexer.cpp b/lib/MC/MCParser/AsmLexer.cpp
index b49dd01047..a066e64830 100644
--- a/lib/MC/MCParser/AsmLexer.cpp
+++ b/lib/MC/MCParser/AsmLexer.cpp
@@ -139,20 +139,23 @@ AsmToken AsmLexer::LexHexFloatLiteral(bool NoIntDigits) {
}
/// LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@?]*
-static bool IsIdentifierChar(char c) {
- return isalnum(c) || c == '_' || c == '$' || c == '.' || c == '@' || c == '?';
+static bool IsIdentifierChar(char c, bool AllowAt) {
+ return isalnum(c) || c == '_' || c == '$' || c == '.' ||
+ (c == '@' && AllowAt) || c == '?';
}
AsmToken AsmLexer::LexIdentifier() {
+ bool AllowAtInIdentifier = !StringRef(MAI.getCommentString()).startswith("@");
// Check for floating point literals.
if (CurPtr[-1] == '.' && isdigit(*CurPtr)) {
// Disambiguate a .1243foo identifier from a floating literal.
while (isdigit(*CurPtr))
++CurPtr;
- if (*CurPtr == 'e' || *CurPtr == 'E' || !IsIdentifierChar(*CurPtr))
+ if (*CurPtr == 'e' || *CurPtr == 'E' ||
+ !IsIdentifierChar(*CurPtr, AllowAtInIdentifier))
return LexFloatLiteral();
}
- while (IsIdentifierChar(*CurPtr))
+ while (IsIdentifierChar(*CurPtr, AllowAtInIdentifier))
++CurPtr;
// Handle . as a special case.
diff --git a/test/MC/ARM/comment.s b/test/MC/ARM/comment.s
new file mode 100644
index 0000000000..e95f313aca
--- /dev/null
+++ b/test/MC/ARM/comment.s
@@ -0,0 +1,24 @@
+@ Tests to check that '@' does not get lexed as an identifier for arm
+@ RUN: llvm-mc %s -triple=armv7-linux-gnueabi | FileCheck %s
+@ RUN: llvm-mc %s -triple=armv7-linux-gnueabi 2>&1 | FileCheck %s --check-prefix=ERROR
+
+foo:
+ bl boo@plt should be ignored
+ bl goo@plt
+ .long bar@got to parse this as a comment
+ .long baz@got
+ add r0, r0@ignore this extra junk
+
+@CHECK-LABEL: foo:
+@CHECK: bl boo
+@CHECK-NOT: @
+@CHECK: bl goo
+@CHECK-NOT: @
+@CHECK: .long bar
+@CHECK-NOT: @
+@CHECK: .long baz
+@CHECK-NOT: @
+@CHECK: add r0, r0
+@CHECK-NOT: @
+
+@ERROR-NOT: error: