summary | refs | log | tree | commit | diff
path: root/tools
diff options
context:
space:
mode:
author Daniel Dunbar <daniel@zuster.org> 2009-08-14 18:19:52 +0000
committer Daniel Dunbar <daniel@zuster.org> 2009-08-14 18:19:52 +0000
commit 1ab75949460b92df31b911ea9f99a3e32d779e3f (patch)
tree 432a54c2dcebf6c297ef06b65e212f4e27a49675 /tools
parent 2247276c6f97abf8ebb6ea0a566e6fed5a4c4fe2 (diff)
download llvm-1ab75949460b92df31b911ea9f99a3e32d779e3f.tar.gz
llvm-1ab75949460b92df31b911ea9f99a3e32d779e3f.tar.bz2
llvm-1ab75949460b92df31b911ea9f99a3e32d779e3f.tar.xz
llvm-mc: Support escaped characters in string literals (for .ascii and .asciz)
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@79010 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'tools')
-rw-r--r-- tools/llvm-mc/AsmParser.cpp 68
-rw-r--r-- tools/llvm-mc/AsmParser.h 4
2 files changed, 67 insertions, 5 deletions
diff --git a/tools/llvm-mc/AsmParser.cpp b/tools/llvm-mc/AsmParser.cpp
index eec4cc8829..c37abfd3ff 100644
--- a/tools/llvm-mc/AsmParser.cpp
+++ b/tools/llvm-mc/AsmParser.cpp
@@ -765,6 +765,64 @@ bool AsmParser::ParseDirectiveSectionSwitch(const char *Segment,
return false;
}
+/// ParseEscapedString - Decode the current string token into Data, expanding
+/// backslash escapes. Yields false on success, or TokError's result otherwise.
+bool AsmParser::ParseEscapedString(std::string &Data) {
+  assert(Lexer.is(AsmToken::String) && "Unexpected current token!");
+
+  Data.clear();
+  StringRef Contents = Lexer.getTok().getStringContents();
+  const unsigned Len = Contents.size();
+  unsigned Pos = 0;
+  while (Pos != Len) {
+    char C = Contents[Pos++];
+    if (C != '\\') {
+      Data += C;
+      continue;
+    }
+
+    // The escape rules loosely follow Darwin 'as'; notably, hex escapes are
+    // not supported. A backslash with nothing after it is an error.
+    if (Pos == Len)
+      return TokError("unexpected backslash at end of string");
+    C = Contents[Pos++];
+
+    // Octal escape: the first digit plus at most two more.
+    if (static_cast<unsigned>(C - '0') <= 7) {
+      unsigned Octal = C - '0';
+      for (unsigned Extra = 0; Extra != 2 && Pos != Len; ++Extra) {
+        const unsigned Digit = static_cast<unsigned>(Contents[Pos] - '0');
+        if (Digit > 7)
+          break;
+        Octal = Octal * 8 + Digit;
+        ++Pos;
+      }
+      // Three octal digits can reach 0777; only byte values are accepted.
+      if (Octal > 255)
+        return TokError("invalid octal escape sequence (out of range)");
+      Data += static_cast<unsigned char>(Octal);
+      continue;
+    }
+
+    // Single-character escapes; unrecognized ones are rejected for now.
+    char Decoded;
+    switch (C) {
+    case 'b':  Decoded = '\b'; break;
+    case 'f':  Decoded = '\f'; break;
+    case 'n':  Decoded = '\n'; break;
+    case 'r':  Decoded = '\r'; break;
+    case 't':  Decoded = '\t'; break;
+    case '"':  Decoded = '"';  break;
+    case '\\': Decoded = '\\'; break;
+    default:
+      return TokError("invalid escape sequence (unrecognized character)");
+    }
+    Data += Decoded;
+  }
+
+  return false;
+}
+
/// ParseDirectiveAscii:
/// ::= ( .ascii | .asciz ) [ "string" ( , "string" )* ]
bool AsmParser::ParseDirectiveAscii(bool ZeroTerminated) {
@@ -773,11 +831,11 @@ bool AsmParser::ParseDirectiveAscii(bool ZeroTerminated) {
if (Lexer.isNot(AsmToken::String))
return TokError("expected string in '.ascii' or '.asciz' directive");
- // FIXME: This shouldn't use a const char* + strlen, the string could have
- // embedded nulls.
- // FIXME: Should have accessor for getting string contents.
- StringRef Str = Lexer.getTok().getString();
- Out.EmitBytes(Str.substr(1, Str.size() - 2));
+ std::string Data;
+ if (ParseEscapedString(Data))
+ return true;
+
+ Out.EmitBytes(Data);
if (ZeroTerminated)
Out.EmitBytes(StringRef("\0", 1));
diff --git a/tools/llvm-mc/AsmParser.h b/tools/llvm-mc/AsmParser.h
index 55efa84bc7..a19f35f64a 100644
--- a/tools/llvm-mc/AsmParser.h
+++ b/tools/llvm-mc/AsmParser.h
@@ -135,6 +135,10 @@ private:
bool ParseDirectiveFile(SMLoc DirectiveLoc); // ".file"
bool ParseDirectiveLine(SMLoc DirectiveLoc); // ".line"
bool ParseDirectiveLoc(SMLoc DirectiveLoc); // ".loc"
+
+ /// ParseEscapedString - Parse the current token as a string which may include
+ /// escaped characters and return the string contents.
+ bool ParseEscapedString(std::string &Data);
};
} // end namespace llvm