summary | refs | log | tree | commit | diff
path: root/lib/Lex/LiteralSupport.cpp
diff options
context:
space:
mode:
author: Richard Smith <richard-llvm@metafoo.co.uk> 2012-03-05 04:02:15 +0000
committer: Richard Smith <richard-llvm@metafoo.co.uk> 2012-03-05 04:02:15 +0000
commit: 5cc2c6eb67b6e5361bbe96f79b519fd62ec666d6 (patch)
tree: 2c1f49624f8fd182adf9d9473f9b9c6f1229183e /lib/Lex/LiteralSupport.cpp
parent: 9d008fd572fa3411e93084d51f12ea12a998786c (diff)
download: clang-5cc2c6eb67b6e5361bbe96f79b519fd62ec666d6.tar.gz
          clang-5cc2c6eb67b6e5361bbe96f79b519fd62ec666d6.tar.bz2
          clang-5cc2c6eb67b6e5361bbe96f79b519fd62ec666d6.tar.xz
Lexing support for user-defined literals.

Currently these lex as the same token kinds as the underlying string literals,
and we silently drop the ud-suffix; those issues will be fixed by subsequent
patches.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@152012 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Lex/LiteralSupport.cpp')
-rw-r--r-- lib/Lex/LiteralSupport.cpp | 57
1 files changed, 52 insertions, 5 deletions
diff --git a/lib/Lex/LiteralSupport.cpp b/lib/Lex/LiteralSupport.cpp
index 547bd4e0c8..e3ff77f4f0 100644
--- a/lib/Lex/LiteralSupport.cpp
+++ b/lib/Lex/LiteralSupport.cpp
@@ -731,7 +731,11 @@ NumericLiteralParser::GetFloatValue(llvm::APFloat &Result) {
}
-/// character-literal: [C++0x lex.ccon]
+/// user-defined-character-literal: [C++11 lex.ext]
+/// character-literal ud-suffix
+/// ud-suffix:
+/// identifier
+/// character-literal: [C++11 lex.ccon]
/// ' c-char-sequence '
/// u' c-char-sequence '
/// U' c-char-sequence '
@@ -744,7 +748,7 @@ NumericLiteralParser::GetFloatValue(llvm::APFloat &Result) {
/// backslash \, or new-line character
/// escape-sequence
/// universal-character-name
-/// escape-sequence: [C++0x lex.ccon]
+/// escape-sequence:
/// simple-escape-sequence
/// octal-escape-sequence
/// hexadecimal-escape-sequence
@@ -757,7 +761,7 @@ NumericLiteralParser::GetFloatValue(llvm::APFloat &Result) {
/// hexadecimal-escape-sequence:
/// \x hexadecimal-digit
/// hexadecimal-escape-sequence hexadecimal-digit
-/// universal-character-name:
+/// universal-character-name: [C++11 lex.charset]
/// \u hex-quad
/// \U hex-quad hex-quad
/// hex-quad:
@@ -780,8 +784,17 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end,
assert(begin[0] == '\'' && "Invalid token lexed");
++begin;
+ // Remove an optional ud-suffix.
+ if (end[-1] != '\'') {
+ const char *UDSuffixEnd = end;
+ do {
+ --end;
+ } while (end[-1] != '\'');
+ UDSuffixBuf.assign(end, UDSuffixEnd);
+ }
+
// Trim the ending quote.
- assert(end[-1] == '\'' && "Invalid token lexed");
+ assert(end != begin && "Invalid token lexed");
--end;
// FIXME: The "Value" is an uint64_t so we can handle char literals of
@@ -1071,6 +1084,8 @@ void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){
Pascal = false;
+ SourceLocation UDSuffixTokLoc;
+
for (unsigned i = 0, e = NumStringToks; i != e; ++i) {
const char *ThisTokBuf = &TokenBuf[0];
// Get the spelling of the token, which eliminates trigraphs, etc. We know
@@ -1085,7 +1100,39 @@ void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){
continue;
}
- const char *ThisTokEnd = ThisTokBuf+ThisTokLen-1; // Skip end quote.
+ const char *ThisTokEnd = ThisTokBuf+ThisTokLen;
+
+ // Remove an optional ud-suffix.
+ if (ThisTokEnd[-1] != '"') {
+ const char *UDSuffixEnd = ThisTokEnd;
+ do {
+ --ThisTokEnd;
+ } while (ThisTokEnd[-1] != '"');
+
+ StringRef UDSuffix(ThisTokEnd, UDSuffixEnd - ThisTokEnd);
+
+ if (UDSuffixBuf.empty()) {
+ UDSuffixBuf.assign(UDSuffix);
+ UDSuffixTokLoc = StringToks[i].getLocation();
+ } else if (!UDSuffixBuf.equals(UDSuffix)) {
+ // C++11 [lex.ext]p8: At the end of phase 6, if a string literal is the
+ // result of a concatenation involving at least one user-defined-string-
+ // literal, all the participating user-defined-string-literals shall
+ // have the same ud-suffix.
+ if (Diags) {
+ SourceLocation TokLoc = StringToks[i].getLocation();
+ Diags->Report(TokLoc, diag::err_string_concat_mixed_suffix)
+ << UDSuffixBuf << UDSuffix
+ << SourceRange(UDSuffixTokLoc, UDSuffixTokLoc)
+ << SourceRange(TokLoc, TokLoc);
+ }
+ hadError = true;
+ }
+ }
+
+ // Strip the end quote.
+ --ThisTokEnd;
+
// TODO: Input character set mapping support.
// Skip marker for wide or unicode strings.