include/clang/Lex/LiteralSupport.h


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260

//===--- LiteralSupport.h ---------------------------------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the NumericLiteralParser, CharLiteralParser, and
// StringLiteralParser interfaces.
//
//===----------------------------------------------------------------------===//

#ifndef CLANG_LITERALSUPPORT_H
#define CLANG_LITERALSUPPORT_H

#include "clang/Basic/CharInfo.h"
#include "clang/Basic/LLVM.h"
#include "clang/Basic/TokenKinds.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/DataTypes.h"

namespace clang {

class DiagnosticsEngine;
class Preprocessor;
class Token;
class SourceLocation;
class TargetInfo;
class SourceManager;
class LangOptions;

/// Copy characters from Input to Buf, expanding any UCNs.
void expandUCNs(SmallVectorImpl<char> &Buf, StringRef Input);

/// NumericLiteralParser - This performs strict semantic analysis of the content
/// of a ppnumber, classifying it as either integer, floating, or erroneous,
/// determines the radix of the value and can convert it to a useful value.
class NumericLiteralParser {
  Preprocessor &PP; // needed for diagnostics

  const char *const ThisTokBegin;
  const char *const ThisTokEnd;
  const char *DigitsBegin, *SuffixBegin; // markers
  const char *s; // cursor

  unsigned radix;

  bool saw_exponent, saw_period, saw_ud_suffix;

  SmallString<32> UDSuffixBuf;

public:
  NumericLiteralParser(StringRef TokSpelling,
                       SourceLocation TokLoc,
                       Preprocessor &PP);
  bool hadError;
  bool isUnsigned;
  bool isLong;        // This is *not* set for long long.
  bool isLongLong;
  bool isFloat;       // 1.0f
  bool isImaginary;   // 1.0i
  uint8_t MicrosoftInteger;  // Microsoft suffix extension i8, i16, i32, or i64.

  bool isIntegerLiteral() const {
    return !saw_period && !saw_exponent;
  }
  bool isFloatingLiteral() const {
    return saw_period || saw_exponent;
  }

  bool hasUDSuffix() const {
    return saw_ud_suffix;
  }
  StringRef getUDSuffix() const {
    assert(saw_ud_suffix);
    return UDSuffixBuf;
  }
  unsigned getUDSuffixOffset() const {
    assert(saw_ud_suffix);
    return SuffixBegin - ThisTokBegin;
  }

  static bool isValidUDSuffix(const LangOptions &LangOpts, StringRef Suffix);

  unsigned getRadix() const { return radix; }

  /// GetIntegerValue - Convert this numeric literal value to an APInt that
  /// matches Val's input width.  If there is an overflow (i.e., if the unsigned
  /// value read is larger than the APInt's bits will hold), set Val to the low
  /// bits of the result and return true.  Otherwise, return false.
  bool GetIntegerValue(llvm::APInt &Val);

  /// GetFloatValue - Convert this numeric literal to a floating value, using
  /// the specified APFloat fltSemantics (specifying float, double, etc).
  /// The optional bool isExact (passed-by-reference) has its value
  /// set to true if the returned APFloat can represent the number in the
  /// literal exactly, and false otherwise.
  llvm::APFloat::opStatus GetFloatValue(llvm::APFloat &Result);

private:

  void ParseNumberStartingWithZero(SourceLocation TokLoc);

  static bool isDigitSeparator(char C) { return C == '\''; }

  enum CheckSeparatorKind { CSK_BeforeDigits, CSK_AfterDigits };

  /// \brief Ensure that we don't have a digit separator here.
  void checkSeparator(SourceLocation TokLoc, const char *Pos,
                      CheckSeparatorKind IsAfterDigits);

  /// SkipHexDigits - Read and skip over any hex digits, up to End.
  /// Return a pointer to the first non-hex digit or End.
  const char *SkipHexDigits(const char *ptr) {
    while (ptr != ThisTokEnd && (isHexDigit(*ptr) || isDigitSeparator(*ptr)))
      ptr++;
    return ptr;
  }

  /// SkipOctalDigits - Read and skip over any octal digits, up to End.
  /// Return a pointer to the first non-hex digit or End.
  const char *SkipOctalDigits(const char *ptr) {
    while (ptr != ThisTokEnd &&
           ((*ptr >= '0' && *ptr <= '7') || isDigitSeparator(*ptr)))
      ptr++;
    return ptr;
  }

  /// SkipDigits - Read and skip over any digits, up to End.
  /// Return a pointer to the first non-hex digit or End.
  const char *SkipDigits(const char *ptr) {
    while (ptr != ThisTokEnd && (isDigit(*ptr) || isDigitSeparator(*ptr)))
      ptr++;
    return ptr;
  }

  /// SkipBinaryDigits - Read and skip over any binary digits, up to End.
  /// Return a pointer to the first non-binary digit or End.
  const char *SkipBinaryDigits(const char *ptr) {
    while (ptr != ThisTokEnd &&
           (*ptr == '0' || *ptr == '1' || isDigitSeparator(*ptr)))
      ptr++;
    return ptr;
  }

};

/// CharLiteralParser - Perform interpretation and semantic analysis of a
/// character literal.
class CharLiteralParser {
  uint64_t Value;
  tok::TokenKind Kind;
  bool IsMultiChar;
  bool HadError;
  SmallString<32> UDSuffixBuf;
  unsigned UDSuffixOffset;
public:
  CharLiteralParser(const char *begin, const char *end,
                    SourceLocation Loc, Preprocessor &PP,
                    tok::TokenKind kind);

  bool hadError() const { return HadError; }
  bool isAscii() const { return Kind == tok::char_constant; }
  bool isWide() const { return Kind == tok::wide_char_constant; }
  bool isUTF16() const { return Kind == tok::utf16_char_constant; }
  bool isUTF32() const { return Kind == tok::utf32_char_constant; }
  bool isMultiChar() const { return IsMultiChar; }
  uint64_t getValue() const { return Value; }
  StringRef getUDSuffix() const { return UDSuffixBuf; }
  unsigned getUDSuffixOffset() const {
    assert(!UDSuffixBuf.empty() && "no ud-suffix");
    return UDSuffixOffset;
  }
};

/// StringLiteralParser - This decodes string escape characters and performs
/// wide string analysis and Translation Phase #6 (concatenation of string
/// literals) (C99 5.1.1.2p1).
class StringLiteralParser {
  const SourceManager &SM;
  const LangOptions &Features;
  const TargetInfo &Target;
  DiagnosticsEngine *Diags;
  
  unsigned MaxTokenLength;
  unsigned SizeBound;
  unsigned CharByteWidth;
  tok::TokenKind Kind;
  SmallString<512> ResultBuf;
  char *ResultPtr; // cursor
  SmallString<32> UDSuffixBuf;
  unsigned UDSuffixToken;
  unsigned UDSuffixOffset;
public:
  StringLiteralParser(const Token *StringToks, unsigned NumStringToks,
                      Preprocessor &PP, bool Complain = true);
  StringLiteralParser(const Token *StringToks, unsigned NumStringToks,
                      const SourceManager &sm, const LangOptions &features,
                      const TargetInfo &target,
                      DiagnosticsEngine *diags = nullptr)
    : SM(sm), Features(features), Target(target), Diags(diags),
      MaxTokenLength(0), SizeBound(0), CharByteWidth(0), Kind(tok::unknown),
      ResultPtr(ResultBuf.data()), hadError(false), Pascal(false) {
    init(StringToks, NumStringToks);
  }
    

  bool hadError;
  bool Pascal;

  StringRef GetString() const {
    return StringRef(ResultBuf.data(), GetStringLength());
  }
  unsigned GetStringLength() const { return ResultPtr-ResultBuf.data(); }

  unsigned GetNumStringChars() const {
    return GetStringLength() / CharByteWidth;
  }
  /// getOffsetOfStringByte - This function returns the offset of the
  /// specified byte of the string data represented by Token.  This handles
  /// advancing over escape sequences in the string.
  ///
  /// If the Diagnostics pointer is non-null, then this will do semantic
  /// checking of the string literal and emit errors and warnings.
  unsigned getOffsetOfStringByte(const Token &TheTok, unsigned ByteNo) const;

  bool isAscii() const { return Kind == tok::string_literal; }
  bool isWide() const { return Kind == tok::wide_string_literal; }
  bool isUTF8() const { return Kind == tok::utf8_string_literal; }
  bool isUTF16() const { return Kind == tok::utf16_string_literal; }
  bool isUTF32() const { return Kind == tok::utf32_string_literal; }
  bool isPascal() const { return Pascal; }

  StringRef getUDSuffix() const { return UDSuffixBuf; }

  /// Get the index of a token containing a ud-suffix.
  unsigned getUDSuffixToken() const {
    assert(!UDSuffixBuf.empty() && "no ud-suffix");
    return UDSuffixToken;
  }
  /// Get the spelling offset of the first byte of the ud-suffix.
  unsigned getUDSuffixOffset() const {
    assert(!UDSuffixBuf.empty() && "no ud-suffix");
    return UDSuffixOffset;
  }

private:
  void init(const Token *StringToks, unsigned NumStringToks);
  bool CopyStringFragment(const Token &Tok, const char *TokBegin,
                          StringRef Fragment);
  void DiagnoseLexingError(SourceLocation Loc);
};

}  // end namespace clang

#endif