diff options
author | Reid Kleckner <reid@kleckner.net> | 2013-07-16 17:14:33 +0000 |
---|---|---|
committer | Reid Kleckner <reid@kleckner.net> | 2013-07-16 17:14:33 +0000 |
commit | 06c847e83e558f0cc6fea742498b2730eb6837c6 (patch) | |
tree | 8f823c8d8da88e220787734872375279ef139d94 /include | |
parent | 5a5ebb7f9fa7fa82c0c466a36a90e5c18bb13073 (diff) | |
download | llvm-06c847e83e558f0cc6fea742498b2730eb6837c6.tar.gz llvm-06c847e83e558f0cc6fea742498b2730eb6837c6.tar.bz2 llvm-06c847e83e558f0cc6fea742498b2730eb6837c6.tar.xz |
[Support] Add a Unicode conversion wrapper from UTF16 to UTF8
This is to support parsing UTF16 response files in LLVM/lib/Option for
lld and clang.
Reviewers: hans
Differential Revision: http://llvm-reviews.chandlerc.com/D1138
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186426 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'include')
-rw-r--r-- | include/llvm/Support/ConvertUTF.h | 24 |
1 files changed, 22 insertions, 2 deletions
diff --git a/include/llvm/Support/ConvertUTF.h b/include/llvm/Support/ConvertUTF.h index 1eae6d6622..282036619c 100644 --- a/include/llvm/Support/ConvertUTF.h +++ b/include/llvm/Support/ConvertUTF.h @@ -87,8 +87,8 @@ ------------------------------------------------------------------------ */ -#ifndef CLANG_BASIC_CONVERTUTF_H -#define CLANG_BASIC_CONVERTUTF_H +#ifndef LLVM_SUPPORT_CONVERTUTF_H +#define LLVM_SUPPORT_CONVERTUTF_H /* --------------------------------------------------------------------- The following 4 definitions are compiler-specific. @@ -112,6 +112,9 @@ typedef unsigned char Boolean; /* 0 or 1 */ #define UNI_MAX_UTF8_BYTES_PER_CODE_POINT 4 +#define UNI_UTF16_BYTE_ORDER_MARK_NATIVE 0xFEFF +#define UNI_UTF16_BYTE_ORDER_MARK_SWAPPED 0xFFFE + typedef enum { conversionOK, /* conversion successful */ sourceExhausted, /* partial character in source, but hit end */ @@ -165,6 +168,7 @@ unsigned getNumBytesForUTF8(UTF8 firstByte); /*************************************************************************/ /* Below are LLVM-specific wrappers of the functions above. */ +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" namespace llvm { @@ -219,6 +223,22 @@ static inline ConversionResult convertUTF8Sequence(const UTF8 **source, return sourceExhausted; return ConvertUTF8toUTF32(source, *source + size, &target, target + 1, flags); } + +/** + * Returns true if a blob of text starts with a UTF-16 big or little endian byte + * order mark. + */ +bool hasUTF16ByteOrderMark(ArrayRef<char> SrcBytes); + +/** + * Converts a stream of raw bytes assumed to be UTF16 into a UTF8 std::string. + * + * \param [in] SrcBytes A buffer of what is assumed to be UTF-16 encoded text. + * \param [out] Out Converted UTF-8 is stored here on success. + * \returns true on success + */ +bool convertUTF16ToUTF8String(ArrayRef<char> SrcBytes, std::string &Out); + } /* end namespace llvm */ #endif |