summary refs log tree commit diff
path: root/include
diff options
context:
space:
mode:
author Reid Kleckner <reid@kleckner.net> 2013-07-16 17:14:33 +0000
committer Reid Kleckner <reid@kleckner.net> 2013-07-16 17:14:33 +0000
commit 06c847e83e558f0cc6fea742498b2730eb6837c6 (patch)
tree 8f823c8d8da88e220787734872375279ef139d94 /include
parent 5a5ebb7f9fa7fa82c0c466a36a90e5c18bb13073 (diff)
download llvm-06c847e83e558f0cc6fea742498b2730eb6837c6.tar.gz
llvm-06c847e83e558f0cc6fea742498b2730eb6837c6.tar.bz2
llvm-06c847e83e558f0cc6fea742498b2730eb6837c6.tar.xz
[Support] Add a Unicode conversion wrapper from UTF16 to UTF8
This is to support parsing UTF16 response files in LLVM/lib/Option for lld and clang.

Reviewers: hans

Differential Revision: http://llvm-reviews.chandlerc.com/D1138

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186426 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'include')
-rw-r--r-- include/llvm/Support/ConvertUTF.h 24
1 files changed, 22 insertions, 2 deletions
diff --git a/include/llvm/Support/ConvertUTF.h b/include/llvm/Support/ConvertUTF.h
index 1eae6d6622..282036619c 100644
--- a/include/llvm/Support/ConvertUTF.h
+++ b/include/llvm/Support/ConvertUTF.h
@@ -87,8 +87,8 @@
------------------------------------------------------------------------ */
-#ifndef CLANG_BASIC_CONVERTUTF_H
-#define CLANG_BASIC_CONVERTUTF_H
+#ifndef LLVM_SUPPORT_CONVERTUTF_H
+#define LLVM_SUPPORT_CONVERTUTF_H
/* ---------------------------------------------------------------------
The following 4 definitions are compiler-specific.
@@ -112,6 +112,9 @@ typedef unsigned char Boolean; /* 0 or 1 */
#define UNI_MAX_UTF8_BYTES_PER_CODE_POINT 4
+#define UNI_UTF16_BYTE_ORDER_MARK_NATIVE 0xFEFF
+#define UNI_UTF16_BYTE_ORDER_MARK_SWAPPED 0xFFFE
+
typedef enum {
conversionOK, /* conversion successful */
sourceExhausted, /* partial character in source, but hit end */
@@ -165,6 +168,7 @@ unsigned getNumBytesForUTF8(UTF8 firstByte);
/*************************************************************************/
/* Below are LLVM-specific wrappers of the functions above. */
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
namespace llvm {
@@ -219,6 +223,22 @@ static inline ConversionResult convertUTF8Sequence(const UTF8 **source,
return sourceExhausted;
return ConvertUTF8toUTF32(source, *source + size, &target, target + 1, flags);
}
+
+/**
+ * Returns true if a blob of text starts with a UTF-16 big or little endian byte
+ * order mark.
+ */
+bool hasUTF16ByteOrderMark(ArrayRef<char> SrcBytes);
+
+/**
+ * Converts a stream of raw bytes assumed to be UTF16 into a UTF8 std::string.
+ *
+ * \param [in] SrcBytes A buffer of what is assumed to be UTF-16 encoded text.
+ * \param [out] Out Converted UTF-8 is stored here on success.
+ * \returns true on success
+ */
+bool convertUTF16ToUTF8String(ArrayRef<char> SrcBytes, std::string &Out);
+
} /* end namespace llvm */
#endif