From 06c847e83e558f0cc6fea742498b2730eb6837c6 Mon Sep 17 00:00:00 2001
From: Reid Kleckner
Date: Tue, 16 Jul 2013 17:14:33 +0000
Subject: [Support] Add a Unicode conversion wrapper from UTF16 to UTF8

This is to support parsing UTF16 response files in LLVM/lib/Option for
lld and clang.

Reviewers: hans

Differential Revision: http://llvm-reviews.chandlerc.com/D1138

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186426 91177308-0d34-0410-b5e6-96231b3b80d8
---
 unittests/Support/CMakeLists.txt     |  1 +
 unittests/Support/ConvertUTFTest.cpp | 75 ++++++++++++++++++++++++++++++++++++
 2 files changed, 76 insertions(+)
 create mode 100644 unittests/Support/ConvertUTFTest.cpp

(limited to 'unittests')

diff --git a/unittests/Support/CMakeLists.txt b/unittests/Support/CMakeLists.txt
index fedfb3d225..2cbe730ef7 100644
--- a/unittests/Support/CMakeLists.txt
+++ b/unittests/Support/CMakeLists.txt
@@ -12,6 +12,7 @@ add_llvm_unittest(SupportTests
   CommandLineTest.cpp
   CompressionTest.cpp
   ConstantRangeTest.cpp
+  ConvertUTFTest.cpp
   DataExtractorTest.cpp
   EndianTest.cpp
   ErrorOrTest.cpp
diff --git a/unittests/Support/ConvertUTFTest.cpp b/unittests/Support/ConvertUTFTest.cpp
new file mode 100644
index 0000000000..13ea75b157
--- /dev/null
+++ b/unittests/Support/ConvertUTFTest.cpp
@@ -0,0 +1,75 @@
+//===- llvm/unittest/Support/ConvertUTFTest.cpp - ConvertUTF tests --------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/ConvertUTF.h"
+#include "gtest/gtest.h"
+#include <string>
+
+using namespace llvm;
+
+// Expects UTF-16 input that starts with a little-endian byte order mark
+// (FF FE) to convert successfully to the equivalent UTF-8 bytes.
+TEST(ConvertUTFTest, ConvertUTF16LittleEndianToUTF8String) {
+  // Src is the look of disapproval: BOM, U+0CA0, '_', U+0CA0.
+  static const char Src[] = "\xff\xfe\xa0\x0c_\x00\xa0\x0c";
+  // sizeof - 1 excludes the literal's terminator, not the embedded \x00.
+  ArrayRef<char> Ref(Src, sizeof(Src) - 1);
+  std::string Result;
+  bool Success = convertUTF16ToUTF8String(Ref, Result);
+  EXPECT_TRUE(Success);
+  std::string Expected("\xe0\xb2\xa0_\xe0\xb2\xa0");
+  EXPECT_EQ(Expected, Result);
+}
+
+// Same text as above, but with a big-endian byte order mark (FE FF) and
+// big-endian code units; the expected UTF-8 output is identical.
+TEST(ConvertUTFTest, ConvertUTF16BigEndianToUTF8String) {
+  // Src is the look of disapproval: BOM, U+0CA0, '_', U+0CA0.
+  static const char Src[] = "\xfe\xff\x0c\xa0\x00_\x0c\xa0";
+  ArrayRef<char> Ref(Src, sizeof(Src) - 1);
+  std::string Result;
+  bool Success = convertUTF16ToUTF8String(Ref, Result);
+  EXPECT_TRUE(Success);
+  std::string Expected("\xe0\xb2\xa0_\xe0\xb2\xa0");
+  EXPECT_EQ(Expected, Result);
+}
+
+// Expects conversion to fail when the byte count is odd (five bytes here).
+TEST(ConvertUTFTest, OddLengthInput) {
+  std::string Result;
+  bool Success = convertUTF16ToUTF8String(ArrayRef<char>("xxxxx", 5), Result);
+  EXPECT_FALSE(Success);
+}
+
+// Expects an empty input to convert successfully and yield an empty string.
+TEST(ConvertUTFTest, Empty) {
+  std::string Result;
+  bool Success = convertUTF16ToUTF8String(ArrayRef<char>(), Result);
+  EXPECT_TRUE(Success);
+  EXPECT_TRUE(Result.empty());
+}
+
+// Expects a byte order mark to be detected from the first two bytes, in either
+// byte order, and not detected when fewer than two bytes are supplied.
+TEST(ConvertUTFTest, HasUTF16BOM) {
+  bool HasBOM = hasUTF16ByteOrderMark(ArrayRef<char>("\xff\xfe", 2));
+  EXPECT_TRUE(HasBOM);
+  HasBOM = hasUTF16ByteOrderMark(ArrayRef<char>("\xfe\xff", 2));
+  EXPECT_TRUE(HasBOM);
+  HasBOM = hasUTF16ByteOrderMark(ArrayRef<char>("\xfe\xff ", 3));
+  EXPECT_TRUE(HasBOM); // Don't care about odd lengths.
+  // The literal is split so that 'a' is not absorbed into the \x00 escape.
+  HasBOM = hasUTF16ByteOrderMark(ArrayRef<char>("\xfe\xff\x00" "asdf", 6));
+  EXPECT_TRUE(HasBOM);
+
+  HasBOM = hasUTF16ByteOrderMark(ArrayRef<char>());
+  EXPECT_FALSE(HasBOM);
+  HasBOM = hasUTF16ByteOrderMark(ArrayRef<char>("\xfe", 1));
+  EXPECT_FALSE(HasBOM);
+}
-- 
cgit v1.2.3