From 9f27b050b03bbd4b06d7f7d0e24b899906b4e057 Mon Sep 17 00:00:00 2001 From: Dmitri Gribenko Date: Tue, 17 Jun 2014 09:33:24 +0000 Subject: ConvertUTF tests: remove uses of initializer lists to restore compatibility with MSVC git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211093 91177308-0d34-0410-b5e6-96231b3b80d8 --- unittests/Support/ConvertUTFTest.cpp | 1803 +++++++++++++++++++++------------- 1 file changed, 1108 insertions(+), 695 deletions(-) (limited to 'unittests') diff --git a/unittests/Support/ConvertUTFTest.cpp b/unittests/Support/ConvertUTFTest.cpp index 3b71ed1b6a..16c9bebfde 100644 --- a/unittests/Support/ConvertUTFTest.cpp +++ b/unittests/Support/ConvertUTFTest.cpp @@ -11,6 +11,7 @@ #include "gtest/gtest.h" #include #include +#include using namespace llvm; @@ -65,6 +66,39 @@ TEST(ConvertUTFTest, HasUTF16BOM) { EXPECT_FALSE(HasBOM); } +struct ConvertUTFResultContainer { + ConversionResult ErrorCode; + std::vector UnicodeScalars; + + ConvertUTFResultContainer(ConversionResult ErrorCode) + : ErrorCode(ErrorCode) {} + + ConvertUTFResultContainer + withScalars(unsigned US0 = 0x110000, unsigned US1 = 0x110000, + unsigned US2 = 0x110000, unsigned US3 = 0x110000, + unsigned US4 = 0x110000, unsigned US5 = 0x110000, + unsigned US6 = 0x110000, unsigned US7 = 0x110000) { + ConvertUTFResultContainer Result(*this); + if (US0 != 0x110000) + Result.UnicodeScalars.push_back(US0); + if (US1 != 0x110000) + Result.UnicodeScalars.push_back(US1); + if (US2 != 0x110000) + Result.UnicodeScalars.push_back(US2); + if (US3 != 0x110000) + Result.UnicodeScalars.push_back(US3); + if (US4 != 0x110000) + Result.UnicodeScalars.push_back(US4); + if (US5 != 0x110000) + Result.UnicodeScalars.push_back(US5); + if (US6 != 0x110000) + Result.UnicodeScalars.push_back(US6); + if (US7 != 0x110000) + Result.UnicodeScalars.push_back(US7); + return Result; + } +}; + std::pair> ConvertUTF8ToUnicodeScalarsLenient(StringRef S) { const UTF8 *SourceStart = reinterpret_cast(S.data()); @@ -73,17 +107,55 @@ ConvertUTF8ToUnicodeScalarsLenient(StringRef S) { std::vector Decoded(S.size(), 0); UTF32 *TargetStart = Decoded.data(); - auto Result = + auto ErrorCode = ConvertUTF8toUTF32(&SourceNext, SourceStart + S.size(), &TargetStart, Decoded.data() + Decoded.size(), lenientConversion); Decoded.resize(TargetStart - Decoded.data()); - return std::make_pair(Result, Decoded); + return std::make_pair(ErrorCode, Decoded); +} + +std::pair> +ConvertUTF8ToUnicodeScalarsPartialLenient(StringRef S) { + const UTF8 *SourceStart = reinterpret_cast(S.data()); + + const UTF8 *SourceNext = SourceStart; + std::vector Decoded(S.size(), 0); + UTF32 *TargetStart = Decoded.data(); + + auto ErrorCode = ConvertUTF8toUTF32Partial( + &SourceNext, SourceStart + S.size(), &TargetStart, + Decoded.data() + Decoded.size(), lenientConversion); + + Decoded.resize(TargetStart - Decoded.data()); + + return std::make_pair(ErrorCode, Decoded); } -#define R0(RESULT) std::make_pair(RESULT, std::vector{}) -#define R(RESULT, ...) std::make_pair(RESULT, std::vector{ __VA_ARGS__ }) +::testing::AssertionResult +CheckConvertUTF8ToUnicodeScalars(ConvertUTFResultContainer Expected, + StringRef S, bool Partial = false) { + ConversionResult ErrorCode; + std::vector Decoded; + if (!Partial) + std::tie(ErrorCode, Decoded) = ConvertUTF8ToUnicodeScalarsLenient(S); + else + + std::tie(ErrorCode, Decoded) = ConvertUTF8ToUnicodeScalarsPartialLenient(S); + if (Expected.ErrorCode != ErrorCode) + return ::testing::AssertionFailure() << "Expected error code " + << Expected.ErrorCode << ", actual " + << ErrorCode; + + if (Expected.UnicodeScalars != Decoded) + return ::testing::AssertionFailure() + << "Expected lenient decoded result:\n" + << ::testing::PrintToString(Expected.UnicodeScalars) << "\n" + << "Actual result:\n" << ::testing::PrintToString(Decoded); + + return ::testing::AssertionSuccess(); +} TEST(ConvertUTFTest, UTF8ToUTF32Lenient) { @@ -92,25 +164,27 @@ TEST(ConvertUTFTest, UTF8ToUTF32Lenient) { // // U+0041 LATIN CAPITAL LETTER A - EXPECT_EQ(R(conversionOK, 0x0041), - ConvertUTF8ToUnicodeScalarsLenient("\x41")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x0041), "\x41")); // // 2-byte sequences // // U+0283 LATIN SMALL LETTER ESH - EXPECT_EQ(R(conversionOK, 0x0283), - ConvertUTF8ToUnicodeScalarsLenient("\xca\x83")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x0283), + "\xca\x83")); // U+03BA GREEK SMALL LETTER KAPPA // U+1F79 GREEK SMALL LETTER OMICRON WITH OXIA // U+03C3 GREEK SMALL LETTER SIGMA // U+03BC GREEK SMALL LETTER MU // U+03B5 GREEK SMALL LETTER EPSILON - EXPECT_EQ(R(conversionOK, 0x03ba, 0x1f79, 0x03c3, 0x03bc, 0x03b5), - ConvertUTF8ToUnicodeScalarsLenient( - "\xce\xba\xe1\xbd\xb9\xcf\x83\xce\xbc\xce\xb5")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK) + .withScalars(0x03ba, 0x1f79, 0x03c3, 0x03bc, 0x03b5), + "\xce\xba\xe1\xbd\xb9\xcf\x83\xce\xbc\xce\xb5")); // // 3-byte sequences @@ -118,13 +192,15 @@ TEST(ConvertUTFTest, UTF8ToUTF32Lenient) { // U+4F8B CJK UNIFIED IDEOGRAPH-4F8B // U+6587 CJK UNIFIED IDEOGRAPH-6587 - EXPECT_EQ(R(conversionOK, 0x4f8b, 0x6587), - ConvertUTF8ToUnicodeScalarsLenient("\xe4\xbe\x8b\xe6\x96\x87")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x4f8b, 0x6587), + "\xe4\xbe\x8b\xe6\x96\x87")); // U+D55C HANGUL SYLLABLE HAN // U+AE00 HANGUL SYLLABLE GEUL - EXPECT_EQ(R(conversionOK, 0xd55c, 0xae00), - ConvertUTF8ToUnicodeScalarsLenient("\xed\x95\x9c\xea\xb8\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xd55c, 0xae00), + "\xed\x95\x9c\xea\xb8\x80")); // U+1112 HANGUL CHOSEONG HIEUH // U+1161 HANGUL JUNGSEONG A @@ -132,98 +208,122 @@ TEST(ConvertUTFTest, UTF8ToUTF32Lenient) { // U+1100 HANGUL CHOSEONG KIYEOK // U+1173 HANGUL JUNGSEONG EU // U+11AF HANGUL JONGSEONG RIEUL - EXPECT_EQ(R(conversionOK, 0x1112, 0x1161, 0x11ab, 0x1100, 0x1173, 0x11af), - ConvertUTF8ToUnicodeScalarsLenient( - "\xe1\x84\x92\xe1\x85\xa1\xe1\x86\xab\xe1\x84\x80\xe1\x85\xb3" - "\xe1\x86\xaf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK) + .withScalars(0x1112, 0x1161, 0x11ab, 0x1100, 0x1173, 0x11af), + "\xe1\x84\x92\xe1\x85\xa1\xe1\x86\xab\xe1\x84\x80\xe1\x85\xb3" + "\xe1\x86\xaf")); // // 4-byte sequences // // U+E0100 VARIATION SELECTOR-17 - EXPECT_EQ(R(conversionOK, 0x000E0100), - ConvertUTF8ToUnicodeScalarsLenient("\xf3\xa0\x84\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x000E0100), + "\xf3\xa0\x84\x80")); // // First possible sequence of a certain length // // U+0000 NULL - EXPECT_EQ(R(conversionOK, 0x0000), - ConvertUTF8ToUnicodeScalarsLenient(StringRef("\x00", 1))); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x0000), + StringRef("\x00", 1))); // U+0080 PADDING CHARACTER - EXPECT_EQ(R(conversionOK, 0x0080), - ConvertUTF8ToUnicodeScalarsLenient("\xc2\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x0080), + "\xc2\x80")); // U+0800 SAMARITAN LETTER ALAF - EXPECT_EQ(R(conversionOK, 0x0800), - ConvertUTF8ToUnicodeScalarsLenient("\xe0\xa0\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x0800), + "\xe0\xa0\x80")); // U+10000 LINEAR B SYLLABLE B008 A - EXPECT_EQ(R(conversionOK, 0x10000), - ConvertUTF8ToUnicodeScalarsLenient("\xf0\x90\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x10000), + "\xf0\x90\x80\x80")); // U+200000 (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf8\x88\x80\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xf8\x88\x80\x80\x80")); // U+4000000 (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfc\x84\x80\x80\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xfc\x84\x80\x80\x80\x80")); // // Last possible sequence of a certain length // // U+007F DELETE - EXPECT_EQ(R(conversionOK, 0x007f), - ConvertUTF8ToUnicodeScalarsLenient("\x7f")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x007f), "\x7f")); // U+07FF (unassigned) - EXPECT_EQ(R(conversionOK, 0x07ff), - ConvertUTF8ToUnicodeScalarsLenient("\xdf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x07ff), + "\xdf\xbf")); // U+FFFF (noncharacter) - EXPECT_EQ(R(conversionOK, 0xffff), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xffff), + "\xef\xbf\xbf")); // U+1FFFFF (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf7\xbf\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xf7\xbf\xbf\xbf")); // U+3FFFFFF (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfb\xbf\xbf\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xfb\xbf\xbf\xbf\xbf")); // U+7FFFFFFF (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfd\xbf\xbf\xbf\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xfd\xbf\xbf\xbf\xbf\xbf")); // // Other boundary conditions // // U+D7FF (unassigned) - EXPECT_EQ(R(conversionOK, 0xd7ff), - ConvertUTF8ToUnicodeScalarsLenient("\xed\x9f\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xd7ff), + "\xed\x9f\xbf")); // U+E000 (private use) - EXPECT_EQ(R(conversionOK, 0xe000), - ConvertUTF8ToUnicodeScalarsLenient("\xee\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xe000), + "\xee\x80\x80")); // U+FFFD REPLACEMENT CHARACTER - EXPECT_EQ(R(conversionOK, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xbf\xbd")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfffd), + "\xef\xbf\xbd")); // U+10FFFF (noncharacter) - EXPECT_EQ(R(conversionOK, 0x10ffff), - ConvertUTF8ToUnicodeScalarsLenient("\xf4\x8f\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x10ffff), + "\xf4\x8f\xbf\xbf")); // U+110000 (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf4\x90\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xf4\x90\x80\x80")); // // Unexpected continuation bytes @@ -232,407 +332,570 @@ TEST(ConvertUTFTest, UTF8ToUTF32Lenient) { // A sequence of unexpected continuation bytes that don't follow a first // byte, every byte is a maximal subpart. - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xbf")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\x80\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\x80\xbf")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xbf\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\x80\xbf\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\x80\xbf\x80\xbf")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\x80\xbf\x82\xbf\xaa")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xaa\xb0\xbb\xbf\xaa\xa0")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, - 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xaa\xb0\xbb\xbf\xaa\xa0\x8f")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\x80\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xbf\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd), + "\x80\xbf\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\x80\xbf\x80\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\x80\xbf\x82\xbf\xaa")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xaa\xb0\xbb\xbf\xaa\xa0")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xaa\xb0\xbb\xbf\xaa\xa0\x8f")); // All continuation bytes (0x80--0xbf). - EXPECT_EQ(R(sourceIllegal, - 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, - 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, - 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, - 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, - 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, - 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, - 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, - 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient( - "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" - "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" - "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf" - "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, + 0xfffd, 0xfffd, 0xfffd, 0xfffd) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, + 0xfffd, 0xfffd, 0xfffd, 0xfffd) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, + 0xfffd, 0xfffd, 0xfffd, 0xfffd) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, + 0xfffd, 0xfffd, 0xfffd, 0xfffd) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, + 0xfffd, 0xfffd, 0xfffd, 0xfffd) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, + 0xfffd, 0xfffd, 0xfffd, 0xfffd) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, + 0xfffd, 0xfffd, 0xfffd, 0xfffd) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, + 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" + "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" + "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf" + "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf")); // // Lonely start bytes // // Start bytes of 2-byte sequences (0xc0--0xdf). - EXPECT_EQ(R(sourceIllegal, - 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, - 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, - 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, - 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient( - "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" - "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf")); - - EXPECT_EQ(R(sourceIllegal, - 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, - 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, - 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, - 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, - 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, - 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, - 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, - 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020), - ConvertUTF8ToUnicodeScalarsLenient( - "\xc0\x20\xc1\x20\xc2\x20\xc3\x20\xc4\x20\xc5\x20\xc6\x20\xc7\x20" - "\xc8\x20\xc9\x20\xca\x20\xcb\x20\xcc\x20\xcd\x20\xce\x20\xcf\x20" - "\xd0\x20\xd1\x20\xd2\x20\xd3\x20\xd4\x20\xd5\x20\xd6\x20\xd7\x20" - "\xd8\x20\xd9\x20\xda\x20\xdb\x20\xdc\x20\xdd\x20\xde\x20\xdf\x20")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, + 0xfffd, 0xfffd, 0xfffd, 0xfffd) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, + 0xfffd, 0xfffd, 0xfffd, 0xfffd) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, + 0xfffd, 0xfffd, 0xfffd, 0xfffd) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, + 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" + "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf")); + + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, + 0xfffd, 0x0020, 0xfffd, 0x0020) + .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, + 0xfffd, 0x0020, 0xfffd, 0x0020) + .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, + 0xfffd, 0x0020, 0xfffd, 0x0020) + .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, + 0xfffd, 0x0020, 0xfffd, 0x0020) + .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, + 0xfffd, 0x0020, 0xfffd, 0x0020) + .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, + 0xfffd, 0x0020, 0xfffd, 0x0020) + .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, + 0xfffd, 0x0020, 0xfffd, 0x0020) + .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, + 0xfffd, 0x0020, 0xfffd, 0x0020), + "\xc0\x20\xc1\x20\xc2\x20\xc3\x20\xc4\x20\xc5\x20\xc6\x20\xc7\x20" + "\xc8\x20\xc9\x20\xca\x20\xcb\x20\xcc\x20\xcd\x20\xce\x20\xcf\x20" + "\xd0\x20\xd1\x20\xd2\x20\xd3\x20\xd4\x20\xd5\x20\xd6\x20\xd7\x20" + "\xd8\x20\xd9\x20\xda\x20\xdb\x20\xdc\x20\xdd\x20\xde\x20\xdf\x20")); // Start bytes of 3-byte sequences (0xe0--0xef). - EXPECT_EQ(R(sourceIllegal, - 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, - 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient( - "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef")); - - EXPECT_EQ(R(sourceIllegal, - 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, - 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, - 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, - 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020), - ConvertUTF8ToUnicodeScalarsLenient( - "\xe0\x20\xe1\x20\xe2\x20\xe3\x20\xe4\x20\xe5\x20\xe6\x20\xe7\x20" - "\xe8\x20\xe9\x20\xea\x20\xeb\x20\xec\x20\xed\x20\xee\x20\xef\x20")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, + 0xfffd, 0xfffd, 0xfffd, 0xfffd) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, + 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef")); + + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, + 0xfffd, 0x0020, 0xfffd, 0x0020) + .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, + 0xfffd, 0x0020, 0xfffd, 0x0020) + .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, + 0xfffd, 0x0020, 0xfffd, 0x0020) + .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, + 0xfffd, 0x0020, 0xfffd, 0x0020), + "\xe0\x20\xe1\x20\xe2\x20\xe3\x20\xe4\x20\xe5\x20\xe6\x20\xe7\x20" + "\xe8\x20\xe9\x20\xea\x20\xeb\x20\xec\x20\xed\x20\xee\x20\xef\x20")); // Start bytes of 4-byte sequences (0xf0--0xf7). - EXPECT_EQ(R(sourceIllegal, - 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7")); - - EXPECT_EQ(R(sourceIllegal, - 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, - 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020), - ConvertUTF8ToUnicodeScalarsLenient( - "\xf0\x20\xf1\x20\xf2\x20\xf3\x20\xf4\x20\xf5\x20\xf6\x20\xf7\x20")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, + 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7")); + + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, + 0xfffd, 0x0020, 0xfffd, 0x0020) + .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, + 0xfffd, 0x0020, 0xfffd, 0x0020), + "\xf0\x20\xf1\x20\xf2\x20\xf3\x20\xf4\x20\xf5\x20\xf6\x20\xf7\x20")); // Start bytes of 5-byte sequences (0xf8--0xfb). - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf8\xf9\xfa\xfb")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xf8\xf9\xfa\xfb")); - EXPECT_EQ(R(sourceIllegal, - 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020), - ConvertUTF8ToUnicodeScalarsLenient("\xf8\x20\xf9\x20\xfa\x20\xfb\x20")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, + 0xfffd, 0x0020, 0xfffd, 0x0020), + "\xf8\x20\xf9\x20\xfa\x20\xfb\x20")); // Start bytes of 6-byte sequences (0xfc--0xfd). - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfc\xfd")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xfc\xfd")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0x0020, 0xfffd, 0x0020), - ConvertUTF8ToUnicodeScalarsLenient("\xfc\x20\xfd\x20")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020), + "\xfc\x20\xfd\x20")); // // Other bytes (0xc0--0xc1, 0xfe--0xff). // - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xc0")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xc1")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfe")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xff")); - - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xc0\xc1\xfe\xff")); - - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfe\xfe\xff\xff")); - - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfe\x80\x80\x80\x80\x80")); - - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xff\x80\x80\x80\x80\x80")); - - EXPECT_EQ(R(sourceIllegal, - 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020), - ConvertUTF8ToUnicodeScalarsLenient("\xc0\x20\xc1\x20\xfe\x20\xff\x20")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xc0")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xc1")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfe")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xff")); + + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xc0\xc1\xfe\xff")); + + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xfe\xfe\xff\xff")); + + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xfe\x80\x80\x80\x80\x80")); + + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xff\x80\x80\x80\x80\x80")); + + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, + 0xfffd, 0x0020, 0xfffd, 0x0020), + "\xc0\x20\xc1\x20\xfe\x20\xff\x20")); // // Sequences with one continuation byte missing // - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xc2")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xdf")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xe0\xa0")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xe0\xbf")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xe1\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xec\xbf")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xed\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xed\x9f")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xee\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xbf")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf0\x90\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf0\xbf\xbf")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf1\x80\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf3\xbf\xbf")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf4\x80\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf4\x8f\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xc2")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xdf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), + "\xe0\xa0")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), + "\xe0\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), + "\xe1\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), + "\xec\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), + "\xed\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), + "\xed\x9f")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), + "\xee\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), + "\xef\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), + "\xf0\x90\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), + "\xf0\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), + "\xf1\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), + "\xf3\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), + "\xf4\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), + "\xf4\x8f\xbf")); // Overlong sequences with one trailing byte missing. - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xc0")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xc1")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xe0\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xe0\x9f")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf0\x80\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf0\x8f\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf8\x80\x80\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfc\x80\x80\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), + "\xc0")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), + "\xc1")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xe0\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xe0\x9f")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd), + "\xf0\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd), + "\xf0\x8f\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xf8\x80\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xfc\x80\x80\x80\x80")); // Sequences that represent surrogates with one trailing byte missing. // High surrogates - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xed\xa0")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xed\xac")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xed\xaf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xed\xa0")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xed\xac")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xed\xaf")); // Low surrogates - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xed\xb0")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xed\xb4")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xed\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xed\xb0")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xed\xb4")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xed\xbf")); // Ill-formed 4-byte sequences. // 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx // U+1100xx (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf4\x90\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd), + "\xf4\x90\x80")); // U+13FBxx (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf4\xbf\xbf")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf5\x80\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf6\x80\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf7\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd), + "\xf4\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd), + "\xf5\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd), + "\xf6\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd), + "\xf7\x80\x80")); // U+1FFBxx (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf7\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd), + "\xf7\xbf\xbf")); // Ill-formed 5-byte sequences. // 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx // U+2000xx (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf8\x88\x80\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf8\xbf\xbf\xbf")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf9\x80\x80\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfa\x80\x80\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfb\x80\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xf8\x88\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xf8\xbf\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xf9\x80\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xfa\x80\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xfb\x80\x80\x80")); // U+3FFFFxx (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfb\xbf\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xfb\xbf\xbf\xbf")); // Ill-formed 6-byte sequences. // 1111110u 10uuuuuu 10uzzzzz 10zzzyyyy 10yyyyxx 10xxxxxx // U+40000xx (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfc\x84\x80\x80\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfc\xbf\xbf\xbf\xbf")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfd\x80\x80\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xfc\x84\x80\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xfc\xbf\xbf\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xfd\x80\x80\x80\x80")); // U+7FFFFFxx (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfd\xbf\xbf\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xfd\xbf\xbf\xbf\xbf")); // // Sequences with two continuation bytes missing // - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf0\x90")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf0\xbf")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf1\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf3\xbf")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf4\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf4\x8f")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), + "\xf0\x90")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), + "\xf0\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), + "\xf1\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), + "\xf3\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), + "\xf4\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), + "\xf4\x8f")); // Overlong sequences with two trailing byte missing. - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xe0")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf0\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf0\x8f")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf8\x80\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfc\x80\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xe0")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xf0\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xf0\x8f")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd), + "\xf8\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xfc\x80\x80\x80")); // Sequences that represent surrogates with two trailing bytes missing. - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xed")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xed")); // Ill-formed 4-byte sequences. // 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx // U+110yxx (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf4\x90")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xf4\x90")); // U+13Fyxx (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf4\xbf")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf5\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf6\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf7\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xf4\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xf5\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xf6\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xf7\x80")); // U+1FFyxx (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf7\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xf7\xbf")); // Ill-formed 5-byte sequences. // 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx // U+200yxx (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf8\x88\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf8\xbf\xbf")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf9\x80\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfa\x80\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfb\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd), + "\xf8\x88\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd), + "\xf8\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd), + "\xf9\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd), + "\xfa\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd), + "\xfb\x80\x80")); // U+3FFFyxx (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfb\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd), + "\xfb\xbf\xbf")); // Ill-formed 6-byte sequences. // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx // U+4000yxx (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfc\x84\x80\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfc\xbf\xbf\xbf")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfd\x80\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xfc\x84\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xfc\xbf\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xfd\x80\x80\x80")); // U+7FFFFyxx (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfd\xbf\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xfd\xbf\xbf\xbf")); // // Sequences with three continuation bytes missing // - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf0")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf1")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf2")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf3")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf4")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf0")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf1")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf2")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf3")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf4")); // Broken overlong sequences. - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf0")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf8\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfc\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf0")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xf8\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd), + "\xfc\x80\x80")); // Ill-formed 4-byte sequences. // 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx // U+14yyxx (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf5")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf6")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf5")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf6")); // U+1Cyyxx (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf7")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf7")); // Ill-formed 5-byte sequences. // 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx // U+20yyxx (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf8\x88")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf8\xbf")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf9\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfa\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfb\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xf8\x88")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xf8\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xf9\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xfa\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xfb\x80")); // U+3FCyyxx (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfb\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xfb\xbf")); // Ill-formed 6-byte sequences. // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx // U+400yyxx (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfc\x84\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfc\xbf\xbf")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfd\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd), + "\xfc\x84\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd), + "\xfc\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd), + "\xfd\x80\x80")); // U+7FFCyyxx (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfd\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd), + "\xfd\xbf\xbf")); // // Sequences with four continuation bytes missing @@ -641,36 +904,41 @@ TEST(ConvertUTFTest, UTF8ToUTF32Lenient) { // Ill-formed 5-byte sequences. // 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx // U+uzyyxx (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf8")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf9")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfa")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfb")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf8")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf9")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfa")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfb")); // U+3zyyxx (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfb")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfb")); // Broken overlong sequences. - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf8")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfc\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf8")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xfc\x80")); // Ill-formed 6-byte sequences. // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx // U+uzzyyxx (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfc\x84")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfc\xbf")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfd\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xfc\x84")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xfc\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xfd\x80")); // U+7Fzzyyxx (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfd\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xfd\xbf")); // // Sequences with five continuation bytes missing @@ -679,89 +947,124 @@ TEST(ConvertUTFTest, UTF8ToUTF32Lenient) { // Ill-formed 6-byte sequences. // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx // U+uzzyyxx (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfc")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfc")); // U+uuzzyyxx (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfd")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfd")); // // Consecutive sequences with trailing bytes missing // - EXPECT_EQ(R(sourceIllegal, - 0xfffd, /**/ 0xfffd, 0xfffd, /**/ 0xfffd, 0xfffd, 0xfffd, /**/ - 0xfffd, 0xfffd, 0xfffd, 0xfffd, - 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, - 0xfffd, /**/ 0xfffd, /**/ 0xfffd, 0xfffd, 0xfffd, /**/ - 0xfffd, 0xfffd, 0xfffd, 0xfffd, - 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient( - "\xc0" "\xe0\x80" "\xf0\x80\x80" - "\xf8\x80\x80\x80" - "\xfc\x80\x80\x80\x80" - "\xdf" "\xef\xbf" "\xf7\xbf\xbf" - "\xfb\xbf\xbf\xbf" - "\xfd\xbf\xbf\xbf\xbf")); - + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, /**/ 0xfffd, 0xfffd, /**/ 0xfffd, 0xfffd, 0xfffd) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd) + .withScalars(0xfffd, /**/ 0xfffd, /**/ 0xfffd, 0xfffd, 0xfffd) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xc0" "\xe0\x80" "\xf0\x80\x80" + "\xf8\x80\x80\x80" + "\xfc\x80\x80\x80\x80" + "\xdf" "\xef\xbf" "\xf7\xbf\xbf" + "\xfb\xbf\xbf\xbf" + "\xfd\xbf\xbf\xbf\xbf")); // // Overlong UTF-8 sequences // // U+002F SOLIDUS - EXPECT_EQ(R(conversionOK, 0x002f), - ConvertUTF8ToUnicodeScalarsLenient("\x2f")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x002f), "\x2f")); // Overlong sequences of the above. - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xc0\xaf")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xe0\x80\xaf")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf0\x80\x80\xaf")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf8\x80\x80\x80\xaf")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfc\x80\x80\x80\x80\xaf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xc0\xaf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd), + "\xe0\x80\xaf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xf0\x80\x80\xaf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xf8\x80\x80\x80\xaf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xfc\x80\x80\x80\x80\xaf")); // U+0000 NULL - EXPECT_EQ(R(conversionOK, 0x0000), - ConvertUTF8ToUnicodeScalarsLenient(StringRef("\x00", 1))); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x0000), + StringRef("\x00", 1))); // Overlong sequences of the above. - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xc0\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xe0\x80\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf0\x80\x80\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf8\x80\x80\x80\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfc\x80\x80\x80\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xc0\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd), + "\xe0\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xf0\x80\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xf8\x80\x80\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xfc\x80\x80\x80\x80\x80")); // Other overlong sequences. - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xc0\xbf")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xc1\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xc1\xbf")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xe0\x9f\xbf")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xed\xa0\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xed\xbf\xbf")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf0\x8f\x80\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf0\x8f\xbf\xbf")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf8\x87\xbf\xbf\xbf")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfc\x83\xbf\xbf\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xc0\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xc1\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xc1\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd), + "\xe0\x9f\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd), + "\xed\xa0\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd), + "\xed\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xf0\x8f\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xf0\x8f\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xf8\x87\xbf\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xfc\x83\xbf\xbf\xbf\xbf")); // // Isolated surrogates @@ -780,68 +1083,98 @@ TEST(ConvertUTFTest, UTF8ToUTF32Lenient) { // High surrogates // U+D800 - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xed\xa0\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd), + "\xed\xa0\x80")); // U+DB40 - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xed\xac\xa0")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd), + "\xed\xac\xa0")); // U+DBFF - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xed\xaf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd), + "\xed\xaf\xbf")); // Low surrogates // U+DC00 - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xed\xb0\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd), + "\xed\xb0\x80")); // U+DD00 - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xed\xb4\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd), + "\xed\xb4\x80")); // U+DFFF - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xed\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd), + "\xed\xbf\xbf")); // Surrogate pairs // U+D800 U+DC00 - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xed\xa0\x80\xed\xb0\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xed\xa0\x80\xed\xb0\x80")); // U+D800 U+DD00 - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xed\xa0\x80\xed\xb4\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xed\xa0\x80\xed\xb4\x80")); // U+D800 U+DFFF - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xed\xa0\x80\xed\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xed\xa0\x80\xed\xbf\xbf")); // U+DB40 U+DC00 - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xed\xac\xa0\xed\xb0\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xed\xac\xa0\xed\xb0\x80")); // U+DB40 U+DD00 - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xed\xac\xa0\xed\xb4\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xed\xac\xa0\xed\xb4\x80")); // U+DB40 U+DFFF - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xed\xac\xa0\xed\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xed\xac\xa0\xed\xbf\xbf")); // U+DBFF U+DC00 - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xed\xaf\xbf\xed\xb0\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xed\xaf\xbf\xed\xb0\x80")); // U+DBFF U+DD00 - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xed\xaf\xbf\xed\xb4\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xed\xaf\xbf\xed\xb4\x80")); // U+DBFF U+DFFF - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xed\xaf\xbf\xed\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xed\xaf\xbf\xed\xbf\xbf")); // // Noncharacters @@ -855,397 +1188,477 @@ TEST(ConvertUTFTest, UTF8ToUTF32Lenient) { // and the values U+FDD0..U+FDEF. // U+FFFE - EXPECT_EQ(R(conversionOK, 0xfffe), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xbf\xbe")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfffe), + "\xef\xbf\xbe")); // U+FFFF - EXPECT_EQ(R(conversionOK, 0xffff), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xffff), + "\xef\xbf\xbf")); // U+1FFFE - EXPECT_EQ(R(conversionOK, 0x1fffe), - ConvertUTF8ToUnicodeScalarsLenient("\xf0\x9f\xbf\xbe")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x1fffe), + "\xf0\x9f\xbf\xbe")); // U+1FFFF - EXPECT_EQ(R(conversionOK, 0x1ffff), - ConvertUTF8ToUnicodeScalarsLenient("\xf0\x9f\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x1ffff), + "\xf0\x9f\xbf\xbf")); // U+2FFFE - EXPECT_EQ(R(conversionOK, 0x2fffe), - ConvertUTF8ToUnicodeScalarsLenient("\xf0\xaf\xbf\xbe")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x2fffe), + "\xf0\xaf\xbf\xbe")); // U+2FFFF - EXPECT_EQ(R(conversionOK, 0x2ffff), - ConvertUTF8ToUnicodeScalarsLenient("\xf0\xaf\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x2ffff), + "\xf0\xaf\xbf\xbf")); // U+3FFFE - EXPECT_EQ(R(conversionOK, 0x3fffe), - ConvertUTF8ToUnicodeScalarsLenient("\xf0\xbf\xbf\xbe")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x3fffe), + "\xf0\xbf\xbf\xbe")); // U+3FFFF - EXPECT_EQ(R(conversionOK, 0x3ffff), - ConvertUTF8ToUnicodeScalarsLenient("\xf0\xbf\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x3ffff), + "\xf0\xbf\xbf\xbf")); // U+4FFFE - EXPECT_EQ(R(conversionOK, 0x4fffe), - ConvertUTF8ToUnicodeScalarsLenient("\xf1\x8f\xbf\xbe")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x4fffe), + "\xf1\x8f\xbf\xbe")); // U+4FFFF - EXPECT_EQ(R(conversionOK, 0x4ffff), - ConvertUTF8ToUnicodeScalarsLenient("\xf1\x8f\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x4ffff), + "\xf1\x8f\xbf\xbf")); // U+5FFFE - EXPECT_EQ(R(conversionOK, 0x5fffe), - ConvertUTF8ToUnicodeScalarsLenient("\xf1\x9f\xbf\xbe")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x5fffe), + "\xf1\x9f\xbf\xbe")); // U+5FFFF - EXPECT_EQ(R(conversionOK, 0x5ffff), - ConvertUTF8ToUnicodeScalarsLenient("\xf1\x9f\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x5ffff), + "\xf1\x9f\xbf\xbf")); // U+6FFFE - EXPECT_EQ(R(conversionOK, 0x6fffe), - ConvertUTF8ToUnicodeScalarsLenient("\xf1\xaf\xbf\xbe")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x6fffe), + "\xf1\xaf\xbf\xbe")); // U+6FFFF - EXPECT_EQ(R(conversionOK, 0x6ffff), - ConvertUTF8ToUnicodeScalarsLenient("\xf1\xaf\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x6ffff), + "\xf1\xaf\xbf\xbf")); // U+7FFFE - EXPECT_EQ(R(conversionOK, 0x7fffe), - ConvertUTF8ToUnicodeScalarsLenient("\xf1\xbf\xbf\xbe")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x7fffe), + "\xf1\xbf\xbf\xbe")); // U+7FFFF - EXPECT_EQ(R(conversionOK, 0x7ffff), - ConvertUTF8ToUnicodeScalarsLenient("\xf1\xbf\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x7ffff), + "\xf1\xbf\xbf\xbf")); // U+8FFFE - EXPECT_EQ(R(conversionOK, 0x8fffe), - ConvertUTF8ToUnicodeScalarsLenient("\xf2\x8f\xbf\xbe")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x8fffe), + "\xf2\x8f\xbf\xbe")); // U+8FFFF - EXPECT_EQ(R(conversionOK, 0x8ffff), - ConvertUTF8ToUnicodeScalarsLenient("\xf2\x8f\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x8ffff), + "\xf2\x8f\xbf\xbf")); // U+9FFFE - EXPECT_EQ(R(conversionOK, 0x9fffe), - ConvertUTF8ToUnicodeScalarsLenient("\xf2\x9f\xbf\xbe")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x9fffe), + "\xf2\x9f\xbf\xbe")); // U+9FFFF - EXPECT_EQ(R(conversionOK, 0x9ffff), - ConvertUTF8ToUnicodeScalarsLenient("\xf2\x9f\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x9ffff), + "\xf2\x9f\xbf\xbf")); // U+AFFFE - EXPECT_EQ(R(conversionOK, 0xafffe), - ConvertUTF8ToUnicodeScalarsLenient("\xf2\xaf\xbf\xbe")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xafffe), + "\xf2\xaf\xbf\xbe")); // U+AFFFF - EXPECT_EQ(R(conversionOK, 0xaffff), - ConvertUTF8ToUnicodeScalarsLenient("\xf2\xaf\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xaffff), + "\xf2\xaf\xbf\xbf")); // U+BFFFE - EXPECT_EQ(R(conversionOK, 0xbfffe), - ConvertUTF8ToUnicodeScalarsLenient("\xf2\xbf\xbf\xbe")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xbfffe), + "\xf2\xbf\xbf\xbe")); // U+BFFFF - EXPECT_EQ(R(conversionOK, 0xbffff), - ConvertUTF8ToUnicodeScalarsLenient("\xf2\xbf\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xbffff), + "\xf2\xbf\xbf\xbf")); // U+CFFFE - EXPECT_EQ(R(conversionOK, 0xcfffe), - ConvertUTF8ToUnicodeScalarsLenient("\xf3\x8f\xbf\xbe")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xcfffe), + "\xf3\x8f\xbf\xbe")); // U+CFFFF - EXPECT_EQ(R(conversionOK, 0xcfffF), - ConvertUTF8ToUnicodeScalarsLenient("\xf3\x8f\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xcfffF), + "\xf3\x8f\xbf\xbf")); // U+DFFFE - EXPECT_EQ(R(conversionOK, 0xdfffe), - ConvertUTF8ToUnicodeScalarsLenient("\xf3\x9f\xbf\xbe")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xdfffe), + "\xf3\x9f\xbf\xbe")); // U+DFFFF - EXPECT_EQ(R(conversionOK, 0xdffff), - ConvertUTF8ToUnicodeScalarsLenient("\xf3\x9f\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xdffff), + "\xf3\x9f\xbf\xbf")); // U+EFFFE - EXPECT_EQ(R(conversionOK, 0xefffe), - ConvertUTF8ToUnicodeScalarsLenient("\xf3\xaf\xbf\xbe")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xefffe), + "\xf3\xaf\xbf\xbe")); // U+EFFFF - EXPECT_EQ(R(conversionOK, 0xeffff), - ConvertUTF8ToUnicodeScalarsLenient("\xf3\xaf\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xeffff), + "\xf3\xaf\xbf\xbf")); // U+FFFFE - EXPECT_EQ(R(conversionOK, 0xffffe), - ConvertUTF8ToUnicodeScalarsLenient("\xf3\xbf\xbf\xbe")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xffffe), + "\xf3\xbf\xbf\xbe")); // U+FFFFF - EXPECT_EQ(R(conversionOK, 0xfffff), - ConvertUTF8ToUnicodeScalarsLenient("\xf3\xbf\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfffff), + "\xf3\xbf\xbf\xbf")); // U+10FFFE - EXPECT_EQ(R(conversionOK, 0x10fffe), - ConvertUTF8ToUnicodeScalarsLenient("\xf4\x8f\xbf\xbe")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x10fffe), + "\xf4\x8f\xbf\xbe")); // U+10FFFF - EXPECT_EQ(R(conversionOK, 0x10ffff), - ConvertUTF8ToUnicodeScalarsLenient("\xf4\x8f\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x10ffff), + "\xf4\x8f\xbf\xbf")); // U+FDD0 - EXPECT_EQ(R(conversionOK, 0xfdd0), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\x90")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdd0), + "\xef\xb7\x90")); // U+FDD1 - EXPECT_EQ(R(conversionOK, 0xfdd1), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\x91")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdd1), + "\xef\xb7\x91")); // U+FDD2 - EXPECT_EQ(R(conversionOK, 0xfdd2), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\x92")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdd2), + "\xef\xb7\x92")); // U+FDD3 - EXPECT_EQ(R(conversionOK, 0xfdd3), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\x93")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdd3), + "\xef\xb7\x93")); // U+FDD4 - EXPECT_EQ(R(conversionOK, 0xfdd4), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\x94")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdd4), + "\xef\xb7\x94")); // U+FDD5 - EXPECT_EQ(R(conversionOK, 0xfdd5), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\x95")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdd5), + "\xef\xb7\x95")); // U+FDD6 - EXPECT_EQ(R(conversionOK, 0xfdd6), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\x96")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdd6), + "\xef\xb7\x96")); // U+FDD7 - EXPECT_EQ(R(conversionOK, 0xfdd7), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\x97")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdd7), + "\xef\xb7\x97")); // U+FDD8 - EXPECT_EQ(R(conversionOK, 0xfdd8), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\x98")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdd8), + "\xef\xb7\x98")); // U+FDD9 - EXPECT_EQ(R(conversionOK, 0xfdd9), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\x99")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdd9), + "\xef\xb7\x99")); // U+FDDA - EXPECT_EQ(R(conversionOK, 0xfdda), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\x9a")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdda), + "\xef\xb7\x9a")); // U+FDDB - EXPECT_EQ(R(conversionOK, 0xfddb), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\x9b")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfddb), + "\xef\xb7\x9b")); // U+FDDC - EXPECT_EQ(R(conversionOK, 0xfddc), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\x9c")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfddc), + "\xef\xb7\x9c")); // U+FDDD - EXPECT_EQ(R(conversionOK, 0xfddd), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\x9d")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfddd), + "\xef\xb7\x9d")); // U+FDDE - EXPECT_EQ(R(conversionOK, 0xfdde), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\x9e")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdde), + "\xef\xb7\x9e")); // U+FDDF - EXPECT_EQ(R(conversionOK, 0xfddf), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\x9f")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfddf), + "\xef\xb7\x9f")); // U+FDE0 - EXPECT_EQ(R(conversionOK, 0xfde0), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xa0")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfde0), + "\xef\xb7\xa0")); // U+FDE1 - EXPECT_EQ(R(conversionOK, 0xfde1), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xa1")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfde1), + "\xef\xb7\xa1")); // U+FDE2 - EXPECT_EQ(R(conversionOK, 0xfde2), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xa2")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfde2), + "\xef\xb7\xa2")); // U+FDE3 - EXPECT_EQ(R(conversionOK, 0xfde3), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xa3")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfde3), + "\xef\xb7\xa3")); // U+FDE4 - EXPECT_EQ(R(conversionOK, 0xfde4), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xa4")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfde4), + "\xef\xb7\xa4")); // U+FDE5 - EXPECT_EQ(R(conversionOK, 0xfde5), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xa5")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfde5), + "\xef\xb7\xa5")); // U+FDE6 - EXPECT_EQ(R(conversionOK, 0xfde6), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xa6")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfde6), + "\xef\xb7\xa6")); // U+FDE7 - EXPECT_EQ(R(conversionOK, 0xfde7), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xa7")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfde7), + "\xef\xb7\xa7")); // U+FDE8 - EXPECT_EQ(R(conversionOK, 0xfde8), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xa8")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfde8), + "\xef\xb7\xa8")); // U+FDE9 - EXPECT_EQ(R(conversionOK, 0xfde9), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xa9")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfde9), + "\xef\xb7\xa9")); // U+FDEA - EXPECT_EQ(R(conversionOK, 0xfdea), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xaa")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdea), + "\xef\xb7\xaa")); // U+FDEB - EXPECT_EQ(R(conversionOK, 0xfdeb), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xab")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdeb), + "\xef\xb7\xab")); // U+FDEC - EXPECT_EQ(R(conversionOK, 0xfdec), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xac")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdec), + "\xef\xb7\xac")); // U+FDED - EXPECT_EQ(R(conversionOK, 0xfded), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xad")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfded), + "\xef\xb7\xad")); // U+FDEE - EXPECT_EQ(R(conversionOK, 0xfdee), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xae")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdee), + "\xef\xb7\xae")); // U+FDEF - EXPECT_EQ(R(conversionOK, 0xfdef), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xaf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdef), + "\xef\xb7\xaf")); // U+FDF0 - EXPECT_EQ(R(conversionOK, 0xfdf0), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xb0")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdf0), + "\xef\xb7\xb0")); // U+FDF1 - EXPECT_EQ(R(conversionOK, 0xfdf1), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xb1")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdf1), + "\xef\xb7\xb1")); // U+FDF2 - EXPECT_EQ(R(conversionOK, 0xfdf2), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xb2")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdf2), + "\xef\xb7\xb2")); // U+FDF3 - EXPECT_EQ(R(conversionOK, 0xfdf3), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xb3")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdf3), + "\xef\xb7\xb3")); // U+FDF4 - EXPECT_EQ(R(conversionOK, 0xfdf4), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xb4")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdf4), + "\xef\xb7\xb4")); // U+FDF5 - EXPECT_EQ(R(conversionOK, 0xfdf5), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xb5")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdf5), + "\xef\xb7\xb5")); // U+FDF6 - EXPECT_EQ(R(conversionOK, 0xfdf6), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xb6")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdf6), + "\xef\xb7\xb6")); // U+FDF7 - EXPECT_EQ(R(conversionOK, 0xfdf7), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xb7")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdf7), + "\xef\xb7\xb7")); // U+FDF8 - EXPECT_EQ(R(conversionOK, 0xfdf8), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xb8")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdf8), + "\xef\xb7\xb8")); // U+FDF9 - EXPECT_EQ(R(conversionOK, 0xfdf9), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xb9")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdf9), + "\xef\xb7\xb9")); // U+FDFA - EXPECT_EQ(R(conversionOK, 0xfdfa), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xba")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdfa), + "\xef\xb7\xba")); // U+FDFB - EXPECT_EQ(R(conversionOK, 0xfdfb), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xbb")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdfb), + "\xef\xb7\xbb")); // U+FDFC - EXPECT_EQ(R(conversionOK, 0xfdfc), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xbc")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdfc), + "\xef\xb7\xbc")); // U+FDFD - EXPECT_EQ(R(conversionOK, 0xfdfd), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xbd")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdfd), + "\xef\xb7\xbd")); // U+FDFE - EXPECT_EQ(R(conversionOK, 0xfdfe), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xbe")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdfe), + "\xef\xb7\xbe")); // U+FDFF - EXPECT_EQ(R(conversionOK, 0xfdff), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xbf")); -} - -std::pair> -ConvertUTF8ToUnicodeScalarsPartialLenient(StringRef S) { - const UTF8 *SourceStart = reinterpret_cast(S.data()); - - const UTF8 *SourceNext = SourceStart; - std::vector Decoded(S.size(), 0); - UTF32 *TargetStart = Decoded.data(); - - auto Result = ConvertUTF8toUTF32Partial( - &SourceNext, SourceStart + S.size(), &TargetStart, - Decoded.data() + Decoded.size(), lenientConversion); - - Decoded.resize(TargetStart - Decoded.data()); - - return std::make_pair(Result, Decoded); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdff), + "\xef\xb7\xbf")); } TEST(ConvertUTFTest, UTF8ToUTF32PartialLenient) { // U+0041 LATIN CAPITAL LETTER A - EXPECT_EQ(R(conversionOK, 0x0041), - ConvertUTF8ToUnicodeScalarsPartialLenient("\x41")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x0041), + "\x41", true)); // // Sequences with one continuation byte missing // - EXPECT_EQ(R0(sourceExhausted), - ConvertUTF8ToUnicodeScalarsPartialLenient("\xc2")); - EXPECT_EQ(R0(sourceExhausted), - ConvertUTF8ToUnicodeScalarsPartialLenient("\xdf")); - EXPECT_EQ(R0(sourceExhausted), - ConvertUTF8ToUnicodeScalarsPartialLenient("\xe0\xa0")); - EXPECT_EQ(R0(sourceExhausted), - ConvertUTF8ToUnicodeScalarsPartialLenient("\xe0\xbf")); - EXPECT_EQ(R0(sourceExhausted), - ConvertUTF8ToUnicodeScalarsPartialLenient("\xe1\x80")); - EXPECT_EQ(R0(sourceExhausted), - ConvertUTF8ToUnicodeScalarsPartialLenient("\xec\xbf")); - EXPECT_EQ(R0(sourceExhausted), - ConvertUTF8ToUnicodeScalarsPartialLenient("\xed\x80")); - EXPECT_EQ(R0(sourceExhausted), - ConvertUTF8ToUnicodeScalarsPartialLenient("\xed\x9f")); - EXPECT_EQ(R0(sourceExhausted), - ConvertUTF8ToUnicodeScalarsPartialLenient("\xee\x80")); - EXPECT_EQ(R0(sourceExhausted), - ConvertUTF8ToUnicodeScalarsPartialLenient("\xef\xbf")); - EXPECT_EQ(R0(sourceExhausted), - ConvertUTF8ToUnicodeScalarsPartialLenient("\xf0\x90\x80")); - EXPECT_EQ(R0(sourceExhausted), - ConvertUTF8ToUnicodeScalarsPartialLenient("\xf0\xbf\xbf")); - EXPECT_EQ(R0(sourceExhausted), - ConvertUTF8ToUnicodeScalarsPartialLenient("\xf1\x80\x80")); - EXPECT_EQ(R0(sourceExhausted), - ConvertUTF8ToUnicodeScalarsPartialLenient("\xf3\xbf\xbf")); - EXPECT_EQ(R0(sourceExhausted), - ConvertUTF8ToUnicodeScalarsPartialLenient("\xf4\x80\x80")); - EXPECT_EQ(R0(sourceExhausted), - ConvertUTF8ToUnicodeScalarsPartialLenient("\xf4\x8f\xbf")); - - EXPECT_EQ(R(sourceExhausted, 0x0041), - ConvertUTF8ToUnicodeScalarsPartialLenient("\x41\xc2")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceExhausted), + "\xc2", true)); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceExhausted), + "\xdf", true)); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceExhausted), + "\xe0\xa0", true)); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceExhausted), + "\xe0\xbf", true)); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceExhausted), + "\xe1\x80", true)); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceExhausted), + "\xec\xbf", true)); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceExhausted), + "\xed\x80", true)); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceExhausted), + "\xed\x9f", true)); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceExhausted), + "\xee\x80", true)); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceExhausted), + "\xef\xbf", true)); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceExhausted), + "\xf0\x90\x80", true)); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceExhausted), + "\xf0\xbf\xbf", true)); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceExhausted), + "\xf1\x80\x80", true)); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceExhausted), + "\xf3\xbf\xbf", true)); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceExhausted), + "\xf4\x80\x80", true)); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceExhausted), + "\xf4\x8f\xbf", true)); + + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceExhausted).withScalars(0x0041), + "\x41\xc2", true)); } -#undef R0 -#undef R - -- cgit v1.2.3