summaryrefslogtreecommitdiff
path: root/unittests/Support/ConvertUTFTest.cpp
blob: 13ea75b1573b5b7da28defa275778be5d735874f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
//===- llvm/unittest/Support/ConvertUTFTest.cpp - ConvertUTF tests --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "llvm/Support/ConvertUTF.h"
#include "gtest/gtest.h"
#include <string>

using namespace llvm;

TEST(ConvertUTFTest, ConvertUTF16LittleEndianToUTF8String) {
  // The UTF-8 rendering of the look of disapproval (U+0CA0 '_' U+0CA0).
  const std::string Expected = "\xe0\xb2\xa0_\xe0\xb2\xa0";

  // The same text as UTF-16, least significant byte first, preceded by the
  // FF FE byte order mark. The literal holds an embedded NUL, so the length
  // is taken from sizeof (minus the terminator) rather than from strlen.
  static const char LittleEndianBytes[] = "\xff\xfe\xa0\x0c_\x00\xa0\x0c";
  const ArrayRef<char> Input(LittleEndianBytes, sizeof(LittleEndianBytes) - 1);

  std::string Converted;
  const bool Ok = convertUTF16ToUTF8String(Input, Converted);
  EXPECT_TRUE(Ok);
  EXPECT_EQ(Expected, Converted);
}

TEST(ConvertUTFTest, ConvertUTF16BigEndianToUTF8String) {
  // The UTF-8 rendering of the look of disapproval (U+0CA0 '_' U+0CA0).
  const std::string Expected = "\xe0\xb2\xa0_\xe0\xb2\xa0";

  // The same text as UTF-16, most significant byte first, preceded by the
  // FE FF byte order mark. The literal holds an embedded NUL, so the length
  // is taken from sizeof (minus the terminator) rather than from strlen.
  static const char BigEndianBytes[] = "\xfe\xff\x0c\xa0\x00_\x0c\xa0";
  const ArrayRef<char> Input(BigEndianBytes, sizeof(BigEndianBytes) - 1);

  std::string Converted;
  const bool Ok = convertUTF16ToUTF8String(Input, Converted);
  EXPECT_TRUE(Ok);
  EXPECT_EQ(Expected, Converted);
}

TEST(ConvertUTFTest, OddLengthInput) {
  // Five bytes cannot be split into whole 16-bit code units, so the
  // conversion is expected to report failure.
  const ArrayRef<char> FiveBytes("xxxxx", 5);
  std::string Converted;
  EXPECT_FALSE(convertUTF16ToUTF8String(FiveBytes, Converted));
}

TEST(ConvertUTFTest, Empty) {
  // Converting zero bytes is expected to succeed and produce no output.
  ArrayRef<char> NoBytes;
  std::string Converted;
  const bool Ok = convertUTF16ToUTF8String(NoBytes, Converted);
  EXPECT_TRUE(Ok);
  EXPECT_TRUE(Converted.empty());
}

TEST(ConvertUTFTest, HasUTF16BOM) {
  // Inputs that begin with a byte order mark, in either byte order.
  bool HasBOM = hasUTF16ByteOrderMark(ArrayRef<char>("\xff\xfe", 2));
  EXPECT_TRUE(HasBOM);
  HasBOM = hasUTF16ByteOrderMark(ArrayRef<char>("\xfe\xff", 2));
  EXPECT_TRUE(HasBOM);
  HasBOM = hasUTF16ByteOrderMark(ArrayRef<char>("\xfe\xff ", 3));
  EXPECT_TRUE(HasBOM); // Don't care about odd lengths.

  // BOM followed by further bytes. The literal is deliberately split after
  // "\x00": a hex escape consumes every hex digit that follows it, so writing
  // "\x00asdf" would encode the single byte 0x0A followed by "sdf" instead of
  // a NUL followed by "asdf". The embedded NUL also means the length has to
  // come from sizeof (minus the terminator) rather than from strlen.
  static const char BOMWithPayload[] = "\xfe\xff\x00" "asdf";
  HasBOM = hasUTF16ByteOrderMark(
      ArrayRef<char>(BOMWithPayload, sizeof(BOMWithPayload) - 1));
  EXPECT_TRUE(HasBOM);

  // Inputs shorter than two bytes are expected to report no BOM.
  HasBOM = hasUTF16ByteOrderMark(ArrayRef<char>());
  EXPECT_FALSE(HasBOM);
  HasBOM = hasUTF16ByteOrderMark(ArrayRef<char>("\xfe", 1));
  EXPECT_FALSE(HasBOM);
}