summaryrefslogtreecommitdiff
path: root/unittests
diff options
context:
space:
mode:
author: Alexander Kornienko <alexfh@google.com> 2013-08-07 00:07:07 +0000
committer: Alexander Kornienko <alexfh@google.com> 2013-08-07 00:07:07 +0000
commit: ece0bec0c824e71f062656ed5c727baf2a7bfc90 (patch)
tree: a60c6a3b376085470d329dca710518c256eb7f4e /unittests
parent: f65993999079bcde7b04373eca9ffa2be0e7ffe9 (diff)
download: llvm-ece0bec0c824e71f062656ed5c727baf2a7bfc90.tar.gz
llvm-ece0bec0c824e71f062656ed5c727baf2a7bfc90.tar.bz2
llvm-ece0bec0c824e71f062656ed5c727baf2a7bfc90.tar.xz
Implemented llvm::sys::locale::columnWidth and isPrint for the case of a generic UTF-8-capable terminal.
Summary: This is a second attempt to get this right. After reading the Unicode Standard, I came up with code that uses definitions of "printable" and "column width" more suitable for terminal output (i.e. fixed-width fonts and special treatment of many control characters). The implementation here can probably be used for Windows and MacOS if someone can test it properly.
The patch addresses PR14910.
Reviewers: jordan_rose, gribozavr
CC: llvm-commits
Differential Revision: http://llvm-reviews.chandlerc.com/D1253
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@187837 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'unittests')
-rw-r--r-- unittests/Support/CMakeLists.txt 1
-rw-r--r-- unittests/Support/LocaleTest.cpp 86
2 files changed, 87 insertions, 0 deletions
diff --git a/unittests/Support/CMakeLists.txt b/unittests/Support/CMakeLists.txt
index 2cbe730ef7..5f26d5d745 100644
--- a/unittests/Support/CMakeLists.txt
+++ b/unittests/Support/CMakeLists.txt
@@ -19,6 +19,7 @@ add_llvm_unittest(SupportTests
FileOutputBufferTest.cpp
IntegersSubsetTest.cpp
LeakDetectorTest.cpp
+ LocaleTest.cpp
ManagedStatic.cpp
MathExtrasTest.cpp
MD5Test.cpp
diff --git a/unittests/Support/LocaleTest.cpp b/unittests/Support/LocaleTest.cpp
new file mode 100644
index 0000000000..d48eb559f0
--- /dev/null
+++ b/unittests/Support/LocaleTest.cpp
@@ -0,0 +1,86 @@
+//===- unittests/Support/LocaleTest.cpp - Locale.h tests ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Locale.h"
+#include "gtest/gtest.h"
+
+namespace llvm {
+namespace sys {
+namespace locale {
+namespace {
+
+TEST(Locale, columnWidth) { // Non-negative results are column counts; the -1/-2 cases are labeled below.
+ EXPECT_EQ(0, columnWidth("")); // empty string
+ EXPECT_EQ(1, columnWidth(" "));
+ EXPECT_EQ(1, columnWidth("a"));
+ EXPECT_EQ(1, columnWidth("~"));
+
+ EXPECT_EQ(6, columnWidth("abcdef"));
+
+ EXPECT_EQ(-1, columnWidth("\x01")); // 0001 control character
+ EXPECT_EQ(-1, columnWidth("aaaaaaaaaa\x01")); // still -1 when printable text precedes it
+ EXPECT_EQ(-1, columnWidth("\342\200\213")); // 200B ZERO WIDTH SPACE
+
+ EXPECT_EQ(0, columnWidth("\314\200")); // 0300 COMBINING GRAVE ACCENT
+ EXPECT_EQ(1, columnWidth("\340\270\201")); // 0E01 THAI CHARACTER KO KAI
+ EXPECT_EQ(2, columnWidth("\344\270\200")); // 4E00 CJK UNIFIED IDEOGRAPH-4E00
+
+ EXPECT_EQ(4, columnWidth("\344\270\200\344\270\200")); // 4E00 twice: 2 + 2
+ EXPECT_EQ(3, columnWidth("q\344\270\200")); // 'q' + 4E00: 1 + 2
+ EXPECT_EQ(3, columnWidth("\314\200\340\270\201\344\270\200")); // 0300 + 0E01 + 4E00: 0 + 1 + 2
+
+ // Invalid UTF-8 strings, columnWidth should error out.
+ EXPECT_EQ(-2, columnWidth("\344")); // 3-byte lead byte with no continuation bytes
+ EXPECT_EQ(-2, columnWidth("\344\270")); // 3-byte sequence missing its last byte
+ EXPECT_EQ(-2, columnWidth("\344\270\033")); // ESC (0x1B) where a continuation byte is required
+ EXPECT_EQ(-2, columnWidth("\344\270\300")); // 0xC0 is not a continuation byte
+ EXPECT_EQ(-2, columnWidth("\377\366\355")); // 0xFF is never a valid UTF-8 byte
+
+ EXPECT_EQ(-2, columnWidth("qwer\344")); // same malformed tails, after valid ASCII
+ EXPECT_EQ(-2, columnWidth("qwer\344\270"));
+ EXPECT_EQ(-2, columnWidth("qwer\344\270\033"));
+ EXPECT_EQ(-2, columnWidth("qwer\344\270\300"));
+ EXPECT_EQ(-2, columnWidth("qwer\377\366\355"));
+
+ // UTF-8 sequences longer than 4 bytes correspond to unallocated Unicode
+ // characters.
+ EXPECT_EQ(-2, columnWidth("\370\200\200\200\200")); // 5-byte form (lead byte 0xF8); that form would begin at U+200000
+ EXPECT_EQ(-2, columnWidth("\374\200\200\200\200\200")); // 6-byte form (lead byte 0xFC); that form would begin at U+4000000
+}
+
+TEST(Locale, isPrint) { // Probes isPrint() at code points on either side of range boundaries.
+ EXPECT_FALSE(isPrint(0)); // <control-0000>..<control-001F>
+ EXPECT_FALSE(isPrint(0x01));
+ EXPECT_FALSE(isPrint(0x1F));
+ EXPECT_TRUE(isPrint(' '));
+ EXPECT_TRUE(isPrint('A'));
+ EXPECT_TRUE(isPrint('~'));
+ EXPECT_FALSE(isPrint(0x7F)); // <control-007F>..<control-009F>
+ EXPECT_FALSE(isPrint(0x90));
+ EXPECT_FALSE(isPrint(0x9F));
+
+ EXPECT_TRUE(isPrint(0xAC)); // NOT SIGN
+ EXPECT_FALSE(isPrint(0xAD)); // SOFT HYPHEN
+ EXPECT_TRUE(isPrint(0xAE)); // REGISTERED SIGN
+
+ EXPECT_TRUE(isPrint(0x0377)); // GREEK SMALL LETTER PAMPHYLIAN DIGAMMA
+ EXPECT_FALSE(isPrint(0x0378)); // <reserved-0378>..<reserved-0379>
+
+ EXPECT_FALSE(isPrint(0x0600)); // ARABIC NUMBER SIGN
+
+ EXPECT_FALSE(isPrint(0x1FFFF)); // <reserved-1F774>..<noncharacter-1FFFF>
+ EXPECT_TRUE(isPrint(0x20000)); // CJK UNIFIED IDEOGRAPH-20000
+
+ EXPECT_FALSE(isPrint(0x10FFFF)); // noncharacter
+}
+
+} // namespace
+} // namespace locale
+} // namespace sys
+} // namespace llvm