Line data Source code
1 : // Copyright (c) 2013-2025 Made to Order Software Corp. All Rights Reserved
2 : //
3 : // https://snapwebsites.org/project/libutf8
4 : // contact@m2osw.com
5 : //
6 : // This program is free software: you can redistribute it and/or modify
7 : // it under the terms of the GNU General Public License as published by
8 : // the Free Software Foundation, either version 3 of the License, or
9 : // (at your option) any later version.
10 : //
11 : // This program is distributed in the hope that it will be useful,
12 : // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 : // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 : // GNU General Public License for more details.
15 : //
16 : // You should have received a copy of the GNU General Public License
17 : // along with this program. If not, see <https://www.gnu.org/licenses/>.
18 :
19 : // libutf8
20 : //
21 : #include <libutf8/exception.h>
22 : #include <libutf8/libutf8.h>
23 :
24 :
25 : // unit test
26 : //
27 : #include "catch_main.h"
28 :
29 :
30 : // C++
31 : //
32 : #include <cctype>
33 : #include <iostream>
34 : #include <iomanip>
35 :
36 :
37 : // last include
38 : //
39 : #include <snapdev/poison.h>
40 :
41 :
42 :
43 1 : CATCH_TEST_CASE("string_length", "[strings][valid][length][u8][u16][u32]")
44 : {
45 1 : CATCH_START_SECTION("string_length: length of valid Unicode strings")
46 : {
47 101 : for(int idx(0); idx < 100; ++idx)
48 : {
49 100 : std::size_t const length(rand() % 100 + 1);
50 100 : std::u32string str32;
51 5330 : for(std::size_t j(0); j < length; ++j)
52 : {
53 5230 : char32_t const c(SNAP_CATCH2_NAMESPACE::random_char(SNAP_CATCH2_NAMESPACE::character_t::CHARACTER_ZUNICODE));
54 5230 : str32 += c;
55 : }
56 100 : CATCH_REQUIRE(libutf8::is_valid_unicode(str32));
57 100 : CATCH_REQUIRE(str32.length() == length);
58 :
59 100 : std::string str8(libutf8::to_u8string(str32));
60 100 : CATCH_REQUIRE(libutf8::is_valid_utf8(str8));
61 100 : CATCH_REQUIRE(str8.length() >= length);
62 100 : CATCH_REQUIRE(libutf8::u8length(str8) == length);
63 :
64 100 : std::u16string str16(libutf8::to_u16string(str8));
65 100 : CATCH_REQUIRE(libutf8::is_valid_utf16(str16));
66 100 : CATCH_REQUIRE(str16.length() >= length);
67 100 : CATCH_REQUIRE(static_cast<std::size_t>(libutf8::u16length(str16)) == length);
68 100 : }
69 : }
70 1 : CATCH_END_SECTION()
71 1 : }
72 :
73 :
74 1 : CATCH_TEST_CASE("invalid_utf16_string_length", "[strings][invalid][length][u16]")
75 : {
76 1 : CATCH_START_SECTION("invalid_utf16_string_length: invalid UTF-16 returns -1 for length")
77 : {
78 101 : for(int idx(0); idx < 100; ++idx)
79 : {
80 100 : std::size_t const length(rand() % 30 + 5);
81 100 : char16_t bad_char(rand() & 0x03FF);
82 100 : std::size_t bad_pos(length / 2);
83 100 : switch(idx % 3)
84 : {
85 34 : case 0:
86 34 : bad_char += 0xDC00; // low without a high
87 34 : break;
88 :
89 33 : case 1:
90 33 : bad_char += 0xD800; // high not followed by a low
91 33 : break;
92 :
93 33 : case 2:
94 33 : bad_char += 0xD800; // high followed by u'\0'
95 33 : bad_pos = length - 1;
96 33 : break;
97 :
98 : }
99 200 : std::u16string str16;
100 2209 : for(std::size_t j(0); j < length; ++j)
101 : {
102 2109 : char32_t const wc(SNAP_CATCH2_NAMESPACE::random_char(SNAP_CATCH2_NAMESPACE::character_t::CHARACTER_ZUNICODE));
103 2109 : str16 += libutf8::to_u16string(wc);
104 2109 : if(j == bad_pos)
105 : {
106 100 : str16 += bad_char;
107 : }
108 : }
109 :
110 100 : CATCH_REQUIRE_FALSE(libutf8::is_valid_utf16(str16));
111 100 : CATCH_REQUIRE(libutf8::u16length(str16) == -1);
112 : }
113 : }
114 1 : CATCH_END_SECTION()
115 1 : }
116 :
117 :
118 : // vim: ts=4 sw=4 et
|