Line data Source code
1 : // Copyright (c) 2013-2025 Made to Order Software Corp. All Rights Reserved
2 : //
3 : // https://snapwebsites.org/project/libutf8
4 : // contact@m2osw.com
5 : //
6 : // This program is free software: you can redistribute it and/or modify
7 : // it under the terms of the GNU General Public License as published by
8 : // the Free Software Foundation, either version 3 of the License, or
9 : // (at your option) any later version.
10 : //
11 : // This program is distributed in the hope that it will be useful,
12 : // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 : // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 : // GNU General Public License for more details.
15 : //
16 : // You should have received a copy of the GNU General Public License
17 : // along with this program. If not, see <https://www.gnu.org/licenses/>.
18 :
19 : // libutf8
20 : //
21 : #include <libutf8/exception.h>
22 : #include <libutf8/libutf8.h>
23 :
24 :
25 : // unit test
26 : //
27 : #include "catch_main.h"
28 :
29 :
30 : // snapdev
31 : //
32 : #include <snapdev/hexadecimal_string.h>
33 :
34 :
35 : // C++
36 : //
37 : #include <cctype>
38 : #include <iostream>
39 : #include <iomanip>
40 :
41 :
42 : // last include
43 : //
44 : #include <snapdev/poison.h>
45 :
46 :
47 :
48 3 : CATCH_TEST_CASE("make_valid", "[strings][valid][u8]") // feeds truncated UTF-8 sequences to libutf8::make_u8string_valid() and expects each one to be replaced by fix_char
49 : {
50 3 : CATCH_START_SECTION("make_valid: test bad encoding (1 byte when 2 necessary)")
51 : {
52 1921 : for(char32_t two_bytes(0x80); two_bytes < 0x800; ++two_bytes) // U+0080..U+07FF: the code points that take 2 bytes in UTF-8
53 : {
54 1920 : char const byte1(static_cast<char>((two_bytes >> 6) | 0xC0)); // lead byte (110xxxxx) only; the continuation byte is deliberately never appended
55 1920 : char32_t const vc1(random_char(SNAP_CATCH2_NAMESPACE::character_t::CHARACTER_UNICODE)); // character placed before the bad byte; random_char() is declared outside this file -- presumably returns a valid code point, confirm
56 1920 : char32_t const vc2(random_char(SNAP_CATCH2_NAMESPACE::character_t::CHARACTER_UNICODE)); // character placed after the bad byte
57 1920 : char32_t const fix_char(random_char(SNAP_CATCH2_NAMESPACE::character_t::CHARACTER_UNICODE)); // replacement character handed to make_u8string_valid()
58 1920 : std::string invalid_string;
59 1920 : invalid_string += vc1; // NOTE(review): appending a char32_t to a std::string presumably goes through an operator += overload from libutf8 that UTF-8 encodes it -- confirm
60 1920 : invalid_string += byte1;
61 1920 : invalid_string += vc2;
62 1920 : std::string expected_string; // same as the input except that fix_char stands where the lone lead byte was
63 1920 : expected_string += vc1;
64 1920 : expected_string += fix_char;
65 1920 : expected_string += vc2;
66 1920 : CATCH_REQUIRE_FALSE(libutf8::make_u8string_valid(invalid_string, fix_char)); // the call is expected to return false for this invalid input
67 1920 : CATCH_REQUIRE(invalid_string == expected_string); // ...and to have edited invalid_string itself
68 1920 : }
69 : }
70 3 : CATCH_END_SECTION()
71 :
72 3 : CATCH_START_SECTION("make_valid: test bad encoding (2 bytes when 3 necessary)")
73 : {
74 63489 : for(char32_t two_bytes(0x800); two_bytes < 0x10000; ++two_bytes) // U+0800..U+FFFF: 3-byte code points; NOTE(review): the variable is still named two_bytes here
75 : {
76 : // Note: this includes the UTF-16 surrogates which are also
77 : // considered invalid
78 : //
79 63488 : char const byte1(static_cast<char>((two_bytes >> 12) | 0xE0)); // lead byte (1110xxxx)
80 63488 : char const byte2(((two_bytes >> 6) & 0x3F) | 0x80); // first continuation byte (10xxxxxx), the second one is left out; NOTE(review): no static_cast<char> here, unlike byte1
81 63488 : char32_t const vc1(random_char(SNAP_CATCH2_NAMESPACE::character_t::CHARACTER_UNICODE)); // character placed before the truncated sequence
82 63488 : char32_t const vc2(random_char(SNAP_CATCH2_NAMESPACE::character_t::CHARACTER_UNICODE)); // character placed after the truncated sequence
83 63488 : char32_t const fix_char(random_char(SNAP_CATCH2_NAMESPACE::character_t::CHARACTER_UNICODE)); // replacement character handed to make_u8string_valid()
84 63488 : std::string invalid_string;
85 63488 : invalid_string += vc1;
86 63488 : invalid_string += byte1;
87 63488 : invalid_string += byte2;
88 63488 : invalid_string += vc2;
89 63488 : std::string expected_string; // byte1 + byte2 together are expected to become one single fix_char
90 63488 : expected_string += vc1;
91 63488 : expected_string += fix_char;
92 63488 : expected_string += vc2;
93 63488 : CATCH_REQUIRE_FALSE(libutf8::make_u8string_valid(invalid_string, fix_char)); // the call is expected to return false for this invalid input
94 63488 : CATCH_REQUIRE(invalid_string == expected_string); // ...and to have edited invalid_string itself
95 63488 : }
96 : }
97 3 : CATCH_END_SECTION()
98 :
99 3 : CATCH_START_SECTION("make_valid: test bad encoding (3 bytes when 4 necessary)")
100 : {
101 1048577 : for(char32_t two_bytes(0x10000); two_bytes < 0x110000; ++two_bytes) // U+10000..U+10FFFF: 4-byte code points; NOTE(review): the variable is still named two_bytes here
102 : {
103 1048576 : char const byte1(static_cast<char>((two_bytes >> 18) | 0xF0)); // lead byte (11110xxx)
104 1048576 : char const byte2(((two_bytes >> 12) & 0x3F) | 0x80); // first continuation byte (10xxxxxx)
105 1048576 : char const byte3(((two_bytes >> 6) & 0x3F) | 0x80); // second continuation byte; the third and last one is left out
106 1048576 : char32_t const vc1(random_char(SNAP_CATCH2_NAMESPACE::character_t::CHARACTER_UNICODE)); // character placed before the truncated sequence
107 1048576 : char32_t const vc2(random_char(SNAP_CATCH2_NAMESPACE::character_t::CHARACTER_UNICODE)); // character placed after the truncated sequence
108 1048576 : char32_t const fix_char(random_char(SNAP_CATCH2_NAMESPACE::character_t::CHARACTER_UNICODE)); // replacement character handed to make_u8string_valid()
109 1048576 : std::string invalid_string;
110 1048576 : invalid_string += vc1;
111 1048576 : invalid_string += byte1;
112 1048576 : invalid_string += byte2;
113 1048576 : invalid_string += byte3;
114 1048576 : invalid_string += vc2;
115 1048576 : std::string expected_string; // byte1 + byte2 + byte3 together are expected to become one single fix_char
116 1048576 : expected_string += vc1;
117 1048576 : expected_string += fix_char;
118 1048576 : expected_string += vc2;
119 1048576 : CATCH_REQUIRE_FALSE(libutf8::make_u8string_valid(invalid_string, fix_char)); // the call is expected to return false for this invalid input
120 1048576 : CATCH_REQUIRE(invalid_string == expected_string); // ...and to have edited invalid_string itself
121 1048576 : }
122 : }
123 3 : CATCH_END_SECTION()
124 3 : }
125 :
126 :
127 :
128 : // vim: ts=4 sw=4 et
|