Line data Source code
1 : // Copyright (c) 2013-2022 Made to Order Software Corporation
2 : //
3 : // https://snapwebsites.org/project/libutf8
4 : // contact@m2osw.com
5 : //
6 : // This program is free software; you can redistribute it and/or modify
7 : // it under the terms of the GNU General Public License as published by
8 : // the Free Software Foundation; either version 2 of the License, or
9 : // (at your option) any later version.
10 : //
11 : // This program is distributed in the hope that it will be useful,
12 : // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 : // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 : // GNU General Public License for more details.
15 : //
16 : // You should have received a copy of the GNU General Public License along
17 : // with this program; if not, write to the Free Software Foundation, Inc.,
18 : // 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
19 :
20 : // libutf8
21 : //
22 : #include <libutf8/exception.h>
23 : #include <libutf8/libutf8.h>
24 :
25 :
26 : // unit test
27 : //
28 : #include "catch_main.h"
29 :
30 :
31 : // snapdev
32 : //
33 : #include <snapdev/hexadecimal_string.h>
34 :
35 :
36 : // C++
37 : //
38 : #include <cctype>
39 : #include <iostream>
40 : #include <iomanip>
41 :
42 :
43 : // last include
44 : //
45 : #include <snapdev/poison.h>
46 :
47 :
48 :
49 5 : CATCH_TEST_CASE("make_valid", "[strings][valid][u8]") // verifies that libutf8::make_u8string_valid() repairs truncated multi-byte UTF-8 sequences
50 : { // each section builds vc1 + <incomplete sequence> + vc2 and expects vc1 + fix_char + vc2 after the call
51 6 : CATCH_START_SECTION("make_valid: test bad encoding (1 byte when 2 necessary)")
52 : {
53 1921 : for(char32_t two_bytes(0x80); two_bytes < 0x800; ++two_bytes) // every code point whose UTF-8 encoding takes 2 bytes
54 : {
55 1920 : char const byte1(static_cast<char>((two_bytes >> 6) | 0xC0)); // lead byte only (0xC2..0xDF); the continuation byte is deliberately never appended
56 1920 : char32_t const vc1(random_char(SNAP_CATCH2_NAMESPACE::character_t::CHARACTER_UNICODE)); // character placed before the broken sequence; presumably a valid code point -- random_char() is defined by the test harness, confirm there
57 1920 : char32_t const vc2(random_char(SNAP_CATCH2_NAMESPACE::character_t::CHARACTER_UNICODE)); // character placed after the broken sequence
58 1920 : char32_t const fix_char(random_char(SNAP_CATCH2_NAMESPACE::character_t::CHARACTER_UNICODE)); // replacement character handed to make_u8string_valid()
59 3840 : std::string invalid_string;
60 1920 : invalid_string += vc1; // char32_t appended to a std::string; presumably a libutf8 operator+= that UTF-8 encodes it -- TODO confirm in libutf8.h
61 1920 : invalid_string += byte1; // raw byte: the truncated sequence
62 1920 : invalid_string += vc2;
63 3840 : std::string expected_string;
64 1920 : expected_string += vc1;
65 1920 : expected_string += fix_char; // exactly one fix_char is expected in place of the truncated sequence
66 1920 : expected_string += vc2;
67 1920 : CATCH_REQUIRE_FALSE(libutf8::make_u8string_valid(invalid_string, fix_char)); // must return false for this input
68 1920 : CATCH_REQUIRE(invalid_string == expected_string); // the string is modified in place, so it must now match the expected text
69 : }
70 : }
71 : CATCH_END_SECTION()
72 :
73 6 : CATCH_START_SECTION("make_valid: test bad encoding (2 bytes when 3 necessary)")
74 : {
75 63489 : for(char32_t two_bytes(0x800); two_bytes < 0x10000; ++two_bytes) // every code point in the 3-byte range; NOTE(review): the name "two_bytes" is a leftover from the first section
76 : {
77 : // Note: this range includes the UTF-16 surrogates (0xD800..0xDFFF), which are also
78 : // considered invalid
79 : //
80 63488 : char const byte1(static_cast<char>((two_bytes >> 12) | 0xE0)); // lead byte of the 3-byte form (0xE0..0xEF)
81 63488 : char const byte2(((two_bytes >> 6) & 0x3F) | 0x80); // first continuation byte; the second one is deliberately omitted. NOTE(review): implicit narrowing to char, unlike byte1 which uses static_cast
82 63488 : char32_t const vc1(random_char(SNAP_CATCH2_NAMESPACE::character_t::CHARACTER_UNICODE)); // character placed before the broken sequence
83 63488 : char32_t const vc2(random_char(SNAP_CATCH2_NAMESPACE::character_t::CHARACTER_UNICODE)); // character placed after the broken sequence
84 63488 : char32_t const fix_char(random_char(SNAP_CATCH2_NAMESPACE::character_t::CHARACTER_UNICODE)); // replacement character handed to make_u8string_valid()
85 126976 : std::string invalid_string;
86 63488 : invalid_string += vc1;
87 63488 : invalid_string += byte1; // the two raw bytes form the truncated sequence
88 63488 : invalid_string += byte2;
89 63488 : invalid_string += vc2;
90 126976 : std::string expected_string;
91 63488 : expected_string += vc1;
92 63488 : expected_string += fix_char; // both raw bytes are expected to collapse into a single fix_char
93 63488 : expected_string += vc2;
94 63488 : CATCH_REQUIRE_FALSE(libutf8::make_u8string_valid(invalid_string, fix_char)); // must return false for this input
95 63488 : CATCH_REQUIRE(invalid_string == expected_string); // in-place result must match the expected text
96 : }
97 : }
98 : CATCH_END_SECTION()
99 :
100 6 : CATCH_START_SECTION("make_valid: test bad encoding (3 bytes when 4 necessary)")
101 : {
102 1048577 : for(char32_t two_bytes(0x10000); two_bytes < 0x110000; ++two_bytes) // every code point in the 4-byte range; NOTE(review): the name "two_bytes" is a leftover from the first section
103 : {
104 1048576 : char const byte1(static_cast<char>((two_bytes >> 18) | 0xF0)); // lead byte of the 4-byte form (0xF0..0xF4)
105 1048576 : char const byte2(((two_bytes >> 12) & 0x3F) | 0x80); // first continuation byte. NOTE(review): implicit narrowing to char, unlike byte1
106 1048576 : char const byte3(((two_bytes >> 6) & 0x3F) | 0x80); // second continuation byte; the third one is deliberately omitted
107 1048576 : char32_t const vc1(random_char(SNAP_CATCH2_NAMESPACE::character_t::CHARACTER_UNICODE)); // character placed before the broken sequence
108 1048576 : char32_t const vc2(random_char(SNAP_CATCH2_NAMESPACE::character_t::CHARACTER_UNICODE)); // character placed after the broken sequence
109 1048576 : char32_t const fix_char(random_char(SNAP_CATCH2_NAMESPACE::character_t::CHARACTER_UNICODE)); // replacement character handed to make_u8string_valid()
110 2097152 : std::string invalid_string;
111 1048576 : invalid_string += vc1;
112 1048576 : invalid_string += byte1; // the three raw bytes form the truncated sequence
113 1048576 : invalid_string += byte2;
114 1048576 : invalid_string += byte3;
115 1048576 : invalid_string += vc2;
116 2097152 : std::string expected_string;
117 1048576 : expected_string += vc1;
118 1048576 : expected_string += fix_char; // all three raw bytes are expected to collapse into a single fix_char
119 1048576 : expected_string += vc2;
120 1048576 : CATCH_REQUIRE_FALSE(libutf8::make_u8string_valid(invalid_string, fix_char)); // must return false for this input
121 1048576 : CATCH_REQUIRE(invalid_string == expected_string); // in-place result must match the expected text
122 : }
123 : }
124 : CATCH_END_SECTION()
125 9 : }
126 :
127 :
128 :
129 : // vim: ts=4 sw=4 et
|