Line data Source code
1 : // Copyright (c) 2021-2025 Made to Order Software Corp. All Rights Reserved
2 : //
3 : // https://snapwebsites.org/project/libutf8
4 : // contact@m2osw.com
5 : //
6 : // This program is free software: you can redistribute it and/or modify
7 : // it under the terms of the GNU General Public License as published by
8 : // the Free Software Foundation, either version 3 of the License, or
9 : // (at your option) any later version.
10 : //
11 : // This program is distributed in the hope that it will be useful,
12 : // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 : // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 : // GNU General Public License for more details.
15 : //
16 : // You should have received a copy of the GNU General Public License
17 : // along with this program. If not, see <https://www.gnu.org/licenses/>.
18 :
19 : // libutf8
20 : //
21 : #include <libutf8/base.h>
22 : #include <libutf8/exception.h>
23 :
24 :
25 : // unit test
26 : //
27 : #include "catch_main.h"
28 :
29 :
30 : // C++
31 : //
32 : #include <cctype>
33 : #include <iostream>
34 :
35 :
36 : // last include
37 : //
38 : #include <snapdev/poison.h>
39 :
40 :
41 :
42 6 : CATCH_TEST_CASE("character_conversions", "[characters]")
43 : { // Round trip tests: encode code points with libutf8::wctombs(), verify the raw bytes by hand, then decode them again with libutf8::mbstowc().
44 6 : CATCH_START_SECTION("character_conversions: Verify minimum buffer length for MBS conversions")
45 : { // the longest encoding checked below is 4 bytes followed by a NUL (see the buf[4] check), hence at least 5
46 1 : CATCH_REQUIRE(libutf8::MBS_MIN_BUFFER_LENGTH >= 5);
47 : }
48 6 : CATCH_END_SECTION()
49 :
50 6 : CATCH_START_SECTION("character_conversions: Test UTF-32 to UTF-8 with characters between 0x00 and 0x7F inclusive")
51 : { // one byte encodings
52 129 : for(char32_t wc(0); wc < 0x000080; ++wc)
53 : {
54 128 : char buf[libutf8::MBS_MIN_BUFFER_LENGTH];
55 128 : CATCH_REQUIRE_THROWS_AS(libutf8::wctombs(buf, wc, 0), libutf8::libutf8_logic_exception); // a buffer length of 0 is expected to raise a logic exception
56 128 : CATCH_REQUIRE(libutf8::wctombs(buf, wc, sizeof(buf)) == 1);
57 : // the single byte is the code point itself and it is followed by a NUL terminator
58 128 : CATCH_REQUIRE(static_cast<char32_t>(buf[0]) == wc);
59 128 : CATCH_REQUIRE(buf[1] == '\0');
60 : // decode again; 'back' starts as a rand() value, presumably so that a stale result cannot satisfy the check
61 128 : char32_t back(rand());
62 128 : char const * s(buf);
63 128 : size_t len(1);
64 128 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == 1);
65 128 : CATCH_REQUIRE(back == wc);
66 : }
67 : }
68 6 : CATCH_END_SECTION()
69 :
70 6 : CATCH_START_SECTION("character_conversions: Test UTF-32 to UTF-8 with characters between 0x80 and 0x7FF inclusive")
71 : { // two byte encodings
72 1921 : for(char32_t wc(0x000080); wc < 0x000800; ++wc)
73 : {
74 1920 : char buf[libutf8::MBS_MIN_BUFFER_LENGTH];
75 1920 : CATCH_REQUIRE_THROWS_AS(libutf8::wctombs(buf, wc, 0), libutf8::libutf8_logic_exception);
76 1920 : CATCH_REQUIRE_THROWS_AS(libutf8::wctombs(buf, wc, 1), libutf8::libutf8_logic_exception); // 1 byte is too small as well
77 1920 : CATCH_REQUIRE(libutf8::wctombs(buf, wc, sizeof(buf)) == 2);
78 : // rebuild the code point by hand: 5 payload bits from the first byte, 6 from the second
79 1920 : char32_t const found(((static_cast<char32_t>(buf[0]) & 0x1F) << 6)
80 1920 : | ((static_cast<char32_t>(buf[1]) & 0x3F) << 0));
81 1920 : CATCH_REQUIRE(found == wc);
82 1920 : CATCH_REQUIRE(buf[2] == '\0');
83 : // then decode through the library and compare with the original code point
84 1920 : char32_t back(rand());
85 1920 : char const * s(buf);
86 1920 : size_t len(2);
87 1920 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == 2);
88 1920 : CATCH_REQUIRE(back == wc);
89 : }
90 : }
91 6 : CATCH_END_SECTION()
92 :
93 6 : CATCH_START_SECTION("character_conversions: Test UTF-32 to UTF-8 with characters between 0x800 and 0xFFFF inclusive")
94 : { // three byte encodings
95 61442 : for(char32_t wc(0x000800); wc < 0x010000; ++wc)
96 : {
97 61441 : if(wc >= 0xD800 && wc <= 0xDFFF)
98 : {
99 : // skip UTF-16 surrogates
100 : // (wc is set to 0xDFFF so the loop's ++wc resumes at 0xE000)
101 1 : wc = 0xDFFF;
102 1 : continue;
103 : }
104 : // the undersized buffer checks only run for about 1 in 10 code points (rand() % 10 == 0)
105 61440 : char buf[libutf8::MBS_MIN_BUFFER_LENGTH];
106 61440 : if(rand() % 10 == 0)
107 : {
108 6213 : CATCH_REQUIRE_THROWS_AS(libutf8::wctombs(buf, wc, 0), libutf8::libutf8_logic_exception);
109 6213 : CATCH_REQUIRE_THROWS_AS(libutf8::wctombs(buf, wc, 1), libutf8::libutf8_logic_exception);
110 6213 : CATCH_REQUIRE_THROWS_AS(libutf8::wctombs(buf, wc, 2), libutf8::libutf8_logic_exception);
111 : }
112 61440 : CATCH_REQUIRE(libutf8::wctombs(buf, wc, sizeof(buf)) == 3);
113 : // rebuild the code point by hand: 4 + 6 + 6 payload bits
114 61440 : char32_t const found(((static_cast<char32_t>(buf[0]) & 0x0F) << 12)
115 61440 : | ((static_cast<char32_t>(buf[1]) & 0x3F) << 6)
116 61440 : | ((static_cast<char32_t>(buf[2]) & 0x3F) << 0));
117 61440 : CATCH_REQUIRE(found == wc);
118 61440 : CATCH_REQUIRE(buf[3] == '\0');
119 : // then decode through the library and compare with the original code point
120 61440 : char32_t back(rand());
121 61440 : char const * s(buf);
122 61440 : size_t len(3);
123 61440 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == 3);
124 61440 : CATCH_REQUIRE(back == wc);
125 : }
126 : }
127 6 : CATCH_END_SECTION()
128 :
129 6 : CATCH_START_SECTION("character_conversions: Test UTF-32 to UTF-8 with characters between 0x10000 and 0x10FFFF inclusive")
130 : { // four byte encodings
131 1048577 : for(char32_t wc(0x010000); wc < 0x110000; ++wc)
132 : {
133 1048576 : char buf[libutf8::MBS_MIN_BUFFER_LENGTH];
134 1048576 : if(rand() % 100 == 0)
135 : { // sampled: the undersized buffer checks run for about 1 in 100 code points
136 10482 : CATCH_REQUIRE_THROWS_AS(libutf8::wctombs(buf, wc, 0), libutf8::libutf8_logic_exception);
137 10482 : CATCH_REQUIRE_THROWS_AS(libutf8::wctombs(buf, wc, 1), libutf8::libutf8_logic_exception);
138 10482 : CATCH_REQUIRE_THROWS_AS(libutf8::wctombs(buf, wc, 2), libutf8::libutf8_logic_exception);
139 10482 : CATCH_REQUIRE_THROWS_AS(libutf8::wctombs(buf, wc, 3), libutf8::libutf8_logic_exception);
140 : }
141 1048576 : CATCH_REQUIRE(libutf8::wctombs(buf, wc, sizeof(buf)) == 4);
142 : // rebuild the code point by hand: 3 + 6 + 6 + 6 payload bits
143 1048576 : char32_t const found(((static_cast<char32_t>(buf[0]) & 0x07) << 18)
144 1048576 : | ((static_cast<char32_t>(buf[1]) & 0x3F) << 12)
145 1048576 : | ((static_cast<char32_t>(buf[2]) & 0x3F) << 6)
146 1048576 : | ((static_cast<char32_t>(buf[3]) & 0x3F) << 0));
147 1048576 : CATCH_REQUIRE(found == wc);
148 1048576 : CATCH_REQUIRE(buf[4] == '\0');
149 : // then decode through the library and compare with the original code point
150 1048576 : char32_t back(rand());
151 1048576 : char const * s(buf);
152 1048576 : size_t len(4);
153 1048576 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == 4);
154 1048576 : CATCH_REQUIRE(back == wc);
155 : }
156 : }
157 6 : CATCH_END_SECTION()
158 :
159 6 : CATCH_START_SECTION("character_conversions: Test UTF-32 to UTF-8 with an empty input string")
160 : { // NOTE(review): the title says UTF-32 to UTF-8 but the body calls mbstowc() with len == 0, i.e. it decodes an empty UTF-8 input
161 11 : for(char32_t repeat(0); repeat < 10; ++repeat)
162 : {
163 10 : char buf[libutf8::MBS_MIN_BUFFER_LENGTH];
164 10 : char copy[libutf8::MBS_MIN_BUFFER_LENGTH];
165 60 : for(size_t idx(0); idx < libutf8::MBS_MIN_BUFFER_LENGTH; ++idx)
166 : { // random content, duplicated so the comparison at the end can detect any write to buf
167 50 : buf[idx] = rand();
168 50 : copy[idx] = buf[idx];
169 : }
170 10 : char const * s(buf);
171 10 : char32_t null = rand();
172 10 : size_t len(0);
173 10 : CATCH_REQUIRE(libutf8::mbstowc(null, s, len) == 0); // nothing to read: 0 is expected, not -1
174 10 : CATCH_REQUIRE(null == '\0'); // the output character is expected to be reset to NUL
175 60 : for(size_t idx(0); idx < libutf8::MBS_MIN_BUFFER_LENGTH; ++idx)
176 : {
177 50 : CATCH_REQUIRE(copy[idx] == buf[idx]);
178 : }
179 : }
180 : }
181 6 : CATCH_END_SECTION()
182 6 : }
183 :
184 :
185 2 : CATCH_TEST_CASE("invalid_utf32_to_utf8", "[characters],[invalid]")
186 : { // wctombs() is expected to refuse values that cannot be encoded: it must return -1 and leave an empty string in the buffer.
187 2 : CATCH_START_SECTION("invalid_utf32_to_utf8: Verify that surrogates do not work in UTF-8")
188 : { // every UTF-16 surrogate (0xD800 to 0xDFFF) is tried
189 2049 : for(char32_t wc(0x00D800); wc < 0x00E000; ++wc)
190 : {
191 2048 : char buf[libutf8::MBS_MIN_BUFFER_LENGTH] // pre-filled with non-NUL bytes so the buf[0] == '\0' check below shows that wctombs() wrote the terminator
192 : {
193 : 'n', 'o', 't', 'e', 'm' //, 'p', 't', 'y'
194 : };
195 2048 : CATCH_REQUIRE(libutf8::wctombs(buf, wc, sizeof(buf)) == -1);
196 2048 : CATCH_REQUIRE(buf[0] == '\0');
197 : }
198 : }
199 2 : CATCH_END_SECTION()
200 :
201 2 : CATCH_START_SECTION("invalid_utf32_to_utf8: Verify that too large a number is not supported")
202 : { // 1000 random values above the last valid code point (0x10FFFF)
203 1001 : for(int repeat(0); repeat < 1000; ++repeat)
204 : {
205 1000 : char buf[libutf8::MBS_MIN_BUFFER_LENGTH] // pre-filled with non-NUL bytes, same reason as in the surrogate section
206 : {
207 : 'n', 'o', 't', 'e', 'm' //, 'p', 't', 'y'
208 : };
209 1000 : char32_t wc(0);
210 : do // draw again as long as the value is still a valid code point (below 0x110000)
211 : { // the shift is done as char32_t: rand() << 16 on a signed int overflows for large rand() values (undefined behavior before C++20)
212 1000 : wc = (static_cast<char32_t>(rand()) << 16) + (rand() & 0x0000FFFF);
213 : }
214 1000 : while(wc < 0x110000);
215 1000 : CATCH_REQUIRE(libutf8::wctombs(buf, wc, sizeof(buf)) == -1);
216 1000 : CATCH_REQUIRE(buf[0] == '\0');
217 : }
218 : }
219 2 : CATCH_END_SECTION()
220 2 : }
221 :
222 :
223 5 : CATCH_TEST_CASE("invalid_utf8_to_utf32", "[characters],[invalid]")
224 : { // mbstowc() is expected to reject malformed UTF-8: each case below requires a -1 return, wc == NOT_A_CHARACTER, and checks where the mb/len cursor ends up.
225 5 : CATCH_START_SECTION("invalid_utf8_to_utf32: Verify that surrogates do not work in UTF-8")
226 : {
227 2049 : for(char32_t wc(0x00D800); wc < 0x00E000; ++wc)
228 : { // the surrogate is encoded by hand as a 3 byte sequence (wctombs() is not used to build it)
229 : //int mbstowc(char32_t & wc, char const * & mb, size_t & len);
230 2048 : char buf[libutf8::MBS_MIN_BUFFER_LENGTH] = {};
231 2048 : buf[0] = static_cast<char>((wc >> 12) | 0xE0);
232 2048 : buf[1] = static_cast<char>(((wc >> 6) & 0x3F) | 0x80);
233 2048 : buf[2] = static_cast<char>((wc & 0x3F) | 0x80);
234 2048 : buf[3] = '\0';
235 2048 : char const * s = buf;
236 2048 : size_t len(3);
237 2048 : char32_t cwc(rand());
238 2048 : CATCH_REQUIRE(libutf8::mbstowc(cwc, s, len) == -1);
239 2048 : CATCH_REQUIRE(cwc == libutf8::NOT_A_CHARACTER);
240 2048 : char const c1(static_cast<char>((wc >> 12) | 0xE0)); // the input bytes must be left untouched
241 2048 : CATCH_REQUIRE(buf[0] == c1);
242 2048 : char const c2(static_cast<char>(((wc >> 6) & 0x3F) | 0x80));
243 2048 : CATCH_REQUIRE(buf[1] == c2);
244 2048 : char const c3(static_cast<char>((wc & 0x3F) | 0x80));
245 2048 : CATCH_REQUIRE(buf[2] == c3);
246 2048 : CATCH_REQUIRE(buf[3] == '\0');
247 2048 : CATCH_REQUIRE(s == buf + 3); // all 3 bytes were consumed
248 2048 : CATCH_REQUIRE(len == 0);
249 : }
250 : }
251 5 : CATCH_END_SECTION()
252 : // NOTE(review): the section below is disabled; it calls wctombs() rather than mbstowc()
253 : //CATCH_START_SECTION("invalid_utf8_to_utf32: Verify that too large a number is not supported")
254 : //{
255 : // for(int idx(0); idx < 1000; ++idx)
256 : // {
257 : // char buf[libutf8::MBS_MIN_BUFFER_LENGTH]
258 : // {
259 : // 'n', 'o', 't', 'e', 'm' //, 'p', 't', 'y'
260 : // };
261 : // char32_t wc(0);
262 : // do
263 : // {
264 : // wc = (rand() << 16) + (rand() & 0x0000FFFF);
265 : // }
266 : // while(wc < 0x110000);
267 : // CATCH_REQUIRE(libutf8::wctombs(buf, wc, sizeof(buf)) == -1);
268 : // CATCH_REQUIRE(buf[0] == libutf8::NOT_A_CHARACTER);
269 : // }
270 : //}
271 : //CATCH_END_SECTION()
272 :
273 5 : CATCH_START_SECTION("invalid_utf8_to_utf32: Test an invalid UTF-8 sequence (0x80 to 0x7FF)")
274 : {
275 1921 : for(char32_t wc(0x000080); wc < 0x000800; ++wc)
276 : {
277 1920 : char buf[libutf8::MBS_MIN_BUFFER_LENGTH];
278 1920 : CATCH_REQUIRE(libutf8::wctombs(buf, wc, sizeof(buf)) == 2);
279 : // sanity check of the valid 2 byte encoding before it gets corrupted
280 1920 : char32_t const found(((static_cast<char32_t>(buf[0]) & 0x1F) << 6)
281 1920 : | ((static_cast<char32_t>(buf[1]) & 0x3F) << 0));
282 1920 : CATCH_REQUIRE(found == wc);
283 1920 : CATCH_REQUIRE(buf[2] == '\0');
284 :
285 : // too short
286 : // (only 1 of the 2 bytes is made available)
287 1920 : char32_t back(rand());
288 1920 : char const * s(buf);
289 1920 : size_t len(1);
290 1920 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
291 :
292 : // invalid middle byte
293 : // (c ends up in 0x01-0x7F or 0xC0-0xFF, never a 0x80-0xBF continuation byte; the cursor must stop on that byte)
294 1920 : char const second_byte(buf[1]);
295 1920 : back = rand();
296 1920 : s = buf;
297 1920 : int c(rand() % (255 - 0x40) + 1);
298 1920 : if(c >= 0x80)
299 : {
300 631 : c += 0x40;
301 : }
302 1920 : buf[1] = static_cast<char>(c);
303 1920 : len = 2;
304 1920 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
305 1920 : CATCH_REQUIRE(back == libutf8::NOT_A_CHARACTER);
306 1920 : CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 1) == reinterpret_cast<void const *>(s));
307 1920 : CATCH_REQUIRE(len == 1);
308 1920 : buf[1] = second_byte;
309 :
310 : // invalid introducer (0x80 to 0xBF)
311 : // (a continuation byte in lead position; the following continuation byte is expected to be consumed too)
312 1920 : back = rand();
313 1920 : s = buf;
314 1920 : buf[0] = rand() % 64 + 0x80;
315 1920 : len = 2;
316 1920 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
317 1920 : CATCH_REQUIRE(back == libutf8::NOT_A_CHARACTER);
318 1920 : CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 2) == reinterpret_cast<void const *>(s));
319 1920 : CATCH_REQUIRE(len == 0);
320 :
321 : // invalid introducer (0xF8 to 0xFF)
322 : // (the following continuation byte is expected to be consumed too)
323 1920 : back = rand();
324 1920 : s = buf;
325 1920 : buf[0] = rand() % 8 + 0xF8;
326 1920 : len = 2;
327 1920 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
328 1920 : CATCH_REQUIRE(back == libutf8::NOT_A_CHARACTER);
329 1920 : CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 2) == reinterpret_cast<void const *>(s));
330 1920 : CATCH_REQUIRE(len == 0);
331 : }
332 : }
333 5 : CATCH_END_SECTION()
334 :
335 5 : CATCH_START_SECTION("invalid_utf8_to_utf32: Test an invalid UTF-8 sequence (0x800 to 0xFFFF)")
336 : {
337 61442 : for(char32_t wc(0x000800); wc < 0x010000; ++wc)
338 : {
339 61441 : if(wc >= 0xD800 && wc <= 0xDFFF)
340 : {
341 : // skip UTF-16 surrogates -- this is not the test for those
342 : // (wc is set to 0xDFFF so the loop's ++wc resumes at 0xE000)
343 1 : wc = 0xDFFF;
344 1 : continue;
345 : }
346 :
347 61440 : char buf[libutf8::MBS_MIN_BUFFER_LENGTH];
348 61440 : CATCH_REQUIRE(libutf8::wctombs(buf, wc, sizeof(buf)) == 3);
349 : // sanity check of the valid 3 byte encoding before it gets corrupted
350 61440 : char32_t const found(((static_cast<char32_t>(buf[0]) & 0x0F) << 12)
351 61440 : | ((static_cast<char32_t>(buf[1]) & 0x3F) << 6)
352 61440 : | ((static_cast<char32_t>(buf[2]) & 0x3F) << 0));
353 61440 : CATCH_REQUIRE(found == wc);
354 61440 : CATCH_REQUIRE(buf[3] == '\0');
355 :
356 : // too short
357 : // (only 2 of the 3 bytes are made available)
358 61440 : char32_t back(rand());
359 61440 : char const * s(buf);
360 61440 : size_t len(2);
361 61440 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
362 :
363 : // invalid middle byte
364 : // (c ends up in 0x01-0x7F or 0xC0-0xFF, never a 0x80-0xBF continuation byte; the cursor must stop on that byte)
365 61440 : char const second_byte(buf[1]);
366 61440 : back = rand();
367 61440 : s = buf;
368 61440 : int c(rand() % (255 - 0x40) + 1);
369 61440 : if(c >= 0x80)
370 : {
371 20577 : c += 0x40;
372 : }
373 61440 : buf[1] = static_cast<char>(c);
374 61440 : len = 3;
375 61440 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
376 61440 : CATCH_REQUIRE(back == libutf8::NOT_A_CHARACTER);
377 61440 : CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 1) == reinterpret_cast<void const *>(s));
378 61440 : CATCH_REQUIRE(len == 2);
379 61440 : buf[1] = second_byte;
380 : // invalid last byte (same corruption applied to the third byte)
381 61440 : char const third_byte(buf[2]);
382 61440 : back = rand();
383 61440 : s = buf;
384 61440 : c = rand() % (255 - 0x40) + 1;
385 61440 : if(c >= 0x80)
386 : {
387 20862 : c += 0x40;
388 : }
389 61440 : buf[2] = static_cast<char>(c);
390 61440 : len = 3;
391 61440 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
392 61440 : CATCH_REQUIRE(back == libutf8::NOT_A_CHARACTER);
393 61440 : CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 2) == reinterpret_cast<void const *>(s));
394 61440 : CATCH_REQUIRE(len == 1);
395 61440 : buf[2] = third_byte;
396 :
397 : // invalid introducer (0x80 to 0xBF)
398 : // (a continuation byte in lead position; the following continuation bytes are expected to be consumed too)
399 61440 : back = rand();
400 61440 : s = buf;
401 61440 : buf[0] = rand() % 64 + 0x80;
402 61440 : len = 3;
403 61440 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
404 61440 : CATCH_REQUIRE(back == libutf8::NOT_A_CHARACTER);
405 61440 : CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 3) == reinterpret_cast<void const *>(s));
406 61440 : CATCH_REQUIRE(len == 0);
407 :
408 : // invalid introducer (0xF8 to 0xFF)
409 : // (the following continuation bytes are expected to be consumed too)
410 61440 : back = rand();
411 61440 : s = buf;
412 61440 : buf[0] = rand() % 8 + 0xF8;
413 61440 : len = 3;
414 61440 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
415 61440 : CATCH_REQUIRE(back == libutf8::NOT_A_CHARACTER);
416 61440 : CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 3) == reinterpret_cast<void const *>(s));
417 61440 : CATCH_REQUIRE(len == 0);
418 : }
419 : }
420 5 : CATCH_END_SECTION()
421 :
422 5 : CATCH_START_SECTION("invalid_utf8_to_utf32: Test UTF-32 to UTF-8 with characters between 0x010000 and 0x110000 inclusive") // NOTE(review): title looks copy/pasted; the body corrupts 4 byte sequences like the two sections above (name kept so section filters keep working)
423 : {
424 1048577 : for(char32_t wc(0x010000); wc < 0x110000; ++wc)
425 : {
426 1048576 : char buf[libutf8::MBS_MIN_BUFFER_LENGTH];
427 1048576 : CATCH_REQUIRE(libutf8::wctombs(buf, wc, sizeof(buf)) == 4);
428 : // sanity check of the valid 4 byte encoding before it gets corrupted
429 1048576 : char32_t const found(((static_cast<char32_t>(buf[0]) & 0x07) << 18)
430 1048576 : | ((static_cast<char32_t>(buf[1]) & 0x3F) << 12)
431 1048576 : | ((static_cast<char32_t>(buf[2]) & 0x3F) << 6)
432 1048576 : | ((static_cast<char32_t>(buf[3]) & 0x3F) << 0));
433 1048576 : CATCH_REQUIRE(found == wc);
434 1048576 : CATCH_REQUIRE(buf[4] == '\0');
435 :
436 1048576 : char32_t back(rand());
437 1048576 : char const * s(buf);
438 :
439 : // too short
440 : // (only 3 of the 4 bytes are made available)
441 1048576 : size_t len(3);
442 1048576 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
443 :
444 : // invalid middle byte
445 : // (c ends up in 0x01-0x7F or 0xC0-0xFF, never a 0x80-0xBF continuation byte; the cursor must stop on that byte)
446 1048576 : char const second_byte(buf[1]);
447 1048576 : back = rand();
448 1048576 : s = buf;
449 1048576 : int c(rand() % (255 - 0x40) + 1);
450 1048576 : if(c >= 0x80)
451 : {
452 351758 : c += 0x40;
453 : }
454 1048576 : buf[1] = static_cast<char>(c);
455 1048576 : len = 4;
456 1048576 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
457 1048576 : CATCH_REQUIRE(back == libutf8::NOT_A_CHARACTER);
458 1048576 : CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 1) == reinterpret_cast<void const *>(s));
459 1048576 : CATCH_REQUIRE(len == 3);
460 1048576 : buf[1] = second_byte;
461 : // invalid third byte
462 1048576 : char const third_byte(buf[2]);
463 1048576 : back = rand();
464 1048576 : s = buf;
465 1048576 : c = rand() % (255 - 0x40) + 1;
466 1048576 : if(c >= 0x80)
467 : {
468 351234 : c += 0x40;
469 : }
470 1048576 : buf[2] = static_cast<char>(c);
471 1048576 : len = 4;
472 1048576 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
473 1048576 : CATCH_REQUIRE(back == libutf8::NOT_A_CHARACTER);
474 1048576 : CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 2) == reinterpret_cast<void const *>(s));
475 1048576 : CATCH_REQUIRE(len == 2);
476 1048576 : buf[2] = third_byte;
477 : // invalid fourth byte
478 1048576 : char const fourth_byte(buf[3]);
479 1048576 : back = rand();
480 1048576 : s = buf;
481 1048576 : c = rand() % (255 - 0x40) + 1;
482 1048576 : if(c >= 0x80)
483 : {
484 351758 : c += 0x40;
485 : }
486 1048576 : buf[3] = static_cast<char>(c);
487 1048576 : len = 4;
488 1048576 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
489 1048576 : CATCH_REQUIRE(back == libutf8::NOT_A_CHARACTER);
490 1048576 : CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 3) == reinterpret_cast<void const *>(s));
491 1048576 : CATCH_REQUIRE(len == 1);
492 1048576 : buf[3] = fourth_byte;
493 :
494 : // invalid introducer (0x80 to 0xBF), with only 3 of the 4 bytes available
495 : // (the cursor is expected to stop at the end of the available bytes)
496 1048576 : back = rand();
497 1048576 : s = buf;
498 1048576 : buf[0] = rand() % 64 + 0x80;
499 1048576 : len = 3;
500 1048576 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
501 1048576 : CATCH_REQUIRE(back == libutf8::NOT_A_CHARACTER);
502 1048576 : CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 3) == reinterpret_cast<void const *>(s));
503 1048576 : CATCH_REQUIRE(len == 0);
504 :
505 : // invalid introducer (0x80 to 0xBF), with all 4 bytes available
506 : // (the following continuation bytes are expected to be consumed too)
507 1048576 : back = rand();
508 1048576 : buf[0] = rand() % 64 + 0x80;
509 1048576 : s = buf;
510 1048576 : len = 4;
511 1048576 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
512 1048576 : CATCH_REQUIRE(back == libutf8::NOT_A_CHARACTER);
513 1048576 : CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 4) == reinterpret_cast<void const *>(s));
514 1048576 : CATCH_REQUIRE(len == 0);
515 :
516 : // invalid introducer (0xF8 to 0xFF)
517 : // (the following continuation bytes are expected to be consumed too)
518 1048576 : back = rand();
519 1048576 : s = buf;
520 1048576 : buf[0] = rand() % 8 + 0xF8;
521 1048576 : len = 4;
522 1048576 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
523 1048576 : CATCH_REQUIRE(back == libutf8::NOT_A_CHARACTER);
524 1048576 : CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 4) == reinterpret_cast<void const *>(s));
525 1048576 : CATCH_REQUIRE(len == 0);
526 : }
527 : }
528 5 : CATCH_END_SECTION()
529 :
530 5 : CATCH_START_SECTION("invalid_utf8_to_utf32: Test three random characters, destroy the second one and make sure it gets skipped properly")
531 : {
532 1001 : for(int repeat(0); repeat < 1000; ++repeat)
533 : { // NOTE(review): assumes unittest::rand_char(true) only returns encodable characters (wctombs() results are not checked) -- confirm
534 : char32_t wc[3]
535 : {
536 1000 : unittest::rand_char(true),
537 1000 : unittest::rand_char(true),
538 1000 : unittest::rand_char(true),
539 1000 : };
540 1000 : size_t sz[3] = {};
541 : // encode the three characters back to back; each call is told how much room is really left in buf
542 1000 : char buf[libutf8::MBS_MIN_BUFFER_LENGTH * 3];
543 1000 : char * s(buf);
544 1000 : sz[0] = libutf8::wctombs(s, wc[0], sizeof(buf));
545 1000 : s += sz[0];
546 1000 : sz[1] = libutf8::wctombs(s, wc[1], sizeof(buf) - sz[0]);
547 1000 : s += sz[1];
548 1000 : sz[2] = libutf8::wctombs(s, wc[2], sizeof(buf) - sz[0] - sz[1]);
549 : // overwrite the first byte of the second character with a continuation byte; len only covers the bytes that were written
550 1000 : char32_t back(rand());
551 1000 : s = buf;
552 1000 : buf[sz[0]] = rand() % 64 + 0x80;
553 1000 : size_t len(sz[0] + sz[1] + sz[2]);
554 1000 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) != -1);
555 1000 : CATCH_REQUIRE(back == wc[0]);
556 : // the damaged character must be reported as an error...
557 1000 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
558 1000 : CATCH_REQUIRE(back == libutf8::NOT_A_CHARACTER);
559 : // ...and decoding must pick up again exactly at the third character
560 1000 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) != -1);
561 1000 : CATCH_REQUIRE(back == wc[2]);
562 : }
563 : }
564 5 : CATCH_END_SECTION()
565 5 : }
566 :
567 :
568 : // vim: ts=4 sw=4 et
|