Line data Source code
1 : // Copyright (c) 2021-2022 Made to Order Software Corporation
2 : //
3 : // https://snapwebsites.org/project/libutf8
4 : // contact@m2osw.com
5 : //
6 : // This program is free software; you can redistribute it and/or modify
7 : // it under the terms of the GNU General Public License as published by
8 : // the Free Software Foundation; either version 2 of the License, or
9 : // (at your option) any later version.
10 : //
11 : // This program is distributed in the hope that it will be useful,
12 : // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 : // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 : // GNU General Public License for more details.
15 : //
16 : // You should have received a copy of the GNU General Public License along
17 : // with this program; if not, write to the Free Software Foundation, Inc.,
18 : // 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
19 :
20 : // libutf8
21 : //
22 : #include <libutf8/base.h>
23 : #include <libutf8/exception.h>
24 :
25 :
26 : // unit test
27 : //
28 : #include "catch_main.h"
29 :
30 :
31 : // C++
32 : //
33 : #include <cctype>
34 : #include <iostream>
35 :
36 :
37 : // last include
38 : //
39 : #include <snapdev/poison.h>
40 :
41 :
42 :
43 8 : CATCH_TEST_CASE("character_conversions", "[characters]")
44 : {
45 12 : CATCH_START_SECTION("character_conversions: Verify minimum buffer length for MBS conversions")
46 : {
47 1 : CATCH_REQUIRE(libutf8::MBS_MIN_BUFFER_LENGTH >= 5);
48 : }
49 : CATCH_END_SECTION()
50 :
51 12 : CATCH_START_SECTION("character_conversions: Test UTF-32 to UTF-8 with characters between 0x00 and 0x7F inclusive")
52 : {
53 129 : for(char32_t wc(0); wc < 0x000080; ++wc)
54 : {
55 128 : char buf[libutf8::MBS_MIN_BUFFER_LENGTH];
56 128 : CATCH_REQUIRE_THROWS_AS(libutf8::wctombs(buf, wc, 0), libutf8::libutf8_logic_exception);
57 128 : CATCH_REQUIRE(libutf8::wctombs(buf, wc, sizeof(buf)) == 1);
58 :
59 128 : CATCH_REQUIRE(static_cast<char32_t>(buf[0]) == wc);
60 128 : CATCH_REQUIRE(buf[1] == '\0');
61 :
62 128 : char32_t back(rand());
63 128 : char const * s(buf);
64 128 : size_t len(1);
65 128 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == 1);
66 128 : CATCH_REQUIRE(back == wc);
67 : }
68 : }
69 : CATCH_END_SECTION()
70 :
71 12 : CATCH_START_SECTION("character_conversions: Test UTF-32 to UTF-8 with characters between 0x80 and 0x7FF inclusive")
72 : {
73 1921 : for(char32_t wc(0x000080); wc < 0x000800; ++wc)
74 : {
75 1920 : char buf[libutf8::MBS_MIN_BUFFER_LENGTH];
76 1920 : CATCH_REQUIRE_THROWS_AS(libutf8::wctombs(buf, wc, 0), libutf8::libutf8_logic_exception);
77 1920 : CATCH_REQUIRE_THROWS_AS(libutf8::wctombs(buf, wc, 1), libutf8::libutf8_logic_exception);
78 1920 : CATCH_REQUIRE(libutf8::wctombs(buf, wc, sizeof(buf)) == 2);
79 :
80 3840 : char32_t const found(((static_cast<char32_t>(buf[0]) & 0x1F) << 6)
81 1920 : | ((static_cast<char32_t>(buf[1]) & 0x3F) << 0));
82 1920 : CATCH_REQUIRE(found == wc);
83 1920 : CATCH_REQUIRE(buf[2] == '\0');
84 :
85 1920 : char32_t back(rand());
86 1920 : char const * s(buf);
87 1920 : size_t len(2);
88 1920 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == 2);
89 1920 : CATCH_REQUIRE(back == wc);
90 : }
91 : }
92 : CATCH_END_SECTION()
93 :
94 12 : CATCH_START_SECTION("character_conversions: Test UTF-32 to UTF-8 with characters between 0x800 and 0xFFFF inclusive")
95 : {
96 61442 : for(char32_t wc(0x000800); wc < 0x010000; ++wc)
97 : {
98 61442 : if(wc >= 0xD800 && wc <= 0xDFFF)
99 : {
100 : // skip UTF-16 surrogates
101 : //
102 1 : wc = 0xDFFF;
103 1 : continue;
104 : }
105 :
106 61440 : char buf[libutf8::MBS_MIN_BUFFER_LENGTH];
107 61440 : if(rand() % 10 == 0)
108 : {
109 6051 : CATCH_REQUIRE_THROWS_AS(libutf8::wctombs(buf, wc, 0), libutf8::libutf8_logic_exception);
110 6051 : CATCH_REQUIRE_THROWS_AS(libutf8::wctombs(buf, wc, 1), libutf8::libutf8_logic_exception);
111 6051 : CATCH_REQUIRE_THROWS_AS(libutf8::wctombs(buf, wc, 2), libutf8::libutf8_logic_exception);
112 : }
113 61440 : CATCH_REQUIRE(libutf8::wctombs(buf, wc, sizeof(buf)) == 3);
114 :
115 122880 : char32_t const found(((static_cast<char32_t>(buf[0]) & 0x0F) << 12)
116 61440 : | ((static_cast<char32_t>(buf[1]) & 0x3F) << 6)
117 61440 : | ((static_cast<char32_t>(buf[2]) & 0x3F) << 0));
118 61440 : CATCH_REQUIRE(found == wc);
119 61440 : CATCH_REQUIRE(buf[3] == '\0');
120 :
121 61440 : char32_t back(rand());
122 61440 : char const * s(buf);
123 61440 : size_t len(3);
124 61440 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == 3);
125 61440 : CATCH_REQUIRE(back == wc);
126 : }
127 : }
128 : CATCH_END_SECTION()
129 :
130 12 : CATCH_START_SECTION("character_conversions: Test UTF-32 to UTF-8 with characters between 0x10000 and 0x10FFFF inclusive")
131 : {
132 1048577 : for(char32_t wc(0x010000); wc < 0x110000; ++wc)
133 : {
134 1048576 : char buf[libutf8::MBS_MIN_BUFFER_LENGTH];
135 1048576 : if(rand() % 100 == 0)
136 : {
137 10517 : CATCH_REQUIRE_THROWS_AS(libutf8::wctombs(buf, wc, 0), libutf8::libutf8_logic_exception);
138 10517 : CATCH_REQUIRE_THROWS_AS(libutf8::wctombs(buf, wc, 1), libutf8::libutf8_logic_exception);
139 10517 : CATCH_REQUIRE_THROWS_AS(libutf8::wctombs(buf, wc, 2), libutf8::libutf8_logic_exception);
140 10517 : CATCH_REQUIRE_THROWS_AS(libutf8::wctombs(buf, wc, 3), libutf8::libutf8_logic_exception);
141 : }
142 1048576 : CATCH_REQUIRE(libutf8::wctombs(buf, wc, sizeof(buf)) == 4);
143 :
144 2097152 : char32_t const found(((static_cast<char32_t>(buf[0]) & 0x07) << 18)
145 1048576 : | ((static_cast<char32_t>(buf[1]) & 0x3F) << 12)
146 1048576 : | ((static_cast<char32_t>(buf[2]) & 0x3F) << 6)
147 1048576 : | ((static_cast<char32_t>(buf[3]) & 0x3F) << 0));
148 1048576 : CATCH_REQUIRE(found == wc);
149 1048576 : CATCH_REQUIRE(buf[4] == '\0');
150 :
151 1048576 : char32_t back(rand());
152 1048576 : char const * s(buf);
153 1048576 : size_t len(4);
154 1048576 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == 4);
155 1048576 : CATCH_REQUIRE(back == wc);
156 : }
157 : }
158 : CATCH_END_SECTION()
159 :
160 12 : CATCH_START_SECTION("character_conversions: Test UTF-32 to UTF-8 with an empty input string")
161 : {
162 11 : for(char32_t repeat(0); repeat < 10; ++repeat)
163 : {
164 10 : char buf[libutf8::MBS_MIN_BUFFER_LENGTH];
165 10 : char copy[libutf8::MBS_MIN_BUFFER_LENGTH];
166 60 : for(size_t idx(0); idx < libutf8::MBS_MIN_BUFFER_LENGTH; ++idx)
167 : {
168 50 : buf[idx] = rand();
169 50 : copy[idx] = buf[idx];
170 : }
171 10 : char const * s(buf);
172 10 : char32_t null = rand();
173 10 : size_t len(0);
174 10 : CATCH_REQUIRE(libutf8::mbstowc(null, s, len) == 0);
175 10 : CATCH_REQUIRE(null == '\0');
176 60 : for(size_t idx(0); idx < libutf8::MBS_MIN_BUFFER_LENGTH; ++idx)
177 : {
178 50 : CATCH_REQUIRE(copy[idx] == buf[idx]);
179 : }
180 : }
181 : }
182 : CATCH_END_SECTION()
183 6 : }
184 :
185 :
186 4 : CATCH_TEST_CASE("invalid_utf32_to_utf8", "[characters],[invalid]")
187 : {
188 4 : CATCH_START_SECTION("invalid_utf32_to_utf8: Verify that surrogates do not work in UTF-8")
189 : {
190 2049 : for(char32_t wc(0x00D800); wc < 0x00E000; ++wc)
191 : {
192 2048 : char buf[libutf8::MBS_MIN_BUFFER_LENGTH]
193 : {
194 : 'n', 'o', 't', 'e', 'm' //, 'p', 't', 'y'
195 : };
196 2048 : CATCH_REQUIRE(libutf8::wctombs(buf, wc, sizeof(buf)) == -1);
197 2048 : CATCH_REQUIRE(buf[0] == '\0');
198 : }
199 : }
200 : CATCH_END_SECTION()
201 :
202 4 : CATCH_START_SECTION("invalid_utf32_to_utf8: Verify that too large a number is not supported")
203 : {
204 1001 : for(int repeat(0); repeat < 1000; ++repeat)
205 : {
206 1000 : char buf[libutf8::MBS_MIN_BUFFER_LENGTH]
207 : {
208 : 'n', 'o', 't', 'e', 'm' //, 'p', 't', 'y'
209 : };
210 1000 : char32_t wc(0);
211 1 : do
212 : {
213 1001 : wc = (rand() << 16) + (rand() & 0x0000FFFF);
214 : }
215 1001 : while(wc < 0x110000);
216 1000 : CATCH_REQUIRE(libutf8::wctombs(buf, wc, sizeof(buf)) == -1);
217 1000 : CATCH_REQUIRE(buf[0] == '\0');
218 : }
219 : }
220 : CATCH_END_SECTION()
221 2 : }
222 :
223 :
224 7 : CATCH_TEST_CASE("invalid_utf8_to_utf32", "[characters],[invalid]")
225 : {
226 10 : CATCH_START_SECTION("invalid_utf8_to_utf32: Verify that surrogates do not work in UTF-8")
227 : {
228 2049 : for(char32_t wc(0x00D800); wc < 0x00E000; ++wc)
229 : {
230 : //int mbstowc(char32_t & wc, char const * & mb, size_t & len);
231 2048 : char buf[libutf8::MBS_MIN_BUFFER_LENGTH] = {};
232 2048 : buf[0] = static_cast<char>((wc >> 12) | 0xE0);
233 2048 : buf[1] = ((wc >> 6) & 0x3F) | 0x80;
234 2048 : buf[2] = (wc & 0x3F) | 0x80;
235 2048 : buf[3] = '\0';
236 2048 : char const * s = buf;
237 2048 : size_t len(3);
238 2048 : char32_t cwc(rand());
239 2048 : CATCH_REQUIRE(libutf8::mbstowc(cwc, s, len) == -1);
240 2048 : CATCH_REQUIRE(cwc == libutf8::NOT_A_CHARACTER);
241 2048 : char const c1(static_cast<char>((wc >> 12) | 0xE0));
242 2048 : CATCH_REQUIRE(buf[0] == c1);
243 2048 : char const c2(static_cast<char>(((wc >> 6) & 0x3F) | 0x80));
244 2048 : CATCH_REQUIRE(buf[1] == c2);
245 2048 : char const c3((wc & 0x3F) | 0x80);
246 2048 : CATCH_REQUIRE(buf[2] == c3);
247 2048 : CATCH_REQUIRE(buf[3] == '\0');
248 2048 : CATCH_REQUIRE(s == buf + 3);
249 2048 : CATCH_REQUIRE(len == 0);
250 : }
251 : }
252 : CATCH_END_SECTION()
253 :
254 : //CATCH_START_SECTION("invalid_utf8_to_utf32: Verify that too large a number is not supported")
255 : //{
256 : // for(int idx(0); idx < 1000; ++idx)
257 : // {
258 : // char buf[libutf8::MBS_MIN_BUFFER_LENGTH]
259 : // {
260 : // 'n', 'o', 't', 'e', 'm' //, 'p', 't', 'y'
261 : // };
262 : // char32_t wc(0);
263 : // do
264 : // {
265 : // wc = (rand() << 16) + (rand() & 0x0000FFFF);
266 : // }
267 : // while(wc < 0x110000);
268 : // CATCH_REQUIRE(libutf8::wctombs(buf, wc, sizeof(buf)) == -1);
269 : // CATCH_REQUIRE(buf[0] == libutf8::NOT_A_CHARACTER);
270 : // }
271 : //}
272 : //CATCH_END_SECTION()
273 :
274 10 : CATCH_START_SECTION("invalid_utf8_to_utf32: Test an invalid UTF-8 sequence (0x80 to 0x7FF)")
275 : {
276 1921 : for(char32_t wc(0x000080); wc < 0x000800; ++wc)
277 : {
278 1920 : char buf[libutf8::MBS_MIN_BUFFER_LENGTH];
279 1920 : CATCH_REQUIRE(libutf8::wctombs(buf, wc, sizeof(buf)) == 2);
280 :
281 3840 : char32_t const found(((static_cast<char32_t>(buf[0]) & 0x1F) << 6)
282 1920 : | ((static_cast<char32_t>(buf[1]) & 0x3F) << 0));
283 1920 : CATCH_REQUIRE(found == wc);
284 1920 : CATCH_REQUIRE(buf[2] == '\0');
285 :
286 : // too short
287 : //
288 1920 : char32_t back(rand());
289 1920 : char const * s(buf);
290 1920 : size_t len(1);
291 1920 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
292 :
293 : // invalid middle byte
294 : //
295 1920 : char const second_byte(buf[1]);
296 1920 : back = rand();
297 1920 : s = buf;
298 1920 : int c(rand() % (255 - 0x40) + 1);
299 1920 : if(c >= 0x80)
300 : {
301 662 : c += 0x40;
302 : }
303 1920 : buf[1] = static_cast<char>(c);
304 1920 : len = 2;
305 1920 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
306 1920 : CATCH_REQUIRE(back == libutf8::NOT_A_CHARACTER);
307 1920 : CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 1) == reinterpret_cast<void const *>(s));
308 1920 : CATCH_REQUIRE(len == 1);
309 1920 : buf[1] = second_byte;
310 :
311 : // invalid introducer (0x80 to 0xBF)
312 : //
313 1920 : back = rand();
314 1920 : s = buf;
315 1920 : buf[0] = rand() % 64 + 0x80;
316 1920 : len = 2;
317 1920 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
318 1920 : CATCH_REQUIRE(back == libutf8::NOT_A_CHARACTER);
319 1920 : CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 2) == reinterpret_cast<void const *>(s));
320 1920 : CATCH_REQUIRE(len == 0);
321 :
322 : // invalid introducer (0xF8 to 0xFF)
323 : //
324 1920 : back = rand();
325 1920 : s = buf;
326 1920 : buf[0] = rand() % 8 + 0xF8;
327 1920 : len = 2;
328 1920 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
329 1920 : CATCH_REQUIRE(back == libutf8::NOT_A_CHARACTER);
330 1920 : CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 2) == reinterpret_cast<void const *>(s));
331 1920 : CATCH_REQUIRE(len == 0);
332 : }
333 : }
334 : CATCH_END_SECTION()
335 :
336 10 : CATCH_START_SECTION("invalid_utf8_to_utf32: Test an invalid UTF-8 sequence (0x800 to 0xFFFF)")
337 : {
338 61442 : for(char32_t wc(0x000800); wc < 0x010000; ++wc)
339 : {
340 61442 : if(wc >= 0xD800 && wc <= 0xDFFF)
341 : {
342 : // skip UTF-16 surrogates -- this is not the test for those
343 : //
344 1 : wc = 0xDFFF;
345 1 : continue;
346 : }
347 :
348 61440 : char buf[libutf8::MBS_MIN_BUFFER_LENGTH];
349 61440 : CATCH_REQUIRE(libutf8::wctombs(buf, wc, sizeof(buf)) == 3);
350 :
351 122880 : char32_t const found(((static_cast<char32_t>(buf[0]) & 0x0F) << 12)
352 61440 : | ((static_cast<char32_t>(buf[1]) & 0x3F) << 6)
353 61440 : | ((static_cast<char32_t>(buf[2]) & 0x3F) << 0));
354 61440 : CATCH_REQUIRE(found == wc);
355 61440 : CATCH_REQUIRE(buf[3] == '\0');
356 :
357 : // too short
358 : //
359 61440 : char32_t back(rand());
360 61440 : char const * s(buf);
361 61440 : size_t len(2);
362 61440 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
363 :
364 : // invalid middle byte
365 : //
366 61440 : char const second_byte(buf[1]);
367 61440 : back = rand();
368 61440 : s = buf;
369 61440 : int c(rand() % (255 - 0x40) + 1);
370 61440 : if(c >= 0x80)
371 : {
372 20703 : c += 0x40;
373 : }
374 61440 : buf[1] = static_cast<char>(c);
375 61440 : len = 3;
376 61440 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
377 61440 : CATCH_REQUIRE(back == libutf8::NOT_A_CHARACTER);
378 61440 : CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 1) == reinterpret_cast<void const *>(s));
379 61440 : CATCH_REQUIRE(len == 2);
380 61440 : buf[1] = second_byte;
381 :
382 61440 : char const third_byte(buf[2]);
383 61440 : back = rand();
384 61440 : s = buf;
385 61440 : c = rand() % (255 - 0x40) + 1;
386 61440 : if(c >= 0x80)
387 : {
388 20710 : c += 0x40;
389 : }
390 61440 : buf[2] = static_cast<char>(c);
391 61440 : len = 3;
392 61440 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
393 61440 : CATCH_REQUIRE(back == libutf8::NOT_A_CHARACTER);
394 61440 : CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 2) == reinterpret_cast<void const *>(s));
395 61440 : CATCH_REQUIRE(len == 1);
396 61440 : buf[2] = third_byte;
397 :
398 : // invalid introducer (0x80 to 0xBF)
399 : //
400 61440 : back = rand();
401 61440 : s = buf;
402 61440 : buf[0] = rand() % 64 + 0x80;
403 61440 : len = 3;
404 61440 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
405 61440 : CATCH_REQUIRE(back == libutf8::NOT_A_CHARACTER);
406 61440 : CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 3) == reinterpret_cast<void const *>(s));
407 61440 : CATCH_REQUIRE(len == 0);
408 :
409 : // invalid introducer (0xF8 to 0xFF)
410 : //
411 61440 : back = rand();
412 61440 : s = buf;
413 61440 : buf[0] = rand() % 8 + 0xF8;
414 61440 : len = 3;
415 61440 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
416 61440 : CATCH_REQUIRE(back == libutf8::NOT_A_CHARACTER);
417 61440 : CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 3) == reinterpret_cast<void const *>(s));
418 61440 : CATCH_REQUIRE(len == 0);
419 : }
420 : }
421 : CATCH_END_SECTION()
422 :
423 10 : CATCH_START_SECTION("invalid_utf8_to_utf32: Test UTF-32 to UTF-8 with characters between 0x010000 and 0x110000 inclusive")
424 : {
425 1048577 : for(char32_t wc(0x010000); wc < 0x110000; ++wc)
426 : {
427 1048576 : char buf[libutf8::MBS_MIN_BUFFER_LENGTH];
428 1048576 : CATCH_REQUIRE(libutf8::wctombs(buf, wc, sizeof(buf)) == 4);
429 :
430 2097152 : char32_t const found(((static_cast<char32_t>(buf[0]) & 0x07) << 18)
431 1048576 : | ((static_cast<char32_t>(buf[1]) & 0x3F) << 12)
432 1048576 : | ((static_cast<char32_t>(buf[2]) & 0x3F) << 6)
433 1048576 : | ((static_cast<char32_t>(buf[3]) & 0x3F) << 0));
434 1048576 : CATCH_REQUIRE(found == wc);
435 1048576 : CATCH_REQUIRE(buf[4] == '\0');
436 :
437 1048576 : char32_t back(rand());
438 1048576 : char const * s(buf);
439 :
440 : // too short
441 : //
442 1048576 : size_t len(3);
443 1048576 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
444 :
445 : // invalid middle byte
446 : //
447 1048576 : char const second_byte(buf[1]);
448 1048576 : back = rand();
449 1048576 : s = buf;
450 1048576 : int c(rand() % (255 - 0x40) + 1);
451 1048576 : if(c >= 0x80)
452 : {
453 351086 : c += 0x40;
454 : }
455 1048576 : buf[1] = static_cast<char>(c);
456 1048576 : len = 4;
457 1048576 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
458 1048576 : CATCH_REQUIRE(back == libutf8::NOT_A_CHARACTER);
459 1048576 : CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 1) == reinterpret_cast<void const *>(s));
460 1048576 : CATCH_REQUIRE(len == 3);
461 1048576 : buf[1] = second_byte;
462 :
463 1048576 : char const third_byte(buf[2]);
464 1048576 : back = rand();
465 1048576 : s = buf;
466 1048576 : c = rand() % (255 - 0x40) + 1;
467 1048576 : if(c >= 0x80)
468 : {
469 352058 : c += 0x40;
470 : }
471 1048576 : buf[2] = static_cast<char>(c);
472 1048576 : len = 4;
473 1048576 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
474 1048576 : CATCH_REQUIRE(back == libutf8::NOT_A_CHARACTER);
475 1048576 : CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 2) == reinterpret_cast<void const *>(s));
476 1048576 : CATCH_REQUIRE(len == 2);
477 1048576 : buf[2] = third_byte;
478 :
479 1048576 : char const forth_byte(buf[3]);
480 1048576 : back = rand();
481 1048576 : s = buf;
482 1048576 : c = rand() % (255 - 0x40) + 1;
483 1048576 : if(c >= 0x80)
484 : {
485 351566 : c += 0x40;
486 : }
487 1048576 : buf[3] = static_cast<char>(c);
488 1048576 : len = 4;
489 1048576 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
490 1048576 : CATCH_REQUIRE(back == libutf8::NOT_A_CHARACTER);
491 1048576 : CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 3) == reinterpret_cast<void const *>(s));
492 1048576 : CATCH_REQUIRE(len == 1);
493 1048576 : buf[3] = forth_byte;
494 :
495 : // invalid introducer (0x80 to 0xBF)
496 : //
497 1048576 : back = rand();
498 1048576 : s = buf;
499 1048576 : buf[0] = rand() % 64 + 0x80;
500 1048576 : len = 3;
501 1048576 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
502 1048576 : CATCH_REQUIRE(back == libutf8::NOT_A_CHARACTER);
503 1048576 : CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 3) == reinterpret_cast<void const *>(s));
504 1048576 : CATCH_REQUIRE(len == 0);
505 :
506 : // invalid introducer (0x80 to 0xBF)
507 : //
508 1048576 : back = rand();
509 1048576 : buf[0] = rand() % 64 + 0x80;
510 1048576 : s = buf;
511 1048576 : len = 4;
512 1048576 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
513 1048576 : CATCH_REQUIRE(back == libutf8::NOT_A_CHARACTER);
514 1048576 : CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 4) == reinterpret_cast<void const *>(s));
515 1048576 : CATCH_REQUIRE(len == 0);
516 :
517 : // invalid introducer (0xF8 to 0xFF)
518 : //
519 1048576 : back = rand();
520 1048576 : s = buf;
521 1048576 : buf[0] = rand() % 8 + 0xF8;
522 1048576 : len = 4;
523 1048576 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
524 1048576 : CATCH_REQUIRE(back == libutf8::NOT_A_CHARACTER);
525 1048576 : CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 4) == reinterpret_cast<void const *>(s));
526 1048576 : CATCH_REQUIRE(len == 0);
527 : }
528 : }
529 : CATCH_END_SECTION()
530 :
531 10 : CATCH_START_SECTION("invalid_utf8_to_utf32: Test three random characters, destroy the second one and make sure it gets skipped properly")
532 : {
533 1001 : for(int repeat(0); repeat < 1000; ++repeat)
534 : {
535 : char32_t wc[3]
536 : {
537 1000 : unittest::rand_char(true),
538 1000 : unittest::rand_char(true),
539 1000 : unittest::rand_char(true),
540 3000 : };
541 1000 : size_t sz[3] = {};
542 :
543 1000 : char buf[libutf8::MBS_MIN_BUFFER_LENGTH * 3];
544 1000 : char * s(buf);
545 1000 : sz[0] += libutf8::wctombs(s, wc[0], sizeof(buf));
546 1000 : s += sz[0];
547 1000 : sz[1] = libutf8::wctombs(s, wc[1], sizeof(buf));
548 1000 : s += sz[1];
549 1000 : sz[2] = libutf8::wctombs(s, wc[2], sizeof(buf));
550 :
551 1000 : char32_t back(rand());
552 1000 : s = buf;
553 1000 : buf[sz[0]] = rand() % 64 + 0x80;
554 1000 : size_t len(sizeof(buf));
555 1000 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) != -1);
556 1000 : CATCH_REQUIRE(back == wc[0]);
557 :
558 1000 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
559 1000 : CATCH_REQUIRE(back == libutf8::NOT_A_CHARACTER);
560 :
561 1000 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) != -1);
562 1000 : CATCH_REQUIRE(back == wc[2]);
563 : }
564 : }
565 : CATCH_END_SECTION()
566 11 : }
567 :
568 :
569 : // vim: ts=4 sw=4 et
|