Line data Source code
1 : // Copyright (c) 2021 Made to Order Software Corporation
2 : //
3 : // https://snapwebsites.org/project/libutf8
4 : // contact@m2osw.com
5 : //
6 : // This program is free software; you can redistribute it and/or modify
7 : // it under the terms of the GNU General Public License as published by
8 : // the Free Software Foundation; either version 2 of the License, or
9 : // (at your option) any later version.
10 : //
11 : // This program is distributed in the hope that it will be useful,
12 : // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 : // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 : // GNU General Public License for more details.
15 : //
16 : // You should have received a copy of the GNU General Public License along
17 : // with this program; if not, write to the Free Software Foundation, Inc.,
18 : // 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
19 :
20 : // unit test
21 : //
22 : #include "catch_main.h"
23 :
24 :
25 : // libutf8 lib
26 : //
27 : #include "libutf8/base.h"
28 : #include "libutf8/exception.h"
29 :
30 :
31 : // C++ lib
32 : //
33 : #include <cctype>
34 : #include <iostream>
35 :
36 :
37 : // last include
38 : //
39 : #include <snapdev/poison.h>
40 :
41 : // Round trip tests: each valid code point is encoded with wctombs(), the bytes
42 : // are checked by hand, then mbstowc() must return the same code point.
43 8 : CATCH_TEST_CASE("Character Conversions", "[characters]")
44 : {
45 12 : CATCH_START_SECTION("Verify minimum buffer length for MBS conversions")
46 1 : CATCH_REQUIRE(libutf8::MBS_MIN_BUFFER_LENGTH >= 5);
47 : CATCH_END_SECTION()
48 : // one byte sequences (0x00 to 0x7F); a buffer length of 0 must throw
49 12 : CATCH_START_SECTION("Test UTF-32 to UTF-8 with characters between 0x00 and 0x7F inclusive")
50 129 : for(char32_t wc(0); wc < 0x000080; ++wc)
51 : {
52 128 : char buf[libutf8::MBS_MIN_BUFFER_LENGTH];
53 128 : CATCH_REQUIRE_THROWS_AS(libutf8::wctombs(buf, wc, 0), libutf8::libutf8_logic_exception);
54 128 : CATCH_REQUIRE(libutf8::wctombs(buf, wc, sizeof(buf)) == 1);
55 : // the single output byte is the character itself, followed by a NUL
56 128 : CATCH_REQUIRE(static_cast<char32_t>(buf[0]) == wc);
57 128 : CATCH_REQUIRE(buf[1] == '\0');
58 : // decode the byte again; `back` starts with a random value
59 128 : char32_t back(rand());
60 128 : char const * s(buf);
61 128 : size_t len(1);
62 128 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == 1);
63 128 : CATCH_REQUIRE(back == wc);
64 : }
65 : CATCH_END_SECTION()
66 : // two byte sequences (0x80 to 0x7FF); buffer lengths of 0 and 1 must throw
67 12 : CATCH_START_SECTION("Test UTF-32 to UTF-8 with characters between 0x80 and 0x7FF inclusive")
68 1921 : for(char32_t wc(0x000080); wc < 0x000800; ++wc)
69 : {
70 1920 : char buf[libutf8::MBS_MIN_BUFFER_LENGTH];
71 1920 : CATCH_REQUIRE_THROWS_AS(libutf8::wctombs(buf, wc, 0), libutf8::libutf8_logic_exception);
72 1920 : CATCH_REQUIRE_THROWS_AS(libutf8::wctombs(buf, wc, 1), libutf8::libutf8_logic_exception);
73 1920 : CATCH_REQUIRE(libutf8::wctombs(buf, wc, sizeof(buf)) == 2);
74 : // rebuild the code point by hand from the 5 + 6 payload bits
75 3840 : char32_t const found(((static_cast<char32_t>(buf[0]) & 0x1F) << 6)
76 1920 : | ((static_cast<char32_t>(buf[1]) & 0x3F) << 0));
77 1920 : CATCH_REQUIRE(found == wc);
78 1920 : CATCH_REQUIRE(buf[2] == '\0');
79 : // decode the two bytes again; `back` starts with a random value
80 1920 : char32_t back(rand());
81 1920 : char const * s(buf);
82 1920 : size_t len(2);
83 1920 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == 2);
84 1920 : CATCH_REQUIRE(back == wc);
85 : }
86 : CATCH_END_SECTION()
87 : // three byte sequences (0x800 to 0xFFFF, surrogates excluded)
88 12 : CATCH_START_SECTION("Test UTF-32 to UTF-8 with characters between 0x800 and 0xFFFF inclusive")
89 61442 : for(char32_t wc(0x000800); wc < 0x010000; ++wc)
90 : {
91 61442 : if(wc >= 0xD800 && wc <= 0xDFFF)
92 : {
93 : // skip UTF-16 surrogates: jump to the last one, the ++wc of the
94 : // loop then moves on to 0xE000
95 1 : wc = 0xDFFF;
96 1 : continue;
97 : }
98 : // the too small buffer checks only run when rand() % 10 is zero
99 61440 : char buf[libutf8::MBS_MIN_BUFFER_LENGTH];
100 61440 : if(rand() % 10 == 0)
101 : {
102 6092 : CATCH_REQUIRE_THROWS_AS(libutf8::wctombs(buf, wc, 0), libutf8::libutf8_logic_exception);
103 6092 : CATCH_REQUIRE_THROWS_AS(libutf8::wctombs(buf, wc, 1), libutf8::libutf8_logic_exception);
104 6092 : CATCH_REQUIRE_THROWS_AS(libutf8::wctombs(buf, wc, 2), libutf8::libutf8_logic_exception);
105 : }
106 61440 : CATCH_REQUIRE(libutf8::wctombs(buf, wc, sizeof(buf)) == 3);
107 : // rebuild the code point by hand from the 4 + 6 + 6 payload bits
108 122880 : char32_t const found(((static_cast<char32_t>(buf[0]) & 0x0F) << 12)
109 61440 : | ((static_cast<char32_t>(buf[1]) & 0x3F) << 6)
110 61440 : | ((static_cast<char32_t>(buf[2]) & 0x3F) << 0));
111 61440 : CATCH_REQUIRE(found == wc);
112 61440 : CATCH_REQUIRE(buf[3] == '\0');
113 : // decode the three bytes again; `back` starts with a random value
114 61440 : char32_t back(rand());
115 61440 : char const * s(buf);
116 61440 : size_t len(3);
117 61440 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == 3);
118 61440 : CATCH_REQUIRE(back == wc);
119 : }
120 : CATCH_END_SECTION()
121 : // four byte sequences (0x10000 to 0x10FFFF)
122 12 : CATCH_START_SECTION("Test UTF-32 to UTF-8 with characters between 0x10000 and 0x10FFFF inclusive")
123 1048577 : for(char32_t wc(0x010000); wc < 0x110000; ++wc)
124 : {
125 1048576 : char buf[libutf8::MBS_MIN_BUFFER_LENGTH];
126 1048576 : if(rand() % 100 == 0)
127 : {
128 10449 : CATCH_REQUIRE_THROWS_AS(libutf8::wctombs(buf, wc, 0), libutf8::libutf8_logic_exception);
129 10449 : CATCH_REQUIRE_THROWS_AS(libutf8::wctombs(buf, wc, 1), libutf8::libutf8_logic_exception);
130 10449 : CATCH_REQUIRE_THROWS_AS(libutf8::wctombs(buf, wc, 2), libutf8::libutf8_logic_exception);
131 10449 : CATCH_REQUIRE_THROWS_AS(libutf8::wctombs(buf, wc, 3), libutf8::libutf8_logic_exception);
132 : }
133 1048576 : CATCH_REQUIRE(libutf8::wctombs(buf, wc, sizeof(buf)) == 4);
134 : // rebuild the code point by hand from the 3 + 6 + 6 + 6 payload bits
135 2097152 : char32_t const found(((static_cast<char32_t>(buf[0]) & 0x07) << 18)
136 1048576 : | ((static_cast<char32_t>(buf[1]) & 0x3F) << 12)
137 1048576 : | ((static_cast<char32_t>(buf[2]) & 0x3F) << 6)
138 1048576 : | ((static_cast<char32_t>(buf[3]) & 0x3F) << 0));
139 1048576 : CATCH_REQUIRE(found == wc);
140 1048576 : CATCH_REQUIRE(buf[4] == '\0');
141 : // decode the four bytes again; `back` starts with a random value
142 1048576 : char32_t back(rand());
143 1048576 : char const * s(buf);
144 1048576 : size_t len(4);
145 1048576 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == 4);
146 1048576 : CATCH_REQUIRE(back == wc);
147 : }
148 : CATCH_END_SECTION()
149 : // length of zero: mbstowc() must return 0, set the character to NUL and leave the input bytes unchanged
150 12 : CATCH_START_SECTION("Test UTF-32 to UTF-8 with an empty input string")
151 11 : for(char32_t repeat(0); repeat < 10; ++repeat)
152 : {
153 10 : char buf[libutf8::MBS_MIN_BUFFER_LENGTH];
154 10 : char copy[libutf8::MBS_MIN_BUFFER_LENGTH];
155 60 : for(size_t idx(0); idx < libutf8::MBS_MIN_BUFFER_LENGTH; ++idx)
156 : {
157 50 : buf[idx] = rand();
158 50 : copy[idx] = buf[idx];
159 : }
160 10 : char const * s(buf);
161 10 : char32_t null = rand();
162 10 : size_t len(0);
163 10 : CATCH_REQUIRE(libutf8::mbstowc(null, s, len) == 0);
164 10 : CATCH_REQUIRE(null == '\0');
165 60 : for(size_t idx(0); idx < libutf8::MBS_MIN_BUFFER_LENGTH; ++idx)
166 : {
167 50 : CATCH_REQUIRE(copy[idx] == buf[idx]);
168 : }
169 : }
170 : CATCH_END_SECTION()
171 6 : }
172 : // Code points that cannot be encoded (UTF-16 surrogates, then values of
173 : // 0x110000 and more): wctombs() must return -1 and write an empty string.
174 4 : CATCH_TEST_CASE("Invalid UTF-32 Character to UTF-8", "[characters],[invalid]")
175 : {
176 4 : CATCH_START_SECTION("Verify that surrogates do not work in UTF-8")
177 2049 : for(char32_t wc(0x00D800); wc < 0x00E000; ++wc)
178 : {
179 2048 : char buf[libutf8::MBS_MIN_BUFFER_LENGTH]
180 : {
181 : 'n', 'o', 't', 'e', 'm' //, 'p', 't', 'y'
182 : };
183 2048 : CATCH_REQUIRE(libutf8::wctombs(buf, wc, sizeof(buf)) == -1);
184 2048 : CATCH_REQUIRE(buf[0] == '\0');
185 : }
186 : CATCH_END_SECTION()
187 : // random 32 bit values, redrawn until one is 0x110000 or more
188 4 : CATCH_START_SECTION("Verify that too large a number is not supported")
189 1001 : for(int repeat(0); repeat < 1000; ++repeat)
190 : {
191 1000 : char buf[libutf8::MBS_MIN_BUFFER_LENGTH]
192 : {
193 : 'n', 'o', 't', 'e', 'm' //, 'p', 't', 'y'
194 : };
195 1000 : char32_t wc(0);
196 0 : do
197 : { // compute as char32_t: `rand() << 16` can overflow a signed int (undefined before C++20)
198 1000 : wc = (static_cast<char32_t>(rand()) << 16) + (static_cast<char32_t>(rand()) & 0x0000FFFF);
199 : }
200 1000 : while(wc < 0x110000);
201 1000 : CATCH_REQUIRE(libutf8::wctombs(buf, wc, sizeof(buf)) == -1);
202 1000 : CATCH_REQUIRE(buf[0] == '\0');
203 : }
204 : CATCH_END_SECTION()
205 2 : }
206 : // Malformed UTF-8 input: mbstowc() must return -1, set the character to NUL
207 : // and move the pointer/length as verified in each section below.
208 7 : CATCH_TEST_CASE("Invalid UTF-8 Character to UTF-32", "[characters],[invalid]")
209 : {
210 10 : CATCH_START_SECTION("Verify that surrogates do not work in UTF-8")
211 2049 : for(char32_t wc(0x00D800); wc < 0x00E000; ++wc)
212 : {
213 : // encode the surrogate by hand as 3 bytes, then call: int mbstowc(char32_t & wc, char const * & mb, size_t & len);
214 2048 : char buf[libutf8::MBS_MIN_BUFFER_LENGTH] = {};
215 2048 : buf[0] = static_cast<char>((wc >> 12) | 0xE0);
216 2048 : buf[1] = ((wc >> 6) & 0x3F) | 0x80;
217 2048 : buf[2] = (wc & 0x3F) | 0x80;
218 2048 : buf[3] = '\0';
219 2048 : char const * s = buf;
220 2048 : size_t len(3);
221 2048 : char32_t cwc(rand());
222 2048 : CATCH_REQUIRE(libutf8::mbstowc(cwc, s, len) == -1);
223 2048 : CATCH_REQUIRE(cwc == U'\0');
224 2048 : char const c1(static_cast<char>((wc >> 12) | 0xE0));
225 2048 : CATCH_REQUIRE(buf[0] == c1);
226 2048 : char const c2(static_cast<char>(((wc >> 6) & 0x3F) | 0x80));
227 2048 : CATCH_REQUIRE(buf[1] == c2);
228 2048 : char const c3((wc & 0x3F) | 0x80);
229 2048 : CATCH_REQUIRE(buf[2] == c3);
230 2048 : CATCH_REQUIRE(buf[3] == '\0');
231 2048 : CATCH_REQUIRE(s == buf + 3);
232 2048 : CATCH_REQUIRE(len == 0);
233 : }
234 : CATCH_END_SECTION()
235 : // NOTE: the section below is disabled; as written it calls wctombs(), not mbstowc()
236 : //CATCH_START_SECTION("Verify that too large a number is not supported")
237 : // for(int idx(0); idx < 1000; ++idx)
238 : // {
239 : // char buf[libutf8::MBS_MIN_BUFFER_LENGTH]
240 : // {
241 : // 'n', 'o', 't', 'e', 'm' //, 'p', 't', 'y'
242 : // };
243 : // char32_t wc(0);
244 : // do
245 : // {
246 : // wc = (rand() << 16) + (rand() & 0x0000FFFF);
247 : // }
248 : // while(wc < 0x110000);
249 : // CATCH_REQUIRE(libutf8::wctombs(buf, wc, sizeof(buf)) == -1);
250 : // CATCH_REQUIRE(buf[0] == '\0');
251 : // }
252 : //CATCH_END_SECTION()
253 : // start from a valid two byte sequence and damage it in several ways
254 10 : CATCH_START_SECTION("Test an invalid UTF-8 sequence (0x80 to 0x7FF)")
255 1921 : for(char32_t wc(0x000080); wc < 0x000800; ++wc)
256 : {
257 1920 : char buf[libutf8::MBS_MIN_BUFFER_LENGTH];
258 1920 : CATCH_REQUIRE(libutf8::wctombs(buf, wc, sizeof(buf)) == 2);
259 : // sanity check: the freshly encoded bytes decode by hand to wc
260 3840 : char32_t const found(((static_cast<char32_t>(buf[0]) & 0x1F) << 6)
261 1920 : | ((static_cast<char32_t>(buf[1]) & 0x3F) << 0));
262 1920 : CATCH_REQUIRE(found == wc);
263 1920 : CATCH_REQUIRE(buf[2] == '\0');
264 :
265 : // too short: only 1 of the 2 bytes is made available
266 : //
267 1920 : char32_t back(rand());
268 1920 : char const * s(buf);
269 1920 : size_t len(1);
270 1920 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
271 :
272 : // invalid second byte: c ends up in 0x01-0x7F or 0xC0-0xFF, never in
273 : // the continuation range 0x80-0xBF; decoding stops on that byte
274 1920 : char const second_byte(buf[1]);
275 1920 : back = rand();
276 1920 : s = buf;
277 1920 : int c(rand() % (255 - 0x40) + 1);
278 1920 : if(c >= 0x80)
279 : {
280 644 : c += 0x40;
281 : }
282 1920 : buf[1] = static_cast<char>(c);
283 1920 : len = 2;
284 1920 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
285 1920 : CATCH_REQUIRE(back == U'\0');
286 1920 : CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 1) == reinterpret_cast<void const *>(s));
287 1920 : CATCH_REQUIRE(len == 1);
288 1920 : buf[1] = second_byte;
289 :
290 : // invalid introducer (0x80 to 0xBF): a continuation byte comes first
291 : // and both bytes get consumed
292 1920 : back = rand();
293 1920 : s = buf;
294 1920 : buf[0] = rand() % 64 + 0x80;
295 1920 : len = 2;
296 1920 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
297 1920 : CATCH_REQUIRE(back == U'\0');
298 1920 : CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 2) == reinterpret_cast<void const *>(s));
299 1920 : CATCH_REQUIRE(len == 0);
300 :
301 : // invalid introducer (0xF8 to 0xFF); both bytes get consumed here too
302 : //
303 1920 : back = rand();
304 1920 : s = buf;
305 1920 : buf[0] = rand() % 8 + 0xF8;
306 1920 : len = 2;
307 1920 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
308 1920 : CATCH_REQUIRE(back == '\0');
309 1920 : CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 2) == reinterpret_cast<void const *>(s));
310 1920 : CATCH_REQUIRE(len == 0);
311 : }
312 : CATCH_END_SECTION()
313 : // same idea with valid three byte sequences (surrogates are skipped)
314 10 : CATCH_START_SECTION("Test an invalid UTF-8 sequence (0x800 to 0xFFFF)")
315 61442 : for(char32_t wc(0x000800); wc < 0x010000; ++wc)
316 : {
317 61442 : if(wc >= 0xD800 && wc <= 0xDFFF)
318 : {
319 : // skip UTF-16 surrogates -- this is not the test for those
320 : //
321 1 : wc = 0xDFFF;
322 1 : continue;
323 : }
324 :
325 61440 : char buf[libutf8::MBS_MIN_BUFFER_LENGTH];
326 61440 : CATCH_REQUIRE(libutf8::wctombs(buf, wc, sizeof(buf)) == 3);
327 : // sanity check: the freshly encoded bytes decode by hand to wc
328 122880 : char32_t const found(((static_cast<char32_t>(buf[0]) & 0x0F) << 12)
329 61440 : | ((static_cast<char32_t>(buf[1]) & 0x3F) << 6)
330 61440 : | ((static_cast<char32_t>(buf[2]) & 0x3F) << 0));
331 61440 : CATCH_REQUIRE(found == wc);
332 61440 : CATCH_REQUIRE(buf[3] == '\0');
333 :
334 : // too short: only 2 of the 3 bytes are made available
335 : //
336 61440 : char32_t back(rand());
337 61440 : char const * s(buf);
338 61440 : size_t len(2);
339 61440 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
340 :
341 : // invalid second byte (0x01-0x7F or 0xC0-0xFF); decoding stops on
342 : // that byte
343 61440 : char const second_byte(buf[1]);
344 61440 : back = rand();
345 61440 : s = buf;
346 61440 : int c(rand() % (255 - 0x40) + 1);
347 61440 : if(c >= 0x80)
348 : {
349 20520 : c += 0x40;
350 : }
351 61440 : buf[1] = static_cast<char>(c);
352 61440 : len = 3;
353 61440 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
354 61440 : CATCH_REQUIRE(back == U'\0');
355 61440 : CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 1) == reinterpret_cast<void const *>(s));
356 61440 : CATCH_REQUIRE(len == 2);
357 61440 : buf[1] = second_byte;
358 : // same with an invalid third byte; decoding stops on that byte
359 61440 : char const third_byte(buf[2]);
360 61440 : back = rand();
361 61440 : s = buf;
362 61440 : c = rand() % (255 - 0x40) + 1;
363 61440 : if(c >= 0x80)
364 : {
365 20490 : c += 0x40;
366 : }
367 61440 : buf[2] = static_cast<char>(c);
368 61440 : len = 3;
369 61440 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
370 61440 : CATCH_REQUIRE(back == U'\0');
371 61440 : CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 2) == reinterpret_cast<void const *>(s));
372 61440 : CATCH_REQUIRE(len == 1);
373 61440 : buf[2] = third_byte;
374 :
375 : // invalid introducer (0x80 to 0xBF): a continuation byte comes first
376 : // and all three bytes get consumed
377 61440 : back = rand();
378 61440 : s = buf;
379 61440 : buf[0] = rand() % 64 + 0x80;
380 61440 : len = 3;
381 61440 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
382 61440 : CATCH_REQUIRE(back == U'\0');
383 61440 : CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 3) == reinterpret_cast<void const *>(s));
384 61440 : CATCH_REQUIRE(len == 0);
385 :
386 : // invalid introducer (0xF8 to 0xFF); all three bytes get consumed
387 : //
388 61440 : back = rand();
389 61440 : s = buf;
390 61440 : buf[0] = rand() % 8 + 0xF8;
391 61440 : len = 3;
392 61440 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
393 61440 : CATCH_REQUIRE(back == '\0');
394 61440 : CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 3) == reinterpret_cast<void const *>(s));
395 61440 : CATCH_REQUIRE(len == 0);
396 : }
397 : CATCH_END_SECTION()
398 : // NOTE(review): the title below reads like a valid conversion test, but this section damages four byte sequences
399 10 : CATCH_START_SECTION("Test UTF-32 to UTF-8 with characters between 0x010000 and 0x110000 inclusive")
400 1048577 : for(char32_t wc(0x010000); wc < 0x110000; ++wc)
401 : {
402 1048576 : char buf[libutf8::MBS_MIN_BUFFER_LENGTH];
403 1048576 : CATCH_REQUIRE(libutf8::wctombs(buf, wc, sizeof(buf)) == 4);
404 : // sanity check: the freshly encoded bytes decode by hand to wc
405 2097152 : char32_t const found(((static_cast<char32_t>(buf[0]) & 0x07) << 18)
406 1048576 : | ((static_cast<char32_t>(buf[1]) & 0x3F) << 12)
407 1048576 : | ((static_cast<char32_t>(buf[2]) & 0x3F) << 6)
408 1048576 : | ((static_cast<char32_t>(buf[3]) & 0x3F) << 0));
409 1048576 : CATCH_REQUIRE(found == wc);
410 1048576 : CATCH_REQUIRE(buf[4] == '\0');
411 :
412 1048576 : char32_t back(rand());
413 1048576 : char const * s(buf);
414 :
415 : // too short: only 3 of the 4 bytes are made available
416 : //
417 1048576 : size_t len(3);
418 1048576 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
419 :
420 : // invalid second byte (0x01-0x7F or 0xC0-0xFF); decoding stops on
421 : // that byte
422 1048576 : char const second_byte(buf[1]);
423 1048576 : back = rand();
424 1048576 : s = buf;
425 1048576 : int c(rand() % (255 - 0x40) + 1);
426 1048576 : if(c >= 0x80)
427 : {
428 351416 : c += 0x40;
429 : }
430 1048576 : buf[1] = static_cast<char>(c);
431 1048576 : len = 4;
432 1048576 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
433 1048576 : CATCH_REQUIRE(back == U'\0');
434 1048576 : CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 1) == reinterpret_cast<void const *>(s));
435 1048576 : CATCH_REQUIRE(len == 3);
436 1048576 : buf[1] = second_byte;
437 : // same with an invalid third byte; decoding stops on that byte
438 1048576 : char const third_byte(buf[2]);
439 1048576 : back = rand();
440 1048576 : s = buf;
441 1048576 : c = rand() % (255 - 0x40) + 1;
442 1048576 : if(c >= 0x80)
443 : {
444 351062 : c += 0x40;
445 : }
446 1048576 : buf[2] = static_cast<char>(c);
447 1048576 : len = 4;
448 1048576 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
449 1048576 : CATCH_REQUIRE(back == U'\0');
450 1048576 : CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 2) == reinterpret_cast<void const *>(s));
451 1048576 : CATCH_REQUIRE(len == 2);
452 1048576 : buf[2] = third_byte;
453 : // same with an invalid fourth byte; decoding stops on that byte
454 1048576 : char const forth_byte(buf[3]);
455 1048576 : back = rand();
456 1048576 : s = buf;
457 1048576 : c = rand() % (255 - 0x40) + 1;
458 1048576 : if(c >= 0x80)
459 : {
460 351432 : c += 0x40;
461 : }
462 1048576 : buf[3] = static_cast<char>(c);
463 1048576 : len = 4;
464 1048576 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
465 1048576 : CATCH_REQUIRE(back == U'\0');
466 1048576 : CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 3) == reinterpret_cast<void const *>(s));
467 1048576 : CATCH_REQUIRE(len == 1);
468 1048576 : buf[3] = forth_byte;
469 :
470 : // invalid introducer (0x80 to 0xBF) with only 3 bytes available
471 : //
472 1048576 : back = rand();
473 1048576 : s = buf;
474 1048576 : buf[0] = rand() % 64 + 0x80;
475 1048576 : len = 3;
476 1048576 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
477 1048576 : CATCH_REQUIRE(back == U'\0');
478 1048576 : CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 3) == reinterpret_cast<void const *>(s));
479 1048576 : CATCH_REQUIRE(len == 0);
480 :
481 : // invalid introducer (0x80 to 0xBF) with all 4 bytes available
482 : //
483 1048576 : back = rand();
484 1048576 : buf[0] = rand() % 64 + 0x80;
485 1048576 : s = buf;
486 1048576 : len = 4;
487 1048576 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
488 1048576 : CATCH_REQUIRE(back == U'\0');
489 1048576 : CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 4) == reinterpret_cast<void const *>(s));
490 1048576 : CATCH_REQUIRE(len == 0);
491 :
492 : // invalid introducer (0xF8 to 0xFF); all four bytes get consumed
493 : //
494 1048576 : back = rand();
495 1048576 : s = buf;
496 1048576 : buf[0] = rand() % 8 + 0xF8;
497 1048576 : len = 4;
498 1048576 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
499 1048576 : CATCH_REQUIRE(back == '\0');
500 1048576 : CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 4) == reinterpret_cast<void const *>(s));
501 1048576 : CATCH_REQUIRE(len == 0);
502 : }
503 : CATCH_END_SECTION()
504 : // resynchronization: decoding must continue with the character that follows a damaged one
505 10 : CATCH_START_SECTION("Test three random characters, destroy the second one and make sure it gets skipped properly")
506 1001 : for(int repeat(0); repeat < 1000; ++repeat)
507 : {
508 : char32_t wc[3]
509 : {
510 1000 : unittest::rand_char(true),
511 1000 : unittest::rand_char(true),
512 1000 : unittest::rand_char(true)
513 3000 : };
514 1000 : size_t sz[3] = {};
515 : // encode the three characters back to back; NOTE(review): sizeof(buf) is passed each time although s has advanced
516 1000 : char buf[libutf8::MBS_MIN_BUFFER_LENGTH * 3];
517 1000 : char * s(buf);
518 1000 : sz[0] += libutf8::wctombs(s, wc[0], sizeof(buf));
519 1000 : s += sz[0];
520 1000 : sz[1] = libutf8::wctombs(s, wc[1], sizeof(buf));
521 1000 : s += sz[1];
522 1000 : sz[2] = libutf8::wctombs(s, wc[2], sizeof(buf));
523 : // replace the first byte of the second character with a continuation byte (0x80 to 0xBF)
524 1000 : char32_t back(rand());
525 1000 : s = buf;
526 1000 : buf[sz[0]] = rand() % 64 + 0x80;
527 1000 : size_t len(sizeof(buf));
528 1000 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) != -1);
529 1000 : CATCH_REQUIRE(back == wc[0]);
530 : // the damaged second character must be reported as an error
531 1000 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
532 1000 : CATCH_REQUIRE(back == U'\0');
533 : // and the next call must return the third character
534 1000 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) != -1);
535 1000 : CATCH_REQUIRE(back == wc[2]);
536 : }
537 : CATCH_END_SECTION()
538 11 : }
539 :
540 :
541 : // vim: ts=4 sw=4 et
|