Line data Source code
1 : /* tests/character.cpp
2 : * Copyright (C) 2013-2019 Made to Order Software Corporation
3 : *
4 : * This program is free software; you can redistribute it and/or modify
5 : * it under the terms of the GNU General Public License as published by
6 : * the Free Software Foundation; either version 2 of the License, or
7 : * (at your option) any later version.
8 : *
9 : * This program is distributed in the hope that it will be useful,
10 : * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 : * GNU General Public License for more details.
13 : *
14 : * You should have received a copy of the GNU General Public License along
15 : * with this program; if not, write to the Free Software Foundation, Inc.,
16 : * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
17 : *
18 : * Authors
19 : * Alexis Wilke alexis@m2osw.com
20 : */
21 :
22 : // unit test
23 : //
24 : #include "main.h"
25 :
26 : // libutf8 lib
27 : //
28 : #include "libutf8/base.h"
29 : #include "libutf8/exception.h"
30 :
31 : // Catch2 lib
32 : //
33 : #include <catch2/catch.hpp>
34 :
35 : // C++ lib
36 : //
37 : #include <cctype>
38 : #include <iostream>
39 :
40 :
41 8 : CATCH_TEST_CASE("Character Conversions", "characters")
42 : {
43 12 : CATCH_START_SECTION("Verify minimum buffer length for MBS conversions")
44 1 : CATCH_REQUIRE(libutf8::MBS_MIN_BUFFER_LENGTH >= 5);
45 : CATCH_END_SECTION()
46 :
47 12 : CATCH_START_SECTION("Test UTF-32 to UTF-8 with characters between 0x00 and 0x7F inclusive")
48 129 : for(char32_t wc(0); wc < 0x000080; ++wc)
49 : {
50 : char buf[libutf8::MBS_MIN_BUFFER_LENGTH];
51 128 : CATCH_REQUIRE_THROWS_AS(libutf8::wctombs(buf, wc, 0), libutf8::libutf8_logic_exception);
52 128 : CATCH_REQUIRE(libutf8::wctombs(buf, wc, sizeof(buf)) == 1);
53 :
54 128 : CATCH_REQUIRE(static_cast<char32_t>(buf[0]) == wc);
55 128 : CATCH_REQUIRE(buf[1] == '\0');
56 :
57 128 : char32_t back(rand());
58 128 : char const * s(buf);
59 128 : size_t len(1);
60 128 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == 1);
61 128 : CATCH_REQUIRE(back == wc);
62 : }
63 : CATCH_END_SECTION()
64 :
65 12 : CATCH_START_SECTION("Test UTF-32 to UTF-8 with characters between 0x80 and 0x7FF inclusive")
66 1921 : for(char32_t wc(0x000080); wc < 0x000800; ++wc)
67 : {
68 : char buf[libutf8::MBS_MIN_BUFFER_LENGTH];
69 1920 : CATCH_REQUIRE_THROWS_AS(libutf8::wctombs(buf, wc, 0), libutf8::libutf8_logic_exception);
70 1920 : CATCH_REQUIRE_THROWS_AS(libutf8::wctombs(buf, wc, 1), libutf8::libutf8_logic_exception);
71 1920 : CATCH_REQUIRE(libutf8::wctombs(buf, wc, sizeof(buf)) == 2);
72 :
73 1920 : char32_t const found(((static_cast<char32_t>(buf[0]) & 0x1F) << 6)
74 1920 : | ((static_cast<char32_t>(buf[1]) & 0x3F) << 0));
75 1920 : CATCH_REQUIRE(found == wc);
76 1920 : CATCH_REQUIRE(buf[2] == '\0');
77 :
78 1920 : char32_t back(rand());
79 1920 : char const * s(buf);
80 1920 : size_t len(2);
81 1920 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == 2);
82 1920 : CATCH_REQUIRE(back == wc);
83 : }
84 : CATCH_END_SECTION()
85 :
86 12 : CATCH_START_SECTION("Test UTF-32 to UTF-8 with characters between 0x800 and 0xFFFF inclusive")
87 61442 : for(char32_t wc(0x000800); wc < 0x010000; ++wc)
88 : {
89 61441 : if(wc >= 0xD800 && wc <= 0xDFFF)
90 : {
91 : // skip UTF-16 surrogates
92 : //
93 1 : wc = 0xDFFF;
94 1 : continue;
95 : }
96 :
97 : char buf[libutf8::MBS_MIN_BUFFER_LENGTH];
98 61440 : if(rand() % 10 == 0)
99 : {
100 6115 : CATCH_REQUIRE_THROWS_AS(libutf8::wctombs(buf, wc, 0), libutf8::libutf8_logic_exception);
101 6115 : CATCH_REQUIRE_THROWS_AS(libutf8::wctombs(buf, wc, 1), libutf8::libutf8_logic_exception);
102 6115 : CATCH_REQUIRE_THROWS_AS(libutf8::wctombs(buf, wc, 2), libutf8::libutf8_logic_exception);
103 : }
104 61440 : CATCH_REQUIRE(libutf8::wctombs(buf, wc, sizeof(buf)) == 3);
105 :
106 61440 : char32_t const found(((static_cast<char32_t>(buf[0]) & 0x0F) << 12)
107 61440 : | ((static_cast<char32_t>(buf[1]) & 0x3F) << 6)
108 61440 : | ((static_cast<char32_t>(buf[2]) & 0x3F) << 0));
109 61440 : CATCH_REQUIRE(found == wc);
110 61440 : CATCH_REQUIRE(buf[3] == '\0');
111 :
112 61440 : char32_t back(rand());
113 61440 : char const * s(buf);
114 61440 : size_t len(3);
115 61440 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == 3);
116 61440 : CATCH_REQUIRE(back == wc);
117 : }
118 : CATCH_END_SECTION()
119 :
120 12 : CATCH_START_SECTION("Test UTF-32 to UTF-8 with characters between 0x10000 and 0x10FFFF inclusive")
121 1048577 : for(char32_t wc(0x010000); wc < 0x110000; ++wc)
122 : {
123 : char buf[libutf8::MBS_MIN_BUFFER_LENGTH];
124 1048576 : if(rand() % 100 == 0)
125 : {
126 10441 : CATCH_REQUIRE_THROWS_AS(libutf8::wctombs(buf, wc, 0), libutf8::libutf8_logic_exception);
127 10441 : CATCH_REQUIRE_THROWS_AS(libutf8::wctombs(buf, wc, 1), libutf8::libutf8_logic_exception);
128 10441 : CATCH_REQUIRE_THROWS_AS(libutf8::wctombs(buf, wc, 2), libutf8::libutf8_logic_exception);
129 10441 : CATCH_REQUIRE_THROWS_AS(libutf8::wctombs(buf, wc, 3), libutf8::libutf8_logic_exception);
130 : }
131 1048576 : CATCH_REQUIRE(libutf8::wctombs(buf, wc, sizeof(buf)) == 4);
132 :
133 1048576 : char32_t const found(((static_cast<char32_t>(buf[0]) & 0x07) << 18)
134 1048576 : | ((static_cast<char32_t>(buf[1]) & 0x3F) << 12)
135 1048576 : | ((static_cast<char32_t>(buf[2]) & 0x3F) << 6)
136 1048576 : | ((static_cast<char32_t>(buf[3]) & 0x3F) << 0));
137 1048576 : CATCH_REQUIRE(found == wc);
138 1048576 : CATCH_REQUIRE(buf[4] == '\0');
139 :
140 1048576 : char32_t back(rand());
141 1048576 : char const * s(buf);
142 1048576 : size_t len(4);
143 1048576 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == 4);
144 1048576 : CATCH_REQUIRE(back == wc);
145 : }
146 : CATCH_END_SECTION()
147 :
148 12 : CATCH_START_SECTION("Test UTF-32 to UTF-8 with an empty input string")
149 11 : for(char32_t repeat(0); repeat < 10; ++repeat)
150 : {
151 : char buf[libutf8::MBS_MIN_BUFFER_LENGTH];
152 : char copy[libutf8::MBS_MIN_BUFFER_LENGTH];
153 60 : for(size_t idx(0); idx < libutf8::MBS_MIN_BUFFER_LENGTH; ++idx)
154 : {
155 50 : buf[idx] = rand();
156 50 : copy[idx] = buf[idx];
157 : }
158 10 : char const * s(buf);
159 10 : char32_t null = rand();
160 10 : size_t len(0);
161 10 : CATCH_REQUIRE(libutf8::mbstowc(null, s, len) == 0);
162 10 : CATCH_REQUIRE(null == '\0');
163 60 : for(size_t idx(0); idx < libutf8::MBS_MIN_BUFFER_LENGTH; ++idx)
164 : {
165 50 : CATCH_REQUIRE(copy[idx] == buf[idx]);
166 : }
167 : }
168 : CATCH_END_SECTION()
169 6 : }
170 :
171 :
172 4 : CATCH_TEST_CASE("Invalid UTF-32 Character to UTF-8", "characters,invalid")
173 : {
174 4 : CATCH_START_SECTION("Verify that surrogates do not work in UTF-8")
175 2049 : for(char32_t wc(0x00D800); wc < 0x00E000; ++wc)
176 : {
177 : char buf[libutf8::MBS_MIN_BUFFER_LENGTH]
178 : {
179 : 'n', 'o', 't', 'e', 'm' //, 'p', 't', 'y'
180 2048 : };
181 2048 : CATCH_REQUIRE(libutf8::wctombs(buf, wc, sizeof(buf)) == -1);
182 2048 : CATCH_REQUIRE(buf[0] == '\0');
183 : }
184 : CATCH_END_SECTION()
185 :
186 4 : CATCH_START_SECTION("Verify that too large a number is not supported")
187 1001 : for(int repeat(0); repeat < 1000; ++repeat)
188 : {
189 : char buf[libutf8::MBS_MIN_BUFFER_LENGTH]
190 : {
191 : 'n', 'o', 't', 'e', 'm' //, 'p', 't', 'y'
192 1000 : };
193 1000 : char32_t wc(0);
194 0 : do
195 : {
196 1000 : wc = (rand() << 16) + (rand() & 0x0000FFFF);
197 : }
198 1000 : while(wc < 0x110000);
199 1000 : CATCH_REQUIRE(libutf8::wctombs(buf, wc, sizeof(buf)) == -1);
200 1000 : CATCH_REQUIRE(buf[0] == '\0');
201 : }
202 : CATCH_END_SECTION()
203 2 : }
204 :
205 :
206 7 : CATCH_TEST_CASE("Invalid UTF-8 Character to UTF-32", "characters,invalid")
207 : {
208 10 : CATCH_START_SECTION("Verify that surrogates do not work in UTF-8")
209 2049 : for(char32_t wc(0x00D800); wc < 0x00E000; ++wc)
210 : {
211 : //int mbstowc(char32_t & wc, char const * & mb, size_t & len);
212 2048 : char buf[libutf8::MBS_MIN_BUFFER_LENGTH] = {};
213 2048 : buf[0] = static_cast<char>((wc >> 12) | 0xE0);
214 2048 : buf[1] = ((wc >> 6) & 0x3F) | 0x80;
215 2048 : buf[2] = (wc & 0x3F) | 0x80;
216 2048 : buf[3] = '\0';
217 2048 : char const * s = buf;
218 2048 : size_t len(3);
219 2048 : char32_t cwc(rand());
220 2048 : CATCH_REQUIRE(libutf8::mbstowc(cwc, s, len) == -1);
221 2048 : CATCH_REQUIRE(cwc == U'\0');
222 2048 : char const c1(static_cast<char>((wc >> 12) | 0xE0));
223 2048 : CATCH_REQUIRE(buf[0] == c1);
224 2048 : char const c2(static_cast<char>(((wc >> 6) & 0x3F) | 0x80));
225 2048 : CATCH_REQUIRE(buf[1] == c2);
226 2048 : char const c3((wc & 0x3F) | 0x80);
227 2048 : CATCH_REQUIRE(buf[2] == c3);
228 2048 : CATCH_REQUIRE(buf[3] == '\0');
229 2048 : CATCH_REQUIRE(s == buf + 3);
230 2048 : CATCH_REQUIRE(len == 0);
231 : }
232 : CATCH_END_SECTION()
233 :
234 : //CATCH_START_SECTION("Verify that too large a number is not supported")
235 : // for(int idx(0); idx < 1000; ++idx)
236 : // {
237 : // char buf[libutf8::MBS_MIN_BUFFER_LENGTH]
238 : // {
239 : // 'n', 'o', 't', 'e', 'm' //, 'p', 't', 'y'
240 : // };
241 : // char32_t wc(0);
242 : // do
243 : // {
244 : // wc = (rand() << 16) + (rand() & 0x0000FFFF);
245 : // }
246 : // while(wc < 0x110000);
247 : // CATCH_REQUIRE(libutf8::wctombs(buf, wc, sizeof(buf)) == -1);
248 : // CATCH_REQUIRE(buf[0] == '\0');
249 : // }
250 : //CATCH_END_SECTION()
251 :
252 10 : CATCH_START_SECTION("Test an invalid UTF-8 sequence (0x80 to 0x7FF)")
253 1921 : for(char32_t wc(0x000080); wc < 0x000800; ++wc)
254 : {
255 : char buf[libutf8::MBS_MIN_BUFFER_LENGTH];
256 1920 : CATCH_REQUIRE(libutf8::wctombs(buf, wc, sizeof(buf)) == 2);
257 :
258 1920 : char32_t const found(((static_cast<char32_t>(buf[0]) & 0x1F) << 6)
259 1920 : | ((static_cast<char32_t>(buf[1]) & 0x3F) << 0));
260 1920 : CATCH_REQUIRE(found == wc);
261 1920 : CATCH_REQUIRE(buf[2] == '\0');
262 :
263 : // too short
264 : //
265 1920 : char32_t back(rand());
266 1920 : char const * s(buf);
267 1920 : size_t len(1);
268 1920 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
269 :
270 : // invalid middle byte
271 : //
272 1920 : char const second_byte(buf[1]);
273 1920 : back = rand();
274 1920 : s = buf;
275 1920 : int c(rand() % (255 - 0x40) + 1);
276 1920 : if(c >= 0x80)
277 : {
278 641 : c += 0x40;
279 : }
280 1920 : buf[1] = static_cast<char>(c);
281 1920 : len = 2;
282 1920 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
283 1920 : CATCH_REQUIRE(back == U'\0');
284 1920 : CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 1) == reinterpret_cast<void const *>(s));
285 1920 : CATCH_REQUIRE(len == 1);
286 1920 : buf[1] = second_byte;
287 :
288 : // invalid introducer (0x80 to 0xBF)
289 : //
290 1920 : back = rand();
291 1920 : s = buf;
292 1920 : buf[0] = rand() % 64 + 0x80;
293 1920 : len = 2;
294 1920 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
295 1920 : CATCH_REQUIRE(back == U'\0');
296 1920 : CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 2) == reinterpret_cast<void const *>(s));
297 1920 : CATCH_REQUIRE(len == 0);
298 :
299 : // invalid introducer (0xF8 to 0xFF)
300 : //
301 1920 : back = rand();
302 1920 : s = buf;
303 1920 : buf[0] = rand() % 8 + 0xF8;
304 1920 : len = 2;
305 1920 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
306 1920 : CATCH_REQUIRE(back == '\0');
307 1920 : CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 2) == reinterpret_cast<void const *>(s));
308 1920 : CATCH_REQUIRE(len == 0);
309 : }
310 : CATCH_END_SECTION()
311 :
312 10 : CATCH_START_SECTION("Test an invalid UTF-8 sequence (0x800 to 0xFFFF)")
313 61442 : for(char32_t wc(0x000800); wc < 0x010000; ++wc)
314 : {
315 61441 : if(wc >= 0xD800 && wc <= 0xDFFF)
316 : {
317 : // skip UTF-16 surrogates -- this is not the test for those
318 : //
319 1 : wc = 0xDFFF;
320 1 : continue;
321 : }
322 :
323 : char buf[libutf8::MBS_MIN_BUFFER_LENGTH];
324 61440 : CATCH_REQUIRE(libutf8::wctombs(buf, wc, sizeof(buf)) == 3);
325 :
326 61440 : char32_t const found(((static_cast<char32_t>(buf[0]) & 0x0F) << 12)
327 61440 : | ((static_cast<char32_t>(buf[1]) & 0x3F) << 6)
328 61440 : | ((static_cast<char32_t>(buf[2]) & 0x3F) << 0));
329 61440 : CATCH_REQUIRE(found == wc);
330 61440 : CATCH_REQUIRE(buf[3] == '\0');
331 :
332 : // too short
333 : //
334 61440 : char32_t back(rand());
335 61440 : char const * s(buf);
336 61440 : size_t len(2);
337 61440 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
338 :
339 : // invalid middle byte
340 : //
341 61440 : char const second_byte(buf[1]);
342 61440 : back = rand();
343 61440 : s = buf;
344 61440 : int c(rand() % (255 - 0x40) + 1);
345 61440 : if(c >= 0x80)
346 : {
347 20508 : c += 0x40;
348 : }
349 61440 : buf[1] = static_cast<char>(c);
350 61440 : len = 3;
351 61440 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
352 61440 : CATCH_REQUIRE(back == U'\0');
353 61440 : CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 1) == reinterpret_cast<void const *>(s));
354 61440 : CATCH_REQUIRE(len == 2);
355 61440 : buf[1] = second_byte;
356 :
357 61440 : char const third_byte(buf[2]);
358 61440 : back = rand();
359 61440 : s = buf;
360 61440 : c = rand() % (255 - 0x40) + 1;
361 61440 : if(c >= 0x80)
362 : {
363 20606 : c += 0x40;
364 : }
365 61440 : buf[2] = static_cast<char>(c);
366 61440 : len = 3;
367 61440 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
368 61440 : CATCH_REQUIRE(back == U'\0');
369 61440 : CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 2) == reinterpret_cast<void const *>(s));
370 61440 : CATCH_REQUIRE(len == 1);
371 61440 : buf[2] = third_byte;
372 :
373 : // invalid introducer (0x80 to 0xBF)
374 : //
375 61440 : back = rand();
376 61440 : s = buf;
377 61440 : buf[0] = rand() % 64 + 0x80;
378 61440 : len = 3;
379 61440 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
380 61440 : CATCH_REQUIRE(back == U'\0');
381 61440 : CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 3) == reinterpret_cast<void const *>(s));
382 61440 : CATCH_REQUIRE(len == 0);
383 :
384 : // invalid introducer (0xF8 to 0xFF)
385 : //
386 61440 : back = rand();
387 61440 : s = buf;
388 61440 : buf[0] = rand() % 8 + 0xF8;
389 61440 : len = 3;
390 61440 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
391 61440 : CATCH_REQUIRE(back == '\0');
392 61440 : CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 3) == reinterpret_cast<void const *>(s));
393 61440 : CATCH_REQUIRE(len == 0);
394 : }
395 : CATCH_END_SECTION()
396 :
397 10 : CATCH_START_SECTION("Test UTF-32 to UTF-8 with characters between 0x010000 and 0x110000 inclusive")
398 1048577 : for(char32_t wc(0x010000); wc < 0x110000; ++wc)
399 : {
400 : char buf[libutf8::MBS_MIN_BUFFER_LENGTH];
401 1048576 : CATCH_REQUIRE(libutf8::wctombs(buf, wc, sizeof(buf)) == 4);
402 :
403 1048576 : char32_t const found(((static_cast<char32_t>(buf[0]) & 0x07) << 18)
404 1048576 : | ((static_cast<char32_t>(buf[1]) & 0x3F) << 12)
405 1048576 : | ((static_cast<char32_t>(buf[2]) & 0x3F) << 6)
406 1048576 : | ((static_cast<char32_t>(buf[3]) & 0x3F) << 0));
407 1048576 : CATCH_REQUIRE(found == wc);
408 1048576 : CATCH_REQUIRE(buf[4] == '\0');
409 :
410 1048576 : char32_t back(rand());
411 1048576 : char const * s(buf);
412 :
413 : // too short
414 : //
415 1048576 : size_t len(3);
416 1048576 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
417 :
418 : // invalid middle byte
419 : //
420 1048576 : char const second_byte(buf[1]);
421 1048576 : back = rand();
422 1048576 : s = buf;
423 1048576 : int c(rand() % (255 - 0x40) + 1);
424 1048576 : if(c >= 0x80)
425 : {
426 351952 : c += 0x40;
427 : }
428 1048576 : buf[1] = static_cast<char>(c);
429 1048576 : len = 4;
430 1048576 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
431 1048576 : CATCH_REQUIRE(back == U'\0');
432 1048576 : CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 1) == reinterpret_cast<void const *>(s));
433 1048576 : CATCH_REQUIRE(len == 3);
434 1048576 : buf[1] = second_byte;
435 :
436 1048576 : char const third_byte(buf[2]);
437 1048576 : back = rand();
438 1048576 : s = buf;
439 1048576 : c = rand() % (255 - 0x40) + 1;
440 1048576 : if(c >= 0x80)
441 : {
442 351136 : c += 0x40;
443 : }
444 1048576 : buf[2] = static_cast<char>(c);
445 1048576 : len = 4;
446 1048576 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
447 1048576 : CATCH_REQUIRE(back == U'\0');
448 1048576 : CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 2) == reinterpret_cast<void const *>(s));
449 1048576 : CATCH_REQUIRE(len == 2);
450 1048576 : buf[2] = third_byte;
451 :
452 1048576 : char const forth_byte(buf[3]);
453 1048576 : back = rand();
454 1048576 : s = buf;
455 1048576 : c = rand() % (255 - 0x40) + 1;
456 1048576 : if(c >= 0x80)
457 : {
458 351034 : c += 0x40;
459 : }
460 1048576 : buf[3] = static_cast<char>(c);
461 1048576 : len = 4;
462 1048576 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
463 1048576 : CATCH_REQUIRE(back == U'\0');
464 1048576 : CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 3) == reinterpret_cast<void const *>(s));
465 1048576 : CATCH_REQUIRE(len == 1);
466 1048576 : buf[3] = forth_byte;
467 :
468 : // invalid introducer (0x80 to 0xBF)
469 : //
470 1048576 : back = rand();
471 1048576 : s = buf;
472 1048576 : buf[0] = rand() % 64 + 0x80;
473 1048576 : len = 3;
474 1048576 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
475 1048576 : CATCH_REQUIRE(back == U'\0');
476 1048576 : CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 3) == reinterpret_cast<void const *>(s));
477 1048576 : CATCH_REQUIRE(len == 0);
478 :
479 : // invalid introducer (0x80 to 0xBF)
480 : //
481 1048576 : back = rand();
482 1048576 : buf[0] = rand() % 64 + 0x80;
483 1048576 : s = buf;
484 1048576 : len = 4;
485 1048576 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
486 1048576 : CATCH_REQUIRE(back == U'\0');
487 1048576 : CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 4) == reinterpret_cast<void const *>(s));
488 1048576 : CATCH_REQUIRE(len == 0);
489 :
490 : // invalid introducer (0xF8 to 0xFF)
491 : //
492 1048576 : back = rand();
493 1048576 : s = buf;
494 1048576 : buf[0] = rand() % 8 + 0xF8;
495 1048576 : len = 4;
496 1048576 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
497 1048576 : CATCH_REQUIRE(back == '\0');
498 1048576 : CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 4) == reinterpret_cast<void const *>(s));
499 1048576 : CATCH_REQUIRE(len == 0);
500 : }
501 : CATCH_END_SECTION()
502 :
503 10 : CATCH_START_SECTION("Test three random characters, destroy the second one and make sure it gets skipped properly")
504 1001 : for(int repeat(0); repeat < 1000; ++repeat)
505 : {
506 : char32_t wc[3]
507 : {
508 1000 : unittest::rand_char(true),
509 1000 : unittest::rand_char(true),
510 1000 : unittest::rand_char(true)
511 3000 : };
512 1000 : size_t sz[3] = {};
513 :
514 : char buf[libutf8::MBS_MIN_BUFFER_LENGTH * 3];
515 1000 : char * s(buf);
516 1000 : sz[0] += libutf8::wctombs(s, wc[0], sizeof(buf));
517 1000 : s += sz[0];
518 1000 : sz[1] = libutf8::wctombs(s, wc[1], sizeof(buf));
519 1000 : s += sz[1];
520 1000 : sz[2] = libutf8::wctombs(s, wc[2], sizeof(buf));
521 :
522 1000 : char32_t back(rand());
523 1000 : s = buf;
524 1000 : buf[sz[0]] = rand() % 64 + 0x80;
525 1000 : size_t len(sizeof(buf));
526 1000 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) != -1);
527 1000 : CATCH_REQUIRE(back == wc[0]);
528 :
529 1000 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
530 1000 : CATCH_REQUIRE(back == U'\0');
531 :
532 1000 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) != -1);
533 1000 : CATCH_REQUIRE(back == wc[2]);
534 : }
535 : CATCH_END_SECTION()
536 11 : }
537 :
538 :
539 : // vim: ts=4 sw=4 et
|