Line data Source code
1 : /* tests/character.cpp
2 : * Copyright (C) 2013-2019 Made to Order Software Corporation
3 : *
4 : * This program is free software; you can redistribute it and/or modify
5 : * it under the terms of the GNU General Public License as published by
6 : * the Free Software Foundation; either version 2 of the License, or
7 : * (at your option) any later version.
8 : *
9 : * This program is distributed in the hope that it will be useful,
10 : * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 : * GNU General Public License for more details.
13 : *
14 : * You should have received a copy of the GNU General Public License along
15 : * with this program; if not, write to the Free Software Foundation, Inc.,
16 : * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
17 : *
18 : * Authors
19 : * Alexis Wilke alexis@m2osw.com
20 : */
21 :
22 : // unit test
23 : //
24 : #include "main.h"
25 :
26 : // libutf8 lib
27 : //
28 : #include "libutf8/base.h"
29 : #include "libutf8/exception.h"
30 :
31 : // C++ lib
32 : //
33 : #include <cctype>
34 : #include <iostream>
35 :
36 :
37 8 : CATCH_TEST_CASE("Character Conversions", "[characters]")
38 : {
39 12 : CATCH_START_SECTION("Verify minimum buffer length for MBS conversions")
40 1 : CATCH_REQUIRE(libutf8::MBS_MIN_BUFFER_LENGTH >= 5);
41 : CATCH_END_SECTION()
42 :
43 12 : CATCH_START_SECTION("Test UTF-32 to UTF-8 with characters between 0x00 and 0x7F inclusive")
44 129 : for(char32_t wc(0); wc < 0x000080; ++wc)
45 : {
46 : char buf[libutf8::MBS_MIN_BUFFER_LENGTH];
47 128 : CATCH_REQUIRE_THROWS_AS(libutf8::wctombs(buf, wc, 0), libutf8::libutf8_logic_exception);
48 128 : CATCH_REQUIRE(libutf8::wctombs(buf, wc, sizeof(buf)) == 1);
49 :
50 128 : CATCH_REQUIRE(static_cast<char32_t>(buf[0]) == wc);
51 128 : CATCH_REQUIRE(buf[1] == '\0');
52 :
53 128 : char32_t back(rand());
54 128 : char const * s(buf);
55 128 : size_t len(1);
56 128 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == 1);
57 128 : CATCH_REQUIRE(back == wc);
58 : }
59 : CATCH_END_SECTION()
60 :
61 12 : CATCH_START_SECTION("Test UTF-32 to UTF-8 with characters between 0x80 and 0x7FF inclusive")
62 1921 : for(char32_t wc(0x000080); wc < 0x000800; ++wc)
63 : {
64 : char buf[libutf8::MBS_MIN_BUFFER_LENGTH];
65 1920 : CATCH_REQUIRE_THROWS_AS(libutf8::wctombs(buf, wc, 0), libutf8::libutf8_logic_exception);
66 1920 : CATCH_REQUIRE_THROWS_AS(libutf8::wctombs(buf, wc, 1), libutf8::libutf8_logic_exception);
67 1920 : CATCH_REQUIRE(libutf8::wctombs(buf, wc, sizeof(buf)) == 2);
68 :
69 1920 : char32_t const found(((static_cast<char32_t>(buf[0]) & 0x1F) << 6)
70 1920 : | ((static_cast<char32_t>(buf[1]) & 0x3F) << 0));
71 1920 : CATCH_REQUIRE(found == wc);
72 1920 : CATCH_REQUIRE(buf[2] == '\0');
73 :
74 1920 : char32_t back(rand());
75 1920 : char const * s(buf);
76 1920 : size_t len(2);
77 1920 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == 2);
78 1920 : CATCH_REQUIRE(back == wc);
79 : }
80 : CATCH_END_SECTION()
81 :
82 12 : CATCH_START_SECTION("Test UTF-32 to UTF-8 with characters between 0x800 and 0xFFFF inclusive")
83 61442 : for(char32_t wc(0x000800); wc < 0x010000; ++wc)
84 : {
85 61441 : if(wc >= 0xD800 && wc <= 0xDFFF)
86 : {
87 : // skip UTF-16 surrogates
88 : //
89 1 : wc = 0xDFFF;
90 1 : continue;
91 : }
92 :
93 : char buf[libutf8::MBS_MIN_BUFFER_LENGTH];
94 61440 : if(rand() % 10 == 0)
95 : {
96 6141 : CATCH_REQUIRE_THROWS_AS(libutf8::wctombs(buf, wc, 0), libutf8::libutf8_logic_exception);
97 6141 : CATCH_REQUIRE_THROWS_AS(libutf8::wctombs(buf, wc, 1), libutf8::libutf8_logic_exception);
98 6141 : CATCH_REQUIRE_THROWS_AS(libutf8::wctombs(buf, wc, 2), libutf8::libutf8_logic_exception);
99 : }
100 61440 : CATCH_REQUIRE(libutf8::wctombs(buf, wc, sizeof(buf)) == 3);
101 :
102 61440 : char32_t const found(((static_cast<char32_t>(buf[0]) & 0x0F) << 12)
103 61440 : | ((static_cast<char32_t>(buf[1]) & 0x3F) << 6)
104 61440 : | ((static_cast<char32_t>(buf[2]) & 0x3F) << 0));
105 61440 : CATCH_REQUIRE(found == wc);
106 61440 : CATCH_REQUIRE(buf[3] == '\0');
107 :
108 61440 : char32_t back(rand());
109 61440 : char const * s(buf);
110 61440 : size_t len(3);
111 61440 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == 3);
112 61440 : CATCH_REQUIRE(back == wc);
113 : }
114 : CATCH_END_SECTION()
115 :
116 12 : CATCH_START_SECTION("Test UTF-32 to UTF-8 with characters between 0x10000 and 0x10FFFF inclusive")
117 1048577 : for(char32_t wc(0x010000); wc < 0x110000; ++wc)
118 : {
119 : char buf[libutf8::MBS_MIN_BUFFER_LENGTH];
120 1048576 : if(rand() % 100 == 0)
121 : {
122 10516 : CATCH_REQUIRE_THROWS_AS(libutf8::wctombs(buf, wc, 0), libutf8::libutf8_logic_exception);
123 10516 : CATCH_REQUIRE_THROWS_AS(libutf8::wctombs(buf, wc, 1), libutf8::libutf8_logic_exception);
124 10516 : CATCH_REQUIRE_THROWS_AS(libutf8::wctombs(buf, wc, 2), libutf8::libutf8_logic_exception);
125 10516 : CATCH_REQUIRE_THROWS_AS(libutf8::wctombs(buf, wc, 3), libutf8::libutf8_logic_exception);
126 : }
127 1048576 : CATCH_REQUIRE(libutf8::wctombs(buf, wc, sizeof(buf)) == 4);
128 :
129 1048576 : char32_t const found(((static_cast<char32_t>(buf[0]) & 0x07) << 18)
130 1048576 : | ((static_cast<char32_t>(buf[1]) & 0x3F) << 12)
131 1048576 : | ((static_cast<char32_t>(buf[2]) & 0x3F) << 6)
132 1048576 : | ((static_cast<char32_t>(buf[3]) & 0x3F) << 0));
133 1048576 : CATCH_REQUIRE(found == wc);
134 1048576 : CATCH_REQUIRE(buf[4] == '\0');
135 :
136 1048576 : char32_t back(rand());
137 1048576 : char const * s(buf);
138 1048576 : size_t len(4);
139 1048576 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == 4);
140 1048576 : CATCH_REQUIRE(back == wc);
141 : }
142 : CATCH_END_SECTION()
143 :
144 12 : CATCH_START_SECTION("Test UTF-32 to UTF-8 with an empty input string")
145 11 : for(char32_t repeat(0); repeat < 10; ++repeat)
146 : {
147 : char buf[libutf8::MBS_MIN_BUFFER_LENGTH];
148 : char copy[libutf8::MBS_MIN_BUFFER_LENGTH];
149 60 : for(size_t idx(0); idx < libutf8::MBS_MIN_BUFFER_LENGTH; ++idx)
150 : {
151 50 : buf[idx] = rand();
152 50 : copy[idx] = buf[idx];
153 : }
154 10 : char const * s(buf);
155 10 : char32_t null = rand();
156 10 : size_t len(0);
157 10 : CATCH_REQUIRE(libutf8::mbstowc(null, s, len) == 0);
158 10 : CATCH_REQUIRE(null == '\0');
159 60 : for(size_t idx(0); idx < libutf8::MBS_MIN_BUFFER_LENGTH; ++idx)
160 : {
161 50 : CATCH_REQUIRE(copy[idx] == buf[idx]);
162 : }
163 : }
164 : CATCH_END_SECTION()
165 6 : }
166 :
167 :
168 4 : CATCH_TEST_CASE("Invalid UTF-32 Character to UTF-8", "[characters],[invalid]")
169 : {
170 4 : CATCH_START_SECTION("Verify that surrogates do not work in UTF-8")
171 2049 : for(char32_t wc(0x00D800); wc < 0x00E000; ++wc)
172 : {
173 : char buf[libutf8::MBS_MIN_BUFFER_LENGTH]
174 : {
175 : 'n', 'o', 't', 'e', 'm' //, 'p', 't', 'y'
176 2048 : };
177 2048 : CATCH_REQUIRE(libutf8::wctombs(buf, wc, sizeof(buf)) == -1);
178 2048 : CATCH_REQUIRE(buf[0] == '\0');
179 : }
180 : CATCH_END_SECTION()
181 :
182 4 : CATCH_START_SECTION("Verify that too large a number is not supported")
183 1001 : for(int repeat(0); repeat < 1000; ++repeat)
184 : {
185 : char buf[libutf8::MBS_MIN_BUFFER_LENGTH]
186 : {
187 : 'n', 'o', 't', 'e', 'm' //, 'p', 't', 'y'
188 1000 : };
189 1000 : char32_t wc(0);
190 0 : do
191 : {
192 1000 : wc = (rand() << 16) + (rand() & 0x0000FFFF);
193 : }
194 1000 : while(wc < 0x110000);
195 1000 : CATCH_REQUIRE(libutf8::wctombs(buf, wc, sizeof(buf)) == -1);
196 1000 : CATCH_REQUIRE(buf[0] == '\0');
197 : }
198 : CATCH_END_SECTION()
199 2 : }
200 :
201 :
202 7 : CATCH_TEST_CASE("Invalid UTF-8 Character to UTF-32", "[characters],[invalid]")
203 : {
204 10 : CATCH_START_SECTION("Verify that surrogates do not work in UTF-8")
205 2049 : for(char32_t wc(0x00D800); wc < 0x00E000; ++wc)
206 : {
207 : //int mbstowc(char32_t & wc, char const * & mb, size_t & len);
208 2048 : char buf[libutf8::MBS_MIN_BUFFER_LENGTH] = {};
209 2048 : buf[0] = static_cast<char>((wc >> 12) | 0xE0);
210 2048 : buf[1] = ((wc >> 6) & 0x3F) | 0x80;
211 2048 : buf[2] = (wc & 0x3F) | 0x80;
212 2048 : buf[3] = '\0';
213 2048 : char const * s = buf;
214 2048 : size_t len(3);
215 2048 : char32_t cwc(rand());
216 2048 : CATCH_REQUIRE(libutf8::mbstowc(cwc, s, len) == -1);
217 2048 : CATCH_REQUIRE(cwc == U'\0');
218 2048 : char const c1(static_cast<char>((wc >> 12) | 0xE0));
219 2048 : CATCH_REQUIRE(buf[0] == c1);
220 2048 : char const c2(static_cast<char>(((wc >> 6) & 0x3F) | 0x80));
221 2048 : CATCH_REQUIRE(buf[1] == c2);
222 2048 : char const c3((wc & 0x3F) | 0x80);
223 2048 : CATCH_REQUIRE(buf[2] == c3);
224 2048 : CATCH_REQUIRE(buf[3] == '\0');
225 2048 : CATCH_REQUIRE(s == buf + 3);
226 2048 : CATCH_REQUIRE(len == 0);
227 : }
228 : CATCH_END_SECTION()
229 :
230 : //CATCH_START_SECTION("Verify that too large a number is not supported")
231 : // for(int idx(0); idx < 1000; ++idx)
232 : // {
233 : // char buf[libutf8::MBS_MIN_BUFFER_LENGTH]
234 : // {
235 : // 'n', 'o', 't', 'e', 'm' //, 'p', 't', 'y'
236 : // };
237 : // char32_t wc(0);
238 : // do
239 : // {
240 : // wc = (rand() << 16) + (rand() & 0x0000FFFF);
241 : // }
242 : // while(wc < 0x110000);
243 : // CATCH_REQUIRE(libutf8::wctombs(buf, wc, sizeof(buf)) == -1);
244 : // CATCH_REQUIRE(buf[0] == '\0');
245 : // }
246 : //CATCH_END_SECTION()
247 :
248 10 : CATCH_START_SECTION("Test an invalid UTF-8 sequence (0x80 to 0x7FF)")
249 1921 : for(char32_t wc(0x000080); wc < 0x000800; ++wc)
250 : {
251 : char buf[libutf8::MBS_MIN_BUFFER_LENGTH];
252 1920 : CATCH_REQUIRE(libutf8::wctombs(buf, wc, sizeof(buf)) == 2);
253 :
254 1920 : char32_t const found(((static_cast<char32_t>(buf[0]) & 0x1F) << 6)
255 1920 : | ((static_cast<char32_t>(buf[1]) & 0x3F) << 0));
256 1920 : CATCH_REQUIRE(found == wc);
257 1920 : CATCH_REQUIRE(buf[2] == '\0');
258 :
259 : // too short
260 : //
261 1920 : char32_t back(rand());
262 1920 : char const * s(buf);
263 1920 : size_t len(1);
264 1920 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
265 :
266 : // invalid middle byte
267 : //
268 1920 : char const second_byte(buf[1]);
269 1920 : back = rand();
270 1920 : s = buf;
271 1920 : int c(rand() % (255 - 0x40) + 1);
272 1920 : if(c >= 0x80)
273 : {
274 652 : c += 0x40;
275 : }
276 1920 : buf[1] = static_cast<char>(c);
277 1920 : len = 2;
278 1920 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
279 1920 : CATCH_REQUIRE(back == U'\0');
280 1920 : CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 1) == reinterpret_cast<void const *>(s));
281 1920 : CATCH_REQUIRE(len == 1);
282 1920 : buf[1] = second_byte;
283 :
284 : // invalid introducer (0x80 to 0xBF)
285 : //
286 1920 : back = rand();
287 1920 : s = buf;
288 1920 : buf[0] = rand() % 64 + 0x80;
289 1920 : len = 2;
290 1920 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
291 1920 : CATCH_REQUIRE(back == U'\0');
292 1920 : CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 2) == reinterpret_cast<void const *>(s));
293 1920 : CATCH_REQUIRE(len == 0);
294 :
295 : // invalid introducer (0xF8 to 0xFF)
296 : //
297 1920 : back = rand();
298 1920 : s = buf;
299 1920 : buf[0] = rand() % 8 + 0xF8;
300 1920 : len = 2;
301 1920 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
302 1920 : CATCH_REQUIRE(back == '\0');
303 1920 : CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 2) == reinterpret_cast<void const *>(s));
304 1920 : CATCH_REQUIRE(len == 0);
305 : }
306 : CATCH_END_SECTION()
307 :
308 10 : CATCH_START_SECTION("Test an invalid UTF-8 sequence (0x800 to 0xFFFF)")
309 61442 : for(char32_t wc(0x000800); wc < 0x010000; ++wc)
310 : {
311 61441 : if(wc >= 0xD800 && wc <= 0xDFFF)
312 : {
313 : // skip UTF-16 surrogates -- this is not the test for those
314 : //
315 1 : wc = 0xDFFF;
316 1 : continue;
317 : }
318 :
319 : char buf[libutf8::MBS_MIN_BUFFER_LENGTH];
320 61440 : CATCH_REQUIRE(libutf8::wctombs(buf, wc, sizeof(buf)) == 3);
321 :
322 61440 : char32_t const found(((static_cast<char32_t>(buf[0]) & 0x0F) << 12)
323 61440 : | ((static_cast<char32_t>(buf[1]) & 0x3F) << 6)
324 61440 : | ((static_cast<char32_t>(buf[2]) & 0x3F) << 0));
325 61440 : CATCH_REQUIRE(found == wc);
326 61440 : CATCH_REQUIRE(buf[3] == '\0');
327 :
328 : // too short
329 : //
330 61440 : char32_t back(rand());
331 61440 : char const * s(buf);
332 61440 : size_t len(2);
333 61440 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
334 :
335 : // invalid middle byte
336 : //
337 61440 : char const second_byte(buf[1]);
338 61440 : back = rand();
339 61440 : s = buf;
340 61440 : int c(rand() % (255 - 0x40) + 1);
341 61440 : if(c >= 0x80)
342 : {
343 20428 : c += 0x40;
344 : }
345 61440 : buf[1] = static_cast<char>(c);
346 61440 : len = 3;
347 61440 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
348 61440 : CATCH_REQUIRE(back == U'\0');
349 61440 : CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 1) == reinterpret_cast<void const *>(s));
350 61440 : CATCH_REQUIRE(len == 2);
351 61440 : buf[1] = second_byte;
352 :
353 61440 : char const third_byte(buf[2]);
354 61440 : back = rand();
355 61440 : s = buf;
356 61440 : c = rand() % (255 - 0x40) + 1;
357 61440 : if(c >= 0x80)
358 : {
359 20594 : c += 0x40;
360 : }
361 61440 : buf[2] = static_cast<char>(c);
362 61440 : len = 3;
363 61440 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
364 61440 : CATCH_REQUIRE(back == U'\0');
365 61440 : CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 2) == reinterpret_cast<void const *>(s));
366 61440 : CATCH_REQUIRE(len == 1);
367 61440 : buf[2] = third_byte;
368 :
369 : // invalid introducer (0x80 to 0xBF)
370 : //
371 61440 : back = rand();
372 61440 : s = buf;
373 61440 : buf[0] = rand() % 64 + 0x80;
374 61440 : len = 3;
375 61440 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
376 61440 : CATCH_REQUIRE(back == U'\0');
377 61440 : CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 3) == reinterpret_cast<void const *>(s));
378 61440 : CATCH_REQUIRE(len == 0);
379 :
380 : // invalid introducer (0xF8 to 0xFF)
381 : //
382 61440 : back = rand();
383 61440 : s = buf;
384 61440 : buf[0] = rand() % 8 + 0xF8;
385 61440 : len = 3;
386 61440 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
387 61440 : CATCH_REQUIRE(back == '\0');
388 61440 : CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 3) == reinterpret_cast<void const *>(s));
389 61440 : CATCH_REQUIRE(len == 0);
390 : }
391 : CATCH_END_SECTION()
392 :
393 10 : CATCH_START_SECTION("Test UTF-32 to UTF-8 with characters between 0x010000 and 0x110000 inclusive")
394 1048577 : for(char32_t wc(0x010000); wc < 0x110000; ++wc)
395 : {
396 : char buf[libutf8::MBS_MIN_BUFFER_LENGTH];
397 1048576 : CATCH_REQUIRE(libutf8::wctombs(buf, wc, sizeof(buf)) == 4);
398 :
399 1048576 : char32_t const found(((static_cast<char32_t>(buf[0]) & 0x07) << 18)
400 1048576 : | ((static_cast<char32_t>(buf[1]) & 0x3F) << 12)
401 1048576 : | ((static_cast<char32_t>(buf[2]) & 0x3F) << 6)
402 1048576 : | ((static_cast<char32_t>(buf[3]) & 0x3F) << 0));
403 1048576 : CATCH_REQUIRE(found == wc);
404 1048576 : CATCH_REQUIRE(buf[4] == '\0');
405 :
406 1048576 : char32_t back(rand());
407 1048576 : char const * s(buf);
408 :
409 : // too short
410 : //
411 1048576 : size_t len(3);
412 1048576 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
413 :
414 : // invalid middle byte
415 : //
416 1048576 : char const second_byte(buf[1]);
417 1048576 : back = rand();
418 1048576 : s = buf;
419 1048576 : int c(rand() % (255 - 0x40) + 1);
420 1048576 : if(c >= 0x80)
421 : {
422 351060 : c += 0x40;
423 : }
424 1048576 : buf[1] = static_cast<char>(c);
425 1048576 : len = 4;
426 1048576 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
427 1048576 : CATCH_REQUIRE(back == U'\0');
428 1048576 : CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 1) == reinterpret_cast<void const *>(s));
429 1048576 : CATCH_REQUIRE(len == 3);
430 1048576 : buf[1] = second_byte;
431 :
432 1048576 : char const third_byte(buf[2]);
433 1048576 : back = rand();
434 1048576 : s = buf;
435 1048576 : c = rand() % (255 - 0x40) + 1;
436 1048576 : if(c >= 0x80)
437 : {
438 351679 : c += 0x40;
439 : }
440 1048576 : buf[2] = static_cast<char>(c);
441 1048576 : len = 4;
442 1048576 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
443 1048576 : CATCH_REQUIRE(back == U'\0');
444 1048576 : CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 2) == reinterpret_cast<void const *>(s));
445 1048576 : CATCH_REQUIRE(len == 2);
446 1048576 : buf[2] = third_byte;
447 :
448 1048576 : char const forth_byte(buf[3]);
449 1048576 : back = rand();
450 1048576 : s = buf;
451 1048576 : c = rand() % (255 - 0x40) + 1;
452 1048576 : if(c >= 0x80)
453 : {
454 350907 : c += 0x40;
455 : }
456 1048576 : buf[3] = static_cast<char>(c);
457 1048576 : len = 4;
458 1048576 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
459 1048576 : CATCH_REQUIRE(back == U'\0');
460 1048576 : CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 3) == reinterpret_cast<void const *>(s));
461 1048576 : CATCH_REQUIRE(len == 1);
462 1048576 : buf[3] = forth_byte;
463 :
464 : // invalid introducer (0x80 to 0xBF)
465 : //
466 1048576 : back = rand();
467 1048576 : s = buf;
468 1048576 : buf[0] = rand() % 64 + 0x80;
469 1048576 : len = 3;
470 1048576 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
471 1048576 : CATCH_REQUIRE(back == U'\0');
472 1048576 : CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 3) == reinterpret_cast<void const *>(s));
473 1048576 : CATCH_REQUIRE(len == 0);
474 :
475 : // invalid introducer (0x80 to 0xBF)
476 : //
477 1048576 : back = rand();
478 1048576 : buf[0] = rand() % 64 + 0x80;
479 1048576 : s = buf;
480 1048576 : len = 4;
481 1048576 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
482 1048576 : CATCH_REQUIRE(back == U'\0');
483 1048576 : CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 4) == reinterpret_cast<void const *>(s));
484 1048576 : CATCH_REQUIRE(len == 0);
485 :
486 : // invalid introducer (0xF8 to 0xFF)
487 : //
488 1048576 : back = rand();
489 1048576 : s = buf;
490 1048576 : buf[0] = rand() % 8 + 0xF8;
491 1048576 : len = 4;
492 1048576 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
493 1048576 : CATCH_REQUIRE(back == '\0');
494 1048576 : CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 4) == reinterpret_cast<void const *>(s));
495 1048576 : CATCH_REQUIRE(len == 0);
496 : }
497 : CATCH_END_SECTION()
498 :
499 10 : CATCH_START_SECTION("Test three random characters, destroy the second one and make sure it gets skipped properly")
500 1001 : for(int repeat(0); repeat < 1000; ++repeat)
501 : {
502 : char32_t wc[3]
503 : {
504 1000 : unittest::rand_char(true),
505 1000 : unittest::rand_char(true),
506 1000 : unittest::rand_char(true)
507 3000 : };
508 1000 : size_t sz[3] = {};
509 :
510 : char buf[libutf8::MBS_MIN_BUFFER_LENGTH * 3];
511 1000 : char * s(buf);
512 1000 : sz[0] += libutf8::wctombs(s, wc[0], sizeof(buf));
513 1000 : s += sz[0];
514 1000 : sz[1] = libutf8::wctombs(s, wc[1], sizeof(buf));
515 1000 : s += sz[1];
516 1000 : sz[2] = libutf8::wctombs(s, wc[2], sizeof(buf));
517 :
518 1000 : char32_t back(rand());
519 1000 : s = buf;
520 1000 : buf[sz[0]] = rand() % 64 + 0x80;
521 1000 : size_t len(sizeof(buf));
522 1000 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) != -1);
523 1000 : CATCH_REQUIRE(back == wc[0]);
524 :
525 1000 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
526 1000 : CATCH_REQUIRE(back == U'\0');
527 :
528 1000 : CATCH_REQUIRE(libutf8::mbstowc(back, s, len) != -1);
529 1000 : CATCH_REQUIRE(back == wc[2]);
530 : }
531 : CATCH_END_SECTION()
532 11 : }
533 :
534 :
535 : // vim: ts=4 sw=4 et
|