Line data Source code
1 : /* tests/iterator.cpp
2 : * Copyright (C) 2013-2019 Made to Order Software Corporation
3 : *
4 : * This program is free software; you can redistribute it and/or modify
5 : * it under the terms of the GNU General Public License as published by
6 : * the Free Software Foundation; either version 2 of the License, or
7 : * (at your option) any later version.
8 : *
9 : * This program is distributed in the hope that it will be useful,
10 : * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 : * GNU General Public License for more details.
13 : *
14 : * You should have received a copy of the GNU General Public License along
15 : * with this program; if not, write to the Free Software Foundation, Inc.,
16 : * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
17 : *
18 : * Authors
19 : * Alexis Wilke alexis@m2osw.com
20 : */
21 :
22 : // unit test
23 : //
24 : #include "main.h"
25 :
26 : // libutf8 lib
27 : //
28 : #include "libutf8/base.h"
29 : #include "libutf8/iterator.h"
30 : #include "libutf8/libutf8.h"
31 :
32 : // C++ lib
33 : //
34 : #include <cctype>
35 : #include <iostream>
36 :
37 :
38 3 : CATCH_TEST_CASE("libutf8 iterator", "[iterator]")
39 : {
40 2 : CATCH_START_SECTION("valid iterators tests")
41 1 : char32_t p(0);
42 0 : do
43 : {
44 1 : p = rand() % 0x11 * 0x10000;
45 : }
46 1 : while(p == 0 || (p >= 0xD800 && p <= 0xDFFF));
47 :
48 18 : for(char32_t plan(0); plan < 0x110000; plan += 0x10000)
49 : {
50 : // create one plan in one string
51 : //
52 34 : std::string str;
53 17 : str.reserve(0x10000 * 4);
54 1112082 : for(char32_t wc(0); wc < 0x10000; ++wc)
55 : {
56 1112065 : if(plan == 0 && wc >= 0xD800 && wc <= 0xDFFF)
57 : {
58 1 : wc = 0xDFFF;
59 1 : continue;
60 : }
61 : char buf[libutf8::MBS_MIN_BUFFER_LENGTH];
62 1112064 : CATCH_REQUIRE(libutf8::wctombs(buf, wc + plan, sizeof(buf)) >= 1);
63 1112064 : if(plan == 0 && wc == 0)
64 : {
65 : // this is a special case as buf[0] = '\0' and the += with
66 : // the string won't work
67 : //
68 1 : str += '\0';
69 : }
70 : else
71 : {
72 1112063 : str += buf;
73 : }
74 : }
75 : //std::cerr << "-------------- Plan " << static_cast<int>(plan) << " String ready " << str.length() << " ...\n";
76 :
77 : {
78 17 : libutf8::utf8_iterator it(str);
79 17 : libutf8::utf8_iterator it_end(str, true);
80 17 : libutf8::utf8_iterator it_next(str);
81 17 : ++it_next;
82 :
83 17 : CATCH_REQUIRE(it == str.begin());
84 17 : CATCH_REQUIRE(it == str.cbegin());
85 17 : CATCH_REQUIRE(it != str.end());
86 17 : CATCH_REQUIRE(it != str.cend());
87 :
88 17 : CATCH_REQUIRE(it == it);
89 17 : CATCH_REQUIRE(it != it_end);
90 17 : CATCH_REQUIRE(it != it_next);
91 :
92 17 : CATCH_REQUIRE(str.begin() == it);
93 17 : CATCH_REQUIRE(str.cbegin() == it);
94 17 : CATCH_REQUIRE(str.end() != it);
95 17 : CATCH_REQUIRE(str.cend() != it);
96 :
97 1112082 : for(char32_t wc(0); wc < 0x10000; ++wc)
98 : {
99 1112065 : if(plan == 0 && wc >= 0xD800 && wc <= 0xDFFF)
100 : {
101 1 : wc = 0xDFFF;
102 1 : continue;
103 : }
104 1112064 : CATCH_REQUIRE(*it == wc + plan);
105 1112064 : ++it;
106 : }
107 :
108 17 : CATCH_REQUIRE(it != str.begin());
109 17 : CATCH_REQUIRE(it != str.cbegin());
110 17 : CATCH_REQUIRE(it == str.end());
111 17 : CATCH_REQUIRE(it == str.cend());
112 :
113 17 : CATCH_REQUIRE(str.begin() != it);
114 17 : CATCH_REQUIRE(str.cbegin() != it);
115 17 : CATCH_REQUIRE(str.end() == it);
116 17 : CATCH_REQUIRE(str.cend() == it);
117 :
118 17 : CATCH_REQUIRE(*it == EOF);
119 17 : ++it;
120 17 : it++;
121 17 : CATCH_REQUIRE(it == str.cend());
122 :
123 1112082 : for(char32_t wc(0x10000); wc > 0; )
124 : {
125 1112065 : --wc;
126 1112065 : if(plan == 0 && wc >= 0xD800 && wc <= 0xDFFF)
127 : {
128 1 : wc = 0xD800;
129 1 : continue;
130 : }
131 1112064 : --it;
132 1112064 : CATCH_REQUIRE(*it == wc + plan);
133 : }
134 :
135 17 : --it;
136 17 : it--;
137 :
138 17 : CATCH_REQUIRE(it.good());
139 17 : CATCH_REQUIRE(!it.bad());
140 : }
141 :
142 17 : if(plan == p)
143 : {
144 1 : libutf8::utf8_iterator it(str);
145 :
146 65537 : for(char32_t wc(0); wc < 0x10000; ++wc)
147 : {
148 65536 : if(plan == 0 && wc >= 0xD800 && wc <= 0xDFFF)
149 : {
150 0 : wc = 0xDFFF;
151 0 : continue;
152 : }
153 65536 : CATCH_REQUIRE(*it++ == wc + plan);
154 : }
155 :
156 1 : CATCH_REQUIRE(it == str.end());
157 1 : it++;
158 1 : ++it;
159 1 : CATCH_REQUIRE(it == str.end());
160 :
161 65537 : for(char32_t wc(0x10000); wc > 0; )
162 : {
163 65536 : --wc;
164 65536 : if(plan == 0 && wc >= 0xD800 && wc <= 0xDFFF)
165 : {
166 0 : wc = 0xD800;
167 0 : continue;
168 : }
169 65536 : CATCH_REQUIRE(*--it == wc + plan);
170 : }
171 :
172 1 : CATCH_REQUIRE(it == str.begin());
173 1 : CATCH_REQUIRE(str.begin() == it);
174 1 : it--;
175 1 : --it;
176 1 : CATCH_REQUIRE(it == str.begin());
177 1 : CATCH_REQUIRE(str.begin() == it);
178 : }
179 :
180 17 : if(plan == (p + 1) % 0x11)
181 : {
182 0 : libutf8::utf8_iterator it(str);
183 :
184 0 : for(char32_t wc(0); wc < 0x10000; ++wc)
185 : {
186 0 : if(plan == 0 && wc >= 0xD800 && wc <= 0xDFFF)
187 : {
188 0 : wc = 0xDFFF;
189 0 : continue;
190 : }
191 0 : CATCH_REQUIRE(*it == wc + plan);
192 0 : it++;
193 : }
194 :
195 0 : for(char32_t wc(0x10000); wc > 0; )
196 : {
197 0 : --wc;
198 0 : if(plan == 0 && wc >= 0xD800 && wc <= 0xDFFF)
199 : {
200 0 : wc = 0xD800;
201 0 : continue;
202 : }
203 0 : it--;
204 0 : CATCH_REQUIRE(*it == wc + plan);
205 : }
206 : }
207 : }
208 : CATCH_END_SECTION()
209 1 : }
210 :
211 :
212 4 : CATCH_TEST_CASE("libutf8 iterator invalid string", "[iterator],[invalid]")
213 : {
214 4 : CATCH_START_SECTION("iterators with invalid characters (bad UTF-8)")
215 101 : for(int repeat(0); repeat < 100; ++repeat)
216 : {
217 : // create one plan in one string
218 : //
219 100 : constexpr size_t STR_LENGTH = 4;
220 : char32_t wc;
221 200 : std::u32string wstr;
222 100 : wstr.reserve(STR_LENGTH);
223 500 : for(size_t idx(0); idx < STR_LENGTH; ++idx)
224 : {
225 0 : do
226 : {
227 400 : wc = unittest::rand_char(true);
228 : }
229 400 : while(wc < 0x80);
230 400 : wstr += wc;
231 : }
232 200 : std::string str(libutf8::to_u8string(wstr));
233 :
234 : //std::cerr << "-------------- Plan " << static_cast<int>(plan) << " String ready " << str.length() << " ...\n";
235 :
236 : // first verify that it works
237 : //
238 : std::string::size_type pos[STR_LENGTH];
239 : {
240 100 : libutf8::utf8_iterator it(str);
241 :
242 100 : CATCH_REQUIRE(it == str.begin());
243 100 : CATCH_REQUIRE(it == str.cbegin());
244 100 : CATCH_REQUIRE(it != str.end());
245 100 : CATCH_REQUIRE(it != str.cend());
246 :
247 100 : CATCH_REQUIRE(str.begin() == it);
248 100 : CATCH_REQUIRE(str.cbegin() == it);
249 100 : CATCH_REQUIRE(str.end() != it);
250 100 : CATCH_REQUIRE(str.cend() != it);
251 :
252 500 : for(size_t idx(0); idx < STR_LENGTH; ++idx)
253 : {
254 400 : CATCH_REQUIRE(*it == wstr[idx]);
255 400 : if(rand() % 2 == 0)
256 : {
257 196 : pos[idx] = it - str.begin();
258 : }
259 : else
260 : {
261 204 : pos[idx] = -(str.begin() - it);
262 : }
263 400 : ++it;
264 : }
265 :
266 100 : CATCH_REQUIRE(it != str.begin());
267 100 : CATCH_REQUIRE(it != str.cbegin());
268 100 : CATCH_REQUIRE(it == str.end());
269 100 : CATCH_REQUIRE(it == str.cend());
270 :
271 100 : CATCH_REQUIRE(str.begin() != it);
272 100 : CATCH_REQUIRE(str.cbegin() != it);
273 100 : CATCH_REQUIRE(str.end() == it);
274 100 : CATCH_REQUIRE(str.cend() == it);
275 :
276 100 : CATCH_REQUIRE(*it == EOF);
277 100 : ++it;
278 100 : it++;
279 100 : CATCH_REQUIRE(it == str.cend());
280 : }
281 :
282 : {
283 100 : libutf8::utf8_iterator it(str);
284 :
285 100 : str[pos[1]] = rand() % 0x40 + 0x80;
286 :
287 100 : CATCH_REQUIRE(*it++ == wstr[0]);
288 100 : CATCH_REQUIRE(*it++ == U'\0'); // we broke this one
289 100 : CATCH_REQUIRE(*it++ == wstr[2]);
290 100 : CATCH_REQUIRE(*it++ == wstr[3]);
291 100 : CATCH_REQUIRE(*it++ == EOF);
292 : }
293 :
294 : {
295 100 : str.erase(str.length() - 1);
296 100 : libutf8::utf8_iterator it(str);
297 :
298 100 : str[pos[1]] = rand() % 0x40 + 0x80;
299 :
300 100 : CATCH_REQUIRE(*it++ == wstr[0]);
301 100 : CATCH_REQUIRE(*it++ == U'\0');
302 100 : CATCH_REQUIRE(*it++ == wstr[2]);
303 100 : CATCH_REQUIRE(*it++ == U'\0');
304 : }
305 : }
306 : CATCH_END_SECTION()
307 :
308 4 : CATCH_START_SECTION("iterators with invalid characters (too large)")
309 983040 : for(char32_t wc(0x110000); wc < 0x1FFFFF; ++wc)
310 : {
311 : // since this character is not we have to encode it _manually_
312 : //
313 : char buf[4];
314 983039 : buf[0] = 0xF0 | ((wc >> 18) & 0x07);
315 983039 : buf[1] = 0x80 | ((wc >> 12) & 0x3F);
316 983039 : buf[2] = 0x80 | ((wc >> 6) & 0x3F);
317 983039 : buf[3] = 0x80 | ((wc >> 0) & 0x3F);
318 :
319 1966078 : std::string str(buf, 4);
320 :
321 : // first verify that it works
322 : //
323 : {
324 983039 : libutf8::utf8_iterator it(str);
325 :
326 983039 : CATCH_REQUIRE(it == str.begin());
327 983039 : CATCH_REQUIRE(it == str.cbegin());
328 983039 : CATCH_REQUIRE(it != str.end());
329 983039 : CATCH_REQUIRE(it != str.cend());
330 :
331 983039 : CATCH_REQUIRE(str.begin() == it);
332 983039 : CATCH_REQUIRE(str.cbegin() == it);
333 983039 : CATCH_REQUIRE(str.end() != it);
334 983039 : CATCH_REQUIRE(str.cend() != it);
335 :
336 983039 : CATCH_REQUIRE(*it == '\0');
337 983039 : ++it;
338 :
339 983039 : CATCH_REQUIRE(it != str.begin());
340 983039 : CATCH_REQUIRE(it != str.cbegin());
341 983039 : CATCH_REQUIRE(it == str.end());
342 983039 : CATCH_REQUIRE(it == str.cend());
343 :
344 983039 : CATCH_REQUIRE(str.begin() != it);
345 983039 : CATCH_REQUIRE(str.cbegin() != it);
346 983039 : CATCH_REQUIRE(str.end() == it);
347 983039 : CATCH_REQUIRE(str.cend() == it);
348 :
349 983039 : CATCH_REQUIRE(*it == EOF);
350 983039 : ++it;
351 983039 : it++;
352 983039 : CATCH_REQUIRE(it == str.cend());
353 : }
354 : }
355 : CATCH_END_SECTION()
356 8 : }
357 :
358 :
359 :
360 : // vim: ts=4 sw=4 et
|