Line data Source code
1 : /* tests/iterator.cpp
2 : * Copyright (C) 2013-2019 Made to Order Software Corporation
3 : *
4 : * This program is free software; you can redistribute it and/or modify
5 : * it under the terms of the GNU General Public License as published by
6 : * the Free Software Foundation; either version 2 of the License, or
7 : * (at your option) any later version.
8 : *
9 : * This program is distributed in the hope that it will be useful,
10 : * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 : * GNU General Public License for more details.
13 : *
14 : * You should have received a copy of the GNU General Public License along
15 : * with this program; if not, write to the Free Software Foundation, Inc.,
16 : * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
17 : *
18 : * Authors
19 : * Alexis Wilke alexis@m2osw.com
20 : */
21 :
22 : // unit test
23 : //
24 : #include "main.h"
25 :
26 : // libutf8 lib
27 : //
28 : #include "libutf8/base.h"
29 : #include "libutf8/iterator.h"
30 : #include "libutf8/libutf8.h"
31 :
32 : // catch lib
33 : //
34 : #include <catch2/catch.hpp>
35 :
36 : // C++ lib
37 : //
38 : #include <cctype>
39 : #include <iostream>
40 :
41 :
42 3 : CATCH_TEST_CASE("libutf8 iterator", "iterator")
43 : {
44 2 : CATCH_START_SECTION("valid iterators tests")
45 1 : char32_t p(0);
46 0 : do
47 : {
48 1 : p = rand() % 0x11 * 0x10000;
49 : }
50 1 : while(p == 0 || (p >= 0xD800 && p <= 0xDFFF));
51 :
52 18 : for(char32_t plan(0); plan < 0x110000; plan += 0x10000)
53 : {
54 : // create one plan in one string
55 : //
56 34 : std::string str;
57 17 : str.reserve(0x10000 * 4);
58 1112082 : for(char32_t wc(0); wc < 0x10000; ++wc)
59 : {
60 1112065 : if(plan == 0 && wc >= 0xD800 && wc <= 0xDFFF)
61 : {
62 1 : wc = 0xDFFF;
63 1 : continue;
64 : }
65 : char buf[libutf8::MBS_MIN_BUFFER_LENGTH];
66 1112064 : CATCH_REQUIRE(libutf8::wctombs(buf, wc + plan, sizeof(buf)) >= 1);
67 1112064 : if(plan == 0 && wc == 0)
68 : {
69 : // this is a special case as buf[0] = '\0' and the += with
70 : // the string won't work
71 : //
72 1 : str += '\0';
73 : }
74 : else
75 : {
76 1112063 : str += buf;
77 : }
78 : }
79 : //std::cerr << "-------------- Plan " << static_cast<int>(plan) << " String ready " << str.length() << " ...\n";
80 :
81 : {
82 17 : libutf8::utf8_iterator it(str);
83 :
84 17 : CATCH_REQUIRE(it == str.begin());
85 17 : CATCH_REQUIRE(it == str.cbegin());
86 17 : CATCH_REQUIRE(it != str.end());
87 17 : CATCH_REQUIRE(it != str.cend());
88 :
89 17 : CATCH_REQUIRE(str.begin() == it);
90 17 : CATCH_REQUIRE(str.cbegin() == it);
91 17 : CATCH_REQUIRE(str.end() != it);
92 17 : CATCH_REQUIRE(str.cend() != it);
93 :
94 1112082 : for(char32_t wc(0); wc < 0x10000; ++wc)
95 : {
96 1112065 : if(plan == 0 && wc >= 0xD800 && wc <= 0xDFFF)
97 : {
98 1 : wc = 0xDFFF;
99 1 : continue;
100 : }
101 1112064 : CATCH_REQUIRE(*it == wc + plan);
102 1112064 : ++it;
103 : }
104 :
105 17 : CATCH_REQUIRE(it != str.begin());
106 17 : CATCH_REQUIRE(it != str.cbegin());
107 17 : CATCH_REQUIRE(it == str.end());
108 17 : CATCH_REQUIRE(it == str.cend());
109 :
110 17 : CATCH_REQUIRE(str.begin() != it);
111 17 : CATCH_REQUIRE(str.cbegin() != it);
112 17 : CATCH_REQUIRE(str.end() == it);
113 17 : CATCH_REQUIRE(str.cend() == it);
114 :
115 17 : CATCH_REQUIRE(*it == EOF);
116 17 : ++it;
117 17 : it++;
118 17 : CATCH_REQUIRE(it == str.cend());
119 :
120 1112082 : for(char32_t wc(0x10000); wc > 0; )
121 : {
122 1112065 : --wc;
123 1112065 : if(plan == 0 && wc >= 0xD800 && wc <= 0xDFFF)
124 : {
125 1 : wc = 0xD800;
126 1 : continue;
127 : }
128 1112064 : --it;
129 1112064 : CATCH_REQUIRE(*it == wc + plan);
130 : }
131 :
132 17 : --it;
133 17 : it--;
134 :
135 17 : CATCH_REQUIRE(it.good());
136 17 : CATCH_REQUIRE(!it.bad());
137 : }
138 :
139 17 : if(plan == p)
140 : {
141 1 : libutf8::utf8_iterator it(str);
142 :
143 65537 : for(char32_t wc(0); wc < 0x10000; ++wc)
144 : {
145 65536 : if(plan == 0 && wc >= 0xD800 && wc <= 0xDFFF)
146 : {
147 0 : wc = 0xDFFF;
148 0 : continue;
149 : }
150 65536 : CATCH_REQUIRE(*it++ == wc + plan);
151 : }
152 :
153 1 : CATCH_REQUIRE(it == str.end());
154 1 : it++;
155 1 : ++it;
156 1 : CATCH_REQUIRE(it == str.end());
157 :
158 65537 : for(char32_t wc(0x10000); wc > 0; )
159 : {
160 65536 : --wc;
161 65536 : if(plan == 0 && wc >= 0xD800 && wc <= 0xDFFF)
162 : {
163 0 : wc = 0xD800;
164 0 : continue;
165 : }
166 65536 : CATCH_REQUIRE(*--it == wc + plan);
167 : }
168 :
169 1 : CATCH_REQUIRE(it == str.begin());
170 1 : CATCH_REQUIRE(str.begin() == it);
171 1 : it--;
172 1 : --it;
173 1 : CATCH_REQUIRE(it == str.begin());
174 1 : CATCH_REQUIRE(str.begin() == it);
175 : }
176 :
177 17 : if(plan == (p + 1) % 0x11)
178 : {
179 0 : libutf8::utf8_iterator it(str);
180 :
181 0 : for(char32_t wc(0); wc < 0x10000; ++wc)
182 : {
183 0 : if(plan == 0 && wc >= 0xD800 && wc <= 0xDFFF)
184 : {
185 0 : wc = 0xDFFF;
186 0 : continue;
187 : }
188 0 : CATCH_REQUIRE(*it == wc + plan);
189 0 : it++;
190 : }
191 :
192 0 : for(char32_t wc(0x10000); wc > 0; )
193 : {
194 0 : --wc;
195 0 : if(plan == 0 && wc >= 0xD800 && wc <= 0xDFFF)
196 : {
197 0 : wc = 0xD800;
198 0 : continue;
199 : }
200 0 : it--;
201 0 : CATCH_REQUIRE(*it == wc + plan);
202 : }
203 : }
204 : }
205 : CATCH_END_SECTION()
206 1 : }
207 :
208 :
209 4 : CATCH_TEST_CASE("libutf8 iterator invalid string", "iterator,invalid")
210 : {
211 4 : CATCH_START_SECTION("iterators with invalid characters (bad UTF-8)")
212 101 : for(int repeat(0); repeat < 100; ++repeat)
213 : {
214 : // create one plan in one string
215 : //
216 100 : constexpr size_t STR_LENGTH = 4;
217 : char32_t wc;
218 200 : std::u32string wstr;
219 100 : wstr.reserve(STR_LENGTH);
220 500 : for(size_t idx(0); idx < STR_LENGTH; ++idx)
221 : {
222 0 : do
223 : {
224 400 : wc = unittest::rand_char(true);
225 : }
226 400 : while(wc < 0x80);
227 400 : wstr += wc;
228 : }
229 200 : std::string str(libutf8::to_u8string(wstr));
230 :
231 : //std::cerr << "-------------- Plan " << static_cast<int>(plan) << " String ready " << str.length() << " ...\n";
232 :
233 : // first verify that it works
234 : //
235 : std::string::size_type pos[STR_LENGTH];
236 : {
237 100 : libutf8::utf8_iterator it(str);
238 :
239 100 : CATCH_REQUIRE(it == str.begin());
240 100 : CATCH_REQUIRE(it == str.cbegin());
241 100 : CATCH_REQUIRE(it != str.end());
242 100 : CATCH_REQUIRE(it != str.cend());
243 :
244 100 : CATCH_REQUIRE(str.begin() == it);
245 100 : CATCH_REQUIRE(str.cbegin() == it);
246 100 : CATCH_REQUIRE(str.end() != it);
247 100 : CATCH_REQUIRE(str.cend() != it);
248 :
249 500 : for(size_t idx(0); idx < STR_LENGTH; ++idx)
250 : {
251 400 : CATCH_REQUIRE(*it == wstr[idx]);
252 400 : if(rand() % 2 == 0)
253 : {
254 209 : pos[idx] = it - str.begin();
255 : }
256 : else
257 : {
258 191 : pos[idx] = -(str.begin() - it);
259 : }
260 400 : ++it;
261 : }
262 :
263 100 : CATCH_REQUIRE(it != str.begin());
264 100 : CATCH_REQUIRE(it != str.cbegin());
265 100 : CATCH_REQUIRE(it == str.end());
266 100 : CATCH_REQUIRE(it == str.cend());
267 :
268 100 : CATCH_REQUIRE(str.begin() != it);
269 100 : CATCH_REQUIRE(str.cbegin() != it);
270 100 : CATCH_REQUIRE(str.end() == it);
271 100 : CATCH_REQUIRE(str.cend() == it);
272 :
273 100 : CATCH_REQUIRE(*it == EOF);
274 100 : ++it;
275 100 : it++;
276 100 : CATCH_REQUIRE(it == str.cend());
277 : }
278 :
279 : {
280 100 : libutf8::utf8_iterator it(str);
281 :
282 100 : str[pos[1]] = rand() % 0x40 + 0x80;
283 :
284 100 : CATCH_REQUIRE(*it++ == wstr[0]);
285 100 : CATCH_REQUIRE(*it++ == U'\0'); // we broke this one
286 100 : CATCH_REQUIRE(*it++ == wstr[2]);
287 100 : CATCH_REQUIRE(*it++ == wstr[3]);
288 100 : CATCH_REQUIRE(*it++ == EOF);
289 : }
290 :
291 : {
292 100 : str.erase(str.length() - 1);
293 100 : libutf8::utf8_iterator it(str);
294 :
295 100 : str[pos[1]] = rand() % 0x40 + 0x80;
296 :
297 100 : CATCH_REQUIRE(*it++ == wstr[0]);
298 100 : CATCH_REQUIRE(*it++ == U'\0');
299 100 : CATCH_REQUIRE(*it++ == wstr[2]);
300 100 : CATCH_REQUIRE(*it++ == U'\0');
301 : }
302 : }
303 : CATCH_END_SECTION()
304 :
305 4 : CATCH_START_SECTION("iterators with invalid characters (too large)")
306 983040 : for(char32_t wc(0x110000); wc < 0x1FFFFF; ++wc)
307 : {
308 : // since this character is not we have to encode it _manually_
309 : //
310 : char buf[4];
311 983039 : buf[0] = 0xF0 | ((wc >> 18) & 0x07);
312 983039 : buf[1] = 0x80 | ((wc >> 12) & 0x3F);
313 983039 : buf[2] = 0x80 | ((wc >> 6) & 0x3F);
314 983039 : buf[3] = 0x80 | ((wc >> 0) & 0x3F);
315 :
316 1966078 : std::string str(buf, 4);
317 :
318 : // first verify that it works
319 : //
320 : {
321 983039 : libutf8::utf8_iterator it(str);
322 :
323 983039 : CATCH_REQUIRE(it == str.begin());
324 983039 : CATCH_REQUIRE(it == str.cbegin());
325 983039 : CATCH_REQUIRE(it != str.end());
326 983039 : CATCH_REQUIRE(it != str.cend());
327 :
328 983039 : CATCH_REQUIRE(str.begin() == it);
329 983039 : CATCH_REQUIRE(str.cbegin() == it);
330 983039 : CATCH_REQUIRE(str.end() != it);
331 983039 : CATCH_REQUIRE(str.cend() != it);
332 :
333 983039 : CATCH_REQUIRE(*it == '\0');
334 983039 : ++it;
335 :
336 983039 : CATCH_REQUIRE(it != str.begin());
337 983039 : CATCH_REQUIRE(it != str.cbegin());
338 983039 : CATCH_REQUIRE(it == str.end());
339 983039 : CATCH_REQUIRE(it == str.cend());
340 :
341 983039 : CATCH_REQUIRE(str.begin() != it);
342 983039 : CATCH_REQUIRE(str.cbegin() != it);
343 983039 : CATCH_REQUIRE(str.end() == it);
344 983039 : CATCH_REQUIRE(str.cend() == it);
345 :
346 983039 : CATCH_REQUIRE(*it == EOF);
347 983039 : ++it;
348 983039 : it++;
349 983039 : CATCH_REQUIRE(it == str.cend());
350 : }
351 : }
352 : CATCH_END_SECTION()
353 8 : }
354 :
355 :
356 :
357 : // vim: ts=4 sw=4 et
|