Line data Source code
1 : /* tests/iterator.cpp
2 : * Copyright (C) 2013-2019 Made to Order Software Corporation
3 : *
4 : * This program is free software; you can redistribute it and/or modify
5 : * it under the terms of the GNU General Public License as published by
6 : * the Free Software Foundation; either version 2 of the License, or
7 : * (at your option) any later version.
8 : *
9 : * This program is distributed in the hope that it will be useful,
10 : * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 : * GNU General Public License for more details.
13 : *
14 : * You should have received a copy of the GNU General Public License along
15 : * with this program; if not, write to the Free Software Foundation, Inc.,
16 : * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
17 : *
18 : * Authors
19 : * Alexis Wilke alexis@m2osw.com
20 : */
21 :
22 : // unit test
23 : //
24 : #include "main.h"
25 :
26 : // libutf8 lib
27 : //
28 : #include "libutf8/base.h"
29 : #include "libutf8/iterator.h"
30 : #include "libutf8/libutf8.h"
31 :
32 : // C++ lib
33 : //
34 : #include <cctype>
35 : #include <iostream>
36 :
37 :
38 3 : CATCH_TEST_CASE("libutf8 iterator", "iterator")
39 : {
40 2 : CATCH_START_SECTION("valid iterators tests")
41 1 : char32_t p(0);
42 0 : do
43 : {
44 1 : p = rand() % 0x11 * 0x10000;
45 : }
46 1 : while(p == 0 || (p >= 0xD800 && p <= 0xDFFF));
47 :
48 18 : for(char32_t plan(0); plan < 0x110000; plan += 0x10000)
49 : {
50 : // create one plan in one string
51 : //
52 34 : std::string str;
53 17 : str.reserve(0x10000 * 4);
54 1112082 : for(char32_t wc(0); wc < 0x10000; ++wc)
55 : {
56 1112065 : if(plan == 0 && wc >= 0xD800 && wc <= 0xDFFF)
57 : {
58 1 : wc = 0xDFFF;
59 1 : continue;
60 : }
61 : char buf[libutf8::MBS_MIN_BUFFER_LENGTH];
62 1112064 : CATCH_REQUIRE(libutf8::wctombs(buf, wc + plan, sizeof(buf)) >= 1);
63 1112064 : if(plan == 0 && wc == 0)
64 : {
65 : // this is a special case as buf[0] = '\0' and the += with
66 : // the string won't work
67 : //
68 1 : str += '\0';
69 : }
70 : else
71 : {
72 1112063 : str += buf;
73 : }
74 : }
75 : //std::cerr << "-------------- Plan " << static_cast<int>(plan) << " String ready " << str.length() << " ...\n";
76 :
77 : {
78 17 : libutf8::utf8_iterator it(str);
79 :
80 17 : CATCH_REQUIRE(it == str.begin());
81 17 : CATCH_REQUIRE(it == str.cbegin());
82 17 : CATCH_REQUIRE(it != str.end());
83 17 : CATCH_REQUIRE(it != str.cend());
84 :
85 17 : CATCH_REQUIRE(str.begin() == it);
86 17 : CATCH_REQUIRE(str.cbegin() == it);
87 17 : CATCH_REQUIRE(str.end() != it);
88 17 : CATCH_REQUIRE(str.cend() != it);
89 :
90 1112082 : for(char32_t wc(0); wc < 0x10000; ++wc)
91 : {
92 1112065 : if(plan == 0 && wc >= 0xD800 && wc <= 0xDFFF)
93 : {
94 1 : wc = 0xDFFF;
95 1 : continue;
96 : }
97 1112064 : CATCH_REQUIRE(*it == wc + plan);
98 1112064 : ++it;
99 : }
100 :
101 17 : CATCH_REQUIRE(it != str.begin());
102 17 : CATCH_REQUIRE(it != str.cbegin());
103 17 : CATCH_REQUIRE(it == str.end());
104 17 : CATCH_REQUIRE(it == str.cend());
105 :
106 17 : CATCH_REQUIRE(str.begin() != it);
107 17 : CATCH_REQUIRE(str.cbegin() != it);
108 17 : CATCH_REQUIRE(str.end() == it);
109 17 : CATCH_REQUIRE(str.cend() == it);
110 :
111 17 : CATCH_REQUIRE(*it == EOF);
112 17 : ++it;
113 17 : it++;
114 17 : CATCH_REQUIRE(it == str.cend());
115 :
116 1112082 : for(char32_t wc(0x10000); wc > 0; )
117 : {
118 1112065 : --wc;
119 1112065 : if(plan == 0 && wc >= 0xD800 && wc <= 0xDFFF)
120 : {
121 1 : wc = 0xD800;
122 1 : continue;
123 : }
124 1112064 : --it;
125 1112064 : CATCH_REQUIRE(*it == wc + plan);
126 : }
127 :
128 17 : --it;
129 17 : it--;
130 :
131 17 : CATCH_REQUIRE(it.good());
132 17 : CATCH_REQUIRE(!it.bad());
133 : }
134 :
135 17 : if(plan == p)
136 : {
137 1 : libutf8::utf8_iterator it(str);
138 :
139 65537 : for(char32_t wc(0); wc < 0x10000; ++wc)
140 : {
141 65536 : if(plan == 0 && wc >= 0xD800 && wc <= 0xDFFF)
142 : {
143 0 : wc = 0xDFFF;
144 0 : continue;
145 : }
146 65536 : CATCH_REQUIRE(*it++ == wc + plan);
147 : }
148 :
149 1 : CATCH_REQUIRE(it == str.end());
150 1 : it++;
151 1 : ++it;
152 1 : CATCH_REQUIRE(it == str.end());
153 :
154 65537 : for(char32_t wc(0x10000); wc > 0; )
155 : {
156 65536 : --wc;
157 65536 : if(plan == 0 && wc >= 0xD800 && wc <= 0xDFFF)
158 : {
159 0 : wc = 0xD800;
160 0 : continue;
161 : }
162 65536 : CATCH_REQUIRE(*--it == wc + plan);
163 : }
164 :
165 1 : CATCH_REQUIRE(it == str.begin());
166 1 : CATCH_REQUIRE(str.begin() == it);
167 1 : it--;
168 1 : --it;
169 1 : CATCH_REQUIRE(it == str.begin());
170 1 : CATCH_REQUIRE(str.begin() == it);
171 : }
172 :
173 17 : if(plan == (p + 1) % 0x11)
174 : {
175 0 : libutf8::utf8_iterator it(str);
176 :
177 0 : for(char32_t wc(0); wc < 0x10000; ++wc)
178 : {
179 0 : if(plan == 0 && wc >= 0xD800 && wc <= 0xDFFF)
180 : {
181 0 : wc = 0xDFFF;
182 0 : continue;
183 : }
184 0 : CATCH_REQUIRE(*it == wc + plan);
185 0 : it++;
186 : }
187 :
188 0 : for(char32_t wc(0x10000); wc > 0; )
189 : {
190 0 : --wc;
191 0 : if(plan == 0 && wc >= 0xD800 && wc <= 0xDFFF)
192 : {
193 0 : wc = 0xD800;
194 0 : continue;
195 : }
196 0 : it--;
197 0 : CATCH_REQUIRE(*it == wc + plan);
198 : }
199 : }
200 : }
201 : CATCH_END_SECTION()
202 1 : }
203 :
204 :
205 4 : CATCH_TEST_CASE("libutf8 iterator invalid string", "iterator,invalid")
206 : {
207 4 : CATCH_START_SECTION("iterators with invalid characters (bad UTF-8)")
208 101 : for(int repeat(0); repeat < 100; ++repeat)
209 : {
210 : // create one plan in one string
211 : //
212 100 : constexpr size_t STR_LENGTH = 4;
213 : char32_t wc;
214 200 : std::u32string wstr;
215 100 : wstr.reserve(STR_LENGTH);
216 500 : for(size_t idx(0); idx < STR_LENGTH; ++idx)
217 : {
218 0 : do
219 : {
220 400 : wc = unittest::rand_char(true);
221 : }
222 400 : while(wc < 0x80);
223 400 : wstr += wc;
224 : }
225 200 : std::string str(libutf8::to_u8string(wstr));
226 :
227 : //std::cerr << "-------------- Plan " << static_cast<int>(plan) << " String ready " << str.length() << " ...\n";
228 :
229 : // first verify that it works
230 : //
231 : std::string::size_type pos[STR_LENGTH];
232 : {
233 100 : libutf8::utf8_iterator it(str);
234 :
235 100 : CATCH_REQUIRE(it == str.begin());
236 100 : CATCH_REQUIRE(it == str.cbegin());
237 100 : CATCH_REQUIRE(it != str.end());
238 100 : CATCH_REQUIRE(it != str.cend());
239 :
240 100 : CATCH_REQUIRE(str.begin() == it);
241 100 : CATCH_REQUIRE(str.cbegin() == it);
242 100 : CATCH_REQUIRE(str.end() != it);
243 100 : CATCH_REQUIRE(str.cend() != it);
244 :
245 500 : for(size_t idx(0); idx < STR_LENGTH; ++idx)
246 : {
247 400 : CATCH_REQUIRE(*it == wstr[idx]);
248 400 : if(rand() % 2 == 0)
249 : {
250 193 : pos[idx] = it - str.begin();
251 : }
252 : else
253 : {
254 207 : pos[idx] = -(str.begin() - it);
255 : }
256 400 : ++it;
257 : }
258 :
259 100 : CATCH_REQUIRE(it != str.begin());
260 100 : CATCH_REQUIRE(it != str.cbegin());
261 100 : CATCH_REQUIRE(it == str.end());
262 100 : CATCH_REQUIRE(it == str.cend());
263 :
264 100 : CATCH_REQUIRE(str.begin() != it);
265 100 : CATCH_REQUIRE(str.cbegin() != it);
266 100 : CATCH_REQUIRE(str.end() == it);
267 100 : CATCH_REQUIRE(str.cend() == it);
268 :
269 100 : CATCH_REQUIRE(*it == EOF);
270 100 : ++it;
271 100 : it++;
272 100 : CATCH_REQUIRE(it == str.cend());
273 : }
274 :
275 : {
276 100 : libutf8::utf8_iterator it(str);
277 :
278 100 : str[pos[1]] = rand() % 0x40 + 0x80;
279 :
280 100 : CATCH_REQUIRE(*it++ == wstr[0]);
281 100 : CATCH_REQUIRE(*it++ == U'\0'); // we broke this one
282 100 : CATCH_REQUIRE(*it++ == wstr[2]);
283 100 : CATCH_REQUIRE(*it++ == wstr[3]);
284 100 : CATCH_REQUIRE(*it++ == EOF);
285 : }
286 :
287 : {
288 100 : str.erase(str.length() - 1);
289 100 : libutf8::utf8_iterator it(str);
290 :
291 100 : str[pos[1]] = rand() % 0x40 + 0x80;
292 :
293 100 : CATCH_REQUIRE(*it++ == wstr[0]);
294 100 : CATCH_REQUIRE(*it++ == U'\0');
295 100 : CATCH_REQUIRE(*it++ == wstr[2]);
296 100 : CATCH_REQUIRE(*it++ == U'\0');
297 : }
298 : }
299 : CATCH_END_SECTION()
300 :
301 4 : CATCH_START_SECTION("iterators with invalid characters (too large)")
302 983040 : for(char32_t wc(0x110000); wc < 0x1FFFFF; ++wc)
303 : {
304 : // since this character is not we have to encode it _manually_
305 : //
306 : char buf[4];
307 983039 : buf[0] = 0xF0 | ((wc >> 18) & 0x07);
308 983039 : buf[1] = 0x80 | ((wc >> 12) & 0x3F);
309 983039 : buf[2] = 0x80 | ((wc >> 6) & 0x3F);
310 983039 : buf[3] = 0x80 | ((wc >> 0) & 0x3F);
311 :
312 1966078 : std::string str(buf, 4);
313 :
314 : // first verify that it works
315 : //
316 : {
317 983039 : libutf8::utf8_iterator it(str);
318 :
319 983039 : CATCH_REQUIRE(it == str.begin());
320 983039 : CATCH_REQUIRE(it == str.cbegin());
321 983039 : CATCH_REQUIRE(it != str.end());
322 983039 : CATCH_REQUIRE(it != str.cend());
323 :
324 983039 : CATCH_REQUIRE(str.begin() == it);
325 983039 : CATCH_REQUIRE(str.cbegin() == it);
326 983039 : CATCH_REQUIRE(str.end() != it);
327 983039 : CATCH_REQUIRE(str.cend() != it);
328 :
329 983039 : CATCH_REQUIRE(*it == '\0');
330 983039 : ++it;
331 :
332 983039 : CATCH_REQUIRE(it != str.begin());
333 983039 : CATCH_REQUIRE(it != str.cbegin());
334 983039 : CATCH_REQUIRE(it == str.end());
335 983039 : CATCH_REQUIRE(it == str.cend());
336 :
337 983039 : CATCH_REQUIRE(str.begin() != it);
338 983039 : CATCH_REQUIRE(str.cbegin() != it);
339 983039 : CATCH_REQUIRE(str.end() == it);
340 983039 : CATCH_REQUIRE(str.cend() == it);
341 :
342 983039 : CATCH_REQUIRE(*it == EOF);
343 983039 : ++it;
344 983039 : it++;
345 983039 : CATCH_REQUIRE(it == str.cend());
346 : }
347 : }
348 : CATCH_END_SECTION()
349 8 : }
350 :
351 :
352 :
353 : // vim: ts=4 sw=4 et
|