Line data Source code
1 : /* TLD library -- test converting domain names to lowercase
2 : * Copyright (c) 2011-2018 Made to Order Software Corp. All Rights Reserved
3 : *
4 : * Permission is hereby granted, free of charge, to any person obtaining a
5 : * copy of this software and associated documentation files (the
6 : * "Software"), to deal in the Software without restriction, including
7 : * without limitation the rights to use, copy, modify, merge, publish,
8 : * distribute, sublicense, and/or sell copies of the Software, and to
9 : * permit persons to whom the Software is furnished to do so, subject to
10 : * the following conditions:
11 : *
12 : * The above copyright notice and this permission notice shall be included
13 : * in all copies or substantial portions of the Software.
14 : *
15 : * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
16 : * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 : * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
18 : * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
19 : * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
20 : * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
21 : * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22 : */
23 :
24 : /** \file
25 : * \brief Test the tld_domain_to_lowercase() function.
26 : *
27 : * This file implements various tests to verify that the
28 : * tld_domain_to_lowercase() function works as expected.
29 : */
30 :
31 : #include "libtld/tld.h"
32 : #include <string.h>
33 : #include <stdlib.h>
34 : #include <stdio.h>
35 : #include <time.h>
36 : #include <limits.h>
37 : #include <wctype.h>
38 :
/** \brief Number of failed checks recorded by the test functions.
 *
 * main() exits with status 1 when this counter is not zero.
 */
int err_count = 0;

/** \brief Verbosity flag, set to 1 by main() when "-v" is on the command line.
 */
int verbose = 0;
41 :
42 :
43 :
/** \brief Append one byte to a URI-like string, percent-encoding it if needed.
 *
 * ASCII letters, digits and the characters ". - ! ~ / _" are copied as is
 * (one output character). Any other value is written as "%XX" (three output
 * characters followed by the terminator sprintf() adds).
 *
 * \param[in,out] out         Pointer to the write cursor; it is advanced past
 *                            the characters written.
 * \param[in]     wc          The byte value to append.
 * \param[in]     force_caps  When non-zero the hexadecimal digits are always
 *                            uppercase; when zero rand() picks the case.
 */
void test_add_byte(char **out, int wc, int force_caps)
{
    const int is_letter = (wc >= 'a' && wc <= 'z') || (wc >= 'A' && wc <= 'Z');
    const int is_digit = wc >= '0' && wc <= '9';
    const int is_plain_punctuation = wc == '.' || wc == '-' || wc == '!'
                                  || wc == '~' || wc == '/' || wc == '_';

    if(is_letter || is_digit || is_plain_punctuation)
    {
        // copied verbatim, a single character
        *(*out)++ = wc;
        return;
    }

    // add '%XX' where X represents an hexadecimal digit; rand() is only
    // consulted when the caller did not force uppercase digits
    {
        const int uppercase = force_caps || (rand() & 1) != 0;

        sprintf(*out, uppercase ? "%%%02X" : "%%%02x", wc);
    }
    *out += 3;
}
74 :
75 :
/** \brief Append a code point to a string as percent-encoded UTF-8 bytes.
 *
 * The value is split in 1 to 4 bytes using the UTF-8 bit layout and each
 * byte is handed to test_add_byte(). The lead byte of the 4 byte form is
 * not masked, so values of 0x200000 and above produce a lead byte larger
 * than 0xF7; test_invalid_xx() relies on this to generate invalid input.
 *
 * \param[in,out] out         Pointer to the write cursor, advanced by
 *                            test_add_byte().
 * \param[in]     wc          The code point to encode.
 * \param[in]     force_caps  Passed as is to test_add_byte().
 */
void test_to_utf8(char **out, int wc, int force_caps)
{
    int lead_marker;
    int shift;

    if(wc < 0x80)
    {
        // single byte, no lead/continuation markers
        test_add_byte(out, wc, force_caps);
        return;
    }

    if(wc < 0x800)
    {
        lead_marker = 0xC0;
        shift = 6;
    }
    else if(wc < 0x10000)
    {
        lead_marker = 0xE0;
        shift = 12;
    }
    else
    {
        lead_marker = 0xF0;
        shift = 18;
    }

    // lead byte: the top bits of the code point plus the length marker
    test_add_byte(out, ((wc >> shift) | lead_marker), force_caps);

    // continuation bytes: 6 bits each, most significant group first
    while(shift > 0)
    {
        shift -= 6;
        test_add_byte(out, (((wc >> shift) & 0x3F) | 0x80), force_caps);
    }
}
101 :
102 :
103 1 : void test_all_characters()
104 : {
105 : int wc;
106 : char buf[256], *s, *r;
107 :
108 1114112 : for(wc = 1; wc < 0x110000; ++wc)
109 : {
110 1114111 : if((wc >= 0xD800 && wc <= 0xDFFF) // UTF-16 stuff ignored
111 1112063 : || (wc & 0xFFFF) == 0xFFFE
112 1112046 : || (wc & 0xFFFF) == 0xFFFF
113 1112029 : || wc == '/')
114 : {
115 : // those code points must be ignored because they
116 : // really don't work in a domain name
117 2083 : continue;
118 : }
119 :
120 1112028 : s = buf;
121 1112028 : test_to_utf8(&s, wc, 0);
122 1112028 : *s = '\0';
123 :
124 1112028 : r = tld_domain_to_lowercase(buf);
125 :
126 1112028 : s = buf;
127 1112028 : test_to_utf8(&s, towlower(wc), 1); // force caps in %XX notication
128 1112028 : *s = '\0';
129 :
130 1112028 : if(r == NULL)
131 : {
132 0 : fprintf(stderr, "error: character 0x%06X generated an error and tld_domain_to_lower() returned NULL (expected \"%s\")\n", wc, buf);
133 : }
134 : else
135 : {
136 1112028 : if(strcmp(r, buf) != 0)
137 : {
138 0 : fprintf(stderr, "error: character 0x%06X was not converted back and force as expected (expected \"%s\", received \"%s\")\n", wc, buf, r);
139 : }
140 :
141 : // done with the result
142 1112028 : free(r);
143 : }
144 : }
145 1 : }
146 :
147 :
148 1 : void test_empty()
149 : {
150 : char *r;
151 :
152 : // NULL as input, returns NULL
153 1 : r = tld_domain_to_lowercase(NULL);
154 1 : if(r != NULL)
155 : {
156 0 : ++err_count;
157 0 : fprintf(stderr, "error: tld_domain_to_lowercase(NULL) is expected to return NULL.\n");
158 : }
159 :
160 : // an empty string also returns NULL as result
161 1 : r = tld_domain_to_lowercase("");
162 1 : if(r != NULL)
163 : {
164 0 : ++err_count;
165 0 : fprintf(stderr, "error: tld_domain_to_lowercase(\"\") is expected to return NULL.\n");
166 : }
167 1 : }
168 :
169 :
170 1 : void test_invalid_xx()
171 : {
172 : char *r;
173 : char buf[256];
174 : int i;
175 :
176 1 : r = tld_domain_to_lowercase("%AZ");
177 1 : if(r != NULL)
178 : {
179 0 : ++err_count;
180 0 : fprintf(stderr, "error: tld_domain_to_lowercase(\"%%AZ\") is expected to return NULL.\n");
181 : }
182 :
183 1 : r = tld_domain_to_lowercase("%ZA");
184 1 : if(r != NULL)
185 : {
186 0 : ++err_count;
187 0 : fprintf(stderr, "error: tld_domain_to_lowercase(\"%%ZA\") is expected to return NULL.\n");
188 : }
189 :
190 : // these are 3 x a with an acute accent (as used in Spanish)
191 1 : r = tld_domain_to_lowercase("\xC3\xA1\xC3\xA1\xC3\xA1");
192 1 : if(r != NULL)
193 : {
194 0 : ++err_count;
195 0 : fprintf(stderr, "error: tld_domain_to_lowercase(\"\xC3\xA1\xC3\xA1\xC3\xA1\") is expected to return NULL because of an overflow.\n");
196 : }
197 :
198 : // these are 2 x a with an acute accent followed by "ab"
199 : // this time the overflow happens when the 'a' is hit
200 1 : r = tld_domain_to_lowercase("\xC3\xA1\xC3\xA1\x61\x62");
201 1 : if(r != NULL)
202 : {
203 0 : ++err_count;
204 0 : fprintf(stderr, "error: tld_domain_to_lowercase(\"\xC3\xA1\xC3\xA1\x61\x62\") is expected to return NULL because of an overflow.\n");
205 : }
206 :
207 : // these are 3 x 0x0911 (Devanagari letter candra o)
208 1 : r = tld_domain_to_lowercase("\xE0\xA4\x91\xE0\xA4\x91\xE0\xA4\x91");
209 1 : if(r != NULL)
210 : {
211 0 : ++err_count;
212 0 : fprintf(stderr, "error: tld_domain_to_lowercase(\"\xE0\xA4\x91\xE0\xA4\x91\xE0\xA4\x91\") is expected to return NULL because of an overflow.\n");
213 : }
214 :
215 : // these are 2 x 0x0911 and a # in between (Devanagari letter candra o)
216 1 : r = tld_domain_to_lowercase("\xE0\xA4\x91#\xE0\xA4\x91");
217 1 : if(r != NULL)
218 : {
219 0 : ++err_count;
220 0 : fprintf(stderr, "error: tld_domain_to_lowercase(\"\xE0\xA4\x91#\xE0\xA4\x91\") is expected to return NULL because of an overflow.\n");
221 : }
222 :
223 : // these are 2 x 0x0911 and a q in between (Devanagari letter candra o)
224 1 : r = tld_domain_to_lowercase("\xE0\xA4\x91q\xE0\xA4\x91");
225 1 : if(r != NULL)
226 : {
227 0 : ++err_count;
228 0 : fprintf(stderr, "error: tld_domain_to_lowercase(\"\xE0\xA4\x91q\xE0\xA4\x91\") is expected to return NULL because of an overflow.\n");
229 : }
230 :
231 : // these are 3 x 0x13F0B (Miao letter da)
232 1 : r = tld_domain_to_lowercase("\xF0\x96\xBC\x8B\xF0\x96\xBC\x8B\xF0\x96\xBC\x8B");
233 1 : if(r != NULL)
234 : {
235 0 : ++err_count;
236 0 : fprintf(stderr, "error: tld_domain_to_lowercase(\"\xF0\x96\xBC\x8B\xF0\x96\xBC\x8B\xF0\x96\xBC\x8B\") is expected to return NULL because of an overflow.\n");
237 : }
238 :
239 : // these are 2 x 0x13F0B with # in between (Miao letter da)
240 1 : r = tld_domain_to_lowercase("\xF0\x96\xBC\x8B#\xF0\x96\xBC\x8B");
241 1 : if(r != NULL)
242 : {
243 0 : ++err_count;
244 0 : fprintf(stderr, "error: tld_domain_to_lowercase(\"\xF0\x96\xBC\x8B#\xF0\x96\xBC\x8B\") is expected to return NULL because of an overflow.\n");
245 : }
246 :
247 : // these are 2 x 0x13F0B with q in between (Miao letter da)
248 1 : r = tld_domain_to_lowercase("\xF0\x96\xBC\x8Bq\xF0\x96\xBC\x8B");
249 1 : if(r != NULL)
250 : {
251 0 : ++err_count;
252 0 : fprintf(stderr, "error: tld_domain_to_lowercase(\"\xF0\x96\xBC\x8Bq\xF0\x96\xBC\x8B\") is expected to return NULL because of an overflow.\n");
253 : }
254 :
255 : // these are 2 x 0x13F0B with qq in between (Miao letter da)
256 1 : r = tld_domain_to_lowercase("\xF0\x96\xBC\x8Bqq\xF0\x96\xBC\x8B");
257 1 : if(r != NULL)
258 : {
259 0 : ++err_count;
260 0 : fprintf(stderr, "error: tld_domain_to_lowercase(\"\xF0\x96\xBC\x8Bqq\xF0\x96\xBC\x8B\") is expected to return NULL because of an overflow.\n");
261 : }
262 :
263 : // these are 2 x 0x13F0B with qqq in between (Miao letter da)
264 1 : r = tld_domain_to_lowercase("\xF0\x96\xBC\x8Bqqq\xF0\x96\xBC\x8B");
265 1 : if(r != NULL)
266 : {
267 0 : ++err_count;
268 0 : fprintf(stderr, "error: tld_domain_to_lowercase(\"\xF0\x96\xBC\x8Bqqq\xF0\x96\xBC\x8B\") is expected to return NULL because of an overflow.\n");
269 : }
270 :
271 9 : for(i = 0xF8; i <= 0xFF; ++i)
272 : {
273 8 : snprintf(buf, sizeof(buf), "+%%%02X+", i);
274 :
275 8 : r = tld_domain_to_lowercase(buf);
276 8 : if(r != NULL)
277 : {
278 0 : ++err_count;
279 0 : fprintf(stderr, "error: tld_domain_to_lowercase(\"%s\") is expected to return NULL because of the invalid byte (introduction byte).\n", buf);
280 : }
281 : }
282 :
283 65 : for(i = 0x80; i <= 0xBF; ++i)
284 : {
285 64 : snprintf(buf, sizeof(buf), "+%%%02X+", i);
286 :
287 64 : r = tld_domain_to_lowercase(buf);
288 64 : if(r != NULL)
289 : {
290 0 : ++err_count;
291 0 : fprintf(stderr, "error: tld_domain_to_lowercase(\"%s\") is expected to return NULL because of the invalid byte (continuation byte).\n", buf);
292 : }
293 : }
294 :
295 : // byte missing (end of string found before end of UTF-8 character)
296 57 : for(i = 0xC0; i <= 0xF7; ++i)
297 : {
298 56 : buf[0] = i;
299 56 : buf[1] = '\0';
300 56 : r = tld_domain_to_lowercase(buf);
301 56 : if(r != NULL)
302 : {
303 0 : ++err_count;
304 0 : fprintf(stderr, "error: tld_domain_to_lowercase(\"%s\") is expected to return NULL because of the invalid UTF-8 sequence (end of string found too early).\n", buf);
305 : }
306 : }
307 :
308 : // continuation byte out of range
309 257 : for(i = 0x00; i <= 0xFF; ++i)
310 : {
311 256 : if(i >= 0x80 && i <= 0xBF)
312 : {
313 : // that's a valid continuation
314 64 : continue;
315 : }
316 192 : buf[0] = rand() % (0xF8 - 0xC0) + 0xC0;
317 192 : buf[1] = i;
318 192 : buf[2] = '\0';
319 192 : r = tld_domain_to_lowercase(buf);
320 192 : if(r != NULL)
321 : {
322 0 : ++err_count;
323 0 : fprintf(stderr, "error: tld_domain_to_lowercase(\"%s\") is expected to return NULL because of the invalid UTF-8 sequence (continuation byte out of range).\n", buf);
324 : }
325 : }
326 :
327 1114112 : for(i = 1; i < 0x110000; ++i)
328 : {
329 1114111 : if((i >= 0xD800 && i <= 0xDFFF) // UTF-16 stuff ignored
330 1112063 : || (i & 0xFFFF) == 0xFFFE
331 1112046 : || (i & 0xFFFF) == 0xFFFF)
332 : {
333 2082 : r = buf;
334 2082 : test_to_utf8(&r, i, rand() & 1);
335 2082 : *r = '\0';
336 :
337 2082 : r = tld_domain_to_lowercase(buf);
338 2082 : if(r != NULL)
339 : {
340 0 : ++err_count;
341 0 : fprintf(stderr, "error: tld_domain_to_lowercase(\"%s\") is expected to return NULL because of the invalid UTF-8 sequence (continuation byte out of range).\n", buf);
342 : }
343 : }
344 : }
345 :
346 3080193 : for(i = 0x110000;; ++i)
347 : {
348 3080193 : r = buf;
349 3080193 : test_to_utf8(&r, i, rand() & 1);
350 3080193 : *r = '\0';
351 :
352 : // we only save up to 4 bytes, so to check overflow, we expect %F0
353 : // as the first byte...
354 3080193 : if(strncmp(buf, "%f0", 3) == 0
355 3080192 : || strncmp(buf, "%F0", 3) == 0)
356 : {
357 : // no need to test further, we hit the case of 0xF8 or more in
358 : // the first byte which is checked somewhere else
359 : break;
360 : }
361 :
362 3080192 : r = tld_domain_to_lowercase(buf);
363 3080192 : if(r != NULL)
364 : {
365 0 : ++err_count;
366 0 : fprintf(stderr, "error: tld_domain_to_lowercase(\"%s\") is expected to return NULL because of the invalid Unicode character. Got \"%s\" instead.\n", buf, r);
367 : }
368 3080192 : }
369 1 : }
370 :
371 :
372 1 : int main(int argc, char *argv[])
373 : {
374 : int i;
375 1 : int seed = time(NULL);
376 :
377 1 : for(i = 1; i < argc; ++i)
378 : {
379 0 : if(strcmp(argv[i], "-v") == 0)
380 : {
381 0 : verbose = 1;
382 : }
383 0 : else if(strcmp(argv[i], "--seed") == 0)
384 : {
385 0 : if(i + 1 >= argc)
386 : {
387 0 : fprintf(stderr, "error: --seed expect a value.\n");
388 0 : exit(1);
389 : }
390 0 : seed = atol(argv[i + 1]);
391 : }
392 : }
393 :
394 1 : printf("testing tld test domain lowercase version %s with seed %d\n", tld_version(), seed);
395 :
396 1 : srand(seed);
397 :
398 1 : test_empty();
399 1 : test_all_characters();
400 1 : test_invalid_xx();
401 :
402 1 : exit(err_count ? 1 : 0);
403 : }
404 :
405 : /* vim: ts=4 sw=4 et
406 : */
407 :
|