Line data Source code
1 : /* TLD library -- test converting domain names to lowercase
2 : * Copyright (c) 2011-2022 Made to Order Software Corp. All Rights Reserved
3 : *
4 : * Permission is hereby granted, free of charge, to any person obtaining a
5 : * copy of this software and associated documentation files (the
6 : * "Software"), to deal in the Software without restriction, including
7 : * without limitation the rights to use, copy, modify, merge, publish,
8 : * distribute, sublicense, and/or sell copies of the Software, and to
9 : * permit persons to whom the Software is furnished to do so, subject to
10 : * the following conditions:
11 : *
12 : * The above copyright notice and this permission notice shall be included
13 : * in all copies or substantial portions of the Software.
14 : *
15 : * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
16 : * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 : * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
18 : * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
19 : * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
20 : * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
21 : * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22 : */
23 :
24 : /** \file
25 : * \brief Test the tld_domain_to_lowercase() functions.
26 : *
27 : * This file implements various tests to verify that the
28 : * tld_domain_to_lowercase() function works as expected
29 : * with valid and invalid names.
30 : */
31 :
32 : #include "libtld/tld.h"
33 : #include <string.h>
34 : #include <stdlib.h>
35 : #include <stdio.h>
36 : #include <time.h>
37 : #include <limits.h>
38 : #include <wctype.h>
39 :
/* Running count of failed checks; main() exits with status 1 when it is non-zero. */
40 : int err_count = 0;
/* Set to 1 by main() when the -v command line option is given. */
41 : int verbose = 0;
42 :
43 :
44 :
/** \brief Append one byte to the output, percent-encoding it when needed.
 *
 * ASCII letters, digits and the characters '.', '-', '!', '~', '/' and '_'
 * are copied as is. Any other value is written as "%XX" where XX is the
 * value in hexadecimal. The hexadecimal digits are uppercase when
 * \p force_caps is non-zero; otherwise rand() decides between uppercase
 * and lowercase.
 *
 * \param[in,out] out  Address of the write cursor; it is advanced by one
 *                     (verbatim copy) or by three ("%XX" form).
 * \param[in] wc  The byte value to append.
 * \param[in] force_caps  Non-zero to always emit uppercase hexadecimal.
 */
void test_add_byte(char **out, int wc, int force_caps)
{
    int verbatim = (wc >= 'A' && wc <= 'Z')
                || (wc >= 'a' && wc <= 'z')
                || (wc >= '0' && wc <= '9');

    if(!verbatim)
    {
        switch(wc)
        {
        case '.':
        case '-':
        case '!':
        case '~':
        case '/':
        case '_':
            verbatim = 1;
            break;

        default:
            break;

        }
    }

    if(verbatim)
    {
        *(*out)++ = wc;
        return;
    }

    // write '%XX' where X represents a hexadecimal digit; rand() is only
    // consumed when the caller did not force uppercase digits
    if(force_caps || (rand() & 1) != 0)
    {
        sprintf(*out, "%%%02X", wc);
    }
    else
    {
        sprintf(*out, "%%%02x", wc);
    }
    *out += 3;
}
75 :
76 :
/** \brief Write a code point as a UTF-8 style byte sequence.
 *
 * Values below 0x80 produce one byte, below 0x800 two bytes, below
 * 0x10000 three bytes and anything else four bytes. Each byte is handed
 * to test_add_byte(), which may percent-encode it. No range check is done
 * on \p wc, so values past the four byte limit still yield four bytes.
 *
 * \param[in,out] out  Address of the write cursor, advanced by test_add_byte().
 * \param[in] wc  The code point to encode.
 * \param[in] force_caps  Forwarded as is to test_add_byte().
 */
void test_to_utf8(char **out, int wc, int force_caps)
{
    int continuation_count;
    int lead_marker;
    int shift;

    if(wc < 0x80)
    {
        // single byte, no lead/continuation structure
        test_add_byte(out, wc, force_caps);
        return;
    }

    if(wc < 0x800)
    {
        continuation_count = 1;
        lead_marker = 0xC0;
    }
    else if(wc < 0x10000)
    {
        continuation_count = 2;
        lead_marker = 0xE0;
    }
    else
    {
        continuation_count = 3;
        lead_marker = 0xF0;
    }

    // lead byte: the top bits of the code point merged with the marker
    test_add_byte(out, ((wc >> (6 * continuation_count)) | lead_marker), force_caps);

    // continuation bytes: 6 bits each, most significant group first
    for(shift = 6 * (continuation_count - 1); shift >= 0; shift -= 6)
    {
        test_add_byte(out, (((wc >> shift) & 0x3F) | 0x80), force_caps);
    }
}
102 :
103 :
104 1 : void test_all_characters()
105 : {
106 : int wc;
107 1 : char buf[256], *s, *r;
108 :
109 1114112 : for(wc = 1; wc < 0x110000; ++wc)
110 : {
111 1114111 : if((wc >= 0xD800 && wc <= 0xDFFF) // UTF-16 stuff ignored
112 1112063 : || (wc & 0xFFFF) == 0xFFFE
113 1112046 : || (wc & 0xFFFF) == 0xFFFF
114 1112029 : || wc == '/')
115 : {
116 : // those code points must be ignored because they
117 : // really don't work in a domain name
118 2083 : continue;
119 : }
120 :
121 1112028 : s = buf;
122 1112028 : test_to_utf8(&s, wc, 0);
123 1112028 : *s = '\0';
124 :
125 1112028 : r = tld_domain_to_lowercase(buf);
126 :
127 1112028 : s = buf;
128 1112028 : test_to_utf8(&s, towlower(wc), 1); // force caps in %XX notication
129 1112028 : *s = '\0';
130 :
131 1112028 : if(r == NULL)
132 : {
133 0 : fprintf(stderr, "error: character 0x%06X generated an error and tld_domain_to_lower() returned NULL (expected \"%s\")\n", wc, buf);
134 : }
135 : else
136 : {
137 1112028 : if(strcmp(r, buf) != 0)
138 : {
139 0 : fprintf(stderr, "error: character 0x%06X was not converted back and force as expected (expected \"%s\", received \"%s\")\n", wc, buf, r);
140 : }
141 :
142 : // done with the result
143 1112028 : free(r);
144 : }
145 : }
146 1 : }
147 :
148 :
149 1 : void test_empty()
150 : {
151 : char *r;
152 :
153 : // NULL as input, returns NULL
154 1 : r = tld_domain_to_lowercase(NULL);
155 1 : if(r != NULL)
156 : {
157 0 : ++err_count;
158 0 : fprintf(stderr, "error: tld_domain_to_lowercase(NULL) is expected to return NULL.\n");
159 : }
160 :
161 : // an empty string also returns NULL as result
162 1 : r = tld_domain_to_lowercase("");
163 1 : if(r != NULL)
164 : {
165 0 : ++err_count;
166 0 : fprintf(stderr, "error: tld_domain_to_lowercase(\"\") is expected to return NULL.\n");
167 : }
168 1 : }
169 :
170 :
171 1 : void test_invalid_xx()
172 : {
173 1 : char *r;
174 1 : char buf[256];
175 : int i;
176 :
177 1 : r = tld_domain_to_lowercase("%AZ");
178 1 : if(r != NULL)
179 : {
180 0 : ++err_count;
181 0 : fprintf(stderr, "error: tld_domain_to_lowercase(\"%%AZ\") is expected to return NULL.\n");
182 : }
183 :
184 1 : r = tld_domain_to_lowercase("%ZA");
185 1 : if(r != NULL)
186 : {
187 0 : ++err_count;
188 0 : fprintf(stderr, "error: tld_domain_to_lowercase(\"%%ZA\") is expected to return NULL.\n");
189 : }
190 :
191 : // these are 3 x a with an acute accent (as used in Spanish)
192 1 : r = tld_domain_to_lowercase("\xC3\xA1\xC3\xA1\xC3\xA1");
193 1 : if(r != NULL)
194 : {
195 0 : ++err_count;
196 0 : fprintf(stderr, "error: tld_domain_to_lowercase(\"\xC3\xA1\xC3\xA1\xC3\xA1\") is expected to return NULL because of an overflow.\n");
197 : }
198 :
199 : // these are 2 x a with an acute accent followed by "ab"
200 : // this time the overflow happens when the 'a' is hit
201 1 : r = tld_domain_to_lowercase("\xC3\xA1\xC3\xA1\x61\x62");
202 1 : if(r != NULL)
203 : {
204 0 : ++err_count;
205 0 : fprintf(stderr, "error: tld_domain_to_lowercase(\"\xC3\xA1\xC3\xA1\x61\x62\") is expected to return NULL because of an overflow.\n");
206 : }
207 :
208 : // these are 3 x 0x0911 (Devanagari letter candra o)
209 1 : r = tld_domain_to_lowercase("\xE0\xA4\x91\xE0\xA4\x91\xE0\xA4\x91");
210 1 : if(r != NULL)
211 : {
212 0 : ++err_count;
213 0 : fprintf(stderr, "error: tld_domain_to_lowercase(\"\xE0\xA4\x91\xE0\xA4\x91\xE0\xA4\x91\") is expected to return NULL because of an overflow.\n");
214 : }
215 :
216 : // these are 2 x 0x0911 and a # in between (Devanagari letter candra o)
217 1 : r = tld_domain_to_lowercase("\xE0\xA4\x91#\xE0\xA4\x91");
218 1 : if(r != NULL)
219 : {
220 0 : ++err_count;
221 0 : fprintf(stderr, "error: tld_domain_to_lowercase(\"\xE0\xA4\x91#\xE0\xA4\x91\") is expected to return NULL because of an overflow.\n");
222 : }
223 :
224 : // these are 2 x 0x0911 and a q in between (Devanagari letter candra o)
225 1 : r = tld_domain_to_lowercase("\xE0\xA4\x91q\xE0\xA4\x91");
226 1 : if(r != NULL)
227 : {
228 0 : ++err_count;
229 0 : fprintf(stderr, "error: tld_domain_to_lowercase(\"\xE0\xA4\x91q\xE0\xA4\x91\") is expected to return NULL because of an overflow.\n");
230 : }
231 :
232 : // these are 3 x 0x13F0B (Miao letter da)
233 1 : r = tld_domain_to_lowercase("\xF0\x96\xBC\x8B\xF0\x96\xBC\x8B\xF0\x96\xBC\x8B");
234 1 : if(r != NULL)
235 : {
236 0 : ++err_count;
237 0 : fprintf(stderr, "error: tld_domain_to_lowercase(\"\xF0\x96\xBC\x8B\xF0\x96\xBC\x8B\xF0\x96\xBC\x8B\") is expected to return NULL because of an overflow.\n");
238 : }
239 :
240 : // these are 2 x 0x13F0B with # in between (Miao letter da)
241 1 : r = tld_domain_to_lowercase("\xF0\x96\xBC\x8B#\xF0\x96\xBC\x8B");
242 1 : if(r != NULL)
243 : {
244 0 : ++err_count;
245 0 : fprintf(stderr, "error: tld_domain_to_lowercase(\"\xF0\x96\xBC\x8B#\xF0\x96\xBC\x8B\") is expected to return NULL because of an overflow.\n");
246 : }
247 :
248 : // these are 2 x 0x13F0B with q in between (Miao letter da)
249 1 : r = tld_domain_to_lowercase("\xF0\x96\xBC\x8Bq\xF0\x96\xBC\x8B");
250 1 : if(r != NULL)
251 : {
252 0 : ++err_count;
253 0 : fprintf(stderr, "error: tld_domain_to_lowercase(\"\xF0\x96\xBC\x8Bq\xF0\x96\xBC\x8B\") is expected to return NULL because of an overflow.\n");
254 : }
255 :
256 : // these are 2 x 0x13F0B with qq in between (Miao letter da)
257 1 : r = tld_domain_to_lowercase("\xF0\x96\xBC\x8Bqq\xF0\x96\xBC\x8B");
258 1 : if(r != NULL)
259 : {
260 0 : ++err_count;
261 0 : fprintf(stderr, "error: tld_domain_to_lowercase(\"\xF0\x96\xBC\x8Bqq\xF0\x96\xBC\x8B\") is expected to return NULL because of an overflow.\n");
262 : }
263 :
264 : // these are 2 x 0x13F0B with qqq in between (Miao letter da)
265 1 : r = tld_domain_to_lowercase("\xF0\x96\xBC\x8Bqqq\xF0\x96\xBC\x8B");
266 1 : if(r != NULL)
267 : {
268 0 : ++err_count;
269 0 : fprintf(stderr, "error: tld_domain_to_lowercase(\"\xF0\x96\xBC\x8Bqqq\xF0\x96\xBC\x8B\") is expected to return NULL because of an overflow.\n");
270 : }
271 :
272 9 : for(i = 0xF8; i <= 0xFF; ++i)
273 : {
274 8 : snprintf(buf, sizeof(buf), "+%%%02X+", i);
275 :
276 8 : r = tld_domain_to_lowercase(buf);
277 8 : if(r != NULL)
278 : {
279 0 : ++err_count;
280 0 : fprintf(stderr, "error: tld_domain_to_lowercase(\"%s\") is expected to return NULL because of the invalid byte (introduction byte).\n", buf);
281 : }
282 : }
283 :
284 65 : for(i = 0x80; i <= 0xBF; ++i)
285 : {
286 64 : snprintf(buf, sizeof(buf), "+%%%02X+", i);
287 :
288 64 : r = tld_domain_to_lowercase(buf);
289 64 : if(r != NULL)
290 : {
291 0 : ++err_count;
292 0 : fprintf(stderr, "error: tld_domain_to_lowercase(\"%s\") is expected to return NULL because of the invalid byte (continuation byte).\n", buf);
293 : }
294 : }
295 :
296 : // byte missing (end of string found before end of UTF-8 character)
297 57 : for(i = 0xC0; i <= 0xF7; ++i)
298 : {
299 56 : buf[0] = i;
300 56 : buf[1] = '\0';
301 56 : r = tld_domain_to_lowercase(buf);
302 56 : if(r != NULL)
303 : {
304 0 : ++err_count;
305 0 : fprintf(stderr, "error: tld_domain_to_lowercase(\"%s\") is expected to return NULL because of the invalid UTF-8 sequence (end of string found too early).\n", buf);
306 : }
307 : }
308 :
309 : // continuation byte out of range
310 257 : for(i = 0x00; i <= 0xFF; ++i)
311 : {
312 256 : if(i >= 0x80 && i <= 0xBF)
313 : {
314 : // that's a valid continuation
315 64 : continue;
316 : }
317 192 : buf[0] = rand() % (0xF8 - 0xC0) + 0xC0;
318 192 : buf[1] = i;
319 192 : buf[2] = '\0';
320 192 : r = tld_domain_to_lowercase(buf);
321 192 : if(r != NULL)
322 : {
323 0 : ++err_count;
324 0 : fprintf(stderr, "error: tld_domain_to_lowercase(\"%s\") is expected to return NULL because of the invalid UTF-8 sequence (continuation byte out of range).\n", buf);
325 : }
326 : }
327 :
328 1114112 : for(i = 1; i < 0x110000; ++i)
329 : {
330 1114111 : if((i >= 0xD800 && i <= 0xDFFF) // UTF-16 stuff ignored
331 1112063 : || (i & 0xFFFF) == 0xFFFE
332 1112046 : || (i & 0xFFFF) == 0xFFFF)
333 : {
334 2082 : r = buf;
335 2082 : test_to_utf8(&r, i, rand() & 1);
336 2082 : *r = '\0';
337 :
338 2082 : r = tld_domain_to_lowercase(buf);
339 2082 : if(r != NULL)
340 : {
341 0 : ++err_count;
342 0 : fprintf(stderr, "error: tld_domain_to_lowercase(\"%s\") is expected to return NULL because of the invalid UTF-8 sequence (continuation byte out of range).\n", buf);
343 : }
344 : }
345 : }
346 :
347 3080193 : for(i = 0x110000;; ++i)
348 : {
349 6160385 : r = buf;
350 3080193 : test_to_utf8(&r, i, rand() & 1);
351 3080193 : *r = '\0';
352 :
353 : // we only save up to 4 bytes, so to check overflow, we expect %F0
354 : // as the first byte...
355 3080193 : if(strncmp(buf, "%f0", 3) == 0
356 3080193 : || strncmp(buf, "%F0", 3) == 0)
357 : {
358 : // no need to test further, we hit the case of 0xF8 or more in
359 : // the first byte which is checked somewhere else
360 : break;
361 : }
362 :
363 3080192 : r = tld_domain_to_lowercase(buf);
364 3080192 : if(r != NULL)
365 : {
366 0 : ++err_count;
367 0 : fprintf(stderr, "error: tld_domain_to_lowercase(\"%s\") is expected to return NULL because of the invalid Unicode character. Got \"%s\" instead.\n", buf, r);
368 : }
369 : }
370 1 : }
371 :
372 :
373 1 : int main(int argc, char *argv[])
374 : {
375 : int i;
376 1 : int seed = time(NULL);
377 :
378 1 : for(i = 1; i < argc; ++i)
379 : {
380 0 : if(strcmp(argv[i], "-v") == 0)
381 : {
382 0 : verbose = 1;
383 : }
384 0 : else if(strcmp(argv[i], "--seed") == 0)
385 : {
386 0 : if(i + 1 >= argc)
387 : {
388 0 : fprintf(stderr, "error: --seed expect a value.\n");
389 0 : exit(1);
390 : }
391 0 : seed = atol(argv[i + 1]);
392 : }
393 : }
394 :
395 1 : printf("testing tld test domain lowercase version %s with seed %d\n", tld_version(), seed);
396 :
397 1 : srand(seed);
398 :
399 1 : test_empty();
400 1 : test_all_characters();
401 1 : test_invalid_xx();
402 :
403 1 : exit(err_count ? 1 : 0);
404 : }
405 :
406 : /* vim: ts=4 sw=4 et
407 : */
408 :
|