Line data Source code
1 : /* TLD library -- test the TLD interface
2 : * Copyright (c) 2011-2021 Made to Order Software Corp. All Rights Reserved
3 : *
4 : * Permission is hereby granted, free of charge, to any person obtaining a
5 : * copy of this software and associated documentation files (the
6 : * "Software"), to deal in the Software without restriction, including
7 : * without limitation the rights to use, copy, modify, merge, publish,
8 : * distribute, sublicense, and/or sell copies of the Software, and to
9 : * permit persons to whom the Software is furnished to do so, subject to
10 : * the following conditions:
11 : *
12 : * The above copyright notice and this permission notice shall be included
13 : * in all copies or substantial portions of the Software.
14 : *
15 : * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
16 : * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 : * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
18 : * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
19 : * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
20 : * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
21 : * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22 : */
23 :
24 : /** \file
25 : * \brief Test the tld() function like an end user.
26 : *
27 : * This file implements various tests verifying the tld() function.
28 : * The tests defined here are not for coverage but rather edge
29 : * cases which could be hard to expect in a full coverage test.
30 : */
31 :
32 : #include "libtld/tld.h"
33 : #include <string.h>
34 : #include <stdlib.h>
35 : #include <stdio.h>
36 : #include <limits.h>
37 :
38 : /* we get access to the table with all the TLDs so we can go through them all
39 : * the library does not give direct access by default... (although maybe we
40 : * could give users access to the data)
41 : */
42 : #include "tld_data.h"
43 : extern const struct tld_description tld_descriptions[]; /* table of TLD entries, indexed by the offsets used in cat_ext() and test_all() */
44 : extern unsigned short tld_start_offset; /* declared for completeness; not referenced by the tests visible in this file */
45 : extern unsigned short tld_end_offset; /* exclusive upper bound used when looping over tld_descriptions[] */
46 :
47 : int err_count = 0; /* number of failed checks; incremented by the tests, read by main() to pick the exit code */
48 : int verbose = 0; /* set to 1 by main() when "-v" is given; test_specific() then prints every result */
49 :
50 : /*
51 : * This test calls the tld() function with all the TLDs and then
52 : * with wrong TLDs to make sure that the tld() functions works as
53 : * expected.
54 : *
55 : * extern enum tld_result tld(const char *uri, struct tld_info *info);
56 : */
57 :
58 :
59 : /** \brief Build an extension from any offset.
60 : *
61 : * Create a domain name extensions from any entry in the TLD
62 : * descriptions.
63 : *
64 : * \param[in] offset The offset in the tld_descriptions table
65 : * \param[in] uri The URI buffer
66 : *
67 : * \return true if the first TLD is a star ("*").
68 : */
69 52435 : int cat_ext(int offset, char *uri)
70 : {
71 : int k, l;
72 52435 : int has_star = strcmp(tld_descriptions[offset].f_tld, "*") == 0;
73 :
74 52435 : if(!has_star)
75 : {
76 52010 : strcat(uri, ".");
77 52010 : strcat(uri, tld_descriptions[offset].f_tld);
78 : }
79 52435 : l = offset;
80 506403335 : for(k = offset + 1; k < tld_end_offset; ++k)
81 : {
82 506350900 : if(l >= tld_descriptions[k].f_start_offset
83 44766860 : && l < tld_descriptions[k].f_end_offset)
84 : {
85 : /* found a parent */
86 61145 : if(strcmp(tld_descriptions[k].f_tld, "*") != 0)
87 : {
88 61145 : strcat(uri, ".");
89 61145 : strcat(uri, tld_descriptions[k].f_tld);
90 : }
91 : else
92 : {
93 0 : fprintf(stderr, "fatal error: found \"*\" at the wrong place; it's only supported as the very first segment.\n");
94 0 : exit(1);
95 : }
96 61145 : l = k;
97 61145 : k = tld_descriptions[k].f_end_offset;
98 : }
99 : }
100 :
101 52435 : return has_star;
102 : }
103 :
104 :
105 :
106 :
107 : struct test_uris
108 : {
109 : const char * f_uri;
110 : enum tld_result f_result;
111 : int f_offset;
112 : };
113 :
114 :
115 : const struct test_uris g_uris[] = {
116 : {
117 : "advisor-z2-ngprod-1997768525.us-west-2.elb.amazonaws.com",
118 : TLD_RESULT_SUCCESS,
119 : 28,
120 : },
121 : {
122 : "us-west-2.elb.amazonaws.com",
123 : TLD_RESULT_SUCCESS,
124 : 0,
125 : },
126 : {
127 : "m2osw.com",
128 : TLD_RESULT_SUCCESS,
129 : 5,
130 : },
131 : {
132 : ".com",
133 : TLD_RESULT_SUCCESS,
134 : 0,
135 : },
136 : {
137 : "com",
138 : TLD_RESULT_NO_TLD,
139 : -1,
140 : },
141 : {
142 : ".ar",
143 : TLD_RESULT_INVALID,
144 : 0,
145 : },
146 : {
147 : "int.ar",
148 : TLD_RESULT_SUCCESS,
149 : 0,
150 : },
151 : {
152 : "blah.int.ar",
153 : TLD_RESULT_SUCCESS,
154 : 4,
155 : },
156 : {
157 : "orange.blah.int.ar",
158 : TLD_RESULT_SUCCESS,
159 : 11,
160 : },
161 : {
162 : "congresodelalengua3.ar", /* congresodelalengua3 is an exceptional 2nd level */
163 : TLD_RESULT_SUCCESS,
164 : 19,
165 : },
166 : {
167 : "special.congresodelalengua3.ar", /* congresodelalengua3 is an exceptional 2nd level */
168 : TLD_RESULT_SUCCESS,
169 : 27,
170 : },
171 : {
172 : "night-club.kawasaki.jp",
173 : TLD_RESULT_SUCCESS,
174 : 0,
175 : },
176 : {
177 : "orange.night-club.kawasaki.jp",
178 : TLD_RESULT_SUCCESS,
179 : 6,
180 : },
181 : };
182 :
183 :
184 : /*
185 : * This tests various ad hoc domains with expected results.
186 : *
187 : * This way we can verify specific things we want to check.
188 : */
189 1 : void test_specific()
190 : {
191 14 : for(size_t idx = 0; idx < sizeof(g_uris) / sizeof(g_uris[0]); ++idx)
192 : {
193 : struct tld_info info;
194 13 : enum tld_result r = tld(g_uris[idx].f_uri, &info);
195 13 : if(verbose)
196 : {
197 0 : fprintf(
198 : stderr
199 : , "info: URI \"%s\" returned %d and TLD is \"%s\"\n"
200 : , g_uris[idx].f_uri
201 : , r
202 0 : , g_uris[idx].f_uri + info.f_offset);
203 : }
204 :
205 13 : if(r != g_uris[idx].f_result)
206 : {
207 0 : fprintf(stderr, "error: testing URI \"%s\" got result %d, expected %d and TLD of \"%s\"\n",
208 0 : g_uris[idx].f_uri, r, g_uris[idx].f_result,
209 0 : g_uris[idx].f_uri + g_uris[idx].f_offset);
210 0 : ++err_count;
211 : }
212 13 : else if(info.f_offset != g_uris[idx].f_offset)
213 : {
214 0 : fprintf(stderr, "error: testing URI \"%s\" got offset %d, expected %d and TLD of \"%s\"\n",
215 : g_uris[idx].f_uri, info.f_offset, g_uris[idx].f_offset,
216 0 : g_uris[idx].f_uri + info.f_offset);
217 0 : ++err_count;
218 : }
219 : }
220 1 : }
221 :
222 :
223 : /*
224 : * This test goes through all the domain names and extracts the domain,
225 : * sub-domains and TLDs. (Or at least verifies that we get the correct
226 : * information in order to do so.)
227 : *
228 : * It builds a URI with zero to many sub-domain names, adds a specific
229 : * domain name, then append a complete TLD. The result is then checked
230 : * with the tld() function from the library. The tld() is expected to
231 : * either return VALID or INVALID but nothing else (since all those
232 : * TLDs exist in our table.) Then we verify that the returned offset is
233 : * a perfect match.
234 : */
235 1 : void test_all()
236 : {
237 1 : const char *sub_domains[] = {
238 : "",
239 : "www.",
240 : "tld.",
241 : "george.snap.",
242 : "very.long.sub.domain.ext.en.sion.here."
243 : "host.%20.space."
244 : "host.%fa.u-acute."
245 : "host.%FA.U-acute."
246 : };
247 : struct tld_info info;
248 : char uri[256], extension_uri[256];
249 : int i, j, p, max_subdomains, has_star;
250 : enum tld_result r;
251 :
252 1 : max_subdomains = sizeof(sub_domains) / sizeof(sub_domains[0]);
253 :
254 10467 : for(i = 0; i < tld_end_offset; ++i)
255 : {
256 62796 : for(j = 0; j < max_subdomains; ++j)
257 : {
258 52330 : strcpy(uri, sub_domains[j]);
259 52330 : strcat(uri, "domain-name");
260 52330 : has_star = cat_ext(i, uri);
261 :
262 : /* just in case make sure that we did not overflow the buffer */
263 52330 : if(strlen(uri) >= sizeof(uri))
264 : {
265 0 : fprintf(stderr, "fatal error: the URI \"%s\" is longer than the uri[] array.\n", uri);
266 0 : exit(1);
267 : }
268 :
269 : /* reset the structure so we can verify it gets initialized */
270 52330 : memset(&info, 0xFE, sizeof(info));
271 52330 : r = tld(uri, &info);
272 : /*
273 : for(size_t l = 0; l < sizeof(info); ++l)
274 : {
275 : fprintf(stderr, "0x%02X ", ((unsigned char*)&info)[l]);
276 : }
277 : fprintf(stderr, "\nresult for [%s]: category[%d], status[%d/%d], country[%s],"
278 : " tld[%s], offset[%d]\n",
279 : uri,
280 : (int)info.f_category,
281 : (int)info.f_status, (int)tld_descriptions[i].f_status,
282 : info.f_country,
283 : info.f_tld, (int)info.f_offset);
284 : */
285 52330 : p = i;
286 52330 : if(tld_descriptions[i].f_status == TLD_STATUS_EXCEPTION)
287 : {
288 105 : if(tld_descriptions[i].f_exception_apply_to == USHRT_MAX)
289 : {
290 0 : fprintf(stderr, "error: domain name for \"%s\" (%d) is said to be an exception but it has no apply-to parameter. (result: %d)\n",
291 : uri, i, r);
292 0 : ++err_count;
293 : }
294 : else
295 : {
296 105 : p = tld_descriptions[i].f_exception_apply_to;
297 : }
298 : }
299 52330 : if(tld_descriptions[i].f_status == TLD_STATUS_VALID)
300 : {
301 49600 : if(r != TLD_RESULT_SUCCESS)
302 : {
303 0 : fprintf(stderr, "error: valid domain name for \"%s\" (%d) could not be extracted successfully (returned: %d)\n",
304 : uri, i, r);
305 0 : ++err_count;
306 : }
307 49600 : else if(has_star)
308 : {
309 : /* the "domain-name" is absorbed as part of the TLD */
310 425 : int expected = strlen(sub_domains[j]);
311 425 : if(expected != 0)
312 : {
313 340 : --expected; /* ignore the "." */
314 : }
315 425 : if(info.f_offset != expected)
316 : {
317 0 : fprintf(stderr, "error: valid domain name for \"%s\" (%d) could not be extracted successfully (offset: %d, expected: %d)\n",
318 : uri, i,
319 : info.f_offset, expected);
320 0 : ++err_count;
321 : }
322 : }
323 : else
324 : {
325 : /* verify the top domain name */
326 49175 : if(info.f_offset < 11)
327 : {
328 0 : fprintf(stderr, "error: somehow the top domain name in \"%s\" (%d) cannot properly be extracted\n",
329 : uri, i);
330 0 : ++err_count;
331 : }
332 49175 : else if(strncmp(uri + info.f_offset - 11, "domain-name", 11) != 0)
333 : {
334 0 : fprintf(stderr, "error: valid domain name for \"%s\" (%d) could not be extracted successfully (offset: %d)\n",
335 : uri, i, info.f_offset);
336 0 : ++err_count;
337 : }
338 : /*
339 : else
340 : fprintf(stderr, "valid: \"%s\" -> \"%s\"\n", uri, info.f_tld);
341 : */
342 : }
343 : }
344 2730 : else if(tld_descriptions[i].f_status == TLD_STATUS_EXCEPTION)
345 : {
346 105 : if(r != TLD_RESULT_SUCCESS)
347 : {
348 0 : fprintf(stderr, "error: exceptional domain name for \"%s\" (%d) could not be extracted successfully (returned: %d)\n",
349 : uri, i, r);
350 0 : ++err_count;
351 : }
352 : else
353 : {
354 105 : extension_uri[0] = '\0';
355 105 : cat_ext(p, extension_uri);
356 105 : if(strcmp(info.f_tld, extension_uri) != 0)
357 : //if(strncmp(uri + info.f_offset - 11, "domain-name", 11) != 0)
358 : {
359 0 : fprintf(stderr, "error: exceptional domain name for \"%s\" (%d/%d) could not be extracted successfully as \"%s\" (offset: %d)\n",
360 : uri, i, p, extension_uri, info.f_offset);
361 0 : ++err_count;
362 : }
363 : /*
364 : else
365 : fprintf(stderr, "valid: \"%s\" -> \"%s\"\n", uri, info.f_tld);
366 : */
367 : }
368 : }
369 : else
370 : {
371 2625 : if(tld_descriptions[i].f_status == TLD_STATUS_UNUSED
372 1430 : && tld_descriptions[i].f_start_offset != USHRT_MAX
373 1205 : && strcmp(tld_descriptions[tld_descriptions[i].f_start_offset].f_tld, "*") == 0)
374 : {
375 : /* this is a special case, an entry such as:
376 : *
377 : * *.blah.com
378 : *
379 : * and that means the result is going to be SUCCESS
380 : * and VALID...
381 : */
382 850 : if(r != TLD_RESULT_SUCCESS
383 425 : || info.f_status != TLD_STATUS_VALID)
384 : {
385 0 : fprintf(stderr, "error: domain name for \"%s\" (%d) could not be extracted as expected (returned: %d) [1]\n",
386 : uri, i, r);
387 0 : ++err_count;
388 : }
389 : }
390 2200 : else if(r != TLD_RESULT_INVALID)
391 : {
392 0 : fprintf(stderr, "error: domain name for \"%s\" (%d) could not be extracted as expected (returned: %d) [2]\n",
393 : uri, i, r);
394 0 : ++err_count;
395 : }
396 2200 : else if(p != i)
397 : {
398 0 : extension_uri[0] = '\0';
399 0 : cat_ext(p, extension_uri);
400 0 : if(strcmp(info.f_tld, extension_uri) != 0)
401 : {
402 0 : fprintf(stderr, "error: other domain name for \"%s\" (%d) could not be extracted successfully (returned: %d/%s != %s) [1]\n",
403 : uri, i, r, info.f_tld, extension_uri);
404 0 : ++err_count;
405 : }
406 : /*
407 : else
408 : fprintf(stderr, "?? invalid: \"%s\" -> \"%s\"\n", uri, info.f_tld);
409 : */
410 : }
411 : else
412 : {
413 2200 : if(strncmp(uri + info.f_offset - 11, "domain-name", 11) != 0)
414 : {
415 0 : fprintf(stderr, "error: other domain name for \"%s\" (%d) could not be extracted successfully (returned: %d/%s) [2]\n",
416 : uri, i, r, info.f_tld);
417 0 : ++err_count;
418 : }
419 : /*
420 : else
421 : fprintf(stderr, "?? invalid: \"%s\" -> \"%s\"\n", uri, info.f_tld);
422 : */
423 : }
424 : }
425 : }
426 : }
427 1 : }
428 :
429 :
430 : /*
431 : * This test checks out URIs that end with an invalid TLD. This is
432 : * expected to return an error every single time.
433 : */
434 1 : void test_unknown()
435 : {
436 : struct bad_data
437 : {
438 : const char * f_uri;
439 : };
440 1 : struct bad_data d[] =
441 : {
442 : { "this.is.wrong" },
443 : { "missing.tld" },
444 : { ".net.absolutely.com.no.info.on.this" }
445 : };
446 : struct tld_info info;
447 : int i, max;
448 : enum tld_result r;
449 :
450 1 : max = sizeof(d) / sizeof(d[0]);
451 4 : for(i = 0; i < max; ++i)
452 : {
453 3 : memset(&info, 0xFE, sizeof(info));
454 3 : r = tld(d[i].f_uri, &info);
455 3 : if(r != TLD_RESULT_NOT_FOUND)
456 : {
457 0 : fprintf(stderr, "error: the invalid URI \"%s\" was found by tld()!\n", d[i].f_uri);
458 0 : ++err_count;
459 : }
460 : }
461 1 : }
462 :
463 :
464 :
465 :
466 1 : void test_invalid()
467 : {
468 : struct tld_info undefined_info;
469 : struct tld_info clear_info;
470 : struct tld_info info;
471 : enum tld_result r;
472 :
473 : /*
474 : * We reset the undefined_info the same way we reset the info
475 : * structure because the alignment on 64bits may add another
476 : * 4 bytes at the end of the structure that are not otherwise
477 : * accessible.
478 : */
479 1 : memset(&undefined_info, 0xFE, sizeof(undefined_info));
480 1 : undefined_info.f_category = TLD_CATEGORY_UNDEFINED;
481 1 : undefined_info.f_status = TLD_STATUS_UNDEFINED;
482 1 : undefined_info.f_country = (const char *) 0;
483 1 : undefined_info.f_tld = (const char *) 0;
484 1 : undefined_info.f_offset = -1;
485 :
486 1 : memset(&clear_info, 0xFE, sizeof(clear_info));
487 :
488 : /* test: NULL */
489 1 : info = clear_info;
490 1 : r = tld(NULL, &info);
491 1 : if(r != TLD_RESULT_NULL)
492 : {
493 0 : fprintf(stderr, "error: the NULL URI did not return the TLD_RESULT_NULL result.\n");
494 0 : ++err_count;
495 : }
496 1 : if(memcmp(&info, &undefined_info, sizeof(info)) != 0)
497 : {
498 0 : fprintf(stderr, "error: the NULL URI did not return a reset info structure.\n");
499 0 : ++err_count;
500 : }
501 :
502 : /* test: "" */
503 1 : info = clear_info;
504 1 : r = tld("", &info);
505 1 : if(r != TLD_RESULT_NULL)
506 : {
507 0 : fprintf(stderr, "error: the \"\" URI did not return the TLD_RESULT_NULL result.\n");
508 0 : ++err_count;
509 : }
510 1 : if(memcmp(&info, &undefined_info, sizeof(info)) != 0)
511 : {
512 0 : fprintf(stderr, "error: the \"\" URI did not return a reset info structure.\n");
513 0 : ++err_count;
514 : }
515 :
516 : /* test: ".." (two periods one after another) */
517 1 : info = clear_info;
518 1 : r = tld("test..com", &info);
519 1 : if(r != TLD_RESULT_BAD_URI)
520 : {
521 0 : fprintf(stderr, "error: the \"test..com\" URI did not return the TLD_RESULT_BAD_URI result.\n");
522 0 : ++err_count;
523 : }
524 1 : if(memcmp(&info, &undefined_info, sizeof(info)) != 0)
525 : {
526 0 : fprintf(stderr, "error: the \"test..com\" URI did not return a reset info structure.\n");
527 0 : ++err_count;
528 : }
529 :
530 : /* test: ".." (two periods one after another) */
531 1 : info = clear_info;
532 1 : r = tld("more..test.com", &info);
533 1 : if(r != TLD_RESULT_BAD_URI)
534 : {
535 0 : fprintf(stderr, "error: the \"more..test.com\" URI did not return the TLD_RESULT_BAD_URI result.\n");
536 0 : ++err_count;
537 : }
538 1 : if(memcmp(&info, &undefined_info, sizeof(info)) != 0)
539 : {
540 0 : fprintf(stderr, "error: the \"more..test.com\" URI did not return a reset info structure.\n");
541 0 : ++err_count;
542 : }
543 :
544 : /* test: "noperiodanywhere" (no periods anywhere) */
545 1 : info = clear_info;
546 1 : r = tld("noperiodanywhere", &info);
547 1 : if(r != TLD_RESULT_NO_TLD)
548 : {
549 0 : fprintf(stderr, "error: the \"noperiodanywhere\" URI did not return the TLD_RESULT_NO_TLD result.\n");
550 0 : ++err_count;
551 : }
552 1 : if(memcmp(&info, &undefined_info, sizeof(info)) != 0)
553 : {
554 0 : fprintf(stderr, "error: the \"noperiodanywhere\" URI did not return a reset info structure.\n");
555 0 : ++err_count;
556 : }
557 1 : }
558 :
559 :
560 :
561 :
562 1 : int main(int argc, char *argv[])
563 : {
564 1 : fprintf(stderr, "testing tld version %s\n", tld_version());
565 :
566 1 : if(argc > 1)
567 : {
568 0 : if(strcmp(argv[1], "-v") == 0)
569 : {
570 0 : verbose = 1;
571 : }
572 : else
573 : {
574 0 : fprintf(stderr, "error: unknown command line option \"%s\"\n", argv[1]);
575 0 : exit(1);
576 : }
577 : }
578 :
579 : /* call all the tests, one by one
580 : * failures are "recorded" in the err_count global variable
581 : * and the process stops with an error message and exit(1)
582 : * if err_count is not zero.
583 : */
584 1 : test_specific();
585 1 : test_all();
586 1 : test_unknown();
587 1 : test_invalid();
588 :
589 1 : if(err_count)
590 : {
591 0 : fprintf(stderr, "%d error%s occured.\n",
592 0 : err_count, err_count != 1 ? "s" : "");
593 : }
594 1 : exit(err_count ? 1 : 0);
595 : }
596 :
597 : /* vim: ts=4 sw=4 et
598 : */
|