LCOV - code coverage report
Current view: top level - tests - tld_test_domain_lowercase.c (source / functions) Coverage Total Hit
Test: coverage.info Lines: 73.1 % 182 133
Test Date: 2025-07-17 21:03:15 Functions: 100.0 % 6 6
Legend: Lines: hit not hit

            Line data    Source code
       1              : /* TLD library -- test converting domain names to lowercase
       2              :  * Copyright (c) 2011-2023  Made to Order Software Corp.  All Rights Reserved
       3              :  *
       4              :  * Permission is hereby granted, free of charge, to any person obtaining a
       5              :  * copy of this software and associated documentation files (the
       6              :  * "Software"), to deal in the Software without restriction, including
       7              :  * without limitation the rights to use, copy, modify, merge, publish,
       8              :  * distribute, sublicense, and/or sell copies of the Software, and to
       9              :  * permit persons to whom the Software is furnished to do so, subject to
      10              :  * the following conditions:
      11              :  *
      12              :  * The above copyright notice and this permission notice shall be included
      13              :  * in all copies or substantial portions of the Software.
      14              :  *
      15              :  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
      16              :  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
      17              :  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
      18              :  * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
      19              :  * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
      20              :  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
      21              :  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
      22              :  */
      23              : 
      24              : /** \file
      25              :  * \brief Test the tld_domain_to_lowercase() functions.
      26              :  *
      27              :  * This file implements various tests to verify that the
      28              :  * tld_domain_to_lowercase() function works as expected
      29              :  * with valid and invalid names.
      30              :  */
      31              : 
      32              : #include "libtld/tld.h"
      33              : #include <string.h>
      34              : #include <stdlib.h>
      35              : #include <stdio.h>
      36              : #include <time.h>
      37              : #include <limits.h>
      38              : #include <wctype.h>
      39              : 
      40              : int err_count = 0;
      41              : int verbose = 0;
      42              : 
      43              : 
      44              : 
      45     21091962 : void test_add_byte(char **out, int wc, int force_caps)
      46              : {
      47     21091962 :     if((wc >= 'A' && wc <= 'Z')
      48     21091936 :     || (wc >= 'a' && wc <= 'z')
      49     21091858 :     || (wc >= '0' && wc <= '9')
      50     21091838 :     || wc == '.'
      51     21091836 :     || wc == '-'
      52     21091834 :     || wc == '!'
      53     21091832 :     || wc == '~'
      54     21091830 :     || wc == '/'
      55     21091830 :     || wc == '_')
      56              :     {
      57          134 :         **out = wc;
      58          134 :         ++*out;
      59              :     }
      60              :     else
      61              :     {
      62              :         // add '%XX' where X represents a hexadecimal digit
      63     21091828 :         if(force_caps
      64     10543622 :         || (rand() & 1) != 0)
      65              :         {
      66     15818033 :             sprintf(*out, "%%%02X", wc);
      67              :         }
      68              :         else
      69              :         {
      70      5273795 :             sprintf(*out, "%%%02x", wc);
      71              :         }
      72     21091828 :         *out += 3;
      73              :     }
      74     21091962 : }
      75              : 
      76              : 
      77      5306331 : void test_to_utf8(char **out, int wc, int force_caps)
      78              : {
      79      5306331 :     if(wc < 0x80)
      80              :     {
      81          252 :         test_add_byte(out, wc, force_caps);
      82              :     }
      83      5306079 :     else if(wc < 0x800)
      84              :     {
      85         3840 :         test_add_byte(out, ((wc >> 6) | 0xC0), force_caps);
      86         3840 :         test_add_byte(out, ((wc & 0x3F) | 0x80), force_caps);
      87              :     }
      88      5302239 :     else if(wc < 0x10000)
      89              :     {
      90       124926 :         test_add_byte(out, ((wc >> 12) | 0xE0), force_caps);
      91       124926 :         test_add_byte(out, (((wc >> 6) & 0x3F) | 0x80), force_caps);
      92       124926 :         test_add_byte(out, ((wc & 0x3F) | 0x80), force_caps);
      93              :     }
      94              :     else
      95              :     {
      96      5177313 :         test_add_byte(out, ((wc >> 18) | 0xF0), force_caps);
      97      5177313 :         test_add_byte(out, (((wc >> 12) & 0x3F) | 0x80), force_caps);
      98      5177313 :         test_add_byte(out, (((wc >> 6) & 0x3F) | 0x80), force_caps);
      99      5177313 :         test_add_byte(out, ((wc & 0x3F) | 0x80), force_caps);
     100              :     }
     101      5306331 : }
     102              : 
     103              : 
     104            1 : void test_all_characters()
     105              : {
     106              :     int  wc;
     107            1 :     char buf[256], *s, *r;
     108              : 
     109      1114112 :     for(wc = 1; wc < 0x110000; ++wc)
     110              :     {
     111      1114111 :         if((wc >= 0xD800 && wc <= 0xDFFF)  // UTF-16 stuff ignored
     112      1112063 :         || (wc & 0xFFFF) == 0xFFFE
     113      1112046 :         || (wc & 0xFFFF) == 0xFFFF
     114      1112029 :         || wc == '/')
     115              :         {
     116              :             // those code points must be ignored because they
     117              :             // really don't work in a domain name
     118         2083 :             continue;
     119              :         }
     120              : 
     121      1112028 :         s = buf;
     122      1112028 :         test_to_utf8(&s, wc, 0);
     123      1112028 :         *s = '\0';
     124              : 
     125      1112028 :         r = tld_domain_to_lowercase(buf);
     126              : 
     127      1112028 :         s = buf;
     128      1112028 :         test_to_utf8(&s, towlower(wc), 1); // force caps in %XX notation
     129      1112028 :         *s = '\0';
     130              : 
     131      1112028 :         if(r == NULL)
     132              :         {
     133            0 :             fprintf(stderr, "error: character 0x%06X generated an error and tld_domain_to_lower() returned NULL (expected \"%s\")\n", wc, buf);
     134              :         }
     135              :         else
     136              :         {
     137      1112028 :             if(strcmp(r, buf) != 0)
     138              :             {
     139            0 :                 fprintf(stderr, "error: character 0x%06X was not converted back and force as expected (expected \"%s\", received \"%s\")\n", wc, buf, r);
     140              :             }
     141              : 
     142              :             // done with the result
     143      1112028 :             free(r);
     144              :         }
     145              :     }
     146            1 : }
     147              : 
     148              : 
     149            1 : void test_empty()
     150              : {
     151              :     char *r;
     152              : 
     153              :     // NULL as input, returns NULL
     154            1 :     r = tld_domain_to_lowercase(NULL);
     155            1 :     if(r != NULL)
     156              :     {
     157            0 :         ++err_count;
     158            0 :         fprintf(stderr, "error: tld_domain_to_lowercase(NULL) is expected to return NULL.\n");
     159              :     }
     160              : 
     161              :     // an empty string also returns NULL as result
     162            1 :     r = tld_domain_to_lowercase("");
     163            1 :     if(r != NULL)
     164              :     {
     165            0 :         ++err_count;
     166            0 :         fprintf(stderr, "error: tld_domain_to_lowercase(\"\") is expected to return NULL.\n");
     167              :     }
     168            1 : }
     169              : 
     170              : 
     171            1 : void test_invalid_xx()
     172              : {
     173            1 :     char *r;
     174            1 :     char buf[256];
     175              :     int i;
     176              : 
     177            1 :     r = tld_domain_to_lowercase("%AZ");
     178            1 :     if(r != NULL)
     179              :     {
     180            0 :         ++err_count;
     181            0 :         fprintf(stderr, "error: tld_domain_to_lowercase(\"%%AZ\") is expected to return NULL.\n");
     182              :     }
     183              : 
     184            1 :     r = tld_domain_to_lowercase("%ZA");
     185            1 :     if(r != NULL)
     186              :     {
     187            0 :         ++err_count;
     188            0 :         fprintf(stderr, "error: tld_domain_to_lowercase(\"%%ZA\") is expected to return NULL.\n");
     189              :     }
     190              : 
     191              :     // these are 3 x a with an acute accent (as used in Spanish)
     192            1 :     r = tld_domain_to_lowercase("\xC3\xA1\xC3\xA1\xC3\xA1");
     193            1 :     if(r != NULL)
     194              :     {
     195            0 :         ++err_count;
     196            0 :         fprintf(stderr, "error: tld_domain_to_lowercase(\"\xC3\xA1\xC3\xA1\xC3\xA1\") is expected to return NULL because of an overflow.\n");
     197              :     }
     198              : 
     199              :     // these are 2 x a with an acute accent followed by "ab"
     200              :     // this time the overflow happens when the 'a' is hit
     201            1 :     r = tld_domain_to_lowercase("\xC3\xA1\xC3\xA1\x61\x62");
     202            1 :     if(r != NULL)
     203              :     {
     204            0 :         ++err_count;
     205            0 :         fprintf(stderr, "error: tld_domain_to_lowercase(\"\xC3\xA1\xC3\xA1\x61\x62\") is expected to return NULL because of an overflow.\n");
     206              :     }
     207              : 
     208              :     // these are 3 x 0x0911 (Devanagari letter candra o)
     209            1 :     r = tld_domain_to_lowercase("\xE0\xA4\x91\xE0\xA4\x91\xE0\xA4\x91");
     210            1 :     if(r != NULL)
     211              :     {
     212            0 :         ++err_count;
     213            0 :         fprintf(stderr, "error: tld_domain_to_lowercase(\"\xE0\xA4\x91\xE0\xA4\x91\xE0\xA4\x91\") is expected to return NULL because of an overflow.\n");
     214              :     }
     215              : 
     216              :     // these are 2 x 0x0911 and a # in between (Devanagari letter candra o)
     217            1 :     r = tld_domain_to_lowercase("\xE0\xA4\x91#\xE0\xA4\x91");
     218            1 :     if(r != NULL)
     219              :     {
     220            0 :         ++err_count;
     221            0 :         fprintf(stderr, "error: tld_domain_to_lowercase(\"\xE0\xA4\x91#\xE0\xA4\x91\") is expected to return NULL because of an overflow.\n");
     222              :     }
     223              : 
     224              :     // these are 2 x 0x0911 and a q in between (Devanagari letter candra o)
     225            1 :     r = tld_domain_to_lowercase("\xE0\xA4\x91q\xE0\xA4\x91");
     226            1 :     if(r != NULL)
     227              :     {
     228            0 :         ++err_count;
     229            0 :         fprintf(stderr, "error: tld_domain_to_lowercase(\"\xE0\xA4\x91q\xE0\xA4\x91\") is expected to return NULL because of an overflow.\n");
     230              :     }
     231              : 
     232              :     // these are 3 x 0x13F0B (Miao letter da)
     233            1 :     r = tld_domain_to_lowercase("\xF0\x96\xBC\x8B\xF0\x96\xBC\x8B\xF0\x96\xBC\x8B");
     234            1 :     if(r != NULL)
     235              :     {
     236            0 :         ++err_count;
     237            0 :         fprintf(stderr, "error: tld_domain_to_lowercase(\"\xF0\x96\xBC\x8B\xF0\x96\xBC\x8B\xF0\x96\xBC\x8B\") is expected to return NULL because of an overflow.\n");
     238              :     }
     239              : 
     240              :     // these are 2 x 0x13F0B with # in between (Miao letter da)
     241            1 :     r = tld_domain_to_lowercase("\xF0\x96\xBC\x8B#\xF0\x96\xBC\x8B");
     242            1 :     if(r != NULL)
     243              :     {
     244            0 :         ++err_count;
     245            0 :         fprintf(stderr, "error: tld_domain_to_lowercase(\"\xF0\x96\xBC\x8B#\xF0\x96\xBC\x8B\") is expected to return NULL because of an overflow.\n");
     246              :     }
     247              : 
     248              :     // these are 2 x 0x13F0B with q in between (Miao letter da)
     249            1 :     r = tld_domain_to_lowercase("\xF0\x96\xBC\x8Bq\xF0\x96\xBC\x8B");
     250            1 :     if(r != NULL)
     251              :     {
     252            0 :         ++err_count;
     253            0 :         fprintf(stderr, "error: tld_domain_to_lowercase(\"\xF0\x96\xBC\x8Bq\xF0\x96\xBC\x8B\") is expected to return NULL because of an overflow.\n");
     254              :     }
     255              : 
     256              :     // these are 2 x 0x13F0B with qq in between (Miao letter da)
     257            1 :     r = tld_domain_to_lowercase("\xF0\x96\xBC\x8Bqq\xF0\x96\xBC\x8B");
     258            1 :     if(r != NULL)
     259              :     {
     260            0 :         ++err_count;
     261            0 :         fprintf(stderr, "error: tld_domain_to_lowercase(\"\xF0\x96\xBC\x8Bqq\xF0\x96\xBC\x8B\") is expected to return NULL because of an overflow.\n");
     262              :     }
     263              : 
     264              :     // these are 2 x 0x13F0B with qqq in between (Miao letter da)
     265            1 :     r = tld_domain_to_lowercase("\xF0\x96\xBC\x8Bqqq\xF0\x96\xBC\x8B");
     266            1 :     if(r != NULL)
     267              :     {
     268            0 :         ++err_count;
     269            0 :         fprintf(stderr, "error: tld_domain_to_lowercase(\"\xF0\x96\xBC\x8Bqqq\xF0\x96\xBC\x8B\") is expected to return NULL because of an overflow.\n");
     270              :     }
     271              : 
     272            9 :     for(i = 0xF8; i <= 0xFF; ++i)
     273              :     {
     274            8 :         snprintf(buf, sizeof(buf), "+%%%02X+", i);
     275              : 
     276            8 :         r = tld_domain_to_lowercase(buf);
     277            8 :         if(r != NULL)
     278              :         {
     279            0 :             ++err_count;
     280            0 :             fprintf(stderr, "error: tld_domain_to_lowercase(\"%s\") is expected to return NULL because of the invalid byte (introduction byte).\n", buf);
     281              :         }
     282              :     }
     283              : 
     284           65 :     for(i = 0x80; i <= 0xBF; ++i)
     285              :     {
     286           64 :         snprintf(buf, sizeof(buf), "+%%%02X+", i);
     287              : 
     288           64 :         r = tld_domain_to_lowercase(buf);
     289           64 :         if(r != NULL)
     290              :         {
     291            0 :             ++err_count;
     292            0 :             fprintf(stderr, "error: tld_domain_to_lowercase(\"%s\") is expected to return NULL because of the invalid byte (continuation byte).\n", buf);
     293              :         }
     294              :     }
     295              : 
     296              :     // byte missing (end of string found before end of UTF-8 character)
     297           57 :     for(i = 0xC0; i <= 0xF7; ++i)
     298              :     {
     299           56 :         buf[0] = i;
     300           56 :         buf[1] = '\0';
     301           56 :         r = tld_domain_to_lowercase(buf);
     302           56 :         if(r != NULL)
     303              :         {
     304            0 :             ++err_count;
     305            0 :             fprintf(stderr, "error: tld_domain_to_lowercase(\"%s\") is expected to return NULL because of the invalid UTF-8 sequence (end of string found too early).\n", buf);
     306              :         }
     307              :     }
     308              : 
     309              :     // continuation byte out of range
     310          257 :     for(i = 0x00; i <= 0xFF; ++i)
     311              :     {
     312          256 :         if(i >= 0x80 && i <= 0xBF)
     313              :         {
     314              :             // that's a valid continuation
     315           64 :             continue;
     316              :         }
     317          192 :         buf[0] = rand() % (0xF8 - 0xC0) + 0xC0;
     318          192 :         buf[1] = i;
     319          192 :         buf[2] = '\0';
     320          192 :         r = tld_domain_to_lowercase(buf);
     321          192 :         if(r != NULL)
     322              :         {
     323            0 :             ++err_count;
     324            0 :             fprintf(stderr, "error: tld_domain_to_lowercase(\"%s\") is expected to return NULL because of the invalid UTF-8 sequence (continuation byte out of range).\n", buf);
     325              :         }
     326              :     }
     327              : 
     328      1114112 :     for(i = 1; i < 0x110000; ++i)
     329              :     {
     330      1114111 :         if((i >= 0xD800 && i <= 0xDFFF)  // UTF-16 stuff ignored
     331      1112063 :         || (i & 0xFFFF) == 0xFFFE
     332      1112046 :         || (i & 0xFFFF) == 0xFFFF)
     333              :         {
     334         2082 :             r = buf;
     335         2082 :             test_to_utf8(&r, i, rand() & 1);
     336         2082 :             *r = '\0';
     337              : 
     338         2082 :             r = tld_domain_to_lowercase(buf);
     339         2082 :             if(r != NULL)
     340              :             {
     341            0 :                 ++err_count;
     342            0 :                 fprintf(stderr, "error: tld_domain_to_lowercase(\"%s\") is expected to return NULL because of the invalid UTF-8 sequence (continuation byte out of range).\n", buf);
     343              :             }
     344              :         }
     345              :     }
     346              : 
     347      3080193 :     for(i = 0x110000;; ++i)
     348              :     {
     349      3080193 :         r = buf;
     350      3080193 :         test_to_utf8(&r, i, rand() & 1);
     351      3080193 :         *r = '\0';
     352              : 
     353              :         // we only save up to 4 bytes, so to check overflow, we expect %F0
     354              :         // as the first byte...
     355      3080193 :         if(strncmp(buf, "%f0", 3) == 0
     356      3080192 :         || strncmp(buf, "%F0", 3) == 0)
     357              :         {
     358              :             // no need to test further, we hit the case of 0xF8 or more in
     359              :             // the first byte which is checked somewhere else
     360              :             break;
     361              :         }
     362              : 
     363      3080192 :         r = tld_domain_to_lowercase(buf);
     364      3080192 :         if(r != NULL)
     365              :         {
     366            0 :             ++err_count;
     367            0 :             fprintf(stderr, "error: tld_domain_to_lowercase(\"%s\") is expected to return NULL because of the invalid Unicode character. Got \"%s\" instead.\n", buf, r);
     368              :         }
     369              :     }
     370            1 : }
     371              : 
     372              : 
     373            1 : int main(int argc, char *argv[])
     374              : {
     375              :     int i;
     376            1 :     int seed = time(NULL);
     377              : 
     378            1 :     for(i = 1; i < argc; ++i)
     379              :     {
     380            0 :         if(strcmp(argv[i], "-v") == 0)
     381              :         {
     382            0 :             verbose = 1;
     383              :         }
     384            0 :         else if(strcmp(argv[i], "--seed") == 0)
     385              :         {
     386            0 :             if(i + 1 >= argc)
     387              :             {
     388            0 :                 fprintf(stderr, "error: --seed expect a value.\n");
     389            0 :                 exit(1);
     390              :             }
     391            0 :             seed = atol(argv[i + 1]);
     392              :         }
     393              :     }
     394              : 
     395            1 :     printf("testing tld test domain lowercase version %s with seed %d\n", tld_version(), seed);
     396              : 
     397            1 :     srand(seed);
     398              : 
     399            1 :     test_empty();
     400            1 :     test_all_characters();
     401            1 :     test_invalid_xx();
     402              : 
     403            1 :     exit(err_count ? 1 : 0);
     404              : }
     405              : 
     406              : /* vim: ts=4 sw=4 et
     407              :  */
     408              : 
        

Generated by: LCOV version 2.0-1

Snap C++ | List of projects | List of versions