LCOV - code coverage report
Current view: top level - tests - tld_test_domain_lowercase.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 133 182 73.1 %
Date: 2022-02-19 13:28:04 Functions: 6 6 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* TLD library -- test converting domain names to lowercase
       2             :  * Copyright (c) 2011-2022  Made to Order Software Corp.  All Rights Reserved
       3             :  *
       4             :  * Permission is hereby granted, free of charge, to any person obtaining a
       5             :  * copy of this software and associated documentation files (the
       6             :  * "Software"), to deal in the Software without restriction, including
       7             :  * without limitation the rights to use, copy, modify, merge, publish,
       8             :  * distribute, sublicense, and/or sell copies of the Software, and to
       9             :  * permit persons to whom the Software is furnished to do so, subject to
      10             :  * the following conditions:
      11             :  *
      12             :  * The above copyright notice and this permission notice shall be included
      13             :  * in all copies or substantial portions of the Software.
      14             :  *
      15             :  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
      16             :  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
      17             :  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
      18             :  * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
      19             :  * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
      20             :  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
      21             :  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
      22             :  */
      23             : 
      24             : /** \file
      25             :  * \brief Test the tld_domain_to_lowercase() function.
      26             :  *
      27             :  * This file implements various tests to verify that the
      28             :  * tld_domain_to_lowercase() function works as expected
      29             :  * with valid and invalid names.
      30             :  */
      31             : 
      32             : #include "libtld/tld.h"
      33             : #include <string.h>
      34             : #include <stdlib.h>
      35             : #include <stdio.h>
      36             : #include <time.h>
      37             : #include <limits.h>
      38             : #include <wctype.h>
      39             : 
      40             : int err_count = 0;
      41             : int verbose = 0;
      42             : 
      43             : 
      44             : 
      45    21091962 : void test_add_byte(char **out, int wc, int force_caps)
      46             : {
      47    21091962 :     if((wc >= 'A' && wc <= 'Z')
      48    21091936 :     || (wc >= 'a' && wc <= 'z')
      49    21091858 :     || (wc >= '0' && wc <= '9')
      50    21091838 :     || wc == '.'
      51    21091836 :     || wc == '-'
      52    21091834 :     || wc == '!'
      53    21091832 :     || wc == '~'
      54    21091830 :     || wc == '/'
      55    21091830 :     || wc == '_')
      56             :     {
      57         134 :         **out = wc;
      58         134 :         ++*out;
      59             :     }
      60             :     else
      61             :     {
      62             :         // add '%XX' where X represents a hexadecimal digit
      63    21091828 :         if(force_caps
      64    10546731 :         || (rand() & 1) != 0)
      65             :         {
      66    15817407 :             sprintf(*out, "%%%02X", wc);
      67             :         }
      68             :         else
      69             :         {
      70     5274421 :             sprintf(*out, "%%%02x", wc);
      71             :         }
      72    21091828 :         *out += 3;
      73             :     }
      74    21091962 : }
      75             : 
      76             : 
      77     5306331 : void test_to_utf8(char **out, int wc, int force_caps)
      78             : {
      79     5306331 :     if(wc < 0x80)
      80             :     {
      81         252 :         test_add_byte(out, wc, force_caps);
      82             :     }
      83     5306079 :     else if(wc < 0x800)
      84             :     {
      85        3840 :         test_add_byte(out, ((wc >> 6) | 0xC0), force_caps);
      86        3840 :         test_add_byte(out, ((wc & 0x3F) | 0x80), force_caps);
      87             :     }
      88     5302239 :     else if(wc < 0x10000)
      89             :     {
      90      124926 :         test_add_byte(out, ((wc >> 12) | 0xE0), force_caps);
      91      124926 :         test_add_byte(out, (((wc >> 6) & 0x3F) | 0x80), force_caps);
      92      124926 :         test_add_byte(out, ((wc & 0x3F) | 0x80), force_caps);
      93             :     }
      94             :     else
      95             :     {
      96     5177313 :         test_add_byte(out, ((wc >> 18) | 0xF0), force_caps);
      97     5177313 :         test_add_byte(out, (((wc >> 12) & 0x3F) | 0x80), force_caps);
      98     5177313 :         test_add_byte(out, (((wc >> 6) & 0x3F) | 0x80), force_caps);
      99     5177313 :         test_add_byte(out, ((wc & 0x3F) | 0x80), force_caps);
     100             :     }
     101     5306331 : }
     102             : 
     103             : 
     104           1 : void test_all_characters()
     105             : {
     106             :     int  wc;
     107           1 :     char buf[256], *s, *r;
     108             : 
     109     1114112 :     for(wc = 1; wc < 0x110000; ++wc)
     110             :     {
     111     1114111 :         if((wc >= 0xD800 && wc <= 0xDFFF)  // UTF-16 stuff ignored
     112     1112063 :         || (wc & 0xFFFF) == 0xFFFE
     113     1112046 :         || (wc & 0xFFFF) == 0xFFFF
     114     1112029 :         || wc == '/')
     115             :         {
     116             :             // those code points must be ignored because they
     117             :             // really don't work in a domain name
     118        2083 :             continue;
     119             :         }
     120             : 
     121     1112028 :         s = buf;
     122     1112028 :         test_to_utf8(&s, wc, 0);
     123     1112028 :         *s = '\0';
     124             : 
     125     1112028 :         r = tld_domain_to_lowercase(buf);
     126             : 
     127     1112028 :         s = buf;
     128     1112028 :         test_to_utf8(&s, towlower(wc), 1); // force caps in %XX notation
     129     1112028 :         *s = '\0';
     130             : 
     131     1112028 :         if(r == NULL)
     132             :         {
     133           0 :             fprintf(stderr, "error: character 0x%06X generated an error and tld_domain_to_lower() returned NULL (expected \"%s\")\n", wc, buf);
     134             :         }
     135             :         else
     136             :         {
     137     1112028 :             if(strcmp(r, buf) != 0)
     138             :             {
     139           0 :                 fprintf(stderr, "error: character 0x%06X was not converted back and force as expected (expected \"%s\", received \"%s\")\n", wc, buf, r);
     140             :             }
     141             : 
     142             :             // done with the result
     143     1112028 :             free(r);
     144             :         }
     145             :     }
     146           1 : }
     147             : 
     148             : 
     149           1 : void test_empty()
     150             : {
     151             :     char *r;
     152             : 
     153             :     // NULL as input, returns NULL
     154           1 :     r = tld_domain_to_lowercase(NULL);
     155           1 :     if(r != NULL)
     156             :     {
     157           0 :         ++err_count;
     158           0 :         fprintf(stderr, "error: tld_domain_to_lowercase(NULL) is expected to return NULL.\n");
     159             :     }
     160             : 
     161             :     // an empty string also returns NULL as result
     162           1 :     r = tld_domain_to_lowercase("");
     163           1 :     if(r != NULL)
     164             :     {
     165           0 :         ++err_count;
     166           0 :         fprintf(stderr, "error: tld_domain_to_lowercase(\"\") is expected to return NULL.\n");
     167             :     }
     168           1 : }
     169             : 
     170             : 
     171           1 : void test_invalid_xx()
     172             : {
     173           1 :     char *r;
     174           1 :     char buf[256];
     175             :     int i;
     176             : 
     177           1 :     r = tld_domain_to_lowercase("%AZ");
     178           1 :     if(r != NULL)
     179             :     {
     180           0 :         ++err_count;
     181           0 :         fprintf(stderr, "error: tld_domain_to_lowercase(\"%%AZ\") is expected to return NULL.\n");
     182             :     }
     183             : 
     184           1 :     r = tld_domain_to_lowercase("%ZA");
     185           1 :     if(r != NULL)
     186             :     {
     187           0 :         ++err_count;
     188           0 :         fprintf(stderr, "error: tld_domain_to_lowercase(\"%%ZA\") is expected to return NULL.\n");
     189             :     }
     190             : 
     191             :     // these are 3 x a with an acute accent (as used in Spanish)
     192           1 :     r = tld_domain_to_lowercase("\xC3\xA1\xC3\xA1\xC3\xA1");
     193           1 :     if(r != NULL)
     194             :     {
     195           0 :         ++err_count;
     196           0 :         fprintf(stderr, "error: tld_domain_to_lowercase(\"\xC3\xA1\xC3\xA1\xC3\xA1\") is expected to return NULL because of an overflow.\n");
     197             :     }
     198             : 
     199             :     // these are 2 x a with an acute accent followed by "ab"
     200             :     // this time the overflow happens when the 'a' is hit
     201           1 :     r = tld_domain_to_lowercase("\xC3\xA1\xC3\xA1\x61\x62");
     202           1 :     if(r != NULL)
     203             :     {
     204           0 :         ++err_count;
     205           0 :         fprintf(stderr, "error: tld_domain_to_lowercase(\"\xC3\xA1\xC3\xA1\x61\x62\") is expected to return NULL because of an overflow.\n");
     206             :     }
     207             : 
     208             :     // these are 3 x 0x0911 (Devanagari letter candra o)
     209           1 :     r = tld_domain_to_lowercase("\xE0\xA4\x91\xE0\xA4\x91\xE0\xA4\x91");
     210           1 :     if(r != NULL)
     211             :     {
     212           0 :         ++err_count;
     213           0 :         fprintf(stderr, "error: tld_domain_to_lowercase(\"\xE0\xA4\x91\xE0\xA4\x91\xE0\xA4\x91\") is expected to return NULL because of an overflow.\n");
     214             :     }
     215             : 
     216             :     // these are 2 x 0x0911 and a # in between (Devanagari letter candra o)
     217           1 :     r = tld_domain_to_lowercase("\xE0\xA4\x91#\xE0\xA4\x91");
     218           1 :     if(r != NULL)
     219             :     {
     220           0 :         ++err_count;
     221           0 :         fprintf(stderr, "error: tld_domain_to_lowercase(\"\xE0\xA4\x91#\xE0\xA4\x91\") is expected to return NULL because of an overflow.\n");
     222             :     }
     223             : 
     224             :     // these are 2 x 0x0911 and a q in between (Devanagari letter candra o)
     225           1 :     r = tld_domain_to_lowercase("\xE0\xA4\x91q\xE0\xA4\x91");
     226           1 :     if(r != NULL)
     227             :     {
     228           0 :         ++err_count;
     229           0 :         fprintf(stderr, "error: tld_domain_to_lowercase(\"\xE0\xA4\x91q\xE0\xA4\x91\") is expected to return NULL because of an overflow.\n");
     230             :     }
     231             : 
     232             :     // these are 3 x 0x13F0B (Miao letter da)
     233           1 :     r = tld_domain_to_lowercase("\xF0\x96\xBC\x8B\xF0\x96\xBC\x8B\xF0\x96\xBC\x8B");
     234           1 :     if(r != NULL)
     235             :     {
     236           0 :         ++err_count;
     237           0 :         fprintf(stderr, "error: tld_domain_to_lowercase(\"\xF0\x96\xBC\x8B\xF0\x96\xBC\x8B\xF0\x96\xBC\x8B\") is expected to return NULL because of an overflow.\n");
     238             :     }
     239             : 
     240             :     // these are 2 x 0x13F0B with # in between (Miao letter da)
     241           1 :     r = tld_domain_to_lowercase("\xF0\x96\xBC\x8B#\xF0\x96\xBC\x8B");
     242           1 :     if(r != NULL)
     243             :     {
     244           0 :         ++err_count;
     245           0 :         fprintf(stderr, "error: tld_domain_to_lowercase(\"\xF0\x96\xBC\x8B#\xF0\x96\xBC\x8B\") is expected to return NULL because of an overflow.\n");
     246             :     }
     247             : 
     248             :     // these are 2 x 0x13F0B with q in between (Miao letter da)
     249           1 :     r = tld_domain_to_lowercase("\xF0\x96\xBC\x8Bq\xF0\x96\xBC\x8B");
     250           1 :     if(r != NULL)
     251             :     {
     252           0 :         ++err_count;
     253           0 :         fprintf(stderr, "error: tld_domain_to_lowercase(\"\xF0\x96\xBC\x8Bq\xF0\x96\xBC\x8B\") is expected to return NULL because of an overflow.\n");
     254             :     }
     255             : 
     256             :     // these are 2 x 0x13F0B with qq in between (Miao letter da)
     257           1 :     r = tld_domain_to_lowercase("\xF0\x96\xBC\x8Bqq\xF0\x96\xBC\x8B");
     258           1 :     if(r != NULL)
     259             :     {
     260           0 :         ++err_count;
     261           0 :         fprintf(stderr, "error: tld_domain_to_lowercase(\"\xF0\x96\xBC\x8Bqq\xF0\x96\xBC\x8B\") is expected to return NULL because of an overflow.\n");
     262             :     }
     263             : 
     264             :     // these are 2 x 0x13F0B with qqq in between (Miao letter da)
     265           1 :     r = tld_domain_to_lowercase("\xF0\x96\xBC\x8Bqqq\xF0\x96\xBC\x8B");
     266           1 :     if(r != NULL)
     267             :     {
     268           0 :         ++err_count;
     269           0 :         fprintf(stderr, "error: tld_domain_to_lowercase(\"\xF0\x96\xBC\x8Bqqq\xF0\x96\xBC\x8B\") is expected to return NULL because of an overflow.\n");
     270             :     }
     271             : 
     272           9 :     for(i = 0xF8; i <= 0xFF; ++i)
     273             :     {
     274           8 :         snprintf(buf, sizeof(buf), "+%%%02X+", i);
     275             : 
     276           8 :         r = tld_domain_to_lowercase(buf);
     277           8 :         if(r != NULL)
     278             :         {
     279           0 :             ++err_count;
     280           0 :             fprintf(stderr, "error: tld_domain_to_lowercase(\"%s\") is expected to return NULL because of the invalid byte (introduction byte).\n", buf);
     281             :         }
     282             :     }
     283             : 
     284          65 :     for(i = 0x80; i <= 0xBF; ++i)
     285             :     {
     286          64 :         snprintf(buf, sizeof(buf), "+%%%02X+", i);
     287             : 
     288          64 :         r = tld_domain_to_lowercase(buf);
     289          64 :         if(r != NULL)
     290             :         {
     291           0 :             ++err_count;
     292           0 :             fprintf(stderr, "error: tld_domain_to_lowercase(\"%s\") is expected to return NULL because of the invalid byte (continuation byte).\n", buf);
     293             :         }
     294             :     }
     295             : 
     296             :     // byte missing (end of string found before end of UTF-8 character)
     297          57 :     for(i = 0xC0; i <= 0xF7; ++i)
     298             :     {
     299          56 :         buf[0] = i;
     300          56 :         buf[1] = '\0';
     301          56 :         r = tld_domain_to_lowercase(buf);
     302          56 :         if(r != NULL)
     303             :         {
     304           0 :             ++err_count;
     305           0 :             fprintf(stderr, "error: tld_domain_to_lowercase(\"%s\") is expected to return NULL because of the invalid UTF-8 sequence (end of string found too early).\n", buf);
     306             :         }
     307             :     }
     308             : 
     309             :     // continuation byte out of range
     310         257 :     for(i = 0x00; i <= 0xFF; ++i)
     311             :     {
     312         256 :         if(i >= 0x80 && i <= 0xBF)
     313             :         {
     314             :             // that's a valid continuation
     315          64 :             continue;
     316             :         }
     317         192 :         buf[0] = rand() % (0xF8 - 0xC0) + 0xC0;
     318         192 :         buf[1] = i;
     319         192 :         buf[2] = '\0';
     320         192 :         r = tld_domain_to_lowercase(buf);
     321         192 :         if(r != NULL)
     322             :         {
     323           0 :             ++err_count;
     324           0 :             fprintf(stderr, "error: tld_domain_to_lowercase(\"%s\") is expected to return NULL because of the invalid UTF-8 sequence (continuation byte out of range).\n", buf);
     325             :         }
     326             :     }
     327             : 
     328     1114112 :     for(i = 1; i < 0x110000; ++i)
     329             :     {
     330     1114111 :         if((i >= 0xD800 && i <= 0xDFFF)  // UTF-16 stuff ignored
     331     1112063 :         || (i & 0xFFFF) == 0xFFFE
     332     1112046 :         || (i & 0xFFFF) == 0xFFFF)
     333             :         {
     334        2082 :             r = buf;
     335        2082 :             test_to_utf8(&r, i, rand() & 1);
     336        2082 :             *r = '\0';
     337             : 
     338        2082 :             r = tld_domain_to_lowercase(buf);
     339        2082 :             if(r != NULL)
     340             :             {
     341           0 :                 ++err_count;
     342           0 :                 fprintf(stderr, "error: tld_domain_to_lowercase(\"%s\") is expected to return NULL because of the invalid UTF-8 sequence (continuation byte out of range).\n", buf);
     343             :             }
     344             :         }
     345             :     }
     346             : 
     347     3080193 :     for(i = 0x110000;; ++i)
     348             :     {
     349     6160385 :         r = buf;
     350     3080193 :         test_to_utf8(&r, i, rand() & 1);
     351     3080193 :         *r = '\0';
     352             : 
     353             :         // we only save up to 4 bytes, so to check overflow, we expect %F0
     354             :         // as the first byte...
     355     3080193 :         if(strncmp(buf, "%f0", 3) == 0
     356     3080193 :         || strncmp(buf, "%F0", 3) == 0)
     357             :         {
     358             :             // no need to test further, we hit the case of 0xF8 or more in
     359             :             // the first byte which is checked somewhere else
     360             :             break;
     361             :         }
     362             : 
     363     3080192 :         r = tld_domain_to_lowercase(buf);
     364     3080192 :         if(r != NULL)
     365             :         {
     366           0 :             ++err_count;
     367           0 :             fprintf(stderr, "error: tld_domain_to_lowercase(\"%s\") is expected to return NULL because of the invalid Unicode character. Got \"%s\" instead.\n", buf, r);
     368             :         }
     369             :     }
     370           1 : }
     371             : 
     372             : 
     373           1 : int main(int argc, char *argv[])
     374             : {
     375             :     int i;
     376           1 :     int seed = time(NULL);
     377             : 
     378           1 :     for(i = 1; i < argc; ++i)
     379             :     {
     380           0 :         if(strcmp(argv[i], "-v") == 0)
     381             :         {
     382           0 :             verbose = 1;
     383             :         }
     384           0 :         else if(strcmp(argv[i], "--seed") == 0)
     385             :         {
     386           0 :             if(i + 1 >= argc)
     387             :             {
     388           0 :                 fprintf(stderr, "error: --seed expect a value.\n");
     389           0 :                 exit(1);
     390             :             }
     391           0 :             seed = atol(argv[i + 1]);
     392             :         }
     393             :     }
     394             : 
     395           1 :     printf("testing tld test domain lowercase version %s with seed %d\n", tld_version(), seed);
     396             : 
     397           1 :     srand(seed);
     398             : 
     399           1 :     test_empty();
     400           1 :     test_all_characters();
     401           1 :     test_invalid_xx();
     402             : 
     403           1 :     exit(err_count ? 1 : 0);
     404             : }
     405             : 
     406             : /* vim: ts=4 sw=4 et
     407             :  */
     408             : 

Generated by: LCOV version 1.13