LCOV - code coverage report
Current view: top level - tests - tld_test_domain_lowercase.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 131 180 72.8 %
Date: 2018-08-28 01:54:14 Functions: 6 6 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* TLD library -- test converting domain names to lowercase
       2             :  * Copyright (c) 2011-2018  Made to Order Software Corp.  All Rights Reserved
       3             :  *
       4             :  * Permission is hereby granted, free of charge, to any person obtaining a
       5             :  * copy of this software and associated documentation files (the
       6             :  * "Software"), to deal in the Software without restriction, including
       7             :  * without limitation the rights to use, copy, modify, merge, publish,
       8             :  * distribute, sublicense, and/or sell copies of the Software, and to
       9             :  * permit persons to whom the Software is furnished to do so, subject to
      10             :  * the following conditions:
      11             :  *
      12             :  * The above copyright notice and this permission notice shall be included
      13             :  * in all copies or substantial portions of the Software.
      14             :  *
      15             :  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
      16             :  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
      17             :  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
      18             :  * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
      19             :  * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
      20             :  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
      21             :  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
      22             :  */
      23             : 
      24             : /** \file
      25             :  * \brief Test the tld_domain_to_lowercase() function.
      26             :  *
      27             :  * This file implements various tests to verify that the
      28             :  * tld_domain_to_lowercase() function works as expected.
      29             :  */
      30             : 
      31             : #include "libtld/tld.h"
      32             : #include <string.h>
      33             : #include <stdlib.h>
      34             : #include <stdio.h>
      35             : #include <time.h>
      36             : #include <limits.h>
      37             : #include <wctype.h>
      38             : 
      39             : int err_count = 0;
      40             : int verbose = 0;
      41             : 
      42             : 
      43             : 
      44    21091962 : void test_add_byte(char **out, int wc, int force_caps)
      45             : {
      46    21091962 :     if((wc >= 'A' && wc <= 'Z')
      47    21091936 :     || (wc >= 'a' && wc <= 'z')
      48    21091858 :     || (wc >= '0' && wc <= '9')
      49    21091838 :     || wc == '.'
      50    21091836 :     || wc == '-'
      51    21091834 :     || wc == '!'
      52    21091832 :     || wc == '~'
      53    21091830 :     || wc == '/'
      54    21091830 :     || wc == '_')
      55             :     {
      56         134 :         **out = wc;
      57         134 :         ++*out;
      58             :     }
      59             :     else
      60             :     {
      61             :         // add '%XX' where X represents a hexadecimal digit
      62    21091828 :         if(force_caps
      63    10543133 :         || (rand() & 1) != 0)
      64             :         {
      65    15819324 :             sprintf(*out, "%%%02X", wc);
      66             :         }
      67             :         else
      68             :         {
      69     5272504 :             sprintf(*out, "%%%02x", wc);
      70             :         }
      71    21091828 :         *out += 3;
      72             :     }
      73    21091962 : }
      74             : 
      75             : 
      76     5306331 : void test_to_utf8(char **out, int wc, int force_caps)
      77             : {
      78     5306331 :     if(wc < 0x80)
      79             :     {
      80         252 :         test_add_byte(out, wc, force_caps);
      81             :     }
      82     5306079 :     else if(wc < 0x800)
      83             :     {
      84        3840 :         test_add_byte(out, ((wc >> 6) | 0xC0), force_caps);
      85        3840 :         test_add_byte(out, ((wc & 0x3F) | 0x80), force_caps);
      86             :     }
      87     5302239 :     else if(wc < 0x10000)
      88             :     {
      89      124926 :         test_add_byte(out, ((wc >> 12) | 0xE0), force_caps);
      90      124926 :         test_add_byte(out, (((wc >> 6) & 0x3F) | 0x80), force_caps);
      91      124926 :         test_add_byte(out, ((wc & 0x3F) | 0x80), force_caps);
      92             :     }
      93             :     else
      94             :     {
      95     5177313 :         test_add_byte(out, ((wc >> 18) | 0xF0), force_caps);
      96     5177313 :         test_add_byte(out, (((wc >> 12) & 0x3F) | 0x80), force_caps);
      97     5177313 :         test_add_byte(out, (((wc >> 6) & 0x3F) | 0x80), force_caps);
      98     5177313 :         test_add_byte(out, ((wc & 0x3F) | 0x80), force_caps);
      99             :     }
     100     5306331 : }
     101             : 
     102             : 
     103           1 : void test_all_characters()
     104             : {
     105             :     int  wc;
     106             :     char buf[256], *s, *r;
     107             : 
     108     1114112 :     for(wc = 1; wc < 0x110000; ++wc)
     109             :     {
     110     1114111 :         if((wc >= 0xD800 && wc <= 0xDFFF)  // UTF-16 stuff ignored
     111     1112063 :         || (wc & 0xFFFF) == 0xFFFE
     112     1112046 :         || (wc & 0xFFFF) == 0xFFFF
     113     1112029 :         || wc == '/')
     114             :         {
     115             :             // those code points must be ignored because they
     116             :             // really don't work in a domain name
     117        2083 :             continue;
     118             :         }
     119             : 
     120     1112028 :         s = buf;
     121     1112028 :         test_to_utf8(&s, wc, 0);
     122     1112028 :         *s = '\0';
     123             : 
     124     1112028 :         r = tld_domain_to_lowercase(buf);
     125             : 
     126     1112028 :         s = buf;
     127     1112028 :         test_to_utf8(&s, towlower(wc), 1); // force caps in %XX notation
     128     1112028 :         *s = '\0';
     129             : 
     130     1112028 :         if(r == NULL)
     131             :         {
     132           0 :             fprintf(stderr, "error: character 0x%06X generated an error and tld_domain_to_lower() returned NULL (expected \"%s\")\n", wc, buf);
     133             :         }
     134             :         else
     135             :         {
     136     1112028 :             if(strcmp(r, buf) != 0)
     137             :             {
     138           0 :                 fprintf(stderr, "error: character 0x%06X was not converted back and force as expected (expected \"%s\", received \"%s\")\n", wc, buf, r);
     139             :             }
     140             : 
     141             :             // done with the result
     142     1112028 :             free(r);
     143             :         }
     144             :     }
     145           1 : }
     146             : 
     147             : 
     148           1 : void test_empty()
     149             : {
     150             :     char *r;
     151             : 
     152             :     // NULL as input, returns NULL
     153           1 :     r = tld_domain_to_lowercase(NULL);
     154           1 :     if(r != NULL)
     155             :     {
     156           0 :         ++err_count;
     157           0 :         fprintf(stderr, "error: tld_domain_to_lowercase(NULL) is expected to return NULL.\n");
     158             :     }
     159             : 
     160             :     // an empty string also returns NULL as result
     161           1 :     r = tld_domain_to_lowercase("");
     162           1 :     if(r != NULL)
     163             :     {
     164           0 :         ++err_count;
     165           0 :         fprintf(stderr, "error: tld_domain_to_lowercase(\"\") is expected to return NULL.\n");
     166             :     }
     167           1 : }
     168             : 
     169             : 
     170           1 : void test_invalid_xx()
     171             : {
     172             :     char *r;
     173             :     char buf[256];
     174             :     int i;
     175             : 
     176           1 :     r = tld_domain_to_lowercase("%AZ");
     177           1 :     if(r != NULL)
     178             :     {
     179           0 :         ++err_count;
     180           0 :         fprintf(stderr, "error: tld_domain_to_lowercase(\"%%AZ\") is expected to return NULL.\n");
     181             :     }
     182             : 
     183           1 :     r = tld_domain_to_lowercase("%ZA");
     184           1 :     if(r != NULL)
     185             :     {
     186           0 :         ++err_count;
     187           0 :         fprintf(stderr, "error: tld_domain_to_lowercase(\"%%ZA\") is expected to return NULL.\n");
     188             :     }
     189             : 
     190             :     // these are 3 x a with an acute accent (as used in Spanish)
     191           1 :     r = tld_domain_to_lowercase("\xC3\xA1\xC3\xA1\xC3\xA1");
     192           1 :     if(r != NULL)
     193             :     {
     194           0 :         ++err_count;
     195           0 :         fprintf(stderr, "error: tld_domain_to_lowercase(\"\xC3\xA1\xC3\xA1\xC3\xA1\") is expected to return NULL because of an overflow.\n");
     196             :     }
     197             : 
     198             :     // these are 2 x a with an acute accent followed by "ab"
     199             :     // this time the overflow happens when the 'a' is hit
     200           1 :     r = tld_domain_to_lowercase("\xC3\xA1\xC3\xA1\x61\x62");
     201           1 :     if(r != NULL)
     202             :     {
     203           0 :         ++err_count;
     204           0 :         fprintf(stderr, "error: tld_domain_to_lowercase(\"\xC3\xA1\xC3\xA1\x61\x62\") is expected to return NULL because of an overflow.\n");
     205             :     }
     206             : 
     207             :     // these are 3 x 0x0911 (Devanagari letter candra o)
     208           1 :     r = tld_domain_to_lowercase("\xE0\xA4\x91\xE0\xA4\x91\xE0\xA4\x91");
     209           1 :     if(r != NULL)
     210             :     {
     211           0 :         ++err_count;
     212           0 :         fprintf(stderr, "error: tld_domain_to_lowercase(\"\xE0\xA4\x91\xE0\xA4\x91\xE0\xA4\x91\") is expected to return NULL because of an overflow.\n");
     213             :     }
     214             : 
     215             :     // these are 2 x 0x0911 and a # in between (Devanagari letter candra o)
     216           1 :     r = tld_domain_to_lowercase("\xE0\xA4\x91#\xE0\xA4\x91");
     217           1 :     if(r != NULL)
     218             :     {
     219           0 :         ++err_count;
     220           0 :         fprintf(stderr, "error: tld_domain_to_lowercase(\"\xE0\xA4\x91#\xE0\xA4\x91\") is expected to return NULL because of an overflow.\n");
     221             :     }
     222             : 
     223             :     // these are 2 x 0x0911 and a q in between (Devanagari letter candra o)
     224           1 :     r = tld_domain_to_lowercase("\xE0\xA4\x91q\xE0\xA4\x91");
     225           1 :     if(r != NULL)
     226             :     {
     227           0 :         ++err_count;
     228           0 :         fprintf(stderr, "error: tld_domain_to_lowercase(\"\xE0\xA4\x91q\xE0\xA4\x91\") is expected to return NULL because of an overflow.\n");
     229             :     }
     230             : 
     231             :     // these are 3 x 0x16F0B (Miao letter da)
     232           1 :     r = tld_domain_to_lowercase("\xF0\x96\xBC\x8B\xF0\x96\xBC\x8B\xF0\x96\xBC\x8B");
     233           1 :     if(r != NULL)
     234             :     {
     235           0 :         ++err_count;
     236           0 :         fprintf(stderr, "error: tld_domain_to_lowercase(\"\xF0\x96\xBC\x8B\xF0\x96\xBC\x8B\xF0\x96\xBC\x8B\") is expected to return NULL because of an overflow.\n");
     237             :     }
     238             : 
     239             :     // these are 2 x 0x16F0B with # in between (Miao letter da)
     240           1 :     r = tld_domain_to_lowercase("\xF0\x96\xBC\x8B#\xF0\x96\xBC\x8B");
     241           1 :     if(r != NULL)
     242             :     {
     243           0 :         ++err_count;
     244           0 :         fprintf(stderr, "error: tld_domain_to_lowercase(\"\xF0\x96\xBC\x8B#\xF0\x96\xBC\x8B\") is expected to return NULL because of an overflow.\n");
     245             :     }
     246             : 
     247             :     // these are 2 x 0x16F0B with q in between (Miao letter da)
     248           1 :     r = tld_domain_to_lowercase("\xF0\x96\xBC\x8Bq\xF0\x96\xBC\x8B");
     249           1 :     if(r != NULL)
     250             :     {
     251           0 :         ++err_count;
     252           0 :         fprintf(stderr, "error: tld_domain_to_lowercase(\"\xF0\x96\xBC\x8Bq\xF0\x96\xBC\x8B\") is expected to return NULL because of an overflow.\n");
     253             :     }
     254             : 
     255             :     // these are 2 x 0x16F0B with qq in between (Miao letter da)
     256           1 :     r = tld_domain_to_lowercase("\xF0\x96\xBC\x8Bqq\xF0\x96\xBC\x8B");
     257           1 :     if(r != NULL)
     258             :     {
     259           0 :         ++err_count;
     260           0 :         fprintf(stderr, "error: tld_domain_to_lowercase(\"\xF0\x96\xBC\x8Bqq\xF0\x96\xBC\x8B\") is expected to return NULL because of an overflow.\n");
     261             :     }
     262             : 
     263             :     // these are 2 x 0x16F0B with qqq in between (Miao letter da)
     264           1 :     r = tld_domain_to_lowercase("\xF0\x96\xBC\x8Bqqq\xF0\x96\xBC\x8B");
     265           1 :     if(r != NULL)
     266             :     {
     267           0 :         ++err_count;
     268           0 :         fprintf(stderr, "error: tld_domain_to_lowercase(\"\xF0\x96\xBC\x8Bqqq\xF0\x96\xBC\x8B\") is expected to return NULL because of an overflow.\n");
     269             :     }
     270             : 
     271           9 :     for(i = 0xF8; i <= 0xFF; ++i)
     272             :     {
     273           8 :         snprintf(buf, sizeof(buf), "+%%%02X+", i);
     274             : 
     275           8 :         r = tld_domain_to_lowercase(buf);
     276           8 :         if(r != NULL)
     277             :         {
     278           0 :             ++err_count;
     279           0 :             fprintf(stderr, "error: tld_domain_to_lowercase(\"%s\") is expected to return NULL because of the invalid byte (introduction byte).\n", buf);
     280             :         }
     281             :     }
     282             : 
     283          65 :     for(i = 0x80; i <= 0xBF; ++i)
     284             :     {
     285          64 :         snprintf(buf, sizeof(buf), "+%%%02X+", i);
     286             : 
     287          64 :         r = tld_domain_to_lowercase(buf);
     288          64 :         if(r != NULL)
     289             :         {
     290           0 :             ++err_count;
     291           0 :             fprintf(stderr, "error: tld_domain_to_lowercase(\"%s\") is expected to return NULL because of the invalid byte (continuation byte).\n", buf);
     292             :         }
     293             :     }
     294             : 
     295             :     // byte missing (end of string found before end of UTF-8 character)
     296          57 :     for(i = 0xC0; i <= 0xF7; ++i)
     297             :     {
     298          56 :         buf[0] = i;
     299          56 :         buf[1] = '\0';
     300          56 :         r = tld_domain_to_lowercase(buf);
     301          56 :         if(r != NULL)
     302             :         {
     303           0 :             ++err_count;
     304           0 :             fprintf(stderr, "error: tld_domain_to_lowercase(\"%s\") is expected to return NULL because of the invalid UTF-8 sequence (end of string found too early).\n", buf);
     305             :         }
     306             :     }
     307             : 
     308             :     // continuation byte out of range
     309         257 :     for(i = 0x00; i <= 0xFF; ++i)
     310             :     {
     311         256 :         if(i >= 0x80 && i <= 0xBF)
     312             :         {
     313             :             // that's a valid continuation
     314          64 :             continue;
     315             :         }
     316         192 :         buf[0] = rand() % (0xF8 - 0xC0) + 0xC0;
     317         192 :         buf[1] = i;
     318         192 :         buf[2] = '\0';
     319         192 :         r = tld_domain_to_lowercase(buf);
     320         192 :         if(r != NULL)
     321             :         {
     322           0 :             ++err_count;
     323           0 :             fprintf(stderr, "error: tld_domain_to_lowercase(\"%s\") is expected to return NULL because of the invalid UTF-8 sequence (continuation byte out of range).\n", buf);
     324             :         }
     325             :     }
     326             : 
     327     1114112 :     for(i = 1; i < 0x110000; ++i)
     328             :     {
     329     1114111 :         if((i >= 0xD800 && i <= 0xDFFF)  // UTF-16 stuff ignored
     330     1112063 :         || (i & 0xFFFF) == 0xFFFE
     331     1112046 :         || (i & 0xFFFF) == 0xFFFF)
     332             :         {
     333        2082 :             r = buf;
     334        2082 :             test_to_utf8(&r, i, rand() & 1);
     335        2082 :             *r = '\0';
     336             : 
     337        2082 :             r = tld_domain_to_lowercase(buf);
     338        2082 :             if(r != NULL)
     339             :             {
     340           0 :                 ++err_count;
     341           0 :                 fprintf(stderr, "error: tld_domain_to_lowercase(\"%s\") is expected to return NULL because of the invalid UTF-8 sequence (continuation byte out of range).\n", buf);
     342             :             }
     343             :         }
     344             :     }
     345             : 
     346     3080193 :     for(i = 0x110000;; ++i)
     347             :     {
     348     3080193 :         r = buf;
     349     3080193 :         test_to_utf8(&r, i, rand() & 1);
     350     3080193 :         *r = '\0';
     351             : 
     352             :         // we only save up to 4 bytes, so to check overflow, we expect %F0
     353             :         // as the first byte...
     354     3080193 :         if(strncmp(buf, "%f0", 3) == 0
     355     3080192 :         || strncmp(buf, "%F0", 3) == 0)
     356             :         {
     357             :             // no need to test further, we hit the case of 0xF8 or more in
     358             :             // the first byte which is checked somewhere else
     359             :             break;
     360             :         }
     361             : 
     362     3080192 :         r = tld_domain_to_lowercase(buf);
     363     3080192 :         if(r != NULL)
     364             :         {
     365           0 :             ++err_count;
     366           0 :             fprintf(stderr, "error: tld_domain_to_lowercase(\"%s\") is expected to return NULL because of the invalid Unicode character. Got \"%s\" instead.\n", buf, r);
     367             :         }
     368     3080192 :     }
     369           1 : }
     370             : 
     371             : 
     372           1 : int main(int argc, char *argv[])
     373             : {
     374             :     int i;
     375           1 :     int seed = time(NULL);
     376             : 
     377           1 :     for(i = 1; i < argc; ++i)
     378             :     {
     379           0 :         if(strcmp(argv[i], "-v") == 0)
     380             :         {
     381           0 :             verbose = 1;
     382             :         }
     383           0 :         else if(strcmp(argv[i], "--seed") == 0)
     384             :         {
     385           0 :             if(i + 1 >= argc)
     386             :             {
     387           0 :                 fprintf(stderr, "error: --seed expect a value.\n");
     388           0 :                 exit(1);
     389             :             }
     390           0 :             seed = atol(argv[i + 1]);
     391             :         }
     392             :     }
     393             : 
     394           1 :     printf("testing tld test domain lowercase version %s with seed %d\n", tld_version(), seed);
     395             : 
     396           1 :     srand(seed);
     397             : 
     398           1 :     test_empty();
     399           1 :     test_all_characters();
     400           1 :     test_invalid_xx();
     401             : 
     402           1 :     exit(err_count ? 1 : 0);
     403             : }
     404             : 
     405             : /* vim: ts=4 sw=4 et
     406             :  */
     407             : 

Generated by: LCOV version 1.12