LCOV - code coverage report
Current view: top level - tests - tld_test_tld_names.cpp (source / functions) Hit Total Coverage
Test: coverage.info Lines: 86 139 61.9 %
Date: 2018-08-28 01:54:14 Functions: 10 10 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* TLD library -- test the TLD interface against the Public Suffix List
       2             :  * Copyright (c) 2011-2018  Made to Order Software Corp.  All Rights Reserved
       3             :  *
       4             :  * Permission is hereby granted, free of charge, to any person obtaining a
       5             :  * copy of this software and associated documentation files (the
       6             :  * "Software"), to deal in the Software without restriction, including
       7             :  * without limitation the rights to use, copy, modify, merge, publish,
       8             :  * distribute, sublicense, and/or sell copies of the Software, and to
       9             :  * permit persons to whom the Software is furnished to do so, subject to
      10             :  * the following conditions:
      11             :  *
      12             :  * The above copyright notice and this permission notice shall be included
      13             :  * in all copies or substantial portions of the Software.
      14             :  *
      15             :  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
      16             :  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
      17             :  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
      18             :  * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
      19             :  * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
      20             :  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
      21             :  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
      22             :  */
      23             : 
      24             : /** \file
      25             :  * \brief Test the tld() function against the Public Suffix List.
      26             :  *
      27             :  * This file implements various tests to verify that the
      28             :  * tld() function works as expected with valid and
      29             :  * invalid names.
      30             :  */
      31             : 
      32             : // Qt headers make use of long long which is not considered a valid type
      33             : #pragma GCC diagnostic ignored "-Wlong-long"
      34             : 
      35             : #include "libtld/tld.h"
      36             : #include <string>
      37             : #include <vector>
      38             : #include <stdlib.h>
      39             : #include <stdio.h>
      40             : #include <boost/algorithm/string.hpp>
      41             : #include <QtCore/QString>
      42             : 
      43             : 
      44             : 
      45             : int err_count = 0;
      46             : int verbose = 0;
      47             : 
      48             : /*
      49             :  * This test calls the tld() function with all the TLDs as defined
      50             :  * by Mozilla to determine whether we are up to date.
      51             :  *
      52             :  * extern enum tld_result tld(const char *uri, struct tld_info *info);
      53             :  */
      54             : 
      55       58629 : struct tld_t
      56             : {
      57             :     std::string     f_name = std::string();
      58             :     int             f_line = 0;
      59             : };
      60             : typedef std::vector<tld_t> string_vector_t;
      61           1 : string_vector_t tlds;
      62             : 
      63             : 
      64             : /** \brief Encode a URL.
      65             :  *
      66             :  * This function keeps letters, digits, '.' and '-' as is and percent-encodes the other accepted characters.
      67             :  */
      68       17118 : QString tld_encode(const QString& tld, int& level)
      69             : {
      70       17118 :     QString result;
      71       17118 :     level = 0;
      72             : 
      73       34236 :     QByteArray utf8 = tld.toUtf8();
      74       17118 :     int max(utf8.length());
      75       17118 :     const char *p = utf8.data();
      76      748534 :     for(int l = 0; l < max; ++l)
      77             :     {
      78      731416 :         char c(p[l]);
      79      731416 :         if(static_cast<unsigned char>(c) < 0x20)
      80             :         {
      81           0 :             fprintf(stderr, "error: controls characters (^%c) are not allowed in TLDs (%s).\n", c, p);
      82           0 :             exit(1);
      83             :         }
      84      731416 :         if((c >= 'A' && c <= 'Z')
      85      731416 :         || (c >= 'a' && c <= 'z')
      86      154457 :         || (c >= '0' && c <= '9')
      87      153759 :         || c == '.' || c == '-')
      88             :         {
      89             :             // these are accepted as is; note that we already rejected the
      90             :             // control characters above
      91      726146 :             if(c == '.')
      92             :             {
      93       35728 :                 ++level;
      94             :             }
      95      726146 :             result += c;
      96             :         }
      97             :         else
      98             :         {
      99             :             // add/remove as appropriate
     100        5270 :             if(c == '/' || c == ':' || c == '&')
     101             :             {
     102           0 :                 fprintf(stderr, "error: character (^%c) is not allowed in TLDs.\n", c);
     103           0 :                 exit(1);
     104             :             }
     105        5270 :             result += '%';
     106       10540 :             QString v(QString("%1").arg(c & 255, 2, 16, QLatin1Char('0')));
     107        5270 :             result += v[0];
     108        5270 :             result += v[1];
     109             :         }
     110             :     }
     111             :     // at this time the maximum level we declared is 4 but there are cases
     112             :     // where countries defined 5 levels (which is definitely crazy!)
     113             :     // there is also one Amazon server using 6 levels
     114       17118 :     if(level < 0 || level > 6)
     115             :     {
     116           0 :         fprintf(stderr, "error: level out of range (%d) in \"%s\"; if larger than the maximum limit, you may want to increase the limit.\n", level, p);
     117           0 :         exit(1);
     118             :     }
     119             : 
     120       34236 :     return result;
     121             : }
     122             : 
     123             : 
     124             : /*
     125             :  * The function reads the public_suffix_list.dat file in memory.
     126             :  *
     127             :  * We call exit(1) if the file cannot be opened; other errors are counted in err_count.
     128             :  */
     129           1 : void test_load()
     130             : {
     131           1 :     FILE *f = fopen("public_suffix_list.dat", "r");
     132           1 :     if(f == NULL)
     133             :     {
     134           0 :         fprintf(stderr, "error: could not open the \"public_suffix_list.dat\" file; did you start the test in the source directory?\n");
     135           0 :         exit(1);
     136             :     }
     137             :     char buf[256];
     138           1 :     buf[sizeof(buf) -1] = '\0';
     139           1 :     int line(0);
     140       25327 :     while(fgets(buf, sizeof(buf) - 1, f) != NULL)
     141             :     {
     142       12663 :         ++line;
     143       12663 :         int const l = strlen(buf);
     144       12663 :         if(l == sizeof(buf) - 1)
     145             :         {
     146             :             // the fgets() failed in this case so forget it
     147           0 :             fprintf(stderr, "public_suffix_list.dat:%d:error: line too long.\n", line);
     148           0 :             ++err_count;
     149             :         }
     150             :         else
     151             :         {
     152       25326 :             std::string s(buf);
     153       12663 :             boost::algorithm::trim(s);
     154       12663 :             if(s.length() == 1)
     155             :             {
     156             :                 // all TLDs are at least 2 characters
     157           0 :                 fprintf(stderr, "public_suffix_list.dat:%d:error: a TLD must be at least two characters.\n", line);
     158           0 :                 ++err_count;
     159             :             }
     160       12663 :             else if(s.length() > 1 && s[0] != '/' && s[1] != '/')
     161             :             {
     162             :                 // this is not a comment and not an empty line, that's a TLD
     163       17242 :                 tld_t t;
     164        8621 :                 t.f_name = s;
     165        8621 :                 t.f_line = line;
     166        8621 :                 tlds.push_back(t);
     167             : //printf("found [%s]\n", s.c_str());
     168             :             }
     169             :         }
     170             :     }
     171           1 :     fclose(f);
     172           1 :     if(verbose)
     173             :     {
     174           0 :         printf("Found %d TLDs in the input file.\n", static_cast<int>(tlds.size()));
     175             :     }
     176           1 : }
     177             : 
     178             : 
     179             : /*
     180             :  * This test checks each TLD loaded by test_load() against tld(); entries
     181             :  * starting with "*." or "!" are handled as special cases.
     182             :  */
     183           1 : void test_tlds()
     184             : {
     185        8622 :     for(string_vector_t::const_iterator it(tlds.begin()); it != tlds.end(); ++it)
     186             :     {
     187             :         tld_info info;
     188             : 
     189             :         // note: it is possible for the input to have an asterisk (*) anywhere
     190             :         //       in the name, although at this time it only appears at the
     191             :         //       start and we just handle it as a special case here
     192             :         //
     193       17242 :         if(it->f_name.at(0) == '*'
     194        8621 :         && it->f_name.at(1) == '.')
     195             :         {
     196             :             // as is (well, without the '*'), a '*.tld' must return INVALID
     197             :             // and status UNUSED
     198             :             //
     199         108 :             std::string base_tld(it->f_name.substr(2));
     200          54 :             if(base_tld.find('.') == std::string::npos)
     201             :             {
     202             :                 // at least one '.', however for one such as '*.example.com'
     203             :                 // we just want the 'example.com' part, no extra '.',
     204             :                 // otherwise the test itself would fail.
     205             :                 //
     206          11 :                 base_tld = "." + base_tld;
     207             :             }
     208          54 :             tld_result r = tld(base_tld.c_str(), &info);
     209          54 :             if(r != TLD_RESULT_INVALID)
     210             :             {
     211             :                 // we're good if invalid since that's what we expect in this case
     212             :                 // any other result is an error
     213           0 :                 fprintf(stderr, "error: tld(\"%s\", &info) for \"%s\" expected %d, got %d instead.\n",
     214             :                             base_tld.c_str(),
     215           0 :                             it->f_name.c_str(),
     216             :                             TLD_RESULT_INVALID,
     217           0 :                             r);
     218           0 :                 ++err_count;
     219             :             }
     220             : 
     221             :             // then try with a sub-name, in most cases it is invalid
     222             :             // although it can be success (it depends on whether the
     223             :             // '*' has a few specific cases or none at all)
     224             :             //
     225         108 :             std::string url("we-want-to-test-just-one-domain-name");
     226          54 :             url += it->f_name.substr(1);
     227          54 :             r = tld(url.c_str(), &info);
     228          54 :             if(r == TLD_RESULT_SUCCESS)
     229             :             {
     230             :                 // if it worked then we have a problem
     231             :                 //
     232           0 :                 fprintf(stderr,
     233             :                         "error: tld(\"%s\", &info) accepted when 2nd or 3rd level names are not accepted by public_suffix_list.dat.\n",
     234           0 :                         url.c_str());
     235           0 :                 ++err_count;
     236             :             }
     237          54 :             else if(r != TLD_RESULT_INVALID)
     238             :             {
     239             :                 // we're good if invalid since that's what we expect in this case
     240             :                 // any other result is an error
     241           0 :                 fprintf(stderr, "error: tld(\"%s\", &info) for \"%s\" failed with %d.\n",
     242           0 :                             url.c_str(), it->f_name.c_str(), r);
     243           0 :                 ++err_count;
     244             :             }
     245             :         }
     246        8567 :         else if(it->f_name.at(0) == '!')
     247             :         {
     248          16 :             std::string url;//("we-want-to-test-just-one-domain-name.");
     249           8 :             url += it->f_name.substr(1);
     250           8 :             tld_result r = tld(url.c_str(), &info);
     251           8 :             if(r != TLD_RESULT_SUCCESS)
     252             :             {
     253             :                 // if it did not work then we have a problem
     254           0 :                 fprintf(stderr, "error: tld(\"%s\", &info) = %d failed with an exception that should have been accepted.\n",
     255           0 :                         it->f_name.c_str(), r);
     256           0 :                 ++err_count;
     257             :             }
     258             :         }
     259        8559 :         else if(it->f_name.at(0) != '!')
     260             :         {
     261       17118 :             std::string url("www.this-is-a-long-domain-name-that-should-not-make-it-in-a-tld.");
     262        8559 :             url += it->f_name;
     263             :             int level;
     264       17118 :             QString utf16(QString::fromUtf8(url.c_str()));
     265       17118 :             QString u(tld_encode(utf16, level));
     266       17118 :             QByteArray uri(u.toUtf8());
     267        8559 :             tld_result r = tld(uri.data(), &info);
     268        8559 :             if(r == TLD_RESULT_SUCCESS || r == TLD_RESULT_INVALID)
     269             :             {
     270             :                 // it succeeded, but is it the right length?
     271        8559 :                 utf16 = QString::fromUtf8(it->f_name.c_str());
     272        8559 :                 u = tld_encode(utf16, level);
     273        8559 :                 if(strlen(info.f_tld) != static_cast<size_t>(u.size() + 1))
     274             :                 {
     275           0 :                     fprintf(stderr, "error:%d: tld(\"%s\", &info) length mismatch (\"%s\", %d/%d).\n",
     276           0 :                             it->f_line,
     277             :                             uri.data(),
     278             :                             info.f_tld,
     279           0 :                             static_cast<int>(strlen(info.f_tld)),
     280           0 :                             static_cast<int>((u.size() + 1)));
     281             : // s3-website.ap-northeast-2.amazonaws.com
     282           0 : QString s(QString::fromUtf8(it->f_name.c_str()));
     283           0 : fprintf(stderr, "%d> %s [%s] {%s} -> %d ",
     284             :         r,
     285           0 :         it->f_name.c_str(),
     286           0 :         u.toUtf8().data(),
     287             :         info.f_tld,
     288           0 :         s.length());
     289           0 : for(int i(0); i < s.length(); ++i) {
     290           0 : fprintf(stderr, "&#x%04X;", s.at(i).unicode());
     291             : }
     292           0 : fprintf(stderr, "\n");
     293           0 :                     ++err_count;
     294        8559 :                 }
     295             :             }
     296             :             else
     297             :             {
     298             :                 //fprintf(stderr, "error: tld(\"%s\", &info) failed.\n", it->f_name.c_str());
     299           0 : QString s(QString::fromUtf8(it->f_name.c_str()));
     300           0 : printf("error:%d: tld(\"%s\", &info) failed with %d [%s] -> %d ",
     301           0 :         it->f_line,
     302           0 :         it->f_name.c_str(),
     303             :         r,
     304           0 :         u.toUtf8().data(),
     305           0 :         s.length());
     306           0 : for(int i(0); i < s.length(); ++i) {
     307           0 : printf("&#x%04X;", s.at(i).unicode());
     308             : }
     309           0 : printf("\n");
     310           0 :                 ++err_count;
     311             :             }
     312             :         }
     313             :     }
     314           1 : }
     315             : 
     316             : 
     317             : 
     318             : 
     319           1 : int main(int argc, char *argv[])
     320             : {
     321           1 :     printf("testing tld names version %s\n", tld_version());
     322             : 
     323           1 :     if(argc > 1)
     324             :     {
     325           0 :         if(strcmp(argv[1], "-v") == 0)
     326             :         {
     327           0 :             verbose = 1;
     328             :         }
     329             :     }
     330             : 
     331             :     /* call all the tests, one by one
     332             :      * failures are "recorded" in the err_count global variable
     333             :      * and the process stops with an error message and exit(1)
     334             :      * if err_count is not zero.
     335             :      */
     336           1 :     test_load();
     337             : 
     338           1 :     if(err_count == 0)
     339             :     {
     340           1 :         test_tlds();
     341             :     }
     342             : 
     343           1 :     if(err_count || verbose)
     344             :     {
     345           0 :         fprintf(stderr, "%d error%s occured.\n",
     346           0 :                     err_count, err_count != 1 ? "s" : "");
     347             :     }
     348           1 :     exit(err_count ? 1 : 0);
     349           3 : }
     350             : 
     351             : /* vim: ts=4 sw=4 et
     352             :  */

Generated by: LCOV version 1.12