LCOV - code coverage report
Current view: top level - tests - tld_test_tld_names.cpp (source / functions) Hit Total Coverage
Test: coverage.info Lines: 81 116 69.8 %
Date: 2015-11-01 Functions: 6 6 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* TLD library -- test the TLD interface against the Mozilla effective TLD names
       2             :  * Copyright (C) 2011-2015  Made to Order Software Corp.
       3             :  *
       4             :  * Permission is hereby granted, free of charge, to any person obtaining a
       5             :  * copy of this software and associated documentation files (the
       6             :  * "Software"), to deal in the Software without restriction, including
       7             :  * without limitation the rights to use, copy, modify, merge, publish,
       8             :  * distribute, sublicense, and/or sell copies of the Software, and to
       9             :  * permit persons to whom the Software is furnished to do so, subject to
      10             :  * the following conditions:
      11             :  *
      12             :  * The above copyright notice and this permission notice shall be included
      13             :  * in all copies or substantial portions of the Software.
      14             :  *
      15             :  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
      16             :  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
      17             :  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
      18             :  * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
      19             :  * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
      20             :  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
      21             :  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
      22             :  */
      23             : 
      24             : /** \file
      25             :  * \brief Test the tld() function against the Mozilla effective TLD names.
      26             :  *
      27             :  * This file implements various tests to verify that the
      28             :  * tld() function works as expected with valid and
      29             :  * invalid names.
      30             :  */
      31             : 
      32             : // Qt headers make use of long long which is not considered a valid type
      33             : #pragma GCC diagnostic ignored "-Wlong-long"
      34             : 
      35             : #include "libtld/tld.h"
      36             : #include <string>
      37             : #include <vector>
      38             : #include <stdlib.h>
      39             : #include <stdio.h>
      40             : #include <boost/algorithm/string.hpp>
      41             : #include <QtCore/QString>
      42             : 
      43             : 
      44             : 
      45             : int err_count = 0;
      46             : int verbose = 0;
      47             : 
      48             : /*
      49             :  * This test calls the tld() function with all the TLDs as defined
      50             :  * by Mozilla to determine whether we are up to date.
      51             :  *
      52             :  * extern enum tld_result tld(const char *uri, struct tld_info *info);
      53             :  */
      54             : 
      55             : typedef std::vector<std::string> string_vector_t;
      56           1 : string_vector_t tlds;
      57             : 
      58             : 
      59             : /** \brief Encode a URL.
      60             :  *
      61             :  * This function transforms the characters of \p tld into a valid URI string.
      62             :  */
      63       15092 : QString tld_encode(const QString& tld, int& level)
      64             : {
      65       15092 :     QString result;
      66       15092 :     level = 0;
      67             : 
      68       30184 :     QByteArray utf8 = tld.toUtf8();
      69       15092 :     int max(utf8.length());
      70       15092 :     const char *p = utf8.data();
      71      659010 :     for(int l = 0; l < max; ++l)
      72             :     {
      73      643918 :         char c(p[l]);
      74      643918 :         if(static_cast<unsigned char>(c) < 0x20)
      75             :         {
      76           0 :             fprintf(stderr, "error: controls characters (^%c) are not allowed in TLDs (%s).\n", c, p);
      77           0 :             exit(1);
      78             :         }
      79      643918 :         if((c >= 'A' && c <= 'Z')
      80      643918 :         || (c >= 'a' && c <= 'z')
      81      135576 :         || (c >= '0' && c <= '9')
      82      135184 :         || c == '.' || c == '-')
      83             :         {
      84             :             // these are accepted as is; note that we already checked the
      85             :             // validity of the data (control characters were rejected above)
      86      639580 :             if(c == '.')
      87             :             {
      88       31730 :                 ++level;
      89             :             }
      90      639580 :             result += c;
      91             :         }
      92             :         else
      93             :         {
      94             :             // add/remove as appropriate
      95        4338 :             if(c == '/' || c == ':' || c == '&')
      96             :             {
      97           0 :                 fprintf(stderr, "error: character (^%c) is not allowed in TLDs.\n", c);
      98           0 :                 exit(1);
      99             :             }
     100        4338 :             result += '%';
     101        4338 :             QString v(QString("%1").arg(c & 255, 2, 16, QLatin1Char('0')));
     102        4338 :             result += v[0];
     103        4338 :             result += v[1];
     104             :         }
     105             :     }
     106             :     // at this time the maximum level we declared is 4 but there are cases
     107             :     // where countries defined 5 levels (which is definitely crazy!)
     108             :     // there is also one Amazon server using 6 levels
     109       15092 :     if(level < 0 || level > 6)
     110             :     {
     111           0 :         fprintf(stderr, "error: level out of range (%d) in \"%s\"; if larger than the maximum limit, you may want to increase the limit.\n", level, p);
     112           0 :         exit(1);
     113             :     }
     114             : 
     115       30184 :     return result;
     116             : }
     117             : 
     118             : 
     119             : /*
     120             :  * The function reads the effective_tld_names.dat file into memory.
     121             :  *
     122             :  * We call exit(1) if the file cannot be opened; other errors are counted in err_count.
     123             :  */
     124           1 : void test_load()
     125             : {
     126           1 :     FILE *f = fopen("effective_tld_names.dat", "r");
     127           1 :     if(f == NULL)
     128             :     {
     129           0 :         fprintf(stderr, "error: could not open the \"effective_tld_names.dat\" file; did you start the test in the source directory?\n");
     130           0 :         exit(1);
     131             :     }
     132             :     char buf[256];
     133           1 :     buf[sizeof(buf) -1] = '\0';
     134           1 :     int line(0);
     135       10653 :     while(fgets(buf, sizeof(buf) - 1, f) != NULL)
     136             :     {
     137       10651 :         ++line;
     138       10651 :         int l = strlen(buf);
     139       10651 :         if(l == sizeof(buf) - 1)
     140             :         {
     141             :             // the fgets() failed in this case so forget it
     142           0 :             fprintf(stderr, "effective_tld_names.data:%d:error: line too long.\n", line);
     143           0 :             ++err_count;
     144             :         }
     145             :         else
     146             :         {
     147       10651 :             std::string s(buf);
     148       10651 :             boost::algorithm::trim(s);
     149       10651 :             if(s.length() == 1)
     150             :             {
     151             :                 // all TLDs are at least 2 characters
     152           0 :                 fprintf(stderr, "effective_tld_names.data:%d:error: a TLD must be at least two characters.\n", line);
     153           0 :                 ++err_count;
     154             :             }
     155       10651 :             else if(s.length() > 1 && s[0] != '/' && s[1] != '/')
     156             :             {
     157             :                 // this is not a comment and not an empty line, that's a TLD
     158        7583 :                 tlds.push_back(s);
     159             : //printf("found [%s]\n", s.c_str());
     160       10651 :             }
     161             :         }
     162             :     }
     163           1 :     fclose(f);
     164           1 :     if(verbose)
     165             :     {
     166           0 :         printf("Found %d TLDs in the input file.\n", static_cast<int>(tlds.size()));
     167             :     }
     168           1 : }
     169             : 
     170             : 
     171             : /*
     172             :  * This test calls tld() with a URI built from each TLD read by test_load()
     173             :  * and verifies the result; each failure is counted in err_count.
     174             :  */
     175           1 : void test_tlds()
     176             : {
     177        7584 :     for(string_vector_t::const_iterator it(tlds.begin()); it != tlds.end(); ++it)
     178             :     {
     179             :         tld_info info;
     180        7583 :         if(it->at(0) == '*')
     181             :         {
     182          28 :             std::string url("we-want-to-test-just-one-domain-name");
     183          28 :             url += it->substr(1);
     184          28 :             tld_result r = tld(url.c_str(), &info);
     185          28 :             if(r == TLD_RESULT_SUCCESS)
     186             :             {
     187             :                 // if it worked then we have a problem
     188             :                 fprintf(stderr, "error: tld(\"%s\", &info) accepted when 2nd level names are not accepted by effective_tld_names.dat.\n",
     189           0 :                         url.c_str());
     190           0 :                 ++err_count;
     191             :             }
     192          28 :             else if(r != TLD_RESULT_INVALID)
     193             :             {
     194             :                 // we're good if invalid since that's what we expect in this case
     195             :                 // any other result is an error
     196           0 :                 fprintf(stderr, "error: tld(\"%s\", &info) failed.\n", it->c_str());
     197           0 :                 ++err_count;
     198          28 :             }
     199             :         }
     200        7555 :         else if(it->at(0) == '!')
     201             :         {
     202           9 :             if(*it != "!nel.uk")
     203             :             {
     204           9 :                 std::string url;//("we-want-to-test-just-one-domain-name.");
     205           9 :                 url += it->substr(1);
     206           9 :                 tld_result r = tld(url.c_str(), &info);
     207           9 :                 if(r != TLD_RESULT_SUCCESS)
     208             :                 {
     209             :                     // if it failed then we have a problem
     210             :                     fprintf(stderr, "error: tld(\"%s\", &info) = %d failed with an exception that should have been accepted.\n",
     211           0 :                             it->c_str(), r);
     212           0 :                     ++err_count;
     213           9 :                 }
     214             :             }
     215             :         }
     216        7546 :         else if(it->at(0) != '!')
     217             :         {
     218        7546 :             std::string url("www.this-is-a-long-domain-name-that-should-not-make-it-in-a-tld.");
     219        7546 :             url += *it;
     220             :             int level;
     221       15092 :             QString utf16(QString::fromUtf8(url.c_str()));
     222       15092 :             QString u(tld_encode(utf16, level));
     223       15092 :             QByteArray uri(u.toUtf8());
     224        7546 :             tld_result r = tld(uri.data(), &info);
     225        7546 :             if(r == TLD_RESULT_SUCCESS || r == TLD_RESULT_INVALID)
     226             :             {
     227             :                 // it succeeded, but is it the right length?
     228        7546 :                 utf16 = QString::fromUtf8(it->c_str());
     229        7546 :                 u = tld_encode(utf16, level);
     230        7546 :                 if(strlen(info.f_tld) != static_cast<size_t>(u.size() + 1))
     231             :                 {
     232             :                     fprintf(stderr, "error: tld(\"%s\", &info) length mismatch (\"%s\", %d/%d).\n",
     233           0 :                             uri.data(), info.f_tld, static_cast<int>(strlen(info.f_tld)), static_cast<int>((u.size() + 1)));
     234           0 : QString s(QString::fromUtf8(it->c_str()));
     235           0 : fprintf(stderr, "%d> %s [%s] -> %d ", r, it->c_str(), u.toUtf8().data(), s.length());
     236           0 : for(int i(0); i < s.length(); ++i) {
     237           0 : fprintf(stderr, "&#x%04X;", s.at(i).unicode());
     238             : }
     239           0 : fprintf(stderr, "\n");
     240           0 :                     ++err_count;
     241        7546 :                 }
     242             :             }
     243             :             else
     244             :             {
     245             :                 //fprintf(stderr, "error: tld(\"%s\", &info) failed.\n", it->c_str());
     246           0 : QString s(QString::fromUtf8(it->c_str()));
     247           0 : printf("error: tld(\"%s\", &info) failed with %d [%s] -> %d ", it->c_str(), r, u.toUtf8().data(), s.length());
     248           0 : for(int i(0); i < s.length(); ++i) {
     249           0 : printf("&#x%04X;", s.at(i).unicode());
     250             : }
     251           0 : printf("\n");
     252           0 :                 ++err_count;
     253        7546 :             }
     254             :         }
     255             :     }
     256           1 : }
     257             : 
     258             : 
     259             : 
     260             : 
     261           1 : int main(int argc, char *argv[])
     262             : {
     263           1 :     printf("testing tld names version %s\n", tld_version());
     264             : 
     265           1 :     if(argc > 1)
     266             :     {
     267           0 :         if(strcmp(argv[1], "-v") == 0)
     268             :         {
     269           0 :             verbose = 1;
     270             :         }
     271             :     }
     272             : 
     273             :     /* call all the tests, one by one
     274             :      * failures are "recorded" in the err_count global variable
     275             :      * and the process stops with an error message and exit(1)
     276             :      * if err_count is not zero.
     277             :      */
     278           1 :     test_load();
     279             : 
     280           1 :     if(err_count == 0)
     281             :     {
     282           1 :         test_tlds();
     283             :     }
     284             : 
     285           1 :     if(err_count || verbose)
     286             :     {
     287             :         fprintf(stderr, "%d error%s occured.\n",
     288           0 :                     err_count, err_count != 1 ? "s" : "");
     289             :     }
     290           1 :     exit(err_count ? 1 : 0);
     291           3 : }
     292             : 
     293             : /* vim: ts=4 sw=4 et
     294             :  */

Generated by: LCOV version 1.10