LCOV - code coverage report
Current view: top level - libtld - tld_file.cpp (source / functions) Hit Total Coverage
Test: coverage.info Lines: 156 220 70.9 %
Date: 2022-02-19 13:28:04 Functions: 12 13 92.3 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* TLD library -- TLD, domain name, and sub-domain extraction
       2             :  * Copyright (c) 2011-2022  Made to Order Software Corp.  All Rights Reserved
       3             :  *
       4             :  * Permission is hereby granted, free of charge, to any person obtaining a
       5             :  * copy of this software and associated documentation files (the
       6             :  * "Software"), to deal in the Software without restriction, including
       7             :  * without limitation the rights to use, copy, modify, merge, publish,
       8             :  * distribute, sublicense, and/or sell copies of the Software, and to
       9             :  * permit persons to whom the Software is furnished to do so, subject to
      10             :  * the following conditions:
      11             :  *
      12             :  * The above copyright notice and this permission notice shall be included
      13             :  * in all copies or substantial portions of the Software.
      14             :  *
      15             :  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
      16             :  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
      17             :  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
      18             :  * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
      19             :  * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
      20             :  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
      21             :  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
      22             :  */
      23             : 
      24             : /** \file
      25             :  * \brief Implementation of the TLD file data handling.
      26             :  *
      27             :  * This file handles the loading of the TLDs from a RIFF file.
      28             :  */
      29             : 
      30             : // self
      31             : //
      32             : #include    "libtld/tld_file.h"
      33             : #include    "libtld/tld.h"
      34             : #include    "libtld/tld_data.h"
      35             : 
      36             : 
      37             : // C++ lib
      38             : //
      39             : #include    <fstream>
      40             : #include    <iostream>
      41             : #include    <sstream>
      42             : 
      43             : 
      44             : // C lib
      45             : //
      46             : #include    <limits.h>
      47             : #include    <string.h>
      48             : 
      49             : // Load a TLD image from the stream `in` into one malloc()'ed buffer returned in
      50             : // *file; once the buffer is allocated, any error frees it and resets *file to nullptr.
      51         232 : tld_file_error tld_file_load_stream(tld_file ** file, std::istream & in)
      52             : {
      53         232 :     tld_magic magic;
      54         232 :     in.read(reinterpret_cast<char *>(&magic), sizeof(magic));
      55         464 :     if(!in
      56         232 :     || in.gcount() != sizeof(magic))
      57             :     {
      58           0 :         return TLD_FILE_ERROR_CANNOT_READ_FILE;
      59             :     }
      60             :     // the f_riff and f_type fields must match the TLD_MAGIC and TLD_TLDS constants
      61         232 :     if(magic.f_riff != TLD_MAGIC
      62         232 :     || magic.f_type != TLD_TLDS)
      63             :     {
      64           0 :         return TLD_FILE_ERROR_UNRECOGNIZED_FILE;
      65             :     }
      66         232 :     if(magic.f_size < sizeof(tld_header) + 4
      67         232 :     || magic.f_size > 1024 * 1024) // refuse sizes over 1024 * 1024 bytes
      68             :     {
      69           0 :         return TLD_FILE_ERROR_INVALID_FILE_SIZE;
      70             :     }
      71         232 :     uint32_t size(magic.f_size - sizeof(uint32_t));
      72             : 
      73             :     // we already read the type so we can skip that one in the following
      74             :     // memory buffer & read
      75             :     //
      76         232 :     *file = reinterpret_cast<tld_file *>(malloc(sizeof(tld_file) + size));
      77         232 :     if(*file == nullptr)
      78             :     {
      79           0 :         return TLD_FILE_ERROR_OUT_OF_MEMORY;
      80             :     }
      81             :     // local guard: its destructor frees *file and resets it to nullptr unless keep() was called
      82             :     class auto_free
      83             :     {
      84             :     public:
      85         232 :         auto_free(tld_file ** ptr)
      86         232 :             : f_ptr(ptr)
      87             :         {
      88         232 :         }
      89             : 
      90             :         auto_free(auto_free const &) = delete;
      91             : 
      92         232 :         ~auto_free()
      93         232 :         {
      94         232 :             if(f_ptr != nullptr
      95           0 :             && *f_ptr != nullptr)
      96             :             {
      97           0 :                 free(*f_ptr);
      98           0 :                 *f_ptr = nullptr;
      99             :             }
     100         232 :         }
     101             : 
     102             :         auto_free & operator = (auto_free const &) = delete;
     103             : 
     104         232 :         void keep()
     105             :         {
     106         232 :             f_ptr = nullptr; // a null f_ptr turns the destructor into a no-op
     107         232 :         }
     108             : 
     109             :     private:
     110             :         tld_file ** f_ptr = nullptr;
     111             :     };
     112         464 :     auto_free safe_ptr(file);
     113             :     // only the tld_file structure itself is zeroed, the rest gets overwritten by the read
     114         232 :     memset(*file, 0, sizeof(tld_file));
     115             :     // the hunks are loaded immediately after the tld_file structure
     116         232 :     tld_hunk * hunk(reinterpret_cast<tld_hunk *>(*file + 1));
     117             : 
     118         232 :     in.read(reinterpret_cast<char *>(hunk), size);
     119         464 :     if(!in
     120         232 :     || in.gcount() != size) // this doesn't fail if the file is larger...
     121             :     {
     122           0 :         return TLD_FILE_ERROR_CANNOT_READ_FILE;
     123             :     }
     124             :     // walk the hunks one by one; `size` is what remains to be parsed in the buffer
     125             :     for(;;)
     126             :     {
     127        3016 :         if(size == 0)
     128             :         {
     129         232 :             break;
     130             :         }
     131             :         // the hunk header must fit in what is left...
     132        1392 :         if(sizeof(tld_hunk) > size)
     133             :         {
     134           0 :             return TLD_FILE_ERROR_INVALID_HUNK_SIZE;
     135             :         }
     136        1392 :         size -= sizeof(tld_hunk);
     137             :         // ...and so must the hunk payload
     138        1392 :         if(hunk->f_size > size)
     139             :         {
     140           0 :             return TLD_FILE_ERROR_INVALID_HUNK_SIZE;
     141             :         }
     142        1392 :         size -= hunk->f_size;
     143             :         // recognized hunks only save a pointer (and a count) to the data following the hunk header
     144        1392 :         switch(hunk->f_name)
     145             :         {
     146         232 :         case TLD_HEADER:
     147         232 :             if(sizeof(tld_header) != hunk->f_size)
     148             :             {
     149           0 :                 return TLD_FILE_ERROR_INVALID_STRUCTURE_SIZE;
     150             :             }
     151         232 :             (*file)->f_header = reinterpret_cast<tld_header *>(hunk + 1);
     152         232 :             if((*file)->f_header->f_version_major != TLD_FILE_VERSION_MAJOR
     153         232 :             || (*file)->f_header->f_version_minor != TLD_FILE_VERSION_MINOR)
     154             :             {
     155           0 :                 return TLD_FILE_ERROR_UNSUPPORTED_VERSION;
     156             :             }
     157         232 :             break;
     158             : 
     159         232 :         case TLD_DESCRIPTIONS:
     160         232 :             (*file)->f_descriptions_count = hunk->f_size / sizeof(tld_description);
     161         232 :             if((*file)->f_descriptions_count * sizeof(tld_description) != hunk->f_size)
     162             :             {
     163           0 :                 return TLD_FILE_ERROR_INVALID_ARRAY_SIZE;
     164             :             }
     165         232 :             (*file)->f_descriptions = reinterpret_cast<tld_description *>(hunk + 1);
     166         232 :             break;
     167             : 
     168         232 :         case TLD_TAGS:
     169             :             // the tags are a bit peculiar in that the compression happens
     170             :             // by uint32_t and not by tld_tags so the number of tags cannot
     171             :             // be inferred by the hunk size
     172             :             //
     173         232 :             (*file)->f_tags_size = hunk->f_size / sizeof(uint32_t);
     174         232 :             if((*file)->f_tags_size * sizeof(uint32_t) != hunk->f_size)
     175             :             {
     176           0 :                 return TLD_FILE_ERROR_INVALID_ARRAY_SIZE;
     177             :             }
     178         232 :             (*file)->f_tags = reinterpret_cast<uint32_t *>(hunk + 1);
     179         232 :             break;
     180             :         // offsets and lengths share f_strings_count: the first of the two hunks sets it, the other must match
     181         232 :         case TLD_STRING_OFFSETS:
     182         232 :             if((*file)->f_strings_count == 0)
     183             :             {
     184         232 :                 (*file)->f_strings_count = hunk->f_size / sizeof(tld_string_offset);
     185         232 :                 if((*file)->f_strings_count == 0)
     186             :                 {
     187           0 :                     return TLD_FILE_ERROR_INVALID_ARRAY_SIZE;
     188             :                 }
     189             :             }
     190         232 :             if((*file)->f_strings_count * sizeof(tld_string_offset) != hunk->f_size)
     191             :             {
     192           0 :                 return TLD_FILE_ERROR_INVALID_ARRAY_SIZE;
     193             :             }
     194         232 :             (*file)->f_string_offsets = reinterpret_cast<tld_string_offset *>(hunk + 1);
     195         232 :             break;
     196             : 
     197         232 :         case TLD_STRING_LENGTHS:
     198         232 :             if((*file)->f_strings_count == 0)
     199             :             {
     200           0 :                 (*file)->f_strings_count = hunk->f_size / sizeof(tld_string_length);
     201           0 :                 if((*file)->f_strings_count == 0)
     202             :                 {
     203           0 :                     return TLD_FILE_ERROR_INVALID_ARRAY_SIZE;
     204             :                 }
     205             :             }
     206         232 :             if((*file)->f_strings_count * sizeof(tld_string_length) != hunk->f_size)
     207             :             {
     208           0 :                 return TLD_FILE_ERROR_INVALID_ARRAY_SIZE;
     209             :             }
     210         232 :             (*file)->f_string_lengths = reinterpret_cast<tld_string_length *>(hunk + 1);
     211         232 :             break;
     212             :         // NOTE(review): f_strings_end below is computed with tld_hunk pointer arithmetic (`hunk + 1 + hunk->f_size`), i.e. in steps of sizeof(tld_hunk) and not bytes, unlike the char * cast used to reach the next hunk -- confirm this is intended
     213         232 :         case TLD_STRINGS:
     214         232 :             if(hunk->f_size == 0)
     215             :             {
     216           0 :                 return TLD_FILE_ERROR_INVALID_ARRAY_SIZE;
     217             :             }
     218         232 :             (*file)->f_strings = reinterpret_cast<char *>(hunk + 1);
     219         232 :             (*file)->f_strings_end = reinterpret_cast<char *>(hunk + 1 + hunk->f_size);
     220         232 :             break;
     221             : 
     222           0 :         default:
     223             :             // just skip unrecognized hunks
     224           0 :             break;
     225             : 
     226             :         }
     227             :         // skip this hunk's header and f_size bytes of payload to reach the next hunk
     228        1392 :         hunk = reinterpret_cast<tld_hunk *>(reinterpret_cast<char *>(hunk + 1) + hunk->f_size);
     229             :     }
     230             : 
     231             :     // verify we got all the required tables
     232             :     //
     233         232 :     if((*file)->f_header == nullptr
     234         232 :     || (*file)->f_descriptions == nullptr
     235         232 :     || (*file)->f_tags == nullptr
     236         232 :     || (*file)->f_string_offsets == nullptr
     237         232 :     || (*file)->f_string_lengths == nullptr
     238         232 :     || (*file)->f_strings == nullptr)
     239             :     {
     240           0 :         return TLD_FILE_ERROR_MISSING_HUNK;
     241             :     }
     242             : 
     243             :     // it worked, do not lose the allocated pointer
     244             :     //
     245         232 :     safe_ptr.keep();
     246             : 
     247         232 :     return TLD_FILE_ERROR_NONE;
     248             : }
     249             : 
     250             : 
     251             : #ifdef __cplusplus
     252             : extern "C" {
     253             : #endif
     254             : // Load the TLD file named `filename`; `file` must point to a null tld_file pointer,
     255             : // the actual parsing being delegated to tld_file_load_stream().
     256         463 : enum tld_file_error tld_file_load(char const * filename, tld_file ** file)
     257             : {
     258         463 :     if(file == nullptr
     259         463 :     || filename == nullptr)
     260             :     {
     261           0 :         return TLD_FILE_ERROR_INVALID_POINTER;
     262             :     }
     263         463 :     if(*file != nullptr) // refuse to overwrite a pointer that is already set
     264             :     {
     265           0 :         return TLD_FILE_ERROR_POINTER_PRESENT;
     266             :     }
     267             :     // NOTE(review): the stream is opened with the default mode (std::ios::binary is not requested) although the data is read as raw bytes -- confirm this is fine on all target platforms
     268         926 :     std::ifstream in;
     269         463 :     in.open(filename);
     270         463 :     if(!in.is_open())
     271             :     {
     272         460 :         return TLD_FILE_ERROR_CANNOT_OPEN_FILE;
     273             :     }
     274             : 
     275           3 :     return tld_file_load_stream(file, in);
     276             : }
     277             : // Convert a tld_file_error value to a constant English message; a value which is not
     278             : // handled by the switch below yields "Unknown tld_file error number".
     279           0 : const char *tld_file_errstr(tld_file_error err)
     280             : {
     281           0 :     switch(err)
     282             :     {
     283           0 :     case TLD_FILE_ERROR_NONE:
     284           0 :         return "No error";
     285             : 
     286           0 :     case TLD_FILE_ERROR_INVALID_POINTER:
     287           0 :         return "Invalid pointer";
     288             : 
     289           0 :     case TLD_FILE_ERROR_POINTER_PRESENT:
     290           0 :         return "Pointer present when it should ne nullptr"; // NOTE(review): "ne" looks like a typo for "be" in this runtime message
     291             : 
     292           0 :     case TLD_FILE_ERROR_CANNOT_OPEN_FILE:
     293           0 :         return "Cannot open file";
     294             : 
     295           0 :     case TLD_FILE_ERROR_CANNOT_READ_FILE:
     296           0 :         return "I/O error reading file";
     297             : 
     298           0 :     case TLD_FILE_ERROR_UNRECOGNIZED_FILE:
     299           0 :         return "Unrecognized input file";
     300             : 
     301           0 :     case TLD_FILE_ERROR_INVALID_FILE_SIZE:
     302           0 :         return "Invalid file size";
     303             : 
     304           0 :     case TLD_FILE_ERROR_OUT_OF_MEMORY:
     305           0 :         return "Out of memory";
     306             : 
     307           0 :     case TLD_FILE_ERROR_INVALID_HUNK_SIZE:
     308           0 :         return "Invalid hunk size";
     309             : 
     310           0 :     case TLD_FILE_ERROR_INVALID_STRUCTURE_SIZE:
     311           0 :         return "Invalid structure size";
     312             : 
     313           0 :     case TLD_FILE_ERROR_INVALID_ARRAY_SIZE:
     314           0 :         return "Invalid array size";
     315             : 
     316           0 :     case TLD_FILE_ERROR_UNSUPPORTED_VERSION:
     317           0 :         return "Unsupported version";
     318             : 
     319           0 :     case TLD_FILE_ERROR_MISSING_HUNK:
     320           0 :         return "Missing hunk";
     321             : 
     322             :     //default: -- handled below, without a default, we know whether we missed
     323             :     //            some new TLD_FILE_ERROR_... in our cases above.
     324             :     }
     325             :     // only reached when err matched none of the cases above
     326           0 :     return "Unknown tld_file error number";
     327             : }
     328             : // Return a pointer to entry `id` of the f_descriptions array, or nullptr when `id`
     329             : // is not smaller than f_descriptions_count; `file` is dereferenced without a check.
     330   507678853 : const tld_description *tld_file_description(tld_file const * file, uint32_t id)
     331             : {
     332   507678853 :     if(id >= file->f_descriptions_count)
     333             :     {
     334           0 :         return nullptr;
     335             :     }
     336   507678853 :     return file->f_descriptions + id;
     337             : }
     338             : // Return the f_tags data found at index `id`, viewed as a tld_tag, or nullptr when
     339             : // `id + 1` is not a valid index in f_tags; `file` is dereferenced without a check.
     340      134560 : const tld_tag *tld_file_tag(tld_file const * file, uint32_t id)
     341             : {
     342      134560 :     if(id + 1 >= file->f_tags_size) // NOTE(review): with a 32 bit int, `id + 1` wraps to 0 when id is UINT32_MAX and then passes this test -- confirm callers cannot pass it
     343             :     {
     344           0 :         return nullptr;
     345             :     }
     346      134560 :     return reinterpret_cast<tld_tag *>(file->f_tags + id);
     347             : }
     348             : // Return a pointer to string number `id` (ids start at 1) and save its size in *length;
     349             : // on error return nullptr with errno set to EINVAL (and *length set to 0 when not null).
     350     1587877 : const char *tld_file_string(tld_file const * file, uint32_t id, uint32_t * length)
     351             : {
     352     1587877 :     if(length == nullptr)
     353             :     {
     354           0 :         errno = EINVAL;
     355           0 :         return nullptr;
     356             :     }
     357     1587877 :     *length = 0;
     358             :     // switch to a 0 based index; an id of 0 wraps around and is expected to fail the range test below
     359     1587877 :     --id;
     360     1587877 :     if(id >= file->f_strings_count)
     361             :     {
     362           0 :         errno = EINVAL;
     363           0 :         return nullptr;
     364             :     }
     365     1587877 :     char * s(file->f_strings + file->f_string_offsets[id].f_string_offset);
     366     1587877 :     uint32_t l(file->f_string_lengths[id].f_string_length);
     367     1587877 :     char * e(s + l);
     368     1587877 :     if(s > file->f_strings_end
     369     1587877 :     || e > file->f_strings_end)
     370             :     {
     371             :         // assuming the file is valid, this should not happen
     372             :         //
     373           0 :         errno = EINVAL;
     374           0 :         return nullptr;
     375             :     }
     376     1587877 :     *length = l;
     377     1587877 :     return s;
     378             : }
     379             : 
     380             : 
     381             : /** \brief Transform a tld_file to a JSON string.
     382             :  *
     383             :  * This function transforms a tld_file in a JSON string which gets returned.
     384             :  * If something goes wrong, then the function may return a nullptr instead.
     385             :  *
     386             :  * The returned string must be freed by you with the `free()` function.
     387             :  *
     388             :  * \param[in] file  The tld_file to transform to a JSON.
     389             :  *
     390             :  * \return A string with the tld_file JSON or nullptr on error.
     391             :  */
     392           1 : char *tld_file_to_json(tld_file const * file)
     393             : {
     394           1 :     if(file == nullptr
     395           1 :     || file->f_header == nullptr
     396           1 :     || file->f_descriptions == nullptr
     397           1 :     || file->f_tags == nullptr
     398           1 :     || file->f_string_offsets == nullptr
     399           1 :     || file->f_string_lengths == nullptr
     400           1 :     || file->f_strings == nullptr)
     401             :     {
     402           0 :         return nullptr;
     403             :     }
     404             :     // the whole JSON gets built in memory and is duplicated at the very end
     405           2 :     std::stringstream out;
     406             :     // first the header fields, then one object per description
     407           1 :     out << "{\n";
     408           1 :     out << "\"version\":\"" << static_cast<int>(file->f_header->f_version_major)
     409           1 :                      << '.' << static_cast<int>(file->f_header->f_version_minor) << "\",\n";
     410           1 :     out << "\"created-on\":" << file->f_header->f_created_on << ",\n";
     411           1 :     out << "\"max-level\":" << static_cast<int>(file->f_header->f_tld_max_level) << ",\n";
     412           1 :     out << "\"tld-start-offset\":" << static_cast<int>(file->f_header->f_tld_start_offset) << ",\n";
     413           1 :     out << "\"tld-end-offset\":" << static_cast<int>(file->f_header->f_tld_end_offset) << ",\n";
     414           1 :     out << "\"descriptions\":[\n";
     415       10465 :     for(uint32_t idx(0); idx < file->f_descriptions_count; ++idx)
     416             :     {
     417       10464 :         tld_description const * d(tld_file_description(file, idx));
     418             :         // entries are separated by a comma, nothing is written before the first one
     419       10464 :         out << (idx == 0 ? "" : ",\n");
     420             :         // NOTE(review): the pointers returned by tld_file_string(), tld_file_description() and tld_file_tag() in this loop are used without a nullptr check and the strings are written without JSON escaping -- confirm the data can be trusted
     421             :         {
     422       10464 :             uint32_t length(0);
     423       10464 :             char const * tld(tld_file_string(file, d->f_tld, &length));
     424       10464 :             out << "{\"tld\":\"" << std::string(tld, length) << "\"";
     425             :         }
     426             : 
     427       10464 :         out << ",\"status\":\"" << tld_status_to_string(static_cast<tld_status>(d->f_status)) << "\"";
     428             :         // an f_exception_apply_to of USHRT_MAX means there is no "apply-to" field to output
     429       10464 :         if(d->f_exception_apply_to != USHRT_MAX)
     430             :         {
     431          21 :             tld_description const * apply_to(tld_file_description(file, d->f_exception_apply_to));
     432          21 :             uint32_t length(0);
     433          21 :             char const * to_tld(tld_file_string(file, apply_to->f_tld, &length));
     434          21 :             out << ",\"apply-to\":\"" << std::string(to_tld, length) << "\"";
     435             :         }
     436             :         // similarly, both offsets are skipped when f_start_offset is USHRT_MAX
     437       10464 :         if(d->f_start_offset != USHRT_MAX)
     438             :         {
     439         795 :             out << ",\"start-offset\":" << d->f_start_offset;
     440         795 :             out << ",\"end-offset\":" << d->f_end_offset;
     441             :         }
     442             :         // tags are addressed two uint32_t apart (`tidx * 2`), presumably one for the name and one for the value
     443       29911 :         for(uint32_t tidx(0); tidx < d->f_tags_count; ++tidx)
     444             :         {
     445       19447 :             const tld_tag * tag(tld_file_tag(file, d->f_tags + tidx * 2));
     446             :             {
     447       19447 :                 uint32_t length(0);
     448       19447 :                 char const * tag_name(tld_file_string(file, tag->f_tag_name, &length));
     449       38894 :                 out << ",\"" << std::string(tag_name, length)
     450       38894 :                     << "\":\"";
     451             :             }
     452             :             {
     453       19447 :                 uint32_t length(0);
     454       19447 :                 char const * tag_value(tld_file_string(file, tag->f_tag_value, &length));
     455       38894 :                 out << std::string(tag_value, length)
     456       38894 :                     << "\"";
     457             :             }
     458             :         }
     459             : 
     460       10464 :         out << "}";
     461             :     }
     462           1 :     out << "]}\n";
     463             :     // strdup() so the caller can release the result with free(); it returns nullptr when out of memory
     464           1 :     return strdup(out.str().c_str());
     465             : }
     466             : // Release the buffer pointed to by *file with free() and reset *file to nullptr;
     467             : // a null `file` or a null *file is silently ignored.
     468         231 : void tld_file_free(tld_file ** file)
     469             : {
     470         231 :     if(file != nullptr
     471         231 :     && *file != nullptr)
     472             :     {
     473           2 :         free(*file);
     474           2 :         *file = nullptr;
     475             :     }
     476         231 : }
     477             : 
     478             : 
     479             : #ifdef __cplusplus
     480         729 : }
     481             : #endif
     482             : 
     483             : // vim: ts=4 sw=4 et

Generated by: LCOV version 1.13