LCOV - code coverage report
Current view: top level - libtld - tld_file.cpp (source / functions) Hit Total Coverage
Test: coverage.info Lines: 155 217 71.4 %
Date: 2022-01-01 21:23:34 Functions: 12 13 92.3 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* TLD library -- TLD, domain name, and sub-domain extraction
       2             :  * Copyright (c) 2011-2022  Made to Order Software Corp.  All Rights Reserved
       3             :  *
       4             :  * Permission is hereby granted, free of charge, to any person obtaining a
       5             :  * copy of this software and associated documentation files (the
       6             :  * "Software"), to deal in the Software without restriction, including
       7             :  * without limitation the rights to use, copy, modify, merge, publish,
       8             :  * distribute, sublicense, and/or sell copies of the Software, and to
       9             :  * permit persons to whom the Software is furnished to do so, subject to
      10             :  * the following conditions:
      11             :  *
      12             :  * The above copyright notice and this permission notice shall be included
      13             :  * in all copies or substantial portions of the Software.
      14             :  *
      15             :  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
      16             :  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
      17             :  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
      18             :  * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
      19             :  * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
      20             :  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
      21             :  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
      22             :  */
      23             : 
      24             : /** \file
      25             :  * \brief Implementation of the TLD file data handling.
      26             :  *
      27             :  * This file handles the loading of the TLDs from a RIFF file.
      28             :  */
      29             : 
      30             : // self
      31             : //
      32             : #include    "libtld/tld_file.h"
      33             : #include    "libtld/tld.h"
      34             : #include    "libtld/tld_data.h"
      35             : 
      36             : 
      37             : // C++ lib
      38             : //
      39             : #include    <fstream>
      40             : #include    <iostream>
      41             : #include    <sstream>
      42             : 
      43             : 
      44             : // C lib
      45             : //
      46             : #include    <limits.h>
      47             : #include    <string.h>
      48             : 
      49             : 
      50             : 
      51         228 : tld_file_error tld_file_load_stream(tld_file ** file, std::istream & in)
      52             : {
      53         228 :     tld_magic magic;
      54         228 :     in.read(reinterpret_cast<char *>(&magic), sizeof(magic));
      55         456 :     if(!in
      56         228 :     || in.gcount() != sizeof(magic))
      57             :     {
      58           0 :         return TLD_FILE_ERROR_CANNOT_READ_FILE;
      59             :     }
      60             : 
      61         228 :     if(magic.f_riff != TLD_MAGIC
      62         228 :     || magic.f_type != TLD_TLDS)
      63             :     {
      64           0 :         return TLD_FILE_ERROR_UNRECOGNIZED_FILE;
      65             :     }
      66         228 :     if(magic.f_size < sizeof(tld_header) + 4
      67         228 :     || magic.f_size > 1024 * 1024)
      68             :     {
      69           0 :         return TLD_FILE_ERROR_INVALID_FILE_SIZE;
      70             :     }
      71         228 :     uint32_t size(magic.f_size - sizeof(uint32_t));
      72             : 
      73             :     // we already read the type so we can skip that one in the following
      74             :     // memory buffer & read
      75             :     //
      76         228 :     *file = reinterpret_cast<tld_file *>(malloc(sizeof(tld_file) + size));
      77         228 :     if(*file == nullptr)
      78             :     {
      79           0 :         return TLD_FILE_ERROR_OUT_OF_MEMORY;
      80             :     }
      81             : 
      82             :     class auto_free
      83             :     {
      84             :     public:
      85         228 :         auto_free(tld_file ** ptr)
      86         228 :             : f_ptr(ptr)
      87             :         {
      88         228 :         }
      89             : 
      90             :         auto_free(auto_free const &) = delete;
      91             : 
      92         228 :         ~auto_free()
      93         228 :         {
      94         228 :             if(f_ptr != nullptr
      95           0 :             && *f_ptr != nullptr)
      96             :             {
      97           0 :                 free(*f_ptr);
      98           0 :                 *f_ptr = nullptr;
      99             :             }
     100         228 :         }
     101             : 
     102             :         auto_free & operator = (auto_free const &) = delete;
     103             : 
     104         228 :         void keep()
     105             :         {
     106         228 :             f_ptr = nullptr;
     107         228 :         }
     108             : 
     109             :     private:
     110             :         tld_file ** f_ptr = nullptr;
     111             :     };
     112         456 :     auto_free safe_ptr(file);
     113             : 
     114         228 :     memset(*file, 0, sizeof(tld_file));
     115             : 
     116         228 :     tld_hunk * hunk(reinterpret_cast<tld_hunk *>(*file + 1));
     117             : 
     118         228 :     in.read(reinterpret_cast<char *>(hunk), size);
     119         456 :     if(!in
     120         228 :     || in.gcount() != size) // this doesn't fail if the file is larger...
     121             :     {
     122           0 :         return TLD_FILE_ERROR_CANNOT_READ_FILE;
     123             :     }
     124             : 
     125             :     for(;;)
     126             :     {
     127        2964 :         if(size == 0)
     128             :         {
     129         228 :             break;
     130             :         }
     131             : 
     132        1368 :         if(sizeof(tld_hunk) > size)
     133             :         {
     134           0 :             return TLD_FILE_ERROR_INVALID_HUNK_SIZE;
     135             :         }
     136        1368 :         size -= sizeof(tld_hunk);
     137             : 
     138        1368 :         if(hunk->f_size > size)
     139             :         {
     140           0 :             return TLD_FILE_ERROR_INVALID_HUNK_SIZE;
     141             :         }
     142        1368 :         size -= hunk->f_size;
     143             : 
     144        1368 :         switch(hunk->f_name)
     145             :         {
     146         228 :         case TLD_HEADER:
     147         228 :             if(sizeof(tld_header) != hunk->f_size)
     148             :             {
     149           0 :                 return TLD_FILE_ERROR_INVALID_STRUCTURE_SIZE;
     150             :             }
     151         228 :             (*file)->f_header = reinterpret_cast<tld_header *>(hunk + 1);
     152         228 :             if((*file)->f_header->f_version_major != TLD_FILE_VERSION_MAJOR
     153         228 :             || (*file)->f_header->f_version_minor != TLD_FILE_VERSION_MINOR)
     154             :             {
     155           0 :                 return TLD_FILE_ERROR_UNSUPPORTED_VERSION;
     156             :             }
     157         228 :             break;
     158             : 
     159         228 :         case TLD_DESCRIPTIONS:
     160         228 :             (*file)->f_descriptions_count = hunk->f_size / sizeof(tld_description);
     161         228 :             if((*file)->f_descriptions_count * sizeof(tld_description) != hunk->f_size)
     162             :             {
     163           0 :                 return TLD_FILE_ERROR_INVALID_ARRAY_SIZE;
     164             :             }
     165         228 :             (*file)->f_descriptions = reinterpret_cast<tld_description *>(hunk + 1);
     166         228 :             break;
     167             : 
     168         228 :         case TLD_TAGS:
     169             :             // the tags are a bit peculiar in that the compression happens
     170             :             // by uint32_t and not by tld_tags so the number of tags cannot
     171             :             // be inferred by the hunk size
     172             :             //
     173         228 :             (*file)->f_tags_size = hunk->f_size / sizeof(uint32_t);
     174         228 :             if((*file)->f_tags_size * sizeof(uint32_t) != hunk->f_size)
     175             :             {
     176           0 :                 return TLD_FILE_ERROR_INVALID_ARRAY_SIZE;
     177             :             }
     178         228 :             (*file)->f_tags = reinterpret_cast<uint32_t *>(hunk + 1);
     179         228 :             break;
     180             : 
     181         228 :         case TLD_STRING_OFFSETS:
     182         228 :             if((*file)->f_strings_count == 0)
     183             :             {
     184         228 :                 (*file)->f_strings_count = hunk->f_size / sizeof(tld_string_offset);
     185         228 :                 if((*file)->f_strings_count == 0)
     186             :                 {
     187           0 :                     return TLD_FILE_ERROR_INVALID_ARRAY_SIZE;
     188             :                 }
     189             :             }
     190         228 :             if((*file)->f_strings_count * sizeof(tld_string_offset) != hunk->f_size)
     191             :             {
     192           0 :                 return TLD_FILE_ERROR_INVALID_ARRAY_SIZE;
     193             :             }
     194         228 :             (*file)->f_string_offsets = reinterpret_cast<tld_string_offset *>(hunk + 1);
     195         228 :             break;
     196             : 
     197         228 :         case TLD_STRING_LENGTHS:
     198         228 :             if((*file)->f_strings_count == 0)
     199             :             {
     200           0 :                 (*file)->f_strings_count = hunk->f_size / sizeof(tld_string_length);
     201           0 :                 if((*file)->f_strings_count == 0)
     202             :                 {
     203           0 :                     return TLD_FILE_ERROR_INVALID_ARRAY_SIZE;
     204             :                 }
     205             :             }
     206         228 :             if((*file)->f_strings_count * sizeof(tld_string_length) != hunk->f_size)
     207             :             {
     208           0 :                 return TLD_FILE_ERROR_INVALID_ARRAY_SIZE;
     209             :             }
     210         228 :             (*file)->f_string_lengths = reinterpret_cast<tld_string_length *>(hunk + 1);
     211         228 :             break;
     212             : 
     213         228 :         case TLD_STRINGS:
     214         228 :             if(hunk->f_size == 0)
     215             :             {
     216           0 :                 return TLD_FILE_ERROR_INVALID_ARRAY_SIZE;
     217             :             }
     218         228 :             (*file)->f_strings = reinterpret_cast<char *>(hunk + 1);
     219         228 :             (*file)->f_strings_end = reinterpret_cast<char *>(hunk + 1 + hunk->f_size);
     220         228 :             break;
     221             : 
     222           0 :         default:
     223             :             // just skip unrecognized hunks
     224           0 :             break;
     225             : 
     226             :         }
     227             : 
     228        1368 :         hunk = reinterpret_cast<tld_hunk *>(reinterpret_cast<char *>(hunk + 1) + hunk->f_size);
     229             :     }
     230             : 
     231             :     // verify we got all the required tables
     232             :     //
     233         228 :     if((*file)->f_header == nullptr
     234         228 :     || (*file)->f_descriptions == nullptr
     235         228 :     || (*file)->f_tags == nullptr
     236         228 :     || (*file)->f_string_offsets == nullptr
     237         228 :     || (*file)->f_string_lengths == nullptr
     238         228 :     || (*file)->f_strings == nullptr)
     239             :     {
     240           0 :         return TLD_FILE_ERROR_MISSING_HUNK;
     241             :     }
     242             : 
     243             :     // it worked, do not lose the allocated pointer
     244             :     //
     245         228 :     safe_ptr.keep();
     246             : 
     247         228 :     return TLD_FILE_ERROR_NONE;
     248             : }
     249             : 
     250             : 
     251             : #ifdef __cplusplus
     252             : extern "C" {
     253             : #endif
     254             : 
     255             : 
     256         455 : enum tld_file_error tld_file_load(char const * filename, tld_file ** file)
     257             : {
     258         455 :     if(file == nullptr
     259         455 :     || filename == nullptr)
     260             :     {
     261           0 :         return TLD_FILE_ERROR_INVALID_POINTER;
     262             :     }
     263         455 :     if(*file != nullptr)
     264             :     {
     265           0 :         return TLD_FILE_ERROR_POINTER_PRESENT;
     266             :     }
     267             : 
     268         910 :     std::ifstream in;
     269         455 :     in.open(filename);
     270         455 :     if(!in.is_open())
     271             :     {
     272         452 :         return TLD_FILE_ERROR_CANNOT_OPEN_FILE;
     273             :     }
     274             : 
     275           3 :     return tld_file_load_stream(file, in);
     276             : }
     277             : 
     278             : 
     279           0 : const char *tld_file_errstr(tld_file_error err)
     280             : {
     281           0 :     switch(err)
     282             :     {
     283           0 :     case TLD_FILE_ERROR_NONE:
     284           0 :         return "No error";
     285             : 
     286           0 :     case TLD_FILE_ERROR_INVALID_POINTER:
     287           0 :         return "Invalid pointer";
     288             : 
     289           0 :     case TLD_FILE_ERROR_POINTER_PRESENT:
     290           0 :         return "Pointer present when it should ne nullptr";
     291             : 
     292           0 :     case TLD_FILE_ERROR_CANNOT_OPEN_FILE:
     293           0 :         return "Cannot open file";
     294             : 
     295           0 :     case TLD_FILE_ERROR_CANNOT_READ_FILE:
     296           0 :         return "I/O error reading file";
     297             : 
     298           0 :     case TLD_FILE_ERROR_UNRECOGNIZED_FILE:
     299           0 :         return "Unrecognized input file";
     300             : 
     301           0 :     case TLD_FILE_ERROR_INVALID_FILE_SIZE:
     302           0 :         return "Invalid file size";
     303             : 
     304           0 :     case TLD_FILE_ERROR_OUT_OF_MEMORY:
     305           0 :         return "Out of memory";
     306             : 
     307           0 :     case TLD_FILE_ERROR_INVALID_HUNK_SIZE:
     308           0 :         return "Invalid hunk size";
     309             : 
     310           0 :     case TLD_FILE_ERROR_INVALID_STRUCTURE_SIZE:
     311           0 :         return "Invalid structure size";
     312             : 
     313           0 :     case TLD_FILE_ERROR_INVALID_ARRAY_SIZE:
     314           0 :         return "Invalid array size";
     315             : 
     316           0 :     case TLD_FILE_ERROR_UNSUPPORTED_VERSION:
     317           0 :         return "Unsupported version";
     318             : 
     319           0 :     case TLD_FILE_ERROR_MISSING_HUNK:
     320           0 :         return "Missing hunk";
     321             : 
     322             :     //default: -- handled below, without a default, we know whether we missed
     323             :     //            some new TLD_FILE_ERROR_... in our cases above.
     324             :     }
     325             : 
     326           0 :     return "Unknown tld_file error number";
     327             : }
     328             : 
     329             : 
     330   507678781 : const tld_description *tld_file_description(tld_file const * file, uint32_t id)
     331             : {
     332   507678781 :     if(id >= file->f_descriptions_count)
     333             :     {
     334           0 :         return nullptr;
     335             :     }
     336   507678781 :     return file->f_descriptions + id;
     337             : }
     338             : 
     339             : 
     340      134552 : const tld_tag *tld_file_tag(tld_file const * file, uint32_t id)
     341             : {
     342      134552 :     if(id + 1 >= file->f_tags_size)
     343             :     {
     344           0 :         return nullptr;
     345             :     }
     346      134552 :     return reinterpret_cast<tld_tag *>(file->f_tags + id);
     347             : }
     348             : 
     349             : 
     350     1587799 : const char *tld_file_string(tld_file const * file, uint32_t id, uint32_t * length)
     351             : {
     352     1587799 :     --id;
     353     1587799 :     if(length == nullptr
     354     1587799 :     || id >= file->f_strings_count)
     355             :     {
     356           0 :         errno = EINVAL;
     357           0 :         return nullptr;
     358             :     }
     359     1587799 :     char * s(file->f_strings + file->f_string_offsets[id].f_string_offset);
     360     1587799 :     uint32_t l(file->f_string_lengths[id].f_string_length);
     361     1587799 :     char * e(s + l);
     362     1587799 :     if(s > file->f_strings_end
     363     1587799 :     || e > file->f_strings_end)
     364             :     {
     365             :         // assuming the file is valid, this should not happen
     366             :         //
     367           0 :         errno = EINVAL;
     368           0 :         return nullptr;
     369             :     }
     370     1587799 :     *length = l;
     371     1587799 :     return s;
     372             : }
     373             : 
     374             : 
     375             : /** \brief Transform a tld_file to a JSON string.
     376             :  *
     377             :  * This function transforms a tld_file into a JSON string which gets returned.
     378             :  * If something goes wrong, then the function may return a nullptr instead.
     379             :  *
     380             :  * The returned string must be freed by the caller with the `free()` function.
     381             :  *
     382             :  * \param[in] file  The tld_file to transform to JSON.
     383             :  *
     384             :  * \return A string with the tld_file JSON or nullptr on error.
     385             :  */
     386           1 : char *tld_file_to_json(tld_file const * file)
     387             : {
     388           1 :     if(file == nullptr
     389           1 :     || file->f_header == nullptr
     390           1 :     || file->f_descriptions == nullptr
     391           1 :     || file->f_tags == nullptr
     392           1 :     || file->f_string_offsets == nullptr
     393           1 :     || file->f_string_lengths == nullptr
     394           1 :     || file->f_strings == nullptr)
     395             :     {
     396           0 :         return nullptr;
     397             :     }
     398             : 
     399           2 :     std::stringstream out;
     400             : 
     401           1 :     out << "{\n";
     402           1 :     out << "\"version\":\"" << static_cast<int>(file->f_header->f_version_major)
     403           1 :                      << '.' << static_cast<int>(file->f_header->f_version_minor) << "\",\n";
     404           1 :     out << "\"created-on\":" << file->f_header->f_created_on << ",\n";
     405           1 :     out << "\"max-level\":" << static_cast<int>(file->f_header->f_tld_max_level) << ",\n";
     406           1 :     out << "\"tld-start-offset\":" << static_cast<int>(file->f_header->f_tld_start_offset) << ",\n";
     407           1 :     out << "\"tld-end-offset\":" << static_cast<int>(file->f_header->f_tld_end_offset) << ",\n";
     408           1 :     out << "\"descriptions\":[\n";
     409       10465 :     for(uint32_t idx(0); idx < file->f_descriptions_count; ++idx)
     410             :     {
     411       10464 :         tld_description const * d(tld_file_description(file, idx));
     412             : 
     413       10464 :         out << (idx == 0 ? "" : ",\n");
     414             : 
     415             :         {
     416       10464 :             uint32_t length(0);
     417       10464 :             char const * tld(tld_file_string(file, d->f_tld, &length));
     418       10464 :             out << "{\"tld\":\"" << std::string(tld, length) << "\"";
     419             :         }
     420             : 
     421       10464 :         out << ",\"status\":\"" << tld_status_to_string(static_cast<tld_status>(d->f_status)) << "\"";
     422             : 
     423       10464 :         if(d->f_exception_apply_to != USHRT_MAX)
     424             :         {
     425          21 :             tld_description const * apply_to(tld_file_description(file, d->f_exception_apply_to));
     426          21 :             uint32_t length(0);
     427          21 :             char const * to_tld(tld_file_string(file, apply_to->f_tld, &length));
     428          21 :             out << ",\"apply-to\":\"" << std::string(to_tld, length) << "\"";
     429             :         }
     430             : 
     431       10464 :         if(d->f_start_offset != USHRT_MAX)
     432             :         {
     433         795 :             out << ",\"start-offset\":" << d->f_start_offset;
     434         795 :             out << ",\"end-offset\":" << d->f_end_offset;
     435             :         }
     436             : 
     437       29911 :         for(uint32_t tidx(0); tidx < d->f_tags_count; ++tidx)
     438             :         {
     439       19447 :             const tld_tag * tag(tld_file_tag(file, d->f_tags + tidx * 2));
     440             :             {
     441       19447 :                 uint32_t length(0);
     442       19447 :                 char const * tag_name(tld_file_string(file, tag->f_tag_name, &length));
     443       38894 :                 out << ",\"" << std::string(tag_name, length)
     444       38894 :                     << "\":\"";
     445             :             }
     446             :             {
     447       19447 :                 uint32_t length(0);
     448       19447 :                 char const * tag_value(tld_file_string(file, tag->f_tag_value, &length));
     449       38894 :                 out << std::string(tag_value, length)
     450       38894 :                     << "\"";
     451             :             }
     452             :         }
     453             : 
     454       10464 :         out << "}";
     455             :     }
     456           1 :     out << "]}\n";
     457             : 
     458           1 :     return strdup(out.str().c_str());
     459             : }
     460             : 
     461             : 
     462         227 : void tld_file_free(tld_file ** file)
     463             : {
     464         227 :     if(file != nullptr
     465         227 :     && *file != nullptr)
     466             :     {
     467           2 :         free(*file);
     468           2 :         *file = nullptr;
     469             :     }
     470         227 : }
     471             : 
     472             : 
     473             : #ifdef __cplusplus
     474         717 : }
     475             : #endif
     476             : 
     477             : // vim: ts=4 sw=4 et

Generated by: LCOV version 1.13