LCOV - code coverage report
Current view: top level - libtld - tld_compiler.h (source / functions) Hit Total Coverage
Test: coverage.info Lines: 7 7 100.0 %
Date: 2022-02-19 13:28:04 Functions: 10 10 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* TLD library -- TLD, domain name, and sub-domain extraction
       2             :  * Copyright (c) 2011-2022  Made to Order Software Corp.  All Rights Reserved
       3             :  *
       4             :  * Permission is hereby granted, free of charge, to any person obtaining a
       5             :  * copy of this software and associated documentation files (the
       6             :  * "Software"), to deal in the Software without restriction, including
       7             :  * without limitation the rights to use, copy, modify, merge, publish,
       8             :  * distribute, sublicense, and/or sell copies of the Software, and to
       9             :  * permit persons to whom the Software is furnished to do so, subject to
      10             :  * the following conditions:
      11             :  *
      12             :  * The above copyright notice and this permission notice shall be included
      13             :  * in all copies or substantial portions of the Software.
      14             :  *
      15             :  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
      16             :  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
      17             :  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
      18             :  * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
      19             :  * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
      20             :  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
      21             :  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
      22             :  */
      23             : 
      24             : /** \file
      25             :  * \brief Declarations of the TLD compiler classes.
      26             :  *
      27             :  * This header declares the C++ classes used to compile TLD definitions
      28             :  * read from an input folder into an output file: the string and tag
      29             :  * managers, the tld_definition class and the tld_compiler class itself.
      30             :  */
      31             : 
      32             : // self
      33             : //
      34             : #include "libtld/tld.h"
      35             : 
      36             : #ifdef __cplusplus
      37             : 
      38             : // C++ lib
      39             : //
      40             : #include    <iostream>
      41             : #include    <list>
      42             : #include    <map>
      43             : #include    <memory>
      44             : #include    <set>
      45             : #include    <vector>
      46             : 
      47             : // C lib
      48             : //
      49             : #include    <limits.h>
      50             : 
      51             : 
      52             : typedef uint32_t                            string_id_t;
      53             : typedef std::map<string_id_t, string_id_t>  tags_t;
      54             : typedef uint32_t                            tag_id_t;
      55             : 
      56             : constexpr string_id_t       STRING_ID_NULL = 0;
      57             : 
      58        7346 : class tld_string
      59             : {
      60             : public:
      61             :     typedef std::shared_ptr<tld_string>             pointer_t;
      62             :     typedef std::map<std::string, pointer_t>        map_by_string_t;
      63             :     typedef std::map<string_id_t, pointer_t>        map_by_id_t;
      64             : 
      65             :                             tld_string(string_id_t id, std::string const & s);
      66             : 
      67             :     string_id_t             get_id() const;
      68             :     std::string const &     get_string() const;
      69             :     std::string::size_type  length() const;
      70             :     void                    set_found_in(string_id_t id);
      71             :     string_id_t             get_found_in() const;
      72             : 
      73             : private:
      74             :     string_id_t             f_id = STRING_ID_NULL;
      75             :     std::string             f_string = std::string();
      76             :     string_id_t             f_found_in = STRING_ID_NULL;
      77             : };
      78             : 
      79             : 
      80           2 : class tld_string_manager
      81             : {
      82             : public:
      83             :     string_id_t                 add_string(std::string const & s);
      84             :     string_id_t                 find_string(std::string const & s);
      85             :     std::string                 get_string(string_id_t id) const;
      86             :     string_id_t                 get_next_string_id() const;
      87             :     std::size_t                 size() const;
      88             :     std::size_t                 max_length() const;
      89             :     std::size_t                 total_length() const;
      90             :     std::string const &         compressed_strings() const;
      91             :     std::size_t                 compressed_length() const;
      92             :     void                        merge_strings();
      93             :     std::size_t                 included_count() const;
      94             :     std::size_t                 included_length() const;
      95             :     std::size_t                 merged_count() const;
      96             :     std::size_t                 merged_length() const;
      97             :     std::size_t                 get_string_offset(std::string const & s) const;
      98             :     std::size_t                 get_string_offset(string_id_t id) const;
      99             : 
     100             : private:
     101             :     typedef std::set<string_id_t>   set_id_t;
     102             : 
     103             :     std::string::size_type      end_start_match(
     104             :                                       std::string const & s1
     105             :                                     , std::string const & s2);
     106             :     bool                        merge_two_strings();
     107             : 
     108             :     string_id_t                 f_next_id = STRING_ID_NULL;
     109             :     tld_string::map_by_string_t f_strings_by_string = tld_string::map_by_string_t();
     110             :     tld_string::map_by_id_t     f_strings_by_id = tld_string::map_by_id_t();
     111             :     set_id_t                    f_strings_reviewed = set_id_t();
     112             :     std::size_t                 f_max_length = 0;
     113             :     std::size_t                 f_total_length = 0;
     114             :     std::size_t                 f_included_count = 0;
     115             :     std::size_t                 f_included_length = 0;
     116             :     std::size_t                 f_merged_count = 0;
     117             :     std::size_t                 f_merged_length = 0;
     118             :     std::string                 f_merged_strings = std::string();
     119             : };
     120             : 
     121             : 
     122           2 : class tld_tag_manager
     123             : {
     124             : public:
     125             :     typedef std::vector<string_id_t>    tags_table_t;
     126             : 
     127             :     void                        add(tags_t const & tags);
     128             :     void                        merge();
     129             :     tags_table_t const &        merged_tags() const;
     130             :     std::size_t                 merged_size() const;
     131             :     std::size_t                 get_tag_offset(tags_t const & tags) const;
     132             : 
     133             : private:
     134             :     typedef std::vector<tags_table_t>   tags_vector_t;
     135             : 
     136             :     tags_table_t                tags_to_table(tags_t const & tags) const;
     137             :     std::size_t                 end_start_match(
     138             :                                       tags_table_t const & s1
     139             :                                     , tags_table_t const & s2);
     140             : 
     141             :     tags_vector_t               f_tags = tags_vector_t();
     142             :     tags_table_t                f_merged_tags = tags_table_t();
     143             : };
     144             : 
     145             : 
     146       10464 : class tld_definition
     147             : {
     148             : public:
     149             :     typedef std::shared_ptr<tld_definition>         pointer_t;
     150             :     typedef std::vector<string_id_t>                segments_t;
     151             :     typedef std::map<std::string, pointer_t>        map_t;
     152             : 
     153             :     static constexpr std::uint32_t      SET_TLD =         0x0001;
     154             :     static constexpr std::uint32_t      SET_STATUS =      0x0002;
     155             :     static constexpr std::uint32_t      SET_APPLY_TO =    0x0080;
     156             : 
     157             :                             tld_definition(tld_definition const &) = default;
     158             :                             tld_definition(tld_string_manager & strings);
     159             : 
     160             :     tld_definition &        operator = (tld_definition const &);
     161             : 
     162             :     bool                    add_segment(std::string const & segment, std::string & errmsg);
     163             :     segments_t const &      get_segments() const;
     164             :     std::string             get_name() const;
     165             :     std::string             get_inverted_name() const;
     166             :     std::string             get_parent_name() const;
     167             :     std::string             get_parent_inverted_name() const;
     168             : 
     169             :     void                    set_index(int idx);
     170             :     int                     get_index() const;
     171             : 
     172             :     bool                    set_status(tld_status status);
     173             :     tld_status              get_status() const;
     174             : 
     175             :     bool                    set_apply_to(std::string const & apply_to);
     176             :     std::string             get_apply_to() const;
     177             : 
     178             :     void                    add_tag(
     179             :                                   std::string const & tag_name
     180             :                                 , std::string const & value
     181             :                                 , std::string & errmsg);
     182             :     tags_t const &          get_tags() const;
     183             : 
     184             :     void                    reset_set_flags();
     185             :     void                    set_named_parameter(
     186             :                                   std::string const & name
     187             :                                 , std::string const & value
     188             :                                 , std::string & errmsg);
     189             : 
     190             :     void                    set_start_offset(uint16_t start);
     191             :     void                    set_end_offset(uint16_t end);
     192             :     uint16_t                get_start_offset() const;
     193             :     uint16_t                get_end_offset() const;
     194             : 
     195             : private:
     196             :     tld_string_manager &    f_strings;
     197             : 
     198             :     int                     f_set = 0;
     199             :     segments_t              f_tld = segments_t();
     200             :     int                     f_index = 0;
     201             :     tld_status              f_status = TLD_STATUS_VALID;
     202             :     std::string             f_apply_to = std::string();
     203             : 
     204             :     tags_t                  f_tags = tags_t();
     205             : 
     206             :     uint16_t                f_start_offset = USHRT_MAX;
     207             :     uint16_t                f_end_offset = USHRT_MAX;
     208             : };
     209             : 
     210             : 
     211           3 : class tld_compiler
     212             : {
     213             : public:
     214             :     void                    set_input_folder(std::string const & path);
     215             :     std::string const &     get_input_folder() const;
     216             :     void                    set_output(std::string const & filename);
     217             :     std::string const &     get_output() const;
     218             :     void                    set_c_file(std::string const & filename);
     219             :     std::string const &     get_c_file() const;
     220             :     bool                    compile();
     221             :     int                     get_errno() const;
     222             :     std::string const &     get_errmsg() const;
     223             :     int                     get_line() const;
     224             :     std::string const &     get_filename() const;
     225             :     tld_string_manager &    get_string_manager();
     226             :     void                    output_to_json(std::ostream & out, bool verbose) const;
     227             : 
     228             : private:
     229             :     typedef std::vector<std::string>                paths_t;
     230             :     typedef std::vector<std::uint8_t>               data_t;
     231             :     typedef std::map<std::string, std::string>      values_t;
     232             : 
     233             :     static constexpr char32_t const        CHAR_ERR = static_cast<char32_t>(-2);
     234             :     static constexpr char32_t const        CHAR_EOF = static_cast<char32_t>(-1);
     235             : 
     236             :     enum token_t
     237             :     {
     238             :         TOKEN_EOF,
     239             :         TOKEN_STRING,
     240             :         TOKEN_IDENTIFIER,
     241             :         TOKEN_WORD,
     242             :         TOKEN_NUMBER,
     243             :         TOKEN_EQUAL,
     244             :         TOKEN_DOT,
     245             :         TOKEN_WILD_CARD,
     246             :         TOKEN_EXCEPTION,
     247             :         TOKEN_OPEN_SQUARE_BRACKET,
     248             :         TOKEN_CLOSE_SQUARE_BRACKET,
     249             :     };
     250       76607 :     class token
     251             :     {
     252             :     public:
     253             :         typedef std::vector<token>      vector_t;
     254             : 
     255             :                                 token(std::string const & filename
     256             :                                     , int line
     257             :                                     , token_t token
     258             :                                     , std::string const & value);
     259             : 
     260             :         std::string const &     get_filename() const;
     261             :         int                     get_line() const;
     262             :         token_t                 get_token() const;
     263             :         std::string const &     get_value() const;
     264             : 
     265             :     private:
     266             :         std::string const       f_filename;
     267             :         int const               f_line = 0;
     268             :         token_t const           f_token = TOKEN_EOF;
     269             :         std::string const       f_value = std::string();
     270             :     };
     271             : 
     272             :     void                    find_files(std::string const & path);
     273             :     void                    process_input_files();
     274             :     void                    process_file(std::string const & filename);
     275             :     bool                    get_backslash(char32_t & c);
     276             :     void                    read_line();
     277             :     bool                    is_space(char32_t wc) const;
     278             :     char32_t                getc();
     279             :     void                    ungetc(char32_t c);
     280             :     bool                    append_wc(std::string & value, char32_t wc);
     281             :     void                    parse_line();
     282             :     void                    parse_variable();
     283             :     void                    parse_tld();
     284             :     void                    print_tokens();
     285             :     void                    define_default_category();
     286             :     void                    find_max_level();
     287             :     void                    compress_tags();
     288             :     uint16_t                find_definition(std::string name) const;
     289             :     void                    output_tlds(std::ostream & out);
     290             :     void                    save_to_file(std::string const & buffer);
     291             :     void                    output_header(std::ostream & out);
     292             :     void                    save_to_c_file(std::string const & buffer);
     293             : 
     294             :     std::string             f_input_folder = "/usr/share/libtld/tlds";
     295             :     std::string             f_output = "/var/lib/libtld/tlds.tld";
     296             :     std::string             f_c_file = std::string();
     297             :     int                     f_errno = 0;
     298             :     std::string             f_errmsg = std::string();
     299             :     paths_t                 f_input_files = paths_t();
     300             :     values_t                f_global_variables = values_t();
     301             :     values_t                f_global_tags = values_t();
     302             :     std::string             f_current_tld = std::string();
     303             :     tld_definition::map_t   f_definitions = tld_definition::map_t();
     304             :     token::vector_t         f_tokens = token::vector_t();
     305             :     data_t                  f_data = data_t();
     306             :     std::string::size_type  f_pos = 0;
     307             :     int                     f_line = 1;
     308             :     std::string             f_filename = std::string();
     309             :     char32_t                f_ungetc[1] = {};
     310             :     std::string::size_type  f_ungetc_pos = 0;
     311             :     tld_string_manager      f_strings = tld_string_manager();
     312             :     string_id_t             f_strings_count = 0;
     313             :     tld_tag_manager         f_tags = tld_tag_manager();
     314           1 :     time_t                  f_created_on = time(nullptr);
     315             :     uint8_t                 f_tld_max_level = 0;
     316             :     uint16_t                f_tld_start_offset = USHRT_MAX;
     317             :     uint16_t                f_tld_end_offset = USHRT_MAX;
     318             : };
     319             : #endif
     320             : /*#ifdef __cplusplus*/
     321             : 
     322             : /* vim: ts=4 sw=4 et
     323             :  */

Generated by: LCOV version 1.13