LCOV - code coverage report
Current view: top level - home/snapwebsites/snapcpp/snapwebsites/snapserver-core-plugins/src/mimetype - magic-to-js.cpp (source / functions) Hit Total Coverage
Test: coverage.info Lines: 476 1403 33.9 %
Date: 2019-12-15 17:13:15 Functions: 58 121 47.9 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : // Snap Websites Server -- transform magic definitions to a .js file
       2             : // Copyright (c) 2014-2019  Made to Order Software Corp.  All Rights Reserved
       3             : //
       4             : // This program is free software; you can redistribute it and/or modify
       5             : // it under the terms of the GNU General Public License as published by
       6             : // the Free Software Foundation; either version 2 of the License, or
       7             : // (at your option) any later version.
       8             : //
       9             : // This program is distributed in the hope that it will be useful,
      10             : // but WITHOUT ANY WARRANTY; without even the implied warranty of
      11             : // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      12             : // GNU General Public License for more details.
      13             : //
      14             : // You should have received a copy of the GNU General Public License
      15             : // along with this program; if not, write to the Free Software
      16             : // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
      17             : 
      18             : // self
      19             : //
      20             : #include "magic-to-js.h"
      21             : 
      22             : 
      23             : // snapwebsites lib
      24             : //
      25             : #include <snapwebsites/snapwebsites.h>
      26             : 
      27             : 
      28             : // snapdev lib
      29             : //
      30             : #include <snapdev/not_reached.h>
      31             : #include <snapdev/not_used.h>
      32             : 
      33             : 
      34             : // C++ lib
      35             : //
      36             : #include <algorithm>
      37             : #include <cstring>
      38             : #include <fstream>
      39             : #include <iomanip>
      40             : #include <memory>
      41             : #include <vector>
      42             : 
      43             : 
      44             : // C lib
      45             : //
      46             : #include <math.h>
      47             : 
      48             : 
      49             : // last include
      50             : //
      51             : #include <snapdev/poison.h>
      52             : 
      53             : 
      54             : 
      55             : 
      56             : /** \file
      57             :  * \brief Tool used to transform magic files into .js files.
      58             :  *
      59             :  * This tool is used to parse magic data files to use in JavaScript
      60             :  * to detect file formats on file Drag & Drop.
      61             :  *
      62             :  * The documentation of the format of the files is found in the magic
      63             :  * man page:
      64             :  *
      65             :  * \code
      66             :  *      man 5 magic
      67             :  * \endcode
      68             :  *
      69             :  * The following is an approximation of the lexer:
      70             :  *
      71             :  * \code
      72             :  * start: comment
      73             :  *      | empty_line
      74             :  *      | command
      75             :  *      | line
      76             :  *
      77             :  * comment: '#' end_of_line new_line
      78             :  *
      79             :  * empty_line: new_line
      80             :  *           | spaces new_line
      81             :  *
      82             :  * command: '!' ':' cmd
      83             :  *
      84             :  * cmd: mimetype
      85             :  *    | apple
      86             :  *    | strength
      87             :  * 
      88             :  * mimetype: 'mimetype' spaces end_of_line new_line
      89             :  *
      90             :  * apple: 'apple' spaces end_of_line new_line
      91             :  *
      92             :  * strength: 'strength' spaces binop spaces number new_line
      93             :  *
      94             :  * line: level offset spaces type spaces value opt_message new_line
      95             :  *
      96             :  * level: '>'
      97             :  *      | level '>'
      98             :  *
      99             :  * offset: number
     100             :  *       | opt_index '(' opt_index number opt_size opt_adjustment ')'
     101             :  *
     102             :  * type: identifier
     103             :  *     | identifier '&' number
     104             :  *     | identifier '/' flags
     105             :  *     | identifier '/' number     -- search/123
     106             :  *
     107             :  * -- valid types are: byte, short, long, quad, float, double, string, pstring,
     108             :  *                     date, qdate, ldate, qldate, beid3, beshort, belong,
     109             :  *                     bequad, befloat, bedouble, bedate, beqdate, beldate,
     110             :  *                     beqldate, bestring16, leid3, leshort, lelong, lequad,
     111             :  *                     lefloat, ledouble, ledate, leqdate, leldate, leqldate,
     112             :  *                     lestring16, melong, medate, meldate, indirect, name,
     113             :  *                     use, regex, search, default, and 'u'-<integer type>
     114             :  *
     115             :  * value: str_value
     116             :  *      | num_value
     117             :  *      | '!' str_value
     118             :  *      | '!' num_value
     119             :  *
     120             :  * str_value: opt_str_comparison [^ \n\r]+
     121             :  *
     122             :  * opt_str_comparison: '='
     123             :  *                   | '<'
     124             :  *                   | '>'
     125             :  *
     126             :  * num_value: opt_num_comparison number
     127             :  *          | x
     128             :  *
     129             :  * opt_num_comparison: opt_str_comparison
     130             :  *                   | '&'
     131             :  *                   | '^'
     132             :  *                   | '~'
     133             :  *
     134             :  * opt_message: (* empty *)
     135             :  *            | spaces
     136             :  *            | spaces end_of_line
     137             :  *
     138             :  * new_line: '\n'
     139             :  *         | '\r'
     140             :  *         | '\r' '\n'
     141             :  *
     142             :  * opt_spaces: (* empty *)
     143             :  *           | spaces
     144             :  *
     145             :  * spaces: space
     146             :  *       | spaces space
     147             :  *
     148             :  * space: ' '
     149             :  *      | '\t'
     150             :  *
     151             :  * identifier: [a-zA-Z_][0-9a-zA-Z_]*
     152             :  *
     153             :  * flags: [a-zA-Z]+
     154             :  *
     155             :  * -- valid flags for string are: WwcCtb
     156             :  * -- valid flags for pstring are: BHhLlJ
     157             :  *
     158             :  * opt_size: (* empty *)
     159             :  *         | '.' [bilmsBILS]
     160             :  *
     161             :  * opt_index: (* empty *)
     162             :  *          | &
     163             :  *
     164             :  * opt_adjustment: '+' number
     165             :  *               | '-' number
     166             :  *               | '*' number
     167             :  *               | '/' number
     168             :  *               | '%' number
     169             :  *               | '&' number
     170             :  *               | '|' number
     171             :  *               | '^' number
     172             :  *               | '(' offset ')'
     173             :  *
     174             :  * binop: '+' opt_spaces number
     175             :  *      | '-' opt_spaces number
     176             :  *      | '*' opt_spaces number
     177             :  *      | '/' opt_spaces number
     178             :  *
     179             :  * number: decimal
     180             :  *       | octal
     181             :  *       | hexadecimal
     182             :  *       | floating_point
     183             :  *
     184             :  * decimal: [1-9][0-9]*
     185             :  *
     186             :  * floating_point: [1-9][0-9]* '.' [0-9]* ( [eE] [-+]? [0-9]+ )?
     187             :  *
     188             :  * octal: 0[0-7]*
     189             :  *
     190             :  * hexadecimal: 0[xX][0-9a-fA-F]+
     191             :  *
     192             :  * end_of_line: .*
     193             :  * \endcode
     194             :  */
     195             : 
     196             : 
     197             : namespace
     198             : {
     199             : 
     200             : bool g_debug = false;
     201             : 
     202             : } // no name namespace
     203             : 
     204             : 
     205             : /** \brief Lexer used to read the data from the input files.
     206             :  *
     207             :  * The lexer transforms the input files into tokens.
     208             :  */
     209           1 : class lexer
     210             : {
     211             : public:
     212             :     enum class mode_t
     213             :     {
     214             :         LEXER_MODE_NORMAL,                  // normal parsing
     215             :         LEXER_MODE_NORMAL_WITHOUT_FLOATS,   // normal parsing, but no floats
     216             :         LEXER_MODE_MESSAGE,                 // read whatever up to the end of line as a string (keep spaces, do not convert integers, etc.)
     217             :         LEXER_MODE_REGEX                    // reading a regular expression (read as a string)
     218             :     };
     219             : 
     220             :     typedef std::shared_ptr<lexer>      pointer_t;
     221             :     typedef std::vector<std::string>    filenames_t;
     222             : 
     223         613 :     class token_t
     224             :     {
     225             :     public:
     226             :         enum class type_t
     227             :         {
     228             :             TOKEN_TYPE_EOT,         // end of token
     229             :             TOKEN_TYPE_CHARACTER,   // '\n' for new line, ' ' for spaces (space or tab), other operators as themselves
     230             :             TOKEN_TYPE_STRING,      // string/identifier depending on where it appears
     231             :             TOKEN_TYPE_INTEGER,     // decimal, hexadecimal, and octal
     232             :             TOKEN_TYPE_FLOAT,       // floating point ('.' is the trigger)
     233             :             TOKEN_TYPE_COMMAND      // !:<command> a string with "command"
     234             :         };
     235             :         typedef char            character_t;
     236             :         typedef std::string     string_t;
     237             :         typedef int64_t         integer_t;
     238             :         typedef double          float_t;
     239             : 
     240         156 :                         token_t()
     241         156 :                             : f_type(type_t::TOKEN_TYPE_EOT)
     242             :                         {
     243         156 :                         }
     244             : 
     245          82 :                         token_t(character_t character)
     246          82 :                             : f_type(type_t::TOKEN_TYPE_CHARACTER)
     247          82 :                             , f_character(character)
     248             :                         {
     249          82 :                         }
     250             : 
     251          60 :                         token_t(string_t string, bool is_string = true)
     252          60 :                             : f_type(is_string ? type_t::TOKEN_TYPE_STRING : type_t::TOKEN_TYPE_COMMAND)
     253          60 :                             , f_string(string)
     254             :                         {
     255          60 :                         }
     256             : 
     257          26 :                         token_t(integer_t integer)
     258          26 :                             : f_type(type_t::TOKEN_TYPE_INTEGER)
     259          26 :                             , f_integer(integer)
     260             :                         {
     261          26 :                         }
     262             : 
     263           0 :                         token_t(float_t floating_point)
     264           0 :                             : f_type(type_t::TOKEN_TYPE_FLOAT)
     265           0 :                             , f_float(floating_point)
     266             :                         {
     267           0 :                         }
     268             : 
     269         253 :         type_t          get_type() const { return f_type; }
     270             : 
     271          98 :         character_t     get_character() const { return f_character; }
     272          34 :         string_t        get_string() const { return f_string; }
     273          26 :         integer_t       get_integer() const { return f_integer; }
     274           0 :         float_t         get_float() const { return f_float; }
     275             : 
     276             :     private:
     277             :         type_t                      f_type = type_t::TOKEN_TYPE_EOT;
     278             : 
     279             :         // TODO: redefine controlled vars with the typedef's of this class
     280             :         char                        f_character = 0;
     281             :         std::string                 f_string = std::string();
     282             :         int64_t                     f_integer = 0;
     283             :         double                      f_float = 0.0;
     284             :     };
     285             : 
     286             :                     lexer(filenames_t fn);
     287             : 
     288             :     std::string     list_of_filenames() const;
     289             :     token_t         get_token(mode_t mode);
     290           0 :     std::string     current_filename() const { return f_filenames.empty() ? "<no filenames>" : f_filenames[f_fpos - 1]; }
     291           0 :     int32_t         current_line() const { return f_line; }
     292             : 
     293             : private:
     294             :     int             getc();
     295             :     void            ungetc(int c);
     296             :     token_t         get_normal_token(mode_t mode);
     297             :     token_t         get_message_token();
     298             :     token_t         get_identifier_token(int c);
     299             :     token_t         get_string_token();
     300             :     token_t         get_number_token(mode_t mode, int c);
     301             : 
     302             :     filenames_t                     f_filenames = filenames_t();
     303             :     size_t                          f_fpos = 0;
     304             :     int32_t                         f_line = 1;
     305             :     bool                            f_start_of_line = true;
     306             :     std::shared_ptr<std::ifstream>  f_file = std::shared_ptr<std::ifstream>(); // current stream
     307             :     std::vector<char>               f_unget = std::vector<char>();
     308             : };
     309             : 
     310             : 
     311             : /** \brief Print out a token.
     312             :  *
     313             :  * This function prints out a token to the specified output stream.
     314             :  *
     315             :  * \param[in,out] out  The stream where the token is written.
     316             :  * \param[in] token  The token to write out.
     317             :  *
     318             :  * \return A reference to the output stream passed in.
     319             :  */
     320           0 : std::ostream& operator << (std::ostream& out, lexer::token_t const& token)
     321             : {
     322           0 :     switch(token.get_type())
     323             :     {
     324           0 :     case lexer::token_t::type_t::TOKEN_TYPE_EOT:
     325           0 :         out << "end of token";
     326           0 :         break;
     327             : 
     328           0 :     case lexer::token_t::type_t::TOKEN_TYPE_CHARACTER:
     329             :         {
     330           0 :             char c(token.get_character());
     331           0 :             if(c == 0)
     332             :             {
     333           0 :                 out << "character '\\0'";
     334             :             }
     335           0 :             else if(c == '\a')
     336             :             {
     337           0 :                 out << "character '\\a'";
     338             :             }
     339           0 :             else if(c == '\b')
     340             :             {
     341           0 :                 out << "character '\\b'";
     342             :             }
     343           0 :             else if(c == '\f')
     344             :             {
     345           0 :                 out << "character '\\f'";
     346             :             }
     347           0 :             else if(c == '\n')
     348             :             {
     349           0 :                 out << "character '\\n'";
     350             :             }
     351           0 :             else if(c == '\r')
     352             :             {
     353           0 :                 out << "character '\\r'";
     354             :             }
     355           0 :             else if(c == '\t')
     356             :             {
     357           0 :                 out << "character '\\t'";
     358             :             }
     359           0 :             else if(c == '\v')
     360             :             {
     361           0 :                 out << "character '\\v'";
     362             :             }
     363           0 :             else if(c < 0x20 || c >= 0x7F)
     364             :             {
     365           0 :                 out << "character '\\" << std::oct << std::setw(3) << static_cast<int>(c) << std::dec
     366           0 :                     << "' (\\x" << std::hex << std::uppercase << static_cast<int>(c)
     367           0 :                     << std::dec << std::nouppercase << ")";
     368             :             }
     369             :             else
     370             :             {
     371           0 :                 out << "character '" << c << "'";
     372             :             }
     373             :         }
     374           0 :         break;
     375             : 
     376           0 :     case lexer::token_t::type_t::TOKEN_TYPE_STRING:
     377           0 :         out << "string \"" << token.get_string() << "\"";
     378           0 :         break;
     379             : 
     380           0 :     case lexer::token_t::type_t::TOKEN_TYPE_INTEGER:
     381           0 :         out << "integer " << token.get_integer() << " (0x"
     382           0 :             << std::hex << std::uppercase << token.get_integer()
     383           0 :             << std::dec << std::nouppercase << ")";
     384           0 :         break;
     385             : 
     386           0 :     case lexer::token_t::type_t::TOKEN_TYPE_FLOAT:
     387           0 :         out << "float " << token.get_float();
     388           0 :         break;
     389             : 
     390           0 :     case lexer::token_t::type_t::TOKEN_TYPE_COMMAND:
     391           0 :         out << "command !:" << token.get_string();
     392           0 :         break;
     393             : 
     394             :     }
     395             : 
     396           0 :     return out;
     397             : }
     398             : 
     399             : 
     400             : /** \brief Initializes a lexer.
     401             :  *
     402             :  * Magic files are text files. Everything is line based. The lexer
     403             :  * detects the different elements and has intelligence to parse a
     404             :  * line into separate tokens.
     405             :  *
     406             :  * The input is any number of files. Once the end of a file is reached,
     407             :  * the next file is read. A file is always considered to end with a newline
     408             :  * even if none are found in the file.
     409             :  *
     410             :  * \param[in] fn  The list of files to read from.
     411             :  */
     412           1 : lexer::lexer(filenames_t fn)
     413           1 :     : f_filenames(fn)
     414             : {
     415           1 :     if(fn.size() > 0)
     416             :     {
     417           1 :         f_file.reset(new std::ifstream);
     418           1 :         f_file->open(f_filenames[0]);
     419           1 :         if(!f_file->is_open())
     420             :         {
     421           0 :             std::cerr << "error: could not open file \"" << f_filenames[0] << "\".\n";
     422           0 :             exit(1);
     423             :         }
     424           1 :         f_fpos = 1;
     425             :     }
     426           1 : }
     427             : 
     428             : 
     429             : /** \brief Generate the list of filenames for documentation purposes.
     430             :  *
     431             :  * This function generates a list of filenames that can be output in the
     432             :  * output documentation.
     433             :  *
     434             :  * \return List of filenames in a string.
     435             :  */
     436           1 : std::string lexer::list_of_filenames() const
     437             : {
     438           1 :     std::string result;
     439           3 :     for(size_t i(0); i < f_filenames.size(); ++i)
     440             :     {
     441           2 :         result += " * \\li " + f_filenames[i] + "\n";
     442             :     }
     443           1 :     return result;
     444             : }
     445             : 
     446             : 
     447             : /** \brief Read one token.
     448             :  *
     449             :  * This function reads one token from the magic file.
     450             :  */
     451         155 : lexer::token_t lexer::get_token(mode_t mode)
     452             : {
     453         155 :     lexer::token_t token;
     454         155 :     switch(mode)
     455             :     {
     456         133 :     case mode_t::LEXER_MODE_NORMAL:
     457             :     case mode_t::LEXER_MODE_NORMAL_WITHOUT_FLOATS:
     458         133 :         token = get_normal_token(mode);
     459         133 :         break;
     460             : 
     461          20 :     case mode_t::LEXER_MODE_MESSAGE:
     462          20 :         token = get_message_token();
     463          20 :         break;
     464             : 
     465           2 :     case mode_t::LEXER_MODE_REGEX:
     466           2 :         token = get_string_token();
     467           2 :         break;
     468             : 
     469           0 :     default:
     470           0 :         throw std::logic_error("lexer::get_token() called with an invalid mode");
     471             : 
     472             :     }
     473             : 
     474         155 :     if(g_debug)
     475             :     {
     476           0 :         std::cerr << token << std::endl;
     477             :     }
     478             : 
     479         155 :     return token;
     480             : }
     481             : 
     482             : 
     483             : /** \brief Get one character from the input file.
     484             :  *
     485             :  * If the end of the current input file is reached (eof() is returned) then
     486             :  * the function opens the next file, exiting with an error if that fails. Once
     487             :  * no files are left to read, the function returns eof().
     488             :  *
     489             :  * \return The next character, or std::istream::traits_type::eof().
     490             :  */
     491        1128 : int lexer::getc()
     492             : {
     493        1128 :     if(!f_unget.empty())
     494             :     {
     495         120 :         int const c(f_unget.back());
     496         120 :         f_unget.pop_back();
     497         120 :         return c;
     498             :     }
     499             :     for(;;)
     500             :     {
     501        1009 :         int const c(f_file->get());
     502        1009 :         if(c != std::istream::traits_type::eof())
     503             :         {
     504             :             // get a character, return it
     505             : //std::cerr << static_cast<char>(c);
     506        1007 :             return c;
     507             :         }
     508             :         // more files to read?
     509           2 :         if(f_fpos >= f_filenames.size())
     510             :         {
     511           1 :             return std::istream::traits_type::eof();
     512             :         }
     513           1 :         f_file.reset(new std::ifstream);
     514           1 :         f_file->open(f_filenames[f_fpos]);
     515           1 :         if(!f_file->is_open())
     516             :         {
     517             :             // file cannot be read...
     518           0 :             std::cerr << "error: could not open file \"" << f_filenames[f_fpos] << "\".\n";
     519           0 :             exit(1);
     520             :         }
     521           1 :         ++f_fpos;
     522           1 :         f_line = 1;
     523           1 :     }
     524             : }
     525             : 
     526             : 
     527             : /** \brief Restore a character.
     528             :  *
     529             :  * Note that we support restoring any character, although it is supposed to
     530             :  * be the last character read. You may call ungetc() any number of times.
     531             :  * Note that this does not modify the file stream in any way.
     532             :  *
     533             :  * \param[in] c  The character to restore.
     534             :  */
     535         120 : void lexer::ungetc(int c)
     536             : {
     537         120 :     if(c != std::istream::traits_type::eof())
     538             :     {
     539         120 :         f_unget.push_back(c);
     540             :     }
     541         120 : }
     542             : 
     543             : 
     544             : /** \brief Retrieve a token; here the parser transforms the input to a type.
     545             :  *
     546             :  * This function reads one token and returns it.
     547             :  *
     548             :  * If the end of all the input files is reached, then the type_t::TOKEN_TYPE_EOT
     549             :  * token is returned.
     550             :  *
     551             :  * \return The next token.
     552             :  */
     553         151 : lexer::token_t lexer::get_normal_token(mode_t mode)
     554             : {
     555             :     // at this time the only reason we loop is a line commented out
     556             :     // or an empty line; anything else either returns or generates
     557             :     // an error and exits the tool at once
     558             :     for(;;)
     559             :     {
     560         151 :         bool is_start(f_start_of_line);
     561         151 :         f_start_of_line = false;
     562         151 :         int c(getc());
     563         151 :         switch(c)
     564             :         {
     565          10 :         case '#':
     566          10 :             if(is_start)
     567             :             {
     568             :                 // skip the comment, it's just like a message!
     569          10 :                 get_message_token();
     570          10 :                 getc(); // skip the '\n' right away
     571          10 :                 ++f_line;
     572          10 :                 f_start_of_line = true; // next call we're at the start of the line
     573          10 :                 break;
     574             :             }
     575           0 :             return get_string_token();
     576             : 
     577         110 :         case ' ':
     578             :         case '\t':
     579             :             // skip all the spaces between tokens and return ONE space
     580             :             for(;;)
     581             :             {
     582         188 :                 c = getc();
     583         110 :                 if(c != ' ' && c != '\t')
     584             :                 {
     585          32 :                     ungetc(c);
     586          32 :                     break;
     587             :                 }
     588             :             }
     589          32 :             return token_t(static_cast<token_t::character_t>(' '));
     590             : 
     591           0 :         case '\r':
     592             :             // remove \r\n if such is found
     593           0 :             c = getc();
     594           0 :             if(c != '\n')
     595             :             {
     596           0 :                 ungetc(c);
     597             :             }
     598             : #if __cplusplus >= 201700
     599             :             [[fallthrough]];
     600             : #endif
     601             :         case '\n':
     602          28 :             ++f_line;
     603          28 :             f_start_of_line = true; // next call we're at the start of the line
     604          28 :             if(is_start)
     605             :             {
     606             :                 // no need to return empty lines
     607           8 :                 break;
     608             :             }
     609          20 :             return token_t(static_cast<token_t::character_t>('\n'));
     610             : 
     611          24 :         case '>':
     612             :         case '<':
     613             :         case '=':
     614             :         case '&':
     615             :         case '^':
     616             :         case '*':
     617             :         case '/':
     618             :         case '+':
     619             :         case '-':
     620             :         case '(':
     621             :         case ')':
     622             :         case '.':
     623          24 :             return token_t(static_cast<token_t::character_t>(c));
     624             : 
     625          10 :         case '!':
     626             :             // TBD: should we force this check at the start of a line?
     627             :             //      (if it works like this for us, we will be just fine.)
     628          10 :             c = getc();
     629          10 :             if(c == ':')
     630             :             {
     631             :                 // read an identifier
     632           8 :                 token_t id(get_string_token());
     633             :                 // and transform to a command
     634           4 :                 return token_t(id.get_string(), false);
     635           6 :             }
     636           6 :             ungetc(c);
     637           6 :             return token_t(static_cast<token_t::character_t>('!'));
     638             : 
     639          47 :         default:
     640          47 :             if(c >= '0' && c <= '9')
     641             :             {
     642          26 :                 return get_number_token(mode, c);
     643             :             }
     644          21 :             if((c >= 'a' && c <= 'z')
     645           1 :             || (c >= 'A' && c <= 'Z')
     646           1 :             || c == '_')
     647             :             {
     648          20 :                 return get_identifier_token(c);
     649             :             }
     650           1 :             if(c == std::istream::traits_type::eof())
     651             :             {
     652           1 :                 return token_t();
     653             :             }
     654           0 :             std::cerr << "error:" << f_filenames[f_fpos - 1]
     655           0 :                       << ":" << f_line
     656           0 :                       << ": unsupported character " << c
     657           0 :                       << " (0x" << std::hex << std::uppercase << c
     658           0 :                       << ") from input file.\n";
     659           0 :             exit(1);
     660             :             snap::NOTREACHED();
     661             : 
     662             :         }
     663          18 :     }
     664             : }
     665             : 
     666             : 
     667             : /** \brief Retrieve the message.
     668             :  *
     669             :  * This function reads characters up to the following new line character.
     670             :  * If the end of the file is found first, then the process stops on that
     671             :  * even too.
     672             :  *
     673             :  * \return The message token (a string token).
     674             :  */
     675          30 : lexer::token_t lexer::get_message_token()
     676             : {
     677             :     // the message ends the line, no special parsing of messages
     678          60 :     std::string message;
     679             :     for(;;)
     680             :     {
     681         674 :         int c(getc());
     682         674 :         if(c == std::istream::traits_type::eof())
     683             :         {
     684             :             // return type_t::TOKEN_TYPE_EOT
     685           0 :             return token_t(message);
     686             :         }
     687         674 :         if(c == '\r')
     688             :         {
     689           0 :             c = getc();
     690           0 :             if(c != '\n')
     691             :             {
     692           0 :                 ungetc(c);
     693             :             }
     694           0 :             c = '\n';
     695             :         }
     696         674 :         if(c == '\n')
     697             :         {
     698             :             // we need a new line at the end of the string so keep it here
     699          30 :             ungetc('\n');
     700          30 :             return token_t(message);
     701             :         }
     702         644 :         message += c;
     703         644 :     }
     704             : }
     705             : 
     706             : 
     707             : /** \brief We found a digit, so reading a number.
     708             :  *
     709             :  * This function reads a number, either an integer, or if a period (.)
     710             :  * is found, a floating point.
     711             :  *
     712             :  * Integers support decimal, octal, and hexadecimal.
     713             :  *
     714             :  * Floating points only support decimal with 'e' for the exponent.
     715             :  *
     716             :  * This function does not detect a sign at the start of the number.
     717             :  *
     718             :  * \param[in] mode  The mode used to read this token.
     719             :  * \param[in] c  The start digit.
     720             :  */
     721          26 : lexer::token_t lexer::get_number_token(mode_t mode, int c)
     722             : {
     723          26 :     token_t::integer_t   ri(0);
     724          26 :     token_t::float_t     rf(0.0);
     725             : 
     726          26 :     int d(getc());
     727             : 
     728             :     // hexadecimal?
     729          26 :     if(c == '0')
     730             :     {
     731          14 :         if(d == 'x' || d == 'X')
     732             :         {
     733             :             // in C, hexadecimal is simple, any character can follow
     734             :             for(;;)
     735             :             {
     736          39 :                 d = getc();
     737          21 :                 if(d >= '0' && d <= '9')
     738             :                 {
     739          16 :                     ri = ri * 16 + (d - '0');
     740             :                 }
     741           5 :                 else if(d >= 'a' && d <= 'f')
     742             :                 {
     743           2 :                     ri = ri * 16 + (d - 'a' + 10);
     744             :                 }
     745           3 :                 else if(d >= 'A' && d <= 'F')
     746             :                 {
     747           0 :                     ri = ri * 16 + (d - 'A' + 10);
     748             :                 }
     749             :                 else
     750             :                 {
     751           3 :                     ungetc(d);
     752           3 :                     return token_t(ri);
     753             :                 }
     754             :             }
     755             :         }
     756             : 
     757             :         // if no 'x' or 'X' then it is octal
     758             :         for(;;)
     759             :         {
     760          11 :             if(d >= '0' && d <= '7')
     761             :             {
     762           0 :                 ri = ri * 8 + (d - '0');
     763             :             }
     764          11 :             else if(d == '8' || d == '9')
     765             :             {
     766           0 :                 std::cerr << "error: invalid octal number in \"" << f_filenames[f_fpos - 1] << "\".\n";
     767           0 :                 exit(1);
     768             :             }
     769             :             else
     770             :             {
     771          11 :                 ungetc(d);
     772          11 :                 return token_t(ri);
     773             :             }
     774           0 :             d = getc();
     775             :         }
     776             :         snap::NOTREACHED();
     777             :     }
     778             : 
     779             :     // first read the number as if it were an integer
     780          12 :     ri = c - '0';
     781             :     for(;;)
     782             :     {
     783          20 :         if(d >= '0' && d <= '9')
     784             :         {
     785           4 :             ri = ri * 10 + (d - '0');
     786             :         }
     787             :         else
     788             :         {
     789             :             break;
     790             :         }
     791           4 :         d = getc();
     792             :     }
     793             : 
     794             :     // floating point number?
     795             :     // TBD: we may need to support detecting 'e' or 'E' as a floating point too?
     796          12 :     if(d == '.'
     797           0 :     && mode == lexer::mode_t::LEXER_MODE_NORMAL_WITHOUT_FLOATS)
     798             :     {
     799             :         // TBD: for floating points we may want to use the strtod() or
     800             :         //      similar function to make sure that we get the same result
     801             :         //      as what other users would get in other languages.
     802             :         //      (those functions may have heuristics to properly handle
     803             :         //      very large or very small numbers which we may not have
     804             :         //      properly captured here.)
     805           0 :         double dec = 1.0;
     806             :         for(;;)
     807             :         {
     808           0 :             d = getc();
     809           0 :             if(d >= '0' && d <= '9')
     810             :             {
     811           0 :                 dec *= 10.0;
     812           0 :                 rf = rf + (d - '0') / dec;
     813             :             }
     814             :             else
     815             :             {
     816             :                 break;
     817             :             }
     818             :         }
     819           0 :         if(d == 'e' || d == 'E')
     820             :         {
     821             :             // exponent
     822           0 :             double sign(1.0);
     823           0 :             d = getc();
     824           0 :             if(d == '-')
     825             :             {
     826           0 :                 sign = -1.0;
     827           0 :                 d = getc();
     828             :             }
     829           0 :             else if(d == '+')
     830             :             {
     831           0 :                 d = getc();
     832             :             }
     833           0 :             if(d >= '0' && d <= '9')
     834             :             {
     835           0 :                 token_t::float_t exponent(0.0);
     836             :                 for(;;)
     837             :                 {
     838           0 :                     exponent = exponent * 1 + (d - '0');
     839           0 :                     d = getc();
     840           0 :                     if(d < '0' || d > '9')
     841             :                     {
     842           0 :                         ungetc(d);
     843           0 :                         rf *= pow(10, exponent * sign);
     844           0 :                         return token_t(rf);
     845             :                     }
     846             :                 }
     847             :             }
     848             :             else
     849             :             {
     850           0 :                 std::cerr << "error: invalid floating point exponent, digits expected after the 'e', in \"" << f_filenames[f_fpos - 1] << "\".\n";
     851           0 :                 exit(1);
     852             :             }
     853             :         }
     854           0 :         ungetc(d);
     855           0 :         return token_t(rf);
     856             :     }
     857             : 
     858          12 :     ungetc(d);
     859          12 :     return token_t(ri);
     860             : }
     861             : 
     862             : 
     863             : /** \brief Read one identifier.
     864             :  *
     865             :  * This function reads one C-like identifier. Identifiers are parsed from
     866             :  * the 3rd token in a standard line.
     867             :  *
     868             :  * \param[in] c  The first character that was already read.
     869             :  *
     870             :  * \return A string token.
     871             :  */
     872          20 : lexer::token_t lexer::get_identifier_token(int c)
     873             : {
     874          40 :     std::string identifier;
     875             :     for(;;)
     876             :     {
     877         160 :         identifier += c; // note: c may be '\0' here!
     878          90 :         c = getc();
     879          90 :         if((c < '0' || c > '9')
     880          90 :         && (c < 'a' || c > 'z')
     881          20 :         && (c < 'A' || c > 'Z')
     882          20 :         && c != '_')
     883             :         {
     884             :             // done reading this identifier
     885          20 :             ungetc(c);
     886          40 :             return token_t(identifier);
     887             :         }
     888             :     }
     889             : }
     890             : 
     891             : 
     892             : /** \brief Read one string ending with a space.
     893             :  *
     894             :  * This function reads one string that ends with a space. This string can
     895             :  * generally include any character. Special characters are added with a
     896             :  * backslash.
     897             :  *
     898             :  * \param[in] c  The first character that was already read.
     899             :  *
     900             :  * \return A string token.
     901             :  */
     902           6 : lexer::token_t lexer::get_string_token()
     903             : {
     904          12 :     std::string str;
     905             :     for(;;)
     906             :     {
     907          32 :         int c(getc());
     908          32 :         if(c == '\\') // really allow any character in identifier including spaces!
     909             :         {
     910           0 :             c = getc();
     911           0 :             if(c == std::istream::traits_type::eof())
     912             :             {
     913           0 :                 return token_t(str);
     914             :             }
     915             :             // transform the backslash character
     916           0 :             switch(c)
     917             :             {
     918           0 :             case '0':
     919             :                 {
     920           0 :                     int d(getc());
     921           0 :                     if(d == 'x' || d == 'X')
     922             :                     {
     923             :                         // hexadecimal character, get one or 2 more digits
     924           0 :                         c = 0;
     925           0 :                         int max_chars(2);
     926           0 :                         for(; max_chars > 0; --max_chars)
     927             :                         {
     928           0 :                             d = getc();
     929           0 :                             if(d >= '0' && d <= '7')
     930             :                             {
     931           0 :                                 c = c * 16 + (d - '0');
     932             :                             }
     933           0 :                             else if(d >= 'a' && d <= 'f')
     934             :                             {
     935           0 :                                 c = c * 16 + (d - 'a' + 10);
     936             :                             }
     937           0 :                             else if(d >= 'A' && d <= 'F')
     938             :                             {
     939           0 :                                 c = c * 16 + (d - 'A' + 10);
     940             :                             }
     941             :                             else
     942             :                             {
     943             :                                 break;
     944             :                             }
     945             :                         }
     946           0 :                         if(max_chars == 2)
     947             :                         {
     948             :                             // invalid \x without an hex digit
     949           0 :                             std::cerr << "error: invalid use of \\x without a valid hexadecimal number following in \"" << f_filenames[f_fpos - 1] << "\".\n";
     950           0 :                             exit(1);
     951             :                         }
     952           0 :                         break;
     953             :                     }
     954           0 :                     ungetc(d);
     955             :                 }
     956             : #if __cplusplus >= 201700
     957             :         [[fallthrough]];
     958             : #endif
     959           0 :             case '1':
     960             :             case '2':
     961             :             case '3':
     962             :             case '4':
     963             :             case '5':
     964             :             case '6':
     965             :             case '7':
     966           0 :                 c = c - '0';
     967           0 :                 for(int max_chars(3); max_chars > 0; --max_chars)
     968             :                 {
     969           0 :                     int d(getc());
     970           0 :                     if(d >= '0' && d <= '7')
     971             :                     {
     972           0 :                         c = c * 8 + (d - '0');
     973             :                     }
     974             :                     else
     975             :                     {
     976             :                         break;
     977             :                     }
     978           0 :                 }
     979           0 :                 break;
     980             : 
     981           0 :             case 'a':
     982           0 :                 c = '\a';
     983           0 :                 break;
     984             : 
     985           0 :             case 'b':
     986           0 :                 c = '\b';
     987           0 :                 break;
     988             : 
     989           0 :             case 'f':
     990           0 :                 c = '\f';
     991           0 :                 break;
     992             : 
     993           0 :             case 'n':
     994           0 :                 c = '\n';
     995           0 :                 break;
     996             : 
     997           0 :             case 'r':
     998           0 :                 c = '\r';
     999           0 :                 break;
    1000             : 
    1001           0 :             case 't':
    1002           0 :                 c = '\t';
    1003           0 :                 break;
    1004             : 
    1005           0 :             case 'v':
    1006           0 :                 c = '\v';
    1007           0 :                 break;
    1008             : 
    1009             :             //default: -- keep 'c' as is
    1010             :             }
    1011             :         }
    1012          62 :         else if(c == ' ' || c == '\t'
    1013          26 :              || c == '\r' || c == '\n'
    1014          58 :              || c == std::istream::traits_type::eof())
    1015             :         {
    1016             :             // done reading this string
    1017           6 :             ungetc(c);
    1018           6 :             return token_t(str);
    1019             :         }
    1020          26 :         str += c; // note: c may be '\0' here!
    1021          26 :     }
    1022             :     snap::NOTREACHED();
    1023             : }
    1024             : 
    1025             : 
    1026             : /** \brief Parse magic files.
    1027             :  *
    1028             :  * This class is used to parse magic files.
    1029             :  */
    1030           1 : class parser
    1031             : {
    1032             : public:
    1033             :     typedef std::shared_ptr<parser> pointer_t;
    1034             : 
    1035          32 :     class entry_t
    1036             :     {
    1037             :     public:
    1038             :         typedef std::shared_ptr<entry_t>        pointer_t;
    1039             : 
    1040             :         enum class type_t
    1041             :         {
    1042             :             ENTRY_TYPE_UNKNOWN,
    1043             : 
    1044             :             // int -- 1 byte
    1045             :             ENTRY_TYPE_BYTE,
    1046             :             ENTRY_TYPE_UBYTE,
    1047             :             // int -- 2 bytes
    1048             :             ENTRY_TYPE_SHORT,
    1049             :             ENTRY_TYPE_LESHORT,
    1050             :             ENTRY_TYPE_BESHORT,
    1051             :             ENTRY_TYPE_USHORT,
    1052             :             ENTRY_TYPE_ULESHORT,
    1053             :             ENTRY_TYPE_UBESHORT,
    1054             :             // int -- 4 bytes
    1055             :             ENTRY_TYPE_LONG,
    1056             :             ENTRY_TYPE_LELONG,
    1057             :             ENTRY_TYPE_BELONG,
    1058             :             ENTRY_TYPE_MELONG,
    1059             :             ENTRY_TYPE_ULONG,
    1060             :             ENTRY_TYPE_ULELONG,
    1061             :             ENTRY_TYPE_UBELONG,
    1062             :             ENTRY_TYPE_UMELONG,
    1063             :             // int -- 4 bytes -- an ID3 size is 32 bits defined as: ((size & 0x0FFFFFFF) * 4)
    1064             :             ENTRY_TYPE_BEID3,
    1065             :             ENTRY_TYPE_LEID3,
    1066             :             ENTRY_TYPE_UBEID3,
    1067             :             ENTRY_TYPE_ULEID3,
    1068             :             // int -- 8 bytes
    1069             :             ENTRY_TYPE_QUAD,
    1070             :             ENTRY_TYPE_BEQUAD,
    1071             :             ENTRY_TYPE_LEQUAD,
    1072             :             ENTRY_TYPE_UQUAD,
    1073             :             ENTRY_TYPE_UBEQUAD,
    1074             :             ENTRY_TYPE_ULEQUAD,
    1075             :             // float -- 4 bytes
    1076             :             ENTRY_TYPE_FLOAT,
    1077             :             ENTRY_TYPE_BEFLOAT,
    1078             :             ENTRY_TYPE_LEFLOAT,
    1079             :             // float -- 8 bytes
    1080             :             ENTRY_TYPE_DOUBLE,
    1081             :             ENTRY_TYPE_BEDOUBLE,
    1082             :             ENTRY_TYPE_LEDOUBLE,
    1083             :             // "text" (if value includes characters considered binary bytes then it is considered binary too)
    1084             :             ENTRY_TYPE_STRING,
    1085             :             ENTRY_TYPE_PSTRING,
    1086             :             ENTRY_TYPE_BESTRING16,
    1087             :             ENTRY_TYPE_LESTRING16,
    1088             :             ENTRY_TYPE_SEARCH,
    1089             :             ENTRY_TYPE_REGEX,
    1090             :             // date
    1091             :             ENTRY_TYPE_DATE,
    1092             :             ENTRY_TYPE_QDATE,
    1093             :             ENTRY_TYPE_LDATE,
    1094             :             ENTRY_TYPE_QLDATE,
    1095             :             ENTRY_TYPE_BEDATE,
    1096             :             ENTRY_TYPE_BEQDATE,
    1097             :             ENTRY_TYPE_BELDATE,
    1098             :             ENTRY_TYPE_BEQLDATE,
    1099             :             ENTRY_TYPE_LEDATE,
    1100             :             ENTRY_TYPE_LEQDATE,
    1101             :             ENTRY_TYPE_LELDATE,
    1102             :             ENTRY_TYPE_LEQLDATE,
    1103             :             ENTRY_TYPE_MEDATE,
    1104             :             ENTRY_TYPE_MELDATE,
    1105             :             // special
    1106             :             ENTRY_TYPE_INDIRECT,
    1107             :             ENTRY_TYPE_DEFAULT,
    1108             :             ENTRY_TYPE_NAME,
    1109             :             ENTRY_TYPE_USE
    1110             :         };
    1111             : 
    1112             :         typedef lexer::token_t::integer_t       integer_t;
    1113             :         typedef lexer::token_t::float_t         float_t;
    1114             : 
    1115             :         // string & search flags
    1116             :         static integer_t const  ENTRY_FLAG_COMPACT_BLANK        = 0x00000001; // W
    1117             :         static integer_t const  ENTRY_FLAG_BLANK                = 0x00000002; // w
    1118             :         static integer_t const  ENTRY_FLAG_LOWER_INSENSITIVE    = 0x00000004; // c
    1119             :         static integer_t const  ENTRY_FLAG_UPPER_INSENSITIVE    = 0x00000008; // C
    1120             :         static integer_t const  ENTRY_FLAG_TEXT_FILE            = 0x00000010; // t
    1121             :         static integer_t const  ENTRY_FLAG_BINARY_FILE          = 0x00000020; // b
    1122             :         // pstring sizes
    1123             :         static integer_t const  ENTRY_FLAG_BYTE                 = 0x00000040; // B
    1124             :         static integer_t const  ENTRY_FLAG_BE_SHORT             = 0x00000080; // H
    1125             :         static integer_t const  ENTRY_FLAG_LE_SHORT             = 0x00000100; // h
    1126             :         static integer_t const  ENTRY_FLAG_BE_LONG              = 0x00000200; // L
    1127             :         static integer_t const  ENTRY_FLAG_LE_LONG              = 0x00000400; // l
    1128             :         static integer_t const  ENTRY_FLAG_SELF_INCLUDED        = 0x00000800; // J (size includes itself + string)
    1129             :         // compare value
    1130             :         static integer_t const  ENTRY_FLAG_NOT                  = 0x00001000; // !value
    1131             :         static integer_t const  ENTRY_FLAG_EQUAL                = 0x00002000; // =value
    1132             :         static integer_t const  ENTRY_FLAG_LESS                 = 0x00004000; // <value
    1133             :         static integer_t const  ENTRY_FLAG_GREATER              = 0x00008000; // >value
    1134             :         static integer_t const  ENTRY_FLAG_ARE_SET              = 0x00010000; // &value   integer only
    1135             :         static integer_t const  ENTRY_FLAG_ARE_CLEAR            = 0x00020000; // ^value   integer only
    1136             :         static integer_t const  ENTRY_FLAG_NEGATE               = 0x00040000; // ~value   integer only
    1137             :         static integer_t const  ENTRY_FLAG_TRUE                 = 0x00080000; // x        numbers only
    1138             :         // regex flags
    1139             :         static integer_t const  ENTRY_FLAG_LINES                = 0x00100000; // l        regex only
    1140             :         static integer_t const  ENTRY_FLAG_CASE_INSENSITIVE     = 0x00200000; // c        regex only
    1141             :         static integer_t const  ENTRY_FLAG_START_OFFSET         = 0x00400000; // s        regex only
    1142             :         // offset flags
    1143             :         static integer_t const  ENTRY_FLAG_RELATIVE             = 0x04000000; // &        before the offset
    1144             :         static integer_t const  ENTRY_FLAG_INDIRECT_RELATIVE    = 0x08000000; // (&...)   before the indirect offset
    1145             : 
    1146             :         // indirect sizes (TBD: what are the "i and I"? why have "b and B"?)
    1147             :         static integer_t const  ENTRY_FLAG_INDIRECT_BYTE        = 0x01000000000; // b or B (B not used in existing files)
    1148             :         static integer_t const  ENTRY_FLAG_INDIRECT_BE_SHORT    = 0x02000000000; // S
    1149             :         static integer_t const  ENTRY_FLAG_INDIRECT_LE_SHORT    = 0x04000000000; // s
    1150             :         static integer_t const  ENTRY_FLAG_INDIRECT_BE_LONG     = 0x08000000000; // L
    1151             :         static integer_t const  ENTRY_FLAG_INDIRECT_LE_LONG     = 0x10000000000; // l
    1152             :         static integer_t const  ENTRY_FLAG_INDIRECT_ME_LONG     = 0x20000000000; // m
    1153             :         static integer_t const  ENTRY_FLAG_INDIRECT_BE_ID3      = 0x40000000000; // I
    1154             :         static integer_t const  ENTRY_FLAG_INDIRECT_LE_ID3      = 0x80000000000; // i
    1155             : 
    1156          12 :         void                set_level(integer_t level) { f_level = level; }
    1157          49 :         integer_t           get_level() const { return f_level; }
    1158             : 
    1159          16 :         void                set_offset(integer_t offset) { f_offset = offset; }
    1160          12 :         integer_t           get_offset() const { return f_offset; }
    1161             : 
    1162          16 :         void                set_type(type_t type) { f_type = type; }
    1163          60 :         type_t              get_type() const { return f_type; }
    1164             : 
    1165           0 :         void                set_mask(integer_t mask) { f_mask = mask; }
    1166             :         integer_t           get_mask() const { return f_mask; }
    1167             : 
    1168           0 :         void                set_maxlength(integer_t maxlength) { f_maxlength = maxlength; }
    1169           0 :         integer_t           get_maxlength() const { return f_maxlength; }
    1170             : 
    1171          10 :         void                set_flags(integer_t flags) { f_flags |= flags; }
    1172             :         void                clear_flags(integer_t flags) { f_flags &= ~flags; }
    1173           0 :         integer_t           get_flags() const { return f_flags; }
    1174             :         std::string         flags_to_js_operator() const;
    1175             : 
    1176           4 :         void                set_mimetype(std::string mimetype) { f_mimetype = mimetype; }
    1177          44 :         std::string         get_mimetype() const { return f_mimetype; }
    1178             : 
    1179          10 :         void                set_integer(integer_t integer) { f_integer = integer; }
    1180          10 :         integer_t           get_integer() const { return f_integer; }
    1181             : 
    1182           0 :         void                set_float(float_t flt) { f_float = flt; }
    1183             :         float_t             get_float() const { return f_float; }
    1184             : 
    1185           2 :         void                set_string(std::string string) { f_string = string; }
    1186           2 :         std::string         get_string() const { return f_string; }
    1187             : 
    1188             :     private:
    1189             :         integer_t           f_level = 0;                // number of > at the start (0+)
    1190             :         integer_t           f_offset = 0;               // no support for indirections at this point (it's not that complicated, just time consuming to make sure it works right.)
    1191             :         type_t              f_type = type_t::ENTRY_TYPE_UNKNOWN;         // see enum
    1192             :         integer_t           f_mask = 0;                 // defined with the type as in: "long&0xF0F0F0F0"
    1193             :         integer_t           f_maxlength = 0;            // search/<maxlength>
    1194             :         integer_t           f_flags = 0;                // [p]string/<flags>, and NOT (!)
    1195             :         std::string         f_mimetype = std::string(); // a string found after the !:mimetype ...
    1196             :         integer_t           f_integer = 0;              // compare with this integer
    1197             :         float_t             f_float = 0.0;              // compare with this float
    1198             :         std::string         f_string = std::string();   // compare with this string (may include '\0')
    1199             :     };
    1200             :     typedef std::vector<entry_t::pointer_t>    entry_vector_t;
    1201             : 
    1202           1 :     parser(lexer::pointer_t& l, std::string const& magic_name) // only records its arguments; parsing is started separately with parse()
    1203           1 :         : f_lexer(l) // lexer that parse() pulls its tokens from
    1204           1 :         , f_magic_name(magic_name) // stored as given
    1205             :     {
    1206           1 :     }
    1207             : 
    1208             :     void                    parse();
    1209             :     void                    output();
    1210             : 
    1211             : private:
    1212             :     void                    output_entry(size_t start, size_t end, bool has_mime);
    1213             :     void                    output_header();
    1214             :     void                    output_footer();
    1215             : 
    1216             :     lexer::pointer_t        f_lexer = lexer::pointer_t();
    1217             : 
    1218             :     entry_vector_t          f_entries = entry_vector_t();
    1219             :     std::string             f_magic_name = std::string();
    1220             : };
    1221             : 
    1222             : // Return the JavaScript comparison operator matching this entry's flags:
    1223             : // "!==" when ENTRY_FLAG_NOT is set in f_flags, "===" in all other cases.
    1224          20 : std::string parser::entry_t::flags_to_js_operator() const
    1225             : {
    1226          20 :     if((f_flags & ENTRY_FLAG_NOT) != 0)
    1227             :     {
    1228           6 :         return "!=="; // negated test (!value)
    1229             :     }
    1230             :     else
    1231             :     {
    1232          14 :         return "==="; // every other flag combination currently falls back to strict equality
    1233             :     }
    1234             :         // TODO: support <, >, &, ^, ~... (of the flags listed below only ENTRY_FLAG_NOT is handled so far)
    1235             :         //static integer_t const  ENTRY_FLAG_NOT                  = 0x00001000; // !value
    1236             :         //static integer_t const  ENTRY_FLAG_EQUAL                = 0x00002000; // =value
    1237             :         //static integer_t const  ENTRY_FLAG_LESS                 = 0x00004000; // <value
    1238             :         //static integer_t const  ENTRY_FLAG_GREATER              = 0x00008000; // >value
    1239             :         //static integer_t const  ENTRY_FLAG_ARE_SET              = 0x00010000; // &value   integer only
    1240             :         //static integer_t const  ENTRY_FLAG_ARE_CLEAR            = 0x00020000; // ^value   integer only
    1241             :         //static integer_t const  ENTRY_FLAG_NEGATE               = 0x00040000; // ~value   integer only
    1242             :         //static integer_t const  ENTRY_FLAG_TRUE                 = 0x00080000; // x        numbers only
    1243             : }
    1244             : 
    1245             : 
    1246             : /** \brief Parse the magic files data.
    1247             :  *
    1248             :  * This function reads magic files and parses them for any number of
    1249             :  * magic definitions.
    1250             :  *
    1251             :  * \todo
    1252             :  * According to the magic documentation, all magic tests that apply
    1253             :  * to text files need to be run after all the binary magic tests.
    1254             :  * So at some point we will need to add a sorting capability which
    1255             :  * ensures that this ordering is respected.
    1256             :  */
    1257           1 : void parser::parse()
    1258             : {
    1259           2 :     entry_t::pointer_t e;
    1260             :     for(;;)
    1261             :     {
    1262          37 :         lexer::token_t token(f_lexer->get_token(lexer::mode_t::LEXER_MODE_NORMAL));
    1263          21 :         switch(token.get_type())
    1264             :         {
    1265           4 :         case lexer::token_t::type_t::TOKEN_TYPE_COMMAND:
    1266           4 :             if(!e)
    1267             :             {
    1268           0 :                 std::cerr << "error: a command without any line is not legal.\n";
    1269           0 :                 exit(1);
    1270             :             }
    1271           4 :             if(token.get_string() == "mime")
    1272             :             {
    1273             :                 // these we accept!
    1274           4 :                 token = f_lexer->get_token(lexer::mode_t::LEXER_MODE_MESSAGE);
    1275           8 :                 std::string mimetype(token.get_string());
    1276          12 :                 while(mimetype[0] == ' ' || mimetype[0] == '\t')
    1277             :                 {
    1278           4 :                     mimetype.erase(mimetype.begin());
    1279             :                 }
    1280           4 :                 e->set_mimetype(mimetype);
    1281             :             }
    1282           0 :             else if(token.get_string() == "apple" || token.get_string() == "strength")
    1283             :             {
    1284             :                 // ignore those for now
    1285           0 :                 f_lexer->get_token(lexer::mode_t::LEXER_MODE_MESSAGE);
    1286             :             }
    1287             :             else
    1288             :             {
    1289           0 :                 std::cerr << "error: unknown command (!:" << token.get_string() << ").\n";
    1290           0 :                 exit(1);
    1291             :             }
    1292           4 :             token = f_lexer->get_token(lexer::mode_t::LEXER_MODE_NORMAL);
    1293           8 :             if(token.get_type() != lexer::token_t::type_t::TOKEN_TYPE_CHARACTER
    1294           4 :             || token.get_character() != '\n')
    1295             :             {
    1296           0 :                 std::cerr << "error: a command line is expected to end with a new line.\n";
    1297           0 :                 exit(1);
    1298             :             }
    1299           4 :             continue;
    1300             : 
    1301           1 :         case lexer::token_t::type_t::TOKEN_TYPE_EOT:
    1302             :             // we are done parsing
    1303           2 :             return;
    1304             : 
    1305          12 :         case lexer::token_t::type_t::TOKEN_TYPE_CHARACTER:
    1306             :             // a line may start with characters (>)
    1307          12 :             if(token.get_character() != '>')
    1308             :             {
    1309           0 :                 std::cerr << "error: expected '>' to indicate the level of this line. Got " << token.get_character() << " instead.\n";
    1310           0 :                 exit(1);
    1311             :             }
    1312          12 :             e.reset(new entry_t);
    1313             :             {
    1314          12 :                 int level(0);
    1315          12 :                 do
    1316             :                 {
    1317          24 :                     ++level;
    1318          24 :                     token = f_lexer->get_token(lexer::mode_t::LEXER_MODE_NORMAL);
    1319             :                 }
    1320          24 :                 while(token.get_type() == lexer::token_t::type_t::TOKEN_TYPE_CHARACTER
    1321          24 :                    && token.get_character() == '>');
    1322          12 :                 e->set_level(level);
    1323             :             }
    1324             : 
    1325          24 :             if(token.get_type() == lexer::token_t::type_t::TOKEN_TYPE_CHARACTER
    1326          12 :             && token.get_character() == '&')
    1327             :             {
    1328           0 :                 e->set_flags(entry_t::ENTRY_FLAG_RELATIVE);
    1329           0 :                 token = f_lexer->get_token(lexer::mode_t::LEXER_MODE_NORMAL);
    1330             :             }
    1331             : 
    1332             :             {
    1333          12 :                 int offset_sign(1);
    1334          24 :                 if(token.get_type() == lexer::token_t::type_t::TOKEN_TYPE_CHARACTER
    1335          12 :                 && token.get_character() == '-')
    1336             :                 {
    1337           0 :                     offset_sign = -1;
    1338           0 :                     token = f_lexer->get_token(lexer::mode_t::LEXER_MODE_NORMAL);
    1339             :                 }
    1340             : 
    1341          12 :                 if(token.get_type() == lexer::token_t::type_t::TOKEN_TYPE_INTEGER)
    1342             :                 {
    1343             :                     // the actual offset
    1344          12 :                     e->set_offset(token.get_integer() * offset_sign);
    1345          12 :                     break;
    1346             :                 }
    1347             : 
    1348           0 :                 if(offset_sign == -1)
    1349             :                 {
    1350           0 :                     std::cerr << "error:" << f_lexer->current_filename()
    1351           0 :                               << ":" << f_lexer->current_line()
    1352           0 :                               << ": expected an integer after a '-' in the offset.\n";
    1353           0 :                     exit(1);
    1354           0 :                 }
    1355             :             }
    1356             : 
    1357             :             // indirect
    1358           0 :             if(token.get_type() != lexer::token_t::type_t::TOKEN_TYPE_CHARACTER
    1359           0 :             || token.get_character() != '(')
    1360             :             {
    1361           0 :                 std::cerr << "error:" << f_lexer->current_filename()
    1362           0 :                           << ":" << f_lexer->current_line()
    1363           0 :                           << ": expected an integer, '&', or '(' after the level indication.\n";
    1364           0 :                 exit(1);
    1365             :             }
    1366           0 :             token = f_lexer->get_token(lexer::mode_t::LEXER_MODE_NORMAL);
    1367           0 :             if(token.get_type() == lexer::token_t::type_t::TOKEN_TYPE_CHARACTER
    1368           0 :             && token.get_character() == '&')
    1369             :             {
    1370           0 :                 e->set_flags(entry_t::ENTRY_FLAG_INDIRECT_RELATIVE);
    1371           0 :                 token = f_lexer->get_token(lexer::mode_t::LEXER_MODE_NORMAL);
    1372             :             }
    1373             : 
    1374             :             // indirect offset
    1375           0 :             if(token.get_type() != lexer::token_t::type_t::TOKEN_TYPE_INTEGER)
    1376             :             {
    1377           0 :                 std::cerr << "error: expected an integer for the indirect offset.\n";
    1378           0 :                 exit(1);
    1379             :             }
    1380           0 :             e->set_offset(token.get_integer());
    1381             : 
    1382           0 :             token = f_lexer->get_token(lexer::mode_t::LEXER_MODE_NORMAL);
    1383           0 :             if(token.get_type() == lexer::token_t::type_t::TOKEN_TYPE_CHARACTER
    1384           0 :             && token.get_character() == '.')
    1385             :             {
    1386             :                 // NOTE: The documentation says that the size is
    1387             :                 //       optional, and if not defined, long is used
    1388             :                 //       (but they do not specify the endian, so I would
    1389             :                 //       imagine that the machine endian is to be used?!)
    1390           0 :                 token = f_lexer->get_token(lexer::mode_t::LEXER_MODE_NORMAL);
    1391           0 :                 if(token.get_type() != lexer::token_t::type_t::TOKEN_TYPE_STRING)
    1392             :                 {
    1393           0 :                     std::cerr << "error: indirect offsets can be followed by a size (.b, .l, etc.), here the size is missing.\n";
    1394           0 :                     exit(1);
    1395             :                 }
    1396           0 :                 std::string size(token.get_string());
    1397           0 :                 if(size.size() != 1)
    1398             :                 {
    1399           0 :                     std::cerr << "error: indirect offsets size (.b, .l, etc.), must be exactly one chracter.\n";
    1400           0 :                     exit(1);
    1401             :                 }
    1402           0 :                 switch(size[0])
    1403             :                 {
    1404           0 :                 case 'b':
    1405             :                 case 'B':
    1406           0 :                     e->set_flags(entry_t::ENTRY_FLAG_INDIRECT_BYTE);
    1407           0 :                     break;
    1408             : 
    1409           0 :                 case 'S':
    1410           0 :                     e->set_flags(entry_t::ENTRY_FLAG_INDIRECT_BE_SHORT);
    1411           0 :                     break;
    1412             : 
    1413           0 :                 case 's':
    1414           0 :                     e->set_flags(entry_t::ENTRY_FLAG_INDIRECT_LE_SHORT);
    1415           0 :                     break;
    1416             : 
    1417           0 :                 case 'l':
    1418           0 :                     e->set_flags(entry_t::ENTRY_FLAG_INDIRECT_BE_LONG);
    1419           0 :                     break;
    1420             : 
    1421           0 :                 case 'L':
    1422           0 :                     e->set_flags(entry_t::ENTRY_FLAG_INDIRECT_LE_LONG);
    1423           0 :                     break;
    1424             : 
    1425           0 :                 case 'm':
    1426           0 :                     e->set_flags(entry_t::ENTRY_FLAG_INDIRECT_ME_LONG);
    1427           0 :                     break;
    1428             : 
    1429           0 :                 case 'I':
    1430           0 :                     e->set_flags(entry_t::ENTRY_FLAG_INDIRECT_BE_ID3);
    1431           0 :                     break;
    1432             : 
    1433           0 :                 case 'i':
    1434           0 :                     e->set_flags(entry_t::ENTRY_FLAG_INDIRECT_LE_ID3);
    1435           0 :                     break;
    1436             : 
    1437           0 :                 default:
    1438           0 :                     std::cerr << "error: invalid character used as an offset size (" << size[0] << ").\n";
    1439           0 :                     exit(1);
    1440             : 
    1441             :                 }
    1442           0 :                 token = f_lexer->get_token(lexer::mode_t::LEXER_MODE_NORMAL);
    1443             :             }
    1444           0 :             if(token.get_type() == lexer::token_t::type_t::TOKEN_TYPE_CHARACTER
    1445           0 :             && token.get_character() != ')')
    1446             :             {
    1447           0 :                 switch(token.get_character())
    1448             :                 {
    1449           0 :                 case '+':
    1450             :                 case '-':
    1451             :                 case '*':
    1452             :                 case '/':
    1453             :                 case '%':
    1454             :                 case '&':
    1455             :                 case '|':
    1456             :                 case '^':
    1457             :                     //e->set_indirect_adjustment_operator(token.get_character());
    1458           0 :                     break;
    1459             : 
    1460           0 :                 default:
    1461             :                     std::cerr << "error: indirect adjustment operator ("
    1462           0 :                               << token.get_character() << ") not supported."
    1463           0 :                               << std::endl;
    1464           0 :                     exit(1);
    1465             : 
    1466             :                 }
    1467           0 :                 token = f_lexer->get_token(lexer::mode_t::LEXER_MODE_NORMAL);
    1468           0 :                 if(token.get_type() == lexer::token_t::type_t::TOKEN_TYPE_CHARACTER
    1469           0 :                 && token.get_character() == '(')
    1470             :                 {
    1471             :                     // case where we have a negative number and they
    1472             :                     // generally use (<position>.<size>+(-<offset>))
    1473             :                     //
    1474           0 :                     int sign(1);
    1475           0 :                     token = f_lexer->get_token(lexer::mode_t::LEXER_MODE_NORMAL);
    1476           0 :                     if(token.get_type() == lexer::token_t::type_t::TOKEN_TYPE_CHARACTER
    1477           0 :                     && token.get_character() == '-')
    1478             :                     {
    1479           0 :                         sign = -1;
    1480           0 :                         token = f_lexer->get_token(lexer::mode_t::LEXER_MODE_NORMAL);
    1481             :                     }
    1482           0 :                     if(token.get_type() != lexer::token_t::type_t::TOKEN_TYPE_INTEGER)
    1483             :                     {
    1484           0 :                         std::cerr << "error:" << f_lexer->current_filename()
    1485           0 :                                   << ":" << f_lexer->current_line()
    1486           0 :                                   << ": indirect adjustment operator must be followed by an integer."
    1487           0 :                                   << std::endl;
    1488           0 :                         exit(1);
    1489             :                     }
    1490             :                     // Note: the + and - can be optimized by replacing the
    1491             :                     //       integer with -integer and the '-' by '+'
    1492             :                     //e->set_indirect_adjustment(token.get_integer() * sign);
    1493           0 :                     token = f_lexer->get_token(lexer::mode_t::LEXER_MODE_NORMAL);
    1494           0 :                     if(token.get_type() != lexer::token_t::type_t::TOKEN_TYPE_CHARACTER
    1495           0 :                     && token.get_character() != ')')
    1496             :                     {
    1497           0 :                         std::cerr << "error:" << f_lexer->current_filename()
    1498           0 :                                   << ":" << f_lexer->current_line()
    1499           0 :                                   << ": indirect adjustment operator sub-offset must be ended by a ')'."
    1500           0 :                                   << std::endl;
    1501           0 :                         exit(1);
    1502             :                     }
    1503           0 :                     token = f_lexer->get_token(lexer::mode_t::LEXER_MODE_NORMAL);
    1504             :                 }
    1505             :                 else
    1506             :                 {
    1507           0 :                     if(token.get_type() != lexer::token_t::type_t::TOKEN_TYPE_INTEGER)
    1508             :                     {
    1509             :                         // Note: in the documentation they say you can also have
    1510             :                         //       another parenthesis layer as in: +(-4)
    1511           0 :                         std::cerr << "error:" << f_lexer->current_filename()
    1512           0 :                                   << ":" << f_lexer->current_line()
    1513           0 :                                   << ": indirect adjustment operator must be followed by an integer."
    1514           0 :                                   << std::endl;
    1515           0 :                         exit(1);
    1516             :                     }
    1517             :                     // Note: the + and - can be optimized by replacing the
    1518             :                     //       integer with -integer and the '-' by '+'
    1519             :                     //e->set_indirect_adjustment(token.get_integer());
    1520           0 :                     token = f_lexer->get_token(lexer::mode_t::LEXER_MODE_NORMAL);
    1521             :                 }
    1522             :             }
    1523           0 :             if(token.get_type() != lexer::token_t::type_t::TOKEN_TYPE_CHARACTER
    1524           0 :             || token.get_character() != ')')
    1525             :             {
    1526           0 :                 std::cerr << "error: an indirect offset must end with ')'.\n";
    1527           0 :                 exit(1);
    1528             :             }
    1529           0 :             break;
    1530             : 
    1531           4 :         case lexer::token_t::type_t::TOKEN_TYPE_INTEGER:
    1532             :             // the offset for this line
    1533           4 :             e.reset(new entry_t);
    1534           4 :             e->set_offset(token.get_integer());
    1535           4 :             break;
    1536             : 
    1537           0 :         default:
    1538           0 :             std::cerr << "error: expected a standard line token: an integer optionally preceeded by '>' characters.\n";
    1539           4 :             exit(1);
    1540             : 
    1541             :         }
    1542             : 
    1543             :         // after the offset we have to have a space then the type
    1544          16 :         token = f_lexer->get_token(lexer::mode_t::LEXER_MODE_NORMAL);
    1545          32 :         if(token.get_type() != lexer::token_t::type_t::TOKEN_TYPE_CHARACTER
    1546          16 :         || token.get_character() != ' ')
    1547             :         {
    1548           0 :             std::cerr << "error: expected a space or tab after the offset.\n";
    1549           0 :             exit(1);
    1550             :         }
    1551             : 
    1552          16 :         token = f_lexer->get_token(lexer::mode_t::LEXER_MODE_NORMAL);
    1553          16 :         if(token.get_type() != lexer::token_t::type_t::TOKEN_TYPE_STRING)
    1554             :         {
    1555           0 :             std::cerr << "error: expected a string to indicate the type on this line.\n";
    1556           0 :             exit(1);
    1557             :         }
    1558             : 
    1559          32 :         std::string type(token.get_string());
    1560          16 :         if(type == "byte")
    1561             :         {
    1562           6 :             e->set_type(entry_t::type_t::ENTRY_TYPE_BYTE);
    1563             :         }
    1564          10 :         else if(type == "ubyte")
    1565             :         {
    1566           0 :             e->set_type(entry_t::type_t::ENTRY_TYPE_UBYTE);
    1567             :         }
    1568          10 :         else if(type == "short")
    1569             :         {
    1570           0 :             e->set_type(entry_t::type_t::ENTRY_TYPE_SHORT);
    1571             :         }
    1572          10 :         else if(type == "leshort")
    1573             :         {
    1574           2 :             e->set_type(entry_t::type_t::ENTRY_TYPE_LESHORT);
    1575             :         }
    1576           8 :         else if(type == "beshort")
    1577             :         {
    1578           0 :             e->set_type(entry_t::type_t::ENTRY_TYPE_BESHORT);
    1579             :         }
    1580           8 :         else if(type == "ushort")
    1581             :         {
    1582           0 :             e->set_type(entry_t::type_t::ENTRY_TYPE_USHORT);
    1583             :         }
    1584           8 :         else if(type == "uleshort")
    1585             :         {
    1586           0 :             e->set_type(entry_t::type_t::ENTRY_TYPE_ULESHORT);
    1587             :         }
    1588           8 :         else if(type == "ubeshort")
    1589             :         {
    1590           0 :             e->set_type(entry_t::type_t::ENTRY_TYPE_UBESHORT);
    1591             :         }
    1592           8 :         else if(type == "long")
    1593             :         {
    1594           0 :             e->set_type(entry_t::type_t::ENTRY_TYPE_LONG);
    1595             :         }
    1596           8 :         else if(type == "lelong")
    1597             :         {
    1598           4 :             e->set_type(entry_t::type_t::ENTRY_TYPE_LELONG);
    1599             :         }
    1600           4 :         else if(type == "belong")
    1601             :         {
    1602           2 :             e->set_type(entry_t::type_t::ENTRY_TYPE_BELONG);
    1603             :         }
    1604           2 :         else if(type == "melong")
    1605             :         {
    1606           0 :             e->set_type(entry_t::type_t::ENTRY_TYPE_MELONG);
    1607             :         }
    1608           2 :         else if(type == "ulong")
    1609             :         {
    1610           0 :             e->set_type(entry_t::type_t::ENTRY_TYPE_ULONG);
    1611             :         }
    1612           2 :         else if(type == "ulong")
    1613             :         {
    1614           0 :             e->set_type(entry_t::type_t::ENTRY_TYPE_ULONG);
    1615             :         }
    1616           2 :         else if(type == "ulelong")
    1617             :         {
    1618           0 :             e->set_type(entry_t::type_t::ENTRY_TYPE_ULELONG);
    1619             :         }
    1620           2 :         else if(type == "ubelong")
    1621             :         {
    1622           0 :             e->set_type(entry_t::type_t::ENTRY_TYPE_UBELONG);
    1623             :         }
    1624           2 :         else if(type == "umelong")
    1625             :         {
    1626           0 :             e->set_type(entry_t::type_t::ENTRY_TYPE_UMELONG);
    1627             :         }
    1628           2 :         else if(type == "beid3")
    1629             :         {
    1630           0 :             e->set_type(entry_t::type_t::ENTRY_TYPE_BEID3);
    1631             :         }
    1632           2 :         else if(type == "leid3")
    1633             :         {
    1634           0 :             e->set_type(entry_t::type_t::ENTRY_TYPE_LEID3);
    1635             :         }
    1636           2 :         else if(type == "ubeid3")
    1637             :         {
    1638           0 :             e->set_type(entry_t::type_t::ENTRY_TYPE_UBEID3);
    1639             :         }
    1640           2 :         else if(type == "uleid3")
    1641             :         {
    1642           0 :             e->set_type(entry_t::type_t::ENTRY_TYPE_ULEID3);
    1643             :         }
    1644           2 :         else if(type == "quad")
    1645             :         {
    1646           0 :             e->set_type(entry_t::type_t::ENTRY_TYPE_QUAD);
    1647             :         }
    1648           2 :         else if(type == "bequad")
    1649             :         {
    1650           0 :             e->set_type(entry_t::type_t::ENTRY_TYPE_BEQUAD);
    1651             :         }
    1652           2 :         else if(type == "lequad")
    1653             :         {
    1654           0 :             e->set_type(entry_t::type_t::ENTRY_TYPE_LEQUAD);
    1655             :         }
    1656           2 :         else if(type == "uquad")
    1657             :         {
    1658           0 :             e->set_type(entry_t::type_t::ENTRY_TYPE_UQUAD);
    1659             :         }
    1660           2 :         else if(type == "ubequad")
    1661             :         {
    1662           0 :             e->set_type(entry_t::type_t::ENTRY_TYPE_UBEQUAD);
    1663             :         }
    1664           2 :         else if(type == "ulequad")
    1665             :         {
    1666           0 :             e->set_type(entry_t::type_t::ENTRY_TYPE_ULEQUAD);
    1667             :         }
    1668           2 :         else if(type == "float")
    1669             :         {
    1670           0 :             e->set_type(entry_t::type_t::ENTRY_TYPE_FLOAT);
    1671             :         }
    1672           2 :         else if(type == "befloat")
    1673             :         {
    1674           0 :             e->set_type(entry_t::type_t::ENTRY_TYPE_BEFLOAT);
    1675             :         }
    1676           2 :         else if(type == "lefloat")
    1677             :         {
    1678           0 :             e->set_type(entry_t::type_t::ENTRY_TYPE_LEFLOAT);
    1679             :         }
    1680           2 :         else if(type == "double")
    1681             :         {
    1682           0 :             e->set_type(entry_t::type_t::ENTRY_TYPE_DOUBLE);
    1683             :         }
    1684           2 :         else if(type == "bedouble")
    1685             :         {
    1686           0 :             e->set_type(entry_t::type_t::ENTRY_TYPE_BEDOUBLE);
    1687             :         }
    1688           2 :         else if(type == "ledouble")
    1689             :         {
    1690           0 :             e->set_type(entry_t::type_t::ENTRY_TYPE_LEDOUBLE);
    1691             :         }
    1692           2 :         else if(type == "string")
    1693             :         {
    1694           2 :             e->set_type(entry_t::type_t::ENTRY_TYPE_STRING);
    1695             :         }
    1696           0 :         else if(type == "pstring")
    1697             :         {
    1698           0 :             e->set_type(entry_t::type_t::ENTRY_TYPE_PSTRING);
    1699             :         }
    1700           0 :         else if(type == "bestring16")
    1701             :         {
    1702           0 :             e->set_type(entry_t::type_t::ENTRY_TYPE_BESTRING16);
    1703             :         }
    1704           0 :         else if(type == "lestring16")
    1705             :         {
    1706           0 :             e->set_type(entry_t::type_t::ENTRY_TYPE_LESTRING16);
    1707             :         }
    1708           0 :         else if(type == "search")
    1709             :         {
    1710           0 :             e->set_type(entry_t::type_t::ENTRY_TYPE_SEARCH);
    1711             :         }
    1712           0 :         else if(type == "regex")
    1713             :         {
    1714           0 :             e->set_type(entry_t::type_t::ENTRY_TYPE_REGEX);
    1715             :         }
    1716           0 :         else if(type == "date")
    1717             :         {
    1718           0 :             e->set_type(entry_t::type_t::ENTRY_TYPE_DATE);
    1719             :         }
    1720           0 :         else if(type == "qdate")
    1721             :         {
    1722           0 :             e->set_type(entry_t::type_t::ENTRY_TYPE_QDATE);
    1723             :         }
    1724           0 :         else if(type == "ldate")
    1725             :         {
    1726           0 :             e->set_type(entry_t::type_t::ENTRY_TYPE_LDATE);
    1727             :         }
    1728           0 :         else if(type == "qldate")
    1729             :         {
    1730           0 :             e->set_type(entry_t::type_t::ENTRY_TYPE_QLDATE);
    1731             :         }
    1732           0 :         else if(type == "bedate")
    1733             :         {
    1734           0 :             e->set_type(entry_t::type_t::ENTRY_TYPE_BEDATE);
    1735             :         }
    1736           0 :         else if(type == "beqdate")
    1737             :         {
    1738           0 :             e->set_type(entry_t::type_t::ENTRY_TYPE_BEQDATE);
    1739             :         }
    1740           0 :         else if(type == "beldate")
    1741             :         {
    1742           0 :             e->set_type(entry_t::type_t::ENTRY_TYPE_BELDATE);
    1743             :         }
    1744           0 :         else if(type == "beqldate")
    1745             :         {
    1746           0 :             e->set_type(entry_t::type_t::ENTRY_TYPE_BEQLDATE);
    1747             :         }
    1748           0 :         else if(type == "ledate")
    1749             :         {
    1750           0 :             e->set_type(entry_t::type_t::ENTRY_TYPE_LEDATE);
    1751             :         }
    1752           0 :         else if(type == "leqdate")
    1753             :         {
    1754           0 :             e->set_type(entry_t::type_t::ENTRY_TYPE_LEQDATE);
    1755             :         }
    1756           0 :         else if(type == "leldate")
    1757             :         {
    1758           0 :             e->set_type(entry_t::type_t::ENTRY_TYPE_LELDATE);
    1759             :         }
    1760           0 :         else if(type == "leqldate")
    1761             :         {
    1762           0 :             e->set_type(entry_t::type_t::ENTRY_TYPE_LEQLDATE);
    1763             :         }
    1764           0 :         else if(type == "medate")
    1765             :         {
    1766           0 :             e->set_type(entry_t::type_t::ENTRY_TYPE_MEDATE);
    1767             :         }
    1768           0 :         else if(type == "meldate")
    1769             :         {
    1770           0 :             e->set_type(entry_t::type_t::ENTRY_TYPE_MELDATE);
    1771             :         }
    1772           0 :         else if(type == "indirect")
    1773             :         {
    1774           0 :             e->set_type(entry_t::type_t::ENTRY_TYPE_INDIRECT);
    1775             :         }
    1776           0 :         else if(type == "default")
    1777             :         {
    1778           0 :             e->set_type(entry_t::type_t::ENTRY_TYPE_DEFAULT);
    1779             :         }
    1780           0 :         else if(type == "name")
    1781             :         {
    1782           0 :             e->set_type(entry_t::type_t::ENTRY_TYPE_NAME);
    1783             :         }
    1784           0 :         else if(type == "use")
    1785             :         {
    1786           0 :             e->set_type(entry_t::type_t::ENTRY_TYPE_USE);
    1787             :         }
    1788             :         else
    1789             :         {
    1790           0 :             std::cerr << "error:" << f_lexer->current_filename()
    1791           0 :                       << ":" << f_lexer->current_line()
    1792           0 :                       << ": unknown type \"" << type << "\".\n";
    1793           0 :             exit(1);
    1794             :         }
    1795             : 
    1796          16 :         token = f_lexer->get_token(lexer::mode_t::LEXER_MODE_NORMAL);
    1797          16 :         if(token.get_type() == lexer::token_t::type_t::TOKEN_TYPE_CHARACTER)
    1798             :         {
    1799          16 :             switch(token.get_character())
    1800             :             {
    1801           0 :             case '&': // <integer-type> & <integer>
    1802           0 :                 switch(e->get_type())
    1803             :                 {
    1804           0 :                 case entry_t::type_t::ENTRY_TYPE_BYTE:
    1805             :                 case entry_t::type_t::ENTRY_TYPE_UBYTE:
    1806             :                 case entry_t::type_t::ENTRY_TYPE_SHORT:
    1807             :                 case entry_t::type_t::ENTRY_TYPE_LESHORT:
    1808             :                 case entry_t::type_t::ENTRY_TYPE_BESHORT:
    1809             :                 case entry_t::type_t::ENTRY_TYPE_USHORT:
    1810             :                 case entry_t::type_t::ENTRY_TYPE_ULESHORT:
    1811             :                 case entry_t::type_t::ENTRY_TYPE_UBESHORT:
    1812             :                 case entry_t::type_t::ENTRY_TYPE_LONG:
    1813             :                 case entry_t::type_t::ENTRY_TYPE_LELONG:
    1814             :                 case entry_t::type_t::ENTRY_TYPE_BELONG:
    1815             :                 case entry_t::type_t::ENTRY_TYPE_MELONG:
    1816             :                 case entry_t::type_t::ENTRY_TYPE_ULONG:
    1817             :                 case entry_t::type_t::ENTRY_TYPE_ULELONG:
    1818             :                 case entry_t::type_t::ENTRY_TYPE_UBELONG:
    1819             :                 case entry_t::type_t::ENTRY_TYPE_UMELONG:
    1820             :                 case entry_t::type_t::ENTRY_TYPE_BEID3:
    1821             :                 case entry_t::type_t::ENTRY_TYPE_LEID3:
    1822             :                 case entry_t::type_t::ENTRY_TYPE_UBEID3:
    1823             :                 case entry_t::type_t::ENTRY_TYPE_ULEID3:
    1824             :                 case entry_t::type_t::ENTRY_TYPE_QUAD:
    1825             :                 case entry_t::type_t::ENTRY_TYPE_BEQUAD:
    1826             :                 case entry_t::type_t::ENTRY_TYPE_LEQUAD:
    1827             :                 case entry_t::type_t::ENTRY_TYPE_UQUAD:
    1828             :                 case entry_t::type_t::ENTRY_TYPE_UBEQUAD:
    1829             :                 case entry_t::type_t::ENTRY_TYPE_ULEQUAD:
    1830             :                 case entry_t::type_t::ENTRY_TYPE_DATE:
    1831             :                 case entry_t::type_t::ENTRY_TYPE_QDATE:
    1832             :                 case entry_t::type_t::ENTRY_TYPE_LDATE:
    1833             :                 case entry_t::type_t::ENTRY_TYPE_QLDATE:
    1834             :                 case entry_t::type_t::ENTRY_TYPE_BEDATE:
    1835             :                 case entry_t::type_t::ENTRY_TYPE_BEQDATE:
    1836             :                 case entry_t::type_t::ENTRY_TYPE_BELDATE:
    1837             :                 case entry_t::type_t::ENTRY_TYPE_BEQLDATE:
    1838             :                 case entry_t::type_t::ENTRY_TYPE_LEDATE:
    1839             :                 case entry_t::type_t::ENTRY_TYPE_LEQDATE:
    1840             :                 case entry_t::type_t::ENTRY_TYPE_LELDATE:
    1841             :                 case entry_t::type_t::ENTRY_TYPE_LEQLDATE:
    1842             :                 case entry_t::type_t::ENTRY_TYPE_MEDATE:
    1843             :                 case entry_t::type_t::ENTRY_TYPE_MELDATE:
    1844           0 :                     break;
    1845             : 
    1846           0 :                 default:
    1847           0 :                     std::cerr << "error: a type followed by & must be an integral type.\n";
    1848           0 :                     exit(1);
    1849             :                     snap::NOTREACHED();
    1850             : 
    1851             :                 }
    1852           0 :                 token = f_lexer->get_token(lexer::mode_t::LEXER_MODE_NORMAL);
    1853           0 :                 if(token.get_type() != lexer::token_t::type_t::TOKEN_TYPE_INTEGER)
    1854             :                 {
    1855           0 :                     std::cerr << "error: a type followed by & must next be followed by an integer.\n";
    1856           0 :                     exit(1);
    1857             :                 }
    1858           0 :                 e->set_mask(token.get_integer());
    1859           0 :                 token = f_lexer->get_token(lexer::mode_t::LEXER_MODE_NORMAL);
    1860           0 :                 break;
    1861             : 
    1862           0 :             case '/': // <string-type> '/' <flags>, or "search" '/' <number>
    1863           0 :                 switch(e->get_type())
    1864             :                 {
    1865           0 :                 case entry_t::type_t::ENTRY_TYPE_STRING:
    1866             :                 case entry_t::type_t::ENTRY_TYPE_BESTRING16:
    1867             :                 case entry_t::type_t::ENTRY_TYPE_LESTRING16:
    1868           0 :                     token = f_lexer->get_token(lexer::mode_t::LEXER_MODE_NORMAL);
    1869           0 :                     if(token.get_type() != lexer::token_t::type_t::TOKEN_TYPE_STRING)
    1870             :                     {
    1871           0 :                         std::cerr << "error: a search followed by / must next be followed by a set of flags.\n";
    1872           0 :                         exit(1);
    1873             :                     }
    1874             :                     {
    1875           0 :                         std::string const flags(token.get_string());
    1876           0 :                         for(char const *f(flags.c_str()); *f != '\0'; ++f)
    1877             :                         {
    1878           0 :                             switch(*f)
    1879             :                             {
    1880           0 :                             case 'W':
    1881           0 :                                 e->set_flags(entry_t::ENTRY_FLAG_COMPACT_BLANK);
    1882           0 :                                 break;
    1883             : 
    1884           0 :                             case 'w':
    1885           0 :                                 e->set_flags(entry_t::ENTRY_FLAG_BLANK);
    1886           0 :                                 break;
    1887             : 
    1888           0 :                             case 'c':
    1889           0 :                                 e->set_flags(entry_t::ENTRY_FLAG_LOWER_INSENSITIVE);
    1890           0 :                                 break;
    1891             : 
    1892           0 :                             case 'C':
    1893           0 :                                 e->set_flags(entry_t::ENTRY_FLAG_UPPER_INSENSITIVE);
    1894           0 :                                 break;
    1895             : 
    1896           0 :                             case 't':
    1897           0 :                                 e->set_flags(entry_t::ENTRY_FLAG_TEXT_FILE);
    1898           0 :                                 break;
    1899             : 
    1900           0 :                             case 'b':
    1901           0 :                                 e->set_flags(entry_t::ENTRY_FLAG_BINARY_FILE);
    1902           0 :                                 break;
    1903             : 
    1904           0 :                             default:
    1905           0 :                                 std::cerr << "error:" << f_lexer->current_filename()
    1906           0 :                                           << ":" << f_lexer->current_line()
    1907             :                                           << ": invalid character used as a string, bestring16, or lestring16 ("
    1908           0 :                                           << *f << ").\n";
    1909           0 :                                 exit(1);
    1910             : 
    1911             :                             }
    1912           0 :                         }
    1913             :                     }
    1914           0 :                     token = f_lexer->get_token(lexer::mode_t::LEXER_MODE_NORMAL);
    1915           0 :                     break;
    1916             : 
    1917           0 :                 case entry_t::type_t::ENTRY_TYPE_PSTRING:
    1918             :                     // only width of the string size is expected here
    1919           0 :                     token = f_lexer->get_token(lexer::mode_t::LEXER_MODE_NORMAL);
    1920           0 :                     if(token.get_type() != lexer::token_t::type_t::TOKEN_TYPE_STRING)
    1921             :                     {
    1922           0 :                         std::cerr << "error: a search followed by / must next be followed by a set of flags.\n";
    1923           0 :                         exit(1);
    1924             :                     }
    1925             :                     {
    1926           0 :                         std::string const flags(token.get_string());
    1927           0 :                         for(char const *f(flags.c_str()); *f != '\0'; ++f)
    1928             :                         {
    1929           0 :                             switch(*f)
    1930             :                             {
    1931           0 :                             case 'B':
    1932           0 :                                 e->set_flags(entry_t::ENTRY_FLAG_BYTE);
    1933           0 :                                 break;
    1934             : 
    1935           0 :                             case 'H':
    1936           0 :                                 e->set_flags(entry_t::ENTRY_FLAG_BE_SHORT);
    1937           0 :                                 break;
    1938             : 
    1939           0 :                             case 'h':
    1940           0 :                                 e->set_flags(entry_t::ENTRY_FLAG_LE_SHORT);
    1941           0 :                                 break;
    1942             : 
    1943           0 :                             case 'L':
    1944           0 :                                 e->set_flags(entry_t::ENTRY_FLAG_BE_LONG);
    1945           0 :                                 break;
    1946             : 
    1947           0 :                             case 'l':
    1948           0 :                                 e->set_flags(entry_t::ENTRY_FLAG_LE_LONG);
    1949           0 :                                 break;
    1950             : 
    1951           0 :                             case 'J':
    1952           0 :                                 e->set_flags(entry_t::ENTRY_FLAG_SELF_INCLUDED);
    1953           0 :                                 break;
    1954             : 
    1955           0 :                             default:
    1956           0 :                                 std::cerr << "error: invalid character used as a pstring flag (pstring/" << *f << ").\n";
    1957           0 :                                 exit(1);
    1958             : 
    1959             :                             }
    1960           0 :                         }
    1961             :                     }
    1962           0 :                     token = f_lexer->get_token(lexer::mode_t::LEXER_MODE_NORMAL);
    1963           0 :                     break;
    1964             : 
    1965           0 :                 case entry_t::type_t::ENTRY_TYPE_REGEX: // <regex> / <flags>  or  <regex> / <number>
    1966           0 :                     token = f_lexer->get_token(lexer::mode_t::LEXER_MODE_NORMAL);
    1967             :                     // TBD:
    1968             :                     // I would imagine that both could be used (integer + flags)
    1969             :                     // but it is not documented so at this point I read one or
    1970             :                     // the other and that is enough with the existing files.
    1971           0 :                     if(token.get_type() == lexer::token_t::type_t::TOKEN_TYPE_INTEGER)
    1972             :                     {
    1973             :                         // the number of lines to check the regex against
    1974           0 :                         e->set_maxlength(token.get_integer());
    1975           0 :                         token = f_lexer->get_token(lexer::mode_t::LEXER_MODE_NORMAL);
    1976           0 :                         if(token.get_type() != lexer::token_t::type_t::TOKEN_TYPE_CHARACTER
    1977           0 :                         || token.get_character() != '/')
    1978             :                         {
    1979             :                             // no extra flags
    1980           0 :                             break;
    1981             :                         }
    1982             :                     }
    1983           0 :                     if(token.get_type() == lexer::token_t::type_t::TOKEN_TYPE_STRING)
    1984             :                     {
    1985             :                         // regex flags are 'l', 's' and 'c'
    1986           0 :                         std::string flags(token.get_string());
    1987           0 :                         for(char const *f(flags.c_str()); *f != '\0'; ++f)
    1988             :                         {
    1989           0 :                             switch(*f)
    1990             :                             {
    1991           0 :                             case 'l':
    1992           0 :                                 e->set_flags(entry_t::ENTRY_FLAG_LINES);
    1993           0 :                                 break;
    1994             : 
    1995           0 :                             case 'c':
    1996           0 :                                 e->set_flags(entry_t::ENTRY_FLAG_CASE_INSENSITIVE);
    1997           0 :                                 break;
    1998             : 
    1999           0 :                             case 's':
    2000           0 :                                 e->set_flags(entry_t::ENTRY_FLAG_START_OFFSET);
    2001           0 :                                 break;
    2002             : 
    2003           0 :                             default:
    2004           0 :                                 std::cerr << "error: invalid character used as a regex flag (regex/" << *f << ").\n";
    2005           0 :                                 exit(1);
    2006             : 
    2007             :                             }
    2008             :                         }
    2009             :                     }
    2010             :                     else
    2011             :                     {
    2012           0 :                         std::cerr << "error: a search followed by / must next be followed by an integer and/or flags.\n";
    2013           0 :                         exit(1);
    2014             :                     }
    2015           0 :                     token = f_lexer->get_token(lexer::mode_t::LEXER_MODE_NORMAL);
    2016           0 :                     break;
    2017             : 
    2018           0 :                 case entry_t::type_t::ENTRY_TYPE_SEARCH:
    2019           0 :                     token = f_lexer->get_token(lexer::mode_t::LEXER_MODE_NORMAL);
    2020           0 :                     if(token.get_type() == lexer::token_t::type_t::TOKEN_TYPE_INTEGER)
    2021             :                     {
    2022           0 :                         e->set_maxlength(token.get_integer());
    2023           0 :                         token = f_lexer->get_token(lexer::mode_t::LEXER_MODE_NORMAL);
    2024           0 :                         if(token.get_type() != lexer::token_t::type_t::TOKEN_TYPE_CHARACTER
    2025           0 :                         || token.get_character() != '/')
    2026             :                         {
    2027             :                             // no extra flags
    2028           0 :                             break;
    2029             :                         }
    2030             :                     }
    2031           0 :                     if(token.get_type() == lexer::token_t::type_t::TOKEN_TYPE_STRING)
    2032             :                     {
    2033           0 :                         std::string flags(token.get_string());
    2034           0 :                         for(char const *f(flags.c_str()); *f != '\0'; ++f)
    2035             :                         {
    2036           0 :                             switch(*f)
    2037             :                             {
    2038           0 :                             case 'W':
    2039           0 :                                 e->set_flags(entry_t::ENTRY_FLAG_COMPACT_BLANK);
    2040           0 :                                 break;
    2041             : 
    2042           0 :                             case 'w':
    2043           0 :                                 e->set_flags(entry_t::ENTRY_FLAG_BLANK);
    2044           0 :                                 break;
    2045             : 
    2046           0 :                             case 'c':
    2047           0 :                                 e->set_flags(entry_t::ENTRY_FLAG_LOWER_INSENSITIVE);
    2048           0 :                                 break;
    2049             : 
    2050           0 :                             case 'C':
    2051           0 :                                 e->set_flags(entry_t::ENTRY_FLAG_UPPER_INSENSITIVE);
    2052           0 :                                 break;
    2053             : 
    2054           0 :                             case 't':
    2055           0 :                                 e->set_flags(entry_t::ENTRY_FLAG_TEXT_FILE);
    2056           0 :                                 break;
    2057             : 
    2058           0 :                             case 'b':
    2059           0 :                                 e->set_flags(entry_t::ENTRY_FLAG_BINARY_FILE);
    2060           0 :                                 break;
    2061             : 
    2062           0 :                             default:
    2063           0 :                                 std::cerr << "error: invalid character used as a search flag (" << *f << ").\n";
    2064           0 :                                 exit(1);
    2065             : 
    2066             :                             }
    2067             :                         }
    2068             :                     }
    2069             :                     else
    2070             :                     {
    2071           0 :                         std::cerr << "error: a search followed by / must next be followed by an integer (count) or a string (flags).\n";
    2072           0 :                         exit(1);
    2073             :                     }
    2074           0 :                     token = f_lexer->get_token(lexer::mode_t::LEXER_MODE_NORMAL);
    2075           0 :                     break;
    2076             : 
    2077           0 :                 default:
    2078           0 :                     std::cerr << "error: a type followed by / must be a string type.\n";
    2079           0 :                     exit(1);
    2080             :                     snap::NOTREACHED();
    2081             : 
    2082             :                 }
    2083           0 :                 break;
    2084             : 
    2085             :             }
    2086             :         }
    2087             : 
    2088          32 :         if(token.get_type() != lexer::token_t::type_t::TOKEN_TYPE_CHARACTER
    2089          16 :         || token.get_character() != ' ')
    2090             :         {
    2091           0 :             std::cerr << "error: expected a space or tab after the type.\n";
    2092           0 :             exit(1);
    2093             :         }
    2094             : 
    2095             :         // the next get_token() mode depends on the type so we do that
    2096             :         // separately
    2097          16 :         bool is_float(false);
    2098          16 :         switch(e->get_type())
    2099             :         {
    2100           0 :         case entry_t::type_t::ENTRY_TYPE_FLOAT:
    2101             :         case entry_t::type_t::ENTRY_TYPE_BEFLOAT:
    2102             :         case entry_t::type_t::ENTRY_TYPE_LEFLOAT:
    2103             :         case entry_t::type_t::ENTRY_TYPE_DOUBLE:
    2104             :         case entry_t::type_t::ENTRY_TYPE_BEDOUBLE:
    2105             :         case entry_t::type_t::ENTRY_TYPE_LEDOUBLE:
    2106           0 :             is_float = true;
    2107             : #if __cplusplus >= 201700
    2108             :         [[fallthrough]];
    2109             : #endif
    2110          14 :         case entry_t::type_t::ENTRY_TYPE_BYTE:
    2111             :         case entry_t::type_t::ENTRY_TYPE_UBYTE:
    2112             :         case entry_t::type_t::ENTRY_TYPE_SHORT:
    2113             :         case entry_t::type_t::ENTRY_TYPE_LESHORT:
    2114             :         case entry_t::type_t::ENTRY_TYPE_BESHORT:
    2115             :         case entry_t::type_t::ENTRY_TYPE_USHORT:
    2116             :         case entry_t::type_t::ENTRY_TYPE_ULESHORT:
    2117             :         case entry_t::type_t::ENTRY_TYPE_UBESHORT:
    2118             :         case entry_t::type_t::ENTRY_TYPE_LONG:
    2119             :         case entry_t::type_t::ENTRY_TYPE_LELONG:
    2120             :         case entry_t::type_t::ENTRY_TYPE_BELONG:
    2121             :         case entry_t::type_t::ENTRY_TYPE_MELONG:
    2122             :         case entry_t::type_t::ENTRY_TYPE_ULONG:
    2123             :         case entry_t::type_t::ENTRY_TYPE_ULELONG:
    2124             :         case entry_t::type_t::ENTRY_TYPE_UBELONG:
    2125             :         case entry_t::type_t::ENTRY_TYPE_UMELONG:
    2126             :         case entry_t::type_t::ENTRY_TYPE_BEID3:
    2127             :         case entry_t::type_t::ENTRY_TYPE_LEID3:
    2128             :         case entry_t::type_t::ENTRY_TYPE_UBEID3:
    2129             :         case entry_t::type_t::ENTRY_TYPE_ULEID3:
    2130             :         case entry_t::type_t::ENTRY_TYPE_QUAD:
    2131             :         case entry_t::type_t::ENTRY_TYPE_BEQUAD:
    2132             :         case entry_t::type_t::ENTRY_TYPE_LEQUAD:
    2133             :         case entry_t::type_t::ENTRY_TYPE_UQUAD:
    2134             :         case entry_t::type_t::ENTRY_TYPE_UBEQUAD:
    2135             :         case entry_t::type_t::ENTRY_TYPE_ULEQUAD:
    2136             :         case entry_t::type_t::ENTRY_TYPE_DATE:
    2137             :         case entry_t::type_t::ENTRY_TYPE_QDATE:
    2138             :         case entry_t::type_t::ENTRY_TYPE_LDATE:
    2139             :         case entry_t::type_t::ENTRY_TYPE_QLDATE:
    2140             :         case entry_t::type_t::ENTRY_TYPE_BEDATE:
    2141             :         case entry_t::type_t::ENTRY_TYPE_BEQDATE:
    2142             :         case entry_t::type_t::ENTRY_TYPE_BELDATE:
    2143             :         case entry_t::type_t::ENTRY_TYPE_BEQLDATE:
    2144             :         case entry_t::type_t::ENTRY_TYPE_LEDATE:
    2145             :         case entry_t::type_t::ENTRY_TYPE_LEQDATE:
    2146             :         case entry_t::type_t::ENTRY_TYPE_LELDATE:
    2147             :         case entry_t::type_t::ENTRY_TYPE_LEQLDATE:
    2148             :         case entry_t::type_t::ENTRY_TYPE_MEDATE:
    2149             :         case entry_t::type_t::ENTRY_TYPE_MELDATE:
    2150             :             // integers expect a number of flags so we manage these here
    2151          14 :             token = f_lexer->get_token(lexer::mode_t::LEXER_MODE_NORMAL);
    2152             :             // first check whether we have a '!' (must be the very first)
    2153          28 :             if(token.get_type() == lexer::token_t::type_t::TOKEN_TYPE_CHARACTER
    2154          14 :             && token.get_character() == '!')
    2155             :             {
    2156           6 :                 e->set_flags(entry_t::ENTRY_FLAG_NOT);
    2157           6 :                 token = f_lexer->get_token(lexer::mode_t::LEXER_MODE_NORMAL);
    2158             :             }
    2159             :             {
    2160          14 :                 bool has_operator(token.get_type() == lexer::token_t::type_t::TOKEN_TYPE_CHARACTER);
    2161          14 :                 if(has_operator
    2162          14 :                 && token.get_character() != '-')
    2163             :                 {
    2164             :                     // verify that it is legal with a floating point value if such
    2165           0 :                     if(is_float)
    2166             :                     {
    2167           0 :                         switch(token.get_character())
    2168             :                         {
    2169           0 :                         case '&':
    2170             :                         case '^':
    2171             :                         case '~':
    2172           0 :                             std::cerr << "error:" << f_lexer->current_filename()
    2173           0 :                                     << ":" << f_lexer->current_line()
    2174           0 :                                     << ": " << static_cast<char>(token.get_character())
    2175           0 :                                     << " used with a floating point number.\n";
    2176           0 :                             exit(1);
    2177             :                             snap::NOTREACHED();
    2178             : 
    2179             :                         }
    2180             :                     }
    2181           0 :                     switch(token.get_character())
    2182             :                     {
    2183           0 :                     case '=':
    2184           0 :                         e->set_flags(entry_t::ENTRY_FLAG_EQUAL);
    2185           0 :                         break;
    2186             : 
    2187           0 :                     case '<':
    2188           0 :                         e->set_flags(entry_t::ENTRY_FLAG_LESS);
    2189           0 :                         break;
    2190             : 
    2191           0 :                     case '>':
    2192           0 :                         e->set_flags(entry_t::ENTRY_FLAG_GREATER);
    2193           0 :                         break;
    2194             : 
    2195           0 :                     case '&':
    2196           0 :                         e->set_flags(entry_t::ENTRY_FLAG_ARE_SET);
    2197           0 :                         break;
    2198             : 
    2199           0 :                     case '^':
    2200           0 :                         e->set_flags(entry_t::ENTRY_FLAG_ARE_CLEAR);
    2201           0 :                         break;
    2202             : 
    2203           0 :                     case '~':
    2204           0 :                         e->set_flags(entry_t::ENTRY_FLAG_NEGATE);
    2205           0 :                         break;
    2206             : 
    2207           0 :                     default:
    2208             :                         std::cerr << "error:"
    2209           0 :                                   << f_lexer->current_filename() << ":"
    2210             :                                   << f_lexer->current_line() << ": unknown comparison operator "
    2211           0 :                                   << token.get_character() << ".\n";
    2212           0 :                         exit(1);
    2213             :                         snap::NOTREACHED();
    2214             : 
    2215             :                     }
    2216           0 :                     token = f_lexer->get_token(lexer::mode_t::LEXER_MODE_NORMAL);
    2217             : 
    2218             :                     // we allow spaces after an operator
    2219           0 :                     if(token.get_type() == lexer::token_t::type_t::TOKEN_TYPE_CHARACTER
    2220           0 :                     && token.get_character() == ' ')
    2221             :                     {
    2222           0 :                         token = f_lexer->get_token(lexer::mode_t::LEXER_MODE_NORMAL);
    2223             :                     }
    2224             :                 }
    2225             :                 // one special case here: "x"
    2226          28 :                 if(token.get_type() == lexer::token_t::type_t::TOKEN_TYPE_STRING
    2227          18 :                 && token.get_string() == "x"
    2228          32 :                 && !has_operator)
    2229             :                 {
    2230           4 :                     e->set_flags(entry_t::ENTRY_FLAG_TRUE);
    2231             :                 }
    2232             :                 else
    2233             :                 {
    2234          10 :                     int sign(1);
    2235          20 :                     if(token.get_type() == lexer::token_t::type_t::TOKEN_TYPE_CHARACTER
    2236          10 :                     && token.get_character() == '-')
    2237             :                     {
    2238           0 :                         sign = -1;
    2239           0 :                         token = f_lexer->get_token(lexer::mode_t::LEXER_MODE_NORMAL);
    2240             :                     }
    2241          10 :                     if(token.get_type() == lexer::token_t::type_t::TOKEN_TYPE_FLOAT)
    2242             :                     {
    2243           0 :                         if(!is_float)
    2244             :                         {
    2245           0 :                             std::cerr << "error:" << f_lexer->current_filename()
    2246           0 :                                     << ":" << f_lexer->current_line()
    2247           0 :                                     << ": an integer was expected for an entry specifying a number type.\n";
    2248           0 :                             exit(1);
    2249             :                         }
    2250             : 
    2251           0 :                         e->set_float(token.get_float() * static_cast<double>(sign));
    2252             :                     }
    2253          10 :                     else if(token.get_type() == lexer::token_t::type_t::TOKEN_TYPE_INTEGER)
    2254             :                     {
    2255          10 :                         if(is_float)
    2256             :                         {
    2257           0 :                             std::cerr << "error:" << f_lexer->current_filename()
    2258           0 :                                     << ":" << f_lexer->current_line()
    2259           0 :                                     << ": a floating point number was expected for an entry specifying a floating point type, got an integer.\n";
    2260           0 :                             exit(1);
    2261             :                         }
    2262             : 
    2263          10 :                         e->set_integer(token.get_integer() * sign);
    2264             :                     }
    2265             :                     else
    2266             :                     {
    2267           0 :                         std::cerr << "error:" << f_lexer->current_filename()
    2268           0 :                                 << ":" << f_lexer->current_line()
    2269           0 :                                 << ": an \"x\", an integer, or a floating point number were expected (instead token type is: "
    2270           0 :                                 << static_cast<int>(token.get_type())
    2271           0 :                                 << ").\n";
    2272           0 :                         exit(1);
    2273             :                     }
    2274             :                 }
    2275             :             }
    2276          14 :             break;
    2277             : 
    2278           2 :         case entry_t::type_t::ENTRY_TYPE_STRING:
    2279             :         case entry_t::type_t::ENTRY_TYPE_PSTRING:
    2280             :         case entry_t::type_t::ENTRY_TYPE_BESTRING16:
    2281             :         case entry_t::type_t::ENTRY_TYPE_LESTRING16:
    2282             :         case entry_t::type_t::ENTRY_TYPE_SEARCH:
    2283             :             // strings can start with !, !=, !<, !>, =, <, >
    2284             :             // however, we better read the string as a whole
    2285             :             {
    2286           2 :                 token = f_lexer->get_token(lexer::mode_t::LEXER_MODE_REGEX);
    2287           4 :                 std::string str(token.get_string());
    2288           2 :                 if(str[0] == '!')
    2289             :                 {
    2290           0 :                     str.erase(str.begin());
    2291           0 :                     e->set_flags(entry_t::ENTRY_FLAG_NOT);
    2292             :                 }
    2293           2 :                 switch(str[0])
    2294             :                 {
    2295           0 :                 case '=':
    2296           0 :                     str.erase(str.begin());
    2297           0 :                     e->set_flags(entry_t::ENTRY_FLAG_EQUAL);
    2298           0 :                     break;
    2299             : 
    2300           0 :                 case '<':
    2301           0 :                     str.erase(str.begin());
    2302           0 :                     e->set_flags(entry_t::ENTRY_FLAG_LESS);
    2303           0 :                     break;
    2304             : 
    2305           0 :                 case '>':
    2306           0 :                     str.erase(str.begin());
    2307           0 :                     e->set_flags(entry_t::ENTRY_FLAG_GREATER);
    2308           0 :                     break;
    2309             : 
    2310             :                 }
    2311           4 :                 e->set_string(str);
    2312             :             }
    2313           2 :             break;
    2314             : 
    2315           0 :         case entry_t::type_t::ENTRY_TYPE_REGEX:
    2316           0 :             token = f_lexer->get_token(lexer::mode_t::LEXER_MODE_REGEX);
    2317           0 :             e->set_string(token.get_string());
    2318           0 :             break;
    2319             : 
    2320           0 :         case entry_t::type_t::ENTRY_TYPE_NAME: // this creates a macro
    2321           0 :             token = f_lexer->get_token(lexer::mode_t::LEXER_MODE_NORMAL);
    2322           0 :             e->set_string(token.get_string());
    2323           0 :             break;
    2324             : 
    2325           0 :         case entry_t::type_t::ENTRY_TYPE_USE: // this calls a macro
    2326           0 :             token = f_lexer->get_token(lexer::mode_t::LEXER_MODE_NORMAL);
    2327           0 :             e->set_string(token.get_string());
    2328           0 :             break;
    2329             : 
    2330           0 :         case entry_t::type_t::ENTRY_TYPE_INDIRECT:
    2331             :             // the indirect may or may not be followed by the 'x' before
    2332             :             // the message... since we ignore the message we can also
    2333             :             // ignore the x here
    2334           0 :             break;
    2335             : 
    2336           0 :         case entry_t::type_t::ENTRY_TYPE_DEFAULT:
    2337           0 :             token = f_lexer->get_token(lexer::mode_t::LEXER_MODE_NORMAL);
    2338           0 :             if(token.get_type() != lexer::token_t::type_t::TOKEN_TYPE_STRING
    2339           0 :             || token.get_string() != "x")
    2340             :             {
    2341           0 :                 std::cerr << "error: default must always be used with \"x\".\n";
    2342           0 :                 exit(1);
    2343             :             }
    2344           0 :             e->set_flags(entry_t::ENTRY_FLAG_TRUE);
    2345           0 :             break;
    2346             : 
    2347           0 :         case entry_t::type_t::ENTRY_TYPE_UNKNOWN:
    2348           0 :             std::cerr << "error: entry type still unknown when defining its value.\n";
    2349           0 :             exit(1);
    2350             :             snap::NOTREACHED();
    2351             : 
    2352             :         }
    2353          16 :         token = f_lexer->get_token(lexer::mode_t::LEXER_MODE_MESSAGE);
    2354          16 :         if(token.get_type() == lexer::token_t::type_t::TOKEN_TYPE_STRING)
    2355             :         {
    2356             :             // We don't do anything with the message, but just in case I
    2357             :             // show here that we'd have to skip the spaces before saving it
    2358             :             //std::string msg(token.get_string());
    2359             :             //while(msg[0] == ' ' || msg[0] == '\t')
    2360             :             //{
    2361             :             //    msg.erase(msg.begin());
    2362             :             //}
    2363             :             //e->set_message(msg);
    2364             : 
    2365             :             // we can switch back to normal to read the \n
    2366          16 :             token = f_lexer->get_token(lexer::mode_t::LEXER_MODE_NORMAL);
    2367             :         }
    2368          32 :         if(token.get_type() != lexer::token_t::type_t::TOKEN_TYPE_CHARACTER
    2369          16 :         || token.get_character() != '\n')
    2370             :         {
    2371           0 :             std::cerr << "error: expected an optional message and a new line at the end of the line.\n";
    2372           0 :             exit(1);
    2373             :         }
    2374             : 
    2375          16 :         f_entries.push_back(e);
    2376          20 :     }
    2377             : }
    2378             : 
    2379             : 
    2380           1 : void parser::output()
    2381             : {
    2382             :     // the output is sent to stdout so that way we can save the data to
    2383             :     // any file using a redirection or see it on the screen
    2384           1 :     size_t const max_entries(f_entries.size());
    2385           1 :     if(max_entries == 0)
    2386             :     {
    2387           0 :         std::cerr << "error: read some magic files, but did not get an valid entries...\n";
    2388           0 :         exit(1);
    2389             :     }
    2390             : 
    2391           1 :     if(f_entries[0]->get_level() != 0)
    2392             :     {
    2393           0 :         std::cerr << "error: the very first entry must always be a level zero entry.\n";
    2394           0 :         exit(1);
    2395             :     }
    2396             : 
    2397           1 :     output_header();
    2398             : 
    2399           1 :     bool has_mime(false);
    2400           2 :     std::string name;
    2401           1 :     size_t start(0);
    2402          17 :     for(size_t i(0); i < max_entries; ++i)
    2403             :     {
    2404          16 :         if(f_entries[i]->get_level() == 0)
    2405             :         {
    2406             :             // if we get an entry with a mime type, then send it out
    2407           4 :             if(has_mime)
    2408             :             {
    2409           3 :                 output_entry(start, i, true);
    2410           3 :                 has_mime = false;
    2411             :             }
    2412           1 :             else if(!name.empty())
    2413             :             {
    2414           0 :                 std::cout << "__macro_" << name << " = function(offset) {" << std::endl;
    2415           0 :                 output_entry(start, i, false);
    2416           0 :                 std::cout << "return false;};" << std::endl;
    2417           0 :                 name.clear();
    2418             :             }
    2419           4 :             start = i;
    2420             :         }
    2421          16 :         if(!f_entries[i]->get_mimetype().empty())
    2422             :         {
    2423             :             // this means it is worth encoding
    2424           4 :             has_mime = true;
    2425             :         }
    2426          16 :         if(f_entries[i]->get_type() == entry_t::type_t::ENTRY_TYPE_NAME)
    2427             :         {
    2428             :             // found a macro
    2429           0 :             name = f_entries[i]->get_string();
    2430             :         }
    2431             :     }
    2432           1 :     if(has_mime)
    2433             :     {
    2434           1 :         output_entry(start, max_entries, true);
    2435             :     }
    2436             : 
    2437           1 :     output_footer();
    2438           1 : }
    2439             : 
    2440             : 
    2441           4 : void parser::output_entry(size_t start, size_t end, bool has_mime)
    2442             : {
    2443           4 :     struct recursive_output
    2444             :     {
    2445           4 :         recursive_output(bool has_mime)
    2446           4 :             : f_has_mime(has_mime)
    2447             :         {
    2448           4 :         }
    2449             : 
    2450          12 :         size_t output(size_t pos)
    2451             :         {
    2452          12 :             output_if(pos);
    2453          12 :             size_t next_pos(pos + 1);
    2454          24 :             if(next_pos < f_entries.size()
    2455          12 :             && f_entries[pos]->get_level() <= f_entries[next_pos]->get_level())
    2456             :             {
    2457             :                 // returns our new next_pos
    2458           8 :                 next_pos = output(next_pos);  // recursive call
    2459             :             }
    2460           4 :             else if(!f_has_mime)
    2461             :             {
    2462           0 :                 std::cout << "return true;" << std::endl;
    2463             :             }
    2464          12 :             output_mimetype(pos);
    2465          12 :             output_endif(pos);
    2466             : 
    2467          12 :             return next_pos;
    2468             :         }
    2469             : 
    2470          12 :         void output_if(size_t pos)
    2471             :         {
    2472             :             typedef void (recursive_output::*output_func_t)(size_t pos);
    2473             : #pragma GCC diagnostic push
    2474             : #pragma GCC diagnostic ignored "-Wpedantic"
    2475             :             static output_func_t const output_by_type[] =
    2476             :             {
    2477             :                 [static_cast<int>(entry_t::type_t::ENTRY_TYPE_UNKNOWN)] = &recursive_output::output_unknown,
    2478             : 
    2479             :                 // int -- 1 byte
    2480             :                 [static_cast<int>(entry_t::type_t::ENTRY_TYPE_BYTE)] = &recursive_output::output_byte,
    2481             :                 [static_cast<int>(entry_t::type_t::ENTRY_TYPE_UBYTE)] = &recursive_output::output_ubyte,
    2482             :                 // int -- 2 bytes
    2483             :                 [static_cast<int>(entry_t::type_t::ENTRY_TYPE_SHORT)] = &recursive_output::output_short,
    2484             :                 [static_cast<int>(entry_t::type_t::ENTRY_TYPE_LESHORT)] = &recursive_output::output_leshort,
    2485             :                 [static_cast<int>(entry_t::type_t::ENTRY_TYPE_BESHORT)] = &recursive_output::output_beshort,
    2486             :                 [static_cast<int>(entry_t::type_t::ENTRY_TYPE_USHORT)] = &recursive_output::output_ushort,
    2487             :                 [static_cast<int>(entry_t::type_t::ENTRY_TYPE_ULESHORT)] = &recursive_output::output_uleshort,
    2488             :                 [static_cast<int>(entry_t::type_t::ENTRY_TYPE_UBESHORT)] = &recursive_output::output_ubeshort,
    2489             :                 // int -- 4 bytes
    2490             :                 [static_cast<int>(entry_t::type_t::ENTRY_TYPE_LONG)] = &recursive_output::output_long,
    2491             :                 [static_cast<int>(entry_t::type_t::ENTRY_TYPE_LELONG)] = &recursive_output::output_lelong,
    2492             :                 [static_cast<int>(entry_t::type_t::ENTRY_TYPE_BELONG)] = &recursive_output::output_belong,
    2493             :                 [static_cast<int>(entry_t::type_t::ENTRY_TYPE_MELONG)] = &recursive_output::output_melong,
    2494             :                 [static_cast<int>(entry_t::type_t::ENTRY_TYPE_ULONG)] = &recursive_output::output_ulong,
    2495             :                 [static_cast<int>(entry_t::type_t::ENTRY_TYPE_ULELONG)] = &recursive_output::output_ulelong,
    2496             :                 [static_cast<int>(entry_t::type_t::ENTRY_TYPE_UBELONG)] = &recursive_output::output_ubelong,
    2497             :                 [static_cast<int>(entry_t::type_t::ENTRY_TYPE_UMELONG)] = &recursive_output::output_umelong,
    2498             :                 // int -- 4 bytes -- an ID3 size is 32 bits defined as: ((size & 0x0FFFFFFF) * 4)
    2499             :                 [static_cast<int>(entry_t::type_t::ENTRY_TYPE_BEID3)] = &recursive_output::output_beid3,
    2500             :                 [static_cast<int>(entry_t::type_t::ENTRY_TYPE_LEID3)] = &recursive_output::output_leid3,
    2501             :                 [static_cast<int>(entry_t::type_t::ENTRY_TYPE_UBEID3)] = &recursive_output::output_ubeid3,
    2502             :                 [static_cast<int>(entry_t::type_t::ENTRY_TYPE_ULEID3)] = &recursive_output::output_uleid3,
    2503             :                 // int -- 8 bytes
    2504             :                 [static_cast<int>(entry_t::type_t::ENTRY_TYPE_QUAD)] = &recursive_output::output_quad,
    2505             :                 [static_cast<int>(entry_t::type_t::ENTRY_TYPE_BEQUAD)] = &recursive_output::output_bequad,
    2506             :                 [static_cast<int>(entry_t::type_t::ENTRY_TYPE_LEQUAD)] = &recursive_output::output_lequad,
    2507             :                 [static_cast<int>(entry_t::type_t::ENTRY_TYPE_UQUAD)] = &recursive_output::output_uquad,
    2508             :                 [static_cast<int>(entry_t::type_t::ENTRY_TYPE_UBEQUAD)] = &recursive_output::output_ubequad,
    2509             :                 [static_cast<int>(entry_t::type_t::ENTRY_TYPE_ULEQUAD)] = &recursive_output::output_ulequad,
    2510             :                 // float -- 4 bytes
    2511             :                 [static_cast<int>(entry_t::type_t::ENTRY_TYPE_FLOAT)] = &recursive_output::output_float,
    2512             :                 [static_cast<int>(entry_t::type_t::ENTRY_TYPE_BEFLOAT)] = &recursive_output::output_befloat,
    2513             :                 [static_cast<int>(entry_t::type_t::ENTRY_TYPE_LEFLOAT)] = &recursive_output::output_lefloat,
    2514             :                 // float -- 8 bytes
    2515             :                 [static_cast<int>(entry_t::type_t::ENTRY_TYPE_DOUBLE)] = &recursive_output::output_double,
    2516             :                 [static_cast<int>(entry_t::type_t::ENTRY_TYPE_BEDOUBLE)] = &recursive_output::output_bedouble,
    2517             :                 [static_cast<int>(entry_t::type_t::ENTRY_TYPE_LEDOUBLE)] = &recursive_output::output_ledouble,
    2518             :                 // "text" (if value includes characters considered binary bytes then it is considered binary too)
    2519             :                 [static_cast<int>(entry_t::type_t::ENTRY_TYPE_STRING)] = &recursive_output::output_string,
    2520             :                 [static_cast<int>(entry_t::type_t::ENTRY_TYPE_PSTRING)] = &recursive_output::output_pstring,
    2521             :                 [static_cast<int>(entry_t::type_t::ENTRY_TYPE_BESTRING16)] = &recursive_output::output_besearch16,
    2522             :                 [static_cast<int>(entry_t::type_t::ENTRY_TYPE_LESTRING16)] = &recursive_output::output_lesearch16,
    2523             :                 [static_cast<int>(entry_t::type_t::ENTRY_TYPE_SEARCH)] = &recursive_output::output_search,
    2524             :                 [static_cast<int>(entry_t::type_t::ENTRY_TYPE_REGEX)] = &recursive_output::output_regex,
    2525             :                 // date
    2526             :                 [static_cast<int>(entry_t::type_t::ENTRY_TYPE_DATE)] = &recursive_output::output_date,
    2527             :                 [static_cast<int>(entry_t::type_t::ENTRY_TYPE_QDATE)] = &recursive_output::output_qdate,
    2528             :                 [static_cast<int>(entry_t::type_t::ENTRY_TYPE_LDATE)] = &recursive_output::output_ldate,
    2529             :                 [static_cast<int>(entry_t::type_t::ENTRY_TYPE_QLDATE)] = &recursive_output::output_qldate,
    2530             :                 [static_cast<int>(entry_t::type_t::ENTRY_TYPE_BEDATE)] = &recursive_output::output_bedate,
    2531             :                 [static_cast<int>(entry_t::type_t::ENTRY_TYPE_BEQDATE)] = &recursive_output::output_beqdate,
    2532             :                 [static_cast<int>(entry_t::type_t::ENTRY_TYPE_BELDATE)] = &recursive_output::output_beldate,
    2533             :                 [static_cast<int>(entry_t::type_t::ENTRY_TYPE_BEQLDATE)] = &recursive_output::output_beqldate,
    2534             :                 [static_cast<int>(entry_t::type_t::ENTRY_TYPE_LEDATE)] = &recursive_output::output_ledate,
    2535             :                 [static_cast<int>(entry_t::type_t::ENTRY_TYPE_LEQDATE)] = &recursive_output::output_leqdate,
    2536             :                 [static_cast<int>(entry_t::type_t::ENTRY_TYPE_LELDATE)] = &recursive_output::output_leldate,
    2537             :                 [static_cast<int>(entry_t::type_t::ENTRY_TYPE_LEQLDATE)] = &recursive_output::output_leqldate,
    2538             :                 [static_cast<int>(entry_t::type_t::ENTRY_TYPE_MEDATE)] = &recursive_output::output_medate,
    2539             :                 [static_cast<int>(entry_t::type_t::ENTRY_TYPE_MELDATE)] = &recursive_output::output_meldate,
    2540             :                 // special
    2541             :                 [static_cast<int>(entry_t::type_t::ENTRY_TYPE_INDIRECT)] = &recursive_output::output_indirect,
    2542             :                 [static_cast<int>(entry_t::type_t::ENTRY_TYPE_DEFAULT)] = &recursive_output::output_default,
    2543             :                 [static_cast<int>(entry_t::type_t::ENTRY_TYPE_NAME)] = &recursive_output::output_name,
    2544             :                 [static_cast<int>(entry_t::type_t::ENTRY_TYPE_USE)] = &recursive_output::output_use
    2545             :             };
    2546             : #pragma GCC diagnostic pop
    2547             : 
    2548          12 :             std::cout << "if(";
    2549          12 :             (this->*output_by_type[static_cast<int>(f_entries[pos]->get_type())])(pos);
    2550          12 :             std::cout << ")\n{\n";
    2551          12 :         }
    2552             : 
    2553           0 :         void output_unknown(size_t pos)
    2554             :         {
    2555           0 :             snap::NOTUSED(pos);
    2556           0 :             std::cerr << "error: found an unknown entry while outputing data.\n";
    2557           0 :             exit(1);
    2558             :         }
    2559             : 
    2560           2 :         void output_byte(size_t pos)
    2561             :         {
    2562           2 :             int64_t const be(f_entries[pos]->get_integer());
    2563           2 :             std::cout << "buf[" << (f_has_mime ? "" : "offset+") << f_entries[pos]->get_offset() << "]"
    2564             :                       << " "
    2565           4 :                       << f_entries[pos]->flags_to_js_operator()
    2566           4 :                       << " 0x"
    2567           2 :                       << std::hex << std::uppercase
    2568           2 :                       << (be & 0xff)
    2569           2 :                       << std::dec << std::nouppercase;
    2570           2 :         }
    2571             : 
    2572           0 :         void output_ubyte(size_t pos)
    2573             :         {
    2574           0 :             int64_t const be(f_entries[pos]->get_integer());
    2575           0 :             std::cout << "buf[" << (f_has_mime ? "" : "offset+") << f_entries[pos]->get_offset() << "]"
    2576             :                       << " "
    2577           0 :                       << f_entries[pos]->flags_to_js_operator()
    2578           0 :                       << " 0x"
    2579           0 :                       << std::hex << std::uppercase
    2580           0 :                       << (be & 0xff)
    2581           0 :                       << std::dec << std::nouppercase;
    2582           0 :         }
    2583             : 
    2584           0 :         void output_short(size_t pos)
    2585             :         {
    2586           0 :             snap::NOTUSED(pos);
    2587           0 :             std::cerr << "error: type not implemented yet (short).\n";
    2588           0 :             exit(1);
    2589             :         }
    2590             : 
    2591           2 :         void output_leshort(size_t pos)
    2592             :         {
    2593           2 :             int64_t const le(f_entries[pos]->get_integer());
    2594           2 :             int64_t const offset(f_entries[pos]->get_offset());
    2595           2 :             std::cout << "buf[" << (f_has_mime ? "" : "offset+") << offset
    2596           2 :                       << "] + buf[" << (f_has_mime ? "" : "offset+") << (offset + 1)
    2597             :                       << "] * 256 "
    2598           4 :                       << f_entries[pos]->flags_to_js_operator()
    2599           4 :                       << " 0x"
    2600           2 :                       << std::hex << std::uppercase
    2601           2 :                       << (le & 0xffff)
    2602           2 :                       << std::dec << std::nouppercase;
    2603           2 :         }
    2604             : 
    2605           0 :         void output_beshort(size_t pos)
    2606             :         {
    2607           0 :             int64_t const be(f_entries[pos]->get_integer());
    2608           0 :             int64_t const offset(f_entries[pos]->get_offset());
    2609           0 :             std::cout << "buf[" << (f_has_mime ? "" : "offset+") << offset
    2610           0 :                       << "] * 256 + buf[" << (f_has_mime ? "" : "offset+") << (offset + 1)
    2611             :                       << " "
    2612           0 :                       << f_entries[pos]->flags_to_js_operator()
    2613           0 :                       << " 0x"
    2614           0 :                       << std::hex << std::uppercase
    2615           0 :                       << (be & 0xffff)
    2616           0 :                       << std::dec << std::nouppercase;
    2617           0 :         }
    2618             : 
    2619           0 :         void output_ushort(size_t pos)
    2620             :         {
    2621           0 :             snap::NOTUSED(pos);
    2622           0 :             std::cerr << "error: type not implemented yet (ushort).\n";
    2623           0 :             exit(1);
    2624             :         }
    2625             : 
    2626           0 :         void output_uleshort(size_t pos)
    2627             :         {
    2628           0 :             int64_t const ule(f_entries[pos]->get_integer());
    2629           0 :             int64_t const offset(f_entries[pos]->get_offset());
    2630           0 :             std::cout << "buf[" << (f_has_mime ? "" : "offset+") << offset
    2631           0 :                       << "] + buf[" << (f_has_mime ? "" : "offset+") << (offset + 1)
    2632             :                       << " * 256 "
    2633           0 :                       << f_entries[pos]->flags_to_js_operator()
    2634           0 :                       << " 0x"
    2635           0 :                       << std::hex << std::uppercase
    2636           0 :                       << (ule & 0xffff)
    2637           0 :                       << std::dec << std::nouppercase;
    2638           0 :         }
    2639             : 
    2640           0 :         void output_ubeshort(size_t pos)
    2641             :         {
    2642           0 :             int64_t const ube(f_entries[pos]->get_integer());
    2643           0 :             int64_t const offset(f_entries[pos]->get_offset());
    2644           0 :             std::cout << "buf[" << (f_has_mime ? "" : "offset+") << offset
    2645           0 :                       << "] * 256 + buf[" << (f_has_mime ? "" : "offset+") << (offset + 1)
    2646             :                       << " "
    2647           0 :                       << f_entries[pos]->flags_to_js_operator()
    2648           0 :                       << " 0x"
    2649           0 :                       << std::hex << std::uppercase
    2650           0 :                       << (ube & 0xffff)
    2651           0 :                       << std::dec << std::nouppercase;
    2652           0 :         }
    2653             : 
    2654           0 :         void output_long(size_t pos)
    2655             :         {
    2656             :             // this is a machine byte order, I am not currently sure
    2657             :             // on how we could really get that in JavaScript; for
    2658             :             // now do a little endian since most users have x86 based
    2659             :             // processors which are in little endian
    2660             :             //
    2661           0 :             int64_t const le(f_entries[pos]->get_integer());
    2662           0 :             int64_t const offset(f_entries[pos]->get_offset());
    2663           0 :             std::cout << "buf[" << (f_has_mime ? "" : "offset+") << offset
    2664           0 :                       << "] + buf[" << (f_has_mime ? "" : "offset+") << (offset + 1)
    2665           0 :                       << "] * 256 + buf[" << (f_has_mime ? "" : "offset+") << (offset + 2)
    2666           0 :                       << "] * 65536 + buf[" << (f_has_mime ? "" : "offset+") << (offset + 3)
    2667             :                       << "] * 16777216 "
    2668           0 :                       << f_entries[pos]->flags_to_js_operator()
    2669           0 :                       << " 0x"
    2670           0 :                       << std::hex << std::uppercase
    2671           0 :                       << (le & 0xffffffffLL)
    2672           0 :                       << std::dec << std::nouppercase;
    2673           0 :         }
    2674             : 
    2675           4 :         void output_lelong(size_t pos)
    2676             :         {
                                   // Writes to std::cout a JavaScript expression reading four
                                   // bytes of buf in little endian order (buf[offset] has weight 1,
                                   // buf[offset + 3] has weight 16777216), followed by the operator
                                   // text from flags_to_js_operator() and the entry's integer
                                   // masked to 32 bits in uppercase hexadecimal. Indices get an
                                   // "offset+" prefix when f_has_mime is false.
    2677           4 :             int64_t const le(f_entries[pos]->get_integer());
    2678           4 :             int64_t const offset(f_entries[pos]->get_offset());
    2679           4 :             std::cout << "buf[" << (f_has_mime ? "" : "offset+") << offset
    2680           4 :                       << "] + buf[" << (f_has_mime ? "" : "offset+") << (offset + 1)
    2681           4 :                       << "] * 256 + buf[" << (f_has_mime ? "" : "offset+") << (offset + 2)
    2682           4 :                       << "] * 65536 + buf[" << (f_has_mime ? "" : "offset+") << (offset + 3)
    2683             :                       << "] * 16777216 "
    2684           8 :                       << f_entries[pos]->flags_to_js_operator()
    2685           8 :                       << " 0x"
    2686           4 :                       << std::hex << std::uppercase
    2687           4 :                       << (le & 0xffffffffLL)
    2688           4 :                       << std::dec << std::nouppercase;
    2689           4 :         }
    2690             : 
    2691           2 :         void output_belong(size_t pos)
    2692             :         {
                                   // Writes to std::cout a JavaScript expression reading four
                                   // bytes of buf in big endian order (buf[offset] has weight
                                   // 16777216, buf[offset + 3] has weight 1), followed by the
                                   // operator text from flags_to_js_operator() and the entry's
                                   // integer masked to 32 bits in uppercase hexadecimal. Indices
                                   // get an "offset+" prefix when f_has_mime is false.
    2693           2 :             int64_t const be(f_entries[pos]->get_integer());
    2694           2 :             int64_t const offset(f_entries[pos]->get_offset());
    2695           2 :             std::cout << "buf[" << (f_has_mime ? "" : "offset+") << offset
    2696           2 :                       << "] * 16777216 + buf[" << (f_has_mime ? "" : "offset+") << (offset + 1)
    2697           2 :                       << "] * 65536 + buf[" << (f_has_mime ? "" : "offset+") << (offset + 2)
    2698           2 :                       << "] * 256 + buf[" << (f_has_mime ? "" : "offset+") << (offset + 3)
    2699             :                       << "] "
    2700           4 :                       << f_entries[pos]->flags_to_js_operator()
    2701           4 :                       << " 0x"
    2702           2 :                       << std::hex << std::uppercase
    2703           2 :                       << (be & 0xffffffffLL)
    2704           2 :                       << std::dec << std::nouppercase;
    2705           2 :         }
    2706             : 
    2707           0 :         void output_melong(size_t pos)
    2708             :         {
                                   // Placeholder: no JavaScript is generated for the `melong` type;
                                   // the error is reported on std::cerr and the tool exits with status 1.
    2709           0 :             snap::NOTUSED(pos);
    2710           0 :             std::cerr << "error: type not implemented yet (melong).\n";
    2711           0 :             exit(1);
    2712             :         }
    2713             : 
    2714           0 :         void output_ulong(size_t pos)
    2715             :         {
                                   // Placeholder: no JavaScript is generated for the `ulong` type;
                                   // the error is reported on std::cerr and the tool exits with status 1.
    2716           0 :             snap::NOTUSED(pos);
    2717           0 :             std::cerr << "error: type not implemented yet (ulong).\n";
    2718           0 :             exit(1);
    2719             :         }
    2720             : 
    2721           0 :         void output_ulelong(size_t pos)
    2722             :         {
                                   // Placeholder: no JavaScript is generated for the `ulelong` type;
                                   // the error is reported on std::cerr and the tool exits with status 1.
    2723           0 :             snap::NOTUSED(pos);
    2724           0 :             std::cerr << "error: type not implemented yet (ulelong).\n";
    2725           0 :             exit(1);
    2726             :         }
    2727             : 
    2728           0 :         void output_ubelong(size_t pos)
    2729             :         {
                                   // Writes to std::cout the same big endian four byte expression
                                   // as output_belong(): buf[offset] has weight 16777216 and
                                   // buf[offset + 3] has weight 1, followed by the operator text
                                   // from flags_to_js_operator() and the entry's integer masked to
                                   // 32 bits in uppercase hexadecimal. Indices get an "offset+"
                                   // prefix when f_has_mime is false.
    2730           0 :             int64_t const ube(f_entries[pos]->get_integer());
    2731           0 :             int64_t const offset(f_entries[pos]->get_offset());
    2732           0 :             std::cout << "buf[" << (f_has_mime ? "" : "offset+") << offset
    2733           0 :                       << "] * 16777216 + buf[" << (f_has_mime ? "" : "offset+") << (offset + 1)
    2734           0 :                       << "] * 65536 + buf[" << (f_has_mime ? "" : "offset+") << (offset + 2)
    2735           0 :                       << "] * 256 + buf[" << (f_has_mime ? "" : "offset+") << (offset + 3)
    2736             :                       << "] "
    2737           0 :                       << f_entries[pos]->flags_to_js_operator()
    2738           0 :                       << " 0x"
    2739           0 :                       << std::hex << std::uppercase
    2740           0 :                       << (ube & 0xffffffffLL)
    2741           0 :                       << std::dec << std::nouppercase;
    2742           0 :         }
    2743             : 
    2744           0 :         void output_umelong(size_t pos)
    2745             :         {
                                   // Placeholder: no JavaScript is generated for the `umelong` type;
                                   // the error is reported on std::cerr and the tool exits with status 1.
    2746           0 :             snap::NOTUSED(pos);
    2747           0 :             std::cerr << "error: type not implemented yet (umelong).\n";
    2748           0 :             exit(1);
    2749             :         }
    2750             : 
    2751           0 :         void output_beid3(size_t pos)
    2752             :         {
                                   // Placeholder: no JavaScript is generated for the `beid3` type;
                                   // the error is reported on std::cerr and the tool exits with status 1.
    2753           0 :             snap::NOTUSED(pos);
    2754           0 :             std::cerr << "error: type not implemented yet (beid3).\n";
    2755           0 :             exit(1);
    2756             :         }
    2757             : 
    2758           0 :         void output_leid3(size_t pos)
    2759             :         {
                                   // Placeholder: no JavaScript is generated for the `leid3` type;
                                   // the error is reported on std::cerr and the tool exits with status 1.
    2760           0 :             snap::NOTUSED(pos);
    2761           0 :             std::cerr << "error: type not implemented yet (leid3).\n";
    2762           0 :             exit(1);
    2763             :         }
    2764             : 
    2765           0 :         void output_ubeid3(size_t pos)
    2766             :         {
                                   // Placeholder: no JavaScript is generated for the `ubeid3` type;
                                   // the error is reported on std::cerr and the tool exits with status 1.
    2767           0 :             snap::NOTUSED(pos);
    2768           0 :             std::cerr << "error: type not implemented yet (ubeid3).\n";
    2769           0 :             exit(1);
    2770             :         }
    2771             : 
    2772           0 :         void output_uleid3(size_t pos)
    2773             :         {
                                   // Placeholder: no JavaScript is generated for the `uleid3` type;
                                   // the error is reported on std::cerr and the tool exits with status 1.
    2774           0 :             snap::NOTUSED(pos);
    2775           0 :             std::cerr << "error: type not implemented yet (uleid3).\n";
    2776           0 :             exit(1);
    2777             :         }
    2778             : 
    2779           0 :         void output_quad(size_t pos)
    2780             :         {
                                   // Placeholder: no JavaScript is generated for the `quad` type;
                                   // the error is reported on std::cerr and the tool exits with status 1.
    2781           0 :             snap::NOTUSED(pos);
    2782           0 :             std::cerr << "error: type not implemented yet (quad).\n";
    2783           0 :             exit(1);
    2784             :         }
    2785             : 
    2786           0 :         void output_bequad(size_t pos)
    2787             :         {
                                   // Placeholder: no JavaScript is generated for the `bequad` type;
                                   // the error is reported on std::cerr and the tool exits with status 1.
    2788           0 :             snap::NOTUSED(pos);
    2789           0 :             std::cerr << "error: type not implemented yet (bequad).\n";
    2790           0 :             exit(1);
    2791             :         }
    2792             : 
    2793           0 :         void output_lequad(size_t pos)
    2794             :         {
                                   // Placeholder: no JavaScript is generated for the `lequad` type;
                                   // the error is reported on std::cerr and the tool exits with status 1.
    2795           0 :             snap::NOTUSED(pos);
    2796           0 :             std::cerr << "error: type not implemented yet (lequad).\n";
    2797           0 :             exit(1);
    2798             :         }
    2799             : 
    2800           0 :         void output_uquad(size_t pos)
    2801             :         {
                                   // Placeholder: no JavaScript is generated for the `uquad` type;
                                   // the error is reported on std::cerr and the tool exits with status 1.
    2802           0 :             snap::NOTUSED(pos);
    2803           0 :             std::cerr << "error: type not implemented yet (uquad).\n";
    2804           0 :             exit(1);
    2805             :         }
    2806             : 
    2807           0 :         void output_ubequad(size_t pos)
    2808             :         {
                                   // Placeholder: no JavaScript is generated for the `ubequad` type;
                                   // the error is reported on std::cerr and the tool exits with status 1.
    2809           0 :             snap::NOTUSED(pos);
    2810           0 :             std::cerr << "error: type not implemented yet (ubequad).\n";
    2811           0 :             exit(1);
    2812             :         }
    2813             : 
    2814           0 :         void output_ulequad(size_t pos)
    2815             :         {
                                   // Placeholder: no JavaScript is generated for the `ulequad` type;
                                   // the error is reported on std::cerr and the tool exits with status 1.
    2816           0 :             snap::NOTUSED(pos);
    2817           0 :             std::cerr << "error: type not implemented yet (ulequad).\n";
    2818           0 :             exit(1);
    2819             :         }
    2820             : 
    2821           0 :         void output_float(size_t pos)
    2822             :         {
                                   // Placeholder: no JavaScript is generated for the `float` type;
                                   // the error is reported on std::cerr and the tool exits with status 1.
    2823           0 :             snap::NOTUSED(pos);
    2824           0 :             std::cerr << "error: type not implemented yet (float).\n";
    2825           0 :             exit(1);
    2826             :         }
    2827             : 
    2828           0 :         void output_befloat(size_t pos)
    2829             :         {
                                   // Placeholder: no JavaScript is generated for the `befloat` type;
                                   // the error is reported on std::cerr and the tool exits with status 1.
    2830           0 :             snap::NOTUSED(pos);
    2831           0 :             std::cerr << "error: type not implemented yet (befloat).\n";
    2832           0 :             exit(1);
    2833             :         }
    2834             : 
    2835           0 :         void output_lefloat(size_t pos)
    2836             :         {
                                   // Placeholder: no JavaScript is generated for the `lefloat` type;
                                   // the error is reported on std::cerr and the tool exits with status 1.
    2837           0 :             snap::NOTUSED(pos);
    2838           0 :             std::cerr << "error: type not implemented yet (lefloat).\n";
    2839           0 :             exit(1);
    2840             :         }
    2841             : 
    2842           0 :         void output_double(size_t pos)
    2843             :         {
                                   // Placeholder: no JavaScript is generated for the `double` type;
                                   // the error is reported on std::cerr and the tool exits with status 1.
    2844           0 :             snap::NOTUSED(pos);
    2845           0 :             std::cerr << "error: type not implemented yet (double).\n";
    2846           0 :             exit(1);
    2847             :         }
    2848             : 
    2849           0 :         void output_bedouble(size_t pos)
    2850             :         {
                                   // Placeholder: no JavaScript is generated for the `bedouble` type;
                                   // the error is reported on std::cerr and the tool exits with status 1.
    2851           0 :             snap::NOTUSED(pos);
    2852           0 :             std::cerr << "error: type not implemented yet (bedouble).\n";
    2853           0 :             exit(1);
    2854             :         }
    2855             : 
    2856           0 :         void output_ledouble(size_t pos)
    2857             :         {
                                   // Placeholder: no JavaScript is generated for the `ledouble` type;
                                   // the error is reported on std::cerr and the tool exits with status 1.
    2858           0 :             snap::NOTUSED(pos);
    2859           0 :             std::cerr << "error: type not implemented yet (ledouble).\n";
    2860           0 :             exit(1);
    2861             :         }
    2862             : 
    2863           2 :         void output_string(size_t pos)
    2864             :         {
                                   // Writes one JavaScript byte test per character of the entry's
                                   // string: "buf[<offset>] <operator> 0x<HEX>", where the offset
                                   // starts at get_offset() and advances by one per character, the
                                   // operator text comes from flags_to_js_operator(), and the
                                   // character is masked with 0xff so it prints as 0x00..0xFF.
                                   // Tests after the first are joined with "\n&& ". Indices get an
                                   // "offset+" prefix when f_has_mime is false. An empty string
                                   // produces no output at all.
    2865           2 :             parser::entry_t::integer_t offset(f_entries[pos]->get_offset());
    2866           4 :             std::string const str(f_entries[pos]->get_string());
    2867          12 :             for(size_t i(0); i < str.length(); ++i, ++offset)
    2868             :             {
    2869             :                 std::cout << (i > 0 ? "\n&& " : "")
    2870             :                           << "buf["
    2871          10 :                           << (f_has_mime ? "" : "offset+")
    2872          10 :                           << offset
    2873             :                           << "] "
    2874          20 :                           << f_entries[pos]->flags_to_js_operator()
    2875          20 :                           << " 0x"
    2876          10 :                           << std::hex << std::uppercase
    2877          10 :                           << (static_cast<int>(str[i]) & 0xff)
    2878          10 :                           << std::dec << std::nouppercase;
    2879             :             }
    2880           2 :         }
    2881             : 
    2882           0 :         void output_pstring(size_t pos)
    2883             :         {
                                   // Placeholder: no JavaScript is generated for the `pstring` type;
                                   // the error is reported on std::cerr and the tool exits with status 1.
    2884           0 :             snap::NOTUSED(pos);
    2885           0 :             std::cerr << "error: type not implemented yet (pstring).\n";
    2886           0 :             exit(1);
    2887             :         }
    2888             : 
    2889           0 :         void output_besearch16(size_t pos)
    2890             :         {
                                   // Placeholder: no JavaScript is generated for the `besearch16` type;
                                   // the error is reported on std::cerr and the tool exits with status 1.
    2891           0 :             snap::NOTUSED(pos);
    2892           0 :             std::cerr << "error: type not implemented yet (besearch16).\n";
    2893           0 :             exit(1);
    2894             :         }
    2895             : 
    2896           0 :         void output_lesearch16(size_t pos)
    2897             :         {
                                   // Placeholder: no JavaScript is generated for the `lesearch16` type;
                                   // the error is reported on std::cerr and the tool exits with status 1.
    2898           0 :             snap::NOTUSED(pos);
    2899           0 :             std::cerr << "error: type not implemented yet (lesearch16).\n";
    2900           0 :             exit(1);
    2901             :         }
    2902             : 
    2903           0 :         void output_search(size_t pos)
    2904             :         {
                                   // Writes to std::cout a call of the form
                                   //   snapwebsites.BufferToMIMESystemImages.scan(buf,<offset>,<maxlength>,{<bytes>});
                                   // where <bytes> is the entry's string as comma separated
                                   // uppercase hexadecimal byte values.
                                   //
                                   // Each character is masked with 0xff (as output_string() does) so
                                   // that bytes >= 0x80 print as 0x80..0xFF; without the mask a signed
                                   // char is sign extended and prints as 0xFFFFFFxx, which can never
                                   // match a byte of a Uint8Array.
                                   //
                                   // NOTE(review): unlike the integer/string emitters this one never
                                   // adds the "offset+" prefix when f_has_mime is false, the byte list
                                   // is wrapped in {...} rather than [...], and the output ends with
                                   // ';' -- confirm against the JavaScript scan() implementation and
                                   // the caller that embeds this text.
    2905           0 :             parser::entry_t::integer_t const offset(f_entries[pos]->get_offset());
    2906           0 :             std::cout << "snapwebsites.BufferToMIMESystemImages.scan(buf,"
    2907           0 :                       << offset << "," << f_entries[pos]->get_maxlength()
    2908           0 :                       << ",{";
    2909           0 :             std::string const str(f_entries[pos]->get_string());
    2910           0 :             for(size_t i(0); i < str.length(); ++i)
    2911             :             {
    2912           0 :                 std::cout << (i == 0 ? "" : ",")
    2913           0 :                           << std::hex << std::uppercase
    2914           0 :                           << "0x" << (static_cast<int>(str[i]) & 0xff)
    2915           0 :                           << std::dec << std::nouppercase;
    2916             :             }
    2917           0 :             std::cout << "});";
    2918           0 :         }
    2919             : 
    2920           0 :         void output_regex(size_t pos)
    2921             :         {
                                   // Writes to std::cout a call of the form
                                   //   snapwebsites.BufferToMIMESystemImages.regex(buf,<offset>,<maxlength>,{<bytes>},<flags>);
                                   // where <bytes> is the entry's string as comma separated
                                   // uppercase hexadecimal byte values and <flags> is a bit set:
                                   //   1 when ENTRY_FLAG_LINES is set,
                                   //   2 when ENTRY_FLAG_CASE_INSENSITIVE is set,
                                   //   4 when ENTRY_FLAG_START_OFFSET is set.
                                   //
                                   // Each character is masked with 0xff (as output_string() does) so
                                   // that bytes >= 0x80 print as 0x80..0xFF; without the mask a signed
                                   // char is sign extended and prints as 0xFFFFFFxx.
                                   //
                                   // NOTE(review): the "offset+" prefix is never added here, the byte
                                   // list uses {...} rather than [...], and the output ends with ';'
                                   // -- confirm against the JavaScript regex() implementation.
    2922           0 :             parser::entry_t::integer_t const offset(f_entries[pos]->get_offset());
    2923           0 :             std::cout << "snapwebsites.BufferToMIMESystemImages.regex(buf,"
    2924           0 :                       << offset << "," << f_entries[pos]->get_maxlength()
    2925           0 :                       << ",{";
    2926           0 :             std::string const str(f_entries[pos]->get_string());
    2927           0 :             for(size_t i(0); i < str.length(); ++i)
    2928             :             {
    2929           0 :                 std::cout << (i == 0 ? "" : ",")
    2930           0 :                           << std::hex << std::uppercase
    2931           0 :                           << "0x" << (static_cast<int>(str[i]) & 0xff)
    2932           0 :                           << std::dec << std::nouppercase;
    2933             :             }
    2934           0 :             std::cout << "},"
    2935             :                       << (
    2936           0 :                             ((f_entries[pos]->get_flags() & entry_t::ENTRY_FLAG_LINES           ) != 0 ? 1 : 0)
    2937           0 :                           | ((f_entries[pos]->get_flags() & entry_t::ENTRY_FLAG_CASE_INSENSITIVE) != 0 ? 2 : 0)
    2938           0 :                           | ((f_entries[pos]->get_flags() & entry_t::ENTRY_FLAG_START_OFFSET    ) != 0 ? 4 : 0)
    2939             :                          )
    2940           0 :                       << ");";
    2941           0 :         }
    2942             : 
    2943           0 :         void output_date(size_t pos)
    2944             :         {
                                   // Placeholder: no JavaScript is generated for the `date` type;
                                   // the error is reported on std::cerr and the tool exits with status 1.
    2945           0 :             snap::NOTUSED(pos);
    2946           0 :             std::cerr << "error: type not implemented yet (date).\n";
    2947           0 :             exit(1);
    2948             :         }
    2949             : 
    2950           0 :         void output_qdate(size_t pos)
    2951             :         {
                                   // Placeholder: no JavaScript is generated for the `qdate` type;
                                   // the error is reported on std::cerr and the tool exits with status 1.
    2952           0 :             snap::NOTUSED(pos);
    2953           0 :             std::cerr << "error: type not implemented yet (qdate).\n";
    2954           0 :             exit(1);
    2955             :         }
    2956             : 
    2957           0 :         void output_ldate(size_t pos)
    2958             :         {
                                   // Placeholder: no JavaScript is generated for the `ldate` type;
                                   // the error is reported on std::cerr and the tool exits with status 1.
    2959           0 :             snap::NOTUSED(pos);
    2960           0 :             std::cerr << "error: type not implemented yet (ldate).\n";
    2961           0 :             exit(1);
    2962             :         }
    2963             : 
    2964           0 :         void output_qldate(size_t pos)
    2965             :         {
                                   // Placeholder: no JavaScript is generated for the `qldate` type;
                                   // the error is reported on std::cerr and the tool exits with status 1.
    2966           0 :             snap::NOTUSED(pos);
    2967           0 :             std::cerr << "error: type not implemented yet (qldate).\n";
    2968           0 :             exit(1);
    2969             :         }
    2970             : 
    2971           0 :         void output_bedate(size_t pos)
    2972             :         {
                                   // Placeholder: no JavaScript is generated for the `bedate` type;
                                   // the error is reported on std::cerr and the tool exits with status 1.
    2973           0 :             snap::NOTUSED(pos);
    2974           0 :             std::cerr << "error: type not implemented yet (bedate).\n";
    2975           0 :             exit(1);
    2976             :         }
    2977             : 
    2978           0 :         void output_beqdate(size_t pos)
    2979             :         {
                                   // Placeholder: no JavaScript is generated for the `beqdate` type;
                                   // the error is reported on std::cerr and the tool exits with status 1.
    2980           0 :             snap::NOTUSED(pos);
    2981           0 :             std::cerr << "error: type not implemented yet (beqdate).\n";
    2982           0 :             exit(1);
    2983             :         }
    2984             : 
    2985           0 :         void output_beldate(size_t pos)
    2986             :         {
                                   // Placeholder: no JavaScript is generated for the `beldate` type;
                                   // the error is reported on std::cerr and the tool exits with status 1.
    2987           0 :             snap::NOTUSED(pos);
    2988           0 :             std::cerr << "error: type not implemented yet (beldate).\n";
    2989           0 :             exit(1);
    2990             :         }
    2991             : 
    2992           0 :         void output_beqldate(size_t pos)
    2993             :         {
                                   // Placeholder: no JavaScript is generated for the `beqldate` type;
                                   // the error is reported on std::cerr and the tool exits with status 1.
    2994           0 :             snap::NOTUSED(pos);
    2995           0 :             std::cerr << "error: type not implemented yet (beqldate).\n";
    2996           0 :             exit(1);
    2997             :         }
    2998             : 
    2999           0 :         void output_ledate(size_t pos)
    3000             :         {
                                   // Placeholder: no JavaScript is generated for the `ledate` type;
                                   // the error is reported on std::cerr and the tool exits with status 1.
    3001           0 :             snap::NOTUSED(pos);
    3002           0 :             std::cerr << "error: type not implemented yet (ledate).\n";
    3003           0 :             exit(1);
    3004             :         }
    3005             : 
    3006           0 :         void output_leqdate(size_t pos)
    3007             :         {
                                   // Placeholder: no JavaScript is generated for the `leqdate` type;
                                   // the error is reported on std::cerr and the tool exits with status 1.
    3008           0 :             snap::NOTUSED(pos);
    3009           0 :             std::cerr << "error: type not implemented yet (leqdate).\n";
    3010           0 :             exit(1);
    3011             :         }
    3012             : 
    3013           0 :         void output_leldate(size_t pos)
    3014             :         {
                                   // Placeholder: no JavaScript is generated for the `leldate` type;
                                   // the error is reported on std::cerr and the tool exits with status 1.
    3015           0 :             snap::NOTUSED(pos);
    3016           0 :             std::cerr << "error: type not implemented yet (leldate).\n";
    3017           0 :             exit(1);
    3018             :         }
    3019             : 
    3020           0 :         void output_leqldate(size_t pos)
    3021             :         {
                                   // Placeholder: no JavaScript is generated for the `leqldate` type;
                                   // the error is reported on std::cerr and the tool exits with status 1.
    3022           0 :             snap::NOTUSED(pos);
    3023           0 :             std::cerr << "error: type not implemented yet (leqldate).\n";
    3024           0 :             exit(1);
    3025             :         }
    3026             : 
    3027           0 :         void output_medate(size_t pos)
    3028             :         {
                                   // Placeholder: no JavaScript is generated for the `medate` type;
                                   // the error is reported on std::cerr and the tool exits with status 1.
    3029           0 :             snap::NOTUSED(pos);
    3030           0 :             std::cerr << "error: type not implemented yet (medate).\n";
    3031           0 :             exit(1);
    3032             :         }
    3033             : 
    3034           0 :         void output_meldate(size_t pos)
    3035             :         {
                                   // Placeholder: no JavaScript is generated for the `meldate` type;
                                   // the error is reported on std::cerr and the tool exits with status 1.
    3036           0 :             snap::NOTUSED(pos);
    3037           0 :             std::cerr << "error: type not implemented yet (meldate).\n";
    3038           0 :             exit(1);
    3039             :         }
    3040             : 
    3041           0 :         void output_indirect(size_t pos)
    3042             :         {
                                   // Placeholder: no JavaScript is generated for the `indirect` type;
                                   // the error is reported on std::cerr and the tool exits with status 1.
    3043           0 :             snap::NOTUSED(pos);
    3044           0 :             std::cerr << "error: type not implemented yet (indirect).\n";
    3045           0 :             exit(1);
    3046             :         }
    3047             : 
    3048           0 :         void output_default(size_t pos)
    3049             :         {
                                   // Writes the JavaScript literal `true` to std::cout; the entry
                                   // at pos is not consulted.
    3050             :             // default is always true
    3051           0 :             snap::NOTUSED(pos);
    3052           0 :             std::cout << "true";
    3053           0 :         }
    3054             : 
    3055           0 :         void output_name(size_t pos)
    3056             :         {
                                   // Intentionally writes nothing and ignores pos (see the note
                                   // below for why).
    3057           0 :             snap::NOTUSED(pos);
    3058             :             // this is already done in the caller which generates the
    3059             :             // function declaration
    3060           0 :         }
    3061             : 
    3062           0 :         void output_use(size_t pos)
    3063             :         {
                                   // Writes to std::cout a call of the form
                                   //   __macro_<string>(<offset>)
                                   // built from the entry's get_string() and get_offset() values.
                                   // NOTE(review): the string is inserted verbatim into the
                                   // identifier -- presumably it is always a valid JavaScript
                                   // identifier suffix; verify against the parser.
    3064           0 :             std::cout << "__macro_" << f_entries[pos]->get_string()
    3065           0 :                       << "("
    3066           0 :                       << f_entries[pos]->get_offset()
    3067           0 :                       << ")";
    3068           0 :         }
    3069             : 
    3070          12 :         void output_mimetype(size_t pos)
    3071             :         {
                                   // When the entry has a non-empty MIME type, writes the
                                   // JavaScript statement `return "<mimetype>";` followed by a
                                   // newline (std::endl also flushes); otherwise writes nothing.
                                   // NOTE(review): the MIME type is written between double quotes
                                   // without any escaping -- confirm it can never contain '"' or '\'.
    3072          24 :             std::string const mimetype(f_entries[pos]->get_mimetype());
    3073          12 :             if(!mimetype.empty())
    3074             :             {
    3075           4 :                 std::cout << "return \"" << mimetype << "\";" << std::endl;
    3076             :             }
    3077          12 :         }
    3078             : 
    3079          12 :         void output_endif(size_t pos)
    3080             :         {
                                   // Writes a closing brace `}` and a newline to std::cout
                                   // (std::endl also flushes); pos is not used.
    3081          12 :             snap::NOTUSED(pos);
    3082          12 :             std::cout << "}" << std::endl;
    3083          12 :         }
    3084             : 
    3085             :         // variable members
    3086             :         entry_vector_t  f_entries = entry_vector_t();
    3087             :         bool            f_has_mime = false;
    3088             :     };
    3089           8 :     recursive_output out(has_mime);
    3090             : 
    3091             :     // first remove all entries that we are not going to use (i.e.
    3092             :     // anything at the end which does not include a MIME type)
    3093           4 :     entry_t::integer_t l(-1);
    3094           4 :     if(!has_mime)
    3095             :     {
    3096           0 :         l = f_entries[end - 1]->get_level();
    3097             :     }
    3098           4 :     size_t j(end);
    3099          36 :     while(j > start)
    3100             :     {
    3101          16 :         --j;
    3102             : 
    3103          16 :         if(f_entries[j]->get_type() != entry_t::type_t::ENTRY_TYPE_NAME)
    3104             :         {
    3105          16 :             if(f_entries[j]->get_mimetype().empty())
    3106             :             {
    3107          12 :                 if(f_entries[j]->get_level() <= l)
    3108             :                 {
    3109           8 :                     out.f_entries.insert(out.f_entries.begin(), f_entries[j]);
    3110             :                 }
    3111             :             }
    3112             :             else
    3113             :             {
    3114           4 :                 l = f_entries[j]->get_level();
    3115           4 :                 out.f_entries.insert(out.f_entries.begin(), f_entries[j]);
    3116             :             }
    3117             :         }
    3118             :     }
    3119             : 
    3120           4 :     out.output(0);
    3121           4 : }
    3122             : 
    3123             : 
    3124           1 : void parser::output_header()
    3125             : {
                           // Writes the beginning of the generated JavaScript file to std::cout:
                           //   - the "@preserve" banner (name uses f_magic_name in lowercase,
                           //     version uses MIMETYPE_VERSION_STRING),
                           //   - the Closure Compiler / jslint directives,
                           //   - the snapwebsites.BufferToMIME<f_magic_name> constructor and its
                           //     snapwebsites.inherits() call,
                           //   - the documentation (including f_lexer->list_of_filenames()) and
                           //     opening lines of the bufferToMIME(buf) prototype function.
                           // The function body opened here is left unterminated on purpose; the
                           // rest of the file is emitted by other output functions.
                           //
                           // NOTE(review): the emitted "@return {!snapwebsites.BufferToMIMESystemImages}"
                           // annotation and the "system images" brief are hard-coded while the
                           // class name itself uses f_magic_name -- confirm this is intended for
                           // magic names other than "SystemImages".
                           // NOTE(review): ::tolower is applied to plain char values; this is
                           // presumably fine for ASCII names -- verify if non-ASCII names can occur.
    3126           2 :     std::string lower_magic_name(f_magic_name);
    3127           1 :     std::transform(lower_magic_name.begin(), lower_magic_name.end(), lower_magic_name.begin(), ::tolower);
    3128             : 
    3129             :     std::cout <<
    3130             : "/** @preserve\n"
    3131             : " * WARNING: AUTO-GENERATED FILE, DO NOT EDIT. See Source: magic-to-js.cpp\n"
    3132             : " * Name: mimetype-" << lower_magic_name << "\n"
    3133             : " * Version: " << MIMETYPE_VERSION_STRING << "\n"
    3134             : " * Browsers: all\n"
    3135             : " * Copyright: Copyright (c) 2014-2019  Made to Order Software Corp.  All Rights Reserved.\n"
    3136             : " * Depends: output (0.1.5.5)\n"
    3137             : " * License: GPL 2.0\n"
    3138             : " * Source: File generated by magic-to-js from magic library definition files.\n"
    3139             : " */\n"
    3140             : "\n"
    3141             : "\n"
    3142             : "//\n"
    3143             : "// Inline \"command line\" parameters for the Google Closure Compiler\n"
    3144             : "// See output of:\n"
    3145             : "//    java -jar .../google-js-compiler/compiler.jar --help\n"
    3146             : "//\n"
    3147             : "// ==ClosureCompiler==\n"
    3148             : "// @compilation_level ADVANCED_OPTIMIZATIONS\n"
    3149             : "// @externs $CLOSURE_COMPILER/contrib/externs/jquery-1.9.js\n"
    3150             : "// @externs plugins/output/externs/jquery-extensions.js\n"
    3151             : "// ==/ClosureCompiler==\n"
    3152             : "//\n"
    3153             : "\n"
    3154             : "/*jslint nomen: true, todo: true, devel: true */\n"
    3155             : "/*global snapwebsites: false, jQuery: false, Uint8Array: true */\n"
    3156             : "\n"
    3157             : "\n"
    3158             : "\n"
    3159             : "/** \\brief Check for \"system\" images.\n"
    3160             : " *\n"
    3161             : " * This function checks for well known images. The function is generally\n"
    3162             : " * very fast because it checks only the few very well known image file\n"
    3163             : " * formats.\n"
    3164             : " *\n"
    3165             : " * @return {!snapwebsites.BufferToMIMESystemImages} A reference to this new\n"
    3166             : " *                                                  object.\n"
    3167             : " *\n"
    3168             : " * @extends {snapwebsites.BufferToMIMETemplate}\n"
    3169             : " * @constructor\n"
    3170             : " */\n"
    3171             : "snapwebsites.BufferToMIME" << f_magic_name << " = function()\n"
    3172             : "{\n"
    3173             : "    snapwebsites.BufferToMIME" << f_magic_name << ".superClass_.constructor.call(this);\n"
    3174             : "\n"
    3175             : "    return this;\n"
    3176             : "};\n"
    3177             : "\n"
    3178             : "\n"
    3179             : "/** \\brief Chain up the extension.\n"
    3180             : " *\n"
    3181             : " * This is the chain between this class and it's super.\n"
    3182             : " */\n"
    3183             : "snapwebsites.inherits(snapwebsites.BufferToMIME" << f_magic_name << ", snapwebsites.BufferToMIMETemplate);\n"
    3184             : "\n"
    3185             : "\n"
    3186             : "/** \\brief Check for the " << f_magic_name << " file formats.\n"
    3187             : " *\n"
    3188             : " * This function checks for file formats as defined in the magic library.\n"
    3189             : " * This version includes the descriptions from the following files:\n"
    3190             : " *\n"
    3191           2 : << f_lexer->list_of_filenames() <<
    3192             : " *\n"
    3193             : " * @param {!Uint8Array} buf  The array of data to check for a known magic.\n"
    3194             : " *\n"
    3195             : " * @return {!string} The MIME type or the empty string if not determined.\n"
    3196             : " *\n"
    3197             : " * @override\n"
    3198             : " */\n"
    3199           1 : "snapwebsites.BufferToMIME" << f_magic_name << ".prototype.bufferToMIME = function(buf)\n"
    3200           2 : "{\n"
    3201             : ;
    3202             : 
    3203           1 : }
    3204             : 
    3205             : 
    3206           1 : void parser::output_footer()
    3207             : {
                           // Writes the end of the generated JavaScript file to std::cout: the
                           // fallback `return "";` statement, the `};` that terminates the
                           // function, and a jQuery(document).ready() handler which registers a
                           // new snapwebsites.BufferToMIME<f_magic_name> instance through
                           // snapwebsites.OutputInstance.registerBufferToMIME().
    3208             :     // close the function we opened in the header
    3209             :     std::cout <<
    3210             : "return \"\";\n"
    3211             : "};\n"
    3212             : "\n"
    3213             : "// auto-initialize\n"
    3214             : "jQuery(document).ready(\n"
    3215             : "    function()\n"
    3216             : "    {\n"
    3217           1 : "        snapwebsites.OutputInstance.registerBufferToMIME(new snapwebsites.BufferToMIME" << f_magic_name << "());\n"
    3218             : "    }\n"
    3219           1 : ");\n"
    3220             : ;
    3221             : 
    3222           1 : }
    3223             : 
    3224             : 
    3225             : 
    3226           0 : int usage()
    3227             : {
                           // Prints the command line help to std::cout and terminates the
                           // process with exit status 1. Although declared as returning int, the
                           // function never returns to its caller because of the exit() call.
    3228           0 :     std::cout << "Usage: magic-to-js <input files> ..." << std::endl;
    3229           0 :     std::cout << "You may also want to redirect the output to a .js file" << std::endl;
    3230           0 :     std::cout << "  --debug | -d    print out debug information in stderr" << std::endl;
    3231           0 :     std::cout << "  --help | -h     print out this help screen" << std::endl;
    3232           0 :     std::cout << "  --lib-version   print out this tool's version" << std::endl;
    3233           0 :     std::cout << "  --name | -n     specify the name of the magic MIME to output" << std::endl;
    3234           0 :     std::cout << "  --version       print out this tool's version" << std::endl;
    3235           0 :     exit(1);
    3236             : }
    3237             : 
    3238             : 
    3239           1 : int main(int argc, char *argv[])
    3240             : {
    3241             :     try
    3242             :     {
    3243           2 :         lexer::filenames_t fn;
    3244           2 :         std::string magic_name;
    3245             : 
    3246           4 :         for(int i(1); i < argc; ++i)
    3247             :         {
    3248           3 :             if(strcmp(argv[i], "-h") == 0
    3249           3 :             || strcmp(argv[i], "--help") == 0)
    3250             :             {
    3251           0 :                 usage();
    3252           0 :                 snap::NOTREACHED();
    3253             :             }
    3254           3 :             if(strcmp(argv[i], "--version") == 0)
    3255             :             {
    3256           0 :                 std::cout << MIMETYPE_VERSION_STRING << std::endl;
    3257           0 :                 exit(1);
    3258             :                 snap::NOTREACHED();
    3259             :             }
    3260           3 :             if(strcmp(argv[i], "--lib-version") == 0)
    3261             :             {
    3262           0 :                 std::cout << SNAPWEBSITES_VERSION_MAJOR << "." << SNAPWEBSITES_VERSION_MINOR << "." << SNAPWEBSITES_VERSION_PATCH << std::endl;
    3263           0 :                 exit(1);
    3264             :                 snap::NOTREACHED();
    3265             :             }
    3266           3 :             if(strcmp(argv[i], "-d") == 0
    3267           3 :             || strcmp(argv[i], "--debug") == 0)
    3268             :             {
    3269           0 :                 std::cerr << "info: turning debug ON\n";
    3270           0 :                 g_debug = true;
    3271             :             }
    3272           3 :             else if(strcmp(argv[i], "-n") == 0
    3273           3 :                  || strcmp(argv[i], "--name") == 0)
    3274             :             {
    3275           1 :                 ++i;
    3276           1 :                 if(i >= argc)
    3277             :                 {
    3278           0 :                     std::cerr << "error: -n/--name expect to be followed by one argument, the magic name." << std::endl;
    3279           0 :                     exit(1);
    3280             :                 }
    3281           1 :                 magic_name = argv[i];
    3282             :             }
    3283             :             else
    3284             :             {
    3285           2 :                 fn.push_back(argv[i]);
    3286             :             }
    3287             :         }
    3288             : 
    3289           1 :         if(fn.empty())
    3290             :         {
    3291           0 :             std::cerr << "error: expected at least one filename on the command line. Try --help for more info." << std::endl;
    3292           0 :             exit(1);
    3293             :         }
    3294             : 
    3295           1 :         if(magic_name.empty())
    3296             :         {
    3297           0 :             std::cerr << "error: a magic name must be specified (--name option)" << std::endl;
    3298           0 :             exit(1);
    3299             :         }
    3300             : 
    3301           2 :         lexer::pointer_t l(new lexer(fn));
    3302           2 :         parser::pointer_t p(new parser(l, magic_name));
    3303           1 :         p->parse();
    3304             : 
    3305             :         // it worked, the parser has now a pile of parsed lines we can
    3306             :         // convert in JavaScript
    3307           1 :         p->output();
    3308             : 
    3309           1 :         return 0;
    3310             :     }
    3311           0 :     catch(std::exception const & e)
    3312             :     {
    3313           0 :         std::cerr << "magic-to-js: exception: " << e.what() << std::endl;
    3314           0 :         return 1;
    3315             :     }
    3316           3 : }
    3317             : 
    3318             : 
    3319             : // vim: ts=4 sw=4 et

Generated by: LCOV version 1.13