LCOV - code coverage report
Current view: top level - snapwebsites - snap_parser.h (source / functions) Hit Total Coverage
Test: coverage.info Lines: 54 65 83.1 %
Date: 2019-12-15 17:13:15 Functions: 52 66 78.8 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : // Snap Websites Server -- advanced parser
       2             : // Copyright (c) 2011-2019  Made to Order Software Corp.  All Rights Reserved
       3             : //
       4             : // This program is free software; you can redistribute it and/or modify
       5             : // it under the terms of the GNU General Public License as published by
       6             : // the Free Software Foundation; either version 2 of the License, or
       7             : // (at your option) any later version.
       8             : //
       9             : // This program is distributed in the hope that it will be useful,
      10             : // but WITHOUT ANY WARRANTY; without even the implied warranty of
      11             : // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      12             : // GNU General Public License for more details.
      13             : //
      14             : // You should have received a copy of the GNU General Public License
      15             : // along with this program; if not, write to the Free Software
      16             : // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
      17             : #pragma once
      18             : 
      19             : #include "snapwebsites/snap_exception.h"
      20             : 
      21             : #include <QVariant>
      22             : #include <QVector>
      23             : #include <QSharedPointer>
      24             : 
      25             : namespace snap
      26             : {
      27             : namespace parser
      28             : {
      29             : 
      30           0 : class snap_parser_exception : public snap_exception
      31             : {
      32             : public:
      33             :     snap_parser_exception(char const *        what_msg) : snap_exception("parser", what_msg) {}
      34             :     snap_parser_exception(std::string const & what_msg) : snap_exception("parser", what_msg) {}
      35           0 :     snap_parser_exception(QString const &     what_msg) : snap_exception("parser", what_msg) {}
      36             : };
      37             : 
      38             : class snap_parser_no_current_choices : public snap_parser_exception
      39             : {
      40             : public:
      41             :     snap_parser_no_current_choices(char const *        what_msg) : snap_parser_exception(what_msg) {}
      42             :     snap_parser_no_current_choices(std::string const & what_msg) : snap_parser_exception(what_msg) {}
      43             :     snap_parser_no_current_choices(QString const &     what_msg) : snap_parser_exception(what_msg) {}
      44             : };
      45             : 
      46             : class snap_parser_state_has_children : public snap_parser_exception
      47             : {
      48             : public:
      49             :     snap_parser_state_has_children(char const *        what_msg) : snap_parser_exception(what_msg) {}
      50             :     snap_parser_state_has_children(std::string const & what_msg) : snap_parser_exception(what_msg) {}
      51             :     snap_parser_state_has_children(QString const &     what_msg) : snap_parser_exception(what_msg) {}
      52             : };
      53             : 
      54           0 : class snap_parser_unexpected_token : public snap_parser_exception
      55             : {
      56             : public:
      57             :     snap_parser_unexpected_token(char const *        what_msg) : snap_parser_exception(what_msg) {}
      58             :     snap_parser_unexpected_token(std::string const & what_msg) : snap_parser_exception(what_msg) {}
      59           0 :     snap_parser_unexpected_token(QString const &     what_msg) : snap_parser_exception(what_msg) {}
      60             : };
      61             : 
      62             : 
      63             : 
      64             : 
      65             : 
      66             : enum class token_t
      67             : {
      68             :     TOKEN_ID_NONE_ENUM = 0,     // "not a token" (also end of input)
      69             : 
      70             :     TOKEN_ID_INTEGER_ENUM,
      71             :     TOKEN_ID_FLOAT_ENUM,
      72             :     TOKEN_ID_IDENTIFIER_ENUM,
      73             :     TOKEN_ID_KEYWORD_ENUM,
      74             :     TOKEN_ID_STRING_ENUM,
      75             :     TOKEN_ID_LITERAL_ENUM,      // literal character(s)
      76             : 
      77             :     TOKEN_ID_EMPTY_ENUM,        // special empty token
      78             :     TOKEN_ID_CHOICES_ENUM,      // pointer to a choices object
      79             :     TOKEN_ID_RULES_ENUM,        // pointer to a choices object (see rules operator |() )
      80             :     TOKEN_ID_NODE_ENUM,         // pointer to a node object
      81             :     TOKEN_ID_ERROR_ENUM         // an error occurred
      82             : };
      83             : 
      84          28 : struct token_id { token_id(token_t t) : f_type(t) {} operator token_t () const { return f_type; } private: token_t f_type; };
      85           2 : struct token_id_none_def       : public token_id { token_id_none_def()       : token_id(token_t::TOKEN_ID_NONE_ENUM      ) {} };
      86           2 : struct token_id_integer_def    : public token_id { token_id_integer_def()    : token_id(token_t::TOKEN_ID_INTEGER_ENUM   ) {} };
      87           2 : struct token_id_float_def      : public token_id { token_id_float_def()      : token_id(token_t::TOKEN_ID_FLOAT_ENUM     ) {} };
      88           2 : struct token_id_identifier_def : public token_id { token_id_identifier_def() : token_id(token_t::TOKEN_ID_IDENTIFIER_ENUM) {} };
      89           2 : struct token_id_keyword_def    : public token_id { token_id_keyword_def()    : token_id(token_t::TOKEN_ID_KEYWORD_ENUM   ) {} };
      90           2 : struct token_id_string_def     : public token_id { token_id_string_def()     : token_id(token_t::TOKEN_ID_STRING_ENUM    ) {} };
      91           2 : struct token_id_literal_def    : public token_id { token_id_literal_def()    : token_id(token_t::TOKEN_ID_LITERAL_ENUM   ) {} };
      92           2 : struct token_id_empty_def      : public token_id { token_id_empty_def()      : token_id(token_t::TOKEN_ID_EMPTY_ENUM     ) {} };
      93             : 
      94             : extern token_id_none_def        TOKEN_ID_NONE;
      95             : extern token_id_integer_def     TOKEN_ID_INTEGER;
      96             : extern token_id_float_def       TOKEN_ID_FLOAT;
      97             : extern token_id_identifier_def  TOKEN_ID_IDENTIFIER;
      98             : extern token_id_keyword_def     TOKEN_ID_KEYWORD;
      99             : extern token_id_string_def      TOKEN_ID_STRING;
     100             : extern token_id_literal_def     TOKEN_ID_LITERAL;
     101             : extern token_id_empty_def       TOKEN_ID_EMPTY;
     102             : 
     103             : 
     104             : 
     105             : 
     106             : class token
     107             : {
     108             : public:
     109         554 :                     token(token_t id = TOKEN_ID_NONE) : f_id(id) {}
     110           9 :                     token(token const & t) : f_id(t.f_id), f_value(t.f_value) {}
     111             :                     token & operator = (token const & t)
     112             :                     {
     113             :                         if(this != &t)
     114             :                         {
     115             :                             f_id = t.f_id;
     116             :                             f_value = t.f_value;
     117             :                         }
     118             :                         return *this;
     119             :                     }
     120             : 
     121             :     // polymorphic type so user data works as expected
     122         511 :     virtual         ~token() {}
     123             : 
     124           3 :     void            set_id(token_t id) { f_id = id; }
     125         540 :     token_t         get_id() const { return f_id; }
     126             : 
     127           3 :     void            set_value(QVariant const & value) { f_value = value; }
     128          29 :     QVariant        get_value() const { return f_value; }
     129             : 
     130             :     QString         to_string() const;
     131             : 
     132             : private:
     133             :     token_t         f_id = token_t::TOKEN_ID_NONE_ENUM;
     134             :     QVariant        f_value = QVariant();
     135             : };
     136             : typedef QVector<QSharedPointer<token> >    vector_token_t;
     137             : 
     138             : class keyword;
     139             : 
     140           1 : class lexer
     141             : {
     142             : public:
     143             :     enum class lexer_error_t
     144             :     {
     145             :         LEXER_ERROR_NONE,
     146             : 
     147             :         LEXER_ERROR_INVALID_STRING,
     148             :         LEXER_ERROR_INVALID_C_COMMENT,
     149             :         LEXER_ERROR_INVALID_NUMBER,
     150             : 
     151             :         LEXER_ERROR_max
     152             :     };
     153             : 
     154           1 :                     lexer() { f_pos = f_input.begin(); }
     155             :                     lexer(lexer const & rhs) = delete;
     156             :     lexer &         operator = (lexer const & rhs) = delete;
     157             :     bool            eoi() const { return f_pos == f_input.end(); }
     158           4 :     uint32_t        line() const { return f_line; }
     159             :     void            set_input(QString const & input);
     160             :     void            add_keyword(keyword & k);
     161             :     token           next_token();
     162           0 :     lexer_error_t   get_error_code() const { return f_error_code; }
     163           0 :     QString         get_error_message() const { return f_error_message; }
     164           0 :     uint32_t        get_error_line() const { return f_error_line; }
     165             : 
     166             : private:
     167             :     // list of keywords / identifiers
     168             :     typedef QMap<QString, int>      keywords_map_t;
     169             : 
     170             :     QString                         f_input = QString();
     171             :     QString::const_iterator         f_pos = QString::const_iterator();
     172             :     uint32_t                        f_line = 0;
     173             :     keywords_map_t                  f_keywords = keywords_map_t();
     174             :     lexer_error_t                   f_error_code = lexer_error_t::LEXER_ERROR_NONE;
     175             :     QString                         f_error_message = QString();
     176             :     uint32_t                        f_error_line = 0;
     177             : };
     178             : 
     179             : 
     180         421 : class keyword
     181             : {
     182             : public:
     183         133 :                     keyword() {}
     184             :                     keyword(lexer & parent, QString const & keyword_identifier, int index_number = 0);
     185             : 
     186           2 :     QString         identifier() const { return f_identifier; }
     187           2 :     int             number() const { return f_number; }
     188             : 
     189             : private:
     190             :     static int      g_next_number;
     191             : 
     192             :     int             f_number = 0;
     193             :     QString         f_identifier = QString();
     194             : };
     195             : 
     196             : class choices;
     197             : class token_node;
     198             : 
     199             : // TODO: remove these pragmas once we only have shared & weak pointers
     200             : //
     201             : #pragma GCC diagnostic push
     202             : #pragma GCC diagnostic ignored "-Weffc++"
     203          55 : class rule
     204             : {
     205             : public:
     206             :     typedef void (*reducer_t)(rule const & r, QSharedPointer<token_node> & t);
     207             : 
     208          71 :                     rule() : f_parent(nullptr), f_reducer(nullptr) {}
     209             :                     rule(choices& c);
     210             :                     rule(rule const & r);
     211             : 
     212             :     void            add_rules(choices& c); // choices of rules
     213             :     void            add_choices(choices& c); // sub-rule
     214             :     void            add_token(token_t token); // any value accepted
     215             :     void            add_literal(QString const& value);
     216             :     void            add_keyword(keyword const& k);
     217          53 :     void            set_reducer(reducer_t reducer) { f_reducer = reducer; }
     218         124 :     int             count() const { return f_tokens.count(); }
     219             : 
     220             :     class rule_ref
     221             :     {
     222             :     public:
     223         579 :                         rule_ref(rule const * r, int position)
     224         579 :                             : f_rule(r), f_position(position)
     225             :                         {
     226         579 :                         }
     227             :                         rule_ref(rule_ref const & ref)
     228             :                             : f_rule(ref.f_rule), f_position(ref.f_position)
     229             :                         {
     230             :                         }
     231             : 
     232         462 :         token           get_token() const { return f_rule->f_tokens[f_position].f_token; }
     233          27 :         QString         get_value() const { return f_rule->f_tokens[f_position].f_value; }
     234           0 :         keyword         get_keyword() const { return f_rule->f_tokens[f_position].f_keyword; }
     235         194 :         choices&        get_choices() const { return *f_rule->f_tokens[f_position].f_choices; }
     236             : 
     237             :     private:
     238             :         rule const *    f_rule = nullptr;
     239             :         int             f_position = 0;
     240             :     };
     241             : 
     242         579 :     rule_ref const  operator [] (int position) const
     243             :                     {
     244         579 :                         return rule_ref(this, position);
     245             :                     }
     246             : 
     247          34 :     void            reduce(QSharedPointer<token_node> & n) const
     248             :                     {
     249          34 :                         if(f_reducer != nullptr)
     250             :                         {
     251          30 :                             f_reducer(*this, n);
     252             :                         }
     253          34 :                     }
     254             : 
     255             :     rule&           operator >> (token_id const & token);
     256             :     rule&           operator >> (QString const & literal);
     257             :     rule&           operator >> (char const * literal);
     258             :     rule&           operator >> (keyword const & k);
     259             :     rule&           operator >> (choices & c);
     260             :     rule&           operator >= (rule::reducer_t function);
     261             : 
     262             :     QString         to_string() const;
     263             : 
     264             : private:
     265         210 :     struct rule_data_t
     266             :     {
     267             :         rule_data_t();
     268             :         rule_data_t(rule_data_t const & s);
     269             :         rule_data_t(choices & c);
     270             :         rule_data_t(token_t token);
     271             :         rule_data_t(QString const & value); // i.e. literal
     272             :         rule_data_t(keyword const & k);
     273             : 
     274             :         token_t             f_token = token_t::TOKEN_ID_NONE_ENUM;
     275             :         QString             f_value = QString();        // required value if not empty
     276             :         keyword             f_keyword = keyword();      // the keyword
     277             :         choices *           f_choices = nullptr;        // sub-rule if not null & token TOKEN_ID_CHOICES_ENUM
     278             :     };
     279             : 
     280             :     choices *               f_parent = nullptr;
     281             :     QVector<rule_data_t>    f_tokens = QVector<rule_data_t>();
     282             :     reducer_t               f_reducer = reducer_t();
     283             : };
     284             : #pragma GCC diagnostic pop
     285             : 
     286             : // these are declared as non-member operators so that the left-hand
     287             : // operand can be something other than a rule (all possible cases)
     288             : rule & operator >> (token_id const & token_left, token_id const & token_right);
     289             : rule & operator >> (token_id const & token, QString const & literal);
     290             : rule & operator >> (token_id const & token, char const * literal);
     291             : rule & operator >> (token_id const & token, keyword const & k);
     292             : rule & operator >> (token_id const & token, choices & c);
     293             : rule & operator >> (QString const & literal, token_id const & token);
     294             : rule & operator >> (QString const & literal_left, QString const & literal_right);
     295             : rule & operator >> (QString const & literal, keyword const & k);
     296             : rule & operator >> (QString const & literal, choices & c);
     297             : rule & operator >> (keyword const & k, token_id const & token);
     298             : rule & operator >> (keyword const & k, QString const & literal);
     299             : rule & operator >> (keyword const & k_left, keyword const & k_right);
     300             : rule & operator >> (keyword const & k, choices & c);
     301             : rule & operator >> (choices & c, token_id const & token);
     302             : rule & operator >> (choices & c, QString const & literal);
     303             : rule & operator >> (choices & c, keyword const & k);
     304             : rule & operator >> (choices & c_left, choices & c_right);
     305             : rule & operator >> (char const * literal, choices & c);
     306             : 
     307             : // now a way to add a reducer function
     308             : rule & operator >= (token_id const & token, rule::reducer_t function);
     309             : rule & operator >= (QString const & literal, rule::reducer_t function);
     310             : rule & operator >= (keyword const & k, rule::reducer_t function);
     311             : rule & operator >= (choices & c, rule::reducer_t function);
     312             : 
     313             : rule & operator | (token_id const & token, rule & r_right);
     314             : rule & operator | (rule & r_left, token_id const & token);
     315             : rule & operator | (rule & r_left, keyword const & k);
     316             : rule & operator | (rule & r_left, rule & r_right);
     317             : rule & operator | (rule & r, choices & c);
     318             : // rule & operator | (choices & c, rule & r); -- defined in choices class
     319             : 
     320             : class grammar;
     321             : 
     322             : class choices
     323             : {
     324             : public:
     325             :                         choices(grammar * parent, char const * choice_name = "");
     326             :                         ~choices();
     327             : 
     328           0 :     QString const &     name() const { return f_name; }
     329          57 :     int                 count() { return f_rules.count(); }
     330             :     void                clear();
     331             : 
     332             :     choices &           operator = (const choices & rhs);
     333             : 
     334             :     choices &           operator >>= (token_id const & token);
     335             :     choices &           operator >>= (QString const & literal);
     336             :     choices &           operator >>= (keyword const & k);
     337             :     choices &           operator >>= (choices & rhs);
     338             :     choices &           operator >>= (rule & rhs);
     339             : 
     340             :     rule &              operator | (rule & r);
     341             : 
     342             :     void                add_rule(rule & r);
     343         566 :     rule const &        operator [] (int rule) const
     344             :                         {
     345         566 :                             return *f_rules[rule];
     346             :                         }
     347             : 
     348             :     // for debug purposes
     349             :     QString             to_string() const;
     350             : 
     351             : private:
     352             :     QString             f_name = QString();
     353             :     QVector<rule *>     f_rules = QVector<rule *>();
     354             : };
     355             : typedef QVector<choices *>            choices_array_t;
     356             : 
     357             : 
     358             : // base class that parsers derive from to create user data to be
     359             : // saved in token_node objects (see below)
     360             : // must always be used with QSharedPointer<>
     361           3 : class parser_user_data
     362             : {
     363             : public:
     364           0 :     virtual             ~parser_user_data() {}
     365             : 
     366             : private:
     367             : };
     368             : 
     369             : 
     370             : // token holder that can be saved in a tree-like manner: child tokens
     371             : // and nodes are kept in a vector of shared pointers (f_tokens)
     372          79 : class token_node : public token
     373             : {
     374             : // Q_OBJECT is not used because we don't have signals, slots or properties
     375             : public:
     376          88 :                                 token_node() : token(token_t::TOKEN_ID_NODE_ENUM) {}
     377             : 
     378           8 :     void                        add_token(token & t) { f_tokens.push_back(QSharedPointer<token>(new token(t))); }
     379          82 :     void                        add_node(QSharedPointer<token_node> & n) { f_tokens.push_back(n); }
     380             :     vector_token_t &            tokens() { return f_tokens; }
     381           0 :     size_t                      size() const { return f_tokens.size(); }
     382          32 :     QSharedPointer<token>       operator [] (int index) { return f_tokens[index]; }
     383             :     QSharedPointer<token> const operator [] (int index) const { return f_tokens[index]; }
     384          88 :     void                        set_line(uint32_t line) { f_line = line; }
     385             :     uint32_t                    get_line() const { return f_line; }
     386             : 
     387          30 :     void                        set_user_data(QSharedPointer<parser_user_data> data) { f_user_data = data; }
     388          30 :     QSharedPointer<parser_user_data> get_user_data() const { return f_user_data; }
     389             : 
     390             : private:
     391             :     int32_t                             f_line = 0;
     392             :     vector_token_t                      f_tokens = vector_token_t();
     393             :     QSharedPointer<parser_user_data>    f_user_data = QSharedPointer<parser_user_data>();
     394             : };
     395             : 
     396           1 : class grammar
     397             : {
     398             : public:
     399             :                                 grammar();
     400             : 
     401             :     void                        add_choices(choices & c);
     402             : 
     403             :     bool                        parse(lexer & input, choices & start);
     404           1 :     QSharedPointer<token_node>  get_result() const { return f_result; }
     405             : 
     406             : private:
     407             :     choices_array_t             f_choices = choices_array_t();
     408             :     QSharedPointer<token_node>  f_result = QSharedPointer<token_node>();
     409             : };
     410             : 
     411             : 
     412             : 
     413             : } // namespace parser
     414             : } // namespace snap
     415             : // vim: ts=4 sw=4 et

Generated by: LCOV version 1.13