LCOV - code coverage report
Current view: top level - snapwebsites - snap_parser.cpp (source / functions) Hit Total Coverage
Test: coverage.info Lines: 359 1082 33.2 %
Date: 2019-12-15 17:13:15 Functions: 51 83 61.4 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : // Snap Websites Server -- advanced parser
       2             : // Copyright (c) 2011-2019  Made to Order Software Corp.  All Rights Reserved
       3             : //
       4             : // This program is free software; you can redistribute it and/or modify
       5             : // it under the terms of the GNU General Public License as published by
       6             : // the Free Software Foundation; either version 2 of the License, or
       7             : // (at your option) any later version.
       8             : //
       9             : // This program is distributed in the hope that it will be useful,
      10             : // but WITHOUT ANY WARRANTY; without even the implied warranty of
      11             : // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      12             : // GNU General Public License for more details.
      13             : //
      14             : // You should have received a copy of the GNU General Public License
      15             : // along with this program; if not, write to the Free Software
      16             : // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
      17             : 
      18             : 
      19             : // self
      20             : //
      21             : #include "snapwebsites/snap_parser.h"
      22             : 
      23             : 
      24             : // snapwebsites lib
      25             : //
      26             : #include "snapwebsites/log.h"
      27             : #include "snapwebsites/qstring_stream.h"
      28             : 
      29             : 
      30             : // Qt lib
      31             : //
      32             : #include <QList>
      33             : #include <QPointer>
      34             : 
      35             : 
      36             : // C++ lib
      37             : //
      38             : #include <iostream>
      39             : 
      40             : 
      41             : // last include
      42             : //
      43             : #include <snapdev/poison.h>
      44             : 
      45             : 
      46             : 
      47             : 
      48             : namespace snap
      49             : {
      50             : namespace parser
      51             : {
      52             : // Definitions of the TOKEN_ID_* objects, one per token_id_*_def type (types declared outside this view).
      53           2 : token_id_none_def TOKEN_ID_NONE;
      54           2 : token_id_integer_def TOKEN_ID_INTEGER;
      55           2 : token_id_float_def TOKEN_ID_FLOAT;
      56           2 : token_id_identifier_def TOKEN_ID_IDENTIFIER;
      57           2 : token_id_keyword_def TOKEN_ID_KEYWORD;
      58           2 : token_id_string_def TOKEN_ID_STRING;
      59           2 : token_id_literal_def TOKEN_ID_LITERAL;
      60           2 : token_id_empty_def TOKEN_ID_EMPTY;
      61             : 
      62             : // Return a printable description of this token, e.g. "int<...>" or "string<...>";
      63             : // RULES, NODE, ERROR and unknown identifiers yield an " /* INVALID ... */ " marker.
      64           0 : QString token::to_string() const
      65             : {
      66           0 :     QString result;
      67             : 
      68           0 :     switch(f_id)
      69             :     {
      70           0 :     case token_t::TOKEN_ID_NONE_ENUM:
      71           0 :         result = "<no token>";
      72           0 :         break;
      73             : 
      74           0 :     case token_t::TOKEN_ID_INTEGER_ENUM:
      75           0 :         result = QString("int<%1>").arg(f_value.toInt());
      76           0 :         break;
      77             : 
      78           0 :     case token_t::TOKEN_ID_FLOAT_ENUM:
      79           0 :         result = QString("float<%1>").arg(f_value.toDouble());
      80           0 :         break;
      81             : 
      82           0 :     case token_t::TOKEN_ID_IDENTIFIER_ENUM:
      83           0 :         result = QString("identifier<%1>").arg(f_value.toString());
      84           0 :         break;
      85             : 
      86           0 :     case token_t::TOKEN_ID_KEYWORD_ENUM:
      87           0 :         result = QString("keyword<%1>").arg(f_value.toString());
      88           0 :         break;
      89             : 
      90           0 :     case token_t::TOKEN_ID_STRING_ENUM:
      91           0 :         result = QString("string<%1>").arg(f_value.toString());
      92           0 :         break;
      93             : 
      94           0 :     case token_t::TOKEN_ID_LITERAL_ENUM:
      95           0 :         result = QString("literal<%1>").arg(f_value.toString());
      96           0 :         break;
      97             : 
      98           0 :     case token_t::TOKEN_ID_EMPTY_ENUM:
      99           0 :         result = "empty<>";
     100           0 :         break;
     101             : 
     102           0 :     case token_t::TOKEN_ID_CHOICES_ENUM:
     103           0 :         result = QString("choices<...>");//.arg(f_value.toString());
     104           0 :         break;
     105             : 
     106           0 :     case token_t::TOKEN_ID_RULES_ENUM:
     107           0 :         result += " /* INVALID -- TOKEN_ID_RULES!!! */ ";
     108           0 :         break;
     109             : 
     110           0 :     case token_t::TOKEN_ID_NODE_ENUM:
     111           0 :         result += " /* INVALID -- TOKEN_ID_NODE!!! */ "; // name the NODE identifier (was a copy of the RULES text)
     112           0 :         break;
     113             : 
     114           0 :     case token_t::TOKEN_ID_ERROR_ENUM:
     115           0 :         result += " /* INVALID -- TOKEN_ID_ERROR!!! */ ";
     116           0 :         break;
     117             : 
     118           0 :     default:
     119           0 :         result += " /* INVALID -- unknown token identifier!!! */ ";
     120           0 :         break;
     121             : 
     122             :     }
     123             : 
     124           0 :     return result;
     125             : }
     126             : 
     127             : 
     128             : /** \brief Set the input string for the lexer.
     129             :  *
     130             :  * This lexer accepts a standard QString as input; it is what next_token()
     131             :  * parses. The input is never modified. By default, the input is an empty string.
     132             :  *
     133             :  * Setting a new input also rewinds the read position to the start of that
     134             :  * input and resets the current line number to 1.
     135             :  *
     136             :  * \param[in] input  The input string to be parsed by this lexer.
     137             :  */
     138           1 : void lexer::set_input(const QString& input)
     139             : {
     140           1 :     f_input = input;
     141           1 :     f_pos = f_input.begin(); // position refers to the stored copy (f_input), not the caller's string
     142           1 :     f_line = 1;
     143           1 : }
     144             : 
     145             : /** \brief Read the next token.
     146             :  *
     147             :  * At this time we support the following tokens:
     148             :  *
     149             :  * \li TOKEN_ID_NONE_ENUM -- the end of the input was reached
     150             :  *
     151             :  * \li TOKEN_ID_INTEGER_ENUM -- an integer ([0-9]+) number; always positive since
     152             :  *                the parser returns '-' as a separate literal
     153             :  *
     154             :  * \li TOKEN_ID_FLOAT_ENUM -- a floating point number with optional exponent
     155             :  *                ([0-9]+\.[0-9]+([eE][+-]?[0-9]+)?); always positive since
     156             :  *                the parser returns '-' as a separate literal
     157             :  *
     158             :  * \li TOKEN_ID_IDENTIFIER_ENUM -- supports C like identifiers ([a-z_][a-z0-9_]*)
     159             :  *
     160             :  * \li TOKEN_ID_KEYWORD_ENUM -- an identifier that matches one of our keywords
     161             :  *                as defined in the keyword map
     162             :  *
     163             :  * \li TOKEN_ID_STRING_ENUM -- a string delimited by double quotes ("); support
     164             :  *                backslashes; returns the content of the string
     165             :  *                (the quotes are removed)
     166             :  *
     167             :  * \li TOKEN_ID_LITERAL_ENUM -- anything else except what gets removed (spaces,
     168             :  *                new lines, C or C++ like comments)
     169             :  *
     170             :  * \li TOKEN_ID_ERROR_ENUM -- an error occurred, you can get the error message for
     171             :  *                more information
     172             :  *
     173             :  * The TOKEN_ID_LITERAL_ENUM may either return a character ('=' operator) or a
     174             :  * string ("/=" operator). The special literals are defined here:
     175             :  *
     176             :  * \li ++ - increment
     177             :  * \li += - add & assign
     178             :  * \li -- - decrement
     179             :  * \li -= - subtract & assign
     180             :  * \li *= - multiply & assign
     181             :  * \li ** - power
     182             :  * \li **= - power & assign
     183             :  * \li /= - divide & assign
     184             :  * \li %= - modulo & assign
     185             :  * \li ~= - bitwise not & assign
     186             :  * \li &= - bitwise and & assign
     187             :  * \li && - logical and
     188             :  * \li &&= - logical and & assign
     189             :  * \li |= - bitwise or & assign
     190             :  * \li || - logical or
     191             :  * \li ||= - logical or & assign
     192             :  * \li ^= - bitwise xor & assign
     193             :  * \li ^^ - logical xor
     194             :  * \li ^^= - logical xor & assign
     195             :  * \li != - not equal
     196             :  * \li !== - exactly not equal
     197             :  * \li !< - rotate left
     198             :  * \li !> - rotate right
     199             :  * \li ?= - assign default if undefined
     200             :  * \li == - equal
     201             :  * \li === - exactly equal
     202             :  * \li <= - smaller or equal
     203             :  * \li << - shift left
     204             :  * \li <<= - shift left and assign
     205             :  * \li <? - minimum
     206             :  * \li <?= - minimum and assign
     207             :  * \li >= - larger or equal
     208             :  * \li >> - shift right
     209             :  * \li >>> - unsigned shift right
     210             :  * \li >>= - shift right and assign
     211             :  * \li >>>= - unsigned shift right and assign
     212             :  * \li >? - maximum
     213             :  * \li >?= - maximum and assign
     214             :  * \li := - required assignment
     215             :  * \li :: - namespace
     216             :  *
     217             :  * If the returned token says TOKEN_ID_NONE_ENUM then you reached the
     218             :  * end of the input. When it says TOKEN_ID_ERROR_ENUM, then the input
     219             :  * is invalid and the error message and line number can be retrieved
     220             :  * to inform the user.
     221             :  *
     222             :  * The parser supports any type of new lines (Unix, Windows and Mac.)
     223             :  *
     224             :  * \todo
     225             :  * Check for overflow on integers and doubles
     226             :  *
     227             :  * \todo
     228             :  * Should we include default keywords? (i.e. true, false, if, else,
     229             :  * etc.) so those cannot be used as identifiers in some places?
     230             :  *
     231             :  * \return The read token.
     232             :  */
     233           4 : token lexer::next_token()
     234             : {
     235           0 :     auto xdigit = [](int c)
     236             :     {
     237           0 :         if(c >= '0' && c <= '9')
     238             :         {
     239           0 :             return c - '0';
     240             :         }
     241           0 :         else if(c >= 'a' && c <= 'f')
     242             :         {
     243           0 :             return c - 'a' + 10;
     244             :         }
     245           0 :         else if(c >= 'A' && c <= 'F')
     246             :         {
     247           0 :             return c - 'A' + 10;
     248             :         }
     249           0 :         return -1;
     250             :     };
     251           4 :     token        result;
     252             : 
     253             : // restart is called whenever we find a comment or
     254             : // some other entry that just gets "deleted" from the input
     255             : // (i.e. new line, space...)
     256             : //
     257             : // Note: I don't use a do ... while(repeat); because in some cases
     258             : // we are inside several levels of switch() for() while() loops.
     259           6 : restart:
     260             : 
     261             :     // we reached the end of input
     262           6 :     if(f_pos == f_input.end())
     263             :     {
     264           1 :         return result;
     265             :     }
     266             : 
     267           5 :     switch(f_pos->unicode())
     268             :     {
     269           0 :     case '\n':
     270           0 :         ++f_pos;
     271           0 :         ++f_line;
     272           0 :         goto restart;
     273             : 
     274           0 :     case '\r':
     275           0 :         ++f_pos;
     276           0 :         ++f_line;
     277           0 :         if(f_pos != f_input.end() && *f_pos == '\n')
     278             :         {
     279             :             // skip "\r\n" as one end of line
     280           0 :             ++f_pos;
     281             :         }
     282           0 :         goto restart;
     283             : 
     284           2 :     case ' ':
     285             :     case '\t':
     286           2 :         ++f_pos;
     287           2 :         goto restart;
     288             : 
     289           0 :     case '+':
     290           0 :         result.set_id(token_t::TOKEN_ID_LITERAL_ENUM);
     291           0 :         result.set_value(*f_pos);
     292           0 :         ++f_pos;
     293           0 :         if(f_pos != f_input.end())
     294             :         {
     295           0 :             switch(f_pos->unicode())
     296             :             {
     297           0 :             case '=': // add and assign
     298           0 :                 result.set_value("+=");
     299           0 :                 ++f_pos;
     300           0 :                 break;
     301             : 
     302           0 :             case '+': // increment
     303           0 :                 result.set_value("++");
     304           0 :                 ++f_pos;
     305           0 :                 break;
     306             : 
     307           0 :             default:
     308             :                 // ignore other characters
     309           0 :                 break;
     310             : 
     311             :             }
     312             :         }
     313           0 :         break;
     314             : 
     315           0 :     case '-':
     316           0 :         result.set_id(token_t::TOKEN_ID_LITERAL_ENUM);
     317           0 :         result.set_value(*f_pos);
     318           0 :         ++f_pos;
     319           0 :         if(f_pos != f_input.end())
     320             :         {
     321           0 :             switch(f_pos->unicode())
     322             :             {
     323           0 :             case '=': // subtract and assign
     324           0 :                 result.set_value("-=");
     325           0 :                 ++f_pos;
     326           0 :                 break;
     327             : 
     328           0 :             case '-': // decrement
     329           0 :                 result.set_value("--");
     330           0 :                 ++f_pos;
     331           0 :                 break;
     332             : 
     333           0 :             default:
     334             :                 // ignore other characters
     335           0 :                 break;
     336             : 
     337             :             }
     338             :         }
     339           0 :         break;
     340             : 
     341           0 :     case '*':
     342           0 :         result.set_id(token_t::TOKEN_ID_LITERAL_ENUM);
     343           0 :         result.set_value(*f_pos);
     344           0 :         ++f_pos;
     345           0 :         if(f_pos != f_input.end())
     346             :         {
     347           0 :             switch(f_pos->unicode())
     348             :             {
     349           0 :             case '/': // invalid C comment end marker
     350             :                 // in this case we don't have to restart since we
     351             :                 // reached the end of the input
     352           0 :                 f_error_code = lexer_error_t::LEXER_ERROR_INVALID_C_COMMENT;
     353           0 :                 f_error_message = "comment terminator without introducer";
     354           0 :                 f_error_line = f_line;
     355           0 :                 result.set_id(token_t::TOKEN_ID_ERROR_ENUM);
     356           0 :                 break;
     357             : 
     358           0 :             case '=': // multiply and assign
     359           0 :                 result.set_value("*=");
     360           0 :                 ++f_pos;
     361           0 :                 break;
     362             : 
     363           0 :             case '*': // power
     364           0 :                 result.set_value("**");
     365           0 :                 ++f_pos;
     366           0 :                 if(f_pos != f_input.end())
     367             :                 {
     368           0 :                     if(*f_pos == '=')
     369             :                     {
     370             :                         // power and assign
     371           0 :                         result.set_value("**=");
     372           0 :                         ++f_pos;
     373             :                     }
     374             :                 }
     375           0 :                 break;
     376             : 
     377           0 :             default:
     378             :                 // ignore other characters
     379           0 :                 break;
     380             : 
     381             :             }
     382             :         }
     383           0 :         break;
     384             : 
     385           0 :     case '/': // divide
     386           0 :         result.set_id(token_t::TOKEN_ID_LITERAL_ENUM);
     387           0 :         result.set_value(*f_pos);
     388           0 :         ++f_pos;
     389           0 :         if(f_pos != f_input.end())
     390             :         {
     391           0 :             switch(f_pos->unicode())
     392             :             {
     393           0 :             case '/': // C++ comment -- skip up to eol
     394           0 :                 for(++f_pos; f_pos != f_input.end(); ++f_pos)
     395             :                 {
     396           0 :                     if(*f_pos == '\n' || *f_pos == '\r')
     397             :                     {
     398           0 :                         goto restart;
     399             :                     }
     400             :                 }
     401             :                 // in this case we don't have to restart since we
     402             :                 // reached the end of the input
     403           0 :                 result.set_id(token_t::TOKEN_ID_NONE_ENUM);
     404           0 :                 break;
     405             : 
     406           0 :             case '*': // C comment -- skip up to */
     407           0 :                 for(++f_pos; f_pos != f_input.end(); ++f_pos)
     408             :                 {
     409           0 :                     if(f_pos + 1 != f_input.end() && *f_pos == '*' && f_pos[1] == '/')
     410             :                     {
     411           0 :                         f_pos += 2;
     412           0 :                         goto restart;
     413             :                     }
     414             :                 }
     415             :                 // in this case the comment was not terminated
     416           0 :                 f_error_code = lexer_error_t::LEXER_ERROR_INVALID_C_COMMENT;
     417           0 :                 f_error_message = "comment not terminated";
     418           0 :                 f_error_line = f_line;
     419           0 :                 result.set_id(token_t::TOKEN_ID_ERROR_ENUM);
     420           0 :                 break;
     421             : 
     422           0 :             case '=': // divide and assign
     423           0 :                 result.set_value("/=");
     424           0 :                 ++f_pos;
     425           0 :                 break;
     426             : 
     427           0 :             default:
     428             :                 // ignore other characters
     429           0 :                 break;
     430             : 
     431             :             }
     432             :         }
     433           0 :         break;
     434             : 
     435           0 :     case '%': // modulo
     436           0 :         result.set_id(token_t::TOKEN_ID_LITERAL_ENUM);
     437           0 :         result.set_value(*f_pos);
     438           0 :         ++f_pos;
     439           0 :         if(f_pos != f_input.end())
     440             :         {
     441           0 :             switch(f_pos->unicode())
     442             :             {
     443           0 :             case '=': // modulo and assign
     444           0 :                 result.set_value("%=");
     445           0 :                 ++f_pos;
     446           0 :                 break;
     447             : 
     448           0 :             default:
     449             :                 // ignore other characters
     450           0 :                 break;
     451             : 
     452             :             }
     453             :         }
     454           0 :         break;
     455             : 
     456           0 :     case '~': // bitwise not
     457           0 :         result.set_id(token_t::TOKEN_ID_LITERAL_ENUM);
     458           0 :         result.set_value(*f_pos);
     459           0 :         ++f_pos;
     460           0 :         if(f_pos != f_input.end())
     461             :         {
     462           0 :             switch(f_pos->unicode())
     463             :             {
     464           0 :             case '=': // bitwise not and assign
     465           0 :                 result.set_value("~=");
     466           0 :                 ++f_pos;
     467           0 :                 break;
     468             : 
     469           0 :             default:
     470             :                 // ignore other characters
     471           0 :                 break;
     472             : 
     473             :             }
     474             :         }
     475           0 :         break;
     476             : 
     477           0 :     case '&': // bitwise and
     478           0 :         result.set_id(token_t::TOKEN_ID_LITERAL_ENUM);
     479           0 :         result.set_value(*f_pos);
     480           0 :         ++f_pos;
     481           0 :         if(f_pos != f_input.end())
     482             :         {
     483           0 :             switch(f_pos->unicode())
     484             :             {
     485           0 :             case '=': // bitwise and & assign
     486           0 :                 result.set_value("&=");
     487           0 :                 ++f_pos;
     488           0 :                 break;
     489             : 
     490           0 :             case '&': // logical and
     491           0 :                 result.set_value("&&");
     492           0 :                 ++f_pos;
     493           0 :                 if(f_pos != f_input.end())
     494             :                 {
     495           0 :                     if(*f_pos == '=')
     496             :                     {
     497             :                         // logical and & assign
     498           0 :                         result.set_value("&&=");
     499           0 :                         ++f_pos;
     500             :                     }
     501             :                 }
     502           0 :                 break;
     503             : 
     504           0 :             default:
     505             :                 // ignore other characters
     506           0 :                 break;
     507             : 
     508             :             }
     509             :         }
     510           0 :         break;
     511             : 
     512           0 :     case '|': // bitwise or
     513           0 :         result.set_id(token_t::TOKEN_ID_LITERAL_ENUM);
     514           0 :         result.set_value(*f_pos);
     515           0 :         ++f_pos;
     516           0 :         if(f_pos != f_input.end())
     517             :         {
     518           0 :             switch(f_pos->unicode())
     519             :             {
     520           0 :             case '=': // bitwise or & assign
     521           0 :                 result.set_value("|=");
     522           0 :                 ++f_pos;
     523           0 :                 break;
     524             : 
     525           0 :             case '|': // logical or
     526           0 :                 result.set_value("||");
     527           0 :                 ++f_pos;
     528           0 :                 if(f_pos != f_input.end())
     529             :                 {
     530           0 :                     if(*f_pos == '=')
     531             :                     {
     532             :                         // logical or and assign
     533           0 :                         result.set_value("||=");
     534           0 :                         ++f_pos;
     535             :                     }
     536             :                 }
     537           0 :                 break;
     538             : 
     539           0 :             default:
     540             :                 // ignore other characters
     541           0 :                 break;
     542             : 
     543             :             }
     544             :         }
     545           0 :         break;
     546             : 
     547           0 :     case '^': // bitwise xor
     548           0 :         result.set_id(token_t::TOKEN_ID_LITERAL_ENUM);
     549           0 :         result.set_value(*f_pos);
     550           0 :         ++f_pos;
     551           0 :         if(f_pos != f_input.end())
     552             :         {
     553           0 :             switch(f_pos->unicode())
     554             :             {
     555           0 :             case '=': // bitwise xor & assign
     556           0 :                 result.set_value("^=");
     557           0 :                 ++f_pos;
     558           0 :                 break;
     559             : 
     560           0 :             case '^': // logical xor
     561           0 :                 result.set_value("^^");
     562           0 :                 ++f_pos;
     563           0 :                 if(f_pos != f_input.end())
     564             :                 {
     565           0 :                     if(*f_pos == '=')
     566             :                     {
     567             :                         // logical xor and assign
     568           0 :                         result.set_value("^^=");
     569           0 :                         ++f_pos;
     570             :                     }
     571             :                 }
     572           0 :                 break;
     573             : 
     574           0 :             default:
     575             :                 // ignore other characters
     576           0 :                 break;
     577             : 
     578             :             }
     579             :         }
     580           0 :         break;
     581             : 
     582           0 :     case '!': // logical not
     583           0 :         result.set_id(token_t::TOKEN_ID_LITERAL_ENUM);
     584           0 :         result.set_value(*f_pos);
     585           0 :         ++f_pos;
     586           0 :         if(f_pos != f_input.end())
     587             :         {
     588           0 :             switch(f_pos->unicode())
     589             :             {
     590           0 :             case '=': // not equal
     591           0 :                 result.set_value("!=");
     592           0 :                 ++f_pos;
     593           0 :                 if(f_pos != f_input.end())
     594             :                 {
     595           0 :                     if(*f_pos == '=')
     596             :                     {
     597             :                         // exactly not equal (type checked)
     598           0 :                         result.set_value("!==");
     599           0 :                         ++f_pos;
     600             :                     }
     601             :                 }
     602           0 :                 break;
     603             : 
     604           0 :             case '<': // rotate left
     605           0 :                 result.set_value("!<");
     606           0 :                 ++f_pos;
     607           0 :                 break;
     608             : 
     609           0 :             case '>': // rotate right
     610           0 :                 result.set_value("!>");
     611           0 :                 ++f_pos;
     612           0 :                 break;
     613             : 
     614           0 :             default:
     615             :                 // ignore other characters
     616           0 :                 break;
     617             : 
     618             :             }
     619             :         }
     620           0 :         break;
     621             : 
     622           0 :     case '?': // ? by itself is used here and there generally similar to C/C++
     623           0 :         result.set_id(token_t::TOKEN_ID_LITERAL_ENUM);
     624           0 :         result.set_value(*f_pos);
     625           0 :         ++f_pos;
     626           0 :         if(f_pos != f_input.end())
     627             :         {
     628           0 :             switch(f_pos->unicode())
     629             :             {
     630           0 :             case '=': // assign if left hand side not set
     631           0 :                 result.set_value("?=");
     632           0 :                 ++f_pos;
     633           0 :                 break;
     634             : 
     635           0 :             default:
     636             :                 // ignore other characters
     637           0 :                 break;
     638             : 
     639             :             }
     640             :         }
     641           0 :         break;
     642             : 
     643           0 :     case '=': // assign
     644           0 :         result.set_id(token_t::TOKEN_ID_LITERAL_ENUM);
     645           0 :         result.set_value(*f_pos);
     646           0 :         ++f_pos;
     647           0 :         if(f_pos != f_input.end())
     648             :         {
     649           0 :             switch(f_pos->unicode())
     650             :             {
     651           0 :             case '=': // equality check (compare)
     652           0 :                 result.set_value("==");
     653           0 :                 ++f_pos;
     654           0 :                 if(f_pos != f_input.end())
     655             :                 {
     656           0 :                     if(*f_pos == '=')
     657             :                     {
     658             :                         // exactly equal (type checked)
     659           0 :                         result.set_value("===");
     660           0 :                         ++f_pos;
     661             :                     }
     662             :                 }
     663           0 :                 break;
     664             : 
     665           0 :             default:
     666             :                 // ignore other characters
     667           0 :                 break;
     668             : 
     669             :             }
     670             :         }
     671           0 :         break;
     672             : 
     673           0 :     case '<': // less than
     674           0 :         result.set_id(token_t::TOKEN_ID_LITERAL_ENUM);
     675           0 :         result.set_value(*f_pos);
     676           0 :         ++f_pos;
     677           0 :         if(f_pos != f_input.end())
     678             :         {
     679           0 :             switch(f_pos->unicode())
     680             :             {
     681           0 :             case '=': // smaller or equal
     682           0 :                 result.set_value("<=");
     683           0 :                 ++f_pos;
     684           0 :                 break;
     685             : 
     686           0 :             case '<': // shift left
     687           0 :                 result.set_value("<<");
     688           0 :                 ++f_pos;
     689           0 :                 if(f_pos != f_input.end())
     690             :                 {
     691           0 :                     if(*f_pos == '=')
     692             :                     {
     693             :                         // shift left and assign
     694           0 :                         result.set_value("<<=");
     695           0 :                         ++f_pos;
     696             :                     }
     697             :                 }
     698           0 :                 break;
     699             : 
     700           0 :             case '?': // minimum
     701           0 :                 result.set_value("<?");
     702           0 :                 ++f_pos;
     703           0 :                 if(f_pos != f_input.end())
     704             :                 {
     705           0 :                     if(*f_pos == '=')
     706             :                     {
     707             :                         // minimum and assign
     708           0 :                         result.set_value("<?=");
     709           0 :                         ++f_pos;
     710             :                     }
     711             :                 }
     712           0 :                 break;
     713             : 
     714           0 :             default:
     715             :                 // ignore other characters
     716           0 :                 break;
     717             : 
     718             :             }
     719             :         }
     720           0 :         break;
     721             : 
     722           1 :     case '>': // greater than
     723           1 :         result.set_id(token_t::TOKEN_ID_LITERAL_ENUM);
     724           1 :         result.set_value(*f_pos);
     725           1 :         ++f_pos;
     726           1 :         if(f_pos != f_input.end())
     727             :         {
     728           1 :             switch(f_pos->unicode())
     729             :             {
     730           0 :             case '=': // larger or equal
     731           0 :                 result.set_value(">=");
     732           0 :                 ++f_pos;
     733           0 :                 break;
     734             : 
     735           0 :             case '>': // shift right
     736           0 :                 result.set_value(">>");
     737           0 :                 ++f_pos;
     738           0 :                 if(f_pos != f_input.end())
     739             :                 {
     740           0 :                     switch(f_pos->unicode())
     741             :                     {
     742           0 :                     case '=':
     743             :                         // shift right and assign
     744           0 :                         result.set_value(">>=");
     745           0 :                         ++f_pos;
     746           0 :                         break;
     747             : 
     748           0 :                     case '>':
     749             :                         // unsigned shift right
     750           0 :                         result.set_value(">>>");
     751           0 :                         ++f_pos;
     752           0 :                         if(f_pos != f_input.end())
     753             :                         {
     754           0 :                             if(*f_pos == '=')
     755             :                             {
     756             :                                 // unsigned right shift and assign
     757           0 :                                 result.set_value(">>>=");
     758           0 :                                 ++f_pos;
     759             :                             }
     760             :                         }
     761           0 :                         break;
     762             : 
     763           0 :                     default:
     764             :                         // ignore other characters
     765           0 :                         break;
     766             : 
     767             :                     }
     768             :                 }
     769           0 :                 break;
     770             : 
     771           0 :             case '?': // maximum
     772           0 :                 result.set_value(">?");
     773           0 :                 ++f_pos;
     774           0 :                 if(f_pos != f_input.end())
     775             :                 {
     776           0 :                     if(*f_pos == '=')
     777             :                     {
     778             :                         // maximum and assign
     779           0 :                         result.set_value(">?=");
     780           0 :                         ++f_pos;
     781             :                     }
     782             :                 }
     783           0 :                 break;
     784             : 
     785           1 :             default:
     786             :                 // ignore other characters
     787           1 :                 break;
     788             : 
     789             :             }
     790             :         }
     791           1 :         break;
     792             : 
     793           0 :     case ':':
     794           0 :         result.set_id(token_t::TOKEN_ID_LITERAL_ENUM);
     795           0 :         result.set_value(*f_pos);
     796           0 :         ++f_pos;
     797           0 :         if(f_pos != f_input.end())
     798             :         {
     799           0 :             switch(f_pos->unicode())
     800             :             {
     801           0 :             case '=': // required
     802           0 :                 result.set_value(":=");
     803           0 :                 ++f_pos;
     804           0 :                 break;
     805             : 
     806           0 :             case ':': // namespace
     807           0 :                 result.set_value("::");
     808           0 :                 ++f_pos;
     809           0 :                 break;
     810             : 
     811           0 :             default:
     812             :                 // ignore other characters
     813           0 :                 break;
     814             : 
     815             :             }
     816             :         }
     817           0 :         break;
     818             : 
     819           0 :     case '"':
     820             :         {
     821           0 :             ++f_pos;
     822           0 :             QString str;
     823           0 :             while(f_pos != f_input.end() && *f_pos != '"')
     824             :             {
     825           0 :                 if(*f_pos == '\n' || *f_pos == '\r')
     826             :                 {
     827             :                     // strings cannot continue after the end of a line
     828           0 :                     break;
     829             :                 }
     830           0 :                 if(*f_pos == '\\')
     831             :                 {
     832           0 :                     ++f_pos;
     833           0 :                     if(f_pos == f_input.end())
     834             :                     {
     835             :                         // this is an invalid backslash
     836           0 :                         break;
     837             :                     }
     838             :                     // TODO: add support for \x## and various other
     839             :                     //       escaped characters
     840           0 :                     switch(f_pos->unicode())
     841             :                     {
     842           0 :                     case 'a':
     843           0 :                         str += "\a";
     844           0 :                         break;
     845             : 
     846           0 :                     case 'b':
     847           0 :                         str += "\b";
     848           0 :                         break;
     849             : 
     850           0 :                     case 'f':
     851           0 :                         str += "\f";
     852           0 :                         break;
     853             : 
     854           0 :                     case 'n':
     855           0 :                         str += "\n";
     856           0 :                         break;
     857             : 
     858           0 :                     case 'r':
     859           0 :                         str += "\r";
     860           0 :                         break;
     861             : 
     862           0 :                     case 't':
     863           0 :                         str += "\t";
     864           0 :                         break;
     865             : 
     866           0 :                     case 'v':
     867           0 :                         str += "\v";
     868           0 :                         break;
     869             : 
     870           0 :                     case '0':
     871             :                     case '1':
     872             :                     case '2':
     873             :                     case '3':
     874             :                     case '4':
     875             :                     case '5':
     876             :                     case '6':
     877             :                     case '7':
     878             :                         // "\nnn" -- maximum of 3 digits
     879             :                         {
     880           0 :                             int v(f_pos->unicode() - '0');
     881           0 :                             if(f_pos + 1 != f_input.end() && (f_pos + 1)->unicode() >= '0' && (f_pos + 1)->unicode() <= '7')
     882             :                             {
     883           0 :                                 ++f_pos;
     884           0 :                                 v = v * 8 + f_pos->unicode() - '0';
     885             : 
     886           0 :                                 if(f_pos + 1 != f_input.end() && (f_pos + 1)->unicode() >= '0' && (f_pos + 1)->unicode() <= '7')
     887             :                                 {
     888           0 :                                     ++f_pos;
     889           0 :                                     v = v * 8 + f_pos->unicode() - '0';
     890             :                                 }
     891             :                             }
     892           0 :                             str += QChar(v);
     893             :                         }
     894           0 :                         break;
     895             : 
     896           0 :                     case 'x':
     897             :                     case 'X':
     898             :                         {
     899           0 :                             if(f_pos + 1 != f_input.end() && std::isxdigit((f_pos + 1)->unicode()))
     900             :                             {
     901           0 :                                 ++f_pos;
     902           0 :                                 int v(xdigit(f_pos->unicode()));
     903             : 
     904           0 :                                 if(f_pos + 1 != f_input.end() && std::isxdigit((f_pos + 1)->unicode()))
     905             :                                 {
     906           0 :                                     ++f_pos;
     907           0 :                                     v = v * 16 + xdigit(f_pos->unicode());
     908             :                                 }
     909             : 
     910           0 :                                 str += QChar(v);
     911             :                             }
     912             :                         }
     913           0 :                         break;
     914             : 
     915           0 :                     case 'u':
     916             :                         // take 0 to 4 digits
     917             :                         {
     918           0 :                             int v(0);
     919           0 :                             for(int idx(0); idx < 4; ++idx)
     920             :                             {
     921           0 :                                 if(f_pos == f_input.end()
     922           0 :                                 || !std::isxdigit((f_pos + 1)->unicode()))
     923             :                                 {
     924           0 :                                     break;
     925             :                                 }
     926           0 :                                 ++f_pos;
     927           0 :                                 v = v * 16 + xdigit(f_pos->unicode());
     928             :                             }
     929           0 :                             str += QChar(v);
     930             :                         }
     931           0 :                         break;
     932             : 
     933           0 :                     case 'U':
     934             :                         // take 0 to 8 digits
     935             :                         {
     936           0 :                             uint v(0);
     937           0 :                             for(int idx(0); idx < 8; ++idx)
     938             :                             {
     939           0 :                                 if(f_pos == f_input.end()
     940           0 :                                 || !std::isxdigit((f_pos + 1)->unicode()))
     941             :                                 {
     942           0 :                                     break;
     943             :                                 }
     944           0 :                                 ++f_pos;
     945           0 :                                 v = v * 16 + xdigit(f_pos->unicode());
     946             :                             }
     947           0 :                             str += QString::fromUcs4(&v, 1);
     948             :                         }
     949           0 :                         break;
     950             : 
     951           0 :                     default:
     952             :                         // anything, keep as is (", ', ?, \)
     953           0 :                         str += *f_pos;
     954           0 :                         break;
     955             : 
     956             :                     }
     957             :                 }
     958             :                 else
     959             :                 {
     960           0 :                     str += *f_pos;
     961             :                 }
     962           0 :                 ++f_pos;
     963             :             }
     964           0 :             if(f_pos == f_input.end())
     965             :             {
     966           0 :                 f_error_code = lexer_error_t::LEXER_ERROR_INVALID_STRING;
     967           0 :                 f_error_message = "invalid string";
     968           0 :                 f_error_line = f_line;
     969           0 :                 result.set_id(token_t::TOKEN_ID_ERROR_ENUM);
     970             :             }
     971             :             else
     972             :             {
     973           0 :                 result.set_id(token_t::TOKEN_ID_STRING_ENUM);
     974           0 :                 result.set_value(str);
     975           0 :                 ++f_pos; // skip the closing quote
     976           0 :             }
     977             :         }
     978           0 :         break;
     979             : 
     980           0 :     case '0':
     981             :         // hexadecimal?
     982           0 :         if(f_pos + 1 != f_input.end() && (f_pos[1] == 'x' || f_pos[1] == 'X')
     983           0 :         && f_pos + 2 != f_input.end() && ((f_pos[2] >= '0' && f_pos[2] <= '9')
     984           0 :                                     || (f_pos[2] >= 'a' && f_pos[2] <= 'f')
     985           0 :                                     || (f_pos[2] >= 'A' && f_pos[2] <= 'F')))
     986             :         {
     987             :             bool ok;
     988           0 :             f_pos += 2; // skip the 0x or 0X
     989           0 :             QString::const_iterator start(f_pos);
     990             :             // parse number
     991           0 :             while(f_pos != f_input.end() && ((*f_pos >= '0' && *f_pos <= '9')
     992           0 :                     || (*f_pos >= 'a' && *f_pos <= 'f')
     993           0 :                     || (*f_pos >= 'A' && *f_pos <= 'F')))
     994             :             {
     995           0 :                 ++f_pos;
     996             :             }
     997           0 :             result.set_id(token_t::TOKEN_ID_INTEGER_ENUM);
     998           0 :             QString value(start, static_cast<int>(f_pos - start));
     999           0 :             result.set_value(value.toULongLong(&ok, 16));
    1000           0 :             if(!ok)
    1001             :             {
    1002             :                 // as far as I know the only reason it can fail is because
    1003             :                 // it is too large (since we parsed a valid number!)
    1004           0 :                 f_error_code = lexer_error_t::LEXER_ERROR_INVALID_NUMBER;
    1005           0 :                 f_error_message = "number too large";
    1006           0 :                 f_error_line = f_line;
    1007           0 :                 result.set_id(token_t::TOKEN_ID_ERROR_ENUM);
    1008             :             }
    1009           0 :             break;
    1010           0 :         }
    1011             :         // no octal support at this point, octal is not available in
    1012             :         // JavaScript by default!
    1013             : #if __cplusplus >= 201700
    1014             :         [[fallthrough]];
    1015             : #endif
    1016             :     case '1':
    1017             :     case '2':
    1018             :     case '3':
    1019             :     case '4':
    1020             :     case '5':
    1021             :     case '6':
    1022             :     case '7':
    1023             :     case '8':
    1024             :     case '9':
    1025             :         {
    1026             :             bool ok;
    1027             :             // TODO: test overflows
    1028           0 :             QString::const_iterator start(f_pos);
    1029             :             // number
    1030           0 :             do
    1031             :             {
    1032           0 :                 ++f_pos;
    1033             :             }
    1034           0 :             while(f_pos != f_input.end() && *f_pos >= '0' && *f_pos <= '9');
    1035           0 :             if(*f_pos == '.')
    1036             :             {
    1037             :                 // skip the decimal point
    1038           0 :                 ++f_pos;
    1039             : 
    1040             :                 // floating point
    1041           0 :                 while(f_pos != f_input.end() && *f_pos >= '0' && *f_pos <= '9')
    1042             :                 {
    1043           0 :                     ++f_pos;
    1044             :                 }
    1045             :                 // TODO: add exponent support
    1046           0 :                 result.set_id(token_t::TOKEN_ID_FLOAT_ENUM);
    1047           0 :                 QString value(start, static_cast<int>(f_pos - start));
    1048           0 :                 result.set_value(value.toDouble(&ok));
    1049             :             }
    1050             :             else
    1051             :             {
    1052           0 :                 result.set_id(token_t::TOKEN_ID_INTEGER_ENUM);
    1053           0 :                 QString value(start, static_cast<int>(f_pos - start));
    1054           0 :                 result.set_value(value.toULongLong(&ok));
    1055             :             }
    1056           0 :             if(!ok)
    1057             :             {
    1058             :                 // as far as I know the only reason it can fail is because
    1059             :                 // it is too large (since we parsed a valid number!)
    1060           0 :                 f_error_code = lexer_error_t::LEXER_ERROR_INVALID_NUMBER;
    1061           0 :                 f_error_message = "number too large";
    1062           0 :                 f_error_line = f_line;
    1063           0 :                 result.set_id(token_t::TOKEN_ID_ERROR_ENUM);
    1064           0 :             }
    1065             :         }
    1066           0 :         break;
    1067             : 
    1068           2 :     default:
    1069             :         // TBD: add support for '$' for JavaScript?
    1070           6 :         if((*f_pos >= 'a' && *f_pos <= 'z')
    1071           2 :         || (*f_pos >= 'A' && *f_pos <= 'Z')
    1072           2 :         || *f_pos == '_')
    1073             :         {
    1074             :             // identifier
    1075           2 :             QString::const_iterator start(f_pos);
    1076           2 :             ++f_pos;
    1077           6 :             while(f_pos != f_input.end()
    1078          10 :                 && ((*f_pos >= 'a' && *f_pos <= 'z')
    1079           7 :                     || (*f_pos >= 'A' && *f_pos <= 'Z')
    1080           7 :                     || (*f_pos >= '0' && *f_pos <= '9')
    1081           5 :                     || *f_pos == '_'))
    1082             :             {
    1083           2 :                 ++f_pos;
    1084             :             }
    1085           4 :             QString identifier(start, static_cast<int>(f_pos - start));
    1086           2 :             if(f_keywords.contains(identifier))
    1087             :             {
    1088           0 :                 result.set_id(token_t::TOKEN_ID_KEYWORD_ENUM);
    1089           0 :                 result.set_value(f_keywords[identifier]);
    1090             :             }
    1091             :             else
    1092             :             {
    1093           2 :                 result.set_id(token_t::TOKEN_ID_IDENTIFIER_ENUM);
    1094           2 :                 result.set_value(identifier);
    1095             :             }
    1096             :         }
    1097             :         else
    1098             :         {
    1099             :             // in all other cases return a QChar
    1100           0 :             result.set_id(token_t::TOKEN_ID_LITERAL_ENUM);
    1101           0 :             result.set_value(*f_pos);
    1102           0 :             ++f_pos;
    1103             :         }
    1104           2 :         break;
    1105             : 
    1106             :     }
    1107             : 
    1108             : // Only to help with debug sessions
    1109             : //std::cerr << "lexer result: " << result.to_string() << "\n";
    1110             : 
    1111           3 :     return result;
    1112             : }
    1113             : 
    1114           2 : void lexer::add_keyword(keyword& k)
    1115             : {
    1116           2 :     f_keywords[k.identifier()] = k.number();
    1117           2 : }
    1118             : 
    1119             : 
    1120             : int    keyword::g_next_number = 0;
    1121             : 
    1122           2 : keyword::keyword(lexer& parent, const QString& keyword_identifier, int index_number)
    1123           2 :     : f_number(index_number == 0 ? ++g_next_number : index_number)
    1124           2 :     , f_identifier(keyword_identifier)
    1125             : {
    1126           2 :     parent.add_keyword(*this);
    1127           2 : }
    1128             : 
    1129             : 
    1130             : 
    1131           0 : rule::rule_data_t::rule_data_t()
    1132             :     : f_token(token_t::TOKEN_ID_NONE_ENUM)
    1133             :     //, f_value("") -- auto-init
    1134             :     //, f_keyword() -- auto-init
    1135           0 :     , f_choices(nullptr)
    1136             : {
    1137           0 : }
    1138             : 
    1139         207 : rule::rule_data_t::rule_data_t(rule_data_t const& s)
    1140         207 :     : f_token(s.f_token)
    1141             :     , f_value(s.f_value)
    1142             :     , f_keyword(s.f_keyword)
    1143         207 :     , f_choices(s.f_choices)
    1144             : {
    1145         207 : }
    1146             : 
    1147          89 : rule::rule_data_t::rule_data_t(choices& c)
    1148             :     : f_token(token_t::TOKEN_ID_CHOICES_ENUM)
    1149             :     //, f_value("") -- auto-init
    1150             :     //, f_keyword() -- auto-init
    1151          89 :     , f_choices(&c)
    1152             : {
    1153          89 : }
    1154             : 
    1155           8 : rule::rule_data_t::rule_data_t(token_t token)
    1156             :     : f_token(token)
    1157             :     //, f_value("") -- auto-init
    1158             :     //, f_keyword() -- auto-init
    1159           8 :     , f_choices(nullptr)
    1160             : {
    1161           8 : }
    1162             : 
    1163          36 : rule::rule_data_t::rule_data_t(const QString& value)
    1164             :     : f_token(token_t::TOKEN_ID_LITERAL_ENUM)
    1165             :     , f_value(value)
    1166             :     //, f_keyword() -- auto-init
    1167          36 :     , f_choices(nullptr)
    1168             : {
    1169          36 : }
    1170             : 
    1171           2 : rule::rule_data_t::rule_data_t(const keyword& k)
    1172             :     : f_token(token_t::TOKEN_ID_KEYWORD_ENUM)
    1173             :     //, f_value("") -- auto-init
    1174             :     , f_keyword(k)
    1175           2 :     , f_choices(nullptr)
    1176             : {
    1177           2 : }
    1178             : 
    1179             : 
    1180             : 
    1181           0 : rule::rule(choices& c)
    1182             :     : f_parent(&c)
    1183             :     //, f_tokens() -- auto-init
    1184           0 :     , f_reducer(nullptr)
    1185             : {
    1186           0 : }
    1187             : 
    1188          53 : rule::rule(const rule& r)
    1189          53 :     : f_parent(r.f_parent)
    1190             :     , f_tokens(r.f_tokens)
    1191          53 :     , f_reducer(r.f_reducer)
    1192             : {
    1193          53 : }
    1194             : 
    1195          16 : void rule::add_rules(choices& c)
    1196             : {
    1197          32 :     rule_data_t data(c);
    1198          16 :     data.f_token = token_t::TOKEN_ID_RULES_ENUM;
    1199          16 :     f_tokens.push_back(data);
    1200          16 : }
    1201             : 
    1202          73 : void rule::add_choices(choices& c)
    1203             : {
    1204          73 :     f_tokens.push_back(rule_data_t(c));
    1205          73 : }
    1206             : 
    1207           8 : void rule::add_token(token_t token)
    1208             : {
    1209           8 :     f_tokens.push_back(rule_data_t(token));
    1210           8 : }
    1211             : 
    1212          36 : void rule::add_literal(const QString& value)
    1213             : {
    1214          36 :     f_tokens.push_back(rule_data_t(value));
    1215          36 : }
    1216             : 
    1217           2 : void rule::add_keyword(const keyword& k)
    1218             : {
    1219           2 :     f_tokens.push_back(rule_data_t(k));
    1220           2 : }
    1221             : 
    1222           1 : rule& rule::operator >> (const token_id& token)
    1223             : {
    1224           1 :     add_token(token);
    1225           1 :     return *this;
    1226             : }
    1227             : 
    1228           0 : rule& rule::operator >> (const QString& literal)
    1229             : {
    1230           0 :     add_literal(literal);
    1231           0 :     return *this;
    1232             : }
    1233             : 
    1234           4 : rule& rule::operator >> (char const *literal)
    1235             : {
    1236           4 :     add_literal(literal);
    1237           4 :     return *this;
    1238             : }
    1239             : 
    1240           0 : rule& rule::operator >> (keyword const& k)
    1241             : {
    1242           0 :     add_keyword(k);
    1243           0 :     return *this;
    1244             : }
    1245             : 
    1246          26 : rule& rule::operator >> (choices& c)
    1247             : {
    1248          26 :     add_choices(c);
    1249          26 :     return *this;
    1250             : }
    1251             : 
    1252          32 : rule& rule::operator >= (rule::reducer_t function)
    1253             : {
    1254          32 :     set_reducer(function);
    1255          32 :     return *this;
    1256             : }
    1257             : 
    1258           0 : rule& operator >> (token_id const& token_left, token_id const& token_right) // Starts a new rule holding token_left followed by token_right.
    1259             : {
    1260           0 :     rule *r(new rule); // allocated with new and returned by reference; this function never releases it
    1261           0 :     r->add_token(token_left);
    1262           0 :     r->add_token(token_right);
    1263           0 :     return *r;
    1264             : }
    1265             : 
    1266           0 : rule& operator >> (token_id const& token, QString const& literal) // Starts a new rule holding `token` followed by the literal string.
    1267             : {
    1268           0 :     rule *r(new rule); // allocated with new and returned by reference; this function never releases it
    1269           0 :     r->add_token(token);
    1270           0 :     r->add_literal(literal);
    1271           0 :     return *r;
    1272             : }
    1273             : 
    1274           1 : rule& operator >> (token_id const& token, char const *literal) // Starts a new rule holding `token` followed by the C string literal.
    1275             : {
    1276           1 :     rule *r(new rule); // allocated with new and returned by reference; this function never releases it
    1277           1 :     r->add_token(token);
    1278           1 :     r->add_literal(literal); // add_literal() takes a QString, so the C string is converted implicitly here
    1279           1 :     return *r;
    1280             : }
    1281             : 
    1282           0 : rule& operator >> (token_id const& token, keyword const& k) // Starts a new rule holding `token` followed by the keyword `k`.
    1283             : {
    1284           0 :     rule *r(new rule); // allocated with new and returned by reference; this function never releases it
    1285           0 :     r->add_token(token);
    1286           0 :     r->add_keyword(k);
    1287           0 :     return *r;
    1288             : }
    1289             : 
    1290           0 : rule& operator >> (token_id const& token, choices& c) // Starts a new rule holding `token` followed by the choices `c`.
    1291             : {
    1292           0 :     rule *r(new rule); // allocated with new and returned by reference; this function never releases it
    1293           0 :     r->add_token(token);
    1294           0 :     r->add_choices(c);
    1295           0 :     return *r;
    1296             : }
    1297             : 
    1298           0 : rule& operator >> (QString const& literal, token_id const& token) // Starts a new rule holding the literal string followed by `token`.
    1299             : {
    1300           0 :     rule *r(new rule); // allocated with new and returned by reference; this function never releases it
    1301           0 :     r->add_literal(literal);
    1302           0 :     r->add_token(token);
    1303           0 :     return *r;
    1304             : }
    1305             : 
    1306           0 : rule& operator >> (QString const& literal_left, QString const& literal_right) // Starts a new rule holding two literal strings, left then right.
    1307             : {
    1308           0 :     rule *r(new rule); // allocated with new and returned by reference; this function never releases it
    1309           0 :     r->add_literal(literal_left);
    1310           0 :     r->add_literal(literal_right);
    1311           0 :     return *r;
    1312             : }
    1313             : 
    1314           0 : rule& operator >> (QString const& literal, keyword const& k) // Starts a new rule holding the literal string followed by the keyword `k`.
    1315             : {
    1316           0 :     rule *r(new rule); // allocated with new and returned by reference; this function never releases it
    1317           0 :     r->add_literal(literal);
    1318           0 :     r->add_keyword(k);
    1319           0 :     return *r;
    1320             : }
    1321             : 
    1322           0 : rule& operator >> (QString const& literal, choices& c) // Starts a new rule holding the literal string followed by the choices `c`.
    1323             : {
    1324           0 :     rule *r(new rule); // allocated with new and returned by reference; this function never releases it
    1325           0 :     r->add_literal(literal);
    1326           0 :     r->add_choices(c);
    1327           0 :     return *r;
    1328             : }
    1329             : 
    1330           0 : rule& operator >> (keyword const& k, token_id const& token) // Starts a new rule holding the keyword `k` followed by `token`.
    1331             : {
    1332           0 :     rule *r(new rule); // allocated with new and returned by reference; this function never releases it
    1333           0 :     r->add_keyword(k);
    1334           0 :     r->add_token(token);
    1335           0 :     return *r;
    1336             : }
    1337             : 
    1338           0 : rule& operator >> (keyword const& k, QString const& literal) // Starts a new rule holding the keyword `k` followed by the literal string.
    1339             : {
    1340           0 :     rule *r(new rule); // allocated with new and returned by reference; this function never releases it
    1341           0 :     r->add_keyword(k);
    1342           0 :     r->add_literal(literal);
    1343           0 :     return *r;
    1344             : }
    1345             : 
    1346           0 : rule& operator >> (keyword const& k_left, keyword const& k_right) // Starts a new rule holding two keywords, left then right.
    1347             : {
    1348           0 :     rule *r(new rule); // allocated with new and returned by reference; this function never releases it
    1349           0 :     r->add_keyword(k_left);
    1350           0 :     r->add_keyword(k_right);
    1351           0 :     return *r;
    1352             : }
    1353             : 
    1354           0 : rule& operator >> (keyword const& k, choices& c) // Starts a new rule holding the keyword `k` followed by the choices `c`.
    1355             : {
    1356           0 :     rule *r(new rule); // allocated with new and returned by reference; this function never releases it
    1357           0 :     r->add_keyword(k);
    1358           0 :     r->add_choices(c);
    1359           0 :     return *r;
    1360             : }
    1361             : 
    1362           1 : rule& operator >> (choices& c, token_id const& token) // Starts a new rule holding the choices `c` followed by `token`.
    1363             : {
    1364           1 :     rule *r(new rule); // allocated with new and returned by reference; this function never releases it
    1365           1 :     r->add_choices(c);
    1366           1 :     r->add_token(token);
    1367           1 :     return *r;
    1368             : }
    1369             : 
    1370          26 : rule& operator >> (choices& c, QString const& literal) // Starts a new rule holding the choices `c` followed by the literal string.
    1371             : {
    1372          26 :     rule *r(new rule); // allocated with new and returned by reference; this function never releases it
    1373          26 :     r->add_choices(c);
    1374          26 :     r->add_literal(literal);
    1375          26 :     return *r;
    1376             : }
    1377             : 
    1378           0 : rule& operator >> (choices& c, keyword const& k) // Starts a new rule holding the choices `c` followed by the keyword `k`.
    1379             : {
    1380           0 :     rule *r(new rule); // allocated with new and returned by reference; this function never releases it
    1381           0 :     r->add_choices(c);
    1382           0 :     r->add_keyword(k);
    1383           0 :     return *r;
    1384             : }
    1385             : 
    1386           0 : rule& operator >> (choices& c_left, choices& c_right) // Starts a new rule holding two choices references, left then right.
    1387             : {
    1388           0 :     rule *r(new rule); // allocated with new and returned by reference; this function never releases it
    1389           0 :     r->add_choices(c_left);
    1390           0 :     r->add_choices(c_right);
    1391           0 :     return *r;
    1392             : }
    1393             : 
    1394           5 : rule& operator >> (char const *literal, choices& c) // Starts a new rule holding the C string literal followed by the choices `c`.
    1395             : {
    1396           5 :     rule *r(new rule); // allocated with new and returned by reference; this function never releases it
    1397           5 :     r->add_literal(literal); // add_literal() takes a QString, so the C string is converted implicitly here
    1398           5 :     r->add_choices(c);
    1399           5 :     return *r;
    1400             : }
    1401             : 
    1402           4 : rule& operator >= (token_id const& token, rule::reducer_t function) // Starts a new single-token rule and attaches `function` as its reducer.
    1403             : {
    1404           4 :     rule *r(new rule); // allocated with new and returned by reference; this function never releases it
    1405           4 :     r->add_token(token);
    1406           4 :     r->set_reducer(function);
    1407           4 :     return *r;
    1408             : }
    1409             : 
    1410           0 : rule& operator >= (QString const& literal, rule::reducer_t function) // Starts a new single-literal rule and attaches `function` as its reducer.
    1411             : {
    1412           0 :     rule *r(new rule); // allocated with new and returned by reference; this function never releases it
    1413           0 :     r->add_literal(literal);
    1414           0 :     r->set_reducer(function);
    1415           0 :     return *r;
    1416             : }
    1417             : 
    1418           2 : rule& operator >= (keyword const& k, rule::reducer_t function) // Starts a new single-keyword rule and attaches `function` as its reducer.
    1419             : {
    1420           2 :     rule *r(new rule); // allocated with new and returned by reference; this function never releases it
    1421           2 :     r->add_keyword(k);
    1422           2 :     r->set_reducer(function);
    1423           2 :     return *r;
    1424             : }
    1425             : 
    1426          15 : rule& operator >= (choices& c, rule::reducer_t function) // Starts a new rule holding only the choices `c` and attaches `function` as its reducer.
    1427             : {
    1428          15 :     rule *r(new rule); // allocated with new and returned by reference; this function never releases it
    1429          15 :     r->add_choices(c);
    1430          15 :     r->set_reducer(function);
    1431          15 :     return *r;
    1432             : }
    1433             : 
    1434           0 : QString rule::to_string() const // Renders this rule as text: one item per entry of f_tokens, separated by single spaces, plus " { ... }" when a reducer is set.
    1435             : {
    1436           0 :     QString        result;
    1437             : 
    1438           0 :     for(QVector<rule_data_t>::const_iterator ri = f_tokens.begin();
    1439           0 :                                             ri != f_tokens.end(); ++ri)
    1440             :     {
    1441           0 :         if(ri != f_tokens.begin()) // separator goes before every entry except the first
    1442             :         {
    1443           0 :             result += " ";
    1444             :         }
    1445           0 :         const rule_data_t& r(*ri);
    1446           0 :         switch(r.f_token) // the token kind selects which other field of the entry (keyword, value, choices) is printed
    1447             :         {
    1448           0 :         case token_t::TOKEN_ID_NONE_ENUM:
    1449           0 :             result += "\xA4";  // currency sign used as the EOI marker -- NOTE(review): the literal is the single byte 0xA4; how a const char* is decoded into QString depends on the Qt version/build, confirm it renders as intended
    1450           0 :             break;
    1451             : 
    1452           0 :         case token_t::TOKEN_ID_INTEGER_ENUM:
    1453           0 :             result += "TOKEN_ID_INTEGER";
    1454           0 :             break;
    1455             : 
    1456           0 :         case token_t::TOKEN_ID_FLOAT_ENUM:
    1457           0 :             result += "TOKEN_ID_FLOAT";
    1458           0 :             break;
    1459             : 
    1460           0 :         case token_t::TOKEN_ID_IDENTIFIER_ENUM:
    1461           0 :             result += "TOKEN_ID_IDENTIFIER";
    1462           0 :             break;
    1463             : 
    1464           0 :         case token_t::TOKEN_ID_KEYWORD_ENUM:
    1465           0 :             result += "keyword_" + r.f_keyword.identifier(); // keywords are shown as keyword_<identifier>
    1466           0 :             break;
    1467             : 
    1468           0 :         case token_t::TOKEN_ID_STRING_ENUM:
    1469           0 :             result += "TOKEN_ID_STRING";
    1470           0 :             break;
    1471             : 
    1472           0 :         case token_t::TOKEN_ID_LITERAL_ENUM:
    1473           0 :             result += "\"" + r.f_value + "\""; // literals are shown between double quotes; the value itself is not escaped
    1474           0 :             break;
    1475             : 
    1476           0 :         case token_t::TOKEN_ID_EMPTY_ENUM:
    1477             :             // put the empty set for empty
    1478           0 :             result += "\xF8"; // NOTE(review): single byte 0xF8; same const char* decoding caveat as the 0xA4 marker above, confirm
    1479           0 :             break;
    1480             : 
    1481           0 :         case token_t::TOKEN_ID_CHOICES_ENUM:
    1482             :             // you can select the one with the pointer for debugging
    1483             :             //result += QString("[0x%1] %2").arg(reinterpret_cast<qulonglong>(r.f_choices), 0, 16).arg(r.f_choices->name());
    1484           0 :             result += QString("%2").arg(r.f_choices->name()); // "%2" is the only place marker here, so the single arg() fills it (numbering kept from the debug variant above)
    1485           0 :             break;
    1486             : 
    1487           0 :         case token_t::TOKEN_ID_NODE_ENUM:
    1488           0 :             result += " /* INVALID -- TOKEN_ID_NODE!!! */ ";
    1489           0 :             break;
    1490             : 
    1491           0 :         case token_t::TOKEN_ID_ERROR_ENUM:
    1492           0 :             result += " /* INVALID -- TOKEN_ID_ERROR!!! */ ";
    1493           0 :             break;
    1494             : 
    1495           0 :         default: // any token kind not listed above is reported inline instead of throwing
    1496           0 :             result += " /* INVALID -- unknown token identifier!!! */ ";
    1497           0 :             break;
    1498             : 
    1499             :         }
    1500             :     }
    1501             : 
    1502           0 :     if(f_reducer != nullptr)
    1503             :     {
    1504             :         // show that we have a reducer
    1505           0 :         result += " { ... }";
    1506             :     }
    1507             : 
    1508           0 :     return result;
    1509             : }
    1510             : 
    1511             : 
    1512             : 
    1513             : 
    1514          34 : choices::choices(grammar *parent, const char *choice_name) // Creates a named, initially empty set of rules and registers it with `parent` when one is given.
    1515          34 :     : f_name(choice_name)
    1516             :       //f_rules() -- auto-init
    1517             : {
    1518          34 :     if(parent != nullptr) // a null parent is allowed; the operator | helpers create "internal" choices that way
    1519             :     {
    1520          18 :         parent->add_choices(*this);
    1521             :     }
    1522          34 : }
    1523             : 
    1524          36 : choices::~choices() // Releases the rules held by this object by delegating to clear().
    1525             : {
    1526          18 :     clear();
    1527          18 : }
    1528             : 
    1529          34 : void choices::clear() // Deletes every rule pointer stored in f_rules, then empties the vector.
    1530             : {
    1531          34 :     int const max_rules(f_rules.count());
    1532          89 :     for(int r = 0; r < max_rules; ++r)
    1533             :     {
    1534          55 :         delete f_rules[r]; // the stored rules are treated as owned by this object
    1535             :     }
    1536          34 :     f_rules.clear();
    1537          34 : }
    1538             : 
    1539             : 
    1540          16 : choices& choices::operator = (const choices& rhs) // Replaces this object's rules with copies of the rules of `rhs`; the name is kept and self-assignment is a no-op.
    1541             : {
    1542          16 :     if(this != &rhs)
    1543             :     {
    1544             :         //f_name -- not changed, rhs.f_name is probably "internal"
    1545             : 
    1546          16 :         clear(); // delete the rules currently held before taking the copies
    1547             : 
    1548             :         // copy rhs rules
    1549          16 :         int const max_rules(rhs.f_rules.count());
    1550          69 :         for(int r = 0; r < max_rules; ++r)
    1551             :         {
    1552          53 :             f_rules.push_back(new rule(*rhs.f_rules[r])); // each rule is copy-constructed into a new object, rhs keeps its own
    1553             :         }
    1554             :     }
    1555             : 
    1556          16 :     return *this;
    1557             : }
    1558             : 
    1559           0 : choices& choices::operator >>= (choices& rhs) // Adds one alternative consisting only of a reference to `rhs`; throws snap_logic_exception when rhs is this very object.
    1560             : {
    1561           0 :     if(this == &rhs)
    1562             :     {
    1563           0 :         throw snap_logic_exception("a rule cannot just be represented as itself");
    1564             :     }
    1565             : 
    1566           0 :     rule *r(new rule); // stored in f_rules below, so clear() deletes it later
    1567           0 :     r->add_choices(rhs);
    1568           0 :     f_rules.push_back(r);
    1569             : 
    1570           0 :     return *this;
    1571             : }
    1572             : 
    1573          18 : choices& choices::operator >>= (rule& r) // Defines this set of alternatives from `r`: either the list of rules it carries, or `r` itself as the only alternative.
    1574             : {
    1575             :     // in this case there are no choices
    1576          18 :     if(r[0].get_token().get_id() == token_t::TOKEN_ID_RULES_ENUM) // reads r[0] with no emptiness check visible here; assumes r has at least one entry -- TODO confirm
    1577             :     {
    1578          16 :         this->operator = (r[0].get_choices()); // copies the rules via operator =; NOTE(review): r itself is neither stored nor deleted in this branch, confirm who releases it
    1579             :     }
    1580             :     else
    1581             :     {
    1582           2 :         f_rules.push_back(&r); // the pointer is stored as is, so clear() will delete r later
    1583             :     }
    1584             : 
    1585          18 :     return *this;
    1586             : }
    1587             : 
    1588           0 : choices& choices::operator >>= (token_id const& token) // Adds one alternative made of the single token `token`.
    1589             : {
    1590           0 :     rule *r = new rule; // stored in f_rules below, so clear() deletes it later
    1591           0 :     r->add_token(token);
    1592           0 :     f_rules.push_back(r);
    1593             : 
    1594           0 :     return *this;
    1595             : }
    1596             : 
    1597           0 : choices& choices::operator >>= (QString const& literal) // Adds one alternative made of the single literal string.
    1598             : {
    1599           0 :     rule *r = new rule; // stored in f_rules below, so clear() deletes it later
    1600           0 :     r->add_literal(literal);
    1601           0 :     f_rules.push_back(r);
    1602             : 
    1603           0 :     return *this;
    1604             : }
    1605             : 
    1606           0 : choices& choices::operator >>= (keyword const& k) // Adds one alternative made of the single keyword `k`.
    1607             : {
    1608           0 :     rule *r = new rule; // stored in f_rules below, so clear() deletes it later
    1609           0 :     r->add_keyword(k);
    1610           0 :     f_rules.push_back(r);
    1611             : 
    1612           0 :     return *this;
    1613             : }
    1614             : 
    1615             : 
    1616           0 : rule& choices::operator | (rule& r) // Alternation "this | r": wraps this object in a new rule and delegates to operator | (rule&, rule&).
    1617             : {
    1618             :     // left hand-side is this
    1619           0 :     rule *l(new rule); // allocated with new; this function never releases it
    1620           0 :     l->add_choices(*this);
    1621             : 
    1622           0 :     return *l | r;
    1623             : }
    1624             : 
    1625           0 : rule& operator | (rule& r_left, token_id const& token) // Alternation "r_left | token": returns a new rule wrapping a new "internal" choices with both alternatives.
    1626             : {
    1627           0 :     choices *c(new choices(nullptr, "internal")); // null parent: this choices is not registered with any grammar
    1628           0 :     rule *r_right(new rule); // the token is first wrapped in a rule of its own
    1629           0 :     r_right->add_token(token);
    1630           0 :     c->add_rule(r_left); // order matters: left alternative first
    1631           0 :     c->add_rule(*r_right);
    1632           0 :     rule *r(new rule);
    1633           0 :     r->add_rules(*c); // the returned rule carries the list of alternatives
    1634           0 :     return *r;
    1635             : }
    1636             : 
    1637           1 : rule& operator | (token_id const& token, rule& r_right) // Alternation "token | r_right": returns a new rule wrapping a new "internal" choices with both alternatives.
    1638             : {
    1639           1 :     choices *c(new choices(nullptr, "internal")); // null parent: this choices is not registered with any grammar
    1640           1 :     rule *r_left(new rule); // the token is first wrapped in a rule of its own
    1641           1 :     r_left->add_token(token);
    1642           1 :     c->add_rule(*r_left); // order matters: the token alternative comes first
    1643           1 :     c->add_rule(r_right);
    1644           1 :     rule *r(new rule);
    1645           1 :     r->add_rules(*c); // the returned rule carries the list of alternatives
    1646           1 :     return *r;
    1647             : }
    1648             : 
    1649           0 : rule& operator | (rule& r_left, keyword const& k) // Alternation "r_left | keyword": returns a new rule wrapping a new "internal" choices with both alternatives.
    1650             : {
    1651           0 :     choices *c(new choices(nullptr, "internal")); // null parent: this choices is not registered with any grammar
    1652           0 :     rule *r_right(new rule); // the keyword is first wrapped in a rule of its own
    1653           0 :     r_right->add_keyword(k);
    1654           0 :     c->add_rule(r_left); // order matters: left alternative first
    1655           0 :     c->add_rule(*r_right);
    1656           0 :     rule *r(new rule);
    1657           0 :     r->add_rules(*c); // the returned rule carries the list of alternatives
    1658           0 :     return *r;
    1659             : }
    1660             : 
    1661          36 : rule& operator | (rule& r_left, rule& r_right) // Alternation of two rules; extends r_left in place when it already is a list of alternatives, otherwise builds a new one.
    1662             : {
    1663             :     // append to existing list?
    1664          36 :     if(r_left[0].get_token().get_id() == token_t::TOKEN_ID_RULES_ENUM) // reads r_left[0] with no emptiness check visible here; assumes r_left has at least one entry -- TODO confirm
    1665             :     {
    1666          21 :         r_left[0].get_choices().add_rule(r_right); // a | b | c keeps growing the same "internal" choices
    1667          21 :         return r_left;
    1668             :     }
    1669             : 
    1670          15 :     choices *c(new choices(nullptr, "internal")); // null parent: this choices is not registered with any grammar
    1671          15 :     c->add_rule(r_left);
    1672          15 :     c->add_rule(r_right);
    1673          15 :     rule *r(new rule);
    1674          15 :     r->add_rules(*c); // the returned rule carries the list of alternatives
    1675          15 :     return *r;
    1676             : }
    1677             : 
    1678           0 : rule& operator | (rule& r, choices& c) // Alternation "r | c": wraps `c` in a new rule and delegates to operator | (rule&, rule&).
    1679             : {
    1680           0 :     rule *l(new rule); // allocated with new; this function never releases it
    1681           0 :     l->add_choices(c);
    1682             : 
    1683           0 :     return r | *l;
    1684             : }
    1685             : 
    1686          53 : void choices::add_rule(rule& r) // Appends the address of `r` to f_rules; no copy is made.
    1687             : {
    1688          53 :     f_rules.push_back(&r); // clear() deletes every stored pointer, so r is presumably expected to come from new -- confirm against callers
    1689          53 : }
    1690             : 
    1691             : 
    1692             : 
    1693           0 : QString choices::to_string() const // Renders "<name>: " followed by each rule's to_string(), with alternatives separated by "\n    | ".
    1694             : {
    1695             :     // you can select the one with the pointer for debugging
    1696             :     //QString result(QString("[0x%1] %2: ").arg(reinterpret_cast<qulonglong>(this), 0, 16).arg(f_name));
    1697           0 :     QString result(QString("%2: ").arg(f_name)); // "%2" is the only place marker here, so the single arg() fills it (numbering kept from the debug variant above)
    1698             : 
    1699           0 :     for(QVector<rule *>::const_iterator ri = f_rules.begin();
    1700           0 :                                         ri != f_rules.end(); ++ri)
    1701             :     {
    1702           0 :         if(ri != f_rules.begin()) // separator goes before every alternative except the first
    1703             :         {
    1704           0 :             result += "\n    | ";
    1705             :         }
    1706           0 :         rule const *r(*ri);
    1707           0 :         result += r->to_string();
    1708             :     }
    1709             : 
    1710           0 :     return result;
    1711             : }
    1712             : 
    1713             : 
    1714             : 
    1715             : 
    1716             : 
    1717             : 
    1718           1 : grammar::grammar() // Creates a grammar with no choices registered yet.
    1719             :     //: f_choices() -- auto-init
    1720             : {
    1721           1 : }
    1722             : 
    1723          18 : void grammar::add_choices(choices& c) // Registers `c` with this grammar by storing its address; called from the choices constructor when a parent is given.
    1724             : {
    1725          18 :     f_choices.push_back(&c); // only the pointer is kept, the choices object is not copied
    1726          18 : }
    1727             : 
    1728             : struct parser_state; // forward declaration so the container typedefs below can name the type
    1729             : typedef QVector<parser_state *> state_array_t; // array of raw pointers to states (current list, free list, children, add-on-reduce list)
    1730             : typedef QMap<parser_state *, int> state_map_t; // map keyed by state pointer; not used within the lines visible here
    1731             : 
    1732             : struct parser_state // One parser state: a position (f_position) within rule number f_rule of f_choices, linked to a parent state and to its children.
    1733             : {
    1734          54 :     parser_state(parser_state * parent, choices & c, int r) // Binds the state to rule `r` of `c`; when `parent` is not null the state registers itself as one of its children.
    1735          54 :         : f_parent(parent)
    1736             :         , f_choices(&c)
    1737          54 :         , f_rule(r)
    1738             :     {
    1739          54 :         if(parent != nullptr)
    1740             :         {
    1741          53 :             parent->f_children.push_back(this);
    1742             :         }
    1743          54 :     }
    1744             : 
    1745             :     parser_state(parser_state const & rhs) = delete; // states are not copyable; use the static copy() below
    1746             :     parser_state & operator = (parser_state const & rhs) = delete;
    1747             : 
    1748           0 :     ~parser_state() // Runs clear(); a snap_logic_exception thrown by it is swallowed so that it cannot leave the destructor.
    1749           0 :     {
    1750             : //std::cerr << "destructor! " << this << "\n";
    1751             :         try
    1752             :         {
    1753           0 :             clear();
    1754             :         }
    1755           0 :         catch(snap_logic_exception const &)
    1756             :         {
    1757             :         }
    1758           0 :     }
    1759             : 
    1760         103 :     void clear() // Detaches the state from its parent and drops its pending add-on-reduce states; throws snap_logic_exception when the parent/child links are inconsistent.
    1761             :     {
    1762         103 :         if(!f_children.empty()) // children must be released first (free() does so recursively)
    1763             :         {
    1764           0 :             throw snap_logic_exception("clearing a state that has children is not allowed");
    1765             :         }
    1766             :         // if we have a parent make sure we're removed from the list
    1767             :         // of children of that parent
    1768         103 :         if(f_parent != nullptr)
    1769             :         {
    1770         102 :             int const p(f_parent->f_children.indexOf(this));
    1771         102 :             if(p < 0)
    1772             :             {
    1773           0 :                 throw snap_logic_exception("clearing a state with a parent that doesn't know about us is not allowed");
    1774             :             }
    1775         102 :             f_parent->f_children.remove(p);
    1776         102 :             f_parent = nullptr;
    1777             :         }
    1778             :         // delete all the states to be executed on reduce
    1779             :         // if they're still here, they can be removed
    1780         103 :         while(!f_add_on_reduce.empty())
    1781             :         {
    1782           0 :             delete f_add_on_reduce.last(); // these states are deleted outright, not returned to a free list
    1783           0 :             f_add_on_reduce.pop_back();
    1784             :         }
    1785             :         // useful for debug purposes
    1786         103 :         f_choices = nullptr; // note: f_node and f_line are not touched by clear()
    1787         103 :         f_rule = -1;
    1788         103 :         f_position = -1;
    1789         103 :     }
    1790             : 
    1791          77 :     void reset(parser_state * parent, choices & c, int const r) // Re-initializes a recycled state (see alloc()) the way the constructor would, and also resets position, node and add-on-reduce list.
    1792             :     {
    1793          77 :         f_parent = parent;
    1794          77 :         if(parent != nullptr)
    1795             :         {
    1796          75 :             parent->f_children.push_back(this);
    1797             :         }
    1798          77 :         f_choices = &c;
    1799          77 :         f_rule = r;
    1800          77 :         f_position = 0;
    1801          77 :         f_node.clear(); // drops this state's reference to the node tree, if any
    1802          77 :         f_add_on_reduce.clear(); // only empties the vector (clear() is what deletes leftover entries); f_line keeps its previous value here
    1803          77 :     }
    1804             : 
    1805         130 :     static parser_state * alloc(state_array_t & free_states, parser_state * parent, choices & c, int const r) // Returns a state bound to (parent, c, r), reusing the last entry of `free_states` when there is one, else allocating a new state.
    1806             :     {
    1807             :         parser_state * state;
    1808         130 :         if(free_states.empty())
    1809             :         {
    1810          53 :             state = new parser_state(parent, c, r);
    1811             :         }
    1812             :         else
    1813             :         {
    1814          77 :             state = free_states.last(); // recycle the most recently freed state
    1815          77 :             free_states.pop_back();
    1816          77 :             state->reset(parent, c, r);
    1817             :         }
    1818         130 :         return state;
    1819             :     }
    1820             : 
    1821         103 :     static void free(state_array_t & current, state_array_t & free_states, parser_state * s) // Releases `s` and, recursively, all of its children: each is cleared, removed from `current` if present, and pushed onto `free_states`.
    1822             :     {
    1823             : #ifdef DEBUG
    1824         103 :         if(s->f_lock) // debug-only guard: a locked state must be unlocked before it can be freed
    1825             :         {
    1826           0 :             throw snap_logic_exception("state that was not yet properly checked is getting deleted");
    1827             :         }
    1828             : #endif
    1829             : 
    1830             :         // recursively free all the children
    1831           0 :         while(!s->f_children.empty())
    1832             :         {
    1833           0 :             free(current, free_states, s->f_children.last()); // the child's clear() removes it from s->f_children, which is what ends this loop
    1834             :             //s->f_children.pop_back(); -- automatic in clear()
    1835             :         }
    1836         103 :         s->clear();
    1837         103 :         int const pos(current.indexOf(s));
    1838         103 :         if(pos != -1) // s is not required to be part of the current list
    1839             :         {
    1840         103 :             current.remove(pos);
    1841             :         }
    1842         103 :         free_states.push_back(s); // the object is kept for reuse by alloc(), not deleted
    1843         103 :     }
    1844             : 
    1845          34 :     static parser_state * copy(state_array_t& free_states, parser_state * source) // Returns a duplicate of `source`: same parent, choices, rule, line and position, its own copy of the node and copies of the add-on-reduce states.
    1846             :     {
    1847          34 :         parser_state * state(alloc(free_states, source->f_parent, *source->f_choices, source->f_rule)); // alloc() also registers the duplicate as a child of the same parent
    1848          34 :         state->f_line = source->f_line;
    1849          34 :         state->f_position = source->f_position;
    1850          34 :         if(source->f_node != nullptr)
    1851             :         {
    1852           1 :             state->f_node = QSharedPointer<token_node>(new token_node(*source->f_node)); // a new token_node is copy-constructed, the pointer is not shared with source
    1853             :         }
    1854          34 :         state->copy_reduce_states(free_states, source->f_add_on_reduce);
    1855          34 :         return state;
    1856             :     }
    1857             : 
    1858          34 :     void copy_reduce_states(state_array_t & free_states, state_array_t & add_on_reduce) // Appends to this state's f_add_on_reduce a copy() of every state listed in `add_on_reduce`.
    1859             :     {
    1860          34 :         int const max_reduce(add_on_reduce.size());
    1861          34 :         for(int i(0); i < max_reduce; ++i)
    1862             :         {
    1863             :             // we need to set the correct parent in the copy
    1864             :             // and it is faster to correct in the source before the copy
    1865           0 :             f_add_on_reduce.push_back(copy(free_states, add_on_reduce[i])); // NOTE(review): no parent correction is visible here, copy() reuses the source's f_parent -- confirm the comment above is still accurate
    1866             :         }
    1867          34 :     }
    1868             : 
    1869           8 :     void add_token(token & t) // Adds token `t` to this state's node, creating the node (stamped with f_line) on first use.
    1870             :     {
    1871           8 :         if(f_node == nullptr)
    1872             :         {
    1873           7 :             f_node = QSharedPointer<token_node>(new token_node);
    1874           7 :             f_node->set_line(f_line);
    1875             :         }
    1876           8 :         f_node->add_token(t);
    1877           8 :     }
    1878             : 
    1879          82 :     void add_node(QSharedPointer<token_node> n) // Adds node `n` to this state's node, creating the node (stamped with f_line) on first use.
    1880             :     {
    1881          82 :         if(f_node == nullptr)
    1882             :         {
    1883          81 :             f_node = QSharedPointer<token_node>(new token_node);
    1884          81 :             f_node->set_line(f_line);
    1885             :         }
    1886          82 :         f_node->add_node(n);
    1887          82 :     }
    1888             : 
    1889             :     QString toString() // Debug rendering: address, choices name, rule index, position / rule length, and the parent's address and name when there is a parent.
    1890             :     {
    1891             :         QString result;
    1892             : 
    1893             :         result = QString("0x%1-%2 [r:%3, p:%4/%5]") // dereferences f_choices, so it must not be called on a state after clear() set it to nullptr
    1894             :                     .arg(reinterpret_cast<qulonglong>(this), 0, 16)
    1895             :                     .arg(f_choices->name())
    1896             :                     .arg(f_rule)
    1897             :                     .arg(f_position)
    1898             :                     .arg((*f_choices)[f_rule].count());
    1899             :         if(f_parent != nullptr)
    1900             :         {
    1901             :             result += QString(" (parent 0x%5-%6)") // only two markers exist in this string, so the two arg() calls fill %5 then %6
    1902             :                     .arg(reinterpret_cast<qulonglong>(f_parent), 0, 16)
    1903             :                     .arg(f_parent->f_choices->name());
    1904             :         }
    1905             : 
    1906             :         return result;
    1907             :     }
    1908             : 
    1909             :     /** \brief Display an array of states.
    1910             :      *
    1911             :      * This function displays the array of states as defined by the parameter
    1912             :      * \p a. This prints all the parents of each element and also the list
    1913             :      * of add on reduce if any.
    1914             :      *
    1915             :      * \param[in] a  The array to be displayed.
    1916             :      */
    1917             : #ifdef DEBUG
    1918             :     static void display_array(const state_array_t & a)
    1919             :     {
    1920             :         SNAP_LOG_TRACE() << "+++ ARRAY (" << a.size() << " items)\n";
    1921             :         for(state_array_t::const_iterator it(a.begin()); it != a.end(); ++it)
    1922             :         {
    1923             :             parser_state * state(*it);
    1924             :             //std::cerr << "  state = " << state << "\n"; // for crash
    1925             :             SNAP_LOG_TRACE() << "  current: " << state->toString() << "\n";
    1926             :             for(state_array_t::const_iterator r(state->f_add_on_reduce.begin()); r != state->f_add_on_reduce.end(); ++r)
    1927             :             {
    1928             :                 parser_state * s(*r);
    1929             :                 SNAP_LOG_TRACE() << "      add on reduce: " << s->toString() << "\n";
    1930             :             }
    1931             :             while(state->f_parent != nullptr) // walk up and print the whole chain of parents
    1932             :             {
    1933             :                 state = state->f_parent;
    1934             :                 SNAP_LOG_TRACE() << "    parent: " << state->toString() << "\n";
    1935             :             }
    1936             :         }
    1937             :         SNAP_LOG_TRACE() << "---\n";
    1938             :     }
    1939             : 
    1940          82 :     void lock() // Debug-only: marks the state so that free() refuses to release it.
    1941             :     {
    1942          82 :         f_lock = true;
    1943          82 :     }
    1944             : 
    1945          77 :     void unlock() // Debug-only: lifts the mark set by lock().
    1946             :     {
    1947          77 :         f_lock = false;
    1948          77 :     }
    1949             : 
    1950             : #endif
    1951             : 
    1952             :     bool                            f_lock = false; // declared in every build; only read and written inside the #ifdef DEBUG code above
    1953             : 
    1954             :     int32_t                         f_line = -1; // line passed to set_line() when this state creates its node
    1955             :     parser_state *                  f_parent = nullptr; // state this one was started from, or nullptr
    1956             :     state_array_t                   f_children = state_array_t(); // states that have this one as f_parent
    1957             : 
    1958             :     choices *                       f_choices = nullptr; // choices the rule belongs to; nullptr after clear()
    1959             :     int32_t                         f_rule = 0; // index of the rule within f_choices; -1 after clear()
    1960             :     int32_t                         f_position = 0; // position within the rule; -1 after clear()
    1961             : 
    1962             :     QSharedPointer<token_node>      f_node = QSharedPointer<token_node>(); // node receiving tokens and sub-nodes, created lazily by add_token()/add_node()
    1963             :     state_array_t                   f_add_on_reduce = state_array_t(); // states held until reduce time; deleted by clear() if still present
    1964             : };
    1965             : 
    1966             : 
    1967             : /** \brief Move to the next token in a rule.
    1968             :  *
    1969             :  * Each state includes a position in one specific rule. This function moves
    1970             :  * that position forward by one.
    1971             :  *
    1972             :  * When the end of the rule is reached, the rule gets reduced. This means
    1973             :  * calling the user reduce function, removing the state from the current list,
    1974             :  * and replacing it with its parent.
    1975             :  *
    1976             :  * Reducing means releasing the current state with parser_state::free()
    1977             :  * after its node was added to its parent. The parent (or a copy of it) is
    1978             :  * then added to the list of current states as it becomes current again.
    1979             :  *
    1980             :  * When reducing a rule and moving up to the parent, the parent may then need
    1981             :  * reduction too! Thus, the function loops and reduces this state and all of
    1982             :  * its parents until it reaches a state that cannot be reduced anymore.
    1983             :  *
    1984             :  * This function also detects recursive rules and places those in the current
    1985             :  * stack of states as expected. Note that next_token() is called on the
    1986             :  * recursive rule too. This is a recursive function call, but it is very
    1987             :  * unlikely to be called more than twice.
    1988             :  *
    1989             :  * \param[in,out] state  The state being moved.
    1990             :  * \param[in,out] current  The list of current states.
    1991             :  * \param[in,out] free_states  The list of free states.
    1992             :  */
    1993          90 : void next_token(parser_state *state, state_array_t& current, state_array_t& free_states)
    1994             : {
    1995             :     bool repeat;
    1996          90 :     do
    1997             :     {
    1998          90 :         repeat = false;
    1999             :         // move forward to the next token in this rule
    2000          90 :         ++state->f_position;
    2001          90 :         if(state->f_position >= (*state->f_choices)[state->f_rule].count())
    2002             :         {
    2003          34 :             if(state->f_position == (*state->f_choices)[state->f_rule].count())
    2004             :             {
    2005          34 :                 repeat = true;
    2006             : 
    2007             :                 // we reached the end of the rule, we can reduce it!
    2008             :                 // call the user reduce function, passing it this state's node
    2009             : //std::cerr << "reduce -- " << state->f_choices->name() << ": " << (*state->f_choices)[state->f_rule].to_string() << "\n";
    2010          34 :                 (*state->f_choices)[state->f_rule].reduce(state->f_node);
    2011             : 
    2012             :                 // add the recursive children to the current stack; a rule is
    2013             :                 // recursive when it starts with its own choices (a: b | a ',' b)
    2014          34 :                 int const max_choices(state->f_choices->count());
    2015         142 :                 for(int i(0); i < max_choices; ++i)
    2016             :                 {
    2017         108 :                     rule const& r((*state->f_choices)[i]);
    2018         324 :                     if(token_t::TOKEN_ID_CHOICES_ENUM == r[0].get_token().get_id()
    2019         324 :                     && state->f_choices == &r[0].get_choices())
    2020             :                     {
    2021          48 :                         parser_state *s(parser_state::alloc(free_states, state->f_parent, *state->f_choices, i));
    2022             :                         //parser_state *s(parser_state::copy(free_states, state));
    2023          48 :                         s->f_line = state->f_line;
    2024          48 :                         s->add_node(state->f_node);
    2025          48 :                         current.push_back(s);
    2026             : //std::cerr << "** sub-next_token (recursive) " << reinterpret_cast<void*>(s) << "\n";
    2027          48 :                         next_token(s, current, free_states); // we just reduced that one state!
    2028             : //std::cerr << "**\n";
    2029             :                     }
    2030             :                 }
    2031             : 
    2032          34 :                 parser_state *p(state->f_parent); // NOTE(review): dereferenced below without a null check -- assumes a state without a parent never gets reduced; confirm
    2033          34 :                 if(p->f_children.size() > 1)
    2034             :                 {
    2035             :                     // the parent has several children which means we may get
    2036             :                     // more than one reduce... to support that possibility
    2037             :                     // duplicate the parent now
    2038          34 :                     parser_state *new_parent(parser_state::copy(free_states, p));
    2039          34 :                     p = new_parent;
    2040             : //std::cerr << "    copy " << reinterpret_cast<void*>(state) << " to " << reinterpret_cast<void*>(p) << "\n";
    2041             :                 }
    2042          34 :                 p->add_node(state->f_node);
    2043             : 
    2044             :                 // remove this state from the current set of rules
    2045             : //std::cerr << "XXX delete " << reinterpret_cast<void*>(state) << " (parent: " << reinterpret_cast<void*>(p) << ")\n";
    2046          34 :                 parser_state::free(current, free_states, state);
    2047             : 
    2048             :                 // continue with the parent which will get its
    2049             :                 // position increased on the next iteration
    2050          34 :                 state = p;
    2051          34 :                 current.push_back(state);
    2052             :             }
    2053             :             else
    2054             :             {
    2055             :                 // forget about that state; we're reducing it for the second time?!
    2056             : //std::cerr << ">>>>>>>>>>>>>>>>>>>> delete on > count (double reduce) " << reinterpret_cast<void*>(state) << "\n";
    2057           0 :                 parser_state::free(current, free_states, state);
    2058             :             }
    2059             :         }
    2060             :         // else -- the user is not finished with this state
    2061             :     }
    2062             :     while(repeat);
    2063             : //std::cerr << "next_token() returns with: " << (*state->f_choices)[state->f_rule].to_string() << "\n";
    2064             : 
    2065             : //std::cerr << "NEXT TOKEN: =================================================================\n";
    2066             : //parser_state::display_array(current);
    2067          56 : }
    2068             : 
    2069           1 : bool grammar::parse(lexer & input, choices & start)
    2070             : {
    2071             :     // run the grammar against the tokens read from `input`; the result is a
    2072             :     // tree of tokens saved in f_result, in which case we return true
    2073             :     // to run the parser, we need a state, this can be defined locally
    2074             :     // because we do not need it in the result;
    2075             :     // the root rule is `start` followed by TOKEN_ID_NONE, so we succeed once
    2076             :     // a state waiting on TOKEN_ID_NONE gets that token from the lexer
    2077           2 :     choices root(this, "root");
    2078           1 :     root >>= start >> TOKEN_ID_NONE;
    2079             :     // TODO: all the state pointers leak if we throw... (NOTE(review): no delete is visible on the return paths of this function either -- confirm)
    2080           1 :     parser_state * s(new parser_state(nullptr, root, 0));
    2081           1 :     s->f_line = 1;
    2082             : 
    2083           2 :     state_array_t free_states;
    2084           2 :     state_array_t current;
    2085           1 :     current.push_back(s);
    2086           7 :     while(!current.empty())
    2087             :     {
    2088           4 :         uint32_t const line(input.line());
    2089             : 
    2090             :         // we're working on the 'check' vector which is
    2091             :         // a copy of the current vector so the current
    2092             :         // vector can change in size
    2093             : #ifdef DEBUG
    2094             : //SNAP_LOG_TRACE("B: ================================================================= (line: ")(input.line())(")");
    2095             : //parser_state::display_array(current);
    2096             : #endif
    2097             : 
    2098             :         bool retry;
    2099          25 :         do
    2100             :         {
    2101          25 :             retry = false;
    2102          50 :             state_array_t check(current);
    2103         166 :             for(state_array_t::const_iterator it(check.begin());
    2104         166 :                             it != check.end(); ++it)
    2105             :             {
    2106             :                 // it is a state, check whether the current entry
    2107             :                 // is a token or a rule
    2108         141 :                 parser_state *state(*it);
    2109         141 :                 const rule::rule_ref ref((*state->f_choices)[state->f_rule][state->f_position]);
    2110         141 :                 token_t token_id(ref.get_token().get_id());
    2111             : 
    2112             :                 // only take care of choices in this loop (terminators are
    2113             :                 // handled in the next loop)
    2114         141 :                 if(token_id == token_t::TOKEN_ID_CHOICES_ENUM)
    2115             :                 {
    2116             :                     // follow the choice by adding all of the rules it points to
    2117          23 :                     choices * c(&ref.get_choices());
    2118             : 
    2119          23 :                     int const max_choices(c->count());
    2120         104 :                     for(int r(0); r < max_choices; ++r)
    2121             :                     {
    2122          81 :                         rule::rule_ref const child_ref((*c)[r][0]);
    2123             : 
    2124             :                         // recursive?
    2125         243 :                         if(token_t::TOKEN_ID_CHOICES_ENUM == child_ref.get_token().get_id()
    2126         243 :                         && &child_ref.get_choices() == c)
    2127             :                         {
    2128             :                             // ignore recursive at this level, we take them
    2129             :                             // into account when reducing instead (see next_token())
    2130             : //std::cerr << "  SKIP RECURSIVE -- " << c->name() << "  --> " << (*c)[r].to_string() << "\n";
    2131          33 :                             continue;
    2132             :                         }
    2133          48 :                         parser_state * child(parser_state::alloc(free_states, state, *c, r));
    2134          48 :                         child->f_line = line;
    2135             : //std::cerr << "  " << c->name() << "  --> " << (*c)[r].to_string() << "\n";
    2136             : 
    2137             :                         // check whether this is recursive; very important
    2138             :                         // to avoid infinite loop; recursive rules are used
    2139             :                         // only when the concerned rule gets reduced
    2140             :                         // the child position is always 0 here (it's a new child)
    2141          48 :                         bool recursive(false); // never set to true: the detection code below is commented out
    2142             : //                        token_t const child_token_id(child_ref.get_token().get_id());
    2143             : //                        if(child_token_id == token_t::TOKEN_ID_CHOICES_ENUM)
    2144             : //                        {
    2145             : //                            // if the new child state starts with a 'choices'
    2146             : //                            // and that's a 'choices' we already added
    2147             : //                            // (including this very child,) then
    2148             : //                            // that child is recursive
    2149             : //                            choices *child_choices(&child_ref.get_choices());
    2150             : //std::cerr << "  --> follow choice " << c->name() << " with sub-choice " << child_choices->name() << "\n";
    2151             : //                            // start from ourselves
    2152             : //                            int i(0);
    2153             : //                            for(parser_state *p(child); p != nullptr && i < 2; p = p->f_parent, ++i)
    2154             : //                            {
    2155             : //                                if(child_choices == p->f_choices)
    2156             : //                                {
    2157             : //                                    if(p->f_parent == nullptr)
    2158             : //                                    {
    2159             : //                                        throw snap_logic_exception("invalid recursion (root cannot be recursive)");
    2160             : //                                    }
    2161             : //                                    // p may be ourselves so we cannot put that
    2162             : //                                    // there, use the parent instead
    2163             : //std::cerr << "  *** CHANGED TO REDUCE ***\n";
    2164             : //                                    p->f_parent->f_add_on_reduce.push_back(child);
    2165             : //                                    recursive = true;
    2166             : //                                    break;
    2167             : //                                }
    2168             : //
    2169             : //                                // cannot reduce any more than that if
    2170             : //                                // this rule is not at the end of list of
    2171             : //                                // choices
    2172             : //                                //if(p->f_position + 1 < (*p->f_choices)[p->f_rule].count())
    2173             : //                                //{
    2174             : //                                //    // TODO: this is not correct if the
    2175             : //                                //    //       following rule(s) support EMPTY
    2176             : //                                //    break;
    2177             : //                                //}
    2178             : //                            }
    2179             : //                        }
    2180             : 
    2181             :                         // if recursive it was already added to all the
    2182             :                         // states where it needs to be; otherwise we add it
    2183             :                         // to the current stack
    2184          48 :                         if(!recursive)
    2185             :                         {
    2186          48 :                             current.push_back(child);
    2187             :                         }
    2188             :                     }
    2189          23 :                     current.remove(current.indexOf(state)); // the child states added above take the place of this state
    2190          23 :                     retry = true;
    2191             :                 }
    2192         118 :                 else if(token_id == token_t::TOKEN_ID_EMPTY_ENUM)
    2193             :                 {
    2194             :                     // we have to take care of empty rules here since anything
    2195             :                     // coming after an empty rule has to be added to the list
    2196             :                     // of rules here (it is very important because of the
    2197             :                     // potential for recursive rules)
    2198           0 :                     token t(token_t::TOKEN_ID_EMPTY_ENUM);
    2199           0 :                     state->add_token(t);
    2200           0 :                     next_token(state, current, free_states);
    2201           0 :                     retry = true;
    2202             :                 }
    2203             :             }
    2204             :         } while(retry);
    2205             : #ifdef DEBUG
    2206             : //std::cerr << "A: ================================================================= (line: " << input.line() << ")\n";
    2207             : //parser_state::display_array(current);
    2208             : #endif
    2209             : 
    2210             :         // get the next token from the lexer
    2211           7 :         token t(input.next_token());
    2212             : #ifdef DEBUG
    2213             : //std::cerr << ". token type: " << t.to_string() << " to try against \n";
    2214             : #endif
    2215             : 
    2216           7 :         state_array_t check(current);
    2217             : #ifdef DEBUG
    2218             :         // lock all those states to make sure we don't delete the wrong one
    2219          86 :         for(state_array_t::const_iterator it(check.begin());
    2220          86 :                         it != check.end(); ++it)
    2221             :         {
    2222          82 :             (*it)->lock();
    2223             :         }
    2224             : #endif
    2225          81 :         for(state_array_t::const_iterator it(check.begin());
    2226          81 :                         it != check.end(); ++it)
    2227             :         {
    2228             :             // it is a state, check whether the current entry
    2229             :             // is a token or a rule
    2230          78 :             parser_state *state(*it);
    2231          78 :             rule::rule_ref const ref((*state->f_choices)[state->f_rule][state->f_position]);
    2232          78 :             token_t const token_id(ref.get_token().get_id());
    2233          78 :             if(token_id == token_t::TOKEN_ID_CHOICES_ENUM
    2234          78 :             || token_id == token_t::TOKEN_ID_EMPTY_ENUM)
    2235             :             {
    2236           0 :                 throw snap_logic_exception("this should never happen since the previous for() loop removed all of those!");
    2237             :             }
    2238             :             else
    2239             :             {
    2240          78 :                 bool remove(false);
    2241          78 :                 if(t.get_id() != token_id)
    2242             :                 {
    2243          43 :                     remove = true;
    2244             :                 }
    2245             :                 else
    2246             :                 {
    2247          35 :                     switch(token_id)
    2248             :                     {
    2249          27 :                     case token_t::TOKEN_ID_LITERAL_ENUM:
    2250             :                         // a literal must match exactly
    2251          27 :                         if(t.get_value().toString() != ref.get_value())
    2252             :                         {
    2253          26 :                             remove = true;
    2254             :                         }
    2255          27 :                         break;
    2256             : 
    2257           0 :                     case token_t::TOKEN_ID_KEYWORD_ENUM:
    2258             :                         // a keyword must match exactly
    2259           0 :                         if(t.get_value().toInt() != ref.get_keyword().number())
    2260             :                         {
    2261           0 :                             remove = true;
    2262             :                         }
    2263           0 :                         break;
    2264             : 
    2265           7 :                     case token_t::TOKEN_ID_IDENTIFIER_ENUM:
    2266             :                     case token_t::TOKEN_ID_STRING_ENUM:
    2267             :                     case token_t::TOKEN_ID_INTEGER_ENUM:
    2268             :                     case token_t::TOKEN_ID_FLOAT_ENUM:
    2269             :                         // this is a match whatever the value
    2270           7 :                         break;
    2271             : 
    2272           1 :                     case token_t::TOKEN_ID_NONE_ENUM:
    2273             :                         // this state is the root state, this means the result
    2274             :                         // is really the child node of this current state
    2275             :                         //
    2276           1 :                         f_result = qSharedPointerDynamicCast<token_node, token>((*state->f_node)[0]);
    2277           1 :                         return true;
    2278             : 
    2279           0 :                     default:
    2280             :                         // at this point other tokens are rejected here
    2281           0 :                         throw snap_parser_unexpected_token(QString("unexpected token %1").arg(static_cast<int>(token_id)));
    2282             : 
    2283             :                     }
    2284             :                 }
    2285             : #ifdef DEBUG
    2286          77 :                 state->unlock();
    2287             : #endif
    2288          77 :                 if(remove)
    2289             :                 {
    2290             : //std::cerr << "<*> delete unmatched state: " << state->f_choices->name() << "\n";
    2291          69 :                     parser_state::free(current, free_states, state);
    2292             :                 }
    2293             :                 else
    2294             :                 {
    2295             :                     // save this token as it was accepted
    2296           8 :                     state->add_token(t);
    2297             : //std::cerr << ">>> next token (IN)\n";
    2298           8 :                     next_token(state, current, free_states);
    2299             : //std::cerr << ">>> next token (OUT)\n";
    2300             :                 }
    2301             :             }
    2302             :         }
    2303             :     }
    2304             : 
    2305           0 :     return false; // the list of current states became empty before TOKEN_ID_NONE was matched
    2306             : }
    2307             : 
    2308             : 
    2309             : 
    2310             : } // namespace parser
    2311           6 : } // namespace snap
    2312             : 
    2313             : // vim: ts=4 sw=4 et

Generated by: LCOV version 1.13