Line data Source code
1 : // Snap Websites Server -- advanced parser
2 : // Copyright (c) 2011-2019 Made to Order Software Corp. All Rights Reserved
3 : //
4 : // This program is free software; you can redistribute it and/or modify
5 : // it under the terms of the GNU General Public License as published by
6 : // the Free Software Foundation; either version 2 of the License, or
7 : // (at your option) any later version.
8 : //
9 : // This program is distributed in the hope that it will be useful,
10 : // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 : // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 : // GNU General Public License for more details.
13 : //
14 : // You should have received a copy of the GNU General Public License
15 : // along with this program; if not, write to the Free Software
16 : // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 :
18 :
19 : // self
20 : //
21 : #include "snapwebsites/snap_parser.h"
22 :
23 :
24 : // snapwebsites lib
25 : //
26 : #include "snapwebsites/log.h"
27 : #include "snapwebsites/qstring_stream.h"
28 :
29 :
30 : // Qt lib
31 : //
32 : #include <QList>
33 : #include <QPointer>
34 :
35 :
36 : // C++ lib
37 : //
38 : #include <iostream>
39 :
40 :
41 : // last include
42 : //
43 : #include <snapdev/poison.h>
44 :
45 :
46 :
47 :
48 : namespace snap
49 : {
50 : namespace parser
51 : {
52 :
53 2 : token_id_none_def TOKEN_ID_NONE;
54 2 : token_id_integer_def TOKEN_ID_INTEGER;
55 2 : token_id_float_def TOKEN_ID_FLOAT;
56 2 : token_id_identifier_def TOKEN_ID_IDENTIFIER;
57 2 : token_id_keyword_def TOKEN_ID_KEYWORD;
58 2 : token_id_string_def TOKEN_ID_STRING;
59 2 : token_id_literal_def TOKEN_ID_LITERAL;
60 2 : token_id_empty_def TOKEN_ID_EMPTY;
61 :
62 :
63 :
64 0 : QString token::to_string() const
65 : {
66 0 : QString result;
67 :
68 0 : switch(f_id)
69 : {
70 0 : case token_t::TOKEN_ID_NONE_ENUM:
71 0 : result = "<no token>";
72 0 : break;
73 :
74 0 : case token_t::TOKEN_ID_INTEGER_ENUM:
75 0 : result = QString("int<%1>").arg(f_value.toInt());
76 0 : break;
77 :
78 0 : case token_t::TOKEN_ID_FLOAT_ENUM:
79 0 : result = QString("float<%1>").arg(f_value.toDouble());
80 0 : break;
81 :
82 0 : case token_t::TOKEN_ID_IDENTIFIER_ENUM:
83 0 : result = QString("identifier<%1>").arg(f_value.toString());
84 0 : break;
85 :
86 0 : case token_t::TOKEN_ID_KEYWORD_ENUM:
87 0 : result = QString("keyword<%1>").arg(f_value.toString());
88 0 : break;
89 :
90 0 : case token_t::TOKEN_ID_STRING_ENUM:
91 0 : result = QString("string<%1>").arg(f_value.toString());
92 0 : break;
93 :
94 0 : case token_t::TOKEN_ID_LITERAL_ENUM:
95 0 : result = QString("literal<%1>").arg(f_value.toString());
96 0 : break;
97 :
98 0 : case token_t::TOKEN_ID_EMPTY_ENUM:
99 0 : result = "empty<>";
100 0 : break;
101 :
102 0 : case token_t::TOKEN_ID_CHOICES_ENUM:
103 0 : result = QString("choices<...>");//.arg(f_value.toString());
104 0 : break;
105 :
106 0 : case token_t::TOKEN_ID_RULES_ENUM:
107 0 : result += " /* INVALID -- TOKEN_ID_RULES!!! */ ";
108 0 : break;
109 :
110 0 : case token_t::TOKEN_ID_NODE_ENUM:
111 0 : result += " /* INVALID -- TOKEN_ID_RULES!!! */ ";
112 0 : break;
113 :
114 0 : case token_t::TOKEN_ID_ERROR_ENUM:
115 0 : result += " /* INVALID -- TOKEN_ID_ERROR!!! */ ";
116 0 : break;
117 :
118 0 : default:
119 0 : result += " /* INVALID -- unknown token identifier!!! */ ";
120 0 : break;
121 :
122 : }
123 :
124 0 : return result;
125 : }
126 :
127 :
128 : /** \brief Set the input string for the lexer.
129 : *
130 : * This lexer accepts a standard QString as input. It will be what gets parsed.
131 : *
132 : * The input is never modified. It is parsed using the next_token() function.
133 : *
134 : * By default, the input is an empty string.
135 : *
136 : * \param[in] input The input string to be parsed by this lexer.
137 : */
138 1 : void lexer::set_input(const QString& input)
139 : {
140 1 : f_input = input;
141 1 : f_pos = f_input.begin();
142 1 : f_line = 1;
143 1 : }
144 :
145 : /** \brief Read the next token.
146 : *
147 : * At this time we support the follow tokens:
148 : *
149 : * \li TOKEN_ID_NONE_ENUM -- the end of the input was reached
150 : *
151 : * \li TOKEN_ID_INTEGER_ENUM -- an integer ([0-9]+) number; always positive since
152 : * the parser returns '-' as a separate literal
153 : *
154 : * \li TOKEN_ID_FLOAT_ENUM -- a floating point number with optinal exponent
155 : * ([0-9]+\.[0-9]+([eE][+-]?[0-9]+)?); always positive since
156 : * the parser returns '-' as a separate literal
157 : *
158 : * \li TOKEN_ID_IDENTIFIER_ENUM -- supports C like identifiers ([a-z_][a-z0-9_]*)
159 : *
160 : * \li TOKEN_ID_KEYWORD_ENUM -- an identifier that matches one of our keywords
161 : * as defined in the keyword map
162 : *
163 : * \li TOKEN_ID_STRING_ENUM -- a string delimited by double quotes ("); support
164 : * backslashes; returns the content of the string
165 : * (the quotes are removed)
166 : *
167 : * \li TOKEN_ID_LITERAL_ENUM -- anything else except what gets removed (spaces,
168 : * new lines, C or C++ like comments)
169 : *
170 : * \li TOKEN_ID_ERROR_ENUM -- an error occured, you can get the error message for
171 : * more information
172 : *
173 : * The TOKEN_ID_LITERAL_ENUM may either return a character ('=' operator) or a
174 : * string ("/=" operator). The special literals are defined here:
175 : *
176 : * \li ++ - increment
177 : * \li += - add & assign
178 : * \li -- - decrement
179 : * \li -= - subtract & assign
180 : * \li *= - multiply & assign
181 : * \li ** - power
182 : * \li **= - power & assign
183 : * \li /= - divide & assign
184 : * \li %= - divide & assign
185 : * \li ~= - bitwise not & assign
186 : * \li &= - bitwise and & assign
187 : * \li && - logical and
188 : * \li &&= - logical and & assign
189 : * \li |= - bitwise or & assign
190 : * \li || - logical or
191 : * \li ||= - logical or & assign
192 : * \li ^= - bitwise xor & assign
193 : * \li ^^ - logical xor
194 : * \li ^^= - logical xor & assign
195 : * \li != - not equal
196 : * \li !== - exactly not equal
197 : * \li !< - rotate left
198 : * \li !> - rotate left
199 : * \li ?= - assign default if undefined
200 : * \li == - equal
201 : * \li === - exactly equal
202 : * \li <= - smaller or equal
203 : * \li << - shift left
204 : * \li <<= - shift left and assign
205 : * \li <? - minimum
206 : * \li <?= - minimum and assign
207 : * \li >= - larger or equal
208 : * \li >> - shift right
209 : * \li >>> - unsigned shift right
210 : * \li >>= - shift right and assign
211 : * \li >>>= - unsigned shift right and assign
212 : * \li >? - maximum
213 : * \li >?= - maximum and assign
214 : * \li := - required assignment
215 : * \li :: - namespace
216 : *
217 : * If the returned token says TOKEN_ID_NONE_ENUM then you reached the
218 : * end of the input. When it says TOKEN_ID_ERROR_ENUM, then the input
219 : * is invalid and the error message and line number can be retrieved
220 : * to inform the user.
221 : *
222 : * The parser supports any type of new lines (Unix, Windows and Mac.)
223 : *
224 : * \todo
225 : * Check for overflow on integers and doubles
226 : *
227 : * \todo
228 : * Should we include default keywords? (i.e. true, false, if, else,
229 : * etc.) so those cannot be used as identifiers in some places?
230 : *
231 : * \return The read token.
232 : */
233 4 : token lexer::next_token()
234 : {
235 0 : auto xdigit = [](int c)
236 : {
237 0 : if(c >= '0' && c <= '9')
238 : {
239 0 : return c - '0';
240 : }
241 0 : else if(c >= 'a' && c <= 'f')
242 : {
243 0 : return c - 'a' + 10;
244 : }
245 0 : else if(c >= 'A' && c <= 'F')
246 : {
247 0 : return c - 'A' + 10;
248 : }
249 0 : return -1;
250 : };
251 4 : token result;
252 :
253 : // restart is called whenever we find a comment or
254 : // some other entry that just gets "deleted" from the input
255 : // (i.e. new line, space...)
256 : //
257 : // Note: I don't use a do ... while(repeat); because in some cases
258 : // we are inside several levels of switch() for() while() loops.
259 6 : restart:
260 :
261 : // we reached the end of input
262 6 : if(f_pos == f_input.end())
263 : {
264 1 : return result;
265 : }
266 :
267 5 : switch(f_pos->unicode())
268 : {
269 0 : case '\n':
270 0 : ++f_pos;
271 0 : ++f_line;
272 0 : goto restart;
273 :
274 0 : case '\r':
275 0 : ++f_pos;
276 0 : ++f_line;
277 0 : if(f_pos != f_input.end() && *f_pos == '\n')
278 : {
279 : // skip "\r\n" as one end of line
280 0 : ++f_pos;
281 : }
282 0 : goto restart;
283 :
284 2 : case ' ':
285 : case '\t':
286 2 : ++f_pos;
287 2 : goto restart;
288 :
289 0 : case '+':
290 0 : result.set_id(token_t::TOKEN_ID_LITERAL_ENUM);
291 0 : result.set_value(*f_pos);
292 0 : ++f_pos;
293 0 : if(f_pos != f_input.end())
294 : {
295 0 : switch(f_pos->unicode())
296 : {
297 0 : case '=': // add and assign
298 0 : result.set_value("+=");
299 0 : ++f_pos;
300 0 : break;
301 :
302 0 : case '+': // increment
303 0 : result.set_value("++");
304 0 : ++f_pos;
305 0 : break;
306 :
307 0 : default:
308 : // ignore other characters
309 0 : break;
310 :
311 : }
312 : }
313 0 : break;
314 :
315 0 : case '-':
316 0 : result.set_id(token_t::TOKEN_ID_LITERAL_ENUM);
317 0 : result.set_value(*f_pos);
318 0 : ++f_pos;
319 0 : if(f_pos != f_input.end())
320 : {
321 0 : switch(f_pos->unicode())
322 : {
323 0 : case '=': // subtract and assign
324 0 : result.set_value("-=");
325 0 : ++f_pos;
326 0 : break;
327 :
328 0 : case '-': // decrement
329 0 : result.set_value("--");
330 0 : ++f_pos;
331 0 : break;
332 :
333 0 : default:
334 : // ignore other characters
335 0 : break;
336 :
337 : }
338 : }
339 0 : break;
340 :
341 0 : case '*':
342 0 : result.set_id(token_t::TOKEN_ID_LITERAL_ENUM);
343 0 : result.set_value(*f_pos);
344 0 : ++f_pos;
345 0 : if(f_pos != f_input.end())
346 : {
347 0 : switch(f_pos->unicode())
348 : {
349 0 : case '/': // invalid C comment end marker
350 : // in this case we don't have to restart since we
351 : // reached the end of the input
352 0 : f_error_code = lexer_error_t::LEXER_ERROR_INVALID_C_COMMENT;
353 0 : f_error_message = "comment terminator without introducer";
354 0 : f_error_line = f_line;
355 0 : result.set_id(token_t::TOKEN_ID_ERROR_ENUM);
356 0 : break;
357 :
358 0 : case '=': // multiply and assign
359 0 : result.set_value("*=");
360 0 : ++f_pos;
361 0 : break;
362 :
363 0 : case '*': // power
364 0 : result.set_value("**");
365 0 : ++f_pos;
366 0 : if(f_pos != f_input.end())
367 : {
368 0 : if(*f_pos == '=')
369 : {
370 : // power and assign
371 0 : result.set_value("**=");
372 0 : ++f_pos;
373 : }
374 : }
375 0 : break;
376 :
377 0 : default:
378 : // ignore other characters
379 0 : break;
380 :
381 : }
382 : }
383 0 : break;
384 :
385 0 : case '/': // divide
386 0 : result.set_id(token_t::TOKEN_ID_LITERAL_ENUM);
387 0 : result.set_value(*f_pos);
388 0 : ++f_pos;
389 0 : if(f_pos != f_input.end())
390 : {
391 0 : switch(f_pos->unicode())
392 : {
393 0 : case '/': // C++ comment -- skip up to eol
394 0 : for(++f_pos; f_pos != f_input.end(); ++f_pos)
395 : {
396 0 : if(*f_pos == '\n' || *f_pos == '\r')
397 : {
398 0 : goto restart;
399 : }
400 : }
401 : // in this case we don't have to restart since we
402 : // reached the end of the input
403 0 : result.set_id(token_t::TOKEN_ID_NONE_ENUM);
404 0 : break;
405 :
406 0 : case '*': // C comment -- skip up to */
407 0 : for(++f_pos; f_pos != f_input.end(); ++f_pos)
408 : {
409 0 : if(f_pos + 1 != f_input.end() && *f_pos == '*' && f_pos[1] == '/')
410 : {
411 0 : f_pos += 2;
412 0 : goto restart;
413 : }
414 : }
415 : // in this case the comment was not terminated
416 0 : f_error_code = lexer_error_t::LEXER_ERROR_INVALID_C_COMMENT;
417 0 : f_error_message = "comment not terminated";
418 0 : f_error_line = f_line;
419 0 : result.set_id(token_t::TOKEN_ID_ERROR_ENUM);
420 0 : break;
421 :
422 0 : case '=': // divide and assign
423 0 : result.set_value("/=");
424 0 : ++f_pos;
425 0 : break;
426 :
427 0 : default:
428 : // ignore other characters
429 0 : break;
430 :
431 : }
432 : }
433 0 : break;
434 :
435 0 : case '%': // modulo
436 0 : result.set_id(token_t::TOKEN_ID_LITERAL_ENUM);
437 0 : result.set_value(*f_pos);
438 0 : ++f_pos;
439 0 : if(f_pos != f_input.end())
440 : {
441 0 : switch(f_pos->unicode())
442 : {
443 0 : case '=': // modulo and assign
444 0 : result.set_value("%=");
445 0 : ++f_pos;
446 0 : break;
447 :
448 0 : default:
449 : // ignore other characters
450 0 : break;
451 :
452 : }
453 : }
454 0 : break;
455 :
456 0 : case '~': // bitwise not
457 0 : result.set_id(token_t::TOKEN_ID_LITERAL_ENUM);
458 0 : result.set_value(*f_pos);
459 0 : ++f_pos;
460 0 : if(f_pos != f_input.end())
461 : {
462 0 : switch(f_pos->unicode())
463 : {
464 0 : case '=': // bitwise not and assign
465 0 : result.set_value("~=");
466 0 : ++f_pos;
467 0 : break;
468 :
469 0 : default:
470 : // ignore other characters
471 0 : break;
472 :
473 : }
474 : }
475 0 : break;
476 :
477 0 : case '&': // bitwise and
478 0 : result.set_id(token_t::TOKEN_ID_LITERAL_ENUM);
479 0 : result.set_value(*f_pos);
480 0 : ++f_pos;
481 0 : if(f_pos != f_input.end())
482 : {
483 0 : switch(f_pos->unicode())
484 : {
485 0 : case '=': // bitwise and & assign
486 0 : result.set_value("&=");
487 0 : ++f_pos;
488 0 : break;
489 :
490 0 : case '&': // logical and
491 0 : result.set_value("&&");
492 0 : ++f_pos;
493 0 : if(f_pos != f_input.end())
494 : {
495 0 : if(*f_pos == '=')
496 : {
497 : // logical and & assign
498 0 : result.set_value("&&=");
499 0 : ++f_pos;
500 : }
501 : }
502 0 : break;
503 :
504 0 : default:
505 : // ignore other characters
506 0 : break;
507 :
508 : }
509 : }
510 0 : break;
511 :
512 0 : case '|': // bitwise or
513 0 : result.set_id(token_t::TOKEN_ID_LITERAL_ENUM);
514 0 : result.set_value(*f_pos);
515 0 : ++f_pos;
516 0 : if(f_pos != f_input.end())
517 : {
518 0 : switch(f_pos->unicode())
519 : {
520 0 : case '=': // bitwise or & assign
521 0 : result.set_value("|=");
522 0 : ++f_pos;
523 0 : break;
524 :
525 0 : case '|': // logical or
526 0 : result.set_value("||");
527 0 : ++f_pos;
528 0 : if(f_pos != f_input.end())
529 : {
530 0 : if(*f_pos == '=')
531 : {
532 : // logical or and assign
533 0 : result.set_value("||=");
534 0 : ++f_pos;
535 : }
536 : }
537 0 : break;
538 :
539 0 : default:
540 : // ignore other characters
541 0 : break;
542 :
543 : }
544 : }
545 0 : break;
546 :
547 0 : case '^': // bitwise xor
548 0 : result.set_id(token_t::TOKEN_ID_LITERAL_ENUM);
549 0 : result.set_value(*f_pos);
550 0 : ++f_pos;
551 0 : if(f_pos != f_input.end())
552 : {
553 0 : switch(f_pos->unicode())
554 : {
555 0 : case '=': // bitwise xor & assign
556 0 : result.set_value("^=");
557 0 : ++f_pos;
558 0 : break;
559 :
560 0 : case '^': // logical xor
561 0 : result.set_value("^^");
562 0 : ++f_pos;
563 0 : if(f_pos != f_input.end())
564 : {
565 0 : if(*f_pos == '=')
566 : {
567 : // logical xor and assign
568 0 : result.set_value("^^=");
569 0 : ++f_pos;
570 : }
571 : }
572 0 : break;
573 :
574 0 : default:
575 : // ignore other characters
576 0 : break;
577 :
578 : }
579 : }
580 0 : break;
581 :
582 0 : case '!': // logical not
583 0 : result.set_id(token_t::TOKEN_ID_LITERAL_ENUM);
584 0 : result.set_value(*f_pos);
585 0 : ++f_pos;
586 0 : if(f_pos != f_input.end())
587 : {
588 0 : switch(f_pos->unicode())
589 : {
590 0 : case '=': // not equal
591 0 : result.set_value("!=");
592 0 : ++f_pos;
593 0 : if(f_pos != f_input.end())
594 : {
595 0 : if(*f_pos == '=')
596 : {
597 : // exactly not equal (type checked)
598 0 : result.set_value("!==");
599 0 : ++f_pos;
600 : }
601 : }
602 0 : break;
603 :
604 0 : case '<': // rotate left
605 0 : result.set_value("!<");
606 0 : ++f_pos;
607 0 : break;
608 :
609 0 : case '>': // rotate right
610 0 : result.set_value("!>");
611 0 : ++f_pos;
612 0 : break;
613 :
614 0 : default:
615 : // ignore other characters
616 0 : break;
617 :
618 : }
619 : }
620 0 : break;
621 :
622 0 : case '?': // ? by itself is used here and there generally similar to C/C++
623 0 : result.set_id(token_t::TOKEN_ID_LITERAL_ENUM);
624 0 : result.set_value(*f_pos);
625 0 : ++f_pos;
626 0 : if(f_pos != f_input.end())
627 : {
628 0 : switch(f_pos->unicode())
629 : {
630 0 : case '=': // assign if left hand side not set
631 0 : result.set_value("?=");
632 0 : ++f_pos;
633 0 : break;
634 :
635 0 : default:
636 : // ignore other characters
637 0 : break;
638 :
639 : }
640 : }
641 0 : break;
642 :
643 0 : case '=': // assign
644 0 : result.set_id(token_t::TOKEN_ID_LITERAL_ENUM);
645 0 : result.set_value(*f_pos);
646 0 : ++f_pos;
647 0 : if(f_pos != f_input.end())
648 : {
649 0 : switch(f_pos->unicode())
650 : {
651 0 : case '=': // equality check (compare)
652 0 : result.set_value("==");
653 0 : ++f_pos;
654 0 : if(f_pos != f_input.end())
655 : {
656 0 : if(*f_pos == '=')
657 : {
658 : // exactly equal (type checked)
659 0 : result.set_value("===");
660 0 : ++f_pos;
661 : }
662 : }
663 0 : break;
664 :
665 0 : default:
666 : // ignore other characters
667 0 : break;
668 :
669 : }
670 : }
671 0 : break;
672 :
673 0 : case '<': // greater than
674 0 : result.set_id(token_t::TOKEN_ID_LITERAL_ENUM);
675 0 : result.set_value(*f_pos);
676 0 : ++f_pos;
677 0 : if(f_pos != f_input.end())
678 : {
679 0 : switch(f_pos->unicode())
680 : {
681 0 : case '=': // smaller or equal
682 0 : result.set_value("<=");
683 0 : ++f_pos;
684 0 : break;
685 :
686 0 : case '<': // shift left
687 0 : result.set_value("<<");
688 0 : ++f_pos;
689 0 : if(f_pos != f_input.end())
690 : {
691 0 : if(*f_pos == '=')
692 : {
693 : // shift left and assign
694 0 : result.set_value("<<=");
695 0 : ++f_pos;
696 : }
697 : }
698 0 : break;
699 :
700 0 : case '?': // minimum
701 0 : result.set_value("<?");
702 0 : ++f_pos;
703 0 : if(f_pos != f_input.end())
704 : {
705 0 : if(*f_pos == '=')
706 : {
707 : // minimum and assign
708 0 : result.set_value("<?=");
709 0 : ++f_pos;
710 : }
711 : }
712 0 : break;
713 :
714 0 : default:
715 : // ignore other characters
716 0 : break;
717 :
718 : }
719 : }
720 0 : break;
721 :
722 1 : case '>': // less than
723 1 : result.set_id(token_t::TOKEN_ID_LITERAL_ENUM);
724 1 : result.set_value(*f_pos);
725 1 : ++f_pos;
726 1 : if(f_pos != f_input.end())
727 : {
728 1 : switch(f_pos->unicode())
729 : {
730 0 : case '=': // larger or equal
731 0 : result.set_value(">=");
732 0 : ++f_pos;
733 0 : break;
734 :
735 0 : case '>': // shift right
736 0 : result.set_value(">>");
737 0 : ++f_pos;
738 0 : if(f_pos != f_input.end())
739 : {
740 0 : switch(f_pos->unicode())
741 : {
742 0 : case '=':
743 : // shift right and assign
744 0 : result.set_value(">>=");
745 0 : ++f_pos;
746 0 : break;
747 :
748 0 : case '>':
749 : // unsigned shift right
750 0 : result.set_value(">>>");
751 0 : ++f_pos;
752 0 : if(f_pos != f_input.end())
753 : {
754 0 : if(*f_pos == '=')
755 : {
756 : // unsigned right shift and assign
757 0 : result.set_value(">>>=");
758 0 : ++f_pos;
759 : }
760 : }
761 0 : break;
762 :
763 0 : default:
764 : // ignore other characters
765 0 : break;
766 :
767 : }
768 : }
769 0 : break;
770 :
771 0 : case '?': // maximum
772 0 : result.set_value(">?");
773 0 : ++f_pos;
774 0 : if(f_pos != f_input.end())
775 : {
776 0 : if(*f_pos == '=')
777 : {
778 : // maximum and assign
779 0 : result.set_value(">?=");
780 0 : ++f_pos;
781 : }
782 : }
783 0 : break;
784 :
785 1 : default:
786 : // ignore other characters
787 1 : break;
788 :
789 : }
790 : }
791 1 : break;
792 :
793 0 : case ':':
794 0 : result.set_id(token_t::TOKEN_ID_LITERAL_ENUM);
795 0 : result.set_value(*f_pos);
796 0 : ++f_pos;
797 0 : if(f_pos != f_input.end())
798 : {
799 0 : switch(f_pos->unicode())
800 : {
801 0 : case '=': // required
802 0 : result.set_value(":=");
803 0 : ++f_pos;
804 0 : break;
805 :
806 0 : case ':': // namespace
807 0 : result.set_value("::");
808 0 : ++f_pos;
809 0 : break;
810 :
811 0 : default:
812 : // ignore other characters
813 0 : break;
814 :
815 : }
816 : }
817 0 : break;
818 :
819 0 : case '"':
820 : {
821 0 : ++f_pos;
822 0 : QString str;
823 0 : while(f_pos != f_input.end() && *f_pos != '"')
824 : {
825 0 : if(*f_pos == '\n' || *f_pos == '\r')
826 : {
827 : // strings cannot continue after the end of a line
828 0 : break;
829 : }
830 0 : if(*f_pos == '\\')
831 : {
832 0 : ++f_pos;
833 0 : if(f_pos == f_input.end())
834 : {
835 : // this is an invalid backslash
836 0 : break;
837 : }
838 : // TODO: add support for \x## and various other
839 : // escaped characters
840 0 : switch(f_pos->unicode())
841 : {
842 0 : case 'a':
843 0 : str += "\a";
844 0 : break;
845 :
846 0 : case 'b':
847 0 : str += "\b";
848 0 : break;
849 :
850 0 : case 'f':
851 0 : str += "\f";
852 0 : break;
853 :
854 0 : case 'n':
855 0 : str += "\n";
856 0 : break;
857 :
858 0 : case 'r':
859 0 : str += "\r";
860 0 : break;
861 :
862 0 : case 't':
863 0 : str += "\t";
864 0 : break;
865 :
866 0 : case 'v':
867 0 : str += "\v";
868 0 : break;
869 :
870 0 : case '0':
871 : case '1':
872 : case '2':
873 : case '3':
874 : case '4':
875 : case '5':
876 : case '6':
877 : case '7':
878 : // "\nnn" -- maximum of 3 digits
879 : {
880 0 : int v(f_pos->unicode() - '0');
881 0 : if(f_pos + 1 != f_input.end() && (f_pos + 1)->unicode() >= '0' && (f_pos + 1)->unicode() <= '7')
882 : {
883 0 : ++f_pos;
884 0 : v = v * 8 + f_pos->unicode() - '0';
885 :
886 0 : if(f_pos + 1 != f_input.end() && (f_pos + 1)->unicode() >= '0' && (f_pos + 1)->unicode() <= '7')
887 : {
888 0 : ++f_pos;
889 0 : v = v * 8 + f_pos->unicode() - '0';
890 : }
891 : }
892 0 : str += QChar(v);
893 : }
894 0 : break;
895 :
896 0 : case 'x':
897 : case 'X':
898 : {
899 0 : if(f_pos + 1 != f_input.end() && std::isxdigit((f_pos + 1)->unicode()))
900 : {
901 0 : ++f_pos;
902 0 : int v(xdigit(f_pos->unicode()));
903 :
904 0 : if(f_pos + 1 != f_input.end() && std::isxdigit((f_pos + 1)->unicode()))
905 : {
906 0 : ++f_pos;
907 0 : v = v * 16 + xdigit(f_pos->unicode());
908 : }
909 :
910 0 : str += QChar(v);
911 : }
912 : }
913 0 : break;
914 :
915 0 : case 'u':
916 : // take 0 to 4 digits
917 : {
918 0 : int v(0);
919 0 : for(int idx(0); idx < 4; ++idx)
920 : {
921 0 : if(f_pos == f_input.end()
922 0 : || !std::isxdigit((f_pos + 1)->unicode()))
923 : {
924 0 : break;
925 : }
926 0 : ++f_pos;
927 0 : v = v * 16 + xdigit(f_pos->unicode());
928 : }
929 0 : str += QChar(v);
930 : }
931 0 : break;
932 :
933 0 : case 'U':
934 : // take 0 to 8 digits
935 : {
936 0 : uint v(0);
937 0 : for(int idx(0); idx < 8; ++idx)
938 : {
939 0 : if(f_pos == f_input.end()
940 0 : || !std::isxdigit((f_pos + 1)->unicode()))
941 : {
942 0 : break;
943 : }
944 0 : ++f_pos;
945 0 : v = v * 16 + xdigit(f_pos->unicode());
946 : }
947 0 : str += QString::fromUcs4(&v, 1);
948 : }
949 0 : break;
950 :
951 0 : default:
952 : // anything, keep as is (", ', ?, \)
953 0 : str += *f_pos;
954 0 : break;
955 :
956 : }
957 : }
958 : else
959 : {
960 0 : str += *f_pos;
961 : }
962 0 : ++f_pos;
963 : }
964 0 : if(f_pos == f_input.end())
965 : {
966 0 : f_error_code = lexer_error_t::LEXER_ERROR_INVALID_STRING;
967 0 : f_error_message = "invalid string";
968 0 : f_error_line = f_line;
969 0 : result.set_id(token_t::TOKEN_ID_ERROR_ENUM);
970 : }
971 : else
972 : {
973 0 : result.set_id(token_t::TOKEN_ID_STRING_ENUM);
974 0 : result.set_value(str);
975 0 : ++f_pos; // skip the closing quote
976 0 : }
977 : }
978 0 : break;
979 :
980 0 : case '0':
981 : // hexadecimal?
982 0 : if(f_pos + 1 != f_input.end() && (f_pos[1] == 'x' || f_pos[1] == 'X')
983 0 : && f_pos + 2 != f_input.end() && ((f_pos[2] >= '0' && f_pos[2] <= '9')
984 0 : || (f_pos[2] >= 'a' && f_pos[2] <= 'f')
985 0 : || (f_pos[2] >= 'A' && f_pos[2] <= 'F')))
986 : {
987 : bool ok;
988 0 : f_pos += 2; // skip the 0x or 0X
989 0 : QString::const_iterator start(f_pos);
990 : // parse number
991 0 : while(f_pos != f_input.end() && ((*f_pos >= '0' && *f_pos <= '9')
992 0 : || (*f_pos >= 'a' && *f_pos <= 'f')
993 0 : || (*f_pos >= 'A' && *f_pos <= 'F')))
994 : {
995 0 : ++f_pos;
996 : }
997 0 : result.set_id(token_t::TOKEN_ID_INTEGER_ENUM);
998 0 : QString value(start, static_cast<int>(f_pos - start));
999 0 : result.set_value(value.toULongLong(&ok, 16));
1000 0 : if(!ok)
1001 : {
1002 : // as far as I know the only reason it can fail is because
1003 : // it is too large (since we parsed a valid number!)
1004 0 : f_error_code = lexer_error_t::LEXER_ERROR_INVALID_NUMBER;
1005 0 : f_error_message = "number too large";
1006 0 : f_error_line = f_line;
1007 0 : result.set_id(token_t::TOKEN_ID_ERROR_ENUM);
1008 : }
1009 0 : break;
1010 0 : }
1011 : // no octal support at this point, octal is not available in
1012 : // JavaScript by default!
1013 : #if __cplusplus >= 201700
1014 : [[fallthrough]];
1015 : #endif
1016 : case '1':
1017 : case '2':
1018 : case '3':
1019 : case '4':
1020 : case '5':
1021 : case '6':
1022 : case '7':
1023 : case '8':
1024 : case '9':
1025 : {
1026 : bool ok;
1027 : // TODO: test overflows
1028 0 : QString::const_iterator start(f_pos);
1029 : // number
1030 0 : do
1031 : {
1032 0 : ++f_pos;
1033 : }
1034 0 : while(f_pos != f_input.end() && *f_pos >= '0' && *f_pos <= '9');
1035 0 : if(*f_pos == '.')
1036 : {
1037 : // skip the decimal point
1038 0 : ++f_pos;
1039 :
1040 : // floating point
1041 0 : while(f_pos != f_input.end() && *f_pos >= '0' && *f_pos <= '9')
1042 : {
1043 0 : ++f_pos;
1044 : }
1045 : // TODO: add exponent support
1046 0 : result.set_id(token_t::TOKEN_ID_FLOAT_ENUM);
1047 0 : QString value(start, static_cast<int>(f_pos - start));
1048 0 : result.set_value(value.toDouble(&ok));
1049 : }
1050 : else
1051 : {
1052 0 : result.set_id(token_t::TOKEN_ID_INTEGER_ENUM);
1053 0 : QString value(start, static_cast<int>(f_pos - start));
1054 0 : result.set_value(value.toULongLong(&ok));
1055 : }
1056 0 : if(!ok)
1057 : {
1058 : // as far as I know the only reason it can fail is because
1059 : // it is too large (since we parsed a valid number!)
1060 0 : f_error_code = lexer_error_t::LEXER_ERROR_INVALID_NUMBER;
1061 0 : f_error_message = "number too large";
1062 0 : f_error_line = f_line;
1063 0 : result.set_id(token_t::TOKEN_ID_ERROR_ENUM);
1064 0 : }
1065 : }
1066 0 : break;
1067 :
1068 2 : default:
1069 : // TBD: add support for '$' for JavaScript?
1070 6 : if((*f_pos >= 'a' && *f_pos <= 'z')
1071 2 : || (*f_pos >= 'A' && *f_pos <= 'Z')
1072 2 : || *f_pos == '_')
1073 : {
1074 : // identifier
1075 2 : QString::const_iterator start(f_pos);
1076 2 : ++f_pos;
1077 6 : while(f_pos != f_input.end()
1078 10 : && ((*f_pos >= 'a' && *f_pos <= 'z')
1079 7 : || (*f_pos >= 'A' && *f_pos <= 'Z')
1080 7 : || (*f_pos >= '0' && *f_pos <= '9')
1081 5 : || *f_pos == '_'))
1082 : {
1083 2 : ++f_pos;
1084 : }
1085 4 : QString identifier(start, static_cast<int>(f_pos - start));
1086 2 : if(f_keywords.contains(identifier))
1087 : {
1088 0 : result.set_id(token_t::TOKEN_ID_KEYWORD_ENUM);
1089 0 : result.set_value(f_keywords[identifier]);
1090 : }
1091 : else
1092 : {
1093 2 : result.set_id(token_t::TOKEN_ID_IDENTIFIER_ENUM);
1094 2 : result.set_value(identifier);
1095 : }
1096 : }
1097 : else
1098 : {
1099 : // in all other cases return a QChar
1100 0 : result.set_id(token_t::TOKEN_ID_LITERAL_ENUM);
1101 0 : result.set_value(*f_pos);
1102 0 : ++f_pos;
1103 : }
1104 2 : break;
1105 :
1106 : }
1107 :
1108 : // Only to help with debug sessions
1109 : //std::cerr << "lexer result: " << result.to_string() << "\n";
1110 :
1111 3 : return result;
1112 : }
1113 :
1114 2 : void lexer::add_keyword(keyword& k)
1115 : {
1116 2 : f_keywords[k.identifier()] = k.number();
1117 2 : }
1118 :
1119 :
1120 : int keyword::g_next_number = 0;
1121 :
1122 2 : keyword::keyword(lexer& parent, const QString& keyword_identifier, int index_number)
1123 2 : : f_number(index_number == 0 ? ++g_next_number : index_number)
1124 2 : , f_identifier(keyword_identifier)
1125 : {
1126 2 : parent.add_keyword(*this);
1127 2 : }
1128 :
1129 :
1130 :
1131 0 : rule::rule_data_t::rule_data_t()
1132 : : f_token(token_t::TOKEN_ID_NONE_ENUM)
1133 : //, f_value("") -- auto-init
1134 : //, f_keyword() -- auto-init
1135 0 : , f_choices(nullptr)
1136 : {
1137 0 : }
1138 :
1139 207 : rule::rule_data_t::rule_data_t(rule_data_t const& s)
1140 207 : : f_token(s.f_token)
1141 : , f_value(s.f_value)
1142 : , f_keyword(s.f_keyword)
1143 207 : , f_choices(s.f_choices)
1144 : {
1145 207 : }
1146 :
1147 89 : rule::rule_data_t::rule_data_t(choices& c)
1148 : : f_token(token_t::TOKEN_ID_CHOICES_ENUM)
1149 : //, f_value("") -- auto-init
1150 : //, f_keyword() -- auto-init
1151 89 : , f_choices(&c)
1152 : {
1153 89 : }
1154 :
1155 8 : rule::rule_data_t::rule_data_t(token_t token)
1156 : : f_token(token)
1157 : //, f_value("") -- auto-init
1158 : //, f_keyword() -- auto-init
1159 8 : , f_choices(nullptr)
1160 : {
1161 8 : }
1162 :
1163 36 : rule::rule_data_t::rule_data_t(const QString& value)
1164 : : f_token(token_t::TOKEN_ID_LITERAL_ENUM)
1165 : , f_value(value)
1166 : //, f_keyword() -- auto-init
1167 36 : , f_choices(nullptr)
1168 : {
1169 36 : }
1170 :
1171 2 : rule::rule_data_t::rule_data_t(const keyword& k)
1172 : : f_token(token_t::TOKEN_ID_KEYWORD_ENUM)
1173 : //, f_value("") -- auto-init
1174 : , f_keyword(k)
1175 2 : , f_choices(nullptr)
1176 : {
1177 2 : }
1178 :
1179 :
1180 :
1181 0 : rule::rule(choices& c)
1182 : : f_parent(&c)
1183 : //, f_tokens() -- auto-init
1184 0 : , f_reducer(nullptr)
1185 : {
1186 0 : }
1187 :
1188 53 : rule::rule(const rule& r)
1189 53 : : f_parent(r.f_parent)
1190 : , f_tokens(r.f_tokens)
1191 53 : , f_reducer(r.f_reducer)
1192 : {
1193 53 : }
1194 :
1195 16 : void rule::add_rules(choices& c)
1196 : {
1197 32 : rule_data_t data(c);
1198 16 : data.f_token = token_t::TOKEN_ID_RULES_ENUM;
1199 16 : f_tokens.push_back(data);
1200 16 : }
1201 :
1202 73 : void rule::add_choices(choices& c)
1203 : {
1204 73 : f_tokens.push_back(rule_data_t(c));
1205 73 : }
1206 :
1207 8 : void rule::add_token(token_t token)
1208 : {
1209 8 : f_tokens.push_back(rule_data_t(token));
1210 8 : }
1211 :
1212 36 : void rule::add_literal(const QString& value)
1213 : {
1214 36 : f_tokens.push_back(rule_data_t(value));
1215 36 : }
1216 :
1217 2 : void rule::add_keyword(const keyword& k)
1218 : {
1219 2 : f_tokens.push_back(rule_data_t(k));
1220 2 : }
1221 :
1222 1 : rule& rule::operator >> (const token_id& token)
1223 : {
1224 1 : add_token(token);
1225 1 : return *this;
1226 : }
1227 :
1228 0 : rule& rule::operator >> (const QString& literal)
1229 : {
1230 0 : add_literal(literal);
1231 0 : return *this;
1232 : }
1233 :
1234 4 : rule& rule::operator >> (char const *literal)
1235 : {
1236 4 : add_literal(literal);
1237 4 : return *this;
1238 : }
1239 :
1240 0 : rule& rule::operator >> (keyword const& k)
1241 : {
1242 0 : add_keyword(k);
1243 0 : return *this;
1244 : }
1245 :
1246 26 : rule& rule::operator >> (choices& c)
1247 : {
1248 26 : add_choices(c);
1249 26 : return *this;
1250 : }
1251 :
1252 32 : rule& rule::operator >= (rule::reducer_t function)
1253 : {
1254 32 : set_reducer(function);
1255 32 : return *this;
1256 : }
1257 :
1258 0 : rule& operator >> (token_id const& token_left, token_id const& token_right)
1259 : {
1260 0 : rule *r(new rule);
1261 0 : r->add_token(token_left);
1262 0 : r->add_token(token_right);
1263 0 : return *r;
1264 : }
1265 :
1266 0 : rule& operator >> (token_id const& token, QString const& literal)
1267 : {
1268 0 : rule *r(new rule);
1269 0 : r->add_token(token);
1270 0 : r->add_literal(literal);
1271 0 : return *r;
1272 : }
1273 :
1274 1 : rule& operator >> (token_id const& token, char const *literal)
1275 : {
1276 1 : rule *r(new rule);
1277 1 : r->add_token(token);
1278 1 : r->add_literal(literal);
1279 1 : return *r;
1280 : }
1281 :
1282 0 : rule& operator >> (token_id const& token, keyword const& k)
1283 : {
1284 0 : rule *r(new rule);
1285 0 : r->add_token(token);
1286 0 : r->add_keyword(k);
1287 0 : return *r;
1288 : }
1289 :
1290 0 : rule& operator >> (token_id const& token, choices& c)
1291 : {
1292 0 : rule *r(new rule);
1293 0 : r->add_token(token);
1294 0 : r->add_choices(c);
1295 0 : return *r;
1296 : }
1297 :
1298 0 : rule& operator >> (QString const& literal, token_id const& token)
1299 : {
1300 0 : rule *r(new rule);
1301 0 : r->add_literal(literal);
1302 0 : r->add_token(token);
1303 0 : return *r;
1304 : }
1305 :
1306 0 : rule& operator >> (QString const& literal_left, QString const& literal_right)
1307 : {
1308 0 : rule *r(new rule);
1309 0 : r->add_literal(literal_left);
1310 0 : r->add_literal(literal_right);
1311 0 : return *r;
1312 : }
1313 :
1314 0 : rule& operator >> (QString const& literal, keyword const& k)
1315 : {
1316 0 : rule *r(new rule);
1317 0 : r->add_literal(literal);
1318 0 : r->add_keyword(k);
1319 0 : return *r;
1320 : }
1321 :
1322 0 : rule& operator >> (QString const& literal, choices& c)
1323 : {
1324 0 : rule *r(new rule);
1325 0 : r->add_literal(literal);
1326 0 : r->add_choices(c);
1327 0 : return *r;
1328 : }
1329 :
1330 0 : rule& operator >> (keyword const& k, token_id const& token)
1331 : {
1332 0 : rule *r(new rule);
1333 0 : r->add_keyword(k);
1334 0 : r->add_token(token);
1335 0 : return *r;
1336 : }
1337 :
1338 0 : rule& operator >> (keyword const& k, QString const& literal)
1339 : {
1340 0 : rule *r(new rule);
1341 0 : r->add_keyword(k);
1342 0 : r->add_literal(literal);
1343 0 : return *r;
1344 : }
1345 :
1346 0 : rule& operator >> (keyword const& k_left, keyword const& k_right)
1347 : {
1348 0 : rule *r(new rule);
1349 0 : r->add_keyword(k_left);
1350 0 : r->add_keyword(k_right);
1351 0 : return *r;
1352 : }
1353 :
1354 0 : rule& operator >> (keyword const& k, choices& c)
1355 : {
1356 0 : rule *r(new rule);
1357 0 : r->add_keyword(k);
1358 0 : r->add_choices(c);
1359 0 : return *r;
1360 : }
1361 :
1362 1 : rule& operator >> (choices& c, token_id const& token)
1363 : {
1364 1 : rule *r(new rule);
1365 1 : r->add_choices(c);
1366 1 : r->add_token(token);
1367 1 : return *r;
1368 : }
1369 :
1370 26 : rule& operator >> (choices& c, QString const& literal)
1371 : {
1372 26 : rule *r(new rule);
1373 26 : r->add_choices(c);
1374 26 : r->add_literal(literal);
1375 26 : return *r;
1376 : }
1377 :
1378 0 : rule& operator >> (choices& c, keyword const& k)
1379 : {
1380 0 : rule *r(new rule);
1381 0 : r->add_choices(c);
1382 0 : r->add_keyword(k);
1383 0 : return *r;
1384 : }
1385 :
1386 0 : rule& operator >> (choices& c_left, choices& c_right)
1387 : {
1388 0 : rule *r(new rule);
1389 0 : r->add_choices(c_left);
1390 0 : r->add_choices(c_right);
1391 0 : return *r;
1392 : }
1393 :
1394 5 : rule& operator >> (char const *literal, choices& c)
1395 : {
1396 5 : rule *r(new rule);
1397 5 : r->add_literal(literal);
1398 5 : r->add_choices(c);
1399 5 : return *r;
1400 : }
1401 :
1402 4 : rule& operator >= (token_id const& token, rule::reducer_t function)
1403 : {
1404 4 : rule *r(new rule);
1405 4 : r->add_token(token);
1406 4 : r->set_reducer(function);
1407 4 : return *r;
1408 : }
1409 :
1410 0 : rule& operator >= (QString const& literal, rule::reducer_t function)
1411 : {
1412 0 : rule *r(new rule);
1413 0 : r->add_literal(literal);
1414 0 : r->set_reducer(function);
1415 0 : return *r;
1416 : }
1417 :
1418 2 : rule& operator >= (keyword const& k, rule::reducer_t function)
1419 : {
1420 2 : rule *r(new rule);
1421 2 : r->add_keyword(k);
1422 2 : r->set_reducer(function);
1423 2 : return *r;
1424 : }
1425 :
1426 15 : rule& operator >= (choices& c, rule::reducer_t function)
1427 : {
1428 15 : rule *r(new rule);
1429 15 : r->add_choices(c);
1430 15 : r->set_reducer(function);
1431 15 : return *r;
1432 : }
1433 :
1434 0 : QString rule::to_string() const
1435 : {
1436 0 : QString result;
1437 :
1438 0 : for(QVector<rule_data_t>::const_iterator ri = f_tokens.begin();
1439 0 : ri != f_tokens.end(); ++ri)
1440 : {
1441 0 : if(ri != f_tokens.begin())
1442 : {
1443 0 : result += " ";
1444 : }
1445 0 : const rule_data_t& r(*ri);
1446 0 : switch(r.f_token)
1447 : {
1448 0 : case token_t::TOKEN_ID_NONE_ENUM:
1449 0 : result += "\xA4"; // currency sign used as the EOI marker
1450 0 : break;
1451 :
1452 0 : case token_t::TOKEN_ID_INTEGER_ENUM:
1453 0 : result += "TOKEN_ID_INTEGER";
1454 0 : break;
1455 :
1456 0 : case token_t::TOKEN_ID_FLOAT_ENUM:
1457 0 : result += "TOKEN_ID_FLOAT";
1458 0 : break;
1459 :
1460 0 : case token_t::TOKEN_ID_IDENTIFIER_ENUM:
1461 0 : result += "TOKEN_ID_IDENTIFIER";
1462 0 : break;
1463 :
1464 0 : case token_t::TOKEN_ID_KEYWORD_ENUM:
1465 0 : result += "keyword_" + r.f_keyword.identifier();
1466 0 : break;
1467 :
1468 0 : case token_t::TOKEN_ID_STRING_ENUM:
1469 0 : result += "TOKEN_ID_STRING";
1470 0 : break;
1471 :
1472 0 : case token_t::TOKEN_ID_LITERAL_ENUM:
1473 0 : result += "\"" + r.f_value + "\"";
1474 0 : break;
1475 :
1476 0 : case token_t::TOKEN_ID_EMPTY_ENUM:
1477 : // put the empty set for empty
1478 0 : result += "\xF8";
1479 0 : break;
1480 :
1481 0 : case token_t::TOKEN_ID_CHOICES_ENUM:
1482 : // you can select the one with the pointer for debugging
1483 : //result += QString("[0x%1] %2").arg(reinterpret_cast<qulonglong>(r.f_choices), 0, 16).arg(r.f_choices->name());
1484 0 : result += QString("%2").arg(r.f_choices->name());
1485 0 : break;
1486 :
1487 0 : case token_t::TOKEN_ID_NODE_ENUM:
1488 0 : result += " /* INVALID -- TOKEN_ID_NODE!!! */ ";
1489 0 : break;
1490 :
1491 0 : case token_t::TOKEN_ID_ERROR_ENUM:
1492 0 : result += " /* INVALID -- TOKEN_ID_ERROR!!! */ ";
1493 0 : break;
1494 :
1495 0 : default:
1496 0 : result += " /* INVALID -- unknown token identifier!!! */ ";
1497 0 : break;
1498 :
1499 : }
1500 : }
1501 :
1502 0 : if(f_reducer != nullptr)
1503 : {
1504 : // show that we have a reducer
1505 0 : result += " { ... }";
1506 : }
1507 :
1508 0 : return result;
1509 : }
1510 :
1511 :
1512 :
1513 :
1514 34 : choices::choices(grammar *parent, const char *choice_name)
1515 34 : : f_name(choice_name)
1516 : //f_rules() -- auto-init
1517 : {
1518 34 : if(parent != nullptr)
1519 : {
1520 18 : parent->add_choices(*this);
1521 : }
1522 34 : }
1523 :
1524 36 : choices::~choices()
1525 : {
1526 18 : clear();
1527 18 : }
1528 :
1529 34 : void choices::clear()
1530 : {
1531 34 : int const max_rules(f_rules.count());
1532 89 : for(int r = 0; r < max_rules; ++r)
1533 : {
1534 55 : delete f_rules[r];
1535 : }
1536 34 : f_rules.clear();
1537 34 : }
1538 :
1539 :
1540 16 : choices& choices::operator = (const choices& rhs)
1541 : {
1542 16 : if(this != &rhs)
1543 : {
1544 : //f_name -- not changed, rhs.f_name is probably "internal"
1545 :
1546 16 : clear();
1547 :
1548 : // copy rhs rules
1549 16 : int const max_rules(rhs.f_rules.count());
1550 69 : for(int r = 0; r < max_rules; ++r)
1551 : {
1552 53 : f_rules.push_back(new rule(*rhs.f_rules[r]));
1553 : }
1554 : }
1555 :
1556 16 : return *this;
1557 : }
1558 :
1559 0 : choices& choices::operator >>= (choices& rhs)
1560 : {
1561 0 : if(this == &rhs)
1562 : {
1563 0 : throw snap_logic_exception("a rule cannot just be represented as itself");
1564 : }
1565 :
1566 0 : rule *r(new rule);
1567 0 : r->add_choices(rhs);
1568 0 : f_rules.push_back(r);
1569 :
1570 0 : return *this;
1571 : }
1572 :
1573 18 : choices& choices::operator >>= (rule& r)
1574 : {
1575 : // in this case there are no choices
1576 18 : if(r[0].get_token().get_id() == token_t::TOKEN_ID_RULES_ENUM)
1577 : {
1578 16 : this->operator = (r[0].get_choices());
1579 : }
1580 : else
1581 : {
1582 2 : f_rules.push_back(&r);
1583 : }
1584 :
1585 18 : return *this;
1586 : }
1587 :
1588 0 : choices& choices::operator >>= (token_id const& token)
1589 : {
1590 0 : rule *r = new rule;
1591 0 : r->add_token(token);
1592 0 : f_rules.push_back(r);
1593 :
1594 0 : return *this;
1595 : }
1596 :
1597 0 : choices& choices::operator >>= (QString const& literal)
1598 : {
1599 0 : rule *r = new rule;
1600 0 : r->add_literal(literal);
1601 0 : f_rules.push_back(r);
1602 :
1603 0 : return *this;
1604 : }
1605 :
1606 0 : choices& choices::operator >>= (keyword const& k)
1607 : {
1608 0 : rule *r = new rule;
1609 0 : r->add_keyword(k);
1610 0 : f_rules.push_back(r);
1611 :
1612 0 : return *this;
1613 : }
1614 :
1615 :
1616 0 : rule& choices::operator | (rule& r)
1617 : {
1618 : // left hand-side is this
1619 0 : rule *l(new rule);
1620 0 : l->add_choices(*this);
1621 :
1622 0 : return *l | r;
1623 : }
1624 :
1625 0 : rule& operator | (rule& r_left, token_id const& token)
1626 : {
1627 0 : choices *c(new choices(nullptr, "internal"));
1628 0 : rule *r_right(new rule);
1629 0 : r_right->add_token(token);
1630 0 : c->add_rule(r_left);
1631 0 : c->add_rule(*r_right);
1632 0 : rule *r(new rule);
1633 0 : r->add_rules(*c);
1634 0 : return *r;
1635 : }
1636 :
1637 1 : rule& operator | (token_id const& token, rule& r_right)
1638 : {
1639 1 : choices *c(new choices(nullptr, "internal"));
1640 1 : rule *r_left(new rule);
1641 1 : r_left->add_token(token);
1642 1 : c->add_rule(*r_left);
1643 1 : c->add_rule(r_right);
1644 1 : rule *r(new rule);
1645 1 : r->add_rules(*c);
1646 1 : return *r;
1647 : }
1648 :
1649 0 : rule& operator | (rule& r_left, keyword const& k)
1650 : {
1651 0 : choices *c(new choices(nullptr, "internal"));
1652 0 : rule *r_right(new rule);
1653 0 : r_right->add_keyword(k);
1654 0 : c->add_rule(r_left);
1655 0 : c->add_rule(*r_right);
1656 0 : rule *r(new rule);
1657 0 : r->add_rules(*c);
1658 0 : return *r;
1659 : }
1660 :
1661 36 : rule& operator | (rule& r_left, rule& r_right)
1662 : {
1663 : // append to existing list?
1664 36 : if(r_left[0].get_token().get_id() == token_t::TOKEN_ID_RULES_ENUM)
1665 : {
1666 21 : r_left[0].get_choices().add_rule(r_right);
1667 21 : return r_left;
1668 : }
1669 :
1670 15 : choices *c(new choices(nullptr, "internal"));
1671 15 : c->add_rule(r_left);
1672 15 : c->add_rule(r_right);
1673 15 : rule *r(new rule);
1674 15 : r->add_rules(*c);
1675 15 : return *r;
1676 : }
1677 :
1678 0 : rule& operator | (rule& r, choices& c)
1679 : {
1680 0 : rule *l(new rule);
1681 0 : l->add_choices(c);
1682 :
1683 0 : return r | *l;
1684 : }
1685 :
1686 53 : void choices::add_rule(rule& r)
1687 : {
1688 53 : f_rules.push_back(&r);
1689 53 : }
1690 :
1691 :
1692 :
1693 0 : QString choices::to_string() const
1694 : {
1695 : // you can select the one with the pointer for debugging
1696 : //QString result(QString("[0x%1] %2: ").arg(reinterpret_cast<qulonglong>(this), 0, 16).arg(f_name));
1697 0 : QString result(QString("%2: ").arg(f_name));
1698 :
1699 0 : for(QVector<rule *>::const_iterator ri = f_rules.begin();
1700 0 : ri != f_rules.end(); ++ri)
1701 : {
1702 0 : if(ri != f_rules.begin())
1703 : {
1704 0 : result += "\n | ";
1705 : }
1706 0 : rule const *r(*ri);
1707 0 : result += r->to_string();
1708 : }
1709 :
1710 0 : return result;
1711 : }
1712 :
1713 :
1714 :
1715 :
1716 :
1717 :
1718 1 : grammar::grammar()
1719 : //: f_choices() -- auto-init
1720 : {
1721 1 : }
1722 :
1723 18 : void grammar::add_choices(choices& c)
1724 : {
1725 18 : f_choices.push_back(&c);
1726 18 : }
1727 :
1728 : struct parser_state;
1729 : typedef QVector<parser_state *> state_array_t;
1730 : typedef QMap<parser_state *, int> state_map_t;
1731 :
1732 : struct parser_state
1733 : {
1734 54 : parser_state(parser_state * parent, choices & c, int r)
1735 54 : : f_parent(parent)
1736 : , f_choices(&c)
1737 54 : , f_rule(r)
1738 : {
1739 54 : if(parent != nullptr)
1740 : {
1741 53 : parent->f_children.push_back(this);
1742 : }
1743 54 : }
1744 :
1745 : parser_state(parser_state const & rhs) = delete;
1746 : parser_state & operator = (parser_state const & rhs) = delete;
1747 :
1748 0 : ~parser_state()
1749 0 : {
1750 : //std::cerr << "destructor! " << this << "\n";
1751 : try
1752 : {
1753 0 : clear();
1754 : }
1755 0 : catch(snap_logic_exception const &)
1756 : {
1757 : }
1758 0 : }
1759 :
1760 103 : void clear()
1761 : {
1762 103 : if(!f_children.empty())
1763 : {
1764 0 : throw snap_logic_exception("clearing a state that has children is not allowed");
1765 : }
1766 : // if we have a parent make sure we're removed from the list
1767 : // of children of that parent
1768 103 : if(f_parent != nullptr)
1769 : {
1770 102 : int const p(f_parent->f_children.indexOf(this));
1771 102 : if(p < 0)
1772 : {
1773 0 : throw snap_logic_exception("clearing a state with a parent that doesn't know about us is not allowed");
1774 : }
1775 102 : f_parent->f_children.remove(p);
1776 102 : f_parent = nullptr;
1777 : }
1778 : // delete all the states to be executed on reduce
1779 : // if they're still here, they can be removed
1780 103 : while(!f_add_on_reduce.empty())
1781 : {
1782 0 : delete f_add_on_reduce.last();
1783 0 : f_add_on_reduce.pop_back();
1784 : }
1785 : // useful for debug purposes
1786 103 : f_choices = nullptr;
1787 103 : f_rule = -1;
1788 103 : f_position = -1;
1789 103 : }
1790 :
1791 77 : void reset(parser_state * parent, choices & c, int const r)
1792 : {
1793 77 : f_parent = parent;
1794 77 : if(parent != nullptr)
1795 : {
1796 75 : parent->f_children.push_back(this);
1797 : }
1798 77 : f_choices = &c;
1799 77 : f_rule = r;
1800 77 : f_position = 0;
1801 77 : f_node.clear();
1802 77 : f_add_on_reduce.clear();
1803 77 : }
1804 :
1805 130 : static parser_state * alloc(state_array_t & free_states, parser_state * parent, choices & c, int const r)
1806 : {
1807 : parser_state * state;
1808 130 : if(free_states.empty())
1809 : {
1810 53 : state = new parser_state(parent, c, r);
1811 : }
1812 : else
1813 : {
1814 77 : state = free_states.last();
1815 77 : free_states.pop_back();
1816 77 : state->reset(parent, c, r);
1817 : }
1818 130 : return state;
1819 : }
1820 :
1821 103 : static void free(state_array_t & current, state_array_t & free_states, parser_state * s)
1822 : {
1823 : #ifdef DEBUG
1824 103 : if(s->f_lock)
1825 : {
1826 0 : throw snap_logic_exception("state that was not yet properly checked is getting deleted");
1827 : }
1828 : #endif
1829 :
1830 : // recursively free all the children
1831 0 : while(!s->f_children.empty())
1832 : {
1833 0 : free(current, free_states, s->f_children.last());
1834 : //s->f_children.pop_back(); -- automatic in clear()
1835 : }
1836 103 : s->clear();
1837 103 : int const pos(current.indexOf(s));
1838 103 : if(pos != -1)
1839 : {
1840 103 : current.remove(pos);
1841 : }
1842 103 : free_states.push_back(s);
1843 103 : }
1844 :
1845 34 : static parser_state * copy(state_array_t& free_states, parser_state * source)
1846 : {
1847 34 : parser_state * state(alloc(free_states, source->f_parent, *source->f_choices, source->f_rule));
1848 34 : state->f_line = source->f_line;
1849 34 : state->f_position = source->f_position;
1850 34 : if(source->f_node != nullptr)
1851 : {
1852 1 : state->f_node = QSharedPointer<token_node>(new token_node(*source->f_node));
1853 : }
1854 34 : state->copy_reduce_states(free_states, source->f_add_on_reduce);
1855 34 : return state;
1856 : }
1857 :
1858 34 : void copy_reduce_states(state_array_t & free_states, state_array_t & add_on_reduce)
1859 : {
1860 34 : int const max_reduce(add_on_reduce.size());
1861 34 : for(int i(0); i < max_reduce; ++i)
1862 : {
1863 : // we need to set the correct parent in the copy
1864 : // and it is faster to correct in the source before the copy
1865 0 : f_add_on_reduce.push_back(copy(free_states, add_on_reduce[i]));
1866 : }
1867 34 : }
1868 :
1869 8 : void add_token(token & t)
1870 : {
1871 8 : if(f_node == nullptr)
1872 : {
1873 7 : f_node = QSharedPointer<token_node>(new token_node);
1874 7 : f_node->set_line(f_line);
1875 : }
1876 8 : f_node->add_token(t);
1877 8 : }
1878 :
1879 82 : void add_node(QSharedPointer<token_node> n)
1880 : {
1881 82 : if(f_node == nullptr)
1882 : {
1883 81 : f_node = QSharedPointer<token_node>(new token_node);
1884 81 : f_node->set_line(f_line);
1885 : }
1886 82 : f_node->add_node(n);
1887 82 : }
1888 :
1889 : QString toString()
1890 : {
1891 : QString result;
1892 :
1893 : result = QString("0x%1-%2 [r:%3, p:%4/%5]")
1894 : .arg(reinterpret_cast<qulonglong>(this), 0, 16)
1895 : .arg(f_choices->name())
1896 : .arg(f_rule)
1897 : .arg(f_position)
1898 : .arg((*f_choices)[f_rule].count());
1899 : if(f_parent != nullptr)
1900 : {
1901 : result += QString(" (parent 0x%5-%6)")
1902 : .arg(reinterpret_cast<qulonglong>(f_parent), 0, 16)
1903 : .arg(f_parent->f_choices->name());
1904 : }
1905 :
1906 : return result;
1907 : }
1908 :
1909 : /** \brief Display an array of states.
1910 : *
1911 : * This function displays the array of states as defined by the parameter
1912 : * \p a. This prints all the parents of each element and also the list
1913 : * of add on reduce if any.
1914 : *
1915 : * \param[in] a The array to be displayed.
1916 : */
1917 : #ifdef DEBUG
1918 : static void display_array(const state_array_t & a)
1919 : {
1920 : SNAP_LOG_TRACE() << "+++ ARRAY (" << a.size() << " items)\n";
1921 : for(state_array_t::const_iterator it(a.begin()); it != a.end(); ++it)
1922 : {
1923 : parser_state * state(*it);
1924 : //std::cerr << " state = " << state << "\n"; // for crash
1925 : SNAP_LOG_TRACE() << " current: " << state->toString() << "\n";
1926 : for(state_array_t::const_iterator r(state->f_add_on_reduce.begin()); r != state->f_add_on_reduce.end(); ++r)
1927 : {
1928 : parser_state * s(*r);
1929 : SNAP_LOG_TRACE() << " add on reduce: " << s->toString() << "\n";
1930 : }
1931 : while(state->f_parent != nullptr)
1932 : {
1933 : state = state->f_parent;
1934 : SNAP_LOG_TRACE() << " parent: " << state->toString() << "\n";
1935 : }
1936 : }
1937 : SNAP_LOG_TRACE() << "---\n";
1938 : }
1939 :
1940 82 : void lock()
1941 : {
1942 82 : f_lock = true;
1943 82 : }
1944 :
1945 77 : void unlock()
1946 : {
1947 77 : f_lock = false;
1948 77 : }
1949 :
1950 : #endif
1951 :
1952 : bool f_lock = false;
1953 :
1954 : int32_t f_line = -1;
1955 : parser_state * f_parent = nullptr;
1956 : state_array_t f_children = state_array_t();
1957 :
1958 : choices * f_choices = nullptr;
1959 : int32_t f_rule = 0;
1960 : int32_t f_position = 0;
1961 :
1962 : QSharedPointer<token_node> f_node = QSharedPointer<token_node>();
1963 : state_array_t f_add_on_reduce = state_array_t();
1964 : };
1965 :
1966 :
1967 : /** \brief Move to the next token in a rule.
1968 : *
1969 : * Each state includes a position in one specific rule. This function moves
1970 : * that pointer to the next position.
1971 : *
1972 : * When the end of the rule is reached, then the rule gets reduced. This means
1973 : * calling the user reduce function and removing the rule from the current list
1974 : * and replacing it with its parent.
1975 : *
1976 : * Reducing means removing the current state and putting it the list of
1977 : * free state after we added the node tree to its parent. The parent is
1978 : * then added to the list of current state as it becomes current again.
1979 : *
1980 : * When reducing a rule and moving up to the parent, the parent may then need
1981 : * reduction too! Thus, the function loops and reduce this state and all of
1982 : * its parent until a state that cannot be reduced anymore.
1983 : *
1984 : * This function also detects recursive rules and place those in the current
1985 : * stack of states as expected. Note that next_token() is called on the
1986 : * recursive rule too. This is a recursive function call, but it is very
1987 : * unlikely to be called more than twice.
1988 : *
1989 : * \param[in] state The state being moved.
1990 : * \param[in] current The list of current states
1991 : * \param[in] free_states The list of free states
1992 : */
1993 90 : void next_token(parser_state *state, state_array_t& current, state_array_t& free_states)
1994 : {
1995 : bool repeat;
1996 90 : do
1997 : {
1998 90 : repeat = false;
1999 : // move forward to the next token in this rule
2000 90 : ++state->f_position;
2001 90 : if(state->f_position >= (*state->f_choices)[state->f_rule].count())
2002 : {
2003 34 : if(state->f_position == (*state->f_choices)[state->f_rule].count())
2004 : {
2005 34 : repeat = true;
2006 :
2007 : // we reached the end of the rule, we can reduce it!
2008 : // call user function
2009 : //std::cerr << "reduce -- " << state->f_choices->name() << ": " << (*state->f_choices)[state->f_rule].to_string() << "\n";
2010 34 : (*state->f_choices)[state->f_rule].reduce(state->f_node);
2011 :
2012 : // add the recursive children in the current stack
2013 : // check for recursive children (a: b | a ',' b)
2014 34 : int const max_choices(state->f_choices->count());
2015 142 : for(int i(0); i < max_choices; ++i)
2016 : {
2017 108 : rule const& r((*state->f_choices)[i]);
2018 324 : if(token_t::TOKEN_ID_CHOICES_ENUM == r[0].get_token().get_id()
2019 324 : && state->f_choices == &r[0].get_choices())
2020 : {
2021 48 : parser_state *s(parser_state::alloc(free_states, state->f_parent, *state->f_choices, i));
2022 : //parser_state *s(parser_state::copy(free_states, state));
2023 48 : s->f_line = state->f_line;
2024 48 : s->add_node(state->f_node);
2025 48 : current.push_back(s);
2026 : //std::cerr << "** sub-next_token (recursive) " << reinterpret_cast<void*>(s) << "\n";
2027 48 : next_token(s, current, free_states); // we just reduced that one state!
2028 : //std::cerr << "**\n";
2029 : }
2030 : }
2031 :
2032 34 : parser_state *p(state->f_parent);
2033 34 : if(p->f_children.size() > 1)
2034 : {
2035 : // the parent has several children which means we may get
2036 : // more than one reduce... to support that possibility
2037 : // duplicate the parent now
2038 34 : parser_state *new_parent(parser_state::copy(free_states, p));
2039 34 : p = new_parent;
2040 : //std::cerr << " copy " << reinterpret_cast<void*>(state) << " to " << reinterpret_cast<void*>(p) << "\n";
2041 : }
2042 34 : p->add_node(state->f_node);
2043 :
2044 : // remove this state from the current set of rules
2045 : //std::cerr << "XXX delete " << reinterpret_cast<void*>(state) << " (parent: " << reinterpret_cast<void*>(p) << ")\n";
2046 34 : parser_state::free(current, free_states, state);
2047 :
2048 : // continue with the parent which will get its
2049 : // position increased on the next iteration
2050 34 : state = p;
2051 34 : current.push_back(state);
2052 : }
2053 : else
2054 : {
2055 : // forget about that state; we're reducing it for the second time?!
2056 : //std::cerr << ">>>>>>>>>>>>>>>>>>>> delete on > count (double reduce) " << reinterpret_cast<void*>(state) << "\n";
2057 0 : parser_state::free(current, free_states, state);
2058 : }
2059 : }
2060 : // else -- the user is not finished with this state
2061 : }
2062 : while(repeat);
2063 : //std::cerr << "next_token() returns with: " << (*state->f_choices)[state->f_rule].to_string() << "\n";
2064 :
2065 : //std::cerr << "NEXT TOKEN: =================================================================\n";
2066 : //parser_state::display_array(current);
2067 56 : }
2068 :
2069 1 : bool grammar::parse(lexer & input, choices & start)
2070 : {
2071 : // the result of the parser against the lexer is a tree of tokens
2072 : //
2073 : // to run the parser, we need a state, this can be defined locally
2074 : // because we do not need it in the result;
2075 : //
2076 : // create the root rule
2077 2 : choices root(this, "root");
2078 1 : root >>= start >> TOKEN_ID_NONE;
2079 : // TODO: all the state pointers leak if we throw...
2080 1 : parser_state * s(new parser_state(nullptr, root, 0));
2081 1 : s->f_line = 1;
2082 :
2083 2 : state_array_t free_states;
2084 2 : state_array_t current;
2085 1 : current.push_back(s);
2086 7 : while(!current.empty())
2087 : {
2088 4 : uint32_t const line(input.line());
2089 :
2090 : // we're working on the 'check' vector which is
2091 : // a copy of the current vector so the current
2092 : // vector can change in size
2093 : #ifdef DEBUG
2094 : //SNAP_LOG_TRACE("B: ================================================================= (line: ")(input.line())(")");
2095 : //parser_state::display_array(current);
2096 : #endif
2097 :
2098 : bool retry;
2099 25 : do
2100 : {
2101 25 : retry = false;
2102 50 : state_array_t check(current);
2103 166 : for(state_array_t::const_iterator it(check.begin());
2104 166 : it != check.end(); ++it)
2105 : {
2106 : // it is a state, check whether the current entry
2107 : // is a token or a rule
2108 141 : parser_state *state(*it);
2109 141 : const rule::rule_ref ref((*state->f_choices)[state->f_rule][state->f_position]);
2110 141 : token_t token_id(ref.get_token().get_id());
2111 :
2112 : // only take care of choices in this loop (terminators are
2113 : // handled in the next loop)
2114 141 : if(token_id == token_t::TOKEN_ID_CHOICES_ENUM)
2115 : {
2116 : // follow the choice by adding all of the rules it points to
2117 23 : choices * c(&ref.get_choices());
2118 :
2119 23 : int const max_choices(c->count());
2120 104 : for(int r(0); r < max_choices; ++r)
2121 : {
2122 81 : rule::rule_ref const child_ref((*c)[r][0]);
2123 :
2124 : // recursive?
2125 243 : if(token_t::TOKEN_ID_CHOICES_ENUM == child_ref.get_token().get_id()
2126 243 : && &child_ref.get_choices() == c)
2127 : {
2128 : // ignore recursive at this level, we take them
2129 : // in account when reducing instead
2130 : //std::cerr << " SKIP RECURSIVE -- " << c->name() << " --> " << (*c)[r].to_string() << "\n";
2131 33 : continue;
2132 : }
2133 48 : parser_state * child(parser_state::alloc(free_states, state, *c, r));
2134 48 : child->f_line = line;
2135 : //std::cerr << " " << c->name() << " --> " << (*c)[r].to_string() << "\n";
2136 :
2137 : // check whether this is recursive; very important
2138 : // to avoid infinite loop; recurvise rules are used
2139 : // only when the concern rule gets reduced
2140 : // the child position is always 0 here (it's a new child)
2141 48 : bool recursive(false);
2142 : // token_t const child_token_id(child_ref.get_token().get_id());
2143 : // if(child_token_id == token_t::TOKEN_ID_CHOICES_ENUM)
2144 : // {
2145 : // // if the new child state starts with a 'choices'
2146 : // // and that's a 'choices' we already added
2147 : // // (including this very child,) then
2148 : // // that child is recursive
2149 : // choices *child_choices(&child_ref.get_choices());
2150 : //std::cerr << " --> follow choice " << c->name() << " with sub-choice " << child_choices->name() << "\n";
2151 : // // start from ourselves
2152 : // int i(0);
2153 : // for(parser_state *p(child); p != nullptr && i < 2; p = p->f_parent, ++i)
2154 : // {
2155 : // if(child_choices == p->f_choices)
2156 : // {
2157 : // if(p->f_parent == nullptr)
2158 : // {
2159 : // throw snap_logic_exception("invalid recursion (root cannot be recursive)");
2160 : // }
2161 : // // p may be ourselves so we cannot put that
2162 : // // there, use the parent instead
2163 : //std::cerr << " *** CHANGED TO REDUCE ***\n";
2164 : // p->f_parent->f_add_on_reduce.push_back(child);
2165 : // recursive = true;
2166 : // break;
2167 : // }
2168 : //
2169 : // // cannot reduce any more than that if
2170 : // // this rule is not at the end of list of
2171 : // // choices
2172 : // //if(p->f_position + 1 < (*p->f_choices)[p->f_rule].count())
2173 : // //{
2174 : // // // TODO: this is not correct if the
2175 : // // // following rule(s) support EMPTY
2176 : // // break;
2177 : // //}
2178 : // }
2179 : // }
2180 :
2181 : // if recursive it was already added to all the
2182 : // states where it needs to be; otherwise we add it
2183 : // to the current stack
2184 48 : if(!recursive)
2185 : {
2186 48 : current.push_back(child);
2187 : }
2188 : }
2189 23 : current.remove(current.indexOf(state));
2190 23 : retry = true;
2191 : }
2192 118 : else if(token_id == token_t::TOKEN_ID_EMPTY_ENUM)
2193 : {
2194 : // we have to take care of empty rules here since anything
2195 : // coming after an empty rule has to be added to the list
2196 : // of rules here (it is very important because of the
2197 : // potential for recursive rules)
2198 0 : token t(token_t::TOKEN_ID_EMPTY_ENUM);
2199 0 : state->add_token(t);
2200 0 : next_token(state, current, free_states);
2201 0 : retry = true;
2202 : }
2203 : }
2204 : } while(retry);
2205 : #ifdef DEBUG
2206 : //std::cerr << "A: ================================================================= (line: " << input.line() << ")\n";
2207 : //parser_state::display_array(current);
2208 : #endif
2209 :
2210 : // get the first token
2211 7 : token t(input.next_token());
2212 : #ifdef DEBUG
2213 : //std::cerr << ". token type: " << t.to_string() << " to try against \n";
2214 : #endif
2215 :
2216 7 : state_array_t check(current);
2217 : #ifdef DEBUG
2218 : // lock all those states to make sure we don't delete the wrong one
2219 86 : for(state_array_t::const_iterator it(check.begin());
2220 86 : it != check.end(); ++it)
2221 : {
2222 82 : (*it)->lock();
2223 : }
2224 : #endif
2225 81 : for(state_array_t::const_iterator it(check.begin());
2226 81 : it != check.end(); ++it)
2227 : {
2228 : // it is a state, check whether the current entry
2229 : // is a token or a rule
2230 78 : parser_state *state(*it);
2231 78 : rule::rule_ref const ref((*state->f_choices)[state->f_rule][state->f_position]);
2232 78 : token_t const token_id(ref.get_token().get_id());
2233 78 : if(token_id == token_t::TOKEN_ID_CHOICES_ENUM
2234 78 : || token_id == token_t::TOKEN_ID_EMPTY_ENUM)
2235 : {
2236 0 : throw snap_logic_exception("this should never happen since the previous for() loop removed all of those!");
2237 : }
2238 : else
2239 : {
2240 78 : bool remove(false);
2241 78 : if(t.get_id() != token_id)
2242 : {
2243 43 : remove = true;
2244 : }
2245 : else
2246 : {
2247 35 : switch(token_id)
2248 : {
2249 27 : case token_t::TOKEN_ID_LITERAL_ENUM:
2250 : // a literal must match exactly
2251 27 : if(t.get_value().toString() != ref.get_value())
2252 : {
2253 26 : remove = true;
2254 : }
2255 27 : break;
2256 :
2257 0 : case token_t::TOKEN_ID_KEYWORD_ENUM:
2258 : // a keyword must match exactly
2259 0 : if(t.get_value().toInt() != ref.get_keyword().number())
2260 : {
2261 0 : remove = true;
2262 : }
2263 0 : break;
2264 :
2265 7 : case token_t::TOKEN_ID_IDENTIFIER_ENUM:
2266 : case token_t::TOKEN_ID_STRING_ENUM:
2267 : case token_t::TOKEN_ID_INTEGER_ENUM:
2268 : case token_t::TOKEN_ID_FLOAT_ENUM:
2269 : // this is a match whatever the value
2270 7 : break;
2271 :
2272 1 : case token_t::TOKEN_ID_NONE_ENUM:
2273 : // this state is the root state, this means the result
2274 : // is really the child node of this current state
2275 : //
2276 1 : f_result = qSharedPointerDynamicCast<token_node, token>((*state->f_node)[0]);
2277 1 : return true;
2278 :
2279 0 : default:
2280 : // at this point other tokens are rejected here
2281 0 : throw snap_parser_unexpected_token(QString("unexpected token %1").arg(static_cast<int>(token_id)));
2282 :
2283 : }
2284 : }
2285 : #ifdef DEBUG
2286 77 : state->unlock();
2287 : #endif
2288 77 : if(remove)
2289 : {
2290 : //std::cerr << "<*> delete unmatched state: " << state->f_choices->name() << "\n";
2291 69 : parser_state::free(current, free_states, state);
2292 : }
2293 : else
2294 : {
2295 : // save this token as it was accepted
2296 8 : state->add_token(t);
2297 : //std::cerr << ">>> next token (IN)\n";
2298 8 : next_token(state, current, free_states);
2299 : //std::cerr << ">>> next token (OUT)\n";
2300 : }
2301 : }
2302 : }
2303 : }
2304 :
2305 0 : return false;
2306 : }
2307 :
2308 :
2309 :
2310 : } // namespace parser
2311 6 : } // namespace snap
2312 :
2313 : // vim: ts=4 sw=4 et
|