Current Version: 1.0.33
Project Name: csspp
parser.cpp
Go to the documentation of this file.
1// Copyright (c) 2015-2025 Made to Order Software Corp. All Rights Reserved
2//
3// This program is free software; you can redistribute it and/or modify
4// it under the terms of the GNU General Public License as published by
5// the Free Software Foundation; either version 2 of the License, or
6// (at your option) any later version.
7//
8// This program is distributed in the hope that it will be useful,
9// but WITHOUT ANY WARRANTY; without even the implied warranty of
10// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11// GNU General Public License for more details.
12//
13// You should have received a copy of the GNU General Public License along
14// with this program; if not, write to the Free Software Foundation, Inc.,
15// 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
16
46#include "csspp/parser.h"
47
48#include "csspp/exception.h"
49
50#include <iostream>
51
52namespace csspp
53{
54
55namespace
56{
57
60
61} // no name namespace
62
/** \brief Body of the parser(lexer::pointer_t l) constructor.
 *
 * The lexer pointer \p l is kept in f_lexer and one token is read
 * right away through next_token(), so f_last_token is already set
 * when the first parsing function runs.
 *
 * \note The signature line is not part of this listing; it is taken
 *       from the cross-reference index ("parser(lexer::pointer_t l)").
 */
64 : f_lexer(l)
65{
66 next_token();
67}
68
73
78
83
88
/** \brief Parse a component value list starting at the current token.
 *
 * Convenience overload without parameters: it forwards to the
 * two-argument component_value_list(), passing the token last read
 * (f_last_token) and the g_component_value_flag_return_on_semi_colon
 * flag.
 *
 * \return Whatever the two-argument component_value_list() returns.
 */
90{
91 return component_value_list(f_last_token, g_component_value_flag_return_on_semi_colon);
92}
93
98
/** \brief Read the next token from the lexer.
 *
 * The token returned by f_lexer->next_token() is saved in f_last_token
 * so other functions can refer to the current token without having it
 * passed around, and the same token is returned to the caller.
 *
 * \return The token just read (also available as f_last_token).
 */
100{
101 f_last_token = f_lexer->next_token();
// debug aid: uncomment the next line to print every token as it is read
102//std::cerr << "*** TOKEN: " << *f_last_token;
103 return f_last_token;
104}
105
107{
108 node::pointer_t result(new node(node_type_t::LIST, n->get_position()));
109
110 for(; !n->is(node_type_t::EOF_TOKEN); n = f_last_token)
111 {
112 // completely ignore the CDO and CDC; if the "assembler"
113 // wants to output them, it will do so, but otherwise they
114 // are just completely ignored
115 //
116 // also white spaces at this level are pretty much useless
117 //
118 if(n->is(node_type_t::CDO)
119 || n->is(node_type_t::CDC)
120 || n->is(node_type_t::WHITESPACE))
121 {
122 next_token();
123 continue;
124 }
125
129 {
130 error::instance() << n->get_position()
131 << "Unexpected closing block of type: " << n->get_type() << "."
133 break;
134 }
135
136 if(n->is(node_type_t::COMMENT))
137 {
138 result->add_child(n);
139 next_token();
140 }
141 else if(n->is(node_type_t::AT_KEYWORD))
142 {
143 result->add_child(at_rule(n));
144 }
145 else
146 {
147 // anything else is a qualified rule
148 result->add_child(qualified_rule(n));
149 }
150 }
151
152 // we always return the LIST because if it starts with @import (or rather
153 // is just one @import) or $var, then it needs to be replaced and we
154 // could not do that if those were root nodes
155 return result;
156}
157
/** \brief Read rules one after another and gather them in a LIST node.
 *
 * NOTE(review): the signature line is missing from this listing; this
 * is presumably the one-parameter overload of rule_list() taking the
 * current token as \p n -- confirm against parser.h.
 *
 * A new LIST node is created at the position of \p n. rule(n) is then
 * called repeatedly and each result is appended to the list. The loop
 * ends when rule() hands back an EOF_TOKEN node or when the current
 * token is an EOF_TOKEN.
 *
 * \param[in] n  The token to start from; it is reset to f_last_token
 *               after each rule.
 *
 * \return The LIST node holding one child per call to rule().
 */
159{
160 node::pointer_t result(new node(node_type_t::LIST, n->get_position()));
161
// q is null on the first pass, hence the !q test; note that a rule()
// result of type EOF_TOKEN is still added to the list before the loop
// condition is evaluated again and stops the loop
162 for(node::pointer_t q; (!q || !q->is(node_type_t::EOF_TOKEN)) && !n->is(node_type_t::EOF_TOKEN); n = f_last_token)
163 {
164 q = rule(n);
165 result->add_child(q);
166 }
167
168 return result;
169}
170
172{
173 if(n->is(node_type_t::CDO)
174 || n->is(node_type_t::CDC))
175 {
176 error::instance() << n->get_position()
177 << "HTML comment delimiters (<!-- and -->) are not allowed in this CSS document."
179 return node::pointer_t(new node(node_type_t::EOF_TOKEN, n->get_position()));
180 }
181
185 {
186 error::instance() << n->get_position()
187 << "Unexpected closing block of type: " << n->get_type() << "."
189 return node::pointer_t(new node(node_type_t::EOF_TOKEN, n->get_position()));
190 }
191
192 if(n->is(node_type_t::WHITESPACE))
193 {
194 // skip potential whitespaces
195 n = next_token();
196 }
197
198 if(n->is(node_type_t::AT_KEYWORD))
199 {
200 return at_rule(n);
201 }
202
203 // anything else is a qualified rule
204 return qualified_rule(n);
205}
206
208{
209 // the '@' was already eaten, it will be our result
210 node::pointer_t n(component_value_list(next_token(), g_component_value_flag_return_on_semi_colon));
211
212 if(n->empty())
213 {
214 error::instance() << at_keyword->get_position()
215 << "At '@' command cannot be empty (missing expression or block) unless ended by a semicolon (;)."
217 }
218 else
219 {
220 node::pointer_t last_child(n->get_last_child());
222 {
223 // skip the semi-colon
224 //
225 next_token();
226 }
227 else if(!last_child->is(node_type_t::OPEN_CURLYBRACKET))
228 {
229 error::instance() << at_keyword->get_position()
230 << "At '@' command must end with a block or a ';'."
232 }
233 at_keyword->take_over_children_of(n);
234 }
235
236 return at_keyword;
237}
238
240{
241 if(n->is(node_type_t::EOF_TOKEN))
242 {
243 return n;
244 }
245 if(n->is(node_type_t::SEMICOLON))
246 {
247 // skip the ';' (i.e. ';' in 'foo { blah: 123 };')
248 next_token();
249
250 // it is an error, we just make it clear what error it is because
251 // by default it would otherwise come out as "invalid qualified rule"
252 // which is rather hard to understand here...
253 error::instance() << n->get_position()
254 << "A qualified rule cannot end a { ... } block with a ';'."
256 return node::pointer_t(new node(node_type_t::EOF_TOKEN, n->get_position()));
257 }
258
259 // a qualified rule is a component value list that
260 // ends with a block
261 node::pointer_t result(component_value_list(n, g_component_value_flag_return_on_variable));
262
263 if(result->empty())
264 {
265 // I have not been able to reach these lines, somehow...
266 error::instance() << n->get_position()
267 << "A qualified rule cannot be empty; you are missing a { ... } block."
269 }
270 else
271 {
272 node::pointer_t last_child(result->get_last_child());
273 if(!is_variable_set(result, false)
274 && !last_child->is(node_type_t::OPEN_CURLYBRACKET))
275 {
276 error::instance() << n->get_position()
277 << "A qualified rule must end with a { ... } block."
279 }
280 }
281
282 return result;
283}
284
286{
287 node::pointer_t result(new node(node_type_t::LIST, n->get_position()));
288
289 for(;;)
290 {
291 if(n->is(node_type_t::WHITESPACE))
292 {
293 n = next_token();
294 }
295
296 if(n->is(node_type_t::IDENTIFIER))
297 {
298 result->add_child(declaration(n));
300 {
301 // the EOF_TOKEN below generates an error if we
302 // do not remove those spaces ahead of time
304 {
305 next_token();
306 }
307 break;
308 }
309 // skip the ';'
310 n = next_token();
311 }
312 else if(n->is(node_type_t::AT_KEYWORD))
313 {
314 result->add_child(at_rule(n));
315 n = f_last_token;
316 }
317 else
318 {
319 break;
320 }
321 }
322
324 {
325 error::instance() << f_last_token->get_position()
326 << "the end of the stream was not reached in this declaration, we stopped on a "
327 << f_last_token->get_type()
328 << "."
330 }
331
332 return result;
333}
334
336{
337 node::pointer_t result(new node(node_type_t::DECLARATION, identifier->get_position()));
338 result->set_string(identifier->get_string());
339
341
342 // allow white spaces
343 if(n->is(node_type_t::WHITESPACE))
344 {
345 n = next_token();
346 }
347
348 // here we must have a ':'
349 if(n->is(node_type_t::COLON))
350 {
351 // skip the colon, no need to keep it around
352 n = next_token();
353 }
354 else
355 {
356 error::instance() << n->get_position()
357 << "':' missing in your declaration starting with \""
358 << identifier->get_string()
359 << "\"."
361 }
362
363 // a component value
364 result->add_child(component_value_list(n, g_component_value_flag_return_on_semi_colon));
365
366 return result;
367}
368
370{
371 node::pointer_t result(new node(node_type_t::LIST, n->get_position()));
372
373 node::pointer_t list(new node(node_type_t::COMPONENT_VALUE, n->get_position()));
374 result->add_child(list);
375 for(;; n = f_last_token)
376 {
377 // this test is rather ugly... also it kinda breaks the
378 // so called 'preserved tokens'
379 //
380 if(n->is(node_type_t::EOF_TOKEN)
384 || ((flags & g_component_value_flag_return_on_semi_colon) != 0 && n->is(node_type_t::SEMICOLON)) // declarations handle the semi-colon differently
385 || n->is(node_type_t::CDO)
386 || n->is(node_type_t::CDC))
387 {
388 break;
389 }
390
391 if(n->is(node_type_t::AT_KEYWORD))
392 {
393 list->add_child(at_rule(n));
394 continue;
395 }
396
397 if(n->is(node_type_t::SEMICOLON))
398 {
399 next_token();
400
401 // remove leading and trailing whitespace, no need really
402 while(!list->empty() && list->get_child(0)->is(node_type_t::WHITESPACE))
403 {
404 list->remove_child(0);
405 }
406 while(!list->empty() && list->get_last_child()->is(node_type_t::WHITESPACE))
407 {
408 list->remove_child(list->size() - 1);
409 }
410
411 // variables are viewed as a terminator string when ended by a
412 // semicolon; a qualified rule normally requires a block to
413 // end, but we have a special case to allow definition of
414 // variables anywhere
415 if((flags & g_component_value_flag_return_on_variable) != 0
416 && is_variable_set(list, false))
417 {
418 break;
419 }
420
421 if(!list->empty())
422 {
423 // move to a new sub-list
424 list.reset(new node(node_type_t::COMPONENT_VALUE, n->get_position()));
425 result->add_child(list);
426 }
427 continue;
428 }
429
430 if(n->is(node_type_t::EXCLAMATION))
431 {
432 node::pointer_t exclamation(next_token());
433 if(exclamation->is(node_type_t::WHITESPACE))
434 {
435 exclamation = next_token();
436 }
437 if(exclamation->is(node_type_t::IDENTIFIER))
438 {
439 // remove the WHITESPACE before if there is one
440 if(!list->empty()
441 && list->get_last_child()->is(node_type_t::WHITESPACE))
442 {
443 list->remove_child(list->get_last_child());
444 }
445
446 // save the identifier in the EXCLAMATION node
447 // and add that to the current COMPONENT_VALUE
448 n->set_string(exclamation->get_string());
449 list->add_child(n);
450
451 // TBD: should we check that the identifier is either
452 // "important" or "global" at this point?
453 // (there are also others we support like "default")
454
455 // read the next token and if it is a space, skip it
456 n = next_token();
457 if(n->is(node_type_t::WHITESPACE))
458 {
459 next_token();
460 }
461 }
462 else
463 {
464 error::instance() << exclamation->get_position()
465 << "A '!' must be followed by an identifier, got a "
466 << exclamation->get_type()
467 << " instead."
469 }
470 continue;
471 }
472
473 // remove trailing whitespace before a block, no need
477 && !list->empty()
478 && list->get_last_child()->is(node_type_t::WHITESPACE))
479 {
480 list->remove_child(list->size() - 1);
481 }
482
484 {
485 // in this special case, we read the {}-block and return
486 // (i.e. end of an @-rule, etc.)
487 //
488 // however, to support the full SASS syntax we need to
489 // support two special cases:
490 //
491 // $var: { some-value: here; };
492 // font: { family: strange; style: italic };
493 //
494 // For those special entries, we must avoid returning
495 // when we find a block (darn! this grammar...)
496 //
497 // Note that the second test is done after we read the block
498 // since the presence of the block is checked in case of the
499 // nested declaration.
500 //
501 list->add_child(component_value(n));
502
503 // remove leading and trailing whitespace, no need really
504 // (to make sure the tests below work as expected)
505 //
506 while(!list->empty() && list->get_child(0)->is(node_type_t::WHITESPACE))
507 {
508 list->remove_child(0);
509 }
510
511 // return, or were those sub-definitions?
512 //
513 if(!is_variable_set(list, true)
514 && !is_nested_declaration(list))
515 {
516 break;
517 }
518
520 {
521 next_token();
522 }
523
525 {
526 // blocks defining a variable or a nested declaration
527 // must be followed by a semi-colon or we have an error
528 error::instance() << list->get_child(0)->get_position()
529 << "Variable set to a block and a nested property block must end with a semicolon (;) after said block."
531 }
532 }
533 else
534 {
535 list->add_child(component_value(n));
536 }
537 }
538
539 // remove leading and trailing whitespace, no need really
540 if(!list->empty() && list->get_child(0)->is(node_type_t::WHITESPACE))
541 {
542 list->remove_child(0);
543 }
544 if(!list->empty() && list->get_last_child()->is(node_type_t::WHITESPACE))
545 {
546 list->remove_child(list->size() - 1);
547 }
548
549 if(list->empty())
550 {
551 result->remove_child(list);
552 }
553
554 if(result->size() == 1)
555 {
556 result = result->get_last_child();
557 }
558
559 return result;
560}
561
563{
565 {
566 // parse a block up to '}'
567 return block_list(n);
568 }
569
571 {
572 // parse a block up to ']'
574 }
575
577 || n->is(node_type_t::FUNCTION)
579 {
580 // parse a block up to ')'
582 }
583
584 next_token();
585
586 // n is the token we keep
587 return n;
588}
589
591{
593 b->take_over_children_of(children);
595 {
596 next_token();
597 }
598 if(f_last_token->is(closing_token))
599 {
600 // skip that closing token
601 next_token();
602 }
603 else
604 {
605 error::instance() << b->get_position()
606 << "Block expected to end with "
607 << closing_token
608 << " but got "
609 << f_last_token->get_type()
610 << " instead."
612 }
613
614 return b;
615}
616
618{
619 // skip the '{'
620 next_token();
621
622 do
623 {
625 b->add_child(children);
626 // WHITESPACE tokens are skipped between component values
627 // Also the variable tokens that force a return without a next_token()
631 {
632 next_token();
633 }
636 {
637 error::instance() << b->get_position()
638 << "Block expected to end with "
640 << " but got "
641 << f_last_token->get_type()
642 << " instead."
644 next_token();
645 }
646 }
649
651 {
652 error::instance() << b->get_position()
653 << "Block expected to end with "
655 << " but got "
656 << f_last_token->get_type()
657 << " instead."
659 }
660
661 // skip the '}'
662 next_token();
663
664 return b;
665}
666
/** \brief Check whether the children of a node start a variable assignment.
 *
 * The children of \p n are expected to look like:
 *
 *     VARIABLE | VARIABLE_FUNCTION, [WHITESPACE], COLON, ...
 *
 * When \p with_block is true, the colon must further be followed by an
 * optional WHITESPACE and then an OPEN_CURLYBRACKET.
 *
 * \note The signature line is not part of this listing; the index gives
 *       it as "static bool is_variable_set(node::pointer_t n, bool with_block)".
 *
 * \param[in] n  The node whose children are inspected.
 * \param[in] with_block  Whether a '{' block is required after the colon.
 *
 * \return true if the children match the pattern described above.
 */
668{
669 // a variable set is at least 3 tokens:
670 // $var:<value>
671 if(n->size() < 3
672 || (!n->get_child(0)->is(node_type_t::VARIABLE)
673 && !n->get_child(0)->is(node_type_t::VARIABLE_FUNCTION)))
674 {
675 return false;
676 }
677
// pos is 1 or 2 here; both indexes are valid since size() >= 3 was
// verified above
678 size_t pos(n->get_child(1)->is(node_type_t::WHITESPACE) ? 2 : 1);
679 if(!n->get_child(pos)->is(node_type_t::COLON))
680 {
681 return false;
682 }
683
684 if(!with_block)
685 {
686 // in this case the shorthand is enough: $var ':'
687 return true;
688 }
689
690 // WARNING: from here the size needs to be checked since the list may
691 // be smaller than what we are looking for in it
692
693 // in this case we need to have: $var ':' '{'
694 ++pos;
// skip one optional WHITESPACE between the ':' and the '{'
695 if(pos < n->size() && n->get_child(pos)->is(node_type_t::WHITESPACE))
696 {
697 ++pos;
698 }
699
700 return pos < n->size() && n->get_child(pos)->is(node_type_t::OPEN_CURLYBRACKET);
701}
702
704{
705 // a declaration with a sub-block
706 // field: [optional-values] '{' ... '}' ';'
707 if(n->size() < 3
708 || !n->get_child(0)->is(node_type_t::IDENTIFIER)
709 || !n->get_last_child()->is(node_type_t::OPEN_CURLYBRACKET))
710 {
711 return false;
712 }
713
714 // the colon is mandatory, after an optional whitespace
715 size_t pos(n->get_child(1)->is(node_type_t::WHITESPACE) ? 2 : 1);
716 if(!n->get_child(pos)->is(node_type_t::COLON))
717 {
718 return false;
719 }
720 ++pos; // skip the colon
721 if(pos >= n->size())
722 {
723 // this is "too short" so not really a declaration nor a component value
724 // note: I'm not able to reach this one anymore, I think that's because
725 // of the OPEN_CURLYBRACKET that I moved at the top...
726 return false; // LCOV_EXCL_LINE
727 }
728 if(n->get_child(pos)->is(node_type_t::WHITESPACE)
729 || n->get_child(pos)->is(node_type_t::OPEN_CURLYBRACKET))
730 {
731 // a colon cannot be followed by a space or '{' in a valid selector
732 return true;
733 }
734 if(n->get_child(pos)->is(node_type_t::FUNCTION))
735 {
736 // in this case we have <id>':'<func> which can be a valid selector
737 // so we have to skip this function otherwise we return 'true'
738 ++pos;
739 if(pos >= n->size())
740 {
741 // this test is for security (code may change over time...)
742 // but since the last item must be a curly bracket, it could
743 // not be this function, right?
744 return false; // LCOV_EXCL_LINE
745 }
746 }
747
748 for(;;)
749 {
750 switch(n->get_child(pos)->get_type())
751 {
757 // a valid declaration cannot include one of those
758 return false;
759
760 case node_type_t::ADD:
762 //case node_type_t::FUNCTION: -- must be preceded by ':' so no need here we already returned if we hit a colon
771 break;
772
773 default:
774 // this is something that would not be valid in a selector
775 // so we must have a declaration...
776 return true;
777
778 }
779
780 ++pos;
781 if(pos >= n->size())
782 {
783 // everything looks valid for a selector, so return false
784 return false;
785 }
786 }
787}
788
790{
791 switch(separator)
792 {
795 break;
796
797 default:
798 throw csspp_exception_logic("argify only supports ',' and '/' as separators.");
799
800 }
801
802 // make sure there are items and these are not already arguments
803 size_t const max_children(n->size());
804 if(max_children > 0
805 && !n->get_child(0)->is(node_type_t::ARG))
806 {
807 node::pointer_t temp(new node(node_type_t::LIST, n->get_position()));
808 temp->take_over_children_of(n);
809
810 node::pointer_t arg(new node(node_type_t::ARG, n->get_position()));
811 arg->set_integer(static_cast<integer_t>(separator));
812 n->add_child(arg);
813
814 for(size_t i(0); i < max_children; ++i)
815 {
816 node::pointer_t child(temp->get_child(i));
817 if(child->is(node_type_t::OPEN_CURLYBRACKET))
818 {
819 if(i + 1 != max_children)
820 {
821 throw csspp_exception_logic("compiler.cpp:compiler::argify(): list that has an OPEN_CURLYBRACKET that is not the last child."); // LCOV_EXCL_LINE
822 }
823 n->add_child(child);
824 break;
825 }
826 if(child->is(separator))
827 {
828 // make sure to remove any WHITESPACE appearing just
829 // before a comma
830 while(!arg->empty() && arg->get_last_child()->is(node_type_t::WHITESPACE))
831 {
832 arg->remove_child(arg->get_last_child());
833 }
834 if(arg->empty())
835 {
836 if(n->size() == 1)
837 {
838 error::instance() << n->get_position()
839 << "dangling comma at the beginning of a list of arguments or selectors."
841 }
842 else
843 {
844 error::instance() << n->get_position()
845 << "two commas in a row are invalid in a list of arguments or selectors."
847 }
848 return false;
849 }
850 if(i + 1 == max_children
851 || temp->get_child(i + 1)->is(node_type_t::OPEN_CURLYBRACKET))
852 {
853 error::instance() << n->get_position()
854 << "dangling comma at the end of a list of arguments or selectors."
856 return false;
857 }
858 // move to the next 'arg'
859 arg.reset(new node(node_type_t::ARG, n->get_position()));
860 arg->set_integer(static_cast<integer_t>(separator));
861 n->add_child(arg);
862 }
863 else if(!child->is(node_type_t::WHITESPACE) || !arg->empty())
864 {
865 arg->add_child(child);
866 }
867 }
868 }
869
870 return true;
871}
872
873} // namespace csspp
874
875// Local Variables:
876// mode: cpp
877// indent-tabs-mode: nil
878// c-basic-offset: 4
879// tab-width: 4
880// End:
881
882// vim: ts=4 sw=4 et
static error & instance()
Definition error.cpp:77
void reset()
Definition error.cpp:250
std::shared_ptr< lexer > pointer_t
Definition lexer.h:29
std::shared_ptr< node > pointer_t
Definition node.h:132
node::pointer_t declaration_list()
Definition parser.cpp:84
node::pointer_t stylesheet()
Definition parser.cpp:69
parser(lexer::pointer_t l)
Definition parser.cpp:63
node::pointer_t at_rule(node::pointer_t at_keyword)
Definition parser.cpp:207
node::pointer_t rule()
Definition parser.cpp:79
node::pointer_t component_value_list()
Definition parser.cpp:89
static bool argify(node::pointer_t n, node_type_t const separator=node_type_t::COMMA)
Definition parser.cpp:789
lexer::pointer_t f_lexer
Definition parser.h:57
node::pointer_t next_token()
Definition parser.cpp:99
node::pointer_t qualified_rule(node::pointer_t n)
Definition parser.cpp:239
node::pointer_t declaration(node::pointer_t identifier)
Definition parser.cpp:335
static bool is_nested_declaration(node::pointer_t n)
Definition parser.cpp:703
node::pointer_t f_last_token
Definition parser.h:58
node::pointer_t block_list(node::pointer_t b)
Definition parser.cpp:617
node::pointer_t rule_list()
Definition parser.cpp:74
node::pointer_t block(node::pointer_t b, node_type_t closing_token)
Definition parser.cpp:590
node::pointer_t component_value()
Definition parser.cpp:94
static bool is_variable_set(node::pointer_t n, bool with_block)
Definition parser.cpp:667
int const g_component_value_flag_return_on_semi_colon
Definition parser.cpp:58
The namespace of all the classes in the CSS Preprocessor.
Definition csspp.h:48
node_type_t
Definition node.h:41
int64_t integer_t
Definition csspp.h:58

Documentation of CSS Preprocessor.

This document is part of the Snap! Websites Project.

Copyright by Made to Order Software Corp.