Line data Source code
1 : // Snap Websites Server -- advanced parser
2 : // Copyright (c) 2011-2019 Made to Order Software Corp. All Rights Reserved
3 : //
4 : // This program is free software; you can redistribute it and/or modify
5 : // it under the terms of the GNU General Public License as published by
6 : // the Free Software Foundation; either version 2 of the License, or
7 : // (at your option) any later version.
8 : //
9 : // This program is distributed in the hope that it will be useful,
10 : // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 : // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 : // GNU General Public License for more details.
13 : //
14 : // You should have received a copy of the GNU General Public License
15 : // along with this program; if not, write to the Free Software
16 : // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 : #pragma once
18 :
19 : #include "snapwebsites/snap_exception.h"
20 :
21 : #include <QVariant>
22 : #include <QVector>
23 : #include <QSharedPointer>
24 :
25 : namespace snap
26 : {
27 : namespace parser
28 : {
29 :
30 0 : class snap_parser_exception : public snap_exception
31 : {
32 : public:
33 : snap_parser_exception(char const * what_msg) : snap_exception("parser", what_msg) {}
34 : snap_parser_exception(std::string const & what_msg) : snap_exception("parser", what_msg) {}
35 0 : snap_parser_exception(QString const & what_msg) : snap_exception("parser", what_msg) {}
36 : };
37 :
38 : class snap_parser_no_current_choices : public snap_parser_exception
39 : {
40 : public:
41 : snap_parser_no_current_choices(char const * what_msg) : snap_parser_exception(what_msg) {}
42 : snap_parser_no_current_choices(std::string const & what_msg) : snap_parser_exception(what_msg) {}
43 : snap_parser_no_current_choices(QString const & what_msg) : snap_parser_exception(what_msg) {}
44 : };
45 :
46 : class snap_parser_state_has_children : public snap_parser_exception
47 : {
48 : public:
49 : snap_parser_state_has_children(char const * what_msg) : snap_parser_exception(what_msg) {}
50 : snap_parser_state_has_children(std::string const & what_msg) : snap_parser_exception(what_msg) {}
51 : snap_parser_state_has_children(QString const & what_msg) : snap_parser_exception(what_msg) {}
52 : };
53 :
54 0 : class snap_parser_unexpected_token : public snap_parser_exception
55 : {
56 : public:
57 : snap_parser_unexpected_token(char const * what_msg) : snap_parser_exception(what_msg) {}
58 : snap_parser_unexpected_token(std::string const & what_msg) : snap_parser_exception(what_msg) {}
59 0 : snap_parser_unexpected_token(QString const & what_msg) : snap_parser_exception(what_msg) {}
60 : };
61 :
62 :
63 :
64 :
65 :
// Identifiers of the tokens handled by the lexer and the grammar.
//
// The numeric values are implicit: TOKEN_ID_NONE_ENUM is zero and each
// following enumerator is one more than the previous one.
enum class token_t
{
    // "not a token" (also end of input)
    TOKEN_ID_NONE_ENUM = 0,

    TOKEN_ID_INTEGER_ENUM,
    TOKEN_ID_FLOAT_ENUM,
    TOKEN_ID_IDENTIFIER_ENUM,
    TOKEN_ID_KEYWORD_ENUM,
    TOKEN_ID_STRING_ENUM,

    // literal character(s)
    TOKEN_ID_LITERAL_ENUM,

    // special empty token
    TOKEN_ID_EMPTY_ENUM,

    // pointer to a choices object
    TOKEN_ID_CHOICES_ENUM,

    // pointer to a choices object (see rules operator |() )
    TOKEN_ID_RULES_ENUM,

    // pointer to a node object
    TOKEN_ID_NODE_ENUM,

    // an error occurred
    TOKEN_ID_ERROR_ENUM
};
83 :
84 28 : struct token_id { token_id(token_t t) : f_type(t) {} operator token_t () const { return f_type; } private: token_t f_type; };
85 2 : struct token_id_none_def : public token_id { token_id_none_def() : token_id(token_t::TOKEN_ID_NONE_ENUM ) {} };
86 2 : struct token_id_integer_def : public token_id { token_id_integer_def() : token_id(token_t::TOKEN_ID_INTEGER_ENUM ) {} };
87 2 : struct token_id_float_def : public token_id { token_id_float_def() : token_id(token_t::TOKEN_ID_FLOAT_ENUM ) {} };
88 2 : struct token_id_identifier_def : public token_id { token_id_identifier_def() : token_id(token_t::TOKEN_ID_IDENTIFIER_ENUM) {} };
89 2 : struct token_id_keyword_def : public token_id { token_id_keyword_def() : token_id(token_t::TOKEN_ID_KEYWORD_ENUM ) {} };
90 2 : struct token_id_string_def : public token_id { token_id_string_def() : token_id(token_t::TOKEN_ID_STRING_ENUM ) {} };
91 2 : struct token_id_literal_def : public token_id { token_id_literal_def() : token_id(token_t::TOKEN_ID_LITERAL_ENUM ) {} };
92 2 : struct token_id_empty_def : public token_id { token_id_empty_def() : token_id(token_t::TOKEN_ID_EMPTY_ENUM ) {} };
93 :
94 : extern token_id_none_def TOKEN_ID_NONE;
95 : extern token_id_integer_def TOKEN_ID_INTEGER;
96 : extern token_id_float_def TOKEN_ID_FLOAT;
97 : extern token_id_identifier_def TOKEN_ID_IDENTIFIER;
98 : extern token_id_keyword_def TOKEN_ID_KEYWORD;
99 : extern token_id_string_def TOKEN_ID_STRING;
100 : extern token_id_literal_def TOKEN_ID_LITERAL;
101 : extern token_id_empty_def TOKEN_ID_EMPTY;
102 :
103 :
104 :
105 :
106 : class token
107 : {
108 : public:
109 554 : token(token_t id = TOKEN_ID_NONE) : f_id(id) {}
110 9 : token(token const & t) : f_id(t.f_id), f_value(t.f_value) {}
111 : token & operator = (token const & t)
112 : {
113 : if(this != &t)
114 : {
115 : f_id = t.f_id;
116 : f_value = t.f_value;
117 : }
118 : return *this;
119 : }
120 :
121 : // polymorphic type so user data works as expected
122 511 : virtual ~token() {}
123 :
124 3 : void set_id(token_t id) { f_id = id; }
125 540 : token_t get_id() const { return f_id; }
126 :
127 3 : void set_value(QVariant const & value) { f_value = value; }
128 29 : QVariant get_value() const { return f_value; }
129 :
130 : QString to_string() const;
131 :
132 : private:
133 : token_t f_id = token_t::TOKEN_ID_NONE_ENUM;
134 : QVariant f_value = QVariant();
135 : };
136 : typedef QVector<QSharedPointer<token> > vector_token_t;
137 :
138 : class keyword;
139 :
140 1 : class lexer
141 : {
142 : public:
143 : enum class lexer_error_t
144 : {
145 : LEXER_ERROR_NONE,
146 :
147 : LEXER_ERROR_INVALID_STRING,
148 : LEXER_ERROR_INVALID_C_COMMENT,
149 : LEXER_ERROR_INVALID_NUMBER,
150 :
151 : LEXER_ERROR_max
152 : };
153 :
154 1 : lexer() { f_pos = f_input.begin(); }
155 : lexer(lexer const & rhs) = delete;
156 : lexer & operator = (lexer const & rhs) = delete;
157 : bool eoi() const { return f_pos == f_input.end(); }
158 4 : uint32_t line() const { return f_line; }
159 : void set_input(QString const & input);
160 : void add_keyword(keyword & k);
161 : token next_token();
162 0 : lexer_error_t get_error_code() const { return f_error_code; }
163 0 : QString get_error_message() const { return f_error_message; }
164 0 : uint32_t get_error_line() const { return f_error_line; }
165 :
166 : private:
167 : // list of keywords / identifiers
168 : typedef QMap<QString, int> keywords_map_t;
169 :
170 : QString f_input = QString();
171 : QString::const_iterator f_pos = QString::const_iterator();
172 : uint32_t f_line = 0;
173 : keywords_map_t f_keywords = keywords_map_t();
174 : lexer_error_t f_error_code = lexer_error_t::LEXER_ERROR_NONE;
175 : QString f_error_message = QString();
176 : uint32_t f_error_line = 0;
177 : };
178 :
179 :
180 421 : class keyword
181 : {
182 : public:
183 133 : keyword() {}
184 : keyword(lexer & parent, QString const & keyword_identifier, int index_number = 0);
185 :
186 2 : QString identifier() const { return f_identifier; }
187 2 : int number() const { return f_number; }
188 :
189 : private:
190 : static int g_next_number;
191 :
192 : int f_number = 0;
193 : QString f_identifier = QString();
194 : };
195 :
196 : class choices;
197 : class token_node;
198 :
199 : // TODO: remove these once we only have shared & weak pointers
200 : //
201 : #pragma GCC diagnostic push
202 : #pragma GCC diagnostic ignored "-Weffc++"
203 55 : class rule
204 : {
205 : public:
206 : typedef void (*reducer_t)(rule const & r, QSharedPointer<token_node> & t);
207 :
208 71 : rule() : f_parent(nullptr), f_reducer(nullptr) {}
209 : rule(choices& c);
210 : rule(rule const & r);
211 :
212 : void add_rules(choices& c); // choices of rules
213 : void add_choices(choices& c); // sub-rule
214 : void add_token(token_t token); // any value accepted
215 : void add_literal(QString const& value);
216 : void add_keyword(keyword const& k);
217 53 : void set_reducer(reducer_t reducer) { f_reducer = reducer; }
218 124 : int count() const { return f_tokens.count(); }
219 :
220 : class rule_ref
221 : {
222 : public:
223 579 : rule_ref(rule const * r, int position)
224 579 : : f_rule(r), f_position(position)
225 : {
226 579 : }
227 : rule_ref(rule_ref const & ref)
228 : : f_rule(ref.f_rule), f_position(ref.f_position)
229 : {
230 : }
231 :
232 462 : token get_token() const { return f_rule->f_tokens[f_position].f_token; }
233 27 : QString get_value() const { return f_rule->f_tokens[f_position].f_value; }
234 0 : keyword get_keyword() const { return f_rule->f_tokens[f_position].f_keyword; }
235 194 : choices& get_choices() const { return *f_rule->f_tokens[f_position].f_choices; }
236 :
237 : private:
238 : rule const * f_rule = nullptr;
239 : int f_position = 0;
240 : };
241 :
242 579 : rule_ref const operator [] (int position) const
243 : {
244 579 : return rule_ref(this, position);
245 : }
246 :
247 34 : void reduce(QSharedPointer<token_node> & n) const
248 : {
249 34 : if(f_reducer != nullptr)
250 : {
251 30 : f_reducer(*this, n);
252 : }
253 34 : }
254 :
255 : rule& operator >> (token_id const & token);
256 : rule& operator >> (QString const & literal);
257 : rule& operator >> (char const * literal);
258 : rule& operator >> (keyword const & k);
259 : rule& operator >> (choices & c);
260 : rule& operator >= (rule::reducer_t function);
261 :
262 : QString to_string() const;
263 :
264 : private:
265 210 : struct rule_data_t
266 : {
267 : rule_data_t();
268 : rule_data_t(rule_data_t const & s);
269 : rule_data_t(choices & c);
270 : rule_data_t(token_t token);
271 : rule_data_t(QString const & value); // i.e. literal
272 : rule_data_t(keyword const & k);
273 :
274 : token_t f_token = token_t::TOKEN_ID_NONE_ENUM;
275 : QString f_value = QString(); // required value if not empty
276 : keyword f_keyword = keyword(); // the keyword
277 : choices * f_choices = nullptr; // sub-rule if not null & token TOKEN_ID_CHOICES_ENUM
278 : };
279 :
280 : choices * f_parent = nullptr;
281 : QVector<rule_data_t> f_tokens = QVector<rule_data_t>();
282 : reducer_t f_reducer = reducer_t();
283 : };
284 : #pragma GCC diagnostic pop
285 :
286 : // these have to be defined as friends of the class to enable
287 : // all possible cases
288 : rule & operator >> (token_id const & token_left, token_id const & token_right);
289 : rule & operator >> (token_id const & token, QString const & literal);
290 : rule & operator >> (token_id const & token, char const * literal);
291 : rule & operator >> (token_id const & token, keyword const & k);
292 : rule & operator >> (token_id const & token, choices & c);
293 : rule & operator >> (QString const & literal, token_id const & token);
294 : rule & operator >> (QString const & literal_left, QString const & literal_right);
295 : rule & operator >> (QString const & literal, keyword const & k);
296 : rule & operator >> (QString const & literal, choices & c);
297 : rule & operator >> (keyword const & k, token_id const & token);
298 : rule & operator >> (keyword const & k, QString const & literal);
299 : rule & operator >> (keyword const & k_left, keyword const & k_right);
300 : rule & operator >> (keyword const & k, choices & c);
301 : rule & operator >> (choices & c, token_id const & token);
302 : rule & operator >> (choices & c, QString const & literal);
303 : rule & operator >> (choices & c, keyword const & k);
304 : rule & operator >> (choices & c_left, choices & c_right);
305 : rule & operator >> (char const * literal, choices & c);
306 :
307 : // now a way to add a reducer function
308 : rule & operator >= (token_id const & token, rule::reducer_t function);
309 : rule & operator >= (QString const & literal, rule::reducer_t function);
310 : rule & operator >= (keyword const & k, rule::reducer_t function);
311 : rule & operator >= (choices & c, rule::reducer_t function);
312 :
313 : rule & operator | (token_id const & token, rule & r_right);
314 : rule & operator | (rule & r_left, token_id const & token);
315 : rule & operator | (rule & r_left, keyword const & k);
316 : rule & operator | (rule & r_left, rule & r_right);
317 : rule & operator | (rule & r, choices & c);
318 : // rule & operator | (choices & c, rule & r); -- defined in choices class
319 :
320 : class grammar;
321 :
322 : class choices
323 : {
324 : public:
325 : choices(grammar * parent, char const * choice_name = "");
326 : ~choices();
327 :
328 0 : QString const & name() const { return f_name; }
329 57 : int count() { return f_rules.count(); }
330 : void clear();
331 :
332 : choices & operator = (const choices & rhs);
333 :
334 : choices & operator >>= (token_id const & token);
335 : choices & operator >>= (QString const & literal);
336 : choices & operator >>= (keyword const & k);
337 : choices & operator >>= (choices & rhs);
338 : choices & operator >>= (rule & rhs);
339 :
340 : rule & operator | (rule & r);
341 :
342 : void add_rule(rule & r);
343 566 : rule const & operator [] (int rule) const
344 : {
345 566 : return *f_rules[rule];
346 : }
347 :
348 : // for debug purposes
349 : QString to_string() const;
350 :
351 : private:
352 : QString f_name = QString();
353 : QVector<rule *> f_rules = QVector<rule *>();
354 : };
355 : typedef QVector<choices *> choices_array_t;
356 :
357 :
// Base class for the user data a parser attaches to token_node objects
// (see token_node::set_user_data() below).
//
// The class is empty; its only purpose is to offer a virtual destructor
// so derived objects get destroyed properly through a base pointer.
//
// must always be used with QSharedPointer<>
class parser_user_data
{
public:
    virtual ~parser_user_data() = default;
};
368 :
369 :
370 : // token holder that can be saved in a tree like manner via the QObject
371 : // child/parent functionality
372 79 : class token_node : public token
373 : {
374 : // Q_OBJECT is not used because we don't have signals, slots or properties
375 : public:
376 88 : token_node() : token(token_t::TOKEN_ID_NODE_ENUM) {}
377 :
378 8 : void add_token(token & t) { f_tokens.push_back(QSharedPointer<token>(new token(t))); }
379 82 : void add_node(QSharedPointer<token_node> & n) { f_tokens.push_back(n); }
380 : vector_token_t & tokens() { return f_tokens; }
381 0 : size_t size() const { return f_tokens.size(); }
382 32 : QSharedPointer<token> operator [] (int index) { return f_tokens[index]; }
383 : QSharedPointer<token> const operator [] (int index) const { return f_tokens[index]; }
384 88 : void set_line(uint32_t line) { f_line = line; }
385 : uint32_t get_line() const { return f_line; }
386 :
387 30 : void set_user_data(QSharedPointer<parser_user_data> data) { f_user_data = data; }
388 30 : QSharedPointer<parser_user_data> get_user_data() const { return f_user_data; }
389 :
390 : private:
391 : int32_t f_line = 0;
392 : vector_token_t f_tokens = vector_token_t();
393 : QSharedPointer<parser_user_data> f_user_data = QSharedPointer<parser_user_data>();
394 : };
395 :
396 1 : class grammar
397 : {
398 : public:
399 : grammar();
400 :
401 : void add_choices(choices & c);
402 :
403 : bool parse(lexer & input, choices & start);
404 1 : QSharedPointer<token_node> get_result() const { return f_result; }
405 :
406 : private:
407 : choices_array_t f_choices = choices_array_t();
408 : QSharedPointer<token_node> f_result = QSharedPointer<token_node>();
409 : };
410 :
411 :
412 :
413 : } // namespace parser
414 : } // namespace snap
415 : // vim: ts=4 sw=4 et
|