Line data Source code
1 : // Snap Websites Server -- URI canonicalization
2 : // Copyright (c) 2011-2019 Made to Order Software Corp. All Rights Reserved
3 : //
4 : // https://snapwebsites.org/
5 : // contact@m2osw.com
6 : //
7 : // This program is free software; you can redistribute it and/or modify
8 : // it under the terms of the GNU General Public License as published by
9 : // the Free Software Foundation; either version 2 of the License, or
10 : // (at your option) any later version.
11 : //
12 : // This program is distributed in the hope that it will be useful,
13 : // but WITHOUT ANY WARRANTY; without even the implied warranty of
14 : // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 : // GNU General Public License for more details.
16 : //
17 : // You should have received a copy of the GNU General Public License
18 : // along with this program; if not, write to the Free Software
19 : // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
20 :
21 :
22 : // self
23 : //
24 : #include "snapwebsites/snap_uri.h"
25 :
26 :
27 : // snapwebsites lib
28 : //
29 : #include "snapwebsites/qstring_stream.h"
30 : #include "snapwebsites/log.h"
31 :
32 :
33 : // snapdev lib
34 : //
35 : #include <snapdev/not_used.h>
36 :
37 :
38 : // QtSerialization lib
39 : //
40 : #include <QtSerialization/QSerializationComposite.h>
41 : #include <QtSerialization/QSerializationFieldBasicTypes.h>
42 : #include <QtSerialization/QSerializationFieldString.h>
43 :
44 :
45 : // Qt lib
46 : //
47 : #include <QBuffer>
48 :
49 :
50 : // libtld lib
51 : //
52 : #include <libtld/tld.h>
53 :
54 :
55 : // C lib
56 : //
57 : #include <netdb.h>
58 :
59 :
60 : // last include
61 : //
62 : #include <snapdev/poison.h>
63 :
64 :
65 :
66 :
67 : namespace snap
68 : {
69 :
70 : // needed to avoid some linkage problems when references to these are used
71 : const domain_variable::domain_variable_type_t domain_variable::DOMAIN_VARIABLE_TYPE_STANDARD;
72 : const domain_variable::domain_variable_type_t domain_variable::DOMAIN_VARIABLE_TYPE_WEBSITE;
73 : const domain_variable::domain_variable_type_t domain_variable::DOMAIN_VARIABLE_TYPE_FLAG_WITH_DEFAULT;
74 : const domain_variable::domain_variable_type_t domain_variable::DOMAIN_VARIABLE_TYPE_FLAG_NO_DEFAULT;
75 :
76 : // needed to avoid some linkage problems when references to these are used
77 : const website_variable::website_variable_type_t website_variable::WEBSITE_VARIABLE_TYPE_STANDARD;
78 : const website_variable::website_variable_type_t website_variable::WEBSITE_VARIABLE_TYPE_WEBSITE;
79 : const website_variable::website_variable_type_t website_variable::WEBSITE_VARIABLE_TYPE_FLAG_WITH_DEFAULT;
80 : const website_variable::website_variable_type_t website_variable::WEBSITE_VARIABLE_TYPE_FLAG_NO_DEFAULT;
81 :
82 :
83 : /** \brief This function intializes a default Snap URI object.
84 : *
85 : * Initialize a default Snap URI object.
86 : *
87 : * By default, the protocol is set to HTTP and everything else is set to
88 : * empty. This also means the original URI is set to empty (and stays that
89 : * way unless you later call set_uri() with a valid URI.)
90 : *
91 : * \sa set_uri()
92 : * \sa set_protocol()
93 : * \sa set_domain()
94 : * \sa set_path()
95 : * \sa set_option()
96 : * \sa set_query_string()
97 : * \sa set_anchor()
98 : */
99 0 : snap_uri::snap_uri()
100 : {
101 0 : }
102 :
103 : /** \brief Set the URI to the specified string.
104 : *
105 : * This function sets the URI to the specified string. The parsing
106 : * is the same as in the set_uri() function.
107 : *
108 : * \todo
109 : * Should this function throw if the URI is considered invalid?
110 : *
111 : * \param[in] uri The URI to assign to this Snap URI object.
112 : *
113 : * \sa set_uri()
114 : */
115 0 : snap_uri::snap_uri(QString const & uri)
116 : {
117 0 : if(!set_uri(uri))
118 : {
119 : // TBD: should we throw if set_uri() returns false?
120 0 : SNAP_LOG_ERROR("URI \"")(uri)("\" is considered invalid.");
121 : }
122 0 : }
123 :
124 : /** \brief Replace the URI of this Snap URI object.
125 : *
126 : * This function replaces the current Snap URI object information
127 : * with the specified \p uri data.
128 : *
129 : * Before calling this function YOU must force a URI encoding if the
130 : * URI is not yet encoded.
131 : *
132 : * Anything wrong in the syntax and the function returns false. Wrong
133 : * means empty entries, invalid encoding sequence, etc.
134 : *
135 : * \param[in] uri The new URI to replace all the current data of this Snap URI object.
136 : *
137 : * \return false if the URI could not be parsed (in which case nothing's changed in the object); true otherwise
138 : */
139 0 : bool snap_uri::set_uri(QString const & uri)
140 : {
141 0 : QChar const * u(uri.constData());
142 :
143 : // retrieve the protocol
144 0 : QChar const * s(u);
145 0 : while(!u->isNull() && u->unicode() != ':')
146 : {
147 0 : ++u;
148 : }
149 0 : if(u - s < 1 || u->isNull() || u[1].unicode() != '/' || u[2].unicode() != '/')
150 : {
151 : // protocol is not followed by :// or is an empty string
152 0 : return false;
153 : }
154 0 : QString uri_protocol(s, static_cast<int>(u - s));
155 :
156 : // skip the ://
157 0 : u += 3;
158 :
159 : // retrieve the sub-domains and domain parts
160 : // we may also discover a name, password, and port
161 0 : QChar const * colon1(nullptr);
162 0 : QChar const * colon2(nullptr);
163 0 : QChar const * at(nullptr);
164 0 : for(s = u; !u->isNull() && u->unicode() != '/'; ++u)
165 : {
166 0 : if(u->unicode() == ':')
167 : {
168 0 : if(colon1 == nullptr)
169 : {
170 0 : colon1 = u;
171 : }
172 : else
173 : {
174 0 : if(at != nullptr)
175 : {
176 0 : if(colon2 != nullptr)
177 : {
178 0 : return false;
179 : }
180 0 : colon2 = u;
181 : }
182 : else
183 : {
184 0 : return false;
185 : }
186 : }
187 : }
188 0 : if(u->unicode() == '@')
189 : {
190 0 : if(at != nullptr)
191 : {
192 : // we cannot have more than one @ character that wasn't escaped
193 0 : return false;
194 : }
195 0 : at = u;
196 : }
197 : }
198 : // without an at (@) colon1 indicates a port
199 0 : if(at == nullptr && colon1 != nullptr)
200 : {
201 : // colon2 is nullptr since otherwise we already returned with false
202 0 : colon2 = colon1;
203 0 : colon1 = nullptr;
204 : }
205 :
206 0 : QString username;
207 0 : QString password;
208 0 : QString full_domain_name;
209 0 : int port(protocol_to_port(uri_protocol));
210 :
211 : // retrieve the data
212 0 : if(colon1 != nullptr)
213 : {
214 : // if(at == nullptr) -- missing '@'? this is not possible since we just
215 : // turned colon1 to colon2 if no '@' was defined
216 0 : username.insert(0, s, static_cast<int>(colon1 - s));
217 0 : s = colon1 + 1;
218 : }
219 0 : if(at != nullptr)
220 : {
221 0 : password.insert(0, s, static_cast<int>(at - s));
222 0 : s = at + 1;
223 : }
224 0 : if(colon2 != nullptr)
225 : {
226 0 : full_domain_name.insert(0, s, static_cast<int>(colon2 - s));
227 0 : QChar const * p(colon2 + 1);
228 0 : if(p == u)
229 : {
230 : // empty port entries are considered invalid
231 0 : return false;
232 : }
233 0 : port = 0; // Reset port.
234 0 : for(; p < u; ++p)
235 : {
236 0 : ushort d = p->unicode();
237 0 : if(d < '0' || d > '9')
238 : {
239 : // ports only accept digits
240 0 : return false;
241 : }
242 0 : port = port * 10 + d - '0';
243 0 : if(port > 65535)
244 : {
245 : // port overflow
246 0 : return false;
247 : }
248 : }
249 : }
250 : else
251 : {
252 0 : full_domain_name.insert(0, s, static_cast<int>(u - s));
253 : }
254 :
255 : // verify that there is a domain
256 0 : if(full_domain_name.isNull())
257 : {
258 0 : return false;
259 : }
260 :
261 : // force a username AND password or neither
262 0 : if(username.isNull() ^ password.isNull())
263 : {
264 0 : return false;
265 : }
266 :
267 : // break-up the domain in sub-domains, base domain, and TLD
268 0 : snap_string_list sub_domain_names;
269 0 : QString domain_name;
270 0 : QString tld;
271 0 : if(!process_domain(full_domain_name, sub_domain_names, domain_name, tld))
272 : {
273 0 : return false;
274 : }
275 :
276 : // now we are ready to parse further (i.e. path)
277 0 : snap_string_list uri_path;
278 0 : if(!u->isNull())
279 : {
280 : // skip the '/'
281 : //
282 0 : ++u;
283 0 : for(s = u; !u->isNull() && u->unicode() != '?' && u->unicode() != '#'; ++u)
284 : {
285 0 : if(u->unicode() == '/')
286 : {
287 0 : if(s != u)
288 : {
289 : // decode right here since we just separate one segment
290 : //
291 0 : uri_path << urldecode(QString(s, static_cast<int>(u - s)));
292 : }
293 : // skip the '/'
294 : //
295 0 : s = u + 1;
296 : }
297 : }
298 0 : if(s != u)
299 : {
300 : // last segment when it does not end with '/'
301 : //
302 0 : uri_path << urldecode(QString(s, static_cast<int>(u - s)));
303 : }
304 : }
305 :
306 0 : snap_uri_options_t query_strings;
307 0 : if(!u->isNull() && u->unicode() == '?')
308 : {
309 : // skip the '?' and then any (invalid?) introductory '&'
310 0 : do
311 : {
312 0 : ++u;
313 : }
314 0 : while(!u->isNull() && u->unicode() == '&');
315 0 : QChar const * e(nullptr);
316 0 : for(s = u;; ++u)
317 : {
318 0 : if(u->isNull() || u->unicode() == '&' || u->unicode() == '#')
319 : {
320 0 : if(e == nullptr)
321 : {
322 : // special case when a parameter appears without value
323 : // ...&name&...
324 0 : e = u;
325 : }
326 0 : QString name(s, static_cast<int>(e - s));
327 0 : if(name.isEmpty())
328 : {
329 : // this is a very special case!!!
330 : // ...&=value&...
331 : // so we use a "special" name, also even that name could be
332 : // defined in the query string (with %2A=value)
333 0 : name = "*";
334 : }
335 :
336 : // query strings are saved as options (name/value pairs)
337 : // although the value may not be defined at all (...&name&...)
338 : // query string names are case sensitive (as per 6.2.2.1 of RFC 3986)
339 0 : QString value;
340 0 : if(e != u)
341 : {
342 : // note that we reach here if there is an equal sign,
343 : // the value may still be empty (i.e. u - e - 1 == 0 is
344 : // possible)
345 : //
346 0 : value = QString::fromRawData(e + 1, static_cast<int>(u - e - 1));
347 : }
348 0 : name = urldecode(name);
349 0 : if(query_strings.contains(name))
350 : {
351 : // two parameters with the same name, refused
352 0 : return false;
353 : }
354 0 : query_strings[name] = urldecode(value);
355 :
356 : // skip all the & and then reset s and e
357 0 : while(!u->isNull() && u->unicode() == '&')
358 : {
359 0 : ++u;
360 : }
361 0 : if(u->isNull() || u->unicode() == '#')
362 : {
363 : // reached the end of the query strings
364 0 : break;
365 : }
366 0 : s = u;
367 0 : e = nullptr;
368 : }
369 0 : else if(e == nullptr && u->unicode() == '=')
370 : {
371 0 : e = u;
372 : }
373 0 : }
374 : }
375 :
376 : // finally check for an anchor
377 : // (note that browsers do not send us the anchor data, however, URIs
378 : // defined on the server side can very well include such.)
379 : //
380 0 : QString uri_anchor;
381 0 : if(!u->isNull() && u->unicode() == '#')
382 : {
383 0 : ++u;
384 :
385 : // we need to decode the string so we add the whole string here
386 : //
387 0 : QString p(u);
388 0 : p = urldecode(p);
389 0 : if(!p.isEmpty() && p[0] == '!')
390 : {
391 : // what do we do here?!
392 : //
393 : // it seems to me that we should not get those here, but that
394 : // could be from someone who wrote the URL in their document.
395 : //
396 0 : u = p.constData();
397 0 : for(s = u; !u->isNull(); ++u)
398 : {
399 0 : if(u->unicode() == '/')
400 : {
401 : // encode right here since we have separate strings
402 : //
403 0 : if(s != u)
404 : {
405 0 : uri_path << urldecode(QString(s, static_cast<int>(u - s)));
406 : }
407 : // skip the '/'
408 : //
409 0 : s = u + 1;
410 : }
411 : }
412 0 : if(s != u)
413 : {
414 : // last path that doesn't end with '/'
415 : //
416 0 : uri_path << urldecode(QString(s, static_cast<int>(u - s)));
417 : }
418 : }
419 : else
420 : {
421 0 : uri_anchor = p;
422 : }
423 : }
424 :
425 : // the path may include some ".." which we want to eliminate
426 : // note that contrary to Unix we do not accept "/.." as an equivalent
427 : // to "/" and we do not verify that all the paths exist... (i.e.
428 : // if "c" does not exist under "/a/b" (folder /a/b/c), then it should
429 : // be an error to use "/a/b/c/.." since "/a/b/c" cannot be computed.)
430 : //
431 0 : int max_path(uri_path.size());
432 0 : for(int i(0); i < max_path; ++i)
433 : {
434 0 : if(uri_path[i] == "..")
435 : {
436 0 : if(i == 0 || max_path < 2)
437 : {
438 : // the path starts with a ".." or has too many ".."
439 : //
440 0 : return false;
441 : }
442 0 : uri_path.removeAt(i);
443 0 : uri_path.removeAt(--i);
444 0 : --i;
445 0 : max_path -= 2;
446 : }
447 : }
448 :
449 : // totally unchanged URI, but only if it is considered valid
450 : //
451 0 : f_original = uri;
452 :
453 : // now decode all the entries that may be encoded
454 : //
455 0 : f_protocol = uri_protocol;
456 0 : f_username = urldecode(username);
457 0 : f_password = urldecode(password);
458 0 : if(port != -1)
459 : {
460 0 : f_port = port;
461 : }
462 0 : f_domain = domain_name;
463 0 : f_top_level_domain = tld;
464 0 : f_sub_domains = sub_domain_names;
465 0 : f_path = uri_path;
466 :
467 : // options come from parsing the sub-domains, query strings and paths
468 : // and at this point we do not have that information...
469 : //
470 0 : f_options.clear();
471 :
472 0 : f_query_strings = query_strings;
473 0 : f_anchor = uri_anchor;
474 :
475 0 : return true;
476 : }
477 :
478 :
479 : /** \brief Return the original URI used to define the Snap URI object.
480 : *
481 : * This function returns the original URI as defined when calling the
482 : * set_uri() or creating the Snap URI object with the snap_uri() constructor
483 : * accepting a string.
484 : *
485 : * Note that it is possible to use the snap_uri object without using the
486 : * set_uri() or a string in the constructor by calling the setters of
487 : * the different parts of a URI. This is actually how snap_child does it
488 : * because Apache does not give us one plane URI, instead we get pre
489 : * separated parts. Therefore the get_original_uri() is always empty when
490 : * called from that f_uri variable.
491 : *
492 : * Note that this URI may still include security issues, although if the
493 : * input was not considered valid (i.e. had a valid protocol, etc.) then
494 : * this function returns an empty string.
495 : *
496 : * \return A constant reference to the original Snap URI.
497 : */
498 0 : QString const & snap_uri::get_original_uri() const
499 : {
500 0 : return f_original;
501 : }
502 :
503 :
504 : /** \brief Return the current URI define in this Snap URI object.
505 : *
506 : * This function concatenate all the URI parts in a fully qualified URI
507 : * and returns the result.
508 : *
509 : * This function does NOT take the rules in account (since it does not
510 : * know anything about them.) So you may want to consider using the
511 : * snap_uri_rules::process_uri() function instead.
512 : *
513 : * \note
514 : * The returned URI is already encoded as required by HTTP and such.
515 : *
516 : * \param[in] use_hash_bang When this flag is set to true the URI is returned
517 : * as a hash bang (i.e. domain/path becomes domain/#!path).
518 : *
519 : * \return The URI represented by this Snap URI object.
520 : */
521 0 : QString snap_uri::get_uri(bool use_hash_bang) const
522 : {
523 0 : QString uri(f_protocol);
524 :
525 0 : uri += "://";
526 :
527 : // username/password if defined
528 0 : if(!f_username.isEmpty())
529 : {
530 0 : uri += urlencode(f_username);
531 0 : if(!f_password.isEmpty())
532 : {
533 0 : uri += ":";
534 0 : uri += urlencode(f_password);
535 : }
536 0 : uri += "@";
537 : }
538 :
539 : // full domain
540 : // domains should rarely require encoding for special characters, however,
541 : // it often is for international domains that make use of UTF-8 characters
542 : // outside of the standard ASCII letters and those definitively require
543 : // URL encoding to work right.
544 0 : uri += urlencode(full_domain());
545 0 : if(f_port != protocol_to_port(f_protocol))
546 : {
547 0 : uri += QString(":%1").arg(f_port);
548 : }
549 0 : uri += "/";
550 :
551 : // path if no hash bang
552 : //
553 0 : QString const p(path());
554 0 : if(!use_hash_bang && p.length() > 0)
555 : {
556 : // avoid a double slash if possible
557 : //
558 : // XXX: should the path not have a leading slash?
559 : // (as far as I know path() never return a path with a leading
560 : // slash; but we would need a test to make sure of it)
561 : //
562 0 : if(p[0] == '/')
563 : {
564 0 : uri += p.mid(1);
565 : }
566 : else
567 : {
568 0 : uri += p;
569 : }
570 : }
571 :
572 : // query string
573 0 : QString const q(query_string());
574 0 : if(!q.isEmpty())
575 : {
576 0 : uri += '?';
577 0 : uri += q;
578 : }
579 :
580 : // anchor
581 0 : if(!f_anchor.isEmpty())
582 : {
583 0 : if(use_hash_bang)
584 : {
585 : // hash bang and anchor are exclusive
586 0 : throw snap_uri_exception_exclusive_parameters("you cannot use the hash bang (#!) and an anchor (#) in the same URI");
587 : }
588 0 : uri += "#";
589 0 : uri += urlencode(f_anchor, "!/~");
590 : }
591 :
592 : // path when using the hash bang but only if not empty
593 0 : if(use_hash_bang && !p.isEmpty())
594 : {
595 0 : uri += "#!/";
596 0 : uri += p;
597 : }
598 :
599 0 : return uri;
600 : }
601 :
602 :
603 : /** \brief Retrieve the URI of the website.
604 : *
605 : * This function returns the URI of the website, without any path,
606 : * query string options, anchor. The port is included only if it
607 : * does not correspond to the protocol and the \p include_port flag
608 : * is set to true.
609 : *
610 : * \param[in] include_port Whether the port should be included.
611 : *
612 : * \return The domain name with the protocol and optionally the port.
613 : */
614 0 : QString snap_uri::get_website_uri(bool include_port) const
615 : {
616 0 : QString result(f_protocol);
617 :
618 0 : result += "://";
619 0 : result += full_domain();
620 :
621 : // only include the port if the caller wants it and if it does not
622 : // match the default protocol port
623 : //
624 0 : if(include_port
625 0 : && protocol_to_port(f_protocol) != f_port)
626 : {
627 0 : result += QString(":%1").arg(f_port);
628 : }
629 :
630 0 : result += "/";
631 :
632 0 : return result;
633 : }
634 :
635 :
636 : /** \brief Retrieve a part by name.
637 : *
638 : * This function allows you to retrieve a part by name.
639 : *
640 : * The supported parts are:
641 : *
642 : * \li anchor -- The anchor
643 : * \li domain -- The domain name
644 : * \li full-domain -- The full domain: with sub-domains, domain, and TLD
645 : * \li option -- The option number \p part
646 : * \li option-count -- The number of options
647 : * \li original -- The original URI or ""
648 : * \li password -- The password
649 : * \li path -- The folder name number \p part
650 : * \li path-count -- the number of paths
651 : * \li protocol -- The protocol
652 : * \li query-string -- The query string number \p part
653 : * \li query-string-count -- The number of query strings
654 : * \li sub-domain -- The sub-domain name number \p part
655 : * \li sub-domain-count -- The number of sub-domains
656 : * \li tld or top-level-domain -- the top-level domain name
657 : * \li uri -- the full URI as you want it in an href="..." attribute
658 : * \li username -- The username
659 : *
660 : * \param[in] name The named part to retrieve.
661 : * \param[in] part The part number when required (i.e. sub-domains)
662 : *
663 : * \return The data representing this part as a string.
664 : */
665 0 : QString snap_uri::get_part(QString const& name, int part) const
666 : {
667 0 : if(name.isEmpty())
668 : {
669 : // should this be an error?
670 0 : return "";
671 : }
672 0 : switch(name[0].unicode())
673 : {
674 0 : case 'a':
675 0 : if(name == "anchor")
676 : {
677 0 : return f_anchor;
678 : }
679 0 : break;
680 :
681 0 : case 'd':
682 0 : if(name == "domain")
683 : {
684 0 : return f_domain;
685 : }
686 0 : break;
687 :
688 0 : case 'f':
689 0 : if(name == "full-domain")
690 : {
691 0 : return full_domain();
692 : }
693 0 : break;
694 :
695 0 : case 'o':
696 0 : if(name == "option")
697 : {
698 0 : if(part < 0 || part >= f_options.size())
699 : {
700 0 : throw snap_uri_exception_out_of_bounds(QString("option %1 does not exist (range is 0 to %2)").arg(part).arg(f_options.size()));
701 : }
702 0 : return (f_options.begin() + part).value();
703 : }
704 0 : if(name == "option-count")
705 : {
706 0 : QString count(QString("%1").arg(f_options.size()));
707 0 : return count;
708 0 : }
709 0 : if(name == "original")
710 : {
711 0 : return f_original;
712 : }
713 0 : break;
714 :
715 0 : case 'p':
716 0 : if(name == "password")
717 : {
718 0 : return f_password;
719 : }
720 0 : if(name == "path")
721 : {
722 0 : if(part < 0 || part >= f_path.size())
723 : {
724 0 : throw snap_uri_exception_out_of_bounds(QString("path %1 is not available (range 0 to %2)").arg(part).arg(f_path.size()));
725 : }
726 0 : return f_path[part];
727 : }
728 0 : if(name == "path-count")
729 : {
730 0 : QString count(QString("%1").arg(f_path.size()));
731 0 : return count;
732 0 : }
733 0 : if(name == "port")
734 : {
735 0 : QString port(QString("%1").arg(f_port));
736 0 : return port;
737 0 : }
738 0 : if(name == "protocol")
739 : {
740 0 : return f_protocol;
741 : }
742 0 : break;
743 :
744 0 : case 'q':
745 0 : if(name == "query-string")
746 : {
747 0 : if(part < 0 || part >= f_query_strings.size())
748 : {
749 0 : throw snap_uri_exception_out_of_bounds(QString("query-string %1 does not exist (range 0 to %2)").arg(part).arg(f_query_strings.size()));
750 : }
751 0 : return (f_query_strings.begin() + part).value();
752 : }
753 0 : if(name == "query-string-count")
754 : {
755 0 : QString const count(QString("%1").arg(f_query_strings.size()));
756 0 : return count;
757 0 : }
758 0 : break;
759 :
760 0 : case 's':
761 0 : if(name == "sub-domain")
762 : {
763 0 : if(part < 0 || part >= f_sub_domains.size())
764 : {
765 0 : throw snap_uri_exception_out_of_bounds(QString("sub-domain %1 does not exist (range 0 to %2)").arg(part).arg(f_sub_domains.size()));
766 : }
767 0 : return f_sub_domains[part];
768 : }
769 0 : if(name == "sub-domain-count")
770 : {
771 0 : QString const count(QString("%1").arg(f_sub_domains.size()));
772 0 : return count;
773 0 : }
774 0 : break;
775 :
776 0 : case 't':
777 0 : if(name == "tld" || name == "top-level-domain")
778 : {
779 0 : return f_top_level_domain;
780 : }
781 0 : break;
782 :
783 0 : case 'u':
784 0 : if(name == "uri")
785 : {
786 0 : return get_uri();
787 : }
788 0 : if(name == "username")
789 : {
790 0 : return f_username;
791 : }
792 0 : break;
793 :
794 0 : default:
795 : // no match for other characters
796 0 : break;
797 :
798 : }
799 :
800 0 : return "";
801 : }
802 :
803 :
804 : /** \brief Change the protocol.
805 : *
806 : * This function is called to set the protocol.
807 : *
808 : * The protocol is not checked since this can be used for any
809 : * URI, not just the HTTP and HTTPS protocols. The name is
810 : * expected to be all lowercase and lowercase letters [a-z].
811 : *
812 : * \param[in] uri_protocol The name of the protocol.
813 : */
814 0 : void snap_uri::set_protocol(QString const & uri_protocol)
815 : {
816 0 : if(uri_protocol.isEmpty())
817 : {
818 0 : throw snap_uri_exception_invalid_parameter("the uri_protocol parameter cannot be an empty string");
819 : }
820 0 : f_protocol = uri_protocol;
821 0 : }
822 :
823 :
824 : /** \brief Retrieve a copy of the protocol.
825 : *
826 : * This value is the name that defines how messages are being
827 : * sent between the client and the server.
828 : *
829 : * The main interface only accepts "http" and "https", but the
830 : * snap_uri object accepts all protocols so one can write URIs
831 : * with protocols such as "ftp", "mail", and "gopher".
832 : *
833 : * \return A constant reference to the protocol of this URI.
834 : */
835 0 : QString const& snap_uri::protocol() const
836 : {
837 0 : return f_protocol;
838 : }
839 :
840 :
841 : /** \brief Process a domain name and break it up.
842 : *
843 : * This function processes a domain name and breaks it up in
844 : * the domain name, the sub-domains, and the TLD.
845 : *
846 : * \note
847 : * If the function returns false, then the out parameters may not
848 : * all be defined properly. None of them should be used in that
849 : * case anyway.
850 : *
851 : * \param[in] full_domain_name The complete domain with sub-domains and TLD.
852 : * \param[out] sub_domain_names An array of sub-domains, may be empty.
853 : * \param[out] domain_name The domain by itself (no TLD and no sub-domain.)
854 : * \param[out] tld The TLD part by itself.
855 : *
856 : * \return true if the function succeeds, false otherwise
857 : */
858 0 : bool snap_uri::process_domain(QString const & full_domain_name,
859 : snap_string_list & sub_domain_names, QString & domain_name, QString & tld)
860 : {
861 : // first we need to determine the TLD, we use the tld()
862 : // function from the libtld library for this purpose
863 :
864 : // (note that the URI is expected to be encoded so the UTF-8
865 : // encoding is the same as ASCII)
866 0 : QByteArray full_domain_utf8(full_domain_name.toUtf8());
867 : struct tld_info info;
868 0 : char const *fd(full_domain_utf8.data());
869 0 : tld_result r(::tld(fd, &info));
870 0 : if(r != TLD_RESULT_SUCCESS)
871 : {
872 : // (should we accept TLD_RESULT_INVALID URIs?)
873 : // the URI doesn't end with a known TLD
874 0 : return false;
875 : }
876 :
877 : // got the TLD, save it in the user's supplied variable
878 0 : tld = urldecode(info.f_tld);
879 :
880 : // search where the domain name starts
881 0 : char const *compute_domain_name(fd + info.f_offset);
882 0 : while(compute_domain_name > fd)
883 : {
884 0 : --compute_domain_name;
885 0 : if(*compute_domain_name == '.')
886 : {
887 0 : ++compute_domain_name;
888 0 : break;
889 : }
890 : }
891 0 : domain_name = urldecode(QString::fromUtf8(compute_domain_name, static_cast<int>(info.f_tld - compute_domain_name)));
892 :
893 : // now cut the remainder on each period, these are the sub-domains
894 : // there may be none if there are no other periods in the full name
895 0 : if(compute_domain_name > fd)
896 : {
897 : // forget the period
898 0 : --compute_domain_name;
899 : }
900 0 : QString all_sub_domains(QString::fromUtf8(fd, static_cast<int>(compute_domain_name - fd)));
901 0 : sub_domain_names = all_sub_domains.split('.');
902 :
903 : // verify that all the sub-domains are valid (i.e. no "..")
904 0 : if(!all_sub_domains.isEmpty())
905 : {
906 0 : int const max_sub_domain_names(sub_domain_names.size());
907 0 : for(int i(0); i < max_sub_domain_names; ++i)
908 : {
909 0 : if(sub_domain_names[i].isEmpty())
910 : {
911 : // sub-domains cannot be empty or the URI includes
912 : // two period one after another (this should actually
913 : // be caught by the tld() call.)
914 0 : return false;
915 : }
916 0 : sub_domain_names[i] = urldecode(sub_domain_names[i]);
917 :
918 : // TODO: look into whether we have to check for periods in the
919 : // decoded sub-domain names (i.e. a %2E is probably not a
920 : // valid character in a sub-domain name, at the same time
921 : // if we reach here, there should not be such a DNS entry...
922 : // but not automatically because a hacker can take an IP
923 : // and use it with any URI and send an HTTP request that
924 : // way... still, we would catch that in our domain/website
925 : // canonicalization.) Maybe we should decode the domain part
926 : // first, then parse it.
927 : }
928 : }
929 :
930 0 : return true;
931 : }
932 :
933 :
934 : /** \brief Set the domain to 'domain'.
935 : *
936 : * This function changes the Snap URI to the specified full domain.
937 : * This means changing the set of sub-domains, the TLD and the domain
938 : * it-self are updated with the corresponding data from the full domain.
939 : * The function takes care of breaking the input
940 : *
941 : * If any error is discovered in the full domain name, then the internal
942 : * variables do not get modified.
943 : *
944 : * Note that the domain is not expected to include a user name, password
945 : * and port information. You want to get rid of that information before
946 : * calling this function or consider calling set_uri() instead.
947 : *
948 : * \note
949 : * The only potential problem is when you get an out of memory error
950 : * while allocating a string.
951 : *
952 : * \todo
953 : * Check that the URL is not an IPv4 or IPv6 address. Such will always
954 : * fail and we should look into avoiding the use of an exception in
955 : * that circumstance.
956 : *
957 : * \exception snap_uri_exception_invalid_uri
958 : * If the domain cannot properly be broken up in sub-domains,
959 : * the doman name and the tld, then this exception is raised.
960 : *
961 : * \param[in] full_domain_name A full domain name, without protocol, path,
962 : * query string or anchor.
963 : */
964 0 : void snap_uri::set_domain(QString const & full_domain_name)
965 : {
966 0 : snap_string_list sub_domain_names;
967 0 : QString domain_name;
968 0 : QString tld;
969 0 : if(!process_domain(full_domain_name, sub_domain_names, domain_name, tld))
970 : {
971 0 : throw snap_uri_exception_invalid_uri(QString("could not break up \"%1\" as a valid domain name").arg(full_domain_name));
972 : }
973 :
974 0 : f_domain = domain_name;
975 0 : f_top_level_domain = tld;
976 0 : f_sub_domains = sub_domain_names;
977 0 : }
978 :
979 :
980 : /** \brief Reconstruct the full domain from the broken down information
981 : *
982 : * This function rebuilds a full domain name from the broken down
983 : * data saved in the Snap URI: the sub-domains, the domain name,
984 : * and the TLD.
985 : *
986 : * \return The full domain name representation of this Snap URI.
987 : */
988 0 : QString snap_uri::full_domain() const
989 : {
990 0 : QString full_domains(f_sub_domains.join("."));
991 0 : if(!full_domains.isEmpty())
992 : {
993 0 : full_domains += '.';
994 : }
995 0 : full_domains += f_domain;
996 0 : full_domains += f_top_level_domain;
997 0 : return full_domains;
998 : }
999 :
1000 : /** \brief Get the top level domain name.
1001 : *
1002 : * This function returns the top level domain name by itself.
1003 : * For example, in "www.example.com", the top level domain name
1004 : * is "com".
1005 : *
1006 : * \return The top level domain name of the Snap URI.
1007 : */
1008 0 : QString const& snap_uri::top_level_domain() const
1009 : {
1010 0 : return f_top_level_domain;
1011 : }
1012 :
1013 :
1014 : /** \brief Get the domain name by itself.
1015 : *
1016 : * This function returns the stripped down domain name. This name
1017 : * has no period since it includes no sub-domains and no top level
1018 : * domain names.
1019 : *
1020 : * \return The stripped down domain name.
1021 : */
1022 0 : QString const& snap_uri::domain() const
1023 : {
1024 0 : return f_domain;
1025 : }
1026 :
1027 :
1028 : /** \brief Return the concatenated list of sub-domains.
1029 : *
1030 : * This function returns the concatenated list of sub-domains
1031 : * in one string.
1032 : *
1033 : * \return The concatenated sub-domains separated by periods.
1034 : */
1035 0 : QString snap_uri::sub_domains() const
1036 : {
1037 0 : return f_sub_domains.join(".");
1038 : }
1039 :
1040 :
1041 : /** \brief Return the number of sub-domains defined.
1042 : *
1043 : * This function defines a set of sub-domains.
1044 : *
1045 : * \return The number of sub-domains.
1046 : */
1047 0 : int snap_uri::sub_domain_count() const
1048 : {
1049 0 : return f_sub_domains.size();
1050 : }
1051 :
1052 :
1053 : /** \brief Return one of the sub-domain names.
1054 : *
1055 : * This function returns the specified domain name.
1056 : *
1057 : * \param[in] part The sub-domain name index.
1058 : *
1059 : * \return The sub-domain corresponding to the specified index.
1060 : */
1061 0 : QString snap_uri::sub_domain(int part) const
1062 : {
1063 0 : if(part < 0 || part >= f_sub_domains.size())
1064 : {
1065 0 : throw snap_uri_exception_out_of_bounds(QString("sub-domain %1 does not exist (range 0 to %2)").arg(part).arg(f_sub_domains.size()));
1066 : }
1067 0 : return f_sub_domains[part];
1068 : }
1069 :
1070 :
1071 : /** \brief Return the array of sub-domains.
1072 : *
1073 : * This function gives you a constant reference to all the sub-domains
1074 : * at once. You may use this function to make use of the list iterator,
1075 : * for example.
1076 : *
1077 : * The strings are in order as in the first is the left-most sub-domain
1078 : * (or the furthest away from the domain name.)
1079 : *
1080 : * \return A list of strings representing the sub-domains.
1081 : */
1082 0 : snap_string_list const & snap_uri::sub_domains_list() const
1083 : {
1084 0 : return f_sub_domains;
1085 : }
1086 :
1087 :
1088 : /** \brief Set the port to the specified string.
1089 : *
1090 : * This function changes the port of the URI from what it is now
1091 : * to the specified value.
1092 : *
1093 : * The port value must be a positive number or zero.
1094 : *
1095 : * Negative values or other invalid numbers generate an error.
1096 : *
1097 : * You can retrieve the port number with the get_port() function.
1098 : *
1099 : * \exception snap_uri_exception_invalid_parameter
1100 : * This function generates an exception if an invalid port is detected
1101 : * (negative, larger than 65535, or characters other than 0-9).
1102 : *
1103 : * \param[in] port The new port for this Snap URI object.
1104 : */
1105 0 : void snap_uri::set_port(QString const & port)
1106 : {
1107 : bool ok;
1108 0 : int p = port.toInt(&ok);
1109 0 : if(!ok || p < 0 || p > 65535)
1110 : {
1111 0 : throw snap_uri_exception_invalid_parameter(QString("\"%1\" is an invalid port number").arg(port));
1112 : }
1113 0 : f_port = p;
1114 0 : }
1115 :
1116 :
1117 : /** \brief Set the port to the specified string.
1118 : *
1119 : * This function changes the port of the URI from what it is now
1120 : * to the specified value.
1121 : *
1122 : * The port value must be a positive number or zero.
1123 : *
1124 : * Negative values or invalid numbers generate an error.
1125 : *
1126 : * \exception snap_uri_exception_invalid_parameter
1127 : * This function generates an exception if an invalid port is
1128 : * detected (negative or characters other than 0-9).
1129 : *
1130 : * \param[in] port The new port for this Snap URI object.
1131 : */
1132 0 : void snap_uri::set_port(int port)
1133 : {
1134 0 : if(port < 0 || port > 65535)
1135 : {
1136 0 : throw snap_uri_exception_invalid_parameter(QString("port \"%1\" is out of range (1 to 65535)").arg(port));
1137 : }
1138 0 : f_port = port;
1139 0 : }
1140 :
1141 :
1142 : /** \brief Retrieve the port number.
1143 : *
1144 : * This function returns the specific port used to access
1145 : * the server. This parameter can be used as one of the
1146 : * options used to select a specific website.
1147 : *
1148 : * \return The port as an integer.
1149 : */
1150 0 : int snap_uri::get_port() const
1151 : {
1152 0 : return f_port;
1153 : }
1154 :
1155 :
1156 : /** \brief Replace the current path.
1157 : *
1158 : * This function can be used to replace the entire path of
1159 : * the URI by starting the new path with a slash (/something).
1160 : * If the \p path parameter does not start with a slash, then
1161 : * it is used as a relative path from the existing path.
1162 : *
1163 : * A path includes parts separated by one or more slashes (/).
1164 : * The function removes parts that are just "." since these
1165 : * mean "this directory" and they would not be valid in a
1166 : * canonicalized path.
1167 : *
1168 : * A path may include one or more ".." as a path part. These
1169 : * mean remove one part prior.
1170 : *
1171 : * The ".." are accepted in any path, however, it must be
1172 : * correct in that it is not possible to use ".." without at
1173 : * least one part just before that (i.e. "/this/one/../other/one" is
1174 : * valid, but "/../that/one/is/not" since ".." from / does not
1175 : * exist. This is not how Unix usually manages paths since
1176 : * in Unix / and /.. are one and the same folder.)
1177 : *
1178 : * Note that if you wanted to make use of the hash bang feature,
1179 : * you would still make use of this function to setup your path in
1180 : * the Snap URI object. The hash bang feature determines how
1181 : * the path is handled when you get the URI with get_uri().
1182 : *
1183 : * \exception snap_uri_exception_invalid_path
1184 : * The function raises this exception if the path includes more
1185 : * ".." than there are "normal" parts on the left side of the "..".
1186 : *
1187 : * \param[in] uri_path The new path for this URI.
1188 : *
1189 : * \sa path()
1190 : */
1191 0 : void snap_uri::set_path(QString uri_path)
1192 : {
1193 : // check whether the path starts with a '/':
1194 : // if so, then we replace the existing path;
1195 : // if not, then we append uri_path to the existing path.
1196 : //
1197 0 : if((uri_path.isEmpty() || uri_path[0] != '/')
1198 0 : && !f_path.empty())
1199 : {
1200 : // append unless the user passed a path starting with "/"
1201 : // or the current path is empty
1202 0 : uri_path = f_path.join("/") + "/" + uri_path;
1203 : }
1204 :
1205 : // if the path starts with a '/' or includes a double '/'
1206 : // within itself, it will be removed because of the SkipEmptyParts
1207 0 : snap_string_list p(uri_path.split('/', QString::SkipEmptyParts));
1208 :
1209 : // next we remove all ".." (and the previous part); if ".." was
1210 : // at the start of the path, then an exception is raised
1211 : //
1212 0 : int max_parts(p.size());
1213 0 : for(int i(0); i < max_parts; ++i)
1214 : {
1215 0 : if(p[i] == ".")
1216 : {
1217 : // canonalization includes removing "." parts which are
1218 : // viewed exactly as empty parts
1219 0 : p.removeAt(i);
1220 0 : --i;
1221 0 : --max_parts;
1222 : }
1223 0 : else if(p[i] == "..")
1224 : {
1225 : // note: max should not be less than 2 if i != 0
1226 0 : if(i == 0 || max_parts < 2)
1227 : {
1228 0 : throw snap_uri_exception_invalid_path(QString("path \"%1\" is not valid (it includes too many \"..\")").arg(uri_path));
1229 : }
1230 0 : p.removeAt(i);
1231 0 : p.removeAt(--i);
1232 0 : --i;
1233 0 : max_parts -= 2;
1234 : }
1235 : }
1236 :
1237 : // the input was valid, save the new result
1238 0 : f_path.swap(p);
1239 0 : }
1240 :
1241 :
1242 : /** \brief Return the full path.
1243 : *
1244 : * This function returns the full concatenated path of the URI.
1245 : *
1246 : * The function encodes the path appropriately. The path can thus be
1247 : * used anywhere an encoded path is accepted. The encoding can be
1248 : * avoided by setting the \p encoded flag to false.
1249 : *
1250 : * Note that a non encoded path may include / characters instead of
1251 : * the %2F encoded character and thus not match the internal path.
1252 : *
1253 : * \note
1254 : * The URL encode will not encode the ~ character which is at times
1255 : * used for user references (~username/...).
1256 : *
1257 : * \warning
1258 : * The result of the function returns what looks like a relative path.
1259 : * This is useful since in many cases you need to remove the starting
1260 : * slash, so we avoid adding it in the first place. If there is no path,
1261 : * the function returns the empty string ("").
1262 : *
1263 : * \param[in] encoded Should the resulting path be URL encoded already?
1264 : * By default the path is URL encoded as expected by the HTTP protocol.
1265 : *
1266 : * \return The full path of the URI.
1267 : */
1268 0 : QString snap_uri::path(bool encoded) const
1269 : {
1270 0 : if(encoded)
1271 : {
1272 0 : QString output;
1273 0 : bool first(true);
1274 0 : for(snap_string_list::const_iterator it(f_path.begin());
1275 0 : it != f_path.end(); ++it)
1276 : {
1277 0 : if(first)
1278 : {
1279 0 : first = false;
1280 : }
1281 : else
1282 : {
1283 0 : output += '/';
1284 : }
1285 0 : output += urlencode(*it, "~");
1286 : }
1287 0 : return output;
1288 : }
1289 0 : return f_path.join("/");
1290 : }
1291 :
1292 :
1293 : /** \brief Retrieve the number of folder names defined in the path.
1294 : *
1295 : * This function returns the number of folder names defined in the
1296 : * path. Each name can be retrieved with the path_folder() function.
1297 : *
1298 : * The function may return 0 if no folder name is available.
1299 : *
1300 : * \return The number of folder names available.
1301 : *
1302 : * \sa path_folder()
1303 : */
1304 0 : int snap_uri::path_count() const
1305 : {
1306 0 : return f_path.size();
1307 : }
1308 :
1309 :
1310 : /** \brief Get a folder name from the path.
1311 : *
1312 : * This function is used to retrieve the name of a specific folder.
1313 : * This is useful when you make use of a folder name as a dynamic
1314 : * name. For example with a path such as "journal/george",
1315 : * path_folder_name(1); returns "george" which may be the name of
1316 : * the journal owner.
1317 : *
1318 : * When you use this function to retrieve dynamic entries, it is
1319 : * assumed that you do it after the path options were removed so a
1320 : * path such as "en/journal/george" would be changed to
1321 : * "journal/george" and path_folder_name(1); would still return
1322 : * "george".
1323 : *
1324 : * \exception snap_uri_exception_out_of_bounds
1325 : * This function raises this exception if the \p part parameter is
1326 : * outside the range of folder names available. \p part should be
1327 : * between 0 and path_count() - 1. If the path is empty, then this
1328 : * function cannot be called.
1329 : *
1330 : * \param[in] part The index of the folder to retrieve.
1331 : *
1332 : * \return The folder name.
1333 : *
1334 : * \sa path_count();
1335 : */
1336 0 : QString snap_uri::path_folder_name(int part) const
1337 : {
1338 0 : if(part < 0 || part >= f_path.size())
1339 : {
1340 0 : throw snap_uri_exception_out_of_bounds(QString("no path section %1 available (range 0 to %2)").arg(part).arg(f_path.size()));
1341 : }
1342 0 : return f_path[part];
1343 : }
1344 :
1345 :
1346 : /** \brief The array of folder names.
1347 : *
1348 : * This function returns a reference to the array used to hold the
1349 : * folder names forming the URI path.
1350 : *
1351 : * \return A constant reference to the list of string forming the path.
1352 : */
1353 0 : snap_string_list const & snap_uri::path_list() const
1354 : {
1355 0 : return f_path;
1356 : }
1357 :
1358 :
1359 : /** \brief Set an option.
1360 : *
1361 : * This function is used to define the value of an option in a URI.
1362 : * Remember that options only work for URIs that are clearly marked
1363 : * as from this website.
1364 : *
1365 : * Setting the value to an empty string has the effect of deleting
1366 : * the given option. You may also call the unset_option() function.
1367 : *
1368 : * \param[in] name The name of the option to set.
1369 : * \param[in] value The new value for this option.
1370 : *
1371 : * \sa option();
1372 : * \sa unset_option();
1373 : */
1374 0 : void snap_uri::set_option(QString const& name, QString const& value)
1375 : {
1376 0 : if(value.isEmpty())
1377 : {
1378 0 : f_options.remove(name);
1379 : }
1380 : else
1381 : {
1382 0 : f_options[name] = value;
1383 : }
1384 0 : }
1385 :
1386 : /** \brief Remove the specified option.
1387 : *
1388 : * This function is used to remove (delete) an option from the list
1389 : * of options. For example, going to a page where the language is
1390 : * neutral, you probably want to remove the language option.
1391 : *
1392 : * \param[in] name The name of the option to remove.
1393 : *
1394 : * \sa set_option();
1395 : */
1396 0 : void snap_uri::unset_option(QString const& name)
1397 : {
1398 0 : f_options.remove(name);
1399 0 : }
1400 :
1401 :
1402 : /** \brief Retrieve the value of the named option.
1403 : *
1404 : * This function retrieves the current value of the named option.
1405 : *
1406 : * If the option is not defined, then the function returns an empty
1407 : * string. The empty string always represents an undefined option.
1408 : *
1409 : * \param[in] name The name of the option to retrieve.
1410 : *
1411 : * \return The value of the named option.
1412 : *
1413 : * \sa set_option();
1414 : */
1415 0 : QString snap_uri::option(QString const& name) const
1416 : {
1417 0 : if(f_options.contains(name))
1418 : {
1419 0 : return f_options[name];
1420 : }
1421 0 : return "";
1422 : }
1423 :
1424 :
1425 : /** \brief Retrieve the number of currently defined options.
1426 : *
1427 : * This function returns the number of options that can be retrieved
1428 : * with the option() function using an index. If the function returns
1429 : * zero, then no options are defined.
1430 : *
1431 : * \return The number of options defined in this URI.
1432 : */
1433 0 : int snap_uri::option_count() const
1434 : {
1435 0 : return f_options.size();
1436 : }
1437 :
1438 :
1439 : /** \brief Retrieve an option by index.
1440 : *
1441 : * This function allows you to retrieve the name and value of an option
1442 : * using its index. The index (\p part) must be a number between 0 and
1443 : * option_count() - 1.
1444 : *
1445 : * \param[in] part The index of the option to retrieve.
1446 : * \param[out] name The name of the option being retrieved.
1447 : *
1448 : * \return The value of the option being retrieved.
1449 : *
1450 : * \sa option();
1451 : * \sa option_count();
1452 : */
1453 0 : QString snap_uri::option(int part, QString& name) const
1454 : {
1455 0 : if(part < 0 || part >= f_options.size())
1456 : {
1457 0 : throw snap_uri_exception_out_of_bounds(QString("no option %1 available (range 0 to %2)").arg(part).arg(f_options.size()));
1458 : }
1459 0 : auto it(f_options.begin() + part);
1460 0 : name = it.key();
1461 0 : return it.value();
1462 : }
1463 :
1464 :
1465 : /** \brief Retrieve the map of options.
1466 : *
1467 : * This function returns the map of options so one can use the begin()
1468 : * and end() functions to go through the entire list without having to
1469 : * use the option() function.
1470 : *
1471 : * \return A constant reference to the map of options.
1472 : *
1473 : * \sa option();
1474 : */
1475 0 : snap_uri::snap_uri_options_t const& snap_uri::options_list() const
1476 : {
1477 0 : return f_options;
1478 : }
1479 :
1480 :
1481 : /** \brief Set a query string option.
1482 : *
1483 : * This function is used to change the named query string with the
1484 : * specified value.
1485 : *
1486 : * A query string option with an empty string as a value is considered
1487 : * undefined and is not shown on the final URI. So setting an option to
1488 : * the empty string ("") is equivalent to unset_query_option().
1489 : *
1490 : * \param[in] name The name of the query string option.
1491 : * \param[in] value The value of the query string option.
1492 : */
1493 0 : void snap_uri::set_query_option(QString const& name, QString const& value)
1494 : {
1495 0 : if(name.isEmpty())
1496 : {
1497 : // this happens if the name was not defined in the configuration file
1498 0 : return;
1499 : }
1500 :
1501 : // TODO: see whether we currently use this feature, because it is rather
1502 : // incorrect, it is possible to have an empty value in a query
1503 : // string (i.e. "...?logout")
1504 : //
1505 : // we should use unset_query_option() instead
1506 : //
1507 0 : if(value.isEmpty())
1508 : {
1509 0 : f_query_strings.remove(name);
1510 : }
1511 : else
1512 : {
1513 0 : f_query_strings[name] = value;
1514 : }
1515 : }
1516 :
1517 :
1518 : /** \brief Unset the named query string option.
1519 : *
1520 : * This function ensures that the named query string option is deleted
1521 : * and thus will not appear in the URI.
1522 : *
1523 : * \param[in] name The name of the option to delete.
1524 : */
1525 0 : void snap_uri::unset_query_option(QString const& name)
1526 : {
1527 0 : if(name.isEmpty())
1528 : {
1529 : // this happens if the name was not defined in the configuration file
1530 0 : return;
1531 : }
1532 :
1533 0 : f_query_strings.remove(name);
1534 : }
1535 :
1536 :
1537 : /** \brief Set the query string.
1538 : *
1539 : * This function can be used to reset the query string to the
1540 : * parameters defined in this URI query string.
1541 : *
1542 : * The function does not clear all the existing query strings,
1543 : * it only replaces existing entries. This means also means that
1544 : * it does not detect whether the input includes the same option
1545 : * more than once and only the last one sticks.
1546 : *
1547 : * The query string variable names and data gets URL decoded.
1548 : *
1549 : * \warning
1550 : * This function does not clear the existing list of query
1551 : * string options.
1552 : *
1553 : * \param[in] uri_query_string The query string to add to the existing data.
1554 : */
1555 0 : void snap_uri::set_query_string(QString const & uri_query_string)
1556 : {
1557 0 : snap_string_list const value_pairs(uri_query_string.split('&', QString::SkipEmptyParts));
1558 0 : for(snap_string_list::const_iterator it(value_pairs.begin());
1559 0 : it != value_pairs.end();
1560 : ++it)
1561 : {
1562 0 : int const pos(it->indexOf('='));
1563 0 : if(pos == -1)
1564 : {
1565 : // no value
1566 0 : f_query_strings[urldecode(*it)] = QString();
1567 : }
1568 0 : else if(pos == 0)
1569 : {
1570 : // name is missing, use "*" instead
1571 0 : f_query_strings["*"] = urldecode(*it);
1572 : }
1573 : else
1574 : {
1575 0 : f_query_strings[urldecode(it->mid(0, pos))] = urldecode(it->mid(pos + 1));
1576 : }
1577 : }
1578 0 : }
1579 :
1580 :
1581 : /** \brief Clear all query option strings.
1582 : *
1583 : * This is useful if you want to "start fresh" with the base URI.
1584 : */
1585 0 : void snap_uri::clear_query_options()
1586 : {
1587 0 : f_query_strings.clear();
1588 0 : }
1589 :
1590 :
1591 : /** \brief Generate the query string.
1592 : *
1593 : * This function goes through the list of defined query string options
1594 : * and builds the resulting query string to generate the final URI.
1595 : *
1596 : * The result is already URL ecoded since you would otherwise not know
1597 : * where/which equal and ampersand are legal.
1598 : *
1599 : * \return The URI query string.
1600 : */
1601 0 : QString snap_uri::query_string() const
1602 : {
1603 0 : QString result;
1604 0 : for(snap_uri_options_t::const_iterator it(f_query_strings.begin());
1605 0 : it != f_query_strings.end();
1606 : ++it)
1607 : {
1608 0 : if(!result.isEmpty())
1609 : {
1610 0 : result += '&';
1611 : }
1612 0 : result += urlencode(it.key());
1613 0 : if(!it.value().isEmpty())
1614 : {
1615 : // add the value only if not empty
1616 0 : result += "=";
1617 : // we now support commas in URIs because... well... it is
1618 : // common and it won't break anything
1619 : //
1620 0 : result += urlencode(it.value(), ",");
1621 : }
1622 : }
1623 0 : return result;
1624 : }
1625 :
1626 :
1627 : /** \brief Retrieve whether a query option is defined.
1628 : *
1629 : * This function returns true if a query option is defined. Note that
1630 : * an option may be the empty string ("") and that cannot be distinguish
1631 : * from the empty string ("") returned when the query_option() function
1632 : * is used against an undefined option.
1633 : *
1634 : * \param[in] name The name of the option to query.
1635 : *
1636 : * \return true when the has_query_option() is defined.
1637 : *
1638 : * \sa query_option();
1639 : */
1640 0 : bool snap_uri::has_query_option(const QString& name) const
1641 : {
1642 0 : if(name.isEmpty())
1643 : {
1644 : // this happens if the name was not defined in the configuration file
1645 0 : return false;
1646 : }
1647 :
1648 0 : return f_query_strings.contains(name);
1649 : }
1650 :
1651 : /** \brief Retrieve a query string option.
1652 : *
1653 : * This function can be used to retrieve the current value of a query
1654 : * string option.
1655 : *
1656 : * Note that you cannot know whether an option is defined using this
1657 : * function since the function returns an empty string whether it is
1658 : * empty or undefined. Instead, use the has_query_option() function
1659 : * to determine whether an option is defined.
1660 : *
1661 : * \param[in] name Name of the query string option to return.
1662 : *
1663 : * \sa has_query_option();
1664 : */
1665 0 : QString snap_uri::query_option(const QString & name) const
1666 : {
1667 0 : if(name.isEmpty())
1668 : {
1669 : // this happens if the name was not defined in the configuration file
1670 0 : return "";
1671 : }
1672 :
1673 0 : if(f_query_strings.contains(name))
1674 : {
1675 0 : return f_query_strings[name];
1676 : }
1677 0 : return "";
1678 : }
1679 :
1680 : /** \brief Return the number of options are defined in the query string.
1681 : *
1682 : * This function returns the number of options currently defined in the
1683 : * query string. This is useful to go over the list of options with the
1684 : * query_option(int part, QString& name) function.
1685 : *
1686 : * \return The number of query string options currently defined.
1687 : */
1688 0 : int snap_uri::query_option_count() const
1689 : {
1690 0 : return f_query_strings.size();
1691 : }
1692 :
1693 : /** \brief Retrieve an option specifying its index.
1694 : *
1695 : * This function returns the name and value of the option defined at
1696 : * index \p part.
1697 : *
1698 : * The index must be between 0 and the number of options available minus
1699 : * 1 (i.e. query_options_count() - 1).
1700 : *
1701 : * \param[in] part The index of the query string option to retrieve.
1702 : * \param[out] name The name of the option at that index.
1703 : *
1704 : * \return The value of the option at that index.
1705 : *
1706 : * \sa query_option_count();
1707 : */
1708 0 : QString snap_uri::query_option(int part, QString& name) const
1709 : {
1710 0 : if(part < 0 || part >= f_query_strings.size())
1711 : {
1712 0 : throw snap_uri_exception_out_of_bounds(QString("query-option %1 does not exist (range 0 to %2)").arg(part).arg(f_query_strings.size()));
1713 : }
1714 0 : auto it(f_query_strings.begin() + part);
1715 0 : name = it.key();
1716 0 : return it.value();
1717 : }
1718 :
1719 : /** \brief Return the complete map of query strings.
1720 : *
1721 : * This function returns a reference to the internal map of query strings.
1722 : * This is useful to use the begin()/end() and other functions to go through
1723 : * the map.
1724 : *
1725 : * \return A constant reference to the internal query string map.
1726 : */
1727 0 : const snap_uri::snap_uri_options_t& snap_uri::query_string_list() const
1728 : {
1729 0 : return f_query_strings;
1730 : }
1731 :
1732 : /** \brief Define the anchor for this URI.
1733 : *
1734 : * This function is used to setup the anchor used in this URI.
1735 : *
1736 : * An anchor can be defined only if you don't plan to make use of
1737 : * the hash bang feature (see get_uri() for more info) since both
1738 : * features make use of the same technical option.
1739 : *
1740 : * The \p anchor parameter cannot include a '#' character.
1741 : *
1742 : * \note
1743 : * The anchor string can start with a bang (!) since it is legal
1744 : * in an anchor. If you are not using the hash bang feature, it
1745 : * is fine, although it may confuse some search engines.
1746 : *
1747 : * \param[in] uri_anchor The new value for the anchor.
1748 : *
1749 : * \sa get_uri()
1750 : */
1751 0 : void snap_uri::set_anchor(const QString& uri_anchor)
1752 : {
1753 0 : if(uri_anchor.indexOf('#') != -1)
1754 : {
1755 0 : throw snap_uri_exception_invalid_parameter(QString("anchor string \"%1\" cannot include a '#' character").arg(uri_anchor));
1756 : }
1757 0 : f_anchor = uri_anchor;
1758 0 : }
1759 :
1760 : /** \brief Retrieve the current anchor.
1761 : *
1762 : * This function returns a copy of the current anchor. The empty string
1763 : * represents the fact that the anchor is not defined.
1764 : *
1765 : * \return A constant reference to the anchor.
1766 : */
1767 0 : const QString& snap_uri::anchor() const
1768 : {
1769 0 : return f_anchor;
1770 : }
1771 :
1772 : /** \brief Compare two URIs against each other.
1773 : *
1774 : * This function compares two URIs and returns true if they are
1775 : * equal. The URIs are tested using what the get_uri() function
1776 : * generates which means not 100% of the information included
1777 : * in the Snap URI object.
1778 : *
1779 : * \param[in] rhs The right handside to compare this against.
1780 : *
1781 : * \return true when both URIs are equal.
1782 : */
1783 0 : bool snap_uri::operator == (const snap_uri& rhs) const
1784 : {
1785 0 : return get_uri() == rhs.get_uri();
1786 : }
1787 :
1788 : /** \brief Compare two URIs against each other.
1789 : *
1790 : * This function compares two URIs and returns true if they are
1791 : * not equal. The URIs are tested using what the get_uri() function
1792 : * generates which means not 100% of the information included
1793 : * in the Snap URI object.
1794 : *
1795 : * \param[in] rhs The right handside to compare this against.
1796 : *
1797 : * \return true when both URIs differ.
1798 : */
1799 0 : bool snap_uri::operator != (const snap_uri& rhs) const
1800 : {
1801 0 : return !operator == (rhs);
1802 : }
1803 :
1804 : /** \brief Compare two URIs against each other.
1805 : *
1806 : * This function compares two URIs and returns true if this is
1807 : * smaller than the \p rhs parameter. The URIs are tested using
1808 : * what the get_uri() function generates which means not 100% of
1809 : * the information included in the Snap URI object.
1810 : *
1811 : * \param[in] rhs The right handside to compare this against.
1812 : *
1813 : * \return true when this is smaller than rhs.
1814 : */
1815 0 : bool snap_uri::operator < (const snap_uri& rhs) const
1816 : {
1817 0 : return get_uri() < rhs.get_uri();
1818 : }
1819 :
1820 : /** \brief Compare two URIs against each other.
1821 : *
1822 : * This function compares two URIs and returns true if this is
1823 : * smaller or equal to \p rhs. The URIs are tested using
1824 : * what the get_uri() function generates which means not 100% of
1825 : * the information included in the Snap URI object.
1826 : *
1827 : * \param[in] rhs The right handside to compare this against.
1828 : *
1829 : * \return true when this is smaller or equal to rhs.
1830 : */
1831 0 : bool snap_uri::operator <= (const snap_uri& rhs) const
1832 : {
1833 0 : return get_uri() <= rhs.get_uri();
1834 : }
1835 :
1836 :
1837 : /** \brief Compare two URIs against each other.
1838 : *
1839 : * This function compares two URIs and returns true if this is
1840 : * larger than the \p rhs parameter. The URIs are tested using
1841 : * what the get_uri() function generates which means not 100% of
1842 : * the information included in the Snap URI object.
1843 : *
1844 : * \param[in] rhs The right handside to compare this against.
1845 : *
1846 : * \return true when this is larger than rhs.
1847 : */
1848 0 : bool snap_uri::operator > (const snap_uri& rhs) const
1849 : {
1850 0 : return !operator <= (rhs);
1851 : }
1852 :
1853 :
1854 : /** \brief Compare two URIs against each other.
1855 : *
1856 : * This function compares two URIs and returns true if this is
1857 : * larger or equal to \p rhs. The URIs are tested using
1858 : * what the get_uri() function generates which means not 100% of
1859 : * the information included in the Snap URI object.
1860 : *
1861 : * \param[in] rhs The right handside to compare this against.
1862 : *
1863 : * \return true when this is larger or equal to rhs.
1864 : */
1865 0 : bool snap_uri::operator >= (const snap_uri& rhs) const
1866 : {
1867 0 : return !operator < (rhs);
1868 : }
1869 :
1870 :
1871 : /** \brief Encode a URI so it is valid for HTTP.
1872 : *
1873 : * This function encodes all the characters that need to be encoded
1874 : * for a URI to be valid for the HTTP protocol.
1875 : *
1876 : * WARNING: This encodes the entire string. Remember that the string
1877 : * cannot include characters such as :, /, @, ?, =, &, #, ~ which at
1878 : * times appear in fully qualified URIs. Instead, it must be built
1879 : * piece by piece.
1880 : *
1881 : * Note that we do not encode underscores.
1882 : *
1883 : * The \p accepted parameter can be used to avoid converting certain
1884 : * characters (such as / in an anchor and ~ in a path).
1885 : *
1886 : * \param[in] uri The URI to encode.
1887 : * \param[in] accepted Extra characters accepted and not encoded. This
1888 : * parameter cannot be set to nullptr. Use "" instead if no extra characters
1889 : * are accepted.
1890 : *
1891 : * \return The encoded URI, it may be equal to the input.
1892 : */
1893 0 : QString snap_uri::urlencode(QString const & uri, char const * accepted)
1894 : {
1895 0 : QString encoded;
1896 :
1897 0 : QByteArray utf8(uri.toUtf8());
1898 0 : for(const char *u(utf8.data()); *u != '\0'; ++u)
1899 : {
1900 0 : if((*u >= 'A' && *u <= 'Z')
1901 0 : || (*u >= 'a' && *u <= 'z')
1902 0 : || (*u >= '0' && *u <= '9')
1903 0 : || *u == '.' || *u == '-' || *u == '_'
1904 0 : || strchr(accepted, *u) != nullptr)
1905 : {
1906 0 : encoded += *u;
1907 : }
1908 : else
1909 : {
1910 : // note that we are encoding space as %20 and not +
1911 : // because the + should not be supported anymore
1912 0 : encoded += '%';
1913 0 : QString v(QString("%1").arg(*u & 255, 2, 16, QLatin1Char('0')));
1914 0 : encoded += v;
1915 : }
1916 : }
1917 :
1918 0 : return encoded;
1919 : }
1920 :
1921 :
1922 : /** \brief Decode a URI so it can be used internally.
1923 : *
1924 : * This function decodes all the characters that need to be decoded
1925 : * in a URI. In general, this is done to use URI components in a
1926 : * query string, although it needs to be applied to the entire URI.
1927 : *
1928 : * The input is expected to be a valid ASCII string (i.e. A-Z,
1929 : * 0-9, ., %, _, -, ~, and ! characters.) To enter UTF-8 characters,
1930 : * use the % and UTF-8 encoded characters. At this point we do not
1931 : * support the U+ syntax which MS Internet Explorer supports. It may
1932 : * be necessary to add that support at some point.
1933 : *
1934 : * \exception snap_uri_exception_invalid_uri
1935 : * This exception is raised if an invalid character is found in the
1936 : * input URI. This means the URI includes a character that should
1937 : * have been encoded or a %XX is not a valid hexadecimal number.
1938 : *
1939 : * \param[in] uri The URI to encode.
1940 : * \param[in] relax Relax the syntax and accept otherwise invalid codes.
1941 : *
1942 : * \return The decoded URI, it may be equal to the input.
1943 : */
1944 0 : QString snap_uri::urldecode(QString const & uri, bool relax)
1945 : {
1946 : // Note that if the URI is properly encoded, then latin1 == UTF-8
1947 0 : QByteArray input(uri.toUtf8());
1948 :
1949 0 : QByteArray utf8;
1950 0 : for(char const * u(input.data()); *u != '\0'; ++u)
1951 : {
1952 0 : if(*u == '+')
1953 : {
1954 0 : utf8 += ' ';
1955 : }
1956 0 : else if(*u == '%')
1957 : {
1958 0 : ++u;
1959 : char c;
1960 0 : if(u[0] >= '0' && u[0] <= '9')
1961 : {
1962 0 : c = static_cast<char>((u[0] - '0') * 16);
1963 : }
1964 0 : else if(u[0] >= 'A' && u[0] <= 'F')
1965 : {
1966 0 : c = static_cast<char>((u[0] - ('A' - 10)) * 16);
1967 : }
1968 0 : else if(u[0] >= 'a' && u[0] <= 'f')
1969 : {
1970 0 : c = static_cast<char>((u[0] - ('a' - 10)) * 16);
1971 : }
1972 : else
1973 : {
1974 0 : if(!relax)
1975 : {
1976 : //#ifdef DEBUG
1977 : //SNAP_LOG_TRACE() << "url decode?! [" << uri << "]";
1978 : //#endif
1979 0 : throw snap_uri_exception_invalid_uri(QString("urldecode(\"%1\", %2) failed because of an invalid %%xx character (digits are %3 / %4)")
1980 0 : .arg(uri)
1981 0 : .arg(relax ? "true" : "false")
1982 0 : .arg(static_cast<int>(u[0]))
1983 0 : .arg(static_cast<int>(u[1])));
1984 : }
1985 : // use the % as is
1986 0 : utf8 += '%';
1987 0 : --u;
1988 0 : continue;
1989 : }
1990 0 : if(u[1] >= '0' && u[1] <= '9')
1991 : {
1992 0 : c = static_cast<char>(c + u[1] - '0');
1993 : }
1994 0 : else if(u[1] >= 'A' && u[1] <= 'F')
1995 : {
1996 0 : c = static_cast<char>(c + u[1] - ('A' - 10));
1997 : }
1998 0 : else if(u[1] >= 'a' && u[1] <= 'f')
1999 : {
2000 0 : c = static_cast<char>(c + u[1] - ('a' - 10));
2001 : }
2002 : else
2003 : {
2004 0 : if(!relax)
2005 : {
2006 : //#ifdef DEBUG
2007 : //SNAP_LOG_TRACE() << "url decode?! [" << uri << "] (2)";
2008 : //#endif
2009 0 : throw snap_uri_exception_invalid_uri(QString("urldecode(\"%1\", %2) failed because of an invalid %%xx character (digits are %3 / %4)")
2010 0 : .arg(uri)
2011 0 : .arg(relax ? "true" : "false")
2012 0 : .arg(static_cast<int>(u[0]))
2013 0 : .arg(static_cast<int>(u[1])));
2014 : }
2015 : // use the % as is
2016 0 : utf8 += c;
2017 0 : --u;
2018 0 : continue;
2019 : }
2020 : // skip one of the two characters here, the other
2021 : // is skipped in the for() statement
2022 0 : ++u;
2023 0 : utf8 += c;
2024 : }
2025 0 : else if(relax
2026 :
2027 : // these are the only characters allowed by the RFC
2028 0 : || (*u >= 'A' && *u <= 'Z')
2029 0 : || (*u >= 'a' && *u <= 'z')
2030 0 : || (*u >= '0' && *u <= '9')
2031 0 : || *u == '.' || *u == '-'
2032 0 : || *u == '/' || *u == '_'
2033 :
2034 : // not legal in a URI considered 100% valid but most
2035 : // systems accept the following as is so we do too
2036 0 : || *u == '~' || *u == '!'
2037 0 : || *u == '@' || *u == ','
2038 0 : || *u == ';' || *u == ':'
2039 0 : || *u == '(' || *u == ')'
2040 : )
2041 : {
2042 : // The tilde (~), when used, is often to indicate a user a la
2043 : // Unix (~<name>/... or just ~/... for the current user.)
2044 : //
2045 : // The exclamation point (!) is most often used with the hash
2046 : // bang; if that appears in a query string variable, then we
2047 : // need to accept at least the exclamation point (the hash has
2048 : // to be encoded no matter what.)
2049 : //
2050 : // The at sign (@) is used in email addresses.
2051 : //
2052 : // The comma (,) is often used to separate elements; for example
2053 : // the paging support uses "page=p3,s30" for show page 3 with
2054 : // 30 elements per page.
2055 : //
2056 : // The semi-colon (;) may appear if you have an HTML entity in
2057 : // a query string (i.e. "...?value=this+%26amp;+that".)
2058 : //
2059 : // The colon (:) can be used to separate values within a
2060 : // parameter when the comma is not appropriate.
2061 : //
2062 0 : utf8 += *u;
2063 : }
2064 : else
2065 : {
2066 : //#ifdef DEBUG
2067 : //SNAP_LOG_TRACE() << "url decode?! found an invalid character [" << uri << "] (3)";
2068 : //#endif
2069 0 : throw snap_uri_exception_invalid_uri(QString("urldecode(\"%1\", %2) failed because of an invalid character (%3)")
2070 0 : .arg(uri)
2071 0 : .arg(relax ? "true" : "false")
2072 0 : .arg(static_cast<int>(*u)));
2073 : }
2074 : }
2075 :
2076 0 : return QString::fromUtf8(utf8.data());
2077 : }
2078 :
2079 :
2080 : /** \brief Return the port corresponding to a protocol.
2081 : *
2082 : * This function determines what port corresponds to a given protocol
2083 : * assuming that the default is being used.
2084 : *
2085 : * It will handle common protocols internally, others make use of the
2086 : * /etc/services file via the services function calls.
2087 : *
2088 : * \param[in] protocol The protocol to convert to a port number.
2089 : *
2090 : * \return The corresponding port number or -1 if the function cannot
2091 : * determine that number.
2092 : */
2093 0 : int snap_uri::protocol_to_port(QString const & protocol)
2094 : {
2095 0 : if(protocol == "http") // 99% so put it first
2096 : {
2097 0 : return 80;
2098 : }
2099 0 : if(protocol == "https") // 0.9% so put it next
2100 : {
2101 0 : return 443;
2102 : }
2103 0 : if(protocol == "ftp")
2104 : {
2105 0 : return 21;
2106 : }
2107 0 : if(protocol == "ssh")
2108 : {
2109 0 : return 22;
2110 : }
2111 0 : if(protocol == "telnet")
2112 : {
2113 0 : return 23;
2114 : }
2115 0 : if(protocol == "smtp")
2116 : {
2117 0 : return 25;
2118 : }
2119 0 : if(protocol == "gopher")
2120 : {
2121 0 : return 70;
2122 : }
2123 :
2124 : // not a common service, ask the system... (probably less than 0.01%)
2125 0 : QByteArray p(protocol.toUtf8());
2126 0 : servent *s = getservbyname(p.data(), "tcp");
2127 0 : if(s == nullptr)
2128 : {
2129 0 : s = getservbyname(p.data(), "udp");
2130 0 : if(s == nullptr)
2131 : {
2132 : // we don't know...
2133 0 : return -1;
2134 : }
2135 : }
2136 0 : return s->s_port;
2137 : }
2138 :
2139 :
2140 :
2141 :
2142 0 : void domain_variable::read(QtSerialization::QReader& r)
2143 : {
2144 0 : QtSerialization::QComposite comp;
2145 0 : QtSerialization::QFieldInt32 type(comp, "domain_variable::type", f_type);
2146 0 : QtSerialization::QFieldString name(comp, "domain_variable::name", f_name);
2147 0 : QtSerialization::QFieldString value(comp, "domain_variable::value", f_value);
2148 0 : QtSerialization::QFieldString default_value(comp, "domain_variable::default", f_default);
2149 0 : QtSerialization::QFieldBasicType<bool> required(comp, "domain_variable::required", f_required);
2150 0 : r.read(comp);
2151 0 : }
2152 :
2153 :
2154 0 : void domain_variable::write(QtSerialization::QWriter& w) const
2155 : {
2156 0 : QtSerialization::QWriter::QTag tag(w, "domain_variable");
2157 0 : QtSerialization::writeTag(w, "domain_variable::type", f_type);
2158 0 : QtSerialization::writeTag(w, "domain_variable::name", f_name);
2159 0 : QtSerialization::writeTag(w, "domain_variable::value", f_value);
2160 0 : switch(f_type)
2161 : {
2162 0 : case DOMAIN_VARIABLE_TYPE_WEBSITE:
2163 : case DOMAIN_VARIABLE_TYPE_FLAG_WITH_DEFAULT:
2164 0 : QtSerialization::writeTag(w, "domain_variable::default", f_default);
2165 0 : break;
2166 :
2167 0 : default:
2168 : // no default value
2169 0 : break;
2170 :
2171 : }
2172 0 : if(f_required)
2173 : {
2174 0 : QtSerialization::writeTag(w, "domain_variable::required", f_required);
2175 : }
2176 0 : }
2177 :
2178 :
2179 0 : void domain_info::read(QtSerialization::QReader& r)
2180 : {
2181 0 : QtSerialization::QComposite comp;
2182 0 : QtSerialization::QFieldString name(comp, "domain_info::name", f_name);
2183 0 : QtSerialization::QFieldTag vars(comp, "domain_variable", this);
2184 0 : r.read(comp);
2185 0 : }
2186 :
2187 :
2188 0 : void domain_info::readTag(const QString& name, QtSerialization::QReader& r)
2189 : {
2190 0 : if(name == "domain_variable")
2191 : {
2192 : // create a variable with an invalid name
2193 0 : QSharedPointer<domain_variable> var(new domain_variable(domain_variable::DOMAIN_VARIABLE_TYPE_STANDARD, "***", ""));
2194 : // read the data from the reader
2195 0 : var->read(r);
2196 : // add to the variable vector
2197 0 : add_var(var);
2198 : }
2199 0 : }
2200 :
2201 :
2202 0 : void domain_info::write(QtSerialization::QWriter& w) const
2203 : {
2204 0 : QtSerialization::QWriter::QTag tag(w, "domain_info");
2205 0 : QtSerialization::writeTag(w, "domain_info::name", f_name);
2206 0 : int max_vars(f_vars.size());
2207 0 : for(int i(0); i < max_vars; ++i)
2208 : {
2209 0 : f_vars[i]->write(w);
2210 : }
2211 0 : }
2212 :
2213 :
2214 0 : void domain_rules::read(QtSerialization::QReader& r)
2215 : {
2216 0 : QtSerialization::QComposite comp;
2217 0 : QtSerialization::QFieldTag rules(comp, "domain_rules", this);
2218 0 : r.read(comp);
2219 0 : }
2220 :
2221 :
2222 0 : void domain_rules::readTag(const QString& name, QtSerialization::QReader& r)
2223 : {
2224 0 : if(name == "domain_rules")
2225 : {
2226 0 : QtSerialization::QComposite comp;
2227 0 : QtSerialization::QFieldTag info(comp, "domain_info", this);
2228 0 : r.read(comp);
2229 : }
2230 0 : else if(name == "domain_info")
2231 : {
2232 : // create a variable with an invalid name
2233 0 : QSharedPointer<domain_info> info(new domain_info);
2234 : // read the data from the reader
2235 0 : info->read(r);
2236 : // add the info to the rules
2237 0 : add_info(info);
2238 : }
2239 0 : }
2240 :
2241 :
2242 0 : void domain_rules::write(QtSerialization::QWriter& w) const
2243 : {
2244 0 : QtSerialization::QWriter::QTag tag(w, "domain_rules");
2245 0 : int max_info(f_info.size());
2246 0 : for(int i(0); i < max_info; ++i)
2247 : {
2248 0 : f_info[i]->write(w);
2249 : }
2250 0 : }
2251 :
2252 :
2253 :
2254 :
2255 : // the following uses the parser very heavily
2256 : using namespace parser;
2257 :
2258 :
2259 : /** \brief Callback function executed when a qualified name is reduced.
2260 : *
2261 : * Concatenate the qualification and the remainder of the name and save
2262 : * the result in the first token so it looks exactly like a non-qualified
2263 : * name.
2264 : *
2265 : * \param[in] r The rule that generated the callback.
2266 : * \param[in] t The token node holding the data parsed so far.
2267 : */
2268 0 : void domain_set_qualified_name(const rule& r, QSharedPointer<token_node>& t)
2269 : {
2270 0 : NOTUSED(r);
2271 :
2272 0 : QSharedPointer<parser::token_node> n(qSharedPointerDynamicCast<token_node, token>((*t)[0]));
2273 0 : (*t)[0]->set_value((*n)[0]->get_value().toString() + "::" + (*t)[2]->get_value().toString());
2274 0 : }
2275 :
2276 : /** \brief Callback function executed when a standard variable is reduced.
2277 : *
2278 : * This function creates a domain variable of type Standard
2279 : * (DOMAIN_VARIABLE_TYPE_STANDARD) and save it's name and value
2280 : * in the variable.
2281 : *
2282 : * The variable is then saved in the node as user data.
2283 : *
2284 : * \param[in] r The rule that generated the callback.
2285 : * \param[in] t The token node holding the data parsed so far.
2286 : */
2287 0 : void domain_set_standard_var(const rule& r, QSharedPointer<token_node>& t)
2288 : {
2289 0 : NOTUSED(r);
2290 :
2291 : // get the node where the qualified name is defined
2292 0 : QSharedPointer<token_node> n(qSharedPointerDynamicCast<token_node, token>((*t)[0]));
2293 :
2294 0 : QSharedPointer<domain_variable> v(new domain_variable(domain_variable::DOMAIN_VARIABLE_TYPE_STANDARD, (*n)[0]->get_value().toString(), (*t)[2]->get_value().toString()));
2295 0 : t->set_user_data(v);
2296 0 : }
2297 :
2298 : /** \brief Callback function executed when a website variable is reduced.
2299 : *
2300 : * This function creates a domain variable of type Website
2301 : * (DOMAIN_VARIABLE_TYPE_WEBSITE) and save it's name and value
2302 : * in the variable.
2303 : *
2304 : * The variable is then saved in the node as user data.
2305 : *
2306 : * \param[in] r The rule that generated the callback.
2307 : * \param[in] t The token node holding the data parsed so far.
2308 : */
2309 0 : void domain_set_website_var(const rule& r, QSharedPointer<token_node>& t)
2310 : {
2311 0 : NOTUSED(r);
2312 :
2313 0 : QSharedPointer<token_node> n(qSharedPointerDynamicCast<token_node, token>((*t)[0]));
2314 0 : QSharedPointer<domain_variable> v(new domain_variable(domain_variable::DOMAIN_VARIABLE_TYPE_WEBSITE, (*n)[0]->get_value().toString(), (*t)[4]->get_value().toString()));
2315 0 : v->set_default((*t)[6]->get_value().toString());
2316 0 : t->set_user_data(v);
2317 0 : }
2318 :
2319 : /** \brief Callback function executed when a flag variable is reduced.
2320 : *
2321 : * This function creates a domain variable of type Flag
2322 : * and save it's name and value in the variable.
2323 : *
2324 : * The type of the variable is set to DOMAIN_VARIABLE_TYPE_FLAG_NO_DEFAULT
2325 : * if the flag definition does not have a second parameter.
2326 : *
2327 : * The type is set to DOMAIN_VARIABLE_TYPE_FLAG_WITH_DEFAULT when a default
2328 : * is found.
2329 : *
2330 : * The variable is then saved in the node as user data.
2331 : *
2332 : * \param[in] r The rule that generated the callback.
2333 : * \param[in] t The token node holding the data parsed so far.
2334 : */
2335 0 : void domain_set_flag_var(const rule& r, QSharedPointer<token_node>& t)
2336 : {
2337 0 : NOTUSED(r);
2338 :
2339 0 : QSharedPointer<token_node> n(qSharedPointerDynamicCast<token_node, token>((*t)[0]));
2340 0 : QSharedPointer<token_node> o(qSharedPointerDynamicCast<token_node, token>((*t)[5]));
2341 :
2342 : // if o starts with an empty token then it's the empty rule
2343 : // (if not the empty rule, it is the comma)
2344 0 : bool is_empty = (*o)[0]->get_id() == token_t::TOKEN_ID_EMPTY_ENUM;
2345 :
2346 0 : domain_variable::domain_variable_type_t type(is_empty ?
2347 : domain_variable::DOMAIN_VARIABLE_TYPE_FLAG_NO_DEFAULT
2348 : : domain_variable::DOMAIN_VARIABLE_TYPE_FLAG_WITH_DEFAULT);
2349 : QSharedPointer<domain_variable> v(new domain_variable(type,
2350 0 : (*n)[0]->get_value().toString(), (*t)[4]->get_value().toString()));
2351 0 : if(!is_empty)
2352 : {
2353 : // there is a default so we can access the next token
2354 0 : v->set_default((*o)[1]->get_value().toString());
2355 : }
2356 0 : t->set_user_data(v);
2357 0 : }
2358 :
2359 : /** \brief Callback function executed when a sub-domain is reduced.
2360 : *
2361 : * This function marks the sub-domain variable as required.
2362 : *
2363 : * The variable is then saved in the node as user data.
2364 : *
2365 : * \param[in] r The rule that generated the callback.
2366 : * \param[in] t The token node holding the data parsed so far.
2367 : */
2368 0 : void domain_set_var_required(const rule& r, QSharedPointer<token_node>& t)
2369 : {
2370 0 : NOTUSED(r);
2371 :
2372 0 : QSharedPointer<token_node> n(qSharedPointerDynamicCast<token_node, token>((*t)[1]));
2373 0 : QSharedPointer<domain_variable> v(qSharedPointerDynamicCast<domain_variable, parser_user_data>(n->get_user_data()));
2374 0 : v->set_required();
2375 0 : t->set_user_data(v);
2376 0 : }
2377 :
2378 : /** \brief Callback function executed when a sub-domain is reduced.
2379 : *
2380 : * This function marks the sub-domain variable as optional.
2381 : *
2382 : * The variable is then saved in the node as user data.
2383 : *
2384 : * \param[in] r The rule that generated the callback.
2385 : * \param[in] t The token node holding the data parsed so far.
2386 : */
2387 0 : void domain_set_var_optional(const rule& r, QSharedPointer<token_node>& t)
2388 : {
2389 0 : NOTUSED(r);
2390 :
2391 0 : QSharedPointer<token_node> n(qSharedPointerDynamicCast<token_node, token>((*t)[1]));
2392 0 : QSharedPointer<domain_variable> v(qSharedPointerDynamicCast<domain_variable, parser_user_data>(n->get_user_data()));
2393 0 : v->set_required(false);
2394 0 : t->set_user_data(v);
2395 0 : }
2396 :
2397 : /** \brief Callback function executed when a sub-domain list is reduced.
2398 : *
2399 : * This function creates a new domain information object and adds
2400 : * the domain variable to it.
2401 : *
2402 : * The result is then saved in the node as user data.
2403 : *
2404 : * \param[in] r The rule that generated the callback.
2405 : * \param[in] t The token node holding the data parsed so far.
2406 : */
2407 0 : void domain_set_new_domain_list(const rule& r, QSharedPointer<token_node>& t)
2408 : {
2409 0 : NOTUSED(r);
2410 :
2411 0 : QSharedPointer<token_node> n(qSharedPointerDynamicCast<token_node, token>((*t)[0]));
2412 0 : QSharedPointer<domain_variable> v(qSharedPointerDynamicCast<domain_variable, parser_user_data>(n->get_user_data()));
2413 0 : QSharedPointer<domain_info> info(new domain_info);
2414 0 : info->add_var(v);
2415 0 : t->set_user_data(info);
2416 0 : }
2417 :
2418 : /** \brief Callback function executed when a sub-domain list is reduced.
2419 : *
2420 : * This function adds the domain variable to an existing domain information
2421 : * object.
2422 : *
2423 : * The result is then saved in the node as user data.
2424 : *
2425 : * \param[in] r The rule that generated the callback.
2426 : * \param[in] t The token node holding the data parsed so far.
2427 : */
2428 0 : void domain_set_add_domain_list(const rule& r, QSharedPointer<token_node>& t)
2429 : {
2430 0 : NOTUSED(r);
2431 :
2432 0 : QSharedPointer<token_node> nl(qSharedPointerDynamicCast<token_node, token>((*t)[0]));
2433 0 : QSharedPointer<token_node> nr(qSharedPointerDynamicCast<token_node, token>((*t)[1]));
2434 0 : QSharedPointer<domain_info> info(qSharedPointerDynamicCast<domain_info, parser_user_data>(nl->get_user_data()));
2435 0 : QSharedPointer<domain_variable> v(qSharedPointerDynamicCast<domain_variable, parser_user_data>(nr->get_user_data()));
2436 0 : info->add_var(v);
2437 0 : t->set_user_data(info);
2438 0 : }
2439 :
2440 : /** \brief Callback function executed when the rule is reduced.
2441 : *
2442 : * This function defines the name of the rule in the domain information
2443 : * object.
2444 : *
2445 : * The result is then saved in the node as user data.
2446 : *
2447 : * \param[in] r The rule that generated the callback.
2448 : * \param[in] t The token node holding the data parsed so far.
2449 : */
2450 0 : void domain_set_rule(const rule& r, QSharedPointer<token_node>& t)
2451 : {
2452 0 : NOTUSED(r);
2453 :
2454 0 : QSharedPointer<token_node> n(qSharedPointerDynamicCast<token_node, token>((*t)[2]));
2455 0 : QSharedPointer<domain_info> info(qSharedPointerDynamicCast<domain_info, parser_user_data>(n->get_user_data()));
2456 0 : info->set_name((*t)[0]->get_value().toString());
2457 0 : t->set_user_data(info);
2458 0 : }
2459 :
2460 : /** \brief Callback function executed when the rule list is reduced.
2461 : *
2462 : * This function creates a new domain rule object and adds the
2463 : * domain information to it.
2464 : *
2465 : * The result is then saved in the node as user data.
2466 : *
2467 : * \param[in] r The rule that generated the callback.
2468 : * \param[in] t The token node holding the data parsed so far.
2469 : */
2470 0 : void domain_set_new_rule_list(const rule& r, QSharedPointer<token_node>& t)
2471 : {
2472 0 : NOTUSED(r);
2473 :
2474 0 : QSharedPointer<token_node> n(qSharedPointerDynamicCast<token_node, token>((*t)[0]));
2475 0 : QSharedPointer<domain_info> info(qSharedPointerDynamicCast<domain_info, parser_user_data>(n->get_user_data()));
2476 0 : QSharedPointer<domain_rules> rules(new domain_rules);
2477 0 : rules->add_info(info);
2478 0 : t->set_user_data(rules);
2479 0 : }
2480 :
2481 : /** \brief Callback function executed when the rule list is reduced.
2482 : *
2483 : * This function adds the domain information to an existing rules
2484 : * object.
2485 : *
2486 : * The result is then saved in the node as user data.
2487 : *
2488 : * \param[in] r The rule that generated the callback.
2489 : * \param[in] t The token node holding the data parsed so far.
2490 : */
2491 0 : void domain_set_add_rule_list(const rule& r, QSharedPointer<token_node>& t)
2492 : {
2493 0 : NOTUSED(r);
2494 :
2495 0 : QSharedPointer<token_node> nl(qSharedPointerDynamicCast<token_node, token>((*t)[0]));
2496 0 : QSharedPointer<token_node> nr(qSharedPointerDynamicCast<token_node, token>((*t)[1]));
2497 0 : QSharedPointer<domain_rules> rules(qSharedPointerDynamicCast<domain_rules, parser_user_data>(nl->get_user_data()));
2498 0 : QSharedPointer<domain_info> info(qSharedPointerDynamicCast<domain_info, parser_user_data>(nr->get_user_data()));
2499 0 : rules->add_info(info);
2500 0 : t->set_user_data(rules);
2501 0 : }
2502 :
2503 : /** \brief Callback function executed when the start rule is reduced.
2504 : *
2505 : * This function saves the result, domain_rules, in the start rule user data.
2506 : *
2507 : * \param[in] r The rule that generated the callback.
2508 : * \param[in] t The token node holding the data parsed so far.
2509 : */
2510 0 : void domain_set_start_result(const rule& r, QSharedPointer<token_node>& t)
2511 : {
2512 0 : NOTUSED(r);
2513 :
2514 0 : QSharedPointer<token_node> n(qSharedPointerDynamicCast<token_node, token>((*t)[0]));
2515 : // we don't need the dynamic cast since we don't need to access the rules
2516 0 : t->set_user_data(n->get_user_data());
2517 0 : }
2518 :
2519 : /** \brief Parse a Domain Rule Script
2520 : *
2521 : * This function takes a script and parses it into a set of regular expressions
2522 : * given a name and settings such as whether the expression is optional, has
2523 : * a default value, etc.
2524 : *
2525 : * Details about the domain rule script are found here:
2526 : * https://snapwebsites.org/implementation/basic-concept-url-website/url-test
2527 : *
2528 : * start: rule_list
2529 : *
2530 : * rule_list: rule
2531 : * | rule_list rule
2532 : * rule: IDENTIFIER '{' sub_domain_list '}' ';'
2533 : *
2534 : * sub_domain_list: sub_domain
2535 : * | sub_domain_list sub_domain
2536 : * sub_domain: OPTIONAL sub_domain_var ';'
2537 : * | REQUIRED sub_domain_var ';'
2538 : * sub_domain_var: qualified_name '=' STRING
2539 : * | qualified_name '=' WEBSITE(STRING, STRING)
2540 : * | qualified_name '=' FLAG(STRING [, STRING] )
2541 : *
2542 : * qualified_name: IDENTIFIER
2543 : * | qualified_name '::' IDENTIFIER
2544 : *
2545 : * The following are all the post-parsing tests that we apply to the data to
2546 : * make sure that it is valid.
2547 : *
2548 : * \li The variable qualified name must be "global::<name>", "domain::<name>",
2549 : * or "global::domain::<name>".
2550 : * \li Each variable name must be unique within one rule, ignoring the
2551 : * qualification names.
2552 : * \li A flag cannot have a default value when required.
2553 : * \li Domain names, as defined in the rule, must be unique within the entire
2554 : * definition.
2555 : * \li Variable values cannot include a regular expression that captures
2556 : * data or it will generate problems with our algorithms.
2557 : * \li Variable values must be valid regular expressions (i.e. they must be
2558 : * compilable without errors.)
2559 : *
2560 : * \param[in] script The script as written by an administrator.
2561 : * \param[out] result The resulting compressed script when the function
2562 : * returns true.
2563 : *
2564 : * \return true if the parser succeeds, false otherwise.
2565 : */
2566 0 : bool snap_uri_rules::parse_domain_rules(const QString& script, QByteArray& result)
2567 : {
2568 : // LEXER
2569 :
2570 : // lexer object
2571 0 : parser::lexer lexer;
2572 0 : lexer.set_input(script);
2573 0 : parser::keyword keyword_flag(lexer, "flag");
2574 0 : parser::keyword keyword_optional(lexer, "optional");
2575 0 : parser::keyword keyword_required(lexer, "required");
2576 0 : parser::keyword keyword_website(lexer, "website");
2577 :
2578 : // GRAMMAR
2579 0 : parser::grammar g;
2580 :
2581 : // qualified_name
2582 0 : choices qualified_name(&g, "qualified_name");
2583 0 : qualified_name >>= TOKEN_ID_IDENTIFIER // keep identifier as is
2584 0 : | qualified_name >> "::" >> TOKEN_ID_IDENTIFIER
2585 0 : >= domain_set_qualified_name
2586 : ;
2587 :
2588 : // flag_opt_param
2589 0 : choices flag_opt_param(&g, "flag_opt_param");
2590 0 : flag_opt_param >>= TOKEN_ID_EMPTY // keep as is
2591 0 : | "," >> TOKEN_ID_STRING // keep as is
2592 : ;
2593 :
2594 : // sub_domain_var
2595 0 : parser::choices sub_domain_var(&g, "sub_domain_var");
2596 0 : sub_domain_var >>= qualified_name >> "=" >> TOKEN_ID_STRING
2597 : >= domain_set_standard_var
2598 0 : | qualified_name >> "=" >> keyword_website
2599 0 : >> "(" >> TOKEN_ID_STRING >> "," >> TOKEN_ID_STRING
2600 0 : >> ")" >= domain_set_website_var
2601 0 : | qualified_name >> "=" >> keyword_flag >>
2602 0 : "(" >> TOKEN_ID_STRING >> flag_opt_param >> ")"
2603 0 : >= domain_set_flag_var
2604 : ;
2605 :
2606 : // sub_domain:
2607 0 : parser::choices sub_domain(&g, "sub_domain");
2608 0 : sub_domain >>= keyword_required >> sub_domain_var >> ";" >= domain_set_var_required
2609 0 : | keyword_optional >> sub_domain_var >> ";" >= domain_set_var_optional
2610 : ;
2611 :
2612 : // sub_domain_list:
2613 0 : parser::choices sub_domain_list(&g, "sub_domain_list");
2614 0 : sub_domain_list >>= sub_domain >= domain_set_new_domain_list
2615 0 : | sub_domain_list >> sub_domain >= domain_set_add_domain_list
2616 : ;
2617 :
2618 : // rule
2619 0 : parser::choices rule(&g, "rule");
2620 0 : rule >>= TOKEN_ID_IDENTIFIER >> "{" >> sub_domain_list >> "}"
2621 0 : >> ";" >= domain_set_rule
2622 : ;
2623 :
2624 : // rule_list
2625 0 : parser::choices rule_list(&g, "rule_list");
2626 0 : rule_list >>= rule >= domain_set_new_rule_list
2627 0 : | rule_list >> rule >= domain_set_add_rule_list
2628 : ;
2629 :
2630 : // start
2631 0 : parser::choices start(&g, "start");
2632 0 : start >>= rule_list >= domain_set_start_result;
2633 :
2634 0 : if(!g.parse(lexer, start))
2635 : {
2636 0 : f_errmsg = "parsing error";
2637 0 : return false;
2638 : }
2639 :
2640 : // if we reach here, we've got a "parser valid result"
2641 : // but there may be other problems that we check here
2642 : // . rule, domain name used twice
2643 : // . valid namespaces
2644 : // . flags with a default when marked as required
2645 : // . invalid, unacceptable regular expressions
2646 0 : QSharedPointer<token_node> r(g.get_result());
2647 0 : QSharedPointer<domain_rules> dr(qSharedPointerDynamicCast<domain_rules, parser_user_data>(r->get_user_data()));
2648 0 : QMap<QString, int> rule_names;
2649 0 : int domain_max(dr->size());
2650 0 : for(int i = 0; i < domain_max; ++i)
2651 : {
2652 0 : QSharedPointer<domain_info> info((*dr)[i]);
2653 0 : const QString& rule_name(info->get_name());
2654 0 : if(rule_names.contains(rule_name))
2655 : {
2656 : // the same domain name was defined twice
2657 0 : f_errmsg = "found two rules named \"" + rule_name + "\"";
2658 0 : return false;
2659 : }
2660 : // the map value is ignored
2661 0 : rule_names.insert(rule_name, 0);
2662 :
2663 0 : QMap<QString, int> var_names;
2664 0 : int info_max(info->size());
2665 0 : for(int j = 0; j < info_max; ++j)
2666 : {
2667 0 : QSharedPointer<domain_variable> var((*info)[j]);
2668 0 : const QString& var_name(var->get_name());
2669 0 : const snap_string_list var_qualified_names(var_name.split("::"));
2670 0 : if(var_names.contains(var_qualified_names.last()))
2671 : {
2672 : // the same domain variable name was defined twice
2673 0 : f_errmsg = "found two variables named \"" + var_name + "\"";
2674 0 : return false;
2675 : }
2676 0 : var_names.insert(var_qualified_names.last(), 1);
2677 0 : switch(var_qualified_names.size())
2678 : {
2679 0 : case 1:
2680 : // just the name no extra test necessary
2681 0 : break;
2682 :
2683 0 : case 2:
2684 : // one qualification, can be global or domain
2685 0 : if(var_qualified_names.first() != "global"
2686 0 : && var_qualified_names.first() != "domain")
2687 : {
2688 : // invalid qualification
2689 0 : f_errmsg = "incompatible qualified name \"" + var_qualified_names.first() + "\"";
2690 0 : return false;
2691 : }
2692 0 : break;
2693 :
2694 0 : case 3:
2695 : // two qualification, must be global::domain
2696 0 : if(var_qualified_names[0] != "global"
2697 0 : || var_qualified_names[1] != "domain")
2698 : {
2699 : // invalid qualification
2700 0 : f_errmsg = "incompatible qualified name \"" + var_qualified_names[0] + "::" + var_qualified_names[1] + "\"";
2701 0 : return false;
2702 : }
2703 0 : break;
2704 :
2705 0 : default:
2706 : // invalid qualification, cannot be more than 2 names
2707 0 : f_errmsg = "incompatible qualified names \"" + var_qualified_names[0] + "::" + var_qualified_names[1] + "::...\"";
2708 0 : return false;
2709 :
2710 : }
2711 0 : if(var->get_required()
2712 0 : && var->get_type() == domain_variable::DOMAIN_VARIABLE_TYPE_FLAG_WITH_DEFAULT)
2713 : {
2714 : // if a flag is required, then no default can be specified
2715 0 : f_errmsg = "a required flag cannot have a default value";
2716 0 : return false;
2717 : }
2718 : // the values cannot include the delimiters since we are to
2719 : // merge them to use them so here we take that in account
2720 0 : const QString& value(var->get_value());
2721 0 : QRegExp re(value);
2722 0 : if(!re.isValid())
2723 : {
2724 : // the regular expression is not valid!?
2725 0 : f_errmsg = "regular expression \"" + value + "\" is not valid";
2726 0 : return false;
2727 : }
2728 0 : if(re.captureCount() != 0)
2729 : {
2730 : // we do not allow users to capture anything, they have to
2731 : // use the Perl syntax: (?:<pattern>)
2732 0 : f_errmsg = "regular expression \"" + value + "\" cannot include a capture (something between parenthesis)";
2733 0 : return false;
2734 : }
2735 : }
2736 : }
2737 :
2738 : // now we can generate the result
2739 : //QDataStream archive(&result, QIODevice::WriteOnly);
2740 : //archive << *dr;
2741 :
2742 0 : QBuffer archive(&result);
2743 0 : archive.open(QIODevice::WriteOnly);
2744 0 : QtSerialization::QWriter w(archive, "domain_rules", 1, 0);
2745 0 : dr->write(w);
2746 :
2747 0 : return true;
2748 : }
2749 :
2750 :
2751 0 : void website_variable::read(QtSerialization::QReader& r)
2752 : {
2753 0 : QtSerialization::QComposite comp;
2754 0 : QtSerialization::QFieldInt32 type(comp, "website_variable::type", f_type);
2755 0 : QtSerialization::QFieldInt32 part(comp, "website_variable::part", f_part);
2756 0 : QtSerialization::QFieldString name(comp, "website_variable::name", f_name);
2757 0 : QtSerialization::QFieldString value(comp, "website_variable::value", f_value);
2758 0 : QtSerialization::QFieldString default_value(comp, "website_variable::default", f_default);
2759 0 : QtSerialization::QFieldBasicType<bool> required(comp, "website_variable::required", f_required);
2760 0 : r.read(comp);
2761 0 : }
2762 :
2763 0 : void website_variable::write(QtSerialization::QWriter& w) const
2764 : {
2765 0 : QtSerialization::QWriter::QTag tag(w, "website_variable");
2766 0 : QtSerialization::writeTag(w, "website_variable::type", f_type);
2767 0 : QtSerialization::writeTag(w, "website_variable::part", f_part);
2768 0 : QtSerialization::writeTag(w, "website_variable::name", f_name);
2769 0 : QtSerialization::writeTag(w, "website_variable::value", f_value);
2770 0 : switch(f_type)
2771 : {
2772 0 : case WEBSITE_VARIABLE_TYPE_WEBSITE:
2773 : case WEBSITE_VARIABLE_TYPE_FLAG_WITH_DEFAULT:
2774 0 : QtSerialization::writeTag(w, "website_variable::default", f_default);
2775 0 : break;
2776 :
2777 0 : default:
2778 : // not default value
2779 0 : break;
2780 :
2781 : }
2782 0 : if(f_required)
2783 : {
2784 0 : QtSerialization::writeTag(w, "website_variable::required", f_required);
2785 : }
2786 0 : }
2787 :
2788 0 : void website_info::read(QtSerialization::QReader& r)
2789 : {
2790 0 : QtSerialization::QComposite comp;
2791 0 : QtSerialization::QFieldString name(comp, "website_info::name", f_name);
2792 0 : QtSerialization::QFieldTag vars(comp, "website_variable", this);
2793 0 : r.read(comp);
2794 0 : }
2795 :
2796 0 : void website_info::readTag(const QString& name, QtSerialization::QReader& r)
2797 : {
2798 0 : if(name == "website_variable")
2799 : {
2800 : // create a variable with an invalid name
2801 0 : QSharedPointer<website_variable> var(new website_variable(website_variable::WEBSITE_VARIABLE_TYPE_STANDARD, "***", ""));
2802 : // read the data from the reader
2803 0 : var->read(r);
2804 : // add to the variable vector
2805 0 : add_var(var);
2806 : }
2807 0 : }
2808 :
2809 0 : void website_info::write(QtSerialization::QWriter& w) const
2810 : {
2811 0 : QtSerialization::QWriter::QTag tag(w, "website_info");
2812 0 : QtSerialization::writeTag(w, "website_info::name", f_name);
2813 0 : int max_vars(f_vars.size());
2814 0 : for(int i(0); i < max_vars; ++i)
2815 : {
2816 0 : f_vars[i]->write(w);
2817 : }
2818 0 : }
2819 :
2820 0 : void website_rules::read(QtSerialization::QReader& r)
2821 : {
2822 0 : QtSerialization::QComposite comp;
2823 0 : QtSerialization::QFieldTag info(comp, "website_rules", this);
2824 0 : r.read(comp);
2825 0 : }
2826 :
2827 0 : void website_rules::readTag(const QString& name, QtSerialization::QReader& r)
2828 : {
2829 0 : if(name == "website_rules")
2830 : {
2831 0 : QtSerialization::QComposite comp;
2832 0 : QtSerialization::QFieldTag info(comp, "website_info", this);
2833 0 : r.read(comp);
2834 : }
2835 0 : else if(name == "website_info")
2836 : {
2837 : // create a variable with an invalid name
2838 0 : QSharedPointer<website_info> info(new website_info);
2839 : // read the data from the reader
2840 0 : info->read(r);
2841 : // add the info to the rules
2842 0 : add_info(info);
2843 : }
2844 0 : }
2845 :
2846 0 : void website_rules::write(QtSerialization::QWriter& w) const
2847 : {
2848 0 : QtSerialization::QWriter::QTag tag(w, "website_rules");
2849 0 : int max_info(f_info.size());
2850 0 : for(int i(0); i < max_info; ++i)
2851 : {
2852 0 : f_info[i]->write(w);
2853 : }
2854 0 : }
2855 :
2856 :
2857 : /** \brief Callback function executed when a qualified name is reduced.
2858 : *
2859 : * Concatenate the qualification and the remainder of the name and save
2860 : * the result in the first token so it looks exactly like a non-qualified
2861 : * name.
2862 : *
2863 : * \param[in] r The rule that generated the callback.
2864 : * \param[in] t The token node holding the data parsed so far.
2865 : */
2866 0 : void website_set_qualified_name(const rule& r, QSharedPointer<token_node>& t)
2867 : {
2868 0 : NOTUSED(r);
2869 :
2870 0 : QSharedPointer<parser::token_node> n(qSharedPointerDynamicCast<token_node, token>((*t)[0]));
2871 0 : (*t)[0]->set_value((*n)[0]->get_value().toString() + "::" + (*t)[2]->get_value().toString());
2872 0 : }
2873 :
2874 : /** \brief Callback function executed when a standard variable is reduced.
2875 : *
2876 : * This function creates a website variable of type Standard
2877 : * (WEBSITE_VARIABLE_TYPE_STANDARD) and save it's name and value
2878 : * in the variable.
2879 : *
2880 : * The variable is then saved in the node as user data.
2881 : *
2882 : * \param[in] r The rule that generated the callback.
2883 : * \param[in] t The token node holding the data parsed so far.
2884 : */
2885 0 : void website_set_standard_var(const rule& r, QSharedPointer<token_node>& t)
2886 : {
2887 0 : NOTUSED(r);
2888 :
2889 : // get the node where the qualified name is defined
2890 0 : QSharedPointer<token_node> n(qSharedPointerDynamicCast<token_node, token>((*t)[0]));
2891 :
2892 0 : QSharedPointer<website_variable> v(new website_variable(website_variable::WEBSITE_VARIABLE_TYPE_STANDARD, (*n)[0]->get_value().toString(), (*t)[2]->get_value().toString()));
2893 0 : t->set_user_data(v);
2894 0 : }
2895 :
2896 : /** \brief Callback function executed when a website variable is reduced.
2897 : *
2898 : * This function creates a website variable of type Website
2899 : * (WEBSITE_VARIABLE_TYPE_WEBSITE) and save it's name and value
2900 : * in the variable.
2901 : *
2902 : * The variable is then saved in the node as user data.
2903 : *
2904 : * \param[in] r The rule that generated the callback.
2905 : * \param[in] t The token node holding the data parsed so far.
2906 : */
2907 0 : void website_set_website_var(const rule& r, QSharedPointer<token_node>& t)
2908 : {
2909 0 : NOTUSED(r);
2910 :
2911 0 : QSharedPointer<token_node> n(qSharedPointerDynamicCast<token_node, token>((*t)[0]));
2912 0 : QSharedPointer<website_variable> v(new website_variable(website_variable::WEBSITE_VARIABLE_TYPE_WEBSITE, (*n)[0]->get_value().toString(), (*t)[4]->get_value().toString()));
2913 0 : v->set_default((*t)[6]->get_value().toString());
2914 0 : t->set_user_data(v);
2915 0 : }
2916 :
2917 : /** \brief Callback function executed when a flag variable is reduced.
2918 : *
2919 : * This function creates a website variable of type Flag
2920 : * and save it's name and value in the variable.
2921 : *
2922 : * The type of the variable is set to WEBSITE_VARIABLE_TYPE_FLAG_NO_DEFAULT
2923 : * if the flag definition does not have a second parameter.
2924 : *
2925 : * The type is set to WEBSITE_VARIABLE_TYPE_FLAG_WITH_DEFAULT when a default
2926 : * is found.
2927 : *
2928 : * The variable is then saved in the node as user data.
2929 : *
2930 : * \param[in] r The rule that generated the callback.
2931 : * \param[in] t The token node holding the data parsed so far.
2932 : */
2933 0 : void website_set_flag_var(const rule& r, QSharedPointer<token_node>& t)
2934 : {
2935 0 : NOTUSED(r);
2936 :
2937 0 : QSharedPointer<token_node> n(qSharedPointerDynamicCast<token_node, token>((*t)[0]));
2938 0 : QSharedPointer<token_node> o(qSharedPointerDynamicCast<token_node, token>((*t)[5]));
2939 :
2940 : // if o starts with an empty token then it's the empty rule
2941 : // (if not the empty rule, it is the comma)
2942 0 : bool is_empty = (*o)[0]->get_id() == token_t::TOKEN_ID_EMPTY_ENUM;
2943 :
2944 0 : website_variable::website_variable_type_t type(is_empty ?
2945 : website_variable::WEBSITE_VARIABLE_TYPE_FLAG_NO_DEFAULT
2946 : : website_variable::WEBSITE_VARIABLE_TYPE_FLAG_WITH_DEFAULT);
2947 : QSharedPointer<website_variable> v(new website_variable(type,
2948 0 : (*n)[0]->get_value().toString(), (*t)[4]->get_value().toString()));
2949 0 : if(!is_empty)
2950 : {
2951 : // there is a default so we can access the next token
2952 0 : v->set_default((*o)[1]->get_value().toString());
2953 : }
2954 0 : t->set_user_data(v);
2955 0 : }
2956 :
2957 : /** \brief Callback function executed when a website is reduced.
2958 : *
2959 : * This function marks the website variable as required.
2960 : *
2961 : * The variable is then saved in the node as user data.
2962 : *
2963 : * \param[in] r The rule that generated the callback.
2964 : * \param[in] t The token node holding the data parsed so far.
2965 : */
2966 0 : void website_set_var_required(const rule& r, QSharedPointer<token_node>& t)
2967 : {
2968 0 : NOTUSED(r);
2969 :
2970 0 : QSharedPointer<token_node> n(qSharedPointerDynamicCast<token_node, token>((*t)[1]));
2971 0 : QSharedPointer<website_variable> v(qSharedPointerDynamicCast<website_variable, parser_user_data>(n->get_user_data()));
2972 0 : v->set_required();
2973 0 : t->set_user_data(v);
2974 0 : }
2975 :
2976 : /** \brief Callback function executed when a website is reduced.
2977 : *
2978 : * This function marks the website variable as optional.
2979 : *
2980 : * The variable is then saved in the node as user data.
2981 : *
2982 : * \param[in] r The rule that generated the callback.
2983 : * \param[in] t The token node holding the data parsed so far.
2984 : */
2985 0 : void website_set_var_optional(const rule& r, QSharedPointer<token_node>& t)
2986 : {
2987 0 : NOTUSED(r);
2988 :
2989 0 : QSharedPointer<token_node> n(qSharedPointerDynamicCast<token_node, token>((*t)[1]));
2990 0 : QSharedPointer<website_variable> v(qSharedPointerDynamicCast<website_variable, parser_user_data>(n->get_user_data()));
2991 0 : v->set_required(false);
2992 0 : t->set_user_data(v);
2993 0 : }
2994 :
2995 : /** \brief Callback function executed when a path rule is reduced.
2996 : *
2997 : * This function marks the website variable as a path part.
2998 : *
2999 : * The result is then saved in the node as user data.
3000 : *
3001 : * \param[in] r The rule that generated the callback.
3002 : * \param[in] t The token node holding the data parsed so far.
3003 : */
3004 0 : void website_set_var_path(const rule& r, QSharedPointer<token_node>& t)
3005 : {
3006 0 : NOTUSED(r);
3007 :
3008 0 : if(t->size() == 1)
3009 : {
3010 0 : QSharedPointer<token_node> n(qSharedPointerDynamicCast<token_node, token>((*t)[0]));
3011 0 : QSharedPointer<website_variable> v(qSharedPointerDynamicCast<website_variable, parser_user_data>(n->get_user_data()));
3012 : //v->set_part(website_variable::WEBSITE_VARIABLE_PART_PATH); -- this is the default anyway
3013 0 : t->set_user_data(v);
3014 : }
3015 : else
3016 : {
3017 0 : QSharedPointer<token_node> n(qSharedPointerDynamicCast<token_node, token>((*t)[1]));
3018 0 : QSharedPointer<website_variable> v(qSharedPointerDynamicCast<website_variable, parser_user_data>(n->get_user_data()));
3019 : //v->set_part(website_variable::WEBSITE_VARIABLE_PART_PATH); -- this is the default anyway
3020 0 : t->set_user_data(v);
3021 : }
3022 0 : }
3023 :
3024 : /** \brief Callback function executed when a port rule is reduced.
3025 : *
3026 : * This function marks the website variable as a port part.
3027 : *
3028 : * The result is then saved in the node as user data.
3029 : *
3030 : * \param[in] r The rule that generated the callback.
3031 : * \param[in] t The token node holding the data parsed so far.
3032 : */
3033 0 : void website_set_var_port(const rule& r, QSharedPointer<token_node>& t)
3034 : {
3035 0 : NOTUSED(r);
3036 :
3037 : QSharedPointer<website_variable> v(new website_variable(website_variable::WEBSITE_VARIABLE_TYPE_STANDARD,
3038 0 : "port", (*t)[2]->get_value().toString()));
3039 0 : v->set_part(website_variable::WEBSITE_VARIABLE_PART_PORT);
3040 0 : v->set_required();
3041 0 : t->set_user_data(v);
3042 0 : }
3043 :
3044 : /** \brief Callback function executed when a query rule is reduced.
3045 : *
3046 : * This function marks the website variable as a query part.
3047 : *
3048 : * The result is then saved in the node as user data.
3049 : *
3050 : * \param[in] r The rule that generated the callback.
3051 : * \param[in] t The token node holding the data parsed so far.
3052 : */
3053 0 : void website_set_var_query(const rule& r, QSharedPointer<token_node>& t)
3054 : {
3055 0 : NOTUSED(r);
3056 :
3057 0 : QSharedPointer<token_node> n(qSharedPointerDynamicCast<token_node, token>((*t)[1]));
3058 0 : QSharedPointer<website_variable> v(qSharedPointerDynamicCast<website_variable, parser_user_data>(n->get_user_data()));
3059 0 : v->set_part(website_variable::WEBSITE_VARIABLE_PART_QUERY);
3060 0 : t->set_user_data(v);
3061 0 : }
3062 :
3063 : /** \brief Callback function executed when a protocol rule is reduced.
3064 : *
3065 : * This function marks the website variable as a protocol part.
3066 : *
3067 : * The result is then saved in the node as user data.
3068 : *
3069 : * \param[in] r The rule that generated the callback.
3070 : * \param[in] t The token node holding the data parsed so far.
3071 : */
3072 0 : void website_set_var_protocol(const rule& r, QSharedPointer<token_node>& t)
3073 : {
3074 0 : NOTUSED(r);
3075 :
3076 : //QSharedPointer<token_node> n(qSharedPointerDynamicCast<token_node, token>((*t)[1]));
3077 : QSharedPointer<website_variable> v(new website_variable(website_variable::WEBSITE_VARIABLE_TYPE_STANDARD,
3078 0 : "protocol", (*t)[2]->get_value().toString()));
3079 0 : v->set_part(website_variable::WEBSITE_VARIABLE_PART_PROTOCOL);
3080 0 : v->set_required();
3081 0 : t->set_user_data(v);
3082 0 : }
3083 :
3084 : /** \brief Callback function executed when the website rule is reduced.
3085 : *
3086 : * This function saves the result, website_rule, in the token user data.
3087 : *
3088 : * \param[in] r The rule that generated the callback.
3089 : * \param[in] t The token node holding the data parsed so far.
3090 : */
3091 0 : void website_set_variable_rule(const rule& r, QSharedPointer<token_node>& t)
3092 : {
3093 0 : NOTUSED(r);
3094 :
3095 0 : QSharedPointer<token_node> n(qSharedPointerDynamicCast<token_node, token>((*t)[0]));
3096 : // we don't need the dynamic cast since we don't need to access the variables
3097 0 : t->set_user_data(n->get_user_data());
3098 0 : }
3099 :
3100 : /** \brief Callback function executed when a website list is reduced.
3101 : *
3102 : * This function creates a new website information object and adds
3103 : * the website variable to it.
3104 : *
3105 : * The result is then saved in the node as user data.
3106 : *
3107 : * \param[in] r The rule that generated the callback.
3108 : * \param[in] t The token node holding the data parsed so far.
3109 : */
3110 0 : void website_set_new_website_list(const rule& r, QSharedPointer<token_node>& t)
3111 : {
3112 0 : NOTUSED(r);
3113 :
3114 0 : QSharedPointer<token_node> n(qSharedPointerDynamicCast<token_node, token>((*t)[0]));
3115 0 : QSharedPointer<website_variable> v(qSharedPointerDynamicCast<website_variable, parser_user_data>(n->get_user_data()));
3116 0 : QSharedPointer<website_info> info(new website_info);
3117 0 : info->add_var(v);
3118 0 : t->set_user_data(info);
3119 0 : }
3120 :
3121 : /** \brief Callback function executed when a website list is reduced.
3122 : *
3123 : * This function adds the website variable to an existing website information
3124 : * object.
3125 : *
3126 : * The result is then saved in the node as user data.
3127 : *
3128 : * \param[in] r The rule that generated the callback.
3129 : * \param[in] t The token node holding the data parsed so far.
3130 : */
3131 0 : void website_set_add_website_list(const rule& r, QSharedPointer<token_node>& t)
3132 : {
3133 0 : NOTUSED(r);
3134 :
3135 0 : QSharedPointer<token_node> nl(qSharedPointerDynamicCast<token_node, token>((*t)[0]));
3136 0 : QSharedPointer<token_node> nr(qSharedPointerDynamicCast<token_node, token>((*t)[1]));
3137 0 : QSharedPointer<website_info> info(qSharedPointerDynamicCast<website_info, parser_user_data>(nl->get_user_data()));
3138 0 : QSharedPointer<website_variable> v(qSharedPointerDynamicCast<website_variable, parser_user_data>(nr->get_user_data()));
3139 0 : info->add_var(v);
3140 0 : t->set_user_data(info);
3141 0 : }
3142 :
3143 : /** \brief Callback function executed when the rule is reduced.
3144 : *
3145 : * This function defines the name of the rule in the website information
3146 : * object.
3147 : *
3148 : * The result is then saved in the node as user data.
3149 : *
3150 : * \param[in] r The rule that generated the callback.
3151 : * \param[in] t The token node holding the data parsed so far.
3152 : */
3153 0 : void website_set_rule(const rule& r, QSharedPointer<token_node>& t)
3154 : {
3155 0 : NOTUSED(r);
3156 :
3157 0 : QSharedPointer<token_node> n(qSharedPointerDynamicCast<token_node, token>((*t)[2]));
3158 0 : QSharedPointer<website_info> info(qSharedPointerDynamicCast<website_info, parser_user_data>(n->get_user_data()));
3159 0 : info->set_name((*t)[0]->get_value().toString());
3160 0 : t->set_user_data(info);
3161 0 : }
3162 :
3163 : /** \brief Callback function executed when the rule list is reduced.
3164 : *
3165 : * This function creates a new website rule object and adds the
3166 : * website information to it.
3167 : *
3168 : * The result is then saved in the node as user data.
3169 : *
3170 : * \param[in] r The rule that generated the callback.
3171 : * \param[in] t The token node holding the data parsed so far.
3172 : */
3173 0 : void website_set_new_rule_list(const rule& r, QSharedPointer<token_node>& t)
3174 : {
3175 0 : NOTUSED(r);
3176 :
3177 0 : QSharedPointer<token_node> n(qSharedPointerDynamicCast<token_node, token>((*t)[0]));
3178 0 : QSharedPointer<website_info> info(qSharedPointerDynamicCast<website_info, parser_user_data>(n->get_user_data()));
3179 0 : QSharedPointer<website_rules> rules(new website_rules);
3180 0 : rules->add_info(info);
3181 0 : t->set_user_data(rules);
3182 0 : }
3183 :
3184 : /** \brief Callback function executed when the rule list is reduced.
3185 : *
3186 : * This function adds the website information to an existing rules
3187 : * object.
3188 : *
3189 : * The result is then saved in the node as user data.
3190 : *
3191 : * \param[in] r The rule that generated the callback.
3192 : * \param[in] t The token node holding the data parsed so far.
3193 : */
3194 0 : void website_set_add_rule_list(const rule& r, QSharedPointer<token_node>& t)
3195 : {
3196 0 : NOTUSED(r);
3197 :
3198 0 : QSharedPointer<token_node> nl(qSharedPointerDynamicCast<token_node, token>((*t)[0]));
3199 0 : QSharedPointer<token_node> nr(qSharedPointerDynamicCast<token_node, token>((*t)[1]));
3200 0 : QSharedPointer<website_rules> rules(qSharedPointerDynamicCast<website_rules, parser_user_data>(nl->get_user_data()));
3201 0 : QSharedPointer<website_info> info(qSharedPointerDynamicCast<website_info, parser_user_data>(nr->get_user_data()));
3202 0 : rules->add_info(info);
3203 0 : t->set_user_data(rules);
3204 0 : }
3205 :
3206 : /** \brief Callback function executed when the start rule is reduced.
3207 : *
3208 : * This function saves the result, website_rules, in the start rule user data.
3209 : *
3210 : * \param[in] r The rule that generated the callback.
3211 : * \param[in] t The token node holding the data parsed so far.
3212 : */
3213 0 : void website_set_start_result(rule const & r, QSharedPointer<token_node> & t)
3214 : {
3215 0 : NOTUSED(r);
3216 :
3217 0 : QSharedPointer<token_node> n(qSharedPointerDynamicCast<token_node, token>((*t)[0]));
3218 : // we don't need the dynamic cast since we don't need to access the rules
3219 0 : t->set_user_data(n->get_user_data());
3220 0 : }
3221 :
3222 :
3223 : /** \brief Parse a Website Rule Script
3224 : *
3225 : * This function takes a script and parses it into a set of regular expressions
3226 : * given a name and settings such as whether the expression is optional, has
3227 : * a default value, etc.
3228 : *
3229 : * Details about the website rule script are found here:
3230 : * https://snapwebsites.org/implementation/basic-concept-url-website/url-test
3231 : *
3232 : * \code
3233 : * start: rule_list
3234 : *
3235 : * rule_list: rule
3236 : * | rule_list rule
3237 : * rule: IDENTIFIER '{' website_rule_list '}' ';'
3238 : *
3239 : * website_rule_list: website_rule
3240 : * | website_rule_list website_rule
3241 : * website_rule: protocol_rule ';'
3242 : * | port_rule ';'
3243 : * | path_rule ';'
3244 : * | query_rule ';'
3245 : *
3246 : * protocol_rule: PROTOCOL '=' STRING
3247 : *
3248 : * port_rule: PORT '=' STRING
3249 : *
3250 : * path_rule: PATH website
3251 : * | website
3252 : *
3253 : * query_rule: QUERY website
3254 : *
3255 : * website: OPTIONAL website_var ';'
3256 : * | REQUIRED website_var ';'
3257 : * website_var: qualified_name '=' STRING
3258 : * | qualified_name '=' WEBSITE(STRING, STRING)
3259 : * | qualified_name '=' FLAG(STRING [, STRING] )
3260 : *
3261 : * qualified_name: IDENTIFIER
3262 : * | qualified_name '::' IDENTIFIER
3263 : * \endcode
3264 : *
3265 : * The following are all the post-parsing tests that we apply to the data to
3266 : * make sure that it is valid.
3267 : *
3268 : * \li The variable qualified name must be "global::<name>", "site::<name>",
3269 : * or "global::site::<name>".
3270 : * \li Each variable name must be unique within one rule, ignoring the
3271 : * qualification names.
3272 : * \li A flag cannot have a default value when required.
3273 : * \li Domain names, as defined in the rule, must be unique within the entire
3274 : * definition.
3275 : * \li Variable values cannot include a regular expression that captures
3276 : * data or it will generate problems with our algorithms.
3277 : * \li Variable values must be valid regular expressions (i.e. they must be
3278 : * compilable without errors.)
3279 : *
3280 : * \param[in] script The script as written by an administrator.
3281 : * \param[out] result The resulting compressed script when the function
3282 : * returns true.
3283 : *
3284 : * \return true if the parser succeeds, false otherwise.
3285 : */
3286 0 : bool snap_uri_rules::parse_website_rules(QString const& script, QByteArray& result)
3287 : {
3288 : // LEXER
3289 :
3290 : // lexer object
3291 0 : parser::lexer lexer;
3292 0 : lexer.set_input(script);
3293 0 : parser::keyword keyword_flag(lexer, "flag");
3294 0 : parser::keyword keyword_optional(lexer, "optional");
3295 0 : parser::keyword keyword_path(lexer, "path");
3296 0 : parser::keyword keyword_port(lexer, "port");
3297 0 : parser::keyword keyword_protocol(lexer, "protocol");
3298 0 : parser::keyword keyword_query(lexer, "query");
3299 0 : parser::keyword keyword_required(lexer, "required");
3300 0 : parser::keyword keyword_website(lexer, "website");
3301 :
3302 : // GRAMMAR
3303 0 : parser::grammar g;
3304 :
3305 : // qualified_name
3306 0 : choices qualified_name(&g, "qualified_name");
3307 0 : qualified_name >>= TOKEN_ID_IDENTIFIER // keep identifier as is
3308 0 : | qualified_name >> "::" >> TOKEN_ID_IDENTIFIER
3309 0 : >= website_set_qualified_name
3310 : ;
3311 :
3312 : // flag_opt_param
3313 0 : choices flag_opt_param(&g, "flag_opt_param");
3314 0 : flag_opt_param >>= TOKEN_ID_EMPTY // keep as is
3315 0 : | "," >> TOKEN_ID_STRING // keep as is
3316 : ;
3317 :
3318 : // website_var
3319 0 : parser::choices website_var(&g, "website_var");
3320 0 : website_var >>= qualified_name >> "=" >> TOKEN_ID_STRING
3321 : >= website_set_standard_var
3322 0 : | qualified_name >> "=" >> keyword_website
3323 0 : >> "(" >> TOKEN_ID_STRING >> "," >> TOKEN_ID_STRING
3324 0 : >> ")" >= website_set_website_var
3325 0 : | qualified_name >> "=" >> keyword_flag >>
3326 0 : "(" >> TOKEN_ID_STRING >> flag_opt_param >> ")"
3327 0 : >= website_set_flag_var
3328 : ;
3329 :
3330 : // website:
3331 0 : parser::choices website(&g, "website");
3332 0 : website >>= keyword_required >> website_var >= website_set_var_required
3333 0 : | keyword_optional >> website_var >= website_set_var_optional
3334 : ;
3335 :
3336 : // query_rule:
3337 0 : parser::choices query_rule(&g, "query_rule");
3338 0 : query_rule >>= keyword_query >> website >= website_set_var_query
3339 : ;
3340 :
3341 : // path_rule:
3342 0 : parser::choices path_rule(&g, "path_rule");
3343 0 : path_rule >>= keyword_path >> website >= website_set_var_path
3344 0 : | website >= website_set_var_path
3345 : ;
3346 :
3347 : // port_rule:
3348 0 : parser::choices port_rule(&g, "port_rule");
3349 0 : port_rule >>= keyword_port >> "=" >> TOKEN_ID_STRING >= website_set_var_port
3350 : ;
3351 :
3352 : // protocol_rule:
3353 0 : parser::choices protocol_rule(&g, "protocol_rule");
3354 0 : protocol_rule >>= keyword_protocol >> "=" >> TOKEN_ID_STRING >= website_set_var_protocol
3355 : ;
3356 :
3357 : // website_rule:
3358 0 : parser::choices website_rule(&g, "website_rule");
3359 0 : website_rule >>= protocol_rule >> ";" >= website_set_variable_rule
3360 0 : | port_rule >> ";" >= website_set_variable_rule
3361 0 : | path_rule >> ";" >= website_set_variable_rule
3362 0 : | query_rule >> ";" >= website_set_variable_rule
3363 : ;
3364 :
3365 : // website_rule_list:
3366 0 : parser::choices website_rule_list(&g, "website_rule_list");
3367 0 : website_rule_list >>= website_rule >= website_set_new_website_list
3368 0 : | website_rule_list >> website_rule >= website_set_add_website_list
3369 : ;
3370 :
3371 : // rule
3372 0 : parser::choices rule(&g, "rule");
3373 0 : rule >>= TOKEN_ID_IDENTIFIER >> "{" >> website_rule_list >> "}"
3374 0 : >> ";" >= website_set_rule
3375 : ;
3376 :
3377 : // rule_list
3378 0 : parser::choices rule_list(&g, "rule_list");
3379 0 : rule_list >>= rule >= website_set_new_rule_list
3380 0 : | rule_list >> rule >= website_set_add_rule_list
3381 : ;
3382 :
3383 : // start
3384 0 : parser::choices start(&g, "start");
3385 0 : start >>= rule_list >= website_set_start_result;
3386 :
3387 0 : if(!g.parse(lexer, start))
3388 : {
3389 0 : f_errmsg = "parsing error";
3390 0 : return false;
3391 : }
3392 :
3393 : // if we reach here, we've got a "parser valid result"
3394 : // but there may be other problems that we check here
3395 : // . rule, website name used twice
3396 : // . valid namespaces
3397 : // . flags with a default when marked as required
3398 : // . invalid, unacceptable regular expressions
3399 0 : QSharedPointer<token_node> r(g.get_result());
3400 0 : QSharedPointer<website_rules> ws(qSharedPointerDynamicCast<website_rules, parser_user_data>(r->get_user_data()));
3401 0 : QMap<QString, int> rule_names;
3402 0 : int website_max(ws->size());
3403 0 : for(int i = 0; i < website_max; ++i)
3404 : {
3405 0 : QSharedPointer<website_info> info((*ws)[i]);
3406 0 : const QString& rule_name(info->get_name());
3407 0 : if(rule_names.contains(rule_name))
3408 : {
3409 : // the same website name was defined twice
3410 0 : f_errmsg = "found two rules named \"" + rule_name + "\"";
3411 0 : return false;
3412 : }
3413 : // the map value is ignored
3414 0 : rule_names.insert(rule_name, 0);
3415 :
3416 0 : QMap<QString, int> var_names;
3417 0 : int info_max(info->size());
3418 0 : for(int j = 0; j < info_max; ++j) {
3419 0 : QSharedPointer<website_variable> var((*info)[j]);
3420 0 : const QString& var_name(var->get_name());
3421 0 : const snap_string_list var_qualified_names(var_name.split("::"));
3422 0 : if(var_names.contains(var_qualified_names.last()))
3423 : {
3424 : // the same website variable name was defined twice
3425 0 : f_errmsg = "found two variables named \"" + var_name + "\"";
3426 0 : return false;
3427 : }
3428 0 : var_names.insert(var_qualified_names.last(), 1);
3429 0 : switch(var_qualified_names.size())
3430 : {
3431 0 : case 1:
3432 : // just the name no extra test necessary
3433 0 : break;
3434 :
3435 0 : case 2:
3436 : // one qualification, can be global or site
3437 0 : if(var_qualified_names.first() != "global"
3438 0 : && var_qualified_names.first() != "site") {
3439 : // invalid qualification
3440 0 : f_errmsg = "incompatible qualified name \"" + var_qualified_names.first() + "\"";
3441 0 : return false;
3442 : }
3443 0 : break;
3444 :
3445 0 : case 3:
3446 : // two qualification, must be global::site
3447 0 : if(var_qualified_names[0] != "global"
3448 0 : || var_qualified_names[1] != "site") {
3449 : // invalid qualification
3450 0 : f_errmsg = "incompatible qualified name \"" + var_qualified_names[0] + "::" + var_qualified_names[1] + "\"";
3451 0 : return false;
3452 : }
3453 0 : break;
3454 :
3455 0 : default:
3456 : // invalid qualification, cannot be more than 2 names
3457 0 : f_errmsg = "incompatible qualified names \"" + var_qualified_names[0] + "::" + var_qualified_names[1] + "::...\"";
3458 0 : return false;
3459 :
3460 : }
3461 0 : if(var->get_required()
3462 0 : && var->get_type() == website_variable::WEBSITE_VARIABLE_TYPE_FLAG_WITH_DEFAULT)
3463 : {
3464 : // if a flag is required, then no default can be specified
3465 0 : f_errmsg = "a required flag cannot have a default value";
3466 0 : return false;
3467 : }
3468 : // the values cannot include the delimiters since we are to
3469 : // merge them to use them so here we take that in account
3470 0 : const QString& value(var->get_value());
3471 0 : QRegExp re(value);
3472 0 : if(!re.isValid())
3473 : {
3474 : // the regular expression is not valid!?
3475 0 : f_errmsg = "regular expression \"" + value + "\" is not valid";
3476 0 : return false;
3477 : }
3478 0 : if(re.captureCount() != 0)
3479 : {
3480 : // we do not allow users to capture anything, they have to
3481 : // use the Perl syntax: (?:<pattern>)
3482 0 : f_errmsg = "regular expression \"" + value + "\" cannot include a capture (something between parenthesis without the ?: at the start)";
3483 0 : return false;
3484 : }
3485 : }
3486 : }
3487 :
3488 : // now we can generate the result
3489 : //QDataStream archive(&result, QIODevice::WriteOnly);
3490 : //archive << *ws;
3491 :
3492 0 : QBuffer archive(&result);
3493 0 : archive.open(QIODevice::WriteOnly);
3494 0 : QtSerialization::QWriter w(archive, "website_rules", 1, 0);
3495 0 : ws->write(w);
3496 :
3497 0 : return true;
3498 : }
3499 :
3500 :
3501 :
3502 :
3503 6 : } // namespace snap
3504 :
3505 : // vim: ts=4 sw=4 et
|