Line data Source code
1 : /*
2 : * File:
3 : * advgetopt/conf_file.cpp -- a replacement to the Unix getopt() implementation
4 : *
5 : * License:
6 : * Copyright (c) 2006-2019 Made to Order Software Corp. All Rights Reserved
7 : *
8 : * https://snapwebsites.org/
9 : * contact@m2osw.com
10 : *
11 : * This program is free software; you can redistribute it and/or modify
12 : * it under the terms of the GNU General Public License as published by
13 : * the Free Software Foundation; either version 2 of the License, or
14 : * (at your option) any later version.
15 : *
16 : * This program is distributed in the hope that it will be useful,
17 : * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 : * GNU General Public License for more details.
20 : *
21 : * You should have received a copy of the GNU General Public License along
22 : * with this program; if not, write to the Free Software Foundation, Inc.,
23 : * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 : *
25 : * Authors:
26 : * Alexis Wilke alexis@m2osw.com
27 : * Doug Barbieri doug@m2osw.com
28 : */
29 :
30 :
31 : /** \file
32 : * \brief Implementation of the conf_file class.
33 : *
34 : * This is the implementation of the class used to load and save
35 : * configuration files.
36 : */
37 :
38 : // self
39 : //
40 : #include "advgetopt/conf_file.h"
41 :
42 :
43 : // advgetopt lib
44 : //
45 : #include "advgetopt/exception.h"
46 : #include "advgetopt/log.h"
47 : #include "advgetopt/utils.h"
48 :
49 :
50 : // snapdev lib
51 : //
52 : #include <snapdev/safe_variable.h>
53 : #include <snapdev/tokenize_string.h>
54 :
55 :
56 : // boost lib
57 : //
58 : #include <boost/algorithm/string/join.hpp>
59 : #include <boost/algorithm/string/replace.hpp>
60 :
61 : // C++ lib
62 : //
63 : #include <algorithm>
64 : #include <fstream>
65 :
66 :
67 : // last include
68 : //
69 : #include <snapdev/poison.h>
70 :
71 :
72 :
73 : namespace advgetopt
74 : {
75 :
76 :
77 :
78 : /** \brief Private conf_file data.
79 : *
80 : * The conf_file has a few globals used to cache configuration files.
81 : * Since it has to work in a multi-thread environment, we also have
82 : * a mutex.
83 : */
84 : namespace
85 : {
86 :
87 :
88 :
89 : /** \brief A map of configuration files.
90 : *
91 : * This typedef defines a type used to hold all the configuration files
92 : * that were loaded so far.
93 : *
94 : * The map is indexed by a string representing the full path to the
95 : * configuration file.
96 : *
97 : * The value is a shared pointer to configuration file. Since we may
98 : * share that data between multiple users, it made sense to force you
99 : * to use a configuration file smart pointer. Note, though, that we
100 : * never destroy the pointer until we quit (i.e. you cannot force a
101 : * re-load of the configuration file. Changes that happen in memory
102 : * are visible to all users, but changes to the actual configuration
103 : * file are complete invisible to use.)
104 : */
105 : typedef std::map<std::string, conf_file::pointer_t> conf_file_map_t;
106 :
107 :
108 : /** \brief The configuration files.
109 : *
110 : * This global defines a list of configuration files indexed by
111 : * filename (full path, but not the URL, just a path.)
112 : *
113 : * Whenever a configuration file is being retrieved with the
114 : * conf_file::get_conf_file() function, it is first searched
115 : * in this map. If it exists in the map, that version gets
116 : * used (if the URL of the two setups match one to one.)
117 : * If there is no such file in the map, then a new one is
118 : * created by loading the corresponding file.
119 : */
120 2 : conf_file_map_t g_conf_files = conf_file_map_t();
121 :
122 :
123 : class conf_mutex
124 : {
125 : public:
126 : /** \brief A mutex to protect configuration calls.
127 : *
128 : * Dealing with configuration files may happen in a multi-threaded
129 : * environment. In that case we have to protect many function calls
130 : * which access the data because that can change over time.
131 : *
132 : * \note
133 : * The getopt object is already managed on its own:
134 : * it reads the parameters on load and then offer constant access
135 : * to all of what was loaded, found in an environment variable, or
136 : * was handled by parsing the command line arguments.
137 : */
138 2 : conf_mutex()
139 2 : {
140 : pthread_mutexattr_t mattr;
141 2 : int err(pthread_mutexattr_init(&mattr));
142 2 : if(err != 0)
143 : {
144 : throw getopt_exception_initialization("pthread_muteattr_init() failed"); // LCOV_EXCL_LINE
145 : }
146 2 : err = pthread_mutexattr_settype(&mattr, PTHREAD_MUTEX_RECURSIVE);
147 2 : if(err != 0)
148 : {
149 : pthread_mutexattr_destroy(&mattr); // LCOV_EXCL_LINE
150 : throw getopt_exception_initialization("pthread_muteattr_settype() failed"); // LCOV_EXCL_LINE
151 : }
152 2 : err = pthread_mutex_init(&f_mutex, &mattr);
153 2 : if(err != 0)
154 : {
155 : pthread_mutexattr_destroy(&mattr); // LCOV_EXCL_LINE
156 : throw getopt_exception_initialization("pthread_mutex_init() failed"); // LCOV_EXCL_LINE
157 : }
158 2 : err = pthread_mutexattr_destroy(&mattr);
159 2 : if(err != 0)
160 : {
161 : throw getopt_exception_initialization("pthread_mutexattr_destroy() failed"); // LCOV_EXCL_LINE
162 : }
163 2 : }
164 :
165 : /** \brief Clean up the pthread mutex object.
166 : *
167 : * This function performs the necessary clean up of the pthread mutex.
168 : *
169 : * The constructor will have initialized a valid \c f_mutex. That
170 : * variable member must be de-initialized before we release this
171 : * object.
172 : */
173 2 : ~conf_mutex()
174 2 : {
175 2 : pthread_mutex_destroy(&f_mutex);
176 2 : }
177 :
178 : /* \brief Lock this mutex.
179 : *
180 : * This function locks the mutex.
181 : *
182 : * The lock() and unlock() functions should not be called directly.
183 : * Instead you should use the safe_lock object which will make sure
184 : * that the two functions get called in pairs as expected (i.e. for
185 : * each call to the lock() a corresponding call to unlock() will
186 : * automatically happen.)
187 : */
188 5021 : void lock()
189 : {
190 5021 : int const err(pthread_mutex_lock(&f_mutex));
191 5021 : if(err != 0)
192 : {
193 : throw getopt_exception_invalid("pthread_mutex_lock() failed"); // LCOV_EXCL_LINE
194 : }
195 5021 : }
196 :
197 : /** \brief Unlock this mutex.
198 : *
199 : * This function unlocks the mutex.
200 : *
201 : * This is \em rarely used to unlock the mutex early.
202 : *
203 : * \warning
204 : * Since a mutex can be locked multiple times (recursively), there is
205 : * no protection to know whether it is still locked or not.
206 : */
207 5021 : void unlock()
208 : {
209 5021 : int const err(pthread_mutex_unlock(&f_mutex));
210 5021 : if(err != 0)
211 : {
212 : throw getopt_exception_invalid("pthread_mutex_unlock() failed"); // LCOV_EXCL_LINE
213 : }
214 5021 : }
215 :
216 : private:
217 : /** \brief The Linux mutex.
218 : *
219 : * This definition is the base Linux mutex as defined by the pthread
220 : * implementation under Linux.
221 : *
222 : * It gets initialized on construction. If the initialization fails,
223 : * the constructor function throws so it is always defined when the
224 : * object was successfully created.
225 : */
226 : pthread_mutex_t f_mutex = pthread_mutex_t();
227 : };
228 :
229 :
230 : /** \brief The configuration file mutex.
231 : *
232 : * This options are generally viewed as read-only global variables. They
233 : * get setup once early on and then used and reused as many times as
234 : * required.
235 : *
236 : * This mutex makes sure that access between multiple thread happens in
237 : * a safe manner.
238 : */
239 2 : conf_mutex g_mutex;
240 :
241 :
242 :
243 : /** \brief Safely lock/unlock a mutex.
244 : *
245 : * This function allows for locking and unlocking a mutex in a safe
246 : * manner which means that it will always get unlocked when you exit
247 : * a context, whether you exit with a return, break, continue or
248 : * an exception.
249 : *
250 : * The constructor locks the mutex.
251 : *
252 : * The destructor unlocks the mutex.
253 : *
254 : * When necessary we create a sub-block to make sure that that
255 : * the mutex gets released as soon as possible.
256 : */
257 : class safe_lock
258 : {
259 : public:
260 : /** \brief Lock the mutex.
261 : *
262 : * The constructor takes a reference to a mutex as input. It saves
263 : * that reference and then calls the lock() function on that object.
264 : *
265 : * \param[in] m The mutex to lock and unlock.
266 : */
267 5021 : safe_lock(conf_mutex & m)
268 5021 : : f_mutex(m)
269 : {
270 5021 : f_mutex.lock();
271 5021 : }
272 :
273 : /** \brief Unlock the mutex.
274 : *
275 : * Whenever we reach the end of the context, unlock the mutex.
276 : * This function always calls the unlock and it will happen
277 : * even on exceptions or some other early returning within a
278 : * function.
279 : */
280 5021 : ~safe_lock()
281 5021 : {
282 5021 : f_mutex.unlock();
283 5021 : }
284 :
285 : private:
286 : /** \brief The mutex to loack and unlock.
287 : *
288 : * This variable member holds a reference to the mutex that we
289 : * want to lock on construction and unlock on destruction.
290 : */
291 : conf_mutex & f_mutex;
292 : };
293 :
294 :
295 :
296 : } // no name namespace
297 :
298 :
299 :
300 :
301 :
302 : /** \brief Initialize the file setup object.
303 : *
304 : * This constructor initializes the setup object which can later be used
305 : * to search for an existing conf_file or creating a new conf_file.
306 : *
307 : * The setup holds the various parameters used to know how to load a
308 : * configuration file in memory. The parameters include
309 : *
310 : * \li \p filename -- the name of the file to read as a configuration file.
311 : * \li \p line_continuation -- how lines in the files are being read; in
312 : * most cases a line in a text file ends when a newline character (`\\n`)
313 : * is found; this parameter allows for lines that span (continue) on
314 : * multiple text lines. Only one type of continuation or no continue
315 : * (a.k.a. "single line") can be used per file.
316 : * \li \p assignment_operator -- the character(s) accepted between the
317 : * name of a variable and its value; by default this is the equal sign
318 : * (`=`). Multiple operators can be accepted.
319 : * \li \p comment -- how comments are introduced when supported. Multiple
320 : * introducers can be accepted within one file. By default we accept the
321 : * Unix Shell (`#`) and INI file (`;`) comment introducers.
322 : * \li \p section_operator -- the set of characters accepted as section
323 : * separator. By default we accept the INI file syntax (the `[section]`
324 : * syntax.)
325 : *
326 : * \param[in] filename A valid filename.
327 : * \param[in] line_continue One of the line_continuation_t values.
328 : * \param[in] assignment_operator A set of assignment operator flags.
329 : * \param[in] comment A set of comment flags.
330 : * \param[in] section_operator A set of section operator flags.
331 : */
332 28483 : conf_file_setup::conf_file_setup(
333 : std::string const & filename
334 : , line_continuation_t line_continuation
335 : , assignment_operator_t assignment_operator
336 : , comment_t comment
337 : , section_operator_t section_operator)
338 : : f_line_continuation(line_continuation)
339 28483 : , f_assignment_operator(assignment_operator == 0
340 : ? ASSIGNMENT_OPERATOR_EQUAL
341 : : assignment_operator)
342 : , f_comment(comment)
343 56967 : , f_section_operator(section_operator)
344 : {
345 28483 : if(filename.empty())
346 : {
347 1 : throw getopt_exception_invalid("trying to load a configuration file using an empty filename.");
348 : }
349 :
350 56964 : std::unique_ptr<char, decltype(&::free)> fn(realpath(filename.c_str(), nullptr), &::free);
351 28482 : if(fn != nullptr)
352 : {
353 28008 : f_filename = fn.get();
354 : }
355 28482 : }
356 :
357 :
358 : /** \brief Check whether the setup is considered valid.
359 : *
360 : * This function is used to check whether the conf_file_setup is valid or
361 : * not. It is valid when everything is in order, which at this point means
362 : * the filename is not empty.
363 : *
364 : * All the other parameters are always viewed as being valid.
365 : *
366 : * \return true if the conf_file_setup is considered valid.
367 : */
368 25857 : bool conf_file_setup::is_valid() const
369 : {
370 25857 : return !f_filename.empty();
371 : }
372 :
373 :
374 : /** \brief Get the filename.
375 : *
376 : * When creating a new conf_file_setup, you have to specify a filename.
377 : * This function returns that filename after it was canonicalized by
378 : * the constructor.
379 : *
380 : * The canonicalization process computes the full path to the real
381 : * file. If such does not exist then no filename is defined, so this
382 : * function may return an empty string.
383 : *
384 : * \return The filename or an empty string if the realpath() could not
385 : * be calculated.
386 : */
387 28491 : std::string const & conf_file_setup::get_filename() const
388 : {
389 28491 : return f_filename;
390 : }
391 :
392 :
393 : /** \brief Get the line continuation setting.
394 : *
395 : * This function returns the line continuation for this setup.
396 : *
397 : * This parameter is not a set of flags. We only support one type of
398 : * line continuation per file. Many continuations could be contradictory
399 : * if used simultaneously.
400 : *
401 : * The continuation setting is one of the following:
402 : *
403 : * \li line_continuation_t::single_line -- no continuation support; any
404 : * definition must be on one single line.
405 : * \li line_continuation_t::rfc_822 -- like email/HTTP, whitespace at
406 : * the start of the next line means that the current line continues there;
407 : * those whitespaces get removed from the value so if you want a space
408 : * between two lines, make sure to finish the current line with a space.
409 : * \li line_continuation_t::msdos -- `&` at end of the line.
410 : * \li line_continuation_t::unix -- `\` at end of the line.
411 : * \li line_continuation_t::fortran -- `&` at the start of the next line;
412 : * there cannot be any spaces, the `&` has to be the very first character.
413 : * \li line_continuation_t::semicolon -- `;` ends the _line_; when reading
414 : * a line with this continuation mode, the reader stops only when it finds
415 : * the `;` or EOF (also if a comment is found.)
416 : *
417 : * \return a line continuation mode.
418 : */
419 26164 : line_continuation_t conf_file_setup::get_line_continuation() const
420 : {
421 26164 : return f_line_continuation;
422 : }
423 :
424 :
425 : /** \brief Get the accepted assignment operators.
426 : *
427 : * This function returns the set of flags describing the list of
428 : * accepted operators one can use to do assignments.
429 : *
430 : * Right now we support the following:
431 : *
432 : * \li ASSIGNMENT_OPERATOR_EQUAL -- the equal (`=`) character, like in
433 : * most Unix configuration files and shell scripts.
434 : * \li ASSIGNMENT_OPERATOR_COLON -- the colon (`:`) character, like in
435 : * email and HTTP headers.
436 : * \li ASSIGNMENT_OPERATOR_SPACE -- the space (` `) character; this is
437 : * less used, but many Unix configuration files still use this scheme.
438 : *
439 : * \todo
440 : * Add support for additional operators such as:
441 : * \todo
442 : * \li `+=` -- append data
443 : * \li `?=` -- set to this value if not yet set
444 : *
445 : * \return The set of accepted assignment operators.
446 : *
447 : * \sa is_assignment_operator()
448 : */
449 1144304 : assignment_operator_t conf_file_setup::get_assignment_operator() const
450 : {
451 1144304 : return f_assignment_operator;
452 : }
453 :
454 :
455 : /** \brief Get the comment flags.
456 : *
457 : * This function returns the comment flags. These describe which type
458 : * of comments are supported in this configuration file.
459 : *
460 : * Currently we support:
461 : *
462 : * \li COMMENT_INI -- INI file like comments, these are introduced with
463 : * a semi-colon (`;`) and end with a newline.
464 : * \li COMMENT_SHELL -- Unix shell like comments, these are introduced
465 : * with a hash (`#`) and end with a newline.
466 : * \li COMMENT_CPP -- C++ like comments, these are introduced with two
467 : * slashes (`//`) and end with a newline.
468 : *
469 : * Right now we only support line comments. Configuration entries cannot
470 : * include comments. A comment character can be preceded by spaces and
471 : * tabs.
472 : *
473 : * Line continuation is taken in account with comments. So the following
474 : * when the line continuation is set to Unix is one long comment:
475 : *
476 : * \code
477 : * # line continuation works with comments \
478 : * just like with any other line... because the \
479 : * continuation character and the newline characters \
480 : * just get removed before the get_line() function \
481 : * returns...
482 : * \endcode
483 : *
484 : * \return The comment flags.
485 : *
486 : * \sa is_comment()
487 : */
488 26104 : comment_t conf_file_setup::get_comment() const
489 : {
490 26104 : return f_comment;
491 : }
492 :
493 :
494 : /** \brief Get the accepted section operators.
495 : *
496 : * This function returns the flags representing which of the
497 : * section operators are accepted.
498 : *
499 : * We currently support the following types of sections:
500 : *
501 : * \li SECTION_OPERATOR_NONE -- no sections are accepted.
502 : * \li SECTION_OPERATOR_C -- the period (`.`) is viewed as a section/name
503 : * separator as when you access a variable member in a structure.
504 : * \li SECTION_OPERATOR_CPP -- the scope operator (`::`) is viewed as a
505 : * section/name separator; if used at the very beginning, it is viewed
506 : * as "global scope" and whatever other section is currently active is
507 : * ignored.
508 : * \li SECTION_OPERATOR_BLOCK -- the configuration files can include
509 : * opening (`{`) and closing (`}`) curly brackets to group parameters
510 : * together; a name must precede the opening bracket, it represents
511 : * the section name.
512 : * \li SECTION_OPERATOR_INI_FILE -- like in the MS-DOS .ini files, the
513 : * configuration file can include square brackets to mark sections; this
514 : * method limits the number of section names to one level.
515 : *
516 : * \bug
517 : * The INI file support does not verify that a section name does not
518 : * itself include more sub-sections. For example, the following would
519 : * be three section names:
520 : * \bug
521 : * \code
522 : * [a::b::c]
523 : * var=123
524 : * \endcode
525 : * \bug
526 : * So in effect, the variable named `var` ends up in section `a`,
527 : * sub-section `b`, and sub-sub-section `c` (or section `a::b::c`.)
528 : * Before saving the results in the parameters, all section operators
529 : * get transformed to the C++ scope (`::`) operator, which is why that
530 : * operator used in any name ends up looking like a section separator.
531 : */
532 43025 : section_operator_t conf_file_setup::get_section_operator() const
533 : {
534 43025 : return f_section_operator;
535 : }
536 :
537 :
538 : /** \brief Transform the setup in a URL.
539 : *
540 : * This function transforms the configuration file setup in a unique URL.
541 : * This URL allows us to verify that two setup are the same so when
542 : * attempting to reload the same configuration file, we can make sure
543 : * you are attempting to do so with the same URL.
544 : *
545 : * This is because trying to read the same file with, for example, line
546 : * continuation set to Unix the first time and then set to MS-DOS the
547 : * second time would not load the same thing is either line continuation
548 : * was used.
549 : *
550 : * \todo
551 : * We should look into have a set_config_url() or have a constructor
552 : * which accepts a URL.
553 : *
554 : * \return The URL representing this setup.
555 : */
556 41262 : std::string conf_file_setup::get_config_url() const
557 : {
558 41262 : if(f_url.empty())
559 : {
560 56264 : std::stringstream ss;
561 :
562 28132 : ss << "file://"
563 28132 : << (f_filename.empty()
564 : ? "/<empty>"
565 56264 : : f_filename);
566 :
567 56264 : std::vector<std::string> params;
568 28132 : if(f_line_continuation != line_continuation_t::unix)
569 : {
570 46274 : std::string name;
571 23137 : switch(f_line_continuation)
572 : {
573 : case line_continuation_t::single_line:
574 4223 : name = "single-line";
575 4223 : break;
576 :
577 : case line_continuation_t::rfc_822:
578 4727 : name = "rfc-822";
579 4727 : break;
580 :
581 : case line_continuation_t::msdos:
582 4727 : name = "msdos";
583 4727 : break;
584 :
585 : // we should not ever receive this one since we don't enter
586 : // this block when the value is "unix"
587 : //
588 : //case line_continuation_t::unix:
589 : // name = "unix";
590 : // break;
591 :
592 : case line_continuation_t::fortran:
593 4728 : name = "fortran";
594 4728 : break;
595 :
596 : case line_continuation_t::semicolon:
597 4727 : name = "semi-colon";
598 4727 : break;
599 :
600 : default:
601 5 : throw getopt_exception_logic("unexpected line continuation.");
602 :
603 : }
604 23132 : params.push_back("line-continuation=" + name);
605 : }
606 :
607 28127 : if(f_assignment_operator != ASSIGNMENT_OPERATOR_EQUAL)
608 : {
609 42326 : std::vector<std::string> assignments;
610 21163 : if((f_assignment_operator & ASSIGNMENT_OPERATOR_EQUAL) != 0)
611 : {
612 10577 : assignments.push_back("equal");
613 : }
614 21163 : if((f_assignment_operator & ASSIGNMENT_OPERATOR_COLON) != 0)
615 : {
616 14111 : assignments.push_back("colon");
617 : }
618 21163 : if((f_assignment_operator & ASSIGNMENT_OPERATOR_SPACE) != 0)
619 : {
620 14104 : assignments.push_back("space");
621 : }
622 21163 : if(!assignments.empty())
623 : {
624 21163 : params.push_back("assignment-operator=" + boost::algorithm::join(assignments, ","));
625 : }
626 : }
627 :
628 : if(f_comment != COMMENT_INI | COMMENT_SHELL)
629 : {
630 56254 : std::vector<std::string> comment;
631 28127 : if((f_comment & COMMENT_INI) != 0)
632 : {
633 12388 : comment.push_back("ini");
634 : }
635 28127 : if((f_comment & COMMENT_SHELL) != 0)
636 : {
637 11931 : comment.push_back("shell");
638 : }
639 28127 : if((f_comment & COMMENT_CPP) != 0)
640 : {
641 12379 : comment.push_back("cpp");
642 : }
643 28127 : if(comment.empty())
644 : {
645 3816 : params.push_back("comment=none");
646 : }
647 : else
648 : {
649 24311 : params.push_back("comment=" + boost::algorithm::join(comment, ","));
650 : }
651 : }
652 :
653 28127 : if(f_section_operator != SECTION_OPERATOR_INI_FILE)
654 : {
655 52520 : std::vector<std::string> section_operator;
656 26260 : if((f_section_operator & SECTION_OPERATOR_C) != 0)
657 : {
658 13005 : section_operator.push_back("c");
659 : }
660 26260 : if((f_section_operator & SECTION_OPERATOR_CPP) != 0)
661 : {
662 12996 : section_operator.push_back("cpp");
663 : }
664 26260 : if((f_section_operator & SECTION_OPERATOR_BLOCK) != 0)
665 : {
666 12991 : section_operator.push_back("block");
667 : }
668 26260 : if((f_section_operator & SECTION_OPERATOR_INI_FILE) != 0)
669 : {
670 11134 : section_operator.push_back("ini-file");
671 : }
672 26260 : if(!section_operator.empty())
673 : {
674 24146 : params.push_back("section-operator=" + boost::algorithm::join(section_operator, ","));
675 : }
676 : }
677 :
678 56254 : std::string const query_string(boost::algorithm::join(params, "&"));
679 28127 : if(!query_string.empty())
680 : {
681 28127 : ss << '?'
682 28127 : << query_string;
683 : }
684 :
685 28127 : f_url = ss.str();
686 : }
687 :
688 41257 : return f_url;
689 : }
690 :
691 :
692 :
693 :
694 : /** \brief Create and read a conf_file.
695 : *
696 : * This function creates a new conf_file object unless one with the same
697 : * filename already exists.
698 : *
699 : * If the configuration file was already loaded, then that pointer gets
700 : * returned instead of reloading the file. There is currently no API to
701 : * allow for the removal because another thread or function may have
702 : * the existing pointer cached and we want all instances of a configuration
703 : * file to be the same (i.e. if you update the value of a parameter then
704 : * that new value should be visible by all the users of that configuration
705 : * file.) Therefore, you can think of a configuration file as a global
706 : * variable.
707 : *
708 : * \note
709 : * Any number of calls to this function to load a given file always return
710 : * exactly the same pointer.
711 : *
712 : * \todo
713 : * With the communicator, we will at some point implement a class
714 : * used to detect that a file changed, allowing us to get a signal
715 : * and reload the file as required. This get_conf_file() function
716 : * will greatly benefit from such since that way we can automatically
717 : * reload the configuration file. In other words, process A could
718 : * make a change, then process B reloads and sees the change that
719 : * process A made. Such an implementation will require a proper
720 : * locking mechanism of the configuration files while modifications
721 : * are being performed.
722 : *
723 : * \param[in] setup The settings to be used in this configuration file reader.
724 : *
725 : * \return A pointer to the configuration file data.
726 : */
727 2808 : conf_file::pointer_t conf_file::get_conf_file(conf_file_setup const & setup)
728 : {
729 5616 : safe_lock lock(g_mutex);
730 :
731 2808 : auto it(g_conf_files.find(setup.get_filename()));
732 2808 : if(it != g_conf_files.end())
733 : {
734 2631 : if(it->second->get_setup().get_config_url() != setup.get_config_url())
735 : {
736 : throw getopt_exception_logic("trying to load configuration file \""
737 5250 : + setup.get_config_url()
738 5250 : + "\" but an existing configuration file with the same name was loaded with URL: \""
739 10500 : + it->second->get_setup().get_config_url()
740 7875 : + "\".");
741 : }
742 6 : return it->second;
743 : }
744 354 : conf_file::pointer_t cf(new conf_file(setup));
745 177 : g_conf_files[setup.get_filename()] = cf;
746 177 : return cf;
747 : }
748 :
749 :
750 : /** \brief Save the configuration file.
751 : *
752 : * This function saves the current data from this configuration file to
753 : * the file. It overwrites the existing file.
754 : *
755 : * Note that when you load the configuration, you may get data from
756 : * many different configuration files. This very file will only
757 : * include the data that was loaded from this file, though, and whatever
758 : * modifications you made.
759 : *
760 : * If the conf is not marked as modified, the function returns immediately
761 : * with true.
762 : *
763 : * \param[in] create_backup Whether to create a backup or not.
764 : *
765 : * \return true if the save worked as expected.
766 : */
767 2 : bool conf_file::save_configuration(bool create_backup)
768 : {
769 2 : if(f_modified)
770 : {
771 : // create backup?
772 : //
773 1 : if(create_backup)
774 : {
775 : // TODO: offer means to set the backup extension
776 : //
777 2 : std::string const backup_filename(f_setup.get_filename() + ".bak");
778 :
779 2 : if(unlink(backup_filename.c_str()) != 0
780 1 : && errno != ENOENT)
781 : {
782 : f_errno = errno; // LCOV_EXCL_LINE
783 : return false; // LCOV_EXCL_LINE
784 : }
785 :
786 1 : if(rename(f_setup.get_filename().c_str(), backup_filename.c_str()) != 0)
787 : {
788 : f_errno = errno; // LCOV_EXCL_LINE
789 : return false; // LCOV_EXCL_LINE
790 : }
791 : }
792 :
793 : // save parameters to file
794 : //
795 2 : std::ofstream conf;
796 1 : conf.open(f_setup.get_filename().c_str());
797 1 : if(!conf.is_open())
798 : {
799 : f_errno = errno; // LCOV_EXCL_LINE
800 : return false; // LCOV_EXCL_LINE
801 : }
802 :
803 1 : time_t const now(time(nullptr));
804 : tm t;
805 1 : gmtime_r(&now, &t);
806 : char str_date[16];
807 1 : strftime(str_date, sizeof(str_date), "%Y/%m/%d", &t);
808 : char str_time[16];
809 1 : strftime(str_time, sizeof(str_time), "%H:%M:%S", &t);
810 :
811 : // header warning with date & time
812 : //
813 1 : conf << "# This file was auto-generated by snap_config.cpp on " << str_date << " at " << str_time << "." << std::endl
814 1 : << "# Making modifications here is likely safe unless the tool handling this" << std::endl
815 1 : << "# configuration file is actively working on it while you do the edits." << std::endl;
816 4 : for(auto p : f_parameters)
817 : {
818 3 : conf << p.first << "=";
819 :
820 : // prevent saving \r and \n characters as is when part of the
821 : // value; also double \ otherwise reading those back would fail
822 : //
823 6 : std::string value(p.second);
824 3 : boost::replace_all(value, "\\", "\\\\");
825 3 : boost::replace_all(value, "\r", "\\r");
826 3 : boost::replace_all(value, "\n", "\\n");
827 3 : boost::replace_all(value, "\t", "\\t");
828 3 : conf << value << std::endl;
829 :
830 3 : if(!conf)
831 : {
832 : return false; // LCOV_EXCL_LINE
833 : }
834 : }
835 :
836 : // it all worked, it's considered saved now
837 : //
838 1 : f_modified = false;
839 : }
840 :
841 2 : return true;
842 : }
843 :
844 :
845 : /** \brief Initialize and read a configuration file.
846 : *
847 : * This constructor initializes this conf_file object and then reads the
848 : * corresponding configuration file.
849 : *
850 : * Note that you have to use the create_conf_file() function for you
851 : * to be able to create a configuration file. It is done that way became
852 : * a file can be read only once. Once loaded, it gets cached until your
853 : * application quits.
854 : *
855 : * \param[in] filename The path and name of the configuration file to be read.
856 : * \param[in] line_continuation How lines end in this file.
857 : * \param[in] assignment_operator What appears between the name and value.
858 : * \param[in] comment The supported comment introducer(s).
859 : */
860 177 : conf_file::conf_file(conf_file_setup const & setup)
861 177 : : f_setup(setup)
862 : {
863 177 : read_configuration();
864 177 : }
865 :
866 :
867 : /** \brief Get the configuration file setup.
868 : *
869 : * This function returns a reference to the setup used to load this
870 : * configuration file.
871 : *
872 : * \note
873 : * This function has no mutex protection because the setup can't
874 : * change so there is no multi-thread protection necessary (the
875 : * fact that you hold a shared pointer to the conf_file object
876 : * is enough protection in this case.)
877 : *
878 : * \return A reference to this configuration file setup.
879 : */
880 5403 : conf_file_setup const & conf_file::get_setup() const
881 : {
882 5403 : return f_setup;
883 : }
884 :
885 :
886 : /** \brief Set a callback to detect when changes happen.
887 : *
888 : * This function is used to attach a callback to this file. This is
889 : * useful if you'd like to know when a change happen to a parameter
890 : * in this configuration file.
891 : *
892 : * The callback gets called when:
893 : *
894 : * \li The set_parameter() is called and the parameter gets created.
895 : * \li The set_parameter() is called and the parameter gets updated.
896 : * \li The erase_parameter() is called and the parameter gets erased.
897 : *
898 : * You can cancel your callback by calling this function again without
899 : * a target (i.e. `cf->set_callback(callback_t());`).
900 : *
901 : * To attach another object to your callback, you can either create
902 : * a callback which is attached to your object and a function
903 : * member or use std::bind() to attach the object to the function
904 : * call.
905 : *
906 : * \param[in] callback The new callback std::function.
907 : */
908 1 : void conf_file::set_callback(callback_t callback)
909 : {
910 1 : f_callback = callback;
911 1 : }
912 :
913 :
914 : /** \brief Get the error number opening/reading the configuration file.
915 : *
916 : * The class registers the errno value whenever an I/O error happens
917 : * while handling the configuration file. In most cases the function
918 : * is expected to return 0.
919 : *
920 : * The ENOENT error should not happen since the setup is going to be
921 : * marked as invalid when a configuration file does not exist and
922 : * you should not end up creation a conf_file object when that
923 : * happens. However, it is expected when you want to make some
924 : * changes to a few parameters and save them back to file (i.e.
925 : * the very first time there will be no file under the writable
926 : * configuration folder.)
927 : *
928 : * \return The last errno detected while accessing the configuration file.
929 : */
930 152 : int conf_file::get_errno() const
931 : {
932 304 : safe_lock lock(g_mutex);
933 :
934 304 : return f_errno;
935 : }
936 :
937 :
938 : /** \brief Get a list of sections.
939 : *
940 : * This function returns a copy of the list of sections defined in
941 : * this configuration file. In most cases, you should not need this
942 : * function since you are expected to know what parameters may be
943 : * defined. There are times though when it can be very practical.
944 : * For example, the options_config.cpp makes use of it since each
945 : * section is a parameter which we do not know the name of until
946 : * we have access to this array of sections.
947 : *
948 : * \note
949 : * We return a list because in a multithread environment another thread
950 : * may decide to make changes to the list of parameters which has the
951 : * side effect of eventually adding a section.
952 : *
953 : * \return A copy of the list of sections.
954 : */
955 156 : conf_file::sections_t conf_file::get_sections() const
956 : {
957 312 : safe_lock lock(g_mutex);
958 :
959 312 : return f_sections;
960 : }
961 :
962 :
963 : /** \brief Get a list of parameters.
964 : *
965 : * This function returns a copy of the list of parameters defined in
966 : * this configuration file.
967 : *
968 : * \note
969 : * We return a list because in a multithread environment another thread
970 : * may decide to make changes to the list of parameters (including
971 : * erasing a parameter.)
972 : *
973 : * \return A copy of the list of parameters.
974 : */
975 174 : conf_file::parameters_t conf_file::get_parameters() const
976 : {
977 348 : safe_lock lock(g_mutex);
978 :
979 348 : return f_parameters;
980 : }
981 :
982 :
983 : /** \brief Check whether a parameter is defined.
984 : *
985 : * This function checks for the existance of a parameter. It is a good
986 : * idea to first check for the existance of a parameter since the
987 : * get_parameter() function may otherwise return an empty string and
988 : * you cannot know whether that empty string means that the parameter
989 : * was not defined or it was set to the empty string.
990 : *
991 : * \param[in] name The name of the parameter to check.
992 : *
993 : * \return true if the parameter is defined, false otherwise.
994 : *
995 : * \sa get_parameter()
996 : * \sa set_parameter()
997 : */
998 622 : bool conf_file::has_parameter(std::string name) const
999 : {
1000 622 : std::replace(name.begin(), name.end(), '_', '-');
1001 :
1002 1244 : safe_lock lock(g_mutex);
1003 :
1004 622 : auto it(f_parameters.find(name));
1005 1244 : return it != f_parameters.end();
1006 : }
1007 :
1008 :
1009 : /** \brief Get the named parameter.
1010 : *
1011 : * This function searches for the specified parameter. If that parameter
1012 : * exists, then its value is returned. Note that the value of a parameter
1013 : * may be the empty string.
1014 : *
1015 : * If the parameter does not exist, the function returns the empty string.
1016 : * To distinguish between an undefined parameter and a parameter set to
1017 : * the empty string, use the has_parameter() function.
1018 : *
1019 : * \param[in] name The name of the parameter to retrieve.
1020 : *
1021 : * \return The current value of the parameter or an empty string.
1022 : *
1023 : * \sa has_parameter()
1024 : * \sa set_parameter()
1025 : */
1026 608 : std::string conf_file::get_parameter(std::string name) const
1027 : {
1028 608 : std::replace(name.begin(), name.end(), '_', '-');
1029 :
1030 1216 : safe_lock lock(g_mutex);
1031 :
1032 608 : auto it(f_parameters.find(name));
1033 608 : if(it != f_parameters.end())
1034 : {
1035 465 : return it->second;
1036 : }
1037 143 : return std::string();
1038 : }
1039 :
1040 :
1041 : /** \brief Set a parameter.
1042 : *
1043 : * This function sets a parameter to the specified value.
1044 : *
1045 : * The name of the value includes the \p section names and the \p name
1046 : * parameter concatenated with a C++ scopre operator (::) in between
1047 : * (unless \p section is the empty string in which case no scope operator
1048 : * gets added.)
1049 : *
1050 : * When the \p name parameter starts with a scope parameter, the \p section
1051 : * parameter is ignored. This allows one to ignore the current section
1052 : * (i.e. the last '[...]' or any '\<name> { ... }').
1053 : *
1054 : * The \p section parameter is a list of section names separated by
1055 : * the C++ scope operator (::).
1056 : *
1057 : * The \p name parameter may include C (.) and/or C++ (::) section
1058 : * separators when the configuration file supports those. Internally,
1059 : * those get moved to the \p section parameter. That allows us to
1060 : * verify that the number of sections is valid.
1061 : *
1062 : * This function may be called any number of time. The last value is
1063 : * the one kept. While reading the configuration file, though, a warning
1064 : * is generated when a parameter gets overwritten since this is often the
1065 : * source of a problem.
1066 : *
1067 : * In the following configuration file:
1068 : *
1069 : * \code
1070 : * var=name
1071 : * var=twice
1072 : * \endcode
1073 : *
1074 : * The variable named `var` will be set to `twice` on return and a warning
1075 : * will have been generated warning about the fact that the variable was
1076 : * modified while reading the configuration file.
1077 : *
1078 : * The full name of the parameter (i.e. section + name) cannot include any
1079 : * of the following characters:
1080 : *
1081 : * \li control characters (any character between 0x00 and 0x1F)
1082 : * \li a space (0x20)
1083 : * \li a backslash (`\`)
1084 : * \li quotation (`"` and `'`)
1085 : * \li comment (';', '#', '/')
1086 : * \li assignment ('=', ':', '?', '+')
1087 : *
1088 : * \note
1089 : * The \p section and \p name parameters have any underscore (`_`)
1090 : * replaced with dashes (`-`) before getting used. The very first
1091 : * character can be a dash. This allows you to therefore create
1092 : * parameters which cannot appear in a configuration file, an
1093 : * environment variable or on the command line (where parameter are
1094 : * not allowed to start with a dash.)
1095 : *
1096 : * \warning
1097 : * It is important to note that when a \p name includes a C++ scope
1098 : * operator, the final parameter name looks like it includes a section
1099 : * name (i.e. the name "a::b", when the C++ section flag is not set,
1100 : * is accepted as is; so the final parameter name is going to be "a::b"
1101 : * and therefore it will include what looks like a section name.)
1102 : * There should not be any concern about this small \em glitch though
1103 : * since you do not have to accept any such parameter.
1104 : *
1105 : * \param[in] section The list of section or an empty string.
1106 : * \param[in] name The name of the parameter.
1107 : * \param[in] value The value of the parameter.
1108 : */
1109 620 : bool conf_file::set_parameter(std::string section, std::string name, std::string const & value)
1110 : {
1111 : // use the tokenize_string() function because we do not want to support
1112 : // quoted strings in this list of sections which our split_string()
1113 : // does automatically
1114 : //
1115 1240 : string_list_t section_list;
1116 :
1117 620 : std::replace(section.begin(), section.end(), '_', '-');
1118 620 : std::replace(name.begin(), name.end(), '_', '-');
1119 :
1120 620 : char const * n(name.c_str());
1121 :
1122 : // global scope? if so ignore the section parameter
1123 : //
1124 1240 : if((f_setup.get_section_operator() & SECTION_OPERATOR_CPP) != 0
1125 32 : && n[0] == ':'
1126 622 : && n[1] == ':')
1127 : {
1128 2 : do
1129 : {
1130 4 : ++n;
1131 : }
1132 4 : while(*n == ':');
1133 : }
1134 : else
1135 : {
1136 1236 : snap::tokenize_string(section_list
1137 : , section
1138 : , "::"
1139 : , true
1140 : , std::string()
1141 618 : , &snap::string_predicate<string_list_t>);
1142 : }
1143 :
1144 620 : char const * s(n);
1145 7334 : while(*n != '\0')
1146 : {
1147 6718 : if((f_setup.get_section_operator() & SECTION_OPERATOR_C) != 0
1148 3359 : && *n == '.')
1149 : {
1150 32 : if(s == n)
1151 : {
1152 2 : log << log_level_t::error
1153 1 : << "option name \""
1154 1 : << name
1155 1 : << "\" cannot start with a period (.)."
1156 1 : << end;
1157 1 : return false;
1158 : }
1159 31 : section_list.push_back(std::string(s, n - s));
1160 8 : do
1161 : {
1162 39 : ++n;
1163 : }
1164 39 : while(*n == '.');
1165 31 : s = n;
1166 : }
1167 6654 : else if((f_setup.get_section_operator() & SECTION_OPERATOR_CPP) != 0
1168 66 : && n[0] == ':'
1169 3339 : && n[1] == ':')
1170 : {
1171 12 : if(s == n)
1172 : {
1173 2 : log << log_level_t::error
1174 1 : << "option name \""
1175 1 : << name
1176 1 : << "\" cannot start with a scope operator (::)."
1177 1 : << end;
1178 1 : return false;
1179 : }
1180 11 : section_list.push_back(std::string(s, n - s));
1181 11 : do
1182 : {
1183 22 : ++n;
1184 : }
1185 22 : while(*n == ':');
1186 11 : s = n;
1187 : }
1188 : else
1189 : {
1190 3315 : ++n;
1191 : }
1192 : }
1193 618 : if(s == n)
1194 : {
1195 4 : log << log_level_t::error
1196 2 : << "option name \""
1197 2 : << name
1198 2 : << "\" cannot end with a section operator or be empty."
1199 2 : << end;
1200 2 : return false;
1201 : }
1202 1232 : std::string param_name(s, n - s);
1203 :
1204 1232 : std::string const section_name(boost::algorithm::join(section_list, "::"));
1205 :
1206 1232 : if(f_setup.get_section_operator() == SECTION_OPERATOR_NONE
1207 616 : && !section_list.empty())
1208 : {
1209 2 : log << log_level_t::error
1210 1 : << "option name \""
1211 1 : << name
1212 1 : << "\" cannot be added to section \""
1213 1 : << section_name
1214 1 : << "\" because there is no section support for this configuration file."
1215 1 : << end;
1216 1 : return false;
1217 : }
1218 1230 : if((f_setup.get_section_operator() & SECTION_OPERATOR_ONE_SECTION) != 0
1219 615 : && section_list.size() > 1)
1220 : {
1221 10 : log << log_level_t::error
1222 5 : << "option name \""
1223 5 : << name
1224 5 : << "\" cannot be added to section \""
1225 5 : << section_name
1226 5 : << "\" because this configuration only accepts one section level."
1227 5 : << end;
1228 5 : return false;
1229 : }
1230 :
1231 610 : section_list.push_back(param_name);
1232 1220 : std::string const full_name(boost::algorithm::join(section_list, "::"));
1233 :
1234 : // verify that each section name only includes characters we accept
1235 : // for a parameter name
1236 : //
1237 : // WARNING: we do not test with full_name because it includes ':'
1238 : //
1239 1251 : for(auto sn : section_list)
1240 : {
1241 4002 : for(char const * f(sn.c_str()); *f != '\0'; ++f)
1242 : {
1243 3361 : switch(*f)
1244 : {
1245 : case '\001': // forbid controls
1246 : case '\002':
1247 : case '\003':
1248 : case '\004':
1249 : case '\005':
1250 : case '\006':
1251 : case '\007':
1252 : case '\010':
1253 : case '\011':
1254 : case '\012':
1255 : case '\013':
1256 : case '\014':
1257 : case '\015':
1258 : case '\016':
1259 : case '\017':
1260 : case '\020':
1261 : case '\021':
1262 : case '\022':
1263 : case '\023':
1264 : case '\024':
1265 : case '\025':
1266 : case '\026':
1267 : case '\027':
1268 : case '\030':
1269 : case '\031':
1270 : case '\032':
1271 : case '\033':
1272 : case '\034':
1273 : case '\035':
1274 : case '\036':
1275 : case '\037':
1276 : case ' ': // forbid spaces
1277 : case '\'': // forbid all quotes
1278 : case '"': // forbid all quotes
1279 : case ';': // forbid all comment operators
1280 : case '#': // forbid all comment operators
1281 : case '/': // forbid all comment operators
1282 : case '=': // forbid all assignment operators
1283 : case ':': // forbid all assignment operators
1284 : case '?': // forbid all assignment operators (for later)
1285 : case '+': // forbid all assignment operators (for later)
1286 : case '\\': // forbid backslashes
1287 218 : log << log_level_t::error
1288 109 : << "parameter \""
1289 109 : << full_name
1290 109 : << "\" on line "
1291 218 : << f_line
1292 109 : << " in configuration file \""
1293 218 : << f_setup.get_filename()
1294 109 : << "\" includes a character not acceptable for a section or parameter name (controls, space, quotes, and \";#/=:?+\\\"."
1295 109 : << end;
1296 109 : return false;
1297 :
1298 : }
1299 : }
1300 : }
1301 :
1302 1002 : safe_lock lock(g_mutex);
1303 :
1304 : // add the section to the list of sections
1305 : //
1306 : // TODO: should we have a list of all the parent sections? Someone can
1307 : // write "a::b::c::d = 123" and we currently only get section
1308 : // "a::b::c", no section "a" and no section "a::b".
1309 : //
1310 501 : if(!section_name.empty())
1311 : {
1312 122 : f_sections.insert(section_name);
1313 : }
1314 :
1315 501 : callback_action_t action(callback_action_t::created);
1316 501 : auto it(f_parameters.find(full_name));
1317 501 : if(it == f_parameters.end())
1318 : {
1319 493 : f_parameters[full_name] = value;
1320 : }
1321 : else
1322 : {
1323 8 : if(f_reading)
1324 : {
1325 : // this is just a warning; it can be neat to know about such
1326 : // problems and fix them early
1327 : //
1328 4 : log << log_level_t::warning
1329 2 : << "parameter \""
1330 2 : << full_name
1331 2 : << "\" on line "
1332 4 : << f_line
1333 2 : << " in configuration file \""
1334 4 : << f_setup.get_filename()
1335 2 : << "\" was found twice in the same configuration file."
1336 2 : << end;
1337 : }
1338 :
1339 8 : it->second = value;
1340 :
1341 8 : action = callback_action_t::updated;
1342 : }
1343 :
1344 501 : if(!f_reading)
1345 : {
1346 8 : f_modified = true;
1347 :
1348 8 : if(f_callback)
1349 : {
1350 4 : f_callback(shared_from_this(), action, full_name, value);
1351 : }
1352 : }
1353 :
1354 501 : return true;
1355 : }
1356 :
1357 :
1358 : /** \brief Erase the named parameter from this configuration file.
1359 : *
1360 : * This function can be used to remove the specified parameter from
1361 : * this configuration file.
1362 : *
1363 : * If that parameter is not defined in the file, then nothing happens.
1364 : *
1365 : * \param[in] name The name of the parameter to remove.
1366 : *
1367 : * \return true if the parameter was removed, false if it did not exist.
1368 : */
1369 2 : bool conf_file::erase_parameter(std::string name)
1370 : {
1371 2 : std::replace(name.begin(), name.end(), '_', '-');
1372 :
1373 2 : auto it(f_parameters.find(name));
1374 2 : if(it == f_parameters.end())
1375 : {
1376 1 : return false;
1377 : }
1378 :
1379 1 : f_parameters.erase(it);
1380 :
1381 1 : if(!f_reading)
1382 : {
1383 1 : f_modified = true;
1384 :
1385 1 : if(f_callback)
1386 : {
1387 1 : f_callback(shared_from_this(), callback_action_t::erased, name, std::string());
1388 : }
1389 : }
1390 :
1391 1 : return true;
1392 : }
1393 :
1394 :
1395 : /** \brief Check whether this configuration file was modified.
1396 : *
1397 : * This function returns the value of the f_modified flag which is true
1398 : * if any value was createed, updated, or erased from the configuration
1399 : * file since after it was loaded.
1400 : *
1401 : * This tells you whether you should call the save() function, assuming
1402 : * you want to keep such changes.
1403 : *
1404 : * \return true if changes were made to this file parameters.
1405 : */
1406 7 : bool conf_file::was_modified() const
1407 : {
1408 7 : return f_modified;
1409 : }
1410 :
1411 :
1412 : /** \brief Read one characte from the input stream.
1413 : *
1414 : * This function reads one character from the input stream and returns it
1415 : * as an `int`.
1416 : *
1417 : * If there is an ungotten character (i.e. ungetc() was called) then that
1418 : * character is returned.
1419 : *
1420 : * When the end of the file is reached, this function returns -1.
1421 : *
1422 : * \note
1423 : * This function is oblivious of UTF-8. It should not matter since any
1424 : * Unicode character would anyway be treated as is.
1425 : *
1426 : * \param[in,out] in The input stream.
1427 : *
1428 : * \return The character read or -1 when EOF is reached.
1429 : */
1430 11612 : int conf_file::getc(std::ifstream & in)
1431 : {
1432 11612 : if(f_unget_char != '\0')
1433 : {
1434 34 : int const r(f_unget_char);
1435 34 : f_unget_char = '\0';
1436 34 : return r;
1437 : }
1438 :
1439 : char c;
1440 11578 : in.get(c);
1441 :
1442 11578 : if(in.eof())
1443 : {
1444 173 : return EOF;
1445 : }
1446 :
1447 11405 : return static_cast<std::uint8_t>(c);
1448 : }
1449 :
1450 :
1451 : /** \brief Restore one character.
1452 : *
1453 : * This function is used whenever we read one additional character to
1454 : * know whether a certain character followed another. For example, we
1455 : * check for a `'\\n'` whenever we find a `'\\r'`. However, if the
1456 : * character right after the `'\\r'` is not a `'\\n'` we call this
1457 : * ungetc() function so next time we can re-read that same character.
1458 : *
1459 : * \note
1460 : * You can call ungetc() only once between calls to getc(). The
1461 : * current buffer is just one single character. Right now our
1462 : * parser doesn't need more than that.
1463 : *
1464 : * \param[in] c The character to restore.
1465 : */
1466 34 : void conf_file::ungetc(int c)
1467 : {
1468 34 : if(f_unget_char != '\0')
1469 : {
1470 : throw getopt_exception_logic("conf_file::ungetc() called when the f_unget_char variable member is not '\\0'."); // LCOV_EXCL_LINE
1471 : }
1472 34 : f_unget_char = c;
1473 34 : }
1474 :
1475 :
1476 : /** \brief Get one line.
1477 : *
1478 : * This function reads one line. The function takes the line continuation
1479 : * setup in account. So for example a line that ends with a backslash
1480 : * continues on the next line when the line continuation is setup to Unix.
1481 : *
1482 : * Note that by default comments are also continued. So a backslash in
1483 : * Unix mode continues a comment on the next line.
1484 : *
1485 : * There is a special case with the semicolon continuation setup. When
1486 : * the line starts as a comment, it will end on the first standalone
1487 : * newline (i.e. a comment does not need to end with a semi-colon.)
1488 : *
1489 : * \param[in,out] in The input stream.
1490 : * \param[out] line Where the line gets saved.
1491 : *
1492 : * \return true if a line was read, false on EOF.
1493 : */
1494 938 : bool conf_file::get_line(std::ifstream & in, std::string & line)
1495 : {
1496 938 : line.clear();
1497 :
1498 10588 : for(;;)
1499 : {
1500 11526 : int c(getc(in));
1501 11526 : if(c == EOF)
1502 : {
1503 172 : return false;
1504 : }
1505 11354 : if(c == ';'
1506 11354 : && f_setup.get_line_continuation() == line_continuation_t::semicolon)
1507 : {
1508 1 : return true;
1509 : }
1510 :
1511 11415 : while(c == '\n' || c == '\r')
1512 : {
1513 796 : if(c == '\r')
1514 : {
1515 19 : c = getc(in);
1516 19 : if(c != '\n')
1517 : {
1518 3 : ungetc(c);
1519 : }
1520 19 : c = '\n';
1521 : }
1522 :
1523 796 : ++f_line;
1524 796 : switch(f_setup.get_line_continuation())
1525 : {
1526 : case line_continuation_t::single_line:
1527 : // continuation support
1528 76 : return true;
1529 :
1530 : case line_continuation_t::rfc_822:
1531 17 : c = getc(in);
1532 17 : if(!iswspace(c))
1533 : {
1534 15 : ungetc(c);
1535 15 : return true;
1536 : }
1537 4 : do
1538 : {
1539 4 : c = getc(in);
1540 : }
1541 : while(iswspace(c));
1542 2 : break;
1543 :
1544 : case line_continuation_t::msdos:
1545 34 : if(line.empty()
1546 17 : || line.back() != '&')
1547 : {
1548 16 : return true;
1549 : }
1550 1 : line.pop_back();
1551 1 : c = getc(in);
1552 1 : break;
1553 :
1554 : case line_continuation_t::unix:
1555 1304 : if(line.empty()
1556 652 : || line.back() != '\\')
1557 : {
1558 641 : return true;
1559 : }
1560 11 : line.pop_back();
1561 11 : c = getc(in);
1562 11 : break;
1563 :
1564 : case line_continuation_t::fortran:
1565 17 : c = getc(in);
1566 17 : if(c != '&')
1567 : {
1568 16 : ungetc(c);
1569 16 : return true;
1570 : }
1571 1 : c = getc(in);
1572 1 : break;
1573 :
1574 : case line_continuation_t::semicolon:
1575 : // if we have a comment, we want to return immediately;
1576 : // at this time, the comments are not multi-line so
1577 : // the call can return true only if we were reading the
1578 : // very first line
1579 : //
1580 17 : if(is_comment(line.c_str()))
1581 : {
1582 1 : return true;
1583 : }
1584 : // the semicolon is checked earlier, just keep the newline
1585 : // in this case
1586 : //
1587 16 : line += c;
1588 16 : c = getc(in);
1589 16 : break;
1590 :
1591 : }
1592 : }
1593 :
1594 10588 : line += c;
1595 : }
1596 : }
1597 :
1598 :
1599 : /** \brief Read a configuration file.
1600 : *
1601 : * This function reads a configuration file and saves all the parameters it
1602 : * finds in a map which can later be checked against an option table for
1603 : * validation.
1604 : *
1605 : * \todo
1606 : * Add support for quotes in configuration files as parameters are otherwise
1607 : * saved as a separated list of parameters losing the number of spaces between
1608 : * each entry.
1609 : */
1610 177 : void conf_file::read_configuration()
1611 : {
1612 349 : snap::safe_variable<decltype(f_reading)> safe_reading(f_reading, true);
1613 :
1614 349 : std::ifstream conf(f_setup.get_filename());
1615 177 : if(!conf)
1616 : {
1617 5 : f_errno = errno;
1618 5 : return;
1619 : }
1620 :
1621 344 : std::string current_section;
1622 344 : std::vector<std::string> sections;
1623 344 : std::string str;
1624 172 : f_line = 0;
1625 1704 : while(get_line(conf, str))
1626 : {
1627 766 : char const * s(str.c_str());
1628 862 : while(iswspace(*s))
1629 : {
1630 48 : ++s;
1631 : }
1632 1532 : if(*s == '\0'
1633 766 : || is_comment(s))
1634 : {
1635 : // skip empty lines and comments
1636 235 : continue;
1637 : }
1638 1310 : if((f_setup.get_section_operator() & SECTION_OPERATOR_BLOCK) != 0
1639 655 : && *s == '}')
1640 : {
1641 5 : current_section = sections.back();
1642 5 : sections.pop_back();
1643 5 : continue;
1644 : }
1645 650 : char const * str_name(s);
1646 650 : char const * e(nullptr);
1647 11914 : while(!is_assignment_operator(*s)
1648 3605 : && ((f_setup.get_section_operator() & SECTION_OPERATOR_BLOCK) == 0 || (*s != '{' && *s != '}'))
1649 3605 : && ((f_setup.get_section_operator() & SECTION_OPERATOR_INI_FILE) == 0 || *s != ']')
1650 3578 : && *s != '\0'
1651 7751 : && !iswspace(*s))
1652 : {
1653 3538 : ++s;
1654 : }
1655 650 : if(iswspace(*s))
1656 : {
1657 28 : e = s;
1658 164 : while(iswspace(*s))
1659 : {
1660 68 : ++s;
1661 : }
1662 56 : if(*s != '\0'
1663 28 : && !is_assignment_operator(*s)
1664 12 : && (f_setup.get_assignment_operator() & ASSIGNMENT_OPERATOR_SPACE) == 0
1665 37 : && ((f_setup.get_section_operator() & SECTION_OPERATOR_BLOCK) == 0 || (*s != '{' && *s != '}')))
1666 : {
1667 6 : log << log_level_t::error
1668 3 : << "option name from \""
1669 3 : << str
1670 3 : << "\" on line "
1671 6 : << f_line
1672 3 : << " in configuration file \""
1673 6 : << f_setup.get_filename()
1674 3 : << "\" cannot include a space, missing assignment operator?"
1675 3 : << end;
1676 3 : continue;
1677 : }
1678 : }
1679 647 : if(e == nullptr)
1680 : {
1681 622 : e = s;
1682 : }
1683 647 : if(e - str_name == 0)
1684 : {
1685 2 : log << log_level_t::error
1686 1 : << "no option name in \""
1687 1 : << str
1688 1 : << "\" on line "
1689 2 : << f_line
1690 1 : << " from configuration file \""
1691 2 : << f_setup.get_filename()
1692 1 : << "\", missing name before the assignment operator?"
1693 1 : << end;
1694 1 : continue;
1695 : }
1696 1288 : std::string name(str_name, e - str_name);
1697 646 : std::replace(name.begin(), name.end(), '_', '-');
1698 646 : if(name[0] == '-')
1699 : {
1700 4 : log << log_level_t::error
1701 2 : << "option names in configuration files cannot start with a dash or an underscore in \""
1702 2 : << str
1703 2 : << "\" on line "
1704 4 : << f_line
1705 2 : << " from configuration file \""
1706 4 : << f_setup.get_filename()
1707 2 : << "\"."
1708 2 : << end;
1709 2 : continue;
1710 : }
1711 1288 : if((f_setup.get_section_operator() & SECTION_OPERATOR_INI_FILE) != 0
1712 155 : && name.length() >= 1
1713 155 : && name[0] == '['
1714 671 : && *s == ']')
1715 : {
1716 27 : ++s;
1717 27 : if(!sections.empty())
1718 : {
1719 2 : log << log_level_t::error
1720 1 : << "`[...]` sections can't be used within a `section { ... }` on line "
1721 2 : << f_line
1722 1 : << " from configuration file \""
1723 2 : << f_setup.get_filename()
1724 1 : << "\"."
1725 1 : << end;
1726 1 : continue;
1727 : }
1728 30 : while(iswspace(*s))
1729 : {
1730 2 : ++s;
1731 : }
1732 52 : if(*s != '\0'
1733 26 : && !is_comment(s))
1734 : {
1735 2 : log << log_level_t::error
1736 1 : << "section names in configuration files cannot be followed by anything other than spaces in \""
1737 1 : << str
1738 1 : << "\" on line "
1739 2 : << f_line
1740 1 : << " from configuration file \""
1741 2 : << f_setup.get_filename()
1742 1 : << "\"."
1743 1 : << end;
1744 1 : continue;
1745 : }
1746 25 : if(name.length() == 1)
1747 : {
1748 : // "[]" removes the section
1749 : //
1750 1 : current_section.clear();
1751 : }
1752 : else
1753 : {
1754 24 : current_section = name.substr(1);
1755 24 : current_section += "::";
1756 : }
1757 : }
1758 1234 : else if((f_setup.get_section_operator() & SECTION_OPERATOR_BLOCK) != 0
1759 617 : && *s == '{')
1760 : {
1761 6 : sections.push_back(current_section);
1762 6 : current_section += name;
1763 6 : current_section += "::";
1764 : }
1765 : else
1766 : {
1767 611 : if(is_assignment_operator(*s))
1768 : {
1769 593 : ++s;
1770 : }
1771 645 : while(iswspace(*s))
1772 : {
1773 17 : ++s;
1774 : }
1775 625 : for(e = str.c_str() + str.length(); e > s; --e)
1776 : {
1777 609 : if(!iswspace(e[-1]))
1778 : {
1779 595 : break;
1780 : }
1781 : }
1782 611 : size_t const len(e - s);
1783 1222 : std::string value(s, len);
1784 611 : boost::replace_all(value, "\\\\", "\\");
1785 611 : boost::replace_all(value, "\\r", "\r");
1786 611 : boost::replace_all(value, "\\n", "\n");
1787 611 : boost::replace_all(value, "\\t", "\t");
1788 611 : set_parameter(current_section, name, value);
1789 : }
1790 : }
1791 172 : if(!sections.empty())
1792 : {
1793 2 : log << log_level_t::error
1794 1 : << "unterminated `section { ... }`, the `}` is missing in configuration file \""
1795 2 : << f_setup.get_filename()
1796 1 : << "\"."
1797 1 : << end;
1798 : }
1799 : }
1800 :
1801 :
1802 : /** \brief Check whether `c` is an assignment operator.
1803 : *
1804 : * This function checks the \p c parameter to know whether it matches
1805 : * one of the character allowed as an assignment character.
1806 : *
1807 : * \param[in] c The character to be checked.
1808 : *
1809 : * \return true if c is considered to represent an assignment character.
1810 : */
1811 1118939 : bool conf_file::is_assignment_operator(int c) const
1812 : {
1813 1118939 : assignment_operator_t const assignment_operator(f_setup.get_assignment_operator());
1814 2237745 : return ((assignment_operator & ASSIGNMENT_OPERATOR_EQUAL) != 0 && c == '=')
1815 1117771 : || ((assignment_operator & ASSIGNMENT_OPERATOR_COLON) != 0 && c == ':')
1816 2236688 : || ((assignment_operator & ASSIGNMENT_OPERATOR_SPACE) != 0 && std::iswspace(c));
1817 : }
1818 :
1819 :
1820 : /** \brief Check whether the string starts with a comment introducer.
1821 : *
1822 : * This function checks whether the \p s string starts with a comment.
1823 : *
1824 : * We support different types of comment introducers. This function
1825 : * checks the flags as defined in the constructor and returns true
1826 : * if the type of character introducer defines a comment.
1827 : *
1828 : * We currently support:
1829 : *
1830 : * \li .ini file comments, introduced by a semi-colon (;)
1831 : *
1832 : * \li Shell file comments, introduced by a hash character (#)
1833 : *
1834 : * \li C++ comment, introduced by two slashes (//)
1835 : *
1836 : * \param[in] s The string to check for a comment.
1837 : *
1838 : * \return `true` if the string represents a comment.
1839 : */
1840 751 : bool conf_file::is_comment(char const * s) const
1841 : {
1842 751 : comment_t const comment(f_setup.get_comment());
1843 751 : if((comment & COMMENT_INI) != 0
1844 157 : && *s == ';')
1845 : {
1846 5 : return true;
1847 : }
1848 :
1849 746 : if((comment & COMMENT_SHELL) != 0
1850 415 : && *s == '#')
1851 : {
1852 69 : return true;
1853 : }
1854 :
1855 677 : if((comment & COMMENT_CPP) != 0
1856 10 : && s[0] == '/'
1857 5 : && s[1] == '/')
1858 : {
1859 5 : return true;
1860 : }
1861 :
1862 672 : return false;
1863 : }
1864 :
1865 :
/** \brief Returns true if \p c is considered to be a whitespace.
 *
 * Our iswspace() function is equivalent to the std::iswspace() function
 * except that `'\\r'` and `'\\n'` are never viewed as white spaces.
 *
 * \param[in] c  The character to be checked.
 *
 * \return true if c is considered to be a white space character.
 */
bool iswspace(int c)
{
    // the end of line characters are handled separately by the parser
    //
    if(c == '\n'
    || c == '\r')
    {
        return false;
    }

    return std::iswspace(c) != 0;
}
1879 :
1880 :
1881 6 : } // namespace advgetopt
1882 : // vim: ts=4 sw=4 et
|