Line data Source code
1 : /*
2 : * License:
3 : * Copyright (c) 2006-2019 Made to Order Software Corp. All Rights Reserved
4 : *
5 : * https://snapwebsites.org/
6 : * contact@m2osw.com
7 : *
8 : * This program is free software; you can redistribute it and/or modify
9 : * it under the terms of the GNU General Public License as published by
10 : * the Free Software Foundation; either version 2 of the License, or
11 : * (at your option) any later version.
12 : *
13 : * This program is distributed in the hope that it will be useful,
14 : * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 : * GNU General Public License for more details.
17 : *
18 : * You should have received a copy of the GNU General Public License along
19 : * with this program; if not, write to the Free Software Foundation, Inc.,
20 : * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 : *
22 : * Authors:
23 : * Alexis Wilke alexis@m2osw.com
24 : * Doug Barbieri doug@m2osw.com
25 : */
26 :
27 :
28 : /** \file
29 : * \brief Implementation of the conf_file and conf_file_setup classes.
30 : *
31 : * This is the implementation of the class used to load and save
32 : * configuration files.
33 : */
34 :
35 : // self
36 : //
37 : #include "advgetopt/conf_file.h"
38 :
39 :
40 : // advgetopt lib
41 : //
42 : #include "advgetopt/exception.h"
43 : #include "advgetopt/log.h"
44 : #include "advgetopt/utils.h"
45 :
46 :
47 : // snapdev lib
48 : //
49 : #include <snapdev/safe_variable.h>
50 : #include <snapdev/tokenize_string.h>
51 :
52 :
53 : // boost lib
54 : //
55 : #include <boost/algorithm/string/join.hpp>
56 : #include <boost/algorithm/string/replace.hpp>
57 :
58 : // C++ lib
59 : //
60 : #include <algorithm>
61 : #include <fstream>
62 :
63 :
64 : // last include
65 : //
66 : #include <snapdev/poison.h>
67 :
68 :
69 :
70 : namespace advgetopt
71 : {
72 :
73 :
74 :
75 : /** \brief Private conf_file data.
76 : *
77 : * The conf_file has a few globals used to cache configuration files.
78 : * Since it has to work in a multi-thread environment, we also have
79 : * a mutex.
80 : */
81 : namespace
82 : {
83 :
84 :
85 :
86 : /** \brief A map of configuration files.
87 : *
88 : * This typedef defines a type used to hold all the configuration files
89 : * that were loaded so far.
90 : *
91 : * The map is indexed by a string representing the full path to the
92 : * configuration file.
93 : *
94 : * The value is a shared pointer to configuration file. Since we may
95 : * share that data between multiple users, it made sense to force you
96 : * to use a configuration file smart pointer. Note, though, that we
97 : * never destroy the pointer until we quit (i.e. you cannot force a
98 : * re-load of the configuration file. Changes that happen in memory
99 : * are visible to all users, but changes to the actual configuration
100 : * file are complete invisible to use.)
101 : */
102 : typedef std::map<std::string, conf_file::pointer_t> conf_file_map_t;
103 :
104 :
105 : /** \brief The configuration files.
106 : *
107 : * This global defines a list of configuration files indexed by
108 : * filename (full path, but not the URL, just a path.)
109 : *
110 : * Whenever a configuration file is being retrieved with the
111 : * conf_file::get_conf_file() function, it is first searched
112 : * in this map. If it exists in the map, that version gets
113 : * used (if the URL of the two setups match one to one.)
114 : * If there is no such file in the map, then a new one is
115 : * created by loading the corresponding file.
116 : */
117 2 : conf_file_map_t g_conf_files = conf_file_map_t();
118 :
119 :
120 : class conf_mutex
121 : {
122 : public:
123 : /** \brief A mutex to protect configuration calls.
124 : *
125 : * Dealing with configuration files may happen in a multi-threaded
126 : * environment. In that case we have to protect many function calls
127 : * which access the data because that can change over time.
128 : *
129 : * \note
130 : * The getopt object is already managed on its own:
131 : * it reads the parameters on load and then offer constant access
132 : * to all of what was loaded, found in an environment variable, or
133 : * was handled by parsing the command line arguments.
134 : */
135 2 : conf_mutex()
136 2 : {
137 : pthread_mutexattr_t mattr;
138 2 : int err(pthread_mutexattr_init(&mattr));
139 2 : if(err != 0)
140 : {
141 : throw getopt_exception_initialization("pthread_muteattr_init() failed"); // LCOV_EXCL_LINE
142 : }
143 2 : err = pthread_mutexattr_settype(&mattr, PTHREAD_MUTEX_RECURSIVE);
144 2 : if(err != 0)
145 : {
146 : pthread_mutexattr_destroy(&mattr); // LCOV_EXCL_LINE
147 : throw getopt_exception_initialization("pthread_muteattr_settype() failed"); // LCOV_EXCL_LINE
148 : }
149 2 : err = pthread_mutex_init(&f_mutex, &mattr);
150 2 : if(err != 0)
151 : {
152 : pthread_mutexattr_destroy(&mattr); // LCOV_EXCL_LINE
153 : throw getopt_exception_initialization("pthread_mutex_init() failed"); // LCOV_EXCL_LINE
154 : }
155 2 : err = pthread_mutexattr_destroy(&mattr);
156 2 : if(err != 0)
157 : {
158 : throw getopt_exception_initialization("pthread_mutexattr_destroy() failed"); // LCOV_EXCL_LINE
159 : }
160 2 : }
161 :
162 : /** \brief Clean up the pthread mutex object.
163 : *
164 : * This function performs the necessary clean up of the pthread mutex.
165 : *
166 : * The constructor will have initialized a valid \c f_mutex. That
167 : * variable member must be de-initialized before we release this
168 : * object.
169 : */
170 2 : ~conf_mutex()
171 2 : {
172 2 : pthread_mutex_destroy(&f_mutex);
173 2 : }
174 :
175 : /* \brief Lock this mutex.
176 : *
177 : * This function locks the mutex.
178 : *
179 : * The lock() and unlock() functions should not be called directly.
180 : * Instead you should use the safe_lock object which will make sure
181 : * that the two functions get called in pairs as expected (i.e. for
182 : * each call to the lock() a corresponding call to unlock() will
183 : * automatically happen.)
184 : */
185 5043 : void lock()
186 : {
187 5043 : int const err(pthread_mutex_lock(&f_mutex));
188 5043 : if(err != 0)
189 : {
190 : throw getopt_exception_invalid("pthread_mutex_lock() failed"); // LCOV_EXCL_LINE
191 : }
192 5043 : }
193 :
194 : /** \brief Unlock this mutex.
195 : *
196 : * This function unlocks the mutex.
197 : *
198 : * This is \em rarely used to unlock the mutex early.
199 : *
200 : * \warning
201 : * Since a mutex can be locked multiple times (recursively), there is
202 : * no protection to know whether it is still locked or not.
203 : */
204 5043 : void unlock()
205 : {
206 5043 : int const err(pthread_mutex_unlock(&f_mutex));
207 5043 : if(err != 0)
208 : {
209 : throw getopt_exception_invalid("pthread_mutex_unlock() failed"); // LCOV_EXCL_LINE
210 : }
211 5043 : }
212 :
213 : private:
214 : /** \brief The Linux mutex.
215 : *
216 : * This definition is the base Linux mutex as defined by the pthread
217 : * implementation under Linux.
218 : *
219 : * It gets initialized on construction. If the initialization fails,
220 : * the constructor function throws so it is always defined when the
221 : * object was successfully created.
222 : */
223 : pthread_mutex_t f_mutex = pthread_mutex_t();
224 : };
225 :
226 :
227 : /** \brief The configuration file mutex.
228 : *
229 : * This options are generally viewed as read-only global variables. They
230 : * get setup once early on and then used and reused as many times as
231 : * required.
232 : *
233 : * This mutex makes sure that access between multiple thread happens in
234 : * a safe manner.
235 : */
236 2 : conf_mutex g_mutex;
237 :
238 :
239 :
240 : /** \brief Safely lock/unlock a mutex.
241 : *
242 : * This function allows for locking and unlocking a mutex in a safe
243 : * manner which means that it will always get unlocked when you exit
244 : * a context, whether you exit with a return, break, continue or
245 : * an exception.
246 : *
247 : * The constructor locks the mutex.
248 : *
249 : * The destructor unlocks the mutex.
250 : *
251 : * When necessary we create a sub-block to make sure that that
252 : * the mutex gets released as soon as possible.
253 : */
254 : class safe_lock
255 : {
256 : public:
257 : /** \brief Lock the mutex.
258 : *
259 : * The constructor takes a reference to a mutex as input. It saves
260 : * that reference and then calls the lock() function on that object.
261 : *
262 : * \param[in] m The mutex to lock and unlock.
263 : */
264 5043 : safe_lock(conf_mutex & m)
265 5043 : : f_mutex(m)
266 : {
267 5043 : f_mutex.lock();
268 5043 : }
269 :
270 : /** \brief Unlock the mutex.
271 : *
272 : * Whenever we reach the end of the context, unlock the mutex.
273 : * This function always calls the unlock and it will happen
274 : * even on exceptions or some other early returning within a
275 : * function.
276 : */
277 5043 : ~safe_lock()
278 5043 : {
279 5043 : f_mutex.unlock();
280 5043 : }
281 :
282 : private:
283 : /** \brief The mutex to loack and unlock.
284 : *
285 : * This variable member holds a reference to the mutex that we
286 : * want to lock on construction and unlock on destruction.
287 : */
288 : conf_mutex & f_mutex;
289 : };
290 :
291 :
292 :
293 : } // no name namespace
294 :
295 :
296 :
297 :
298 :
299 : /** \brief Initialize the file setup object.
300 : *
301 : * This constructor initializes the setup object which can later be used
302 : * to search for an existing conf_file or creating a new conf_file.
303 : *
304 : * The setup holds the various parameters used to know how to load a
305 : * configuration file in memory. The parameters include
306 : *
307 : * \li \p filename -- the name of the file to read as a configuration file.
308 : * \li \p line_continuation -- how lines in the files are being read; in
309 : * most cases a line in a text file ends when a newline character (`\\n`)
310 : * is found; this parameter allows for lines that span (continue) on
311 : * multiple text lines. Only one type of continuation or no continue
312 : * (a.k.a. "single line") can be used per file.
313 : * \li \p assignment_operator -- the character(s) accepted between the
314 : * name of a variable and its value; by default this is the equal sign
315 : * (`=`). Multiple operators can be accepted.
316 : * \li \p comment -- how comments are introduced when supported. Multiple
317 : * introducers can be accepted within one file. By default we accept the
318 : * Unix Shell (`#`) and INI file (`;`) comment introducers.
319 : * \li \p section_operator -- the set of characters accepted as section
320 : * separator. By default we accept the INI file syntax (the `[section]`
321 : * syntax.)
322 : *
323 : * \param[in] filename A valid filename.
324 : * \param[in] line_continue One of the line_continuation_t values.
325 : * \param[in] assignment_operator A set of assignment operator flags.
326 : * \param[in] comment A set of comment flags.
327 : * \param[in] section_operator A set of section operator flags.
328 : */
329 28483 : conf_file_setup::conf_file_setup(
330 : std::string const & filename
331 : , line_continuation_t line_continuation
332 : , assignment_operator_t assignment_operator
333 : , comment_t comment
334 : , section_operator_t section_operator)
335 : : f_line_continuation(line_continuation)
336 28483 : , f_assignment_operator(assignment_operator == 0
337 : ? ASSIGNMENT_OPERATOR_EQUAL
338 : : assignment_operator)
339 : , f_comment(comment)
340 56967 : , f_section_operator(section_operator)
341 : {
342 28483 : if(filename.empty())
343 : {
344 1 : throw getopt_exception_invalid("trying to load a configuration file using an empty filename.");
345 : }
346 :
347 56964 : std::unique_ptr<char, decltype(&::free)> fn(realpath(filename.c_str(), nullptr), &::free);
348 28482 : if(fn != nullptr)
349 : {
350 28008 : f_filename = fn.get();
351 : }
352 28482 : }
353 :
354 :
355 : /** \brief Check whether the setup is considered valid.
356 : *
357 : * This function is used to check whether the conf_file_setup is valid or
358 : * not. It is valid when everything is in order, which at this point means
359 : * the filename is not empty.
360 : *
361 : * All the other parameters are always viewed as being valid.
362 : *
363 : * \return true if the conf_file_setup is considered valid.
364 : */
365 25857 : bool conf_file_setup::is_valid() const
366 : {
367 25857 : return !f_filename.empty();
368 : }
369 :
370 :
371 : /** \brief Get the filename.
372 : *
373 : * When creating a new conf_file_setup, you have to specify a filename.
374 : * This function returns that filename after it was canonicalized by
375 : * the constructor.
376 : *
377 : * The canonicalization process computes the full path to the real
378 : * file. If such does not exist then no filename is defined, so this
379 : * function may return an empty string.
380 : *
381 : * \return The filename or an empty string if the realpath() could not
382 : * be calculated.
383 : */
384 28491 : std::string const & conf_file_setup::get_filename() const
385 : {
386 28491 : return f_filename;
387 : }
388 :
389 :
390 : /** \brief Get the line continuation setting.
391 : *
392 : * This function returns the line continuation for this setup.
393 : *
394 : * This parameter is not a set of flags. We only support one type of
395 : * line continuation per file. Many continuations could be contradictory
396 : * if used simultaneously.
397 : *
398 : * The continuation setting is one of the following:
399 : *
400 : * \li line_continuation_t::single_line -- no continuation support; any
401 : * definition must be on one single line.
402 : * \li line_continuation_t::rfc_822 -- like email/HTTP, whitespace at
403 : * the start of the next line means that the current line continues there;
404 : * those whitespaces get removed from the value so if you want a space
405 : * between two lines, make sure to finish the current line with a space.
406 : * \li line_continuation_t::msdos -- `&` at end of the line.
407 : * \li line_continuation_t::unix -- `\` at end of the line.
408 : * \li line_continuation_t::fortran -- `&` at the start of the next line;
409 : * there cannot be any spaces, the `&` has to be the very first character.
410 : * \li line_continuation_t::semicolon -- `;` ends the _line_; when reading
411 : * a line with this continuation mode, the reader stops only when it finds
412 : * the `;` or EOF (also if a comment is found.)
413 : *
414 : * \return a line continuation mode.
415 : */
416 26164 : line_continuation_t conf_file_setup::get_line_continuation() const
417 : {
418 26164 : return f_line_continuation;
419 : }
420 :
421 :
422 : /** \brief Get the accepted assignment operators.
423 : *
424 : * This function returns the set of flags describing the list of
425 : * accepted operators one can use to do assignments.
426 : *
427 : * Right now we support the follow:
428 : *
429 : * \li ASSIGNMENT_OPERATOR_EQUAL -- the equal (`=`) character, like in
430 : * most Unix configuration files and shell scripts.
431 : * \li ASSIGNMENT_OPERATOR_COLON -- the colon (`:`) character, like in
432 : * email and HTTP headers.
433 : * \li ASSIGNMENT_OPERATOR_SPACE -- the space (` `) character; this is
434 : * less used, but many Unix configuration files still use this scheme.
435 : *
436 : * \todo
437 : * Add support for additional operators such as:
438 : * \todo
439 : * \li `+=` -- append data
440 : * \li `?=` -- set to this value if not yet set
441 : *
442 : * \return The set of accepted assignment operators.
443 : *
444 : * \sa is_assignment_operator()
445 : */
446 1144304 : assignment_operator_t conf_file_setup::get_assignment_operator() const
447 : {
448 1144304 : return f_assignment_operator;
449 : }
450 :
451 :
452 : /** Get the comment flags.
453 : *
454 : * This function returns the comment flags. These describe which type
455 : * of comments are supported in this configuration file.
456 : *
457 : * Currently we support:
458 : *
459 : * \li COMMENT_INI -- INI file like comments, these are introduced with
460 : * a semi-colon (`;`) and end with a newline.
461 : * \li COMMENT_SHELL -- Unix shell like comments, these are introduced
462 : * with a hash (`#`) and end with a newline.
463 : * \li COMMENT_CPP -- C++ like comments, these are introduced with two
464 : * slashes (`//`) and end with a newline.
465 : *
466 : * Right now we only support line comments. Configuration entries cannot
467 : * include comments. A comment character can be preceeded by spaces and
468 : * tabs.
469 : *
470 : * Line continuation is taken in account with comments. So the following
471 : * when the line continuation is set to Unix is one long comment:
472 : *
473 : * \code
474 : * # line continuation works with comments \
475 : * just like with any other line... because the \
476 : * continuation character and the newline characters \
477 : * just get removed before the get_line() function \
478 : * returns...
479 : * \endcode
480 : *
481 : * \return The comment flags.
482 : *
483 : * \sa is_comment()
484 : */
485 26104 : comment_t conf_file_setup::get_comment() const
486 : {
487 26104 : return f_comment;
488 : }
489 :
490 :
491 : /** \brief Get the accepted section operators.
492 : *
493 : * This function returns the flags representing which of the
494 : * section operators are accepted.
495 : *
496 : * We currently support the following types of sections:
497 : *
498 : * \li SECTION_OPERATOR_NONE -- no sections are accepted.
499 : * \li SECTION_OPERATOR_C -- the period (`.`) is viewed as a section/name
500 : * separator as when you access a variable member in a structure.
501 : * \li SECTION_OPERATOR_CPP -- the scope operator (`::`) is viewed as a
502 : * section/name separator; if used at the very beginning, it is viewed
503 : * as "global scope" and whatever other section is currently active is
504 : * ignored.
505 : * \li SECTION_OPERATOR_BLOCK -- the configuration files can include
506 : * opening (`{`) and closing (`}`) curvly brackets to group parameters
507 : * together; a name must preceed the opening bracket, it represents
508 : * the section name.
509 : * \li SECTION_OPERATOR_INI_FILE -- like in the MS-DOS .ini files, the
510 : * configuration file can include square brackets to mark sections; this
511 : * method limits the number of section names to one level.
512 : *
513 : * \bug
514 : * The INI file support does not verify that a section name does not
515 : * itself include more sub-sections. For example, the following would
516 : * be three section names:
517 : * \bug
518 : * \code
519 : * [a::b::c]
520 : * var=123
521 : * \endcode
522 : * \bug
523 : * So in effect, the variable named `var` ends up in section `a`,
524 : * sub-section `b`, and sub-sub-section `c` (or section `a::b::c`.)
525 : * Before saving the results in the parameters, all section operators
526 : * get transformed to the C++ scope (`::`) operator, which is why that
527 : * operator used in any name ends up looking like a section separator.
528 : */
529 43025 : section_operator_t conf_file_setup::get_section_operator() const
530 : {
531 43025 : return f_section_operator;
532 : }
533 :
534 :
535 : /** \brief Transform the setup in a URL.
536 : *
537 : * This function transforms the configuration file setup in a unique URL.
538 : * This URL allows us to verify that two setup are the same so when
539 : * attempting to reload the same configuration file, we can make sure
540 : * you are attempting to do so with the same URL.
541 : *
542 : * This is because trying to read the same file with, for example, line
543 : * continuation set to Unix the first time and then set to MS-DOS the
544 : * second time would not load the same thing is either line continuation
545 : * was used.
546 : *
547 : * \todo
548 : * We should look into have a set_config_url() or have a constructor
549 : * which accepts a URL.
550 : *
551 : * \return The URL representing this setup.
552 : */
553 41262 : std::string conf_file_setup::get_config_url() const
554 : {
555 41262 : if(f_url.empty())
556 : {
557 56264 : std::stringstream ss;
558 :
559 28132 : ss << "file://"
560 28132 : << (f_filename.empty()
561 : ? "/<empty>"
562 56264 : : f_filename);
563 :
564 56264 : std::vector<std::string> params;
565 28132 : if(f_line_continuation != line_continuation_t::line_continuation_unix)
566 : {
567 46274 : std::string name;
568 23137 : switch(f_line_continuation)
569 : {
570 : case line_continuation_t::line_continuation_single_line:
571 4223 : name = "single-line";
572 4223 : break;
573 :
574 : case line_continuation_t::line_continuation_rfc_822:
575 4727 : name = "rfc-822";
576 4727 : break;
577 :
578 : case line_continuation_t::line_continuation_msdos:
579 4727 : name = "msdos";
580 4727 : break;
581 :
582 : // we should not ever receive this one since we don't enter
583 : // this block when the value is "unix"
584 : //
585 : //case line_continuation_t::line_continuation_unix:
586 : // name = "unix";
587 : // break;
588 :
589 : case line_continuation_t::line_continuation_fortran:
590 4728 : name = "fortran";
591 4728 : break;
592 :
593 : case line_continuation_t::line_continuation_semicolon:
594 4727 : name = "semi-colon";
595 4727 : break;
596 :
597 : default:
598 5 : throw getopt_exception_logic("unexpected line continuation.");
599 :
600 : }
601 23132 : params.push_back("line-continuation=" + name);
602 : }
603 :
604 28127 : if(f_assignment_operator != ASSIGNMENT_OPERATOR_EQUAL)
605 : {
606 42326 : std::vector<std::string> assignments;
607 21163 : if((f_assignment_operator & ASSIGNMENT_OPERATOR_EQUAL) != 0)
608 : {
609 10577 : assignments.push_back("equal");
610 : }
611 21163 : if((f_assignment_operator & ASSIGNMENT_OPERATOR_COLON) != 0)
612 : {
613 14111 : assignments.push_back("colon");
614 : }
615 21163 : if((f_assignment_operator & ASSIGNMENT_OPERATOR_SPACE) != 0)
616 : {
617 14104 : assignments.push_back("space");
618 : }
619 21163 : if(!assignments.empty())
620 : {
621 21163 : params.push_back("assignment-operator=" + boost::algorithm::join(assignments, ","));
622 : }
623 : }
624 :
625 : if(f_comment != COMMENT_INI | COMMENT_SHELL)
626 : {
627 56254 : std::vector<std::string> comment;
628 28127 : if((f_comment & COMMENT_INI) != 0)
629 : {
630 12388 : comment.push_back("ini");
631 : }
632 28127 : if((f_comment & COMMENT_SHELL) != 0)
633 : {
634 11931 : comment.push_back("shell");
635 : }
636 28127 : if((f_comment & COMMENT_CPP) != 0)
637 : {
638 12379 : comment.push_back("cpp");
639 : }
640 28127 : if(comment.empty())
641 : {
642 3816 : params.push_back("comment=none");
643 : }
644 : else
645 : {
646 24311 : params.push_back("comment=" + boost::algorithm::join(comment, ","));
647 : }
648 : }
649 :
650 28127 : if(f_section_operator != SECTION_OPERATOR_INI_FILE)
651 : {
652 52520 : std::vector<std::string> section_operator;
653 26260 : if((f_section_operator & SECTION_OPERATOR_C) != 0)
654 : {
655 13005 : section_operator.push_back("c");
656 : }
657 26260 : if((f_section_operator & SECTION_OPERATOR_CPP) != 0)
658 : {
659 12996 : section_operator.push_back("cpp");
660 : }
661 26260 : if((f_section_operator & SECTION_OPERATOR_BLOCK) != 0)
662 : {
663 12991 : section_operator.push_back("block");
664 : }
665 26260 : if((f_section_operator & SECTION_OPERATOR_INI_FILE) != 0)
666 : {
667 11134 : section_operator.push_back("ini-file");
668 : }
669 26260 : if(!section_operator.empty())
670 : {
671 24146 : params.push_back("section-operator=" + boost::algorithm::join(section_operator, ","));
672 : }
673 : }
674 :
675 56254 : std::string const query_string(boost::algorithm::join(params, "&"));
676 28127 : if(!query_string.empty())
677 : {
678 28127 : ss << '?'
679 28127 : << query_string;
680 : }
681 :
682 28127 : f_url = ss.str();
683 : }
684 :
685 41257 : return f_url;
686 : }
687 :
688 :
689 :
690 :
691 : /** \brief Create and read a conf_file.
692 : *
693 : * This function creates a new conf_file object unless one with the same
694 : * filename already exists.
695 : *
696 : * If the configuration file was already loaded, then that pointer gets
697 : * returned instead of reloading the file. There is currently no API to
698 : * allow for the removal because another thread or function may have
699 : * the existing pointer cached and we want all instances of a configuration
700 : * file to be the same (i.e. if you update the value of a parameter then
701 : * that new value should be visible by all the users of that configuration
702 : * file.) Therefore, you can think of a configuration file as a global
703 : * variable.
704 : *
705 : * \note
706 : * Any number of call this function to load a given file always returns
707 : * exactly the same pointer.
708 : *
709 : * \todo
710 : * With the communicator, we will at some point implement a class
711 : * used to detect that a file changed, allowing us to get a signal
712 : * and reload the file as required. This get_conf_file() function
713 : * will greatly benefit from such since that way we can automatically
714 : * reload the configuration file. In other words, process A could
715 : * make a change, then process B reloads and sees the change that
716 : * process A made. Such an implementation will require a proper
717 : * locking mechanism of the configuration files while modifications
718 : * are being performed.
719 : *
720 : * \param[in] setup The settings to be used in this configuration file reader.
721 : *
722 : * \return A pointer to the configuration file data.
723 : */
724 2808 : conf_file::pointer_t conf_file::get_conf_file(conf_file_setup const & setup)
725 : {
726 5616 : safe_lock lock(g_mutex);
727 :
728 2808 : auto it(g_conf_files.find(setup.get_filename()));
729 2808 : if(it != g_conf_files.end())
730 : {
731 2631 : if(it->second->get_setup().get_config_url() != setup.get_config_url())
732 : {
733 : throw getopt_exception_logic("trying to load configuration file \""
734 5250 : + setup.get_config_url()
735 5250 : + "\" but an existing configuration file with the same name was loaded with URL: \""
736 10500 : + it->second->get_setup().get_config_url()
737 7875 : + "\".");
738 : }
739 6 : return it->second;
740 : }
741 354 : conf_file::pointer_t cf(new conf_file(setup));
742 177 : g_conf_files[setup.get_filename()] = cf;
743 177 : return cf;
744 : }
745 :
746 :
747 : /** \brief Save the configuration file.
748 : *
749 : * This function saves the current data from this configuration file to
750 : * the file. It overwrites the existing file.
751 : *
752 : * Note that when you load the configuration, you may get data from
753 : * many different configuration files. This very file will only
754 : * include the data that was loaded from this file, though, and whatever
755 : * modifications you made.
756 : *
757 : * If the conf is not marked as modified, the function returns immediately
758 : * with true.
759 : *
760 : * \param[in] create_backup Whether to create a backup or not.
761 : *
762 : * \return true if the save worked as expected.
763 : */
764 2 : bool conf_file::save_configuration(bool create_backup)
765 : {
766 2 : if(f_modified)
767 : {
768 : // create backup?
769 : //
770 1 : if(create_backup)
771 : {
772 : // TODO: offer means to set the backup extension
773 : //
774 2 : std::string const backup_filename(f_setup.get_filename() + ".bak");
775 :
776 2 : if(unlink(backup_filename.c_str()) != 0
777 1 : && errno != ENOENT)
778 : {
779 : f_errno = errno; // LCOV_EXCL_LINE
780 : return false; // LCOV_EXCL_LINE
781 : }
782 :
783 1 : if(rename(f_setup.get_filename().c_str(), backup_filename.c_str()) != 0)
784 : {
785 : f_errno = errno; // LCOV_EXCL_LINE
786 : return false; // LCOV_EXCL_LINE
787 : }
788 : }
789 :
790 : // save parameters to file
791 : //
792 2 : std::ofstream conf;
793 1 : conf.open(f_setup.get_filename().c_str());
794 1 : if(!conf.is_open())
795 : {
796 : f_errno = errno; // LCOV_EXCL_LINE
797 : return false; // LCOV_EXCL_LINE
798 : }
799 :
800 1 : time_t const now(time(nullptr));
801 : tm t;
802 1 : gmtime_r(&now, &t);
803 : char str_date[16];
804 1 : strftime(str_date, sizeof(str_date), "%Y/%m/%d", &t);
805 : char str_time[16];
806 1 : strftime(str_time, sizeof(str_time), "%H:%M:%S", &t);
807 :
808 : // header warning with date & time
809 : //
810 1 : conf << "# This file was auto-generated by snap_config.cpp on " << str_date << " at " << str_time << "." << std::endl
811 1 : << "# Making modifications here is likely safe unless the tool handling this" << std::endl
812 1 : << "# configuration file is actively working on it while you do the edits." << std::endl;
813 4 : for(auto p : f_parameters)
814 : {
815 3 : conf << p.first << "=";
816 :
817 : // prevent saving \r and \n characters as is when part of the
818 : // value; also double \ otherwise reading those back would fail
819 : //
820 6 : std::string value(p.second);
821 3 : boost::replace_all(value, "\\", "\\\\");
822 3 : boost::replace_all(value, "\r", "\\r");
823 3 : boost::replace_all(value, "\n", "\\n");
824 3 : boost::replace_all(value, "\t", "\\t");
825 3 : conf << value << std::endl;
826 :
827 3 : if(!conf)
828 : {
829 : return false; // LCOV_EXCL_LINE
830 : }
831 : }
832 :
833 : // it all worked, it's considered saved now
834 : //
835 1 : f_modified = false;
836 : }
837 :
838 2 : return true;
839 : }
840 :
841 :
842 : /** \brief Initialize and read a configuration file.
843 : *
844 : * This constructor initializes this conf_file object and then reads the
845 : * corresponding configuration file.
846 : *
847 : * Note that you have to use the create_conf_file() function for you
848 : * to be able to create a configuration file. It is done that way became
849 : * a file can be read only once. Once loaded, it gets cached until your
850 : * application quits.
851 : *
852 : * \param[in] filename The path and name of the configuration file to be read.
853 : * \param[in] line_continuation How lines end in this file.
854 : * \param[in] assignment_operator What appears between the name and value.
855 : * \param[in] comment The supported comment introducer(s).
856 : */
857 177 : conf_file::conf_file(conf_file_setup const & setup)
858 177 : : f_setup(setup)
859 : {
860 177 : read_configuration();
861 177 : }
862 :
863 :
864 : /** \brief Get the configuration file setup.
865 : *
866 : * This function returns a copy of the setup used to load this
867 : * configuration file.
868 : *
869 : * \note
870 : * This function has no mutex protection because the setup can't
871 : * change so there is no multi-thread protection necessary (the
872 : * fact that you hold a shared pointer to the conf_file object
873 : * is enough protection in this case.)
874 : *
875 : * \return A reference to this configuration file setup.
876 : */
877 5403 : conf_file_setup const & conf_file::get_setup() const
878 : {
879 5403 : return f_setup;
880 : }
881 :
882 :
883 : /** \brief Set a callback to detect when changes happen.
884 : *
885 : * This function is used to attach a callback to this file. This is
886 : * useful if you'd like to know when a change happen to a parameter
887 : * in this configuration file.
888 : *
889 : * The callback gets called when:
890 : *
891 : * \li The set_parameter() is called and the parameter gets created.
892 : * \li The set_parameter() is called and the parameter gets updated.
893 : * \li The erase_parameter() is called and the parameter gets erased.
894 : *
895 : * You can cancel your callback by calling this function again without
896 : * a target (i.e. `cf->set_callback(callback_t());`).
897 : *
898 : * To attach another object to your callback, you can either create
899 : * a callback which is attached to your object and a function
900 : * member or use std::bind() to attach the object to the function
901 : * call.
902 : *
903 : * \param[in] callback The new callback std::function.
904 : */
905 1 : void conf_file::set_callback(callback_t callback)
906 : {
907 1 : f_callback = callback;
908 1 : }
909 :
910 :
911 : /** \brief Get the error number opening/reading the configuration file.
912 : *
913 : * The class registers the errno value whenever an I/O error happens
914 : * while handling the configuration file. In most cases the function
915 : * is expected to return 0.
916 : *
917 : * The ENOENT error should not happen since the setup is going to be
918 : * marked as invalid when a configuration file does not exist and
919 : * you should not end up creation a conf_file object when that
920 : * happens. However, it is expected when you want to make some
921 : * changes to a few parameters and save them back to file (i.e.
922 : * the very first time there will be no file under the writable
923 : * configuration folder.)
924 : *
925 : * \return The last errno detected while accessing the configuration file.
926 : */
927 152 : int conf_file::get_errno() const
928 : {
929 304 : safe_lock lock(g_mutex);
930 :
931 304 : return f_errno;
932 : }
933 :
934 :
935 : /** \brief Get a list of sections.
936 : *
937 : * This function returns a copy of the list of sections defined in
938 : * this configuration file. In most cases, you should not need this
939 : * function since you are expected to know what parameters may be
940 : * defined. There are times though when it can be very practical.
941 : * For example, the options_config.cpp makes use of it since each
942 : * section is a parameter which we do not know the name of until
943 : * we have access to this array of sections.
944 : *
945 : * \note
946 : * We return a list because in a multithread environment another thread
947 : * may decide to make changes to the list of parameters which has the
948 : * side effect of eventually adding a section.
949 : *
950 : * \return A copy of the list of sections.
951 : */
952 178 : conf_file::sections_t conf_file::get_sections() const
953 : {
954 356 : safe_lock lock(g_mutex);
955 :
956 356 : return f_sections;
957 : }
958 :
959 :
960 : /** \brief Get a list of parameters.
961 : *
962 : * This function returns a copy of the list of parameters defined in
963 : * this configuration file.
964 : *
965 : * \note
966 : * We return a list because in a multithread environment another thread
967 : * may decide to make changes to the list of parameters (including
968 : * erasing a parameter.)
969 : *
970 : * \return A copy of the list of parameters.
971 : */
972 174 : conf_file::parameters_t conf_file::get_parameters() const
973 : {
974 348 : safe_lock lock(g_mutex);
975 :
976 348 : return f_parameters;
977 : }
978 :
979 :
980 : /** \brief Check whether a parameter is defined.
981 : *
982 : * This function checks for the existance of a parameter. It is a good
983 : * idea to first check for the existance of a parameter since the
984 : * get_parameter() function may otherwise return an empty string and
985 : * you cannot know whether that empty string means that the parameter
986 : * was not defined or it was set to the empty string.
987 : *
988 : * \param[in] name The name of the parameter to check.
989 : *
990 : * \return true if the parameter is defined, false otherwise.
991 : *
992 : * \sa get_parameter()
993 : * \sa set_parameter()
994 : */
995 622 : bool conf_file::has_parameter(std::string name) const
996 : {
997 622 : std::replace(name.begin(), name.end(), '_', '-');
998 :
999 1244 : safe_lock lock(g_mutex);
1000 :
1001 622 : auto it(f_parameters.find(name));
1002 1244 : return it != f_parameters.end();
1003 : }
1004 :
1005 :
1006 : /** \brief Get the named parameter.
1007 : *
1008 : * This function searches for the specified parameter. If that parameter
1009 : * exists, then its value is returned. Note that the value of a parameter
1010 : * may be the empty string.
1011 : *
1012 : * If the parameter does not exist, the function returns the empty string.
1013 : * To distinguish between an undefined parameter and a parameter set to
1014 : * the empty string, use the has_parameter() function.
1015 : *
1016 : * \param[in] name The name of the parameter to retrieve.
1017 : *
1018 : * \return The current value of the parameter or an empty string.
1019 : *
1020 : * \sa has_parameter()
1021 : * \sa set_parameter()
1022 : */
1023 608 : std::string conf_file::get_parameter(std::string name) const
1024 : {
1025 608 : std::replace(name.begin(), name.end(), '_', '-');
1026 :
1027 1216 : safe_lock lock(g_mutex);
1028 :
1029 608 : auto it(f_parameters.find(name));
1030 608 : if(it != f_parameters.end())
1031 : {
1032 465 : return it->second;
1033 : }
1034 143 : return std::string();
1035 : }
1036 :
1037 :
1038 : /** \brief Set a parameter.
1039 : *
1040 : * This function sets a parameter to the specified value.
1041 : *
1042 : * The name of the value includes the \p section names and the \p name
1043 : * parameter concatenated with a C++ scopre operator (::) in between
1044 : * (unless \p section is the empty string in which case no scope operator
1045 : * gets added.)
1046 : *
1047 : * When the \p name parameter starts with a scope parameter, the \p section
1048 : * parameter is ignored. This allows one to ignore the current section
1049 : * (i.e. the last '[...]' or any '\<name> { ... }').
1050 : *
1051 : * The \p section parameter is a list of section names separated by
1052 : * the C++ scope operator (::).
1053 : *
1054 : * The \p name parameter may include C (.) and/or C++ (::) section
1055 : * separators when the configuration file supports those. Internally,
1056 : * those get moved to the \p section parameter. That allows us to
1057 : * verify that the number of sections is valid.
1058 : *
1059 : * This function may be called any number of time. The last value is
1060 : * the one kept. While reading the configuration file, though, a warning
1061 : * is generated when a parameter gets overwritten since this is often the
1062 : * source of a problem.
1063 : *
1064 : * In the following configuration file:
1065 : *
1066 : * \code
1067 : * var=name
1068 : * var=twice
1069 : * \endcode
1070 : *
1071 : * The variable named `var` will be set to `twice` on return and a warning
1072 : * will have been generated warning about the fact that the variable was
1073 : * modified while reading the configuration file.
1074 : *
1075 : * The full name of the parameter (i.e. section + name) cannot include any
1076 : * of the following characters:
1077 : *
1078 : * \li control characters (any character between 0x00 and 0x1F)
1079 : * \li a space (0x20)
1080 : * \li a backslash (`\`)
1081 : * \li quotation (`"` and `'`)
1082 : * \li comment (';', '#', '/')
1083 : * \li assignment ('=', ':', '?', '+')
1084 : *
1085 : * \note
1086 : * The \p section and \p name parameters have any underscore (`_`)
1087 : * replaced with dashes (`-`) before getting used. The very first
1088 : * character can be a dash. This allows you to therefore create
1089 : * parameters which cannot appear in a configuration file, an
1090 : * environment variable or on the command line (where parameter are
1091 : * not allowed to start with a dash.)
1092 : *
1093 : * \warning
1094 : * It is important to note that when a \p name includes a C++ scope
1095 : * operator, the final parameter name looks like it includes a section
1096 : * name (i.e. the name "a::b", when the C++ section flag is not set,
1097 : * is accepted as is; so the final parameter name is going to be "a::b"
1098 : * and therefore it will include what looks like a section name.)
1099 : * There should not be any concern about this small \em glitch though
1100 : * since you do not have to accept any such parameter.
1101 : *
1102 : * \param[in] section The list of section or an empty string.
1103 : * \param[in] name The name of the parameter.
1104 : * \param[in] value The value of the parameter.
1105 : */
1106 620 : bool conf_file::set_parameter(std::string section, std::string name, std::string const & value)
1107 : {
1108 : // use the tokenize_string() function because we do not want to support
1109 : // quoted strings in this list of sections which our split_string()
1110 : // does automatically
1111 : //
1112 1240 : string_list_t section_list;
1113 :
1114 620 : std::replace(section.begin(), section.end(), '_', '-');
1115 620 : std::replace(name.begin(), name.end(), '_', '-');
1116 :
1117 620 : char const * n(name.c_str());
1118 :
1119 : // global scope? if so ignore the section parameter
1120 : //
1121 1240 : if((f_setup.get_section_operator() & SECTION_OPERATOR_CPP) != 0
1122 32 : && n[0] == ':'
1123 622 : && n[1] == ':')
1124 : {
1125 2 : do
1126 : {
1127 4 : ++n;
1128 : }
1129 4 : while(*n == ':');
1130 : }
1131 : else
1132 : {
1133 1236 : snap::tokenize_string(section_list
1134 : , section
1135 : , "::"
1136 : , true
1137 : , std::string()
1138 618 : , &snap::string_predicate<string_list_t>);
1139 : }
1140 :
1141 620 : char const * s(n);
1142 7334 : while(*n != '\0')
1143 : {
1144 6718 : if((f_setup.get_section_operator() & SECTION_OPERATOR_C) != 0
1145 3359 : && *n == '.')
1146 : {
1147 32 : if(s == n)
1148 : {
1149 2 : log << log_level_t::error
1150 1 : << "option name \""
1151 1 : << name
1152 1 : << "\" cannot start with a period (.)."
1153 1 : << end;
1154 1 : return false;
1155 : }
1156 31 : section_list.push_back(std::string(s, n - s));
1157 8 : do
1158 : {
1159 39 : ++n;
1160 : }
1161 39 : while(*n == '.');
1162 31 : s = n;
1163 : }
1164 6654 : else if((f_setup.get_section_operator() & SECTION_OPERATOR_CPP) != 0
1165 66 : && n[0] == ':'
1166 3339 : && n[1] == ':')
1167 : {
1168 12 : if(s == n)
1169 : {
1170 2 : log << log_level_t::error
1171 1 : << "option name \""
1172 1 : << name
1173 1 : << "\" cannot start with a scope operator (::)."
1174 1 : << end;
1175 1 : return false;
1176 : }
1177 11 : section_list.push_back(std::string(s, n - s));
1178 11 : do
1179 : {
1180 22 : ++n;
1181 : }
1182 22 : while(*n == ':');
1183 11 : s = n;
1184 : }
1185 : else
1186 : {
1187 3315 : ++n;
1188 : }
1189 : }
1190 618 : if(s == n)
1191 : {
1192 4 : log << log_level_t::error
1193 2 : << "option name \""
1194 2 : << name
1195 2 : << "\" cannot end with a section operator or be empty."
1196 2 : << end;
1197 2 : return false;
1198 : }
1199 1232 : std::string param_name(s, n - s);
1200 :
1201 1232 : std::string const section_name(boost::algorithm::join(section_list, "::"));
1202 :
1203 1232 : if(f_setup.get_section_operator() == SECTION_OPERATOR_NONE
1204 616 : && !section_list.empty())
1205 : {
1206 2 : log << log_level_t::error
1207 1 : << "option name \""
1208 1 : << name
1209 1 : << "\" cannot be added to section \""
1210 1 : << section_name
1211 1 : << "\" because there is no section support for this configuration file."
1212 1 : << end;
1213 1 : return false;
1214 : }
1215 1230 : if((f_setup.get_section_operator() & SECTION_OPERATOR_ONE_SECTION) != 0
1216 615 : && section_list.size() > 1)
1217 : {
1218 10 : log << log_level_t::error
1219 5 : << "option name \""
1220 5 : << name
1221 5 : << "\" cannot be added to section \""
1222 5 : << section_name
1223 5 : << "\" because this configuration only accepts one section level."
1224 5 : << end;
1225 5 : return false;
1226 : }
1227 :
1228 610 : section_list.push_back(param_name);
1229 1220 : std::string const full_name(boost::algorithm::join(section_list, "::"));
1230 :
1231 : // verify that each section name only includes characters we accept
1232 : // for a parameter name
1233 : //
1234 : // WARNING: we do not test with full_name because it includes ':'
1235 : //
1236 1251 : for(auto sn : section_list)
1237 : {
1238 4002 : for(char const * f(sn.c_str()); *f != '\0'; ++f)
1239 : {
1240 3361 : switch(*f)
1241 : {
1242 : case '\001': // forbid controls
1243 : case '\002':
1244 : case '\003':
1245 : case '\004':
1246 : case '\005':
1247 : case '\006':
1248 : case '\007':
1249 : case '\010':
1250 : case '\011':
1251 : case '\012':
1252 : case '\013':
1253 : case '\014':
1254 : case '\015':
1255 : case '\016':
1256 : case '\017':
1257 : case '\020':
1258 : case '\021':
1259 : case '\022':
1260 : case '\023':
1261 : case '\024':
1262 : case '\025':
1263 : case '\026':
1264 : case '\027':
1265 : case '\030':
1266 : case '\031':
1267 : case '\032':
1268 : case '\033':
1269 : case '\034':
1270 : case '\035':
1271 : case '\036':
1272 : case '\037':
1273 : case ' ': // forbid spaces
1274 : case '\'': // forbid all quotes
1275 : case '"': // forbid all quotes
1276 : case ';': // forbid all comment operators
1277 : case '#': // forbid all comment operators
1278 : case '/': // forbid all comment operators
1279 : case '=': // forbid all assignment operators
1280 : case ':': // forbid all assignment operators
1281 : case '?': // forbid all assignment operators (for later)
1282 : case '+': // forbid all assignment operators (for later)
1283 : case '\\': // forbid backslashes
1284 218 : log << log_level_t::error
1285 109 : << "parameter \""
1286 109 : << full_name
1287 109 : << "\" on line "
1288 218 : << f_line
1289 109 : << " in configuration file \""
1290 218 : << f_setup.get_filename()
1291 109 : << "\" includes a character not acceptable for a section or parameter name (controls, space, quotes, and \";#/=:?+\\\"."
1292 109 : << end;
1293 109 : return false;
1294 :
1295 : }
1296 : }
1297 : }
1298 :
1299 1002 : safe_lock lock(g_mutex);
1300 :
1301 : // add the section to the list of sections
1302 : //
1303 : // TODO: should we have a list of all the parent sections? Someone can
1304 : // write "a::b::c::d = 123" and we currently only get section
1305 : // "a::b::c", no section "a" and no section "a::b".
1306 : //
1307 501 : if(!section_name.empty())
1308 : {
1309 122 : f_sections.insert(section_name);
1310 : }
1311 :
1312 501 : callback_action_t action(callback_action_t::created);
1313 501 : auto it(f_parameters.find(full_name));
1314 501 : if(it == f_parameters.end())
1315 : {
1316 493 : f_parameters[full_name] = value;
1317 : }
1318 : else
1319 : {
1320 8 : if(f_reading)
1321 : {
1322 : // this is just a warning; it can be neat to know about such
1323 : // problems and fix them early
1324 : //
1325 4 : log << log_level_t::warning
1326 2 : << "parameter \""
1327 2 : << full_name
1328 2 : << "\" on line "
1329 4 : << f_line
1330 2 : << " in configuration file \""
1331 4 : << f_setup.get_filename()
1332 2 : << "\" was found twice in the same configuration file."
1333 2 : << end;
1334 : }
1335 :
1336 8 : it->second = value;
1337 :
1338 8 : action = callback_action_t::updated;
1339 : }
1340 :
1341 501 : if(!f_reading)
1342 : {
1343 8 : f_modified = true;
1344 :
1345 8 : if(f_callback)
1346 : {
1347 4 : f_callback(shared_from_this(), action, full_name, value);
1348 : }
1349 : }
1350 :
1351 501 : return true;
1352 : }
1353 :
1354 :
1355 : /** \brief Erase the named parameter from this configuration file.
1356 : *
1357 : * This function can be used to remove the specified parameter from
1358 : * this configuration file.
1359 : *
1360 : * If that parameter is not defined in the file, then nothing happens.
1361 : *
1362 : * \param[in] name The name of the parameter to remove.
1363 : *
1364 : * \return true if the parameter was removed, false if it did not exist.
1365 : */
1366 2 : bool conf_file::erase_parameter(std::string name)
1367 : {
1368 2 : std::replace(name.begin(), name.end(), '_', '-');
1369 :
1370 2 : auto it(f_parameters.find(name));
1371 2 : if(it == f_parameters.end())
1372 : {
1373 1 : return false;
1374 : }
1375 :
1376 1 : f_parameters.erase(it);
1377 :
1378 1 : if(!f_reading)
1379 : {
1380 1 : f_modified = true;
1381 :
1382 1 : if(f_callback)
1383 : {
1384 1 : f_callback(shared_from_this(), callback_action_t::erased, name, std::string());
1385 : }
1386 : }
1387 :
1388 1 : return true;
1389 : }
1390 :
1391 :
1392 : /** \brief Check whether this configuration file was modified.
1393 : *
1394 : * This function returns the value of the f_modified flag which is true
1395 : * if any value was createed, updated, or erased from the configuration
1396 : * file since after it was loaded.
1397 : *
1398 : * This tells you whether you should call the save() function, assuming
1399 : * you want to keep such changes.
1400 : *
1401 : * \return true if changes were made to this file parameters.
1402 : */
1403 7 : bool conf_file::was_modified() const
1404 : {
1405 7 : return f_modified;
1406 : }
1407 :
1408 :
1409 : /** \brief Read one characte from the input stream.
1410 : *
1411 : * This function reads one character from the input stream and returns it
1412 : * as an `int`.
1413 : *
1414 : * If there is an ungotten character (i.e. ungetc() was called) then that
1415 : * character is returned.
1416 : *
1417 : * When the end of the file is reached, this function returns -1.
1418 : *
1419 : * \note
1420 : * This function is oblivious of UTF-8. It should not matter since any
1421 : * Unicode character would anyway be treated as is.
1422 : *
1423 : * \param[in,out] in The input stream.
1424 : *
1425 : * \return The character read or -1 when EOF is reached.
1426 : */
1427 11612 : int conf_file::getc(std::ifstream & in)
1428 : {
1429 11612 : if(f_unget_char != '\0')
1430 : {
1431 34 : int const r(f_unget_char);
1432 34 : f_unget_char = '\0';
1433 34 : return r;
1434 : }
1435 :
1436 : char c;
1437 11578 : in.get(c);
1438 :
1439 11578 : if(in.eof())
1440 : {
1441 173 : return EOF;
1442 : }
1443 :
1444 11405 : return static_cast<std::uint8_t>(c);
1445 : }
1446 :
1447 :
1448 : /** \brief Restore one character.
1449 : *
1450 : * This function is used whenever we read one additional character to
1451 : * know whether a certain character followed another. For example, we
1452 : * check for a `'\\n'` whenever we find a `'\\r'`. However, if the
1453 : * character right after the `'\\r'` is not a `'\\n'` we call this
1454 : * ungetc() function so next time we can re-read that same character.
1455 : *
1456 : * \note
1457 : * You can call ungetc() only once between calls to getc(). The
1458 : * current buffer is just one single character. Right now our
1459 : * parser doesn't need more than that.
1460 : *
1461 : * \param[in] c The character to restore.
1462 : */
1463 34 : void conf_file::ungetc(int c)
1464 : {
1465 34 : if(f_unget_char != '\0')
1466 : {
1467 : throw getopt_exception_logic("conf_file::ungetc() called when the f_unget_char variable member is not '\\0'."); // LCOV_EXCL_LINE
1468 : }
1469 34 : f_unget_char = c;
1470 34 : }
1471 :
1472 :
1473 : /** \brief Get one line.
1474 : *
1475 : * This function reads one line. The function takes the line continuation
1476 : * setup in account. So for example a line that ends with a backslash
1477 : * continues on the next line when the line continuation is setup to Unix.
1478 : *
1479 : * Note that by default comments are also continued. So a backslash in
1480 : * Unix mode continues a comment on the next line.
1481 : *
1482 : * There is a special case with the semicolon continuation setup. When
1483 : * the line starts as a comment, it will end on the first standalone
1484 : * newline (i.e. a comment does not need to end with a semi-colon.)
1485 : *
1486 : * \param[in,out] in The input stream.
1487 : * \param[out] line Where the line gets saved.
1488 : *
1489 : * \return true if a line was read, false on EOF.
1490 : */
1491 938 : bool conf_file::get_line(std::ifstream & in, std::string & line)
1492 : {
1493 938 : line.clear();
1494 :
1495 10588 : for(;;)
1496 : {
1497 11526 : int c(getc(in));
1498 11526 : if(c == EOF)
1499 : {
1500 172 : return false;
1501 : }
1502 11354 : if(c == ';'
1503 11354 : && f_setup.get_line_continuation() == line_continuation_t::line_continuation_semicolon)
1504 : {
1505 1 : return true;
1506 : }
1507 :
1508 11415 : while(c == '\n' || c == '\r')
1509 : {
1510 796 : if(c == '\r')
1511 : {
1512 19 : c = getc(in);
1513 19 : if(c != '\n')
1514 : {
1515 3 : ungetc(c);
1516 : }
1517 19 : c = '\n';
1518 : }
1519 :
1520 796 : ++f_line;
1521 796 : switch(f_setup.get_line_continuation())
1522 : {
1523 : case line_continuation_t::line_continuation_single_line:
1524 : // continuation support
1525 76 : return true;
1526 :
1527 : case line_continuation_t::line_continuation_rfc_822:
1528 17 : c = getc(in);
1529 17 : if(!iswspace(c))
1530 : {
1531 15 : ungetc(c);
1532 15 : return true;
1533 : }
1534 4 : do
1535 : {
1536 4 : c = getc(in);
1537 : }
1538 : while(iswspace(c));
1539 2 : break;
1540 :
1541 : case line_continuation_t::line_continuation_msdos:
1542 34 : if(line.empty()
1543 17 : || line.back() != '&')
1544 : {
1545 16 : return true;
1546 : }
1547 1 : line.pop_back();
1548 1 : c = getc(in);
1549 1 : break;
1550 :
1551 : case line_continuation_t::line_continuation_unix:
1552 1304 : if(line.empty()
1553 652 : || line.back() != '\\')
1554 : {
1555 641 : return true;
1556 : }
1557 11 : line.pop_back();
1558 11 : c = getc(in);
1559 11 : break;
1560 :
1561 : case line_continuation_t::line_continuation_fortran:
1562 17 : c = getc(in);
1563 17 : if(c != '&')
1564 : {
1565 16 : ungetc(c);
1566 16 : return true;
1567 : }
1568 1 : c = getc(in);
1569 1 : break;
1570 :
1571 : case line_continuation_t::line_continuation_semicolon:
1572 : // if we have a comment, we want to return immediately;
1573 : // at this time, the comments are not multi-line so
1574 : // the call can return true only if we were reading the
1575 : // very first line
1576 : //
1577 17 : if(is_comment(line.c_str()))
1578 : {
1579 1 : return true;
1580 : }
1581 : // the semicolon is checked earlier, just keep the newline
1582 : // in this case
1583 : //
1584 16 : line += c;
1585 16 : c = getc(in);
1586 16 : break;
1587 :
1588 : }
1589 : }
1590 :
1591 10588 : line += c;
1592 : }
1593 : }
1594 :
1595 :
1596 : /** \brief Read a configuration file.
1597 : *
1598 : * This function reads a configuration file and saves all the parameters it
1599 : * finds in a map which can later be checked against an option table for
1600 : * validation.
1601 : *
1602 : * \todo
1603 : * Add support for quotes in configuration files as parameters are otherwise
1604 : * saved as a separated list of parameters losing the number of spaces between
1605 : * each entry.
1606 : */
1607 177 : void conf_file::read_configuration()
1608 : {
1609 349 : snap::safe_variable<decltype(f_reading)> safe_reading(f_reading, true);
1610 :
1611 349 : std::ifstream conf(f_setup.get_filename());
1612 177 : if(!conf)
1613 : {
1614 5 : f_errno = errno;
1615 5 : return;
1616 : }
1617 :
1618 344 : std::string current_section;
1619 344 : std::vector<std::string> sections;
1620 344 : std::string str;
1621 172 : f_line = 0;
1622 1704 : while(get_line(conf, str))
1623 : {
1624 766 : char const * s(str.c_str());
1625 862 : while(iswspace(*s))
1626 : {
1627 48 : ++s;
1628 : }
1629 1532 : if(*s == '\0'
1630 766 : || is_comment(s))
1631 : {
1632 : // skip empty lines and comments
1633 235 : continue;
1634 : }
1635 1310 : if((f_setup.get_section_operator() & SECTION_OPERATOR_BLOCK) != 0
1636 655 : && *s == '}')
1637 : {
1638 5 : current_section = sections.back();
1639 5 : sections.pop_back();
1640 5 : continue;
1641 : }
1642 650 : char const * str_name(s);
1643 650 : char const * e(nullptr);
1644 11914 : while(!is_assignment_operator(*s)
1645 3605 : && ((f_setup.get_section_operator() & SECTION_OPERATOR_BLOCK) == 0 || (*s != '{' && *s != '}'))
1646 3605 : && ((f_setup.get_section_operator() & SECTION_OPERATOR_INI_FILE) == 0 || *s != ']')
1647 3578 : && *s != '\0'
1648 7751 : && !iswspace(*s))
1649 : {
1650 3538 : ++s;
1651 : }
1652 650 : if(iswspace(*s))
1653 : {
1654 28 : e = s;
1655 164 : while(iswspace(*s))
1656 : {
1657 68 : ++s;
1658 : }
1659 56 : if(*s != '\0'
1660 28 : && !is_assignment_operator(*s)
1661 12 : && (f_setup.get_assignment_operator() & ASSIGNMENT_OPERATOR_SPACE) == 0
1662 37 : && ((f_setup.get_section_operator() & SECTION_OPERATOR_BLOCK) == 0 || (*s != '{' && *s != '}')))
1663 : {
1664 6 : log << log_level_t::error
1665 3 : << "option name from \""
1666 3 : << str
1667 3 : << "\" on line "
1668 6 : << f_line
1669 3 : << " in configuration file \""
1670 6 : << f_setup.get_filename()
1671 3 : << "\" cannot include a space, missing assignment operator?"
1672 3 : << end;
1673 3 : continue;
1674 : }
1675 : }
1676 647 : if(e == nullptr)
1677 : {
1678 622 : e = s;
1679 : }
1680 647 : if(e - str_name == 0)
1681 : {
1682 2 : log << log_level_t::error
1683 1 : << "no option name in \""
1684 1 : << str
1685 1 : << "\" on line "
1686 2 : << f_line
1687 1 : << " from configuration file \""
1688 2 : << f_setup.get_filename()
1689 1 : << "\", missing name before the assignment operator?"
1690 1 : << end;
1691 1 : continue;
1692 : }
1693 1288 : std::string name(str_name, e - str_name);
1694 646 : std::replace(name.begin(), name.end(), '_', '-');
1695 646 : if(name[0] == '-')
1696 : {
1697 4 : log << log_level_t::error
1698 2 : << "option names in configuration files cannot start with a dash or an underscore in \""
1699 2 : << str
1700 2 : << "\" on line "
1701 4 : << f_line
1702 2 : << " from configuration file \""
1703 4 : << f_setup.get_filename()
1704 2 : << "\"."
1705 2 : << end;
1706 2 : continue;
1707 : }
1708 1288 : if((f_setup.get_section_operator() & SECTION_OPERATOR_INI_FILE) != 0
1709 155 : && name.length() >= 1
1710 155 : && name[0] == '['
1711 671 : && *s == ']')
1712 : {
1713 27 : ++s;
1714 27 : if(!sections.empty())
1715 : {
1716 2 : log << log_level_t::error
1717 1 : << "`[...]` sections can't be used within a `section { ... }` on line "
1718 2 : << f_line
1719 1 : << " from configuration file \""
1720 2 : << f_setup.get_filename()
1721 1 : << "\"."
1722 1 : << end;
1723 1 : continue;
1724 : }
1725 30 : while(iswspace(*s))
1726 : {
1727 2 : ++s;
1728 : }
1729 52 : if(*s != '\0'
1730 26 : && !is_comment(s))
1731 : {
1732 2 : log << log_level_t::error
1733 1 : << "section names in configuration files cannot be followed by anything other than spaces in \""
1734 1 : << str
1735 1 : << "\" on line "
1736 2 : << f_line
1737 1 : << " from configuration file \""
1738 2 : << f_setup.get_filename()
1739 1 : << "\"."
1740 1 : << end;
1741 1 : continue;
1742 : }
1743 25 : if(name.length() == 1)
1744 : {
1745 : // "[]" removes the section
1746 : //
1747 1 : current_section.clear();
1748 : }
1749 : else
1750 : {
1751 24 : current_section = name.substr(1);
1752 24 : current_section += "::";
1753 : }
1754 : }
1755 1234 : else if((f_setup.get_section_operator() & SECTION_OPERATOR_BLOCK) != 0
1756 617 : && *s == '{')
1757 : {
1758 6 : sections.push_back(current_section);
1759 6 : current_section += name;
1760 6 : current_section += "::";
1761 : }
1762 : else
1763 : {
1764 611 : if(is_assignment_operator(*s))
1765 : {
1766 593 : ++s;
1767 : }
1768 645 : while(iswspace(*s))
1769 : {
1770 17 : ++s;
1771 : }
1772 625 : for(e = str.c_str() + str.length(); e > s; --e)
1773 : {
1774 609 : if(!iswspace(e[-1]))
1775 : {
1776 595 : break;
1777 : }
1778 : }
1779 611 : size_t const len(e - s);
1780 1222 : std::string value(s, len);
1781 611 : boost::replace_all(value, "\\\\", "\\");
1782 611 : boost::replace_all(value, "\\r", "\r");
1783 611 : boost::replace_all(value, "\\n", "\n");
1784 611 : boost::replace_all(value, "\\t", "\t");
1785 611 : set_parameter(current_section, name, value);
1786 : }
1787 : }
1788 172 : if(!sections.empty())
1789 : {
1790 2 : log << log_level_t::error
1791 1 : << "unterminated `section { ... }`, the `}` is missing in configuration file \""
1792 2 : << f_setup.get_filename()
1793 1 : << "\"."
1794 1 : << end;
1795 : }
1796 : }
1797 :
1798 :
1799 : /** \brief Check whether `c` is an assignment operator.
1800 : *
1801 : * This function checks the \p c parameter to know whether it matches
1802 : * one of the character allowed as an assignment character.
1803 : *
1804 : * \param[in] c The character to be checked.
1805 : *
1806 : * \return true if c is considered to represent an assignment character.
1807 : */
1808 1118939 : bool conf_file::is_assignment_operator(int c) const
1809 : {
1810 1118939 : assignment_operator_t const assignment_operator(f_setup.get_assignment_operator());
1811 2237745 : return ((assignment_operator & ASSIGNMENT_OPERATOR_EQUAL) != 0 && c == '=')
1812 1117771 : || ((assignment_operator & ASSIGNMENT_OPERATOR_COLON) != 0 && c == ':')
1813 2236688 : || ((assignment_operator & ASSIGNMENT_OPERATOR_SPACE) != 0 && std::iswspace(c));
1814 : }
1815 :
1816 :
1817 : /** \brief Check whether the string starts with a comment introducer.
1818 : *
1819 : * This function checks whether the \p s string starts with a comment.
1820 : *
1821 : * We support different types of comment introducers. This function
1822 : * checks the flags as defined in the constructor and returns true
1823 : * if the type of character introducer defines a comment.
1824 : *
1825 : * We currently support:
1826 : *
1827 : * \li .ini file comments, introduced by a semi-colon (;)
1828 : *
1829 : * \li Shell file comments, introduced by a hash character (#)
1830 : *
1831 : * \li C++ comment, introduced by two slashes (//)
1832 : *
1833 : * \param[in] s The string to check for a comment.
1834 : *
1835 : * \return `true` if the string represents a comment.
1836 : */
1837 751 : bool conf_file::is_comment(char const * s) const
1838 : {
1839 751 : comment_t const comment(f_setup.get_comment());
1840 751 : if((comment & COMMENT_INI) != 0
1841 157 : && *s == ';')
1842 : {
1843 5 : return true;
1844 : }
1845 :
1846 746 : if((comment & COMMENT_SHELL) != 0
1847 415 : && *s == '#')
1848 : {
1849 69 : return true;
1850 : }
1851 :
1852 677 : if((comment & COMMENT_CPP) != 0
1853 10 : && s[0] == '/'
1854 5 : && s[1] == '/')
1855 : {
1856 5 : return true;
1857 : }
1858 :
1859 672 : return false;
1860 : }
1861 :
1862 :
/** \brief Check whether \p c is considered to be a white space.
 *
 * Our iswspace() function is equivalent to the std::iswspace() function
 * except that `'\\r'` and `'\\n'` are never viewed as white spaces.
 *
 * \param[in] c  The character to check.
 *
 * \return true if c is considered to be a white space character.
 */
bool iswspace(int c)
{
    // end of line characters are handled separately by the parser
    //
    if(c == '\n'
    || c == '\r')
    {
        return false;
    }

    return std::iswspace(c) != 0;
}
1876 :
1877 :
1878 6 : } // namespace advgetopt
1879 : // vim: ts=4 sw=4 et
|