advgetopt 2.0.47
Parse complex command line arguments and configuration files in C++.
validator.cpp
Go to the documentation of this file.
1// Copyright (c) 2006-2024 Made to Order Software Corp. All Rights Reserved
2//
3// https://snapwebsites.org/project/advgetopt
4// contact@m2osw.com
5//
6// This program is free software; you can redistribute it and/or modify
7// it under the terms of the GNU General Public License as published by
8// the Free Software Foundation; either version 2 of the License, or
9// (at your option) any later version.
10//
11// This program is distributed in the hope that it will be useful,
12// but WITHOUT ANY WARRANTY; without even the implied warranty of
13// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14// GNU General Public License for more details.
15//
16// You should have received a copy of the GNU General Public License along
17// with this program; if not, write to the Free Software Foundation, Inc.,
18// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
19
20
30// self
31//
32#include "advgetopt/validator.h"
33
34#include "advgetopt/exception.h"
36
37
38// cppthread
39//
40#include <cppthread/log.h>
41
42
43// snapdev
44//
45#include <snapdev/not_reached.h>
46#include <snapdev/join_strings.h>
47
48
49// C++
50//
51#include <map>
52
53
54// last include
55//
56#include <snapdev/poison.h>
57
58
59
60namespace advgetopt
61{
62
63
64
65namespace
66{
67
68
69typedef std::map<std::string, validator_factory const *> factory_map_t;
70
72
73
89
90
91class token
92{
93public:
94 token(token_t tok, std::string const & value = std::string())
95 : f_token(tok)
96 , f_value(value)
97 {
98 }
99
100 token_t tok() const
101 {
102 return f_token;
103 }
104
105 std::string const & value() const
106 {
107 return f_value;
108 }
109
110private:
111 token_t f_token = token_t::TOK_EOF;
112 std::string f_value = std::string();
113};
114
115class lexer
116{
117public:
118 lexer(char const * in)
119 : f_in(in)
120 {
121 }
122
124 {
125 for(;;)
126 {
127 int c(getc());
128 switch(c)
129 {
130 case '\0':
131 return token(token_t::TOK_EOF);
132
133 case '(':
134 return token(token_t::TOK_OPEN_PARENTHESIS);
135
136 case ')':
137 return token(token_t::TOK_CLOSE_PARENTHESIS);
138
139 case ',':
140 return token(token_t::TOK_COMMA);
141
142 case '|':
143 c = getc();
144 if(c != '|') // allow for || like in C
145 {
146 ungetc(c);
147 }
148 return token(token_t::TOK_OR);
149
150 case '"':
151 case '\'':
152 {
153 int const quote(c);
154 std::string s;
155 for(;;)
156 {
157 c = getc();
158 if(c == quote)
159 {
160 break;
161 }
162 s += static_cast<char>(c);
163 }
164 return token(token_t::TOK_STRING, s);
165 }
166
167 case '/':
168 {
169 std::string r;
170 for(;;)
171 {
172 r += static_cast<char>(c);
173 c = getc();
174 if(c == '/')
175 {
176 r += static_cast<char>(c);
177 break;
178 }
179 if(c < ' ' && c != '\t')
180 {
181 cppthread::log << cppthread::log_level_t::error
182 << "validator(): unexpected character for a regular expression ("
183 << static_cast<int>(c)
184 << ")."
185 << cppthread::end;
186 return token(token_t::TOK_INVALID);
187 }
188 if(c == '\\')
189 {
190 // we keep the backslash, it's important when
191 // further parsing happens
192 //
193 r += c;
194
195 c = getc();
196 if(c < ' ' && c != '\t')
197 {
198 cppthread::log << cppthread::log_level_t::error
199 << "validator(): unexpected escaped character for a regular expression ("
200 << static_cast<int>(c)
201 << ")."
202 << cppthread::end;
203 return token(token_t::TOK_INVALID);
204 }
205 }
206 }
207 // also allow for flags after the closing '/'
208 //
209 // at this time we only support 'i' but here we allow any
210 // letter for forward compatibility
211 //
212 for(;;)
213 {
214 c = getc();
215 if(c == '\0')
216 {
217 break;
218 }
219 if(c < 'a' || c > 'z')
220 {
221 ungetc(c);
222 if(c != ','
223 && c != ')')
224 {
225 cppthread::log << cppthread::log_level_t::error
226 << "validator(): unexpected flag character for a regular expression ("
227 << static_cast<int>(c)
228 << ")."
229 << cppthread::end;
230 return token(token_t::TOK_INVALID);
231 }
232 break;
233 }
234 r += c;
235 }
236 return token(token_t::TOK_REGEX, r);
237 }
238
239 case ' ':
240 // ignore spaces
241 break;
242
243 default:
244 {
245 std::string id;
246 for(;;)
247 {
248 switch(c)
249 {
250 case '(':
251 case ')':
252 case ',':
253 case '|':
254 case ' ':
255 ungetc(c);
256 [[fallthrough]];
257 case '\0':
258 return token(token_t::TOK_IDENTIFIER, id);
259
260 default:
261 if(c < ' ' || c > '~')
262 {
263 cppthread::log << cppthread::log_level_t::error
264 << "validator(): unexpected character for an identifier ("
265 << static_cast<int>(c)
266 << ")."
267 << cppthread::end;
268 return token(token_t::TOK_INVALID);
269 }
270 break;
271
272 }
273 id += static_cast<char>(c);
274 c = getc();
275 }
276 }
277 break;
278
279 }
280 }
281 snapdev::NOT_REACHED();
282 }
283
284 std::string remains() const
285 {
286 if(*f_in == '\0')
287 {
288 return std::string("...EOS");
289 }
290
291 return f_in;
292 }
293
294private:
295 int getc()
296 {
297 if(f_c != '\0')
298 {
299 int const c(f_c);
300 f_c = '\0';
301 return c;
302 }
303
304 if(*f_in == '\0')
305 {
306 return '\0';
307 }
308 else
309 {
310 int const c(*f_in);
311 ++f_in;
312 return c;
313 }
314 }
315
316 void ungetc(int c)
317 {
318 if(f_c != '\0')
319 {
320 throw getopt_logic_error("ungetc() already called once, getc() must be called in between now"); // LCOV_EXCL_LINE
321 }
322 f_c = c;
323 }
324
325 char const * f_in = nullptr;
326 int f_c = '\0';
327};
328
329
331{
332public:
333 typedef std::vector<validator_with_params> vector_t;
334
335 validator_with_params(std::string const & name)
336 : f_name(name)
337 {
338 }
339
340 std::string const & get_name() const
341 {
342 return f_name;
343 }
344
345 void add_param(std::string const & param)
346 {
347 f_params.push_back(param);
348 }
349
350 string_list_t const & get_params() const
351 {
352 return f_params;
353 }
354
355private:
356 std::string f_name = std::string();
358};
359
360
362{
363public:
365 : f_lexer(l)
366 {
367 }
368
369 bool parse()
370 {
371 token t(f_lexer.next_token());
372 if(t.tok() == token_t::TOK_EOF)
373 {
374 // empty list
375 //
376 return true;
377 }
378
379 // TODO: show location on an error
380 //
381 for(;;)
382 {
383 switch(t.tok())
384 {
385 case token_t::TOK_REGEX:
386 {
387 validator_with_params v("regex");
388 v.add_param(t.value());
389 f_validators.push_back(v);
390
391 t = f_lexer.next_token();
392 }
393 break;
394
395 case token_t::TOK_IDENTIFIER:
396 {
398
399 t = f_lexer.next_token();
400 if(t.tok() == token_t::TOK_OPEN_PARENTHESIS)
401 {
402 t = f_lexer.next_token();
403 if(t.tok() != token_t::TOK_CLOSE_PARENTHESIS)
404 {
405 for(;;)
406 {
407 if(t.tok() == token_t::TOK_INVALID)
408 {
409 return false;
410 }
411 if(t.tok() != token_t::TOK_IDENTIFIER
412 && t.tok() != token_t::TOK_STRING
413 && t.tok() != token_t::TOK_REGEX)
414 {
415 cppthread::log << cppthread::log_level_t::error
416 << "validator(): expected a regex, an identifier or a string inside the () of a parameter. Remaining input: \""
417 << f_lexer.remains()
418 << "\""
419 << cppthread::end;
420 return false;
421 }
422 v.add_param(t.value());
423
424 t = f_lexer.next_token();
425 if(t.tok() == token_t::TOK_CLOSE_PARENTHESIS)
426 {
427 break;
428 }
429
430 if(t.tok() == token_t::TOK_EOF)
431 {
432 cppthread::log << cppthread::log_level_t::error
433 << "validator(): parameter list must end with ')'. Remaining input: \""
434 << f_lexer.remains()
435 << "\""
436 << cppthread::end;
437 return false;
438 }
439
440 if(t.tok() != token_t::TOK_COMMA)
441 {
442 if(t.tok() == token_t::TOK_INVALID)
443 {
444 return false;
445 }
446 cppthread::log << cppthread::log_level_t::error
447 << "validator(): parameters must be separated by ','. Remaining input: \""
448 << f_lexer.remains()
449 << "\""
450 << cppthread::end;
451 return false;
452 }
453 do
454 {
455 t = f_lexer.next_token();
456 }
457 while(t.tok() == token_t::TOK_COMMA);
458 }
459 }
460 t = f_lexer.next_token();
461 }
462
463 f_validators.push_back(v);
464 }
465 break;
466
467 default:
468 if(t.tok() != token_t::TOK_INVALID)
469 {
470 cppthread::log << cppthread::log_level_t::error
471 << "validator(): unexpected token in validator definition;"
472 " expected an identifier. Remaining input: \""
473 << f_lexer.remains()
474 << "\"."
475 << cppthread::end;
476 }
477 return false;
478
479 }
480
481 if(t.tok() == token_t::TOK_EOF)
482 {
483 return true;
484 }
485
486 if(t.tok() != token_t::TOK_OR)
487 {
488 if(t.tok() != token_t::TOK_INVALID)
489 {
490 cppthread::log << cppthread::log_level_t::error
491 << "validator(): validator definitions must be separated by '|'. Remaining input: \""
492 << f_lexer.remains()
493 << "\""
494 << cppthread::end;
495 }
496 return false;
497 }
498
499 t = f_lexer.next_token();
500 }
501 snapdev::NOT_REACHED();
502 }
503
505 {
506 return f_validators;
507 }
508
509private:
513};
514
515
516
517} // no name namespace
518
519
520
529
530
531
532
533
534
543
544
572{
573 if(g_validator_factories == nullptr)
574 {
575 g_validator_factories = new factory_map_t();
576 }
577 auto it(g_validator_factories->find(factory.get_name()));
578 if(it != g_validator_factories->end())
579 {
580 throw getopt_logic_error(
581 "you have two or more validator factories named \""
582 + factory.get_name()
583 + "\".");
584 }
585 (*g_validator_factories)[factory.get_name()] = &factory;
586}
587
588
590{
591 if(g_validator_factories == nullptr)
592 {
593 return validator::pointer_t(); // LCOV_EXCL_LINE
594 }
595
596 auto it(g_validator_factories->find(name));
597 if(it == g_validator_factories->end())
598 {
599 return validator::pointer_t();
600 }
601
602 return it->second->create(data);
603}
604
605
627{
628 if(name_and_params.empty())
629 {
630 return validator::pointer_t();
631 }
632
633 // the name and parameters can be written as a function call, we have
634 // a special case for regex which do not require the function call
635 //
636 // validator_list: name_and_params
637 // | name_and_params ',' validator_list
638 //
639 // name_and_params: name '(' params ')'
640 // | '/' ... '/' /* regex special case */
641 //
642 // name: [a-zA-Z_][a-zA-Z_0-9]*
643 //
644 // params: (thing - [,()'" ])
645 // | '\'' (thing - '\'') '\''
646 // | '"' (thing - '"') '"'
647 //
648 // thing: [ -~]*
649 // | '\\' [ -~]
650 //
651
652 lexer l(name_and_params.c_str());
653 parser p(l);
654 if(!p.parse())
655 {
656 return validator::pointer_t();
657 }
658
659 validator_with_params::vector_t const & validators(p.get_validators());
660
661 if(validators.size() == 0)
662 {
663 return validator::pointer_t();
664 }
665
666 if(validators.size() == 1)
667 {
668 return create(validators[0].get_name(), validators[0].get_params());
669 }
670
671 // we need a list validator to handle this case
672 //
674 validator_list::pointer_t list(std::dynamic_pointer_cast<validator_list>(lst));
675 if(list == nullptr)
676 {
677 throw getopt_logic_error("we just created a list and the dynamic cast failed."); // LCOV_EXCL_LINE
678 }
679 for(auto const & v : validators)
680 {
681 list->add_validator(create(v.get_name(), v.get_params()));
682 }
683
684 return list;
685}
686
687
688
689} // namespace advgetopt
690// vim: ts=4 sw=4 et
validator_with_params::vector_t const & get_validators() const
token(token_t tok, std::string const &value=std::string())
Definition validator.cpp:94
virtual ~validator_factory()
The destructor to ease derived classes.
std::shared_ptr< validator_list > pointer_t
static pointer_t create(std::string const &name, string_list_t const &data)
static void register_validator(validator_factory const &factory)
virtual ~validator()
The validator destructor to support virtuals.
std::shared_ptr< validator > pointer_t
Definition validator.h:64
virtual std::string name() const =0
Return the name of the validator.
Definitions of the advanced getopt exceptions.
std::map< std::string, validator_factory const * > factory_map_t
Definition validator.cpp:69
The advgetopt environment to parse command line options.
constexpr flag_t option_flags_merge()
Definition flags.h:87
std::string quote(std::string const &s, char open, char close)
The converse of unquote.
Definition utils.cpp:201
std::vector< std::string > string_list_t
Definition utils.h:41
Declaration of validators which can be used to verify the parameters.
Declaration of a validator handling multiple sub-validators.

This document is part of the Snap! Websites Project.

Copyright by Made to Order Software Corp.