advgetopt 2.0.49
Parse complex command line arguments and configuration files in C++.
validator.cpp
Go to the documentation of this file.
1// Copyright (c) 2006-2025 Made to Order Software Corp. All Rights Reserved
2//
3// https://snapwebsites.org/project/advgetopt
4// contact@m2osw.com
5//
6// This program is free software; you can redistribute it and/or modify
7// it under the terms of the GNU General Public License as published by
8// the Free Software Foundation; either version 2 of the License, or
9// (at your option) any later version.
10//
11// This program is distributed in the hope that it will be useful,
12// but WITHOUT ANY WARRANTY; without even the implied warranty of
13// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14// GNU General Public License for more details.
15//
16// You should have received a copy of the GNU General Public License along
17// with this program; if not, write to the Free Software Foundation, Inc.,
18// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
19
20
31// self
32//
33#include "advgetopt/validator.h"
34
35#include "advgetopt/exception.h"
37
38
39// cppthread
40//
41#include <cppthread/log.h>
42
43
44// snapdev
45//
46#include <snapdev/not_reached.h>
47#include <snapdev/join_strings.h>
48
49
50// C++
51//
52#include <map>
53
54
55// last include
56//
57#include <snapdev/poison.h>
58
59
60
61namespace advgetopt
62{
63
64
65
66namespace
67{
68
69
70typedef std::map<std::string, validator_factory const *> factory_map_t;
71
73
74
90
91
92class token
93{
94public:
95 token(token_t tok, std::string const & value = std::string())
96 : f_token(tok)
97 , f_value(value)
98 {
99 }
100
101 token_t tok() const
102 {
103 return f_token;
104 }
105
106 std::string const & value() const
107 {
108 return f_value;
109 }
110
111private:
112 token_t f_token = token_t::TOK_EOF;
113 std::string f_value = std::string();
114};
115
116class lexer
117{
118public:
119 lexer(char const * in)
120 : f_in(in)
121 {
122 }
123
125 {
126 for(;;)
127 {
128 int c(getc());
129 switch(c)
130 {
131 case '\0':
132 return token(token_t::TOK_EOF);
133
134 case '(':
135 return token(token_t::TOK_OPEN_PARENTHESIS);
136
137 case ')':
138 return token(token_t::TOK_CLOSE_PARENTHESIS);
139
140 case ',':
141 return token(token_t::TOK_COMMA);
142
143 case '|':
144 c = getc();
145 if(c != '|') // allow for || like in C
146 {
147 ungetc(c);
148 }
149 return token(token_t::TOK_OR);
150
151 case '"':
152 case '\'':
153 {
154 int const quote(c);
155 std::string s;
156 for(;;)
157 {
158 c = getc();
159 if(c == quote)
160 {
161 break;
162 }
163 s += static_cast<char>(c);
164 }
165 return token(token_t::TOK_STRING, s);
166 }
167
168 case '/':
169 {
170 std::string r;
171 for(;;)
172 {
173 r += static_cast<char>(c);
174 c = getc();
175 if(c == '/')
176 {
177 r += static_cast<char>(c);
178 break;
179 }
180 if(c < ' ' && c != '\t')
181 {
182 cppthread::log << cppthread::log_level_t::error
183 << "validator(): unexpected character for a regular expression ("
184 << static_cast<int>(c)
185 << ")."
186 << cppthread::end;
187 return token(token_t::TOK_INVALID);
188 }
189 if(c == '\\')
190 {
191 // we keep the backslash, it's important when
192 // further parsing happens
193 //
194 r += c;
195
196 c = getc();
197 if(c < ' ' && c != '\t')
198 {
199 cppthread::log << cppthread::log_level_t::error
200 << "validator(): unexpected escaped character for a regular expression ("
201 << static_cast<int>(c)
202 << ")."
203 << cppthread::end;
204 return token(token_t::TOK_INVALID);
205 }
206 }
207 }
208 // also allow for flags after the closing '/'
209 //
210 // at this time we only support 'i' but here we allow any
211 // letter for forward compatibility
212 //
213 for(;;)
214 {
215 c = getc();
216 if(c == '\0')
217 {
218 break;
219 }
220 if(c < 'a' || c > 'z')
221 {
222 ungetc(c);
223 if(c != ','
224 && c != ')')
225 {
226 cppthread::log << cppthread::log_level_t::error
227 << "validator(): unexpected flag character for a regular expression ("
228 << static_cast<int>(c)
229 << ")."
230 << cppthread::end;
231 return token(token_t::TOK_INVALID);
232 }
233 break;
234 }
235 r += c;
236 }
237 return token(token_t::TOK_REGEX, r);
238 }
239
240 case ' ':
241 // ignore spaces
242 break;
243
244 default:
245 {
246 std::string id;
247 for(;;)
248 {
249 switch(c)
250 {
251 case '(':
252 case ')':
253 case ',':
254 case '|':
255 case ' ':
256 ungetc(c);
257 [[fallthrough]];
258 case '\0':
259 return token(token_t::TOK_IDENTIFIER, id);
260
261 default:
262 if(c < ' ' || c > '~')
263 {
264 cppthread::log << cppthread::log_level_t::error
265 << "validator(): unexpected character for an identifier ("
266 << static_cast<int>(c)
267 << ")."
268 << cppthread::end;
269 return token(token_t::TOK_INVALID);
270 }
271 break;
272
273 }
274 id += static_cast<char>(c);
275 c = getc();
276 }
277 }
278 break;
279
280 }
281 }
282 snapdev::NOT_REACHED();
283 }
284
285 std::string remains() const
286 {
287 if(*f_in == '\0')
288 {
289 return std::string("...EOS");
290 }
291
292 return f_in;
293 }
294
295private:
296 int getc()
297 {
298 if(f_c != '\0')
299 {
300 int const c(f_c);
301 f_c = '\0';
302 return c;
303 }
304
305 if(*f_in == '\0')
306 {
307 return '\0';
308 }
309 else
310 {
311 int const c(*f_in);
312 ++f_in;
313 return c;
314 }
315 }
316
317 void ungetc(int c)
318 {
319 if(f_c != '\0')
320 {
321 throw getopt_logic_error("ungetc() already called once, getc() must be called at least once in between."); // LCOV_EXCL_LINE
322 }
323 f_c = c;
324 }
325
326 char const * f_in = nullptr;
327 int f_c = '\0';
328};
329
330
332{
333public:
334 typedef std::vector<validator_with_params> vector_t;
335
336 validator_with_params(std::string const & name)
337 : f_name(name)
338 {
339 }
340
341 std::string const & get_name() const
342 {
343 return f_name;
344 }
345
346 void add_param(std::string const & param)
347 {
348 f_params.push_back(param);
349 }
350
351 string_list_t const & get_params() const
352 {
353 return f_params;
354 }
355
356private:
357 std::string f_name = std::string();
359};
360
361
363{
364public:
366 : f_lexer(l)
367 {
368 }
369
370 bool parse()
371 {
372 token t(f_lexer.next_token());
373 if(t.tok() == token_t::TOK_EOF)
374 {
375 // empty list
376 //
377 return true;
378 }
379
380 // TODO: show location on an error
381 //
382 for(;;)
383 {
384 switch(t.tok())
385 {
386 case token_t::TOK_REGEX:
387 {
388 validator_with_params v("regex");
389 v.add_param(t.value());
390 f_validators.push_back(v);
391
392 t = f_lexer.next_token();
393 }
394 break;
395
396 case token_t::TOK_IDENTIFIER:
397 {
399
400 t = f_lexer.next_token();
401 if(t.tok() == token_t::TOK_OPEN_PARENTHESIS)
402 {
403 t = f_lexer.next_token();
404 if(t.tok() != token_t::TOK_CLOSE_PARENTHESIS)
405 {
406 for(;;)
407 {
408 if(t.tok() == token_t::TOK_INVALID)
409 {
410 return false;
411 }
412 if(t.tok() != token_t::TOK_IDENTIFIER // <- anything which is not a separator, string, regex including numbers (it is called 'thing' in the grammar)
413 && t.tok() != token_t::TOK_STRING
414 && t.tok() != token_t::TOK_REGEX)
415 {
416 cppthread::log << cppthread::log_level_t::error
417 << "validator(): expected a regex, an identifier or a string inside the () of a parameter. Remaining input: \""
418 << f_lexer.remains()
419 << "\""
420 << cppthread::end;
421 return false;
422 }
423 v.add_param(t.value());
424
425 t = f_lexer.next_token();
426 if(t.tok() == token_t::TOK_CLOSE_PARENTHESIS)
427 {
428 break;
429 }
430
431 if(t.tok() == token_t::TOK_EOF)
432 {
433 cppthread::log << cppthread::log_level_t::error
434 << "validator(): parameter list must end with ')'. Remaining input: \""
435 << f_lexer.remains()
436 << "\""
437 << cppthread::end;
438 return false;
439 }
440
441 if(t.tok() != token_t::TOK_COMMA)
442 {
443 if(t.tok() == token_t::TOK_INVALID)
444 {
445 return false;
446 }
447 cppthread::log << cppthread::log_level_t::error
448 << "validator(): parameters must be separated by ','. Remaining input: \""
449 << f_lexer.remains()
450 << "\""
451 << cppthread::end;
452 return false;
453 }
454 do
455 {
456 t = f_lexer.next_token();
457 }
458 while(t.tok() == token_t::TOK_COMMA);
459 }
460 }
461 t = f_lexer.next_token();
462 }
463
464 f_validators.push_back(v);
465 }
466 break;
467
468 default:
469 if(t.tok() != token_t::TOK_INVALID)
470 {
471 cppthread::log << cppthread::log_level_t::error
472 << "validator(): unexpected token in validator definition;"
473 " expected an identifier. Remaining input: \""
474 << f_lexer.remains()
475 << "\"."
476 << cppthread::end;
477 }
478 return false;
479
480 }
481
482 if(t.tok() == token_t::TOK_EOF)
483 {
484 return true;
485 }
486
487 if(t.tok() != token_t::TOK_OR)
488 {
489 if(t.tok() != token_t::TOK_INVALID)
490 {
491 cppthread::log << cppthread::log_level_t::error
492 << "validator(): validator definitions must be separated by '|'. Remaining input: \""
493 << f_lexer.remains()
494 << "\""
495 << cppthread::end;
496 }
497 return false;
498 }
499
500 t = f_lexer.next_token();
501 }
502 snapdev::NOT_REACHED();
503 }
504
506 {
507 return f_validators;
508 }
509
510private:
514};
515
516
517
518} // no name namespace
519
520
521
530
531
532
533
534
535
544
545
575void validator::set_error(std::string const & msg) const
576{
577 f_error = msg;
578}
579
580
581std::string const & validator::get_error() const
582{
583 return f_error;
584}
585
586
588{
589 if(g_validator_factories == nullptr)
590 {
591 g_validator_factories = new factory_map_t();
592 }
593 auto it(g_validator_factories->find(factory.get_name()));
594 if(it != g_validator_factories->end())
595 {
596 throw getopt_logic_error(
597 "you have two or more validator factories named \""
598 + factory.get_name()
599 + "\".");
600 }
601 (*g_validator_factories)[factory.get_name()] = &factory;
602}
603
604
606{
607 if(g_validator_factories == nullptr)
608 {
609 return validator::pointer_t(); // LCOV_EXCL_LINE
610 }
611
612 auto it(g_validator_factories->find(name));
613 if(it == g_validator_factories->end())
614 {
615 return validator::pointer_t();
616 }
617
618 return it->second->create(data);
619}
620
621
667{
668 if(name_and_params.empty())
669 {
670 return validator::pointer_t();
671 }
672
673 lexer l(name_and_params.c_str());
674 parser p(l);
675 if(!p.parse())
676 {
677 return validator::pointer_t();
678 }
679
680 validator_with_params::vector_t const & validators(p.get_validators());
681
682 if(validators.size() == 0)
683 {
684 return validator::pointer_t();
685 }
686
687 if(validators.size() == 1)
688 {
689 return create(validators[0].get_name(), validators[0].get_params());
690 }
691
692 // we need a list validator to handle this case
693 //
695 validator_list::pointer_t list(std::dynamic_pointer_cast<validator_list>(lst));
696 if(list == nullptr)
697 {
698 throw getopt_logic_error("we just created a list and the dynamic cast failed."); // LCOV_EXCL_LINE
699 }
700 for(auto const & v : validators)
701 {
702 list->add_validator(create(v.get_name(), v.get_params()));
703 }
704
705 return list;
706}
707
708
709
710} // namespace advgetopt
711// vim: ts=4 sw=4 et
validator_with_params::vector_t const & get_validators() const
token(token_t tok, std::string const &value=std::string())
Definition validator.cpp:95
virtual ~validator_factory()
The destructor to ease derived classes.
std::shared_ptr< validator_list > pointer_t
std::string f_error
Definition validator.h:82
static pointer_t create(std::string const &name, string_list_t const &data)
void set_error(std::string const &msg) const
static void register_validator(validator_factory const &factory)
virtual ~validator()
The validator destructor to support virtuals.
std::shared_ptr< validator > pointer_t
Definition validator.h:64
virtual std::string name() const =0
Return the name of the validator.
std::string const & get_error() const
Definitions of the advanced getopt exceptions.
std::map< std::string, validator_factory const * > factory_map_t
Definition validator.cpp:70
The advgetopt environment to parse command line options.
Definition version.h:37
constexpr flag_t option_flags_merge()
Definition flags.h:87
std::string quote(std::string const &s, char open, char close)
The converse of unquote.
Definition utils.cpp:201
std::vector< std::string > string_list_t
Definition utils.h:41
Declaration of validators which can be used to verify the parameters.
Declaration of a validator handling multiple sub-validators.

This document is part of the Snap! Websites Project.

Copyright by Made to Order Software Corp.