Line data Source code
1 : // Snap Websites Server -- transform magic definitions to a .js file
2 : // Copyright (c) 2014-2019 Made to Order Software Corp. All Rights Reserved
3 : //
4 : // This program is free software; you can redistribute it and/or modify
5 : // it under the terms of the GNU General Public License as published by
6 : // the Free Software Foundation; either version 2 of the License, or
7 : // (at your option) any later version.
8 : //
9 : // This program is distributed in the hope that it will be useful,
10 : // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 : // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 : // GNU General Public License for more details.
13 : //
14 : // You should have received a copy of the GNU General Public License
15 : // along with this program; if not, write to the Free Software
16 : // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 :
18 : // self
19 : //
20 : #include "magic-to-js.h"
21 :
22 :
23 : // snapwebsites lib
24 : //
25 : #include <snapwebsites/snapwebsites.h>
26 :
27 :
28 : // snapdev lib
29 : //
30 : #include <snapdev/not_reached.h>
31 : #include <snapdev/not_used.h>
32 :
33 :
34 : // C++ lib
35 : //
36 : #include <algorithm>
37 : #include <cstring>
38 : #include <fstream>
39 : #include <iomanip>
40 : #include <memory>
41 : #include <vector>
42 :
43 :
44 : // C lib
45 : //
46 : #include <math.h>
47 :
48 :
49 : // last include
50 : //
51 : #include <snapdev/poison.h>
52 :
53 :
54 :
55 :
56 : /** \file
57 : * \brief Tool used to transform magic files in .js files.
58 : *
59 : * This tool is used to parse magic data files to use in JavaScript
60 : * to detect file formats on file Drag & Drop.
61 : *
62 : * The documentation of the format of the files is found in the magic
63 : * man page:
64 : *
65 : * \code
66 : * man 5 magic
67 : * \endcode
68 : *
69 : * The following is an approximation of the lexer:
70 : *
71 : * \code
72 : * start: comment
73 : * | empty_line
74 : * | command
75 : * | line
76 : *
77 : * comment: '#' end_of_line new_line
78 : *
79 : * empty_line: new_line
80 : * | spaces new_line
81 : *
82 : * command: '!' ':' cmd
83 : *
84 : * cmd: mimetype
85 : * | apple
86 : * | strength
87 : *
88 : * mimetype: 'mimetype' spaces end_of_line new_line
89 : *
90 : * apple: 'apple' spaces end_of_line new_line
91 : *
92 : * strength: 'strength' spaces binop spaces number new_line
93 : *
94 : * line: level offset spaces type spaces value opt_message new_line
95 : *
96 : * level: '>'
97 : * | level '>'
98 : *
99 : * offset: number
100 : * | opt_index '(' opt_index number opt_size opt_adjustment ')'
101 : *
102 : * type: identifier
103 : * | identifier '&' number
104 : * | identifier '/' flags
105 : * | identifier '/' number -- search/123
106 : *
107 : * -- valid types are: byte, short, long, quad, float, double, string, pstring,
108 : * date, qdate, ldate, qldate, beid3, beshort, belong,
109 : * bequad, befloat, bedouble, bedate, beqdate, beldate,
110 : * beqldate, bestring16, leid3, leshort, lelong, lequad,
111 : * lefloat, ledouble, ledate, leqdate, leldate, leqldate,
112 : * lestring16, melong, medate, meldate, indirect, name,
113 : * use, regex, search, default, and 'u'-<integer type>
114 : *
115 : * value: str_value
116 : * | num_value
117 : * | '!' str_value
118 : * | '!' num_value
119 : *
120 : * str_value: opt_str_comparison [^ \n\r]+
121 : *
122 : * opt_str_comparison: '='
123 : * | '<'
124 : * | '>'
125 : *
126 : * num_value: opt_num_comparison number
127 : * | x
128 : *
129 : * opt_num_comparison: opt_str_comparison
130 : * | '&'
131 : * | '^'
132 : * | '~'
133 : *
134 : * opt_message: (* empty *)
135 : * | spaces
136 : * | spaces end_of_line
137 : *
138 : * new_line: '\n'
139 : * | '\r'
140 : * | '\r' '\n'
141 : *
142 : * opt_spaces: (* empty *)
143 : * | spaces
144 : *
145 : * spaces: space
146 : * | spaces space
147 : *
148 : * space: ' '
149 : * | '\t'
150 : *
151 : * identifier: [a-zA-Z_][0-9a-zA-Z_]*
152 : *
153 : * flags: [a-zA-Z]+
154 : *
155 : * -- valid flags for string are: WwcCtb
156 : * -- valid flags for pstring are: BHhLlJ
157 : *
158 : * opt_size: (* empty *)
159 : * | '.' [bilmsBILS]
160 : *
161 : * opt_index: (* empty *)
162 : * | &
163 : *
164 : * opt_adjustment: '+' number
165 : * | '-' number
166 : * | '*' number
167 : * | '/' number
168 : * | '%' number
169 : * | '&' number
170 : * | '|' number
171 : * | '^' number
172 : * | '(' offset ')'
173 : *
174 : * binop: '+' opt_spaces number
175 : * | '-' opt_spaces number
176 : * | '*' opt_spaces number
177 : * | '/' opt_spaces number
178 : *
179 : * number: decimal
180 : * | octal
181 : * | hexadecimal
182 : * | floating_point
183 : *
184 : * decimal: [1-9][0-9]*
185 : *
186 : * floating_point: [1-9][0-9]* '.' [0-9]* ( [eE] [-+]? [0-9]+ )?
187 : *
188 : * octal: 0[0-7]*
189 : *
190 : * hexadecimal: 0[xX][0-9a-fA-F]+
191 : *
192 : * end_of_line: .*
193 : * \endcode
194 : */
195 :
196 :
namespace
{

/** \brief Debug flag of the tool.
 *
 * When true, lexer::get_token() prints each token it returns in std::cerr.
 */
bool g_debug(false);

} // no name namespace
203 :
204 :
/** \brief Lexer used to read the data from the input files.
 *
 * The lexer reads the characters of the input files, one file after
 * another, and groups them in tokens (see lexer::token_t).
 */
class lexer
{
public:
    /** \brief How get_token() is expected to read the next token.
     */
    enum class mode_t
    {
        LEXER_MODE_NORMAL,                  // normal parsing
        LEXER_MODE_NORMAL_WITHOUT_FLOATS,   // normal parsing, but a '.' never starts a floating point
        LEXER_MODE_MESSAGE,                 // read everything up to the end of the line as one string (spaces kept, numbers not converted)
        LEXER_MODE_REGEX                    // read a regular expression (returned as a string)
    };

    using pointer_t = std::shared_ptr<lexer>;
    using filenames_t = std::vector<std::string>;

    /** \brief One token as returned by the lexer.
     *
     * A token has a type and one value. Only the value matching the
     * type is set by the constructors, the others keep their default.
     */
    class token_t
    {
    public:
        enum class type_t
        {
            TOKEN_TYPE_EOT,         // end of token
            TOKEN_TYPE_CHARACTER,   // '\n' for new line, ' ' for spaces (space or tab), operators as themselves
            TOKEN_TYPE_STRING,      // string or identifier, depending on where it appears
            TOKEN_TYPE_INTEGER,     // decimal, hexadecimal, and octal
            TOKEN_TYPE_FLOAT,       // floating point ('.' is the trigger)
            TOKEN_TYPE_COMMAND      // !:<command>, the string is "command"
        };

        using character_t = char;
        using string_t = std::string;
        using integer_t = int64_t;
        using float_t = double;

        // an "end of token" token; all the members keep their default value
        token_t()
        {
        }

        token_t(character_t character)
            : f_type(type_t::TOKEN_TYPE_CHARACTER)
            , f_character(character)
        {
        }

        // is_string set to false marks the string as a command name
        token_t(string_t string, bool is_string = true)
            : f_type(is_string ? type_t::TOKEN_TYPE_STRING : type_t::TOKEN_TYPE_COMMAND)
            , f_string(string)
        {
        }

        token_t(integer_t integer)
            : f_type(type_t::TOKEN_TYPE_INTEGER)
            , f_integer(integer)
        {
        }

        token_t(float_t floating_point)
            : f_type(type_t::TOKEN_TYPE_FLOAT)
            , f_float(floating_point)
        {
        }

        type_t get_type() const
        {
            return f_type;
        }

        character_t get_character() const
        {
            return f_character;
        }

        string_t get_string() const
        {
            return f_string;
        }

        integer_t get_integer() const
        {
            return f_integer;
        }

        float_t get_float() const
        {
            return f_float;
        }

    private:
        type_t          f_type = type_t::TOKEN_TYPE_EOT;
        character_t     f_character = 0;
        string_t        f_string = string_t();
        integer_t       f_integer = 0;
        float_t         f_float = 0.0;
    };

    lexer(filenames_t fn);

    std::string     list_of_filenames() const;
    token_t         get_token(mode_t mode);

    // name of the file being read, f_fpos is the index of the next file to open
    std::string current_filename() const
    {
        if(f_filenames.empty())
        {
            return "<no filenames>";
        }
        return f_filenames[f_fpos - 1];
    }

    int32_t current_line() const
    {
        return f_line;
    }

private:
    int             getc();
    void            ungetc(int c);
    token_t         get_normal_token(mode_t mode);
    token_t         get_message_token();
    token_t         get_identifier_token(int c);
    token_t         get_string_token();
    token_t         get_number_token(mode_t mode, int c);

    filenames_t                     f_filenames = filenames_t();
    size_t                          f_fpos = 0;             // index of the next file to open
    int32_t                         f_line = 1;
    bool                            f_start_of_line = true;
    std::shared_ptr<std::ifstream>  f_file = std::shared_ptr<std::ifstream>(); // current stream
    std::vector<char>               f_unget = std::vector<char>();             // characters restored with ungetc()
};
309 :
310 :
311 : /** \brief Print out a token.
312 : *
313 : * This function prints out a token to the specified output stream.
314 : *
315 : * \param[in,out] out The stream where the token is written.
316 : * \param[in] token The token to write out.
317 : *
318 : * \return A reference to the output stream passed in.
319 : */
320 0 : std::ostream& operator << (std::ostream& out, lexer::token_t const& token)
321 : {
322 0 : switch(token.get_type())
323 : {
324 0 : case lexer::token_t::type_t::TOKEN_TYPE_EOT:
325 0 : out << "end of token";
326 0 : break;
327 :
328 0 : case lexer::token_t::type_t::TOKEN_TYPE_CHARACTER:
329 : {
330 0 : char c(token.get_character());
331 0 : if(c == 0)
332 : {
333 0 : out << "character '\\0'";
334 : }
335 0 : else if(c == '\a')
336 : {
337 0 : out << "character '\\a'";
338 : }
339 0 : else if(c == '\b')
340 : {
341 0 : out << "character '\\b'";
342 : }
343 0 : else if(c == '\f')
344 : {
345 0 : out << "character '\\f'";
346 : }
347 0 : else if(c == '\n')
348 : {
349 0 : out << "character '\\n'";
350 : }
351 0 : else if(c == '\r')
352 : {
353 0 : out << "character '\\r'";
354 : }
355 0 : else if(c == '\t')
356 : {
357 0 : out << "character '\\t'";
358 : }
359 0 : else if(c == '\v')
360 : {
361 0 : out << "character '\\v'";
362 : }
363 0 : else if(c < 0x20 || c >= 0x7F)
364 : {
365 0 : out << "character '\\" << std::oct << std::setw(3) << static_cast<int>(c) << std::dec
366 0 : << "' (\\x" << std::hex << std::uppercase << static_cast<int>(c)
367 0 : << std::dec << std::nouppercase << ")";
368 : }
369 : else
370 : {
371 0 : out << "character '" << c << "'";
372 : }
373 : }
374 0 : break;
375 :
376 0 : case lexer::token_t::type_t::TOKEN_TYPE_STRING:
377 0 : out << "string \"" << token.get_string() << "\"";
378 0 : break;
379 :
380 0 : case lexer::token_t::type_t::TOKEN_TYPE_INTEGER:
381 0 : out << "integer " << token.get_integer() << " (0x"
382 0 : << std::hex << std::uppercase << token.get_integer()
383 0 : << std::dec << std::nouppercase << ")";
384 0 : break;
385 :
386 0 : case lexer::token_t::type_t::TOKEN_TYPE_FLOAT:
387 0 : out << "float " << token.get_float();
388 0 : break;
389 :
390 0 : case lexer::token_t::type_t::TOKEN_TYPE_COMMAND:
391 0 : out << "command !:" << token.get_string();
392 0 : break;
393 :
394 : }
395 :
396 0 : return out;
397 : }
398 :
399 :
400 : /* \brief Initializes a lexer.
401 : *
402 : * Magic files are text files. Everything is line based. The lexer
403 : * detects the different elements and has intelligence to parse a
404 : * line into separate tokens.
405 : *
406 : * The input is any number of files. Once the end of a file is reached,
407 : * the next file is read. A file is always considered to end with a newline
408 : * even if none are found in the file.
409 : *
410 : * \param[in] fn The list of files to read from.
411 : */
412 1 : lexer::lexer(filenames_t fn)
413 1 : : f_filenames(fn)
414 : {
415 1 : if(fn.size() > 0)
416 : {
417 1 : f_file.reset(new std::ifstream);
418 1 : f_file->open(f_filenames[0]);
419 1 : if(!f_file->is_open())
420 : {
421 0 : std::cerr << "error: could not open file \"" << f_filenames[0] << "\".\n";
422 0 : exit(1);
423 : }
424 1 : f_fpos = 1;
425 : }
426 1 : }
427 :
428 :
429 : /** \brief Generate the list of filenames for documentation purposes.
430 : *
431 : * This function generates a list of filenames that can be output in the
432 : * output documentation.
433 : *
434 : * \return List of filenames in a string.
435 : */
436 1 : std::string lexer::list_of_filenames() const
437 : {
438 1 : std::string result;
439 3 : for(size_t i(0); i < f_filenames.size(); ++i)
440 : {
441 2 : result += " * \\li " + f_filenames[i] + "\n";
442 : }
443 1 : return result;
444 : }
445 :
446 :
447 : /** \brief Read one token.
448 : *
449 : * This function reads one token from the magic file.
450 : */
451 155 : lexer::token_t lexer::get_token(mode_t mode)
452 : {
453 155 : lexer::token_t token;
454 155 : switch(mode)
455 : {
456 133 : case mode_t::LEXER_MODE_NORMAL:
457 : case mode_t::LEXER_MODE_NORMAL_WITHOUT_FLOATS:
458 133 : token = get_normal_token(mode);
459 133 : break;
460 :
461 20 : case mode_t::LEXER_MODE_MESSAGE:
462 20 : token = get_message_token();
463 20 : break;
464 :
465 2 : case mode_t::LEXER_MODE_REGEX:
466 2 : token = get_string_token();
467 2 : break;
468 :
469 0 : default:
470 0 : throw std::logic_error("lexer::get_token() called with an invalid mode");
471 :
472 : }
473 :
474 155 : if(g_debug)
475 : {
476 0 : std::cerr << token << std::endl;
477 : }
478 :
479 155 : return token;
480 : }
481 :
482 :
483 : /** \brief Get one character from the input file.
484 : *
485 : * If the end of the current input file is reached (eof() is returned) then
486 : * the function tries to open the next file. If that fails, then the function
487 : * returns eof().
488 : *
489 : * \return The next character, or std::istream::traits_type::eof().
490 : */
491 1128 : int lexer::getc()
492 : {
493 1128 : if(!f_unget.empty())
494 : {
495 120 : int const c(f_unget.back());
496 120 : f_unget.pop_back();
497 120 : return c;
498 : }
499 : for(;;)
500 : {
501 1009 : int const c(f_file->get());
502 1009 : if(c != std::istream::traits_type::eof())
503 : {
504 : // get a character, return it
505 : //std::cerr << static_cast<char>(c);
506 1007 : return c;
507 : }
508 : // more files to read?
509 2 : if(f_fpos >= f_filenames.size())
510 : {
511 1 : return std::istream::traits_type::eof();
512 : }
513 1 : f_file.reset(new std::ifstream);
514 1 : f_file->open(f_filenames[f_fpos]);
515 1 : if(!f_file->is_open())
516 : {
517 : // file cannot be read...
518 0 : std::cerr << "error: could not open file \"" << f_filenames[f_fpos] << "\".\n";
519 0 : exit(1);
520 : }
521 1 : ++f_fpos;
522 1 : f_line = 1;
523 1 : }
524 : }
525 :
526 :
527 : /** \brief Restore a character.
528 : *
529 : * Note that we support restoring any character, although it is supposed to
530 : * be the last character read. You may call ungetc() any number of times.
531 : * Note that this does not modify the file stream in any way.
532 : *
533 : * \param[in] c The character to restore.
534 : */
535 120 : void lexer::ungetc(int c)
536 : {
537 120 : if(c != std::istream::traits_type::eof())
538 : {
539 120 : f_unget.push_back(c);
540 : }
541 120 : }
542 :
543 :
544 : /** \brief Retrieve a token, here the parser transform the input to a type.
545 : *
546 : * This function reads one token and returns it.
547 : *
548 : * If the end of all the input files is reached, then the type_t::TOKEN_TYPE_EOT
549 : * token is returned.
550 : *
551 : * \return The next token.
552 : */
553 151 : lexer::token_t lexer::get_normal_token(mode_t mode)
554 : {
555 : // at this time the only reason we loop is a line commented out
556 : // or an empty line; anything else either returns or generates
557 : // an error and exit the tool at once
558 : for(;;)
559 : {
560 151 : bool is_start(f_start_of_line);
561 151 : f_start_of_line = false;
562 151 : int c(getc());
563 151 : switch(c)
564 : {
565 10 : case '#':
566 10 : if(is_start)
567 : {
568 : // skip the comment, it's just like a message!
569 10 : get_message_token();
570 10 : getc(); // skip the '\n' right away
571 10 : ++f_line;
572 10 : f_start_of_line = true; // next call we're at the start of the line
573 10 : break;
574 : }
575 0 : return get_string_token();
576 :
577 110 : case ' ':
578 : case '\t':
579 : // skip all the spaces between tokens and return ONE space
580 : for(;;)
581 : {
582 188 : c = getc();
583 110 : if(c != ' ' && c != '\t')
584 : {
585 32 : ungetc(c);
586 32 : break;
587 : }
588 : }
589 32 : return token_t(static_cast<token_t::character_t>(' '));
590 :
591 0 : case '\r':
592 : // remove \r\n if such is found
593 0 : c = getc();
594 0 : if(c != '\n')
595 : {
596 0 : ungetc(c);
597 : }
598 : #if __cplusplus >= 201700
599 : [[fallthrough]];
600 : #endif
601 : case '\n':
602 28 : ++f_line;
603 28 : f_start_of_line = true; // next call we're at the start of the line
604 28 : if(is_start)
605 : {
606 : // no need to return empty lines
607 8 : break;
608 : }
609 20 : return token_t(static_cast<token_t::character_t>('\n'));
610 :
611 24 : case '>':
612 : case '<':
613 : case '=':
614 : case '&':
615 : case '^':
616 : case '*':
617 : case '/':
618 : case '+':
619 : case '-':
620 : case '(':
621 : case ')':
622 : case '.':
623 24 : return token_t(static_cast<token_t::character_t>(c));
624 :
625 10 : case '!':
626 : // TBD: should we force this check at the start of a line?
627 : // (if it works like this for us, we will be just fine.)
628 10 : c = getc();
629 10 : if(c == ':')
630 : {
631 : // read an identifier
632 8 : token_t id(get_string_token());
633 : // and transform to a command
634 4 : return token_t(id.get_string(), false);
635 6 : }
636 6 : ungetc(c);
637 6 : return token_t(static_cast<token_t::character_t>('!'));
638 :
639 47 : default:
640 47 : if(c >= '0' && c <= '9')
641 : {
642 26 : return get_number_token(mode, c);
643 : }
644 21 : if((c >= 'a' && c <= 'z')
645 1 : || (c >= 'A' && c <= 'Z')
646 1 : || c == '_')
647 : {
648 20 : return get_identifier_token(c);
649 : }
650 1 : if(c == std::istream::traits_type::eof())
651 : {
652 1 : return token_t();
653 : }
654 0 : std::cerr << "error:" << f_filenames[f_fpos - 1]
655 0 : << ":" << f_line
656 0 : << ": unsupported character " << c
657 0 : << " (0x" << std::hex << std::uppercase << c
658 0 : << ") from input file.\n";
659 0 : exit(1);
660 : snap::NOTREACHED();
661 :
662 : }
663 18 : }
664 : }
665 :
666 :
667 : /** \brief Retrieve the message.
668 : *
669 : * This function reads characters up to the following new line character.
670 : * If the end of the file is found first, then the process stops on that
671 : * even too.
672 : *
673 : * \return The message token (a string token).
674 : */
675 30 : lexer::token_t lexer::get_message_token()
676 : {
677 : // the message ends the line, no special parsing of messages
678 60 : std::string message;
679 : for(;;)
680 : {
681 674 : int c(getc());
682 674 : if(c == std::istream::traits_type::eof())
683 : {
684 : // return type_t::TOKEN_TYPE_EOT
685 0 : return token_t(message);
686 : }
687 674 : if(c == '\r')
688 : {
689 0 : c = getc();
690 0 : if(c != '\n')
691 : {
692 0 : ungetc(c);
693 : }
694 0 : c = '\n';
695 : }
696 674 : if(c == '\n')
697 : {
698 : // we need a new line at the end of the string so keep it here
699 30 : ungetc('\n');
700 30 : return token_t(message);
701 : }
702 644 : message += c;
703 644 : }
704 : }
705 :
706 :
707 : /** \brief We found a digit, so reading a number.
708 : *
709 : * This function reads a number, either an integer, or if a period (.)
710 : * is found, a floating point.
711 : *
712 : * Integers support decimal, octal, and hexadecimal.
713 : *
714 : * Floating points only support decimal with 'e' for the exponent.
715 : *
716 : * This function does not detect a sign at the start of the number.
717 : *
718 : * \param[in] mode The mode used to read this token.
719 : * \param[in] c The start digit.
720 : */
721 26 : lexer::token_t lexer::get_number_token(mode_t mode, int c)
722 : {
723 26 : token_t::integer_t ri(0);
724 26 : token_t::float_t rf(0.0);
725 :
726 26 : int d(getc());
727 :
728 : // hexadecimal?
729 26 : if(c == '0')
730 : {
731 14 : if(d == 'x' || d == 'X')
732 : {
733 : // in C, hexadecimal is simple, any character can follow
734 : for(;;)
735 : {
736 39 : d = getc();
737 21 : if(d >= '0' && d <= '9')
738 : {
739 16 : ri = ri * 16 + (d - '0');
740 : }
741 5 : else if(d >= 'a' && d <= 'f')
742 : {
743 2 : ri = ri * 16 + (d - 'a' + 10);
744 : }
745 3 : else if(d >= 'A' && d <= 'F')
746 : {
747 0 : ri = ri * 16 + (d - 'A' + 10);
748 : }
749 : else
750 : {
751 3 : ungetc(d);
752 3 : return token_t(ri);
753 : }
754 : }
755 : }
756 :
757 : // if no 'x' or 'X' then it is octal
758 : for(;;)
759 : {
760 11 : if(d >= '0' && d <= '7')
761 : {
762 0 : ri = ri * 8 + (d - '0');
763 : }
764 11 : else if(d == '8' || d == '9')
765 : {
766 0 : std::cerr << "error: invalid octal number in \"" << f_filenames[f_fpos - 1] << "\".\n";
767 0 : exit(1);
768 : }
769 : else
770 : {
771 11 : ungetc(d);
772 11 : return token_t(ri);
773 : }
774 0 : d = getc();
775 : }
776 : snap::NOTREACHED();
777 : }
778 :
779 : // first read the number as if it were an integer
780 12 : ri = c - '0';
781 : for(;;)
782 : {
783 20 : if(d >= '0' && d <= '9')
784 : {
785 4 : ri = ri * 10 + (d - '0');
786 : }
787 : else
788 : {
789 : break;
790 : }
791 4 : d = getc();
792 : }
793 :
794 : // floating point number?
795 : // TBD: we may need to support detecting 'e' or 'E' as a floating point too?
796 12 : if(d == '.'
797 0 : && mode == lexer::mode_t::LEXER_MODE_NORMAL_WITHOUT_FLOATS)
798 : {
799 : // TBD: for floating points we may want to use the strtod() or
800 : // similar function to make sure that we get the same result
801 : // as what other users would get in other languages.
802 : // (those functions may have heuristics to properly handle
803 : // very large or very small numbers which we may not have
804 : // properly captured here.)
805 0 : double dec = 1.0;
806 : for(;;)
807 : {
808 0 : d = getc();
809 0 : if(d >= '0' && d <= '9')
810 : {
811 0 : dec *= 10.0;
812 0 : rf = rf + (d - '0') / dec;
813 : }
814 : else
815 : {
816 : break;
817 : }
818 : }
819 0 : if(d == 'e' || d == 'E')
820 : {
821 : // exponent
822 0 : double sign(1.0);
823 0 : d = getc();
824 0 : if(d == '-')
825 : {
826 0 : sign = -1.0;
827 0 : d = getc();
828 : }
829 0 : else if(d == '+')
830 : {
831 0 : d = getc();
832 : }
833 0 : if(d >= '0' && d <= '9')
834 : {
835 0 : token_t::float_t exponent(0.0);
836 : for(;;)
837 : {
838 0 : exponent = exponent * 1 + (d - '0');
839 0 : d = getc();
840 0 : if(d < '0' || d > '9')
841 : {
842 0 : ungetc(d);
843 0 : rf *= pow(10, exponent * sign);
844 0 : return token_t(rf);
845 : }
846 : }
847 : }
848 : else
849 : {
850 0 : std::cerr << "error: invalid floating point exponent, digits expected after the 'e', in \"" << f_filenames[f_fpos - 1] << "\".\n";
851 0 : exit(1);
852 : }
853 : }
854 0 : ungetc(d);
855 0 : return token_t(rf);
856 : }
857 :
858 12 : ungetc(d);
859 12 : return token_t(ri);
860 : }
861 :
862 :
863 : /** \brief Read one identifier.
864 : *
865 : * This function reads one C-like identifier. Identifiers are parsed from
866 : * the 3rd token in a standard line.
867 : *
868 : * \param[in] c The first character that was already read.
869 : *
870 : * \return A string token.
871 : */
872 20 : lexer::token_t lexer::get_identifier_token(int c)
873 : {
874 40 : std::string identifier;
875 : for(;;)
876 : {
877 160 : identifier += c; // note: c may be '\0' here!
878 90 : c = getc();
879 90 : if((c < '0' || c > '9')
880 90 : && (c < 'a' || c > 'z')
881 20 : && (c < 'A' || c > 'Z')
882 20 : && c != '_')
883 : {
884 : // done reading this identifier
885 20 : ungetc(c);
886 40 : return token_t(identifier);
887 : }
888 : }
889 : }
890 :
891 :
892 : /** \brief Read one string ending with a space.
893 : *
894 : * This function reads one string that ends with a space. This string can
895 : * generally include any character. Special characters are added with a
896 : * backslash.
897 : *
898 : * \param[in] c The first character that was already read.
899 : *
900 : * \return A string token.
901 : */
902 6 : lexer::token_t lexer::get_string_token()
903 : {
904 12 : std::string str;
905 : for(;;)
906 : {
907 32 : int c(getc());
908 32 : if(c == '\\') // really allow any character in identifier including spaces!
909 : {
910 0 : c = getc();
911 0 : if(c == std::istream::traits_type::eof())
912 : {
913 0 : return token_t(str);
914 : }
915 : // transform the backslash character
916 0 : switch(c)
917 : {
918 0 : case '0':
919 : {
920 0 : int d(getc());
921 0 : if(d == 'x' || d == 'X')
922 : {
923 : // hexadecimal character, get one or 2 more digits
924 0 : c = 0;
925 0 : int max_chars(2);
926 0 : for(; max_chars > 0; --max_chars)
927 : {
928 0 : d = getc();
929 0 : if(d >= '0' && d <= '7')
930 : {
931 0 : c = c * 16 + (d - '0');
932 : }
933 0 : else if(d >= 'a' && d <= 'f')
934 : {
935 0 : c = c * 16 + (d - 'a' + 10);
936 : }
937 0 : else if(d >= 'A' && d <= 'F')
938 : {
939 0 : c = c * 16 + (d - 'A' + 10);
940 : }
941 : else
942 : {
943 : break;
944 : }
945 : }
946 0 : if(max_chars == 2)
947 : {
948 : // invalid \x without an hex digit
949 0 : std::cerr << "error: invalid use of \\x without a valid hexadecimal number following in \"" << f_filenames[f_fpos - 1] << "\".\n";
950 0 : exit(1);
951 : }
952 0 : break;
953 : }
954 0 : ungetc(d);
955 : }
956 : #if __cplusplus >= 201700
957 : [[fallthrough]];
958 : #endif
959 0 : case '1':
960 : case '2':
961 : case '3':
962 : case '4':
963 : case '5':
964 : case '6':
965 : case '7':
966 0 : c = c - '0';
967 0 : for(int max_chars(3); max_chars > 0; --max_chars)
968 : {
969 0 : int d(getc());
970 0 : if(d >= '0' && d <= '7')
971 : {
972 0 : c = c * 8 + (d - '0');
973 : }
974 : else
975 : {
976 : break;
977 : }
978 0 : }
979 0 : break;
980 :
981 0 : case 'a':
982 0 : c = '\a';
983 0 : break;
984 :
985 0 : case 'b':
986 0 : c = '\b';
987 0 : break;
988 :
989 0 : case 'f':
990 0 : c = '\f';
991 0 : break;
992 :
993 0 : case 'n':
994 0 : c = '\n';
995 0 : break;
996 :
997 0 : case 'r':
998 0 : c = '\r';
999 0 : break;
1000 :
1001 0 : case 't':
1002 0 : c = '\t';
1003 0 : break;
1004 :
1005 0 : case 'v':
1006 0 : c = '\v';
1007 0 : break;
1008 :
1009 : //default: -- keep 'c' as is
1010 : }
1011 : }
1012 62 : else if(c == ' ' || c == '\t'
1013 26 : || c == '\r' || c == '\n'
1014 58 : || c == std::istream::traits_type::eof())
1015 : {
1016 : // done reading this string
1017 6 : ungetc(c);
1018 6 : return token_t(str);
1019 : }
1020 26 : str += c; // note: c may be '\0' here!
1021 26 : }
1022 : snap::NOTREACHED();
1023 : }
1024 :
1025 :
1026 : /** \brief Parse magic files.
1027 : *
1028 : * This class is used to parse magic files.
1029 : */
1030 1 : class parser
1031 : {
1032 : public:
1033 : typedef std::shared_ptr<parser> pointer_t;
1034 :
1035 32 : class entry_t
1036 : {
1037 : public:
1038 : typedef std::shared_ptr<entry_t> pointer_t;
1039 :
1040 : enum class type_t
1041 : {
1042 : ENTRY_TYPE_UNKNOWN,
1043 :
1044 : // int -- 1 byte
1045 : ENTRY_TYPE_BYTE,
1046 : ENTRY_TYPE_UBYTE,
1047 : // int -- 2 bytes
1048 : ENTRY_TYPE_SHORT,
1049 : ENTRY_TYPE_LESHORT,
1050 : ENTRY_TYPE_BESHORT,
1051 : ENTRY_TYPE_USHORT,
1052 : ENTRY_TYPE_ULESHORT,
1053 : ENTRY_TYPE_UBESHORT,
1054 : // int -- 4 bytes
1055 : ENTRY_TYPE_LONG,
1056 : ENTRY_TYPE_LELONG,
1057 : ENTRY_TYPE_BELONG,
1058 : ENTRY_TYPE_MELONG,
1059 : ENTRY_TYPE_ULONG,
1060 : ENTRY_TYPE_ULELONG,
1061 : ENTRY_TYPE_UBELONG,
1062 : ENTRY_TYPE_UMELONG,
1063 : // int -- 4 bytes -- an ID3 size is 32 bits defined as: ((size & 0x0FFFFFFF) * 4)
1064 : ENTRY_TYPE_BEID3,
1065 : ENTRY_TYPE_LEID3,
1066 : ENTRY_TYPE_UBEID3,
1067 : ENTRY_TYPE_ULEID3,
1068 : // int -- 8 bytes
1069 : ENTRY_TYPE_QUAD,
1070 : ENTRY_TYPE_BEQUAD,
1071 : ENTRY_TYPE_LEQUAD,
1072 : ENTRY_TYPE_UQUAD,
1073 : ENTRY_TYPE_UBEQUAD,
1074 : ENTRY_TYPE_ULEQUAD,
1075 : // float -- 4 bytes
1076 : ENTRY_TYPE_FLOAT,
1077 : ENTRY_TYPE_BEFLOAT,
1078 : ENTRY_TYPE_LEFLOAT,
1079 : // float -- 8 bytes
1080 : ENTRY_TYPE_DOUBLE,
1081 : ENTRY_TYPE_BEDOUBLE,
1082 : ENTRY_TYPE_LEDOUBLE,
1083 : // "text" (if value includes characters considered binary bytes then it is considered binary too)
1084 : ENTRY_TYPE_STRING,
1085 : ENTRY_TYPE_PSTRING,
1086 : ENTRY_TYPE_BESTRING16,
1087 : ENTRY_TYPE_LESTRING16,
1088 : ENTRY_TYPE_SEARCH,
1089 : ENTRY_TYPE_REGEX,
1090 : // date
1091 : ENTRY_TYPE_DATE,
1092 : ENTRY_TYPE_QDATE,
1093 : ENTRY_TYPE_LDATE,
1094 : ENTRY_TYPE_QLDATE,
1095 : ENTRY_TYPE_BEDATE,
1096 : ENTRY_TYPE_BEQDATE,
1097 : ENTRY_TYPE_BELDATE,
1098 : ENTRY_TYPE_BEQLDATE,
1099 : ENTRY_TYPE_LEDATE,
1100 : ENTRY_TYPE_LEQDATE,
1101 : ENTRY_TYPE_LELDATE,
1102 : ENTRY_TYPE_LEQLDATE,
1103 : ENTRY_TYPE_MEDATE,
1104 : ENTRY_TYPE_MELDATE,
1105 : // special
1106 : ENTRY_TYPE_INDIRECT,
1107 : ENTRY_TYPE_DEFAULT,
1108 : ENTRY_TYPE_NAME,
1109 : ENTRY_TYPE_USE
1110 : };
1111 :
1112 : typedef lexer::token_t::integer_t integer_t;
1113 : typedef lexer::token_t::float_t float_t;
1114 :
1115 : // string & search flags
1116 : static integer_t const ENTRY_FLAG_COMPACT_BLANK = 0x00000001; // W
1117 : static integer_t const ENTRY_FLAG_BLANK = 0x00000002; // w
1118 : static integer_t const ENTRY_FLAG_LOWER_INSENSITIVE = 0x00000004; // c
1119 : static integer_t const ENTRY_FLAG_UPPER_INSENSITIVE = 0x00000008; // C
1120 : static integer_t const ENTRY_FLAG_TEXT_FILE = 0x00000010; // t
1121 : static integer_t const ENTRY_FLAG_BINARY_FILE = 0x00000020; // b
1122 : // pstring sizes
1123 : static integer_t const ENTRY_FLAG_BYTE = 0x00000040; // B
1124 : static integer_t const ENTRY_FLAG_BE_SHORT = 0x00000080; // H
1125 : static integer_t const ENTRY_FLAG_LE_SHORT = 0x00000100; // h
1126 : static integer_t const ENTRY_FLAG_BE_LONG = 0x00000200; // L
1127 : static integer_t const ENTRY_FLAG_LE_LONG = 0x00000400; // l
1128 : static integer_t const ENTRY_FLAG_SELF_INCLUDED = 0x00000800; // J (size includes itself + string)
1129 : // compare value
1130 : static integer_t const ENTRY_FLAG_NOT = 0x00001000; // !value
1131 : static integer_t const ENTRY_FLAG_EQUAL = 0x00002000; // =value
1132 : static integer_t const ENTRY_FLAG_LESS = 0x00004000; // <value
1133 : static integer_t const ENTRY_FLAG_GREATER = 0x00008000; // >value
1134 : static integer_t const ENTRY_FLAG_ARE_SET = 0x00010000; // &value integer only
1135 : static integer_t const ENTRY_FLAG_ARE_CLEAR = 0x00020000; // ^value integer only
1136 : static integer_t const ENTRY_FLAG_NEGATE = 0x00040000; // ~value integer only
1137 : static integer_t const ENTRY_FLAG_TRUE = 0x00080000; // x numbers only
1138 : // regex flags
1139 : static integer_t const ENTRY_FLAG_LINES = 0x00100000; // l regex only
1140 : static integer_t const ENTRY_FLAG_CASE_INSENSITIVE = 0x00200000; // c regex only
1141 : static integer_t const ENTRY_FLAG_START_OFFSET = 0x00400000; // s regex only
1142 : // offset flags
1143 : static integer_t const ENTRY_FLAG_RELATIVE = 0x04000000; // & before the offset
1144 : static integer_t const ENTRY_FLAG_INDIRECT_RELATIVE = 0x08000000; // (&...) before the indirect offset
1145 :
1146 : // indirect sizes (TBD: what are the "i and I"? why have "b and B"?)
1147 : static integer_t const ENTRY_FLAG_INDIRECT_BYTE = 0x01000000000; // b or B (B not used in existing files)
1148 : static integer_t const ENTRY_FLAG_INDIRECT_BE_SHORT = 0x02000000000; // S
1149 : static integer_t const ENTRY_FLAG_INDIRECT_LE_SHORT = 0x04000000000; // s
1150 : static integer_t const ENTRY_FLAG_INDIRECT_BE_LONG = 0x08000000000; // L
1151 : static integer_t const ENTRY_FLAG_INDIRECT_LE_LONG = 0x10000000000; // l
1152 : static integer_t const ENTRY_FLAG_INDIRECT_ME_LONG = 0x20000000000; // m
1153 : static integer_t const ENTRY_FLAG_INDIRECT_BE_ID3 = 0x40000000000; // I
1154 : static integer_t const ENTRY_FLAG_INDIRECT_LE_ID3 = 0x80000000000; // i
1155 :
1156 12 : void set_level(integer_t level) { f_level = level; }
1157 49 : integer_t get_level() const { return f_level; }
1158 :
1159 16 : void set_offset(integer_t offset) { f_offset = offset; }
1160 12 : integer_t get_offset() const { return f_offset; }
1161 :
1162 16 : void set_type(type_t type) { f_type = type; }
1163 60 : type_t get_type() const { return f_type; }
1164 :
1165 0 : void set_mask(integer_t mask) { f_mask = mask; }
1166 : integer_t get_mask() const { return f_mask; }
1167 :
1168 0 : void set_maxlength(integer_t maxlength) { f_maxlength = maxlength; }
1169 0 : integer_t get_maxlength() const { return f_maxlength; }
1170 :
1171 10 : void set_flags(integer_t flags) { f_flags |= flags; }
1172 : void clear_flags(integer_t flags) { f_flags &= ~flags; }
1173 0 : integer_t get_flags() const { return f_flags; }
1174 : std::string flags_to_js_operator() const;
1175 :
1176 4 : void set_mimetype(std::string mimetype) { f_mimetype = mimetype; }
1177 44 : std::string get_mimetype() const { return f_mimetype; }
1178 :
1179 10 : void set_integer(integer_t integer) { f_integer = integer; }
1180 10 : integer_t get_integer() const { return f_integer; }
1181 :
1182 0 : void set_float(float_t flt) { f_float = flt; }
1183 : float_t get_float() const { return f_float; }
1184 :
1185 2 : void set_string(std::string string) { f_string = string; }
1186 2 : std::string get_string() const { return f_string; }
1187 :
1188 : private:
1189 : integer_t f_level = 0; // number of > at the start (0+)
1190 : integer_t f_offset = 0; // no support for indirections at this point (it's not that complicated, just time consuming to make sure it works right.)
1191 : type_t f_type = type_t::ENTRY_TYPE_UNKNOWN; // see enum
1192 : integer_t f_mask = 0; // defined with the type as in: "long&0xF0F0F0F0"
1193 : integer_t f_maxlength = 0; // search/<maxlength>
1194 : integer_t f_flags = 0; // [p]string/<flags>, and NOT (!)
1195 : std::string f_mimetype = std::string(); // a string found after the !:mimetype ...
1196 : integer_t f_integer = 0; // compare with this integer
1197 : float_t f_float = 0.0; // compare with this float
1198 : std::string f_string = std::string(); // compare with this string (may include '\0')
1199 : };
1200 : typedef std::vector<entry_t::pointer_t> entry_vector_t;
1201 :
1202 1 : parser(lexer::pointer_t& l, std::string const& magic_name)
1203 1 : : f_lexer(l)
1204 1 : , f_magic_name(magic_name)
1205 : {
1206 1 : }
1207 :
1208 : void parse();
1209 : void output();
1210 :
1211 : private:
1212 : void output_entry(size_t start, size_t end, bool has_mime);
1213 : void output_header();
1214 : void output_footer();
1215 :
1216 : lexer::pointer_t f_lexer = lexer::pointer_t();
1217 :
1218 : entry_vector_t f_entries = entry_vector_t();
1219 : std::string f_magic_name = std::string();
1220 : };
1221 :
1222 :
1223 :
1224 20 : std::string parser::entry_t::flags_to_js_operator() const
1225 : {
1226 20 : if((f_flags & ENTRY_FLAG_NOT) != 0)
1227 : {
1228 6 : return "!==";
1229 : }
1230 : else
1231 : {
1232 14 : return "===";
1233 : }
1234 : // TODO: support <, >, &, ^, ~...
1235 : //static integer_t const ENTRY_FLAG_NOT = 0x00001000; // !value
1236 : //static integer_t const ENTRY_FLAG_EQUAL = 0x00002000; // =value
1237 : //static integer_t const ENTRY_FLAG_LESS = 0x00004000; // <value
1238 : //static integer_t const ENTRY_FLAG_GREATER = 0x00008000; // >value
1239 : //static integer_t const ENTRY_FLAG_ARE_SET = 0x00010000; // &value integer only
1240 : //static integer_t const ENTRY_FLAG_ARE_CLEAR = 0x00020000; // ^value integer only
1241 : //static integer_t const ENTRY_FLAG_NEGATE = 0x00040000; // ~value integer only
1242 : //static integer_t const ENTRY_FLAG_TRUE = 0x00080000; // x numbers only
1243 : }
1244 :
1245 :
1246 : /** \brief Parse the magic files data.
1247 : *
1248 : * This function reads magic files and parses them for any number of
1249 : * magic definitions.
1250 : *
1251 : * \todo
1252 : * According to the magic documentation, all magic tests that apply
1253 : * to text files need to be run after all the binary magic tests.
1254 : * So at some point we need to add a sorting capability which
1255 : * ensures that the tests are run in that order.
1256 : */
1257 1 : void parser::parse()
1258 : {
1259 2 : entry_t::pointer_t e;
1260 : for(;;)
1261 : {
1262 37 : lexer::token_t token(f_lexer->get_token(lexer::mode_t::LEXER_MODE_NORMAL));
1263 21 : switch(token.get_type())
1264 : {
1265 4 : case lexer::token_t::type_t::TOKEN_TYPE_COMMAND:
1266 4 : if(!e)
1267 : {
1268 0 : std::cerr << "error: a command without any line is not legal.\n";
1269 0 : exit(1);
1270 : }
1271 4 : if(token.get_string() == "mime")
1272 : {
1273 : // these we accept!
1274 4 : token = f_lexer->get_token(lexer::mode_t::LEXER_MODE_MESSAGE);
1275 8 : std::string mimetype(token.get_string());
1276 12 : while(mimetype[0] == ' ' || mimetype[0] == '\t')
1277 : {
1278 4 : mimetype.erase(mimetype.begin());
1279 : }
1280 4 : e->set_mimetype(mimetype);
1281 : }
1282 0 : else if(token.get_string() == "apple" || token.get_string() == "strength")
1283 : {
1284 : // ignore those for now
1285 0 : f_lexer->get_token(lexer::mode_t::LEXER_MODE_MESSAGE);
1286 : }
1287 : else
1288 : {
1289 0 : std::cerr << "error: unknown command (!:" << token.get_string() << ").\n";
1290 0 : exit(1);
1291 : }
1292 4 : token = f_lexer->get_token(lexer::mode_t::LEXER_MODE_NORMAL);
1293 8 : if(token.get_type() != lexer::token_t::type_t::TOKEN_TYPE_CHARACTER
1294 4 : || token.get_character() != '\n')
1295 : {
1296 0 : std::cerr << "error: a command line is expected to end with a new line.\n";
1297 0 : exit(1);
1298 : }
1299 4 : continue;
1300 :
1301 1 : case lexer::token_t::type_t::TOKEN_TYPE_EOT:
1302 : // we are done parsing
1303 2 : return;
1304 :
1305 12 : case lexer::token_t::type_t::TOKEN_TYPE_CHARACTER:
1306 : // a line may start with characters (>)
1307 12 : if(token.get_character() != '>')
1308 : {
1309 0 : std::cerr << "error: expected '>' to indicate the level of this line. Got " << token.get_character() << " instead.\n";
1310 0 : exit(1);
1311 : }
1312 12 : e.reset(new entry_t);
1313 : {
1314 12 : int level(0);
1315 12 : do
1316 : {
1317 24 : ++level;
1318 24 : token = f_lexer->get_token(lexer::mode_t::LEXER_MODE_NORMAL);
1319 : }
1320 24 : while(token.get_type() == lexer::token_t::type_t::TOKEN_TYPE_CHARACTER
1321 24 : && token.get_character() == '>');
1322 12 : e->set_level(level);
1323 : }
1324 :
1325 24 : if(token.get_type() == lexer::token_t::type_t::TOKEN_TYPE_CHARACTER
1326 12 : && token.get_character() == '&')
1327 : {
1328 0 : e->set_flags(entry_t::ENTRY_FLAG_RELATIVE);
1329 0 : token = f_lexer->get_token(lexer::mode_t::LEXER_MODE_NORMAL);
1330 : }
1331 :
1332 : {
1333 12 : int offset_sign(1);
1334 24 : if(token.get_type() == lexer::token_t::type_t::TOKEN_TYPE_CHARACTER
1335 12 : && token.get_character() == '-')
1336 : {
1337 0 : offset_sign = -1;
1338 0 : token = f_lexer->get_token(lexer::mode_t::LEXER_MODE_NORMAL);
1339 : }
1340 :
1341 12 : if(token.get_type() == lexer::token_t::type_t::TOKEN_TYPE_INTEGER)
1342 : {
1343 : // the actual offset
1344 12 : e->set_offset(token.get_integer() * offset_sign);
1345 12 : break;
1346 : }
1347 :
1348 0 : if(offset_sign == -1)
1349 : {
1350 0 : std::cerr << "error:" << f_lexer->current_filename()
1351 0 : << ":" << f_lexer->current_line()
1352 0 : << ": expected an integer after a '-' in the offset.\n";
1353 0 : exit(1);
1354 0 : }
1355 : }
1356 :
1357 : // indirect
1358 0 : if(token.get_type() != lexer::token_t::type_t::TOKEN_TYPE_CHARACTER
1359 0 : || token.get_character() != '(')
1360 : {
1361 0 : std::cerr << "error:" << f_lexer->current_filename()
1362 0 : << ":" << f_lexer->current_line()
1363 0 : << ": expected an integer, '&', or '(' after the level indication.\n";
1364 0 : exit(1);
1365 : }
1366 0 : token = f_lexer->get_token(lexer::mode_t::LEXER_MODE_NORMAL);
1367 0 : if(token.get_type() == lexer::token_t::type_t::TOKEN_TYPE_CHARACTER
1368 0 : && token.get_character() == '&')
1369 : {
1370 0 : e->set_flags(entry_t::ENTRY_FLAG_INDIRECT_RELATIVE);
1371 0 : token = f_lexer->get_token(lexer::mode_t::LEXER_MODE_NORMAL);
1372 : }
1373 :
1374 : // indirect offset
1375 0 : if(token.get_type() != lexer::token_t::type_t::TOKEN_TYPE_INTEGER)
1376 : {
1377 0 : std::cerr << "error: expected an integer for the indirect offset.\n";
1378 0 : exit(1);
1379 : }
1380 0 : e->set_offset(token.get_integer());
1381 :
1382 0 : token = f_lexer->get_token(lexer::mode_t::LEXER_MODE_NORMAL);
1383 0 : if(token.get_type() == lexer::token_t::type_t::TOKEN_TYPE_CHARACTER
1384 0 : && token.get_character() == '.')
1385 : {
1386 : // NOTE: The documentation says that the size is
1387 : // optional, and if not defined, long is used
1388 : // (but they do not specify the endian, so I would
1389 : // imagine that the machine endian is to be used?!)
1390 0 : token = f_lexer->get_token(lexer::mode_t::LEXER_MODE_NORMAL);
1391 0 : if(token.get_type() != lexer::token_t::type_t::TOKEN_TYPE_STRING)
1392 : {
1393 0 : std::cerr << "error: indirect offsets can be followed by a size (.b, .l, etc.), here the size is missing.\n";
1394 0 : exit(1);
1395 : }
1396 0 : std::string size(token.get_string());
1397 0 : if(size.size() != 1)
1398 : {
1399 0 : std::cerr << "error: indirect offsets size (.b, .l, etc.), must be exactly one chracter.\n";
1400 0 : exit(1);
1401 : }
1402 0 : switch(size[0])
1403 : {
1404 0 : case 'b':
1405 : case 'B':
1406 0 : e->set_flags(entry_t::ENTRY_FLAG_INDIRECT_BYTE);
1407 0 : break;
1408 :
1409 0 : case 'S':
1410 0 : e->set_flags(entry_t::ENTRY_FLAG_INDIRECT_BE_SHORT);
1411 0 : break;
1412 :
1413 0 : case 's':
1414 0 : e->set_flags(entry_t::ENTRY_FLAG_INDIRECT_LE_SHORT);
1415 0 : break;
1416 :
1417 0 : case 'l':
1418 0 : e->set_flags(entry_t::ENTRY_FLAG_INDIRECT_BE_LONG);
1419 0 : break;
1420 :
1421 0 : case 'L':
1422 0 : e->set_flags(entry_t::ENTRY_FLAG_INDIRECT_LE_LONG);
1423 0 : break;
1424 :
1425 0 : case 'm':
1426 0 : e->set_flags(entry_t::ENTRY_FLAG_INDIRECT_ME_LONG);
1427 0 : break;
1428 :
1429 0 : case 'I':
1430 0 : e->set_flags(entry_t::ENTRY_FLAG_INDIRECT_BE_ID3);
1431 0 : break;
1432 :
1433 0 : case 'i':
1434 0 : e->set_flags(entry_t::ENTRY_FLAG_INDIRECT_LE_ID3);
1435 0 : break;
1436 :
1437 0 : default:
1438 0 : std::cerr << "error: invalid character used as an offset size (" << size[0] << ").\n";
1439 0 : exit(1);
1440 :
1441 : }
1442 0 : token = f_lexer->get_token(lexer::mode_t::LEXER_MODE_NORMAL);
1443 : }
1444 0 : if(token.get_type() == lexer::token_t::type_t::TOKEN_TYPE_CHARACTER
1445 0 : && token.get_character() != ')')
1446 : {
1447 0 : switch(token.get_character())
1448 : {
1449 0 : case '+':
1450 : case '-':
1451 : case '*':
1452 : case '/':
1453 : case '%':
1454 : case '&':
1455 : case '|':
1456 : case '^':
1457 : //e->set_indirect_adjustment_operator(token.get_character());
1458 0 : break;
1459 :
1460 0 : default:
1461 : std::cerr << "error: indirect adjustment operator ("
1462 0 : << token.get_character() << ") not supported."
1463 0 : << std::endl;
1464 0 : exit(1);
1465 :
1466 : }
1467 0 : token = f_lexer->get_token(lexer::mode_t::LEXER_MODE_NORMAL);
1468 0 : if(token.get_type() == lexer::token_t::type_t::TOKEN_TYPE_CHARACTER
1469 0 : && token.get_character() == '(')
1470 : {
1471 : // case were we have a negative number and they
1472 : // generally use (<position>.<size>+(-<offset>))
1473 : //
1474 0 : int sign(1);
1475 0 : token = f_lexer->get_token(lexer::mode_t::LEXER_MODE_NORMAL);
1476 0 : if(token.get_type() == lexer::token_t::type_t::TOKEN_TYPE_CHARACTER
1477 0 : && token.get_character() == '-')
1478 : {
1479 0 : sign = -1;
1480 0 : token = f_lexer->get_token(lexer::mode_t::LEXER_MODE_NORMAL);
1481 : }
1482 0 : if(token.get_type() != lexer::token_t::type_t::TOKEN_TYPE_INTEGER)
1483 : {
1484 0 : std::cerr << "error:" << f_lexer->current_filename()
1485 0 : << ":" << f_lexer->current_line()
1486 0 : << ": indirect adjustment operator must be followed by an integer."
1487 0 : << std::endl;
1488 0 : exit(1);
1489 : }
1490 : // Note: the + and - can be optimized by replacing the
1491 : // integer with -integer and the '-' by '+'
1492 : //e->set_indirect_adjustment(token.get_integer() * sign);
1493 0 : token = f_lexer->get_token(lexer::mode_t::LEXER_MODE_NORMAL);
1494 0 : if(token.get_type() != lexer::token_t::type_t::TOKEN_TYPE_CHARACTER
1495 0 : && token.get_character() != ')')
1496 : {
1497 0 : std::cerr << "error:" << f_lexer->current_filename()
1498 0 : << ":" << f_lexer->current_line()
1499 0 : << ": indirect adjustment operator sub-offset must be ended by a ')'."
1500 0 : << std::endl;
1501 0 : exit(1);
1502 : }
1503 0 : token = f_lexer->get_token(lexer::mode_t::LEXER_MODE_NORMAL);
1504 : }
1505 : else
1506 : {
1507 0 : if(token.get_type() != lexer::token_t::type_t::TOKEN_TYPE_INTEGER)
1508 : {
1509 : // Note: in the documentation they say you can also have
1510 : // another parenthesis layer as in: +(-4)
1511 0 : std::cerr << "error:" << f_lexer->current_filename()
1512 0 : << ":" << f_lexer->current_line()
1513 0 : << ": indirect adjustment operator must be followed by an integer."
1514 0 : << std::endl;
1515 0 : exit(1);
1516 : }
1517 : // Note: the + and - can be optimized by replacing the
1518 : // integer with -integer and the '-' by '+'
1519 : //e->set_indirect_adjustment(token.get_integer());
1520 0 : token = f_lexer->get_token(lexer::mode_t::LEXER_MODE_NORMAL);
1521 : }
1522 : }
1523 0 : if(token.get_type() != lexer::token_t::type_t::TOKEN_TYPE_CHARACTER
1524 0 : || token.get_character() != ')')
1525 : {
1526 0 : std::cerr << "error: an indirect offset must end with ')'.\n";
1527 0 : exit(1);
1528 : }
1529 0 : break;
1530 :
1531 4 : case lexer::token_t::type_t::TOKEN_TYPE_INTEGER:
1532 : // the offset for this line
1533 4 : e.reset(new entry_t);
1534 4 : e->set_offset(token.get_integer());
1535 4 : break;
1536 :
1537 0 : default:
1538 0 : std::cerr << "error: expected a standard line token: an integer optionally preceeded by '>' characters.\n";
1539 4 : exit(1);
1540 :
1541 : }
1542 :
1543 : // after the offset we have to have a space then the type
1544 16 : token = f_lexer->get_token(lexer::mode_t::LEXER_MODE_NORMAL);
1545 32 : if(token.get_type() != lexer::token_t::type_t::TOKEN_TYPE_CHARACTER
1546 16 : || token.get_character() != ' ')
1547 : {
1548 0 : std::cerr << "error: expected a space or tab after the offset.\n";
1549 0 : exit(1);
1550 : }
1551 :
1552 16 : token = f_lexer->get_token(lexer::mode_t::LEXER_MODE_NORMAL);
1553 16 : if(token.get_type() != lexer::token_t::type_t::TOKEN_TYPE_STRING)
1554 : {
1555 0 : std::cerr << "error: expected a string to indicate the type on this line.\n";
1556 0 : exit(1);
1557 : }
1558 :
1559 32 : std::string type(token.get_string());
1560 16 : if(type == "byte")
1561 : {
1562 6 : e->set_type(entry_t::type_t::ENTRY_TYPE_BYTE);
1563 : }
1564 10 : else if(type == "ubyte")
1565 : {
1566 0 : e->set_type(entry_t::type_t::ENTRY_TYPE_UBYTE);
1567 : }
1568 10 : else if(type == "short")
1569 : {
1570 0 : e->set_type(entry_t::type_t::ENTRY_TYPE_SHORT);
1571 : }
1572 10 : else if(type == "leshort")
1573 : {
1574 2 : e->set_type(entry_t::type_t::ENTRY_TYPE_LESHORT);
1575 : }
1576 8 : else if(type == "beshort")
1577 : {
1578 0 : e->set_type(entry_t::type_t::ENTRY_TYPE_BESHORT);
1579 : }
1580 8 : else if(type == "ushort")
1581 : {
1582 0 : e->set_type(entry_t::type_t::ENTRY_TYPE_USHORT);
1583 : }
1584 8 : else if(type == "uleshort")
1585 : {
1586 0 : e->set_type(entry_t::type_t::ENTRY_TYPE_ULESHORT);
1587 : }
1588 8 : else if(type == "ubeshort")
1589 : {
1590 0 : e->set_type(entry_t::type_t::ENTRY_TYPE_UBESHORT);
1591 : }
1592 8 : else if(type == "long")
1593 : {
1594 0 : e->set_type(entry_t::type_t::ENTRY_TYPE_LONG);
1595 : }
1596 8 : else if(type == "lelong")
1597 : {
1598 4 : e->set_type(entry_t::type_t::ENTRY_TYPE_LELONG);
1599 : }
1600 4 : else if(type == "belong")
1601 : {
1602 2 : e->set_type(entry_t::type_t::ENTRY_TYPE_BELONG);
1603 : }
1604 2 : else if(type == "melong")
1605 : {
1606 0 : e->set_type(entry_t::type_t::ENTRY_TYPE_MELONG);
1607 : }
1608 2 : else if(type == "ulong")
1609 : {
1610 0 : e->set_type(entry_t::type_t::ENTRY_TYPE_ULONG);
1611 : }
1612 2 : else if(type == "ulong")
1613 : {
1614 0 : e->set_type(entry_t::type_t::ENTRY_TYPE_ULONG);
1615 : }
1616 2 : else if(type == "ulelong")
1617 : {
1618 0 : e->set_type(entry_t::type_t::ENTRY_TYPE_ULELONG);
1619 : }
1620 2 : else if(type == "ubelong")
1621 : {
1622 0 : e->set_type(entry_t::type_t::ENTRY_TYPE_UBELONG);
1623 : }
1624 2 : else if(type == "umelong")
1625 : {
1626 0 : e->set_type(entry_t::type_t::ENTRY_TYPE_UMELONG);
1627 : }
1628 2 : else if(type == "beid3")
1629 : {
1630 0 : e->set_type(entry_t::type_t::ENTRY_TYPE_BEID3);
1631 : }
1632 2 : else if(type == "leid3")
1633 : {
1634 0 : e->set_type(entry_t::type_t::ENTRY_TYPE_LEID3);
1635 : }
1636 2 : else if(type == "ubeid3")
1637 : {
1638 0 : e->set_type(entry_t::type_t::ENTRY_TYPE_UBEID3);
1639 : }
1640 2 : else if(type == "uleid3")
1641 : {
1642 0 : e->set_type(entry_t::type_t::ENTRY_TYPE_ULEID3);
1643 : }
1644 2 : else if(type == "quad")
1645 : {
1646 0 : e->set_type(entry_t::type_t::ENTRY_TYPE_QUAD);
1647 : }
1648 2 : else if(type == "bequad")
1649 : {
1650 0 : e->set_type(entry_t::type_t::ENTRY_TYPE_BEQUAD);
1651 : }
1652 2 : else if(type == "lequad")
1653 : {
1654 0 : e->set_type(entry_t::type_t::ENTRY_TYPE_LEQUAD);
1655 : }
1656 2 : else if(type == "uquad")
1657 : {
1658 0 : e->set_type(entry_t::type_t::ENTRY_TYPE_UQUAD);
1659 : }
1660 2 : else if(type == "ubequad")
1661 : {
1662 0 : e->set_type(entry_t::type_t::ENTRY_TYPE_UBEQUAD);
1663 : }
1664 2 : else if(type == "ulequad")
1665 : {
1666 0 : e->set_type(entry_t::type_t::ENTRY_TYPE_ULEQUAD);
1667 : }
1668 2 : else if(type == "float")
1669 : {
1670 0 : e->set_type(entry_t::type_t::ENTRY_TYPE_FLOAT);
1671 : }
1672 2 : else if(type == "befloat")
1673 : {
1674 0 : e->set_type(entry_t::type_t::ENTRY_TYPE_BEFLOAT);
1675 : }
1676 2 : else if(type == "lefloat")
1677 : {
1678 0 : e->set_type(entry_t::type_t::ENTRY_TYPE_LEFLOAT);
1679 : }
1680 2 : else if(type == "double")
1681 : {
1682 0 : e->set_type(entry_t::type_t::ENTRY_TYPE_DOUBLE);
1683 : }
1684 2 : else if(type == "bedouble")
1685 : {
1686 0 : e->set_type(entry_t::type_t::ENTRY_TYPE_BEDOUBLE);
1687 : }
1688 2 : else if(type == "ledouble")
1689 : {
1690 0 : e->set_type(entry_t::type_t::ENTRY_TYPE_LEDOUBLE);
1691 : }
1692 2 : else if(type == "string")
1693 : {
1694 2 : e->set_type(entry_t::type_t::ENTRY_TYPE_STRING);
1695 : }
1696 0 : else if(type == "pstring")
1697 : {
1698 0 : e->set_type(entry_t::type_t::ENTRY_TYPE_PSTRING);
1699 : }
1700 0 : else if(type == "bestring16")
1701 : {
1702 0 : e->set_type(entry_t::type_t::ENTRY_TYPE_BESTRING16);
1703 : }
1704 0 : else if(type == "lestring16")
1705 : {
1706 0 : e->set_type(entry_t::type_t::ENTRY_TYPE_LESTRING16);
1707 : }
1708 0 : else if(type == "search")
1709 : {
1710 0 : e->set_type(entry_t::type_t::ENTRY_TYPE_SEARCH);
1711 : }
1712 0 : else if(type == "regex")
1713 : {
1714 0 : e->set_type(entry_t::type_t::ENTRY_TYPE_REGEX);
1715 : }
1716 0 : else if(type == "date")
1717 : {
1718 0 : e->set_type(entry_t::type_t::ENTRY_TYPE_DATE);
1719 : }
1720 0 : else if(type == "qdate")
1721 : {
1722 0 : e->set_type(entry_t::type_t::ENTRY_TYPE_QDATE);
1723 : }
1724 0 : else if(type == "ldate")
1725 : {
1726 0 : e->set_type(entry_t::type_t::ENTRY_TYPE_LDATE);
1727 : }
1728 0 : else if(type == "qldate")
1729 : {
1730 0 : e->set_type(entry_t::type_t::ENTRY_TYPE_QLDATE);
1731 : }
1732 0 : else if(type == "bedate")
1733 : {
1734 0 : e->set_type(entry_t::type_t::ENTRY_TYPE_BEDATE);
1735 : }
1736 0 : else if(type == "beqdate")
1737 : {
1738 0 : e->set_type(entry_t::type_t::ENTRY_TYPE_BEQDATE);
1739 : }
1740 0 : else if(type == "beldate")
1741 : {
1742 0 : e->set_type(entry_t::type_t::ENTRY_TYPE_BELDATE);
1743 : }
1744 0 : else if(type == "beqldate")
1745 : {
1746 0 : e->set_type(entry_t::type_t::ENTRY_TYPE_BEQLDATE);
1747 : }
1748 0 : else if(type == "ledate")
1749 : {
1750 0 : e->set_type(entry_t::type_t::ENTRY_TYPE_LEDATE);
1751 : }
1752 0 : else if(type == "leqdate")
1753 : {
1754 0 : e->set_type(entry_t::type_t::ENTRY_TYPE_LEQDATE);
1755 : }
1756 0 : else if(type == "leldate")
1757 : {
1758 0 : e->set_type(entry_t::type_t::ENTRY_TYPE_LELDATE);
1759 : }
1760 0 : else if(type == "leqldate")
1761 : {
1762 0 : e->set_type(entry_t::type_t::ENTRY_TYPE_LEQLDATE);
1763 : }
1764 0 : else if(type == "medate")
1765 : {
1766 0 : e->set_type(entry_t::type_t::ENTRY_TYPE_MEDATE);
1767 : }
1768 0 : else if(type == "meldate")
1769 : {
1770 0 : e->set_type(entry_t::type_t::ENTRY_TYPE_MELDATE);
1771 : }
1772 0 : else if(type == "indirect")
1773 : {
1774 0 : e->set_type(entry_t::type_t::ENTRY_TYPE_INDIRECT);
1775 : }
1776 0 : else if(type == "default")
1777 : {
1778 0 : e->set_type(entry_t::type_t::ENTRY_TYPE_DEFAULT);
1779 : }
1780 0 : else if(type == "name")
1781 : {
1782 0 : e->set_type(entry_t::type_t::ENTRY_TYPE_NAME);
1783 : }
1784 0 : else if(type == "use")
1785 : {
1786 0 : e->set_type(entry_t::type_t::ENTRY_TYPE_USE);
1787 : }
1788 : else
1789 : {
1790 0 : std::cerr << "error:" << f_lexer->current_filename()
1791 0 : << ":" << f_lexer->current_line()
1792 0 : << ": unknown type \"" << type << "\".\n";
1793 0 : exit(1);
1794 : }
1795 :
1796 16 : token = f_lexer->get_token(lexer::mode_t::LEXER_MODE_NORMAL);
1797 16 : if(token.get_type() == lexer::token_t::type_t::TOKEN_TYPE_CHARACTER)
1798 : {
1799 16 : switch(token.get_character())
1800 : {
1801 0 : case '&': // <integer-type> & <integer>
1802 0 : switch(e->get_type())
1803 : {
1804 0 : case entry_t::type_t::ENTRY_TYPE_BYTE:
1805 : case entry_t::type_t::ENTRY_TYPE_UBYTE:
1806 : case entry_t::type_t::ENTRY_TYPE_SHORT:
1807 : case entry_t::type_t::ENTRY_TYPE_LESHORT:
1808 : case entry_t::type_t::ENTRY_TYPE_BESHORT:
1809 : case entry_t::type_t::ENTRY_TYPE_USHORT:
1810 : case entry_t::type_t::ENTRY_TYPE_ULESHORT:
1811 : case entry_t::type_t::ENTRY_TYPE_UBESHORT:
1812 : case entry_t::type_t::ENTRY_TYPE_LONG:
1813 : case entry_t::type_t::ENTRY_TYPE_LELONG:
1814 : case entry_t::type_t::ENTRY_TYPE_BELONG:
1815 : case entry_t::type_t::ENTRY_TYPE_MELONG:
1816 : case entry_t::type_t::ENTRY_TYPE_ULONG:
1817 : case entry_t::type_t::ENTRY_TYPE_ULELONG:
1818 : case entry_t::type_t::ENTRY_TYPE_UBELONG:
1819 : case entry_t::type_t::ENTRY_TYPE_UMELONG:
1820 : case entry_t::type_t::ENTRY_TYPE_BEID3:
1821 : case entry_t::type_t::ENTRY_TYPE_LEID3:
1822 : case entry_t::type_t::ENTRY_TYPE_UBEID3:
1823 : case entry_t::type_t::ENTRY_TYPE_ULEID3:
1824 : case entry_t::type_t::ENTRY_TYPE_QUAD:
1825 : case entry_t::type_t::ENTRY_TYPE_BEQUAD:
1826 : case entry_t::type_t::ENTRY_TYPE_LEQUAD:
1827 : case entry_t::type_t::ENTRY_TYPE_UQUAD:
1828 : case entry_t::type_t::ENTRY_TYPE_UBEQUAD:
1829 : case entry_t::type_t::ENTRY_TYPE_ULEQUAD:
1830 : case entry_t::type_t::ENTRY_TYPE_DATE:
1831 : case entry_t::type_t::ENTRY_TYPE_QDATE:
1832 : case entry_t::type_t::ENTRY_TYPE_LDATE:
1833 : case entry_t::type_t::ENTRY_TYPE_QLDATE:
1834 : case entry_t::type_t::ENTRY_TYPE_BEDATE:
1835 : case entry_t::type_t::ENTRY_TYPE_BEQDATE:
1836 : case entry_t::type_t::ENTRY_TYPE_BELDATE:
1837 : case entry_t::type_t::ENTRY_TYPE_BEQLDATE:
1838 : case entry_t::type_t::ENTRY_TYPE_LEDATE:
1839 : case entry_t::type_t::ENTRY_TYPE_LEQDATE:
1840 : case entry_t::type_t::ENTRY_TYPE_LELDATE:
1841 : case entry_t::type_t::ENTRY_TYPE_LEQLDATE:
1842 : case entry_t::type_t::ENTRY_TYPE_MEDATE:
1843 : case entry_t::type_t::ENTRY_TYPE_MELDATE:
1844 0 : break;
1845 :
1846 0 : default:
1847 0 : std::cerr << "error: a type followed by & must be an integral type.\n";
1848 0 : exit(1);
1849 : snap::NOTREACHED();
1850 :
1851 : }
1852 0 : token = f_lexer->get_token(lexer::mode_t::LEXER_MODE_NORMAL);
1853 0 : if(token.get_type() != lexer::token_t::type_t::TOKEN_TYPE_INTEGER)
1854 : {
1855 0 : std::cerr << "error: a type followed by & must next be followed by an integer.\n";
1856 0 : exit(1);
1857 : }
1858 0 : e->set_mask(token.get_integer());
1859 0 : token = f_lexer->get_token(lexer::mode_t::LEXER_MODE_NORMAL);
1860 0 : break;
1861 :
1862 0 : case '/': // <string-type> '/' <flags>, or "search" '/' <number>
1863 0 : switch(e->get_type())
1864 : {
1865 0 : case entry_t::type_t::ENTRY_TYPE_STRING:
1866 : case entry_t::type_t::ENTRY_TYPE_BESTRING16:
1867 : case entry_t::type_t::ENTRY_TYPE_LESTRING16:
1868 0 : token = f_lexer->get_token(lexer::mode_t::LEXER_MODE_NORMAL);
1869 0 : if(token.get_type() != lexer::token_t::type_t::TOKEN_TYPE_STRING)
1870 : {
1871 0 : std::cerr << "error: a search followed by / must next be followed by a set of flags.\n";
1872 0 : exit(1);
1873 : }
1874 : {
1875 0 : std::string const flags(token.get_string());
1876 0 : for(char const *f(flags.c_str()); *f != '\0'; ++f)
1877 : {
1878 0 : switch(*f)
1879 : {
1880 0 : case 'W':
1881 0 : e->set_flags(entry_t::ENTRY_FLAG_COMPACT_BLANK);
1882 0 : break;
1883 :
1884 0 : case 'w':
1885 0 : e->set_flags(entry_t::ENTRY_FLAG_BLANK);
1886 0 : break;
1887 :
1888 0 : case 'c':
1889 0 : e->set_flags(entry_t::ENTRY_FLAG_LOWER_INSENSITIVE);
1890 0 : break;
1891 :
1892 0 : case 'C':
1893 0 : e->set_flags(entry_t::ENTRY_FLAG_UPPER_INSENSITIVE);
1894 0 : break;
1895 :
1896 0 : case 't':
1897 0 : e->set_flags(entry_t::ENTRY_FLAG_TEXT_FILE);
1898 0 : break;
1899 :
1900 0 : case 'b':
1901 0 : e->set_flags(entry_t::ENTRY_FLAG_BINARY_FILE);
1902 0 : break;
1903 :
1904 0 : default:
1905 0 : std::cerr << "error:" << f_lexer->current_filename()
1906 0 : << ":" << f_lexer->current_line()
1907 : << ": invalid character used as a string, bestring16, or lestring16 ("
1908 0 : << *f << ").\n";
1909 0 : exit(1);
1910 :
1911 : }
1912 0 : }
1913 : }
1914 0 : token = f_lexer->get_token(lexer::mode_t::LEXER_MODE_NORMAL);
1915 0 : break;
1916 :
1917 0 : case entry_t::type_t::ENTRY_TYPE_PSTRING:
1918 : // only width of the string size is expected here
1919 0 : token = f_lexer->get_token(lexer::mode_t::LEXER_MODE_NORMAL);
1920 0 : if(token.get_type() != lexer::token_t::type_t::TOKEN_TYPE_STRING)
1921 : {
1922 0 : std::cerr << "error: a search followed by / must next be followed by a set of flags.\n";
1923 0 : exit(1);
1924 : }
1925 : {
1926 0 : std::string const flags(token.get_string());
1927 0 : for(char const *f(flags.c_str()); *f != '\0'; ++f)
1928 : {
1929 0 : switch(*f)
1930 : {
1931 0 : case 'B':
1932 0 : e->set_flags(entry_t::ENTRY_FLAG_BYTE);
1933 0 : break;
1934 :
1935 0 : case 'H':
1936 0 : e->set_flags(entry_t::ENTRY_FLAG_BE_SHORT);
1937 0 : break;
1938 :
1939 0 : case 'h':
1940 0 : e->set_flags(entry_t::ENTRY_FLAG_LE_SHORT);
1941 0 : break;
1942 :
1943 0 : case 'L':
1944 0 : e->set_flags(entry_t::ENTRY_FLAG_BE_LONG);
1945 0 : break;
1946 :
1947 0 : case 'l':
1948 0 : e->set_flags(entry_t::ENTRY_FLAG_LE_LONG);
1949 0 : break;
1950 :
1951 0 : case 'J':
1952 0 : e->set_flags(entry_t::ENTRY_FLAG_SELF_INCLUDED);
1953 0 : break;
1954 :
1955 0 : default:
1956 0 : std::cerr << "error: invalid character used as a pstring flag (pstring/" << *f << ").\n";
1957 0 : exit(1);
1958 :
1959 : }
1960 0 : }
1961 : }
1962 0 : token = f_lexer->get_token(lexer::mode_t::LEXER_MODE_NORMAL);
1963 0 : break;
1964 :
1965 0 : case entry_t::type_t::ENTRY_TYPE_REGEX: // <regex> / <flags> or <regex> / <number>
1966 0 : token = f_lexer->get_token(lexer::mode_t::LEXER_MODE_NORMAL);
1967 : // TBD:
1968 : // I would imagine that both could be used (integer + flags)
1969 : // but it is not documented so at this point I read one or
1970 : // the other and that is enough with the existing files.
1971 0 : if(token.get_type() == lexer::token_t::type_t::TOKEN_TYPE_INTEGER)
1972 : {
1973 : // the number of lines to check the regex against
1974 0 : e->set_maxlength(token.get_integer());
1975 0 : token = f_lexer->get_token(lexer::mode_t::LEXER_MODE_NORMAL);
1976 0 : if(token.get_type() != lexer::token_t::type_t::TOKEN_TYPE_CHARACTER
1977 0 : || token.get_character() != '/')
1978 : {
1979 : // no extra flags
1980 0 : break;
1981 : }
1982 : }
1983 0 : if(token.get_type() == lexer::token_t::type_t::TOKEN_TYPE_STRING)
1984 : {
1985 : // regex flags are 'l', 's' and 'c'
1986 0 : std::string flags(token.get_string());
1987 0 : for(char const *f(flags.c_str()); *f != '\0'; ++f)
1988 : {
1989 0 : switch(*f)
1990 : {
1991 0 : case 'l':
1992 0 : e->set_flags(entry_t::ENTRY_FLAG_LINES);
1993 0 : break;
1994 :
1995 0 : case 'c':
1996 0 : e->set_flags(entry_t::ENTRY_FLAG_CASE_INSENSITIVE);
1997 0 : break;
1998 :
1999 0 : case 's':
2000 0 : e->set_flags(entry_t::ENTRY_FLAG_START_OFFSET);
2001 0 : break;
2002 :
2003 0 : default:
2004 0 : std::cerr << "error: invalid character used as a regex flag (regex/" << *f << ").\n";
2005 0 : exit(1);
2006 :
2007 : }
2008 : }
2009 : }
2010 : else
2011 : {
2012 0 : std::cerr << "error: a search followed by / must next be followed by an integer and/or flags.\n";
2013 0 : exit(1);
2014 : }
2015 0 : token = f_lexer->get_token(lexer::mode_t::LEXER_MODE_NORMAL);
2016 0 : break;
2017 :
2018 0 : case entry_t::type_t::ENTRY_TYPE_SEARCH:
2019 0 : token = f_lexer->get_token(lexer::mode_t::LEXER_MODE_NORMAL);
2020 0 : if(token.get_type() == lexer::token_t::type_t::TOKEN_TYPE_INTEGER)
2021 : {
2022 0 : e->set_maxlength(token.get_integer());
2023 0 : token = f_lexer->get_token(lexer::mode_t::LEXER_MODE_NORMAL);
2024 0 : if(token.get_type() != lexer::token_t::type_t::TOKEN_TYPE_CHARACTER
2025 0 : || token.get_character() != '/')
2026 : {
2027 : // no extra flags
2028 0 : break;
2029 : }
2030 : }
2031 0 : if(token.get_type() == lexer::token_t::type_t::TOKEN_TYPE_STRING)
2032 : {
2033 0 : std::string flags(token.get_string());
2034 0 : for(char const *f(flags.c_str()); *f != '\0'; ++f)
2035 : {
2036 0 : switch(*f)
2037 : {
2038 0 : case 'W':
2039 0 : e->set_flags(entry_t::ENTRY_FLAG_COMPACT_BLANK);
2040 0 : break;
2041 :
2042 0 : case 'w':
2043 0 : e->set_flags(entry_t::ENTRY_FLAG_BLANK);
2044 0 : break;
2045 :
2046 0 : case 'c':
2047 0 : e->set_flags(entry_t::ENTRY_FLAG_LOWER_INSENSITIVE);
2048 0 : break;
2049 :
2050 0 : case 'C':
2051 0 : e->set_flags(entry_t::ENTRY_FLAG_UPPER_INSENSITIVE);
2052 0 : break;
2053 :
2054 0 : case 't':
2055 0 : e->set_flags(entry_t::ENTRY_FLAG_TEXT_FILE);
2056 0 : break;
2057 :
2058 0 : case 'b':
2059 0 : e->set_flags(entry_t::ENTRY_FLAG_BINARY_FILE);
2060 0 : break;
2061 :
2062 0 : default:
2063 0 : std::cerr << "error: invalid character used as a search flag (" << *f << ").\n";
2064 0 : exit(1);
2065 :
2066 : }
2067 : }
2068 : }
2069 : else
2070 : {
2071 0 : std::cerr << "error: a search followed by / must next be followed by an integer (count) or a string (flags).\n";
2072 0 : exit(1);
2073 : }
2074 0 : token = f_lexer->get_token(lexer::mode_t::LEXER_MODE_NORMAL);
2075 0 : break;
2076 :
2077 0 : default:
2078 0 : std::cerr << "error: a type followed by / must be a string type.\n";
2079 0 : exit(1);
2080 : snap::NOTREACHED();
2081 :
2082 : }
2083 0 : break;
2084 :
2085 : }
2086 : }
2087 :
2088 32 : if(token.get_type() != lexer::token_t::type_t::TOKEN_TYPE_CHARACTER
2089 16 : || token.get_character() != ' ')
2090 : {
2091 0 : std::cerr << "error: expected a space or tab after the type.\n";
2092 0 : exit(1);
2093 : }
2094 :
2095 : // the next get_token() mode depends on the type so we do that
2096 : // separately
2097 16 : bool is_float(false);
2098 16 : switch(e->get_type())
2099 : {
2100 0 : case entry_t::type_t::ENTRY_TYPE_FLOAT:
2101 : case entry_t::type_t::ENTRY_TYPE_BEFLOAT:
2102 : case entry_t::type_t::ENTRY_TYPE_LEFLOAT:
2103 : case entry_t::type_t::ENTRY_TYPE_DOUBLE:
2104 : case entry_t::type_t::ENTRY_TYPE_BEDOUBLE:
2105 : case entry_t::type_t::ENTRY_TYPE_LEDOUBLE:
2106 0 : is_float = true;
2107 : #if __cplusplus >= 201700
2108 : [[fallthrough]];
2109 : #endif
2110 14 : case entry_t::type_t::ENTRY_TYPE_BYTE:
2111 : case entry_t::type_t::ENTRY_TYPE_UBYTE:
2112 : case entry_t::type_t::ENTRY_TYPE_SHORT:
2113 : case entry_t::type_t::ENTRY_TYPE_LESHORT:
2114 : case entry_t::type_t::ENTRY_TYPE_BESHORT:
2115 : case entry_t::type_t::ENTRY_TYPE_USHORT:
2116 : case entry_t::type_t::ENTRY_TYPE_ULESHORT:
2117 : case entry_t::type_t::ENTRY_TYPE_UBESHORT:
2118 : case entry_t::type_t::ENTRY_TYPE_LONG:
2119 : case entry_t::type_t::ENTRY_TYPE_LELONG:
2120 : case entry_t::type_t::ENTRY_TYPE_BELONG:
2121 : case entry_t::type_t::ENTRY_TYPE_MELONG:
2122 : case entry_t::type_t::ENTRY_TYPE_ULONG:
2123 : case entry_t::type_t::ENTRY_TYPE_ULELONG:
2124 : case entry_t::type_t::ENTRY_TYPE_UBELONG:
2125 : case entry_t::type_t::ENTRY_TYPE_UMELONG:
2126 : case entry_t::type_t::ENTRY_TYPE_BEID3:
2127 : case entry_t::type_t::ENTRY_TYPE_LEID3:
2128 : case entry_t::type_t::ENTRY_TYPE_UBEID3:
2129 : case entry_t::type_t::ENTRY_TYPE_ULEID3:
2130 : case entry_t::type_t::ENTRY_TYPE_QUAD:
2131 : case entry_t::type_t::ENTRY_TYPE_BEQUAD:
2132 : case entry_t::type_t::ENTRY_TYPE_LEQUAD:
2133 : case entry_t::type_t::ENTRY_TYPE_UQUAD:
2134 : case entry_t::type_t::ENTRY_TYPE_UBEQUAD:
2135 : case entry_t::type_t::ENTRY_TYPE_ULEQUAD:
2136 : case entry_t::type_t::ENTRY_TYPE_DATE:
2137 : case entry_t::type_t::ENTRY_TYPE_QDATE:
2138 : case entry_t::type_t::ENTRY_TYPE_LDATE:
2139 : case entry_t::type_t::ENTRY_TYPE_QLDATE:
2140 : case entry_t::type_t::ENTRY_TYPE_BEDATE:
2141 : case entry_t::type_t::ENTRY_TYPE_BEQDATE:
2142 : case entry_t::type_t::ENTRY_TYPE_BELDATE:
2143 : case entry_t::type_t::ENTRY_TYPE_BEQLDATE:
2144 : case entry_t::type_t::ENTRY_TYPE_LEDATE:
2145 : case entry_t::type_t::ENTRY_TYPE_LEQDATE:
2146 : case entry_t::type_t::ENTRY_TYPE_LELDATE:
2147 : case entry_t::type_t::ENTRY_TYPE_LEQLDATE:
2148 : case entry_t::type_t::ENTRY_TYPE_MEDATE:
2149 : case entry_t::type_t::ENTRY_TYPE_MELDATE:
2150 : // integers expect a number of flags so we manage these here
2151 14 : token = f_lexer->get_token(lexer::mode_t::LEXER_MODE_NORMAL);
2152 : // first check whether we have a '!' (must be the very first)
2153 28 : if(token.get_type() == lexer::token_t::type_t::TOKEN_TYPE_CHARACTER
2154 14 : && token.get_character() == '!')
2155 : {
2156 6 : e->set_flags(entry_t::ENTRY_FLAG_NOT);
2157 6 : token = f_lexer->get_token(lexer::mode_t::LEXER_MODE_NORMAL);
2158 : }
2159 : {
2160 14 : bool has_operator(token.get_type() == lexer::token_t::type_t::TOKEN_TYPE_CHARACTER);
2161 14 : if(has_operator
2162 14 : && token.get_character() != '-')
2163 : {
2164 : // verify that it is legal with a floating point value if such
2165 0 : if(is_float)
2166 : {
2167 0 : switch(token.get_character())
2168 : {
2169 0 : case '&':
2170 : case '^':
2171 : case '~':
2172 0 : std::cerr << "error:" << f_lexer->current_filename()
2173 0 : << ":" << f_lexer->current_line()
2174 0 : << ": " << static_cast<char>(token.get_character())
2175 0 : << " used with a floating point number.\n";
2176 0 : exit(1);
2177 : snap::NOTREACHED();
2178 :
2179 : }
2180 : }
2181 0 : switch(token.get_character())
2182 : {
2183 0 : case '=':
2184 0 : e->set_flags(entry_t::ENTRY_FLAG_EQUAL);
2185 0 : break;
2186 :
2187 0 : case '<':
2188 0 : e->set_flags(entry_t::ENTRY_FLAG_LESS);
2189 0 : break;
2190 :
2191 0 : case '>':
2192 0 : e->set_flags(entry_t::ENTRY_FLAG_GREATER);
2193 0 : break;
2194 :
2195 0 : case '&':
2196 0 : e->set_flags(entry_t::ENTRY_FLAG_ARE_SET);
2197 0 : break;
2198 :
2199 0 : case '^':
2200 0 : e->set_flags(entry_t::ENTRY_FLAG_ARE_CLEAR);
2201 0 : break;
2202 :
2203 0 : case '~':
2204 0 : e->set_flags(entry_t::ENTRY_FLAG_NEGATE);
2205 0 : break;
2206 :
2207 0 : default:
2208 : std::cerr << "error:"
2209 0 : << f_lexer->current_filename() << ":"
2210 : << f_lexer->current_line() << ": unknown comparison operator "
2211 0 : << token.get_character() << ".\n";
2212 0 : exit(1);
2213 : snap::NOTREACHED();
2214 :
2215 : }
2216 0 : token = f_lexer->get_token(lexer::mode_t::LEXER_MODE_NORMAL);
2217 :
2218 : // we allow spaces after an operator
2219 0 : if(token.get_type() == lexer::token_t::type_t::TOKEN_TYPE_CHARACTER
2220 0 : && token.get_character() == ' ')
2221 : {
2222 0 : token = f_lexer->get_token(lexer::mode_t::LEXER_MODE_NORMAL);
2223 : }
2224 : }
2225 : // one special case here: "x"
2226 28 : if(token.get_type() == lexer::token_t::type_t::TOKEN_TYPE_STRING
2227 18 : && token.get_string() == "x"
2228 32 : && !has_operator)
2229 : {
2230 4 : e->set_flags(entry_t::ENTRY_FLAG_TRUE);
2231 : }
2232 : else
2233 : {
2234 10 : int sign(1);
2235 20 : if(token.get_type() == lexer::token_t::type_t::TOKEN_TYPE_CHARACTER
2236 10 : && token.get_character() == '-')
2237 : {
2238 0 : sign = -1;
2239 0 : token = f_lexer->get_token(lexer::mode_t::LEXER_MODE_NORMAL);
2240 : }
2241 10 : if(token.get_type() == lexer::token_t::type_t::TOKEN_TYPE_FLOAT)
2242 : {
2243 0 : if(!is_float)
2244 : {
2245 0 : std::cerr << "error:" << f_lexer->current_filename()
2246 0 : << ":" << f_lexer->current_line()
2247 0 : << ": an integer was expected for an entry specifying a number type.\n";
2248 0 : exit(1);
2249 : }
2250 :
2251 0 : e->set_float(token.get_float() * static_cast<double>(sign));
2252 : }
2253 10 : else if(token.get_type() == lexer::token_t::type_t::TOKEN_TYPE_INTEGER)
2254 : {
2255 10 : if(is_float)
2256 : {
2257 0 : std::cerr << "error:" << f_lexer->current_filename()
2258 0 : << ":" << f_lexer->current_line()
2259 0 : << ": a floating point number was expected for an entry specifying a floating point type, got an integer.\n";
2260 0 : exit(1);
2261 : }
2262 :
2263 10 : e->set_integer(token.get_integer() * sign);
2264 : }
2265 : else
2266 : {
2267 0 : std::cerr << "error:" << f_lexer->current_filename()
2268 0 : << ":" << f_lexer->current_line()
2269 0 : << ": an \"x\", an integer, or a floating point number were expected (instead token type is: "
2270 0 : << static_cast<int>(token.get_type())
2271 0 : << ").\n";
2272 0 : exit(1);
2273 : }
2274 : }
2275 : }
2276 14 : break;
2277 :
2278 2 : case entry_t::type_t::ENTRY_TYPE_STRING:
2279 : case entry_t::type_t::ENTRY_TYPE_PSTRING:
2280 : case entry_t::type_t::ENTRY_TYPE_BESTRING16:
2281 : case entry_t::type_t::ENTRY_TYPE_LESTRING16:
2282 : case entry_t::type_t::ENTRY_TYPE_SEARCH:
2283 : // strings can start with !, !=, !<, !>, =, <, >
2284 : // however, we better read the string as a whole
2285 : {
2286 2 : token = f_lexer->get_token(lexer::mode_t::LEXER_MODE_REGEX);
2287 4 : std::string str(token.get_string());
2288 2 : if(str[0] == '!')
2289 : {
2290 0 : str.erase(str.begin());
2291 0 : e->set_flags(entry_t::ENTRY_FLAG_NOT);
2292 : }
2293 2 : switch(str[0])
2294 : {
2295 0 : case '=':
2296 0 : str.erase(str.begin());
2297 0 : e->set_flags(entry_t::ENTRY_FLAG_EQUAL);
2298 0 : break;
2299 :
2300 0 : case '<':
2301 0 : str.erase(str.begin());
2302 0 : e->set_flags(entry_t::ENTRY_FLAG_LESS);
2303 0 : break;
2304 :
2305 0 : case '>':
2306 0 : str.erase(str.begin());
2307 0 : e->set_flags(entry_t::ENTRY_FLAG_GREATER);
2308 0 : break;
2309 :
2310 : }
2311 4 : e->set_string(str);
2312 : }
2313 2 : break;
2314 :
2315 0 : case entry_t::type_t::ENTRY_TYPE_REGEX:
2316 0 : token = f_lexer->get_token(lexer::mode_t::LEXER_MODE_REGEX);
2317 0 : e->set_string(token.get_string());
2318 0 : break;
2319 :
2320 0 : case entry_t::type_t::ENTRY_TYPE_NAME: // this creates a macro
2321 0 : token = f_lexer->get_token(lexer::mode_t::LEXER_MODE_NORMAL);
2322 0 : e->set_string(token.get_string());
2323 0 : break;
2324 :
2325 0 : case entry_t::type_t::ENTRY_TYPE_USE: // this calls a macro
2326 0 : token = f_lexer->get_token(lexer::mode_t::LEXER_MODE_NORMAL);
2327 0 : e->set_string(token.get_string());
2328 0 : break;
2329 :
2330 0 : case entry_t::type_t::ENTRY_TYPE_INDIRECT:
2331 : // the indirect may or may not be followed by the 'x' before
2332 : // the message... since we ignore the message we can also
2333 : // ignore the x here
2334 0 : break;
2335 :
2336 0 : case entry_t::type_t::ENTRY_TYPE_DEFAULT:
2337 0 : token = f_lexer->get_token(lexer::mode_t::LEXER_MODE_NORMAL);
2338 0 : if(token.get_type() != lexer::token_t::type_t::TOKEN_TYPE_STRING
2339 0 : || token.get_string() != "x")
2340 : {
2341 0 : std::cerr << "error: default must always be used with \"x\".\n";
2342 0 : exit(1);
2343 : }
2344 0 : e->set_flags(entry_t::ENTRY_FLAG_TRUE);
2345 0 : break;
2346 :
2347 0 : case entry_t::type_t::ENTRY_TYPE_UNKNOWN:
2348 0 : std::cerr << "error: entry type still unknown when defining its value.\n";
2349 0 : exit(1);
2350 : snap::NOTREACHED();
2351 :
2352 : }
2353 16 : token = f_lexer->get_token(lexer::mode_t::LEXER_MODE_MESSAGE);
2354 16 : if(token.get_type() == lexer::token_t::type_t::TOKEN_TYPE_STRING)
2355 : {
2356 : // We don't do anything with the message, but just in case I
2357 : // show here that we'd have to skip the spaces before saving it
2358 : //std::string msg(token.get_string());
2359 : //while(msg[0] == ' ' || msg[0] == '\t')
2360 : //{
2361 : // msg.erase(msg.begin());
2362 : //}
2363 : //e->set_message(msg);
2364 :
2365 : // we can switch back to normal to read the \n
2366 16 : token = f_lexer->get_token(lexer::mode_t::LEXER_MODE_NORMAL);
2367 : }
2368 32 : if(token.get_type() != lexer::token_t::type_t::TOKEN_TYPE_CHARACTER
2369 16 : || token.get_character() != '\n')
2370 : {
2371 0 : std::cerr << "error: expected an optional message and a new line at the end of the line.\n";
2372 0 : exit(1);
2373 : }
2374 :
2375 16 : f_entries.push_back(e);
2376 20 : }
2377 : }
2378 :
2379 :
2380 1 : void parser::output()
2381 : {
2382 : // the output is sent to stdout so that way we can save the data to
2383 : // any file using a redirection or see it on the screen
2384 1 : size_t const max_entries(f_entries.size());
2385 1 : if(max_entries == 0)
2386 : {
2387 0 : std::cerr << "error: read some magic files, but did not get an valid entries...\n";
2388 0 : exit(1);
2389 : }
2390 :
2391 1 : if(f_entries[0]->get_level() != 0)
2392 : {
2393 0 : std::cerr << "error: the very first entry must always be a level zero entry.\n";
2394 0 : exit(1);
2395 : }
2396 :
2397 1 : output_header();
2398 :
2399 1 : bool has_mime(false);
2400 2 : std::string name;
2401 1 : size_t start(0);
2402 17 : for(size_t i(0); i < max_entries; ++i)
2403 : {
2404 16 : if(f_entries[i]->get_level() == 0)
2405 : {
2406 : // if we get an entry with a mime type, then send it out
2407 4 : if(has_mime)
2408 : {
2409 3 : output_entry(start, i, true);
2410 3 : has_mime = false;
2411 : }
2412 1 : else if(!name.empty())
2413 : {
2414 0 : std::cout << "__macro_" << name << " = function(offset) {" << std::endl;
2415 0 : output_entry(start, i, false);
2416 0 : std::cout << "return false;};" << std::endl;
2417 0 : name.clear();
2418 : }
2419 4 : start = i;
2420 : }
2421 16 : if(!f_entries[i]->get_mimetype().empty())
2422 : {
2423 : // this means it is worth encoding
2424 4 : has_mime = true;
2425 : }
2426 16 : if(f_entries[i]->get_type() == entry_t::type_t::ENTRY_TYPE_NAME)
2427 : {
2428 : // found a macro
2429 0 : name = f_entries[i]->get_string();
2430 : }
2431 : }
2432 1 : if(has_mime)
2433 : {
2434 1 : output_entry(start, max_entries, true);
2435 : }
2436 :
2437 1 : output_footer();
2438 1 : }
2439 :
2440 :
2441 4 : void parser::output_entry(size_t start, size_t end, bool has_mime)
2442 : {
2443 4 : struct recursive_output
2444 : {
2445 4 : recursive_output(bool has_mime)
2446 4 : : f_has_mime(has_mime)
2447 : {
2448 4 : }
2449 :
2450 12 : size_t output(size_t pos)
2451 : {
2452 12 : output_if(pos);
2453 12 : size_t next_pos(pos + 1);
2454 24 : if(next_pos < f_entries.size()
2455 12 : && f_entries[pos]->get_level() <= f_entries[next_pos]->get_level())
2456 : {
2457 : // returns our new next_pos
2458 8 : next_pos = output(next_pos); // recursive call
2459 : }
2460 4 : else if(!f_has_mime)
2461 : {
2462 0 : std::cout << "return true;" << std::endl;
2463 : }
2464 12 : output_mimetype(pos);
2465 12 : output_endif(pos);
2466 :
2467 12 : return next_pos;
2468 : }
2469 :
2470 12 : void output_if(size_t pos)
2471 : {
2472 : typedef void (recursive_output::*output_func_t)(size_t pos);
2473 : #pragma GCC diagnostic push
2474 : #pragma GCC diagnostic ignored "-Wpedantic"
2475 : static output_func_t const output_by_type[] =
2476 : {
2477 : [static_cast<int>(entry_t::type_t::ENTRY_TYPE_UNKNOWN)] = &recursive_output::output_unknown,
2478 :
2479 : // int -- 1 byte
2480 : [static_cast<int>(entry_t::type_t::ENTRY_TYPE_BYTE)] = &recursive_output::output_byte,
2481 : [static_cast<int>(entry_t::type_t::ENTRY_TYPE_UBYTE)] = &recursive_output::output_ubyte,
2482 : // int -- 2 bytes
2483 : [static_cast<int>(entry_t::type_t::ENTRY_TYPE_SHORT)] = &recursive_output::output_short,
2484 : [static_cast<int>(entry_t::type_t::ENTRY_TYPE_LESHORT)] = &recursive_output::output_leshort,
2485 : [static_cast<int>(entry_t::type_t::ENTRY_TYPE_BESHORT)] = &recursive_output::output_beshort,
2486 : [static_cast<int>(entry_t::type_t::ENTRY_TYPE_USHORT)] = &recursive_output::output_ushort,
2487 : [static_cast<int>(entry_t::type_t::ENTRY_TYPE_ULESHORT)] = &recursive_output::output_uleshort,
2488 : [static_cast<int>(entry_t::type_t::ENTRY_TYPE_UBESHORT)] = &recursive_output::output_ubeshort,
2489 : // int -- 4 bytes
2490 : [static_cast<int>(entry_t::type_t::ENTRY_TYPE_LONG)] = &recursive_output::output_long,
2491 : [static_cast<int>(entry_t::type_t::ENTRY_TYPE_LELONG)] = &recursive_output::output_lelong,
2492 : [static_cast<int>(entry_t::type_t::ENTRY_TYPE_BELONG)] = &recursive_output::output_belong,
2493 : [static_cast<int>(entry_t::type_t::ENTRY_TYPE_MELONG)] = &recursive_output::output_melong,
2494 : [static_cast<int>(entry_t::type_t::ENTRY_TYPE_ULONG)] = &recursive_output::output_ulong,
2495 : [static_cast<int>(entry_t::type_t::ENTRY_TYPE_ULELONG)] = &recursive_output::output_ulelong,
2496 : [static_cast<int>(entry_t::type_t::ENTRY_TYPE_UBELONG)] = &recursive_output::output_ubelong,
2497 : [static_cast<int>(entry_t::type_t::ENTRY_TYPE_UMELONG)] = &recursive_output::output_umelong,
2498 : // int -- 4 bytes -- an ID3 size is 32 bits defined as: ((size & 0x0FFFFFFF) * 4)
2499 : [static_cast<int>(entry_t::type_t::ENTRY_TYPE_BEID3)] = &recursive_output::output_beid3,
2500 : [static_cast<int>(entry_t::type_t::ENTRY_TYPE_LEID3)] = &recursive_output::output_leid3,
2501 : [static_cast<int>(entry_t::type_t::ENTRY_TYPE_UBEID3)] = &recursive_output::output_ubeid3,
2502 : [static_cast<int>(entry_t::type_t::ENTRY_TYPE_ULEID3)] = &recursive_output::output_uleid3,
2503 : // int -- 8 bytes
2504 : [static_cast<int>(entry_t::type_t::ENTRY_TYPE_QUAD)] = &recursive_output::output_quad,
2505 : [static_cast<int>(entry_t::type_t::ENTRY_TYPE_BEQUAD)] = &recursive_output::output_bequad,
2506 : [static_cast<int>(entry_t::type_t::ENTRY_TYPE_LEQUAD)] = &recursive_output::output_lequad,
2507 : [static_cast<int>(entry_t::type_t::ENTRY_TYPE_UQUAD)] = &recursive_output::output_uquad,
2508 : [static_cast<int>(entry_t::type_t::ENTRY_TYPE_UBEQUAD)] = &recursive_output::output_ubequad,
2509 : [static_cast<int>(entry_t::type_t::ENTRY_TYPE_ULEQUAD)] = &recursive_output::output_ulequad,
2510 : // float -- 4 bytes
2511 : [static_cast<int>(entry_t::type_t::ENTRY_TYPE_FLOAT)] = &recursive_output::output_float,
2512 : [static_cast<int>(entry_t::type_t::ENTRY_TYPE_BEFLOAT)] = &recursive_output::output_befloat,
2513 : [static_cast<int>(entry_t::type_t::ENTRY_TYPE_LEFLOAT)] = &recursive_output::output_lefloat,
2514 : // float -- 8 bytes
2515 : [static_cast<int>(entry_t::type_t::ENTRY_TYPE_DOUBLE)] = &recursive_output::output_double,
2516 : [static_cast<int>(entry_t::type_t::ENTRY_TYPE_BEDOUBLE)] = &recursive_output::output_bedouble,
2517 : [static_cast<int>(entry_t::type_t::ENTRY_TYPE_LEDOUBLE)] = &recursive_output::output_ledouble,
2518 : // "text" (if value includes characters considered binary bytes then it is considered binary too)
2519 : [static_cast<int>(entry_t::type_t::ENTRY_TYPE_STRING)] = &recursive_output::output_string,
2520 : [static_cast<int>(entry_t::type_t::ENTRY_TYPE_PSTRING)] = &recursive_output::output_pstring,
2521 : [static_cast<int>(entry_t::type_t::ENTRY_TYPE_BESTRING16)] = &recursive_output::output_besearch16,
2522 : [static_cast<int>(entry_t::type_t::ENTRY_TYPE_LESTRING16)] = &recursive_output::output_lesearch16,
2523 : [static_cast<int>(entry_t::type_t::ENTRY_TYPE_SEARCH)] = &recursive_output::output_search,
2524 : [static_cast<int>(entry_t::type_t::ENTRY_TYPE_REGEX)] = &recursive_output::output_regex,
2525 : // date
2526 : [static_cast<int>(entry_t::type_t::ENTRY_TYPE_DATE)] = &recursive_output::output_date,
2527 : [static_cast<int>(entry_t::type_t::ENTRY_TYPE_QDATE)] = &recursive_output::output_qdate,
2528 : [static_cast<int>(entry_t::type_t::ENTRY_TYPE_LDATE)] = &recursive_output::output_ldate,
2529 : [static_cast<int>(entry_t::type_t::ENTRY_TYPE_QLDATE)] = &recursive_output::output_qldate,
2530 : [static_cast<int>(entry_t::type_t::ENTRY_TYPE_BEDATE)] = &recursive_output::output_bedate,
2531 : [static_cast<int>(entry_t::type_t::ENTRY_TYPE_BEQDATE)] = &recursive_output::output_beqdate,
2532 : [static_cast<int>(entry_t::type_t::ENTRY_TYPE_BELDATE)] = &recursive_output::output_beldate,
2533 : [static_cast<int>(entry_t::type_t::ENTRY_TYPE_BEQLDATE)] = &recursive_output::output_beqldate,
2534 : [static_cast<int>(entry_t::type_t::ENTRY_TYPE_LEDATE)] = &recursive_output::output_ledate,
2535 : [static_cast<int>(entry_t::type_t::ENTRY_TYPE_LEQDATE)] = &recursive_output::output_leqdate,
2536 : [static_cast<int>(entry_t::type_t::ENTRY_TYPE_LELDATE)] = &recursive_output::output_leldate,
2537 : [static_cast<int>(entry_t::type_t::ENTRY_TYPE_LEQLDATE)] = &recursive_output::output_leqldate,
2538 : [static_cast<int>(entry_t::type_t::ENTRY_TYPE_MEDATE)] = &recursive_output::output_medate,
2539 : [static_cast<int>(entry_t::type_t::ENTRY_TYPE_MELDATE)] = &recursive_output::output_meldate,
2540 : // special
2541 : [static_cast<int>(entry_t::type_t::ENTRY_TYPE_INDIRECT)] = &recursive_output::output_indirect,
2542 : [static_cast<int>(entry_t::type_t::ENTRY_TYPE_DEFAULT)] = &recursive_output::output_default,
2543 : [static_cast<int>(entry_t::type_t::ENTRY_TYPE_NAME)] = &recursive_output::output_name,
2544 : [static_cast<int>(entry_t::type_t::ENTRY_TYPE_USE)] = &recursive_output::output_use
2545 : };
2546 : #pragma GCC diagnostic pop
2547 :
2548 12 : std::cout << "if(";
2549 12 : (this->*output_by_type[static_cast<int>(f_entries[pos]->get_type())])(pos);
2550 12 : std::cout << ")\n{\n";
2551 12 : }
2552 :
2553 0 : void output_unknown(size_t pos)
2554 : {
2555 0 : snap::NOTUSED(pos);
2556 0 : std::cerr << "error: found an unknown entry while outputing data.\n";
2557 0 : exit(1);
2558 : }
2559 :
2560 2 : void output_byte(size_t pos)
2561 : {
2562 2 : int64_t const be(f_entries[pos]->get_integer());
2563 2 : std::cout << "buf[" << (f_has_mime ? "" : "offset+") << f_entries[pos]->get_offset() << "]"
2564 : << " "
2565 4 : << f_entries[pos]->flags_to_js_operator()
2566 4 : << " 0x"
2567 2 : << std::hex << std::uppercase
2568 2 : << (be & 0xff)
2569 2 : << std::dec << std::nouppercase;
2570 2 : }
2571 :
2572 0 : void output_ubyte(size_t pos)
2573 : {
2574 0 : int64_t const be(f_entries[pos]->get_integer());
2575 0 : std::cout << "buf[" << (f_has_mime ? "" : "offset+") << f_entries[pos]->get_offset() << "]"
2576 : << " "
2577 0 : << f_entries[pos]->flags_to_js_operator()
2578 0 : << " 0x"
2579 0 : << std::hex << std::uppercase
2580 0 : << (be & 0xff)
2581 0 : << std::dec << std::nouppercase;
2582 0 : }
2583 :
2584 0 : void output_short(size_t pos)
2585 : {
2586 0 : snap::NOTUSED(pos);
2587 0 : std::cerr << "error: type not implemented yet (short).\n";
2588 0 : exit(1);
2589 : }
2590 :
2591 2 : void output_leshort(size_t pos)
2592 : {
2593 2 : int64_t const le(f_entries[pos]->get_integer());
2594 2 : int64_t const offset(f_entries[pos]->get_offset());
2595 2 : std::cout << "buf[" << (f_has_mime ? "" : "offset+") << offset
2596 2 : << "] + buf[" << (f_has_mime ? "" : "offset+") << (offset + 1)
2597 : << "] * 256 "
2598 4 : << f_entries[pos]->flags_to_js_operator()
2599 4 : << " 0x"
2600 2 : << std::hex << std::uppercase
2601 2 : << (le & 0xffff)
2602 2 : << std::dec << std::nouppercase;
2603 2 : }
2604 :
2605 0 : void output_beshort(size_t pos)
2606 : {
2607 0 : int64_t const be(f_entries[pos]->get_integer());
2608 0 : int64_t const offset(f_entries[pos]->get_offset());
2609 0 : std::cout << "buf[" << (f_has_mime ? "" : "offset+") << offset
2610 0 : << "] * 256 + buf[" << (f_has_mime ? "" : "offset+") << (offset + 1)
2611 : << " "
2612 0 : << f_entries[pos]->flags_to_js_operator()
2613 0 : << " 0x"
2614 0 : << std::hex << std::uppercase
2615 0 : << (be & 0xffff)
2616 0 : << std::dec << std::nouppercase;
2617 0 : }
2618 :
2619 0 : void output_ushort(size_t pos)
2620 : {
2621 0 : snap::NOTUSED(pos);
2622 0 : std::cerr << "error: type not implemented yet (ushort).\n";
2623 0 : exit(1);
2624 : }
2625 :
2626 0 : void output_uleshort(size_t pos)
2627 : {
2628 0 : int64_t const ule(f_entries[pos]->get_integer());
2629 0 : int64_t const offset(f_entries[pos]->get_offset());
2630 0 : std::cout << "buf[" << (f_has_mime ? "" : "offset+") << offset
2631 0 : << "] + buf[" << (f_has_mime ? "" : "offset+") << (offset + 1)
2632 : << " * 256 "
2633 0 : << f_entries[pos]->flags_to_js_operator()
2634 0 : << " 0x"
2635 0 : << std::hex << std::uppercase
2636 0 : << (ule & 0xffff)
2637 0 : << std::dec << std::nouppercase;
2638 0 : }
2639 :
2640 0 : void output_ubeshort(size_t pos)
2641 : {
2642 0 : int64_t const ube(f_entries[pos]->get_integer());
2643 0 : int64_t const offset(f_entries[pos]->get_offset());
2644 0 : std::cout << "buf[" << (f_has_mime ? "" : "offset+") << offset
2645 0 : << "] * 256 + buf[" << (f_has_mime ? "" : "offset+") << (offset + 1)
2646 : << " "
2647 0 : << f_entries[pos]->flags_to_js_operator()
2648 0 : << " 0x"
2649 0 : << std::hex << std::uppercase
2650 0 : << (ube & 0xffff)
2651 0 : << std::dec << std::nouppercase;
2652 0 : }
2653 :
2654 0 : void output_long(size_t pos)
2655 : {
2656 : // this is a machine byte order, I am not currently sure
2657 : // on how we could really get that in JavaScript; for
2658 : // now do a little endian since most users have x86 based
2659 : // processors which are in little endian
2660 : //
2661 0 : int64_t const le(f_entries[pos]->get_integer());
2662 0 : int64_t const offset(f_entries[pos]->get_offset());
2663 0 : std::cout << "buf[" << (f_has_mime ? "" : "offset+") << offset
2664 0 : << "] + buf[" << (f_has_mime ? "" : "offset+") << (offset + 1)
2665 0 : << "] * 256 + buf[" << (f_has_mime ? "" : "offset+") << (offset + 2)
2666 0 : << "] * 65536 + buf[" << (f_has_mime ? "" : "offset+") << (offset + 3)
2667 : << "] * 16777216 "
2668 0 : << f_entries[pos]->flags_to_js_operator()
2669 0 : << " 0x"
2670 0 : << std::hex << std::uppercase
2671 0 : << (le & 0xffffffffLL)
2672 0 : << std::dec << std::nouppercase;
2673 0 : }
2674 :
2675 4 : void output_lelong(size_t pos)
2676 : {
2677 4 : int64_t const le(f_entries[pos]->get_integer());
2678 4 : int64_t const offset(f_entries[pos]->get_offset());
2679 4 : std::cout << "buf[" << (f_has_mime ? "" : "offset+") << offset
2680 4 : << "] + buf[" << (f_has_mime ? "" : "offset+") << (offset + 1)
2681 4 : << "] * 256 + buf[" << (f_has_mime ? "" : "offset+") << (offset + 2)
2682 4 : << "] * 65536 + buf[" << (f_has_mime ? "" : "offset+") << (offset + 3)
2683 : << "] * 16777216 "
2684 8 : << f_entries[pos]->flags_to_js_operator()
2685 8 : << " 0x"
2686 4 : << std::hex << std::uppercase
2687 4 : << (le & 0xffffffffLL)
2688 4 : << std::dec << std::nouppercase;
2689 4 : }
2690 :
2691 2 : void output_belong(size_t pos)
2692 : {
2693 2 : int64_t const be(f_entries[pos]->get_integer());
2694 2 : int64_t const offset(f_entries[pos]->get_offset());
2695 2 : std::cout << "buf[" << (f_has_mime ? "" : "offset+") << offset
2696 2 : << "] * 16777216 + buf[" << (f_has_mime ? "" : "offset+") << (offset + 1)
2697 2 : << "] * 65536 + buf[" << (f_has_mime ? "" : "offset+") << (offset + 2)
2698 2 : << "] * 256 + buf[" << (f_has_mime ? "" : "offset+") << (offset + 3)
2699 : << "] "
2700 4 : << f_entries[pos]->flags_to_js_operator()
2701 4 : << " 0x"
2702 2 : << std::hex << std::uppercase
2703 2 : << (be & 0xffffffffLL)
2704 2 : << std::dec << std::nouppercase;
2705 2 : }
2706 :
2707 0 : void output_melong(size_t pos)
2708 : {
2709 0 : snap::NOTUSED(pos);
2710 0 : std::cerr << "error: type not implemented yet (melong).\n";
2711 0 : exit(1);
2712 : }
2713 :
2714 0 : void output_ulong(size_t pos)
2715 : {
2716 0 : snap::NOTUSED(pos);
2717 0 : std::cerr << "error: type not implemented yet (ulong).\n";
2718 0 : exit(1);
2719 : }
2720 :
2721 0 : void output_ulelong(size_t pos)
2722 : {
2723 0 : snap::NOTUSED(pos);
2724 0 : std::cerr << "error: type not implemented yet (ulelong).\n";
2725 0 : exit(1);
2726 : }
2727 :
2728 0 : void output_ubelong(size_t pos)
2729 : {
2730 0 : int64_t const ube(f_entries[pos]->get_integer());
2731 0 : int64_t const offset(f_entries[pos]->get_offset());
2732 0 : std::cout << "buf[" << (f_has_mime ? "" : "offset+") << offset
2733 0 : << "] * 16777216 + buf[" << (f_has_mime ? "" : "offset+") << (offset + 1)
2734 0 : << "] * 65536 + buf[" << (f_has_mime ? "" : "offset+") << (offset + 2)
2735 0 : << "] * 256 + buf[" << (f_has_mime ? "" : "offset+") << (offset + 3)
2736 : << "] "
2737 0 : << f_entries[pos]->flags_to_js_operator()
2738 0 : << " 0x"
2739 0 : << std::hex << std::uppercase
2740 0 : << (ube & 0xffffffffLL)
2741 0 : << std::dec << std::nouppercase;
2742 0 : }
2743 :
2744 0 : void output_umelong(size_t pos)
2745 : {
2746 0 : snap::NOTUSED(pos);
2747 0 : std::cerr << "error: type not implemented yet (umelong).\n";
2748 0 : exit(1);
2749 : }
2750 :
2751 0 : void output_beid3(size_t pos)
2752 : {
2753 0 : snap::NOTUSED(pos);
2754 0 : std::cerr << "error: type not implemented yet (beid3).\n";
2755 0 : exit(1);
2756 : }
2757 :
2758 0 : void output_leid3(size_t pos)
2759 : {
2760 0 : snap::NOTUSED(pos);
2761 0 : std::cerr << "error: type not implemented yet (leid3).\n";
2762 0 : exit(1);
2763 : }
2764 :
2765 0 : void output_ubeid3(size_t pos)
2766 : {
2767 0 : snap::NOTUSED(pos);
2768 0 : std::cerr << "error: type not implemented yet (ubeid3).\n";
2769 0 : exit(1);
2770 : }
2771 :
2772 0 : void output_uleid3(size_t pos)
2773 : {
2774 0 : snap::NOTUSED(pos);
2775 0 : std::cerr << "error: type not implemented yet (uleid3).\n";
2776 0 : exit(1);
2777 : }
2778 :
2779 0 : void output_quad(size_t pos)
2780 : {
2781 0 : snap::NOTUSED(pos);
2782 0 : std::cerr << "error: type not implemented yet (quad).\n";
2783 0 : exit(1);
2784 : }
2785 :
2786 0 : void output_bequad(size_t pos)
2787 : {
2788 0 : snap::NOTUSED(pos);
2789 0 : std::cerr << "error: type not implemented yet (bequad).\n";
2790 0 : exit(1);
2791 : }
2792 :
2793 0 : void output_lequad(size_t pos)
2794 : {
2795 0 : snap::NOTUSED(pos);
2796 0 : std::cerr << "error: type not implemented yet (lequad).\n";
2797 0 : exit(1);
2798 : }
2799 :
2800 0 : void output_uquad(size_t pos)
2801 : {
2802 0 : snap::NOTUSED(pos);
2803 0 : std::cerr << "error: type not implemented yet (uquad).\n";
2804 0 : exit(1);
2805 : }
2806 :
2807 0 : void output_ubequad(size_t pos)
2808 : {
2809 0 : snap::NOTUSED(pos);
2810 0 : std::cerr << "error: type not implemented yet (ubequad).\n";
2811 0 : exit(1);
2812 : }
2813 :
2814 0 : void output_ulequad(size_t pos)
2815 : {
2816 0 : snap::NOTUSED(pos);
2817 0 : std::cerr << "error: type not implemented yet (ulequad).\n";
2818 0 : exit(1);
2819 : }
2820 :
2821 0 : void output_float(size_t pos)
2822 : {
2823 0 : snap::NOTUSED(pos);
2824 0 : std::cerr << "error: type not implemented yet (float).\n";
2825 0 : exit(1);
2826 : }
2827 :
2828 0 : void output_befloat(size_t pos)
2829 : {
2830 0 : snap::NOTUSED(pos);
2831 0 : std::cerr << "error: type not implemented yet (befloat).\n";
2832 0 : exit(1);
2833 : }
2834 :
2835 0 : void output_lefloat(size_t pos)
2836 : {
2837 0 : snap::NOTUSED(pos);
2838 0 : std::cerr << "error: type not implemented yet (lefloat).\n";
2839 0 : exit(1);
2840 : }
2841 :
2842 0 : void output_double(size_t pos)
2843 : {
2844 0 : snap::NOTUSED(pos);
2845 0 : std::cerr << "error: type not implemented yet (double).\n";
2846 0 : exit(1);
2847 : }
2848 :
2849 0 : void output_bedouble(size_t pos)
2850 : {
2851 0 : snap::NOTUSED(pos);
2852 0 : std::cerr << "error: type not implemented yet (bedouble).\n";
2853 0 : exit(1);
2854 : }
2855 :
2856 0 : void output_ledouble(size_t pos)
2857 : {
2858 0 : snap::NOTUSED(pos);
2859 0 : std::cerr << "error: type not implemented yet (ledouble).\n";
2860 0 : exit(1);
2861 : }
2862 :
2863 2 : void output_string(size_t pos)
2864 : {
2865 2 : parser::entry_t::integer_t offset(f_entries[pos]->get_offset());
2866 4 : std::string const str(f_entries[pos]->get_string());
2867 12 : for(size_t i(0); i < str.length(); ++i, ++offset)
2868 : {
2869 : std::cout << (i > 0 ? "\n&& " : "")
2870 : << "buf["
2871 10 : << (f_has_mime ? "" : "offset+")
2872 10 : << offset
2873 : << "] "
2874 20 : << f_entries[pos]->flags_to_js_operator()
2875 20 : << " 0x"
2876 10 : << std::hex << std::uppercase
2877 10 : << (static_cast<int>(str[i]) & 0xff)
2878 10 : << std::dec << std::nouppercase;
2879 : }
2880 2 : }
2881 :
2882 0 : void output_pstring(size_t pos)
2883 : {
2884 0 : snap::NOTUSED(pos);
2885 0 : std::cerr << "error: type not implemented yet (pstring).\n";
2886 0 : exit(1);
2887 : }
2888 :
2889 0 : void output_besearch16(size_t pos)
2890 : {
2891 0 : snap::NOTUSED(pos);
2892 0 : std::cerr << "error: type not implemented yet (besearch16).\n";
2893 0 : exit(1);
2894 : }
2895 :
2896 0 : void output_lesearch16(size_t pos)
2897 : {
2898 0 : snap::NOTUSED(pos);
2899 0 : std::cerr << "error: type not implemented yet (lesearch16).\n";
2900 0 : exit(1);
2901 : }
2902 :
2903 0 : void output_search(size_t pos)
2904 : {
2905 0 : parser::entry_t::integer_t const offset(f_entries[pos]->get_offset());
2906 0 : std::cout << "snapwebsites.BufferToMIMESystemImages.scan(buf,"
2907 0 : << offset << "," << f_entries[pos]->get_maxlength()
2908 0 : << ",{";
2909 0 : std::string const str(f_entries[pos]->get_string());
2910 0 : for(size_t i(0); i < str.length(); ++i)
2911 : {
2912 0 : std::cout << (i == 0 ? "" : ",")
2913 0 : << std::hex << std::uppercase
2914 0 : << "0x" << static_cast<int>(str[i])
2915 0 : << std::dec << std::nouppercase;
2916 : }
2917 0 : std::cout << "});";
2918 0 : }
2919 :
2920 0 : void output_regex(size_t pos)
2921 : {
2922 0 : parser::entry_t::integer_t const offset(f_entries[pos]->get_offset());
2923 0 : std::cout << "snapwebsites.BufferToMIMESystemImages.regex(buf,"
2924 0 : << offset << "," << f_entries[pos]->get_maxlength()
2925 0 : << ",{";
2926 0 : std::string const str(f_entries[pos]->get_string());
2927 0 : for(size_t i(0); i < str.length(); ++i)
2928 : {
2929 0 : std::cout << (i == 0 ? "" : ",")
2930 0 : << std::hex << std::uppercase
2931 0 : << "0x" << static_cast<int>(str[i])
2932 0 : << std::dec << std::nouppercase;
2933 : }
2934 0 : std::cout << "},"
2935 : << (
2936 0 : ((f_entries[pos]->get_flags() & entry_t::ENTRY_FLAG_LINES ) != 0 ? 1 : 0)
2937 0 : | ((f_entries[pos]->get_flags() & entry_t::ENTRY_FLAG_CASE_INSENSITIVE) != 0 ? 2 : 0)
2938 0 : | ((f_entries[pos]->get_flags() & entry_t::ENTRY_FLAG_START_OFFSET ) != 0 ? 4 : 0)
2939 : )
2940 0 : << ");";
2941 0 : }
2942 :
2943 0 : void output_date(size_t pos)
2944 : {
2945 0 : snap::NOTUSED(pos);
2946 0 : std::cerr << "error: type not implemented yet (date).\n";
2947 0 : exit(1);
2948 : }
2949 :
2950 0 : void output_qdate(size_t pos)
2951 : {
2952 0 : snap::NOTUSED(pos);
2953 0 : std::cerr << "error: type not implemented yet (qdate).\n";
2954 0 : exit(1);
2955 : }
2956 :
2957 0 : void output_ldate(size_t pos)
2958 : {
2959 0 : snap::NOTUSED(pos);
2960 0 : std::cerr << "error: type not implemented yet (ldate).\n";
2961 0 : exit(1);
2962 : }
2963 :
2964 0 : void output_qldate(size_t pos)
2965 : {
2966 0 : snap::NOTUSED(pos);
2967 0 : std::cerr << "error: type not implemented yet (qldate).\n";
2968 0 : exit(1);
2969 : }
2970 :
2971 0 : void output_bedate(size_t pos)
2972 : {
2973 0 : snap::NOTUSED(pos);
2974 0 : std::cerr << "error: type not implemented yet (bedate).\n";
2975 0 : exit(1);
2976 : }
2977 :
2978 0 : void output_beqdate(size_t pos)
2979 : {
2980 0 : snap::NOTUSED(pos);
2981 0 : std::cerr << "error: type not implemented yet (beqdate).\n";
2982 0 : exit(1);
2983 : }
2984 :
2985 0 : void output_beldate(size_t pos)
2986 : {
2987 0 : snap::NOTUSED(pos);
2988 0 : std::cerr << "error: type not implemented yet (beldate).\n";
2989 0 : exit(1);
2990 : }
2991 :
2992 0 : void output_beqldate(size_t pos)
2993 : {
2994 0 : snap::NOTUSED(pos);
2995 0 : std::cerr << "error: type not implemented yet (beqldate).\n";
2996 0 : exit(1);
2997 : }
2998 :
2999 0 : void output_ledate(size_t pos)
3000 : {
3001 0 : snap::NOTUSED(pos);
3002 0 : std::cerr << "error: type not implemented yet (ledate).\n";
3003 0 : exit(1);
3004 : }
3005 :
3006 0 : void output_leqdate(size_t pos)
3007 : {
3008 0 : snap::NOTUSED(pos);
3009 0 : std::cerr << "error: type not implemented yet (leqdate).\n";
3010 0 : exit(1);
3011 : }
3012 :
3013 0 : void output_leldate(size_t pos)
3014 : {
3015 0 : snap::NOTUSED(pos);
3016 0 : std::cerr << "error: type not implemented yet (leldate).\n";
3017 0 : exit(1);
3018 : }
3019 :
3020 0 : void output_leqldate(size_t pos)
3021 : {
3022 0 : snap::NOTUSED(pos);
3023 0 : std::cerr << "error: type not implemented yet (leqldate).\n";
3024 0 : exit(1);
3025 : }
3026 :
3027 0 : void output_medate(size_t pos)
3028 : {
3029 0 : snap::NOTUSED(pos);
3030 0 : std::cerr << "error: type not implemented yet (medate).\n";
3031 0 : exit(1);
3032 : }
3033 :
3034 0 : void output_meldate(size_t pos)
3035 : {
3036 0 : snap::NOTUSED(pos);
3037 0 : std::cerr << "error: type not implemented yet (meldate).\n";
3038 0 : exit(1);
3039 : }
3040 :
3041 0 : void output_indirect(size_t pos)
3042 : {
3043 0 : snap::NOTUSED(pos);
3044 0 : std::cerr << "error: type not implemented yet (indirect).\n";
3045 0 : exit(1);
3046 : }
3047 :
3048 0 : void output_default(size_t pos)
3049 : {
3050 : // default is always true
3051 0 : snap::NOTUSED(pos);
3052 0 : std::cout << "true";
3053 0 : }
3054 :
3055 0 : void output_name(size_t pos)
3056 : {
3057 0 : snap::NOTUSED(pos);
3058 : // this is already done in the caller which generates the
3059 : // function declaration
3060 0 : }
3061 :
3062 0 : void output_use(size_t pos)
3063 : {
3064 0 : std::cout << "__macro_" << f_entries[pos]->get_string()
3065 0 : << "("
3066 0 : << f_entries[pos]->get_offset()
3067 0 : << ")";
3068 0 : }
3069 :
3070 12 : void output_mimetype(size_t pos)
3071 : {
3072 24 : std::string const mimetype(f_entries[pos]->get_mimetype());
3073 12 : if(!mimetype.empty())
3074 : {
3075 4 : std::cout << "return \"" << mimetype << "\";" << std::endl;
3076 : }
3077 12 : }
3078 :
3079 12 : void output_endif(size_t pos)
3080 : {
3081 12 : snap::NOTUSED(pos);
3082 12 : std::cout << "}" << std::endl;
3083 12 : }
3084 :
3085 : // variable members
3086 : entry_vector_t f_entries = entry_vector_t();
3087 : bool f_has_mime = false;
3088 : };
3089 8 : recursive_output out(has_mime);
3090 :
3091 : // first remove all entries that we are not going to use (i.e.
3092 : // anything at the end which does not include a MIME type)
3093 4 : entry_t::integer_t l(-1);
3094 4 : if(!has_mime)
3095 : {
3096 0 : l = f_entries[end - 1]->get_level();
3097 : }
3098 4 : size_t j(end);
3099 36 : while(j > start)
3100 : {
3101 16 : --j;
3102 :
3103 16 : if(f_entries[j]->get_type() != entry_t::type_t::ENTRY_TYPE_NAME)
3104 : {
3105 16 : if(f_entries[j]->get_mimetype().empty())
3106 : {
3107 12 : if(f_entries[j]->get_level() <= l)
3108 : {
3109 8 : out.f_entries.insert(out.f_entries.begin(), f_entries[j]);
3110 : }
3111 : }
3112 : else
3113 : {
3114 4 : l = f_entries[j]->get_level();
3115 4 : out.f_entries.insert(out.f_entries.begin(), f_entries[j]);
3116 : }
3117 : }
3118 : }
3119 :
3120 4 : out.output(0);
3121 4 : }
3122 :
3123 :
3124 1 : void parser::output_header()
3125 : {
3126 2 : std::string lower_magic_name(f_magic_name);
3127 1 : std::transform(lower_magic_name.begin(), lower_magic_name.end(), lower_magic_name.begin(), ::tolower);
3128 :
3129 : std::cout <<
3130 : "/** @preserve\n"
3131 : " * WARNING: AUTO-GENERATED FILE, DO NOT EDIT. See Source: magic-to-js.cpp\n"
3132 : " * Name: mimetype-" << lower_magic_name << "\n"
3133 : " * Version: " << MIMETYPE_VERSION_STRING << "\n"
3134 : " * Browsers: all\n"
3135 : " * Copyright: Copyright (c) 2014-2019 Made to Order Software Corp. All Rights Reserved.\n"
3136 : " * Depends: output (0.1.5.5)\n"
3137 : " * License: GPL 2.0\n"
3138 : " * Source: File generated by magic-to-js from magic library definition files.\n"
3139 : " */\n"
3140 : "\n"
3141 : "\n"
3142 : "//\n"
3143 : "// Inline \"command line\" parameters for the Google Closure Compiler\n"
3144 : "// See output of:\n"
3145 : "// java -jar .../google-js-compiler/compiler.jar --help\n"
3146 : "//\n"
3147 : "// ==ClosureCompiler==\n"
3148 : "// @compilation_level ADVANCED_OPTIMIZATIONS\n"
3149 : "// @externs $CLOSURE_COMPILER/contrib/externs/jquery-1.9.js\n"
3150 : "// @externs plugins/output/externs/jquery-extensions.js\n"
3151 : "// ==/ClosureCompiler==\n"
3152 : "//\n"
3153 : "\n"
3154 : "/*jslint nomen: true, todo: true, devel: true */\n"
3155 : "/*global snapwebsites: false, jQuery: false, Uint8Array: true */\n"
3156 : "\n"
3157 : "\n"
3158 : "\n"
3159 : "/** \\brief Check for \"system\" images.\n"
3160 : " *\n"
3161 : " * This function checks for well known images. The function is generally\n"
3162 : " * very fast because it checks only the few very well known image file\n"
3163 : " * formats.\n"
3164 : " *\n"
3165 : " * @return {!snapwebsites.BufferToMIMESystemImages} A reference to this new\n"
3166 : " * object.\n"
3167 : " *\n"
3168 : " * @extends {snapwebsites.BufferToMIMETemplate}\n"
3169 : " * @constructor\n"
3170 : " */\n"
3171 : "snapwebsites.BufferToMIME" << f_magic_name << " = function()\n"
3172 : "{\n"
3173 : " snapwebsites.BufferToMIME" << f_magic_name << ".superClass_.constructor.call(this);\n"
3174 : "\n"
3175 : " return this;\n"
3176 : "};\n"
3177 : "\n"
3178 : "\n"
3179 : "/** \\brief Chain up the extension.\n"
3180 : " *\n"
3181 : " * This is the chain between this class and it's super.\n"
3182 : " */\n"
3183 : "snapwebsites.inherits(snapwebsites.BufferToMIME" << f_magic_name << ", snapwebsites.BufferToMIMETemplate);\n"
3184 : "\n"
3185 : "\n"
3186 : "/** \\brief Check for the " << f_magic_name << " file formats.\n"
3187 : " *\n"
3188 : " * This function checks for file formats as defined in the magic library.\n"
3189 : " * This version includes the descriptions from the following files:\n"
3190 : " *\n"
3191 2 : << f_lexer->list_of_filenames() <<
3192 : " *\n"
3193 : " * @param {!Uint8Array} buf The array of data to check for a known magic.\n"
3194 : " *\n"
3195 : " * @return {!string} The MIME type or the empty string if not determined.\n"
3196 : " *\n"
3197 : " * @override\n"
3198 : " */\n"
3199 1 : "snapwebsites.BufferToMIME" << f_magic_name << ".prototype.bufferToMIME = function(buf)\n"
3200 2 : "{\n"
3201 : ;
3202 :
3203 1 : }
3204 :
3205 :
3206 1 : void parser::output_footer()
3207 : {
3208 : // close the function we opened in the header
3209 : std::cout <<
3210 : "return \"\";\n"
3211 : "};\n"
3212 : "\n"
3213 : "// auto-initialize\n"
3214 : "jQuery(document).ready(\n"
3215 : " function()\n"
3216 : " {\n"
3217 1 : " snapwebsites.OutputInstance.registerBufferToMIME(new snapwebsites.BufferToMIME" << f_magic_name << "());\n"
3218 : " }\n"
3219 1 : ");\n"
3220 : ;
3221 :
3222 1 : }
3223 :
3224 :
3225 :
/** \brief Print the command line help and exit.
 *
 * The help screen is written to std::cout, one line at a time, then
 * the process ends with exit code 1. This function never returns.
 *
 * \return Nothing, since exit() gets called.
 */
int usage()
{
    // lines of the help screen, in the order they get printed
    //
    static char const * const help_lines[] =
    {
        "Usage: magic-to-js <input files> ...",
        "You may also want to redirect the output to a .js file",
        " --debug | -d print out debug information in stderr",
        " --help | -h print out this help screen",
        " --lib-version print out this tool's version",
        " --name | -n specify the name of the magic MIME to output",
        " --version print out this tool's version"
    };

    for(char const * const line : help_lines)
    {
        std::cout << line << std::endl;
    }

    exit(1);
}
3237 :
3238 :
3239 1 : int main(int argc, char *argv[])
3240 : {
3241 : try
3242 : {
3243 2 : lexer::filenames_t fn;
3244 2 : std::string magic_name;
3245 :
3246 4 : for(int i(1); i < argc; ++i)
3247 : {
3248 3 : if(strcmp(argv[i], "-h") == 0
3249 3 : || strcmp(argv[i], "--help") == 0)
3250 : {
3251 0 : usage();
3252 0 : snap::NOTREACHED();
3253 : }
3254 3 : if(strcmp(argv[i], "--version") == 0)
3255 : {
3256 0 : std::cout << MIMETYPE_VERSION_STRING << std::endl;
3257 0 : exit(1);
3258 : snap::NOTREACHED();
3259 : }
3260 3 : if(strcmp(argv[i], "--lib-version") == 0)
3261 : {
3262 0 : std::cout << SNAPWEBSITES_VERSION_MAJOR << "." << SNAPWEBSITES_VERSION_MINOR << "." << SNAPWEBSITES_VERSION_PATCH << std::endl;
3263 0 : exit(1);
3264 : snap::NOTREACHED();
3265 : }
3266 3 : if(strcmp(argv[i], "-d") == 0
3267 3 : || strcmp(argv[i], "--debug") == 0)
3268 : {
3269 0 : std::cerr << "info: turning debug ON\n";
3270 0 : g_debug = true;
3271 : }
3272 3 : else if(strcmp(argv[i], "-n") == 0
3273 3 : || strcmp(argv[i], "--name") == 0)
3274 : {
3275 1 : ++i;
3276 1 : if(i >= argc)
3277 : {
3278 0 : std::cerr << "error: -n/--name expect to be followed by one argument, the magic name." << std::endl;
3279 0 : exit(1);
3280 : }
3281 1 : magic_name = argv[i];
3282 : }
3283 : else
3284 : {
3285 2 : fn.push_back(argv[i]);
3286 : }
3287 : }
3288 :
3289 1 : if(fn.empty())
3290 : {
3291 0 : std::cerr << "error: expected at least one filename on the command line. Try --help for more info." << std::endl;
3292 0 : exit(1);
3293 : }
3294 :
3295 1 : if(magic_name.empty())
3296 : {
3297 0 : std::cerr << "error: a magic name must be specified (--name option)" << std::endl;
3298 0 : exit(1);
3299 : }
3300 :
3301 2 : lexer::pointer_t l(new lexer(fn));
3302 2 : parser::pointer_t p(new parser(l, magic_name));
3303 1 : p->parse();
3304 :
3305 : // it worked, the parser has now a pile of parsed lines we can
3306 : // convert in JavaScript
3307 1 : p->output();
3308 :
3309 1 : return 0;
3310 : }
3311 0 : catch(std::exception const & e)
3312 : {
3313 0 : std::cerr << "magic-to-js: exception: " << e.what() << std::endl;
3314 0 : return 1;
3315 : }
3316 3 : }
3317 :
3318 :
3319 : // vim: ts=4 sw=4 et
|