37#include    <libutf8/base.h> 
   38#include    <libutf8/libutf8.h> 
   43#include    <snapdev/not_reached.h> 
   44#include    <snapdev/trim_string.h> 
   49#include    <snapdev/poison.h> 
   59          std::string 
const & filename
 
   61        , node::pointer_t & root)
 
   62    : f_filename(filename)
 
   // Parse the token stream into a tree of nodes and hand the root node back
   // through the root parameter.
   //
   // NOTE(review): only part of this function is visible in this listing
   // (braces, the loop header and several statements are missing), so the
   // comments describe the visible statements only.
   //
   // param root  Output; set to a new node created from the first opening tag.
   // throws unexpected_token  When a token appears where it is not allowed
   //                          (each throw below carries its own message).
   // throws logic_error  When one of the tokens in the last case group reaches
   //                     the switch.
   75void parser::load(node::pointer_t & root)
 
   // read the first token; false means we are not parsing tag attributes
   77    token_t tok(get_token(
false));
 
   // skip_empty() moves past text tokens found before the root tag
   79    tok = skip_empty(tok);
 
   // a processor tag is accepted here; read the token that follows it
   80    if(tok == token_t::TOK_PROCESSOR) 
 
   82        tok = get_token(
false);
 
   84    tok = skip_empty(tok);
 
   // the first significant token must be the opening of the root tag
   88    if(tok != token_t::TOK_OPEN_TAG)
 
   90        throw unexpected_token(
 
   93                + std::to_string(f_line)
 
   94                + 
": cannot be empty or include anything other than a processor tag and comments before the root tag.");
 
   // presumably f_value holds the tag name left by get_token() -- TODO confirm
   96    root = std::make_shared<node>(f_value);
 
   // read_tag_attributes() reports how the tag ended; an empty root is refused
   97    if(read_tag_attributes(root) == token_t::TOK_EMPTY_TAG)
 
   99        throw unexpected_token(
 
  102                + std::to_string(f_line)
 
  103                + 
": root tag cannot be an empty tag.");
 
  105    tok = get_token(
false);
 
   // parent is the node currently receiving children and text
  107    node::pointer_t parent(root);
 
   // opening tag: create a child node and attach it to the current parent
  112        case token_t::TOK_OPEN_TAG:
 
  114                node::pointer_t child(std::make_shared<node>(f_value));
 
  115                parent->append_child(child);
 
   // TOK_END_TAG means the tag was not closed in place; the hidden lines
   // presumably make child the new parent -- TODO confirm
  116                if(read_tag_attributes(child) == token_t::TOK_END_TAG)
 
   // closing tag: its name must match the tag currently open
  123        case token_t::TOK_CLOSE_TAG:
 
  124            if(parent->tag_name() != f_value)
 
  126                throw unexpected_token(
 
  129                        + std::to_string(f_line)
 
  130                        + 
": unexpected token \"" 
  132                        + 
"\" in this closing tag; expected \"" 
   // go back up one level; a null parent means the root tag was just closed
  136            parent = parent->parent();
 
  137            if(parent == 
nullptr)
 
   // tokens found after the closing root tag are checked by the cases below
  141                    tok = get_token(
false);
 
  144                    case token_t::TOK_EOF:
 
  149                    case token_t::TOK_TEXT:
 
  150                        tok = skip_empty(tok);
 
  153                    case token_t::TOK_PROCESSOR:
 
  158                        throw unexpected_token(
 
  161                                + std::to_string(f_line)
 
  162                                + 
": we reached the end of the XML file, but still found a token of type " 
  163                                + std::to_string(
static_cast<int>(tok))
 
  164                                + 
" after the closing root tag instead of the end of the file.");
 
   // text found inside a tag is appended to the current parent
  171        case token_t::TOK_TEXT:
 
  172            parent->append_text(f_value);
 
   // reaching the end of the input here means a tag is still open
  175        case token_t::TOK_EOF:
 
  176            throw unexpected_token(
 
  179                  + std::to_string(f_line)
 
  180                  + 
": reached the end of the file without first closing the root tag.");
 
   // none of these tokens is expected by this switch; treated as a logic error
  183        case token_t::TOK_EMPTY_TAG:
 
  184        case token_t::TOK_END_TAG:
 
  185        case token_t::TOK_EQUAL:
 
  186        case token_t::TOK_IDENTIFIER:
 
  187        case token_t::TOK_PROCESSOR:
 
  188        case token_t::TOK_STRING:
 
  189            throw logic_error(
"Received an unexpected token in the switch handler.");
 
   // fetch the next token for the following iteration
  193        tok = get_token(
false);
 
 
  // Skip text tokens found outside of the root tag.
  //
  // While the current token is TOK_TEXT, f_value is trimmed and the next
  // token is read. The condition guarding the throw is not visible in this
  // listing; judging by the message it presumably fires when the trimmed
  // text is not empty -- TODO confirm.
  //
  // param tok  The token the caller just read.
  // return  Presumably the first token that is not TOK_TEXT (the return
  //         statement is not visible here).
  // throws unexpected_token  When text data is found before or after the root.
  198parser::token_t parser::skip_empty(token_t tok)
 
  200    while(tok == token_t::TOK_TEXT)
 
  // remove surrounding blanks so that whitespace-only text ends up empty
  202        f_value = snapdev::trim_string(f_value);
 
  205            throw unexpected_token(
 
  208                    + std::to_string(f_line)
 
  209                    + 
": cannot include text data before or after the root tag.");
 
  // move on to the token following the text
  211        tok = get_token(
false);
 
  // Read the attributes of the tag that was just opened and store them in tag.
  //
  // Each attribute is expected as three tokens in a row: an identifier, an
  // equal sign and a quoted string. Reading stops on TOK_END_TAG or
  // TOK_EMPTY_TAG (the return statement itself is not visible in this
  // listing; the callers compare the result against those two tokens).
  //
  // param tag  The node receiving the attributes through set_attribute().
  // return  The token which ended the tag -- presumably TOK_END_TAG or
  //         TOK_EMPTY_TAG; confirm against the hidden lines.
  218parser::token_t parser::read_tag_attributes(node::pointer_t & tag)
 
  // true asks get_token() to tokenize in attribute mode
  222        token_t tok(get_token(
true));
 
  // the end of the tag also ends the list of attributes
  223        if(tok == token_t::TOK_END_TAG
 
  224        || tok == token_t::TOK_EMPTY_TAG)
 
  // anything else must be the start of an attribute: its name
  228        if(tok != token_t::TOK_IDENTIFIER)
 
  233                    + std::to_string(f_line)
 
  234                    + 
": expected the end of the tag (>) or an attribute name.");
 
  // keep a copy of the name since f_value is reused by the next tokens
  236        std::string 
const name(f_value);
 
  237        tok = get_token(
true);
 
  238        if(tok != token_t::TOK_EQUAL)
 
  243                    + std::to_string(f_line)
 
  244                    + 
": expected the '=' character between the attribute name and value.");
 
  246        tok = get_token(
true);
 
  247        if(tok != token_t::TOK_STRING)
 
  252                    + std::to_string(f_line)
 
  253                    + 
": expected a quoted value after the '=' sign.");
 
  // NOTE(review): the duplicate test relies on the stored value not being
  // empty; an attribute first defined with an empty value would presumably
  // not be reported as defined twice -- confirm against the node class
  255        if(!tag->attribute(name).empty())
 
  260                    + std::to_string(f_line)
 
  261                    + 
": attribute \"" + name + 
"\" defined twice; we do not allow such.");
 
  // f_value now holds the string read for this attribute
  263        tag->set_attribute(name, f_value);
 
  // marks the code after the loop as unreachable
  265    snapdev::NOT_REACHED();
 
  // Read the next token from the input.
  //
  // Characters are read and appended to f_value as UTF-8 through
  // libutf8::to_u8string(). Most of the control flow of this function is not
  // visible in this listing, so the comments below only describe the visible
  // statements.
  //
  // param parsing_attributes  true when the caller is reading the inside of a
  //                           tag (see read_tag_attributes()); several tokens
  //                           below are only returned when this flag is set.
  // return  One of the token_t values seen in the return statements below.
  // throws unexpected_eof  When the input ends inside a construct.
  269parser::token_t parser::get_token(
bool parsing_attributes)
 
  // end of input
  278        case static_cast<char32_t>(EOF):
 
  279            return token_t::TOK_EOF;
 
  286            if(parsing_attributes)
 
  // processor tag: the input must not end before the tag is closed
  303                    if(c == 
static_cast<char32_t>(EOF))
 
  305                        throw unexpected_eof(
 
  308                            + std::to_string(f_line)
 
  309                            + 
": reached the end of the file while reading a processor (\"<?...?>\") tag.");
 
  316                            return token_t::TOK_PROCESSOR;
 
  // the content of the processor tag is accumulated in f_value
  320                    f_value += libutf8::to_u8string(c);
 
  322                snapdev::NOT_REACHED();
 
  323                return token_t::TOK_PROCESSOR;
 
  // an ASCII letter at this point is reported as an unsupported element
  // definition (see the message below)
  327                if((c >= 
'A' && c <= 
'Z')
 
  328                || (c >= 
'a' && c <= 
'z'))
 
  335                        + std::to_string(f_line)
 
  336                        + 
": found an element definition (such as an \"<!ELEMENT...>\" sequence), which is not supported.");
 
  // the next six characters must match the expected literal exactly;
  // 6 is the number of characters in that literal
  342                    char32_t const * expected = U
"CDATA[";
 
  343                    for(
int j(0); j < 6; ++j)
 
  345                        if(getc() != expected[j])
 
  350                                + std::to_string(f_line)
 
  351                                + 
": found an unexpected sequence of character in a \"<![CDATA[...\" sequence.");
 
  // the CDATA block must be closed before the end of the input
  357                        if(c == 
static_cast<char32_t>(EOF))
 
  359                            throw unexpected_eof(
 
  362                                + std::to_string(f_line)
 
  363                                + 
": found EOF while parsing a \"<![CDATA[...]]>\" sequence.");
 
  // the CDATA content is returned to the caller as a text token
  381                                    return token_t::TOK_TEXT;
 
  384                                f_value += libutf8::to_u8string(c);
 
  389                                f_value += libutf8::to_u8string(c);
 
  394                            f_value += libutf8::to_u8string(c);
 
  // a comment must be closed before the end of the input
  410                            if(c == 
static_cast<char32_t>(EOF))
 
  412                                throw unexpected_eof(
 
  415                                    + std::to_string(f_line)
 
  416                                    + 
": found EOF while parsing a comment (\"<!--...-->\") sequence.");
 
  438                        + std::to_string(f_line)
 
  439                        + std::string(
": character '")
 
  440                        + libutf8::to_u8string(c)
 
  441                        + 
"' was not expected after a \"<!\" sequence.");
 
  // closing tag: the name must begin with a valid name start character
  449                if(!is_name_start_char(c))
 
  451                    if(c == 
static_cast<char32_t>(EOF))
 
  453                        throw unexpected_eof(
 
  456                            + std::to_string(f_line)
 
  457                            + 
": expected a tag name after \"</\", not EOF.");
 
  462                            + std::to_string(f_line)
 
  464                            + libutf8::to_u8string(c)
 
  465                            + 
"' is not valid for a tag name.");
 
  469                    f_value += libutf8::to_u8string(c);
 
  // NOTE(review): the second message below narrows c with static_cast<char>
  // while the other messages of this function use libutf8::to_u8string(c);
  // a character outside of ASCII would be truncated -- consider using the
  // same conversion here
  482                    if(c == 
static_cast<char32_t>(EOF))
 
  484                        throw unexpected_eof(
 
  487                            + std::to_string(f_line)
 
  488                            + 
": expected '>', not EOF.");
 
  493                            + std::to_string(f_line)
 
  494                            + 
": found an unexpected '" 
  495                            + 
static_cast<char>(c)
 
  496                            + 
"' in a closing tag, expected '>' instead.");
 
  498                return token_t::TOK_CLOSE_TAG;
 
  // opening tag: same name start rule as for a closing tag
  509            if(!is_name_start_char(c))
 
  511                if(c == 
static_cast<char32_t>(EOF))
 
  513                    throw unexpected_eof(
 
  516                        + std::to_string(f_line)
 
  517                        + 
": expected a tag name after '<', not EOF.");
 
  522                        + std::to_string(f_line)
 
  524                        + libutf8::to_u8string(c)
 
  525                        + 
"' is not valid for a tag name.");
 
  529                f_value += libutf8::to_u8string(c);
 
  // this branch rejects a character other than the end of tag or slash
  // characters (the earlier branches of this chain are not visible)
  544            else if(c != 
'>' && c != 
'/')
 
  549                        + std::to_string(f_line)
 
  551                        + libutf8::to_u8string(c)
 
  552                        + 
"' is not valid right after a tag name.");
 
  555            return token_t::TOK_OPEN_TAG;
 
  // the following tokens are only returned in attribute mode
  558            if(parsing_attributes)
 
  560                return token_t::TOK_END_TAG;
 
  565            if(parsing_attributes)
 
  570                    return token_t::TOK_EMPTY_TAG;
 
  578            if(parsing_attributes)
 
  580                return token_t::TOK_EQUAL;
 
  586            if(parsing_attributes)
 
  595                        return token_t::TOK_STRING;
 
  602                            + std::to_string(f_line)
 
  603                            + 
": character '>' not expected inside a tag value; please use \">\" instead.");
 
  // the attribute value is accumulated in f_value
  605                    f_value += libutf8::to_u8string(c);
 
  607                snapdev::NOT_REACHED();
 
  // attribute names are gathered in f_value and returned as identifiers
  613        if(parsing_attributes
 
  618                f_value += libutf8::to_u8string(c);
 
  623                    return token_t::TOK_IDENTIFIER;
 
  626            snapdev::NOT_REACHED();
 
  // anything else is plain text, gathered in f_value; the end of the input
  // is one of the conditions tested before the text token is returned
  631            f_value += libutf8::to_u8string(c);
 
  634            || c == 
static_cast<decltype(c)
>(EOF))
 
  638                return token_t::TOK_TEXT;
 
  // Replace the XML entities found in f_value, in place.
  //
  // The visible branches handle the named entities quot, lt, gt and apos and
  // the numeric form starting with a hash sign; each one replaces the whole
  // entity, from the ampersand to the semicolon, with the matching character
  // or UTF-8 sequence. The test guarding the first replacement (an ampersand)
  // is not visible in this listing.
  //
  // throws invalid_entity  For an empty name, a numeric entity without a
  //                        number, or an unsupported entity.
  // throws invalid_number  When the number of a numeric entity is not accepted.
  645void parser::unescape_entities()
 
  647    for(std::string::size_type pos(0);;)
 
  // find the start of the next entity, searching from the current position
  649        pos = f_value.find(
'&', pos);
 
  650        if(pos == std::string::npos)
 
  // find the semicolon which ends the entity
  654        std::string::size_type 
const end(f_value.find(
';', pos + 1));
 
  655        if(end == std::string::npos)
 
  // name is the text strictly between the ampersand and the semicolon
  661        std::string name(f_value.substr(pos + 1, end - pos - 1));
 
  // end - pos + 1 is the full length of the entity, both delimiters included
  664            f_value.replace(pos, end - pos + 1, 1, 
'&');
 
  667        else if(name == 
"quot")
 
  669            f_value.replace(pos, end - pos + 1, 1, 
'"');
 
  672        else if(name == 
"lt")
 
  674            f_value.replace(pos, end - pos + 1, 1, 
'<');
 
  677        else if(name == 
"gt")
 
  679            f_value.replace(pos, end - pos + 1, 1, 
'>');
 
  682        else if(name == 
"apos")
 
  684            f_value.replace(pos, end - pos + 1, 1, 
'\'');
 
  687        else if(name.empty())
 
  689            throw invalid_entity(
 
  692                    + std::to_string(f_line)
 
  693                    + 
": the name of an entity cannot be empty (\"&;\" is not valid XML).");
 
  // numeric entity: the name starts with a hash sign
  695        else if(name[0] == 
'#')
 
  697            if(name.length() == 1)
 
  699                throw invalid_entity(
 
  702                    + std::to_string(f_line)
 
  703                    + 
": a numeric entity must have a number (\"&#;\" is not valid XML).");
 
  // NOTE(review): strtol() returns a long which is narrowed to char32_t here;
  // the lines computing base and validating the result are not visible, so
  // confirm that negative and out of range values are rejected and that the
  // leading hash sign was removed from name before this call
  718            char32_t const unicode(strtol(name.c_str(), &e, base));
 
  723                throw invalid_number(
 
  726                    + std::to_string(f_line)
 
  727                    + 
": the number found in numeric entity, \"" 
  729                    + 
"\", is not considered valid.");
 
  // replace the entity with the UTF-8 encoding of the code point
  731            std::string 
const utf8(libutf8::to_u8string(unicode));
 
  732            f_value.replace(pos, end - pos + 1, utf8);
 
  // continue the search after the bytes that were just inserted
  733            pos += utf8.length();
 
  737            throw invalid_entity(
 
  740                    + std::to_string(f_line)
 
  741                    + 
": unsupported entity (\"&" 
  // Return the next character of the input as a char32_t.
  //
  // A character stored in the f_ungetc buffer is returned when available
  // (the test guarding that return is not visible in this listing).
  // Otherwise the visible statements assemble a multibyte sequence and
  // convert it with libutf8::mbstowc().
  //
  // return  The character read; callers compare it against EOF cast to
  //         char32_t to detect the end of the input.
  749char32_t parser::getc()
 
  754        return f_ungetc[f_ungetc_pos];
 
  // expected sequence length from the first byte: below 0xE0 gives 2 bytes,
  // below 0xF0 gives 3 bytes, anything else 4 bytes
  777        std::size_t 
const count(c < 0xE0 ? 2UL : (c < 0xF0 ? 3UL : 4UL));
 
  // read bytes until count bytes are available
  781        for(; len < count; ++len)
 
  // true when c is outside of the 0x80 to 0xBF range; presumably c is the
  // byte just read and this detects an invalid continuation -- TODO confirm
  784            if(c < 0x80 || c >= 0xC0)
 
  794        char32_t result(U
'\0');
 
  // a return value of -1 from the conversion is handled as a failure
  796        if(libutf8::mbstowc(result, s, len) == -1)
 
  // Push a character back in the f_ungetc buffer so it can be read again.
  //
  // EOF is ignored. The update of f_ungetc_pos is not visible in this
  // listing.
  //
  // param c  The character to push back.
  809void parser::ungetc(
char32_t c)
 
  // EOF is never stored in the buffer
  811    if(c != 
static_cast<char32_t>(EOF))
 
  // refuse to write past the end of the fixed size buffer
  813        if(f_ungetc_pos >= std::size(f_ungetc))
 
  819                    + std::to_string(f_line)
 
  820                    + 
": somehow the f_ungetc buffer was overflowed.");
 
  824        f_ungetc[f_ungetc_pos] = c;
 
Snap! Database exceptions.
Database file implementation.
Database file implementation.