LCOV - coverage.info - /home/snapwebsites/snapcpp/snapwebsites/snapdatabase/snapdatabase/data/xml.cpp

LCOV - code coverage report

Current view:	top level - home/snapwebsites/snapcpp/snapwebsites/snapdatabase/snapdatabase/data - xml.cpp (source / functions)		Hit	Total	Coverage
Test:	coverage.info	Lines:	297	422	70.4 %
Date:	2019-12-15 17:13:15	Functions:	30	31	96.8 %
Legend:	Lines: hit not hit

          Line data    Source code

       1             : // Copyright (c) 2019  Made to Order Software Corp.  All Rights Reserved
       2             : //
       3             : // https://snapwebsites.org/project/snapdatabase
       4             : // contact@m2osw.com
       5             : //
       6             : // This program is free software; you can redistribute it and/or modify
       7             : // it under the terms of the GNU General Public License as published by
       8             : // the Free Software Foundation; either version 2 of the License, or
       9             : // (at your option) any later version.
      10             : //
      11             : // This program is distributed in the hope that it will be useful,
      12             : // but WITHOUT ANY WARRANTY; without even the implied warranty of
      13             : // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      14             : // GNU General Public License for more details.
      15             : //
      16             : // You should have received a copy of the GNU General Public License along
      17             : // with this program; if not, write to the Free Software Foundation, Inc.,
      18             : // 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
      19             : 
      20             : 
      21             : /** \file
      22             :  * \brief XML file parser implementation.
      23             :  *
      24             :  * This file implements a small XML lexer and parser which loads a file
      25             :  * into a tree of xml_node objects.
      26             :  */
      27             : 
      28             : // self
      29             : //
      30             : #include    "snapdatabase/data/xml.h"
      31             : 
      32             : #include    "snapdatabase/data/convert.h"
      33             : #include    "snapdatabase/exception.h"
      34             : 
      35             : 
      36             : // libutf8 lib
      37             : //
      38             : #include    <libutf8/libutf8.h>
      39             : 
      40             : 
      41             : // snapdev lib
      42             : //
      43             : #include    <snapdev/not_reached.h>
      44             : 
      45             : 
      46             : // boost lib
      47             : //
      48             : #include    <boost/algorithm/string.hpp>
      49             : 
      50             : 
      51             : // C++ lib
      52             : //
      53             : #include    <fstream>
      54             : #include    <iostream>
      55             : 
      56             : 
      57             : // C lib
      58             : //
      59             : #include    <string.h>
      60             : 
      61             : 
      62             : // last include
      63             : //
      64             : #include    <snapdev/poison.h>
      65             : 
      66             : 
      67             : 
      68             : namespace snapdatabase
      69             : {
      70             : 
      71             : 
      72             : 
      73             : namespace
      74             : {
      75             : 
      76             : 
      77        1014 : bool is_alpha(char c)
      78             : {
      79         888 :     return (c >= 'a' && c <= 'z')
      80         126 :         || (c >= 'A' && c <= 'Z')
      81        1140 :         || c == '_';
      82             : }
      83             : 
      84             : 
      85         122 : bool is_digit(char c)
      86             : {
      87          72 :     return (c >= '0' && c <= '9')
      88         244 :         || c == '-';
      89             : }
      90             : 
      91             : 
      92          97 : bool is_space(char c)
      93             : {
      94             :     return c == ' '
      95          97 :         || c == '\t'
      96          97 :         || c == '\v'
      97          97 :         || c == '\f'
      98          97 :         || c == '\n'
      99         194 :         || c == '\r';
     100             : }
     101             : 
     102             : 
     103          46 : bool is_token(std::string const s)
     104             : {
     105          46 :     if(s.empty())
     106             :     {
     107           0 :         return false;
     108             :     }
     109             : 
     110          46 :     if(!is_alpha(s[0]))
     111             :     {
     112           0 :         return false;
     113             :     }
     114             : 
     115          46 :     std::string::size_type const max(s.length());
     116         334 :     for(std::string::size_type idx(1); idx < max; ++idx)
     117             :     {
     118         288 :         char const c(s[idx]);
     119         576 :         if(!is_alpha(c)
     120          15 :         && !is_digit(c)
     121         288 :         && c != '-')
     122             :         {
     123           0 :             return false;
     124             :         }
     125             :     }
     126          46 :     if(s[max - 1] == '-')
     127             :     {
     128           0 :         return false;
     129             :     }
     130             : 
     131          46 :     return true;
     132             : }
     133             : 
     134             : 
     135             : 
     136             : enum class token_t
     137             : {
     138             :     TOK_CLOSE_TAG,
     139             :     TOK_EMPTY_TAG,
     140             :     TOK_END_TAG,
     141             :     TOK_EOF,
     142             :     TOK_EQUAL,
     143             :     TOK_IDENTIFIER,
     144             :     TOK_OPEN_TAG,
     145             :     TOK_PROCESSOR,
     146             :     TOK_STRING,
     147             :     TOK_TEXT
     148             : };
     149             : 
     150             : 
     151           7 : class xml_parser
     152             : {
     153             : public:
     154             :                         xml_parser(std::string const & filename, xml_node::pointer_t & root);
     155             : 
     156             : private:
     157             :     void                read_xml(xml_node::pointer_t & root);
     158             :     token_t             get_token(bool parsing_attributes);
     159             :     void                unescape_entities();
     160             :     int                 getc();
     161             :     void                ungetc(int c);
     162             : 
     163             :     std::string         f_filename = std::string();
     164             :     std::ifstream       f_in;
     165             :     size_t              f_ungetc_pos = 0;
     166             :     int                 f_ungetc[4] = { '\0' };
     167             :     int                 f_line = 1;
     168             :     std::string         f_value = std::string();
     169             : };
     170             : 
     171             : 
     172           9 : xml_parser::xml_parser(std::string const & filename, xml_node::pointer_t & root)
     173             :     : f_filename(filename)
     174          11 :     , f_in(filename)
     175             : {
     176           9 :     if(!f_in.is_open())
     177             :     {
     178           0 :         int const e(errno);
     179           0 :         throw file_not_found(std::string("Could not open XML table file \"")
     180           0 :                            + filename
     181           0 :                            + "\": " + strerror(e) + ".");
     182             :     }
     183             : 
     184           9 :     read_xml(root);
     185           7 : }
     186             : 
     187             : 
     188             : /** \brief Read and parse the XML file.
     189             :  *
     190             :  * This function reads the XML but it does not verify the Schema format.
     191             :  * It does verify the XML syntax fairly strictly.
     192             :  *
     193             :  * \param[in] root  A reference to the root pointer where the results are saved.
     194             :  */
     195           9 : void xml_parser::read_xml(xml_node::pointer_t & root)
     196             : {
     197           9 :     token_t tok(get_token(false));
     198             : 
     199          22 :     auto skip_empty = [&]()
     200             :     {
     201          30 :         while(tok == token_t::TOK_TEXT)
     202             :         {
     203           8 :             boost::trim(f_value);
     204           4 :             if(!f_value.empty())
     205             :             {
     206             :                 throw unexpected_token(
     207             :                           "File \""
     208           0 :                         + f_filename
     209           0 :                         + "\" cannot include text data before the root tag.");
     210             :             }
     211           4 :             tok = get_token(false);
     212             :         }
     213          27 :     };
     214             : 
     215          46 :     auto read_tag_attributes = [&](xml_node::pointer_t & tag)
     216             :     {
     217             :         for(;;)
     218             :         {
     219         184 :             tok = get_token(true);
     220          46 :             if(tok == token_t::TOK_END_TAG
     221          14 :             || tok == token_t::TOK_EMPTY_TAG)
     222             :             {
     223          66 :                 return tok;
     224             :             }
     225          13 :             if(tok != token_t::TOK_IDENTIFIER)
     226             :             {
     227           0 :                 throw invalid_xml("Expected the end of the tag (>) or an attribute name.");
     228             :             }
     229          26 :             std::string const name(f_value);
     230          13 :             tok = get_token(true);
     231          13 :             if(tok != token_t::TOK_EQUAL)
     232             :             {
     233           0 :                 throw invalid_xml("Expected the '=' character between the attribute name and value.");
     234             :             }
     235          13 :             tok = get_token(true);
     236          13 :             if(tok != token_t::TOK_STRING)
     237             :             {
     238           0 :                 throw invalid_xml("Expected a value of the attribute after the '=' sign.");
     239             :             }
     240          13 :             if(!tag->attribute(name).empty())
     241             :             {
     242           0 :                 throw invalid_xml("Attribute \"" + name + "\" defined twice. We do not allow such.");
     243             :             }
     244          26 :             tag->set_attribute(name, f_value);
     245          13 :         }
     246           9 :     };
     247             : 
     248           9 :     skip_empty();
     249           9 :     if(tok == token_t::TOK_PROCESSOR)
     250             :     {
     251           4 :         tok = get_token(false);
     252             :     }
     253           9 :     skip_empty();
     254             : 
     255             :     // now we have to have the root tag
     256           9 :     if(tok != token_t::TOK_OPEN_TAG)
     257             :     {
     258             :         throw unexpected_token(
     259             :                   "File \""
     260           2 :                 + f_filename
     261           3 :                 + "\" cannot be empty or include anything other than a processor tag and comments before the root tag.");
     262             :     }
     263           8 :     root = std::make_shared<xml_node>(f_value);
     264           8 :     if(read_tag_attributes(root) == token_t::TOK_EMPTY_TAG)
     265             :     {
     266             :         throw unexpected_token(
     267             :                   "File \""
     268           2 :                 + f_filename
     269           3 :                 + "\" root tag cannot be an empty tag.");
     270             :     }
     271           7 :     tok = get_token(false);
     272             : 
     273           7 :     xml_node::pointer_t parent(root);
     274         149 :     while(tok != token_t::TOK_EOF)
     275             :     {
     276          78 :         switch(tok)
     277             :         {
     278          25 :         case token_t::TOK_OPEN_TAG:
     279             :             {
     280          50 :                 xml_node::pointer_t child(std::make_shared<xml_node>(f_value));
     281          25 :                 parent->append_child(child);
     282          25 :                 if(read_tag_attributes(child) == token_t::TOK_END_TAG)
     283             :                 {
     284          25 :                     parent = child;
     285          25 :                 }
     286             :             }
     287          25 :             break;
     288             : 
     289          32 :         case token_t::TOK_CLOSE_TAG:
     290          32 :             if(parent->tag_name() != f_value)
     291             :             {
     292             :                 throw unexpected_token(
     293             :                           "Unexpected token name \""
     294           0 :                         + f_value
     295           0 :                         + "\" in this closing tag. Expected \""
     296           0 :                         + parent->tag_name()
     297           0 :                         + "\" instead.");
     298             :             }
     299          32 :             parent = parent->parent();
     300          32 :             if(parent == nullptr)
     301             :             {
     302             :                 for(;;)
     303             :                 {
     304           7 :                     tok = get_token(false);
     305           7 :                     switch(tok)
     306             :                     {
     307           7 :                     case token_t::TOK_EOF:
     308             :                         // it worked, we're done
     309             :                         //
     310           7 :                         return;
     311             : 
     312           0 :                     case token_t::TOK_TEXT:
     313           0 :                         skip_empty();
     314           0 :                         break;
     315             : 
     316           0 :                     case token_t::TOK_PROCESSOR:
     317             :                         // completely ignore those
     318           0 :                         break;
     319             : 
     320           0 :                     default:
     321             :                         throw unexpected_token(
     322             :                                   "We reached the end of the XML file, but still found a token of type "
     323           0 :                                 + std::to_string(static_cast<int>(tok))
     324           0 :                                 + " instead of the end of the file.");
     325             : 
     326             :                     }
     327             :                 }
     328             :             }
     329          25 :             break;
     330             : 
     331          21 :         case token_t::TOK_TEXT:
     332          21 :             parent->append_text(f_value);
     333          21 :             break;
     334             : 
     335           0 :         case token_t::TOK_EOF:
     336             :         case token_t::TOK_EMPTY_TAG:
     337             :         case token_t::TOK_END_TAG:
     338             :         case token_t::TOK_EQUAL:
     339             :         case token_t::TOK_IDENTIFIER:
     340             :         case token_t::TOK_PROCESSOR:
     341             :         case token_t::TOK_STRING:
     342           0 :             throw snapdatabase_logic_error("Received an unexpected token in the switch handler.");
     343             : 
     344             :         }
     345          71 :         tok = get_token(false);
     346             :     }
     347             : }
     348             : 
     349             : 
     350         174 : token_t xml_parser::get_token(bool parsing_attributes)
     351             : {
     352         174 :     f_value.clear();
     353             : 
     354             :     for(;;)
     355             :     {
     356         186 :         int c(getc());
     357         186 :         switch(c)
     358             :         {
     359           8 :         case EOF:
     360           8 :             return token_t::TOK_EOF;
     361             : 
     362          13 :         case ' ':
     363             :         case '\t':
     364             :         case '\v':
     365             :         case '\f':
     366             :         case '\n':
     367          13 :             if(parsing_attributes)
     368             :             {
     369           8 :                 continue;
     370             :             }
     371           5 :             break;
     372             : 
     373          73 :         case '<':
     374          73 :             c = getc();
     375          73 :             switch(c)
     376             :             {
     377          72 :             case '?':
     378             :                 // we do not parse the processor entry, we do not care about
     379             :                 // it at the moment
     380             :                 for(;;)
     381             :                 {
     382         140 :                     c = getc();
     383          72 :                     if(c == EOF)
     384             :                     {
     385           0 :                         throw unexpected_eof("Found an unexpected sequence of character in a processor (\"<?...?>\") sequence.");
     386             :                     }
     387           0 :                     while(c == '?')
     388             :                     {
     389           4 :                         c = getc();
     390           4 :                         if(c == '>')
     391             :                         {
     392           4 :                             return token_t::TOK_PROCESSOR;
     393             :                         }
     394           0 :                         f_value += '?';
     395             :                     }
     396          68 :                     f_value += static_cast<char>(c);
     397             :                 }
     398             :                 snap::NOTREACHED();
     399             :                 return token_t::TOK_PROCESSOR;
     400             : 
     401           4 :             case '!':
     402           4 :                 c = getc();
     403           4 :                 if(is_alpha(c))
     404             :                 {
     405             :                     // of course, this may be anything other than an element but still something we don't support
     406             :                     //
     407           0 :                     throw invalid_xml("Found an element definition (such as an \"<!ELEMENT...>\" sequence, which is not supported.");
     408             :                 }
     409           4 :                 if(c == '[')
     410             :                 {
     411             :                     // <![CDATA[ ... or throw
     412             :                     //
     413           0 :                     char const * expected = "CDATA[";
     414           0 :                     for(int j(0); j < 6; ++j)
     415             :                     {
     416           0 :                         if(getc() != expected[j])
     417             :                         {
     418           0 :                             throw invalid_xml("Found an unexpected sequence of character in a \"<![CDATA[...\" sequence.");
     419             :                         }
     420             :                     }
     421             :                     for(;;)
     422             :                     {
     423           0 :                         c = getc();
     424           0 :                         if(c == EOF)
     425             :                         {
     426           0 :                             throw unexpected_eof("Found EOF while parsing a \"<![CDATA[...]]>\" sequence.");
     427             :                         }
     428           0 :                         if(c == ']')
     429             :                         {
     430           0 :                             c = getc();
     431           0 :                             if(c == ']')
     432             :                             {
     433           0 :                                 c = getc();
     434           0 :                                 while(c == ']')
     435             :                                 {
     436           0 :                                     f_value += ']';
     437           0 :                                     c = getc();
     438             :                                 }
     439           0 :                                 if(c == '>')
     440             :                                 {
     441             :                                     // this is just like some text
     442             :                                     // except we do not convert entities
     443             :                                     //
     444           0 :                                     return token_t::TOK_TEXT;
     445             :                                 }
     446           0 :                                 f_value += "]]";
     447           0 :                                 f_value += static_cast<char>(c);
     448             :                             }
     449             :                             else
     450             :                             {
     451           0 :                                 f_value += ']';
     452           0 :                                 f_value += static_cast<char>(c);
     453             :                             }
     454             :                         }
     455             :                         else
     456             :                         {
     457           0 :                             f_value += static_cast<char>(c);
     458             :                         }
     459             :                     }
     460           4 :                 }
     461           4 :                 if(c == '-')
     462             :                 {
     463           4 :                     c = getc();
     464           4 :                     if(c == '-')
     465             :                     {
     466           4 :                         bool found(false);
     467         162 :                         while(!found)
     468             :                         {
     469          79 :                             c = getc();
     470          79 :                             if(c == EOF)
     471             :                             {
     472           0 :                                 throw unexpected_eof("Found EOF while parsing a comment (\"<!--...-->\") sequence.");
     473             :                             }
     474          79 :                             if(c == '-')
     475             :                             {
     476           5 :                                 c = getc();
     477           5 :                                 while(c == '-')
     478             :                                 {
     479           4 :                                     c = getc();
     480           4 :                                     if(c == '>')
     481             :                                     {
     482           4 :                                         found = true;
     483           4 :                                         break;
     484             :                                     }
     485             :                                 }
     486             :                             }
     487             :                         }
     488           4 :                         continue;
     489             :                     }
     490             :                 }
     491             :                 throw invalid_token(
     492           0 :                           std::string("Character '")
     493           0 :                         + static_cast<char>(c)
     494           0 :                         + "' was not expected after a \"<!\" sequence.");
     495             : 
     496          32 :             case '/':
     497          32 :                 c = getc();
     498          32 :                 while(is_space(c))
     499             :                 {
     500           0 :                     c = getc();
     501             :                 }
     502          32 :                 if(!is_alpha(c))
     503             :                 {
     504           0 :                     if(c == EOF)
     505             :                     {
     506           0 :                         throw unexpected_eof("Expected a tag name after \"</\", not EOF.");
     507             :                     }
     508             :                     throw invalid_token(
     509           0 :                               std::string("Character '")
     510           0 :                             + static_cast<char>(c)
     511           0 :                             + "' is not valid for a tag name.");
     512             :                 }
     513             :                 for(;;)
     514             :                 {
     515         496 :                     f_value += static_cast<char>(c);
     516         264 :                     c = getc();
     517         528 :                     if(!is_alpha(c)
     518         264 :                     && !is_digit(c))
     519             :                     {
     520          32 :                         break;
     521             :                     }
     522             :                 }
     523          32 :                 while(is_space(c))
     524             :                 {
     525           0 :                     c = getc();
     526             :                 }
     527          32 :                 if(c != '>')
     528             :                 {
     529           0 :                     if(c == EOF)
     530             :                     {
     531           0 :                         throw unexpected_eof("Expected '>', not EOF.");
     532             :                     }
     533             :                     throw invalid_xml(
     534           0 :                               std::string("Found an unexpected '")
     535           0 :                             + static_cast<char>(c)
     536           0 :                             + "' in a closing tag, expected '>' instead.");
     537             :                 }
     538          32 :                 return token_t::TOK_CLOSE_TAG;
     539             : 
     540          33 :             }
     541             : 
     542             :             // in this case we need to read the name only, the attributes
     543             :             // will be read by the parser instead of the lexer
     544             :             //
     545          33 :             while(is_space(c))
     546             :             {
     547           0 :                 c = getc();
     548             :             }
     549          33 :             if(!is_alpha(c))
     550             :             {
     551           0 :                 if(c == EOF)
     552             :                 {
     553           0 :                     throw unexpected_eof("Expected a tag name after \"</\", not EOF.");
     554             :                 }
     555             :                 throw invalid_token(
     556           0 :                           std::string("Character '")
     557           0 :                         + static_cast<char>(c)
     558           0 :                         + "' is not valid for a tag name.");
     559             :             }
     560             :             for(;;)
     561             :             {
     562         505 :                 f_value += static_cast<char>(c);
     563         269 :                 c = getc();
     564         538 :                 if(!is_alpha(c)
     565          47 :                 && !is_digit(c)
     566         302 :                 && c != '-')
     567             :                 {
     568          33 :                     break;
     569             :                 }
     570             :             }
     571          33 :             if(isspace(c))
     572             :             {
     573           0 :                 do
     574             :                 {
     575           5 :                     c = getc();
     576             :                 }
     577           5 :                 while(isspace(c));
     578             :             }
     579          28 :             else if(c != '>' && c != '/')
     580             :             {
     581             :                 throw invalid_token(
     582           0 :                           std::string("Character '")
     583           0 :                         + static_cast<char>(c)
     584           0 :                         + "' is not valid right after a tag name.");
     585             :             }
     586          33 :             ungetc(c);
     587          33 :             return token_t::TOK_OPEN_TAG;
     588             : 
     589          32 :         case '>':
     590          32 :             if(parsing_attributes)
     591             :             {
     592          32 :                 return token_t::TOK_END_TAG;
     593             :             }
     594           0 :             break;
     595             : 
     596           1 :         case '/':
     597           1 :             if(parsing_attributes)
     598             :             {
     599           1 :                 c = getc();
     600           1 :                 if(c == '>')
     601             :                 {
     602           1 :                     return token_t::TOK_EMPTY_TAG;
     603             :                 }
     604           0 :                 ungetc(c);
     605           0 :                 c = '/';
     606             :             }
     607           0 :             break;
     608             : 
     609          13 :         case '=':
     610          13 :             if(parsing_attributes)
     611             :             {
     612          13 :                 return token_t::TOK_EQUAL;
     613             :             }
     614           0 :             break;
     615             : 
     616          13 :         case '"':
     617             :         case '\'':
     618          13 :             if(parsing_attributes)
     619             :             {
     620          13 :                 int quote(c);
     621             :                 for(;;)
     622             :                 {
     623         261 :                     c = getc();
     624         137 :                     if(c == quote)
     625             :                     {
     626          13 :                         unescape_entities();
     627          13 :                         return token_t::TOK_STRING;
     628             :                     }
     629         124 :                     if(c == '>')
     630             :                     {
     631           0 :                         throw invalid_token("Character '>' not expected inside a tag value. Please use \"&gt;\" instead.");
     632             :                     }
     633         124 :                     f_value += static_cast<char>(c);
     634             :                 }
     635           0 :             }
     636           0 :             break;
     637             : 
     638             :         }
     639             : 
     640          38 :         if(parsing_attributes
     641          38 :         && is_alpha(c))
     642             :         {
     643             :             for(;;)
     644             :             {
     645         117 :                 f_value += static_cast<char>(c);
     646          65 :                 c = getc();
     647         130 :                 if(!is_alpha(c)
     648          14 :                 && !is_digit(c)
     649          78 :                 && c != '-')
     650             :                 {
     651          13 :                     ungetc(c);
     652          13 :                     return token_t::TOK_IDENTIFIER;
     653             :                 }
     654             :             }
     655             :         }
     656             : 
     657             :         for(;;)
     658             :         {
     659         185 :             f_value += static_cast<char>(c);
     660         105 :             c = getc();
     661         105 :             if(c == '<'
     662          80 :             || c == EOF)
     663             :             {
     664          25 :                 ungetc(c);
     665          25 :                 unescape_entities();
     666          25 :                 return token_t::TOK_TEXT;
     667             :             }
     668             :         }
     669          12 :     }
     670             : }
     671             : 
     672             : 
     673          38 : void xml_parser::unescape_entities()
     674             : {
     675          38 :     for(std::string::size_type pos(0);;)
     676             :     {
     677          50 :         pos = f_value.find('&', pos);
     678          50 :         if(pos == std::string::npos)
     679             :         {
     680          76 :             break;
     681             :         }
     682          12 :         std::string::size_type end(f_value.find(';', pos + 1));
     683          12 :         if(end == std::string::npos)
     684             :         {
     685             :             // generate an error here?
     686             :             //
     687           0 :             break;
     688             :         }
     689          24 :         std::string name(f_value.substr(pos + 1, end - pos - 1));
     690          12 :         if(name == "amp")
     691             :         {
     692           1 :             f_value.replace(pos, end - pos + 1, 1, '&');
     693           1 :             ++pos;
     694             :         }
     695          11 :         else if(name == "quot")
     696             :         {
     697           4 :             f_value.replace(pos, end - pos + 1, 1, '"');
     698           4 :             ++pos;
     699             :         }
     700           7 :         else if(name == "lt")
     701             :         {
     702           1 :             f_value.replace(pos, end - pos + 1, 1, '<');
     703           1 :             ++pos;
     704             :         }
     705           6 :         else if(name == "gt")
     706             :         {
     707           2 :             f_value.replace(pos, end - pos + 1, 1, '>');
     708           2 :             ++pos;
     709             :         }
     710           4 :         else if(name == "apos")
     711             :         {
     712           1 :             f_value.replace(pos, end - pos + 1, 1, '\'');
     713           1 :             ++pos;
     714             :         }
     715           3 :         else if(name.empty())
     716             :         {
     717           0 :             throw invalid_entity("the name of an entity cannot be empty ('&;' is not valid XML).");
     718             :         }
     719           3 :         else if(name[0] == '#')
     720             :         {
     721           3 :             if(name.length() == 1)
     722             :             {
     723           0 :                 throw invalid_entity("a numeric entity must have a number ('&#; is not valid XML).");
     724             :             }
     725           6 :             if(name[1] == 'x'
     726           3 :             || name[1] == 'X')
     727             :             {
     728           2 :                 name[0] = '0';
     729             :             }
     730             :             else
     731             :             {
     732           1 :                 name[0] = ' ';
     733             :             }
     734             :             // TODO: enforce base 10 or 16
     735             :             //
     736           3 :             char32_t unicode(convert_to_int(name, 32));
     737           6 :             std::string const utf8(libutf8::to_u8string(unicode));
     738           3 :             f_value.replace(pos, end - pos + 1, utf8);
     739           3 :             pos += utf8.length();
     740             :         }
     741             :         else
     742             :         {
     743             :             throw invalid_entity(
     744             :                       "Unsupported entity ('&"
     745           0 :                     + name
     746           0 :                     + ";').");
     747             :         }
     748          12 :     }
     749          38 : }
     750             : 
     751             : 
     752        1309 : int xml_parser::getc()
     753             : {
     754        1309 :     if(f_ungetc_pos > 0)
     755             :     {
     756          71 :         --f_ungetc_pos;
     757             : //std::cerr << "re-getc() - '" << static_cast<char>(f_ungetc[f_ungetc_pos]) << "'\n";
     758          71 :         return f_ungetc[f_ungetc_pos];
     759             :     }
     760             : 
     761        1238 :     int c(f_in.get());
     762        1238 :     if(c == '\r')
     763             :     {
     764           0 :         ++f_line;
     765           0 :         c = f_in.get();
     766           0 :         if(c != '\n')
     767             :         {
     768           0 :             ungetc(c);
     769           0 :             c = '\n';
     770             :         }
     771             :     }
     772        1238 :     else if(c == '\n')
     773             :     {
     774          10 :         ++f_line;
     775             :     }
     776             : 
     777             : //if(c == EOF)
     778             : //{
     779             : //std::cerr << "getc() - 'EOF'\n";
     780             : //}
     781             : //else
     782             : //{
     783             : //std::cerr << "getc() - '" << static_cast<char>(c) << "'\n";
     784             : //}
     785        1238 :     return c;
     786             : }
     787             : 
     788             : 
     789          71 : void xml_parser::ungetc(int c)
     790             : {
     791          71 :     if(c != EOF)
     792             :     {
     793          71 :         if(f_ungetc_pos >= sizeof(f_ungetc) / sizeof(f_ungetc[0]))
     794             :         {
     795           0 :             throw snapdatabase_logic_error("Somehow the f_ungetc buffer was overflowed.");
     796             :         }
     797             : 
     798          71 :         f_ungetc[f_ungetc_pos] = c;
     799          71 :         ++f_ungetc_pos;
     800             :     }
     801          71 : }
     802             : 
     803             : 
     804             : 
     805             : } // empty namespace
     806             : 
     807             : 
     808             : 
     809          33 : xml_node::xml_node(std::string const & name)
     810          33 :     : f_name(name)
     811             : {
     812          33 :     if(!is_token(name))
     813             :     {
     814           0 :         throw invalid_token("\"" + name + "\" is not a valid token as a tag name.");
     815             :     }
     816          33 : }
     817             : 
     818             : 
     819         114 : std::string const & xml_node::tag_name() const
     820             : {
     821         114 :     return f_name;
     822             : }
     823             : 
     824             : 
     825          24 : std::string xml_node::text() const
     826             : {
     827          24 :     return f_text;
     828             : }
     829             : 
     830             : 
     831          21 : void xml_node::append_text(std::string const & text)
     832             : {
     833          21 :     f_text += text;
     834          21 : }
     835             : 
     836             : 
     837           5 : xml_node::attribute_map_t xml_node::all_attributes() const
     838             : {
     839           5 :     return f_attributes;
     840             : }
     841             : 
     842             : 
     843          39 : std::string xml_node::attribute(std::string const & name) const
     844             : {
     845          39 :     auto const it(f_attributes.find(name));
     846          39 :     if(it == f_attributes.end())
     847             :     {
     848          26 :         return std::string();
     849             :     }
     850          13 :     return it->second;
     851             : }
     852             : 
     853             : 
     854          13 : void xml_node::set_attribute(std::string const & name, std::string const & value)
     855             : {
     856          13 :     if(!is_token(name))
     857             :     {
     858           0 :         throw invalid_token("\"" + name + "\" is not a valid token as an attribute name.");
     859             :     }
     860          13 :     f_attributes[name] = value;
     861          13 : }
     862             : 
     863             : 
     864          25 : void xml_node::append_child(pointer_t n)
     865             : {
     866          50 :     if(n->f_next != nullptr
     867          50 :     || n->f_previous.lock() != nullptr)
     868             :     {
     869           0 :         throw node_already_in_tree("Somehow you are trying to add a child xml_node of a xml_node that was already added to a tree of nodes.");
     870             :     }
     871             : 
     872          50 :     auto l(last_child());
     873          25 :     if(l == nullptr)
     874             :     {
     875           7 :         f_child = n;
     876             :     }
     877             :     else
     878             :     {
     879          18 :         l->f_next = n;
     880          18 :         n->f_previous = l;
     881             :     }
     882             : 
     883          25 :     n->f_parent = shared_from_this();
     884          25 : }
     885             : 
     886             : 
     887          42 : xml_node::pointer_t xml_node::parent() const
     888             : {
     889          42 :     auto result(f_parent.lock());
     890          42 :     return result;
     891             : }
     892             : 
     893             : 
     894          16 : xml_node::pointer_t xml_node::first_child() const
     895             : {
     896          16 :     return f_child;
     897             : }
     898             : 
     899             : 
     900          35 : xml_node::pointer_t xml_node::last_child() const
     901             : {
     902          35 :     if(f_child == nullptr)
     903             :     {
     904          15 :         return xml_node::pointer_t();
     905             :     }
     906             : 
     907          40 :     pointer_t l(f_child);
     908         100 :     while(l->f_next != nullptr)
     909             :     {
     910          40 :         l = l->f_next;
     911             :     }
     912             : 
     913          20 :     return l;
     914             : }
     915             : 
     916             : 
     917          34 : xml_node::pointer_t xml_node::next() const
     918             : {
     919          34 :     return f_next;
     920             : }
     921             : 
     922             : 
     923          10 : xml_node::pointer_t xml_node::previous() const
     924             : {
     925          10 :     return f_previous.lock();
     926             : }
     927             : 
     928             : 
     929           0 : std::ostream & operator << (std::ostream & out, xml_node const & n)
     930             : {
     931           0 :     out << '<';
     932           0 :     out << n.tag_name();
     933           0 :     for(auto const & a : n.all_attributes())
     934             :     {
     935             :         out << a.first
     936             :             << "=\""
     937             :             << a.second
     938           0 :             << '"';
     939             :     }
     940           0 :     auto child(n.first_child());
     941           0 :     bool empty(child == nullptr);
     942           0 :     if(empty)
     943             :     {
     944           0 :         out << '/';
     945             :     }
     946           0 :     out << '>';
     947           0 :     if(!empty)
     948             :     {
     949           0 :         out << '\n';
     950           0 :         while(child != nullptr)
     951             :         {
     952           0 :             out << child;       // recursive call
     953           0 :             child = child->next();
     954             :         }
     955           0 :         out << '\n';
     956             :     }
     957           0 :     if(!n.text().empty())
     958             :     {
     959           0 :         out << n.text();
     960           0 :         if(!empty)
     961             :         {
     962           0 :             out << '\n';
     963             :         }
     964             :     }
     965           0 :     if(!empty)
     966             :     {
     967             :         out << "</"
     968           0 :             << n.tag_name()
     969           0 :             << '>';
     970             :     }
     971             : 
     972           0 :     return out;
     973             : }
     974             : 
     975             : 
     976             : 
     977             : 
     978          11 : xml::xml(std::string const & filename)
     979             : {
     980           9 :     xml_parser p(filename, f_root);
     981           7 : }
     982             : 
     983             : 
     984           7 : xml_node::pointer_t xml::root()
     985             : {
     986           7 :     return f_root;
     987             : }
     988             : 
     989             : 
     990             : 
     991           6 : } // namespace snapdatabase
     992             : // vim: ts=4 sw=4 et

Generated by: LCOV version 1.13