LCOV - coverage.info - edhttp/uri.cpp

LCOV - code coverage report

Current view:	top level - edhttp - uri.cpp (source / functions)		Hit	Total	Coverage
Test:	coverage.info	Lines:	115	617	18.6 %
Date:	2022-03-15 17:12:29	Functions:	9	54	16.7 %
Legend:	Lines: hit not hit

          Line data    Source code

       1             : // Copyright (c) 2011-2019  Made to Order Software Corp.  All Rights Reserved
       2             : //
       3             : // https://snapwebsites.org/
       4             : // contact@m2osw.com
       5             : //
       6             : // This program is free software; you can redistribute it and/or modify
       7             : // it under the terms of the GNU General Public License as published by
       8             : // the Free Software Foundation; either version 2 of the License, or
       9             : // (at your option) any later version.
      10             : //
      11             : // This program is distributed in the hope that it will be useful,
      12             : // but WITHOUT ANY WARRANTY; without even the implied warranty of
      13             : // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      14             : // GNU General Public License for more details.
      15             : //
      16             : // You should have received a copy of the GNU General Public License along
      17             : // with this program; if not, write to the Free Software Foundation, Inc.,
      18             : // 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
      19             : 
      20             : 
      21             : // self
      22             : //
      23             : #include    "edhttp/uri.h"
      24             : 
      25             : 
      26             : 
      27             : // snaplogger
      28             : //
      29             : #include    <snaplogger/message.h>
      30             : 
      31             : 
      32             : // snapdev
      33             : //
      34             : #include    <snapdev/hexadecimal_string.h>
      35             : #include    <snapdev/join_strings.h>
      36             : #include    <snapdev/not_used.h>
      37             : #include    <snapdev/tokenize_string.h>
      38             : 
      39             : 
      40             : // libaddr
      41             : //
      42             : #include    <libaddr/addr_parser.h>
      43             : 
      44             : 
      45             : // libtld
      46             : //
      47             : #include    <libtld/tld.h>
      48             : 
      49             : 
      50             : // C
      51             : //
      52             : #include    <netdb.h>
      53             : #include    <string.h>
      54             : 
      55             : 
      56             : // last include
      57             : //
      58             : #include    <snapdev/poison.h>
      59             : 
      60             : 
      61             : 
      62             : 
      63             : namespace edhttp
      64             : {
      65             : 
      66             : 
      67             : 
      68             : /** \brief This function initializes a default Snap URI object.
      69             :  *
      70             :  * Initialize a default Snap URI object.
      71             :  *
      72             :  * By default, the protocol is set to HTTP and everything else is set to
      73             :  * empty. This also means the original URI is set to empty (and stays that
      74             :  * way unless you later call set_uri() with a valid URI.)
      75             :  *
      76             :  * \sa set_uri()
      77             :  * \sa set_protocol()
      78             :  * \sa set_domain()
      79             :  * \sa set_path()
      80             :  * \sa set_option()
      81             :  * \sa set_query_string()
      82             :  * \sa set_anchor()
      83             :  */
      84           0 : uri::uri()
      85             : {
      86           0 : }
      87             : 
      88             : /** \brief Set the URI to the specified string.
      89             :  *
      90             :  * This function sets the URI to the specified string by calling set_uri().
      91             :  * If set_uri() returns false, an error is logged and no exception is raised.
      92             :  *
      93             :  * \todo
      94             :  * Should this function throw if the URI is considered invalid?
      95             :  *
      96             :  * \param[in] u  The URI to assign to this Snap URI object.
      97             :  *
      98             :  * \sa set_uri()
      99             :  */
     100           4 : uri::uri(std::string const & u)
     101             : {
     102           4 :     if(!set_uri(u))
     103             :     {
     104             :         // TBD: should we throw if set_uri() returns false? (for now we only log)
     105           0 :         SNAP_LOG_ERROR
     106             :             << "URI \""
     107             :             << u
     108             :             << "\" is considered invalid."
     109             :             << SNAP_LOG_SEND;
     110             :     }
     111           4 : }
     112             : 
     113             : /** \brief Replace the URI of this Snap URI object.
     114             :  *
     115             :  * This function replaces the current Snap URI object information
     116             :  * with the specified \p str data.
     117             :  *
     118             :  * Before calling this function YOU must force a URI encoding if the
     119             :  * URI is not yet encoded.
     120             :  *
     121             :  * Anything wrong in the syntax and the function returns false. Wrong
     122             :  * means empty entries, invalid encoding sequence, a leftover "..", etc.
     123             :  *
     124             :  * \param[in] str  The new URI to replace all the current data of this Snap URI object.
     125             :  *
     126             :  * \return false if the URI could not be parsed (in which case nothing's changed in the object); true otherwise
     127             :  */
     128           4 : bool uri::set_uri(std::string const & str)
     129             : {
     130           4 :     char const * u(str.c_str());
     131             : 
     132             :     // retrieve the protocol
     133           4 :     char const * s(u);
     134          36 :     while(*u != '\0' && *u != ':')
     135             :     {
     136          16 :         ++u;
     137             :     }
     138           4 :     if(u - s < 1 || *u == '\0' || u[1] != '/' || u[2] != '/')
     139             :     {
     140             :         // protocol is not followed by :// or is an empty string
     141           0 :         return false;
     142             :     }
     143           8 :     std::string uri_protocol(s, u - s);
     144             : 
     145             :     // skip the ://
     146           4 :     u += 3;
     147             : 
     148             :     // retrieve the sub-domains and domain parts
     149             :     // we may also discover a name, password, and port
     150           4 :     char const * colon1(nullptr);
     151           4 :     char const * colon2(nullptr);
     152           4 :     char const * at(nullptr);
     153          52 :     for(s = u; *u != '\0' && *u != '/'; ++u)
     154             :     {
     155          48 :         if(*u == ':')
     156             :         {
     157           0 :             if(colon1 == nullptr && at == nullptr) // a ':' found after the '@' is the port colon
     158             :             {
     159           0 :                 colon1 = u;
     160             :             }
     161             :             else
     162             :             {
     163           0 :                 if(at != nullptr)
     164             :                 {
     165           0 :                     if(colon2 != nullptr)
     166             :                     {
     167           0 :                         return false;
     168             :                     }
     169           0 :                     colon2 = u;
     170             :                 }
     171             :                 else
     172             :                 {
     173           0 :                     return false;
     174             :                 }
     175             :             }
     176             :         }
     177          48 :         if(*u == '@')
     178             :         {
     179           0 :             if(at != nullptr)
     180             :             {
     181             :                 // we cannot have more than one @ character that wasn't escaped
     182           0 :                 return false;
     183             :             }
     184           0 :             at = u;
     185             :         }
     186             :     }
     187             :     // without an at (@) colon1 indicates a port
     188           4 :     if(at == nullptr && colon1 != nullptr)
     189             :     {
     190             :         // colon2 is nullptr since otherwise we already returned with false
     191           0 :         colon2 = colon1;
     192           0 :         colon1 = nullptr;
     193             :     }
     194             : 
     195           8 :     std::string username;
     196           8 :     std::string password;
     197           8 :     std::string full_domain_name;
     198           4 :     int port(protocol_to_port(uri_protocol));
     199             : 
     200             :     // retrieve the data
     201           4 :     if(colon1 != nullptr)
     202             :     {
     203             :         // if(at == nullptr) -- missing '@'? this is not possible since we just
     204             :         //                   turned colon1 to colon2 if no '@' was defined
     205           0 :         username.insert(0, s, colon1 - s);
     206           0 :         s = colon1 + 1;
     207             :     }
     208           4 :     if(at != nullptr)
     209             :     {
     210           0 :         password.insert(0, s, at - s);
     211           0 :         s = at + 1;
     212             :     }
     213           4 :     if(colon2 != nullptr)
     214             :     {
     215           0 :         full_domain_name.insert(0, s, colon2 - s);
     216           0 :         char const * p(colon2 + 1);
     217           0 :         if(p == u)
     218             :         {
     219             :             // empty port entries are considered invalid
     220           0 :             return false;
     221             :         }
     222           0 :         port = 0;  // Reset port.
     223           0 :         for(; p < u; ++p)
     224             :         {
     225           0 :             char const d(*p);
     226           0 :             if(d < '0' || d > '9')
     227             :             {
     228             :                 // ports only accept digits
     229           0 :                 return false;
     230             :             }
     231           0 :             port = port * 10 + d - '0';
     232           0 :             if(port > 65535)
     233             :             {
     234             :                 // port overflow
     235           0 :                 return false;
     236             :             }
     237             :         }
     238             :     }
     239             :     else
     240             :     {
     241           4 :         full_domain_name.insert(0, s, u - s);
     242             :     }
     243             : 
     244             :     // verify that there is a domain
     245           4 :     if(full_domain_name.empty())
     246             :     {
     247           0 :         return false;
     248             :     }
     249             : 
     250             :     // force a username AND password or neither
     251           4 :     if(username.empty() ^ password.empty())
     252             :     {
     253           0 :         return false;
     254             :     }
     255             : 
     256             :     // break-up the domain in sub-domains, base domain, and TLD
     257           8 :     advgetopt::string_list_t sub_domain_names;
     258           8 :     std::string domain_name;
     259           8 :     std::string tld;
     260           4 :     if(!process_domain(full_domain_name, sub_domain_names, domain_name, tld))
     261             :     {
     262           0 :         return false;
     263             :     }
     264             : 
     265             :     // now we are ready to parse further (i.e. path)
     266           8 :     advgetopt::string_list_t uri_path;
     267           4 :     if(*u != '\0')
     268             :     {
     269             :         // skip the '/'
     270             :         //
     271           3 :         ++u;
     272          16 :         for(s = u; *u != '\0' && *u != '?' && *u != '#'; ++u)
     273             :         {
     274          13 :             if(*u == '/')
     275             :             {
     276           5 :                 if(s != u)
     277             :                 {
     278             :                     // decode right here since we just separate one segment
     279             :                     //
     280           2 :                     uri_path.push_back(urldecode(std::string(s, u - s)));
     281             :                 }
     282             :                 // skip the '/'
     283             :                 //
     284           5 :                 s = u + 1;
     285             :             }
     286             :         }
     287           3 :         if(s != u)
     288             :         {
     289             :             // last segment when it does not end with '/'
     290             :             //
     291           1 :             uri_path.push_back(urldecode(std::string(s, u - s)));
     292             :         }
     293             :     }
     294             : 
     295           8 :     uri_options_t query_strings;
     296           4 :     if(*u == '?')
     297             :     {
     298             :         // skip the '?' and then any (invalid?) introductory '&'
     299           0 :         do
     300             :         {
     301           0 :             ++u;
     302             :         }
     303           0 :         while(*u == '&');
     304           0 :         char const * e(nullptr);
     305           0 :         for(s = u;; ++u)
     306             :         {
     307           0 :             if(*u == '\0' || *u == '&' || *u == '#')
     308             :             {
     309           0 :                 if(e == nullptr)
     310             :                 {
     311             :                     // special case when a parameter appears without value
     312             :                     // ...&name&...
     313           0 :                     e = u;
     314             :                 }
     315           0 :                 std::string name(s, e - s);
     316           0 :                 if(name.empty())
     317             :                 {
     318             :                     // this is a very special case!!!
     319             :                     // ...&=value&...
     320             :                     // so we use a "special" name, also even that name could be
     321             :                     // defined in the query string (with '%2A=value' although
     322             :                     // we do not decode the name)
     323             :                     //
     324           0 :                     name = "*";
     325             :                 }
     326             : 
     327             :                 // query strings are saved as options (name/value pairs)
     328             :                 // although the value may not be defined at all (...&name&...)
     329             :                 // query string names are case sensitive (as per 6.2.2.1 of RFC 3986)
     330           0 :                 std::string value;
     331           0 :                 if(e != u)
     332             :                 {
     333             :                     // note that we reach here if there is an equal sign,
     334             :                     // the value may still be empty (i.e. u - e - 1 == 0 is
     335             :                     // possible)
     336             :                     //
     337           0 :                     value = std::string(e + 1, u - e - 1);
     338             :                 }
     339           0 :                 name = urldecode(name);
     340           0 :                 if(query_strings.find(name) != query_strings.end())
     341             :                 {
     342             :                     // two parameters with the same name, refused
     343             :                     //
     344             :                     // (this is not correct as far as URIs are concerned,
     345             :                     // the same parameter can appear any number of times,
     346             :                     // but in our world, we consider that useless and
     347             :                     // possibly dangerous)
     348             :                     //
     349           0 :                     return false;
     350             :                 }
     351           0 :                 query_strings[name] = urldecode(value);
     352             : 
     353             :                 // skip all the & and then reset s and e
     354           0 :                 while(*u == '&')
     355             :                 {
     356           0 :                     ++u;
     357             :                 }
     358           0 :                 if(*u == '\0' || *u == '#')
     359             :                 {
     360             :                     // reached the end of the query strings
     361             :                     break;
     362             :                 }
     363           0 :                 s = u;
     364           0 :                 e = nullptr;
     365             :             }
     366           0 :             else if(e == nullptr && *u == '=')
     367             :             {
     368           0 :                 e = u;
     369             :             }
     370           0 :         }
     371             :     }
     372             : 
     373             :     // finally check for an anchor
     374             :     // (note that browsers do not send us the anchor data, however, URIs
     375             :     // defined on the server side can very well include such.)
     376             :     //
     377           8 :     std::string uri_anchor;
     378           4 :     if(*u == '#')
     379             :     {
     380           0 :         ++u;
     381             : 
     382             :         // we need to decode the string so we add the whole string here
     383             :         //
     384           0 :         std::string p(u);
     385           0 :         p = urldecode(p);
     386           0 :         if(!p.empty() && p[0] == '!')
     387             :         {
     388             :             // what do we do here?!
     389             :             //
     390             :             // it seems to me that we should not get those here, but that
     391             :             // could be from someone who wrote the URL in their document.
     392             :             // NOTE(review): the leading '!' is kept in the first segment -- confirm
     393           0 :             u = p.c_str();
     394           0 :             for(s = u; *u != '\0'; ++u)
     395             :             {
     396           0 :                 if(*u == '/')
     397             :                 {
     398             :                     // decode right here since we have separate strings
     399             :                     //
     400           0 :                     if(s != u)
     401             :                     {
     402           0 :                         uri_path.push_back(urldecode(std::string(s, u - s)));
     403             :                     }
     404             :                     // skip the '/'
     405             :                     //
     406           0 :                     s = u + 1;
     407             :                 }
     408             :             }
     409           0 :             if(s != u)
     410             :             {
     411             :                 // last path that doesn't end with '/'
     412             :                 //
     413           0 :                 uri_path.push_back(urldecode(std::string(s, u - s)));
     414             :             }
     415             :         }
     416             :         else
     417             :         {
     418           0 :             uri_anchor = p;
     419             :         }
     420             :     }
     421             : 
     422             :     // the path may include some ".." which we want to eliminate
     423             :     // note that contrary to Unix we do not accept "/.." as an equivalent
     424             :     // to "/" and we do not verify that all the paths exist... (i.e.
     425             :     // if "c" does not exist under "/a/b" (folder /a/b/c), then it should
     426             :     // be an error to use "/a/b/c/.." since "/a/b/c" cannot be computed.)
     427             :     //
     428           4 :     int max_path(uri_path.size());
     429           7 :     for(int i(0); i < max_path; ++i)
     430             :     {
     431           3 :         if(uri_path[i] == "..")
     432             :         {
     433           0 :             if(i == 0 || max_path < 2)
     434             :             {
     435             :                 // the path starts with a ".." or has too many ".."
     436             :                 //
     437           0 :                 return false;
     438             :             }
     439             : 
     440             :             // remove the ".." and previous path segment
     441             :             //
     442           0 :             uri_path.erase(uri_path.begin() + i - 1, uri_path.begin() + i + 1);
     443           0 :             i -= 2; // the loop's ++i then resumes at the segment following the erased pair
     444           0 :             max_path -= 2;
     445             :         }
     446             :     }
     447             : 
     448             :     // totally unchanged URI, but only if it is considered valid
     449             :     //
     450           4 :     f_original = str;
     451             : 
     452             :     // now decode all the entries that may be encoded
     453             :     //
     454           4 :     f_protocol = uri_protocol;
     455           4 :     f_username = urldecode(username);
     456           4 :     f_password = urldecode(password);
     457           4 :     if(port != -1)
     458             :     {
     459           4 :         f_port = port;
     460             :     }
     461           4 :     f_domain = domain_name;
     462           4 :     f_top_level_domain = tld;
     463           4 :     f_sub_domains = sub_domain_names;
     464           4 :     f_path = uri_path;
     465             : 
     466             :     // options come from parsing the sub-domains, query strings and paths
     467             :     // and at this point we do not have that information...
     468             :     //
     469           4 :     f_options.clear();
     470           4 :     f_address_ranges.clear();
     471             : 
     472           4 :     f_query_strings = query_strings;
     473           4 :     f_anchor = uri_anchor;
     474             : 
     475           4 :     return true;
     476             : }
     477             : 
     478             : 
     479             : /** \brief Return the original URI used to define the Snap URI object.
     480             :  *
     481             :  * This function returns the original URI as defined when calling the
     482             :  * set_uri() or creating the Snap URI object with the uri() constructor
     483             :  * accepting a string.
     484             :  *
     485             :  * Note that it is possible to use the uri object without using the
     486             :  * set_uri() or a string in the constructor by calling the setters of
     487             :  * the different parts of a URI. This is actually how snap_child does it
     488             :  * because Apache does not give us one plain URI, instead we get
     489             :  * pre-separated parts. Therefore the get_original_uri() is always empty when
     490             :  * called from that f_uri variable.
     491             :  *
     492             :  * Note that this URI may still include security issues, although if the
     493             :  * input was not considered valid (i.e. had a valid protocol, etc.) then
     494             :  * this function returns an empty string.
     495             :  *
     496             :  * \return A constant reference to the original Snap URI.
     497             :  */
     498           0 : std::string const & uri::get_original_uri() const
     499             : {
     500           0 :     return f_original;
     501             : }
     502             : 
     503             : 
     504             : /** \brief Return the current URI defined in this Snap URI object.
     505             :  *
     506             :  * This function concatenates all the URI parts in a fully qualified URI
     507             :  * and returns the result.
     508             :  *
     509             :  * This function does NOT take the rules in account (since it does not
     510             :  * know anything about them.) So you may want to consider using the
     511             :  * uri_rules::process_uri() function instead.
     512             :  *
     513             :  * \note
     514             :  * The returned URI is already encoded as required by HTTP and such.
     515             :  *
     516             :  * \param[in] use_hash_bang  When this flag is set to true the URI is returned
     517             :  * as a hash bang (i.e. domain/path becomes domain/#!path).
     518             :  *
     519             :  * \return The URI represented by this Snap URI object.
     520             :  */
     521           0 : std::string uri::get_uri(bool use_hash_bang) const
     522             : {
     523           0 :     std::string result(f_protocol);
     524             : 
     525           0 :     result += "://";
     526             : 
     527             :     // username/password if defined
     528           0 :     if(!f_username.empty())
     529             :     {
     530           0 :         result += urlencode(f_username);
     531           0 :         if(!f_password.empty())
     532             :         {
     533           0 :             result += ':';
     534           0 :             result += urlencode(f_password);
     535             :         }
     536           0 :         result += '@';
     537             :     }
     538             : 
     539             :     // full domain
     540             :     // domains should rarely require encoding for special characters, however,
     541             :     // it often is for international domains that make use of UTF-8 characters
     542             :     // outside of the standard ASCII letters and those definitively require
     543             :     // URL encoding to work right.
     544           0 :     result += urlencode(full_domain());
     545           0 :     if(f_port != protocol_to_port(f_protocol)) // only a non-default port is output
     546             :     {
     547           0 :         result += ':' + std::to_string(f_port);
     548             :     }
     549           0 :     result += '/';
     550             : 
     551             :     // path if no hash bang
     552             :     //
     553           0 :     std::string const p(path());
     554           0 :     if(!use_hash_bang && p.length() > 0)
     555             :     {
     556             :         // avoid a double slash if possible
     557             :         //
     558             :         // XXX: should the path not have a leading slash?
     559             :         //      (as far as I know path() never return a path with a leading
     560             :         //      slash; but we would need a test to make sure of it)
     561             :         //
     562           0 :         if(p[0] == '/')
     563             :         {
     564           0 :             result += p.substr(1);
     565             :         }
     566             :         else
     567             :         {
     568           0 :             result += p;
     569             :         }
     570             :     }
     571             : 
     572             :     // query string
     573           0 :     std::string const q(query_string());
     574           0 :     if(!q.empty())
     575             :     {
     576           0 :         result += '?';
     577           0 :         result += q;
     578             :     }
     579             : 
     580             :     // anchor
     581           0 :     if(!f_anchor.empty())
     582             :     {
     583           0 :         if(use_hash_bang)
     584             :         {
     585             :             // hash bang and anchor are exclusive
     586           0 :             throw uri_exception_exclusive_parameters("you cannot use the hash bang (#!) and an anchor (#) in the same URI");
     587             :         }
     588           0 :         result += '#';
     589           0 :         result += urlencode(f_anchor, "!/~");
     590             :     }
     591             : 
     592             :     // path when using the hash bang but only if not empty
     593           0 :     if(use_hash_bang && !p.empty())
     594             :     {
     595           0 :         result += "#!/";
     596           0 :         result += p;
     597             :     }
     598             : 
     599           0 :     return result;
     600             : }
     601             : 
     602             : 
     603             : /** \brief Retrieve the URI of the website.
     604             :  *
     605             :  * This function returns the URI of the website, without any path,
     606             :  * query string options, or anchor. The port is included only if it
     607             :  * does not correspond to the protocol and the \p include_port flag
     608             :  * is set to true.
     609             :  *
     610             :  * \param[in] include_port  Whether the port should be included.
     611             :  *
     612             :  * \return The domain name with the protocol and optionally the port.
     613             :  */
     614           0 : std::string uri::get_website_uri(bool include_port) const
     615             : {
     616           0 :     std::string result(f_protocol);
     617             : 
     618           0 :     result += "://";
     619           0 :     result += full_domain();
     620             : 
     621             :     // only include the port if the caller wants it and if it does not
     622             :     // match the default protocol port
     623             :     //
     624           0 :     if(include_port
     625           0 :     && protocol_to_port(f_protocol) != f_port)
     626             :     {
     627           0 :         result += ':';
     628           0 :         result += std::to_string(f_port);
     629             :     }
     630             : 
     631           0 :     result += '/';
     632             : 
     633           0 :     return result;
     634             : }
     635             : 
     636             : 
     637             : /** \brief Retrieve a part by name.
     638             :  *
     639             :  * This function returns the named part of the URI as a string. An empty
     640             :  * or unknown \p name returns "". Indexed parts throw when \p part is out
     641             :  * of range (including negative values). The supported parts are:
     642             :  * \li anchor -- The anchor
     643             :  * \li domain -- The domain name
     644             :  * \li full-domain -- The full domain: with sub-domains, domain, and TLD
     645             :  * \li option -- The value of option number \p part
     646             :  * \li option-count -- The number of options
     647             :  * \li original -- The original URI or ""
     648             :  * \li password -- The password
     649             :  * \li path -- The folder name number \p part
     650             :  * \li path-count -- The number of paths
     651             :  * \li port -- The port number
     652             :  * \li protocol -- The protocol
     653             :  * \li query-string -- The value of query string number \p part
     654             :  * \li query-string-count -- The number of query strings
     655             :  * \li sub-domain -- The sub-domain name number \p part
     656             :  * \li sub-domain-count -- The number of sub-domains
     657             :  * \li tld or top-level-domain -- The top-level domain name
     658             :  * \li uri -- The full URI as you want it in an href="..." attribute
     659             :  * \li username -- The username
     660             :  *
     661             :  * \param[in] name  The named part to retrieve.
     662             :  * \param[in] part  The zero-based index when required (i.e. sub-domains)
     663             :  *
     664             :  * \return The data representing this part as a string.
     665             :  */
     666           0 : std::string uri::get_part(std::string const & name, int part) const
     667             : {
     668           0 :     if(name.empty())
     669             :     {
     670             :         // should this be an error?
     671           0 :         return "";
     672             :     }
     673           0 :     switch(name[0])
     674             :     {
     675           0 :     case 'a':
     676           0 :         if(name == "anchor")
     677             :         {
     678           0 :             return f_anchor;
     679             :         }
     680           0 :         break;
     681             : 
     682           0 :     case 'd':
     683           0 :         if(name == "domain")
     684             :         {
     685           0 :             return f_domain;
     686             :         }
     687           0 :         break;
     688             : 
     689           0 :     case 'f':
     690           0 :         if(name == "full-domain")
     691             :         {
     692           0 :             return full_domain();
     693             :         }
     694           0 :         break;
     695             : 
     696           0 :     case 'o':
     697           0 :         if(name == "option")
     698             :         {
     699           0 :             if(static_cast<std::size_t>(part) >= f_options.size())
     700             :             {
     701             :                 throw edhttp_uri_exception_out_of_bounds(
     702             :                       "option "
     703           0 :                     + std::to_string(part)
     704           0 :                     + " does not exist (range is 0 to "
     705           0 :                     + std::to_string(f_options.size())
     706           0 :                     + ")");
     707             :             }
     708           0 :             auto it(f_options.begin());
     709           0 :             std::advance(it, part);
     710           0 :             return it->second;
     711           0 :         }
     712           0 :         if(name == "option-count")
     713             :         {
     714           0 :             return std::to_string(f_options.size());
     715             :         }
     716           0 :         if(name == "original")
     717             :         {
     718           0 :             return f_original;
     719             :         }
     720           0 :         break;
     721             : 
     722           0 :     case 'p':
     723           0 :         if(name == "password")
     724             :         {
     725           0 :             return f_password;
     726             :         }
     727           0 :         if(name == "path")
     728             :         {
     729           0 :             if(static_cast<std::size_t>(part) >= f_path.size())
     730             :             {
     731             :                 throw edhttp_uri_exception_out_of_bounds(
     732             :                       "path "
     733           0 :                     + std::to_string(part)
     734           0 :                     + " is not available (range 0 to "
     735           0 :                     + std::to_string(f_path.size())
     736           0 :                     + ")");
     737             :             }
     738           0 :             return f_path[part];
     739             :         }
     740           0 :         if(name == "path-count")
     741             :         {
     742           0 :             return std::to_string(f_path.size());
     743             :         }
     744           0 :         if(name == "port")
     745             :         {
     746           0 :             return std::to_string(f_port);
     747             :         }
     748           0 :         if(name == "protocol")
     749             :         {
     750           0 :             return f_protocol;
     751             :         }
     752           0 :         break;
     753             : 
     754           0 :     case 'q':
     755           0 :         if(name == "query-string")
     756             :         {
     757           0 :             if(static_cast<std::size_t>(part) >= f_query_strings.size())
     758             :             {
     759             :                 throw edhttp_uri_exception_out_of_bounds(
     760             :                       "query-string "
     761           0 :                     + std::to_string(part)
     762           0 :                     + " does not exist (range 0 to "
     763           0 :                     + std::to_string(f_query_strings.size())
     764           0 :                     + ")");
     765             :             }
     766           0 :             auto it(f_query_strings.begin());
     767           0 :             std::advance(it, part);
     768           0 :             return it->second;
     769           0 :         }
     770           0 :         if(name == "query-string-count")
     771             :         {
     772           0 :             return std::to_string(f_query_strings.size());
     773             :         }
     774           0 :         break;
     775             : 
     776           0 :     case 's':
     777           0 :         if(name == "sub-domain")
     778             :         {
     779           0 :             if(static_cast<std::size_t>(part) >= f_sub_domains.size())
     780             :             {
     781             :                 throw edhttp_uri_exception_out_of_bounds(
     782             :                       "sub-domain "
     783           0 :                     + std::to_string(part)
     784           0 :                     + " does not exist (range 0 to "
     785           0 :                     + std::to_string(f_sub_domains.size())
     786           0 :                     + ")");
     787             :             }
     788           0 :             return f_sub_domains[part];
     789             :         }
     790           0 :         if(name == "sub-domain-count")
     791             :         {
     792           0 :             return std::to_string(f_sub_domains.size());
     793             :         }
     794           0 :         break;
     795             : 
     796           0 :     case 't':
     797           0 :         if(name == "tld" || name == "top-level-domain")
     798             :         {
     799           0 :             return f_top_level_domain;
     800             :         }
     801           0 :         break;
     802             : 
     803           0 :     case 'u':
     804           0 :         if(name == "uri")
     805             :         {
     806           0 :             return get_uri();
     807             :         }
     808           0 :         if(name == "username")
     809             :         {
     810           0 :             return f_username;
     811             :         }
     812           0 :         break;
     813             : 
     814           0 :     default:
     815             :         // no supported part name starts with this character
     816           0 :         break;
     817             : 
     818             :     }
     819             : 
     820           0 :     return "";
     821             : }
     822             : 
     823             : 
     824             : /** \brief Change the protocol.
     825             :  *
     826             :  * This function replaces the protocol of this URI.
     827             :  *
     828             :  * Except for rejecting an empty string, the protocol is not checked
     829             :  * since this can be used for any URI, not just the HTTP and HTTPS
     830             :  * protocols. The name is expected to only use lowercase letters [a-z].
     831             :  * \exception uri_exception_invalid_parameter  If \p uri_protocol is empty.
     832             :  * \param[in] uri_protocol  The name of the protocol.
     833             :  */
     834           0 : void uri::set_protocol(std::string const & uri_protocol)
     835             : {
     836           0 :     if(uri_protocol.empty())
     837             :     {
     838           0 :         throw uri_exception_invalid_parameter("the uri_protocol parameter cannot be an empty string");
     839             :     }
     840           0 :     f_protocol = uri_protocol;
     841           0 : }
     842             : 
     843             : 
     844             : /** \brief Retrieve the protocol.
     845             :  *
     846             :  * The protocol is the name that defines how messages are being
     847             :  * sent between the client and the server.
     848             :  *
     849             :  * The main interface only accepts "http" and "https", but the
     850             :  * uri object accepts all protocols so one can write URIs
     851             :  * with protocols such as "ftp", "mail", and "gopher".
     852             :  *
     853             :  * \return A constant reference to the protocol saved in this URI (not a copy).
     854             :  */
     855           0 : std::string const & uri::protocol() const
     856             : {
     857           0 :     return f_protocol;
     858             : }
     859             : 
     860             : 
     861             : /** \brief Process a domain name and break it up.
     862             :  *
     863             :  * This function processes a domain name and breaks it up into
     864             :  * the domain name, the sub-domains, and the TLD.
     865             :  *
     866             :  * \note
     867             :  * If the function returns false, then the out parameters may not
     868             :  * all be defined properly. None of them should be used in that
     869             :  * case anyway.
     870             :  *
     871             :  * \param[in] full_domain_name  The complete domain with sub-domains and TLD.
     872             :  * \param[out] sub_domain_names  An array of sub-domains, may be empty.
     873             :  * \param[out] domain_name  The domain by itself (no TLD and no sub-domain.)
     874             :  * \param[out] tld  The TLD part by itself.
     875             :  *
     876             :  * \return true if the function succeeds, false otherwise
     877             :  */
     878           4 : bool uri::process_domain(
     879             :       std::string const & full_domain_name
     880             :     , advgetopt::string_list_t & sub_domain_names
     881             :     , std::string & domain_name
     882             :     , std::string & tld)
     883             : {
     884             :     // first we need to determine the TLD, we use the tld()
     885             :     // function from the libtld library for this purpose
     886             : 
     887             :     // (note that the URI is expected to be encoded so the UTF-8
     888             :     // encoding is the same as ASCII)
     889           4 :     struct tld_info info;
     890           4 :     char const *fd(full_domain_name.c_str());
     891           4 :     tld_result r(::tld(fd, &info));
     892           4 :     if(r != TLD_RESULT_SUCCESS)
     893             :     {
     894             :         // (should we accept TLD_RESULT_INVALID URIs?)
     895             :         // the URI doesn't end with a known TLD
     896           0 :         return false;
     897             :     }
     898             : 
     899             :     // got the TLD, save it in the user's supplied variable
     900           4 :     tld = urldecode(info.f_tld);
     901             : 
     902             :     // search backward from the TLD for the period preceding the domain name
     903           4 :     char const *compute_domain_name(fd + info.f_offset);
     904          36 :     while(compute_domain_name > fd)
     905             :     {
     906          16 :         --compute_domain_name;
     907          16 :         if(*compute_domain_name == '.')
     908             :         {
     909           0 :             ++compute_domain_name;
     910           0 :             break;
     911             :         }
     912             :     }
     913           4 :     domain_name = urldecode(std::string(compute_domain_name, info.f_tld - compute_domain_name));
     914             : 
     915             :     // now cut the remainder on each period, these are the sub-domains
     916             :     // there may be none if there are no other periods in the full name
     917           4 :     if(compute_domain_name > fd)
     918             :     {
     919             :         // skip the period found just before the domain name
     920           0 :         --compute_domain_name;
     921             :     }
     922           8 :     std::string all_sub_domains(std::string(fd, compute_domain_name - fd));
     923             : 
     924             :     // verify that all the sub-domains are valid (i.e. no "..")
     925           4 :     if(!all_sub_domains.empty())
     926             :     {
     927           0 :         snapdev::tokenize_string(sub_domain_names, all_sub_domains, ".");
     928             : 
     929           0 :         for(auto & sub_domain : sub_domain_names)
     930             :         {
     931           0 :             if(sub_domain.empty())
     932             :             {
     933             :                 // sub-domains cannot be empty or the URI includes
     934             :                 // two periods one after the other (this should actually
     935             :                 // be caught by the tld() call.)
     936             :                 //
     937           0 :                 return false;
     938             :             }
     939             : 
     940             :             // make sure it is decodable
     941             :             //
     942           0 :             sub_domain = urldecode(sub_domain);
     943             : 
     944             :             // TODO: look into whether we have to check for periods in the
     945             :             //       decoded sub-domain names (i.e. a %2E is probably not a
     946             :             //       valid character in a sub-domain name, at the same time
     947             :             //       if we reach here, there should not be such a DNS entry...
     948             :             //       but not automatically because a hacker can take an IP
     949             :             //       and use it with any URI and send an HTTP request that
     950             :             //       way... still, we would catch that in our domain/website
     951             :             //       canonicalization.) Maybe we should decode the domain part
     952             :             //       first, then parse it.
     953             :         }
     954             :     }
     955             : 
     956           4 :     return true;
     957             : }
     958             : 
     959             : 
     960             : /** \brief Set the domain to 'domain'.
     961             :  *
     962             :  * This function changes the Snap URI to the specified full domain.
     963             :  * The sub-domains, the domain itself, and the TLD are all replaced
     964             :  * with the corresponding data from the full domain. The function takes
     965             :  * care of breaking up the input and clears the cached address ranges.
     966             :  *
     967             :  * If any error is discovered in the full domain name, then the internal
     968             :  * variables do not get modified.
     969             :  *
     970             :  * Note that the domain is not expected to include a user name, password
     971             :  * and port information. You want to get rid of that information before
     972             :  * calling this function or consider calling set_uri() instead.
     973             :  *
     974             :  * \note
     975             :  * The only potential problem is when you get an out of memory error
     976             :  * while allocating a string.
     977             :  *
     978             :  * \todo
     979             :  * Check that the URL is not an IPv4 or IPv6 address. Such will always
     980             :  * fail and we should look into avoiding the use of an exception in
     981             :  * that circumstance.
     982             :  *
     983             :  * \exception uri_exception_invalid_uri
     984             :  * If the domain cannot properly be broken up into sub-domains,
     985             :  * the domain name and the TLD, then this exception is raised.
     986             :  *
     987             :  * \param[in] full_domain_name  A full domain name, without protocol, path,
     988             :  *                              query string or anchor.
     989             :  */
     990           0 : void uri::set_domain(std::string const & full_domain_name)
     991             : {
     992           0 :     advgetopt::string_list_t sub_domain_names;
     993           0 :     std::string domain_name;
     994           0 :     std::string tld;
     995           0 :     if(!process_domain(full_domain_name, sub_domain_names, domain_name, tld))
     996             :     {
     997             :         throw uri_exception_invalid_uri(
     998             :               "could not break up \""
     999           0 :             + full_domain_name
    1000           0 :             + "\" as a valid domain name");
    1001             :     }
    1002             : 
    1003           0 :     f_domain = domain_name;
    1004           0 :     f_top_level_domain = tld;
    1005           0 :     f_sub_domains = sub_domain_names;
    1006             : 
    1007           0 :     f_address_ranges.clear();
    1008           0 : }
    1009             : 
    1010             : 
    1011             : /** \brief Reconstruct the full domain from the broken down information
    1012             :  *
    1013             :  * This function rebuilds a full domain name from the data saved in the
    1014             :  * Snap URI: the sub-domains (joined with periods), the domain name, and
    1015             :  * the TLD. NOTE(review): no period is added before the TLD, so the saved
    1016             :  * TLD presumably starts with one -- confirm against libtld.
    1017             :  * \todo
    1018             :  * Add caching so calling the function more than once will be fast.
    1019             :  *
    1020             :  * \return The full domain name representation of this Snap URI.
    1021             :  */
    1022           0 : std::string uri::full_domain() const
    1023             : {
    1024           0 :     std::string full_domains(snapdev::join_strings(f_sub_domains, "."));
    1025           0 :     if(!full_domains.empty())
    1026             :     {
    1027           0 :         full_domains += '.';
    1028             :     }
    1029           0 :     full_domains += f_domain;
    1030           0 :     full_domains += f_top_level_domain;
    1031           0 :     return full_domains;
    1032             : }
    1033             : 
    1034             : /** \brief Get the top level domain name.
    1035             :  *
    1036             :  * This function returns the top level domain name by itself.
    1037             :  * For example, in "www.example.com", the top level domain name is "com"
    1038             :  * (NOTE(review): possibly ".com" as full_domain() adds no period -- confirm).
    1039             :  *
    1040             :  * \return A constant reference to the top level domain name of the Snap URI.
    1041             :  */
    1042           4 : std::string const& uri::top_level_domain() const
    1043             : {
    1044           4 :     return f_top_level_domain;
    1045             : }
    1046             : 
    1047             : 
    1048             : /** \brief Get the domain name by itself.
    1049             :  *
    1050             :  * This function returns the stripped down domain name. This name
    1051             :  * has no period since it includes no sub-domains and no top level
    1052             :  * domain name.
    1053             :  *
    1054             :  * \return A constant reference to the stripped down domain name.
    1055             :  */
    1056           4 : std::string const & uri::domain() const
    1057             : {
    1058           4 :     return f_domain;
    1059             : }
    1060             : 
    1061             : 
    1062             : /** \brief Return the concatenated list of sub-domains.
    1063             :  *
    1064             :  * This function joins all the sub-domains of this URI with periods
    1065             :  * and returns the result as one new string.
    1066             :  *
    1067             :  * \return The concatenated sub-domains separated by periods.
    1068             :  */
    1069           0 : std::string uri::sub_domains() const
    1070             : {
    1071           0 :     return snapdev::join_strings(f_sub_domains, ".");
    1072             : }
    1073             : 
    1074             : 
    1075             : /** \brief Return the number of sub-domains defined.
    1076             :  *
    1077             :  * This function returns how many sub-domains are saved in this URI.
    1078             :  *
    1079             :  * \return The number of sub-domains.
    1080             :  */
    1081           0 : int uri::sub_domain_count() const
    1082             : {
    1083           0 :     return f_sub_domains.size();
    1084             : }
    1085             : 
    1086             : 
    1087             : /** \brief Return one of the sub-domain names.
    1088             :  *
    1089             :  * This function returns the specified sub-domain name.
    1090             :  * \exception edhttp_uri_exception_out_of_bounds  If \p part is negative or too large.
    1091             :  * \param[in] part  The zero-based sub-domain name index.
    1092             :  *
    1093             :  * \return The sub-domain corresponding to the specified index.
    1094             :  */
    1095           0 : std::string uri::sub_domain(int part) const
    1096             : {
    1097           0 :     if(static_cast<std::size_t>(part) >= f_sub_domains.size())
    1098             :     {
    1099             :         throw edhttp_uri_exception_out_of_bounds(
    1100             :               "sub-domain "
    1101           0 :             + std::to_string(part)
    1102           0 :             + " does not exist (range 0 to "
    1103           0 :             + std::to_string(f_sub_domains.size())
    1104           0 :             + ")");
    1105             :     }
    1106           0 :     return f_sub_domains[part];
    1107             : }
    1108             : 
    1109             : 
    1110             : /** \brief Return the array of sub-domains.
    1111             :  *
    1112             :  * This function gives you a constant reference to all the sub-domains
    1113             :  * at once. You may use this function to make use of the list iterator,
    1114             :  * for example.
    1115             :  *
    1116             :  * The strings are in order, as in the first is the left-most sub-domain
    1117             :  * (or the furthest away from the domain name.)
    1118             :  *
    1119             :  * \return A constant reference to the list of sub-domain strings.
    1120             :  */
    1121           0 : advgetopt::string_list_t const & uri::sub_domains_list() const
    1122             : {
    1123           0 :     return f_sub_domains;
    1124             : }
    1125             : 
    1126             : 
    1127             : /** \brief Transforms the hostname and port in an array of addresses.
    1128             :  *
    1129             :  * This function generates an array of addresses for the specified
    1130             :  * hostname and port.
    1131             :  *
    1132             :  * The function parses full_domain() with get_port() as the default port
    1133             :  * to compute one or more addresses which can be used to connect to the
    1134             :  * domain (i.e. with IPv6 and IPv4 or multiple computers, more than one).
    1135             :  * The result is cached: it is only computed while the saved vector is
    1136             :  * empty; set_domain() and set_port(std::string const &) clear it.
    1137             :  *
    1138             :  * The domain can later be retrieved using the addr::get_hostname()
    1139             :  * function.
    1140             :  *
    1141             :  * \return A reference to a vector of addr::addr_range objects.
    1142             :  */
    1143           0 : addr::addr_range::vector_t const & uri::address_ranges()
    1144             : {
    1145           0 :     if(f_address_ranges.empty())
    1146             :     {
    1147           0 :         addr::addr_parser p;
    1148           0 :         p.set_default_port(get_port());
    1149           0 :         p.set_protocol(IPPROTO_TCP);
    1150           0 :         p.set_sort_order(addr::SORT_IPV6_FIRST | addr::SORT_NO_EMPTY);
    1151           0 :         p.set_allow(addr::allow_t::ALLOW_REQUIRED_ADDRESS, true);
    1152           0 :         f_address_ranges = p.parse(full_domain());
    1153             :     }
    1154             : 
    1155           0 :     return f_address_ranges;
    1156             : }
    1157             : 
    1158             : 
    1159             : /** \brief Set the port to the specified string.
    1160             :  *
    1161             :  * This function changes the port of the URI from what it is now
    1162             :  * to the specified value.
    1163             :  *
    1164             :  * The port value must be a number between 0 and 65535 inclusive.
    1165             :  *
    1166             :  * On success, the cached address ranges are cleared.
    1167             :  *
    1168             :  * You can retrieve the port number with the get_port() function.
    1169             :  *
    1170             :  * \exception uri_exception_invalid_parameter
    1171             :  * This function generates an exception if the converted port is
    1172             :  * negative or larger than 65535. NOTE(review): the conversion uses
    1173             :  * std::stol() so other characters are not reported this way -- confirm.
    1174             :  * \param[in] port  The new port for this Snap URI object.
    1175             :  */
    1176           0 : void uri::set_port(std::string const & port)
    1177             : {
    1178           0 :     long p = std::stol(port);
    1179           0 :     if(p < 0 || p > 65535)
    1180             :     {
    1181             :         throw uri_exception_invalid_parameter(
    1182             :               "\""
    1183           0 :             + port
    1184           0 :             + "\" is an invalid port number");
    1185             :     }
    1186           0 :     f_port = p;
    1187           0 :     f_address_ranges.clear();
    1188           0 : }
    1189             : 
    1190             : 
    1191             : /** \brief Set the port to the specified string.
    1192             :  *
    1193             :  * This function changes the port of the URI from what it is now
    1194             :  * to the specified value.
    1195             :  *
    1196             :  * The port value must be a positive number or zero.
    1197             :  *
    1198             :  * Negative values or invalid numbers generate an error.
    1199             :  *
    1200             :  * \exception uri_exception_invalid_parameter
    1201             :  * This function generates an exception if an invalid port is
    1202             :  * detected (negative or characters other than 0-9).
    1203             :  *
    1204             :  * \param[in] port  The new port for this Snap URI object.
    1205             :  */
    1206           0 : void uri::set_port(int port)
    1207             : {
    1208           0 :     if(port < 0 || port > 65535)
    1209             :     {
    1210             :         throw uri_exception_invalid_parameter(
    1211             :               "port \""
    1212           0 :             + std::to_string(port)
    1213           0 :             + "\" is out of range (1 to 65535)");
    1214             :     }
    1215           0 :     f_port = port;
    1216           0 : }
    1217             : 
    1218             : 
    1219             : /** \brief Retrieve the port number.
    1220             :  *
    1221             :  * This function returns the specific port used to access
    1222             :  * the server. This parameter can be used as one of the
    1223             :  * options used to select a specific website.
    1224             :  *
    1225             :  * \return The port currently saved in this URI, as an integer.
    1226             :  */
    1227           0 : int uri::get_port() const
    1228             : {
    1229           0 :     return f_port;
    1230             : }
    1231             : 
    1232             : 
    1233             : /** \brief Replace the current path.
    1234             :  *
    1235             :  * This function can be used to replace the entire path of
    1236             :  * the URI by starting the new path with a slash (/something).
    1237             :  * If the \p path parameter does not start with a slash, then
    1238             :  * it is used as a relative path from the existing path.
    1239             :  *
    1240             :  * A path includes parts separated by one or more slashes (/).
    1241             :  * The function removes parts that are just "." since these
    1242             :  * mean "this directory" and they would not be valid in a
    1243             :  * canonicalized path.
    1244             :  *
    1245             :  * A path may include one or more ".." as a path part. These
    1246             :  * mean remove one part prior.
    1247             :  *
    1248             :  * The ".." are accepted in any path, however, it must be
    1249             :  * correct in that it is not possible to use ".." without at
    1250             :  * least one part just before that (i.e. "/this/one/../other/one" is
    1251             :  * valid, but "/../that/one/is/not" is not, since ".." from / does
    1252             :  * not exist). This is not how Unix usually manages paths since
    1253             :  * in Unix / and /.. are one and the same folder.
    1254             :  *
    1255             :  * Note that if you wanted to make use of the hash bang feature,
    1256             :  * you would still make use of this function to setup your path in
    1257             :  * the Snap URI object. The hash bang feature determines how
    1258             :  * the path is handled when you get the URI with get_uri().
    1259             :  *
    1260             :  * \exception uri_exception_invalid_path
    1261             :  * The function raises this exception if the path includes more
    1262             :  * ".." than there are "normal" parts on the left side of the "..".
    1263             :  *
    1264             :  * \param[in] uri_path  The new path for this URI.
    1265             :  *
    1266             :  * \sa path()
    1267             :  */
    1268           0 : void uri::set_path(std::string uri_path)
    1269             : {
    1270             :     // check whether the path starts with a '/':
    1271             :     // if so, then we replace the existing path;
    1272             :     // if not, then we append uri_path to the existing path.
    1273             :     //
    1274           0 :     if((uri_path.empty() || uri_path[0] != '/')
    1275           0 :     && !f_path.empty())
    1276             :     {
    1277             :         // append unless the user passed a path starting with "/"
    1278             :         // or the current path is empty
    1279           0 :         uri_path = snapdev::join_strings(f_path, "/") + "/" + uri_path;
    1280             :     }
    1281             : 
    1282             :     // if the path starts with a '/' or includes a double '/'
    1283             :     // within itself, it will be removed because of the SkipEmptyParts
    1284           0 :     advgetopt::string_list_t p;
    1285           0 :     advgetopt::split_string(uri_path, p, {"/"});
    1286             : 
    1287             :     // next we remove all ".." (and the previous part); if ".." was
    1288             :     // at the start of the path, then an exception is raised
    1289             :     //
    1290           0 :     int max_parts(p.size());
    1291           0 :     for(int i(0); i < max_parts; ++i)
    1292             :     {
    1293           0 :         if(p[i] == ".")
    1294             :         {
    1295             :             // canonicalization includes removing "." parts which are
    1296             :             // viewed exactly as empty parts
    1297           0 :             p.erase(p.begin() + i);
    1298           0 :             --i;
    1299           0 :             --max_parts;
    1300             :         }
    1301           0 :         else if(p[i] == "..")
    1302             :         {
    1303             :             // note: max should not be less than 2 if i != 0
    1304           0 :             if(i == 0 || max_parts < 2)
    1305             :             {
    1306             :                 throw uri_exception_invalid_path(
    1307             :                       "path \""
    1308           0 :                     + uri_path
    1309           0 :                     + "\" is not valid (it includes too many \"..\")");
    1310             :             }
    1311           0 :             p.erase(p.begin() + i - 1, p.begin() + i + 1);
    1312           0 :             --i;
    1313           0 :             max_parts -= 2;
    1314             :         }
    1315             :     }
    1316             : 
    1317             :     // the input was valid, save the new result
    1318           0 :     f_path.swap(p);
    1319           0 : }
    1320             : 
    1321             : 
    1322             : /** \brief Return the full path.
    1323             :  *
    1324             :  * This function returns the full concatenated path of the URI.
    1325             :  *
    1326             :  * The function encodes the path appropriately. The path can thus be
    1327             :  * used anywhere an encoded path is accepted. The encoding can be
    1328             :  * avoided by setting the \p encoded flag to false.
    1329             :  *
    1330             :  * Note that a non encoded path may include / characters instead of
    1331             :  * the %2F encoded character and thus not match the internal path.
    1332             :  *
    1333             :  * \note
    1334             :  * The URL encode will not encode the ~ character which is at times
    1335             :  * used for user references (~username/...).
    1336             :  *
    1337             :  * \warning
    1338             :  * The result of the function returns what looks like a relative path.
    1339             :  * This is useful since in many cases you need to remove the starting
    1340             :  * slash, so we avoid adding it in the first place. If there is no path,
    1341             :  * the function returns the empty string ("").
    1342             :  *
    1343             :  * \param[in] encoded  Should the resulting path be URL encoded already?
    1344             :  * By default the path is URL encoded as expected by the HTTP protocol.
    1345             :  *
    1346             :  * \return The full path of the URI.
    1347             :  */
    1348           1 : std::string uri::path(bool encoded) const
    1349             : {
    1350           1 :     if(encoded)
    1351             :     {
    1352           2 :         std::string output;
    1353           1 :         bool first(true);
    1354           4 :         for(auto const segment : f_path)
    1355             :         {
    1356           3 :             if(first)
    1357             :             {
    1358           1 :                 first = false;
    1359             :             }
    1360             :             else
    1361             :             {
    1362           2 :                 output += '/';
    1363             :             }
    1364           3 :             output += urlencode(segment, "~");
    1365             :         }
    1366           1 :         return output;
    1367             :     }
    1368           0 :     return snapdev::join_strings(f_path, "/");
    1369             : }
    1370             : 
    1371             : 
    1372             : /** \brief Retrieve the number of folder names defined in the path.
    1373             :  *
    1374             :  * This function returns the number of folder names defined in the
    1375             :  * path. Each name can be retrieved with the path_folder_name() function.
    1376             :  *
    1377             :  * The function may return 0 if no folder name is available.
    1378             :  *
    1379             :  * \return The number of folder names available.
    1380             :  *
    1381             :  * \sa path_folder_name()
    1382             :  */
    1383           0 : int uri::path_count() const
    1384             : {
    1385           0 :     return f_path.size();
    1386             : }
    1387             : 
    1388             : 
    1389             : /** \brief Get a folder name from the path.
    1390             :  *
    1391             :  * This function is used to retrieve the name of a specific folder.
    1392             :  * This is useful when you make use of a folder name as a dynamic
    1393             :  * name. For example with a path such as "journal/george",
    1394             :  * path_folder_name(1); returns "george" which may be the name of
    1395             :  * the journal owner.
    1396             :  *
    1397             :  * When you use this function to retrieve dynamic entries, it is
    1398             :  * assumed that you do it after the path options were removed so a
    1399             :  * path such as "en/journal/george" would be changed to
    1400             :  * "journal/george" and path_folder_name(1); would still return
    1401             :  * "george".
    1402             :  *
    1403             :  * \exception edhttp_uri_exception_out_of_bounds
    1404             :  * This function raises this exception if the \p part parameter is
    1405             :  * outside the range of folder names available. \p part should be
    1406             :  * between 0 and path_count() - 1. If the path is empty, then this
    1407             :  * function cannot be called.
    1408             :  *
    1409             :  * \param[in] part  The index of the folder to retrieve.
    1410             :  *
    1411             :  * \return The folder name.
    1412             :  *
    1413             :  * \sa path_count();
    1414             :  */
    1415           0 : std::string uri::path_folder_name(int part) const
    1416             : {
    1417           0 :     if(static_cast<std::size_t>(part) >= f_path.size())
    1418             :     {
    1419             :         throw edhttp_uri_exception_out_of_bounds(
    1420             :               "no path section "
    1421           0 :             + std::to_string(part)
    1422           0 :             + " available (range 0 to "
    1423           0 :             + std::to_string(f_path.size())
    1424           0 :             + ")");
    1425             :     }
    1426           0 :     return f_path[part];
    1427             : }
    1428             : 
    1429             : 
    1430             : /** \brief The array of folder names.
    1431             :  *
    1432             :  * This function returns a reference to the array used to hold the
    1433             :  * folder names forming the URI path.
    1434             :  *
    1435             :  * \return A constant reference to the list of string forming the path.
    1436             :  */
    1437           0 : advgetopt::string_list_t const & uri::path_list() const
    1438             : {
    1439           0 :     return f_path;
    1440             : }
    1441             : 
    1442             : 
    1443             : /** \brief Set an option.
    1444             :  *
    1445             :  * This function is used to define the value of an option in a URI.
    1446             :  * Remember that options only work for URIs that are clearly marked
    1447             :  * as from this website.
    1448             :  *
    1449             :  * Setting the value to an empty string has the effect of deleting
    1450             :  * the given option. You may also call the unset_option() function.
    1451             :  *
    1452             :  * \param[in] name  The name of the option to set.
    1453             :  * \param[in] value  The new value for this option.
    1454             :  *
    1455             :  * \sa option();
    1456             :  * \sa unset_option();
    1457             :  */
    1458           0 : void uri::set_option(std::string const& name, std::string const& value)
    1459             : {
    1460           0 :     if(value.empty())
    1461             :     {
    1462           0 :         auto it(f_options.find(name));
    1463           0 :         if(it != f_options.end())
    1464             :         {
    1465           0 :             f_options.erase(it);
    1466             :         }
    1467             :     }
    1468             :     else
    1469             :     {
    1470           0 :         f_options[name] = value;
    1471             :     }
    1472           0 : }
    1473             : 
    1474             : /** \brief Remove the specified option.
    1475             :  *
    1476             :  * This function is used to remove (delete) an option from the list
    1477             :  * of options. For example, going to a page where the language is
    1478             :  * neutral, you probably want to remove the language option.
    1479             :  *
    1480             :  * \param[in] name  The name of the option to remove.
    1481             :  *
    1482             :  * \sa set_option();
    1483             :  */
    1484           0 : void uri::unset_option(std::string const & name)
    1485             : {
    1486           0 :     auto it(f_options.find(name));
    1487           0 :     if(it != f_options.end())
    1488             :     {
    1489           0 :         f_options.erase(it);
    1490             :     }
    1491           0 : }
    1492             : 
    1493             : 
    1494             : /** \brief Retrieve the value of the named option.
    1495             :  *
    1496             :  * This function retrieves the current value of the named option.
    1497             :  *
    1498             :  * If the option is not defined, then the function returns an empty
    1499             :  * string. The empty string always represents an undefined option.
    1500             :  *
    1501             :  * \param[in] name  The name of the option to retrieve.
    1502             :  *
    1503             :  * \return The value of the named option.
    1504             :  *
    1505             :  * \sa set_option();
    1506             :  */
    1507           0 : std::string uri::option(std::string const& name) const
    1508             : {
    1509           0 :     auto it(f_options.find(name));
    1510           0 :     if(it != f_options.end())
    1511             :     {
    1512           0 :         return it->second;
    1513             :     }
    1514           0 :     return std::string();
    1515             : }
    1516             : 
    1517             : 
    1518             : /** \brief Retrieve the number of currently defined options.
    1519             :  *
    1520             :  * This function returns the number of options that can be retrieved
    1521             :  * with the option() function using an index. If the function returns
    1522             :  * zero, then no options are defined.
    1523             :  *
    1524             :  * \return The number of options defined in this URI.
    1525             :  */
    1526           0 : int uri::option_count() const
    1527             : {
    1528           0 :     return f_options.size();
    1529             : }
    1530             : 
    1531             : 
    1532             : /** \brief Retrieve an option by index.
    1533             :  *
    1534             :  * This function allows you to retrieve the name and value of an option
    1535             :  * using its index. The index (\p part) must be a number between 0 and
    1536             :  * option_count() - 1.
    1537             :  *
    1538             :  * \param[in] part  The index of the option to retrieve.
    1539             :  * \param[out] name  The name of the option being retrieved.
    1540             :  *
    1541             :  * \return The value of the option being retrieved.
    1542             :  *
    1543             :  * \sa option();
    1544             :  * \sa option_count();
    1545             :  */
    1546           0 : std::string uri::option(int part, std::string & name) const
    1547             : {
    1548           0 :     if(static_cast<std::size_t>(part) >= f_options.size())
    1549             :     {
    1550             :         throw edhttp_uri_exception_out_of_bounds(
    1551             :               "no option "
    1552           0 :             + std::to_string(part)
    1553           0 :             + " available (range 0 to "
    1554           0 :             + std::to_string(f_options.size())
    1555           0 :             + ")");
    1556             :     }
    1557           0 :     auto it(f_options.begin());
    1558           0 :     std::advance(it, part);
    1559           0 :     name = it->first;
    1560           0 :     return it->second;
    1561             : }
    1562             : 
    1563             : 
    1564             : /** \brief Retrieve the map of options.
    1565             :  *
    1566             :  * This function returns the map of options so one can use the begin()
    1567             :  * and end() functions to go through the entire list without having to
    1568             :  * use the option() function.
    1569             :  *
    1570             :  * \return A constant reference to the map of options.
    1571             :  *
    1572             :  * \sa option();
    1573             :  */
    1574           0 : uri::uri_options_t const& uri::options_list() const
    1575             : {
    1576           0 :     return f_options;
    1577             : }
    1578             : 
    1579             : 
    1580             : /** \brief Set a query string option.
    1581             :  *
    1582             :  * This function is used to change the named query string with the
    1583             :  * specified value.
    1584             :  *
    1585             :  * A query string option with an empty string as a value is considered
    1586             :  * undefined and is not shown on the final URI. So setting an option to
    1587             :  * the empty string ("") is equivalent to unset_query_option().
    1588             :  *
    1589             :  * \param[in] name  The name of the query string option.
    1590             :  * \param[in] value  The value of the query string option.
    1591             :  */
    1592           0 : void uri::set_query_option(std::string const& name, std::string const& value)
    1593             : {
    1594           0 :     if(name.empty())
    1595             :     {
    1596             :         // this happens if the name was not defined in the configuration file
    1597           0 :         return;
    1598             :     }
    1599             : 
    1600             :     // TODO: see whether we currently use this feature, because it is rather
    1601             :     //       incorrect, it is possible to have an empty value in a query
    1602             :     //       string (i.e. "...?logout")
    1603             :     //
    1604             :     //       we should use unset_query_option() instead
    1605             :     //
    1606           0 :     if(value.empty())
    1607             :     {
    1608           0 :         auto it(f_query_strings.find(name));
    1609           0 :         if(it != f_query_strings.end())
    1610             :         {
    1611           0 :             f_query_strings.erase(it);
    1612             :         }
    1613             :     }
    1614             :     else
    1615             :     {
    1616           0 :         f_query_strings[name] = value;
    1617             :     }
    1618             : }
    1619             : 
    1620             : 
    1621             : /** \brief Unset the named query string option.
    1622             :  *
    1623             :  * This function ensures that the named query string option is deleted
    1624             :  * and thus will not appear in the URI.
    1625             :  *
    1626             :  * \param[in] name  The name of the option to delete.
    1627             :  */
    1628           0 : void uri::unset_query_option(std::string const& name)
    1629             : {
    1630           0 :     if(name.empty())
    1631             :     {
    1632             :         // this happens if the name was not defined in the configuration file
    1633           0 :         return;
    1634             :     }
    1635             : 
    1636           0 :     auto it(f_query_strings.find(name));
    1637           0 :     if(it != f_query_strings.end())
    1638             :     {
    1639           0 :         f_query_strings.erase(it);
    1640             :     }
    1641             : }
    1642             : 
    1643             : 
    1644             : /** \brief Set the query string.
    1645             :  *
    1646             :  * This function can be used to reset the query string to the
    1647             :  * parameters defined in this URI query string.
    1648             :  *
    1649             :  * The function does not clear all the existing query strings,
    1650             :  * it only replaces existing entries. This also means that
    1651             :  * it does not detect whether the input includes the same option
    1652             :  * more than once and only the last one sticks.
    1653             :  *
    1654             :  * The query string variable names and data gets URL decoded.
    1655             :  *
    1656             :  * \warning
    1657             :  * This function does not clear the existing list of query
    1658             :  * string options.
    1659             :  *
    1660             :  * \param[in] uri_query_string  The query string to add to the existing data.
    1661             :  */
    1662           0 : void uri::set_query_string(std::string const & uri_query_string)
    1663             : {
    1664           0 :     advgetopt::string_list_t value_pairs;
    1665           0 :     advgetopt::split_string(uri_query_string, value_pairs, {"&"});
    1666           0 :     for(auto const & name_value : value_pairs)
    1667             :     {
    1668           0 :         std::string::size_type const pos(name_value.find('='));
    1669           0 :         if(pos == std::string::npos)
    1670             :         {
    1671             :             // no value
    1672           0 :             f_query_strings[urldecode(name_value)] = std::string();
    1673             :         }
    1674           0 :         else if(pos == 0)
    1675             :         {
    1676             :             // name is missing, use "*" instead
    1677           0 :             f_query_strings["*"] = urldecode(name_value.substr(1));
    1678             :         }
    1679             :         else
    1680             :         {
    1681           0 :             f_query_strings[urldecode(name_value.substr(0, pos))] = urldecode(name_value.substr(pos + 1));
    1682             :         }
    1683             :     }
    1684           0 : }
    1685             : 
    1686             : 
    1687             : /** \brief Clear all query option strings.
    1688             :  *
    1689             :  * This is useful if you want to "start fresh" with the base URI.
    1690             :  */
    1691           0 : void uri::clear_query_options()
    1692             : {
    1693           0 :     f_query_strings.clear();
    1694           0 : }
    1695             : 
    1696             : 
    1697             : /** \brief Generate the query string.
    1698             :  *
    1699             :  * This function goes through the list of defined query string options
    1700             :  * and builds the resulting query string to generate the final URI.
    1701             :  *
    1702             :  * The result is already URL encoded since you would otherwise not know
    1703             :  * where/which equal and ampersand are legal.
    1704             :  *
    1705             :  * \return The URI query string.
    1706             :  */
    1707           0 : std::string uri::query_string() const
    1708             : {
    1709           0 :     std::string result;
    1710           0 :     for(auto const & name_value : f_query_strings)
    1711             :     {
    1712           0 :         if(!result.empty())
    1713             :         {
    1714           0 :             result += '&';
    1715             :         }
    1716           0 :         result += urlencode(name_value.first);
    1717           0 :         if(!name_value.second.empty())
    1718             :         {
    1719             :             // add the value only if not empty
    1720           0 :             result += '=';
    1721             :             // we now support commas in URIs because... well... it is
    1722             :             // common and it won't break anything
    1723             :             //
    1724           0 :             result += urlencode(name_value.second, ",");
    1725             :         }
    1726             :     }
    1727           0 :     return result;
    1728             : }
    1729             : 
    1730             : 
    1731             : /** \brief Retrieve whether a query option is defined.
    1732             :  *
    1733             :  * This function returns true if a query option is defined. Note that
    1734             :  * an option may be the empty string ("") and that cannot be distinguished
    1735             :  * from the empty string ("") returned when the query_option() function
    1736             :  * is used against an undefined option.
    1737             :  *
    1738             :  * \param[in] name  The name of the option to query.
    1739             :  *
    1740             :  * \return true when the named query option is defined.
    1741             :  *
    1742             :  * \sa query_option();
    1743             :  */
    1744           0 : bool uri::has_query_option(std::string const & name) const
    1745             : {
    1746           0 :     if(name.empty())
    1747             :     {
    1748             :         // this happens if the name was not defined in the configuration file
    1749           0 :         return false;
    1750             :     }
    1751             : 
    1752           0 :     return f_query_strings.find(name) != f_query_strings.end();
    1753             : }
    1754             : 
    1755             : /** \brief Retrieve a query string option.
    1756             :  *
    1757             :  * This function can be used to retrieve the current value of a query
    1758             :  * string option.
    1759             :  *
    1760             :  * Note that you cannot know whether an option is defined using this
    1761             :  * function since the function returns an empty string whether it is
    1762             :  * empty or undefined. Instead, use the has_query_option() function
    1763             :  * to determine whether an option is defined.
    1764             :  *
    1765             :  * \param[in] name  Name of the query string option to return.
    1766             :  *
    1767             :  * \sa has_query_option();
    1768             :  */
    1769           0 : std::string uri::query_option(std::string const & name) const
    1770             : {
    1771           0 :     if(!name.empty())
    1772             :     {
    1773           0 :         auto const it(f_query_strings.find(name));
    1774           0 :         if(it != f_query_strings.end())
    1775             :         {
    1776           0 :             return it->second;
    1777             :         }
    1778             :     }
    1779             : 
    1780           0 :     return std::string();
    1781             : }
    1782             : 
    1783             : /** \brief Return the number of options defined in the query string.
    1784             :  *
    1785             :  * This function returns the number of options currently defined in the
    1786             :  * query string. This is useful to go over the list of options with the
    1787             :  * query_option(int part, std::string & name) function.
    1788             :  *
    1789             :  * \return The number of query string options currently defined.
    1790             :  */
    1791           0 : int uri::query_option_count() const
    1792             : {
    1793           0 :     return f_query_strings.size();
    1794             : }
    1795             : 
    1796             : /** \brief Retrieve an option specifying its index.
    1797             :  *
    1798             :  * This function returns the name and value of the option defined at
    1799             :  * index \p part.
    1800             :  *
    1801             :  * The index must be between 0 and the number of options available minus
    1802             :  * 1 (i.e. query_option_count() - 1).
    1803             :  *
    1804             :  * \param[in] part  The index of the query string option to retrieve.
    1805             :  * \param[out] name  The name of the option at that index.
    1806             :  *
    1807             :  * \return The value of the option at that index.
    1808             :  *
    1809             :  * \sa query_option_count();
    1810             :  */
    1811           0 : std::string uri::query_option(int part, std::string& name) const
    1812             : {
    1813           0 :     if(static_cast<std::size_t>(part) >= f_query_strings.size())
    1814             :     {
    1815             :         throw edhttp_uri_exception_out_of_bounds(
    1816             :               "query-option "
    1817           0 :             + std::to_string(part)
    1818           0 :             + " does not exist (range 0 to "
    1819           0 :             + std::to_string(f_query_strings.size())
    1820           0 :             + ")");
    1821             :     }
    1822           0 :     auto it(f_query_strings.begin());
    1823           0 :     std::advance(it, part);
    1824           0 :     name = it->first;
    1825           0 :     return it->second;
    1826             : }
    1827             : 
    1828             : /** \brief Return the complete map of query strings.
    1829             :  *
    1830             :  * This function returns a reference to the internal map of query strings.
    1831             :  * This is useful to use the begin()/end() and other functions to go through
    1832             :  * the map.
    1833             :  *
    1834             :  * \return A constant reference to the internal query string map.
    1835             :  */
    1836           0 : const uri::uri_options_t& uri::query_string_list() const
    1837             : {
    1838           0 :     return f_query_strings;
    1839             : }
    1840             : 
    1841             : 
    1842             : /** \brief Define the anchor for this URI.
    1843             :  *
    1844             :  * This function saves \p uri_anchor as the anchor of this URI. An
    1845             :  * anchor can be defined only if you don't plan to make use of
    1846             :  * the hash bang feature (see get_uri() for more info) since both
    1847             :  * features make use of the same technical option.
    1848             :  *
    1849             :  * \note
    1850             :  * The anchor string can start with a bang (!) since it is legal
    1851             :  * in an anchor. If you are not using the hash bang feature, it
    1852             :  * is fine, although it may confuse some search engines.
    1853             :  *
    1854             :  * \exception uri_exception_invalid_parameter
    1855             :  * Raised if \p uri_anchor includes a '#'; the anchor is then left unchanged.
    1856             :  *
    1857             :  * \param[in] uri_anchor  The new value for the anchor.
    1858             :  *
    1859             :  * \sa get_uri()
    1860             :  */
    1861           0 : void uri::set_anchor(std::string const & uri_anchor)
    1862             : {
    1863           0 :     if(uri_anchor.find('#') != std::string::npos)
    1864             :     {
    1865             :         throw uri_exception_invalid_parameter(
    1866             :               "anchor string \""
    1867           0 :             + uri_anchor
    1868           0 :             + "\" cannot include a '#' character");
    1869             :     }
    1870           0 :     f_anchor = uri_anchor;
    1871           0 : }
    1872             : 
    1873             : 
    1874             : /** \brief Retrieve the current anchor.
    1875             :  *
    1876             :  * This function returns a reference to the current anchor (no copy is
    1877             :  * made). The empty string represents the fact that the anchor is not defined.
    1878             :  *
    1879             :  * \return A constant reference to the anchor.
    1880             :  */
    1881           0 : std::string const & uri::anchor() const
    1882             : {
    1883           0 :     return f_anchor;
    1884             : }
    1885             : 
    1886             : 
    1887             : /** \brief Check whether two URIs are equal.
    1888             :  *
    1889             :  * This function returns true when get_uri() generates the same
    1890             :  * result for this URI and for \p rhs. Only what get_uri() outputs
    1891             :  * is compared, which means not 100% of the information included
    1892             :  * in the Snap URI object.
    1893             :  *
    1894             :  * \param[in] rhs  The right-hand side to compare this against.
    1895             :  *
    1896             :  * \return true when both URIs are equal.
    1897             :  */
    1898           0 : bool uri::operator == (const uri& rhs) const
    1899             : {
    1900           0 :     return get_uri() == rhs.get_uri();
    1901             : }
    1902             : 
    1903             : 
    1904             : /** \brief Check whether two URIs differ.
    1905             :  *
    1906             :  * This function returns the negation of operator == (), so it is
    1907             :  * true when get_uri() generates different results for this URI and
    1908             :  * for \p rhs. This means not 100% of the information included
    1909             :  * in the Snap URI object is compared.
    1910             :  *
    1911             :  * \param[in] rhs  The right-hand side to compare this against.
    1912             :  *
    1913             :  * \return true when both URIs differ.
    1914             :  */
    1915           0 : bool uri::operator != (uri const & rhs) const
    1916             : {
    1917           0 :     return !operator == (rhs);
    1918             : }
    1919             : 
    1920             : 
    1921             : /** \brief Check whether this URI is smaller than another.
    1922             :  *
    1923             :  * This function returns true if this is smaller than the \p rhs
    1924             :  * parameter. The test applies the < operator to what the get_uri()
    1925             :  * function generates for each URI, which means not 100% of
    1926             :  * the information included in the Snap URI object.
    1927             :  *
    1928             :  * \param[in] rhs  The right-hand side to compare this against.
    1929             :  *
    1930             :  * \return true when this is smaller than rhs.
    1931             :  */
    1932           0 : bool uri::operator < (uri const & rhs) const
    1933             : {
    1934           0 :     return get_uri() < rhs.get_uri();
    1935             : }
    1936             : 
    1937             : 
    1938             : /** \brief Check whether this URI is smaller than or equal to another.
    1939             :  *
    1940             :  * This function returns true if this is smaller or equal to
    1941             :  * \p rhs. The test applies the <= operator to what the get_uri()
    1942             :  * function generates for each URI, which means not 100% of
    1943             :  * the information included in the Snap URI object.
    1944             :  *
    1945             :  * \param[in] rhs  The right-hand side to compare this against.
    1946             :  *
    1947             :  * \return true when this is smaller or equal to rhs.
    1948             :  */
    1949           0 : bool uri::operator <= (uri const & rhs) const
    1950             : {
    1951           0 :     return get_uri() <= rhs.get_uri();
    1952             : }
    1953             : 
    1954             : 
    1955             : /** \brief Check whether this URI is larger than another.
    1956             :  *
    1957             :  * This function returns true if this is larger than the \p rhs
    1958             :  * parameter. It is the negation of operator <= (), which tests
    1959             :  * what the get_uri() function generates, which means not 100% of
    1960             :  * the information included in the Snap URI object.
    1961             :  *
    1962             :  * \param[in] rhs  The right-hand side to compare this against.
    1963             :  *
    1964             :  * \return true when this is larger than rhs.
    1965             :  */
    1966           0 : bool uri::operator > (uri const & rhs) const
    1967             : {
    1968           0 :     return !operator <= (rhs);
    1969             : }
    1970             : 
    1971             : 
    1972             : /** \brief Check whether this URI is larger than or equal to another.
    1973             :  *
    1974             :  * This function returns true if this is larger or equal to
    1975             :  * \p rhs. It is the negation of operator < (), which tests
    1976             :  * what the get_uri() function generates, which means not 100% of
    1977             :  * the information included in the Snap URI object.
    1978             :  *
    1979             :  * \param[in] rhs  The right-hand side to compare this against.
    1980             :  *
    1981             :  * \return true when this is larger or equal to rhs.
    1982             :  */
    1983           0 : bool uri::operator >= (uri const & rhs) const
    1984             : {
    1985           0 :     return !operator < (rhs);
    1986             : }
    1987             : 
    1988             : 
    1989             : /** \brief Encode a URI so it is valid for HTTP.
    1990             :  *
    1991             :  * This function copies \p in, keeping A-Z, a-z, 0-9, '.', '-', '_' and
    1992             :  * the characters found in \p accepted as is. Any other character is
    1993             :  * replaced by '%' followed by what snapdev::int_to_hex(c, true, 2) returns
    1994             :  * (a space is meant to become %20, not +). It stops at the first '\0'.
    1995             :  *
    1996             :  * WARNING: This encodes the entire string. Remember that the string
    1997             :  * cannot include characters such as :, /, @, ?, =, &, #, ~ which at
    1998             :  * times appear in fully qualified URIs. Instead, it must be built
    1999             :  * piece by piece.
    2000             :  *
    2001             :  * NOTE(review): the character is passed to int_to_hex() as a plain char;
    2002             :  * verify that bytes over 0x7F (UTF-8) still produce two digits.
    2003             :  *
    2004             :  * \param[in] in  URI to encode.
    2005             :  * \param[in] accepted  Extra characters accepted and not encoded (such as
    2006             :  * / in an anchor and ~ in a path). It is passed to strchr() so it cannot
    2007             :  * be nullptr. Use "" instead if no extra characters are accepted.
    2008             :  *
    2009             :  * \return The encoded URI, it may be equal to the input.
    2010             :  */
    2011           3 : std::string uri::urlencode(std::string const & in, char const * accepted)
    2012             : {
    2013           3 :     std::string encoded;
    2014             : 
    2015          11 :     for(const char *u(in.data()); *u != '\0'; ++u)
    2016             :     {
    2017           8 :         if((*u >= 'A' && *u <= 'Z')
    2018           8 :         || (*u >= 'a' && *u <= 'z')
    2019           0 :         || (*u >= '0' && *u <= '9')
    2020           0 :         || *u == '.' || *u == '-' || *u == '_'
    2021           0 :         || strchr(accepted, *u) != nullptr)
    2022             :         {
    2023           8 :             encoded += *u;
    2024             :         }
    2025             :         else
    2026             :         {
    2027             :             // note that we are encoding space as %20 and not +
    2028             :             // because the + should not be supported anymore
    2029           0 :             encoded += '%';
    2030           0 :             encoded += snapdev::int_to_hex(*u, true, 2);
    2031             :         }
    2032             :     }
    2033             : 
    2034           3 :     return encoded;
    2035             : }
    2036             : 
    2037             : 
    2038             : /** \brief Decode a URI so it can be used internally.
    2039             :  *
    2040             :  * This function converts each + to a space and each %XX sequence (two
    2041             :  * hexadecimal digits, either case) to the corresponding byte. In general,
    2042             :  * this is done to use URI components in a query string.
    2043             :  *
    2044             :  * Other characters are copied as is. Unless \p relax is true, only A-Z,
    2045             :  * a-z, 0-9, and . - / _ ~ ! @ , ; : ( ) are accepted. To enter UTF-8
    2046             :  * characters, use % and the UTF-8 bytes. The U+ syntax of MS Internet
    2047             :  * Explorer is not supported. NOTE(review): with a % at the very end of
    2048             :  * \p in, the strict path formats u[1], past the terminating '\0' -- verify.
    2049             :  *
    2050             :  * \exception uri_exception_invalid_uri
    2051             :  * This exception is raised, when \p relax is false, if the URI includes
    2052             :  * a character that should have been encoded or a %XX is not a valid
    2053             :  * hexadecimal number.
    2054             :  *
    2055             :  * \param[in] in  The URI to decode.
    2056             :  * \param[in] relax  Relax the syntax and accept otherwise invalid codes.
    2057             :  *
    2058             :  * \return The decoded URI, it may be equal to the input.
    2059             :  */
    2060          19 : std::string uri::urldecode(std::string const & in, bool relax)
    2061             : {
    2062             :     // Note that if the URI is properly encoded, then latin1 == UTF-8
    2063             : 
    2064          19 :     std::string out;
    2065          75 :     for(char const * u(in.c_str()); *u != '\0'; ++u)
    2066             :     {
    2067          56 :         if(*u == '+')
    2068             :         {
    2069           0 :             out += ' ';
    2070             :         }
    2071          56 :         else if(*u == '%')
    2072             :         {
    2073           0 :             ++u;
    2074             :             char c;
    2075           0 :             if(u[0] >= '0' && u[0] <= '9')
    2076             :             {
    2077           0 :                 c = static_cast<char>((u[0] - '0') * 16);
    2078             :             }
    2079           0 :             else if(u[0] >= 'A' && u[0] <= 'F')
    2080             :             {
    2081           0 :                 c = static_cast<char>((u[0] - ('A' - 10)) * 16);
    2082             :             }
    2083           0 :             else if(u[0] >= 'a' && u[0] <= 'f')
    2084             :             {
    2085           0 :                 c = static_cast<char>((u[0] - ('a' - 10)) * 16);
    2086             :             }
    2087             :             else
    2088             :             {
    2089           0 :                 if(!relax)
    2090             :                 {
    2091             : //#ifdef DEBUG
    2092             : //SNAP_LOG_TRACE() << "url decode?! [" << uri << "]";
    2093             : //#endif
    2094             :                     throw uri_exception_invalid_uri(
    2095             :                           "urldecode(\""
    2096           0 :                         + in
    2097           0 :                         + "\", "
    2098           0 :                         + (relax ? "true" : "false")
    2099           0 :                         + ") failed because of an invalid %xx character (digits are "
    2100           0 :                         + std::to_string(u[0])
    2101           0 :                         + " / "
    2102           0 :                         + std::to_string(u[1])
    2103           0 :                         + ")");
    2104             :                 }
    2105             :                 // use the % as is
    2106           0 :                 out += '%';
    2107           0 :                 --u;
    2108           0 :                 continue;
    2109             :             }
    2110           0 :             if(u[1] >= '0' && u[1] <= '9')
    2111             :             {
    2112           0 :                 c = static_cast<char>(c + u[1] - '0');
    2113             :             }
    2114           0 :             else if(u[1] >= 'A' && u[1] <= 'F')
    2115             :             {
    2116           0 :                 c = static_cast<char>(c + u[1] - ('A' - 10));
    2117             :             }
    2118           0 :             else if(u[1] >= 'a' && u[1] <= 'f')
    2119             :             {
    2120           0 :                 c = static_cast<char>(c + u[1] - ('a' - 10));
    2121             :             }
    2122             :             else
    2123             :             {
    2124           0 :                 if(!relax)
    2125             :                 {
    2126             : //#ifdef DEBUG
    2127             : //SNAP_LOG_TRACE() << "url decode?! [" << in << "] (2)";
    2128             : //#endif
    2129             :                     throw uri_exception_invalid_uri(
    2130             :                           "urldecode(\""
    2131           0 :                          + in
    2132           0 :                          + "\", "
    2133           0 :                          + (relax ? "true" : "false")
    2134           0 :                          + ") failed because of an invalid %xx character (digits are "
    2135           0 :                          + std::to_string(static_cast<int>(u[0]))
    2136           0 :                          + " / "
    2137           0 :                          + std::to_string(static_cast<int>(u[1]))
    2138           0 :                          + ")");
    2139             :                 }
    2140             :                 // NOTE(review): appends the partial value c, not '%' -- verify
    2141           0 :                 out += c;
    2142           0 :                 --u;
    2143           0 :                 continue;
    2144             :             }
    2145             :             // skip one of the two characters here, the other
    2146             :             // is skipped in the for() statement
    2147           0 :             ++u;
    2148           0 :             out += c;
    2149             :         }
    2150          56 :         else if(relax
    2151             : 
    2152             :                 // these are the only characters allowed by the RFC
    2153          56 :                 || (*u >= 'A' && *u <= 'Z')
    2154          56 :                 || (*u >= 'a' && *u <= 'z')
    2155           4 :                 || (*u >= '0' && *u <= '9')
    2156           4 :                 || *u == '.' || *u == '-'
    2157           0 :                 || *u == '/' || *u == '_'
    2158             : 
    2159             :                 // not legal in a URI considered 100% valid but most
    2160             :                 // systems accept the following as is so we do too
    2161           0 :                 || *u == '~' || *u == '!'
    2162           0 :                 || *u == '@' || *u == ','
    2163           0 :                 || *u == ';' || *u == ':'
    2164           0 :                 || *u == '(' || *u == ')'
    2165             :         )
    2166             :         {
    2167             :             // The tilde (~), when used, is often to indicate a user a la
    2168             :             // Unix (~<name>/... or just ~/... for the current user.)
    2169             :             //
    2170             :             // The exclamation point (!) is most often used with the hash
    2171             :             // bang; if that appears in a query string variable, then we
    2172             :             // need to accept at least the exclamation point (the hash has
    2173             :             // to be encoded no matter what.)
    2174             :             //
    2175             :             // The at sign (@) is used in email addresses.
    2176             :             //
    2177             :             // The comma (,) is often used to separate elements; for example
    2178             :             // the paging support uses "page=p3,s30" for show page 3 with
    2179             :             // 30 elements per page.
    2180             :             //
    2181             :             // The semi-colon (;) may appear if you have an HTML entity in
    2182             :             // a query string (i.e. "...?value=this+%26amp;+that".)
    2183             :             //
    2184             :             // The colon (:) can be used to separate values within a
    2185             :             // parameter when the comma is not appropriate.
    2186             :             //
    2187          56 :             out += *u;
    2188             :         }
    2189             :         else
    2190             :         {
    2191             : //#ifdef DEBUG
    2192             : //SNAP_LOG_TRACE() << "url decode?! found an invalid character [" << in << "] (3)";
    2193             : //#endif
    2194             :             throw uri_exception_invalid_uri(
    2195             :                     "urldecode(\""
    2196           0 :                   + in
    2197           0 :                   + "\", "
    2198           0 :                   + (relax ? "true" : "false")
    2199           0 :                   + ") failed because of an invalid character ("
    2200           0 :                   + std::to_string(static_cast<int>(*u))
    2201           0 :                   + ")");
    2202             :         }
    2203             :     }
    2204             : 
    2205          19 :     return out;
    2206             : }
    2207             : 
    2208             : 
    2209             : /** \brief Return the port corresponding to a protocol.
    2210             :  *
    2211             :  * This function determines the default port of a given protocol. Common
    2212             :  * protocols are handled internally; others are looked up with getservbyname(),
    2213             :  * first with g_name_edhttp_protocol_tcp, then g_name_edhttp_protocol_udp.
    2214             :  *
    2215             :  * \param[in] protocol  The protocol to convert to a port number.
    2216             :  *
    2217             :  * \return The corresponding port number or -1 if the function cannot
    2218             :  *         determine that number. NOTE(review): the getservbyname() path
    2219             :  *         returns s_port as is; getservbyname(3) documents that field in
    2220             :  *         network byte order, so an ntohs() may be missing -- confirm.
    2221             :  */
    2222           4 : int uri::protocol_to_port(std::string const & protocol)
    2223             : {
    2224           4 :     if(protocol == g_name_edhttp_protocol_http) // 99% so put it first
    2225             :     {
    2226           4 :         return 80;
    2227             :     }
    2228           0 :     if(protocol == g_name_edhttp_protocol_https) // 0.9% so put it next
    2229             :     {
    2230           0 :         return 443;
    2231             :     }
    2232           0 :     if(protocol == g_name_edhttp_protocol_ftp)
    2233             :     {
    2234           0 :         return 21;
    2235             :     }
    2236           0 :     if(protocol == g_name_edhttp_protocol_ssh)
    2237             :     {
    2238           0 :         return 22;
    2239             :     }
    2240           0 :     if(protocol == g_name_edhttp_protocol_telnet)
    2241             :     {
    2242           0 :         return 23;
    2243             :     }
    2244           0 :     if(protocol == g_name_edhttp_protocol_smtp)
    2245             :     {
    2246           0 :         return 25;
    2247             :     }
    2248           0 :     if(protocol == g_name_edhttp_protocol_gopher)
    2249             :     {
    2250           0 :         return 70;
    2251             :     }
    2252             : 
    2253             :     // not a common service, ask the system... (probably less than 0.01%)
    2254           0 :     servent * s(getservbyname(protocol.c_str(), g_name_edhttp_protocol_tcp));
    2255           0 :     if(s == nullptr)
    2256             :     {
    2257           0 :         s = getservbyname(protocol.c_str(), g_name_edhttp_protocol_udp);
    2258           0 :         if(s == nullptr)
    2259             :         {
    2260             :             // we don't know...
    2261           0 :             return -1;
    2262             :         }
    2263             :     }
    2264           0 :     return s->s_port;
    2265             : }
    2266             : 
    2267             : 
    2268             : 
    2269             : } // namespace edhttp
    2270             : // vim: ts=4 sw=4 et

Generated by: LCOV version 1.13