LCOV - code coverage report
Current view: top level - edhttp - uri.cpp (source / functions) Hit Total Coverage
Test: coverage.info Lines: 182 681 26.7 %
Date: 2022-07-09 10:44:38 Functions: 13 65 20.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : // Copyright (c) 2011-2022  Made to Order Software Corp.  All Rights Reserved
       2             : //
       3             : // https://snapwebsites.org/project/edhttp
       4             : // contact@m2osw.com
       5             : //
       6             : // This program is free software: you can redistribute it and/or modify
       7             : // it under the terms of the GNU General Public License as published by
       8             : // the Free Software Foundation, either version 3 of the License, or
       9             : // (at your option) any later version.
      10             : //
      11             : // This program is distributed in the hope that it will be useful,
      12             : // but WITHOUT ANY WARRANTY; without even the implied warranty of
      13             : // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      14             : // GNU General Public License for more details.
      15             : //
      16             : // You should have received a copy of the GNU General Public License
      17             : // along with this program.  If not, see <https://www.gnu.org/licenses/>.
      18             : 
      19             : // self
      20             : //
      21             : #include    "edhttp/uri.h"
      22             : 
      23             : #include    <edhttp/exception.h>
      24             : 
      25             : 
      26             : // snaplogger
      27             : //
      28             : #include    <snaplogger/message.h>
      29             : 
      30             : 
      31             : // snapdev
      32             : //
      33             : #include    <snapdev/hexadecimal_string.h>
      34             : #include    <snapdev/join_strings.h>
      35             : #include    <snapdev/not_used.h>
      36             : #include    <snapdev/safe_assert.h>
      37             : #include    <snapdev/tokenize_string.h>
      38             : 
      39             : 
      40             : // libaddr
      41             : //
      42             : #include    <libaddr/addr_parser.h>
      43             : 
      44             : 
      45             : // libtld
      46             : //
      47             : #include    <libtld/tld.h>
      48             : 
      49             : 
      50             : // C++
      51             : //
      52             : #include    <cstring>
      53             : 
      54             : 
      55             : // C
      56             : //
      57             : #include    <netdb.h>
      58             : 
      59             : 
      60             : // last include
      61             : //
      62             : #include    <snapdev/poison.h>
      63             : 
      64             : 
      65             : 
      66             : 
      67             : namespace edhttp
      68             : {
      69             : 
      70             : 
      71             : 
      72             : /** \brief This function initializes a default Snap URI object.
      73             :  *
      74             :  * Initialize a default Snap URI object.
      75             :  *
      76             :  * By default, the scheme is set to HTTP and everything else is set to
      77             :  * empty. This also means the original URI is set to empty (and stays that
      78             :  * way unless you later call set_uri() with a valid URI.)
      79             :  *
      80             :  * \sa set_uri()
      81             :  * \sa set_scheme()
      82             :  * \sa set_domain()
      83             :  * \sa set_path()
      84             :  * \sa set_option()
      85             :  * \sa set_query_string()
      86             :  * \sa set_anchor()
      87             :  */
      88          81 : uri::uri()
      89             : {
      90          81 : }
      91             : 
      92             : /** \brief Set the URI to the specified string.
      93             :  *
      94             :  * This function sets the URI to the specified string by calling set_uri().
      95             :  * When set_uri() returns false, an error is logged and nothing is thrown.
      96             :  *
      97             :  * \todo
      98             :  * Should this function throw if the URI is considered invalid?
      99             :  *
     100             :  * \param[in] u  The URI to assign to this Snap URI object.
     101             :  * \param[in] accept_path  Whether to accept path like URIs (such as
     102             :  * "file:///<path>").
     103             :  *
     104             :  * \sa set_uri()
     105             :  */
     106           4 : uri::uri(std::string const & u, bool accept_path)
     107             : {
     108           4 :     if(!set_uri(u, accept_path))
     109             :     {
     110             :         // TBD: should we throw if set_uri() returns false?
     111           0 :         SNAP_LOG_ERROR
     112             :             << "URI \""
     113             :             << u
     114             :             << "\" is considered invalid."
     115             :             << SNAP_LOG_SEND;
     116             :     }
     117           4 : }
     118             : 
     119             : 
     120             : /** \brief Clean up the URI.
     121             :  *
     122             :  * The destructor overwrites the password characters with zeroes if set.
     123             :  *
     124             :  * \note
     125             :  * This is probably very much useless since many other functions make copies
     126             :  * of it and thus the value is likely still available somewhere in the process
     127             :  * memory.
     128             :  */
     129         170 : uri::~uri()
     130             : {
     131          85 :     if(!f_password.empty())
     132             :     {
     133             :         // zero the password bytes in place before the string is destroyed
     134             :         // NOTE(review): the optimizer may drop this memset() -- confirm
     135           0 :         memset(f_password.data(), 0, f_password.length());
     136             :     }
     137          85 : }
     138             : 
     139             : 
     140             : /** \brief Replace the URI of this object.
     141             :  *
     142             :  * This function replaces the current object information with the specified
     143             :  * \p str data.
     144             :  *
     145             :  * Before calling this function YOU must force a URI encoding if the
     146             :  * URI is not yet encoded.
     147             :  *
     148             :  * Anything wrong in the syntax and the function returns false. Wrong
     149             :  * means empty entries, invalid encoding sequence, a bare IP address
     150             :  * when the \p accept_ip is false, etc. The function sets the
     151             :  * last error message accordingly.
     152             :  *
     153             :  * If the function returns false, you can retrieve an error message
     154             :  * with the get_last_error_message() function.
     155             :  *
     156             :  * \todo
     157             :  * At the moment, the RFC is not followed. We should verify the characters
     158             :  * of each element are considered legal for that location.
     159             :  *
     160             :  * \sa
     161             :  * https://datatracker.ietf.org/doc/html/rfc3986#appendix-A
     162             :  *
     163             :  * \param[in] str  The new URI to replace all the current data of this object.
     164             :  * \param[in] accept_path  Whether to accept path like URIs (such as
     165             :  * "file:///<path>").
     166             :  * \param[in] accept_ip  Whether a bare IP address is acceptable.
     167             :  *
     168             :  * \return false if the URI could not be parsed (in which case only the last
     169             :  * error message is changed in the object); true otherwise
     170             :  *
     171             :  * \sa get_last_error_message()
     172             :  */
     173          85 : bool uri::set_uri(
     174             :           std::string const & str
     175             :         , bool accept_path
     176             :         , bool accept_ip)
     177             : {
     178          85 :     char const * u(str.c_str());
     179             : 
     180             :     // retrieve the scheme
     181             :     //
     182          85 :     char const * s(u);
     183         955 :     while(*u != '\0' && *u != ':')
     184             :     {
     185         435 :         ++u;
     186             :     }
     187          85 :     if(u - s < 1 || *u == '\0' || u[1] != '/' || u[2] != '/')
     188             :     {
     189             :         // scheme is not followed by :// or is an empty string
     190             :         //
     191             :         // (TBD: add support for mailto:...?)
     192             :         //
     193          33 :         f_last_error_message = "scheme not followed by \"://\".";
     194          33 :         return false;
     195             :     }
     196         104 :     std::string const uri_scheme(s, u - s);
     197             : 
     198             :     // skip the ://
     199             :     //
     200          52 :     u += 3;
     201             : 
     202         104 :     std::string username;
     203         104 :     std::string password;
     204         104 :     advgetopt::string_list_t sub_domain_names;
     205         104 :     std::string domain_name;
     206         104 :     std::string tld;
     207          52 :     int port(scheme_to_port(uri_scheme));
     208             : 
     209          52 :     if(*u != '/'
     210           3 :     || !accept_path)
     211             :     {
     212             :         // retrieve the sub-domains and domain parts
     213             :         // we may also discover a name, password, and port
     214             :         // (a ':' before the '@' splits name/password, one after it is the port)
     215          50 :         char const * colon1(nullptr);
     216          50 :         char const * colon2(nullptr);
     217          50 :         char const * at(nullptr);
     218         611 :         for(s = u; *u != '\0' && *u != '/'; ++u)
     219             :         {
     220         561 :             if(*u == ':')
     221             :             {
     222           0 :                 if(colon1 == nullptr && at == nullptr)
     223             :                 {
     224           0 :                     colon1 = u;
     225             :                 }
     226             :                 else
     227             :                 {
     228           0 :                     if(at != nullptr)
     229             :                     {
     230           0 :                         if(colon2 != nullptr)
     231             :                         {
     232           0 :                             f_last_error_message = "more than one ':' in the domain name segment (after an '@').";
     233           0 :                             return false;
     234             :                         }
     235           0 :                         colon2 = u;
     236             :                     }
     237             :                     else
     238             :                     {
     239           0 :                         f_last_error_message = "more than one ':' without an '@' character.";
     240           0 :                         return false;
     241             :                     }
     242             :                 }
     243             :             }
     244         561 :             if(*u == '@')
     245             :             {
     246           0 :                 if(at != nullptr)
     247             :                 {
     248             :                     // we cannot have more than one @ character that wasn't escaped
     249             :                     //
     250           0 :                     f_last_error_message = "more than one '@' character found.";
     251           0 :                     return false;
     252             :                 }
     253           0 :                 at = u;
     254             :             }
     255             :         }
     256             :         // without an at (@) colon1 indicates a port
     257             :         //
     258          50 :         if(at == nullptr && colon1 != nullptr)
     259             :         {
     260           0 :             snapdev::SAFE_ASSERT(colon2 == nullptr, "colon2 is not nullptr when at is nullptr?");
     261           0 :             colon2 = colon1;
     262           0 :             colon1 = nullptr;
     263             :         }
     264             : 
     265          92 :         std::string full_domain_name;
     266             : 
     267             :         // retrieve the data
     268             :         //
     269          50 :         if(colon1 != nullptr)
     270             :         {
     271           0 :             snapdev::SAFE_ASSERT(at != nullptr, "missing '@' when colon1 is set.");
     272           0 :             username.insert(0, s, colon1 - s);
     273           0 :             s = colon1 + 1;
     274             :         }
     275          50 :         if(at != nullptr)
     276             :         {
     277           0 :             password.insert(0, s, at - s);
     278           0 :             s = at + 1;
     279             :         }
     280          50 :         if(colon2 != nullptr)
     281             :         {
     282           0 :             full_domain_name.insert(0, s, colon2 - s);
     283           0 :             char const * p(colon2 + 1);
     284           0 :             if(p == u)
     285             :             {
     286             :                 // empty port entries are considered invalid
     287             :                 //
     288           0 :                 f_last_error_message = "port cannot be an empty string.";
     289           0 :                 return false;
     290             :             }
     291           0 :             port = 0;  // Reset port.
     292           0 :             for(; p < u; ++p)
     293             :             {
     294           0 :                 char const d(*p);
     295           0 :                 if(d < '0' || d > '9')
     296             :                 {
     297             :                     // ports only accept digits
     298             :                     //
     299           0 :                     f_last_error_message = "port must be a valid decimal number.";
     300           0 :                     return false;
     301             :                 }
     302           0 :                 port = port * 10 + d - '0';
     303           0 :                 if(port > 65535)
     304             :                 {
     305             :                     // port overflow
     306             :                     //
     307           0 :                     f_last_error_message = "port must be between 0 and 65535.";
     308           0 :                     return false;
     309             :                 }
     310             :             }
     311             :         }
     312             :         else
     313             :         {
     314          50 :             full_domain_name.insert(0, s, u - s);
     315             :         }
     316             : 
     317             :         // verify that there is a domain
     318             :         //
     319          50 :         if(full_domain_name.empty())
     320             :         {
     321           1 :             f_last_error_message = "a domain name is required.";
     322           1 :             return false;
     323             :         }
     324             : 
     325             :         // force a username AND password or neither
     326             :         //
     327          49 :         if(username.empty() ^ password.empty())
     328             :         {
     329           0 :             f_last_error_message = "username and password must both be defined (or define neither).";
     330           0 :             return false;
     331             :         }
     332             : 
     333             :         // break-up the domain in sub-domains, base domain, and TLD
     334             :         //
     335          49 :         if(!process_domain(full_domain_name, sub_domain_names, domain_name, tld))
     336             :         {
     337           9 :             if(!accept_ip)
     338             :             {
     339          14 :                 f_last_error_message =
     340             :                       "could not verify domain name \""
     341          14 :                     + full_domain_name
     342          21 :                     + "\".";
     343           7 :                 return false;
     344             :             }
     345             : 
     346             :             // prevent lookup (we want to verify that it is an IP)
     347             :             //
     348           4 :             addr::addr_parser p;
     349           2 :             p.set_allow(addr::allow_t::ALLOW_REQUIRED_ADDRESS, true);
     350           2 :             p.set_allow(addr::allow_t::ALLOW_ADDRESS_LOOKUP, false);
     351           2 :             p.set_allow(addr::allow_t::ALLOW_PORT, false);
     352           2 :             p.set_protocol(IPPROTO_TCP); // TODO: better manage this issue...
     353           4 :             addr::addr_range::vector_t result(p.parse(full_domain_name));
     354           2 :             if(result.size() != 1)
     355             :             {
     356           0 :                 f_last_error_message =
     357             :                       "could not parse \""
     358           0 :                     + full_domain_name
     359           0 :                     + "\" as a domain name or an IP address.";
     360           0 :                 return false;
     361             :             }
     362           4 :             if(result[0].has_to()
     363           2 :             || result[0].is_range()
     364           4 :             || !result[0].has_from())
     365             :             {
     366             :                 // TBD: after all, a domain name could represent a set of
     367             :                 //      IPs to try to connect to so a range here could be
     368             :                 //      supported as well
     369             :                 //
     370           0 :                 f_last_error_message =
     371             :                       "it looks like \""
     372           0 :                     + full_domain_name
     373           0 :                     + "\" is a range of IP addresses, which is not supported in a URI.";
     374           0 :                 return false;
     375             :             }
     376           2 :             domain_name = result[0].get_from().to_ipv4or6_string(addr::string_ip_t::STRING_IP_BRACKETS);
     377             :         }
     378             :     }
     379             : 
     380             :     // now we are ready to parse further (i.e. path)
     381             :     //
     382          88 :     advgetopt::string_list_t uri_path;
     383          44 :     if(*u != '\0')
     384             :     {
     385             :         // skip the '/'
     386             :         //
     387          43 :         ++u;
     388         189 :         for(s = u; *u != '\0' && *u != '?' && *u != '#'; ++u)
     389             :         {
     390         146 :             if(*u == '/')
     391             :             {
     392          18 :                 if(s != u)
     393             :                 {
     394             :                     // decode one segment
     395             :                     //
     396          15 :                     uri_path.push_back(urldecode(std::string(s, u - s)));
     397             :                 }
     398             :                 // skip the '/'
     399             :                 //
     400          18 :                 s = u + 1;
     401             :             }
     402             :         }
     403          43 :         if(s != u)
     404             :         {
     405             :             // last segment when it does not end with '/'
     406             :             //
     407          14 :             uri_path.push_back(urldecode(std::string(s, u - s)));
     408             :         }
     409             :     }
     410             : 
     411          88 :     uri_options_t query_strings;
     412          44 :     if(*u == '?')
     413             :     {
     414             :         // skip the '?' and then any (invalid?) introductory '&'
     415             :         //
     416           0 :         do
     417             :         {
     418          11 :             ++u;
     419             :         }
     420          11 :         while(*u == '&');
     421          11 :         char const * e(nullptr);
     422         109 :         for(s = u;; ++u)
     423             :         {
     424         109 :             if(*u == '\0' || *u == '&' || *u == '#')
     425             :             {
     426          11 :                 if(e == nullptr)
     427             :                 {
     428             :                     // special case when a parameter appears without value
     429             :                     // ...&name&...
     430             :                     //
     431           2 :                     e = u;
     432             :                 }
     433          11 :                 std::string name(s, e - s);
     434          11 :                 if(name.empty())
     435             :                 {
     436             :                     // this is a very special case!!!
     437             :                     // ...&=value&...
     438             :                     // so we use a "special" name, also even that name could be
     439             :                     // defined in the query string (with '%2A=value' although
     440             :                     // we do not decode the name)
     441             :                     //
     442           0 :                     name = "*";
     443             :                 }
     444             :                 else
     445             :                 {
     446          11 :                     name = urldecode(name);
     447             :                 }
     448             : 
     449             :                 // query strings are saved as options (name/value pairs)
     450             :                 // although the value may not be defined at all (...&name&...)
     451             :                 // query string names are case sensitive (as per 6.2.2.1 of RFC 3986)
     452             :                 //
     453          11 :                 std::string value;
     454          11 :                 if(e != u)
     455             :                 {
     456             :                     // note that we reach here if there is an equal sign,
     457             :                     // the value may still be empty (i.e. u - e - 1 == 0 is
     458             :                     // possible)
     459             :                     //
     460           9 :                     value = std::string(e + 1, u - e - 1);
     461             :                 }
     462          11 :                 if(query_strings.find(name) != query_strings.end())
     463             :                 {
     464             :                     // two parameters with the same name, refused
     465             :                     //
     466             :                     // (this is not correct as far as URIs are concerned,
     467             :                     // the same parameter can appear any number of times,
     468             :                     // but in our world, we consider that useless and
     469             :                     // possibly dangerous)
     470             :                     //
     471           0 :                     f_last_error_message =
     472             :                           "query string \""
     473           0 :                         + name
     474           0 :                         + "\" found more than once.";
     475           0 :                     return false;
     476             :                 }
     477          11 :                 query_strings[name] = urldecode(value);
     478             : 
     479             :                 // skip all the & and then reset s and e
     480             :                 //
     481          11 :                 while(*u == '&')
     482             :                 {
     483           0 :                     ++u;
     484             :                 }
     485          11 :                 if(*u == '\0' || *u == '#')
     486             :                 {
     487             :                     // reached the end of the query strings
     488             :                     //
     489             :                     break;
     490             :                 }
     491           0 :                 s = u;
     492           0 :                 e = nullptr;
     493             :             }
     494          98 :             else if(e == nullptr && *u == '=')
     495             :             {
     496           9 :                 e = u;
     497             :             }
     498          98 :         }
     499             :     }
     500             : 
     501             :     // finally check for an anchor
     502             :     // (note that browsers do not send us the anchor data, however, URIs
     503             :     // defined on the server side can very well include such.)
     504             :     //
     505          88 :     std::string uri_anchor;
     506          44 :     if(*u == '#')
     507             :     {
     508          11 :         ++u;
     509             : 
     510             :         // we need to decode the string so we add the whole string here
     511             :         //
     512          22 :         std::string p(u);
     513          11 :         p = urldecode(p);
     514          11 :         if(!p.empty() && p[0] == '!')
     515             :         {
     516             :             // what do we do here?!
     517             :             //
     518             :             // it seems to me that we should not get those here, but that
     519             :             // could be from someone who wrote the URL in their document.
     520             :             //
     521           0 :             u = p.c_str();
     522           0 :             for(s = u; *u != '\0'; ++u)
     523             :             {
     524           0 :                 if(*u == '/')
     525             :                 {
     526             :                     // encode right here since we have separate strings
     527             :                     //
     528           0 :                     if(s != u)
     529             :                     {
     530           0 :                         uri_path.push_back(urldecode(std::string(s, u - s)));
     531             :                     }
     532             :                     // skip the '/'
     533             :                     //
     534           0 :                     s = u + 1;
     535             :                 }
     536             :             }
     537           0 :             if(s != u)
     538             :             {
     539             :                 // last path that doesn't end with '/'
     540             :                 //
     541           0 :                 uri_path.push_back(urldecode(std::string(s, u - s)));
     542             :             }
     543             :         }
     544             :         else
     545             :         {
     546          11 :             uri_anchor = p;
     547             :         }
     548             :     }
     549             : 
     550             :     // the path may include some ".." which we want to eliminate
     551             :     // note that contrary to Unix we do not accept "/.." as an equivalent
     552             :     // to "/" and we do not verify that all the paths exist... (i.e.
     553             :     // if "c" does not exist under "/a/b" (folder /a/b/c), then it should
     554             :     // be an error to use "/a/b/c/.." since "/a/b/c" cannot be computed.)
     555             :     //
     556          44 :     int max_path(uri_path.size());
     557          73 :     for(int i(0); i < max_path; ++i)
     558             :     {
     559          29 :         if(uri_path[i] == "..")
     560             :         {
     561           0 :             if(i == 0 || max_path < 2)
     562             :             {
     563             :                 // the path starts with a ".." or has too many ".."
     564             :                 //
     565           0 :                 f_last_error_message = "found \"..\" at the beginning of your path.";
     566           0 :                 return false;
     567             :             }
     568             : 
     569             :             // remove the ".." and previous path segment; step back by two so
     570             :             // the loop's ++i re-examines the segment that slid into i - 1
     571           0 :             uri_path.erase(uri_path.begin() + i - 1, uri_path.begin() + i + 1);
     572           0 :             i -= 2;
     573           0 :             max_path -= 2;
     574             :         }
     575             :     }
     576             : 
     577             :     // totally unchanged URI, but only if it is considered valid
     578             :     //
     579          44 :     f_original = str;
     580             : 
     581             :     // now decode all the entries that may be encoded
     582             :     //
     583          44 :     f_scheme = uri_scheme;
     584          44 :     f_username = urldecode(username);
     585          44 :     f_password = urldecode(password);
     586          44 :     if(port != -1)
     587             :     {
     588          33 :         f_port = port;
     589             :     }
     590          44 :     f_domain = domain_name;
     591          44 :     f_top_level_domain = tld;
     592          44 :     f_sub_domains = sub_domain_names;
     593          44 :     f_path = uri_path;
     594             : 
     595             :     // options come from parsing the sub-domains, query strings and paths
     596             :     // and at this point we do not have that information...
     597             :     //
     598          44 :     f_options.clear();
     599          44 :     f_address_ranges.clear();
     600             : 
     601          44 :     f_query_strings = query_strings;
     602          44 :     f_anchor = uri_anchor;
     603             : 
     604          44 :     return true;
     605             : }
     606             : 
     607             : 
     608             : /** \brief Return the original URI used to define the Snap URI object.
     609             :  *
     610             :  * This function returns the original URI as defined when calling the
     611             :  * set_uri() or creating the Snap URI object with the uri() constructor
     612             :  * accepting a string.
     613             :  *
     614             :  * Note that it is possible to use the uri object without using the
     615             :  * set_uri() or a string in the constructor by calling the setters of
     616             :  * the different parts of a URI. This is actually how snap_child does it
     617             :  * because Apache does not give us one plain URI, instead we get pre
     618             :  * separated parts. Therefore the get_original_uri() is always empty when
     619             :  * called from that f_uri variable.
     620             :  *
     621             :  * Note that this URI may still include security issues, although if the
     622             :  * input was not considered valid (i.e. did not have a valid scheme, etc.)
     623             :  * then this function returns an empty string.
     624             :  *
     625             :  * \return A constant reference to the original Snap URI.
     626             :  */
     627           0 : std::string const & uri::get_original_uri() const
     628             : {
     629           0 :     return f_original;
     630             : }
     631             : 
     632             : 
     633             : /** \brief Return the current URI defined in this Snap URI object.
     634             :  *
     635             :  * This function concatenates all the URI parts in a fully qualified URI
     636             :  * and returns the result.
     637             :  *
     638             :  * This function does NOT take the rules in account (since it does not
     639             :  * know anything about them.) So you may want to consider using the
     640             :  * uri_rules::process_uri() function instead.
     641             :  *
     642             :  * \note The returned URI is already encoded as required by HTTP and such.
     643             :  * \exception uri_exception_exclusive_parameters  Anchor and hash bang both set.
     644             :  *
     645             :  * \param[in] use_hash_bang  When this flag is set to true the URI is returned
     646             :  * as a hash bang (i.e. domain/path becomes domain/#!path).
     647             :  * \param[in] redact  If this string is not empty and the URI includes a
     648             :  * password, this string is used instead of the password. This is often set
     649             :  * to something like "XXX" or similar.
     650             :  *
     651             :  * \return The URI represented by this Snap URI object.
     652             :  */
     653           0 : std::string uri::get_uri(bool use_hash_bang, std::string const & redact) const
     654             : {
     655           0 :     std::string result(f_scheme);
     656             : 
     657           0 :     result += "://";
     658             : 
     659             :     // username/password if defined
     660           0 :     if(!f_username.empty())
     661             :     {
     662           0 :         result += urlencode(f_username);
     663           0 :         if(!f_password.empty())
     664             :         {
     665           0 :             result += ':';
     666           0 :             result += urlencode(redact.empty() ? f_password : redact);
     667             :         }
     668           0 :         result += '@';
     669             :     }
     670             : 
     671             :     // full domain
     672             :     // domains should rarely require encoding for special characters, however,
     673             :     // it often is for international domains that make use of UTF-8 characters
     674             :     // outside of the standard ASCII letters and those definitively require
     675             :     // URL encoding to work right.
     676           0 :     result += urlencode(full_domain());
     677           0 :     if(f_port != scheme_to_port(f_scheme))
     678             :     {
     679           0 :         result += ':' + std::to_string(f_port); // ':' separates the host from the port
     680             :     }
     681           0 :     result += '/';
     682             : 
     683             :     // path if no hash bang
     684             :     //
     685           0 :     std::string const p(path());
     686           0 :     if(!use_hash_bang && p.length() > 0)
     687             :     {
     688             :         // avoid a double slash if possible
     689             :         //
     690             :         // XXX: should the path not have a leading slash?
     691             :         //      (as far as I know path() never return a path with a leading
     692             :         //      slash; but we would need a test to make sure of it)
     693             :         //
     694           0 :         if(p[0] == '/')
     695             :         {
     696           0 :             result += p.substr(1);
     697             :         }
     698             :         else
     699             :         {
     700           0 :             result += p;
     701             :         }
     702             :     }
     703             : 
     704             :     // query string
     705           0 :     std::string const q(query_string());
     706           0 :     if(!q.empty())
     707             :     {
     708           0 :         result += '?';
     709           0 :         result += q;
     710             :     }
     711             : 
     712             :     // anchor
     713           0 :     if(!f_anchor.empty())
     714             :     {
     715           0 :         if(use_hash_bang)
     716             :         {
     717             :             // hash bang and anchor are exclusive
     718           0 :             throw uri_exception_exclusive_parameters("you cannot use the hash bang (#!) and an anchor (#) in the same URI");
     719             :         }
     720           0 :         result += '#';
     721           0 :         result += urlencode(f_anchor, "!/~");
     722             :     }
     723             : 
     724             :     // path when using the hash bang but only if not empty
     725           0 :     if(use_hash_bang && !p.empty())
     726             :     {
     727           0 :         result += "#!/";
     728           0 :         result += p;
     729             :     }
     730             : 
     731           0 :     return result;
     732             : }
     733             : 
     734             : 
     735             : /** \brief Retrieve the URI of the website.
     736             :  *
     737             :  * This function returns the URI of the website, without any path,
     738             :  * query string options, or anchor. The port is included only if it
     739             :  * does not correspond to the scheme and the \p include_port flag
     740             :  * is set to true. The result always ends with a slash.
     741             :  *
     742             :  * \param[in] include_port  Whether the port should be included.
     743             :  *
     744             :  * \return The domain name with the scheme and optionally the port.
     745             :  */
     746           0 : std::string uri::get_website_uri(bool include_port) const
     747             : {
     748           0 :     std::string result(f_scheme);
     749             :     // NOTE(review): the domain is appended without URL encoding here -- confirm intended
     750           0 :     result += "://";
     751           0 :     result += full_domain();
     752             : 
     753             :     // only include the port if the caller wants it and if it does not
     754             :     // match the default scheme port
     755             :     //
     756           0 :     if(include_port
     757           0 :     && scheme_to_port(f_scheme) != f_port)
     758             :     {
     759           0 :         result += ':';
     760           0 :         result += std::to_string(f_port);
     761             :     }
     762             : 
     763           0 :     result += '/';
     764             : 
     765           0 :     return result;
     766             : }
     767             : 
     768             : 
     769             : /** \brief Return the last error message.
     770             :  *
     771             :  * This function returns the last error message from the set_uri() call.
     772             :  *
     773             :  * \todo
     774             :  * Make other functions also generate errors.
     775             :  *
     776             :  * \return A copy of the last error message or an empty string.
     777             :  */
     778           0 : std::string uri::get_last_error_message() const
     779             : {
     780           0 :     return f_last_error_message;
     781             : }
     782             : 
     783             : 
     784             : /** \brief Clear the last error message.
     785             :  *
     786             :  * This function makes sure that the last error message is cleared so
     787             :  * new errors can be detected by checking whether the string returned by
     788             :  * get_last_error_message() is empty or not.
     789             :  */
     790           0 : void uri::clear_last_error_message()
     791             : {
     792           0 :     f_last_error_message.clear();
     793           0 : }
     794             : 
     795             : 
     796             : /** \brief Retrieve a part by name.
     797             :  *
     798             :  * This function allows you to retrieve a part by name. An out of range
     799             :  * \p part raises edhttp_uri_exception_out_of_range. Supported parts:
     800             :  *
     801             :  * \li anchor -- The anchor
     802             :  * \li domain -- The domain name
     803             :  * \li full-domain -- The full domain: with sub-domains, domain, and TLD
     804             :  * \li option -- The option number \p part
     805             :  * \li option-count -- The number of options
     806             :  * \li original -- The original URI or ""
     807             :  * \li password -- The password
     808             :  * \li path -- The folder name number \p part
     809             :  * \li path-count -- the number of paths
     810             :  * \li port -- The port number
     811             :  * \li scheme -- The scheme
     812             :  * \li query-string -- The query string number \p part
     813             :  * \li query-string-count -- The number of query strings
     814             :  * \li sub-domain -- The sub-domain name number \p part
     815             :  * \li sub-domain-count -- The number of sub-domains
     816             :  * \li tld or top-level-domain -- the top-level domain name
     817             :  * \li uri -- the full URI as you want it in an href="..." attribute
     818             :  * \li username -- The username
     819             :  *
     820             :  * \param[in] name  The named part to retrieve.
     821             :  * \param[in] part  The part number when required (i.e. sub-domains)
     822             :  *
     823             :  * \return The part as a string, or "" when \p name is not a supported part.
     824             :  */
     825           0 : std::string uri::get_part(std::string const & name, int part) const
     826             : {
     827           0 :     if(name.empty())
     828             :     {
     829             :         // should this be an error?
     830           0 :         return "";
     831             :     }
     832           0 :     switch(name[0])
     833             :     {
     834           0 :     case 'a':
     835           0 :         if(name == "anchor")
     836             :         {
     837           0 :             return f_anchor;
     838             :         }
     839           0 :         break;
     840             : 
     841           0 :     case 'd':
     842           0 :         if(name == "domain")
     843             :         {
     844           0 :             return f_domain;
     845             :         }
     846           0 :         break;
     847             : 
     848           0 :     case 'f':
     849           0 :         if(name == "full-domain")
     850             :         {
     851           0 :             return full_domain();
     852             :         }
     853           0 :         break;
     854             : 
     855           0 :     case 'o':
     856           0 :         if(name == "option")
     857             :         {
     858           0 :             if(static_cast<std::size_t>(part) >= f_options.size())
     859             :             {
     860             :                 throw edhttp_uri_exception_out_of_range(
     861             :                       "option "
     862           0 :                     + std::to_string(part)
     863           0 :                     + " does not exist (range is 0 to "
     864           0 :                     + std::to_string(f_options.size())
     865           0 :                     + ")");
     866             :             }
     867           0 :             auto it(f_options.begin());
     868           0 :             std::advance(it, part);
     869           0 :             return it->second;
     870           0 :         }
     871           0 :         if(name == "option-count")
     872             :         {
     873           0 :             return std::to_string(f_options.size());
     874             :         }
     875           0 :         if(name == "original")
     876             :         {
     877           0 :             return f_original;
     878             :         }
     879           0 :         break;
     880             : 
     881           0 :     case 'p':
     882           0 :         if(name == "password")
     883             :         {
     884           0 :             return f_password;
     885             :         }
     886           0 :         if(name == "path")
     887             :         {
     888           0 :             if(static_cast<std::size_t>(part) >= f_path.size())
     889             :             {
     890             :                 throw edhttp_uri_exception_out_of_range(
     891             :                       "path "
     892           0 :                     + std::to_string(part)
     893           0 :                     + " is not available (range 0 to "
     894           0 :                     + std::to_string(f_path.size())
     895           0 :                     + ")");
     896             :             }
     897           0 :             return f_path[part];
     898             :         }
     899           0 :         if(name == "path-count")
     900             :         {
     901           0 :             return std::to_string(f_path.size());
     902             :         }
     903           0 :         if(name == "port")
     904             :         {
     905           0 :             return std::to_string(f_port);
     906             :         }
     907           0 :         break;
     908             : 
     909           0 :     case 'q':
     910           0 :         if(name == "query-string")
     911             :         {
     912           0 :             if(static_cast<std::size_t>(part) >= f_query_strings.size())
     913             :             {
     914             :                 throw edhttp_uri_exception_out_of_range(
     915             :                       "query-string "
     916           0 :                     + std::to_string(part)
     917           0 :                     + " does not exist (range 0 to "
     918           0 :                     + std::to_string(f_query_strings.size())
     919           0 :                     + ")");
     920             :             }
     921           0 :             auto it(f_query_strings.begin());
     922           0 :             std::advance(it, part);
     923           0 :             return it->second;
     924           0 :         }
     925           0 :         if(name == "query-string-count")
     926             :         {
     927           0 :             return std::to_string(f_query_strings.size());
     928             :         }
     929           0 :         break;
     930             : 
     931           0 :     case 's':
     932           0 :         if(name == "scheme")
     933             :         {
     934           0 :             return f_scheme;
     935             :         }
     936           0 :         if(name == "sub-domain")
     937             :         {
     938           0 :             if(static_cast<std::size_t>(part) >= f_sub_domains.size())
     939             :             {
     940             :                 throw edhttp_uri_exception_out_of_range(
     941             :                       "sub-domain "
     942           0 :                     + std::to_string(part)
     943           0 :                     + " does not exist (range 0 to "
     944           0 :                     + std::to_string(f_sub_domains.size())
     945           0 :                     + ")");
     946             :             }
     947           0 :             return f_sub_domains[part];
     948             :         }
     949           0 :         if(name == "sub-domain-count")
     950             :         {
     951           0 :             return std::to_string(f_sub_domains.size());
     952             :         }
     953           0 :         break;
     954             : 
     955           0 :     case 't':
     956           0 :         if(name == "tld" || name == "top-level-domain")
     957             :         {
     958           0 :             return f_top_level_domain;
     959             :         }
     960           0 :         break;
     961             : 
     962           0 :     case 'u':
     963           0 :         if(name == "uri")
     964             :         {
     965           0 :             return get_uri();
     966             :         }
     967           0 :         if(name == "username")
     968             :         {
     969           0 :             return f_username;
     970             :         }
     971           0 :         break;
     972             : 
     973           0 :     default:
     974             :         // no match for other characters
     975           0 :         break;
     976             : 
     977             :     }
     978             : 
     979           0 :     return "";
     980             : }
     981             : 
     982             : 
     983             : /** \brief Set a user name.
     984             :  *
     985             :  * This function changes the URI user name definition. In many cases,
     986             :  * using a username in your URI is not considered safe.
     987             :  *
     988             :  * You may pass an empty string to remove the user name.
     989             :  *
     990             :  * \param[in] username  The new user name of the URI (saved as is).
     991             :  */
     992           0 : void uri::set_username(std::string const & username)
     993             : {
     994           0 :     f_username = username;
     995           0 : }
     996             : 
     997             : 
     998             : /** \brief Get the user name.
     999             :  *
    1000             :  * This function returns the URI user name. In most cases, a URI should not
    1001             :  * have a user name and password so this function is likely to return an
    1002             :  * empty string.
    1003             :  *
    1004             :  * In most cases, when you define a user name you also define a password.
    1005             :  * Note, however, that without a user name, the password is ignored and
    1006             :  * not output to a URI (like by the get_uri() function). This does not
    1007             :  * prevent the URI from holding a copy of your password.
    1008             :  *
    1009             :  * \return A copy of the URI user name.
    1010             :  *
    1011             :  * \sa get_password()
    1012             :  */
    1013           0 : std::string uri::get_username() const
    1014             : {
    1015           0 :     return f_username;
    1016             : }
    1017             : 
    1018             : 
    1019             : /** \brief Set the URI password.
    1020             :  *
    1021             :  * A URI can include a password. This function allows you to replace that
    1022             :  * password with another.
    1023             :  *
    1024             :  * \note
    1025             :  * The password is not encrypted while kept in memory.
    1026             :  *
    1027             :  * \param[in] password  The URI new password.
    1028             :  */
    1029           0 : void uri::set_password(std::string const & password)
    1030             : {
    1031           0 :     f_password = password;
    1032           0 : }
    1033             : 
    1034             : 
    1035             : /** \brief Get the URI password.
    1036             :  *
    1037             :  * The URI can include a password. This function retrieves that password.
    1038             :  *
    1039             :  * \remark
    1040             :  * A password is not output by the get_uri() function when there is no
    1041             :  * user name. The formatting of the URI is invalid with only a password.
    1042             :  *
    1043             :  * \note
    1044             :  * The password is not encrypted while kept in memory.
    1045             :  *
    1046             :  * \return The password of the URI or an empty string.
    1047             :  *
    1048             :  * \sa get_username()
    1049             :  */
    1050           0 : std::string uri::get_password() const
    1051             : {
    1052           0 :     return f_password;
    1053             : }
    1054             : 
    1055             : 
    1056             : /** \brief Change the scheme.
    1057             :  *
    1058             :  * This function is called to set the scheme. An empty scheme is
    1059             :  * refused with a uri_exception_invalid_parameter exception.
    1060             :  *
    1061             :  * Other than that, the scheme is not checked since this can be used
    1062             :  * for any URI, not just HTTP and HTTPS. It is expected to be [a-z]+.
    1063             :  *
    1064             :  * \param[in] uri_scheme  The name of the scheme.
    1065             :  */
    1066           0 : void uri::set_scheme(std::string const & uri_scheme)
    1067             : {
    1068           0 :     if(uri_scheme.empty())
    1069             :     {
    1070           0 :         throw uri_exception_invalid_parameter("the uri_scheme parameter cannot be an empty string");
    1071             :     }
    1072           0 :     f_scheme = uri_scheme;
    1073           0 : }
    1074             : 
    1075             : 
    1076             : /** \brief Retrieve the scheme.
    1077             :  *
    1078             :  * This value is the name that defines how messages are being
    1079             :  * sent between the client and the server.
    1080             :  *
    1081             :  * The main interface only accepts "http" and "https", but the
    1082             :  * uri object accepts all schemes so one can write URIs
    1083             :  * with schemes such as "ftp", "mail", and "gopher".
    1084             :  *
    1085             :  * \return A constant reference to the scheme of this URI.
    1086             :  */
    1087           0 : std::string const & uri::scheme() const
    1088             : {
    1089           0 :     return f_scheme;
    1090             : }
    1091             : 
    1092             : 
    1093             : /** \brief Process a domain name and break it up.
    1094             :  *
    1095             :  * This function processes a domain name and breaks it up in
    1096             :  * the domain name, the sub-domains, and the TLD.
    1097             :  *
    1098             :  * \note
    1099             :  * If the function returns false, then the out parameters may not
    1100             :  * all be defined properly. None of them should be used in that
    1101             :  * case anyway.
    1102             :  *
    1103             :  * \param[in] full_domain_name  The complete domain with sub-domains and TLD.
    1104             :  * \param[out] sub_domain_names  An array of sub-domains, may be empty.
    1105             :  * \param[out] domain_name  The domain by itself (no TLD and no sub-domain.)
    1106             :  * \param[out] tld  The TLD part by itself.
    1107             :  *
    1108             :  * \return true if the function succeeds, false otherwise
    1109             :  */
    1110          49 : bool uri::process_domain(
    1111             :       std::string const & full_domain_name
    1112             :     , advgetopt::string_list_t & sub_domain_names
    1113             :     , std::string & domain_name
    1114             :     , std::string & tld)
    1115             : {
    1116             :     // first we need to determine the TLD, we use the tld()
    1117             :     // function from the libtld library for this purpose
    1118             : 
    1119             :     // (note that the URI is expected to be encoded so the UTF-8
    1120             :     // encoding is the same as ASCII)
    1121          49 :     struct tld_info info;
    1122          49 :     char const *fd(full_domain_name.c_str());
    1123          49 :     tld_result r(::tld(fd, &info));
    1124          49 :     if(r != TLD_RESULT_SUCCESS)
    1125             :     {
    1126             :         // (should we accept TLD_RESULT_INVALID URIs?)
    1127             :         // the URI doesn't end with a known TLD
    1128           9 :         return false;
    1129             :     }
    1130             : 
    1131             :     // got the TLD, save it in the user's supplied variable
    1132          40 :     tld = urldecode(info.f_tld);
    1133             :     // NOTE(review): assumes info.f_tld points inside fd, at offset info.f_offset -- confirm against libtld
    1134             :     // walk backward from the TLD to the previous period or the start of the string
    1135          40 :     char const *compute_domain_name(fd + info.f_offset);
    1136         360 :     while(compute_domain_name > fd)
    1137             :     {
    1138         160 :         --compute_domain_name;
    1139         160 :         if(*compute_domain_name == '.')
    1140             :         {
    1141           0 :             ++compute_domain_name;
    1142           0 :             break;
    1143             :         }
    1144             :     }
    1145          40 :     domain_name = urldecode(std::string(compute_domain_name, info.f_tld - compute_domain_name));
    1146             : 
    1147             :     // now cut the remainder on each period, these are the sub-domains
    1148             :     // there may be none if there are no other periods in the full name
    1149          40 :     if(compute_domain_name > fd)
    1150             :     {
    1151             :         // forget the period
    1152           0 :         --compute_domain_name;
    1153             :     }
    1154          80 :     std::string all_sub_domains(std::string(fd, compute_domain_name - fd));
    1155             :     // NOTE(review): sub_domain_names is not cleared in this function -- confirm callers pass an empty list
    1156             :     // verify that all the sub-domains are valid (i.e. no "..")
    1157          40 :     if(!all_sub_domains.empty())
    1158             :     {
    1159           0 :         snapdev::tokenize_string(sub_domain_names, all_sub_domains, ".");
    1160             : 
    1161           0 :         for(auto & sub_domain : sub_domain_names)
    1162             :         {
    1163           0 :             if(sub_domain.empty())
    1164             :             {
    1165             :                 // sub-domains cannot be empty or the URI includes
    1166             :                 // two periods one after the other (this should actually
    1167             :                 // be caught by the tld() call.)
    1168             :                 //
    1169           0 :                 return false;
    1170             :             }
    1171             : 
    1172             :             // make sure it is decodable
    1173             :             //
    1174           0 :             sub_domain = urldecode(sub_domain);
    1175             : 
    1176             :             // TODO: look into whether we have to check for periods in the
    1177             :             //       decoded sub-domain names (i.e. a %2E is probably not a
    1178             :             //       valid character in a sub-domain name, at the same time
    1179             :             //       if we reach here, there should not be such a DNS entry...
    1180             :             //       but not automatically because a hacker can take an IP
    1181             :             //       and use it with any URI and send an HTTP request that
    1182             :             //       way... still, we would catch that in our domain/website
    1183             :             //       canonicalization.) Maybe we should decode the domain part
    1184             :             //       first, then parse it.
    1185             :         }
    1186             :     }
    1187             : 
    1188          40 :     return true;
    1189             : }
    1190             : 
    1191             : 
    1192             : /** \brief Set the domain to 'domain'.
    1193             :  *
    1194             :  * This function changes the Snap URI to the specified full domain.
    1195             :  * This means changing the set of sub-domains, the TLD and the domain
    1196             :  * it-self are updated with the corresponding data from the full domain.
    1197             :  * The function takes care of breaking up the input.
    1198             :  *
    1199             :  * If any error is discovered in the full domain name, then the internal
    1200             :  * variables do not get modified.
    1201             :  *
    1202             :  * Note that the domain is not expected to include a user name, password
    1203             :  * and port information. You want to get rid of that information before
    1204             :  * calling this function or consider calling set_uri() instead.
    1205             :  *
    1206             :  * \note
    1207             :  * The only potential problem is when you get an out of memory error
    1208             :  * while allocating a string.
    1209             :  *
    1210             :  * \todo
    1211             :  * Check that the URL is not an IPv4 or IPv6 address. Such will always
    1212             :  * fail and we should look into avoiding the use of an exception in
    1213             :  * that circumstance.
    1214             :  *
    1215             :  * \exception uri_exception_invalid_uri
    1216             :  * If the domain cannot properly be broken up in sub-domains,
    1217             :  * the domain name and the tld, then this exception is raised.
    1218             :  *
    1219             :  * \param[in] full_domain_name  A full domain name, without scheme, path,
    1220             :  *                              query string or anchor.
    1221             :  */
    1222           0 : void uri::set_domain(std::string const & full_domain_name)
    1223             : {
    1224           0 :     advgetopt::string_list_t sub_domain_names;
    1225           0 :     std::string domain_name;
    1226           0 :     std::string tld;
    1227           0 :     if(!process_domain(full_domain_name, sub_domain_names, domain_name, tld))
    1228             :     {
    1229             :         throw uri_exception_invalid_uri(
    1230             :               "could not break up \""
    1231           0 :             + full_domain_name
    1232           0 :             + "\" as a valid domain name");
    1233             :     }
    1234             :     // the input is valid, it is now safe to update the object
    1235           0 :     f_domain = domain_name;
    1236           0 :     f_top_level_domain = tld;
    1237           0 :     f_sub_domains = sub_domain_names;
    1238             :     // the address ranges are reset whenever the domain changes
    1239           0 :     f_address_ranges.clear();
    1240           0 : }
    1241             : 
    1242             : 
    1243             : /** \brief Reconstruct the full domain from the broken down information
    1244             :  *
    1245             :  * This function rebuilds a full domain name from the broken down
    1246             :  * data saved in the Snap URI: the sub-domains, the domain name,
    1247             :  * and the TLD.
    1248             :  *
    1249             :  * \todo
    1250             :  * Add caching so calling the function more than once will be fast.
    1251             :  *
    1252             :  * \return The full domain name representation of this Snap URI.
    1253             :  */
    1254           0 : std::string uri::full_domain() const
    1255             : {
    1256           0 :     std::string full_domains(snapdev::join_strings(f_sub_domains, "."));
    1257           0 :     if(!full_domains.empty())
    1258             :     {
    1259           0 :         full_domains += '.';
    1260             :     }
    1261           0 :     full_domains += f_domain;
    1262           0 :     full_domains += f_top_level_domain;
    1263           0 :     return full_domains;
    1264             : }
    1265             : 
    1266             : /** \brief Get the top level domain name.
    1267             :  *
    1268             :  * This function returns the top level domain name by itself.
    1269             :  * For example, in "www.example.com", the top level domain name
    1270             :  * is "com".
    1271             :  *
    1272             :  * \return The top level domain name of the Snap URI.
    1273             :  */
    1274           4 : std::string const& uri::top_level_domain() const
    1275             : {
    1276           4 :     return f_top_level_domain;
    1277             : }
    1278             : 
    1279             : 
    1280             : /** \brief Get the domain name by itself.
    1281             :  *
    1282             :  * This function returns the stripped down domain name. This name
    1283             :  * has no period since it includes no sub-domains and no top level
    1284             :  * domain names.
    1285             :  *
    1286             :  * \return The stripped down domain name.
    1287             :  */
    1288           4 : std::string const & uri::domain() const
    1289             : {
    1290           4 :     return f_domain;
    1291             : }
    1292             : 
    1293             : 
    1294             : /** \brief Return the concatenated list of sub-domains.
    1295             :  *
    1296             :  * This function returns the concatenated list of sub-domains
    1297             :  * in one string.
    1298             :  *
    1299             :  * \return The concatenated sub-domains separated by periods.
    1300             :  */
    1301           0 : std::string uri::sub_domains() const
    1302             : {
    1303           0 :     return snapdev::join_strings(f_sub_domains, ".");
    1304             : }
    1305             : 
    1306             : 
    1307             : /** \brief Return the number of sub-domains defined.
    1308             :  *
    1309             :  * This function defines a set of sub-domains.
    1310             :  *
    1311             :  * \return The number of sub-domains.
    1312             :  */
    1313           0 : int uri::sub_domain_count() const
    1314             : {
    1315           0 :     return f_sub_domains.size();
    1316             : }
    1317             : 
    1318             : 
    1319             : /** \brief Return one of the sub-domain names.
    1320             :  *
    1321             :  * This function returns the specified domain name.
    1322             :  *
    1323             :  * \param[in] part  The sub-domain name index.
    1324             :  *
    1325             :  * \return The sub-domain corresponding to the specified index.
    1326             :  */
    1327           0 : std::string uri::sub_domain(int part) const
    1328             : {
    1329           0 :     if(static_cast<std::size_t>(part) >= f_sub_domains.size())
    1330             :     {
    1331             :         throw edhttp_uri_exception_out_of_range(
    1332             :               "sub-domain "
    1333           0 :             + std::to_string(part)
    1334           0 :             + " does not exist (range 0 to "
    1335           0 :             + std::to_string(f_sub_domains.size())
    1336           0 :             + ")");
    1337             :     }
    1338           0 :     return f_sub_domains[part];
    1339             : }
    1340             : 
    1341             : 
    1342             : /** \brief Return the array of sub-domains.
    1343             :  *
    1344             :  * This function gives you a constant reference to all the sub-domains
    1345             :  * at once. You may use this function to make use of the list iterator,
    1346             :  * for example.
    1347             :  *
    1348             :  * The strings are in order as in the first is the left-most sub-domain
    1349             :  * (or the furthest away from the domain name.)
    1350             :  *
    1351             :  * \return A list of strings representing the sub-domains.
    1352             :  */
    1353           0 : advgetopt::string_list_t const & uri::sub_domains_list() const
    1354             : {
    1355           0 :     return f_sub_domains;
    1356             : }
    1357             : 
    1358             : 
    1359             : /** \brief Transforms the hostname and port in an array of addresses.
    1360             :  *
    1361             :  * This function generates an array of addresses for the specified
    1362             :  * hostname and port.
    1363             :  *
    1364             :  * The function calls the full_domain() function to get the domain name
    1365             :  * and uses get_port() for the port. From the resulting data, it attempts
    1366             :  * to compute one or more addresses which can be used to connect to
    1367             :  * the specified domain (i.e. if you have an IPv6 and IPv4 or multiple
    1368             :  * computers, then this will return more than one IP address).
    1369             :  *
    1370             :  * The domain can later be retrieved using the addr::get_hostname()
    1371             :  * function.
    1372             :  *
    1373             :  * \return A reference to a vector of addr::addr_range objects.
    1374             :  */
    1375           0 : addr::addr_range::vector_t const & uri::address_ranges()
    1376             : {
    1377           0 :     if(f_address_ranges.empty())
    1378             :     {
    1379           0 :         addr::addr_parser p;
    1380           0 :         p.set_default_port(get_port());
    1381           0 :         p.set_protocol(IPPROTO_TCP);
    1382           0 :         p.set_sort_order(addr::SORT_IPV6_FIRST | addr::SORT_NO_EMPTY);
    1383           0 :         p.set_allow(addr::allow_t::ALLOW_REQUIRED_ADDRESS, true);
    1384           0 :         f_address_ranges = p.parse(full_domain());
    1385             :     }
    1386             : 
    1387           0 :     return f_address_ranges;
    1388             : }
    1389             : 
    1390             : 
    1391             : /** \brief Set the port to the specified string.
    1392             :  *
    1393             :  * This function changes the port of the URI from what it is now
    1394             :  * to the specified value.
    1395             :  *
    1396             :  * The port value must be a positive number or zero.
    1397             :  *
    1398             :  * Negative values or other invalid numbers generate an error.
    1399             :  *
    1400             :  * You can retrieve the port number with the get_port() function.
    1401             :  *
    1402             :  * \exception uri_exception_invalid_parameter
    1403             :  * This function generates an exception if an invalid port is detected
    1404             :  * (negative, larger than 65535, or characters other than 0-9).
    1405             :  *
    1406             :  * \param[in] port  The new port for this Snap URI object.
    1407             :  */
    1408           0 : void uri::set_port(std::string const & port)
    1409             : {
    1410           0 :     long p = std::stol(port);
    1411           0 :     if(p < 0 || p > 65535)
    1412             :     {
    1413             :         throw uri_exception_invalid_parameter(
    1414             :               "\""
    1415           0 :             + port
    1416           0 :             + "\" is an invalid port number");
    1417             :     }
    1418           0 :     f_port = p;
    1419           0 :     f_address_ranges.clear();
    1420           0 : }
    1421             : 
    1422             : 
    1423             : /** \brief Set the port to the specified string.
    1424             :  *
    1425             :  * This function changes the port of the URI from what it is now
    1426             :  * to the specified value.
    1427             :  *
    1428             :  * The port value must be a positive number or zero.
    1429             :  *
    1430             :  * Negative values or invalid numbers generate an error.
    1431             :  *
    1432             :  * \exception uri_exception_invalid_parameter
    1433             :  * This function generates an exception if an invalid port is
    1434             :  * detected (negative or characters other than 0-9).
    1435             :  *
    1436             :  * \param[in] port  The new port for this Snap URI object.
    1437             :  */
    1438           0 : void uri::set_port(int port)
    1439             : {
    1440           0 :     if(port < 0 || port > 65535)
    1441             :     {
    1442             :         throw uri_exception_invalid_parameter(
    1443             :               "port \""
    1444           0 :             + std::to_string(port)
    1445           0 :             + "\" is out of range (1 to 65535)");
    1446             :     }
    1447           0 :     f_port = port;
    1448           0 : }
    1449             : 
    1450             : 
    1451             : /** \brief Retrieve the port number.
    1452             :  *
    1453             :  * This function returns the specific port used to access
    1454             :  * the server. This parameter can be used as one of the
    1455             :  * options used to select a specific website.
    1456             :  *
    1457             :  * \return The port as an integer.
    1458             :  */
    1459           0 : int uri::get_port() const
    1460             : {
    1461           0 :     return f_port;
    1462             : }
    1463             : 
    1464             : 
    1465             : /** \brief Retrieve the port number as a string.
    1466             :  *
    1467             :  * This function returns the specific port used to access
    1468             :  * the server as a string instead of an integer.
    1469             :  *
    1470             :  * \return The port as a string.
    1471             :  */
    1472           0 : std::string uri::get_str_port() const
    1473             : {
    1474           0 :     return std::to_string(f_port);
    1475             : }
    1476             : 
    1477             : 
    1478             : /** \brief Check whether the URI represents a Unix path.
    1479             :  *
    1480             :  * The set_uri() function sets the domain to an empty string if the URI
    1481             :  * represents a Unix URI (i.e. a path to a file representing a socket).
    1482             :  *
    1483             :  * Note that the function does not in any way verify whether the other
    1484             :  * parameters than f_domain are valid and represent a correct Unix
    1485             :  * URI. This is the responsability of the caller.
    1486             :  *
    1487             :  * \return true if the domain string is empty.
    1488             :  */
    1489           0 : bool uri::is_unix() const
    1490             : {
    1491           0 :     return f_domain.empty();
    1492             : }
    1493             : 
    1494             : 
    1495             : /** \brief Replace the current path.
    1496             :  *
    1497             :  * This function can be used to replace the entire path of
    1498             :  * the URI by starting the new path with a slash (/something).
    1499             :  * If the \p path parameter does not start with a slash, then
    1500             :  * it is used as a relative path from the existing path.
    1501             :  *
    1502             :  * A path includes parts separated by one or more slashes (/).
    1503             :  * The function removes parts that are just "." since these
    1504             :  * mean "this directory" and they would not be valid in a
    1505             :  * canonicalized path.
    1506             :  *
    1507             :  * A path may include one or more ".." as a path part. These
    1508             :  * mean remove one part prior.
    1509             :  *
    1510             :  * The ".." are accepted in any path, however, it must be
    1511             :  * correct in that it is not possible to use ".." without at
    1512             :  * least one part just before that (i.e. "/this/one/../other/one" is
    1513             :  * valid, but "/../that/one/is/not" since ".." from / does not
    1514             :  * exist. This is not how Unix usually manages paths since
    1515             :  * in Unix / and /.. are one and the same folder.)
    1516             :  *
    1517             :  * Note that if you wanted to make use of the hash bang feature,
    1518             :  * you would still make use of this function to setup your path in
    1519             :  * the Snap URI object. The hash bang feature determines how
    1520             :  * the path is handled when you get the URI with get_uri().
    1521             :  *
    1522             :  * \exception uri_exception_invalid_path
    1523             :  * The function raises this exception if the path includes more
    1524             :  * ".." than there are "normal" parts on the left side of the "..".
    1525             :  *
    1526             :  * \param[in] uri_path  The new path for this URI.
    1527             :  *
    1528             :  * \sa path()
    1529             :  */
    1530           0 : void uri::set_path(std::string uri_path)
    1531             : {
    1532             :     // check whether the path starts with a '/':
    1533             :     // if so, then we replace the existing path;
    1534             :     // if not, then we append uri_path to the existing path.
    1535             :     //
    1536           0 :     if((uri_path.empty() || uri_path[0] != '/')
    1537           0 :     && !f_path.empty())
    1538             :     {
    1539             :         // append unless the user passed a path starting with "/"
    1540             :         // or the current path is empty
    1541           0 :         uri_path = snapdev::join_strings(f_path, "/") + "/" + uri_path;
    1542             :     }
    1543             : 
    1544             :     // if the path starts with a '/' or includes a double '/'
    1545             :     // within itself, it will be removed because of the SkipEmptyParts
    1546           0 :     advgetopt::string_list_t p;
    1547           0 :     advgetopt::split_string(uri_path, p, {"/"});
    1548             : 
    1549             :     // next we remove all ".." (and the previous part); if ".." was
    1550             :     // at the start of the path, then an exception is raised
    1551             :     //
    1552           0 :     int max_parts(p.size());
    1553           0 :     for(int i(0); i < max_parts; ++i)
    1554             :     {
    1555           0 :         if(p[i] == ".")
    1556             :         {
    1557             :             // canonalization includes removing "." parts which are
    1558             :             // viewed exactly as empty parts
    1559           0 :             p.erase(p.begin() + i);
    1560           0 :             --i;
    1561           0 :             --max_parts;
    1562             :         }
    1563           0 :         else if(p[i] == "..")
    1564             :         {
    1565             :             // note: max should not be less than 2 if i != 0
    1566           0 :             if(i == 0 || max_parts < 2)
    1567             :             {
    1568             :                 throw uri_exception_invalid_path(
    1569             :                       "path \""
    1570           0 :                     + uri_path
    1571           0 :                     + "\" is not valid (it includes too many \"..\")");
    1572             :             }
    1573           0 :             p.erase(p.begin() + i - 1, p.begin() + i + 1);
    1574           0 :             --i;
    1575           0 :             max_parts -= 2;
    1576             :         }
    1577             :     }
    1578             : 
    1579             :     // the input was valid, save the new result
    1580           0 :     f_path.swap(p);
    1581           0 : }
    1582             : 
    1583             : 
    1584             : /** \brief Return the full path.
    1585             :  *
    1586             :  * This function returns the full concatenated path of the URI.
    1587             :  *
    1588             :  * The function encodes the path appropriately. The path can thus be
    1589             :  * used anywhere an encoded path is accepted. The encoding can be
    1590             :  * avoided by setting the \p encoded flag to false.
    1591             :  *
    1592             :  * Note that a non encoded path may include / characters instead of
    1593             :  * the %2F encoded character and thus not match the internal path.
    1594             :  *
    1595             :  * \note
    1596             :  * The URL encode will not encode the ~ character which is at times
    1597             :  * used for user references (~username/...).
    1598             :  *
    1599             :  * \warning
    1600             :  * The result of the function returns what looks like a relative path.
    1601             :  * This is useful since in many cases you need to remove the starting
    1602             :  * slash, so we avoid adding it in the first place. If there is no path,
    1603             :  * the function returns the empty string ("").
    1604             :  *
    1605             :  * \param[in] encoded  Should the resulting path be URL encoded already?
    1606             :  * By default the path is URL encoded as expected by the HTTP scheme.
    1607             :  *
    1608             :  * \return The full path of the URI.
    1609             :  */
    1610           1 : std::string uri::path(bool encoded) const
    1611             : {
    1612           1 :     if(encoded)
    1613             :     {
    1614           2 :         std::string output;
    1615           1 :         bool first(true);
    1616           4 :         for(auto const & segment : f_path)
    1617             :         {
    1618           3 :             if(first)
    1619             :             {
    1620           1 :                 first = false;
    1621             :             }
    1622             :             else
    1623             :             {
    1624           2 :                 output += '/';
    1625             :             }
    1626           3 :             output += urlencode(segment, "~");
    1627             :         }
    1628           1 :         return output;
    1629             :     }
    1630           0 :     return snapdev::join_strings(f_path, "/");
    1631             : }
    1632             : 
    1633             : 
    1634             : /** \brief Retrieve the number of folder names defined in the path.
    1635             :  *
    1636             :  * This function returns the number of folder names defined in the
    1637             :  * path. Each name can be retrieved with the path_folder() function.
    1638             :  *
    1639             :  * The function may return 0 if no folder name is available.
    1640             :  *
    1641             :  * \return The number of folder names available.
    1642             :  *
    1643             :  * \sa path_folder()
    1644             :  */
    1645           0 : int uri::path_count() const
    1646             : {
    1647           0 :     return f_path.size();
    1648             : }
    1649             : 
    1650             : 
    1651             : /** \brief Get a folder name from the path.
    1652             :  *
    1653             :  * This function is used to retrieve the name of a specific folder.
    1654             :  * This is useful when you make use of a folder name as a dynamic
    1655             :  * name. For example with a path such as "journal/george",
    1656             :  * path_folder_name(1); returns "george" which may be the name of
    1657             :  * the journal owner.
    1658             :  *
    1659             :  * When you use this function to retrieve dynamic entries, it is
    1660             :  * assumed that you do it after the path options were removed so a
    1661             :  * path such as "en/journal/george" would be changed to
    1662             :  * "journal/george" and path_folder_name(1); would still return
    1663             :  * "george".
    1664             :  *
    1665             :  * \exception edhttp_uri_exception_out_of_range
    1666             :  * This function raises this exception if the \p part parameter is
    1667             :  * outside the range of folder names available. \p part should be
    1668             :  * between 0 and path_count() - 1. If the path is empty, then this
    1669             :  * function cannot be called.
    1670             :  *
    1671             :  * \param[in] part  The index of the folder to retrieve.
    1672             :  *
    1673             :  * \return The folder name.
    1674             :  *
    1675             :  * \sa path_count();
    1676             :  */
    1677           0 : std::string uri::path_folder_name(int part) const
    1678             : {
    1679           0 :     if(static_cast<std::size_t>(part) >= f_path.size())
    1680             :     {
    1681             :         throw edhttp_uri_exception_out_of_range(
    1682             :               "no path section "
    1683           0 :             + std::to_string(part)
    1684           0 :             + " available (range 0 to "
    1685           0 :             + std::to_string(f_path.size())
    1686           0 :             + ")");
    1687             :     }
    1688           0 :     return f_path[part];
    1689             : }
    1690             : 
    1691             : 
    1692             : /** \brief The array of folder names.
    1693             :  *
    1694             :  * This function returns a reference to the array used to hold the
    1695             :  * folder names forming the URI path.
    1696             :  *
    1697             :  * \return A constant reference to the list of string forming the path.
    1698             :  */
    1699           0 : advgetopt::string_list_t const & uri::path_list() const
    1700             : {
    1701           0 :     return f_path;
    1702             : }
    1703             : 
    1704             : 
    1705             : /** \brief Set an option.
    1706             :  *
    1707             :  * This function is used to define the value of an option in a URI.
    1708             :  * Remember that options only work for URIs that are clearly marked
    1709             :  * as from this website.
    1710             :  *
    1711             :  * Setting the value to an empty string has the effect of deleting
    1712             :  * the given option. You may also call the unset_option() function.
    1713             :  *
    1714             :  * \param[in] name  The name of the option to set.
    1715             :  * \param[in] value  The new value for this option.
    1716             :  *
    1717             :  * \sa option();
    1718             :  * \sa unset_option();
    1719             :  */
    1720           0 : void uri::set_option(std::string const& name, std::string const& value)
    1721             : {
    1722           0 :     if(value.empty())
    1723             :     {
    1724           0 :         auto it(f_options.find(name));
    1725           0 :         if(it != f_options.end())
    1726             :         {
    1727           0 :             f_options.erase(it);
    1728             :         }
    1729             :     }
    1730             :     else
    1731             :     {
    1732           0 :         f_options[name] = value;
    1733             :     }
    1734           0 : }
    1735             : 
    1736             : /** \brief Remove the specified option.
    1737             :  *
    1738             :  * This function is used to remove (delete) an option from the list
    1739             :  * of options. For example, going to a page where the language is
    1740             :  * neutral, you probably want to remove the language option.
    1741             :  *
    1742             :  * \param[in] name  The name of the option to remove.
    1743             :  *
    1744             :  * \sa set_option();
    1745             :  */
    1746           0 : void uri::unset_option(std::string const & name)
    1747             : {
    1748           0 :     auto it(f_options.find(name));
    1749           0 :     if(it != f_options.end())
    1750             :     {
    1751           0 :         f_options.erase(it);
    1752             :     }
    1753           0 : }
    1754             : 
    1755             : 
    1756             : /** \brief Retrieve the value of the named option.
    1757             :  *
    1758             :  * This function retrieves the current value of the named option.
    1759             :  *
    1760             :  * If the option is not defined, then the function returns an empty
    1761             :  * string. The empty string always represents an undefined option.
    1762             :  *
    1763             :  * \param[in] name  The name of the option to retrieve.
    1764             :  *
    1765             :  * \return The value of the named option.
    1766             :  *
    1767             :  * \sa set_option();
    1768             :  */
    1769           0 : std::string uri::option(std::string const& name) const
    1770             : {
    1771           0 :     auto it(f_options.find(name));
    1772           0 :     if(it != f_options.end())
    1773             :     {
    1774           0 :         return it->second;
    1775             :     }
    1776           0 :     return std::string();
    1777             : }
    1778             : 
    1779             : 
    1780             : /** \brief Retrieve the number of currently defined options.
    1781             :  *
    1782             :  * This function returns the number of options that can be retrieved
    1783             :  * with the option() function using an index. If the function returns
    1784             :  * zero, then no options are defined.
    1785             :  *
    1786             :  * \return The number of options defined in this URI.
    1787             :  */
    1788           0 : int uri::option_count() const
    1789             : {
    1790           0 :     return f_options.size();
    1791             : }
    1792             : 
    1793             : 
    1794             : /** \brief Retrieve an option by index.
    1795             :  *
    1796             :  * This function allows you to retrieve the name and value of an option
    1797             :  * using its index. The index (\p part) must be a number between 0 and
    1798             :  * option_count() - 1.
    1799             :  *
    1800             :  * \param[in] part  The index of the option to retrieve.
    1801             :  * \param[out] name  The name of the option being retrieved.
    1802             :  *
    1803             :  * \return The value of the option being retrieved.
    1804             :  *
    1805             :  * \sa option();
    1806             :  * \sa option_count();
    1807             :  */
    1808           0 : std::string uri::option(int part, std::string & name) const
    1809             : {
    1810           0 :     if(static_cast<std::size_t>(part) >= f_options.size())
    1811             :     {
    1812             :         throw edhttp_uri_exception_out_of_range(
    1813             :               "no option "
    1814           0 :             + std::to_string(part)
    1815           0 :             + " available (range 0 to "
    1816           0 :             + std::to_string(f_options.size())
    1817           0 :             + ")");
    1818             :     }
    1819           0 :     auto it(f_options.begin());
    1820           0 :     std::advance(it, part);
    1821           0 :     name = it->first;
    1822           0 :     return it->second;
    1823             : }
    1824             : 
    1825             : 
    1826             : /** \brief Retrieve the map of options.
    1827             :  *
    1828             :  * This function returns the map of options so one can use the begin()
    1829             :  * and end() functions to go through the entire list without having to
    1830             :  * use the option() function.
    1831             :  *
    1832             :  * \return A constant reference to the map of options.
    1833             :  *
    1834             :  * \sa option();
    1835             :  */
    1836           0 : uri::uri_options_t const& uri::options_list() const
    1837             : {
    1838           0 :     return f_options;
    1839             : }
    1840             : 
    1841             : 
    1842             : /** \brief Set a query string option.
    1843             :  *
    1844             :  * This function is used to change the named query string with the
    1845             :  * specified value.
    1846             :  *
    1847             :  * A query string option with an empty string as a value is considered
    1848             :  * undefined and is not shown on the final URI. So setting an option to
    1849             :  * the empty string ("") is equivalent to unset_query_option().
    1850             :  *
    1851             :  * \param[in] name  The name of the query string option.
    1852             :  * \param[in] value  The value of the query string option.
    1853             :  */
    1854           0 : void uri::set_query_option(std::string const& name, std::string const& value)
    1855             : {
    1856           0 :     if(name.empty())
    1857             :     {
    1858             :         // this happens if the name was not defined in the configuration file
    1859           0 :         return;
    1860             :     }
    1861             : 
    1862             :     // TODO: see whether we currently use this feature, because it is rather
    1863             :     //       incorrect, it is possible to have an empty value in a query
    1864             :     //       string (i.e. "...?logout")
    1865             :     //
    1866             :     //       we should use unset_query_option() instead
    1867             :     //
    1868           0 :     if(value.empty())
    1869             :     {
    1870           0 :         auto it(f_query_strings.find(name));
    1871           0 :         if(it != f_query_strings.end())
    1872             :         {
    1873           0 :             f_query_strings.erase(it);
    1874             :         }
    1875             :     }
    1876             :     else
    1877             :     {
    1878           0 :         f_query_strings[name] = value;
    1879             :     }
    1880             : }
    1881             : 
    1882             : 
    1883             : /** \brief Unset the named query string option.
    1884             :  *
    1885             :  * This function ensures that the named query string option is deleted
    1886             :  * and thus will not appear in the URI.
    1887             :  *
    1888             :  * \param[in] name  The name of the option to delete.
    1889             :  */
    1890           0 : void uri::unset_query_option(std::string const& name)
    1891             : {
    1892           0 :     if(name.empty())
    1893             :     {
    1894             :         // this happens if the name was not defined in the configuration file
    1895           0 :         return;
    1896             :     }
    1897             : 
    1898           0 :     auto it(f_query_strings.find(name));
    1899           0 :     if(it != f_query_strings.end())
    1900             :     {
    1901           0 :         f_query_strings.erase(it);
    1902             :     }
    1903             : }
    1904             : 
    1905             : 
    1906             : /** \brief Set the query string.
    1907             :  *
    1908             :  * This function can be used to reset the query string to the
    1909             :  * parameters defined in this URI query string.
    1910             :  *
    1911             :  * The function does not clear all the existing query strings,
    1912             :  * it only replaces existing entries. This means also means that
    1913             :  * it does not detect whether the input includes the same option
    1914             :  * more than once and only the last one sticks.
    1915             :  *
    1916             :  * The query string variable names and data gets URL decoded.
    1917             :  *
    1918             :  * \warning
    1919             :  * This function does not clear the existing list of query
    1920             :  * string options.
    1921             :  *
    1922             :  * \param[in] uri_query_string  The query string to add to the existing data.
    1923             :  */
    1924           0 : void uri::set_query_string(std::string const & uri_query_string)
    1925             : {
    1926           0 :     advgetopt::string_list_t value_pairs;
    1927           0 :     advgetopt::split_string(uri_query_string, value_pairs, {"&"});
    1928           0 :     for(auto const & name_value : value_pairs)
    1929             :     {
    1930           0 :         std::string::size_type const pos(name_value.find('='));
    1931           0 :         if(pos == std::string::npos)
    1932             :         {
    1933             :             // no value
    1934           0 :             f_query_strings[urldecode(name_value)] = std::string();
    1935             :         }
    1936           0 :         else if(pos == 0)
    1937             :         {
    1938             :             // name is missing, use "*" instead
    1939           0 :             f_query_strings["*"] = urldecode(name_value.substr(1));
    1940             :         }
    1941             :         else
    1942             :         {
    1943           0 :             f_query_strings[urldecode(name_value.substr(0, pos))] = urldecode(name_value.substr(pos + 1));
    1944             :         }
    1945             :     }
    1946           0 : }
    1947             : 
    1948             : 
    1949             : /** \brief Clear all query option strings.
    1950             :  *
    1951             :  * This is useful if you want to "start fresh" with the base URI.
    1952             :  */
    1953           0 : void uri::clear_query_options()
    1954             : {
    1955           0 :     f_query_strings.clear();
    1956           0 : }
    1957             : 
    1958             : 
    1959             : /** \brief Generate the query string.
    1960             :  *
    1961             :  * This function goes through the list of defined query string options
    1962             :  * and builds the resulting query string to generate the final URI.
    1963             :  *
    1964             :  * The result is already URL ecoded since you would otherwise not know
    1965             :  * where/which equal and ampersand are legal.
    1966             :  *
    1967             :  * \return The URI query string.
    1968             :  */
    1969           0 : std::string uri::query_string() const
    1970             : {
    1971           0 :     std::string result;
    1972           0 :     for(auto const & name_value : f_query_strings)
    1973             :     {
    1974           0 :         if(!result.empty())
    1975             :         {
    1976           0 :             result += '&';
    1977             :         }
    1978           0 :         result += urlencode(name_value.first);
    1979           0 :         if(!name_value.second.empty())
    1980             :         {
    1981             :             // add the value only if not empty
    1982           0 :             result += '=';
    1983             :             // we now support commas in URIs because... well... it is
    1984             :             // common and it won't break anything
    1985             :             //
    1986           0 :             result += urlencode(name_value.second, ",");
    1987             :         }
    1988             :     }
    1989           0 :     return result;
    1990             : }
    1991             : 
    1992             : 
    1993             : /** \brief Retrieve whether a query option is defined.
    1994             :  *
    1995             :  * This function returns true if a query option is defined. Note that
    1996             :  * an option may be the empty string ("") and that cannot be distinguish
    1997             :  * from the empty string ("") returned when the query_option() function
    1998             :  * is used against an undefined option.
    1999             :  *
    2000             :  * \param[in] name  The name of the option to query.
    2001             :  *
    2002             :  * \return true when the has_query_option() is defined.
    2003             :  *
    2004             :  * \sa query_option();
    2005             :  */
    2006           0 : bool uri::has_query_option(std::string const & name) const
    2007             : {
    2008           0 :     if(name.empty())
    2009             :     {
    2010             :         // this happens if the name was not defined in the configuration file
    2011           0 :         return false;
    2012             :     }
    2013             : 
    2014           0 :     return f_query_strings.find(name) != f_query_strings.end();
    2015             : }
    2016             : 
    2017             : /** \brief Retrieve a query string option.
    2018             :  *
    2019             :  * This function can be used to retrieve the current value of a query
    2020             :  * string option.
    2021             :  *
    2022             :  * Note that you cannot know whether an option is defined using this
    2023             :  * function since the function returns an empty string whether it is
    2024             :  * empty or undefined. Instead, use the has_query_option() function
    2025             :  * to determine whether an option is defined.
    2026             :  *
    2027             :  * \param[in] name  Name of the query string option to return.
    2028             :  *
    2029             :  * \sa has_query_option();
    2030             :  */
    2031           0 : std::string uri::query_option(std::string const & name) const
    2032             : {
    2033           0 :     if(!name.empty())
    2034             :     {
    2035           0 :         auto const it(f_query_strings.find(name));
    2036           0 :         if(it != f_query_strings.end())
    2037             :         {
    2038           0 :             return it->second;
    2039             :         }
    2040             :     }
    2041             : 
    2042           0 :     return std::string();
    2043             : }
    2044             : 
    2045             : /** \brief Return the number of options are defined in the query string.
    2046             :  *
    2047             :  * This function returns the number of options currently defined in the
    2048             :  * query string. This is useful to go over the list of options with the
    2049             :  * query_option(int part, QString& name) function.
    2050             :  *
    2051             :  * \return The number of query string options currently defined.
    2052             :  */
    2053           0 : int uri::query_option_count() const
    2054             : {
    2055           0 :     return f_query_strings.size();
    2056             : }
    2057             : 
    2058             : /** \brief Retrieve an option specifying its index.
    2059             :  *
    2060             :  * This function returns the name and value of the option defined at
    2061             :  * index \p part.
    2062             :  *
    2063             :  * The index must be between 0 and the number of options available minus
    2064             :  * 1 (i.e. query_options_count() - 1).
    2065             :  *
    2066             :  * \param[in] part  The index of the query string option to retrieve.
    2067             :  * \param[out] name  The name of the option at that index.
    2068             :  *
    2069             :  * \return The value of the option at that index.
    2070             :  *
    2071             :  * \sa query_option_count();
    2072             :  */
    2073           0 : std::string uri::query_option(int part, std::string& name) const
    2074             : {
    2075           0 :     if(static_cast<std::size_t>(part) >= f_query_strings.size())
    2076             :     {
    2077             :         throw edhttp_uri_exception_out_of_range(
    2078             :               "query-option "
    2079           0 :             + std::to_string(part)
    2080           0 :             + " does not exist (range 0 to "
    2081           0 :             + std::to_string(f_query_strings.size())
    2082           0 :             + ")");
    2083             :     }
    2084           0 :     auto it(f_query_strings.begin());
    2085           0 :     std::advance(it, part);
    2086           0 :     name = it->first;
    2087           0 :     return it->second;
    2088             : }
    2089             : 
    2090             : /** \brief Return the complete map of query strings.
    2091             :  *
    2092             :  * This function returns a reference to the internal map of query strings.
    2093             :  * This is useful to use the begin()/end() and other functions to go through
    2094             :  * the map.
    2095             :  *
    2096             :  * \return A constant reference to the internal query string map.
    2097             :  */
    2098           0 : const uri::uri_options_t& uri::query_string_list() const
    2099             : {
    2100           0 :     return f_query_strings;
    2101             : }
    2102             : 
    2103             : 
    2104             : /** \brief Define the anchor for this URI.
    2105             :  *
    2106             :  * This function is used to setup the anchor used in this URI.
    2107             :  *
    2108             :  * An anchor can be defined only if you don't plan to make use of
    2109             :  * the hash bang feature (see get_uri() for more info) since both
    2110             :  * features make use of the same technical option.
    2111             :  *
    2112             :  * The \p anchor parameter cannot include a '#' character.
    2113             :  *
    2114             :  * \note
    2115             :  * The anchor string can start with a bang (!) since it is legal
    2116             :  * in an anchor. If you are not using the hash bang feature, it
    2117             :  * is fine, although it may confuse some search engines.
    2118             :  *
    2119             :  * \param[in] uri_anchor  The new value for the anchor.
    2120             :  *
    2121             :  * \sa get_uri()
    2122             :  */
    2123           0 : void uri::set_anchor(std::string const & uri_anchor)
    2124             : {
    2125           0 :     if(uri_anchor.find('#') != std::string::npos)
    2126             :     {
    2127             :         throw uri_exception_invalid_parameter(
    2128             :               "anchor string \""
    2129           0 :             + uri_anchor
    2130           0 :             + "\" cannot include a '#' character");
    2131             :     }
    2132           0 :     f_anchor = uri_anchor;
    2133           0 : }
    2134             : 
    2135             : 
    2136             : /** \brief Retrieve the current anchor.
    2137             :  *
    2138             :  * This function returns a copy of the current anchor. The empty string
    2139             :  * represents the fact that the anchor is not defined.
    2140             :  *
    2141             :  * \return A constant reference to the anchor.
    2142             :  */
    2143           0 : std::string const & uri::anchor() const
    2144             : {
    2145           0 :     return f_anchor;
    2146             : }
    2147             : 
    2148             : 
    2149             : /** \brief Compare two URIs against each other.
    2150             :  *
    2151             :  * This function compares two URIs and returns true if they are
    2152             :  * equal. The URIs are tested using what the get_uri() function
    2153             :  * generates which means not 100% of the information included
    2154             :  * in the Snap URI object.
    2155             :  *
    2156             :  * \param[in] rhs  The right handside to compare this against.
    2157             :  *
    2158             :  * \return true when both URIs are equal.
    2159             :  */
    2160           0 : bool uri::operator == (const uri& rhs) const
    2161             : {
    2162           0 :     return get_uri() == rhs.get_uri();
    2163             : }
    2164             : 
    2165             : 
    2166             : /** \brief Compare two URIs against each other.
    2167             :  *
    2168             :  * This function compares two URIs and returns true if they are
    2169             :  * not equal. The URIs are tested using what the get_uri() function
    2170             :  * generates which means not 100% of the information included
    2171             :  * in the Snap URI object.
    2172             :  *
    2173             :  * \param[in] rhs  The right handside to compare this against.
    2174             :  *
    2175             :  * \return true when both URIs differ.
    2176             :  */
    2177           0 : bool uri::operator != (uri const & rhs) const
    2178             : {
    2179           0 :     return !operator == (rhs);
    2180             : }
    2181             : 
    2182             : 
    2183             : /** \brief Compare two URIs against each other.
    2184             :  *
    2185             :  * This function compares two URIs and returns true if this is
    2186             :  * smaller than the \p rhs parameter. The URIs are tested using
    2187             :  * what the get_uri() function generates which means not 100% of
    2188             :  * the information included in the Snap URI object.
    2189             :  *
    2190             :  * \param[in] rhs  The right handside to compare this against.
    2191             :  *
    2192             :  * \return true when this is smaller than rhs.
    2193             :  */
    2194           0 : bool uri::operator < (uri const & rhs) const
    2195             : {
    2196           0 :     return get_uri() < rhs.get_uri();
    2197             : }
    2198             : 
    2199             : 
    2200             : /** \brief Compare two URIs against each other.
    2201             :  *
    2202             :  * This function compares two URIs and returns true if this is
    2203             :  * smaller or equal to \p rhs. The URIs are tested using
    2204             :  * what the get_uri() function generates which means not 100% of
    2205             :  * the information included in the Snap URI object.
    2206             :  *
    2207             :  * \param[in] rhs  The right handside to compare this against.
    2208             :  *
    2209             :  * \return true when this is smaller or equal to rhs.
    2210             :  */
    2211           0 : bool uri::operator <= (uri const & rhs) const
    2212             : {
    2213           0 :     return get_uri() <= rhs.get_uri();
    2214             : }
    2215             : 
    2216             : 
    2217             : /** \brief Compare two URIs against each other.
    2218             :  *
    2219             :  * This function compares two URIs and returns true if this is
    2220             :  * larger than the \p rhs parameter. The URIs are tested using
    2221             :  * what the get_uri() function generates which means not 100% of
    2222             :  * the information included in the Snap URI object.
    2223             :  *
    2224             :  * \param[in] rhs  The right handside to compare this against.
    2225             :  *
    2226             :  * \return true when this is larger than rhs.
    2227             :  */
    2228           0 : bool uri::operator > (uri const & rhs) const
    2229             : {
    2230           0 :     return !operator <= (rhs);
    2231             : }
    2232             : 
    2233             : 
    2234             : /** \brief Compare two URIs against each other.
    2235             :  *
    2236             :  * This function compares two URIs and returns true if this is
    2237             :  * larger or equal to \p rhs. The URIs are tested using
    2238             :  * what the get_uri() function generates which means not 100% of
    2239             :  * the information included in the Snap URI object.
    2240             :  *
    2241             :  * \param[in] rhs  The right handside to compare this against.
    2242             :  *
    2243             :  * \return true when this is larger or equal to rhs.
    2244             :  */
    2245           0 : bool uri::operator >= (uri const & rhs) const
    2246             : {
    2247           0 :     return !operator < (rhs);
    2248             : }
    2249             : 
    2250             : 
    2251             : /** \brief Encode a URI so it is valid for HTTP.
    2252             :  *
    2253             :  * This function encodes all the characters that need to be encoded
    2254             :  * for a URI to be valid for the HTTP scheme.
    2255             :  *
    2256             :  * WARNING: This encodes the entire string. Remember that the string
    2257             :  * cannot include characters such as :, /, @, ?, =, &, #, ~ which at
    2258             :  * times appear in fully qualified URIs. Instead, it must be built
    2259             :  * piece by piece.
    2260             :  *
    2261             :  * Note that we do not encode underscores.
    2262             :  *
    2263             :  * The \p accepted parameter can be used to avoid converting certain
    2264             :  * characters (such as / in an anchor and ~ in a path).
    2265             :  *
    2266             :  * \param[in] in  URI to encode.
    2267             :  * \param[in] accepted  Extra characters accepted and not encoded. This
    2268             :  * parameter cannot be set to nullptr. Use "" instead if no extra characters
    2269             :  * are accepted.
    2270             :  *
    2271             :  * \return The encoded URI, it may be equal to the input.
    2272             :  */
    2273           3 : std::string uri::urlencode(std::string const & in, char const * accepted)
    2274             : {
    2275           3 :     std::string encoded;
    2276             : 
    2277          11 :     for(const char *u(in.data()); *u != '\0'; ++u)
    2278             :     {
    2279           8 :         if((*u >= 'A' && *u <= 'Z')
    2280           8 :         || (*u >= 'a' && *u <= 'z')
    2281           0 :         || (*u >= '0' && *u <= '9')
    2282           0 :         || *u == '.' || *u == '-' || *u == '_'
    2283           0 :         || strchr(accepted, *u) != nullptr)
    2284             :         {
    2285           8 :             encoded += *u;
    2286             :         }
    2287             :         else
    2288             :         {
    2289             :             // note that we are encoding space as %20 and not +
    2290             :             // because the + should not be supported anymore
    2291           0 :             encoded += '%';
    2292           0 :             encoded += snapdev::int_to_hex(*u, true, 2);
    2293             :         }
    2294             :     }
    2295             : 
    2296           3 :     return encoded;
    2297             : }
    2298             : 
    2299             : 
    2300             : /** \brief Decode a URI so it can be used internally.
    2301             :  *
    2302             :  * This function decodes all the characters that need to be decoded
    2303             :  * in a URI. In general, this is done to use URI components in a
    2304             :  * query string, although it needs to be applied to the entire URI.
    2305             :  *
    2306             :  * The input is expected to be a valid ASCII string (i.e. A-Z,
    2307             :  * 0-9, ., %, _, -, ~, and ! characters.) To enter UTF-8 characters,
    2308             :  * use the % and UTF-8 encoded characters. At this point we do not
    2309             :  * support the U+ syntax which MS Internet Explorer supports. It may
    2310             :  * be necessary to add that support at some point.
    2311             :  *
    2312             :  * \exception uri_exception_invalid_uri
    2313             :  * This exception is raised if an invalid character is found in the
    2314             :  * input URI. This means the URI includes a character that should
    2315             :  * have been encoded or a %XX is not a valid hexadecimal number.
    2316             :  *
    2317             :  * \param[in] in  The URI to encode.
    2318             :  * \param[in] relax  Relax the syntax and accept otherwise invalid codes.
    2319             :  *
    2320             :  * \return The decoded URI, it may be equal to the input.
    2321             :  */
    2322         230 : std::string uri::urldecode(std::string const & in, bool relax)
    2323             : {
    2324             :     // Note that if the URI is properly encoded, then latin1 == UTF-8
    2325             : 
    2326         230 :     std::string out;
    2327        1032 :     for(char const * u(in.c_str()); *u != '\0'; ++u)
    2328             :     {
    2329         802 :         if(*u == '+')
    2330             :         {
    2331           0 :             out += ' ';
    2332             :         }
    2333         802 :         else if(*u == '%')
    2334             :         {
    2335           0 :             ++u;
    2336             :             char c;
    2337           0 :             if(u[0] >= '0' && u[0] <= '9')
    2338             :             {
    2339           0 :                 c = static_cast<char>((u[0] - '0') * 16);
    2340             :             }
    2341           0 :             else if(u[0] >= 'A' && u[0] <= 'F')
    2342             :             {
    2343           0 :                 c = static_cast<char>((u[0] - ('A' - 10)) * 16);
    2344             :             }
    2345           0 :             else if(u[0] >= 'a' && u[0] <= 'f')
    2346             :             {
    2347           0 :                 c = static_cast<char>((u[0] - ('a' - 10)) * 16);
    2348             :             }
    2349             :             else
    2350             :             {
    2351           0 :                 if(!relax)
    2352             :                 {
    2353             : //#ifdef DEBUG
    2354             : //SNAP_LOG_TRACE() << "url decode?! [" << uri << "]";
    2355             : //#endif
    2356             :                     throw uri_exception_invalid_uri(
    2357             :                           "urldecode(\""
    2358           0 :                         + in
    2359           0 :                         + "\", "
    2360           0 :                         + (relax ? "true" : "false")
    2361           0 :                         + ") failed because of an invalid %xx character (digits are "
    2362           0 :                         + std::to_string(u[0])
    2363           0 :                         + " / "
    2364           0 :                         + std::to_string(u[1])
    2365           0 :                         + ")");
    2366             :                 }
    2367             :                 // use the % as is
    2368           0 :                 out += '%';
    2369           0 :                 --u;
    2370           0 :                 continue;
    2371             :             }
    2372           0 :             if(u[1] >= '0' && u[1] <= '9')
    2373             :             {
    2374           0 :                 c = static_cast<char>(c + u[1] - '0');
    2375             :             }
    2376           0 :             else if(u[1] >= 'A' && u[1] <= 'F')
    2377             :             {
    2378           0 :                 c = static_cast<char>(c + u[1] - ('A' - 10));
    2379             :             }
    2380           0 :             else if(u[1] >= 'a' && u[1] <= 'f')
    2381             :             {
    2382           0 :                 c = static_cast<char>(c + u[1] - ('a' - 10));
    2383             :             }
    2384             :             else
    2385             :             {
    2386           0 :                 if(!relax)
    2387             :                 {
    2388             : //#ifdef DEBUG
    2389             : //SNAP_LOG_TRACE() << "url decode?! [" << in << "] (2)";
    2390             : //#endif
    2391             :                     throw uri_exception_invalid_uri(
    2392             :                           "urldecode(\""
    2393           0 :                          + in
    2394           0 :                          + "\", "
    2395           0 :                          + (relax ? "true" : "false")
    2396           0 :                          + ") failed because of an invalid %xx character (digits are "
    2397           0 :                          + std::to_string(static_cast<int>(u[0]))
    2398           0 :                          + " / "
    2399           0 :                          + std::to_string(static_cast<int>(u[1]))
    2400           0 :                          + ")");
    2401             :                 }
    2402             :                 // use the % as is
    2403           0 :                 out += c;
    2404           0 :                 --u;
    2405           0 :                 continue;
    2406             :             }
    2407             :             // skip one of the two characters here, the other
    2408             :             // is skipped in the for() statement
    2409           0 :             ++u;
    2410           0 :             out += c;
    2411             :         }
    2412         802 :         else if(relax
    2413             : 
    2414             :                 // these are the only characters allowed by the RFC
    2415         802 :                 || (*u >= 'A' && *u <= 'Z')
    2416         802 :                 || (*u >= 'a' && *u <= 'z')
    2417          49 :                 || (*u >= '0' && *u <= '9')
    2418          49 :                 || *u == '.' || *u == '-'
    2419           0 :                 || *u == '/' || *u == '_'
    2420             : 
    2421             :                 // not legal in a URI considered 100% valid but most
    2422             :                 // systems accept the following as is so we do too
    2423           0 :                 || *u == '~' || *u == '!'
    2424           0 :                 || *u == '@' || *u == ','
    2425           0 :                 || *u == ';' || *u == ':'
    2426           0 :                 || *u == '(' || *u == ')'
    2427             :         )
    2428             :         {
    2429             :             // The tilde (~), when used, is often to indicate a user a la
    2430             :             // Unix (~<name>/... or just ~/... for the current user.)
    2431             :             //
    2432             :             // The exclamation point (!) is most often used with the hash
    2433             :             // bang; if that appears in a query string variable, then we
    2434             :             // need to accept at least the exclamation point (the hash has
    2435             :             // to be encoded no matter what.)
    2436             :             //
    2437             :             // The at sign (@) is used in email addresses.
    2438             :             //
    2439             :             // The comma (,) is often used to separate elements; for example
    2440             :             // the paging support uses "page=p3,s30" for show page 3 with
    2441             :             // 30 elements per page.
    2442             :             //
    2443             :             // The semi-colon (;) may appear if you have an HTML entity in
    2444             :             // a query string (i.e. "...?value=this+%26amp;+that".)
    2445             :             //
    2446             :             // The colon (:) can be used to separate values within a
    2447             :             // parameter when the comma is not appropriate.
    2448             :             //
    2449         802 :             out += *u;
    2450             :         }
    2451             :         else
    2452             :         {
    2453             : //#ifdef DEBUG
    2454             : //SNAP_LOG_TRACE() << "url decode?! found an invalid character [" << in << "] (3)";
    2455             : //#endif
    2456             :             throw uri_exception_invalid_uri(
    2457             :                     "urldecode(\""
    2458           0 :                   + in
    2459           0 :                   + "\", "
    2460           0 :                   + (relax ? "true" : "false")
    2461           0 :                   + ") failed because of an invalid character ("
    2462           0 :                   + std::to_string(static_cast<int>(*u))
    2463           0 :                   + ")");
    2464             :         }
    2465             :     }
    2466             : 
    2467         230 :     return out;
    2468             : }
    2469             : 
    2470             : 
    2471             : /** \brief Return the port corresponding to a scheme.
    2472             :  *
    2473             :  * This function determines what port corresponds to a given scheme
    2474             :  * assuming that the default is being used.
    2475             :  *
    2476             :  * Common schemes are handled internally; others are looked up with
    2477             :  * getservbyname(), first as a TCP service and then as a UDP service.
    2478             :  *
    2479             :  * \param[in] scheme  The scheme to convert to a port number.
    2480             :  *
    2481             :  * \return The corresponding port number in host byte order, or -1 if
    2482             :  *         the function cannot determine that number.
    2483             :  */
    2484          52 : int uri::scheme_to_port(std::string const & scheme)
    2485             : {
    2486          52 :     if(scheme == g_name_edhttp_scheme_http) // 99% so put it first
    2487             :     {
    2488          22 :         return 80;
    2489             :     }
    2490          30 :     if(scheme == g_name_edhttp_scheme_https) // 0.9% so put it next
    2491             :     {
    2492           9 :         return 443;
    2493             :     }
    2494          21 :     if(scheme == g_name_edhttp_scheme_ftp)
    2495             :     {
    2496           0 :         return 21;
    2497             :     }
    2498          21 :     if(scheme == g_name_edhttp_scheme_ssh)
    2499             :     {
    2500           0 :         return 22;
    2501             :     }
    2502          21 :     if(scheme == g_name_edhttp_scheme_telnet)
    2503             :     {
    2504           0 :         return 23;
    2505             :     }
    2506          21 :     if(scheme == g_name_edhttp_scheme_smtp)
    2507             :     {
    2508           0 :         return 25;
    2509             :     }
    2510          21 :     if(scheme == g_name_edhttp_scheme_gopher)
    2511             :     {
    2512           9 :         return 70;
    2513             :     }
    2514             : 
    2515             :     // not a common service, ask the system... (probably less than 0.01%)
    2516          12 :     servent * s(getservbyname(scheme.c_str(), g_name_edhttp_scheme_tcp));
    2517          12 :     if(s == nullptr)
    2518             :     {
    2519          12 :         s = getservbyname(scheme.c_str(), g_name_edhttp_scheme_udp);
    2520          12 :         if(s == nullptr)
    2521             :         {
    2522             :             // neither the TCP nor the UDP lookup knows this service
    2523          12 :             return -1;
    2524             :         }
    2525             :     }
    2526           0 :     return ntohs(static_cast<uint16_t>(s->s_port)); // s_port is in network byte order
    2527             : }
    2528             : 
    2529             : 
    2530             : 
    2531           6 : } // namespace edhttp
    2532             : // vim: ts=4 sw=4 et

Generated by: LCOV version 1.13