LCOV - code coverage report
Current view: top level - edhttp - weighted_http_string.cpp (source / functions) Hit Total Coverage
Test: coverage.info Lines: 125 125 100.0 %
Date: 2022-07-09 10:44:38 Functions: 5 5 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : // Copyright (c) 2011-2022  Made to Order Software Corp.  All Rights Reserved
       2             : //
       3             : // https://snapwebsites.org/project/edhttp
       4             : // contact@m2osw.com
       5             : //
       6             : // This program is free software: you can redistribute it and/or modify
       7             : // it under the terms of the GNU General Public License as published by
       8             : // the Free Software Foundation, either version 3 of the License, or
       9             : // (at your option) any later version.
      10             : //
      11             : // This program is distributed in the hope that it will be useful,
      12             : // but WITHOUT ANY WARRANTY; without even the implied warranty of
      13             : // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      14             : // GNU General Public License for more details.
      15             : //
      16             : // You should have received a copy of the GNU General Public License
      17             : // along with this program.  If not, see <https://www.gnu.org/licenses/>.
      18             : 
      19             : // self
      20             : //
      21             : #include    "edhttp/weighted_http_string.h"
      22             : 
      23             : 
      24             : 
      25             : // advgetopt
      26             : //
      27             : #include    <advgetopt/validator_double.h>
      28             : 
      29             : 
      30             : // snaplogger
      31             : //
      32             : #include    <snaplogger/message.h>
      33             : 
      34             : 
      35             : // snapdev
      36             : //
      37             : #include    <snapdev/trim_string.h>
      38             : 
      39             : 
      40             : // last include
      41             : //
      42             : #include    <snapdev/poison.h>
      43             : 
      44             : 
      45             : 
      46             : namespace edhttp
      47             : {
      48             : 
      49             : 
      50             : 
      51             : /** \brief Create a new weighted HTTP string object.
      52             :  *
      53             :  * The constructor is most often passed a language string which gets
      54             :  * parsed immediately. The string can be empty, though.
      55             :  *
      56             :  * The value returned by parse() is ignored; parse() logs its own errors.
      57             :  *
      58             :  * \param[in] str  The list of weighted HTTP strings.
      59             :  */
      60          34 : weighted_http_string::weighted_http_string(std::string const & str)
      61             : {
      62          34 :     parse(str);
      63          34 : }
      64             : 
      65             : 
      66             : /** \brief Parse a weighted HTTP string.
      67             :  *
      68             :  * This function parses an "extended weighted HTTP string".
      69             :  *
      70             :  * By extended we mean that we support more than just weights
      71             :  * so as to support lists of parameters like in the Cache-Control
      72             :  * field. The extensions are twofold:
      73             :  *
      74             :  * \li The first name can be a parameter with a value (a=b)
      75             :  * \li The value of a parameter can be a string of characters
      76             :  *
      77             :  * As a result, the supported string format is as follows:
      78             :  *
      79             :  * \code
      80             :  *      start: params
      81             :  *      params: options
      82             :  *            | params ',' options
      83             :  *      options: opt
      84             :  *             | options ';' opt
      85             :  *      opt: opt_name
      86             :  *         | opt_name '=' opt_value
      87             :  *      opt_name: CHAR - [,;=]
      88             :  *      opt_value: token
      89             :  *               | quoted_string
      90             :  *      token: CHAR - [,;]
      91             :  *      quoted_string: '"' CHAR '"'
      92             :  *                   | "'" CHAR "'"
      93             :  * \endcode
      94             :  *
      95             :  * From [RFC-9110](https://www.rfc-editor.org/rfc/rfc9110.html)
      96             :  * and [RFC-4647](https://www.rfc-editor.org/rfc/rfc4647.html):
      97             :  *
      98             :  * \code
      99             :  *     Accept-Language  = [ ( language-range [ weight ] ) *( OWS "," OWS (
     100             :  *                           language-range [ weight ] ) ) ]
     101             :  *     language-range   = (1*8ALPHA *("-" 1*8alphanum)) / "*"
     102             :  *     alphanum         = ALPHA / DIGIT
     103             :  *     weight           = OWS ";" OWS "q=" qvalue
     104             :  *     qvalue           = ( "0" [ "." 0*3DIGIT ] )
     105             :  *                      / ( "1" [ "." 0*3("0") ] )
     106             :  *     OWS              = *( SP / HTAB )
     107             :  * \endcode
     108             :  *
     109             :  * For example, the following defines a few language strings
     110             :  * with their weights ("levels"):
     111             :  *
     112             :  * \code
     113             :  *      fr;q=0.8,en;q=0.5,de;q=0.1
     114             :  * \endcode
     115             :  *
     116             :  * This ends up being parsed as:
     117             :  *
     118             :  * \li fr, level 0.8
     119             :  * \li en, level 0.5
     120             :  * \li de, level 0.1
     121             :  *
     122             :  * Note that the input can be in any order. The vector is returned in the
     123             :  * order it was read (first is most important if no levels were specified).
     124             :  *
     125             :  * If you want to sort by level, make sure to retrieve the vector with
     126             :  * get_parts() and then sort it with sort_by_level().
     127             :  *
     128             :  * Remember that by default a string_part object uses the DEFAULT_LEVEL which
     129             :  * is 1.0. In other words, objects with no `q=...` parameter will likely
     130             :  * become first in the list.
     131             :  *
     132             :  * \code
     133             :  *      edhttp::weighted_http_string language_country(locales);
     134             :  *      language_country.sort_by_level();
     135             :  * \endcode
     136             :  *
     137             :  * The sort is stable, which is very important because if two strings
     138             :  * have the same level, then they have to stay in the order they were
     139             :  * in the input string.
     140             :  *
     141             :  * See reference:
     142             :  * https://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.4
     143             :  *
     144             :  * \note
     145             :  * The function may return false if errors were detected. However, it
     146             :  * will keep whatever strings were loaded so far.
     147             :  *
     148             :  * \todo
     149             :  * We may want to improve the implementation to really limit all
     150             :  * the characters to what is clearly supported in HTTP/1.1 (which
     151             :  * is the same in HTTP/2). On the other hand, being "flexible" is not
     152             :  * always a bad thing as long as the use of data coming from a client
     153             :  * is properly checked for possibly tainted parameters (things that
     154             :  * could be dodgy and as such need to be ignored).
     155             :  *
     156             :  * \param[in] str  A weighted HTTP string to parse.
     157             :  * \param[in] reset  Reset the existing weighted HTTP strings if true.
     158             :  *
     159             :  * \return true if no errors were detected, false otherwise.
     160             :  */
     161          48 : bool weighted_http_string::parse(std::string const & str, bool reset)
     162             : {
     163          48 :     f_error_messages.clear();
     164             : 
     165          48 :     std::string::size_type pos(0);
     166          48 :     if(f_str.empty() || reset)
     167             :     {
     168          45 :         f_parts.clear();
     169          45 :         f_str = str;
     170             :     }
     171             :     else
     172             :     {
     173           3 :         f_str += ','; // separate the existing parts from the new ones
     174           3 :         pos = f_str.length(); // only the appended string gets parsed
     175           3 :         f_str += str;
     176             :     }
     177             : 
     178          48 :     char const * s(f_str.c_str() + pos);
     179             :     for(;;)
     180             :     {
     181         349 :         while(std::isspace(static_cast<unsigned char>(*s)) || *s == ',') // cast: isspace() is undefined for negative char values
     182             :         {
     183         108 :             ++s;
     184             :         }
     185         133 :         if(*s == '\0')
     186             :         {
     187             :             // reached the end of the string, we are done
     188             :             //
     189          45 :             break;
     190             :         }
     191             : 
     192             :         // the part name is defined as:
     193             :         //
     194             :         //          language-tag  = primary-tag *( "-" subtag )
     195             :         //          primary-tag   = 1*8ALPHA
     196             :         //          subtag        = 1*8alphanum
     197             :         //          alphanum      = ALPHA / DIGIT
     198             :         //
     199             :         // so the maximum size is 8 + 1 + 8 = 17 (1 to 8 characters,
     200             :         // the dash, 1 to 8 characters) and the smallest is 1.
     201             :         //
     202             :         // note that we may use this parser for other things than just
     203             :         // languages, so make sure that it matches all the categories
     204             :         //
     205             :         // TODO: we want to check the validity of `name` (i.e. 8ALPHA)
     206             :         //
     207          88 :         char const * v(s);
     208         482 :         while(*s != '\0' && *s != ',' && *s != ';' && *s != '=' && *s != ' ' && *s != '\t' && *s != '-')
     209             :         {
     210         197 :             ++s;
     211             :         }
     212          88 :         if(s == v || s - v > 8)
     213             :         {
     214             :             // the name is either empty (this happens if you just
     215             :             // put a ';') or longer than 8 characters
     216             :             //
     217             :             // the error is recorded and the parsing stops here
     218             :             //
     219           1 :             f_error_messages += "part name is empty or too long (limit is '8-8' characters).\n";
     220           1 :             break;
     221             :         }
     222          87 :         if(*s == '-')
     223             :         {
     224          12 :             ++s;
     225          12 :             char const * w(s);
     226          90 :             while(*s != '\0' && *s != ',' && *s != ';' && *s != '=' && *s != ' ' && *s != '\t' && *s != '-')
     227             :             {
     228          39 :                 ++s;
     229             :             }
     230          12 :             if(*s == '-')
     231             :             {
     232           1 :                 f_error_messages += "part name cannot include more than one '-'.\n";
     233           1 :                 break;
     234             :             }
     235          11 :             if(s == w || s - w > 8)
     236             :             {
     237             :                 // the sub-name is either empty (nothing follows
     238             :                 // the '-') or longer than 8 characters
     239             :                 //
     240             :                 // the error is recorded and the parsing stops here
     241             :                 //
     242           1 :                 f_error_messages += "part sub-name is empty or too long (limit is '8-8' characters).\n";
     243           1 :                 break;
     244             :             }
     245             :         }
     246         170 :         std::string name(snapdev::trim_string(std::string(v, s - v), true, true, true));
     247             : 
     248         170 :         string_part part(name);
     249             : 
     250             :         // we allow spaces after the name and before the ';', '=', and ','
     251             :         //
     252         105 :         while(*s == ' ' || *s == '\t')
     253             :         {
     254          10 :             ++s;
     255             :         }
     256             : 
     257             :         // check whether that parameter has a value
     258             :         //
     259          85 :         if(*s == '=')
     260             :         {
     261           6 :             ++s;
     262             : 
     263             :             // allow spaces after an equal sign
     264             :             //
     265          18 :             while(*s == ' ' || *s == '\t')
     266             :             {
     267           6 :                 ++s;
     268             :             }
     269             : 
     270             :             // values can be quoted
     271             :             //
     272           6 :             if(*s == '"' || *s == '\'')
     273             :             {
     274           3 :                 auto const quote(*s);
     275           3 :                 ++s;
     276           3 :                 v = s;
     277          47 :                 while(*s != '\0' && *s != quote)
     278             :                 {
     279             :                     // accept any character within the quotes
     280             :                     // no backslash supported
     281             :                     //
     282          22 :                     ++s;
     283             :                 }
     284           3 :                 part.set_value(std::string(v, s - v));
     285           3 :                 if(*s == quote)
     286             :                 {
     287           3 :                     ++s;
     288             :                 }
     289             : 
     290             :                 // allow spaces after the closing quote
     291             :                 //
     292          11 :                 while(*s == ' ' || *s == '\t')
     293             :                 {
     294           4 :                     ++s;
     295           3 :                 }
     296             :             }
     297             :             else
     298             :             {
     299           3 :                 v = s;
     300          35 :                 while(*s != '\0' && *s != ';' && *s != ',')
     301             :                 {
     302          16 :                     ++s;
     303             :                 }
     304           3 :                 part.set_value(snapdev::trim_string(std::string(v, s - v), true, true, true));
     305             :             }
     306             :         }
     307             : 
     308             :         // XXX: should we check whether another part with the same
     309             :         //      name already exists in the resulting vector?
     310             : 
     311             :         // read all the parameters; they all get saved in the part,
     312             :         // although only the 'q' parameter is interpreted at this time
     313             :         //
     314         139 :         while(*s == ';')
     315             :         {
     316             :             // skip spaces and extra ';'
     317             :             //
     318          46 :             do
     319             :             {
     320         100 :                 ++s;
     321             :             }
     322         100 :             while(*s == ';' || *s == ' ' || *s == '\t');
     323             : 
     324             :             // read parameter name
     325             :             //
     326          54 :             v = s;
     327         210 :             while(*s != '\0' && *s != ',' && *s != ';' && *s != '=')
     328             :             {
     329          78 :                 ++s;
     330             :             }
     331         108 :             std::string const param_name(snapdev::trim_string(std::string(v, s - v), true, true));
     332             : 
     333             :             // TODO: we want to check the validity of `param_name` (i.e. `token`)
     334             :             //       all the following separators are not considered legal
     335             :             //       and also controls (< 0x20) and most certainly characters
     336             :             //       over 0x7E
     337             :             //
     338             :             //        separators     = "(" | ")" | "<" | ">" | "@"
     339             :             //                       | "," | ";" | ":" | "\" | <">
     340             :             //                       | "/" | "[" | "]" | "?" | "="
     341             :             //                       | "{" | "}" | SP | HT
     342             :             // See:
     343             :             // https://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2
     344             :             //
     345          54 :             if(!param_name.empty())
     346             :             {
     347         108 :                 std::string param_value;
     348          54 :                 if(*s == '=')
     349             :                 {
     350          54 :                     ++s;
     351          70 :                     while(*s == ' ' || *s == '\t')
     352             :                     {
     353           8 :                         ++s;
     354             :                     }
     355          54 :                     if(*s == '\'' || *s == '"')
     356             :                     {
     357          10 :                         char const quote(*s);
     358          10 :                         ++s;
     359          10 :                         v = s;
     360         196 :                         while(*s != '\0' && *s != quote)
     361             :                         {
     362          93 :                             ++s;
     363             :                         }
     364          10 :                         param_value = snapdev::trim_string(std::string(v, s - v));
     365          10 :                         if(*s == quote)
     366             :                         {
     367          10 :                             ++s;
     368             :                         }
     369             : 
     370             :                         // allow spaces after the closing quote
     371             :                         //
     372          18 :                         while(*s == ' ' || *s == '\t')
     373             :                         {
     374           4 :                             ++s;
     375          10 :                         }
     376             :                     }
     377             :                     else
     378             :                     {
     379          44 :                         v = s;
     380         300 :                         while(*s != '\0' && *s != ',' && *s != ';')
     381             :                         {
     382         128 :                             ++s;
     383             :                         }
     384          44 :                         param_value = snapdev::trim_string(std::string(v, s - v), true, true, true);
     385             :                     }
     386             :                 }
     387          54 :                 part.add_parameter(param_name, param_value);
     388             : 
     389             :                 // handle parameters we understand
     390             :                 //
     391          54 :                 if(param_name == "q")
     392             :                 {
     393          42 :                     double level;
     394          42 :                     if(!advgetopt::validator_double::convert_string(param_value, level))
     395             :                     {
     396             :                         // the "quality" (q=...) parameter is not a valid
     397             :                         // floating point value
     398             :                         //
     399           1 :                         f_error_messages += "the quality value (q=...) is not a valid floating point.\n";
     400             :                     }
     401          41 :                     else if(level >= 0.0)
     402             :                     {
     403          40 :                         part.set_level(level);
     404             :                     }
     405             :                     else
     406             :                     {
     407             :                         // The "quality" (q=...) parameter cannot be
     408             :                         // a negative number
     409             :                         //
     410           1 :                         f_error_messages += "the quality value (q=...) cannot be a negative number.\n";
     411             :                     }
     412             :                 }
     413             :                 // TODO add support for other parameters, "charset" is one of
     414             :                 //      them in the Accept header which we want to support
     415             :             }
     416          54 :             if(*s != '\0' && *s != ';' && *s != ',')
     417             :             {
     418           1 :                 f_error_messages += "found a spurious character in a weighted string.\n";
     419             : 
     420             :                 // ignore that entry...
     421             :                 //
     422           1 :                 ++s;
     423           5 :                 while(*s != '\0' && *s != ',' && *s != ';')
     424             :                 {
     425           2 :                     ++s;
     426             :                 }
     427             :             }
     428             :         }
     429             : 
     430          85 :         f_parts.push_back(part);
     431             : 
     432          85 :         if(*s != ',' && *s != '\0')
     433             :         {
     434           1 :             f_error_messages += "part not ended by a comma or end of string.\n";
     435             :         }
     436          85 :     }
     437             : 
     438          48 :     if(!f_error_messages.empty())
     439             :     {
     440             :         // in case the caller "forgets" to print errors...
     441             :         //
     442           7 :         SNAP_LOG_ERROR
     443             :             << "parsing of \""
     444             :             << str
     445             :             << "\" generated errors:\n"
     446             :             << f_error_messages
     447             :             << SNAP_LOG_SEND;
     448             :     }
     449             : 
     450          48 :     return f_error_messages.empty();
     451             : }
     452             : 
     453             : 
     454             : /** \brief Retrieve the level of the named part.
     455             :  *
     456             :  * This function searches for a part named \p name. If found, then its
     457             :  * level gets returned. The first part with a matching name is used.
     458             :  *
     459             :  * A part with an unspecified level will have a level of DEFAULT_LEVEL
     460             :  * (which is 1.0f).
     461             :  *
     462             :  * If \p name is not found in the list of parts, this function returns
     463             :  * UNDEFINED_LEVEL (which is -1.0f).
     464             :  *
     465             :  * \param[in] name  The name of the part for which the level is requested.
     466             :  *
     467             :  * \return The part level or UNDEFINED_LEVEL.
     468             :  */
     469          59 : string_part::level_t weighted_http_string::get_level(std::string const & name)
     470             : {
     471             :     // linear search in input order, the first match wins
     472         129 :     for(auto & part : f_parts)
     473             :     {
     474         102 :         if(part.get_name() == name)
     475             :         {
     476          32 :             return part.get_level();
     477             :         }
     478             :     }
     479          27 :     return string_part::UNDEFINED_LEVEL();
     480             : }
     481             : 
     482             : 
     483             : /** \brief Use the weight (q=... values) to sort these HTTP strings.
     484             :  *
     485             :  * This function runs a stable sort against the weighted strings, so
     486             :  * parts that compare equal keep their input order. It is not called by
     487             :  * default because some lists must stay in the order sent by the client.
     488             :  *
     489             :  * The function can be called multiple times, although, unless you
     490             :  * modify parts, there should be no need to do it more than once.
     491             :  */
     492           6 : void weighted_http_string::sort_by_level()
     493             : {
     494           6 :     std::stable_sort(f_parts.begin(), f_parts.end()); // no comparator given: uses the parts' operator <
     495           6 : }
     496             : 
     497             : 
     498             : /** \brief Convert all the parts to a full weighted HTTP string.
     499             :  *
     500             :  * This function converts all the parts of a weighted HTTP string
     501             :  * object to one string. The string representing each part is
     502             :  * generated using the string_part::to_string() function.
     503             :  *
     504             :  * \return The string representing this weighted HTTP string.
     505             :  */
     506          16 : std::string weighted_http_string::to_string() const
     507             : {
     508          16 :     std::string result;
     509             :     // the ", " separator is only added once result is not empty
     510          51 :     for(auto const & part : f_parts)
     511             :     {
     512          35 :         if(!result.empty())
     513             :         {
     514          19 :             result += ", ";
     515             :         }
     516          35 :         result += part.to_string();
     517             :     }
     518          16 :     return result;
     519             : }
     520             : 
     521             : 
     522             : 
     523             : } // namespace edhttp
     524             : // vim: ts=4 sw=4 et

Generated by: LCOV version 1.13