LCOV - code coverage report
Current view: top level - edhttp - weighted_http_string.cpp (source / functions) Hit Total Coverage
Test: coverage.info Lines: 72 114 63.2 %
Date: 2022-03-15 17:12:29 Functions: 5 5 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : // Copyright (c) 2011-2022  Made to Order Software Corp.  All Rights Reserved
       2             : //
       3             : // https://snapwebsites.org/project/edhttp
       4             : // contact@m2osw.com
       5             : //
       6             : // This program is free software: you can redistribute it and/or modify
       7             : // it under the terms of the GNU General Public License as published by
       8             : // the Free Software Foundation, either version 3 of the License, or
       9             : // (at your option) any later version.
      10             : //
      11             : // This program is distributed in the hope that it will be useful,
      12             : // but WITHOUT ANY WARRANTY; without even the implied warranty of
      13             : // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      14             : // GNU General Public License for more details.
      15             : //
      16             : // You should have received a copy of the GNU General Public License
      17             : // along with this program.  If not, see <https://www.gnu.org/licenses/>.
      18             : 
      19             : 
      20             : // self
      21             : //
      22             : #include    "edhttp/weighted_http_string.h"
      23             : 
      24             : 
      25             : 
      26             : // advgetopt
      27             : //
      28             : #include    <advgetopt/validator_double.h>
      29             : 
      30             : 
      31             : // snaplogger
      32             : //
      33             : #include    <snaplogger/message.h>
      34             : 
      35             : 
      36             : // snapdev
      37             : //
      38             : #include    <snapdev/trim_string.h>
      39             : 
      40             : 
      41             : // last include
      42             : //
      43             : #include    <snapdev/poison.h>
      44             : 
      45             : 
      46             : 
      47             : namespace edhttp
      48             : {
      49             : 
      50             : 
      51             : 
      52             : /** \brief Create a new weighted HTTP string object.
      53             :  *
      54             :  * The constructor is most often passed a language string to be parsed
      55             :  * immediately. The string can be empty, though.
      56             :  *
      57             :  * This function calls parse() on the input string and ignores its result.
      58             :  *
      59             :  * \param[in] str  The list of weighted HTTP strings.
      60             :  */
      61          24 : weighted_http_string::weighted_http_string(std::string const & str)
      62             :     //: f_str() -- auto-init
      63             :     //, f_parts() -- auto-init
      64             : {
      65          24 :     parse(str);
      66          24 : }
      67             : 
      68             : 
      69             : /** \brief Parse a weighted HTTP string.
      70             :  *
      71             :  * This function parses an "extended weighted HTTP string".
      72             :  *
      73             :  * By extended we mean that we support more than just weights
      74             :  * so as to support lists of parameters like in the Cache-Control
      75             :  * field. The extensions are twofold:
      76             :  *
      77             :  * \li The first name can be a parameter with a value (a=b)
      78             :  * \li The value of a parameter can be a string of characters
      79             :  *
      80             :  * As a result, the supported string format is as follows:
      81             :  *
      82             :  * \code
      83             :  *      start: params
      84             :  *      params: options
      85             :  *            | params ',' options
      86             :  *      options: opt
      87             :  *             | options ';' opt
      88             :  *      opt: opt_name
      89             :  *         | opt_name '=' opt_value
      90             :  *      opt_name: CHAR - [,;=]
      91             :  *      opt_value: token
      92             :  *               | quoted_string
      93             :  *      token: CHAR - [,;]
      94             :  *      quoted_string: '"' CHAR '"'
      95             :  *                   | "'" CHAR "'"
      96             :  * \endcode
      97             :  *
      98             :  * For example, the following defines a few language strings
      99             :  * with their weights ("levels"):
     100             :  *
     101             :  * \code
     102             :  *      fr;q=0.8,en;q=0.5,de;q=0.1
     103             :  * \endcode
     104             :  *
     105             :  * This ends up being parsed as:
     106             :  *
     107             :  * \li fr, level 0.8
     108             :  * \li en, level 0.5
     109             :  * \li de, level 0.1
     110             :  *
     111             :  * Note that the input can be in any order. The parts are saved in the
     112             :  * order they were read (first is most important if no levels were specified).
     113             :  *
     114             :  * If you want the parts sorted by level, make sure to call sort_by_level()
     115             :  * before you retrieve the vector with get_parts().
     116             :  *
     117             :  * Remember that by default a string_part object uses the DEFAULT_LEVEL which
     118             :  * is 1.0. In other words, objects with no `q=...` parameter will likely
     119             :  * become first in the list.
     120             :  *
     121             :  * \code
     122             :  *      edhttp::weighted_http_string language_country(locales);
     123             :  *      language_country.sort_by_level();
     124             :  * \endcode
     125             :  *
     126             :  * sort_by_level() uses a stable sort, which is very important because if
     127             :  * two strings have the same level, then they have to stay in the order
     128             :  * they were in the input string.
     129             :  *
     130             :  * See: https://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.4
     131             :  *
     132             :  * \note
     133             :  * The function returns false if errors were detected. However, it keeps
     134             :  * whatever parts were loaded so far. The error messages are accumulated
     135             :  * in f_error_messages and also sent to the logger.
     136             :  *
     137             :  * \todo
     138             :  * Limit the characters to what HTTP/1.1 clearly supports (same in HTTP/2).
     139             :  * Being "flexible" is fine as long as data coming from a client is
     140             :  * checked for tainted (dodgy) parameters which need to be ignored.
     141             :  * NOTE(review): std::isspace() receives a plain char below; it should be
     142             :  * cast to unsigned char first (negative values are undefined behavior).
     143             :  *
     144             :  * \param[in] str  A weighted HTTP string to parse.
     145             :  * \param[in] reset  If true, or if the saved string is empty, replace the
     146             :  *                   existing parts; otherwise append \p str after a comma.
     147             :  *
     148             :  * \return true if no errors were detected, false otherwise.
     149             :  */
     150          28 : bool weighted_http_string::parse(std::string const & str, bool reset)
     151             : {
     152          28 :     f_error_messages.clear();
     153             : 
     154          28 :     int pos(0);
     155          28 :     if(f_str.empty() || reset)
     156             :     {
     157          25 :         f_parts.clear();
     158          25 :         f_str = str;
     159             :     }
     160             :     else
     161             :     {
     162           3 :         f_str += ',';
     163           3 :         pos = f_str.length();
     164           3 :         f_str += str;
     165             :     }
     166             : 
     167          28 :     char const * s(f_str.c_str() + pos);
     168             :     for(;;)
     169             :     {
     170         284 :         while(std::isspace(*s) || *s == ',')
     171             :         {
     172          93 :             ++s;
     173             :         }
     174          98 :         if(*s == '\0')
     175             :         {
     176             :             // reached the end of the string, we got a clean input
     177             :             //
     178          28 :             break;
     179             :         }
     180          70 :         char const * v(s);
     181         386 :         while(*s != '\0' && *s != ',' && *s != ';' && *s != '=' && *s != ' ' && *s != '\t')
     182             :         {
     183         158 :             ++s;
     184             :         }
     185             : 
     186             :         // Note: we check the length of the resulting name, the
     187             :         //       RFC 2616 definition is:
     188             :         //
     189             :         //          language-tag  = primary-tag *( "-" subtag )
     190             :         //          primary-tag   = 1*8ALPHA
     191             :         //          subtag        = 1*8ALPHA
     192             :         //
     193             :         //       so the maximum size is 8 + 1 + 8 = 17 (1 to 8 characters,
     194             :         //       the dash, 1 to 8 characters) and the smallest is 1.
     195             :         //
     196         140 :         std::string name(snapdev::trim_string(std::string(v, s - v), true, true, true));
     197          70 :         if(name.empty() || name.length() > 17)
     198             :         {
     199             :             // something is invalid, name is not defined (this can
     200             :             // happen if you just put a ';') or is too large
     201             :             //
     202             :             // record the error and stop parsing; parts already read are kept
     203             :             //
     204           0 :             f_error_messages += "part name is empty or too long (limit is 17 characters.)\n";
     205           0 :             break;
     206             :         }
     207             :         // TODO: we want to check the validity of `name` (i.e. 8ALPHA)
     208             :         //
     209         140 :         string_part part(name);
     210             : 
     211             :         // we allow spaces after the name and before the ';', '=', and ','
     212             :         //
     213          70 :         while(*s == ' ' || *s == '\t')
     214             :         {
     215           0 :             ++s;
     216             :         }
     217             : 
     218             :         // check whether that parameter has a value
     219             :         //
     220          70 :         if(*s == '=')
     221             :         {
     222           0 :             ++s;
     223             : 
     224             :             // allow spaces after an equal sign
     225             :             //
     226           0 :             while(*s == ' ' || *s == '\t')
     227             :             {
     228           0 :                 ++s;
     229             :             }
     230             : 
     231             :             // values can be quoted
     232             :             //
     233           0 :             if(*s == '"' || *s == '\'')
     234             :             {
     235           0 :                 auto const quote(*s);
     236           0 :                 ++s;
     237           0 :                 v = s;
     238           0 :                 while(*s != '\0' && *s != quote)
     239             :                 {
     240             :                     // accept any character within the quotes
     241             :                     // no backslash supported
     242             :                     //
     243           0 :                     ++s;
     244             :                 }
     245           0 :                 part.set_value(std::string(v, s - v));
     246           0 :                 if(*s == quote)
     247             :                 {
     248           0 :                     ++s;
     249             :                 }
     250             : 
     251             :                 // allow spaces after the closing quote
     252             :                 //
     253           0 :                 while(*s == ' ' || *s == '\t')
     254             :                 {
     255           0 :                     ++s;
     256           0 :                 }
     257             :             }
     258             :             else
     259             :             {
     260           0 :                 v = s;
     261           0 :                 while(*s != '\0' && *s != ';' && *s != ',')
     262             :                 {
     263           0 :                     ++s;
     264             :                 }
     265           0 :                 part.set_value(snapdev::trim_string(std::string(v, s - v), true, true, true));
     266             :             }
     267             :         }
     268             : 
     269             :         // XXX: should we check whether another part with the same
     270             :         //      name already exists in the resulting vector?
     271             : 
     272             :         // read all the parameters; they all get saved in the part,
     273             :         // although we only interpret the 'q' parameter at this time
     274             :         //
     275         106 :         while(*s == ';')
     276             :         {
     277             :             // skip spaces and extra ';'
     278             :             //
     279          36 :             do
     280             :             {
     281          72 :                 ++s;
     282             :             }
     283          72 :             while(*s == ';' || *s == ' ' || *s == '\t');
     284             : 
     285             :             // read parameter name
     286             :             //
     287          36 :             v = s;
     288         108 :             while(*s != '\0' && *s != ',' && *s != ';' && *s != '=')
     289             :             {
     290          36 :                 ++s;
     291             :             }
     292          72 :             std::string const param_name(snapdev::trim_string(std::string(v, s - v), true, true));
     293             : 
     294             :             // TODO: we want to check the validity of `param_name` (i.e. `token`)
     295             :             //       all the following separators are not considered legal
     296             :             //       and also controls (< 0x20) and most certainly characters
     297             :             //       over 0x7E
     298             :             //
     299             :             //        separators     = "(" | ")" | "<" | ">" | "@"
     300             :             //                       | "," | ";" | ":" | "\" | <">
     301             :             //                       | "/" | "[" | "]" | "?" | "="
     302             :             //                       | "{" | "}" | SP | HT
     303             :             // See:
     304             :             // https://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2
     305             :             //
     306          36 :             if(!param_name.empty())
     307             :             {
     308          72 :                 std::string param_value;
     309          36 :                 if(*s == '=')
     310             :                 {
     311          36 :                     ++s;
     312          36 :                     while(*s == ' ' || *s == '\t')
     313             :                     {
     314           0 :                         ++s;
     315             :                     }
     316          36 :                     if(*s == '\'' || *s == '"')
     317             :                     {
     318           0 :                         char const quote(*s);
     319           0 :                         ++s;
     320           0 :                         v = s;
     321           0 :                         while(*s != '\0' && *s != quote)
     322             :                         {
     323           0 :                             ++s;
     324             :                         }
     325           0 :                         param_value = snapdev::trim_string(std::string(v, s - v));
     326           0 :                         if(*s == quote)
     327             :                         {
     328           0 :                             ++s;
     329             :                         }
     330             : 
     331             :                         // allow spaces after the closing quote
     332             :                         //
     333           0 :                         while(*s == ' ' || *s == '\t')
     334             :                         {
     335           0 :                             ++s;
     336           0 :                         }
     337             :                     }
     338             :                     else
     339             :                     {
     340          36 :                         v = s;
     341         240 :                         while(*s != '\0' && *s != ',' && *s != ';')
     342             :                         {
     343         102 :                             ++s;
     344             :                         }
     345          36 :                         param_value = snapdev::trim_string(std::string(v, s - v), true, true, true);
     346             :                     }
     347             :                 }
     348          36 :                 part.add_parameter(param_name, param_value);
     349             : 
     350             :                 // handle parameters we understand
     351             :                 //
     352          36 :                 if(param_name == "q")
     353             :                 {
     354          36 :                     double level;
     355          36 :                     if(!advgetopt::validator_double::convert_string(param_value, level))
     356             :                     {
     357             :                         // the "quality" (q=...) parameter is not a valid
     358             :                         // floating point value
     359             :                         //
     360           0 :                         f_error_messages += "the quality value (q=...) is not a valid floating point.\n";
     361             :                     }
     362          36 :                     else if(level >= 0.0)
     363             :                     {
     364          36 :                         part.set_level(level);
     365             :                     }
     366             :                     else
     367             :                     {
     368             :                         // The "quality" (q=...) parameter cannot be
     369             :                         // a negative number
     370             :                         //
     371           0 :                         f_error_messages += "the quality value (q=...) cannot be a negative number.\n";
     372             :                     }
     373             :                 }
     374             :                 // TODO add support for other parameters, "charset" is one of
     375             :                 //      them in the Accept header which we want to support
     376             :             }
     377          36 :             if(*s != '\0' && *s != ';' && *s != ',')
     378             :             {
     379           0 :                 f_error_messages += "found a spurious character in a weighted string.\n";
     380             : 
     381             :                 // ignore that entry...
     382             :                 //
     383           0 :                 ++s;
     384           0 :                 while(*s != '\0' && *s != ',' && *s != ';')
     385             :                 {
     386           0 :                     ++s;
     387             :                 }
     388             :             }
     389             :         }
     390             : 
     391          70 :         f_parts.push_back(part);
     392             : 
     393          70 :         if(*s != ',' && *s != '\0')
     394             :         {
     395           0 :             f_error_messages += "part not ended by a comma or end of string.\n";
     396             :         }
     397          70 :     }
     398             : 
     399          28 :     if(!f_error_messages.empty())
     400             :     {
     401             :         // in case the caller "forgets" to print errors...
     402             :         //
     403           0 :         SNAP_LOG_ERROR
     404             :             << "parsing of \""
     405             :             << str
     406             :             << "\" generated errors:\n"
     407             :             << f_error_messages
     408             :             << SNAP_LOG_SEND;
     409             :     }
     410             : 
     411          28 :     return f_error_messages.empty();
     412             : }
     413             : 
     414             : 
     415             : /** \brief Retrieve the level of the named part.
     416             :  *
     417             :  * This function searches for the first part named \p name. If found,
     418             :  * then its level gets returned.
     419             :  *
     420             :  * A part with an unspecified level will have a level of DEFAULT_LEVEL
     421             :  * (which is 1.0f).
     422             :  *
     423             :  * If \p name is not found in the list of parts, this function returns
     424             :  * UNDEFINED_LEVEL (which is -1.0f).
     425             :  *
     426             :  * \param[in] name  The name of the part for which the level is requested.
     427             :  *
     428             :  * \return The part level or UNDEFINED_LEVEL.
     429             :  */
     430          43 : string_part::level_t weighted_http_string::get_level(std::string const & name)
     431             : {
     432          43 :     const int max_parts(f_parts.size());
     433         101 :     for(int i(0); i < max_parts; ++i)
     434             :     {
     435          86 :         if(f_parts[i].get_name() == name)
     436             :         {
     437          28 :             return f_parts[i].get_level();
     438             :         }
     439             :     }
     440          15 :     return string_part::UNDEFINED_LEVEL();
     441             : }
     442             : 
     443             : 
     444             : /** \brief Use the weight (q=... values) to sort these HTTP strings.
     445             :  *
     446             :  * This function runs std::stable_sort() against the parts using the
     447             :  * string_part less-than operator; equal parts keep their input order.
     448             :  * It is not called by parse() since some lists must stay as received.
     449             :  *
     450             :  * The function can be called multiple times, although, unless you
     451             :  * modify parts, there should be no need to do it more than once.
     452             :  */
     453           6 : void weighted_http_string::sort_by_level()
     454             : {
     455           6 :     std::stable_sort(f_parts.begin(), f_parts.end());
     456           6 : }
     457             : 
     458             : 
     459             : /** \brief Convert all the parts to a full weighted HTTP string.
     460             :  *
     461             :  * This function converts each part to a string using the
     462             :  * string_part::to_string() function and joins the results, in their
     463             :  * current order, with a comma followed by a space (", ").
     464             :  *
     465             :  * \return The resulting string; empty if there are no parts.
     466             :  */
     467          12 : std::string weighted_http_string::to_string() const
     468             : {
     469          12 :     std::string result;
     470          12 :     int const max_parts(f_parts.size());
     471          43 :     for(int i(0); i < max_parts; ++i)
     472             :     {
     473          31 :         if(!result.empty())
     474             :         {
     475          19 :             result += ", ";
     476             :         }
     477          31 :         result += f_parts[i].to_string();
     478             :     }
     479          12 :     return result;
     480             : }
     481             : 
     482             : 
     483             : 
     484             : } // namespace edhttp
     485             : // vim: ts=4 sw=4 et

Generated by: LCOV version 1.13