|           Line data    Source code 
       1             : /* TLD library -- TLD, domain name, and sub-domain extraction
       2             :  * Copyright (c) 2011-2021  Made to Order Software Corp.  All Rights Reserved
       3             :  *
       4             :  * Permission is hereby granted, free of charge, to any person obtaining a
       5             :  * copy of this software and associated documentation files (the
       6             :  * "Software"), to deal in the Software without restriction, including
       7             :  * without limitation the rights to use, copy, modify, merge, publish,
       8             :  * distribute, sublicense, and/or sell copies of the Software, and to
       9             :  * permit persons to whom the Software is furnished to do so, subject to
      10             :  * the following conditions:
      11             :  *
      12             :  * The above copyright notice and this permission notice shall be included
      13             :  * in all copies or substantial portions of the Software.
      14             :  *
      15             :  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
      16             :  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
      17             :  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
      18             :  * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
      19             :  * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
      20             :  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
      21             :  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
      22             :  */
      23             : 
      24             : /** \file
      25             :  * \brief Implementation of the C++ tld_object class.
      26             :  *
      27             :  * This source file is the implementation of all the functions of the C++
      28             :  * tld_object class.
      29             :  */
      30             : 
      31             : #include "libtld/tld.h"
      32             : #include <stdio.h>
      33             : 
      34             : /** \class tld_object
      35             :  * \brief Class used to ease the use of the tld() function in C++.
      36             :  *
      37             :  * The tld_object class allows you to query the tld library and then check
      38             :  * each part of the URI with simple calls instead of you having to determine
      39             :  * the location of each part.
      40             :  */
      41             : 
      42             : /** \brief Initialize a tld object with the specified domain.
      43             :  *
      44             :  * This function initializes a TLD object with the specified \p domain
      45             :  * name. This function accepts a null terminated C string pointer.
      46             :  * The pointer can be set to NULL or point to an empty string in which
      47             :  * case the constructor creates an empty TLD object. Note that an
      48             :  * empty TLD object is considered invalid and if called some
      49             :  * functions throw the invalid_domain exception.
      50             :  *
      51             :  * \note
      52             :  * The string is expected to be UTF-8.
      53             :  *
      54             :  * \param[in] domain_name  The domain to parse by this object.
      55             :  */
      56           5 : tld_object::tld_object(char const * domain_name)
      57             : {
      58           5 :     set_domain(domain_name); // the char const * overload replaces nullptr with "" before parsing
      59           5 : }
      60             : 
      61             : /** \brief Initialize a tld object with the specified domain.
      62             :  *
      63             :  * This function initializes a TLD object with the specified \p domain
      64             :  * name. This function accepts standard C++ strings. The string can be
      65             :  * empty to create an empty TLD object. Note that an empty TLD object
      66             :  * is considered invalid and if called some functions throw the
      67             :  * invalid_domain exception.
      68             :  *
      69             :  * \note
      70             :  * The string is expected to be UTF-8.
      71             :  *
      72             :  * \param[in] domain_name  The domain to parse by this object.
      73             :  */
      74           2 : tld_object::tld_object(std::string const & domain_name)
      75             : {
      76           2 :     set_domain(domain_name); // copies the string into f_domain and runs tld() on that copy
      77           2 : }
      78             : 
      79             : /** \brief Change the domain of a tld object with the newly specified domain.
      80             :  *
      81             :  * This function initializes this TLD object with the specified \p domain
      82             :  * name. This function accepts a null terminated C string pointer.
      83             :  * The pointer can be set to NULL or point to an empty string in which
      84             :  * case this function makes the object an empty TLD object. Note that an
      85             :  * empty TLD object is considered invalid and if called some
      86             :  * functions throw the invalid_domain exception.
      87             :  *
      88             :  * \note
      89             :  * The string is expected to be UTF-8.
      90             :  *
      91             :  * \param[in] domain_name  The domain to parse by this object.
      92             :  */
      93           5 : void tld_object::set_domain(char const * domain_name)
      94             : {
      95           5 :     set_domain(std::string(domain_name == nullptr ? "" : domain_name)); // nullptr is replaced by "" before forwarding
      96           5 : }
      97             : 
      98             : /** \brief Change the domain of a tld object with the newly specified domain.
      99             :  *
     100             :  * This function initializes a TLD object with the specified \p domain
     101             :  * name. This function accepts standard C++ strings. The string can be
     102             :  * empty to create an empty TLD object. Note that an empty TLD object
     103             :  * is considered invalid and if called some functions throw the
     104             :  * invalid_domain exception.
     105             :  *
     106             :  * \note
     107             :  * The string is expected to be UTF-8.
     108             :  *
     109             :  * \param[in] domain_name  The domain to parse by this object.
     110             :  */
     111           7 : void tld_object::set_domain(std::string const & domain_name)
     112             : {
     113             :     // tld() supports empty strings and initializes f_info appropriately
     114           7 :     f_domain = domain_name; // keep our own copy; the accessors compare f_info.f_tld against f_domain.c_str()
     115           7 :     f_result = tld(f_domain.c_str(), &f_info); // parse the stored copy, not the caller's string
     116             :     // TBD -- should we clear f_domain on an invalid result?
     117           7 : }
     118             : 
     119             : /** \brief Check the result of the tld() command.
     120             :  *
     121             :  * This function returns the result that the tld() command produced
     122             :  * when called with the domain as specified in a constructor or
     123             :  * the set_domain() functions.
     124             :  *
     125             :  * Valid results are:
     126             :  *
     127             :  * \li TLD_RESULT_SUCCESS -- the URI is valid and all the tld_object functions can be called
     128             :  * \li TLD_RESULT_INVALID -- the TLD of this URI exists but the combination used is not acceptable
     129             :  * \li TLD_RESULT_NULL -- the domain name is the empty string or NULL
     130             :  * \li TLD_RESULT_NO_TLD -- the domain name does not even include one period
     131             :  * \li TLD_RESULT_BAD_URI -- URI parsing failed (i.e. two periods one after another)
     132             :  * \li TLD_RESULT_NOT_FOUND -- this domain TLD doesn't exist
     133             :  *
     134             :  * \return The last result of the tld() function.
     135             :  */
     136           7 : tld_result tld_object::result() const
     137             : {
     138           7 :     return f_result; // value saved by the last set_domain() call
     139             : }
     140             : 
     141             : /** \brief Retrieve the current status of the TLD.
     142             :  *
     143             :  * This function returns the status that the last tld() call generated. status() along with
     144             :  * result() are used to determine whether a call to the TLD succeeded or not. See the
     145             :  * is_valid() function too.
     146             :  *
     147             :  * This function can be used to know why a domain name failed when parsed by the tld() function.
     148             :  *
     149             :  * \li TLD_STATUS_VALID -- This URI is valid and can be queried further.
     150             :  * \li TLD_STATUS_PROPOSED -- This TLD was proposed but is not yet in use.
     151             :  * \li TLD_STATUS_DEPRECATED -- This TLD was used and was deprecated.
     152             :  * \li TLD_STATUS_UNUSED -- This TLD is simply not used.
     153             :  * \li TLD_STATUS_RESERVED -- This TLD is currently reserved.
     154             :  * \li TLD_STATUS_INFRASTRUCTURE -- This TLD represents an infrastructure object (.arpa)
     155             :  * \li TLD_STATUS_UNDEFINED -- The status is undefined if the TLD cannot be found.
     156             :  *
     157             :  * \return The status generated by the last tld() function call.
     158             :  */
     159           7 : tld_status tld_object::status() const
     160             : {
     161           7 :     return f_info.f_status; // f_info was filled in by tld() in set_domain()
     162             : }
     163             : 
     164             : /** \brief Check whether this TLD object is valid.
     165             :  *
     166             :  * This function checks the result and status returned by the last call to
     167             :  * the tld() function. This object is considered valid if and only if the
     168             :  * result is TLD_RESULT_SUCCESS and the status is TLD_STATUS_VALID. At this
     169             :  * point, any other result returns invalid and that prevents you from checking
     170             :  * the object further (i.e. call the tld_only() function to retrieve the TLD
     171             :  * of the specified URI.)
     172             :  *
     173             :  * \return true if the result and status say this TLD object is valid.
     174             :  */
     175          39 : bool tld_object::is_valid() const
     176             : {
     177          39 :     return f_result == TLD_RESULT_SUCCESS && f_info.f_status == TLD_STATUS_VALID; // a successful parse alone is not enough, the status must be valid too
     178             : }
     179             : 
     180             : /** \brief Retrieve the domain name of this TLD object.
     181             :  *
     182             :  * The TLD object keeps a copy of the domain name as specified with the
     183             :  * constructor. This copy can be retrieved by this function. This is an
     184             :  * exact copy of the input (i.e. no canonicalization.)
     185             :  *
     186             :  * \return The domain as specified to the constructor or the set_domain() functions.
     187             :  */
     188           7 : std::string tld_object::domain() const
     189             : {
     190           7 :     return f_domain; // returned by value, so the caller gets a copy
     191             : }
     192             : 
     193             : /** \brief Retrieve the sub-domains of the URI.
     194             :  *
     195             :  * This function returns the sub-domains found in the URI. This may be
     196             :  * the empty string.
     197             :  *
     198             :  * \exception invalid_domain
     199             :  * This exception is raised when this function is called with an invalid
     200             :  * TLD object. This happens whenever you create the object or call
     201             :  * set_domain() with an invalid URI. You should call is_valid() and if
     202             :  * false, avoid calling this function.
     203             :  *
     204             :  * \return All the sub-domains found in the URI.
     205             :  */
     206           7 : std::string tld_object::sub_domains() const
     207             : {
     208           7 :     if(!is_valid())
     209             :     {
     210           3 :         throw invalid_domain();
     211             :     }
     212           4 :     char const * domain_name(f_info.f_tld); // scanning starts at the TLD
     213           4 :     char const * start(f_domain.c_str()); // NOTE(review): assumes f_info.f_tld points inside this buffer -- confirm against tld()
     214          34 :     for(; domain_name > start && domain_name[-1] != '.'; --domain_name); // walk back until the previous character is a period or the string start is reached
     215           4 :     if(domain_name == start)
     216             :     {
     217           1 :         return std::string(); // nothing precedes the domain name: no sub-domains
     218             :     }
     219             :     // do not return the period that precedes the domain name
     220           3 :     return std::string(start, domain_name - start - 1);
     221             : }
     222             : 
     223             : /** \brief Full domain name: domain and TLD.
     224             :  *
     225             :  * This function returns the domain name and the TLD as a string.
     226             :  *
     227             :  * The result includes the domain name but no sub-domains.
     228             :  *
     229             :  * To get the domain name with the sub-domains, call the domain()
     230             :  * function instead. That function returns the domain as passed to
     231             :  * this object (set_domain() or constructor).
     232             :  *
     233             :  * \exception invalid_domain
     234             :  * This exception is raised when this function is called with an invalid
     235             :  * TLD object. This happens whenever you create the object or call
     236             :  * set_domain() with an invalid URI. You should call is_valid() and if
     237             :  * false, avoid calling this function.
     238             :  *
     239             :  * \return The fully qualified domain name.
     240             :  */
     241           7 : std::string tld_object::full_domain() const
     242             : {
     243           7 :     if(!is_valid())
     244             :     {
     245           3 :         throw invalid_domain();
     246             :     }
     247           4 :     char const * domain_name(f_info.f_tld); // scanning starts at the TLD
     248           4 :     for(char const * start(f_domain.c_str()); domain_name > start && domain_name[-1] != '.'; --domain_name); // walk back until the previous character is a period or the string start is reached
     249           4 :     return domain_name; // converted to std::string: everything from here to the terminating null
     250             : }
     251             : 
     252             : /** \brief Retrieve the domain name only.
     253             :  *
     254             :  * This function returns the domain name without the TLD nor any sub-domains.
     255             :  *
     256             :  * A domain name never includes any period.
     257             :  *
     258             :  * \exception invalid_domain
     259             :  * This exception is raised when this function is called with an invalid
     260             :  * TLD object. This happens whenever you create the object or call
     261             :  * set_domain() with an invalid URI. You should call is_valid() and if
     262             :  * false, avoid calling this function.
     263             :  *
     264             :  * \return The domain name without TLD or sub-domains.
     265             :  */
     266           7 : std::string tld_object::domain_only() const
     267             : {
     268           7 :     if(!is_valid())
     269             :     {
     270           3 :         throw invalid_domain();
     271             :     }
     272           4 :     char const * end(f_info.f_tld); // the domain name stops where the TLD begins
     273           4 :     char const * domain_name(end);
     274           4 :     for(char const * start(f_domain.c_str()); domain_name > start && domain_name[-1] != '.'; --domain_name); // walk back until the previous character is a period or the string start is reached
     275           4 :     return std::string(domain_name, end - domain_name); // only the characters between the two positions
     276             : }
     277             : 
     278             : /** \brief Return the TLD of the URI.
     279             :  *
     280             :  * This function returns the TLD part of the URI specified in the constructor
     281             :  * or the set_domain() function.
     282             :  *
     283             :  * The TLD is the part that represents a country, a region, a general TLD, etc.
     284             :  * Generic TLDs have one period (.com, .info,) but in general you must expect TLDs with
     285             :  * several period characters (.ca.us, .indiana.museum, .yawatahama.ehime.jp).
     286             :  *
     287             :  * \exception invalid_domain
     288             :  * This exception is raised when this function is called with an invalid
     289             :  * TLD object. This happens whenever you create the object or call
     290             :  * set_domain() with an invalid URI. You should call is_valid() and if
     291             :  * false, avoid calling this function.
     292             :  *
     293             :  * \return the TLD part of the URI specified in this TLD object.
     294             :  */
     295           7 : std::string tld_object::tld_only() const
     296             : {
     297           7 :     if(!is_valid())
     298             :     {
     299           3 :         throw invalid_domain();
     300             :     }
     301           4 :     return f_info.f_tld; // converted to std::string: from the TLD pointer to the terminating null
     302             : }
     303             : 
     304             : /** \brief Retrieve the category of this URI.
     305             :  *
     306             :  * This function is used to retrieve the category of the URI. The category is
     307             :  * just informative and has no special bearing on the TLD, domain, and sub-domain
     308             :  * parts.
     309             :  *
     310             :  * The existing categories are:
     311             :  *
     312             :  * \li TLD_CATEGORY_INTERNATIONAL -- TLD names that can be used by anyone in the world
     313             :  * \li TLD_CATEGORY_PROFESSIONALS -- TLD names reserved to professionals
     314             :  * \li TLD_CATEGORY_LANGUAGE -- language based TLD
     315             :  * \li TLD_CATEGORY_GROUPS -- group based TLD
     316             :  * \li TLD_CATEGORY_REGION -- TLD representing a region (usually within a country)
     317             :  * \li TLD_CATEGORY_TECHNICAL -- technical TLD names used to make it all work
     318             :  * \li TLD_CATEGORY_COUNTRY -- country based TLD
     319             :  * \li TLD_CATEGORY_ENTREPRENEURIAL -- TLD spawned of other official TLD names
     320             :  * \li TLD_CATEGORY_UNDEFINED -- this value means the TLD was not defined
     321             :  *
     322             :  * \return The category of the current URI or TLD_CATEGORY_UNDEFINED.
     323             :  */
     324           7 : tld_category tld_object::category() const
     325             : {
     326           7 :     return f_info.f_category; // no validity check here, unlike the string accessors
     327             : }
     328             : 
     329             : /** \brief The name of the country linked to that TLD.
     330             :  *
     331             :  * This TLD represents a country and this is its name.
     332             :  *
     333             :  * If the TLD does not represent a country then this function returns an
     334             :  * empty string. If category() returns TLD_CATEGORY_COUNTRY then this
     335             :  * function should always return a valid name.
     336             :  *
     337             :  * \note
     338             :  * At a later time we may also include other names such as the language, group, or
     339             :  * region that the TLD represents. At that time we'll certainly rename the function
     340             :  * and field.
     341             :  *
     342             :  * \return The name of the country or "" if undefined.
     343             :  */
     344           7 : std::string tld_object::country() const
     345             : {
     346             :     // constructing a std::string from a null pointer is not allowed,
     347             :     // so a missing country name is returned as an empty string
     348           7 :     if(f_info.f_country == nullptr)
     349             :     {
     350           3 :         return std::string();
     351             :     }
     352           4 :     return f_info.f_country;
     353             : }
     354             : 
     355             : 
     356             : /** \var tld_object::f_domain
     357             :  * \brief The domain or URI as specified in the constructor or set_domain() function.
     358             :  *
     359             :  * This variable holds the original domain (URI) as passed to the tld_object
     360             :  * constructor or set_domain() function.
     361             :  *
     362             :  * You can retrieve that value with the domain() function. The tld_object never
     363             :  * modifies that string.
     364             :  *
     365             :  * Note that it can be an empty string.
     366             :  *
     367             :  * \sa tld_object()
     368             :  * \sa set_domain()
     369             :  * \sa domain()
     370             :  */
     371             : 
     372             : /** \var tld_object::f_info
     373             :  * \brief The information of the domain of this tld_object.
     374             :  *
     375             :  * This variable holds the information as defined by a call to the tld()
     376             :  * function. It holds information whether or not the domain is valid,
     377             :  * empty, etc.
     378             :  *
     379             :  * The structure gets reinitialized each time a call to set_domain() is
     380             :  * made and those values are considered cached.
     381             :  */
     382             : 
     383             : /** \var tld_object::f_result
     384             :  * \brief The result of the tld() function call.
     385             :  *
     386             :  * This variable caches the result of the last tld() call with the URI
     387             :  * as defined in the f_domain variable. The f_info also corresponds to
     388             :  * this f_result.
     389             :  *
     390             :  * The result is always initialized to a value or another by constructors
     391             :  * and set_domain() methods.
     392             :  */
     393             : 
     394             : 
     395             : /** \class invalid_domain
     396             :  * \brief Exception thrown when querying for data of an invalid domain.
     397             :  *
     398             :  * This exception is raised when a certain set of functions are called in a
     399             :  * tld_object whose URI is not valid.
     400             :  *
     401             :  * Instead of catching this error, you should call the is_valid() function
     402             :  * before a function that may otherwise raise this exception and properly
     403             :  * handle the case when it returns false.
     404             :  */
     405             : 
     406             : /** \fn invalid_domain::invalid_domain(char const * what_str)
     407             :  * \brief Initialize the invalid_domain exception.
     408             :  *
     409             :  * This function initializes the invalid_domain exception with the specified
     410             :  * \p what_str parameter as the what() string.
     411             :  *
     412             :  * \param[in] what_str  A string representing the content of the what() string of the exception.
     413             :  */
     414             : 
     415             : 
     416             : /* vim: ts=4 sw=4 et
     417             :  */
 |