Line data Source code
1 : /* TLD library -- TLD, domain name, and sub-domain extraction
2 : * Copyright (C) 2011-2017 Made to Order Software Corp.
3 : *
4 : * Permission is hereby granted, free of charge, to any person obtaining a
5 : * copy of this software and associated documentation files (the
6 : * "Software"), to deal in the Software without restriction, including
7 : * without limitation the rights to use, copy, modify, merge, publish,
8 : * distribute, sublicense, and/or sell copies of the Software, and to
9 : * permit persons to whom the Software is furnished to do so, subject to
10 : * the following conditions:
11 : *
12 : * The above copyright notice and this permission notice shall be included
13 : * in all copies or substantial portions of the Software.
14 : *
15 : * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
16 : * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 : * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
18 : * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
19 : * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
20 : * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
21 : * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22 : */
23 :
24 : /** \file
25 : * \brief Implementation of the C++ tld_object class.
26 : *
27 : * This source file is the implementation of all the functions of the C++
28 : * tld_object class.
29 : */
30 :
31 : #include "libtld/tld.h"
32 : #include <stdio.h>
33 :
34 : /** \class tld_object
35 : * \brief Class used to ease the use of the tld() function in C++.
36 : *
37 : * The tld_object class allows you to query the tld library and then check
38 : * each part of the URI with simple calls instead of you having to determine
39 : * the location of each part.
40 : */
41 :
42 : /** \brief Initialize a tld object with the specified domain.
43 : *
44 : * This function initializes a TLD object with the specified \p domain
45 : * name. This function accepts a null terminated C string pointer.
46 : * The pointer can be set to NULL or point to an empty string in which
47 : * case the constructor creates an empty TLD object. Note that an
48 : * empty TLD object is considered invalid and if called some
49 : * functions throw the invalid_domain exception.
50 : *
51 : * \note
52 : * The string is expected to be UTF-8.
53 : *
54 : * \param[in] domain_name The domain to parse by this object.
55 : */
56 5 : tld_object::tld_object(char const * domain_name)
57 : {
58 5 : set_domain(domain_name);
59 5 : }
60 :
61 : /** \brief Initialize a tld object with the specified domain.
62 : *
63 : * This function initializes a TLD object with the specified \p domain
64 : * name. This function accepts standard C++ strings. The string can be
65 : * empty to create an empty TLD object. Note that an empty TLD object
66 : * is considered invalid and if called some functions throw the
67 : * invalid_domain exception.
68 : *
69 : * \note
70 : * The string is expected to be UTF-8.
71 : *
72 : * \param[in] domain_name The domain to parse by this object.
73 : */
74 2 : tld_object::tld_object(std::string const & domain_name)
75 : {
76 2 : set_domain(domain_name);
77 2 : }
78 :
79 : /** \brief Change the domain of a tld object with the newly specified domain.
80 : *
81 : * This function initializes this TLD object with the specified \p domain
82 : * name. This function accepts a null terminated C string pointer.
83 : * The pointer can be set to NULL or point to an empty string in which
84 : * case the constructor creates an empty TLD object. Note that an
85 : * empty TLD object is considered invalid and if called some
86 : * functions throw the invalid_domain exception.
87 : *
88 : * \note
89 : * The string is expected to be UTF-8.
90 : *
91 : * \param[in] domain_name The domain to parse by this object.
92 : */
93 5 : void tld_object::set_domain(char const * domain_name)
94 : {
95 5 : set_domain(std::string(domain_name == nullptr ? "" : domain_name));
96 5 : }
97 :
98 : /** \brief Change the domain of a tld object with the newly specified domain.
99 : *
100 : * This function initializes a TLD object with the specified \p domain
101 : * name. This function accepts standard C++ strings. The string can be
102 : * empty to create an empty TLD object. Note that an empty TLD object
103 : * is considered invalid and if called some functions throw the
104 : * invalid_domain exception.
105 : *
106 : * \note
107 : * The string is expected to be UTF-8.
108 : *
109 : * \param[in] domain_name The domain to parse by this object.
110 : */
111 7 : void tld_object::set_domain(std::string const & domain_name)
112 : {
113 : // tld() supports empty strings and initializes f_info appropriately
114 7 : f_domain = domain_name;
115 7 : f_result = tld(f_domain.c_str(), &f_info);
116 : // TBD -- should we clear f_domain on an invalid result?
117 7 : }
118 :
119 : /** \brief Check the result of the tld() command.
120 : *
121 : * This function returns the result that the tld() command produced
122 : * when called with the domain as specified in a constructor or
123 : * the set_domain() functions.
124 : *
125 : * Valid resutls are:
126 : *
127 : * \li TLD_RESULT_SUCCESS -- the URI is valid and all the tld_object functions can be called
128 : * \li TLD_RESULT_INVALID -- the TLD of this URI exists but the combination used is not acceptable
129 : * \li TLD_RESULT_NULL -- the domain name is the empty string or NULL
130 : * \li TLD_RESULT_NO_TLD -- the domain name does not even include one period
131 : * \li TLD_RESULT_BAD_URI -- URI parsing failed (i.e. two periods one after another)
132 : * \li TLD_RESULT_NOT_FOUND -- this domain TLD doesn't exist
133 : *
134 : * \return The last result of the tld() function.
135 : */
136 7 : tld_result tld_object::result() const
137 : {
138 7 : return f_result;
139 : }
140 :
141 : /** \brief Retrieve the current status of the TLD.
142 : *
143 : * This function returns the status that the last tld() call generated. status() along with
144 : * result() are used to determine whether a call to the TLD succeeded or not. See the
145 : * is_valid() function too.
146 : *
147 : * This function can be used to know why a domain name failed when parsed by the tld() function.
148 : *
149 : * \li TLD_STATUS_VALID -- This URI is valid and can be queried further.
150 : * \li TLD_STATUS_PROPOSED -- This TLD was proposed but is not yet in used.
151 : * \li TLD_STATUS_DEPRECATED -- This TLD was used and was deprecated.
152 : * \li TLD_STATUS_UNUSED -- This TLD is simply not used.
153 : * \li TLD_STATUS_RESERVED -- This TLD is currently reserved.
154 : * \li TLD_STATUS_INFRASTRUCTURE -- This TLD represents an infrastructure object (.arpa)
155 : * \li TLD_STATUS_UNDEFINED -- The status is undefined if the TLD cannot be found.
156 : *
157 : * \return The status generated by the last tld() function call.
158 : */
159 7 : tld_status tld_object::status() const
160 : {
161 7 : return f_info.f_status;
162 : }
163 :
164 : /** \brief Check whether this TLD object is valid.
165 : *
166 : * This function checks the result and status returned by the last call to
167 : * the tld() function. This object is considered valid if and only if the
168 : * result is TLD_RESULT_SUCCESS and the status is TLD_STATUS_VALID. At this
169 : * point, any other result returns invalid and that prevents you from checking
170 : * the object further (i.e. call the tld_only() function to retrieve the TLD
171 : * of the specified URI.)
172 : *
173 : * \return true if the result and status say this TLD object is valid.
174 : */
175 39 : bool tld_object::is_valid() const
176 : {
177 39 : return f_result == TLD_RESULT_SUCCESS && f_info.f_status == TLD_STATUS_VALID;
178 : }
179 :
180 : /** \brief Retrieve the domain name of this TLD object.
181 : *
182 : * The TLD object keeps a copy of the domain name as specified with the
183 : * constructor. This copy can be retrieved by this function. This is an
184 : * exact copy of the input (i.e. no canonicalization.)
185 : *
186 : * \return The domain as specified to the constructor or the set_domain() functions.
187 : */
188 7 : std::string tld_object::domain() const
189 : {
190 7 : return f_domain;
191 : }
192 :
193 : /** \brief Retrieve the sub-domains of the URI.
194 : *
195 : * This function returns the sub-domains found in the URI. This may be
196 : * the empty string.
197 : *
198 : * \exception invalid_domain
199 : * This exception is raised when this function is called with an invalid
200 : * TLD object. This happens whenever you create the object or call
201 : * set_domain() with an invalid URI. You should call is_valid() and if
202 : * false, avoid calling this function.
203 : *
204 : * \return All the sub-domains found in the URI.
205 : */
206 7 : std::string tld_object::sub_domains() const
207 : {
208 7 : if(!is_valid())
209 : {
210 3 : throw invalid_domain();
211 : }
212 4 : char const * domain_name(f_info.f_tld);
213 4 : char const * start(f_domain.c_str());
214 34 : for(; domain_name > start && domain_name[-1] != '.'; --domain_name);
215 4 : if(domain_name == start)
216 : {
217 1 : return std::string();
218 : }
219 : // no not return the period
220 3 : return std::string(start, domain_name - start - 1);
221 : }
222 :
223 : /** \brief Full domain name: domain and TLD.
224 : *
225 : * This function returns the domain name and the TLD as a string.
226 : *
227 : * The result includes the domain name but no sub-domains.
228 : *
229 : * To get the domain name with the sub-domains, call the domain()
230 : * function instead. That function returns the domain as passed to
231 : * this object (set_domain() or constructor).
232 : *
233 : * \exception invalid_domain
234 : * This exception is raised when this function is called with an invalid
235 : * TLD object. This happens whenever you create the object or call
236 : * set_domain() with an invalid URI. You should call is_valid() and if
237 : * false, avoid calling this function.
238 : *
239 : * \return The fully qualified domain name.
240 : */
241 7 : std::string tld_object::full_domain() const
242 : {
243 7 : if(!is_valid())
244 : {
245 3 : throw invalid_domain();
246 : }
247 4 : char const * domain_name(f_info.f_tld);
248 4 : for(char const * start(f_domain.c_str()); domain_name > start && domain_name[-1] != '.'; --domain_name);
249 4 : return domain_name;
250 : }
251 :
252 : /** \brief Retrieve the domain name only.
253 : *
254 : * This function returns the domain name without the TLD nor any sub-domains.
255 : *
256 : * A domain name never includes any period.
257 : *
258 : * \exception invalid_domain
259 : * This exception is raised when this function is called with an invalid
260 : * TLD object. This happens whenever you create the object or call
261 : * set_domain() with an invalid URI. You should call is_valid() and if
262 : * false, avoid calling this function.
263 : *
264 : * \return The domain name without TLD or sub-domains.
265 : */
266 7 : std::string tld_object::domain_only() const
267 : {
268 7 : if(!is_valid())
269 : {
270 3 : throw invalid_domain();
271 : }
272 4 : char const * end(f_info.f_tld);
273 4 : char const * domain_name(end);
274 4 : for(char const * start(f_domain.c_str()); domain_name > start && domain_name[-1] != '.'; --domain_name);
275 4 : return std::string(domain_name, end - domain_name);
276 : }
277 :
278 : /** \brief Return the TLD of the URI.
279 : *
280 : * This function returns the TLD part of the URI specified in the constructor
281 : * or the set_domain() function.
282 : *
283 : * The TLD is the part that represents a country, a region, a general TLD, etc.
284 : * Generic TLDs have one period (.com, .info,) but in general you must expect TLDs with
285 : * several period characters (.ca.us, .indiana.museum, .yawatahama.ehime.jp).
286 : *
287 : * \exception invalid_domain
288 : * This exception is raised when this function is called with an invalid
289 : * TLD object. This happens whenever you create the object or call
290 : * set_domain() with an invalid URI. You should call is_valid() and if
291 : * false, avoid calling this function.
292 : *
293 : * \return the TLD part of the URI specified in this TLD object.
294 : */
295 7 : std::string tld_object::tld_only() const
296 : {
297 7 : if(!is_valid())
298 : {
299 3 : throw invalid_domain();
300 : }
301 4 : return f_info.f_tld;
302 : }
303 :
304 : /** \brief Retrieve the category of this URI.
305 : *
306 : * This function is used to retrieve the category of the URI. The category is
307 : * just informative and has no special bearing on the TLD, domain, and sub-domain
308 : * parts.
309 : *
310 : * The existing categories are:
311 : *
312 : * \li TLD_CATEGORY_INTERNATIONAL -- TLD names that can be used by anyone in the world
313 : * \li TLD_CATEGORY_PROFESSIONALS -- TLD names reserved to professionals
314 : * \li TLD_CATEGORY_LANGUAGE -- language based TLD
315 : * \li TLD_CATEGORY_GROUPS -- group based TLD
316 : * \li TLD_CATEGORY_REGION -- TLD representing a region (usually within a country)
317 : * \li TLD_CATEGORY_TECHNICAL -- technical TLD names used to make it all work
318 : * \li TLD_CATEGORY_COUNTRY -- country based TLD
319 : * \li TLD_CATEGORY_ENTREPRENEURIAL -- TLD spawned of other official TLD names
320 : * \li TLD_CATEGORY_UNDEFINED -- this value means the TLD was not defined
321 : *
322 : * \return The category of the current URI or TLD_CATEGORY_UNDEFINED.
323 : */
324 7 : tld_category tld_object::category() const
325 : {
326 7 : return f_info.f_category;
327 : }
328 :
329 : /** \brief The name of the country linked to that TLD.
330 : *
331 : * This TLD represents a country and this is its name.
332 : *
333 : * If the TLD does not represent a country then this function returns an
334 : * empty string. If category() returns TLD_CATEGORY_COUNTRY then this
335 : * function should always return a valid name.
336 : *
337 : * \note
338 : * At a later time we may also include other names such as the language, group, or
339 : * region that the TLD represents. At that time we'll certainly rename the function
340 : * and field.
341 : *
342 : * \return The name of the country or "" if undefined.
343 : */
344 7 : std::string tld_object::country() const
345 : {
346 : // std::string doesn't like NULL as a parameter
347 : //
348 7 : if(f_info.f_country == nullptr)
349 : {
350 3 : return std::string();
351 : }
352 4 : return f_info.f_country;
353 : }
354 :
355 :
356 : /** \var tld_object::f_domain
357 : * \brief The domain or URI as specified in the constructor or set_domain() function.
358 : *
359 : * This variable holds the original domain (URI) as passed to the tld_object
360 : * constructor or set_domain() function.
361 : *
362 : * You can retrieve that value with the domain() function. The tld_object never
363 : * modifies that string.
364 : *
365 : * Note that it can be an empty string.
366 : *
367 : * \sa tld_object()
368 : * \sa set_domain()
369 : * \sa domain()
370 : */
371 :
372 : /** \var tld_object::f_info
373 : * \brief The information of the domain of this tld_object.
374 : *
375 : * This variable holds the information as defined by a call to the tld()
376 : * function. It holds information whether or not the domain is valid,
377 : * empty, etc.
378 : *
379 : * The structure gets reinitialized each time a call to set_domain() is
380 : * made and those values are considered cached.
381 : */
382 :
383 : /** \var tld_object::f_result
384 : * \brief The result of the tld() function call.
385 : *
386 : * This variable caches the result of the last tld() call with the URI
387 : * as defined in the f_domain variable. The f_info also corresponds to
388 : * this f_result.
389 : *
390 : * The result is always initialized to a value or another by constructors
391 : * and set_domain() methods.
392 : */
393 :
394 :
395 : /** \class invalid_domain
396 : * \brief Exception thrown when querying for data of an invalid domain.
397 : *
398 : * This exception is raised when a certain set of functions are called on a
399 : * tld_object whose URI is not valid.
400 : *
401 : * Instead of catching this error, you should call the is_valid() function
402 : * before a function that may otherwise raise this exception and properly
403 : * handle the case when it returns false.
404 : */
405 :
406 : /** \fn invalid_domain::invalid_domain(char const * what_str)
407 : * \brief Initialize the invalid_domain exception.
408 : *
409 : * This function initializes the invalid_domain exception with the specified
410 : * \p what_str parameter as the what() string.
411 : *
412 : * \param[in] what_str A string representing the content of the what() string of the exception.
413 : */
414 :
415 :
416 : /* vim: ts=4 sw=4 et
417 : */
|