libtld 2.0.14
A library to determine the Top-Level Domain name of any Internet URI.
tld_file.cpp
Go to the documentation of this file.
1/* TLD library -- TLD, domain name, and sub-domain extraction
2 * Copyright (c) 2011-2025 Made to Order Software Corp. All Rights Reserved
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sublicense, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be included
13 * in all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
16 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
18 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
19 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
20 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
21 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23
30// self
31//
32#include "libtld/tld_file.h"
33#include "libtld/tld.h"
34#include "libtld/tld_data.h"
35
36
37// C++
38//
39#include <fstream>
40#include <iostream>
41#include <sstream>
42
43
44// C
45//
46#include <limits.h>
47#include <string.h>
48
49
50
51tld_file_error tld_file_load_stream(tld_file ** file, std::istream & in)
52{
53 tld_magic magic;
54 in.read(reinterpret_cast<char *>(&magic), sizeof(magic));
55 if(!in
56 || in.gcount() != sizeof(magic))
57 {
58 return TLD_FILE_ERROR_CANNOT_READ_FILE;
59 }
60
61 if(magic.f_riff != TLD_MAGIC
62 || magic.f_type != TLD_TLDS)
63 {
64 return TLD_FILE_ERROR_UNRECOGNIZED_FILE;
65 }
66 if(magic.f_size < sizeof(tld_header) + 4
67 || magic.f_size > 1024 * 1024)
68 {
69 return TLD_FILE_ERROR_INVALID_FILE_SIZE;
70 }
71 uint32_t size(magic.f_size - sizeof(uint32_t));
72
73 // we already read the type so we can skip that one in the following
74 // memory buffer & read
75 //
76 *file = reinterpret_cast<tld_file *>(malloc(sizeof(tld_file) + size));
77 if(*file == nullptr)
78 {
79 return TLD_FILE_ERROR_OUT_OF_MEMORY;
80 }
81
82 class auto_free
83 {
84 public:
85 auto_free(tld_file ** ptr)
86 : f_ptr(ptr)
87 {
88 }
89
90 auto_free(auto_free const &) = delete;
91
92 ~auto_free()
93 {
94 if(f_ptr != nullptr
95 && *f_ptr != nullptr)
96 {
97 free(*f_ptr);
98 *f_ptr = nullptr;
99 }
100 }
101
102 auto_free & operator = (auto_free const &) = delete;
103
104 void keep()
105 {
106 f_ptr = nullptr;
107 }
108
109 private:
110 tld_file ** f_ptr = nullptr;
111 };
112 auto_free safe_ptr(file);
113
114 memset(*file, 0, sizeof(tld_file));
115
116 tld_hunk * hunk(reinterpret_cast<tld_hunk *>(*file + 1));
117
118 in.read(reinterpret_cast<char *>(hunk), size);
119 if(!in
120 || in.gcount() != size) // this doesn't fail if the file is larger...
121 {
122 return TLD_FILE_ERROR_CANNOT_READ_FILE;
123 }
124
125 while(size != 0)
126 {
127 if(sizeof(tld_hunk) > size)
128 {
129 return TLD_FILE_ERROR_INVALID_HUNK_SIZE;
130 }
131 size -= sizeof(tld_hunk);
132
133 if(hunk->f_size > size)
134 {
135 return TLD_FILE_ERROR_INVALID_HUNK_SIZE;
136 }
137 size -= hunk->f_size;
138
139 switch(hunk->f_name)
140 {
141 case TLD_HEADER:
142 if(sizeof(tld_header) != hunk->f_size)
143 {
144 return TLD_FILE_ERROR_INVALID_STRUCTURE_SIZE;
145 }
146 if((*file)->f_header != nullptr)
147 {
148 return TLD_FILE_ERROR_HUNK_FOUND_TWICE;
149 }
150 (*file)->f_header = reinterpret_cast<tld_header *>(hunk + 1);
151 if((*file)->f_header->f_version_major != TLD_FILE_VERSION_MAJOR
152 || (*file)->f_header->f_version_minor != TLD_FILE_VERSION_MINOR)
153 {
154 return TLD_FILE_ERROR_UNSUPPORTED_VERSION;
155 }
156 break;
157
158 case TLD_DESCRIPTIONS:
159 (*file)->f_descriptions_count = hunk->f_size / sizeof(tld_description);
160 if((*file)->f_descriptions_count * sizeof(tld_description) != hunk->f_size)
161 {
162 return TLD_FILE_ERROR_INVALID_ARRAY_SIZE;
163 }
164 if((*file)->f_descriptions != nullptr)
165 {
166 return TLD_FILE_ERROR_HUNK_FOUND_TWICE;
167 }
168 (*file)->f_descriptions = reinterpret_cast<tld_description *>(hunk + 1);
169 break;
170
171 case TLD_TAGS:
172 // the tags are a bit peculiar in that the compression happens
173 // by uin32_t and not by tld_tags so the number of tags cannot
174 // be inferred by the hunk size
175 //
176 (*file)->f_tags_size = hunk->f_size / sizeof(uint32_t);
177 if((*file)->f_tags_size * sizeof(uint32_t) != hunk->f_size)
178 {
179 return TLD_FILE_ERROR_INVALID_ARRAY_SIZE;
180 }
181 if((*file)->f_tags != nullptr)
182 {
183 return TLD_FILE_ERROR_HUNK_FOUND_TWICE;
184 }
185 (*file)->f_tags = reinterpret_cast<uint32_t *>(hunk + 1);
186 break;
187
188 case TLD_STRING_OFFSETS:
189 if((*file)->f_strings_count == 0)
190 {
191 (*file)->f_strings_count = hunk->f_size / sizeof(tld_string_offset);
192 if((*file)->f_strings_count == 0)
193 {
194 return TLD_FILE_ERROR_INVALID_ARRAY_SIZE;
195 }
196 }
197 if((*file)->f_strings_count * sizeof(tld_string_offset) != hunk->f_size)
198 {
199 return TLD_FILE_ERROR_INVALID_ARRAY_SIZE;
200 }
201 if((*file)->f_string_offsets != nullptr)
202 {
203 return TLD_FILE_ERROR_HUNK_FOUND_TWICE;
204 }
205 (*file)->f_string_offsets = reinterpret_cast<tld_string_offset *>(hunk + 1);
206 break;
207
208 case TLD_STRING_LENGTHS:
209 if((*file)->f_strings_count == 0)
210 {
211 (*file)->f_strings_count = hunk->f_size / sizeof(tld_string_length);
212 if((*file)->f_strings_count == 0)
213 {
214 return TLD_FILE_ERROR_INVALID_ARRAY_SIZE;
215 }
216 }
217 if((*file)->f_strings_count * sizeof(tld_string_length) != hunk->f_size)
218 {
219 return TLD_FILE_ERROR_INVALID_ARRAY_SIZE;
220 }
221 if((*file)->f_string_lengths != nullptr)
222 {
223 return TLD_FILE_ERROR_HUNK_FOUND_TWICE;
224 }
225 (*file)->f_string_lengths = reinterpret_cast<tld_string_length *>(hunk + 1);
226 break;
227
228 case TLD_STRINGS:
229 if(hunk->f_size == 0)
230 {
231 return TLD_FILE_ERROR_INVALID_ARRAY_SIZE;
232 }
233 if((*file)->f_strings != nullptr)
234 {
235 return TLD_FILE_ERROR_HUNK_FOUND_TWICE;
236 }
237 (*file)->f_strings = reinterpret_cast<char *>(hunk + 1);
238 (*file)->f_strings_end = reinterpret_cast<char *>(hunk + 1) + hunk->f_size;
239 break;
240
241 default:
242 // just skip unrecognized hunks
243 break;
244
245 }
246
247 hunk = reinterpret_cast<tld_hunk *>(reinterpret_cast<char *>(hunk + 1) + hunk->f_size);
248 }
249
250 // verify we got all the required tables
251 //
252 if((*file)->f_header == nullptr
253 || (*file)->f_descriptions == nullptr
254 || (*file)->f_tags == nullptr
255 || (*file)->f_string_offsets == nullptr
256 || (*file)->f_string_lengths == nullptr
257 || (*file)->f_strings == nullptr)
258 {
259 return TLD_FILE_ERROR_MISSING_HUNK;
260 }
261
262 // it worked, do no lose the allocated pointer
263 //
264 safe_ptr.keep();
265
266 return TLD_FILE_ERROR_NONE;
267}
268
269
270#ifdef __cplusplus
271extern "C" {
272#endif
273
274
275enum tld_file_error tld_file_load(char const * filename, tld_file ** file)
276{
277 if(file == nullptr
278 || filename == nullptr)
279 {
280 return TLD_FILE_ERROR_INVALID_POINTER;
281 }
282 if(*file != nullptr)
283 {
284 return TLD_FILE_ERROR_POINTER_PRESENT;
285 }
286
287 std::ifstream in;
288 in.open(filename);
289 if(!in.is_open())
290 {
291 return TLD_FILE_ERROR_CANNOT_OPEN_FILE;
292 }
293
294 return tld_file_load_stream(file, in);
295}
296
297
298const char *tld_file_errstr(tld_file_error err)
299{
300 switch(err)
301 {
302 case TLD_FILE_ERROR_NONE:
303 return "No error";
304
305 case TLD_FILE_ERROR_INVALID_POINTER:
306 return "Invalid pointer";
307
308 case TLD_FILE_ERROR_POINTER_PRESENT:
309 return "Pointer present when it should ne nullptr";
310
311 case TLD_FILE_ERROR_CANNOT_OPEN_FILE:
312 return "Cannot open file";
313
314 case TLD_FILE_ERROR_CANNOT_READ_FILE:
315 return "I/O error reading file";
316
317 case TLD_FILE_ERROR_UNRECOGNIZED_FILE:
318 return "Unrecognized input file";
319
320 case TLD_FILE_ERROR_INVALID_FILE_SIZE:
321 return "Invalid file size";
322
323 case TLD_FILE_ERROR_OUT_OF_MEMORY:
324 return "Out of memory";
325
326 case TLD_FILE_ERROR_INVALID_HUNK_SIZE:
327 return "Invalid hunk size";
328
329 case TLD_FILE_ERROR_INVALID_STRUCTURE_SIZE:
330 return "Invalid structure size";
331
332 case TLD_FILE_ERROR_INVALID_ARRAY_SIZE:
333 return "Invalid array size";
334
335 case TLD_FILE_ERROR_UNSUPPORTED_VERSION:
336 return "Unsupported version";
337
338 case TLD_FILE_ERROR_MISSING_HUNK:
339 return "Missing hunk";
340
341 case TLD_FILE_ERROR_HUNK_FOUND_TWICE:
342 return "Found the same hunk twice";
343
344 //default: -- handled below, without a default, we know whether we missed
345 // some new TLD_FILE_ERROR_... in our cases above.
346 }
347
348 return "Unknown tld_file error number";
349}
350
351
352tld_description const * tld_file_description(tld_file const * file, uint32_t id)
353{
354 if(id >= file->f_descriptions_count)
355 {
356 return nullptr;
357 }
358 return file->f_descriptions + id;
359}
360
361
362tld_tag const * tld_file_tag(tld_file const * file, uint32_t id)
363{
364 if(id + 1 >= file->f_tags_size)
365 {
366 return nullptr;
367 }
368 return reinterpret_cast<tld_tag *>(file->f_tags + id);
369}
370
371
372char const * tld_file_string(tld_file const * file, uint32_t id, uint32_t * length)
373{
374 if(length == nullptr)
375 {
376 errno = EINVAL;
377 return nullptr;
378 }
379 *length = 0;
380
381 --id;
382 if(id >= file->f_strings_count)
383 {
384 errno = EINVAL;
385 return nullptr;
386 }
387 char const * s(file->f_strings + file->f_string_offsets[id].f_string_offset);
388 uint32_t l(file->f_string_lengths[id].f_string_length);
389 char const * e(s + l);
390 if(s > file->f_strings_end
391 || e > file->f_strings_end)
392 {
393 // assuming the file is valid, this should not happen
394 //
395 errno = EINVAL;
396 return nullptr;
397 }
398 *length = l;
399 return s;
400}
401
402
414char * tld_file_to_json(tld_file const * file)
415{
416 if(file == nullptr
417 || file->f_header == nullptr
418 || file->f_descriptions == nullptr
419 || file->f_tags == nullptr
420 || file->f_string_offsets == nullptr
421 || file->f_string_lengths == nullptr
422 || file->f_strings == nullptr)
423 {
424 return nullptr;
425 }
426
427 std::stringstream out;
428
429 out << "{\n";
430 out << "\"version\":\"" << static_cast<int>(file->f_header->f_version_major)
431 << '.' << static_cast<int>(file->f_header->f_version_minor) << "\",\n";
432 out << "\"created-on\":" << file->f_header->f_created_on << ",\n";
433 out << "\"max-level\":" << static_cast<int>(file->f_header->f_tld_max_level) << ",\n";
434 out << "\"tld-start-offset\":" << static_cast<int>(file->f_header->f_tld_start_offset) << ",\n";
435 out << "\"tld-end-offset\":" << static_cast<int>(file->f_header->f_tld_end_offset) << ",\n";
436 out << "\"descriptions\":[\n";
437 for(uint32_t idx(0); idx < file->f_descriptions_count; ++idx)
438 {
439 tld_description const * d(tld_file_description(file, idx));
440
441 out << (idx == 0 ? "" : ",\n");
442
443 {
444 uint32_t length(0);
445 char const * tld(tld_file_string(file, d->f_tld, &length));
446 out << "{\"tld\":\"" << std::string(tld, length) << "\"";
447 }
448
449 out << ",\"status\":\"" << tld_status_to_string(static_cast<tld_status>(d->f_status)) << "\"";
450
451 if(d->f_exception_apply_to != USHRT_MAX)
452 {
453 tld_description const * apply_to(tld_file_description(file, d->f_exception_apply_to));
454 uint32_t length(0);
455 char const * to_tld(tld_file_string(file, apply_to->f_tld, &length));
456 out << ",\"apply-to\":\"" << std::string(to_tld, length) << "\"";
457 }
458
459 if(d->f_start_offset != USHRT_MAX)
460 {
461 out << ",\"start-offset\":" << d->f_start_offset;
462 out << ",\"end-offset\":" << d->f_end_offset;
463 }
464
465 for(uint32_t tidx(0); tidx < d->f_tags_count; ++tidx)
466 {
467 const tld_tag * tag(tld_file_tag(file, d->f_tags + tidx * 2));
468 {
469 uint32_t length(0);
470 char const * tag_name(tld_file_string(file, tag->f_tag_name, &length));
471 out << ",\"" << std::string(tag_name, length)
472 << "\":\"";
473 }
474 {
475 uint32_t length(0);
476 char const * tag_value(tld_file_string(file, tag->f_tag_value, &length));
477 out << std::string(tag_value, length)
478 << "\"";
479 }
480 }
481
482 out << "}";
483 }
484 out << "]}\n";
485
486 return strdup(out.str().c_str());
487}
488
489
490void tld_file_free(tld_file ** file)
491{
492 if(file != nullptr
493 && *file != nullptr)
494 {
495 free(*file);
496 *file = nullptr;
497 }
498}
499
500
608#ifdef __cplusplus
609}
610#endif
611
612// vim: ts=4 sw=4 et
[internal] The description of one TLD.
Definition tld_file.h:117
uint16_t f_end_offset
The last offset of a list of TLDs.
Definition tld_file.h:123
uint16_t f_tags_count
The number of tags defined by this TLD.
Definition tld_file.h:128
uint16_t f_start_offset
The first offset of a list of TLDs.
Definition tld_file.h:122
uint16_t f_tld
The actual TLD of this entry.
Definition tld_file.h:125
uint16_t f_exception_apply_to
This TLD is an exception of the "apply to" TLD.
Definition tld_file.h:120
uint16_t f_tags
The tags of this TLD.
Definition tld_file.h:127
uint8_t f_status
The status of this TLD.
Definition tld_file.h:118
The public header of the libtld library.
LIBTLD_EXPORT enum tld_result tld(const char *uri, struct tld_info *info)
Get information about the TLD for the specified URI.
Definition tld.cpp:1113
LIBTLD_EXPORT const char * tld_status_to_string(enum tld_status status)
Transform the status to a string.
Definition tld_strings.c:49
tld_status
Definition tld.h:70
Declaration of the static TLDs file.
char * tld_file_to_json(tld_file const *file)
Transform a tld_file to a JSON string.
Definition tld_file.cpp:414
Declaration of the TLD file structures.

This document is part of the Snap! Websites Project.

Copyright by Made to Order Software Corp.