Line data Source code
1 : /* TLD library -- TLD, domain name, and sub-domain extraction
2 : * Copyright (c) 2011-2022 Made to Order Software Corp. All Rights Reserved
3 : *
4 : * Permission is hereby granted, free of charge, to any person obtaining a
5 : * copy of this software and associated documentation files (the
6 : * "Software"), to deal in the Software without restriction, including
7 : * without limitation the rights to use, copy, modify, merge, publish,
8 : * distribute, sublicense, and/or sell copies of the Software, and to
9 : * permit persons to whom the Software is furnished to do so, subject to
10 : * the following conditions:
11 : *
12 : * The above copyright notice and this permission notice shall be included
13 : * in all copies or substantial portions of the Software.
14 : *
15 : * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
16 : * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 : * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
18 : * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
19 : * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
20 : * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
21 : * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22 : */
23 : #ifndef LIB_TLD_FILE_H
24 : #define LIB_TLD_FILE_H
25 : /** \file
26 : * \brief Declaration of the TLD file structures.
27 : *
28 : * The older version of the libtld had a pre-compiled structure within the
29 : * library. The main problem with that design is that you can't quickly
30 : * update the list of domain name TLDs. You have to recompile the whole
31 : * library, generate a new package, install that new package.
32 : *
33 : * The pros are of course that you only have to deal with one file (.so
34 : * or .a or similar file for your OS).
35 : *
36 : * Yet, the number of TLDs has been growing and changing a lot back and
37 : * forth in the last 10 years or so and having a compressed external file
38 : * will make it a lot faster and a lot easier to update the available TLDs.
39 : *
40 : * The structures found here describe that external file. The basic format
41 : * is IFF (like a WAVE file; sizes and other numbers are stored in the
42 : * endianness of your computer, so watch out for that part.)
43 : *
44 : * At time of writing, I have three hunks:
45 : *
46 : * * Header -- a header with a few parameters such as the maximum TLD level
47 : * * Descriptions -- the array of descriptions
48 : * * Strings -- one super-string; the descriptions include an offset and a
49 : * size for each one of those strings
50 : */
51 :
52 : // C lib
53 : //
#include <stdint.h>
#include <stdlib.h>
55 :
56 :
57 : #ifdef __cplusplus
58 :
59 : #include <iostream>
60 :
61 : extern "C" {
62 : #endif
63 :
64 :
65 : #define TLD_FILE_VERSION_MAJOR 1 // major version of the TLD file format; presumably what tld_header::f_version_major is compared against
66 : #define TLD_FILE_VERSION_MINOR 0 // minor version of the TLD file format; presumably what tld_header::f_version_minor is compared against
67 :
/* Build a 32-bit hunk identifier out of four characters: `a` ends up in the
 * least significant byte and `d` in the most significant one.
 *
 * Each argument is converted to uint32_t before it is shifted so that a
 * value of 0x80 or more in `d` cannot overflow a signed int.
 */
#define TLD_HUNK(a, b, c, d) ((uint32_t)(((uint32_t)(a))|(((uint32_t)(b))<<8)|(((uint32_t)(c))<<16)|(((uint32_t)(d))<<24)))
69 :
70 : #define TLD_MAGIC TLD_HUNK('R','I','F','F') // same characters as documented for tld_magic::f_riff
71 : #define TLD_TLDS TLD_HUNK('T','L','D','S') // same characters as documented for tld_magic::f_type
72 : #define TLD_HEADER TLD_HUNK('H','E','A','D') // hunk name -- presumably the hunk holding struct tld_header
73 : #define TLD_DESCRIPTIONS TLD_HUNK('D','E','S','C') // hunk name -- presumably the array of struct tld_description
74 : #define TLD_TAGS TLD_HUNK('T','A','G','S') // hunk name -- presumably the tag table (see tld_file::f_tags)
75 : #define TLD_STRING_OFFSETS TLD_HUNK('S','O','F','F') // hunk name -- presumably the array of struct tld_string_offset
76 : #define TLD_STRING_LENGTHS TLD_HUNK('S','L','E','N') // hunk name -- presumably the array of struct tld_string_length
77 : #define TLD_STRINGS TLD_HUNK('S','T','R','S') // hunk name -- the STRS data that string offsets refer to
78 :
79 :
80 :
81 :
82 : struct tld_magic // RIFF-style preamble: magic, size, and file type (presumably the very first bytes of the file)
83 : {
84 : uint32_t f_riff; // 'RIFF' (Reversed IFF); same characters as TLD_MAGIC
85 : uint32_t f_size; // total size of this file - 8
86 : uint32_t f_type; // 'TLDS'; same characters as TLD_TLDS
87 : };
88 :
89 :
90 : struct tld_hunk // name and size pair describing one hunk
91 : {
92 : uint32_t f_name; // hunk identifier; the names visible in this header are built with TLD_HUNK()
93 : uint32_t f_size; // NOTE(review): presumably the byte count of the hunk data, excluding this structure -- confirm in the loader
94 : };
95 :
96 :
97 : struct tld_header // file-wide parameters (presumably the content of the 'HEAD' hunk, TLD_HEADER)
98 : {
99 : // WARNING: do not change the version position
100 : // anything else may change based on that information
101 : //
102 : uint8_t f_version_major; // 1.0 (see TLD_FILE_VERSION_MAJOR)
103 : uint8_t f_version_minor; // see TLD_FILE_VERSION_MINOR
104 : uint8_t f_pad0; // padding byte
105 : uint8_t f_tld_max_level; // maximum TLD level
106 :
107 : uint16_t f_tld_start_offset; // NOTE(review): looks like the start of the range of top-level entries in the descriptions array -- confirm
108 : uint16_t f_tld_end_offset; // NOTE(review): looks like the end of that range; confirm whether it is inclusive
109 :
110 : int64_t f_created_on; // creation time -- NOTE(review): unit and epoch are not visible here; presumably a Unix timestamp
111 : };
112 :
113 :
113 : struct tld_description // one entry of the descriptions array
114 : {
115 : uint8_t f_status; // NOTE(review): status code; the list of valid values is not visible in this header
116 : uint8_t f_exception_level; // NOTE(review): presumably the level an exception applies at -- confirm
117 : uint16_t f_exception_apply_to; // index of tld_description this exception applies to
118 :
119 : uint16_t f_start_offset; // next level or -1 (65535)
120 : uint16_t f_end_offset; // end of the next level range -- NOTE(review): confirm whether it is inclusive
121 :
122 : uint16_t f_tld; // string ID
123 :
124 : uint16_t f_tags; // offset in tld_tag table
125 : uint16_t f_tags_count; // NOTE(review): presumably the number of tags found at f_tags -- confirm the unit
126 : };
128 :
129 :
130 : struct tld_tag // a tag: a name and a value, both given as string IDs
131 : {
132 : uint32_t f_tag_name; // string ID
133 : uint32_t f_tag_value; // string ID
134 : };
135 :
136 :
137 : struct tld_string_offset // position of one string (presumably one entry per string ID)
138 : {
139 : uint32_t f_string_offset; // offset in STRS
140 : };
141 :
142 :
143 : struct tld_string_length // length of one string (presumably one entry per string ID)
144 : {
145 : uint16_t f_string_length; // corresponding length
146 : };
147 :
148 :
149 : struct tld_file // pointers and counts giving access to the parts of one TLD file
150 : {
151 : struct tld_header * f_header; // the header structure
152 : uint32_t f_descriptions_count; // presumably the number of entries in f_descriptions
153 : struct tld_description * f_descriptions; // array of descriptions
154 : uint32_t f_tags_size; // WARNING: this is the number of uint32_t, not tld_tag
155 : uint32_t * f_tags; // these are tld_tags which may be merged at any level (a tag id may be odd)
156 : uint32_t f_strings_count; // presumably the number of entries in f_string_offsets and f_string_lengths
157 : struct tld_string_offset * f_string_offsets; // per-string offset in STRS
158 : struct tld_string_length * f_string_lengths; // per-string length
159 : char * f_strings; // NOTE(review): looks like the start of the STRS data -- confirm
160 : char * f_strings_end; // NOTE(review): looks like the end of the STRS data; confirm whether it is one past the last byte
161 : };
162 :
163 :
164 : enum tld_file_error // result codes returned by tld_file_load() and tld_file_load_stream(); accepted by tld_file_errstr()
165 : {
166 : TLD_FILE_ERROR_NONE = 0, // no error
167 : TLD_FILE_ERROR_INVALID_POINTER,
168 : TLD_FILE_ERROR_POINTER_PRESENT, // NOTE(review): presumably the output pointer was not NULL on entry -- confirm in the loader
169 : TLD_FILE_ERROR_CANNOT_OPEN_FILE,
170 : TLD_FILE_ERROR_CANNOT_READ_FILE,
171 : TLD_FILE_ERROR_UNRECOGNIZED_FILE,
172 : TLD_FILE_ERROR_INVALID_FILE_SIZE,
173 : TLD_FILE_ERROR_OUT_OF_MEMORY,
174 : TLD_FILE_ERROR_INVALID_HUNK_SIZE,
175 : TLD_FILE_ERROR_INVALID_STRUCTURE_SIZE,
176 : TLD_FILE_ERROR_INVALID_ARRAY_SIZE,
177 : TLD_FILE_ERROR_UNSUPPORTED_VERSION,
178 : TLD_FILE_ERROR_MISSING_HUNK,
179 : };
180 :
181 :
182 : enum tld_file_error tld_file_load(const char * filename, struct tld_file ** file); // presumably loads `filename` and returns the result through `file`
183 : const char * tld_file_errstr(enum tld_file_error err); // string for the error code `err`
184 : const struct tld_description * tld_file_description(struct tld_file const * file, uint32_t id); // description designated by `id` -- NOTE(review): result for an out of range id is not visible here
185 : const struct tld_tag * tld_file_tag(struct tld_file const * file, uint32_t id); // tag designated by `id` -- NOTE(review): result for an out of range id is not visible here
186 : const char * tld_file_string(struct tld_file const * file, uint32_t id, uint32_t * length); // string designated by `id`; presumably stores its size in `length`
187 : char * tld_file_to_json(struct tld_file const * file); // NOTE(review): the non-const result is presumably allocated for the caller to free (see auto_free_string) -- confirm
188 : void tld_file_free(struct tld_file ** file); // releases a tld_file; takes the address of the caller's pointer
189 :
190 :
191 : #ifdef __cplusplus
192 :
193 : enum tld_file_error tld_file_load_stream(tld_file ** file, std::istream & in); // C++ only: same result type as tld_file_load() but the input is the stream `in`
194 :
/** \brief Scope guard that releases a string with free().
 *
 * The guard keeps the pointer it is given and passes it to free() when it
 * is destroyed. A null pointer is fine since free() ignores it.
 *
 * Copying is disabled so two guards never release the same buffer.
 */
class auto_free_string
{
public:
    /** \brief Take charge of \p s.
     *
     * \param[in] s  Pointer to pass to free() on destruction; may be null.
     */
    auto_free_string(char * s)
        : f_string(s)
    {
    }
    auto_free_string(auto_free_string const &) = delete;
    auto_free_string & operator = (auto_free_string const &) = delete;
    ~auto_free_string()
    {
        // free() is declared by <stdlib.h> in the global namespace; the
        // explicit qualification makes sure that declaration is the one used
        ::free(f_string);
    }
private:
    char * f_string = nullptr;
};
211 :
212 : class auto_free_tld_file
213 : {
214 : public:
215 1 : auto_free_tld_file(tld_file ** f)
216 1 : : f_file(f)
217 : {
218 1 : }
219 : auto_free_tld_file(auto_free_tld_file const &) = delete;
220 : auto_free_tld_file & operator = (auto_free_tld_file const &) = delete;
221 1 : ~auto_free_tld_file()
222 1 : {
223 1 : tld_file_free(f_file);
224 1 : }
225 : private:
226 : tld_file ** f_file = nullptr;
227 : };
228 :
229 : }
230 : #endif
231 :
232 : #endif
233 : //#ifndef LIB_TLD_FILE_H
234 : // vim: ts=4 sw=4 et
|