libtld 2.0.14
A library to determine the Top-Level Domain name of any Internet URI.
tld_compiler.h
Go to the documentation of this file.
1/* TLD library -- TLD, domain name, and sub-domain extraction
2 * Copyright (c) 2011-2025 Made to Order Software Corp. All Rights Reserved
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sublicense, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be included
13 * in all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
16 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
18 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
19 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
20 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
21 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23
32// self
33//
34#include "libtld/tld.h"
35
36#ifdef __cplusplus
37
38// C++
39//
40#include <iostream>
41#include <list>
42#include <map>
43#include <memory>
44#include <set>
45#include <cstdint>
46#include <vector>
47
48// C
49//
50#include <limits.h>
51
52
// Identifier type for strings: an unsigned 32-bit integer.
53typedef uint32_t string_id_t;
// Ordered map from one string identifier to another.
// Presumably tag-name id -> tag-value id; confirm against the add_tag() implementation.
54typedef std::map<string_id_t, string_id_t> tags_t;
// Identifier type for tags: an unsigned 32-bit integer.
55typedef uint32_t tag_id_t;
56
// The zero identifier; used below as the default value of string_id_t members.
57constexpr string_id_t STRING_ID_NULL = 0;
58
// Body of a class whose declaration line is absent from this listing; the
// pointer_t typedef and the constructor below show that it is tld_string.
// An instance holds an identifier, a string and a "found in" identifier.
60{
61public:
// Shared-ownership handle to a tld_string.
62 typedef std::shared_ptr<tld_string> pointer_t;
// Ordered lookup tables of handles, keyed by string content and by identifier.
63 typedef std::map<std::string, pointer_t> map_by_string_t;
64 typedef std::map<string_id_t, pointer_t> map_by_id_t;
65
// Construct from an identifier and a string (definition not shown here;
// presumably these initialize f_id and f_string).
66 tld_string(string_id_t id, std::string const & s);
67
// Const accessors; get_string() returns a reference, not a copy.
68 string_id_t get_id() const;
69 std::string const & get_string() const;
70 std::string::size_type length() const;
// Setter/getter pair taking and returning a string identifier.
// Presumably the id of another string that contains this one -- TODO confirm.
71 void set_found_in(string_id_t id);
72 string_id_t get_found_in() const;
73
74private:
// Both identifiers default to STRING_ID_NULL and the string defaults to empty.
75 string_id_t f_id = STRING_ID_NULL;
76 std::string f_string = std::string();
77 string_id_t f_found_in = STRING_ID_NULL;
78};
79
80
// Body of a class whose declaration line is absent from this listing.
// Presumably tld_string_manager, the type referenced by other classes in this
// file -- confirm against the original header.
// It keeps tld_string objects indexed by content and by identifier, together
// with length/count statistics and one merged string.
82{
83public:
// Both take a string and return a string identifier.
// NOTE(review): the value returned by find_string() when s is unknown is not
// visible here; presumably STRING_ID_NULL -- confirm.
84 string_id_t add_string(std::string const & s);
85 string_id_t find_string(std::string const & s);
// Returns the string for an identifier by value (a copy, not a reference).
86 std::string get_string(string_id_t id) const;
87 string_id_t get_next_string_id() const;
// Const accessors returning sizes; presumably they report the f_* counters
// declared in the private section.
88 std::size_t size() const;
89 std::size_t max_length() const;
90 std::size_t total_length() const;
91 std::string const & compressed_strings() const;
92 std::size_t compressed_length() const;
// Non-const: presumably builds f_merged_strings and updates the included/merged
// counters -- confirm in the implementation.
93 void merge_strings();
94 std::size_t included_count() const;
95 std::size_t included_length() const;
96 std::size_t merged_count() const;
97 std::size_t merged_length() const;
// Offset lookup, overloaded on string content and on identifier.
// Presumably an offset into the merged string -- TODO confirm.
98 std::size_t get_string_offset(std::string const & s) const;
99 std::size_t get_string_offset(string_id_t id) const;
100
101private:
// Ordered set of string identifiers.
102 typedef std::set<string_id_t> set_id_t;
103
// Helper comparing two strings; the name suggests it measures how much of the
// end of s1 matches the start of s2 -- confirm in the implementation.
104 std::string::size_type end_start_match(
105 std::string const & s1
106 , std::string const & s2);
// Helper returning a bool; presumably true when a merge was performed.
107 bool merge_two_strings();
108
// Identifier counter; starts at STRING_ID_NULL (0).
109 string_id_t f_next_id = STRING_ID_NULL;
// The same tld_string handles indexed two ways.
110 tld_string::map_by_string_t f_strings_by_string = tld_string::map_by_string_t();
111 tld_string::map_by_id_t f_strings_by_id = tld_string::map_by_id_t();
112 set_id_t f_strings_reviewed = set_id_t();
// Statistics, all starting at zero.
113 std::size_t f_max_length = 0;
114 std::size_t f_total_length = 0;
115 std::size_t f_included_count = 0;
116 std::size_t f_included_length = 0;
117 std::size_t f_merged_count = 0;
118 std::size_t f_merged_length = 0;
// Starts empty.
119 std::string f_merged_strings = std::string();
120};
121
122
// Body of a class whose declaration line (and therefore its name) is absent
// from this listing.
// It collects tags_t maps as flat tables of string identifiers and exposes one
// merged table.
124{
125public:
// Flat sequence of string identifiers.
126 typedef std::vector<string_id_t> tags_table_t;
127
// Register one set of tags.
128 void add(tags_t const & tags);
// Non-const: presumably builds f_merged_tags from f_tags -- confirm.
129 void merge();
// Read-only access to the merged table, and a size.
130 tags_table_t const & merged_tags() const;
131 std::size_t merged_size() const;
// Offset for a given set of tags; presumably an index into the merged table
// -- TODO confirm.
132 std::size_t get_tag_offset(tags_t const & tags) const;
133
134private:
// A list of tag tables.
135 typedef std::vector<tags_table_t> tags_vector_t;
136
// Convert a tags_t map into a flat table. The ordering and layout of the result
// are defined by the implementation, which is not shown here.
137 tags_table_t tags_to_table(tags_t const & tags) const;
// Helper comparing two tables; the name suggests it measures how much of the
// end of s1 matches the start of s2 -- confirm in the implementation.
138 std::size_t end_start_match(
139 tags_table_t const & s1
140 , tags_table_t const & s2);
141
// Both containers start empty.
142 tags_vector_t f_tags = tags_vector_t();
143 tags_table_t f_merged_tags = tags_table_t();
144};
145
146
// Body of a class whose declaration line is absent from this listing; the
// pointer_t typedef and the constructors below show that it is tld_definition.
// One instance describes a TLD as a list of segments (string identifiers) with
// a status, an "apply to" string, tags, an index and start/end offsets.
148{
149public:
// Shared-ownership handle, segment list, and name-keyed map of definitions.
150 typedef std::shared_ptr<tld_definition> pointer_t;
151 typedef std::vector<string_id_t> segments_t;
152 typedef std::map<std::string, pointer_t> map_t;
153
// Bit-flag constants; presumably OR-ed into f_set to record which fields were
// set -- confirm.
// NOTE(review): these are std::uint32_t while f_set is declared as int.
154 static constexpr std::uint32_t SET_TLD = 0x0001;
155 static constexpr std::uint32_t SET_STATUS = 0x0002;
156 static constexpr std::uint32_t SET_APPLY_TO = 0x0080;
157
// Defaulted copy constructor.
// NOTE(review): listing line 159 is absent from this extraction; presumably
// another constructor is declared there (f_strings is a reference and needs
// an initializer) -- confirm against the original header.
158 tld_definition(tld_definition const &) = default;
160
// User-declared copy assignment, defined elsewhere. Because f_strings is a
// reference member, an implicitly-declared copy assignment would be deleted.
161 tld_definition & operator = (tld_definition const &);
162
// Append a segment. Returns a bool and can report a message through errmsg;
// presumably false means failure -- confirm.
163 bool add_segment(std::string const & segment, std::string & errmsg);
164 segments_t const & get_segments() const;
// Name accessors; all return by value.
165 std::string get_name() const;
166 std::string get_inverted_name() const;
167 std::string get_parent_name() const;
168 std::string get_parent_inverted_name() const;
169
170 void set_index(int idx);
171 int get_index() const;
172
// The setters return a bool; its meaning is not visible here. Presumably false
// when the value was already set (see the SET_* flags) -- TODO confirm.
173 bool set_status(tld_status status);
174 tld_status get_status() const;
175
176 bool set_apply_to(std::string const & apply_to);
177 std::string get_apply_to() const;
178
// Add one tag as a name/value pair of strings; problems are reported via errmsg.
179 void add_tag(
180 std::string const & tag_name
181 , std::string const & value
182 , std::string & errmsg);
183 tags_t const & get_tags() const;
184
// Presumably clears f_set -- confirm.
185 void reset_set_flags();
// Set a parameter selected by name; problems are reported via errmsg.
186 void set_named_parameter(
187 std::string const & name
188 , std::string const & value
189 , std::string & errmsg);
190
// 16-bit start/end offsets.
191 void set_start_offset(uint16_t start);
192 void set_end_offset(uint16_t end);
193 uint16_t get_start_offset() const;
194 uint16_t get_end_offset() const;
195
196private:
// Reference to a string manager owned elsewhere; it must outlive this object.
197 tld_string_manager & f_strings;
198
199 int f_set = 0;
200 segments_t f_tld = segments_t();
201 int f_index = 0;
// Status defaults to TLD_STATUS_VALID.
202 tld_status f_status = TLD_STATUS_VALID;
203 std::string f_apply_to = std::string();
204
205 tags_t f_tags = tags_t();
206
// Both offsets default to USHRT_MAX; presumably meaning "not set yet" -- confirm.
207 uint16_t f_start_offset = USHRT_MAX;
208 uint16_t f_end_offset = USHRT_MAX;
209};
210
211
// Body of a class whose declaration line is absent from this listing;
// presumably tld_compiler, matching the file name -- confirm against the
// original header.
// It holds input/output paths, error state, a tokenizer over a byte buffer,
// and the resulting TLD definitions.
213{
214public:
// Path configuration; the defaults are given on the f_* members below.
215 void set_input_folder(std::string const & path);
216 std::string const & get_input_folder() const;
217 void set_output(std::string const & filename);
218 std::string const & get_output() const;
219 void set_c_file(std::string const & filename);
220 std::string const & get_c_file() const;
// Run the compilation. Returns a bool; presumably true on success, with details
// available via get_errno()/get_errmsg() on failure -- confirm.
221 bool compile();
222 int get_errno() const;
223 std::string const & get_errmsg() const;
// Current line and file name; see f_line and f_filename.
224 int get_line() const;
225 std::string const & get_filename() const;
// Non-const access to a string manager.
// NOTE(review): listing lines 312 and 314 are absent from this extraction;
// presumably the manager members are declared there.
226 tld_string_manager & get_string_manager();
// Write a JSON representation to the given stream.
227 void output_to_json(std::ostream & out, bool verbose) const;
228
229private:
230 typedef std::vector<std::string> paths_t;
231 typedef std::vector<std::uint8_t> data_t;
232 typedef std::map<std::string, std::string> values_t;
233
// Sentinel character values: -2 and -1 converted to char32_t. The names
// indicate an error marker and an end-of-input marker.
234 static constexpr char32_t const CHAR_ERR = static_cast<char32_t>(-2);
235 static constexpr char32_t const CHAR_EOF = static_cast<char32_t>(-1);
236
// Kinds of tokens.
237 enum token_t
238 {
239 TOKEN_EOF,
240 TOKEN_STRING,
241 TOKEN_IDENTIFIER,
242 TOKEN_WORD,
243 TOKEN_NUMBER,
244 TOKEN_EQUAL,
245 TOKEN_DOT,
246 TOKEN_WILD_CARD,
247 TOKEN_EXCEPTION,
248 TOKEN_OPEN_SQUARE_BRACKET,
249 TOKEN_CLOSE_SQUARE_BRACKET,
250 };
// One token: file name, line, kind and text value. All members are const, so
// a token cannot be modified or assigned after construction.
251 class token
252 {
253 public:
254 typedef std::vector<token> vector_t;
255
256 token(std::string const & filename
257 , int line
258 , token_t token
259 , std::string const & value);
260
261 std::string const & get_filename() const;
262 int get_line() const;
263 token_t get_token() const;
264 std::string const & get_value() const;
265
266 private:
267 std::string const f_filename;
268 int const f_line = 0;
269 token_t const f_token = TOKEN_EOF;
270 std::string const f_value = std::string();
271 };
272
// Input discovery and reading.
273 void find_files(std::string const & path);
274 void process_input_files();
275 void process_file(std::string const & filename);
// Character-level reading helpers; getc()/ungetc() work on char32_t.
276 bool get_backslash(char32_t & c);
277 void read_line();
278 bool is_space(char32_t wc) const;
279 char32_t getc();
280 void ungetc(char32_t c);
// Append a wide character to a std::string; presumably encoded as UTF-8, with
// false returned for an invalid character -- TODO confirm.
281 bool append_wc(std::string & value, char32_t wc);
// Parsing of the token list.
282 void parse_line();
283 void parse_variable();
284 void parse_tld();
285 void print_tokens();
// Post-processing of the parsed definitions.
286 void define_default_category();
287 void find_max_level();
288 void compress_tags();
// Look up a definition by name and return a 16-bit value. Note that name is
// taken by value (a copy).
289 uint16_t find_definition(std::string name) const;
// Output generation.
290 void output_tlds(std::ostream & out);
291 void save_to_file(std::string const & buffer);
292 void output_header(std::ostream & out);
293 void save_to_c_file(std::string const & buffer);
294
// Default input folder and output file; the C file name defaults to empty.
295 std::string f_input_folder = "/usr/share/libtld/tlds";
296 std::string f_output = "/var/lib/libtld/tlds.tld";
297 std::string f_c_file = std::string();
// Error state.
298 int f_errno = 0;
299 std::string f_errmsg = std::string();
300 paths_t f_input_files = paths_t();
301 values_t f_global_variables = values_t();
302 values_t f_global_tags = values_t();
303 std::string f_current_tld = std::string();
304 tld_definition::map_t f_definitions = tld_definition::map_t();
305 token::vector_t f_tokens = token::vector_t();
// Byte buffer plus a position; the line counter starts at 1.
306 data_t f_data = data_t();
307 std::string::size_type f_pos = 0;
308 int f_line = 1;
309 std::string f_filename = std::string();
// Push-back storage with room for exactly one character, plus its fill position.
310 char32_t f_ungetc[1] = {};
311 std::string::size_type f_ungetc_pos = 0;
313 string_id_t f_strings_count = 0;
// Initialized with the current time when the object is constructed.
// NOTE(review): neither <ctime> nor <string> appears in this file's visible
// include list; presumably both arrive through libtld/tld.h -- confirm.
315 time_t f_created_on = time(nullptr);
316 uint8_t f_tld_max_level = 0;
// Both offsets default to USHRT_MAX; presumably meaning "not set yet" -- confirm.
317 uint16_t f_tld_start_offset = USHRT_MAX;
318 uint16_t f_tld_end_offset = USHRT_MAX;
319};
320#endif
321/*#ifdef __cplusplus*/
322
323/* vim: ts=4 sw=4 et
324 */
void find_max_level()
Determine the longest TLD in terms of levels.
std::string get_name() const
The domain name with periods separating each segment.
std::string get_inverted_name() const
Get the full TLD as a reversed domain name.
The public header of the libtld library.
tld_status
Definition tld.h:70
@ TLD_STATUS_VALID
The TLD is currently valid.
Definition tld.h:71
int verbose
Whether the user asked for verbosity, false by default.

This document is part of the Snap! Websites Project.

Copyright by Made to Order Software Corp.