47#include <malloc/malloc.h>
49#if !defined(MO_DARWIN) && !defined(MO_FREEBSD)
58#define strncasecmp _strnicmp
365 for(uint32_t idx(0); idx <
tld->f_tags_count; ++idx)
373 str = tld_file_string(
g_tld_file, tag->f_tag_name, &l);
379 && memcmp(str,
"category", l) == 0)
381 str = tld_file_string(
g_tld_file, tag->f_tag_value, &l);
388 && memcmp(str,
"country", l) == 0)
390 str = tld_file_string(
g_tld_file, tag->f_tag_value, &l);
413 return (c >=
'0' && c <=
'9')
414 || (c >=
'A' && c <=
'F')
415 || (c >=
'a' && c <=
'f');
499static int cmp(
const char *a,
int l,
const char *b,
int n)
510 while(l > 0 && n > 0)
569static int search(
int i,
int j,
char const * domain,
int n)
571 int auto_match = -1, p, r;
584 if(
static_cast<uint32_t
>(i) >
static_cast<uint32_t
>(j))
590 <<
") is larger than j ("
592 <<
") which is not expected in search()."
602 if(
static_cast<uint32_t
>(i) >=
g_tld_file->f_descriptions_count
603 ||
static_cast<uint32_t
>(j) >
g_tld_file->f_descriptions_count)
611 <<
") is too large, max is "
631 if(l == 1 && name[0] ==
'*')
651std::cerr <<
"--- name offset: " <<
tld->f_tld <<
" --- ptr: " <<
reinterpret_cast<void const *
>(name) <<
", cmp(\"" << std::string(name, l) <<
"\", \"" << std::string(domain, n) <<
"\") == " << r <<
"\n";
654 if(l == 1 && name[0] ==
'*')
658 <<
"fatal error: found an asterisk within an array of sub-domains at "
665 r =
cmp(name, l, domain, n);
667std::cerr <<
"--- name offset: " <<
tld->f_tld <<
" --- cmp(\"" << std::string(name, l) <<
"\", \"" << std::string(domain, n) <<
"\") == " << r <<
"\n";
710 info->
f_tld = (
const char *) 0;
712 info->f_tld_index = -1;
746 enum tld_file_error err;
750 if(filename ==
nullptr)
754 err = tld_file_load(
"/var/lib/libtld/tlds.tld", &
g_tld_file);
755 if(err == TLD_FILE_ERROR_NONE)
763 filename =
"/usr/share/libtld/tlds.tld";
768 if(err == TLD_FILE_ERROR_NONE)
777 std::stringstream in;
780 if(err == TLD_FILE_ERROR_NONE)
786 return err == TLD_FILE_ERROR_CANNOT_OPEN_FILE
904 if(
g_tld_file->f_header->f_tld_max_level > std::size(state->f_offset))
909 if(state->f_depth == 0
910 && state->f_offset[0] == 0)
914 state->f_offset[0] =
g_tld_file->f_header->f_tld_start_offset;
919 if(state->f_offset[0] >=
g_tld_file->f_header->f_tld_end_offset)
926 char * domain(state->f_domain +
sizeof(state->f_domain));
929 for(
int d(0); d <= state->f_depth; ++d)
934 char const * name = tld_file_string(
g_tld_file,
tld->f_tld, &length);
944 && name[length - 2] ==
'%'
945 && is_hex(name[length - 1])
946 && is_hex(name[length - 0]))
950 c = h2d(name[length - 1]) * 16 + h2d(name[length - 0]);
958 if(domain < state->f_domain)
968 if(domain < state->f_domain)
975 info->
f_tld = state->f_domain;
976 info->
f_offset = domain - state->f_domain;
977 info->f_tld_index = state->f_offset[state->f_depth];
979 tags_to_info(
tld, info);
983 if(
tld->f_start_offset != 65535)
986 state->f_offset[state->f_depth] =
tld->f_start_offset;
990 ++state->f_offset[state->f_depth];
991 while(state->f_depth > 0)
994 if(state->f_offset[state->f_depth] < parent->
f_end_offset)
999 ++state->f_offset[state->f_depth];
1115 char const * end = uri;
1117 int level = 0, max_level, start_level, i, r, p, offset;
1123 if(uri ==
nullptr || uri[0] ==
'\0')
1135 max_level =
g_tld_file->f_header->f_tld_max_level;
1136 std::vector<const char *> level_ptr(max_level);
1141 if(level >= max_level)
1146 for(i = 1; i < max_level; ++i)
1148 level_ptr[i - 1] = level_ptr[i];
1150 level_ptr[max_level - 1] = end;
1154 level_ptr[level] = end;
1157 if(level >= 2 && level_ptr[level - 2] + 1 == level_ptr[level - 1])
1172 start_level = level;
1176 level_ptr[level] + 1, (
int) (end - level_ptr[level] - 1));
1184 for(p = r; level > 0; --level, p = r)
1191 if(
tld->f_start_offset == USHRT_MAX)
1196 level_ptr[level - 1] + 1,
1197 static_cast<int>(level_ptr[level] - level_ptr[level - 1] - 1));
1204 offset = (int) (level_ptr[level] - uri);
1217 static_cast<int>(level_ptr[0] - uri));
1231 info->f_tld_index = p;
1243 p =
tld->f_exception_apply_to;
1249 level = start_level -
tld->f_exception_level;
1250 offset =
static_cast<int>(level_ptr[level] - uri);
1261 tags_to_info(
tld, info);
1263 info->
f_tld = level_ptr[level];
1313 const char *p, *q, *username, *password, *host, *port, *n, *a, *query_string;
1315 int protocol_length, length, valid, c, i, j, anchor;
1321 if(uri ==
nullptr || uri[0] ==
'\0')
1327 for(p = uri; *uri !=
'\0' && *uri !=
':'; ++uri)
1329 if((*uri <
'a' || *uri >
'z')
1330 && (*uri <
'A' || *uri >
'Z')
1331 && (*uri <
'0' || *uri >
'9')
1338 protocol_length = (int) (uri - p);
1340 for(q = protocols; *q !=
'\0';)
1342 if(q[0] ==
'*' && (q[1] ==
'\0' || q[1] ==
','))
1347 if(tolower(*q) == c)
1349 if(strncasecmp(p, q, protocol_length) == 0
1350 && (q[protocol_length] ==
'\0' || q[protocol_length] ==
','))
1357 for(; *q !=
'\0' && *q !=
','; ++q);
1358 for(; *q ==
','; ++q);
1364 if(uri[1] !=
'/' || uri[2] !=
'/')
1373 for(; *uri !=
'/' && *uri !=
'\0'; ++uri)
1375 if((
unsigned char) *uri <
' ')
1382 if(username !=
nullptr)
1390 else if((*uri & 0x80) != 0)
1398 else if(*uri ==
' ' || *uri ==
'+')
1403 else if(*uri ==
'%')
1409 if(((uri[1] <
'2' || uri[1] >
'9')
1410 && (uri[1] <
'a' || uri[1] >
'f')
1411 && (uri[1] <
'A' || uri[1] >
'F'))
1412 || ((uri[2] <
'0' || uri[2] >
'9')
1413 && (uri[2] <
'a' || uri[2] >
'f')
1414 && (uri[2] <
'A' || uri[2] >
'F')))
1418 if(uri[1] ==
'2' && uri[2] ==
'0')
1432 if(username !=
nullptr)
1434 password = username;
1435 for(; *password !=
'@' && *password !=
':'; ++password);
1436 if(*password ==
':')
1438 if((host - 1) - (password + 1) <= 0)
1444 if(password - username - 1 <= 0)
1450 for(port = host; *port !=
':' && port < uri; ++port);
1458 for(n = port + 1; *n >=
'0' && *n <=
'9'; ++n);
1459 if(n != uri || n == port + 1)
1468 query_string =
nullptr;
1470 for(a = uri; *a !=
'\0'; ++a)
1472 if((
unsigned char) *a <
' ')
1478 else if(*a ==
'+' || *a ==
' ')
1491 if(query_string !=
nullptr)
1498 query_string = a + 1;
1501 else if(*a ==
'&' && anchor == 0)
1503 if(query_string ==
nullptr)
1513 query_string = a + 1;
1517 if(query_string !=
nullptr && a - query_string == 0)
1525 query_string =
nullptr;
1534 if(((a[1] <
'2' || a[1] >
'9')
1535 && (a[1] <
'a' || a[1] >
'f')
1536 && (a[1] <
'A' || a[1] >
'F'))
1537 || ((a[2] <
'0' || a[2] >
'9')
1538 && (a[2] <
'a' || a[2] >
'f')
1539 && (a[2] <
'A' || a[2] >
'F')))
1556 else if((*a & 0x80) != 0)
1581 length = (int) (port - host);
1582 if(length >= (
int) (
sizeof(domain) /
sizeof(domain[0])))
1598 for(i = 0, j = 0; i < length; ++i, ++j)
1602 domain[j] = (char) (h2d(host[i + 1]) * 16 + h2d(host[i + 2]));
1607 domain[j] = host[i];
1612 result =
tld(domain, info);
1613 if(info->
f_tld !=
nullptr)
1671 return reinterpret_cast<uint32_t
const *
>(tld_static_tlds)[1] + 8;
1675int tld_tag_count(
struct tld_info *info)
1680 || info->f_tld_index < 0)
1691 return tld->f_tags_count;
1706 tag->f_name =
nullptr;
1707 tag->f_name_length = 0;
1708 tag->f_value =
nullptr;
1709 tag->f_value_length = 0;
1716 if(info->f_tld_index < 0)
1733 file_tag = tld_file_tag(
g_tld_file,
tld->f_tags + tag_idx * 2);
1734 if(file_tag ==
nullptr)
1739 tag->f_name = tld_file_string(
g_tld_file, file_tag->f_tag_name, &l);
1740 tag->f_name_length = l;
1742 tag->f_value = tld_file_string(
g_tld_file, file_tag->f_tag_value, &l);
1743 tag->f_value_length = l;
1745 if(tag->f_name ==
nullptr
1746 || tag->f_value ==
nullptr)
[internal] The description of one TLD.
uint16_t f_end_offset
The last offset of a list of TLDs.
Set of information returned by the tld() function.
enum tld_category f_category
The category of the TLD.
enum tld_status f_status
The status of the TLD.
int f_offset
The offset to the TLD in the URI string you supplied.
char f_country[48]
The country where this TLD is used.
const char * f_tld
Pointer to the TLD in the URI string you supplied.
const char * tld_version()
Return the version of the library.
static struct tld_file * g_tld_file
The TLD file currently loaded or NULL.
static enum tld_result tld_load_tlds_if_not_loaded()
Load the TLDs if not yet loaded.
static int cmp(const char *a, int l, const char *b, int n)
Compare two strings, one of which is limited by length.
enum tld_result tld(char const *uri, struct tld_info *info)
Get information about the TLD for the specified URI.
void tld_clear_info(struct tld_info *info)
Clear the info structure.
const struct tld_file * tld_get_tlds()
Return a pointer to the current list of TLDs.
static int search(int i, int j, char const *domain, int n)
Search for the specified domain.
enum tld_result tld_next_tld(struct tld_enumeration_state *state, struct tld_info *info)
Read the next TLD and return its info.
enum tld_result tld_check_uri(const char *uri, struct tld_info *info, const char *protocols, int flags)
Check that a URI is valid.
uint32_t tld_get_static_tlds_buffer_size()
Get the size of the TLDs static buffer.
void tld_free_tlds()
Clear the allocated TLD file.
enum tld_result tld_load_tlds(char const *filename, int fallback)
Load a TLDs file as the file to be used by the tld() function.
The public header of the libtld library.
#define VALID_URI_NO_SPACES
Whether to check that the URI does not include any spaces.
#define LIBTLD_VERSION
The version of the library as a string.
@ TLD_CATEGORY_UNDEFINED
The TLD was not found.
#define VALID_URI_ASCII_ONLY
Whether to check that the URI only includes ASCII.
LIBTLD_EXPORT enum tld_category tld_word_to_category(const char *word, int n)
This is for backward compatibility.
@ TLD_RESULT_SUCCESS
Success! The TLD of the specified URI is valid.
@ TLD_RESULT_NO_TLD
The input URI has no TLD defined.
@ TLD_RESULT_INVALID
The TLD was found, but it is marked as invalid.
@ TLD_RESULT_BAD_URI
The URI includes characters that are not accepted by the function.
@ TLD_RESULT_NOT_FOUND
The URI has a TLD that could not be determined.
@ TLD_RESULT_NULL
The input URI is empty.
@ TLD_STATUS_EXCEPTION
Special status to indicate an exception which is not directly a TLD.
@ TLD_STATUS_UNDEFINED
Special status to indicate we did not find the TLD.
@ TLD_STATUS_VALID
The TLD is currently valid.
Declaration of the static TLDs file.
Declaration of the TLD file structures.