Line data Source code
1 : /* TLD library -- test the TLD interface against the Mozilla effective TLD names
2 : * Copyright (C) 2011-2015 Made to Order Software Corp.
3 : *
4 : * Permission is hereby granted, free of charge, to any person obtaining a
5 : * copy of this software and associated documentation files (the
6 : * "Software"), to deal in the Software without restriction, including
7 : * without limitation the rights to use, copy, modify, merge, publish,
8 : * distribute, sublicense, and/or sell copies of the Software, and to
9 : * permit persons to whom the Software is furnished to do so, subject to
10 : * the following conditions:
11 : *
12 : * The above copyright notice and this permission notice shall be included
13 : * in all copies or substantial portions of the Software.
14 : *
15 : * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
16 : * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 : * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
18 : * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
19 : * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
20 : * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
21 : * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22 : */
23 :
24 : /** \file
25 : * \brief Test the tld() function against the Mozilla effective TLD names.
26 : *
27 : * This file implements various tests to verify that the
28 : * tld() function works as expected with valid and
29 : * invalid names.
30 : */
31 :
32 : // Qt headers make use of long long which is not considered a valid type
33 : #pragma GCC diagnostic ignored "-Wlong-long"
34 :
#include "libtld/tld.h"
#include <string>
#include <vector>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <boost/algorithm/string.hpp>
#include <QtCore/QString>
42 :
43 :
44 :
/* Number of failures recorded so far; main() exits with 1 when it is
 * not zero.
 */
int err_count(0);

/* Set to 1 by main() when the first command line argument is -v. */
int verbose(0);

/*
 * This test calls the tld() function with all the TLDs as defined
 * by Mozilla to determine whether we are up to date.
 *
 * extern enum tld_result tld(const char *uri, struct tld_info *info);
 */

typedef std::vector<std::string> string_vector_t;

/* The entries read from effective_tld_names.dat by test_load(). */
string_vector_t tlds;
57 :
58 :
59 : /** \brief Encode a URL.
60 : *
61 : * This function transforms the characters in a valid URI string.
62 : */
63 15092 : QString tld_encode(const QString& tld, int& level)
64 : {
65 15092 : QString result;
66 15092 : level = 0;
67 :
68 30184 : QByteArray utf8 = tld.toUtf8();
69 15092 : int max(utf8.length());
70 15092 : const char *p = utf8.data();
71 659010 : for(int l = 0; l < max; ++l)
72 : {
73 643918 : char c(p[l]);
74 643918 : if(static_cast<unsigned char>(c) < 0x20)
75 : {
76 0 : fprintf(stderr, "error: controls characters (^%c) are not allowed in TLDs (%s).\n", c, p);
77 0 : exit(1);
78 : }
79 643918 : if((c >= 'A' && c <= 'Z')
80 643918 : || (c >= 'a' && c <= 'z')
81 135576 : || (c >= '0' && c <= '9')
82 135184 : || c == '.' || c == '-')
83 : {
84 : // these are accepted as is; note that we already checked the
85 : // validty of the data w
86 639580 : if(c == '.')
87 : {
88 31730 : ++level;
89 : }
90 639580 : result += c;
91 : }
92 : else
93 : {
94 : // add/remove as appropriate
95 4338 : if(c == '/' || c == ':' || c == '&')
96 : {
97 0 : fprintf(stderr, "error: character (^%c) is not allowed in TLDs.\n", c);
98 0 : exit(1);
99 : }
100 4338 : result += '%';
101 4338 : QString v(QString("%1").arg(c & 255, 2, 16, QLatin1Char('0')));
102 4338 : result += v[0];
103 4338 : result += v[1];
104 : }
105 : }
106 : // at this time the maximum level we declared is 4 but there are cases
107 : // where countries defined 5 levels (which is definitively crazy!)
108 : // there is also one Amazon server using 6 levels
109 15092 : if(level < 0 || level > 6)
110 : {
111 0 : fprintf(stderr, "error: level out of range (%d) in \"%s\"; if larger than the maximum limit, you may want to increase the limit.\n", level, p);
112 0 : exit(1);
113 : }
114 :
115 30184 : return result;
116 : }
117 :
118 :
119 : /*
120 : * The function reads the effective_tld_names.dat file in memory.
121 : *
122 : * We call exit(1) if we find an error while reading the data.
123 : */
124 1 : void test_load()
125 : {
126 1 : FILE *f = fopen("effective_tld_names.dat", "r");
127 1 : if(f == NULL)
128 : {
129 0 : fprintf(stderr, "error: could not open the \"effective_tld_names.dat\" file; did you start the test in the source directory?\n");
130 0 : exit(1);
131 : }
132 : char buf[256];
133 1 : buf[sizeof(buf) -1] = '\0';
134 1 : int line(0);
135 10653 : while(fgets(buf, sizeof(buf) - 1, f) != NULL)
136 : {
137 10651 : ++line;
138 10651 : int l = strlen(buf);
139 10651 : if(l == sizeof(buf) - 1)
140 : {
141 : // the fgets() failed in this case so forget it
142 0 : fprintf(stderr, "effective_tld_names.data:%d:error: line too long.\n", line);
143 0 : ++err_count;
144 : }
145 : else
146 : {
147 10651 : std::string s(buf);
148 10651 : boost::algorithm::trim(s);
149 10651 : if(s.length() == 1)
150 : {
151 : // all TLDs are at least 2 characters
152 0 : fprintf(stderr, "effective_tld_names.data:%d:error: a TLD must be at least two characters.\n", line);
153 0 : ++err_count;
154 : }
155 10651 : else if(s.length() > 1 && s[0] != '/' && s[1] != '/')
156 : {
157 : // this is not a comment and not an empty line, that's a TLD
158 7583 : tlds.push_back(s);
159 : //printf("found [%s]\n", s.c_str());
160 10651 : }
161 : }
162 : }
163 1 : fclose(f);
164 1 : if(verbose)
165 : {
166 0 : printf("Found %d TLDs in the input file.\n", static_cast<int>(tlds.size()));
167 : }
168 1 : }
169 :
170 :
171 : /*
172 : * This test checks out URIs that end with an invalid TLD. This is
173 : * expected to return an error every single time.
174 : */
175 1 : void test_tlds()
176 : {
177 7584 : for(string_vector_t::const_iterator it(tlds.begin()); it != tlds.end(); ++it)
178 : {
179 : tld_info info;
180 7583 : if(it->at(0) == '*')
181 : {
182 28 : std::string url("we-want-to-test-just-one-domain-name");
183 28 : url += it->substr(1);
184 28 : tld_result r = tld(url.c_str(), &info);
185 28 : if(r == TLD_RESULT_SUCCESS)
186 : {
187 : // if it worked then we have a problem
188 : fprintf(stderr, "error: tld(\"%s\", &info) accepted when 2nd level names are not accepted by effective_tld_names.dat.\n",
189 0 : url.c_str());
190 0 : ++err_count;
191 : }
192 28 : else if(r != TLD_RESULT_INVALID)
193 : {
194 : // we're good if invalid since that's what we expect in this case
195 : // any other result is an error
196 0 : fprintf(stderr, "error: tld(\"%s\", &info) failed.\n", it->c_str());
197 0 : ++err_count;
198 28 : }
199 : }
200 7555 : else if(it->at(0) == '!')
201 : {
202 9 : if(*it != "!nel.uk")
203 : {
204 9 : std::string url;//("we-want-to-test-just-one-domain-name.");
205 9 : url += it->substr(1);
206 9 : tld_result r = tld(url.c_str(), &info);
207 9 : if(r != TLD_RESULT_SUCCESS)
208 : {
209 : // if it worked then we have a problem
210 : fprintf(stderr, "error: tld(\"%s\", &info) = %d failed with an exception that should have been accepted.\n",
211 0 : it->c_str(), r);
212 0 : ++err_count;
213 9 : }
214 : }
215 : }
216 7546 : else if(it->at(0) != '!')
217 : {
218 7546 : std::string url("www.this-is-a-long-domain-name-that-should-not-make-it-in-a-tld.");
219 7546 : url += *it;
220 : int level;
221 15092 : QString utf16(QString::fromUtf8(url.c_str()));
222 15092 : QString u(tld_encode(utf16, level));
223 15092 : QByteArray uri(u.toUtf8());
224 7546 : tld_result r = tld(uri.data(), &info);
225 7546 : if(r == TLD_RESULT_SUCCESS || r == TLD_RESULT_INVALID)
226 : {
227 : // it succeeded, but is it the right length?
228 7546 : utf16 = QString::fromUtf8(it->c_str());
229 7546 : u = tld_encode(utf16, level);
230 7546 : if(strlen(info.f_tld) != static_cast<size_t>(u.size() + 1))
231 : {
232 : fprintf(stderr, "error: tld(\"%s\", &info) length mismatch (\"%s\", %d/%d).\n",
233 0 : uri.data(), info.f_tld, static_cast<int>(strlen(info.f_tld)), static_cast<int>((u.size() + 1)));
234 0 : QString s(QString::fromUtf8(it->c_str()));
235 0 : fprintf(stderr, "%d> %s [%s] -> %d ", r, it->c_str(), u.toUtf8().data(), s.length());
236 0 : for(int i(0); i < s.length(); ++i) {
237 0 : fprintf(stderr, "&#x%04X;", s.at(i).unicode());
238 : }
239 0 : fprintf(stderr, "\n");
240 0 : ++err_count;
241 7546 : }
242 : }
243 : else
244 : {
245 : //fprintf(stderr, "error: tld(\"%s\", &info) failed.\n", it->c_str());
246 0 : QString s(QString::fromUtf8(it->c_str()));
247 0 : printf("error: tld(\"%s\", &info) failed with %d [%s] -> %d ", it->c_str(), r, u.toUtf8().data(), s.length());
248 0 : for(int i(0); i < s.length(); ++i) {
249 0 : printf("&#x%04X;", s.at(i).unicode());
250 : }
251 0 : printf("\n");
252 0 : ++err_count;
253 7546 : }
254 : }
255 : }
256 1 : }
257 :
258 :
259 :
260 :
261 1 : int main(int argc, char *argv[])
262 : {
263 1 : printf("testing tld names version %s\n", tld_version());
264 :
265 1 : if(argc > 1)
266 : {
267 0 : if(strcmp(argv[1], "-v") == 0)
268 : {
269 0 : verbose = 1;
270 : }
271 : }
272 :
273 : /* call all the tests, one by one
274 : * failures are "recorded" in the err_count global variable
275 : * and the process stops with an error message and exit(1)
276 : * if err_count is not zero.
277 : */
278 1 : test_load();
279 :
280 1 : if(err_count == 0)
281 : {
282 1 : test_tlds();
283 : }
284 :
285 1 : if(err_count || verbose)
286 : {
287 : fprintf(stderr, "%d error%s occured.\n",
288 0 : err_count, err_count != 1 ? "s" : "");
289 : }
290 1 : exit(err_count ? 1 : 0);
291 3 : }
292 :
293 : /* vim: ts=4 sw=4 et
294 : */
|