Line data Source code
1 : /* TLD library -- test the TLD interface for emails
2 : * Copyright (c) 2013-2022 Made to Order Software Corp. All Rights Reserved
3 : *
4 : * Permission is hereby granted, free of charge, to any person obtaining a
5 : * copy of this software and associated documentation files (the
6 : * "Software"), to deal in the Software without restriction, including
7 : * without limitation the rights to use, copy, modify, merge, publish,
8 : * distribute, sublicense, and/or sell copies of the Software, and to
9 : * permit persons to whom the Software is furnished to do so, subject to
10 : * the following conditions:
11 : *
12 : * The above copyright notice and this permission notice shall be included
13 : * in all copies or substantial portions of the Software.
14 : *
15 : * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
16 : * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 : * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
18 : * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
19 : * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
20 : * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
21 : * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22 : */
23 :
24 : /** \file
25 : * \brief Test the tld_email_list class.
26 : *
27 : * This file implements various tests to verify that the
28 : * tld_email_list functions as expected.
29 : */
30 :
31 : #include "libtld/tld.h"
32 : #include <stdlib.h>
33 : #include <stdio.h>
34 : #include <string.h>
35 : #include <sstream>
36 :
37 : /// The number of errors encountered so far; error() increments it by one on each call.
38 : int err_count = 0;
39 :
40 : /// Whether to be verbose, turned off by default; when non-zero the tests print their progress in stdout.
41 : int verbose = 0;
42 :
43 :
44 : /** \brief Print an error.
45 : *
46 : * This function prints the specified \p msg in stderr and increases
47 : * the error counter by one.
48 : *
49 : * \param[in] msg The message to be printed.
50 : */
51 0 : void error(const std::string& msg)
52 : {
53 0 : fprintf(stderr, "%s\n", msg.c_str());
54 0 : ++err_count;
55 0 : }
56 :
57 :
/** \brief Check that an expression raises the expected exception.
 *
 * The macro evaluates \p s inside a try/catch. When \p s returns without
 * throwing, error() is called with a message naming the expression and
 * the exception type that was expected. An exception of type \p e (or of
 * a type derived from it) is caught and silently ignored; any other
 * exception propagates to the caller.
 *
 * The expansion is a bare try/catch statement (it is not wrapped in a
 * do/while) so it can be followed by a semicolon or not.
 *
 * \param[in] s  The expression expected to throw.
 * \param[in] e  The type of the exception expected from \p s.
 */
#define EXPECTED_THROW(s, e) \
    try \
    { \
        static_cast<void>(s); \
        error("error: " #s " did not throw " #e "."); \
    } \
    catch(const e&) \
    { \
    }
68 :
69 :
70 : /** \brief Define a valid email string.
71 : *
72 : * This structure is used to define a valid email string. The string may
73 : * include any number of emails; the number of entries expected from the
74 : * parser is defined by the \p f_count field. Note that the count is
75 : * increased by 1 for each group definition found in the list defined
76 : * in the \p f_input_email string.
77 : *
78 : * This structure is used to validate many different types of email
79 : * addresses to make sure that our parser works properly.
80 : */
80 : struct valid_email
81 : {
82 : /// The valid emails to be parsed.
83 : const char * f_input_email;
84 : /// The number of entries expected once f_input_email was parsed: one per email plus one per group.
85 : int f_count;
86 : };
87 :
88 : //const char * f_group;
89 : //const char * f_original_email;
90 : //const char * f_fullname;
91 : //const char * f_username;
92 : //const char * f_domain;
93 : //const char * f_email_only;
94 : //const char * f_canonicalized_email;
95 :
96 : /// List of results to verify all the fields of the parser output. There is one entry per group and email.
///
/// The values of each entry follow the order of the commented out field
/// list found just above: group, original email, full name, user name,
/// domain, email only, and canonicalized email.
///
/// The entries appear in the same order as the emails of the
/// list_of_valid_emails array since test_valid_emails() walks both
/// arrays in parallel. A group is announced by an entry which only
/// defines the group name (all the other fields are empty strings) and
/// it is followed by the emails of that group.
///
/// The array ends with an entry where all the fields are null pointers.
97 : const tld_email list_of_results[] =
98 : {
99 : { "", "MAILER-DAEMON@mail.exdox.com (Mail Delivery System)",
100 : "", "MAILER-DAEMON", "mail.exdox.com", "MAILER-DAEMON@mail.exdox.com", "MAILER-DAEMON@mail.exdox.com" },
101 : #if 1
102 : { "", "alexis@m2osw.com",
103 : "", "alexis", "m2osw.com", "alexis@m2osw.com", "alexis@m2osw.com" },
104 : { "", "a@m2osw.com",
105 : "", "a", "m2osw.com", "a@m2osw.com", "a@m2osw.com" },
106 : { "", "b@c.com",
107 : "", "b", "c.com", "b@c.com", "b@c.com" },
108 : { "", "alexis@m2osw.com",
109 : "", "alexis", "m2osw.com", "alexis@m2osw.com", "alexis@m2osw.com" },
110 : { "", "\"Wilke, Alexis\" <alexis@m2osw.com>",
111 : "Wilke, Alexis", "alexis", "m2osw.com", "alexis@m2osw.com", "\"Wilke, Alexis\" <alexis@m2osw.com>" },
112 : { "", "(* Pascal Comments *) \t alexis@m2osw.com\n (Just (kidding) he! he!)",
113 : "", "alexis", "m2osw.com", "alexis@m2osw.com", "alexis@m2osw.com" },
114 : { "", "(Start-Comment)alexis@ \t [ \t m2osw.com \t ] \n (More (comment) here)",
115 : "", "alexis", "m2osw.com", "alexis@m2osw.com", "alexis@m2osw.com" },
116 : { "", "(Test with dots in user name) al.ex.is@ \t [ \t m2osw.com \t ] \n (More (comments) there)",
117 : "", "al.ex.is", "m2osw.com", "al.ex.is@m2osw.com", "al.ex.is@m2osw.com" },
118 : { "", "< (Test with dots in user name) al.ex.is@ \t [ \t m2osw.com \t ] \n (More (comments) there) >",
119 : "", "al.ex.is", "m2osw.com", "al.ex.is@m2osw.com", "al.ex.is@m2osw.com" },
120 : { "", "(With full name) Alexis Wilke < (Test with dots in user name) al.ex.is@ \t [ \t m2osw.com \t ] \n (More (comments) there) >",
121 : "Alexis Wilke", "al.ex.is", "m2osw.com", "al.ex.is@m2osw.com", "Alexis Wilke <al.ex.is@m2osw.com>" },
122 : { "This Group", "",
123 : "", "", "", "", "" },
124 : { "This Group", "(With full name) Alexis Wilke < \n alexis \t @ \t [ \t m2osw.com \t ] \n (Less) >",
125 : "Alexis Wilke", "alexis", "m2osw.com", "alexis@m2osw.com", "Alexis Wilke <alexis@m2osw.com>" },
126 : { "People", "",
127 : "", "", "", "", "" },
128 : { "People", "Alexis Wilke <alexis@m2osw.com>",
129 : "Alexis Wilke", "alexis", "m2osw.com", "alexis@m2osw.com", "Alexis Wilke <alexis@m2osw.com>" },
130 : { "People", "John Smith <john@m2osw.com>",
131 : "John Smith", "john", "m2osw.com", "john@m2osw.com", "John Smith <john@m2osw.com>" },
132 : { "Lists", "",
133 : "", "", "", "", "" },
134 : { "Lists", "Contact <contact@m2osw.com>",
135 : "Contact", "contact", "m2osw.com", "contact@m2osw.com", "Contact <contact@m2osw.com>" },
136 : { "Lists", "Resume <resume@m2osw.com>",
137 : "Resume", "resume", "m2osw.com", "resume@m2osw.com", "Resume <resume@m2osw.com>" },
138 : { "", "normal@m2osw.com",
139 : "", "normal", "m2osw.com", "normal@m2osw.com", "normal@m2osw.com" },
140 : { "No-Reply", "",
141 : "", "", "", "", "" },
142 : { "No-Reply", "no-reply@m2osw.com",
143 : "", "no-reply", "m2osw.com", "no-reply@m2osw.com", "no-reply@m2osw.com" },
144 : { "", "\"Complex <name> for !a! \\\"USER\\\"\" <user@example.co.uk>",
145 : "Complex <name> for !a! \"USER\"", "user", "example.co.uk", "user@example.co.uk", "\"Complex <name> for !a! \\\"USER\\\"\" <user@example.co.uk>" },
146 : { "", "(Comment \n New-Line) alexis@m2osw.com",
147 : "", "alexis", "m2osw.com", "alexis@m2osw.com", "alexis@m2osw.com" },
148 : { "", "(Comment (Sub-Comment (Sub-Sub-Comment (Sub-Sub-Sub-Comment \\) This is still the Sub-Sub-Sub-Comment!!!)))) alexis@m2osw.com",
149 : "", "alexis", "m2osw.com", "alexis@m2osw.com", "alexis@m2osw.com" },
150 : { "Group with some sub-comments", "",
151 : "", "", "", "", "" },
152 : { "Group with some sub-comments", "alexis@m2osw.com",
153 : "", "alexis", "m2osw.com", "alexis@m2osw.com", "alexis@m2osw.com" },
154 : // TBD: since the colons get canonicalized to %3A we do not need the '[' and ']' in the canonicalized version
155 : { "", "\"Wilke, Alexis\" <\"alexis,wilke\"@[:special:.m2osw.com]>",
156 : "Wilke, Alexis", "alexis,wilke", ":special:.m2osw.com", "\"alexis,wilke\"@[:special:.m2osw.com]", "\"Wilke, Alexis\" <\"alexis,wilke\"@%3Aspecial%3A.m2osw.com>" },
157 : { "", "alexis@m2osw.com (Simple Comment)",
158 : "", "alexis", "m2osw.com", "alexis@m2osw.com", "alexis@m2osw.com" },
159 : #endif
160 :
161 : // end list with nulls (test_valid_emails() stops when it finds a null f_group)
162 : { nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr }
163 : };
164 :
165 : /// The list of valid emails used to check the parser out.
///
/// Each entry defines a string to be parsed and the number of entries
/// (emails plus groups) the parser is expected to return for it. The
/// expected fields of those entries are found, in the same order, in the
/// list_of_results array.
///
/// The array ends with an entry where the input email is a null pointer.
166 : const valid_email list_of_valid_emails[] =
167 : {
168 : { "MAILER-DAEMON@mail.exdox.com (Mail Delivery System)", 1},
169 : #if 1
170 : { "alexis@m2osw.com", 1 },
171 : { "a@m2osw.com", 1 },
172 : { "b@c.com", 1 },
173 : { " \t alexis@m2osw.com\n \t", 1 },
174 : { "\"Wilke, Alexis\" <alexis@m2osw.com>", 1 },
175 : { " (* Pascal Comments *) \t alexis@m2osw.com\n (Just (kidding) he! he!) \t", 1 },
176 : { "(Start-Comment)alexis@ \t [ \t m2osw.com \t ] \n (More (comment) here) \r\n\t", 1 },
177 : { "(Test with dots in user name) al.ex.is@ \t [ \t m2osw.com \t ] \n (More (comments) there) \r\n\t", 1 },
178 : { "< (Test with dots in user name) al.ex.is@ \t [ \t m2osw.com \t ] \n (More (comments) there) > \r\n\t", 1 },
179 : { "(With full name) Alexis Wilke < (Test with dots in user name) al.ex.is@ \t [ \t m2osw.com \t ] \n (More (comments) there) > \r\n\t", 1 },
// one group plus its single email
180 : { " (Now a group:) This Group: (With full name) Alexis Wilke < \n alexis \t @ \t [ \t m2osw.com \t ] \n (Less) >; \r\n\t", 2 },
// three groups (People, Lists, No-Reply) plus six emails
181 : { "People: Alexis Wilke <alexis@m2osw.com>, John Smith <john@m2osw.com>; Lists: Contact <contact@m2osw.com>, Resume <resume@m2osw.com>; normal@m2osw.com, No-Reply: no-reply@m2osw.com;", 9 },
182 : { "\"Complex <name> for !a! \\\"USER\\\"\" <user@example.co.uk>", 1 },
183 : { "(Comment \n New-Line) alexis@m2osw.com", 1 },
184 : { "(Comment (Sub-Comment (Sub-Sub-Comment (Sub-Sub-Sub-Comment \\) This is still the Sub-Sub-Sub-Comment!!!)))) alexis@m2osw.com", 1 },
185 : { "Group with (Comment (Sub-Comment (Sub-Sub-Comment (Sub-Sub-Sub-Comment \\) This is still the Sub-Sub-Sub-Comment!!!)))) some sub-comments \t : alexis@m2osw.com;", 2 },
186 : { "\"Wilke, Alexis\" <\"alexis,wilke\"@[:special:.m2osw.com]>", 1 },
187 : { "alexis@m2osw.com (Simple Comment)", 1 },
188 : #endif
189 :
190 : // end of list
191 : { nullptr, 0 }
192 : };
193 :
194 :
/** \brief Transform an email string in a C-like string.
 *
 * This function transforms the characters in \p e into a set of C-like
 * escape characters so it can safely be printed in the console:
 *
 * \li the control characters which have a C escape sequence are
 *     transformed to that sequence (i.e. 0x09 becomes \\t);
 * \li the other control characters are transformed to the caret
 *     notation (i.e. 0x01 becomes ^A);
 * \li the DEL character (0x7F) is transformed to \<DEL>;
 * \li the bytes from 0x80 to 0xFF are transformed to \\xHH where HH is
 *     the value of the byte in uppercase hexadecimal;
 * \li all the other characters are copied as is.
 *
 * The input is read as a C string so the transformation stops at the
 * first NUL character found in \p e.
 *
 * \param[in] e  The email to be transformed.
 *
 * \return The transformed email.
 */
std::string email_to_vstring(const std::string& e)
{
    static const char hc[] = "0123456789ABCDEF";

    std::string result;

    for(const char *s(e.c_str()); *s != '\0'; ++s)
    {
        // always work on the unsigned value: with a signed char, bytes
        // over 0x7F are negative and cannot be used to index hc[]
        const unsigned char c(static_cast<unsigned char>(*s));

        if(c < ' ')
        {
            switch(c)
            {
            case '\a': result += "\\a"; break;
            case '\b': result += "\\b"; break;
            case '\f': result += "\\f"; break;
            case '\n': result += "\\n"; break;
            case '\r': result += "\\r"; break;
            case '\t': result += "\\t"; break;
            case '\v': result += "\\v"; break;
            default:
                // caret notation: 0x01 -> ^A, 0x1B -> ^[, etc.
                result += '^';
                result += static_cast<char>(c + '@');
                break;

            }
        }
        else if(c == 0x7F)
        {
            result += "<DEL>";
        }
        else if(c >= 0x80)
        {
            // 0x80 is included, it is not printable either
            result += "\\x";
            result += hc[c >> 4];
            result += hc[c & 15];
        }
        else
        {
            result += static_cast<char>(c);
        }
    }

    return result;
}
254 :
255 :
256 1 : void test_valid_emails()
257 : {
258 1 : const tld_email *results(list_of_results);
259 20 : for(const valid_email *v(list_of_valid_emails); v->f_input_email != nullptr; ++v)
260 : {
261 19 : if(verbose)
262 : {
263 0 : printf("*** testing email \"%s\", start with C++ test\n", email_to_vstring(v->f_input_email).c_str());
264 0 : fflush(stdout);
265 : }
266 :
267 19 : const tld_email * const cresults(results);
268 :
269 : // C++ test
270 : {
271 38 : tld_email_list list;
272 19 : tld_result r(list.parse(v->f_input_email, 0));
273 19 : int max(v->f_count);
274 19 : if(r != TLD_RESULT_SUCCESS)
275 : {
276 0 : fprintf(stderr, "return value is %d instead of %d with [%s]\n", r, TLD_RESULT_SUCCESS, v->f_input_email);
277 0 : error("error: unexpected return value.");
278 0 : results += max;
279 : }
280 19 : else if(list.count() != max)
281 : {
282 0 : fprintf(stderr, "parse() returned %d as count, expected %d\n", list.count(), max);
283 0 : error("error: unexpected count");
284 0 : results += max;
285 : }
286 : else
287 : {
288 : // test the C++ function first
289 : {
290 38 : tld_email_list::tld_email_t e;
291 48 : for(int i(0); i < max; ++i, ++results)
292 : {
293 29 : if(results->f_group == nullptr)
294 : {
295 0 : error("error: end of results array reached before completion of the test.\n");
296 0 : return;
297 : }
298 :
299 29 : if(!list.next(e))
300 : {
301 0 : error("error: next() returned false too soon.");
302 : }
303 29 : if(e.f_group != results->f_group)
304 : {
305 0 : error("error: next() returned the wrong group. Got \"" + e.f_group + "\" instead of \"" + results->f_group + "\".");
306 : }
307 29 : if(e.f_original_email != results->f_original_email)
308 : {
309 0 : error("error: next() returned the wrong original email. Got \"" + e.f_original_email + "\" instead of \"" + results->f_original_email + "\".");
310 : }
311 29 : if(e.f_fullname != results->f_fullname)
312 : {
313 0 : error("error: next() returned the wrong fullname. Got \"" + e.f_fullname + "\" instead of \"" + results->f_fullname + "\".");
314 : }
315 29 : if(e.f_username != results->f_username)
316 : {
317 0 : error("error: next() returned the wrong username. Got \"" + e.f_username + "\" instead of \"" + results->f_username + "\".");
318 : }
319 29 : if(e.f_domain != results->f_domain)
320 : {
321 0 : error("error: next() returned the wrong username. Got \"" + e.f_domain + "\" instead of \"" + results->f_domain + "\".");
322 : }
323 29 : if(e.f_email_only != results->f_email_only)
324 : {
325 0 : error("error: next() returned the wrong email only. Got \"" + e.f_email_only + "\" instead of \"" + results->f_email_only + "\".");
326 : }
327 29 : if(e.f_canonicalized_email != results->f_canonicalized_email)
328 : {
329 0 : error("error: next() returned the wrong canonicalized email. Got \"" + e.f_canonicalized_email + "\" instead of \"" + results->f_canonicalized_email + "\".");
330 : }
331 : }
332 19 : if(list.next(e))
333 : {
334 0 : error("error: next(e) returned the wrong result, it should be false after the whole set of emails were read.");
335 : }
336 : }
337 : // try the C function which also allows us to test the rewind()
338 19 : list.rewind();
339 : {
340 19 : results = cresults;
341 19 : tld_email e;
342 48 : for(int i(0); i < max; ++i, ++results)
343 : {
344 29 : if(!list.next(&e))
345 : {
346 0 : error("error: next() returned false too soon.");
347 : }
348 29 : if(strcmp(e.f_group, results->f_group) != 0)
349 : {
350 0 : error("error: next() returned the wrong group. Got \"" + std::string(e.f_group) + "\" from \"" + results->f_group + "\".");
351 : }
352 29 : if(strcmp(e.f_original_email, results->f_original_email) != 0)
353 : {
354 0 : error("error: next() returned the wrong original email. Got \"" + std::string(e.f_original_email) + "\" instead of \"" + results->f_original_email + "\".");
355 : }
356 29 : if(strcmp(e.f_fullname, results->f_fullname) != 0)
357 : {
358 0 : error("error: next() returned the wrong fullname.");
359 : }
360 29 : if(strcmp(e.f_username, results->f_username) != 0)
361 : {
362 0 : error("error: next() returned the wrong username.");
363 : }
364 29 : if(strcmp(e.f_domain, results->f_domain) != 0)
365 : {
366 0 : error("error: next() returned the wrong username.");
367 : }
368 29 : if(strcmp(e.f_email_only, results->f_email_only) != 0)
369 : {
370 0 : error("error: next() returned the wrong email only.");
371 : }
372 29 : if(strcmp(e.f_canonicalized_email, results->f_canonicalized_email) != 0)
373 : {
374 0 : error("error: next() returned the wrong canonicalized email.");
375 : }
376 : }
377 19 : if(list.next(&e))
378 : {
379 0 : error("error: next(&e) returned the wrong result, it should be false after the whole set of emails were read.");
380 : }
381 : }
382 : }
383 : }
384 :
385 19 : if(verbose)
386 : {
387 0 : printf("*** C test now\n");
388 0 : fflush(stdout);
389 : }
390 : // C test
391 : {
392 : tld_email_list *list;
393 19 : list = tld_email_alloc();
394 19 : tld_result r = tld_email_parse(list, v->f_input_email, 0);
395 19 : const int max(v->f_count);
396 19 : if(r != TLD_RESULT_SUCCESS)
397 : {
398 0 : fprintf(stderr, "return value is %d instead of %d for \"%s\"\n", r, TLD_RESULT_SUCCESS, v->f_input_email);
399 0 : error("error: unexpected return value.");
400 : }
401 19 : else if(tld_email_count(list) != max)
402 : {
403 0 : fprintf(stderr, "parse() returned %d as count, expected %d\n", tld_email_count(list), max);
404 0 : error("error: unexpected count");
405 : }
406 : else
407 : {
408 : // test the C++ function first
409 57 : for(int repeat(0); repeat < 2; ++repeat)
410 : {
411 38 : results = cresults;
412 38 : struct tld_email e;
413 96 : for(int i(0); i < max; ++i, ++results)
414 : {
415 58 : if(results->f_group == nullptr)
416 : {
417 0 : error("error: end of results array reached before completion of the test.\n");
418 0 : return;
419 : }
420 :
421 58 : if(tld_email_next(list, &e) != 1)
422 : {
423 0 : error("error: next() returned false too soon.");
424 : }
425 58 : if(strcmp(e.f_group, results->f_group) != 0)
426 : {
427 0 : error("error: next() returned the wrong group. Got \"" + std::string(e.f_group) + "\" from \"" + results->f_group + "\".");
428 : }
429 58 : if(strcmp(e.f_original_email, results->f_original_email) != 0)
430 : {
431 0 : error("error: next() returned the wrong original email. Got \"" + std::string(e.f_original_email) + "\" instead of \"" + results->f_original_email + "\".");
432 : }
433 58 : if(strcmp(e.f_fullname, results->f_fullname) != 0)
434 : {
435 0 : error("error: next() returned the wrong fullname.");
436 : }
437 58 : if(strcmp(e.f_username, results->f_username) != 0)
438 : {
439 0 : error("error: next() returned the wrong username.");
440 : }
441 58 : if(strcmp(e.f_domain, results->f_domain) != 0)
442 : {
443 0 : error("error: next() returned the wrong username.");
444 : }
445 58 : if(strcmp(e.f_email_only, results->f_email_only) != 0)
446 : {
447 0 : error("error: next() returned the wrong email only.");
448 : }
449 58 : if(strcmp(e.f_canonicalized_email, results->f_canonicalized_email) != 0)
450 : {
451 0 : error("error: next() returned the wrong canonicalized email.");
452 : }
453 : }
454 38 : if(tld_email_next(list, &e) != 0)
455 : {
456 0 : error("error: next(&e) returned the wrong result, it should be false after the whole set of emails were read.");
457 : }
458 : // try again
459 38 : tld_email_rewind(list);
460 : }
461 : }
462 19 : tld_email_free(list);
463 : }
464 : }
465 :
466 : {
467 : // all valid atom characters
468 1 : const char valid_chars[] =
469 : "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
470 : "abcdefghijklmnopqrstuvwxyz"
471 : "0123456789"
472 : "!#$%&'*+-/=?^_`{|}~" // here there is a NUL
473 : ;
474 82 : for(size_t i(0); i < sizeof(valid_chars) / sizeof(valid_chars[0]) - 1; ++i)
475 : {
476 162 : tld_email_list list;
477 162 : std::string e("abc");
478 81 : e += valid_chars[i];
479 81 : e += "def@m2osw.com";
480 81 : if(verbose)
481 : {
482 0 : printf("*** testing all atom characters with email \"%s\"\n", email_to_vstring(e).c_str());
483 0 : fflush(stdout);
484 : }
485 81 : tld_result r(list.parse(e, 0));
486 81 : if(r != TLD_RESULT_SUCCESS)
487 : {
488 0 : fprintf(stderr, "call to list.parse() failed with %d expected %d for email \"%s\" (all valid atom characters)\n", r, TLD_RESULT_SUCCESS, e.c_str());
489 0 : error("error: unexpected return value.");
490 : }
491 : }
492 : }
493 :
494 : {
495 : // all valid quoted characters: " " to "\x7E" except the " and \ characters
496 : if(sizeof(int) < 4)
497 : {
498 : error("error: the ctrl variable needs to be at least 32 bits");
499 : return;
500 : }
501 1 : const int ctrl(1 << '\t');
502 127 : for(size_t i(1); i <= 126; ++i)
503 : {
504 126 : switch(i)
505 : {
506 4 : case ' ': // at this point we disallow the space which causes problems with Snap!
507 : case '\t': // at this point we disallow the tab which causes problems with Snap!
508 : case '"':
509 : case '\\':
510 : case 0x7F: // not included in the loop anyway
511 4 : break;
512 :
513 122 : default:
514 122 : if(i >= ' ' || (ctrl & (1 << i)) != 0)
515 : {
516 184 : tld_email_list list;
517 184 : std::string e("\"abc");
518 92 : e += static_cast<char>(i);
519 92 : e += "def\"@m2osw.com";
520 92 : if(verbose)
521 : {
522 0 : printf("*** testing all atom characters with email \"%s\"\n", email_to_vstring(e).c_str());
523 0 : fflush(stdout);
524 : }
525 92 : tld_result r(list.parse(e, 0));
526 92 : if(r != TLD_RESULT_SUCCESS)
527 : {
528 0 : fprintf(stderr, "call to list.parse() failed with %d expected %d for email \"%s\" (all valid characters)\n", r, TLD_RESULT_SUCCESS, email_to_vstring(e).c_str());
529 0 : error("error: unexpected return value.");
530 : }
531 30 : }
532 122 : break;
533 :
534 : }
535 : }
536 : }
537 :
538 : {
539 : // all valid quoted pair: '\t' and " " to "\x7E"
540 : // -- at this time the \t and " " do not work here
541 95 : for(size_t i(33); i <= 126; ++i)
542 : {
543 188 : tld_email_list list;
544 188 : std::string e("\"abc\\");
545 94 : if(i == 31)
546 : {
547 0 : e += static_cast<char>('\t');
548 : }
549 : else
550 : {
551 94 : e += static_cast<char>(i);
552 : }
553 94 : e += "def\"@m2osw.com";
554 94 : if(verbose)
555 : {
556 0 : printf("*** testing all atom characters with email \"%s\"\n", email_to_vstring(e).c_str());
557 0 : fflush(stdout);
558 : }
559 94 : tld_result r(list.parse(e, 0));
560 94 : if(r != TLD_RESULT_SUCCESS)
561 : {
562 0 : fprintf(stderr, "call to list.parse() failed with %d expected %d for email \"%s\" (all valid quoted pairs)\n", r, TLD_RESULT_SUCCESS, email_to_vstring(e).c_str());
563 0 : error("error: unexpected return value.");
564 : }
565 : }
566 : }
567 :
568 : {
569 : // all valid comment characters: " " to "\x7E" except the " and \ characters
570 : if(sizeof(int) < 4)
571 : {
572 : error("error: the ctrl variable needs to be at least 32 bits");
573 : return;
574 : }
575 1 : const int ctrl((1 << '\t') | (1 << '\r') | (1 << '\n'));
576 127 : for(size_t i(1); i <= 126; ++i)
577 : {
578 : // we skip all the special characters in a comment since
579 : // those are already tested somewhere else
580 126 : switch(i)
581 : {
582 3 : case '(': // avoid a sub-comment
583 : case ')': // avoid closing the comment mid-way
584 : case '\\': // tested somewhere else
585 : case 0x7F: // not included in the loop anyway
586 3 : break;
587 :
588 123 : default:
589 123 : if(i >= ' ' || (ctrl & (1 << i)) != 0)
590 : {
591 190 : tld_email_list list;
592 190 : std::string e("(Comment \"");
593 95 : e += static_cast<char>(i);
594 95 : e += "\" char.) alexis@m2osw.com";
595 95 : if(verbose)
596 : {
597 0 : printf("*** testing all atom characters with email \"%s\"\n", email_to_vstring(e).c_str());
598 0 : fflush(stdout);
599 : }
600 95 : tld_result r(list.parse(e, 0));
601 95 : if(r != TLD_RESULT_SUCCESS)
602 : {
603 0 : error("error: unexpected return value.");
604 : }
605 28 : }
606 123 : break;
607 :
608 : }
609 : }
610 : }
611 :
612 : {
613 : // all valid domain characters: "!" to "\x7E" except the [, ], and \ characters
614 95 : for(size_t i('!'); i <= 126; ++i)
615 : {
616 : // a dot is valid but we cannot test it between two other dots
617 94 : if(i == '[' || i == ']' || i == '\\' || i == '.')
618 : {
619 4 : continue;
620 : }
621 180 : tld_email_list list;
622 180 : std::string e("alexis@[ m2osw.");
623 90 : e += static_cast<char>(i);
624 90 : if(i == '%')
625 : {
626 1 : e += "25";
627 : }
628 90 : e += ".com\t]";
629 90 : if(verbose)
630 : {
631 0 : printf("*** testing all atom characters with email \"%s\"\n", email_to_vstring(e).c_str());
632 0 : fflush(stdout);
633 : }
634 90 : tld_result r(list.parse(e, 0));
635 90 : if(r != TLD_RESULT_SUCCESS)
636 : {
637 0 : error("error: unexpected return value while testing a domain with special character \"" + e + "\"");
638 : }
639 : }
640 : }
641 :
642 : {
643 1 : if(tld_email_list::quote_string("Test quoting a simple comment", '(') != "(Test quoting a simple comment)")
644 : {
645 0 : error("error: unexpected return value when testing a simple comment quotation");
646 : }
647 1 : if(tld_email_list::quote_string("Test (quoting) a complex )comment(", '(') != "(Test \\(quoting\\) a complex \\)comment\\()")
648 : {
649 0 : error("error: unexpected return value when testing a complex comment quotation");
650 : }
651 : }
652 : }
653 :
654 :
655 :
656 :
657 : /** \brief Define an invalid email.
658 : *
659 : * This structure is used to list invalid emails in order to test that such
660 : * emails are not accepted by the parser. The structure includes the expected
661 : * result as well as a string pointer to the invalid email.
662 : */
663 : struct invalid_email
664 : {
665 : /// The expected result, if the call does not return this exact value the test fails
666 : tld_result f_result;
667 : /// The pointer to the invalid email to be tested
668 : const char * f_input_email;
669 : };
670 :
/// The list of invalid emails used to check that the parser rejects them.
///
/// Each entry defines the result the parser is expected to return and
/// the invalid input. test_invalid_emails() verifies that both the C++
/// and the C interfaces return that exact result.
///
/// The array ends with an entry where the input email is a null pointer.
671 : const invalid_email list_of_invalid_emails[] =
672 : {
673 : { TLD_RESULT_NULL, "alexism2osw.com (missing @)" },
674 : { TLD_RESULT_INVALID, "doug barbieri@m2osw.com\n \t (space in email address)" },
675 : { TLD_RESULT_NO_TLD, "doug_barbieri@m2osw com\n \t (space in email domain)" },
676 : { TLD_RESULT_NOT_FOUND, "doug_barbieri@m2osw.com org (space in email domain after dot)" },
677 : { TLD_RESULT_NOT_FOUND, "<doug_barbieri@m2osw.com org> (space in email domain after dot)" },
678 : { TLD_RESULT_INVALID, "<doug_barbieri@this sub domain.m2osw.com> (space in email domain after dot)" },
679 : { TLD_RESULT_INVALID, " \v alexis@m2osw.com\n \t (bad control)" },
680 : { TLD_RESULT_INVALID, " (* Pascal Comments *) \t alexis@m2osw.com\n (missing closing parenthesis\\)" },
681 : { TLD_RESULT_INVALID, "(Start-Comment)alexis@ \t [ \t m2osw.com \t ] \n (extra after domain done) \"more\tdata\" \r\n\t" },
682 : { TLD_RESULT_INVALID, "(Test with dots in user name) al.ex.is@ \t(missing closing bracket ]) [ \t m2osw.com \t " },
683 : { TLD_RESULT_NULL, "< (Test with dots in user name) al.ex.is@ \t [ \t m2osw.com \t ] \n (Missing >) \r\n\t" },
684 : { TLD_RESULT_INVALID, "(Full name with period) Alexis.Wilke < (Test with dots in user name) al.ex.is@ \t [ \t m2osw.com \t ] \n (More (comments) there) > \r\n\t" },
685 : { TLD_RESULT_INVALID, " (Now a group:) This Group: (With full name) Alexis Wilke < \n alexis \t @ \t [ \t m2osw.com \t ] \n (missing ;) > \r\n\t" },
686 : { TLD_RESULT_INVALID, "Good Group: alexis@m2osw.com, bad-group: test@example.com;" },
687 : { TLD_RESULT_INVALID, "(No Group Name): alexis@m2osw.com;" },
688 : { TLD_RESULT_INVALID, " (No Group Name) : alexis@m2osw.com;" },
689 : { TLD_RESULT_INVALID, ": alexis@m2osw.com;" },
690 : { TLD_RESULT_INVALID, "(Group with CTRL) Group \v Unexpected: alexis@m2osw.com;" },
691 : { TLD_RESULT_INVALID, "\"alexis@m2osw.com;" },
692 : { TLD_RESULT_INVALID, "\"alexis@m2osw.com;\v\"" },
693 : { TLD_RESULT_NO_TLD, "alexis@m2osw.com, valid@group.com; alexis@m2osw.com, invalid@group" },
694 : { TLD_RESULT_NO_TLD, "alexis@m2osw.com, valid@group.com; alexis@m2osw.com, invalid@group;" },
695 : { TLD_RESULT_NOT_FOUND, "alexis@m2osw.com, valid@group.com; alexis@m2osw.com, invalid@unknown.tld" },
696 : { TLD_RESULT_NOT_FOUND, "alexis@m2osw.com, valid@group.com; alexis@m2osw.com, invalid@unknown.tld;" },
697 : { TLD_RESULT_INVALID, "\"Alexis Wilke\\" }, // \ followed by NUL
698 : { TLD_RESULT_INVALID, "(Comment with \\\\ followed by NUL: \\" },
699 : { TLD_RESULT_INVALID, "(Test Errors Once Done) \"Wilke, Alexis\" <alexis@m2osw.com> \"Bad\"" },
700 : { TLD_RESULT_INVALID, "(Comment with CTRL \b) \"Wilke, Alexis\" <alexis@m2osw.com>" },
701 : { TLD_RESULT_INVALID, "[m2osw.com]" }, // missing user name
702 : { TLD_RESULT_INVALID, "good@[bad-slash\\.com]" },
703 : { TLD_RESULT_INVALID, "good@[bad[reopen.com]" },
704 : { TLD_RESULT_INVALID, "good@[bad-duplicate.com] more.net" }, // two domains
705 : { TLD_RESULT_INVALID, "(Test Errors Once Done) \"Wilke, Alexis\" <alexis@m2osw.com> [Bad]" },
706 : { TLD_RESULT_INVALID, "(Test Errors Once Done) alexis@start[Bad]" },
707 : { TLD_RESULT_INVALID, "(Test Errors Once Done) alexis@[first][Bad]" },
708 : { TLD_RESULT_INVALID, "(Test Errors Once Done) alexis@[control:\v]" },
709 : { TLD_RESULT_NULL, "(Test Errors Once Done) alexis@[ spaces BAD]" },
710 : { TLD_RESULT_NULL, "(Wind Domain...) alexis@[ ]" },
711 : { TLD_RESULT_NULL, "(More Spaces Test) alexis@[no-left-trim no-right-trim]" },
712 : { TLD_RESULT_NULL, "(Dot Dot Dot Domain) alexis@[ . . . ]" },
713 : { TLD_RESULT_INVALID, "(Dot Only Domain) alexis@[ . ]" },
714 : { TLD_RESULT_INVALID, "(Spurious Angle) alexis>@m2osw.com" },
715 : { TLD_RESULT_INVALID, "(Spurious Angle) alexis@m2osw.com>" },
716 : { TLD_RESULT_INVALID, "(Double Angle) <alexis@m2osw.com>>" },
717 : { TLD_RESULT_NULL, "(Missing domain) <alexis@>" },
718 : { TLD_RESULT_NULL, "(Missing domain) alexis@" },
719 : { TLD_RESULT_INVALID, "(2 domains) <alexis@[m2osw.com]bad>" },
720 : { TLD_RESULT_INVALID, "(Double @) <alexis@m2osw.com> @" },
721 : { TLD_RESULT_INVALID, "(Double @) alexis@m2osw.com@" },
722 : { TLD_RESULT_INVALID, "(Extra Chars) <alexis@m2osw.com> bad" },
723 : { TLD_RESULT_NULL, "(Empty username within brackets) <@m2osw.com>" },
724 : { TLD_RESULT_NULL, "(Empty User Name) @m2osw.com" },
725 : { TLD_RESULT_INVALID, "(Cannot start with a dot) .alexis@m2osw.com" },
726 : { TLD_RESULT_INVALID, "(Cannot start with a dot) <.alexis@m2osw.com>" },
727 : { TLD_RESULT_INVALID, "(Cannot end with a dot) alexis.@m2osw.com" },
728 : { TLD_RESULT_INVALID, "(Cannot end with a dot) <alexis.@m2osw.com>" },
729 : { TLD_RESULT_INVALID, "(Cannot include double dots) ale..xis@m2osw.com" },
730 : { TLD_RESULT_NOT_FOUND, "(End domain with dot not considered valid!) alexis@m2osw.com." },
731 : { TLD_RESULT_INVALID, "(End domain with dot not considered valid!) <alexis@m2osw.com.>" },
732 : { TLD_RESULT_NULL, "(Bad Emails) alexis,m2osw.com" },
733 : { TLD_RESULT_INVALID, "(Bad Char) alexis@m2osw\001com" },
734 : { TLD_RESULT_NOT_FOUND, "(Bad Extension) alexis@m2osw.comm" },
735 : { TLD_RESULT_INVALID, "(Bad Extension) alexis@m2osw.ar" },
736 : { TLD_RESULT_INVALID, "(Bad Extension) alexis@m2osw.nom.ar" },
737 : { TLD_RESULT_NO_TLD, "(Bad Extension) alexis@m2osw" },
738 : { TLD_RESULT_INVALID, "(Bad Extension) alexis@[m2osw..com]" },
739 :
740 : // end of list
741 : { TLD_RESULT_SUCCESS, nullptr }
742 : };
743 :
744 1 : void test_invalid_emails()
745 : {
746 67 : for(const invalid_email *v(list_of_invalid_emails); v->f_input_email != nullptr; ++v)
747 : {
748 66 : if(verbose)
749 : {
750 0 : printf("+++ testing email \"%s\"\n", email_to_vstring(v->f_input_email).c_str());
751 : }
752 :
753 : // C++ test
754 : {
755 132 : tld_email_list list;
756 66 : tld_result r(list.parse(v->f_input_email, 0));
757 66 : if(r != v->f_result)
758 : {
759 0 : std::stringstream ss;
760 0 : ss << "error: unexpected return value. Got " << static_cast<int>(r) << ", expected " << static_cast<int>(v->f_result) << " for \"" << v->f_input_email << "\" (C++)";
761 0 : error(ss.str());
762 : }
763 : }
764 :
765 : // C test
766 : {
767 : tld_email_list *list;
768 66 : list = tld_email_alloc();
769 66 : tld_result r = tld_email_parse(list, v->f_input_email, 0);
770 66 : if(r != v->f_result)
771 : {
772 0 : std::stringstream ss;
773 0 : ss << "error: unexpected return value. Got " << static_cast<int>(r) << ", expected " << static_cast<int>(v->f_result) << " for \"" << v->f_input_email << "\" (C)";
774 0 : error(ss.str());
775 : }
776 66 : tld_email_free(list);
777 66 : list = nullptr;
778 : }
779 : }
780 1 : }
781 :
782 :
783 6 : void contract_furfilled(tld_email_list::tld_email_t & e)
784 : {
785 12 : if(!e.f_group.empty()
786 6 : || !e.f_original_email.empty()
787 6 : || !e.f_fullname.empty()
788 6 : || !e.f_username.empty()
789 6 : || !e.f_domain.empty()
790 6 : || !e.f_email_only.empty()
791 12 : || !e.f_canonicalized_email.empty())
792 : {
793 0 : error("error: one of the structure parameters was modified on error!");
794 : }
795 6 : }
796 :
797 :
/** \brief Test the tld_email_t parsing functions directly with bad input.
 *
 * This function calls tld_email_t::parse() and tld_email_t::parse_group()
 * on a single email object with malformed strings. Each call is wrapped
 * in the EXPECTED_THROW() macro together with std::logic_error.
 *
 * After each attempt, contract_furfilled() is called to verify that all
 * the string fields of the email object are still empty.
 *
 * \note
 * EXPECTED_THROW() is defined elsewhere in this file; presumably it
 * reports an error when the named exception is not raised (to be
 * confirmed against the macro definition).
 */
void test_direct_email()
{
    // the same object is reused by all the checks below
    tld_email_list::tld_email_t email;

    ////////////// EMAILS
    // missing closing \"
    EXPECTED_THROW(email.parse("\"blah alexis@m2osw.com"), std::logic_error);
    contract_furfilled(email);

    // missing closing )
    EXPECTED_THROW(email.parse("(comment alexis@m2osw.com"), std::logic_error);
    contract_furfilled(email);

    // use of \ at the end of the comment
    EXPECTED_THROW(email.parse("(comment\\"), std::logic_error);
    contract_furfilled(email);

    // missing closing ]
    EXPECTED_THROW(email.parse("alexis@[m2osw.com"), std::logic_error);
    contract_furfilled(email);

    ////////////// GROUP
    // missing closing )
    EXPECTED_THROW(email.parse_group("Group (comment"), std::logic_error);
    contract_furfilled(email);

    // use of \ at the end of the comment
    EXPECTED_THROW(email.parse_group("Group (comment \\"), std::logic_error);
    contract_furfilled(email);
}
828 :
829 :
830 :
/** \brief Structure used to define a set of fields to test.
 *
 * This structure is used in this test to define a list of fields
 * to test against the library. Each entry pairs a field string with
 * the field type that tld_email_list::email_field_type() is expected
 * to return for it.
 */
struct email_field_types
{
    const char * f_field;
    tld_email_field_type f_type;
};

/** \var email_field_types::f_field
 * \brief The name of the field to be tested.
 *
 * This string is passed as is to tld_email_list::email_field_type().
 * It may be a bare field name (i.e. "to") or a name followed by a
 * colon and a value (i.e. "to: someone").
 */
/** \var email_field_types::f_type
 * \brief The type we expect the library to return for that field.
 *
 * The test reports an error when the library returns a different type.
 */
848 :
/** \brief Table of field names and the type expected for each.
 *
 * This table is used by test_email_field_types(), which computes the
 * number of entries with sizeof(), so no end of list marker is needed.
 *
 * The entries cover field names in various cases, fields expected to
 * include emails (with and without a colon followed by a value), other
 * fields, and invalid field names.
 */
const email_field_types list_of_email_field_types[] =
{
    // make sure case does not have side effects
    { "to", TLD_EMAIL_FIELD_TYPE_ADDRESS_LIST },
    { "To", TLD_EMAIL_FIELD_TYPE_ADDRESS_LIST },
    { "tO", TLD_EMAIL_FIELD_TYPE_ADDRESS_LIST },
    { "TO", TLD_EMAIL_FIELD_TYPE_ADDRESS_LIST },

    // check all fields that are expected to include emails
    { "from", TLD_EMAIL_FIELD_TYPE_MAILBOX_LIST },
    { "resent-from", TLD_EMAIL_FIELD_TYPE_MAILBOX_LIST },
    { "sender", TLD_EMAIL_FIELD_TYPE_MAILBOX },
    { "resent-sender", TLD_EMAIL_FIELD_TYPE_MAILBOX },
    { "to", TLD_EMAIL_FIELD_TYPE_ADDRESS_LIST },
    { "cc", TLD_EMAIL_FIELD_TYPE_ADDRESS_LIST },
    { "reply-to", TLD_EMAIL_FIELD_TYPE_ADDRESS_LIST },
    { "resent-to", TLD_EMAIL_FIELD_TYPE_ADDRESS_LIST },
    { "resent-cc", TLD_EMAIL_FIELD_TYPE_ADDRESS_LIST },
    { "bcc", TLD_EMAIL_FIELD_TYPE_ADDRESS_LIST_OPT },
    { "resent-bcc", TLD_EMAIL_FIELD_TYPE_ADDRESS_LIST_OPT },

    // check all fields with a colon
    { "from: someone", TLD_EMAIL_FIELD_TYPE_MAILBOX_LIST },
    { "resent-from: someone", TLD_EMAIL_FIELD_TYPE_MAILBOX_LIST },
    { "sender: someone", TLD_EMAIL_FIELD_TYPE_MAILBOX },
    { "resent-sender: someone", TLD_EMAIL_FIELD_TYPE_MAILBOX },
    { "to: someone", TLD_EMAIL_FIELD_TYPE_ADDRESS_LIST },
    { "cc: someone", TLD_EMAIL_FIELD_TYPE_ADDRESS_LIST },
    { "reply-to: someone", TLD_EMAIL_FIELD_TYPE_ADDRESS_LIST },
    { "resent-to: someone", TLD_EMAIL_FIELD_TYPE_ADDRESS_LIST },
    { "resent-cc: someone", TLD_EMAIL_FIELD_TYPE_ADDRESS_LIST },
    { "bcc: someone", TLD_EMAIL_FIELD_TYPE_ADDRESS_LIST_OPT },
    { "resent-bcc: someone", TLD_EMAIL_FIELD_TYPE_ADDRESS_LIST_OPT },

    // check other fields
    { "message-id", TLD_EMAIL_FIELD_TYPE_UNKNOWN },
    { "date", TLD_EMAIL_FIELD_TYPE_UNKNOWN },
    { "subject", TLD_EMAIL_FIELD_TYPE_UNKNOWN },
    { "x-extension", TLD_EMAIL_FIELD_TYPE_UNKNOWN },

    // check other fields with a colon
    { "message-id: something", TLD_EMAIL_FIELD_TYPE_UNKNOWN },
    { "date: something", TLD_EMAIL_FIELD_TYPE_UNKNOWN },
    { "subject: something", TLD_EMAIL_FIELD_TYPE_UNKNOWN },
    { "x-extension: something", TLD_EMAIL_FIELD_TYPE_UNKNOWN },

    // check for invalid field names
    //
    // the first two names include a byte above 0x7F (0xFC and 0xE9); the
    // letter that follows is written as a hexadecimal escape too (\x62 is
    // 'b', \x63 is 'c') because a plain 'b' or 'c' is a hexadecimal digit
    // and would be taken as part of the preceding escape sequence
    { "s\xfc\x62ject", TLD_EMAIL_FIELD_TYPE_INVALID },
    { "subj\xe9\x63t", TLD_EMAIL_FIELD_TYPE_INVALID },
    { "-bad-dash", TLD_EMAIL_FIELD_TYPE_INVALID },
    { "0bad-digit", TLD_EMAIL_FIELD_TYPE_INVALID },
    { "1bad-digit", TLD_EMAIL_FIELD_TYPE_INVALID },
    { "2bad-digit", TLD_EMAIL_FIELD_TYPE_INVALID },
    { "3bad-digit", TLD_EMAIL_FIELD_TYPE_INVALID },
    { "4bad-digit", TLD_EMAIL_FIELD_TYPE_INVALID },
    { "5bad-digit", TLD_EMAIL_FIELD_TYPE_INVALID },
    { "6bad-digit", TLD_EMAIL_FIELD_TYPE_INVALID },
    { "7bad-digit", TLD_EMAIL_FIELD_TYPE_INVALID },
    { "8bad-digit", TLD_EMAIL_FIELD_TYPE_INVALID },
    { "9bad-digit", TLD_EMAIL_FIELD_TYPE_INVALID },
    { "" /*empty*/, TLD_EMAIL_FIELD_TYPE_INVALID },
};
911 :
912 1 : void test_email_field_types()
913 : {
914 49 : for(size_t i(0); i < sizeof(list_of_email_field_types) / sizeof(list_of_email_field_types[0]); ++i)
915 : {
916 48 : tld_email_field_type type(tld_email_list::email_field_type(list_of_email_field_types[i].f_field));
917 48 : if(type != list_of_email_field_types[i].f_type)
918 : {
919 0 : std::stringstream ss;
920 0 : ss << "error: email type mismatch for \"" << list_of_email_field_types[i].f_field
921 0 : << "\", expected " << static_cast<int>(list_of_email_field_types[i].f_type)
922 0 : << ", got " << static_cast<int>(type) << " instead.";
923 0 : error(ss.str());
924 : }
925 : }
926 1 : }
927 :
928 :
929 :
930 1 : int main(int argc, char *argv[])
931 : {
932 1 : printf("testing tld emails version %s\n", tld_version());
933 :
934 1 : if(argc > 1)
935 : {
936 0 : if(strcmp(argv[1], "-v") == 0)
937 : {
938 0 : verbose = 1;
939 : }
940 : }
941 :
942 : /* Call all the tests, one by one.
943 : *
944 : * Failures are "recorded" in the err_count global variable
945 : * and the process stops with an error message and exit(1)
946 : * if err_count is not zero.
947 : *
948 : * Exceptions that should not occur are expected to also
949 : * be caught and reported as errors.
950 : */
951 : try
952 : {
953 1 : test_valid_emails();
954 1 : test_invalid_emails();
955 1 : test_direct_email();
956 1 : test_email_field_types();
957 : }
958 0 : catch(const invalid_domain&)
959 : {
960 0 : error("error: caught an exception when all emails are expected to be valid.");
961 : }
962 :
963 1 : if(err_count)
964 : {
965 0 : fprintf(stderr, "%d error%s occured.\n",
966 0 : err_count, err_count != 1 ? "s" : "");
967 : }
968 1 : exit(err_count ? 1 : 0);
969 : }
970 :
971 : /* vim: ts=4 sw=4 et
972 : */
|