Line data Source code
1 : // Copyright (c) 2006-2022 Made to Order Software Corp. All Rights Reserved
2 : //
3 : // https://snapwebsites.org/project/advgetopt
4 : // contact@m2osw.com
5 : //
6 : // This program is free software; you can redistribute it and/or modify
7 : // it under the terms of the GNU General Public License as published by
8 : // the Free Software Foundation; either version 2 of the License, or
9 : // (at your option) any later version.
10 : //
11 : // This program is distributed in the hope that it will be useful,
12 : // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 : // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 : // GNU General Public License for more details.
15 : //
16 : // You should have received a copy of the GNU General Public License along
17 : // with this program; if not, write to the Free Software Foundation, Inc.,
18 : // 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
19 :
20 : // murmur3
21 : //
22 : #include <murmur3/stream.h>
23 :
24 : #include <murmur3/murmur3.h>
25 :
26 :
27 : // self
28 : //
29 : #include "catch_main.h"
30 :
31 :
32 : // snapdev
33 : //
34 : #include <snapdev/file_contents.h>
35 : #include <snapdev/glob_to_list.h>
36 : #include <snapdev/math.h>
37 : #include <snapdev/ostream_int128.h>
38 :
39 :
40 : // C++
41 : //
42 : #include <iomanip>
43 :
44 :
45 : // C
46 : //
47 : #include <sys/random.h>
48 :
49 :
50 : // last include
51 : //
52 : #include <snapdev/poison.h>
53 :
54 :
55 :
56 5 : CATCH_TEST_CASE("stream_basic", "[stream][valid]")
57 : {
58 6 : CATCH_START_SECTION("Verify small stream (Hello World!)")
59 : {
60 1 : murmur3::stream sum(123, 123);
61 :
62 1 : murmur3::seed_t seed1(0);
63 1 : murmur3::seed_t seed2(0);
64 1 : sum.get_seeds(seed1, seed2);
65 1 : CATCH_CHECK(seed1 == 123);
66 1 : CATCH_CHECK(seed2 == 123);
67 :
68 2 : std::string const hello_world("Hello, world!");
69 1 : sum.add_data(hello_world.c_str(), hello_world.length());
70 1 : CATCH_CHECK(sum.flush().to_string() == "8743acad421c8c73d373c3f5f19732fd");
71 :
72 1 : CATCH_CHECK(murmur3::sum(hello_world.c_str(), hello_world.length(), 123).to_string() == "8743acad421c8c73d373c3f5f19732fd");
73 :
74 : // verify the reset()
75 : //
76 1 : sum.reset();
77 :
78 1 : sum.add_data(hello_world.c_str(), hello_world.length());
79 1 : CATCH_CHECK(sum.flush().to_string() == "8743acad421c8c73d373c3f5f19732fd");
80 :
81 : // verify different seed
82 : //
83 1 : sum.reset(321);
84 :
85 1 : sum.add_data(hello_world.c_str(), hello_world.length());
86 1 : CATCH_CHECK(sum.flush().to_string() == "f86d4004ca47f42bb9546c7979200aee");
87 :
88 1 : CATCH_CHECK(murmur3::sum(hello_world.c_str(), hello_world.length(), 321).to_string() == "f86d4004ca47f42bb9546c7979200aee");
89 : }
90 : CATCH_END_SECTION()
91 :
92 6 : CATCH_START_SECTION("X28 stream")
93 : {
94 1 : murmur3::stream sum(123, 123);
95 :
96 1 : murmur3::seed_t seed1(0);
97 1 : murmur3::seed_t seed2(0);
98 1 : sum.get_seeds(seed1, seed2);
99 1 : CATCH_CHECK(seed1 == 123);
100 1 : CATCH_CHECK(seed2 == 123);
101 :
102 2 : std::string const x28("xxxxxxxxxxxxxxxxxxxxxxxxxxxx");
103 1 : sum.add_data(x28.c_str(), x28.length());
104 1 : CATCH_CHECK(sum.flush().to_string() == "becf7e04dbcf74637751664ef66e73e0");
105 :
106 1 : CATCH_CHECK(murmur3::sum(x28.c_str(), x28.length(), 123).to_string() == "becf7e04dbcf74637751664ef66e73e0");
107 : }
108 : CATCH_END_SECTION()
109 :
110 6 : CATCH_START_SECTION("Empty file stream")
111 : {
112 1 : murmur3::stream sum(123, 123);
113 :
114 1 : murmur3::seed_t seed1(0);
115 1 : murmur3::seed_t seed2(0);
116 1 : sum.get_seeds(seed1, seed2);
117 1 : CATCH_CHECK(seed1 == 123);
118 1 : CATCH_CHECK(seed2 == 123);
119 :
120 2 : std::string const empty("");
121 1 : sum.add_data(empty.c_str(), empty.length());
122 1 : CATCH_CHECK(sum.flush().to_string() == "4cd9597081679d1abd92f8784bace33d");
123 :
124 1 : CATCH_CHECK(murmur3::sum(empty.c_str(), empty.length(), 123).to_string() == "4cd9597081679d1abd92f8784bace33d");
125 : }
126 : CATCH_END_SECTION()
127 3 : }
128 :
129 :
130 8 : CATCH_TEST_CASE("stream_file", "[stream][valid]")
131 : {
132 12 : CATCH_START_SECTION("Stream files")
133 : {
134 2 : snapdev::glob_to_list<std::list<snapdev::file>> glob;
135 1 : CATCH_REQUIRE(glob.read_path<
136 : snapdev::glob_to_list_flag_t::GLOB_FLAG_IGNORE_ERRORS,
137 : snapdev::glob_to_list_flag_t::GLOB_FLAG_PERIOD>("..."));
138 :
139 208 : for(auto const & f : glob)
140 : {
141 207 : if(!f.is_regular_file())
142 : {
143 : // skip directories and special files
144 : //
145 85 : continue;
146 : }
147 122 : if(SNAP_CATCH2_NAMESPACE::g_verbose())
148 : {
149 0 : std::cout << "-- testing with file \"" << f.filename() << "\".\n";
150 : }
151 :
152 244 : snapdev::file_contents file(f.filename());
153 122 : CATCH_REQUIRE(file.read_all());
154 122 : std::string const & contents(file.contents());
155 :
156 : // first compute the hash with the basic C function
157 : //
158 122 : std::uint32_t hash[4];
159 122 : MurmurHash3_x64_128(contents.c_str(), contents.length(), 0, hash);
160 244 : std::string c_hash(SNAP_CATCH2_NAMESPACE::hex128(hash));
161 :
162 : // second compute the same hash with the stream function 4Kb at
163 : // a time so we should hit all possible cases
164 : //
165 122 : constexpr std::size_t four_kb(1024 * 4);
166 122 : murmur3::stream sum(0);
167 122 : std::size_t const nblocks(contents.length() / four_kb);
168 122 : std::size_t const left_over(contents.length() % four_kb);
169 4171 : for(std::size_t b(0); b < nblocks; ++b)
170 : {
171 4049 : sum.add_data(contents.c_str() + b * four_kb, four_kb);
172 : }
173 122 : if(left_over != 0)
174 : {
175 122 : sum.add_data(contents.c_str() + nblocks * four_kb, left_over);
176 : }
177 :
178 122 : CATCH_CHECK(sum.flush().to_string() == c_hash);
179 :
180 : // test the helper function that computes the murmur3 of a file
181 : // using the streaming mechanism (to avoid loading the file in
182 : // memory all at once)
183 : //
184 122 : CATCH_CHECK(murmur3::sum(f.filename()).to_string() == c_hash);
185 : }
186 : }
187 : CATCH_END_SECTION()
188 :
189 12 : CATCH_START_SECTION("Stream with random seed")
190 : {
191 1 : std::size_t size(0);
192 1 : getrandom(&size, sizeof(size), 0);
193 1 : size %= 256;
194 1 : size += 25; // 25 to 280
195 :
196 1 : char buf[size];
197 1 : getrandom(buf, sizeof(buf), 0);
198 :
199 1 : murmur3::stream sum;
200 1 : sum.add_data(buf, sizeof(buf));
201 :
202 1 : murmur3::seed_t seed1(0);
203 1 : murmur3::seed_t seed2(0);
204 1 : sum.get_seeds(seed1, seed2);
205 :
206 1 : std::uint32_t hash[4];
207 1 : MurmurHash3_x64_128_128(buf, sizeof(buf), seed1, seed2, hash);
208 2 : std::string const c_hash(SNAP_CATCH2_NAMESPACE::hex128(hash));
209 :
210 2 : CATCH_CHECK(sum.flush().to_string() == c_hash);
211 : }
212 : CATCH_END_SECTION()
213 :
214 12 : CATCH_START_SECTION("Stream 4Kb exactly (special case)")
215 : {
216 1 : constexpr std::size_t four_kb(1024 * 4);
217 1 : char buf[four_kb];
218 1 : getrandom(buf, sizeof(buf), 0);
219 :
220 : // first compute the hash with the basic C function
221 : //
222 1 : std::uint32_t hash[4];
223 1 : MurmurHash3_x64_128(buf, sizeof(buf), 0, hash);
224 2 : std::string c_hash(SNAP_CATCH2_NAMESPACE::hex128(hash));
225 :
226 : // second compute the same hash with the stream function 4Kb at
227 : // a time so we should hit all possible cases
228 : //
229 1 : murmur3::stream sum(0);
230 1 : sum.add_data(buf, sizeof(buf));
231 1 : CATCH_CHECK(sum.flush().to_string() == c_hash);
232 :
233 : // test the helper function that computes the murmur3 of a file
234 : // using the streaming mechanism (to avoid loading the file in
235 : // memory all at once)
236 : //
237 2 : std::string const tmp_dir(SNAP_CATCH2_NAMESPACE::g_tmp_dir());
238 2 : std::string const test_filename(tmp_dir + "/4kb.bin");
239 2 : std::ofstream out(test_filename);
240 1 : out.write(buf, sizeof(buf));
241 1 : CATCH_CHECK(murmur3::sum(test_filename).to_string() == c_hash);
242 : }
243 : CATCH_END_SECTION()
244 :
245 12 : CATCH_START_SECTION("Stream 0Kb to 4Kb (many special cases)")
246 : {
247 1 : constexpr std::size_t four_kb(1024 * 4);
248 4097 : for(std::size_t size(0); size < four_kb; ++size)
249 : {
250 4096 : char buf[size];
251 4096 : getrandom(buf, sizeof(buf), 0);
252 :
253 : // first compute the hash with the basic C function
254 : //
255 4096 : std::uint32_t hash[4];
256 4096 : MurmurHash3_x64_128(buf, sizeof(buf), 0, hash);
257 8192 : std::string c_hash(SNAP_CATCH2_NAMESPACE::hex128(hash));
258 :
259 : // second compute the same hash with the stream function 4Kb at
260 : // a time so we should hit all possible cases
261 : //
262 4096 : murmur3::stream sum(0);
263 4096 : sum.add_data(buf, sizeof(buf));
264 4096 : CATCH_CHECK(sum.flush().to_string() == c_hash);
265 :
266 4096 : murmur3::hash expected_hash;
267 4096 : expected_hash.set(reinterpret_cast<std::uint8_t const *>(hash));
268 : //std::cerr << "-- full hash: " << c_hash << " -> " << expected_hash.to_string() << "\n";
269 4096 : CATCH_CHECK(sum.flush() == expected_hash);
270 4096 : CATCH_CHECK(sum.flush() <= expected_hash);
271 4096 : CATCH_CHECK(sum.flush() >= expected_hash);
272 :
273 4096 : murmur3::hash unexpected_hash;
274 4096 : std::uint32_t unhash[4] = { ~hash[0], ~hash[1], ~hash[2], ~hash[3] };
275 4096 : unexpected_hash.set(reinterpret_cast<std::uint8_t const *>(unhash));
276 4096 : CATCH_CHECK(sum.flush() != unexpected_hash);
277 :
278 4096 : murmur3::hash_t const value(expected_hash.to_uint128());
279 4096 : murmur3::hash_t const unvalue(unexpected_hash.to_uint128());
280 4096 : if(snapdev::bswap_128(value) < snapdev::bswap_128(unvalue))
281 : {
282 2004 : CATCH_CHECK(sum.flush() < unexpected_hash);
283 2004 : CATCH_CHECK_FALSE(sum.flush() > unexpected_hash);
284 : }
285 : else
286 : {
287 2092 : CATCH_CHECK_FALSE(sum.flush() < unexpected_hash);
288 2092 : CATCH_CHECK(sum.flush() > unexpected_hash);
289 : }
290 :
291 : // test the helper function that computes the murmur3 of a file
292 : // using the streaming mechanism (to avoid loading the file in
293 : // memory all at once)
294 : //
295 8192 : std::string const tmp_dir(SNAP_CATCH2_NAMESPACE::g_tmp_dir());
296 8192 : std::string const test_filename(tmp_dir + "/" + std::to_string(size) + "_bytes.bin");
297 : {
298 8192 : std::ofstream out(test_filename);
299 4096 : out.write(buf, sizeof(buf));
300 : }
301 : //std::cerr << "-- test with file [" << test_filename << "] -> " << murmur3::sum(test_filename).to_string() << "\n";
302 4096 : CATCH_CHECK(murmur3::sum(test_filename).to_string() == c_hash);
303 4096 : }
304 : }
305 : CATCH_END_SECTION()
306 :
307 12 : CATCH_START_SECTION("1 to 15 bytes sent first")
308 : {
309 : // test all possibilities in the switch()
310 : //
311 16 : for(std::size_t size(1); size < 16; ++size)
312 : {
313 15 : char buf[size + 16];
314 15 : getrandom(buf, sizeof(buf), 0);
315 :
316 : // first compute the hash with the basic C function
317 : //
318 15 : std::uint32_t hash[4];
319 15 : MurmurHash3_x64_128(buf, sizeof(buf), 0, hash);
320 30 : std::string c_hash(SNAP_CATCH2_NAMESPACE::hex128(hash));
321 :
322 : // second compute the same hash with the stream function
323 : // starting with a few bytes
324 : //
325 15 : murmur3::stream sum(0);
326 15 : sum.add_data(buf, size);
327 15 : sum.add_data(buf + size, 16);
328 15 : CATCH_CHECK(sum.flush().to_string() == c_hash);
329 15 : }
330 : }
331 : CATCH_END_SECTION()
332 :
333 12 : CATCH_START_SECTION("Irregular number of incoming bytes")
334 : {
335 : // test all possibilities in the switch()
336 : //
337 26 : for(std::size_t count(0); count < 25; ++count)
338 : {
339 25 : std::size_t size(0);
340 25 : getrandom(&size, sizeof(size), 0);
341 25 : size %= 256;
342 25 : size += 25; // 25 to 280
343 :
344 25 : char buf[size];
345 25 : getrandom(buf, sizeof(buf), 0);
346 :
347 : // first compute the hash with the basic C function
348 : //
349 25 : std::uint32_t hash[4];
350 25 : MurmurHash3_x64_128(buf, sizeof(buf), 0, hash);
351 50 : std::string c_hash(SNAP_CATCH2_NAMESPACE::hex128(hash));
352 :
353 25 : murmur3::stream sum(0);
354 25 : std::size_t pos(0);
355 1041 : while(pos < size)
356 : {
357 508 : std::size_t incr(0);
358 508 : getrandom(&incr, sizeof(incr), 0);
359 508 : incr %= 15;
360 508 : ++incr; // 1 to 15
361 508 : if(pos + incr > size)
362 : {
363 19 : incr = size - pos;
364 : }
365 :
366 508 : sum.add_data(buf + pos, incr);
367 :
368 508 : pos += incr;
369 : }
370 25 : CATCH_CHECK(sum.flush().to_string() == c_hash);
371 25 : }
372 : }
373 : CATCH_END_SECTION()
374 6 : }
375 :
376 :
377 4 : CATCH_TEST_CASE("hash", "[hash][valid]")
378 : {
379 4 : CATCH_START_SECTION("Hash validation")
380 : {
381 1 : murmur3::hash_t value;
382 1 : getrandom(&value, sizeof(value), 0);
383 1 : murmur3::hash h;
384 1 : h.set(reinterpret_cast<std::uint8_t const *>(&value));
385 1 : CATCH_CHECK(value == h.to_uint128());
386 :
387 1 : murmur3::hash_t verify;
388 1 : memcpy(&verify, h.get(), murmur3::HASH_SIZE);
389 1 : CATCH_CHECK(value == verify);
390 :
391 2 : std::stringstream ss;
392 1 : ss << std::hex << std::setfill('0') << std::setw(8) << static_cast<std::uint32_t>(value);
393 1 : ss << std::hex << std::setfill('0') << std::setw(8) << static_cast<std::uint32_t>(value >> 32);
394 1 : ss << std::hex << std::setfill('0') << std::setw(8) << static_cast<std::uint32_t>(value >> 64);
395 1 : ss << std::hex << std::setfill('0') << std::setw(8) << static_cast<std::uint32_t>(value >> 96);
396 1 : CATCH_CHECK(ss.str() == h.to_string());
397 :
398 1 : murmur3::hash h2;
399 1 : h2.from_string(ss.str());
400 1 : CATCH_CHECK(value == h2.to_uint128());
401 : }
402 : CATCH_END_SECTION()
403 :
404 4 : CATCH_START_SECTION("Hash starts with zeroes")
405 : {
406 1 : murmur3::hash_t value;
407 1 : getrandom(&value, sizeof(value), 0);
408 1 : value &= ~static_cast<murmur3::hash_t>(0xFFFFFFFF);
409 1 : murmur3::hash h;
410 1 : h.set(reinterpret_cast<std::uint8_t const *>(&value));
411 1 : CATCH_CHECK(value == h.to_uint128());
412 :
413 2 : std::stringstream ss;
414 1 : ss << std::hex << std::setfill('0') << std::setw(8) << static_cast<std::uint32_t>(value);
415 1 : ss << std::hex << std::setfill('0') << std::setw(8) << static_cast<std::uint32_t>(value >> 32);
416 1 : ss << std::hex << std::setfill('0') << std::setw(8) << static_cast<std::uint32_t>(value >> 64);
417 1 : ss << std::hex << std::setfill('0') << std::setw(8) << static_cast<std::uint32_t>(value >> 96);
418 1 : CATCH_CHECK(ss.str() == h.to_string());
419 :
420 2 : std::string const without_the_zeroes(ss.str().substr(8));
421 1 : murmur3::hash zh;
422 1 : zh.from_string(without_the_zeroes);
423 1 : CATCH_CHECK(value == zh.to_uint128());
424 : }
425 : CATCH_END_SECTION()
426 2 : }
427 :
428 :
429 5 : CATCH_TEST_CASE("invalid_hash", "[hash][invalid]")
430 : {
431 6 : CATCH_START_SECTION("Invalid Hash (odd size)")
432 : {
433 1 : murmur3::hash h;
434 :
435 : // invalid characters
436 : //
437 1 : CATCH_REQUIRE_THROWS_MATCHES(
438 : h.from_string("abc")
439 : , snapdev::hexadecimal_string_invalid_parameter
440 : , Catch::Matchers::ExceptionMessage(
441 : "hexadecimal_string_exception: the hex parameter"
442 : " must have an even size."));
443 : }
444 : CATCH_END_SECTION()
445 :
446 6 : CATCH_START_SECTION("Invalid Hash (not hex. characters)")
447 : {
448 1 : murmur3::hash h;
449 :
450 : // invalid characters
451 : //
452 1 : CATCH_REQUIRE_THROWS_MATCHES(
453 : h.from_string("abcdefghijkl")
454 : , snapdev::hexadecimal_string_invalid_parameter
455 : , Catch::Matchers::ExceptionMessage(
456 : "hexadecimal_string_exception: input character"
457 : " 'g' is not an hexadecimal digit."));
458 : }
459 : CATCH_END_SECTION()
460 :
461 6 : CATCH_START_SECTION("Invalid Hash (too long)")
462 : {
463 1 : murmur3::hash h;
464 :
465 : // invalid characters
466 : //
467 1 : CATCH_REQUIRE_THROWS_MATCHES(
468 : h.from_string("1111111111111111111111111111111111")
469 : , snapdev::hexadecimal_string_invalid_parameter
470 : , Catch::Matchers::ExceptionMessage(
471 : "hexadecimal_string_exception: \"1111111111111111111111111111111111\" is not a valid 128 bit murmur3 hash value; it is too long."));
472 : }
473 : CATCH_END_SECTION()
474 9 : }
475 :
476 :
477 : // vim: ts=4 sw=4 et
|