Line data Source code
1 : // Snap Websites Server -- compress (decompress) data
2 : // Copyright (c) 2013-2019 Made to Order Software Corp. All Rights Reserved
3 : //
4 : // This program is free software; you can redistribute it and/or modify
5 : // it under the terms of the GNU General Public License as published by
6 : // the Free Software Foundation; either version 2 of the License, or
7 : // (at your option) any later version.
8 : //
9 : // This program is distributed in the hope that it will be useful,
10 : // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 : // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 : // GNU General Public License for more details.
13 : //
14 : // You should have received a copy of the GNU General Public License
15 : // along with this program; if not, write to the Free Software
16 : // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 :
18 : // self
19 : //
20 : #include "snapwebsites/compression.h"
21 :
22 : // self lib
23 : //
24 : #include "snapwebsites/log.h"
25 :
26 : // snapdev lib
27 : //
28 : #include <snapdev/not_used.h>
29 :
30 : // Qt lib
31 : //
32 : #include <QMap>
33 :
34 : // C lib
35 : //
36 : #pragma GCC diagnostic push
37 : #pragma GCC diagnostic ignored "-Wold-style-cast"
38 : #include <zlib.h>
39 : #pragma GCC diagnostic pop
40 :
41 : // last include
42 : //
43 : #include <snapdev/poison.h>
44 :
45 :
46 : namespace snap
47 : {
48 : namespace compression
49 : {
50 :
51 : namespace
52 : {
53 : typedef QMap<QString, compressor_t *> compressor_map_t;
54 : typedef QMap<QString, archiver_t *> archiver_map_t;
55 :
56 : // IMPORTANT NOTE:
57 : // This list only makes use of bare pointers for many good reasons.
58 : // (i.e. all compressors are defined statitcally, not allocated)
59 : // Do not try to change it! Thank you.
60 : compressor_map_t * g_compressors;
61 :
62 : // IMPORTANT NOTE:
63 : // This list only makes use of bare pointers for many good reasons.
64 : // (i.e. all archivers are defined statitcally, not allocated)
65 : // Do not try to change it! Thank you.
66 : archiver_map_t * g_archivers;
67 :
/** \brief Clamp a level between two bounds.
 *
 * \param[in] level  The value to clamp.
 * \param[in] min  The smallest value that can be returned.
 * \param[in] max  The largest value that can be returned.
 *
 * \return \p min when \p level is smaller than \p min, \p max when
 *         \p level is larger than \p max, \p level otherwise.
 */
int bound_level(int level, int min, int max)
{
    if(level < min)
    {
        return min;
    }
    if(level > max)
    {
        return max;
    }
    return level;
}
72 :
73 : }
74 :
75 :
/** \brief Special compressor name to get the best compression available.
 *
 * Whenever we send a page on the Internet, we can compress it with zlib
 * (gzip, really). However, more and more, browsers are starting to support
 * other compressors. For example, Chrome supports "sdch" (a vcdiff
 * compressor) and Firefox is testing with lzma.
 *
 * When the compress() function receives this name, it runs the input
 * through every registered compressor and keeps the smallest output.
 * On return the compressor_name parameter is replaced by the name of
 * the compressor that was retained.
 */
char const *compressor_t::BEST_COMPRESSION = "best";
87 :
88 :
/** \brief Special compressor name returned when no compression was applied.
 *
 * When trying to compress a buffer, there are several reasons why the
 * compression may "fail". When that happens the result is the same
 * as the input, meaning that the data is not going to be compressed
 * at all, and compress() sets its compressor_name parameter to this
 * value.
 *
 * The decompress() function also sets its compressor_name parameter to
 * this value when none of the registered compressors recognized the
 * input.
 *
 * You should always verify whether the compression worked by testing
 * the compressor_name variable on return.
 */
char const *compressor_t::NO_COMPRESSION = "none";
100 :
101 :
102 : /** \brief Register the compressor.
103 : *
104 : * Whenever you implement a compressor, the constructor must call
105 : * this constructor with the name of the compressor. Remember that
106 : * the get_name() function is NOT ready from this constructor which
107 : * is why we require you to specify the name in the constructor.
108 : *
109 : * This function registers the compressor in the internal list of
110 : * compressors and then returns.
111 : *
112 : * \param[in] name The name of the compressor.
113 : */
114 4 : compressor_t::compressor_t(char const * name)
115 : {
116 4 : if(g_compressors == nullptr)
117 : {
118 2 : g_compressors = new compressor_map_t;
119 : }
120 4 : (*g_compressors)[name] = this;
121 4 : }
122 :
123 :
/** \brief Clean up the compressor.
 *
 * At this time the destructor does nothing: the code that would remove
 * the compressor from the internal map is commented out. This means
 * the map keeps a pointer to this object after it was destroyed.
 *
 * It is expected that compressors get destroyed on exit only as they
 * are expected to be implemented and defined statically.
 *
 * NOTE(review): if the commented out code gets re-enabled, the map
 * must only be deleted once the last compressor was removed, and the
 * pointer has to be tested against nullptr first -- as written, the
 * first destructor called would delete the map and the next one would
 * dereference a null pointer.
 */
compressor_t::~compressor_t()
{
    // TBD we probably don't need this code...
    //     it is rather slow so why waste our time on exit?
    //for(compressor_map_t::iterator
    //        it(g_compressors->begin());
    //        it != g_compressors->end();
    //        ++it)
    //{
    //    if(*it == this)
    //    {
    //        g_compressors->erase(it);
    //        break;
    //    }
    //}
    //delete g_compressors;
    //g_compressors = nullptr;
}
148 :
149 :
150 : /** \brief Return a list of available compressors.
151 : *
152 : * In case you have more than one Accept-Encoding this list may end up being
153 : * helpful to know whether a compression is available or not.
154 : *
155 : * \return A list of all the available compressors.
156 : */
157 0 : snap_string_list compressor_list()
158 : {
159 0 : snap_string_list list;
160 0 : for(compressor_map_t::const_iterator
161 0 : it(g_compressors->begin());
162 0 : it != g_compressors->end();
163 : ++it)
164 : {
165 0 : list.push_back((*it)->get_name());
166 : }
167 0 : return list;
168 : }
169 :
170 :
171 : /** \brief Return a pointer to the named compressor.
172 : *
173 : * This function checks whether a compressor named \p compressor_name
174 : * exists, if so it gets returned, otherwise a null pointer is returned.
175 : *
176 : * \param[in] compressor_name The name of the concerned compressor.
177 : *
178 : * \return A pointer to a compressor_t object or nullptr.
179 : */
180 0 : compressor_t * get_compressor(QString const & compressor_name)
181 : {
182 0 : if(g_compressors != nullptr)
183 : {
184 0 : if(g_compressors->contains(compressor_name))
185 : {
186 0 : return (*g_compressors)[compressor_name];
187 : }
188 : }
189 :
190 0 : return nullptr;
191 : }
192 :
193 :
194 : /** \brief Compress the input buffer.
195 : *
196 : * This function compresses the input buffer and returns the result
197 : * in a copy.
198 : *
199 : * IMPORTANT NOTE:
200 : *
201 : * There are several reasons why the compressor may refuse compressing
202 : * your input buffer and return the input as is. When this happens the
203 : * name of the compressor is changed to NO_COMPRESSION.
204 : *
205 : * \li The input is empty.
206 : * \li The input buffer is too small for that compressor.
207 : * \li The level is set to a value under 5%.
208 : * \li The buffer is way too large and allocating the compression buffer
209 : * failed (this should never happen on a serious server!)
210 : * \li The named compressor does not exist.
211 : *
212 : * Again, if the compression fails for whatever reason, the compressor_name
213 : * is set to NO_COMPRESSION. You have to make sure to test that name on
214 : * return to know what worked and what failed.
215 : *
216 : * \param[in,out] compressor_name The name of the compressor to use.
217 : * \param[in] input The input buffer which has to be compressed.
218 : * \param[in] level The level of compression (0 to 100).
219 : * \param[in] text Whether the input is text, set to false if not sure.
220 : *
221 : * \return A byte array with the compressed input data.
222 : */
223 0 : QByteArray compress(QString & compressor_name, QByteArray const & input, level_t level, bool text)
224 : {
225 : // clamp the level, just in case
226 : //
227 0 : if(level < 0)
228 : {
229 0 : level = 0;
230 : }
231 0 : else if(level > 100)
232 : {
233 0 : level = 100;
234 : }
235 :
236 : // nothing to compress if empty or too small a level
237 0 : if(input.size() == 0 || level < 5)
238 : {
239 : #ifdef DEBUG
240 0 : SNAP_LOG_TRACE("nothing to compress");
241 : #endif
242 0 : compressor_name = compressor_t::NO_COMPRESSION;
243 0 : return input;
244 : }
245 :
246 0 : if(compressor_name == compressor_t::BEST_COMPRESSION)
247 : {
248 0 : QByteArray best;
249 0 : for(compressor_map_t::const_iterator
250 0 : it(g_compressors->begin());
251 0 : it != g_compressors->end();
252 : ++it)
253 : {
254 0 : if(best.size() == 0)
255 : {
256 0 : best = (*it)->compress(input, level, text);
257 0 : compressor_name = (*it)->get_name();
258 : }
259 : else
260 : {
261 0 : QByteArray test((*it)->compress(input, level, text));
262 0 : if(test.size() < best.size())
263 : {
264 0 : best.swap(test);
265 0 : compressor_name = (*it)->get_name();
266 : }
267 : }
268 : }
269 0 : return best;
270 : }
271 :
272 0 : if(!g_compressors->contains(compressor_name))
273 : {
274 : // compressor is not available, return input as is...
275 0 : compressor_name = compressor_t::NO_COMPRESSION;
276 : #ifdef DEBUG
277 0 : SNAP_LOG_TRACE("compressor not found?!");
278 : #endif
279 0 : return input;
280 : }
281 :
282 : // avoid the compression if the result is larger or equal to the input!
283 0 : QByteArray const result((*g_compressors)[compressor_name]->compress(input, level, text));
284 0 : if(result.size() >= input.size())
285 : {
286 0 : compressor_name = compressor_t::NO_COMPRESSION;
287 : #ifdef DEBUG
288 0 : SNAP_LOG_TRACE("compression is larger?!");
289 : #endif
290 0 : return input;
291 : }
292 :
293 0 : return result;
294 : }
295 :
296 :
297 : /** \brief Decompress a buffer.
298 : *
299 : * This function checks the specified input buffer and decompresses it if
300 : * a compressor recognized its magic signature.
301 : *
302 : * If none of the compressors were compatible then the input is returned
303 : * as is. The compressor_name is set to NO_COMPRESSION in this case. This
304 : * does not really mean the buffer is not compressed, although it is likely
305 : * correct.
306 : *
307 : * \param[out] compressor_name Receives the name of the compressor used
308 : * to decompress the input data.
309 : * \param[in] input The input to decompress.
310 : *
311 : * \return The decompressed buffer.
312 : */
313 0 : QByteArray decompress(QString & compressor_name, QByteArray const & input)
314 : {
315 : // nothing to decompress if empty
316 0 : if(input.size() > 0)
317 : {
318 0 : for(compressor_map_t::const_iterator
319 0 : it(g_compressors->begin());
320 0 : it != g_compressors->end();
321 : ++it)
322 : {
323 0 : if((*it)->compatible(input))
324 : {
325 0 : compressor_name = (*it)->get_name();
326 0 : return (*it)->decompress(input);
327 : }
328 : }
329 : }
330 :
331 0 : compressor_name = compressor_t::NO_COMPRESSION;
332 0 : return input;
333 : }
334 :
335 :
336 : /** \brief Implementation of the GZip compressor.
337 : *
338 : * This class defines the gzip compressor which compresses and decompresses
339 : * data using the gzip file format.
340 : *
341 : * \note
342 : * This implementation makes use of the zlib library to do all the
343 : * compression and decompression work.
344 : */
345 2 : class gzip_t
346 : : public compressor_t
347 : {
348 : public:
349 2 : gzip_t()
350 2 : : compressor_t("gzip")
351 : {
352 2 : }
353 :
354 0 : virtual char const * get_name() const
355 : {
356 0 : return "gzip";
357 : }
358 :
359 0 : virtual QByteArray compress(QByteArray const & input, level_t level, bool text)
360 : {
361 : // clamp the level, just in case
362 : //
363 0 : if(level < 0)
364 : {
365 0 : level = 0;
366 : }
367 0 : else if(level > 100)
368 : {
369 0 : level = 100;
370 : }
371 :
372 : // transform the 0 to 100 level to the standard 1 to 9 in zlib
373 0 : int const zlib_level(bound_level((level * 2 + 25) / 25, Z_BEST_SPEED, Z_BEST_COMPRESSION));
374 : // initialize the zlib stream
375 : z_stream strm;
376 0 : memset(&strm, 0, sizeof(strm));
377 : // deflateInit2 expects the input to be defined
378 0 : strm.avail_in = input.size();
379 0 : strm.next_in = const_cast<Bytef *>(reinterpret_cast<const Bytef *>(input.data()));
380 : #pragma GCC diagnostic push
381 : #pragma GCC diagnostic ignored "-Wold-style-cast"
382 0 : int ret(deflateInit2(&strm, zlib_level, Z_DEFLATED, 15 + 16, 9, Z_DEFAULT_STRATEGY));
383 : #pragma GCC diagnostic pop
384 0 : if(ret != Z_OK)
385 : {
386 : // compression failed, return input as is
387 0 : return input;
388 : }
389 :
390 : // initialize the gzip header
391 : gz_header header;
392 0 : memset(&header, 0, sizeof(header));
393 0 : header.text = text;
394 0 : header.time = time(nullptr);
395 0 : header.os = 3;
396 0 : header.comment = const_cast<Bytef *>(reinterpret_cast<Bytef const *>("Snap! Websites"));
397 : //header.hcrc = 1; -- would that be useful?
398 0 : ret = deflateSetHeader(&strm, &header);
399 0 : if(ret != Z_OK)
400 : {
401 0 : deflateEnd(&strm);
402 0 : return input;
403 : }
404 :
405 : // prepare to call the deflate function
406 : // (to do it in one go!)
407 : // TODO check the size of the input buffer, if really large
408 : // (256Kb?) then break this up in multiple iterations
409 0 : QByteArray result;
410 0 : result.resize(static_cast<int>(deflateBound(&strm, strm.avail_in)));
411 0 : strm.avail_out = result.size();
412 0 : strm.next_out = reinterpret_cast<Bytef *>(result.data());
413 :
414 : // compress in one go
415 0 : ret = deflate(&strm, Z_FINISH);
416 0 : if(ret != Z_STREAM_END)
417 : {
418 0 : deflateEnd(&strm);
419 0 : return input;
420 : }
421 : // lose the extra size returned by deflateBound()
422 0 : result.resize(result.size() - strm.avail_out);
423 0 : deflateEnd(&strm);
424 0 : return result;
425 : }
426 :
427 0 : virtual bool compatible(QByteArray const & input) const
428 : {
429 : // the header is at least 10 bytes
430 : // the magic code (identification) is 0x1F 0x8B
431 0 : return input.size() >= 10 && input[0] == 0x1F && input[1] == static_cast<char>(0x8B);
432 : }
433 :
434 0 : virtual QByteArray decompress(QByteArray const & input)
435 : {
436 : // initialize the zlib stream
437 : z_stream strm;
438 0 : memset(&strm, 0, sizeof(strm));
439 : // inflateInit2 expects the input to be defined
440 0 : strm.avail_in = input.size();
441 0 : strm.next_in = const_cast<Bytef *>(reinterpret_cast<const Bytef *>(input.data()));
442 : #pragma GCC diagnostic push
443 : #pragma GCC diagnostic ignored "-Wold-style-cast"
444 0 : int ret(inflateInit2(&strm, 15 + 16));
445 : #pragma GCC diagnostic pop
446 0 : if(ret != Z_OK)
447 : {
448 : // decompression failed, return input as is assuming it was not
449 : // compressed maybe...
450 0 : return input;
451 : }
452 :
453 : // Unfortunately the zlib support for the gzip header does not help
454 : // us getting the ISIZE which is saved as the last 4 bytes of the
455 : // files (frankly?!)
456 : //
457 : // initialize the gzip header
458 : //gz_header header;
459 : //memset(&header, 0, sizeof(header));
460 : //ret = inflateGetHeader(&strm, &header);
461 : //if(ret != Z_OK)
462 : //{
463 : // inflateEnd(&strm);
464 : // return input;
465 : //}
466 : // The size is saved in the last 4 bytes in little endian
467 0 : size_t uncompressed_size(input[strm.avail_in - 4]
468 0 : | (input[strm.avail_in - 3] << 8)
469 0 : | (input[strm.avail_in - 2] << 16)
470 0 : | (input[strm.avail_in - 1] << 24));
471 :
472 : // prepare to call the inflate function
473 : // (to do it in one go!)
474 0 : QByteArray result;
475 0 : result.resize(static_cast<int>(uncompressed_size));
476 0 : strm.avail_out = result.size();
477 0 : strm.next_out = reinterpret_cast<Bytef *>(result.data());
478 :
479 : // decompress in one go
480 0 : ret = inflate(&strm, Z_FINISH);
481 0 : if(ret != Z_STREAM_END)
482 : {
483 0 : inflateEnd(&strm);
484 0 : return input;
485 : }
486 0 : inflateEnd(&strm);
487 0 : return result;
488 : }
489 :
490 0 : virtual QByteArray decompress(QByteArray const & input, size_t uncompressed_size)
491 : {
492 0 : NOTUSED(input);
493 0 : NOTUSED(uncompressed_size);
494 0 : throw compression_exception_not_implemented("gzip decompress() with the uncompressed_size parameter is not implemented.");
495 : }
496 :
497 2 : } g_gzip; // create statically
498 :
499 :
500 2 : class deflate_t
501 : : public compressor_t
502 : {
503 : public:
504 2 : deflate_t()
505 2 : : compressor_t("deflate")
506 : {
507 2 : }
508 :
509 0 : virtual const char *get_name() const
510 : {
511 0 : return "deflate";
512 : }
513 :
514 0 : virtual QByteArray compress(QByteArray const & input, level_t level, bool text)
515 : {
516 0 : NOTUSED(text);
517 :
518 : // transform the 0 to 100 level to the standard 1 to 9 in zlib
519 0 : int const zlib_level(bound_level((level * 2 + 25) / 25, Z_BEST_SPEED, Z_BEST_COMPRESSION));
520 : // initialize the zlib stream
521 : z_stream strm;
522 0 : memset(&strm, 0, sizeof(strm));
523 : // deflateInit2 expects the input to be defined
524 0 : strm.avail_in = input.size();
525 0 : strm.next_in = const_cast<Bytef *>(reinterpret_cast<Bytef const *>(input.data()));
526 : #pragma GCC diagnostic push
527 : #pragma GCC diagnostic ignored "-Wold-style-cast"
528 0 : int ret(deflateInit2(&strm, zlib_level, Z_DEFLATED, 15, 9, Z_DEFAULT_STRATEGY));
529 : #pragma GCC diagnostic pop
530 0 : if(ret != Z_OK)
531 : {
532 : // compression failed, return input as is
533 0 : return input;
534 : }
535 :
536 : // prepare to call the deflate function
537 : // (to do it in one go!)
538 : // TODO check the size of the input buffer, if really large
539 : // (256Kb?) then break this up in multiple iterations
540 0 : QByteArray result;
541 0 : result.resize(static_cast<int>(deflateBound(&strm, strm.avail_in)));
542 0 : strm.avail_out = result.size();
543 0 : strm.next_out = reinterpret_cast<Bytef *>(result.data());
544 :
545 : // compress in one go
546 0 : ret = deflate(&strm, Z_FINISH);
547 0 : if(ret != Z_STREAM_END)
548 : {
549 0 : deflateEnd(&strm);
550 0 : return input;
551 : }
552 : // lose the extra size returned by deflateBound()
553 0 : result.resize(result.size() - strm.avail_out);
554 0 : deflateEnd(&strm);
555 0 : return result;
556 : }
557 :
558 0 : virtual bool compatible(QByteArray const & input) const
559 : {
560 0 : NOTUSED(input);
561 :
562 : // there is no magic header in this one...
563 0 : return false;
564 : }
565 :
566 0 : virtual QByteArray decompress(QByteArray const & input)
567 : {
568 : // the decompress function for "deflate" requires the size in
569 : // our case so this function is not implemented for now...
570 0 : NOTUSED(input);
571 0 : throw compression_exception_not_implemented("gzip decompress() with the uncompressed_size parameter is not implemented.");
572 : }
573 :
574 0 : virtual QByteArray decompress(QByteArray const & input, size_t uncompressed_size)
575 : {
576 : // by default we cannot reach this function, if we get called, then
577 : // the caller specifically wanted to call us, in such a case we
578 : // expect the size of the uncompressed data to be specified...
579 :
580 : // initialize the zlib stream
581 : z_stream strm;
582 0 : memset(&strm, 0, sizeof(strm));
583 : // inflateInit expects the input to be defined
584 0 : strm.avail_in = input.size();
585 0 : strm.next_in = const_cast<Bytef *>(reinterpret_cast<Bytef const *>(input.data()));
586 : #pragma GCC diagnostic push
587 : #pragma GCC diagnostic ignored "-Wold-style-cast"
588 0 : int ret(inflateInit(&strm));
589 : #pragma GCC diagnostic pop
590 0 : if(ret != Z_OK)
591 : {
592 : // compression failed, return input as is
593 0 : return input;
594 : }
595 :
596 : // Unfortunately the zlib support for the gzip header does not help
597 : // us getting the ISIZE which is saved as the last 4 bytes of the
598 : // files (frankly?!)
599 : //
600 : // initialize the gzip header
601 : //gz_header header;
602 : //memset(&header, 0, sizeof(header));
603 : //ret = inflateGetHeader(&strm, &header);
604 : //if(ret != Z_OK)
605 : //{
606 : // inflateEnd(&strm);
607 : // return input;
608 : //}
609 : // The size is saved in the last 4 bytes in little endian
610 : //size_t uncompressed_size(input[strm.avail_in - 4]
611 : // | (input[strm.avail_in - 3] << 8)
612 : // | (input[strm.avail_in - 2] << 16)
613 : // | (input[strm.avail_in - 1] << 24));
614 :
615 : // prepare to call the inflate function
616 : // (to do it in one go!)
617 0 : QByteArray result;
618 0 : result.resize(static_cast<int>(uncompressed_size));
619 0 : strm.avail_out = result.size();
620 0 : strm.next_out = reinterpret_cast<Bytef *>(result.data());
621 :
622 : // decompress in one go
623 0 : ret = inflate(&strm, Z_FINISH);
624 0 : inflateEnd(&strm);
625 0 : if(ret != Z_STREAM_END)
626 : {
627 0 : return input;
628 : }
629 0 : return result;
630 : }
631 :
632 2 : } g_deflate; // create statically
633 :
634 :
635 :
636 :
637 :
638 :
/** \brief Define the type of this file.
 *
 * \param[in] type  The new type, saved in f_type.
 */
void archiver_t::file_t::set_type(type_t type)
{
    f_type = type;
}


/** \brief Define the content of this file.
 *
 * \param[in] data  The new content, saved in f_data.
 */
void archiver_t::file_t::set_data(QByteArray const& data)
{
    f_data = data;
}


/** \brief Define the name of this file.
 *
 * \param[in] filename  The new filename, saved in f_filename.
 */
void archiver_t::file_t::set_filename(QString const& filename)
{
    f_filename = filename;
}


/** \brief Define the owner of this file.
 *
 * The name and the identifier are saved as is in f_user and f_uid;
 * this function does not verify that they correspond to each other.
 *
 * \param[in] user  The name of the user.
 * \param[in] uid  The identifier of the user.
 */
void archiver_t::file_t::set_user(QString const& user, uid_t uid)
{
    f_user = user;
    f_uid = uid;
}


/** \brief Define the group of this file.
 *
 * The name and the identifier are saved as is in f_group and f_gid;
 * this function does not verify that they correspond to each other.
 *
 * \param[in] group  The name of the group.
 * \param[in] gid  The identifier of the group.
 */
void archiver_t::file_t::set_group(QString const& group, gid_t gid)
{
    f_group = group;
    f_gid = gid;
}


/** \brief Define the mode of this file.
 *
 * \param[in] mode  The new mode, saved in f_mode.
 */
void archiver_t::file_t::set_mode(mode_t mode)
{
    f_mode = mode;
}


/** \brief Define the modification time of this file.
 *
 * \param[in] mtime  The new modification time, saved in f_mtime.
 */
void archiver_t::file_t::set_mtime(time_t mtime)
{
    f_mtime = mtime;
}
681 :
682 :
/** \brief Retrieve the type of this file.
 *
 * \return The current value of f_type.
 */
archiver_t::file_t::type_t archiver_t::file_t::get_type() const
{
    return f_type;
}


/** \brief Retrieve the content of this file.
 *
 * \return A reference to f_data.
 */
QByteArray const & archiver_t::file_t::get_data() const
{
    return f_data;
}


/** \brief Retrieve the name of this file.
 *
 * \return A reference to f_filename.
 */
QString const & archiver_t::file_t::get_filename() const
{
    return f_filename;
}


/** \brief Retrieve the name of the user owning this file.
 *
 * \return A reference to f_user.
 */
QString const & archiver_t::file_t::get_user() const
{
    return f_user;
}


/** \brief Retrieve the name of the group of this file.
 *
 * \return A reference to f_group.
 */
QString const & archiver_t::file_t::get_group() const
{
    return f_group;
}


/** \brief Retrieve the identifier of the user owning this file.
 *
 * \return The current value of f_uid.
 */
uid_t archiver_t::file_t::get_uid() const
{
    return f_uid;
}


/** \brief Retrieve the identifier of the group of this file.
 *
 * \return The current value of f_gid.
 */
gid_t archiver_t::file_t::get_gid() const
{
    return f_gid;
}


/** \brief Retrieve the mode of this file.
 *
 * \return The current value of f_mode.
 */
mode_t archiver_t::file_t::get_mode() const
{
    return f_mode;
}


/** \brief Retrieve the modification time of this file.
 *
 * \return The current value of f_mtime.
 */
time_t archiver_t::file_t::get_mtime() const
{
    return f_mtime;
}
735 :
736 :
737 2 : archiver_t::archiver_t(char const * name)
738 : {
739 2 : if(g_archivers == nullptr)
740 : {
741 2 : g_archivers = new archiver_map_t;
742 : }
743 2 : (*g_archivers)[name] = this;
744 2 : }
745 :
746 :
/** \brief Clean up the archiver.
 *
 * At this time the destructor does nothing: the code that would remove
 * the archiver from the internal map is commented out. This means the
 * map keeps a pointer to this object after it was destroyed.
 *
 * NOTE(review): if the commented out code gets re-enabled, the map
 * must only be deleted once the last archiver was removed, and the
 * pointer has to be tested against nullptr first.
 */
archiver_t::~archiver_t()
{
    // TBD we probably do not need this code...
    //     it is rather slow so why waste our time on exit?
    //for(archiver_map_t::iterator
    //        it(g_archivers->begin());
    //        it != g_archivers->end();
    //        ++it)
    //{
    //    if(*it == this)
    //    {
    //        g_archivers->erase(it);
    //        break;
    //    }
    //}
    //delete g_archivers;
    //g_archivers = nullptr;
}
765 :
766 :
/** \brief Replace the content of the archive.
 *
 * This function only assigns \p input to f_archive.
 *
 * NOTE(review): a read position kept by an implementation is not
 * changed by this function; presumably the caller is expected to call
 * rewind_file() before reading the new archive -- to be confirmed.
 *
 * \param[in] input  The new content of the archive.
 */
void archiver_t::set_archive(QByteArray const & input)
{
    f_archive = input;
}


/** \brief Retrieve the content of the archive.
 *
 * \return A copy of f_archive.
 */
QByteArray archiver_t::get_archive() const
{
    return f_archive;
}
777 :
778 :
779 :
780 2 : class tar
781 : : public archiver_t
782 : {
783 : public:
784 2 : tar()
785 2 : : archiver_t("tar")
786 : {
787 2 : }
788 :
789 0 : virtual char const * get_name() const
790 : {
791 0 : return "tar";
792 : }
793 :
794 0 : virtual void append_file(file_t const & file)
795 : {
796 0 : QByteArray utf8;
797 :
798 : // INITIALIZE HEADER
799 0 : std::vector<char> header;
800 0 : header.resize(512, 0);
801 0 : std::string const ustar("ustar ");
802 0 : std::copy(ustar.begin(), ustar.end(), header.begin() + 257);
803 0 : header[263] = ' '; // version " \0"
804 0 : header[264] = '\0';
805 :
806 : // FILENAME
807 0 : QString fn(file.get_filename());
808 0 : int l(fn.length());
809 0 : if(l > 100)
810 : {
811 : // TODO: add support for longer filenames
812 0 : throw compression_exception_not_compatible("this file cannot be added to a tar archive at this point (filename too long)");
813 : }
814 0 : utf8 = fn.toUtf8();
815 0 : std::copy(utf8.data(), utf8.data() + utf8.size(), header.begin());
816 :
817 : // MODE, UID, GID, MTIME
818 0 : append_int(&header[100], file.get_mode(), 7, 8, '0');
819 0 : append_int(&header[108], file.get_uid(), 7, 8, '0');
820 0 : append_int(&header[116], file.get_gid(), 7, 8, '0');
821 0 : append_int(&header[136], static_cast<int>(file.get_mtime()), 11, 8, '0');
822 :
823 : // USER/GROUP NAMES
824 0 : utf8 = file.get_user().toUtf8();
825 0 : if(utf8.length() > 32)
826 : {
827 0 : throw compression_exception_not_compatible("this file cannot be added to a tar archive at this point (user name too long)");
828 : }
829 0 : std::copy(utf8.data(), utf8.data() + utf8.size(), header.begin() + 265);
830 :
831 0 : utf8 = file.get_group().toUtf8();
832 0 : if(utf8.length() > 32)
833 : {
834 0 : throw compression_exception_not_compatible("this file cannot be added to a tar archive at this point (group name too long)");
835 : }
836 0 : std::copy(utf8.data(), utf8.data() + utf8.size(), header.begin() + 265);
837 :
838 : // TYPE, SIZE
839 0 : switch(file.get_type())
840 : {
841 0 : case file_t::type_t::FILE_TYPE_REGULAR:
842 0 : header[156] = '0'; // regular (tar type)
843 0 : append_int(&header[124], file.get_data().size(), 11, 8, '0');
844 0 : break;
845 :
846 0 : case file_t::type_t::FILE_TYPE_DIRECTORY:
847 : // needs to be zero in ASCII
848 0 : header[156] = '5'; // directory (tar type)
849 0 : append_int(&header[124], 0, 11, 8, '0');
850 0 : break;
851 :
852 : //default: ... we could throw but here the compile fails if we
853 : // are missing some types
854 : }
855 :
856 0 : uint32_t checksum(tar_check_sum(&header[0]));
857 0 : if(checksum > 32767)
858 : {
859 : // no null in this case (very rare if at all possible)
860 0 : append_int(&header[148], checksum, 7, 8, '0');
861 : }
862 : else
863 : {
864 0 : append_int(&header[148], checksum, 6, 8, '0');
865 : }
866 0 : header[155] = ' ';
867 :
868 0 : f_archive.append(&header[0], static_cast<int>(header.size()));
869 :
870 0 : switch(file.get_type())
871 : {
872 0 : case file_t::type_t::FILE_TYPE_REGULAR:
873 0 : f_archive.append(file.get_data());
874 : {
875 : // padding to next 512 bytes
876 0 : uint32_t size(file.get_data().size());
877 0 : size &= 511;
878 0 : if(size != 0)
879 : {
880 0 : std::vector<char> pad;
881 0 : pad.resize(512 - size, 0);
882 0 : f_archive.append(&pad[0], static_cast<int>(pad.size()));
883 : }
884 : }
885 0 : break;
886 :
887 0 : default:
888 : // no data for that type
889 0 : break;
890 :
891 : }
892 0 : }
893 :
894 0 : virtual bool next_file(file_t & file) const
895 : {
896 : // any more files?
897 : // (make sure there is at least a header for now)
898 0 : if(f_pos + 512 > f_archive.size())
899 : {
900 0 : return false;
901 : }
902 :
903 : // read the header
904 0 : std::vector<char> header(f_archive.data() + f_pos, f_archive.data() + f_pos + 512);
905 :
906 : // MAGIC
907 0 : if(header[257] != 'u' || header[258] != 's' || header[259] != 't' || header[260] != 'a'
908 0 : || header[261] != 'r' || (header[262] != ' ' && header[262] != '\0'))
909 : {
910 : // if no MAGIC we may have empty blocks (which are legal at the
911 : // end of the file)
912 0 : for(int i(0); i < 512; ++i)
913 : {
914 0 : if(header[i] != '\0')
915 : {
916 0 : throw compression_exception_not_compatible(QString("ustar magic code missing at position %1").arg(f_pos));
917 : }
918 : }
919 0 : f_pos += 512;
920 : // TODO: test all the following blocks as they all should be null
921 : // (as you cannot find an empty block within the tarball)
922 0 : return false;
923 : }
924 :
925 0 : uint32_t const file_checksum(read_int(&header[148], 8, 8));
926 0 : uint32_t const comp_checksum(tar_check_sum(&header[0]));
927 0 : if(file_checksum != comp_checksum)
928 : {
929 0 : throw compression_exception_not_compatible(QString("ustar checksum code does not match what was expected"));
930 : }
931 :
932 0 : QString filename(QString::fromUtf8(&header[0], static_cast<int>(strnlen(&header[0], 100))));
933 0 : if(header[345] != '\0')
934 : {
935 : // this one has a prefix (long filename)
936 0 : QString prefix(QString::fromUtf8(&header[345], static_cast<int>(strnlen(&header[345], 155))));
937 0 : if(prefix.endsWith("/"))
938 : {
939 : // I think this case is considered a bug in a tarball...
940 0 : filename = prefix + filename;
941 : }
942 : else
943 : {
944 0 : filename = prefix + "/" + filename;
945 : }
946 : }
947 0 : file.set_filename(filename);
948 :
949 0 : switch(header[156])
950 : {
951 0 : case '\0':
952 : case '0':
953 0 : file.set_type(file_t::type_t::FILE_TYPE_REGULAR);
954 0 : break;
955 :
956 0 : case '5':
957 0 : file.set_type(file_t::type_t::FILE_TYPE_DIRECTORY);
958 0 : break;
959 :
960 :
961 0 : default:
962 0 : throw compression_exception_not_supported("file in tarball not supported (we accept regular and directory files only)");
963 :
964 : }
965 :
966 0 : file.set_mode (read_int(&header[100], 8, 8));
967 0 : file.set_mtime(read_int(&header[136], 12, 8));
968 :
969 0 : uid_t uid(read_int(&header[108], 8, 8));
970 0 : file.set_user (QString::fromUtf8(&header[265], 32), uid);
971 :
972 0 : gid_t gid(read_int(&header[116], 8, 8));
973 0 : file.set_group(QString::fromUtf8(&header[297], 32), gid);
974 :
975 0 : f_pos += 512;
976 :
977 0 : if(file.get_type() == file_t::type_t::FILE_TYPE_REGULAR)
978 : {
979 0 : uint32_t const size(read_int(&header[124], 12, 8));
980 0 : int const total_size((size + 511) & -512);
981 0 : if(f_pos + total_size > f_archive.size())
982 : {
983 0 : throw compression_exception_not_supported("file data not available (archive too small)");
984 : }
985 0 : QByteArray data;
986 0 : data.append(f_archive.data() + f_pos, size);
987 0 : file.set_data(data);
988 :
989 0 : f_pos += total_size;
990 : }
991 :
992 0 : return true;
993 : }
994 :
995 0 : virtual void rewind_file()
996 : {
997 0 : f_pos = 0;
998 0 : }
999 :
1000 : private:
1001 0 : void append_int(char * header, int value, unsigned int length, int base, char fill)
1002 : {
1003 : // save the number (minimum 1 digit)
1004 0 : do
1005 : {
1006 : // base is 8 or 10
1007 0 : header[length] = static_cast<char>((value % base) + '0');
1008 0 : value /= base;
1009 0 : --length;
1010 : }
1011 0 : while((length > 0) && (value != 0));
1012 :
1013 : // fill the left side with 'fill'
1014 0 : while(length > 0)
1015 : {
1016 0 : header[length] = fill;
1017 0 : --length;
1018 : }
1019 0 : }
1020 :
1021 0 : uint32_t read_int(char const * header, int length, int base) const
1022 : {
1023 : // TODO: add tests
1024 0 : uint32_t result(0);
1025 0 : for(; length > 0 && *header != '\0' && *header != ' '; --length, ++header)
1026 : {
1027 0 : result = result * base + (*header - '0');
1028 : }
1029 0 : return result;
1030 : }
1031 :
1032 0 : uint32_t tar_check_sum(char const * s) const
1033 : {
1034 0 : uint32_t result = 8 * ' '; // the checksum field
1035 :
1036 : // name + mode + uid + gid + size + mtime = 148 bytes
1037 0 : for(int n(148); n > 0; --n, ++s)
1038 : {
1039 0 : result += *s;
1040 : }
1041 :
1042 0 : s += 8; // skip the checksum field
1043 :
1044 : // everything after the checksum is another 356 bytes
1045 0 : for(int n(356); n > 0; --n, ++s)
1046 : {
1047 0 : result += *s;
1048 : }
1049 :
1050 0 : return result;
1051 : }
1052 :
1053 : mutable int32_t f_pos = 0;
1054 2 : } g_tar; // declare statically
1055 :
1056 :
1057 :
} // namespace compression
} // namespace snap
1060 : // vim: ts=4 sw=4 et
|