LCOV - code coverage report
Current view: top level - snapwebsites - compression.cpp (source / functions) Hit Total Coverage
Test: coverage.info Lines: 30 367 8.2 %
Date: 2019-12-15 17:13:15 Functions: 12 57 21.1 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : // Snap Websites Server -- compress (decompress) data
       2             : // Copyright (c) 2013-2019  Made to Order Software Corp.  All Rights Reserved
       3             : //
       4             : // This program is free software; you can redistribute it and/or modify
       5             : // it under the terms of the GNU General Public License as published by
       6             : // the Free Software Foundation; either version 2 of the License, or
       7             : // (at your option) any later version.
       8             : //
       9             : // This program is distributed in the hope that it will be useful,
      10             : // but WITHOUT ANY WARRANTY; without even the implied warranty of
      11             : // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      12             : // GNU General Public License for more details.
      13             : //
      14             : // You should have received a copy of the GNU General Public License
      15             : // along with this program; if not, write to the Free Software
      16             : // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
      17             : 
      18             : // self
      19             : //
      20             : #include "snapwebsites/compression.h"
      21             : 
      22             : // self lib
      23             : //
      24             : #include "snapwebsites/log.h"
      25             : 
      26             : // snapdev lib
      27             : //
      28             : #include <snapdev/not_used.h>
      29             : 
      30             : // Qt lib
      31             : //
      32             : #include <QMap>
      33             : 
      34             : // C lib
      35             : //
      36             : #pragma GCC diagnostic push
      37             : #pragma GCC diagnostic ignored "-Wold-style-cast"
      38             : #include <zlib.h>
      39             : #pragma GCC diagnostic pop
      40             : 
      41             : // last include
      42             : //
      43             : #include <snapdev/poison.h>
      44             : 
      45             : 
      46             : namespace snap
      47             : {
      48             : namespace compression
      49             : {
      50             : 
      51             : namespace
      52             : {
      53             : typedef QMap<QString, compressor_t *>   compressor_map_t;
      54             : typedef QMap<QString, archiver_t *>   archiver_map_t;
      55             : 
      56             : // IMPORTANT NOTE:
      57             : // This list only makes use of bare pointers for many good reasons.
      58             : // (i.e. all compressors are defined statitcally, not allocated)
      59             : // Do not try to change it! Thank you.
      60             : compressor_map_t * g_compressors;
      61             : 
      62             : // IMPORTANT NOTE:
      63             : // This list only makes use of bare pointers for many good reasons.
      64             : // (i.e. all archivers are defined statitcally, not allocated)
      65             : // Do not try to change it! Thank you.
      66             : archiver_map_t * g_archivers;
      67             : 
      68           0 : int bound_level(int level, int min, int max)
      69             : {
      70           0 :     return level < min ? min : (level > max ? max : level);
      71             : }
      72             : 
      73             : }
      74             : 
      75             : 
      76             : /** \brief Special compressor name to get the best compression available.
      77             :  *
      78             :  * Whenever we send a page on the Internet, we can compress it with zlib
      79             :  * (gzip, really). However, more and more, browsers are starting to support
      80             :  * other compressors. For example, Chrome supports "sdch" (a vcdiff
      81             :  * compressor) and FireFox is testing with lzma.
      82             :  *
      83             :  * Using the name "best" for the compressor will test with all available
      84             :  * compressions and return the smallest result whatever it is.
      85             :  */
      86             : char const *compressor_t::BEST_COMPRESSION = "best";
      87             : 
      88             : 
      89             : /** \brief Special compressor name returned in some cases.
      90             :  *
      91             :  * When trying to compress a buffer, there are several reasons why the
      92             :  * compression may "fail". When that happens the result is the same
      93             :  * as the input, meaning that the data is not going to be compressed
      94             :  * at all.
      95             :  *
      96             :  * You should always verify whether the compression worked by testing
      97             :  * the compressor_name variable on return.
      98             :  */
      99             : char const *compressor_t::NO_COMPRESSION = "none";
     100             : 
     101             : 
     102             : /** \brief Register the compressor.
     103             :  *
     104             :  * Whenever you implement a compressor, the constructor must call
     105             :  * this constructor with the name of the compressor. Remember that
     106             :  * the get_name() function is NOT ready from this constructor which
     107             :  * is why we require you to specify the name in the constructor.
     108             :  *
     109             :  * This function registers the compressor in the internal list of
     110             :  * compressors and then returns.
     111             :  *
     112             :  * \param[in] name  The name of the compressor.
     113             :  */
     114           4 : compressor_t::compressor_t(char const * name)
     115             : {
     116           4 :     if(g_compressors == nullptr)
     117             :     {
     118           2 :         g_compressors = new compressor_map_t;
     119             :     }
     120           4 :     (*g_compressors)[name] = this;
     121           4 : }
     122             : 
     123             : 
     124             : /** \brief Clean up the compressor.
     125             :  *
     126             :  * This function unregisters the compressor. Note that it is expected
     127             :  * that compressors get destroyed on exit only as they are expected
     128             :  * to be implemented and defined statically.
     129             :  */
     130           4 : compressor_t::~compressor_t()
     131             : {
     132             :     // TBD we probably don't need this code...
     133             :     //     it is rather slow so why waste our time on exit?
     134             :     //for(compressor_map_t::iterator
     135             :     //        it(g_compressors->begin());
     136             :     //        it != g_compressors->end();
     137             :     //        ++it)
     138             :     //{
     139             :     //    if(*it == this)
     140             :     //    {
     141             :     //        g_compressors->erase(it);
     142             :     //        break;
     143             :     //    }
     144             :     //}
     145             :     //delete g_compressors;
     146             :     //g_compressors = nullptr;
     147           4 : }
     148             : 
     149             : 
     150             : /** \brief Return a list of available compressors.
     151             :  *
     152             :  * In case you have more than one Accept-Encoding this list may end up being
     153             :  * helpful to know whether a compression is available or not.
     154             :  *
     155             :  * \return A list of all the available compressors.
     156             :  */
     157           0 : snap_string_list compressor_list()
     158             : {
     159           0 :     snap_string_list list;
     160           0 :     for(compressor_map_t::const_iterator
     161           0 :             it(g_compressors->begin());
     162           0 :             it != g_compressors->end();
     163             :             ++it)
     164             :     {
     165           0 :         list.push_back((*it)->get_name());
     166             :     }
     167           0 :     return list;
     168             : }
     169             : 
     170             : 
     171             : /** \brief Return a pointer to the named compressor.
     172             :  *
     173             :  * This function checks whether a compressor named \p compressor_name
     174             :  * exists, if so it gets returned, otherwise a null pointer is returned.
     175             :  *
     176             :  * \param[in] compressor_name  The name of the concerned compressor.
     177             :  *
     178             :  * \return A pointer to a compressor_t object or nullptr.
     179             :  */
     180           0 : compressor_t * get_compressor(QString const & compressor_name)
     181             : {
     182           0 :     if(g_compressors != nullptr)
     183             :     {
     184           0 :         if(g_compressors->contains(compressor_name))
     185             :         {
     186           0 :             return (*g_compressors)[compressor_name];
     187             :         }
     188             :     }
     189             : 
     190           0 :     return nullptr;
     191             : }
     192             : 
     193             : 
     194             : /** \brief Compress the input buffer.
     195             :  *
     196             :  * This function compresses the input buffer and returns the result
     197             :  * in a copy.
     198             :  *
     199             :  * IMPORTANT NOTE:
     200             :  *
     201             :  * There are several reasons why the compressor may refuse compressing
     202             :  * your input buffer and return the input as is. When this happens the
     203             :  * name of the compressor is changed to NO_COMPRESSION.
     204             :  *
     205             :  * \li The input is empty.
     206             :  * \li The input buffer is too small for that compressor.
     207             :  * \li The level is set to a value under 5%.
     208             :  * \li The buffer is way too large and allocating the compression buffer
     209             :  *     failed (this should never happen on a serious server!)
     210             :  * \li The named compressor does not exist.
     211             :  *
     212             :  * Again, if the compression fails for whatever reason, the compressor_name
     213             :  * is set to NO_COMPRESSION. You have to make sure to test that name on
     214             :  * return to know what worked and what failed.
     215             :  *
     216             :  * \param[in,out] compressor_name  The name of the compressor to use.
     217             :  * \param[in] input  The input buffer which has to be compressed.
     218             :  * \param[in] level  The level of compression (0 to 100).
     219             :  * \param[in] text  Whether the input is text, set to false if not sure.
     220             :  *
     221             :  * \return A byte array with the compressed input data.
     222             :  */
     223           0 : QByteArray compress(QString & compressor_name, QByteArray const & input, level_t level, bool text)
     224             : {
     225             :     // clamp the level, just in case
     226             :     //
     227           0 :     if(level < 0)
     228             :     {
     229           0 :         level = 0;
     230             :     }
     231           0 :     else if(level > 100)
     232             :     {
     233           0 :         level = 100;
     234             :     }
     235             : 
     236             :     // nothing to compress if empty or too small a level
     237           0 :     if(input.size() == 0 || level < 5)
     238             :     {
     239             : #ifdef DEBUG
     240           0 : SNAP_LOG_TRACE("nothing to compress");
     241             : #endif
     242           0 :         compressor_name = compressor_t::NO_COMPRESSION;
     243           0 :         return input;
     244             :     }
     245             : 
     246           0 :     if(compressor_name == compressor_t::BEST_COMPRESSION)
     247             :     {
     248           0 :         QByteArray best;
     249           0 :         for(compressor_map_t::const_iterator
     250           0 :                 it(g_compressors->begin());
     251           0 :                 it != g_compressors->end();
     252             :                 ++it)
     253             :         {
     254           0 :             if(best.size() == 0)
     255             :             {
     256           0 :                 best = (*it)->compress(input, level, text);
     257           0 :                 compressor_name = (*it)->get_name();
     258             :             }
     259             :             else
     260             :             {
     261           0 :                 QByteArray test((*it)->compress(input, level, text));
     262           0 :                 if(test.size() < best.size())
     263             :                 {
     264           0 :                     best.swap(test);
     265           0 :                     compressor_name = (*it)->get_name();
     266             :                 }
     267             :             }
     268             :         }
     269           0 :         return best;
     270             :     }
     271             : 
     272           0 :     if(!g_compressors->contains(compressor_name))
     273             :     {
     274             :         // compressor is not available, return input as is...
     275           0 :         compressor_name = compressor_t::NO_COMPRESSION;
     276             : #ifdef DEBUG
     277           0 : SNAP_LOG_TRACE("compressor not found?!");
     278             : #endif
     279           0 :         return input;
     280             :     }
     281             : 
     282             :     // avoid the compression if the result is larger or equal to the input!
     283           0 :     QByteArray const result((*g_compressors)[compressor_name]->compress(input, level, text));
     284           0 :     if(result.size() >= input.size())
     285             :     {
     286           0 :         compressor_name = compressor_t::NO_COMPRESSION;
     287             : #ifdef DEBUG
     288           0 : SNAP_LOG_TRACE("compression is larger?!");
     289             : #endif
     290           0 :         return input;
     291             :     }
     292             : 
     293           0 :     return result;
     294             : }
     295             : 
     296             : 
     297             : /** \brief Decompress a buffer.
     298             :  *
     299             :  * This function checks the specified input buffer and decompresses it if
     300             :  * a compressor recognized its magic signature.
     301             :  *
     302             :  * If none of the compressors were compatible then the input is returned
     303             :  * as is. The compressor_name is set to NO_COMPRESSION in this case. This
     304             :  * does not really mean the buffer is not compressed, although it is likely
     305             :  * correct.
     306             :  *
     307             :  * \param[out] compressor_name  Receives the name of the compressor used
     308             :  *                              to decompress the input data.
     309             :  * \param[in] input  The input to decompress.
     310             :  *
     311             :  * \return The decompressed buffer.
     312             :  */
     313           0 : QByteArray decompress(QString & compressor_name, QByteArray const & input)
     314             : {
     315             :     // nothing to decompress if empty
     316           0 :     if(input.size() > 0)
     317             :     {
     318           0 :         for(compressor_map_t::const_iterator
     319           0 :                 it(g_compressors->begin());
     320           0 :                 it != g_compressors->end();
     321             :                 ++it)
     322             :         {
     323           0 :             if((*it)->compatible(input))
     324             :             {
     325           0 :                 compressor_name = (*it)->get_name();
     326           0 :                 return (*it)->decompress(input);
     327             :             }
     328             :         }
     329             :     }
     330             : 
     331           0 :     compressor_name = compressor_t::NO_COMPRESSION;
     332           0 :     return input;
     333             : }
     334             : 
     335             : 
     336             : /** \brief Implementation of the GZip compressor.
     337             :  *
     338             :  * This class defines the gzip compressor which compresses and decompresses
     339             :  * data using the gzip file format.
     340             :  *
     341             :  * \note
     342             :  * This implementation makes use of the zlib library to do all the
     343             :  * compression and decompression work.
     344             :  */
     345           2 : class gzip_t
     346             :     : public compressor_t
     347             : {
     348             : public:
     349           2 :     gzip_t()
     350           2 :         : compressor_t("gzip")
     351             :     {
     352           2 :     }
     353             : 
     354           0 :     virtual char const * get_name() const
     355             :     {
     356           0 :         return "gzip";
     357             :     }
     358             : 
     359           0 :     virtual QByteArray compress(QByteArray const & input, level_t level, bool text)
     360             :     {
     361             :         // clamp the level, just in case
     362             :         //
     363           0 :         if(level < 0)
     364             :         {
     365           0 :             level = 0;
     366             :         }
     367           0 :         else if(level > 100)
     368             :         {
     369           0 :             level = 100;
     370             :         }
     371             : 
     372             :         // transform the 0 to 100 level to the standard 1 to 9 in zlib
     373           0 :         int const zlib_level(bound_level((level * 2 + 25) / 25, Z_BEST_SPEED, Z_BEST_COMPRESSION));
     374             :         // initialize the zlib stream
     375             :         z_stream strm;
     376           0 :         memset(&strm, 0, sizeof(strm));
     377             :         // deflateInit2 expects the input to be defined
     378           0 :         strm.avail_in = input.size();
     379           0 :         strm.next_in = const_cast<Bytef *>(reinterpret_cast<const Bytef *>(input.data()));
     380             : #pragma GCC diagnostic push
     381             : #pragma GCC diagnostic ignored "-Wold-style-cast"
     382           0 :         int ret(deflateInit2(&strm, zlib_level, Z_DEFLATED, 15 + 16, 9, Z_DEFAULT_STRATEGY));
     383             : #pragma GCC diagnostic pop
     384           0 :         if(ret != Z_OK)
     385             :         {
     386             :             // compression failed, return input as is
     387           0 :             return input;
     388             :         }
     389             : 
     390             :         // initialize the gzip header
     391             :         gz_header header;
     392           0 :         memset(&header, 0, sizeof(header));
     393           0 :         header.text = text;
     394           0 :         header.time = time(nullptr);
     395           0 :         header.os = 3;
     396           0 :         header.comment = const_cast<Bytef *>(reinterpret_cast<Bytef const *>("Snap! Websites"));
     397             :         //header.hcrc = 1; -- would that be useful?
     398           0 :         ret = deflateSetHeader(&strm, &header);
     399           0 :         if(ret != Z_OK)
     400             :         {
     401           0 :             deflateEnd(&strm);
     402           0 :             return input;
     403             :         }
     404             : 
     405             :         // prepare to call the deflate function
     406             :         // (to do it in one go!)
     407             :         // TODO check the size of the input buffer, if really large
     408             :         //      (256Kb?) then break this up in multiple iterations
     409           0 :         QByteArray result;
     410           0 :         result.resize(static_cast<int>(deflateBound(&strm, strm.avail_in)));
     411           0 :         strm.avail_out = result.size();
     412           0 :         strm.next_out = reinterpret_cast<Bytef *>(result.data());
     413             : 
     414             :         // compress in one go
     415           0 :         ret = deflate(&strm, Z_FINISH);
     416           0 :         if(ret != Z_STREAM_END)
     417             :         {
     418           0 :             deflateEnd(&strm);
     419           0 :             return input;
     420             :         }
     421             :         // lose the extra size returned by deflateBound()
     422           0 :         result.resize(result.size() - strm.avail_out);
     423           0 :         deflateEnd(&strm);
     424           0 :         return result;
     425             :     }
     426             : 
     427           0 :     virtual bool compatible(QByteArray const & input) const
     428             :     {
     429             :         // the header is at least 10 bytes
     430             :         // the magic code (identification) is 0x1F 0x8B
     431           0 :         return input.size() >= 10 && input[0] == 0x1F && input[1] == static_cast<char>(0x8B);
     432             :     }
     433             : 
     434           0 :     virtual QByteArray decompress(QByteArray const & input)
     435             :     {
     436             :         // initialize the zlib stream
     437             :         z_stream strm;
     438           0 :         memset(&strm, 0, sizeof(strm));
     439             :         // inflateInit2 expects the input to be defined
     440           0 :         strm.avail_in = input.size();
     441           0 :         strm.next_in = const_cast<Bytef *>(reinterpret_cast<const Bytef *>(input.data()));
     442             : #pragma GCC diagnostic push
     443             : #pragma GCC diagnostic ignored "-Wold-style-cast"
     444           0 :         int ret(inflateInit2(&strm, 15 + 16));
     445             : #pragma GCC diagnostic pop
     446           0 :         if(ret != Z_OK)
     447             :         {
     448             :             // decompression failed, return input as is assuming it was not
     449             :             // compressed maybe...
     450           0 :             return input;
     451             :         }
     452             : 
     453             :         // Unfortunately the zlib support for the gzip header does not help
     454             :         // us getting the ISIZE which is saved as the last 4 bytes of the
     455             :         // files (frankly?!)
     456             :         //
     457             :         // initialize the gzip header
     458             :         //gz_header header;
     459             :         //memset(&header, 0, sizeof(header));
     460             :         //ret = inflateGetHeader(&strm, &header);
     461             :         //if(ret != Z_OK)
     462             :         //{
     463             :         //    inflateEnd(&strm);
     464             :         //    return input;
     465             :         //}
     466             :         // The size is saved in the last 4 bytes in little endian
     467           0 :         size_t uncompressed_size(input[strm.avail_in - 4]
     468           0 :                 | (input[strm.avail_in - 3] << 8)
     469           0 :                 | (input[strm.avail_in - 2] << 16)
     470           0 :                 | (input[strm.avail_in - 1] << 24));
     471             : 
     472             :         // prepare to call the inflate function
     473             :         // (to do it in one go!)
     474           0 :         QByteArray result;
     475           0 :         result.resize(static_cast<int>(uncompressed_size));
     476           0 :         strm.avail_out = result.size();
     477           0 :         strm.next_out = reinterpret_cast<Bytef *>(result.data());
     478             : 
     479             :         // decompress in one go
     480           0 :         ret = inflate(&strm, Z_FINISH);
     481           0 :         if(ret != Z_STREAM_END)
     482             :         {
     483           0 :             inflateEnd(&strm);
     484           0 :             return input;
     485             :         }
     486           0 :         inflateEnd(&strm);
     487           0 :         return result;
     488             :     }
     489             : 
     490           0 :     virtual QByteArray decompress(QByteArray const & input, size_t uncompressed_size)
     491             :     {
     492           0 :         NOTUSED(input);
     493           0 :         NOTUSED(uncompressed_size);
     494           0 :         throw compression_exception_not_implemented("gzip decompress() with the uncompressed_size parameter is not implemented.");
     495             :     }
     496             : 
     497           2 : } g_gzip; // create statically
     498             : 
     499             : 
     500           2 : class deflate_t
     501             :     : public compressor_t
     502             : {
     503             : public:
     504           2 :     deflate_t()
     505           2 :         : compressor_t("deflate")
     506             :     {
     507           2 :     }
     508             : 
     509           0 :     virtual const char *get_name() const
     510             :     {
     511           0 :         return "deflate";
     512             :     }
     513             : 
     514           0 :     virtual QByteArray compress(QByteArray const & input, level_t level, bool text)
     515             :     {
     516           0 :         NOTUSED(text);
     517             : 
     518             :         // transform the 0 to 100 level to the standard 1 to 9 in zlib
     519           0 :         int const zlib_level(bound_level((level * 2 + 25) / 25, Z_BEST_SPEED, Z_BEST_COMPRESSION));
     520             :         // initialize the zlib stream
     521             :         z_stream strm;
     522           0 :         memset(&strm, 0, sizeof(strm));
     523             :         // deflateInit2 expects the input to be defined
     524           0 :         strm.avail_in = input.size();
     525           0 :         strm.next_in = const_cast<Bytef *>(reinterpret_cast<Bytef const *>(input.data()));
     526             : #pragma GCC diagnostic push
     527             : #pragma GCC diagnostic ignored "-Wold-style-cast"
     528           0 :         int ret(deflateInit2(&strm, zlib_level, Z_DEFLATED, 15, 9, Z_DEFAULT_STRATEGY));
     529             : #pragma GCC diagnostic pop
     530           0 :         if(ret != Z_OK)
     531             :         {
     532             :             // compression failed, return input as is
     533           0 :             return input;
     534             :         }
     535             : 
     536             :         // prepare to call the deflate function
     537             :         // (to do it in one go!)
     538             :         // TODO check the size of the input buffer, if really large
     539             :         //      (256Kb?) then break this up in multiple iterations
     540           0 :         QByteArray result;
     541           0 :         result.resize(static_cast<int>(deflateBound(&strm, strm.avail_in)));
     542           0 :         strm.avail_out = result.size();
     543           0 :         strm.next_out = reinterpret_cast<Bytef *>(result.data());
     544             : 
     545             :         // compress in one go
     546           0 :         ret = deflate(&strm, Z_FINISH);
     547           0 :         if(ret != Z_STREAM_END)
     548             :         {
     549           0 :             deflateEnd(&strm);
     550           0 :             return input;
     551             :         }
     552             :         // lose the extra size returned by deflateBound()
     553           0 :         result.resize(result.size() - strm.avail_out);
     554           0 :         deflateEnd(&strm);
     555           0 :         return result;
     556             :     }
     557             : 
     558           0 :     virtual bool compatible(QByteArray const & input) const
     559             :     {
     560           0 :         NOTUSED(input);
     561             : 
     562             :         // there is no magic header in this one...
     563           0 :         return false;
     564             :     }
     565             : 
     566           0 :     virtual QByteArray decompress(QByteArray const & input)
     567             :     {
     568             :         // the decompress function for "deflate" requires the size in
     569             :         // our case so this function is not implemented for now...
     570           0 :         NOTUSED(input);
     571           0 :         throw compression_exception_not_implemented("gzip decompress() with the uncompressed_size parameter is not implemented.");
     572             :     }
     573             : 
     574           0 :     virtual QByteArray decompress(QByteArray const & input, size_t uncompressed_size)
     575             :     {
     576             :         // by default we cannot reach this function, if we get called, then
     577             :         // the caller specifically wanted to call us, in such a case we
     578             :         // expect the size of the uncompressed data to be specified...
     579             : 
     580             :         // initialize the zlib stream
     581             :         z_stream strm;
     582           0 :         memset(&strm, 0, sizeof(strm));
     583             :         // inflateInit expects the input to be defined
     584           0 :         strm.avail_in = input.size();
     585           0 :         strm.next_in = const_cast<Bytef *>(reinterpret_cast<Bytef const *>(input.data()));
     586             : #pragma GCC diagnostic push
     587             : #pragma GCC diagnostic ignored "-Wold-style-cast"
     588           0 :         int ret(inflateInit(&strm));
     589             : #pragma GCC diagnostic pop
     590           0 :         if(ret != Z_OK)
     591             :         {
     592             :             // compression failed, return input as is
     593           0 :             return input;
     594             :         }
     595             : 
     596             :         // Unfortunately the zlib support for the gzip header does not help
     597             :         // us getting the ISIZE which is saved as the last 4 bytes of the
     598             :         // files (frankly?!)
     599             :         //
     600             :         // initialize the gzip header
     601             :         //gz_header header;
     602             :         //memset(&header, 0, sizeof(header));
     603             :         //ret = inflateGetHeader(&strm, &header);
     604             :         //if(ret != Z_OK)
     605             :         //{
     606             :         //    inflateEnd(&strm);
     607             :         //    return input;
     608             :         //}
     609             :         // The size is saved in the last 4 bytes in little endian
     610             :         //size_t uncompressed_size(input[strm.avail_in - 4]
     611             :         //        | (input[strm.avail_in - 3] << 8)
     612             :         //        | (input[strm.avail_in - 2] << 16)
     613             :         //        | (input[strm.avail_in - 1] << 24));
     614             : 
     615             :         // prepare to call the inflate function
     616             :         // (to do it in one go!)
     617           0 :         QByteArray result;
     618           0 :         result.resize(static_cast<int>(uncompressed_size));
     619           0 :         strm.avail_out = result.size();
     620           0 :         strm.next_out = reinterpret_cast<Bytef *>(result.data());
     621             : 
     622             :         // decompress in one go
     623           0 :         ret = inflate(&strm, Z_FINISH);
     624           0 :         inflateEnd(&strm);
     625           0 :         if(ret != Z_STREAM_END)
     626             :         {
     627           0 :             return input;
     628             :         }
     629           0 :         return result;
     630             :     }
     631             : 
     632           2 : } g_deflate; // create statically
     633             : 
     634             : 
     635             : 
     636             : 
     637             : 
     638             : 
     639           0 : void archiver_t::file_t::set_type(type_t type)
     640             : {
     641           0 :     f_type = type;
     642           0 : }
     643             : 
     644             : 
     645           0 : void archiver_t::file_t::set_data(QByteArray const& data)
     646             : {
     647           0 :     f_data = data;
     648           0 : }
     649             : 
     650             : 
     651           0 : void archiver_t::file_t::set_filename(QString const& filename)
     652             : {
     653           0 :     f_filename = filename;
     654           0 : }
     655             : 
     656             : 
     657           0 : void archiver_t::file_t::set_user(QString const& user, uid_t uid)
     658             : {
     659           0 :     f_user = user;
     660           0 :     f_uid = uid;
     661           0 : }
     662             : 
     663             : 
     664           0 : void archiver_t::file_t::set_group(QString const& group, gid_t gid)
     665             : {
     666           0 :     f_group = group;
     667           0 :     f_gid = gid;
     668           0 : }
     669             : 
     670             : 
     671           0 : void archiver_t::file_t::set_mode(mode_t mode)
     672             : {
     673           0 :     f_mode = mode;
     674           0 : }
     675             : 
     676             : 
     677           0 : void archiver_t::file_t::set_mtime(time_t mtime)
     678             : {
     679           0 :     f_mtime = mtime;
     680           0 : }
     681             : 
     682             : 
     683           0 : archiver_t::file_t::type_t archiver_t::file_t::get_type() const
     684             : {
     685           0 :     return f_type;
     686             : }
     687             : 
     688             : 
     689           0 : QByteArray const & archiver_t::file_t::get_data() const
     690             : {
     691           0 :     return f_data;
     692             : }
     693             : 
     694             : 
     695           0 : QString const & archiver_t::file_t::get_filename() const
     696             : {
     697           0 :     return f_filename;
     698             : }
     699             : 
     700             : 
     701           0 : QString const & archiver_t::file_t::get_user() const
     702             : {
     703           0 :     return f_user;
     704             : }
     705             : 
     706             : 
     707           0 : QString const & archiver_t::file_t::get_group() const
     708             : {
     709           0 :     return f_group;
     710             : }
     711             : 
     712             : 
     713           0 : uid_t archiver_t::file_t::get_uid() const
     714             : {
     715           0 :     return f_uid;
     716             : }
     717             : 
     718             : 
     719           0 : gid_t archiver_t::file_t::get_gid() const
     720             : {
     721           0 :     return f_gid;
     722             : }
     723             : 
     724             : 
     725           0 : mode_t archiver_t::file_t::get_mode() const
     726             : {
     727           0 :     return f_mode;
     728             : }
     729             : 
     730             : 
     731           0 : time_t archiver_t::file_t::get_mtime() const
     732             : {
     733           0 :     return f_mtime;
     734             : }
     735             : 
     736             : 
     737           2 : archiver_t::archiver_t(char const * name)
     738             : {
     739           2 :     if(g_archivers == nullptr)
     740             :     {
     741           2 :         g_archivers = new archiver_map_t;
     742             :     }
     743           2 :     (*g_archivers)[name] = this;
     744           2 : }
     745             : 
     746             : 
     747           2 : archiver_t::~archiver_t()
     748             : {
     749             :     // TBD we probably do not need this code...
     750             :     //     it is rather slow so why waste our time on exit?
     751             :     //for(archiver_map_t::iterator
     752             :     //        it(g_archivers->begin());
     753             :     //        it != g_archivers->end();
     754             :     //        ++it)
     755             :     //{
     756             :     //    if(*it == this)
     757             :     //    {
     758             :     //        g_archivers->erase(it);
     759             :     //        break;
     760             :     //    }
     761             :     //}
     762             :     //delete g_archivers;
     763             :     //g_archivers = nullptr;
     764           2 : }
     765             : 
     766             : 
     767           0 : void archiver_t::set_archive(QByteArray const & input)
     768             : {
     769           0 :     f_archive = input;
     770           0 : }
     771             : 
     772             : 
     773           0 : QByteArray archiver_t::get_archive() const
     774             : {
     775           0 :     return f_archive;
     776             : }
     777             : 
     778             : 
     779             : 
     780           2 : class tar
     781             :     : public archiver_t
     782             : {
     783             : public:
     784           2 :     tar()
     785           2 :         : archiver_t("tar")
     786             :     {
     787           2 :     }
     788             : 
     789           0 :     virtual char const * get_name() const
     790             :     {
     791           0 :         return "tar";
     792             :     }
     793             : 
     794           0 :     virtual void append_file(file_t const & file)
     795             :     {
     796           0 :         QByteArray utf8;
     797             : 
     798             :         // INITIALIZE HEADER
     799           0 :         std::vector<char> header;
     800           0 :         header.resize(512, 0);
     801           0 :         std::string const ustar("ustar ");
     802           0 :         std::copy(ustar.begin(), ustar.end(), header.begin() + 257);
     803           0 :         header[263] = ' '; // version " \0"
     804           0 :         header[264] = '\0';
     805             : 
     806             :         // FILENAME
     807           0 :         QString fn(file.get_filename());
     808           0 :         int l(fn.length());
     809           0 :         if(l > 100)
     810             :         {
     811             :             // TODO: add support for longer filenames
     812           0 :             throw compression_exception_not_compatible("this file cannot be added to a tar archive at this point (filename too long)");
     813             :         }
     814           0 :         utf8 = fn.toUtf8();
     815           0 :         std::copy(utf8.data(), utf8.data() + utf8.size(), header.begin());
     816             : 
     817             :         // MODE, UID, GID, MTIME
     818           0 :         append_int(&header[100], file.get_mode(),   7, 8, '0');
     819           0 :         append_int(&header[108], file.get_uid(),    7, 8, '0');
     820           0 :         append_int(&header[116], file.get_gid(),    7, 8, '0');
     821           0 :         append_int(&header[136], static_cast<int>(file.get_mtime()), 11, 8, '0');
     822             : 
     823             :         // USER/GROUP NAMES
     824           0 :         utf8 = file.get_user().toUtf8();
     825           0 :         if(utf8.length() > 32)
     826             :         {
     827           0 :             throw compression_exception_not_compatible("this file cannot be added to a tar archive at this point (user name too long)");
     828             :         }
     829           0 :         std::copy(utf8.data(), utf8.data() + utf8.size(), header.begin() + 265);
     830             : 
     831           0 :         utf8 = file.get_group().toUtf8();
     832           0 :         if(utf8.length() > 32)
     833             :         {
     834           0 :             throw compression_exception_not_compatible("this file cannot be added to a tar archive at this point (group name too long)");
     835             :         }
     836           0 :         std::copy(utf8.data(), utf8.data() + utf8.size(), header.begin() + 265);
     837             : 
     838             :         // TYPE, SIZE
     839           0 :         switch(file.get_type())
     840             :         {
     841           0 :         case file_t::type_t::FILE_TYPE_REGULAR:
     842           0 :             header[156] = '0'; // regular (tar type)
     843           0 :             append_int(&header[124], file.get_data().size(), 11, 8, '0');
     844           0 :             break;
     845             : 
     846           0 :         case file_t::type_t::FILE_TYPE_DIRECTORY:
     847             :             // needs to be zero in ASCII
     848           0 :             header[156] = '5'; // directory (tar type)
     849           0 :             append_int(&header[124], 0, 11, 8, '0');
     850           0 :             break;
     851             : 
     852             :         //default: ... we could throw but here the compile fails if we
     853             :         //             are missing some types
     854             :         }
     855             : 
     856           0 :         uint32_t checksum(tar_check_sum(&header[0]));
     857           0 :         if(checksum > 32767)
     858             :         {
     859             :             // no null in this case (very rare if at all possible)
     860           0 :             append_int(&header[148], checksum, 7, 8, '0');
     861             :         }
     862             :         else
     863             :         {
     864           0 :             append_int(&header[148], checksum, 6, 8, '0');
     865             :         }
     866           0 :         header[155] = ' ';
     867             : 
     868           0 :         f_archive.append(&header[0], static_cast<int>(header.size()));
     869             : 
     870           0 :         switch(file.get_type())
     871             :         {
     872           0 :         case file_t::type_t::FILE_TYPE_REGULAR:
     873           0 :             f_archive.append(file.get_data());
     874             :             {
     875             :                 // padding to next 512 bytes
     876           0 :                 uint32_t size(file.get_data().size());
     877           0 :                 size &= 511;
     878           0 :                 if(size != 0)
     879             :                 {
     880           0 :                     std::vector<char> pad;
     881           0 :                     pad.resize(512 - size, 0);
     882           0 :                     f_archive.append(&pad[0], static_cast<int>(pad.size()));
     883             :                 }
     884             :             }
     885           0 :             break;
     886             : 
     887           0 :         default:
     888             :             // no data for that type
     889           0 :             break;
     890             : 
     891             :         }
     892           0 :     }
     893             : 
     894           0 :     virtual bool next_file(file_t & file) const
     895             :     {
     896             :         // any more files?
     897             :         // (make sure there is at least a header for now)
     898           0 :         if(f_pos + 512 > f_archive.size())
     899             :         {
     900           0 :             return false;
     901             :         }
     902             : 
     903             :         // read the header
     904           0 :         std::vector<char> header(f_archive.data() + f_pos, f_archive.data() + f_pos + 512);
     905             : 
     906             :         // MAGIC
     907           0 :         if(header[257] != 'u' || header[258] != 's' || header[259] != 't' || header[260] != 'a'
     908           0 :         || header[261] != 'r' || (header[262] != ' ' && header[262] != '\0'))
     909             :         {
     910             :             // if no MAGIC we may have empty blocks (which are legal at the
     911             :             // end of the file)
     912           0 :             for(int i(0); i < 512; ++i)
     913             :             {
     914           0 :                 if(header[i] != '\0')
     915             :                 {
     916           0 :                     throw compression_exception_not_compatible(QString("ustar magic code missing at position %1").arg(f_pos));
     917             :                 }
     918             :             }
     919           0 :             f_pos += 512;
     920             :             // TODO: test all the following blocks as they all should be null
     921             :             //       (as you cannot find an empty block within the tarball)
     922           0 :             return false;
     923             :         }
     924             : 
     925           0 :         uint32_t const file_checksum(read_int(&header[148], 8, 8));
     926           0 :         uint32_t const comp_checksum(tar_check_sum(&header[0]));
     927           0 :         if(file_checksum != comp_checksum)
     928             :         {
     929           0 :             throw compression_exception_not_compatible(QString("ustar checksum code does not match what was expected"));
     930             :         }
     931             : 
     932           0 :         QString filename(QString::fromUtf8(&header[0], static_cast<int>(strnlen(&header[0], 100))));
     933           0 :         if(header[345] != '\0')
     934             :         {
     935             :             // this one has a prefix (long filename)
     936           0 :             QString prefix(QString::fromUtf8(&header[345], static_cast<int>(strnlen(&header[345], 155))));
     937           0 :             if(prefix.endsWith("/"))
     938             :             {
     939             :                 // I think this case is considered a bug in a tarball...
     940           0 :                 filename = prefix + filename;
     941             :             }
     942             :             else
     943             :             {
     944           0 :                 filename = prefix + "/" + filename;
     945             :             }
     946             :         }
     947           0 :         file.set_filename(filename);
     948             : 
     949           0 :         switch(header[156])
     950             :         {
     951           0 :         case '\0':
     952             :         case '0':
     953           0 :             file.set_type(file_t::type_t::FILE_TYPE_REGULAR);
     954           0 :             break;
     955             : 
     956           0 :         case '5':
     957           0 :             file.set_type(file_t::type_t::FILE_TYPE_DIRECTORY);
     958           0 :             break;
     959             : 
     960             : 
     961           0 :         default:
     962           0 :             throw compression_exception_not_supported("file in tarball not supported (we accept regular and directory files only)");
     963             : 
     964             :         }
     965             : 
     966           0 :         file.set_mode (read_int(&header[100],  8, 8));
     967           0 :         file.set_mtime(read_int(&header[136], 12, 8));
     968             : 
     969           0 :         uid_t uid(read_int(&header[108],  8, 8));
     970           0 :         file.set_user (QString::fromUtf8(&header[265], 32), uid);
     971             : 
     972           0 :         gid_t gid(read_int(&header[116],  8, 8));
     973           0 :         file.set_group(QString::fromUtf8(&header[297], 32), gid);
     974             : 
     975           0 :         f_pos += 512;
     976             : 
     977           0 :         if(file.get_type() == file_t::type_t::FILE_TYPE_REGULAR)
     978             :         {
     979           0 :             uint32_t const size(read_int(&header[124], 12, 8));
     980           0 :             int const total_size((size + 511) & -512);
     981           0 :             if(f_pos + total_size > f_archive.size())
     982             :             {
     983           0 :                 throw compression_exception_not_supported("file data not available (archive too small)");
     984             :             }
     985           0 :             QByteArray data;
     986           0 :             data.append(f_archive.data() + f_pos, size);
     987           0 :             file.set_data(data);
     988             : 
     989           0 :             f_pos += total_size;
     990             :         }
     991             : 
     992           0 :         return true;
     993             :     }
     994             : 
     995           0 :     virtual void rewind_file()
     996             :     {
     997           0 :         f_pos = 0;
     998           0 :     }
     999             : 
    1000             : private:
    1001           0 :     void append_int(char * header, int value, unsigned int length, int base, char fill)
    1002             :     {
    1003             :         // save the number (minimum 1 digit)
    1004           0 :         do
    1005             :         {
    1006             :             // base is 8 or 10
    1007           0 :             header[length] = static_cast<char>((value % base) + '0');
    1008           0 :             value /= base;
    1009           0 :             --length;
    1010             :         }
    1011           0 :         while((length > 0) && (value != 0));
    1012             : 
    1013             :         // fill the left side with 'fill'
    1014           0 :         while(length > 0)
    1015             :         {
    1016           0 :             header[length] = fill;
    1017           0 :             --length;
    1018             :         }
    1019           0 :     }
    1020             : 
    1021           0 :     uint32_t read_int(char const * header, int length, int base) const
    1022             :     {
    1023             :         // TODO: add tests
    1024           0 :         uint32_t result(0);
    1025           0 :         for(; length > 0 && *header != '\0' && *header != ' '; --length, ++header)
    1026             :         {
    1027           0 :             result = result * base + (*header - '0');
    1028             :         }
    1029           0 :         return result;
    1030             :     }
    1031             : 
    1032           0 :     uint32_t tar_check_sum(char const * s) const
    1033             :     {
    1034           0 :         uint32_t result = 8 * ' '; // the checksum field
    1035             : 
    1036             :         // name + mode + uid + gid + size + mtime = 148 bytes
    1037           0 :         for(int n(148); n > 0; --n, ++s)
    1038             :         {
    1039           0 :             result += *s;
    1040             :         }
    1041             : 
    1042           0 :         s += 8; // skip the checksum field
    1043             : 
    1044             :         // everything after the checksum is another 356 bytes
    1045           0 :         for(int n(356); n > 0; --n, ++s)
    1046             :         {
    1047           0 :             result += *s;
    1048             :         }
    1049             : 
    1050           0 :         return result;
    1051             :     }
    1052             : 
    1053             :     mutable int32_t   f_pos = 0;
    1054           2 : } g_tar; // declare statically
    1055             : 
    1056             : 
    1057             : 
    1058             : } // namespace snap
    1059           6 : } // namespace compression
    1060             : // vim: ts=4 sw=4 et

Generated by: LCOV version 1.13