Line data Source code
1 : /*
2 : Zipios -- a small C++ library that provides easy access to .zip files.
3 :
4 : Copyright (C) 2000-2007 Thomas Sondergaard
5 : Copyright (c) 2015-2022 Made to Order Software Corp. All Rights Reserved
6 :
7 : This library is free software; you can redistribute it and/or
8 : modify it under the terms of the GNU Lesser General Public
9 : License as published by the Free Software Foundation; either
10 : version 2.1 of the License, or (at your option) any later version.
11 :
12 : This library is distributed in the hope that it will be useful,
13 : but WITHOUT ANY WARRANTY; without even the implied warranty of
14 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 : Lesser General Public License for more details.
16 :
17 : You should have received a copy of the GNU Lesser General Public
18 : License along with this library; if not, write to the Free Software
19 : Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 : */
21 :
22 : /** \file
23 : * \brief Implementation of zipios::DeflateOutputStreambuf.
24 : *
25 : * This is the counterpart of the zipios::InflateInputStreambuf.
26 : */
27 :
28 : #include "deflateoutputstreambuf.hpp"
29 :
30 : #include "zipios/zipiosexceptions.hpp"
31 :
32 : #include "zipios_common.hpp"
33 :
34 :
35 : namespace zipios
36 : {
37 :
38 : /** \class DeflateOutputStreambuf
39 : * \brief A class to handle stream deflate on the fly.
40 : *
41 : * DeflateOutputStreambuf is an output stream filter, that deflates
42 : * the data that is written to it before it passes it on to the
43 : * output stream it is attached to. Deflation/Inflation is a
44 : * compression/decompression method used in gzip and zip. The zlib
45 : * library is used to perform the actual deflation, this class only
46 : * wraps the functionality in an output stream filter.
47 : */
48 :
49 :
50 : /** \brief Initialize a DeflateOutputStreambuf object.
51 : *
52 : * This function initializes the DeflateOutputStreambuf object to make it
53 : * ready for compressing data using the zlib library.
54 : *
55 : * \param[in,out] outbuf The streambuf to use for output.
56 : */
57 257 : DeflateOutputStreambuf::DeflateOutputStreambuf(std::streambuf * outbuf)
58 : : FilterOutputStreambuf(outbuf)
59 257 : , m_invec(getBufferSize())
60 514 : , m_outvec(getBufferSize())
61 : {
62 : // NOTICE: It is important that this constructor and the methods it
63 : // calls do not do anything with the output streambuf m_outbuf.
64 : // The reason is that this class can be sub-classed, and the
65 : // sub-class should get a chance to write to the buffer first.
66 257 : }
67 :
68 :
69 : /** \brief Clean up any resources used by this object.
70 : *
71 : * The destructor makes sure that the zlib library is done with all
72 : * the input and output data by calling various flush functions. It
73 : * then makes sure that the remaining data from zlib is printed in
74 : * the output file.
75 : *
76 : * This is similar to calling closeStream() explicitly.
77 : */
78 257 : DeflateOutputStreambuf::~DeflateOutputStreambuf()
79 : {
80 257 : closeStream();
81 257 : }
82 :
83 :
84 : /** \brief Initialize the zlib library.
85 : *
86 : * This method is called in the constructor, so it must not write
87 : * anything to the output streambuf m_outbuf (see notice in
88 : * constructor.)
89 : *
90 : * It will initialize the output stream as required to accept data
91 : * to be compressed using the zlib library. The compression level
92 : * is expected to come from the FileEntry which is about to be
93 : * saved in the file.
94 : *
95 : * \param[in] compression_level The level of compression. A number from 1 to
96 : * 100 or a special number representing the best, minimum, maximum compression
97 : * available.
98 : *
99 : * \return true if the initialization succeeded, false otherwise.
100 : */
101 49411 : bool DeflateOutputStreambuf::init(FileEntry::CompressionLevel compression_level)
102 : {
103 49411 : if(m_zs_initialized)
104 : {
105 : // This is excluded from the coverage since if we reach this
106 : // line there is an internal error that needs to be fixed.
107 : throw std::logic_error("DeflateOutputStreambuf::init(): initialization function called when the class is already initialized. This is not supported."); // LCOV_EXCL_LINE
108 : }
109 49411 : m_zs_initialized = true;
110 :
111 49411 : int const default_mem_level(8);
112 :
113 49411 : int zlevel(Z_NO_COMPRESSION);
114 49411 : switch(compression_level)
115 : {
116 477 : case FileEntry::COMPRESSION_LEVEL_DEFAULT:
117 477 : zlevel = Z_DEFAULT_COMPRESSION;
118 477 : break;
119 :
120 477 : case FileEntry::COMPRESSION_LEVEL_SMALLEST:
121 477 : zlevel = Z_BEST_COMPRESSION;
122 477 : break;
123 :
124 477 : case FileEntry::COMPRESSION_LEVEL_FASTEST:
125 477 : zlevel = Z_BEST_SPEED;
126 477 : break;
127 :
128 0 : case FileEntry::COMPRESSION_LEVEL_NONE:
129 : throw std::logic_error("the compression level NONE is not supported in DeflateOutputStreambuf::init()"); // LCOV_EXCL_LINE
130 :
131 47980 : default:
132 47980 : if(compression_level < FileEntry::COMPRESSION_LEVEL_MINIMUM
133 47980 : || compression_level > FileEntry::COMPRESSION_LEVEL_MAXIMUM)
134 : {
135 : // This is excluded from the coverage since if we reach this
136 : // line there is an internal error that needs to be fixed.
137 : throw std::logic_error("the compression level must be defined between -3 and 100, see the zipios/fileentry.hpp for a list of valid levels."); // LCOV_EXCL_LINE
138 : }
139 : // The zlevel is calculated linearly from the user specified value
140 : // of 1 to 100
141 : //
142 : // The calculation goes as follow:
143 : //
144 : // x = user specified value - 1 (0 to 99)
145 : // x = x * 8 (0 to 792)
146 : // x = x + 11 / 2 (5 to 797, i.e. +5 with integers)
147 : // x = x / 99 (0 to 8)
148 : // x = x + 1 (1 to 9)
149 : //
150 47980 : zlevel = ((compression_level - 1) * 8 + 11 / 2) / 99 + 1;
151 47980 : break;
152 :
153 : }
154 :
155 : // m_zs.next_in and avail_in must be set according to
156 : // zlib.h (inline doc).
157 49411 : m_zs.next_in = reinterpret_cast<unsigned char *>(&m_invec[0]);
158 49411 : m_zs.avail_in = 0;
159 :
160 49411 : m_zs.next_out = reinterpret_cast<unsigned char *>(&m_outvec[0]);
161 49411 : m_zs.avail_out = getBufferSize();
162 :
163 : //
164 : // windowBits is passed -MAX_WBITS to tell that no zlib
165 : // header should be written.
166 : //
167 49411 : int const err = deflateInit2(&m_zs, zlevel, Z_DEFLATED, -MAX_WBITS, default_mem_level, Z_DEFAULT_STRATEGY);
168 49411 : if(err != Z_OK)
169 : {
170 : // Not too sure how we could generate an error here, the deflateInit2()
171 : // would fail if (1) there is not enough memory and (2) if a parameter
172 : // is out of wack which neither can be generated from the outside
173 : // (well... not easily)
174 : std::ostringstream msgs; // LCOV_EXCL_LINE
175 : msgs << "DeflateOutputStreambuf::init(): error while initializing zlib, " << zError(err) << std::endl; // LCOV_EXCL_LINE
176 : throw IOException(msgs.str()); // LCOV_EXCL_LINE
177 0 : }
178 :
179 : // streambuf init:
180 49411 : setp(&m_invec[0], &m_invec[0] + getBufferSize());
181 :
182 49411 : m_crc32 = crc32(0, Z_NULL, 0);
183 :
184 49411 : return err == Z_OK;
185 : }
186 :
187 :
188 : /** \brief Closing the stream.
189 : *
190 : * This function is expected to be called once the stream is getting
191 : * closed (the buffer is destroyed.)
192 : *
193 : * It ensures that the zlib library last few bytes get flushed and
194 : * then mark the class as closed.
195 : *
196 : * Note that this function can be called to close the current zlib
197 : * library stream and start a new one. It is actually called from
198 : * the putNextEntry() function (via the closeEntry() function.)
199 : */
200 49668 : void DeflateOutputStreambuf::closeStream()
201 : {
202 49668 : if(m_zs_initialized)
203 : {
204 49411 : m_zs_initialized = false;
205 :
206 : // flush any remaining data
207 49411 : endDeflation();
208 :
209 49411 : int const err(deflateEnd(&m_zs));
210 49411 : if(err != Z_OK) // when we close a directory, we get the Z_DATA_ERROR!
211 : {
212 : // There are not too many cases which break the deflateEnd()
213 : // function call...
214 : std::ostringstream msgs; // LCOV_EXCL_LINE
215 : msgs << "DeflateOutputStreambuf::closeStream(): deflateEnd failed: " << zError(err) << std::endl; // LCOV_EXCL_LINE
216 : throw IOException(msgs.str()); // LCOV_EXCL_LINE
217 0 : }
218 : }
219 49668 : }
220 :
221 :
222 : /** \brief Get the CRC32 of the file.
223 : *
224 : * This function returns the CRC32 for the current file.
225 : *
226 : * The returned value is the CRC for the data that has been compressed
227 : * already (due to calls to overflow()). As DeflateOutputStreambuf may
228 : * buffer an arbitrary amount of bytes until closeStream() has been
229 : * invoked, the returned value is not very useful before closeStream()
230 : * has been called.
231 : *
232 : * \return The CRC32 of the last file that was passed through.
233 : */
234 121750 : uint32_t DeflateOutputStreambuf::getCrc32() const
235 : {
236 121750 : return m_crc32;
237 : }
238 :
239 :
240 : /** \brief Retrieve the size of the file deflated.
241 : *
242 : * This function returns the number of bytes written to the
243 : * streambuf object and that were processed from the input
244 : * buffer by the compressor. After closeStream() has been
245 : * called this number is the total number of bytes written
246 : * to the stream. In other words, the size of the uncompressed
247 : * data.
248 : *
249 : * \return The uncompressed size of the file that got written here.
250 : */
251 121750 : size_t DeflateOutputStreambuf::getSize() const
252 : {
253 121750 : return m_overflown_bytes;
254 : }
255 :
256 :
257 : /** \brief Handle an overflow.
258 : *
259 : * This function is called by the streambuf implementation whenever
260 : * "too many bytes" are in the output buffer, ready to be compressed.
261 : *
262 : * \exception IOException
263 : * This exception is raised whenever the overflow() function calls
264 : * a zlib library function which returns an error.
265 : *
266 : * \param[in] c The character (byte) that overflowed the buffer.
267 : *
268 : * \return Always zero (0).
269 : */
270 337101 : int DeflateOutputStreambuf::overflow(int c)
271 : {
272 337101 : int err(Z_OK);
273 :
274 337101 : m_zs.avail_in = pptr() - pbase();
275 337101 : m_zs.next_in = reinterpret_cast<unsigned char *>(&m_invec[0]);
276 :
277 337101 : if(m_zs.avail_in > 0)
278 : {
279 337101 : m_crc32 = crc32(m_crc32, m_zs.next_in, m_zs.avail_in); // update crc32
280 :
281 337101 : m_zs.next_out = reinterpret_cast<unsigned char *>(&m_outvec[0]);
282 337101 : m_zs.avail_out = getBufferSize();
283 :
284 : // Deflate until m_invec is empty.
285 937514 : while((m_zs.avail_in > 0 || m_zs.avail_out == 0) && err == Z_OK)
286 : {
287 600413 : if(m_zs.avail_out == 0)
288 : {
289 263312 : flushOutvec();
290 : }
291 :
292 600413 : err = deflate(&m_zs, Z_NO_FLUSH);
293 : }
294 : }
295 :
296 : // somehow we need this flush here or it fails
297 337101 : flushOutvec();
298 :
299 : // Update 'put' pointers
300 337101 : setp(&m_invec[0], &m_invec[0] + getBufferSize());
301 :
302 337101 : if(err != Z_OK && err != Z_STREAM_END)
303 : {
304 : // Throw an exception to make istream set badbit
305 : //
306 : // This is marked as not cover-able by tests because the calls
307 : // that access this function only happen in an internal loop and
308 : // even if we were to write a direct test, I do not see how
309 : // we could end up with an error here
310 : OutputStringStream msgs; // LCOV_EXCL_LINE
311 : msgs << "Deflation failed:" << zError(err); // LCOV_EXCL_LINE
312 : throw IOException(msgs.str()); // LCOV_EXCL_LINE
313 0 : }
314 :
315 337101 : if(c != EOF)
316 : {
317 287690 : *pptr() = c;
318 287690 : pbump(1);
319 : }
320 :
321 337101 : return 0;
322 : }
323 :
324 :
325 : /** \brief Synchronize the buffer.
326 : *
327 : * The sync() function is expected to clear the input buffer so that
328 : * any new data read from the input (i.e. a file) are re-read from
329 : * disk. However, a call to sync() could break the filtering
330 : * functionality so we do not implement it at all.
331 : *
332 : * This means you are stuck with the existing buffer. But to make
333 : * sure the system understands that, we always returns -1.
334 : */
335 : int DeflateOutputStreambuf::sync() // LCOV_EXCL_LINE
336 : {
337 : return -1; // LCOV_EXCL_LINE
338 : }
339 :
340 :
341 : /** \brief Flush the cached output data.
342 : *
343 : * This function flushes m_outvec and updates the output pointer
344 : * and size m_zs.next_out and m_zs.avail_out.
345 : */
346 673882 : void DeflateOutputStreambuf::flushOutvec()
347 : {
348 : /** \TODO
349 : * We need to redesign the class to allow for STORED files to
350 : * flow through without the need to have this crap of bytes to
351 : * skip...
352 : */
353 673882 : std::size_t const deflated_bytes(getBufferSize() - m_zs.avail_out);
354 673882 : if(deflated_bytes > 0)
355 : {
356 468437 : std::size_t const bc(m_outbuf->sputn(&m_outvec[0], deflated_bytes));
357 468437 : if(deflated_bytes != bc)
358 : {
359 : // Without implementing our own stream in our test, this
360 : // cannot really be reached because it is all happening
361 : // inside the same loop in ZipFile::saveCollectionToArchive()
362 : throw IOException("DeflateOutputStreambuf::flushOutvec(): write to buffer failed."); // LCOV_EXCL_LINE
363 : }
364 : }
365 :
366 673882 : m_zs.next_out = reinterpret_cast<unsigned char *>(&m_outvec[0]);
367 673882 : m_zs.avail_out = getBufferSize();
368 673882 : }
369 :
370 :
371 : /** \brief End deflation of current file.
372 : *
373 : * This function flushes the remaining data in the zlib buffers,
374 : * after which the only possible operations are deflateEnd() or
375 : * deflateReset().
376 : */
377 49411 : void DeflateOutputStreambuf::endDeflation()
378 : {
379 49411 : overflow();
380 :
381 49411 : m_zs.next_out = reinterpret_cast<unsigned char *>(&m_outvec[0]);
382 49411 : m_zs.avail_out = getBufferSize();
383 :
384 : // Deflate until _invec is empty.
385 49411 : int err(Z_OK);
386 :
387 : // make sure to NOT call deflate() if nothing was written to the
388 : // deflate output stream, otherwise we get a "spurious" (as far
389 : // Zip archives are concerned) 0x03 0x00 marker from the zlib
390 : // library
391 : //
392 49411 : if(m_overflown_bytes > 0)
393 : {
394 122880 : while(err == Z_OK)
395 : {
396 73469 : if(m_zs.avail_out == 0)
397 : {
398 24058 : flushOutvec();
399 : }
400 :
401 73469 : err = deflate(&m_zs, Z_FINISH);
402 : }
403 : }
404 : else
405 : {
406 : // this is not expected to happen, but it can
407 : err = Z_STREAM_END; // LCOV_EXCL_LINE
408 : }
409 :
410 49411 : flushOutvec();
411 :
412 49411 : if(err != Z_STREAM_END)
413 : {
414 : // This is marked as not cover-able because the calls that
415 : // access this function only happen in an internal loop and
416 : // even if we were to write a direct test, I do not see how
417 : // we could end up with an error here
418 : std::ostringstream msgs; // LCOV_EXCL_LINE
419 : msgs << "DeflateOutputStreambuf::endDeflation(): deflate() failed: " // LCOV_EXCL_LINE
420 : << zError(err) << std::endl; // LCOV_EXCL_LINE
421 : throw IOException(msgs.str()); // LCOV_EXCL_LINE
422 0 : }
423 49411 : }
424 :
425 :
426 : } // namespace
427 :
428 : // Local Variables:
429 : // mode: cpp
430 : // indent-tabs-mode: nil
431 : // c-basic-offset: 4
432 : // tab-width: 4
433 : // End:
434 :
435 : // vim: ts=4 sw=4 et
|