Line data Source code
1 : /*
2 : Zipios -- a small C++ library that provides easy access to .zip files.
3 :
4 : Copyright (C) 2000-2007 Thomas Sondergaard
5 : Copyright (c) 2015-2022 Made to Order Software Corp. All Rights Reserved
6 :
7 : This library is free software; you can redistribute it and/or
8 : modify it under the terms of the GNU Lesser General Public
9 : License as published by the Free Software Foundation; either
10 : version 2.1 of the License, or (at your option) any later version.
11 :
12 : This library is distributed in the hope that it will be useful,
13 : but WITHOUT ANY WARRANTY; without even the implied warranty of
14 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 : Lesser General Public License for more details.
16 :
17 : You should have received a copy of the GNU Lesser General Public
18 : License along with this library; if not, write to the Free Software
19 : Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 : */
21 :
22 : /** \file
23 : * \brief The implementation of zipios::ZipFile.
24 : *
25 : * This file contains the high level functions used to read or write
26 : * a Zip archive file.
27 : */
28 :
29 : #include "zipios/zipfile.hpp"
30 :
31 : #include "zipios/streamentry.hpp"
32 : #include "zipios/zipiosexceptions.hpp"
33 :
34 : #include "backbuffer.hpp"
35 : #include "zipendofcentraldirectory.hpp"
36 : #include "zipcentraldirectoryentry.hpp"
37 : #include "zipinputstream.hpp"
38 : #include "zipoutputstream.hpp"
39 :
40 : #include <fstream>
41 :
42 :
43 : /** \brief The zipios namespace includes the Zipios library definitions.
44 : *
45 : * This namespace is used to clearly separate all the Zipios definitions.
46 : * Note that a very few definitions are found outside of the namespace.
47 : * Some of those are hidden in the source of the library, a very few
48 : * appear in the zipios-config.hpp file as they are used to support
49 : * Zipios on any platform.
50 : *
51 : * Note that to ensure secure development, we do not make use of the
52 : * C++ "using ..." keyword. That way we can guarantee what's what.
53 : */
54 : namespace zipios
55 : {
56 :
57 :
58 : /** \mainpage Zipios
59 : *
60 : * \image html zipios.jpg
61 : *
62 : * \section intro Introduction
63 : *
64 : * Zipios is a java.util.zip-like C++ library for reading and
65 : * writing Zip files (ZipFile). Access to individual entries is
66 : * provided through a Zipios class (FileEntry) for the meta data
67 : * of the file and standard C++ iostreams for the contents of the file.
68 : *
69 : * A simple virtual file system that mounts regular directories and
70 : * zip files is also provided (FileCollection).
71 : *
72 : * The library is fully re-entrant. It is not otherwise thread safe.
73 : *
74 : * The source code is released under the <a
75 : * href="http://www.gnu.org/copyleft/lesser.html">GNU Lesser General Public
76 : * License</a>.
77 : *
78 : * \section status Status
79 : *
80 : * This was the status of version 1.x. At this point, 2.x has a brand new
81 : * version out and we are waiting for good news about the current status.
82 : * That being said, version 2.x comes with a test suite which produces a
83 : * 100% coverage of the library (except gzip which is not yet publicly
84 : * available.)
85 : *
86 : * \warning
87 : * There is a bug in the catch.hpp header file that generates a never
88 : * ending loop (see https://github.com/philsquared/Catch/issues/271 for
89 : * more information) when running the test suite under FreeBSD and an error
90 : * occurs (although you should not have an error, if it happens, then
91 : * the loop never ends.) I have noticed that problem with the following
92 : * scenario, and it does not seem to be fixed yet (Apr 4, 2015):
93 : *
94 : * \li "use Zipios to create zip archives with 1 or 3 files each"
95 : *
96 : * Spanned archives are not supported, and support is not planned.
97 : *
98 : * The library v1.x has been tested and appears to be working with:
99 : *
100 : * \li <a href="http://www.freebsd.org/ports/archivers.html#zipios++-0.1.5">FreeBSD stable and current / gcc 2.95.3</a>
101 : * \li Red Hat Linux release 7.0 / gcc 2.96
102 : * \li Red Hat Linux release 6.2 (Zoot) / egcs-2.91.66
103 : * \li Linux Mandrake release 7.0 (Air) / gcc 2.95.2
104 : * \li SGI IRIX64 6.5 / gcc 2.95.2
105 : * \li SGI IRIX64 6.5 / MIPSpro Compilers: Version 7.30
106 : *
107 : * The library v2.x has been compiled and appears to be working with:
108 : *
109 : * \li Ubuntu (starting with 14.04) -- full test suite working
110 : * \li Debian (starting with Stretch)
111 : * \li Fedora (starting with F25)
112 : * \li FreeBSD (starting with 10.01)
113 : * \li SunOS (starting with Open SunOS 11.2)
114 : * \li Cygwin (starting with 6.1)
115 : *
116 : * If you make Zipios work on other platforms, let us know by posting
117 : * an issue on GitHub:
118 : *
119 : * https://github.com/Zipios/Zipios/issues
120 : *
121 : *
122 : * \section documentation Documentation
123 : *
124 : * This web page is the front page to the library documentation which
125 : * is generated from the source files using <a
126 : * href="http://www.stack.nl/~dimitri/doxygen/index.html">Doxygen</a>.
127 : * Use the links at the top of the page to browse the API
128 : * documentation. Your Doxygen installation may also be capable
129 : * of generating other formats (Latex, PDF, etc.) if you would
130 : * prefer such (we only offer the HTML documentation.)
131 : *
132 : * \subsection zipfiles Zip file access
133 : *
134 : * The two most important classes are DirectoryCollection and ZipFile.
135 : *
136 : * A ZipFile is also a FileCollection, only the collection is loaded
137 : * from a Zip archive instead of a directory. A ZipFile is composed of
138 : * ZipCentralDirectoryEntry objects. As far as you are concerned though,
139 : * you can only use it as FileEntry objects.
140 : *
141 : * Note that the ZipFile constructor immediately scans the Central
142 : * Directory of the Zip archive so the entries are immediately accessible.
143 : *
144 : * The DirectoryCollection can be created one file at a time, so it is
145 : * possible to create a collection without having to include all the
146 : * files from a directory. However, the files still have to exist on
147 : * disk. The DirectoryCollection is composed of DirectoryEntry objects.
148 : *
149 : * To access the entries in a collection, use the entries() function
150 : * which returns a vector of FileEntry objects. If you know the exact
151 : * filename of an entry, you may also use the getEntry() with that name.
152 : * This is particularly useful if you want to use Zipios as a way to
153 : * handle the resources of your executable (see the openEmbeddedZipFile()
154 : * function and the appendzip.cpp tool). Finally, you want to use
155 : * the getInputStream() function to read the data of a file defined in
156 : * a collection.
157 : *
158 : * \code
159 : * // Resources global pointer
160 : * zipios::ZipFile::pointer_t g_resources;
161 : *
162 : * // Initialization of resources
163 : * g_resources = zipios::ZipFile::openEmbeddedZipFile("executable_filename");
164 : *
165 : * // Anywhere else in your application
166 : *
167 : * // 1. get the entry (to access meta data)
168 : * zipios::FileEntry::pointer_t entry(g_resources->getEntry("my/resource/file.xml"));
169 : *
170 : * // 2. get the istream (to access the actual file data)
171 : * zipios::FileCollection::stream_pointer_t in_stream(g_resources->getInputStream("my/resource/file.xml"));
172 : * \endcode
173 : *
174 : * zipios_example.cpp demonstrates the central elements of Zipios when used
175 : * in read mode.
176 : *
177 : * \subsection filecollection FileCollection
178 : *
179 : * A ZipFile is actually just a special kind of FileCollection that
180 : * obtains its entries from a Zip archive. Zipios also implements
181 : * a DirectoryCollection that obtains its entries from an on disk
182 : * directory and a CollectionCollection that obtains its entries from
183 : * other collections.
184 : *
185 : * Using a single CollectionCollection, any number of other FileCollection's
186 : * can be placed under its control and accessed through the same single
187 : * interface that is used to access a ZipFile or a DirectoryCollection.
188 : *
189 : * \warning
190 : * The CollectionCollection singleton in version 1.x was removed to make
191 : * the entire library 100% re-entrant without the need to link against
192 : * a thread library.
193 : *
194 : * \section download Download
195 : *
196 : * The <a href="https://github.com/Zipios/Zipios">Zipios project</a> is now
197 : * on github. You can find the source files, tarballs and changelog files
198 : * there.
199 : *
200 : * \section development Development
201 : *
202 : * The Zipios project makes use of a few development tools and the
203 : * tests require the zip utility, used to verify that an external tool
204 : * can generate a zip file that Zipios can read.
205 : *
206 : * Under a Debian or Ubuntu system, you can run apt-get install with
207 : * the following list of packages:
208 : *
209 : * \code
210 : * # For source management (or download the .tar.gz file)
211 : * apt-get install git
212 : *
213 : * # For development
214 : * apt-get install g++ cmake zlib1g-dev
215 : *
216 : * # For documentation (or download the -doc.tar.gz file)
217 : * apt-get install doxygen graphviz
218 : *
219 : * # For testing (optional, albeit recommended)
220 : * apt-get install catch zip
221 : * \endcode
222 : *
223 : * Under Fedora, you can use dnf (or yum for early versions):
224 : *
225 : * \code
226 : * # For source management (or download the .tar.gz file)
227 : * dnf install git
228 : *
229 : * # For development
230 : * dnf install gcc-c++ cmake zlib-devel
231 : *
232 : * # For documentation (or download the -doc.tar.gz file)
233 : * dnf install doxygen graphviz
234 : *
235 : * # For testing (optional, albeit recommended)
236 : * dnf install catch-devel zip
237 : * \endcode
238 : *
239 : *
240 : * Other systems use tools with pretty much the same naming convention
241 : * so you should be able to make the correlation.
242 : *
243 : * \section history History
244 : *
245 : * <a href="http://www.zlib.net/">zlib</a>.
246 : * The compression library that Zipios uses to perform the actual
247 : * compression and decompression.
248 : *
249 : * Zipios version 1.0 is heavily inspired by the java.util.zip package.
250 : * Version 2.0 is following the same philosophy without (1) attempting
251 : * to follow the interface one to one and (2) without updating to the
252 : * newer version, if there were changes...
253 : *
254 : * You will find a text file in the doc directory named zip-format.txt
255 : * with a complete description of the zip file format as of October 1, 2014.
256 : *
257 : * \section bugs Bugs
258 : *
259 : * <a href="https://github.com/Zipios/Zipios/issues">Submit bug reports
260 : * and patches on github.</a>
261 : *
262 : * \section source Source
263 : *
264 : * The <a href="https://github.com/Zipios/Zipios">source code is available
265 : * on github</a>.
266 : */
267 :
268 :
269 : /** \class ZipFile
270 : * \brief The ZipFile class represents a collection of files.
271 : *
272 : * ZipFile is a FileCollection, where the files are stored
273 : * in a .zip file.
274 : */
275 :
276 :
277 :
278 : /** \brief Open a zip archive that was previously appended to another file.
279 : *
280 : * Opens a Zip archive embedded in another file, by writing the zip
281 : * archive to the end of the file followed by the start offset of
282 : * the zip file on 4 bytes. The offset must be written in zip-file
283 : * byte-order (little endian).
284 : *
285 : * The program appendzip, which is part of the Zipios distribution can
286 : * be used to append a Zip archive to a file, e.g. a binary program.
287 : *
288 : * The function may throw various exception if the named file does not
289 : * seem to include a valid zip archive attached.
290 : *
291 : * \note
292 : * Only one file can be appended and opened in this way. Although
293 : * the appendzip tool can be used to append any number of files,
294 : * only the last one is accessible.
295 : *
296 : * \exception FileCollectionException
297 : * This exception is raised if the initialization fails. The function verifies
298 : * that the input stream represents what is considered a valid zip file.
299 : *
300 : * \param[in] filename The filename of your executable (generally, argv[0]).
301 : *
302 : * \return A ZipFile that one can use to read compressed data.
303 : */
304 0 : ZipFile::pointer_t ZipFile::openEmbeddedZipFile(std::string const & filename)
305 : {
306 : // open zipfile, read 4 last bytes close file
307 : uint32_t start_offset;
308 : {
309 0 : std::ifstream ifs(filename, std::ios::in | std::ios::binary);
310 0 : ifs.seekg(-4, std::ios::end);
311 0 : zipRead(ifs, start_offset);
312 : // todo: add support for 64 bit (files of more than 4Gb)
313 0 : }
314 :
315 : // create ZipFile object from embedded data
316 0 : return std::make_shared<ZipFile>(filename, start_offset, 4);
317 : }
318 :
319 :
320 : /** \brief Initialize a ZipFile object.
321 : *
322 : * This is the default constructor of the ZipFile object.
323 : *
324 : * Note that an empty ZipFile is marked as invalid. More or less, such
325 : * an object is useless although it is useful to have this constructor
326 : * if you want to work with maps or vectors of ZipFile objects.
327 : */
328 1 : ZipFile::ZipFile()
329 : {
330 1 : }
331 :
332 :
333 : /** \brief Initialize a ZipFile object from an existing file.
334 : *
335 : * This constructor opens the named zip file. If the zip "file" is
336 : * embedded in a file that contains other data, e.g. a binary
337 : * program, the offset of the zip file start and end must be
338 : * specified.
339 : *
340 : * If the file cannot be opened or the Zip directory cannot
341 : * be read, then the constructor throws an exception.
342 : *
343 : * \exception FileCollectionException
344 : * This exception is raised if the initialization fails. The function verifies
345 : * that the input stream represents what is considered a valid zip file.
346 : *
347 : * \param[in] filename The filename of the zip file to open.
348 : * \param[in] s_off Offset relative to the start of the file, that
349 : * indicates the beginning of the zip data in the file.
350 : * \param[in] e_off Offset relative to the end of the file, that
351 : * indicates the end of the zip data in the file.
352 : * The offset is a positive number, even though the
353 : * offset goes toward the beginning of the file.
354 : */
355 369 : ZipFile::ZipFile(std::string const & filename, offset_t s_off, offset_t e_off)
356 : : FileCollection(filename)
357 369 : , m_vs(s_off, e_off)
358 : {
359 369 : std::ifstream zipfile(m_filename, std::ios::in | std::ios::binary);
360 369 : if(!zipfile)
361 : {
362 1 : throw IOException("Error opening Zip archive file for reading in binary mode.");
363 : }
364 :
365 368 : init(zipfile);
366 453 : }
367 :
368 :
369 : /** \brief Initialize a ZipFile object from an istream.
370 : *
371 : * This constructor opens the ZipFile from the specified istream. The
372 : * istream can be in memory or even an Internet stream. However, the
373 : * ZipFile algorithm requires a stream with proper back and forth seek
374 : * capabilities.
375 : *
376 : * If the Zip directory cannot be read, then the constructor throws an
377 : * exception.
378 : *
379 : * \exception FileCollectionException
380 : * This exception is raised if the initialization fails. The function verifies
381 : * that the input stream represents what is considered a valid zip file.
382 : *
383 : * \param[in] is The input stream with the zip file data.
384 : * \param[in] s_off Offset relative to the start of the file, that
385 : * indicates the beginning of the zip data in the file.
386 : * \param[in] e_off Offset relative to the end of the file, that
387 : * indicates the end of the zip data in the file.
388 : * The offset is a positive number, even though the
389 : * offset goes toward the beginning of the file.
390 : */
391 0 : ZipFile::ZipFile(std::istream & is, offset_t s_off, offset_t e_off)
392 0 : : m_vs(s_off, e_off)
393 : {
394 0 : init(is);
395 0 : }
396 :
397 :
398 : /** \brief Initialize the ZipFile from the specified input stream.
399 : *
400 : * This function finishes the initialization of the ZipFile from the
401 : * constructor. It is 100% private.
402 : *
403 : * \exception FileCollectionException
404 : * This exception is raised if the initialization fails. The function verifies
405 : * that the input stream represents what is considered a valid zip file.
406 : *
407 : * \param[in] is The input stream used to read the ZipFile.
408 : */
409 368 : void ZipFile::init(std::istream & is)
410 : {
411 : // Find and read the End of Central Directory.
412 368 : ZipEndOfCentralDirectory eocd;
413 : {
414 368 : BackBuffer bb(is, m_vs);
415 368 : ssize_t read_p(-1);
416 : for(;;)
417 : {
418 9168 : if(read_p < 0)
419 : {
420 391 : if(!bb.readChunk(read_p))
421 : {
422 23 : throw FileCollectionException("Unable to find zip structure: End-of-central-directory");
423 : }
424 : }
425 : // Note: this is pretty fast since it reads from 'bb' which
426 : // caches the buffer the readChunk() function just read.
427 : //
428 9145 : if(eocd.read(bb, read_p))
429 : {
430 : // found it!
431 325 : break;
432 : }
433 8800 : --read_p;
434 : }
435 368 : }
436 :
437 : // Position read pointer to start of first entry in central dir.
438 325 : m_vs.vseekg(is, eocd.getOffset(), std::ios::beg);
439 :
440 : // TBD -- is that ", 0" still necessary? (With VC2012 and better)
441 : // Give the second argument in the next line to keep Visual C++ quiet
442 : //m_entries.resize(eocd.getCount(), 0);
443 325 : m_entries.resize(eocd.getCount());
444 :
445 325 : size_t const max_entry(eocd.getCount());
446 59516 : for(size_t entry_num(0); entry_num < max_entry; ++entry_num)
447 : {
448 59201 : m_entries[entry_num] = std::make_shared<ZipCentralDirectoryEntry>();
449 59201 : m_entries[entry_num].get()->read(is);
450 : }
451 :
452 : // Consistency check #1:
453 : // The virtual seeker position is exactly the start offset of the
454 : // Central Directory plus the Central Directory size
455 : //
456 315 : offset_t const pos(m_vs.vtellg(is));
457 315 : if(static_cast<offset_t>(eocd.getOffset() + eocd.getCentralDirectorySize()) != pos)
458 : {
459 10 : throw FileCollectionException("Zip file consistency problem. Zip file data fields are inconsistent with zip file layout.");
460 : }
461 :
462 : // Consistency check #2:
463 : // Are local headers consistent with CD headers?
464 : //
465 59466 : for(auto it = m_entries.begin(); it != m_entries.end(); ++it)
466 : {
467 : /** \TODO
468 : * Make sure the entry offset is properly defined by
469 : * ZipCentralDirectoryEntry.
470 : *
471 : * Also the isEqual() is a quite advanced (slow) test here!
472 : */
473 59181 : m_vs.vseekg(is, (*it)->getEntryOffset(), std::ios::beg);
474 59181 : ZipLocalEntry zlh;
475 59181 : zlh.read(is);
476 59171 : if(!is || !zlh.isEqual(**it))
477 : {
478 10 : throw FileCollectionException("Zip file consistency problem. Zip file data fields are inconsistent with zip file layout.");
479 : }
480 59181 : }
481 :
482 : // we are all good!
483 285 : m_valid = true;
484 368 : }
485 :
486 :
487 : /** \brief Create a clone of this ZipFile.
488 : *
489 : * This function creates a heap allocated clone of the ZipFile object.
490 : *
491 : * \return A shared pointer to a copy of this ZipFile object.
492 : */
493 1 : FileCollection::pointer_t ZipFile::clone() const
494 : {
495 1 : return FileCollection::pointer_t(std::make_shared<ZipFile>(*this));
496 : }
497 :
498 :
499 : /** \brief Clean up the ZipFile object.
500 : *
501 : * The destructor ensures that any ZipFile data gets flushed
502 : * out before returning.
503 : */
504 287 : ZipFile::~ZipFile()
505 : {
506 287 : close();
507 287 : }
508 :
509 :
510 : /** \brief Retrieve a pointer to a file in the Zip archive.
511 : *
512 : * This function returns a shared pointer to an istream defined from the
513 : * named entry, which gives you access to the corresponding file defined
514 : * in the Zip archive.
515 : *
516 : * The function returns nullptr if there is no entry with the
517 : * specified name in this ZipFile.
518 : *
519 : * Note that the function returns a smart pointer to an istream. The
520 : * ZipFile class does not hold that pointer meaning that
521 : * if you call getInputStream() multiple times with the same
522 : * \p entry_name parameter, you get different istream instance each
523 : * time.
524 : *
525 : * By default the \p entry_name parameter is expected to match the full
526 : * path and filename (MatchPath::MATCH). If you are looking for a file
527 : * and want to ignore the path, set the matchpath parameter
528 : * to MatchPath::IGNORE.
529 : *
530 : * \note
531 : * If the file is compressed inside the Zip archive, this input stream
532 : * returns the uncompressed data transparently to you (outside of the
533 : * time it takes to decompress the data, of course.)
534 : *
535 : * \param[in] entry_name The name of the file to search in the collection.
536 : * \param[in] matchpath Whether the full path or just the filename is matched.
537 : *
538 : * \return A shared pointer to an open istream for the specified entry.
539 : *
540 : * \sa CollectionCollection
541 : * \sa DirectoryCollection
542 : * \sa FileCollection
543 : */
544 53573 : ZipFile::stream_pointer_t ZipFile::getInputStream(std::string const & entry_name, MatchPath matchpath)
545 : {
546 53573 : mustBeValid();
547 :
548 : // TODO: see whether we could make the handling of the StreamEntry
549 : // non-special
550 : //
551 53573 : FileEntry::pointer_t entry(getEntry(entry_name, matchpath));
552 53573 : StreamEntry::pointer_t stream(std::dynamic_pointer_cast<StreamEntry>(entry));
553 53573 : if(stream != nullptr)
554 : {
555 0 : stream_pointer_t zis(std::make_shared<ZipInputStream>(stream->getStream()));
556 0 : return zis;
557 0 : }
558 53573 : else if(entry != nullptr)
559 : {
560 53101 : stream_pointer_t zis(std::make_shared<ZipInputStream>(m_filename, entry->getEntryOffset() + m_vs.startOffset()));
561 53041 : return zis;
562 53041 : }
563 :
564 : // no entry with that name (and match) available
565 512 : return nullptr;
566 53593 : }
567 :
568 :
569 : /** \brief Create a Zip archive from the specified FileCollection.
570 : *
571 : * This function is expected to be used with a DirectoryCollection
572 : * that you created to save the collection in an archive.
573 : *
574 : * \param[in,out] os The output stream where the Zip archive is saved.
575 : * \param[in] collection The collection to save in this output stream.
576 : * \param[in] zip_comment The global comment of the Zip archive.
577 : */
578 257 : void ZipFile::saveCollectionToArchive(
579 : std::ostream & os
580 : , FileCollection & collection
581 : , std::string const & zip_comment)
582 : {
583 : try
584 : {
585 257 : ZipOutputStream output_stream(os);
586 :
587 257 : output_stream.setComment(zip_comment);
588 :
589 257 : FileEntry::vector_t entries(collection.entries());
590 122007 : for(auto it(entries.begin()); it != entries.end(); ++it)
591 : {
592 121752 : output_stream.putNextEntry(*it);
593 :
594 : // next we need to include the data of that file in the
595 : // output buffer if it is not a directory and the file is
596 : // not an empty file
597 : //
598 121750 : if(!(*it)->isDirectory()
599 121750 : && (*it)->getSize() > 0)
600 : {
601 : // get an InputStream
602 : //
603 116927 : FileCollection::stream_pointer_t is(collection.getInputStream((*it)->getName()));
604 116927 : if(is != nullptr
605 116927 : && is->good())
606 : {
607 : // copy the file content to the output
608 : //
609 116927 : output_stream << is->rdbuf();
610 : }
611 116927 : }
612 : }
613 :
614 : // clean up manually so we can get any exception
615 : // (so we avoid having exceptions gobbled by the destructor)
616 256 : output_stream.closeEntry();
617 256 : output_stream.finish();
618 253 : output_stream.close();
619 261 : }
620 4 : catch(...)
621 : {
622 4 : os.setstate(std::ios::failbit);
623 4 : throw;
624 4 : }
625 253 : }
626 :
627 :
628 : } // zipios namespace
629 :
630 : // Local Variables:
631 : // mode: cpp
632 : // indent-tabs-mode: nil
633 : // c-basic-offset: 4
634 : // tab-width: 4
635 : // End:
636 :
637 : // vim: ts=4 sw=4 et
|