001/*
002 *  Licensed to the Apache Software Foundation (ASF) under one or more
003 *  contributor license agreements.  See the NOTICE file distributed with
004 *  this work for additional information regarding copyright ownership.
005 *  The ASF licenses this file to You under the Apache License, Version 2.0
006 *  (the "License"); you may not use this file except in compliance with
007 *  the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 *  Unless required by applicable law or agreed to in writing, software
012 *  distributed under the License is distributed on an "AS IS" BASIS,
013 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 *  See the License for the specific language governing permissions and
015 *  limitations under the License.
016 *
017 */
018package org.apache.commons.compress.archivers.zip;
019
020import static org.apache.commons.compress.archivers.zip.ZipConstants.DWORD;
021import static org.apache.commons.compress.archivers.zip.ZipConstants.SHORT;
022import static org.apache.commons.compress.archivers.zip.ZipConstants.WORD;
023import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC;
024import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC_SHORT;
025
026import java.io.BufferedInputStream;
027import java.io.ByteArrayInputStream;
028import java.io.Closeable;
029import java.io.EOFException;
030import java.io.File;
031import java.io.IOException;
032import java.io.InputStream;
033import java.io.SequenceInputStream;
034import java.nio.ByteBuffer;
035import java.nio.channels.FileChannel;
036import java.nio.channels.SeekableByteChannel;
037import java.nio.file.Files;
038import java.nio.file.Path;
039import java.nio.file.StandardOpenOption;
040import java.util.Arrays;
041import java.util.Collections;
042import java.util.Comparator;
043import java.util.EnumSet;
044import java.util.Enumeration;
045import java.util.HashMap;
046import java.util.LinkedList;
047import java.util.List;
048import java.util.Map;
049import java.util.zip.Inflater;
050import java.util.zip.ZipException;
051
052import org.apache.commons.compress.archivers.EntryStreamOffsets;
053import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
054import org.apache.commons.compress.compressors.deflate64.Deflate64CompressorInputStream;
055import org.apache.commons.compress.utils.BoundedArchiveInputStream;
056import org.apache.commons.compress.utils.BoundedSeekableByteChannelInputStream;
057import org.apache.commons.compress.utils.CountingInputStream;
058import org.apache.commons.compress.utils.IOUtils;
059import org.apache.commons.compress.utils.InputStreamStatistics;
060
061/**
 * Replacement for {@code java.util.zip.ZipFile}.
063 *
 * <p>This class adds support for file name encodings other than UTF-8
 * (which is required to work on ZIP files created by native zip tools)
 * and is able to skip a preamble like the one found in self
 * extracting archives.  Furthermore it returns instances of
 * {@code org.apache.commons.compress.archivers.zip.ZipArchiveEntry}
 * instead of {@code java.util.zip.ZipEntry}.</p>
070 *
 * <p>It doesn't extend {@code java.util.zip.ZipFile} as it would
 * have to reimplement all methods anyway.  Like
 * {@code java.util.zip.ZipFile}, it uses SeekableByteChannel under the
 * covers and supports compressed and uncompressed entries.  As of
 * Apache Commons Compress 1.3 it also transparently supports Zip64
 * extensions and thus individual entries and archives larger than 4
 * GB or with more than 65536 entries.</p>
078 *
079 * <p>The method signatures mimic the ones of
080 * {@code java.util.zip.ZipFile}, with a couple of exceptions:
081 *
082 * <ul>
083 *   <li>There is no getName method.</li>
084 *   <li>entries has been renamed to getEntries.</li>
085 *   <li>getEntries and getEntry return
086 *   {@code org.apache.commons.compress.archivers.zip.ZipArchiveEntry}
087 *   instances.</li>
088 *   <li>close is allowed to throw IOException.</li>
089 * </ul>
090 *
091 */
092public class ZipFile implements Closeable {
093    private static final int HASH_SIZE = 509;
094    static final int NIBLET_MASK = 0x0f;
095    static final int BYTE_SHIFT = 8;
096    private static final int POS_0 = 0;
097    private static final int POS_1 = 1;
098    private static final int POS_2 = 2;
099    private static final int POS_3 = 3;
100    private static final byte[] ONE_ZERO_BYTE = new byte[1];
101
102    /**
103     * List of entries in the order they appear inside the central
104     * directory.
105     */
106    private final List<ZipArchiveEntry> entries =
107        new LinkedList<>();
108
109    /**
110     * Maps String to list of ZipArchiveEntrys, name -> actual entries.
111     */
112    private final Map<String, LinkedList<ZipArchiveEntry>> nameMap =
113        new HashMap<>(HASH_SIZE);
114
115    /**
116     * The encoding to use for file names and the file comment.
117     *
118     * <p>For a list of possible values see <a
119     * href="http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html">http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html</a>.
120     * Defaults to UTF-8.</p>
121     */
122    private final String encoding;
123
124    /**
125     * The zip encoding to use for file names and the file comment.
126     */
127    private final ZipEncoding zipEncoding;
128
129    /**
130     * File name of actual source.
131     */
132    private final String archiveName;
133
134    /**
135     * The actual data source.
136     */
137    private final SeekableByteChannel archive;
138
139    /**
140     * Whether to look for and use Unicode extra fields.
141     */
142    private final boolean useUnicodeExtraFields;
143
144    /**
145     * Whether the file is closed.
146     */
147    private volatile boolean closed = true;
148
149    /**
150     * Whether the zip archive is a split zip archive
151     */
152    private final boolean isSplitZipArchive;
153
154    // cached buffers - must only be used locally in the class (COMPRESS-172 - reduce garbage collection)
155    private final byte[] dwordBuf = new byte[DWORD];
156    private final byte[] wordBuf = new byte[WORD];
157    private final byte[] cfhBuf = new byte[CFH_LEN];
158    private final byte[] shortBuf = new byte[SHORT];
159    private final ByteBuffer dwordBbuf = ByteBuffer.wrap(dwordBuf);
160    private final ByteBuffer wordBbuf = ByteBuffer.wrap(wordBuf);
161    private final ByteBuffer cfhBbuf = ByteBuffer.wrap(cfhBuf);
162    private final ByteBuffer shortBbuf = ByteBuffer.wrap(shortBuf);
163
164    private long centralDirectoryStartDiskNumber, centralDirectoryStartRelativeOffset;
165    private long centralDirectoryStartOffset;
166
167    /**
168     * Opens the given file for reading, assuming "UTF8" for file names.
169     *
170     * @param f the archive.
171     *
172     * @throws IOException if an error occurs while reading the file.
173     */
174    public ZipFile(final File f) throws IOException {
175        this(f, ZipEncodingHelper.UTF8);
176    }
177
178    /**
179     * Opens the given path for reading, assuming "UTF8" for file names.
180     * @param path path to the archive.
181     * @throws IOException if an error occurs while reading the file.
182     * @since 1.22
183     */
184    public ZipFile(final Path path) throws IOException {
185        this(path, ZipEncodingHelper.UTF8);
186    }
187
188    /**
189     * Opens the given file for reading, assuming "UTF8".
190     *
191     * @param name name of the archive.
192     *
193     * @throws IOException if an error occurs while reading the file.
194     */
195    public ZipFile(final String name) throws IOException {
196        this(new File(name).toPath(), ZipEncodingHelper.UTF8);
197    }
198
199    /**
200     * Opens the given file for reading, assuming the specified
201     * encoding for file names, scanning unicode extra fields.
202     *
203     * @param name name of the archive.
204     * @param encoding the encoding to use for file names, use null
205     * for the platform's default encoding
206     *
207     * @throws IOException if an error occurs while reading the file.
208     */
209    public ZipFile(final String name, final String encoding) throws IOException {
210        this(new File(name).toPath(), encoding, true);
211    }
212
213    /**
214     * Opens the given file for reading, assuming the specified
215     * encoding for file names and scanning for unicode extra fields.
216     *
217     * @param f the archive.
218     * @param encoding the encoding to use for file names, use null
219     * for the platform's default encoding
220     *
221     * @throws IOException if an error occurs while reading the file.
222     */
223    public ZipFile(final File f, final String encoding) throws IOException {
224        this(f.toPath(), encoding, true);
225    }
226
227    /**
228     * Opens the given path for reading, assuming the specified
229     * encoding for file names and scanning for unicode extra fields.
230     * @param path path to the archive.
231     * @param encoding the encoding to use for file names, use null
232     * for the platform's default encoding
233     * @throws IOException if an error occurs while reading the file.
234     * @since 1.22
235     */
236    public ZipFile(final Path path, final String encoding) throws IOException {
237        this(path, encoding, true);
238    }
239
240    /**
241     * Opens the given file for reading, assuming the specified
242     * encoding for file names.
243     *
244     * @param f the archive.
245     * @param encoding the encoding to use for file names, use null
246     * for the platform's default encoding
247     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
248     * Extra Fields (if present) to set the file names.
249     *
250     * @throws IOException if an error occurs while reading the file.
251     */
252    public ZipFile(final File f, final String encoding, final boolean useUnicodeExtraFields)
253        throws IOException {
254        this(f.toPath(), encoding, useUnicodeExtraFields, false);
255    }
256
257    /**
258     * Opens the given path for reading, assuming the specified
259     * encoding for file names.
260     * @param path path to the archive.
261     * @param encoding the encoding to use for file names, use null
262     * for the platform's default encoding
263     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
264     * Extra Fields (if present) to set the file names.
265     * @throws IOException if an error occurs while reading the file.
266     * @since 1.22
267     */
268    public ZipFile(final Path path, final String encoding, final boolean useUnicodeExtraFields)
269            throws IOException {
270        this(path, encoding, useUnicodeExtraFields, false);
271    }
272
273    /**
274     * Opens the given file for reading, assuming the specified
275     * encoding for file names.
276     *
277     *
278     * <p>By default the central directory record and all local file headers of the archive will be read immediately
279     * which may take a considerable amount of time when the archive is big. The {@code ignoreLocalFileHeader} parameter
280     * can be set to {@code true} which restricts parsing to the central directory. Unfortunately the local file header
281     * may contain information not present inside of the central directory which will not be available when the argument
282     * is set to {@code true}. This includes the content of the Unicode extra field, so setting {@code
283     * ignoreLocalFileHeader} to {@code true} means {@code useUnicodeExtraFields} will be ignored effectively. Also
284     * {@link #getRawInputStream} is always going to return {@code null} if {@code ignoreLocalFileHeader} is {@code
285     * true}.</p>
286     *
287     * @param f the archive.
288     * @param encoding the encoding to use for file names, use null
289     * for the platform's default encoding
290     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
291     * Extra Fields (if present) to set the file names.
292     * @param ignoreLocalFileHeader whether to ignore information
293     * stored inside the local file header (see the notes in this method's javadoc)
294     *
295     * @throws IOException if an error occurs while reading the file.
296     * @since 1.19
297     */
298    public ZipFile(final File f, final String encoding, final boolean useUnicodeExtraFields,
299                   final boolean ignoreLocalFileHeader)
300        throws IOException {
301        this(Files.newByteChannel(f.toPath(), EnumSet.of(StandardOpenOption.READ)),
302             f.getAbsolutePath(), encoding, useUnicodeExtraFields, true, ignoreLocalFileHeader);
303    }
304
305    /**
306     * Opens the given path for reading, assuming the specified
307     * encoding for file names.
308     * <p>By default the central directory record and all local file headers of the archive will be read immediately
309     * which may take a considerable amount of time when the archive is big. The {@code ignoreLocalFileHeader} parameter
310     * can be set to {@code true} which restricts parsing to the central directory. Unfortunately the local file header
311     * may contain information not present inside of the central directory which will not be available when the argument
312     * is set to {@code true}. This includes the content of the Unicode extra field, so setting {@code
313     * ignoreLocalFileHeader} to {@code true} means {@code useUnicodeExtraFields} will be ignored effectively. Also
314     * {@link #getRawInputStream} is always going to return {@code null} if {@code ignoreLocalFileHeader} is {@code
315     * true}.</p>
316     * @param path path to the archive.
317     * @param encoding the encoding to use for file names, use null
318     * for the platform's default encoding
319     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
320     * Extra Fields (if present) to set the file names.
321     * @param ignoreLocalFileHeader whether to ignore information
322     * stored inside the local file header (see the notes in this method's javadoc)
323     * @throws IOException if an error occurs while reading the file.
324     * @since 1.22
325     */
326    public ZipFile(final Path path, final String encoding, final boolean useUnicodeExtraFields,
327                   final boolean ignoreLocalFileHeader)
328            throws IOException {
329        this(Files.newByteChannel(path, EnumSet.of(StandardOpenOption.READ)),
330                path.toAbsolutePath().toString(), encoding, useUnicodeExtraFields,
331                true, ignoreLocalFileHeader);
332    }
333
334    /**
335     * Opens the given channel for reading, assuming "UTF8" for file names.
336     *
337     * <p>{@link
338     * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
339     * allows you to read from an in-memory archive.</p>
340     *
341     * @param channel the archive.
342     *
343     * @throws IOException if an error occurs while reading the file.
344     * @since 1.13
345     */
346    public ZipFile(final SeekableByteChannel channel)
347            throws IOException {
348        this(channel, "unknown archive", ZipEncodingHelper.UTF8, true);
349    }
350
351    /**
352     * Opens the given channel for reading, assuming the specified
353     * encoding for file names.
354     *
355     * <p>{@link
356     * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
357     * allows you to read from an in-memory archive.</p>
358     *
359     * @param channel the archive.
360     * @param encoding the encoding to use for file names, use null
361     * for the platform's default encoding
362     *
363     * @throws IOException if an error occurs while reading the file.
364     * @since 1.13
365     */
366    public ZipFile(final SeekableByteChannel channel, final String encoding)
367        throws IOException {
368        this(channel, "unknown archive", encoding, true);
369    }
370
371    /**
372     * Opens the given channel for reading, assuming the specified
373     * encoding for file names.
374     *
375     * <p>{@link
376     * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
377     * allows you to read from an in-memory archive.</p>
378     *
379     * @param channel the archive.
380     * @param archiveName name of the archive, used for error messages only.
381     * @param encoding the encoding to use for file names, use null
382     * for the platform's default encoding
383     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
384     * Extra Fields (if present) to set the file names.
385     *
386     * @throws IOException if an error occurs while reading the file.
387     * @since 1.13
388     */
389    public ZipFile(final SeekableByteChannel channel, final String archiveName,
390                   final String encoding, final boolean useUnicodeExtraFields)
391        throws IOException {
392        this(channel, archiveName, encoding, useUnicodeExtraFields, false, false);
393    }
394
395    /**
396     * Opens the given channel for reading, assuming the specified
397     * encoding for file names.
398     *
399     * <p>{@link
400     * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
401     * allows you to read from an in-memory archive.</p>
402     *
403     * <p>By default the central directory record and all local file headers of the archive will be read immediately
404     * which may take a considerable amount of time when the archive is big. The {@code ignoreLocalFileHeader} parameter
405     * can be set to {@code true} which restricts parsing to the central directory. Unfortunately the local file header
406     * may contain information not present inside of the central directory which will not be available when the argument
407     * is set to {@code true}. This includes the content of the Unicode extra field, so setting {@code
408     * ignoreLocalFileHeader} to {@code true} means {@code useUnicodeExtraFields} will be ignored effectively. Also
409     * {@link #getRawInputStream} is always going to return {@code null} if {@code ignoreLocalFileHeader} is {@code
410     * true}.</p>
411     *
412     * @param channel the archive.
413     * @param archiveName name of the archive, used for error messages only.
414     * @param encoding the encoding to use for file names, use null
415     * for the platform's default encoding
416     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
417     * Extra Fields (if present) to set the file names.
418     * @param ignoreLocalFileHeader whether to ignore information
419     * stored inside the local file header (see the notes in this method's javadoc)
420     *
421     * @throws IOException if an error occurs while reading the file.
422     * @since 1.19
423     */
424    public ZipFile(final SeekableByteChannel channel, final String archiveName,
425                   final String encoding, final boolean useUnicodeExtraFields,
426                   final boolean ignoreLocalFileHeader)
427        throws IOException {
428        this(channel, archiveName, encoding, useUnicodeExtraFields, false, ignoreLocalFileHeader);
429    }
430
431    private ZipFile(final SeekableByteChannel channel, final String archiveName,
432                    final String encoding, final boolean useUnicodeExtraFields,
433                    final boolean closeOnError, final boolean ignoreLocalFileHeader)
434        throws IOException {
435        isSplitZipArchive = (channel instanceof ZipSplitReadOnlySeekableByteChannel);
436
437        this.archiveName = archiveName;
438        this.encoding = encoding;
439        this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
440        this.useUnicodeExtraFields = useUnicodeExtraFields;
441        archive = channel;
442        boolean success = false;
443        try {
444            final Map<ZipArchiveEntry, NameAndComment> entriesWithoutUTF8Flag =
445                populateFromCentralDirectory();
446            if (!ignoreLocalFileHeader) {
447                resolveLocalFileHeaderData(entriesWithoutUTF8Flag);
448            }
449            fillNameMap();
450            success = true;
451        } catch (final IOException e) {
452            throw new IOException("Error on ZipFile " + archiveName, e);
453        } finally {
454            closed = !success;
455            if (!success && closeOnError) {
456                IOUtils.closeQuietly(archive);
457            }
458        }
459    }
460
461    /**
462     * The encoding to use for file names and the file comment.
463     *
464     * @return null if using the platform's default character encoding.
465     */
466    public String getEncoding() {
467        return encoding;
468    }
469
470    /**
471     * Closes the archive.
472     * @throws IOException if an error occurs closing the archive.
473     */
474    @Override
475    public void close() throws IOException {
476        // this flag is only written here and read in finalize() which
477        // can never be run in parallel.
478        // no synchronization needed.
479        closed = true;
480
481        archive.close();
482    }
483
484    /**
485     * close a zipfile quietly; throw no io fault, do nothing
486     * on a null parameter
487     * @param zipfile file to close, can be null
488     */
489    public static void closeQuietly(final ZipFile zipfile) {
490        IOUtils.closeQuietly(zipfile);
491    }
492
493    /**
494     * Returns all entries.
495     *
496     * <p>Entries will be returned in the same order they appear
497     * within the archive's central directory.</p>
498     *
499     * @return all entries as {@link ZipArchiveEntry} instances
500     */
501    public Enumeration<ZipArchiveEntry> getEntries() {
502        return Collections.enumeration(entries);
503    }
504
505    /**
506     * Returns all entries in physical order.
507     *
508     * <p>Entries will be returned in the same order their contents
509     * appear within the archive.</p>
510     *
511     * @return all entries as {@link ZipArchiveEntry} instances
512     *
513     * @since 1.1
514     */
515    public Enumeration<ZipArchiveEntry> getEntriesInPhysicalOrder() {
516        final ZipArchiveEntry[] allEntries = entries.toArray(ZipArchiveEntry.EMPTY_ZIP_ARCHIVE_ENTRY_ARRAY);
517        Arrays.sort(allEntries, offsetComparator);
518        return Collections.enumeration(Arrays.asList(allEntries));
519    }
520
521    /**
522     * Returns a named entry - or {@code null} if no entry by
523     * that name exists.
524     *
525     * <p>If multiple entries with the same name exist the first entry
526     * in the archive's central directory by that name is
527     * returned.</p>
528     *
529     * @param name name of the entry.
530     * @return the ZipArchiveEntry corresponding to the given name - or
531     * {@code null} if not present.
532     */
533    public ZipArchiveEntry getEntry(final String name) {
534        final LinkedList<ZipArchiveEntry> entriesOfThatName = nameMap.get(name);
535        return entriesOfThatName != null ? entriesOfThatName.getFirst() : null;
536    }
537
538    /**
539     * Returns all named entries in the same order they appear within
540     * the archive's central directory.
541     *
542     * @param name name of the entry.
543     * @return the Iterable&lt;ZipArchiveEntry&gt; corresponding to the
544     * given name
545     * @since 1.6
546     */
547    public Iterable<ZipArchiveEntry> getEntries(final String name) {
548        final List<ZipArchiveEntry> entriesOfThatName = nameMap.get(name);
549        return entriesOfThatName != null ? entriesOfThatName
550            : Collections.emptyList();
551    }
552
553    /**
554     * Returns all named entries in the same order their contents
555     * appear within the archive.
556     *
557     * @param name name of the entry.
558     * @return the Iterable&lt;ZipArchiveEntry&gt; corresponding to the
559     * given name
560     * @since 1.6
561     */
562    public Iterable<ZipArchiveEntry> getEntriesInPhysicalOrder(final String name) {
563        ZipArchiveEntry[] entriesOfThatName = ZipArchiveEntry.EMPTY_ZIP_ARCHIVE_ENTRY_ARRAY;
564        if (nameMap.containsKey(name)) {
565            entriesOfThatName = nameMap.get(name).toArray(entriesOfThatName);
566            Arrays.sort(entriesOfThatName, offsetComparator);
567        }
568        return Arrays.asList(entriesOfThatName);
569    }
570
571    /**
572     * Whether this class is able to read the given entry.
573     *
574     * <p>May return false if it is set up to use encryption or a
575     * compression method that hasn't been implemented yet.</p>
576     * @since 1.1
577     * @param ze the entry
578     * @return whether this class is able to read the given entry.
579     */
580    public boolean canReadEntryData(final ZipArchiveEntry ze) {
581        return ZipUtil.canHandleEntryData(ze);
582    }
583
584    /**
585     * Expose the raw stream of the archive entry (compressed form).
586     *
587     * <p>This method does not relate to how/if we understand the payload in the
588     * stream, since we really only intend to move it on to somewhere else.</p>
589     *
590     * @param ze The entry to get the stream for
591     * @return The raw input stream containing (possibly) compressed data.
592     * @since 1.11
593     */
594    public InputStream getRawInputStream(final ZipArchiveEntry ze) {
595        if (!(ze instanceof Entry)) {
596            return null;
597        }
598        final long start = ze.getDataOffset();
599        if (start == EntryStreamOffsets.OFFSET_UNKNOWN) {
600            return null;
601        }
602        return createBoundedInputStream(start, ze.getCompressedSize());
603    }
604
605
606    /**
607     * Transfer selected entries from this zipfile to a given #ZipArchiveOutputStream.
608     * Compression and all other attributes will be as in this file.
609     * <p>This method transfers entries based on the central directory of the zip file.</p>
610     *
611     * @param target The zipArchiveOutputStream to write the entries to
612     * @param predicate A predicate that selects which entries to write
613     * @throws IOException on error
614     */
615    public void copyRawEntries(final ZipArchiveOutputStream target, final ZipArchiveEntryPredicate predicate)
616            throws IOException {
617        final Enumeration<ZipArchiveEntry> src = getEntriesInPhysicalOrder();
618        while (src.hasMoreElements()) {
619            final ZipArchiveEntry entry = src.nextElement();
620            if (predicate.test( entry)) {
621                target.addRawArchiveEntry(entry, getRawInputStream(entry));
622            }
623        }
624    }
625
626    /**
627     * Returns an InputStream for reading the contents of the given entry.
628     *
629     * @param ze the entry to get the stream for.
630     * @return a stream to read the entry from. The returned stream
631     * implements {@link InputStreamStatistics}.
632     * @throws IOException if unable to create an input stream from the zipentry
633     */
634    public InputStream getInputStream(final ZipArchiveEntry ze)
635        throws IOException {
636        if (!(ze instanceof Entry)) {
637            return null;
638        }
639        // cast validity is checked just above
640        ZipUtil.checkRequestedFeatures(ze);
641        final long start = getDataOffset(ze);
642
643        // doesn't get closed if the method is not supported - which
644        // should never happen because of the checkRequestedFeatures
645        // call above
646        final InputStream is =
647            new BufferedInputStream(createBoundedInputStream(start, ze.getCompressedSize())); //NOSONAR
648        switch (ZipMethod.getMethodByCode(ze.getMethod())) {
649            case STORED:
650                return new StoredStatisticsStream(is);
651            case UNSHRINKING:
652                return new UnshrinkingInputStream(is);
653            case IMPLODING:
654                try {
655                    return new ExplodingInputStream(ze.getGeneralPurposeBit().getSlidingDictionarySize(),
656                            ze.getGeneralPurposeBit().getNumberOfShannonFanoTrees(), is);
657                } catch (final IllegalArgumentException ex) {
658                    throw new IOException("bad IMPLODE data", ex);
659                }
660            case DEFLATED:
661                final Inflater inflater = new Inflater(true);
662                // Inflater with nowrap=true has this odd contract for a zero padding
663                // byte following the data stream; this used to be zlib's requirement
664                // and has been fixed a long time ago, but the contract persists so
665                // we comply.
666                // https://docs.oracle.com/javase/7/docs/api/java/util/zip/Inflater.html#Inflater(boolean)
667                return new InflaterInputStreamWithStatistics(new SequenceInputStream(is, new ByteArrayInputStream(ONE_ZERO_BYTE)),
668                    inflater) {
669                    @Override
670                    public void close() throws IOException {
671                        try {
672                            super.close();
673                        } finally {
674                            inflater.end();
675                        }
676                    }
677                };
678            case BZIP2:
679                return new BZip2CompressorInputStream(is);
680            case ENHANCED_DEFLATED:
681                return new Deflate64CompressorInputStream(is);
682            case AES_ENCRYPTED:
683            case EXPANDING_LEVEL_1:
684            case EXPANDING_LEVEL_2:
685            case EXPANDING_LEVEL_3:
686            case EXPANDING_LEVEL_4:
687            case JPEG:
688            case LZMA:
689            case PKWARE_IMPLODING:
690            case PPMD:
691            case TOKENIZATION:
692            case UNKNOWN:
693            case WAVPACK:
694            case XZ:
695            default:
696                throw new UnsupportedZipFeatureException(ZipMethod.getMethodByCode(ze.getMethod()), ze);
697        }
698    }
699
700    /**
701     * <p>
702     * Convenience method to return the entry's content as a String if isUnixSymlink()
703     * returns true for it, otherwise returns null.
704     * </p>
705     *
706     * <p>This method assumes the symbolic link's file name uses the
707     * same encoding that as been specified for this ZipFile.</p>
708     *
709     * @param entry ZipArchiveEntry object that represents the symbolic link
710     * @return entry's content as a String
711     * @throws IOException problem with content's input stream
712     * @since 1.5
713     */
714    public String getUnixSymlink(final ZipArchiveEntry entry) throws IOException {
715        if (entry != null && entry.isUnixSymlink()) {
716            try (InputStream in = getInputStream(entry)) {
717                return zipEncoding.decode(IOUtils.toByteArray(in));
718            }
719        }
720        return null;
721    }
722
723    /**
724     * Ensures that the close method of this zipfile is called when
725     * there are no more references to it.
726     * @see #close()
727     */
728    @Override
729    protected void finalize() throws Throwable {
730        try {
731            if (!closed) {
732                System.err.println("Cleaning up unclosed ZipFile for archive "
733                                   + archiveName);
734                close();
735            }
736        } finally {
737            super.finalize();
738        }
739    }
740
741    /**
742     * Length of a "central directory" entry structure without file
743     * name, extra fields or comment.
744     */
745    private static final int CFH_LEN =
746        /* version made by                 */ SHORT
747        /* version needed to extract       */ + SHORT
748        /* general purpose bit flag        */ + SHORT
749        /* compression method              */ + SHORT
750        /* last mod file time              */ + SHORT
751        /* last mod file date              */ + SHORT
752        /* crc-32                          */ + WORD
753        /* compressed size                 */ + WORD
754        /* uncompressed size               */ + WORD
755        /* file name length                 */ + SHORT
756        /* extra field length              */ + SHORT
757        /* file comment length             */ + SHORT
758        /* disk number start               */ + SHORT
759        /* internal file attributes        */ + SHORT
760        /* external file attributes        */ + WORD
761        /* relative offset of local header */ + WORD;
762
763    private static final long CFH_SIG =
764        ZipLong.getValue(ZipArchiveOutputStream.CFH_SIG);
765
766    /**
767     * Reads the central directory of the given archive and populates
768     * the internal tables with ZipArchiveEntry instances.
769     *
770     * <p>The ZipArchiveEntrys will know all data that can be obtained from
771     * the central directory alone, but not the data that requires the
772     * local file header or additional data to be read.</p>
773     *
774     * @return a map of zipentries that didn't have the language
775     * encoding flag set when read.
776     */
777    private Map<ZipArchiveEntry, NameAndComment> populateFromCentralDirectory()
778        throws IOException {
779        final HashMap<ZipArchiveEntry, NameAndComment> noUTF8Flag =
780            new HashMap<>();
781
782        positionAtCentralDirectory();
783        centralDirectoryStartOffset = archive.position();
784
785        wordBbuf.rewind();
786        IOUtils.readFully(archive, wordBbuf);
787        long sig = ZipLong.getValue(wordBuf);
788
789        if (sig != CFH_SIG && startsWithLocalFileHeader()) {
790            throw new IOException("Central directory is empty, can't expand"
791                                  + " corrupt archive.");
792        }
793
794        while (sig == CFH_SIG) {
795            readCentralDirectoryEntry(noUTF8Flag);
796            wordBbuf.rewind();
797            IOUtils.readFully(archive, wordBbuf);
798            sig = ZipLong.getValue(wordBuf);
799        }
800        return noUTF8Flag;
801    }
802
803    /**
804     * Reads an individual entry of the central directory, creats an
805     * ZipArchiveEntry from it and adds it to the global maps.
806     *
807     * @param noUTF8Flag map used to collect entries that don't have
808     * their UTF-8 flag set and whose name will be set by data read
809     * from the local file header later.  The current entry may be
810     * added to this map.
811     */
812    private void
813        readCentralDirectoryEntry(final Map<ZipArchiveEntry, NameAndComment> noUTF8Flag)
814        throws IOException {
815        cfhBbuf.rewind();
816        IOUtils.readFully(archive, cfhBbuf);
817        int off = 0;
818        final Entry ze = new Entry();
819
820        final int versionMadeBy = ZipShort.getValue(cfhBuf, off);
821        off += SHORT;
822        ze.setVersionMadeBy(versionMadeBy);
823        ze.setPlatform((versionMadeBy >> BYTE_SHIFT) & NIBLET_MASK);
824
825        ze.setVersionRequired(ZipShort.getValue(cfhBuf, off));
826        off += SHORT; // version required
827
828        final GeneralPurposeBit gpFlag = GeneralPurposeBit.parse(cfhBuf, off);
829        final boolean hasUTF8Flag = gpFlag.usesUTF8ForNames();
830        final ZipEncoding entryEncoding =
831            hasUTF8Flag ? ZipEncodingHelper.UTF8_ZIP_ENCODING : zipEncoding;
832        if (hasUTF8Flag) {
833            ze.setNameSource(ZipArchiveEntry.NameSource.NAME_WITH_EFS_FLAG);
834        }
835        ze.setGeneralPurposeBit(gpFlag);
836        ze.setRawFlag(ZipShort.getValue(cfhBuf, off));
837
838        off += SHORT;
839
840        //noinspection MagicConstant
841        ze.setMethod(ZipShort.getValue(cfhBuf, off));
842        off += SHORT;
843
844        final long time = ZipUtil.dosToJavaTime(ZipLong.getValue(cfhBuf, off));
845        ze.setTime(time);
846        off += WORD;
847
848        ze.setCrc(ZipLong.getValue(cfhBuf, off));
849        off += WORD;
850
851        long size = ZipLong.getValue(cfhBuf, off);
852        if (size < 0) {
853            throw new IOException("broken archive, entry with negative compressed size");
854        }
855        ze.setCompressedSize(size);
856        off += WORD;
857
858        size = ZipLong.getValue(cfhBuf, off);
859        if (size < 0) {
860            throw new IOException("broken archive, entry with negative size");
861        }
862        ze.setSize(size);
863        off += WORD;
864
865        final int fileNameLen = ZipShort.getValue(cfhBuf, off);
866        off += SHORT;
867        if (fileNameLen < 0) {
868            throw new IOException("broken archive, entry with negative fileNameLen");
869        }
870
871        final int extraLen = ZipShort.getValue(cfhBuf, off);
872        off += SHORT;
873        if (extraLen < 0) {
874            throw new IOException("broken archive, entry with negative extraLen");
875        }
876
877        final int commentLen = ZipShort.getValue(cfhBuf, off);
878        off += SHORT;
879        if (commentLen < 0) {
880            throw new IOException("broken archive, entry with negative commentLen");
881        }
882
883        ze.setDiskNumberStart(ZipShort.getValue(cfhBuf, off));
884        off += SHORT;
885
886        ze.setInternalAttributes(ZipShort.getValue(cfhBuf, off));
887        off += SHORT;
888
889        ze.setExternalAttributes(ZipLong.getValue(cfhBuf, off));
890        off += WORD;
891
892        final byte[] fileName = IOUtils.readRange(archive, fileNameLen);
893        if (fileName.length < fileNameLen) {
894            throw new EOFException();
895        }
896        ze.setName(entryEncoding.decode(fileName), fileName);
897
898        // LFH offset,
899        ze.setLocalHeaderOffset(ZipLong.getValue(cfhBuf, off));
900        // data offset will be filled later
901        entries.add(ze);
902
903        final byte[] cdExtraData = IOUtils.readRange(archive, extraLen);
904        if (cdExtraData.length < extraLen) {
905            throw new EOFException();
906        }
907        try {
908            ze.setCentralDirectoryExtra(cdExtraData);
909        } catch (RuntimeException ex) {
910            final ZipException z = new ZipException("Invalid extra data in entry " + ze.getName());
911            z.initCause(ex);
912            throw z;
913        }
914
915        setSizesAndOffsetFromZip64Extra(ze);
916        sanityCheckLFHOffset(ze);
917
918        final byte[] comment = IOUtils.readRange(archive, commentLen);
919        if (comment.length < commentLen) {
920            throw new EOFException();
921        }
922        ze.setComment(entryEncoding.decode(comment));
923
924        if (!hasUTF8Flag && useUnicodeExtraFields) {
925            noUTF8Flag.put(ze, new NameAndComment(fileName, comment));
926        }
927
928        ze.setStreamContiguous(true);
929    }
930
931    private void sanityCheckLFHOffset(final ZipArchiveEntry ze) throws IOException {
932        if (ze.getDiskNumberStart() < 0) {
933            throw new IOException("broken archive, entry with negative disk number");
934        }
935        if (ze.getLocalHeaderOffset() < 0) {
936            throw new IOException("broken archive, entry with negative local file header offset");
937        }
938        if (isSplitZipArchive) {
939            if (ze.getDiskNumberStart() > centralDirectoryStartDiskNumber) {
940                throw new IOException("local file header for " + ze.getName() + " starts on a later disk than central directory");
941            }
942            if (ze.getDiskNumberStart() == centralDirectoryStartDiskNumber
943                && ze.getLocalHeaderOffset() > centralDirectoryStartRelativeOffset) {
944                throw new IOException("local file header for " + ze.getName() + " starts after central directory");
945            }
946        } else if (ze.getLocalHeaderOffset() > centralDirectoryStartOffset) {
947            throw new IOException("local file header for " + ze.getName() + " starts after central directory");
948        }
949    }
950
951    /**
952     * If the entry holds a Zip64 extended information extra field,
953     * read sizes from there if the entry's sizes are set to
954     * 0xFFFFFFFFF, do the same for the offset of the local file
955     * header.
956     *
957     * <p>Ensures the Zip64 extra either knows both compressed and
958     * uncompressed size or neither of both as the internal logic in
959     * ExtraFieldUtils forces the field to create local header data
960     * even if they are never used - and here a field with only one
961     * size would be invalid.</p>
962     */
963    private void setSizesAndOffsetFromZip64Extra(final ZipArchiveEntry ze)
964        throws IOException {
965        final ZipExtraField extra =
966            ze.getExtraField(Zip64ExtendedInformationExtraField.HEADER_ID);
967        if (extra != null && !(extra instanceof Zip64ExtendedInformationExtraField)) {
968            throw new ZipException("archive contains unparseable zip64 extra field");
969        }
970        final Zip64ExtendedInformationExtraField z64 =
971            (Zip64ExtendedInformationExtraField) extra;
972        if (z64 != null) {
973            final boolean hasUncompressedSize = ze.getSize() == ZIP64_MAGIC;
974            final boolean hasCompressedSize = ze.getCompressedSize() == ZIP64_MAGIC;
975            final boolean hasRelativeHeaderOffset =
976                ze.getLocalHeaderOffset() == ZIP64_MAGIC;
977            final boolean hasDiskStart = ze.getDiskNumberStart() == ZIP64_MAGIC_SHORT;
978            z64.reparseCentralDirectoryData(hasUncompressedSize,
979                                            hasCompressedSize,
980                                            hasRelativeHeaderOffset,
981                                            hasDiskStart);
982
983            if (hasUncompressedSize) {
984                final long size = z64.getSize().getLongValue();
985                if (size < 0) {
986                    throw new IOException("broken archive, entry with negative size");
987                }
988                ze.setSize(size);
989            } else if (hasCompressedSize) {
990                z64.setSize(new ZipEightByteInteger(ze.getSize()));
991            }
992
993            if (hasCompressedSize) {
994                final long size = z64.getCompressedSize().getLongValue();
995                if (size < 0) {
996                    throw new IOException("broken archive, entry with negative compressed size");
997                }
998                ze.setCompressedSize(size);
999            } else if (hasUncompressedSize) {
1000                z64.setCompressedSize(new ZipEightByteInteger(ze.getCompressedSize()));
1001            }
1002
1003            if (hasRelativeHeaderOffset) {
1004                ze.setLocalHeaderOffset(z64.getRelativeHeaderOffset().getLongValue());
1005            }
1006
1007            if (hasDiskStart) {
1008                ze.setDiskNumberStart(z64.getDiskStartNumber().getValue());
1009            }
1010        }
1011    }
1012
1013    /**
1014     * Length of the "End of central directory record" - which is
1015     * supposed to be the last structure of the archive - without file
1016     * comment.
1017     */
1018    static final int MIN_EOCD_SIZE =
1019        /* end of central dir signature    */ WORD
1020        /* number of this disk             */ + SHORT
1021        /* number of the disk with the     */
1022        /* start of the central directory  */ + SHORT
1023        /* total number of entries in      */
1024        /* the central dir on this disk    */ + SHORT
1025        /* total number of entries in      */
1026        /* the central dir                 */ + SHORT
1027        /* size of the central directory   */ + WORD
1028        /* offset of start of central      */
1029        /* directory with respect to       */
1030        /* the starting disk number        */ + WORD
1031        /* zipfile comment length          */ + SHORT;
1032
1033    /**
1034     * Maximum length of the "End of central directory record" with a
1035     * file comment.
1036     */
1037    private static final int MAX_EOCD_SIZE = MIN_EOCD_SIZE
1038        /* maximum length of zipfile comment */ + ZIP64_MAGIC_SHORT;
1039
1040    /**
1041     * Offset of the field that holds the location of the first
1042     * central directory entry inside the "End of central directory
1043     * record" relative to the start of the "End of central directory
1044     * record".
1045     */
1046    private static final int CFD_LOCATOR_OFFSET =
1047        /* end of central dir signature    */ WORD
1048        /* number of this disk             */ + SHORT
1049        /* number of the disk with the     */
1050        /* start of the central directory  */ + SHORT
1051        /* total number of entries in      */
1052        /* the central dir on this disk    */ + SHORT
1053        /* total number of entries in      */
1054        /* the central dir                 */ + SHORT
1055        /* size of the central directory   */ + WORD;
1056
1057    /**
1058     * Offset of the field that holds the disk number of the first
1059     * central directory entry inside the "End of central directory
1060     * record" relative to the start of the "End of central directory
1061     * record".
1062     */
1063    private static final int CFD_DISK_OFFSET =
1064            /* end of central dir signature    */ WORD
1065            /* number of this disk             */ + SHORT;
1066
1067    /**
1068     * Offset of the field that holds the location of the first
1069     * central directory entry inside the "End of central directory
1070     * record" relative to the "number of the disk with the start
1071     * of the central directory".
1072     */
1073    private static final int CFD_LOCATOR_RELATIVE_OFFSET =
1074            /* total number of entries in      */
1075            /* the central dir on this disk    */ + SHORT
1076            /* total number of entries in      */
1077            /* the central dir                 */ + SHORT
1078            /* size of the central directory   */ + WORD;
1079
1080    /**
1081     * Length of the "Zip64 end of central directory locator" - which
1082     * should be right in front of the "end of central directory
1083     * record" if one is present at all.
1084     */
1085    private static final int ZIP64_EOCDL_LENGTH =
1086        /* zip64 end of central dir locator sig */ WORD
1087        /* number of the disk with the start    */
1088        /* start of the zip64 end of            */
1089        /* central directory                    */ + WORD
1090        /* relative offset of the zip64         */
1091        /* end of central directory record      */ + DWORD
1092        /* total number of disks                */ + WORD;
1093
1094    /**
1095     * Offset of the field that holds the location of the "Zip64 end
1096     * of central directory record" inside the "Zip64 end of central
1097     * directory locator" relative to the start of the "Zip64 end of
1098     * central directory locator".
1099     */
1100    private static final int ZIP64_EOCDL_LOCATOR_OFFSET =
1101        /* zip64 end of central dir locator sig */ WORD
1102        /* number of the disk with the start    */
1103        /* start of the zip64 end of            */
1104        /* central directory                    */ + WORD;
1105
1106    /**
1107     * Offset of the field that holds the location of the first
1108     * central directory entry inside the "Zip64 end of central
1109     * directory record" relative to the start of the "Zip64 end of
1110     * central directory record".
1111     */
1112    private static final int ZIP64_EOCD_CFD_LOCATOR_OFFSET =
1113        /* zip64 end of central dir        */
1114        /* signature                       */ WORD
1115        /* size of zip64 end of central    */
1116        /* directory record                */ + DWORD
1117        /* version made by                 */ + SHORT
1118        /* version needed to extract       */ + SHORT
1119        /* number of this disk             */ + WORD
1120        /* number of the disk with the     */
1121        /* start of the central directory  */ + WORD
1122        /* total number of entries in the  */
1123        /* central directory on this disk  */ + DWORD
1124        /* total number of entries in the  */
1125        /* central directory               */ + DWORD
1126        /* size of the central directory   */ + DWORD;
1127
1128    /**
1129     * Offset of the field that holds the disk number of the first
1130     * central directory entry inside the "Zip64 end of central
1131     * directory record" relative to the start of the "Zip64 end of
1132     * central directory record".
1133     */
1134    private static final int ZIP64_EOCD_CFD_DISK_OFFSET =
1135            /* zip64 end of central dir        */
1136            /* signature                       */ WORD
1137            /* size of zip64 end of central    */
1138            /* directory record                */ + DWORD
1139            /* version made by                 */ + SHORT
1140            /* version needed to extract       */ + SHORT
1141            /* number of this disk             */ + WORD;
1142
1143    /**
1144     * Offset of the field that holds the location of the first
1145     * central directory entry inside the "Zip64 end of central
1146     * directory record" relative to the "number of the disk
1147     * with the start of the central directory".
1148     */
1149    private static final int ZIP64_EOCD_CFD_LOCATOR_RELATIVE_OFFSET =
1150            /* total number of entries in the  */
1151            /* central directory on this disk  */ DWORD
1152            /* total number of entries in the  */
1153            /* central directory               */ + DWORD
1154            /* size of the central directory   */ + DWORD;
1155
1156    /**
1157     * Searches for either the &quot;Zip64 end of central directory
1158     * locator&quot; or the &quot;End of central dir record&quot;, parses
1159     * it and positions the stream at the first central directory
1160     * record.
1161     */
1162    private void positionAtCentralDirectory()
1163        throws IOException {
1164        positionAtEndOfCentralDirectoryRecord();
1165        boolean found = false;
1166        final boolean searchedForZip64EOCD =
1167            archive.position() > ZIP64_EOCDL_LENGTH;
1168        if (searchedForZip64EOCD) {
1169            archive.position(archive.position() - ZIP64_EOCDL_LENGTH);
1170            wordBbuf.rewind();
1171            IOUtils.readFully(archive, wordBbuf);
1172            found = Arrays.equals(ZipArchiveOutputStream.ZIP64_EOCD_LOC_SIG,
1173                                  wordBuf);
1174        }
1175        if (!found) {
1176            // not a ZIP64 archive
1177            if (searchedForZip64EOCD) {
1178                skipBytes(ZIP64_EOCDL_LENGTH - WORD);
1179            }
1180            positionAtCentralDirectory32();
1181        } else {
1182            positionAtCentralDirectory64();
1183        }
1184    }
1185
1186    /**
1187     * Parses the &quot;Zip64 end of central directory locator&quot;,
1188     * finds the &quot;Zip64 end of central directory record&quot; using the
1189     * parsed information, parses that and positions the stream at the
1190     * first central directory record.
1191     *
1192     * Expects stream to be positioned right behind the &quot;Zip64
1193     * end of central directory locator&quot;'s signature.
1194     */
1195    private void positionAtCentralDirectory64()
1196        throws IOException {
1197        if (isSplitZipArchive) {
1198            wordBbuf.rewind();
1199            IOUtils.readFully(archive, wordBbuf);
1200            final long diskNumberOfEOCD = ZipLong.getValue(wordBuf);
1201
1202            dwordBbuf.rewind();
1203            IOUtils.readFully(archive, dwordBbuf);
1204            final long relativeOffsetOfEOCD = ZipEightByteInteger.getLongValue(dwordBuf);
1205            ((ZipSplitReadOnlySeekableByteChannel) archive)
1206                .position(diskNumberOfEOCD, relativeOffsetOfEOCD);
1207        } else {
1208            skipBytes(ZIP64_EOCDL_LOCATOR_OFFSET
1209                    - WORD /* signature has already been read */);
1210            dwordBbuf.rewind();
1211            IOUtils.readFully(archive, dwordBbuf);
1212            archive.position(ZipEightByteInteger.getLongValue(dwordBuf));
1213        }
1214
1215        wordBbuf.rewind();
1216        IOUtils.readFully(archive, wordBbuf);
1217        if (!Arrays.equals(wordBuf, ZipArchiveOutputStream.ZIP64_EOCD_SIG)) {
1218            throw new ZipException("Archive's ZIP64 end of central "
1219                                   + "directory locator is corrupt.");
1220        }
1221
1222        if (isSplitZipArchive) {
1223            skipBytes(ZIP64_EOCD_CFD_DISK_OFFSET
1224                    - WORD /* signature has already been read */);
1225            wordBbuf.rewind();
1226            IOUtils.readFully(archive, wordBbuf);
1227            centralDirectoryStartDiskNumber = ZipLong.getValue(wordBuf);
1228
1229            skipBytes(ZIP64_EOCD_CFD_LOCATOR_RELATIVE_OFFSET);
1230
1231            dwordBbuf.rewind();
1232            IOUtils.readFully(archive, dwordBbuf);
1233            centralDirectoryStartRelativeOffset = ZipEightByteInteger.getLongValue(dwordBuf);
1234            ((ZipSplitReadOnlySeekableByteChannel) archive)
1235                .position(centralDirectoryStartDiskNumber, centralDirectoryStartRelativeOffset);
1236        } else {
1237            skipBytes(ZIP64_EOCD_CFD_LOCATOR_OFFSET
1238                    - WORD /* signature has already been read */);
1239            dwordBbuf.rewind();
1240            IOUtils.readFully(archive, dwordBbuf);
1241            centralDirectoryStartDiskNumber = 0;
1242            centralDirectoryStartRelativeOffset = ZipEightByteInteger.getLongValue(dwordBuf);
1243            archive.position(centralDirectoryStartRelativeOffset);
1244        }
1245    }
1246
1247    /**
1248     * Parses the &quot;End of central dir record&quot; and positions
1249     * the stream at the first central directory record.
1250     *
1251     * Expects stream to be positioned at the beginning of the
1252     * &quot;End of central dir record&quot;.
1253     */
1254    private void positionAtCentralDirectory32()
1255        throws IOException {
1256        if (isSplitZipArchive) {
1257            skipBytes(CFD_DISK_OFFSET);
1258            shortBbuf.rewind();
1259            IOUtils.readFully(archive, shortBbuf);
1260            centralDirectoryStartDiskNumber = ZipShort.getValue(shortBuf);
1261
1262            skipBytes(CFD_LOCATOR_RELATIVE_OFFSET);
1263
1264            wordBbuf.rewind();
1265            IOUtils.readFully(archive, wordBbuf);
1266            centralDirectoryStartRelativeOffset = ZipLong.getValue(wordBuf);
1267            ((ZipSplitReadOnlySeekableByteChannel) archive)
1268                .position(centralDirectoryStartDiskNumber, centralDirectoryStartRelativeOffset);
1269        } else {
1270            skipBytes(CFD_LOCATOR_OFFSET);
1271            wordBbuf.rewind();
1272            IOUtils.readFully(archive, wordBbuf);
1273            centralDirectoryStartDiskNumber = 0;
1274            centralDirectoryStartRelativeOffset = ZipLong.getValue(wordBuf);
1275            archive.position(centralDirectoryStartRelativeOffset);
1276        }
1277    }
1278
1279    /**
1280     * Searches for the and positions the stream at the start of the
1281     * &quot;End of central dir record&quot;.
1282     */
1283    private void positionAtEndOfCentralDirectoryRecord()
1284        throws IOException {
1285        final boolean found = tryToLocateSignature(MIN_EOCD_SIZE, MAX_EOCD_SIZE,
1286                                             ZipArchiveOutputStream.EOCD_SIG);
1287        if (!found) {
1288            throw new ZipException("Archive is not a ZIP archive");
1289        }
1290    }
1291
1292    /**
1293     * Searches the archive backwards from minDistance to maxDistance
1294     * for the given signature, positions the RandomaccessFile right
1295     * at the signature if it has been found.
1296     */
1297    private boolean tryToLocateSignature(final long minDistanceFromEnd,
1298                                         final long maxDistanceFromEnd,
1299                                         final byte[] sig) throws IOException {
1300        boolean found = false;
1301        long off = archive.size() - minDistanceFromEnd;
1302        final long stopSearching =
1303            Math.max(0L, archive.size() - maxDistanceFromEnd);
1304        if (off >= 0) {
1305            for (; off >= stopSearching; off--) {
1306                archive.position(off);
1307                try {
1308                    wordBbuf.rewind();
1309                    IOUtils.readFully(archive, wordBbuf);
1310                    wordBbuf.flip();
1311                } catch (final EOFException ex) { // NOSONAR
1312                    break;
1313                }
1314                int curr = wordBbuf.get();
1315                if (curr == sig[POS_0]) {
1316                    curr = wordBbuf.get();
1317                    if (curr == sig[POS_1]) {
1318                        curr = wordBbuf.get();
1319                        if (curr == sig[POS_2]) {
1320                            curr = wordBbuf.get();
1321                            if (curr == sig[POS_3]) {
1322                                found = true;
1323                                break;
1324                            }
1325                        }
1326                    }
1327                }
1328            }
1329        }
1330        if (found) {
1331            archive.position(off);
1332        }
1333        return found;
1334    }
1335
1336    /**
1337     * Skips the given number of bytes or throws an EOFException if
1338     * skipping failed.
1339     */
1340    private void skipBytes(final int count) throws IOException {
1341        final long currentPosition = archive.position();
1342        final long newPosition = currentPosition + count;
1343        if (newPosition > archive.size()) {
1344            throw new EOFException();
1345        }
1346        archive.position(newPosition);
1347    }
1348
1349    /**
1350     * Number of bytes in local file header up to the &quot;length of
1351     * file name&quot; entry.
1352     */
1353    private static final long LFH_OFFSET_FOR_FILENAME_LENGTH =
1354        /* local file header signature     */ WORD
1355        /* version needed to extract       */ + SHORT
1356        /* general purpose bit flag        */ + SHORT
1357        /* compression method              */ + SHORT
1358        /* last mod file time              */ + SHORT
1359        /* last mod file date              */ + SHORT
1360        /* crc-32                          */ + WORD
1361        /* compressed size                 */ + WORD
1362        /* uncompressed size               */ + (long) WORD;
1363
1364    /**
1365     * Walks through all recorded entries and adds the data available
1366     * from the local file header.
1367     *
1368     * <p>Also records the offsets for the data to read from the
1369     * entries.</p>
1370     */
1371    private void resolveLocalFileHeaderData(final Map<ZipArchiveEntry, NameAndComment>
1372                                            entriesWithoutUTF8Flag)
1373        throws IOException {
1374        for (final ZipArchiveEntry zipArchiveEntry : entries) {
1375            // entries is filled in populateFromCentralDirectory and
1376            // never modified
1377            final Entry ze = (Entry) zipArchiveEntry;
1378            final int[] lens = setDataOffset(ze);
1379            final int fileNameLen = lens[0];
1380            final int extraFieldLen = lens[1];
1381            skipBytes(fileNameLen);
1382            final byte[] localExtraData = IOUtils.readRange(archive, extraFieldLen);
1383            if (localExtraData.length < extraFieldLen) {
1384                throw new EOFException();
1385            }
1386            try {
1387                ze.setExtra(localExtraData);
1388            } catch (RuntimeException ex) {
1389                final ZipException z = new ZipException("Invalid extra data in entry " + ze.getName());
1390                z.initCause(ex);
1391                throw z;
1392            }
1393
1394            if (entriesWithoutUTF8Flag.containsKey(ze)) {
1395                final NameAndComment nc = entriesWithoutUTF8Flag.get(ze);
1396                ZipUtil.setNameAndCommentFromExtraFields(ze, nc.name,
1397                                                         nc.comment);
1398            }
1399        }
1400    }
1401
1402    private void fillNameMap() {
1403        entries.forEach(ze -> {
1404            // entries is filled in populateFromCentralDirectory and
1405            // never modified
1406            final String name = ze.getName();
1407            LinkedList<ZipArchiveEntry> entriesOfThatName = nameMap.computeIfAbsent(name, k -> new LinkedList<>());
1408            entriesOfThatName.addLast(ze);
1409        });
1410    }
1411
1412    private int[] setDataOffset(final ZipArchiveEntry ze) throws IOException {
1413        long offset = ze.getLocalHeaderOffset();
1414        if (isSplitZipArchive) {
1415            ((ZipSplitReadOnlySeekableByteChannel) archive)
1416                .position(ze.getDiskNumberStart(), offset + LFH_OFFSET_FOR_FILENAME_LENGTH);
1417            // the offset should be updated to the global offset
1418            offset = archive.position() - LFH_OFFSET_FOR_FILENAME_LENGTH;
1419        } else {
1420            archive.position(offset + LFH_OFFSET_FOR_FILENAME_LENGTH);
1421        }
1422        wordBbuf.rewind();
1423        IOUtils.readFully(archive, wordBbuf);
1424        wordBbuf.flip();
1425        wordBbuf.get(shortBuf);
1426        final int fileNameLen = ZipShort.getValue(shortBuf);
1427        wordBbuf.get(shortBuf);
1428        final int extraFieldLen = ZipShort.getValue(shortBuf);
1429        ze.setDataOffset(offset + LFH_OFFSET_FOR_FILENAME_LENGTH
1430                         + SHORT + SHORT + fileNameLen + extraFieldLen);
1431        if (ze.getDataOffset() + ze.getCompressedSize() > centralDirectoryStartOffset) {
1432            throw new IOException("data for " + ze.getName() + " overlaps with central directory.");
1433        }
1434        return new int[] { fileNameLen, extraFieldLen };
1435    }
1436
1437    private long getDataOffset(final ZipArchiveEntry ze) throws IOException {
1438        final long s = ze.getDataOffset();
1439        if (s == EntryStreamOffsets.OFFSET_UNKNOWN) {
1440            setDataOffset(ze);
1441            return ze.getDataOffset();
1442        }
1443        return s;
1444    }
1445
1446    /**
1447     * Checks whether the archive starts with a LFH.  If it doesn't,
1448     * it may be an empty archive.
1449     */
1450    private boolean startsWithLocalFileHeader() throws IOException {
1451        archive.position(0);
1452        wordBbuf.rewind();
1453        IOUtils.readFully(archive, wordBbuf);
1454        return Arrays.equals(wordBuf, ZipArchiveOutputStream.LFH_SIG);
1455    }
1456
1457    /**
1458     * Creates new BoundedInputStream, according to implementation of
1459     * underlying archive channel.
1460     */
1461    private BoundedArchiveInputStream createBoundedInputStream(final long start, final long remaining) {
1462        if (start < 0 || remaining < 0 || start + remaining < start) {
1463            throw new IllegalArgumentException("Corrupted archive, stream boundaries"
1464                + " are out of range");
1465        }
1466        return archive instanceof FileChannel ?
1467            new BoundedFileChannelInputStream(start, remaining) :
1468            new BoundedSeekableByteChannelInputStream(start, remaining, archive);
1469    }
1470
1471    /**
1472     * Lock-free implementation of BoundedInputStream. The
1473     * implementation uses positioned reads on the underlying archive
1474     * file channel and therefore performs significantly faster in
1475     * concurrent environment.
1476     */
1477    private class BoundedFileChannelInputStream extends BoundedArchiveInputStream {
1478        private final FileChannel archive;
1479
1480        BoundedFileChannelInputStream(final long start, final long remaining) {
1481            super(start, remaining);
1482            archive = (FileChannel) ZipFile.this.archive;
1483        }
1484
1485        @Override
1486        protected int read(final long pos, final ByteBuffer buf) throws IOException {
1487            final int read = archive.read(buf, pos);
1488            buf.flip();
1489            return read;
1490        }
1491    }
1492
1493    private static final class NameAndComment {
1494        private final byte[] name;
1495        private final byte[] comment;
1496        private NameAndComment(final byte[] name, final byte[] comment) {
1497            this.name = name;
1498            this.comment = comment;
1499        }
1500    }
1501
1502    /**
1503     * Compares two ZipArchiveEntries based on their offset within the archive.
1504     *
1505     * <p>Won't return any meaningful results if one of the entries
1506     * isn't part of the archive at all.</p>
1507     *
1508     * @since 1.1
1509     */
1510    private final Comparator<ZipArchiveEntry> offsetComparator =
1511        Comparator.comparingLong(ZipArchiveEntry::getDiskNumberStart)
1512            .thenComparingLong(ZipArchiveEntry::getLocalHeaderOffset);
1513
1514    /**
1515     * Extends ZipArchiveEntry to store the offset within the archive.
1516     */
1517    private static class Entry extends ZipArchiveEntry {
1518
1519        Entry() {
1520        }
1521
1522        @Override
1523        public int hashCode() {
1524            return 3 * super.hashCode()
1525                + (int) getLocalHeaderOffset()+(int)(getLocalHeaderOffset()>>32);
1526        }
1527
1528        @Override
1529        public boolean equals(final Object other) {
1530            if (super.equals(other)) {
1531                // super.equals would return false if other were not an Entry
1532                final Entry otherEntry = (Entry) other;
1533                return getLocalHeaderOffset()
1534                        == otherEntry.getLocalHeaderOffset()
1535                    && super.getDataOffset()
1536                        == otherEntry.getDataOffset()
1537                    && super.getDiskNumberStart()
1538                        == otherEntry.getDiskNumberStart();
1539            }
1540            return false;
1541        }
1542    }
1543
1544    private static class StoredStatisticsStream extends CountingInputStream implements InputStreamStatistics {
1545        StoredStatisticsStream(final InputStream in) {
1546            super(in);
1547        }
1548
1549        @Override
1550        public long getCompressedCount() {
1551            return super.getBytesRead();
1552        }
1553
1554        @Override
1555        public long getUncompressedCount() {
1556            return getCompressedCount();
1557        }
1558    }
1559}