001/*
002 *  Licensed to the Apache Software Foundation (ASF) under one or more
003 *  contributor license agreements.  See the NOTICE file distributed with
004 *  this work for additional information regarding copyright ownership.
005 *  The ASF licenses this file to You under the Apache License, Version 2.0
006 *  (the "License"); you may not use this file except in compliance with
007 *  the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 *  Unless required by applicable law or agreed to in writing, software
012 *  distributed under the License is distributed on an "AS IS" BASIS,
013 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 *  See the License for the specific language governing permissions and
015 *  limitations under the License.
016 *
017 */
018package org.apache.commons.compress.archivers.zip;
019
020import java.io.BufferedInputStream;
021import java.io.ByteArrayInputStream;
022import java.io.Closeable;
023import java.io.EOFException;
024import java.io.File;
025import java.io.IOException;
026import java.io.InputStream;
027import java.io.SequenceInputStream;
028import java.nio.ByteBuffer;
029import java.nio.channels.FileChannel;
030import java.nio.channels.SeekableByteChannel;
031import java.nio.file.Files;
032import java.nio.file.Path;
033import java.nio.file.StandardOpenOption;
034import java.util.Arrays;
035import java.util.Collections;
036import java.util.Comparator;
037import java.util.EnumSet;
038import java.util.Enumeration;
039import java.util.HashMap;
040import java.util.LinkedList;
041import java.util.List;
042import java.util.Map;
043import java.util.zip.Inflater;
044import java.util.zip.ZipException;
045
046import org.apache.commons.compress.archivers.EntryStreamOffsets;
047import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
048import org.apache.commons.compress.compressors.deflate64.Deflate64CompressorInputStream;
049import org.apache.commons.compress.utils.BoundedArchiveInputStream;
050import org.apache.commons.compress.utils.BoundedSeekableByteChannelInputStream;
051import org.apache.commons.compress.utils.CountingInputStream;
052import org.apache.commons.compress.utils.IOUtils;
053import org.apache.commons.compress.utils.InputStreamStatistics;
054
055/**
 * Replacement for {@code java.util.zip.ZipFile}.
057 *
 * <p>This class adds support for file name encodings other than UTF-8
 * (which is required to work on ZIP files created by native ZIP tools)
 * and is able to skip a preamble like the one found in
 * self-extracting archives.  Furthermore it returns instances of
 * {@code org.apache.commons.compress.archivers.zip.ZipArchiveEntry}
 * instead of {@code java.util.zip.ZipEntry}.</p>
064 *
 * <p>It doesn't extend {@code java.util.zip.ZipFile} as it would
 * have to reimplement all methods anyway.  Like
 * {@code java.util.zip.ZipFile}, it uses SeekableByteChannel under the
 * covers and supports compressed and uncompressed entries.  As of
 * Apache Commons Compress 1.3 it also transparently supports Zip64
 * extensions and thus individual entries and archives larger than 4
 * GB or with more than 65536 entries.</p>
072 *
073 * <p>The method signatures mimic the ones of
074 * {@code java.util.zip.ZipFile}, with a couple of exceptions:
075 *
076 * <ul>
077 *   <li>There is no getName method.</li>
078 *   <li>entries has been renamed to getEntries.</li>
079 *   <li>getEntries and getEntry return
080 *   {@code org.apache.commons.compress.archivers.zip.ZipArchiveEntry}
081 *   instances.</li>
082 *   <li>close is allowed to throw IOException.</li>
083 * </ul>
084 */
085public class ZipFile implements Closeable {
086    /**
087     * Lock-free implementation of BoundedInputStream. The
088     * implementation uses positioned reads on the underlying archive
089     * file channel and therefore performs significantly faster in
090     * concurrent environment.
091     */
092    private class BoundedFileChannelInputStream extends BoundedArchiveInputStream {
093        private final FileChannel archive;
094
095        BoundedFileChannelInputStream(final long start, final long remaining) {
096            super(start, remaining);
097            archive = (FileChannel) ZipFile.this.archive;
098        }
099
100        @Override
101        protected int read(final long pos, final ByteBuffer buf) throws IOException {
102            final int read = archive.read(buf, pos);
103            buf.flip();
104            return read;
105        }
106    }
107    /**
108     * Extends ZipArchiveEntry to store the offset within the archive.
109     */
110    private static class Entry extends ZipArchiveEntry {
111
112        Entry() {
113        }
114
115        @Override
116        public boolean equals(final Object other) {
117            if (super.equals(other)) {
118                // super.equals would return false if other were not an Entry
119                final Entry otherEntry = (Entry) other;
120                return getLocalHeaderOffset()
121                        == otherEntry.getLocalHeaderOffset()
122                    && super.getDataOffset()
123                        == otherEntry.getDataOffset()
124                    && super.getDiskNumberStart()
125                        == otherEntry.getDiskNumberStart();
126            }
127            return false;
128        }
129
130        @Override
131        public int hashCode() {
132            return 3 * super.hashCode()
133                + (int) getLocalHeaderOffset()+(int)(getLocalHeaderOffset()>>32);
134        }
135    }
136    private static final class NameAndComment {
137        private final byte[] name;
138        private final byte[] comment;
139        private NameAndComment(final byte[] name, final byte[] comment) {
140            this.name = name;
141            this.comment = comment;
142        }
143    }
144    private static class StoredStatisticsStream extends CountingInputStream implements InputStreamStatistics {
145        StoredStatisticsStream(final InputStream in) {
146            super(in);
147        }
148
149        @Override
150        public long getCompressedCount() {
151            return super.getBytesRead();
152        }
153
154        @Override
155        public long getUncompressedCount() {
156            return getCompressedCount();
157        }
158    }
159    private static final int HASH_SIZE = 509;
160    static final int NIBLET_MASK = 0x0f;
161    static final int BYTE_SHIFT = 8;
162    private static final int POS_0 = 0;
163
164    private static final int POS_1 = 1;
165
166    private static final int POS_2 = 2;
167
168    private static final int POS_3 = 3;
169
170    private static final byte[] ONE_ZERO_BYTE = new byte[1];
171
172    /**
173     * Length of a "central directory" entry structure without file
174     * name, extra fields or comment.
175     */
176    private static final int CFH_LEN =
177        /* version made by                 */ ZipConstants.SHORT
178        /* version needed to extract       */ + ZipConstants.SHORT
179        /* general purpose bit flag        */ + ZipConstants.SHORT
180        /* compression method              */ + ZipConstants.SHORT
181        /* last mod file time              */ + ZipConstants.SHORT
182        /* last mod file date              */ + ZipConstants.SHORT
183        /* crc-32                          */ + ZipConstants.WORD
184        /* compressed size                 */ + ZipConstants.WORD
185        /* uncompressed size               */ + ZipConstants.WORD
186        /* file name length                */ + ZipConstants. SHORT
187        /* extra field length              */ + ZipConstants.SHORT
188        /* file comment length             */ + ZipConstants.SHORT
189        /* disk number start               */ + ZipConstants.SHORT
190        /* internal file attributes        */ + ZipConstants.SHORT
191        /* external file attributes        */ + ZipConstants.WORD
192        /* relative offset of local header */ + ZipConstants.WORD;
193
194    private static final long CFH_SIG =
195        ZipLong.getValue(ZipArchiveOutputStream.CFH_SIG);
196
197    /**
198     * Length of the "End of central directory record" - which is
199     * supposed to be the last structure of the archive - without file
200     * comment.
201     */
202    static final int MIN_EOCD_SIZE =
203        /* end of central dir signature    */ ZipConstants.WORD
204        /* number of this disk             */ + ZipConstants.SHORT
205        /* number of the disk with the     */
206        /* start of the central directory  */ + ZipConstants.SHORT
207        /* total number of entries in      */
208        /* the central dir on this disk    */ + ZipConstants.SHORT
209        /* total number of entries in      */
210        /* the central dir                 */ + ZipConstants.SHORT
211        /* size of the central directory   */ + ZipConstants.WORD
212        /* offset of start of central      */
213        /* directory with respect to       */
214        /* the starting disk number        */ + ZipConstants.WORD
215        /* ZIP file comment length         */ + ZipConstants.SHORT;
216
217    /**
218     * Maximum length of the "End of central directory record" with a
219     * file comment.
220     */
221    private static final int MAX_EOCD_SIZE = MIN_EOCD_SIZE
222        /* maximum length of ZIP file comment */ + ZipConstants.ZIP64_MAGIC_SHORT;
223
224    /**
225     * Offset of the field that holds the location of the length of
226     * the central directory inside the "End of central directory
227     * record" relative to the start of the "End of central directory
228     * record".
229     */
230    private static final int CFD_LENGTH_OFFSET =
231        /* end of central dir signature    */ ZipConstants.WORD
232        /* number of this disk             */ + ZipConstants.SHORT
233        /* number of the disk with the     */
234        /* start of the central directory  */ + ZipConstants.SHORT
235        /* total number of entries in      */
236        /* the central dir on this disk    */ + ZipConstants.SHORT
237        /* total number of entries in      */
238        /* the central dir                 */ + ZipConstants.SHORT;
239
240    /**
241     * Offset of the field that holds the disk number of the first
242     * central directory entry inside the "End of central directory
243     * record" relative to the start of the "End of central directory
244     * record".
245     */
246    private static final int CFD_DISK_OFFSET =
247            /* end of central dir signature    */ ZipConstants.WORD
248            /* number of this disk             */ + ZipConstants.SHORT;
249    /**
250     * Offset of the field that holds the location of the first
251     * central directory entry inside the "End of central directory
252     * record" relative to the "number of the disk with the start
253     * of the central directory".
254     */
255    private static final int CFD_LOCATOR_RELATIVE_OFFSET =
256            /* total number of entries in      */
257            /* the central dir on this disk    */ + ZipConstants.SHORT
258            /* total number of entries in      */
259            /* the central dir                 */ + ZipConstants.SHORT
260            /* size of the central directory   */ + ZipConstants.WORD;
261    /**
262     * Length of the "Zip64 end of central directory locator" - which
263     * should be right in front of the "end of central directory
264     * record" if one is present at all.
265     */
266    private static final int ZIP64_EOCDL_LENGTH =
267        /* zip64 end of central dir locator sig */ ZipConstants.WORD
268        /* number of the disk with the start    */
269        /* start of the zip64 end of            */
270        /* central directory                    */ + ZipConstants.WORD
271        /* relative offset of the zip64         */
272        /* end of central directory record      */ + ZipConstants.DWORD
273        /* total number of disks                */ + ZipConstants.WORD;
274    /**
275     * Offset of the field that holds the location of the "Zip64 end
276     * of central directory record" inside the "Zip64 end of central
277     * directory locator" relative to the start of the "Zip64 end of
278     * central directory locator".
279     */
280    private static final int ZIP64_EOCDL_LOCATOR_OFFSET =
281        /* zip64 end of central dir locator sig */ ZipConstants.WORD
282        /* number of the disk with the start    */
283        /* start of the zip64 end of            */
284        /* central directory                    */ + ZipConstants.WORD;
285    /**
286     * Offset of the field that holds the location of the first
287     * central directory entry inside the "Zip64 end of central
288     * directory record" relative to the start of the "Zip64 end of
289     * central directory record".
290     */
291    private static final int ZIP64_EOCD_CFD_LOCATOR_OFFSET =
292        /* zip64 end of central dir        */
293        /* signature                       */ ZipConstants.WORD
294        /* size of zip64 end of central    */
295        /* directory record                */ + ZipConstants.DWORD
296        /* version made by                 */ + ZipConstants.SHORT
297        /* version needed to extract       */ + ZipConstants.SHORT
298        /* number of this disk             */ + ZipConstants.WORD
299        /* number of the disk with the     */
300        /* start of the central directory  */ + ZipConstants.WORD
301        /* total number of entries in the  */
302        /* central directory on this disk  */ + ZipConstants.DWORD
303        /* total number of entries in the  */
304        /* central directory               */ + ZipConstants.DWORD
305        /* size of the central directory   */ + ZipConstants.DWORD;
306    /**
307     * Offset of the field that holds the disk number of the first
308     * central directory entry inside the "Zip64 end of central
309     * directory record" relative to the start of the "Zip64 end of
310     * central directory record".
311     */
312    private static final int ZIP64_EOCD_CFD_DISK_OFFSET =
313            /* zip64 end of central dir        */
314            /* signature                       */ ZipConstants.WORD
315            /* size of zip64 end of central    */
316            /* directory record                */ + ZipConstants.DWORD
317            /* version made by                 */ + ZipConstants.SHORT
318            /* version needed to extract       */ + ZipConstants.SHORT
319            /* number of this disk             */ + ZipConstants.WORD;
320    /**
321     * Offset of the field that holds the location of the first
322     * central directory entry inside the "Zip64 end of central
323     * directory record" relative to the "number of the disk
324     * with the start of the central directory".
325     */
326    private static final int ZIP64_EOCD_CFD_LOCATOR_RELATIVE_OFFSET =
327            /* total number of entries in the  */
328            /* central directory on this disk  */ ZipConstants.DWORD
329            /* total number of entries in the  */
330            /* central directory               */ + ZipConstants.DWORD
331            /* size of the central directory   */ + ZipConstants.DWORD;
332    /**
333     * Number of bytes in local file header up to the &quot;length of
334     * file name&quot; entry.
335     */
336    private static final long LFH_OFFSET_FOR_FILENAME_LENGTH =
337        /* local file header signature     */ ZipConstants.WORD
338        /* version needed to extract       */ + ZipConstants.SHORT
339        /* general purpose bit flag        */ + ZipConstants.SHORT
340        /* compression method              */ + ZipConstants.SHORT
341        /* last mod file time              */ + ZipConstants.SHORT
342        /* last mod file date              */ + ZipConstants.SHORT
343        /* crc-32                          */ + ZipConstants.WORD
344        /* compressed size                 */ + ZipConstants.WORD
345        /* uncompressed size               */ + (long) ZipConstants.WORD;
346
347    /**
348     * Compares two ZipArchiveEntries based on their offset within the archive.
349     *
350     * <p>Won't return any meaningful results if one of the entries
351     * isn't part of the archive at all.</p>
352     *
353     * @since 1.1
354     */
355    private static final Comparator<ZipArchiveEntry> offsetComparator =
356        Comparator.comparingLong(ZipArchiveEntry::getDiskNumberStart)
357            .thenComparingLong(ZipArchiveEntry::getLocalHeaderOffset);
358
359    /**
360     * Closes a ZIP file quietly; throwing no IOException, dooes nothing
361     * on null input.
362     * @param zipFile file to close, can be null
363     */
364    public static void closeQuietly(final ZipFile zipFile) {
365        IOUtils.closeQuietly(zipFile);
366    }
367
368    /**
369     * List of entries in the order they appear inside the central
370     * directory.
371     */
372    private final List<ZipArchiveEntry> entries = new LinkedList<>();
373
374    /**
375     * Maps String to list of ZipArchiveEntrys, name -> actual entries.
376     */
377    private final Map<String, LinkedList<ZipArchiveEntry>> nameMap = new HashMap<>(HASH_SIZE);
378
379    /**
380     * The encoding to use for file names and the file comment.
381     *
382     * <p>For a list of possible values see <a
383     * href="http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html">http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html</a>.
384     * Defaults to UTF-8.</p>
385     */
386    private final String encoding;
387
388    /**
389     * The ZIP encoding to use for file names and the file comment.
390     */
391    private final ZipEncoding zipEncoding;
392
393    /**
394     * File name of actual source.
395     */
396    private final String archiveName;
397
398    /**
399     * The actual data source.
400     */
401    private final SeekableByteChannel archive;
402
403    /**
404     * Whether to look for and use Unicode extra fields.
405     */
406    private final boolean useUnicodeExtraFields;
407
408    /**
409     * Whether the file is closed.
410     */
411    private volatile boolean closed = true;
412
413    /**
414     * Whether the ZIP archive is a split ZIP archive
415     */
416    private final boolean isSplitZipArchive;
417
418    // cached buffers - must only be used locally in the class (COMPRESS-172 - reduce garbage collection)
419    private final byte[] dwordBuf = new byte[ZipConstants.DWORD];
420
421    private final byte[] wordBuf = new byte[ZipConstants.WORD];
422
423    private final byte[] cfhBuf = new byte[CFH_LEN];
424
425    private final byte[] shortBuf = new byte[ZipConstants.SHORT];
426
427    private final ByteBuffer dwordBbuf = ByteBuffer.wrap(dwordBuf);
428
429    private final ByteBuffer wordBbuf = ByteBuffer.wrap(wordBuf);
430
431    private final ByteBuffer cfhBbuf = ByteBuffer.wrap(cfhBuf);
432
433    private final ByteBuffer shortBbuf = ByteBuffer.wrap(shortBuf);
434
435    private long centralDirectoryStartDiskNumber, centralDirectoryStartRelativeOffset;
436
437    private long centralDirectoryStartOffset;
438
439    private long firstLocalFileHeaderOffset;
440
441    /**
442     * Opens the given file for reading, assuming "UTF8" for file names.
443     *
444     * @param f the archive.
445     *
446     * @throws IOException if an error occurs while reading the file.
447     */
448    public ZipFile(final File f) throws IOException {
449        this(f, ZipEncodingHelper.UTF8);
450    }
451
452    /**
453     * Opens the given file for reading, assuming the specified
454     * encoding for file names and scanning for Unicode extra fields.
455     *
456     * @param f the archive.
457     * @param encoding the encoding to use for file names, use null
458     * for the platform's default encoding
459     *
460     * @throws IOException if an error occurs while reading the file.
461     */
462    public ZipFile(final File f, final String encoding) throws IOException {
463        this(f.toPath(), encoding, true);
464    }
465
466    /**
467     * Opens the given file for reading, assuming the specified
468     * encoding for file names.
469     *
470     * @param f the archive.
471     * @param encoding the encoding to use for file names, use null
472     * for the platform's default encoding
473     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
474     * Extra Fields (if present) to set the file names.
475     *
476     * @throws IOException if an error occurs while reading the file.
477     */
478    public ZipFile(final File f, final String encoding, final boolean useUnicodeExtraFields)
479        throws IOException {
480        this(f.toPath(), encoding, useUnicodeExtraFields, false);
481    }
482
483    /**
484     * Opens the given file for reading, assuming the specified
485     * encoding for file names.
486     *
487     * <p>By default the central directory record and all local file headers of the archive will be read immediately
488     * which may take a considerable amount of time when the archive is big. The {@code ignoreLocalFileHeader} parameter
489     * can be set to {@code true} which restricts parsing to the central directory. Unfortunately the local file header
490     * may contain information not present inside of the central directory which will not be available when the argument
491     * is set to {@code true}. This includes the content of the Unicode extra field, so setting {@code
492     * ignoreLocalFileHeader} to {@code true} means {@code useUnicodeExtraFields} will be ignored effectively.</p>
493     *
494     * @param f the archive.
495     * @param encoding the encoding to use for file names, use null
496     * for the platform's default encoding
497     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
498     * Extra Fields (if present) to set the file names.
499     * @param ignoreLocalFileHeader whether to ignore information
500     * stored inside the local file header (see the notes in this method's javadoc)
501     *
502     * @throws IOException if an error occurs while reading the file.
503     * @since 1.19
504     */
505    public ZipFile(final File f, final String encoding, final boolean useUnicodeExtraFields,
506                   final boolean ignoreLocalFileHeader)
507        throws IOException {
508        this(Files.newByteChannel(f.toPath(), EnumSet.of(StandardOpenOption.READ)),
509             f.getAbsolutePath(), encoding, useUnicodeExtraFields, true, ignoreLocalFileHeader);
510    }
511
512    /**
513     * Opens the given path for reading, assuming "UTF8" for file names.
514     *
515     * @param path path to the archive.
516     * @throws IOException if an error occurs while reading the file.
517     * @since 1.22
518     */
519    public ZipFile(final Path path) throws IOException {
520        this(path, ZipEncodingHelper.UTF8);
521    }
522
523    /**
524     * Opens the given path for reading, assuming the specified
525     * encoding for file names and scanning for Unicode extra fields.
526     *
527     * @param path path to the archive.
528     * @param encoding the encoding to use for file names, use null
529     * for the platform's default encoding
530     * @throws IOException if an error occurs while reading the file.
531     * @since 1.22
532     */
533    public ZipFile(final Path path, final String encoding) throws IOException {
534        this(path, encoding, true);
535    }
536
537
538    /**
539     * Opens the given path for reading, assuming the specified
540     * encoding for file names.
541     *
542     * @param path path to the archive.
543     * @param encoding the encoding to use for file names, use null
544     * for the platform's default encoding
545     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
546     * Extra Fields (if present) to set the file names.
547     * @throws IOException if an error occurs while reading the file.
548     * @since 1.22
549     */
550    public ZipFile(final Path path, final String encoding, final boolean useUnicodeExtraFields)
551            throws IOException {
552        this(path, encoding, useUnicodeExtraFields, false);
553    }
554
555    /**
556     * Opens the given path for reading, assuming the specified
557     * encoding for file names.
558     * <p>By default the central directory record and all local file headers of the archive will be read immediately
559     * which may take a considerable amount of time when the archive is big. The {@code ignoreLocalFileHeader} parameter
560     * can be set to {@code true} which restricts parsing to the central directory. Unfortunately the local file header
561     * may contain information not present inside of the central directory which will not be available when the argument
562     * is set to {@code true}. This includes the content of the Unicode extra field, so setting {@code
563     * ignoreLocalFileHeader} to {@code true} means {@code useUnicodeExtraFields} will be ignored effectively.</p>
564     *
565     * @param path path to the archive.
566     * @param encoding the encoding to use for file names, use null
567     * for the platform's default encoding
568     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
569     * Extra Fields (if present) to set the file names.
570     * @param ignoreLocalFileHeader whether to ignore information
571     * stored inside the local file header (see the notes in this method's javadoc)
572     * @throws IOException if an error occurs while reading the file.
573     * @since 1.22
574     */
575    public ZipFile(final Path path, final String encoding, final boolean useUnicodeExtraFields,
576                   final boolean ignoreLocalFileHeader)
577            throws IOException {
578        this(Files.newByteChannel(path, EnumSet.of(StandardOpenOption.READ)),
579                path.toAbsolutePath().toString(), encoding, useUnicodeExtraFields,
580                true, ignoreLocalFileHeader);
581    }
582
583    /**
584     * Opens the given channel for reading, assuming "UTF8" for file names.
585     *
586     * <p>{@link
587     * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
588     * allows you to read from an in-memory archive.</p>
589     *
590     * @param channel the archive.
591     *
592     * @throws IOException if an error occurs while reading the file.
593     * @since 1.13
594     */
595    public ZipFile(final SeekableByteChannel channel)
596            throws IOException {
597        this(channel, "unknown archive", ZipEncodingHelper.UTF8, true);
598    }
599
600    /**
601     * Opens the given channel for reading, assuming the specified
602     * encoding for file names.
603     *
604     * <p>{@link
605     * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
606     * allows you to read from an in-memory archive.</p>
607     *
608     * @param channel the archive.
609     * @param encoding the encoding to use for file names, use null
610     * for the platform's default encoding
611     *
612     * @throws IOException if an error occurs while reading the file.
613     * @since 1.13
614     */
615    public ZipFile(final SeekableByteChannel channel, final String encoding)
616        throws IOException {
617        this(channel, "unknown archive", encoding, true);
618    }
619
620    /**
621     * Opens the given channel for reading, assuming the specified
622     * encoding for file names.
623     *
624     * <p>{@link
625     * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
626     * allows you to read from an in-memory archive.</p>
627     *
628     * @param channel the archive.
629     * @param archiveName name of the archive, used for error messages only.
630     * @param encoding the encoding to use for file names, use null
631     * for the platform's default encoding
632     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
633     * Extra Fields (if present) to set the file names.
634     *
635     * @throws IOException if an error occurs while reading the file.
636     * @since 1.13
637     */
638    public ZipFile(final SeekableByteChannel channel, final String archiveName,
639                   final String encoding, final boolean useUnicodeExtraFields)
640        throws IOException {
641        this(channel, archiveName, encoding, useUnicodeExtraFields, false, false);
642    }
643
644    /**
645     * Opens the given channel for reading, assuming the specified
646     * encoding for file names.
647     *
648     * <p>{@link
649     * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
650     * allows you to read from an in-memory archive.</p>
651     *
652     * <p>By default the central directory record and all local file headers of the archive will be read immediately
653     * which may take a considerable amount of time when the archive is big. The {@code ignoreLocalFileHeader} parameter
654     * can be set to {@code true} which restricts parsing to the central directory. Unfortunately the local file header
655     * may contain information not present inside of the central directory which will not be available when the argument
656     * is set to {@code true}. This includes the content of the Unicode extra field, so setting {@code
657     * ignoreLocalFileHeader} to {@code true} means {@code useUnicodeExtraFields} will be ignored effectively.</p>
658     *
659     * @param channel the archive.
660     * @param archiveName name of the archive, used for error messages only.
661     * @param encoding the encoding to use for file names, use null
662     * for the platform's default encoding
663     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
664     * Extra Fields (if present) to set the file names.
665     * @param ignoreLocalFileHeader whether to ignore information
666     * stored inside the local file header (see the notes in this method's javadoc)
667     *
668     * @throws IOException if an error occurs while reading the file.
669     * @since 1.19
670     */
671    public ZipFile(final SeekableByteChannel channel, final String archiveName,
672                   final String encoding, final boolean useUnicodeExtraFields,
673                   final boolean ignoreLocalFileHeader)
674        throws IOException {
675        this(channel, archiveName, encoding, useUnicodeExtraFields, false, ignoreLocalFileHeader);
676    }
677
678    private ZipFile(final SeekableByteChannel channel, final String archiveName,
679                    final String encoding, final boolean useUnicodeExtraFields,
680                    final boolean closeOnError, final boolean ignoreLocalFileHeader)
681        throws IOException {
682        isSplitZipArchive = (channel instanceof ZipSplitReadOnlySeekableByteChannel);
683
684        this.archiveName = archiveName;
685        this.encoding = encoding;
686        this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
687        this.useUnicodeExtraFields = useUnicodeExtraFields;
688        archive = channel;
689        boolean success = false;
690        try {
691            final Map<ZipArchiveEntry, NameAndComment> entriesWithoutUTF8Flag =
692                populateFromCentralDirectory();
693            if (!ignoreLocalFileHeader) {
694                resolveLocalFileHeaderData(entriesWithoutUTF8Flag);
695            }
696            fillNameMap();
697            success = true;
698        } catch (final IOException e) {
699            throw new IOException("Error on ZipFile " + archiveName, e);
700        } finally {
701            closed = !success;
702            if (!success && closeOnError) {
703                IOUtils.closeQuietly(archive);
704            }
705        }
706    }
707
708    /**
709     * Opens the given file for reading, assuming "UTF8".
710     *
711     * @param name name of the archive.
712     *
713     * @throws IOException if an error occurs while reading the file.
714     */
715    public ZipFile(final String name) throws IOException {
716        this(new File(name).toPath(), ZipEncodingHelper.UTF8);
717    }
718
719    /**
720     * Opens the given file for reading, assuming the specified
721     * encoding for file names, scanning unicode extra fields.
722     *
723     * @param name name of the archive.
724     * @param encoding the encoding to use for file names, use null
725     * for the platform's default encoding
726     *
727     * @throws IOException if an error occurs while reading the file.
728     */
729    public ZipFile(final String name, final String encoding) throws IOException {
730        this(new File(name).toPath(), encoding, true);
731    }
732
733    /**
734     * Whether this class is able to read the given entry.
735     *
736     * <p>May return false if it is set up to use encryption or a
737     * compression method that hasn't been implemented yet.</p>
738     * @since 1.1
739     * @param ze the entry
740     * @return whether this class is able to read the given entry.
741     */
742    public boolean canReadEntryData(final ZipArchiveEntry ze) {
743        return ZipUtil.canHandleEntryData(ze);
744    }
745
746    /**
747     * Closes the archive.
748     * @throws IOException if an error occurs closing the archive.
749     */
750    @Override
751    public void close() throws IOException {
752        // this flag is only written here and read in finalize() which
753        // can never be run in parallel.
754        // no synchronization needed.
755        closed = true;
756
757        archive.close();
758    }
759
760    /**
761     * Transfer selected entries from this ZIP file to a given #ZipArchiveOutputStream.
762     * Compression and all other attributes will be as in this file.
763     * <p>This method transfers entries based on the central directory of the ZIP file.</p>
764     *
765     * @param target The zipArchiveOutputStream to write the entries to
766     * @param predicate A predicate that selects which entries to write
767     * @throws IOException on error
768     */
769    public void copyRawEntries(final ZipArchiveOutputStream target, final ZipArchiveEntryPredicate predicate)
770            throws IOException {
771        final Enumeration<ZipArchiveEntry> src = getEntriesInPhysicalOrder();
772        while (src.hasMoreElements()) {
773            final ZipArchiveEntry entry = src.nextElement();
774            if (predicate.test( entry)) {
775                target.addRawArchiveEntry(entry, getRawInputStream(entry));
776            }
777        }
778    }
779
780    /**
781     * Creates new BoundedInputStream, according to implementation of
782     * underlying archive channel.
783     */
784    private BoundedArchiveInputStream createBoundedInputStream(final long start, final long remaining) {
785        if (start < 0 || remaining < 0 || start + remaining < start) {
786            throw new IllegalArgumentException("Corrupted archive, stream boundaries"
787                + " are out of range");
788        }
789        return archive instanceof FileChannel ?
790            new BoundedFileChannelInputStream(start, remaining) :
791            new BoundedSeekableByteChannelInputStream(start, remaining, archive);
792    }
793
794    private void fillNameMap() {
795        entries.forEach(ze -> {
796            // entries is filled in populateFromCentralDirectory and
797            // never modified
798            final String name = ze.getName();
799            final LinkedList<ZipArchiveEntry> entriesOfThatName = nameMap.computeIfAbsent(name, k -> new LinkedList<>());
800            entriesOfThatName.addLast(ze);
801        });
802    }
803
804    /**
805     * Ensures that the close method of this ZIP file is called when
806     * there are no more references to it.
807     * @see #close()
808     */
809    @Override
810    protected void finalize() throws Throwable {
811        try {
812            if (!closed) {
813                close();
814            }
815        } finally {
816            super.finalize();
817        }
818    }
819
820    /**
821     * Gets an InputStream for reading the content before the first local file header.
822     *
823     * @return null if there is no content before the first local file header.
824     * Otherwise returns a stream to read the content before the first local file header.
825     * @since 1.23
826     */
827    public InputStream getContentBeforeFirstLocalFileHeader() {
828        return firstLocalFileHeaderOffset == 0
829                ? null : createBoundedInputStream(0, firstLocalFileHeaderOffset);
830    }
831
832    private long getDataOffset(final ZipArchiveEntry ze) throws IOException {
833        final long s = ze.getDataOffset();
834        if (s == EntryStreamOffsets.OFFSET_UNKNOWN) {
835            setDataOffset(ze);
836            return ze.getDataOffset();
837        }
838        return s;
839    }
840
841    /**
842     * Gets the encoding to use for file names and the file comment.
843     *
844     * @return null if using the platform's default character encoding.
845     */
846    public String getEncoding() {
847        return encoding;
848    }
849
850    /**
851     * Gets all entries.
852     *
853     * <p>Entries will be returned in the same order they appear
854     * within the archive's central directory.</p>
855     *
856     * @return all entries as {@link ZipArchiveEntry} instances
857     */
858    public Enumeration<ZipArchiveEntry> getEntries() {
859        return Collections.enumeration(entries);
860    }
861
862    /**
863     * Gets all named entries in the same order they appear within
864     * the archive's central directory.
865     *
866     * @param name name of the entry.
867     * @return the Iterable&lt;ZipArchiveEntry&gt; corresponding to the
868     * given name
869     * @since 1.6
870     */
871    public Iterable<ZipArchiveEntry> getEntries(final String name) {
872        final List<ZipArchiveEntry> entriesOfThatName = nameMap.get(name);
873        return entriesOfThatName != null ? entriesOfThatName
874            : Collections.emptyList();
875    }
876
877    /**
878     * Gets all entries in physical order.
879     *
880     * <p>Entries will be returned in the same order their contents
881     * appear within the archive.</p>
882     *
883     * @return all entries as {@link ZipArchiveEntry} instances
884     *
885     * @since 1.1
886     */
887    public Enumeration<ZipArchiveEntry> getEntriesInPhysicalOrder() {
888        final ZipArchiveEntry[] allEntries = entries.toArray(ZipArchiveEntry.EMPTY_ARRAY);
889        Arrays.sort(allEntries, offsetComparator);
890        return Collections.enumeration(Arrays.asList(allEntries));
891    }
892
893    /**
894     * Gets all named entries in the same order their contents
895     * appear within the archive.
896     *
897     * @param name name of the entry.
898     * @return the Iterable&lt;ZipArchiveEntry&gt; corresponding to the
899     * given name
900     * @since 1.6
901     */
902    public Iterable<ZipArchiveEntry> getEntriesInPhysicalOrder(final String name) {
903        ZipArchiveEntry[] entriesOfThatName = ZipArchiveEntry.EMPTY_ARRAY;
904        final LinkedList<ZipArchiveEntry> linkedList = nameMap.get(name);
905        if (linkedList != null) {
906            entriesOfThatName = linkedList.toArray(entriesOfThatName);
907            Arrays.sort(entriesOfThatName, offsetComparator);
908        }
909        return Arrays.asList(entriesOfThatName);
910    }
911
912    /**
913     * Gets a named entry or {@code null} if no entry by
914     * that name exists.
915     *
916     * <p>If multiple entries with the same name exist the first entry
917     * in the archive's central directory by that name is
918     * returned.</p>
919     *
920     * @param name name of the entry.
921     * @return the ZipArchiveEntry corresponding to the given name - or
922     * {@code null} if not present.
923     */
924    public ZipArchiveEntry getEntry(final String name) {
925        final LinkedList<ZipArchiveEntry> entriesOfThatName = nameMap.get(name);
926        return entriesOfThatName != null ? entriesOfThatName.getFirst() : null;
927    }
928
929    /**
930     * Gets the offset of the first local file header in the file.
931     *
932     * @return the length of the content before the first local file header
933     * @since 1.23
934     */
935    public long getFirstLocalFileHeaderOffset() {
936        return firstLocalFileHeaderOffset;
937    }
938
939    /**
940     * Gets an InputStream for reading the contents of the given entry.
941     *
942     * @param zipEntry the entry to get the stream for.
943     * @return a stream to read the entry from. The returned stream
944     * implements {@link InputStreamStatistics}.
945     * @throws IOException if unable to create an input stream from the zipEntry.
946     */
947    public InputStream getInputStream(final ZipArchiveEntry zipEntry)
948        throws IOException {
949        if (!(zipEntry instanceof Entry)) {
950            return null;
951        }
952        // cast validity is checked just above
953        ZipUtil.checkRequestedFeatures(zipEntry);
954
955        // doesn't get closed if the method is not supported - which
956        // should never happen because of the checkRequestedFeatures
957        // call above
958        final InputStream is = new BufferedInputStream(getRawInputStream(zipEntry)); //NOSONAR
959        switch (ZipMethod.getMethodByCode(zipEntry.getMethod())) {
960            case STORED:
961                return new StoredStatisticsStream(is);
962            case UNSHRINKING:
963                return new UnshrinkingInputStream(is);
964            case IMPLODING:
965                try {
966                    return new ExplodingInputStream(zipEntry.getGeneralPurposeBit().getSlidingDictionarySize(),
967                            zipEntry.getGeneralPurposeBit().getNumberOfShannonFanoTrees(), is);
968                } catch (final IllegalArgumentException ex) {
969                    throw new IOException("bad IMPLODE data", ex);
970                }
971            case DEFLATED:
972                final Inflater inflater = new Inflater(true);
973                // Inflater with nowrap=true has this odd contract for a zero padding
974                // byte following the data stream; this used to be zlib's requirement
975                // and has been fixed a long time ago, but the contract persists so
976                // we comply.
977                // https://docs.oracle.com/javase/7/docs/api/java/util/zip/Inflater.html#Inflater(boolean)
978                return new InflaterInputStreamWithStatistics(new SequenceInputStream(is, new ByteArrayInputStream(ONE_ZERO_BYTE)),
979                    inflater) {
980                    @Override
981                    public void close() throws IOException {
982                        try {
983                            super.close();
984                        } finally {
985                            inflater.end();
986                        }
987                    }
988                };
989            case BZIP2:
990                return new BZip2CompressorInputStream(is);
991            case ENHANCED_DEFLATED:
992                return new Deflate64CompressorInputStream(is);
993            case AES_ENCRYPTED:
994            case EXPANDING_LEVEL_1:
995            case EXPANDING_LEVEL_2:
996            case EXPANDING_LEVEL_3:
997            case EXPANDING_LEVEL_4:
998            case JPEG:
999            case LZMA:
1000            case PKWARE_IMPLODING:
1001            case PPMD:
1002            case TOKENIZATION:
1003            case UNKNOWN:
1004            case WAVPACK:
1005            case XZ:
1006            default:
1007                throw new UnsupportedZipFeatureException(ZipMethod.getMethodByCode(zipEntry.getMethod()), zipEntry);
1008        }
1009    }
1010
1011    /**
1012     * Gets the raw stream of the archive entry (compressed form).
1013     *
1014     * <p>This method does not relate to how/if we understand the payload in the
1015     * stream, since we really only intend to move it on to somewhere else.</p>
1016     *
1017     * <p>Since version 1.22, this method will make an attempt to read the entry's data
1018     * stream offset, even if the {@code ignoreLocalFileHeader} parameter was {@code true}
1019     * in the constructor. An IOException can also be thrown from the body of the method
1020     * if this lookup fails for some reason.</p>
1021     *
1022     * @param ze The entry to get the stream for
1023     * @return The raw input stream containing (possibly) compressed data.
1024     * @since 1.11
1025     * @throws IOException if there is a problem reading data offset (added in version 1.22).
1026     */
1027    public InputStream getRawInputStream(final ZipArchiveEntry ze) throws IOException {
1028        if (!(ze instanceof Entry)) {
1029            return null;
1030        }
1031
1032        final long start = getDataOffset(ze);
1033        if (start == EntryStreamOffsets.OFFSET_UNKNOWN) {
1034            return null;
1035        }
1036        return createBoundedInputStream(start, ze.getCompressedSize());
1037    }
1038
1039    /**
1040     * Gets the entry's content as a String if isUnixSymlink()
1041     * returns true for it, otherwise returns null.
1042     * <p>This method assumes the symbolic link's file name uses the
1043     * same encoding that as been specified for this ZipFile.</p>
1044     *
1045     * @param entry ZipArchiveEntry object that represents the symbolic link
1046     * @return entry's content as a String
1047     * @throws IOException problem with content's input stream
1048     * @since 1.5
1049     */
1050    public String getUnixSymlink(final ZipArchiveEntry entry) throws IOException {
1051        if (entry != null && entry.isUnixSymlink()) {
1052            try (InputStream in = getInputStream(entry)) {
1053                return zipEncoding.decode(IOUtils.toByteArray(in));
1054            }
1055        }
1056        return null;
1057    }
1058
1059    /**
1060     * Reads the central directory of the given archive and populates
1061     * the internal tables with ZipArchiveEntry instances.
1062     *
1063     * <p>The ZipArchiveEntrys will know all data that can be obtained from
1064     * the central directory alone, but not the data that requires the
1065     * local file header or additional data to be read.</p>
1066     *
1067     * @return a map of zipentries that didn't have the language
1068     * encoding flag set when read.
1069     */
1070    private Map<ZipArchiveEntry, NameAndComment> populateFromCentralDirectory()
1071        throws IOException {
1072        final HashMap<ZipArchiveEntry, NameAndComment> noUTF8Flag =
1073            new HashMap<>();
1074
1075        positionAtCentralDirectory();
1076        centralDirectoryStartOffset = archive.position();
1077
1078        wordBbuf.rewind();
1079        IOUtils.readFully(archive, wordBbuf);
1080        long sig = ZipLong.getValue(wordBuf);
1081
1082        if (sig != CFH_SIG && startsWithLocalFileHeader()) {
1083            throw new IOException("Central directory is empty, can't expand"
1084                                  + " corrupt archive.");
1085        }
1086
1087        while (sig == CFH_SIG) {
1088            readCentralDirectoryEntry(noUTF8Flag);
1089            wordBbuf.rewind();
1090            IOUtils.readFully(archive, wordBbuf);
1091            sig = ZipLong.getValue(wordBuf);
1092        }
1093        return noUTF8Flag;
1094    }
1095
1096    /**
1097     * Searches for either the &quot;Zip64 end of central directory
1098     * locator&quot; or the &quot;End of central dir record&quot;, parses
1099     * it and positions the stream at the first central directory
1100     * record.
1101     */
1102    private void positionAtCentralDirectory()
1103        throws IOException {
1104        positionAtEndOfCentralDirectoryRecord();
1105        boolean found = false;
1106        final boolean searchedForZip64EOCD =
1107            archive.position() > ZIP64_EOCDL_LENGTH;
1108        if (searchedForZip64EOCD) {
1109            archive.position(archive.position() - ZIP64_EOCDL_LENGTH);
1110            wordBbuf.rewind();
1111            IOUtils.readFully(archive, wordBbuf);
1112            found = Arrays.equals(ZipArchiveOutputStream.ZIP64_EOCD_LOC_SIG,
1113                                  wordBuf);
1114        }
1115        if (!found) {
1116            // not a ZIP64 archive
1117            if (searchedForZip64EOCD) {
1118                skipBytes(ZIP64_EOCDL_LENGTH - ZipConstants.WORD);
1119            }
1120            positionAtCentralDirectory32();
1121        } else {
1122            positionAtCentralDirectory64();
1123        }
1124    }
1125
1126    /**
1127     * Parses the &quot;End of central dir record&quot; and positions
1128     * the stream at the first central directory record.
1129     *
1130     * Expects stream to be positioned at the beginning of the
1131     * &quot;End of central dir record&quot;.
1132     */
1133    private void positionAtCentralDirectory32()
1134        throws IOException {
1135        final long endOfCentralDirectoryRecordOffset = archive.position();
1136        if (isSplitZipArchive) {
1137            skipBytes(CFD_DISK_OFFSET);
1138            shortBbuf.rewind();
1139            IOUtils.readFully(archive, shortBbuf);
1140            centralDirectoryStartDiskNumber = ZipShort.getValue(shortBuf);
1141
1142            skipBytes(CFD_LOCATOR_RELATIVE_OFFSET);
1143
1144            wordBbuf.rewind();
1145            IOUtils.readFully(archive, wordBbuf);
1146            centralDirectoryStartRelativeOffset = ZipLong.getValue(wordBuf);
1147            ((ZipSplitReadOnlySeekableByteChannel) archive)
1148                .position(centralDirectoryStartDiskNumber, centralDirectoryStartRelativeOffset);
1149        } else {
1150            skipBytes(CFD_LENGTH_OFFSET);
1151            wordBbuf.rewind();
1152            IOUtils.readFully(archive, wordBbuf);
1153            final long centralDirectoryLength = ZipLong.getValue(wordBuf);
1154
1155            wordBbuf.rewind();
1156            IOUtils.readFully(archive, wordBbuf);
1157            centralDirectoryStartDiskNumber = 0;
1158            centralDirectoryStartRelativeOffset = ZipLong.getValue(wordBuf);
1159
1160            firstLocalFileHeaderOffset = Long.max(
1161                    endOfCentralDirectoryRecordOffset - centralDirectoryLength - centralDirectoryStartRelativeOffset,
1162                    0L);
1163            archive.position(centralDirectoryStartRelativeOffset + firstLocalFileHeaderOffset);
1164        }
1165    }
1166
1167    /**
1168     * Parses the &quot;Zip64 end of central directory locator&quot;,
1169     * finds the &quot;Zip64 end of central directory record&quot; using the
1170     * parsed information, parses that and positions the stream at the
1171     * first central directory record.
1172     *
1173     * Expects stream to be positioned right behind the &quot;Zip64
1174     * end of central directory locator&quot;'s signature.
1175     */
1176    private void positionAtCentralDirectory64()
1177        throws IOException {
1178        if (isSplitZipArchive) {
1179            wordBbuf.rewind();
1180            IOUtils.readFully(archive, wordBbuf);
1181            final long diskNumberOfEOCD = ZipLong.getValue(wordBuf);
1182
1183            dwordBbuf.rewind();
1184            IOUtils.readFully(archive, dwordBbuf);
1185            final long relativeOffsetOfEOCD = ZipEightByteInteger.getLongValue(dwordBuf);
1186            ((ZipSplitReadOnlySeekableByteChannel) archive)
1187                .position(diskNumberOfEOCD, relativeOffsetOfEOCD);
1188        } else {
1189            skipBytes(ZIP64_EOCDL_LOCATOR_OFFSET
1190                    - ZipConstants.WORD /* signature has already been read */);
1191            dwordBbuf.rewind();
1192            IOUtils.readFully(archive, dwordBbuf);
1193            archive.position(ZipEightByteInteger.getLongValue(dwordBuf));
1194        }
1195
1196        wordBbuf.rewind();
1197        IOUtils.readFully(archive, wordBbuf);
1198        if (!Arrays.equals(wordBuf, ZipArchiveOutputStream.ZIP64_EOCD_SIG)) {
1199            throw new ZipException("Archive's ZIP64 end of central "
1200                                   + "directory locator is corrupt.");
1201        }
1202
1203        if (isSplitZipArchive) {
1204            skipBytes(ZIP64_EOCD_CFD_DISK_OFFSET
1205                    - ZipConstants.WORD /* signature has already been read */);
1206            wordBbuf.rewind();
1207            IOUtils.readFully(archive, wordBbuf);
1208            centralDirectoryStartDiskNumber = ZipLong.getValue(wordBuf);
1209
1210            skipBytes(ZIP64_EOCD_CFD_LOCATOR_RELATIVE_OFFSET);
1211
1212            dwordBbuf.rewind();
1213            IOUtils.readFully(archive, dwordBbuf);
1214            centralDirectoryStartRelativeOffset = ZipEightByteInteger.getLongValue(dwordBuf);
1215            ((ZipSplitReadOnlySeekableByteChannel) archive)
1216                .position(centralDirectoryStartDiskNumber, centralDirectoryStartRelativeOffset);
1217        } else {
1218            skipBytes(ZIP64_EOCD_CFD_LOCATOR_OFFSET
1219                    - ZipConstants.WORD /* signature has already been read */);
1220            dwordBbuf.rewind();
1221            IOUtils.readFully(archive, dwordBbuf);
1222            centralDirectoryStartDiskNumber = 0;
1223            centralDirectoryStartRelativeOffset = ZipEightByteInteger.getLongValue(dwordBuf);
1224            archive.position(centralDirectoryStartRelativeOffset);
1225        }
1226    }
1227
1228    /**
1229     * Searches for the and positions the stream at the start of the
1230     * &quot;End of central dir record&quot;.
1231     */
1232    private void positionAtEndOfCentralDirectoryRecord()
1233        throws IOException {
1234        final boolean found = tryToLocateSignature(MIN_EOCD_SIZE, MAX_EOCD_SIZE,
1235                                             ZipArchiveOutputStream.EOCD_SIG);
1236        if (!found) {
1237            throw new ZipException("Archive is not a ZIP archive");
1238        }
1239    }
1240
1241    /**
1242     * Reads an individual entry of the central directory, creats an
1243     * ZipArchiveEntry from it and adds it to the global maps.
1244     *
1245     * @param noUTF8Flag map used to collect entries that don't have
1246     * their UTF-8 flag set and whose name will be set by data read
1247     * from the local file header later.  The current entry may be
1248     * added to this map.
1249     */
1250    private void
1251        readCentralDirectoryEntry(final Map<ZipArchiveEntry, NameAndComment> noUTF8Flag)
1252        throws IOException {
1253        cfhBbuf.rewind();
1254        IOUtils.readFully(archive, cfhBbuf);
1255        int off = 0;
1256        final Entry ze = new Entry();
1257
1258        final int versionMadeBy = ZipShort.getValue(cfhBuf, off);
1259        off += ZipConstants.SHORT;
1260        ze.setVersionMadeBy(versionMadeBy);
1261        ze.setPlatform((versionMadeBy >> BYTE_SHIFT) & NIBLET_MASK);
1262
1263        ze.setVersionRequired(ZipShort.getValue(cfhBuf, off));
1264        off += ZipConstants.SHORT; // version required
1265
1266        final GeneralPurposeBit gpFlag = GeneralPurposeBit.parse(cfhBuf, off);
1267        final boolean hasUTF8Flag = gpFlag.usesUTF8ForNames();
1268        final ZipEncoding entryEncoding =
1269            hasUTF8Flag ? ZipEncodingHelper.UTF8_ZIP_ENCODING : zipEncoding;
1270        if (hasUTF8Flag) {
1271            ze.setNameSource(ZipArchiveEntry.NameSource.NAME_WITH_EFS_FLAG);
1272        }
1273        ze.setGeneralPurposeBit(gpFlag);
1274        ze.setRawFlag(ZipShort.getValue(cfhBuf, off));
1275
1276        off += ZipConstants.SHORT;
1277
1278        //noinspection MagicConstant
1279        ze.setMethod(ZipShort.getValue(cfhBuf, off));
1280        off += ZipConstants.SHORT;
1281
1282        final long time = ZipUtil.dosToJavaTime(ZipLong.getValue(cfhBuf, off));
1283        ze.setTime(time);
1284        off += ZipConstants.WORD;
1285
1286        ze.setCrc(ZipLong.getValue(cfhBuf, off));
1287        off += ZipConstants.WORD;
1288
1289        long size = ZipLong.getValue(cfhBuf, off);
1290        if (size < 0) {
1291            throw new IOException("broken archive, entry with negative compressed size");
1292        }
1293        ze.setCompressedSize(size);
1294        off += ZipConstants.WORD;
1295
1296        size = ZipLong.getValue(cfhBuf, off);
1297        if (size < 0) {
1298            throw new IOException("broken archive, entry with negative size");
1299        }
1300        ze.setSize(size);
1301        off += ZipConstants.WORD;
1302
1303        final int fileNameLen = ZipShort.getValue(cfhBuf, off);
1304        off += ZipConstants.SHORT;
1305        if (fileNameLen < 0) {
1306            throw new IOException("broken archive, entry with negative fileNameLen");
1307        }
1308
1309        final int extraLen = ZipShort.getValue(cfhBuf, off);
1310        off += ZipConstants.SHORT;
1311        if (extraLen < 0) {
1312            throw new IOException("broken archive, entry with negative extraLen");
1313        }
1314
1315        final int commentLen = ZipShort.getValue(cfhBuf, off);
1316        off += ZipConstants.SHORT;
1317        if (commentLen < 0) {
1318            throw new IOException("broken archive, entry with negative commentLen");
1319        }
1320
1321        ze.setDiskNumberStart(ZipShort.getValue(cfhBuf, off));
1322        off += ZipConstants.SHORT;
1323
1324        ze.setInternalAttributes(ZipShort.getValue(cfhBuf, off));
1325        off += ZipConstants.SHORT;
1326
1327        ze.setExternalAttributes(ZipLong.getValue(cfhBuf, off));
1328        off += ZipConstants.WORD;
1329
1330        final byte[] fileName = IOUtils.readRange(archive, fileNameLen);
1331        if (fileName.length < fileNameLen) {
1332            throw new EOFException();
1333        }
1334        ze.setName(entryEncoding.decode(fileName), fileName);
1335
1336        // LFH offset,
1337        ze.setLocalHeaderOffset(ZipLong.getValue(cfhBuf, off) + firstLocalFileHeaderOffset);
1338        // data offset will be filled later
1339        entries.add(ze);
1340
1341        final byte[] cdExtraData = IOUtils.readRange(archive, extraLen);
1342        if (cdExtraData.length < extraLen) {
1343            throw new EOFException();
1344        }
1345        try {
1346            ze.setCentralDirectoryExtra(cdExtraData);
1347        } catch (final RuntimeException ex) {
1348            final ZipException z = new ZipException("Invalid extra data in entry " + ze.getName());
1349            z.initCause(ex);
1350            throw z;
1351        }
1352
1353        setSizesAndOffsetFromZip64Extra(ze);
1354        sanityCheckLFHOffset(ze);
1355
1356        final byte[] comment = IOUtils.readRange(archive, commentLen);
1357        if (comment.length < commentLen) {
1358            throw new EOFException();
1359        }
1360        ze.setComment(entryEncoding.decode(comment));
1361
1362        if (!hasUTF8Flag && useUnicodeExtraFields) {
1363            noUTF8Flag.put(ze, new NameAndComment(fileName, comment));
1364        }
1365
1366        ze.setStreamContiguous(true);
1367    }
1368
1369    /**
1370     * Walks through all recorded entries and adds the data available
1371     * from the local file header.
1372     *
1373     * <p>Also records the offsets for the data to read from the
1374     * entries.</p>
1375     */
1376    private void resolveLocalFileHeaderData(final Map<ZipArchiveEntry, NameAndComment>
1377                                            entriesWithoutUTF8Flag)
1378        throws IOException {
1379        for (final ZipArchiveEntry zipArchiveEntry : entries) {
1380            // entries is filled in populateFromCentralDirectory and
1381            // never modified
1382            final Entry ze = (Entry) zipArchiveEntry;
1383            final int[] lens = setDataOffset(ze);
1384            final int fileNameLen = lens[0];
1385            final int extraFieldLen = lens[1];
1386            skipBytes(fileNameLen);
1387            final byte[] localExtraData = IOUtils.readRange(archive, extraFieldLen);
1388            if (localExtraData.length < extraFieldLen) {
1389                throw new EOFException();
1390            }
1391            try {
1392                ze.setExtra(localExtraData);
1393            } catch (final RuntimeException ex) {
1394                final ZipException z = new ZipException("Invalid extra data in entry " + ze.getName());
1395                z.initCause(ex);
1396                throw z;
1397            }
1398
1399            if (entriesWithoutUTF8Flag.containsKey(ze)) {
1400                final NameAndComment nc = entriesWithoutUTF8Flag.get(ze);
1401                ZipUtil.setNameAndCommentFromExtraFields(ze, nc.name,
1402                                                         nc.comment);
1403            }
1404        }
1405    }
1406
1407    private void sanityCheckLFHOffset(final ZipArchiveEntry ze) throws IOException {
1408        if (ze.getDiskNumberStart() < 0) {
1409            throw new IOException("broken archive, entry with negative disk number");
1410        }
1411        if (ze.getLocalHeaderOffset() < 0) {
1412            throw new IOException("broken archive, entry with negative local file header offset");
1413        }
1414        if (isSplitZipArchive) {
1415            if (ze.getDiskNumberStart() > centralDirectoryStartDiskNumber) {
1416                throw new IOException("local file header for " + ze.getName() + " starts on a later disk than central directory");
1417            }
1418            if (ze.getDiskNumberStart() == centralDirectoryStartDiskNumber
1419                && ze.getLocalHeaderOffset() > centralDirectoryStartRelativeOffset) {
1420                throw new IOException("local file header for " + ze.getName() + " starts after central directory");
1421            }
1422        } else if (ze.getLocalHeaderOffset() > centralDirectoryStartOffset) {
1423            throw new IOException("local file header for " + ze.getName() + " starts after central directory");
1424        }
1425    }
1426
1427    private int[] setDataOffset(final ZipArchiveEntry ze) throws IOException {
1428        long offset = ze.getLocalHeaderOffset();
1429        if (isSplitZipArchive) {
1430            ((ZipSplitReadOnlySeekableByteChannel) archive)
1431                .position(ze.getDiskNumberStart(), offset + LFH_OFFSET_FOR_FILENAME_LENGTH);
1432            // the offset should be updated to the global offset
1433            offset = archive.position() - LFH_OFFSET_FOR_FILENAME_LENGTH;
1434        } else {
1435            archive.position(offset + LFH_OFFSET_FOR_FILENAME_LENGTH);
1436        }
1437        wordBbuf.rewind();
1438        IOUtils.readFully(archive, wordBbuf);
1439        wordBbuf.flip();
1440        wordBbuf.get(shortBuf);
1441        final int fileNameLen = ZipShort.getValue(shortBuf);
1442        wordBbuf.get(shortBuf);
1443        final int extraFieldLen = ZipShort.getValue(shortBuf);
1444        ze.setDataOffset(offset + LFH_OFFSET_FOR_FILENAME_LENGTH
1445                         + ZipConstants.SHORT + ZipConstants.SHORT + fileNameLen + extraFieldLen);
1446        if (ze.getDataOffset() + ze.getCompressedSize() > centralDirectoryStartOffset) {
1447            throw new IOException("data for " + ze.getName() + " overlaps with central directory.");
1448        }
1449        return new int[] { fileNameLen, extraFieldLen };
1450    }
1451
1452    /**
1453     * If the entry holds a Zip64 extended information extra field,
1454     * read sizes from there if the entry's sizes are set to
1455     * 0xFFFFFFFFF, do the same for the offset of the local file
1456     * header.
1457     *
1458     * <p>Ensures the Zip64 extra either knows both compressed and
1459     * uncompressed size or neither of both as the internal logic in
1460     * ExtraFieldUtils forces the field to create local header data
1461     * even if they are never used - and here a field with only one
1462     * size would be invalid.</p>
1463     */
1464    private void setSizesAndOffsetFromZip64Extra(final ZipArchiveEntry ze)
1465        throws IOException {
1466        final ZipExtraField extra =
1467            ze.getExtraField(Zip64ExtendedInformationExtraField.HEADER_ID);
1468        if (extra != null && !(extra instanceof Zip64ExtendedInformationExtraField)) {
1469            throw new ZipException("archive contains unparseable zip64 extra field");
1470        }
1471        final Zip64ExtendedInformationExtraField z64 =
1472            (Zip64ExtendedInformationExtraField) extra;
1473        if (z64 != null) {
1474            final boolean hasUncompressedSize = ze.getSize() == ZipConstants.ZIP64_MAGIC;
1475            final boolean hasCompressedSize = ze.getCompressedSize() == ZipConstants.ZIP64_MAGIC;
1476            final boolean hasRelativeHeaderOffset =
1477                ze.getLocalHeaderOffset() == ZipConstants.ZIP64_MAGIC;
1478            final boolean hasDiskStart = ze.getDiskNumberStart() == ZipConstants.ZIP64_MAGIC_SHORT;
1479            z64.reparseCentralDirectoryData(hasUncompressedSize,
1480                                            hasCompressedSize,
1481                                            hasRelativeHeaderOffset,
1482                                            hasDiskStart);
1483
1484            if (hasUncompressedSize) {
1485                final long size = z64.getSize().getLongValue();
1486                if (size < 0) {
1487                    throw new IOException("broken archive, entry with negative size");
1488                }
1489                ze.setSize(size);
1490            } else if (hasCompressedSize) {
1491                z64.setSize(new ZipEightByteInteger(ze.getSize()));
1492            }
1493
1494            if (hasCompressedSize) {
1495                final long size = z64.getCompressedSize().getLongValue();
1496                if (size < 0) {
1497                    throw new IOException("broken archive, entry with negative compressed size");
1498                }
1499                ze.setCompressedSize(size);
1500            } else if (hasUncompressedSize) {
1501                z64.setCompressedSize(new ZipEightByteInteger(ze.getCompressedSize()));
1502            }
1503
1504            if (hasRelativeHeaderOffset) {
1505                ze.setLocalHeaderOffset(z64.getRelativeHeaderOffset().getLongValue());
1506            }
1507
1508            if (hasDiskStart) {
1509                ze.setDiskNumberStart(z64.getDiskStartNumber().getValue());
1510            }
1511        }
1512    }
1513
1514    /**
1515     * Skips the given number of bytes or throws an EOFException if
1516     * skipping failed.
1517     */
1518    private void skipBytes(final int count) throws IOException {
1519        final long currentPosition = archive.position();
1520        final long newPosition = currentPosition + count;
1521        if (newPosition > archive.size()) {
1522            throw new EOFException();
1523        }
1524        archive.position(newPosition);
1525    }
1526
1527    /**
1528     * Checks whether the archive starts with a LFH.  If it doesn't,
1529     * it may be an empty archive.
1530     */
1531    private boolean startsWithLocalFileHeader() throws IOException {
1532        archive.position(firstLocalFileHeaderOffset);
1533        wordBbuf.rewind();
1534        IOUtils.readFully(archive, wordBbuf);
1535        return Arrays.equals(wordBuf, ZipArchiveOutputStream.LFH_SIG);
1536    }
1537
1538    /**
1539     * Searches the archive backwards from minDistance to maxDistance
1540     * for the given signature, positions the RandomaccessFile right
1541     * at the signature if it has been found.
1542     */
1543    private boolean tryToLocateSignature(final long minDistanceFromEnd,
1544                                         final long maxDistanceFromEnd,
1545                                         final byte[] sig) throws IOException {
1546        boolean found = false;
1547        long off = archive.size() - minDistanceFromEnd;
1548        final long stopSearching =
1549            Math.max(0L, archive.size() - maxDistanceFromEnd);
1550        if (off >= 0) {
1551            for (; off >= stopSearching; off--) {
1552                archive.position(off);
1553                try {
1554                    wordBbuf.rewind();
1555                    IOUtils.readFully(archive, wordBbuf);
1556                    wordBbuf.flip();
1557                } catch (final EOFException ex) { // NOSONAR
1558                    break;
1559                }
1560                int curr = wordBbuf.get();
1561                if (curr == sig[POS_0]) {
1562                    curr = wordBbuf.get();
1563                    if (curr == sig[POS_1]) {
1564                        curr = wordBbuf.get();
1565                        if (curr == sig[POS_2]) {
1566                            curr = wordBbuf.get();
1567                            if (curr == sig[POS_3]) {
1568                                found = true;
1569                                break;
1570                            }
1571                        }
1572                    }
1573                }
1574            }
1575        }
1576        if (found) {
1577            archive.position(off);
1578        }
1579        return found;
1580    }
1581}