001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.commons.compress.archivers.zip;
020
021import java.io.ByteArrayInputStream;
022import java.io.ByteArrayOutputStream;
023import java.io.EOFException;
024import java.io.IOException;
025import java.io.InputStream;
026import java.io.PushbackInputStream;
027import java.nio.ByteBuffer;
028import java.util.zip.CRC32;
029import java.util.zip.DataFormatException;
030import java.util.zip.Inflater;
031import java.util.zip.ZipEntry;
032import java.util.zip.ZipException;
033
034import org.apache.commons.compress.archivers.ArchiveEntry;
035import org.apache.commons.compress.archivers.ArchiveInputStream;
036import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
037import org.apache.commons.compress.compressors.deflate64.Deflate64CompressorInputStream;
038import org.apache.commons.compress.utils.ArchiveUtils;
039import org.apache.commons.compress.utils.IOUtils;
040
041import static org.apache.commons.compress.archivers.zip.ZipConstants.DWORD;
042import static org.apache.commons.compress.archivers.zip.ZipConstants.SHORT;
043import static org.apache.commons.compress.archivers.zip.ZipConstants.WORD;
044import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC;
045
046/**
047 * Implements an input stream that can read Zip archives.
048 *
 * <p>As of Apache Commons Compress 1.3 it transparently supports Zip64
 * extensions and thus individual entries and archives larger than 4
 * GB or with more than 65536 entries.</p>
052 *
053 * <p>The {@link ZipFile} class is preferred when reading from files
054 * as {@link ZipArchiveInputStream} is limited by not being able to
055 * read the central directory header before returning entries.  In
056 * particular {@link ZipArchiveInputStream}</p>
057 *
058 * <ul>
059 *
060 *  <li>may return entries that are not part of the central directory
061 *  at all and shouldn't be considered part of the archive.</li>
062 *
063 *  <li>may return several entries with the same name.</li>
064 *
065 *  <li>will not return internal or external attributes.</li>
066 *
067 *  <li>may return incomplete extra field data.</li>
068 *
069 *  <li>may return unknown sizes and CRC values for entries until the
070 *  next entry has been reached if the archive uses the data
071 *  descriptor feature.</li>
072 *
073 * </ul>
074 *
075 * @see ZipFile
076 * @NotThreadSafe
077 */
078public class ZipArchiveInputStream extends ArchiveInputStream {
079
080    /** The zip encoding to use for filenames and the file comment. */
081    private final ZipEncoding zipEncoding;
082
083    // the provided encoding (for unit tests)
084    final String encoding;
085
086    /** Whether to look for and use Unicode extra fields. */
087    private final boolean useUnicodeExtraFields;
088
089    /** Wrapped stream, will always be a PushbackInputStream. */
090    private final InputStream in;
091
092    /** Inflater used for all deflated entries. */
093    private final Inflater inf = new Inflater(true);
094
095    /** Buffer used to read from the wrapped stream. */
096    private final ByteBuffer buf = ByteBuffer.allocate(ZipArchiveOutputStream.BUFFER_SIZE);
097
098    /** The entry that is currently being read. */
099    private CurrentEntry current = null;
100
101    /** Whether the stream has been closed. */
102    private boolean closed = false;
103
104    /** Whether the stream has reached the central directory - and thus found all entries. */
105    private boolean hitCentralDirectory = false;
106
107    /**
108     * When reading a stored entry that uses the data descriptor this
109     * stream has to read the full entry and caches it.  This is the
110     * cache.
111     */
112    private ByteArrayInputStream lastStoredEntry = null;
113
114    /** Whether the stream will try to read STORED entries that use a data descriptor. */
115    private boolean allowStoredEntriesWithDataDescriptor = false;
116
117    private static final int LFH_LEN = 30;
118    /*
119      local file header signature     WORD
120      version needed to extract       SHORT
121      general purpose bit flag        SHORT
122      compression method              SHORT
123      last mod file time              SHORT
124      last mod file date              SHORT
125      crc-32                          WORD
126      compressed size                 WORD
127      uncompressed size               WORD
128      file name length                SHORT
129      extra field length              SHORT
130    */
131
132    private static final int CFH_LEN = 46;
133    /*
134        central file header signature   WORD
135        version made by                 SHORT
136        version needed to extract       SHORT
137        general purpose bit flag        SHORT
138        compression method              SHORT
139        last mod file time              SHORT
140        last mod file date              SHORT
141        crc-32                          WORD
142        compressed size                 WORD
143        uncompressed size               WORD
144        file name length                SHORT
145        extra field length              SHORT
146        file comment length             SHORT
147        disk number start               SHORT
148        internal file attributes        SHORT
149        external file attributes        WORD
150        relative offset of local header WORD
151    */
152
153    private static final long TWO_EXP_32 = ZIP64_MAGIC + 1;
154
155    // cached buffers - must only be used locally in the class (COMPRESS-172 - reduce garbage collection)
156    private final byte[] lfhBuf = new byte[LFH_LEN];
157    private final byte[] skipBuf = new byte[1024];
158    private final byte[] shortBuf = new byte[SHORT];
159    private final byte[] wordBuf = new byte[WORD];
160    private final byte[] twoDwordBuf = new byte[2 * DWORD];
161
162    private int entriesRead = 0;
163
    /**
     * Creates an instance that uses UTF-8 as encoding.
     *
     * <p>Delegates to {@link #ZipArchiveInputStream(InputStream, String)}
     * with {@code ZipEncodingHelper.UTF8}.</p>
     *
     * @param inputStream the stream to wrap
     */
    public ZipArchiveInputStream(final InputStream inputStream) {
        this(inputStream, ZipEncodingHelper.UTF8);
    }
171
    /**
     * Creates an instance using the specified encoding.
     *
     * <p>Delegates to
     * {@link #ZipArchiveInputStream(InputStream, String, boolean)} with
     * {@code useUnicodeExtraFields} set to {@code true}.</p>
     *
     * @param inputStream the stream to wrap
     * @param encoding the encoding to use for file names, use null
     * for the platform's default encoding
     * @since 1.5
     */
    public ZipArchiveInputStream(final InputStream inputStream, final String encoding) {
        this(inputStream, encoding, true);
    }
182
    /**
     * Creates an instance using the specified encoding.
     *
     * <p>Delegates to
     * {@link #ZipArchiveInputStream(InputStream, String, boolean, boolean)}
     * with {@code allowStoredEntriesWithDataDescriptor} set to
     * {@code false}.</p>
     *
     * @param inputStream the stream to wrap
     * @param encoding the encoding to use for file names, use null
     * for the platform's default encoding
     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
     * Extra Fields (if present) to set the file names.
     */
    public ZipArchiveInputStream(final InputStream inputStream, final String encoding, final boolean useUnicodeExtraFields) {
        this(inputStream, encoding, useUnicodeExtraFields, false);
    }
194
195    /**
196     * Create an instance using the specified encoding
197     * @param inputStream the stream to wrap
198     * @param encoding the encoding to use for file names, use null
199     * for the platform's default encoding
200     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
201     * Extra Fields (if present) to set the file names.
202     * @param allowStoredEntriesWithDataDescriptor whether the stream
203     * will try to read STORED entries that use a data descriptor
204     * @since 1.1
205     */
206    public ZipArchiveInputStream(final InputStream inputStream,
207                                 final String encoding,
208                                 final boolean useUnicodeExtraFields,
209                                 final boolean allowStoredEntriesWithDataDescriptor) {
210        this.encoding = encoding;
211        zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
212        this.useUnicodeExtraFields = useUnicodeExtraFields;
213        in = new PushbackInputStream(inputStream, buf.capacity());
214        this.allowStoredEntriesWithDataDescriptor =
215            allowStoredEntriesWithDataDescriptor;
216        // haven't read anything so far
217        buf.limit(0);
218    }
219
220    public ZipArchiveEntry getNextZipEntry() throws IOException {
221        boolean firstEntry = true;
222        if (closed || hitCentralDirectory) {
223            return null;
224        }
225        if (current != null) {
226            closeEntry();
227            firstEntry = false;
228        }
229
230        long currentHeaderOffset = getBytesRead();
231        try {
232            if (firstEntry) {
233                // split archives have a special signature before the
234                // first local file header - look for it and fail with
235                // the appropriate error message if this is a split
236                // archive.
237                readFirstLocalFileHeader(lfhBuf);
238            } else {
239                readFully(lfhBuf);
240            }
241        } catch (final EOFException e) {
242            return null;
243        }
244
245        final ZipLong sig = new ZipLong(lfhBuf);
246        if (sig.equals(ZipLong.CFH_SIG) || sig.equals(ZipLong.AED_SIG)) {
247            hitCentralDirectory = true;
248            skipRemainderOfArchive();
249            return null;
250        }
251        if (!sig.equals(ZipLong.LFH_SIG)) {
252            throw new ZipException(String.format("Unexpected record signature: 0X%X", sig.getValue()));
253        }
254
255        int off = WORD;
256        current = new CurrentEntry();
257
258        final int versionMadeBy = ZipShort.getValue(lfhBuf, off);
259        off += SHORT;
260        current.entry.setPlatform((versionMadeBy >> ZipFile.BYTE_SHIFT) & ZipFile.NIBLET_MASK);
261
262        final GeneralPurposeBit gpFlag = GeneralPurposeBit.parse(lfhBuf, off);
263        final boolean hasUTF8Flag = gpFlag.usesUTF8ForNames();
264        final ZipEncoding entryEncoding = hasUTF8Flag ? ZipEncodingHelper.UTF8_ZIP_ENCODING : zipEncoding;
265        current.hasDataDescriptor = gpFlag.usesDataDescriptor();
266        current.entry.setGeneralPurposeBit(gpFlag);
267
268        off += SHORT;
269
270        current.entry.setMethod(ZipShort.getValue(lfhBuf, off));
271        off += SHORT;
272
273        final long time = ZipUtil.dosToJavaTime(ZipLong.getValue(lfhBuf, off));
274        current.entry.setTime(time);
275        off += WORD;
276
277        ZipLong size = null, cSize = null;
278        if (!current.hasDataDescriptor) {
279            current.entry.setCrc(ZipLong.getValue(lfhBuf, off));
280            off += WORD;
281
282            cSize = new ZipLong(lfhBuf, off);
283            off += WORD;
284
285            size = new ZipLong(lfhBuf, off);
286            off += WORD;
287        } else {
288            off += 3 * WORD;
289        }
290
291        final int fileNameLen = ZipShort.getValue(lfhBuf, off);
292
293        off += SHORT;
294
295        final int extraLen = ZipShort.getValue(lfhBuf, off);
296        off += SHORT; // NOSONAR - assignment as documentation
297
298        final byte[] fileName = new byte[fileNameLen];
299        readFully(fileName);
300        current.entry.setName(entryEncoding.decode(fileName), fileName);
301        if (hasUTF8Flag) {
302            current.entry.setNameSource(ZipArchiveEntry.NameSource.NAME_WITH_EFS_FLAG);
303        }
304
305        final byte[] extraData = new byte[extraLen];
306        readFully(extraData);
307        current.entry.setExtra(extraData);
308
309        if (!hasUTF8Flag && useUnicodeExtraFields) {
310            ZipUtil.setNameAndCommentFromExtraFields(current.entry, fileName, null);
311        }
312
313        processZip64Extra(size, cSize);
314
315        current.entry.setLocalHeaderOffset(currentHeaderOffset);
316        current.entry.setDataOffset(getBytesRead());
317        current.entry.setStreamContiguous(true);
318
319        ZipMethod m = ZipMethod.getMethodByCode(current.entry.getMethod());
320        if (current.entry.getCompressedSize() != ArchiveEntry.SIZE_UNKNOWN) {
321            if (ZipUtil.canHandleEntryData(current.entry) && m != ZipMethod.STORED && m != ZipMethod.DEFLATED) {
322                InputStream bis = new BoundedInputStream(in, current.entry.getCompressedSize());
323                switch (m) {
324                case UNSHRINKING:
325                    current.in = new UnshrinkingInputStream(bis);
326                    break;
327                case IMPLODING:
328                    current.in = new ExplodingInputStream(
329                        current.entry.getGeneralPurposeBit().getSlidingDictionarySize(),
330                        current.entry.getGeneralPurposeBit().getNumberOfShannonFanoTrees(),
331                        bis);
332                    break;
333                case BZIP2:
334                    current.in = new BZip2CompressorInputStream(bis);
335                    break;
336                case ENHANCED_DEFLATED:
337                    current.in = new Deflate64CompressorInputStream(bis);
338                    break;
339                default:
340                    // we should never get here as all supported methods have been covered
341                    // will cause an error when read is invoked, don't throw an exception here so people can
342                    // skip unsupported entries
343                    break;
344                }
345            }
346        } else if (m == ZipMethod.ENHANCED_DEFLATED) {
347            current.in = new Deflate64CompressorInputStream(in);
348        }
349
350        entriesRead++;
351        return current.entry;
352    }
353
354    /**
355     * Fills the given array with the first local file header and
356     * deals with splitting/spanning markers that may prefix the first
357     * LFH.
358     */
359    private void readFirstLocalFileHeader(final byte[] lfh) throws IOException {
360        readFully(lfh);
361        final ZipLong sig = new ZipLong(lfh);
362        if (sig.equals(ZipLong.DD_SIG)) {
363            throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException.Feature.SPLITTING);
364        }
365
366        if (sig.equals(ZipLong.SINGLE_SEGMENT_SPLIT_MARKER)) {
367            // The archive is not really split as only one segment was
368            // needed in the end.  Just skip over the marker.
369            final byte[] missedLfhBytes = new byte[4];
370            readFully(missedLfhBytes);
371            System.arraycopy(lfh, 4, lfh, 0, LFH_LEN - 4);
372            System.arraycopy(missedLfhBytes, 0, lfh, LFH_LEN - 4, 4);
373        }
374    }
375
376    /**
377     * Records whether a Zip64 extra is present and sets the size
378     * information from it if sizes are 0xFFFFFFFF and the entry
379     * doesn't use a data descriptor.
380     */
381    private void processZip64Extra(final ZipLong size, final ZipLong cSize) {
382        final Zip64ExtendedInformationExtraField z64 =
383            (Zip64ExtendedInformationExtraField)
384            current.entry.getExtraField(Zip64ExtendedInformationExtraField.HEADER_ID);
385        current.usesZip64 = z64 != null;
386        if (!current.hasDataDescriptor) {
387            if (z64 != null // same as current.usesZip64 but avoids NPE warning
388                    && (cSize.equals(ZipLong.ZIP64_MAGIC) || size.equals(ZipLong.ZIP64_MAGIC)) ) {
389                current.entry.setCompressedSize(z64.getCompressedSize().getLongValue());
390                current.entry.setSize(z64.getSize().getLongValue());
391            } else {
392                current.entry.setCompressedSize(cSize.getValue());
393                current.entry.setSize(size.getValue());
394            }
395        }
396    }
397
    /**
     * Returns the next entry by delegating to {@link #getNextZipEntry()}.
     *
     * @return the result of {@link #getNextZipEntry()}
     * @throws IOException if {@link #getNextZipEntry()} throws one
     */
    @Override
    public ArchiveEntry getNextEntry() throws IOException {
        return getNextZipEntry();
    }
402
403    /**
404     * Whether this class is able to read the given entry.
405     *
406     * <p>May return false if it is set up to use encryption or a
407     * compression method that hasn't been implemented yet.</p>
408     * @since 1.1
409     */
410    @Override
411    public boolean canReadEntryData(final ArchiveEntry ae) {
412        if (ae instanceof ZipArchiveEntry) {
413            final ZipArchiveEntry ze = (ZipArchiveEntry) ae;
414            return ZipUtil.canHandleEntryData(ze)
415                && supportsDataDescriptorFor(ze)
416                && supportsCompressedSizeFor(ze);
417        }
418        return false;
419    }
420
421    @Override
422    public int read(final byte[] buffer, final int offset, final int length) throws IOException {
423        if (closed) {
424            throw new IOException("The stream is closed");
425        }
426
427        if (current == null) {
428            return -1;
429        }
430
431        // avoid int overflow, check null buffer
432        if (offset > buffer.length || length < 0 || offset < 0 || buffer.length - offset < length) {
433            throw new ArrayIndexOutOfBoundsException();
434        }
435
436        ZipUtil.checkRequestedFeatures(current.entry);
437        if (!supportsDataDescriptorFor(current.entry)) {
438            throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException.Feature.DATA_DESCRIPTOR,
439                    current.entry);
440        }
441        if (!supportsCompressedSizeFor(current.entry)) {
442            throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException.Feature.UNKNOWN_COMPRESSED_SIZE,
443                    current.entry);
444        }
445
446        int read;
447        if (current.entry.getMethod() == ZipArchiveOutputStream.STORED) {
448            read = readStored(buffer, offset, length);
449        } else if (current.entry.getMethod() == ZipArchiveOutputStream.DEFLATED) {
450            read = readDeflated(buffer, offset, length);
451        } else if (current.entry.getMethod() == ZipMethod.UNSHRINKING.getCode()
452                || current.entry.getMethod() == ZipMethod.IMPLODING.getCode()
453                || current.entry.getMethod() == ZipMethod.ENHANCED_DEFLATED.getCode()
454                || current.entry.getMethod() == ZipMethod.BZIP2.getCode()) {
455            read = current.in.read(buffer, offset, length);
456        } else {
457            throw new UnsupportedZipFeatureException(ZipMethod.getMethodByCode(current.entry.getMethod()),
458                    current.entry);
459        }
460
461        if (read >= 0) {
462            current.crc.update(buffer, offset, read);
463        }
464
465        return read;
466    }
467
468    /**
469     * Implementation of read for STORED entries.
470     */
471    private int readStored(final byte[] buffer, final int offset, final int length) throws IOException {
472
473        if (current.hasDataDescriptor) {
474            if (lastStoredEntry == null) {
475                readStoredEntry();
476            }
477            return lastStoredEntry.read(buffer, offset, length);
478        }
479
480        final long csize = current.entry.getSize();
481        if (current.bytesRead >= csize) {
482            return -1;
483        }
484
485        if (buf.position() >= buf.limit()) {
486            buf.position(0);
487            final int l = in.read(buf.array());
488            if (l == -1) {
489                return -1;
490            }
491            buf.limit(l);
492
493            count(l);
494            current.bytesReadFromStream += l;
495        }
496
497        int toRead = Math.min(buf.remaining(), length);
498        if ((csize - current.bytesRead) < toRead) {
499            // if it is smaller than toRead then it fits into an int
500            toRead = (int) (csize - current.bytesRead);
501        }
502        buf.get(buffer, offset, toRead);
503        current.bytesRead += toRead;
504        return toRead;
505    }
506
507    /**
508     * Implementation of read for DEFLATED entries.
509     */
510    private int readDeflated(final byte[] buffer, final int offset, final int length) throws IOException {
511        final int read = readFromInflater(buffer, offset, length);
512        if (read <= 0) {
513            if (inf.finished()) {
514                return -1;
515            } else if (inf.needsDictionary()) {
516                throw new ZipException("This archive needs a preset dictionary"
517                                       + " which is not supported by Commons"
518                                       + " Compress.");
519            } else if (read == -1) {
520                throw new IOException("Truncated ZIP file");
521            }
522        }
523        return read;
524    }
525
526    /**
527     * Potentially reads more bytes to fill the inflater's buffer and
528     * reads from it.
529     */
530    private int readFromInflater(final byte[] buffer, final int offset, final int length) throws IOException {
531        int read = 0;
532        do {
533            if (inf.needsInput()) {
534                final int l = fill();
535                if (l > 0) {
536                    current.bytesReadFromStream += buf.limit();
537                } else if (l == -1) {
538                    return -1;
539                } else {
540                    break;
541                }
542            }
543            try {
544                read = inf.inflate(buffer, offset, length);
545            } catch (final DataFormatException e) {
546                throw (IOException) new ZipException(e.getMessage()).initCause(e);
547            }
548        } while (read == 0 && inf.needsInput());
549        return read;
550    }
551
552    @Override
553    public void close() throws IOException {
554        if (!closed) {
555            closed = true;
556            try {
557                in.close();
558            } finally {
559                inf.end();
560            }
561        }
562    }
563
564    /**
565     * Skips over and discards value bytes of data from this input
566     * stream.
567     *
568     * <p>This implementation may end up skipping over some smaller
569     * number of bytes, possibly 0, if and only if it reaches the end
570     * of the underlying stream.</p>
571     *
572     * <p>The actual number of bytes skipped is returned.</p>
573     *
574     * @param value the number of bytes to be skipped.
575     * @return the actual number of bytes skipped.
576     * @throws IOException - if an I/O error occurs.
577     * @throws IllegalArgumentException - if value is negative.
578     */
579    @Override
580    public long skip(final long value) throws IOException {
581        if (value >= 0) {
582            long skipped = 0;
583            while (skipped < value) {
584                final long rem = value - skipped;
585                final int x = read(skipBuf, 0, (int) (skipBuf.length > rem ? rem : skipBuf.length));
586                if (x == -1) {
587                    return skipped;
588                }
589                skipped += x;
590            }
591            return skipped;
592        }
593        throw new IllegalArgumentException();
594    }
595
596    /**
597     * Checks if the signature matches what is expected for a zip file.
598     * Does not currently handle self-extracting zips which may have arbitrary
599     * leading content.
600     *
601     * @param signature the bytes to check
602     * @param length    the number of bytes to check
603     * @return true, if this stream is a zip archive stream, false otherwise
604     */
605    public static boolean matches(final byte[] signature, final int length) {
606        if (length < ZipArchiveOutputStream.LFH_SIG.length) {
607            return false;
608        }
609
610        return checksig(signature, ZipArchiveOutputStream.LFH_SIG) // normal file
611            || checksig(signature, ZipArchiveOutputStream.EOCD_SIG) // empty zip
612            || checksig(signature, ZipArchiveOutputStream.DD_SIG) // split zip
613            || checksig(signature, ZipLong.SINGLE_SEGMENT_SPLIT_MARKER.getBytes());
614    }
615
616    private static boolean checksig(final byte[] signature, final byte[] expected) {
617        for (int i = 0; i < expected.length; i++) {
618            if (signature[i] != expected[i]) {
619                return false;
620            }
621        }
622        return true;
623    }
624
625    /**
626     * Closes the current ZIP archive entry and positions the underlying
627     * stream to the beginning of the next entry. All per-entry variables
628     * and data structures are cleared.
629     * <p>
630     * If the compressed size of this entry is included in the entry header,
631     * then any outstanding bytes are simply skipped from the underlying
632     * stream without uncompressing them. This allows an entry to be safely
633     * closed even if the compression method is unsupported.
634     * <p>
635     * In case we don't know the compressed size of this entry or have
636     * already buffered too much data from the underlying stream to support
637     * uncompression, then the uncompression process is completed and the
638     * end position of the stream is adjusted based on the result of that
639     * process.
640     *
641     * @throws IOException if an error occurs
642     */
643    private void closeEntry() throws IOException {
644        if (closed) {
645            throw new IOException("The stream is closed");
646        }
647        if (current == null) {
648            return;
649        }
650
651        // Ensure all entry bytes are read
652        if (currentEntryHasOutstandingBytes()) {
653            drainCurrentEntryData();
654        } else {
655            // this is guaranteed to exhaust the stream
656            skip(Long.MAX_VALUE); //NOSONAR
657
658            final long inB = current.entry.getMethod() == ZipArchiveOutputStream.DEFLATED
659                       ? getBytesInflated() : current.bytesRead;
660
661            // this is at most a single read() operation and can't
662            // exceed the range of int
663            final int diff = (int) (current.bytesReadFromStream - inB);
664
665            // Pushback any required bytes
666            if (diff > 0) {
667                pushback(buf.array(), buf.limit() - diff, diff);
668                current.bytesReadFromStream -= diff;
669            }
670
671            // Drain remainder of entry if not all data bytes were required
672            if (currentEntryHasOutstandingBytes()) {
673                drainCurrentEntryData();
674            }
675        }
676
677        if (lastStoredEntry == null && current.hasDataDescriptor) {
678            readDataDescriptor();
679        }
680
681        inf.reset();
682        buf.clear().flip();
683        current = null;
684        lastStoredEntry = null;
685    }
686
687    /**
688     * If the compressed size of the current entry is included in the entry header
689     * and there are any outstanding bytes in the underlying stream, then
690     * this returns true.
691     *
692     * @return true, if current entry is determined to have outstanding bytes, false otherwise
693     */
694    private boolean currentEntryHasOutstandingBytes() {
695        return current.bytesReadFromStream <= current.entry.getCompressedSize()
696                && !current.hasDataDescriptor;
697    }
698
699    /**
700     * Read all data of the current entry from the underlying stream
701     * that hasn't been read, yet.
702     */
703    private void drainCurrentEntryData() throws IOException {
704        long remaining = current.entry.getCompressedSize() - current.bytesReadFromStream;
705        while (remaining > 0) {
706            final long n = in.read(buf.array(), 0, (int) Math.min(buf.capacity(), remaining));
707            if (n < 0) {
708                throw new EOFException("Truncated ZIP entry: "
709                                       + ArchiveUtils.sanitize(current.entry.getName()));
710            }
711            count(n);
712            remaining -= n;
713        }
714    }
715
716    /**
717     * Get the number of bytes Inflater has actually processed.
718     *
719     * <p>for Java &lt; Java7 the getBytes* methods in
720     * Inflater/Deflater seem to return unsigned ints rather than
721     * longs that start over with 0 at 2^32.</p>
722     *
723     * <p>The stream knows how many bytes it has read, but not how
724     * many the Inflater actually consumed - it should be between the
725     * total number of bytes read for the entry and the total number
726     * minus the last read operation.  Here we just try to make the
727     * value close enough to the bytes we've read by assuming the
728     * number of bytes consumed must be smaller than (or equal to) the
729     * number of bytes read but not smaller by more than 2^32.</p>
730     */
731    private long getBytesInflated() {
732        long inB = inf.getBytesRead();
733        if (current.bytesReadFromStream >= TWO_EXP_32) {
734            while (inB + TWO_EXP_32 <= current.bytesReadFromStream) {
735                inB += TWO_EXP_32;
736            }
737        }
738        return inB;
739    }
740
741    private int fill() throws IOException {
742        if (closed) {
743            throw new IOException("The stream is closed");
744        }
745        final int length = in.read(buf.array());
746        if (length > 0) {
747            buf.limit(length);
748            count(buf.limit());
749            inf.setInput(buf.array(), 0, buf.limit());
750        }
751        return length;
752    }
753
754    private void readFully(final byte[] b) throws IOException {
755        final int count = IOUtils.readFully(in, b);
756        count(count);
757        if (count < b.length) {
758            throw new EOFException();
759        }
760    }
761
762    private void readDataDescriptor() throws IOException {
763        readFully(wordBuf);
764        ZipLong val = new ZipLong(wordBuf);
765        if (ZipLong.DD_SIG.equals(val)) {
766            // data descriptor with signature, skip sig
767            readFully(wordBuf);
768            val = new ZipLong(wordBuf);
769        }
770        current.entry.setCrc(val.getValue());
771
772        // if there is a ZIP64 extra field, sizes are eight bytes
773        // each, otherwise four bytes each.  Unfortunately some
774        // implementations - namely Java7 - use eight bytes without
775        // using a ZIP64 extra field -
776        // http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=7073588
777
778        // just read 16 bytes and check whether bytes nine to twelve
779        // look like one of the signatures of what could follow a data
780        // descriptor (ignoring archive decryption headers for now).
781        // If so, push back eight bytes and assume sizes are four
782        // bytes, otherwise sizes are eight bytes each.
783        readFully(twoDwordBuf);
784        final ZipLong potentialSig = new ZipLong(twoDwordBuf, DWORD);
785        if (potentialSig.equals(ZipLong.CFH_SIG) || potentialSig.equals(ZipLong.LFH_SIG)) {
786            pushback(twoDwordBuf, DWORD, DWORD);
787            current.entry.setCompressedSize(ZipLong.getValue(twoDwordBuf));
788            current.entry.setSize(ZipLong.getValue(twoDwordBuf, WORD));
789        } else {
790            current.entry.setCompressedSize(ZipEightByteInteger.getLongValue(twoDwordBuf));
791            current.entry.setSize(ZipEightByteInteger.getLongValue(twoDwordBuf, DWORD));
792        }
793    }
794
795    /**
796     * Whether this entry requires a data descriptor this library can work with.
797     *
798     * @return true if allowStoredEntriesWithDataDescriptor is true,
799     * the entry doesn't require any data descriptor or the method is
800     * DEFLATED or ENHANCED_DEFLATED.
801     */
802    private boolean supportsDataDescriptorFor(final ZipArchiveEntry entry) {
803        return !entry.getGeneralPurposeBit().usesDataDescriptor()
804
805                || (allowStoredEntriesWithDataDescriptor && entry.getMethod() == ZipEntry.STORED)
806                || entry.getMethod() == ZipEntry.DEFLATED
807                || entry.getMethod() == ZipMethod.ENHANCED_DEFLATED.getCode();
808    }
809
810    /**
811     * Whether the compressed size for the entry is either known or
812     * not required by the compression method being used.
813     */
814    private boolean supportsCompressedSizeFor(final ZipArchiveEntry entry) {
815        return entry.getCompressedSize() != ArchiveEntry.SIZE_UNKNOWN
816            || entry.getMethod() == ZipEntry.DEFLATED
817            || entry.getMethod() == ZipMethod.ENHANCED_DEFLATED.getCode()
818            || (entry.getGeneralPurposeBit().usesDataDescriptor()
819                && allowStoredEntriesWithDataDescriptor
820                && entry.getMethod() == ZipEntry.STORED);
821    }
822
823    /**
824     * Caches a stored entry that uses the data descriptor.
825     *
826     * <ul>
827     *   <li>Reads a stored entry until the signature of a local file
828     *     header, central directory header or data descriptor has been
829     *     found.</li>
830     *   <li>Stores all entry data in lastStoredEntry.</p>
831     *   <li>Rewinds the stream to position at the data
832     *     descriptor.</li>
833     *   <li>reads the data descriptor</li>
834     * </ul>
835     *
836     * <p>After calling this method the entry should know its size,
837     * the entry's data is cached and the stream is positioned at the
838     * next local file or central directory header.</p>
839     */
840    private void readStoredEntry() throws IOException {
841        final ByteArrayOutputStream bos = new ByteArrayOutputStream();
842        int off = 0;
843        boolean done = false;
844
845        // length of DD without signature
846        final int ddLen = current.usesZip64 ? WORD + 2 * DWORD : 3 * WORD;
847
848        while (!done) {
849            final int r = in.read(buf.array(), off, ZipArchiveOutputStream.BUFFER_SIZE - off);
850            if (r <= 0) {
851                // read the whole archive without ever finding a
852                // central directory
853                throw new IOException("Truncated ZIP file");
854            }
855            if (r + off < 4) {
856                // buffer too small to check for a signature, loop
857                off += r;
858                continue;
859            }
860
861            done = bufferContainsSignature(bos, off, r, ddLen);
862            if (!done) {
863                off = cacheBytesRead(bos, off, r, ddLen);
864            }
865        }
866
867        final byte[] b = bos.toByteArray();
868        lastStoredEntry = new ByteArrayInputStream(b);
869    }
870
    // Byte representations of the record signatures that are searched
    // for when scanning the raw buffer for the end of a stored entry.

    /** Bytes of the local file header signature. */
    private static final byte[] LFH = ZipLong.LFH_SIG.getBytes();
    /** Bytes of the central directory file header signature. */
    private static final byte[] CFH = ZipLong.CFH_SIG.getBytes();
    /** Bytes of the data descriptor signature. */
    private static final byte[] DD = ZipLong.DD_SIG.getBytes();
874
875    /**
876     * Checks whether the current buffer contains the signature of a
877     * &quot;data descriptor&quot;, &quot;local file header&quot; or
878     * &quot;central directory entry&quot;.
879     *
880     * <p>If it contains such a signature, reads the data descriptor
881     * and positions the stream right after the data descriptor.</p>
882     */
883    private boolean bufferContainsSignature(final ByteArrayOutputStream bos, final int offset, final int lastRead, final int expectedDDLen)
884            throws IOException {
885
886        boolean done = false;
887        int readTooMuch = 0;
888        for (int i = 0; !done && i < lastRead - 4; i++) {
889            if (buf.array()[i] == LFH[0] && buf.array()[i + 1] == LFH[1]) {
890                if ((buf.array()[i + 2] == LFH[2] && buf.array()[i + 3] == LFH[3])
891                    || (buf.array()[i] == CFH[2] && buf.array()[i + 3] == CFH[3])) {
892                    // found a LFH or CFH:
893                    readTooMuch = offset + lastRead - i - expectedDDLen;
894                    done = true;
895                }
896                else if (buf.array()[i + 2] == DD[2] && buf.array()[i + 3] == DD[3]) {
897                    // found DD:
898                    readTooMuch = offset + lastRead - i;
899                    done = true;
900                }
901                if (done) {
902                    // * push back bytes read in excess as well as the data
903                    //   descriptor
904                    // * copy the remaining bytes to cache
905                    // * read data descriptor
906                    pushback(buf.array(), offset + lastRead - readTooMuch, readTooMuch);
907                    bos.write(buf.array(), 0, i);
908                    readDataDescriptor();
909                }
910            }
911        }
912        return done;
913    }
914
915    /**
916     * If the last read bytes could hold a data descriptor and an
917     * incomplete signature then save the last bytes to the front of
918     * the buffer and cache everything in front of the potential data
919     * descriptor into the given ByteArrayOutputStream.
920     *
921     * <p>Data descriptor plus incomplete signature (3 bytes in the
922     * worst case) can be 20 bytes max.</p>
923     */
924    private int cacheBytesRead(final ByteArrayOutputStream bos, int offset, final int lastRead, final int expecteDDLen) {
925        final int cacheable = offset + lastRead - expecteDDLen - 3;
926        if (cacheable > 0) {
927            bos.write(buf.array(), 0, cacheable);
928            System.arraycopy(buf.array(), cacheable, buf.array(), 0, expecteDDLen + 3);
929            offset = expecteDDLen + 3;
930        } else {
931            offset += lastRead;
932        }
933        return offset;
934    }
935
936    private void pushback(final byte[] buf, final int offset, final int length) throws IOException {
937        ((PushbackInputStream) in).unread(buf, offset, length);
938        pushedBackBytes(length);
939    }
940
941    // End of Central Directory Record
942    //   end of central dir signature    WORD
943    //   number of this disk             SHORT
944    //   number of the disk with the
945    //   start of the central directory  SHORT
946    //   total number of entries in the
947    //   central directory on this disk  SHORT
948    //   total number of entries in
949    //   the central directory           SHORT
950    //   size of the central directory   WORD
951    //   offset of start of central
952    //   directory with respect to
953    //   the starting disk number        WORD
954    //   .ZIP file comment length        SHORT
955    //   .ZIP file comment               up to 64KB
956    //
957
958    /**
959     * Reads the stream until it find the "End of central directory
960     * record" and consumes it as well.
961     */
962    private void skipRemainderOfArchive() throws IOException {
963        // skip over central directory. One LFH has been read too much
964        // already.  The calculation discounts file names and extra
965        // data so it will be too short.
966        realSkip((long) entriesRead * CFH_LEN - LFH_LEN);
967        findEocdRecord();
968        realSkip((long) ZipFile.MIN_EOCD_SIZE - WORD /* signature */ - SHORT /* comment len */);
969        readFully(shortBuf);
970        // file comment
971        realSkip(ZipShort.getValue(shortBuf));
972    }
973
974    /**
975     * Reads forward until the signature of the &quot;End of central
976     * directory&quot; record is found.
977     */
978    private void findEocdRecord() throws IOException {
979        int currentByte = -1;
980        boolean skipReadCall = false;
981        while (skipReadCall || (currentByte = readOneByte()) > -1) {
982            skipReadCall = false;
983            if (!isFirstByteOfEocdSig(currentByte)) {
984                continue;
985            }
986            currentByte = readOneByte();
987            if (currentByte != ZipArchiveOutputStream.EOCD_SIG[1]) {
988                if (currentByte == -1) {
989                    break;
990                }
991                skipReadCall = isFirstByteOfEocdSig(currentByte);
992                continue;
993            }
994            currentByte = readOneByte();
995            if (currentByte != ZipArchiveOutputStream.EOCD_SIG[2]) {
996                if (currentByte == -1) {
997                    break;
998                }
999                skipReadCall = isFirstByteOfEocdSig(currentByte);
1000                continue;
1001            }
1002            currentByte = readOneByte();
1003            if (currentByte == -1
1004                || currentByte == ZipArchiveOutputStream.EOCD_SIG[3]) {
1005                break;
1006            }
1007            skipReadCall = isFirstByteOfEocdSig(currentByte);
1008        }
1009    }
1010
1011    /**
1012     * Skips bytes by reading from the underlying stream rather than
1013     * the (potentially inflating) archive stream - which {@link
1014     * #skip} would do.
1015     *
1016     * Also updates bytes-read counter.
1017     */
1018    private void realSkip(final long value) throws IOException {
1019        if (value >= 0) {
1020            long skipped = 0;
1021            while (skipped < value) {
1022                final long rem = value - skipped;
1023                final int x = in.read(skipBuf, 0, (int) (skipBuf.length > rem ? rem : skipBuf.length));
1024                if (x == -1) {
1025                    return;
1026                }
1027                count(x);
1028                skipped += x;
1029            }
1030            return;
1031        }
1032        throw new IllegalArgumentException();
1033    }
1034
1035    /**
1036     * Reads bytes by reading from the underlying stream rather than
1037     * the (potentially inflating) archive stream - which {@link #read} would do.
1038     *
1039     * Also updates bytes-read counter.
1040     */
1041    private int readOneByte() throws IOException {
1042        final int b = in.read();
1043        if (b != -1) {
1044            count(1);
1045        }
1046        return b;
1047    }
1048
1049    private boolean isFirstByteOfEocdSig(final int b) {
1050        return b == ZipArchiveOutputStream.EOCD_SIG[0];
1051    }
1052
    /**
     * Structure collecting information for the entry that is
     * currently being read.
     */
    private static final class CurrentEntry {

        /**
         * Current ZIP entry.
         */
        private final ZipArchiveEntry entry = new ZipArchiveEntry();

        /**
         * Does the entry use a data descriptor?
         */
        private boolean hasDataDescriptor;

        /**
         * Does the entry have a ZIP64 extended information extra field.
         */
        private boolean usesZip64;

        /**
         * Number of bytes of entry content read by the client if the
         * entry is STORED.
         */
        private long bytesRead;

        /**
         * Number of bytes of entry content read so far from the stream.
         *
         * <p>This may be more than the actual entry's length as some
         * stuff gets buffered up and needs to be pushed back when the
         * end of the entry has been reached.</p>
         */
        private long bytesReadFromStream;

        /**
         * The checksum calculated as the current entry is read.
         */
        private final CRC32 crc = new CRC32();

        /**
         * The input stream decompressing the data for shrunk and imploded entries.
         */
        private InputStream in;
    }
1099
1100    /**
1101     * Bounded input stream adapted from commons-io
1102     */
1103    private class BoundedInputStream extends InputStream {
1104
1105        /** the wrapped input stream */
1106        private final InputStream in;
1107
1108        /** the max length to provide */
1109        private final long max;
1110
1111        /** the number of bytes already returned */
1112        private long pos = 0;
1113
1114        /**
1115         * Creates a new <code>BoundedInputStream</code> that wraps the given input
1116         * stream and limits it to a certain size.
1117         *
1118         * @param in The wrapped input stream
1119         * @param size The maximum number of bytes to return
1120         */
1121        public BoundedInputStream(final InputStream in, final long size) {
1122            this.max = size;
1123            this.in = in;
1124        }
1125
1126        @Override
1127        public int read() throws IOException {
1128            if (max >= 0 && pos >= max) {
1129                return -1;
1130            }
1131            final int result = in.read();
1132            pos++;
1133            count(1);
1134            current.bytesReadFromStream++;
1135            return result;
1136        }
1137
1138        @Override
1139        public int read(final byte[] b) throws IOException {
1140            return this.read(b, 0, b.length);
1141        }
1142
1143        @Override
1144        public int read(final byte[] b, final int off, final int len) throws IOException {
1145            if (max >= 0 && pos >= max) {
1146                return -1;
1147            }
1148            final long maxRead = max >= 0 ? Math.min(len, max - pos) : len;
1149            final int bytesRead = in.read(b, off, (int) maxRead);
1150
1151            if (bytesRead == -1) {
1152                return -1;
1153            }
1154
1155            pos += bytesRead;
1156            count(bytesRead);
1157            current.bytesReadFromStream += bytesRead;
1158            return bytesRead;
1159        }
1160
1161        @Override
1162        public long skip(final long n) throws IOException {
1163            final long toSkip = max >= 0 ? Math.min(n, max - pos) : n;
1164            final long skippedBytes = in.skip(toSkip);
1165            pos += skippedBytes;
1166            return skippedBytes;
1167        }
1168
1169        @Override
1170        public int available() throws IOException {
1171            if (max >= 0 && pos >= max) {
1172                return 0;
1173            }
1174            return in.available();
1175        }
1176    }
1177}