001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.commons.compress.archivers.zip;
020
021import java.io.ByteArrayInputStream;
022import java.io.ByteArrayOutputStream;
023import java.io.EOFException;
024import java.io.IOException;
025import java.io.InputStream;
026import java.io.PushbackInputStream;
027import java.nio.ByteBuffer;
028import java.util.zip.CRC32;
029import java.util.zip.DataFormatException;
030import java.util.zip.Inflater;
031import java.util.zip.ZipEntry;
032import java.util.zip.ZipException;
033
034import org.apache.commons.compress.archivers.ArchiveEntry;
035import org.apache.commons.compress.archivers.ArchiveInputStream;
036import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
037import org.apache.commons.compress.utils.ArchiveUtils;
038import org.apache.commons.compress.utils.IOUtils;
039
040import static org.apache.commons.compress.archivers.zip.ZipConstants.DWORD;
041import static org.apache.commons.compress.archivers.zip.ZipConstants.SHORT;
042import static org.apache.commons.compress.archivers.zip.ZipConstants.WORD;
043import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC;
044
045/**
046 * Implements an input stream that can read Zip archives.
047 *
048 * <p>As of Apache Commons Compress it transparently supports Zip64
049 * extensions and thus individual entries and archives larger than 4
050 * GB or with more than 65536 entries.</p>
051 *
052 * <p>The {@link ZipFile} class is preferred when reading from files
053 * as {@link ZipArchiveInputStream} is limited by not being able to
054 * read the central directory header before returning entries.  In
055 * particular {@link ZipArchiveInputStream}</p>
056 *
057 * <ul>
058 *
059 *  <li>may return entries that are not part of the central directory
060 *  at all and shouldn't be considered part of the archive.</li>
061 *
062 *  <li>may return several entries with the same name.</li>
063 *
064 *  <li>will not return internal or external attributes.</li>
065 *
066 *  <li>may return incomplete extra field data.</li>
067 *
068 *  <li>may return unknown sizes and CRC values for entries until the
069 *  next entry has been reached if the archive uses the data
070 *  descriptor feature.</li>
071 *
072 * </ul>
073 *
074 * @see ZipFile
075 * @NotThreadSafe
076 */
077public class ZipArchiveInputStream extends ArchiveInputStream {
078
079    /** The zip encoding to use for filenames and the file comment. */
080    private final ZipEncoding zipEncoding;
081
082    // the provided encoding (for unit tests)
083    final String encoding;
084
085    /** Whether to look for and use Unicode extra fields. */
086    private final boolean useUnicodeExtraFields;
087
088    /** Wrapped stream, will always be a PushbackInputStream. */
089    private final InputStream in;
090
091    /** Inflater used for all deflated entries. */
092    private final Inflater inf = new Inflater(true);
093
094    /** Buffer used to read from the wrapped stream. */
095    private final ByteBuffer buf = ByteBuffer.allocate(ZipArchiveOutputStream.BUFFER_SIZE);
096
097    /** The entry that is currently being read. */
098    private CurrentEntry current = null;
099
100    /** Whether the stream has been closed. */
101    private boolean closed = false;
102
103    /** Whether the stream has reached the central directory - and thus found all entries. */
104    private boolean hitCentralDirectory = false;
105
106    /**
107     * When reading a stored entry that uses the data descriptor this
108     * stream has to read the full entry and caches it.  This is the
109     * cache.
110     */
111    private ByteArrayInputStream lastStoredEntry = null;
112
113    /** Whether the stream will try to read STORED entries that use a data descriptor. */
114    private boolean allowStoredEntriesWithDataDescriptor = false;
115
116    private static final int LFH_LEN = 30;
117    /*
118      local file header signature     WORD
119      version needed to extract       SHORT
120      general purpose bit flag        SHORT
121      compression method              SHORT
122      last mod file time              SHORT
123      last mod file date              SHORT
124      crc-32                          WORD
125      compressed size                 WORD
126      uncompressed size               WORD
127      file name length                SHORT
128      extra field length              SHORT
129    */
130
131    private static final int CFH_LEN = 46;
132    /*
133        central file header signature   WORD
134        version made by                 SHORT
135        version needed to extract       SHORT
136        general purpose bit flag        SHORT
137        compression method              SHORT
138        last mod file time              SHORT
139        last mod file date              SHORT
140        crc-32                          WORD
141        compressed size                 WORD
142        uncompressed size               WORD
143        file name length                SHORT
144        extra field length              SHORT
145        file comment length             SHORT
146        disk number start               SHORT
147        internal file attributes        SHORT
148        external file attributes        WORD
149        relative offset of local header WORD
150    */
151
152    private static final long TWO_EXP_32 = ZIP64_MAGIC + 1;
153
154    // cached buffers - must only be used locally in the class (COMPRESS-172 - reduce garbage collection)
155    private final byte[] lfhBuf = new byte[LFH_LEN];
156    private final byte[] skipBuf = new byte[1024];
157    private final byte[] shortBuf = new byte[SHORT];
158    private final byte[] wordBuf = new byte[WORD];
159    private final byte[] twoDwordBuf = new byte[2 * DWORD];
160
161    private int entriesRead = 0;
162
/**
 * Creates an instance that decodes names as UTF-8, uses Unicode
 * extra fields and does not read STORED entries that use a data
 * descriptor.
 *
 * @param inputStream the stream to wrap
 */
public ZipArchiveInputStream(final InputStream inputStream) {
    this(inputStream, ZipEncodingHelper.UTF8, true);
}
170
/**
 * Creates an instance for the given encoding that uses Unicode
 * extra fields and does not read STORED entries that use a data
 * descriptor.
 *
 * @param inputStream the stream to wrap
 * @param encoding the encoding to use for file names, use null
 * for the platform's default encoding
 * @since 1.5
 */
public ZipArchiveInputStream(final InputStream inputStream, final String encoding) {
    this(inputStream, encoding, true, false);
}
181
/**
 * Creates an instance for the given encoding that does not read
 * STORED entries that use a data descriptor.
 *
 * @param inputStream the stream to wrap
 * @param encoding the encoding to use for file names, use null
 * for the platform's default encoding
 * @param useUnicodeExtraFields whether to use InfoZIP Unicode
 * Extra Fields (if present) to set the file names.
 */
public ZipArchiveInputStream(final InputStream inputStream,
                             final String encoding,
                             final boolean useUnicodeExtraFields) {
    this(inputStream,
         encoding,
         useUnicodeExtraFields,
         false);
}
193
/**
 * Creates an instance with every option spelled out; all other
 * constructors end up here.
 *
 * @param inputStream the stream to wrap, it gets wrapped in a
 * PushbackInputStream as big as the internal read buffer
 * @param encoding the encoding to use for file names, use null
 * for the platform's default encoding
 * @param useUnicodeExtraFields whether to use InfoZIP Unicode
 * Extra Fields (if present) to set the file names.
 * @param allowStoredEntriesWithDataDescriptor whether the stream
 * will try to read STORED entries that use a data descriptor
 * @since 1.1
 */
public ZipArchiveInputStream(final InputStream inputStream,
                             final String encoding,
                             final boolean useUnicodeExtraFields,
                             final boolean allowStoredEntriesWithDataDescriptor) {
    this.encoding = encoding;
    this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
    this.useUnicodeExtraFields = useUnicodeExtraFields;
    this.allowStoredEntriesWithDataDescriptor = allowStoredEntriesWithDataDescriptor;
    this.in = new PushbackInputStream(inputStream, buf.capacity());
    // nothing has been read yet, so the buffer starts out empty
    buf.limit(0);
}
218
219    public ZipArchiveEntry getNextZipEntry() throws IOException {
220        boolean firstEntry = true;
221        if (closed || hitCentralDirectory) {
222            return null;
223        }
224        if (current != null) {
225            closeEntry();
226            firstEntry = false;
227        }
228
229        long currentHeaderOffset = getBytesRead();
230        try {
231            if (firstEntry) {
232                // split archives have a special signature before the
233                // first local file header - look for it and fail with
234                // the appropriate error message if this is a split
235                // archive.
236                readFirstLocalFileHeader(lfhBuf);
237            } else {
238                readFully(lfhBuf);
239            }
240        } catch (final EOFException e) {
241            return null;
242        }
243
244        final ZipLong sig = new ZipLong(lfhBuf);
245        if (sig.equals(ZipLong.CFH_SIG) || sig.equals(ZipLong.AED_SIG)) {
246            hitCentralDirectory = true;
247            skipRemainderOfArchive();
248            return null;
249        }
250        if (!sig.equals(ZipLong.LFH_SIG)) {
251            throw new ZipException(String.format("Unexpected record signature: 0X%X", sig.getValue()));
252        }
253
254        int off = WORD;
255        current = new CurrentEntry();
256
257        final int versionMadeBy = ZipShort.getValue(lfhBuf, off);
258        off += SHORT;
259        current.entry.setPlatform((versionMadeBy >> ZipFile.BYTE_SHIFT) & ZipFile.NIBLET_MASK);
260
261        final GeneralPurposeBit gpFlag = GeneralPurposeBit.parse(lfhBuf, off);
262        final boolean hasUTF8Flag = gpFlag.usesUTF8ForNames();
263        final ZipEncoding entryEncoding = hasUTF8Flag ? ZipEncodingHelper.UTF8_ZIP_ENCODING : zipEncoding;
264        current.hasDataDescriptor = gpFlag.usesDataDescriptor();
265        current.entry.setGeneralPurposeBit(gpFlag);
266
267        off += SHORT;
268
269        current.entry.setMethod(ZipShort.getValue(lfhBuf, off));
270        off += SHORT;
271
272        final long time = ZipUtil.dosToJavaTime(ZipLong.getValue(lfhBuf, off));
273        current.entry.setTime(time);
274        off += WORD;
275
276        ZipLong size = null, cSize = null;
277        if (!current.hasDataDescriptor) {
278            current.entry.setCrc(ZipLong.getValue(lfhBuf, off));
279            off += WORD;
280
281            cSize = new ZipLong(lfhBuf, off);
282            off += WORD;
283
284            size = new ZipLong(lfhBuf, off);
285            off += WORD;
286        } else {
287            off += 3 * WORD;
288        }
289
290        final int fileNameLen = ZipShort.getValue(lfhBuf, off);
291
292        off += SHORT;
293
294        final int extraLen = ZipShort.getValue(lfhBuf, off);
295        off += SHORT;
296
297        final byte[] fileName = new byte[fileNameLen];
298        readFully(fileName);
299        current.entry.setName(entryEncoding.decode(fileName), fileName);
300
301        final byte[] extraData = new byte[extraLen];
302        readFully(extraData);
303        current.entry.setExtra(extraData);
304
305        if (!hasUTF8Flag && useUnicodeExtraFields) {
306            ZipUtil.setNameAndCommentFromExtraFields(current.entry, fileName, null);
307        }
308
309        processZip64Extra(size, cSize);
310
311        current.entry.setLocalHeaderOffset(currentHeaderOffset);
312        current.entry.setDataOffset(getBytesRead());
313        current.entry.setStreamContiguous(true);
314
315        if (current.entry.getCompressedSize() != ArchiveEntry.SIZE_UNKNOWN) {
316            if (current.entry.getMethod() == ZipMethod.UNSHRINKING.getCode()) {
317                current.in = new UnshrinkingInputStream(new BoundedInputStream(in, current.entry.getCompressedSize()));
318            } else if (current.entry.getMethod() == ZipMethod.IMPLODING.getCode()) {
319                current.in = new ExplodingInputStream(
320                        current.entry.getGeneralPurposeBit().getSlidingDictionarySize(),
321                        current.entry.getGeneralPurposeBit().getNumberOfShannonFanoTrees(),
322                        new BoundedInputStream(in, current.entry.getCompressedSize()));
323            } else if (current.entry.getMethod() == ZipMethod.BZIP2.getCode()) {
324                current.in = new BZip2CompressorInputStream(new BoundedInputStream(in, current.entry.getCompressedSize()));
325            }
326        }
327
328        entriesRead++;
329        return current.entry;
330    }
331
332    /**
333     * Fills the given array with the first local file header and
334     * deals with splitting/spanning markers that may prefix the first
335     * LFH.
336     */
337    private void readFirstLocalFileHeader(final byte[] lfh) throws IOException {
338        readFully(lfh);
339        final ZipLong sig = new ZipLong(lfh);
340        if (sig.equals(ZipLong.DD_SIG)) {
341            throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException.Feature.SPLITTING);
342        }
343
344        if (sig.equals(ZipLong.SINGLE_SEGMENT_SPLIT_MARKER)) {
345            // The archive is not really split as only one segment was
346            // needed in the end.  Just skip over the marker.
347            final byte[] missedLfhBytes = new byte[4];
348            readFully(missedLfhBytes);
349            System.arraycopy(lfh, 4, lfh, 0, LFH_LEN - 4);
350            System.arraycopy(missedLfhBytes, 0, lfh, LFH_LEN - 4, 4);
351        }
352    }
353
354    /**
355     * Records whether a Zip64 extra is present and sets the size
356     * information from it if sizes are 0xFFFFFFFF and the entry
357     * doesn't use a data descriptor.
358     */
359    private void processZip64Extra(final ZipLong size, final ZipLong cSize) {
360        final Zip64ExtendedInformationExtraField z64 =
361            (Zip64ExtendedInformationExtraField) 
362            current.entry.getExtraField(Zip64ExtendedInformationExtraField.HEADER_ID);
363        current.usesZip64 = z64 != null;
364        if (!current.hasDataDescriptor) {
365            if (z64 != null // same as current.usesZip64 but avoids NPE warning
366                    && (cSize.equals(ZipLong.ZIP64_MAGIC) || size.equals(ZipLong.ZIP64_MAGIC)) ) {
367                current.entry.setCompressedSize(z64.getCompressedSize().getLongValue());
368                current.entry.setSize(z64.getSize().getLongValue());
369            } else {
370                current.entry.setCompressedSize(cSize.getValue());
371                current.entry.setSize(size.getValue());
372            }
373        }
374    }
375
376    @Override
377    public ArchiveEntry getNextEntry() throws IOException {
378        return getNextZipEntry();
379    }
380
381    /**
382     * Whether this class is able to read the given entry.
383     *
384     * <p>May return false if it is set up to use encryption or a
385     * compression method that hasn't been implemented yet.</p>
386     * @since 1.1
387     */
388    @Override
389    public boolean canReadEntryData(final ArchiveEntry ae) {
390        if (ae instanceof ZipArchiveEntry) {
391            final ZipArchiveEntry ze = (ZipArchiveEntry) ae;
392            return ZipUtil.canHandleEntryData(ze)
393                && supportsDataDescriptorFor(ze);
394
395        }
396        return false;
397    }
398
399    @Override
400    public int read(final byte[] buffer, final int offset, final int length) throws IOException {
401        if (closed) {
402            throw new IOException("The stream is closed");
403        }
404
405        if (current == null) {
406            return -1;
407        }
408
409        // avoid int overflow, check null buffer
410        if (offset > buffer.length || length < 0 || offset < 0 || buffer.length - offset < length) {
411            throw new ArrayIndexOutOfBoundsException();
412        }
413        
414        ZipUtil.checkRequestedFeatures(current.entry);
415        if (!supportsDataDescriptorFor(current.entry)) {
416            throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException.Feature.DATA_DESCRIPTOR,
417                    current.entry);
418        }
419
420        int read;
421        if (current.entry.getMethod() == ZipArchiveOutputStream.STORED) {
422            read = readStored(buffer, offset, length);
423        } else if (current.entry.getMethod() == ZipArchiveOutputStream.DEFLATED) {
424            read = readDeflated(buffer, offset, length);
425        } else if (current.entry.getMethod() == ZipMethod.UNSHRINKING.getCode()
426                || current.entry.getMethod() == ZipMethod.IMPLODING.getCode()
427                || current.entry.getMethod() == ZipMethod.BZIP2.getCode()) {
428            read = current.in.read(buffer, offset, length);
429        } else {
430            throw new UnsupportedZipFeatureException(ZipMethod.getMethodByCode(current.entry.getMethod()),
431                    current.entry);
432        }
433        
434        if (read >= 0) {
435            current.crc.update(buffer, offset, read);
436        }
437        
438        return read;
439    }
440
441    /**
442     * Implementation of read for STORED entries.
443     */
444    private int readStored(final byte[] buffer, final int offset, final int length) throws IOException {
445
446        if (current.hasDataDescriptor) {
447            if (lastStoredEntry == null) {
448                readStoredEntry();
449            }
450            return lastStoredEntry.read(buffer, offset, length);
451        }
452
453        final long csize = current.entry.getSize();
454        if (current.bytesRead >= csize) {
455            return -1;
456        }
457
458        if (buf.position() >= buf.limit()) {
459            buf.position(0);
460            final int l = in.read(buf.array());
461            if (l == -1) {
462                return -1;
463            }
464            buf.limit(l);
465
466            count(l);
467            current.bytesReadFromStream += l;
468        }
469
470        int toRead = Math.min(buf.remaining(), length);
471        if ((csize - current.bytesRead) < toRead) {
472            // if it is smaller than toRead then it fits into an int
473            toRead = (int) (csize - current.bytesRead);
474        }
475        buf.get(buffer, offset, toRead);
476        current.bytesRead += toRead;
477        return toRead;
478    }
479
480    /**
481     * Implementation of read for DEFLATED entries.
482     */
483    private int readDeflated(final byte[] buffer, final int offset, final int length) throws IOException {
484        final int read = readFromInflater(buffer, offset, length);
485        if (read <= 0) {
486            if (inf.finished()) {
487                return -1;
488            } else if (inf.needsDictionary()) {
489                throw new ZipException("This archive needs a preset dictionary"
490                                       + " which is not supported by Commons"
491                                       + " Compress.");
492            } else if (read == -1) {
493                throw new IOException("Truncated ZIP file");
494            }
495        }
496        return read;
497    }
498
499    /**
500     * Potentially reads more bytes to fill the inflater's buffer and
501     * reads from it.
502     */
503    private int readFromInflater(final byte[] buffer, final int offset, final int length) throws IOException {
504        int read = 0;
505        do {
506            if (inf.needsInput()) {
507                final int l = fill();
508                if (l > 0) {
509                    current.bytesReadFromStream += buf.limit();
510                } else if (l == -1) {
511                    return -1;
512                } else {
513                    break;
514                }
515            }
516            try {
517                read = inf.inflate(buffer, offset, length);
518            } catch (final DataFormatException e) {
519                throw (IOException) new ZipException(e.getMessage()).initCause(e);
520            }
521        } while (read == 0 && inf.needsInput());
522        return read;
523    }
524
525    @Override
526    public void close() throws IOException {
527        if (!closed) {
528            closed = true;
529            try {
530                in.close();
531            } finally {
532                inf.end();
533            }
534        }
535    }
536
537    /**
538     * Skips over and discards value bytes of data from this input
539     * stream.
540     *
541     * <p>This implementation may end up skipping over some smaller
542     * number of bytes, possibly 0, if and only if it reaches the end
543     * of the underlying stream.</p>
544     *
545     * <p>The actual number of bytes skipped is returned.</p>
546     *
547     * @param value the number of bytes to be skipped.
548     * @return the actual number of bytes skipped.
549     * @throws IOException - if an I/O error occurs.
550     * @throws IllegalArgumentException - if value is negative.
551     */
552    @Override
553    public long skip(final long value) throws IOException {
554        if (value >= 0) {
555            long skipped = 0;
556            while (skipped < value) {
557                final long rem = value - skipped;
558                final int x = read(skipBuf, 0, (int) (skipBuf.length > rem ? rem : skipBuf.length));
559                if (x == -1) {
560                    return skipped;
561                }
562                skipped += x;
563            }
564            return skipped;
565        }
566        throw new IllegalArgumentException();
567    }
568
569    /**
570     * Checks if the signature matches what is expected for a zip file.
571     * Does not currently handle self-extracting zips which may have arbitrary
572     * leading content.
573     *
574     * @param signature the bytes to check
575     * @param length    the number of bytes to check
576     * @return true, if this stream is a zip archive stream, false otherwise
577     */
578    public static boolean matches(final byte[] signature, final int length) {
579        if (length < ZipArchiveOutputStream.LFH_SIG.length) {
580            return false;
581        }
582
583        return checksig(signature, ZipArchiveOutputStream.LFH_SIG) // normal file
584            || checksig(signature, ZipArchiveOutputStream.EOCD_SIG) // empty zip
585            || checksig(signature, ZipArchiveOutputStream.DD_SIG) // split zip
586            || checksig(signature, ZipLong.SINGLE_SEGMENT_SPLIT_MARKER.getBytes());
587    }
588
589    private static boolean checksig(final byte[] signature, final byte[] expected) {
590        for (int i = 0; i < expected.length; i++) {
591            if (signature[i] != expected[i]) {
592                return false;
593            }
594        }
595        return true;
596    }
597
598    /**
599     * Closes the current ZIP archive entry and positions the underlying
600     * stream to the beginning of the next entry. All per-entry variables
601     * and data structures are cleared.
602     * <p>
603     * If the compressed size of this entry is included in the entry header,
604     * then any outstanding bytes are simply skipped from the underlying
605     * stream without uncompressing them. This allows an entry to be safely
606     * closed even if the compression method is unsupported.
607     * <p>
608     * In case we don't know the compressed size of this entry or have
609     * already buffered too much data from the underlying stream to support
610     * uncompression, then the uncompression process is completed and the
611     * end position of the stream is adjusted based on the result of that
612     * process.
613     *
614     * @throws IOException if an error occurs
615     */
616    private void closeEntry() throws IOException {
617        if (closed) {
618            throw new IOException("The stream is closed");
619        }
620        if (current == null) {
621            return;
622        }
623
624        // Ensure all entry bytes are read
625        if (currentEntryHasOutstandingBytes()) {
626            drainCurrentEntryData();
627        } else {
628            // this is guaranteed to exhaust the stream
629            skip(Long.MAX_VALUE); //NOSONAR
630
631            final long inB = current.entry.getMethod() == ZipArchiveOutputStream.DEFLATED
632                       ? getBytesInflated() : current.bytesRead;
633
634            // this is at most a single read() operation and can't
635            // exceed the range of int
636            final int diff = (int) (current.bytesReadFromStream - inB);
637
638            // Pushback any required bytes
639            if (diff > 0) {
640                pushback(buf.array(), buf.limit() - diff, diff);
641                current.bytesReadFromStream -= diff;
642            }
643
644            // Drain remainder of entry if not all data bytes were required
645            if (currentEntryHasOutstandingBytes()) {
646                drainCurrentEntryData();
647            }
648        }
649
650        if (lastStoredEntry == null && current.hasDataDescriptor) {
651            readDataDescriptor();
652        }
653
654        inf.reset();
655        buf.clear().flip();
656        current = null;
657        lastStoredEntry = null;
658    }
659
660    /**
661     * If the compressed size of the current entry is included in the entry header
662     * and there are any outstanding bytes in the underlying stream, then
663     * this returns true.
664     *
665     * @return true, if current entry is determined to have outstanding bytes, false otherwise
666     */
667    private boolean currentEntryHasOutstandingBytes() {
668        return current.bytesReadFromStream <= current.entry.getCompressedSize()
669                && !current.hasDataDescriptor;
670    }
671
672    /**
673     * Read all data of the current entry from the underlying stream
674     * that hasn't been read, yet.
675     */
676    private void drainCurrentEntryData() throws IOException {
677        long remaining = current.entry.getCompressedSize() - current.bytesReadFromStream;
678        while (remaining > 0) {
679            final long n = in.read(buf.array(), 0, (int) Math.min(buf.capacity(), remaining));
680            if (n < 0) {
681                throw new EOFException("Truncated ZIP entry: "
682                                       + ArchiveUtils.sanitize(current.entry.getName()));
683            }
684            count(n);
685            remaining -= n;
686        }
687    }
688
689    /**
690     * Get the number of bytes Inflater has actually processed.
691     *
692     * <p>for Java &lt; Java7 the getBytes* methods in
693     * Inflater/Deflater seem to return unsigned ints rather than
694     * longs that start over with 0 at 2^32.</p>
695     *
696     * <p>The stream knows how many bytes it has read, but not how
697     * many the Inflater actually consumed - it should be between the
698     * total number of bytes read for the entry and the total number
699     * minus the last read operation.  Here we just try to make the
700     * value close enough to the bytes we've read by assuming the
701     * number of bytes consumed must be smaller than (or equal to) the
702     * number of bytes read but not smaller by more than 2^32.</p>
703     */
704    private long getBytesInflated() {
705        long inB = inf.getBytesRead();
706        if (current.bytesReadFromStream >= TWO_EXP_32) {
707            while (inB + TWO_EXP_32 <= current.bytesReadFromStream) {
708                inB += TWO_EXP_32;
709            }
710        }
711        return inB;
712    }
713
714    private int fill() throws IOException {
715        if (closed) {
716            throw new IOException("The stream is closed");
717        }
718        final int length = in.read(buf.array());
719        if (length > 0) {
720            buf.limit(length);
721            count(buf.limit());
722            inf.setInput(buf.array(), 0, buf.limit());
723        }
724        return length;
725    }
726
727    private void readFully(final byte[] b) throws IOException {
728        final int count = IOUtils.readFully(in, b);
729        count(count);
730        if (count < b.length) {
731            throw new EOFException();
732        }
733    }
734
735    private void readDataDescriptor() throws IOException {
736        readFully(wordBuf);
737        ZipLong val = new ZipLong(wordBuf);
738        if (ZipLong.DD_SIG.equals(val)) {
739            // data descriptor with signature, skip sig
740            readFully(wordBuf);
741            val = new ZipLong(wordBuf);
742        }
743        current.entry.setCrc(val.getValue());
744
745        // if there is a ZIP64 extra field, sizes are eight bytes
746        // each, otherwise four bytes each.  Unfortunately some
747        // implementations - namely Java7 - use eight bytes without
748        // using a ZIP64 extra field -
749        // http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=7073588
750
751        // just read 16 bytes and check whether bytes nine to twelve
752        // look like one of the signatures of what could follow a data
753        // descriptor (ignoring archive decryption headers for now).
754        // If so, push back eight bytes and assume sizes are four
755        // bytes, otherwise sizes are eight bytes each.
756        readFully(twoDwordBuf);
757        final ZipLong potentialSig = new ZipLong(twoDwordBuf, DWORD);
758        if (potentialSig.equals(ZipLong.CFH_SIG) || potentialSig.equals(ZipLong.LFH_SIG)) {
759            pushback(twoDwordBuf, DWORD, DWORD);
760            current.entry.setCompressedSize(ZipLong.getValue(twoDwordBuf));
761            current.entry.setSize(ZipLong.getValue(twoDwordBuf, WORD));
762        } else {
763            current.entry.setCompressedSize(ZipEightByteInteger.getLongValue(twoDwordBuf));
764            current.entry.setSize(ZipEightByteInteger.getLongValue(twoDwordBuf, DWORD));
765        }
766    }
767
768    /**
769     * Whether this entry requires a data descriptor this library can work with.
770     *
771     * @return true if allowStoredEntriesWithDataDescriptor is true,
772     * the entry doesn't require any data descriptor or the method is
773     * DEFLATED.
774     */
775    private boolean supportsDataDescriptorFor(final ZipArchiveEntry entry) {
776        return !entry.getGeneralPurposeBit().usesDataDescriptor()
777
778                || (allowStoredEntriesWithDataDescriptor && entry.getMethod() == ZipEntry.STORED)
779                || entry.getMethod() == ZipEntry.DEFLATED;
780    }
781
782    /**
783     * Caches a stored entry that uses the data descriptor.
784     *
785     * <ul>
786     *   <li>Reads a stored entry until the signature of a local file
787     *     header, central directory header or data descriptor has been
788     *     found.</li>
789     *   <li>Stores all entry data in lastStoredEntry.</p>
790     *   <li>Rewinds the stream to position at the data
791     *     descriptor.</li>
792     *   <li>reads the data descriptor</li>
793     * </ul>
794     *
795     * <p>After calling this method the entry should know its size,
796     * the entry's data is cached and the stream is positioned at the
797     * next local file or central directory header.</p>
798     */
799    private void readStoredEntry() throws IOException {
800        final ByteArrayOutputStream bos = new ByteArrayOutputStream();
801        int off = 0;
802        boolean done = false;
803
804        // length of DD without signature
805        final int ddLen = current.usesZip64 ? WORD + 2 * DWORD : 3 * WORD;
806
807        while (!done) {
808            final int r = in.read(buf.array(), off, ZipArchiveOutputStream.BUFFER_SIZE - off);
809            if (r <= 0) {
810                // read the whole archive without ever finding a
811                // central directory
812                throw new IOException("Truncated ZIP file");
813            }
814            if (r + off < 4) {
815                // buffer too small to check for a signature, loop
816                off += r;
817                continue;
818            }
819
820            done = bufferContainsSignature(bos, off, r, ddLen);
821            if (!done) {
822                off = cacheBytesRead(bos, off, r, ddLen);
823            }
824        }
825
826        final byte[] b = bos.toByteArray();
827        lastStoredEntry = new ByteArrayInputStream(b);
828    }
829
    // Byte representations of the local file header, central directory
    // file header and data descriptor signatures; bufferContainsSignature
    // compares buffer content against them byte by byte.
    private static final byte[] LFH = ZipLong.LFH_SIG.getBytes();
    private static final byte[] CFH = ZipLong.CFH_SIG.getBytes();
    private static final byte[] DD = ZipLong.DD_SIG.getBytes();
833
834    /**
835     * Checks whether the current buffer contains the signature of a
836     * &quot;data descriptor&quot;, &quot;local file header&quot; or
837     * &quot;central directory entry&quot;.
838     *
839     * <p>If it contains such a signature, reads the data descriptor
840     * and positions the stream right after the data descriptor.</p>
841     */
842    private boolean bufferContainsSignature(final ByteArrayOutputStream bos, final int offset, final int lastRead, final int expectedDDLen)
843            throws IOException {
844
845        boolean done = false;
846        int readTooMuch = 0;
847        for (int i = 0; !done && i < lastRead - 4; i++) {
848            if (buf.array()[i] == LFH[0] && buf.array()[i + 1] == LFH[1]) {
849                if ((buf.array()[i + 2] == LFH[2] && buf.array()[i + 3] == LFH[3])
850                    || (buf.array()[i] == CFH[2] && buf.array()[i + 3] == CFH[3])) {
851                    // found a LFH or CFH:
852                    readTooMuch = offset + lastRead - i - expectedDDLen;
853                    done = true;
854                }
855                else if (buf.array()[i + 2] == DD[2] && buf.array()[i + 3] == DD[3]) {
856                    // found DD:
857                    readTooMuch = offset + lastRead - i;
858                    done = true;
859                }
860                if (done) {
861                    // * push back bytes read in excess as well as the data
862                    //   descriptor
863                    // * copy the remaining bytes to cache
864                    // * read data descriptor
865                    pushback(buf.array(), offset + lastRead - readTooMuch, readTooMuch);
866                    bos.write(buf.array(), 0, i);
867                    readDataDescriptor();
868                }
869            }
870        }
871        return done;
872    }
873
874    /**
875     * If the last read bytes could hold a data descriptor and an
876     * incomplete signature then save the last bytes to the front of
877     * the buffer and cache everything in front of the potential data
878     * descriptor into the given ByteArrayOutputStream.
879     *
880     * <p>Data descriptor plus incomplete signature (3 bytes in the
881     * worst case) can be 20 bytes max.</p>
882     */
883    private int cacheBytesRead(final ByteArrayOutputStream bos, int offset, final int lastRead, final int expecteDDLen) {
884        final int cacheable = offset + lastRead - expecteDDLen - 3;
885        if (cacheable > 0) {
886            bos.write(buf.array(), 0, cacheable);
887            System.arraycopy(buf.array(), cacheable, buf.array(), 0, expecteDDLen + 3);
888            offset = expecteDDLen + 3;
889        } else {
890            offset += lastRead;
891        }
892        return offset;
893    }
894
895    private void pushback(final byte[] buf, final int offset, final int length) throws IOException {
896        ((PushbackInputStream) in).unread(buf, offset, length);
897        pushedBackBytes(length);
898    }
899
900    // End of Central Directory Record
901    //   end of central dir signature    WORD
902    //   number of this disk             SHORT
903    //   number of the disk with the
904    //   start of the central directory  SHORT
905    //   total number of entries in the
906    //   central directory on this disk  SHORT
907    //   total number of entries in
908    //   the central directory           SHORT
909    //   size of the central directory   WORD
910    //   offset of start of central
911    //   directory with respect to
912    //   the starting disk number        WORD
913    //   .ZIP file comment length        SHORT
914    //   .ZIP file comment               up to 64KB
915    //
916
917    /**
918     * Reads the stream until it find the "End of central directory
919     * record" and consumes it as well.
920     */
921    private void skipRemainderOfArchive() throws IOException {
922        // skip over central directory. One LFH has been read too much
923        // already.  The calculation discounts file names and extra
924        // data so it will be too short.
925        realSkip((long) entriesRead * CFH_LEN - LFH_LEN);
926        findEocdRecord();
927        realSkip((long) ZipFile.MIN_EOCD_SIZE - WORD /* signature */ - SHORT /* comment len */);
928        readFully(shortBuf);
929        // file comment
930        realSkip(ZipShort.getValue(shortBuf));
931    }
932
933    /**
934     * Reads forward until the signature of the &quot;End of central
935     * directory&quot; record is found.
936     */
937    private void findEocdRecord() throws IOException {
938        int currentByte = -1;
939        boolean skipReadCall = false;
940        while (skipReadCall || (currentByte = readOneByte()) > -1) {
941            skipReadCall = false;
942            if (!isFirstByteOfEocdSig(currentByte)) {
943                continue;
944            }
945            currentByte = readOneByte();
946            if (currentByte != ZipArchiveOutputStream.EOCD_SIG[1]) {
947                if (currentByte == -1) {
948                    break;
949                }
950                skipReadCall = isFirstByteOfEocdSig(currentByte);
951                continue;
952            }
953            currentByte = readOneByte();
954            if (currentByte != ZipArchiveOutputStream.EOCD_SIG[2]) {
955                if (currentByte == -1) {
956                    break;
957                }
958                skipReadCall = isFirstByteOfEocdSig(currentByte);
959                continue;
960            }
961            currentByte = readOneByte();
962            if (currentByte == -1
963                || currentByte == ZipArchiveOutputStream.EOCD_SIG[3]) {
964                break;
965            }
966            skipReadCall = isFirstByteOfEocdSig(currentByte);
967        }
968    }
969
970    /**
971     * Skips bytes by reading from the underlying stream rather than
972     * the (potentially inflating) archive stream - which {@link
973     * #skip} would do.
974     *
975     * Also updates bytes-read counter.
976     */
977    private void realSkip(final long value) throws IOException {
978        if (value >= 0) {
979            long skipped = 0;
980            while (skipped < value) {
981                final long rem = value - skipped;
982                final int x = in.read(skipBuf, 0, (int) (skipBuf.length > rem ? rem : skipBuf.length));
983                if (x == -1) {
984                    return;
985                }
986                count(x);
987                skipped += x;
988            }
989            return;
990        }
991        throw new IllegalArgumentException();
992    }
993
994    /**
995     * Reads bytes by reading from the underlying stream rather than
996     * the (potentially inflating) archive stream - which {@link #read} would do.
997     *
998     * Also updates bytes-read counter.
999     */
1000    private int readOneByte() throws IOException {
1001        final int b = in.read();
1002        if (b != -1) {
1003            count(1);
1004        }
1005        return b;
1006    }
1007
1008    private boolean isFirstByteOfEocdSig(final int b) {
1009        return b == ZipArchiveOutputStream.EOCD_SIG[0];
1010    }
1011
    /**
     * Structure collecting information for the entry that is
     * currently being read.
     *
     * <p>A plain holder of state without any behavior of its own.</p>
     */
    private static final class CurrentEntry {

        /**
         * Current ZIP entry.
         */
        private final ZipArchiveEntry entry = new ZipArchiveEntry();

        /**
         * Does the entry use a data descriptor?
         */
        private boolean hasDataDescriptor;

        /**
         * Does the entry have a ZIP64 extended information extra field?
         */
        private boolean usesZip64;

        /**
         * Number of bytes of entry content read by the client if the
         * entry is STORED.
         */
        private long bytesRead;

        /**
         * Number of bytes of entry content read so far from the stream.
         *
         * <p>This may be more than the actual entry's length as some
         * stuff gets buffered up and needs to be pushed back when the
         * end of the entry has been reached.</p>
         */
        private long bytesReadFromStream;

        /**
         * The checksum calculated as the current entry is read.
         */
        private final CRC32 crc = new CRC32();

        /**
         * The input stream decompressing the data for shrunk and imploded entries.
         */
        private InputStream in;
    }
1058
1059    /**
1060     * Bounded input stream adapted from commons-io
1061     */
1062    private class BoundedInputStream extends InputStream {
1063
1064        /** the wrapped input stream */
1065        private final InputStream in;
1066
1067        /** the max length to provide */
1068        private final long max;
1069
1070        /** the number of bytes already returned */
1071        private long pos = 0;
1072    
1073        /**
1074         * Creates a new <code>BoundedInputStream</code> that wraps the given input
1075         * stream and limits it to a certain size.
1076         *
1077         * @param in The wrapped input stream
1078         * @param size The maximum number of bytes to return
1079         */
1080        public BoundedInputStream(final InputStream in, final long size) {
1081            this.max = size;
1082            this.in = in;
1083        }
1084
1085        @Override
1086        public int read() throws IOException {
1087            if (max >= 0 && pos >= max) {
1088                return -1;
1089            }
1090            final int result = in.read();
1091            pos++;
1092            count(1);
1093            current.bytesReadFromStream++;
1094            return result;
1095        }
1096
1097        @Override
1098        public int read(final byte[] b) throws IOException {
1099            return this.read(b, 0, b.length);
1100        }
1101
1102        @Override
1103        public int read(final byte[] b, final int off, final int len) throws IOException {
1104            if (max >= 0 && pos >= max) {
1105                return -1;
1106            }
1107            final long maxRead = max >= 0 ? Math.min(len, max - pos) : len;
1108            final int bytesRead = in.read(b, off, (int) maxRead);
1109
1110            if (bytesRead == -1) {
1111                return -1;
1112            }
1113
1114            pos += bytesRead;
1115            count(bytesRead);
1116            current.bytesReadFromStream += bytesRead;
1117            return bytesRead;
1118        }
1119
1120        @Override
1121        public long skip(final long n) throws IOException {
1122            final long toSkip = max >= 0 ? Math.min(n, max - pos) : n;
1123            final long skippedBytes = in.skip(toSkip);
1124            pos += skippedBytes;
1125            return skippedBytes;
1126        }
1127    
1128        @Override
1129        public int available() throws IOException {
1130            if (max >= 0 && pos >= max) {
1131                return 0;
1132            }
1133            return in.available();
1134        }
1135    }
1136}