001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, 013 * software distributed under the License is distributed on an 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 * KIND, either express or implied. See the License for the 016 * specific language governing permissions and limitations 017 * under the License. 018 */ 019package org.apache.commons.compress.archivers.zip; 020 021import java.io.ByteArrayInputStream; 022import java.io.ByteArrayOutputStream; 023import java.io.EOFException; 024import java.io.IOException; 025import java.io.InputStream; 026import java.io.PushbackInputStream; 027import java.nio.ByteBuffer; 028import java.util.zip.CRC32; 029import java.util.zip.DataFormatException; 030import java.util.zip.Inflater; 031import java.util.zip.ZipEntry; 032import java.util.zip.ZipException; 033 034import org.apache.commons.compress.archivers.ArchiveEntry; 035import org.apache.commons.compress.archivers.ArchiveInputStream; 036import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream; 037import org.apache.commons.compress.utils.ArchiveUtils; 038import org.apache.commons.compress.utils.IOUtils; 039 040import static org.apache.commons.compress.archivers.zip.ZipConstants.DWORD; 041import static org.apache.commons.compress.archivers.zip.ZipConstants.SHORT; 042import static org.apache.commons.compress.archivers.zip.ZipConstants.WORD; 043import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC; 044 
/**
 * Implements an input stream that can read Zip archives.
 *
 * <p>As of Apache Commons Compress it transparently supports Zip64
 * extensions and thus individual entries and archives larger than 4
 * GB or with more than 65536 entries.</p>
 *
 * <p>The {@link ZipFile} class is preferred when reading from files
 * as {@link ZipArchiveInputStream} is limited by not being able to
 * read the central directory header before returning entries. In
 * particular {@link ZipArchiveInputStream}</p>
 *
 * <ul>
 *
 * <li>may return entries that are not part of the central directory
 * at all and shouldn't be considered part of the archive.</li>
 *
 * <li>may return several entries with the same name.</li>
 *
 * <li>will not return internal or external attributes.</li>
 *
 * <li>may return incomplete extra field data.</li>
 *
 * <li>may return unknown sizes and CRC values for entries until the
 * next entry has been reached if the archive uses the data
 * descriptor feature.</li>
 *
 * </ul>
 *
 * @see ZipFile
 * @NotThreadSafe
 */
public class ZipArchiveInputStream extends ArchiveInputStream {

    /** The zip encoding to use for filenames and the file comment. */
    private final ZipEncoding zipEncoding;

    // the provided encoding (for unit tests)
    final String encoding;

    /** Whether to look for and use Unicode extra fields. */
    private final boolean useUnicodeExtraFields;

    /** Wrapped stream, will always be a PushbackInputStream. */
    private final InputStream in;

    /** Inflater used for all deflated entries. */
    private final Inflater inf = new Inflater(true);

    /**
     * Buffer used to read from the wrapped stream.
     *
     * <p>Its capacity doubles as the pushback capacity of {@code in},
     * so any fully buffered chunk can be pushed back in one go.</p>
     */
    private final ByteBuffer buf = ByteBuffer.allocate(ZipArchiveOutputStream.BUFFER_SIZE);

    /** The entry that is currently being read. */
    private CurrentEntry current = null;

    /** Whether the stream has been closed. */
    private boolean closed = false;

    /** Whether the stream has reached the central directory - and thus found all entries. */
    private boolean hitCentralDirectory = false;

    /**
     * When reading a stored entry that uses the data descriptor this
     * stream has to read the full entry and caches it.  This is the
     * cache.
     */
    private ByteArrayInputStream lastStoredEntry = null;

    /** Whether the stream will try to read STORED entries that use a data descriptor. */
    private boolean allowStoredEntriesWithDataDescriptor = false;

    /** Size of a local file header, up to but excluding the variable-length name/extra data. */
    private static final int LFH_LEN = 30;
    /*
      local file header signature     WORD
      version needed to extract       SHORT
      general purpose bit flag        SHORT
      compression method              SHORT
      last mod file time              SHORT
      last mod file date              SHORT
      crc-32                          WORD
      compressed size                 WORD
      uncompressed size               WORD
      file name length                SHORT
      extra field length              SHORT
    */

    /** Size of a central directory header, up to but excluding the variable-length fields. */
    private static final int CFH_LEN = 46;
    /*
      central file header signature   WORD
      version made by                 SHORT
      version needed to extract       SHORT
      general purpose bit flag        SHORT
      compression method              SHORT
      last mod file time              SHORT
      last mod file date              SHORT
      crc-32                          WORD
      compressed size                 WORD
      uncompressed size               WORD
      file name length                SHORT
      extra field length              SHORT
      file comment length             SHORT
      disk number start               SHORT
      internal file attributes        SHORT
      external file attributes        WORD
      relative offset of local header WORD
    */

    /** 2^32 - used to correct Inflater byte counters that wrap at 32 bits. */
    private static final long TWO_EXP_32 = ZIP64_MAGIC + 1;

    // cached buffers - must only be used locally in the class (COMPRESS-172 - reduce garbage collection)
    private final byte[] LFH_BUF = new byte[LFH_LEN];
    private final byte[] SKIP_BUF = new byte[1024];
    private final byte[] SHORT_BUF = new byte[SHORT];
    private final byte[] WORD_BUF = new byte[WORD];
    private final byte[] TWO_DWORD_BUF = new byte[2 * DWORD];

    /** Number of entries returned so far - used to estimate the central directory size when skipping. */
    private int entriesRead = 0;

    /**
     * Create an instance using UTF-8 encoding
     * @param inputStream the stream to wrap
     */
    public ZipArchiveInputStream(final InputStream inputStream) {
        this(inputStream, ZipEncodingHelper.UTF8);
    }

    /**
     * Create an instance using the specified encoding
     * @param inputStream the stream to wrap
     * @param encoding the encoding to use for file names, use null
     * for the platform's default encoding
     * @since 1.5
     */
    public ZipArchiveInputStream(final InputStream inputStream, final String encoding) {
        this(inputStream, encoding, true);
    }

    /**
     * Create an instance using the specified encoding
     * @param inputStream the stream to wrap
     * @param encoding the encoding to use for file names, use null
     * for the platform's default encoding
     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
     * Extra Fields (if present) to set the file names.
     */
    public ZipArchiveInputStream(final InputStream inputStream, final String encoding, final boolean useUnicodeExtraFields) {
        this(inputStream, encoding, useUnicodeExtraFields, false);
    }

    /**
     * Create an instance using the specified encoding
     * @param inputStream the stream to wrap
     * @param encoding the encoding to use for file names, use null
     * for the platform's default encoding
     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
     * Extra Fields (if present) to set the file names.
     * @param allowStoredEntriesWithDataDescriptor whether the stream
     * will try to read STORED entries that use a data descriptor
     * @since 1.1
     */
    public ZipArchiveInputStream(final InputStream inputStream,
                                 final String encoding,
                                 final boolean useUnicodeExtraFields,
                                 final boolean allowStoredEntriesWithDataDescriptor) {
        this.encoding = encoding;
        zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
        this.useUnicodeExtraFields = useUnicodeExtraFields;
        // pushback capacity matches buf so a whole buffered chunk can be unread
        in = new PushbackInputStream(inputStream, buf.capacity());
        this.allowStoredEntriesWithDataDescriptor =
            allowStoredEntriesWithDataDescriptor;
        // haven't read anything so far
        buf.limit(0);
    }

    /**
     * Reads the next local file header and returns the entry it
     * describes, or {@code null} once the central directory (or the
     * end of the stream) has been reached.
     *
     * <p>Closes the previous entry first, which positions the stream
     * at the start of the next local file header.</p>
     *
     * @return the next entry or {@code null} if there are no more
     * @throws IOException if the header cannot be read or is malformed
     */
    public ZipArchiveEntry getNextZipEntry() throws IOException {
        boolean firstEntry = true;
        if (closed || hitCentralDirectory) {
            return null;
        }
        if (current != null) {
            closeEntry();
            firstEntry = false;
        }

        try {
            if (firstEntry) {
                // split archives have a special signature before the
                // first local file header - look for it and fail with
                // the appropriate error message if this is a split
                // archive.
                readFirstLocalFileHeader(LFH_BUF);
            } else {
                readFully(LFH_BUF);
            }
        } catch (final EOFException e) {
            // a clean EOF where an entry was expected simply means "no more entries"
            return null;
        }

        final ZipLong sig = new ZipLong(LFH_BUF);
        if (sig.equals(ZipLong.CFH_SIG) || sig.equals(ZipLong.AED_SIG)) {
            // first central directory header (or archive extra data record):
            // all entries have been seen, consume the rest of the archive
            hitCentralDirectory = true;
            skipRemainderOfArchive();
        }
        if (!sig.equals(ZipLong.LFH_SIG)) {
            return null;
        }

        // walk the fixed-size part of the LFH; off tracks the current field offset
        int off = WORD;
        current = new CurrentEntry();

        final int versionMadeBy = ZipShort.getValue(LFH_BUF, off);
        off += SHORT;
        current.entry.setPlatform((versionMadeBy >> ZipFile.BYTE_SHIFT) & ZipFile.NIBLET_MASK);

        final GeneralPurposeBit gpFlag = GeneralPurposeBit.parse(LFH_BUF, off);
        final boolean hasUTF8Flag = gpFlag.usesUTF8ForNames();
        final ZipEncoding entryEncoding = hasUTF8Flag ? ZipEncodingHelper.UTF8_ZIP_ENCODING : zipEncoding;
        current.hasDataDescriptor = gpFlag.usesDataDescriptor();
        current.entry.setGeneralPurposeBit(gpFlag);

        off += SHORT;

        current.entry.setMethod(ZipShort.getValue(LFH_BUF, off));
        off += SHORT;

        final long time = ZipUtil.dosToJavaTime(ZipLong.getValue(LFH_BUF, off));
        current.entry.setTime(time);
        off += WORD;

        // with a data descriptor CRC and sizes in the LFH are meaningless (usually 0)
        ZipLong size = null, cSize = null;
        if (!current.hasDataDescriptor) {
            current.entry.setCrc(ZipLong.getValue(LFH_BUF, off));
            off += WORD;

            cSize = new ZipLong(LFH_BUF, off);
            off += WORD;

            size = new ZipLong(LFH_BUF, off);
            off += WORD;
        } else {
            off += 3 * WORD;
        }

        final int fileNameLen = ZipShort.getValue(LFH_BUF, off);

        off += SHORT;

        final int extraLen = ZipShort.getValue(LFH_BUF, off);
        off += SHORT;

        final byte[] fileName = new byte[fileNameLen];
        readFully(fileName);
        current.entry.setName(entryEncoding.decode(fileName), fileName);

        final byte[] extraData = new byte[extraLen];
        readFully(extraData);
        current.entry.setExtra(extraData);

        if (!hasUTF8Flag && useUnicodeExtraFields) {
            ZipUtil.setNameAndCommentFromExtraFields(current.entry, fileName, null);
        }

        processZip64Extra(size, cSize);

        // for the less common methods a dedicated decompressing stream is set
        // up now; STORED and DEFLATED are handled directly in read()
        if (current.entry.getCompressedSize() != ArchiveEntry.SIZE_UNKNOWN) {
            if (current.entry.getMethod() == ZipMethod.UNSHRINKING.getCode()) {
                current.in = new UnshrinkingInputStream(new BoundedInputStream(in, current.entry.getCompressedSize()));
            } else if (current.entry.getMethod() == ZipMethod.IMPLODING.getCode()) {
                current.in = new ExplodingInputStream(
                        current.entry.getGeneralPurposeBit().getSlidingDictionarySize(),
                        current.entry.getGeneralPurposeBit().getNumberOfShannonFanoTrees(),
                        new BoundedInputStream(in, current.entry.getCompressedSize()));
            } else if (current.entry.getMethod() == ZipMethod.BZIP2.getCode()) {
                current.in = new BZip2CompressorInputStream(new BoundedInputStream(in, current.entry.getCompressedSize()));
            }
        }

        entriesRead++;
        return current.entry;
    }

    /**
     * Fills the given array with the first local file header and
     * deals with splitting/spanning markers that may prefix the first
     * LFH.
     */
    private void readFirstLocalFileHeader(final byte[] lfh) throws IOException {
        readFully(lfh);
        final ZipLong sig = new ZipLong(lfh);
        if (sig.equals(ZipLong.DD_SIG)) {
            // a real multi-segment split archive - not supported
            throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException.Feature.SPLITTING);
        }

        if (sig.equals(ZipLong.SINGLE_SEGMENT_SPLIT_MARKER)) {
            // The archive is not really split as only one segment was
            // needed in the end.  Just skip over the marker.
            final byte[] missedLfhBytes = new byte[4];
            readFully(missedLfhBytes);
            // shift the already-read bytes over the marker and append the rest
            System.arraycopy(lfh, 4, lfh, 0, LFH_LEN - 4);
            System.arraycopy(missedLfhBytes, 0, lfh, LFH_LEN - 4, 4);
        }
    }

    /**
     * Records whether a Zip64 extra is present and sets the size
     * information from it if sizes are 0xFFFFFFFF and the entry
     * doesn't use a data descriptor.
     */
    private void processZip64Extra(final ZipLong size, final ZipLong cSize) {
        final Zip64ExtendedInformationExtraField z64 =
            (Zip64ExtendedInformationExtraField)
            current.entry.getExtraField(Zip64ExtendedInformationExtraField.HEADER_ID);
        current.usesZip64 = z64 != null;
        if (!current.hasDataDescriptor) {
            if (z64 != null // same as current.usesZip64 but avoids NPE warning
                    && (cSize.equals(ZipLong.ZIP64_MAGIC) || size.equals(ZipLong.ZIP64_MAGIC)) ) {
                // NOTE(review): z64.getCompressedSize()/getSize() may be null if the
                // extra field omits the size values - would NPE here; confirm against
                // Zip64ExtendedInformationExtraField's parsing contract.
                current.entry.setCompressedSize(z64.getCompressedSize().getLongValue());
                current.entry.setSize(z64.getSize().getLongValue());
            } else {
                current.entry.setCompressedSize(cSize.getValue());
                current.entry.setSize(size.getValue());
            }
        }
    }

    @Override
    public ArchiveEntry getNextEntry() throws IOException {
        return getNextZipEntry();
    }

    /**
     * Whether this class is able to read the given entry.
     *
     * <p>May return false if it is set up to use encryption or a
     * compression method that hasn't been implemented yet.</p>
     * @since 1.1
     */
    @Override
    public boolean canReadEntryData(final ArchiveEntry ae) {
        if (ae instanceof ZipArchiveEntry) {
            final ZipArchiveEntry ze = (ZipArchiveEntry) ae;
            return ZipUtil.canHandleEntryData(ze)
                && supportsDataDescriptorFor(ze);

        }
        return false;
    }

    @Override
    public int read(final byte[] buffer, final int offset, final int length) throws IOException {
        if (closed) {
            throw new IOException("The stream is closed");
        }

        // no entry has been opened (or the last one was closed)
        if (current == null) {
            return -1;
        }

        // avoid int overflow, check null buffer
        if (offset > buffer.length || length < 0 || offset < 0 || buffer.length - offset < length) {
            throw new ArrayIndexOutOfBoundsException();
        }

        ZipUtil.checkRequestedFeatures(current.entry);
        if (!supportsDataDescriptorFor(current.entry)) {
            throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException.Feature.DATA_DESCRIPTOR,
                    current.entry);
        }

        // dispatch on compression method; the less common methods were wrapped
        // in a decompressing stream when the entry was opened
        int read;
        if (current.entry.getMethod() == ZipArchiveOutputStream.STORED) {
            read = readStored(buffer, offset, length);
        } else if (current.entry.getMethod() == ZipArchiveOutputStream.DEFLATED) {
            read = readDeflated(buffer, offset, length);
        } else if (current.entry.getMethod() == ZipMethod.UNSHRINKING.getCode()
                || current.entry.getMethod() == ZipMethod.IMPLODING.getCode()
                || current.entry.getMethod() == ZipMethod.BZIP2.getCode()) {
            read = current.in.read(buffer, offset, length);
        } else {
            throw new UnsupportedZipFeatureException(ZipMethod.getMethodByCode(current.entry.getMethod()),
                    current.entry);
        }

        if (read >= 0) {
            // maintain the running CRC of uncompressed entry data
            current.crc.update(buffer, offset, read);
        }

        return read;
    }

    /**
     * Implementation of read for STORED entries.
     */
    private int readStored(final byte[] buffer, final int offset, final int length) throws IOException {

        if (current.hasDataDescriptor) {
            // without a size in the header the whole entry has to be read
            // ahead and cached before any bytes can be returned
            if (lastStoredEntry == null) {
                readStoredEntry();
            }
            return lastStoredEntry.read(buffer, offset, length);
        }

        final long csize = current.entry.getSize();
        if (current.bytesRead >= csize) {
            return -1;
        }

        // refill buf from the wrapped stream when exhausted
        if (buf.position() >= buf.limit()) {
            buf.position(0);
            final int l = in.read(buf.array());
            if (l == -1) {
                return -1;
            }
            buf.limit(l);

            count(l);
            current.bytesReadFromStream += l;
        }

        int toRead = Math.min(buf.remaining(), length);
        if ((csize - current.bytesRead) < toRead) {
            // if it is smaller than toRead then it fits into an int
            toRead = (int) (csize - current.bytesRead);
        }
        buf.get(buffer, offset, toRead);
        current.bytesRead += toRead;
        return toRead;
    }

    /**
     * Implementation of read for DEFLATED entries.
476 */ 477 private int readDeflated(final byte[] buffer, final int offset, final int length) throws IOException { 478 final int read = readFromInflater(buffer, offset, length); 479 if (read <= 0) { 480 if (inf.finished()) { 481 return -1; 482 } else if (inf.needsDictionary()) { 483 throw new ZipException("This archive needs a preset dictionary" 484 + " which is not supported by Commons" 485 + " Compress."); 486 } else if (read == -1) { 487 throw new IOException("Truncated ZIP file"); 488 } 489 } 490 return read; 491 } 492 493 /** 494 * Potentially reads more bytes to fill the inflater's buffer and 495 * reads from it. 496 */ 497 private int readFromInflater(final byte[] buffer, final int offset, final int length) throws IOException { 498 int read = 0; 499 do { 500 if (inf.needsInput()) { 501 final int l = fill(); 502 if (l > 0) { 503 current.bytesReadFromStream += buf.limit(); 504 } else if (l == -1) { 505 return -1; 506 } else { 507 break; 508 } 509 } 510 try { 511 read = inf.inflate(buffer, offset, length); 512 } catch (final DataFormatException e) { 513 throw (IOException) new ZipException(e.getMessage()).initCause(e); 514 } 515 } while (read == 0 && inf.needsInput()); 516 return read; 517 } 518 519 @Override 520 public void close() throws IOException { 521 if (!closed) { 522 closed = true; 523 try { 524 in.close(); 525 } finally { 526 inf.end(); 527 } 528 } 529 } 530 531 /** 532 * Skips over and discards value bytes of data from this input 533 * stream. 534 * 535 * <p>This implementation may end up skipping over some smaller 536 * number of bytes, possibly 0, if and only if it reaches the end 537 * of the underlying stream.</p> 538 * 539 * <p>The actual number of bytes skipped is returned.</p> 540 * 541 * @param value the number of bytes to be skipped. 542 * @return the actual number of bytes skipped. 543 * @throws IOException - if an I/O error occurs. 544 * @throws IllegalArgumentException - if value is negative. 
545 */ 546 @Override 547 public long skip(final long value) throws IOException { 548 if (value >= 0) { 549 long skipped = 0; 550 while (skipped < value) { 551 final long rem = value - skipped; 552 final int x = read(SKIP_BUF, 0, (int) (SKIP_BUF.length > rem ? rem : SKIP_BUF.length)); 553 if (x == -1) { 554 return skipped; 555 } 556 skipped += x; 557 } 558 return skipped; 559 } 560 throw new IllegalArgumentException(); 561 } 562 563 /** 564 * Checks if the signature matches what is expected for a zip file. 565 * Does not currently handle self-extracting zips which may have arbitrary 566 * leading content. 567 * 568 * @param signature the bytes to check 569 * @param length the number of bytes to check 570 * @return true, if this stream is a zip archive stream, false otherwise 571 */ 572 public static boolean matches(final byte[] signature, final int length) { 573 if (length < ZipArchiveOutputStream.LFH_SIG.length) { 574 return false; 575 } 576 577 return checksig(signature, ZipArchiveOutputStream.LFH_SIG) // normal file 578 || checksig(signature, ZipArchiveOutputStream.EOCD_SIG) // empty zip 579 || checksig(signature, ZipArchiveOutputStream.DD_SIG) // split zip 580 || checksig(signature, ZipLong.SINGLE_SEGMENT_SPLIT_MARKER.getBytes()); 581 } 582 583 private static boolean checksig(final byte[] signature, final byte[] expected) { 584 for (int i = 0; i < expected.length; i++) { 585 if (signature[i] != expected[i]) { 586 return false; 587 } 588 } 589 return true; 590 } 591 592 /** 593 * Closes the current ZIP archive entry and positions the underlying 594 * stream to the beginning of the next entry. All per-entry variables 595 * and data structures are cleared. 596 * <p> 597 * If the compressed size of this entry is included in the entry header, 598 * then any outstanding bytes are simply skipped from the underlying 599 * stream without uncompressing them. This allows an entry to be safely 600 * closed even if the compression method is unsupported. 
     * <p>
     * In case we don't know the compressed size of this entry or have
     * already buffered too much data from the underlying stream to support
     * uncompression, then the uncompression process is completed and the
     * end position of the stream is adjusted based on the result of that
     * process.
     *
     * @throws IOException if an error occurs
     */
    private void closeEntry() throws IOException {
        if (closed) {
            throw new IOException("The stream is closed");
        }
        if (current == null) {
            return;
        }

        // Ensure all entry bytes are read
        if (current.bytesReadFromStream <= current.entry.getCompressedSize()
                && !current.hasDataDescriptor) {
            // known size, nothing buffered past the entry - skip raw bytes
            drainCurrentEntryData();
        } else {
            // decompress to the end, then push back whatever was buffered
            // beyond the entry's own compressed data
            skip(Long.MAX_VALUE);

            final long inB = current.entry.getMethod() == ZipArchiveOutputStream.DEFLATED
                ? getBytesInflated() : current.bytesRead;

            // this is at most a single read() operation and can't
            // exceed the range of int
            final int diff = (int) (current.bytesReadFromStream - inB);

            // Pushback any required bytes
            if (diff > 0) {
                pushback(buf.array(), buf.limit() - diff, diff);
            }
        }

        if (lastStoredEntry == null && current.hasDataDescriptor) {
            readDataDescriptor();
        }

        // reset per-entry state so the next header starts from a clean slate
        inf.reset();
        buf.clear().flip();
        current = null;
        lastStoredEntry = null;
    }

    /**
     * Read all data of the current entry from the underlying stream
     * that hasn't been read, yet.
     */
    private void drainCurrentEntryData() throws IOException {
        long remaining = current.entry.getCompressedSize() - current.bytesReadFromStream;
        while (remaining > 0) {
            final long n = in.read(buf.array(), 0, (int) Math.min(buf.capacity(), remaining));
            if (n < 0) {
                throw new EOFException("Truncated ZIP entry: "
                                       + ArchiveUtils.sanitize(current.entry.getName()));
            }
            count(n);
            remaining -= n;
        }
    }

    /**
     * Get the number of bytes Inflater has actually processed.
     *
     * <p>for Java &lt; Java7 the getBytes* methods in
     * Inflater/Deflater seem to return unsigned ints rather than
     * longs that start over with 0 at 2^32.</p>
     *
     * <p>The stream knows how many bytes it has read, but not how
     * many the Inflater actually consumed - it should be between the
     * total number of bytes read for the entry and the total number
     * minus the last read operation.  Here we just try to make the
     * value close enough to the bytes we've read by assuming the
     * number of bytes consumed must be smaller than (or equal to) the
     * number of bytes read but not smaller by more than 2^32.</p>
     */
    private long getBytesInflated() {
        long inB = inf.getBytesRead();
        if (current.bytesReadFromStream >= TWO_EXP_32) {
            // undo any 32-bit wrap-arounds of the inflater's counter
            while (inB + TWO_EXP_32 <= current.bytesReadFromStream) {
                inB += TWO_EXP_32;
            }
        }
        return inB;
    }

    /**
     * Fills {@code buf} from the wrapped stream and hands the bytes to the
     * inflater. Returns the number of bytes read, 0 or -1 on EOF.
     */
    private int fill() throws IOException {
        if (closed) {
            throw new IOException("The stream is closed");
        }
        final int length = in.read(buf.array());
        if (length > 0) {
            buf.limit(length);
            count(buf.limit());
            inf.setInput(buf.array(), 0, buf.limit());
        }
        return length;
    }

    /**
     * Reads exactly {@code b.length} bytes, throwing {@link EOFException}
     * if the stream ends early. Updates the bytes-read counter.
     */
    private void readFully(final byte[] b) throws IOException {
        final int count = IOUtils.readFully(in, b);
        count(count);
        if (count < b.length) {
            throw new EOFException();
        }
    }

    /**
     * Reads the data descriptor that follows the current entry's data and
     * stores CRC and sizes in the entry.
     */
    private void readDataDescriptor() throws IOException {
        readFully(WORD_BUF);
        ZipLong val = new ZipLong(WORD_BUF);
        if (ZipLong.DD_SIG.equals(val)) {
            // data descriptor with signature, skip sig
            readFully(WORD_BUF);
            val = new ZipLong(WORD_BUF);
        }
        current.entry.setCrc(val.getValue());

        // if there is a ZIP64 extra field, sizes are eight bytes
        // each, otherwise four bytes each.  Unfortunately some
        // implementations - namely Java7 - use eight bytes without
        // using a ZIP64 extra field -
        // http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=7073588

        // just read 16 bytes and check whether bytes nine to twelve
        // look like one of the signatures of what could follow a data
        // descriptor (ignoring archive decryption headers for now).
        // If so, push back eight bytes and assume sizes are four
        // bytes, otherwise sizes are eight bytes each.
        readFully(TWO_DWORD_BUF);
        final ZipLong potentialSig = new ZipLong(TWO_DWORD_BUF, DWORD);
        if (potentialSig.equals(ZipLong.CFH_SIG) || potentialSig.equals(ZipLong.LFH_SIG)) {
            pushback(TWO_DWORD_BUF, DWORD, DWORD);
            current.entry.setCompressedSize(ZipLong.getValue(TWO_DWORD_BUF));
            current.entry.setSize(ZipLong.getValue(TWO_DWORD_BUF, WORD));
        } else {
            current.entry.setCompressedSize(ZipEightByteInteger.getLongValue(TWO_DWORD_BUF));
            current.entry.setSize(ZipEightByteInteger.getLongValue(TWO_DWORD_BUF, DWORD));
        }
    }

    /**
     * Whether this entry requires a data descriptor this library can work with.
     *
     * @return true if allowStoredEntriesWithDataDescriptor is true,
     * the entry doesn't require any data descriptor or the method is
     * DEFLATED.
750 */ 751 private boolean supportsDataDescriptorFor(final ZipArchiveEntry entry) { 752 return !entry.getGeneralPurposeBit().usesDataDescriptor() 753 754 || (allowStoredEntriesWithDataDescriptor && entry.getMethod() == ZipEntry.STORED) 755 || entry.getMethod() == ZipEntry.DEFLATED; 756 } 757 758 /** 759 * Caches a stored entry that uses the data descriptor. 760 * 761 * <ul> 762 * <li>Reads a stored entry until the signature of a local file 763 * header, central directory header or data descriptor has been 764 * found.</li> 765 * <li>Stores all entry data in lastStoredEntry.</p> 766 * <li>Rewinds the stream to position at the data 767 * descriptor.</li> 768 * <li>reads the data descriptor</li> 769 * </ul> 770 * 771 * <p>After calling this method the entry should know its size, 772 * the entry's data is cached and the stream is positioned at the 773 * next local file or central directory header.</p> 774 */ 775 private void readStoredEntry() throws IOException { 776 final ByteArrayOutputStream bos = new ByteArrayOutputStream(); 777 int off = 0; 778 boolean done = false; 779 780 // length of DD without signature 781 final int ddLen = current.usesZip64 ? 
WORD + 2 * DWORD : 3 * WORD; 782 783 while (!done) { 784 final int r = in.read(buf.array(), off, ZipArchiveOutputStream.BUFFER_SIZE - off); 785 if (r <= 0) { 786 // read the whole archive without ever finding a 787 // central directory 788 throw new IOException("Truncated ZIP file"); 789 } 790 if (r + off < 4) { 791 // buffer too small to check for a signature, loop 792 off += r; 793 continue; 794 } 795 796 done = bufferContainsSignature(bos, off, r, ddLen); 797 if (!done) { 798 off = cacheBytesRead(bos, off, r, ddLen); 799 } 800 } 801 802 final byte[] b = bos.toByteArray(); 803 lastStoredEntry = new ByteArrayInputStream(b); 804 } 805 806 private static final byte[] LFH = ZipLong.LFH_SIG.getBytes(); 807 private static final byte[] CFH = ZipLong.CFH_SIG.getBytes(); 808 private static final byte[] DD = ZipLong.DD_SIG.getBytes(); 809 810 /** 811 * Checks whether the current buffer contains the signature of a 812 * "data descriptor", "local file header" or 813 * "central directory entry". 814 * 815 * <p>If it contains such a signature, reads the data descriptor 816 * and positions the stream right after the data descriptor.</p> 817 */ 818 private boolean bufferContainsSignature(final ByteArrayOutputStream bos, final int offset, final int lastRead, final int expectedDDLen) 819 throws IOException { 820 821 boolean done = false; 822 int readTooMuch = 0; 823 for (int i = 0; !done && i < lastRead - 4; i++) { 824 if (buf.array()[i] == LFH[0] && buf.array()[i + 1] == LFH[1]) { 825 if ((buf.array()[i + 2] == LFH[2] && buf.array()[i + 3] == LFH[3]) 826 || (buf.array()[i] == CFH[2] && buf.array()[i + 3] == CFH[3])) { 827 // found a LFH or CFH: 828 readTooMuch = offset + lastRead - i - expectedDDLen; 829 done = true; 830 } 831 else if (buf.array()[i + 2] == DD[2] && buf.array()[i + 3] == DD[3]) { 832 // found DD: 833 readTooMuch = offset + lastRead - i; 834 done = true; 835 } 836 if (done) { 837 // * push back bytes read in excess as well as the data 838 // descriptor 839 // * 
copy the remaining bytes to cache 840 // * read data descriptor 841 pushback(buf.array(), offset + lastRead - readTooMuch, readTooMuch); 842 bos.write(buf.array(), 0, i); 843 readDataDescriptor(); 844 } 845 } 846 } 847 return done; 848 } 849 850 /** 851 * If the last read bytes could hold a data descriptor and an 852 * incomplete signature then save the last bytes to the front of 853 * the buffer and cache everything in front of the potential data 854 * descriptor into the given ByteArrayOutputStream. 855 * 856 * <p>Data descriptor plus incomplete signature (3 bytes in the 857 * worst case) can be 20 bytes max.</p> 858 */ 859 private int cacheBytesRead(final ByteArrayOutputStream bos, int offset, final int lastRead, final int expecteDDLen) { 860 final int cacheable = offset + lastRead - expecteDDLen - 3; 861 if (cacheable > 0) { 862 bos.write(buf.array(), 0, cacheable); 863 System.arraycopy(buf.array(), cacheable, buf.array(), 0, expecteDDLen + 3); 864 offset = expecteDDLen + 3; 865 } else { 866 offset += lastRead; 867 } 868 return offset; 869 } 870 871 private void pushback(final byte[] buf, final int offset, final int length) throws IOException { 872 ((PushbackInputStream) in).unread(buf, offset, length); 873 pushedBackBytes(length); 874 } 875 876 // End of Central Directory Record 877 // end of central dir signature WORD 878 // number of this disk SHORT 879 // number of the disk with the 880 // start of the central directory SHORT 881 // total number of entries in the 882 // central directory on this disk SHORT 883 // total number of entries in 884 // the central directory SHORT 885 // size of the central directory WORD 886 // offset of start of central 887 // directory with respect to 888 // the starting disk number WORD 889 // .ZIP file comment length SHORT 890 // .ZIP file comment up to 64KB 891 // 892 893 /** 894 * Reads the stream until it find the "End of central directory 895 * record" and consumes it as well. 
896 */ 897 private void skipRemainderOfArchive() throws IOException { 898 // skip over central directory. One LFH has been read too much 899 // already. The calculation discounts file names and extra 900 // data so it will be too short. 901 realSkip(entriesRead * CFH_LEN - LFH_LEN); 902 findEocdRecord(); 903 realSkip(ZipFile.MIN_EOCD_SIZE - WORD /* signature */ - SHORT /* comment len */); 904 readFully(SHORT_BUF); 905 // file comment 906 realSkip(ZipShort.getValue(SHORT_BUF)); 907 } 908 909 /** 910 * Reads forward until the signature of the "End of central 911 * directory" record is found. 912 */ 913 private void findEocdRecord() throws IOException { 914 int currentByte = -1; 915 boolean skipReadCall = false; 916 while (skipReadCall || (currentByte = readOneByte()) > -1) { 917 skipReadCall = false; 918 if (!isFirstByteOfEocdSig(currentByte)) { 919 continue; 920 } 921 currentByte = readOneByte(); 922 if (currentByte != ZipArchiveOutputStream.EOCD_SIG[1]) { 923 if (currentByte == -1) { 924 break; 925 } 926 skipReadCall = isFirstByteOfEocdSig(currentByte); 927 continue; 928 } 929 currentByte = readOneByte(); 930 if (currentByte != ZipArchiveOutputStream.EOCD_SIG[2]) { 931 if (currentByte == -1) { 932 break; 933 } 934 skipReadCall = isFirstByteOfEocdSig(currentByte); 935 continue; 936 } 937 currentByte = readOneByte(); 938 if (currentByte == -1 939 || currentByte == ZipArchiveOutputStream.EOCD_SIG[3]) { 940 break; 941 } 942 skipReadCall = isFirstByteOfEocdSig(currentByte); 943 } 944 } 945 946 /** 947 * Skips bytes by reading from the underlying stream rather than 948 * the (potentially inflating) archive stream - which {@link 949 * #skip} would do. 950 * 951 * Also updates bytes-read counter. 952 */ 953 private void realSkip(final long value) throws IOException { 954 if (value >= 0) { 955 long skipped = 0; 956 while (skipped < value) { 957 final long rem = value - skipped; 958 final int x = in.read(SKIP_BUF, 0, (int) (SKIP_BUF.length > rem ? 
rem : SKIP_BUF.length));
                if (x == -1) {
                    // premature EOF - nothing left to skip
                    return;
                }
                count(x);
                skipped += x;
            }
            return;
        }
        // a negative skip count is a programming error
        throw new IllegalArgumentException();
    }

    /**
     * Reads bytes by reading from the underlying stream rather than
     * the (potentially inflating) archive stream - which {@link #read} would do.
     *
     * Also updates bytes-read counter.
     *
     * @return the byte read or -1 at end of stream
     */
    private int readOneByte() throws IOException {
        final int b = in.read();
        if (b != -1) {
            count(1);
        }
        return b;
    }

    /**
     * Whether the given byte matches the first byte of the "End of
     * central directory" signature.
     */
    private boolean isFirstByteOfEocdSig(final int b) {
        return b == ZipArchiveOutputStream.EOCD_SIG[0];
    }

    /**
     * Structure collecting information for the entry that is
     * currently being read.
     */
    private static final class CurrentEntry {

        /**
         * Current ZIP entry.
         */
        private final ZipArchiveEntry entry = new ZipArchiveEntry();

        /**
         * Does the entry use a data descriptor?
         */
        private boolean hasDataDescriptor;

        /**
         * Does the entry have a ZIP64 extended information extra field.
         */
        private boolean usesZip64;

        /**
         * Number of bytes of entry content read by the client if the
         * entry is STORED.
         */
        private long bytesRead;

        /**
         * Number of bytes of entry content read so far from the stream.
         *
         * <p>This may be more than the actual entry's length as some
         * stuff gets buffered up and needs to be pushed back when the
         * end of the entry has been reached.</p>
         */
        private long bytesReadFromStream;

        /**
         * The checksum calculated as the current entry is read.
         */
        private final CRC32 crc = new CRC32();

        /**
         * The input stream decompressing the data for shrunk and imploded entries.
1031 */ 1032 private InputStream in; 1033 } 1034 1035 /** 1036 * Bounded input stream adapted from commons-io 1037 */ 1038 private class BoundedInputStream extends InputStream { 1039 1040 /** the wrapped input stream */ 1041 private final InputStream in; 1042 1043 /** the max length to provide */ 1044 private final long max; 1045 1046 /** the number of bytes already returned */ 1047 private long pos = 0; 1048 1049 /** 1050 * Creates a new <code>BoundedInputStream</code> that wraps the given input 1051 * stream and limits it to a certain size. 1052 * 1053 * @param in The wrapped input stream 1054 * @param size The maximum number of bytes to return 1055 */ 1056 public BoundedInputStream(final InputStream in, final long size) { 1057 this.max = size; 1058 this.in = in; 1059 } 1060 1061 @Override 1062 public int read() throws IOException { 1063 if (max >= 0 && pos >= max) { 1064 return -1; 1065 } 1066 final int result = in.read(); 1067 pos++; 1068 count(1); 1069 current.bytesReadFromStream++; 1070 return result; 1071 } 1072 1073 @Override 1074 public int read(final byte[] b) throws IOException { 1075 return this.read(b, 0, b.length); 1076 } 1077 1078 @Override 1079 public int read(final byte[] b, final int off, final int len) throws IOException { 1080 if (max >= 0 && pos >= max) { 1081 return -1; 1082 } 1083 final long maxRead = max >= 0 ? Math.min(len, max - pos) : len; 1084 final int bytesRead = in.read(b, off, (int) maxRead); 1085 1086 if (bytesRead == -1) { 1087 return -1; 1088 } 1089 1090 pos += bytesRead; 1091 count(bytesRead); 1092 current.bytesReadFromStream += bytesRead; 1093 return bytesRead; 1094 } 1095 1096 @Override 1097 public long skip(final long n) throws IOException { 1098 final long toSkip = max >= 0 ? 
Math.min(n, max - pos) : n;
            final long skippedBytes = in.skip(toSkip);
            // NOTE(review): unlike the read methods, skip does not update
            // count() or bytesReadFromStream - presumably intentional as
            // the outer stream's skip goes through read(); confirm before
            // relying on the counters after a direct skip
            pos += skippedBytes;
            return skippedBytes;
        }

        @Override
        public int available() throws IOException {
            // report zero once the byte budget has been used up
            if (max >= 0 && pos >= max) {
                return 0;
            }
            return in.available();
        }
    }
}