001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.commons.compress.archivers.zip;
020
021import java.io.ByteArrayInputStream;
022import java.io.ByteArrayOutputStream;
023import java.io.EOFException;
024import java.io.IOException;
025import java.io.InputStream;
026import java.io.PushbackInputStream;
027import java.nio.ByteBuffer;
028import java.util.zip.CRC32;
029import java.util.zip.DataFormatException;
030import java.util.zip.Inflater;
031import java.util.zip.ZipEntry;
032import java.util.zip.ZipException;
033
034import org.apache.commons.compress.archivers.ArchiveEntry;
035import org.apache.commons.compress.archivers.ArchiveInputStream;
036import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
037import org.apache.commons.compress.utils.IOUtils;
038
039import static org.apache.commons.compress.archivers.zip.ZipConstants.DWORD;
040import static org.apache.commons.compress.archivers.zip.ZipConstants.SHORT;
041import static org.apache.commons.compress.archivers.zip.ZipConstants.WORD;
042import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC;
043
044/**
045 * Implements an input stream that can read Zip archives.
046 *
047 * <p>As of Apache Commons Compress it transparently supports Zip64
048 * extensions and thus individual entries and archives larger than 4
049 * GB or with more than 65536 entries.</p>
050 *
051 * <p>The {@link ZipFile} class is preferred when reading from files
052 * as {@link ZipArchiveInputStream} is limited by not being able to
053 * read the central directory header before returning entries.  In
054 * particular {@link ZipArchiveInputStream}</p>
055 *
056 * <ul>
057 *
058 *  <li>may return entries that are not part of the central directory
059 *  at all and shouldn't be considered part of the archive.</li>
060 *
061 *  <li>may return several entries with the same name.</li>
062 *
063 *  <li>will not return internal or external attributes.</li>
064 *
065 *  <li>may return incomplete extra field data.</li>
066 *
067 *  <li>may return unknown sizes and CRC values for entries until the
068 *  next entry has been reached if the archive uses the data
069 *  descriptor feature.</li>
070 *
071 * </ul>
072 *
073 * @see ZipFile
074 * @NotThreadSafe
075 */
076public class ZipArchiveInputStream extends ArchiveInputStream {
077
/**
 * The zip encoding to use for filenames and the file comment.
 * Entries whose general purpose flag announces UTF-8 names are
 * decoded as UTF-8 instead of with this encoding.
 */
private final ZipEncoding zipEncoding;

/** The encoding name exactly as passed to the constructor; package-visible for unit tests. */
final String encoding;

/** Whether to look for and use Unicode extra fields. */
private final boolean useUnicodeExtraFields;

/**
 * Wrapped stream, will always be a PushbackInputStream.  Its
 * pushback capacity equals the capacity of {@link #buf}.
 */
private final InputStream in;

/** Inflater used for all deflated entries; created with {@code nowrap} set to true. */
private final Inflater inf = new Inflater(true);

/**
 * Buffer used to read from the wrapped stream.  The constructor
 * sets its limit to 0 to mark it as empty.
 */
private final ByteBuffer buf = ByteBuffer.allocate(ZipArchiveOutputStream.BUFFER_SIZE);

/** The entry that is currently being read, {@code null} while no entry is open. */
private CurrentEntry current = null;

/** Whether the stream has been closed. */
private boolean closed = false;

/**
 * Whether the stream has reached the central directory - and thus
 * found all entries.  Once set, no further entries are returned.
 */
private boolean hitCentralDirectory = false;

/**
 * When reading a stored entry that uses the data descriptor this
 * stream has to read the full entry and caches it.  This is the
 * cache; it is reset to {@code null} when the entry is closed.
 */
private ByteArrayInputStream lastStoredEntry = null;

/** Whether the stream will try to read STORED entries that use a data descriptor. */
private boolean allowStoredEntriesWithDataDescriptor = false;
114
/**
 * Length in bytes of the fixed-size part of a local file header,
 * i.e. the sum of the fields listed below.
 */
private static final int LFH_LEN = 30;
/*
  local file header signature     WORD
  version needed to extract       SHORT
  general purpose bit flag        SHORT
  compression method              SHORT
  last mod file time              SHORT
  last mod file date              SHORT
  crc-32                          WORD
  compressed size                 WORD
  uncompressed size               WORD
  file name length                SHORT
  extra field length              SHORT
*/

/**
 * Length in bytes of the fixed-size part of a central file header,
 * i.e. the sum of the fields listed below.
 */
private static final int CFH_LEN = 46;
/*
    central file header signature   WORD
    version made by                 SHORT
    version needed to extract       SHORT
    general purpose bit flag        SHORT
    compression method              SHORT
    last mod file time              SHORT
    last mod file date              SHORT
    crc-32                          WORD
    compressed size                 WORD
    uncompressed size               WORD
    file name length                SHORT
    extra field length              SHORT
    file comment length             SHORT
    disk number start               SHORT
    internal file attributes        SHORT
    external file attributes        WORD
    relative offset of local header WORD
*/

/**
 * {@code ZIP64_MAGIC + 1}; the step by which getBytesInflated()
 * corrects an Inflater byte counter that has started over.
 */
private static final long TWO_EXP_32 = ZIP64_MAGIC + 1;

// cached buffers - must only be used locally in the class (COMPRESS-172 - reduce garbage collection)
/** Receives the fixed-size part of each local file header. */
private final byte[] LFH_BUF = new byte[LFH_LEN];
/** Scratch space that skip(long) reads the skipped data into. */
private final byte[] SKIP_BUF = new byte[1024];
// NOTE(review): no use of SHORT_BUF is visible in this part of the file
private final byte[] SHORT_BUF = new byte[SHORT];
/** Holds the four byte values (signature, CRC) read by readDataDescriptor(). */
private final byte[] WORD_BUF = new byte[WORD];
/** Holds the 16 bytes readDataDescriptor() reads after the CRC. */
private final byte[] TWO_DWORD_BUF = new byte[2 * DWORD];

/** Number of entries getNextZipEntry() has returned so far. */
private int entriesRead = 0;
161
/**
 * Creates a stream that uses {@link ZipEncodingHelper#UTF8} as the
 * encoding for file names.
 *
 * <p>Delegates to the two-argument constructor.</p>
 *
 * @param inputStream the stream to wrap
 */
public ZipArchiveInputStream(InputStream inputStream) {
    this(inputStream, ZipEncodingHelper.UTF8);
}
169
/**
 * Creates a stream that decodes file names with the given encoding
 * and honours Unicode extra fields.
 *
 * <p>Delegates to the three-argument constructor with
 * {@code useUnicodeExtraFields} set to {@code true}.</p>
 *
 * @param inputStream the stream to wrap
 * @param encoding the encoding to use for file names, use null
 * for the platform's default encoding
 * @since 1.5
 */
public ZipArchiveInputStream(InputStream inputStream, String encoding) {
    this(inputStream, encoding, true);
}
180
/**
 * Creates a stream that decodes file names with the given encoding
 * and refuses to read STORED entries that use a data descriptor.
 *
 * <p>Delegates to the four-argument constructor with
 * {@code allowStoredEntriesWithDataDescriptor} set to
 * {@code false}.</p>
 *
 * @param inputStream the stream to wrap
 * @param encoding the encoding to use for file names, use null
 * for the platform's default encoding
 * @param useUnicodeExtraFields whether to use InfoZIP Unicode
 * Extra Fields (if present) to set the file names.
 */
public ZipArchiveInputStream(InputStream inputStream,
                             String encoding,
                             boolean useUnicodeExtraFields) {
    this(inputStream, encoding, useUnicodeExtraFields, false);
}
192
/**
 * Creates a stream with full control over name decoding and the
 * handling of STORED entries that use a data descriptor.
 *
 * <p>The given stream is wrapped in a {@link PushbackInputStream}
 * whose pushback capacity equals the capacity of the internal read
 * buffer.</p>
 *
 * @param inputStream the stream to wrap
 * @param encoding the encoding to use for file names, use null
 * for the platform's default encoding
 * @param useUnicodeExtraFields whether to use InfoZIP Unicode
 * Extra Fields (if present) to set the file names.
 * @param allowStoredEntriesWithDataDescriptor whether the stream
 * will try to read STORED entries that use a data descriptor
 * @since 1.1
 */
public ZipArchiveInputStream(InputStream inputStream,
                             String encoding,
                             boolean useUnicodeExtraFields,
                             boolean allowStoredEntriesWithDataDescriptor) {
    this.encoding = encoding;
    this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
    this.useUnicodeExtraFields = useUnicodeExtraFields;
    this.in = new PushbackInputStream(inputStream, buf.capacity());
    this.allowStoredEntriesWithDataDescriptor = allowStoredEntriesWithDataDescriptor;

    // nothing has been buffered yet, so the buffer starts out empty
    buf.limit(0);
}
217
218    public ZipArchiveEntry getNextZipEntry() throws IOException {
219        boolean firstEntry = true;
220        if (closed || hitCentralDirectory) {
221            return null;
222        }
223        if (current != null) {
224            closeEntry();
225            firstEntry = false;
226        }
227
228        try {
229            if (firstEntry) {
230                // split archives have a special signature before the
231                // first local file header - look for it and fail with
232                // the appropriate error message if this is a split
233                // archive.
234                readFirstLocalFileHeader(LFH_BUF);
235            } else {
236                readFully(LFH_BUF);
237            }
238        } catch (EOFException e) {
239            return null;
240        }
241
242        ZipLong sig = new ZipLong(LFH_BUF);
243        if (sig.equals(ZipLong.CFH_SIG) || sig.equals(ZipLong.AED_SIG)) {
244            hitCentralDirectory = true;
245            skipRemainderOfArchive();
246        }
247        if (!sig.equals(ZipLong.LFH_SIG)) {
248            return null;
249        }
250
251        int off = WORD;
252        current = new CurrentEntry();
253
254        int versionMadeBy = ZipShort.getValue(LFH_BUF, off);
255        off += SHORT;
256        current.entry.setPlatform((versionMadeBy >> ZipFile.BYTE_SHIFT) & ZipFile.NIBLET_MASK);
257
258        final GeneralPurposeBit gpFlag = GeneralPurposeBit.parse(LFH_BUF, off);
259        final boolean hasUTF8Flag = gpFlag.usesUTF8ForNames();
260        final ZipEncoding entryEncoding = hasUTF8Flag ? ZipEncodingHelper.UTF8_ZIP_ENCODING : zipEncoding;
261        current.hasDataDescriptor = gpFlag.usesDataDescriptor();
262        current.entry.setGeneralPurposeBit(gpFlag);
263
264        off += SHORT;
265
266        current.entry.setMethod(ZipShort.getValue(LFH_BUF, off));
267        off += SHORT;
268
269        long time = ZipUtil.dosToJavaTime(ZipLong.getValue(LFH_BUF, off));
270        current.entry.setTime(time);
271        off += WORD;
272
273        ZipLong size = null, cSize = null;
274        if (!current.hasDataDescriptor) {
275            current.entry.setCrc(ZipLong.getValue(LFH_BUF, off));
276            off += WORD;
277
278            cSize = new ZipLong(LFH_BUF, off);
279            off += WORD;
280
281            size = new ZipLong(LFH_BUF, off);
282            off += WORD;
283        } else {
284            off += 3 * WORD;
285        }
286
287        int fileNameLen = ZipShort.getValue(LFH_BUF, off);
288
289        off += SHORT;
290
291        int extraLen = ZipShort.getValue(LFH_BUF, off);
292        off += SHORT;
293
294        byte[] fileName = new byte[fileNameLen];
295        readFully(fileName);
296        current.entry.setName(entryEncoding.decode(fileName), fileName);
297
298        byte[] extraData = new byte[extraLen];
299        readFully(extraData);
300        current.entry.setExtra(extraData);
301
302        if (!hasUTF8Flag && useUnicodeExtraFields) {
303            ZipUtil.setNameAndCommentFromExtraFields(current.entry, fileName, null);
304        }
305
306        processZip64Extra(size, cSize);
307
308        if (current.entry.getCompressedSize() != ZipArchiveEntry.SIZE_UNKNOWN) {
309            if (current.entry.getMethod() == ZipMethod.UNSHRINKING.getCode()) {
310                current.in = new UnshrinkingInputStream(new BoundedInputStream(in, current.entry.getCompressedSize()));
311            } else if (current.entry.getMethod() == ZipMethod.IMPLODING.getCode()) {
312                current.in = new ExplodingInputStream(
313                        current.entry.getGeneralPurposeBit().getSlidingDictionarySize(),
314                        current.entry.getGeneralPurposeBit().getNumberOfShannonFanoTrees(),
315                        new BoundedInputStream(in, current.entry.getCompressedSize()));
316            } else if (current.entry.getMethod() == ZipMethod.BZIP2.getCode()) {
317                current.in = new BZip2CompressorInputStream(new BoundedInputStream(in, current.entry.getCompressedSize()));
318            }
319        }
320        
321        entriesRead++;
322        return current.entry;
323    }
324
325    /**
326     * Fills the given array with the first local file header and
327     * deals with splitting/spanning markers that may prefix the first
328     * LFH.
329     */
330    private void readFirstLocalFileHeader(byte[] lfh) throws IOException {
331        readFully(lfh);
332        ZipLong sig = new ZipLong(lfh);
333        if (sig.equals(ZipLong.DD_SIG)) {
334            throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException.Feature.SPLITTING);
335        }
336
337        if (sig.equals(ZipLong.SINGLE_SEGMENT_SPLIT_MARKER)) {
338            // The archive is not really split as only one segment was
339            // needed in the end.  Just skip over the marker.
340            byte[] missedLfhBytes = new byte[4];
341            readFully(missedLfhBytes);
342            System.arraycopy(lfh, 4, lfh, 0, LFH_LEN - 4);
343            System.arraycopy(missedLfhBytes, 0, lfh, LFH_LEN - 4, 4);
344        }
345    }
346
347    /**
348     * Records whether a Zip64 extra is present and sets the size
349     * information from it if sizes are 0xFFFFFFFF and the entry
350     * doesn't use a data descriptor.
351     */
352    private void processZip64Extra(ZipLong size, ZipLong cSize) {
353        Zip64ExtendedInformationExtraField z64 =
354            (Zip64ExtendedInformationExtraField) 
355            current.entry.getExtraField(Zip64ExtendedInformationExtraField.HEADER_ID);
356        current.usesZip64 = z64 != null;
357        if (!current.hasDataDescriptor) {
358            if (z64 != null // same as current.usesZip64 but avoids NPE warning
359                    && (cSize.equals(ZipLong.ZIP64_MAGIC) || size.equals(ZipLong.ZIP64_MAGIC)) ) {
360                current.entry.setCompressedSize(z64.getCompressedSize().getLongValue());
361                current.entry.setSize(z64.getSize().getLongValue());
362            } else {
363                current.entry.setCompressedSize(cSize.getValue());
364                current.entry.setSize(size.getValue());
365            }
366        }
367    }
368
369    @Override
370    public ArchiveEntry getNextEntry() throws IOException {
371        return getNextZipEntry();
372    }
373
374    /**
375     * Whether this class is able to read the given entry.
376     *
377     * <p>May return false if it is set up to use encryption or a
378     * compression method that hasn't been implemented yet.</p>
379     * @since 1.1
380     */
381    @Override
382    public boolean canReadEntryData(ArchiveEntry ae) {
383        if (ae instanceof ZipArchiveEntry) {
384            ZipArchiveEntry ze = (ZipArchiveEntry) ae;
385            return ZipUtil.canHandleEntryData(ze)
386                && supportsDataDescriptorFor(ze);
387
388        }
389        return false;
390    }
391
392    @Override
393    public int read(byte[] buffer, int offset, int length) throws IOException {
394        if (closed) {
395            throw new IOException("The stream is closed");
396        }
397
398        if (current == null) {
399            return -1;
400        }
401
402        // avoid int overflow, check null buffer
403        if (offset > buffer.length || length < 0 || offset < 0 || buffer.length - offset < length) {
404            throw new ArrayIndexOutOfBoundsException();
405        }
406        
407        ZipUtil.checkRequestedFeatures(current.entry);
408        if (!supportsDataDescriptorFor(current.entry)) {
409            throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException.Feature.DATA_DESCRIPTOR,
410                    current.entry);
411        }
412
413        int read;
414        if (current.entry.getMethod() == ZipArchiveOutputStream.STORED) {
415            read = readStored(buffer, offset, length);
416        } else if (current.entry.getMethod() == ZipArchiveOutputStream.DEFLATED) {
417            read = readDeflated(buffer, offset, length);
418        } else if (current.entry.getMethod() == ZipMethod.UNSHRINKING.getCode()
419                || current.entry.getMethod() == ZipMethod.IMPLODING.getCode()
420                || current.entry.getMethod() == ZipMethod.BZIP2.getCode()) {
421            read = current.in.read(buffer, offset, length);
422        } else {
423            throw new UnsupportedZipFeatureException(ZipMethod.getMethodByCode(current.entry.getMethod()),
424                    current.entry);
425        }
426        
427        if (read >= 0) {
428            current.crc.update(buffer, offset, read);
429        }
430        
431        return read;
432    }
433
434    /**
435     * Implementation of read for STORED entries.
436     */
437    private int readStored(byte[] buffer, int offset, int length) throws IOException {
438
439        if (current.hasDataDescriptor) {
440            if (lastStoredEntry == null) {
441                readStoredEntry();
442            }
443            return lastStoredEntry.read(buffer, offset, length);
444        }
445
446        long csize = current.entry.getSize();
447        if (current.bytesRead >= csize) {
448            return -1;
449        }
450
451        if (buf.position() >= buf.limit()) {
452            buf.position(0);
453            int l = in.read(buf.array());
454            if (l == -1) {
455                return -1;
456            }
457            buf.limit(l);
458
459            count(l);
460            current.bytesReadFromStream += l;
461        }
462
463        int toRead = Math.min(buf.remaining(), length);
464        if ((csize - current.bytesRead) < toRead) {
465            // if it is smaller than toRead then it fits into an int
466            toRead = (int) (csize - current.bytesRead);
467        }
468        buf.get(buffer, offset, toRead);
469        current.bytesRead += toRead;
470        return toRead;
471    }
472
473    /**
474     * Implementation of read for DEFLATED entries.
475     */
476    private int readDeflated(byte[] buffer, int offset, int length) throws IOException {
477        int read = readFromInflater(buffer, offset, length);
478        if (read <= 0) {
479            if (inf.finished()) {
480                return -1;
481            } else if (inf.needsDictionary()) {
482                throw new ZipException("This archive needs a preset dictionary"
483                                       + " which is not supported by Commons"
484                                       + " Compress.");
485            } else if (read == -1) {
486                throw new IOException("Truncated ZIP file");
487            }
488        }
489        return read;
490    }
491
492    /**
493     * Potentially reads more bytes to fill the inflater's buffer and
494     * reads from it.
495     */
496    private int readFromInflater(byte[] buffer, int offset, int length) throws IOException {
497        int read = 0;
498        do {
499            if (inf.needsInput()) {
500                int l = fill();
501                if (l > 0) {
502                    current.bytesReadFromStream += buf.limit();
503                } else if (l == -1) {
504                    return -1;
505                } else {
506                    break;
507                }
508            }
509            try {
510                read = inf.inflate(buffer, offset, length);
511            } catch (DataFormatException e) {
512                throw (IOException) new ZipException(e.getMessage()).initCause(e);
513            }
514        } while (read == 0 && inf.needsInput());
515        return read;
516    }
517
518    @Override
519    public void close() throws IOException {
520        if (!closed) {
521            closed = true;
522            try {
523                in.close();
524            } finally {
525                inf.end();
526            }
527        }
528    }
529
530    /**
531     * Skips over and discards value bytes of data from this input
532     * stream.
533     *
534     * <p>This implementation may end up skipping over some smaller
535     * number of bytes, possibly 0, if and only if it reaches the end
536     * of the underlying stream.</p>
537     *
538     * <p>The actual number of bytes skipped is returned.</p>
539     *
540     * @param value the number of bytes to be skipped.
541     * @return the actual number of bytes skipped.
542     * @throws IOException - if an I/O error occurs.
543     * @throws IllegalArgumentException - if value is negative.
544     */
545    @Override
546    public long skip(long value) throws IOException {
547        if (value >= 0) {
548            long skipped = 0;
549            while (skipped < value) {
550                long rem = value - skipped;
551                int x = read(SKIP_BUF, 0, (int) (SKIP_BUF.length > rem ? rem : SKIP_BUF.length));
552                if (x == -1) {
553                    return skipped;
554                }
555                skipped += x;
556            }
557            return skipped;
558        }
559        throw new IllegalArgumentException();
560    }
561
562    /**
563     * Checks if the signature matches what is expected for a zip file.
564     * Does not currently handle self-extracting zips which may have arbitrary
565     * leading content.
566     *
567     * @param signature the bytes to check
568     * @param length    the number of bytes to check
569     * @return true, if this stream is a zip archive stream, false otherwise
570     */
571    public static boolean matches(byte[] signature, int length) {
572        if (length < ZipArchiveOutputStream.LFH_SIG.length) {
573            return false;
574        }
575
576        return checksig(signature, ZipArchiveOutputStream.LFH_SIG) // normal file
577            || checksig(signature, ZipArchiveOutputStream.EOCD_SIG) // empty zip
578            || checksig(signature, ZipArchiveOutputStream.DD_SIG) // split zip
579            || checksig(signature, ZipLong.SINGLE_SEGMENT_SPLIT_MARKER.getBytes());
580    }
581
582    private static boolean checksig(byte[] signature, byte[] expected) {
583        for (int i = 0; i < expected.length; i++) {
584            if (signature[i] != expected[i]) {
585                return false;
586            }
587        }
588        return true;
589    }
590
591    /**
592     * Closes the current ZIP archive entry and positions the underlying
593     * stream to the beginning of the next entry. All per-entry variables
594     * and data structures are cleared.
595     * <p>
596     * If the compressed size of this entry is included in the entry header,
597     * then any outstanding bytes are simply skipped from the underlying
598     * stream without uncompressing them. This allows an entry to be safely
599     * closed even if the compression method is unsupported.
600     * <p>
601     * In case we don't know the compressed size of this entry or have
602     * already buffered too much data from the underlying stream to support
603     * uncompression, then the uncompression process is completed and the
604     * end position of the stream is adjusted based on the result of that
605     * process.
606     *
607     * @throws IOException if an error occurs
608     */
609    private void closeEntry() throws IOException {
610        if (closed) {
611            throw new IOException("The stream is closed");
612        }
613        if (current == null) {
614            return;
615        }
616
617        // Ensure all entry bytes are read
618        if (current.bytesReadFromStream <= current.entry.getCompressedSize()
619                && !current.hasDataDescriptor) {
620            drainCurrentEntryData();
621        } else {
622            skip(Long.MAX_VALUE);
623
624            long inB = current.entry.getMethod() == ZipArchiveOutputStream.DEFLATED
625                       ? getBytesInflated() : current.bytesRead;
626
627            // this is at most a single read() operation and can't
628            // exceed the range of int
629            int diff = (int) (current.bytesReadFromStream - inB);
630
631            // Pushback any required bytes
632            if (diff > 0) {
633                pushback(buf.array(), buf.limit() - diff, diff);
634            }
635        }
636
637        if (lastStoredEntry == null && current.hasDataDescriptor) {
638            readDataDescriptor();
639        }
640
641        inf.reset();
642        buf.clear().flip();
643        current = null;
644        lastStoredEntry = null;
645    }
646
647    /**
648     * Read all data of the current entry from the underlying stream
649     * that hasn't been read, yet.
650     */
651    private void drainCurrentEntryData() throws IOException {
652        long remaining = current.entry.getCompressedSize() - current.bytesReadFromStream;
653        while (remaining > 0) {
654            long n = in.read(buf.array(), 0, (int) Math.min(buf.capacity(), remaining));
655            if (n < 0) {
656                throw new EOFException("Truncated ZIP entry: " + current.entry.getName());
657            } else {
658                count(n);
659                remaining -= n;
660            }
661        }
662    }
663
664    /**
665     * Get the number of bytes Inflater has actually processed.
666     *
667     * <p>for Java &lt; Java7 the getBytes* methods in
668     * Inflater/Deflater seem to return unsigned ints rather than
669     * longs that start over with 0 at 2^32.</p>
670     *
671     * <p>The stream knows how many bytes it has read, but not how
672     * many the Inflater actually consumed - it should be between the
673     * total number of bytes read for the entry and the total number
674     * minus the last read operation.  Here we just try to make the
675     * value close enough to the bytes we've read by assuming the
676     * number of bytes consumed must be smaller than (or equal to) the
677     * number of bytes read but not smaller by more than 2^32.</p>
678     */
679    private long getBytesInflated() {
680        long inB = inf.getBytesRead();
681        if (current.bytesReadFromStream >= TWO_EXP_32) {
682            while (inB + TWO_EXP_32 <= current.bytesReadFromStream) {
683                inB += TWO_EXP_32;
684            }
685        }
686        return inB;
687    }
688
689    private int fill() throws IOException {
690        if (closed) {
691            throw new IOException("The stream is closed");
692        }
693        int length = in.read(buf.array());
694        if (length > 0) {
695            buf.limit(length);
696            count(buf.limit());
697            inf.setInput(buf.array(), 0, buf.limit());
698        }
699        return length;
700    }
701
702    private void readFully(byte[] b) throws IOException {
703        int count = IOUtils.readFully(in, b);
704        count(count);
705        if (count < b.length) {
706            throw new EOFException();
707        }
708    }
709
710    private void readDataDescriptor() throws IOException {
711        readFully(WORD_BUF);
712        ZipLong val = new ZipLong(WORD_BUF);
713        if (ZipLong.DD_SIG.equals(val)) {
714            // data descriptor with signature, skip sig
715            readFully(WORD_BUF);
716            val = new ZipLong(WORD_BUF);
717        }
718        current.entry.setCrc(val.getValue());
719
720        // if there is a ZIP64 extra field, sizes are eight bytes
721        // each, otherwise four bytes each.  Unfortunately some
722        // implementations - namely Java7 - use eight bytes without
723        // using a ZIP64 extra field -
724        // http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=7073588
725
726        // just read 16 bytes and check whether bytes nine to twelve
727        // look like one of the signatures of what could follow a data
728        // descriptor (ignoring archive decryption headers for now).
729        // If so, push back eight bytes and assume sizes are four
730        // bytes, otherwise sizes are eight bytes each.
731        readFully(TWO_DWORD_BUF);
732        ZipLong potentialSig = new ZipLong(TWO_DWORD_BUF, DWORD);
733        if (potentialSig.equals(ZipLong.CFH_SIG) || potentialSig.equals(ZipLong.LFH_SIG)) {
734            pushback(TWO_DWORD_BUF, DWORD, DWORD);
735            current.entry.setCompressedSize(ZipLong.getValue(TWO_DWORD_BUF));
736            current.entry.setSize(ZipLong.getValue(TWO_DWORD_BUF, WORD));
737        } else {
738            current.entry.setCompressedSize(ZipEightByteInteger.getLongValue(TWO_DWORD_BUF));
739            current.entry.setSize(ZipEightByteInteger.getLongValue(TWO_DWORD_BUF, DWORD));
740        }
741    }
742
743    /**
744     * Whether this entry requires a data descriptor this library can work with.
745     *
746     * @return true if allowStoredEntriesWithDataDescriptor is true,
747     * the entry doesn't require any data descriptor or the method is
748     * DEFLATED.
749     */
750    private boolean supportsDataDescriptorFor(ZipArchiveEntry entry) {
751        return !entry.getGeneralPurposeBit().usesDataDescriptor()
752
753                || (allowStoredEntriesWithDataDescriptor && entry.getMethod() == ZipEntry.STORED)
754                || entry.getMethod() == ZipEntry.DEFLATED;
755    }
756
757    /**
758     * Caches a stored entry that uses the data descriptor.
759     *
760     * <ul>
761     *   <li>Reads a stored entry until the signature of a local file
762     *     header, central directory header or data descriptor has been
763     *     found.</li>
764     *   <li>Stores all entry data in lastStoredEntry.</p>
765     *   <li>Rewinds the stream to position at the data
766     *     descriptor.</li>
767     *   <li>reads the data descriptor</li>
768     * </ul>
769     *
770     * <p>After calling this method the entry should know its size,
771     * the entry's data is cached and the stream is positioned at the
772     * next local file or central directory header.</p>
773     */
774    private void readStoredEntry() throws IOException {
775        ByteArrayOutputStream bos = new ByteArrayOutputStream();
776        int off = 0;
777        boolean done = false;
778
779        // length of DD without signature
780        int ddLen = current.usesZip64 ? WORD + 2 * DWORD : 3 * WORD;
781
782        while (!done) {
783            int r = in.read(buf.array(), off, ZipArchiveOutputStream.BUFFER_SIZE - off);
784            if (r <= 0) {
785                // read the whole archive without ever finding a
786                // central directory
787                throw new IOException("Truncated ZIP file");
788            }
789            if (r + off < 4) {
790                // buffer too small to check for a signature, loop
791                off += r;
792                continue;
793            }
794
795            done = bufferContainsSignature(bos, off, r, ddLen);
796            if (!done) {
797                off = cacheBytesRead(bos, off, r, ddLen);
798            }
799        }
800
801        byte[] b = bos.toByteArray();
802        lastStoredEntry = new ByteArrayInputStream(b);
803    }
804
    /** Bytes of {@code ZipLong.LFH_SIG}, the local file header signature. */
    private static final byte[] LFH = ZipLong.LFH_SIG.getBytes();
    /** Bytes of {@code ZipLong.CFH_SIG}, the central directory header signature. */
    private static final byte[] CFH = ZipLong.CFH_SIG.getBytes();
    /** Bytes of {@code ZipLong.DD_SIG}, the data descriptor signature. */
    private static final byte[] DD = ZipLong.DD_SIG.getBytes();
808
809    /**
810     * Checks whether the current buffer contains the signature of a
811     * &quot;data descriptor&quot;, &quot;local file header&quot; or
812     * &quot;central directory entry&quot;.
813     *
814     * <p>If it contains such a signature, reads the data descriptor
815     * and positions the stream right after the data descriptor.</p>
816     */
817    private boolean bufferContainsSignature(ByteArrayOutputStream bos, int offset, int lastRead, int expectedDDLen)
818            throws IOException {
819
820        boolean done = false;
821        int readTooMuch = 0;
822        for (int i = 0; !done && i < lastRead - 4; i++) {
823            if (buf.array()[i] == LFH[0] && buf.array()[i + 1] == LFH[1]) {
824                if ((buf.array()[i + 2] == LFH[2] && buf.array()[i + 3] == LFH[3])
825                    || (buf.array()[i] == CFH[2] && buf.array()[i + 3] == CFH[3])) {
826                    // found a LFH or CFH:
827                    readTooMuch = offset + lastRead - i - expectedDDLen;
828                    done = true;
829                }
830                else if (buf.array()[i + 2] == DD[2] && buf.array()[i + 3] == DD[3]) {
831                    // found DD:
832                    readTooMuch = offset + lastRead - i;
833                    done = true;
834                }
835                if (done) {
836                    // * push back bytes read in excess as well as the data
837                    //   descriptor
838                    // * copy the remaining bytes to cache
839                    // * read data descriptor
840                    pushback(buf.array(), offset + lastRead - readTooMuch, readTooMuch);
841                    bos.write(buf.array(), 0, i);
842                    readDataDescriptor();
843                }
844            }
845        }
846        return done;
847    }
848
849    /**
850     * If the last read bytes could hold a data descriptor and an
851     * incomplete signature then save the last bytes to the front of
852     * the buffer and cache everything in front of the potential data
853     * descriptor into the given ByteArrayOutputStream.
854     *
855     * <p>Data descriptor plus incomplete signature (3 bytes in the
856     * worst case) can be 20 bytes max.</p>
857     */
858    private int cacheBytesRead(ByteArrayOutputStream bos, int offset, int lastRead, int expecteDDLen) {
859        final int cacheable = offset + lastRead - expecteDDLen - 3;
860        if (cacheable > 0) {
861            bos.write(buf.array(), 0, cacheable);
862            System.arraycopy(buf.array(), cacheable, buf.array(), 0, expecteDDLen + 3);
863            offset = expecteDDLen + 3;
864        } else {
865            offset += lastRead;
866        }
867        return offset;
868    }
869
870    private void pushback(byte[] buf, int offset, int length) throws IOException {
871        ((PushbackInputStream) in).unread(buf, offset, length);
872        pushedBackBytes(length);
873    }
874
875    // End of Central Directory Record
876    //   end of central dir signature    WORD
877    //   number of this disk             SHORT
878    //   number of the disk with the
879    //   start of the central directory  SHORT
880    //   total number of entries in the
881    //   central directory on this disk  SHORT
882    //   total number of entries in
883    //   the central directory           SHORT
884    //   size of the central directory   WORD
885    //   offset of start of central
886    //   directory with respect to
887    //   the starting disk number        WORD
888    //   .ZIP file comment length        SHORT
889    //   .ZIP file comment               up to 64KB
890    //
891
892    /**
893     * Reads the stream until it find the "End of central directory
894     * record" and consumes it as well.
895     */
896    private void skipRemainderOfArchive() throws IOException {
897        // skip over central directory. One LFH has been read too much
898        // already.  The calculation discounts file names and extra
899        // data so it will be too short.
900        realSkip(entriesRead * CFH_LEN - LFH_LEN);
901        findEocdRecord();
902        realSkip(ZipFile.MIN_EOCD_SIZE - WORD /* signature */ - SHORT /* comment len */);
903        readFully(SHORT_BUF);
904        // file comment
905        realSkip(ZipShort.getValue(SHORT_BUF));
906    }
907
908    /**
909     * Reads forward until the signature of the &quot;End of central
910     * directory&quot; record is found.
911     */
912    private void findEocdRecord() throws IOException {
913        int currentByte = -1;
914        boolean skipReadCall = false;
915        while (skipReadCall || (currentByte = readOneByte()) > -1) {
916            skipReadCall = false;
917            if (!isFirstByteOfEocdSig(currentByte)) {
918                continue;
919            }
920            currentByte = readOneByte();
921            if (currentByte != ZipArchiveOutputStream.EOCD_SIG[1]) {
922                if (currentByte == -1) {
923                    break;
924                }
925                skipReadCall = isFirstByteOfEocdSig(currentByte);
926                continue;
927            }
928            currentByte = readOneByte();
929            if (currentByte != ZipArchiveOutputStream.EOCD_SIG[2]) {
930                if (currentByte == -1) {
931                    break;
932                }
933                skipReadCall = isFirstByteOfEocdSig(currentByte);
934                continue;
935            }
936            currentByte = readOneByte();
937            if (currentByte == -1
938                || currentByte == ZipArchiveOutputStream.EOCD_SIG[3]) {
939                break;
940            }
941            skipReadCall = isFirstByteOfEocdSig(currentByte);
942        }
943    }
944
945    /**
946     * Skips bytes by reading from the underlying stream rather than
947     * the (potentially inflating) archive stream - which {@link
948     * #skip} would do.
949     *
950     * Also updates bytes-read counter.
951     */
952    private void realSkip(long value) throws IOException {
953        if (value >= 0) {
954            long skipped = 0;
955            while (skipped < value) {
956                long rem = value - skipped;
957                int x = in.read(SKIP_BUF, 0, (int) (SKIP_BUF.length > rem ? rem : SKIP_BUF.length));
958                if (x == -1) {
959                    return;
960                }
961                count(x);
962                skipped += x;
963            }
964            return;
965        }
966        throw new IllegalArgumentException();
967    }
968
969    /**
970     * Reads bytes by reading from the underlying stream rather than
971     * the (potentially inflating) archive stream - which {@link #read} would do.
972     *
973     * Also updates bytes-read counter.
974     */
975    private int readOneByte() throws IOException {
976        int b = in.read();
977        if (b != -1) {
978            count(1);
979        }
980        return b;
981    }
982
983    private boolean isFirstByteOfEocdSig(int b) {
984        return b == ZipArchiveOutputStream.EOCD_SIG[0];
985    }
986
    /**
     * Structure collecting information for the entry that is
     * currently being read.
     */
    private static final class CurrentEntry {

        /**
         * Current ZIP entry.
         */
        private final ZipArchiveEntry entry = new ZipArchiveEntry();

        /**
         * Does the entry use a data descriptor?
         */
        private boolean hasDataDescriptor;

        /**
         * Does the entry have a ZIP64 extended information extra field?
         */
        private boolean usesZip64;

        /**
         * Number of bytes of entry content read by the client if the
         * entry is STORED.
         */
        private long bytesRead;

        /**
         * Number of bytes of entry content read so far from the stream.
         *
         * <p>This may be more than the actual entry's length as some
         * stuff gets buffered up and needs to be pushed back when the
         * end of the entry has been reached.</p>
         */
        private long bytesReadFromStream;

        /**
         * The checksum calculated as the current entry is read.
         */
        private final CRC32 crc = new CRC32();

        /**
         * The input stream decompressing the data for shrunk and imploded entries.
         */
        private InputStream in;
    }
1033
1034    /**
1035     * Bounded input stream adapted from commons-io
1036     */
1037    private class BoundedInputStream extends InputStream {
1038
1039        /** the wrapped input stream */
1040        private final InputStream in;
1041
1042        /** the max length to provide */
1043        private final long max;
1044
1045        /** the number of bytes already returned */
1046        private long pos = 0;
1047    
1048        /**
1049         * Creates a new <code>BoundedInputStream</code> that wraps the given input
1050         * stream and limits it to a certain size.
1051         *
1052         * @param in The wrapped input stream
1053         * @param size The maximum number of bytes to return
1054         */
1055        public BoundedInputStream(final InputStream in, final long size) {
1056            this.max = size;
1057            this.in = in;
1058        }
1059
1060        @Override
1061        public int read() throws IOException {
1062            if (max >= 0 && pos >= max) {
1063                return -1;
1064            }
1065            final int result = in.read();
1066            pos++;
1067            count(1);
1068            current.bytesReadFromStream++;
1069            return result;
1070        }
1071
1072        @Override
1073        public int read(final byte[] b) throws IOException {
1074            return this.read(b, 0, b.length);
1075        }
1076
1077        @Override
1078        public int read(final byte[] b, final int off, final int len) throws IOException {
1079            if (max >= 0 && pos >= max) {
1080                return -1;
1081            }
1082            final long maxRead = max >= 0 ? Math.min(len, max - pos) : len;
1083            final int bytesRead = in.read(b, off, (int) maxRead);
1084
1085            if (bytesRead == -1) {
1086                return -1;
1087            }
1088
1089            pos += bytesRead;
1090            count(bytesRead);
1091            current.bytesReadFromStream += bytesRead;
1092            return bytesRead;
1093        }
1094
1095        @Override
1096        public long skip(final long n) throws IOException {
1097            final long toSkip = max >= 0 ? Math.min(n, max - pos) : n;
1098            final long skippedBytes = in.skip(toSkip);
1099            pos += skippedBytes;
1100            return skippedBytes;
1101        }
1102    
1103        @Override
1104        public int available() throws IOException {
1105            if (max >= 0 && pos >= max) {
1106                return 0;
1107            }
1108            return in.available();
1109        }
1110    }
1111}