Source code

001/*
002 *  Licensed to the Apache Software Foundation (ASF) under one or more
003 *  contributor license agreements.  See the NOTICE file distributed with
004 *  this work for additional information regarding copyright ownership.
005 *  The ASF licenses this file to You under the Apache License, Version 2.0
006 *  (the "License"); you may not use this file except in compliance with
007 *  the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 *  Unless required by applicable law or agreed to in writing, software
012 *  distributed under the License is distributed on an "AS IS" BASIS,
013 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 *  See the License for the specific language governing permissions and
015 *  limitations under the License.
016 *
017 */
018package org.apache.commons.compress.archivers.zip;
019
020import java.io.BufferedInputStream;
021import java.io.Closeable;
022import java.io.EOFException;
023import java.io.File;
024import java.io.IOException;
025import java.io.InputStream;
026import java.io.RandomAccessFile;
027import java.util.Arrays;
028import java.util.Collections;
029import java.util.Comparator;
030import java.util.Enumeration;
031import java.util.HashMap;
032import java.util.LinkedList;
033import java.util.List;
034import java.util.Map;
035import java.util.zip.Inflater;
036import java.util.zip.InflaterInputStream;
037import java.util.zip.ZipException;
038
039import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
040import org.apache.commons.compress.utils.IOUtils;
041
042import static org.apache.commons.compress.archivers.zip.ZipConstants.DWORD;
043import static org.apache.commons.compress.archivers.zip.ZipConstants.SHORT;
044import static org.apache.commons.compress.archivers.zip.ZipConstants.WORD;
045import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC;
046import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC_SHORT;
047
/**
 * Replacement for <code>java.util.zip.ZipFile</code>.
 *
 * <p>This class adds support for file name encodings other than UTF-8
 * (which is required to work on ZIP files created by native zip tools)
 * and is able to skip a preamble like the one found in self
 * extracting archives.  Furthermore it returns instances of
 * <code>org.apache.commons.compress.archivers.zip.ZipArchiveEntry</code>
 * instead of <code>java.util.zip.ZipEntry</code>.</p>
 *
 * <p>It doesn't extend <code>java.util.zip.ZipFile</code> as it would
 * have to reimplement all methods anyway.  Like
 * <code>java.util.zip.ZipFile</code>, it uses RandomAccessFile under the
 * covers and supports compressed and uncompressed entries.  As of
 * Apache Commons Compress 1.3 it also transparently supports Zip64
 * extensions and thus individual entries and archives larger than 4
 * GB or with more than 65536 entries.</p>
 *
 * <p>The method signatures mimic the ones of
 * <code>java.util.zip.ZipFile</code>, with a couple of exceptions:</p>
 *
 * <ul>
 *   <li>There is no getName method.</li>
 *   <li>entries has been renamed to getEntries.</li>
 *   <li>getEntries and getEntry return
 *   <code>org.apache.commons.compress.archivers.zip.ZipArchiveEntry</code>
 *   instances.</li>
 *   <li>close is allowed to throw IOException.</li>
 * </ul>
 *
 */
079public class ZipFile implements Closeable {
080    private static final int HASH_SIZE = 509;
081    static final int NIBLET_MASK = 0x0f;
082    static final int BYTE_SHIFT = 8;
083    private static final int POS_0 = 0;
084    private static final int POS_1 = 1;
085    private static final int POS_2 = 2;
086    private static final int POS_3 = 3;
087
088    /**
089     * List of entries in the order they appear inside the central
090     * directory.
091     */
092    private final List<ZipArchiveEntry> entries =
093        new LinkedList<ZipArchiveEntry>();
094
095    /**
096     * Maps String to list of ZipArchiveEntrys, name -> actual entries.
097     */
098    private final Map<String, LinkedList<ZipArchiveEntry>> nameMap =
099        new HashMap<String, LinkedList<ZipArchiveEntry>>(HASH_SIZE);
100
101    private static final class OffsetEntry {
102        private long headerOffset = -1;
103        private long dataOffset = -1;
104    }
105
106    /**
107     * The encoding to use for filenames and the file comment.
108     *
109     * <p>For a list of possible values see <a
110     * href="http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html">http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html</a>.
111     * Defaults to UTF-8.</p>
112     */
113    private final String encoding;
114
115    /**
116     * The zip encoding to use for filenames and the file comment.
117     */
118    private final ZipEncoding zipEncoding;
119
120    /**
121     * File name of actual source.
122     */
123    private final String archiveName;
124
125    /**
126     * The actual data source.
127     */
128    private final RandomAccessFile archive;
129
130    /**
131     * Whether to look for and use Unicode extra fields.
132     */
133    private final boolean useUnicodeExtraFields;
134
135    /**
136     * Whether the file is closed.
137     */
138    private volatile boolean closed = true;
139
140    // cached buffers - must only be used locally in the class (COMPRESS-172 - reduce garbage collection)
141    private final byte[] DWORD_BUF = new byte[DWORD];
142    private final byte[] WORD_BUF = new byte[WORD];
143    private final byte[] CFH_BUF = new byte[CFH_LEN];
144    private final byte[] SHORT_BUF = new byte[SHORT];
145
146    /**
147     * Opens the given file for reading, assuming "UTF8" for file names.
148     *
149     * @param f the archive.
150     *
151     * @throws IOException if an error occurs while reading the file.
152     */
153    public ZipFile(File f) throws IOException {
154        this(f, ZipEncodingHelper.UTF8);
155    }
156
157    /**
158     * Opens the given file for reading, assuming "UTF8".
159     *
160     * @param name name of the archive.
161     *
162     * @throws IOException if an error occurs while reading the file.
163     */
164    public ZipFile(String name) throws IOException {
165        this(new File(name), ZipEncodingHelper.UTF8);
166    }
167
168    /**
169     * Opens the given file for reading, assuming the specified
170     * encoding for file names, scanning unicode extra fields.
171     *
172     * @param name name of the archive.
173     * @param encoding the encoding to use for file names, use null
174     * for the platform's default encoding
175     *
176     * @throws IOException if an error occurs while reading the file.
177     */
178    public ZipFile(String name, String encoding) throws IOException {
179        this(new File(name), encoding, true);
180    }
181
182    /**
183     * Opens the given file for reading, assuming the specified
184     * encoding for file names and scanning for unicode extra fields.
185     *
186     * @param f the archive.
187     * @param encoding the encoding to use for file names, use null
188     * for the platform's default encoding
189     *
190     * @throws IOException if an error occurs while reading the file.
191     */
192    public ZipFile(File f, String encoding) throws IOException {
193        this(f, encoding, true);
194    }
195
196    /**
197     * Opens the given file for reading, assuming the specified
198     * encoding for file names.
199     *
200     * @param f the archive.
201     * @param encoding the encoding to use for file names, use null
202     * for the platform's default encoding
203     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
204     * Extra Fields (if present) to set the file names.
205     *
206     * @throws IOException if an error occurs while reading the file.
207     */
208    public ZipFile(File f, String encoding, boolean useUnicodeExtraFields)
209        throws IOException {
210        this.archiveName = f.getAbsolutePath();
211        this.encoding = encoding;
212        this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
213        this.useUnicodeExtraFields = useUnicodeExtraFields;
214        archive = new RandomAccessFile(f, "r");
215        boolean success = false;
216        try {
217            Map<ZipArchiveEntry, NameAndComment> entriesWithoutUTF8Flag =
218                populateFromCentralDirectory();
219            resolveLocalFileHeaderData(entriesWithoutUTF8Flag);
220            success = true;
221        } finally {
222            closed = !success;
223            if (!success) {
224                IOUtils.closeQuietly(archive);
225            }
226        }
227    }
228
229    /**
230     * The encoding to use for filenames and the file comment.
231     *
232     * @return null if using the platform's default character encoding.
233     */
234    public String getEncoding() {
235        return encoding;
236    }
237
238    /**
239     * Closes the archive.
240     * @throws IOException if an error occurs closing the archive.
241     */
242    public void close() throws IOException {
243        // this flag is only written here and read in finalize() which
244        // can never be run in parallel.
245        // no synchronization needed.
246        closed = true;
247
248        archive.close();
249    }
250
251    /**
252     * close a zipfile quietly; throw no io fault, do nothing
253     * on a null parameter
254     * @param zipfile file to close, can be null
255     */
256    public static void closeQuietly(ZipFile zipfile) {
257        IOUtils.closeQuietly(zipfile);
258    }
259
260    /**
261     * Returns all entries.
262     *
263     * <p>Entries will be returned in the same order they appear
264     * within the archive's central directory.</p>
265     *
266     * @return all entries as {@link ZipArchiveEntry} instances
267     */
268    public Enumeration<ZipArchiveEntry> getEntries() {
269        return Collections.enumeration(entries);
270    }
271
272    /**
273     * Returns all entries in physical order.
274     *
275     * <p>Entries will be returned in the same order their contents
276     * appear within the archive.</p>
277     *
278     * @return all entries as {@link ZipArchiveEntry} instances
279     *
280     * @since 1.1
281     */
282    public Enumeration<ZipArchiveEntry> getEntriesInPhysicalOrder() {
283        ZipArchiveEntry[] allEntries = entries.toArray(new ZipArchiveEntry[entries.size()]);
284        Arrays.sort(allEntries, OFFSET_COMPARATOR);
285        return Collections.enumeration(Arrays.asList(allEntries));
286    }
287
288    /**
289     * Returns a named entry - or {@code null} if no entry by
290     * that name exists.
291     *
292     * <p>If multiple entries with the same name exist the first entry
293     * in the archive's central directory by that name is
294     * returned.</p>
295     *
296     * @param name name of the entry.
297     * @return the ZipArchiveEntry corresponding to the given name - or
298     * {@code null} if not present.
299     */
300    public ZipArchiveEntry getEntry(String name) {
301        LinkedList<ZipArchiveEntry> entriesOfThatName = nameMap.get(name);
302        return entriesOfThatName != null ? entriesOfThatName.getFirst() : null;
303    }
304
305    /**
306     * Returns all named entries in the same order they appear within
307     * the archive's central directory.
308     *
309     * @param name name of the entry.
310     * @return the Iterable&lt;ZipArchiveEntry&gt; corresponding to the
311     * given name
312     * @since 1.6
313     */
314    public Iterable<ZipArchiveEntry> getEntries(String name) {
315        List<ZipArchiveEntry> entriesOfThatName = nameMap.get(name);
316        return entriesOfThatName != null ? entriesOfThatName
317            : Collections.<ZipArchiveEntry>emptyList();
318    }
319
320    /**
321     * Returns all named entries in the same order their contents
322     * appear within the archive.
323     *
324     * @param name name of the entry.
325     * @return the Iterable&lt;ZipArchiveEntry&gt; corresponding to the
326     * given name
327     * @since 1.6
328     */
329    public Iterable<ZipArchiveEntry> getEntriesInPhysicalOrder(String name) {
330        ZipArchiveEntry[] entriesOfThatName = new ZipArchiveEntry[0];
331        if (nameMap.containsKey(name)) {
332            entriesOfThatName = nameMap.get(name).toArray(entriesOfThatName);
333            Arrays.sort(entriesOfThatName, OFFSET_COMPARATOR);
334        }
335        return Arrays.asList(entriesOfThatName);
336    }
337
338    /**
339     * Whether this class is able to read the given entry.
340     *
341     * <p>May return false if it is set up to use encryption or a
342     * compression method that hasn't been implemented yet.</p>
343     * @since 1.1
344     * @param ze the entry
345     * @return whether this class is able to read the given entry.
346     */
347    public boolean canReadEntryData(ZipArchiveEntry ze) {
348        return ZipUtil.canHandleEntryData(ze);
349    }
350
351    /**
352     * Expose the raw stream of the archive entry (compressed form).
353     *
354     * <p>This method does not relate to how/if we understand the payload in the
355     * stream, since we really only intend to move it on to somewhere else.</p>
356     *
357     * @param ze The entry to get the stream for
358     * @return The raw input stream containing (possibly) compressed data.
359     * @since 1.11
360     */
361    public InputStream getRawInputStream(ZipArchiveEntry ze) {
362        if (!(ze instanceof Entry)) {
363            return null;
364        }
365        OffsetEntry offsetEntry = ((Entry) ze).getOffsetEntry();
366        long start = offsetEntry.dataOffset;
367        return new BoundedInputStream(start, ze.getCompressedSize());
368    }
369
370
371    /**
372     * Transfer selected entries from this zipfile to a given #ZipArchiveOutputStream.
373     * Compression and all other attributes will be as in this file.
374     * <p>This method transfers entries based on the central directory of the zip file.</p>
375     *
376     * @param target The zipArchiveOutputStream to write the entries to
377     * @param predicate A predicate that selects which entries to write
378     * @throws IOException on error
379     */
380    public void copyRawEntries(ZipArchiveOutputStream target, ZipArchiveEntryPredicate predicate)
381            throws IOException {
382        Enumeration<ZipArchiveEntry> src = getEntriesInPhysicalOrder();
383        while (src.hasMoreElements()) {
384            ZipArchiveEntry entry = src.nextElement();
385            if (predicate.test( entry)) {
386                target.addRawArchiveEntry(entry, getRawInputStream(entry));
387            }
388        }
389    }
390
391    /**
392     * Returns an InputStream for reading the contents of the given entry.
393     *
394     * @param ze the entry to get the stream for.
395     * @return a stream to read the entry from.
396     * @throws IOException if unable to create an input stream from the zipentry
397     * @throws ZipException if the zipentry uses an unsupported feature
398     */
399    public InputStream getInputStream(ZipArchiveEntry ze)
400        throws IOException, ZipException {
401        if (!(ze instanceof Entry)) {
402            return null;
403        }
404        // cast valididty is checked just above
405        OffsetEntry offsetEntry = ((Entry) ze).getOffsetEntry();
406        ZipUtil.checkRequestedFeatures(ze);
407        long start = offsetEntry.dataOffset;
408        BoundedInputStream bis =
409            new BoundedInputStream(start, ze.getCompressedSize());
410        switch (ZipMethod.getMethodByCode(ze.getMethod())) {
411            case STORED:
412                return bis;
413            case UNSHRINKING:
414                return new UnshrinkingInputStream(bis);
415            case IMPLODING:
416                return new ExplodingInputStream(ze.getGeneralPurposeBit().getSlidingDictionarySize(),
417                        ze.getGeneralPurposeBit().getNumberOfShannonFanoTrees(), new BufferedInputStream(bis));
418            case DEFLATED:
419                bis.addDummy();
420                final Inflater inflater = new Inflater(true);
421                return new InflaterInputStream(bis, inflater) {
422                    @Override
423                    public void close() throws IOException {
424                        try {
425                            super.close();
426                        } finally {
427                            inflater.end();
428                        }
429                    }
430                };
431            case BZIP2:
432                return new BZip2CompressorInputStream(bis);
433            default:
434                throw new ZipException("Found unsupported compression method "
435                                       + ze.getMethod());
436        }
437    }
438
439    /**
440     * <p>
441     * Convenience method to return the entry's content as a String if isUnixSymlink()
442     * returns true for it, otherwise returns null.
443     * </p>
444     *
445     * <p>This method assumes the symbolic link's file name uses the
446     * same encoding that as been specified for this ZipFile.</p>
447     *
448     * @param entry ZipArchiveEntry object that represents the symbolic link
449     * @return entry's content as a String
450     * @throws IOException problem with content's input stream
451     * @since 1.5
452     */
453    public String getUnixSymlink(ZipArchiveEntry entry) throws IOException {
454        if (entry != null && entry.isUnixSymlink()) {
455            InputStream in = null;
456            try {
457                in = getInputStream(entry);
458                byte[] symlinkBytes = IOUtils.toByteArray(in);
459                return zipEncoding.decode(symlinkBytes);
460            } finally {
461                if (in != null) {
462                    in.close();
463                }
464            }
465        } else {
466            return null;
467        }
468    }
469
470    /**
471     * Ensures that the close method of this zipfile is called when
472     * there are no more references to it.
473     * @see #close()
474     */
475    @Override
476    protected void finalize() throws Throwable {
477        try {
478            if (!closed) {
479                System.err.println("Cleaning up unclosed ZipFile for archive "
480                                   + archiveName);
481                close();
482            }
483        } finally {
484            super.finalize();
485        }
486    }
487
488    /**
489     * Length of a "central directory" entry structure without file
490     * name, extra fields or comment.
491     */
492    private static final int CFH_LEN =
493        /* version made by                 */ SHORT
494        /* version needed to extract       */ + SHORT
495        /* general purpose bit flag        */ + SHORT
496        /* compression method              */ + SHORT
497        /* last mod file time              */ + SHORT
498        /* last mod file date              */ + SHORT
499        /* crc-32                          */ + WORD
500        /* compressed size                 */ + WORD
501        /* uncompressed size               */ + WORD
502        /* filename length                 */ + SHORT
503        /* extra field length              */ + SHORT
504        /* file comment length             */ + SHORT
505        /* disk number start               */ + SHORT
506        /* internal file attributes        */ + SHORT
507        /* external file attributes        */ + WORD
508        /* relative offset of local header */ + WORD;
509
510    private static final long CFH_SIG =
511        ZipLong.getValue(ZipArchiveOutputStream.CFH_SIG);
512
513    /**
514     * Reads the central directory of the given archive and populates
515     * the internal tables with ZipArchiveEntry instances.
516     *
517     * <p>The ZipArchiveEntrys will know all data that can be obtained from
518     * the central directory alone, but not the data that requires the
519     * local file header or additional data to be read.</p>
520     *
521     * @return a map of zipentries that didn't have the language
522     * encoding flag set when read.
523     */
524    private Map<ZipArchiveEntry, NameAndComment> populateFromCentralDirectory()
525        throws IOException {
526        HashMap<ZipArchiveEntry, NameAndComment> noUTF8Flag =
527            new HashMap<ZipArchiveEntry, NameAndComment>();
528
529        positionAtCentralDirectory();
530
531        archive.readFully(WORD_BUF);
532        long sig = ZipLong.getValue(WORD_BUF);
533
534        if (sig != CFH_SIG && startsWithLocalFileHeader()) {
535            throw new IOException("central directory is empty, can't expand"
536                                  + " corrupt archive.");
537        }
538
539        while (sig == CFH_SIG) {
540            readCentralDirectoryEntry(noUTF8Flag);
541            archive.readFully(WORD_BUF);
542            sig = ZipLong.getValue(WORD_BUF);
543        }
544        return noUTF8Flag;
545    }
546
547    /**
548     * Reads an individual entry of the central directory, creats an
549     * ZipArchiveEntry from it and adds it to the global maps.
550     *
551     * @param noUTF8Flag map used to collect entries that don't have
552     * their UTF-8 flag set and whose name will be set by data read
553     * from the local file header later.  The current entry may be
554     * added to this map.
555     */
556    private void
557        readCentralDirectoryEntry(Map<ZipArchiveEntry, NameAndComment> noUTF8Flag)
558        throws IOException {
559        archive.readFully(CFH_BUF);
560        int off = 0;
561        OffsetEntry offset = new OffsetEntry();
562        Entry ze = new Entry(offset);
563
564        int versionMadeBy = ZipShort.getValue(CFH_BUF, off);
565        off += SHORT;
566        ze.setVersionMadeBy(versionMadeBy);
567        ze.setPlatform((versionMadeBy >> BYTE_SHIFT) & NIBLET_MASK);
568
569        ze.setVersionRequired(ZipShort.getValue(CFH_BUF, off));
570        off += SHORT; // version required
571
572        final GeneralPurposeBit gpFlag = GeneralPurposeBit.parse(CFH_BUF, off);
573        final boolean hasUTF8Flag = gpFlag.usesUTF8ForNames();
574        final ZipEncoding entryEncoding =
575            hasUTF8Flag ? ZipEncodingHelper.UTF8_ZIP_ENCODING : zipEncoding;
576        ze.setGeneralPurposeBit(gpFlag);
577        ze.setRawFlag(ZipShort.getValue(CFH_BUF, off));
578
579        off += SHORT;
580
581        //noinspection MagicConstant
582        ze.setMethod(ZipShort.getValue(CFH_BUF, off));
583        off += SHORT;
584
585        long time = ZipUtil.dosToJavaTime(ZipLong.getValue(CFH_BUF, off));
586        ze.setTime(time);
587        off += WORD;
588
589        ze.setCrc(ZipLong.getValue(CFH_BUF, off));
590        off += WORD;
591
592        ze.setCompressedSize(ZipLong.getValue(CFH_BUF, off));
593        off += WORD;
594
595        ze.setSize(ZipLong.getValue(CFH_BUF, off));
596        off += WORD;
597
598        int fileNameLen = ZipShort.getValue(CFH_BUF, off);
599        off += SHORT;
600
601        int extraLen = ZipShort.getValue(CFH_BUF, off);
602        off += SHORT;
603
604        int commentLen = ZipShort.getValue(CFH_BUF, off);
605        off += SHORT;
606
607        int diskStart = ZipShort.getValue(CFH_BUF, off);
608        off += SHORT;
609
610        ze.setInternalAttributes(ZipShort.getValue(CFH_BUF, off));
611        off += SHORT;
612
613        ze.setExternalAttributes(ZipLong.getValue(CFH_BUF, off));
614        off += WORD;
615
616        byte[] fileName = new byte[fileNameLen];
617        archive.readFully(fileName);
618        ze.setName(entryEncoding.decode(fileName), fileName);
619
620        // LFH offset,
621        offset.headerOffset = ZipLong.getValue(CFH_BUF, off);
622        // data offset will be filled later
623        entries.add(ze);
624
625        byte[] cdExtraData = new byte[extraLen];
626        archive.readFully(cdExtraData);
627        ze.setCentralDirectoryExtra(cdExtraData);
628
629        setSizesAndOffsetFromZip64Extra(ze, offset, diskStart);
630
631        byte[] comment = new byte[commentLen];
632        archive.readFully(comment);
633        ze.setComment(entryEncoding.decode(comment));
634
635        if (!hasUTF8Flag && useUnicodeExtraFields) {
636            noUTF8Flag.put(ze, new NameAndComment(fileName, comment));
637        }
638    }
639
640    /**
641     * If the entry holds a Zip64 extended information extra field,
642     * read sizes from there if the entry's sizes are set to
643     * 0xFFFFFFFFF, do the same for the offset of the local file
644     * header.
645     *
646     * <p>Ensures the Zip64 extra either knows both compressed and
647     * uncompressed size or neither of both as the internal logic in
648     * ExtraFieldUtils forces the field to create local header data
649     * even if they are never used - and here a field with only one
650     * size would be invalid.</p>
651     */
652    private void setSizesAndOffsetFromZip64Extra(ZipArchiveEntry ze,
653                                                 OffsetEntry offset,
654                                                 int diskStart)
655        throws IOException {
656        Zip64ExtendedInformationExtraField z64 =
657            (Zip64ExtendedInformationExtraField)
658            ze.getExtraField(Zip64ExtendedInformationExtraField.HEADER_ID);
659        if (z64 != null) {
660            boolean hasUncompressedSize = ze.getSize() == ZIP64_MAGIC;
661            boolean hasCompressedSize = ze.getCompressedSize() == ZIP64_MAGIC;
662            boolean hasRelativeHeaderOffset =
663                offset.headerOffset == ZIP64_MAGIC;
664            z64.reparseCentralDirectoryData(hasUncompressedSize,
665                                            hasCompressedSize,
666                                            hasRelativeHeaderOffset,
667                                            diskStart == ZIP64_MAGIC_SHORT);
668
669            if (hasUncompressedSize) {
670                ze.setSize(z64.getSize().getLongValue());
671            } else if (hasCompressedSize) {
672                z64.setSize(new ZipEightByteInteger(ze.getSize()));
673            }
674
675            if (hasCompressedSize) {
676                ze.setCompressedSize(z64.getCompressedSize().getLongValue());
677            } else if (hasUncompressedSize) {
678                z64.setCompressedSize(new ZipEightByteInteger(ze.getCompressedSize()));
679            }
680
681            if (hasRelativeHeaderOffset) {
682                offset.headerOffset =
683                    z64.getRelativeHeaderOffset().getLongValue();
684            }
685        }
686    }
687
688    /**
689     * Length of the "End of central directory record" - which is
690     * supposed to be the last structure of the archive - without file
691     * comment.
692     */
693    static final int MIN_EOCD_SIZE =
694        /* end of central dir signature    */ WORD
695        /* number of this disk             */ + SHORT
696        /* number of the disk with the     */
697        /* start of the central directory  */ + SHORT
698        /* total number of entries in      */
699        /* the central dir on this disk    */ + SHORT
700        /* total number of entries in      */
701        /* the central dir                 */ + SHORT
702        /* size of the central directory   */ + WORD
703        /* offset of start of central      */
704        /* directory with respect to       */
705        /* the starting disk number        */ + WORD
706        /* zipfile comment length          */ + SHORT;
707
708    /**
709     * Maximum length of the "End of central directory record" with a
710     * file comment.
711     */
712    private static final int MAX_EOCD_SIZE = MIN_EOCD_SIZE
713        /* maximum length of zipfile comment */ + ZIP64_MAGIC_SHORT;
714
715    /**
716     * Offset of the field that holds the location of the first
717     * central directory entry inside the "End of central directory
718     * record" relative to the start of the "End of central directory
719     * record".
720     */
721    private static final int CFD_LOCATOR_OFFSET =
722        /* end of central dir signature    */ WORD
723        /* number of this disk             */ + SHORT
724        /* number of the disk with the     */
725        /* start of the central directory  */ + SHORT
726        /* total number of entries in      */
727        /* the central dir on this disk    */ + SHORT
728        /* total number of entries in      */
729        /* the central dir                 */ + SHORT
730        /* size of the central directory   */ + WORD;
731
732    /**
733     * Length of the "Zip64 end of central directory locator" - which
734     * should be right in front of the "end of central directory
735     * record" if one is present at all.
736     */
737    private static final int ZIP64_EOCDL_LENGTH =
738        /* zip64 end of central dir locator sig */ WORD
739        /* number of the disk with the start    */
740        /* start of the zip64 end of            */
741        /* central directory                    */ + WORD
742        /* relative offset of the zip64         */
743        /* end of central directory record      */ + DWORD
744        /* total number of disks                */ + WORD;
745
746    /**
747     * Offset of the field that holds the location of the "Zip64 end
748     * of central directory record" inside the "Zip64 end of central
749     * directory locator" relative to the start of the "Zip64 end of
750     * central directory locator".
751     */
752    private static final int ZIP64_EOCDL_LOCATOR_OFFSET =
753        /* zip64 end of central dir locator sig */ WORD
754        /* number of the disk with the start    */
755        /* start of the zip64 end of            */
756        /* central directory                    */ + WORD;
757
758    /**
759     * Offset of the field that holds the location of the first
760     * central directory entry inside the "Zip64 end of central
761     * directory record" relative to the start of the "Zip64 end of
762     * central directory record".
763     */
764    private static final int ZIP64_EOCD_CFD_LOCATOR_OFFSET =
765        /* zip64 end of central dir        */
766        /* signature                       */ WORD
767        /* size of zip64 end of central    */
768        /* directory record                */ + DWORD
769        /* version made by                 */ + SHORT
770        /* version needed to extract       */ + SHORT
771        /* number of this disk             */ + WORD
772        /* number of the disk with the     */
773        /* start of the central directory  */ + WORD
774        /* total number of entries in the  */
775        /* central directory on this disk  */ + DWORD
776        /* total number of entries in the  */
777        /* central directory               */ + DWORD
778        /* size of the central directory   */ + DWORD;
779
780    /**
781     * Searches for either the &quot;Zip64 end of central directory
782     * locator&quot; or the &quot;End of central dir record&quot;, parses
783     * it and positions the stream at the first central directory
784     * record.
785     */
786    private void positionAtCentralDirectory()
787        throws IOException {
788        positionAtEndOfCentralDirectoryRecord();
789        boolean found = false;
790        boolean searchedForZip64EOCD =
791            archive.getFilePointer() > ZIP64_EOCDL_LENGTH;
792        if (searchedForZip64EOCD) {
793            archive.seek(archive.getFilePointer() - ZIP64_EOCDL_LENGTH);
794            archive.readFully(WORD_BUF);
795            found = Arrays.equals(ZipArchiveOutputStream.ZIP64_EOCD_LOC_SIG,
796                                  WORD_BUF);
797        }
798        if (!found) {
799            // not a ZIP64 archive
800            if (searchedForZip64EOCD) {
801                skipBytes(ZIP64_EOCDL_LENGTH - WORD);
802            }
803            positionAtCentralDirectory32();
804        } else {
805            positionAtCentralDirectory64();
806        }
807    }
808
809    /**
810     * Parses the &quot;Zip64 end of central directory locator&quot;,
811     * finds the &quot;Zip64 end of central directory record&quot; using the
812     * parsed information, parses that and positions the stream at the
813     * first central directory record.
814     *
815     * Expects stream to be positioned right behind the &quot;Zip64
816     * end of central directory locator&quot;'s signature.
817     */
818    private void positionAtCentralDirectory64()
819        throws IOException {
820        skipBytes(ZIP64_EOCDL_LOCATOR_OFFSET
821                  - WORD /* signature has already been read */);
822        archive.readFully(DWORD_BUF);
823        archive.seek(ZipEightByteInteger.getLongValue(DWORD_BUF));
824        archive.readFully(WORD_BUF);
825        if (!Arrays.equals(WORD_BUF, ZipArchiveOutputStream.ZIP64_EOCD_SIG)) {
826            throw new ZipException("archive's ZIP64 end of central "
827                                   + "directory locator is corrupt.");
828        }
829        skipBytes(ZIP64_EOCD_CFD_LOCATOR_OFFSET
830                  - WORD /* signature has already been read */);
831        archive.readFully(DWORD_BUF);
832        archive.seek(ZipEightByteInteger.getLongValue(DWORD_BUF));
833    }
834
835    /**
836     * Parses the &quot;End of central dir record&quot; and positions
837     * the stream at the first central directory record.
838     *
839     * Expects stream to be positioned at the beginning of the
840     * &quot;End of central dir record&quot;.
841     */
842    private void positionAtCentralDirectory32()
843        throws IOException {
844        skipBytes(CFD_LOCATOR_OFFSET);
845        archive.readFully(WORD_BUF);
846        archive.seek(ZipLong.getValue(WORD_BUF));
847    }
848
849    /**
850     * Searches for the and positions the stream at the start of the
851     * &quot;End of central dir record&quot;.
852     */
853    private void positionAtEndOfCentralDirectoryRecord()
854        throws IOException {
855        boolean found = tryToLocateSignature(MIN_EOCD_SIZE, MAX_EOCD_SIZE,
856                                             ZipArchiveOutputStream.EOCD_SIG);
857        if (!found) {
858            throw new ZipException("archive is not a ZIP archive");
859        }
860    }
861
862    /**
863     * Searches the archive backwards from minDistance to maxDistance
864     * for the given signature, positions the RandomaccessFile right
865     * at the signature if it has been found.
866     */
867    private boolean tryToLocateSignature(long minDistanceFromEnd,
868                                         long maxDistanceFromEnd,
869                                         byte[] sig) throws IOException {
870        boolean found = false;
871        long off = archive.length() - minDistanceFromEnd;
872        final long stopSearching =
873            Math.max(0L, archive.length() - maxDistanceFromEnd);
874        if (off >= 0) {
875            for (; off >= stopSearching; off--) {
876                archive.seek(off);
877                int curr = archive.read();
878                if (curr == -1) {
879                    break;
880                }
881                if (curr == sig[POS_0]) {
882                    curr = archive.read();
883                    if (curr == sig[POS_1]) {
884                        curr = archive.read();
885                        if (curr == sig[POS_2]) {
886                            curr = archive.read();
887                            if (curr == sig[POS_3]) {
888                                found = true;
889                                break;
890                            }
891                        }
892                    }
893                }
894            }
895        }
896        if (found) {
897            archive.seek(off);
898        }
899        return found;
900    }
901
902    /**
903     * Skips the given number of bytes or throws an EOFException if
904     * skipping failed.
905     */ 
906    private void skipBytes(final int count) throws IOException {
907        int totalSkipped = 0;
908        while (totalSkipped < count) {
909            int skippedNow = archive.skipBytes(count - totalSkipped);
910            if (skippedNow <= 0) {
911                throw new EOFException();
912            }
913            totalSkipped += skippedNow;
914        }
915    }
916
917    /**
918     * Number of bytes in local file header up to the &quot;length of
919     * filename&quot; entry.
920     */
921    private static final long LFH_OFFSET_FOR_FILENAME_LENGTH =
922        /* local file header signature     */ WORD
923        /* version needed to extract       */ + SHORT
924        /* general purpose bit flag        */ + SHORT
925        /* compression method              */ + SHORT
926        /* last mod file time              */ + SHORT
927        /* last mod file date              */ + SHORT
928        /* crc-32                          */ + WORD
929        /* compressed size                 */ + WORD
930        /* uncompressed size               */ + WORD;
931
932    /**
933     * Walks through all recorded entries and adds the data available
934     * from the local file header.
935     *
936     * <p>Also records the offsets for the data to read from the
937     * entries.</p>
938     */
939    private void resolveLocalFileHeaderData(Map<ZipArchiveEntry, NameAndComment>
940                                            entriesWithoutUTF8Flag)
941        throws IOException {
942        for (ZipArchiveEntry zipArchiveEntry : entries) {
943            // entries is filled in populateFromCentralDirectory and
944            // never modified
945            Entry ze = (Entry) zipArchiveEntry;
946            OffsetEntry offsetEntry = ze.getOffsetEntry();
947            long offset = offsetEntry.headerOffset;
948            archive.seek(offset + LFH_OFFSET_FOR_FILENAME_LENGTH);
949            archive.readFully(SHORT_BUF);
950            int fileNameLen = ZipShort.getValue(SHORT_BUF);
951            archive.readFully(SHORT_BUF);
952            int extraFieldLen = ZipShort.getValue(SHORT_BUF);
953            int lenToSkip = fileNameLen;
954            while (lenToSkip > 0) {
955                int skipped = archive.skipBytes(lenToSkip);
956                if (skipped <= 0) {
957                    throw new IOException("failed to skip file name in"
958                                          + " local file header");
959                }
960                lenToSkip -= skipped;
961            }
962            byte[] localExtraData = new byte[extraFieldLen];
963            archive.readFully(localExtraData);
964            ze.setExtra(localExtraData);
965            offsetEntry.dataOffset = offset + LFH_OFFSET_FOR_FILENAME_LENGTH
966                + SHORT + SHORT + fileNameLen + extraFieldLen;
967
968            if (entriesWithoutUTF8Flag.containsKey(ze)) {
969                NameAndComment nc = entriesWithoutUTF8Flag.get(ze);
970                ZipUtil.setNameAndCommentFromExtraFields(ze, nc.name,
971                                                         nc.comment);
972            }
973
974            String name = ze.getName();
975            LinkedList<ZipArchiveEntry> entriesOfThatName = nameMap.get(name);
976            if (entriesOfThatName == null) {
977                entriesOfThatName = new LinkedList<ZipArchiveEntry>();
978                nameMap.put(name, entriesOfThatName);
979            }
980            entriesOfThatName.addLast(ze);
981        }
982    }
983
984    /**
985     * Checks whether the archive starts with a LFH.  If it doesn't,
986     * it may be an empty archive.
987     */
988    private boolean startsWithLocalFileHeader() throws IOException {
989        archive.seek(0);
990        archive.readFully(WORD_BUF);
991        return Arrays.equals(WORD_BUF, ZipArchiveOutputStream.LFH_SIG);
992    }
993
994    /**
995     * InputStream that delegates requests to the underlying
996     * RandomAccessFile, making sure that only bytes from a certain
997     * range can be read.
998     */
999    private class BoundedInputStream extends InputStream {
1000        private long remaining;
1001        private long loc;
1002        private boolean addDummyByte = false;
1003
1004        BoundedInputStream(long start, long remaining) {
1005            this.remaining = remaining;
1006            loc = start;
1007        }
1008
1009        @Override
1010        public int read() throws IOException {
1011            if (remaining-- <= 0) {
1012                if (addDummyByte) {
1013                    addDummyByte = false;
1014                    return 0;
1015                }
1016                return -1;
1017            }
1018            synchronized (archive) {
1019                archive.seek(loc++);
1020                return archive.read();
1021            }
1022        }
1023
1024        @Override
1025        public int read(byte[] b, int off, int len) throws IOException {
1026            if (remaining <= 0) {
1027                if (addDummyByte) {
1028                    addDummyByte = false;
1029                    b[off] = 0;
1030                    return 1;
1031                }
1032                return -1;
1033            }
1034
1035            if (len <= 0) {
1036                return 0;
1037            }
1038
1039            if (len > remaining) {
1040                len = (int) remaining;
1041            }
1042            int ret = -1;
1043            synchronized (archive) {
1044                archive.seek(loc);
1045                ret = archive.read(b, off, len);
1046            }
1047            if (ret > 0) {
1048                loc += ret;
1049                remaining -= ret;
1050            }
1051            return ret;
1052        }
1053
1054        /**
1055         * Inflater needs an extra dummy byte for nowrap - see
1056         * Inflater's javadocs.
1057         */
1058        void addDummy() {
1059            addDummyByte = true;
1060        }
1061    }
1062
1063    private static final class NameAndComment {
1064        private final byte[] name;
1065        private final byte[] comment;
1066        private NameAndComment(byte[] name, byte[] comment) {
1067            this.name = name;
1068            this.comment = comment;
1069        }
1070    }
1071
1072    /**
1073     * Compares two ZipArchiveEntries based on their offset within the archive.
1074     *
1075     * <p>Won't return any meaningful results if one of the entries
1076     * isn't part of the archive at all.</p>
1077     *
1078     * @since 1.1
1079     */
1080    private final Comparator<ZipArchiveEntry> OFFSET_COMPARATOR =
1081        new Comparator<ZipArchiveEntry>() {
1082        public int compare(ZipArchiveEntry e1, ZipArchiveEntry e2) {
1083            if (e1 == e2) {
1084                return 0;
1085            }
1086
1087            Entry ent1 = e1 instanceof Entry ? (Entry) e1 : null;
1088            Entry ent2 = e2 instanceof Entry ? (Entry) e2 : null;
1089            if (ent1 == null) {
1090                return 1;
1091            }
1092            if (ent2 == null) {
1093                return -1;
1094            }
1095            long val = (ent1.getOffsetEntry().headerOffset
1096                        - ent2.getOffsetEntry().headerOffset);
1097            return val == 0 ? 0 : val < 0 ? -1 : +1;
1098        }
1099    };
1100
1101    /**
1102     * Extends ZipArchiveEntry to store the offset within the archive.
1103     */
1104    private static class Entry extends ZipArchiveEntry {
1105
1106        private final OffsetEntry offsetEntry;
1107
1108        Entry(OffsetEntry offset) {
1109            this.offsetEntry = offset;
1110        }
1111
1112        OffsetEntry getOffsetEntry() {
1113            return offsetEntry;
1114        }
1115
1116        @Override
1117        public int hashCode() {
1118            return 3 * super.hashCode()
1119                + (int) (offsetEntry.headerOffset % Integer.MAX_VALUE);
1120        }
1121
1122        @Override
1123        public boolean equals(Object other) {
1124            if (super.equals(other)) {
1125                // super.equals would return false if other were not an Entry
1126                Entry otherEntry = (Entry) other;
1127                return offsetEntry.headerOffset
1128                        == otherEntry.offsetEntry.headerOffset
1129                    && offsetEntry.dataOffset
1130                        == otherEntry.offsetEntry.dataOffset;
1131            }
1132            return false;
1133        }
1134    }
1135}