001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 * 017 */ 018package org.apache.commons.compress.archivers.zip; 019 020import java.io.BufferedInputStream; 021import java.io.Closeable; 022import java.io.EOFException; 023import java.io.File; 024import java.io.IOException; 025import java.io.InputStream; 026import java.io.RandomAccessFile; 027import java.util.Arrays; 028import java.util.Collections; 029import java.util.Comparator; 030import java.util.Enumeration; 031import java.util.HashMap; 032import java.util.LinkedList; 033import java.util.List; 034import java.util.Map; 035import java.util.zip.Inflater; 036import java.util.zip.InflaterInputStream; 037import java.util.zip.ZipException; 038 039import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream; 040import org.apache.commons.compress.utils.IOUtils; 041 042import static org.apache.commons.compress.archivers.zip.ZipConstants.DWORD; 043import static org.apache.commons.compress.archivers.zip.ZipConstants.SHORT; 044import static org.apache.commons.compress.archivers.zip.ZipConstants.WORD; 045import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC; 046import static 
org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC_SHORT; 047 048/** 049 * Replacement for <code>java.util.zip.ZipFile</code>. 050 * 051 * <p>This class adds support for file name encodings other than UTF-8 052 * (which is required to work on ZIP files created by native zip tools) 053 * and is able to skip a preamble like the one found in self 054 * extracting archives. Furthermore it returns instances of 055 * <code>org.apache.commons.compress.archivers.zip.ZipArchiveEntry</code> 056 * instead of <code>java.util.zip.ZipEntry</code>.</p> 057 * 058 * <p>It doesn't extend <code>java.util.zip.ZipFile</code> as it would 059 * have to reimplement all methods anyway. Like 060 * <code>java.util.zip.ZipFile</code>, it uses RandomAccessFile under the 061 * covers and supports compressed and uncompressed entries. As of 062 * Apache Commons Compress 1.3 it also transparently supports Zip64 063 * extensions and thus individual entries and archives larger than 4 064 * GB or with more than 65536 entries.</p> 065 * 066 * <p>The method signatures mimic the ones of 067 * <code>java.util.zip.ZipFile</code>, with a couple of exceptions:</p> 068 * 069 * <ul> 070 * <li>There is no getName method.</li> 071 * <li>entries has been renamed to getEntries.</li> 072 * <li>getEntries and getEntry return 073 * <code>org.apache.commons.compress.archivers.zip.ZipArchiveEntry</code> 074 * instances.</li> 075 * <li>close is allowed to throw IOException.</li> 076 * </ul> 077 * 078 */ 079public class ZipFile implements Closeable { 080 /* initial capacity handed to the name-to-entries map */ private static final int HASH_SIZE = 509; 081 /* together with BYTE_SHIFT: isolates the platform bits of the "version made by" field */ static final int NIBLET_MASK = 0x0f; 082 static final int BYTE_SHIFT = 8; 083 /* POS_0..POS_3: indices of the four signature bytes compared while searching for a signature */ private static final int POS_0 = 0; 084 private static final int POS_1 = 1; 085 private static final int POS_2 = 2; 086 private static final int POS_3 = 3; 087 088 /** 089 * List of entries in the order they appear inside the central 090 * directory. 
091 */ 092 private final List<ZipArchiveEntry> entries = 093 new LinkedList<ZipArchiveEntry>(); 094 095 /** 096 * Maps String to list of ZipArchiveEntrys, name -> actual entries. 097 */ 098 private final Map<String, LinkedList<ZipArchiveEntry>> nameMap = 099 new HashMap<String, LinkedList<ZipArchiveEntry>>(HASH_SIZE); 100 101 private static final class OffsetEntry { 102 private long headerOffset = -1; 103 private long dataOffset = -1; 104 } 105 106 /** 107 * The encoding to use for filenames and the file comment. 108 * 109 * <p>For a list of possible values see <a 110 * href="http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html">http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html</a>. 111 * Defaults to UTF-8.</p> 112 */ 113 private final String encoding; 114 115 /** 116 * The zip encoding to use for filenames and the file comment. 117 */ 118 private final ZipEncoding zipEncoding; 119 120 /** 121 * File name of actual source. 122 */ 123 private final String archiveName; 124 125 /** 126 * The actual data source. 127 */ 128 private final RandomAccessFile archive; 129 130 /** 131 * Whether to look for and use Unicode extra fields. 132 */ 133 private final boolean useUnicodeExtraFields; 134 135 /** 136 * Whether the file is closed. 137 */ 138 private volatile boolean closed = true; 139 140 // cached buffers - must only be used locally in the class (COMPRESS-172 - reduce garbage collection) 141 private final byte[] DWORD_BUF = new byte[DWORD]; 142 private final byte[] WORD_BUF = new byte[WORD]; 143 private final byte[] CFH_BUF = new byte[CFH_LEN]; 144 private final byte[] SHORT_BUF = new byte[SHORT]; 145 146 /** 147 * Opens the given file for reading, assuming "UTF8" for file names. 148 * 149 * @param f the archive. 150 * 151 * @throws IOException if an error occurs while reading the file. 
152 */ 153 public ZipFile(File f) throws IOException { 154 this(f, ZipEncodingHelper.UTF8); 155 } 156 157 /** 158 * Opens the given file for reading, assuming "UTF8". 159 * 160 * @param name name of the archive. 161 * 162 * @throws IOException if an error occurs while reading the file. 163 */ 164 public ZipFile(String name) throws IOException { 165 this(new File(name), ZipEncodingHelper.UTF8); 166 } 167 168 /** 169 * Opens the given file for reading, assuming the specified 170 * encoding for file names, scanning unicode extra fields. 171 * 172 * @param name name of the archive. 173 * @param encoding the encoding to use for file names, use null 174 * for the platform's default encoding 175 * 176 * @throws IOException if an error occurs while reading the file. 177 */ 178 public ZipFile(String name, String encoding) throws IOException { 179 this(new File(name), encoding, true); 180 } 181 182 /** 183 * Opens the given file for reading, assuming the specified 184 * encoding for file names and scanning for unicode extra fields. 185 * 186 * @param f the archive. 187 * @param encoding the encoding to use for file names, use null 188 * for the platform's default encoding 189 * 190 * @throws IOException if an error occurs while reading the file. 191 */ 192 public ZipFile(File f, String encoding) throws IOException { 193 this(f, encoding, true); 194 } 195 196 /** 197 * Opens the given file for reading, assuming the specified 198 * encoding for file names. 199 * 200 * @param f the archive. 201 * @param encoding the encoding to use for file names, use null 202 * for the platform's default encoding 203 * @param useUnicodeExtraFields whether to use InfoZIP Unicode 204 * Extra Fields (if present) to set the file names. 205 * 206 * @throws IOException if an error occurs while reading the file. 
207 */ 208 public ZipFile(File f, String encoding, boolean useUnicodeExtraFields) 209 throws IOException { 210 this.archiveName = f.getAbsolutePath(); 211 this.encoding = encoding; 212 this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding); 213 this.useUnicodeExtraFields = useUnicodeExtraFields; 214 archive = new RandomAccessFile(f, "r"); 215 boolean success = false; 216 try { 217 Map<ZipArchiveEntry, NameAndComment> entriesWithoutUTF8Flag = 218 populateFromCentralDirectory(); 219 resolveLocalFileHeaderData(entriesWithoutUTF8Flag); 220 success = true; 221 } finally { 222 closed = !success; 223 if (!success) { 224 IOUtils.closeQuietly(archive); 225 } 226 } 227 } 228 229 /** 230 * The encoding to use for filenames and the file comment. 231 * 232 * @return null if using the platform's default character encoding. 233 */ 234 public String getEncoding() { 235 return encoding; 236 } 237 238 /** 239 * Closes the archive. 240 * @throws IOException if an error occurs closing the archive. 241 */ 242 public void close() throws IOException { 243 // this flag is only written here and read in finalize() which 244 // can never be run in parallel. 245 // no synchronization needed. 246 closed = true; 247 248 archive.close(); 249 } 250 251 /** 252 * close a zipfile quietly; throw no io fault, do nothing 253 * on a null parameter 254 * @param zipfile file to close, can be null 255 */ 256 public static void closeQuietly(ZipFile zipfile) { 257 IOUtils.closeQuietly(zipfile); 258 } 259 260 /** 261 * Returns all entries. 262 * 263 * <p>Entries will be returned in the same order they appear 264 * within the archive's central directory.</p> 265 * 266 * @return all entries as {@link ZipArchiveEntry} instances 267 */ 268 public Enumeration<ZipArchiveEntry> getEntries() { 269 return Collections.enumeration(entries); 270 } 271 272 /** 273 * Returns all entries in physical order. 
274 * 275 * <p>Entries will be returned in the same order their contents 276 * appear within the archive.</p> 277 * 278 * @return all entries as {@link ZipArchiveEntry} instances 279 * 280 * @since 1.1 281 */ 282 public Enumeration<ZipArchiveEntry> getEntriesInPhysicalOrder() { 283 ZipArchiveEntry[] allEntries = entries.toArray(new ZipArchiveEntry[entries.size()]); 284 Arrays.sort(allEntries, OFFSET_COMPARATOR); 285 return Collections.enumeration(Arrays.asList(allEntries)); 286 } 287 288 /** 289 * Returns a named entry - or {@code null} if no entry by 290 * that name exists. 291 * 292 * <p>If multiple entries with the same name exist the first entry 293 * in the archive's central directory by that name is 294 * returned.</p> 295 * 296 * @param name name of the entry. 297 * @return the ZipArchiveEntry corresponding to the given name - or 298 * {@code null} if not present. 299 */ 300 public ZipArchiveEntry getEntry(String name) { 301 LinkedList<ZipArchiveEntry> entriesOfThatName = nameMap.get(name); 302 return entriesOfThatName != null ? entriesOfThatName.getFirst() : null; 303 } 304 305 /** 306 * Returns all named entries in the same order they appear within 307 * the archive's central directory. 308 * 309 * @param name name of the entry. 310 * @return the Iterable<ZipArchiveEntry> corresponding to the 311 * given name 312 * @since 1.6 313 */ 314 public Iterable<ZipArchiveEntry> getEntries(String name) { 315 List<ZipArchiveEntry> entriesOfThatName = nameMap.get(name); 316 return entriesOfThatName != null ? entriesOfThatName 317 : Collections.<ZipArchiveEntry>emptyList(); 318 } 319 320 /** 321 * Returns all named entries in the same order their contents 322 * appear within the archive. 323 * 324 * @param name name of the entry. 
325 * @return the Iterable<ZipArchiveEntry> corresponding to the 326 * given name 327 * @since 1.6 328 */ 329 public Iterable<ZipArchiveEntry> getEntriesInPhysicalOrder(String name) { 330 ZipArchiveEntry[] entriesOfThatName = new ZipArchiveEntry[0]; 331 if (nameMap.containsKey(name)) { 332 entriesOfThatName = nameMap.get(name).toArray(entriesOfThatName); 333 Arrays.sort(entriesOfThatName, OFFSET_COMPARATOR); 334 } 335 return Arrays.asList(entriesOfThatName); 336 } 337 338 /** 339 * Whether this class is able to read the given entry. 340 * 341 * <p>May return false if it is set up to use encryption or a 342 * compression method that hasn't been implemented yet.</p> 343 * @since 1.1 344 * @param ze the entry 345 * @return whether this class is able to read the given entry. 346 */ 347 public boolean canReadEntryData(ZipArchiveEntry ze) { 348 return ZipUtil.canHandleEntryData(ze); 349 } 350 351 /** 352 * Expose the raw stream of the archive entry (compressed form). 353 * 354 * <p>This method does not relate to how/if we understand the payload in the 355 * stream, since we really only intend to move it on to somewhere else.</p> 356 * 357 * @param ze The entry to get the stream for 358 * @return The raw input stream containing (possibly) compressed data. 359 * @since 1.11 360 */ 361 public InputStream getRawInputStream(ZipArchiveEntry ze) { 362 if (!(ze instanceof Entry)) { 363 return null; 364 } 365 OffsetEntry offsetEntry = ((Entry) ze).getOffsetEntry(); 366 long start = offsetEntry.dataOffset; 367 return new BoundedInputStream(start, ze.getCompressedSize()); 368 } 369 370 371 /** 372 * Transfer selected entries from this zipfile to a given #ZipArchiveOutputStream. 373 * Compression and all other attributes will be as in this file. 
374 * <p>This method transfers entries based on the central directory of the zip file.</p> 375 * 376 * @param target The zipArchiveOutputStream to write the entries to 377 * @param predicate A predicate that selects which entries to write 378 * @throws IOException on error 379 */ 380 public void copyRawEntries(ZipArchiveOutputStream target, ZipArchiveEntryPredicate predicate) 381 throws IOException { 382 Enumeration<ZipArchiveEntry> src = getEntriesInPhysicalOrder(); 383 while (src.hasMoreElements()) { 384 ZipArchiveEntry entry = src.nextElement(); 385 if (predicate.test( entry)) { 386 target.addRawArchiveEntry(entry, getRawInputStream(entry)); 387 } 388 } 389 } 390 391 /** 392 * Returns an InputStream for reading the contents of the given entry. 393 * 394 * @param ze the entry to get the stream for. 395 * @return a stream to read the entry from. 396 * @throws IOException if unable to create an input stream from the zipentry 397 * @throws ZipException if the zipentry uses an unsupported feature 398 */ 399 public InputStream getInputStream(ZipArchiveEntry ze) 400 throws IOException, ZipException { 401 if (!(ze instanceof Entry)) { 402 return null; 403 } 404 // cast valididty is checked just above 405 OffsetEntry offsetEntry = ((Entry) ze).getOffsetEntry(); 406 ZipUtil.checkRequestedFeatures(ze); 407 long start = offsetEntry.dataOffset; 408 BoundedInputStream bis = 409 new BoundedInputStream(start, ze.getCompressedSize()); 410 switch (ZipMethod.getMethodByCode(ze.getMethod())) { 411 case STORED: 412 return bis; 413 case UNSHRINKING: 414 return new UnshrinkingInputStream(bis); 415 case IMPLODING: 416 return new ExplodingInputStream(ze.getGeneralPurposeBit().getSlidingDictionarySize(), 417 ze.getGeneralPurposeBit().getNumberOfShannonFanoTrees(), new BufferedInputStream(bis)); 418 case DEFLATED: 419 bis.addDummy(); 420 final Inflater inflater = new Inflater(true); 421 return new InflaterInputStream(bis, inflater) { 422 @Override 423 public void close() throws 
IOException { 424 try { 425 super.close(); 426 } finally { 427 inflater.end(); 428 } 429 } 430 }; 431 case BZIP2: 432 return new BZip2CompressorInputStream(bis); 433 default: 434 throw new ZipException("Found unsupported compression method " 435 + ze.getMethod()); 436 } 437 } 438 439 /** 440 * <p> 441 * Convenience method to return the entry's content as a String if isUnixSymlink() 442 * returns true for it, otherwise returns null. 443 * </p> 444 * 445 * <p>This method assumes the symbolic link's file name uses the 446 * same encoding that as been specified for this ZipFile.</p> 447 * 448 * @param entry ZipArchiveEntry object that represents the symbolic link 449 * @return entry's content as a String 450 * @throws IOException problem with content's input stream 451 * @since 1.5 452 */ 453 public String getUnixSymlink(ZipArchiveEntry entry) throws IOException { 454 if (entry != null && entry.isUnixSymlink()) { 455 InputStream in = null; 456 try { 457 in = getInputStream(entry); 458 byte[] symlinkBytes = IOUtils.toByteArray(in); 459 return zipEncoding.decode(symlinkBytes); 460 } finally { 461 if (in != null) { 462 in.close(); 463 } 464 } 465 } else { 466 return null; 467 } 468 } 469 470 /** 471 * Ensures that the close method of this zipfile is called when 472 * there are no more references to it. 473 * @see #close() 474 */ 475 @Override 476 protected void finalize() throws Throwable { 477 try { 478 if (!closed) { 479 System.err.println("Cleaning up unclosed ZipFile for archive " 480 + archiveName); 481 close(); 482 } 483 } finally { 484 super.finalize(); 485 } 486 } 487 488 /** 489 * Length of a "central directory" entry structure without file 490 * name, extra fields or comment. 
491 */ 492 private static final int CFH_LEN = 493 /* version made by */ SHORT 494 /* version needed to extract */ + SHORT 495 /* general purpose bit flag */ + SHORT 496 /* compression method */ + SHORT 497 /* last mod file time */ + SHORT 498 /* last mod file date */ + SHORT 499 /* crc-32 */ + WORD 500 /* compressed size */ + WORD 501 /* uncompressed size */ + WORD 502 /* filename length */ + SHORT 503 /* extra field length */ + SHORT 504 /* file comment length */ + SHORT 505 /* disk number start */ + SHORT 506 /* internal file attributes */ + SHORT 507 /* external file attributes */ + WORD 508 /* relative offset of local header */ + WORD; 509 510 private static final long CFH_SIG = 511 ZipLong.getValue(ZipArchiveOutputStream.CFH_SIG); 512 513 /** 514 * Reads the central directory of the given archive and populates 515 * the internal tables with ZipArchiveEntry instances. 516 * 517 * <p>The ZipArchiveEntrys will know all data that can be obtained from 518 * the central directory alone, but not the data that requires the 519 * local file header or additional data to be read.</p> 520 * 521 * @return a map of zipentries that didn't have the language 522 * encoding flag set when read. 
523 */ 524 private Map<ZipArchiveEntry, NameAndComment> populateFromCentralDirectory() 525 throws IOException { 526 HashMap<ZipArchiveEntry, NameAndComment> noUTF8Flag = 527 new HashMap<ZipArchiveEntry, NameAndComment>(); 528 529 positionAtCentralDirectory(); 530 531 archive.readFully(WORD_BUF); 532 long sig = ZipLong.getValue(WORD_BUF); 533 534 if (sig != CFH_SIG && startsWithLocalFileHeader()) { 535 throw new IOException("central directory is empty, can't expand" 536 + " corrupt archive."); 537 } 538 539 while (sig == CFH_SIG) { 540 readCentralDirectoryEntry(noUTF8Flag); 541 archive.readFully(WORD_BUF); 542 sig = ZipLong.getValue(WORD_BUF); 543 } 544 return noUTF8Flag; 545 } 546 547 /** 548 * Reads an individual entry of the central directory, creats an 549 * ZipArchiveEntry from it and adds it to the global maps. 550 * 551 * @param noUTF8Flag map used to collect entries that don't have 552 * their UTF-8 flag set and whose name will be set by data read 553 * from the local file header later. The current entry may be 554 * added to this map. 555 */ 556 private void 557 readCentralDirectoryEntry(Map<ZipArchiveEntry, NameAndComment> noUTF8Flag) 558 throws IOException { 559 archive.readFully(CFH_BUF); 560 int off = 0; 561 OffsetEntry offset = new OffsetEntry(); 562 Entry ze = new Entry(offset); 563 564 int versionMadeBy = ZipShort.getValue(CFH_BUF, off); 565 off += SHORT; 566 ze.setVersionMadeBy(versionMadeBy); 567 ze.setPlatform((versionMadeBy >> BYTE_SHIFT) & NIBLET_MASK); 568 569 ze.setVersionRequired(ZipShort.getValue(CFH_BUF, off)); 570 off += SHORT; // version required 571 572 final GeneralPurposeBit gpFlag = GeneralPurposeBit.parse(CFH_BUF, off); 573 final boolean hasUTF8Flag = gpFlag.usesUTF8ForNames(); 574 final ZipEncoding entryEncoding = 575 hasUTF8Flag ? 
ZipEncodingHelper.UTF8_ZIP_ENCODING : zipEncoding; 576 ze.setGeneralPurposeBit(gpFlag); 577 ze.setRawFlag(ZipShort.getValue(CFH_BUF, off)); 578 579 off += SHORT; 580 581 //noinspection MagicConstant 582 ze.setMethod(ZipShort.getValue(CFH_BUF, off)); 583 off += SHORT; 584 585 long time = ZipUtil.dosToJavaTime(ZipLong.getValue(CFH_BUF, off)); 586 ze.setTime(time); 587 off += WORD; 588 589 ze.setCrc(ZipLong.getValue(CFH_BUF, off)); 590 off += WORD; 591 592 ze.setCompressedSize(ZipLong.getValue(CFH_BUF, off)); 593 off += WORD; 594 595 ze.setSize(ZipLong.getValue(CFH_BUF, off)); 596 off += WORD; 597 598 int fileNameLen = ZipShort.getValue(CFH_BUF, off); 599 off += SHORT; 600 601 int extraLen = ZipShort.getValue(CFH_BUF, off); 602 off += SHORT; 603 604 int commentLen = ZipShort.getValue(CFH_BUF, off); 605 off += SHORT; 606 607 int diskStart = ZipShort.getValue(CFH_BUF, off); 608 off += SHORT; 609 610 ze.setInternalAttributes(ZipShort.getValue(CFH_BUF, off)); 611 off += SHORT; 612 613 ze.setExternalAttributes(ZipLong.getValue(CFH_BUF, off)); 614 off += WORD; 615 616 byte[] fileName = new byte[fileNameLen]; 617 archive.readFully(fileName); 618 ze.setName(entryEncoding.decode(fileName), fileName); 619 620 // LFH offset, 621 offset.headerOffset = ZipLong.getValue(CFH_BUF, off); 622 // data offset will be filled later 623 entries.add(ze); 624 625 byte[] cdExtraData = new byte[extraLen]; 626 archive.readFully(cdExtraData); 627 ze.setCentralDirectoryExtra(cdExtraData); 628 629 setSizesAndOffsetFromZip64Extra(ze, offset, diskStart); 630 631 byte[] comment = new byte[commentLen]; 632 archive.readFully(comment); 633 ze.setComment(entryEncoding.decode(comment)); 634 635 if (!hasUTF8Flag && useUnicodeExtraFields) { 636 noUTF8Flag.put(ze, new NameAndComment(fileName, comment)); 637 } 638 } 639 640 /** 641 * If the entry holds a Zip64 extended information extra field, 642 * read sizes from there if the entry's sizes are set to 643 * 0xFFFFFFFFF, do the same for the offset of the local 
file 644 * header. 645 * 646 * <p>Ensures the Zip64 extra either knows both compressed and 647 * uncompressed size or neither of both as the internal logic in 648 * ExtraFieldUtils forces the field to create local header data 649 * even if they are never used - and here a field with only one 650 * size would be invalid.</p> 651 */ 652 private void setSizesAndOffsetFromZip64Extra(ZipArchiveEntry ze, 653 OffsetEntry offset, 654 int diskStart) 655 throws IOException { 656 Zip64ExtendedInformationExtraField z64 = 657 (Zip64ExtendedInformationExtraField) 658 ze.getExtraField(Zip64ExtendedInformationExtraField.HEADER_ID); 659 if (z64 != null) { 660 boolean hasUncompressedSize = ze.getSize() == ZIP64_MAGIC; 661 boolean hasCompressedSize = ze.getCompressedSize() == ZIP64_MAGIC; 662 boolean hasRelativeHeaderOffset = 663 offset.headerOffset == ZIP64_MAGIC; 664 z64.reparseCentralDirectoryData(hasUncompressedSize, 665 hasCompressedSize, 666 hasRelativeHeaderOffset, 667 diskStart == ZIP64_MAGIC_SHORT); 668 669 if (hasUncompressedSize) { 670 ze.setSize(z64.getSize().getLongValue()); 671 } else if (hasCompressedSize) { 672 z64.setSize(new ZipEightByteInteger(ze.getSize())); 673 } 674 675 if (hasCompressedSize) { 676 ze.setCompressedSize(z64.getCompressedSize().getLongValue()); 677 } else if (hasUncompressedSize) { 678 z64.setCompressedSize(new ZipEightByteInteger(ze.getCompressedSize())); 679 } 680 681 if (hasRelativeHeaderOffset) { 682 offset.headerOffset = 683 z64.getRelativeHeaderOffset().getLongValue(); 684 } 685 } 686 } 687 688 /** 689 * Length of the "End of central directory record" - which is 690 * supposed to be the last structure of the archive - without file 691 * comment. 
692 */ 693 static final int MIN_EOCD_SIZE = 694 /* end of central dir signature */ WORD 695 /* number of this disk */ + SHORT 696 /* number of the disk with the */ 697 /* start of the central directory */ + SHORT 698 /* total number of entries in */ 699 /* the central dir on this disk */ + SHORT 700 /* total number of entries in */ 701 /* the central dir */ + SHORT 702 /* size of the central directory */ + WORD 703 /* offset of start of central */ 704 /* directory with respect to */ 705 /* the starting disk number */ + WORD 706 /* zipfile comment length */ + SHORT; 707 708 /** 709 * Maximum length of the "End of central directory record" with a 710 * file comment. 711 */ 712 private static final int MAX_EOCD_SIZE = MIN_EOCD_SIZE 713 /* maximum length of zipfile comment */ + ZIP64_MAGIC_SHORT; 714 715 /** 716 * Offset of the field that holds the location of the first 717 * central directory entry inside the "End of central directory 718 * record" relative to the start of the "End of central directory 719 * record". 720 */ 721 private static final int CFD_LOCATOR_OFFSET = 722 /* end of central dir signature */ WORD 723 /* number of this disk */ + SHORT 724 /* number of the disk with the */ 725 /* start of the central directory */ + SHORT 726 /* total number of entries in */ 727 /* the central dir on this disk */ + SHORT 728 /* total number of entries in */ 729 /* the central dir */ + SHORT 730 /* size of the central directory */ + WORD; 731 732 /** 733 * Length of the "Zip64 end of central directory locator" - which 734 * should be right in front of the "end of central directory 735 * record" if one is present at all. 
736 */ 737 private static final int ZIP64_EOCDL_LENGTH = 738 /* zip64 end of central dir locator sig */ WORD 739 /* number of the disk with the start */ 740 /* start of the zip64 end of */ 741 /* central directory */ + WORD 742 /* relative offset of the zip64 */ 743 /* end of central directory record */ + DWORD 744 /* total number of disks */ + WORD; 745 746 /** 747 * Offset of the field that holds the location of the "Zip64 end 748 * of central directory record" inside the "Zip64 end of central 749 * directory locator" relative to the start of the "Zip64 end of 750 * central directory locator". 751 */ 752 private static final int ZIP64_EOCDL_LOCATOR_OFFSET = 753 /* zip64 end of central dir locator sig */ WORD 754 /* number of the disk with the start */ 755 /* start of the zip64 end of */ 756 /* central directory */ + WORD; 757 758 /** 759 * Offset of the field that holds the location of the first 760 * central directory entry inside the "Zip64 end of central 761 * directory record" relative to the start of the "Zip64 end of 762 * central directory record". 763 */ 764 private static final int ZIP64_EOCD_CFD_LOCATOR_OFFSET = 765 /* zip64 end of central dir */ 766 /* signature */ WORD 767 /* size of zip64 end of central */ 768 /* directory record */ + DWORD 769 /* version made by */ + SHORT 770 /* version needed to extract */ + SHORT 771 /* number of this disk */ + WORD 772 /* number of the disk with the */ 773 /* start of the central directory */ + WORD 774 /* total number of entries in the */ 775 /* central directory on this disk */ + DWORD 776 /* total number of entries in the */ 777 /* central directory */ + DWORD 778 /* size of the central directory */ + DWORD; 779 780 /** 781 * Searches for either the "Zip64 end of central directory 782 * locator" or the "End of central dir record", parses 783 * it and positions the stream at the first central directory 784 * record. 
785 */ 786 private void positionAtCentralDirectory() 787 throws IOException { 788 positionAtEndOfCentralDirectoryRecord(); 789 boolean found = false; 790 boolean searchedForZip64EOCD = 791 archive.getFilePointer() > ZIP64_EOCDL_LENGTH; 792 if (searchedForZip64EOCD) { 793 archive.seek(archive.getFilePointer() - ZIP64_EOCDL_LENGTH); 794 archive.readFully(WORD_BUF); 795 found = Arrays.equals(ZipArchiveOutputStream.ZIP64_EOCD_LOC_SIG, 796 WORD_BUF); 797 } 798 if (!found) { 799 // not a ZIP64 archive 800 if (searchedForZip64EOCD) { 801 skipBytes(ZIP64_EOCDL_LENGTH - WORD); 802 } 803 positionAtCentralDirectory32(); 804 } else { 805 positionAtCentralDirectory64(); 806 } 807 } 808 809 /** 810 * Parses the "Zip64 end of central directory locator", 811 * finds the "Zip64 end of central directory record" using the 812 * parsed information, parses that and positions the stream at the 813 * first central directory record. 814 * 815 * Expects stream to be positioned right behind the "Zip64 816 * end of central directory locator"'s signature. 817 */ 818 private void positionAtCentralDirectory64() 819 throws IOException { 820 skipBytes(ZIP64_EOCDL_LOCATOR_OFFSET 821 - WORD /* signature has already been read */); 822 archive.readFully(DWORD_BUF); 823 archive.seek(ZipEightByteInteger.getLongValue(DWORD_BUF)); 824 archive.readFully(WORD_BUF); 825 if (!Arrays.equals(WORD_BUF, ZipArchiveOutputStream.ZIP64_EOCD_SIG)) { 826 throw new ZipException("archive's ZIP64 end of central " 827 + "directory locator is corrupt."); 828 } 829 skipBytes(ZIP64_EOCD_CFD_LOCATOR_OFFSET 830 - WORD /* signature has already been read */); 831 archive.readFully(DWORD_BUF); 832 archive.seek(ZipEightByteInteger.getLongValue(DWORD_BUF)); 833 } 834 835 /** 836 * Parses the "End of central dir record" and positions 837 * the stream at the first central directory record. 838 * 839 * Expects stream to be positioned at the beginning of the 840 * "End of central dir record". 
841 */ 842 private void positionAtCentralDirectory32() 843 throws IOException { 844 skipBytes(CFD_LOCATOR_OFFSET); 845 archive.readFully(WORD_BUF); 846 archive.seek(ZipLong.getValue(WORD_BUF)); 847 } 848 849 /** 850 * Searches for the and positions the stream at the start of the 851 * "End of central dir record". 852 */ 853 private void positionAtEndOfCentralDirectoryRecord() 854 throws IOException { 855 boolean found = tryToLocateSignature(MIN_EOCD_SIZE, MAX_EOCD_SIZE, 856 ZipArchiveOutputStream.EOCD_SIG); 857 if (!found) { 858 throw new ZipException("archive is not a ZIP archive"); 859 } 860 } 861 862 /** 863 * Searches the archive backwards from minDistance to maxDistance 864 * for the given signature, positions the RandomaccessFile right 865 * at the signature if it has been found. 866 */ 867 private boolean tryToLocateSignature(long minDistanceFromEnd, 868 long maxDistanceFromEnd, 869 byte[] sig) throws IOException { 870 boolean found = false; 871 long off = archive.length() - minDistanceFromEnd; 872 final long stopSearching = 873 Math.max(0L, archive.length() - maxDistanceFromEnd); 874 if (off >= 0) { 875 for (; off >= stopSearching; off--) { 876 archive.seek(off); 877 int curr = archive.read(); 878 if (curr == -1) { 879 break; 880 } 881 if (curr == sig[POS_0]) { 882 curr = archive.read(); 883 if (curr == sig[POS_1]) { 884 curr = archive.read(); 885 if (curr == sig[POS_2]) { 886 curr = archive.read(); 887 if (curr == sig[POS_3]) { 888 found = true; 889 break; 890 } 891 } 892 } 893 } 894 } 895 } 896 if (found) { 897 archive.seek(off); 898 } 899 return found; 900 } 901 902 /** 903 * Skips the given number of bytes or throws an EOFException if 904 * skipping failed. 
905 */ 906 private void skipBytes(final int count) throws IOException { 907 int totalSkipped = 0; 908 while (totalSkipped < count) { 909 int skippedNow = archive.skipBytes(count - totalSkipped); 910 if (skippedNow <= 0) { 911 throw new EOFException(); 912 } 913 totalSkipped += skippedNow; 914 } 915 } 916 917 /** 918 * Number of bytes in local file header up to the "length of 919 * filename" entry. 920 */ 921 private static final long LFH_OFFSET_FOR_FILENAME_LENGTH = 922 /* local file header signature */ WORD 923 /* version needed to extract */ + SHORT 924 /* general purpose bit flag */ + SHORT 925 /* compression method */ + SHORT 926 /* last mod file time */ + SHORT 927 /* last mod file date */ + SHORT 928 /* crc-32 */ + WORD 929 /* compressed size */ + WORD 930 /* uncompressed size */ + WORD; 931 932 /** 933 * Walks through all recorded entries and adds the data available 934 * from the local file header. 935 * 936 * <p>Also records the offsets for the data to read from the 937 * entries.</p> 938 */ 939 private void resolveLocalFileHeaderData(Map<ZipArchiveEntry, NameAndComment> 940 entriesWithoutUTF8Flag) 941 throws IOException { 942 for (ZipArchiveEntry zipArchiveEntry : entries) { 943 // entries is filled in populateFromCentralDirectory and 944 // never modified 945 Entry ze = (Entry) zipArchiveEntry; 946 OffsetEntry offsetEntry = ze.getOffsetEntry(); 947 long offset = offsetEntry.headerOffset; 948 archive.seek(offset + LFH_OFFSET_FOR_FILENAME_LENGTH); 949 archive.readFully(SHORT_BUF); 950 int fileNameLen = ZipShort.getValue(SHORT_BUF); 951 archive.readFully(SHORT_BUF); 952 int extraFieldLen = ZipShort.getValue(SHORT_BUF); 953 int lenToSkip = fileNameLen; 954 while (lenToSkip > 0) { 955 int skipped = archive.skipBytes(lenToSkip); 956 if (skipped <= 0) { 957 throw new IOException("failed to skip file name in" 958 + " local file header"); 959 } 960 lenToSkip -= skipped; 961 } 962 byte[] localExtraData = new byte[extraFieldLen]; 963 
archive.readFully(localExtraData); 964 ze.setExtra(localExtraData); 965 offsetEntry.dataOffset = offset + LFH_OFFSET_FOR_FILENAME_LENGTH 966 + SHORT + SHORT + fileNameLen + extraFieldLen; 967 968 if (entriesWithoutUTF8Flag.containsKey(ze)) { 969 NameAndComment nc = entriesWithoutUTF8Flag.get(ze); 970 ZipUtil.setNameAndCommentFromExtraFields(ze, nc.name, 971 nc.comment); 972 } 973 974 String name = ze.getName(); 975 LinkedList<ZipArchiveEntry> entriesOfThatName = nameMap.get(name); 976 if (entriesOfThatName == null) { 977 entriesOfThatName = new LinkedList<ZipArchiveEntry>(); 978 nameMap.put(name, entriesOfThatName); 979 } 980 entriesOfThatName.addLast(ze); 981 } 982 } 983 984 /** 985 * Checks whether the archive starts with a LFH. If it doesn't, 986 * it may be an empty archive. 987 */ 988 private boolean startsWithLocalFileHeader() throws IOException { 989 archive.seek(0); 990 archive.readFully(WORD_BUF); 991 return Arrays.equals(WORD_BUF, ZipArchiveOutputStream.LFH_SIG); 992 } 993 994 /** 995 * InputStream that delegates requests to the underlying 996 * RandomAccessFile, making sure that only bytes from a certain 997 * range can be read. 
998 */ 999 private class BoundedInputStream extends InputStream { 1000 private long remaining; 1001 private long loc; 1002 private boolean addDummyByte = false; 1003 1004 BoundedInputStream(long start, long remaining) { 1005 this.remaining = remaining; 1006 loc = start; 1007 } 1008 1009 @Override 1010 public int read() throws IOException { 1011 if (remaining-- <= 0) { 1012 if (addDummyByte) { 1013 addDummyByte = false; 1014 return 0; 1015 } 1016 return -1; 1017 } 1018 synchronized (archive) { 1019 archive.seek(loc++); 1020 return archive.read(); 1021 } 1022 } 1023 1024 @Override 1025 public int read(byte[] b, int off, int len) throws IOException { 1026 if (remaining <= 0) { 1027 if (addDummyByte) { 1028 addDummyByte = false; 1029 b[off] = 0; 1030 return 1; 1031 } 1032 return -1; 1033 } 1034 1035 if (len <= 0) { 1036 return 0; 1037 } 1038 1039 if (len > remaining) { 1040 len = (int) remaining; 1041 } 1042 int ret = -1; 1043 synchronized (archive) { 1044 archive.seek(loc); 1045 ret = archive.read(b, off, len); 1046 } 1047 if (ret > 0) { 1048 loc += ret; 1049 remaining -= ret; 1050 } 1051 return ret; 1052 } 1053 1054 /** 1055 * Inflater needs an extra dummy byte for nowrap - see 1056 * Inflater's javadocs. 1057 */ 1058 void addDummy() { 1059 addDummyByte = true; 1060 } 1061 } 1062 1063 private static final class NameAndComment { 1064 private final byte[] name; 1065 private final byte[] comment; 1066 private NameAndComment(byte[] name, byte[] comment) { 1067 this.name = name; 1068 this.comment = comment; 1069 } 1070 } 1071 1072 /** 1073 * Compares two ZipArchiveEntries based on their offset within the archive. 
1074 * 1075 * <p>Won't return any meaningful results if one of the entries 1076 * isn't part of the archive at all.</p> 1077 * 1078 * @since 1.1 1079 */ 1080 private final Comparator<ZipArchiveEntry> OFFSET_COMPARATOR = 1081 new Comparator<ZipArchiveEntry>() { 1082 public int compare(ZipArchiveEntry e1, ZipArchiveEntry e2) { 1083 if (e1 == e2) { 1084 return 0; 1085 } 1086 1087 Entry ent1 = e1 instanceof Entry ? (Entry) e1 : null; 1088 Entry ent2 = e2 instanceof Entry ? (Entry) e2 : null; 1089 if (ent1 == null) { 1090 return 1; 1091 } 1092 if (ent2 == null) { 1093 return -1; 1094 } 1095 long val = (ent1.getOffsetEntry().headerOffset 1096 - ent2.getOffsetEntry().headerOffset); 1097 return val == 0 ? 0 : val < 0 ? -1 : +1; 1098 } 1099 }; 1100 1101 /** 1102 * Extends ZipArchiveEntry to store the offset within the archive. 1103 */ 1104 private static class Entry extends ZipArchiveEntry { 1105 1106 private final OffsetEntry offsetEntry; 1107 1108 Entry(OffsetEntry offset) { 1109 this.offsetEntry = offset; 1110 } 1111 1112 OffsetEntry getOffsetEntry() { 1113 return offsetEntry; 1114 } 1115 1116 @Override 1117 public int hashCode() { 1118 return 3 * super.hashCode() 1119 + (int) (offsetEntry.headerOffset % Integer.MAX_VALUE); 1120 } 1121 1122 @Override 1123 public boolean equals(Object other) { 1124 if (super.equals(other)) { 1125 // super.equals would return false if other were not an Entry 1126 Entry otherEntry = (Entry) other; 1127 return offsetEntry.headerOffset 1128 == otherEntry.offsetEntry.headerOffset 1129 && offsetEntry.dataOffset 1130 == otherEntry.offsetEntry.dataOffset; 1131 } 1132 return false; 1133 } 1134 } 1135}