001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 * 017 */ 018package org.apache.commons.compress.archivers.zip; 019 020import static org.apache.commons.compress.archivers.zip.ZipConstants.DWORD; 021import static org.apache.commons.compress.archivers.zip.ZipConstants.SHORT; 022import static org.apache.commons.compress.archivers.zip.ZipConstants.WORD; 023import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC; 024import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC_SHORT; 025 026import java.io.BufferedInputStream; 027import java.io.ByteArrayInputStream; 028import java.io.Closeable; 029import java.io.EOFException; 030import java.io.File; 031import java.io.IOException; 032import java.io.InputStream; 033import java.io.SequenceInputStream; 034import java.nio.ByteBuffer; 035import java.nio.channels.FileChannel; 036import java.nio.channels.SeekableByteChannel; 037import java.nio.file.Files; 038import java.nio.file.Path; 039import java.nio.file.StandardOpenOption; 040import java.util.Arrays; 041import java.util.Collections; 042import java.util.Comparator; 043import java.util.EnumSet; 044import java.util.Enumeration; 045import java.util.HashMap; 
046import java.util.LinkedList; 047import java.util.List; 048import java.util.Map; 049import java.util.zip.Inflater; 050import java.util.zip.ZipException; 051 052import org.apache.commons.compress.archivers.EntryStreamOffsets; 053import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream; 054import org.apache.commons.compress.compressors.deflate64.Deflate64CompressorInputStream; 055import org.apache.commons.compress.utils.BoundedArchiveInputStream; 056import org.apache.commons.compress.utils.BoundedSeekableByteChannelInputStream; 057import org.apache.commons.compress.utils.CountingInputStream; 058import org.apache.commons.compress.utils.IOUtils; 059import org.apache.commons.compress.utils.InputStreamStatistics; 060 061/** 062 * Replacement for {@code java.util.ZipFile}. 063 * 064 * <p>This class adds support for file name encodings other than UTF-8 065 * (which is required to work on ZIP files created by native zip tools 066 * and is able to skip a preamble like the one found in self 067 * extracting archives. Furthermore it returns instances of 068 * {@code org.apache.commons.compress.archivers.zip.ZipArchiveEntry} 069 * instead of {@code java.util.zip.ZipEntry}.</p> 070 * 071 * <p>It doesn't extend {@code java.util.zip.ZipFile} as it would 072 * have to reimplement all methods anyway. Like 073 * {@code java.util.ZipFile}, it uses SeekableByteChannel under the 074 * covers and supports compressed and uncompressed entries. 
As of 075 * Apache Commons Compress 1.3 it also transparently supports Zip64 076 * extensions and thus individual entries and archives larger than 4 077 * GB or with more than 65536 entries.</p> 078 * 079 * <p>The method signatures mimic the ones of 080 * {@code java.util.zip.ZipFile}, with a couple of exceptions: 081 * 082 * <ul> 083 * <li>There is no getName method.</li> 084 * <li>entries has been renamed to getEntries.</li> 085 * <li>getEntries and getEntry return 086 * {@code org.apache.commons.compress.archivers.zip.ZipArchiveEntry} 087 * instances.</li> 088 * <li>close is allowed to throw IOException.</li> 089 * </ul> 090 * 091 */ 092public class ZipFile implements Closeable { 093 private static final int HASH_SIZE = 509; 094 static final int NIBLET_MASK = 0x0f; 095 static final int BYTE_SHIFT = 8; 096 private static final int POS_0 = 0; 097 private static final int POS_1 = 1; 098 private static final int POS_2 = 2; 099 private static final int POS_3 = 3; 100 private static final byte[] ONE_ZERO_BYTE = new byte[1]; 101 102 /** 103 * List of entries in the order they appear inside the central 104 * directory. 105 */ 106 private final List<ZipArchiveEntry> entries = 107 new LinkedList<>(); 108 109 /** 110 * Maps String to list of ZipArchiveEntrys, name -> actual entries. 111 */ 112 private final Map<String, LinkedList<ZipArchiveEntry>> nameMap = 113 new HashMap<>(HASH_SIZE); 114 115 /** 116 * The encoding to use for file names and the file comment. 117 * 118 * <p>For a list of possible values see <a 119 * href="http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html">http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html</a>. 120 * Defaults to UTF-8.</p> 121 */ 122 private final String encoding; 123 124 /** 125 * The zip encoding to use for file names and the file comment. 126 */ 127 private final ZipEncoding zipEncoding; 128 129 /** 130 * File name of actual source. 
131 */ 132 private final String archiveName; 133 134 /** 135 * The actual data source. 136 */ 137 private final SeekableByteChannel archive; 138 139 /** 140 * Whether to look for and use Unicode extra fields. 141 */ 142 private final boolean useUnicodeExtraFields; 143 144 /** 145 * Whether the file is closed. 146 */ 147 private volatile boolean closed = true; 148 149 /** 150 * Whether the zip archive is a split zip archive 151 */ 152 private final boolean isSplitZipArchive; 153 154 // cached buffers - must only be used locally in the class (COMPRESS-172 - reduce garbage collection) 155 private final byte[] dwordBuf = new byte[DWORD]; 156 private final byte[] wordBuf = new byte[WORD]; 157 private final byte[] cfhBuf = new byte[CFH_LEN]; 158 private final byte[] shortBuf = new byte[SHORT]; 159 private final ByteBuffer dwordBbuf = ByteBuffer.wrap(dwordBuf); 160 private final ByteBuffer wordBbuf = ByteBuffer.wrap(wordBuf); 161 private final ByteBuffer cfhBbuf = ByteBuffer.wrap(cfhBuf); 162 private final ByteBuffer shortBbuf = ByteBuffer.wrap(shortBuf); 163 164 private long centralDirectoryStartDiskNumber, centralDirectoryStartRelativeOffset; 165 private long centralDirectoryStartOffset; 166 167 /** 168 * Opens the given file for reading, assuming "UTF8" for file names. 169 * 170 * @param f the archive. 171 * 172 * @throws IOException if an error occurs while reading the file. 173 */ 174 public ZipFile(final File f) throws IOException { 175 this(f, ZipEncodingHelper.UTF8); 176 } 177 178 /** 179 * Opens the given path for reading, assuming "UTF8" for file names. 180 * @param path path to the archive. 181 * @throws IOException if an error occurs while reading the file. 182 * @since 1.22 183 */ 184 public ZipFile(final Path path) throws IOException { 185 this(path, ZipEncodingHelper.UTF8); 186 } 187 188 /** 189 * Opens the given file for reading, assuming "UTF8". 190 * 191 * @param name name of the archive. 
192 * 193 * @throws IOException if an error occurs while reading the file. 194 */ 195 public ZipFile(final String name) throws IOException { 196 this(new File(name).toPath(), ZipEncodingHelper.UTF8); 197 } 198 199 /** 200 * Opens the given file for reading, assuming the specified 201 * encoding for file names, scanning unicode extra fields. 202 * 203 * @param name name of the archive. 204 * @param encoding the encoding to use for file names, use null 205 * for the platform's default encoding 206 * 207 * @throws IOException if an error occurs while reading the file. 208 */ 209 public ZipFile(final String name, final String encoding) throws IOException { 210 this(new File(name).toPath(), encoding, true); 211 } 212 213 /** 214 * Opens the given file for reading, assuming the specified 215 * encoding for file names and scanning for unicode extra fields. 216 * 217 * @param f the archive. 218 * @param encoding the encoding to use for file names, use null 219 * for the platform's default encoding 220 * 221 * @throws IOException if an error occurs while reading the file. 222 */ 223 public ZipFile(final File f, final String encoding) throws IOException { 224 this(f.toPath(), encoding, true); 225 } 226 227 /** 228 * Opens the given path for reading, assuming the specified 229 * encoding for file names and scanning for unicode extra fields. 230 * @param path path to the archive. 231 * @param encoding the encoding to use for file names, use null 232 * for the platform's default encoding 233 * @throws IOException if an error occurs while reading the file. 234 * @since 1.22 235 */ 236 public ZipFile(final Path path, final String encoding) throws IOException { 237 this(path, encoding, true); 238 } 239 240 /** 241 * Opens the given file for reading, assuming the specified 242 * encoding for file names. 243 * 244 * @param f the archive. 
245 * @param encoding the encoding to use for file names, use null 246 * for the platform's default encoding 247 * @param useUnicodeExtraFields whether to use InfoZIP Unicode 248 * Extra Fields (if present) to set the file names. 249 * 250 * @throws IOException if an error occurs while reading the file. 251 */ 252 public ZipFile(final File f, final String encoding, final boolean useUnicodeExtraFields) 253 throws IOException { 254 this(f.toPath(), encoding, useUnicodeExtraFields, false); 255 } 256 257 /** 258 * Opens the given path for reading, assuming the specified 259 * encoding for file names. 260 * @param path path to the archive. 261 * @param encoding the encoding to use for file names, use null 262 * for the platform's default encoding 263 * @param useUnicodeExtraFields whether to use InfoZIP Unicode 264 * Extra Fields (if present) to set the file names. 265 * @throws IOException if an error occurs while reading the file. 266 * @since 1.22 267 */ 268 public ZipFile(final Path path, final String encoding, final boolean useUnicodeExtraFields) 269 throws IOException { 270 this(path, encoding, useUnicodeExtraFields, false); 271 } 272 273 /** 274 * Opens the given file for reading, assuming the specified 275 * encoding for file names. 276 * 277 * 278 * <p>By default the central directory record and all local file headers of the archive will be read immediately 279 * which may take a considerable amount of time when the archive is big. The {@code ignoreLocalFileHeader} parameter 280 * can be set to {@code true} which restricts parsing to the central directory. Unfortunately the local file header 281 * may contain information not present inside of the central directory which will not be available when the argument 282 * is set to {@code true}. This includes the content of the Unicode extra field, so setting {@code 283 * ignoreLocalFileHeader} to {@code true} means {@code useUnicodeExtraFields} will be ignored effectively. 
Also 284 * {@link #getRawInputStream} is always going to return {@code null} if {@code ignoreLocalFileHeader} is {@code 285 * true}.</p> 286 * 287 * @param f the archive. 288 * @param encoding the encoding to use for file names, use null 289 * for the platform's default encoding 290 * @param useUnicodeExtraFields whether to use InfoZIP Unicode 291 * Extra Fields (if present) to set the file names. 292 * @param ignoreLocalFileHeader whether to ignore information 293 * stored inside the local file header (see the notes in this method's javadoc) 294 * 295 * @throws IOException if an error occurs while reading the file. 296 * @since 1.19 297 */ 298 public ZipFile(final File f, final String encoding, final boolean useUnicodeExtraFields, 299 final boolean ignoreLocalFileHeader) 300 throws IOException { 301 this(Files.newByteChannel(f.toPath(), EnumSet.of(StandardOpenOption.READ)), 302 f.getAbsolutePath(), encoding, useUnicodeExtraFields, true, ignoreLocalFileHeader); 303 } 304 305 /** 306 * Opens the given path for reading, assuming the specified 307 * encoding for file names. 308 * <p>By default the central directory record and all local file headers of the archive will be read immediately 309 * which may take a considerable amount of time when the archive is big. The {@code ignoreLocalFileHeader} parameter 310 * can be set to {@code true} which restricts parsing to the central directory. Unfortunately the local file header 311 * may contain information not present inside of the central directory which will not be available when the argument 312 * is set to {@code true}. This includes the content of the Unicode extra field, so setting {@code 313 * ignoreLocalFileHeader} to {@code true} means {@code useUnicodeExtraFields} will be ignored effectively. Also 314 * {@link #getRawInputStream} is always going to return {@code null} if {@code ignoreLocalFileHeader} is {@code 315 * true}.</p> 316 * @param path path to the archive. 
317 * @param encoding the encoding to use for file names, use null 318 * for the platform's default encoding 319 * @param useUnicodeExtraFields whether to use InfoZIP Unicode 320 * Extra Fields (if present) to set the file names. 321 * @param ignoreLocalFileHeader whether to ignore information 322 * stored inside the local file header (see the notes in this method's javadoc) 323 * @throws IOException if an error occurs while reading the file. 324 * @since 1.22 325 */ 326 public ZipFile(final Path path, final String encoding, final boolean useUnicodeExtraFields, 327 final boolean ignoreLocalFileHeader) 328 throws IOException { 329 this(Files.newByteChannel(path, EnumSet.of(StandardOpenOption.READ)), 330 path.toAbsolutePath().toString(), encoding, useUnicodeExtraFields, 331 true, ignoreLocalFileHeader); 332 } 333 334 /** 335 * Opens the given channel for reading, assuming "UTF8" for file names. 336 * 337 * <p>{@link 338 * org.apache.commons.compress.utils.SeekableInMemoryByteChannel} 339 * allows you to read from an in-memory archive.</p> 340 * 341 * @param channel the archive. 342 * 343 * @throws IOException if an error occurs while reading the file. 344 * @since 1.13 345 */ 346 public ZipFile(final SeekableByteChannel channel) 347 throws IOException { 348 this(channel, "unknown archive", ZipEncodingHelper.UTF8, true); 349 } 350 351 /** 352 * Opens the given channel for reading, assuming the specified 353 * encoding for file names. 354 * 355 * <p>{@link 356 * org.apache.commons.compress.utils.SeekableInMemoryByteChannel} 357 * allows you to read from an in-memory archive.</p> 358 * 359 * @param channel the archive. 360 * @param encoding the encoding to use for file names, use null 361 * for the platform's default encoding 362 * 363 * @throws IOException if an error occurs while reading the file. 
364 * @since 1.13 365 */ 366 public ZipFile(final SeekableByteChannel channel, final String encoding) 367 throws IOException { 368 this(channel, "unknown archive", encoding, true); 369 } 370 371 /** 372 * Opens the given channel for reading, assuming the specified 373 * encoding for file names. 374 * 375 * <p>{@link 376 * org.apache.commons.compress.utils.SeekableInMemoryByteChannel} 377 * allows you to read from an in-memory archive.</p> 378 * 379 * @param channel the archive. 380 * @param archiveName name of the archive, used for error messages only. 381 * @param encoding the encoding to use for file names, use null 382 * for the platform's default encoding 383 * @param useUnicodeExtraFields whether to use InfoZIP Unicode 384 * Extra Fields (if present) to set the file names. 385 * 386 * @throws IOException if an error occurs while reading the file. 387 * @since 1.13 388 */ 389 public ZipFile(final SeekableByteChannel channel, final String archiveName, 390 final String encoding, final boolean useUnicodeExtraFields) 391 throws IOException { 392 this(channel, archiveName, encoding, useUnicodeExtraFields, false, false); 393 } 394 395 /** 396 * Opens the given channel for reading, assuming the specified 397 * encoding for file names. 398 * 399 * <p>{@link 400 * org.apache.commons.compress.utils.SeekableInMemoryByteChannel} 401 * allows you to read from an in-memory archive.</p> 402 * 403 * <p>By default the central directory record and all local file headers of the archive will be read immediately 404 * which may take a considerable amount of time when the archive is big. The {@code ignoreLocalFileHeader} parameter 405 * can be set to {@code true} which restricts parsing to the central directory. Unfortunately the local file header 406 * may contain information not present inside of the central directory which will not be available when the argument 407 * is set to {@code true}. 
This includes the content of the Unicode extra field, so setting {@code 408 * ignoreLocalFileHeader} to {@code true} means {@code useUnicodeExtraFields} will be ignored effectively. Also 409 * {@link #getRawInputStream} is always going to return {@code null} if {@code ignoreLocalFileHeader} is {@code 410 * true}.</p> 411 * 412 * @param channel the archive. 413 * @param archiveName name of the archive, used for error messages only. 414 * @param encoding the encoding to use for file names, use null 415 * for the platform's default encoding 416 * @param useUnicodeExtraFields whether to use InfoZIP Unicode 417 * Extra Fields (if present) to set the file names. 418 * @param ignoreLocalFileHeader whether to ignore information 419 * stored inside the local file header (see the notes in this method's javadoc) 420 * 421 * @throws IOException if an error occurs while reading the file. 422 * @since 1.19 423 */ 424 public ZipFile(final SeekableByteChannel channel, final String archiveName, 425 final String encoding, final boolean useUnicodeExtraFields, 426 final boolean ignoreLocalFileHeader) 427 throws IOException { 428 this(channel, archiveName, encoding, useUnicodeExtraFields, false, ignoreLocalFileHeader); 429 } 430 431 private ZipFile(final SeekableByteChannel channel, final String archiveName, 432 final String encoding, final boolean useUnicodeExtraFields, 433 final boolean closeOnError, final boolean ignoreLocalFileHeader) 434 throws IOException { 435 isSplitZipArchive = (channel instanceof ZipSplitReadOnlySeekableByteChannel); 436 437 this.archiveName = archiveName; 438 this.encoding = encoding; 439 this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding); 440 this.useUnicodeExtraFields = useUnicodeExtraFields; 441 archive = channel; 442 boolean success = false; 443 try { 444 final Map<ZipArchiveEntry, NameAndComment> entriesWithoutUTF8Flag = 445 populateFromCentralDirectory(); 446 if (!ignoreLocalFileHeader) { 447 
resolveLocalFileHeaderData(entriesWithoutUTF8Flag); 448 } 449 fillNameMap(); 450 success = true; 451 } catch (final IOException e) { 452 throw new IOException("Error on ZipFile " + archiveName, e); 453 } finally { 454 closed = !success; 455 if (!success && closeOnError) { 456 IOUtils.closeQuietly(archive); 457 } 458 } 459 } 460 461 /** 462 * The encoding to use for file names and the file comment. 463 * 464 * @return null if using the platform's default character encoding. 465 */ 466 public String getEncoding() { 467 return encoding; 468 } 469 470 /** 471 * Closes the archive. 472 * @throws IOException if an error occurs closing the archive. 473 */ 474 @Override 475 public void close() throws IOException { 476 // this flag is only written here and read in finalize() which 477 // can never be run in parallel. 478 // no synchronization needed. 479 closed = true; 480 481 archive.close(); 482 } 483 484 /** 485 * close a zipfile quietly; throw no io fault, do nothing 486 * on a null parameter 487 * @param zipfile file to close, can be null 488 */ 489 public static void closeQuietly(final ZipFile zipfile) { 490 IOUtils.closeQuietly(zipfile); 491 } 492 493 /** 494 * Returns all entries. 495 * 496 * <p>Entries will be returned in the same order they appear 497 * within the archive's central directory.</p> 498 * 499 * @return all entries as {@link ZipArchiveEntry} instances 500 */ 501 public Enumeration<ZipArchiveEntry> getEntries() { 502 return Collections.enumeration(entries); 503 } 504 505 /** 506 * Returns all entries in physical order. 
507 * 508 * <p>Entries will be returned in the same order their contents 509 * appear within the archive.</p> 510 * 511 * @return all entries as {@link ZipArchiveEntry} instances 512 * 513 * @since 1.1 514 */ 515 public Enumeration<ZipArchiveEntry> getEntriesInPhysicalOrder() { 516 final ZipArchiveEntry[] allEntries = entries.toArray(ZipArchiveEntry.EMPTY_ZIP_ARCHIVE_ENTRY_ARRAY); 517 Arrays.sort(allEntries, offsetComparator); 518 return Collections.enumeration(Arrays.asList(allEntries)); 519 } 520 521 /** 522 * Returns a named entry - or {@code null} if no entry by 523 * that name exists. 524 * 525 * <p>If multiple entries with the same name exist the first entry 526 * in the archive's central directory by that name is 527 * returned.</p> 528 * 529 * @param name name of the entry. 530 * @return the ZipArchiveEntry corresponding to the given name - or 531 * {@code null} if not present. 532 */ 533 public ZipArchiveEntry getEntry(final String name) { 534 final LinkedList<ZipArchiveEntry> entriesOfThatName = nameMap.get(name); 535 return entriesOfThatName != null ? entriesOfThatName.getFirst() : null; 536 } 537 538 /** 539 * Returns all named entries in the same order they appear within 540 * the archive's central directory. 541 * 542 * @param name name of the entry. 543 * @return the Iterable<ZipArchiveEntry> corresponding to the 544 * given name 545 * @since 1.6 546 */ 547 public Iterable<ZipArchiveEntry> getEntries(final String name) { 548 final List<ZipArchiveEntry> entriesOfThatName = nameMap.get(name); 549 return entriesOfThatName != null ? entriesOfThatName 550 : Collections.emptyList(); 551 } 552 553 /** 554 * Returns all named entries in the same order their contents 555 * appear within the archive. 556 * 557 * @param name name of the entry. 
558 * @return the Iterable<ZipArchiveEntry> corresponding to the 559 * given name 560 * @since 1.6 561 */ 562 public Iterable<ZipArchiveEntry> getEntriesInPhysicalOrder(final String name) { 563 ZipArchiveEntry[] entriesOfThatName = ZipArchiveEntry.EMPTY_ZIP_ARCHIVE_ENTRY_ARRAY; 564 if (nameMap.containsKey(name)) { 565 entriesOfThatName = nameMap.get(name).toArray(entriesOfThatName); 566 Arrays.sort(entriesOfThatName, offsetComparator); 567 } 568 return Arrays.asList(entriesOfThatName); 569 } 570 571 /** 572 * Whether this class is able to read the given entry. 573 * 574 * <p>May return false if it is set up to use encryption or a 575 * compression method that hasn't been implemented yet.</p> 576 * @since 1.1 577 * @param ze the entry 578 * @return whether this class is able to read the given entry. 579 */ 580 public boolean canReadEntryData(final ZipArchiveEntry ze) { 581 return ZipUtil.canHandleEntryData(ze); 582 } 583 584 /** 585 * Expose the raw stream of the archive entry (compressed form). 586 * 587 * <p>This method does not relate to how/if we understand the payload in the 588 * stream, since we really only intend to move it on to somewhere else.</p> 589 * 590 * @param ze The entry to get the stream for 591 * @return The raw input stream containing (possibly) compressed data. 592 * @since 1.11 593 */ 594 public InputStream getRawInputStream(final ZipArchiveEntry ze) { 595 if (!(ze instanceof Entry)) { 596 return null; 597 } 598 final long start = ze.getDataOffset(); 599 if (start == EntryStreamOffsets.OFFSET_UNKNOWN) { 600 return null; 601 } 602 return createBoundedInputStream(start, ze.getCompressedSize()); 603 } 604 605 606 /** 607 * Transfer selected entries from this zipfile to a given #ZipArchiveOutputStream. 608 * Compression and all other attributes will be as in this file. 
609 * <p>This method transfers entries based on the central directory of the zip file.</p> 610 * 611 * @param target The zipArchiveOutputStream to write the entries to 612 * @param predicate A predicate that selects which entries to write 613 * @throws IOException on error 614 */ 615 public void copyRawEntries(final ZipArchiveOutputStream target, final ZipArchiveEntryPredicate predicate) 616 throws IOException { 617 final Enumeration<ZipArchiveEntry> src = getEntriesInPhysicalOrder(); 618 while (src.hasMoreElements()) { 619 final ZipArchiveEntry entry = src.nextElement(); 620 if (predicate.test( entry)) { 621 target.addRawArchiveEntry(entry, getRawInputStream(entry)); 622 } 623 } 624 } 625 626 /** 627 * Returns an InputStream for reading the contents of the given entry. 628 * 629 * @param ze the entry to get the stream for. 630 * @return a stream to read the entry from. The returned stream 631 * implements {@link InputStreamStatistics}. 632 * @throws IOException if unable to create an input stream from the zipentry 633 */ 634 public InputStream getInputStream(final ZipArchiveEntry ze) 635 throws IOException { 636 if (!(ze instanceof Entry)) { 637 return null; 638 } 639 // cast validity is checked just above 640 ZipUtil.checkRequestedFeatures(ze); 641 final long start = getDataOffset(ze); 642 643 // doesn't get closed if the method is not supported - which 644 // should never happen because of the checkRequestedFeatures 645 // call above 646 final InputStream is = 647 new BufferedInputStream(createBoundedInputStream(start, ze.getCompressedSize())); //NOSONAR 648 switch (ZipMethod.getMethodByCode(ze.getMethod())) { 649 case STORED: 650 return new StoredStatisticsStream(is); 651 case UNSHRINKING: 652 return new UnshrinkingInputStream(is); 653 case IMPLODING: 654 try { 655 return new ExplodingInputStream(ze.getGeneralPurposeBit().getSlidingDictionarySize(), 656 ze.getGeneralPurposeBit().getNumberOfShannonFanoTrees(), is); 657 } catch (final IllegalArgumentException ex) 
{ 658 throw new IOException("bad IMPLODE data", ex); 659 } 660 case DEFLATED: 661 final Inflater inflater = new Inflater(true); 662 // Inflater with nowrap=true has this odd contract for a zero padding 663 // byte following the data stream; this used to be zlib's requirement 664 // and has been fixed a long time ago, but the contract persists so 665 // we comply. 666 // https://docs.oracle.com/javase/7/docs/api/java/util/zip/Inflater.html#Inflater(boolean) 667 return new InflaterInputStreamWithStatistics(new SequenceInputStream(is, new ByteArrayInputStream(ONE_ZERO_BYTE)), 668 inflater) { 669 @Override 670 public void close() throws IOException { 671 try { 672 super.close(); 673 } finally { 674 inflater.end(); 675 } 676 } 677 }; 678 case BZIP2: 679 return new BZip2CompressorInputStream(is); 680 case ENHANCED_DEFLATED: 681 return new Deflate64CompressorInputStream(is); 682 case AES_ENCRYPTED: 683 case EXPANDING_LEVEL_1: 684 case EXPANDING_LEVEL_2: 685 case EXPANDING_LEVEL_3: 686 case EXPANDING_LEVEL_4: 687 case JPEG: 688 case LZMA: 689 case PKWARE_IMPLODING: 690 case PPMD: 691 case TOKENIZATION: 692 case UNKNOWN: 693 case WAVPACK: 694 case XZ: 695 default: 696 throw new UnsupportedZipFeatureException(ZipMethod.getMethodByCode(ze.getMethod()), ze); 697 } 698 } 699 700 /** 701 * <p> 702 * Convenience method to return the entry's content as a String if isUnixSymlink() 703 * returns true for it, otherwise returns null. 
704 * </p> 705 * 706 * <p>This method assumes the symbolic link's file name uses the 707 * same encoding that as been specified for this ZipFile.</p> 708 * 709 * @param entry ZipArchiveEntry object that represents the symbolic link 710 * @return entry's content as a String 711 * @throws IOException problem with content's input stream 712 * @since 1.5 713 */ 714 public String getUnixSymlink(final ZipArchiveEntry entry) throws IOException { 715 if (entry != null && entry.isUnixSymlink()) { 716 try (InputStream in = getInputStream(entry)) { 717 return zipEncoding.decode(IOUtils.toByteArray(in)); 718 } 719 } 720 return null; 721 } 722 723 /** 724 * Ensures that the close method of this zipfile is called when 725 * there are no more references to it. 726 * @see #close() 727 */ 728 @Override 729 protected void finalize() throws Throwable { 730 try { 731 if (!closed) { 732 System.err.println("Cleaning up unclosed ZipFile for archive " 733 + archiveName); 734 close(); 735 } 736 } finally { 737 super.finalize(); 738 } 739 } 740 741 /** 742 * Length of a "central directory" entry structure without file 743 * name, extra fields or comment. 
744 */ 745 private static final int CFH_LEN = 746 /* version made by */ SHORT 747 /* version needed to extract */ + SHORT 748 /* general purpose bit flag */ + SHORT 749 /* compression method */ + SHORT 750 /* last mod file time */ + SHORT 751 /* last mod file date */ + SHORT 752 /* crc-32 */ + WORD 753 /* compressed size */ + WORD 754 /* uncompressed size */ + WORD 755 /* file name length */ + SHORT 756 /* extra field length */ + SHORT 757 /* file comment length */ + SHORT 758 /* disk number start */ + SHORT 759 /* internal file attributes */ + SHORT 760 /* external file attributes */ + WORD 761 /* relative offset of local header */ + WORD; 762 763 private static final long CFH_SIG = 764 ZipLong.getValue(ZipArchiveOutputStream.CFH_SIG); 765 766 /** 767 * Reads the central directory of the given archive and populates 768 * the internal tables with ZipArchiveEntry instances. 769 * 770 * <p>The ZipArchiveEntrys will know all data that can be obtained from 771 * the central directory alone, but not the data that requires the 772 * local file header or additional data to be read.</p> 773 * 774 * @return a map of zipentries that didn't have the language 775 * encoding flag set when read. 
*/
    private Map<ZipArchiveEntry, NameAndComment> populateFromCentralDirectory()
        throws IOException {
        // Raw name/comment bytes of the entries that lack the UTF-8 flag;
        // filled in by readCentralDirectoryEntry.
        final HashMap<ZipArchiveEntry, NameAndComment> pendingNames = new HashMap<>();

        positionAtCentralDirectory();
        centralDirectoryStartOffset = archive.position();

        wordBbuf.rewind();
        IOUtils.readFully(archive, wordBbuf);
        long signature = ZipLong.getValue(wordBuf);

        final boolean centralHeaderMissing = signature != CFH_SIG;
        if (centralHeaderMissing && startsWithLocalFileHeader()) {
            throw new IOException("Central directory is empty, can't expand"
                                  + " corrupt archive.");
        }

        // One record per iteration; afterwards read the signature that follows it.
        for (; signature == CFH_SIG; signature = ZipLong.getValue(wordBuf)) {
            readCentralDirectoryEntry(pendingNames);
            wordBbuf.rewind();
            IOUtils.readFully(archive, wordBbuf);
        }
        return pendingNames;
    }

    /**
     * Reads a single central directory record, creates a
     * ZipArchiveEntry from it and appends it to the list of entries.
     *
     * <p>The fixed-size part of the record is read into the header
     * buffer first; name, extra data and comment are then read from
     * the archive in that order.</p>
     *
     * @param noUTF8Flag map used to collect entries that don't have
     * their UTF-8 flag set and whose name will be set by data read
     * from the local file header later. The current entry is added
     * to this map if it lacks the flag and unicode extra fields are
     * in use.
     * @throws IOException if reading fails, the record is truncated
     * or its extra data cannot be parsed
     */
    private void
        readCentralDirectoryEntry(final Map<ZipArchiveEntry, NameAndComment> noUTF8Flag)
        throws IOException {
        cfhBbuf.rewind();
        IOUtils.readFully(archive, cfhBbuf);
        final Entry entry = new Entry();
        int cursor = 0;

        // version made by (the platform is derived from it)
        final int madeBy = ZipShort.getValue(cfhBuf, cursor);
        cursor += SHORT;
        entry.setVersionMadeBy(madeBy);
        entry.setPlatform((madeBy >> BYTE_SHIFT) & NIBLET_MASK);

        // version required
        entry.setVersionRequired(ZipShort.getValue(cfhBuf, cursor));
        cursor += SHORT;

        // general purpose bit flag, kept both parsed and raw
        final GeneralPurposeBit flags = GeneralPurposeBit.parse(cfhBuf, cursor);
        final boolean hasUTF8Flag = flags.usesUTF8ForNames();
        final ZipEncoding entryEncoding;
        if (hasUTF8Flag) {
            entryEncoding = ZipEncodingHelper.UTF8_ZIP_ENCODING;
            entry.setNameSource(ZipArchiveEntry.NameSource.NAME_WITH_EFS_FLAG);
        } else {
            entryEncoding = zipEncoding;
        }
        entry.setGeneralPurposeBit(flags);
        entry.setRawFlag(ZipShort.getValue(cfhBuf, cursor));
        cursor += SHORT;

        //noinspection MagicConstant
        entry.setMethod(ZipShort.getValue(cfhBuf, cursor));
        cursor += SHORT;

        entry.setTime(ZipUtil.dosToJavaTime(ZipLong.getValue(cfhBuf, cursor)));
        cursor += WORD;

        entry.setCrc(ZipLong.getValue(cfhBuf, cursor));
        cursor += WORD;

        final long compressedSize = ZipLong.getValue(cfhBuf, cursor);
        if (compressedSize < 0) {
            throw new IOException("broken archive, entry with negative compressed size");
        }
        entry.setCompressedSize(compressedSize);
        cursor += WORD;

        final long uncompressedSize = ZipLong.getValue(cfhBuf, cursor);
        if (uncompressedSize < 0) {
            throw new IOException("broken archive, entry with negative size");
        }
        entry.setSize(uncompressedSize);
        cursor += WORD;

        final int fileNameLen = ZipShort.getValue(cfhBuf, cursor);
        cursor += SHORT;
        if (fileNameLen < 0) {
            throw new IOException("broken archive, entry with negative fileNameLen");
        }

        final int extraLen = ZipShort.getValue(cfhBuf, cursor);
        cursor += SHORT;
        if (extraLen < 0) {
            throw new IOException("broken archive, entry with negative extraLen");
        }

        final int commentLen = ZipShort.getValue(cfhBuf, cursor);
        cursor += SHORT;
        if (commentLen < 0) {
            throw new IOException("broken archive, entry with negative commentLen");
        }

        entry.setDiskNumberStart(ZipShort.getValue(cfhBuf, cursor));
        cursor += SHORT;

        entry.setInternalAttributes(ZipShort.getValue(cfhBuf, cursor));
        cursor += SHORT;

        entry.setExternalAttributes(ZipLong.getValue(cfhBuf, cursor));
        cursor += WORD;

        // the variable-length name follows the fixed-size header in the archive
        final byte[] fileName = IOUtils.readRange(archive, fileNameLen);
        if (fileName.length < fileNameLen) {
            throw new EOFException();
        }
        entry.setName(entryEncoding.decode(fileName), fileName);

        // LFH offset is the last field of the fixed-size header;
        // the data offset will be filled later
        entry.setLocalHeaderOffset(ZipLong.getValue(cfhBuf, cursor));
        entries.add(entry);

        final byte[] cdExtraData = IOUtils.readRange(archive, extraLen);
        if (cdExtraData.length < extraLen) {
            throw new EOFException();
        }
        try {
            entry.setCentralDirectoryExtra(cdExtraData);
        } catch (final RuntimeException ex) {
            throw (ZipException) new ZipException("Invalid extra data in entry "
                                                  + entry.getName()).initCause(ex);
        }

        setSizesAndOffsetFromZip64Extra(entry);
        sanityCheckLFHOffset(entry);

        final byte[] comment = IOUtils.readRange(archive, commentLen);
        if (comment.length < commentLen) {
            throw new EOFException();
        }
        entry.setComment(entryEncoding.decode(comment));

        if (useUnicodeExtraFields && !hasUTF8Flag) {
            noUTF8Flag.put(entry, new NameAndComment(fileName, comment));
        }

        entry.setStreamContiguous(true);
    }

    /**
     * Verifies that the disk number and local file header offset of
     * the given entry are not negative and that the local file header
     * does not start behind the start of the central directory.
     *
     * @param ze the entry to check
     * @throws IOException if any of the checks fails
     */
    private void sanityCheckLFHOffset(final ZipArchiveEntry ze) throws IOException {
        final long diskNumber = ze.getDiskNumberStart();
        final long headerOffset = ze.getLocalHeaderOffset();
        if (diskNumber < 0) {
            throw new IOException("broken archive, entry with negative disk number");
        }
        if (headerOffset < 0) {
            throw new IOException("broken archive, entry with negative local file header offset");
        }
        if (!isSplitZipArchive) {
            if (headerOffset > centralDirectoryStartOffset) {
                throw new IOException("local file header for " + ze.getName() + " starts after central directory");
            }
            return;
        }
        // split archive: compare the disk first, then the offset on that disk
        if (diskNumber > centralDirectoryStartDiskNumber) {
            throw new IOException("local file header for " + ze.getName() + " starts on a later disk than central directory");
        }
        if (diskNumber == centralDirectoryStartDiskNumber
            && headerOffset > centralDirectoryStartRelativeOffset) {
            throw new IOException("local file header for " + ze.getName() + " starts after central directory");
        }
    }

    /**
     * If the entry holds a Zip64 extended
information extra field, 953 * read sizes from there if the entry's sizes are set to 954 * 0xFFFFFFFFF, do the same for the offset of the local file 955 * header. 956 * 957 * <p>Ensures the Zip64 extra either knows both compressed and 958 * uncompressed size or neither of both as the internal logic in 959 * ExtraFieldUtils forces the field to create local header data 960 * even if they are never used - and here a field with only one 961 * size would be invalid.</p> 962 */ 963 private void setSizesAndOffsetFromZip64Extra(final ZipArchiveEntry ze) 964 throws IOException { 965 final ZipExtraField extra = 966 ze.getExtraField(Zip64ExtendedInformationExtraField.HEADER_ID); 967 if (extra != null && !(extra instanceof Zip64ExtendedInformationExtraField)) { 968 throw new ZipException("archive contains unparseable zip64 extra field"); 969 } 970 final Zip64ExtendedInformationExtraField z64 = 971 (Zip64ExtendedInformationExtraField) extra; 972 if (z64 != null) { 973 final boolean hasUncompressedSize = ze.getSize() == ZIP64_MAGIC; 974 final boolean hasCompressedSize = ze.getCompressedSize() == ZIP64_MAGIC; 975 final boolean hasRelativeHeaderOffset = 976 ze.getLocalHeaderOffset() == ZIP64_MAGIC; 977 final boolean hasDiskStart = ze.getDiskNumberStart() == ZIP64_MAGIC_SHORT; 978 z64.reparseCentralDirectoryData(hasUncompressedSize, 979 hasCompressedSize, 980 hasRelativeHeaderOffset, 981 hasDiskStart); 982 983 if (hasUncompressedSize) { 984 final long size = z64.getSize().getLongValue(); 985 if (size < 0) { 986 throw new IOException("broken archive, entry with negative size"); 987 } 988 ze.setSize(size); 989 } else if (hasCompressedSize) { 990 z64.setSize(new ZipEightByteInteger(ze.getSize())); 991 } 992 993 if (hasCompressedSize) { 994 final long size = z64.getCompressedSize().getLongValue(); 995 if (size < 0) { 996 throw new IOException("broken archive, entry with negative compressed size"); 997 } 998 ze.setCompressedSize(size); 999 } else if (hasUncompressedSize) { 1000 
z64.setCompressedSize(new ZipEightByteInteger(ze.getCompressedSize())); 1001 } 1002 1003 if (hasRelativeHeaderOffset) { 1004 ze.setLocalHeaderOffset(z64.getRelativeHeaderOffset().getLongValue()); 1005 } 1006 1007 if (hasDiskStart) { 1008 ze.setDiskNumberStart(z64.getDiskStartNumber().getValue()); 1009 } 1010 } 1011 } 1012 1013 /** 1014 * Length of the "End of central directory record" - which is 1015 * supposed to be the last structure of the archive - without file 1016 * comment. 1017 */ 1018 static final int MIN_EOCD_SIZE = 1019 /* end of central dir signature */ WORD 1020 /* number of this disk */ + SHORT 1021 /* number of the disk with the */ 1022 /* start of the central directory */ + SHORT 1023 /* total number of entries in */ 1024 /* the central dir on this disk */ + SHORT 1025 /* total number of entries in */ 1026 /* the central dir */ + SHORT 1027 /* size of the central directory */ + WORD 1028 /* offset of start of central */ 1029 /* directory with respect to */ 1030 /* the starting disk number */ + WORD 1031 /* zipfile comment length */ + SHORT; 1032 1033 /** 1034 * Maximum length of the "End of central directory record" with a 1035 * file comment. 1036 */ 1037 private static final int MAX_EOCD_SIZE = MIN_EOCD_SIZE 1038 /* maximum length of zipfile comment */ + ZIP64_MAGIC_SHORT; 1039 1040 /** 1041 * Offset of the field that holds the location of the first 1042 * central directory entry inside the "End of central directory 1043 * record" relative to the start of the "End of central directory 1044 * record". 
1045 */ 1046 private static final int CFD_LOCATOR_OFFSET = 1047 /* end of central dir signature */ WORD 1048 /* number of this disk */ + SHORT 1049 /* number of the disk with the */ 1050 /* start of the central directory */ + SHORT 1051 /* total number of entries in */ 1052 /* the central dir on this disk */ + SHORT 1053 /* total number of entries in */ 1054 /* the central dir */ + SHORT 1055 /* size of the central directory */ + WORD; 1056 1057 /** 1058 * Offset of the field that holds the disk number of the first 1059 * central directory entry inside the "End of central directory 1060 * record" relative to the start of the "End of central directory 1061 * record". 1062 */ 1063 private static final int CFD_DISK_OFFSET = 1064 /* end of central dir signature */ WORD 1065 /* number of this disk */ + SHORT; 1066 1067 /** 1068 * Offset of the field that holds the location of the first 1069 * central directory entry inside the "End of central directory 1070 * record" relative to the "number of the disk with the start 1071 * of the central directory". 1072 */ 1073 private static final int CFD_LOCATOR_RELATIVE_OFFSET = 1074 /* total number of entries in */ 1075 /* the central dir on this disk */ + SHORT 1076 /* total number of entries in */ 1077 /* the central dir */ + SHORT 1078 /* size of the central directory */ + WORD; 1079 1080 /** 1081 * Length of the "Zip64 end of central directory locator" - which 1082 * should be right in front of the "end of central directory 1083 * record" if one is present at all. 
1084 */ 1085 private static final int ZIP64_EOCDL_LENGTH = 1086 /* zip64 end of central dir locator sig */ WORD 1087 /* number of the disk with the start */ 1088 /* start of the zip64 end of */ 1089 /* central directory */ + WORD 1090 /* relative offset of the zip64 */ 1091 /* end of central directory record */ + DWORD 1092 /* total number of disks */ + WORD; 1093 1094 /** 1095 * Offset of the field that holds the location of the "Zip64 end 1096 * of central directory record" inside the "Zip64 end of central 1097 * directory locator" relative to the start of the "Zip64 end of 1098 * central directory locator". 1099 */ 1100 private static final int ZIP64_EOCDL_LOCATOR_OFFSET = 1101 /* zip64 end of central dir locator sig */ WORD 1102 /* number of the disk with the start */ 1103 /* start of the zip64 end of */ 1104 /* central directory */ + WORD; 1105 1106 /** 1107 * Offset of the field that holds the location of the first 1108 * central directory entry inside the "Zip64 end of central 1109 * directory record" relative to the start of the "Zip64 end of 1110 * central directory record". 1111 */ 1112 private static final int ZIP64_EOCD_CFD_LOCATOR_OFFSET = 1113 /* zip64 end of central dir */ 1114 /* signature */ WORD 1115 /* size of zip64 end of central */ 1116 /* directory record */ + DWORD 1117 /* version made by */ + SHORT 1118 /* version needed to extract */ + SHORT 1119 /* number of this disk */ + WORD 1120 /* number of the disk with the */ 1121 /* start of the central directory */ + WORD 1122 /* total number of entries in the */ 1123 /* central directory on this disk */ + DWORD 1124 /* total number of entries in the */ 1125 /* central directory */ + DWORD 1126 /* size of the central directory */ + DWORD; 1127 1128 /** 1129 * Offset of the field that holds the disk number of the first 1130 * central directory entry inside the "Zip64 end of central 1131 * directory record" relative to the start of the "Zip64 end of 1132 * central directory record". 
1133 */ 1134 private static final int ZIP64_EOCD_CFD_DISK_OFFSET = 1135 /* zip64 end of central dir */ 1136 /* signature */ WORD 1137 /* size of zip64 end of central */ 1138 /* directory record */ + DWORD 1139 /* version made by */ + SHORT 1140 /* version needed to extract */ + SHORT 1141 /* number of this disk */ + WORD; 1142 1143 /** 1144 * Offset of the field that holds the location of the first 1145 * central directory entry inside the "Zip64 end of central 1146 * directory record" relative to the "number of the disk 1147 * with the start of the central directory". 1148 */ 1149 private static final int ZIP64_EOCD_CFD_LOCATOR_RELATIVE_OFFSET = 1150 /* total number of entries in the */ 1151 /* central directory on this disk */ DWORD 1152 /* total number of entries in the */ 1153 /* central directory */ + DWORD 1154 /* size of the central directory */ + DWORD; 1155 1156 /** 1157 * Searches for either the "Zip64 end of central directory 1158 * locator" or the "End of central dir record", parses 1159 * it and positions the stream at the first central directory 1160 * record. 
1161 */ 1162 private void positionAtCentralDirectory() 1163 throws IOException { 1164 positionAtEndOfCentralDirectoryRecord(); 1165 boolean found = false; 1166 final boolean searchedForZip64EOCD = 1167 archive.position() > ZIP64_EOCDL_LENGTH; 1168 if (searchedForZip64EOCD) { 1169 archive.position(archive.position() - ZIP64_EOCDL_LENGTH); 1170 wordBbuf.rewind(); 1171 IOUtils.readFully(archive, wordBbuf); 1172 found = Arrays.equals(ZipArchiveOutputStream.ZIP64_EOCD_LOC_SIG, 1173 wordBuf); 1174 } 1175 if (!found) { 1176 // not a ZIP64 archive 1177 if (searchedForZip64EOCD) { 1178 skipBytes(ZIP64_EOCDL_LENGTH - WORD); 1179 } 1180 positionAtCentralDirectory32(); 1181 } else { 1182 positionAtCentralDirectory64(); 1183 } 1184 } 1185 1186 /** 1187 * Parses the "Zip64 end of central directory locator", 1188 * finds the "Zip64 end of central directory record" using the 1189 * parsed information, parses that and positions the stream at the 1190 * first central directory record. 1191 * 1192 * Expects stream to be positioned right behind the "Zip64 1193 * end of central directory locator"'s signature. 
1194 */ 1195 private void positionAtCentralDirectory64() 1196 throws IOException { 1197 if (isSplitZipArchive) { 1198 wordBbuf.rewind(); 1199 IOUtils.readFully(archive, wordBbuf); 1200 final long diskNumberOfEOCD = ZipLong.getValue(wordBuf); 1201 1202 dwordBbuf.rewind(); 1203 IOUtils.readFully(archive, dwordBbuf); 1204 final long relativeOffsetOfEOCD = ZipEightByteInteger.getLongValue(dwordBuf); 1205 ((ZipSplitReadOnlySeekableByteChannel) archive) 1206 .position(diskNumberOfEOCD, relativeOffsetOfEOCD); 1207 } else { 1208 skipBytes(ZIP64_EOCDL_LOCATOR_OFFSET 1209 - WORD /* signature has already been read */); 1210 dwordBbuf.rewind(); 1211 IOUtils.readFully(archive, dwordBbuf); 1212 archive.position(ZipEightByteInteger.getLongValue(dwordBuf)); 1213 } 1214 1215 wordBbuf.rewind(); 1216 IOUtils.readFully(archive, wordBbuf); 1217 if (!Arrays.equals(wordBuf, ZipArchiveOutputStream.ZIP64_EOCD_SIG)) { 1218 throw new ZipException("Archive's ZIP64 end of central " 1219 + "directory locator is corrupt."); 1220 } 1221 1222 if (isSplitZipArchive) { 1223 skipBytes(ZIP64_EOCD_CFD_DISK_OFFSET 1224 - WORD /* signature has already been read */); 1225 wordBbuf.rewind(); 1226 IOUtils.readFully(archive, wordBbuf); 1227 centralDirectoryStartDiskNumber = ZipLong.getValue(wordBuf); 1228 1229 skipBytes(ZIP64_EOCD_CFD_LOCATOR_RELATIVE_OFFSET); 1230 1231 dwordBbuf.rewind(); 1232 IOUtils.readFully(archive, dwordBbuf); 1233 centralDirectoryStartRelativeOffset = ZipEightByteInteger.getLongValue(dwordBuf); 1234 ((ZipSplitReadOnlySeekableByteChannel) archive) 1235 .position(centralDirectoryStartDiskNumber, centralDirectoryStartRelativeOffset); 1236 } else { 1237 skipBytes(ZIP64_EOCD_CFD_LOCATOR_OFFSET 1238 - WORD /* signature has already been read */); 1239 dwordBbuf.rewind(); 1240 IOUtils.readFully(archive, dwordBbuf); 1241 centralDirectoryStartDiskNumber = 0; 1242 centralDirectoryStartRelativeOffset = ZipEightByteInteger.getLongValue(dwordBuf); 1243 
archive.position(centralDirectoryStartRelativeOffset); 1244 } 1245 } 1246 1247 /** 1248 * Parses the "End of central dir record" and positions 1249 * the stream at the first central directory record. 1250 * 1251 * Expects stream to be positioned at the beginning of the 1252 * "End of central dir record". 1253 */ 1254 private void positionAtCentralDirectory32() 1255 throws IOException { 1256 if (isSplitZipArchive) { 1257 skipBytes(CFD_DISK_OFFSET); 1258 shortBbuf.rewind(); 1259 IOUtils.readFully(archive, shortBbuf); 1260 centralDirectoryStartDiskNumber = ZipShort.getValue(shortBuf); 1261 1262 skipBytes(CFD_LOCATOR_RELATIVE_OFFSET); 1263 1264 wordBbuf.rewind(); 1265 IOUtils.readFully(archive, wordBbuf); 1266 centralDirectoryStartRelativeOffset = ZipLong.getValue(wordBuf); 1267 ((ZipSplitReadOnlySeekableByteChannel) archive) 1268 .position(centralDirectoryStartDiskNumber, centralDirectoryStartRelativeOffset); 1269 } else { 1270 skipBytes(CFD_LOCATOR_OFFSET); 1271 wordBbuf.rewind(); 1272 IOUtils.readFully(archive, wordBbuf); 1273 centralDirectoryStartDiskNumber = 0; 1274 centralDirectoryStartRelativeOffset = ZipLong.getValue(wordBuf); 1275 archive.position(centralDirectoryStartRelativeOffset); 1276 } 1277 } 1278 1279 /** 1280 * Searches for the and positions the stream at the start of the 1281 * "End of central dir record". 1282 */ 1283 private void positionAtEndOfCentralDirectoryRecord() 1284 throws IOException { 1285 final boolean found = tryToLocateSignature(MIN_EOCD_SIZE, MAX_EOCD_SIZE, 1286 ZipArchiveOutputStream.EOCD_SIG); 1287 if (!found) { 1288 throw new ZipException("Archive is not a ZIP archive"); 1289 } 1290 } 1291 1292 /** 1293 * Searches the archive backwards from minDistance to maxDistance 1294 * for the given signature, positions the RandomaccessFile right 1295 * at the signature if it has been found. 
1296 */ 1297 private boolean tryToLocateSignature(final long minDistanceFromEnd, 1298 final long maxDistanceFromEnd, 1299 final byte[] sig) throws IOException { 1300 boolean found = false; 1301 long off = archive.size() - minDistanceFromEnd; 1302 final long stopSearching = 1303 Math.max(0L, archive.size() - maxDistanceFromEnd); 1304 if (off >= 0) { 1305 for (; off >= stopSearching; off--) { 1306 archive.position(off); 1307 try { 1308 wordBbuf.rewind(); 1309 IOUtils.readFully(archive, wordBbuf); 1310 wordBbuf.flip(); 1311 } catch (final EOFException ex) { // NOSONAR 1312 break; 1313 } 1314 int curr = wordBbuf.get(); 1315 if (curr == sig[POS_0]) { 1316 curr = wordBbuf.get(); 1317 if (curr == sig[POS_1]) { 1318 curr = wordBbuf.get(); 1319 if (curr == sig[POS_2]) { 1320 curr = wordBbuf.get(); 1321 if (curr == sig[POS_3]) { 1322 found = true; 1323 break; 1324 } 1325 } 1326 } 1327 } 1328 } 1329 } 1330 if (found) { 1331 archive.position(off); 1332 } 1333 return found; 1334 } 1335 1336 /** 1337 * Skips the given number of bytes or throws an EOFException if 1338 * skipping failed. 1339 */ 1340 private void skipBytes(final int count) throws IOException { 1341 final long currentPosition = archive.position(); 1342 final long newPosition = currentPosition + count; 1343 if (newPosition > archive.size()) { 1344 throw new EOFException(); 1345 } 1346 archive.position(newPosition); 1347 } 1348 1349 /** 1350 * Number of bytes in local file header up to the "length of 1351 * file name" entry. 
1352 */ 1353 private static final long LFH_OFFSET_FOR_FILENAME_LENGTH = 1354 /* local file header signature */ WORD 1355 /* version needed to extract */ + SHORT 1356 /* general purpose bit flag */ + SHORT 1357 /* compression method */ + SHORT 1358 /* last mod file time */ + SHORT 1359 /* last mod file date */ + SHORT 1360 /* crc-32 */ + WORD 1361 /* compressed size */ + WORD 1362 /* uncompressed size */ + (long) WORD; 1363 1364 /** 1365 * Walks through all recorded entries and adds the data available 1366 * from the local file header. 1367 * 1368 * <p>Also records the offsets for the data to read from the 1369 * entries.</p> 1370 */ 1371 private void resolveLocalFileHeaderData(final Map<ZipArchiveEntry, NameAndComment> 1372 entriesWithoutUTF8Flag) 1373 throws IOException { 1374 for (final ZipArchiveEntry zipArchiveEntry : entries) { 1375 // entries is filled in populateFromCentralDirectory and 1376 // never modified 1377 final Entry ze = (Entry) zipArchiveEntry; 1378 final int[] lens = setDataOffset(ze); 1379 final int fileNameLen = lens[0]; 1380 final int extraFieldLen = lens[1]; 1381 skipBytes(fileNameLen); 1382 final byte[] localExtraData = IOUtils.readRange(archive, extraFieldLen); 1383 if (localExtraData.length < extraFieldLen) { 1384 throw new EOFException(); 1385 } 1386 try { 1387 ze.setExtra(localExtraData); 1388 } catch (RuntimeException ex) { 1389 final ZipException z = new ZipException("Invalid extra data in entry " + ze.getName()); 1390 z.initCause(ex); 1391 throw z; 1392 } 1393 1394 if (entriesWithoutUTF8Flag.containsKey(ze)) { 1395 final NameAndComment nc = entriesWithoutUTF8Flag.get(ze); 1396 ZipUtil.setNameAndCommentFromExtraFields(ze, nc.name, 1397 nc.comment); 1398 } 1399 } 1400 } 1401 1402 private void fillNameMap() { 1403 entries.forEach(ze -> { 1404 // entries is filled in populateFromCentralDirectory and 1405 // never modified 1406 final String name = ze.getName(); 1407 LinkedList<ZipArchiveEntry> entriesOfThatName = 
nameMap.computeIfAbsent(name, k -> new LinkedList<>()); 1408 entriesOfThatName.addLast(ze); 1409 }); 1410 } 1411 1412 private int[] setDataOffset(final ZipArchiveEntry ze) throws IOException { 1413 long offset = ze.getLocalHeaderOffset(); 1414 if (isSplitZipArchive) { 1415 ((ZipSplitReadOnlySeekableByteChannel) archive) 1416 .position(ze.getDiskNumberStart(), offset + LFH_OFFSET_FOR_FILENAME_LENGTH); 1417 // the offset should be updated to the global offset 1418 offset = archive.position() - LFH_OFFSET_FOR_FILENAME_LENGTH; 1419 } else { 1420 archive.position(offset + LFH_OFFSET_FOR_FILENAME_LENGTH); 1421 } 1422 wordBbuf.rewind(); 1423 IOUtils.readFully(archive, wordBbuf); 1424 wordBbuf.flip(); 1425 wordBbuf.get(shortBuf); 1426 final int fileNameLen = ZipShort.getValue(shortBuf); 1427 wordBbuf.get(shortBuf); 1428 final int extraFieldLen = ZipShort.getValue(shortBuf); 1429 ze.setDataOffset(offset + LFH_OFFSET_FOR_FILENAME_LENGTH 1430 + SHORT + SHORT + fileNameLen + extraFieldLen); 1431 if (ze.getDataOffset() + ze.getCompressedSize() > centralDirectoryStartOffset) { 1432 throw new IOException("data for " + ze.getName() + " overlaps with central directory."); 1433 } 1434 return new int[] { fileNameLen, extraFieldLen }; 1435 } 1436 1437 private long getDataOffset(final ZipArchiveEntry ze) throws IOException { 1438 final long s = ze.getDataOffset(); 1439 if (s == EntryStreamOffsets.OFFSET_UNKNOWN) { 1440 setDataOffset(ze); 1441 return ze.getDataOffset(); 1442 } 1443 return s; 1444 } 1445 1446 /** 1447 * Checks whether the archive starts with a LFH. If it doesn't, 1448 * it may be an empty archive. 1449 */ 1450 private boolean startsWithLocalFileHeader() throws IOException { 1451 archive.position(0); 1452 wordBbuf.rewind(); 1453 IOUtils.readFully(archive, wordBbuf); 1454 return Arrays.equals(wordBuf, ZipArchiveOutputStream.LFH_SIG); 1455 } 1456 1457 /** 1458 * Creates new BoundedInputStream, according to implementation of 1459 * underlying archive channel. 
1460 */ 1461 private BoundedArchiveInputStream createBoundedInputStream(final long start, final long remaining) { 1462 if (start < 0 || remaining < 0 || start + remaining < start) { 1463 throw new IllegalArgumentException("Corrupted archive, stream boundaries" 1464 + " are out of range"); 1465 } 1466 return archive instanceof FileChannel ? 1467 new BoundedFileChannelInputStream(start, remaining) : 1468 new BoundedSeekableByteChannelInputStream(start, remaining, archive); 1469 } 1470 1471 /** 1472 * Lock-free implementation of BoundedInputStream. The 1473 * implementation uses positioned reads on the underlying archive 1474 * file channel and therefore performs significantly faster in 1475 * concurrent environment. 1476 */ 1477 private class BoundedFileChannelInputStream extends BoundedArchiveInputStream { 1478 private final FileChannel archive; 1479 1480 BoundedFileChannelInputStream(final long start, final long remaining) { 1481 super(start, remaining); 1482 archive = (FileChannel) ZipFile.this.archive; 1483 } 1484 1485 @Override 1486 protected int read(final long pos, final ByteBuffer buf) throws IOException { 1487 final int read = archive.read(buf, pos); 1488 buf.flip(); 1489 return read; 1490 } 1491 } 1492 1493 private static final class NameAndComment { 1494 private final byte[] name; 1495 private final byte[] comment; 1496 private NameAndComment(final byte[] name, final byte[] comment) { 1497 this.name = name; 1498 this.comment = comment; 1499 } 1500 } 1501 1502 /** 1503 * Compares two ZipArchiveEntries based on their offset within the archive. 
*
     * <p>Entries are ordered by the number of the disk they start on
     * first and by the offset of their local file header second.</p>
     *
     * <p>Won't return any meaningful results if one of the entries
     * isn't part of the archive at all.</p>
     *
     * @since 1.1
     */
    private final Comparator<ZipArchiveEntry> offsetComparator =
        Comparator
            .comparingLong(ZipArchiveEntry::getDiskNumberStart)
            .thenComparingLong(ZipArchiveEntry::getLocalHeaderOffset);

    /**
     * Extends ZipArchiveEntry to store the offset within the archive.
     *
     * <p>Equality and hash code additionally take the entry's
     * location inside the archive into account.</p>
     */
    private static class Entry extends ZipArchiveEntry {

        Entry() {
        }

        @Override
        public int hashCode() {
            // fold both halves of the 64 bit offset into the hash
            final long lfhOffset = getLocalHeaderOffset();
            return 3 * super.hashCode()
                + (int) lfhOffset + (int) (lfhOffset >> 32);
        }

        @Override
        public boolean equals(final Object other) {
            if (!super.equals(other)) {
                return false;
            }
            // super.equals would return false if other were not an Entry
            final Entry that = (Entry) other;
            return getLocalHeaderOffset() == that.getLocalHeaderOffset()
                && super.getDataOffset() == that.getDataOffset()
                && super.getDiskNumberStart() == that.getDiskNumberStart();
        }
    }

    /**
     * Counting stream that reports the number of bytes read as both
     * its compressed and its uncompressed count.
     */
    private static class StoredStatisticsStream extends CountingInputStream implements InputStreamStatistics {
        StoredStatisticsStream(final InputStream in) {
            super(in);
        }

        @Override
        public long getCompressedCount() {
            return super.getBytesRead();
        }

        @Override
        public long getUncompressedCount() {
            // both counts are identical for this stream
            return getCompressedCount();
        }
    }
}