001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 * 017 */ 018package org.apache.commons.compress.archivers.zip; 019 020import java.io.BufferedInputStream; 021import java.io.ByteArrayInputStream; 022import java.io.Closeable; 023import java.io.EOFException; 024import java.io.File; 025import java.io.IOException; 026import java.io.InputStream; 027import java.io.SequenceInputStream; 028import java.nio.ByteBuffer; 029import java.nio.channels.FileChannel; 030import java.nio.channels.SeekableByteChannel; 031import java.nio.file.Files; 032import java.nio.file.Path; 033import java.nio.file.StandardOpenOption; 034import java.util.Arrays; 035import java.util.Collections; 036import java.util.Comparator; 037import java.util.EnumSet; 038import java.util.Enumeration; 039import java.util.HashMap; 040import java.util.LinkedList; 041import java.util.List; 042import java.util.Map; 043import java.util.zip.Inflater; 044import java.util.zip.ZipException; 045 046import org.apache.commons.compress.archivers.EntryStreamOffsets; 047import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream; 048import org.apache.commons.compress.compressors.deflate64.Deflate64CompressorInputStream; 049import 
org.apache.commons.compress.utils.BoundedArchiveInputStream; 050import org.apache.commons.compress.utils.BoundedSeekableByteChannelInputStream; 051import org.apache.commons.compress.utils.CountingInputStream; 052import org.apache.commons.compress.utils.IOUtils; 053import org.apache.commons.compress.utils.InputStreamStatistics; 054 055/** 056 * Replacement for {@code java.util.zip.ZipFile}. 057 * 058 * <p>This class adds support for file name encodings other than UTF-8 059 * (which is required to work on ZIP files created by native ZIP tools) 060 * and is able to skip a preamble like the one found in self 061 * extracting archives. Furthermore it returns instances of 062 * {@code org.apache.commons.compress.archivers.zip.ZipArchiveEntry} 063 * instead of {@code java.util.zip.ZipEntry}.</p> 064 * 065 * <p>It doesn't extend {@code java.util.zip.ZipFile} as it would 066 * have to reimplement all methods anyway. Like 067 * {@code java.util.zip.ZipFile}, it uses SeekableByteChannel under the 068 * covers and supports compressed and uncompressed entries. As of 069 * Apache Commons Compress 1.3 it also transparently supports Zip64 070 * extensions and thus individual entries and archives larger than 4 071 * GB or with more than 65536 entries.</p> 072 * 073 * <p>The method signatures mimic the ones of 074 * {@code java.util.zip.ZipFile}, with a couple of exceptions:</p> 075 * 076 * <ul> 077 * <li>There is no getName method.</li> 078 * <li>entries has been renamed to getEntries.</li> 079 * <li>getEntries and getEntry return 080 * {@code org.apache.commons.compress.archivers.zip.ZipArchiveEntry} 081 * instances.</li> 082 * <li>close is allowed to throw IOException.</li> 083 * </ul> 084 */ 085public class ZipFile implements Closeable { 086 /** 087 * Lock-free implementation of BoundedInputStream. The 088 * implementation uses positioned reads on the underlying archive 089 * file channel and therefore performs significantly faster in 090 * concurrent environments. 
091 */ 092 private class BoundedFileChannelInputStream extends BoundedArchiveInputStream { 093 private final FileChannel archive; 094 095 BoundedFileChannelInputStream(final long start, final long remaining) { 096 super(start, remaining); 097 archive = (FileChannel) ZipFile.this.archive; 098 } 099 100 @Override 101 protected int read(final long pos, final ByteBuffer buf) throws IOException { 102 final int read = archive.read(buf, pos); 103 buf.flip(); 104 return read; 105 } 106 } 107 /** 108 * Extends ZipArchiveEntry to store the offset within the archive. 109 */ 110 private static class Entry extends ZipArchiveEntry { 111 112 Entry() { 113 } 114 115 @Override 116 public boolean equals(final Object other) { 117 if (super.equals(other)) { 118 // super.equals would return false if other were not an Entry 119 final Entry otherEntry = (Entry) other; 120 return getLocalHeaderOffset() 121 == otherEntry.getLocalHeaderOffset() 122 && super.getDataOffset() 123 == otherEntry.getDataOffset() 124 && super.getDiskNumberStart() 125 == otherEntry.getDiskNumberStart(); 126 } 127 return false; 128 } 129 130 @Override 131 public int hashCode() { 132 return 3 * super.hashCode() 133 + (int) getLocalHeaderOffset()+(int)(getLocalHeaderOffset()>>32); 134 } 135 } 136 private static final class NameAndComment { 137 private final byte[] name; 138 private final byte[] comment; 139 private NameAndComment(final byte[] name, final byte[] comment) { 140 this.name = name; 141 this.comment = comment; 142 } 143 } 144 private static class StoredStatisticsStream extends CountingInputStream implements InputStreamStatistics { 145 StoredStatisticsStream(final InputStream in) { 146 super(in); 147 } 148 149 @Override 150 public long getCompressedCount() { 151 return super.getBytesRead(); 152 } 153 154 @Override 155 public long getUncompressedCount() { 156 return getCompressedCount(); 157 } 158 } 159 private static final int HASH_SIZE = 509; 160 static final int NIBLET_MASK = 0x0f; 161 static final int 
BYTE_SHIFT = 8; 162 private static final int POS_0 = 0; 163 164 private static final int POS_1 = 1; 165 166 private static final int POS_2 = 2; 167 168 private static final int POS_3 = 3; 169 170 private static final byte[] ONE_ZERO_BYTE = new byte[1]; 171 172 /** 173 * Length of a "central directory" entry structure without file 174 * name, extra fields or comment. 175 */ 176 private static final int CFH_LEN = 177 /* version made by */ ZipConstants.SHORT 178 /* version needed to extract */ + ZipConstants.SHORT 179 /* general purpose bit flag */ + ZipConstants.SHORT 180 /* compression method */ + ZipConstants.SHORT 181 /* last mod file time */ + ZipConstants.SHORT 182 /* last mod file date */ + ZipConstants.SHORT 183 /* crc-32 */ + ZipConstants.WORD 184 /* compressed size */ + ZipConstants.WORD 185 /* uncompressed size */ + ZipConstants.WORD 186 /* file name length */ + ZipConstants. SHORT 187 /* extra field length */ + ZipConstants.SHORT 188 /* file comment length */ + ZipConstants.SHORT 189 /* disk number start */ + ZipConstants.SHORT 190 /* internal file attributes */ + ZipConstants.SHORT 191 /* external file attributes */ + ZipConstants.WORD 192 /* relative offset of local header */ + ZipConstants.WORD; 193 194 private static final long CFH_SIG = 195 ZipLong.getValue(ZipArchiveOutputStream.CFH_SIG); 196 197 /** 198 * Length of the "End of central directory record" - which is 199 * supposed to be the last structure of the archive - without file 200 * comment. 
201 */ 202 static final int MIN_EOCD_SIZE = 203 /* end of central dir signature */ ZipConstants.WORD 204 /* number of this disk */ + ZipConstants.SHORT 205 /* number of the disk with the */ 206 /* start of the central directory */ + ZipConstants.SHORT 207 /* total number of entries in */ 208 /* the central dir on this disk */ + ZipConstants.SHORT 209 /* total number of entries in */ 210 /* the central dir */ + ZipConstants.SHORT 211 /* size of the central directory */ + ZipConstants.WORD 212 /* offset of start of central */ 213 /* directory with respect to */ 214 /* the starting disk number */ + ZipConstants.WORD 215 /* ZIP file comment length */ + ZipConstants.SHORT; 216 217 /** 218 * Maximum length of the "End of central directory record" with a 219 * file comment. 220 */ 221 private static final int MAX_EOCD_SIZE = MIN_EOCD_SIZE 222 /* maximum length of ZIP file comment */ + ZipConstants.ZIP64_MAGIC_SHORT; 223 224 /** 225 * Offset of the field that holds the location of the length of 226 * the central directory inside the "End of central directory 227 * record" relative to the start of the "End of central directory 228 * record". 229 */ 230 private static final int CFD_LENGTH_OFFSET = 231 /* end of central dir signature */ ZipConstants.WORD 232 /* number of this disk */ + ZipConstants.SHORT 233 /* number of the disk with the */ 234 /* start of the central directory */ + ZipConstants.SHORT 235 /* total number of entries in */ 236 /* the central dir on this disk */ + ZipConstants.SHORT 237 /* total number of entries in */ 238 /* the central dir */ + ZipConstants.SHORT; 239 240 /** 241 * Offset of the field that holds the disk number of the first 242 * central directory entry inside the "End of central directory 243 * record" relative to the start of the "End of central directory 244 * record". 
245 */ 246 private static final int CFD_DISK_OFFSET = 247 /* end of central dir signature */ ZipConstants.WORD 248 /* number of this disk */ + ZipConstants.SHORT; 249 /** 250 * Offset of the field that holds the location of the first 251 * central directory entry inside the "End of central directory 252 * record" relative to the "number of the disk with the start 253 * of the central directory". 254 */ 255 private static final int CFD_LOCATOR_RELATIVE_OFFSET = 256 /* total number of entries in */ 257 /* the central dir on this disk */ + ZipConstants.SHORT 258 /* total number of entries in */ 259 /* the central dir */ + ZipConstants.SHORT 260 /* size of the central directory */ + ZipConstants.WORD; 261 /** 262 * Length of the "Zip64 end of central directory locator" - which 263 * should be right in front of the "end of central directory 264 * record" if one is present at all. 265 */ 266 private static final int ZIP64_EOCDL_LENGTH = 267 /* zip64 end of central dir locator sig */ ZipConstants.WORD 268 /* number of the disk with the start */ 269 /* start of the zip64 end of */ 270 /* central directory */ + ZipConstants.WORD 271 /* relative offset of the zip64 */ 272 /* end of central directory record */ + ZipConstants.DWORD 273 /* total number of disks */ + ZipConstants.WORD; 274 /** 275 * Offset of the field that holds the location of the "Zip64 end 276 * of central directory record" inside the "Zip64 end of central 277 * directory locator" relative to the start of the "Zip64 end of 278 * central directory locator". 
279 */ 280 private static final int ZIP64_EOCDL_LOCATOR_OFFSET = 281 /* zip64 end of central dir locator sig */ ZipConstants.WORD 282 /* number of the disk with the start */ 283 /* start of the zip64 end of */ 284 /* central directory */ + ZipConstants.WORD; 285 /** 286 * Offset of the field that holds the location of the first 287 * central directory entry inside the "Zip64 end of central 288 * directory record" relative to the start of the "Zip64 end of 289 * central directory record". 290 */ 291 private static final int ZIP64_EOCD_CFD_LOCATOR_OFFSET = 292 /* zip64 end of central dir */ 293 /* signature */ ZipConstants.WORD 294 /* size of zip64 end of central */ 295 /* directory record */ + ZipConstants.DWORD 296 /* version made by */ + ZipConstants.SHORT 297 /* version needed to extract */ + ZipConstants.SHORT 298 /* number of this disk */ + ZipConstants.WORD 299 /* number of the disk with the */ 300 /* start of the central directory */ + ZipConstants.WORD 301 /* total number of entries in the */ 302 /* central directory on this disk */ + ZipConstants.DWORD 303 /* total number of entries in the */ 304 /* central directory */ + ZipConstants.DWORD 305 /* size of the central directory */ + ZipConstants.DWORD; 306 /** 307 * Offset of the field that holds the disk number of the first 308 * central directory entry inside the "Zip64 end of central 309 * directory record" relative to the start of the "Zip64 end of 310 * central directory record". 
311 */ 312 private static final int ZIP64_EOCD_CFD_DISK_OFFSET = 313 /* zip64 end of central dir */ 314 /* signature */ ZipConstants.WORD 315 /* size of zip64 end of central */ 316 /* directory record */ + ZipConstants.DWORD 317 /* version made by */ + ZipConstants.SHORT 318 /* version needed to extract */ + ZipConstants.SHORT 319 /* number of this disk */ + ZipConstants.WORD; 320 /** 321 * Offset of the field that holds the location of the first 322 * central directory entry inside the "Zip64 end of central 323 * directory record" relative to the "number of the disk 324 * with the start of the central directory". 325 */ 326 private static final int ZIP64_EOCD_CFD_LOCATOR_RELATIVE_OFFSET = 327 /* total number of entries in the */ 328 /* central directory on this disk */ ZipConstants.DWORD 329 /* total number of entries in the */ 330 /* central directory */ + ZipConstants.DWORD 331 /* size of the central directory */ + ZipConstants.DWORD; 332 /** 333 * Number of bytes in local file header up to the "length of 334 * file name" entry. 335 */ 336 private static final long LFH_OFFSET_FOR_FILENAME_LENGTH = 337 /* local file header signature */ ZipConstants.WORD 338 /* version needed to extract */ + ZipConstants.SHORT 339 /* general purpose bit flag */ + ZipConstants.SHORT 340 /* compression method */ + ZipConstants.SHORT 341 /* last mod file time */ + ZipConstants.SHORT 342 /* last mod file date */ + ZipConstants.SHORT 343 /* crc-32 */ + ZipConstants.WORD 344 /* compressed size */ + ZipConstants.WORD 345 /* uncompressed size */ + (long) ZipConstants.WORD; 346 347 /** 348 * Compares two ZipArchiveEntries based on their offset within the archive. 
349 * 350 * <p>Won't return any meaningful results if one of the entries 351 * isn't part of the archive at all.</p> 352 * 353 * @since 1.1 354 */ 355 private static final Comparator<ZipArchiveEntry> offsetComparator = 356 Comparator.comparingLong(ZipArchiveEntry::getDiskNumberStart) 357 .thenComparingLong(ZipArchiveEntry::getLocalHeaderOffset); 358 359 /** 360 * Closes a ZIP file quietly; throwing no IOException, dooes nothing 361 * on null input. 362 * @param zipFile file to close, can be null 363 */ 364 public static void closeQuietly(final ZipFile zipFile) { 365 IOUtils.closeQuietly(zipFile); 366 } 367 368 /** 369 * List of entries in the order they appear inside the central 370 * directory. 371 */ 372 private final List<ZipArchiveEntry> entries = new LinkedList<>(); 373 374 /** 375 * Maps String to list of ZipArchiveEntrys, name -> actual entries. 376 */ 377 private final Map<String, LinkedList<ZipArchiveEntry>> nameMap = new HashMap<>(HASH_SIZE); 378 379 /** 380 * The encoding to use for file names and the file comment. 381 * 382 * <p>For a list of possible values see <a 383 * href="http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html">http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html</a>. 384 * Defaults to UTF-8.</p> 385 */ 386 private final String encoding; 387 388 /** 389 * The ZIP encoding to use for file names and the file comment. 390 */ 391 private final ZipEncoding zipEncoding; 392 393 /** 394 * File name of actual source. 395 */ 396 private final String archiveName; 397 398 /** 399 * The actual data source. 400 */ 401 private final SeekableByteChannel archive; 402 403 /** 404 * Whether to look for and use Unicode extra fields. 405 */ 406 private final boolean useUnicodeExtraFields; 407 408 /** 409 * Whether the file is closed. 
410 */ 411 private volatile boolean closed = true; 412 413 /** 414 * Whether the ZIP archive is a split ZIP archive 415 */ 416 private final boolean isSplitZipArchive; 417 418 // cached buffers - must only be used locally in the class (COMPRESS-172 - reduce garbage collection) 419 private final byte[] dwordBuf = new byte[ZipConstants.DWORD]; 420 421 private final byte[] wordBuf = new byte[ZipConstants.WORD]; 422 423 private final byte[] cfhBuf = new byte[CFH_LEN]; 424 425 private final byte[] shortBuf = new byte[ZipConstants.SHORT]; 426 427 private final ByteBuffer dwordBbuf = ByteBuffer.wrap(dwordBuf); 428 429 private final ByteBuffer wordBbuf = ByteBuffer.wrap(wordBuf); 430 431 private final ByteBuffer cfhBbuf = ByteBuffer.wrap(cfhBuf); 432 433 private final ByteBuffer shortBbuf = ByteBuffer.wrap(shortBuf); 434 435 private long centralDirectoryStartDiskNumber, centralDirectoryStartRelativeOffset; 436 437 private long centralDirectoryStartOffset; 438 439 private long firstLocalFileHeaderOffset; 440 441 /** 442 * Opens the given file for reading, assuming "UTF8" for file names. 443 * 444 * @param f the archive. 445 * 446 * @throws IOException if an error occurs while reading the file. 447 */ 448 public ZipFile(final File f) throws IOException { 449 this(f, ZipEncodingHelper.UTF8); 450 } 451 452 /** 453 * Opens the given file for reading, assuming the specified 454 * encoding for file names and scanning for Unicode extra fields. 455 * 456 * @param f the archive. 457 * @param encoding the encoding to use for file names, use null 458 * for the platform's default encoding 459 * 460 * @throws IOException if an error occurs while reading the file. 461 */ 462 public ZipFile(final File f, final String encoding) throws IOException { 463 this(f.toPath(), encoding, true); 464 } 465 466 /** 467 * Opens the given file for reading, assuming the specified 468 * encoding for file names. 469 * 470 * @param f the archive. 
471 * @param encoding the encoding to use for file names, use null 472 * for the platform's default encoding 473 * @param useUnicodeExtraFields whether to use InfoZIP Unicode 474 * Extra Fields (if present) to set the file names. 475 * 476 * @throws IOException if an error occurs while reading the file. 477 */ 478 public ZipFile(final File f, final String encoding, final boolean useUnicodeExtraFields) 479 throws IOException { 480 this(f.toPath(), encoding, useUnicodeExtraFields, false); 481 } 482 483 /** 484 * Opens the given file for reading, assuming the specified 485 * encoding for file names. 486 * 487 * <p>By default the central directory record and all local file headers of the archive will be read immediately 488 * which may take a considerable amount of time when the archive is big. The {@code ignoreLocalFileHeader} parameter 489 * can be set to {@code true} which restricts parsing to the central directory. Unfortunately the local file header 490 * may contain information not present inside of the central directory which will not be available when the argument 491 * is set to {@code true}. This includes the content of the Unicode extra field, so setting {@code 492 * ignoreLocalFileHeader} to {@code true} means {@code useUnicodeExtraFields} will be ignored effectively.</p> 493 * 494 * @param f the archive. 495 * @param encoding the encoding to use for file names, use null 496 * for the platform's default encoding 497 * @param useUnicodeExtraFields whether to use InfoZIP Unicode 498 * Extra Fields (if present) to set the file names. 499 * @param ignoreLocalFileHeader whether to ignore information 500 * stored inside the local file header (see the notes in this method's javadoc) 501 * 502 * @throws IOException if an error occurs while reading the file. 
503 * @since 1.19 504 */ 505 public ZipFile(final File f, final String encoding, final boolean useUnicodeExtraFields, 506 final boolean ignoreLocalFileHeader) 507 throws IOException { 508 this(Files.newByteChannel(f.toPath(), EnumSet.of(StandardOpenOption.READ)), 509 f.getAbsolutePath(), encoding, useUnicodeExtraFields, true, ignoreLocalFileHeader); 510 } 511 512 /** 513 * Opens the given path for reading, assuming "UTF8" for file names. 514 * 515 * @param path path to the archive. 516 * @throws IOException if an error occurs while reading the file. 517 * @since 1.22 518 */ 519 public ZipFile(final Path path) throws IOException { 520 this(path, ZipEncodingHelper.UTF8); 521 } 522 523 /** 524 * Opens the given path for reading, assuming the specified 525 * encoding for file names and scanning for Unicode extra fields. 526 * 527 * @param path path to the archive. 528 * @param encoding the encoding to use for file names, use null 529 * for the platform's default encoding 530 * @throws IOException if an error occurs while reading the file. 531 * @since 1.22 532 */ 533 public ZipFile(final Path path, final String encoding) throws IOException { 534 this(path, encoding, true); 535 } 536 537 538 /** 539 * Opens the given path for reading, assuming the specified 540 * encoding for file names. 541 * 542 * @param path path to the archive. 543 * @param encoding the encoding to use for file names, use null 544 * for the platform's default encoding 545 * @param useUnicodeExtraFields whether to use InfoZIP Unicode 546 * Extra Fields (if present) to set the file names. 547 * @throws IOException if an error occurs while reading the file. 548 * @since 1.22 549 */ 550 public ZipFile(final Path path, final String encoding, final boolean useUnicodeExtraFields) 551 throws IOException { 552 this(path, encoding, useUnicodeExtraFields, false); 553 } 554 555 /** 556 * Opens the given path for reading, assuming the specified 557 * encoding for file names. 
558 * <p>By default the central directory record and all local file headers of the archive will be read immediately 559 * which may take a considerable amount of time when the archive is big. The {@code ignoreLocalFileHeader} parameter 560 * can be set to {@code true} which restricts parsing to the central directory. Unfortunately the local file header 561 * may contain information not present inside of the central directory which will not be available when the argument 562 * is set to {@code true}. This includes the content of the Unicode extra field, so setting {@code 563 * ignoreLocalFileHeader} to {@code true} means {@code useUnicodeExtraFields} will be ignored effectively.</p> 564 * 565 * @param path path to the archive. 566 * @param encoding the encoding to use for file names, use null 567 * for the platform's default encoding 568 * @param useUnicodeExtraFields whether to use InfoZIP Unicode 569 * Extra Fields (if present) to set the file names. 570 * @param ignoreLocalFileHeader whether to ignore information 571 * stored inside the local file header (see the notes in this method's javadoc) 572 * @throws IOException if an error occurs while reading the file. 573 * @since 1.22 574 */ 575 public ZipFile(final Path path, final String encoding, final boolean useUnicodeExtraFields, 576 final boolean ignoreLocalFileHeader) 577 throws IOException { 578 this(Files.newByteChannel(path, EnumSet.of(StandardOpenOption.READ)), 579 path.toAbsolutePath().toString(), encoding, useUnicodeExtraFields, 580 true, ignoreLocalFileHeader); 581 } 582 583 /** 584 * Opens the given channel for reading, assuming "UTF8" for file names. 585 * 586 * <p>{@link 587 * org.apache.commons.compress.utils.SeekableInMemoryByteChannel} 588 * allows you to read from an in-memory archive.</p> 589 * 590 * @param channel the archive. 591 * 592 * @throws IOException if an error occurs while reading the file. 
593 * @since 1.13 594 */ 595 public ZipFile(final SeekableByteChannel channel) 596 throws IOException { 597 this(channel, "unknown archive", ZipEncodingHelper.UTF8, true); 598 } 599 600 /** 601 * Opens the given channel for reading, assuming the specified 602 * encoding for file names. 603 * 604 * <p>{@link 605 * org.apache.commons.compress.utils.SeekableInMemoryByteChannel} 606 * allows you to read from an in-memory archive.</p> 607 * 608 * @param channel the archive. 609 * @param encoding the encoding to use for file names, use null 610 * for the platform's default encoding 611 * 612 * @throws IOException if an error occurs while reading the file. 613 * @since 1.13 614 */ 615 public ZipFile(final SeekableByteChannel channel, final String encoding) 616 throws IOException { 617 this(channel, "unknown archive", encoding, true); 618 } 619 620 /** 621 * Opens the given channel for reading, assuming the specified 622 * encoding for file names. 623 * 624 * <p>{@link 625 * org.apache.commons.compress.utils.SeekableInMemoryByteChannel} 626 * allows you to read from an in-memory archive.</p> 627 * 628 * @param channel the archive. 629 * @param archiveName name of the archive, used for error messages only. 630 * @param encoding the encoding to use for file names, use null 631 * for the platform's default encoding 632 * @param useUnicodeExtraFields whether to use InfoZIP Unicode 633 * Extra Fields (if present) to set the file names. 634 * 635 * @throws IOException if an error occurs while reading the file. 636 * @since 1.13 637 */ 638 public ZipFile(final SeekableByteChannel channel, final String archiveName, 639 final String encoding, final boolean useUnicodeExtraFields) 640 throws IOException { 641 this(channel, archiveName, encoding, useUnicodeExtraFields, false, false); 642 } 643 644 /** 645 * Opens the given channel for reading, assuming the specified 646 * encoding for file names. 
647 * 648 * <p>{@link 649 * org.apache.commons.compress.utils.SeekableInMemoryByteChannel} 650 * allows you to read from an in-memory archive.</p> 651 * 652 * <p>By default the central directory record and all local file headers of the archive will be read immediately 653 * which may take a considerable amount of time when the archive is big. The {@code ignoreLocalFileHeader} parameter 654 * can be set to {@code true} which restricts parsing to the central directory. Unfortunately the local file header 655 * may contain information not present inside of the central directory which will not be available when the argument 656 * is set to {@code true}. This includes the content of the Unicode extra field, so setting {@code 657 * ignoreLocalFileHeader} to {@code true} means {@code useUnicodeExtraFields} will be ignored effectively.</p> 658 * 659 * @param channel the archive. 660 * @param archiveName name of the archive, used for error messages only. 661 * @param encoding the encoding to use for file names, use null 662 * for the platform's default encoding 663 * @param useUnicodeExtraFields whether to use InfoZIP Unicode 664 * Extra Fields (if present) to set the file names. 665 * @param ignoreLocalFileHeader whether to ignore information 666 * stored inside the local file header (see the notes in this method's javadoc) 667 * 668 * @throws IOException if an error occurs while reading the file. 
669 * @since 1.19 670 */ 671 public ZipFile(final SeekableByteChannel channel, final String archiveName, 672 final String encoding, final boolean useUnicodeExtraFields, 673 final boolean ignoreLocalFileHeader) 674 throws IOException { 675 this(channel, archiveName, encoding, useUnicodeExtraFields, false, ignoreLocalFileHeader); 676 } 677 678 private ZipFile(final SeekableByteChannel channel, final String archiveName, 679 final String encoding, final boolean useUnicodeExtraFields, 680 final boolean closeOnError, final boolean ignoreLocalFileHeader) 681 throws IOException { 682 isSplitZipArchive = (channel instanceof ZipSplitReadOnlySeekableByteChannel); 683 684 this.archiveName = archiveName; 685 this.encoding = encoding; 686 this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding); 687 this.useUnicodeExtraFields = useUnicodeExtraFields; 688 archive = channel; 689 boolean success = false; 690 try { 691 final Map<ZipArchiveEntry, NameAndComment> entriesWithoutUTF8Flag = 692 populateFromCentralDirectory(); 693 if (!ignoreLocalFileHeader) { 694 resolveLocalFileHeaderData(entriesWithoutUTF8Flag); 695 } 696 fillNameMap(); 697 success = true; 698 } catch (final IOException e) { 699 throw new IOException("Error on ZipFile " + archiveName, e); 700 } finally { 701 closed = !success; 702 if (!success && closeOnError) { 703 IOUtils.closeQuietly(archive); 704 } 705 } 706 } 707 708 /** 709 * Opens the given file for reading, assuming "UTF8". 710 * 711 * @param name name of the archive. 712 * 713 * @throws IOException if an error occurs while reading the file. 714 */ 715 public ZipFile(final String name) throws IOException { 716 this(new File(name).toPath(), ZipEncodingHelper.UTF8); 717 } 718 719 /** 720 * Opens the given file for reading, assuming the specified 721 * encoding for file names, scanning unicode extra fields. 722 * 723 * @param name name of the archive. 
724 * @param encoding the encoding to use for file names, use null 725 * for the platform's default encoding 726 * 727 * @throws IOException if an error occurs while reading the file. 728 */ 729 public ZipFile(final String name, final String encoding) throws IOException { 730 this(new File(name).toPath(), encoding, true); 731 } 732 733 /** 734 * Whether this class is able to read the given entry. 735 * 736 * <p>May return false if it is set up to use encryption or a 737 * compression method that hasn't been implemented yet.</p> 738 * @since 1.1 739 * @param ze the entry 740 * @return whether this class is able to read the given entry. 741 */ 742 public boolean canReadEntryData(final ZipArchiveEntry ze) { 743 return ZipUtil.canHandleEntryData(ze); 744 } 745 746 /** 747 * Closes the archive. 748 * @throws IOException if an error occurs closing the archive. 749 */ 750 @Override 751 public void close() throws IOException { 752 // this flag is only written here and read in finalize() which 753 // can never be run in parallel. 754 // no synchronization needed. 755 closed = true; 756 757 archive.close(); 758 } 759 760 /** 761 * Transfer selected entries from this ZIP file to a given #ZipArchiveOutputStream. 762 * Compression and all other attributes will be as in this file. 
763 * <p>This method transfers entries based on the central directory of the ZIP file.</p> 764 * 765 * @param target The zipArchiveOutputStream to write the entries to 766 * @param predicate A predicate that selects which entries to write 767 * @throws IOException on error 768 */ 769 public void copyRawEntries(final ZipArchiveOutputStream target, final ZipArchiveEntryPredicate predicate) 770 throws IOException { 771 final Enumeration<ZipArchiveEntry> src = getEntriesInPhysicalOrder(); 772 while (src.hasMoreElements()) { 773 final ZipArchiveEntry entry = src.nextElement(); 774 if (predicate.test( entry)) { 775 target.addRawArchiveEntry(entry, getRawInputStream(entry)); 776 } 777 } 778 } 779 780 /** 781 * Creates new BoundedInputStream, according to implementation of 782 * underlying archive channel. 783 */ 784 private BoundedArchiveInputStream createBoundedInputStream(final long start, final long remaining) { 785 if (start < 0 || remaining < 0 || start + remaining < start) { 786 throw new IllegalArgumentException("Corrupted archive, stream boundaries" 787 + " are out of range"); 788 } 789 return archive instanceof FileChannel ? 790 new BoundedFileChannelInputStream(start, remaining) : 791 new BoundedSeekableByteChannelInputStream(start, remaining, archive); 792 } 793 794 private void fillNameMap() { 795 entries.forEach(ze -> { 796 // entries is filled in populateFromCentralDirectory and 797 // never modified 798 final String name = ze.getName(); 799 final LinkedList<ZipArchiveEntry> entriesOfThatName = nameMap.computeIfAbsent(name, k -> new LinkedList<>()); 800 entriesOfThatName.addLast(ze); 801 }); 802 } 803 804 /** 805 * Ensures that the close method of this ZIP file is called when 806 * there are no more references to it. 
807 * @see #close() 808 */ 809 @Override 810 protected void finalize() throws Throwable { 811 try { 812 if (!closed) { 813 close(); 814 } 815 } finally { 816 super.finalize(); 817 } 818 } 819 820 /** 821 * Gets an InputStream for reading the content before the first local file header. 822 * 823 * @return null if there is no content before the first local file header. 824 * Otherwise returns a stream to read the content before the first local file header. 825 * @since 1.23 826 */ 827 public InputStream getContentBeforeFirstLocalFileHeader() { 828 return firstLocalFileHeaderOffset == 0 829 ? null : createBoundedInputStream(0, firstLocalFileHeaderOffset); 830 } 831 832 private long getDataOffset(final ZipArchiveEntry ze) throws IOException { 833 final long s = ze.getDataOffset(); 834 if (s == EntryStreamOffsets.OFFSET_UNKNOWN) { 835 setDataOffset(ze); 836 return ze.getDataOffset(); 837 } 838 return s; 839 } 840 841 /** 842 * Gets the encoding to use for file names and the file comment. 843 * 844 * @return null if using the platform's default character encoding. 845 */ 846 public String getEncoding() { 847 return encoding; 848 } 849 850 /** 851 * Gets all entries. 852 * 853 * <p>Entries will be returned in the same order they appear 854 * within the archive's central directory.</p> 855 * 856 * @return all entries as {@link ZipArchiveEntry} instances 857 */ 858 public Enumeration<ZipArchiveEntry> getEntries() { 859 return Collections.enumeration(entries); 860 } 861 862 /** 863 * Gets all named entries in the same order they appear within 864 * the archive's central directory. 865 * 866 * @param name name of the entry. 867 * @return the Iterable<ZipArchiveEntry> corresponding to the 868 * given name 869 * @since 1.6 870 */ 871 public Iterable<ZipArchiveEntry> getEntries(final String name) { 872 final List<ZipArchiveEntry> entriesOfThatName = nameMap.get(name); 873 return entriesOfThatName != null ? 
entriesOfThatName 874 : Collections.emptyList(); 875 } 876 877 /** 878 * Gets all entries in physical order. 879 * 880 * <p>Entries will be returned in the same order their contents 881 * appear within the archive.</p> 882 * 883 * @return all entries as {@link ZipArchiveEntry} instances 884 * 885 * @since 1.1 886 */ 887 public Enumeration<ZipArchiveEntry> getEntriesInPhysicalOrder() { 888 final ZipArchiveEntry[] allEntries = entries.toArray(ZipArchiveEntry.EMPTY_ARRAY); 889 Arrays.sort(allEntries, offsetComparator); 890 return Collections.enumeration(Arrays.asList(allEntries)); 891 } 892 893 /** 894 * Gets all named entries in the same order their contents 895 * appear within the archive. 896 * 897 * @param name name of the entry. 898 * @return the Iterable<ZipArchiveEntry> corresponding to the 899 * given name 900 * @since 1.6 901 */ 902 public Iterable<ZipArchiveEntry> getEntriesInPhysicalOrder(final String name) { 903 ZipArchiveEntry[] entriesOfThatName = ZipArchiveEntry.EMPTY_ARRAY; 904 final LinkedList<ZipArchiveEntry> linkedList = nameMap.get(name); 905 if (linkedList != null) { 906 entriesOfThatName = linkedList.toArray(entriesOfThatName); 907 Arrays.sort(entriesOfThatName, offsetComparator); 908 } 909 return Arrays.asList(entriesOfThatName); 910 } 911 912 /** 913 * Gets a named entry or {@code null} if no entry by 914 * that name exists. 915 * 916 * <p>If multiple entries with the same name exist the first entry 917 * in the archive's central directory by that name is 918 * returned.</p> 919 * 920 * @param name name of the entry. 921 * @return the ZipArchiveEntry corresponding to the given name - or 922 * {@code null} if not present. 923 */ 924 public ZipArchiveEntry getEntry(final String name) { 925 final LinkedList<ZipArchiveEntry> entriesOfThatName = nameMap.get(name); 926 return entriesOfThatName != null ? entriesOfThatName.getFirst() : null; 927 } 928 929 /** 930 * Gets the offset of the first local file header in the file. 
931 * 932 * @return the length of the content before the first local file header 933 * @since 1.23 934 */ 935 public long getFirstLocalFileHeaderOffset() { 936 return firstLocalFileHeaderOffset; 937 } 938 939 /** 940 * Gets an InputStream for reading the contents of the given entry. 941 * 942 * @param zipEntry the entry to get the stream for. 943 * @return a stream to read the entry from. The returned stream 944 * implements {@link InputStreamStatistics}. 945 * @throws IOException if unable to create an input stream from the zipEntry. 946 */ 947 public InputStream getInputStream(final ZipArchiveEntry zipEntry) 948 throws IOException { 949 if (!(zipEntry instanceof Entry)) { 950 return null; 951 } 952 // cast validity is checked just above 953 ZipUtil.checkRequestedFeatures(zipEntry); 954 955 // doesn't get closed if the method is not supported - which 956 // should never happen because of the checkRequestedFeatures 957 // call above 958 final InputStream is = new BufferedInputStream(getRawInputStream(zipEntry)); //NOSONAR 959 switch (ZipMethod.getMethodByCode(zipEntry.getMethod())) { 960 case STORED: 961 return new StoredStatisticsStream(is); 962 case UNSHRINKING: 963 return new UnshrinkingInputStream(is); 964 case IMPLODING: 965 try { 966 return new ExplodingInputStream(zipEntry.getGeneralPurposeBit().getSlidingDictionarySize(), 967 zipEntry.getGeneralPurposeBit().getNumberOfShannonFanoTrees(), is); 968 } catch (final IllegalArgumentException ex) { 969 throw new IOException("bad IMPLODE data", ex); 970 } 971 case DEFLATED: 972 final Inflater inflater = new Inflater(true); 973 // Inflater with nowrap=true has this odd contract for a zero padding 974 // byte following the data stream; this used to be zlib's requirement 975 // and has been fixed a long time ago, but the contract persists so 976 // we comply. 
977 // https://docs.oracle.com/javase/7/docs/api/java/util/zip/Inflater.html#Inflater(boolean) 978 return new InflaterInputStreamWithStatistics(new SequenceInputStream(is, new ByteArrayInputStream(ONE_ZERO_BYTE)), 979 inflater) { 980 @Override 981 public void close() throws IOException { 982 try { 983 super.close(); 984 } finally { 985 inflater.end(); 986 } 987 } 988 }; 989 case BZIP2: 990 return new BZip2CompressorInputStream(is); 991 case ENHANCED_DEFLATED: 992 return new Deflate64CompressorInputStream(is); 993 case AES_ENCRYPTED: 994 case EXPANDING_LEVEL_1: 995 case EXPANDING_LEVEL_2: 996 case EXPANDING_LEVEL_3: 997 case EXPANDING_LEVEL_4: 998 case JPEG: 999 case LZMA: 1000 case PKWARE_IMPLODING: 1001 case PPMD: 1002 case TOKENIZATION: 1003 case UNKNOWN: 1004 case WAVPACK: 1005 case XZ: 1006 default: 1007 throw new UnsupportedZipFeatureException(ZipMethod.getMethodByCode(zipEntry.getMethod()), zipEntry); 1008 } 1009 } 1010 1011 /** 1012 * Gets the raw stream of the archive entry (compressed form). 1013 * 1014 * <p>This method does not relate to how/if we understand the payload in the 1015 * stream, since we really only intend to move it on to somewhere else.</p> 1016 * 1017 * <p>Since version 1.22, this method will make an attempt to read the entry's data 1018 * stream offset, even if the {@code ignoreLocalFileHeader} parameter was {@code true} 1019 * in the constructor. An IOException can also be thrown from the body of the method 1020 * if this lookup fails for some reason.</p> 1021 * 1022 * @param ze The entry to get the stream for 1023 * @return The raw input stream containing (possibly) compressed data. 1024 * @since 1.11 1025 * @throws IOException if there is a problem reading data offset (added in version 1.22). 
1026 */ 1027 public InputStream getRawInputStream(final ZipArchiveEntry ze) throws IOException { 1028 if (!(ze instanceof Entry)) { 1029 return null; 1030 } 1031 1032 final long start = getDataOffset(ze); 1033 if (start == EntryStreamOffsets.OFFSET_UNKNOWN) { 1034 return null; 1035 } 1036 return createBoundedInputStream(start, ze.getCompressedSize()); 1037 } 1038 1039 /** 1040 * Gets the entry's content as a String if isUnixSymlink() 1041 * returns true for it, otherwise returns null. 1042 * <p>This method assumes the symbolic link's file name uses the 1043 * same encoding that as been specified for this ZipFile.</p> 1044 * 1045 * @param entry ZipArchiveEntry object that represents the symbolic link 1046 * @return entry's content as a String 1047 * @throws IOException problem with content's input stream 1048 * @since 1.5 1049 */ 1050 public String getUnixSymlink(final ZipArchiveEntry entry) throws IOException { 1051 if (entry != null && entry.isUnixSymlink()) { 1052 try (InputStream in = getInputStream(entry)) { 1053 return zipEncoding.decode(IOUtils.toByteArray(in)); 1054 } 1055 } 1056 return null; 1057 } 1058 1059 /** 1060 * Reads the central directory of the given archive and populates 1061 * the internal tables with ZipArchiveEntry instances. 1062 * 1063 * <p>The ZipArchiveEntrys will know all data that can be obtained from 1064 * the central directory alone, but not the data that requires the 1065 * local file header or additional data to be read.</p> 1066 * 1067 * @return a map of zipentries that didn't have the language 1068 * encoding flag set when read. 
1069 */ 1070 private Map<ZipArchiveEntry, NameAndComment> populateFromCentralDirectory() 1071 throws IOException { 1072 final HashMap<ZipArchiveEntry, NameAndComment> noUTF8Flag = 1073 new HashMap<>(); 1074 1075 positionAtCentralDirectory(); 1076 centralDirectoryStartOffset = archive.position(); 1077 1078 wordBbuf.rewind(); 1079 IOUtils.readFully(archive, wordBbuf); 1080 long sig = ZipLong.getValue(wordBuf); 1081 1082 if (sig != CFH_SIG && startsWithLocalFileHeader()) { 1083 throw new IOException("Central directory is empty, can't expand" 1084 + " corrupt archive."); 1085 } 1086 1087 while (sig == CFH_SIG) { 1088 readCentralDirectoryEntry(noUTF8Flag); 1089 wordBbuf.rewind(); 1090 IOUtils.readFully(archive, wordBbuf); 1091 sig = ZipLong.getValue(wordBuf); 1092 } 1093 return noUTF8Flag; 1094 } 1095 1096 /** 1097 * Searches for either the "Zip64 end of central directory 1098 * locator" or the "End of central dir record", parses 1099 * it and positions the stream at the first central directory 1100 * record. 1101 */ 1102 private void positionAtCentralDirectory() 1103 throws IOException { 1104 positionAtEndOfCentralDirectoryRecord(); 1105 boolean found = false; 1106 final boolean searchedForZip64EOCD = 1107 archive.position() > ZIP64_EOCDL_LENGTH; 1108 if (searchedForZip64EOCD) { 1109 archive.position(archive.position() - ZIP64_EOCDL_LENGTH); 1110 wordBbuf.rewind(); 1111 IOUtils.readFully(archive, wordBbuf); 1112 found = Arrays.equals(ZipArchiveOutputStream.ZIP64_EOCD_LOC_SIG, 1113 wordBuf); 1114 } 1115 if (!found) { 1116 // not a ZIP64 archive 1117 if (searchedForZip64EOCD) { 1118 skipBytes(ZIP64_EOCDL_LENGTH - ZipConstants.WORD); 1119 } 1120 positionAtCentralDirectory32(); 1121 } else { 1122 positionAtCentralDirectory64(); 1123 } 1124 } 1125 1126 /** 1127 * Parses the "End of central dir record" and positions 1128 * the stream at the first central directory record. 
1129 * 1130 * Expects stream to be positioned at the beginning of the 1131 * "End of central dir record". 1132 */ 1133 private void positionAtCentralDirectory32() 1134 throws IOException { 1135 final long endOfCentralDirectoryRecordOffset = archive.position(); 1136 if (isSplitZipArchive) { 1137 skipBytes(CFD_DISK_OFFSET); 1138 shortBbuf.rewind(); 1139 IOUtils.readFully(archive, shortBbuf); 1140 centralDirectoryStartDiskNumber = ZipShort.getValue(shortBuf); 1141 1142 skipBytes(CFD_LOCATOR_RELATIVE_OFFSET); 1143 1144 wordBbuf.rewind(); 1145 IOUtils.readFully(archive, wordBbuf); 1146 centralDirectoryStartRelativeOffset = ZipLong.getValue(wordBuf); 1147 ((ZipSplitReadOnlySeekableByteChannel) archive) 1148 .position(centralDirectoryStartDiskNumber, centralDirectoryStartRelativeOffset); 1149 } else { 1150 skipBytes(CFD_LENGTH_OFFSET); 1151 wordBbuf.rewind(); 1152 IOUtils.readFully(archive, wordBbuf); 1153 final long centralDirectoryLength = ZipLong.getValue(wordBuf); 1154 1155 wordBbuf.rewind(); 1156 IOUtils.readFully(archive, wordBbuf); 1157 centralDirectoryStartDiskNumber = 0; 1158 centralDirectoryStartRelativeOffset = ZipLong.getValue(wordBuf); 1159 1160 firstLocalFileHeaderOffset = Long.max( 1161 endOfCentralDirectoryRecordOffset - centralDirectoryLength - centralDirectoryStartRelativeOffset, 1162 0L); 1163 archive.position(centralDirectoryStartRelativeOffset + firstLocalFileHeaderOffset); 1164 } 1165 } 1166 1167 /** 1168 * Parses the "Zip64 end of central directory locator", 1169 * finds the "Zip64 end of central directory record" using the 1170 * parsed information, parses that and positions the stream at the 1171 * first central directory record. 1172 * 1173 * Expects stream to be positioned right behind the "Zip64 1174 * end of central directory locator"'s signature. 
*/
    private void positionAtCentralDirectory64()
        throws IOException {
        // move to the position recorded in the locator
        if (isSplitZipArchive) {
            wordBbuf.rewind();
            IOUtils.readFully(archive, wordBbuf);
            final long eocdDiskNumber = ZipLong.getValue(wordBuf);

            dwordBbuf.rewind();
            IOUtils.readFully(archive, dwordBbuf);
            final long eocdRelativeOffset = ZipEightByteInteger.getLongValue(dwordBuf);
            ((ZipSplitReadOnlySeekableByteChannel) archive)
                .position(eocdDiskNumber, eocdRelativeOffset);
        } else {
            skipBytes(ZIP64_EOCDL_LOCATOR_OFFSET
                      - ZipConstants.WORD /* signature has already been read */);
            dwordBbuf.rewind();
            IOUtils.readFully(archive, dwordBbuf);
            archive.position(ZipEightByteInteger.getLongValue(dwordBuf));
        }

        // the record must start with the ZIP64 EOCD signature
        wordBbuf.rewind();
        IOUtils.readFully(archive, wordBbuf);
        final boolean signatureMatches = Arrays.equals(wordBuf, ZipArchiveOutputStream.ZIP64_EOCD_SIG);
        if (!signatureMatches) {
            throw new ZipException("Archive's ZIP64 end of central "
                                   + "directory locator is corrupt.");
        }

        // read where the central directory starts and go there
        if (isSplitZipArchive) {
            skipBytes(ZIP64_EOCD_CFD_DISK_OFFSET
                      - ZipConstants.WORD /* signature has already been read */);
            wordBbuf.rewind();
            IOUtils.readFully(archive, wordBbuf);
            centralDirectoryStartDiskNumber = ZipLong.getValue(wordBuf);

            skipBytes(ZIP64_EOCD_CFD_LOCATOR_RELATIVE_OFFSET);

            dwordBbuf.rewind();
            IOUtils.readFully(archive, dwordBbuf);
            centralDirectoryStartRelativeOffset = ZipEightByteInteger.getLongValue(dwordBuf);
            ((ZipSplitReadOnlySeekableByteChannel) archive)
                .position(centralDirectoryStartDiskNumber, centralDirectoryStartRelativeOffset);
        } else {
            skipBytes(ZIP64_EOCD_CFD_LOCATOR_OFFSET
                      - ZipConstants.WORD /* signature has already been read */);
            dwordBbuf.rewind();
            IOUtils.readFully(archive, dwordBbuf);
            centralDirectoryStartDiskNumber = 0;
            centralDirectoryStartRelativeOffset = ZipEightByteInteger.getLongValue(dwordBuf);
            archive.position(centralDirectoryStartRelativeOffset);
        }
    }

    /**
     * Searches for the &quot;End of central dir record&quot; and
     * positions the stream at its start.
     *
     * @throws ZipException if the signature cannot be found
     */
    private void positionAtEndOfCentralDirectoryRecord()
        throws IOException {
        if (!tryToLocateSignature(MIN_EOCD_SIZE, MAX_EOCD_SIZE, ZipArchiveOutputStream.EOCD_SIG)) {
            throw new ZipException("Archive is not a ZIP archive");
        }
    }

    /**
     * Reads an individual entry of the central directory, creates a
     * ZipArchiveEntry from it and adds it to the global maps.
     *
     * @param noUTF8Flag map used to collect entries that don't have
     * their UTF-8 flag set and whose name will be set by data read
     * from the local file header later. The current entry may be
     * added to this map.
     */
    private void
        readCentralDirectoryEntry(final Map<ZipArchiveEntry, NameAndComment> noUTF8Flag)
        throws IOException {
        cfhBbuf.rewind();
        IOUtils.readFully(archive, cfhBbuf);
        // cursor into the fixed-size part of the central file header
        int pos = 0;
        final Entry entry = new Entry();

        final int versionMadeBy = ZipShort.getValue(cfhBuf, pos);
        pos += ZipConstants.SHORT;
        entry.setVersionMadeBy(versionMadeBy);
        entry.setPlatform((versionMadeBy >> BYTE_SHIFT) & NIBLET_MASK);

        entry.setVersionRequired(ZipShort.getValue(cfhBuf, pos));
        pos += ZipConstants.SHORT; // version required

        final GeneralPurposeBit gpFlag = GeneralPurposeBit.parse(cfhBuf, pos);
        final boolean hasUTF8Flag = gpFlag.usesUTF8ForNames();
        final ZipEncoding entryEncoding;
        if (hasUTF8Flag) {
            entryEncoding = ZipEncodingHelper.UTF8_ZIP_ENCODING;
            entry.setNameSource(ZipArchiveEntry.NameSource.NAME_WITH_EFS_FLAG);
        } else {
            entryEncoding = zipEncoding;
        }
        entry.setGeneralPurposeBit(gpFlag);
        entry.setRawFlag(ZipShort.getValue(cfhBuf, pos));

        pos += ZipConstants.SHORT;

        //noinspection MagicConstant
        entry.setMethod(ZipShort.getValue(cfhBuf, pos));
        pos += ZipConstants.SHORT;

        entry.setTime(ZipUtil.dosToJavaTime(ZipLong.getValue(cfhBuf, pos)));
        pos += ZipConstants.WORD;

        entry.setCrc(ZipLong.getValue(cfhBuf, pos));
        pos += ZipConstants.WORD;

        final long compressedSize = ZipLong.getValue(cfhBuf, pos);
        if (compressedSize < 0) {
            throw new IOException("broken archive, entry with negative compressed size");
        }
        entry.setCompressedSize(compressedSize);
        pos += ZipConstants.WORD;

        final long uncompressedSize = ZipLong.getValue(cfhBuf, pos);
        if (uncompressedSize < 0) {
            throw new IOException("broken archive, entry with negative size");
        }
        entry.setSize(uncompressedSize);
        pos += ZipConstants.WORD;

        final int fileNameLen = ZipShort.getValue(cfhBuf, pos);
        pos += ZipConstants.SHORT;
        if (fileNameLen < 0) {
            throw new IOException("broken archive, entry with negative fileNameLen");
        }

        final int extraLen = ZipShort.getValue(cfhBuf, pos);
        pos += ZipConstants.SHORT;
        if (extraLen < 0) {
            throw new IOException("broken archive, entry with negative extraLen");
        }

        final int commentLen = ZipShort.getValue(cfhBuf, pos);
        pos += ZipConstants.SHORT;
        if (commentLen < 0) {
            throw new IOException("broken archive, entry with negative commentLen");
        }

        entry.setDiskNumberStart(ZipShort.getValue(cfhBuf, pos));
        pos += ZipConstants.SHORT;

        entry.setInternalAttributes(ZipShort.getValue(cfhBuf, pos));
        pos += ZipConstants.SHORT;

        entry.setExternalAttributes(ZipLong.getValue(cfhBuf, pos));
        pos += ZipConstants.WORD;

        // variable-length part: name, extra data and comment follow the
        // fixed-size header in the archive
        final byte[] rawName = IOUtils.readRange(archive, fileNameLen);
        if (rawName.length < fileNameLen) {
            throw new EOFException();
        }
        entry.setName(entryEncoding.decode(rawName), rawName);

        // LFH offset,
        entry.setLocalHeaderOffset(ZipLong.getValue(cfhBuf, pos) + firstLocalFileHeaderOffset);
        // data offset will be filled later
        entries.add(entry);

        final byte[] cdExtraData = IOUtils.readRange(archive, extraLen);
        if (cdExtraData.length < extraLen) {
            throw new EOFException();
        }
        try {
            entry.setCentralDirectoryExtra(cdExtraData);
        } catch (final RuntimeException ex) {
            final ZipException invalidExtra = new ZipException("Invalid extra data in entry " + entry.getName());
            invalidExtra.initCause(ex);
            throw invalidExtra;
        }

        setSizesAndOffsetFromZip64Extra(entry);
        sanityCheckLFHOffset(entry);

        final byte[] rawComment = IOUtils.readRange(archive, commentLen);
        if (rawComment.length < commentLen) {
            throw new EOFException();
        }
        entry.setComment(entryEncoding.decode(rawComment));

        final boolean mayNeedUnicodeExtraFields = !hasUTF8Flag && useUnicodeExtraFields;
        if (mayNeedUnicodeExtraFields) {
            noUTF8Flag.put(entry, new NameAndComment(rawName, rawComment));
        }

        entry.setStreamContiguous(true);
    }

    /**
     * Walks through all recorded entries and adds the data available
     * from the local file header.
1372 * 1373 * <p>Also records the offsets for the data to read from the 1374 * entries.</p> 1375 */ 1376 private void resolveLocalFileHeaderData(final Map<ZipArchiveEntry, NameAndComment> 1377 entriesWithoutUTF8Flag) 1378 throws IOException { 1379 for (final ZipArchiveEntry zipArchiveEntry : entries) { 1380 // entries is filled in populateFromCentralDirectory and 1381 // never modified 1382 final Entry ze = (Entry) zipArchiveEntry; 1383 final int[] lens = setDataOffset(ze); 1384 final int fileNameLen = lens[0]; 1385 final int extraFieldLen = lens[1]; 1386 skipBytes(fileNameLen); 1387 final byte[] localExtraData = IOUtils.readRange(archive, extraFieldLen); 1388 if (localExtraData.length < extraFieldLen) { 1389 throw new EOFException(); 1390 } 1391 try { 1392 ze.setExtra(localExtraData); 1393 } catch (final RuntimeException ex) { 1394 final ZipException z = new ZipException("Invalid extra data in entry " + ze.getName()); 1395 z.initCause(ex); 1396 throw z; 1397 } 1398 1399 if (entriesWithoutUTF8Flag.containsKey(ze)) { 1400 final NameAndComment nc = entriesWithoutUTF8Flag.get(ze); 1401 ZipUtil.setNameAndCommentFromExtraFields(ze, nc.name, 1402 nc.comment); 1403 } 1404 } 1405 } 1406 1407 private void sanityCheckLFHOffset(final ZipArchiveEntry ze) throws IOException { 1408 if (ze.getDiskNumberStart() < 0) { 1409 throw new IOException("broken archive, entry with negative disk number"); 1410 } 1411 if (ze.getLocalHeaderOffset() < 0) { 1412 throw new IOException("broken archive, entry with negative local file header offset"); 1413 } 1414 if (isSplitZipArchive) { 1415 if (ze.getDiskNumberStart() > centralDirectoryStartDiskNumber) { 1416 throw new IOException("local file header for " + ze.getName() + " starts on a later disk than central directory"); 1417 } 1418 if (ze.getDiskNumberStart() == centralDirectoryStartDiskNumber 1419 && ze.getLocalHeaderOffset() > centralDirectoryStartRelativeOffset) { 1420 throw new IOException("local file header for " + ze.getName() + " starts 
after central directory"); 1421 } 1422 } else if (ze.getLocalHeaderOffset() > centralDirectoryStartOffset) { 1423 throw new IOException("local file header for " + ze.getName() + " starts after central directory"); 1424 } 1425 } 1426 1427 private int[] setDataOffset(final ZipArchiveEntry ze) throws IOException { 1428 long offset = ze.getLocalHeaderOffset(); 1429 if (isSplitZipArchive) { 1430 ((ZipSplitReadOnlySeekableByteChannel) archive) 1431 .position(ze.getDiskNumberStart(), offset + LFH_OFFSET_FOR_FILENAME_LENGTH); 1432 // the offset should be updated to the global offset 1433 offset = archive.position() - LFH_OFFSET_FOR_FILENAME_LENGTH; 1434 } else { 1435 archive.position(offset + LFH_OFFSET_FOR_FILENAME_LENGTH); 1436 } 1437 wordBbuf.rewind(); 1438 IOUtils.readFully(archive, wordBbuf); 1439 wordBbuf.flip(); 1440 wordBbuf.get(shortBuf); 1441 final int fileNameLen = ZipShort.getValue(shortBuf); 1442 wordBbuf.get(shortBuf); 1443 final int extraFieldLen = ZipShort.getValue(shortBuf); 1444 ze.setDataOffset(offset + LFH_OFFSET_FOR_FILENAME_LENGTH 1445 + ZipConstants.SHORT + ZipConstants.SHORT + fileNameLen + extraFieldLen); 1446 if (ze.getDataOffset() + ze.getCompressedSize() > centralDirectoryStartOffset) { 1447 throw new IOException("data for " + ze.getName() + " overlaps with central directory."); 1448 } 1449 return new int[] { fileNameLen, extraFieldLen }; 1450 } 1451 1452 /** 1453 * If the entry holds a Zip64 extended information extra field, 1454 * read sizes from there if the entry's sizes are set to 1455 * 0xFFFFFFFFF, do the same for the offset of the local file 1456 * header. 
1457 * 1458 * <p>Ensures the Zip64 extra either knows both compressed and 1459 * uncompressed size or neither of both as the internal logic in 1460 * ExtraFieldUtils forces the field to create local header data 1461 * even if they are never used - and here a field with only one 1462 * size would be invalid.</p> 1463 */ 1464 private void setSizesAndOffsetFromZip64Extra(final ZipArchiveEntry ze) 1465 throws IOException { 1466 final ZipExtraField extra = 1467 ze.getExtraField(Zip64ExtendedInformationExtraField.HEADER_ID); 1468 if (extra != null && !(extra instanceof Zip64ExtendedInformationExtraField)) { 1469 throw new ZipException("archive contains unparseable zip64 extra field"); 1470 } 1471 final Zip64ExtendedInformationExtraField z64 = 1472 (Zip64ExtendedInformationExtraField) extra; 1473 if (z64 != null) { 1474 final boolean hasUncompressedSize = ze.getSize() == ZipConstants.ZIP64_MAGIC; 1475 final boolean hasCompressedSize = ze.getCompressedSize() == ZipConstants.ZIP64_MAGIC; 1476 final boolean hasRelativeHeaderOffset = 1477 ze.getLocalHeaderOffset() == ZipConstants.ZIP64_MAGIC; 1478 final boolean hasDiskStart = ze.getDiskNumberStart() == ZipConstants.ZIP64_MAGIC_SHORT; 1479 z64.reparseCentralDirectoryData(hasUncompressedSize, 1480 hasCompressedSize, 1481 hasRelativeHeaderOffset, 1482 hasDiskStart); 1483 1484 if (hasUncompressedSize) { 1485 final long size = z64.getSize().getLongValue(); 1486 if (size < 0) { 1487 throw new IOException("broken archive, entry with negative size"); 1488 } 1489 ze.setSize(size); 1490 } else if (hasCompressedSize) { 1491 z64.setSize(new ZipEightByteInteger(ze.getSize())); 1492 } 1493 1494 if (hasCompressedSize) { 1495 final long size = z64.getCompressedSize().getLongValue(); 1496 if (size < 0) { 1497 throw new IOException("broken archive, entry with negative compressed size"); 1498 } 1499 ze.setCompressedSize(size); 1500 } else if (hasUncompressedSize) { 1501 z64.setCompressedSize(new ZipEightByteInteger(ze.getCompressedSize())); 
1502 } 1503 1504 if (hasRelativeHeaderOffset) { 1505 ze.setLocalHeaderOffset(z64.getRelativeHeaderOffset().getLongValue()); 1506 } 1507 1508 if (hasDiskStart) { 1509 ze.setDiskNumberStart(z64.getDiskStartNumber().getValue()); 1510 } 1511 } 1512 } 1513 1514 /** 1515 * Skips the given number of bytes or throws an EOFException if 1516 * skipping failed. 1517 */ 1518 private void skipBytes(final int count) throws IOException { 1519 final long currentPosition = archive.position(); 1520 final long newPosition = currentPosition + count; 1521 if (newPosition > archive.size()) { 1522 throw new EOFException(); 1523 } 1524 archive.position(newPosition); 1525 } 1526 1527 /** 1528 * Checks whether the archive starts with a LFH. If it doesn't, 1529 * it may be an empty archive. 1530 */ 1531 private boolean startsWithLocalFileHeader() throws IOException { 1532 archive.position(firstLocalFileHeaderOffset); 1533 wordBbuf.rewind(); 1534 IOUtils.readFully(archive, wordBbuf); 1535 return Arrays.equals(wordBuf, ZipArchiveOutputStream.LFH_SIG); 1536 } 1537 1538 /** 1539 * Searches the archive backwards from minDistance to maxDistance 1540 * for the given signature, positions the RandomaccessFile right 1541 * at the signature if it has been found. 
*/
    private boolean tryToLocateSignature(final long minDistanceFromEnd,
                                         final long maxDistanceFromEnd,
                                         final byte[] sig) throws IOException {
        long candidate = archive.size() - minDistanceFromEnd;
        final long lowerBound = Math.max(0L, archive.size() - maxDistanceFromEnd);
        boolean located = false;
        // lowerBound is never negative, so a negative candidate ends the
        // search before the first read
        while (candidate >= lowerBound) {
            archive.position(candidate);
            try {
                wordBbuf.rewind();
                IOUtils.readFully(archive, wordBbuf);
                wordBbuf.flip();
            } catch (final EOFException ex) { // NOSONAR
                break;
            }
            // compare byte by byte, stopping at the first mismatch
            located = wordBbuf.get() == sig[POS_0]
                && wordBbuf.get() == sig[POS_1]
                && wordBbuf.get() == sig[POS_2]
                && wordBbuf.get() == sig[POS_3];
            if (located) {
                break;
            }
            candidate--;
        }
        if (located) {
            archive.position(candidate);
        }
        return located;
    }
}