001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, 013 * software distributed under the License is distributed on an 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 * KIND, either express or implied. See the License for the 016 * specific language governing permissions and limitations 017 * under the License. 018 */ 019package org.apache.commons.compress.archivers.zip; 020 021import java.io.ByteArrayInputStream; 022import java.io.ByteArrayOutputStream; 023import java.io.EOFException; 024import java.io.IOException; 025import java.io.InputStream; 026import java.io.PushbackInputStream; 027import java.nio.ByteBuffer; 028import java.util.zip.CRC32; 029import java.util.zip.DataFormatException; 030import java.util.zip.Inflater; 031import java.util.zip.ZipEntry; 032import java.util.zip.ZipException; 033 034import org.apache.commons.compress.archivers.ArchiveEntry; 035import org.apache.commons.compress.archivers.ArchiveInputStream; 036import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream; 037import org.apache.commons.compress.compressors.deflate64.Deflate64CompressorInputStream; 038import org.apache.commons.compress.utils.ArchiveUtils; 039import org.apache.commons.compress.utils.IOUtils; 040 041import static org.apache.commons.compress.archivers.zip.ZipConstants.DWORD; 042import static org.apache.commons.compress.archivers.zip.ZipConstants.SHORT; 043import static 
org.apache.commons.compress.archivers.zip.ZipConstants.WORD;
import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC;

/**
 * Implements an input stream that can read Zip archives.
 *
 * <p>As of Apache Commons Compress it transparently supports Zip64
 * extensions and thus individual entries and archives larger than 4
 * GB or with more than 65536 entries.</p>
 *
 * <p>The {@link ZipFile} class is preferred when reading from files
 * as {@link ZipArchiveInputStream} is limited by not being able to
 * read the central directory header before returning entries. In
 * particular {@link ZipArchiveInputStream}</p>
 *
 * <ul>
 *
 * <li>may return entries that are not part of the central directory
 * at all and shouldn't be considered part of the archive.</li>
 *
 * <li>may return several entries with the same name.</li>
 *
 * <li>will not return internal or external attributes.</li>
 *
 * <li>may return incomplete extra field data.</li>
 *
 * <li>may return unknown sizes and CRC values for entries until the
 * next entry has been reached if the archive uses the data
 * descriptor feature.</li>
 *
 * </ul>
 *
 * @see ZipFile
 * @NotThreadSafe
 */
public class ZipArchiveInputStream extends ArchiveInputStream {

    /** The zip encoding to use for filenames and the file comment. */
    private final ZipEncoding zipEncoding;

    // the provided encoding (for unit tests)
    final String encoding;

    /** Whether to look for and use Unicode extra fields. */
    private final boolean useUnicodeExtraFields;

    /** Wrapped stream, will always be a PushbackInputStream. */
    private final InputStream in;

    /** Inflater used for all deflated entries. */
    private final Inflater inf = new Inflater(true);

    /** Buffer used to read from the wrapped stream. */
    private final ByteBuffer buf = ByteBuffer.allocate(ZipArchiveOutputStream.BUFFER_SIZE);

    /** The entry that is currently being read. */
    private CurrentEntry current = null;

    /** Whether the stream has been closed. */
    private boolean closed = false;

    /** Whether the stream has reached the central directory - and thus found all entries. */
    private boolean hitCentralDirectory = false;

    /**
     * When reading a stored entry that uses the data descriptor this
     * stream has to read the full entry and caches it.  This is the
     * cache.
     */
    private ByteArrayInputStream lastStoredEntry = null;

    /** Whether the stream will try to read STORED entries that use a data descriptor. */
    private boolean allowStoredEntriesWithDataDescriptor = false;

    /** Length of the fixed part of a "local file header" record. */
    private static final int LFH_LEN = 30;
    /*
      local file header signature     WORD
      version needed to extract       SHORT
      general purpose bit flag        SHORT
      compression method              SHORT
      last mod file time              SHORT
      last mod file date              SHORT
      crc-32                          WORD
      compressed size                 WORD
      uncompressed size               WORD
      file name length                SHORT
      extra field length              SHORT
    */

    /** Length of the fixed part of a "central directory" record. */
    private static final int CFH_LEN = 46;
    /*
      central file header signature   WORD
      version made by                 SHORT
      version needed to extract       SHORT
      general purpose bit flag        SHORT
      compression method              SHORT
      last mod file time              SHORT
      last mod file date              SHORT
      crc-32                          WORD
      compressed size                 WORD
      uncompressed size               WORD
      file name length                SHORT
      extra field length              SHORT
      file comment length             SHORT
      disk number start               SHORT
      internal file attributes        SHORT
      external file attributes        WORD
      relative offset of local header WORD
    */

    /** 2^32, used when guessing how many bytes the Inflater really consumed. */
    private static final long TWO_EXP_32 = ZIP64_MAGIC + 1;

    // cached buffers - must only be used locally in the class (COMPRESS-172 - reduce garbage collection)
    private final byte[] lfhBuf = new byte[LFH_LEN];
    private final byte[] skipBuf = new byte[1024];
    private final byte[] shortBuf = new byte[SHORT];
    private final byte[] wordBuf = new byte[WORD];
    private final byte[] twoDwordBuf = new byte[2 * DWORD];

    /** Number of entries handed out by getNextZipEntry so far. */
    private int entriesRead = 0;

    /**
     * Create an instance using UTF-8 encoding
     * @param inputStream the stream to wrap
     */
    public ZipArchiveInputStream(final InputStream inputStream) {
        this(inputStream, ZipEncodingHelper.UTF8);
    }

    /**
     * Create an instance using the specified encoding
     * @param inputStream the stream to wrap
     * @param encoding the encoding to use for file names, use null
     * for the platform's default encoding
     * @since 1.5
     */
    public ZipArchiveInputStream(final InputStream inputStream, final String encoding) {
        this(inputStream, encoding, true);
    }

    /**
     * Create an instance using the specified encoding
     * @param inputStream the stream to wrap
     * @param encoding the encoding to use for file names, use null
     * for the platform's default encoding
     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
     * Extra Fields (if present) to set the file names.
     */
    public ZipArchiveInputStream(final InputStream inputStream, final String encoding, final boolean useUnicodeExtraFields) {
        this(inputStream, encoding, useUnicodeExtraFields, false);
    }

    /**
     * Create an instance using the specified encoding
     * @param inputStream the stream to wrap
     * @param encoding the encoding to use for file names, use null
     * for the platform's default encoding
     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
     * Extra Fields (if present) to set the file names.
     * @param allowStoredEntriesWithDataDescriptor whether the stream
     * will try to read STORED entries that use a data descriptor
     * @since 1.1
     */
    public ZipArchiveInputStream(final InputStream inputStream,
                                 final String encoding,
                                 final boolean useUnicodeExtraFields,
                                 final boolean allowStoredEntriesWithDataDescriptor) {
        this.encoding = encoding;
        zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
        this.useUnicodeExtraFields = useUnicodeExtraFields;
        // pushback capacity must match the read buffer so a whole
        // buffer's worth of bytes can be unread after over-reading
        in = new PushbackInputStream(inputStream, buf.capacity());
        this.allowStoredEntriesWithDataDescriptor =
            allowStoredEntriesWithDataDescriptor;
        // haven't read anything so far
        buf.limit(0);
    }

    /**
     * Reads the next local file header and returns the entry it
     * describes, or {@code null} once the central directory (or the
     * end of the stream) has been reached.
     *
     * @return the next entry or null if there are no more entries
     * @throws IOException if the header cannot be parsed
     */
    public ZipArchiveEntry getNextZipEntry() throws IOException {
        boolean firstEntry = true;
        if (closed || hitCentralDirectory) {
            return null;
        }
        if (current != null) {
            // finish off (and possibly drain) the previous entry
            closeEntry();
            firstEntry = false;
        }

        long currentHeaderOffset = getBytesRead();
        try {
            if (firstEntry) {
                // split archives have a special signature before the
                // first local file header - look for it and fail with
                // the appropriate error message if this is a split
                // archive.
                readFirstLocalFileHeader(lfhBuf);
            } else {
                readFully(lfhBuf);
            }
        } catch (final EOFException e) {
            // no more entries
            return null;
        }

        final ZipLong sig = new ZipLong(lfhBuf);
        if (sig.equals(ZipLong.CFH_SIG) || sig.equals(ZipLong.AED_SIG)) {
            // start of the central directory (or archive extra data
            // record) - no further local entries, consume the rest
            hitCentralDirectory = true;
            skipRemainderOfArchive();
            return null;
        }
        if (!sig.equals(ZipLong.LFH_SIG)) {
            throw new ZipException(String.format("Unexpected record signature: 0X%X", sig.getValue()));
        }

        // parse the fixed-size part of the local file header
        int off = WORD;
        current = new CurrentEntry();

        final int versionMadeBy = ZipShort.getValue(lfhBuf, off);
        off += SHORT;
        current.entry.setPlatform((versionMadeBy >> ZipFile.BYTE_SHIFT) & ZipFile.NIBLET_MASK);

        final GeneralPurposeBit gpFlag = GeneralPurposeBit.parse(lfhBuf, off);
        final boolean hasUTF8Flag = gpFlag.usesUTF8ForNames();
        final ZipEncoding entryEncoding = hasUTF8Flag ? ZipEncodingHelper.UTF8_ZIP_ENCODING : zipEncoding;
        current.hasDataDescriptor = gpFlag.usesDataDescriptor();
        current.entry.setGeneralPurposeBit(gpFlag);

        off += SHORT;

        current.entry.setMethod(ZipShort.getValue(lfhBuf, off));
        off += SHORT;

        final long time = ZipUtil.dosToJavaTime(ZipLong.getValue(lfhBuf, off));
        current.entry.setTime(time);
        off += WORD;

        // CRC and sizes are only meaningful in the LFH when no data
        // descriptor is used - otherwise they trail the entry data
        ZipLong size = null, cSize = null;
        if (!current.hasDataDescriptor) {
            current.entry.setCrc(ZipLong.getValue(lfhBuf, off));
            off += WORD;

            cSize = new ZipLong(lfhBuf, off);
            off += WORD;

            size = new ZipLong(lfhBuf, off);
            off += WORD;
        } else {
            off += 3 * WORD;
        }

        final int fileNameLen = ZipShort.getValue(lfhBuf, off);

        off += SHORT;

        final int extraLen = ZipShort.getValue(lfhBuf, off);
        off += SHORT; // NOSONAR - assignment as documentation

        final byte[] fileName = new byte[fileNameLen];
        readFully(fileName);
        current.entry.setName(entryEncoding.decode(fileName), fileName);
        if (hasUTF8Flag) {
            current.entry.setNameSource(ZipArchiveEntry.NameSource.NAME_WITH_EFS_FLAG);
        }

        final byte[] extraData = new byte[extraLen];
        readFully(extraData);
        current.entry.setExtra(extraData);

        if (!hasUTF8Flag && useUnicodeExtraFields) {
            ZipUtil.setNameAndCommentFromExtraFields(current.entry, fileName, null);
        }

        processZip64Extra(size, cSize);

        current.entry.setLocalHeaderOffset(currentHeaderOffset);
        current.entry.setDataOffset(getBytesRead());
        current.entry.setStreamContiguous(true);

        // set up a decompressing stream for methods that need the
        // compressed size to be known up front (STORED and DEFLATED
        // are handled lazily in read())
        ZipMethod m = ZipMethod.getMethodByCode(current.entry.getMethod());
        if (current.entry.getCompressedSize() != ArchiveEntry.SIZE_UNKNOWN) {
            if (ZipUtil.canHandleEntryData(current.entry) && m != ZipMethod.STORED && m != ZipMethod.DEFLATED) {
                InputStream bis = new BoundedInputStream(in, current.entry.getCompressedSize());
                switch (m) {
                case UNSHRINKING:
                    current.in = new UnshrinkingInputStream(bis);
                    break;
                case IMPLODING:
                    current.in = new ExplodingInputStream(
                        current.entry.getGeneralPurposeBit().getSlidingDictionarySize(),
                        current.entry.getGeneralPurposeBit().getNumberOfShannonFanoTrees(),
                        bis);
                    break;
                case BZIP2:
                    current.in = new BZip2CompressorInputStream(bis);
                    break;
                case ENHANCED_DEFLATED:
                    current.in = new Deflate64CompressorInputStream(bis);
                    break;
                default:
                    // we should never get here as all supported methods have been covered
                    // will cause an error when read is invoked, don't throw an exception here so people can
                    // skip unsupported entries
                    break;
                }
            }
        } else if (m == ZipMethod.ENHANCED_DEFLATED) {
            // Deflate64 can be read without knowing the size up front
            current.in = new Deflate64CompressorInputStream(in);
        }

        entriesRead++;
        return current.entry;
    }

    /**
     * Fills the given array with the first local file header and
     * deals with splitting/spanning markers that may prefix the first
     * LFH.
     */
    private void readFirstLocalFileHeader(final byte[] lfh) throws IOException {
        readFully(lfh);
        final ZipLong sig = new ZipLong(lfh);
        if (sig.equals(ZipLong.DD_SIG)) {
            // the split-archive marker shares its value with the data
            // descriptor signature; a real split archive is not supported
            throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException.Feature.SPLITTING);
        }

        if (sig.equals(ZipLong.SINGLE_SEGMENT_SPLIT_MARKER)) {
            // The archive is not really split as only one segment was
            // needed in the end.  Just skip over the marker.
            final byte[] missedLfhBytes = new byte[4];
            readFully(missedLfhBytes);
            // shift what has already been read and append the next four
            // bytes so lfh again holds a complete local file header
            System.arraycopy(lfh, 4, lfh, 0, LFH_LEN - 4);
            System.arraycopy(missedLfhBytes, 0, lfh, LFH_LEN - 4, 4);
        }
    }

    /**
     * Records whether a Zip64 extra is present and sets the size
     * information from it if sizes are 0xFFFFFFFF and the entry
     * doesn't use a data descriptor.
     */
    private void processZip64Extra(final ZipLong size, final ZipLong cSize) {
        final Zip64ExtendedInformationExtraField z64 =
            (Zip64ExtendedInformationExtraField)
            current.entry.getExtraField(Zip64ExtendedInformationExtraField.HEADER_ID);
        current.usesZip64 = z64 != null;
        if (!current.hasDataDescriptor) {
            if (z64 != null // same as current.usesZip64 but avoids NPE warning
                && (cSize.equals(ZipLong.ZIP64_MAGIC) || size.equals(ZipLong.ZIP64_MAGIC)) ) {
                // the 32-bit fields are the 0xFFFFFFFF marker - the real
                // sizes live in the Zip64 extra field
                current.entry.setCompressedSize(z64.getCompressedSize().getLongValue());
                current.entry.setSize(z64.getSize().getLongValue());
            } else {
                current.entry.setCompressedSize(cSize.getValue());
                current.entry.setSize(size.getValue());
            }
        }
    }

    @Override
    public ArchiveEntry getNextEntry() throws IOException {
        return getNextZipEntry();
    }

    /**
     * Whether this class is able to read the given entry.
     *
     * <p>May return false if it is set up to use encryption or a
     * compression method that hasn't been implemented yet.</p>
     * @since 1.1
     */
    @Override
    public boolean canReadEntryData(final ArchiveEntry ae) {
        if (ae instanceof ZipArchiveEntry) {
            final ZipArchiveEntry ze = (ZipArchiveEntry) ae;
            return ZipUtil.canHandleEntryData(ze)
                && supportsDataDescriptorFor(ze)
                && supportsCompressedSizeFor(ze);
        }
        return false;
    }

    @Override
    public int read(final byte[] buffer, final int offset, final int length) throws IOException {
        if (closed) {
            throw new IOException("The stream is closed");
        }

        if (current == null) {
            return -1;
        }

        // avoid int overflow, check null buffer
        if (offset > buffer.length || length < 0 || offset < 0 || buffer.length - offset < length) {
            throw new ArrayIndexOutOfBoundsException();
        }

        ZipUtil.checkRequestedFeatures(current.entry);
        if (!supportsDataDescriptorFor(current.entry)) {
            throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException.Feature.DATA_DESCRIPTOR,
                    current.entry);
        }
        if (!supportsCompressedSizeFor(current.entry)) {
            throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException.Feature.UNKNOWN_COMPRESSED_SIZE,
                    current.entry);
        }

        // dispatch on the compression method; methods other than
        // STORED/DEFLATED read from the stream set up in getNextZipEntry
        int read;
        if (current.entry.getMethod() == ZipArchiveOutputStream.STORED) {
            read = readStored(buffer, offset, length);
        } else if (current.entry.getMethod() == ZipArchiveOutputStream.DEFLATED) {
            read = readDeflated(buffer, offset, length);
        } else if (current.entry.getMethod() == ZipMethod.UNSHRINKING.getCode()
                || current.entry.getMethod() == ZipMethod.IMPLODING.getCode()
                || current.entry.getMethod() == ZipMethod.ENHANCED_DEFLATED.getCode()
                || current.entry.getMethod() == ZipMethod.BZIP2.getCode()) {
            read = current.in.read(buffer, offset, length);
        } else {
            throw new UnsupportedZipFeatureException(ZipMethod.getMethodByCode(current.entry.getMethod()),
                    current.entry);
        }

        if (read >= 0) {
            // keep the running CRC of the uncompressed data
            current.crc.update(buffer, offset, read);
        }

        return read;
    }

    /**
     * Implementation of read for STORED entries.
     */
    private int readStored(final byte[] buffer, final int offset, final int length) throws IOException {

        if (current.hasDataDescriptor) {
            // size is unknown up front - cache the whole entry first
            if (lastStoredEntry == null) {
                readStoredEntry();
            }
            return lastStoredEntry.read(buffer, offset, length);
        }

        // getSize() is used here; for STORED entries the compressed and
        // uncompressed sizes coincide
        final long csize = current.entry.getSize();
        if (current.bytesRead >= csize) {
            return -1;
        }

        // refill the buffer once it has been fully consumed
        if (buf.position() >= buf.limit()) {
            buf.position(0);
            final int l = in.read(buf.array());
            if (l == -1) {
                return -1;
            }
            buf.limit(l);

            count(l);
            current.bytesReadFromStream += l;
        }

        // never hand out more bytes than the entry holds
        int toRead = Math.min(buf.remaining(), length);
        if ((csize - current.bytesRead) < toRead) {
            // if it is smaller than toRead then it fits into an int
            toRead = (int) (csize - current.bytesRead);
        }
        buf.get(buffer, offset, toRead);
        current.bytesRead += toRead;
        return toRead;
    }

    /**
     * Implementation of read for DEFLATED entries.
     */
    private int readDeflated(final byte[] buffer, final int offset, final int length) throws IOException {
        final int read = readFromInflater(buffer, offset, length);
        if (read <= 0) {
            if (inf.finished()) {
                return -1;
            } else if (inf.needsDictionary()) {
                throw new ZipException("This archive needs a preset dictionary"
                                       + " which is not supported by Commons"
                                       + " Compress.");
            } else if (read == -1) {
                // underlying stream hit EOF before the deflate stream ended
                throw new IOException("Truncated ZIP file");
            }
        }
        return read;
    }

    /**
     * Potentially reads more bytes to fill the inflater's buffer and
     * reads from it.
529 */ 530 private int readFromInflater(final byte[] buffer, final int offset, final int length) throws IOException { 531 int read = 0; 532 do { 533 if (inf.needsInput()) { 534 final int l = fill(); 535 if (l > 0) { 536 current.bytesReadFromStream += buf.limit(); 537 } else if (l == -1) { 538 return -1; 539 } else { 540 break; 541 } 542 } 543 try { 544 read = inf.inflate(buffer, offset, length); 545 } catch (final DataFormatException e) { 546 throw (IOException) new ZipException(e.getMessage()).initCause(e); 547 } 548 } while (read == 0 && inf.needsInput()); 549 return read; 550 } 551 552 @Override 553 public void close() throws IOException { 554 if (!closed) { 555 closed = true; 556 try { 557 in.close(); 558 } finally { 559 inf.end(); 560 } 561 } 562 } 563 564 /** 565 * Skips over and discards value bytes of data from this input 566 * stream. 567 * 568 * <p>This implementation may end up skipping over some smaller 569 * number of bytes, possibly 0, if and only if it reaches the end 570 * of the underlying stream.</p> 571 * 572 * <p>The actual number of bytes skipped is returned.</p> 573 * 574 * @param value the number of bytes to be skipped. 575 * @return the actual number of bytes skipped. 576 * @throws IOException - if an I/O error occurs. 577 * @throws IllegalArgumentException - if value is negative. 578 */ 579 @Override 580 public long skip(final long value) throws IOException { 581 if (value >= 0) { 582 long skipped = 0; 583 while (skipped < value) { 584 final long rem = value - skipped; 585 final int x = read(skipBuf, 0, (int) (skipBuf.length > rem ? rem : skipBuf.length)); 586 if (x == -1) { 587 return skipped; 588 } 589 skipped += x; 590 } 591 return skipped; 592 } 593 throw new IllegalArgumentException(); 594 } 595 596 /** 597 * Checks if the signature matches what is expected for a zip file. 598 * Does not currently handle self-extracting zips which may have arbitrary 599 * leading content. 
600 * 601 * @param signature the bytes to check 602 * @param length the number of bytes to check 603 * @return true, if this stream is a zip archive stream, false otherwise 604 */ 605 public static boolean matches(final byte[] signature, final int length) { 606 if (length < ZipArchiveOutputStream.LFH_SIG.length) { 607 return false; 608 } 609 610 return checksig(signature, ZipArchiveOutputStream.LFH_SIG) // normal file 611 || checksig(signature, ZipArchiveOutputStream.EOCD_SIG) // empty zip 612 || checksig(signature, ZipArchiveOutputStream.DD_SIG) // split zip 613 || checksig(signature, ZipLong.SINGLE_SEGMENT_SPLIT_MARKER.getBytes()); 614 } 615 616 private static boolean checksig(final byte[] signature, final byte[] expected) { 617 for (int i = 0; i < expected.length; i++) { 618 if (signature[i] != expected[i]) { 619 return false; 620 } 621 } 622 return true; 623 } 624 625 /** 626 * Closes the current ZIP archive entry and positions the underlying 627 * stream to the beginning of the next entry. All per-entry variables 628 * and data structures are cleared. 629 * <p> 630 * If the compressed size of this entry is included in the entry header, 631 * then any outstanding bytes are simply skipped from the underlying 632 * stream without uncompressing them. This allows an entry to be safely 633 * closed even if the compression method is unsupported. 634 * <p> 635 * In case we don't know the compressed size of this entry or have 636 * already buffered too much data from the underlying stream to support 637 * uncompression, then the uncompression process is completed and the 638 * end position of the stream is adjusted based on the result of that 639 * process. 
     *
     * @throws IOException if an error occurs
     */
    private void closeEntry() throws IOException {
        if (closed) {
            throw new IOException("The stream is closed");
        }
        if (current == null) {
            return;
        }

        // Ensure all entry bytes are read
        if (currentEntryHasOutstandingBytes()) {
            drainCurrentEntryData();
        } else {
            // this is guaranteed to exhaust the stream
            skip(Long.MAX_VALUE); //NOSONAR

            final long inB = current.entry.getMethod() == ZipArchiveOutputStream.DEFLATED
                ? getBytesInflated() : current.bytesRead;

            // this is at most a single read() operation and can't
            // exceed the range of int
            final int diff = (int) (current.bytesReadFromStream - inB);

            // Pushback any required bytes
            if (diff > 0) {
                pushback(buf.array(), buf.limit() - diff, diff);
                current.bytesReadFromStream -= diff;
            }

            // Drain remainder of entry if not all data bytes were required
            if (currentEntryHasOutstandingBytes()) {
                drainCurrentEntryData();
            }
        }

        if (lastStoredEntry == null && current.hasDataDescriptor) {
            readDataDescriptor();
        }

        // reset per-entry state so the next entry starts clean
        inf.reset();
        buf.clear().flip();
        current = null;
        lastStoredEntry = null;
    }

    /**
     * If the compressed size of the current entry is included in the entry header
     * and there are any outstanding bytes in the underlying stream, then
     * this returns true.
     *
     * @return true, if current entry is determined to have outstanding bytes, false otherwise
     */
    private boolean currentEntryHasOutstandingBytes() {
        return current.bytesReadFromStream <= current.entry.getCompressedSize()
            && !current.hasDataDescriptor;
    }

    /**
     * Read all data of the current entry from the underlying stream
     * that hasn't been read, yet.
702 */ 703 private void drainCurrentEntryData() throws IOException { 704 long remaining = current.entry.getCompressedSize() - current.bytesReadFromStream; 705 while (remaining > 0) { 706 final long n = in.read(buf.array(), 0, (int) Math.min(buf.capacity(), remaining)); 707 if (n < 0) { 708 throw new EOFException("Truncated ZIP entry: " 709 + ArchiveUtils.sanitize(current.entry.getName())); 710 } 711 count(n); 712 remaining -= n; 713 } 714 } 715 716 /** 717 * Get the number of bytes Inflater has actually processed. 718 * 719 * <p>for Java < Java7 the getBytes* methods in 720 * Inflater/Deflater seem to return unsigned ints rather than 721 * longs that start over with 0 at 2^32.</p> 722 * 723 * <p>The stream knows how many bytes it has read, but not how 724 * many the Inflater actually consumed - it should be between the 725 * total number of bytes read for the entry and the total number 726 * minus the last read operation. Here we just try to make the 727 * value close enough to the bytes we've read by assuming the 728 * number of bytes consumed must be smaller than (or equal to) the 729 * number of bytes read but not smaller by more than 2^32.</p> 730 */ 731 private long getBytesInflated() { 732 long inB = inf.getBytesRead(); 733 if (current.bytesReadFromStream >= TWO_EXP_32) { 734 while (inB + TWO_EXP_32 <= current.bytesReadFromStream) { 735 inB += TWO_EXP_32; 736 } 737 } 738 return inB; 739 } 740 741 private int fill() throws IOException { 742 if (closed) { 743 throw new IOException("The stream is closed"); 744 } 745 final int length = in.read(buf.array()); 746 if (length > 0) { 747 buf.limit(length); 748 count(buf.limit()); 749 inf.setInput(buf.array(), 0, buf.limit()); 750 } 751 return length; 752 } 753 754 private void readFully(final byte[] b) throws IOException { 755 final int count = IOUtils.readFully(in, b); 756 count(count); 757 if (count < b.length) { 758 throw new EOFException(); 759 } 760 } 761 762 private void readDataDescriptor() throws IOException { 
        readFully(wordBuf);
        ZipLong val = new ZipLong(wordBuf);
        if (ZipLong.DD_SIG.equals(val)) {
            // data descriptor with signature, skip sig
            readFully(wordBuf);
            val = new ZipLong(wordBuf);
        }
        // first field (after the optional signature) is the CRC
        current.entry.setCrc(val.getValue());

        // if there is a ZIP64 extra field, sizes are eight bytes
        // each, otherwise four bytes each.  Unfortunately some
        // implementations - namely Java7 - use eight bytes without
        // using a ZIP64 extra field -
        // http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=7073588

        // just read 16 bytes and check whether bytes nine to twelve
        // look like one of the signatures of what could follow a data
        // descriptor (ignoring archive decryption headers for now).
        // If so, push back eight bytes and assume sizes are four
        // bytes, otherwise sizes are eight bytes each.
        readFully(twoDwordBuf);
        final ZipLong potentialSig = new ZipLong(twoDwordBuf, DWORD);
        if (potentialSig.equals(ZipLong.CFH_SIG) || potentialSig.equals(ZipLong.LFH_SIG)) {
            // sizes are four bytes each - the trailing eight bytes
            // already belong to the next record, push them back
            pushback(twoDwordBuf, DWORD, DWORD);
            current.entry.setCompressedSize(ZipLong.getValue(twoDwordBuf));
            current.entry.setSize(ZipLong.getValue(twoDwordBuf, WORD));
        } else {
            // eight-byte sizes
            current.entry.setCompressedSize(ZipEightByteInteger.getLongValue(twoDwordBuf));
            current.entry.setSize(ZipEightByteInteger.getLongValue(twoDwordBuf, DWORD));
        }
    }

    /**
     * Whether this entry requires a data descriptor this library can work with.
     *
     * @return true if allowStoredEntriesWithDataDescriptor is true,
     * the entry doesn't require any data descriptor or the method is
     * DEFLATED or ENHANCED_DEFLATED.
     */
    private boolean supportsDataDescriptorFor(final ZipArchiveEntry entry) {
        return !entry.getGeneralPurposeBit().usesDataDescriptor()

            || (allowStoredEntriesWithDataDescriptor && entry.getMethod() == ZipEntry.STORED)
            || entry.getMethod() == ZipEntry.DEFLATED
            || entry.getMethod() == ZipMethod.ENHANCED_DEFLATED.getCode();
    }

    /**
     * Whether the compressed size for the entry is either known or
     * not required by the compression method being used.
     */
    private boolean supportsCompressedSizeFor(final ZipArchiveEntry entry) {
        return entry.getCompressedSize() != ArchiveEntry.SIZE_UNKNOWN
            || entry.getMethod() == ZipEntry.DEFLATED
            || entry.getMethod() == ZipMethod.ENHANCED_DEFLATED.getCode()
            || (entry.getGeneralPurposeBit().usesDataDescriptor()
                && allowStoredEntriesWithDataDescriptor
                && entry.getMethod() == ZipEntry.STORED);
    }

    /**
     * Caches a stored entry that uses the data descriptor.
     *
     * <ul>
     * <li>Reads a stored entry until the signature of a local file
     * header, central directory header or data descriptor has been
     * found.</li>
     * <li>Stores all entry data in lastStoredEntry.</li>
     * <li>Rewinds the stream to position at the data
     * descriptor.</li>
     * <li>reads the data descriptor</li>
     * </ul>
     *
     * <p>After calling this method the entry should know its size,
     * the entry's data is cached and the stream is positioned at the
     * next local file or central directory header.</p>
     */
    private void readStoredEntry() throws IOException {
        final ByteArrayOutputStream bos = new ByteArrayOutputStream();
        int off = 0;
        boolean done = false;

        // length of DD without signature
        final int ddLen = current.usesZip64 ?
WORD + 2 * DWORD : 3 * WORD; 847 848 while (!done) { 849 final int r = in.read(buf.array(), off, ZipArchiveOutputStream.BUFFER_SIZE - off); 850 if (r <= 0) { 851 // read the whole archive without ever finding a 852 // central directory 853 throw new IOException("Truncated ZIP file"); 854 } 855 if (r + off < 4) { 856 // buffer too small to check for a signature, loop 857 off += r; 858 continue; 859 } 860 861 done = bufferContainsSignature(bos, off, r, ddLen); 862 if (!done) { 863 off = cacheBytesRead(bos, off, r, ddLen); 864 } 865 } 866 867 final byte[] b = bos.toByteArray(); 868 lastStoredEntry = new ByteArrayInputStream(b); 869 } 870 871 private static final byte[] LFH = ZipLong.LFH_SIG.getBytes(); 872 private static final byte[] CFH = ZipLong.CFH_SIG.getBytes(); 873 private static final byte[] DD = ZipLong.DD_SIG.getBytes(); 874 875 /** 876 * Checks whether the current buffer contains the signature of a 877 * "data descriptor", "local file header" or 878 * "central directory entry". 879 * 880 * <p>If it contains such a signature, reads the data descriptor 881 * and positions the stream right after the data descriptor.</p> 882 */ 883 private boolean bufferContainsSignature(final ByteArrayOutputStream bos, final int offset, final int lastRead, final int expectedDDLen) 884 throws IOException { 885 886 boolean done = false; 887 int readTooMuch = 0; 888 for (int i = 0; !done && i < lastRead - 4; i++) { 889 if (buf.array()[i] == LFH[0] && buf.array()[i + 1] == LFH[1]) { 890 if ((buf.array()[i + 2] == LFH[2] && buf.array()[i + 3] == LFH[3]) 891 || (buf.array()[i] == CFH[2] && buf.array()[i + 3] == CFH[3])) { 892 // found a LFH or CFH: 893 readTooMuch = offset + lastRead - i - expectedDDLen; 894 done = true; 895 } 896 else if (buf.array()[i + 2] == DD[2] && buf.array()[i + 3] == DD[3]) { 897 // found DD: 898 readTooMuch = offset + lastRead - i; 899 done = true; 900 } 901 if (done) { 902 // * push back bytes read in excess as well as the data 903 // descriptor 904 // * 
copy the remaining bytes to cache 905 // * read data descriptor 906 pushback(buf.array(), offset + lastRead - readTooMuch, readTooMuch); 907 bos.write(buf.array(), 0, i); 908 readDataDescriptor(); 909 } 910 } 911 } 912 return done; 913 } 914 915 /** 916 * If the last read bytes could hold a data descriptor and an 917 * incomplete signature then save the last bytes to the front of 918 * the buffer and cache everything in front of the potential data 919 * descriptor into the given ByteArrayOutputStream. 920 * 921 * <p>Data descriptor plus incomplete signature (3 bytes in the 922 * worst case) can be 20 bytes max.</p> 923 */ 924 private int cacheBytesRead(final ByteArrayOutputStream bos, int offset, final int lastRead, final int expecteDDLen) { 925 final int cacheable = offset + lastRead - expecteDDLen - 3; 926 if (cacheable > 0) { 927 bos.write(buf.array(), 0, cacheable); 928 System.arraycopy(buf.array(), cacheable, buf.array(), 0, expecteDDLen + 3); 929 offset = expecteDDLen + 3; 930 } else { 931 offset += lastRead; 932 } 933 return offset; 934 } 935 936 private void pushback(final byte[] buf, final int offset, final int length) throws IOException { 937 ((PushbackInputStream) in).unread(buf, offset, length); 938 pushedBackBytes(length); 939 } 940 941 // End of Central Directory Record 942 // end of central dir signature WORD 943 // number of this disk SHORT 944 // number of the disk with the 945 // start of the central directory SHORT 946 // total number of entries in the 947 // central directory on this disk SHORT 948 // total number of entries in 949 // the central directory SHORT 950 // size of the central directory WORD 951 // offset of start of central 952 // directory with respect to 953 // the starting disk number WORD 954 // .ZIP file comment length SHORT 955 // .ZIP file comment up to 64KB 956 // 957 958 /** 959 * Reads the stream until it find the "End of central directory 960 * record" and consumes it as well. 
     */
    private void skipRemainderOfArchive() throws IOException {
        // skip over central directory. One LFH has been read too much
        // already. The calculation discounts file names and extra
        // data so it will be too short.
        realSkip((long) entriesRead * CFH_LEN - LFH_LEN);
        findEocdRecord();
        // skip the fixed-size remainder of the EOCD record; the
        // four signature bytes were consumed by findEocdRecord()
        realSkip((long) ZipFile.MIN_EOCD_SIZE - WORD /* signature */ - SHORT /* comment len */);
        // read the comment length, then skip the file comment itself
        readFully(shortBuf);
        // file comment
        realSkip(ZipShort.getValue(shortBuf));
    }

    /**
     * Reads forward until the signature of the "End of central
     * directory" record is found.
     */
    private void findEocdRecord() throws IOException {
        int currentByte = -1;
        boolean skipReadCall = false;
        // when a mismatching byte is itself the first byte of the
        // signature, skipReadCall makes the loop re-examine it
        // instead of reading a fresh byte
        while (skipReadCall || (currentByte = readOneByte()) > -1) {
            skipReadCall = false;
            if (!isFirstByteOfEocdSig(currentByte)) {
                continue;
            }
            // second signature byte
            currentByte = readOneByte();
            if (currentByte != ZipArchiveOutputStream.EOCD_SIG[1]) {
                if (currentByte == -1) {
                    break;
                }
                skipReadCall = isFirstByteOfEocdSig(currentByte);
                continue;
            }
            // third signature byte
            currentByte = readOneByte();
            if (currentByte != ZipArchiveOutputStream.EOCD_SIG[2]) {
                if (currentByte == -1) {
                    break;
                }
                skipReadCall = isFirstByteOfEocdSig(currentByte);
                continue;
            }
            // fourth signature byte - stop on a full match or EOF
            currentByte = readOneByte();
            if (currentByte == -1
                || currentByte == ZipArchiveOutputStream.EOCD_SIG[3]) {
                break;
            }
            skipReadCall = isFirstByteOfEocdSig(currentByte);
        }
    }

    /**
     * Skips bytes by reading from the underlying stream rather than
     * the (potentially inflating) archive stream - which {@link
     * #skip} would do.
     *
     * Also updates bytes-read counter.
     */
    private void realSkip(final long value) throws IOException {
        if (value >= 0) {
            long skipped = 0;
            while (skipped < value) {
                final long rem = value - skipped;
                // read at most the remaining byte count per iteration,
                // capped at the skip buffer's size
                final int x = in.read(skipBuf, 0, (int) (skipBuf.length > rem ? rem : skipBuf.length));
                if (x == -1) {
                    // underlying stream exhausted before the requested
                    // count was skipped - return silently
                    return;
                }
                count(x);
                skipped += x;
            }
            return;
        }
        throw new IllegalArgumentException();
    }

    /**
     * Reads bytes by reading from the underlying stream rather than
     * the (potentially inflating) archive stream - which {@link #read} would do.
     *
     * Also updates bytes-read counter.
     */
    private int readOneByte() throws IOException {
        final int b = in.read();
        if (b != -1) {
            count(1);
        }
        return b;
    }

    /**
     * Returns true if {@code b} equals the first byte of the
     * "End of central directory" signature.
     */
    private boolean isFirstByteOfEocdSig(final int b) {
        return b == ZipArchiveOutputStream.EOCD_SIG[0];
    }

    /**
     * Structure collecting information for the entry that is
     * currently being read.
     */
    private static final class CurrentEntry {

        /**
         * Current ZIP entry.
         */
        private final ZipArchiveEntry entry = new ZipArchiveEntry();

        /**
         * Does the entry use a data descriptor?
         */
        private boolean hasDataDescriptor;

        /**
         * Does the entry have a ZIP64 extended information extra field.
         */
        private boolean usesZip64;

        /**
         * Number of bytes of entry content read by the client if the
         * entry is STORED.
         */
        private long bytesRead;

        /**
         * Number of bytes of entry content read so far from the stream.
         *
         * <p>This may be more than the actual entry's length as some
         * stuff gets buffered up and needs to be pushed back when the
         * end of the entry has been reached.</p>
         */
        private long bytesReadFromStream;

        /**
         * The checksum calculated as the current entry is read.
1091 */ 1092 private final CRC32 crc = new CRC32(); 1093 1094 /** 1095 * The input stream decompressing the data for shrunk and imploded entries. 1096 */ 1097 private InputStream in; 1098 } 1099 1100 /** 1101 * Bounded input stream adapted from commons-io 1102 */ 1103 private class BoundedInputStream extends InputStream { 1104 1105 /** the wrapped input stream */ 1106 private final InputStream in; 1107 1108 /** the max length to provide */ 1109 private final long max; 1110 1111 /** the number of bytes already returned */ 1112 private long pos = 0; 1113 1114 /** 1115 * Creates a new <code>BoundedInputStream</code> that wraps the given input 1116 * stream and limits it to a certain size. 1117 * 1118 * @param in The wrapped input stream 1119 * @param size The maximum number of bytes to return 1120 */ 1121 public BoundedInputStream(final InputStream in, final long size) { 1122 this.max = size; 1123 this.in = in; 1124 } 1125 1126 @Override 1127 public int read() throws IOException { 1128 if (max >= 0 && pos >= max) { 1129 return -1; 1130 } 1131 final int result = in.read(); 1132 pos++; 1133 count(1); 1134 current.bytesReadFromStream++; 1135 return result; 1136 } 1137 1138 @Override 1139 public int read(final byte[] b) throws IOException { 1140 return this.read(b, 0, b.length); 1141 } 1142 1143 @Override 1144 public int read(final byte[] b, final int off, final int len) throws IOException { 1145 if (max >= 0 && pos >= max) { 1146 return -1; 1147 } 1148 final long maxRead = max >= 0 ? Math.min(len, max - pos) : len; 1149 final int bytesRead = in.read(b, off, (int) maxRead); 1150 1151 if (bytesRead == -1) { 1152 return -1; 1153 } 1154 1155 pos += bytesRead; 1156 count(bytesRead); 1157 current.bytesReadFromStream += bytesRead; 1158 return bytesRead; 1159 } 1160 1161 @Override 1162 public long skip(final long n) throws IOException { 1163 final long toSkip = max >= 0 ? 
Math.min(n, max - pos) : n; 1164 final long skippedBytes = in.skip(toSkip); 1165 pos += skippedBytes; 1166 return skippedBytes; 1167 } 1168 1169 @Override 1170 public int available() throws IOException { 1171 if (max >= 0 && pos >= max) { 1172 return 0; 1173 } 1174 return in.available(); 1175 } 1176 } 1177}