001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, 013 * software distributed under the License is distributed on an 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 * KIND, either express or implied. See the License for the 016 * specific language governing permissions and limitations 017 * under the License. 018 */ 019package org.apache.commons.compress.archivers.zip; 020 021import java.io.ByteArrayInputStream; 022import java.io.ByteArrayOutputStream; 023import java.io.EOFException; 024import java.io.IOException; 025import java.io.InputStream; 026import java.io.PushbackInputStream; 027import java.nio.ByteBuffer; 028import java.util.zip.CRC32; 029import java.util.zip.DataFormatException; 030import java.util.zip.Inflater; 031import java.util.zip.ZipEntry; 032import java.util.zip.ZipException; 033 034import org.apache.commons.compress.archivers.ArchiveEntry; 035import org.apache.commons.compress.archivers.ArchiveInputStream; 036import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream; 037import org.apache.commons.compress.utils.ArchiveUtils; 038import org.apache.commons.compress.utils.IOUtils; 039 040import static org.apache.commons.compress.archivers.zip.ZipConstants.DWORD; 041import static org.apache.commons.compress.archivers.zip.ZipConstants.SHORT; 042import static org.apache.commons.compress.archivers.zip.ZipConstants.WORD; 043import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC; 044 
/**
 * Implements an input stream that can read Zip archives.
 *
 * <p>As of Apache Commons Compress it transparently supports Zip64
 * extensions and thus individual entries and archives larger than 4
 * GB or with more than 65536 entries.</p>
 *
 * <p>The {@link ZipFile} class is preferred when reading from files
 * as {@link ZipArchiveInputStream} is limited by not being able to
 * read the central directory header before returning entries. In
 * particular {@link ZipArchiveInputStream}</p>
 *
 * <ul>
 *
 * <li>may return entries that are not part of the central directory
 * at all and shouldn't be considered part of the archive.</li>
 *
 * <li>may return several entries with the same name.</li>
 *
 * <li>will not return internal or external attributes.</li>
 *
 * <li>may return incomplete extra field data.</li>
 *
 * <li>may return unknown sizes and CRC values for entries until the
 * next entry has been reached if the archive uses the data
 * descriptor feature.</li>
 *
 * </ul>
 *
 * @see ZipFile
 * @NotThreadSafe
 */
public class ZipArchiveInputStream extends ArchiveInputStream {

    /** The zip encoding to use for filenames and the file comment. */
    private final ZipEncoding zipEncoding;

    // the provided encoding (for unit tests)
    final String encoding;

    /** Whether to look for and use Unicode extra fields. */
    private final boolean useUnicodeExtraFields;

    /** Wrapped stream, will always be a PushbackInputStream. */
    private final InputStream in;

    /** Inflater used for all deflated entries. */
    private final Inflater inf = new Inflater(true);

    /** Buffer used to read from the wrapped stream. */
    private final ByteBuffer buf = ByteBuffer.allocate(ZipArchiveOutputStream.BUFFER_SIZE);

    /** The entry that is currently being read. */
    private CurrentEntry current = null;

    /** Whether the stream has been closed. */
    private boolean closed = false;

    /** Whether the stream has reached the central directory - and thus found all entries. */
    private boolean hitCentralDirectory = false;

    /**
     * When reading a stored entry that uses the data descriptor this
     * stream has to read the full entry and caches it. This is the
     * cache.
     */
    private ByteArrayInputStream lastStoredEntry = null;

    /** Whether the stream will try to read STORED entries that use a data descriptor. */
    private boolean allowStoredEntriesWithDataDescriptor = false;

    /** Fixed-size prefix of a local file header, excluding the variable-length name and extra field. */
    private static final int LFH_LEN = 30;
    /*
      local file header signature     WORD
      version needed to extract       SHORT
      general purpose bit flag        SHORT
      compression method              SHORT
      last mod file time              SHORT
      last mod file date              SHORT
      crc-32                          WORD
      compressed size                 WORD
      uncompressed size               WORD
      file name length                SHORT
      extra field length              SHORT
    */

    /** Fixed-size prefix of a central file header, excluding the variable-length fields. */
    private static final int CFH_LEN = 46;
    /*
      central file header signature   WORD
      version made by                 SHORT
      version needed to extract       SHORT
      general purpose bit flag        SHORT
      compression method              SHORT
      last mod file time              SHORT
      last mod file date              SHORT
      crc-32                          WORD
      compressed size                 WORD
      uncompressed size               WORD
      file name length                SHORT
      extra field length              SHORT
      file comment length             SHORT
      disk number start               SHORT
      internal file attributes        SHORT
      external file attributes        WORD
      relative offset of local header WORD
    */

    /** 2^32 - used to undo the 32-bit wrap-around of Inflater's byte counters (see getBytesInflated). */
    private static final long TWO_EXP_32 = ZIP64_MAGIC + 1;

    // cached buffers - must only be used locally in the class (COMPRESS-172 - reduce garbage collection)
    private final byte[] lfhBuf = new byte[LFH_LEN];
    private final byte[] skipBuf = new byte[1024];
    private final byte[] shortBuf = new byte[SHORT];
    private final byte[] wordBuf = new byte[WORD];
    private final byte[] twoDwordBuf = new byte[2 * DWORD];

    // number of entries returned so far - used to estimate the size
    // of the central directory when skipping the remainder of the archive
    private int entriesRead = 0;

    /**
     * Create an instance using UTF-8 encoding
     * @param inputStream the stream to wrap
     */
    public ZipArchiveInputStream(final InputStream inputStream) {
        this(inputStream, ZipEncodingHelper.UTF8);
    }

    /**
     * Create an instance using the specified encoding
     * @param inputStream the stream to wrap
     * @param encoding the encoding to use for file names, use null
     * for the platform's default encoding
     * @since 1.5
     */
    public ZipArchiveInputStream(final InputStream inputStream, final String encoding) {
        this(inputStream, encoding, true);
    }

    /**
     * Create an instance using the specified encoding
     * @param inputStream the stream to wrap
     * @param encoding the encoding to use for file names, use null
     * for the platform's default encoding
     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
     * Extra Fields (if present) to set the file names.
     */
    public ZipArchiveInputStream(final InputStream inputStream, final String encoding, final boolean useUnicodeExtraFields) {
        this(inputStream, encoding, useUnicodeExtraFields, false);
    }

    /**
     * Create an instance using the specified encoding
     * @param inputStream the stream to wrap
     * @param encoding the encoding to use for file names, use null
     * for the platform's default encoding
     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
     * Extra Fields (if present) to set the file names.
     * @param allowStoredEntriesWithDataDescriptor whether the stream
     * will try to read STORED entries that use a data descriptor
     * @since 1.1
     */
    public ZipArchiveInputStream(final InputStream inputStream,
                                 final String encoding,
                                 final boolean useUnicodeExtraFields,
                                 final boolean allowStoredEntriesWithDataDescriptor) {
        this.encoding = encoding;
        zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
        this.useUnicodeExtraFields = useUnicodeExtraFields;
        // pushback capacity matches the read buffer so anything buffered
        // ahead of an entry's end can be pushed back in one go
        in = new PushbackInputStream(inputStream, buf.capacity());
        this.allowStoredEntriesWithDataDescriptor =
            allowStoredEntriesWithDataDescriptor;
        // haven't read anything so far
        buf.limit(0);
    }

    /**
     * Reads the next entry's local file header and returns the entry.
     *
     * <p>Sizes and CRC may be unknown at this point if the entry uses
     * a data descriptor - they are filled in when the descriptor is
     * read at the end of the entry.</p>
     *
     * @return the next entry, or {@code null} once the central
     * directory (or end of stream) has been reached
     * @throws IOException if the stream is truncated or a record
     * signature other than LFH/CFH/AED is encountered
     */
    public ZipArchiveEntry getNextZipEntry() throws IOException {
        boolean firstEntry = true;
        if (closed || hitCentralDirectory) {
            return null;
        }
        if (current != null) {
            // position the stream at the start of the next local file header
            closeEntry();
            firstEntry = false;
        }

        long currentHeaderOffset = getBytesRead();
        try {
            if (firstEntry) {
                // split archives have a special signature before the
                // first local file header - look for it and fail with
                // the appropriate error message if this is a split
                // archive.
                readFirstLocalFileHeader(lfhBuf);
            } else {
                readFully(lfhBuf);
            }
        } catch (final EOFException e) {
            // no more entries
            return null;
        }

        final ZipLong sig = new ZipLong(lfhBuf);
        if (sig.equals(ZipLong.CFH_SIG) || sig.equals(ZipLong.AED_SIG)) {
            // start of the central directory (or archive extra data
            // record) - no further local entries follow
            hitCentralDirectory = true;
            skipRemainderOfArchive();
            return null;
        }
        if (!sig.equals(ZipLong.LFH_SIG)) {
            throw new ZipException(String.format("Unexpected record signature: 0X%X", sig.getValue()));
        }

        int off = WORD;
        current = new CurrentEntry();

        final int versionMadeBy = ZipShort.getValue(lfhBuf, off);
        off += SHORT;
        current.entry.setPlatform((versionMadeBy >> ZipFile.BYTE_SHIFT) & ZipFile.NIBLET_MASK);

        final GeneralPurposeBit gpFlag = GeneralPurposeBit.parse(lfhBuf, off);
        final boolean hasUTF8Flag = gpFlag.usesUTF8ForNames();
        final ZipEncoding entryEncoding = hasUTF8Flag ? ZipEncodingHelper.UTF8_ZIP_ENCODING : zipEncoding;
        current.hasDataDescriptor = gpFlag.usesDataDescriptor();
        current.entry.setGeneralPurposeBit(gpFlag);

        off += SHORT;

        current.entry.setMethod(ZipShort.getValue(lfhBuf, off));
        off += SHORT;

        final long time = ZipUtil.dosToJavaTime(ZipLong.getValue(lfhBuf, off));
        current.entry.setTime(time);
        off += WORD;

        ZipLong size = null, cSize = null;
        if (!current.hasDataDescriptor) {
            current.entry.setCrc(ZipLong.getValue(lfhBuf, off));
            off += WORD;

            cSize = new ZipLong(lfhBuf, off);
            off += WORD;

            size = new ZipLong(lfhBuf, off);
            off += WORD;
        } else {
            // CRC and sizes live in the data descriptor that follows
            // the entry data - skip the placeholder fields
            off += 3 * WORD;
        }

        final int fileNameLen = ZipShort.getValue(lfhBuf, off);

        off += SHORT;

        final int extraLen = ZipShort.getValue(lfhBuf, off);
        off += SHORT;

        final byte[] fileName = new byte[fileNameLen];
        readFully(fileName);
        current.entry.setName(entryEncoding.decode(fileName), fileName);

        final byte[] extraData = new byte[extraLen];
        readFully(extraData);
        current.entry.setExtra(extraData);

        if (!hasUTF8Flag && useUnicodeExtraFields) {
            ZipUtil.setNameAndCommentFromExtraFields(current.entry, fileName, null);
        }

        processZip64Extra(size, cSize);

        current.entry.setLocalHeaderOffset(currentHeaderOffset);
        current.entry.setDataOffset(getBytesRead());
        current.entry.setStreamContiguous(true);

        // methods other than STORED/DEFLATED get a dedicated decoding
        // stream, but only when the compressed size is known up front
        if (current.entry.getCompressedSize() != ArchiveEntry.SIZE_UNKNOWN) {
            if (current.entry.getMethod() == ZipMethod.UNSHRINKING.getCode()) {
                current.in = new UnshrinkingInputStream(new BoundedInputStream(in, current.entry.getCompressedSize()));
            } else if (current.entry.getMethod() == ZipMethod.IMPLODING.getCode()) {
                current.in = new ExplodingInputStream(
                        current.entry.getGeneralPurposeBit().getSlidingDictionarySize(),
                        current.entry.getGeneralPurposeBit().getNumberOfShannonFanoTrees(),
                        new BoundedInputStream(in, current.entry.getCompressedSize()));
            } else if (current.entry.getMethod() == ZipMethod.BZIP2.getCode()) {
                current.in = new BZip2CompressorInputStream(new BoundedInputStream(in, current.entry.getCompressedSize()));
            }
        }

        entriesRead++;
        return current.entry;
    }

    /**
     * Fills the given array with the first local file header and
     * deals with splitting/spanning markers that may prefix the first
     * LFH.
     */
    private void readFirstLocalFileHeader(final byte[] lfh) throws IOException {
        readFully(lfh);
        final ZipLong sig = new ZipLong(lfh);
        if (sig.equals(ZipLong.DD_SIG)) {
            // a leading DD signature marks a genuinely split archive,
            // which this stream cannot read
            throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException.Feature.SPLITTING);
        }

        if (sig.equals(ZipLong.SINGLE_SEGMENT_SPLIT_MARKER)) {
            // The archive is not really split as only one segment was
            // needed in the end. Just skip over the marker.
            final byte[] missedLfhBytes = new byte[4];
            readFully(missedLfhBytes);
            // shift the header left by the four marker bytes and
            // append the four bytes just read
            System.arraycopy(lfh, 4, lfh, 0, LFH_LEN - 4);
            System.arraycopy(missedLfhBytes, 0, lfh, LFH_LEN - 4, 4);
        }
    }

    /**
     * Records whether a Zip64 extra is present and sets the size
     * information from it if sizes are 0xFFFFFFFF and the entry
     * doesn't use a data descriptor.
     */
    private void processZip64Extra(final ZipLong size, final ZipLong cSize) {
        final Zip64ExtendedInformationExtraField z64 =
            (Zip64ExtendedInformationExtraField)
            current.entry.getExtraField(Zip64ExtendedInformationExtraField.HEADER_ID);
        current.usesZip64 = z64 != null;
        if (!current.hasDataDescriptor) {
            // size/cSize are non-null here - they are only null when a
            // data descriptor is in use
            if (z64 != null // same as current.usesZip64 but avoids NPE warning
                && (cSize.equals(ZipLong.ZIP64_MAGIC) || size.equals(ZipLong.ZIP64_MAGIC)) ) {
                current.entry.setCompressedSize(z64.getCompressedSize().getLongValue());
                current.entry.setSize(z64.getSize().getLongValue());
            } else {
                current.entry.setCompressedSize(cSize.getValue());
                current.entry.setSize(size.getValue());
            }
        }
    }

    @Override
    public ArchiveEntry getNextEntry() throws IOException {
        return getNextZipEntry();
    }

    /**
     * Whether this class is able to read the given entry.
     *
     * <p>May return false if it is set up to use encryption or a
     * compression method that hasn't been implemented yet.</p>
     * @since 1.1
     */
    @Override
    public boolean canReadEntryData(final ArchiveEntry ae) {
        if (ae instanceof ZipArchiveEntry) {
            final ZipArchiveEntry ze = (ZipArchiveEntry) ae;
            return ZipUtil.canHandleEntryData(ze)
                && supportsDataDescriptorFor(ze);

        }
        return false;
    }

    /**
     * Reads decompressed bytes of the current entry.
     *
     * <p>Dispatches on the entry's compression method and keeps the
     * running CRC of the data handed out up to date.</p>
     *
     * @throws IOException if the stream is closed or reading fails
     * @throws UnsupportedZipFeatureException if the entry uses an
     * unsupported method or an unsupported data descriptor combination
     */
    @Override
    public int read(final byte[] buffer, final int offset, final int length) throws IOException {
        if (closed) {
            throw new IOException("The stream is closed");
        }

        if (current == null) {
            return -1;
        }

        // avoid int overflow, check null buffer
        if (offset > buffer.length || length < 0 || offset < 0 || buffer.length - offset < length) {
            throw new ArrayIndexOutOfBoundsException();
        }

        ZipUtil.checkRequestedFeatures(current.entry);
        if (!supportsDataDescriptorFor(current.entry)) {
            throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException.Feature.DATA_DESCRIPTOR,
                    current.entry);
        }

        int read;
        if (current.entry.getMethod() == ZipArchiveOutputStream.STORED) {
            read = readStored(buffer, offset, length);
        } else if (current.entry.getMethod() == ZipArchiveOutputStream.DEFLATED) {
            read = readDeflated(buffer, offset, length);
        } else if (current.entry.getMethod() == ZipMethod.UNSHRINKING.getCode()
                || current.entry.getMethod() == ZipMethod.IMPLODING.getCode()
                || current.entry.getMethod() == ZipMethod.BZIP2.getCode()) {
            // these methods were wrapped in a decoding stream in getNextZipEntry
            read = current.in.read(buffer, offset, length);
        } else {
            throw new UnsupportedZipFeatureException(ZipMethod.getMethodByCode(current.entry.getMethod()),
                    current.entry);
        }

        if (read >= 0) {
            current.crc.update(buffer, offset, read);
        }

        return read;
    }

    /**
     * Implementation of read for STORED entries.
     */
    private int readStored(final byte[] buffer, final int offset, final int length) throws IOException {

        if (current.hasDataDescriptor) {
            // the entry's size is unknown until the data descriptor is
            // found, so the whole entry gets read ahead and cached
            if (lastStoredEntry == null) {
                readStoredEntry();
            }
            return lastStoredEntry.read(buffer, offset, length);
        }

        // for STORED entries compressed and uncompressed sizes are equal
        final long csize = current.entry.getSize();
        if (current.bytesRead >= csize) {
            return -1;
        }

        // refill the buffer once everything previously read has been consumed
        if (buf.position() >= buf.limit()) {
            buf.position(0);
            final int l = in.read(buf.array());
            if (l == -1) {
                return -1;
            }
            buf.limit(l);

            count(l);
            current.bytesReadFromStream += l;
        }

        int toRead = Math.min(buf.remaining(), length);
        if ((csize - current.bytesRead) < toRead) {
            // if it is smaller than toRead then it fits into an int
            toRead = (int) (csize - current.bytesRead);
        }
        buf.get(buffer, offset, toRead);
        current.bytesRead += toRead;
        return toRead;
    }

    /**
     * Implementation of read for DEFLATED entries.
     */
    private int readDeflated(final byte[] buffer, final int offset, final int length) throws IOException {
        final int read = readFromInflater(buffer, offset, length);
        if (read <= 0) {
            if (inf.finished()) {
                return -1;
            } else if (inf.needsDictionary()) {
                throw new ZipException("This archive needs a preset dictionary"
                        + " which is not supported by Commons"
                        + " Compress.");
            } else if (read == -1) {
                // readFromInflater signals EOF on the raw stream while
                // the inflater still expected input
                throw new IOException("Truncated ZIP file");
            }
        }
        return read;
    }

    /**
     * Potentially reads more bytes to fill the inflater's buffer and
     * reads from it.
     */
    private int readFromInflater(final byte[] buffer, final int offset, final int length) throws IOException {
        int read = 0;
        do {
            if (inf.needsInput()) {
                final int l = fill();
                if (l > 0) {
                    // fill() set the buffer limit to the bytes just read
                    current.bytesReadFromStream += buf.limit();
                } else if (l == -1) {
                    return -1;
                } else {
                    break;
                }
            }
            try {
                read = inf.inflate(buffer, offset, length);
            } catch (final DataFormatException e) {
                throw (IOException) new ZipException(e.getMessage()).initCause(e);
            }
        } while (read == 0 && inf.needsInput());
        return read;
    }

    @Override
    public void close() throws IOException {
        if (!closed) {
            closed = true;
            try {
                in.close();
            } finally {
                // release the inflater's native resources even if the
                // underlying close fails
                inf.end();
            }
        }
    }

    /**
     * Skips over and discards value bytes of data from this input
     * stream.
     *
     * <p>This implementation may end up skipping over some smaller
     * number of bytes, possibly 0, if and only if it reaches the end
     * of the underlying stream.</p>
     *
     * <p>The actual number of bytes skipped is returned.</p>
     *
     * @param value the number of bytes to be skipped.
     * @return the actual number of bytes skipped.
     * @throws IOException - if an I/O error occurs.
     * @throws IllegalArgumentException - if value is negative.
     */
    @Override
    public long skip(final long value) throws IOException {
        if (value >= 0) {
            long skipped = 0;
            while (skipped < value) {
                final long rem = value - skipped;
                // skips by reading through the entry's decompressing
                // read() so entry bookkeeping stays correct
                final int x = read(skipBuf, 0, (int) (skipBuf.length > rem ? rem : skipBuf.length));
                if (x == -1) {
                    return skipped;
                }
                skipped += x;
            }
            return skipped;
        }
        throw new IllegalArgumentException();
    }

    /**
     * Checks if the signature matches what is expected for a zip file.
     * Does not currently handle self-extracting zips which may have arbitrary
     * leading content.
     *
     * @param signature the bytes to check
     * @param length the number of bytes to check
     * @return true, if this stream is a zip archive stream, false otherwise
     */
    public static boolean matches(final byte[] signature, final int length) {
        if (length < ZipArchiveOutputStream.LFH_SIG.length) {
            return false;
        }

        return checksig(signature, ZipArchiveOutputStream.LFH_SIG) // normal file
            || checksig(signature, ZipArchiveOutputStream.EOCD_SIG) // empty zip
            || checksig(signature, ZipArchiveOutputStream.DD_SIG) // split zip
            || checksig(signature, ZipLong.SINGLE_SEGMENT_SPLIT_MARKER.getBytes());
    }

    /** Compares the first {@code expected.length} bytes of signature against expected. */
    private static boolean checksig(final byte[] signature, final byte[] expected) {
        for (int i = 0; i < expected.length; i++) {
            if (signature[i] != expected[i]) {
                return false;
            }
        }
        return true;
    }

    /**
     * Closes the current ZIP archive entry and positions the underlying
     * stream to the beginning of the next entry. All per-entry variables
     * and data structures are cleared.
     * <p>
     * If the compressed size of this entry is included in the entry header,
     * then any outstanding bytes are simply skipped from the underlying
     * stream without uncompressing them. This allows an entry to be safely
     * closed even if the compression method is unsupported.
     * <p>
     * In case we don't know the compressed size of this entry or have
     * already buffered too much data from the underlying stream to support
     * uncompression, then the uncompression process is completed and the
     * end position of the stream is adjusted based on the result of that
     * process.
     *
     * @throws IOException if an error occurs
     */
    private void closeEntry() throws IOException {
        if (closed) {
            throw new IOException("The stream is closed");
        }
        if (current == null) {
            return;
        }

        // Ensure all entry bytes are read
        if (currentEntryHasOutstandingBytes()) {
            drainCurrentEntryData();
        } else {
            // this is guaranteed to exhaust the stream
            skip(Long.MAX_VALUE); //NOSONAR

            final long inB = current.entry.getMethod() == ZipArchiveOutputStream.DEFLATED
                ? getBytesInflated() : current.bytesRead;

            // this is at most a single read() operation and can't
            // exceed the range of int
            final int diff = (int) (current.bytesReadFromStream - inB);

            // Pushback any required bytes
            if (diff > 0) {
                pushback(buf.array(), buf.limit() - diff, diff);
                current.bytesReadFromStream -= diff;
            }

            // Drain remainder of entry if not all data bytes were required
            if (currentEntryHasOutstandingBytes()) {
                drainCurrentEntryData();
            }
        }

        if (lastStoredEntry == null && current.hasDataDescriptor) {
            readDataDescriptor();
        }

        // reset per-entry state for the next entry
        inf.reset();
        buf.clear().flip();
        current = null;
        lastStoredEntry = null;
    }

    /**
     * If the compressed size of the current entry is included in the entry header
     * and there are any outstanding bytes in the underlying stream, then
     * this returns true.
     *
     * @return true, if current entry is determined to have outstanding bytes, false otherwise
     */
    private boolean currentEntryHasOutstandingBytes() {
        return current.bytesReadFromStream <= current.entry.getCompressedSize()
            && !current.hasDataDescriptor;
    }

    /**
     * Read all data of the current entry from the underlying stream
     * that hasn't been read, yet.
     */
    private void drainCurrentEntryData() throws IOException {
        long remaining = current.entry.getCompressedSize() - current.bytesReadFromStream;
        while (remaining > 0) {
            final long n = in.read(buf.array(), 0, (int) Math.min(buf.capacity(), remaining));
            if (n < 0) {
                throw new EOFException("Truncated ZIP entry: "
                        + ArchiveUtils.sanitize(current.entry.getName()));
            }
            count(n);
            remaining -= n;
        }
    }

    /**
     * Get the number of bytes Inflater has actually processed.
     *
     * <p>for Java &lt; Java7 the getBytes* methods in
     * Inflater/Deflater seem to return unsigned ints rather than
     * longs that start over with 0 at 2^32.</p>
     *
     * <p>The stream knows how many bytes it has read, but not how
     * many the Inflater actually consumed - it should be between the
     * total number of bytes read for the entry and the total number
     * minus the last read operation. Here we just try to make the
     * value close enough to the bytes we've read by assuming the
     * number of bytes consumed must be smaller than (or equal to) the
     * number of bytes read but not smaller by more than 2^32.</p>
     */
    private long getBytesInflated() {
        long inB = inf.getBytesRead();
        if (current.bytesReadFromStream >= TWO_EXP_32) {
            while (inB + TWO_EXP_32 <= current.bytesReadFromStream) {
                inB += TWO_EXP_32;
            }
        }
        return inB;
    }

    /**
     * Fills the read buffer from the wrapped stream and feeds it to
     * the inflater.
     *
     * @return the number of bytes read, 0 or -1 as reported by the
     * underlying stream
     */
    private int fill() throws IOException {
        if (closed) {
            throw new IOException("The stream is closed");
        }
        final int length = in.read(buf.array());
        if (length > 0) {
            buf.limit(length);
            count(buf.limit());
            inf.setInput(buf.array(), 0, buf.limit());
        }
        return length;
    }

    /**
     * Reads exactly b.length bytes from the wrapped stream, updating
     * the bytes-read counter.
     *
     * @throws EOFException if the stream ends before the array is full
     */
    private void readFully(final byte[] b) throws IOException {
        final int count = IOUtils.readFully(in, b);
        count(count);
        if (count < b.length) {
            throw new EOFException();
        }
    }

    /**
     * Reads the data descriptor that follows the current entry's data
     * and stores CRC and sizes on the entry.
     */
    private void readDataDescriptor() throws IOException {
        readFully(wordBuf);
        ZipLong val = new ZipLong(wordBuf);
        if (ZipLong.DD_SIG.equals(val)) {
            // data descriptor with signature, skip sig
            readFully(wordBuf);
            val = new ZipLong(wordBuf);
        }
        current.entry.setCrc(val.getValue());

        // if there is a ZIP64 extra field, sizes are eight bytes
        // each, otherwise four bytes each. Unfortunately some
        // implementations - namely Java7 - use eight bytes without
        // using a ZIP64 extra field -
        // http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=7073588

        // just read 16 bytes and check whether bytes nine to twelve
        // look like one of the signatures of what could follow a data
        // descriptor (ignoring archive decryption headers for now).
        // If so, push back eight bytes and assume sizes are four
        // bytes, otherwise sizes are eight bytes each.
        readFully(twoDwordBuf);
        final ZipLong potentialSig = new ZipLong(twoDwordBuf, DWORD);
        if (potentialSig.equals(ZipLong.CFH_SIG) || potentialSig.equals(ZipLong.LFH_SIG)) {
            pushback(twoDwordBuf, DWORD, DWORD);
            current.entry.setCompressedSize(ZipLong.getValue(twoDwordBuf));
            current.entry.setSize(ZipLong.getValue(twoDwordBuf, WORD));
        } else {
            current.entry.setCompressedSize(ZipEightByteInteger.getLongValue(twoDwordBuf));
            current.entry.setSize(ZipEightByteInteger.getLongValue(twoDwordBuf, DWORD));
        }
    }

    /**
     * Whether this entry requires a data descriptor this library can work with.
     *
     * @return true if allowStoredEntriesWithDataDescriptor is true,
     * the entry doesn't require any data descriptor or the method is
     * DEFLATED.
774 */ 775 private boolean supportsDataDescriptorFor(final ZipArchiveEntry entry) { 776 return !entry.getGeneralPurposeBit().usesDataDescriptor() 777 778 || (allowStoredEntriesWithDataDescriptor && entry.getMethod() == ZipEntry.STORED) 779 || entry.getMethod() == ZipEntry.DEFLATED; 780 } 781 782 /** 783 * Caches a stored entry that uses the data descriptor. 784 * 785 * <ul> 786 * <li>Reads a stored entry until the signature of a local file 787 * header, central directory header or data descriptor has been 788 * found.</li> 789 * <li>Stores all entry data in lastStoredEntry.</p> 790 * <li>Rewinds the stream to position at the data 791 * descriptor.</li> 792 * <li>reads the data descriptor</li> 793 * </ul> 794 * 795 * <p>After calling this method the entry should know its size, 796 * the entry's data is cached and the stream is positioned at the 797 * next local file or central directory header.</p> 798 */ 799 private void readStoredEntry() throws IOException { 800 final ByteArrayOutputStream bos = new ByteArrayOutputStream(); 801 int off = 0; 802 boolean done = false; 803 804 // length of DD without signature 805 final int ddLen = current.usesZip64 ? 
WORD + 2 * DWORD : 3 * WORD; 806 807 while (!done) { 808 final int r = in.read(buf.array(), off, ZipArchiveOutputStream.BUFFER_SIZE - off); 809 if (r <= 0) { 810 // read the whole archive without ever finding a 811 // central directory 812 throw new IOException("Truncated ZIP file"); 813 } 814 if (r + off < 4) { 815 // buffer too small to check for a signature, loop 816 off += r; 817 continue; 818 } 819 820 done = bufferContainsSignature(bos, off, r, ddLen); 821 if (!done) { 822 off = cacheBytesRead(bos, off, r, ddLen); 823 } 824 } 825 826 final byte[] b = bos.toByteArray(); 827 lastStoredEntry = new ByteArrayInputStream(b); 828 } 829 830 private static final byte[] LFH = ZipLong.LFH_SIG.getBytes(); 831 private static final byte[] CFH = ZipLong.CFH_SIG.getBytes(); 832 private static final byte[] DD = ZipLong.DD_SIG.getBytes(); 833 834 /** 835 * Checks whether the current buffer contains the signature of a 836 * "data descriptor", "local file header" or 837 * "central directory entry". 838 * 839 * <p>If it contains such a signature, reads the data descriptor 840 * and positions the stream right after the data descriptor.</p> 841 */ 842 private boolean bufferContainsSignature(final ByteArrayOutputStream bos, final int offset, final int lastRead, final int expectedDDLen) 843 throws IOException { 844 845 boolean done = false; 846 int readTooMuch = 0; 847 for (int i = 0; !done && i < lastRead - 4; i++) { 848 if (buf.array()[i] == LFH[0] && buf.array()[i + 1] == LFH[1]) { 849 if ((buf.array()[i + 2] == LFH[2] && buf.array()[i + 3] == LFH[3]) 850 || (buf.array()[i] == CFH[2] && buf.array()[i + 3] == CFH[3])) { 851 // found a LFH or CFH: 852 readTooMuch = offset + lastRead - i - expectedDDLen; 853 done = true; 854 } 855 else if (buf.array()[i + 2] == DD[2] && buf.array()[i + 3] == DD[3]) { 856 // found DD: 857 readTooMuch = offset + lastRead - i; 858 done = true; 859 } 860 if (done) { 861 // * push back bytes read in excess as well as the data 862 // descriptor 863 // * 
copy the remaining bytes to cache 864 // * read data descriptor 865 pushback(buf.array(), offset + lastRead - readTooMuch, readTooMuch); 866 bos.write(buf.array(), 0, i); 867 readDataDescriptor(); 868 } 869 } 870 } 871 return done; 872 } 873 874 /** 875 * If the last read bytes could hold a data descriptor and an 876 * incomplete signature then save the last bytes to the front of 877 * the buffer and cache everything in front of the potential data 878 * descriptor into the given ByteArrayOutputStream. 879 * 880 * <p>Data descriptor plus incomplete signature (3 bytes in the 881 * worst case) can be 20 bytes max.</p> 882 */ 883 private int cacheBytesRead(final ByteArrayOutputStream bos, int offset, final int lastRead, final int expecteDDLen) { 884 final int cacheable = offset + lastRead - expecteDDLen - 3; 885 if (cacheable > 0) { 886 bos.write(buf.array(), 0, cacheable); 887 System.arraycopy(buf.array(), cacheable, buf.array(), 0, expecteDDLen + 3); 888 offset = expecteDDLen + 3; 889 } else { 890 offset += lastRead; 891 } 892 return offset; 893 } 894 895 private void pushback(final byte[] buf, final int offset, final int length) throws IOException { 896 ((PushbackInputStream) in).unread(buf, offset, length); 897 pushedBackBytes(length); 898 } 899 900 // End of Central Directory Record 901 // end of central dir signature WORD 902 // number of this disk SHORT 903 // number of the disk with the 904 // start of the central directory SHORT 905 // total number of entries in the 906 // central directory on this disk SHORT 907 // total number of entries in 908 // the central directory SHORT 909 // size of the central directory WORD 910 // offset of start of central 911 // directory with respect to 912 // the starting disk number WORD 913 // .ZIP file comment length SHORT 914 // .ZIP file comment up to 64KB 915 // 916 917 /** 918 * Reads the stream until it find the "End of central directory 919 * record" and consumes it as well. 
     */
    private void skipRemainderOfArchive() throws IOException {
        // skip over central directory. One LFH has been read too much
        // already. The calculation discounts file names and extra
        // data so it will be too short.
        realSkip((long) entriesRead * CFH_LEN - LFH_LEN);
        findEocdRecord();
        realSkip((long) ZipFile.MIN_EOCD_SIZE - WORD /* signature */ - SHORT /* comment len */);
        readFully(shortBuf);
        // file comment
        realSkip(ZipShort.getValue(shortBuf));
    }

    /**
     * Reads forward until the signature of the "End of central
     * directory" record is found.
     */
    private void findEocdRecord() throws IOException {
        int currentByte = -1;
        boolean skipReadCall = false;
        // skipReadCall is set whenever a mismatching byte could itself
        // be the start of the signature - it is then re-examined
        // without reading a fresh byte
        while (skipReadCall || (currentByte = readOneByte()) > -1) {
            skipReadCall = false;
            if (!isFirstByteOfEocdSig(currentByte)) {
                continue;
            }
            currentByte = readOneByte();
            if (currentByte != ZipArchiveOutputStream.EOCD_SIG[1]) {
                if (currentByte == -1) {
                    break;
                }
                skipReadCall = isFirstByteOfEocdSig(currentByte);
                continue;
            }
            currentByte = readOneByte();
            if (currentByte != ZipArchiveOutputStream.EOCD_SIG[2]) {
                if (currentByte == -1) {
                    break;
                }
                skipReadCall = isFirstByteOfEocdSig(currentByte);
                continue;
            }
            currentByte = readOneByte();
            if (currentByte == -1
                || currentByte == ZipArchiveOutputStream.EOCD_SIG[3]) {
                break;
            }
            skipReadCall = isFirstByteOfEocdSig(currentByte);
        }
    }

    /**
     * Skips bytes by reading from the underlying stream rather than
     * the (potentially inflating) archive stream - which {@link
     * #skip} would do.
     *
     * Also updates bytes-read counter.
     */
    private void realSkip(final long value) throws IOException {
        if (value >= 0) {
            long skipped = 0;
            while (skipped < value) {
                final long rem = value - skipped;
                final int x = in.read(skipBuf, 0, (int) (skipBuf.length > rem ? rem : skipBuf.length));
                if (x == -1) {
                    return;
                }
                count(x);
                skipped += x;
            }
            return;
        }
        throw new IllegalArgumentException();
    }

    /**
     * Reads bytes by reading from the underlying stream rather than
     * the (potentially inflating) archive stream - which {@link #read} would do.
     *
     * Also updates bytes-read counter.
     */
    private int readOneByte() throws IOException {
        final int b = in.read();
        if (b != -1) {
            count(1);
        }
        return b;
    }

    /** Whether b matches the first byte of the EOCD signature. */
    private boolean isFirstByteOfEocdSig(final int b) {
        return b == ZipArchiveOutputStream.EOCD_SIG[0];
    }

    /**
     * Structure collecting information for the entry that is
     * currently being read.
     */
    private static final class CurrentEntry {

        /**
         * Current ZIP entry.
         */
        private final ZipArchiveEntry entry = new ZipArchiveEntry();

        /**
         * Does the entry use a data descriptor?
         */
        private boolean hasDataDescriptor;

        /**
         * Does the entry have a ZIP64 extended information extra field.
         */
        private boolean usesZip64;

        /**
         * Number of bytes of entry content read by the client if the
         * entry is STORED.
         */
        private long bytesRead;

        /**
         * Number of bytes of entry content read so from the stream.
         *
         * <p>This may be more than the actual entry's length as some
         * stuff gets buffered up and needs to be pushed back when the
         * end of the entry has been reached.</p>
         */
        private long bytesReadFromStream;

        /**
         * The checksum calculated as the current entry is read.
         */
        private final CRC32 crc = new CRC32();

        /**
         * The input stream decompressing the data for shrunk and imploded entries.
1055 */ 1056 private InputStream in; 1057 } 1058 1059 /** 1060 * Bounded input stream adapted from commons-io 1061 */ 1062 private class BoundedInputStream extends InputStream { 1063 1064 /** the wrapped input stream */ 1065 private final InputStream in; 1066 1067 /** the max length to provide */ 1068 private final long max; 1069 1070 /** the number of bytes already returned */ 1071 private long pos = 0; 1072 1073 /** 1074 * Creates a new <code>BoundedInputStream</code> that wraps the given input 1075 * stream and limits it to a certain size. 1076 * 1077 * @param in The wrapped input stream 1078 * @param size The maximum number of bytes to return 1079 */ 1080 public BoundedInputStream(final InputStream in, final long size) { 1081 this.max = size; 1082 this.in = in; 1083 } 1084 1085 @Override 1086 public int read() throws IOException { 1087 if (max >= 0 && pos >= max) { 1088 return -1; 1089 } 1090 final int result = in.read(); 1091 pos++; 1092 count(1); 1093 current.bytesReadFromStream++; 1094 return result; 1095 } 1096 1097 @Override 1098 public int read(final byte[] b) throws IOException { 1099 return this.read(b, 0, b.length); 1100 } 1101 1102 @Override 1103 public int read(final byte[] b, final int off, final int len) throws IOException { 1104 if (max >= 0 && pos >= max) { 1105 return -1; 1106 } 1107 final long maxRead = max >= 0 ? Math.min(len, max - pos) : len; 1108 final int bytesRead = in.read(b, off, (int) maxRead); 1109 1110 if (bytesRead == -1) { 1111 return -1; 1112 } 1113 1114 pos += bytesRead; 1115 count(bytesRead); 1116 current.bytesReadFromStream += bytesRead; 1117 return bytesRead; 1118 } 1119 1120 @Override 1121 public long skip(final long n) throws IOException { 1122 final long toSkip = max >= 0 ? 
Math.min(n, max - pos) : n; 1123 final long skippedBytes = in.skip(toSkip); 1124 pos += skippedBytes; 1125 return skippedBytes; 1126 } 1127 1128 @Override 1129 public int available() throws IOException { 1130 if (max >= 0 && pos >= max) { 1131 return 0; 1132 } 1133 return in.available(); 1134 } 1135 } 1136}