001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, 013 * software distributed under the License is distributed on an 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 * KIND, either express or implied. See the License for the 016 * specific language governing permissions and limitations 017 * under the License. 018 */ 019package org.apache.commons.compress.archivers.zip; 020 021import java.io.ByteArrayInputStream; 022import java.io.ByteArrayOutputStream; 023import java.io.EOFException; 024import java.io.IOException; 025import java.io.InputStream; 026import java.io.PushbackInputStream; 027import java.nio.ByteBuffer; 028import java.util.zip.CRC32; 029import java.util.zip.DataFormatException; 030import java.util.zip.Inflater; 031import java.util.zip.ZipEntry; 032import java.util.zip.ZipException; 033 034import org.apache.commons.compress.archivers.ArchiveEntry; 035import org.apache.commons.compress.archivers.ArchiveInputStream; 036import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream; 037import org.apache.commons.compress.utils.ArchiveUtils; 038import org.apache.commons.compress.utils.IOUtils; 039 040import static org.apache.commons.compress.archivers.zip.ZipConstants.DWORD; 041import static org.apache.commons.compress.archivers.zip.ZipConstants.SHORT; 042import static org.apache.commons.compress.archivers.zip.ZipConstants.WORD; 043import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC; 044 
/**
 * Implements an input stream that can read Zip archives.
 *
 * <p>As of Apache Commons Compress it transparently supports Zip64
 * extensions and thus individual entries and archives larger than 4
 * GB or with more than 65536 entries.</p>
 *
 * <p>The {@link ZipFile} class is preferred when reading from files
 * as {@link ZipArchiveInputStream} is limited by not being able to
 * read the central directory header before returning entries. In
 * particular {@link ZipArchiveInputStream}</p>
 *
 * <ul>
 *
 * <li>may return entries that are not part of the central directory
 * at all and shouldn't be considered part of the archive.</li>
 *
 * <li>may return several entries with the same name.</li>
 *
 * <li>will not return internal or external attributes.</li>
 *
 * <li>may return incomplete extra field data.</li>
 *
 * <li>may return unknown sizes and CRC values for entries until the
 * next entry has been reached if the archive uses the data
 * descriptor feature.</li>
 *
 * </ul>
 *
 * @see ZipFile
 * @NotThreadSafe
 */
public class ZipArchiveInputStream extends ArchiveInputStream {

    /** The zip encoding to use for filenames and the file comment. */
    private final ZipEncoding zipEncoding;

    // the provided encoding (for unit tests)
    final String encoding;

    /** Whether to look for and use Unicode extra fields. */
    private final boolean useUnicodeExtraFields;

    /** Wrapped stream, will always be a PushbackInputStream. */
    private final InputStream in;

    /** Inflater used for all deflated entries. */
    private final Inflater inf = new Inflater(true);

    /**
     * Buffer used to read from the wrapped stream.
     *
     * <p>Its capacity doubles as the pushback capacity of {@code in},
     * so any fully buffered chunk can be pushed back in one go.</p>
     */
    private final ByteBuffer buf = ByteBuffer.allocate(ZipArchiveOutputStream.BUFFER_SIZE);

    /** The entry that is currently being read. */
    private CurrentEntry current = null;

    /** Whether the stream has been closed. */
    private boolean closed = false;

    /** Whether the stream has reached the central directory - and thus found all entries. */
    private boolean hitCentralDirectory = false;

    /**
     * When reading a stored entry that uses the data descriptor this
     * stream has to read the full entry and caches it.  This is the
     * cache.
     */
    private ByteArrayInputStream lastStoredEntry = null;

    /** Whether the stream will try to read STORED entries that use a data descriptor. */
    private boolean allowStoredEntriesWithDataDescriptor = false;

    /** Size of a local file header, up to but excluding the variable-length name/extra data. */
    private static final int LFH_LEN = 30;
    /*
      local file header signature     WORD
      version needed to extract       SHORT
      general purpose bit flag        SHORT
      compression method              SHORT
      last mod file time              SHORT
      last mod file date              SHORT
      crc-32                          WORD
      compressed size                 WORD
      uncompressed size               WORD
      file name length                SHORT
      extra field length              SHORT
    */

    /** Size of a central directory header, up to but excluding the variable-length fields. */
    private static final int CFH_LEN = 46;
    /*
      central file header signature   WORD
      version made by                 SHORT
      version needed to extract       SHORT
      general purpose bit flag        SHORT
      compression method              SHORT
      last mod file time              SHORT
      last mod file date              SHORT
      crc-32                          WORD
      compressed size                 WORD
      uncompressed size               WORD
      file name length                SHORT
      extra field length              SHORT
      file comment length             SHORT
      disk number start               SHORT
      internal file attributes        SHORT
      external file attributes        WORD
      relative offset of local header WORD
    */

    /** 2^32 - used to correct Inflater byte counters that wrap at 32 bits. */
    private static final long TWO_EXP_32 = ZIP64_MAGIC + 1;

    // cached buffers - must only be used locally in the class (COMPRESS-172 - reduce garbage collection)
    private final byte[] LFH_BUF = new byte[LFH_LEN];
    private final byte[] SKIP_BUF = new byte[1024];
    private final byte[] SHORT_BUF = new byte[SHORT];
    private final byte[] WORD_BUF = new byte[WORD];
    private final byte[] TWO_DWORD_BUF = new byte[2 * DWORD];

    /** Number of entries returned so far - used to estimate the central directory size when skipping. */
    private int entriesRead = 0;

    /**
     * Create an instance using UTF-8 encoding
     * @param inputStream the stream to wrap
     */
    public ZipArchiveInputStream(final InputStream inputStream) {
        this(inputStream, ZipEncodingHelper.UTF8);
    }

    /**
     * Create an instance using the specified encoding
     * @param inputStream the stream to wrap
     * @param encoding the encoding to use for file names, use null
     * for the platform's default encoding
     * @since 1.5
     */
    public ZipArchiveInputStream(final InputStream inputStream, final String encoding) {
        this(inputStream, encoding, true);
    }

    /**
     * Create an instance using the specified encoding
     * @param inputStream the stream to wrap
     * @param encoding the encoding to use for file names, use null
     * for the platform's default encoding
     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
     * Extra Fields (if present) to set the file names.
     */
    public ZipArchiveInputStream(final InputStream inputStream, final String encoding, final boolean useUnicodeExtraFields) {
        this(inputStream, encoding, useUnicodeExtraFields, false);
    }

    /**
     * Create an instance using the specified encoding
     * @param inputStream the stream to wrap
     * @param encoding the encoding to use for file names, use null
     * for the platform's default encoding
     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
     * Extra Fields (if present) to set the file names.
     * @param allowStoredEntriesWithDataDescriptor whether the stream
     * will try to read STORED entries that use a data descriptor
     * @since 1.1
     */
    public ZipArchiveInputStream(final InputStream inputStream,
                                 final String encoding,
                                 final boolean useUnicodeExtraFields,
                                 final boolean allowStoredEntriesWithDataDescriptor) {
        this.encoding = encoding;
        zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
        this.useUnicodeExtraFields = useUnicodeExtraFields;
        // pushback capacity matches buf so a whole buffered chunk can be unread
        in = new PushbackInputStream(inputStream, buf.capacity());
        this.allowStoredEntriesWithDataDescriptor =
            allowStoredEntriesWithDataDescriptor;
        // haven't read anything so far
        buf.limit(0);
    }

    /**
     * Reads the next local file header and returns the entry it
     * describes, or {@code null} once the central directory (or the
     * end of the stream) has been reached.
     *
     * <p>Closes the previous entry first, which positions the stream
     * at the start of the next local file header.</p>
     *
     * @return the next entry or {@code null} if there are no more
     * @throws IOException if the header cannot be read or is malformed
     */
    public ZipArchiveEntry getNextZipEntry() throws IOException {
        boolean firstEntry = true;
        if (closed || hitCentralDirectory) {
            return null;
        }
        if (current != null) {
            closeEntry();
            firstEntry = false;
        }

        try {
            if (firstEntry) {
                // split archives have a special signature before the
                // first local file header - look for it and fail with
                // the appropriate error message if this is a split
                // archive.
                readFirstLocalFileHeader(LFH_BUF);
            } else {
                readFully(LFH_BUF);
            }
        } catch (final EOFException e) {
            // a clean EOF where an entry was expected simply means "no more entries"
            return null;
        }

        final ZipLong sig = new ZipLong(LFH_BUF);
        if (sig.equals(ZipLong.CFH_SIG) || sig.equals(ZipLong.AED_SIG)) {
            // first central directory header (or archive extra data record):
            // all entries have been seen, consume the rest of the archive
            hitCentralDirectory = true;
            skipRemainderOfArchive();
        }
        if (!sig.equals(ZipLong.LFH_SIG)) {
            return null;
        }

        // walk the fixed-size part of the LFH; off tracks the current field offset
        int off = WORD;
        current = new CurrentEntry();

        final int versionMadeBy = ZipShort.getValue(LFH_BUF, off);
        off += SHORT;
        current.entry.setPlatform((versionMadeBy >> ZipFile.BYTE_SHIFT) & ZipFile.NIBLET_MASK);

        final GeneralPurposeBit gpFlag = GeneralPurposeBit.parse(LFH_BUF, off);
        final boolean hasUTF8Flag = gpFlag.usesUTF8ForNames();
        final ZipEncoding entryEncoding = hasUTF8Flag ? ZipEncodingHelper.UTF8_ZIP_ENCODING : zipEncoding;
        current.hasDataDescriptor = gpFlag.usesDataDescriptor();
        current.entry.setGeneralPurposeBit(gpFlag);

        off += SHORT;

        current.entry.setMethod(ZipShort.getValue(LFH_BUF, off));
        off += SHORT;

        final long time = ZipUtil.dosToJavaTime(ZipLong.getValue(LFH_BUF, off));
        current.entry.setTime(time);
        off += WORD;

        // with a data descriptor CRC and sizes in the LFH are meaningless (usually 0)
        ZipLong size = null, cSize = null;
        if (!current.hasDataDescriptor) {
            current.entry.setCrc(ZipLong.getValue(LFH_BUF, off));
            off += WORD;

            cSize = new ZipLong(LFH_BUF, off);
            off += WORD;

            size = new ZipLong(LFH_BUF, off);
            off += WORD;
        } else {
            off += 3 * WORD;
        }

        final int fileNameLen = ZipShort.getValue(LFH_BUF, off);

        off += SHORT;

        final int extraLen = ZipShort.getValue(LFH_BUF, off);
        off += SHORT;

        final byte[] fileName = new byte[fileNameLen];
        readFully(fileName);
        current.entry.setName(entryEncoding.decode(fileName), fileName);

        final byte[] extraData = new byte[extraLen];
        readFully(extraData);
        current.entry.setExtra(extraData);

        if (!hasUTF8Flag && useUnicodeExtraFields) {
            ZipUtil.setNameAndCommentFromExtraFields(current.entry, fileName, null);
        }

        processZip64Extra(size, cSize);

        // for the less common methods a dedicated decompressing stream is set
        // up now; STORED and DEFLATED are handled directly in read()
        if (current.entry.getCompressedSize() != ArchiveEntry.SIZE_UNKNOWN) {
            if (current.entry.getMethod() == ZipMethod.UNSHRINKING.getCode()) {
                current.in = new UnshrinkingInputStream(new BoundedInputStream(in, current.entry.getCompressedSize()));
            } else if (current.entry.getMethod() == ZipMethod.IMPLODING.getCode()) {
                current.in = new ExplodingInputStream(
                        current.entry.getGeneralPurposeBit().getSlidingDictionarySize(),
                        current.entry.getGeneralPurposeBit().getNumberOfShannonFanoTrees(),
                        new BoundedInputStream(in, current.entry.getCompressedSize()));
            } else if (current.entry.getMethod() == ZipMethod.BZIP2.getCode()) {
                current.in = new BZip2CompressorInputStream(new BoundedInputStream(in, current.entry.getCompressedSize()));
            }
        }

        entriesRead++;
        return current.entry;
    }

    /**
     * Fills the given array with the first local file header and
     * deals with splitting/spanning markers that may prefix the first
     * LFH.
     */
    private void readFirstLocalFileHeader(final byte[] lfh) throws IOException {
        readFully(lfh);
        final ZipLong sig = new ZipLong(lfh);
        if (sig.equals(ZipLong.DD_SIG)) {
            // a real multi-segment split archive - not supported
            throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException.Feature.SPLITTING);
        }

        if (sig.equals(ZipLong.SINGLE_SEGMENT_SPLIT_MARKER)) {
            // The archive is not really split as only one segment was
            // needed in the end.  Just skip over the marker.
            final byte[] missedLfhBytes = new byte[4];
            readFully(missedLfhBytes);
            // shift the already-read bytes over the marker and append the rest
            System.arraycopy(lfh, 4, lfh, 0, LFH_LEN - 4);
            System.arraycopy(missedLfhBytes, 0, lfh, LFH_LEN - 4, 4);
        }
    }

    /**
     * Records whether a Zip64 extra is present and sets the size
     * information from it if sizes are 0xFFFFFFFF and the entry
     * doesn't use a data descriptor.
     */
    private void processZip64Extra(final ZipLong size, final ZipLong cSize) {
        final Zip64ExtendedInformationExtraField z64 =
            (Zip64ExtendedInformationExtraField)
            current.entry.getExtraField(Zip64ExtendedInformationExtraField.HEADER_ID);
        current.usesZip64 = z64 != null;
        if (!current.hasDataDescriptor) {
            if (z64 != null // same as current.usesZip64 but avoids NPE warning
                    && (cSize.equals(ZipLong.ZIP64_MAGIC) || size.equals(ZipLong.ZIP64_MAGIC)) ) {
                // NOTE(review): z64.getCompressedSize()/getSize() may be null if the
                // extra field omits the size values - would NPE here; confirm against
                // Zip64ExtendedInformationExtraField's parsing contract.
                current.entry.setCompressedSize(z64.getCompressedSize().getLongValue());
                current.entry.setSize(z64.getSize().getLongValue());
            } else {
                current.entry.setCompressedSize(cSize.getValue());
                current.entry.setSize(size.getValue());
            }
        }
    }

    @Override
    public ArchiveEntry getNextEntry() throws IOException {
        return getNextZipEntry();
    }

    /**
     * Whether this class is able to read the given entry.
     *
     * <p>May return false if it is set up to use encryption or a
     * compression method that hasn't been implemented yet.</p>
     * @since 1.1
     */
    @Override
    public boolean canReadEntryData(final ArchiveEntry ae) {
        if (ae instanceof ZipArchiveEntry) {
            final ZipArchiveEntry ze = (ZipArchiveEntry) ae;
            return ZipUtil.canHandleEntryData(ze)
                && supportsDataDescriptorFor(ze);

        }
        return false;
    }

    @Override
    public int read(final byte[] buffer, final int offset, final int length) throws IOException {
        if (closed) {
            throw new IOException("The stream is closed");
        }

        // no entry has been opened (or the last one was closed)
        if (current == null) {
            return -1;
        }

        // avoid int overflow, check null buffer
        if (offset > buffer.length || length < 0 || offset < 0 || buffer.length - offset < length) {
            throw new ArrayIndexOutOfBoundsException();
        }

        ZipUtil.checkRequestedFeatures(current.entry);
        if (!supportsDataDescriptorFor(current.entry)) {
            throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException.Feature.DATA_DESCRIPTOR,
                    current.entry);
        }

        // dispatch on compression method; the less common methods were wrapped
        // in a decompressing stream when the entry was opened
        int read;
        if (current.entry.getMethod() == ZipArchiveOutputStream.STORED) {
            read = readStored(buffer, offset, length);
        } else if (current.entry.getMethod() == ZipArchiveOutputStream.DEFLATED) {
            read = readDeflated(buffer, offset, length);
        } else if (current.entry.getMethod() == ZipMethod.UNSHRINKING.getCode()
                || current.entry.getMethod() == ZipMethod.IMPLODING.getCode()
                || current.entry.getMethod() == ZipMethod.BZIP2.getCode()) {
            read = current.in.read(buffer, offset, length);
        } else {
            throw new UnsupportedZipFeatureException(ZipMethod.getMethodByCode(current.entry.getMethod()),
                    current.entry);
        }

        if (read >= 0) {
            // maintain the running CRC of uncompressed entry data
            current.crc.update(buffer, offset, read);
        }

        return read;
    }

    /**
     * Implementation of read for STORED entries.
     */
    private int readStored(final byte[] buffer, final int offset, final int length) throws IOException {

        if (current.hasDataDescriptor) {
            // without a size in the header the whole entry has to be read
            // ahead and cached before any bytes can be returned
            if (lastStoredEntry == null) {
                readStoredEntry();
            }
            return lastStoredEntry.read(buffer, offset, length);
        }

        final long csize = current.entry.getSize();
        if (current.bytesRead >= csize) {
            return -1;
        }

        // refill buf from the wrapped stream when exhausted
        if (buf.position() >= buf.limit()) {
            buf.position(0);
            final int l = in.read(buf.array());
            if (l == -1) {
                return -1;
            }
            buf.limit(l);

            count(l);
            current.bytesReadFromStream += l;
        }

        int toRead = Math.min(buf.remaining(), length);
        if ((csize - current.bytesRead) < toRead) {
            // if it is smaller than toRead then it fits into an int
            toRead = (int) (csize - current.bytesRead);
        }
        buf.get(buffer, offset, toRead);
        current.bytesRead += toRead;
        return toRead;
    }

    /**
     * Implementation of read for DEFLATED entries.
476 */ 477 private int readDeflated(final byte[] buffer, final int offset, final int length) throws IOException { 478 final int read = readFromInflater(buffer, offset, length); 479 if (read <= 0) { 480 if (inf.finished()) { 481 return -1; 482 } else if (inf.needsDictionary()) { 483 throw new ZipException("This archive needs a preset dictionary" 484 + " which is not supported by Commons" 485 + " Compress."); 486 } else if (read == -1) { 487 throw new IOException("Truncated ZIP file"); 488 } 489 } 490 return read; 491 } 492 493 /** 494 * Potentially reads more bytes to fill the inflater's buffer and 495 * reads from it. 496 */ 497 private int readFromInflater(final byte[] buffer, final int offset, final int length) throws IOException { 498 int read = 0; 499 do { 500 if (inf.needsInput()) { 501 final int l = fill(); 502 if (l > 0) { 503 current.bytesReadFromStream += buf.limit(); 504 } else if (l == -1) { 505 return -1; 506 } else { 507 break; 508 } 509 } 510 try { 511 read = inf.inflate(buffer, offset, length); 512 } catch (final DataFormatException e) { 513 throw (IOException) new ZipException(e.getMessage()).initCause(e); 514 } 515 } while (read == 0 && inf.needsInput()); 516 return read; 517 } 518 519 @Override 520 public void close() throws IOException { 521 if (!closed) { 522 closed = true; 523 try { 524 in.close(); 525 } finally { 526 inf.end(); 527 } 528 } 529 } 530 531 /** 532 * Skips over and discards value bytes of data from this input 533 * stream. 534 * 535 * <p>This implementation may end up skipping over some smaller 536 * number of bytes, possibly 0, if and only if it reaches the end 537 * of the underlying stream.</p> 538 * 539 * <p>The actual number of bytes skipped is returned.</p> 540 * 541 * @param value the number of bytes to be skipped. 542 * @return the actual number of bytes skipped. 543 * @throws IOException - if an I/O error occurs. 544 * @throws IllegalArgumentException - if value is negative. 
545 */ 546 @Override 547 public long skip(final long value) throws IOException { 548 if (value >= 0) { 549 long skipped = 0; 550 while (skipped < value) { 551 final long rem = value - skipped; 552 final int x = read(SKIP_BUF, 0, (int) (SKIP_BUF.length > rem ? rem : SKIP_BUF.length)); 553 if (x == -1) { 554 return skipped; 555 } 556 skipped += x; 557 } 558 return skipped; 559 } 560 throw new IllegalArgumentException(); 561 } 562 563 /** 564 * Checks if the signature matches what is expected for a zip file. 565 * Does not currently handle self-extracting zips which may have arbitrary 566 * leading content. 567 * 568 * @param signature the bytes to check 569 * @param length the number of bytes to check 570 * @return true, if this stream is a zip archive stream, false otherwise 571 */ 572 public static boolean matches(final byte[] signature, final int length) { 573 if (length < ZipArchiveOutputStream.LFH_SIG.length) { 574 return false; 575 } 576 577 return checksig(signature, ZipArchiveOutputStream.LFH_SIG) // normal file 578 || checksig(signature, ZipArchiveOutputStream.EOCD_SIG) // empty zip 579 || checksig(signature, ZipArchiveOutputStream.DD_SIG) // split zip 580 || checksig(signature, ZipLong.SINGLE_SEGMENT_SPLIT_MARKER.getBytes()); 581 } 582 583 private static boolean checksig(final byte[] signature, final byte[] expected) { 584 for (int i = 0; i < expected.length; i++) { 585 if (signature[i] != expected[i]) { 586 return false; 587 } 588 } 589 return true; 590 } 591 592 /** 593 * Closes the current ZIP archive entry and positions the underlying 594 * stream to the beginning of the next entry. All per-entry variables 595 * and data structures are cleared. 596 * <p> 597 * If the compressed size of this entry is included in the entry header, 598 * then any outstanding bytes are simply skipped from the underlying 599 * stream without uncompressing them. This allows an entry to be safely 600 * closed even if the compression method is unsupported. 
     * <p>
     * In case we don't know the compressed size of this entry or have
     * already buffered too much data from the underlying stream to support
     * uncompression, then the uncompression process is completed and the
     * end position of the stream is adjusted based on the result of that
     * process.
     *
     * @throws IOException if an error occurs
     */
    private void closeEntry() throws IOException {
        if (closed) {
            throw new IOException("The stream is closed");
        }
        if (current == null) {
            return;
        }

        // Ensure all entry bytes are read
        if (current.bytesReadFromStream <= current.entry.getCompressedSize()
                && !current.hasDataDescriptor) {
            // known size, nothing buffered past the entry - skip raw bytes
            drainCurrentEntryData();
        } else {
            // decompress to the end, then push back whatever was buffered
            // beyond the entry's own compressed data
            skip(Long.MAX_VALUE);

            final long inB = current.entry.getMethod() == ZipArchiveOutputStream.DEFLATED
                ? getBytesInflated() : current.bytesRead;

            // this is at most a single read() operation and can't
            // exceed the range of int
            final int diff = (int) (current.bytesReadFromStream - inB);

            // Pushback any required bytes
            if (diff > 0) {
                pushback(buf.array(), buf.limit() - diff, diff);
            }
        }

        if (lastStoredEntry == null && current.hasDataDescriptor) {
            readDataDescriptor();
        }

        // reset per-entry state so the next header starts from a clean slate
        inf.reset();
        buf.clear().flip();
        current = null;
        lastStoredEntry = null;
    }

    /**
     * Read all data of the current entry from the underlying stream
     * that hasn't been read, yet.
     */
    private void drainCurrentEntryData() throws IOException {
        long remaining = current.entry.getCompressedSize() - current.bytesReadFromStream;
        while (remaining > 0) {
            final long n = in.read(buf.array(), 0, (int) Math.min(buf.capacity(), remaining));
            if (n < 0) {
                throw new EOFException("Truncated ZIP entry: "
                                       + ArchiveUtils.sanitize(current.entry.getName()));
            }
            count(n);
            remaining -= n;
        }
    }

    /**
     * Get the number of bytes Inflater has actually processed.
     *
     * <p>for Java &lt; Java7 the getBytes* methods in
     * Inflater/Deflater seem to return unsigned ints rather than
     * longs that start over with 0 at 2^32.</p>
     *
     * <p>The stream knows how many bytes it has read, but not how
     * many the Inflater actually consumed - it should be between the
     * total number of bytes read for the entry and the total number
     * minus the last read operation.  Here we just try to make the
     * value close enough to the bytes we've read by assuming the
     * number of bytes consumed must be smaller than (or equal to) the
     * number of bytes read but not smaller by more than 2^32.</p>
     */
    private long getBytesInflated() {
        long inB = inf.getBytesRead();
        if (current.bytesReadFromStream >= TWO_EXP_32) {
            // undo any 32-bit wrap-arounds of the inflater's counter
            while (inB + TWO_EXP_32 <= current.bytesReadFromStream) {
                inB += TWO_EXP_32;
            }
        }
        return inB;
    }

    /**
     * Fills {@code buf} from the wrapped stream and hands the bytes to the
     * inflater. Returns the number of bytes read, 0 or -1 on EOF.
     */
    private int fill() throws IOException {
        if (closed) {
            throw new IOException("The stream is closed");
        }
        final int length = in.read(buf.array());
        if (length > 0) {
            buf.limit(length);
            count(buf.limit());
            inf.setInput(buf.array(), 0, buf.limit());
        }
        return length;
    }

    /**
     * Reads exactly {@code b.length} bytes, throwing {@link EOFException}
     * if the stream ends early. Updates the bytes-read counter.
     */
    private void readFully(final byte[] b) throws IOException {
        final int count = IOUtils.readFully(in, b);
        count(count);
        if (count < b.length) {
            throw new EOFException();
        }
    }

    /**
     * Reads the data descriptor that follows the current entry's data and
     * stores CRC and sizes in the entry.
     */
    private void readDataDescriptor() throws IOException {
        readFully(WORD_BUF);
        ZipLong val = new ZipLong(WORD_BUF);
        if (ZipLong.DD_SIG.equals(val)) {
            // data descriptor with signature, skip sig
            readFully(WORD_BUF);
            val = new ZipLong(WORD_BUF);
        }
        current.entry.setCrc(val.getValue());

        // if there is a ZIP64 extra field, sizes are eight bytes
        // each, otherwise four bytes each.  Unfortunately some
        // implementations - namely Java7 - use eight bytes without
        // using a ZIP64 extra field -
        // http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=7073588

        // just read 16 bytes and check whether bytes nine to twelve
        // look like one of the signatures of what could follow a data
        // descriptor (ignoring archive decryption headers for now).
        // If so, push back eight bytes and assume sizes are four
        // bytes, otherwise sizes are eight bytes each.
        readFully(TWO_DWORD_BUF);
        final ZipLong potentialSig = new ZipLong(TWO_DWORD_BUF, DWORD);
        if (potentialSig.equals(ZipLong.CFH_SIG) || potentialSig.equals(ZipLong.LFH_SIG)) {
            pushback(TWO_DWORD_BUF, DWORD, DWORD);
            current.entry.setCompressedSize(ZipLong.getValue(TWO_DWORD_BUF));
            current.entry.setSize(ZipLong.getValue(TWO_DWORD_BUF, WORD));
        } else {
            current.entry.setCompressedSize(ZipEightByteInteger.getLongValue(TWO_DWORD_BUF));
            current.entry.setSize(ZipEightByteInteger.getLongValue(TWO_DWORD_BUF, DWORD));
        }
    }

    /**
     * Whether this entry requires a data descriptor this library can work with.
     *
     * @return true if allowStoredEntriesWithDataDescriptor is true,
     * the entry doesn't require any data descriptor or the method is
     * DEFLATED.
750 */ 751 private boolean supportsDataDescriptorFor(final ZipArchiveEntry entry) { 752 return !entry.getGeneralPurposeBit().usesDataDescriptor() 753 754 || (allowStoredEntriesWithDataDescriptor && entry.getMethod() == ZipEntry.STORED) 755 || entry.getMethod() == ZipEntry.DEFLATED; 756 } 757 758 /** 759 * Caches a stored entry that uses the data descriptor. 760 * 761 * <ul> 762 * <li>Reads a stored entry until the signature of a local file 763 * header, central directory header or data descriptor has been 764 * found.</li> 765 * <li>Stores all entry data in lastStoredEntry.</p> 766 * <li>Rewinds the stream to position at the data 767 * descriptor.</li> 768 * <li>reads the data descriptor</li> 769 * </ul> 770 * 771 * <p>After calling this method the entry should know its size, 772 * the entry's data is cached and the stream is positioned at the 773 * next local file or central directory header.</p> 774 */ 775 private void readStoredEntry() throws IOException { 776 final ByteArrayOutputStream bos = new ByteArrayOutputStream(); 777 int off = 0; 778 boolean done = false; 779 780 // length of DD without signature 781 final int ddLen = current.usesZip64 ? 
WORD + 2 * DWORD : 3 * WORD; 782 783 while (!done) { 784 final int r = in.read(buf.array(), off, ZipArchiveOutputStream.BUFFER_SIZE - off); 785 if (r <= 0) { 786 // read the whole archive without ever finding a 787 // central directory 788 throw new IOException("Truncated ZIP file"); 789 } 790 if (r + off < 4) { 791 // buffer too small to check for a signature, loop 792 off += r; 793 continue; 794 } 795 796 done = bufferContainsSignature(bos, off, r, ddLen); 797 if (!done) { 798 off = cacheBytesRead(bos, off, r, ddLen); 799 } 800 } 801 802 final byte[] b = bos.toByteArray(); 803 lastStoredEntry = new ByteArrayInputStream(b); 804 } 805 806 private static final byte[] LFH = ZipLong.LFH_SIG.getBytes(); 807 private static final byte[] CFH = ZipLong.CFH_SIG.getBytes(); 808 private static final byte[] DD = ZipLong.DD_SIG.getBytes(); 809 810 /** 811 * Checks whether the current buffer contains the signature of a 812 * "data descriptor", "local file header" or 813 * "central directory entry". 814 * 815 * <p>If it contains such a signature, reads the data descriptor 816 * and positions the stream right after the data descriptor.</p> 817 */ 818 private boolean bufferContainsSignature(final ByteArrayOutputStream bos, final int offset, final int lastRead, final int expectedDDLen) 819 throws IOException { 820 821 boolean done = false; 822 int readTooMuch = 0; 823 for (int i = 0; !done && i < lastRead - 4; i++) { 824 if (buf.array()[i] == LFH[0] && buf.array()[i + 1] == LFH[1]) { 825 if ((buf.array()[i + 2] == LFH[2] && buf.array()[i + 3] == LFH[3]) 826 || (buf.array()[i] == CFH[2] && buf.array()[i + 3] == CFH[3])) { 827 // found a LFH or CFH: 828 readTooMuch = offset + lastRead - i - expectedDDLen; 829 done = true; 830 } 831 else if (buf.array()[i + 2] == DD[2] && buf.array()[i + 3] == DD[3]) { 832 // found DD: 833 readTooMuch = offset + lastRead - i; 834 done = true; 835 } 836 if (done) { 837 // * push back bytes read in excess as well as the data 838 // descriptor 839 // * 
copy the remaining bytes to cache 840 // * read data descriptor 841 pushback(buf.array(), offset + lastRead - readTooMuch, readTooMuch); 842 bos.write(buf.array(), 0, i); 843 readDataDescriptor(); 844 } 845 } 846 } 847 return done; 848 } 849 850 /** 851 * If the last read bytes could hold a data descriptor and an 852 * incomplete signature then save the last bytes to the front of 853 * the buffer and cache everything in front of the potential data 854 * descriptor into the given ByteArrayOutputStream. 855 * 856 * <p>Data descriptor plus incomplete signature (3 bytes in the 857 * worst case) can be 20 bytes max.</p> 858 */ 859 private int cacheBytesRead(final ByteArrayOutputStream bos, int offset, final int lastRead, final int expecteDDLen) { 860 final int cacheable = offset + lastRead - expecteDDLen - 3; 861 if (cacheable > 0) { 862 bos.write(buf.array(), 0, cacheable); 863 System.arraycopy(buf.array(), cacheable, buf.array(), 0, expecteDDLen + 3); 864 offset = expecteDDLen + 3; 865 } else { 866 offset += lastRead; 867 } 868 return offset; 869 } 870 871 private void pushback(final byte[] buf, final int offset, final int length) throws IOException { 872 ((PushbackInputStream) in).unread(buf, offset, length); 873 pushedBackBytes(length); 874 } 875 876 // End of Central Directory Record 877 // end of central dir signature WORD 878 // number of this disk SHORT 879 // number of the disk with the 880 // start of the central directory SHORT 881 // total number of entries in the 882 // central directory on this disk SHORT 883 // total number of entries in 884 // the central directory SHORT 885 // size of the central directory WORD 886 // offset of start of central 887 // directory with respect to 888 // the starting disk number WORD 889 // .ZIP file comment length SHORT 890 // .ZIP file comment up to 64KB 891 // 892 893 /** 894 * Reads the stream until it find the "End of central directory 895 * record" and consumes it as well. 
896 */ 897 private void skipRemainderOfArchive() throws IOException { 898 // skip over central directory. One LFH has been read too much 899 // already. The calculation discounts file names and extra 900 // data so it will be too short. 901 realSkip(entriesRead * CFH_LEN - LFH_LEN); 902 findEocdRecord(); 903 realSkip(ZipFile.MIN_EOCD_SIZE - WORD /* signature */ - SHORT /* comment len */); 904 readFully(SHORT_BUF); 905 // file comment 906 realSkip(ZipShort.getValue(SHORT_BUF)); 907 } 908 909 /** 910 * Reads forward until the signature of the "End of central 911 * directory" record is found. 912 */ 913 private void findEocdRecord() throws IOException { 914 int currentByte = -1; 915 boolean skipReadCall = false; 916 while (skipReadCall || (currentByte = readOneByte()) > -1) { 917 skipReadCall = false; 918 if (!isFirstByteOfEocdSig(currentByte)) { 919 continue; 920 } 921 currentByte = readOneByte(); 922 if (currentByte != ZipArchiveOutputStream.EOCD_SIG[1]) { 923 if (currentByte == -1) { 924 break; 925 } 926 skipReadCall = isFirstByteOfEocdSig(currentByte); 927 continue; 928 } 929 currentByte = readOneByte(); 930 if (currentByte != ZipArchiveOutputStream.EOCD_SIG[2]) { 931 if (currentByte == -1) { 932 break; 933 } 934 skipReadCall = isFirstByteOfEocdSig(currentByte); 935 continue; 936 } 937 currentByte = readOneByte(); 938 if (currentByte == -1 939 || currentByte == ZipArchiveOutputStream.EOCD_SIG[3]) { 940 break; 941 } 942 skipReadCall = isFirstByteOfEocdSig(currentByte); 943 } 944 } 945 946 /** 947 * Skips bytes by reading from the underlying stream rather than 948 * the (potentially inflating) archive stream - which {@link 949 * #skip} would do. 950 * 951 * Also updates bytes-read counter. 952 */ 953 private void realSkip(final long value) throws IOException { 954 if (value >= 0) { 955 long skipped = 0; 956 while (skipped < value) { 957 final long rem = value - skipped; 958 final int x = in.read(SKIP_BUF, 0, (int) (SKIP_BUF.length > rem ? 
rem : SKIP_BUF.length));
                if (x == -1) {
                    // premature EOF - nothing left to skip
                    return;
                }
                count(x);
                skipped += x;
            }
            return;
        }
        // a negative skip count is a programming error
        throw new IllegalArgumentException();
    }

    /**
     * Reads bytes by reading from the underlying stream rather than
     * the (potentially inflating) archive stream - which {@link #read} would do.
     *
     * Also updates bytes-read counter.
     *
     * @return the byte read or -1 at end of stream
     */
    private int readOneByte() throws IOException {
        final int b = in.read();
        if (b != -1) {
            count(1);
        }
        return b;
    }

    /**
     * Whether the given byte matches the first byte of the "End of
     * central directory" signature.
     */
    private boolean isFirstByteOfEocdSig(final int b) {
        return b == ZipArchiveOutputStream.EOCD_SIG[0];
    }

    /**
     * Structure collecting information for the entry that is
     * currently being read.
     */
    private static final class CurrentEntry {

        /**
         * Current ZIP entry.
         */
        private final ZipArchiveEntry entry = new ZipArchiveEntry();

        /**
         * Does the entry use a data descriptor?
         */
        private boolean hasDataDescriptor;

        /**
         * Does the entry have a ZIP64 extended information extra field.
         */
        private boolean usesZip64;

        /**
         * Number of bytes of entry content read by the client if the
         * entry is STORED.
         */
        private long bytesRead;

        /**
         * Number of bytes of entry content read so far from the stream.
         *
         * <p>This may be more than the actual entry's length as some
         * stuff gets buffered up and needs to be pushed back when the
         * end of the entry has been reached.</p>
         */
        private long bytesReadFromStream;

        /**
         * The checksum calculated as the current entry is read.
         */
        private final CRC32 crc = new CRC32();

        /**
         * The input stream decompressing the data for shrunk and imploded entries.
1031 */ 1032 private InputStream in; 1033 } 1034 1035 /** 1036 * Bounded input stream adapted from commons-io 1037 */ 1038 private class BoundedInputStream extends InputStream { 1039 1040 /** the wrapped input stream */ 1041 private final InputStream in; 1042 1043 /** the max length to provide */ 1044 private final long max; 1045 1046 /** the number of bytes already returned */ 1047 private long pos = 0; 1048 1049 /** 1050 * Creates a new <code>BoundedInputStream</code> that wraps the given input 1051 * stream and limits it to a certain size. 1052 * 1053 * @param in The wrapped input stream 1054 * @param size The maximum number of bytes to return 1055 */ 1056 public BoundedInputStream(final InputStream in, final long size) { 1057 this.max = size; 1058 this.in = in; 1059 } 1060 1061 @Override 1062 public int read() throws IOException { 1063 if (max >= 0 && pos >= max) { 1064 return -1; 1065 } 1066 final int result = in.read(); 1067 pos++; 1068 count(1); 1069 current.bytesReadFromStream++; 1070 return result; 1071 } 1072 1073 @Override 1074 public int read(final byte[] b) throws IOException { 1075 return this.read(b, 0, b.length); 1076 } 1077 1078 @Override 1079 public int read(final byte[] b, final int off, final int len) throws IOException { 1080 if (max >= 0 && pos >= max) { 1081 return -1; 1082 } 1083 final long maxRead = max >= 0 ? Math.min(len, max - pos) : len; 1084 final int bytesRead = in.read(b, off, (int) maxRead); 1085 1086 if (bytesRead == -1) { 1087 return -1; 1088 } 1089 1090 pos += bytesRead; 1091 count(bytesRead); 1092 current.bytesReadFromStream += bytesRead; 1093 return bytesRead; 1094 } 1095 1096 @Override 1097 public long skip(final long n) throws IOException { 1098 final long toSkip = max >= 0 ? 
Math.min(n, max - pos) : n;
            final long skippedBytes = in.skip(toSkip);
            // NOTE(review): unlike the read methods, skip does not update
            // count() or bytesReadFromStream - presumably intentional as
            // the outer stream's skip goes through read(); confirm before
            // relying on the counters after a direct skip
            pos += skippedBytes;
            return skippedBytes;
        }

        @Override
        public int available() throws IOException {
            // report zero once the byte budget has been used up
            if (max >= 0 && pos >= max) {
                return 0;
            }
            return in.available();
        }
    }
}