001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, 013 * software distributed under the License is distributed on an 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 * KIND, either express or implied. See the License for the 016 * specific language governing permissions and limitations 017 * under the License. 018 */ 019package org.apache.commons.compress.archivers.zip; 020 021import java.io.ByteArrayInputStream; 022import java.io.ByteArrayOutputStream; 023import java.io.EOFException; 024import java.io.IOException; 025import java.io.InputStream; 026import java.io.PushbackInputStream; 027import java.nio.ByteBuffer; 028import java.util.zip.CRC32; 029import java.util.zip.DataFormatException; 030import java.util.zip.Inflater; 031import java.util.zip.ZipEntry; 032import java.util.zip.ZipException; 033 034import org.apache.commons.compress.archivers.ArchiveEntry; 035import org.apache.commons.compress.archivers.ArchiveInputStream; 036import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream; 037import org.apache.commons.compress.utils.IOUtils; 038 039import static org.apache.commons.compress.archivers.zip.ZipConstants.DWORD; 040import static org.apache.commons.compress.archivers.zip.ZipConstants.SHORT; 041import static org.apache.commons.compress.archivers.zip.ZipConstants.WORD; 042import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC; 043 044/** 045 * Implements an input stream that can read Zip 
archives. 046 * 047 * <p>As of Apache Commons Compress it transparently supports Zip64 048 * extensions and thus individual entries and archives larger than 4 049 * GB or with more than 65536 entries.</p> 050 * 051 * <p>The {@link ZipFile} class is preferred when reading from files 052 * as {@link ZipArchiveInputStream} is limited by not being able to 053 * read the central directory header before returning entries. In 054 * particular {@link ZipArchiveInputStream}</p> 055 * 056 * <ul> 057 * 058 * <li>may return entries that are not part of the central directory 059 * at all and shouldn't be considered part of the archive.</li> 060 * 061 * <li>may return several entries with the same name.</li> 062 * 063 * <li>will not return internal or external attributes.</li> 064 * 065 * <li>may return incomplete extra field data.</li> 066 * 067 * <li>may return unknown sizes and CRC values for entries until the 068 * next entry has been reached if the archive uses the data 069 * descriptor feature.</li> 070 * 071 * </ul> 072 * 073 * @see ZipFile 074 * @NotThreadSafe 075 */ 076public class ZipArchiveInputStream extends ArchiveInputStream { 077 078 /** The zip encoding to use for filenames and the file comment. */ 079 private final ZipEncoding zipEncoding; 080 081 // the provided encoding (for unit tests) 082 final String encoding; 083 084 /** Whether to look for and use Unicode extra fields. */ 085 private final boolean useUnicodeExtraFields; 086 087 /** Wrapped stream, will always be a PushbackInputStream. */ 088 private final InputStream in; 089 090 /** Inflater used for all deflated entries. */ 091 private final Inflater inf = new Inflater(true); 092 093 /** Buffer used to read from the wrapped stream. */ 094 private final ByteBuffer buf = ByteBuffer.allocate(ZipArchiveOutputStream.BUFFER_SIZE); 095 096 /** The entry that is currently being read. */ 097 private CurrentEntry current = null; 098 099 /** Whether the stream has been closed. 
*/ 100 private boolean closed = false; 101 102 /** Whether the stream has reached the central directory - and thus found all entries. */ 103 private boolean hitCentralDirectory = false; 104 105 /** 106 * When reading a stored entry that uses the data descriptor this 107 * stream has to read the full entry and caches it. This is the 108 * cache. 109 */ 110 private ByteArrayInputStream lastStoredEntry = null; 111 112 /** Whether the stream will try to read STORED entries that use a data descriptor. */ 113 private boolean allowStoredEntriesWithDataDescriptor = false; 114 115 private static final int LFH_LEN = 30; 116 /* 117 local file header signature WORD 118 version needed to extract SHORT 119 general purpose bit flag SHORT 120 compression method SHORT 121 last mod file time SHORT 122 last mod file date SHORT 123 crc-32 WORD 124 compressed size WORD 125 uncompressed size WORD 126 file name length SHORT 127 extra field length SHORT 128 */ 129 130 private static final int CFH_LEN = 46; 131 /* 132 central file header signature WORD 133 version made by SHORT 134 version needed to extract SHORT 135 general purpose bit flag SHORT 136 compression method SHORT 137 last mod file time SHORT 138 last mod file date SHORT 139 crc-32 WORD 140 compressed size WORD 141 uncompressed size WORD 142 file name length SHORT 143 extra field length SHORT 144 file comment length SHORT 145 disk number start SHORT 146 internal file attributes SHORT 147 external file attributes WORD 148 relative offset of local header WORD 149 */ 150 151 private static final long TWO_EXP_32 = ZIP64_MAGIC + 1; 152 153 // cached buffers - must only be used locally in the class (COMPRESS-172 - reduce garbage collection) 154 private final byte[] LFH_BUF = new byte[LFH_LEN]; 155 private final byte[] SKIP_BUF = new byte[1024]; 156 private final byte[] SHORT_BUF = new byte[SHORT]; 157 private final byte[] WORD_BUF = new byte[WORD]; 158 private final byte[] TWO_DWORD_BUF = new byte[2 * DWORD]; 159 160 private int 
entriesRead = 0; 161 162 /** 163 * Create an instance using UTF-8 encoding 164 * @param inputStream the stream to wrap 165 */ 166 public ZipArchiveInputStream(InputStream inputStream) { 167 this(inputStream, ZipEncodingHelper.UTF8); 168 } 169 170 /** 171 * Create an instance using the specified encoding 172 * @param inputStream the stream to wrap 173 * @param encoding the encoding to use for file names, use null 174 * for the platform's default encoding 175 * @since 1.5 176 */ 177 public ZipArchiveInputStream(InputStream inputStream, String encoding) { 178 this(inputStream, encoding, true); 179 } 180 181 /** 182 * Create an instance using the specified encoding 183 * @param inputStream the stream to wrap 184 * @param encoding the encoding to use for file names, use null 185 * for the platform's default encoding 186 * @param useUnicodeExtraFields whether to use InfoZIP Unicode 187 * Extra Fields (if present) to set the file names. 188 */ 189 public ZipArchiveInputStream(InputStream inputStream, String encoding, boolean useUnicodeExtraFields) { 190 this(inputStream, encoding, useUnicodeExtraFields, false); 191 } 192 193 /** 194 * Create an instance using the specified encoding 195 * @param inputStream the stream to wrap 196 * @param encoding the encoding to use for file names, use null 197 * for the platform's default encoding 198 * @param useUnicodeExtraFields whether to use InfoZIP Unicode 199 * Extra Fields (if present) to set the file names. 
200 * @param allowStoredEntriesWithDataDescriptor whether the stream 201 * will try to read STORED entries that use a data descriptor 202 * @since 1.1 203 */ 204 public ZipArchiveInputStream(InputStream inputStream, 205 String encoding, 206 boolean useUnicodeExtraFields, 207 boolean allowStoredEntriesWithDataDescriptor) { 208 this.encoding = encoding; 209 zipEncoding = ZipEncodingHelper.getZipEncoding(encoding); 210 this.useUnicodeExtraFields = useUnicodeExtraFields; 211 in = new PushbackInputStream(inputStream, buf.capacity()); 212 this.allowStoredEntriesWithDataDescriptor = 213 allowStoredEntriesWithDataDescriptor; 214 // haven't read anything so far 215 buf.limit(0); 216 } 217 218 public ZipArchiveEntry getNextZipEntry() throws IOException { 219 boolean firstEntry = true; 220 if (closed || hitCentralDirectory) { 221 return null; 222 } 223 if (current != null) { 224 closeEntry(); 225 firstEntry = false; 226 } 227 228 try { 229 if (firstEntry) { 230 // split archives have a special signature before the 231 // first local file header - look for it and fail with 232 // the appropriate error message if this is a split 233 // archive. 234 readFirstLocalFileHeader(LFH_BUF); 235 } else { 236 readFully(LFH_BUF); 237 } 238 } catch (EOFException e) { 239 return null; 240 } 241 242 ZipLong sig = new ZipLong(LFH_BUF); 243 if (sig.equals(ZipLong.CFH_SIG) || sig.equals(ZipLong.AED_SIG)) { 244 hitCentralDirectory = true; 245 skipRemainderOfArchive(); 246 } 247 if (!sig.equals(ZipLong.LFH_SIG)) { 248 return null; 249 } 250 251 int off = WORD; 252 current = new CurrentEntry(); 253 254 int versionMadeBy = ZipShort.getValue(LFH_BUF, off); 255 off += SHORT; 256 current.entry.setPlatform((versionMadeBy >> ZipFile.BYTE_SHIFT) & ZipFile.NIBLET_MASK); 257 258 final GeneralPurposeBit gpFlag = GeneralPurposeBit.parse(LFH_BUF, off); 259 final boolean hasUTF8Flag = gpFlag.usesUTF8ForNames(); 260 final ZipEncoding entryEncoding = hasUTF8Flag ? 
ZipEncodingHelper.UTF8_ZIP_ENCODING : zipEncoding; 261 current.hasDataDescriptor = gpFlag.usesDataDescriptor(); 262 current.entry.setGeneralPurposeBit(gpFlag); 263 264 off += SHORT; 265 266 current.entry.setMethod(ZipShort.getValue(LFH_BUF, off)); 267 off += SHORT; 268 269 long time = ZipUtil.dosToJavaTime(ZipLong.getValue(LFH_BUF, off)); 270 current.entry.setTime(time); 271 off += WORD; 272 273 ZipLong size = null, cSize = null; 274 if (!current.hasDataDescriptor) { 275 current.entry.setCrc(ZipLong.getValue(LFH_BUF, off)); 276 off += WORD; 277 278 cSize = new ZipLong(LFH_BUF, off); 279 off += WORD; 280 281 size = new ZipLong(LFH_BUF, off); 282 off += WORD; 283 } else { 284 off += 3 * WORD; 285 } 286 287 int fileNameLen = ZipShort.getValue(LFH_BUF, off); 288 289 off += SHORT; 290 291 int extraLen = ZipShort.getValue(LFH_BUF, off); 292 off += SHORT; 293 294 byte[] fileName = new byte[fileNameLen]; 295 readFully(fileName); 296 current.entry.setName(entryEncoding.decode(fileName), fileName); 297 298 byte[] extraData = new byte[extraLen]; 299 readFully(extraData); 300 current.entry.setExtra(extraData); 301 302 if (!hasUTF8Flag && useUnicodeExtraFields) { 303 ZipUtil.setNameAndCommentFromExtraFields(current.entry, fileName, null); 304 } 305 306 processZip64Extra(size, cSize); 307 308 if (current.entry.getCompressedSize() != ZipArchiveEntry.SIZE_UNKNOWN) { 309 if (current.entry.getMethod() == ZipMethod.UNSHRINKING.getCode()) { 310 current.in = new UnshrinkingInputStream(new BoundedInputStream(in, current.entry.getCompressedSize())); 311 } else if (current.entry.getMethod() == ZipMethod.IMPLODING.getCode()) { 312 current.in = new ExplodingInputStream( 313 current.entry.getGeneralPurposeBit().getSlidingDictionarySize(), 314 current.entry.getGeneralPurposeBit().getNumberOfShannonFanoTrees(), 315 new BoundedInputStream(in, current.entry.getCompressedSize())); 316 } else if (current.entry.getMethod() == ZipMethod.BZIP2.getCode()) { 317 current.in = new 
BZip2CompressorInputStream(new BoundedInputStream(in, current.entry.getCompressedSize())); 318 } 319 } 320 321 entriesRead++; 322 return current.entry; 323 } 324 325 /** 326 * Fills the given array with the first local file header and 327 * deals with splitting/spanning markers that may prefix the first 328 * LFH. 329 */ 330 private void readFirstLocalFileHeader(byte[] lfh) throws IOException { 331 readFully(lfh); 332 ZipLong sig = new ZipLong(lfh); 333 if (sig.equals(ZipLong.DD_SIG)) { 334 throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException.Feature.SPLITTING); 335 } 336 337 if (sig.equals(ZipLong.SINGLE_SEGMENT_SPLIT_MARKER)) { 338 // The archive is not really split as only one segment was 339 // needed in the end. Just skip over the marker. 340 byte[] missedLfhBytes = new byte[4]; 341 readFully(missedLfhBytes); 342 System.arraycopy(lfh, 4, lfh, 0, LFH_LEN - 4); 343 System.arraycopy(missedLfhBytes, 0, lfh, LFH_LEN - 4, 4); 344 } 345 } 346 347 /** 348 * Records whether a Zip64 extra is present and sets the size 349 * information from it if sizes are 0xFFFFFFFF and the entry 350 * doesn't use a data descriptor. 
351 */ 352 private void processZip64Extra(ZipLong size, ZipLong cSize) { 353 Zip64ExtendedInformationExtraField z64 = 354 (Zip64ExtendedInformationExtraField) 355 current.entry.getExtraField(Zip64ExtendedInformationExtraField.HEADER_ID); 356 current.usesZip64 = z64 != null; 357 if (!current.hasDataDescriptor) { 358 if (z64 != null // same as current.usesZip64 but avoids NPE warning 359 && (cSize.equals(ZipLong.ZIP64_MAGIC) || size.equals(ZipLong.ZIP64_MAGIC)) ) { 360 current.entry.setCompressedSize(z64.getCompressedSize().getLongValue()); 361 current.entry.setSize(z64.getSize().getLongValue()); 362 } else { 363 current.entry.setCompressedSize(cSize.getValue()); 364 current.entry.setSize(size.getValue()); 365 } 366 } 367 } 368 369 @Override 370 public ArchiveEntry getNextEntry() throws IOException { 371 return getNextZipEntry(); 372 } 373 374 /** 375 * Whether this class is able to read the given entry. 376 * 377 * <p>May return false if it is set up to use encryption or a 378 * compression method that hasn't been implemented yet.</p> 379 * @since 1.1 380 */ 381 @Override 382 public boolean canReadEntryData(ArchiveEntry ae) { 383 if (ae instanceof ZipArchiveEntry) { 384 ZipArchiveEntry ze = (ZipArchiveEntry) ae; 385 return ZipUtil.canHandleEntryData(ze) 386 && supportsDataDescriptorFor(ze); 387 388 } 389 return false; 390 } 391 392 @Override 393 public int read(byte[] buffer, int offset, int length) throws IOException { 394 if (closed) { 395 throw new IOException("The stream is closed"); 396 } 397 398 if (current == null) { 399 return -1; 400 } 401 402 // avoid int overflow, check null buffer 403 if (offset > buffer.length || length < 0 || offset < 0 || buffer.length - offset < length) { 404 throw new ArrayIndexOutOfBoundsException(); 405 } 406 407 ZipUtil.checkRequestedFeatures(current.entry); 408 if (!supportsDataDescriptorFor(current.entry)) { 409 throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException.Feature.DATA_DESCRIPTOR, 410 current.entry); 
411 } 412 413 int read; 414 if (current.entry.getMethod() == ZipArchiveOutputStream.STORED) { 415 read = readStored(buffer, offset, length); 416 } else if (current.entry.getMethod() == ZipArchiveOutputStream.DEFLATED) { 417 read = readDeflated(buffer, offset, length); 418 } else if (current.entry.getMethod() == ZipMethod.UNSHRINKING.getCode() 419 || current.entry.getMethod() == ZipMethod.IMPLODING.getCode() 420 || current.entry.getMethod() == ZipMethod.BZIP2.getCode()) { 421 read = current.in.read(buffer, offset, length); 422 } else { 423 throw new UnsupportedZipFeatureException(ZipMethod.getMethodByCode(current.entry.getMethod()), 424 current.entry); 425 } 426 427 if (read >= 0) { 428 current.crc.update(buffer, offset, read); 429 } 430 431 return read; 432 } 433 434 /** 435 * Implementation of read for STORED entries. 436 */ 437 private int readStored(byte[] buffer, int offset, int length) throws IOException { 438 439 if (current.hasDataDescriptor) { 440 if (lastStoredEntry == null) { 441 readStoredEntry(); 442 } 443 return lastStoredEntry.read(buffer, offset, length); 444 } 445 446 long csize = current.entry.getSize(); 447 if (current.bytesRead >= csize) { 448 return -1; 449 } 450 451 if (buf.position() >= buf.limit()) { 452 buf.position(0); 453 int l = in.read(buf.array()); 454 if (l == -1) { 455 return -1; 456 } 457 buf.limit(l); 458 459 count(l); 460 current.bytesReadFromStream += l; 461 } 462 463 int toRead = Math.min(buf.remaining(), length); 464 if ((csize - current.bytesRead) < toRead) { 465 // if it is smaller than toRead then it fits into an int 466 toRead = (int) (csize - current.bytesRead); 467 } 468 buf.get(buffer, offset, toRead); 469 current.bytesRead += toRead; 470 return toRead; 471 } 472 473 /** 474 * Implementation of read for DEFLATED entries. 
475 */ 476 private int readDeflated(byte[] buffer, int offset, int length) throws IOException { 477 int read = readFromInflater(buffer, offset, length); 478 if (read <= 0) { 479 if (inf.finished()) { 480 return -1; 481 } else if (inf.needsDictionary()) { 482 throw new ZipException("This archive needs a preset dictionary" 483 + " which is not supported by Commons" 484 + " Compress."); 485 } else if (read == -1) { 486 throw new IOException("Truncated ZIP file"); 487 } 488 } 489 return read; 490 } 491 492 /** 493 * Potentially reads more bytes to fill the inflater's buffer and 494 * reads from it. 495 */ 496 private int readFromInflater(byte[] buffer, int offset, int length) throws IOException { 497 int read = 0; 498 do { 499 if (inf.needsInput()) { 500 int l = fill(); 501 if (l > 0) { 502 current.bytesReadFromStream += buf.limit(); 503 } else if (l == -1) { 504 return -1; 505 } else { 506 break; 507 } 508 } 509 try { 510 read = inf.inflate(buffer, offset, length); 511 } catch (DataFormatException e) { 512 throw (IOException) new ZipException(e.getMessage()).initCause(e); 513 } 514 } while (read == 0 && inf.needsInput()); 515 return read; 516 } 517 518 @Override 519 public void close() throws IOException { 520 if (!closed) { 521 closed = true; 522 try { 523 in.close(); 524 } finally { 525 inf.end(); 526 } 527 } 528 } 529 530 /** 531 * Skips over and discards value bytes of data from this input 532 * stream. 533 * 534 * <p>This implementation may end up skipping over some smaller 535 * number of bytes, possibly 0, if and only if it reaches the end 536 * of the underlying stream.</p> 537 * 538 * <p>The actual number of bytes skipped is returned.</p> 539 * 540 * @param value the number of bytes to be skipped. 541 * @return the actual number of bytes skipped. 542 * @throws IOException - if an I/O error occurs. 543 * @throws IllegalArgumentException - if value is negative. 
544 */ 545 @Override 546 public long skip(long value) throws IOException { 547 if (value >= 0) { 548 long skipped = 0; 549 while (skipped < value) { 550 long rem = value - skipped; 551 int x = read(SKIP_BUF, 0, (int) (SKIP_BUF.length > rem ? rem : SKIP_BUF.length)); 552 if (x == -1) { 553 return skipped; 554 } 555 skipped += x; 556 } 557 return skipped; 558 } 559 throw new IllegalArgumentException(); 560 } 561 562 /** 563 * Checks if the signature matches what is expected for a zip file. 564 * Does not currently handle self-extracting zips which may have arbitrary 565 * leading content. 566 * 567 * @param signature the bytes to check 568 * @param length the number of bytes to check 569 * @return true, if this stream is a zip archive stream, false otherwise 570 */ 571 public static boolean matches(byte[] signature, int length) { 572 if (length < ZipArchiveOutputStream.LFH_SIG.length) { 573 return false; 574 } 575 576 return checksig(signature, ZipArchiveOutputStream.LFH_SIG) // normal file 577 || checksig(signature, ZipArchiveOutputStream.EOCD_SIG) // empty zip 578 || checksig(signature, ZipArchiveOutputStream.DD_SIG) // split zip 579 || checksig(signature, ZipLong.SINGLE_SEGMENT_SPLIT_MARKER.getBytes()); 580 } 581 582 private static boolean checksig(byte[] signature, byte[] expected) { 583 for (int i = 0; i < expected.length; i++) { 584 if (signature[i] != expected[i]) { 585 return false; 586 } 587 } 588 return true; 589 } 590 591 /** 592 * Closes the current ZIP archive entry and positions the underlying 593 * stream to the beginning of the next entry. All per-entry variables 594 * and data structures are cleared. 595 * <p> 596 * If the compressed size of this entry is included in the entry header, 597 * then any outstanding bytes are simply skipped from the underlying 598 * stream without uncompressing them. This allows an entry to be safely 599 * closed even if the compression method is unsupported. 
600 * <p> 601 * In case we don't know the compressed size of this entry or have 602 * already buffered too much data from the underlying stream to support 603 * uncompression, then the uncompression process is completed and the 604 * end position of the stream is adjusted based on the result of that 605 * process. 606 * 607 * @throws IOException if an error occurs 608 */ 609 private void closeEntry() throws IOException { 610 if (closed) { 611 throw new IOException("The stream is closed"); 612 } 613 if (current == null) { 614 return; 615 } 616 617 // Ensure all entry bytes are read 618 if (current.bytesReadFromStream <= current.entry.getCompressedSize() 619 && !current.hasDataDescriptor) { 620 drainCurrentEntryData(); 621 } else { 622 skip(Long.MAX_VALUE); 623 624 long inB = current.entry.getMethod() == ZipArchiveOutputStream.DEFLATED 625 ? getBytesInflated() : current.bytesRead; 626 627 // this is at most a single read() operation and can't 628 // exceed the range of int 629 int diff = (int) (current.bytesReadFromStream - inB); 630 631 // Pushback any required bytes 632 if (diff > 0) { 633 pushback(buf.array(), buf.limit() - diff, diff); 634 } 635 } 636 637 if (lastStoredEntry == null && current.hasDataDescriptor) { 638 readDataDescriptor(); 639 } 640 641 inf.reset(); 642 buf.clear().flip(); 643 current = null; 644 lastStoredEntry = null; 645 } 646 647 /** 648 * Read all data of the current entry from the underlying stream 649 * that hasn't been read, yet. 650 */ 651 private void drainCurrentEntryData() throws IOException { 652 long remaining = current.entry.getCompressedSize() - current.bytesReadFromStream; 653 while (remaining > 0) { 654 long n = in.read(buf.array(), 0, (int) Math.min(buf.capacity(), remaining)); 655 if (n < 0) { 656 throw new EOFException("Truncated ZIP entry: " + current.entry.getName()); 657 } else { 658 count(n); 659 remaining -= n; 660 } 661 } 662 } 663 664 /** 665 * Get the number of bytes Inflater has actually processed. 
666 * 667 * <p>for Java < Java7 the getBytes* methods in 668 * Inflater/Deflater seem to return unsigned ints rather than 669 * longs that start over with 0 at 2^32.</p> 670 * 671 * <p>The stream knows how many bytes it has read, but not how 672 * many the Inflater actually consumed - it should be between the 673 * total number of bytes read for the entry and the total number 674 * minus the last read operation. Here we just try to make the 675 * value close enough to the bytes we've read by assuming the 676 * number of bytes consumed must be smaller than (or equal to) the 677 * number of bytes read but not smaller by more than 2^32.</p> 678 */ 679 private long getBytesInflated() { 680 long inB = inf.getBytesRead(); 681 if (current.bytesReadFromStream >= TWO_EXP_32) { 682 while (inB + TWO_EXP_32 <= current.bytesReadFromStream) { 683 inB += TWO_EXP_32; 684 } 685 } 686 return inB; 687 } 688 689 private int fill() throws IOException { 690 if (closed) { 691 throw new IOException("The stream is closed"); 692 } 693 int length = in.read(buf.array()); 694 if (length > 0) { 695 buf.limit(length); 696 count(buf.limit()); 697 inf.setInput(buf.array(), 0, buf.limit()); 698 } 699 return length; 700 } 701 702 private void readFully(byte[] b) throws IOException { 703 int count = IOUtils.readFully(in, b); 704 count(count); 705 if (count < b.length) { 706 throw new EOFException(); 707 } 708 } 709 710 private void readDataDescriptor() throws IOException { 711 readFully(WORD_BUF); 712 ZipLong val = new ZipLong(WORD_BUF); 713 if (ZipLong.DD_SIG.equals(val)) { 714 // data descriptor with signature, skip sig 715 readFully(WORD_BUF); 716 val = new ZipLong(WORD_BUF); 717 } 718 current.entry.setCrc(val.getValue()); 719 720 // if there is a ZIP64 extra field, sizes are eight bytes 721 // each, otherwise four bytes each. 
Unfortunately some 722 // implementations - namely Java7 - use eight bytes without 723 // using a ZIP64 extra field - 724 // http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=7073588 725 726 // just read 16 bytes and check whether bytes nine to twelve 727 // look like one of the signatures of what could follow a data 728 // descriptor (ignoring archive decryption headers for now). 729 // If so, push back eight bytes and assume sizes are four 730 // bytes, otherwise sizes are eight bytes each. 731 readFully(TWO_DWORD_BUF); 732 ZipLong potentialSig = new ZipLong(TWO_DWORD_BUF, DWORD); 733 if (potentialSig.equals(ZipLong.CFH_SIG) || potentialSig.equals(ZipLong.LFH_SIG)) { 734 pushback(TWO_DWORD_BUF, DWORD, DWORD); 735 current.entry.setCompressedSize(ZipLong.getValue(TWO_DWORD_BUF)); 736 current.entry.setSize(ZipLong.getValue(TWO_DWORD_BUF, WORD)); 737 } else { 738 current.entry.setCompressedSize(ZipEightByteInteger.getLongValue(TWO_DWORD_BUF)); 739 current.entry.setSize(ZipEightByteInteger.getLongValue(TWO_DWORD_BUF, DWORD)); 740 } 741 } 742 743 /** 744 * Whether this entry requires a data descriptor this library can work with. 745 * 746 * @return true if allowStoredEntriesWithDataDescriptor is true, 747 * the entry doesn't require any data descriptor or the method is 748 * DEFLATED. 749 */ 750 private boolean supportsDataDescriptorFor(ZipArchiveEntry entry) { 751 return !entry.getGeneralPurposeBit().usesDataDescriptor() 752 753 || (allowStoredEntriesWithDataDescriptor && entry.getMethod() == ZipEntry.STORED) 754 || entry.getMethod() == ZipEntry.DEFLATED; 755 } 756 757 /** 758 * Caches a stored entry that uses the data descriptor. 
759 * 760 * <ul> 761 * <li>Reads a stored entry until the signature of a local file 762 * header, central directory header or data descriptor has been 763 * found.</li> 764 * <li>Stores all entry data in lastStoredEntry.</p> 765 * <li>Rewinds the stream to position at the data 766 * descriptor.</li> 767 * <li>reads the data descriptor</li> 768 * </ul> 769 * 770 * <p>After calling this method the entry should know its size, 771 * the entry's data is cached and the stream is positioned at the 772 * next local file or central directory header.</p> 773 */ 774 private void readStoredEntry() throws IOException { 775 ByteArrayOutputStream bos = new ByteArrayOutputStream(); 776 int off = 0; 777 boolean done = false; 778 779 // length of DD without signature 780 int ddLen = current.usesZip64 ? WORD + 2 * DWORD : 3 * WORD; 781 782 while (!done) { 783 int r = in.read(buf.array(), off, ZipArchiveOutputStream.BUFFER_SIZE - off); 784 if (r <= 0) { 785 // read the whole archive without ever finding a 786 // central directory 787 throw new IOException("Truncated ZIP file"); 788 } 789 if (r + off < 4) { 790 // buffer too small to check for a signature, loop 791 off += r; 792 continue; 793 } 794 795 done = bufferContainsSignature(bos, off, r, ddLen); 796 if (!done) { 797 off = cacheBytesRead(bos, off, r, ddLen); 798 } 799 } 800 801 byte[] b = bos.toByteArray(); 802 lastStoredEntry = new ByteArrayInputStream(b); 803 } 804 805 private static final byte[] LFH = ZipLong.LFH_SIG.getBytes(); 806 private static final byte[] CFH = ZipLong.CFH_SIG.getBytes(); 807 private static final byte[] DD = ZipLong.DD_SIG.getBytes(); 808 809 /** 810 * Checks whether the current buffer contains the signature of a 811 * "data descriptor", "local file header" or 812 * "central directory entry". 
813 * 814 * <p>If it contains such a signature, reads the data descriptor 815 * and positions the stream right after the data descriptor.</p> 816 */ 817 private boolean bufferContainsSignature(ByteArrayOutputStream bos, int offset, int lastRead, int expectedDDLen) 818 throws IOException { 819 820 boolean done = false; 821 int readTooMuch = 0; 822 for (int i = 0; !done && i < lastRead - 4; i++) { 823 if (buf.array()[i] == LFH[0] && buf.array()[i + 1] == LFH[1]) { 824 if ((buf.array()[i + 2] == LFH[2] && buf.array()[i + 3] == LFH[3]) 825 || (buf.array()[i] == CFH[2] && buf.array()[i + 3] == CFH[3])) { 826 // found a LFH or CFH: 827 readTooMuch = offset + lastRead - i - expectedDDLen; 828 done = true; 829 } 830 else if (buf.array()[i + 2] == DD[2] && buf.array()[i + 3] == DD[3]) { 831 // found DD: 832 readTooMuch = offset + lastRead - i; 833 done = true; 834 } 835 if (done) { 836 // * push back bytes read in excess as well as the data 837 // descriptor 838 // * copy the remaining bytes to cache 839 // * read data descriptor 840 pushback(buf.array(), offset + lastRead - readTooMuch, readTooMuch); 841 bos.write(buf.array(), 0, i); 842 readDataDescriptor(); 843 } 844 } 845 } 846 return done; 847 } 848 849 /** 850 * If the last read bytes could hold a data descriptor and an 851 * incomplete signature then save the last bytes to the front of 852 * the buffer and cache everything in front of the potential data 853 * descriptor into the given ByteArrayOutputStream. 
*
     * <p>Data descriptor plus incomplete signature (3 bytes in the
     * worst case) can be 20 bytes max.</p>
     */
    private int cacheBytesRead(ByteArrayOutputStream bos, int offset, int lastRead, int expecteDDLen) {
        // Everything in front of the last (expecteDDLen + 3) buffered bytes can be handed to bos.
        final int cacheable = offset + lastRead - expecteDDLen - 3;
        if (cacheable > 0) {
            bos.write(buf.array(), 0, cacheable);
            // Keep the retained tail by moving it to the start of the buffer.
            System.arraycopy(buf.array(), cacheable, buf.array(), 0, expecteDDLen + 3);
            offset = expecteDDLen + 3;
        } else {
            // Not enough data buffered yet: only advance the fill position.
            offset += lastRead;
        }
        return offset;
    }

    /**
     * Hands bytes back to the underlying stream and reports the
     * number of returned bytes via {@code pushedBackBytes}.
     *
     * @param buf array holding the bytes to push back
     * @param offset index of the first byte to push back
     * @param length number of bytes to push back
     * @throws IOException if the underlying stream cannot take the bytes back
     */
    private void pushback(byte[] buf, int offset, int length) throws IOException {
        // NOTE(review): assumes "in" is always a PushbackInputStream - confirm against the constructor.
        ((PushbackInputStream) in).unread(buf, offset, length);
        pushedBackBytes(length);
    }

    // End of Central Directory Record
    //   end of central dir signature    WORD
    //   number of this disk             SHORT
    //   number of the disk with the
    //   start of the central directory  SHORT
    //   total number of entries in the
    //   central directory on this disk  SHORT
    //   total number of entries in
    //   the central directory           SHORT
    //   size of the central directory   WORD
    //   offset of start of central
    //   directory with respect to
    //   the starting disk number        WORD
    //   .ZIP file comment length        SHORT
    //   .ZIP file comment               up to 64KB
    //

    /**
     * Reads the stream until it finds the "End of central directory
     * record" and consumes it as well.
     *
     * @throws IOException if reading from the underlying stream fails
     */
    private void skipRemainderOfArchive() throws IOException {
        // skip over central directory. One LFH has been read too much
        // already.  The calculation discounts file names and extra
        // data so it will be too short.
        // The arithmetic is widened to long up front so that a large
        // entry count cannot overflow before realSkip(long) sees it.
        realSkip((long) entriesRead * CFH_LEN - LFH_LEN);
        findEocdRecord();
        realSkip(ZipFile.MIN_EOCD_SIZE - WORD /* signature */ - SHORT /* comment len */);
        readFully(SHORT_BUF);
        // file comment
        realSkip(ZipShort.getValue(SHORT_BUF));
    }

    /**
     * Reads forward until the signature of the "End of central
     * directory" record is found.
     *
     * <p>Returns as soon as the four signature bytes have been read
     * in sequence or the underlying stream is exhausted.</p>
     *
     * @throws IOException if reading from the underlying stream fails
     */
    private void findEocdRecord() throws IOException {
        int currentByte = -1;
        // true when currentByte already holds a candidate first
        // signature byte, so the loop must not read a fresh byte
        boolean skipReadCall = false;
        while (skipReadCall || (currentByte = readOneByte()) > -1) {
            skipReadCall = false;
            if (!isFirstByteOfEocdSig(currentByte)) {
                continue;
            }
            currentByte = readOneByte();
            if (currentByte != ZipArchiveOutputStream.EOCD_SIG[1]) {
                if (currentByte == -1) {
                    break;
                }
                // the mismatching byte may itself start a signature
                skipReadCall = isFirstByteOfEocdSig(currentByte);
                continue;
            }
            currentByte = readOneByte();
            if (currentByte != ZipArchiveOutputStream.EOCD_SIG[2]) {
                if (currentByte == -1) {
                    break;
                }
                skipReadCall = isFirstByteOfEocdSig(currentByte);
                continue;
            }
            currentByte = readOneByte();
            if (currentByte == -1
                || currentByte == ZipArchiveOutputStream.EOCD_SIG[3]) {
                // either end of stream or the complete signature
                break;
            }
            skipReadCall = isFirstByteOfEocdSig(currentByte);
        }
    }

    /**
     * Skips bytes by reading from the underlying stream rather than
     * the (potentially inflating) archive stream - which {@link
     * #skip} would do.
     *
     * <p>Also updates bytes-read counter. If the underlying stream
     * ends before {@code value} bytes have been consumed the method
     * returns silently.</p>
     *
     * @param value number of bytes to skip, must not be negative
     * @throws IOException if reading from the underlying stream fails
     * @throws IllegalArgumentException if {@code value} is negative
     */
    private void realSkip(long value) throws IOException {
        if (value < 0) {
            throw new IllegalArgumentException();
        }
        long skipped = 0;
        while (skipped < value) {
            final long rem = value - skipped;
            // never request more than the scratch buffer can hold
            final int chunk = (int) Math.min(SKIP_BUF.length, rem);
            final int x = in.read(SKIP_BUF, 0, chunk);
            if (x == -1) {
                return;
            }
            count(x);
            skipped += x;
        }
    }

    /**
     * Reads bytes by reading from the underlying stream rather than
     * the (potentially inflating) archive stream - which {@link #read} would do.
     *
     * <p>Also updates bytes-read counter.</p>
     *
     * @return the byte read or -1 if the underlying stream is exhausted
     * @throws IOException if reading from the underlying stream fails
     */
    private int readOneByte() throws IOException {
        int b = in.read();
        if (b != -1) {
            count(1);
        }
        return b;
    }

    /**
     * Checks whether the given value equals the first byte of the
     * "End of central directory" signature.
     */
    private boolean isFirstByteOfEocdSig(int b) {
        return b == ZipArchiveOutputStream.EOCD_SIG[0];
    }

    /**
     * Structure collecting information for the entry that is
     * currently being read.
     */
    private static final class CurrentEntry {

        /**
         * Current ZIP entry.
         */
        private final ZipArchiveEntry entry = new ZipArchiveEntry();

        /**
         * Does the entry use a data descriptor?
         */
        private boolean hasDataDescriptor;

        /**
         * Does the entry have a ZIP64 extended information extra field.
         */
        private boolean usesZip64;

        /**
         * Number of bytes of entry content read by the client if the
         * entry is STORED.
         */
        private long bytesRead;

        /**
         * Number of bytes of entry content read so far from the stream.
         *
         * <p>This may be more than the actual entry's length as some
         * stuff gets buffered up and needs to be pushed back when the
         * end of the entry has been reached.</p>
         */
        private long bytesReadFromStream;

        /**
         * The checksum calculated as the current entry is read.
         */
        private final CRC32 crc = new CRC32();

        /**
         * The input stream decompressing the data for shrunk and imploded entries.
         */
        private InputStream in;
    }

    /**
     * Bounded input stream adapted from commons-io
     */
    private class BoundedInputStream extends InputStream {

        /** the wrapped input stream */
        private final InputStream in;

        /** the max length to provide */
        private final long max;

        /** the number of bytes already returned */
        private long pos = 0;

        /**
         * Creates a new <code>BoundedInputStream</code> that wraps the given input
         * stream and limits it to a certain size.
         *
         * @param in The wrapped input stream
         * @param size The maximum number of bytes to return
         */
        public BoundedInputStream(final InputStream in, final long size) {
            this.max = size;
            this.in = in;
        }

        /**
         * Reads a single byte unless the limit has been reached.
         *
         * @return the byte read, or -1 once the limit is reached or
         *         the wrapped stream is exhausted
         */
        @Override
        public int read() throws IOException {
            if (max >= 0 && pos >= max) {
                return -1;
            }
            final int result = in.read();
            if (result == -1) {
                // nothing was consumed, so no counter may be advanced
                return -1;
            }
            pos++;
            count(1);
            current.bytesReadFromStream++;
            return result;
        }

        @Override
        public int read(final byte[] b) throws IOException {
            return this.read(b, 0, b.length);
        }

        /**
         * Reads up to {@code len} bytes without exceeding the limit.
         *
         * @return the number of bytes read, 0 if {@code len} is zero,
         *         or -1 once the limit is reached or the wrapped
         *         stream is exhausted
         */
        @Override
        public int read(final byte[] b, final int off, final int len) throws IOException {
            if (len == 0) {
                // InputStream contract: a zero-length request yields 0, never -1
                return 0;
            }
            if (max >= 0 && pos >= max) {
                return -1;
            }
            final long maxRead = max >= 0 ? Math.min(len, max - pos) : len;
            final int bytesRead = in.read(b, off, (int) maxRead);

            if (bytesRead == -1) {
                return -1;
            }

            pos += bytesRead;
            count(bytesRead);
            current.bytesReadFromStream += bytesRead;
            return bytesRead;
        }

        /**
         * Skips at most {@code n} bytes of the wrapped stream without
         * going past the limit.
         *
         * @return the number of bytes the wrapped stream reported as skipped
         */
        @Override
        public long skip(final long n) throws IOException {
            final long toSkip = max >= 0 ? Math.min(n, max - pos) : n;
            final long skippedBytes = in.skip(toSkip);
            pos += skippedBytes;
            return skippedBytes;
        }

        /**
         * @return 0 once the limit has been reached, otherwise
         *         whatever the wrapped stream reports
         */
        @Override
        public int available() throws IOException {
            if (max >= 0 && pos >= max) {
                return 0;
            }
            return in.available();
        }
    }
}