001/* 002 * Copyright (C) 2012 The Guava Authors 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016 017package com.google.common.io; 018 019import static com.google.common.base.Preconditions.checkArgument; 020import static com.google.common.base.Preconditions.checkNotNull; 021 022import com.google.common.annotations.Beta; 023import com.google.common.base.Optional; 024import com.google.common.collect.ImmutableList; 025import com.google.common.hash.Funnels; 026import com.google.common.hash.HashCode; 027import com.google.common.hash.HashFunction; 028import com.google.common.hash.Hasher; 029 030import java.io.BufferedInputStream; 031import java.io.ByteArrayInputStream; 032import java.io.IOException; 033import java.io.InputStream; 034import java.io.InputStreamReader; 035import java.io.OutputStream; 036import java.io.Reader; 037import java.nio.charset.Charset; 038import java.util.Arrays; 039import java.util.Iterator; 040 041/** 042 * A readable source of bytes, such as a file. Unlike an {@link InputStream}, a 043 * {@code ByteSource} is not an open, stateful stream for input that can be read and closed. 044 * Instead, it is an immutable <i>supplier</i> of {@code InputStream} instances. 045 * 046 * <p>{@code ByteSource} provides two kinds of methods: 047 * <ul> 048 * <li><b>Methods that return a stream:</b> These methods should return a <i>new</i>, independent 049 * instance each time they are called. The caller is responsible for ensuring that the returned 050 * stream is closed. 051 * <li><b>Convenience methods:</b> These are implementations of common operations that are 052 * typically implemented by opening a stream using one of the methods in the first category, doing 053 * something and finally closing the stream that was opened. 054 * </ul> 055 * 056 * @since 14.0 057 * @author Colin Decker 058 */ 059public abstract class ByteSource { 060 061 private static final int BUF_SIZE = 0x1000; // 4K 062 063 /** 064 * Constructor for use by subclasses. 065 */ 066 protected ByteSource() {} 067 068 /** 069 * Returns a {@link CharSource} view of this byte source that decodes bytes read from this source 070 * as characters using the given {@link Charset}. 071 */ 072 public CharSource asCharSource(Charset charset) { 073 return new AsCharSource(charset); 074 } 075 076 /** 077 * Opens a new {@link InputStream} for reading from this source. This method should return a new, 078 * independent stream each time it is called. 079 * 080 * <p>The caller is responsible for ensuring that the returned stream is closed. 081 * 082 * @throws IOException if an I/O error occurs in the process of opening the stream 083 */ 084 public abstract InputStream openStream() throws IOException; 085 086 /** 087 * Opens a new buffered {@link InputStream} for reading from this source. The returned stream is 088 * not required to be a {@link BufferedInputStream} in order to allow implementations to simply 089 * delegate to {@link #openStream()} when the stream returned by that method does not benefit 090 * from additional buffering (for example, a {@code ByteArrayInputStream}). This method should 091 * return a new, independent stream each time it is called. 092 * 093 * <p>The caller is responsible for ensuring that the returned stream is closed. 094 * 095 * @throws IOException if an I/O error occurs in the process of opening the stream 096 * @since 15.0 (in 14.0 with return type {@link BufferedInputStream}) 097 */ 098 public InputStream openBufferedStream() throws IOException { 099 InputStream in = openStream(); 100 return (in instanceof BufferedInputStream) 101 ? (BufferedInputStream) in 102 : new BufferedInputStream(in); 103 } 104 105 /** 106 * Returns a view of a slice of this byte source that is at most {@code length} bytes long 107 * starting at the given {@code offset}. 108 * 109 * @throws IllegalArgumentException if {@code offset} or {@code length} is negative 110 */ 111 public ByteSource slice(long offset, long length) { 112 return new SlicedByteSource(offset, length); 113 } 114 115 /** 116 * Returns whether the source has zero bytes. The default implementation returns true if 117 * {@link #sizeIfKnown} returns zero, falling back to opening a stream and checking for 118 * EOF if the size is not known. 119 * 120 * <p>Note that, in cases where {@code sizeIfKnown} returns zero, it is <i>possible</i> that bytes 121 * are actually available for reading. (For example, some special files may return a size of 0 122 * despite actually having content when read.) This means that a source may return {@code true} 123 * from {@code isEmpty()} despite having readable content. 124 * 125 * @throws IOException if an I/O error occurs 126 * @since 15.0 127 */ 128 public boolean isEmpty() throws IOException { 129 Optional<Long> sizeIfKnown = sizeIfKnown(); 130 if (sizeIfKnown.isPresent() && sizeIfKnown.get() == 0L) { 131 return true; 132 } 133 Closer closer = Closer.create(); 134 try { 135 InputStream in = closer.register(openStream()); 136 return in.read() == -1; 137 } catch (Throwable e) { 138 throw closer.rethrow(e); 139 } finally { 140 closer.close(); 141 } 142 } 143 144 /** 145 * Returns the size of this source in bytes, if the size can be easily determined without 146 * actually opening the data stream. 147 * 148 * <p>The default implementation returns {@link Optional#absent}. Some sources, such as a file, 149 * may return a non-absent value. Note that in such cases, it is <i>possible</i> that this method 150 * will return a different number of bytes than would be returned by reading all of the bytes (for 151 * example, some special files may return a size of 0 despite actually having content when read). 152 * 153 * <p>Additionally, for mutable sources such as files, a subsequent read may return a different 154 * number of bytes if the contents are changed. 155 * 156 * @since 19.0 157 */ 158 @Beta 159 public Optional<Long> sizeIfKnown() { 160 return Optional.absent(); 161 } 162 163 /** 164 * Returns the size of this source in bytes, even if doing so requires opening and traversing 165 * an entire stream. To avoid a potentially expensive operation, see {@link #sizeIfKnown}. 166 * 167 * <p>The default implementation calls {@link #sizeIfKnown} and returns the value if present. 168 * If absent, it will fall back to a heavyweight operation that will open a stream, read (or 169 * {@link InputStream#skip(long) skip}, if possible) to the end of the stream and return the total 170 * number of bytes that were read. 171 * 172 * <p>Note that for some sources that implement {@link #sizeIfKnown} to provide a more efficient 173 * implementation, it is <i>possible</i> that this method will return a different number of bytes 174 * than would be returned by reading all of the bytes (for example, some special files may return 175 * a size of 0 despite actually having content when read). 176 * 177 * <p>In either case, for mutable sources such as files, a subsequent read may return a different 178 * number of bytes if the contents are changed. 179 * 180 * @throws IOException if an I/O error occurs in the process of reading the size of this source 181 */ 182 public long size() throws IOException { 183 Optional<Long> sizeIfKnown = sizeIfKnown(); 184 if (sizeIfKnown.isPresent()) { 185 return sizeIfKnown.get(); 186 } 187 188 Closer closer = Closer.create(); 189 try { 190 InputStream in = closer.register(openStream()); 191 return countBySkipping(in); 192 } catch (IOException e) { 193 // skip may not be supported... at any rate, try reading 194 } finally { 195 closer.close(); 196 } 197 198 closer = Closer.create(); 199 try { 200 InputStream in = closer.register(openStream()); 201 return countByReading(in); 202 } catch (Throwable e) { 203 throw closer.rethrow(e); 204 } finally { 205 closer.close(); 206 } 207 } 208 209 /** 210 * Counts the bytes in the given input stream using skip if possible. Returns SKIP_FAILED if the 211 * first call to skip threw, in which case skip may just not be supported. 212 */ 213 private long countBySkipping(InputStream in) throws IOException { 214 long count = 0; 215 while (true) { 216 // don't try to skip more than available() 217 // things may work really wrong with FileInputStream otherwise 218 long skipped = in.skip(Math.min(in.available(), Integer.MAX_VALUE)); 219 if (skipped <= 0) { 220 if (in.read() == -1) { 221 return count; 222 } else if (count == 0 && in.available() == 0) { 223 // if available is still zero after reading a single byte, it 224 // will probably always be zero, so we should countByReading 225 throw new IOException(); 226 } 227 count++; 228 } else { 229 count += skipped; 230 } 231 } 232 } 233 234 private static final byte[] countBuffer = new byte[BUF_SIZE]; 235 236 private long countByReading(InputStream in) throws IOException { 237 long count = 0; 238 long read; 239 while ((read = in.read(countBuffer)) != -1) { 240 count += read; 241 } 242 return count; 243 } 244 245 /** 246 * Copies the contents of this byte source to the given {@code OutputStream}. Does not close 247 * {@code output}. 248 * 249 * @throws IOException if an I/O error occurs in the process of reading from this source or 250 * writing to {@code output} 251 */ 252 public long copyTo(OutputStream output) throws IOException { 253 checkNotNull(output); 254 255 Closer closer = Closer.create(); 256 try { 257 InputStream in = closer.register(openStream()); 258 return ByteStreams.copy(in, output); 259 } catch (Throwable e) { 260 throw closer.rethrow(e); 261 } finally { 262 closer.close(); 263 } 264 } 265 266 /** 267 * Copies the contents of this byte source to the given {@code ByteSink}. 268 * 269 * @throws IOException if an I/O error occurs in the process of reading from this source or 270 * writing to {@code sink} 271 */ 272 public long copyTo(ByteSink sink) throws IOException { 273 checkNotNull(sink); 274 275 Closer closer = Closer.create(); 276 try { 277 InputStream in = closer.register(openStream()); 278 OutputStream out = closer.register(sink.openStream()); 279 return ByteStreams.copy(in, out); 280 } catch (Throwable e) { 281 throw closer.rethrow(e); 282 } finally { 283 closer.close(); 284 } 285 } 286 287 /** 288 * Reads the full contents of this byte source as a byte array. 289 * 290 * @throws IOException if an I/O error occurs in the process of reading from this source 291 */ 292 public byte[] read() throws IOException { 293 Closer closer = Closer.create(); 294 try { 295 InputStream in = closer.register(openStream()); 296 return ByteStreams.toByteArray(in); 297 } catch (Throwable e) { 298 throw closer.rethrow(e); 299 } finally { 300 closer.close(); 301 } 302 } 303 304 /** 305 * Reads the contents of this byte source using the given {@code processor} to process bytes as 306 * they are read. Stops when all bytes have been read or the consumer returns {@code false}. 307 * Returns the result produced by the processor. 308 * 309 * @throws IOException if an I/O error occurs in the process of reading from this source or if 310 * {@code processor} throws an {@code IOException} 311 * @since 16.0 312 */ 313 @Beta 314 public <T> T read(ByteProcessor<T> processor) throws IOException { 315 checkNotNull(processor); 316 317 Closer closer = Closer.create(); 318 try { 319 InputStream in = closer.register(openStream()); 320 return ByteStreams.readBytes(in, processor); 321 } catch (Throwable e) { 322 throw closer.rethrow(e); 323 } finally { 324 closer.close(); 325 } 326 } 327 328 /** 329 * Hashes the contents of this byte source using the given hash function. 330 * 331 * @throws IOException if an I/O error occurs in the process of reading from this source 332 */ 333 public HashCode hash(HashFunction hashFunction) throws IOException { 334 Hasher hasher = hashFunction.newHasher(); 335 copyTo(Funnels.asOutputStream(hasher)); 336 return hasher.hash(); 337 } 338 339 /** 340 * Checks that the contents of this byte source are equal to the contents of the given byte 341 * source. 342 * 343 * @throws IOException if an I/O error occurs in the process of reading from this source or 344 * {@code other} 345 */ 346 public boolean contentEquals(ByteSource other) throws IOException { 347 checkNotNull(other); 348 349 byte[] buf1 = new byte[BUF_SIZE]; 350 byte[] buf2 = new byte[BUF_SIZE]; 351 352 Closer closer = Closer.create(); 353 try { 354 InputStream in1 = closer.register(openStream()); 355 InputStream in2 = closer.register(other.openStream()); 356 while (true) { 357 int read1 = ByteStreams.read(in1, buf1, 0, BUF_SIZE); 358 int read2 = ByteStreams.read(in2, buf2, 0, BUF_SIZE); 359 if (read1 != read2 || !Arrays.equals(buf1, buf2)) { 360 return false; 361 } else if (read1 != BUF_SIZE) { 362 return true; 363 } 364 } 365 } catch (Throwable e) { 366 throw closer.rethrow(e); 367 } finally { 368 closer.close(); 369 } 370 } 371 372 /** 373 * Concatenates multiple {@link ByteSource} instances into a single source. Streams returned from 374 * the source will contain the concatenated data from the streams of the underlying sources. 375 * 376 * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will 377 * close the open underlying stream. 378 * 379 * @param sources the sources to concatenate 380 * @return a {@code ByteSource} containing the concatenated data 381 * @since 15.0 382 */ 383 public static ByteSource concat(Iterable<? extends ByteSource> sources) { 384 return new ConcatenatedByteSource(sources); 385 } 386 387 /** 388 * Concatenates multiple {@link ByteSource} instances into a single source. Streams returned from 389 * the source will contain the concatenated data from the streams of the underlying sources. 390 * 391 * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will 392 * close the open underlying stream. 393 * 394 * <p>Note: The input {@code Iterator} will be copied to an {@code ImmutableList} when this 395 * method is called. This will fail if the iterator is infinite and may cause problems if the 396 * iterator eagerly fetches data for each source when iterated (rather than producing sources 397 * that only load data through their streams). Prefer using the {@link #concat(Iterable)} 398 * overload if possible. 399 * 400 * @param sources the sources to concatenate 401 * @return a {@code ByteSource} containing the concatenated data 402 * @throws NullPointerException if any of {@code sources} is {@code null} 403 * @since 15.0 404 */ 405 public static ByteSource concat(Iterator<? extends ByteSource> sources) { 406 return concat(ImmutableList.copyOf(sources)); 407 } 408 409 /** 410 * Concatenates multiple {@link ByteSource} instances into a single source. Streams returned from 411 * the source will contain the concatenated data from the streams of the underlying sources. 412 * 413 * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will 414 * close the open underlying stream. 415 * 416 * @param sources the sources to concatenate 417 * @return a {@code ByteSource} containing the concatenated data 418 * @throws NullPointerException if any of {@code sources} is {@code null} 419 * @since 15.0 420 */ 421 public static ByteSource concat(ByteSource... sources) { 422 return concat(ImmutableList.copyOf(sources)); 423 } 424 425 /** 426 * Returns a view of the given byte array as a {@link ByteSource}. To view only a specific range 427 * in the array, use {@code ByteSource.wrap(b).slice(offset, length)}. 428 * 429 * @since 15.0 (since 14.0 as {@code ByteStreams.asByteSource(byte[])}). 430 */ 431 public static ByteSource wrap(byte[] b) { 432 return new ByteArrayByteSource(b); 433 } 434 435 /** 436 * Returns an immutable {@link ByteSource} that contains no bytes. 437 * 438 * @since 15.0 439 */ 440 public static ByteSource empty() { 441 return EmptyByteSource.INSTANCE; 442 } 443 444 /** 445 * A char source that reads bytes from this source and decodes them as characters using a 446 * charset. 447 */ 448 private final class AsCharSource extends CharSource { 449 450 private final Charset charset; 451 452 private AsCharSource(Charset charset) { 453 this.charset = checkNotNull(charset); 454 } 455 456 @Override 457 public Reader openStream() throws IOException { 458 return new InputStreamReader(ByteSource.this.openStream(), charset); 459 } 460 461 @Override 462 public String toString() { 463 return ByteSource.this.toString() + ".asCharSource(" + charset + ")"; 464 } 465 } 466 467 /** 468 * A view of a subsection of the containing byte source. 469 */ 470 private final class SlicedByteSource extends ByteSource { 471 472 final long offset; 473 final long length; 474 475 SlicedByteSource(long offset, long length) { 476 checkArgument(offset >= 0, "offset (%s) may not be negative", offset); 477 checkArgument(length >= 0, "length (%s) may not be negative", length); 478 this.offset = offset; 479 this.length = length; 480 } 481 482 @Override 483 public InputStream openStream() throws IOException { 484 return sliceStream(ByteSource.this.openStream()); 485 } 486 487 @Override 488 public InputStream openBufferedStream() throws IOException { 489 return sliceStream(ByteSource.this.openBufferedStream()); 490 } 491 492 private InputStream sliceStream(InputStream in) throws IOException { 493 if (offset > 0) { 494 try { 495 ByteStreams.skipFully(in, offset); 496 } catch (Throwable e) { 497 Closer closer = Closer.create(); 498 closer.register(in); 499 try { 500 throw closer.rethrow(e); 501 } finally { 502 closer.close(); 503 } 504 } 505 } 506 return ByteStreams.limit(in, length); 507 } 508 509 @Override 510 public ByteSource slice(long offset, long length) { 511 checkArgument(offset >= 0, "offset (%s) may not be negative", offset); 512 checkArgument(length >= 0, "length (%s) may not be negative", length); 513 long maxLength = this.length - offset; 514 return ByteSource.this.slice(this.offset + offset, Math.min(length, maxLength)); 515 } 516 517 @Override 518 public boolean isEmpty() throws IOException { 519 return length == 0 || super.isEmpty(); 520 } 521 522 @Override 523 public Optional<Long> sizeIfKnown() { 524 Optional<Long> unslicedSize = ByteSource.this.sizeIfKnown(); 525 if (unslicedSize.isPresent()) { 526 return Optional.of(Math.min(offset + length, unslicedSize.get()) - offset); 527 } 528 return Optional.absent(); 529 } 530 531 @Override 532 public String toString() { 533 return ByteSource.this.toString() + ".slice(" + offset + ", " + length + ")"; 534 } 535 } 536 537 private static class ByteArrayByteSource extends ByteSource { 538 539 final byte[] bytes; 540 final int offset; 541 final int length; 542 543 ByteArrayByteSource(byte[] bytes) { 544 this(bytes, 0, bytes.length); 545 } 546 547 // NOTE: Preconditions are enforced by slice, the only non-trivial caller. 548 ByteArrayByteSource(byte[] bytes, int offset, int length) { 549 this.bytes = bytes; 550 this.offset = offset; 551 this.length = length; 552 } 553 554 @Override 555 public InputStream openStream() { 556 return new ByteArrayInputStream(bytes, offset, length); 557 } 558 559 @Override 560 public InputStream openBufferedStream() throws IOException { 561 return openStream(); 562 } 563 564 @Override 565 public boolean isEmpty() { 566 return length == 0; 567 } 568 569 @Override 570 public long size() { 571 return length; 572 } 573 574 @Override 575 public Optional<Long> sizeIfKnown() { 576 return Optional.of((long) length); 577 } 578 579 @Override 580 public byte[] read() { 581 return Arrays.copyOfRange(bytes, offset, offset + length); 582 } 583 584 @Override 585 public long copyTo(OutputStream output) throws IOException { 586 output.write(bytes, offset, length); 587 return length; 588 } 589 590 @Override 591 public <T> T read(ByteProcessor<T> processor) throws IOException { 592 processor.processBytes(bytes, offset, length); 593 return processor.getResult(); 594 } 595 596 @Override 597 public HashCode hash(HashFunction hashFunction) throws IOException { 598 return hashFunction.hashBytes(bytes, offset, length); 599 } 600 601 @Override 602 public ByteSource slice(long offset, long length) { 603 checkArgument(offset >= 0, "offset (%s) may not be negative", offset); 604 checkArgument(length >= 0, "length (%s) may not be negative", length); 605 606 int newOffset = this.offset + (int) Math.min(this.length, offset); 607 int endOffset = this.offset + (int) Math.min(this.length, offset + length); 608 return new ByteArrayByteSource(bytes, newOffset, endOffset - newOffset); 609 } 610 611 @Override 612 public String toString() { 613 return "ByteSource.wrap(" 614 + truncate(BaseEncoding.base16().encode(bytes, offset, length), 30, "...") + ")"; 615 } 616 617 /** 618 * Truncates the given character sequence to the given maximum length. If the length of the 619 * sequence is greater than {@code maxLength}, the returned string will be exactly 620 * {@code maxLength} chars in length and will end with the given {@code truncationIndicator}. 621 * Otherwise, the sequence will be returned as a string with no changes to the content. 622 * 623 * <p>Examples: 624 * 625 * <pre> {@code 626 * truncate("foobar", 7, "..."); // returns "foobar" 627 * truncate("foobar", 5, "..."); // returns "fo..." }</pre> 628 * 629 * <p><b>Note:</b> This method <i>may</i> work with certain non-ASCII text but is not safe for 630 * use with arbitrary Unicode text. It is mostly intended for use with text that is known to be 631 * safe for use with it (such as all-ASCII text) and for simple debugging text. When using this 632 * method, consider the following: 633 * 634 * <ul> 635 * <li>it may split surrogate pairs</li> 636 * <li>it may split characters and combining characters</li> 637 * <li>it does not consider word boundaries</li> 638 * <li>if truncating for display to users, there are other considerations that must be taken 639 * into account</li> 640 * <li>the appropriate truncation indicator may be locale-dependent</li> 641 * <li>it is safe to use non-ASCII characters in the truncation indicator</li> 642 * </ul> 643 * 644 * 645 * @throws IllegalArgumentException if {@code maxLength} is less than the length of 646 * {@code truncationIndicator} 647 */ 648 /* 649 * <p>TODO(user, cpovirk): Use Ascii.truncate once it is available in our internal copy of 650 * guava_jdk5. 651 */ 652 private static String truncate(CharSequence seq, int maxLength, String truncationIndicator) { 653 checkNotNull(seq); 654 655 // length to truncate the sequence to, not including the truncation indicator 656 int truncationLength = maxLength - truncationIndicator.length(); 657 658 // in this worst case, this allows a maxLength equal to the length of the truncationIndicator, 659 // meaning that a string will be truncated to just the truncation indicator itself 660 checkArgument(truncationLength >= 0, 661 "maxLength (%s) must be >= length of the truncation indicator (%s)", 662 maxLength, truncationIndicator.length()); 663 664 if (seq.length() <= maxLength) { 665 String string = seq.toString(); 666 if (string.length() <= maxLength) { 667 return string; 668 } 669 // if the length of the toString() result was > maxLength for some reason, truncate that 670 seq = string; 671 } 672 673 return new StringBuilder(maxLength) 674 .append(seq, 0, truncationLength) 675 .append(truncationIndicator) 676 .toString(); 677 } 678 } 679 680 private static final class EmptyByteSource extends ByteArrayByteSource { 681 682 static final EmptyByteSource INSTANCE = new EmptyByteSource(); 683 684 EmptyByteSource() { 685 super(new byte[0]); 686 } 687 688 @Override 689 public CharSource asCharSource(Charset charset) { 690 checkNotNull(charset); 691 return CharSource.empty(); 692 } 693 694 @Override 695 public byte[] read() { 696 return bytes; // length is 0, no need to clone 697 } 698 699 @Override 700 public String toString() { 701 return "ByteSource.empty()"; 702 } 703 } 704 705 private static final class ConcatenatedByteSource extends ByteSource { 706 707 final Iterable<? extends ByteSource> sources; 708 709 ConcatenatedByteSource(Iterable<? extends ByteSource> sources) { 710 this.sources = checkNotNull(sources); 711 } 712 713 @Override 714 public InputStream openStream() throws IOException { 715 return new MultiInputStream(sources.iterator()); 716 } 717 718 @Override 719 public boolean isEmpty() throws IOException { 720 for (ByteSource source : sources) { 721 if (!source.isEmpty()) { 722 return false; 723 } 724 } 725 return true; 726 } 727 728 @Override 729 public Optional<Long> sizeIfKnown() { 730 long result = 0L; 731 for (ByteSource source : sources) { 732 Optional<Long> sizeIfKnown = source.sizeIfKnown(); 733 if (!sizeIfKnown.isPresent()) { 734 return Optional.absent(); 735 } 736 result += sizeIfKnown.get(); 737 } 738 return Optional.of(result); 739 } 740 741 @Override 742 public long size() throws IOException { 743 long result = 0L; 744 for (ByteSource source : sources) { 745 result += source.size(); 746 } 747 return result; 748 } 749 750 @Override 751 public String toString() { 752 return "ByteSource.concat(" + sources + ")"; 753 } 754 } 755}