001/*
002 * Copyright (C) 2012 The Guava Authors
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 * http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016
017package com.google.common.io;
018
019import static com.google.common.base.Preconditions.checkArgument;
020import static com.google.common.base.Preconditions.checkNotNull;
021
022import com.google.common.annotations.Beta;
023import com.google.common.base.Optional;
024import com.google.common.collect.ImmutableList;
025import com.google.common.hash.Funnels;
026import com.google.common.hash.HashCode;
027import com.google.common.hash.HashFunction;
028import com.google.common.hash.Hasher;
029
030import java.io.BufferedInputStream;
031import java.io.ByteArrayInputStream;
032import java.io.IOException;
033import java.io.InputStream;
034import java.io.InputStreamReader;
035import java.io.OutputStream;
036import java.io.Reader;
037import java.nio.charset.Charset;
038import java.util.Arrays;
039import java.util.Iterator;
040
041/**
042 * A readable source of bytes, such as a file. Unlike an {@link InputStream}, a
043 * {@code ByteSource} is not an open, stateful stream for input that can be read and closed.
044 * Instead, it is an immutable <i>supplier</i> of {@code InputStream} instances.
045 *
046 * <p>{@code ByteSource} provides two kinds of methods:
047 * <ul>
048 *   <li><b>Methods that return a stream:</b> These methods should return a <i>new</i>, independent
049 *   instance each time they are called. The caller is responsible for ensuring that the returned
050 *   stream is closed.
051 *   <li><b>Convenience methods:</b> These are implementations of common operations that are
052 *   typically implemented by opening a stream using one of the methods in the first category, doing
053 *   something and finally closing the stream that was opened.
054 * </ul>
055 *
056 * @since 14.0
057 * @author Colin Decker
058 */
059public abstract class ByteSource {
060
061  private static final int BUF_SIZE = 0x1000; // 4K
062
063  /**
064   * Constructor for use by subclasses.
065   */
066  protected ByteSource() {}
067
068  /**
069   * Returns a {@link CharSource} view of this byte source that decodes bytes read from this source
070   * as characters using the given {@link Charset}.
071   */
072  public CharSource asCharSource(Charset charset) {
073    return new AsCharSource(charset);
074  }
075
076  /**
077   * Opens a new {@link InputStream} for reading from this source. This method should return a new,
078   * independent stream each time it is called.
079   *
080   * <p>The caller is responsible for ensuring that the returned stream is closed.
081   *
082   * @throws IOException if an I/O error occurs in the process of opening the stream
083   */
084  public abstract InputStream openStream() throws IOException;
085
086  /**
087   * Opens a new buffered {@link InputStream} for reading from this source. The returned stream is
088   * not required to be a {@link BufferedInputStream} in order to allow implementations to simply
089   * delegate to {@link #openStream()} when the stream returned by that method does not benefit
090   * from additional buffering (for example, a {@code ByteArrayInputStream}). This method should
091   * return a new, independent stream each time it is called.
092   *
093   * <p>The caller is responsible for ensuring that the returned stream is closed.
094   *
095   * @throws IOException if an I/O error occurs in the process of opening the stream
096   * @since 15.0 (in 14.0 with return type {@link BufferedInputStream})
097   */
098  public InputStream openBufferedStream() throws IOException {
099    InputStream in = openStream();
100    return (in instanceof BufferedInputStream)
101        ? (BufferedInputStream) in
102        : new BufferedInputStream(in);
103  }
104
105  /**
106   * Returns a view of a slice of this byte source that is at most {@code length} bytes long
107   * starting at the given {@code offset}.
108   *
109   * @throws IllegalArgumentException if {@code offset} or {@code length} is negative
110   */
111  public ByteSource slice(long offset, long length) {
112    return new SlicedByteSource(offset, length);
113  }
114
115  /**
116   * Returns whether the source has zero bytes. The default implementation returns true if
117   * {@link #sizeIfKnown} returns zero, falling back to opening a stream and checking for
118   * EOF if the size is not known.
119   *
120   * <p>Note that, in cases where {@code sizeIfKnown} returns zero, it is <i>possible</i> that bytes
121   * are actually available for reading. (For example, some special files may return a size of 0
122   * despite actually having content when read.) This means that a source may return {@code true}
123   * from {@code isEmpty()} despite having readable content.
124   *
125   * @throws IOException if an I/O error occurs
126   * @since 15.0
127   */
128  public boolean isEmpty() throws IOException {
129    Optional<Long> sizeIfKnown = sizeIfKnown();
130    if (sizeIfKnown.isPresent() && sizeIfKnown.get() == 0L) {
131      return true;
132    }
133    Closer closer = Closer.create();
134    try {
135      InputStream in = closer.register(openStream());
136      return in.read() == -1;
137    } catch (Throwable e) {
138      throw closer.rethrow(e);
139    } finally {
140      closer.close();
141    }
142  }
143
144  /**
145   * Returns the size of this source in bytes, if the size can be easily determined without
146   * actually opening the data stream.
147   *
148   * <p>The default implementation returns {@link Optional#absent}. Some sources, such as a file,
149   * may return a non-absent value. Note that in such cases, it is <i>possible</i> that this method
150   * will return a different number of bytes than would be returned by reading all of the bytes (for
151   * example, some special files may return a size of 0 despite actually having content when read).
152   *
153   * <p>Additionally, for mutable sources such as files, a subsequent read may return a different
154   * number of bytes if the contents are changed.
155   *
156   * @since 19.0
157   */
158  @Beta
159  public Optional<Long> sizeIfKnown() {
160    return Optional.absent();
161  }
162
163  /**
164   * Returns the size of this source in bytes, even if doing so requires opening and traversing
165   * an entire stream. To avoid a potentially expensive operation, see {@link #sizeIfKnown}.
166   *
167   * <p>The default implementation calls {@link #sizeIfKnown} and returns the value if present.
168   * If absent, it will fall back to a heavyweight operation that will open a stream, read (or
169   * {@link InputStream#skip(long) skip}, if possible) to the end of the stream and return the total
170   * number of bytes that were read.
171   *
172   * <p>Note that for some sources that implement {@link #sizeIfKnown} to provide a more efficient
173   * implementation, it is <i>possible</i> that this method will return a different number of bytes
174   * than would be returned by reading all of the bytes (for example, some special files may return
175   * a size of 0 despite actually having content when read).
176   *
177   * <p>In either case, for mutable sources such as files, a subsequent read may return a different
178   * number of bytes if the contents are changed.
179   *
180   * @throws IOException if an I/O error occurs in the process of reading the size of this source
181   */
182  public long size() throws IOException {
183    Optional<Long> sizeIfKnown = sizeIfKnown();
184    if (sizeIfKnown.isPresent()) {
185      return sizeIfKnown.get();
186    }
187
188    Closer closer = Closer.create();
189    try {
190      InputStream in = closer.register(openStream());
191      return countBySkipping(in);
192    } catch (IOException e) {
193      // skip may not be supported... at any rate, try reading
194    } finally {
195      closer.close();
196    }
197
198    closer = Closer.create();
199    try {
200      InputStream in = closer.register(openStream());
201      return countByReading(in);
202    } catch (Throwable e) {
203      throw closer.rethrow(e);
204    } finally {
205      closer.close();
206    }
207  }
208
209  /**
210   * Counts the bytes in the given input stream using skip if possible. Returns SKIP_FAILED if the
211   * first call to skip threw, in which case skip may just not be supported.
212   */
213  private long countBySkipping(InputStream in) throws IOException {
214    long count = 0;
215    while (true) {
216      // don't try to skip more than available()
217      // things may work really wrong with FileInputStream otherwise
218      long skipped = in.skip(Math.min(in.available(), Integer.MAX_VALUE));
219      if (skipped <= 0) {
220        if (in.read() == -1) {
221          return count;
222        } else if (count == 0 && in.available() == 0) {
223          // if available is still zero after reading a single byte, it
224          // will probably always be zero, so we should countByReading
225          throw new IOException();
226        }
227        count++;
228      } else {
229        count += skipped;
230      }
231    }
232  }
233
234  private static final byte[] countBuffer = new byte[BUF_SIZE];
235
236  private long countByReading(InputStream in) throws IOException {
237    long count = 0;
238    long read;
239    while ((read = in.read(countBuffer)) != -1) {
240      count += read;
241    }
242    return count;
243  }
244
245  /**
246   * Copies the contents of this byte source to the given {@code OutputStream}. Does not close
247   * {@code output}.
248   *
249   * @throws IOException if an I/O error occurs in the process of reading from this source or
250   *     writing to {@code output}
251   */
252  public long copyTo(OutputStream output) throws IOException {
253    checkNotNull(output);
254
255    Closer closer = Closer.create();
256    try {
257      InputStream in = closer.register(openStream());
258      return ByteStreams.copy(in, output);
259    } catch (Throwable e) {
260      throw closer.rethrow(e);
261    } finally {
262      closer.close();
263    }
264  }
265
266  /**
267   * Copies the contents of this byte source to the given {@code ByteSink}.
268   *
269   * @throws IOException if an I/O error occurs in the process of reading from this source or
270   *     writing to {@code sink}
271   */
272  public long copyTo(ByteSink sink) throws IOException {
273    checkNotNull(sink);
274
275    Closer closer = Closer.create();
276    try {
277      InputStream in = closer.register(openStream());
278      OutputStream out = closer.register(sink.openStream());
279      return ByteStreams.copy(in, out);
280    } catch (Throwable e) {
281      throw closer.rethrow(e);
282    } finally {
283      closer.close();
284    }
285  }
286
287  /**
288   * Reads the full contents of this byte source as a byte array.
289   *
290   * @throws IOException if an I/O error occurs in the process of reading from this source
291   */
292  public byte[] read() throws IOException {
293    Closer closer = Closer.create();
294    try {
295      InputStream in = closer.register(openStream());
296      return ByteStreams.toByteArray(in);
297    } catch (Throwable e) {
298      throw closer.rethrow(e);
299    } finally {
300      closer.close();
301    }
302  }
303
304  /**
305   * Reads the contents of this byte source using the given {@code processor} to process bytes as
306   * they are read. Stops when all bytes have been read or the consumer returns {@code false}.
307   * Returns the result produced by the processor.
308   *
309   * @throws IOException if an I/O error occurs in the process of reading from this source or if
310   *     {@code processor} throws an {@code IOException}
311   * @since 16.0
312   */
313  @Beta
314  public <T> T read(ByteProcessor<T> processor) throws IOException {
315    checkNotNull(processor);
316
317    Closer closer = Closer.create();
318    try {
319      InputStream in = closer.register(openStream());
320      return ByteStreams.readBytes(in, processor);
321    } catch (Throwable e) {
322      throw closer.rethrow(e);
323    } finally {
324      closer.close();
325    }
326  }
327
328  /**
329   * Hashes the contents of this byte source using the given hash function.
330   *
331   * @throws IOException if an I/O error occurs in the process of reading from this source
332   */
333  public HashCode hash(HashFunction hashFunction) throws IOException {
334    Hasher hasher = hashFunction.newHasher();
335    copyTo(Funnels.asOutputStream(hasher));
336    return hasher.hash();
337  }
338
339  /**
340   * Checks that the contents of this byte source are equal to the contents of the given byte
341   * source.
342   *
343   * @throws IOException if an I/O error occurs in the process of reading from this source or
344   *     {@code other}
345   */
346  public boolean contentEquals(ByteSource other) throws IOException {
347    checkNotNull(other);
348
349    byte[] buf1 = new byte[BUF_SIZE];
350    byte[] buf2 = new byte[BUF_SIZE];
351
352    Closer closer = Closer.create();
353    try {
354      InputStream in1 = closer.register(openStream());
355      InputStream in2 = closer.register(other.openStream());
356      while (true) {
357        int read1 = ByteStreams.read(in1, buf1, 0, BUF_SIZE);
358        int read2 = ByteStreams.read(in2, buf2, 0, BUF_SIZE);
359        if (read1 != read2 || !Arrays.equals(buf1, buf2)) {
360          return false;
361        } else if (read1 != BUF_SIZE) {
362          return true;
363        }
364      }
365    } catch (Throwable e) {
366      throw closer.rethrow(e);
367    } finally {
368      closer.close();
369    }
370  }
371
372  /**
373   * Concatenates multiple {@link ByteSource} instances into a single source. Streams returned from
374   * the source will contain the concatenated data from the streams of the underlying sources.
375   *
376   * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will
377   * close the open underlying stream.
378   *
379   * @param sources the sources to concatenate
380   * @return a {@code ByteSource} containing the concatenated data
381   * @since 15.0
382   */
383  public static ByteSource concat(Iterable<? extends ByteSource> sources) {
384    return new ConcatenatedByteSource(sources);
385  }
386
387  /**
388   * Concatenates multiple {@link ByteSource} instances into a single source. Streams returned from
389   * the source will contain the concatenated data from the streams of the underlying sources.
390   *
391   * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will
392   * close the open underlying stream.
393   *
394   * <p>Note: The input {@code Iterator} will be copied to an {@code ImmutableList} when this
395   * method is called. This will fail if the iterator is infinite and may cause problems if the
396   * iterator eagerly fetches data for each source when iterated (rather than producing sources
397   * that only load data through their streams). Prefer using the {@link #concat(Iterable)}
398   * overload if possible.
399   *
400   * @param sources the sources to concatenate
401   * @return a {@code ByteSource} containing the concatenated data
402   * @throws NullPointerException if any of {@code sources} is {@code null}
403   * @since 15.0
404   */
405  public static ByteSource concat(Iterator<? extends ByteSource> sources) {
406    return concat(ImmutableList.copyOf(sources));
407  }
408
409  /**
410   * Concatenates multiple {@link ByteSource} instances into a single source. Streams returned from
411   * the source will contain the concatenated data from the streams of the underlying sources.
412   *
413   * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will
414   * close the open underlying stream.
415   *
416   * @param sources the sources to concatenate
417   * @return a {@code ByteSource} containing the concatenated data
418   * @throws NullPointerException if any of {@code sources} is {@code null}
419   * @since 15.0
420   */
421  public static ByteSource concat(ByteSource... sources) {
422    return concat(ImmutableList.copyOf(sources));
423  }
424
425  /**
426   * Returns a view of the given byte array as a {@link ByteSource}. To view only a specific range
427   * in the array, use {@code ByteSource.wrap(b).slice(offset, length)}.
428   *
429   * @since 15.0 (since 14.0 as {@code ByteStreams.asByteSource(byte[])}).
430   */
431  public static ByteSource wrap(byte[] b) {
432    return new ByteArrayByteSource(b);
433  }
434
435  /**
436   * Returns an immutable {@link ByteSource} that contains no bytes.
437   *
438   * @since 15.0
439   */
440  public static ByteSource empty() {
441    return EmptyByteSource.INSTANCE;
442  }
443
444  /**
445   * A char source that reads bytes from this source and decodes them as characters using a
446   * charset.
447   */
448  private final class AsCharSource extends CharSource {
449
450    private final Charset charset;
451
452    private AsCharSource(Charset charset) {
453      this.charset = checkNotNull(charset);
454    }
455
456    @Override
457    public Reader openStream() throws IOException {
458      return new InputStreamReader(ByteSource.this.openStream(), charset);
459    }
460
461    @Override
462    public String toString() {
463      return ByteSource.this.toString() + ".asCharSource(" + charset + ")";
464    }
465  }
466
467  /**
468   * A view of a subsection of the containing byte source.
469   */
470  private final class SlicedByteSource extends ByteSource {
471
472    final long offset;
473    final long length;
474
475    SlicedByteSource(long offset, long length) {
476      checkArgument(offset >= 0, "offset (%s) may not be negative", offset);
477      checkArgument(length >= 0, "length (%s) may not be negative", length);
478      this.offset = offset;
479      this.length = length;
480    }
481
482    @Override
483    public InputStream openStream() throws IOException {
484      return sliceStream(ByteSource.this.openStream());
485    }
486
487    @Override
488    public InputStream openBufferedStream() throws IOException {
489      return sliceStream(ByteSource.this.openBufferedStream());
490    }
491
492    private InputStream sliceStream(InputStream in) throws IOException {
493      if (offset > 0) {
494        try {
495          ByteStreams.skipFully(in, offset);
496        } catch (Throwable e) {
497          Closer closer = Closer.create();
498          closer.register(in);
499          try {
500            throw closer.rethrow(e);
501          } finally {
502            closer.close();
503          }
504        }
505      }
506      return ByteStreams.limit(in, length);
507    }
508
509    @Override
510    public ByteSource slice(long offset, long length) {
511      checkArgument(offset >= 0, "offset (%s) may not be negative", offset);
512      checkArgument(length >= 0, "length (%s) may not be negative", length);
513      long maxLength = this.length - offset;
514      return ByteSource.this.slice(this.offset + offset, Math.min(length, maxLength));
515    }
516
517    @Override
518    public boolean isEmpty() throws IOException {
519      return length == 0 || super.isEmpty();
520    }
521
522    @Override
523    public Optional<Long> sizeIfKnown() {
524      Optional<Long> unslicedSize = ByteSource.this.sizeIfKnown();
525      if (unslicedSize.isPresent()) {
526        return Optional.of(Math.min(offset + length, unslicedSize.get()) - offset);
527      }
528      return Optional.absent();
529    }
530
531    @Override
532    public String toString() {
533      return ByteSource.this.toString() + ".slice(" + offset + ", " + length + ")";
534    }
535  }
536
537  private static class ByteArrayByteSource extends ByteSource {
538
539    final byte[] bytes;
540    final int offset;
541    final int length;
542
543    ByteArrayByteSource(byte[] bytes) {
544      this(bytes, 0, bytes.length);
545    }
546
547    // NOTE: Preconditions are enforced by slice, the only non-trivial caller.
548    ByteArrayByteSource(byte[] bytes, int offset, int length) {
549      this.bytes = bytes;
550      this.offset = offset;
551      this.length = length;
552    }
553
554    @Override
555    public InputStream openStream() {
556      return new ByteArrayInputStream(bytes, offset, length);
557    }
558
559    @Override
560    public InputStream openBufferedStream() throws IOException {
561      return openStream();
562    }
563
564    @Override
565    public boolean isEmpty() {
566      return length == 0;
567    }
568
569    @Override
570    public long size() {
571      return length;
572    }
573
574    @Override
575    public Optional<Long> sizeIfKnown() {
576      return Optional.of((long) length);
577    }
578
579    @Override
580    public byte[] read() {
581      return Arrays.copyOfRange(bytes, offset, offset + length);
582    }
583
584    @Override
585    public long copyTo(OutputStream output) throws IOException {
586      output.write(bytes, offset, length);
587      return length;
588    }
589
590    @Override
591    public <T> T read(ByteProcessor<T> processor) throws IOException {
592      processor.processBytes(bytes, offset, length);
593      return processor.getResult();
594    }
595
596    @Override
597    public HashCode hash(HashFunction hashFunction) throws IOException {
598      return hashFunction.hashBytes(bytes, offset, length);
599    }
600
601    @Override
602    public ByteSource slice(long offset, long length) {
603      checkArgument(offset >= 0, "offset (%s) may not be negative", offset);
604      checkArgument(length >= 0, "length (%s) may not be negative", length);
605
606      int newOffset = this.offset + (int) Math.min(this.length, offset);
607      int endOffset = this.offset + (int) Math.min(this.length, offset + length);
608      return new ByteArrayByteSource(bytes, newOffset, endOffset - newOffset);
609    }
610
611    @Override
612    public String toString() {
613      return "ByteSource.wrap("
614          + truncate(BaseEncoding.base16().encode(bytes, offset, length), 30, "...") + ")";
615    }
616
617    /**
618     * Truncates the given character sequence to the given maximum length. If the length of the
619     * sequence is greater than {@code maxLength}, the returned string will be exactly
620     * {@code maxLength} chars in length and will end with the given {@code truncationIndicator}.
621     * Otherwise, the sequence will be returned as a string with no changes to the content.
622     *
623     * <p>Examples:
624     *
625     * <pre>   {@code
626     *   truncate("foobar", 7, "..."); // returns "foobar"
627     *   truncate("foobar", 5, "..."); // returns "fo..." }</pre>
628     *
629     * <p><b>Note:</b> This method <i>may</i> work with certain non-ASCII text but is not safe for
630     * use with arbitrary Unicode text. It is mostly intended for use with text that is known to be
631     * safe for use with it (such as all-ASCII text) and for simple debugging text. When using this
632     * method, consider the following:
633     *
634     * <ul>
635     *   <li>it may split surrogate pairs</li>
636     *   <li>it may split characters and combining characters</li>
637     *   <li>it does not consider word boundaries</li>
638     *   <li>if truncating for display to users, there are other considerations that must be taken
639     *   into account</li>
640     *   <li>the appropriate truncation indicator may be locale-dependent</li>
641     *   <li>it is safe to use non-ASCII characters in the truncation indicator</li>
642     * </ul>
643     *
644     *
645     * @throws IllegalArgumentException if {@code maxLength} is less than the length of
646     *     {@code truncationIndicator}
647     */
648    /*
649     * <p>TODO(user, cpovirk): Use Ascii.truncate once it is available in our internal copy of
650     * guava_jdk5.
651     */
652    private static String truncate(CharSequence seq, int maxLength, String truncationIndicator) {
653      checkNotNull(seq);
654
655      // length to truncate the sequence to, not including the truncation indicator
656      int truncationLength = maxLength - truncationIndicator.length();
657
658      // in this worst case, this allows a maxLength equal to the length of the truncationIndicator,
659      // meaning that a string will be truncated to just the truncation indicator itself
660      checkArgument(truncationLength >= 0,
661          "maxLength (%s) must be >= length of the truncation indicator (%s)",
662          maxLength, truncationIndicator.length());
663
664      if (seq.length() <= maxLength) {
665        String string = seq.toString();
666        if (string.length() <= maxLength) {
667          return string;
668        }
669        // if the length of the toString() result was > maxLength for some reason, truncate that
670        seq = string;
671      }
672
673      return new StringBuilder(maxLength)
674          .append(seq, 0, truncationLength)
675          .append(truncationIndicator)
676          .toString();
677    }
678  }
679
680  private static final class EmptyByteSource extends ByteArrayByteSource {
681
682    static final EmptyByteSource INSTANCE = new EmptyByteSource();
683
684    EmptyByteSource() {
685      super(new byte[0]);
686    }
687
688    @Override
689    public CharSource asCharSource(Charset charset) {
690      checkNotNull(charset);
691      return CharSource.empty();
692    }
693
694    @Override
695    public byte[] read() {
696      return bytes; // length is 0, no need to clone
697    }
698
699    @Override
700    public String toString() {
701      return "ByteSource.empty()";
702    }
703  }
704
705  private static final class ConcatenatedByteSource extends ByteSource {
706
707    final Iterable<? extends ByteSource> sources;
708
709    ConcatenatedByteSource(Iterable<? extends ByteSource> sources) {
710      this.sources = checkNotNull(sources);
711    }
712
713    @Override
714    public InputStream openStream() throws IOException {
715      return new MultiInputStream(sources.iterator());
716    }
717
718    @Override
719    public boolean isEmpty() throws IOException {
720      for (ByteSource source : sources) {
721        if (!source.isEmpty()) {
722          return false;
723        }
724      }
725      return true;
726    }
727
728    @Override
729    public Optional<Long> sizeIfKnown() {
730      long result = 0L;
731      for (ByteSource source : sources) {
732        Optional<Long> sizeIfKnown = source.sizeIfKnown();
733        if (!sizeIfKnown.isPresent()) {
734          return Optional.absent();
735        }
736        result += sizeIfKnown.get();
737      }
738      return Optional.of(result);
739    }
740
741    @Override
742    public long size() throws IOException {
743      long result = 0L;
744      for (ByteSource source : sources) {
745        result += source.size();
746      }
747      return result;
748    }
749
750    @Override
751    public String toString() {
752      return "ByteSource.concat(" + sources + ")";
753    }
754  }
755}