001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.commons.compress.compressors.snappy;
020
021import java.io.IOException;
022import java.io.InputStream;
023import java.io.PushbackInputStream;
024import java.util.Arrays;
025
026import org.apache.commons.compress.compressors.CompressorInputStream;
027import org.apache.commons.compress.utils.BoundedInputStream;
028import org.apache.commons.compress.utils.ByteUtils;
029import org.apache.commons.compress.utils.IOUtils;
030
031/**
032 * CompressorInputStream for the framing Snappy format.
033 *
034 * <p>Based on the "spec" in the version "Last revised: 2013-10-25"</p>
035 *
036 * @see <a href="https://github.com/google/snappy/blob/master/framing_format.txt">Snappy framing format description</a>
037 * @since 1.7
038 */
039public class FramedSnappyCompressorInputStream extends CompressorInputStream {
040
041    /**
042     * package private for tests only.
043     */
044    static final long MASK_OFFSET = 0xa282ead8L;
045
046    private static final int STREAM_IDENTIFIER_TYPE = 0xff;
047    static final int COMPRESSED_CHUNK_TYPE = 0;
048    private static final int UNCOMPRESSED_CHUNK_TYPE = 1;
049    private static final int PADDING_CHUNK_TYPE = 0xfe;
050    private static final int MIN_UNSKIPPABLE_TYPE = 2;
051    private static final int MAX_UNSKIPPABLE_TYPE = 0x7f;
052    private static final int MAX_SKIPPABLE_TYPE = 0xfd;
053
054    // used by FramedSnappyCompressorOutputStream as well
055    static final byte[] SZ_SIGNATURE = new byte[] { //NOSONAR
056        (byte) STREAM_IDENTIFIER_TYPE, // tag
057        6, 0, 0, // length
058        's', 'N', 'a', 'P', 'p', 'Y'
059    };
060
061    /** The underlying stream to read compressed data from */
062    private final PushbackInputStream in;
063
064    /** The dialect to expect */
065    private final FramedSnappyDialect dialect;
066
067    private SnappyCompressorInputStream currentCompressedChunk;
068
069    // used in no-arg read method
070    private final byte[] oneByte = new byte[1];
071
072    private boolean endReached, inUncompressedChunk;
073
074    private int uncompressedBytesRemaining;
075    private long expectedChecksum = -1;
076    private final int blockSize;
077    private final PureJavaCrc32C checksum = new PureJavaCrc32C();
078
079    private final ByteUtils.ByteSupplier supplier = new ByteUtils.ByteSupplier() {
080        @Override
081        public int getAsByte() throws IOException {
082            return readOneByte();
083        }
084    };
085
086    /**
087     * Constructs a new input stream that decompresses
088     * snappy-framed-compressed data from the specified input stream
089     * using the {@link FramedSnappyDialect#STANDARD} dialect.
090     * @param in  the InputStream from which to read the compressed data
091     * @throws IOException if reading fails
092     */
093    public FramedSnappyCompressorInputStream(final InputStream in) throws IOException {
094        this(in, FramedSnappyDialect.STANDARD);
095    }
096
097    /**
098     * Constructs a new input stream that decompresses snappy-framed-compressed data
099     * from the specified input stream.
100     * @param in  the InputStream from which to read the compressed data
101     * @param dialect the dialect used by the compressed stream
102     * @throws IOException if reading fails
103     */
104    public FramedSnappyCompressorInputStream(final InputStream in,
105                                             final FramedSnappyDialect dialect)
106        throws IOException {
107        this(in, SnappyCompressorInputStream.DEFAULT_BLOCK_SIZE, dialect);
108    }
109
110    /**
111     * Constructs a new input stream that decompresses snappy-framed-compressed data
112     * from the specified input stream.
113     * @param in  the InputStream from which to read the compressed data
114     * @param blockSize the block size to use for the compressed stream
115     * @param dialect the dialect used by the compressed stream
116     * @throws IOException if reading fails
117     * @since 1.14
118     */
119    public FramedSnappyCompressorInputStream(final InputStream in,
120                                             final int blockSize,
121                                             final FramedSnappyDialect dialect)
122        throws IOException {
123        this.in = new PushbackInputStream(in, 1);
124        this.blockSize = blockSize;
125        this.dialect = dialect;
126        if (dialect.hasStreamIdentifier()) {
127            readStreamIdentifier();
128        }
129    }
130
131    /** {@inheritDoc} */
132    @Override
133    public int read() throws IOException {
134        return read(oneByte, 0, 1) == -1 ? -1 : oneByte[0] & 0xFF;
135    }
136
137    /** {@inheritDoc} */
138    @Override
139    public void close() throws IOException {
140        if (currentCompressedChunk != null) {
141            currentCompressedChunk.close();
142            currentCompressedChunk = null;
143        }
144        in.close();
145    }
146
147    /** {@inheritDoc} */
148    @Override
149    public int read(final byte[] b, final int off, final int len) throws IOException {
150        int read = readOnce(b, off, len);
151        if (read == -1) {
152            readNextBlock();
153            if (endReached) {
154                return -1;
155            }
156            read = readOnce(b, off, len);
157        }
158        return read;
159    }
160
161    /** {@inheritDoc} */
162    @Override
163    public int available() throws IOException {
164        if (inUncompressedChunk) {
165            return Math.min(uncompressedBytesRemaining,
166                            in.available());
167        } else if (currentCompressedChunk != null) {
168            return currentCompressedChunk.available();
169        }
170        return 0;
171    }
172
173    /**
174     * Read from the current chunk into the given array.
175     *
176     * @return -1 if there is no current chunk or the number of bytes
177     * read from the current chunk (which may be -1 if the end of the
178     * chunk is reached).
179     */
180    private int readOnce(final byte[] b, final int off, final int len) throws IOException {
181        int read = -1;
182        if (inUncompressedChunk) {
183            final int amount = Math.min(uncompressedBytesRemaining, len);
184            if (amount == 0) {
185                return -1;
186            }
187            read = in.read(b, off, amount);
188            if (read != -1) {
189                uncompressedBytesRemaining -= read;
190                count(read);
191            }
192        } else if (currentCompressedChunk != null) {
193            final long before = currentCompressedChunk.getBytesRead();
194            read = currentCompressedChunk.read(b, off, len);
195            if (read == -1) {
196                currentCompressedChunk.close();
197                currentCompressedChunk = null;
198            } else {
199                count(currentCompressedChunk.getBytesRead() - before);
200            }
201        }
202        if (read > 0) {
203            checksum.update(b, off, read);
204        }
205        return read;
206    }
207
208    private void readNextBlock() throws IOException {
209        verifyLastChecksumAndReset();
210        inUncompressedChunk = false;
211        final int type = readOneByte();
212        if (type == -1) {
213            endReached = true;
214        } else if (type == STREAM_IDENTIFIER_TYPE) {
215            in.unread(type);
216            pushedBackBytes(1);
217            readStreamIdentifier();
218            readNextBlock();
219        } else if (type == PADDING_CHUNK_TYPE
220                   || (type > MAX_UNSKIPPABLE_TYPE && type <= MAX_SKIPPABLE_TYPE)) {
221            skipBlock();
222            readNextBlock();
223        } else if (type >= MIN_UNSKIPPABLE_TYPE && type <= MAX_UNSKIPPABLE_TYPE) {
224            throw new IOException("unskippable chunk with type " + type
225                                  + " (hex " + Integer.toHexString(type) + ")"
226                                  + " detected.");
227        } else if (type == UNCOMPRESSED_CHUNK_TYPE) {
228            inUncompressedChunk = true;
229            uncompressedBytesRemaining = readSize() - 4 /* CRC */;
230            expectedChecksum = unmask(readCrc());
231        } else if (type == COMPRESSED_CHUNK_TYPE) {
232            final boolean expectChecksum = dialect.usesChecksumWithCompressedChunks();
233            final long size = readSize() - (expectChecksum ? 4L : 0L);
234            if (expectChecksum) {
235                expectedChecksum = unmask(readCrc());
236            } else {
237                expectedChecksum = -1;
238            }
239            currentCompressedChunk =
240                new SnappyCompressorInputStream(new BoundedInputStream(in, size), blockSize);
241            // constructor reads uncompressed size
242            count(currentCompressedChunk.getBytesRead());
243        } else {
244            // impossible as all potential byte values have been covered
245            throw new IOException("unknown chunk type " + type
246                                  + " detected.");
247        }
248    }
249
250    private long readCrc() throws IOException {
251        final byte[] b = new byte[4];
252        final int read = IOUtils.readFully(in, b);
253        count(read);
254        if (read != 4) {
255            throw new IOException("premature end of stream");
256        }
257        return ByteUtils.fromLittleEndian(b);
258    }
259
260    static long unmask(long x) {
261        // ugly, maybe we should just have used ints and deal with the
262        // overflow
263        x -= MASK_OFFSET;
264        x &= 0xffffFFFFL;
265        return ((x >> 17) | (x << 15)) & 0xffffFFFFL;
266    }
267
268    private int readSize() throws IOException {
269        return (int) ByteUtils.fromLittleEndian(supplier, 3);
270    }
271
272    private void skipBlock() throws IOException {
273        final int size = readSize();
274        final long read = IOUtils.skip(in, size);
275        count(read);
276        if (read != size) {
277            throw new IOException("premature end of stream");
278        }
279    }
280
281    private void readStreamIdentifier() throws IOException {
282        final byte[] b = new byte[10];
283        final int read = IOUtils.readFully(in, b);
284        count(read);
285        if (10 != read || !matches(b, 10)) {
286            throw new IOException("Not a framed Snappy stream");
287        }
288    }
289
290    private int readOneByte() throws IOException {
291        final int b = in.read();
292        if (b != -1) {
293            count(1);
294            return b & 0xFF;
295        }
296        return -1;
297    }
298
299    private void verifyLastChecksumAndReset() throws IOException {
300        if (expectedChecksum >= 0 && expectedChecksum != checksum.getValue()) {
301            throw new IOException("Checksum verification failed");
302        }
303        expectedChecksum = -1;
304        checksum.reset();
305    }
306
307    /**
308     * Checks if the signature matches what is expected for a .sz file.
309     *
310     * <p>.sz files start with a chunk with tag 0xff and content sNaPpY.</p>
311     *
312     * @param signature the bytes to check
313     * @param length    the number of bytes to check
314     * @return          true if this is a .sz stream, false otherwise
315     */
316    public static boolean matches(final byte[] signature, final int length) {
317
318        if (length < SZ_SIGNATURE.length) {
319            return false;
320        }
321
322        byte[] shortenedSig = signature;
323        if (signature.length > SZ_SIGNATURE.length) {
324            shortenedSig = new byte[SZ_SIGNATURE.length];
325            System.arraycopy(signature, 0, shortenedSig, 0, SZ_SIGNATURE.length);
326        }
327
328        return Arrays.equals(shortenedSig, SZ_SIGNATURE);
329    }
330
331}