001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.commons.compress.compressors.lz4;
020
021import java.io.IOException;
022import java.io.InputStream;
023import java.util.Arrays;
024
025import org.apache.commons.compress.compressors.CompressorInputStream;
026import org.apache.commons.compress.utils.BoundedInputStream;
027import org.apache.commons.compress.utils.ByteUtils;
028import org.apache.commons.compress.utils.ChecksumCalculatingInputStream;
029import org.apache.commons.compress.utils.CountingInputStream;
030import org.apache.commons.compress.utils.IOUtils;
031import org.apache.commons.compress.utils.InputStreamStatistics;
032
033/**
034 * CompressorInputStream for the LZ4 frame format.
035 *
036 * <p>Based on the "spec" in the version "1.5.1 (31/03/2015)"</p>
037 *
038 * @see <a href="http://lz4.github.io/lz4/lz4_Frame_format.html">LZ4 Frame Format Description</a>
039 * @since 1.14
040 * @NotThreadSafe
041 */
042public class FramedLZ4CompressorInputStream extends CompressorInputStream
043    implements InputStreamStatistics {
044
045    // used by FramedLZ4CompressorOutputStream as well
046    static final byte[] LZ4_SIGNATURE = new byte[] { //NOSONAR
047        4, 0x22, 0x4d, 0x18
048    };
049    private static final byte[] SKIPPABLE_FRAME_TRAILER = new byte[] {
050        0x2a, 0x4d, 0x18
051    };
052    private static final byte SKIPPABLE_FRAME_PREFIX_BYTE_MASK = 0x50;
053
054    static final int VERSION_MASK = 0xC0;
055    static final int SUPPORTED_VERSION = 0x40;
056    static final int BLOCK_INDEPENDENCE_MASK = 0x20;
057    static final int BLOCK_CHECKSUM_MASK = 0x10;
058    static final int CONTENT_SIZE_MASK = 0x08;
059    static final int CONTENT_CHECKSUM_MASK = 0x04;
060    static final int BLOCK_MAX_SIZE_MASK = 0x70;
061    static final int UNCOMPRESSED_FLAG_MASK = 0x80000000;
062
063    // used in no-arg read method
064    private final byte[] oneByte = new byte[1];
065
066    private final ByteUtils.ByteSupplier supplier = new ByteUtils.ByteSupplier() {
067        @Override
068        public int getAsByte() throws IOException {
069            return readOneByte();
070        }
071    };
072
073    private final CountingInputStream inputStream;
074    private final boolean decompressConcatenated;
075
076    private boolean expectBlockChecksum;
077    private boolean expectBlockDependency;
078    private boolean expectContentSize;
079    private boolean expectContentChecksum;
080
081    private InputStream currentBlock;
082    private boolean endReached, inUncompressed;
083
084    // used for frame header checksum and content checksum, if present
085    private final XXHash32 contentHash = new XXHash32();
086
087    // used for block checksum, if present
088    private final XXHash32 blockHash = new XXHash32();
089
090    // only created if the frame doesn't set the block independence flag
091    private byte[] blockDependencyBuffer;
092
093    /**
094     * Creates a new input stream that decompresses streams compressed
095     * using the LZ4 frame format and stops after decompressing the
096     * first frame.
097     * @param in  the InputStream from which to read the compressed data
098     * @throws IOException if reading fails
099     */
100    public FramedLZ4CompressorInputStream(InputStream in) throws IOException {
101        this(in, false);
102    }
103
104    /**
105     * Creates a new input stream that decompresses streams compressed
106     * using the LZ4 frame format.
107     * @param in  the InputStream from which to read the compressed data
108     * @param decompressConcatenated if true, decompress until the end
109     *          of the input; if false, stop after the first LZ4 frame
110     *          and leave the input position to point to the next byte
111     *          after the frame stream
112     * @throws IOException if reading fails
113     */
114    public FramedLZ4CompressorInputStream(InputStream in, boolean decompressConcatenated) throws IOException {
115        this.inputStream = new CountingInputStream(in);
116        this.decompressConcatenated = decompressConcatenated;
117        init(true);
118    }
119
120    /** {@inheritDoc} */
121    @Override
122    public int read() throws IOException {
123        return read(oneByte, 0, 1) == -1 ? -1 : oneByte[0] & 0xFF;
124    }
125
126    /** {@inheritDoc} */
127    @Override
128    public void close() throws IOException {
129        try {
130            if (currentBlock != null) {
131                currentBlock.close();
132                currentBlock = null;
133            }
134        } finally {
135            inputStream.close();
136        }
137    }
138
139    /** {@inheritDoc} */
140    @Override
141    public int read(final byte[] b, final int off, final int len) throws IOException {
142        if (len == 0) {
143            return 0;
144        }
145        if (endReached) {
146            return -1;
147        }
148        int r = readOnce(b, off, len);
149        if (r == -1) {
150            nextBlock();
151            if (!endReached) {
152                r = readOnce(b, off, len);
153            }
154        }
155        if (r != -1) {
156            if (expectBlockDependency) {
157                appendToBlockDependencyBuffer(b, off, r);
158            }
159            if (expectContentChecksum) {
160                contentHash.update(b, off, r);
161            }
162        }
163        return r;
164    }
165
166    /**
167     * @since 1.17
168     */
169    @Override
170    public long getCompressedCount() {
171        return inputStream.getBytesRead();
172    }
173
174    private void init(boolean firstFrame) throws IOException {
175        if (readSignature(firstFrame)) {
176            readFrameDescriptor();
177            nextBlock();
178        }
179    }
180
181    private boolean readSignature(boolean firstFrame) throws IOException {
182        String garbageMessage = firstFrame ? "Not a LZ4 frame stream" : "LZ4 frame stream followed by garbage";
183        final byte[] b = new byte[4];
184        int read = IOUtils.readFully(inputStream, b);
185        count(read);
186        if (0 == read && !firstFrame) {
187            // good LZ4 frame and nothing after it
188            endReached = true;
189            return false;
190        }
191        if (4 != read) {
192            throw new IOException(garbageMessage);
193        }
194
195        read = skipSkippableFrame(b);
196        if (0 == read && !firstFrame) {
197            // good LZ4 frame with only some skippable frames after it
198            endReached = true;
199            return false;
200        }
201        if (4 != read || !matches(b, 4)) {
202            throw new IOException(garbageMessage);
203        }
204        return true;
205    }
206
207    private void readFrameDescriptor() throws IOException {
208        int flags = readOneByte();
209        if (flags == -1) {
210            throw new IOException("Premature end of stream while reading frame flags");
211        }
212        contentHash.update(flags);
213        if ((flags & VERSION_MASK) != SUPPORTED_VERSION) {
214            throw new IOException("Unsupported version " + (flags >> 6));
215        }
216        expectBlockDependency = (flags & BLOCK_INDEPENDENCE_MASK) == 0;
217        if (expectBlockDependency) {
218            if (blockDependencyBuffer == null) {
219                blockDependencyBuffer = new byte[BlockLZ4CompressorInputStream.WINDOW_SIZE];
220            }
221        } else {
222            blockDependencyBuffer = null;
223        }
224        expectBlockChecksum = (flags & BLOCK_CHECKSUM_MASK) != 0;
225        expectContentSize = (flags & CONTENT_SIZE_MASK) != 0;
226        expectContentChecksum = (flags & CONTENT_CHECKSUM_MASK) != 0;
227        int bdByte = readOneByte();
228        if (bdByte == -1) { // max size is irrelevant for this implementation
229            throw new IOException("Premature end of stream while reading frame BD byte");
230        }
231        contentHash.update(bdByte);
232        if (expectContentSize) { // for now we don't care, contains the uncompressed size
233            byte[] contentSize = new byte[8];
234            int skipped = IOUtils.readFully(inputStream, contentSize);
235            count(skipped);
236            if (8 != skipped) {
237                throw new IOException("Premature end of stream while reading content size");
238            }
239            contentHash.update(contentSize, 0, contentSize.length);
240        }
241        int headerHash = readOneByte();
242        if (headerHash == -1) { // partial hash of header.
243            throw new IOException("Premature end of stream while reading frame header checksum");
244        }
245        int expectedHash = (int) ((contentHash.getValue() >> 8) & 0xff);
246        contentHash.reset();
247        if (headerHash != expectedHash) {
248            throw new IOException("Frame header checksum mismatch");
249        }
250    }
251
252    private void nextBlock() throws IOException {
253        maybeFinishCurrentBlock();
254        long len = ByteUtils.fromLittleEndian(supplier, 4);
255        boolean uncompressed = (len & UNCOMPRESSED_FLAG_MASK) != 0;
256        int realLen = (int) (len & (~UNCOMPRESSED_FLAG_MASK));
257        if (realLen < 0) {
258            throw new IOException("Found illegal block with negative size");
259        }
260        if (realLen == 0) {
261            verifyContentChecksum();
262            if (!decompressConcatenated) {
263                endReached = true;
264            } else {
265                init(false);
266            }
267            return;
268        }
269        InputStream capped = new BoundedInputStream(inputStream, realLen);
270        if (expectBlockChecksum) {
271            capped = new ChecksumCalculatingInputStream(blockHash, capped);
272        }
273        if (uncompressed) {
274            inUncompressed = true;
275            currentBlock = capped;
276        } else {
277            inUncompressed = false;
278            BlockLZ4CompressorInputStream s = new BlockLZ4CompressorInputStream(capped);
279            if (expectBlockDependency) {
280                s.prefill(blockDependencyBuffer);
281            }
282            currentBlock = s;
283        }
284    }
285
286    private void maybeFinishCurrentBlock() throws IOException {
287        if (currentBlock != null) {
288            currentBlock.close();
289            currentBlock = null;
290            if (expectBlockChecksum) {
291                verifyChecksum(blockHash, "block");
292                blockHash.reset();
293            }
294        }
295    }
296
297    private void verifyContentChecksum() throws IOException {
298        if (expectContentChecksum) {
299            verifyChecksum(contentHash, "content");
300        }
301        contentHash.reset();
302    }
303
304    private void verifyChecksum(XXHash32 hash, String kind) throws IOException {
305        byte[] checksum = new byte[4];
306        int read = IOUtils.readFully(inputStream, checksum);
307        count(read);
308        if (4 != read) {
309            throw new IOException("Premature end of stream while reading " + kind + " checksum");
310        }
311        long expectedHash = hash.getValue();
312        if (expectedHash != ByteUtils.fromLittleEndian(checksum)) {
313            throw new IOException(kind + " checksum mismatch.");
314        }
315    }
316
317    private int readOneByte() throws IOException {
318        final int b = inputStream.read();
319        if (b != -1) {
320            count(1);
321            return b & 0xFF;
322        }
323        return -1;
324    }
325
326    private int readOnce(byte[] b, int off, int len) throws IOException {
327        if (inUncompressed) {
328            int cnt = currentBlock.read(b, off, len);
329            count(cnt);
330            return cnt;
331        }
332        BlockLZ4CompressorInputStream l = (BlockLZ4CompressorInputStream) currentBlock;
333        long before = l.getBytesRead();
334        int cnt = currentBlock.read(b, off, len);
335        count(l.getBytesRead() - before);
336        return cnt;
337    }
338
339    private static boolean isSkippableFrameSignature(byte[] b) {
340        if ((b[0] & SKIPPABLE_FRAME_PREFIX_BYTE_MASK) != SKIPPABLE_FRAME_PREFIX_BYTE_MASK) {
341            return false;
342        }
343        for (int i = 1; i < 4; i++) {
344            if (b[i] != SKIPPABLE_FRAME_TRAILER[i - 1]) {
345                return false;
346            }
347        }
348        return true;
349    }
350
351    /**
352     * Skips over the contents of a skippable frame as well as
353     * skippable frames following it.
354     *
355     * <p>It then tries to read four more bytes which are supposed to
356     * hold an LZ4 signature and returns the number of bytes read
357     * while storing the bytes in the given array.</p>
358     */
359    private int skipSkippableFrame(byte[] b) throws IOException {
360        int read = 4;
361        while (read == 4 && isSkippableFrameSignature(b)) {
362            final long len = ByteUtils.fromLittleEndian(supplier, 4);
363            if (len < 0) {
364                throw new IOException("Found illegal skippable frame with negative size");
365            }
366            long skipped = IOUtils.skip(inputStream, len);
367            count(skipped);
368            if (len != skipped) {
369                throw new IOException("Premature end of stream while skipping frame");
370            }
371            read = IOUtils.readFully(inputStream, b);
372            count(read);
373        }
374        return read;
375    }
376
377    private void appendToBlockDependencyBuffer(final byte[] b, final int off, int len) {
378        len = Math.min(len, blockDependencyBuffer.length);
379        if (len > 0) {
380            int keep = blockDependencyBuffer.length - len;
381            if (keep > 0) {
382                // move last keep bytes towards the start of the buffer
383                System.arraycopy(blockDependencyBuffer, len, blockDependencyBuffer, 0, keep);
384            }
385            // append new data
386            System.arraycopy(b, off, blockDependencyBuffer, keep, len);
387        }
388    }
389
390    /**
391     * Checks if the signature matches what is expected for a .lz4 file.
392     *
393     * <p>.lz4 files start with a four byte signature.</p>
394     *
395     * @param signature the bytes to check
396     * @param length    the number of bytes to check
397     * @return          true if this is a .sz stream, false otherwise
398     */
399    public static boolean matches(final byte[] signature, final int length) {
400
401        if (length < LZ4_SIGNATURE.length) {
402            return false;
403        }
404
405        byte[] shortenedSig = signature;
406        if (signature.length > LZ4_SIGNATURE.length) {
407            shortenedSig = new byte[LZ4_SIGNATURE.length];
408            System.arraycopy(signature, 0, shortenedSig, 0, LZ4_SIGNATURE.length);
409        }
410
411        return Arrays.equals(shortenedSig, LZ4_SIGNATURE);
412    }
413}