001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, 013 * software distributed under the License is distributed on an 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 * KIND, either express or implied. See the License for the 016 * specific language governing permissions and limitations 017 * under the License. 018 */ 019package org.apache.commons.compress.compressors.snappy; 020 021import java.io.IOException; 022import java.io.InputStream; 023import java.io.PushbackInputStream; 024import java.util.Arrays; 025 026import org.apache.commons.compress.compressors.CompressorInputStream; 027import org.apache.commons.compress.utils.BoundedInputStream; 028import org.apache.commons.compress.utils.ByteUtils; 029import org.apache.commons.compress.utils.IOUtils; 030 031/** 032 * CompressorInputStream for the framing Snappy format. 033 * 034 * <p>Based on the "spec" in the version "Last revised: 2013-10-25"</p> 035 * 036 * @see <a href="https://github.com/google/snappy/blob/master/framing_format.txt">Snappy framing format description</a> 037 * @since 1.7 038 */ 039public class FramedSnappyCompressorInputStream extends CompressorInputStream { 040 041 /** 042 * package private for tests only. 043 */ 044 static final long MASK_OFFSET = 0xa282ead8L; 045 046 private static final int STREAM_IDENTIFIER_TYPE = 0xff; 047 static final int COMPRESSED_CHUNK_TYPE = 0; 048 private static final int UNCOMPRESSED_CHUNK_TYPE = 1; 049 private static final int PADDING_CHUNK_TYPE = 0xfe; 050 private static final int MIN_UNSKIPPABLE_TYPE = 2; 051 private static final int MAX_UNSKIPPABLE_TYPE = 0x7f; 052 private static final int MAX_SKIPPABLE_TYPE = 0xfd; 053 054 // used by FramedSnappyCompressorOutputStream as well 055 static final byte[] SZ_SIGNATURE = new byte[] { //NOSONAR 056 (byte) STREAM_IDENTIFIER_TYPE, // tag 057 6, 0, 0, // length 058 's', 'N', 'a', 'P', 'p', 'Y' 059 }; 060 061 /** The underlying stream to read compressed data from */ 062 private final PushbackInputStream in; 063 064 /** The dialect to expect */ 065 private final FramedSnappyDialect dialect; 066 067 private SnappyCompressorInputStream currentCompressedChunk; 068 069 // used in no-arg read method 070 private final byte[] oneByte = new byte[1]; 071 072 private boolean endReached, inUncompressedChunk; 073 074 private int uncompressedBytesRemaining; 075 private long expectedChecksum = -1; 076 private final int blockSize; 077 private final PureJavaCrc32C checksum = new PureJavaCrc32C(); 078 079 private final ByteUtils.ByteSupplier supplier = new ByteUtils.ByteSupplier() { 080 @Override 081 public int getAsByte() throws IOException { 082 return readOneByte(); 083 } 084 }; 085 086 /** 087 * Constructs a new input stream that decompresses 088 * snappy-framed-compressed data from the specified input stream 089 * using the {@link FramedSnappyDialect#STANDARD} dialect. 090 * @param in the InputStream from which to read the compressed data 091 * @throws IOException if reading fails 092 */ 093 public FramedSnappyCompressorInputStream(final InputStream in) throws IOException { 094 this(in, FramedSnappyDialect.STANDARD); 095 } 096 097 /** 098 * Constructs a new input stream that decompresses snappy-framed-compressed data 099 * from the specified input stream. 100 * @param in the InputStream from which to read the compressed data 101 * @param dialect the dialect used by the compressed stream 102 * @throws IOException if reading fails 103 */ 104 public FramedSnappyCompressorInputStream(final InputStream in, 105 final FramedSnappyDialect dialect) 106 throws IOException { 107 this(in, SnappyCompressorInputStream.DEFAULT_BLOCK_SIZE, dialect); 108 } 109 110 /** 111 * Constructs a new input stream that decompresses snappy-framed-compressed data 112 * from the specified input stream. 113 * @param in the InputStream from which to read the compressed data 114 * @param blockSize the block size to use for the compressed stream 115 * @param dialect the dialect used by the compressed stream 116 * @throws IOException if reading fails 117 * @since 1.14 118 */ 119 public FramedSnappyCompressorInputStream(final InputStream in, 120 final int blockSize, 121 final FramedSnappyDialect dialect) 122 throws IOException { 123 this.in = new PushbackInputStream(in, 1); 124 this.blockSize = blockSize; 125 this.dialect = dialect; 126 if (dialect.hasStreamIdentifier()) { 127 readStreamIdentifier(); 128 } 129 } 130 131 /** {@inheritDoc} */ 132 @Override 133 public int read() throws IOException { 134 return read(oneByte, 0, 1) == -1 ? -1 : oneByte[0] & 0xFF; 135 } 136 137 /** {@inheritDoc} */ 138 @Override 139 public void close() throws IOException { 140 if (currentCompressedChunk != null) { 141 currentCompressedChunk.close(); 142 currentCompressedChunk = null; 143 } 144 in.close(); 145 } 146 147 /** {@inheritDoc} */ 148 @Override 149 public int read(final byte[] b, final int off, final int len) throws IOException { 150 int read = readOnce(b, off, len); 151 if (read == -1) { 152 readNextBlock(); 153 if (endReached) { 154 return -1; 155 } 156 read = readOnce(b, off, len); 157 } 158 return read; 159 } 160 161 /** {@inheritDoc} */ 162 @Override 163 public int available() throws IOException { 164 if (inUncompressedChunk) { 165 return Math.min(uncompressedBytesRemaining, 166 in.available()); 167 } else if (currentCompressedChunk != null) { 168 return currentCompressedChunk.available(); 169 } 170 return 0; 171 } 172 173 /** 174 * Read from the current chunk into the given array. 175 * 176 * @return -1 if there is no current chunk or the number of bytes 177 * read from the current chunk (which may be -1 if the end of the 178 * chunk is reached). 179 */ 180 private int readOnce(final byte[] b, final int off, final int len) throws IOException { 181 int read = -1; 182 if (inUncompressedChunk) { 183 final int amount = Math.min(uncompressedBytesRemaining, len); 184 if (amount == 0) { 185 return -1; 186 } 187 read = in.read(b, off, amount); 188 if (read != -1) { 189 uncompressedBytesRemaining -= read; 190 count(read); 191 } 192 } else if (currentCompressedChunk != null) { 193 final long before = currentCompressedChunk.getBytesRead(); 194 read = currentCompressedChunk.read(b, off, len); 195 if (read == -1) { 196 currentCompressedChunk.close(); 197 currentCompressedChunk = null; 198 } else { 199 count(currentCompressedChunk.getBytesRead() - before); 200 } 201 } 202 if (read > 0) { 203 checksum.update(b, off, read); 204 } 205 return read; 206 } 207 208 private void readNextBlock() throws IOException { 209 verifyLastChecksumAndReset(); 210 inUncompressedChunk = false; 211 final int type = readOneByte(); 212 if (type == -1) { 213 endReached = true; 214 } else if (type == STREAM_IDENTIFIER_TYPE) { 215 in.unread(type); 216 pushedBackBytes(1); 217 readStreamIdentifier(); 218 readNextBlock(); 219 } else if (type == PADDING_CHUNK_TYPE 220 || (type > MAX_UNSKIPPABLE_TYPE && type <= MAX_SKIPPABLE_TYPE)) { 221 skipBlock(); 222 readNextBlock(); 223 } else if (type >= MIN_UNSKIPPABLE_TYPE && type <= MAX_UNSKIPPABLE_TYPE) { 224 throw new IOException("unskippable chunk with type " + type 225 + " (hex " + Integer.toHexString(type) + ")" 226 + " detected."); 227 } else if (type == UNCOMPRESSED_CHUNK_TYPE) { 228 inUncompressedChunk = true; 229 uncompressedBytesRemaining = readSize() - 4 /* CRC */; 230 expectedChecksum = unmask(readCrc()); 231 } else if (type == COMPRESSED_CHUNK_TYPE) { 232 final boolean expectChecksum = dialect.usesChecksumWithCompressedChunks(); 233 final long size = readSize() - (expectChecksum ? 4L : 0L); 234 if (expectChecksum) { 235 expectedChecksum = unmask(readCrc()); 236 } else { 237 expectedChecksum = -1; 238 } 239 currentCompressedChunk = 240 new SnappyCompressorInputStream(new BoundedInputStream(in, size), blockSize); 241 // constructor reads uncompressed size 242 count(currentCompressedChunk.getBytesRead()); 243 } else { 244 // impossible as all potential byte values have been covered 245 throw new IOException("unknown chunk type " + type 246 + " detected."); 247 } 248 } 249 250 private long readCrc() throws IOException { 251 final byte[] b = new byte[4]; 252 final int read = IOUtils.readFully(in, b); 253 count(read); 254 if (read != 4) { 255 throw new IOException("premature end of stream"); 256 } 257 return ByteUtils.fromLittleEndian(b); 258 } 259 260 static long unmask(long x) { 261 // ugly, maybe we should just have used ints and deal with the 262 // overflow 263 x -= MASK_OFFSET; 264 x &= 0xffffFFFFL; 265 return ((x >> 17) | (x << 15)) & 0xffffFFFFL; 266 } 267 268 private int readSize() throws IOException { 269 return (int) ByteUtils.fromLittleEndian(supplier, 3); 270 } 271 272 private void skipBlock() throws IOException { 273 final int size = readSize(); 274 final long read = IOUtils.skip(in, size); 275 count(read); 276 if (read != size) { 277 throw new IOException("premature end of stream"); 278 } 279 } 280 281 private void readStreamIdentifier() throws IOException { 282 final byte[] b = new byte[10]; 283 final int read = IOUtils.readFully(in, b); 284 count(read); 285 if (10 != read || !matches(b, 10)) { 286 throw new IOException("Not a framed Snappy stream"); 287 } 288 } 289 290 private int readOneByte() throws IOException { 291 final int b = in.read(); 292 if (b != -1) { 293 count(1); 294 return b & 0xFF; 295 } 296 return -1; 297 } 298 299 private void verifyLastChecksumAndReset() throws IOException { 300 if (expectedChecksum >= 0 && expectedChecksum != checksum.getValue()) { 301 throw new IOException("Checksum verification failed"); 302 } 303 expectedChecksum = -1; 304 checksum.reset(); 305 } 306 307 /** 308 * Checks if the signature matches what is expected for a .sz file. 309 * 310 * <p>.sz files start with a chunk with tag 0xff and content sNaPpY.</p> 311 * 312 * @param signature the bytes to check 313 * @param length the number of bytes to check 314 * @return true if this is a .sz stream, false otherwise 315 */ 316 public static boolean matches(final byte[] signature, final int length) { 317 318 if (length < SZ_SIGNATURE.length) { 319 return false; 320 } 321 322 byte[] shortenedSig = signature; 323 if (signature.length > SZ_SIGNATURE.length) { 324 shortenedSig = new byte[SZ_SIGNATURE.length]; 325 System.arraycopy(signature, 0, shortenedSig, 0, SZ_SIGNATURE.length); 326 } 327 328 return Arrays.equals(shortenedSig, SZ_SIGNATURE); 329 } 330 331}