001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, 013 * software distributed under the License is distributed on an 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 * KIND, either express or implied. See the License for the 016 * specific language governing permissions and limitations 017 * under the License. 018 */ 019package org.apache.commons.compress.compressors.lz4; 020 021import java.io.ByteArrayOutputStream; 022import java.io.IOException; 023import java.io.OutputStream; 024 025import org.apache.commons.compress.compressors.CompressorOutputStream; 026import org.apache.commons.compress.utils.ByteUtils; 027 028/** 029 * CompressorOutputStream for the LZ4 frame format. 030 * 031 * <p>Based on the "spec" in the version "1.5.1 (31/03/2015)"</p> 032 * 033 * @see <a href="http://lz4.github.io/lz4/lz4_Frame_format.html">LZ4 Frame Format Description</a> 034 * @since 1.14 035 * @NotThreadSafe 036 */ 037public class FramedLZ4CompressorOutputStream extends CompressorOutputStream { 038 039 private static final byte[] END_MARK = new byte[4]; 040 041 // used in one-arg write method 042 private final byte[] oneByte = new byte[1]; 043 044 private final byte[] blockData; 045 private final OutputStream out; 046 private final Parameters params; 047 private boolean finished = false; 048 private int currentIndex = 0; 049 050 // used for frame header checksum and content checksum, if requested 051 private final XXHash32 contentHash = new XXHash32(); 052 // used for block checksum, if requested 053 private final XXHash32 blockHash; 054 055 // only created if the config requires block dependency 056 private byte[] blockDependencyBuffer; 057 private int collectedBlockDependencyBytes; 058 059 /** 060 * The block sizes supported by the format. 061 */ 062 public enum BlockSize { 063 /** Block size of 64K */ 064 K64(64 * 1024, 4), 065 /** Block size of 256K */ 066 K256(256 * 1024, 5), 067 /** Block size of 1M */ 068 M1(1024 * 1024, 6), 069 /** Block size of 4M */ 070 M4(4096 * 1024, 7); 071 072 private final int size, index; 073 BlockSize(int size, int index) { 074 this.size = size; 075 this.index = index; 076 } 077 int getSize() { 078 return size; 079 } 080 int getIndex() { 081 return index; 082 } 083 } 084 085 /** 086 * Parameters of the LZ4 frame format. 087 */ 088 public static class Parameters { 089 private final BlockSize blockSize; 090 private final boolean withContentChecksum, withBlockChecksum, withBlockDependency; 091 private final org.apache.commons.compress.compressors.lz77support.Parameters lz77params; 092 093 /** 094 * The default parameters of 4M block size, enabled content 095 * checksum, disabled block checksums and independent blocks. 096 * 097 * <p>This matches the defaults of the lz4 command line utility.</p> 098 */ 099 public static final Parameters DEFAULT = new Parameters(BlockSize.M4, true, false, false); 100 101 /** 102 * Sets up custom a custom block size for the LZ4 stream but 103 * otherwise uses the defaults of enabled content checksum, 104 * disabled block checksums and independent blocks. 105 * @param blockSize the size of a single block. 106 */ 107 public Parameters(BlockSize blockSize) { 108 this(blockSize, true, false, false); 109 } 110 /** 111 * Sets up custom a custom block size for the LZ4 stream but 112 * otherwise uses the defaults of enabled content checksum, 113 * disabled block checksums and independent blocks. 114 * @param blockSize the size of a single block. 115 * @param lz77params parameters used to fine-tune compression, 116 * in particular to balance compression ratio vs compression 117 * speed. 118 */ 119 public Parameters(BlockSize blockSize, 120 org.apache.commons.compress.compressors.lz77support.Parameters lz77params) { 121 this(blockSize, true, false, false, lz77params); 122 } 123 /** 124 * Sets up custom parameters for the LZ4 stream. 125 * @param blockSize the size of a single block. 126 * @param withContentChecksum whether to write a content checksum 127 * @param withBlockChecksum whether to write a block checksum. 128 * Note that block checksums are not supported by the lz4 129 * command line utility 130 * @param withBlockDependency whether a block may depend on 131 * the content of a previous block. Enabling this may improve 132 * compression ratio but makes it impossible to decompress the 133 * output in parallel. 134 */ 135 public Parameters(BlockSize blockSize, boolean withContentChecksum, boolean withBlockChecksum, 136 boolean withBlockDependency) { 137 this(blockSize, withContentChecksum, withBlockChecksum, withBlockDependency, 138 BlockLZ4CompressorOutputStream.createParameterBuilder().build()); 139 } 140 141 /** 142 * Sets up custom parameters for the LZ4 stream. 143 * @param blockSize the size of a single block. 144 * @param withContentChecksum whether to write a content checksum 145 * @param withBlockChecksum whether to write a block checksum. 146 * Note that block checksums are not supported by the lz4 147 * command line utility 148 * @param withBlockDependency whether a block may depend on 149 * the content of a previous block. Enabling this may improve 150 * compression ratio but makes it impossible to decompress the 151 * output in parallel. 152 * @param lz77params parameters used to fine-tune compression, 153 * in particular to balance compression ratio vs compression 154 * speed. 155 */ 156 public Parameters(BlockSize blockSize, boolean withContentChecksum, boolean withBlockChecksum, 157 boolean withBlockDependency, 158 org.apache.commons.compress.compressors.lz77support.Parameters lz77params) { 159 this.blockSize = blockSize; 160 this.withContentChecksum = withContentChecksum; 161 this.withBlockChecksum = withBlockChecksum; 162 this.withBlockDependency = withBlockDependency; 163 this.lz77params = lz77params; 164 } 165 166 @Override 167 public String toString() { 168 return "LZ4 Parameters with BlockSize " + blockSize + ", withContentChecksum " + withContentChecksum 169 + ", withBlockChecksum " + withBlockChecksum + ", withBlockDependency " + withBlockDependency; 170 } 171 } 172 173 /** 174 * Constructs a new output stream that compresses data using the 175 * LZ4 frame format using the default block size of 4MB. 176 * @param out the OutputStream to which to write the compressed data 177 * @throws IOException if writing the signature fails 178 */ 179 public FramedLZ4CompressorOutputStream(OutputStream out) throws IOException { 180 this(out, Parameters.DEFAULT); 181 } 182 183 /** 184 * Constructs a new output stream that compresses data using the 185 * LZ4 frame format using the given block size. 186 * @param out the OutputStream to which to write the compressed data 187 * @param params the parameters to use 188 * @throws IOException if writing the signature fails 189 */ 190 public FramedLZ4CompressorOutputStream(OutputStream out, Parameters params) throws IOException { 191 this.params = params; 192 blockData = new byte[params.blockSize.getSize()]; 193 this.out = out; 194 blockHash = params.withBlockChecksum ? new XXHash32() : null; 195 out.write(FramedLZ4CompressorInputStream.LZ4_SIGNATURE); 196 writeFrameDescriptor(); 197 blockDependencyBuffer = params.withBlockDependency 198 ? new byte[BlockLZ4CompressorInputStream.WINDOW_SIZE] 199 : null; 200 } 201 202 @Override 203 public void write(int b) throws IOException { 204 oneByte[0] = (byte) (b & 0xff); 205 write(oneByte); 206 } 207 208 @Override 209 public void write(byte[] data, int off, int len) throws IOException { 210 if (params.withContentChecksum) { 211 contentHash.update(data, off, len); 212 } 213 if (currentIndex + len > blockData.length) { 214 flushBlock(); 215 while (len > blockData.length) { 216 System.arraycopy(data, off, blockData, 0, blockData.length); 217 off += blockData.length; 218 len -= blockData.length; 219 currentIndex = blockData.length; 220 flushBlock(); 221 } 222 } 223 System.arraycopy(data, off, blockData, currentIndex, len); 224 currentIndex += len; 225 } 226 227 @Override 228 public void close() throws IOException { 229 try { 230 finish(); 231 } finally { 232 out.close(); 233 } 234 } 235 236 /** 237 * Compresses all remaining data and writes it to the stream, 238 * doesn't close the underlying stream. 239 * @throws IOException if an error occurs 240 */ 241 public void finish() throws IOException { 242 if (!finished) { 243 if (currentIndex > 0) { 244 flushBlock(); 245 } 246 writeTrailer(); 247 finished = true; 248 } 249 } 250 251 private void writeFrameDescriptor() throws IOException { 252 int flags = FramedLZ4CompressorInputStream.SUPPORTED_VERSION; 253 if (!params.withBlockDependency) { 254 flags |= FramedLZ4CompressorInputStream.BLOCK_INDEPENDENCE_MASK; 255 } 256 if (params.withContentChecksum) { 257 flags |= FramedLZ4CompressorInputStream.CONTENT_CHECKSUM_MASK; 258 } 259 if (params.withBlockChecksum) { 260 flags |= FramedLZ4CompressorInputStream.BLOCK_CHECKSUM_MASK; 261 } 262 out.write(flags); 263 contentHash.update(flags); 264 int bd = (params.blockSize.getIndex() << 4) & FramedLZ4CompressorInputStream.BLOCK_MAX_SIZE_MASK; 265 out.write(bd); 266 contentHash.update(bd); 267 out.write((int) ((contentHash.getValue() >> 8) & 0xff)); 268 contentHash.reset(); 269 } 270 271 private void flushBlock() throws IOException { 272 final boolean withBlockDependency = params.withBlockDependency; 273 ByteArrayOutputStream baos = new ByteArrayOutputStream(); 274 try (BlockLZ4CompressorOutputStream o = new BlockLZ4CompressorOutputStream(baos, params.lz77params)) { 275 if (withBlockDependency) { 276 o.prefill(blockDependencyBuffer, blockDependencyBuffer.length - collectedBlockDependencyBytes, 277 collectedBlockDependencyBytes); 278 } 279 o.write(blockData, 0, currentIndex); 280 } 281 if (withBlockDependency) { 282 appendToBlockDependencyBuffer(blockData, 0, currentIndex); 283 } 284 byte[] b = baos.toByteArray(); 285 if (b.length > currentIndex) { // compression increased size, maybe beyond blocksize 286 ByteUtils.toLittleEndian(out, currentIndex | FramedLZ4CompressorInputStream.UNCOMPRESSED_FLAG_MASK, 287 4); 288 out.write(blockData, 0, currentIndex); 289 if (params.withBlockChecksum) { 290 blockHash.update(blockData, 0, currentIndex); 291 } 292 } else { 293 ByteUtils.toLittleEndian(out, b.length, 4); 294 out.write(b); 295 if (params.withBlockChecksum) { 296 blockHash.update(b, 0, b.length); 297 } 298 } 299 if (params.withBlockChecksum) { 300 ByteUtils.toLittleEndian(out, blockHash.getValue(), 4); 301 blockHash.reset(); 302 } 303 currentIndex = 0; 304 } 305 306 private void writeTrailer() throws IOException { 307 out.write(END_MARK); 308 if (params.withContentChecksum) { 309 ByteUtils.toLittleEndian(out, contentHash.getValue(), 4); 310 } 311 } 312 313 private void appendToBlockDependencyBuffer(final byte[] b, final int off, int len) { 314 len = Math.min(len, blockDependencyBuffer.length); 315 if (len > 0) { 316 int keep = blockDependencyBuffer.length - len; 317 if (keep > 0) { 318 // move last keep bytes towards the start of the buffer 319 System.arraycopy(blockDependencyBuffer, len, blockDependencyBuffer, 0, keep); 320 } 321 // append new data 322 System.arraycopy(b, off, blockDependencyBuffer, keep, len); 323 collectedBlockDependencyBytes = Math.min(collectedBlockDependencyBytes + len, 324 blockDependencyBuffer.length); 325 } 326 } 327 328} 329