/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.commons.compress.compressors.lz4;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStream;

import org.apache.commons.compress.compressors.CompressorOutputStream;
import org.apache.commons.compress.utils.ByteUtils;

/**
 * CompressorOutputStream for the LZ4 frame format.
 *
 * <p>Based on the "spec" in the version "1.5.1 (31/03/2015)"</p>
 *
 * <p>Data handed to the {@code write} methods is collected in a
 * buffer of the configured block size. Whenever the buffer is full
 * and more data arrives - or when the stream is finished - the
 * buffered bytes are written as a single LZ4 block.</p>
 *
 * @see <a href="http://lz4.github.io/lz4/lz4_Frame_format.html">LZ4 Frame Format Description</a>
 * @since 1.14
 * @NotThreadSafe
 */
public class FramedLZ4CompressorOutputStream extends CompressorOutputStream {

    /** Four zero bytes, written after the last block to terminate the frame. */
    private static final byte[] END_MARK = new byte[4];

    // used in one-arg write method
    private final byte[] oneByte = new byte[1];

    // collects uncompressed data until a full block is available
    private final byte[] blockData;
    private final OutputStream out;
    private final Parameters params;
    private boolean finished;
    // number of valid bytes currently held in blockData
    private int currentIndex;

    // used for frame header checksum and content checksum, if requested
    private final XXHash32 contentHash = new XXHash32();
    // used for block checksum, if requested
    private final XXHash32 blockHash;

    // only created if the config requires block dependency
    private final byte[] blockDependencyBuffer;
    // number of valid bytes at the END of blockDependencyBuffer
    private int collectedBlockDependencyBytes;

    /**
     * The block sizes supported by the format.
     */
    public enum BlockSize {
        /** Block size of 64K */
        K64(64 * 1024, 4),
        /** Block size of 256K */
        K256(256 * 1024, 5),
        /** Block size of 1M */
        M1(1024 * 1024, 6),
        /** Block size of 4M */
        M4(4096 * 1024, 7);

        /** Size of a block in bytes. */
        private final int size;
        /** Value written to the block descriptor byte of the frame header for this size. */
        private final int index;

        BlockSize(final int size, final int index) {
            this.size = size;
            this.index = index;
        }

        int getSize() {
            return size;
        }

        int getIndex() {
            return index;
        }
    }

    /**
     * Parameters of the LZ4 frame format.
     */
    public static class Parameters {
        private final BlockSize blockSize;
        private final boolean withContentChecksum, withBlockChecksum, withBlockDependency;
        private final org.apache.commons.compress.compressors.lz77support.Parameters lz77params;

        /**
         * The default parameters of 4M block size, enabled content
         * checksum, disabled block checksums and independent blocks.
         *
         * <p>This matches the defaults of the lz4 command line utility.</p>
         */
        public static final Parameters DEFAULT = new Parameters(BlockSize.M4, true, false, false);

        /**
         * Sets up a custom block size for the LZ4 stream but
         * otherwise uses the defaults of enabled content checksum,
         * disabled block checksums and independent blocks.
         * @param blockSize the size of a single block.
         */
        public Parameters(final BlockSize blockSize) {
            this(blockSize, true, false, false);
        }

        /**
         * Sets up a custom block size for the LZ4 stream but
         * otherwise uses the defaults of enabled content checksum,
         * disabled block checksums and independent blocks.
         * @param blockSize the size of a single block.
         * @param lz77params parameters used to fine-tune compression,
         * in particular to balance compression ratio vs compression
         * speed.
         */
        public Parameters(final BlockSize blockSize,
            final org.apache.commons.compress.compressors.lz77support.Parameters lz77params) {
            this(blockSize, true, false, false, lz77params);
        }

        /**
         * Sets up custom parameters for the LZ4 stream.
         * @param blockSize the size of a single block.
         * @param withContentChecksum whether to write a content checksum
         * @param withBlockChecksum whether to write a block checksum.
         * Note that block checksums are not supported by the lz4
         * command line utility
         * @param withBlockDependency whether a block may depend on
         * the content of a previous block. Enabling this may improve
         * compression ratio but makes it impossible to decompress the
         * output in parallel.
         */
        public Parameters(final BlockSize blockSize, final boolean withContentChecksum, final boolean withBlockChecksum,
            final boolean withBlockDependency) {
            this(blockSize, withContentChecksum, withBlockChecksum, withBlockDependency,
                 BlockLZ4CompressorOutputStream.createParameterBuilder().build());
        }

        /**
         * Sets up custom parameters for the LZ4 stream.
         * @param blockSize the size of a single block.
         * @param withContentChecksum whether to write a content checksum
         * @param withBlockChecksum whether to write a block checksum.
         * Note that block checksums are not supported by the lz4
         * command line utility
         * @param withBlockDependency whether a block may depend on
         * the content of a previous block. Enabling this may improve
         * compression ratio but makes it impossible to decompress the
         * output in parallel.
         * @param lz77params parameters used to fine-tune compression,
         * in particular to balance compression ratio vs compression
         * speed.
         */
        public Parameters(final BlockSize blockSize, final boolean withContentChecksum, final boolean withBlockChecksum,
            final boolean withBlockDependency,
            final org.apache.commons.compress.compressors.lz77support.Parameters lz77params) {
            this.blockSize = blockSize;
            this.withContentChecksum = withContentChecksum;
            this.withBlockChecksum = withBlockChecksum;
            this.withBlockDependency = withBlockDependency;
            this.lz77params = lz77params;
        }

        @Override
        public String toString() {
            return "LZ4 Parameters with BlockSize " + blockSize + ", withContentChecksum " + withContentChecksum
                + ", withBlockChecksum " + withBlockChecksum + ", withBlockDependency " + withBlockDependency;
        }
    }

    /**
     * Constructs a new output stream that compresses data using the
     * LZ4 frame format using the default block size of 4MB.
     * @param out the OutputStream to which to write the compressed data
     * @throws IOException if writing the signature fails
     */
    public FramedLZ4CompressorOutputStream(final OutputStream out) throws IOException {
        this(out, Parameters.DEFAULT);
    }

    /**
     * Constructs a new output stream that compresses data using the
     * LZ4 frame format using the given block size.
     *
     * <p>The frame signature and the frame descriptor are written
     * to {@code out} immediately.</p>
     *
     * @param out the OutputStream to which to write the compressed data
     * @param params the parameters to use
     * @throws IOException if writing the signature fails
     */
    public FramedLZ4CompressorOutputStream(final OutputStream out, final Parameters params) throws IOException {
        this.params = params;
        blockData = new byte[params.blockSize.getSize()];
        this.out = out;
        blockHash = params.withBlockChecksum ? new XXHash32() : null;
        out.write(FramedLZ4CompressorInputStream.LZ4_SIGNATURE);
        writeFrameDescriptor();
        blockDependencyBuffer = params.withBlockDependency
            ? new byte[BlockLZ4CompressorInputStream.WINDOW_SIZE]
            : null;
    }

    @Override
    public void write(final int b) throws IOException {
        oneByte[0] = (byte) (b & 0xff);
        write(oneByte);
    }

    /**
     * Buffers the given bytes and writes a block each time the
     * internal block buffer is full and more data has to be stored.
     *
     * <p>The buffer is only ever flushed when it is completely
     * filled, so this method never emits an empty block. A block
     * with a size of zero would be read as the end mark of the
     * frame by a decoder.</p>
     *
     * @param data the buffer holding the bytes to write
     * @param off offset of the first byte to write
     * @param len number of bytes to write
     * @throws IOException if writing a block fails
     * @throws IndexOutOfBoundsException if {@code off} and {@code len}
     * don't describe a valid range of {@code data}
     */
    @Override
    public void write(final byte[] data, int off, int len) throws IOException {
        // validate before touching the checksum so an invalid call leaves no trace in it
        if (off < 0 || len < 0 || len > data.length - off) {
            throw new IndexOutOfBoundsException("offset " + off + ", length " + len
                + ", array length " + data.length);
        }
        if (params.withContentChecksum) {
            contentHash.update(data, off, len);
        }
        while (len > 0) {
            if (currentIndex == blockData.length) {
                // buffer is full and there is more to store: emit it as one block
                flushBlock();
            }
            final int chunk = Math.min(len, blockData.length - currentIndex);
            System.arraycopy(data, off, blockData, currentIndex, chunk);
            currentIndex += chunk;
            off += chunk;
            len -= chunk;
        }
    }

    @Override
    public void close() throws IOException {
        try {
            finish();
        } finally {
            // the wrapped stream is closed even if finishing the frame failed
            out.close();
        }
    }

    /**
     * Compresses all remaining data and writes it to the stream,
     * doesn't close the underlying stream.
     * @throws IOException if an error occurs
     */
    public void finish() throws IOException {
        if (!finished) {
            if (currentIndex > 0) {
                flushBlock();
            }
            writeTrailer();
            finished = true;
        }
    }

    /**
     * Writes the FLG byte, the BD byte and the header checksum byte.
     *
     * <p>{@link #contentHash} is borrowed to calculate the header
     * checksum and reset afterwards so it can be used for the
     * content checksum.</p>
     */
    private void writeFrameDescriptor() throws IOException {
        int flags = FramedLZ4CompressorInputStream.SUPPORTED_VERSION;
        if (!params.withBlockDependency) {
            flags |= FramedLZ4CompressorInputStream.BLOCK_INDEPENDENCE_MASK;
        }
        if (params.withContentChecksum) {
            flags |= FramedLZ4CompressorInputStream.CONTENT_CHECKSUM_MASK;
        }
        if (params.withBlockChecksum) {
            flags |= FramedLZ4CompressorInputStream.BLOCK_CHECKSUM_MASK;
        }
        out.write(flags);
        contentHash.update(flags);
        final int bd = (params.blockSize.getIndex() << 4) & FramedLZ4CompressorInputStream.BLOCK_MAX_SIZE_MASK;
        out.write(bd);
        contentHash.update(bd);
        // the header checksum is the second byte of the hash over FLG and BD
        out.write((int) ((contentHash.getValue() >> 8) & 0xff));
        contentHash.reset();
    }

    /**
     * Writes the first {@link #currentIndex} bytes of {@link #blockData}
     * as a single block and marks the buffer as empty.
     *
     * <p>The data is compressed into a temporary buffer first. If the
     * compressed form is bigger than the input, the input is stored
     * as is and the block size is flagged as uncompressed.</p>
     *
     * <p>Callers must only invoke this method when
     * {@code currentIndex > 0}.</p>
     */
    private void flushBlock() throws IOException {
        final boolean withBlockDependency = params.withBlockDependency;
        final ByteArrayOutputStream baos = new ByteArrayOutputStream();
        try (BlockLZ4CompressorOutputStream o = new BlockLZ4CompressorOutputStream(baos, params.lz77params)) {
            if (withBlockDependency) {
                // hand over the most recent bytes of the previous block(s), they live at the buffer's end
                o.prefill(blockDependencyBuffer, blockDependencyBuffer.length - collectedBlockDependencyBytes,
                    collectedBlockDependencyBytes);
            }
            o.write(blockData, 0, currentIndex);
        }
        if (withBlockDependency) {
            appendToBlockDependencyBuffer(blockData, 0, currentIndex);
        }
        final byte[] b = baos.toByteArray();
        if (b.length > currentIndex) { // compression increased size, maybe beyond blocksize
            ByteUtils.toLittleEndian(out, currentIndex | FramedLZ4CompressorInputStream.UNCOMPRESSED_FLAG_MASK,
                4);
            out.write(blockData, 0, currentIndex);
            if (params.withBlockChecksum) {
                blockHash.update(blockData, 0, currentIndex);
            }
        } else {
            ByteUtils.toLittleEndian(out, b.length, 4);
            out.write(b);
            if (params.withBlockChecksum) {
                blockHash.update(b, 0, b.length);
            }
        }
        if (params.withBlockChecksum) {
            // the checksum covers the block's bytes exactly as they have been written
            ByteUtils.toLittleEndian(out, blockHash.getValue(), 4);
            blockHash.reset();
        }
        currentIndex = 0;
    }

    /**
     * Writes the end mark and - if requested - the content checksum.
     */
    private void writeTrailer() throws IOException {
        out.write(END_MARK);
        if (params.withContentChecksum) {
            ByteUtils.toLittleEndian(out, contentHash.getValue(), 4);
        }
    }

    /**
     * Remembers the most recent bytes of a block so they can be
     * used to prefill the compressor of the next block.
     *
     * <p>If the block is bigger than the dependency buffer only the
     * block's <em>last</em> bytes are kept as those are the ones
     * directly preceding the next block.</p>
     *
     * @param b array holding the block's uncompressed data
     * @param off offset of the block's first byte inside {@code b}
     * @param len length of the block
     */
    private void appendToBlockDependencyBuffer(final byte[] b, final int off, final int len) {
        final int copyLen = Math.min(len, blockDependencyBuffer.length);
        if (copyLen > 0) {
            final int keep = blockDependencyBuffer.length - copyLen;
            if (keep > 0) {
                // move last keep bytes towards the start of the buffer
                System.arraycopy(blockDependencyBuffer, copyLen, blockDependencyBuffer, 0, keep);
            }
            // append the tail of the new data
            System.arraycopy(b, off + len - copyLen, blockDependencyBuffer, keep, copyLen);
            collectedBlockDependencyBytes = Math.min(collectedBlockDependencyBytes + copyLen,
                blockDependencyBuffer.length);
        }
    }

}