001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements.  See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership.  The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the
007     * "License"); you may not use this file except in compliance
008     * with the License.  You may obtain a copy of the License at
009     *
010     *     http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing, software
013     * distributed under the License is distributed on an "AS IS" BASIS,
014     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015     * See the License for the specific language governing permissions and
016     * limitations under the License.
017     */
018    
019    package org.apache.hadoop.io.compress;
020    
021    import java.io.IOException;
022    import java.io.OutputStream;
023    
024    import org.apache.hadoop.classification.InterfaceAudience;
025    import org.apache.hadoop.classification.InterfaceStability;
026    
027    /**
028     * A {@link org.apache.hadoop.io.compress.CompressorStream} which works
029     * with 'block-based' based compression algorithms, as opposed to 
030     * 'stream-based' compression algorithms.
031     *
032     * It should be noted that this wrapper does not guarantee that blocks will
033     * be sized for the compressor. If the
034     * {@link org.apache.hadoop.io.compress.Compressor} requires buffering to
035     * effect meaningful compression, it is responsible for it.
036     */
037    @InterfaceAudience.Public
038    @InterfaceStability.Evolving
039    public class BlockCompressorStream extends CompressorStream {
040    
041      // The 'maximum' size of input data to be compressed, to account
042      // for the overhead of the compression algorithm.
043      private final int MAX_INPUT_SIZE;
044    
045      /**
046       * Create a {@link BlockCompressorStream}.
047       * 
048       * @param out stream
049       * @param compressor compressor to be used
050       * @param bufferSize size of buffer
051       * @param compressionOverhead maximum 'overhead' of the compression 
052       *                            algorithm with given bufferSize
053       */
054      public BlockCompressorStream(OutputStream out, Compressor compressor, 
055                                   int bufferSize, int compressionOverhead) {
056        super(out, compressor, bufferSize);
057        MAX_INPUT_SIZE = bufferSize - compressionOverhead;
058      }
059    
060      /**
061       * Create a {@link BlockCompressorStream} with given output-stream and 
062       * compressor.
063       * Use default of 512 as bufferSize and compressionOverhead of 
064       * (1% of bufferSize + 12 bytes) =  18 bytes (zlib algorithm).
065       * 
066       * @param out stream
067       * @param compressor compressor to be used
068       */
069      public BlockCompressorStream(OutputStream out, Compressor compressor) {
070        this(out, compressor, 512, 18);
071      }
072    
073      /**
074       * Write the data provided to the compression codec, compressing no more
075       * than the buffer size less the compression overhead as specified during
076       * construction for each block.
077       *
078       * Each block contains the uncompressed length for the block, followed by
079       * one or more length-prefixed blocks of compressed data.
080       */
081      public void write(byte[] b, int off, int len) throws IOException {
082        // Sanity checks
083        if (compressor.finished()) {
084          throw new IOException("write beyond end of stream");
085        }
086        if (b == null) {
087          throw new NullPointerException();
088        } else if ((off < 0) || (off > b.length) || (len < 0) ||
089                   ((off + len) > b.length)) {
090          throw new IndexOutOfBoundsException();
091        } else if (len == 0) {
092          return;
093        }
094    
095        long limlen = compressor.getBytesRead();
096        if (len + limlen > MAX_INPUT_SIZE && limlen > 0) {
097          // Adding this segment would exceed the maximum size.
098          // Flush data if we have it.
099          finish();
100          compressor.reset();
101        }
102    
103        if (len > MAX_INPUT_SIZE) {
104          // The data we're given exceeds the maximum size. Any data
105          // we had have been flushed, so we write out this chunk in segments
106          // not exceeding the maximum size until it is exhausted.
107          rawWriteInt(len);
108          do {
109            int bufLen = Math.min(len, MAX_INPUT_SIZE);
110            
111            compressor.setInput(b, off, bufLen);
112            compressor.finish();
113            while (!compressor.finished()) {
114              compress();
115            }
116            compressor.reset();
117            off += bufLen;
118            len -= bufLen;
119          } while (len > 0);
120          return;
121        }
122    
123        // Give data to the compressor
124        compressor.setInput(b, off, len);
125        if (!compressor.needsInput()) {
126          // compressor buffer size might be smaller than the maximum
127          // size, so we permit it to flush if required.
128          rawWriteInt((int)compressor.getBytesRead());
129          do {
130            compress();
131          } while (!compressor.needsInput());
132        }
133      }
134    
135      public void finish() throws IOException {
136        if (!compressor.finished()) {
137          rawWriteInt((int)compressor.getBytesRead());
138          compressor.finish();
139          while (!compressor.finished()) {
140            compress();
141          }
142        }
143      }
144    
145      protected void compress() throws IOException {
146        int len = compressor.compress(buffer, 0, buffer.length);
147        if (len > 0) {
148          // Write out the compressed chunk
149          rawWriteInt(len);
150          out.write(buffer, 0, len);
151        }
152      }
153      
154      private void rawWriteInt(int v) throws IOException {
155        out.write((v >>> 24) & 0xFF);
156        out.write((v >>> 16) & 0xFF);
157        out.write((v >>>  8) & 0xFF);
158        out.write((v >>>  0) & 0xFF);
159      }
160    
161    }