001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.commons.compress.compressors.bzip2;
020
021import java.io.IOException;
022import java.io.OutputStream;
023
024import org.apache.commons.compress.compressors.CompressorOutputStream;
025
026/**
 * An output stream that compresses data into the BZip2 format and writes the result to another stream.
028 *
029 * <p>
030 * The compression requires large amounts of memory. Thus you should call the
031 * {@link #close() close()} method as soon as possible, to force
032 * {@code BZip2CompressorOutputStream} to release the allocated memory.
033 * </p>
034 *
035 * <p> You can shrink the amount of allocated memory and maybe raise
036 * the compression speed by choosing a lower blocksize, which in turn
037 * may cause a lower compression ratio. You can avoid unnecessary
038 * memory allocation by avoiding using a blocksize which is bigger
039 * than the size of the input.  </p>
040 *
041 * <p> You can compute the memory usage for compressing by the
042 * following formula: </p>
043 *
044 * <pre>
045 * &lt;code&gt;400k + (9 * blocksize)&lt;/code&gt;.
046 * </pre>
047 *
048 * <p> To get the memory required for decompression by {@link
049 * BZip2CompressorInputStream} use </p>
050 *
051 * <pre>
052 * &lt;code&gt;65k + (5 * blocksize)&lt;/code&gt;.
053 * </pre>
054 *
055 * <table width="100%" border="1" summary="Memory usage by blocksize">
056 * <tr>
057 * <th colspan="3">Memory usage by blocksize</th>
058 * </tr>
059 * <tr>
060 * <th align="right">Blocksize</th> <th align="right">Compression<br>
061 * memory usage</th> <th align="right">Decompression<br>
062 * memory usage</th>
063 * </tr>
064 * <tr>
065 * <td align="right">100k</td>
066 * <td align="right">1300k</td>
067 * <td align="right">565k</td>
068 * </tr>
069 * <tr>
070 * <td align="right">200k</td>
071 * <td align="right">2200k</td>
072 * <td align="right">1065k</td>
073 * </tr>
074 * <tr>
075 * <td align="right">300k</td>
076 * <td align="right">3100k</td>
077 * <td align="right">1565k</td>
078 * </tr>
079 * <tr>
080 * <td align="right">400k</td>
081 * <td align="right">4000k</td>
082 * <td align="right">2065k</td>
083 * </tr>
084 * <tr>
085 * <td align="right">500k</td>
086 * <td align="right">4900k</td>
087 * <td align="right">2565k</td>
088 * </tr>
089 * <tr>
090 * <td align="right">600k</td>
091 * <td align="right">5800k</td>
092 * <td align="right">3065k</td>
093 * </tr>
094 * <tr>
095 * <td align="right">700k</td>
096 * <td align="right">6700k</td>
097 * <td align="right">3565k</td>
098 * </tr>
099 * <tr>
100 * <td align="right">800k</td>
101 * <td align="right">7600k</td>
102 * <td align="right">4065k</td>
103 * </tr>
104 * <tr>
105 * <td align="right">900k</td>
106 * <td align="right">8500k</td>
107 * <td align="right">4565k</td>
108 * </tr>
109 * </table>
110 *
111 * <p>
112 * For decompression {@code BZip2CompressorInputStream} allocates less memory if the
113 * bzipped input is smaller than one block.
114 * </p>
115 *
116 * <p>
117 * Instances of this class are not threadsafe.
118 * </p>
119 *
120 * <p>
121 * TODO: Update to BZip2 1.0.1
122 * </p>
123 * @NotThreadSafe
124 */
125public class BZip2CompressorOutputStream extends CompressorOutputStream
126    implements BZip2Constants {
127
128    /**
129     * The minimum supported blocksize {@code  == 1}.
130     */
131    public static final int MIN_BLOCKSIZE = 1;
132
133    /**
134     * The maximum supported blocksize {@code  == 9}.
135     */
136    public static final int MAX_BLOCKSIZE = 9;
137
138    private static final int GREATER_ICOST = 15;
139    private static final int LESSER_ICOST = 0;
140
141    private static void hbMakeCodeLengths(final byte[] len, final int[] freq,
142                                          final Data dat, final int alphaSize,
143                                          final int maxLen) {
144        /*
145         * Nodes and heap entries run from 1. Entry 0 for both the heap and
146         * nodes is a sentinel.
147         */
148        final int[] heap = dat.heap;
149        final int[] weight = dat.weight;
150        final int[] parent = dat.parent;
151
152        for (int i = alphaSize; --i >= 0;) {
153            weight[i + 1] = (freq[i] == 0 ? 1 : freq[i]) << 8;
154        }
155
156        for (boolean tooLong = true; tooLong;) {
157            tooLong = false;
158
159            int nNodes = alphaSize;
160            int nHeap = 0;
161            heap[0] = 0;
162            weight[0] = 0;
163            parent[0] = -2;
164
165            for (int i = 1; i <= alphaSize; i++) {
166                parent[i] = -1;
167                nHeap++;
168                heap[nHeap] = i;
169
170                int zz = nHeap;
171                final int tmp = heap[zz];
172                while (weight[tmp] < weight[heap[zz >> 1]]) {
173                    heap[zz] = heap[zz >> 1];
174                    zz >>= 1;
175                }
176                heap[zz] = tmp;
177            }
178
179            while (nHeap > 1) {
180                final int n1 = heap[1];
181                heap[1] = heap[nHeap];
182                nHeap--;
183
184                int yy = 0;
185                int zz = 1;
186                int tmp = heap[1];
187
188                while (true) {
189                    yy = zz << 1;
190
191                    if (yy > nHeap) {
192                        break;
193                    }
194
195                    if ((yy < nHeap)
196                        && (weight[heap[yy + 1]] < weight[heap[yy]])) {
197                        yy++;
198                    }
199
200                    if (weight[tmp] < weight[heap[yy]]) {
201                        break;
202                    }
203
204                    heap[zz] = heap[yy];
205                    zz = yy;
206                }
207
208                heap[zz] = tmp;
209
210                final int n2 = heap[1];
211                heap[1] = heap[nHeap];
212                nHeap--;
213
214                yy = 0;
215                zz = 1;
216                tmp = heap[1];
217
218                while (true) {
219                    yy = zz << 1;
220
221                    if (yy > nHeap) {
222                        break;
223                    }
224
225                    if ((yy < nHeap)
226                        && (weight[heap[yy + 1]] < weight[heap[yy]])) {
227                        yy++;
228                    }
229
230                    if (weight[tmp] < weight[heap[yy]]) {
231                        break;
232                    }
233
234                    heap[zz] = heap[yy];
235                    zz = yy;
236                }
237
238                heap[zz] = tmp;
239                nNodes++;
240                parent[n1] = parent[n2] = nNodes;
241
242                final int weight_n1 = weight[n1];
243                final int weight_n2 = weight[n2];
244                weight[nNodes] = ((weight_n1 & 0xffffff00)
245                                  + (weight_n2 & 0xffffff00))
246                    | (1 + (((weight_n1 & 0x000000ff)
247                             > (weight_n2 & 0x000000ff))
248                            ? (weight_n1 & 0x000000ff)
249                            : (weight_n2 & 0x000000ff)));
250
251                parent[nNodes] = -1;
252                nHeap++;
253                heap[nHeap] = nNodes;
254
255                tmp = 0;
256                zz = nHeap;
257                tmp = heap[zz];
258                final int weight_tmp = weight[tmp];
259                while (weight_tmp < weight[heap[zz >> 1]]) {
260                    heap[zz] = heap[zz >> 1];
261                    zz >>= 1;
262                }
263                heap[zz] = tmp;
264
265            }
266
267            for (int i = 1; i <= alphaSize; i++) {
268                int j = 0;
269                int k = i;
270
271                for (int parent_k; (parent_k = parent[k]) >= 0;) {
272                    k = parent_k;
273                    j++;
274                }
275
276                len[i - 1] = (byte) j;
277                if (j > maxLen) {
278                    tooLong = true;
279                }
280            }
281
282            if (tooLong) {
283                for (int i = 1; i < alphaSize; i++) {
284                    int j = weight[i] >> 8;
285                    j = 1 + (j >> 1);
286                    weight[i] = j << 8;
287                }
288            }
289        }
290    }
291
292    /**
293     * Index of the last char in the block, so the block size == last + 1.
294     */
295    private int last;
296
    /**
     * Always in the range 1 .. 9 (enforced by the constructor). The current
     * block size is 100000 * this number.
     */
301    private final int blockSize100k;
302
303    private int bsBuff;
304    private int bsLive;
305    private final CRC crc = new CRC();
306
307    private int nInUse;
308
309    private int nMTF;
310
311    private int currentChar = -1;
312    private int runLength = 0;
313
314    private int blockCRC;
315    private int combinedCRC;
316    private final int allowableBlockSize;
317
318    /**
319     * All memory intensive stuff.
320     */
321    private Data data;
322    private BlockSort blockSorter;
323
324    private OutputStream out;
325    private volatile boolean closed;
326
327    /**
328     * Chooses a blocksize based on the given length of the data to compress.
329     *
330     * @return The blocksize, between {@link #MIN_BLOCKSIZE} and
331     *         {@link #MAX_BLOCKSIZE} both inclusive. For a negative
332     *         {@code inputLength} this method returns {@code MAX_BLOCKSIZE}
333     *         always.
334     *
335     * @param inputLength
336     *            The length of the data which will be compressed by
337     *            {@code BZip2CompressorOutputStream}.
338     */
339    public static int chooseBlockSize(final long inputLength) {
340        return (inputLength > 0) ? (int) Math
341            .min((inputLength / 132000) + 1, 9) : MAX_BLOCKSIZE;
342    }
343
    /**
     * Constructs a new {@code BZip2CompressorOutputStream} that uses the
     * largest supported blocksize, {@link #MAX_BLOCKSIZE}.
     *
     * <p>Delegates to the two-argument constructor.</p>
     *
     * @param out
     *            the destination stream.
     *
     * @throws IOException
     *             if an I/O error occurs in the specified stream.
     * @throws NullPointerException
     *             if <code>out == null</code>.
     */
    public BZip2CompressorOutputStream(final OutputStream out)
        throws IOException {
        this(out, MAX_BLOCKSIZE);
    }
359
360    /**
361     * Constructs a new {@code BZip2CompressorOutputStream} with specified blocksize.
362     *
363     * @param out
364     *            the destination stream.
365     * @param blockSize
366     *            the blockSize as 100k units.
367     *
368     * @throws IOException
369     *             if an I/O error occurs in the specified stream.
370     * @throws IllegalArgumentException
371     *             if <code>(blockSize &lt; 1) || (blockSize &gt; 9)</code>.
372     * @throws NullPointerException
373     *             if <code>out == null</code>.
374     *
375     * @see #MIN_BLOCKSIZE
376     * @see #MAX_BLOCKSIZE
377     */
378    public BZip2CompressorOutputStream(final OutputStream out, final int blockSize) throws IOException {
379        if (blockSize < 1) {
380            throw new IllegalArgumentException("blockSize(" + blockSize + ") < 1");
381        }
382        if (blockSize > 9) {
383            throw new IllegalArgumentException("blockSize(" + blockSize + ") > 9");
384        }
385
386        this.blockSize100k = blockSize;
387        this.out = out;
388
389        /* 20 is just a paranoia constant */
390        this.allowableBlockSize = (this.blockSize100k * BZip2Constants.BASEBLOCKSIZE) - 20;
391        init();
392    }
393
394    @Override
395    public void write(final int b) throws IOException {
396        if (!closed) {
397            write0(b);
398        } else {
399            throw new IOException("Closed");
400        }
401    }
402
403    /**
404     * Writes the current byte to the buffer, run-length encoding it
405     * if it has been repeated at least four times (the first step
406     * RLEs sequences of four identical bytes).
407     *
408     * <p>Flushes the current block before writing data if it is
409     * full.</p>
410     *
411     * <p>"write to the buffer" means adding to data.buffer starting
412     * two steps "after" this.last - initially starting at index 1
413     * (not 0) - and updating this.last to point to the last index
414     * written minus 1.</p>
415     */
416    private void writeRun() throws IOException {
417        final int lastShadow = this.last;
418
419        if (lastShadow < this.allowableBlockSize) {
420            final int currentCharShadow = this.currentChar;
421            final Data dataShadow = this.data;
422            dataShadow.inUse[currentCharShadow] = true;
423            final byte ch = (byte) currentCharShadow;
424
425            int runLengthShadow = this.runLength;
426            this.crc.updateCRC(currentCharShadow, runLengthShadow);
427
428            switch (runLengthShadow) {
429            case 1:
430                dataShadow.block[lastShadow + 2] = ch;
431                this.last = lastShadow + 1;
432                break;
433
434            case 2:
435                dataShadow.block[lastShadow + 2] = ch;
436                dataShadow.block[lastShadow + 3] = ch;
437                this.last = lastShadow + 2;
438                break;
439
440            case 3: {
441                final byte[] block = dataShadow.block;
442                block[lastShadow + 2] = ch;
443                block[lastShadow + 3] = ch;
444                block[lastShadow + 4] = ch;
445                this.last = lastShadow + 3;
446            }
447                break;
448
449            default: {
450                runLengthShadow -= 4;
451                dataShadow.inUse[runLengthShadow] = true;
452                final byte[] block = dataShadow.block;
453                block[lastShadow + 2] = ch;
454                block[lastShadow + 3] = ch;
455                block[lastShadow + 4] = ch;
456                block[lastShadow + 5] = ch;
457                block[lastShadow + 6] = (byte) runLengthShadow;
458                this.last = lastShadow + 5;
459            }
460                break;
461
462            }
463        } else {
464            endBlock();
465            initBlock();
466            writeRun();
467        }
468    }
469
470    /**
471     * Overriden to warn about an unclosed stream.
472     */
473    @Override
474    protected void finalize() throws Throwable {
475        if (!closed) {
476            System.err.println("Unclosed BZip2CompressorOutputStream detected, will *not* close it");
477        }
478        super.finalize();
479    }
480
481
482    public void finish() throws IOException {
483        if (!closed) {
484            closed = true;
485            try {
486                if (this.runLength > 0) {
487                    writeRun();
488                }
489                this.currentChar = -1;
490                endBlock();
491                endCompression();
492            } finally {
493                this.out = null;
494                this.blockSorter = null;
495                this.data = null;
496            }
497        }
498    }
499
500    @Override
501    public void close() throws IOException {
502        if (!closed) {
503            final OutputStream outShadow = this.out;
504            try {
505                finish();
506            } finally {
507                outShadow.close();
508            }
509        }
510    }
511
512    @Override
513    public void flush() throws IOException {
514        final OutputStream outShadow = this.out;
515        if (outShadow != null) {
516            outShadow.flush();
517        }
518    }
519
520    /**
521     * Writes magic bytes like BZ on the first position of the stream
522     * and bytes indiciating the file-format, which is
523     * huffmanised, followed by a digit indicating blockSize100k.
524     * @throws IOException if the magic bytes could not been written
525     */
526    private void init() throws IOException {
527        bsPutUByte('B');
528        bsPutUByte('Z');
529
530        this.data = new Data(this.blockSize100k);
531        this.blockSorter = new BlockSort(this.data);
532
533        // huffmanised magic bytes
534        bsPutUByte('h');
535        bsPutUByte('0' + this.blockSize100k);
536
537        this.combinedCRC = 0;
538        initBlock();
539    }
540
541    private void initBlock() {
542        // blockNo++;
543        this.crc.initialiseCRC();
544        this.last = -1;
545        // ch = 0;
546
547        final boolean[] inUse = this.data.inUse;
548        for (int i = 256; --i >= 0;) {
549            inUse[i] = false;
550        }
551
552    }
553
554    private void endBlock() throws IOException {
555        this.blockCRC = this.crc.getFinalCRC();
556        this.combinedCRC = (this.combinedCRC << 1) | (this.combinedCRC >>> 31);
557        this.combinedCRC ^= this.blockCRC;
558
559        // empty block at end of file
560        if (this.last == -1) {
561            return;
562        }
563
564        /* sort the block and establish posn of original string */
565        blockSort();
566
567        /*
568         * A 6-byte block header, the value chosen arbitrarily as 0x314159265359
569         * :-). A 32 bit value does not really give a strong enough guarantee
570         * that the value will not appear by chance in the compressed
571         * datastream. Worst-case probability of this event, for a 900k block,
572         * is about 2.0e-3 for 32 bits, 1.0e-5 for 40 bits and 4.0e-8 for 48
573         * bits. For a compressed file of size 100Gb -- about 100000 blocks --
574         * only a 48-bit marker will do. NB: normal compression/ decompression
575         * donot rely on these statistical properties. They are only important
576         * when trying to recover blocks from damaged files.
577         */
578        bsPutUByte(0x31);
579        bsPutUByte(0x41);
580        bsPutUByte(0x59);
581        bsPutUByte(0x26);
582        bsPutUByte(0x53);
583        bsPutUByte(0x59);
584
585        /* Now the block's CRC, so it is in a known place. */
586        bsPutInt(this.blockCRC);
587
588        /* Now a single bit indicating no randomisation. */
589        bsW(1, 0);
590
591        /* Finally, block's contents proper. */
592        moveToFrontCodeAndSend();
593    }
594
595    private void endCompression() throws IOException {
596        /*
597         * Now another magic 48-bit number, 0x177245385090, to indicate the end
598         * of the last block. (sqrt(pi), if you want to know. I did want to use
599         * e, but it contains too much repetition -- 27 18 28 18 28 46 -- for me
600         * to feel statistically comfortable. Call me paranoid.)
601         */
602        bsPutUByte(0x17);
603        bsPutUByte(0x72);
604        bsPutUByte(0x45);
605        bsPutUByte(0x38);
606        bsPutUByte(0x50);
607        bsPutUByte(0x90);
608
609        bsPutInt(this.combinedCRC);
610        bsFinishedWithStream();
611    }
612
613    /**
614     * Returns the blocksize parameter specified at construction time.
615     * @return the blocksize parameter specified at construction time
616     */
617    public final int getBlockSize() {
618        return this.blockSize100k;
619    }
620
621    @Override
622    public void write(final byte[] buf, int offs, final int len)
623        throws IOException {
624        if (offs < 0) {
625            throw new IndexOutOfBoundsException("offs(" + offs + ") < 0.");
626        }
627        if (len < 0) {
628            throw new IndexOutOfBoundsException("len(" + len + ") < 0.");
629        }
630        if (offs + len > buf.length) {
631            throw new IndexOutOfBoundsException("offs(" + offs + ") + len("
632                                                + len + ") > buf.length("
633                                                + buf.length + ").");
634        }
635        if (closed) {
636            throw new IOException("Stream closed");
637        }
638
639        for (final int hi = offs + len; offs < hi;) {
640            write0(buf[offs++]);
641        }
642    }
643
644    /**
645     * Keeps track of the last bytes written and implicitly performs
646     * run-length encoding as the first step of the bzip2 algorithm.
647     */
648    private void write0(int b) throws IOException {
649        if (this.currentChar != -1) {
650            b &= 0xff;
651            if (this.currentChar == b) {
652                if (++this.runLength > 254) {
653                    writeRun();
654                    this.currentChar = -1;
655                    this.runLength = 0;
656                }
657                // else nothing to do
658            } else {
659                writeRun();
660                this.runLength = 1;
661                this.currentChar = b;
662            }
663        } else {
664            this.currentChar = b & 0xff;
665            this.runLength++;
666        }
667    }
668
669    private static void hbAssignCodes(final int[] code, final byte[] length,
670                                      final int minLen, final int maxLen,
671                                      final int alphaSize) {
672        int vec = 0;
673        for (int n = minLen; n <= maxLen; n++) {
674            for (int i = 0; i < alphaSize; i++) {
675                if ((length[i] & 0xff) == n) {
676                    code[i] = vec;
677                    vec++;
678                }
679            }
680            vec <<= 1;
681        }
682    }
683
684    private void bsFinishedWithStream() throws IOException {
685        while (this.bsLive > 0) {
686            final int ch = this.bsBuff >> 24;
687            this.out.write(ch); // write 8-bit
688            this.bsBuff <<= 8;
689            this.bsLive -= 8;
690        }
691    }
692
693    private void bsW(final int n, final int v) throws IOException {
694        final OutputStream outShadow = this.out;
695        int bsLiveShadow = this.bsLive;
696        int bsBuffShadow = this.bsBuff;
697
698        while (bsLiveShadow >= 8) {
699            outShadow.write(bsBuffShadow >> 24); // write 8-bit
700            bsBuffShadow <<= 8;
701            bsLiveShadow -= 8;
702        }
703
704        this.bsBuff = bsBuffShadow | (v << (32 - bsLiveShadow - n));
705        this.bsLive = bsLiveShadow + n;
706    }
707
    /**
     * Appends {@code c} to the bit stream as an 8-bit value.
     *
     * @param c the value to append
     * @throws IOException if writing to the underlying stream fails
     */
    private void bsPutUByte(final int c) throws IOException {
        bsW(8, c);
    }
711
712    private void bsPutInt(final int u) throws IOException {
713        bsW(8, (u >> 24) & 0xff);
714        bsW(8, (u >> 16) & 0xff);
715        bsW(8, (u >> 8) & 0xff);
716        bsW(8, u & 0xff);
717    }
718
719    private void sendMTFValues() throws IOException {
720        final byte[][] len = this.data.sendMTFValues_len;
721        final int alphaSize = this.nInUse + 2;
722
723        for (int t = N_GROUPS; --t >= 0;) {
724            final byte[] len_t = len[t];
725            for (int v = alphaSize; --v >= 0;) {
726                len_t[v] = GREATER_ICOST;
727            }
728        }
729
730        /* Decide how many coding tables to use */
731        // assert (this.nMTF > 0) : this.nMTF;
732        final int nGroups = (this.nMTF < 200) ? 2 : (this.nMTF < 600) ? 3
733            : (this.nMTF < 1200) ? 4 : (this.nMTF < 2400) ? 5 : 6;
734
735        /* Generate an initial set of coding tables */
736        sendMTFValues0(nGroups, alphaSize);
737
738        /*
739         * Iterate up to N_ITERS times to improve the tables.
740         */
741        final int nSelectors = sendMTFValues1(nGroups, alphaSize);
742
743        /* Compute MTF values for the selectors. */
744        sendMTFValues2(nGroups, nSelectors);
745
746        /* Assign actual codes for the tables. */
747        sendMTFValues3(nGroups, alphaSize);
748
749        /* Transmit the mapping table. */
750        sendMTFValues4();
751
752        /* Now the selectors. */
753        sendMTFValues5(nGroups, nSelectors);
754
755        /* Now the coding tables. */
756        sendMTFValues6(nGroups, alphaSize);
757
758        /* And finally, the block data proper */
759        sendMTFValues7();
760    }
761
762    private void sendMTFValues0(final int nGroups, final int alphaSize) {
763        final byte[][] len = this.data.sendMTFValues_len;
764        final int[] mtfFreq = this.data.mtfFreq;
765
766        int remF = this.nMTF;
767        int gs = 0;
768
769        for (int nPart = nGroups; nPart > 0; nPart--) {
770            final int tFreq = remF / nPart;
771            int ge = gs - 1;
772            int aFreq = 0;
773
774            for (final int a = alphaSize - 1; (aFreq < tFreq) && (ge < a);) {
775                aFreq += mtfFreq[++ge];
776            }
777
778            if ((ge > gs) && (nPart != nGroups) && (nPart != 1)
779                && (((nGroups - nPart) & 1) != 0)) {
780                aFreq -= mtfFreq[ge--];
781            }
782
783            final byte[] len_np = len[nPart - 1];
784            for (int v = alphaSize; --v >= 0;) {
785                if ((v >= gs) && (v <= ge)) {
786                    len_np[v] = LESSER_ICOST;
787                } else {
788                    len_np[v] = GREATER_ICOST;
789                }
790            }
791
792            gs = ge + 1;
793            remF -= aFreq;
794        }
795    }
796
797    private int sendMTFValues1(final int nGroups, final int alphaSize) {
798        final Data dataShadow = this.data;
799        final int[][] rfreq = dataShadow.sendMTFValues_rfreq;
800        final int[] fave = dataShadow.sendMTFValues_fave;
801        final short[] cost = dataShadow.sendMTFValues_cost;
802        final char[] sfmap = dataShadow.sfmap;
803        final byte[] selector = dataShadow.selector;
804        final byte[][] len = dataShadow.sendMTFValues_len;
805        final byte[] len_0 = len[0];
806        final byte[] len_1 = len[1];
807        final byte[] len_2 = len[2];
808        final byte[] len_3 = len[3];
809        final byte[] len_4 = len[4];
810        final byte[] len_5 = len[5];
811        final int nMTFShadow = this.nMTF;
812
813        int nSelectors = 0;
814
815        for (int iter = 0; iter < N_ITERS; iter++) {
816            for (int t = nGroups; --t >= 0;) {
817                fave[t] = 0;
818                final int[] rfreqt = rfreq[t];
819                for (int i = alphaSize; --i >= 0;) {
820                    rfreqt[i] = 0;
821                }
822            }
823
824            nSelectors = 0;
825
826            for (int gs = 0; gs < this.nMTF;) {
827                /* Set group start & end marks. */
828
829                /*
830                 * Calculate the cost of this group as coded by each of the
831                 * coding tables.
832                 */
833
834                final int ge = Math.min(gs + G_SIZE - 1, nMTFShadow - 1);
835
836                if (nGroups == N_GROUPS) {
837                    // unrolled version of the else-block
838
839                    short cost0 = 0;
840                    short cost1 = 0;
841                    short cost2 = 0;
842                    short cost3 = 0;
843                    short cost4 = 0;
844                    short cost5 = 0;
845
846                    for (int i = gs; i <= ge; i++) {
847                        final int icv = sfmap[i];
848                        cost0 += len_0[icv] & 0xff;
849                        cost1 += len_1[icv] & 0xff;
850                        cost2 += len_2[icv] & 0xff;
851                        cost3 += len_3[icv] & 0xff;
852                        cost4 += len_4[icv] & 0xff;
853                        cost5 += len_5[icv] & 0xff;
854                    }
855
856                    cost[0] = cost0;
857                    cost[1] = cost1;
858                    cost[2] = cost2;
859                    cost[3] = cost3;
860                    cost[4] = cost4;
861                    cost[5] = cost5;
862
863                } else {
864                    for (int t = nGroups; --t >= 0;) {
865                        cost[t] = 0;
866                    }
867
868                    for (int i = gs; i <= ge; i++) {
869                        final int icv = sfmap[i];
870                        for (int t = nGroups; --t >= 0;) {
871                            cost[t] += len[t][icv] & 0xff;
872                        }
873                    }
874                }
875
876                /*
877                 * Find the coding table which is best for this group, and
878                 * record its identity in the selector table.
879                 */
880                int bt = -1;
881                for (int t = nGroups, bc = 999999999; --t >= 0;) {
882                    final int cost_t = cost[t];
883                    if (cost_t < bc) {
884                        bc = cost_t;
885                        bt = t;
886                    }
887                }
888
889                fave[bt]++;
890                selector[nSelectors] = (byte) bt;
891                nSelectors++;
892
893                /*
894                 * Increment the symbol frequencies for the selected table.
895                 */
896                final int[] rfreq_bt = rfreq[bt];
897                for (int i = gs; i <= ge; i++) {
898                    rfreq_bt[sfmap[i]]++;
899                }
900
901                gs = ge + 1;
902            }
903
904            /*
905             * Recompute the tables based on the accumulated frequencies.
906             */
907            for (int t = 0; t < nGroups; t++) {
908                hbMakeCodeLengths(len[t], rfreq[t], this.data, alphaSize, 20);
909            }
910        }
911
912        return nSelectors;
913    }
914
915    private void sendMTFValues2(final int nGroups, final int nSelectors) {
916        // assert (nGroups < 8) : nGroups;
917
918        final Data dataShadow = this.data;
919        final byte[] pos = dataShadow.sendMTFValues2_pos;
920
921        for (int i = nGroups; --i >= 0;) {
922            pos[i] = (byte) i;
923        }
924
925        for (int i = 0; i < nSelectors; i++) {
926            final byte ll_i = dataShadow.selector[i];
927            byte tmp = pos[0];
928            int j = 0;
929
930            while (ll_i != tmp) {
931                j++;
932                final byte tmp2 = tmp;
933                tmp = pos[j];
934                pos[j] = tmp2;
935            }
936
937            pos[0] = tmp;
938            dataShadow.selectorMtf[i] = (byte) j;
939        }
940    }
941
942    private void sendMTFValues3(final int nGroups, final int alphaSize) {
943        final int[][] code = this.data.sendMTFValues_code;
944        final byte[][] len = this.data.sendMTFValues_len;
945
946        for (int t = 0; t < nGroups; t++) {
947            int minLen = 32;
948            int maxLen = 0;
949            final byte[] len_t = len[t];
950            for (int i = alphaSize; --i >= 0;) {
951                final int l = len_t[i] & 0xff;
952                if (l > maxLen) {
953                    maxLen = l;
954                }
955                if (l < minLen) {
956                    minLen = l;
957                }
958            }
959
960            // assert (maxLen <= 20) : maxLen;
961            // assert (minLen >= 1) : minLen;
962
963            hbAssignCodes(code[t], len[t], minLen, maxLen, alphaSize);
964        }
965    }
966
967    private void sendMTFValues4() throws IOException {
968        final boolean[] inUse = this.data.inUse;
969        final boolean[] inUse16 = this.data.sentMTFValues4_inUse16;
970
971        for (int i = 16; --i >= 0;) {
972            inUse16[i] = false;
973            final int i16 = i * 16;
974            for (int j = 16; --j >= 0;) {
975                if (inUse[i16 + j]) {
976                    inUse16[i] = true;
977                }
978            }
979        }
980
981        for (int i = 0; i < 16; i++) {
982            bsW(1, inUse16[i] ? 1 : 0);
983        }
984
985        final OutputStream outShadow = this.out;
986        int bsLiveShadow = this.bsLive;
987        int bsBuffShadow = this.bsBuff;
988
989        for (int i = 0; i < 16; i++) {
990            if (inUse16[i]) {
991                final int i16 = i * 16;
992                for (int j = 0; j < 16; j++) {
993                    // inlined: bsW(1, inUse[i16 + j] ? 1 : 0);
994                    while (bsLiveShadow >= 8) {
995                        outShadow.write(bsBuffShadow >> 24); // write 8-bit
996                        bsBuffShadow <<= 8;
997                        bsLiveShadow -= 8;
998                    }
999                    if (inUse[i16 + j]) {
1000                        bsBuffShadow |= 1 << (32 - bsLiveShadow - 1);
1001                    }
1002                    bsLiveShadow++;
1003                }
1004            }
1005        }
1006
1007        this.bsBuff = bsBuffShadow;
1008        this.bsLive = bsLiveShadow;
1009    }
1010
1011    private void sendMTFValues5(final int nGroups, final int nSelectors)
1012        throws IOException {
1013        bsW(3, nGroups);
1014        bsW(15, nSelectors);
1015
1016        final OutputStream outShadow = this.out;
1017        final byte[] selectorMtf = this.data.selectorMtf;
1018
1019        int bsLiveShadow = this.bsLive;
1020        int bsBuffShadow = this.bsBuff;
1021
1022        for (int i = 0; i < nSelectors; i++) {
1023            for (int j = 0, hj = selectorMtf[i] & 0xff; j < hj; j++) {
1024                // inlined: bsW(1, 1);
1025                while (bsLiveShadow >= 8) {
1026                    outShadow.write(bsBuffShadow >> 24);
1027                    bsBuffShadow <<= 8;
1028                    bsLiveShadow -= 8;
1029                }
1030                bsBuffShadow |= 1 << (32 - bsLiveShadow - 1);
1031                bsLiveShadow++;
1032            }
1033
1034            // inlined: bsW(1, 0);
1035            while (bsLiveShadow >= 8) {
1036                outShadow.write(bsBuffShadow >> 24);
1037                bsBuffShadow <<= 8;
1038                bsLiveShadow -= 8;
1039            }
1040            // bsBuffShadow |= 0 << (32 - bsLiveShadow - 1);
1041            bsLiveShadow++;
1042        }
1043
1044        this.bsBuff = bsBuffShadow;
1045        this.bsLive = bsLiveShadow;
1046    }
1047
1048    private void sendMTFValues6(final int nGroups, final int alphaSize)
1049        throws IOException {
1050        final byte[][] len = this.data.sendMTFValues_len;
1051        final OutputStream outShadow = this.out;
1052
1053        int bsLiveShadow = this.bsLive;
1054        int bsBuffShadow = this.bsBuff;
1055
1056        for (int t = 0; t < nGroups; t++) {
1057            final byte[] len_t = len[t];
1058            int curr = len_t[0] & 0xff;
1059
1060            // inlined: bsW(5, curr);
1061            while (bsLiveShadow >= 8) {
1062                outShadow.write(bsBuffShadow >> 24); // write 8-bit
1063                bsBuffShadow <<= 8;
1064                bsLiveShadow -= 8;
1065            }
1066            bsBuffShadow |= curr << (32 - bsLiveShadow - 5);
1067            bsLiveShadow += 5;
1068
1069            for (int i = 0; i < alphaSize; i++) {
1070                final int lti = len_t[i] & 0xff;
1071                while (curr < lti) {
1072                    // inlined: bsW(2, 2);
1073                    while (bsLiveShadow >= 8) {
1074                        outShadow.write(bsBuffShadow >> 24); // write 8-bit
1075                        bsBuffShadow <<= 8;
1076                        bsLiveShadow -= 8;
1077                    }
1078                    bsBuffShadow |= 2 << (32 - bsLiveShadow - 2);
1079                    bsLiveShadow += 2;
1080
1081                    curr++; /* 10 */
1082                }
1083
1084                while (curr > lti) {
1085                    // inlined: bsW(2, 3);
1086                    while (bsLiveShadow >= 8) {
1087                        outShadow.write(bsBuffShadow >> 24); // write 8-bit
1088                        bsBuffShadow <<= 8;
1089                        bsLiveShadow -= 8;
1090                    }
1091                    bsBuffShadow |= 3 << (32 - bsLiveShadow - 2);
1092                    bsLiveShadow += 2;
1093
1094                    curr--; /* 11 */
1095                }
1096
1097                // inlined: bsW(1, 0);
1098                while (bsLiveShadow >= 8) {
1099                    outShadow.write(bsBuffShadow >> 24); // write 8-bit
1100                    bsBuffShadow <<= 8;
1101                    bsLiveShadow -= 8;
1102                }
1103                // bsBuffShadow |= 0 << (32 - bsLiveShadow - 1);
1104                bsLiveShadow++;
1105            }
1106        }
1107
1108        this.bsBuff = bsBuffShadow;
1109        this.bsLive = bsLiveShadow;
1110    }
1111
1112    private void sendMTFValues7() throws IOException {
1113        final Data dataShadow = this.data;
1114        final byte[][] len = dataShadow.sendMTFValues_len;
1115        final int[][] code = dataShadow.sendMTFValues_code;
1116        final OutputStream outShadow = this.out;
1117        final byte[] selector = dataShadow.selector;
1118        final char[] sfmap = dataShadow.sfmap;
1119        final int nMTFShadow = this.nMTF;
1120
1121        int selCtr = 0;
1122
1123        int bsLiveShadow = this.bsLive;
1124        int bsBuffShadow = this.bsBuff;
1125
1126        for (int gs = 0; gs < nMTFShadow;) {
1127            final int ge = Math.min(gs + G_SIZE - 1, nMTFShadow - 1);
1128            final int selector_selCtr = selector[selCtr] & 0xff;
1129            final int[] code_selCtr = code[selector_selCtr];
1130            final byte[] len_selCtr = len[selector_selCtr];
1131
1132            while (gs <= ge) {
1133                final int sfmap_i = sfmap[gs];
1134
1135                //
1136                // inlined: bsW(len_selCtr[sfmap_i] & 0xff,
1137                // code_selCtr[sfmap_i]);
1138                //
1139                while (bsLiveShadow >= 8) {
1140                    outShadow.write(bsBuffShadow >> 24);
1141                    bsBuffShadow <<= 8;
1142                    bsLiveShadow -= 8;
1143                }
1144                final int n = len_selCtr[sfmap_i] & 0xFF;
1145                bsBuffShadow |= code_selCtr[sfmap_i] << (32 - bsLiveShadow - n);
1146                bsLiveShadow += n;
1147
1148                gs++;
1149            }
1150
1151            gs = ge + 1;
1152            selCtr++;
1153        }
1154
1155        this.bsBuff = bsBuffShadow;
1156        this.bsLive = bsLiveShadow;
1157    }
1158
1159    private void moveToFrontCodeAndSend() throws IOException {
1160        bsW(24, this.data.origPtr);
1161        generateMTFValues();
1162        sendMTFValues();
1163    }
1164
1165    private void blockSort() {
1166        blockSorter.blockSort(data, last);
1167    }
1168
1169    /*
1170     * Performs Move-To-Front on the Burrows-Wheeler transformed
1171     * buffer, storing the MTFed data in data.sfmap in RUNA/RUNB
1172     * run-length-encoded form.
1173     *
1174     * <p>Keeps track of byte frequencies in data.mtfFreq at the same time.</p>
1175     */
1176    private void generateMTFValues() {
1177        final int lastShadow = this.last;
1178        final Data dataShadow = this.data;
1179        final boolean[] inUse = dataShadow.inUse;
1180        final byte[] block = dataShadow.block;
1181        final int[] fmap = dataShadow.fmap;
1182        final char[] sfmap = dataShadow.sfmap;
1183        final int[] mtfFreq = dataShadow.mtfFreq;
1184        final byte[] unseqToSeq = dataShadow.unseqToSeq;
1185        final byte[] yy = dataShadow.generateMTFValues_yy;
1186
1187        // make maps
1188        int nInUseShadow = 0;
1189        for (int i = 0; i < 256; i++) {
1190            if (inUse[i]) {
1191                unseqToSeq[i] = (byte) nInUseShadow;
1192                nInUseShadow++;
1193            }
1194        }
1195        this.nInUse = nInUseShadow;
1196
1197        final int eob = nInUseShadow + 1;
1198
1199        for (int i = eob; i >= 0; i--) {
1200            mtfFreq[i] = 0;
1201        }
1202
1203        for (int i = nInUseShadow; --i >= 0;) {
1204            yy[i] = (byte) i;
1205        }
1206
1207        int wr = 0;
1208        int zPend = 0;
1209
1210        for (int i = 0; i <= lastShadow; i++) {
1211            final byte ll_i = unseqToSeq[block[fmap[i]] & 0xff];
1212            byte tmp = yy[0];
1213            int j = 0;
1214
1215            while (ll_i != tmp) {
1216                j++;
1217                final byte tmp2 = tmp;
1218                tmp = yy[j];
1219                yy[j] = tmp2;
1220            }
1221            yy[0] = tmp;
1222
1223            if (j == 0) {
1224                zPend++;
1225            } else {
1226                if (zPend > 0) {
1227                    zPend--;
1228                    while (true) {
1229                        if ((zPend & 1) == 0) {
1230                            sfmap[wr] = RUNA;
1231                            wr++;
1232                            mtfFreq[RUNA]++;
1233                        } else {
1234                            sfmap[wr] = RUNB;
1235                            wr++;
1236                            mtfFreq[RUNB]++;
1237                        }
1238
1239                        if (zPend >= 2) {
1240                            zPend = (zPend - 2) >> 1;
1241                        } else {
1242                            break;
1243                        }
1244                    }
1245                    zPend = 0;
1246                }
1247                sfmap[wr] = (char) (j + 1);
1248                wr++;
1249                mtfFreq[j + 1]++;
1250            }
1251        }
1252
1253        if (zPend > 0) {
1254            zPend--;
1255            while (true) {
1256                if ((zPend & 1) == 0) {
1257                    sfmap[wr] = RUNA;
1258                    wr++;
1259                    mtfFreq[RUNA]++;
1260                } else {
1261                    sfmap[wr] = RUNB;
1262                    wr++;
1263                    mtfFreq[RUNB]++;
1264                }
1265
1266                if (zPend >= 2) {
1267                    zPend = (zPend - 2) >> 1;
1268                } else {
1269                    break;
1270                }
1271            }
1272        }
1273
1274        sfmap[wr] = (char) eob;
1275        mtfFreq[eob]++;
1276        this.nMTF = wr + 1;
1277    }
1278
1279    static final class Data {
1280
1281        // with blockSize 900k
1282        /* maps unsigned byte => "does it occur in block" */
1283        final boolean[] inUse = new boolean[256]; // 256 byte
1284        final byte[] unseqToSeq = new byte[256]; // 256 byte
1285        final int[] mtfFreq = new int[MAX_ALPHA_SIZE]; // 1032 byte
1286        final byte[] selector = new byte[MAX_SELECTORS]; // 18002 byte
1287        final byte[] selectorMtf = new byte[MAX_SELECTORS]; // 18002 byte
1288
1289        final byte[] generateMTFValues_yy = new byte[256]; // 256 byte
1290        final byte[][] sendMTFValues_len = new byte[N_GROUPS][MAX_ALPHA_SIZE]; // 1548
1291        // byte
1292        final int[][] sendMTFValues_rfreq = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192
1293        // byte
1294        final int[] sendMTFValues_fave = new int[N_GROUPS]; // 24 byte
1295        final short[] sendMTFValues_cost = new short[N_GROUPS]; // 12 byte
1296        final int[][] sendMTFValues_code = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192
1297        // byte
1298        final byte[] sendMTFValues2_pos = new byte[N_GROUPS]; // 6 byte
1299        final boolean[] sentMTFValues4_inUse16 = new boolean[16]; // 16 byte
1300
1301        final int[] heap = new int[MAX_ALPHA_SIZE + 2]; // 1040 byte
1302        final int[] weight = new int[MAX_ALPHA_SIZE * 2]; // 2064 byte
1303        final int[] parent = new int[MAX_ALPHA_SIZE * 2]; // 2064 byte
1304
1305        // ------------
1306        // 333408 byte
1307
1308        /* holds the RLEd block of original data starting at index 1.
1309         * After sorting the last byte added to the buffer is at index
1310         * 0. */
1311        final byte[] block; // 900021 byte
1312        /* maps index in Burrows-Wheeler transformed block => index of
1313         * byte in original block */
1314        final int[] fmap; // 3600000 byte
1315        final char[] sfmap; // 3600000 byte
1316        // ------------
1317        // 8433529 byte
1318        // ============
1319
1320        /**
1321         * Index of original line in Burrows-Wheeler table.
1322         *
1323         * <p>This is the index in fmap that points to the last byte
1324         * of the original data.</p>
1325         */
1326        int origPtr;
1327
1328        Data(final int blockSize100k) {
1329            final int n = blockSize100k * BZip2Constants.BASEBLOCKSIZE;
1330            this.block = new byte[(n + 1 + NUM_OVERSHOOT_BYTES)];
1331            this.fmap = new int[n];
1332            this.sfmap = new char[2 * n];
1333        }
1334
1335    }
1336
1337}