/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.commons.compress.compressors.bzip2;

import java.io.IOException;
import java.io.OutputStream;

import org.apache.commons.compress.compressors.CompressorOutputStream;

/**
 * An output stream that compresses into the BZip2 format into another stream.
 *
 * <p>
 * The compression requires large amounts of memory. Thus you should call the
 * {@link #close() close()} method as soon as possible, to force
 * {@code BZip2CompressorOutputStream} to release the allocated memory.
 * </p>
 *
 * <p> You can shrink the amount of allocated memory and maybe raise
 * the compression speed by choosing a lower blocksize, which in turn
 * may cause a lower compression ratio. You can avoid unnecessary
 * memory allocation by avoiding using a blocksize which is bigger
 * than the size of the input.  </p>
 *
 * <p> You can compute the memory usage for compressing by the
 * following formula: </p>
 *
 * <pre>
 * <code>400k + (9 * blocksize)</code>.
 * </pre>
 *
 * <p> To get the memory required for decompression by {@link
 * BZip2CompressorInputStream} use </p>
 *
 * <pre>
 * <code>65k + (5 * blocksize)</code>.
 * </pre>
 *
 * <table width="100%" border="1" summary="Memory usage by blocksize">
 * <tr>
 * <th colspan="3">Memory usage by blocksize</th>
 * </tr>
 * <tr>
 * <th align="right">Blocksize</th> <th align="right">Compression<br>
 * memory usage</th> <th align="right">Decompression<br>
 * memory usage</th>
 * </tr>
 * <tr>
 * <td align="right">100k</td>
 * <td align="right">1300k</td>
 * <td align="right">565k</td>
 * </tr>
 * <tr>
 * <td align="right">200k</td>
 * <td align="right">2200k</td>
 * <td align="right">1065k</td>
 * </tr>
 * <tr>
 * <td align="right">300k</td>
 * <td align="right">3100k</td>
 * <td align="right">1565k</td>
 * </tr>
 * <tr>
 * <td align="right">400k</td>
 * <td align="right">4000k</td>
 * <td align="right">2065k</td>
 * </tr>
 * <tr>
 * <td align="right">500k</td>
 * <td align="right">4900k</td>
 * <td align="right">2565k</td>
 * </tr>
 * <tr>
 * <td align="right">600k</td>
 * <td align="right">5800k</td>
 * <td align="right">3065k</td>
 * </tr>
 * <tr>
 * <td align="right">700k</td>
 * <td align="right">6700k</td>
 * <td align="right">3565k</td>
 * </tr>
 * <tr>
 * <td align="right">800k</td>
 * <td align="right">7600k</td>
 * <td align="right">4065k</td>
 * </tr>
 * <tr>
 * <td align="right">900k</td>
 * <td align="right">8500k</td>
 * <td align="right">4565k</td>
 * </tr>
 * </table>
 *
 * <p>
 * For decompression {@code BZip2CompressorInputStream} allocates less memory if the
 * bzipped input is smaller than one block.
 * </p>
 *
 * <p>
 * Instances of this class are not threadsafe.
 * </p>
 *
 * <p>
 * TODO: Update to BZip2 1.0.1
 * </p>
 * @NotThreadSafe
 */
public class BZip2CompressorOutputStream extends CompressorOutputStream
    implements BZip2Constants {

    /**
     * The minimum supported blocksize {@code == 1}.
     */
    public static final int MIN_BLOCKSIZE = 1;

    /**
     * The maximum supported blocksize {@code == 9}.
     */
    public static final int MAX_BLOCKSIZE = 9;

    private static final int GREATER_ICOST = 15;
    private static final int LESSER_ICOST = 0;

    /**
     * Computes Huffman code lengths for the given symbol frequencies.
     *
     * <p>Each weight packs a frequency into its upper 24 bits (a
     * frequency of zero is treated as one) and a depth indicator into
     * its lower 8 bits. If any resulting code is longer than
     * {@code maxLen} the frequencies are roughly halved and the tree
     * is rebuilt until all codes fit.</p>
     *
     * @param len receives the code length of symbol {@code i} at
     *        index {@code i}
     * @param freq the frequency of each symbol
     * @param dat supplies the scratch arrays {@code heap},
     *        {@code weight} and {@code parent}
     * @param alphaSize number of symbols in the alphabet
     * @param maxLen the longest code length that is acceptable
     */
    private static void hbMakeCodeLengths(final byte[] len, final int[] freq,
                                          final Data dat, final int alphaSize,
                                          final int maxLen) {
        /*
         * Nodes and heap entries run from 1. Entry 0 for both the heap and
         * nodes is a sentinel.
         */
        final int[] heap = dat.heap;
        final int[] weight = dat.weight;
        final int[] parent = dat.parent;

        for (int i = alphaSize; --i >= 0;) {
            weight[i + 1] = (freq[i] == 0 ? 1 : freq[i]) << 8;
        }

        for (boolean tooLong = true; tooLong;) {
            tooLong = false;

            int nNodes = alphaSize;
            int nHeap = 0;
            heap[0] = 0;
            weight[0] = 0;
            parent[0] = -2;

            // build the initial min-heap of leaf nodes
            for (int i = 1; i <= alphaSize; i++) {
                parent[i] = -1;
                nHeap++;
                heap[nHeap] = i;
                upHeap(heap, weight, nHeap);
            }

            // repeatedly merge the two lightest nodes into a new parent
            while (nHeap > 1) {
                final int n1 = heap[1];
                heap[1] = heap[nHeap];
                nHeap--;
                downHeap(heap, weight, nHeap);

                final int n2 = heap[1];
                heap[1] = heap[nHeap];
                nHeap--;
                downHeap(heap, weight, nHeap);

                nNodes++;
                parent[n1] = parent[n2] = nNodes;

                // upper 24 bits: sum of both weights,
                // lower 8 bits: 1 + the larger of both low bytes
                final int weight_n1 = weight[n1];
                final int weight_n2 = weight[n2];
                weight[nNodes] = ((weight_n1 & 0xffffff00)
                                  + (weight_n2 & 0xffffff00))
                    | (1 + (((weight_n1 & 0x000000ff)
                             > (weight_n2 & 0x000000ff))
                            ? (weight_n1 & 0x000000ff)
                            : (weight_n2 & 0x000000ff)));

                parent[nNodes] = -1;
                nHeap++;
                heap[nHeap] = nNodes;
                upHeap(heap, weight, nHeap);
            }

            // the code length of a symbol is its distance from the root
            for (int i = 1; i <= alphaSize; i++) {
                int j = 0;
                int k = i;

                for (int parent_k; (parent_k = parent[k]) >= 0;) {
                    k = parent_k;
                    j++;
                }

                len[i - 1] = (byte) j;
                if (j > maxLen) {
                    tooLong = true;
                }
            }

            if (tooLong) {
                /*
                 * Scale the frequencies down and try again. Weights
                 * occupy indices 1 .. alphaSize, so the bound must be
                 * inclusive; the former "i < alphaSize" skipped the
                 * last symbol (the reference bzip2 implementation
                 * fixed the same condition in huffman.c).
                 */
                for (int i = 1; i <= alphaSize; i++) {
                    int j = weight[i] >> 8;
                    j = 1 + (j >> 1);
                    weight[i] = j << 8;
                }
            }
        }
    }

    /**
     * Moves the heap entry at index {@code pos} towards the root until
     * its parent is not heavier. Relies on the sentinel at index 0.
     */
    private static void upHeap(final int[] heap, final int[] weight,
                               final int pos) {
        int zz = pos;
        final int tmp = heap[zz];
        final int weight_tmp = weight[tmp];
        while (weight_tmp < weight[heap[zz >> 1]]) {
            heap[zz] = heap[zz >> 1];
            zz >>= 1;
        }
        heap[zz] = tmp;
    }

    /**
     * Moves the heap entry at index 1 towards the leaves until it is
     * lighter than both of its children, looking at entries
     * {@code 1 .. nHeap} only.
     */
    private static void downHeap(final int[] heap, final int[] weight,
                                 final int nHeap) {
        int zz = 1;
        final int tmp = heap[1];

        while (true) {
            int yy = zz << 1;

            if (yy > nHeap) {
                break;
            }

            // pick the lighter of the two children
            if ((yy < nHeap)
                && (weight[heap[yy + 1]] < weight[heap[yy]])) {
                yy++;
            }

            if (weight[tmp] < weight[heap[yy]]) {
                break;
            }

            heap[zz] = heap[yy];
            zz = yy;
        }

        heap[zz] = tmp;
    }

    /**
     * Index of the last char in the block, so the block size == last + 1.
     */
    private int last;

    /**
     * Always: in the range 1 .. 9. The current block size is 100000 * this
     * number.
     */
    private final int blockSize100k;

    private int bsBuff;
    private int bsLive;
    private final CRC crc = new CRC();

    private int nInUse;

    private int nMTF;

    private int currentChar = -1;
    private int runLength = 0;

    private int blockCRC;
    private int combinedCRC;
    private final int allowableBlockSize;

    /**
     * All memory intensive stuff.
     */
    private Data data;
    private BlockSort blockSorter;

    private OutputStream out;
    private volatile boolean closed;

    /**
     * Chooses a blocksize based on the given length of the data to compress.
     *
     * @return The blocksize, between {@link #MIN_BLOCKSIZE} and
     *         {@link #MAX_BLOCKSIZE} both inclusive. For a non-positive
     *         {@code inputLength} this method returns {@code MAX_BLOCKSIZE}
     *         always.
     *
     * @param inputLength
     *            The length of the data which will be compressed by
     *            {@code BZip2CompressorOutputStream}.
     */
    public static int chooseBlockSize(final long inputLength) {
        return (inputLength > 0) ? (int) Math
            .min((inputLength / 132000) + 1, MAX_BLOCKSIZE) : MAX_BLOCKSIZE;
    }

    /**
     * Constructs a new {@code BZip2CompressorOutputStream} with a blocksize of 900k.
     *
     * @param out
     *            the destination stream.
     *
     * @throws IOException
     *             if an I/O error occurs in the specified stream.
     * @throws NullPointerException
     *             if {@code out == null}.
     */
    public BZip2CompressorOutputStream(final OutputStream out)
        throws IOException {
        this(out, MAX_BLOCKSIZE);
    }

    /**
     * Constructs a new {@code BZip2CompressorOutputStream} with specified blocksize.
     *
     * @param out
     *            the destination stream.
     * @param blockSize
     *            the blockSize as 100k units.
     *
     * @throws IOException
     *             if an I/O error occurs in the specified stream.
     * @throws IllegalArgumentException
     *             if {@code (blockSize < 1) || (blockSize > 9)}.
     * @throws NullPointerException
     *             if {@code out == null}.
     *
     * @see #MIN_BLOCKSIZE
     * @see #MAX_BLOCKSIZE
     */
    public BZip2CompressorOutputStream(final OutputStream out, final int blockSize) throws IOException {
        if (blockSize < MIN_BLOCKSIZE) {
            throw new IllegalArgumentException("blockSize(" + blockSize + ") < 1");
        }
        if (blockSize > MAX_BLOCKSIZE) {
            throw new IllegalArgumentException("blockSize(" + blockSize + ") > 9");
        }

        this.blockSize100k = blockSize;
        this.out = out;

        /* 20 is just a paranoia constant */
        this.allowableBlockSize = (this.blockSize100k * BZip2Constants.BASEBLOCKSIZE) - 20;
        init();
    }

    @Override
    public void write(final int b) throws IOException {
        if (!closed) {
            write0(b);
        } else {
            throw new IOException("Closed");
        }
    }

    /**
     * Writes the current byte to the buffer, run-length encoding it
     * if it has been repeated at least four times (the first step
     * RLEs sequences of four identical bytes).
     *
     * <p>Flushes the current block before writing data if it is
     * full.</p>
     *
     * <p>"write to the buffer" means adding to data.buffer starting
     * two steps "after" this.last - initially starting at index 1
     * (not 0) - and updating this.last to point to the last index
     * written minus 1.</p>
     */
    private void writeRun() throws IOException {
        final int lastShadow = this.last;

        if (lastShadow < this.allowableBlockSize) {
            final int currentCharShadow = this.currentChar;
            final Data dataShadow = this.data;
            dataShadow.inUse[currentCharShadow] = true;
            final byte ch = (byte) currentCharShadow;

            int runLengthShadow = this.runLength;
            this.crc.updateCRC(currentCharShadow, runLengthShadow);

            switch (runLengthShadow) {
            case 1:
                dataShadow.block[lastShadow + 2] = ch;
                this.last = lastShadow + 1;
                break;

            case 2:
                dataShadow.block[lastShadow + 2] = ch;
                dataShadow.block[lastShadow + 3] = ch;
                this.last = lastShadow + 2;
                break;

            case 3: {
                final byte[] block = dataShadow.block;
                block[lastShadow + 2] = ch;
                block[lastShadow + 3] = ch;
                block[lastShadow + 4] = ch;
                this.last = lastShadow + 3;
            }
                break;

            default: {
                // four copies of the byte followed by (runLength - 4)
                // stored as a byte, which therefore is "in use" as well
                runLengthShadow -= 4;
                dataShadow.inUse[runLengthShadow] = true;
                final byte[] block = dataShadow.block;
                block[lastShadow + 2] = ch;
                block[lastShadow + 3] = ch;
                block[lastShadow + 4] = ch;
                block[lastShadow + 5] = ch;
                block[lastShadow + 6] = (byte) runLengthShadow;
                this.last = lastShadow + 5;
            }
                break;

            }
        } else {
            // block is full: emit it, start a new one and retry
            endBlock();
            initBlock();
            writeRun();
        }
    }

    /**
     * Overridden to warn about an unclosed stream.
     */
    @Override
    protected void finalize() throws Throwable {
        if (!closed) {
            System.err.println("Unclosed BZip2CompressorOutputStream detected, will *not* close it");
        }
        super.finalize();
    }


    /**
     * Writes any pending run, the last block and the end-of-stream
     * trailer, then releases the compression buffers.
     *
     * <p>The underlying stream is not closed by this method, but this
     * stream is marked as closed: subsequent writes fail and a later
     * call to {@link #close()} is a no-op.</p>
     *
     * @throws IOException if writing to the underlying stream fails
     */
    public void finish() throws IOException {
        if (!closed) {
            closed = true;
            try {
                if (this.runLength > 0) {
                    writeRun();
                }
                this.currentChar = -1;
                endBlock();
                endCompression();
            } finally {
                this.out = null;
                this.blockSorter = null;
                this.data = null;
            }
        }
    }

    /**
     * Finishes the compressed stream and closes the underlying stream.
     *
     * <p>Does nothing if this stream has already been closed or
     * {@link #finish() finished}; in the latter case the underlying
     * stream is left open.</p>
     *
     * @throws IOException if finishing or closing the underlying
     *         stream fails
     */
    @Override
    public void close() throws IOException {
        if (!closed) {
            // finish() clears this.out, so keep a reference for closing
            final OutputStream outShadow = this.out;
            try {
                finish();
            } finally {
                outShadow.close();
            }
        }
    }

    /**
     * Flushes the underlying stream if there still is one. Bits that
     * have not been written to the underlying stream yet stay buffered.
     *
     * @throws IOException if flushing the underlying stream fails
     */
    @Override
    public void flush() throws IOException {
        final OutputStream outShadow = this.out;
        if (outShadow != null) {
            outShadow.flush();
        }
    }

    /**
     * Writes magic bytes like BZ on the first position of the stream
     * and bytes indicating the file-format, which is
     * huffmanised, followed by a digit indicating blockSize100k.
     * @throws IOException if the magic bytes could not be written
     */
    private void init() throws IOException {
        bsPutUByte('B');
        bsPutUByte('Z');

        this.data = new Data(this.blockSize100k);
        this.blockSorter = new BlockSort(this.data);

        // huffmanised magic bytes
        bsPutUByte('h');
        bsPutUByte('0' + this.blockSize100k);

        this.combinedCRC = 0;
        initBlock();
    }

    /**
     * Resets the CRC, the block fill pointer and the "byte in use"
     * table for a new block.
     */
    private void initBlock() {
        // blockNo++;
        this.crc.initialiseCRC();
        this.last = -1;
        // ch = 0;

        final boolean[] inUse = this.data.inUse;
        for (int i = 256; --i >= 0;) {
            inUse[i] = false;
        }

    }

    /**
     * Folds the block's CRC into the combined CRC and - unless the
     * block is empty - sorts the block and writes its header and
     * contents.
     */
    private void endBlock() throws IOException {
        this.blockCRC = this.crc.getFinalCRC();
        this.combinedCRC = (this.combinedCRC << 1) | (this.combinedCRC >>> 31);
        this.combinedCRC ^= this.blockCRC;

        // empty block at end of file
        if (this.last == -1) {
            return;
        }

        /* sort the block and establish posn of original string */
        blockSort();

        /*
         * A 6-byte block header, the value chosen arbitrarily as 0x314159265359
         * :-). A 32 bit value does not really give a strong enough guarantee
         * that the value will not appear by chance in the compressed
         * datastream. Worst-case probability of this event, for a 900k block,
         * is about 2.0e-3 for 32 bits, 1.0e-5 for 40 bits and 4.0e-8 for 48
         * bits. For a compressed file of size 100Gb -- about 100000 blocks --
         * only a 48-bit marker will do. NB: normal compression/ decompression
         * do not rely on these statistical properties. They are only important
         * when trying to recover blocks from damaged files.
         */
        bsPutUByte(0x31);
        bsPutUByte(0x41);
        bsPutUByte(0x59);
        bsPutUByte(0x26);
        bsPutUByte(0x53);
        bsPutUByte(0x59);

        /* Now the block's CRC, so it is in a known place. */
        bsPutInt(this.blockCRC);

        /* Now a single bit indicating no randomisation. */
        bsW(1, 0);

        /* Finally, block's contents proper. */
        moveToFrontCodeAndSend();
    }

    /**
     * Writes the end-of-stream marker and the combined CRC and pushes
     * out all bits that are still buffered.
     */
    private void endCompression() throws IOException {
        /*
         * Now another magic 48-bit number, 0x177245385090, to indicate the end
         * of the last block. (sqrt(pi), if you want to know. I did want to use
         * e, but it contains too much repetition -- 27 18 28 18 28 46 -- for me
         * to feel statistically comfortable. Call me paranoid.)
         */
        bsPutUByte(0x17);
        bsPutUByte(0x72);
        bsPutUByte(0x45);
        bsPutUByte(0x38);
        bsPutUByte(0x50);
        bsPutUByte(0x90);

        bsPutInt(this.combinedCRC);
        bsFinishedWithStream();
    }

    /**
     * Returns the blocksize parameter specified at construction time.
     * @return the blocksize parameter specified at construction time
     */
    public final int getBlockSize() {
        return this.blockSize100k;
    }

    @Override
    public void write(final byte[] buf, int offs, final int len)
        throws IOException {
        if (offs < 0) {
            throw new IndexOutOfBoundsException("offs(" + offs + ") < 0.");
        }
        if (len < 0) {
            throw new IndexOutOfBoundsException("len(" + len + ") < 0.");
        }
        // written as a subtraction so that offs + len cannot overflow
        if (len > buf.length - offs) {
            throw new IndexOutOfBoundsException("offs(" + offs + ") + len("
                                                + len + ") > buf.length("
                                                + buf.length + ").");
        }
        if (closed) {
            throw new IOException("Stream closed");
        }

        for (final int hi = offs + len; offs < hi;) {
            write0(buf[offs++]);
        }
    }

    /**
     * Keeps track of the last bytes written and implicitly performs
     * run-length encoding as the first step of the bzip2 algorithm.
     */
    private void write0(int b) throws IOException {
        if (this.currentChar != -1) {
            b &= 0xff;
            if (this.currentChar == b) {
                // a run is written out once it reaches 255 bytes
                if (++this.runLength > 254) {
                    writeRun();
                    this.currentChar = -1;
                    this.runLength = 0;
                }
                // else nothing to do
            } else {
                writeRun();
                this.runLength = 1;
                this.currentChar = b;
            }
        } else {
            this.currentChar = b & 0xff;
            this.runLength++;
        }
    }

    /**
     * Assigns consecutive code values to the symbols, shortest code
     * lengths first and in symbol order within the same length.
     */
    private static void hbAssignCodes(final int[] code, final byte[] length,
                                      final int minLen, final int maxLen,
                                      final int alphaSize) {
        int vec = 0;
        for (int n = minLen; n <= maxLen; n++) {
            for (int i = 0; i < alphaSize; i++) {
                if ((length[i] & 0xff) == n) {
                    code[i] = vec;
                    vec++;
                }
            }
            vec <<= 1;
        }
    }

    /**
     * Writes all bits remaining in the bit buffer to the underlying
     * stream, one byte at a time.
     */
    private void bsFinishedWithStream() throws IOException {
        while (this.bsLive > 0) {
            final int ch = this.bsBuff >> 24;
            this.out.write(ch); // write 8-bit
            this.bsBuff <<= 8;
            this.bsLive -= 8;
        }
    }

    /**
     * Appends {@code n} bits holding the value {@code v} to the bit
     * buffer after writing all complete bytes of the buffer to the
     * underlying stream.
     */
    private void bsW(final int n, final int v) throws IOException {
        final OutputStream outShadow = this.out;
        int bsLiveShadow = this.bsLive;
        int bsBuffShadow = this.bsBuff;

        while (bsLiveShadow >= 8) {
            outShadow.write(bsBuffShadow >> 24); // write 8-bit
            bsBuffShadow <<= 8;
            bsLiveShadow -= 8;
        }

        this.bsBuff = bsBuffShadow | (v << (32 - bsLiveShadow - n));
        this.bsLive = bsLiveShadow + n;
    }

    /** Appends the given value as eight bits to the bit buffer. */
    private void bsPutUByte(final int c) throws IOException {
        bsW(8, c);
    }

    /**
     * Appends the given value as four bytes, most significant byte
     * first, to the bit buffer.
     */
    private void bsPutInt(final int u) throws IOException {
        bsW(8, (u >> 24) & 0xff);
        bsW(8, (u >> 16) & 0xff);
        bsW(8, (u >> 8) & 0xff);
        bsW(8, u & 0xff);
    }

    private void sendMTFValues() throws IOException {
        final byte[][] len = this.data.sendMTFValues_len;
        final int alphaSize = this.nInUse + 2;

        for (int t = N_GROUPS; --t >= 0;) {
            final byte[] len_t = len[t];
            for (int v = alphaSize; --v >= 0;) {
                len_t[v] = GREATER_ICOST;
            }
        }

        /* Decide how many coding tables to use */
        // assert (this.nMTF > 0) : this.nMTF;
        final int nGroups = (this.nMTF < 200) ? 2 : (this.nMTF < 600) ? 3
            : (this.nMTF < 1200) ? 4 : (this.nMTF < 2400) ? 5 : 6;

        /* Generate an initial set of coding tables */
        sendMTFValues0(nGroups, alphaSize);

        /*
         * Iterate up to N_ITERS times to improve the tables.
         */
        final int nSelectors = sendMTFValues1(nGroups, alphaSize);

        /* Compute MTF values for the selectors. */
        sendMTFValues2(nGroups, nSelectors);

        /* Assign actual codes for the tables. */
        sendMTFValues3(nGroups, alphaSize);

        /* Transmit the mapping table. */
        sendMTFValues4();

        /* Now the selectors. */
        sendMTFValues5(nGroups, nSelectors);

        /* Now the coding tables. */
        sendMTFValues6(nGroups, alphaSize);

        /* And finally, the block data proper */
        sendMTFValues7();
    }

    private void sendMTFValues0(final int nGroups, final int alphaSize) {
        final byte[][] len = this.data.sendMTFValues_len;
        final int[] mtfFreq = this.data.mtfFreq;

        int remF = this.nMTF;
        int gs = 0;

        for (int nPart = nGroups; nPart > 0; nPart--) {
            final int tFreq = remF / nPart;
            int ge = gs - 1;
            int aFreq = 0;

            for (final int a = alphaSize - 1; (aFreq < tFreq) && (ge < a);) {
                aFreq += mtfFreq[++ge];
            }

            if ((ge > gs) && (nPart != nGroups) && (nPart != 1)
                && (((nGroups - nPart) & 1) != 0)) {
                aFreq -= mtfFreq[ge--];
            }

            final byte[] len_np = len[nPart - 1];
            for (int v = alphaSize; --v >= 0;) {
                if ((v >= gs) && (v <= ge)) {
                    len_np[v] = LESSER_ICOST;
                } else {
                    len_np[v] = GREATER_ICOST;
                }
            }

            gs = ge + 1;
            remF -= aFreq;
        }
    }

    private int sendMTFValues1(final int nGroups, final int alphaSize) {
        final Data dataShadow = this.data;
        final int[][] rfreq = dataShadow.sendMTFValues_rfreq;
        final int[] fave = dataShadow.sendMTFValues_fave;
        final short[] cost = dataShadow.sendMTFValues_cost;
        final char[] sfmap = dataShadow.sfmap;
        final byte[] selector = dataShadow.selector;
        final byte[][] len = dataShadow.sendMTFValues_len;
        final byte[] len_0 = len[0];
        final byte[] len_1 = len[1];
        final byte[] len_2 = len[2];
        final byte[] len_3 = len[3];
        final byte[] len_4 = len[4];
        final byte[] len_5 = len[5];
        final int nMTFShadow = this.nMTF;

        int nSelectors = 0;

        for (int iter = 0; iter < N_ITERS; iter++) {
            for (int t = nGroups; --t >= 0;) {
                fave[t] = 0;
                final int[] rfreqt = rfreq[t];
                for (int i = alphaSize; --i >= 0;) {
                    rfreqt[i] = 0;
                }
            }

            nSelectors = 0;

            for (int gs = 0; gs < this.nMTF;) {
                /* Set group start & end marks. */

                /*
                 * Calculate the cost of this group as coded by each of the
                 * coding tables.
                 */

                final int ge = Math.min(gs + G_SIZE - 1, nMTFShadow - 1);

                if (nGroups == N_GROUPS) {
                    // unrolled version of the else-block

                    short cost0 = 0;
                    short cost1 = 0;
                    short cost2 = 0;
                    short cost3 = 0;
                    short cost4 = 0;
                    short cost5 = 0;

                    for (int i = gs; i <= ge; i++) {
                        final int icv = sfmap[i];
                        cost0 += len_0[icv] & 0xff;
                        cost1 += len_1[icv] & 0xff;
                        cost2 += len_2[icv] & 0xff;
                        cost3 += len_3[icv] & 0xff;
                        cost4 += len_4[icv] & 0xff;
                        cost5 += len_5[icv] & 0xff;
                    }

                    cost[0] = cost0;
                    cost[1] = cost1;
                    cost[2] = cost2;
                    cost[3] = cost3;
                    cost[4] = cost4;
                    cost[5] = cost5;

                } else {
                    for (int t = nGroups; --t >= 0;) {
                        cost[t] = 0;
                    }

                    for (int i = gs; i <= ge; i++) {
                        final int icv = sfmap[i];
                        for (int t = nGroups; --t >= 0;) {
                            cost[t] += len[t][icv] & 0xff;
                        }
                    }
                }

                /*
                 * Find the coding table which is best for this group, and
                 * record its identity in the selector table.
                 */
                int bt = -1;
                for (int t = nGroups, bc = 999999999; --t >= 0;) {
                    final int cost_t = cost[t];
                    if (cost_t < bc) {
                        bc = cost_t;
                        bt = t;
                    }
                }

                fave[bt]++;
                selector[nSelectors] = (byte) bt;
                nSelectors++;

                /*
                 * Increment the symbol frequencies for the selected table.
                 */
                final int[] rfreq_bt = rfreq[bt];
                for (int i = gs; i <= ge; i++) {
                    rfreq_bt[sfmap[i]]++;
                }

                gs = ge + 1;
            }

            /*
             * Recompute the tables based on the accumulated frequencies.
             */
            for (int t = 0; t < nGroups; t++) {
                hbMakeCodeLengths(len[t], rfreq[t], this.data, alphaSize, 20);
            }
        }

        return nSelectors;
    }

    private void sendMTFValues2(final int nGroups, final int nSelectors) {
        // assert (nGroups < 8) : nGroups;

        final Data dataShadow = this.data;
        final byte[] pos = dataShadow.sendMTFValues2_pos;

        for (int i = nGroups; --i >= 0;) {
            pos[i] = (byte) i;
        }

        for (int i = 0; i < nSelectors; i++) {
            final byte ll_i = dataShadow.selector[i];
            byte tmp = pos[0];
            int j = 0;

            while (ll_i != tmp) {
                j++;
                final byte tmp2 = tmp;
                tmp = pos[j];
                pos[j] = tmp2;
            }

            pos[0] = tmp;
            dataShadow.selectorMtf[i] = (byte) j;
        }
    }

    private void sendMTFValues3(final int nGroups, final int alphaSize) {
        final int[][] code = this.data.sendMTFValues_code;
        final byte[][] len = this.data.sendMTFValues_len;

        for (int t = 0; t < nGroups; t++) {
            int minLen = 32;
            int maxLen = 0;
            final byte[] len_t = len[t];
            for (int i = alphaSize; --i >= 0;) {
                final int l = len_t[i] & 0xff;
                if (l > maxLen) {
                    maxLen = l;
                }
                if (l < minLen) {
                    minLen = l;
                }
            }

            // assert (maxLen <= 20) : maxLen;
            // assert (minLen >= 1) : minLen;

            hbAssignCodes(code[t], len[t], minLen, maxLen, alphaSize);
        }
    }

    private void sendMTFValues4() throws IOException {
        final boolean[] inUse = this.data.inUse;
        final boolean[] inUse16 = this.data.sentMTFValues4_inUse16;

        for (int i = 16; --i >= 0;) {
            inUse16[i] = false;
            final int i16 = i * 16;
            for (int j = 16; --j >= 0;) {
                if (inUse[i16 + j]) {
                    inUse16[i] = true;
                }
            }
        }

        for (int i = 0; i < 16; i++) {
            bsW(1, inUse16[i] ? 1 : 0);
        }

        final OutputStream outShadow = this.out;
        int bsLiveShadow = this.bsLive;
        int bsBuffShadow = this.bsBuff;

        for (int i = 0; i < 16; i++) {
            if (inUse16[i]) {
                final int i16 = i * 16;
                for (int j = 0; j < 16; j++) {
                    // inlined: bsW(1, inUse[i16 + j] ? 1 : 0);
                    while (bsLiveShadow >= 8) {
                        outShadow.write(bsBuffShadow >> 24); // write 8-bit
                        bsBuffShadow <<= 8;
                        bsLiveShadow -= 8;
                    }
                    if (inUse[i16 + j]) {
                        bsBuffShadow |= 1 << (32 - bsLiveShadow - 1);
                    }
                    bsLiveShadow++;
                }
            }
        }

        this.bsBuff = bsBuffShadow;
        this.bsLive = bsLiveShadow;
    }

    private void sendMTFValues5(final int nGroups, final int nSelectors)
        throws IOException {
        bsW(3, nGroups);
        bsW(15, nSelectors);

        final OutputStream outShadow = this.out;
        final byte[] selectorMtf = this.data.selectorMtf;

        int bsLiveShadow = this.bsLive;
        int bsBuffShadow = this.bsBuff;

        for (int i = 0; i < nSelectors; i++) {
            for (int j = 0, hj = selectorMtf[i] & 0xff; j < hj; j++) {
                // inlined: bsW(1, 1);
                while (bsLiveShadow >= 8) {
                    outShadow.write(bsBuffShadow >> 24);
                    bsBuffShadow <<= 8;
                    bsLiveShadow -= 8;
                }
                bsBuffShadow |= 1 << (32 - bsLiveShadow - 1);
                bsLiveShadow++;
            }

            // inlined: bsW(1, 0);
            while (bsLiveShadow >= 8) {
                outShadow.write(bsBuffShadow >> 24);
                bsBuffShadow <<= 8;
                bsLiveShadow -= 8;
            }
            // bsBuffShadow |= 0 << (32 - bsLiveShadow - 1);
            bsLiveShadow++;
        }

        this.bsBuff = bsBuffShadow;
        this.bsLive = bsLiveShadow;
    }

    private void sendMTFValues6(final int nGroups, final int alphaSize)
        throws IOException {
        final byte[][] len = this.data.sendMTFValues_len;
        final OutputStream outShadow = this.out;

        int bsLiveShadow = this.bsLive;
        int bsBuffShadow = this.bsBuff;

        for (int t = 0; t < nGroups; t++) {
            final byte[] len_t = len[t];
            int curr = len_t[0] & 0xff;

            // inlined: bsW(5, curr);
            while (bsLiveShadow >= 8) {
                outShadow.write(bsBuffShadow >> 24); // write 8-bit
                bsBuffShadow <<= 8;
                bsLiveShadow -= 8;
            }
            bsBuffShadow |= curr << (32 - bsLiveShadow - 5);
            bsLiveShadow += 5;

            for (int i = 0; i < alphaSize; i++) {
                final int lti = len_t[i] & 0xff;
                while (curr < lti) {
                    // inlined: bsW(2, 2);
                    while (bsLiveShadow >= 8) {
                        outShadow.write(bsBuffShadow >> 24); // write 8-bit
                        bsBuffShadow <<= 8;
                        bsLiveShadow -= 8;
                    }
                    bsBuffShadow |= 2 << (32 - bsLiveShadow - 2);
                    bsLiveShadow += 2;

                    curr++; /* 10 */
                }

                while (curr > lti) {
                    // inlined: bsW(2, 3);
                    while (bsLiveShadow >= 8) {
                        outShadow.write(bsBuffShadow >> 24); // write 8-bit
                        bsBuffShadow <<= 8;
                        bsLiveShadow -= 8;
                    }
                    bsBuffShadow |= 3 << (32 - bsLiveShadow - 2);
                    bsLiveShadow += 2;

                    curr--; /* 11 */
                }

                // inlined: bsW(1, 0);
                while (bsLiveShadow >= 8) {
                    outShadow.write(bsBuffShadow >> 24); // write 8-bit
                    bsBuffShadow <<= 8;
                    bsLiveShadow -= 8;
                }
                // bsBuffShadow |= 0 << (32 - bsLiveShadow - 1);
                bsLiveShadow++;
            }
        }

        this.bsBuff = bsBuffShadow;
        this.bsLive = bsLiveShadow;
    }

    private void sendMTFValues7() throws IOException {
        final Data dataShadow = this.data;
        final byte[][] len = dataShadow.sendMTFValues_len;
        final int[][] code = dataShadow.sendMTFValues_code;
        final OutputStream outShadow = this.out;
        final byte[] selector = dataShadow.selector;
        final char[] sfmap = dataShadow.sfmap;
        final int nMTFShadow = this.nMTF;

        int selCtr = 0;

        int bsLiveShadow = this.bsLive;
        int bsBuffShadow = this.bsBuff;

        for (int gs = 0; gs < nMTFShadow;) {
            final int ge = Math.min(gs + G_SIZE - 1, nMTFShadow - 1);
            final int selector_selCtr = selector[selCtr] & 0xff;
            final int[] code_selCtr = code[selector_selCtr];
            final byte[] len_selCtr = len[selector_selCtr];

            while (gs <= ge) {
                final int sfmap_i = sfmap[gs];

                //
                // inlined: bsW(len_selCtr[sfmap_i] & 0xff,
                // code_selCtr[sfmap_i]);
                //
                while (bsLiveShadow >= 8) {
                    outShadow.write(bsBuffShadow >> 24);
                    bsBuffShadow <<= 8;
                    bsLiveShadow -= 8;
                }
                final int n = len_selCtr[sfmap_i] & 0xFF;
                bsBuffShadow |= code_selCtr[sfmap_i] << (32 - bsLiveShadow - n);
                bsLiveShadow += n;

                gs++;
            }

            gs = ge + 1;
            selCtr++;
        }

        this.bsBuff = bsBuffShadow;
        this.bsLive = bsLiveShadow;
    }

    private void moveToFrontCodeAndSend() throws IOException {
        bsW(24, this.data.origPtr);
        generateMTFValues();
        sendMTFValues();
    }

    private void blockSort() {
        blockSorter.blockSort(data, last);
    }

    /**
     * Performs Move-To-Front on the Burrows-Wheeler transformed
     * buffer, storing the MTFed data in data.sfmap in RUNA/RUNB
     * run-length-encoded form.
     *
     * <p>Keeps track of byte frequencies in data.mtfFreq at the same time.</p>
     */
    private void generateMTFValues() {
        final int lastShadow = this.last;
        final Data dataShadow = this.data;
        final boolean[] inUse = dataShadow.inUse;
        final byte[] block = dataShadow.block;
        final int[] fmap = dataShadow.fmap;
        final char[] sfmap = dataShadow.sfmap;
        final int[] mtfFreq = dataShadow.mtfFreq;
        final byte[] unseqToSeq = dataShadow.unseqToSeq;
        final byte[] yy = dataShadow.generateMTFValues_yy;

        // make maps
        int nInUseShadow = 0;
        for (int i = 0; i < 256; i++) {
            if (inUse[i]) {
                unseqToSeq[i] = (byte) nInUseShadow;
                nInUseShadow++;
            }
        }
        this.nInUse = nInUseShadow;

        final int eob = nInUseShadow + 1;

        for (int i = eob; i >= 0; i--) {
            mtfFreq[i] = 0;
        }

        for (int i = nInUseShadow; --i >= 0;) {
            yy[i] = (byte) i;
        }

        int wr = 0;
        int zPend = 0;

        for (int i = 0; i <= lastShadow; i++) {
            final byte ll_i = unseqToSeq[block[fmap[i]] & 0xff];
            byte tmp = yy[0];
            int j = 0;

            while (ll_i != tmp) {
                j++;
                final byte tmp2 = tmp;
                tmp = yy[j];
                yy[j] = tmp2;
            }
            yy[0] = tmp;

            if (j == 0) {
                zPend++;
            } else {
                if (zPend > 0) {
                    zPend--;
                    while (true) {
                        if ((zPend & 1) == 0) {
                            sfmap[wr] = RUNA;
                            wr++;
                            mtfFreq[RUNA]++;
                        } else {
                            sfmap[wr] = RUNB;
                            wr++;
                            mtfFreq[RUNB]++;
                        }

                        if (zPend >= 2) {
                            zPend = (zPend - 2) >> 1;
                        } else {
                            break;
                        }
                    }
                    zPend = 0;
                }
                sfmap[wr] = (char) (j + 1);
                wr++;
                mtfFreq[j + 1]++;
            }
        }

        if (zPend > 0) {
            zPend--;
            while (true) {
                if ((zPend & 1) == 0) {
                    sfmap[wr] = RUNA;
                    wr++;
                    mtfFreq[RUNA]++;
                } else {
                    sfmap[wr] = RUNB;
                    wr++;
                    mtfFreq[RUNB]++;
                }

                if (zPend >= 2) {
                    zPend = (zPend - 2) >> 1;
                } else {
                    break;
                }
            }
        }

        sfmap[wr] = (char) eob;
        mtfFreq[eob]++;
        this.nMTF = wr + 1;
    }

    static final class Data {

        // with blockSize 900k
        /* maps unsigned byte => "does it occur in block" */
        final boolean[] inUse = new boolean[256]; // 256 byte
        final byte[] unseqToSeq = new byte[256]; // 256 byte
        final int[] mtfFreq = new int[MAX_ALPHA_SIZE]; // 1032 byte
        final byte[] selector = new byte[MAX_SELECTORS]; // 18002 byte
        final byte[] selectorMtf = new byte[MAX_SELECTORS]; // 18002 byte

        final byte[] generateMTFValues_yy = new byte[256]; // 256 byte
        final byte[][] sendMTFValues_len = new byte[N_GROUPS][MAX_ALPHA_SIZE]; // 1548
        // byte
        final int[][] sendMTFValues_rfreq = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192
        // byte
        final int[] sendMTFValues_fave = new int[N_GROUPS]; // 24 byte
        final short[] sendMTFValues_cost = new short[N_GROUPS]; // 12 byte
        final int[][] sendMTFValues_code = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192
        // byte
        final byte[] sendMTFValues2_pos = new byte[N_GROUPS]; // 6 byte
        final boolean[] sentMTFValues4_inUse16 = new boolean[16]; // 16 byte

        final int[] heap = new int[MAX_ALPHA_SIZE + 2]; // 1040 byte
        final int[] weight = new int[MAX_ALPHA_SIZE * 2]; // 2064 byte
        final int[] parent = new int[MAX_ALPHA_SIZE * 2]; // 2064 byte

        // ------------
        // 333408 byte

        /* holds the RLEd block of original data starting at index 1.
         * After sorting the last byte added to the buffer is at index
         * 0. */
        final byte[] block; // 900021 byte
        /* maps index in Burrows-Wheeler transformed block => index of
         * byte in original block */
        final int[] fmap; // 3600000 byte
        final char[] sfmap; // 3600000 byte
        // ------------
        // 8433529 byte
        // ============

        /**
         * Index of original line in Burrows-Wheeler table.
         *
         * <p>This is the index in fmap that points to the last byte
         * of the original data.</p>
         */
        int origPtr;

        Data(final int blockSize100k) {
            final int n = blockSize100k * BZip2Constants.BASEBLOCKSIZE;
            this.block = new byte[(n + 1 + NUM_OVERSHOOT_BYTES)];
            this.fmap = new int[n];
            this.sfmap = new char[2 * n];
        }

    }

}