001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, 013 * software distributed under the License is distributed on an 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 * KIND, either express or implied. See the License for the 016 * specific language governing permissions and limitations 017 * under the License. 018 */ 019package org.apache.commons.compress.compressors.z; 020 021import java.io.IOException; 022import java.io.InputStream; 023import java.nio.ByteOrder; 024 025import org.apache.commons.compress.compressors.lzw.LZWInputStream; 026 027/** 028 * Input stream that decompresses .Z files. 029 * @NotThreadSafe 030 * @since 1.7 031 */ 032public class ZCompressorInputStream extends LZWInputStream { 033 private static final int MAGIC_1 = 0x1f; 034 private static final int MAGIC_2 = 0x9d; 035 private static final int BLOCK_MODE_MASK = 0x80; 036 private static final int MAX_CODE_SIZE_MASK = 0x1f; 037 private final boolean blockMode; 038 private final int maxCodeSize; 039 private long totalCodesRead = 0; 040 041 public ZCompressorInputStream(final InputStream inputStream, final int memoryLimitInKb) 042 throws IOException { 043 super(inputStream, ByteOrder.LITTLE_ENDIAN); 044 final int firstByte = (int) in.readBits(8); 045 final int secondByte = (int) in.readBits(8); 046 final int thirdByte = (int) in.readBits(8); 047 if (firstByte != MAGIC_1 || secondByte != MAGIC_2 || thirdByte < 0) { 048 throw new IOException("Input is not in .Z format"); 049 } 050 blockMode = (thirdByte & BLOCK_MODE_MASK) != 0; 051 maxCodeSize = thirdByte & MAX_CODE_SIZE_MASK; 052 if (blockMode) { 053 setClearCode(DEFAULT_CODE_SIZE); 054 } 055 initializeTables(maxCodeSize, memoryLimitInKb); 056 clearEntries(); 057 } 058 059 public ZCompressorInputStream(final InputStream inputStream) throws IOException { 060 this(inputStream, -1); 061 } 062 063 private void clearEntries() { 064 setTableSize((1 << 8) + (blockMode ? 1 : 0)); 065 } 066 067 /** 068 * {@inheritDoc} 069 * <p><strong>This method is only protected for technical reasons 070 * and is not part of Commons Compress' published API. It may 071 * change or disappear without warning.</strong></p> 072 */ 073 @Override 074 protected int readNextCode() throws IOException { 075 final int code = super.readNextCode(); 076 if (code >= 0) { 077 ++totalCodesRead; 078 } 079 return code; 080 } 081 082 private void reAlignReading() throws IOException { 083 // "compress" works in multiples of 8 symbols, each codeBits bits long. 084 // When codeBits changes, the remaining unused symbols in the current 085 // group of 8 are still written out, in the old codeSize, 086 // as garbage values (usually zeroes) that need to be skipped. 087 long codeReadsToThrowAway = 8 - (totalCodesRead % 8); 088 if (codeReadsToThrowAway == 8) { 089 codeReadsToThrowAway = 0; 090 } 091 for (long i = 0; i < codeReadsToThrowAway; i++) { 092 readNextCode(); 093 } 094 in.clearBitCache(); 095 } 096 097 /** 098 * {@inheritDoc} 099 * <p><strong>This method is only protected for technical reasons 100 * and is not part of Commons Compress' published API. It may 101 * change or disappear without warning.</strong></p> 102 */ 103 @Override 104 protected int addEntry(final int previousCode, final byte character) throws IOException { 105 final int maxTableSize = 1 << getCodeSize(); 106 final int r = addEntry(previousCode, character, maxTableSize); 107 if (getTableSize() == maxTableSize && getCodeSize() < maxCodeSize) { 108 reAlignReading(); 109 incrementCodeSize(); 110 } 111 return r; 112 } 113 114 /** 115 * {@inheritDoc} 116 * <p><strong>This method is only protected for technical reasons 117 * and is not part of Commons Compress' published API. It may 118 * change or disappear without warning.</strong></p> 119 */ 120 @Override 121 protected int decompressNextSymbol() throws IOException { 122 // 123 // table entry table entry 124 // _____________ _____ 125 // table entry / \ / \ 126 // ____________/ \ \ 127 // / / \ / \ \ 128 // +---+---+---+---+---+---+---+---+---+---+ 129 // | . | . | . | . | . | . | . | . | . | . | 130 // +---+---+---+---+---+---+---+---+---+---+ 131 // |<--------->|<------------->|<----->|<->| 132 // symbol symbol symbol symbol 133 // 134 final int code = readNextCode(); 135 if (code < 0) { 136 return -1; 137 } else if (blockMode && code == getClearCode()) { 138 clearEntries(); 139 reAlignReading(); 140 resetCodeSize(); 141 resetPreviousCode(); 142 return 0; 143 } else { 144 boolean addedUnfinishedEntry = false; 145 if (code == getTableSize()) { 146 addRepeatOfPreviousCode(); 147 addedUnfinishedEntry = true; 148 } else if (code > getTableSize()) { 149 throw new IOException(String.format("Invalid %d bit code 0x%x", getCodeSize(), code)); 150 } 151 return expandCodeToOutputStack(code, addedUnfinishedEntry); 152 } 153 } 154 155 /** 156 * Checks if the signature matches what is expected for a Unix compress file. 157 * 158 * @param signature 159 * the bytes to check 160 * @param length 161 * the number of bytes to check 162 * @return true, if this stream is a Unix compress compressed 163 * stream, false otherwise 164 * 165 * @since 1.9 166 */ 167 public static boolean matches(final byte[] signature, final int length) { 168 return length > 3 && signature[0] == MAGIC_1 && signature[1] == (byte) MAGIC_2; 169 } 170 171}