001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, 013 * software distributed under the License is distributed on an 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 * KIND, either express or implied. See the License for the 016 * specific language governing permissions and limitations 017 * under the License. 018 */ 019package org.apache.commons.compress.compressors.lzw; 020 021import java.io.IOException; 022import java.io.InputStream; 023import java.nio.ByteOrder; 024 025import org.apache.commons.compress.MemoryLimitException; 026import org.apache.commons.compress.compressors.CompressorInputStream; 027import org.apache.commons.compress.utils.BitInputStream; 028import org.apache.commons.compress.utils.InputStreamStatistics; 029 030/** 031 * <p>Generic LZW implementation. It is used internally for 032 * the Z decompressor and the Unshrinking Zip file compression method, 033 * but may be useful for third-party projects in implementing their own LZW variations.</p> 034 * 035 * @NotThreadSafe 036 * @since 1.10 037 */ 038public abstract class LZWInputStream extends CompressorInputStream implements InputStreamStatistics { 039 protected static final int DEFAULT_CODE_SIZE = 9; 040 protected static final int UNUSED_PREFIX = -1; 041 042 private final byte[] oneByte = new byte[1]; 043 044 protected final BitInputStream in; 045 private int clearCode = -1; 046 private int codeSize = DEFAULT_CODE_SIZE; 047 private byte previousCodeFirstChar; 048 private int previousCode = UNUSED_PREFIX; 049 private int tableSize; 050 private int[] prefixes; 051 private byte[] characters; 052 private byte[] outputStack; 053 private int outputStackLocation; 054 055 protected LZWInputStream(final InputStream inputStream, final ByteOrder byteOrder) { 056 this.in = new BitInputStream(inputStream, byteOrder); 057 } 058 059 @Override 060 public void close() throws IOException { 061 in.close(); 062 } 063 064 @Override 065 public int read() throws IOException { 066 final int ret = read(oneByte); 067 if (ret < 0) { 068 return ret; 069 } 070 return 0xff & oneByte[0]; 071 } 072 073 @Override 074 public int read(final byte[] b, final int off, final int len) throws IOException { 075 if (len == 0) { 076 return 0; 077 } 078 int bytesRead = readFromStack(b, off, len); 079 while (len - bytesRead > 0) { 080 final int result = decompressNextSymbol(); 081 if (result < 0) { 082 if (bytesRead > 0) { 083 count(bytesRead); 084 return bytesRead; 085 } 086 return result; 087 } 088 bytesRead += readFromStack(b, off + bytesRead, len - bytesRead); 089 } 090 count(bytesRead); 091 return bytesRead; 092 } 093 094 /** 095 * @since 1.17 096 */ 097 @Override 098 public long getCompressedCount() { 099 return in.getBytesRead(); 100 } 101 102 /** 103 * Read the next code and expand it. 104 * @return the expanded next code, negative on EOF 105 * @throws IOException on error 106 */ 107 protected abstract int decompressNextSymbol() throws IOException; 108 109 /** 110 * Add a new entry to the dictionary. 111 * @param previousCode the previous code 112 * @param character the next character to append 113 * @return the new code 114 * @throws IOException on error 115 */ 116 protected abstract int addEntry(int previousCode, byte character) 117 throws IOException; 118 119 /** 120 * Sets the clear code based on the code size. 121 * @param codeSize code size 122 */ 123 protected void setClearCode(final int codeSize) { 124 clearCode = (1 << (codeSize - 1)); 125 } 126 127 /** 128 * Initializes the arrays based on the maximum code size. 129 * First checks that the estimated memory usage is below memoryLimitInKb 130 * 131 * @param maxCodeSize maximum code size 132 * @param memoryLimitInKb maximum allowed estimated memory usage in Kb 133 * @throws MemoryLimitException if estimated memory usage is greater than memoryLimitInKb 134 * @throws IllegalArgumentException if <code>maxCodeSize</code> is not bigger than 0 135 */ 136 protected void initializeTables(final int maxCodeSize, final int memoryLimitInKb) 137 throws MemoryLimitException { 138 if (maxCodeSize <= 0) { 139 throw new IllegalArgumentException("maxCodeSize is " + maxCodeSize 140 + ", must be bigger than 0"); 141 } 142 143 if (memoryLimitInKb > -1) { 144 final int maxTableSize = 1 << maxCodeSize; 145 //account for potential overflow 146 long memoryUsageInBytes = (long) maxTableSize * 6;//(4 (prefixes) + 1 (characters) +1 (outputStack)) 147 long memoryUsageInKb = memoryUsageInBytes >> 10; 148 149 if (memoryUsageInKb > memoryLimitInKb) { 150 throw new MemoryLimitException(memoryUsageInKb, memoryLimitInKb); 151 } 152 } 153 initializeTables(maxCodeSize); 154 } 155 156 /** 157 * Initializes the arrays based on the maximum code size. 158 * @param maxCodeSize maximum code size 159 * @throws IllegalArgumentException if <code>maxCodeSize</code> is not bigger than 0 160 */ 161 protected void initializeTables(final int maxCodeSize) { 162 if (maxCodeSize <= 0) { 163 throw new IllegalArgumentException("maxCodeSize is " + maxCodeSize 164 + ", must be bigger than 0"); 165 } 166 final int maxTableSize = 1 << maxCodeSize; 167 prefixes = new int[maxTableSize]; 168 characters = new byte[maxTableSize]; 169 outputStack = new byte[maxTableSize]; 170 outputStackLocation = maxTableSize; 171 final int max = 1 << 8; 172 for (int i = 0; i < max; i++) { 173 prefixes[i] = -1; 174 characters[i] = (byte) i; 175 } 176 } 177 178 /** 179 * Reads the next code from the stream. 180 * @return the next code 181 * @throws IOException on error 182 */ 183 protected int readNextCode() throws IOException { 184 if (codeSize > 31) { 185 throw new IllegalArgumentException("Code size must not be bigger than 31"); 186 } 187 return (int) in.readBits(codeSize); 188 } 189 190 /** 191 * Adds a new entry if the maximum table size hasn't been exceeded 192 * and returns the new index. 193 * @param previousCode the previous code 194 * @param character the character to append 195 * @param maxTableSize the maximum table size 196 * @return the new code or -1 if maxTableSize has been reached already 197 */ 198 protected int addEntry(final int previousCode, final byte character, final int maxTableSize) { 199 if (tableSize < maxTableSize) { 200 prefixes[tableSize] = previousCode; 201 characters[tableSize] = character; 202 return tableSize++; 203 } 204 return -1; 205 } 206 207 /** 208 * Add entry for repeat of previousCode we haven't added, yet. 209 * @return new code for a repeat of the previous code or -1 if 210 * maxTableSize has been reached already 211 * @throws IOException on error 212 */ 213 protected int addRepeatOfPreviousCode() throws IOException { 214 if (previousCode == -1) { 215 // can't have a repeat for the very first code 216 throw new IOException("The first code can't be a reference to its preceding code"); 217 } 218 return addEntry(previousCode, previousCodeFirstChar); 219 } 220 221 /** 222 * Expands the entry with index code to the output stack and may 223 * create a new entry 224 * @param code the code 225 * @param addedUnfinishedEntry whether unfinished entries have been added 226 * @return the new location of the output stack 227 * @throws IOException on error 228 */ 229 protected int expandCodeToOutputStack(final int code, final boolean addedUnfinishedEntry) 230 throws IOException { 231 for (int entry = code; entry >= 0; entry = prefixes[entry]) { 232 outputStack[--outputStackLocation] = characters[entry]; 233 } 234 if (previousCode != -1 && !addedUnfinishedEntry) { 235 addEntry(previousCode, outputStack[outputStackLocation]); 236 } 237 previousCode = code; 238 previousCodeFirstChar = outputStack[outputStackLocation]; 239 return outputStackLocation; 240 } 241 242 private int readFromStack(final byte[] b, final int off, final int len) { 243 final int remainingInStack = outputStack.length - outputStackLocation; 244 if (remainingInStack > 0) { 245 final int maxLength = Math.min(remainingInStack, len); 246 System.arraycopy(outputStack, outputStackLocation, b, off, maxLength); 247 outputStackLocation += maxLength; 248 return maxLength; 249 } 250 return 0; 251 } 252 253 protected int getCodeSize() { 254 return codeSize; 255 } 256 257 protected void resetCodeSize() { 258 setCodeSize(DEFAULT_CODE_SIZE); 259 } 260 261 protected void setCodeSize(final int cs) { 262 this.codeSize = cs; 263 } 264 265 protected void incrementCodeSize() { 266 codeSize++; 267 } 268 269 protected void resetPreviousCode() { 270 this.previousCode = -1; 271 } 272 273 protected int getPrefix(final int offset) { 274 return prefixes[offset]; 275 } 276 277 protected void setPrefix(final int offset, final int value) { 278 prefixes[offset] = value; 279 } 280 281 protected int getPrefixesLength() { 282 return prefixes.length; 283 } 284 285 protected int getClearCode() { 286 return clearCode; 287 } 288 289 protected int getTableSize() { 290 return tableSize; 291 } 292 293 protected void setTableSize(final int newSize) { 294 tableSize = newSize; 295 } 296 297}