001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 * 017 */ 018package org.apache.commons.compress.archivers.arj; 019 020import java.io.ByteArrayInputStream; 021import java.io.ByteArrayOutputStream; 022import java.io.DataInputStream; 023import java.io.IOException; 024import java.io.InputStream; 025import java.util.ArrayList; 026import java.util.zip.CRC32; 027 028import org.apache.commons.compress.archivers.ArchiveEntry; 029import org.apache.commons.compress.archivers.ArchiveException; 030import org.apache.commons.compress.archivers.ArchiveInputStream; 031import org.apache.commons.compress.utils.BoundedInputStream; 032import org.apache.commons.compress.utils.CRC32VerifyingInputStream; 033import org.apache.commons.compress.utils.IOUtils; 034 035/** 036 * Implements the "arj" archive format as an InputStream. 037 * <p> 038 * <a href="https://github.com/FarGroup/FarManager/blob/master/plugins/multiarc/arc.doc/arj.txt">Reference 1</a> 039 * <br> 040 * <a href="http://www.fileformat.info/format/arj/corion.htm">Reference 2</a> 041 * @NotThreadSafe 042 * @since 1.6 043 */ 044public class ArjArchiveInputStream extends ArchiveInputStream { 045 private static final int ARJ_MAGIC_1 = 0x60; 046 private static final int ARJ_MAGIC_2 = 0xEA; 047 private final DataInputStream in; 048 private final String charsetName; 049 private final MainHeader mainHeader; 050 private LocalFileHeader currentLocalFileHeader = null; 051 private InputStream currentInputStream = null; 052 053 /** 054 * Constructs the ArjInputStream, taking ownership of the inputStream that is passed in. 055 * @param inputStream the underlying stream, whose ownership is taken 056 * @param charsetName the charset used for file names and comments 057 * in the archive. May be {@code null} to use the platform default. 058 * @throws ArchiveException if an exception occurs while reading 059 */ 060 public ArjArchiveInputStream(final InputStream inputStream, 061 final String charsetName) throws ArchiveException { 062 in = new DataInputStream(inputStream); 063 this.charsetName = charsetName; 064 try { 065 mainHeader = readMainHeader(); 066 if ((mainHeader.arjFlags & MainHeader.Flags.GARBLED) != 0) { 067 throw new ArchiveException("Encrypted ARJ files are unsupported"); 068 } 069 if ((mainHeader.arjFlags & MainHeader.Flags.VOLUME) != 0) { 070 throw new ArchiveException("Multi-volume ARJ files are unsupported"); 071 } 072 } catch (final IOException ioException) { 073 throw new ArchiveException(ioException.getMessage(), ioException); 074 } 075 } 076 077 /** 078 * Constructs the ArjInputStream, taking ownership of the inputStream that is passed in, 079 * and using the CP437 character encoding. 080 * @param inputStream the underlying stream, whose ownership is taken 081 * @throws ArchiveException if an exception occurs while reading 082 */ 083 public ArjArchiveInputStream(final InputStream inputStream) 084 throws ArchiveException { 085 this(inputStream, "CP437"); 086 } 087 088 @Override 089 public void close() throws IOException { 090 in.close(); 091 } 092 093 private int read8(final DataInputStream dataIn) throws IOException { 094 final int value = dataIn.readUnsignedByte(); 095 count(1); 096 return value; 097 } 098 099 private int read16(final DataInputStream dataIn) throws IOException { 100 final int value = dataIn.readUnsignedShort(); 101 count(2); 102 return Integer.reverseBytes(value) >>> 16; 103 } 104 105 private int read32(final DataInputStream dataIn) throws IOException { 106 final int value = dataIn.readInt(); 107 count(4); 108 return Integer.reverseBytes(value); 109 } 110 111 private String readString(final DataInputStream dataIn) throws IOException { 112 try (final ByteArrayOutputStream buffer = new ByteArrayOutputStream()) { 113 int nextByte; 114 while ((nextByte = dataIn.readUnsignedByte()) != 0) { 115 buffer.write(nextByte); 116 } 117 if (charsetName != null) { 118 return new String(buffer.toByteArray(), charsetName); 119 } 120 // intentionally using the default encoding as that's the contract for a null charsetName 121 return new String(buffer.toByteArray()); 122 } 123 } 124 125 private void readFully(final DataInputStream dataIn, final byte[] b) 126 throws IOException { 127 dataIn.readFully(b); 128 count(b.length); 129 } 130 131 private byte[] readHeader() throws IOException { 132 boolean found = false; 133 byte[] basicHeaderBytes = null; 134 do { 135 int first = 0; 136 int second = read8(in); 137 do { 138 first = second; 139 second = read8(in); 140 } while (first != ARJ_MAGIC_1 && second != ARJ_MAGIC_2); 141 final int basicHeaderSize = read16(in); 142 if (basicHeaderSize == 0) { 143 // end of archive 144 return null; 145 } 146 if (basicHeaderSize <= 2600) { 147 basicHeaderBytes = new byte[basicHeaderSize]; 148 readFully(in, basicHeaderBytes); 149 final long basicHeaderCrc32 = read32(in) & 0xFFFFFFFFL; 150 final CRC32 crc32 = new CRC32(); 151 crc32.update(basicHeaderBytes); 152 if (basicHeaderCrc32 == crc32.getValue()) { 153 found = true; 154 } 155 } 156 } while (!found); 157 return basicHeaderBytes; 158 } 159 160 private MainHeader readMainHeader() throws IOException { 161 final byte[] basicHeaderBytes = readHeader(); 162 if (basicHeaderBytes == null) { 163 throw new IOException("Archive ends without any headers"); 164 } 165 final DataInputStream basicHeader = new DataInputStream( 166 new ByteArrayInputStream(basicHeaderBytes)); 167 168 final int firstHeaderSize = basicHeader.readUnsignedByte(); 169 final byte[] firstHeaderBytes = new byte[firstHeaderSize - 1]; 170 basicHeader.readFully(firstHeaderBytes); 171 final DataInputStream firstHeader = new DataInputStream( 172 new ByteArrayInputStream(firstHeaderBytes)); 173 174 final MainHeader hdr = new MainHeader(); 175 hdr.archiverVersionNumber = firstHeader.readUnsignedByte(); 176 hdr.minVersionToExtract = firstHeader.readUnsignedByte(); 177 hdr.hostOS = firstHeader.readUnsignedByte(); 178 hdr.arjFlags = firstHeader.readUnsignedByte(); 179 hdr.securityVersion = firstHeader.readUnsignedByte(); 180 hdr.fileType = firstHeader.readUnsignedByte(); 181 hdr.reserved = firstHeader.readUnsignedByte(); 182 hdr.dateTimeCreated = read32(firstHeader); 183 hdr.dateTimeModified = read32(firstHeader); 184 hdr.archiveSize = 0xffffFFFFL & read32(firstHeader); 185 hdr.securityEnvelopeFilePosition = read32(firstHeader); 186 hdr.fileSpecPosition = read16(firstHeader); 187 hdr.securityEnvelopeLength = read16(firstHeader); 188 pushedBackBytes(20); // count has already counted them via readFully 189 hdr.encryptionVersion = firstHeader.readUnsignedByte(); 190 hdr.lastChapter = firstHeader.readUnsignedByte(); 191 192 if (firstHeaderSize >= 33) { 193 hdr.arjProtectionFactor = firstHeader.readUnsignedByte(); 194 hdr.arjFlags2 = firstHeader.readUnsignedByte(); 195 firstHeader.readUnsignedByte(); 196 firstHeader.readUnsignedByte(); 197 } 198 199 hdr.name = readString(basicHeader); 200 hdr.comment = readString(basicHeader); 201 202 final int extendedHeaderSize = read16(in); 203 if (extendedHeaderSize > 0) { 204 hdr.extendedHeaderBytes = new byte[extendedHeaderSize]; 205 readFully(in, hdr.extendedHeaderBytes); 206 final long extendedHeaderCrc32 = 0xffffFFFFL & read32(in); 207 final CRC32 crc32 = new CRC32(); 208 crc32.update(hdr.extendedHeaderBytes); 209 if (extendedHeaderCrc32 != crc32.getValue()) { 210 throw new IOException("Extended header CRC32 verification failure"); 211 } 212 } 213 214 return hdr; 215 } 216 217 private LocalFileHeader readLocalFileHeader() throws IOException { 218 final byte[] basicHeaderBytes = readHeader(); 219 if (basicHeaderBytes == null) { 220 return null; 221 } 222 try (final DataInputStream basicHeader = new DataInputStream(new ByteArrayInputStream(basicHeaderBytes))) { 223 224 final int firstHeaderSize = basicHeader.readUnsignedByte(); 225 final byte[] firstHeaderBytes = new byte[firstHeaderSize - 1]; 226 basicHeader.readFully(firstHeaderBytes); 227 try (final DataInputStream firstHeader = new DataInputStream(new ByteArrayInputStream(firstHeaderBytes))) { 228 229 final LocalFileHeader localFileHeader = new LocalFileHeader(); 230 localFileHeader.archiverVersionNumber = firstHeader.readUnsignedByte(); 231 localFileHeader.minVersionToExtract = firstHeader.readUnsignedByte(); 232 localFileHeader.hostOS = firstHeader.readUnsignedByte(); 233 localFileHeader.arjFlags = firstHeader.readUnsignedByte(); 234 localFileHeader.method = firstHeader.readUnsignedByte(); 235 localFileHeader.fileType = firstHeader.readUnsignedByte(); 236 localFileHeader.reserved = firstHeader.readUnsignedByte(); 237 localFileHeader.dateTimeModified = read32(firstHeader); 238 localFileHeader.compressedSize = 0xffffFFFFL & read32(firstHeader); 239 localFileHeader.originalSize = 0xffffFFFFL & read32(firstHeader); 240 localFileHeader.originalCrc32 = 0xffffFFFFL & read32(firstHeader); 241 localFileHeader.fileSpecPosition = read16(firstHeader); 242 localFileHeader.fileAccessMode = read16(firstHeader); 243 pushedBackBytes(20); 244 localFileHeader.firstChapter = firstHeader.readUnsignedByte(); 245 localFileHeader.lastChapter = firstHeader.readUnsignedByte(); 246 247 readExtraData(firstHeaderSize, firstHeader, localFileHeader); 248 249 localFileHeader.name = readString(basicHeader); 250 localFileHeader.comment = readString(basicHeader); 251 252 final ArrayList<byte[]> extendedHeaders = new ArrayList<>(); 253 int extendedHeaderSize; 254 while ((extendedHeaderSize = read16(in)) > 0) { 255 final byte[] extendedHeaderBytes = new byte[extendedHeaderSize]; 256 readFully(in, extendedHeaderBytes); 257 final long extendedHeaderCrc32 = 0xffffFFFFL & read32(in); 258 final CRC32 crc32 = new CRC32(); 259 crc32.update(extendedHeaderBytes); 260 if (extendedHeaderCrc32 != crc32.getValue()) { 261 throw new IOException("Extended header CRC32 verification failure"); 262 } 263 extendedHeaders.add(extendedHeaderBytes); 264 } 265 localFileHeader.extendedHeaders = extendedHeaders.toArray(new byte[0][]); 266 267 return localFileHeader; 268 } 269 } 270 } 271 272 private void readExtraData(final int firstHeaderSize, final DataInputStream firstHeader, 273 final LocalFileHeader localFileHeader) throws IOException { 274 if (firstHeaderSize >= 33) { 275 localFileHeader.extendedFilePosition = read32(firstHeader); 276 if (firstHeaderSize >= 45) { 277 localFileHeader.dateTimeAccessed = read32(firstHeader); 278 localFileHeader.dateTimeCreated = read32(firstHeader); 279 localFileHeader.originalSizeEvenForVolumes = read32(firstHeader); 280 pushedBackBytes(12); 281 } 282 pushedBackBytes(4); 283 } 284 } 285 286 /** 287 * Checks if the signature matches what is expected for an arj file. 288 * 289 * @param signature 290 * the bytes to check 291 * @param length 292 * the number of bytes to check 293 * @return true, if this stream is an arj archive stream, false otherwise 294 */ 295 public static boolean matches(final byte[] signature, final int length) { 296 return length >= 2 && 297 (0xff & signature[0]) == ARJ_MAGIC_1 && 298 (0xff & signature[1]) == ARJ_MAGIC_2; 299 } 300 301 /** 302 * Gets the archive's recorded name. 303 * @return the archive's name 304 */ 305 public String getArchiveName() { 306 return mainHeader.name; 307 } 308 309 /** 310 * Gets the archive's comment. 311 * @return the archive's comment 312 */ 313 public String getArchiveComment() { 314 return mainHeader.comment; 315 } 316 317 @Override 318 public ArjArchiveEntry getNextEntry() throws IOException { 319 if (currentInputStream != null) { 320 // return value ignored as IOUtils.skip ensures the stream is drained completely 321 IOUtils.skip(currentInputStream, Long.MAX_VALUE); 322 currentInputStream.close(); 323 currentLocalFileHeader = null; 324 currentInputStream = null; 325 } 326 327 currentLocalFileHeader = readLocalFileHeader(); 328 if (currentLocalFileHeader != null) { 329 currentInputStream = new BoundedInputStream(in, currentLocalFileHeader.compressedSize); 330 if (currentLocalFileHeader.method == LocalFileHeader.Methods.STORED) { 331 currentInputStream = new CRC32VerifyingInputStream(currentInputStream, 332 currentLocalFileHeader.originalSize, currentLocalFileHeader.originalCrc32); 333 } 334 return new ArjArchiveEntry(currentLocalFileHeader); 335 } 336 currentInputStream = null; 337 return null; 338 } 339 340 @Override 341 public boolean canReadEntryData(final ArchiveEntry ae) { 342 return ae instanceof ArjArchiveEntry 343 && ((ArjArchiveEntry) ae).getMethod() == LocalFileHeader.Methods.STORED; 344 } 345 346 @Override 347 public int read(final byte[] b, final int off, final int len) throws IOException { 348 if (len == 0) { 349 return 0; 350 } 351 if (currentLocalFileHeader == null) { 352 throw new IllegalStateException("No current arj entry"); 353 } 354 if (currentLocalFileHeader.method != LocalFileHeader.Methods.STORED) { 355 throw new IOException("Unsupported compression method " + currentLocalFileHeader.method); 356 } 357 return currentInputStream.read(b, off, len); 358 } 359}