001/*
002 *  Licensed to the Apache Software Foundation (ASF) under one or more
003 *  contributor license agreements.  See the NOTICE file distributed with
004 *  this work for additional information regarding copyright ownership.
005 *  The ASF licenses this file to You under the Apache License, Version 2.0
006 *  (the "License"); you may not use this file except in compliance with
007 *  the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 *  Unless required by applicable law or agreed to in writing, software
012 *  distributed under the License is distributed on an "AS IS" BASIS,
013 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 *  See the License for the specific language governing permissions and
015 *  limitations under the License.
016 *
017 */
018package org.apache.commons.compress.archivers.arj;
019
020import java.io.ByteArrayInputStream;
021import java.io.ByteArrayOutputStream;
022import java.io.DataInputStream;
023import java.io.IOException;
024import java.io.InputStream;
025import java.util.ArrayList;
026import java.util.zip.CRC32;
027
028import org.apache.commons.compress.archivers.ArchiveEntry;
029import org.apache.commons.compress.archivers.ArchiveException;
030import org.apache.commons.compress.archivers.ArchiveInputStream;
031import org.apache.commons.compress.utils.BoundedInputStream;
032import org.apache.commons.compress.utils.CRC32VerifyingInputStream;
033import org.apache.commons.compress.utils.IOUtils;
034
035/**
036 * Implements the "arj" archive format as an InputStream.
037 * <p>
038 * <a href="https://github.com/FarGroup/FarManager/blob/master/plugins/multiarc/arc.doc/arj.txt">Reference 1</a>
039 * <br>
040 * <a href="http://www.fileformat.info/format/arj/corion.htm">Reference 2</a>
041 * @NotThreadSafe
042 * @since 1.6
043 */
044public class ArjArchiveInputStream extends ArchiveInputStream {
045    private static final int ARJ_MAGIC_1 = 0x60;
046    private static final int ARJ_MAGIC_2 = 0xEA;
047    private final DataInputStream in;
048    private final String charsetName;
049    private final MainHeader mainHeader;
050    private LocalFileHeader currentLocalFileHeader = null;
051    private InputStream currentInputStream = null;
052
053    /**
054     * Constructs the ArjInputStream, taking ownership of the inputStream that is passed in.
055     * @param inputStream the underlying stream, whose ownership is taken
056     * @param charsetName the charset used for file names and comments
057     *   in the archive. May be {@code null} to use the platform default.
058     * @throws ArchiveException if an exception occurs while reading
059     */
060    public ArjArchiveInputStream(final InputStream inputStream,
061            final String charsetName) throws ArchiveException {
062        in = new DataInputStream(inputStream);
063        this.charsetName = charsetName;
064        try {
065            mainHeader = readMainHeader();
066            if ((mainHeader.arjFlags & MainHeader.Flags.GARBLED) != 0) {
067                throw new ArchiveException("Encrypted ARJ files are unsupported");
068            }
069            if ((mainHeader.arjFlags & MainHeader.Flags.VOLUME) != 0) {
070                throw new ArchiveException("Multi-volume ARJ files are unsupported");
071            }
072        } catch (final IOException ioException) {
073            throw new ArchiveException(ioException.getMessage(), ioException);
074        }
075    }
076
077    /**
078     * Constructs the ArjInputStream, taking ownership of the inputStream that is passed in,
079     * and using the CP437 character encoding.
080     * @param inputStream the underlying stream, whose ownership is taken
081     * @throws ArchiveException if an exception occurs while reading
082     */
083    public ArjArchiveInputStream(final InputStream inputStream)
084            throws ArchiveException {
085        this(inputStream, "CP437");
086    }
087
088    @Override
089    public void close() throws IOException {
090        in.close();
091    }
092
093    private int read8(final DataInputStream dataIn) throws IOException {
094        final int value = dataIn.readUnsignedByte();
095        count(1);
096        return value;
097    }
098
099    private int read16(final DataInputStream dataIn) throws IOException {
100        final int value = dataIn.readUnsignedShort();
101        count(2);
102        return Integer.reverseBytes(value) >>> 16;
103    }
104
105    private int read32(final DataInputStream dataIn) throws IOException {
106        final int value = dataIn.readInt();
107        count(4);
108        return Integer.reverseBytes(value);
109    }
110
111    private String readString(final DataInputStream dataIn) throws IOException {
112        try (final ByteArrayOutputStream buffer = new ByteArrayOutputStream()) {
113            int nextByte;
114            while ((nextByte = dataIn.readUnsignedByte()) != 0) {
115                buffer.write(nextByte);
116            }
117            if (charsetName != null) {
118                return new String(buffer.toByteArray(), charsetName);
119            }
120            // intentionally using the default encoding as that's the contract for a null charsetName
121            return new String(buffer.toByteArray());
122        }
123    }
124
125    private void readFully(final DataInputStream dataIn, final byte[] b)
126        throws IOException {
127        dataIn.readFully(b);
128        count(b.length);
129    }
130
131    private byte[] readHeader() throws IOException {
132        boolean found = false;
133        byte[] basicHeaderBytes = null;
134        do {
135            int first = 0;
136            int second = read8(in);
137            do {
138                first = second;
139                second = read8(in);
140            } while (first != ARJ_MAGIC_1 && second != ARJ_MAGIC_2);
141            final int basicHeaderSize = read16(in);
142            if (basicHeaderSize == 0) {
143                // end of archive
144                return null;
145            }
146            if (basicHeaderSize <= 2600) {
147                basicHeaderBytes = new byte[basicHeaderSize];
148                readFully(in, basicHeaderBytes);
149                final long basicHeaderCrc32 = read32(in) & 0xFFFFFFFFL;
150                final CRC32 crc32 = new CRC32();
151                crc32.update(basicHeaderBytes);
152                if (basicHeaderCrc32 == crc32.getValue()) {
153                    found = true;
154                }
155            }
156        } while (!found);
157        return basicHeaderBytes;
158    }
159
160    private MainHeader readMainHeader() throws IOException {
161        final byte[] basicHeaderBytes = readHeader();
162        if (basicHeaderBytes == null) {
163            throw new IOException("Archive ends without any headers");
164        }
165        final DataInputStream basicHeader = new DataInputStream(
166                new ByteArrayInputStream(basicHeaderBytes));
167
168        final int firstHeaderSize = basicHeader.readUnsignedByte();
169        final byte[] firstHeaderBytes = new byte[firstHeaderSize - 1];
170        basicHeader.readFully(firstHeaderBytes);
171        final DataInputStream firstHeader = new DataInputStream(
172                new ByteArrayInputStream(firstHeaderBytes));
173
174        final MainHeader hdr = new MainHeader();
175        hdr.archiverVersionNumber = firstHeader.readUnsignedByte();
176        hdr.minVersionToExtract = firstHeader.readUnsignedByte();
177        hdr.hostOS = firstHeader.readUnsignedByte();
178        hdr.arjFlags = firstHeader.readUnsignedByte();
179        hdr.securityVersion = firstHeader.readUnsignedByte();
180        hdr.fileType = firstHeader.readUnsignedByte();
181        hdr.reserved = firstHeader.readUnsignedByte();
182        hdr.dateTimeCreated = read32(firstHeader);
183        hdr.dateTimeModified = read32(firstHeader);
184        hdr.archiveSize = 0xffffFFFFL & read32(firstHeader);
185        hdr.securityEnvelopeFilePosition = read32(firstHeader);
186        hdr.fileSpecPosition = read16(firstHeader);
187        hdr.securityEnvelopeLength = read16(firstHeader);
188        pushedBackBytes(20); // count has already counted them via readFully
189        hdr.encryptionVersion = firstHeader.readUnsignedByte();
190        hdr.lastChapter = firstHeader.readUnsignedByte();
191
192        if (firstHeaderSize >= 33) {
193            hdr.arjProtectionFactor = firstHeader.readUnsignedByte();
194            hdr.arjFlags2 = firstHeader.readUnsignedByte();
195            firstHeader.readUnsignedByte();
196            firstHeader.readUnsignedByte();
197        }
198
199        hdr.name = readString(basicHeader);
200        hdr.comment = readString(basicHeader);
201
202        final  int extendedHeaderSize = read16(in);
203        if (extendedHeaderSize > 0) {
204            hdr.extendedHeaderBytes = new byte[extendedHeaderSize];
205            readFully(in, hdr.extendedHeaderBytes);
206            final long extendedHeaderCrc32 = 0xffffFFFFL & read32(in);
207            final CRC32 crc32 = new CRC32();
208            crc32.update(hdr.extendedHeaderBytes);
209            if (extendedHeaderCrc32 != crc32.getValue()) {
210                throw new IOException("Extended header CRC32 verification failure");
211            }
212        }
213
214        return hdr;
215    }
216
217    private LocalFileHeader readLocalFileHeader() throws IOException {
218        final byte[] basicHeaderBytes = readHeader();
219        if (basicHeaderBytes == null) {
220            return null;
221        }
222        try (final DataInputStream basicHeader = new DataInputStream(new ByteArrayInputStream(basicHeaderBytes))) {
223
224            final int firstHeaderSize = basicHeader.readUnsignedByte();
225            final byte[] firstHeaderBytes = new byte[firstHeaderSize - 1];
226            basicHeader.readFully(firstHeaderBytes);
227            try (final DataInputStream firstHeader = new DataInputStream(new ByteArrayInputStream(firstHeaderBytes))) {
228
229                final LocalFileHeader localFileHeader = new LocalFileHeader();
230                localFileHeader.archiverVersionNumber = firstHeader.readUnsignedByte();
231                localFileHeader.minVersionToExtract = firstHeader.readUnsignedByte();
232                localFileHeader.hostOS = firstHeader.readUnsignedByte();
233                localFileHeader.arjFlags = firstHeader.readUnsignedByte();
234                localFileHeader.method = firstHeader.readUnsignedByte();
235                localFileHeader.fileType = firstHeader.readUnsignedByte();
236                localFileHeader.reserved = firstHeader.readUnsignedByte();
237                localFileHeader.dateTimeModified = read32(firstHeader);
238                localFileHeader.compressedSize = 0xffffFFFFL & read32(firstHeader);
239                localFileHeader.originalSize = 0xffffFFFFL & read32(firstHeader);
240                localFileHeader.originalCrc32 = 0xffffFFFFL & read32(firstHeader);
241                localFileHeader.fileSpecPosition = read16(firstHeader);
242                localFileHeader.fileAccessMode = read16(firstHeader);
243                pushedBackBytes(20);
244                localFileHeader.firstChapter = firstHeader.readUnsignedByte();
245                localFileHeader.lastChapter = firstHeader.readUnsignedByte();
246
247                readExtraData(firstHeaderSize, firstHeader, localFileHeader);
248
249                localFileHeader.name = readString(basicHeader);
250                localFileHeader.comment = readString(basicHeader);
251
252                final ArrayList<byte[]> extendedHeaders = new ArrayList<>();
253                int extendedHeaderSize;
254                while ((extendedHeaderSize = read16(in)) > 0) {
255                    final byte[] extendedHeaderBytes = new byte[extendedHeaderSize];
256                    readFully(in, extendedHeaderBytes);
257                    final long extendedHeaderCrc32 = 0xffffFFFFL & read32(in);
258                    final CRC32 crc32 = new CRC32();
259                    crc32.update(extendedHeaderBytes);
260                    if (extendedHeaderCrc32 != crc32.getValue()) {
261                        throw new IOException("Extended header CRC32 verification failure");
262                    }
263                    extendedHeaders.add(extendedHeaderBytes);
264                }
265                localFileHeader.extendedHeaders = extendedHeaders.toArray(new byte[0][]);
266
267                return localFileHeader;
268            }
269        }
270    }
271
272    private void readExtraData(final int firstHeaderSize, final DataInputStream firstHeader,
273                               final LocalFileHeader localFileHeader) throws IOException {
274        if (firstHeaderSize >= 33) {
275            localFileHeader.extendedFilePosition = read32(firstHeader);
276            if (firstHeaderSize >= 45) {
277                localFileHeader.dateTimeAccessed = read32(firstHeader);
278                localFileHeader.dateTimeCreated = read32(firstHeader);
279                localFileHeader.originalSizeEvenForVolumes = read32(firstHeader);
280                pushedBackBytes(12);
281            }
282            pushedBackBytes(4);
283        }
284    }
285
286    /**
287     * Checks if the signature matches what is expected for an arj file.
288     *
289     * @param signature
290     *            the bytes to check
291     * @param length
292     *            the number of bytes to check
293     * @return true, if this stream is an arj archive stream, false otherwise
294     */
295    public static boolean matches(final byte[] signature, final int length) {
296        return length >= 2 &&
297                (0xff & signature[0]) == ARJ_MAGIC_1 &&
298                (0xff & signature[1]) == ARJ_MAGIC_2;
299    }
300
301    /**
302     * Gets the archive's recorded name.
303     * @return the archive's name
304     */
305    public String getArchiveName() {
306        return mainHeader.name;
307    }
308
309    /**
310     * Gets the archive's comment.
311     * @return the archive's comment
312     */
313    public String getArchiveComment() {
314        return mainHeader.comment;
315    }
316
317    @Override
318    public ArjArchiveEntry getNextEntry() throws IOException {
319        if (currentInputStream != null) {
320            // return value ignored as IOUtils.skip ensures the stream is drained completely
321            IOUtils.skip(currentInputStream, Long.MAX_VALUE);
322            currentInputStream.close();
323            currentLocalFileHeader = null;
324            currentInputStream = null;
325        }
326
327        currentLocalFileHeader = readLocalFileHeader();
328        if (currentLocalFileHeader != null) {
329            currentInputStream = new BoundedInputStream(in, currentLocalFileHeader.compressedSize);
330            if (currentLocalFileHeader.method == LocalFileHeader.Methods.STORED) {
331                currentInputStream = new CRC32VerifyingInputStream(currentInputStream,
332                        currentLocalFileHeader.originalSize, currentLocalFileHeader.originalCrc32);
333            }
334            return new ArjArchiveEntry(currentLocalFileHeader);
335        }
336        currentInputStream = null;
337        return null;
338    }
339
340    @Override
341    public boolean canReadEntryData(final ArchiveEntry ae) {
342        return ae instanceof ArjArchiveEntry
343            && ((ArjArchiveEntry) ae).getMethod() == LocalFileHeader.Methods.STORED;
344    }
345
346    @Override
347    public int read(final byte[] b, final int off, final int len) throws IOException {
348        if (len == 0) {
349            return 0;
350        }
351        if (currentLocalFileHeader == null) {
352            throw new IllegalStateException("No current arj entry");
353        }
354        if (currentLocalFileHeader.method != LocalFileHeader.Methods.STORED) {
355            throw new IOException("Unsupported compression method " + currentLocalFileHeader.method);
356        }
357        return currentInputStream.read(b, off, len);
358    }
359}