001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.commons.compress.archivers.cpio;
020
021import java.io.EOFException;
022import java.io.IOException;
023import java.io.InputStream;
024
025import org.apache.commons.compress.archivers.ArchiveEntry;
026import org.apache.commons.compress.archivers.ArchiveInputStream;
027import org.apache.commons.compress.archivers.zip.ZipEncoding;
028import org.apache.commons.compress.archivers.zip.ZipEncodingHelper;
029import org.apache.commons.compress.utils.ArchiveUtils;
030import org.apache.commons.compress.utils.CharsetNames;
031import org.apache.commons.compress.utils.IOUtils;
032
033/**
034 * CPIOArchiveInputStream is a stream for reading cpio streams. All formats of
035 * cpio are supported (old ascii, old binary, new portable format and the new
036 * portable format with crc).
037 *
038 * <p>
 * The stream can be read by extracting a cpio entry (containing all
 * information about an entry) and afterwards reading from the stream the file
 * specified by the entry.
042 * </p>
043 * <pre>
 * CpioArchiveInputStream cpioIn = new CpioArchiveInputStream(
 *         new FileInputStream(new File(&quot;test.cpio&quot;)));
 * CpioArchiveEntry cpioEntry;
 *
 * while ((cpioEntry = cpioIn.getNextCPIOEntry()) != null) {
 *     System.out.println(cpioEntry.getName());
 *     int tmp;
 *     StringBuilder buf = new StringBuilder();
 *     while ((tmp = cpioIn.read()) != -1) {
053 *         buf.append((char) tmp);
054 *     }
055 *     System.out.println(buf.toString());
056 * }
057 * cpioIn.close();
058 * </pre>
059 * <p>
 * Note: This implementation should be compatible with cpio 2.5
061 * 
062 * <p>This class uses mutable fields and is not considered to be threadsafe.
063 * 
064 * <p>Based on code from the jRPM project (jrpm.sourceforge.net)
065 */
066
067public class CpioArchiveInputStream extends ArchiveInputStream implements
068        CpioConstants {
069
070    private boolean closed = false;
071
072    private CpioArchiveEntry entry;
073
074    private long entryBytesRead = 0;
075
076    private boolean entryEOF = false;
077
078    private final byte tmpbuf[] = new byte[4096];
079
080    private long crc = 0;
081
082    private final InputStream in;
083
084    // cached buffers - must only be used locally in the class (COMPRESS-172 - reduce garbage collection)
085    private final byte[] twoBytesBuf = new byte[2];
086    private final byte[] fourBytesBuf = new byte[4];
087    private final byte[] sixBytesBuf = new byte[6];
088
089    private final int blockSize;
090
091    /**
092     * The encoding to use for filenames and labels.
093     */
094    private final ZipEncoding zipEncoding;
095
096    // the provided encoding (for unit tests)
097    final String encoding;
098
099    /**
100     * Construct the cpio input stream with a blocksize of {@link
101     * CpioConstants#BLOCK_SIZE BLOCK_SIZE} and expecting ASCII file
102     * names.
103     * 
104     * @param in
105     *            The cpio stream
106     */
107    public CpioArchiveInputStream(final InputStream in) {
108        this(in, BLOCK_SIZE, CharsetNames.US_ASCII);
109    }
110
111    /**
112     * Construct the cpio input stream with a blocksize of {@link
113     * CpioConstants#BLOCK_SIZE BLOCK_SIZE}.
114     * 
115     * @param in
116     *            The cpio stream
117     * @param encoding
118     *            The encoding of file names to expect - use null for
119     *            the platform's default.
120     * @since 1.6
121     */
122    public CpioArchiveInputStream(final InputStream in, final String encoding) {
123        this(in, BLOCK_SIZE, encoding);
124    }
125
126    /**
127     * Construct the cpio input stream with a blocksize of {@link
128     * CpioConstants#BLOCK_SIZE BLOCK_SIZE} expecting ASCII file
129     * names.
130     * 
131     * @param in
132     *            The cpio stream
133     * @param blockSize
134     *            The block size of the archive.
135     * @since 1.5
136     */
137    public CpioArchiveInputStream(final InputStream in, final int blockSize) {
138        this(in, blockSize, CharsetNames.US_ASCII);
139    }
140
141    /**
142     * Construct the cpio input stream with a blocksize of {@link CpioConstants#BLOCK_SIZE BLOCK_SIZE}.
143     * 
144     * @param in
145     *            The cpio stream
146     * @param blockSize
147     *            The block size of the archive.
148     * @param encoding
149     *            The encoding of file names to expect - use null for
150     *            the platform's default.
151     * @since 1.6
152     */
153    public CpioArchiveInputStream(final InputStream in, final int blockSize, final String encoding) {
154        this.in = in;
155        this.blockSize = blockSize;
156        this.encoding = encoding;
157        this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
158    }
159
160    /**
161     * Returns 0 after EOF has reached for the current entry data, otherwise
162     * always return 1.
163     * <p>
164     * Programs should not count on this method to return the actual number of
165     * bytes that could be read without blocking.
166     * 
167     * @return 1 before EOF and 0 after EOF has reached for current entry.
168     * @throws IOException
169     *             if an I/O error has occurred or if a CPIO file error has
170     *             occurred
171     */
172    @Override
173    public int available() throws IOException {
174        ensureOpen();
175        if (this.entryEOF) {
176            return 0;
177        }
178        return 1;
179    }
180
181    /**
182     * Closes the CPIO input stream.
183     * 
184     * @throws IOException
185     *             if an I/O error has occurred
186     */
187    @Override
188    public void close() throws IOException {
189        if (!this.closed) {
190            in.close();
191            this.closed = true;
192        }
193    }
194
195    /**
196     * Closes the current CPIO entry and positions the stream for reading the
197     * next entry.
198     * 
199     * @throws IOException
200     *             if an I/O error has occurred or if a CPIO file error has
201     *             occurred
202     */
203    private void closeEntry() throws IOException {
204        // the skip implementation of this class will not skip more
205        // than Integer.MAX_VALUE bytes
206        while (skip((long) Integer.MAX_VALUE) == Integer.MAX_VALUE) { // NOPMD
207            // do nothing
208        }
209    }
210
211    /**
212     * Check to make sure that this stream has not been closed
213     * 
214     * @throws IOException
215     *             if the stream is already closed
216     */
217    private void ensureOpen() throws IOException {
218        if (this.closed) {
219            throw new IOException("Stream closed");
220        }
221    }
222
223    /**
224     * Reads the next CPIO file entry and positions stream at the beginning of
225     * the entry data.
226     * 
227     * @return the CPIOArchiveEntry just read
228     * @throws IOException
229     *             if an I/O error has occurred or if a CPIO file error has
230     *             occurred
231     */
232    public CpioArchiveEntry getNextCPIOEntry() throws IOException {
233        ensureOpen();
234        if (this.entry != null) {
235            closeEntry();
236        }
237        readFully(twoBytesBuf, 0, twoBytesBuf.length);
238        if (CpioUtil.byteArray2long(twoBytesBuf, false) == MAGIC_OLD_BINARY) {
239            this.entry = readOldBinaryEntry(false);
240        } else if (CpioUtil.byteArray2long(twoBytesBuf, true)
241                   == MAGIC_OLD_BINARY) {
242            this.entry = readOldBinaryEntry(true);
243        } else {
244            System.arraycopy(twoBytesBuf, 0, sixBytesBuf, 0,
245                             twoBytesBuf.length);
246            readFully(sixBytesBuf, twoBytesBuf.length,
247                      fourBytesBuf.length);
248            final String magicString = ArchiveUtils.toAsciiString(sixBytesBuf);
249            switch (magicString) {
250                case MAGIC_NEW:
251                    this.entry = readNewEntry(false);
252                    break;
253                case MAGIC_NEW_CRC:
254                    this.entry = readNewEntry(true);
255                    break;
256                case MAGIC_OLD_ASCII:
257                    this.entry = readOldAsciiEntry();
258                    break;
259                default:
260                    throw new IOException("Unknown magic [" + magicString + "]. Occured at byte: " + getBytesRead());
261            }
262        }
263
264        this.entryBytesRead = 0;
265        this.entryEOF = false;
266        this.crc = 0;
267
268        if (this.entry.getName().equals(CPIO_TRAILER)) {
269            this.entryEOF = true;
270            skipRemainderOfLastBlock();
271            return null;
272        }
273        return this.entry;
274    }
275
276    private void skip(final int bytes) throws IOException{
277        // bytes cannot be more than 3 bytes
278        if (bytes > 0) {
279            readFully(fourBytesBuf, 0, bytes);
280        }
281    }
282
283    /**
284     * Reads from the current CPIO entry into an array of bytes. Blocks until
285     * some input is available.
286     * 
287     * @param b
288     *            the buffer into which the data is read
289     * @param off
290     *            the start offset of the data
291     * @param len
292     *            the maximum number of bytes read
293     * @return the actual number of bytes read, or -1 if the end of the entry is
294     *         reached
295     * @throws IOException
296     *             if an I/O error has occurred or if a CPIO file error has
297     *             occurred
298     */
299    @Override
300    public int read(final byte[] b, final int off, final int len)
301            throws IOException {
302        ensureOpen();
303        if (off < 0 || len < 0 || off > b.length - len) {
304            throw new IndexOutOfBoundsException();
305        } else if (len == 0) {
306            return 0;
307        }
308
309        if (this.entry == null || this.entryEOF) {
310            return -1;
311        }
312        if (this.entryBytesRead == this.entry.getSize()) {
313            skip(entry.getDataPadCount());
314            this.entryEOF = true;
315            if (this.entry.getFormat() == FORMAT_NEW_CRC
316                && this.crc != this.entry.getChksum()) {
317                throw new IOException("CRC Error. Occured at byte: "
318                                      + getBytesRead());
319            }
320            return -1; // EOF for this entry
321        }
322        final int tmplength = (int) Math.min(len, this.entry.getSize()
323                - this.entryBytesRead);
324        if (tmplength < 0) {
325            return -1;
326        }
327
328        final int tmpread = readFully(b, off, tmplength);
329        if (this.entry.getFormat() == FORMAT_NEW_CRC) {
330            for (int pos = 0; pos < tmpread; pos++) {
331                this.crc += b[pos] & 0xFF;
332            }
333        }
334        this.entryBytesRead += tmpread;
335
336        return tmpread;
337    }
338
339    private final int readFully(final byte[] b, final int off, final int len)
340            throws IOException {
341        final int count = IOUtils.readFully(in, b, off, len);
342        count(count);
343        if (count < len) {
344            throw new EOFException();
345        }
346        return count;
347    }
348
349    private long readBinaryLong(final int length, final boolean swapHalfWord)
350            throws IOException {
351        final byte tmp[] = new byte[length];
352        readFully(tmp, 0, tmp.length);
353        return CpioUtil.byteArray2long(tmp, swapHalfWord);
354    }
355
356    private long readAsciiLong(final int length, final int radix)
357            throws IOException {
358        final byte tmpBuffer[] = new byte[length];
359        readFully(tmpBuffer, 0, tmpBuffer.length);
360        return Long.parseLong(ArchiveUtils.toAsciiString(tmpBuffer), radix);
361    }
362
363    private CpioArchiveEntry readNewEntry(final boolean hasCrc)
364            throws IOException {
365        CpioArchiveEntry ret;
366        if (hasCrc) {
367            ret = new CpioArchiveEntry(FORMAT_NEW_CRC);
368        } else {
369            ret = new CpioArchiveEntry(FORMAT_NEW);
370        }
371
372        ret.setInode(readAsciiLong(8, 16));
373        final long mode = readAsciiLong(8, 16);
374        if (CpioUtil.fileType(mode) != 0){ // mode is initialised to 0
375            ret.setMode(mode);
376        }
377        ret.setUID(readAsciiLong(8, 16));
378        ret.setGID(readAsciiLong(8, 16));
379        ret.setNumberOfLinks(readAsciiLong(8, 16));
380        ret.setTime(readAsciiLong(8, 16));
381        ret.setSize(readAsciiLong(8, 16));
382        ret.setDeviceMaj(readAsciiLong(8, 16));
383        ret.setDeviceMin(readAsciiLong(8, 16));
384        ret.setRemoteDeviceMaj(readAsciiLong(8, 16));
385        ret.setRemoteDeviceMin(readAsciiLong(8, 16));
386        final long namesize = readAsciiLong(8, 16);
387        ret.setChksum(readAsciiLong(8, 16));
388        final String name = readCString((int) namesize);
389        ret.setName(name);
390        if (CpioUtil.fileType(mode) == 0 && !name.equals(CPIO_TRAILER)){
391            throw new IOException("Mode 0 only allowed in the trailer. Found entry name: "
392                                  + ArchiveUtils.sanitize(name)
393                                  + " Occured at byte: " + getBytesRead());
394        }
395        skip(ret.getHeaderPadCount());
396
397        return ret;
398    }
399
400    private CpioArchiveEntry readOldAsciiEntry() throws IOException {
401        final CpioArchiveEntry ret = new CpioArchiveEntry(FORMAT_OLD_ASCII);
402
403        ret.setDevice(readAsciiLong(6, 8));
404        ret.setInode(readAsciiLong(6, 8));
405        final long mode = readAsciiLong(6, 8);
406        if (CpioUtil.fileType(mode) != 0) {
407            ret.setMode(mode);
408        }
409        ret.setUID(readAsciiLong(6, 8));
410        ret.setGID(readAsciiLong(6, 8));
411        ret.setNumberOfLinks(readAsciiLong(6, 8));
412        ret.setRemoteDevice(readAsciiLong(6, 8));
413        ret.setTime(readAsciiLong(11, 8));
414        final long namesize = readAsciiLong(6, 8);
415        ret.setSize(readAsciiLong(11, 8));
416        final String name = readCString((int) namesize);
417        ret.setName(name);
418        if (CpioUtil.fileType(mode) == 0 && !name.equals(CPIO_TRAILER)){
419            throw new IOException("Mode 0 only allowed in the trailer. Found entry: "
420                                  + ArchiveUtils.sanitize(name)
421                                  + " Occured at byte: " + getBytesRead());
422        }
423
424        return ret;
425    }
426
427    private CpioArchiveEntry readOldBinaryEntry(final boolean swapHalfWord)
428            throws IOException {
429        final CpioArchiveEntry ret = new CpioArchiveEntry(FORMAT_OLD_BINARY);
430
431        ret.setDevice(readBinaryLong(2, swapHalfWord));
432        ret.setInode(readBinaryLong(2, swapHalfWord));
433        final long mode = readBinaryLong(2, swapHalfWord);
434        if (CpioUtil.fileType(mode) != 0){
435            ret.setMode(mode);
436        }
437        ret.setUID(readBinaryLong(2, swapHalfWord));
438        ret.setGID(readBinaryLong(2, swapHalfWord));
439        ret.setNumberOfLinks(readBinaryLong(2, swapHalfWord));
440        ret.setRemoteDevice(readBinaryLong(2, swapHalfWord));
441        ret.setTime(readBinaryLong(4, swapHalfWord));
442        final long namesize = readBinaryLong(2, swapHalfWord);
443        ret.setSize(readBinaryLong(4, swapHalfWord));
444        final String name = readCString((int) namesize);
445        ret.setName(name);
446        if (CpioUtil.fileType(mode) == 0 && !name.equals(CPIO_TRAILER)){
447            throw new IOException("Mode 0 only allowed in the trailer. Found entry: "
448                                  + ArchiveUtils.sanitize(name)
449                                  + "Occured at byte: " + getBytesRead());
450        }
451        skip(ret.getHeaderPadCount());
452
453        return ret;
454    }
455
456    private String readCString(final int length) throws IOException {
457        // don't include trailing NUL in file name to decode
458        final byte tmpBuffer[] = new byte[length - 1];
459        readFully(tmpBuffer, 0, tmpBuffer.length);
460        this.in.read();
461        return zipEncoding.decode(tmpBuffer);
462    }
463
464    /**
465     * Skips specified number of bytes in the current CPIO entry.
466     * 
467     * @param n
468     *            the number of bytes to skip
469     * @return the actual number of bytes skipped
470     * @throws IOException
471     *             if an I/O error has occurred
472     * @throws IllegalArgumentException
473     *             if n &lt; 0
474     */
475    @Override
476    public long skip(final long n) throws IOException {
477        if (n < 0) {
478            throw new IllegalArgumentException("negative skip length");
479        }
480        ensureOpen();
481        final int max = (int) Math.min(n, Integer.MAX_VALUE);
482        int total = 0;
483
484        while (total < max) {
485            int len = max - total;
486            if (len > this.tmpbuf.length) {
487                len = this.tmpbuf.length;
488            }
489            len = read(this.tmpbuf, 0, len);
490            if (len == -1) {
491                this.entryEOF = true;
492                break;
493            }
494            total += len;
495        }
496        return total;
497    }
498
499    @Override
500    public ArchiveEntry getNextEntry() throws IOException {
501        return getNextCPIOEntry();
502    }
503
504    /**
505     * Skips the padding zeros written after the TRAILER!!! entry.
506     */
507    private void skipRemainderOfLastBlock() throws IOException {
508        final long readFromLastBlock = getBytesRead() % blockSize;
509        long remainingBytes = readFromLastBlock == 0 ? 0
510            : blockSize - readFromLastBlock;
511        while (remainingBytes > 0) {
512            final long skipped = skip(blockSize - readFromLastBlock);
513            if (skipped <= 0) {
514                break;
515            }
516            remainingBytes -= skipped;
517        }
518    }
519
520    /**
521     * Checks if the signature matches one of the following magic values:
522     * 
523     * Strings:
524     *
525     * "070701" - MAGIC_NEW
526     * "070702" - MAGIC_NEW_CRC
527     * "070707" - MAGIC_OLD_ASCII
528     * 
529     * Octal Binary value:
530     * 
531     * 070707 - MAGIC_OLD_BINARY (held as a short) = 0x71C7 or 0xC771
532     * @param signature data to match
533     * @param length length of data
534     * @return whether the buffer seems to contain CPIO data
535     */
536    public static boolean matches(final byte[] signature, final int length) {
537        if (length < 6) {
538            return false;
539        }
540
541        // Check binary values
542        if (signature[0] == 0x71 && (signature[1] & 0xFF) == 0xc7) {
543            return true;
544        }
545        if (signature[1] == 0x71 && (signature[0] & 0xFF) == 0xc7) {
546            return true;
547        }
548
549        // Check Ascii (String) values
550        // 3037 3037 30nn
551        if (signature[0] != 0x30) {
552            return false;
553        }
554        if (signature[1] != 0x37) {
555            return false;
556        }
557        if (signature[2] != 0x30) {
558            return false;
559        }
560        if (signature[3] != 0x37) {
561            return false;
562        }
563        if (signature[4] != 0x30) {
564            return false;
565        }
566        // Check last byte
567        if (signature[5] == 0x31) {
568            return true;
569        }
570        if (signature[5] == 0x32) {
571            return true;
572        }
573        if (signature[5] == 0x37) {
574            return true;
575        }
576
577        return false;
578    }
579}