001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.commons.compress.archivers.ar;
020
021import java.io.EOFException;
022import java.io.IOException;
023import java.io.InputStream;
024
025import org.apache.commons.compress.archivers.ArchiveEntry;
026import org.apache.commons.compress.archivers.ArchiveInputStream;
027import org.apache.commons.compress.utils.ArchiveUtils;
028import org.apache.commons.compress.utils.IOUtils;
029
030/**
031 * Implements the "ar" archive format as an input stream.
032 * 
033 * @NotThreadSafe
034 * 
035 */
036public class ArArchiveInputStream extends ArchiveInputStream {
037
038    private final InputStream input;
039    private long offset = 0;
040    private boolean closed;
041
042    /*
043     * If getNextEnxtry has been called, the entry metadata is stored in
044     * currentEntry.
045     */
046    private ArArchiveEntry currentEntry = null;
047
048    // Storage area for extra long names (GNU ar)
049    private byte[] namebuffer = null;
050
051    /*
052     * The offset where the current entry started. -1 if no entry has been
053     * called
054     */
055    private long entryOffset = -1;
056
057    // cached buffers - must only be used locally in the class (COMPRESS-172 - reduce garbage collection)
058    private final byte[] NAME_BUF = new byte[16];
059    private final byte[] LAST_MODIFIED_BUF = new byte[12];
060    private final byte[] ID_BUF = new byte[6];
061    private final byte[] FILE_MODE_BUF = new byte[8];
062    private final byte[] LENGTH_BUF = new byte[10];
063
064    /**
065     * Constructs an Ar input stream with the referenced stream
066     * 
067     * @param pInput
068     *            the ar input stream
069     */
070    public ArArchiveInputStream(final InputStream pInput) {
071        input = pInput;
072        closed = false;
073    }
074
075    /**
076     * Returns the next AR entry in this stream.
077     * 
078     * @return the next AR entry.
079     * @throws IOException
080     *             if the entry could not be read
081     */
082    public ArArchiveEntry getNextArEntry() throws IOException {
083        if (currentEntry != null) {
084            final long entryEnd = entryOffset + currentEntry.getLength();
085            IOUtils.skip(this, entryEnd - offset);
086            currentEntry = null;
087        }
088
089        if (offset == 0) {
090            final byte[] expected = ArchiveUtils.toAsciiBytes(ArArchiveEntry.HEADER);
091            final byte[] realized = new byte[expected.length];
092            final int read = IOUtils.readFully(this, realized);
093            if (read != expected.length) {
094                throw new IOException("failed to read header. Occured at byte: " + getBytesRead());
095            }
096            for (int i = 0; i < expected.length; i++) {
097                if (expected[i] != realized[i]) {
098                    throw new IOException("invalid header " + ArchiveUtils.toAsciiString(realized));
099                }
100            }
101        }
102
103        if (offset % 2 != 0 && read() < 0) {
104            // hit eof
105            return null;
106        }
107
108        if (input.available() == 0) {
109            return null;
110        }
111
112        IOUtils.readFully(this, NAME_BUF);
113        IOUtils.readFully(this, LAST_MODIFIED_BUF);
114        IOUtils.readFully(this, ID_BUF);
115        final int userId = asInt(ID_BUF, true);
116        IOUtils.readFully(this, ID_BUF);
117        IOUtils.readFully(this, FILE_MODE_BUF);
118        IOUtils.readFully(this, LENGTH_BUF);
119
120        {
121            final byte[] expected = ArchiveUtils.toAsciiBytes(ArArchiveEntry.TRAILER);
122            final byte[] realized = new byte[expected.length];
123            final int read = IOUtils.readFully(this, realized);
124            if (read != expected.length) {
125                throw new IOException("failed to read entry trailer. Occured at byte: " + getBytesRead());
126            }
127            for (int i = 0; i < expected.length; i++) {
128                if (expected[i] != realized[i]) {
129                    throw new IOException("invalid entry trailer. not read the content? Occured at byte: " + getBytesRead());
130                }
131            }
132        }
133
134        entryOffset = offset;
135
136//        GNU ar uses a '/' to mark the end of the filename; this allows for the use of spaces without the use of an extended filename.
137
138        // entry name is stored as ASCII string
139        String temp = ArchiveUtils.toAsciiString(NAME_BUF).trim();
140        if (isGNUStringTable(temp)) { // GNU extended filenames entry
141            currentEntry = readGNUStringTable(LENGTH_BUF);
142            return getNextArEntry();
143        }
144
145        long len = asLong(LENGTH_BUF);
146        if (temp.endsWith("/")) { // GNU terminator
147            temp = temp.substring(0, temp.length() - 1);
148        } else if (isGNULongName(temp)) {
149            final int off = Integer.parseInt(temp.substring(1));// get the offset
150            temp = getExtendedName(off); // convert to the long name
151        } else if (isBSDLongName(temp)) {
152            temp = getBSDLongName(temp);
153            // entry length contained the length of the file name in
154            // addition to the real length of the entry.
155            // assume file name was ASCII, there is no "standard" otherwise
156            final int nameLen = temp.length();
157            len -= nameLen;
158            entryOffset += nameLen;
159        }
160
161        currentEntry = new ArArchiveEntry(temp, len, userId,
162                                          asInt(ID_BUF, true),
163                                          asInt(FILE_MODE_BUF, 8),
164                                          asLong(LAST_MODIFIED_BUF));
165        return currentEntry;
166    }
167
168    /**
169     * Get an extended name from the GNU extended name buffer.
170     * 
171     * @param offset pointer to entry within the buffer
172     * @return the extended file name; without trailing "/" if present.
173     * @throws IOException if name not found or buffer not set up
174     */
175    private String getExtendedName(final int offset) throws IOException{
176        if (namebuffer == null) {
177            throw new IOException("Cannot process GNU long filename as no // record was found");
178        }
179        for(int i=offset; i < namebuffer.length; i++){
180            if (namebuffer[i] == '\012' || namebuffer[i] == 0){
181                if (namebuffer[i-1]=='/') {
182                    i--; // drop trailing /
183                }
184                return ArchiveUtils.toAsciiString(namebuffer, offset, i-offset);
185            }
186        }
187        throw new IOException("Failed to read entry: "+offset);
188    }
189    private long asLong(final byte[] input) {
190        return Long.parseLong(ArchiveUtils.toAsciiString(input).trim());
191    }
192
193    private int asInt(final byte[] input) {
194        return asInt(input, 10, false);
195    }
196
197    private int asInt(final byte[] input, final boolean treatBlankAsZero) {
198        return asInt(input, 10, treatBlankAsZero);
199    }
200
201    private int asInt(final byte[] input, final int base) {
202        return asInt(input, base, false);
203    }
204
205    private int asInt(final byte[] input, final int base, final boolean treatBlankAsZero) {
206        final String string = ArchiveUtils.toAsciiString(input).trim();
207        if (string.length() == 0 && treatBlankAsZero) {
208            return 0;
209        }
210        return Integer.parseInt(string, base);
211    }
212
213    /*
214     * (non-Javadoc)
215     * 
216     * @see
217     * org.apache.commons.compress.archivers.ArchiveInputStream#getNextEntry()
218     */
219    @Override
220    public ArchiveEntry getNextEntry() throws IOException {
221        return getNextArEntry();
222    }
223
224    /*
225     * (non-Javadoc)
226     * 
227     * @see java.io.InputStream#close()
228     */
229    @Override
230    public void close() throws IOException {
231        if (!closed) {
232            closed = true;
233            input.close();
234        }
235        currentEntry = null;
236    }
237
238    /*
239     * (non-Javadoc)
240     * 
241     * @see java.io.InputStream#read(byte[], int, int)
242     */
243    @Override
244    public int read(final byte[] b, final int off, final int len) throws IOException {
245        int toRead = len;
246        if (currentEntry != null) {
247            final long entryEnd = entryOffset + currentEntry.getLength();
248            if (len > 0 && entryEnd > offset) {
249                toRead = (int) Math.min(len, entryEnd - offset);
250            } else {
251                return -1;
252            }
253        }
254        final int ret = this.input.read(b, off, toRead);
255        count(ret);
256        offset += ret > 0 ? ret : 0;
257        return ret;
258    }
259
260    /**
261     * Checks if the signature matches ASCII "!&lt;arch&gt;" followed by a single LF
262     * control character
263     * 
264     * @param signature
265     *            the bytes to check
266     * @param length
267     *            the number of bytes to check
268     * @return true, if this stream is an Ar archive stream, false otherwise
269     */
270    public static boolean matches(final byte[] signature, final int length) {
271        // 3c21 7261 6863 0a3e
272
273        if (length < 8) {
274            return false;
275        }
276        if (signature[0] != 0x21) {
277            return false;
278        }
279        if (signature[1] != 0x3c) {
280            return false;
281        }
282        if (signature[2] != 0x61) {
283            return false;
284        }
285        if (signature[3] != 0x72) {
286            return false;
287        }
288        if (signature[4] != 0x63) {
289            return false;
290        }
291        if (signature[5] != 0x68) {
292            return false;
293        }
294        if (signature[6] != 0x3e) {
295            return false;
296        }
297        if (signature[7] != 0x0a) {
298            return false;
299        }
300
301        return true;
302    }
303
304    static final String BSD_LONGNAME_PREFIX = "#1/";
305    private static final int BSD_LONGNAME_PREFIX_LEN =
306        BSD_LONGNAME_PREFIX.length();
307    private static final String BSD_LONGNAME_PATTERN =
308        "^" + BSD_LONGNAME_PREFIX + "\\d+";
309
310    /**
311     * Does the name look like it is a long name (or a name containing
312     * spaces) as encoded by BSD ar?
313     *
314     * <p>From the FreeBSD ar(5) man page:</p>
315     * <pre>
316     * BSD   In the BSD variant, names that are shorter than 16
317     *       characters and without embedded spaces are stored
318     *       directly in this field.  If a name has an embedded
319     *       space, or if it is longer than 16 characters, then
320     *       the string "#1/" followed by the decimal represen-
321     *       tation of the length of the file name is placed in
322     *       this field. The actual file name is stored immedi-
323     *       ately after the archive header.  The content of the
324     *       archive member follows the file name.  The ar_size
325     *       field of the header (see below) will then hold the
326     *       sum of the size of the file name and the size of
327     *       the member.
328     * </pre>
329     *
330     * @since 1.3
331     */
332    private static boolean isBSDLongName(final String name) {
333        return name != null && name.matches(BSD_LONGNAME_PATTERN);
334    }
335
336    /**
337     * Reads the real name from the current stream assuming the very
338     * first bytes to be read are the real file name.
339     *
340     * @see #isBSDLongName
341     *
342     * @since 1.3
343     */
344    private String getBSDLongName(final String bsdLongName) throws IOException {
345        final int nameLen =
346            Integer.parseInt(bsdLongName.substring(BSD_LONGNAME_PREFIX_LEN));
347        final byte[] name = new byte[nameLen];
348        final int read = IOUtils.readFully(this, name);
349        if (read != nameLen) {
350            throw new EOFException();
351        }
352        return ArchiveUtils.toAsciiString(name);
353    }
354
355    private static final String GNU_STRING_TABLE_NAME = "//";
356
357    /**
358     * Is this the name of the "Archive String Table" as used by
359     * SVR4/GNU to store long file names?
360     *
361     * <p>GNU ar stores multiple extended filenames in the data section
362     * of a file with the name "//", this record is referred to by
363     * future headers.</p>
364     *
365     * <p>A header references an extended filename by storing a "/"
366     * followed by a decimal offset to the start of the filename in
367     * the extended filename data section.</p>
368     * 
369     * <p>The format of the "//" file itself is simply a list of the
370     * long filenames, each separated by one or more LF
371     * characters. Note that the decimal offsets are number of
372     * characters, not line or string number within the "//" file.</p>
373     */
374    private static boolean isGNUStringTable(final String name) {
375        return GNU_STRING_TABLE_NAME.equals(name);
376    }
377
378    /**
379     * Reads the GNU archive String Table.
380     *
381     * @see #isGNUStringTable
382     */
383    private ArArchiveEntry readGNUStringTable(final byte[] length) throws IOException {
384        final int bufflen = asInt(length); // Assume length will fit in an int
385        namebuffer = new byte[bufflen];
386        final int read = IOUtils.readFully(this, namebuffer, 0, bufflen);
387        if (read != bufflen){
388            throw new IOException("Failed to read complete // record: expected="
389                                  + bufflen + " read=" + read);
390        }
391        return new ArArchiveEntry(GNU_STRING_TABLE_NAME, bufflen);
392    }
393
394    private static final String GNU_LONGNAME_PATTERN = "^/\\d+";
395
396    /**
397     * Does the name look like it is a long name (or a name containing
398     * spaces) as encoded by SVR4/GNU ar?
399     *
400     * @see #isGNUStringTable
401     */
402    private boolean isGNULongName(final String name) {
403        return name != null && name.matches(GNU_LONGNAME_PATTERN);
404    }
405}