001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements.  See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership.  The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the
007     * "License"); you may not use this file except in compliance
008     * with the License.  You may obtain a copy of the License at
009     *
010     * http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing,
013     * software distributed under the License is distributed on an
014     * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015     * KIND, either express or implied.  See the License for the
016     * specific language governing permissions and limitations
017     * under the License.
018     */
019    package org.apache.commons.compress.archivers.ar;
020    
021    import java.io.EOFException;
022    import java.io.IOException;
023    import java.io.InputStream;
024    
025    import org.apache.commons.compress.archivers.ArchiveEntry;
026    import org.apache.commons.compress.archivers.ArchiveInputStream;
027    import org.apache.commons.compress.utils.ArchiveUtils;
028    
029    /**
030     * Implements the "ar" archive format as an input stream.
031     * 
032     * @NotThreadSafe
033     * 
034     */
035    public class ArArchiveInputStream extends ArchiveInputStream {
036    
037        private final InputStream input;
038        private long offset = 0;
039        private boolean closed;
040    
041        /*
042         * If getNextEnxtry has been called, the entry metadata is stored in
043         * currentEntry.
044         */
045        private ArArchiveEntry currentEntry = null;
046    
047        // Storage area for extra long names (GNU ar)
048        private byte[] namebuffer = null;
049    
050        /*
051         * The offset where the current entry started. -1 if no entry has been
052         * called
053         */
054        private long entryOffset = -1;
055    
056        /**
057         * Constructs an Ar input stream with the referenced stream
058         * 
059         * @param pInput
060         *            the ar input stream
061         */
062        public ArArchiveInputStream(final InputStream pInput) {
063            input = pInput;
064            closed = false;
065        }
066    
067        /**
068         * Returns the next AR entry in this stream.
069         * 
070         * @return the next AR entry.
071         * @throws IOException
072         *             if the entry could not be read
073         */
074        public ArArchiveEntry getNextArEntry() throws IOException {
075            if (currentEntry != null) {
076                final long entryEnd = entryOffset + currentEntry.getLength();
077                while (offset < entryEnd) {
078                    int x = read();
079                    if (x == -1) {
080                        // hit EOF before previous entry was complete
081                        // TODO: throw an exception instead?
082                        return null;
083                    }
084                }
085                currentEntry = null;
086            }
087    
088            if (offset == 0) {
089                final byte[] expected = ArchiveUtils.toAsciiBytes(ArArchiveEntry.HEADER);
090                final byte[] realized = new byte[expected.length];
091                final int read = read(realized);
092                if (read != expected.length) {
093                    throw new IOException("failed to read header. Occured at byte: " + getBytesRead());
094                }
095                for (int i = 0; i < expected.length; i++) {
096                    if (expected[i] != realized[i]) {
097                        throw new IOException("invalid header " + ArchiveUtils.toAsciiString(realized));
098                    }
099                }
100            }
101    
102            if (offset % 2 != 0 && read() < 0) {
103                // hit eof
104                return null;
105            }
106    
107            if (input.available() == 0) {
108                return null;
109            }
110    
111            final byte[] name = new byte[16];
112            final byte[] lastmodified = new byte[12];
113            final byte[] userid = new byte[6];
114            final byte[] groupid = new byte[6];
115            final byte[] filemode = new byte[8];
116            final byte[] length = new byte[10];
117    
118            read(name);
119            read(lastmodified);
120            read(userid);
121            read(groupid);
122            read(filemode);
123            read(length);
124    
125            {
126                final byte[] expected = ArchiveUtils.toAsciiBytes(ArArchiveEntry.TRAILER);
127                final byte[] realized = new byte[expected.length];
128                final int read = read(realized);
129                if (read != expected.length) {
130                    throw new IOException("failed to read entry trailer. Occured at byte: " + getBytesRead());
131                }
132                for (int i = 0; i < expected.length; i++) {
133                    if (expected[i] != realized[i]) {
134                        throw new IOException("invalid entry trailer. not read the content? Occured at byte: " + getBytesRead());
135                    }
136                }
137            }
138    
139            entryOffset = offset;
140    
141    //        GNU ar uses a '/' to mark the end of the filename; this allows for the use of spaces without the use of an extended filename.
142    
143            // entry name is stored as ASCII string
144            String temp = ArchiveUtils.toAsciiString(name).trim();
145            long len = asLong(length);
146    
147            if (isGNUStringTable(temp)) { // GNU extended filenames entry
148                currentEntry = readGNUStringTable(length);
149                return getNextArEntry();
150            } else if (temp.endsWith("/")) { // GNU terminator
151                temp = temp.substring(0, temp.length() - 1);
152            } else if (isGNULongName(temp)) {
153                int offset = Integer.parseInt(temp.substring(1));// get the offset
154                temp = getExtendedName(offset); // convert to the long name
155            } else if (isBSDLongName(temp)) {
156                temp = getBSDLongName(temp);
157                // entry length contained the length of the file name in
158                // addition to the real length of the entry.
159                // assume file name was ASCII, there is no "standard" otherwise
160                int nameLen = temp.length();
161                len -= nameLen;
162                entryOffset += nameLen;
163            }
164    
165            currentEntry = new ArArchiveEntry(temp, len, asInt(userid, true),
166                                              asInt(groupid, true), asInt(filemode, 8),
167                                              asLong(lastmodified));
168            return currentEntry;
169        }
170    
171        /**
172         * Get an extended name from the GNU extended name buffer.
173         * 
174         * @param offset pointer to entry within the buffer
175         * @return the extended file name; without trailing "/" if present.
176         * @throws IOException if name not found or buffer not set up
177         */
178        private String getExtendedName(int offset) throws IOException{
179            if (namebuffer == null) {
180                throw new IOException("Cannot process GNU long filename as no // record was found");
181            }
182            for(int i=offset; i < namebuffer.length; i++){
183                if (namebuffer[i]=='\012'){
184                    if (namebuffer[i-1]=='/') {
185                        i--; // drop trailing /
186                    }
187                    return ArchiveUtils.toAsciiString(namebuffer, offset, i-offset);
188                }
189            }
190            throw new IOException("Failed to read entry: "+offset);
191        }
192        private long asLong(byte[] input) {
193            return Long.parseLong(new String(input).trim());
194        }
195    
196        private int asInt(byte[] input) {
197            return asInt(input, 10, false);
198        }
199    
200        private int asInt(byte[] input, boolean treatBlankAsZero) {
201            return asInt(input, 10, treatBlankAsZero);
202        }
203    
204        private int asInt(byte[] input, int base) {
205            return asInt(input, base, false);
206        }
207    
208        private int asInt(byte[] input, int base, boolean treatBlankAsZero) {
209            String string = new String(input).trim();
210            if (string.length() == 0 && treatBlankAsZero) {
211                return 0;
212            }
213            return Integer.parseInt(string, base);
214        }
215    
216        /*
217         * (non-Javadoc)
218         * 
219         * @see
220         * org.apache.commons.compress.archivers.ArchiveInputStream#getNextEntry()
221         */
222        @Override
223        public ArchiveEntry getNextEntry() throws IOException {
224            return getNextArEntry();
225        }
226    
227        /*
228         * (non-Javadoc)
229         * 
230         * @see java.io.InputStream#close()
231         */
232        @Override
233        public void close() throws IOException {
234            if (!closed) {
235                closed = true;
236                input.close();
237            }
238            currentEntry = null;
239        }
240    
241        /*
242         * (non-Javadoc)
243         * 
244         * @see java.io.InputStream#read(byte[], int, int)
245         */
246        @Override
247        public int read(byte[] b, final int off, final int len) throws IOException {
248            int toRead = len;
249            if (currentEntry != null) {
250                final long entryEnd = entryOffset + currentEntry.getLength();
251                if (len > 0 && entryEnd > offset) {
252                    toRead = (int) Math.min(len, entryEnd - offset);
253                } else {
254                    return -1;
255                }
256            }
257            final int ret = this.input.read(b, off, toRead);
258            count(ret);
259            offset += (ret > 0 ? ret : 0);
260            return ret;
261        }
262    
263        /**
264         * Checks if the signature matches ASCII "!<arch>" followed by a single LF
265         * control character
266         * 
267         * @param signature
268         *            the bytes to check
269         * @param length
270         *            the number of bytes to check
271         * @return true, if this stream is an Ar archive stream, false otherwise
272         */
273        public static boolean matches(byte[] signature, int length) {
274            // 3c21 7261 6863 0a3e
275    
276            if (length < 8) {
277                return false;
278            }
279            if (signature[0] != 0x21) {
280                return false;
281            }
282            if (signature[1] != 0x3c) {
283                return false;
284            }
285            if (signature[2] != 0x61) {
286                return false;
287            }
288            if (signature[3] != 0x72) {
289                return false;
290            }
291            if (signature[4] != 0x63) {
292                return false;
293            }
294            if (signature[5] != 0x68) {
295                return false;
296            }
297            if (signature[6] != 0x3e) {
298                return false;
299            }
300            if (signature[7] != 0x0a) {
301                return false;
302            }
303    
304            return true;
305        }
306    
307        static final String BSD_LONGNAME_PREFIX = "#1/";
308        private static final int BSD_LONGNAME_PREFIX_LEN =
309            BSD_LONGNAME_PREFIX.length();
310        private static final String BSD_LONGNAME_PATTERN =
311            "^" + BSD_LONGNAME_PREFIX + "\\d+";
312    
313        /**
314         * Does the name look like it is a long name (or a name containing
315         * spaces) as encoded by BSD ar?
316         *
317         * <p>From the FreeBSD ar(5) man page:</p>
318         * <pre>
319         * BSD   In the BSD variant, names that are shorter than 16
320         *       characters and without embedded spaces are stored
321         *       directly in this field.  If a name has an embedded
322         *       space, or if it is longer than 16 characters, then
323         *       the string "#1/" followed by the decimal represen-
324         *       tation of the length of the file name is placed in
325         *       this field.        The actual file name is stored immedi-
326         *       ately after the archive header.  The content of the
327         *       archive member follows the file name.  The ar_size
328         *       field of the header (see below) will then hold the
329         *       sum of the size of the file name and the size of
330         *       the member.
331         * </pre>
332         *
333         * @since Apache Commons Compress 1.3
334         */
335        private static boolean isBSDLongName(String name) {
336            return name != null && name.matches(BSD_LONGNAME_PATTERN);
337        }
338    
339        /**
340         * Reads the real name from the current stream assuming the very
341         * first bytes to be read are the real file name.
342         *
343         * @see #isBSDLongName
344         *
345         * @since Apache Commons Compress 1.3
346         */
347        private String getBSDLongName(String bsdLongName) throws IOException {
348            int nameLen =
349                Integer.parseInt(bsdLongName.substring(BSD_LONGNAME_PREFIX_LEN));
350            byte[] name = new byte[nameLen];
351            int read = 0, readNow = 0;
352            while ((readNow = input.read(name, read, nameLen - read)) >= 0) {
353                read += readNow;
354                count(readNow);
355                if (read == nameLen) {
356                    break;
357                }
358            }
359            if (read != nameLen) {
360                throw new EOFException();
361            }
362            return ArchiveUtils.toAsciiString(name);
363        }
364    
365        private static final String GNU_STRING_TABLE_NAME = "//";
366    
367        /**
368         * Is this the name of the "Archive String Table" as used by
369         * SVR4/GNU to store long file names?
370         *
371         * <p>GNU ar stores multiple extended filenames in the data section
372         * of a file with the name "//", this record is referred to by
373         * future headers.</p>
374         *
375         * <p>A header references an extended filename by storing a "/"
376         * followed by a decimal offset to the start of the filename in
377         * the extended filename data section.</p>
378         * 
379         * <p>The format of the "//" file itself is simply a list of the
380         * long filenames, each separated by one or more LF
381         * characters. Note that the decimal offsets are number of
382         * characters, not line or string number within the "//" file.</p>
383         */
384        private static boolean isGNUStringTable(String name) {
385            return GNU_STRING_TABLE_NAME.equals(name);
386        }
387    
388        /**
389         * Reads the GNU archive String Table.
390         *
391         * @see #isGNUStringTable
392         */
393        private ArArchiveEntry readGNUStringTable(byte[] length) throws IOException {
394            int bufflen = asInt(length); // Assume length will fit in an int
395            namebuffer = new byte[bufflen];
396            int read = read(namebuffer, 0, bufflen);
397            if (read != bufflen){
398                throw new IOException("Failed to read complete // record: expected="
399                                      + bufflen + " read=" + read);
400            }
401            return new ArArchiveEntry(GNU_STRING_TABLE_NAME, bufflen);
402        }
403    
404        private static final String GNU_LONGNAME_PATTERN = "^/\\d+";
405    
406        /**
407         * Does the name look like it is a long name (or a name containing
408         * spaces) as encoded by SVR4/GNU ar?
409         *
410         * @see #isGNUStringTable
411         */
412        private boolean isGNULongName(String name) {
413            return name != null && name.matches(GNU_LONGNAME_PATTERN);
414        }
415    }