/*
 *  Licensed to the Apache Software Foundation (ASF) under one
 *  or more contributor license agreements.  See the NOTICE file
 *  distributed with this work for additional information
 *  regarding copyright ownership.  The ASF licenses this file
 *  to you under the Apache License, Version 2.0 (the
 *  "License"); you may not use this file except in compliance
 *  with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing,
 *  software distributed under the License is distributed on an
 *  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 *  KIND, either express or implied.  See the License for the
 *  specific language governing permissions and limitations
 *  under the License.
 */
package org.apache.commons.compress.archivers.ar;

import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;

import org.apache.commons.compress.archivers.ArchiveEntry;
import org.apache.commons.compress.archivers.ArchiveInputStream;
import org.apache.commons.compress.utils.ArchiveUtils;
import org.apache.commons.compress.utils.IOUtils;

/**
 * Implements the "ar" archive format as an input stream.
 *
 * <p>Entries are returned one at a time by {@link #getNextArEntry()}; the
 * content of the most recently returned entry is then available through
 * {@link #read(byte[], int, int)}.</p>
 *
 * @NotThreadSafe
 *
 */
public class ArArchiveInputStream extends ArchiveInputStream {

    /** The 8 bytes every ar archive starts with: ASCII "!&lt;arch&gt;" followed by LF. */
    private static final byte[] SIGNATURE = {
        0x21, 0x3c, 0x61, 0x72, 0x63, 0x68, 0x3e, 0x0a
    };

    private final InputStream input;

    /** Number of bytes consumed from {@link #input} so far. */
    private long offset = 0;
    private boolean closed;

    /*
     * If getNextEntry has been called, the entry metadata is stored in
     * currentEntry.
     */
    private ArArchiveEntry currentEntry = null;

    // Storage area for extra long names (GNU ar)
    private byte[] namebuffer = null;

    /*
     * The offset where the current entry started. -1 if no entry has been
     * called
     */
    private long entryOffset = -1;

    // cached buffers - must only be used locally in the class (COMPRESS-172 - reduce garbage collection)
    private final byte[] NAME_BUF = new byte[16];
    private final byte[] LAST_MODIFIED_BUF = new byte[12];
    private final byte[] ID_BUF = new byte[6];
    private final byte[] FILE_MODE_BUF = new byte[8];
    private final byte[] LENGTH_BUF = new byte[10];

    /**
     * Constructs an Ar input stream with the referenced stream
     *
     * @param pInput
     *            the ar input stream
     */
    public ArArchiveInputStream(final InputStream pInput) {
        input = pInput;
        closed = false;
    }

    /**
     * Returns the next AR entry in this stream.
     *
     * <p>Any unread content of the previously returned entry is skipped
     * first. A GNU string table ("//") entry is consumed internally and never
     * returned to the caller.</p>
     *
     * @return the next AR entry, or {@code null} once the end of the archive
     *         has been reached.
     * @throws IOException
     *             if the entry could not be read, the archive is truncated
     *             or a header field is malformed
     */
    public ArArchiveEntry getNextArEntry() throws IOException {
        if (currentEntry != null) {
            // Discard whatever the caller left unread of the previous entry.
            final long entryEnd = entryOffset + currentEntry.getLength();
            IOUtils.skip(this, entryEnd - offset);
            currentEntry = null;
        }

        if (offset == 0) {
            // Nothing consumed yet: the global archive signature comes first.
            final byte[] expected = ArchiveUtils.toAsciiBytes(ArArchiveEntry.HEADER);
            final byte[] realized = new byte[expected.length];
            final int read = IOUtils.readFully(this, realized);
            if (read != expected.length) {
                throw new IOException("failed to read header. Occured at byte: " + getBytesRead());
            }
            for (int i = 0; i < expected.length; i++) {
                if (expected[i] != realized[i]) {
                    throw new IOException("invalid header " + ArchiveUtils.toAsciiString(realized));
                }
            }
        }

        // Entry headers start on even offsets; consume the padding byte.
        if (offset % 2 != 0 && read() < 0) {
            // hit eof
            return null;
        }

        // Detect the end of the archive by actually reading: available() is
        // only an estimate and legitimately returns 0 for streams that still
        // have data (it is the default for any InputStream not overriding it).
        final int nameRead = IOUtils.readFully(this, NAME_BUF);
        if (nameRead == 0) {
            return null;
        }
        if (nameRead != NAME_BUF.length) {
            throw new IOException("truncated ar archive, incomplete entry header at byte: "
                                  + getBytesRead());
        }
        readHeaderField(LAST_MODIFIED_BUF);
        readHeaderField(ID_BUF);
        // ID_BUF is shared by the user and group id fields, so the user id
        // has to be decoded before the buffer is overwritten.
        final int userId = asInt(ID_BUF, true);
        readHeaderField(ID_BUF);
        readHeaderField(FILE_MODE_BUF);
        readHeaderField(LENGTH_BUF);

        {
            final byte[] expected = ArchiveUtils.toAsciiBytes(ArArchiveEntry.TRAILER);
            final byte[] realized = new byte[expected.length];
            final int read = IOUtils.readFully(this, realized);
            if (read != expected.length) {
                throw new IOException("failed to read entry trailer. Occured at byte: " + getBytesRead());
            }
            for (int i = 0; i < expected.length; i++) {
                if (expected[i] != realized[i]) {
                    throw new IOException("invalid entry trailer. not read the content? Occured at byte: " + getBytesRead());
                }
            }
        }

        entryOffset = offset;

        // GNU ar uses a '/' to mark the end of the filename; this allows for
        // the use of spaces without the use of an extended filename.

        // entry name is stored as ASCII string
        String temp = ArchiveUtils.toAsciiString(NAME_BUF).trim();
        if (isGNUStringTable(temp)) { // GNU extended filenames entry
            currentEntry = readGNUStringTable(LENGTH_BUF);
            return getNextArEntry();
        }

        long len = asLong(LENGTH_BUF);
        if (len < 0) {
            throw new IOException("broken archive, entry with negative size: " + len);
        }
        if (temp.endsWith("/")) { // GNU terminator
            temp = temp.substring(0, temp.length() - 1);
        } else if (isGNULongName(temp)) {
            final int off = parseInt(temp.substring(1), 10); // get the offset
            temp = getExtendedName(off); // convert to the long name
        } else if (isBSDLongName(temp)) {
            temp = getBSDLongName(temp, len);
            // entry length contained the length of the file name in
            // addition to the real length of the entry.
            // assume file name was ASCII, there is no "standard" otherwise
            final int nameLen = temp.length();
            len -= nameLen;
            entryOffset += nameLen;
        }

        currentEntry = new ArArchiveEntry(temp, len, userId,
                                          asInt(ID_BUF, true),
                                          asInt(FILE_MODE_BUF, 8),
                                          asLong(LAST_MODIFIED_BUF));
        return currentEntry;
    }

    /**
     * Fills the given header field buffer completely from this stream.
     *
     * @param field the buffer to fill
     * @throws IOException if the stream ends before the buffer is full
     */
    private void readHeaderField(final byte[] field) throws IOException {
        final int read = IOUtils.readFully(this, field);
        if (read != field.length) {
            throw new IOException("truncated ar archive, incomplete entry header at byte: "
                                  + getBytesRead());
        }
    }

    /**
     * Get an extended name from the GNU extended name buffer.
     *
     * @param offset pointer to entry within the buffer
     * @return the extended file name; without trailing "/" if present.
     * @throws IOException if name not found, empty, or buffer not set up
     */
    private String getExtendedName(final int offset) throws IOException {
        if (namebuffer == null) {
            throw new IOException("Cannot process GNU long filename as no // record was found");
        }
        if (offset >= 0) {
            for (int i = offset; i < namebuffer.length; i++) {
                if (namebuffer[i] == '\012' || namebuffer[i] == 0) {
                    int end = i;
                    // Only look at the previous byte when it belongs to this
                    // name; this also keeps the index inside the buffer.
                    if (end > offset && namebuffer[end - 1] == '/') {
                        end--; // drop trailing /
                    }
                    if (end == offset) {
                        break; // empty name, reported as a failure below
                    }
                    return ArchiveUtils.toAsciiString(namebuffer, offset, end - offset);
                }
            }
        }
        throw new IOException("Failed to read entry: " + offset);
    }

    /**
     * Parses a number, reporting malformed input as a broken archive.
     *
     * @param text the text to parse
     * @param base the radix to use
     * @return the parsed value
     * @throws IOException if the text is not a valid number for the radix
     */
    private static long parseLong(final String text, final int base) throws IOException {
        try {
            return Long.parseLong(text, base);
        } catch (final NumberFormatException ex) {
            throw new IOException("broken archive, unable to parse '" + text + "' as a number", ex);
        }
    }

    /**
     * Parses a number that must fit into an int, reporting malformed input
     * as a broken archive.
     *
     * @param text the text to parse
     * @param base the radix to use
     * @return the parsed value
     * @throws IOException if the text is not a valid int for the radix
     */
    private static int parseInt(final String text, final int base) throws IOException {
        try {
            return Integer.parseInt(text, base);
        } catch (final NumberFormatException ex) {
            throw new IOException("broken archive, unable to parse '" + text + "' as a number", ex);
        }
    }

    /** Decodes a header field holding a decimal long value. */
    private long asLong(final byte[] input) throws IOException {
        return parseLong(ArchiveUtils.toAsciiString(input).trim(), 10);
    }

    /** Decodes a header field holding a decimal int value. */
    private int asInt(final byte[] input) throws IOException {
        return asInt(input, 10, false);
    }

    /** Decodes a decimal int field, optionally reading a blank field as 0. */
    private int asInt(final byte[] input, final boolean treatBlankAsZero) throws IOException {
        return asInt(input, 10, treatBlankAsZero);
    }

    /** Decodes a header field holding an int value in the given radix. */
    private int asInt(final byte[] input, final int base) throws IOException {
        return asInt(input, base, false);
    }

    /**
     * Decodes a header field holding an int value.
     *
     * @param input the raw ASCII field, surrounding whitespace is ignored
     * @param base the radix of the number
     * @param treatBlankAsZero whether a blank field is read as 0 instead of
     *        being rejected
     * @return the decoded value
     * @throws IOException if the field does not hold a valid number
     */
    private int asInt(final byte[] input, final int base, final boolean treatBlankAsZero)
        throws IOException {
        final String string = ArchiveUtils.toAsciiString(input).trim();
        if (string.length() == 0 && treatBlankAsZero) {
            return 0;
        }
        return parseInt(string, base);
    }

    /*
     * (non-Javadoc)
     *
     * @see
     * org.apache.commons.compress.archivers.ArchiveInputStream#getNextEntry()
     */
    @Override
    public ArchiveEntry getNextEntry() throws IOException {
        return getNextArEntry();
    }

    /*
     * (non-Javadoc)
     *
     * @see java.io.InputStream#close()
     */
    @Override
    public void close() throws IOException {
        if (!closed) {
            closed = true;
            input.close();
        }
        currentEntry = null;
    }

    /*
     * (non-Javadoc)
     *
     * @see java.io.InputStream#read(byte[], int, int)
     */
    @Override
    public int read(final byte[] b, final int off, final int len) throws IOException {
        if (len == 0) {
            // InputStream contract: a zero-length request reads nothing and
            // returns 0, it never signals the end of the data.
            return 0;
        }
        int toRead = len;
        // Without a current entry the read is unrestricted: this class reads
        // the archive and entry headers through this very method.
        if (currentEntry != null) {
            final long entryEnd = entryOffset + currentEntry.getLength();
            if (len > 0 && entryEnd > offset) {
                // never read past the end of the current entry
                toRead = (int) Math.min(len, entryEnd - offset);
            } else {
                return -1;
            }
        }
        final int ret = this.input.read(b, off, toRead);
        count(ret);
        if (ret > 0) {
            offset += ret;
        }
        return ret;
    }

    /**
     * Checks if the signature matches ASCII "!&lt;arch&gt;" followed by a single LF
     * control character
     *
     * @param signature
     *            the bytes to check
     * @param length
     *            the number of bytes to check
     * @return true, if this stream is an Ar archive stream, false otherwise
     *         (including when fewer than 8 bytes are supplied)
     */
    public static boolean matches(final byte[] signature, final int length) {
        // 3c21 7261 6863 0a3e

        if (signature == null || length < SIGNATURE.length
            || signature.length < SIGNATURE.length) {
            return false;
        }
        for (int i = 0; i < SIGNATURE.length; i++) {
            if (signature[i] != SIGNATURE[i]) {
                return false;
            }
        }
        return true;
    }

    static final String BSD_LONGNAME_PREFIX = "#1/";
    private static final int BSD_LONGNAME_PREFIX_LEN =
        BSD_LONGNAME_PREFIX.length();
    private static final String BSD_LONGNAME_PATTERN =
        "^" + BSD_LONGNAME_PREFIX + "\\d+";

    /**
     * Does the name look like it is a long name (or a name containing
     * spaces) as encoded by BSD ar?
     *
     * <p>From the FreeBSD ar(5) man page:</p>
     * <pre>
     * BSD   In the BSD variant, names that are shorter than 16
     *       characters and without embedded spaces are stored
     *       directly in this field.  If a name has an embedded
     *       space, or if it is longer than 16 characters, then
     *       the string "#1/" followed by the decimal represen-
     *       tation of the length of the file name is placed in
     *       this field. The actual file name is stored immedi-
     *       ately after the archive header.  The content of the
     *       archive member follows the file name.  The ar_size
     *       field of the header (see below) will then hold the
     *       sum of the size of the file name and the size of
     *       the member.
     * </pre>
     *
     * @since 1.3
     */
    private static boolean isBSDLongName(final String name) {
        return name != null && name.matches(BSD_LONGNAME_PATTERN);
    }

    /**
     * Reads the real name from the current stream assuming the very
     * first bytes to be read are the real file name.
     *
     * @param bsdLongName the content of the header's name field, "#1/"
     *        followed by the decimal length of the real name
     * @param entryLength the size recorded in the entry header, which covers
     *        the name as well as the content and therefore bounds the name
     * @return the real file name
     * @throws IOException if the name length is malformed or larger than the
     *         entry, or the stream ends before the name has been read
     *
     * @see #isBSDLongName
     *
     * @since 1.3
     */
    private String getBSDLongName(final String bsdLongName, final long entryLength)
        throws IOException {
        final int nameLen =
            parseInt(bsdLongName.substring(BSD_LONGNAME_PREFIX_LEN), 10);
        // Validate before allocating: the length comes from the archive.
        if (nameLen > entryLength) {
            throw new IOException("invalid BSD long name, name length " + nameLen
                                  + " exceeds entry size " + entryLength);
        }
        final byte[] name = new byte[nameLen];
        final int read = IOUtils.readFully(this, name);
        if (read != nameLen) {
            throw new EOFException();
        }
        return ArchiveUtils.toAsciiString(name);
    }

    private static final String GNU_STRING_TABLE_NAME = "//";

    /**
     * Is this the name of the "Archive String Table" as used by
     * SVR4/GNU to store long file names?
     *
     * <p>GNU ar stores multiple extended filenames in the data section
     * of a file with the name "//", this record is referred to by
     * future headers.</p>
     *
     * <p>A header references an extended filename by storing a "/"
     * followed by a decimal offset to the start of the filename in
     * the extended filename data section.</p>
     *
     * <p>The format of the "//" file itself is simply a list of the
     * long filenames, each separated by one or more LF
     * characters. Note that the decimal offsets are number of
     * characters, not line or string number within the "//" file.</p>
     */
    private static boolean isGNUStringTable(final String name) {
        return GNU_STRING_TABLE_NAME.equals(name);
    }

    /**
     * Reads the GNU archive String Table.
     *
     * @param length the raw size field of the "//" entry header
     * @return an entry describing the string table
     * @throws IOException if the size is malformed or negative, or the table
     *         cannot be read completely
     *
     * @see #isGNUStringTable
     */
    private ArArchiveEntry readGNUStringTable(final byte[] length) throws IOException {
        final int bufflen = asInt(length); // Assume length will fit in an int
        if (bufflen < 0) {
            throw new IOException("broken archive, // record with negative size: " + bufflen);
        }
        final byte[] table = new byte[bufflen];
        final int read = IOUtils.readFully(this, table, 0, bufflen);
        if (read != bufflen) {
            throw new IOException("Failed to read complete // record: expected="
                                  + bufflen + " read=" + read);
        }
        // Only publish the table once it has been read completely.
        namebuffer = table;
        return new ArArchiveEntry(GNU_STRING_TABLE_NAME, bufflen);
    }

    private static final String GNU_LONGNAME_PATTERN = "^/\\d+";

    /**
     * Does the name look like it is a long name (or a name containing
     * spaces) as encoded by SVR4/GNU ar?
     *
     * @see #isGNUStringTable
     */
    private static boolean isGNULongName(final String name) {
        return name != null && name.matches(GNU_LONGNAME_PATTERN);
    }
}