/*
 *  Licensed to the Apache Software Foundation (ASF) under one
 *  or more contributor license agreements.  See the NOTICE file
 *  distributed with this work for additional information
 *  regarding copyright ownership.  The ASF licenses this file
 *  to you under the Apache License, Version 2.0 (the
 *  "License"); you may not use this file except in compliance
 *  with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing,
 *  software distributed under the License is distributed on an
 *  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 *  KIND, either express or implied.  See the License for the
 *  specific language governing permissions and limitations
 *  under the License.
 */
package org.apache.commons.compress.archivers.ar;

import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;

import org.apache.commons.compress.archivers.ArchiveEntry;
import org.apache.commons.compress.archivers.ArchiveInputStream;
import org.apache.commons.compress.utils.ArchiveUtils;

/**
 * Implements the "ar" archive format as an input stream.
 *
 * <p>Besides the plain format, two long file name conventions are
 * understood: the BSD variant ("#1/&lt;length&gt;" with the name
 * stored in front of the member data) and the SVR4/GNU variant (a
 * "//" string table member referenced by "/&lt;offset&gt;" names).</p>
 *
 * @NotThreadSafe
 *
 */
public class ArArchiveInputStream extends ArchiveInputStream {

    /** The magic bytes every ar archive starts with: "!&lt;arch&gt;" plus LF. */
    private static final byte[] SIGNATURE = {
        0x21, 0x3c, 0x61, 0x72, 0x63, 0x68, 0x3e, 0x0a
    };

    static final String BSD_LONGNAME_PREFIX = "#1/";
    private static final int BSD_LONGNAME_PREFIX_LEN =
        BSD_LONGNAME_PREFIX.length();
    private static final String BSD_LONGNAME_PATTERN =
        "^" + BSD_LONGNAME_PREFIX + "\\d+";

    private static final String GNU_STRING_TABLE_NAME = "//";

    private static final String GNU_LONGNAME_PATTERN = "^/\\d+";

    private final InputStream input;

    /** Number of bytes consumed from {@link #input} so far. */
    private long offset = 0;
    private boolean closed;

    /*
     * If getNextEntry has been called, the entry metadata is stored in
     * currentEntry.
     */
    private ArArchiveEntry currentEntry = null;

    // Storage area for extra long names (GNU ar)
    private byte[] namebuffer = null;

    /*
     * The offset where the data of the current entry started. -1 if no
     * entry has been read yet.
     */
    private long entryOffset = -1;

    /**
     * Constructs an Ar input stream with the referenced stream
     *
     * @param pInput
     *            the ar input stream
     */
    public ArArchiveInputStream(final InputStream pInput) {
        input = pInput;
        closed = false;
    }

    /**
     * Returns the next AR entry in this stream.
     *
     * <p>Any unread data of the previous entry is skipped first. The
     * GNU string table member ("//") is consumed transparently and
     * never returned to the caller.</p>
     *
     * @return the next AR entry, or {@code null} once the end of the
     *         archive has been reached.
     * @throws IOException
     *             if the entry could not be read
     */
    public ArArchiveEntry getNextArEntry() throws IOException {
        if (currentEntry != null) {
            final long entryEnd = entryOffset + currentEntry.getLength();
            while (offset < entryEnd) {
                if (read() == -1) {
                    // hit EOF before previous entry was complete
                    // TODO: throw an exception instead?
                    return null;
                }
            }
            currentEntry = null;
        }

        if (offset == 0) {
            // very first call: verify the global archive magic
            final byte[] expected = ArchiveUtils.toAsciiBytes(ArArchiveEntry.HEADER);
            final byte[] realized = new byte[expected.length];
            final int read = readFully(realized);
            if (read != expected.length) {
                throw new IOException("failed to read header. Occured at byte: " + getBytesRead());
            }
            for (int i = 0; i < expected.length; i++) {
                if (expected[i] != realized[i]) {
                    throw new IOException("invalid header " + ArchiveUtils.toAsciiString(realized));
                }
            }
        }

        // entry headers are aligned on even offsets; skip the pad byte
        if (offset % 2 != 0 && read() < 0) {
            // hit eof
            return null;
        }

        final byte[] name = new byte[16];
        final byte[] lastmodified = new byte[12];
        final byte[] userid = new byte[6];
        final byte[] groupid = new byte[6];
        final byte[] filemode = new byte[8];
        final byte[] length = new byte[10];

        // End of archive is detected by actually hitting EOF instead of
        // consulting available(), which may return 0 although more data
        // will arrive.
        final int nameRead = readFully(name);
        if (nameRead == 0) {
            return null;
        }
        if (nameRead < name.length) {
            throw new IOException("truncated entry header. Occurred at byte: " + getBytesRead());
        }
        readHeaderField(lastmodified);
        readHeaderField(userid);
        readHeaderField(groupid);
        readHeaderField(filemode);
        readHeaderField(length);

        {
            final byte[] expected = ArchiveUtils.toAsciiBytes(ArArchiveEntry.TRAILER);
            final byte[] realized = new byte[expected.length];
            final int read = readFully(realized);
            if (read != expected.length) {
                throw new IOException("failed to read entry trailer. Occured at byte: " + getBytesRead());
            }
            for (int i = 0; i < expected.length; i++) {
                if (expected[i] != realized[i]) {
                    throw new IOException("invalid entry trailer. not read the content? Occured at byte: " + getBytesRead());
                }
            }
        }

        entryOffset = offset;

        // GNU ar uses a '/' to mark the end of the filename; this allows
        // for the use of spaces without the use of an extended filename.

        // entry name is stored as ASCII string
        String temp = ArchiveUtils.toAsciiString(name).trim();
        long len = asLong(length);

        if (isGNUStringTable(temp)) { // GNU extended filenames entry
            currentEntry = readGNUStringTable(length);
            return getNextArEntry();
        } else if (temp.endsWith("/")) { // GNU terminator
            temp = temp.substring(0, temp.length() - 1);
        } else if (isGNULongName(temp)) {
            final int nameOffset = Integer.parseInt(temp.substring(1)); // get the offset
            temp = getExtendedName(nameOffset); // convert to the long name
        } else if (isBSDLongName(temp)) {
            temp = getBSDLongName(temp);
            // entry length contained the length of the file name in
            // addition to the real length of the entry.
            // assume file name was ASCII, there is no "standard" otherwise
            final int nameLen = temp.length();
            len -= nameLen;
            // getBSDLongName has advanced offset by nameLen as well, so
            // the member data starts exactly at the current offset
            entryOffset += nameLen;
        }

        currentEntry = new ArArchiveEntry(temp, len, asInt(userid, true),
                                          asInt(groupid, true), asInt(filemode, 8),
                                          asLong(lastmodified));
        return currentEntry;
    }

    /**
     * Get an extended name from the GNU extended name buffer.
     *
     * @param nameOffset pointer to entry within the buffer
     * @return the extended file name; without trailing "/" if present.
     * @throws IOException if name not found or buffer not set up
     */
    private String getExtendedName(final int nameOffset) throws IOException {
        if (namebuffer == null) {
            throw new IOException("Cannot process GNU long filename as no // record was found");
        }
        for (int i = nameOffset; i < namebuffer.length; i++) {
            if (namebuffer[i] == '\012') {
                // only look at bytes that belong to this name; this also
                // keeps the index inside the buffer when i is 0
                if (i > nameOffset && namebuffer[i - 1] == '/') {
                    i--; // drop trailing /
                }
                return ArchiveUtils.toAsciiString(namebuffer, nameOffset, i - nameOffset);
            }
        }
        throw new IOException("Failed to read entry: " + nameOffset);
    }

    /**
     * Parses a space padded ASCII header field as decimal long.
     */
    private long asLong(final byte[] input) {
        return Long.parseLong(ArchiveUtils.toAsciiString(input).trim());
    }

    private int asInt(final byte[] input) {
        return asInt(input, 10, false);
    }

    private int asInt(final byte[] input, final boolean treatBlankAsZero) {
        return asInt(input, 10, treatBlankAsZero);
    }

    private int asInt(final byte[] input, final int base) {
        return asInt(input, base, false);
    }

    /**
     * Parses a space padded ASCII header field as int.
     *
     * @param input the raw header field
     * @param base the radix to use
     * @param treatBlankAsZero whether an all-blank field yields 0
     *        rather than a NumberFormatException
     */
    private int asInt(final byte[] input, final int base, final boolean treatBlankAsZero) {
        final String string = ArchiveUtils.toAsciiString(input).trim();
        if (string.length() == 0 && treatBlankAsZero) {
            return 0;
        }
        return Integer.parseInt(string, base);
    }

    /*
     * (non-Javadoc)
     *
     * @see
     * org.apache.commons.compress.archivers.ArchiveInputStream#getNextEntry()
     */
    @Override
    public ArchiveEntry getNextEntry() throws IOException {
        return getNextArEntry();
    }

    /*
     * (non-Javadoc)
     *
     * @see java.io.InputStream#close()
     */
    @Override
    public void close() throws IOException {
        if (!closed) {
            closed = true;
            input.close();
        }
        currentEntry = null;
    }

    /**
     * Reads data of the current entry. While an entry is active, at
     * most the remaining bytes of that entry are returned and -1
     * signals the end of the entry.
     *
     * @see java.io.InputStream#read(byte[], int, int)
     */
    @Override
    public int read(final byte[] b, final int off, final int len) throws IOException {
        if (len == 0) {
            // InputStream contract: a zero length request reads nothing
            return 0;
        }
        int toRead = len;
        if (currentEntry != null) {
            final long entryEnd = entryOffset + currentEntry.getLength();
            if (len > 0 && entryEnd > offset) {
                toRead = (int) Math.min(len, entryEnd - offset);
            } else {
                return -1;
            }
        }
        final int ret = this.input.read(b, off, toRead);
        trackReadBytes(ret);
        return ret;
    }

    /**
     * Records bytes consumed from the underlying stream in both the
     * statistics of the base class and the local offset.
     *
     * @param read result of a read call, non-positive values are ignored
     */
    private void trackReadBytes(final int read) {
        if (read > 0) {
            count(read);
            offset += read;
        }
    }

    /**
     * Fills the buffer from the underlying stream, looping over short
     * reads, and keeps the offset up to date.
     *
     * <p>This bypasses the per-entry limit of
     * {@link #read(byte[], int, int)} and is meant for archive
     * metadata only.</p>
     *
     * @param buffer the buffer to fill
     * @return the number of bytes read, less than the buffer's length
     *         only if the end of the stream has been reached
     * @throws IOException if the underlying stream fails
     */
    private int readFully(final byte[] buffer) throws IOException {
        int total = 0;
        while (total < buffer.length) {
            final int readNow = input.read(buffer, total, buffer.length - total);
            if (readNow < 0) {
                break;
            }
            trackReadBytes(readNow);
            total += readNow;
        }
        return total;
    }

    /**
     * Reads one complete entry header field.
     *
     * @param field the buffer to fill
     * @throws IOException if the stream ends inside the field
     */
    private void readHeaderField(final byte[] field) throws IOException {
        if (readFully(field) != field.length) {
            throw new IOException("truncated entry header. Occurred at byte: " + getBytesRead());
        }
    }

    /**
     * Checks if the signature matches ASCII "!&lt;arch&gt;" followed by a single LF
     * control character
     *
     * @param signature
     *            the bytes to check
     * @param length
     *            the number of bytes to check
     * @return true, if this stream is an Ar archive stream, false otherwise
     */
    public static boolean matches(final byte[] signature, final int length) {
        // 3c21 7261 6863 0a3e

        if (length < SIGNATURE.length) {
            return false;
        }
        for (int i = 0; i < SIGNATURE.length; i++) {
            if (signature[i] != SIGNATURE[i]) {
                return false;
            }
        }
        return true;
    }

    /**
     * Does the name look like it is a long name (or a name containing
     * spaces) as encoded by BSD ar?
     *
     * <p>From the FreeBSD ar(5) man page:</p>
     * <pre>
     * BSD   In the BSD variant, names that are shorter than 16
     *       characters and without embedded spaces are stored
     *       directly in this field.  If a name has an embedded
     *       space, or if it is longer than 16 characters, then
     *       the string "#1/" followed by the decimal represen-
     *       tation of the length of the file name is placed in
     *       this field. The actual file name is stored immedi-
     *       ately after the archive header.  The content of the
     *       archive member follows the file name.  The ar_size
     *       field of the header (see below) will then hold the
     *       sum of the size of the file name and the size of
     *       the member.
     * </pre>
     *
     * @since Apache Commons Compress 1.3
     */
    private static boolean isBSDLongName(final String name) {
        return name != null && name.matches(BSD_LONGNAME_PATTERN);
    }

    /**
     * Reads the real name from the current stream assuming the very
     * first bytes to be read are the real file name.
     *
     * <p>The bytes of the name are accounted for in the stream offset
     * so the limit of the entry's data is calculated correctly.</p>
     *
     * @param bsdLongName the header name of the form "#1/&lt;length&gt;"
     * @return the file name read from the stream
     * @throws EOFException if the stream ends inside the name
     * @throws IOException if the underlying stream fails
     *
     * @see #isBSDLongName
     *
     * @since Apache Commons Compress 1.3
     */
    private String getBSDLongName(final String bsdLongName) throws IOException {
        final int nameLen =
            Integer.parseInt(bsdLongName.substring(BSD_LONGNAME_PREFIX_LEN));
        final byte[] name = new byte[nameLen];
        if (readFully(name) != nameLen) {
            throw new EOFException();
        }
        return ArchiveUtils.toAsciiString(name);
    }

    /**
     * Is this the name of the "Archive String Table" as used by
     * SVR4/GNU to store long file names?
     *
     * <p>GNU ar stores multiple extended filenames in the data section
     * of a file with the name "//", this record is referred to by
     * future headers.</p>
     *
     * <p>A header references an extended filename by storing a "/"
     * followed by a decimal offset to the start of the filename in
     * the extended filename data section.</p>
     *
     * <p>The format of the "//" file itself is simply a list of the
     * long filenames, each separated by one or more LF
     * characters. Note that the decimal offsets are number of
     * characters, not line or string number within the "//" file.</p>
     */
    private static boolean isGNUStringTable(final String name) {
        return GNU_STRING_TABLE_NAME.equals(name);
    }

    /**
     * Reads the GNU archive String Table.
     *
     * @param length the raw size field of the table's header
     * @return an entry describing the string table
     * @throws IOException if the table cannot be read completely
     *
     * @see #isGNUStringTable
     */
    private ArArchiveEntry readGNUStringTable(final byte[] length) throws IOException {
        final int bufflen = asInt(length); // Assume length will fit in an int
        final byte[] table = new byte[bufflen];
        final int read = readFully(table);
        if (read != bufflen) {
            throw new IOException("Failed to read complete // record: expected="
                                  + bufflen + " read=" + read);
        }
        namebuffer = table;
        return new ArArchiveEntry(GNU_STRING_TABLE_NAME, bufflen);
    }

    /**
     * Does the name look like it is a long name (or a name containing
     * spaces) as encoded by SVR4/GNU ar?
     *
     * @see #isGNUStringTable
     */
    private boolean isGNULongName(final String name) {
        return name != null && name.matches(GNU_LONGNAME_PATTERN);
    }
}