001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, 013 * software distributed under the License is distributed on an 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 * KIND, either express or implied. See the License for the 016 * specific language governing permissions and limitations 017 * under the License. 018 */ 019package org.apache.commons.compress.archivers; 020 021import java.io.ByteArrayInputStream; 022import java.io.IOException; 023import java.io.InputStream; 024import java.io.OutputStream; 025 026import org.apache.commons.compress.archivers.ar.ArArchiveInputStream; 027import org.apache.commons.compress.archivers.ar.ArArchiveOutputStream; 028import org.apache.commons.compress.archivers.arj.ArjArchiveInputStream; 029import org.apache.commons.compress.archivers.cpio.CpioArchiveInputStream; 030import org.apache.commons.compress.archivers.cpio.CpioArchiveOutputStream; 031import org.apache.commons.compress.archivers.dump.DumpArchiveInputStream; 032import org.apache.commons.compress.archivers.jar.JarArchiveInputStream; 033import org.apache.commons.compress.archivers.jar.JarArchiveOutputStream; 034import org.apache.commons.compress.archivers.sevenz.SevenZFile; 035import org.apache.commons.compress.archivers.tar.TarArchiveInputStream; 036import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream; 037import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream; 038import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream; 039import org.apache.commons.compress.utils.IOUtils; 040 041/** 042 * Factory to create Archive[In|Out]putStreams from names or the first bytes of 043 * the InputStream. In order to add other implementations, you should extend 044 * ArchiveStreamFactory and override the appropriate methods (and call their 045 * implementation from super of course). 046 * 047 * Compressing a ZIP-File: 048 * 049 * <pre> 050 * final OutputStream out = new FileOutputStream(output); 051 * ArchiveOutputStream os = new ArchiveStreamFactory().createArchiveOutputStream(ArchiveStreamFactory.ZIP, out); 052 * 053 * os.putArchiveEntry(new ZipArchiveEntry("testdata/test1.xml")); 054 * IOUtils.copy(new FileInputStream(file1), os); 055 * os.closeArchiveEntry(); 056 * 057 * os.putArchiveEntry(new ZipArchiveEntry("testdata/test2.xml")); 058 * IOUtils.copy(new FileInputStream(file2), os); 059 * os.closeArchiveEntry(); 060 * os.close(); 061 * </pre> 062 * 063 * Decompressing a ZIP-File: 064 * 065 * <pre> 066 * final InputStream is = new FileInputStream(input); 067 * ArchiveInputStream in = new ArchiveStreamFactory().createArchiveInputStream(ArchiveStreamFactory.ZIP, is); 068 * ZipArchiveEntry entry = (ZipArchiveEntry)in.getNextEntry(); 069 * OutputStream out = new FileOutputStream(new File(dir, entry.getName())); 070 * IOUtils.copy(in, out); 071 * out.close(); 072 * in.close(); 073 * </pre> 074 * @Immutable provided that the deprecated method setEntryEncoding is not used. 075 * @ThreadSafe even if the deprecated method setEntryEncoding is used 076 */ 077public class ArchiveStreamFactory { 078 079 /** 080 * Constant (value {@value}) used to identify the AR archive format. 081 * @since 1.1 082 */ 083 public static final String AR = "ar"; 084 /** 085 * Constant (value {@value}) used to identify the ARJ archive format. 086 * Not supported as an output stream type. 087 * @since 1.6 088 */ 089 public static final String ARJ = "arj"; 090 /** 091 * Constant (value {@value}) used to identify the CPIO archive format. 092 * @since 1.1 093 */ 094 public static final String CPIO = "cpio"; 095 /** 096 * Constant (value {@value}) used to identify the Unix DUMP archive format. 097 * Not supported as an output stream type. 098 * @since 1.3 099 */ 100 public static final String DUMP = "dump"; 101 /** 102 * Constant (value {@value}) used to identify the JAR archive format. 103 * @since 1.1 104 */ 105 public static final String JAR = "jar"; 106 /** 107 * Constant used to identify the TAR archive format. 108 * @since 1.1 109 */ 110 public static final String TAR = "tar"; 111 /** 112 * Constant (value {@value}) used to identify the ZIP archive format. 113 * @since 1.1 114 */ 115 public static final String ZIP = "zip"; 116 /** 117 * Constant (value {@value}) used to identify the 7z archive format. 118 * @since 1.8 119 */ 120 public static final String SEVEN_Z = "7z"; 121 122 /** 123 * Entry encoding, null for the platform default. 124 */ 125 private final String encoding; 126 127 /** 128 * Entry encoding, null for the default. 129 */ 130 private volatile String entryEncoding = null; 131 132 /** 133 * Create an instance using the platform default encoding. 134 */ 135 public ArchiveStreamFactory() { 136 this(null); 137 } 138 139 /** 140 * Create an instance using the specified encoding. 141 * 142 * @param encoding the encoding to be used. 143 * 144 * @since 1.10 145 */ 146 public ArchiveStreamFactory(final String encoding) { 147 super(); 148 this.encoding = encoding; 149 // Also set the original field so can continue to use it. 150 this.entryEncoding = encoding; 151 } 152 153 /** 154 * Returns the encoding to use for arj, jar, zip, dump, cpio and tar 155 * files, or null for the archiver default. 156 * 157 * @return entry encoding, or null for the archiver default 158 * @since 1.5 159 */ 160 public String getEntryEncoding() { 161 return entryEncoding; 162 } 163 164 /** 165 * Sets the encoding to use for arj, jar, zip, dump, cpio and tar files. Use null for the archiver default. 166 * 167 * @param entryEncoding the entry encoding, null uses the archiver default. 168 * @since 1.5 169 * @deprecated 1.10 use {@link #ArchiveStreamFactory(String)} to specify the encoding 170 * @throws IllegalStateException if the constructor {@link #ArchiveStreamFactory(String)} 171 * was used to specify the factory encoding. 172 */ 173 @Deprecated 174 public void setEntryEncoding(final String entryEncoding) { 175 // Note: this does not detect new ArchiveStreamFactory(null) but that does not set the encoding anyway 176 if (encoding != null) { 177 throw new IllegalStateException("Cannot overide encoding set by the constructor"); 178 } 179 this.entryEncoding = entryEncoding; 180 } 181 182 /** 183 * Create an archive input stream from an archiver name and an input stream. 184 * 185 * @param archiverName the archive name, 186 * i.e. {@value #AR}, {@value #ARJ}, {@value #ZIP}, {@value #TAR}, {@value #JAR}, {@value #CPIO}, {@value #DUMP} or {@value #SEVEN_Z} 187 * @param in the input stream 188 * @return the archive input stream 189 * @throws ArchiveException if the archiver name is not known 190 * @throws StreamingNotSupportedException if the format cannot be 191 * read from a stream 192 * @throws IllegalArgumentException if the archiver name or stream is null 193 */ 194 public ArchiveInputStream createArchiveInputStream( 195 final String archiverName, final InputStream in) 196 throws ArchiveException { 197 198 if (archiverName == null) { 199 throw new IllegalArgumentException("Archivername must not be null."); 200 } 201 202 if (in == null) { 203 throw new IllegalArgumentException("InputStream must not be null."); 204 } 205 206 if (AR.equalsIgnoreCase(archiverName)) { 207 return new ArArchiveInputStream(in); 208 } 209 if (ARJ.equalsIgnoreCase(archiverName)) { 210 if (entryEncoding != null) { 211 return new ArjArchiveInputStream(in, entryEncoding); 212 } 213 return new ArjArchiveInputStream(in); 214 } 215 if (ZIP.equalsIgnoreCase(archiverName)) { 216 if (entryEncoding != null) { 217 return new ZipArchiveInputStream(in, entryEncoding); 218 } 219 return new ZipArchiveInputStream(in); 220 } 221 if (TAR.equalsIgnoreCase(archiverName)) { 222 if (entryEncoding != null) { 223 return new TarArchiveInputStream(in, entryEncoding); 224 } 225 return new TarArchiveInputStream(in); 226 } 227 if (JAR.equalsIgnoreCase(archiverName)) { 228 if (entryEncoding != null) { 229 return new JarArchiveInputStream(in, entryEncoding); 230 } 231 return new JarArchiveInputStream(in); 232 } 233 if (CPIO.equalsIgnoreCase(archiverName)) { 234 if (entryEncoding != null) { 235 return new CpioArchiveInputStream(in, entryEncoding); 236 } 237 return new CpioArchiveInputStream(in); 238 } 239 if (DUMP.equalsIgnoreCase(archiverName)) { 240 if (entryEncoding != null) { 241 return new DumpArchiveInputStream(in, entryEncoding); 242 } 243 return new DumpArchiveInputStream(in); 244 } 245 if (SEVEN_Z.equalsIgnoreCase(archiverName)) { 246 throw new StreamingNotSupportedException(SEVEN_Z); 247 } 248 249 throw new ArchiveException("Archiver: " + archiverName + " not found."); 250 } 251 252 /** 253 * Create an archive output stream from an archiver name and an output stream. 254 * 255 * @param archiverName the archive name, 256 * i.e. {@value #AR}, {@value #ZIP}, {@value #TAR}, {@value #JAR} or {@value #CPIO} 257 * @param out the output stream 258 * @return the archive output stream 259 * @throws ArchiveException if the archiver name is not known 260 * @throws StreamingNotSupportedException if the format cannot be 261 * written to a stream 262 * @throws IllegalArgumentException if the archiver name or stream is null 263 */ 264 public ArchiveOutputStream createArchiveOutputStream( 265 final String archiverName, final OutputStream out) 266 throws ArchiveException { 267 if (archiverName == null) { 268 throw new IllegalArgumentException("Archivername must not be null."); 269 } 270 if (out == null) { 271 throw new IllegalArgumentException("OutputStream must not be null."); 272 } 273 274 if (AR.equalsIgnoreCase(archiverName)) { 275 return new ArArchiveOutputStream(out); 276 } 277 if (ZIP.equalsIgnoreCase(archiverName)) { 278 final ZipArchiveOutputStream zip = new ZipArchiveOutputStream(out); 279 if (entryEncoding != null) { 280 zip.setEncoding(entryEncoding); 281 } 282 return zip; 283 } 284 if (TAR.equalsIgnoreCase(archiverName)) { 285 if (entryEncoding != null) { 286 return new TarArchiveOutputStream(out, entryEncoding); 287 } 288 return new TarArchiveOutputStream(out); 289 } 290 if (JAR.equalsIgnoreCase(archiverName)) { 291 if (entryEncoding != null) { 292 return new JarArchiveOutputStream(out, entryEncoding); 293 } 294 return new JarArchiveOutputStream(out); 295 } 296 if (CPIO.equalsIgnoreCase(archiverName)) { 297 if (entryEncoding != null) { 298 return new CpioArchiveOutputStream(out, entryEncoding); 299 } 300 return new CpioArchiveOutputStream(out); 301 } 302 if (SEVEN_Z.equalsIgnoreCase(archiverName)) { 303 throw new StreamingNotSupportedException(SEVEN_Z); 304 } 305 throw new ArchiveException("Archiver: " + archiverName + " not found."); 306 } 307 308 /** 309 * Create an archive input stream from an input stream, autodetecting 310 * the archive type from the first few bytes of the stream. The InputStream 311 * must support marks, like BufferedInputStream. 312 * 313 * @param in the input stream 314 * @return the archive input stream 315 * @throws ArchiveException if the archiver name is not known 316 * @throws StreamingNotSupportedException if the format cannot be 317 * read from a stream 318 * @throws IllegalArgumentException if the stream is null or does not support mark 319 */ 320 public ArchiveInputStream createArchiveInputStream(final InputStream in) 321 throws ArchiveException { 322 if (in == null) { 323 throw new IllegalArgumentException("Stream must not be null."); 324 } 325 326 if (!in.markSupported()) { 327 throw new IllegalArgumentException("Mark is not supported."); 328 } 329 330 final byte[] signature = new byte[12]; 331 in.mark(signature.length); 332 try { 333 int signatureLength = IOUtils.readFully(in, signature); 334 in.reset(); 335 if (ZipArchiveInputStream.matches(signature, signatureLength)) { 336 return createArchiveInputStream(ZIP, in); 337 } else if (JarArchiveInputStream.matches(signature, signatureLength)) { 338 return createArchiveInputStream(JAR, in); 339 } else if (ArArchiveInputStream.matches(signature, signatureLength)) { 340 return createArchiveInputStream(AR, in); 341 } else if (CpioArchiveInputStream.matches(signature, signatureLength)) { 342 return createArchiveInputStream(CPIO, in); 343 } else if (ArjArchiveInputStream.matches(signature, signatureLength)) { 344 return createArchiveInputStream(ARJ, in); 345 } else if (SevenZFile.matches(signature, signatureLength)) { 346 throw new StreamingNotSupportedException(SEVEN_Z); 347 } 348 349 // Dump needs a bigger buffer to check the signature; 350 final byte[] dumpsig = new byte[32]; 351 in.mark(dumpsig.length); 352 signatureLength = IOUtils.readFully(in, dumpsig); 353 in.reset(); 354 if (DumpArchiveInputStream.matches(dumpsig, signatureLength)) { 355 return createArchiveInputStream(DUMP, in); 356 } 357 358 // Tar needs an even bigger buffer to check the signature; read the first block 359 final byte[] tarheader = new byte[512]; 360 in.mark(tarheader.length); 361 signatureLength = IOUtils.readFully(in, tarheader); 362 in.reset(); 363 if (TarArchiveInputStream.matches(tarheader, signatureLength)) { 364 return createArchiveInputStream(TAR, in); 365 } 366 // COMPRESS-117 - improve auto-recognition 367 if (signatureLength >= 512) { 368 TarArchiveInputStream tais = null; 369 try { 370 tais = new TarArchiveInputStream(new ByteArrayInputStream(tarheader)); 371 // COMPRESS-191 - verify the header checksum 372 if (tais.getNextTarEntry().isCheckSumOK()) { 373 return createArchiveInputStream(TAR, in); 374 } 375 } catch (final Exception e) { // NOPMD 376 // can generate IllegalArgumentException as well 377 // as IOException 378 // autodetection, simply not a TAR 379 // ignored 380 } finally { 381 IOUtils.closeQuietly(tais); 382 } 383 } 384 } catch (final IOException e) { 385 throw new ArchiveException("Could not use reset and mark operations.", e); 386 } 387 388 throw new ArchiveException("No Archiver found for the stream signature"); 389 } 390 391}