001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.commons.compress.archivers;
020
021import java.io.ByteArrayInputStream;
022import java.io.IOException;
023import java.io.InputStream;
024import java.io.OutputStream;
025
026import org.apache.commons.compress.archivers.ar.ArArchiveInputStream;
027import org.apache.commons.compress.archivers.ar.ArArchiveOutputStream;
028import org.apache.commons.compress.archivers.arj.ArjArchiveInputStream;
029import org.apache.commons.compress.archivers.cpio.CpioArchiveInputStream;
030import org.apache.commons.compress.archivers.cpio.CpioArchiveOutputStream;
031import org.apache.commons.compress.archivers.dump.DumpArchiveInputStream;
032import org.apache.commons.compress.archivers.jar.JarArchiveInputStream;
033import org.apache.commons.compress.archivers.jar.JarArchiveOutputStream;
034import org.apache.commons.compress.archivers.sevenz.SevenZFile;
035import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
036import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream;
037import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream;
038import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream;
039import org.apache.commons.compress.utils.IOUtils;
040
041/**
042 * Factory to create Archive[In|Out]putStreams from names or the first bytes of
043 * the InputStream. In order to add other implementations, you should extend
044 * ArchiveStreamFactory and override the appropriate methods (and call their
045 * implementation from super of course).
046 * 
047 * Compressing a ZIP-File:
048 * 
049 * <pre>
050 * final OutputStream out = new FileOutputStream(output); 
051 * ArchiveOutputStream os = new ArchiveStreamFactory().createArchiveOutputStream(ArchiveStreamFactory.ZIP, out);
052 * 
053 * os.putArchiveEntry(new ZipArchiveEntry("testdata/test1.xml"));
054 * IOUtils.copy(new FileInputStream(file1), os);
055 * os.closeArchiveEntry();
056 *
057 * os.putArchiveEntry(new ZipArchiveEntry("testdata/test2.xml"));
058 * IOUtils.copy(new FileInputStream(file2), os);
059 * os.closeArchiveEntry();
060 * os.close();
061 * </pre>
062 * 
063 * Decompressing a ZIP-File:
064 * 
065 * <pre>
066 * final InputStream is = new FileInputStream(input); 
067 * ArchiveInputStream in = new ArchiveStreamFactory().createArchiveInputStream(ArchiveStreamFactory.ZIP, is);
068 * ZipArchiveEntry entry = (ZipArchiveEntry)in.getNextEntry();
069 * OutputStream out = new FileOutputStream(new File(dir, entry.getName()));
070 * IOUtils.copy(in, out);
071 * out.close();
072 * in.close();
073 * </pre>
074 * @Immutable provided that the deprecated method setEntryEncoding is not used.
075 * @ThreadSafe even if the deprecated method setEntryEncoding is used
076 */
077public class ArchiveStreamFactory {
078
079    /**
080     * Constant (value {@value}) used to identify the AR archive format.
081     * @since 1.1
082     */
083    public static final String AR = "ar";
084    /**
085     * Constant (value {@value}) used to identify the ARJ archive format.
086     * Not supported as an output stream type.
087     * @since 1.6
088     */
089    public static final String ARJ = "arj";
090    /**
091     * Constant (value {@value}) used to identify the CPIO archive format.
092     * @since 1.1
093     */
094    public static final String CPIO = "cpio";
095    /**
096     * Constant (value {@value}) used to identify the Unix DUMP archive format.
097     * Not supported as an output stream type.
098     * @since 1.3
099     */
100    public static final String DUMP = "dump";
101    /**
102     * Constant (value {@value}) used to identify the JAR archive format.
103     * @since 1.1
104     */
105    public static final String JAR = "jar";
106    /**
107     * Constant used to identify the TAR archive format.
108     * @since 1.1
109     */
110    public static final String TAR = "tar";
111    /**
112     * Constant (value {@value}) used to identify the ZIP archive format.
113     * @since 1.1
114     */
115    public static final String ZIP = "zip";
116    /**
117     * Constant (value {@value}) used to identify the 7z archive format.
118     * @since 1.8
119     */
120    public static final String SEVEN_Z = "7z";
121
122    /**
123     * Entry encoding, null for the platform default.
124     */
125    private final String encoding;
126
127    /**
128     * Entry encoding, null for the default.
129     */
130    private volatile String entryEncoding = null;
131
132    /**
133     * Create an instance using the platform default encoding.
134     */
135    public ArchiveStreamFactory() {
136        this(null);
137    }
138
139    /**
140     * Create an instance using the specified encoding.
141     *
142     * @param encoding the encoding to be used.
143     *
144     * @since 1.10
145     */
146    public ArchiveStreamFactory(final String encoding) {
147        super();
148        this.encoding = encoding;
149        // Also set the original field so can continue to use it.
150        this.entryEncoding = encoding;
151    }
152
153    /**
154     * Returns the encoding to use for arj, jar, zip, dump, cpio and tar
155     * files, or null for the archiver default.
156     *
157     * @return entry encoding, or null for the archiver default
158     * @since 1.5
159     */
160    public String getEntryEncoding() {
161        return entryEncoding;
162    }
163
164    /**
165     * Sets the encoding to use for arj, jar, zip, dump, cpio and tar files. Use null for the archiver default.
166     * 
167     * @param entryEncoding the entry encoding, null uses the archiver default.
168     * @since 1.5
169     * @deprecated 1.10 use {@link #ArchiveStreamFactory(String)} to specify the encoding
170     * @throws IllegalStateException if the constructor {@link #ArchiveStreamFactory(String)} 
171     * was used to specify the factory encoding.
172     */
173    @Deprecated
174    public void setEntryEncoding(final String entryEncoding) {
175        // Note: this does not detect new ArchiveStreamFactory(null) but that does not set the encoding anyway
176        if (encoding != null) {
177            throw new IllegalStateException("Cannot overide encoding set by the constructor");
178        }
179        this.entryEncoding = entryEncoding;
180    }
181
182    /**
183     * Create an archive input stream from an archiver name and an input stream.
184     * 
185     * @param archiverName the archive name,
186     * i.e. {@value #AR}, {@value #ARJ}, {@value #ZIP}, {@value #TAR}, {@value #JAR}, {@value #CPIO}, {@value #DUMP} or {@value #SEVEN_Z}
187     * @param in the input stream
188     * @return the archive input stream
189     * @throws ArchiveException if the archiver name is not known
190     * @throws StreamingNotSupportedException if the format cannot be
191     * read from a stream
192     * @throws IllegalArgumentException if the archiver name or stream is null
193     */
194    public ArchiveInputStream createArchiveInputStream(
195            final String archiverName, final InputStream in)
196            throws ArchiveException {
197
198        if (archiverName == null) {
199            throw new IllegalArgumentException("Archivername must not be null.");
200        }
201
202        if (in == null) {
203            throw new IllegalArgumentException("InputStream must not be null.");
204        }
205
206        if (AR.equalsIgnoreCase(archiverName)) {
207            return new ArArchiveInputStream(in);
208        }
209        if (ARJ.equalsIgnoreCase(archiverName)) {
210            if (entryEncoding != null) {
211                return new ArjArchiveInputStream(in, entryEncoding);
212            }
213            return new ArjArchiveInputStream(in);
214        }
215        if (ZIP.equalsIgnoreCase(archiverName)) {
216            if (entryEncoding != null) {
217                return new ZipArchiveInputStream(in, entryEncoding);
218            }
219            return new ZipArchiveInputStream(in);
220        }
221        if (TAR.equalsIgnoreCase(archiverName)) {
222            if (entryEncoding != null) {
223                return new TarArchiveInputStream(in, entryEncoding);
224            }
225            return new TarArchiveInputStream(in);
226        }
227        if (JAR.equalsIgnoreCase(archiverName)) {
228            if (entryEncoding != null) {
229                return new JarArchiveInputStream(in, entryEncoding);
230            }
231            return new JarArchiveInputStream(in);
232        }
233        if (CPIO.equalsIgnoreCase(archiverName)) {
234            if (entryEncoding != null) {
235                return new CpioArchiveInputStream(in, entryEncoding);
236            }
237            return new CpioArchiveInputStream(in);
238        }
239        if (DUMP.equalsIgnoreCase(archiverName)) {
240            if (entryEncoding != null) {
241                return new DumpArchiveInputStream(in, entryEncoding);
242            }
243            return new DumpArchiveInputStream(in);
244        }
245        if (SEVEN_Z.equalsIgnoreCase(archiverName)) {
246            throw new StreamingNotSupportedException(SEVEN_Z);
247        }
248
249        throw new ArchiveException("Archiver: " + archiverName + " not found.");
250    }
251
252    /**
253     * Create an archive output stream from an archiver name and an output stream.
254     * 
255     * @param archiverName the archive name,
256     * i.e. {@value #AR}, {@value #ZIP}, {@value #TAR}, {@value #JAR} or {@value #CPIO} 
257     * @param out the output stream
258     * @return the archive output stream
259     * @throws ArchiveException if the archiver name is not known
260     * @throws StreamingNotSupportedException if the format cannot be
261     * written to a stream
262     * @throws IllegalArgumentException if the archiver name or stream is null
263     */
264    public ArchiveOutputStream createArchiveOutputStream(
265            final String archiverName, final OutputStream out)
266            throws ArchiveException {
267        if (archiverName == null) {
268            throw new IllegalArgumentException("Archivername must not be null.");
269        }
270        if (out == null) {
271            throw new IllegalArgumentException("OutputStream must not be null.");
272        }
273
274        if (AR.equalsIgnoreCase(archiverName)) {
275            return new ArArchiveOutputStream(out);
276        }
277        if (ZIP.equalsIgnoreCase(archiverName)) {
278            final ZipArchiveOutputStream zip = new ZipArchiveOutputStream(out);
279            if (entryEncoding != null) {
280                zip.setEncoding(entryEncoding);
281            }
282            return zip;
283        }
284        if (TAR.equalsIgnoreCase(archiverName)) {
285            if (entryEncoding != null) {
286                return new TarArchiveOutputStream(out, entryEncoding);
287            }
288            return new TarArchiveOutputStream(out);
289        }
290        if (JAR.equalsIgnoreCase(archiverName)) {
291            if (entryEncoding != null) {
292                return new JarArchiveOutputStream(out, entryEncoding);
293            }
294            return new JarArchiveOutputStream(out);
295        }
296        if (CPIO.equalsIgnoreCase(archiverName)) {
297            if (entryEncoding != null) {
298                return new CpioArchiveOutputStream(out, entryEncoding);
299            }
300            return new CpioArchiveOutputStream(out);
301        }
302        if (SEVEN_Z.equalsIgnoreCase(archiverName)) {
303            throw new StreamingNotSupportedException(SEVEN_Z);
304        }
305        throw new ArchiveException("Archiver: " + archiverName + " not found.");
306    }
307
308    /**
309     * Create an archive input stream from an input stream, autodetecting
310     * the archive type from the first few bytes of the stream. The InputStream
311     * must support marks, like BufferedInputStream.
312     * 
313     * @param in the input stream
314     * @return the archive input stream
315     * @throws ArchiveException if the archiver name is not known
316     * @throws StreamingNotSupportedException if the format cannot be
317     * read from a stream
318     * @throws IllegalArgumentException if the stream is null or does not support mark
319     */
320    public ArchiveInputStream createArchiveInputStream(final InputStream in)
321            throws ArchiveException {
322        if (in == null) {
323            throw new IllegalArgumentException("Stream must not be null.");
324        }
325
326        if (!in.markSupported()) {
327            throw new IllegalArgumentException("Mark is not supported.");
328        }
329
330        final byte[] signature = new byte[12];
331        in.mark(signature.length);
332        try {
333            int signatureLength = IOUtils.readFully(in, signature);
334            in.reset();
335            if (ZipArchiveInputStream.matches(signature, signatureLength)) {
336                return createArchiveInputStream(ZIP, in);
337            } else if (JarArchiveInputStream.matches(signature, signatureLength)) {
338                return createArchiveInputStream(JAR, in);
339            } else if (ArArchiveInputStream.matches(signature, signatureLength)) {
340                return createArchiveInputStream(AR, in);
341            } else if (CpioArchiveInputStream.matches(signature, signatureLength)) {
342                return createArchiveInputStream(CPIO, in);
343            } else if (ArjArchiveInputStream.matches(signature, signatureLength)) {
344                return createArchiveInputStream(ARJ, in);
345            } else if (SevenZFile.matches(signature, signatureLength)) {
346                throw new StreamingNotSupportedException(SEVEN_Z);
347            }
348
349            // Dump needs a bigger buffer to check the signature;
350            final byte[] dumpsig = new byte[32];
351            in.mark(dumpsig.length);
352            signatureLength = IOUtils.readFully(in, dumpsig);
353            in.reset();
354            if (DumpArchiveInputStream.matches(dumpsig, signatureLength)) {
355                return createArchiveInputStream(DUMP, in);
356            }
357
358            // Tar needs an even bigger buffer to check the signature; read the first block
359            final byte[] tarheader = new byte[512];
360            in.mark(tarheader.length);
361            signatureLength = IOUtils.readFully(in, tarheader);
362            in.reset();
363            if (TarArchiveInputStream.matches(tarheader, signatureLength)) {
364                return createArchiveInputStream(TAR, in);
365            }
366            // COMPRESS-117 - improve auto-recognition
367            if (signatureLength >= 512) {
368                TarArchiveInputStream tais = null;
369                try {
370                    tais = new TarArchiveInputStream(new ByteArrayInputStream(tarheader));
371                    // COMPRESS-191 - verify the header checksum
372                    if (tais.getNextTarEntry().isCheckSumOK()) {
373                        return createArchiveInputStream(TAR, in);
374                    }
375                } catch (final Exception e) { // NOPMD
376                    // can generate IllegalArgumentException as well
377                    // as IOException
378                    // autodetection, simply not a TAR
379                    // ignored
380                } finally {
381                    IOUtils.closeQuietly(tais);
382                }
383            }
384        } catch (final IOException e) {
385            throw new ArchiveException("Could not use reset and mark operations.", e);
386        }
387
388        throw new ArchiveException("No Archiver found for the stream signature");
389    }
390
391}