001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *   http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 *
017 */
018
019package org.apache.commons.compress.utils;
020
021import java.nio.charset.Charset;
022
023/**
024 * Charsets required of every implementation of the Java platform.
025 *
026 * From the Java documentation <a href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
027 * charsets</a>:
028 * <p>
029 * <cite>Every implementation of the Java platform is required to support the following character encodings. Consult the
030 * release documentation for your implementation to see if any other encodings are supported. Consult the release
031 * documentation for your implementation to see if any other encodings are supported. </cite>
032 * </p>
033 *
034 * <dl>
035 * <dt><code>US-ASCII</code></dt>
036 * <dd>Seven-bit ASCII, a.k.a. ISO646-US, a.k.a. the Basic Latin block of the Unicode character set.</dd>
037 * <dt><code>ISO-8859-1</code></dt>
038 * <dd>ISO Latin Alphabet No. 1, a.k.a. ISO-LATIN-1.</dd>
039 * <dt><code>UTF-8</code></dt>
040 * <dd>Eight-bit Unicode Transformation Format.</dd>
041 * <dt><code>UTF-16BE</code></dt>
042 * <dd>Sixteen-bit Unicode Transformation Format, big-endian byte order.</dd>
043 * <dt><code>UTF-16LE</code></dt>
044 * <dd>Sixteen-bit Unicode Transformation Format, little-endian byte order.</dd>
045 * <dt><code>UTF-16</code></dt>
046 * <dd>Sixteen-bit Unicode Transformation Format, byte order specified by a mandatory initial byte-order mark (either order
047 * accepted on input, big-endian used on output.)</dd>
048 * </dl>
049 *
050 * <p>This class best belongs in the Commons Lang or IO project. Even if a similar class is defined in another Commons
051 * component, it is not foreseen that Commons Compress would be made to depend on another Commons component.</p>
052 *
053 * @see <a href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
054 * @since 1.4
055 * @version $Id$
056 */
057public class Charsets {
058
059    //
060    // This class should only contain Charset instances for required encodings. This guarantees that it will load correctly and
061    // without delay on all Java platforms.
062    //
063
064    /**
065     * Returns the given Charset or the default Charset if the given Charset is null.
066     *
067     * @param charset
068     *            A charset or null.
069     * @return the given Charset or the default Charset if the given Charset is null
070     */
071    public static Charset toCharset(final Charset charset) {
072        return charset == null ? Charset.defaultCharset() : charset;
073    }
074
075    /**
076     * Returns a Charset for the named charset. If the name is null, return the default Charset.
077     *
078     * @param charset
079     *            The name of the requested charset, may be null.
080     * @return a Charset for the named charset
081     * @throws java.nio.charset.UnsupportedCharsetException
082     *             If the named charset is unavailable
083     * @throws java.nio.charset.IllegalCharsetNameException
084     *             If the given charset name is illegal
085     */
086    public static Charset toCharset(final String charset) {
087        return charset == null ? Charset.defaultCharset() : Charset.forName(charset);
088    }
089
090    /**
091     * CharsetNamesISO Latin Alphabet No. 1, a.k.a. ISO-LATIN-1.
092     * <p>
093     * Every implementation of the Java platform is required to support this character encoding.
094     * </p>
095     *
096     * @see <a href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
097     */
098    public static final Charset ISO_8859_1 = Charset.forName(CharsetNames.ISO_8859_1);
099
100    /**
101     * <p>
102     * Seven-bit ASCII, also known as ISO646-US, also known as the Basic Latin block of the Unicode character set.
103     * </p>
104     * <p>
105     * Every implementation of the Java platform is required to support this character encoding.
106     * </p>
107     *
108     * @see <a href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
109     */
110    public static final Charset US_ASCII = Charset.forName(CharsetNames.US_ASCII);
111
112    /**
113     * <p>
114     * Sixteen-bit Unicode Transformation Format, The byte order specified by a mandatory initial byte-order mark
115     * (either order accepted on input, big-endian used on output)
116     * </p>
117     * <p>
118     * Every implementation of the Java platform is required to support this character encoding.
119     * </p>
120     *
121     * @see <a href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
122     */
123    public static final Charset UTF_16 = Charset.forName(CharsetNames.UTF_16);
124
125    /**
126     * <p>
127     * Sixteen-bit Unicode Transformation Format, big-endian byte order.
128     * </p>
129     * <p>
130     * Every implementation of the Java platform is required to support this character encoding.
131     * </p>
132     *
133     * @see <a href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
134     */
135    public static final Charset UTF_16BE = Charset.forName(CharsetNames.UTF_16BE);
136
137    /**
138     * <p>
139     * Sixteen-bit Unicode Transformation Format, little-endian byte order.
140     * </p>
141     * <p>
142     * Every implementation of the Java platform is required to support this character encoding.
143     * </p>
144     *
145     * @see <a href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
146     */
147    public static final Charset UTF_16LE = Charset.forName(CharsetNames.UTF_16LE);
148
149    /**
150     * <p>
151     * Eight-bit Unicode Transformation Format.
152     * </p>
153     * <p>
154     * Every implementation of the Java platform is required to support this character encoding.
155     * </p>
156     *
157     * @see <a href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
158     */
159    public static final Charset UTF_8 = Charset.forName(CharsetNames.UTF_8);
160}