001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.camel.util;
018
019import java.util.BitSet;
020import java.util.List;
021
022/**
023 * Encoder for unsafe URI characters.
024 * <p/>
025 * A good source for details is <a href="http://en.wikipedia.org/wiki/Url_encode">wikipedia url encode</a> article.
026 */
027public final class UnsafeUriCharactersEncoder {
028    private static BitSet unsafeCharactersFastParser;
029    private static BitSet unsafeCharactersRfc1738;
030    private static BitSet unsafeCharactersHttp;
031    private static final char[] HEX_DIGITS = {
032            '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C',
033            'D', 'E', 'F', 'a', 'b', 'c', 'd', 'e', 'f' };
034
035    static {
036        unsafeCharactersFastParser = new BitSet(14);
037        unsafeCharactersFastParser.set(' ');
038        unsafeCharactersFastParser.set('"');
039        unsafeCharactersFastParser.set('<');
040        unsafeCharactersFastParser.set('>');
041        unsafeCharactersFastParser.set('%');
042        unsafeCharactersFastParser.set('{');
043        unsafeCharactersFastParser.set('}');
044        unsafeCharactersFastParser.set('|');
045        unsafeCharactersFastParser.set('\\');
046        unsafeCharactersFastParser.set('^');
047        unsafeCharactersFastParser.set('~');
048        unsafeCharactersFastParser.set('[');
049        unsafeCharactersFastParser.set(']');
050        unsafeCharactersFastParser.set('`');
051        // we allow # as a safe when using the fast parser as its used for
052        // looking up beans in the registry (foo=#myBar)
053    }
054
055    static {
056        unsafeCharactersRfc1738 = new BitSet(15);
057        unsafeCharactersRfc1738.set(' ');
058        unsafeCharactersRfc1738.set('"');
059        unsafeCharactersRfc1738.set('<');
060        unsafeCharactersRfc1738.set('>');
061        unsafeCharactersRfc1738.set('#');
062        unsafeCharactersRfc1738.set('%');
063        unsafeCharactersRfc1738.set('{');
064        unsafeCharactersRfc1738.set('}');
065        unsafeCharactersRfc1738.set('|');
066        unsafeCharactersRfc1738.set('\\');
067        unsafeCharactersRfc1738.set('^');
068        unsafeCharactersRfc1738.set('~');
069        unsafeCharactersRfc1738.set('[');
070        unsafeCharactersRfc1738.set(']');
071        unsafeCharactersRfc1738.set('`');
072    }
073
074    static {
075        unsafeCharactersHttp = new BitSet(13);
076        unsafeCharactersHttp.set(' ');
077        unsafeCharactersHttp.set('"');
078        unsafeCharactersHttp.set('<');
079        unsafeCharactersHttp.set('>');
080        unsafeCharactersHttp.set('#');
081        unsafeCharactersHttp.set('%');
082        unsafeCharactersHttp.set('{');
083        unsafeCharactersHttp.set('}');
084        unsafeCharactersHttp.set('|');
085        unsafeCharactersHttp.set('\\');
086        unsafeCharactersHttp.set('^');
087        unsafeCharactersHttp.set('~');
088        unsafeCharactersHttp.set('`');
089    }
090
091    private UnsafeUriCharactersEncoder() {
092        // util class
093    }
094
095    public static boolean isSafeFastParser(char ch) {
096        return !unsafeCharactersFastParser.get(ch);
097    }
098
099    public static String encode(String s) {
100        return encode(s, unsafeCharactersRfc1738);
101    }
102
103    public static String encodeHttpURI(String s) {
104        return encode(s, unsafeCharactersHttp);
105    }
106
107    public static String encode(String s, BitSet unsafeCharacters) {
108        return encode(s, unsafeCharacters, false);
109    }
110
111    public static String encode(String s, boolean checkRaw) {
112        return encode(s, unsafeCharactersRfc1738, checkRaw);
113    }
114
115    public static String encodeHttpURI(String s, boolean checkRaw) {
116        return encode(s, unsafeCharactersHttp, checkRaw);
117    }
118
119    // Just skip the encode for isRAW part
120    public static String encode(String s, BitSet unsafeCharacters, boolean checkRaw) {
121        if (s == null) {
122            return null;
123        }
124        int len = s.length();
125        if (len == 0) {
126            return s;
127        }
128
129        // first check whether we actually need to encode
130        boolean safe = true;
131        for (int i = 0; i < len; i++) {
132            char ch = s.charAt(i);
133            // just deal with the ascii character
134            if (ch > 0 && ch < 128 && unsafeCharacters.get(ch)) {
135                safe = false;
136                break;
137            }
138        }
139        if (safe) {
140            return s;
141        }
142
143        List<Pair<Integer>> rawPairs = null;
144        if (checkRaw) {
145            rawPairs = URISupport.scanRaw(s);
146        }
147
148        // add a bit of extra space as initial capacity
149        int initial = len + 8;
150
151        // okay there are some unsafe characters so we do need to encode
152        // see details at: http://en.wikipedia.org/wiki/Url_encode
153        StringBuilder sb = new StringBuilder(initial);
154        for (int i = 0; i < len; i++) {
155            char ch = s.charAt(i);
156            if (ch > 0 && ch < 128 && unsafeCharacters.get(ch)) {
157                // special for % sign as it may be a decimal encoded value
158                if (ch == '%') {
159                    char next = i + 1 < len ? s.charAt(i + 1) : ' ';
160                    char next2 = i + 2 < len ? s.charAt(i + 2) : ' ';
161
162                    if (isHexDigit(next) && isHexDigit(next2) && !URISupport.isRaw(i, rawPairs)) {
163                        // its already encoded (decimal encoded) so just append as is
164                        sb.append(ch);
165                    } else {
166                        // must escape then, as its an unsafe character
167                        appendEscape(sb, (byte) ch);
168                    }
169                } else {
170                    // must escape then, as its an unsafe character
171                    appendEscape(sb, (byte) ch);
172                }
173            } else {
174                sb.append(ch);
175            }
176        }
177        return sb.toString();
178    }
179
180    private static void appendEscape(StringBuilder sb, byte b) {
181        sb.append('%');
182        sb.append(HEX_DIGITS[(b >> 4) & 0x0f]);
183        sb.append(HEX_DIGITS[(b >> 0) & 0x0f]);
184    }
185
186    private static boolean isHexDigit(char ch) {
187        // 0..9 A..F a..f
188        return ch >= 48 && ch <= 57 || ch >= 65 && ch <= 70 || ch >= 97 && ch <= 102;
189    }
190
191}