001/**
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.camel.util;
018
019import java.util.ArrayList;
020import java.util.List;
021import java.util.StringTokenizer;
022
023/**
024 * PathMatcher implementation for Ant-style path patterns. Examples are provided below.
025 * <p>
026 * Part of this mapping code has been kindly borrowed from <a href="http://ant.apache.org">Apache Ant</a>
027 * and <a href="http://springframework.org">Spring Framework</a>.
028 * <p>
029 * The mapping matches URLs using the following rules:<br>
030 * <ul>
031 *   <li>? matches one character</li>
032 *   <li>* matches zero or more characters</li>
033 *   <li>** matches zero or more 'directories' in a path</li>
034 * </ul>
035 * <p>
036 * Some examples:<br>
037 * <ul>
038 *   <li><code>com/t?st.jsp</code> - matches <code>com/test.jsp</code> but also
039 *       <code>com/tast.jsp</code> or <code>com/txst.jsp</code>
040 *   </li>
041 *   <li><code>com/*.jsp</code> - matches all <code>.jsp</code> files in the
042 *       <code>com</code> directory
043 *   </li>
044 *   <li><code>com/&#42;&#42;/test.jsp</code> - matches all <code>test.jsp</code>
045 *       files underneath the <code>com</code> path
046 *   </li>
047 *   <li><code>org/springframework/&#42;&#42;/*.jsp</code> - matches all
048 *       <code>.jsp</code> files underneath the <code>org/springframework</code> path
049 *   </li>
050 *   <li><code>org/&#42;&#42;/servlet/bla.jsp</code> - matches
051 *       <code>org/springframework/servlet/bla.jsp</code> but also
052 *       <code>org/springframework/testing/servlet/bla.jsp</code> and
053 *       <code>org/servlet/bla.jsp</code>
054 *   </li>
055 * </ul>
056 */
057public class AntPathMatcher {
058
059    /** Default path separator: "/" */
060    public static final String DEFAULT_PATH_SEPARATOR = "/";
061
062    private String pathSeparator = DEFAULT_PATH_SEPARATOR;
063
064    /**
065     * Set the path separator to use for pattern parsing. Default is "/", as in
066     * Ant.
067     */
068    public void setPathSeparator(String pathSeparator) {
069        this.pathSeparator = pathSeparator != null ? pathSeparator : DEFAULT_PATH_SEPARATOR;
070    }
071
072    public boolean isPattern(String path) {
073        return path.indexOf('*') != -1 || path.indexOf('?') != -1;
074    }
075
076    public boolean match(String pattern, String path) {
077        return match(pattern, path, true);
078    }
079
080    public boolean matchStart(String pattern, String path) {
081        return matchStart(pattern, path, true);
082    }
083
084    public boolean match(String pattern, String path, boolean isCaseSensitive) {
085        return doMatch(pattern, path, true, isCaseSensitive);
086    }
087
088    public boolean matchStart(String pattern, String path, boolean isCaseSensitive) {
089        return doMatch(pattern, path, false, isCaseSensitive);
090    }
091
092    /**
093     * Actually match the given <code>path</code> against the given
094     * <code>pattern</code>.
095     * 
096     * @param pattern the pattern to match against
097     * @param path the path String to test
098     * @param fullMatch whether a full pattern match is required (else a pattern
099     *            match as far as the given base path goes is sufficient)
100     * @param isCaseSensitive Whether or not matching should be performed
101     *                        case sensitively.
102     * @return <code>true</code> if the supplied <code>path</code> matched,
103     *         <code>false</code> if it didn't
104     */
105    protected boolean doMatch(String pattern, String path, boolean fullMatch, boolean isCaseSensitive) {
106        if (path.startsWith(this.pathSeparator) != pattern.startsWith(this.pathSeparator)) {
107            return false;
108        }
109
110        String[] pattDirs = tokenizeToStringArray(pattern, this.pathSeparator);
111        String[] pathDirs = tokenizeToStringArray(path, this.pathSeparator);
112
113        int pattIdxStart = 0;
114        int pattIdxEnd = pattDirs.length - 1;
115        int pathIdxStart = 0;
116        int pathIdxEnd = pathDirs.length - 1;
117
118        // Match all elements up to the first **
119        while (pattIdxStart <= pattIdxEnd && pathIdxStart <= pathIdxEnd) {
120            String patDir = pattDirs[pattIdxStart];
121            if ("**".equals(patDir)) {
122                break;
123            }
124            if (!matchStrings(patDir, pathDirs[pathIdxStart], isCaseSensitive)) {
125                return false;
126            }
127            pattIdxStart++;
128            pathIdxStart++;
129        }
130
131        if (pathIdxStart > pathIdxEnd) {
132            // Path is exhausted, only match if rest of pattern is * or **'s
133            if (pattIdxStart > pattIdxEnd) {
134                return pattern.endsWith(this.pathSeparator) ? path.endsWith(this.pathSeparator) : !path
135                    .endsWith(this.pathSeparator);
136            }
137            if (!fullMatch) {
138                return true;
139            }
140            if (pattIdxStart == pattIdxEnd && pattDirs[pattIdxStart].equals("*")
141                && path.endsWith(this.pathSeparator)) {
142                return true;
143            }
144            for (int i = pattIdxStart; i <= pattIdxEnd; i++) {
145                if (!pattDirs[i].equals("**")) {
146                    return false;
147                }
148            }
149            return true;
150        } else if (pattIdxStart > pattIdxEnd) {
151            // String not exhausted, but pattern is. Failure.
152            return false;
153        } else if (!fullMatch && "**".equals(pattDirs[pattIdxStart])) {
154            // Path start definitely matches due to "**" part in pattern.
155            return true;
156        }
157
158        // up to last '**'
159        while (pattIdxStart <= pattIdxEnd && pathIdxStart <= pathIdxEnd) {
160            String patDir = pattDirs[pattIdxEnd];
161            if (patDir.equals("**")) {
162                break;
163            }
164            if (!matchStrings(patDir, pathDirs[pathIdxEnd], isCaseSensitive)) {
165                return false;
166            }
167            pattIdxEnd--;
168            pathIdxEnd--;
169        }
170        if (pathIdxStart > pathIdxEnd) {
171            // String is exhausted
172            for (int i = pattIdxStart; i <= pattIdxEnd; i++) {
173                if (!pattDirs[i].equals("**")) {
174                    return false;
175                }
176            }
177            return true;
178        }
179
180        while (pattIdxStart != pattIdxEnd && pathIdxStart <= pathIdxEnd) {
181            int patIdxTmp = -1;
182            for (int i = pattIdxStart + 1; i <= pattIdxEnd; i++) {
183                if (pattDirs[i].equals("**")) {
184                    patIdxTmp = i;
185                    break;
186                }
187            }
188            if (patIdxTmp == pattIdxStart + 1) {
189                // '**/**' situation, so skip one
190                pattIdxStart++;
191                continue;
192            }
193            // Find the pattern between padIdxStart & padIdxTmp in str between
194            // strIdxStart & strIdxEnd
195            int patLength = patIdxTmp - pattIdxStart - 1;
196            int strLength = pathIdxEnd - pathIdxStart + 1;
197            int foundIdx = -1;
198
199        strLoop:
200            for (int i = 0; i <= strLength - patLength; i++) {
201                for (int j = 0; j < patLength; j++) {
202                    String subPat = pattDirs[pattIdxStart + j + 1];
203                    String subStr = pathDirs[pathIdxStart + i + j];
204                    if (!matchStrings(subPat, subStr, isCaseSensitive)) {
205                        continue strLoop;
206                    }
207                }
208                foundIdx = pathIdxStart + i;
209                break;
210            }
211
212            if (foundIdx == -1) {
213                return false;
214            }
215
216            pattIdxStart = patIdxTmp;
217            pathIdxStart = foundIdx + patLength;
218        }
219
220        for (int i = pattIdxStart; i <= pattIdxEnd; i++) {
221            if (!pattDirs[i].equals("**")) {
222                return false;
223            }
224        }
225
226        return true;
227    }
228
229    /**
230     * Tests whether or not a string matches against a pattern. The pattern may
231     * contain two special characters:<br>
232     * '*' means zero or more characters<br>
233     * '?' means one and only one character
234     * 
235     * @param pattern pattern to match against. Must not be <code>null</code>.
236     * @param str string which must be matched against the pattern. Must not be
237     *            <code>null</code>.
238     * @param caseSensitive Whether or not matching should be performed
239     *                      case sensitively.
240     * @return <code>true</code> if the string matches against the pattern, or
241     *         <code>false</code> otherwise.
242     */
243    private boolean matchStrings(String pattern, String str, boolean caseSensitive) {
244        char[] patArr = pattern.toCharArray();
245        char[] strArr = str.toCharArray();
246        int patIdxStart = 0;
247        int patIdxEnd = patArr.length - 1;
248        int strIdxStart = 0;
249        int strIdxEnd = strArr.length - 1;
250        char ch;
251
252        boolean containsStar = false;
253        for (char c : patArr) {
254            if (c == '*') {
255                containsStar = true;
256                break;
257            }
258        }
259
260        if (!containsStar) {
261            // No '*'s, so we make a shortcut
262            if (patIdxEnd != strIdxEnd) {
263                return false; // Pattern and string do not have the same size
264            }
265            for (int i = 0; i <= patIdxEnd; i++) {
266                ch = patArr[i];
267                if (ch != '?') {
268                    if (different(caseSensitive, ch, strArr[i])) {
269                        return false;
270                        // Character mismatch
271                    }
272                }
273            }
274            return true; // String matches against pattern
275        }
276
277        if (patIdxEnd == 0) {
278            return true; // Pattern contains only '*', which matches anything
279        }
280
281        // Process characters before first star
282        while ((ch = patArr[patIdxStart]) != '*' && strIdxStart <= strIdxEnd) {
283            if (ch != '?') {
284                if (different(caseSensitive, ch, strArr[strIdxStart])) {
285                    return false;
286                    // Character mismatch
287                }
288            }
289            patIdxStart++;
290            strIdxStart++;
291        }
292        if (strIdxStart > strIdxEnd) {
293            // All characters in the string are used. Check if only '*'s are
294            // left in the pattern. If so, we succeeded. Otherwise failure.
295            for (int i = patIdxStart; i <= patIdxEnd; i++) {
296                if (patArr[i] != '*') {
297                    return false;
298                }
299            }
300            return true;
301        }
302
303        // Process characters after last star
304        while ((ch = patArr[patIdxEnd]) != '*' && strIdxStart <= strIdxEnd) {
305            if (ch != '?') {
306                if (different(caseSensitive, ch, strArr[strIdxEnd])) {
307                    return false;
308                    // Character mismatch
309                }
310            }
311            patIdxEnd--;
312            strIdxEnd--;
313        }
314        if (strIdxStart > strIdxEnd) {
315            // All characters in the string are used. Check if only '*'s are
316            // left in the pattern. If so, we succeeded. Otherwise failure.
317            for (int i = patIdxStart; i <= patIdxEnd; i++) {
318                if (patArr[i] != '*') {
319                    return false;
320                }
321            }
322            return true;
323        }
324
325        // process pattern between stars. padIdxStart and patIdxEnd point
326        // always to a '*'.
327        while (patIdxStart != patIdxEnd && strIdxStart <= strIdxEnd) {
328            int patIdxTmp = -1;
329            for (int i = patIdxStart + 1; i <= patIdxEnd; i++) {
330                if (patArr[i] == '*') {
331                    patIdxTmp = i;
332                    break;
333                }
334            }
335            if (patIdxTmp == patIdxStart + 1) {
336                // Two stars next to each other, skip the first one.
337                patIdxStart++;
338                continue;
339            }
340            // Find the pattern between padIdxStart & padIdxTmp in str between
341            // strIdxStart & strIdxEnd
342            int patLength = patIdxTmp - patIdxStart - 1;
343            int strLength = strIdxEnd - strIdxStart + 1;
344            int foundIdx = -1;
345        strLoop: 
346            for (int i = 0; i <= strLength - patLength; i++) {
347                for (int j = 0; j < patLength; j++) {
348                    ch = patArr[patIdxStart + j + 1];
349                    if (ch != '?') {
350                        if (different(caseSensitive, ch, strArr[strIdxStart + i + j])) {
351                            continue strLoop;
352                        }
353                    }
354                }
355
356                foundIdx = strIdxStart + i;
357                break;
358            }
359
360            if (foundIdx == -1) {
361                return false;
362            }
363
364            patIdxStart = patIdxTmp;
365            strIdxStart = foundIdx + patLength;
366        }
367
368        // All characters in the string are used. Check if only '*'s are left
369        // in the pattern. If so, we succeeded. Otherwise failure.
370        for (int i = patIdxStart; i <= patIdxEnd; i++) {
371            if (patArr[i] != '*') {
372                return false;
373            }
374        }
375
376        return true;
377    }
378
379    /**
380     * Given a pattern and a full path, determine the pattern-mapped part.
381     * <p>
382     * For example:
383     * <ul>
384     * <li>'<code>/docs/cvs/commit.html</code>' and '
385     * <code>/docs/cvs/commit.html</code> -> ''</li>
386     * <li>'<code>/docs/*</code>' and '<code>/docs/cvs/commit</code> -> '
387     * <code>cvs/commit</code>'</li>
388     * <li>'<code>/docs/cvs/*.html</code>' and '
389     * <code>/docs/cvs/commit.html</code> -> '<code>commit.html</code>'</li>
390     * <li>'<code>/docs/**</code>' and '<code>/docs/cvs/commit</code> -> '
391     * <code>cvs/commit</code>'</li>
392     * <li>'<code>/docs/**\/*.html</code>' and '
393     * <code>/docs/cvs/commit.html</code> -> '<code>cvs/commit.html</code>'</li>
394     * <li>'<code>/*.html</code>' and '<code>/docs/cvs/commit.html</code> -> '
395     * <code>docs/cvs/commit.html</code>'</li>
396     * <li>'<code>*.html</code>' and '<code>/docs/cvs/commit.html</code> -> '
397     * <code>/docs/cvs/commit.html</code>'</li>
398     * <li>'<code>*</code>' and '<code>/docs/cvs/commit.html</code> -> '
399     * <code>/docs/cvs/commit.html</code>'</li>
400     * </ul>
401     * <p>
402     * Assumes that {@link #match} returns <code>true</code> for '
403     * <code>pattern</code>' and '<code>path</code>', but does
404     * <strong>not</strong> enforce this.
405     */
406    public String extractPathWithinPattern(String pattern, String path) {
407        String[] patternParts = tokenizeToStringArray(pattern, this.pathSeparator);
408        String[] pathParts = tokenizeToStringArray(path, this.pathSeparator);
409
410        StringBuilder buffer = new StringBuilder();
411
412        // Add any path parts that have a wildcarded pattern part.
413        int puts = 0;
414        for (int i = 0; i < patternParts.length; i++) {
415            String patternPart = patternParts[i];
416            if ((patternPart.indexOf('*') > -1 || patternPart.indexOf('?') > -1) && pathParts.length >= i + 1) {
417                if (puts > 0 || (i == 0 && !pattern.startsWith(this.pathSeparator))) {
418                    buffer.append(this.pathSeparator);
419                }
420                buffer.append(pathParts[i]);
421                puts++;
422            }
423        }
424
425        // Append any trailing path parts.
426        for (int i = patternParts.length; i < pathParts.length; i++) {
427            if (puts > 0 || i > 0) {
428                buffer.append(this.pathSeparator);
429            }
430            buffer.append(pathParts[i]);
431        }
432
433        return buffer.toString();
434    }
435
436    /**
437     * Tokenize the given String into a String array via a StringTokenizer.
438     * Trims tokens and omits empty tokens.
439     * <p>
440     * The given delimiters string is supposed to consist of any number of
441     * delimiter characters. Each of those characters can be used to separate
442     * tokens. A delimiter is always a single character; for multi-character
443     * delimiters, consider using <code>delimitedListToStringArray</code>
444     * 
445     * @param str the String to tokenize
446     * @param delimiters the delimiter characters, assembled as String (each of
447     *            those characters is individually considered as delimiter).
448     * @return an array of the tokens
449     * @see java.util.StringTokenizer
450     * @see java.lang.String#trim()
451     */
452    public static String[] tokenizeToStringArray(String str, String delimiters) {
453        if (str == null) {
454            return null;
455        }
456        StringTokenizer st = new StringTokenizer(str, delimiters);
457        List<String> tokens = new ArrayList<String>();
458        while (st.hasMoreTokens()) {
459            String token = st.nextToken();
460            token = token.trim();
461            if (token.length() > 0) {
462                tokens.add(token);
463            }
464        }
465        return tokens.toArray(new String[tokens.size()]);
466    }
467
468    private static boolean different(boolean caseSensitive, char ch, char other) {
469        return caseSensitive ? ch != other : Character.toUpperCase(ch) != Character.toUpperCase(other);
470    }
471}