001/**
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.camel.language.simple;
018
019import java.util.List;
020import java.util.concurrent.CopyOnWriteArrayList;
021
022import org.apache.camel.language.simple.types.SimpleToken;
023import org.apache.camel.language.simple.types.SimpleTokenType;
024import org.apache.camel.language.simple.types.TokenType;
025import org.apache.camel.util.ObjectHelper;
026
027/**
028 * Tokenizer to create {@link SimpleToken} from the input.
029 */
030public final class SimpleTokenizer {
031
032    // use CopyOnWriteArrayList so we can modify it in the for loop when changing function start/end tokens
033    private static final List<SimpleTokenType> KNOWN_TOKENS = new CopyOnWriteArrayList<SimpleTokenType>();
034
035    static {
036        // add known tokens
037        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.whiteSpace, " "));
038        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.whiteSpace, "\t"));
039        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.whiteSpace, "\n"));
040        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.whiteSpace, "\r"));
041        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.singleQuote, "'"));
042        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.doubleQuote, "\""));
043        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.functionStart, "${"));
044        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.functionStart, "$simple{"));
045        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.functionEnd, "}"));
046        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.booleanValue, "true"));
047        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.booleanValue, "false"));
048        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.nullValue, "null"));
049        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.escape, "\\"));
050
051        // binary operators
052        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "=="));
053        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, ">="));
054        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "<="));
055        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, ">"));
056        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "<"));
057        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "!="));
058        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "not is"));
059        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "is"));
060        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "not contains"));
061        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "contains"));
062        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "not regex"));
063        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "regex"));
064        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "not in"));
065        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "in"));
066        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "range"));
067        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "not range"));
068
069        // unary operators
070        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.unaryOperator, "++"));
071        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.unaryOperator, "--"));
072
073        // logical operators
074        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.logicalOperator, "&&"));
075        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.logicalOperator, "||"));
076        // TODO: @deprecated logical operators, to be removed in Camel 3.0
077        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.logicalOperator, "and"));
078        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.logicalOperator, "or"));
079    }
080
081    private SimpleTokenizer() {
082        // static methods
083    }
084
085
086    /**
087     * @see SimpleLanguage#changeFunctionStartToken(String...)
088     */
089    public static void changeFunctionStartToken(String... startToken) {
090        for (SimpleTokenType type : KNOWN_TOKENS) {
091            if (type.getType() == TokenType.functionStart) {
092                KNOWN_TOKENS.remove(type);
093            }
094        }
095
096        // add in start of list as its a more common token to be used
097        for (String token : startToken) {
098            KNOWN_TOKENS.add(0, new SimpleTokenType(TokenType.functionStart, token));
099        }
100    }
101
102    /**
103     * @see SimpleLanguage#changeFunctionEndToken(String...)
104     */
105    public static void changeFunctionEndToken(String... endToken) {
106        for (SimpleTokenType type : KNOWN_TOKENS) {
107            if (type.getType() == TokenType.functionEnd) {
108                KNOWN_TOKENS.remove(type);
109            }
110        }
111
112        // add in start of list as its a more common token to be used
113        for (String token : endToken) {
114            KNOWN_TOKENS.add(0, new SimpleTokenType(TokenType.functionEnd, token));
115        }
116    }
117
118    /**
119     * Create the next token
120     *
121     * @param expression  the input expression
122     * @param index       the current index
123     * @param allowEscape whether to allow escapes
124     * @param filter      defines the accepted token types to be returned (character is always used as fallback)
125     * @return the created token, will always return a token
126     */
127    public static SimpleToken nextToken(String expression, int index, boolean allowEscape, TokenType... filter) {
128        return doNextToken(expression, index, allowEscape, filter);
129    }
130
131    /**
132     * Create the next token
133     *
134     * @param expression  the input expression
135     * @param index       the current index
136     * @param allowEscape whether to allow escapes
137     * @return the created token, will always return a token
138     */
139    public static SimpleToken nextToken(String expression, int index, boolean allowEscape) {
140        return doNextToken(expression, index, allowEscape);
141    }
142
143    private static SimpleToken doNextToken(String expression, int index, boolean allowEscape, TokenType... filters) {
144
145        boolean numericAllowed = acceptType(TokenType.numericValue, filters);
146        if (numericAllowed) {
147            // is it a numeric value
148            StringBuilder sb = new StringBuilder();
149            boolean digit = true;
150            while (digit && index < expression.length()) {
151                digit = Character.isDigit(expression.charAt(index));
152                if (digit) {
153                    char ch = expression.charAt(index);
154                    sb.append(ch);
155                    index++;
156                    continue;
157                }
158                // is it a dot or comma as part of a floating point number
159                boolean decimalSeparator = '.' == expression.charAt(index) || ',' == expression.charAt(index);
160                if (decimalSeparator && sb.length() > 0) {
161                    char ch = expression.charAt(index);
162                    sb.append(ch);
163                    index++;
164                    // assume its still a digit
165                    digit = true;
166                    continue;
167                }
168            }
169            if (sb.length() > 0) {
170                return new SimpleToken(new SimpleTokenType(TokenType.numericValue, sb.toString()), index);
171            }
172        }
173
174        boolean escapeAllowed = allowEscape && acceptType(TokenType.escape, filters);
175        if (escapeAllowed) {
176            StringBuilder sb = new StringBuilder();
177            char ch = expression.charAt(index);
178            boolean escaped = '\\' == ch;
179            if (escaped && index < expression.length() - 1) {
180                // grab next character to escape
181                char next = expression.charAt(++index);
182                // special for new line, tabs and carriage return
183                boolean special = false;
184                if ('n' == next) {
185                    sb.append("\n");
186                    special = true;
187                } else if ('t' == next) {
188                    sb.append("\t");
189                    special = true;
190                } else if ('r' == next) {
191                    sb.append("\r");
192                    special = true;
193                } else {
194                    // not special just a regular character
195                    sb.append(ch);
196                }
197
198                // force 2 as length if special
199                return new SimpleToken(new SimpleTokenType(TokenType.character, sb.toString()), index, special ? 2 : 1);
200            }
201        }
202
203        // it could be any of the known tokens
204        String text = expression.substring(index);
205        for (SimpleTokenType token : KNOWN_TOKENS) {
206            if (acceptType(token.getType(), filters)) {
207                if (acceptToken(token, text, expression, index)) {
208                    return new SimpleToken(token, index);
209                }
210            }
211        }
212
213        // fallback and create a character token
214        char ch = expression.charAt(index);
215        SimpleToken token = new SimpleToken(new SimpleTokenType(TokenType.character, "" + ch), index);
216        return token;
217    }
218
219    private static boolean acceptType(TokenType type, TokenType... filters) {
220        if (filters == null || filters.length == 0) {
221            return true;
222        }
223        for (TokenType filter : filters) {
224            if (type == filter) {
225                return true;
226            }
227        }
228        return false;
229    }
230
231    private static boolean acceptToken(SimpleTokenType token, String text, String expression, int index) {
232        if (token.isUnary() && text.startsWith(token.getValue())) {
233            SimpleTokenType functionEndToken = getFunctionEndToken();
234            if (functionEndToken != null) {
235                int endLen = functionEndToken.getValue().length();
236
237                // special check for unary as the previous must be a function end, and the next a whitespace
238                // to ensure unary operators is only applied on functions as intended
239                int len = token.getValue().length();
240
241                String previous = "";
242                if (index - endLen >= 0) {
243                    previous = expression.substring(index - endLen, index);
244                }
245                String after = text.substring(len);
246                boolean whiteSpace = ObjectHelper.isEmpty(after) || after.startsWith(" ");
247                boolean functionEnd = previous.equals(functionEndToken.getValue());
248                return functionEnd && whiteSpace;
249            }
250        }
251
252        return text.startsWith(token.getValue());
253    }
254
255    private static SimpleTokenType getFunctionEndToken() {
256        for (SimpleTokenType token : KNOWN_TOKENS) {
257            if (token.isFunctionEnd()) {
258                return token;
259            }
260        }
261        return null;
262    }
263
264}