001/**
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.camel.language.simple;
018
019import java.util.List;
020import java.util.concurrent.CopyOnWriteArrayList;
021
022import org.apache.camel.language.simple.types.SimpleToken;
023import org.apache.camel.language.simple.types.SimpleTokenType;
024import org.apache.camel.language.simple.types.TokenType;
025import org.apache.camel.util.ObjectHelper;
026
027/**
028 * Tokenizer to create {@link SimpleToken} from the input.
029 */
030public final class SimpleTokenizer {
031
032    // use CopyOnWriteArrayList so we can modify it in the for loop when changing function start/end tokens
033    private static final List<SimpleTokenType> KNOWN_TOKENS = new CopyOnWriteArrayList<>();
034
035    // optimise to be able to quick check for start functions
036    private static final String[] FUNCTION_START = new String[]{"${", "$simple{"};
037
038    static {
039        // add known tokens
040        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.functionStart, "${"));
041        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.functionStart, "$simple{"));
042        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.functionEnd, "}"));
043        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.whiteSpace, " "));
044        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.whiteSpace, "\t"));
045        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.whiteSpace, "\n"));
046        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.whiteSpace, "\r"));
047        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.singleQuote, "'"));
048        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.doubleQuote, "\""));
049        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.booleanValue, "true"));
050        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.booleanValue, "false"));
051        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.nullValue, "null"));
052        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.escape, "\\"));
053
054        // binary operators
055        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "=="));
056        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "=~"));
057        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, ">="));
058        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "<="));
059        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, ">"));
060        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "<"));
061        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "!="));
062        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "not is"));
063        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "is"));
064        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "not contains"));
065        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "contains"));
066        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "~~"));
067        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "not regex"));
068        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "regex"));
069        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "not in"));
070        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "in"));
071        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "range"));
072        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "not range"));
073        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "starts with"));
074        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "ends with"));
075
076        // unary operators
077        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.unaryOperator, "++"));
078        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.unaryOperator, "--"));
079
080        // logical operators
081        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.logicalOperator, "&&"));
082        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.logicalOperator, "||"));
083        // TODO: @deprecated logical operators, to be removed in Camel 3.0
084        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.logicalOperator, "and"));
085        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.logicalOperator, "or"));
086        
087        //binary operator 
088        // it is added as the last item because unary -- has the priority
089        // if unary not found it is highly possible - operator is run into.
090        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.minusValue, "-"));
091    }
092
093    private SimpleTokenizer() {
094        // static methods
095    }
096
097    /**
098     * Does the expression include a simple function.
099     *
100     * @param expression the expression
101     * @return <tt>true</tt> if one or more simple function is included in the expression
102     */
103    public static boolean hasFunctionStartToken(String expression) {
104        if (expression != null) {
105            return expression.contains(FUNCTION_START[0]) || expression.contains(FUNCTION_START[1]);
106        }
107        return false;
108    }
109
110    /**
111     * @see SimpleLanguage#changeFunctionStartToken(String...)
112     */
113    public static void changeFunctionStartToken(String... startToken) {
114        for (SimpleTokenType type : KNOWN_TOKENS) {
115            if (type.getType() == TokenType.functionStart) {
116                KNOWN_TOKENS.remove(type);
117            }
118        }
119
120        if (startToken.length > 2) {
121            throw new IllegalArgumentException("At most 2 start tokens is allowed");
122        }
123
124        // reset
125        FUNCTION_START[0] = "";
126        FUNCTION_START[1] = "";
127
128        // add in start of list as its a more common token to be used
129        for (int i = 0; i < startToken.length; i++) {
130            String token = startToken[i];
131            FUNCTION_START[i] = token;
132            KNOWN_TOKENS.add(0, new SimpleTokenType(TokenType.functionStart, token));
133        }
134    }
135
136    /**
137     * @see SimpleLanguage#changeFunctionEndToken(String...)
138     */
139    public static void changeFunctionEndToken(String... endToken) {
140        for (SimpleTokenType type : KNOWN_TOKENS) {
141            if (type.getType() == TokenType.functionEnd) {
142                KNOWN_TOKENS.remove(type);
143            }
144        }
145
146        // add after the start tokens
147        int pos = 0;
148        for (SimpleTokenType type : KNOWN_TOKENS) {
149            if (type.getType() == TokenType.functionStart) {
150                pos++;
151            }
152        }
153
154        // add after function start of list as its a more common token to be used
155        for (String token : endToken) {
156            KNOWN_TOKENS.add(pos, new SimpleTokenType(TokenType.functionEnd, token));
157        }
158    }
159
160    /**
161     * Create the next token
162     *
163     * @param expression  the input expression
164     * @param index       the current index
165     * @param allowEscape whether to allow escapes
166     * @param filter      defines the accepted token types to be returned (character is always used as fallback)
167     * @return the created token, will always return a token
168     */
169    public static SimpleToken nextToken(String expression, int index, boolean allowEscape, TokenType... filter) {
170        return doNextToken(expression, index, allowEscape, filter);
171    }
172
173    /**
174     * Create the next token
175     *
176     * @param expression  the input expression
177     * @param index       the current index
178     * @param allowEscape whether to allow escapes
179     * @return the created token, will always return a token
180     */
181    public static SimpleToken nextToken(String expression, int index, boolean allowEscape) {
182        return doNextToken(expression, index, allowEscape);
183    }
184
185    private static SimpleToken doNextToken(String expression, int index, boolean allowEscape, TokenType... filters) {
186
187        boolean numericAllowed = acceptType(TokenType.numericValue, filters);
188        if (numericAllowed) {
189            // is it a numeric value
190            StringBuilder sb = new StringBuilder();
191            boolean digit = true;
192            while (digit && index < expression.length()) {
193                digit = Character.isDigit(expression.charAt(index));
194                if (digit) {
195                    char ch = expression.charAt(index);
196                    sb.append(ch);
197                    index++;
198                    continue;
199                }
200                // is it a dot or comma as part of a floating point number
201                boolean decimalSeparator = '.' == expression.charAt(index) || ',' == expression.charAt(index);
202                if (decimalSeparator && sb.length() > 0) {
203                    char ch = expression.charAt(index);
204                    sb.append(ch);
205                    index++;
206                    // assume its still a digit
207                    digit = true;
208                    continue;
209                }
210            }
211            if (sb.length() > 0) {
212                return new SimpleToken(new SimpleTokenType(TokenType.numericValue, sb.toString()), index);
213            }
214        }
215
216        boolean escapeAllowed = allowEscape && acceptType(TokenType.escape, filters);
217        if (escapeAllowed) {
218            StringBuilder sb = new StringBuilder();
219            char ch = expression.charAt(index);
220            boolean escaped = '\\' == ch;
221            if (escaped && index < expression.length() - 1) {
222                // grab next character to escape
223                char next = expression.charAt(++index);
224                // special for new line, tabs and carriage return
225                boolean special = false;
226                if ('n' == next) {
227                    sb.append("\n");
228                    special = true;
229                } else if ('t' == next) {
230                    sb.append("\t");
231                    special = true;
232                } else if ('r' == next) {
233                    sb.append("\r");
234                    special = true;
235                } else if ('}' == next) {
236                    sb.append("}");
237                    special = true;
238                } else {
239                    // not special just a regular character
240                    sb.append(ch);
241                }
242
243                // force 2 as length if special
244                return new SimpleToken(new SimpleTokenType(TokenType.character, sb.toString()), index, special ? 2 : 1);
245            }
246        }
247
248        // it could be any of the known tokens
249        String text = expression.substring(index);
250        for (SimpleTokenType token : KNOWN_TOKENS) {
251            if (acceptType(token.getType(), filters)) {
252                if (acceptToken(token, text, expression, index)) {
253                    return new SimpleToken(token, index);
254                }
255            }
256        }
257
258        // fallback and create a character token
259        char ch = expression.charAt(index);
260        SimpleToken token = new SimpleToken(new SimpleTokenType(TokenType.character, "" + ch), index);
261        return token;
262    }
263
264    private static boolean acceptType(TokenType type, TokenType... filters) {
265        if (filters == null || filters.length == 0) {
266            return true;
267        }
268        for (TokenType filter : filters) {
269            if (type == filter) {
270                return true;
271            }
272        }
273        return false;
274    }
275
276    private static boolean acceptToken(SimpleTokenType token, String text, String expression, int index) {
277        if (token.isUnary() && text.startsWith(token.getValue())) {
278            SimpleTokenType functionEndToken = getFunctionEndToken();
279            if (functionEndToken != null) {
280                int endLen = functionEndToken.getValue().length();
281
282                // special check for unary as the previous must be a function end, and the next a whitespace
283                // to ensure unary operators is only applied on functions as intended
284                int len = token.getValue().length();
285
286                String previous = "";
287                if (index - endLen >= 0) {
288                    previous = expression.substring(index - endLen, index);
289                }
290                String after = text.substring(len);
291                boolean whiteSpace = ObjectHelper.isEmpty(after) || after.startsWith(" ");
292                boolean functionEnd = previous.equals(functionEndToken.getValue());
293                return functionEnd && whiteSpace;
294            }
295        }
296
297        return text.startsWith(token.getValue());
298    }
299
300    private static SimpleTokenType getFunctionEndToken() {
301        for (SimpleTokenType token : KNOWN_TOKENS) {
302            if (token.isFunctionEnd()) {
303                return token;
304            }
305        }
306        return null;
307    }
308
309}