001/** 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.camel.language.simple; 018 019import java.util.List; 020import java.util.concurrent.CopyOnWriteArrayList; 021 022import org.apache.camel.language.simple.types.SimpleToken; 023import org.apache.camel.language.simple.types.SimpleTokenType; 024import org.apache.camel.language.simple.types.TokenType; 025import org.apache.camel.util.ObjectHelper; 026 027/** 028 * Tokenizer to create {@link SimpleToken} from the input. 029 */ 030public final class SimpleTokenizer { 031 032 // use CopyOnWriteArrayList so we can modify it in the for loop when changing function start/end tokens 033 private static final List<SimpleTokenType> KNOWN_TOKENS = new CopyOnWriteArrayList<>(); 034 035 // optimise to be able to quick check for start functions 036 private static final String[] FUNCTION_START = new String[]{"${", "$simple{"}; 037 038 static { 039 // add known tokens 040 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.functionStart, "${")); 041 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.functionStart, "$simple{")); 042 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.functionEnd, "}")); 043 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.whiteSpace, " ")); 044 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.whiteSpace, "\t")); 045 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.whiteSpace, "\n")); 046 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.whiteSpace, "\r")); 047 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.singleQuote, "'")); 048 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.doubleQuote, "\"")); 049 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.booleanValue, "true")); 050 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.booleanValue, "false")); 051 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.nullValue, "null")); 052 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.escape, "\\")); 053 054 // binary operators 055 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "==")); 056 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "=~")); 057 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, ">=")); 058 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "<=")); 059 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, ">")); 060 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "<")); 061 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "!=")); 062 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "not is")); 063 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "is")); 064 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "not contains")); 065 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "contains")); 066 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "~~")); 067 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "not regex")); 068 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "regex")); 069 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "not in")); 070 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "in")); 071 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "range")); 072 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "not range")); 073 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "starts with")); 074 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "ends with")); 075 076 // unary operators 077 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.unaryOperator, "++")); 078 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.unaryOperator, "--")); 079 080 // logical operators 081 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.logicalOperator, "&&")); 082 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.logicalOperator, "||")); 083 // TODO: @deprecated logical operators, to be removed in Camel 3.0 084 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.logicalOperator, "and")); 085 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.logicalOperator, "or")); 086 087 //binary operator 088 // it is added as the last item because unary -- has the priority 089 // if unary not found it is highly possible - operator is run into. 090 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.minusValue, "-")); 091 } 092 093 private SimpleTokenizer() { 094 // static methods 095 } 096 097 /** 098 * Does the expression include a simple function. 099 * 100 * @param expression the expression 101 * @return <tt>true</tt> if one or more simple function is included in the expression 102 */ 103 public static boolean hasFunctionStartToken(String expression) { 104 if (expression != null) { 105 return expression.contains(FUNCTION_START[0]) || expression.contains(FUNCTION_START[1]); 106 } 107 return false; 108 } 109 110 /** 111 * @see SimpleLanguage#changeFunctionStartToken(String...) 112 */ 113 public static void changeFunctionStartToken(String... startToken) { 114 for (SimpleTokenType type : KNOWN_TOKENS) { 115 if (type.getType() == TokenType.functionStart) { 116 KNOWN_TOKENS.remove(type); 117 } 118 } 119 120 if (startToken.length > 2) { 121 throw new IllegalArgumentException("At most 2 start tokens is allowed"); 122 } 123 124 // reset 125 FUNCTION_START[0] = ""; 126 FUNCTION_START[1] = ""; 127 128 // add in start of list as its a more common token to be used 129 for (int i = 0; i < startToken.length; i++) { 130 String token = startToken[i]; 131 FUNCTION_START[i] = token; 132 KNOWN_TOKENS.add(0, new SimpleTokenType(TokenType.functionStart, token)); 133 } 134 } 135 136 /** 137 * @see SimpleLanguage#changeFunctionEndToken(String...) 138 */ 139 public static void changeFunctionEndToken(String... endToken) { 140 for (SimpleTokenType type : KNOWN_TOKENS) { 141 if (type.getType() == TokenType.functionEnd) { 142 KNOWN_TOKENS.remove(type); 143 } 144 } 145 146 // add after the start tokens 147 int pos = 0; 148 for (SimpleTokenType type : KNOWN_TOKENS) { 149 if (type.getType() == TokenType.functionStart) { 150 pos++; 151 } 152 } 153 154 // add after function start of list as its a more common token to be used 155 for (String token : endToken) { 156 KNOWN_TOKENS.add(pos, new SimpleTokenType(TokenType.functionEnd, token)); 157 } 158 } 159 160 /** 161 * Create the next token 162 * 163 * @param expression the input expression 164 * @param index the current index 165 * @param allowEscape whether to allow escapes 166 * @param filter defines the accepted token types to be returned (character is always used as fallback) 167 * @return the created token, will always return a token 168 */ 169 public static SimpleToken nextToken(String expression, int index, boolean allowEscape, TokenType... filter) { 170 return doNextToken(expression, index, allowEscape, filter); 171 } 172 173 /** 174 * Create the next token 175 * 176 * @param expression the input expression 177 * @param index the current index 178 * @param allowEscape whether to allow escapes 179 * @return the created token, will always return a token 180 */ 181 public static SimpleToken nextToken(String expression, int index, boolean allowEscape) { 182 return doNextToken(expression, index, allowEscape); 183 } 184 185 private static SimpleToken doNextToken(String expression, int index, boolean allowEscape, TokenType... filters) { 186 187 boolean numericAllowed = acceptType(TokenType.numericValue, filters); 188 if (numericAllowed) { 189 // is it a numeric value 190 StringBuilder sb = new StringBuilder(); 191 boolean digit = true; 192 while (digit && index < expression.length()) { 193 digit = Character.isDigit(expression.charAt(index)); 194 if (digit) { 195 char ch = expression.charAt(index); 196 sb.append(ch); 197 index++; 198 continue; 199 } 200 // is it a dot or comma as part of a floating point number 201 boolean decimalSeparator = '.' == expression.charAt(index) || ',' == expression.charAt(index); 202 if (decimalSeparator && sb.length() > 0) { 203 char ch = expression.charAt(index); 204 sb.append(ch); 205 index++; 206 // assume its still a digit 207 digit = true; 208 continue; 209 } 210 } 211 if (sb.length() > 0) { 212 return new SimpleToken(new SimpleTokenType(TokenType.numericValue, sb.toString()), index); 213 } 214 } 215 216 boolean escapeAllowed = allowEscape && acceptType(TokenType.escape, filters); 217 if (escapeAllowed) { 218 StringBuilder sb = new StringBuilder(); 219 char ch = expression.charAt(index); 220 boolean escaped = '\\' == ch; 221 if (escaped && index < expression.length() - 1) { 222 // grab next character to escape 223 char next = expression.charAt(++index); 224 // special for new line, tabs and carriage return 225 boolean special = false; 226 if ('n' == next) { 227 sb.append("\n"); 228 special = true; 229 } else if ('t' == next) { 230 sb.append("\t"); 231 special = true; 232 } else if ('r' == next) { 233 sb.append("\r"); 234 special = true; 235 } else if ('}' == next) { 236 sb.append("}"); 237 special = true; 238 } else { 239 // not special just a regular character 240 sb.append(ch); 241 } 242 243 // force 2 as length if special 244 return new SimpleToken(new SimpleTokenType(TokenType.character, sb.toString()), index, special ? 2 : 1); 245 } 246 } 247 248 // it could be any of the known tokens 249 String text = expression.substring(index); 250 for (SimpleTokenType token : KNOWN_TOKENS) { 251 if (acceptType(token.getType(), filters)) { 252 if (acceptToken(token, text, expression, index)) { 253 return new SimpleToken(token, index); 254 } 255 } 256 } 257 258 // fallback and create a character token 259 char ch = expression.charAt(index); 260 SimpleToken token = new SimpleToken(new SimpleTokenType(TokenType.character, "" + ch), index); 261 return token; 262 } 263 264 private static boolean acceptType(TokenType type, TokenType... filters) { 265 if (filters == null || filters.length == 0) { 266 return true; 267 } 268 for (TokenType filter : filters) { 269 if (type == filter) { 270 return true; 271 } 272 } 273 return false; 274 } 275 276 private static boolean acceptToken(SimpleTokenType token, String text, String expression, int index) { 277 if (token.isUnary() && text.startsWith(token.getValue())) { 278 SimpleTokenType functionEndToken = getFunctionEndToken(); 279 if (functionEndToken != null) { 280 int endLen = functionEndToken.getValue().length(); 281 282 // special check for unary as the previous must be a function end, and the next a whitespace 283 // to ensure unary operators is only applied on functions as intended 284 int len = token.getValue().length(); 285 286 String previous = ""; 287 if (index - endLen >= 0) { 288 previous = expression.substring(index - endLen, index); 289 } 290 String after = text.substring(len); 291 boolean whiteSpace = ObjectHelper.isEmpty(after) || after.startsWith(" "); 292 boolean functionEnd = previous.equals(functionEndToken.getValue()); 293 return functionEnd && whiteSpace; 294 } 295 } 296 297 return text.startsWith(token.getValue()); 298 } 299 300 private static SimpleTokenType getFunctionEndToken() { 301 for (SimpleTokenType token : KNOWN_TOKENS) { 302 if (token.isFunctionEnd()) { 303 return token; 304 } 305 } 306 return null; 307 } 308 309}