001/** 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.camel.language.simple; 018 019import java.util.List; 020import java.util.concurrent.CopyOnWriteArrayList; 021 022import org.apache.camel.language.simple.types.SimpleToken; 023import org.apache.camel.language.simple.types.SimpleTokenType; 024import org.apache.camel.language.simple.types.TokenType; 025import org.apache.camel.util.ObjectHelper; 026 027/** 028 * Tokenizer to create {@link SimpleToken} from the input. 029 */ 030public final class SimpleTokenizer { 031 032 // use CopyOnWriteArrayList so we can modify it in the for loop when changing function start/end tokens 033 private static final List<SimpleTokenType> KNOWN_TOKENS = new CopyOnWriteArrayList<SimpleTokenType>(); 034 035 static { 036 // add known tokens 037 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.whiteSpace, " ")); 038 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.whiteSpace, "\t")); 039 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.whiteSpace, "\n")); 040 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.whiteSpace, "\r")); 041 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.singleQuote, "'")); 042 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.doubleQuote, "\"")); 043 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.functionStart, "${")); 044 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.functionStart, "$simple{")); 045 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.functionEnd, "}")); 046 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.booleanValue, "true")); 047 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.booleanValue, "false")); 048 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.nullValue, "null")); 049 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.escape, "\\")); 050 051 // binary operators 052 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "==")); 053 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, ">=")); 054 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "<=")); 055 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, ">")); 056 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "<")); 057 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "!=")); 058 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "not is")); 059 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "is")); 060 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "not contains")); 061 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "contains")); 062 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "not regex")); 063 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "regex")); 064 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "not in")); 065 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "in")); 066 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "range")); 067 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "not range")); 068 069 // unary operators 070 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.unaryOperator, "++")); 071 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.unaryOperator, "--")); 072 073 // logical operators 074 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.logicalOperator, "&&")); 075 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.logicalOperator, "||")); 076 // TODO: @deprecated logical operators, to be removed in Camel 3.0 077 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.logicalOperator, "and")); 078 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.logicalOperator, "or")); 079 } 080 081 private SimpleTokenizer() { 082 // static methods 083 } 084 085 086 /** 087 * @see SimpleLanguage#changeFunctionStartToken(String...) 088 */ 089 public static void changeFunctionStartToken(String... startToken) { 090 for (SimpleTokenType type : KNOWN_TOKENS) { 091 if (type.getType() == TokenType.functionStart) { 092 KNOWN_TOKENS.remove(type); 093 } 094 } 095 096 // add in start of list as its a more common token to be used 097 for (String token : startToken) { 098 KNOWN_TOKENS.add(0, new SimpleTokenType(TokenType.functionStart, token)); 099 } 100 } 101 102 /** 103 * @see SimpleLanguage#changeFunctionEndToken(String...) 104 */ 105 public static void changeFunctionEndToken(String... endToken) { 106 for (SimpleTokenType type : KNOWN_TOKENS) { 107 if (type.getType() == TokenType.functionEnd) { 108 KNOWN_TOKENS.remove(type); 109 } 110 } 111 112 // add in start of list as its a more common token to be used 113 for (String token : endToken) { 114 KNOWN_TOKENS.add(0, new SimpleTokenType(TokenType.functionEnd, token)); 115 } 116 } 117 118 /** 119 * Create the next token 120 * 121 * @param expression the input expression 122 * @param index the current index 123 * @param allowEscape whether to allow escapes 124 * @param filter defines the accepted token types to be returned (character is always used as fallback) 125 * @return the created token, will always return a token 126 */ 127 public static SimpleToken nextToken(String expression, int index, boolean allowEscape, TokenType... filter) { 128 return doNextToken(expression, index, allowEscape, filter); 129 } 130 131 /** 132 * Create the next token 133 * 134 * @param expression the input expression 135 * @param index the current index 136 * @param allowEscape whether to allow escapes 137 * @return the created token, will always return a token 138 */ 139 public static SimpleToken nextToken(String expression, int index, boolean allowEscape) { 140 return doNextToken(expression, index, allowEscape); 141 } 142 143 private static SimpleToken doNextToken(String expression, int index, boolean allowEscape, TokenType... filters) { 144 145 boolean numericAllowed = acceptType(TokenType.numericValue, filters); 146 if (numericAllowed) { 147 // is it a numeric value 148 StringBuilder sb = new StringBuilder(); 149 boolean digit = true; 150 while (digit && index < expression.length()) { 151 digit = Character.isDigit(expression.charAt(index)); 152 if (digit) { 153 char ch = expression.charAt(index); 154 sb.append(ch); 155 index++; 156 continue; 157 } 158 // is it a dot or comma as part of a floating point number 159 boolean decimalSeparator = '.' == expression.charAt(index) || ',' == expression.charAt(index); 160 if (decimalSeparator && sb.length() > 0) { 161 char ch = expression.charAt(index); 162 sb.append(ch); 163 index++; 164 // assume its still a digit 165 digit = true; 166 continue; 167 } 168 } 169 if (sb.length() > 0) { 170 return new SimpleToken(new SimpleTokenType(TokenType.numericValue, sb.toString()), index); 171 } 172 } 173 174 boolean escapeAllowed = allowEscape && acceptType(TokenType.escape, filters); 175 if (escapeAllowed) { 176 StringBuilder sb = new StringBuilder(); 177 char ch = expression.charAt(index); 178 boolean escaped = '\\' == ch; 179 if (escaped && index < expression.length() - 1) { 180 // grab next character to escape 181 char next = expression.charAt(++index); 182 // special for new line, tabs and carriage return 183 boolean special = false; 184 if ('n' == next) { 185 sb.append("\n"); 186 special = true; 187 } else if ('t' == next) { 188 sb.append("\t"); 189 special = true; 190 } else if ('r' == next) { 191 sb.append("\r"); 192 special = true; 193 } else { 194 // not special just a regular character 195 sb.append(ch); 196 } 197 198 // force 2 as length if special 199 return new SimpleToken(new SimpleTokenType(TokenType.character, sb.toString()), index, special ? 2 : 1); 200 } 201 } 202 203 // it could be any of the known tokens 204 String text = expression.substring(index); 205 for (SimpleTokenType token : KNOWN_TOKENS) { 206 if (acceptType(token.getType(), filters)) { 207 if (acceptToken(token, text, expression, index)) { 208 return new SimpleToken(token, index); 209 } 210 } 211 } 212 213 // fallback and create a character token 214 char ch = expression.charAt(index); 215 SimpleToken token = new SimpleToken(new SimpleTokenType(TokenType.character, "" + ch), index); 216 return token; 217 } 218 219 private static boolean acceptType(TokenType type, TokenType... filters) { 220 if (filters == null || filters.length == 0) { 221 return true; 222 } 223 for (TokenType filter : filters) { 224 if (type == filter) { 225 return true; 226 } 227 } 228 return false; 229 } 230 231 private static boolean acceptToken(SimpleTokenType token, String text, String expression, int index) { 232 if (token.isUnary() && text.startsWith(token.getValue())) { 233 SimpleTokenType functionEndToken = getFunctionEndToken(); 234 if (functionEndToken != null) { 235 int endLen = functionEndToken.getValue().length(); 236 237 // special check for unary as the previous must be a function end, and the next a whitespace 238 // to ensure unary operators is only applied on functions as intended 239 int len = token.getValue().length(); 240 241 String previous = ""; 242 if (index - endLen >= 0) { 243 previous = expression.substring(index - endLen, index); 244 } 245 String after = text.substring(len); 246 boolean whiteSpace = ObjectHelper.isEmpty(after) || after.startsWith(" "); 247 boolean functionEnd = previous.equals(functionEndToken.getValue()); 248 return functionEnd && whiteSpace; 249 } 250 } 251 252 return text.startsWith(token.getValue()); 253 } 254 255 private static SimpleTokenType getFunctionEndToken() { 256 for (SimpleTokenType token : KNOWN_TOKENS) { 257 if (token.isFunctionEnd()) { 258 return token; 259 } 260 } 261 return null; 262 } 263 264}