001/**
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.camel.language.simple;
018
019import java.util.ArrayList;
020import java.util.Iterator;
021import java.util.List;
022import java.util.Stack;
023import java.util.concurrent.atomic.AtomicBoolean;
024
025import org.apache.camel.Expression;
026import org.apache.camel.Predicate;
027import org.apache.camel.builder.PredicateBuilder;
028import org.apache.camel.language.simple.ast.BinaryExpression;
029import org.apache.camel.language.simple.ast.DoubleQuoteEnd;
030import org.apache.camel.language.simple.ast.DoubleQuoteStart;
031import org.apache.camel.language.simple.ast.LiteralExpression;
032import org.apache.camel.language.simple.ast.LiteralNode;
033import org.apache.camel.language.simple.ast.LogicalExpression;
034import org.apache.camel.language.simple.ast.NullExpression;
035import org.apache.camel.language.simple.ast.SimpleFunctionEnd;
036import org.apache.camel.language.simple.ast.SimpleFunctionStart;
037import org.apache.camel.language.simple.ast.SimpleNode;
038import org.apache.camel.language.simple.ast.SingleQuoteEnd;
039import org.apache.camel.language.simple.ast.SingleQuoteStart;
040import org.apache.camel.language.simple.ast.UnaryExpression;
041import org.apache.camel.language.simple.types.BinaryOperatorType;
042import org.apache.camel.language.simple.types.LogicalOperatorType;
043import org.apache.camel.language.simple.types.SimpleIllegalSyntaxException;
044import org.apache.camel.language.simple.types.SimpleParserException;
045import org.apache.camel.language.simple.types.SimpleToken;
046import org.apache.camel.language.simple.types.TokenType;
047import org.apache.camel.util.ExpressionToPredicateAdapter;
048
049/**
050 * A parser to parse simple language as a Camel {@link Predicate}
051 */
052public class SimplePredicateParser extends BaseSimpleParser {
053
054    @Deprecated
055    public SimplePredicateParser(String expression) {
056        super(expression, true);
057    }
058
059    public SimplePredicateParser(String expression, boolean allowEscape) {
060        super(expression, allowEscape);
061    }
062
063    public Predicate parsePredicate() {
064        clear();
065        try {
066            return doParsePredicate();
067        } catch (SimpleParserException e) {
068            // catch parser exception and turn that into a syntax exceptions
069            throw new SimpleIllegalSyntaxException(expression, e.getIndex(), e.getMessage(), e);
070        } catch (Exception e) {
071            // include exception in rethrown exception
072            throw new SimpleIllegalSyntaxException(expression, -1, e.getMessage(), e);
073        }
074    }
075
076    protected Predicate doParsePredicate() {
077
078        // parse using the following grammar
079        nextToken();
080        while (!token.getType().isEol()) {
081            // predicate supports quotes, functions, operators and whitespaces
082            //CHECKSTYLE:OFF
083            if (!singleQuotedLiteralWithFunctionsText()
084                    && !doubleQuotedLiteralWithFunctionsText()
085                    && !functionText()
086                    && !unaryOperator()
087                    && !binaryOperator()
088                    && !logicalOperator()
089                    && !isBooleanValue()
090                    && !token.getType().isWhitespace()
091                    && !token.getType().isEol()) {
092                // okay the symbol was not one of the above, so its not supported
093                // use the previous index as that is where the problem is
094                throw new SimpleParserException("Unexpected token " + token, previousIndex);
095            }
096            //CHECKSTYLE:ON
097            // take the next token
098            nextToken();
099        }
100
101        // now after parsing we need a bit of work to do, to make it easier to turn the tokens
102        // into and ast, and then from the ast, to Camel predicate(s).
103        // hence why there is a number of tasks going on below to accomplish this
104
105        // remove any ignorable white space tokens
106        removeIgnorableWhiteSpaceTokens();
107        // turn the tokens into the ast model
108        parseTokensAndCreateNodes();
109        // compact and stack blocks (eg function start/end, quotes start/end, etc.)
110        prepareBlocks();
111        // compact and stack unary expressions
112        prepareUnaryExpressions();
113        // compact and stack binary expressions
114        prepareBinaryExpressions();
115        // compact and stack logical expressions
116        prepareLogicalExpressions();
117
118        // create and return as a Camel predicate
119        List<Predicate> predicates = createPredicates();
120        if (predicates.isEmpty()) {
121            // return a false predicate as response as there was nothing to parse
122            return PredicateBuilder.constant(false);
123        } else if (predicates.size() == 1) {
124            return predicates.get(0);
125        } else {
126            return PredicateBuilder.and(predicates);
127        }
128    }
129
130    /**
131     * Parses the tokens and crates the AST nodes.
132     * <p/>
133     * After the initial parsing of the input (input -> tokens) then we
134     * parse again (tokens -> ast).
135     * <p/>
136     * In this parsing the balance of the blocks is checked, so that each block has a matching
137     * start and end token. For example a single quote block, or a function block etc.
138     */
139    protected void parseTokensAndCreateNodes() {
140        // we loop the tokens and create a sequence of ast nodes
141
142        // we need to keep a bit of state for keeping track of single and double quotes
143        // which need to be balanced and have matching start/end pairs
144        SimpleNode lastSingle = null;
145        SimpleNode lastDouble = null;
146        SimpleNode lastFunction = null;
147        AtomicBoolean startSingle = new AtomicBoolean(false);
148        AtomicBoolean startDouble = new AtomicBoolean(false);
149        AtomicBoolean startFunction = new AtomicBoolean(false);
150
151        LiteralNode imageToken = null;
152        for (SimpleToken token : tokens) {
153            // break if eol
154            if (token.getType().isEol()) {
155                break;
156            }
157
158            // create a node from the token
159            SimpleNode node = createNode(token, startSingle, startDouble, startFunction);
160            if (node != null) {
161                // keep state of last single/double
162                if (node instanceof SingleQuoteStart) {
163                    lastSingle = node;
164                } else if (node instanceof DoubleQuoteStart) {
165                    lastDouble = node;
166                } else if (node instanceof SimpleFunctionStart) {
167                    lastFunction = node;
168                }
169
170                // a new token was created so the current image token need to be added first
171                if (imageToken != null) {
172                    nodes.add(imageToken);
173                    imageToken = null;
174                }
175                // and then add the created node
176                nodes.add(node);
177                // continue to next
178                continue;
179            }
180
181            // if no token was created then its a character/whitespace/escaped symbol
182            // which we need to add together in the same image
183            if (imageToken == null) {
184                imageToken = new LiteralExpression(token);
185            }
186            imageToken.addText(token.getText());
187        }
188
189        // append any leftover image tokens (when we reached eol)
190        if (imageToken != null) {
191            nodes.add(imageToken);
192        }
193
194        // validate the single, double quote pairs and functions is in balance
195        if (startSingle.get()) {
196            int index = lastSingle != null ? lastSingle.getToken().getIndex() : 0;
197            throw new SimpleParserException("single quote has no ending quote", index);
198        }
199        if (startDouble.get()) {
200            int index = lastDouble != null ? lastDouble.getToken().getIndex() : 0;
201            throw new SimpleParserException("double quote has no ending quote", index);
202        }
203        if (startFunction.get()) {
204            // we have a start function, but no ending function
205            int index = lastFunction != null ? lastFunction.getToken().getIndex() : 0;
206            throw new SimpleParserException("function has no ending token", index);
207        }
208    }
209
210
211    /**
212     * Creates a node from the given token
213     *
214     * @param token         the token
215     * @param startSingle   state of single quoted blocks
216     * @param startDouble   state of double quoted blocks
217     * @param startFunction state of function blocks
218     * @return the created node, or <tt>null</tt> to let a default node be created instead.
219     */
220    private SimpleNode createNode(SimpleToken token, AtomicBoolean startSingle, AtomicBoolean startDouble,
221                                  AtomicBoolean startFunction) {
222        if (token.getType().isFunctionStart()) {
223            startFunction.set(true);
224            return new SimpleFunctionStart(token);
225        } else if (token.getType().isFunctionEnd()) {
226            startFunction.set(false);
227            return new SimpleFunctionEnd(token);
228        }
229
230        // if we are inside a function, then we do not support any other kind of tokens
231        // as we want all the tokens to be literal instead
232        if (startFunction.get()) {
233            return null;
234        }
235
236        // okay so far we also want to support quotes
237        if (token.getType().isSingleQuote()) {
238            SimpleNode answer;
239            boolean start = startSingle.get();
240            if (!start) {
241                answer = new SingleQuoteStart(token);
242            } else {
243                answer = new SingleQuoteEnd(token);
244            }
245            // flip state on start/end flag
246            startSingle.set(!start);
247            return answer;
248        } else if (token.getType().isDoubleQuote()) {
249            SimpleNode answer;
250            boolean start = startDouble.get();
251            if (!start) {
252                answer = new DoubleQuoteStart(token);
253            } else {
254                answer = new DoubleQuoteEnd(token);
255            }
256            // flip state on start/end flag
257            startDouble.set(!start);
258            return answer;
259        }
260
261        // if we are inside a quote, then we do not support any further kind of tokens
262        // as we want to only support embedded functions and all other kinds to be literal tokens
263        if (startSingle.get() || startDouble.get()) {
264            return null;
265        }
266
267        // okay we are not inside a function or quote, so we want to support operators
268        // and the special null value as well
269        if (token.getType().isUnary()) {
270            return new UnaryExpression(token);
271        } else if (token.getType().isBinary()) {
272            return new BinaryExpression(token);
273        } else if (token.getType().isLogical()) {
274            return new LogicalExpression(token);
275        } else if (token.getType().isNullValue()) {
276            return new NullExpression(token);
277        }
278
279        // by returning null, we will let the parser determine what to do
280        return null;
281    }
282
283    /**
284     * Removes any ignorable whitespace tokens.
285     * <p/>
286     * During the initial parsing (input -> tokens), then there may
287     * be excessive whitespace tokens, which can safely be removed,
288     * which makes the succeeding parsing easier.
289     */
290    private void removeIgnorableWhiteSpaceTokens() {
291        // white space can be removed if its not part of a quoted text or within function(s)
292        boolean quote = false;
293        int functionCount = 0;
294
295        Iterator<SimpleToken> it = tokens.iterator();
296        while (it.hasNext()) {
297            SimpleToken token = it.next();
298            if (token.getType().isSingleQuote()) {
299                quote = !quote;
300            } else if (!quote) {
301                if (token.getType().isFunctionStart()) {
302                    functionCount++;
303                } else if (token.getType().isFunctionEnd()) {
304                    functionCount--;
305                } else if (token.getType().isWhitespace() && functionCount == 0) {
306                    it.remove();
307                }
308            }
309        }
310    }
311
312    /**
313     * Prepares binary expressions.
314     * <p/>
315     * This process prepares the binary expressions in the AST. This is done
316     * by linking the binary operator with both the right and left hand side
317     * nodes, to have the AST graph updated and prepared properly.
318     * <p/>
319     * So when the AST node is later used to create the {@link Predicate}s
320     * to be used by Camel then the AST graph has a linked and prepared
321     * graph of nodes which represent the input expression.
322     */
323    private void prepareBinaryExpressions() {
324        Stack<SimpleNode> stack = new Stack<SimpleNode>();
325
326        SimpleNode left = null;
327        for (int i = 0; i < nodes.size(); i++) {
328            if (left == null) {
329                left = i > 0 ? nodes.get(i - 1) : null;
330            }
331            SimpleNode token = nodes.get(i);
332            SimpleNode right = i < nodes.size() - 1 ? nodes.get(i + 1) : null;
333
334            if (token instanceof BinaryExpression) {
335                BinaryExpression binary = (BinaryExpression) token;
336
337                // remember the binary operator
338                String operator = binary.getOperator().toString();
339
340                if (left == null) {
341                    throw new SimpleParserException("Binary operator " + operator + " has no left hand side token", token.getToken().getIndex());
342                }
343                if (!binary.acceptLeftNode(left)) {
344                    throw new SimpleParserException("Binary operator " + operator + " does not support left hand side token " + left.getToken(), token.getToken().getIndex());
345                }
346                if (right == null) {
347                    throw new SimpleParserException("Binary operator " + operator + " has no right hand side token", token.getToken().getIndex());
348                }
349                if (!binary.acceptRightNode(right)) {
350                    throw new SimpleParserException("Binary operator " + operator + " does not support right hand side token " + right.getToken(), token.getToken().getIndex());
351                }
352
353                // pop previous as we need to replace it with this binary operator
354                stack.pop();
355                stack.push(token);
356                // advantage after the right hand side
357                i++;
358                // this token is now the left for the next loop
359                left = token;
360            } else {
361                // clear left
362                left = null;
363                stack.push(token);
364            }
365        }
366
367        nodes.clear();
368        nodes.addAll(stack);
369    }
370
371    /**
372     * Prepares logical expressions.
373     * <p/>
374     * This process prepares the logical expressions in the AST. This is done
375     * by linking the logical operator with both the right and left hand side
376     * nodes, to have the AST graph updated and prepared properly.
377     * <p/>
378     * So when the AST node is later used to create the {@link Predicate}s
379     * to be used by Camel then the AST graph has a linked and prepared
380     * graph of nodes which represent the input expression.
381     */
382    private void prepareLogicalExpressions() {
383        Stack<SimpleNode> stack = new Stack<SimpleNode>();
384
385        SimpleNode left = null;
386        for (int i = 0; i < nodes.size(); i++) {
387            if (left == null) {
388                left = i > 0 ? nodes.get(i - 1) : null;
389            }
390            SimpleNode token = nodes.get(i);
391            SimpleNode right = i < nodes.size() - 1 ? nodes.get(i + 1) : null;
392
393            if (token instanceof LogicalExpression) {
394                LogicalExpression logical = (LogicalExpression) token;
395
396                // remember the logical operator
397                String operator = logical.getOperator().toString();
398
399                if (left == null) {
400                    throw new SimpleParserException("Logical operator " + operator + " has no left hand side token", token.getToken().getIndex());
401                }
402                if (!logical.acceptLeftNode(left)) {
403                    throw new SimpleParserException("Logical operator " + operator + " does not support left hand side token " + left.getToken(), token.getToken().getIndex());
404                }
405                if (right == null) {
406                    throw new SimpleParserException("Logical operator " + operator + " has no right hand side token", token.getToken().getIndex());
407                }
408                if (!logical.acceptRightNode(right)) {
409                    throw new SimpleParserException("Logical operator " + operator + " does not support right hand side token " + left.getToken(), token.getToken().getIndex());
410                }
411
412                // pop previous as we need to replace it with this binary operator
413                stack.pop();
414                stack.push(token);
415                // advantage after the right hand side
416                i++;
417                // this token is now the left for the next loop
418                left = token;
419            } else {
420                // clear left
421                left = null;
422                stack.push(token);
423            }
424        }
425
426        nodes.clear();
427        nodes.addAll(stack);
428    }
429
430    /**
431     * Creates the {@link Predicate}s from the AST nodes.
432     *
433     * @return the created {@link Predicate}s, is never <tt>null</tt>.
434     */
435    private List<Predicate> createPredicates() {
436        List<Predicate> answer = new ArrayList<Predicate>();
437        for (SimpleNode node : nodes) {
438            Expression exp = node.createExpression(expression);
439            if (exp != null) {
440                Predicate predicate = ExpressionToPredicateAdapter.toPredicate(exp);
441                answer.add(predicate);
442            }
443        }
444        return answer;
445    }
446
447    // --------------------------------------------------------------
448    // grammar
449    // --------------------------------------------------------------
450
451    // the predicate parser understands a lot more than the expression parser
452    // - boolean value = either true or false value (literal)
453    // - single quoted = block of nodes enclosed by single quotes
454    // - double quoted = block of nodes enclosed by double quotes
455    // - single quoted with functions = block of nodes enclosed by single quotes allowing embedded functions
456    // - double quoted with functions = block of nodes enclosed by double quotes allowing embedded functions
457    // - function = simple functions such as ${body} etc
458    // - numeric = numeric value
459    // - boolean = boolean value
460    // - null = null value
461    // - unary operator = operator attached to the left hand side node
462    // - binary operator = operator attached to both the left and right hand side nodes
463    // - logical operator = operator attached to both the left and right hand side nodes
464
465    protected boolean isBooleanValue() {
466        if (accept(TokenType.booleanValue)) {
467            return true;
468        }
469        return false;
470    }
471
472    protected boolean singleQuotedLiteralWithFunctionsText() {
473        if (accept(TokenType.singleQuote)) {
474            nextToken(TokenType.singleQuote, TokenType.eol, TokenType.functionStart, TokenType.functionEnd);
475            while (!token.getType().isSingleQuote() && !token.getType().isEol()) {
476                // we need to loop until we find the ending single quote, or the eol
477                nextToken(TokenType.singleQuote, TokenType.eol, TokenType.functionStart, TokenType.functionEnd);
478            }
479            expect(TokenType.singleQuote);
480            return true;
481        }
482        return false;
483    }
484
485    protected boolean singleQuotedLiteralText() {
486        if (accept(TokenType.singleQuote)) {
487            nextToken(TokenType.singleQuote, TokenType.eol);
488            while (!token.getType().isSingleQuote() && !token.getType().isEol()) {
489                // we need to loop until we find the ending single quote, or the eol
490                nextToken(TokenType.singleQuote, TokenType.eol);
491            }
492            expect(TokenType.singleQuote);
493            return true;
494        }
495        return false;
496    }
497
498    protected boolean doubleQuotedLiteralWithFunctionsText() {
499        if (accept(TokenType.doubleQuote)) {
500            nextToken(TokenType.doubleQuote, TokenType.eol, TokenType.functionStart, TokenType.functionEnd);
501            while (!token.getType().isDoubleQuote() && !token.getType().isEol()) {
502                // we need to loop until we find the ending double quote, or the eol
503                nextToken(TokenType.doubleQuote, TokenType.eol, TokenType.functionStart, TokenType.functionEnd);
504            }
505            expect(TokenType.doubleQuote);
506            return true;
507        }
508        return false;
509    }
510
511    protected boolean doubleQuotedLiteralText() {
512        if (accept(TokenType.doubleQuote)) {
513            nextToken(TokenType.doubleQuote, TokenType.eol);
514            while (!token.getType().isDoubleQuote() && !token.getType().isEol()) {
515                // we need to loop until we find the ending double quote, or the eol
516                nextToken(TokenType.doubleQuote, TokenType.eol);
517            }
518            expect(TokenType.doubleQuote);
519            return true;
520        }
521        return false;
522    }
523
524    protected boolean functionText() {
525        if (accept(TokenType.functionStart)) {
526            nextToken();
527            while (!token.getType().isFunctionEnd() && !token.getType().isEol()) {
528                if (token.getType().isFunctionStart()) {
529                    // embedded function
530                    functionText();
531                }
532                // we need to loop until we find the ending function quote, an embedded function, or the eol
533                nextToken();
534            }
535            // if its not an embedded function then we expect the end token
536            if (!token.getType().isFunctionStart()) {
537                expect(TokenType.functionEnd);
538            }
539            return true;
540        }
541        return false;
542    }
543
544    protected boolean unaryOperator() {
545        if (accept(TokenType.unaryOperator)) {
546            nextToken();
547            // there should be a whitespace after the operator
548            expect(TokenType.whiteSpace);
549            return true;
550        }
551        return false;
552    }
553
554    protected boolean binaryOperator() {
555        if (accept(TokenType.binaryOperator)) {
556            // remember the binary operator
557            BinaryOperatorType operatorType = BinaryOperatorType.asOperator(token.getText());
558
559            nextToken();
560            // there should be at least one whitespace after the operator
561            expectAndAcceptMore(TokenType.whiteSpace);
562
563            // okay a binary operator may not support all kind if preceding parameters, so we need to limit this
564            BinaryOperatorType.ParameterType[] types = BinaryOperatorType.supportedParameterTypes(operatorType);
565
566            // based on the parameter types the binary operator support, we need to set this state into
567            // the following booleans so we know how to proceed in the grammar
568            boolean literalWithFunctionsSupported = false;
569            boolean literalSupported = false;
570            boolean functionSupported = false;
571            boolean numericSupported = false;
572            boolean booleanSupported = false;
573            boolean nullSupported = false;
574            if (types == null || types.length == 0) {
575                literalWithFunctionsSupported = true;
576                // favor literal with functions over literals without functions
577                literalSupported = false;
578                functionSupported = true;
579                numericSupported = true;
580                booleanSupported = true;
581                nullSupported = true;
582            } else {
583                for (BinaryOperatorType.ParameterType parameterType : types) {
584                    literalSupported |= parameterType.isLiteralSupported();
585                    literalWithFunctionsSupported |= parameterType.isLiteralWithFunctionSupport();
586                    functionSupported |= parameterType.isFunctionSupport();
587                    nullSupported |= parameterType.isNumericValueSupported();
588                    booleanSupported |= parameterType.isBooleanValueSupported();
589                    nullSupported |= parameterType.isNullValueSupported();
590                }
591            }
592
593            // then we proceed in the grammar according to the parameter types supported by the given binary operator
594            //CHECKSTYLE:OFF
595            if ((literalWithFunctionsSupported && singleQuotedLiteralWithFunctionsText())
596                    || (literalWithFunctionsSupported && doubleQuotedLiteralWithFunctionsText())
597                    || (literalSupported && singleQuotedLiteralText())
598                    || (literalSupported && doubleQuotedLiteralText())
599                    || (functionSupported && functionText())
600                    || (numericSupported && numericValue())
601                    || (booleanSupported && booleanValue())
602                    || (nullSupported && nullValue())) {
603                // then after the right hand side value, there should be a whitespace if there is more tokens
604                nextToken();
605                if (!token.getType().isEol()) {
606                    expect(TokenType.whiteSpace);
607                }
608            } else {
609                throw new SimpleParserException("Binary operator " + operatorType + " does not support token " + token, token.getIndex());
610            }
611            //CHECKSTYLE:ON
612            return true;
613        }
614        return false;
615    }
616
617    protected boolean logicalOperator() {
618        if (accept(TokenType.logicalOperator)) {
619            // remember the logical operator
620            LogicalOperatorType operatorType = LogicalOperatorType.asOperator(token.getText());
621
622            nextToken();
623            // there should be at least one whitespace after the operator
624            expectAndAcceptMore(TokenType.whiteSpace);
625
626            // then we expect either some quoted text, another function, or a numeric, boolean or null value
627            if (singleQuotedLiteralWithFunctionsText()
628                    || doubleQuotedLiteralWithFunctionsText()
629                    || functionText()
630                    || numericValue()
631                    || booleanValue()
632                    || nullValue()) {
633                // then after the right hand side value, there should be a whitespace if there is more tokens
634                nextToken();
635                if (!token.getType().isEol()) {
636                    expect(TokenType.whiteSpace);
637                }
638            } else {
639                throw new SimpleParserException("Logical operator " + operatorType + " does not support token " + token, token.getIndex());
640            }
641            return true;
642        }
643        return false;
644    }
645
646    protected boolean numericValue() {
647        return accept(TokenType.numericValue);
648        // no other tokens to check so do not use nextToken
649    }
650
651    protected boolean booleanValue() {
652        return accept(TokenType.booleanValue);
653        // no other tokens to check so do not use nextToken
654    }
655
656    protected boolean nullValue() {
657        return accept(TokenType.nullValue);
658        // no other tokens to check so do not use nextToken
659    }
660
661}