001/**
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.camel.language.simple;
018
019import java.util.ArrayList;
020import java.util.List;
021import java.util.Stack;
022
023import org.apache.camel.language.simple.ast.Block;
024import org.apache.camel.language.simple.ast.BlockEnd;
025import org.apache.camel.language.simple.ast.BlockStart;
026import org.apache.camel.language.simple.ast.SimpleNode;
027import org.apache.camel.language.simple.ast.UnaryExpression;
028import org.apache.camel.language.simple.types.SimpleParserException;
029import org.apache.camel.language.simple.types.SimpleToken;
030import org.apache.camel.language.simple.types.SimpleTokenType;
031import org.apache.camel.language.simple.types.TokenType;
032
033/**
034 * Base class for Simple language parser.
035 * <p/>
036 * This parser is based on the principles of a
037 * <a href="http://en.wikipedia.org/wiki/Recursive_descent_parser">recursive descent parser</a>.
038 */
039public abstract class BaseSimpleParser {
040
041    protected final String expression;
042    protected final List<SimpleToken> tokens = new ArrayList<SimpleToken>();
043    protected final List<SimpleNode> nodes = new ArrayList<SimpleNode>();
044    protected SimpleToken token;
045    protected int previousIndex;
046    protected int index;
047    protected boolean allowEscape = true;
048
049    protected BaseSimpleParser(String expression, boolean allowEscape) {
050        this.expression = expression;
051        this.allowEscape = allowEscape;
052    }
053
054    /**
055     * Advances the parser position to the next known {@link SimpleToken}
056     * in the input.
057     */
058    protected void nextToken() {
059        if (index < expression.length()) {
060            SimpleToken next = SimpleTokenizer.nextToken(expression, index, allowEscape);
061            // add token
062            tokens.add(next);
063            token = next;
064            // position index after the token
065            previousIndex = index;
066            index += next.getLength();
067        } else {
068            // end of tokens
069            token = new SimpleToken(new SimpleTokenType(TokenType.eol, null), index);
070        }
071    }
072
073    /**
074     * Advances the parser position to the next known {@link SimpleToken}
075     * in the input.
076     *
077     * @param filter filter for accepted token types
078     */
079    protected void nextToken(TokenType... filter) {
080        if (index < expression.length()) {
081            SimpleToken next = SimpleTokenizer.nextToken(expression, index, allowEscape, filter);
082            // add token
083            tokens.add(next);
084            token = next;
085            // position index after the token
086            previousIndex = index;
087            index += next.getLength();
088        } else {
089            // end of tokens
090            token = new SimpleToken(new SimpleTokenType(TokenType.eol, null), index);
091        }
092    }
093
094    /**
095     * Clears the parser state, which means it can be used for parsing a new input.
096     */
097    protected void clear() {
098        token = null;
099        previousIndex = 0;
100        index = 0;
101        tokens.clear();
102        nodes.clear();
103    }
104
105    /**
106     * Prepares blocks, such as functions, single or double quoted texts.
107     * <p/>
108     * This process prepares the {@link Block}s in the AST. This is done
109     * by linking child {@link SimpleNode nodes} which are within the start and end of the blocks,
110     * as child to the given block. This is done to have the AST graph updated and prepared properly.
111     * <p/>
112     * So when the AST node is later used to create the {@link org.apache.camel.Predicate}s
113     * or {@link org.apache.camel.Expression}s to be used by Camel then the AST graph
114     * has a linked and prepared graph of nodes which represent the input expression.
115     */
116    protected void prepareBlocks() {
117        List<SimpleNode> answer = new ArrayList<SimpleNode>();
118        Stack<Block> stack = new Stack<Block>();
119
120        for (SimpleNode token : nodes) {
121            if (token instanceof BlockStart) {
122                // a new block is started, so push on the stack
123                stack.push((Block) token);
124            } else if (token instanceof BlockEnd) {
125                // end block is just an abstract mode, so we should not add it
126                if (stack.isEmpty()) {
127                    throw new SimpleParserException(token.getToken().getType().getType() + " has no matching start token", token.getToken().getIndex());
128                }
129
130                Block top = stack.pop();
131                // if there is a block on the stack then it should accept the child token
132                Block block = stack.isEmpty() ? null : stack.peek();
133                if (block != null) {
134                    if (!block.acceptAndAddNode(top)) {
135                        throw new SimpleParserException(block.getToken().getType() + " cannot accept " + token.getToken().getType(), token.getToken().getIndex());
136                    }
137                } else {
138                    // no block, so add to answer
139                    answer.add(top);
140                }
141            } else {
142                // if there is a block on the stack then it should accept the child token
143                Block block = stack.isEmpty() ? null : stack.peek();
144                if (block != null) {
145                    if (!block.acceptAndAddNode(token)) {
146                        throw new SimpleParserException(block.getToken().getType() + " cannot accept " + token.getToken().getType(), token.getToken().getIndex());
147                    }
148                } else {
149                    // no block, so add to answer
150                    answer.add(token);
151                }
152            }
153        }
154
155        // replace nodes from the stack
156        nodes.clear();
157        nodes.addAll(answer);
158    }
159
160    /**
161     * Prepares unary expressions.
162     * <p/>
163     * This process prepares the unary expressions in the AST. This is done
164     * by linking the unary operator with the left hand side node,
165     * to have the AST graph updated and prepared properly.
166     * <p/>
167     * So when the AST node is later used to create the {@link org.apache.camel.Predicate}s
168     * or {@link org.apache.camel.Expression}s to be used by Camel then the AST graph
169     * has a linked and prepared graph of nodes which represent the input expression.
170     */
171    protected void prepareUnaryExpressions() {
172        Stack<SimpleNode> stack = new Stack<SimpleNode>();
173
174        for (SimpleNode node : nodes) {
175            if (node instanceof UnaryExpression) {
176                UnaryExpression token = (UnaryExpression) node;
177
178                // remember the logical operator
179                String operator = token.getOperator().toString();
180
181                SimpleNode previous = stack.isEmpty() ? null : stack.pop();
182                if (previous == null) {
183                    throw new SimpleParserException("Unary operator " + operator + " has no left hand side token", token.getToken().getIndex());
184                } else {
185                    token.acceptLeft(previous);
186                }
187            }
188            stack.push(node);
189        }
190
191        // replace nodes from the stack
192        nodes.clear();
193        nodes.addAll(stack);
194    }
195
196    // --------------------------------------------------------------
197    // grammar
198    // --------------------------------------------------------------
199
200    /**
201     * Accept the given token.
202     * <p/>
203     * This is to be used by the grammar to accept tokens and then continue parsing
204     * using the grammar, such as a function grammar.
205     *
206     * @param accept  the token
207     * @return <tt>true</tt> if accepted, <tt>false</tt> otherwise.
208     */
209    protected boolean accept(TokenType accept) {
210        return token == null || token.getType().getType() == accept;
211    }
212
213    /**
214     * Expect a given token
215     *
216     * @param expect the token to expect
217     * @throws SimpleParserException is thrown if the token is not as expected
218     */
219    protected void expect(TokenType expect) throws SimpleParserException {
220        if (token != null && token.getType().getType() == expect) {
221            return;
222        } else if (token == null) {
223            // use the previous index as that is where the problem is
224            throw new SimpleParserException("expected symbol " + expect + " but reached eol", previousIndex);
225        } else {
226            // use the previous index as that is where the problem is
227            throw new SimpleParserException("expected symbol " + expect + " but was " + token.getType().getType(), previousIndex);
228        }
229    }
230
231    /**
232     * Expect and accept a given number of tokens in sequence.
233     * <p/>
234     * This is used to accept whitespace or string literals.
235     *
236     * @param expect the token to accept
237     */
238    protected void expectAndAcceptMore(TokenType expect) {
239        expect(expect);
240
241        while (!token.getType().isEol() && token.getType().getType() == expect) {
242            nextToken();
243        }
244    }
245
246}