001/**
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.camel.support;
018
019import java.io.Closeable;
020import java.io.IOException;
021import java.io.InputStream;
022import java.text.MessageFormat;
023import java.util.ArrayList;
024import java.util.Iterator;
025import java.util.LinkedHashMap;
026import java.util.List;
027import java.util.Map;
028import java.util.Scanner;
029import java.util.regex.MatchResult;
030import java.util.regex.Matcher;
031import java.util.regex.Pattern;
032
033import org.apache.camel.Exchange;
034import org.apache.camel.InvalidPayloadException;
035import org.apache.camel.language.simple.SimpleLanguage;
036import org.apache.camel.util.IOHelper;
037import org.apache.camel.util.ObjectHelper;
038
039/**
040 * {@link org.apache.camel.Expression} to walk a {@link org.apache.camel.Message} XML body
041 * using an {@link java.util.Iterator}, which grabs the content between a XML start and end token,
042 * where the end token corresponds implicitly to either the end tag or the self-closing start tag.
043 * <p/>
044 * The message body must be able to convert to {@link java.io.InputStream} type which is used as stream
045 * to access the message body.
046 * <p/>
047 * Can be used to split big XML files.
048 * <p/>
049 * This implementation supports inheriting namespaces from a parent/root tag.
050 */
051public class TokenXMLExpressionIterator extends ExpressionAdapter {
052    private static final Pattern NAMESPACE_PATTERN = Pattern.compile("xmlns(:\\w+|)\\s*=\\s*('[^']+'|\"[^\"]+\")");
053    private static final String SCAN_TOKEN_NS_PREFIX_REGEX = "([^:<>]{1,15}?:|)";
054    private static final String SCAN_BLOCK_TOKEN_REGEX_TEMPLATE = "<{0}(\\s+[^>]*)?/>|<{0}(\\s+[^>]*)?>(?:(?!(</{0}\\s*>)).)*</{0}\\s*>";
055    private static final String SCAN_PARENT_TOKEN_REGEX_TEMPLATE = "<{0}(\\s+[^>]*\\s*)?>";
056    private static final String OPTION_WRAP_TOKEN = "<*>";
057
058    protected final String tagToken;
059    protected final String inheritNamespaceToken;
060
061    public TokenXMLExpressionIterator(String tagToken, String inheritNamespaceToken) {
062        ObjectHelper.notEmpty(tagToken, "tagToken");
063        this.tagToken = tagToken;
064        // namespace token is optional
065        this.inheritNamespaceToken = inheritNamespaceToken;
066    }
067
068    protected Iterator<?> createIterator(Exchange exchange, InputStream in, String charset) {
069        String tag = tagToken;
070        if (SimpleLanguage.hasSimpleFunction(tag)) {
071            tag = SimpleLanguage.expression(tag).evaluate(exchange, String.class);
072        }
073        String inherit = inheritNamespaceToken;
074        if (inherit != null && SimpleLanguage.hasSimpleFunction(inherit)) {
075            inherit = SimpleLanguage.expression(inherit).evaluate(exchange, String.class);
076        }
077
078        // must be XML tokens
079        if (!tag.startsWith("<")) {
080            tag = "<" + tag;
081        }
082        if (!tag.endsWith(">")) {
083            tag = tag + ">";
084        }
085
086        if (inherit != null) {
087            if (!inherit.startsWith("<")) {
088                inherit = "<" + inherit;
089            }
090            if (!inherit.endsWith(">")) {
091                inherit = inherit + ">";
092            }
093        }
094
095        // must be XML tokens
096        if (!tag.startsWith("<") || !tag.endsWith(">")) {
097            throw new IllegalArgumentException("XML Tag token must be a valid XML tag, was: " + tag);
098        }
099        if (inherit != null && (!inherit.startsWith("<") || !inherit.endsWith(">"))) {
100            throw new IllegalArgumentException("Namespace token must be a valid XML token, was: " + inherit);
101        }
102
103        XMLTokenIterator iterator = new XMLTokenIterator(tag, inherit, in, charset);
104        iterator.init();
105        return iterator;
106    }
107
108    @Override
109    public boolean matches(Exchange exchange) {
110        // as a predicate we must close the stream, as we do not return an iterator that can be used
111        // afterwards to iterate the input stream
112        Object value = doEvaluate(exchange, true);
113        return ObjectHelper.evaluateValuePredicate(value);
114    }
115
116    @Override
117    public Object evaluate(Exchange exchange) {
118        // as we return an iterator to access the input stream, we should not close it
119        return doEvaluate(exchange, false);
120    }
121
122    /**
123     * Strategy to evaluate the exchange
124     *
125     * @param exchange   the exchange
126     * @param closeStream whether to close the stream before returning from this method.
127     * @return the evaluated value
128     */
129    protected Object doEvaluate(Exchange exchange, boolean closeStream) {
130        InputStream in = null;
131        try {
132            in = exchange.getIn().getMandatoryBody(InputStream.class);
133            // we may read from a file, and want to support custom charset defined on the exchange
134            String charset = IOHelper.getCharsetName(exchange);
135            return createIterator(exchange, in, charset);
136        } catch (InvalidPayloadException e) {
137            exchange.setException(e);
138            // must close input stream
139            IOHelper.close(in);
140            return null;
141        } finally {
142            if (closeStream) {
143                IOHelper.close(in);
144            }
145        }
146    }
147    
148    /**
149     * Iterator to walk the input stream
150     */
151    static class XMLTokenIterator implements Iterator<Object>, Closeable {
152        final String tagToken;
153        final InputStream in;
154        final String charset;
155        Scanner scanner;
156        Object image;
157
158        private final Pattern tagTokenPattern;
159        private final String inheritNamespaceToken;
160        private final boolean wrapToken;
161        private Pattern inheritNamespaceTokenPattern;
162        private String rootTokenNamespaces;
163        private String wrapHead;
164        private String wrapTail;
165
166        XMLTokenIterator(String tagToken, String inheritNamespaceToken, InputStream in, String charset) {
167            this.tagToken = tagToken;
168            this.charset = charset;
169          
170            // remove any beginning < and ending > as we need to support ns prefixes and attributes, so we use a reg exp patterns
171            this.tagTokenPattern = 
172                Pattern.compile(MessageFormat.format(SCAN_BLOCK_TOKEN_REGEX_TEMPLATE, 
173                                                     SCAN_TOKEN_NS_PREFIX_REGEX + tagToken.substring(1, tagToken.length() - 1)), 
174                                                     Pattern.MULTILINE | Pattern.DOTALL);
175            
176            this.inheritNamespaceToken = inheritNamespaceToken;
177            if (inheritNamespaceToken != null && OPTION_WRAP_TOKEN.equals(inheritNamespaceToken)) {
178                this.wrapToken = true;
179                this.in = new RecordableInputStream(in, charset);
180            } else {
181                this.wrapToken = false;
182                this.in = in;
183                if (inheritNamespaceToken != null) {
184                    // the inherit namespace token may itself have a namespace prefix
185                    // the namespaces on the parent tag can be in multi line, so we need to instruct the dot to support multilines
186                    this.inheritNamespaceTokenPattern = 
187                        Pattern.compile(MessageFormat.format(SCAN_PARENT_TOKEN_REGEX_TEMPLATE,
188                                                             SCAN_TOKEN_NS_PREFIX_REGEX + inheritNamespaceToken.substring(1, inheritNamespaceToken.length() - 1)), 
189                                                             Pattern.MULTILINE | Pattern.DOTALL);
190                }
191            }
192        }
193
194        void init() {
195            // use a scanner with the default delimiter
196            this.scanner = new Scanner(in, charset);
197            this.image = scanner.hasNext() ? (String) next(true) : null;
198        }
199
200        String getNext(boolean first) {
201            // initialize inherited namespaces on first
202            if (first && inheritNamespaceToken != null && !wrapToken) {
203                rootTokenNamespaces =  getNamespacesFromNamespaceToken(scanner.findWithinHorizon(inheritNamespaceTokenPattern, 0));
204            }
205
206            String next = scanner.findWithinHorizon(tagTokenPattern, 0);
207            if (next == null) {
208                return null;
209            }
210            if (first && wrapToken) {
211                MatchResult mres = scanner.match();
212                wrapHead = ((RecordableInputStream)in).getText(mres.start());
213                wrapTail = buildXMLTail(wrapHead);
214            }
215
216            // build answer accordingly to whether namespaces should be inherited or not
217            if (inheritNamespaceToken != null && rootTokenNamespaces != null) {
218                // REVISIT should skip the prefixes that are declared within the child itself.
219                String head = ObjectHelper.before(next, ">");
220                boolean empty = false;
221                if (head.endsWith("/")) {
222                    head = head.substring(0, head.length() - 1);
223                    empty = true;
224                }
225                StringBuilder sb = new StringBuilder();
226                // append root namespaces to local start token
227                // grab the text
228                String tail = ObjectHelper.after(next, ">");
229                // build result with inherited namespaces
230                next = sb.append(head).append(rootTokenNamespaces).append(empty ? "/>" : ">").append(tail).toString();
231            } else if (wrapToken) {
232                // wrap the token
233                StringBuilder sb = new StringBuilder();
234                next = sb.append(wrapHead).append(next).append(wrapTail).toString();
235            }
236            
237            return next;
238        }
239
240        private String getNamespacesFromNamespaceToken(String text) {
241            if (text == null) {
242                return null;
243            }
244
245            // find namespaces (there can be attributes mixed, so we should only grab the namespaces)
246            Map<String, String> namespaces = new LinkedHashMap<>();
247            Matcher matcher = NAMESPACE_PATTERN.matcher(text);
248            while (matcher.find()) {
249                String prefix = matcher.group(1);
250                String url = matcher.group(2);
251                if (ObjectHelper.isEmpty(prefix)) {
252                    prefix = "_DEFAULT_";
253                } else {
254                    // skip leading :
255                    prefix = prefix.substring(1);
256                }
257                namespaces.put(prefix, url);
258            }
259
260            // did we find any namespaces
261            if (namespaces.isEmpty()) {
262                return null;
263            }
264
265            // build namespace String
266            StringBuilder sb = new StringBuilder();
267            for (Map.Entry<String, String> entry : namespaces.entrySet()) {
268                String key = entry.getKey();
269                // note the value is already quoted
270                String value = entry.getValue();
271                if ("_DEFAULT_".equals(key)) {
272                    sb.append(" xmlns=").append(value);
273                } else {
274                    sb.append(" xmlns:").append(key).append("=").append(value);
275                }
276            }
277
278            return sb.toString();
279        }
280        
281        @Override
282        public boolean hasNext() {
283            return image != null;
284        }
285
286        @Override
287        public Object next() {
288            return next(false);
289        }
290
291        Object next(boolean first) {
292            Object answer = image;
293            // calculate next
294            if (scanner.hasNext()) {
295                image = getNext(first);
296            } else {
297                image = null;
298            }
299
300            if (answer == null) {
301                // first time the image may be null
302                answer = image;
303            }
304            return answer;
305        }
306
307        @Override
308        public void remove() {
309            // noop
310        }
311
312        @Override
313        public void close() throws IOException {
314            scanner.close();
315        }
316
317    }
318
319    private static String buildXMLTail(String xmlhead) {
320        // assume the input text is a portion of a well-formed xml
321        List<String> tags = new ArrayList<>();
322        int p = 0;
323        while (p < xmlhead.length()) {
324            p = xmlhead.indexOf('<', p);
325            if (p < 0) {
326                break;
327            }
328            int nc = xmlhead.charAt(p + 1); 
329            if (nc == '?') {
330                p++;
331                continue;
332            } else if (nc == '/') {
333                p++;
334                tags.remove(tags.size() - 1);
335            } else {
336                final int ep = xmlhead.indexOf('>', p);
337                if (xmlhead.charAt(ep - 1) == '/') {
338                    p++;
339                    continue;
340                }
341                final int sp = xmlhead.substring(p, ep).indexOf(' ');
342                tags.add(xmlhead.substring(p + 1, sp > 0 ? p + sp : ep));
343                p = ep;
344            }
345        }
346        StringBuilder sb = new StringBuilder();
347        for (int i = tags.size() - 1; i >= 0; i--) {
348            sb.append("</").append(tags.get(i)).append(">");
349        }
350        return sb.toString();
351    }
352}