/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.camel.support;

import java.io.Closeable;
import java.io.IOException;
import java.io.InputStream;
import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Scanner;
import java.util.regex.MatchResult;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.camel.Exchange;
import org.apache.camel.InvalidPayloadException;
import org.apache.camel.language.simple.SimpleLanguage;
import org.apache.camel.util.IOHelper;
import org.apache.camel.util.ObjectHelper;

/**
 * {@link org.apache.camel.Expression} to walk a {@link org.apache.camel.Message} XML body
 * using an {@link java.util.Iterator}, which grabs the content between an XML start and end token,
 * where the end token corresponds implicitly to either the end tag or the self-closing start tag.
 * <p/>
 * The message body must be convertible to {@link java.io.InputStream}, which is used as the stream
 * to access the message body.
 * <p/>
 * Can be used to split big XML files.
 * <p/>
 * This implementation supports inheriting namespaces from a parent/root tag. Alternatively, when the
 * inherit namespace token is <tt>&lt;*&gt;</tt> (or just <tt>*</tt>), each token is wrapped in the
 * XML that precedes the first token, with the tags left open by that XML closed again after the token.
 */
public class TokenXMLExpressionIterator extends ExpressionAdapter {
    private static final Pattern NAMESPACE_PATTERN = Pattern.compile("xmlns(:\\w+|)\\s*=\\s*('[^']+'|\"[^\"]+\")");
    private static final String SCAN_TOKEN_NS_PREFIX_REGEX = "([^:<>]{1,15}?:|)";
    private static final String SCAN_BLOCK_TOKEN_REGEX_TEMPLATE = "<{0}(\\s+[^>]*)?/>|<{0}(\\s+[^>]*)?>(?:(?!(</{0}\\s*>)).)*</{0}\\s*>";
    private static final String SCAN_PARENT_TOKEN_REGEX_TEMPLATE = "<{0}(\\s+[^>]*\\s*)?>";
    private static final String OPTION_WRAP_TOKEN = "<*>";

    protected final String tagToken;
    protected final String inheritNamespaceToken;

    /**
     * Creates the expression.
     *
     * @param tagToken              the XML tag to split on, with or without the surrounding
     *                              <tt>&lt;</tt> and <tt>&gt;</tt>; must not be empty
     * @param inheritNamespaceToken optional parent tag whose namespace declarations are copied onto
     *                              every token, or <tt>&lt;*&gt;</tt> to wrap every token instead;
     *                              may be <tt>null</tt>
     */
    public TokenXMLExpressionIterator(String tagToken, String inheritNamespaceToken) {
        ObjectHelper.notEmpty(tagToken, "tagToken");
        this.tagToken = tagToken;
        // namespace token is optional
        this.inheritNamespaceToken = inheritNamespaceToken;
    }

    /**
     * Creates and initializes the iterator that walks the given stream.
     * <p/>
     * Both tokens may contain simple language functions, which are evaluated against the exchange
     * before the tokens are normalized to the <tt>&lt;name&gt;</tt> form.
     *
     * @param exchange the current exchange
     * @param in       the stream holding the XML content
     * @param charset  the charset name used to decode the stream
     * @return the initialized iterator
     * @throws IllegalArgumentException if the (evaluated) tag token is <tt>null</tt> or has an empty name
     */
    protected Iterator<?> createIterator(Exchange exchange, InputStream in, String charset) {
        String tag = tagToken;
        if (SimpleLanguage.hasSimpleFunction(tag)) {
            tag = SimpleLanguage.expression(tag).evaluate(exchange, String.class);
        }
        String inherit = inheritNamespaceToken;
        if (inherit != null && SimpleLanguage.hasSimpleFunction(inherit)) {
            inherit = SimpleLanguage.expression(inherit).evaluate(exchange, String.class);
        }

        // must be XML tokens; a simple function may have evaluated to null or to nothing at all,
        // in which case there is no tag name to scan for
        if (tag == null) {
            throw new IllegalArgumentException("XML Tag token must be a valid XML tag, was: " + tag);
        }
        tag = toXmlToken(tag);
        if (tag.length() < 3) {
            throw new IllegalArgumentException("XML Tag token must be a valid XML tag, was: " + tag);
        }

        if (inherit != null) {
            inherit = toXmlToken(inherit);
            if (inherit.length() < 3) {
                // an empty token names no parent tag, so there is nothing to inherit from
                inherit = null;
            }
        }

        XMLTokenIterator iterator = new XMLTokenIterator(tag, inherit, in, charset);
        iterator.init();
        return iterator;
    }

    /**
     * Surrounds the token with <tt>&lt;</tt> and <tt>&gt;</tt> unless they are already present.
     */
    private static String toXmlToken(String token) {
        String answer = token;
        if (!answer.startsWith("<")) {
            answer = "<" + answer;
        }
        if (!answer.endsWith(">")) {
            answer = answer + ">";
        }
        return answer;
    }

    @Override
    public boolean matches(Exchange exchange) {
        // as a predicate we must close the stream, as we do not return an iterator that can be used
        // afterwards to iterate the input stream
        Object value = doEvaluate(exchange, true);
        return ObjectHelper.evaluateValuePredicate(value);
    }

    @Override
    public Object evaluate(Exchange exchange) {
        // as we return an iterator to access the input stream, we should not close it
        return doEvaluate(exchange, false);
    }

    /**
     * Strategy to evaluate the exchange
     *
     * @param exchange    the exchange
     * @param closeStream whether to close the stream before returning from this method.
     * @return the evaluated value, or <tt>null</tt> if the message body could not be converted to an
     *         {@link InputStream}, in which case the failure is set as exception on the exchange
     */
    protected Object doEvaluate(Exchange exchange, boolean closeStream) {
        InputStream in = null;
        try {
            in = exchange.getIn().getMandatoryBody(InputStream.class);
            // we may read from a file, and want to support custom charset defined on the exchange
            String charset = IOHelper.getCharsetName(exchange);
            return createIterator(exchange, in, charset);
        } catch (InvalidPayloadException e) {
            exchange.setException(e);
            // must close input stream
            IOHelper.close(in);
            return null;
        } finally {
            if (closeStream) {
                IOHelper.close(in);
            }
        }
    }

    /**
     * Iterator to walk the input stream
     */
    static class XMLTokenIterator implements Iterator<Object>, Closeable {
        final String tagToken;
        final InputStream in;
        final String charset;
        Scanner scanner;
        Object image;

        private final Pattern tagTokenPattern;
        private final String inheritNamespaceToken;
        private final boolean wrapToken;
        private Pattern inheritNamespaceTokenPattern;
        // namespace declarations found on the parent tag, keyed by prefix ("" is the default
        // namespace) and mapped to the still quoted uri; null when there is nothing to inherit
        private Map<String, String> rootNamespaces;
        private String wrapHead;
        private String wrapTail;

        /**
         * @param tagToken              the tag to split on, in the form <tt>&lt;name&gt;</tt>
         * @param inheritNamespaceToken the parent tag in the form <tt>&lt;name&gt;</tt>, the wrap
         *                              option <tt>&lt;*&gt;</tt>, or <tt>null</tt>
         * @param in                    the stream to read from
         * @param charset               the charset name used to decode the stream
         */
        XMLTokenIterator(String tagToken, String inheritNamespaceToken, InputStream in, String charset) {
            this.tagToken = tagToken;
            this.charset = charset;

            // remove any beginning < and ending > as we need to support ns prefixes and attributes, so we use a reg exp patterns
            this.tagTokenPattern =
                Pattern.compile(MessageFormat.format(SCAN_BLOCK_TOKEN_REGEX_TEMPLATE,
                                                     SCAN_TOKEN_NS_PREFIX_REGEX + tagToken.substring(1, tagToken.length() - 1)),
                                                     Pattern.MULTILINE | Pattern.DOTALL);

            this.inheritNamespaceToken = inheritNamespaceToken;
            if (inheritNamespaceToken != null && OPTION_WRAP_TOKEN.equals(inheritNamespaceToken)) {
                this.wrapToken = true;
                // the text preceding the first token must be available afterwards, so record what is read
                this.in = new RecordableInputStream(in, charset);
            } else {
                this.wrapToken = false;
                this.in = in;
                if (inheritNamespaceToken != null) {
                    // the inherit namespace token may itself have a namespace prefix
                    // the namespaces on the parent tag can be in multi line, so we need to instruct the dot to support multilines
                    this.inheritNamespaceTokenPattern =
                        Pattern.compile(MessageFormat.format(SCAN_PARENT_TOKEN_REGEX_TEMPLATE,
                                                             SCAN_TOKEN_NS_PREFIX_REGEX + inheritNamespaceToken.substring(1, inheritNamespaceToken.length() - 1)),
                                                             Pattern.MULTILINE | Pattern.DOTALL);
                }
            }
        }

        /**
         * Opens the scanner on the stream and pre-fetches the first token, if there is any.
         */
        void init() {
            // use a scanner with the default delimiter
            this.scanner = new Scanner(in, charset);
            this.image = scanner.hasNext() ? (String) next(true) : null;
        }

        /**
         * Scans for the next token.
         *
         * @param first whether this is the very first scan, during which the inherited namespaces
         *              (or the wrapping head and tail) are captured
         * @return the next token, adjusted for namespace inheritance or wrapping,
         *         or <tt>null</tt> if there are no more tokens
         */
        String getNext(boolean first) {
            // initialize inherited namespaces on first
            if (first && inheritNamespaceToken != null && !wrapToken) {
                Map<String, String> found = parseNamespaces(scanner.findWithinHorizon(inheritNamespaceTokenPattern, 0));
                rootNamespaces = found.isEmpty() ? null : found;
            }

            String next = scanner.findWithinHorizon(tagTokenPattern, 0);
            if (next == null) {
                return null;
            }
            if (first && wrapToken) {
                // everything in front of the first token becomes the head wrapped around each token
                MatchResult mres = scanner.match();
                wrapHead = ((RecordableInputStream) in).getText(mres.start());
                wrapTail = buildXMLTail(wrapHead);
            }

            // build answer accordingly to whether namespaces should be inherited or not
            if (inheritNamespaceToken != null && rootNamespaces != null) {
                next = inheritNamespaces(next);
            } else if (wrapToken) {
                // wrap the token
                next = new StringBuilder().append(wrapHead).append(next).append(wrapTail).toString();
            }

            return next;
        }

        /**
         * Adds the namespace declarations of the parent tag to the start tag of the given token.
         * <p/>
         * A prefix the token declares itself is not added again: repeating it would yield two
         * attributes with the same name, which is not well-formed XML, and the declaration on the
         * token is the one that is in scope anyway.
         */
        private String inheritNamespaces(String token) {
            String head = ObjectHelper.before(token, ">");
            boolean empty = head.endsWith("/");
            if (empty) {
                head = head.substring(0, head.length() - 1);
            }
            // grab the text
            String tail = ObjectHelper.after(token, ">");

            Map<String, String> own = parseNamespaces(head);
            // append the missing root namespaces to the local start token
            StringBuilder sb = new StringBuilder(head);
            for (Map.Entry<String, String> entry : rootNamespaces.entrySet()) {
                String prefix = entry.getKey();
                if (own.containsKey(prefix)) {
                    continue;
                }
                // note the value is already quoted
                if (prefix.isEmpty()) {
                    sb.append(" xmlns=").append(entry.getValue());
                } else {
                    sb.append(" xmlns:").append(prefix).append("=").append(entry.getValue());
                }
            }
            return sb.append(empty ? "/>" : ">").append(tail).toString();
        }

        /**
         * Collects the namespace declarations in the given start tag text.
         *
         * @param text the text of a start tag, may be <tt>null</tt>
         * @return the declarations in document order, keyed by prefix (the empty string is the
         *         default namespace) and mapped to the uri including its quotes; never <tt>null</tt>
         */
        private static Map<String, String> parseNamespaces(String text) {
            Map<String, String> namespaces = new LinkedHashMap<>();
            if (text == null) {
                return namespaces;
            }

            // find namespaces (there can be attributes mixed, so we should only grab the namespaces)
            Matcher matcher = NAMESPACE_PATTERN.matcher(text);
            while (matcher.find()) {
                String prefix = matcher.group(1);
                // skip leading :
                String key = prefix == null || prefix.isEmpty() ? "" : prefix.substring(1);
                namespaces.put(key, matcher.group(2));
            }
            return namespaces;
        }

        @Override
        public boolean hasNext() {
            return image != null;
        }

        /**
         * Returns the pre-fetched token and fetches the one after it.
         *
         * @return the next token, or <tt>null</tt> if the iteration has no more tokens
         */
        @Override
        public Object next() {
            return next(false);
        }

        /**
         * Returns the current token and advances to the following one.
         *
         * @param first whether this is the initial call made from {@link #init()}
         */
        Object next(boolean first) {
            Object answer = image;
            // calculate next
            if (scanner.hasNext()) {
                image = getNext(first);
            } else {
                image = null;
            }

            if (answer == null) {
                // first time the image may be null
                answer = image;
            }
            return answer;
        }

        @Override
        public void remove() {
            // noop
        }

        @Override
        public void close() throws IOException {
            if (scanner != null) {
                // closing the scanner closes the underlying stream as well
                scanner.close();
            } else if (in != null) {
                // never initialized, so there is only the stream to release
                in.close();
            }
        }

    }

    /**
     * Builds the end tags that close every element left open by the given XML fragment.
     * <p/>
     * The fragment is assumed to be the leading portion of a well-formed XML document. Processing
     * instructions, comments, CDATA sections and other <tt>&lt;!...&gt;</tt> declarations open no
     * element and are skipped.
     *
     * @param xmlhead the XML text preceding the first token
     * @return the end tags, innermost first; empty if no element is left open
     */
    private static String buildXMLTail(String xmlhead) {
        // assume the input text is a portion of a well-formed xml
        List<String> tags = new ArrayList<>();
        final int len = xmlhead.length();
        int p = 0;
        while (p < len) {
            p = xmlhead.indexOf('<', p);
            if (p < 0 || p + 1 >= len) {
                // no more markup, or a dangling < at the very end
                break;
            }
            char nc = xmlhead.charAt(p + 1);
            if (nc == '?') {
                p = skipPast(xmlhead, "?>", p + 2);
            } else if (nc == '!') {
                if (xmlhead.startsWith("<!--", p)) {
                    p = skipPast(xmlhead, "-->", p + 4);
                } else if (xmlhead.startsWith("<![CDATA[", p)) {
                    p = skipPast(xmlhead, "]]>", p + 9);
                } else {
                    // DOCTYPE and the like
                    p = skipPast(xmlhead, ">", p + 2);
                }
            } else if (nc == '/') {
                // an end tag closes the innermost open element
                if (!tags.isEmpty()) {
                    tags.remove(tags.size() - 1);
                }
                p += 2;
            } else {
                final int ep = xmlhead.indexOf('>', p);
                if (ep < 0) {
                    // truncated start tag
                    break;
                }
                // a self-closing tag leaves nothing open
                if (xmlhead.charAt(ep - 1) != '/') {
                    // the name ends at the first whitespace, which need not be a space
                    int ne = p + 1;
                    while (ne < ep && !Character.isWhitespace(xmlhead.charAt(ne))) {
                        ne++;
                    }
                    tags.add(xmlhead.substring(p + 1, ne));
                }
                p = ep + 1;
            }
        }
        StringBuilder sb = new StringBuilder();
        for (int i = tags.size() - 1; i >= 0; i--) {
            sb.append("</").append(tags.get(i)).append(">");
        }
        return sb.toString();
    }

    /**
     * Returns the index just after the next occurrence of the terminator at or after the given
     * index, or the length of the text if the terminator does not occur.
     */
    private static int skipPast(String text, String terminator, int from) {
        int idx = text.indexOf(terminator, from);
        return idx < 0 ? text.length() : idx + terminator.length();
    }
}