001/** 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.camel.support; 018 019import java.io.InputStream; 020import java.util.Iterator; 021import java.util.LinkedHashMap; 022import java.util.Map; 023import java.util.Scanner; 024import java.util.regex.Matcher; 025import java.util.regex.Pattern; 026 027import org.apache.camel.Exchange; 028import org.apache.camel.language.simple.SimpleLanguage; 029import org.apache.camel.util.ObjectHelper; 030 031/** 032 * {@link org.apache.camel.Expression} to walk a {@link org.apache.camel.Message} XML body 033 * using an {@link java.util.Iterator}, which grabs the content between a XML start and end token. 034 * <p/> 035 * The message body must be able to convert to {@link java.io.InputStream} type which is used as stream 036 * to access the message body. 037 * <p/> 038 * Can be used to split big XML files. 039 * <p/> 040 * This implementation supports inheriting namespaces from a parent/root tag. 041 * 042 * @deprecated use {@link TokenXMLExpressionIterator} instead. 043 */ 044@Deprecated 045public class TokenXMLPairExpressionIterator extends TokenPairExpressionIterator { 046 047 private static final Pattern NAMESPACE_PATTERN = Pattern.compile("xmlns(:\\w+|)=\\\"(.*?)\\\""); 048 private static final String SCAN_TOKEN_REGEX = "(\\s+.*?|)>"; 049 private static final String SCAN_TOKEN_NS_PREFIX_REGEX = "(.{1,15}?:|)"; 050 protected final String inheritNamespaceToken; 051 052 public TokenXMLPairExpressionIterator(String startToken, String endToken, String inheritNamespaceToken) { 053 super(startToken, endToken, true); 054 // namespace token is optional 055 this.inheritNamespaceToken = inheritNamespaceToken; 056 } 057 058 @Override 059 protected Iterator<?> createIterator(Exchange exchange, InputStream in, String charset) { 060 String start = startToken; 061 if (SimpleLanguage.hasSimpleFunction(start)) { 062 start = SimpleLanguage.expression(start).evaluate(exchange, String.class); 063 } 064 String end = endToken; 065 if (SimpleLanguage.hasSimpleFunction(end)) { 066 end = SimpleLanguage.expression(end).evaluate(exchange, String.class); 067 } 068 String inherit = inheritNamespaceToken; 069 if (inherit != null && SimpleLanguage.hasSimpleFunction(inherit)) { 070 inherit = SimpleLanguage.expression(inherit).evaluate(exchange, String.class); 071 } 072 073 // must be XML tokens 074 if (!start.startsWith("<") || !start.endsWith(">")) { 075 throw new IllegalArgumentException("Start token must be a valid XML token, was: " + start); 076 } 077 if (!end.startsWith("<") || !end.endsWith(">")) { 078 throw new IllegalArgumentException("End token must be a valid XML token, was: " + end); 079 } 080 if (inherit != null && (!inherit.startsWith("<") || !inherit.endsWith(">"))) { 081 throw new IllegalArgumentException("Namespace token must be a valid XML token, was: " + inherit); 082 } 083 084 XMLTokenPairIterator iterator = new XMLTokenPairIterator(start, end, inherit, in, charset); 085 iterator.init(); 086 return iterator; 087 } 088 089 /** 090 * Iterator to walk the input stream 091 */ 092 static class XMLTokenPairIterator extends TokenPairIterator { 093 094 private final Pattern startTokenPattern; 095 private final String scanEndToken; 096 private final String inheritNamespaceToken; 097 private Pattern inheritNamespaceTokenPattern; 098 private String rootTokenNamespaces; 099 100 XMLTokenPairIterator(String startToken, String endToken, String inheritNamespaceToken, InputStream in, String charset) { 101 super(startToken, endToken, true, in, charset); 102 103 // remove any beginning < and ending > as we need to support ns prefixes and attributes, so we use a reg exp patterns 104 StringBuilder tokenSb = new StringBuilder("<").append(SCAN_TOKEN_NS_PREFIX_REGEX). 105 append(startToken.substring(1, startToken.length() - 1)).append(SCAN_TOKEN_REGEX); 106 this.startTokenPattern = Pattern.compile(tokenSb.toString()); 107 108 tokenSb = new StringBuilder("</").append(SCAN_TOKEN_NS_PREFIX_REGEX). 109 append(endToken.substring(2, endToken.length() - 1)).append(SCAN_TOKEN_REGEX); 110 this.scanEndToken = tokenSb.toString(); 111 112 this.inheritNamespaceToken = inheritNamespaceToken; 113 if (inheritNamespaceToken != null) { 114 // the inherit namespace token may itself have a namespace prefix 115 tokenSb = new StringBuilder("<").append(SCAN_TOKEN_NS_PREFIX_REGEX). 116 append(inheritNamespaceToken.substring(1, inheritNamespaceToken.length() - 1)).append(SCAN_TOKEN_REGEX); 117 // the namespaces on the parent tag can be in multi line, so we need to instruct the dot to support multilines 118 this.inheritNamespaceTokenPattern = Pattern.compile(tokenSb.toString(), Pattern.MULTILINE | Pattern.DOTALL); 119 } 120 } 121 122 @Override 123 void init() { 124 // use scan end token as delimiter which supports attributes/namespaces 125 this.scanner = new Scanner(in, charset).useDelimiter(scanEndToken); 126 // this iterator will do look ahead as we may have data 127 // after the last end token, which the scanner would find 128 // so we need to be one step ahead of the scanner 129 this.image = scanner.hasNext() ? (String) next(true) : null; 130 } 131 132 @Override 133 String getNext(boolean first) { 134 String next = scanner.next(); 135 if (next == null) { 136 return null; 137 } 138 139 // initialize inherited namespaces on first 140 if (first && inheritNamespaceToken != null) { 141 rootTokenNamespaces = getNamespacesFromNamespaceToken(next); 142 } 143 144 // make sure next is positioned at start token as we can have leading data 145 // or we reached EOL and there is no more start tags 146 Matcher matcher = startTokenPattern.matcher(next); 147 if (!matcher.find()) { 148 return null; 149 } else { 150 int index = matcher.start(); 151 next = next.substring(index); 152 } 153 154 // make sure the end tag matches the begin tag if the tag has a namespace prefix 155 String tag = ObjectHelper.before(next, ">"); 156 StringBuilder endTagSb = new StringBuilder("</"); 157 int firstSpaceIndex = tag.indexOf(" "); 158 if (firstSpaceIndex > 0) { 159 endTagSb.append(tag.substring(1, firstSpaceIndex)).append(">"); 160 } else { 161 endTagSb.append(tag.substring(1, tag.length())).append(">"); 162 } 163 164 // build answer accordingly to whether namespaces should be inherited or not 165 StringBuilder sb = new StringBuilder(); 166 if (inheritNamespaceToken != null && rootTokenNamespaces != null) { 167 // append root namespaces to local start token 168 // grab the text 169 String text = ObjectHelper.after(next, ">"); 170 // build result with inherited namespaces 171 next = sb.append(tag).append(rootTokenNamespaces).append(">").append(text).append(endTagSb.toString()).toString(); 172 } else { 173 next = sb.append(next).append(endTagSb.toString()).toString(); 174 } 175 176 return next; 177 } 178 179 private String getNamespacesFromNamespaceToken(String text) { 180 if (text == null) { 181 return null; 182 } 183 184 // grab the namespace tag 185 Matcher mat = inheritNamespaceTokenPattern.matcher(text); 186 if (mat.find()) { 187 text = mat.group(0); 188 } else { 189 // cannot find namespace tag 190 return null; 191 } 192 193 // find namespaces (there can be attributes mixed, so we should only grab the namespaces) 194 Map<String, String> namespaces = new LinkedHashMap<>(); 195 Matcher matcher = NAMESPACE_PATTERN.matcher(text); 196 while (matcher.find()) { 197 String prefix = matcher.group(1); 198 String url = matcher.group(2); 199 if (ObjectHelper.isEmpty(prefix)) { 200 prefix = "_DEFAULT_"; 201 } else { 202 // skip leading : 203 prefix = prefix.substring(1); 204 } 205 namespaces.put(prefix, url); 206 } 207 208 // did we find any namespaces 209 if (namespaces.isEmpty()) { 210 return null; 211 } 212 213 // build namespace String 214 StringBuilder sb = new StringBuilder(); 215 for (Map.Entry<String, String> entry : namespaces.entrySet()) { 216 String key = entry.getKey(); 217 String value = entry.getValue(); 218 if ("_DEFAULT_".equals(key)) { 219 sb.append(" xmlns=\"").append(value).append("\""); 220 } else { 221 sb.append(" xmlns:").append(key).append("=\"").append(value).append("\""); 222 } 223 } 224 225 return sb.toString(); 226 } 227 } 228 229}