001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.camel.util; 018 019import java.io.Closeable; 020import java.io.File; 021import java.io.FileInputStream; 022import java.io.FileNotFoundException; 023import java.io.IOException; 024import java.io.InputStream; 025import java.io.InputStreamReader; 026import java.io.StringReader; 027import java.nio.CharBuffer; 028import java.nio.channels.Channels; 029import java.nio.channels.ReadableByteChannel; 030import java.nio.charset.Charset; 031import java.nio.charset.CharsetDecoder; 032import java.nio.charset.IllegalCharsetNameException; 033import java.nio.charset.UnsupportedCharsetException; 034import java.util.InputMismatchException; 035import java.util.Iterator; 036import java.util.LinkedHashMap; 037import java.util.Map; 038import java.util.Map.Entry; 039import java.util.NoSuchElementException; 040import java.util.Objects; 041import java.util.concurrent.locks.Lock; 042import java.util.concurrent.locks.ReentrantLock; 043import java.util.regex.Matcher; 044import java.util.regex.Pattern; 045 046import static org.apache.camel.util.BufferCaster.cast; 047 048public final class Scanner implements Iterator<String>, Closeable { 049 050 static { 051 WHITESPACE_PATTERN = Pattern.compile("\\s+"); 052 FIND_ANY_PATTERN = Pattern.compile("(?s).*"); 053 } 054 055 private static final Lock LOCK = new ReentrantLock(); 056 private static final Map<String, Pattern> CACHE = new LinkedHashMap<>() { 057 @Override 058 protected boolean removeEldestEntry(Entry<String, Pattern> eldest) { 059 return size() >= 7; 060 } 061 }; 062 063 private static final Pattern WHITESPACE_PATTERN; 064 065 private static final Pattern FIND_ANY_PATTERN; 066 067 private static final int BUFFER_SIZE = 1024; 068 069 private final Readable source; 070 private final Pattern delimPattern; 071 private final Matcher matcher; 072 private CharBuffer buf; 073 private int position; 074 private boolean inputExhausted; 075 private boolean needInput; 076 private boolean skipped; 077 private int savedPosition = -1; 078 private boolean closed; 079 private IOException lastIOException; 080 081 public Scanner(InputStream source, String charsetName, String pattern) { 082 this(new InputStreamReader(Objects.requireNonNull(source, "source"), toDecoder(charsetName)), cachePattern(pattern)); 083 } 084 085 public Scanner(File source, String charsetName, String pattern) throws FileNotFoundException { 086 this(new FileInputStream(Objects.requireNonNull(source, "source")).getChannel(), charsetName, pattern); 087 } 088 089 public Scanner(String source, String pattern) { 090 this(new StringReader(Objects.requireNonNull(source, "source")), cachePattern(pattern)); 091 } 092 093 public Scanner(String source, Pattern pattern) { 094 this(new StringReader(Objects.requireNonNull(source, "source")), pattern); 095 } 096 097 public Scanner(ReadableByteChannel source, String charsetName, String pattern) { 098 this(Channels.newReader(Objects.requireNonNull(source, "source"), toDecoder(charsetName), -1), cachePattern(pattern)); 099 } 100 101 public Scanner(Readable source, String pattern) { 102 this(Objects.requireNonNull(source, "source"), cachePattern(pattern)); 103 } 104 105 private Scanner(Readable source, Pattern pattern) { 106 this.source = source; 107 delimPattern = pattern != null ? pattern : WHITESPACE_PATTERN; 108 buf = CharBuffer.allocate(BUFFER_SIZE); 109 cast(buf).limit(0); 110 matcher = delimPattern.matcher(buf); 111 matcher.useTransparentBounds(true); 112 matcher.useAnchoringBounds(false); 113 } 114 115 private static CharsetDecoder toDecoder(String charsetName) { 116 try { 117 Charset cs = charsetName != null ? Charset.forName(charsetName) : Charset.defaultCharset(); 118 return cs.newDecoder(); 119 } catch (IllegalCharsetNameException | UnsupportedCharsetException e) { 120 throw new IllegalArgumentException(e); 121 } 122 } 123 124 @Override 125 public boolean hasNext() { 126 if (closed) { 127 return false; 128 } 129 saveState(); 130 while (!inputExhausted) { 131 if (hasTokenInBuffer()) { 132 revertState(); 133 return true; 134 } 135 readMore(); 136 } 137 boolean result = hasTokenInBuffer(); 138 revertState(); 139 return result; 140 } 141 142 @Override 143 public String next() { 144 checkClosed(); 145 while (true) { 146 String token = getCompleteTokenInBuffer(); 147 if (token != null) { 148 skipped = false; 149 return token; 150 } 151 if (needInput) { 152 readMore(); 153 } else { 154 throwFor(); 155 } 156 } 157 } 158 159 public String getDelim() { 160 return delimPattern.pattern(); 161 } 162 163 private void saveState() { 164 savedPosition = position; 165 } 166 167 private void revertState() { 168 position = savedPosition; 169 savedPosition = -1; 170 skipped = false; 171 } 172 173 private void readMore() { 174 if (buf.limit() == buf.capacity()) { 175 expandBuffer(); 176 } 177 int p = buf.position(); 178 cast(buf).position(buf.limit()); 179 cast(buf).limit(buf.capacity()); 180 int n; 181 try { 182 n = source.read(buf); 183 } catch (IOException ioe) { 184 lastIOException = ioe; 185 n = -1; 186 } 187 if (n == -1) { 188 inputExhausted = true; 189 needInput = false; 190 } else if (n > 0) { 191 needInput = false; 192 } 193 cast(buf).limit(buf.position()); 194 cast(buf).position(p); 195 } 196 197 private void expandBuffer() { 198 int offset = savedPosition == -1 ? position : savedPosition; 199 cast(buf).position(offset); 200 if (offset > 0) { 201 buf.compact(); 202 translateSavedIndexes(offset); 203 position -= offset; 204 cast(buf).flip(); 205 } else { 206 int newSize = buf.capacity() * 2; 207 CharBuffer newBuf = CharBuffer.allocate(newSize); 208 newBuf.put(buf); 209 cast(newBuf).flip(); 210 translateSavedIndexes(offset); 211 position -= offset; 212 buf = newBuf; 213 matcher.reset(buf); 214 } 215 } 216 217 private void translateSavedIndexes(int offset) { 218 if (savedPosition != -1) { 219 savedPosition -= offset; 220 } 221 } 222 223 private void throwFor() { 224 skipped = false; 225 if (inputExhausted && position == buf.limit()) { 226 throw new NoSuchElementException(); 227 } else { 228 throw new InputMismatchException(); 229 } 230 } 231 232 private boolean hasTokenInBuffer() { 233 matcher.usePattern(delimPattern); 234 matcher.region(position, buf.limit()); 235 if (matcher.lookingAt()) { 236 position = matcher.end(); 237 } 238 return position != buf.limit(); 239 } 240 241 private String getCompleteTokenInBuffer() { 242 matcher.usePattern(delimPattern); 243 if (!skipped) { 244 matcher.region(position, buf.limit()); 245 if (matcher.lookingAt()) { 246 if (matcher.hitEnd() && !inputExhausted) { 247 needInput = true; 248 return null; 249 } 250 skipped = true; 251 position = matcher.end(); 252 } 253 } 254 if (position == buf.limit()) { 255 if (inputExhausted) { 256 return null; 257 } 258 needInput = true; 259 return null; 260 } 261 matcher.region(position, buf.limit()); 262 boolean foundNextDelim = matcher.find(); 263 if (foundNextDelim && matcher.end() == position) { 264 foundNextDelim = matcher.find(); 265 } 266 if (foundNextDelim) { 267 if (matcher.requireEnd() && !inputExhausted) { 268 needInput = true; 269 return null; 270 } 271 int tokenEnd = matcher.start(); 272 matcher.usePattern(FIND_ANY_PATTERN); 273 matcher.region(position, tokenEnd); 274 if (matcher.matches()) { 275 String s = matcher.group(); 276 position = matcher.end(); 277 return s; 278 } else { 279 return null; 280 } 281 } 282 if (inputExhausted) { 283 matcher.usePattern(FIND_ANY_PATTERN); 284 matcher.region(position, buf.limit()); 285 if (matcher.matches()) { 286 String s = matcher.group(); 287 position = matcher.end(); 288 return s; 289 } 290 return null; 291 } 292 needInput = true; 293 return null; 294 } 295 296 private void checkClosed() { 297 if (closed) { 298 throw new IllegalStateException(); 299 } 300 } 301 302 @Override 303 public void close() throws IOException { 304 if (!closed) { 305 closed = true; 306 if (source instanceof Closeable closeable) { 307 try { 308 closeable.close(); 309 } catch (IOException e) { 310 lastIOException = e; 311 } 312 } 313 } 314 if (lastIOException != null) { 315 throw lastIOException; 316 } 317 } 318 319 private static Pattern cachePattern(String pattern) { 320 if (pattern == null) { 321 return null; 322 } 323 LOCK.lock(); 324 try { 325 return CACHE.computeIfAbsent(pattern, Pattern::compile); 326 } finally { 327 LOCK.unlock(); 328 } 329 } 330 331}