001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.camel.util;
018
019import java.io.Closeable;
020import java.io.File;
021import java.io.FileInputStream;
022import java.io.FileNotFoundException;
023import java.io.IOException;
024import java.io.InputStream;
025import java.io.InputStreamReader;
026import java.io.StringReader;
027import java.nio.CharBuffer;
028import java.nio.channels.Channels;
029import java.nio.channels.ReadableByteChannel;
030import java.nio.charset.Charset;
031import java.nio.charset.CharsetDecoder;
032import java.nio.charset.IllegalCharsetNameException;
033import java.nio.charset.UnsupportedCharsetException;
034import java.util.InputMismatchException;
035import java.util.Iterator;
036import java.util.LinkedHashMap;
037import java.util.Map;
038import java.util.Map.Entry;
039import java.util.NoSuchElementException;
040import java.util.Objects;
041import java.util.concurrent.locks.Lock;
042import java.util.concurrent.locks.ReentrantLock;
043import java.util.regex.Matcher;
044import java.util.regex.Pattern;
045
046import static org.apache.camel.util.BufferCaster.cast;
047
048public final class Scanner implements Iterator<String>, Closeable {
049
050    static {
051        WHITESPACE_PATTERN = Pattern.compile("\\s+");
052        FIND_ANY_PATTERN = Pattern.compile("(?s).*");
053    }
054
055    private static final Lock LOCK = new ReentrantLock();
056    private static final Map<String, Pattern> CACHE = new LinkedHashMap<>() {
057        @Override
058        protected boolean removeEldestEntry(Entry<String, Pattern> eldest) {
059            return size() >= 7;
060        }
061    };
062
063    private static final Pattern WHITESPACE_PATTERN;
064
065    private static final Pattern FIND_ANY_PATTERN;
066
067    private static final int BUFFER_SIZE = 1024;
068
069    private final Readable source;
070    private final Pattern delimPattern;
071    private final Matcher matcher;
072    private CharBuffer buf;
073    private int position;
074    private boolean inputExhausted;
075    private boolean needInput;
076    private boolean skipped;
077    private int savedPosition = -1;
078    private boolean closed;
079    private IOException lastIOException;
080
081    public Scanner(InputStream source, String charsetName, String pattern) {
082        this(new InputStreamReader(Objects.requireNonNull(source, "source"), toDecoder(charsetName)), cachePattern(pattern));
083    }
084
085    public Scanner(File source, String charsetName, String pattern) throws FileNotFoundException {
086        this(new FileInputStream(Objects.requireNonNull(source, "source")).getChannel(), charsetName, pattern);
087    }
088
089    public Scanner(String source, String pattern) {
090        this(new StringReader(Objects.requireNonNull(source, "source")), cachePattern(pattern));
091    }
092
093    public Scanner(String source, Pattern pattern) {
094        this(new StringReader(Objects.requireNonNull(source, "source")), pattern);
095    }
096
097    public Scanner(ReadableByteChannel source, String charsetName, String pattern) {
098        this(Channels.newReader(Objects.requireNonNull(source, "source"), toDecoder(charsetName), -1), cachePattern(pattern));
099    }
100
101    public Scanner(Readable source, String pattern) {
102        this(Objects.requireNonNull(source, "source"), cachePattern(pattern));
103    }
104
105    private Scanner(Readable source, Pattern pattern) {
106        this.source = source;
107        delimPattern = pattern != null ? pattern : WHITESPACE_PATTERN;
108        buf = CharBuffer.allocate(BUFFER_SIZE);
109        cast(buf).limit(0);
110        matcher = delimPattern.matcher(buf);
111        matcher.useTransparentBounds(true);
112        matcher.useAnchoringBounds(false);
113    }
114
115    private static CharsetDecoder toDecoder(String charsetName) {
116        try {
117            Charset cs = charsetName != null ? Charset.forName(charsetName) : Charset.defaultCharset();
118            return cs.newDecoder();
119        } catch (IllegalCharsetNameException | UnsupportedCharsetException e) {
120            throw new IllegalArgumentException(e);
121        }
122    }
123
124    @Override
125    public boolean hasNext() {
126        if (closed) {
127            return false;
128        }
129        saveState();
130        while (!inputExhausted) {
131            if (hasTokenInBuffer()) {
132                revertState();
133                return true;
134            }
135            readMore();
136        }
137        boolean result = hasTokenInBuffer();
138        revertState();
139        return result;
140    }
141
142    @Override
143    public String next() {
144        checkClosed();
145        while (true) {
146            String token = getCompleteTokenInBuffer();
147            if (token != null) {
148                skipped = false;
149                return token;
150            }
151            if (needInput) {
152                readMore();
153            } else {
154                throwFor();
155            }
156        }
157    }
158
159    public String getDelim() {
160        return delimPattern.pattern();
161    }
162
163    private void saveState() {
164        savedPosition = position;
165    }
166
167    private void revertState() {
168        position = savedPosition;
169        savedPosition = -1;
170        skipped = false;
171    }
172
173    private void readMore() {
174        if (buf.limit() == buf.capacity()) {
175            expandBuffer();
176        }
177        int p = buf.position();
178        cast(buf).position(buf.limit());
179        cast(buf).limit(buf.capacity());
180        int n;
181        try {
182            n = source.read(buf);
183        } catch (IOException ioe) {
184            lastIOException = ioe;
185            n = -1;
186        }
187        if (n == -1) {
188            inputExhausted = true;
189            needInput = false;
190        } else if (n > 0) {
191            needInput = false;
192        }
193        cast(buf).limit(buf.position());
194        cast(buf).position(p);
195    }
196
197    private void expandBuffer() {
198        int offset = savedPosition == -1 ? position : savedPosition;
199        cast(buf).position(offset);
200        if (offset > 0) {
201            buf.compact();
202            translateSavedIndexes(offset);
203            position -= offset;
204            cast(buf).flip();
205        } else {
206            int newSize = buf.capacity() * 2;
207            CharBuffer newBuf = CharBuffer.allocate(newSize);
208            newBuf.put(buf);
209            cast(newBuf).flip();
210            translateSavedIndexes(offset);
211            position -= offset;
212            buf = newBuf;
213            matcher.reset(buf);
214        }
215    }
216
217    private void translateSavedIndexes(int offset) {
218        if (savedPosition != -1) {
219            savedPosition -= offset;
220        }
221    }
222
223    private void throwFor() {
224        skipped = false;
225        if (inputExhausted && position == buf.limit()) {
226            throw new NoSuchElementException();
227        } else {
228            throw new InputMismatchException();
229        }
230    }
231
232    private boolean hasTokenInBuffer() {
233        matcher.usePattern(delimPattern);
234        matcher.region(position, buf.limit());
235        if (matcher.lookingAt()) {
236            position = matcher.end();
237        }
238        return position != buf.limit();
239    }
240
241    private String getCompleteTokenInBuffer() {
242        matcher.usePattern(delimPattern);
243        if (!skipped) {
244            matcher.region(position, buf.limit());
245            if (matcher.lookingAt()) {
246                if (matcher.hitEnd() && !inputExhausted) {
247                    needInput = true;
248                    return null;
249                }
250                skipped = true;
251                position = matcher.end();
252            }
253        }
254        if (position == buf.limit()) {
255            if (inputExhausted) {
256                return null;
257            }
258            needInput = true;
259            return null;
260        }
261        matcher.region(position, buf.limit());
262        boolean foundNextDelim = matcher.find();
263        if (foundNextDelim && matcher.end() == position) {
264            foundNextDelim = matcher.find();
265        }
266        if (foundNextDelim) {
267            if (matcher.requireEnd() && !inputExhausted) {
268                needInput = true;
269                return null;
270            }
271            int tokenEnd = matcher.start();
272            matcher.usePattern(FIND_ANY_PATTERN);
273            matcher.region(position, tokenEnd);
274            if (matcher.matches()) {
275                String s = matcher.group();
276                position = matcher.end();
277                return s;
278            } else {
279                return null;
280            }
281        }
282        if (inputExhausted) {
283            matcher.usePattern(FIND_ANY_PATTERN);
284            matcher.region(position, buf.limit());
285            if (matcher.matches()) {
286                String s = matcher.group();
287                position = matcher.end();
288                return s;
289            }
290            return null;
291        }
292        needInput = true;
293        return null;
294    }
295
296    private void checkClosed() {
297        if (closed) {
298            throw new IllegalStateException();
299        }
300    }
301
302    @Override
303    public void close() throws IOException {
304        if (!closed) {
305            closed = true;
306            if (source instanceof Closeable closeable) {
307                try {
308                    closeable.close();
309                } catch (IOException e) {
310                    lastIOException = e;
311                }
312            }
313        }
314        if (lastIOException != null) {
315            throw lastIOException;
316        }
317    }
318
319    private static Pattern cachePattern(String pattern) {
320        if (pattern == null) {
321            return null;
322        }
323        LOCK.lock();
324        try {
325            return CACHE.computeIfAbsent(pattern, Pattern::compile);
326        } finally {
327            LOCK.unlock();
328        }
329    }
330
331}