001/**
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.camel.component.file;
018
019import java.util.ArrayList;
020import java.util.Collections;
021import java.util.Deque;
022import java.util.LinkedList;
023import java.util.List;
024import java.util.Queue;
025
026import org.apache.camel.AsyncCallback;
027import org.apache.camel.Exchange;
028import org.apache.camel.Processor;
029import org.apache.camel.ShutdownRunningTask;
030import org.apache.camel.impl.ScheduledBatchPollingConsumer;
031import org.apache.camel.spi.UriParam;
032import org.apache.camel.util.CastUtils;
033import org.apache.camel.util.ObjectHelper;
034import org.apache.camel.util.StopWatch;
035import org.apache.camel.util.TimeUtils;
036import org.slf4j.Logger;
037import org.slf4j.LoggerFactory;
038
039/**
040 * Base class for file consumers.
041 */
042public abstract class GenericFileConsumer<T> extends ScheduledBatchPollingConsumer {
043    protected final Logger log = LoggerFactory.getLogger(getClass());
044    protected GenericFileEndpoint<T> endpoint;
045    protected GenericFileOperations<T> operations;
046    protected volatile boolean loggedIn;
047    protected String fileExpressionResult;
048    protected volatile ShutdownRunningTask shutdownRunningTask;
049    protected volatile int pendingExchanges;
050    protected Processor customProcessor;
051    @UriParam
052    protected boolean eagerLimitMaxMessagesPerPoll = true;
053    protected volatile boolean prepareOnStartup;
054
055    public GenericFileConsumer(GenericFileEndpoint<T> endpoint, Processor processor, GenericFileOperations<T> operations) {
056        super(endpoint, processor);
057        this.endpoint = endpoint;
058        this.operations = operations;
059    }
060
061    public Processor getCustomProcessor() {
062        return customProcessor;
063    }
064
065    /**
066     * Use a custom processor to process the exchange.
067     * <p/>
068     * Only set this if you need to do custom processing, instead of the regular processing.
069     * <p/>
070     * This is for example used to browse file endpoints by leveraging the file consumer to poll
071     * the directory to gather the list of exchanges. But to avoid processing the files regularly
072     * we can use a custom processor.
073     *
074     * @param processor a custom processor
075     */
076    public void setCustomProcessor(Processor processor) {
077        this.customProcessor = processor;
078    }
079
080    public boolean isEagerLimitMaxMessagesPerPoll() {
081        return eagerLimitMaxMessagesPerPoll;
082    }
083
084    public void setEagerLimitMaxMessagesPerPoll(boolean eagerLimitMaxMessagesPerPoll) {
085        this.eagerLimitMaxMessagesPerPoll = eagerLimitMaxMessagesPerPoll;
086    }
087
088    /**
089     * Poll for files
090     */
091    protected int poll() throws Exception {
092        // must prepare on startup the very first time
093        if (!prepareOnStartup) {
094            // prepare on startup
095            endpoint.getGenericFileProcessStrategy().prepareOnStartup(operations, endpoint);
096            prepareOnStartup = true;
097        }
098
099        // must reset for each poll
100        fileExpressionResult = null;
101        shutdownRunningTask = null;
102        pendingExchanges = 0;
103
104        // before we poll is there anything we need to check?
105        // such as are we connected to the FTP Server still?
106        if (!prePollCheck()) {
107            log.debug("Skipping poll as pre poll check returned false");
108            return 0;
109        }
110
111        // gather list of files to process
112        List<GenericFile<T>> files = new ArrayList<GenericFile<T>>();
113        String name = endpoint.getConfiguration().getDirectory();
114
115        // time how long time it takes to poll
116        StopWatch stop = new StopWatch();
117        boolean limitHit;
118        try {
119            limitHit = !pollDirectory(name, files, 0);
120        } catch (Exception e) {
121            // during poll directory we add files to the in progress repository, in case of any exception thrown after this work
122            // we must then drain the in progress files before rethrowing the exception
123            log.debug("Error occurred during poll directory: " + name + " due " + e.getMessage() + ". Removing " + files.size() + " files marked as in-progress.");
124            removeExcessiveInProgressFiles(files);
125            throw e;
126        }
127
128        long delta = stop.stop();
129        if (log.isDebugEnabled()) {
130            log.debug("Took {} to poll: {}", TimeUtils.printDuration(delta), name);
131        }
132
133        // log if we hit the limit
134        if (limitHit) {
135            log.debug("Limiting maximum messages to poll at {} files as there was more messages in this poll.", maxMessagesPerPoll);
136        }
137
138        // sort files using file comparator if provided
139        if (endpoint.getSorter() != null) {
140            Collections.sort(files, endpoint.getSorter());
141        }
142
143        // sort using build in sorters so we can use expressions
144        // use a linked list so we can dequeue the exchanges
145        LinkedList<Exchange> exchanges = new LinkedList<Exchange>();
146        for (GenericFile<T> file : files) {
147            Exchange exchange = endpoint.createExchange(file);
148            endpoint.configureExchange(exchange);
149            endpoint.configureMessage(file, exchange.getIn());
150            exchanges.add(exchange);
151        }
152        // sort files using exchange comparator if provided
153        if (endpoint.getSortBy() != null) {
154            Collections.sort(exchanges, endpoint.getSortBy());
155        }
156
157        // use a queue for the exchanges
158        Deque<Exchange> q = exchanges;
159
160        // we are not eager limiting, but we have configured a limit, so cut the list of files
161        if (!eagerLimitMaxMessagesPerPoll && maxMessagesPerPoll > 0) {
162            if (files.size() > maxMessagesPerPoll) {
163                log.debug("Limiting maximum messages to poll at {} files as there was more messages in this poll.", maxMessagesPerPoll);
164                // must first remove excessive files from the in progress repository
165                removeExcessiveInProgressFiles(q, maxMessagesPerPoll);
166            }
167        }
168
169        // consume files one by one
170        int total = exchanges.size();
171        if (total > 0) {
172            log.debug("Total {} files to consume", total);
173        }
174
175        int polledMessages = processBatch(CastUtils.cast(q));
176
177        postPollCheck();
178
179        return polledMessages;
180    }
181
182    public int processBatch(Queue<Object> exchanges) {
183        int total = exchanges.size();
184        int answer = total;
185
186        // limit if needed
187        if (maxMessagesPerPoll > 0 && total > maxMessagesPerPoll) {
188            log.debug("Limiting to maximum messages to poll {} as there was {} messages in this poll.", maxMessagesPerPoll, total);
189            total = maxMessagesPerPoll;
190        }
191
192        for (int index = 0; index < total && isBatchAllowed(); index++) {
193            // only loop if we are started (allowed to run)
194            // use poll to remove the head so it does not consume memory even after we have processed it
195            Exchange exchange = (Exchange) exchanges.poll();
196            // add current index and total as properties
197            exchange.setProperty(Exchange.BATCH_INDEX, index);
198            exchange.setProperty(Exchange.BATCH_SIZE, total);
199            exchange.setProperty(Exchange.BATCH_COMPLETE, index == total - 1);
200
201            // update pending number of exchanges
202            pendingExchanges = total - index - 1;
203
204            // process the current exchange
205            boolean started;
206            if (customProcessor != null) {
207                // use a custom processor
208                started = customProcessExchange(exchange, customProcessor);
209            } else {
210                // process the exchange regular
211                started = processExchange(exchange);
212            }
213
214            // if we did not start process the file then decrement the counter
215            if (!started) {
216                answer--;
217            }
218        }
219
220        // drain any in progress files as we are done with this batch
221        removeExcessiveInProgressFiles(CastUtils.cast((Deque<?>) exchanges, Exchange.class), 0);
222
223        return answer;
224    }
225
226    /**
227     * Drain any in progress files as we are done with this batch
228     *
229     * @param exchanges  the exchanges
230     * @param limit      the limit
231     */
232    protected void removeExcessiveInProgressFiles(Deque<Exchange> exchanges, int limit) {
233        // remove the file from the in progress list in case the batch was limited by max messages per poll
234        while (exchanges.size() > limit) {
235            // must remove last
236            Exchange exchange = exchanges.removeLast();
237            GenericFile<?> file = exchange.getProperty(FileComponent.FILE_EXCHANGE_FILE, GenericFile.class);
238            String key = file.getAbsoluteFilePath();
239            endpoint.getInProgressRepository().remove(key);
240        }
241    }
242
243    /**
244     * Drain any in progress files as we are done with the files
245     *
246     * @param files  the files
247     */
248    protected void removeExcessiveInProgressFiles(List<GenericFile<T>> files) {
249        for (GenericFile file : files) {
250            String key = file.getAbsoluteFilePath();
251            endpoint.getInProgressRepository().remove(key);
252        }
253    }
254
255    /**
256     * Whether or not we can continue polling for more files
257     *
258     * @param fileList  the current list of gathered files
259     * @return <tt>true</tt> to continue, <tt>false</tt> to stop due hitting maxMessagesPerPoll limit
260     */
261    public boolean canPollMoreFiles(List<?> fileList) {
262        // at this point we should not limit if we are not eager
263        if (!eagerLimitMaxMessagesPerPoll) {
264            return true;
265        }
266
267        if (maxMessagesPerPoll <= 0) {
268            // no limitation
269            return true;
270        }
271
272        // then only poll if we haven't reached the max limit
273        return fileList.size() < maxMessagesPerPoll;
274    }
275
276    /**
277     * Override if required. Perform some checks (and perhaps actions) before we poll.
278     *
279     * @return <tt>true</tt> to poll, <tt>false</tt> to skip this poll.
280     */
281    protected boolean prePollCheck() throws Exception {
282        return true;
283    }
284
285    /**
286     * Override if required. Perform some checks (and perhaps actions) after we have polled.
287     */
288    protected void postPollCheck() {
289        // noop
290    }
291
292    /**
293     * Polls the given directory for files to process
294     *
295     * @param fileName current directory or file
296     * @param fileList current list of files gathered
297     * @param depth the current depth of the directory (will start from 0)
298     * @return whether or not to continue polling, <tt>false</tt> means the maxMessagesPerPoll limit has been hit
299     */
300    protected abstract boolean pollDirectory(String fileName, List<GenericFile<T>> fileList, int depth);
301
302    /**
303     * Sets the operations to be used.
304     * <p/>
305     * Can be used to set a fresh operations in case of recovery attempts
306     *
307     * @param operations the operations
308     */
309    public void setOperations(GenericFileOperations<T> operations) {
310        this.operations = operations;
311    }
312
313    /**
314     * Whether to ignore if the file cannot be retrieved.
315     * <p/>
316     * By default an {@link GenericFileOperationFailedException} is thrown if the file cannot be retrieved.
317     * <p/>
318     * This method allows to suppress this and just ignore that.
319     *
320     * @param name        the file name
321     * @param exchange    the exchange
322     * @param cause       optional exception occurred during retrieving file
323     * @return <tt>true</tt> to ignore, <tt>false</tt> is the default.
324     */
325    protected boolean ignoreCannotRetrieveFile(String name, Exchange exchange, Exception cause) {
326        return false;
327    }
328
329    /**
330     * Processes the exchange
331     *
332     * @param exchange the exchange
333     * @return <tt>true</tt> if the file was started to be processed, <tt>false</tt> if the file was not started
334     * to be processed, for some reason (not found, or aborted etc)
335     */
336    protected boolean processExchange(final Exchange exchange) {
337        GenericFile<T> file = getExchangeFileProperty(exchange);
338        log.trace("Processing file: {}", file);
339
340        // must extract the absolute name before the begin strategy as the file could potentially be pre moved
341        // and then the file name would be changed
342        String absoluteFileName = file.getAbsoluteFilePath();
343
344        // check if we can begin processing the file
345        final GenericFileProcessStrategy<T> processStrategy = endpoint.getGenericFileProcessStrategy();
346
347        Exception beginCause = null;
348        boolean begin = false;
349        try {
350            begin = processStrategy.begin(operations, endpoint, exchange, file);
351        } catch (Exception e) {
352            beginCause = e;
353        }
354
355        if (!begin) {
356            // no something was wrong, so we need to abort and remove the file from the in progress list
357            Exception abortCause = null;
358            log.debug("{} cannot begin processing file: {}", endpoint, file);
359            try {
360                // abort
361                processStrategy.abort(operations, endpoint, exchange, file);
362            } catch (Exception e) {
363                abortCause = e;
364            } finally {
365                // begin returned false, so remove file from the in progress list as its no longer in progress
366                endpoint.getInProgressRepository().remove(absoluteFileName);
367            }
368            if (beginCause != null) {
369                String msg = endpoint + " cannot begin processing file: " + file + " due to: " + beginCause.getMessage();
370                handleException(msg, beginCause);
371            }
372            if (abortCause != null) {
373                String msg2 = endpoint + " cannot abort processing file: " + file + " due to: " + abortCause.getMessage();
374                handleException(msg2, abortCause);
375            }
376            return false;
377        }
378
379        // must use file from exchange as it can be updated due the
380        // preMoveNamePrefix/preMoveNamePostfix options
381        final GenericFile<T> target = getExchangeFileProperty(exchange);
382        // must use full name when downloading so we have the correct path
383        final String name = target.getAbsoluteFilePath();
384        try {
385            
386            if (isRetrieveFile()) {
387                // retrieve the file using the stream
388                log.trace("Retrieving file: {} from: {}", name, endpoint);
389    
390                // retrieve the file and check it was a success
391                boolean retrieved;
392                Exception cause = null;
393                try {
394                    retrieved = operations.retrieveFile(name, exchange);
395                } catch (Exception e) {
396                    retrieved = false;
397                    cause = e;
398                }
399
400                if (!retrieved) {
401                    if (ignoreCannotRetrieveFile(name, exchange, cause)) {
402                        log.trace("Cannot retrieve file {} maybe it does not exists. Ignoring.", name);
403                        // remove file from the in progress list as we could not retrieve it, but should ignore
404                        endpoint.getInProgressRepository().remove(absoluteFileName);
405                        return false;
406                    } else {
407                        // throw exception to handle the problem with retrieving the file
408                        // then if the method return false or throws an exception is handled the same in here
409                        // as in both cases an exception is being thrown
410                        if (cause != null && cause instanceof GenericFileOperationFailedException) {
411                            throw cause;
412                        } else {
413                            throw new GenericFileOperationFailedException("Cannot retrieve file: " + file + " from: " + endpoint, cause);
414                        }
415                    }
416                }
417    
418                log.trace("Retrieved file: {} from: {}", name, endpoint);                
419            } else {
420                log.trace("Skipped retrieval of file: {} from: {}", name, endpoint);
421                exchange.getIn().setBody(null);
422            }
423
424            // register on completion callback that does the completion strategies
425            // (for instance to move the file after we have processed it)
426            exchange.addOnCompletion(new GenericFileOnCompletion<T>(endpoint, operations, target, absoluteFileName));
427
428            log.debug("About to process file: {} using exchange: {}", target, exchange);
429
430            // process the exchange using the async consumer to support async routing engine
431            // which can be supported by this file consumer as all the done work is
432            // provided in the GenericFileOnCompletion
433            getAsyncProcessor().process(exchange, new AsyncCallback() {
434                public void done(boolean doneSync) {
435                    // noop
436                    if (log.isTraceEnabled()) {
437                        log.trace("Done processing file: {} {}", target, doneSync ? "synchronously" : "asynchronously");
438                    }
439                }
440            });
441
442        } catch (Exception e) {
443            // remove file from the in progress list due to failure
444            // (cannot be in finally block due to GenericFileOnCompletion will remove it
445            // from in progress when it takes over and processes the file, which may happen
446            // by another thread at a later time. So its only safe to remove it if there was an exception)
447            endpoint.getInProgressRepository().remove(absoluteFileName);
448
449            String msg = "Error processing file " + file + " due to " + e.getMessage();
450            handleException(msg, e);
451        }
452
453        return true;
454    }
455
456    /**
457     * Override if required.  Files are retrieved / returns true by default
458     *
459     * @return <tt>true</tt> to retrieve files, <tt>false</tt> to skip retrieval of files.
460     */
461    protected boolean isRetrieveFile() {
462        return true;
463    }
464
465    /**
466     * Processes the exchange using a custom processor.
467     *
468     * @param exchange the exchange
469     * @param processor the custom processor
470     */
471    protected boolean customProcessExchange(final Exchange exchange, final Processor processor) {
472        GenericFile<T> file = getExchangeFileProperty(exchange);
473        log.trace("Custom processing file: {}", file);
474
475        // must extract the absolute name before the begin strategy as the file could potentially be pre moved
476        // and then the file name would be changed
477        String absoluteFileName = file.getAbsoluteFilePath();
478
479        try {
480            // process using the custom processor
481            processor.process(exchange);
482        } catch (Exception e) {
483            if (log.isDebugEnabled()) {
484                log.debug(endpoint + " error custom processing: " + file + " due to: " + e.getMessage() + ". This exception will be ignored.", e);
485            }
486            handleException(e);
487        } finally {
488            // always remove file from the in progress list as its no longer in progress
489            // use the original file name that was used to add it to the repository
490            // as the name can be different when using preMove option
491            endpoint.getInProgressRepository().remove(absoluteFileName);
492        }
493
494        return true;
495    }
496
497    /**
498     * Strategy for validating if the given remote file should be included or not
499     *
500     * @param file        the file
501     * @param isDirectory whether the file is a directory or a file
502     * @param files       files in the directory
503     * @return <tt>true</tt> to include the file, <tt>false</tt> to skip it
504     */
505    protected boolean isValidFile(GenericFile<T> file, boolean isDirectory, List<T> files) {
506        String absoluteFilePath = file.getAbsoluteFilePath();
507
508        if (!isMatched(file, isDirectory, files)) {
509            log.trace("File did not match. Will skip this file: {}", file);
510            return false;
511        }
512
513        // directory is always valid
514        if (isDirectory) {
515            return true;
516        }
517
518        // check if file is already in progress
519        if (endpoint.getInProgressRepository().contains(absoluteFilePath)) {
520            if (log.isTraceEnabled()) {
521                log.trace("Skipping as file is already in progress: {}", file.getFileName());
522            }
523            return false;
524        }
525
526        // if its a file then check we have the file in the idempotent registry already
527        if (endpoint.isIdempotent()) {
528            // use absolute file path as default key, but evaluate if an expression key was configured
529            String key = file.getAbsoluteFilePath();
530            if (endpoint.getIdempotentKey() != null) {
531                Exchange dummy = endpoint.createExchange(file);
532                key = endpoint.getIdempotentKey().evaluate(dummy, String.class);
533            }
534            if (key != null && endpoint.getIdempotentRepository().contains(key)) {
535                log.trace("This consumer is idempotent and the file has been consumed before matching idempotentKey: {}. Will skip this file: {}", key, file);
536                return false;
537            }
538        }
539
540        // okay so final step is to be able to add atomic as in-progress, so we are the
541        // only thread processing this file
542        return endpoint.getInProgressRepository().add(absoluteFilePath);
543    }
544
545    /**
546     * Strategy to perform file matching based on endpoint configuration.
547     * <p/>
548     * Will always return <tt>false</tt> for certain files/folders:
549     * <ul>
550     * <li>Starting with a dot</li>
551     * <li>lock files</li>
552     * </ul>
553     * And then <tt>true</tt> for directories.
554     *
555     * @param file        the file
556     * @param isDirectory whether the file is a directory or a file
557     * @param files       files in the directory
558     * @return <tt>true</tt> if the file is matched, <tt>false</tt> if not
559     */
560    protected boolean isMatched(GenericFile<T> file, boolean isDirectory, List<T> files) {
561        String name = file.getFileNameOnly();
562
563        // folders/names starting with dot is always skipped (eg. ".", ".camel", ".camelLock")
564        if (name.startsWith(".")) {
565            return false;
566        }
567
568        // lock files should be skipped
569        if (name.endsWith(FileComponent.DEFAULT_LOCK_FILE_POSTFIX)) {
570            return false;
571        }
572
573        if (endpoint.getFilter() != null) {
574            if (!endpoint.getFilter().accept(file)) {
575                return false;
576            }
577        }
578
579        if (endpoint.getAntFilter() != null) {
580            if (!endpoint.getAntFilter().accept(file)) {
581                return false;
582            }
583        }
584
585        // directories are regarded as matched if filter accepted them
586        if (isDirectory) {
587            return true;
588        }
589
590        if (ObjectHelper.isNotEmpty(endpoint.getExclude())) {
591            if (name.matches(endpoint.getExclude())) {
592                return false;
593            }
594        }
595
596        if (ObjectHelper.isNotEmpty(endpoint.getInclude())) {
597            if (!name.matches(endpoint.getInclude())) {
598                return false;
599            }
600        }
601
602        // use file expression for a simple dynamic file filter
603        if (endpoint.getFileName() != null) {
604            fileExpressionResult = evaluateFileExpression();
605            if (fileExpressionResult != null) {
606                if (!name.equals(fileExpressionResult)) {
607                    return false;
608                }
609            }
610        }
611
612        // if done file name is enabled, then the file is only valid if a done file exists
613        if (endpoint.getDoneFileName() != null) {
614            // done file must be in same path as the file
615            String doneFileName = endpoint.createDoneFileName(file.getAbsoluteFilePath());
616            ObjectHelper.notEmpty(doneFileName, "doneFileName", endpoint);
617
618            // is it a done file name?
619            if (endpoint.isDoneFile(file.getFileNameOnly())) {
620                log.trace("Skipping done file: {}", file);
621                return false;
622            }
623
624            if (!isMatched(file, doneFileName, files)) {
625                return false;
626            }
627        }
628
629        return true;
630    }
631
632    /**
633     * Strategy to perform file matching based on endpoint configuration in terms of done file name.
634     *
635     * @param file         the file
636     * @param doneFileName the done file name (without any paths)
637     * @param files        files in the directory
638     * @return <tt>true</tt> if the file is matched, <tt>false</tt> if not
639     */
640    protected abstract boolean isMatched(GenericFile<T> file, String doneFileName, List<T> files);
641
642    /**
643     * Is the given file already in progress.
644     *
645     * @param file the file
646     * @return <tt>true</tt> if the file is already in progress
647     * @deprecated no longer in use, use {@link org.apache.camel.component.file.GenericFileEndpoint#getInProgressRepository()} instead.
648     */
649    @Deprecated
650    protected boolean isInProgress(GenericFile<T> file) {
651        String key = file.getAbsoluteFilePath();
652        // must use add, to have operation as atomic
653        return !endpoint.getInProgressRepository().add(key);
654    }
655
656    protected String evaluateFileExpression() {
657        if (fileExpressionResult == null && endpoint.getFileName() != null) {
658            // create a dummy exchange as Exchange is needed for expression evaluation
659            Exchange dummy = endpoint.createExchange();
660            fileExpressionResult = endpoint.getFileName().evaluate(dummy, String.class);
661        }
662        return fileExpressionResult;
663    }
664
665    @SuppressWarnings("unchecked")
666    private GenericFile<T> getExchangeFileProperty(Exchange exchange) {
667        return (GenericFile<T>) exchange.getProperty(FileComponent.FILE_EXCHANGE_FILE);
668    }
669
670    @Override
671    protected void doStart() throws Exception {
672        super.doStart();
673    }
674
675    @Override
676    protected void doStop() throws Exception {
677        prepareOnStartup = false;
678        super.doStop();
679    }
680}