001/** 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.camel.component.file; 018 019import java.util.ArrayList; 020import java.util.Collections; 021import java.util.Deque; 022import java.util.LinkedList; 023import java.util.List; 024import java.util.Queue; 025 026import org.apache.camel.AsyncCallback; 027import org.apache.camel.Exchange; 028import org.apache.camel.Processor; 029import org.apache.camel.ShutdownRunningTask; 030import org.apache.camel.impl.ScheduledBatchPollingConsumer; 031import org.apache.camel.spi.UriParam; 032import org.apache.camel.util.CastUtils; 033import org.apache.camel.util.ObjectHelper; 034import org.apache.camel.util.StopWatch; 035import org.apache.camel.util.TimeUtils; 036import org.slf4j.Logger; 037import org.slf4j.LoggerFactory; 038 039/** 040 * Base class for file consumers. 041 */ 042public abstract class GenericFileConsumer<T> extends ScheduledBatchPollingConsumer { 043 protected final Logger log = LoggerFactory.getLogger(getClass()); 044 protected GenericFileEndpoint<T> endpoint; 045 protected GenericFileOperations<T> operations; 046 protected volatile boolean loggedIn; 047 protected String fileExpressionResult; 048 protected volatile ShutdownRunningTask shutdownRunningTask; 049 protected volatile int pendingExchanges; 050 protected Processor customProcessor; 051 @UriParam 052 protected boolean eagerLimitMaxMessagesPerPoll = true; 053 protected volatile boolean prepareOnStartup; 054 055 public GenericFileConsumer(GenericFileEndpoint<T> endpoint, Processor processor, GenericFileOperations<T> operations) { 056 super(endpoint, processor); 057 this.endpoint = endpoint; 058 this.operations = operations; 059 } 060 061 public Processor getCustomProcessor() { 062 return customProcessor; 063 } 064 065 /** 066 * Use a custom processor to process the exchange. 067 * <p/> 068 * Only set this if you need to do custom processing, instead of the regular processing. 069 * <p/> 070 * This is for example used to browse file endpoints by leveraging the file consumer to poll 071 * the directory to gather the list of exchanges. But to avoid processing the files regularly 072 * we can use a custom processor. 073 * 074 * @param processor a custom processor 075 */ 076 public void setCustomProcessor(Processor processor) { 077 this.customProcessor = processor; 078 } 079 080 public boolean isEagerLimitMaxMessagesPerPoll() { 081 return eagerLimitMaxMessagesPerPoll; 082 } 083 084 public void setEagerLimitMaxMessagesPerPoll(boolean eagerLimitMaxMessagesPerPoll) { 085 this.eagerLimitMaxMessagesPerPoll = eagerLimitMaxMessagesPerPoll; 086 } 087 088 /** 089 * Poll for files 090 */ 091 protected int poll() throws Exception { 092 // must prepare on startup the very first time 093 if (!prepareOnStartup) { 094 // prepare on startup 095 endpoint.getGenericFileProcessStrategy().prepareOnStartup(operations, endpoint); 096 prepareOnStartup = true; 097 } 098 099 // must reset for each poll 100 fileExpressionResult = null; 101 shutdownRunningTask = null; 102 pendingExchanges = 0; 103 104 // before we poll is there anything we need to check? 105 // such as are we connected to the FTP Server still? 106 if (!prePollCheck()) { 107 log.debug("Skipping poll as pre poll check returned false"); 108 return 0; 109 } 110 111 // gather list of files to process 112 List<GenericFile<T>> files = new ArrayList<GenericFile<T>>(); 113 String name = endpoint.getConfiguration().getDirectory(); 114 115 // time how long time it takes to poll 116 StopWatch stop = new StopWatch(); 117 boolean limitHit; 118 try { 119 limitHit = !pollDirectory(name, files, 0); 120 } catch (Exception e) { 121 // during poll directory we add files to the in progress repository, in case of any exception thrown after this work 122 // we must then drain the in progress files before rethrowing the exception 123 log.debug("Error occurred during poll directory: " + name + " due " + e.getMessage() + ". Removing " + files.size() + " files marked as in-progress."); 124 removeExcessiveInProgressFiles(files); 125 throw e; 126 } 127 128 long delta = stop.stop(); 129 if (log.isDebugEnabled()) { 130 log.debug("Took {} to poll: {}", TimeUtils.printDuration(delta), name); 131 } 132 133 // log if we hit the limit 134 if (limitHit) { 135 log.debug("Limiting maximum messages to poll at {} files as there was more messages in this poll.", maxMessagesPerPoll); 136 } 137 138 // sort files using file comparator if provided 139 if (endpoint.getSorter() != null) { 140 Collections.sort(files, endpoint.getSorter()); 141 } 142 143 // sort using build in sorters so we can use expressions 144 // use a linked list so we can dequeue the exchanges 145 LinkedList<Exchange> exchanges = new LinkedList<Exchange>(); 146 for (GenericFile<T> file : files) { 147 Exchange exchange = endpoint.createExchange(file); 148 endpoint.configureExchange(exchange); 149 endpoint.configureMessage(file, exchange.getIn()); 150 exchanges.add(exchange); 151 } 152 // sort files using exchange comparator if provided 153 if (endpoint.getSortBy() != null) { 154 Collections.sort(exchanges, endpoint.getSortBy()); 155 } 156 157 // use a queue for the exchanges 158 Deque<Exchange> q = exchanges; 159 160 // we are not eager limiting, but we have configured a limit, so cut the list of files 161 if (!eagerLimitMaxMessagesPerPoll && maxMessagesPerPoll > 0) { 162 if (files.size() > maxMessagesPerPoll) { 163 log.debug("Limiting maximum messages to poll at {} files as there was more messages in this poll.", maxMessagesPerPoll); 164 // must first remove excessive files from the in progress repository 165 removeExcessiveInProgressFiles(q, maxMessagesPerPoll); 166 } 167 } 168 169 // consume files one by one 170 int total = exchanges.size(); 171 if (total > 0) { 172 log.debug("Total {} files to consume", total); 173 } 174 175 int polledMessages = processBatch(CastUtils.cast(q)); 176 177 postPollCheck(); 178 179 return polledMessages; 180 } 181 182 public int processBatch(Queue<Object> exchanges) { 183 int total = exchanges.size(); 184 int answer = total; 185 186 // limit if needed 187 if (maxMessagesPerPoll > 0 && total > maxMessagesPerPoll) { 188 log.debug("Limiting to maximum messages to poll {} as there was {} messages in this poll.", maxMessagesPerPoll, total); 189 total = maxMessagesPerPoll; 190 } 191 192 for (int index = 0; index < total && isBatchAllowed(); index++) { 193 // only loop if we are started (allowed to run) 194 // use poll to remove the head so it does not consume memory even after we have processed it 195 Exchange exchange = (Exchange) exchanges.poll(); 196 // add current index and total as properties 197 exchange.setProperty(Exchange.BATCH_INDEX, index); 198 exchange.setProperty(Exchange.BATCH_SIZE, total); 199 exchange.setProperty(Exchange.BATCH_COMPLETE, index == total - 1); 200 201 // update pending number of exchanges 202 pendingExchanges = total - index - 1; 203 204 // process the current exchange 205 boolean started; 206 if (customProcessor != null) { 207 // use a custom processor 208 started = customProcessExchange(exchange, customProcessor); 209 } else { 210 // process the exchange regular 211 started = processExchange(exchange); 212 } 213 214 // if we did not start process the file then decrement the counter 215 if (!started) { 216 answer--; 217 } 218 } 219 220 // drain any in progress files as we are done with this batch 221 removeExcessiveInProgressFiles(CastUtils.cast((Deque<?>) exchanges, Exchange.class), 0); 222 223 return answer; 224 } 225 226 /** 227 * Drain any in progress files as we are done with this batch 228 * 229 * @param exchanges the exchanges 230 * @param limit the limit 231 */ 232 protected void removeExcessiveInProgressFiles(Deque<Exchange> exchanges, int limit) { 233 // remove the file from the in progress list in case the batch was limited by max messages per poll 234 while (exchanges.size() > limit) { 235 // must remove last 236 Exchange exchange = exchanges.removeLast(); 237 GenericFile<?> file = exchange.getProperty(FileComponent.FILE_EXCHANGE_FILE, GenericFile.class); 238 String key = file.getAbsoluteFilePath(); 239 endpoint.getInProgressRepository().remove(key); 240 } 241 } 242 243 /** 244 * Drain any in progress files as we are done with the files 245 * 246 * @param files the files 247 */ 248 protected void removeExcessiveInProgressFiles(List<GenericFile<T>> files) { 249 for (GenericFile file : files) { 250 String key = file.getAbsoluteFilePath(); 251 endpoint.getInProgressRepository().remove(key); 252 } 253 } 254 255 /** 256 * Whether or not we can continue polling for more files 257 * 258 * @param fileList the current list of gathered files 259 * @return <tt>true</tt> to continue, <tt>false</tt> to stop due hitting maxMessagesPerPoll limit 260 */ 261 public boolean canPollMoreFiles(List<?> fileList) { 262 // at this point we should not limit if we are not eager 263 if (!eagerLimitMaxMessagesPerPoll) { 264 return true; 265 } 266 267 if (maxMessagesPerPoll <= 0) { 268 // no limitation 269 return true; 270 } 271 272 // then only poll if we haven't reached the max limit 273 return fileList.size() < maxMessagesPerPoll; 274 } 275 276 /** 277 * Override if required. Perform some checks (and perhaps actions) before we poll. 278 * 279 * @return <tt>true</tt> to poll, <tt>false</tt> to skip this poll. 280 */ 281 protected boolean prePollCheck() throws Exception { 282 return true; 283 } 284 285 /** 286 * Override if required. Perform some checks (and perhaps actions) after we have polled. 287 */ 288 protected void postPollCheck() { 289 // noop 290 } 291 292 /** 293 * Polls the given directory for files to process 294 * 295 * @param fileName current directory or file 296 * @param fileList current list of files gathered 297 * @param depth the current depth of the directory (will start from 0) 298 * @return whether or not to continue polling, <tt>false</tt> means the maxMessagesPerPoll limit has been hit 299 */ 300 protected abstract boolean pollDirectory(String fileName, List<GenericFile<T>> fileList, int depth); 301 302 /** 303 * Sets the operations to be used. 304 * <p/> 305 * Can be used to set a fresh operations in case of recovery attempts 306 * 307 * @param operations the operations 308 */ 309 public void setOperations(GenericFileOperations<T> operations) { 310 this.operations = operations; 311 } 312 313 /** 314 * Whether to ignore if the file cannot be retrieved. 315 * <p/> 316 * By default an {@link GenericFileOperationFailedException} is thrown if the file cannot be retrieved. 317 * <p/> 318 * This method allows to suppress this and just ignore that. 319 * 320 * @param name the file name 321 * @param exchange the exchange 322 * @param cause optional exception occurred during retrieving file 323 * @return <tt>true</tt> to ignore, <tt>false</tt> is the default. 324 */ 325 protected boolean ignoreCannotRetrieveFile(String name, Exchange exchange, Exception cause) { 326 return false; 327 } 328 329 /** 330 * Processes the exchange 331 * 332 * @param exchange the exchange 333 * @return <tt>true</tt> if the file was started to be processed, <tt>false</tt> if the file was not started 334 * to be processed, for some reason (not found, or aborted etc) 335 */ 336 protected boolean processExchange(final Exchange exchange) { 337 GenericFile<T> file = getExchangeFileProperty(exchange); 338 log.trace("Processing file: {}", file); 339 340 // must extract the absolute name before the begin strategy as the file could potentially be pre moved 341 // and then the file name would be changed 342 String absoluteFileName = file.getAbsoluteFilePath(); 343 344 // check if we can begin processing the file 345 final GenericFileProcessStrategy<T> processStrategy = endpoint.getGenericFileProcessStrategy(); 346 347 Exception beginCause = null; 348 boolean begin = false; 349 try { 350 begin = processStrategy.begin(operations, endpoint, exchange, file); 351 } catch (Exception e) { 352 beginCause = e; 353 } 354 355 if (!begin) { 356 // no something was wrong, so we need to abort and remove the file from the in progress list 357 Exception abortCause = null; 358 log.debug("{} cannot begin processing file: {}", endpoint, file); 359 try { 360 // abort 361 processStrategy.abort(operations, endpoint, exchange, file); 362 } catch (Exception e) { 363 abortCause = e; 364 } finally { 365 // begin returned false, so remove file from the in progress list as its no longer in progress 366 endpoint.getInProgressRepository().remove(absoluteFileName); 367 } 368 if (beginCause != null) { 369 String msg = endpoint + " cannot begin processing file: " + file + " due to: " + beginCause.getMessage(); 370 handleException(msg, beginCause); 371 } 372 if (abortCause != null) { 373 String msg2 = endpoint + " cannot abort processing file: " + file + " due to: " + abortCause.getMessage(); 374 handleException(msg2, abortCause); 375 } 376 return false; 377 } 378 379 // must use file from exchange as it can be updated due the 380 // preMoveNamePrefix/preMoveNamePostfix options 381 final GenericFile<T> target = getExchangeFileProperty(exchange); 382 // must use full name when downloading so we have the correct path 383 final String name = target.getAbsoluteFilePath(); 384 try { 385 386 if (isRetrieveFile()) { 387 // retrieve the file using the stream 388 log.trace("Retrieving file: {} from: {}", name, endpoint); 389 390 // retrieve the file and check it was a success 391 boolean retrieved; 392 Exception cause = null; 393 try { 394 retrieved = operations.retrieveFile(name, exchange); 395 } catch (Exception e) { 396 retrieved = false; 397 cause = e; 398 } 399 400 if (!retrieved) { 401 if (ignoreCannotRetrieveFile(name, exchange, cause)) { 402 log.trace("Cannot retrieve file {} maybe it does not exists. Ignoring.", name); 403 // remove file from the in progress list as we could not retrieve it, but should ignore 404 endpoint.getInProgressRepository().remove(absoluteFileName); 405 return false; 406 } else { 407 // throw exception to handle the problem with retrieving the file 408 // then if the method return false or throws an exception is handled the same in here 409 // as in both cases an exception is being thrown 410 if (cause != null && cause instanceof GenericFileOperationFailedException) { 411 throw cause; 412 } else { 413 throw new GenericFileOperationFailedException("Cannot retrieve file: " + file + " from: " + endpoint, cause); 414 } 415 } 416 } 417 418 log.trace("Retrieved file: {} from: {}", name, endpoint); 419 } else { 420 log.trace("Skipped retrieval of file: {} from: {}", name, endpoint); 421 exchange.getIn().setBody(null); 422 } 423 424 // register on completion callback that does the completion strategies 425 // (for instance to move the file after we have processed it) 426 exchange.addOnCompletion(new GenericFileOnCompletion<T>(endpoint, operations, target, absoluteFileName)); 427 428 log.debug("About to process file: {} using exchange: {}", target, exchange); 429 430 // process the exchange using the async consumer to support async routing engine 431 // which can be supported by this file consumer as all the done work is 432 // provided in the GenericFileOnCompletion 433 getAsyncProcessor().process(exchange, new AsyncCallback() { 434 public void done(boolean doneSync) { 435 // noop 436 if (log.isTraceEnabled()) { 437 log.trace("Done processing file: {} {}", target, doneSync ? "synchronously" : "asynchronously"); 438 } 439 } 440 }); 441 442 } catch (Exception e) { 443 // remove file from the in progress list due to failure 444 // (cannot be in finally block due to GenericFileOnCompletion will remove it 445 // from in progress when it takes over and processes the file, which may happen 446 // by another thread at a later time. So its only safe to remove it if there was an exception) 447 endpoint.getInProgressRepository().remove(absoluteFileName); 448 449 String msg = "Error processing file " + file + " due to " + e.getMessage(); 450 handleException(msg, e); 451 } 452 453 return true; 454 } 455 456 /** 457 * Override if required. Files are retrieved / returns true by default 458 * 459 * @return <tt>true</tt> to retrieve files, <tt>false</tt> to skip retrieval of files. 460 */ 461 protected boolean isRetrieveFile() { 462 return true; 463 } 464 465 /** 466 * Processes the exchange using a custom processor. 467 * 468 * @param exchange the exchange 469 * @param processor the custom processor 470 */ 471 protected boolean customProcessExchange(final Exchange exchange, final Processor processor) { 472 GenericFile<T> file = getExchangeFileProperty(exchange); 473 log.trace("Custom processing file: {}", file); 474 475 // must extract the absolute name before the begin strategy as the file could potentially be pre moved 476 // and then the file name would be changed 477 String absoluteFileName = file.getAbsoluteFilePath(); 478 479 try { 480 // process using the custom processor 481 processor.process(exchange); 482 } catch (Exception e) { 483 if (log.isDebugEnabled()) { 484 log.debug(endpoint + " error custom processing: " + file + " due to: " + e.getMessage() + ". This exception will be ignored.", e); 485 } 486 handleException(e); 487 } finally { 488 // always remove file from the in progress list as its no longer in progress 489 // use the original file name that was used to add it to the repository 490 // as the name can be different when using preMove option 491 endpoint.getInProgressRepository().remove(absoluteFileName); 492 } 493 494 return true; 495 } 496 497 /** 498 * Strategy for validating if the given remote file should be included or not 499 * 500 * @param file the file 501 * @param isDirectory whether the file is a directory or a file 502 * @param files files in the directory 503 * @return <tt>true</tt> to include the file, <tt>false</tt> to skip it 504 */ 505 protected boolean isValidFile(GenericFile<T> file, boolean isDirectory, List<T> files) { 506 String absoluteFilePath = file.getAbsoluteFilePath(); 507 508 if (!isMatched(file, isDirectory, files)) { 509 log.trace("File did not match. Will skip this file: {}", file); 510 return false; 511 } 512 513 // directory is always valid 514 if (isDirectory) { 515 return true; 516 } 517 518 // check if file is already in progress 519 if (endpoint.getInProgressRepository().contains(absoluteFilePath)) { 520 if (log.isTraceEnabled()) { 521 log.trace("Skipping as file is already in progress: {}", file.getFileName()); 522 } 523 return false; 524 } 525 526 // if its a file then check we have the file in the idempotent registry already 527 if (endpoint.isIdempotent()) { 528 // use absolute file path as default key, but evaluate if an expression key was configured 529 String key = file.getAbsoluteFilePath(); 530 if (endpoint.getIdempotentKey() != null) { 531 Exchange dummy = endpoint.createExchange(file); 532 key = endpoint.getIdempotentKey().evaluate(dummy, String.class); 533 } 534 if (key != null && endpoint.getIdempotentRepository().contains(key)) { 535 log.trace("This consumer is idempotent and the file has been consumed before matching idempotentKey: {}. Will skip this file: {}", key, file); 536 return false; 537 } 538 } 539 540 // okay so final step is to be able to add atomic as in-progress, so we are the 541 // only thread processing this file 542 return endpoint.getInProgressRepository().add(absoluteFilePath); 543 } 544 545 /** 546 * Strategy to perform file matching based on endpoint configuration. 547 * <p/> 548 * Will always return <tt>false</tt> for certain files/folders: 549 * <ul> 550 * <li>Starting with a dot</li> 551 * <li>lock files</li> 552 * </ul> 553 * And then <tt>true</tt> for directories. 554 * 555 * @param file the file 556 * @param isDirectory whether the file is a directory or a file 557 * @param files files in the directory 558 * @return <tt>true</tt> if the file is matched, <tt>false</tt> if not 559 */ 560 protected boolean isMatched(GenericFile<T> file, boolean isDirectory, List<T> files) { 561 String name = file.getFileNameOnly(); 562 563 // folders/names starting with dot is always skipped (eg. ".", ".camel", ".camelLock") 564 if (name.startsWith(".")) { 565 return false; 566 } 567 568 // lock files should be skipped 569 if (name.endsWith(FileComponent.DEFAULT_LOCK_FILE_POSTFIX)) { 570 return false; 571 } 572 573 if (endpoint.getFilter() != null) { 574 if (!endpoint.getFilter().accept(file)) { 575 return false; 576 } 577 } 578 579 if (endpoint.getAntFilter() != null) { 580 if (!endpoint.getAntFilter().accept(file)) { 581 return false; 582 } 583 } 584 585 // directories are regarded as matched if filter accepted them 586 if (isDirectory) { 587 return true; 588 } 589 590 if (ObjectHelper.isNotEmpty(endpoint.getExclude())) { 591 if (name.matches(endpoint.getExclude())) { 592 return false; 593 } 594 } 595 596 if (ObjectHelper.isNotEmpty(endpoint.getInclude())) { 597 if (!name.matches(endpoint.getInclude())) { 598 return false; 599 } 600 } 601 602 // use file expression for a simple dynamic file filter 603 if (endpoint.getFileName() != null) { 604 fileExpressionResult = evaluateFileExpression(); 605 if (fileExpressionResult != null) { 606 if (!name.equals(fileExpressionResult)) { 607 return false; 608 } 609 } 610 } 611 612 // if done file name is enabled, then the file is only valid if a done file exists 613 if (endpoint.getDoneFileName() != null) { 614 // done file must be in same path as the file 615 String doneFileName = endpoint.createDoneFileName(file.getAbsoluteFilePath()); 616 ObjectHelper.notEmpty(doneFileName, "doneFileName", endpoint); 617 618 // is it a done file name? 619 if (endpoint.isDoneFile(file.getFileNameOnly())) { 620 log.trace("Skipping done file: {}", file); 621 return false; 622 } 623 624 if (!isMatched(file, doneFileName, files)) { 625 return false; 626 } 627 } 628 629 return true; 630 } 631 632 /** 633 * Strategy to perform file matching based on endpoint configuration in terms of done file name. 634 * 635 * @param file the file 636 * @param doneFileName the done file name (without any paths) 637 * @param files files in the directory 638 * @return <tt>true</tt> if the file is matched, <tt>false</tt> if not 639 */ 640 protected abstract boolean isMatched(GenericFile<T> file, String doneFileName, List<T> files); 641 642 /** 643 * Is the given file already in progress. 644 * 645 * @param file the file 646 * @return <tt>true</tt> if the file is already in progress 647 * @deprecated no longer in use, use {@link org.apache.camel.component.file.GenericFileEndpoint#getInProgressRepository()} instead. 648 */ 649 @Deprecated 650 protected boolean isInProgress(GenericFile<T> file) { 651 String key = file.getAbsoluteFilePath(); 652 // must use add, to have operation as atomic 653 return !endpoint.getInProgressRepository().add(key); 654 } 655 656 protected String evaluateFileExpression() { 657 if (fileExpressionResult == null && endpoint.getFileName() != null) { 658 // create a dummy exchange as Exchange is needed for expression evaluation 659 Exchange dummy = endpoint.createExchange(); 660 fileExpressionResult = endpoint.getFileName().evaluate(dummy, String.class); 661 } 662 return fileExpressionResult; 663 } 664 665 @SuppressWarnings("unchecked") 666 private GenericFile<T> getExchangeFileProperty(Exchange exchange) { 667 return (GenericFile<T>) exchange.getProperty(FileComponent.FILE_EXCHANGE_FILE); 668 } 669 670 @Override 671 protected void doStart() throws Exception { 672 super.doStart(); 673 } 674 675 @Override 676 protected void doStop() throws Exception { 677 prepareOnStartup = false; 678 super.doStop(); 679 } 680}