001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.io;
019
020import java.io.FileDescriptor;
021import java.io.IOException;
022import java.util.concurrent.ArrayBlockingQueue;
023import java.util.concurrent.ThreadPoolExecutor;
024import java.util.concurrent.TimeUnit;
025
026import org.apache.commons.logging.Log;
027import org.apache.commons.logging.LogFactory;
028import org.apache.hadoop.classification.InterfaceAudience;
029import org.apache.hadoop.classification.InterfaceStability;
030import org.apache.hadoop.io.nativeio.NativeIO;
031
032import com.google.common.base.Preconditions;
033import com.google.common.util.concurrent.ThreadFactoryBuilder;
034
035/**
036 * Manages a pool of threads which can issue readahead requests on file descriptors.
037 */
038@InterfaceAudience.Private
039@InterfaceStability.Evolving
040public class ReadaheadPool {
041  static final Log LOG = LogFactory.getLog(ReadaheadPool.class);
042  private static final int POOL_SIZE = 4;
043  private static final int MAX_POOL_SIZE = 16;
044  private static final int CAPACITY = 1024;
045  private final ThreadPoolExecutor pool;
046  
047  private static ReadaheadPool instance;
048
049  /**
050   * Return the singleton instance for the current process.
051   */
052  public static ReadaheadPool getInstance() {
053    synchronized (ReadaheadPool.class) {
054      if (instance == null && NativeIO.isAvailable()) {
055        instance = new ReadaheadPool();
056      }
057      return instance;
058    }
059  }
060  
061  private ReadaheadPool() {
062    pool = new ThreadPoolExecutor(POOL_SIZE, MAX_POOL_SIZE, 3L, TimeUnit.SECONDS,
063        new ArrayBlockingQueue<Runnable>(CAPACITY));
064    pool.setRejectedExecutionHandler(new ThreadPoolExecutor.DiscardOldestPolicy());
065    pool.setThreadFactory(new ThreadFactoryBuilder()
066      .setDaemon(true)
067      .setNameFormat("Readahead Thread #%d")
068      .build());
069  }
070
071  /**
072   * Issue a request to readahead on the given file descriptor.
073   * 
074   * @param identifier a textual identifier that will be used in error
075   * messages (e.g. the file name)
076   * @param fd the file descriptor to read ahead
077   * @param curPos the current offset at which reads are being issued
078   * @param readaheadLength the configured length to read ahead
079   * @param maxOffsetToRead the maximum offset that will be readahead
080   *        (useful if, for example, only some segment of the file is
081   *        requested by the user). Pass {@link Long.MAX_VALUE} to allow
082   *        readahead to the end of the file.
083   * @param lastReadahead the result returned by the previous invocation
084   *        of this function on this file descriptor, or null if this is
085   *        the first call
086   * @return an object representing this outstanding request, or null
087   *        if no readahead was performed
088   */
089  public ReadaheadRequest readaheadStream(
090      String identifier,
091      FileDescriptor fd,
092      long curPos,
093      long readaheadLength,
094      long maxOffsetToRead,
095      ReadaheadRequest lastReadahead) {
096    
097    Preconditions.checkArgument(curPos <= maxOffsetToRead,
098        "Readahead position %s higher than maxOffsetToRead %s",
099        curPos, maxOffsetToRead);
100
101    if (readaheadLength <= 0) {
102      return null;
103    }
104    
105    long lastOffset = Long.MIN_VALUE;
106    
107    if (lastReadahead != null) {
108      lastOffset = lastReadahead.getOffset();
109    }
110
111    // trigger each readahead when we have reached the halfway mark
112    // in the previous readahead. This gives the system time
113    // to satisfy the readahead before we start reading the data.
114    long nextOffset = lastOffset + readaheadLength / 2; 
115    if (curPos >= nextOffset) {
116      // cancel any currently pending readahead, to avoid
117      // piling things up in the queue. Each reader should have at most
118      // one outstanding request in the queue.
119      if (lastReadahead != null) {
120        lastReadahead.cancel();
121        lastReadahead = null;
122      }
123      
124      long length = Math.min(readaheadLength,
125          maxOffsetToRead - curPos);
126
127      if (length <= 0) {
128        // we've reached the end of the stream
129        return null;
130      }
131      
132      return submitReadahead(identifier, fd, curPos, length);
133    } else {
134      return lastReadahead;
135    }
136  }
137      
138  /**
139   * Submit a request to readahead on the given file descriptor.
140   * @param identifier a textual identifier used in error messages, etc.
141   * @param fd the file descriptor to readahead
142   * @param off the offset at which to start the readahead
143   * @param len the number of bytes to read
144   * @return an object representing this pending request
145   */
146  public ReadaheadRequest submitReadahead(
147      String identifier, FileDescriptor fd, long off, long len) {
148    ReadaheadRequestImpl req = new ReadaheadRequestImpl(
149        identifier, fd, off, len);
150    pool.execute(req);
151    if (LOG.isTraceEnabled()) {
152      LOG.trace("submit readahead: " + req);
153    }
154    return req;
155  }
156  
157  /**
158   * An outstanding readahead request that has been submitted to
159   * the pool. This request may be pending or may have been
160   * completed.
161   */
162  public interface ReadaheadRequest {
163    /**
164     * Cancels the request for readahead. This should be used
165     * if the reader no longer needs the requested data, <em>before</em>
166     * closing the related file descriptor.
167     * 
168     * It is safe to use even if the readahead request has already
169     * been fulfilled.
170     */
171    public void cancel();
172    
173    /**
174     * @return the requested offset
175     */
176    public long getOffset();
177
178    /**
179     * @return the requested length
180     */
181    public long getLength();
182  }
183  
184  private static class ReadaheadRequestImpl implements Runnable, ReadaheadRequest {
185    private final String identifier;
186    private final FileDescriptor fd;
187    private final long off, len;
188    private volatile boolean canceled = false;
189    
190    private ReadaheadRequestImpl(String identifier, FileDescriptor fd, long off, long len) {
191      this.identifier = identifier;
192      this.fd = fd;
193      this.off = off;
194      this.len = len;
195    }
196    
197    @Override
198    public void run() {
199      if (canceled) return;
200      // There's a very narrow race here that the file will close right at
201      // this instant. But if that happens, we'll likely receive an EBADF
202      // error below, and see that it's canceled, ignoring the error.
203      // It's also possible that we'll end up requesting readahead on some
204      // other FD, which may be wasted work, but won't cause a problem.
205      try {
206        NativeIO.POSIX.getCacheManipulator().posixFadviseIfPossible(identifier,
207            fd, off, len, NativeIO.POSIX.POSIX_FADV_WILLNEED);
208      } catch (IOException ioe) {
209        if (canceled) {
210          // no big deal - the reader canceled the request and closed
211          // the file.
212          return;
213        }
214        LOG.warn("Failed readahead on " + identifier,
215            ioe);
216      }
217    }
218
219    @Override
220    public void cancel() {
221      canceled = true;
222      // We could attempt to remove it from the work queue, but that would
223      // add complexity. In practice, the work queues remain very short,
224      // so removing canceled requests has no gain.
225    }
226
227    @Override
228    public long getOffset() {
229      return off;
230    }
231
232    @Override
233    public long getLength() {
234      return len;
235    }
236
237    @Override
238    public String toString() {
239      return "ReadaheadRequestImpl [identifier='" + identifier + "', fd=" + fd
240          + ", off=" + off + ", len=" + len + "]";
241    }
242  }
243}