001 /**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements. See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership. The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License. You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018 package org.apache.hadoop.fs;
019
020 import java.io.Closeable;
021 import java.io.FileNotFoundException;
022 import java.io.IOException;
023 import java.net.URI;
024 import java.security.PrivilegedExceptionAction;
025 import java.util.ArrayList;
026 import java.util.Arrays;
027 import java.util.Collections;
028 import java.util.EnumSet;
029 import java.util.HashMap;
030 import java.util.HashSet;
031 import java.util.IdentityHashMap;
032 import java.util.Iterator;
033 import java.util.List;
034 import java.util.Map;
035 import java.util.NoSuchElementException;
036 import java.util.ServiceLoader;
037 import java.util.Set;
038 import java.util.Stack;
039 import java.util.TreeSet;
040 import java.util.concurrent.atomic.AtomicInteger;
041 import java.util.concurrent.atomic.AtomicLong;
042
043 import org.apache.commons.logging.Log;
044 import org.apache.commons.logging.LogFactory;
045 import org.apache.hadoop.classification.InterfaceAudience;
046 import org.apache.hadoop.classification.InterfaceStability;
047 import org.apache.hadoop.conf.Configuration;
048 import org.apache.hadoop.conf.Configured;
049 import org.apache.hadoop.fs.Options.ChecksumOpt;
050 import org.apache.hadoop.fs.Options.Rename;
051 import org.apache.hadoop.fs.permission.FsPermission;
052 import org.apache.hadoop.io.MultipleIOException;
053 import org.apache.hadoop.io.Text;
054 import org.apache.hadoop.net.NetUtils;
055 import org.apache.hadoop.security.Credentials;
056 import org.apache.hadoop.security.SecurityUtil;
057 import org.apache.hadoop.security.UserGroupInformation;
058 import org.apache.hadoop.security.token.Token;
059 import org.apache.hadoop.util.DataChecksum;
060 import org.apache.hadoop.util.Progressable;
061 import org.apache.hadoop.util.ReflectionUtils;
062 import org.apache.hadoop.util.ShutdownHookManager;
063
064 import com.google.common.annotations.VisibleForTesting;
065
066 /****************************************************************
067 * An abstract base class for a fairly generic filesystem. It
068 * may be implemented as a distributed filesystem, or as a "local"
069 * one that reflects the locally-connected disk. The local version
070 * exists for small Hadoop instances and for testing.
071 *
072 * <p>
073 *
074 * All user code that may potentially use the Hadoop Distributed
075 * File System should be written to use a FileSystem object. The
076 * Hadoop DFS is a multi-machine system that appears as a single
077 * disk. It's useful because of its fault tolerance and potentially
078 * very large capacity.
079 *
080 * <p>
081 * The local implementation is {@link LocalFileSystem} and distributed
082 * implementation is DistributedFileSystem.
083 *****************************************************************/
084 @InterfaceAudience.Public
085 @InterfaceStability.Stable
086 public abstract class FileSystem extends Configured implements Closeable {
/** Configuration key for the default filesystem name; alias of CommonConfigurationKeys.FS_DEFAULT_NAME_KEY. */
087 public static final String FS_DEFAULT_NAME_KEY =
088 CommonConfigurationKeys.FS_DEFAULT_NAME_KEY;
/** Default filesystem name used when the key is unset; alias of CommonConfigurationKeys.FS_DEFAULT_NAME_DEFAULT. */
089 public static final String DEFAULT_FS =
090 CommonConfigurationKeys.FS_DEFAULT_NAME_DEFAULT;
091
/** Logger shared by the FileSystem class. */
092 public static final Log LOG = LogFactory.getLog(FileSystem.class);
093
094 /**
095 * Priority of the FileSystem shutdown hook.
096 */
097 public static final int SHUTDOWN_HOOK_PRIORITY = 10;
098
099 /** The FileSystem cache shared by the static lookup methods of this class. */
100 static final Cache CACHE = new Cache();
101
102 /** The key this instance is stored under in the cache. */
103 private Cache.Key key;
104
105 /** Statistics recorded per FileSystem class (keys are compared by identity). */
106 private static final Map<Class<? extends FileSystem>, Statistics>
107 statisticsTable =
108 new IdentityHashMap<Class<? extends FileSystem>, Statistics>();
109
110 /**
111 * The statistics for this file system.
112 */
113 protected Statistics statistics;
114
115 /**
116 * A cache of files that should be deleted when the filesystem is closed
117 * or the JVM is exited.
118 */
119 private Set<Path> deleteOnExit = new TreeSet<Path>();
120
121 /**
122 * This method adds a file system for testing so that we can find it later. It
123 * is only for testing.
124 * @param uri the uri to store it under
125 * @param conf the configuration to store it under
126 * @param fs the file system to store
127 * @throws IOException
128 */
129 static void addFileSystemForTesting(URI uri, Configuration conf,
130 FileSystem fs) throws IOException {
131 CACHE.map.put(new Cache.Key(uri, conf), fs);
132 }
133
134 /**
135 * Get a filesystem instance based on the uri, the passed
136 * configuration and the user
137 * @param uri of the filesystem
138 * @param conf the configuration to use
139 * @param user to perform the get as
140 * @return the filesystem instance
141 * @throws IOException
142 * @throws InterruptedException
143 */
144 public static FileSystem get(final URI uri, final Configuration conf,
145 final String user) throws IOException, InterruptedException {
146 String ticketCachePath =
147 conf.get(CommonConfigurationKeys.KERBEROS_TICKET_CACHE_PATH);
148 UserGroupInformation ugi =
149 UserGroupInformation.getBestUGI(ticketCachePath, user);
150 return ugi.doAs(new PrivilegedExceptionAction<FileSystem>() {
151 @Override
152 public FileSystem run() throws IOException {
153 return get(uri, conf);
154 }
155 });
156 }
157
158 /**
159 * Returns the configured filesystem implementation.
160 * @param conf the configuration to use
161 */
162 public static FileSystem get(Configuration conf) throws IOException {
163 return get(getDefaultUri(conf), conf);
164 }
165
166 /** Get the default filesystem URI from a configuration.
167 * @param conf the configuration to use
168 * @return the uri of the default filesystem
169 */
170 public static URI getDefaultUri(Configuration conf) {
171 return URI.create(fixName(conf.get(FS_DEFAULT_NAME_KEY, DEFAULT_FS)));
172 }
173
174 /** Set the default filesystem URI in a configuration.
175 * @param conf the configuration to alter
176 * @param uri the new default filesystem uri
177 */
178 public static void setDefaultUri(Configuration conf, URI uri) {
179 conf.set(FS_DEFAULT_NAME_KEY, uri.toString());
180 }
181
182 /** Set the default filesystem URI in a configuration.
183 * @param conf the configuration to alter
184 * @param uri the new default filesystem uri
185 */
186 public static void setDefaultUri(Configuration conf, String uri) {
187 setDefaultUri(conf, URI.create(fixName(uri)));
188 }
189
190 /** Called after a new FileSystem instance is constructed.
191 * @param name a uri whose authority section names the host, port, etc.
192 * for this FileSystem
193 * @param conf the configuration
194 */
195 public void initialize(URI name, Configuration conf) throws IOException {
196 statistics = getStatistics(name.getScheme(), getClass());
197 }
198
199 /**
200 * Return the protocol scheme for the FileSystem.
201 * <p/>
202 * This implementation throws an <code>UnsupportedOperationException</code>.
203 *
204 * @return the protocol scheme for the FileSystem.
205 */
206 public String getScheme() {
207 throw new UnsupportedOperationException("Not implemented by the " + getClass().getSimpleName() + " FileSystem implementation");
208 }
209
210 /** Returns a URI whose scheme and authority identify this FileSystem.*/
211 public abstract URI getUri();
212
213 /**
214 * Resolve the uri's hostname and add the default port if not in the uri
215 * @return URI
216 * @see NetUtils#getCanonicalUri(URI, int)
217 */
218 protected URI getCanonicalUri() {
219 return NetUtils.getCanonicalUri(getUri(), getDefaultPort());
220 }
221
222 /**
223 * Get the default port for this file system.
224 * @return the default port or 0 if there isn't one
225 */
226 protected int getDefaultPort() {
227 return 0;
228 }
229
230 /**
231 * Get a canonical service name for this file system. The token cache is
232 * the only user of the canonical service name, and uses it to lookup this
233 * filesystem's service tokens.
234 * If file system provides a token of its own then it must have a canonical
235 * name, otherwise canonical name can be null.
236 *
237 * Default Impl: If the file system has child file systems
238 * (such as an embedded file system) then it is assumed that the fs has no
239 * tokens of its own and hence returns a null name; otherwise a service
240 * name is built using Uri and port.
241 *
242 * @return a service string that uniquely identifies this file system, null
243 * if the filesystem does not implement tokens
244 * @see SecurityUtil#buildDTServiceName(URI, int)
245 */
246 @InterfaceAudience.LimitedPrivate({ "HDFS", "MapReduce" })
247 public String getCanonicalServiceName() {
248 return (getChildFileSystems() == null)
249 ? SecurityUtil.buildDTServiceName(getUri(), getDefaultPort())
250 : null;
251 }
252
253 /** @deprecated call #getUri() instead.*/
254 @Deprecated
255 public String getName() { return getUri().toString(); }
256
257 /** @deprecated call #get(URI,Configuration) instead. */
258 @Deprecated
259 public static FileSystem getNamed(String name, Configuration conf)
260 throws IOException {
261 return get(URI.create(fixName(name)), conf);
262 }
263
264 /** Update old-format filesystem names, for back-compatibility. This should
265 * eventually be replaced with a checkName() method that throws an exception
266 * for old-format names. */
267 private static String fixName(String name) {
268 // convert old-format name to new-format name
269 if (name.equals("local")) { // "local" is now "file:///".
270 LOG.warn("\"local\" is a deprecated filesystem name."
271 +" Use \"file:///\" instead.");
272 name = "file:///";
273 } else if (name.indexOf('/')==-1) { // unqualified is "hdfs://"
274 LOG.warn("\""+name+"\" is a deprecated filesystem name."
275 +" Use \"hdfs://"+name+"/\" instead.");
276 name = "hdfs://"+name;
277 }
278 return name;
279 }
280
281 /**
282 * Get the local file system.
283 * @param conf the configuration to configure the file system with
284 * @return a LocalFileSystem
285 */
286 public static LocalFileSystem getLocal(Configuration conf)
287 throws IOException {
288 return (LocalFileSystem)get(LocalFileSystem.NAME, conf);
289 }
290
291 /** Returns the FileSystem for this URI's scheme and authority. The scheme
292 * of the URI determines a configuration property name,
293 * <tt>fs.<i>scheme</i>.class</tt> whose value names the FileSystem class.
294 * The entire URI is passed to the FileSystem instance's initialize method.
295 */
296 public static FileSystem get(URI uri, Configuration conf) throws IOException {
297 String scheme = uri.getScheme();
298 String authority = uri.getAuthority();
299
300 if (scheme == null && authority == null) { // use default FS
301 return get(conf);
302 }
303
304 if (scheme != null && authority == null) { // no authority
305 URI defaultUri = getDefaultUri(conf);
306 if (scheme.equals(defaultUri.getScheme()) // if scheme matches default
307 && defaultUri.getAuthority() != null) { // & default has authority
308 return get(defaultUri, conf); // return default
309 }
310 }
311
312 String disableCacheName = String.format("fs.%s.impl.disable.cache", scheme);
313 if (conf.getBoolean(disableCacheName, false)) {
314 return createFileSystem(uri, conf);
315 }
316
317 return CACHE.get(uri, conf);
318 }
319
320 /**
321 * Returns the FileSystem for this URI's scheme and authority and the
322 * passed user. Internally invokes {@link #newInstance(URI, Configuration)}
323 * @param uri of the filesystem
324 * @param conf the configuration to use
325 * @param user to perform the get as
326 * @return filesystem instance
327 * @throws IOException
328 * @throws InterruptedException
329 */
330 public static FileSystem newInstance(final URI uri, final Configuration conf,
331 final String user) throws IOException, InterruptedException {
332 String ticketCachePath =
333 conf.get(CommonConfigurationKeys.KERBEROS_TICKET_CACHE_PATH);
334 UserGroupInformation ugi =
335 UserGroupInformation.getBestUGI(ticketCachePath, user);
336 return ugi.doAs(new PrivilegedExceptionAction<FileSystem>() {
337 @Override
338 public FileSystem run() throws IOException {
339 return newInstance(uri,conf);
340 }
341 });
342 }
343 /** Returns the FileSystem for this URI's scheme and authority. The scheme
344 * of the URI determines a configuration property name,
345 * <tt>fs.<i>scheme</i>.class</tt> whose value names the FileSystem class.
346 * The entire URI is passed to the FileSystem instance's initialize method.
347 * This always returns a new FileSystem object.
348 */
349 public static FileSystem newInstance(URI uri, Configuration conf) throws IOException {
350 String scheme = uri.getScheme();
351 String authority = uri.getAuthority();
352
353 if (scheme == null) { // no scheme: use default FS
354 return newInstance(conf);
355 }
356
357 if (authority == null) { // no authority
358 URI defaultUri = getDefaultUri(conf);
359 if (scheme.equals(defaultUri.getScheme()) // if scheme matches default
360 && defaultUri.getAuthority() != null) { // & default has authority
361 return newInstance(defaultUri, conf); // return default
362 }
363 }
364 return CACHE.getUnique(uri, conf);
365 }
366
367 /** Returns a unique configured filesystem implementation.
368 * This always returns a new FileSystem object.
369 * @param conf the configuration to use
370 */
371 public static FileSystem newInstance(Configuration conf) throws IOException {
372 return newInstance(getDefaultUri(conf), conf);
373 }
374
375 /**
376 * Get a unique local file system object
377 * @param conf the configuration to configure the file system with
378 * @return a LocalFileSystem
379 * This always returns a new FileSystem object.
380 */
381 public static LocalFileSystem newInstanceLocal(Configuration conf)
382 throws IOException {
383 return (LocalFileSystem)newInstance(LocalFileSystem.NAME, conf);
384 }
385
386 /**
387 * Close all cached filesystems. Be sure those filesystems are not
388 * used anymore.
389 *
390 * @throws IOException
391 */
392 public static void closeAll() throws IOException {
393 CACHE.closeAll();
394 }
395
396 /**
397 * Close all cached filesystems for a given UGI. Be sure those filesystems
398 * are not used anymore.
399 * @param ugi user group info to close
400 * @throws IOException
401 */
402 public static void closeAllForUGI(UserGroupInformation ugi)
403 throws IOException {
404 CACHE.closeAll(ugi);
405 }
406
407 /**
408 * Make sure that a path specifies a FileSystem.
409 * @param path to use
410 */
411 public Path makeQualified(Path path) {
412 checkPath(path);
413 return path.makeQualified(this.getUri(), this.getWorkingDirectory());
414 }
415
416 /**
417 * Get a new delegation token for this file system.
418 * This is an internal method that should have been declared protected
419 * but wasn't historically.
420 * Callers should use {@link #addDelegationTokens(String, Credentials)}
421 *
422 * @param renewer the account name that is allowed to renew the token.
423 * @return a new delegation token
424 * @throws IOException
425 */
426 @InterfaceAudience.Private()
427 public Token<?> getDelegationToken(String renewer) throws IOException {
428 return null;
429 }
430
431 /**
432 * Obtain all delegation tokens used by this FileSystem that are not
433 * already present in the given Credentials. Existing tokens will neither
434 * be verified as valid nor having the given renewer. Missing tokens will
435 * be acquired and added to the given Credentials.
436 *
437 * Default Impl: works for simple fs with its own token
438 * and also for an embedded fs whose tokens are those of its
439 * children file system (i.e. the embedded fs has not tokens of its
440 * own).
441 *
442 * @param renewer the user allowed to renew the delegation tokens
443 * @param credentials cache in which to add new delegation tokens
444 * @return list of new delegation tokens
445 * @throws IOException
446 */
447 @InterfaceAudience.LimitedPrivate({ "HDFS", "MapReduce" })
448 public Token<?>[] addDelegationTokens(
449 final String renewer, Credentials credentials) throws IOException {
450 if (credentials == null) {
451 credentials = new Credentials();
452 }
453 final List<Token<?>> tokens = new ArrayList<Token<?>>();
454 collectDelegationTokens(renewer, credentials, tokens);
455 return tokens.toArray(new Token<?>[tokens.size()]);
456 }
457
458 /**
459 * Recursively obtain the tokens for this FileSystem and all descended
460 * FileSystems as determined by getChildFileSystems().
461 * @param renewer the user allowed to renew the delegation tokens
462 * @param credentials cache in which to add the new delegation tokens
463 * @param tokens list in which to add acquired tokens
464 * @throws IOException
465 */
466 private void collectDelegationTokens(final String renewer,
467 final Credentials credentials,
468 final List<Token<?>> tokens)
469 throws IOException {
470 final String serviceName = getCanonicalServiceName();
471 // Collect token of the this filesystem and then of its embedded children
472 if (serviceName != null) { // fs has token, grab it
473 final Text service = new Text(serviceName);
474 Token<?> token = credentials.getToken(service);
475 if (token == null) {
476 token = getDelegationToken(renewer);
477 if (token != null) {
478 tokens.add(token);
479 credentials.addToken(service, token);
480 }
481 }
482 }
483 // Now collect the tokens from the children
484 final FileSystem[] children = getChildFileSystems();
485 if (children != null) {
486 for (final FileSystem fs : children) {
487 fs.collectDelegationTokens(renewer, credentials, tokens);
488 }
489 }
490 }
491
492 /**
493 * Get all the immediate child FileSystems embedded in this FileSystem.
494 * It does not recurse and get grand children. If a FileSystem
495 * has multiple child FileSystems, then it should return a unique list
496 * of those FileSystems. Default is to return null to signify no children.
497 *
498 * @return FileSystems used by this FileSystem
499 */
500 @InterfaceAudience.LimitedPrivate({ "HDFS" })
501 @VisibleForTesting
502 public FileSystem[] getChildFileSystems() {
503 return null;
504 }
505
506 /** create a file with the provided permission
507 * The permission of the file is set to be the provided permission as in
508 * setPermission, not permission&~umask
509 *
510 * It is implemented using two RPCs. It is understood that it is inefficient,
511 * but the implementation is thread-safe. The other option is to change the
512 * value of umask in configuration to be 0, but it is not thread-safe.
513 *
514 * @param fs file system handle
515 * @param file the name of the file to be created
516 * @param permission the permission of the file
517 * @return an output stream
518 * @throws IOException
519 */
520 public static FSDataOutputStream create(FileSystem fs,
521 Path file, FsPermission permission) throws IOException {
522 // create the file with default permission
523 FSDataOutputStream out = fs.create(file);
524 // set its permission to the supplied one
525 fs.setPermission(file, permission);
526 return out;
527 }
528
529 /** create a directory with the provided permission
530 * The permission of the directory is set to be the provided permission as in
531 * setPermission, not permission&~umask
532 *
533 * @see #create(FileSystem, Path, FsPermission)
534 *
535 * @param fs file system handle
536 * @param dir the name of the directory to be created
537 * @param permission the permission of the directory
538 * @return true if the directory creation succeeds; false otherwise
539 * @throws IOException
540 */
541 public static boolean mkdirs(FileSystem fs, Path dir, FsPermission permission)
542 throws IOException {
543 // create the directory using the default permission
544 boolean result = fs.mkdirs(dir);
545 // set its permission to be the supplied one
546 fs.setPermission(dir, permission);
547 return result;
548 }
549
550 ///////////////////////////////////////////////////////////////
551 // FileSystem
552 ///////////////////////////////////////////////////////////////
553
/**
 * Constructor for subclasses. Hands a null configuration to the
 * {@code Configured} superclass.
 */
protected FileSystem() {
  super(null);
}
557
558 /**
559 * Check that a Path belongs to this FileSystem.
560 * @param path to check
561 */
562 protected void checkPath(Path path) {
563 URI uri = path.toUri();
564 String thatScheme = uri.getScheme();
565 if (thatScheme == null) // fs is relative
566 return;
567 URI thisUri = getCanonicalUri();
568 String thisScheme = thisUri.getScheme();
569 //authority and scheme are not case sensitive
570 if (thisScheme.equalsIgnoreCase(thatScheme)) {// schemes match
571 String thisAuthority = thisUri.getAuthority();
572 String thatAuthority = uri.getAuthority();
573 if (thatAuthority == null && // path's authority is null
574 thisAuthority != null) { // fs has an authority
575 URI defaultUri = getDefaultUri(getConf());
576 if (thisScheme.equalsIgnoreCase(defaultUri.getScheme())) {
577 uri = defaultUri; // schemes match, so use this uri instead
578 } else {
579 uri = null; // can't determine auth of the path
580 }
581 }
582 if (uri != null) {
583 // canonicalize uri before comparing with this fs
584 uri = NetUtils.getCanonicalUri(uri, getDefaultPort());
585 thatAuthority = uri.getAuthority();
586 if (thisAuthority == thatAuthority || // authorities match
587 (thisAuthority != null &&
588 thisAuthority.equalsIgnoreCase(thatAuthority)))
589 return;
590 }
591 }
592 throw new IllegalArgumentException("Wrong FS: "+path+
593 ", expected: "+this.getUri());
594 }
595
596 /**
597 * Return an array containing hostnames, offset and size of
598 * portions of the given file. For a nonexistent
599 * file or regions, null will be returned.
600 *
601 * This call is most helpful with DFS, where it returns
602 * hostnames of machines that contain the given file.
603 *
604 * The FileSystem will simply return an elt containing 'localhost'.
605 *
606 * @param file FilesStatus to get data from
607 * @param start offset into the given file
608 * @param len length for which to get locations for
609 */
610 public BlockLocation[] getFileBlockLocations(FileStatus file,
611 long start, long len) throws IOException {
612 if (file == null) {
613 return null;
614 }
615
616 if (start < 0 || len < 0) {
617 throw new IllegalArgumentException("Invalid start or len parameter");
618 }
619
620 if (file.getLen() <= start) {
621 return new BlockLocation[0];
622
623 }
624 String[] name = { "localhost:50010" };
625 String[] host = { "localhost" };
626 return new BlockLocation[] {
627 new BlockLocation(name, host, 0, file.getLen()) };
628 }
629
630
631 /**
632 * Return an array containing hostnames, offset and size of
633 * portions of the given file. For a nonexistent
634 * file or regions, null will be returned.
635 *
636 * This call is most helpful with DFS, where it returns
637 * hostnames of machines that contain the given file.
638 *
639 * The FileSystem will simply return an elt containing 'localhost'.
640 *
641 * @param p path is used to identify an FS since an FS could have
642 * another FS that it could be delegating the call to
643 * @param start offset into the given file
644 * @param len length for which to get locations for
645 */
646 public BlockLocation[] getFileBlockLocations(Path p,
647 long start, long len) throws IOException {
648 if (p == null) {
649 throw new NullPointerException();
650 }
651 FileStatus file = getFileStatus(p);
652 return getFileBlockLocations(file, start, len);
653 }
654
655 /**
656 * Return a set of server default configuration values
657 * @return server default configuration values
658 * @throws IOException
659 * @deprecated use {@link #getServerDefaults(Path)} instead
660 */
661 @Deprecated
662 public FsServerDefaults getServerDefaults() throws IOException {
663 Configuration conf = getConf();
664 // CRC32 is chosen as default as it is available in all
665 // releases that support checksum.
666 // The client trash configuration is ignored.
667 return new FsServerDefaults(getDefaultBlockSize(),
668 conf.getInt("io.bytes.per.checksum", 512),
669 64 * 1024,
670 getDefaultReplication(),
671 conf.getInt("io.file.buffer.size", 4096),
672 false,
673 CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_DEFAULT,
674 DataChecksum.Type.CRC32);
675 }
676
677 /**
678 * Return a set of server default configuration values
679 * @param p path is used to identify an FS since an FS could have
680 * another FS that it could be delegating the call to
681 * @return server default configuration values
682 * @throws IOException
683 */
684 public FsServerDefaults getServerDefaults(Path p) throws IOException {
685 return getServerDefaults();
686 }
687
688 /**
689 * Return the fully-qualified path of path f resolving the path
690 * through any symlinks or mount point
691 * @param p path to be resolved
692 * @return fully qualified path
693 * @throws FileNotFoundException
694 */
695 public Path resolvePath(final Path p) throws IOException {
696 checkPath(p);
697 return getFileStatus(p).getPath();
698 }
699
700 /**
701 * Opens an FSDataInputStream at the indicated Path.
702 * @param f the file name to open
703 * @param bufferSize the size of the buffer to be used.
704 */
705 public abstract FSDataInputStream open(Path f, int bufferSize)
706 throws IOException;
707
708 /**
709 * Opens an FSDataInputStream at the indicated Path.
710 * @param f the file to open
711 */
712 public FSDataInputStream open(Path f) throws IOException {
713 return open(f, getConf().getInt("io.file.buffer.size", 4096));
714 }
715
716 /**
717 * Create an FSDataOutputStream at the indicated Path.
718 * Files are overwritten by default.
719 * @param f the file to create
720 */
721 public FSDataOutputStream create(Path f) throws IOException {
722 return create(f, true);
723 }
724
725 /**
726 * Create an FSDataOutputStream at the indicated Path.
727 * @param f the file to create
728 * @param overwrite if a file with this name already exists, then if true,
729 * the file will be overwritten, and if false an exception will be thrown.
730 */
731 public FSDataOutputStream create(Path f, boolean overwrite)
732 throws IOException {
733 return create(f, overwrite,
734 getConf().getInt("io.file.buffer.size", 4096),
735 getDefaultReplication(f),
736 getDefaultBlockSize(f));
737 }
738
739 /**
740 * Create an FSDataOutputStream at the indicated Path with write-progress
741 * reporting.
742 * Files are overwritten by default.
743 * @param f the file to create
744 * @param progress to report progress
745 */
746 public FSDataOutputStream create(Path f, Progressable progress)
747 throws IOException {
748 return create(f, true,
749 getConf().getInt("io.file.buffer.size", 4096),
750 getDefaultReplication(f),
751 getDefaultBlockSize(f), progress);
752 }
753
754 /**
755 * Create an FSDataOutputStream at the indicated Path.
756 * Files are overwritten by default.
757 * @param f the file to create
758 * @param replication the replication factor
759 */
760 public FSDataOutputStream create(Path f, short replication)
761 throws IOException {
762 return create(f, true,
763 getConf().getInt("io.file.buffer.size", 4096),
764 replication,
765 getDefaultBlockSize(f));
766 }
767
768 /**
769 * Create an FSDataOutputStream at the indicated Path with write-progress
770 * reporting.
771 * Files are overwritten by default.
772 * @param f the file to create
773 * @param replication the replication factor
774 * @param progress to report progress
775 */
776 public FSDataOutputStream create(Path f, short replication,
777 Progressable progress) throws IOException {
778 return create(f, true,
779 getConf().getInt("io.file.buffer.size", 4096),
780 replication,
781 getDefaultBlockSize(f), progress);
782 }
783
784
785 /**
786 * Create an FSDataOutputStream at the indicated Path.
787 * @param f the file name to create
788 * @param overwrite if a file with this name already exists, then if true,
789 * the file will be overwritten, and if false an error will be thrown.
790 * @param bufferSize the size of the buffer to be used.
791 */
792 public FSDataOutputStream create(Path f,
793 boolean overwrite,
794 int bufferSize
795 ) throws IOException {
796 return create(f, overwrite, bufferSize,
797 getDefaultReplication(f),
798 getDefaultBlockSize(f));
799 }
800
801 /**
802 * Create an FSDataOutputStream at the indicated Path with write-progress
803 * reporting.
804 * @param f the path of the file to open
805 * @param overwrite if a file with this name already exists, then if true,
806 * the file will be overwritten, and if false an error will be thrown.
807 * @param bufferSize the size of the buffer to be used.
808 */
809 public FSDataOutputStream create(Path f,
810 boolean overwrite,
811 int bufferSize,
812 Progressable progress
813 ) throws IOException {
814 return create(f, overwrite, bufferSize,
815 getDefaultReplication(f),
816 getDefaultBlockSize(f), progress);
817 }
818
819
820 /**
821 * Create an FSDataOutputStream at the indicated Path.
822 * @param f the file name to open
823 * @param overwrite if a file with this name already exists, then if true,
824 * the file will be overwritten, and if false an error will be thrown.
825 * @param bufferSize the size of the buffer to be used.
826 * @param replication required block replication for the file.
827 */
828 public FSDataOutputStream create(Path f,
829 boolean overwrite,
830 int bufferSize,
831 short replication,
832 long blockSize
833 ) throws IOException {
834 return create(f, overwrite, bufferSize, replication, blockSize, null);
835 }
836
837 /**
838 * Create an FSDataOutputStream at the indicated Path with write-progress
839 * reporting.
840 * @param f the file name to open
841 * @param overwrite if a file with this name already exists, then if true,
842 * the file will be overwritten, and if false an error will be thrown.
843 * @param bufferSize the size of the buffer to be used.
844 * @param replication required block replication for the file.
845 */
846 public FSDataOutputStream create(Path f,
847 boolean overwrite,
848 int bufferSize,
849 short replication,
850 long blockSize,
851 Progressable progress
852 ) throws IOException {
853 return this.create(f, FsPermission.getFileDefault().applyUMask(
854 FsPermission.getUMask(getConf())), overwrite, bufferSize,
855 replication, blockSize, progress);
856 }
857
858 /**
859 * Create an FSDataOutputStream at the indicated Path with write-progress
860 * reporting.
861 * @param f the file name to open
862 * @param permission
863 * @param overwrite if a file with this name already exists, then if true,
864 * the file will be overwritten, and if false an error will be thrown.
865 * @param bufferSize the size of the buffer to be used.
866 * @param replication required block replication for the file.
867 * @param blockSize
868 * @param progress
869 * @throws IOException
870 * @see #setPermission(Path, FsPermission)
871 */
872 public abstract FSDataOutputStream create(Path f,
873 FsPermission permission,
874 boolean overwrite,
875 int bufferSize,
876 short replication,
877 long blockSize,
878 Progressable progress) throws IOException;
879
880 /**
881 * Create an FSDataOutputStream at the indicated Path with write-progress
882 * reporting.
883 * @param f the file name to open
884 * @param permission
885 * @param flags {@link CreateFlag}s to use for this stream.
886 * @param bufferSize the size of the buffer to be used.
887 * @param replication required block replication for the file.
888 * @param blockSize
889 * @param progress
890 * @throws IOException
891 * @see #setPermission(Path, FsPermission)
892 */
893 public FSDataOutputStream create(Path f,
894 FsPermission permission,
895 EnumSet<CreateFlag> flags,
896 int bufferSize,
897 short replication,
898 long blockSize,
899 Progressable progress) throws IOException {
900 return create(f, permission, flags, bufferSize, replication,
901 blockSize, progress, null);
902 }
903
904 /**
905 * Create an FSDataOutputStream at the indicated Path with a custom
906 * checksum option
907 * @param f the file name to open
908 * @param permission
909 * @param flags {@link CreateFlag}s to use for this stream.
910 * @param bufferSize the size of the buffer to be used.
911 * @param replication required block replication for the file.
912 * @param blockSize
913 * @param progress
914 * @param checksumOpt checksum parameter. If null, the values
915 * found in conf will be used.
916 * @throws IOException
917 * @see #setPermission(Path, FsPermission)
918 */
919 public FSDataOutputStream create(Path f,
920 FsPermission permission,
921 EnumSet<CreateFlag> flags,
922 int bufferSize,
923 short replication,
924 long blockSize,
925 Progressable progress,
926 ChecksumOpt checksumOpt) throws IOException {
927 // Checksum options are ignored by default. The file systems that
928 // implement checksum need to override this method. The full
929 // support is currently only available in DFS.
930 return create(f, permission, flags.contains(CreateFlag.OVERWRITE),
931 bufferSize, replication, blockSize, progress);
932 }
933
934 /*.
935 * This create has been added to support the FileContext that processes
936 * the permission
937 * with umask before calling this method.
938 * This a temporary method added to support the transition from FileSystem
939 * to FileContext for user applications.
940 */
941 @Deprecated
942 protected FSDataOutputStream primitiveCreate(Path f,
943 FsPermission absolutePermission, EnumSet<CreateFlag> flag, int bufferSize,
944 short replication, long blockSize, Progressable progress,
945 ChecksumOpt checksumOpt) throws IOException {
946
947 boolean pathExists = exists(f);
948 CreateFlag.validate(f, pathExists, flag);
949
950 // Default impl assumes that permissions do not matter and
951 // nor does the bytesPerChecksum hence
952 // calling the regular create is good enough.
953 // FSs that implement permissions should override this.
954
955 if (pathExists && flag.contains(CreateFlag.APPEND)) {
956 return append(f, bufferSize, progress);
957 }
958
959 return this.create(f, absolutePermission,
960 flag.contains(CreateFlag.OVERWRITE), bufferSize, replication,
961 blockSize, progress);
962 }
963
964 /**
965 * This version of the mkdirs method assumes that the permission is absolute.
966 * It has been added to support the FileContext that processes the permission
967 * with umask before calling this method.
968 * This a temporary method added to support the transition from FileSystem
969 * to FileContext for user applications.
970 */
971 @Deprecated
972 protected boolean primitiveMkdir(Path f, FsPermission absolutePermission)
973 throws IOException {
974 // Default impl is to assume that permissions do not matter and hence
975 // calling the regular mkdirs is good enough.
976 // FSs that implement permissions should override this.
977 return this.mkdirs(f, absolutePermission);
978 }
979
980
981 /**
982 * This version of the mkdirs method assumes that the permission is absolute.
983 * It has been added to support the FileContext that processes the permission
984 * with umask before calling this method.
985 * This a temporary method added to support the transition from FileSystem
986 * to FileContext for user applications.
987 */
988 @Deprecated
989 protected void primitiveMkdir(Path f, FsPermission absolutePermission,
990 boolean createParent)
991 throws IOException {
992
993 if (!createParent) { // parent must exist.
994 // since the this.mkdirs makes parent dirs automatically
995 // we must throw exception if parent does not exist.
996 final FileStatus stat = getFileStatus(f.getParent());
997 if (stat == null) {
998 throw new FileNotFoundException("Missing parent:" + f);
999 }
1000 if (!stat.isDirectory()) {
1001 throw new ParentNotDirectoryException("parent is not a dir");
1002 }
1003 // parent does exist - go ahead with mkdir of leaf
1004 }
1005 // Default impl is to assume that permissions do not matter and hence
1006 // calling the regular mkdirs is good enough.
1007 // FSs that implement permissions should override this.
1008 if (!this.mkdirs(f, absolutePermission)) {
1009 throw new IOException("mkdir of "+ f + " failed");
1010 }
1011 }
1012
1013 /**
1014 * Opens an FSDataOutputStream at the indicated Path with write-progress
1015 * reporting. Same as create(), except fails if parent directory doesn't
1016 * already exist.
1017 * @param f the file name to open
1018 * @param overwrite if a file with this name already exists, then if true,
1019 * the file will be overwritten, and if false an error will be thrown.
1020 * @param bufferSize the size of the buffer to be used.
1021 * @param replication required block replication for the file.
1022 * @param blockSize
1023 * @param progress
1024 * @throws IOException
1025 * @see #setPermission(Path, FsPermission)
1026 * @deprecated API only for 0.20-append
1027 */
1028 @Deprecated
1029 public FSDataOutputStream createNonRecursive(Path f,
1030 boolean overwrite,
1031 int bufferSize, short replication, long blockSize,
1032 Progressable progress) throws IOException {
1033 return this.createNonRecursive(f, FsPermission.getFileDefault(),
1034 overwrite, bufferSize, replication, blockSize, progress);
1035 }
1036
1037 /**
1038 * Opens an FSDataOutputStream at the indicated Path with write-progress
1039 * reporting. Same as create(), except fails if parent directory doesn't
1040 * already exist.
1041 * @param f the file name to open
1042 * @param permission
1043 * @param overwrite if a file with this name already exists, then if true,
1044 * the file will be overwritten, and if false an error will be thrown.
1045 * @param bufferSize the size of the buffer to be used.
1046 * @param replication required block replication for the file.
1047 * @param blockSize
1048 * @param progress
1049 * @throws IOException
1050 * @see #setPermission(Path, FsPermission)
1051 * @deprecated API only for 0.20-append
1052 */
1053 @Deprecated
1054 public FSDataOutputStream createNonRecursive(Path f, FsPermission permission,
1055 boolean overwrite, int bufferSize, short replication, long blockSize,
1056 Progressable progress) throws IOException {
1057 return createNonRecursive(f, permission,
1058 overwrite ? EnumSet.of(CreateFlag.CREATE, CreateFlag.OVERWRITE)
1059 : EnumSet.of(CreateFlag.CREATE), bufferSize,
1060 replication, blockSize, progress);
1061 }
1062
1063 /**
1064 * Opens an FSDataOutputStream at the indicated Path with write-progress
1065 * reporting. Same as create(), except fails if parent directory doesn't
1066 * already exist.
1067 * @param f the file name to open
1068 * @param permission
1069 * @param flags {@link CreateFlag}s to use for this stream.
1070 * @param bufferSize the size of the buffer to be used.
1071 * @param replication required block replication for the file.
1072 * @param blockSize
1073 * @param progress
1074 * @throws IOException
1075 * @see #setPermission(Path, FsPermission)
1076 * @deprecated API only for 0.20-append
1077 */
1078 @Deprecated
1079 public FSDataOutputStream createNonRecursive(Path f, FsPermission permission,
1080 EnumSet<CreateFlag> flags, int bufferSize, short replication, long blockSize,
1081 Progressable progress) throws IOException {
1082 throw new IOException("createNonRecursive unsupported for this filesystem "
1083 + this.getClass());
1084 }
1085
1086 /**
1087 * Creates the given Path as a brand-new zero-length file. If
1088 * create fails, or if it already existed, return false.
1089 *
1090 * @param f path to use for create
1091 */
1092 public boolean createNewFile(Path f) throws IOException {
1093 if (exists(f)) {
1094 return false;
1095 } else {
1096 create(f, false, getConf().getInt("io.file.buffer.size", 4096)).close();
1097 return true;
1098 }
1099 }
1100
1101 /**
1102 * Append to an existing file (optional operation).
1103 * Same as append(f, getConf().getInt("io.file.buffer.size", 4096), null)
1104 * @param f the existing file to be appended.
1105 * @throws IOException
1106 */
1107 public FSDataOutputStream append(Path f) throws IOException {
1108 return append(f, getConf().getInt("io.file.buffer.size", 4096), null);
1109 }
1110 /**
1111 * Append to an existing file (optional operation).
1112 * Same as append(f, bufferSize, null).
1113 * @param f the existing file to be appended.
1114 * @param bufferSize the size of the buffer to be used.
1115 * @throws IOException
1116 */
1117 public FSDataOutputStream append(Path f, int bufferSize) throws IOException {
1118 return append(f, bufferSize, null);
1119 }
1120
/**
 * Append to an existing file (optional operation).
 * This is the abstract primitive that the shorter {@code append}
 * overloads in this class delegate to.
 * @param f the existing file to be appended.
 * @param bufferSize the size of the buffer to be used.
 * @param progress for reporting progress if it is not null.
 * @return an output stream for the file
 * @throws IOException see specific implementation
 */
public abstract FSDataOutputStream append(Path f, int bufferSize,
    Progressable progress) throws IOException;
1130
1131 /**
1132 * Concat existing files together.
1133 * @param trg the path to the target destination.
1134 * @param psrcs the paths to the sources to use for the concatenation.
1135 * @throws IOException
1136 */
1137 public void concat(final Path trg, final Path [] psrcs) throws IOException {
1138 throw new UnsupportedOperationException("Not implemented by the " +
1139 getClass().getSimpleName() + " FileSystem implementation");
1140 }
1141
1142 /**
1143 * Get replication.
1144 *
1145 * @deprecated Use getFileStatus() instead
1146 * @param src file name
1147 * @return file replication
1148 * @throws IOException
1149 */
1150 @Deprecated
1151 public short getReplication(Path src) throws IOException {
1152 return getFileStatus(src).getReplication();
1153 }
1154
1155 /**
1156 * Set replication for an existing file.
1157 *
1158 * @param src file name
1159 * @param replication new replication
1160 * @throws IOException
1161 * @return true if successful;
1162 * false if file does not exist or is a directory
1163 */
1164 public boolean setReplication(Path src, short replication)
1165 throws IOException {
1166 return true;
1167 }
1168
/**
 * Renames Path src to Path dst. Can take place on local fs
 * or remote DFS.
 * This is the abstract primitive; the option-taking {@code rename} in this
 * class calls it after performing its own checks and treats a false
 * return value as a failure.
 * @param src path to be renamed
 * @param dst new path after rename
 * @throws IOException on failure
 * @return true if rename is successful
 */
public abstract boolean rename(Path src, Path dst) throws IOException;
1178
/**
 * Renames Path src to Path dst
 * <ul>
 * <li>Fails if no status can be obtained for src.
 * <li>Fails if src is a file and dst is a directory.
 * <li>Fails if src is a directory and dst is a file.
 * <li>Fails if the parent of dst does not exist or is a file.
 * </ul>
 * <p>
 * If OVERWRITE option is not passed as an argument, rename fails
 * if the dst already exists.
 * <p>
 * If OVERWRITE option is passed as an argument, rename overwrites
 * the dst if it is a file or an empty directory. Rename fails if dst is
 * a non-empty directory.
 * <p>
 * Note that atomicity of rename is dependent on the file system
 * implementation. Please refer to the file system documentation for
 * details. This default implementation is non atomic.
 * <p>
 * This method is deprecated since it is a temporary method added to
 * support the transition from FileSystem to FileContext for user
 * applications.
 *
 * @param src path to be renamed
 * @param dst new path after rename
 * @param options rename options; only {@code Rename.OVERWRITE} is
 *   examined here, and a null array is treated as "no options"
 * @throws IOException on failure
 */
@Deprecated
protected void rename(final Path src, final Path dst,
    final Rename... options) throws IOException {
  // Default implementation
  final FileStatus srcStatus = getFileStatus(src);
  if (srcStatus == null) {
    throw new FileNotFoundException("rename source " + src + " not found.");
  }

  // Scan the options for OVERWRITE; every other option is ignored.
  boolean overwrite = false;
  if (null != options) {
    for (Rename option : options) {
      if (option == Rename.OVERWRITE) {
        overwrite = true;
      }
    }
  }

  // Probe the destination. Any IOException from the probe is treated the
  // same as "destination does not exist".
  FileStatus dstStatus;
  try {
    dstStatus = getFileStatus(dst);
  } catch (IOException e) {
    dstStatus = null;
  }
  if (dstStatus != null) {
    // Destination exists: source and destination must be the same kind.
    if (srcStatus.isDirectory() != dstStatus.isDirectory()) {
      throw new IOException("Source " + src + " Destination " + dst
          + " both should be either file or directory");
    }
    if (!overwrite) {
      throw new FileAlreadyExistsException("rename destination " + dst
          + " already exists.");
    }
    // Delete the destination that is a file or an empty directory
    if (dstStatus.isDirectory()) {
      FileStatus[] list = listStatus(dst);
      if (list != null && list.length != 0) {
        throw new IOException(
            "rename cannot overwrite non empty destination directory " + dst);
      }
    }
    // Non-recursive delete; this happens before the rename itself, which
    // is why this default implementation is not atomic.
    delete(dst, false);
  } else {
    // Destination does not exist: its parent must exist and be a directory.
    final Path parent = dst.getParent();
    final FileStatus parentStatus = getFileStatus(parent);
    if (parentStatus == null) {
      throw new FileNotFoundException("rename destination parent " + parent
          + " not found.");
    }
    if (!parentStatus.isDirectory()) {
      throw new ParentNotDirectoryException("rename destination parent " + parent
          + " is a file.");
    }
  }
  // Delegate to the two-argument rename and turn a false result into an
  // exception.
  if (!rename(src, dst)) {
    throw new IOException("rename from " + src + " to " + dst + " failed.");
  }
}
1265
1266 /**
1267 * Delete a file
1268 * @deprecated Use {@link #delete(Path, boolean)} instead.
1269 */
1270 @Deprecated
1271 public boolean delete(Path f) throws IOException {
1272 return delete(f, true);
1273 }
1274
/** Delete a file.
 *
 * @param f the path to delete.
 * @param recursive if path is a directory and set to
 * true, the directory is deleted else throws an exception. In
 * case of a file the recursive can be set to either true or false.
 * @return  true if delete is successful else false.
 * @throws IOException see specific implementation
 */
public abstract boolean delete(Path f, boolean recursive) throws IOException;
1285
1286 /**
1287 * Mark a path to be deleted when FileSystem is closed.
1288 * When the JVM shuts down,
1289 * all FileSystem objects will be closed automatically.
1290 * Then,
1291 * the marked path will be deleted as a result of closing the FileSystem.
1292 *
1293 * The path has to exist in the file system.
1294 *
1295 * @param f the path to delete.
1296 * @return true if deleteOnExit is successful, otherwise false.
1297 * @throws IOException
1298 */
1299 public boolean deleteOnExit(Path f) throws IOException {
1300 if (!exists(f)) {
1301 return false;
1302 }
1303 synchronized (deleteOnExit) {
1304 deleteOnExit.add(f);
1305 }
1306 return true;
1307 }
1308
1309 /**
1310 * Cancel the deletion of the path when the FileSystem is closed
1311 * @param f the path to cancel deletion
1312 */
1313 public boolean cancelDeleteOnExit(Path f) {
1314 synchronized (deleteOnExit) {
1315 return deleteOnExit.remove(f);
1316 }
1317 }
1318
1319 /**
1320 * Delete all files that were marked as delete-on-exit. This recursively
1321 * deletes all files in the specified paths.
1322 */
1323 protected void processDeleteOnExit() {
1324 synchronized (deleteOnExit) {
1325 for (Iterator<Path> iter = deleteOnExit.iterator(); iter.hasNext();) {
1326 Path path = iter.next();
1327 try {
1328 if (exists(path)) {
1329 delete(path, true);
1330 }
1331 }
1332 catch (IOException e) {
1333 LOG.info("Ignoring failure to deleteOnExit for path " + path);
1334 }
1335 iter.remove();
1336 }
1337 }
1338 }
1339
1340 /** Check if exists.
1341 * @param f source file
1342 */
1343 public boolean exists(Path f) throws IOException {
1344 try {
1345 return getFileStatus(f) != null;
1346 } catch (FileNotFoundException e) {
1347 return false;
1348 }
1349 }
1350
1351 /** True iff the named path is a directory.
1352 * Note: Avoid using this method. Instead reuse the FileStatus
1353 * returned by getFileStatus() or listStatus() methods.
1354 * @param f path to check
1355 */
1356 public boolean isDirectory(Path f) throws IOException {
1357 try {
1358 return getFileStatus(f).isDirectory();
1359 } catch (FileNotFoundException e) {
1360 return false; // f does not exist
1361 }
1362 }
1363
1364 /** True iff the named path is a regular file.
1365 * Note: Avoid using this method. Instead reuse the FileStatus
1366 * returned by getFileStatus() or listStatus() methods.
1367 * @param f path to check
1368 */
1369 public boolean isFile(Path f) throws IOException {
1370 try {
1371 return getFileStatus(f).isFile();
1372 } catch (FileNotFoundException e) {
1373 return false; // f does not exist
1374 }
1375 }
1376
1377 /** The number of bytes in a file. */
1378 /** @deprecated Use getFileStatus() instead */
1379 @Deprecated
1380 public long getLength(Path f) throws IOException {
1381 return getFileStatus(f).getLen();
1382 }
1383
1384 /** Return the {@link ContentSummary} of a given {@link Path}.
1385 * @param f path to use
1386 */
1387 public ContentSummary getContentSummary(Path f) throws IOException {
1388 FileStatus status = getFileStatus(f);
1389 if (status.isFile()) {
1390 // f is a file
1391 return new ContentSummary(status.getLen(), 1, 0);
1392 }
1393 // f is a directory
1394 long[] summary = {0, 0, 1};
1395 for(FileStatus s : listStatus(f)) {
1396 ContentSummary c = s.isDirectory() ? getContentSummary(s.getPath()) :
1397 new ContentSummary(s.getLen(), 1, 0);
1398 summary[0] += c.getLength();
1399 summary[1] += c.getFileCount();
1400 summary[2] += c.getDirectoryCount();
1401 }
1402 return new ContentSummary(summary[0], summary[1], summary[2]);
1403 }
1404
1405 final private static PathFilter DEFAULT_FILTER = new PathFilter() {
1406 @Override
1407 public boolean accept(Path file) {
1408 return true;
1409 }
1410 };
1411
/**
 * List the statuses of the files/directories in the given path if the path is
 * a directory.
 *
 * @param f given path
 * @return the statuses of the files/directories in the given path
 * @throws FileNotFoundException when the path does not exist
 * @throws IOException see specific implementation
 */
public abstract FileStatus[] listStatus(Path f) throws FileNotFoundException,
                                                       IOException;
1423
1424 /*
1425 * Filter files/directories in the given path using the user-supplied path
1426 * filter. Results are added to the given array <code>results</code>.
1427 */
1428 private void listStatus(ArrayList<FileStatus> results, Path f,
1429 PathFilter filter) throws FileNotFoundException, IOException {
1430 FileStatus listing[] = listStatus(f);
1431 if (listing == null) {
1432 throw new IOException("Error accessing " + f);
1433 }
1434
1435 for (int i = 0; i < listing.length; i++) {
1436 if (filter.accept(listing[i].getPath())) {
1437 results.add(listing[i]);
1438 }
1439 }
1440 }
1441
1442 /**
1443 * @return an iterator over the corrupt files under the given path
1444 * (may contain duplicates if a file has more than one corrupt block)
1445 * @throws IOException
1446 */
1447 public RemoteIterator<Path> listCorruptFileBlocks(Path path)
1448 throws IOException {
1449 throw new UnsupportedOperationException(getClass().getCanonicalName() +
1450 " does not support" +
1451 " listCorruptFileBlocks");
1452 }
1453
1454 /**
1455 * Filter files/directories in the given path using the user-supplied path
1456 * filter.
1457 *
1458 * @param f
1459 * a path name
1460 * @param filter
1461 * the user-supplied path filter
1462 * @return an array of FileStatus objects for the files under the given path
1463 * after applying the filter
1464 * @throws FileNotFoundException when the path does not exist;
1465 * IOException see specific implementation
1466 */
1467 public FileStatus[] listStatus(Path f, PathFilter filter)
1468 throws FileNotFoundException, IOException {
1469 ArrayList<FileStatus> results = new ArrayList<FileStatus>();
1470 listStatus(results, f, filter);
1471 return results.toArray(new FileStatus[results.size()]);
1472 }
1473
1474 /**
1475 * Filter files/directories in the given list of paths using default
1476 * path filter.
1477 *
1478 * @param files
1479 * a list of paths
1480 * @return a list of statuses for the files under the given paths after
1481 * applying the filter default Path filter
1482 * @throws FileNotFoundException when the path does not exist;
1483 * IOException see specific implementation
1484 */
1485 public FileStatus[] listStatus(Path[] files)
1486 throws FileNotFoundException, IOException {
1487 return listStatus(files, DEFAULT_FILTER);
1488 }
1489
1490 /**
1491 * Filter files/directories in the given list of paths using user-supplied
1492 * path filter.
1493 *
1494 * @param files
1495 * a list of paths
1496 * @param filter
1497 * the user-supplied path filter
1498 * @return a list of statuses for the files under the given paths after
1499 * applying the filter
1500 * @throws FileNotFoundException when the path does not exist;
1501 * IOException see specific implementation
1502 */
1503 public FileStatus[] listStatus(Path[] files, PathFilter filter)
1504 throws FileNotFoundException, IOException {
1505 ArrayList<FileStatus> results = new ArrayList<FileStatus>();
1506 for (int i = 0; i < files.length; i++) {
1507 listStatus(results, files[i], filter);
1508 }
1509 return results.toArray(new FileStatus[results.size()]);
1510 }
1511
1512 /**
1513 * <p>Return all the files that match filePattern and are not checksum
1514 * files. Results are sorted by their names.
1515 *
1516 * <p>
1517 * A filename pattern is composed of <i>regular</i> characters and
1518 * <i>special pattern matching</i> characters, which are:
1519 *
1520 * <dl>
1521 * <dd>
1522 * <dl>
1523 * <p>
1524 * <dt> <tt> ? </tt>
1525 * <dd> Matches any single character.
1526 *
1527 * <p>
1528 * <dt> <tt> * </tt>
1529 * <dd> Matches zero or more characters.
1530 *
1531 * <p>
1532 * <dt> <tt> [<i>abc</i>] </tt>
1533 * <dd> Matches a single character from character set
1534 * <tt>{<i>a,b,c</i>}</tt>.
1535 *
1536 * <p>
1537 * <dt> <tt> [<i>a</i>-<i>b</i>] </tt>
1538 * <dd> Matches a single character from the character range
1539 * <tt>{<i>a...b</i>}</tt>. Note that character <tt><i>a</i></tt> must be
1540 * lexicographically less than or equal to character <tt><i>b</i></tt>.
1541 *
1542 * <p>
1543 * <dt> <tt> [^<i>a</i>] </tt>
1544 * <dd> Matches a single character that is not from character set or range
1545 * <tt>{<i>a</i>}</tt>. Note that the <tt>^</tt> character must occur
1546 * immediately to the right of the opening bracket.
1547 *
1548 * <p>
1549 * <dt> <tt> \<i>c</i> </tt>
1550 * <dd> Removes (escapes) any special meaning of character <i>c</i>.
1551 *
1552 * <p>
1553 * <dt> <tt> {ab,cd} </tt>
1554 * <dd> Matches a string from the string set <tt>{<i>ab, cd</i>} </tt>
1555 *
1556 * <p>
1557 * <dt> <tt> {ab,c{de,fh}} </tt>
1558 * <dd> Matches a string from the string set <tt>{<i>ab, cde, cfh</i>}</tt>
1559 *
1560 * </dl>
1561 * </dd>
1562 * </dl>
1563 *
1564 * @param pathPattern a regular expression specifying a pth pattern
1565
1566 * @return an array of paths that match the path pattern
1567 * @throws IOException
1568 */
1569 public FileStatus[] globStatus(Path pathPattern) throws IOException {
1570 return globStatus(pathPattern, DEFAULT_FILTER);
1571 }
1572
1573 /**
1574 * Return an array of FileStatus objects whose path names match pathPattern
1575 * and is accepted by the user-supplied path filter. Results are sorted by
1576 * their path names.
1577 * Return null if pathPattern has no glob and the path does not exist.
1578 * Return an empty array if pathPattern has a glob and no path matches it.
1579 *
1580 * @param pathPattern
1581 * a regular expression specifying the path pattern
1582 * @param filter
1583 * a user-supplied path filter
1584 * @return an array of FileStatus objects
1585 * @throws IOException if any I/O error occurs when fetching file status
1586 */
1587 public FileStatus[] globStatus(Path pathPattern, PathFilter filter)
1588 throws IOException {
1589 String filename = pathPattern.toUri().getPath();
1590 List<FileStatus> allMatches = null;
1591
1592 List<String> filePatterns = GlobExpander.expand(filename);
1593 for (String filePattern : filePatterns) {
1594 Path path = new Path(filePattern.isEmpty() ? Path.CUR_DIR : filePattern);
1595 List<FileStatus> matches = globStatusInternal(path, filter);
1596 if (matches != null) {
1597 if (allMatches == null) {
1598 allMatches = matches;
1599 } else {
1600 allMatches.addAll(matches);
1601 }
1602 }
1603 }
1604
1605 FileStatus[] results = null;
1606 if (allMatches != null) {
1607 results = allMatches.toArray(new FileStatus[allMatches.size()]);
1608 } else if (filePatterns.size() > 1) {
1609 // no matches with multiple expansions is a non-matching glob
1610 results = new FileStatus[0];
1611 }
1612 return results;
1613 }
1614
/**
 * Glob a single, already brace-expanded pattern.
 * The pattern's path is split on the separator; starting from the root
 * (absolute patterns) or the current directory (relative patterns), the
 * candidate set is narrowed one component at a time. Components with a
 * glob are matched by listing the candidate directories; components
 * without one are looked up directly after unquoting.
 *
 * @param pathPattern the pattern to glob
 * @param filter user-supplied filter applied to the final candidates
 * @return the matching statuses, sorted; an empty list if the pattern
 *   contains a glob and nothing matched; null if the pattern contains no
 *   glob and nothing matched
 * @throws IOException if any I/O error occurs when fetching file status
 */
// sort gripes because FileStatus Comparable isn't parameterized...
@SuppressWarnings("unchecked")
private List<FileStatus> globStatusInternal(Path pathPattern,
    PathFilter filter) throws IOException {
  boolean patternHasGlob = false;       // pathPattern has any globs
  List<FileStatus> matches = new ArrayList<FileStatus>();

  // determine starting point
  int level = 0;
  String baseDir = Path.CUR_DIR;
  if (pathPattern.isAbsolute()) {
    level = 1; // need to skip empty item at beginning of split list
    baseDir = Path.SEPARATOR;
  }

  // parse components and determine if it's a glob
  String[] components = null;
  GlobFilter[] filters = null;
  String filename = pathPattern.toUri().getPath();
  if (!filename.isEmpty() && !Path.SEPARATOR.equals(filename)) {
    components = filename.split(Path.SEPARATOR);
    // filters[i] corresponds to components[i]; entries below 'level' stay
    // null and are never read.
    filters = new GlobFilter[components.length];
    for (int i=level; i < components.length; i++) {
      filters[i] = new GlobFilter(components[i]);
      patternHasGlob |= filters[i].hasPattern();
    }
    if (!patternHasGlob) {
      // Plain path: look the whole (unquoted) path up in one step.
      baseDir = unquotePathComponent(filename);
      components = null; // short through to filter check
    }
  }

  // seed the parent directory path, return if it doesn't exist
  try {
    matches.add(getFileStatus(new Path(baseDir)));
  } catch (FileNotFoundException e) {
    return patternHasGlob ? matches : null;
  }

  // skip if there are no components other than the basedir
  if (components != null) {
    // iterate through each path component
    for (int i=level; (i < components.length) && !matches.isEmpty(); i++) {
      List<FileStatus> children = new ArrayList<FileStatus>();
      for (FileStatus match : matches) {
        // don't look for children in a file matched by a glob
        if (!match.isDirectory()) {
          continue;
        }
        try {
          if (filters[i].hasPattern()) {
            // get all children matching the filter
            FileStatus[] statuses = listStatus(match.getPath(), filters[i]);
            children.addAll(Arrays.asList(statuses));
          } else {
            // the component does not have a pattern
            String component = unquotePathComponent(components[i]);
            Path child = new Path(match.getPath(), component);
            children.add(getFileStatus(child));
          }
        } catch (FileNotFoundException e) {
          // don't care: a candidate that is missing simply contributes
          // no children
        }
      }
      // The children found at this level are the candidates for the next.
      matches = children;
    }
  }
  // remove anything that didn't match the filter
  if (!matches.isEmpty()) {
    Iterator<FileStatus> iter = matches.iterator();
    while (iter.hasNext()) {
      if (!filter.accept(iter.next().getPath())) {
        iter.remove();
      }
    }
  }
  // no final paths, if there were any globs return empty list
  if (matches.isEmpty()) {
    return patternHasGlob ? matches : null;
  }
  Collections.sort(matches);
  return matches;
}
1698
1699 /**
1700 * The glob filter builds a regexp per path component. If the component
1701 * does not contain a shell metachar, then it falls back to appending the
1702 * raw string to the list of built up paths. This raw path needs to have
1703 * the quoting removed. Ie. convert all occurances of "\X" to "X"
1704 * @param name of the path component
1705 * @return the unquoted path component
1706 */
1707 private String unquotePathComponent(String name) {
1708 return name.replaceAll("\\\\(.)", "$1");
1709 }
1710
1711 /**
1712 * List the statuses of the files/directories in the given path if the path is
1713 * a directory.
1714 * Return the file's status and block locations If the path is a file.
1715 *
1716 * If a returned status is a file, it contains the file's block locations.
1717 *
1718 * @param f is the path
1719 *
1720 * @return an iterator that traverses statuses of the files/directories
1721 * in the given path
1722 *
1723 * @throws FileNotFoundException If <code>f</code> does not exist
1724 * @throws IOException If an I/O error occurred
1725 */
1726 public RemoteIterator<LocatedFileStatus> listLocatedStatus(final Path f)
1727 throws FileNotFoundException, IOException {
1728 return listLocatedStatus(f, DEFAULT_FILTER);
1729 }
1730
1731 /**
1732 * Listing a directory
1733 * The returned results include its block location if it is a file
1734 * The results are filtered by the given path filter
1735 * @param f a path
1736 * @param filter a path filter
1737 * @return an iterator that traverses statuses of the files/directories
1738 * in the given path
1739 * @throws FileNotFoundException if <code>f</code> does not exist
1740 * @throws IOException if any I/O error occurred
1741 */
1742 protected RemoteIterator<LocatedFileStatus> listLocatedStatus(final Path f,
1743 final PathFilter filter)
1744 throws FileNotFoundException, IOException {
1745 return new RemoteIterator<LocatedFileStatus>() {
1746 private final FileStatus[] stats = listStatus(f, filter);
1747 private int i = 0;
1748
1749 @Override
1750 public boolean hasNext() {
1751 return i<stats.length;
1752 }
1753
1754 @Override
1755 public LocatedFileStatus next() throws IOException {
1756 if (!hasNext()) {
1757 throw new NoSuchElementException("No more entry in " + f);
1758 }
1759 FileStatus result = stats[i++];
1760 BlockLocation[] locs = result.isFile() ?
1761 getFileBlockLocations(result.getPath(), 0, result.getLen()) :
1762 null;
1763 return new LocatedFileStatus(result, locs);
1764 }
1765 };
1766 }
1767
/**
 * List the statuses and block locations of the files in the given path.
 *
 * If the path is a directory,
 *   if recursive is false, returns files in the directory;
 *   if recursive is true, return files in the subtree rooted at the path.
 * If the path is a file, return the file's status and block locations.
 * <p>
 * The initial listing of <code>f</code> happens when this method is
 * called; subdirectories are listed as the iterator reaches them.
 *
 * @param f is the path
 * @param recursive if the subdirectories need to be traversed recursively
 *
 * @return an iterator that traverses statuses of the files
 *
 * @throws FileNotFoundException when the path does not exist
 * @throws IOException see specific implementation
 */
public RemoteIterator<LocatedFileStatus> listFiles(
    final Path f, final boolean recursive)
throws FileNotFoundException, IOException {
  return new RemoteIterator<LocatedFileStatus>() {
    // Iterators of the enclosing directories, to be resumed once the
    // current directory is exhausted.
    private Stack<RemoteIterator<LocatedFileStatus>> itors =
      new Stack<RemoteIterator<LocatedFileStatus>>();
    // Iterator over the directory currently being traversed.
    private RemoteIterator<LocatedFileStatus> curItor =
      listLocatedStatus(f);
    // The next file to hand out, or null if none has been found yet.
    private LocatedFileStatus curFile;

    @Override
    public boolean hasNext() throws IOException {
      // Advance until a file has been found or every iterator is drained.
      while (curFile == null) {
        if (curItor.hasNext()) {
          handleFileStat(curItor.next());
        } else if (!itors.empty()) {
          curItor = itors.pop();
        } else {
          return false;
        }
      }
      return true;
    }

    /**
     * Process the input stat.
     * If it is a file, return the file stat.
     * If it is a directory, traverse the directory if recursive is true;
     * ignore it if recursive is false.
     * @param stat input status
     * @throws IOException if any IO error occurs
     */
    private void handleFileStat(LocatedFileStatus stat) throws IOException {
      if (stat.isFile()) { // file
        curFile = stat;
      } else if (recursive) { // directory
        // Remember where we were and descend into the subdirectory.
        itors.push(curItor);
        curItor = listLocatedStatus(stat.getPath());
      }
    }

    @Override
    public LocatedFileStatus next() throws IOException {
      if (hasNext()) {
        LocatedFileStatus result = curFile;
        // Clear the slot so the next hasNext() searches for a new file.
        curFile = null;
        return result;
      }
      throw new java.util.NoSuchElementException("No more entry in " + f);
    }
  };
}
1836
1837 /** Return the current user's home directory in this filesystem.
1838 * The default implementation returns "/user/$USER/".
1839 */
1840 public Path getHomeDirectory() {
1841 return this.makeQualified(
1842 new Path("/user/"+System.getProperty("user.name")));
1843 }
1844
1845
  /**
   * Set the current working directory for the given file system. All relative
   * paths will be resolved relative to it.
   *
   * @param new_dir the directory to use as the new working directory
   */
  public abstract void setWorkingDirectory(Path new_dir);
1853
  /**
   * Get the current working directory for the given file system.
   *
   * @return the directory pathname
   */
  public abstract Path getWorkingDirectory();
1859
1860
  /**
   * Note: with the new FileContext class, getWorkingDirectory()
   * will be removed.
   * The working directory is implemented in FileContext.
   *
   * Some file systems like LocalFileSystem have an initial workingDir
   * that we use as the starting workingDir. For other file systems
   * like HDFS there is no built in notion of an initial workingDir.
   *
   * This base implementation always returns null.
   *
   * @return if there is built in notion of workingDir then it
   * is returned; else a null is returned.
   */
  protected Path getInitialWorkingDirectory() {
    return null;
  }
1876
1877 /**
1878 * Call {@link #mkdirs(Path, FsPermission)} with default permission.
1879 */
1880 public boolean mkdirs(Path f) throws IOException {
1881 return mkdirs(f, FsPermission.getDirDefault());
1882 }
1883
  /**
   * Make the given file and all non-existent parents into
   * directories. Has the semantics of Unix 'mkdir -p'.
   * Existence of the directory hierarchy is not an error.
   * @param f path to create
   * @param permission to apply to f
   * @return a result defined by the implementation
   *         (NOTE(review): presumably true on success - confirm against
   *         the concrete file systems)
   * @throws IOException see specific implementation
   */
  public abstract boolean mkdirs(Path f, FsPermission permission
      ) throws IOException;
1893
1894 /**
1895 * The src file is on the local disk. Add it to FS at
1896 * the given dst name and the source is kept intact afterwards
1897 * @param src path
1898 * @param dst path
1899 */
1900 public void copyFromLocalFile(Path src, Path dst)
1901 throws IOException {
1902 copyFromLocalFile(false, src, dst);
1903 }
1904
1905 /**
1906 * The src files is on the local disk. Add it to FS at
1907 * the given dst name, removing the source afterwards.
1908 * @param srcs path
1909 * @param dst path
1910 */
1911 public void moveFromLocalFile(Path[] srcs, Path dst)
1912 throws IOException {
1913 copyFromLocalFile(true, true, srcs, dst);
1914 }
1915
1916 /**
1917 * The src file is on the local disk. Add it to FS at
1918 * the given dst name, removing the source afterwards.
1919 * @param src path
1920 * @param dst path
1921 */
1922 public void moveFromLocalFile(Path src, Path dst)
1923 throws IOException {
1924 copyFromLocalFile(true, src, dst);
1925 }
1926
1927 /**
1928 * The src file is on the local disk. Add it to FS at
1929 * the given dst name.
1930 * delSrc indicates if the source should be removed
1931 * @param delSrc whether to delete the src
1932 * @param src path
1933 * @param dst path
1934 */
1935 public void copyFromLocalFile(boolean delSrc, Path src, Path dst)
1936 throws IOException {
1937 copyFromLocalFile(delSrc, true, src, dst);
1938 }
1939
1940 /**
1941 * The src files are on the local disk. Add it to FS at
1942 * the given dst name.
1943 * delSrc indicates if the source should be removed
1944 * @param delSrc whether to delete the src
1945 * @param overwrite whether to overwrite an existing file
1946 * @param srcs array of paths which are source
1947 * @param dst path
1948 */
1949 public void copyFromLocalFile(boolean delSrc, boolean overwrite,
1950 Path[] srcs, Path dst)
1951 throws IOException {
1952 Configuration conf = getConf();
1953 FileUtil.copy(getLocal(conf), srcs, this, dst, delSrc, overwrite, conf);
1954 }
1955
1956 /**
1957 * The src file is on the local disk. Add it to FS at
1958 * the given dst name.
1959 * delSrc indicates if the source should be removed
1960 * @param delSrc whether to delete the src
1961 * @param overwrite whether to overwrite an existing file
1962 * @param src path
1963 * @param dst path
1964 */
1965 public void copyFromLocalFile(boolean delSrc, boolean overwrite,
1966 Path src, Path dst)
1967 throws IOException {
1968 Configuration conf = getConf();
1969 FileUtil.copy(getLocal(conf), src, this, dst, delSrc, overwrite, conf);
1970 }
1971
1972 /**
1973 * The src file is under FS, and the dst is on the local disk.
1974 * Copy it from FS control to the local dst name.
1975 * @param src path
1976 * @param dst path
1977 */
1978 public void copyToLocalFile(Path src, Path dst) throws IOException {
1979 copyToLocalFile(false, src, dst);
1980 }
1981
1982 /**
1983 * The src file is under FS, and the dst is on the local disk.
1984 * Copy it from FS control to the local dst name.
1985 * Remove the source afterwards
1986 * @param src path
1987 * @param dst path
1988 */
1989 public void moveToLocalFile(Path src, Path dst) throws IOException {
1990 copyToLocalFile(true, src, dst);
1991 }
1992
1993 /**
1994 * The src file is under FS, and the dst is on the local disk.
1995 * Copy it from FS control to the local dst name.
1996 * delSrc indicates if the src will be removed or not.
1997 * @param delSrc whether to delete the src
1998 * @param src path
1999 * @param dst path
2000 */
2001 public void copyToLocalFile(boolean delSrc, Path src, Path dst)
2002 throws IOException {
2003 copyToLocalFile(delSrc, src, dst, false);
2004 }
2005
2006 /**
2007 * The src file is under FS, and the dst is on the local disk. Copy it from FS
2008 * control to the local dst name. delSrc indicates if the src will be removed
2009 * or not. useRawLocalFileSystem indicates whether to use RawLocalFileSystem
2010 * as local file system or not. RawLocalFileSystem is non crc file system.So,
2011 * It will not create any crc files at local.
2012 *
2013 * @param delSrc
2014 * whether to delete the src
2015 * @param src
2016 * path
2017 * @param dst
2018 * path
2019 * @param useRawLocalFileSystem
2020 * whether to use RawLocalFileSystem as local file system or not.
2021 *
2022 * @throws IOException
2023 * - if any IO error
2024 */
2025 public void copyToLocalFile(boolean delSrc, Path src, Path dst,
2026 boolean useRawLocalFileSystem) throws IOException {
2027 Configuration conf = getConf();
2028 FileSystem local = null;
2029 if (useRawLocalFileSystem) {
2030 local = getLocal(conf).getRawFileSystem();
2031 } else {
2032 local = getLocal(conf);
2033 }
2034 FileUtil.copy(this, src, local, dst, delSrc, conf);
2035 }
2036
  /**
   * Returns a local File that the user can write output to.  The caller
   * provides both the eventual FS target name and the local working
   * file.  If the FS is local, we write directly into the target.  If
   * the FS is remote, we write into the tmp local area.
   *
   * This base implementation ignores fsOutputFile and returns tmpLocalFile
   * unchanged.
   *
   * @param fsOutputFile path of output file
   * @param tmpLocalFile path of local tmp file
   * @return the local path to write to
   * @throws IOException declared for overriding implementations; this
   *         base implementation does not throw it
   */
  public Path startLocalOutput(Path fsOutputFile, Path tmpLocalFile)
      throws IOException {
    return tmpLocalFile;
  }
2049
2050 /**
2051 * Called when we're all done writing to the target. A local FS will
2052 * do nothing, because we've written to exactly the right place. A remote
2053 * FS will copy the contents of tmpLocalFile to the correct target at
2054 * fsOutputFile.
2055 * @param fsOutputFile path of output file
2056 * @param tmpLocalFile path to local tmp file
2057 */
2058 public void completeLocalOutput(Path fsOutputFile, Path tmpLocalFile)
2059 throws IOException {
2060 moveFromLocalFile(tmpLocalFile, fsOutputFile);
2061 }
2062
2063 /**
2064 * No more filesystem operations are needed. Will
2065 * release any held locks.
2066 */
2067 @Override
2068 public void close() throws IOException {
2069 // delete all files that were marked as delete-on-exit.
2070 processDeleteOnExit();
2071 CACHE.remove(this.key, this);
2072 }
2073
2074 /** Return the total size of all files in the filesystem.*/
2075 public long getUsed() throws IOException{
2076 long used = 0;
2077 FileStatus[] files = listStatus(new Path("/"));
2078 for(FileStatus file:files){
2079 used += file.getLen();
2080 }
2081 return used;
2082 }
2083
2084 /**
2085 * Get the block size for a particular file.
2086 * @param f the filename
2087 * @return the number of bytes in a block
2088 */
2089 /** @deprecated Use getFileStatus() instead */
2090 @Deprecated
2091 public long getBlockSize(Path f) throws IOException {
2092 return getFileStatus(f).getBlockSize();
2093 }
2094
2095 /**
2096 * Return the number of bytes that large input files should be optimally
2097 * be split into to minimize i/o time.
2098 * @deprecated use {@link #getDefaultBlockSize(Path)} instead
2099 */
2100 @Deprecated
2101 public long getDefaultBlockSize() {
2102 // default to 32MB: large enough to minimize the impact of seeks
2103 return getConf().getLong("fs.local.block.size", 32 * 1024 * 1024);
2104 }
2105
2106 /** Return the number of bytes that large input files should be optimally
2107 * be split into to minimize i/o time. The given path will be used to
2108 * locate the actual filesystem. The full path does not have to exist.
2109 * @param f path of file
2110 * @return the default block size for the path's filesystem
2111 */
2112 public long getDefaultBlockSize(Path f) {
2113 return getDefaultBlockSize();
2114 }
2115
  /**
   * Get the default replication.
   * This base implementation always returns 1.
   * @return the default replication
   * @deprecated use {@link #getDefaultReplication(Path)} instead
   */
  @Deprecated
  public short getDefaultReplication() { return 1; }
2122
2123 /**
2124 * Get the default replication for a path. The given path will be used to
2125 * locate the actual filesystem. The full path does not have to exist.
2126 * @param path of the file
2127 * @return default replication for the path's filesystem
2128 */
2129 public short getDefaultReplication(Path path) {
2130 return getDefaultReplication();
2131 }
2132
  /**
   * Return a file status object that represents the path.
   * @param f The path we want information from
   * @return a FileStatus object
   * @throws FileNotFoundException when the path does not exist
   * @throws IOException see specific implementation
   */
  public abstract FileStatus getFileStatus(Path f) throws IOException;
2141
  /**
   * Get the checksum of a file.
   *
   * @param f The file path
   * @return The file checksum.  The default return value is null,
   *  which indicates that no checksum algorithm is implemented
   *  in the corresponding FileSystem.
   * @throws IOException declared for overriding implementations; this
   *  base implementation does not throw it
   */
  public FileChecksum getFileChecksum(Path f) throws IOException {
    return null;
  }
2153
  /**
   * Set the verify checksum flag. This is only applicable if the
   * corresponding FileSystem supports checksum. By default doesn't do anything.
   * @param verifyChecksum the new value of the flag; ignored by this
   *        base implementation
   */
  public void setVerifyChecksum(boolean verifyChecksum) {
    //doesn't do anything
  }
2162
  /**
   * Set the write checksum flag. This is only applicable if the
   * corresponding FileSystem supports checksum. By default doesn't do anything.
   * @param writeChecksum the new value of the flag; ignored by this
   *        base implementation
   */
  public void setWriteChecksum(boolean writeChecksum) {
    //doesn't do anything
  }
2171
2172 /**
2173 * Returns a status object describing the use and capacity of the
2174 * file system. If the file system has multiple partitions, the
2175 * use and capacity of the root partition is reflected.
2176 *
2177 * @return a FsStatus object
2178 * @throws IOException
2179 * see specific implementation
2180 */
2181 public FsStatus getStatus() throws IOException {
2182 return getStatus(null);
2183 }
2184
2185 /**
2186 * Returns a status object describing the use and capacity of the
2187 * file system. If the file system has multiple partitions, the
2188 * use and capacity of the partition pointed to by the specified
2189 * path is reflected.
2190 * @param p Path for which status should be obtained. null means
2191 * the default partition.
2192 * @return a FsStatus object
2193 * @throws IOException
2194 * see specific implementation
2195 */
2196 public FsStatus getStatus(Path p) throws IOException {
2197 return new FsStatus(Long.MAX_VALUE, 0, Long.MAX_VALUE);
2198 }
2199
  /**
   * Set permission of a path.
   * This base implementation does nothing.
   * @param p the path whose permission is to be set
   * @param permission the permission to apply
   * @throws IOException declared for overriding implementations
   */
  public void setPermission(Path p, FsPermission permission
      ) throws IOException {
  }
2208
  /**
   * Set owner of a path (i.e. a file or a directory).
   * The parameters username and groupname cannot both be null.
   * This base implementation does nothing and does not check its arguments.
   * @param p The path
   * @param username If it is null, the original username remains unchanged.
   * @param groupname If it is null, the original groupname remains unchanged.
   * @throws IOException declared for overriding implementations
   */
  public void setOwner(Path p, String username, String groupname
      ) throws IOException {
  }
2219
  /**
   * Set the modification time and access time of a file.
   * This base implementation does nothing.
   * @param p The path
   * @param mtime Set the modification time of this file.
   *              The number of milliseconds since Jan 1, 1970.
   *              A value of -1 means that this call should not set modification time.
   * @param atime Set the access time of this file.
   *              The number of milliseconds since Jan 1, 1970.
   *              A value of -1 means that this call should not set access time.
   * @throws IOException declared for overriding implementations
   */
  public void setTimes(Path p, long mtime, long atime
      ) throws IOException {
  }
2233
  // making it volatile to be able to do a double checked locking
  // Set to true by loadFileSystems() once SERVICE_FILE_SYSTEMS has been
  // filled in.
  private volatile static boolean FILE_SYSTEMS_LOADED = false;

  // Maps a scheme (as reported by FileSystem.getScheme()) to the
  // FileSystem class found for it through java.util.ServiceLoader.
  private static final Map<String, Class<? extends FileSystem>>
    SERVICE_FILE_SYSTEMS = new HashMap<String, Class<? extends FileSystem>>();
2239
2240 private static void loadFileSystems() {
2241 synchronized (FileSystem.class) {
2242 if (!FILE_SYSTEMS_LOADED) {
2243 ServiceLoader<FileSystem> serviceLoader = ServiceLoader.load(FileSystem.class);
2244 for (FileSystem fs : serviceLoader) {
2245 SERVICE_FILE_SYSTEMS.put(fs.getScheme(), fs.getClass());
2246 }
2247 FILE_SYSTEMS_LOADED = true;
2248 }
2249 }
2250 }
2251
2252 public static Class<? extends FileSystem> getFileSystemClass(String scheme,
2253 Configuration conf) throws IOException {
2254 if (!FILE_SYSTEMS_LOADED) {
2255 loadFileSystems();
2256 }
2257 Class<? extends FileSystem> clazz = null;
2258 if (conf != null) {
2259 clazz = (Class<? extends FileSystem>) conf.getClass("fs." + scheme + ".impl", null);
2260 }
2261 if (clazz == null) {
2262 clazz = SERVICE_FILE_SYSTEMS.get(scheme);
2263 }
2264 if (clazz == null) {
2265 throw new IOException("No FileSystem for scheme: " + scheme);
2266 }
2267 return clazz;
2268 }
2269
2270 private static FileSystem createFileSystem(URI uri, Configuration conf
2271 ) throws IOException {
2272 Class<?> clazz = getFileSystemClass(uri.getScheme(), conf);
2273 if (clazz == null) {
2274 throw new IOException("No FileSystem for scheme: " + uri.getScheme());
2275 }
2276 FileSystem fs = (FileSystem)ReflectionUtils.newInstance(clazz, conf);
2277 fs.initialize(uri, conf);
2278 return fs;
2279 }
2280
2281 /** Caching FileSystem objects */
2282 static class Cache {
2283 private final ClientFinalizer clientFinalizer = new ClientFinalizer();
2284
2285 private final Map<Key, FileSystem> map = new HashMap<Key, FileSystem>();
2286 private final Set<Key> toAutoClose = new HashSet<Key>();
2287
2288 /** A variable that makes all objects in the cache unique */
2289 private static AtomicLong unique = new AtomicLong(1);
2290
2291 FileSystem get(URI uri, Configuration conf) throws IOException{
2292 Key key = new Key(uri, conf);
2293 return getInternal(uri, conf, key);
2294 }
2295
2296 /** The objects inserted into the cache using this method are all unique */
2297 FileSystem getUnique(URI uri, Configuration conf) throws IOException{
2298 Key key = new Key(uri, conf, unique.getAndIncrement());
2299 return getInternal(uri, conf, key);
2300 }
2301
2302 private FileSystem getInternal(URI uri, Configuration conf, Key key) throws IOException{
2303 FileSystem fs;
2304 synchronized (this) {
2305 fs = map.get(key);
2306 }
2307 if (fs != null) {
2308 return fs;
2309 }
2310
2311 fs = createFileSystem(uri, conf);
2312 synchronized (this) { // refetch the lock again
2313 FileSystem oldfs = map.get(key);
2314 if (oldfs != null) { // a file system is created while lock is releasing
2315 fs.close(); // close the new file system
2316 return oldfs; // return the old file system
2317 }
2318
2319 // now insert the new file system into the map
2320 if (map.isEmpty() ) {
2321 ShutdownHookManager.get().addShutdownHook(clientFinalizer, SHUTDOWN_HOOK_PRIORITY);
2322 }
2323 fs.key = key;
2324 map.put(key, fs);
2325 if (conf.getBoolean("fs.automatic.close", true)) {
2326 toAutoClose.add(key);
2327 }
2328 return fs;
2329 }
2330 }
2331
2332 synchronized void remove(Key key, FileSystem fs) {
2333 if (map.containsKey(key) && fs == map.get(key)) {
2334 map.remove(key);
2335 toAutoClose.remove(key);
2336 }
2337 }
2338
2339 synchronized void closeAll() throws IOException {
2340 closeAll(false);
2341 }
2342
2343 /**
2344 * Close all FileSystem instances in the Cache.
2345 * @param onlyAutomatic only close those that are marked for automatic closing
2346 */
2347 synchronized void closeAll(boolean onlyAutomatic) throws IOException {
2348 List<IOException> exceptions = new ArrayList<IOException>();
2349
2350 // Make a copy of the keys in the map since we'll be modifying
2351 // the map while iterating over it, which isn't safe.
2352 List<Key> keys = new ArrayList<Key>();
2353 keys.addAll(map.keySet());
2354
2355 for (Key key : keys) {
2356 final FileSystem fs = map.get(key);
2357
2358 if (onlyAutomatic && !toAutoClose.contains(key)) {
2359 continue;
2360 }
2361
2362 //remove from cache
2363 remove(key, fs);
2364
2365 if (fs != null) {
2366 try {
2367 fs.close();
2368 }
2369 catch(IOException ioe) {
2370 exceptions.add(ioe);
2371 }
2372 }
2373 }
2374
2375 if (!exceptions.isEmpty()) {
2376 throw MultipleIOException.createIOException(exceptions);
2377 }
2378 }
2379
2380 private class ClientFinalizer implements Runnable {
2381 @Override
2382 public synchronized void run() {
2383 try {
2384 closeAll(true);
2385 } catch (IOException e) {
2386 LOG.info("FileSystem.Cache.closeAll() threw an exception:\n" + e);
2387 }
2388 }
2389 }
2390
2391 synchronized void closeAll(UserGroupInformation ugi) throws IOException {
2392 List<FileSystem> targetFSList = new ArrayList<FileSystem>();
2393 //Make a pass over the list and collect the filesystems to close
2394 //we cannot close inline since close() removes the entry from the Map
2395 for (Map.Entry<Key, FileSystem> entry : map.entrySet()) {
2396 final Key key = entry.getKey();
2397 final FileSystem fs = entry.getValue();
2398 if (ugi.equals(key.ugi) && fs != null) {
2399 targetFSList.add(fs);
2400 }
2401 }
2402 List<IOException> exceptions = new ArrayList<IOException>();
2403 //now make a pass over the target list and close each
2404 for (FileSystem fs : targetFSList) {
2405 try {
2406 fs.close();
2407 }
2408 catch(IOException ioe) {
2409 exceptions.add(ioe);
2410 }
2411 }
2412 if (!exceptions.isEmpty()) {
2413 throw MultipleIOException.createIOException(exceptions);
2414 }
2415 }
2416
2417 /** FileSystem.Cache.Key */
2418 static class Key {
2419 final String scheme;
2420 final String authority;
2421 final UserGroupInformation ugi;
2422 final long unique; // an artificial way to make a key unique
2423
2424 Key(URI uri, Configuration conf) throws IOException {
2425 this(uri, conf, 0);
2426 }
2427
2428 Key(URI uri, Configuration conf, long unique) throws IOException {
2429 scheme = uri.getScheme()==null?"":uri.getScheme().toLowerCase();
2430 authority = uri.getAuthority()==null?"":uri.getAuthority().toLowerCase();
2431 this.unique = unique;
2432
2433 this.ugi = UserGroupInformation.getCurrentUser();
2434 }
2435
2436 @Override
2437 public int hashCode() {
2438 return (scheme + authority).hashCode() + ugi.hashCode() + (int)unique;
2439 }
2440
2441 static boolean isEqual(Object a, Object b) {
2442 return a == b || (a != null && a.equals(b));
2443 }
2444
2445 @Override
2446 public boolean equals(Object obj) {
2447 if (obj == this) {
2448 return true;
2449 }
2450 if (obj != null && obj instanceof Key) {
2451 Key that = (Key)obj;
2452 return isEqual(this.scheme, that.scheme)
2453 && isEqual(this.authority, that.authority)
2454 && isEqual(this.ugi, that.ugi)
2455 && (this.unique == that.unique);
2456 }
2457 return false;
2458 }
2459
2460 @Override
2461 public String toString() {
2462 return "("+ugi.toString() + ")@" + scheme + "://" + authority;
2463 }
2464 }
2465 }
2466
2467 public static final class Statistics {
2468 private final String scheme;
2469 private AtomicLong bytesRead = new AtomicLong();
2470 private AtomicLong bytesWritten = new AtomicLong();
2471 private AtomicInteger readOps = new AtomicInteger();
2472 private AtomicInteger largeReadOps = new AtomicInteger();
2473 private AtomicInteger writeOps = new AtomicInteger();
2474
2475 public Statistics(String scheme) {
2476 this.scheme = scheme;
2477 }
2478
2479 /**
2480 * Copy constructor.
2481 *
2482 * @param st
2483 * The input Statistics object which is cloned.
2484 */
2485 public Statistics(Statistics st) {
2486 this.scheme = st.scheme;
2487 this.bytesRead = new AtomicLong(st.bytesRead.longValue());
2488 this.bytesWritten = new AtomicLong(st.bytesWritten.longValue());
2489 }
2490
2491 /**
2492 * Increment the bytes read in the statistics
2493 * @param newBytes the additional bytes read
2494 */
2495 public void incrementBytesRead(long newBytes) {
2496 bytesRead.getAndAdd(newBytes);
2497 }
2498
2499 /**
2500 * Increment the bytes written in the statistics
2501 * @param newBytes the additional bytes written
2502 */
2503 public void incrementBytesWritten(long newBytes) {
2504 bytesWritten.getAndAdd(newBytes);
2505 }
2506
2507 /**
2508 * Increment the number of read operations
2509 * @param count number of read operations
2510 */
2511 public void incrementReadOps(int count) {
2512 readOps.getAndAdd(count);
2513 }
2514
2515 /**
2516 * Increment the number of large read operations
2517 * @param count number of large read operations
2518 */
2519 public void incrementLargeReadOps(int count) {
2520 largeReadOps.getAndAdd(count);
2521 }
2522
2523 /**
2524 * Increment the number of write operations
2525 * @param count number of write operations
2526 */
2527 public void incrementWriteOps(int count) {
2528 writeOps.getAndAdd(count);
2529 }
2530
2531 /**
2532 * Get the total number of bytes read
2533 * @return the number of bytes
2534 */
2535 public long getBytesRead() {
2536 return bytesRead.get();
2537 }
2538
2539 /**
2540 * Get the total number of bytes written
2541 * @return the number of bytes
2542 */
2543 public long getBytesWritten() {
2544 return bytesWritten.get();
2545 }
2546
2547 /**
2548 * Get the number of file system read operations such as list files
2549 * @return number of read operations
2550 */
2551 public int getReadOps() {
2552 return readOps.get() + largeReadOps.get();
2553 }
2554
2555 /**
2556 * Get the number of large file system read operations such as list files
2557 * under a large directory
2558 * @return number of large read operations
2559 */
2560 public int getLargeReadOps() {
2561 return largeReadOps.get();
2562 }
2563
2564 /**
2565 * Get the number of file system write operations such as create, append
2566 * rename etc.
2567 * @return number of write operations
2568 */
2569 public int getWriteOps() {
2570 return writeOps.get();
2571 }
2572
2573 @Override
2574 public String toString() {
2575 return bytesRead + " bytes read, " + bytesWritten + " bytes written, "
2576 + readOps + " read ops, " + largeReadOps + " large read ops, "
2577 + writeOps + " write ops";
2578 }
2579
2580 /**
2581 * Reset the counts of bytes to 0.
2582 */
2583 public void reset() {
2584 bytesWritten.set(0);
2585 bytesRead.set(0);
2586 }
2587
2588 /**
2589 * Get the uri scheme associated with this statistics object.
2590 * @return the schema associated with this set of statistics
2591 */
2592 public String getScheme() {
2593 return scheme;
2594 }
2595 }
2596
2597 /**
2598 * Get the Map of Statistics object indexed by URI Scheme.
2599 * @return a Map having a key as URI scheme and value as Statistics object
2600 * @deprecated use {@link #getAllStatistics} instead
2601 */
2602 @Deprecated
2603 public static synchronized Map<String, Statistics> getStatistics() {
2604 Map<String, Statistics> result = new HashMap<String, Statistics>();
2605 for(Statistics stat: statisticsTable.values()) {
2606 result.put(stat.getScheme(), stat);
2607 }
2608 return result;
2609 }
2610
2611 /**
2612 * Return the FileSystem classes that have Statistics
2613 */
2614 public static synchronized List<Statistics> getAllStatistics() {
2615 return new ArrayList<Statistics>(statisticsTable.values());
2616 }
2617
2618 /**
2619 * Get the statistics for a particular file system
2620 * @param cls the class to lookup
2621 * @return a statistics object
2622 */
2623 public static synchronized
2624 Statistics getStatistics(String scheme, Class<? extends FileSystem> cls) {
2625 Statistics result = statisticsTable.get(cls);
2626 if (result == null) {
2627 result = new Statistics(scheme);
2628 statisticsTable.put(cls, result);
2629 }
2630 return result;
2631 }
2632
2633 /**
2634 * Reset all statistics for all file systems
2635 */
2636 public static synchronized void clearStatistics() {
2637 for(Statistics stat: statisticsTable.values()) {
2638 stat.reset();
2639 }
2640 }
2641
2642 /**
2643 * Print all statistics for all file systems
2644 */
2645 public static synchronized
2646 void printStatistics() throws IOException {
2647 for (Map.Entry<Class<? extends FileSystem>, Statistics> pair:
2648 statisticsTable.entrySet()) {
2649 System.out.println(" FileSystem " + pair.getKey().getName() +
2650 ": " + pair.getValue());
2651 }
2652 }
2653 }