001 /**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements. See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership. The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License. You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018 package org.apache.hadoop.fs;
019
020 import java.io.Closeable;
021 import java.io.FileNotFoundException;
022 import java.io.IOException;
023 import java.net.URI;
024 import java.security.PrivilegedExceptionAction;
025 import java.util.ArrayList;
026 import java.util.Arrays;
027 import java.util.EnumSet;
028 import java.util.HashMap;
029 import java.util.HashSet;
030 import java.util.IdentityHashMap;
031 import java.util.Iterator;
032 import java.util.List;
033 import java.util.Map;
034 import java.util.NoSuchElementException;
035 import java.util.Set;
036 import java.util.Stack;
037 import java.util.TreeSet;
038 import java.util.concurrent.atomic.AtomicInteger;
039 import java.util.concurrent.atomic.AtomicLong;
040
041 import org.apache.commons.logging.Log;
042 import org.apache.commons.logging.LogFactory;
043 import org.apache.hadoop.classification.InterfaceAudience;
044 import org.apache.hadoop.classification.InterfaceStability;
045 import org.apache.hadoop.conf.Configuration;
046 import org.apache.hadoop.conf.Configured;
047 import org.apache.hadoop.fs.Options.ChecksumOpt;
048 import org.apache.hadoop.fs.Options.Rename;
049 import org.apache.hadoop.fs.permission.FsPermission;
050 import org.apache.hadoop.io.MultipleIOException;
051 import org.apache.hadoop.io.Text;
052 import org.apache.hadoop.net.NetUtils;
053 import org.apache.hadoop.security.Credentials;
054 import org.apache.hadoop.security.SecurityUtil;
055 import org.apache.hadoop.security.UserGroupInformation;
056 import org.apache.hadoop.security.token.Token;
057 import org.apache.hadoop.util.DataChecksum;
058 import org.apache.hadoop.util.Progressable;
059 import org.apache.hadoop.util.ReflectionUtils;
060 import org.apache.hadoop.util.ShutdownHookManager;
061
062 import com.google.common.annotations.VisibleForTesting;
063
064 /****************************************************************
065 * An abstract base class for a fairly generic filesystem. It
066 * may be implemented as a distributed filesystem, or as a "local"
067 * one that reflects the locally-connected disk. The local version
068 * exists for small Hadoop instances and for testing.
069 *
070 * <p>
071 *
072 * All user code that may potentially use the Hadoop Distributed
073 * File System should be written to use a FileSystem object. The
074 * Hadoop DFS is a multi-machine system that appears as a single
075 * disk. It's useful because of its fault tolerance and potentially
076 * very large capacity.
077 *
078 * <p>
079 * The local implementation is {@link LocalFileSystem} and distributed
080 * implementation is DistributedFileSystem.
081 *****************************************************************/
082 @InterfaceAudience.Public
083 @InterfaceStability.Stable
084 public abstract class FileSystem extends Configured implements Closeable {
085 public static final String FS_DEFAULT_NAME_KEY =
086 CommonConfigurationKeys.FS_DEFAULT_NAME_KEY;
087 public static final String DEFAULT_FS =
088 CommonConfigurationKeys.FS_DEFAULT_NAME_DEFAULT;
089
090 public static final Log LOG = LogFactory.getLog(FileSystem.class);
091
092 /**
093 * Priority of the FileSystem shutdown hook.
094 */
095 public static final int SHUTDOWN_HOOK_PRIORITY = 10;
096
097 /** FileSystem cache */
098 static final Cache CACHE = new Cache();
099
100 /** The key this instance is stored under in the cache. */
101 private Cache.Key key;
102
103 /** Recording statistics per a FileSystem class */
104 private static final Map<Class<? extends FileSystem>, Statistics>
105 statisticsTable =
106 new IdentityHashMap<Class<? extends FileSystem>, Statistics>();
107
108 /**
109 * The statistics for this file system.
110 */
111 protected Statistics statistics;
112
113 /**
114 * A cache of files that should be deleted when filsystem is closed
115 * or the JVM is exited.
116 */
117 private Set<Path> deleteOnExit = new TreeSet<Path>();
118
119 /**
120 * This method adds a file system for testing so that we can find it later. It
121 * is only for testing.
122 * @param uri the uri to store it under
123 * @param conf the configuration to store it under
124 * @param fs the file system to store
125 * @throws IOException
126 */
127 static void addFileSystemForTesting(URI uri, Configuration conf,
128 FileSystem fs) throws IOException {
129 CACHE.map.put(new Cache.Key(uri, conf), fs);
130 }
131
132 /**
133 * Get a filesystem instance based on the uri, the passed
134 * configuration and the user
135 * @param uri of the filesystem
136 * @param conf the configuration to use
137 * @param user to perform the get as
138 * @return the filesystem instance
139 * @throws IOException
140 * @throws InterruptedException
141 */
142 public static FileSystem get(final URI uri, final Configuration conf,
143 final String user) throws IOException, InterruptedException {
144 UserGroupInformation ugi;
145 if (user == null) {
146 ugi = UserGroupInformation.getCurrentUser();
147 } else {
148 ugi = UserGroupInformation.createRemoteUser(user);
149 }
150 return ugi.doAs(new PrivilegedExceptionAction<FileSystem>() {
151 public FileSystem run() throws IOException {
152 return get(uri, conf);
153 }
154 });
155 }
156
157 /**
158 * Returns the configured filesystem implementation.
159 * @param conf the configuration to use
160 */
161 public static FileSystem get(Configuration conf) throws IOException {
162 return get(getDefaultUri(conf), conf);
163 }
164
165 /** Get the default filesystem URI from a configuration.
166 * @param conf the configuration to use
167 * @return the uri of the default filesystem
168 */
169 public static URI getDefaultUri(Configuration conf) {
170 return URI.create(fixName(conf.get(FS_DEFAULT_NAME_KEY, DEFAULT_FS)));
171 }
172
173 /** Set the default filesystem URI in a configuration.
174 * @param conf the configuration to alter
175 * @param uri the new default filesystem uri
176 */
177 public static void setDefaultUri(Configuration conf, URI uri) {
178 conf.set(FS_DEFAULT_NAME_KEY, uri.toString());
179 }
180
181 /** Set the default filesystem URI in a configuration.
182 * @param conf the configuration to alter
183 * @param uri the new default filesystem uri
184 */
185 public static void setDefaultUri(Configuration conf, String uri) {
186 setDefaultUri(conf, URI.create(fixName(uri)));
187 }
188
189 /** Called after a new FileSystem instance is constructed.
190 * @param name a uri whose authority section names the host, port, etc.
191 * for this FileSystem
192 * @param conf the configuration
193 */
194 public void initialize(URI name, Configuration conf) throws IOException {
195 statistics = getStatistics(name.getScheme(), getClass());
196 }
197
198 /** Returns a URI whose scheme and authority identify this FileSystem.*/
199 public abstract URI getUri();
200
201 /**
202 * Resolve the uri's hostname and add the default port if not in the uri
203 * @return URI
204 * @see NetUtils#getCanonicalUri(URI, int)
205 */
206 protected URI getCanonicalUri() {
207 return NetUtils.getCanonicalUri(getUri(), getDefaultPort());
208 }
209
210 /**
211 * Get the default port for this file system.
212 * @return the default port or 0 if there isn't one
213 */
214 protected int getDefaultPort() {
215 return 0;
216 }
217
218 /**
219 * Get a canonical service name for this file system. The token cache is
220 * the only user of the canonical service name, and uses it to lookup this
221 * filesystem's service tokens.
222 * If file system provides a token of its own then it must have a canonical
223 * name, otherwise canonical name can be null.
224 *
225 * Default Impl: If the file system has child file systems
226 * (such as an embedded file system) then it is assumed that the fs has no
227 * tokens of its own and hence returns a null name; otherwise a service
228 * name is built using Uri and port.
229 *
230 * @return a service string that uniquely identifies this file system, null
231 * if the filesystem does not implement tokens
232 * @see SecurityUtil#buildDTServiceName(URI, int)
233 */
234 @InterfaceAudience.LimitedPrivate({ "HDFS", "MapReduce" })
235 public String getCanonicalServiceName() {
236 return (getChildFileSystems() == null)
237 ? SecurityUtil.buildDTServiceName(getUri(), getDefaultPort())
238 : null;
239 }
240
241 /** @deprecated call #getUri() instead.*/
242 @Deprecated
243 public String getName() { return getUri().toString(); }
244
245 /** @deprecated call #get(URI,Configuration) instead. */
246 @Deprecated
247 public static FileSystem getNamed(String name, Configuration conf)
248 throws IOException {
249 return get(URI.create(fixName(name)), conf);
250 }
251
252 /** Update old-format filesystem names, for back-compatibility. This should
253 * eventually be replaced with a checkName() method that throws an exception
254 * for old-format names. */
255 private static String fixName(String name) {
256 // convert old-format name to new-format name
257 if (name.equals("local")) { // "local" is now "file:///".
258 LOG.warn("\"local\" is a deprecated filesystem name."
259 +" Use \"file:///\" instead.");
260 name = "file:///";
261 } else if (name.indexOf('/')==-1) { // unqualified is "hdfs://"
262 LOG.warn("\""+name+"\" is a deprecated filesystem name."
263 +" Use \"hdfs://"+name+"/\" instead.");
264 name = "hdfs://"+name;
265 }
266 return name;
267 }
268
269 /**
270 * Get the local file system.
271 * @param conf the configuration to configure the file system with
272 * @return a LocalFileSystem
273 */
274 public static LocalFileSystem getLocal(Configuration conf)
275 throws IOException {
276 return (LocalFileSystem)get(LocalFileSystem.NAME, conf);
277 }
278
279 /** Returns the FileSystem for this URI's scheme and authority. The scheme
280 * of the URI determines a configuration property name,
281 * <tt>fs.<i>scheme</i>.class</tt> whose value names the FileSystem class.
282 * The entire URI is passed to the FileSystem instance's initialize method.
283 */
284 public static FileSystem get(URI uri, Configuration conf) throws IOException {
285 String scheme = uri.getScheme();
286 String authority = uri.getAuthority();
287
288 if (scheme == null && authority == null) { // use default FS
289 return get(conf);
290 }
291
292 if (scheme != null && authority == null) { // no authority
293 URI defaultUri = getDefaultUri(conf);
294 if (scheme.equals(defaultUri.getScheme()) // if scheme matches default
295 && defaultUri.getAuthority() != null) { // & default has authority
296 return get(defaultUri, conf); // return default
297 }
298 }
299
300 String disableCacheName = String.format("fs.%s.impl.disable.cache", scheme);
301 if (conf.getBoolean(disableCacheName, false)) {
302 return createFileSystem(uri, conf);
303 }
304
305 return CACHE.get(uri, conf);
306 }
307
308 /**
309 * Returns the FileSystem for this URI's scheme and authority and the
310 * passed user. Internally invokes {@link #newInstance(URI, Configuration)}
311 * @param uri of the filesystem
312 * @param conf the configuration to use
313 * @param user to perform the get as
314 * @return filesystem instance
315 * @throws IOException
316 * @throws InterruptedException
317 */
318 public static FileSystem newInstance(final URI uri, final Configuration conf,
319 final String user) throws IOException, InterruptedException {
320 UserGroupInformation ugi;
321 if (user == null) {
322 ugi = UserGroupInformation.getCurrentUser();
323 } else {
324 ugi = UserGroupInformation.createRemoteUser(user);
325 }
326 return ugi.doAs(new PrivilegedExceptionAction<FileSystem>() {
327 public FileSystem run() throws IOException {
328 return newInstance(uri,conf);
329 }
330 });
331 }
332 /** Returns the FileSystem for this URI's scheme and authority. The scheme
333 * of the URI determines a configuration property name,
334 * <tt>fs.<i>scheme</i>.class</tt> whose value names the FileSystem class.
335 * The entire URI is passed to the FileSystem instance's initialize method.
336 * This always returns a new FileSystem object.
337 */
338 public static FileSystem newInstance(URI uri, Configuration conf) throws IOException {
339 String scheme = uri.getScheme();
340 String authority = uri.getAuthority();
341
342 if (scheme == null) { // no scheme: use default FS
343 return newInstance(conf);
344 }
345
346 if (authority == null) { // no authority
347 URI defaultUri = getDefaultUri(conf);
348 if (scheme.equals(defaultUri.getScheme()) // if scheme matches default
349 && defaultUri.getAuthority() != null) { // & default has authority
350 return newInstance(defaultUri, conf); // return default
351 }
352 }
353 return CACHE.getUnique(uri, conf);
354 }
355
356 /** Returns a unique configured filesystem implementation.
357 * This always returns a new FileSystem object.
358 * @param conf the configuration to use
359 */
360 public static FileSystem newInstance(Configuration conf) throws IOException {
361 return newInstance(getDefaultUri(conf), conf);
362 }
363
364 /**
365 * Get a unique local file system object
366 * @param conf the configuration to configure the file system with
367 * @return a LocalFileSystem
368 * This always returns a new FileSystem object.
369 */
370 public static LocalFileSystem newInstanceLocal(Configuration conf)
371 throws IOException {
372 return (LocalFileSystem)newInstance(LocalFileSystem.NAME, conf);
373 }
374
375 /**
376 * Close all cached filesystems. Be sure those filesystems are not
377 * used anymore.
378 *
379 * @throws IOException
380 */
381 public static void closeAll() throws IOException {
382 CACHE.closeAll();
383 }
384
385 /**
386 * Close all cached filesystems for a given UGI. Be sure those filesystems
387 * are not used anymore.
388 * @param ugi user group info to close
389 * @throws IOException
390 */
391 public static void closeAllForUGI(UserGroupInformation ugi)
392 throws IOException {
393 CACHE.closeAll(ugi);
394 }
395
396 /**
397 * Make sure that a path specifies a FileSystem.
398 * @param path to use
399 */
400 public Path makeQualified(Path path) {
401 checkPath(path);
402 return path.makeQualified(this.getUri(), this.getWorkingDirectory());
403 }
404
405 /**
406 * Get a new delegation token for this file system.
407 * This is an internal method that should have been declared protected
408 * but wasn't historically.
409 * Callers should use {@link #addDelegationTokens(String, Credentials)}
410 *
411 * @param renewer the account name that is allowed to renew the token.
412 * @return a new delegation token
413 * @throws IOException
414 */
415 @InterfaceAudience.Private()
416 public Token<?> getDelegationToken(String renewer) throws IOException {
417 return null;
418 }
419
420 /**
421 * Obtain all delegation tokens used by this FileSystem that are not
422 * already present in the given Credentials. Existing tokens will neither
423 * be verified as valid nor having the given renewer. Missing tokens will
424 * be acquired and added to the given Credentials.
425 *
426 * Default Impl: works for simple fs with its own token
427 * and also for an embedded fs whose tokens are those of its
428 * children file system (i.e. the embedded fs has not tokens of its
429 * own).
430 *
431 * @param renewer the user allowed to renew the delegation tokens
432 * @param credentials cache in which to add new delegation tokens
433 * @return list of new delegation tokens
434 * @throws IOException
435 */
436 @InterfaceAudience.LimitedPrivate({ "HDFS", "MapReduce" })
437 public Token<?>[] addDelegationTokens(
438 final String renewer, Credentials credentials) throws IOException {
439 if (credentials == null) {
440 credentials = new Credentials();
441 }
442 final List<Token<?>> tokens = new ArrayList<Token<?>>();
443 collectDelegationTokens(renewer, credentials, tokens);
444 return tokens.toArray(new Token<?>[tokens.size()]);
445 }
446
447 /**
448 * Recursively obtain the tokens for this FileSystem and all descended
449 * FileSystems as determined by getChildFileSystems().
450 * @param renewer the user allowed to renew the delegation tokens
451 * @param credentials cache in which to add the new delegation tokens
452 * @param tokens list in which to add acquired tokens
453 * @throws IOException
454 */
455 private void collectDelegationTokens(final String renewer,
456 final Credentials credentials,
457 final List<Token<?>> tokens)
458 throws IOException {
459 final String serviceName = getCanonicalServiceName();
460 // Collect token of the this filesystem and then of its embedded children
461 if (serviceName != null) { // fs has token, grab it
462 final Text service = new Text(serviceName);
463 Token<?> token = credentials.getToken(service);
464 if (token == null) {
465 token = getDelegationToken(renewer);
466 if (token != null) {
467 tokens.add(token);
468 credentials.addToken(service, token);
469 }
470 }
471 }
472 // Now collect the tokens from the children
473 final FileSystem[] children = getChildFileSystems();
474 if (children != null) {
475 for (final FileSystem fs : children) {
476 fs.collectDelegationTokens(renewer, credentials, tokens);
477 }
478 }
479 }
480
481 /**
482 * Get all the immediate child FileSystems embedded in this FileSystem.
483 * It does not recurse and get grand children. If a FileSystem
484 * has multiple child FileSystems, then it should return a unique list
485 * of those FileSystems. Default is to return null to signify no children.
486 *
487 * @return FileSystems used by this FileSystem
488 */
489 @InterfaceAudience.LimitedPrivate({ "HDFS" })
490 @VisibleForTesting
491 public FileSystem[] getChildFileSystems() {
492 return null;
493 }
494
495 /** create a file with the provided permission
496 * The permission of the file is set to be the provided permission as in
497 * setPermission, not permission&~umask
498 *
499 * It is implemented using two RPCs. It is understood that it is inefficient,
500 * but the implementation is thread-safe. The other option is to change the
501 * value of umask in configuration to be 0, but it is not thread-safe.
502 *
503 * @param fs file system handle
504 * @param file the name of the file to be created
505 * @param permission the permission of the file
506 * @return an output stream
507 * @throws IOException
508 */
509 public static FSDataOutputStream create(FileSystem fs,
510 Path file, FsPermission permission) throws IOException {
511 // create the file with default permission
512 FSDataOutputStream out = fs.create(file);
513 // set its permission to the supplied one
514 fs.setPermission(file, permission);
515 return out;
516 }
517
518 /** create a directory with the provided permission
519 * The permission of the directory is set to be the provided permission as in
520 * setPermission, not permission&~umask
521 *
522 * @see #create(FileSystem, Path, FsPermission)
523 *
524 * @param fs file system handle
525 * @param dir the name of the directory to be created
526 * @param permission the permission of the directory
527 * @return true if the directory creation succeeds; false otherwise
528 * @throws IOException
529 */
530 public static boolean mkdirs(FileSystem fs, Path dir, FsPermission permission)
531 throws IOException {
532 // create the directory using the default permission
533 boolean result = fs.mkdirs(dir);
534 // set its permission to be the supplied one
535 fs.setPermission(dir, permission);
536 return result;
537 }
538
539 ///////////////////////////////////////////////////////////////
540 // FileSystem
541 ///////////////////////////////////////////////////////////////
542
/**
 * Creates a FileSystem, passing null to the superclass constructor.
 */
protected FileSystem() {
  super(null);
}
546
547 /**
548 * Check that a Path belongs to this FileSystem.
549 * @param path to check
550 */
551 protected void checkPath(Path path) {
552 URI uri = path.toUri();
553 String thatScheme = uri.getScheme();
554 if (thatScheme == null) // fs is relative
555 return;
556 URI thisUri = getCanonicalUri();
557 String thisScheme = thisUri.getScheme();
558 //authority and scheme are not case sensitive
559 if (thisScheme.equalsIgnoreCase(thatScheme)) {// schemes match
560 String thisAuthority = thisUri.getAuthority();
561 String thatAuthority = uri.getAuthority();
562 if (thatAuthority == null && // path's authority is null
563 thisAuthority != null) { // fs has an authority
564 URI defaultUri = getDefaultUri(getConf());
565 if (thisScheme.equalsIgnoreCase(defaultUri.getScheme())) {
566 uri = defaultUri; // schemes match, so use this uri instead
567 } else {
568 uri = null; // can't determine auth of the path
569 }
570 }
571 if (uri != null) {
572 // canonicalize uri before comparing with this fs
573 uri = NetUtils.getCanonicalUri(uri, getDefaultPort());
574 thatAuthority = uri.getAuthority();
575 if (thisAuthority == thatAuthority || // authorities match
576 (thisAuthority != null &&
577 thisAuthority.equalsIgnoreCase(thatAuthority)))
578 return;
579 }
580 }
581 throw new IllegalArgumentException("Wrong FS: "+path+
582 ", expected: "+this.getUri());
583 }
584
585 /**
586 * Return an array containing hostnames, offset and size of
587 * portions of the given file. For a nonexistent
588 * file or regions, null will be returned.
589 *
590 * This call is most helpful with DFS, where it returns
591 * hostnames of machines that contain the given file.
592 *
593 * The FileSystem will simply return an elt containing 'localhost'.
594 *
595 * @param file FilesStatus to get data from
596 * @param start offset into the given file
597 * @param len length for which to get locations for
598 */
599 public BlockLocation[] getFileBlockLocations(FileStatus file,
600 long start, long len) throws IOException {
601 if (file == null) {
602 return null;
603 }
604
605 if (start < 0 || len < 0) {
606 throw new IllegalArgumentException("Invalid start or len parameter");
607 }
608
609 if (file.getLen() <= start) {
610 return new BlockLocation[0];
611
612 }
613 String[] name = { "localhost:50010" };
614 String[] host = { "localhost" };
615 return new BlockLocation[] {
616 new BlockLocation(name, host, 0, file.getLen()) };
617 }
618
619
620 /**
621 * Return an array containing hostnames, offset and size of
622 * portions of the given file. For a nonexistent
623 * file or regions, null will be returned.
624 *
625 * This call is most helpful with DFS, where it returns
626 * hostnames of machines that contain the given file.
627 *
628 * The FileSystem will simply return an elt containing 'localhost'.
629 *
630 * @param p path is used to identify an FS since an FS could have
631 * another FS that it could be delegating the call to
632 * @param start offset into the given file
633 * @param len length for which to get locations for
634 */
635 public BlockLocation[] getFileBlockLocations(Path p,
636 long start, long len) throws IOException {
637 if (p == null) {
638 throw new NullPointerException();
639 }
640 FileStatus file = getFileStatus(p);
641 return getFileBlockLocations(file, start, len);
642 }
643
644 /**
645 * Return a set of server default configuration values
646 * @return server default configuration values
647 * @throws IOException
648 */
649 public FsServerDefaults getServerDefaults() throws IOException {
650 Configuration conf = getConf();
651 // CRC32 is chosen as default as it is available in all
652 // releases that support checksum.
653 return new FsServerDefaults(getDefaultBlockSize(),
654 conf.getInt("io.bytes.per.checksum", 512),
655 64 * 1024,
656 getDefaultReplication(),
657 conf.getInt("io.file.buffer.size", 4096),
658 DataChecksum.Type.CRC32);
659 }
660
661 /**
662 * Return a set of server default configuration values
663 * @param p path is used to identify an FS since an FS could have
664 * another FS that it could be delegating the call to
665 * @return server default configuration values
666 * @throws IOException
667 */
668 public FsServerDefaults getServerDefaults(Path p) throws IOException {
669 return getServerDefaults();
670 }
671
672 /**
673 * Return the fully-qualified path of path f resolving the path
674 * through any symlinks or mount point
675 * @param p path to be resolved
676 * @return fully qualified path
677 * @throws FileNotFoundException
678 */
679 public Path resolvePath(final Path p) throws IOException {
680 checkPath(p);
681 return getFileStatus(p).getPath();
682 }
683
684 /**
685 * Opens an FSDataInputStream at the indicated Path.
686 * @param f the file name to open
687 * @param bufferSize the size of the buffer to be used.
688 */
689 public abstract FSDataInputStream open(Path f, int bufferSize)
690 throws IOException;
691
692 /**
693 * Opens an FSDataInputStream at the indicated Path.
694 * @param f the file to open
695 */
696 public FSDataInputStream open(Path f) throws IOException {
697 return open(f, getConf().getInt("io.file.buffer.size", 4096));
698 }
699
700 /**
701 * Create an FSDataOutputStream at the indicated Path.
702 * Files are overwritten by default.
703 * @param f the file to create
704 */
705 public FSDataOutputStream create(Path f) throws IOException {
706 return create(f, true);
707 }
708
709 /**
710 * Create an FSDataOutputStream at the indicated Path.
711 * @param f the file to create
712 * @param overwrite if a file with this name already exists, then if true,
713 * the file will be overwritten, and if false an exception will be thrown.
714 */
715 public FSDataOutputStream create(Path f, boolean overwrite)
716 throws IOException {
717 return create(f, overwrite,
718 getConf().getInt("io.file.buffer.size", 4096),
719 getDefaultReplication(f),
720 getDefaultBlockSize(f));
721 }
722
723 /**
724 * Create an FSDataOutputStream at the indicated Path with write-progress
725 * reporting.
726 * Files are overwritten by default.
727 * @param f the file to create
728 * @param progress to report progress
729 */
730 public FSDataOutputStream create(Path f, Progressable progress)
731 throws IOException {
732 return create(f, true,
733 getConf().getInt("io.file.buffer.size", 4096),
734 getDefaultReplication(f),
735 getDefaultBlockSize(f), progress);
736 }
737
738 /**
739 * Create an FSDataOutputStream at the indicated Path.
740 * Files are overwritten by default.
741 * @param f the file to create
742 * @param replication the replication factor
743 */
744 public FSDataOutputStream create(Path f, short replication)
745 throws IOException {
746 return create(f, true,
747 getConf().getInt("io.file.buffer.size", 4096),
748 replication,
749 getDefaultBlockSize(f));
750 }
751
752 /**
753 * Create an FSDataOutputStream at the indicated Path with write-progress
754 * reporting.
755 * Files are overwritten by default.
756 * @param f the file to create
757 * @param replication the replication factor
758 * @param progress to report progress
759 */
760 public FSDataOutputStream create(Path f, short replication,
761 Progressable progress) throws IOException {
762 return create(f, true,
763 getConf().getInt("io.file.buffer.size", 4096),
764 replication,
765 getDefaultBlockSize(f), progress);
766 }
767
768
769 /**
770 * Create an FSDataOutputStream at the indicated Path.
771 * @param f the file name to create
772 * @param overwrite if a file with this name already exists, then if true,
773 * the file will be overwritten, and if false an error will be thrown.
774 * @param bufferSize the size of the buffer to be used.
775 */
776 public FSDataOutputStream create(Path f,
777 boolean overwrite,
778 int bufferSize
779 ) throws IOException {
780 return create(f, overwrite, bufferSize,
781 getDefaultReplication(f),
782 getDefaultBlockSize(f));
783 }
784
785 /**
786 * Create an FSDataOutputStream at the indicated Path with write-progress
787 * reporting.
788 * @param f the path of the file to open
789 * @param overwrite if a file with this name already exists, then if true,
790 * the file will be overwritten, and if false an error will be thrown.
791 * @param bufferSize the size of the buffer to be used.
792 */
793 public FSDataOutputStream create(Path f,
794 boolean overwrite,
795 int bufferSize,
796 Progressable progress
797 ) throws IOException {
798 return create(f, overwrite, bufferSize,
799 getDefaultReplication(f),
800 getDefaultBlockSize(f), progress);
801 }
802
803
804 /**
805 * Create an FSDataOutputStream at the indicated Path.
806 * @param f the file name to open
807 * @param overwrite if a file with this name already exists, then if true,
808 * the file will be overwritten, and if false an error will be thrown.
809 * @param bufferSize the size of the buffer to be used.
810 * @param replication required block replication for the file.
811 */
812 public FSDataOutputStream create(Path f,
813 boolean overwrite,
814 int bufferSize,
815 short replication,
816 long blockSize
817 ) throws IOException {
818 return create(f, overwrite, bufferSize, replication, blockSize, null);
819 }
820
821 /**
822 * Create an FSDataOutputStream at the indicated Path with write-progress
823 * reporting.
824 * @param f the file name to open
825 * @param overwrite if a file with this name already exists, then if true,
826 * the file will be overwritten, and if false an error will be thrown.
827 * @param bufferSize the size of the buffer to be used.
828 * @param replication required block replication for the file.
829 */
830 public FSDataOutputStream create(Path f,
831 boolean overwrite,
832 int bufferSize,
833 short replication,
834 long blockSize,
835 Progressable progress
836 ) throws IOException {
837 return this.create(f, FsPermission.getDefault().applyUMask(
838 FsPermission.getUMask(getConf())), overwrite, bufferSize,
839 replication, blockSize, progress);
840 }
841
842 /**
843 * Create an FSDataOutputStream at the indicated Path with write-progress
844 * reporting.
845 * @param f the file name to open
846 * @param permission
847 * @param overwrite if a file with this name already exists, then if true,
848 * the file will be overwritten, and if false an error will be thrown.
849 * @param bufferSize the size of the buffer to be used.
850 * @param replication required block replication for the file.
851 * @param blockSize
852 * @param progress
853 * @throws IOException
854 * @see #setPermission(Path, FsPermission)
855 */
856 public abstract FSDataOutputStream create(Path f,
857 FsPermission permission,
858 boolean overwrite,
859 int bufferSize,
860 short replication,
861 long blockSize,
862 Progressable progress) throws IOException;
863
864 /**
865 * Create an FSDataOutputStream at the indicated Path with a custom
866 * checksum option. This create method is the common method to be
867 * used to specify ChecksumOpt in both 0.23.x and 2.x.
868 *
869 * @param f the file name to open
870 * @param permission
871 * @param flags {@link CreateFlag}s to use for this stream.
872 * @param bufferSize the size of the buffer to be used.
873 * @param replication required block replication for the file.
874 * @param blockSize
875 * @param progress
876 * @param checksumOpt checksum parameter. If null, the values
877 * found in conf will be used.
878 * @throws IOException
879 * @see #setPermission(Path, FsPermission)
880 */
881 public FSDataOutputStream create(Path f,
882 FsPermission permission,
883 EnumSet<CreateFlag> flags,
884 int bufferSize,
885 short replication,
886 long blockSize,
887 Progressable progress,
888 ChecksumOpt checksumOpt) throws IOException {
889 // Checksum options are ignored by default. The file systems that
890 // implement checksum need to override this method. The full
891 // support is currently only available in DFS.
892 return create(f, permission, flags.contains(CreateFlag.OVERWRITE),
893 bufferSize, replication, blockSize, progress);
894 }
895
896 /*.
897 * This create has been added to support the FileContext that processes
898 * the permission
899 * with umask before calling this method.
900 * This a temporary method added to support the transition from FileSystem
901 * to FileContext for user applications.
902 */
903 @Deprecated
904 protected FSDataOutputStream primitiveCreate(Path f,
905 FsPermission absolutePermission, EnumSet<CreateFlag> flag, int bufferSize,
906 short replication, long blockSize, Progressable progress,
907 ChecksumOpt checksumOpt) throws IOException {
908
909 boolean pathExists = exists(f);
910 CreateFlag.validate(f, pathExists, flag);
911
912 // Default impl assumes that permissions do not matter and
913 // nor does the bytesPerChecksum hence
914 // calling the regular create is good enough.
915 // FSs that implement permissions should override this.
916
917 if (pathExists && flag.contains(CreateFlag.APPEND)) {
918 return append(f, bufferSize, progress);
919 }
920
921 return this.create(f, absolutePermission,
922 flag.contains(CreateFlag.OVERWRITE), bufferSize, replication,
923 blockSize, progress);
924 }
925
926 /**
927 * This version of the mkdirs method assumes that the permission is absolute.
928 * It has been added to support the FileContext that processes the permission
929 * with umask before calling this method.
930 * This a temporary method added to support the transition from FileSystem
931 * to FileContext for user applications.
932 */
933 @Deprecated
934 protected boolean primitiveMkdir(Path f, FsPermission absolutePermission)
935 throws IOException {
936 // Default impl is to assume that permissions do not matter and hence
937 // calling the regular mkdirs is good enough.
938 // FSs that implement permissions should override this.
939 return this.mkdirs(f, absolutePermission);
940 }
941
942
943 /**
944 * This version of the mkdirs method assumes that the permission is absolute.
945 * It has been added to support the FileContext that processes the permission
946 * with umask before calling this method.
947 * This a temporary method added to support the transition from FileSystem
948 * to FileContext for user applications.
949 */
950 @Deprecated
951 protected void primitiveMkdir(Path f, FsPermission absolutePermission,
952 boolean createParent)
953 throws IOException {
954
955 if (!createParent) { // parent must exist.
956 // since the this.mkdirs makes parent dirs automatically
957 // we must throw exception if parent does not exist.
958 final FileStatus stat = getFileStatus(f.getParent());
959 if (stat == null) {
960 throw new FileNotFoundException("Missing parent:" + f);
961 }
962 if (!stat.isDirectory()) {
963 throw new ParentNotDirectoryException("parent is not a dir");
964 }
965 // parent does exist - go ahead with mkdir of leaf
966 }
967 // Default impl is to assume that permissions do not matter and hence
968 // calling the regular mkdirs is good enough.
969 // FSs that implement permissions should override this.
970 if (!this.mkdirs(f, absolutePermission)) {
971 throw new IOException("mkdir of "+ f + " failed");
972 }
973 }
974
975 /**
976 * Opens an FSDataOutputStream at the indicated Path with write-progress
977 * reporting. Same as create(), except fails if parent directory doesn't
978 * already exist.
979 * @param f the file name to open
980 * @param overwrite if a file with this name already exists, then if true,
981 * the file will be overwritten, and if false an error will be thrown.
982 * @param bufferSize the size of the buffer to be used.
983 * @param replication required block replication for the file.
984 * @param blockSize
985 * @param progress
986 * @throws IOException
987 * @see #setPermission(Path, FsPermission)
988 * @deprecated API only for 0.20-append
989 */
990 @Deprecated
991 public FSDataOutputStream createNonRecursive(Path f,
992 boolean overwrite,
993 int bufferSize, short replication, long blockSize,
994 Progressable progress) throws IOException {
995 return this.createNonRecursive(f, FsPermission.getDefault(),
996 overwrite, bufferSize, replication, blockSize, progress);
997 }
998
999 /**
1000 * Opens an FSDataOutputStream at the indicated Path with write-progress
1001 * reporting. Same as create(), except fails if parent directory doesn't
1002 * already exist.
1003 * @param f the file name to open
1004 * @param permission
1005 * @param overwrite if a file with this name already exists, then if true,
1006 * the file will be overwritten, and if false an error will be thrown.
1007 * @param bufferSize the size of the buffer to be used.
1008 * @param replication required block replication for the file.
1009 * @param blockSize
1010 * @param progress
1011 * @throws IOException
1012 * @see #setPermission(Path, FsPermission)
1013 * @deprecated API only for 0.20-append
1014 */
1015 @Deprecated
1016 public FSDataOutputStream createNonRecursive(Path f, FsPermission permission,
1017 boolean overwrite, int bufferSize, short replication, long blockSize,
1018 Progressable progress) throws IOException {
1019 throw new IOException("createNonRecursive unsupported for this filesystem "
1020 + this.getClass());
1021 }
1022
1023 /**
1024 * Creates the given Path as a brand-new zero-length file. If
1025 * create fails, or if it already existed, return false.
1026 *
1027 * @param f path to use for create
1028 */
1029 public boolean createNewFile(Path f) throws IOException {
1030 if (exists(f)) {
1031 return false;
1032 } else {
1033 create(f, false, getConf().getInt("io.file.buffer.size", 4096)).close();
1034 return true;
1035 }
1036 }
1037
1038 /**
1039 * Append to an existing file (optional operation).
1040 * Same as append(f, getConf().getInt("io.file.buffer.size", 4096), null)
1041 * @param f the existing file to be appended.
1042 * @throws IOException
1043 */
1044 public FSDataOutputStream append(Path f) throws IOException {
1045 return append(f, getConf().getInt("io.file.buffer.size", 4096), null);
1046 }
1047 /**
1048 * Append to an existing file (optional operation).
1049 * Same as append(f, bufferSize, null).
1050 * @param f the existing file to be appended.
1051 * @param bufferSize the size of the buffer to be used.
1052 * @throws IOException
1053 */
1054 public FSDataOutputStream append(Path f, int bufferSize) throws IOException {
1055 return append(f, bufferSize, null);
1056 }
1057
/**
 * Append to an existing file (optional operation). This is the abstract
 * primitive that the other append overloads delegate to.
 * @param f the existing file to be appended.
 * @param bufferSize the size of the buffer to be used.
 * @param progress for reporting progress if it is not null.
 * @return the output stream for appending to the file
 * @throws IOException
 */
public abstract FSDataOutputStream append(Path f, int bufferSize,
    Progressable progress) throws IOException;
1067
1068 /**
1069 * Get replication.
1070 *
1071 * @deprecated Use getFileStatus() instead
1072 * @param src file name
1073 * @return file replication
1074 * @throws IOException
1075 */
1076 @Deprecated
1077 public short getReplication(Path src) throws IOException {
1078 return getFileStatus(src).getReplication();
1079 }
1080
/**
 * Set replication for an existing file.
 * <p>
 * This default implementation performs no work and unconditionally
 * returns true; neither argument is inspected. File systems that support
 * changing replication are presumably expected to override it.
 *
 * @param src file name
 * @param replication new replication
 * @throws IOException
 * @return true if successful;
 *         false if file does not exist or is a directory
 */
public boolean setReplication(Path src, short replication)
    throws IOException {
  // No-op default: always report success.
  return true;
}
1094
/**
 * Renames Path src to Path dst. Can take place on local fs
 * or remote DFS. This is the abstract primitive that concrete file
 * systems implement.
 * @param src path to be renamed
 * @param dst new path after rename
 * @throws IOException on failure
 * @return true if rename is successful
 */
public abstract boolean rename(Path src, Path dst) throws IOException;
1104
1105 /**
1106 * Renames Path src to Path dst
1107 * <ul>
1108 * <li
1109 * <li>Fails if src is a file and dst is a directory.
1110 * <li>Fails if src is a directory and dst is a file.
1111 * <li>Fails if the parent of dst does not exist or is a file.
1112 * </ul>
1113 * <p>
1114 * If OVERWRITE option is not passed as an argument, rename fails
1115 * if the dst already exists.
1116 * <p>
1117 * If OVERWRITE option is passed as an argument, rename overwrites
1118 * the dst if it is a file or an empty directory. Rename fails if dst is
1119 * a non-empty directory.
1120 * <p>
1121 * Note that atomicity of rename is dependent on the file system
1122 * implementation. Please refer to the file system documentation for
1123 * details. This default implementation is non atomic.
1124 * <p>
1125 * This method is deprecated since it is a temporary method added to
1126 * support the transition from FileSystem to FileContext for user
1127 * applications.
1128 *
1129 * @param src path to be renamed
1130 * @param dst new path after rename
1131 * @throws IOException on failure
1132 */
1133 @Deprecated
1134 protected void rename(final Path src, final Path dst,
1135 final Rename... options) throws IOException {
1136 // Default implementation
1137 final FileStatus srcStatus = getFileStatus(src);
1138 if (srcStatus == null) {
1139 throw new FileNotFoundException("rename source " + src + " not found.");
1140 }
1141
1142 boolean overwrite = false;
1143 if (null != options) {
1144 for (Rename option : options) {
1145 if (option == Rename.OVERWRITE) {
1146 overwrite = true;
1147 }
1148 }
1149 }
1150
1151 FileStatus dstStatus;
1152 try {
1153 dstStatus = getFileStatus(dst);
1154 } catch (IOException e) {
1155 dstStatus = null;
1156 }
1157 if (dstStatus != null) {
1158 if (srcStatus.isDirectory() != dstStatus.isDirectory()) {
1159 throw new IOException("Source " + src + " Destination " + dst
1160 + " both should be either file or directory");
1161 }
1162 if (!overwrite) {
1163 throw new FileAlreadyExistsException("rename destination " + dst
1164 + " already exists.");
1165 }
1166 // Delete the destination that is a file or an empty directory
1167 if (dstStatus.isDirectory()) {
1168 FileStatus[] list = listStatus(dst);
1169 if (list != null && list.length != 0) {
1170 throw new IOException(
1171 "rename cannot overwrite non empty destination directory " + dst);
1172 }
1173 }
1174 delete(dst, false);
1175 } else {
1176 final Path parent = dst.getParent();
1177 final FileStatus parentStatus = getFileStatus(parent);
1178 if (parentStatus == null) {
1179 throw new FileNotFoundException("rename destination parent " + parent
1180 + " not found.");
1181 }
1182 if (!parentStatus.isDirectory()) {
1183 throw new ParentNotDirectoryException("rename destination parent " + parent
1184 + " is a file.");
1185 }
1186 }
1187 if (!rename(src, dst)) {
1188 throw new IOException("rename from " + src + " to " + dst + " failed.");
1189 }
1190 }
1191
1192 /**
1193 * Delete a file
1194 * @deprecated Use {@link #delete(Path, boolean)} instead.
1195 */
1196 @Deprecated
1197 public boolean delete(Path f) throws IOException {
1198 return delete(f, true);
1199 }
1200
/** Delete a file.
 *
 * @param f the path to delete.
 * @param recursive if path is a directory and set to
 * true, the directory is deleted else throws an exception. In
 * case of a file the recursive can be set to either true or false.
 * @return  true if delete is successful else false.
 * @throws IOException on failure; see the specific implementation
 */
public abstract boolean delete(Path f, boolean recursive) throws IOException;
1211
1212 /**
1213 * Mark a path to be deleted when FileSystem is closed.
1214 * When the JVM shuts down,
1215 * all FileSystem objects will be closed automatically.
1216 * Then,
1217 * the marked path will be deleted as a result of closing the FileSystem.
1218 *
1219 * The path has to exist in the file system.
1220 *
1221 * @param f the path to delete.
1222 * @return true if deleteOnExit is successful, otherwise false.
1223 * @throws IOException
1224 */
1225 public boolean deleteOnExit(Path f) throws IOException {
1226 if (!exists(f)) {
1227 return false;
1228 }
1229 synchronized (deleteOnExit) {
1230 deleteOnExit.add(f);
1231 }
1232 return true;
1233 }
1234
1235 /**
1236 * Cancel the deletion of the path when the FileSystem is closed
1237 * @param f the path to cancel deletion
1238 */
1239 public boolean cancelDeleteOnExit(Path f) {
1240 synchronized (deleteOnExit) {
1241 return deleteOnExit.remove(f);
1242 }
1243 }
1244
1245 /**
1246 * Delete all files that were marked as delete-on-exit. This recursively
1247 * deletes all files in the specified paths.
1248 */
1249 protected void processDeleteOnExit() {
1250 synchronized (deleteOnExit) {
1251 for (Iterator<Path> iter = deleteOnExit.iterator(); iter.hasNext();) {
1252 Path path = iter.next();
1253 try {
1254 if (exists(path)) {
1255 delete(path, true);
1256 }
1257 }
1258 catch (IOException e) {
1259 LOG.info("Ignoring failure to deleteOnExit for path " + path);
1260 }
1261 iter.remove();
1262 }
1263 }
1264 }
1265
1266 /** Check if exists.
1267 * @param f source file
1268 */
1269 public boolean exists(Path f) throws IOException {
1270 try {
1271 return getFileStatus(f) != null;
1272 } catch (FileNotFoundException e) {
1273 return false;
1274 }
1275 }
1276
1277 /** True iff the named path is a directory.
1278 * Note: Avoid using this method. Instead reuse the FileStatus
1279 * returned by getFileStatus() or listStatus() methods.
1280 * @param f path to check
1281 */
1282 public boolean isDirectory(Path f) throws IOException {
1283 try {
1284 return getFileStatus(f).isDirectory();
1285 } catch (FileNotFoundException e) {
1286 return false; // f does not exist
1287 }
1288 }
1289
1290 /** True iff the named path is a regular file.
1291 * Note: Avoid using this method. Instead reuse the FileStatus
1292 * returned by getFileStatus() or listStatus() methods.
1293 * @param f path to check
1294 */
1295 public boolean isFile(Path f) throws IOException {
1296 try {
1297 return getFileStatus(f).isFile();
1298 } catch (FileNotFoundException e) {
1299 return false; // f does not exist
1300 }
1301 }
1302
1303 /** The number of bytes in a file. */
1304 /** @deprecated Use getFileStatus() instead */
1305 @Deprecated
1306 public long getLength(Path f) throws IOException {
1307 return getFileStatus(f).getLen();
1308 }
1309
1310 /** Return the {@link ContentSummary} of a given {@link Path}.
1311 * @param f path to use
1312 */
1313 public ContentSummary getContentSummary(Path f) throws IOException {
1314 FileStatus status = getFileStatus(f);
1315 if (status.isFile()) {
1316 // f is a file
1317 return new ContentSummary(status.getLen(), 1, 0);
1318 }
1319 // f is a directory
1320 long[] summary = {0, 0, 1};
1321 for(FileStatus s : listStatus(f)) {
1322 ContentSummary c = s.isDirectory() ? getContentSummary(s.getPath()) :
1323 new ContentSummary(s.getLen(), 1, 0);
1324 summary[0] += c.getLength();
1325 summary[1] += c.getFileCount();
1326 summary[2] += c.getDirectoryCount();
1327 }
1328 return new ContentSummary(summary[0], summary[1], summary[2]);
1329 }
1330
1331 final private static PathFilter DEFAULT_FILTER = new PathFilter() {
1332 public boolean accept(Path file) {
1333 return true;
1334 }
1335 };
1336
/**
 * List the statuses of the files/directories in the given path if the path is
 * a directory.
 *
 * @param f given path
 * @return the statuses of the files/directories in the given path
 * @throws FileNotFoundException when the path does not exist;
 *         IOException see specific implementation
 */
public abstract FileStatus[] listStatus(Path f) throws FileNotFoundException,
    IOException;
1348
1349 /*
1350 * Filter files/directories in the given path using the user-supplied path
1351 * filter. Results are added to the given array <code>results</code>.
1352 */
1353 private void listStatus(ArrayList<FileStatus> results, Path f,
1354 PathFilter filter) throws FileNotFoundException, IOException {
1355 FileStatus listing[] = listStatus(f);
1356 if (listing == null) {
1357 throw new IOException("Error accessing " + f);
1358 }
1359
1360 for (int i = 0; i < listing.length; i++) {
1361 if (filter.accept(listing[i].getPath())) {
1362 results.add(listing[i]);
1363 }
1364 }
1365 }
1366
1367 /**
1368 * @return an iterator over the corrupt files under the given path
1369 * (may contain duplicates if a file has more than one corrupt block)
1370 * @throws IOException
1371 */
1372 public RemoteIterator<Path> listCorruptFileBlocks(Path path)
1373 throws IOException {
1374 throw new UnsupportedOperationException(getClass().getCanonicalName() +
1375 " does not support" +
1376 " listCorruptFileBlocks");
1377 }
1378
1379 /**
1380 * Filter files/directories in the given path using the user-supplied path
1381 * filter.
1382 *
1383 * @param f
1384 * a path name
1385 * @param filter
1386 * the user-supplied path filter
1387 * @return an array of FileStatus objects for the files under the given path
1388 * after applying the filter
1389 * @throws FileNotFoundException when the path does not exist;
1390 * IOException see specific implementation
1391 */
1392 public FileStatus[] listStatus(Path f, PathFilter filter)
1393 throws FileNotFoundException, IOException {
1394 ArrayList<FileStatus> results = new ArrayList<FileStatus>();
1395 listStatus(results, f, filter);
1396 return results.toArray(new FileStatus[results.size()]);
1397 }
1398
1399 /**
1400 * Filter files/directories in the given list of paths using default
1401 * path filter.
1402 *
1403 * @param files
1404 * a list of paths
1405 * @return a list of statuses for the files under the given paths after
1406 * applying the filter default Path filter
1407 * @throws FileNotFoundException when the path does not exist;
1408 * IOException see specific implementation
1409 */
1410 public FileStatus[] listStatus(Path[] files)
1411 throws FileNotFoundException, IOException {
1412 return listStatus(files, DEFAULT_FILTER);
1413 }
1414
1415 /**
1416 * Filter files/directories in the given list of paths using user-supplied
1417 * path filter.
1418 *
1419 * @param files
1420 * a list of paths
1421 * @param filter
1422 * the user-supplied path filter
1423 * @return a list of statuses for the files under the given paths after
1424 * applying the filter
1425 * @throws FileNotFoundException when the path does not exist;
1426 * IOException see specific implementation
1427 */
1428 public FileStatus[] listStatus(Path[] files, PathFilter filter)
1429 throws FileNotFoundException, IOException {
1430 ArrayList<FileStatus> results = new ArrayList<FileStatus>();
1431 for (int i = 0; i < files.length; i++) {
1432 listStatus(results, files[i], filter);
1433 }
1434 return results.toArray(new FileStatus[results.size()]);
1435 }
1436
1437 /**
1438 * <p>Return all the files that match filePattern and are not checksum
1439 * files. Results are sorted by their names.
1440 *
1441 * <p>
1442 * A filename pattern is composed of <i>regular</i> characters and
1443 * <i>special pattern matching</i> characters, which are:
1444 *
1445 * <dl>
1446 * <dd>
1447 * <dl>
1448 * <p>
1449 * <dt> <tt> ? </tt>
1450 * <dd> Matches any single character.
1451 *
1452 * <p>
1453 * <dt> <tt> * </tt>
1454 * <dd> Matches zero or more characters.
1455 *
1456 * <p>
1457 * <dt> <tt> [<i>abc</i>] </tt>
1458 * <dd> Matches a single character from character set
1459 * <tt>{<i>a,b,c</i>}</tt>.
1460 *
1461 * <p>
1462 * <dt> <tt> [<i>a</i>-<i>b</i>] </tt>
1463 * <dd> Matches a single character from the character range
1464 * <tt>{<i>a...b</i>}</tt>. Note that character <tt><i>a</i></tt> must be
1465 * lexicographically less than or equal to character <tt><i>b</i></tt>.
1466 *
1467 * <p>
1468 * <dt> <tt> [^<i>a</i>] </tt>
1469 * <dd> Matches a single character that is not from character set or range
1470 * <tt>{<i>a</i>}</tt>. Note that the <tt>^</tt> character must occur
1471 * immediately to the right of the opening bracket.
1472 *
1473 * <p>
1474 * <dt> <tt> \<i>c</i> </tt>
1475 * <dd> Removes (escapes) any special meaning of character <i>c</i>.
1476 *
1477 * <p>
1478 * <dt> <tt> {ab,cd} </tt>
1479 * <dd> Matches a string from the string set <tt>{<i>ab, cd</i>} </tt>
1480 *
1481 * <p>
1482 * <dt> <tt> {ab,c{de,fh}} </tt>
1483 * <dd> Matches a string from the string set <tt>{<i>ab, cde, cfh</i>}</tt>
1484 *
1485 * </dl>
1486 * </dd>
1487 * </dl>
1488 *
1489 * @param pathPattern a regular expression specifying a pth pattern
1490
1491 * @return an array of paths that match the path pattern
1492 * @throws IOException
1493 */
1494 public FileStatus[] globStatus(Path pathPattern) throws IOException {
1495 return globStatus(pathPattern, DEFAULT_FILTER);
1496 }
1497
1498 /**
1499 * Return an array of FileStatus objects whose path names match pathPattern
1500 * and is accepted by the user-supplied path filter. Results are sorted by
1501 * their path names.
1502 * Return null if pathPattern has no glob and the path does not exist.
1503 * Return an empty array if pathPattern has a glob and no path matches it.
1504 *
1505 * @param pathPattern
1506 * a regular expression specifying the path pattern
1507 * @param filter
1508 * a user-supplied path filter
1509 * @return an array of FileStatus objects
1510 * @throws IOException if any I/O error occurs when fetching file status
1511 */
1512 public FileStatus[] globStatus(Path pathPattern, PathFilter filter)
1513 throws IOException {
1514 String filename = pathPattern.toUri().getPath();
1515 List<String> filePatterns = GlobExpander.expand(filename);
1516 if (filePatterns.size() == 1) {
1517 return globStatusInternal(pathPattern, filter);
1518 } else {
1519 List<FileStatus> results = new ArrayList<FileStatus>();
1520 for (String filePattern : filePatterns) {
1521 FileStatus[] files = globStatusInternal(new Path(filePattern), filter);
1522 for (FileStatus file : files) {
1523 results.add(file);
1524 }
1525 }
1526 return results.toArray(new FileStatus[results.size()]);
1527 }
1528 }
1529
/*
 * Glob a single path pattern (one that needs no further expansion into
 * alternatives) and return the matching statuses.
 *
 * The pattern's path is split on the separator. All components but the
 * last are resolved by globPathsLevel into candidate parent paths; the
 * last component is then either matched as a glob against the listings of
 * those parents, or appended literally and looked up.
 *
 * Returns null when no glob was encountered and nothing was found, an
 * empty array when a glob was encountered and nothing matched, and
 * otherwise the matches sorted with Arrays.sort.
 */
private FileStatus[] globStatusInternal(Path pathPattern, PathFilter filter)
    throws IOException {
  Path[] parents = new Path[1];
  int level = 0;
  String filename = pathPattern.toUri().getPath();

  // path has zero components (empty or just the separator)
  if ("".equals(filename) || Path.SEPARATOR.equals(filename)) {
    return getFileStatus(new Path[]{pathPattern});
  }

  // path has at least one component
  String[] components = filename.split(Path.SEPARATOR);
  // get the first component
  if (pathPattern.isAbsolute()) {
    parents[0] = new Path(Path.SEPARATOR);
    // for an absolute path the split yields a leading empty component,
    // so start matching from index 1
    level = 1;
  } else {
    parents[0] = new Path(Path.CUR_DIR);
  }

  // glob the paths that match the parent path, i.e., [0, components.length-1]
  // hasGlob is a one-element array so globPathsLevel can report back
  // whether any component contained a pattern
  boolean[] hasGlob = new boolean[]{false};
  Path[] parentPaths = globPathsLevel(parents, components, level, hasGlob);
  FileStatus[] results;
  if (parentPaths == null || parentPaths.length == 0) {
    results = null;
  } else {
    // Now work on the last component of the path
    GlobFilter fp = new GlobFilter(components[components.length - 1], filter);
    if (fp.hasPattern()) { // last component has a pattern
      // list parent directories and then glob the results
      try {
        results = listStatus(parentPaths, fp);
      } catch (FileNotFoundException e) {
        // a missing parent simply means there are no matches
        results = null;
      }
      hasGlob[0] = true;
    } else { // last component does not have a pattern
      // remove the quoting of metachars in a non-regexp expansion
      String name = unquotePathComponent(components[components.length - 1]);
      // get all the path names
      ArrayList<Path> filteredPaths = new ArrayList<Path>(parentPaths.length);
      for (int i = 0; i < parentPaths.length; i++) {
        parentPaths[i] = new Path(parentPaths[i], name);
        if (fp.accept(parentPaths[i])) {
          filteredPaths.add(parentPaths[i]);
        }
      }
      // get all their statuses
      // NOTE(review): getFileStatus(Path[]) is defined outside this chunk;
      // presumably it omits paths that do not exist - confirm.
      results = getFileStatus(
          filteredPaths.toArray(new Path[filteredPaths.size()]));
    }
  }

  // Normalize the "nothing found" result according to whether the
  // pathPattern contained a glob: empty array with a glob, null without
  if (results == null) {
    if (hasGlob[0]) {
      results = new FileStatus[0];
    }
  } else {
    if (results.length == 0 ) {
      if (!hasGlob[0]) {
        results = null;
      }
    } else {
      Arrays.sort(results);
    }
  }
  return results;
}
1601
1602 /*
1603 * For a path of N components, return a list of paths that match the
1604 * components [<code>level</code>, <code>N-1</code>].
1605 */
1606 private Path[] globPathsLevel(Path[] parents, String[] filePattern,
1607 int level, boolean[] hasGlob) throws IOException {
1608 if (level == filePattern.length - 1)
1609 return parents;
1610 if (parents == null || parents.length == 0) {
1611 return null;
1612 }
1613 GlobFilter fp = new GlobFilter(filePattern[level]);
1614 if (fp.hasPattern()) {
1615 try {
1616 parents = FileUtil.stat2Paths(listStatus(parents, fp));
1617 } catch (FileNotFoundException e) {
1618 parents = null;
1619 }
1620 hasGlob[0] = true;
1621 } else { // the component does not have a pattern
1622 // remove the quoting of metachars in a non-regexp expansion
1623 String name = unquotePathComponent(filePattern[level]);
1624 for (int i = 0; i < parents.length; i++) {
1625 parents[i] = new Path(parents[i], name);
1626 }
1627 }
1628 return globPathsLevel(parents, filePattern, level + 1, hasGlob);
1629 }
1630
1631 /**
1632 * The glob filter builds a regexp per path component. If the component
1633 * does not contain a shell metachar, then it falls back to appending the
1634 * raw string to the list of built up paths. This raw path needs to have
1635 * the quoting removed. Ie. convert all occurances of "\X" to "X"
1636 * @param name of the path component
1637 * @return the unquoted path component
1638 */
1639 private String unquotePathComponent(String name) {
1640 return name.replaceAll("\\\\(.)", "$1");
1641 }
1642
1643 /**
1644 * List the statuses of the files/directories in the given path if the path is
1645 * a directory.
1646 * Return the file's status and block locations If the path is a file.
1647 *
1648 * If a returned status is a file, it contains the file's block locations.
1649 *
1650 * @param f is the path
1651 *
1652 * @return an iterator that traverses statuses of the files/directories
1653 * in the given path
1654 *
1655 * @throws FileNotFoundException If <code>f</code> does not exist
1656 * @throws IOException If an I/O error occurred
1657 */
1658 public RemoteIterator<LocatedFileStatus> listLocatedStatus(final Path f)
1659 throws FileNotFoundException, IOException {
1660 return listLocatedStatus(f, DEFAULT_FILTER);
1661 }
1662
1663 /**
1664 * Listing a directory
1665 * The returned results include its block location if it is a file
1666 * The results are filtered by the given path filter
1667 * @param f a path
1668 * @param filter a path filter
1669 * @return an iterator that traverses statuses of the files/directories
1670 * in the given path
1671 * @throws FileNotFoundException if <code>f</code> does not exist
1672 * @throws IOException if any I/O error occurred
1673 */
1674 protected RemoteIterator<LocatedFileStatus> listLocatedStatus(final Path f,
1675 final PathFilter filter)
1676 throws FileNotFoundException, IOException {
1677 return new RemoteIterator<LocatedFileStatus>() {
1678 private final FileStatus[] stats = listStatus(f, filter);
1679 private int i = 0;
1680
1681 @Override
1682 public boolean hasNext() {
1683 return i<stats.length;
1684 }
1685
1686 @Override
1687 public LocatedFileStatus next() throws IOException {
1688 if (!hasNext()) {
1689 throw new NoSuchElementException("No more entry in " + f);
1690 }
1691 FileStatus result = stats[i++];
1692 BlockLocation[] locs = result.isFile() ?
1693 getFileBlockLocations(result.getPath(), 0, result.getLen()) :
1694 null;
1695 return new LocatedFileStatus(result, locs);
1696 }
1697 };
1698 }
1699
1700 /**
1701 * List the statuses and block locations of the files in the given path.
1702 *
1703 * If the path is a directory,
1704 * if recursive is false, returns files in the directory;
1705 * if recursive is true, return files in the subtree rooted at the path.
1706 * If the path is a file, return the file's status and block locations.
1707 *
1708 * @param f is the path
1709 * @param recursive if the subdirectories need to be traversed recursively
1710 *
1711 * @return an iterator that traverses statuses of the files
1712 *
1713 * @throws FileNotFoundException when the path does not exist;
1714 * IOException see specific implementation
1715 */
1716 public RemoteIterator<LocatedFileStatus> listFiles(
1717 final Path f, final boolean recursive)
1718 throws FileNotFoundException, IOException {
1719 return new RemoteIterator<LocatedFileStatus>() {
1720 private Stack<RemoteIterator<LocatedFileStatus>> itors =
1721 new Stack<RemoteIterator<LocatedFileStatus>>();
1722 private RemoteIterator<LocatedFileStatus> curItor =
1723 listLocatedStatus(f);
1724 private LocatedFileStatus curFile;
1725
1726 @Override
1727 public boolean hasNext() throws IOException {
1728 while (curFile == null) {
1729 if (curItor.hasNext()) {
1730 handleFileStat(curItor.next());
1731 } else if (!itors.empty()) {
1732 curItor = itors.pop();
1733 } else {
1734 return false;
1735 }
1736 }
1737 return true;
1738 }
1739
1740 /**
1741 * Process the input stat.
1742 * If it is a file, return the file stat.
1743 * If it is a directory, traverse the directory if recursive is true;
1744 * ignore it if recursive is false.
1745 * @param stat input status
1746 * @throws IOException if any IO error occurs
1747 */
1748 private void handleFileStat(LocatedFileStatus stat) throws IOException {
1749 if (stat.isFile()) { // file
1750 curFile = stat;
1751 } else if (recursive) { // directory
1752 itors.push(curItor);
1753 curItor = listLocatedStatus(stat.getPath());
1754 }
1755 }
1756
1757 @Override
1758 public LocatedFileStatus next() throws IOException {
1759 if (hasNext()) {
1760 LocatedFileStatus result = curFile;
1761 curFile = null;
1762 return result;
1763 }
1764 throw new java.util.NoSuchElementException("No more entry in " + f);
1765 }
1766 };
1767 }
1768
1769 /** Return the current user's home directory in this filesystem.
1770 * The default implementation returns "/user/$USER/".
1771 */
1772 public Path getHomeDirectory() {
1773 return this.makeQualified(
1774 new Path("/user/"+System.getProperty("user.name")));
1775 }
1776
1777
/**
 * Set the current working directory for the given file system. All relative
 * paths will be resolved relative to it.
 *
 * @param new_dir the path to use as the new working directory
 */
public abstract void setWorkingDirectory(Path new_dir);
1785
/**
 * Get the current working directory for the given file system.
 * @return the directory pathname
 */
public abstract Path getWorkingDirectory();
1791
1792
/**
 * Note: with the new FileContext class, getWorkingDirectory()
 * will be removed.
 * The working directory is implemented in FileContext.
 *
 * Some file systems like LocalFileSystem have an initial workingDir
 * that we use as the starting workingDir. For other file systems
 * like HDFS there is no built in notion of an initial workingDir.
 * This default implementation has none and returns null.
 *
 * @return if there is built in notion of workingDir then it
 * is returned; else a null is returned.
 */
protected Path getInitialWorkingDirectory() {
  return null;
}
1808
1809 /**
1810 * Call {@link #mkdirs(Path, FsPermission)} with default permission.
1811 */
1812 public boolean mkdirs(Path f) throws IOException {
1813 return mkdirs(f, FsPermission.getDefault());
1814 }
1815
/**
 * Make the given file and all non-existent parents into
 * directories. Has the semantics of Unix 'mkdir -p'.
 * Existence of the directory hierarchy is not an error.
 * @param f path to create
 * @param permission to apply to f
 * @return true if the operation succeeded; callers in this class treat
 *   false as failure
 * @throws IOException on failure; see the specific implementation
 */
public abstract boolean mkdirs(Path f, FsPermission permission
    ) throws IOException;
1825
1826 /**
1827 * The src file is on the local disk. Add it to FS at
1828 * the given dst name and the source is kept intact afterwards
1829 * @param src path
1830 * @param dst path
1831 */
1832 public void copyFromLocalFile(Path src, Path dst)
1833 throws IOException {
1834 copyFromLocalFile(false, src, dst);
1835 }
1836
1837 /**
1838 * The src files is on the local disk. Add it to FS at
1839 * the given dst name, removing the source afterwards.
1840 * @param srcs path
1841 * @param dst path
1842 */
1843 public void moveFromLocalFile(Path[] srcs, Path dst)
1844 throws IOException {
1845 copyFromLocalFile(true, true, srcs, dst);
1846 }
1847
1848 /**
1849 * The src file is on the local disk. Add it to FS at
1850 * the given dst name, removing the source afterwards.
1851 * @param src path
1852 * @param dst path
1853 */
1854 public void moveFromLocalFile(Path src, Path dst)
1855 throws IOException {
1856 copyFromLocalFile(true, src, dst);
1857 }
1858
1859 /**
1860 * The src file is on the local disk. Add it to FS at
1861 * the given dst name.
1862 * delSrc indicates if the source should be removed
1863 * @param delSrc whether to delete the src
1864 * @param src path
1865 * @param dst path
1866 */
1867 public void copyFromLocalFile(boolean delSrc, Path src, Path dst)
1868 throws IOException {
1869 copyFromLocalFile(delSrc, true, src, dst);
1870 }
1871
1872 /**
1873 * The src files are on the local disk. Add it to FS at
1874 * the given dst name.
1875 * delSrc indicates if the source should be removed
1876 * @param delSrc whether to delete the src
1877 * @param overwrite whether to overwrite an existing file
1878 * @param srcs array of paths which are source
1879 * @param dst path
1880 */
1881 public void copyFromLocalFile(boolean delSrc, boolean overwrite,
1882 Path[] srcs, Path dst)
1883 throws IOException {
1884 Configuration conf = getConf();
1885 FileUtil.copy(getLocal(conf), srcs, this, dst, delSrc, overwrite, conf);
1886 }
1887
1888 /**
1889 * The src file is on the local disk. Add it to FS at
1890 * the given dst name.
1891 * delSrc indicates if the source should be removed
1892 * @param delSrc whether to delete the src
1893 * @param overwrite whether to overwrite an existing file
1894 * @param src path
1895 * @param dst path
1896 */
1897 public void copyFromLocalFile(boolean delSrc, boolean overwrite,
1898 Path src, Path dst)
1899 throws IOException {
1900 Configuration conf = getConf();
1901 FileUtil.copy(getLocal(conf), src, this, dst, delSrc, overwrite, conf);
1902 }
1903
1904 /**
1905 * The src file is under FS, and the dst is on the local disk.
1906 * Copy it from FS control to the local dst name.
1907 * @param src path
1908 * @param dst path
1909 */
1910 public void copyToLocalFile(Path src, Path dst) throws IOException {
1911 copyToLocalFile(false, src, dst);
1912 }
1913
1914 /**
1915 * The src file is under FS, and the dst is on the local disk.
1916 * Copy it from FS control to the local dst name.
1917 * Remove the source afterwards
1918 * @param src path
1919 * @param dst path
1920 */
1921 public void moveToLocalFile(Path src, Path dst) throws IOException {
1922 copyToLocalFile(true, src, dst);
1923 }
1924
1925 /**
1926 * The src file is under FS, and the dst is on the local disk.
1927 * Copy it from FS control to the local dst name.
1928 * delSrc indicates if the src will be removed or not.
1929 * @param delSrc whether to delete the src
1930 * @param src path
1931 * @param dst path
1932 */
1933 public void copyToLocalFile(boolean delSrc, Path src, Path dst)
1934 throws IOException {
1935 copyToLocalFile(delSrc, src, dst, false);
1936 }
1937
1938 /**
1939 * The src file is under FS, and the dst is on the local disk. Copy it from FS
1940 * control to the local dst name. delSrc indicates if the src will be removed
1941 * or not. useRawLocalFileSystem indicates whether to use RawLocalFileSystem
1942 * as local file system or not. RawLocalFileSystem is non crc file system.So,
1943 * It will not create any crc files at local.
1944 *
1945 * @param delSrc
1946 * whether to delete the src
1947 * @param src
1948 * path
1949 * @param dst
1950 * path
1951 * @param useRawLocalFileSystem
1952 * whether to use RawLocalFileSystem as local file system or not.
1953 *
1954 * @throws IOException
1955 * - if any IO error
1956 */
1957 public void copyToLocalFile(boolean delSrc, Path src, Path dst,
1958 boolean useRawLocalFileSystem) throws IOException {
1959 Configuration conf = getConf();
1960 FileSystem local = null;
1961 if (useRawLocalFileSystem) {
1962 local = getLocal(conf).getRawFileSystem();
1963 } else {
1964 local = getLocal(conf);
1965 }
1966 FileUtil.copy(this, src, local, dst, delSrc, conf);
1967 }
1968
1969 /**
1970 * Returns a local File that the user can write output to. The caller
1971 * provides both the eventual FS target name and the local working
1972 * file. If the FS is local, we write directly into the target. If
1973 * the FS is remote, we write into the tmp local area.
1974 * @param fsOutputFile path of output file
1975 * @param tmpLocalFile path of local tmp file
1976 */
1977 public Path startLocalOutput(Path fsOutputFile, Path tmpLocalFile)
1978 throws IOException {
1979 return tmpLocalFile;
1980 }
1981
1982 /**
1983 * Called when we're all done writing to the target. A local FS will
1984 * do nothing, because we've written to exactly the right place. A remote
1985 * FS will copy the contents of tmpLocalFile to the correct target at
1986 * fsOutputFile.
1987 * @param fsOutputFile path of output file
1988 * @param tmpLocalFile path to local tmp file
1989 */
1990 public void completeLocalOutput(Path fsOutputFile, Path tmpLocalFile)
1991 throws IOException {
1992 moveFromLocalFile(tmpLocalFile, fsOutputFile);
1993 }
1994
1995 /**
1996 * No more filesystem operations are needed. Will
1997 * release any held locks.
1998 */
1999 public void close() throws IOException {
2000 // delete all files that were marked as delete-on-exit.
2001 processDeleteOnExit();
2002 CACHE.remove(this.key, this);
2003 }
2004
2005 /** Return the total size of all files in the filesystem.*/
2006 public long getUsed() throws IOException{
2007 long used = 0;
2008 FileStatus[] files = listStatus(new Path("/"));
2009 for(FileStatus file:files){
2010 used += file.getLen();
2011 }
2012 return used;
2013 }
2014
2015 /**
2016 * Get the block size for a particular file.
2017 * @param f the filename
2018 * @return the number of bytes in a block
2019 */
2020 /** @deprecated Use getFileStatus() instead */
2021 @Deprecated
2022 public long getBlockSize(Path f) throws IOException {
2023 return getFileStatus(f).getBlockSize();
2024 }
2025
2026 /** Return the number of bytes that large input files should be optimally
2027 * be split into to minimize i/o time. */
2028 public long getDefaultBlockSize() {
2029 // default to 32MB: large enough to minimize the impact of seeks
2030 return getConf().getLong("fs.local.block.size", 32 * 1024 * 1024);
2031 }
2032
2033 /** Return the number of bytes that large input files should be optimally
2034 * be split into to minimize i/o time. The given path will be used to
2035 * locate the actual filesystem. The full path does not have to exist.
2036 * @param f path of file
2037 * @return the default block size for the path's filesystem
2038 */
2039 public long getDefaultBlockSize(Path f) {
2040 return getDefaultBlockSize();
2041 }
2042
2043 /**
2044 * Get the default replication.
2045 */
2046 public short getDefaultReplication() { return 1; }
2047
2048 /**
2049 * Get the default replication for a path. The given path will be used to
2050 * locate the actual filesystem. The full path does not have to exist.
2051 * @param path of the file
2052 * @return default replication for the path's filesystem
2053 */
2054 public short getDefaultReplication(Path path) {
2055 return getDefaultReplication();
2056 }
2057
/**
 * Return a file status object that represents the path.
 *
 * @param f The path we want information from
 * @return a FileStatus object
 * @throws FileNotFoundException when the path does not exist
 * @throws IOException see specific implementation
 */
public abstract FileStatus getFileStatus(Path f) throws IOException;
2066
2067 /**
2068 * Get the checksum of a file.
2069 *
2070 * @param f The file path
2071 * @return The file checksum. The default return value is null,
2072 * which indicates that no checksum algorithm is implemented
2073 * in the corresponding FileSystem.
2074 */
2075 public FileChecksum getFileChecksum(Path f) throws IOException {
2076 return null;
2077 }
2078
2079 /**
2080 * Set the verify checksum flag. This is only applicable if the
2081 * corresponding FileSystem supports checksum. By default doesn't do anything.
2082 * @param verifyChecksum
2083 */
2084 public void setVerifyChecksum(boolean verifyChecksum) {
2085 //doesn't do anything
2086 }
2087
2088 /**
2089 * Set the write checksum flag. This is only applicable if the
2090 * corresponding FileSystem supports checksum. By default doesn't do anything.
2091 * @param writeChecksum
2092 */
2093 public void setWriteChecksum(boolean writeChecksum) {
2094 //doesn't do anything
2095 }
2096
2097 /**
2098 * Return a list of file status objects that corresponds to the list of paths
2099 * excluding those non-existent paths.
2100 *
2101 * @param paths
2102 * the list of paths we want information from
2103 * @return a list of FileStatus objects
2104 * @throws IOException
2105 * see specific implementation
2106 */
2107 private FileStatus[] getFileStatus(Path[] paths) throws IOException {
2108 if (paths == null) {
2109 return null;
2110 }
2111 ArrayList<FileStatus> results = new ArrayList<FileStatus>(paths.length);
2112 for (int i = 0; i < paths.length; i++) {
2113 try {
2114 results.add(getFileStatus(paths[i]));
2115 } catch (FileNotFoundException e) { // do nothing
2116 }
2117 }
2118 return results.toArray(new FileStatus[results.size()]);
2119 }
2120
2121 /**
2122 * Returns a status object describing the use and capacity of the
2123 * file system. If the file system has multiple partitions, the
2124 * use and capacity of the root partition is reflected.
2125 *
2126 * @return a FsStatus object
2127 * @throws IOException
2128 * see specific implementation
2129 */
2130 public FsStatus getStatus() throws IOException {
2131 return getStatus(null);
2132 }
2133
2134 /**
2135 * Returns a status object describing the use and capacity of the
2136 * file system. If the file system has multiple partitions, the
2137 * use and capacity of the partition pointed to by the specified
2138 * path is reflected.
2139 * @param p Path for which status should be obtained. null means
2140 * the default partition.
2141 * @return a FsStatus object
2142 * @throws IOException
2143 * see specific implementation
2144 */
2145 public FsStatus getStatus(Path p) throws IOException {
2146 return new FsStatus(Long.MAX_VALUE, 0, Long.MAX_VALUE);
2147 }
2148
2149 /**
2150 * Set permission of a path.
2151 * @param p
2152 * @param permission
2153 */
2154 public void setPermission(Path p, FsPermission permission
2155 ) throws IOException {
2156 }
2157
2158 /**
2159 * Set owner of a path (i.e. a file or a directory).
2160 * The parameters username and groupname cannot both be null.
2161 * @param p The path
2162 * @param username If it is null, the original username remains unchanged.
2163 * @param groupname If it is null, the original groupname remains unchanged.
2164 */
2165 public void setOwner(Path p, String username, String groupname
2166 ) throws IOException {
2167 }
2168
2169 /**
2170 * Set access time of a file
2171 * @param p The path
2172 * @param mtime Set the modification time of this file.
2173 * The number of milliseconds since Jan 1, 1970.
2174 * A value of -1 means that this call should not set modification time.
2175 * @param atime Set the access time of this file.
2176 * The number of milliseconds since Jan 1, 1970.
2177 * A value of -1 means that this call should not set access time.
2178 */
2179 public void setTimes(Path p, long mtime, long atime
2180 ) throws IOException {
2181 }
2182
2183 private static FileSystem createFileSystem(URI uri, Configuration conf
2184 ) throws IOException {
2185 Class<?> clazz = conf.getClass("fs." + uri.getScheme() + ".impl", null);
2186 if (clazz == null) {
2187 throw new IOException("No FileSystem for scheme: " + uri.getScheme());
2188 }
2189 FileSystem fs = (FileSystem)ReflectionUtils.newInstance(clazz, conf);
2190 fs.initialize(uri, conf);
2191 return fs;
2192 }
2193
2194 /** Caching FileSystem objects */
2195 static class Cache {
2196 private final ClientFinalizer clientFinalizer = new ClientFinalizer();
2197
2198 private final Map<Key, FileSystem> map = new HashMap<Key, FileSystem>();
2199 private final Set<Key> toAutoClose = new HashSet<Key>();
2200
2201 /** A variable that makes all objects in the cache unique */
2202 private static AtomicLong unique = new AtomicLong(1);
2203
2204 FileSystem get(URI uri, Configuration conf) throws IOException{
2205 Key key = new Key(uri, conf);
2206 return getInternal(uri, conf, key);
2207 }
2208
2209 /** The objects inserted into the cache using this method are all unique */
2210 FileSystem getUnique(URI uri, Configuration conf) throws IOException{
2211 Key key = new Key(uri, conf, unique.getAndIncrement());
2212 return getInternal(uri, conf, key);
2213 }
2214
2215 private FileSystem getInternal(URI uri, Configuration conf, Key key) throws IOException{
2216 FileSystem fs;
2217 synchronized (this) {
2218 fs = map.get(key);
2219 }
2220 if (fs != null) {
2221 return fs;
2222 }
2223
2224 fs = createFileSystem(uri, conf);
2225 synchronized (this) { // refetch the lock again
2226 FileSystem oldfs = map.get(key);
2227 if (oldfs != null) { // a file system is created while lock is releasing
2228 fs.close(); // close the new file system
2229 return oldfs; // return the old file system
2230 }
2231
2232 // now insert the new file system into the map
2233 if (map.isEmpty() ) {
2234 ShutdownHookManager.get().addShutdownHook(clientFinalizer, SHUTDOWN_HOOK_PRIORITY);
2235 }
2236 fs.key = key;
2237 map.put(key, fs);
2238 if (conf.getBoolean("fs.automatic.close", true)) {
2239 toAutoClose.add(key);
2240 }
2241 return fs;
2242 }
2243 }
2244
2245 synchronized void remove(Key key, FileSystem fs) {
2246 if (map.containsKey(key) && fs == map.get(key)) {
2247 map.remove(key);
2248 toAutoClose.remove(key);
2249 }
2250 }
2251
2252 synchronized void closeAll() throws IOException {
2253 closeAll(false);
2254 }
2255
2256 /**
2257 * Close all FileSystem instances in the Cache.
2258 * @param onlyAutomatic only close those that are marked for automatic closing
2259 */
2260 synchronized void closeAll(boolean onlyAutomatic) throws IOException {
2261 List<IOException> exceptions = new ArrayList<IOException>();
2262
2263 // Make a copy of the keys in the map since we'll be modifying
2264 // the map while iterating over it, which isn't safe.
2265 List<Key> keys = new ArrayList<Key>();
2266 keys.addAll(map.keySet());
2267
2268 for (Key key : keys) {
2269 final FileSystem fs = map.get(key);
2270
2271 if (onlyAutomatic && !toAutoClose.contains(key)) {
2272 continue;
2273 }
2274
2275 //remove from cache
2276 remove(key, fs);
2277
2278 if (fs != null) {
2279 try {
2280 fs.close();
2281 }
2282 catch(IOException ioe) {
2283 exceptions.add(ioe);
2284 }
2285 }
2286 }
2287
2288 if (!exceptions.isEmpty()) {
2289 throw MultipleIOException.createIOException(exceptions);
2290 }
2291 }
2292
2293 private class ClientFinalizer implements Runnable {
2294 public synchronized void run() {
2295 try {
2296 closeAll(true);
2297 } catch (IOException e) {
2298 LOG.info("FileSystem.Cache.closeAll() threw an exception:\n" + e);
2299 }
2300 }
2301 }
2302
2303 synchronized void closeAll(UserGroupInformation ugi) throws IOException {
2304 List<FileSystem> targetFSList = new ArrayList<FileSystem>();
2305 //Make a pass over the list and collect the filesystems to close
2306 //we cannot close inline since close() removes the entry from the Map
2307 for (Map.Entry<Key, FileSystem> entry : map.entrySet()) {
2308 final Key key = entry.getKey();
2309 final FileSystem fs = entry.getValue();
2310 if (ugi.equals(key.ugi) && fs != null) {
2311 targetFSList.add(fs);
2312 }
2313 }
2314 List<IOException> exceptions = new ArrayList<IOException>();
2315 //now make a pass over the target list and close each
2316 for (FileSystem fs : targetFSList) {
2317 try {
2318 fs.close();
2319 }
2320 catch(IOException ioe) {
2321 exceptions.add(ioe);
2322 }
2323 }
2324 if (!exceptions.isEmpty()) {
2325 throw MultipleIOException.createIOException(exceptions);
2326 }
2327 }
2328
2329 /** FileSystem.Cache.Key */
2330 static class Key {
2331 final String scheme;
2332 final String authority;
2333 final UserGroupInformation ugi;
2334 final long unique; // an artificial way to make a key unique
2335
2336 Key(URI uri, Configuration conf) throws IOException {
2337 this(uri, conf, 0);
2338 }
2339
2340 Key(URI uri, Configuration conf, long unique) throws IOException {
2341 scheme = uri.getScheme()==null?"":uri.getScheme().toLowerCase();
2342 authority = uri.getAuthority()==null?"":uri.getAuthority().toLowerCase();
2343 this.unique = unique;
2344
2345 this.ugi = UserGroupInformation.getCurrentUser();
2346 }
2347
2348 /** {@inheritDoc} */
2349 public int hashCode() {
2350 return (scheme + authority).hashCode() + ugi.hashCode() + (int)unique;
2351 }
2352
2353 static boolean isEqual(Object a, Object b) {
2354 return a == b || (a != null && a.equals(b));
2355 }
2356
2357 /** {@inheritDoc} */
2358 public boolean equals(Object obj) {
2359 if (obj == this) {
2360 return true;
2361 }
2362 if (obj != null && obj instanceof Key) {
2363 Key that = (Key)obj;
2364 return isEqual(this.scheme, that.scheme)
2365 && isEqual(this.authority, that.authority)
2366 && isEqual(this.ugi, that.ugi)
2367 && (this.unique == that.unique);
2368 }
2369 return false;
2370 }
2371
2372 /** {@inheritDoc} */
2373 public String toString() {
2374 return "("+ugi.toString() + ")@" + scheme + "://" + authority;
2375 }
2376 }
2377 }
2378
2379 public static final class Statistics {
2380 private final String scheme;
2381 private AtomicLong bytesRead = new AtomicLong();
2382 private AtomicLong bytesWritten = new AtomicLong();
2383 private AtomicInteger readOps = new AtomicInteger();
2384 private AtomicInteger largeReadOps = new AtomicInteger();
2385 private AtomicInteger writeOps = new AtomicInteger();
2386
2387 public Statistics(String scheme) {
2388 this.scheme = scheme;
2389 }
2390
2391 /**
2392 * Copy constructor.
2393 *
2394 * @param st
2395 * The input Statistics object which is cloned.
2396 */
2397 public Statistics(Statistics st) {
2398 this.scheme = st.scheme;
2399 this.bytesRead = new AtomicLong(st.bytesRead.longValue());
2400 this.bytesWritten = new AtomicLong(st.bytesWritten.longValue());
2401 }
2402
2403 /**
2404 * Increment the bytes read in the statistics
2405 * @param newBytes the additional bytes read
2406 */
2407 public void incrementBytesRead(long newBytes) {
2408 bytesRead.getAndAdd(newBytes);
2409 }
2410
2411 /**
2412 * Increment the bytes written in the statistics
2413 * @param newBytes the additional bytes written
2414 */
2415 public void incrementBytesWritten(long newBytes) {
2416 bytesWritten.getAndAdd(newBytes);
2417 }
2418
2419 /**
2420 * Increment the number of read operations
2421 * @param count number of read operations
2422 */
2423 public void incrementReadOps(int count) {
2424 readOps.getAndAdd(count);
2425 }
2426
2427 /**
2428 * Increment the number of large read operations
2429 * @param count number of large read operations
2430 */
2431 public void incrementLargeReadOps(int count) {
2432 largeReadOps.getAndAdd(count);
2433 }
2434
2435 /**
2436 * Increment the number of write operations
2437 * @param count number of write operations
2438 */
2439 public void incrementWriteOps(int count) {
2440 writeOps.getAndAdd(count);
2441 }
2442
2443 /**
2444 * Get the total number of bytes read
2445 * @return the number of bytes
2446 */
2447 public long getBytesRead() {
2448 return bytesRead.get();
2449 }
2450
2451 /**
2452 * Get the total number of bytes written
2453 * @return the number of bytes
2454 */
2455 public long getBytesWritten() {
2456 return bytesWritten.get();
2457 }
2458
2459 /**
2460 * Get the number of file system read operations such as list files
2461 * @return number of read operations
2462 */
2463 public int getReadOps() {
2464 return readOps.get() + largeReadOps.get();
2465 }
2466
2467 /**
2468 * Get the number of large file system read operations such as list files
2469 * under a large directory
2470 * @return number of large read operations
2471 */
2472 public int getLargeReadOps() {
2473 return largeReadOps.get();
2474 }
2475
2476 /**
2477 * Get the number of file system write operations such as create, append
2478 * rename etc.
2479 * @return number of write operations
2480 */
2481 public int getWriteOps() {
2482 return writeOps.get();
2483 }
2484
2485 public String toString() {
2486 return bytesRead + " bytes read, " + bytesWritten + " bytes written, "
2487 + readOps + " read ops, " + largeReadOps + " large read ops, "
2488 + writeOps + " write ops";
2489 }
2490
2491 /**
2492 * Reset the counts of bytes to 0.
2493 */
2494 public void reset() {
2495 bytesWritten.set(0);
2496 bytesRead.set(0);
2497 }
2498
2499 /**
2500 * Get the uri scheme associated with this statistics object.
2501 * @return the schema associated with this set of statistics
2502 */
2503 public String getScheme() {
2504 return scheme;
2505 }
2506 }
2507
2508 /**
2509 * Get the Map of Statistics object indexed by URI Scheme.
2510 * @return a Map having a key as URI scheme and value as Statistics object
2511 * @deprecated use {@link #getAllStatistics} instead
2512 */
2513 @Deprecated
2514 public static synchronized Map<String, Statistics> getStatistics() {
2515 Map<String, Statistics> result = new HashMap<String, Statistics>();
2516 for(Statistics stat: statisticsTable.values()) {
2517 result.put(stat.getScheme(), stat);
2518 }
2519 return result;
2520 }
2521
2522 /**
2523 * Return the FileSystem classes that have Statistics
2524 */
2525 public static synchronized List<Statistics> getAllStatistics() {
2526 return new ArrayList<Statistics>(statisticsTable.values());
2527 }
2528
2529 /**
2530 * Get the statistics for a particular file system
2531 * @param cls the class to lookup
2532 * @return a statistics object
2533 */
2534 public static synchronized
2535 Statistics getStatistics(String scheme, Class<? extends FileSystem> cls) {
2536 Statistics result = statisticsTable.get(cls);
2537 if (result == null) {
2538 result = new Statistics(scheme);
2539 statisticsTable.put(cls, result);
2540 }
2541 return result;
2542 }
2543
2544 /**
2545 * Reset all statistics for all file systems
2546 */
2547 public static synchronized void clearStatistics() {
2548 for(Statistics stat: statisticsTable.values()) {
2549 stat.reset();
2550 }
2551 }
2552
2553 /**
2554 * Print all statistics for all file systems
2555 */
2556 public static synchronized
2557 void printStatistics() throws IOException {
2558 for (Map.Entry<Class<? extends FileSystem>, Statistics> pair:
2559 statisticsTable.entrySet()) {
2560 System.out.println(" FileSystem " + pair.getKey().getName() +
2561 ": " + pair.getValue());
2562 }
2563 }
2564 }