001 /**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements. See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership. The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License. You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018 package org.apache.hadoop.fs;
019
020 import java.io.FileNotFoundException;
021 import java.io.IOException;
022 import java.io.InputStream;
023 import java.io.OutputStream;
024 import java.net.URI;
025 import java.security.PrivilegedExceptionAction;
026 import java.util.ArrayList;
027 import java.util.Arrays;
028 import java.util.EnumSet;
029 import java.util.HashSet;
030 import java.util.IdentityHashMap;
031 import java.util.List;
032 import java.util.Map;
033 import java.util.Set;
034 import java.util.Stack;
035 import java.util.TreeSet;
036 import java.util.Map.Entry;
037
038 import org.apache.commons.logging.Log;
039 import org.apache.commons.logging.LogFactory;
040 import org.apache.hadoop.HadoopIllegalArgumentException;
041 import org.apache.hadoop.classification.InterfaceAudience;
042 import org.apache.hadoop.classification.InterfaceStability;
043 import org.apache.hadoop.conf.Configuration;
044 import org.apache.hadoop.fs.FileSystem.Statistics;
045 import org.apache.hadoop.fs.Options.CreateOpts;
046 import org.apache.hadoop.fs.permission.FsPermission;
047 import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY;
048 import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_DEFAULT_NAME_DEFAULT;
049 import org.apache.hadoop.io.IOUtils;
050 import org.apache.hadoop.ipc.RpcClientException;
051 import org.apache.hadoop.ipc.RpcServerException;
052 import org.apache.hadoop.ipc.UnexpectedServerException;
053 import org.apache.hadoop.fs.InvalidPathException;
054 import org.apache.hadoop.security.AccessControlException;
055 import org.apache.hadoop.security.UserGroupInformation;
056 import org.apache.hadoop.security.token.Token;
057 import org.apache.hadoop.util.ShutdownHookManager;
058
059 /**
060 * The FileContext class provides an interface to the application writer for
061 * using the Hadoop file system.
062 * It provides a set of methods for the usual operation: create, open,
063 * list, etc
064 *
065 * <p>
066 * <b> *** Path Names *** </b>
067 * <p>
068 *
069 * The Hadoop file system supports a URI name space and URI names.
070 * It offers a forest of file systems that can be referenced using fully
071 * qualified URIs.
072 * Two common Hadoop file systems implementations are
073 * <ul>
074 * <li> the local file system: file:///path
075 * <li> the hdfs file system hdfs://nnAddress:nnPort/path
076 * </ul>
077 *
078 * While URI names are very flexible, it requires knowing the name or address
079 * of the server. For convenience one often wants to access the default system
080 * in one's environment without knowing its name/address. This has an
081 * additional benefit that it allows one to change one's default fs
082 * (e.g. admin moves application from cluster1 to cluster2).
083 * <p>
084 *
085 * To facilitate this, Hadoop supports a notion of a default file system.
086 * The user can set his default file system, although this is
087 * typically set up for you in your environment via your default config.
088 * A default file system implies a default scheme and authority; slash-relative
089 * names (such as /for/bar) are resolved relative to that default FS.
090 * Similarly a user can also have working-directory-relative names (i.e. names
091 * not starting with a slash). While the working directory is generally in the
092 * same default FS, the wd can be in a different FS.
093 * <p>
094 * Hence Hadoop path names can be one of:
095 * <ul>
096 * <li> fully qualified URI: scheme://authority/path
097 * <li> slash relative names: /path relative to the default file system
098 * <li> wd-relative names: path relative to the working dir
099 * </ul>
100 * Relative paths with scheme (scheme:foo/bar) are illegal.
101 *
102 * <p>
103 * <b>****The Role of the FileContext and configuration defaults****</b>
104 * <p>
105 * The FileContext provides file namespace context for resolving file names;
106 * it also contains the umask for permissions. In that sense it is like the
107 * per-process file-related state in Unix system.
108 * These two properties
109 * <ul>
110 * <li> default file system (i.e. your slash)
111 * <li> umask
112 * </ul>
113 * in general, are obtained from the default configuration file
114 * in your environment, (@see {@link Configuration}).
115 *
116 * No other configuration parameters are obtained from the default config as
117 * far as the file context layer is concerned. All file system instances
118 * (i.e. deployments of file systems) have default properties; we call these
119 * server side (SS) defaults. Operation like create allow one to select many
120 * properties: either pass them in as explicit parameters or use
121 * the SS properties.
122 * <p>
123 * The file system related SS defaults are
124 * <ul>
125 * <li> the home directory (default is "/user/userName")
126 * <li> the initial wd (only for local fs)
127 * <li> replication factor
128 * <li> block size
129 * <li> buffer size
130 * <li> encryptDataTransfer
131 * <li> checksum option. (checksumType and bytesPerChecksum)
132 * </ul>
133 *
134 * <p>
135 * <b> *** Usage Model for the FileContext class *** </b>
136 * <p>
137 * Example 1: use the default config read from the $HADOOP_CONFIG/core.xml.
138 * Unspecified values come from core-defaults.xml in the release jar.
139 * <ul>
140 * <li> myFContext = FileContext.getFileContext(); // uses the default config
141 * // which has your default FS
142 * <li> myFContext.create(path, ...);
143 * <li> myFContext.setWorkingDir(path)
144 * <li> myFContext.open (path, ...);
145 * </ul>
146 * Example 2: Get a FileContext with a specific URI as the default FS
147 * <ul>
148 * <li> myFContext = FileContext.getFileContext(URI)
149 * <li> myFContext.create(path, ...);
150 * ...
151 * </ul>
152 * Example 3: FileContext with local file system as the default
153 * <ul>
154 * <li> myFContext = FileContext.getLocalFSFileContext()
155 * <li> myFContext.create(path, ...);
156 * <li> ...
157 * </ul>
158 * Example 4: Use a specific config, ignoring $HADOOP_CONFIG
159 * Generally you should not need to use a specific config unless one has been passed to you.
160 * <ul>
161 * <li> configX = someConfigSomeOnePassedToYou.
162 * <li> myFContext = getFileContext(configX); // configX is not changed,
163 * // is passed down
164 * <li> myFContext.create(path, ...);
165 * <li>...
166 * </ul>
167 *
168 */
169
170 @InterfaceAudience.Public
171 @InterfaceStability.Evolving /*Evolving for a release,to be changed to Stable */
172 public final class FileContext {
173
174 public static final Log LOG = LogFactory.getLog(FileContext.class);
175 /**
176 * Default permission for directory and symlink
177 * In previous versions, this default permission was also used to
178 * create files, so files created end up with ugo+x permission.
179 * See HADOOP-9155 for detail.
180 * Two new constants are added to solve this, please use
181 * {@link FileContext#DIR_DEFAULT_PERM} for directory, and use
182 * {@link FileContext#FILE_DEFAULT_PERM} for file.
183 * This constant is kept for compatibility.
184 */
185 public static final FsPermission DEFAULT_PERM = FsPermission.getDefault();
186 /**
187 * Default permission for directory
188 */
189 public static final FsPermission DIR_DEFAULT_PERM = FsPermission.getDirDefault();
190 /**
191 * Default permission for file
192 */
193 public static final FsPermission FILE_DEFAULT_PERM = FsPermission.getFileDefault();
194
195 /**
196 * Priority of the FileContext shutdown hook.
197 */
198 public static final int SHUTDOWN_HOOK_PRIORITY = 20;
199
200 /**
201 * List of files that should be deleted on JVM shutdown.
202 */
203 static final Map<FileContext, Set<Path>> DELETE_ON_EXIT =
204 new IdentityHashMap<FileContext, Set<Path>>();
205
206 /** JVM shutdown hook thread. */
207 static final FileContextFinalizer FINALIZER =
208 new FileContextFinalizer();
209
210 private static final PathFilter DEFAULT_FILTER = new PathFilter() {
211 public boolean accept(final Path file) {
212 return true;
213 }
214 };
215
216 /**
217 * The FileContext is defined by.
218 * 1) defaultFS (slash)
219 * 2) wd
220 * 3) umask
221 */
222 private final AbstractFileSystem defaultFS; //default FS for this FileContext.
223 private Path workingDir; // Fully qualified
224 private FsPermission umask;
225 private final Configuration conf;
226 private final UserGroupInformation ugi;
227
228 private FileContext(final AbstractFileSystem defFs,
229 final FsPermission theUmask, final Configuration aConf) {
230 defaultFS = defFs;
231 umask = FsPermission.getUMask(aConf);
232 conf = aConf;
233 try {
234 ugi = UserGroupInformation.getCurrentUser();
235 } catch (IOException e) {
236 LOG.error("Exception in getCurrentUser: ",e);
237 throw new RuntimeException("Failed to get the current user " +
238 "while creating a FileContext", e);
239 }
240 /*
241 * Init the wd.
242 * WorkingDir is implemented at the FileContext layer
243 * NOT at the AbstractFileSystem layer.
244 * If the DefaultFS, such as localFilesystem has a notion of
245 * builtin WD, we use that as the initial WD.
246 * Otherwise the WD is initialized to the home directory.
247 */
248 workingDir = defaultFS.getInitialWorkingDirectory();
249 if (workingDir == null) {
250 workingDir = defaultFS.getHomeDirectory();
251 }
252 util = new Util(); // for the inner class
253 }
254
255 /*
256 * Remove relative part - return "absolute":
257 * If input is relative path ("foo/bar") add wd: ie "/<workingDir>/foo/bar"
258 * A fully qualified uri ("hdfs://nn:p/foo/bar") or a slash-relative path
259 * ("/foo/bar") are returned unchanged.
260 *
261 * Applications that use FileContext should use #makeQualified() since
262 * they really want a fully qualified URI.
263 * Hence this method is not called makeAbsolute() and
264 * has been deliberately declared private.
265 */
266 private Path fixRelativePart(Path p) {
267 if (p.isUriPathAbsolute()) {
268 return p;
269 } else {
270 return new Path(workingDir, p);
271 }
272 }
273
274 /**
275 * Delete all the paths that were marked as delete-on-exit.
276 */
277 static void processDeleteOnExit() {
278 synchronized (DELETE_ON_EXIT) {
279 Set<Entry<FileContext, Set<Path>>> set = DELETE_ON_EXIT.entrySet();
280 for (Entry<FileContext, Set<Path>> entry : set) {
281 FileContext fc = entry.getKey();
282 Set<Path> paths = entry.getValue();
283 for (Path path : paths) {
284 try {
285 fc.delete(path, true);
286 } catch (IOException e) {
287 LOG.warn("Ignoring failure to deleteOnExit for path " + path);
288 }
289 }
290 }
291 DELETE_ON_EXIT.clear();
292 }
293 }
294
295 /**
296 * Pathnames with scheme and relative path are illegal.
297 * @param path to be checked
298 */
299 private static void checkNotSchemeWithRelative(final Path path) {
300 if (path.toUri().isAbsolute() && !path.isUriPathAbsolute()) {
301 throw new HadoopIllegalArgumentException(
302 "Unsupported name: has scheme but relative path-part");
303 }
304 }
305
306 /**
307 * Get the file system of supplied path.
308 *
309 * @param absOrFqPath - absolute or fully qualified path
310 * @return the file system of the path
311 *
312 * @throws UnsupportedFileSystemException If the file system for
313 * <code>absOrFqPath</code> is not supported.
314 * @throws IOException If the file system for <code>absOrFqPath</code> could
315 * not be instantiated.
316 */
317 private AbstractFileSystem getFSofPath(final Path absOrFqPath)
318 throws UnsupportedFileSystemException, IOException {
319 checkNotSchemeWithRelative(absOrFqPath);
320 if (!absOrFqPath.isAbsolute() && absOrFqPath.toUri().getScheme() == null) {
321 throw new HadoopIllegalArgumentException(
322 "FileContext Bug: path is relative");
323 }
324
325 try {
326 // Is it the default FS for this FileContext?
327 defaultFS.checkPath(absOrFqPath);
328 return defaultFS;
329 } catch (Exception e) { // it is different FileSystem
330 return getAbstractFileSystem(ugi, absOrFqPath.toUri(), conf);
331 }
332 }
333
334 private static AbstractFileSystem getAbstractFileSystem(
335 UserGroupInformation user, final URI uri, final Configuration conf)
336 throws UnsupportedFileSystemException, IOException {
337 try {
338 return user.doAs(new PrivilegedExceptionAction<AbstractFileSystem>() {
339 public AbstractFileSystem run() throws UnsupportedFileSystemException {
340 return AbstractFileSystem.get(uri, conf);
341 }
342 });
343 } catch (InterruptedException ex) {
344 LOG.error(ex);
345 throw new IOException("Failed to get the AbstractFileSystem for path: "
346 + uri, ex);
347 }
348 }
349
350 /**
351 * Protected Static Factory methods for getting a FileContexts
352 * that take a AbstractFileSystem as input. To be used for testing.
353 */
354
355 /**
356 * Create a FileContext with specified FS as default using the specified
357 * config.
358 *
359 * @param defFS
360 * @param aConf
361 * @return new FileContext with specified FS as default.
362 */
363 public static FileContext getFileContext(final AbstractFileSystem defFS,
364 final Configuration aConf) {
365 return new FileContext(defFS, FsPermission.getUMask(aConf), aConf);
366 }
367
368 /**
369 * Create a FileContext for specified file system using the default config.
370 *
371 * @param defaultFS
372 * @return a FileContext with the specified AbstractFileSystem
373 * as the default FS.
374 */
375 protected static FileContext getFileContext(
376 final AbstractFileSystem defaultFS) {
377 return getFileContext(defaultFS, new Configuration());
378 }
379
380 /**
381 * Static Factory methods for getting a FileContext.
382 * Note new file contexts are created for each call.
383 * The only singleton is the local FS context using the default config.
384 *
385 * Methods that use the default config: the default config read from the
386 * $HADOOP_CONFIG/core.xml,
387 * Unspecified key-values for config are defaulted from core-defaults.xml
388 * in the release jar.
389 *
390 * The keys relevant to the FileContext layer are extracted at time of
391 * construction. Changes to the config after the call are ignored
392 * by the FileContext layer.
393 * The conf is passed to lower layers like AbstractFileSystem and HDFS which
394 * pick up their own config variables.
395 */
396
397 /**
398 * Create a FileContext using the default config read from the
399 * $HADOOP_CONFIG/core.xml, Unspecified key-values for config are defaulted
400 * from core-defaults.xml in the release jar.
401 *
402 * @throws UnsupportedFileSystemException If the file system from the default
403 * configuration is not supported
404 */
405 public static FileContext getFileContext()
406 throws UnsupportedFileSystemException {
407 return getFileContext(new Configuration());
408 }
409
410 /**
411 * @return a FileContext for the local file system using the default config.
412 * @throws UnsupportedFileSystemException If the file system for
413 * {@link FsConstants#LOCAL_FS_URI} is not supported.
414 */
415 public static FileContext getLocalFSFileContext()
416 throws UnsupportedFileSystemException {
417 return getFileContext(FsConstants.LOCAL_FS_URI);
418 }
419
420 /**
421 * Create a FileContext for specified URI using the default config.
422 *
423 * @param defaultFsUri
424 * @return a FileContext with the specified URI as the default FS.
425 *
426 * @throws UnsupportedFileSystemException If the file system for
427 * <code>defaultFsUri</code> is not supported
428 */
429 public static FileContext getFileContext(final URI defaultFsUri)
430 throws UnsupportedFileSystemException {
431 return getFileContext(defaultFsUri, new Configuration());
432 }
433
434 /**
435 * Create a FileContext for specified default URI using the specified config.
436 *
437 * @param defaultFsUri
438 * @param aConf
439 * @return new FileContext for specified uri
440 * @throws UnsupportedFileSystemException If the file system with specified is
441 * not supported
442 * @throws RuntimeException If the file system specified is supported but
443 * could not be instantiated, or if login fails.
444 */
445 public static FileContext getFileContext(final URI defaultFsUri,
446 final Configuration aConf) throws UnsupportedFileSystemException {
447 UserGroupInformation currentUser = null;
448 AbstractFileSystem defaultAfs = null;
449 try {
450 currentUser = UserGroupInformation.getCurrentUser();
451 defaultAfs = getAbstractFileSystem(currentUser, defaultFsUri, aConf);
452 } catch (UnsupportedFileSystemException ex) {
453 throw ex;
454 } catch (IOException ex) {
455 LOG.error(ex);
456 throw new RuntimeException(ex);
457 }
458 return getFileContext(defaultAfs, aConf);
459 }
460
461 /**
462 * Create a FileContext using the passed config. Generally it is better to use
463 * {@link #getFileContext(URI, Configuration)} instead of this one.
464 *
465 *
466 * @param aConf
467 * @return new FileContext
468 * @throws UnsupportedFileSystemException If file system in the config
469 * is not supported
470 */
471 public static FileContext getFileContext(final Configuration aConf)
472 throws UnsupportedFileSystemException {
473 return getFileContext(
474 URI.create(aConf.get(FS_DEFAULT_NAME_KEY, FS_DEFAULT_NAME_DEFAULT)),
475 aConf);
476 }
477
478 /**
479 * @param aConf - from which the FileContext is configured
480 * @return a FileContext for the local file system using the specified config.
481 *
482 * @throws UnsupportedFileSystemException If default file system in the config
483 * is not supported
484 *
485 */
486 public static FileContext getLocalFSFileContext(final Configuration aConf)
487 throws UnsupportedFileSystemException {
488 return getFileContext(FsConstants.LOCAL_FS_URI, aConf);
489 }
490
491 /* This method is needed for tests. */
492 @InterfaceAudience.Private
493 @InterfaceStability.Unstable /* return type will change to AFS once
494 HADOOP-6223 is completed */
495 public AbstractFileSystem getDefaultFileSystem() {
496 return defaultFS;
497 }
498
499 /**
500 * Set the working directory for wd-relative names (such as "foo/bar"). Working
501 * directory feature is provided by simply prefixing relative names with the
502 * working dir. Note this is different from Unix where the wd is actually set
503 * to the inode. Hence setWorkingDir does not follow symlinks etc. This works
504 * better in a distributed environment that has multiple independent roots.
505 * {@link #getWorkingDirectory()} should return what setWorkingDir() set.
506 *
507 * @param newWDir new working directory
508 * @throws IOException
509 * <br>
510 * NewWdir can be one of:
511 * <ul>
512 * <li>relative path: "foo/bar";</li>
513 * <li>absolute without scheme: "/foo/bar"</li>
514 * <li>fully qualified with scheme: "xx://auth/foo/bar"</li>
515 * </ul>
516 * <br>
517 * Illegal WDs:
518 * <ul>
519 * <li>relative with scheme: "xx:foo/bar"</li>
520 * <li>non existent directory</li>
521 * </ul>
522 */
523 public void setWorkingDirectory(final Path newWDir) throws IOException {
524 checkNotSchemeWithRelative(newWDir);
525 /* wd is stored as a fully qualified path. We check if the given
526 * path is not relative first since resolve requires and returns
527 * an absolute path.
528 */
529 final Path newWorkingDir = new Path(workingDir, newWDir);
530 FileStatus status = getFileStatus(newWorkingDir);
531 if (status.isFile()) {
532 throw new FileNotFoundException("Cannot setWD to a file");
533 }
534 workingDir = newWorkingDir;
535 }
536
537 /**
538 * Gets the working directory for wd-relative names (such as "foo/bar").
539 */
540 public Path getWorkingDirectory() {
541 return workingDir;
542 }
543
544 /**
545 * Gets the ugi in the file-context
546 * @return UserGroupInformation
547 */
548 public UserGroupInformation getUgi() {
549 return ugi;
550 }
551
552 /**
553 * Return the current user's home directory in this file system.
554 * The default implementation returns "/user/$USER/".
555 * @return the home directory
556 */
557 public Path getHomeDirectory() {
558 return defaultFS.getHomeDirectory();
559 }
560
561 /**
562 *
563 * @return the umask of this FileContext
564 */
565 public FsPermission getUMask() {
566 return umask;
567 }
568
569 /**
570 * Set umask to the supplied parameter.
571 * @param newUmask the new umask
572 */
573 public void setUMask(final FsPermission newUmask) {
574 umask = newUmask;
575 }
576
577
578 /**
579 * Resolve the path following any symlinks or mount points
580 * @param f to be resolved
581 * @return fully qualified resolved path
582 *
583 * @throws FileNotFoundException If <code>f</code> does not exist
584 * @throws AccessControlException if access denied
585 * @throws IOException If an IO Error occurred
586 *
587 * Exceptions applicable to file systems accessed over RPC:
588 * @throws RpcClientException If an exception occurred in the RPC client
589 * @throws RpcServerException If an exception occurred in the RPC server
590 * @throws UnexpectedServerException If server implementation throws
591 * undeclared exception to RPC server
592 *
593 * RuntimeExceptions:
594 * @throws InvalidPathException If path <code>f</code> is not valid
595 */
596 public Path resolvePath(final Path f) throws FileNotFoundException,
597 UnresolvedLinkException, AccessControlException, IOException {
598 return resolve(f);
599 }
600
601 /**
602 * Make the path fully qualified if it isn't already.
603 * A Fully-qualified path has scheme and authority specified and an absolute
604 * path.
605 * Use the default file system and working dir in this FileContext to qualify.
606 * @param path
607 * @return qualified path
608 */
609 public Path makeQualified(final Path path) {
610 return path.makeQualified(defaultFS.getUri(), getWorkingDirectory());
611 }
612
613 /**
614 * Create or overwrite file on indicated path and returns an output stream for
615 * writing into the file.
616 *
617 * @param f the file name to open
618 * @param createFlag gives the semantics of create; see {@link CreateFlag}
619 * @param opts file creation options; see {@link Options.CreateOpts}.
620 * <ul>
621 * <li>Progress - to report progress on the operation - default null
622 * <li>Permission - umask is applied against permission: default is
623 * {@link #FILE_DEFAULT_PERM}
624 *
625 * <li>CreateParent - create missing parent path; default is to not
626 * to create parents
627 * <li>The defaults for the following are SS defaults of the file
628 * server implementing the target path. Not all parameters make sense
629 * for all kinds of file system - eg. localFS ignores Blocksize,
630 * replication, checksum
631 * <ul>
632 * <li>BufferSize - buffersize used in FSDataOutputStream
633 * <li>Blocksize - block size for file blocks
634 * <li>ReplicationFactor - replication for blocks
635 * <li>ChecksumParam - Checksum parameters. server default is used
636 * if not specified.
637 * </ul>
638 * </ul>
639 *
640 * @return {@link FSDataOutputStream} for created file
641 *
642 * @throws AccessControlException If access is denied
643 * @throws FileAlreadyExistsException If file <code>f</code> already exists
644 * @throws FileNotFoundException If parent of <code>f</code> does not exist
645 * and <code>createParent</code> is false
646 * @throws ParentNotDirectoryException If parent of <code>f</code> is not a
647 * directory.
648 * @throws UnsupportedFileSystemException If file system for <code>f</code> is
649 * not supported
650 * @throws IOException If an I/O error occurred
651 *
652 * Exceptions applicable to file systems accessed over RPC:
653 * @throws RpcClientException If an exception occurred in the RPC client
654 * @throws RpcServerException If an exception occurred in the RPC server
655 * @throws UnexpectedServerException If server implementation throws
656 * undeclared exception to RPC server
657 *
658 * RuntimeExceptions:
659 * @throws InvalidPathException If path <code>f</code> is not valid
660 */
661 public FSDataOutputStream create(final Path f,
662 final EnumSet<CreateFlag> createFlag, Options.CreateOpts... opts)
663 throws AccessControlException, FileAlreadyExistsException,
664 FileNotFoundException, ParentNotDirectoryException,
665 UnsupportedFileSystemException, IOException {
666 Path absF = fixRelativePart(f);
667
668 // If one of the options is a permission, extract it & apply umask
669 // If not, add a default Perms and apply umask;
670 // AbstractFileSystem#create
671
672 CreateOpts.Perms permOpt =
673 (CreateOpts.Perms) CreateOpts.getOpt(CreateOpts.Perms.class, opts);
674 FsPermission permission = (permOpt != null) ? permOpt.getValue() :
675 FILE_DEFAULT_PERM;
676 permission = permission.applyUMask(umask);
677
678 final CreateOpts[] updatedOpts =
679 CreateOpts.setOpt(CreateOpts.perms(permission), opts);
680 return new FSLinkResolver<FSDataOutputStream>() {
681 public FSDataOutputStream next(final AbstractFileSystem fs, final Path p)
682 throws IOException {
683 return fs.create(p, createFlag, updatedOpts);
684 }
685 }.resolve(this, absF);
686 }
687
688 /**
689 * Make(create) a directory and all the non-existent parents.
690 *
691 * @param dir - the dir to make
692 * @param permission - permissions is set permission&~umask
693 * @param createParent - if true then missing parent dirs are created if false
694 * then parent must exist
695 *
696 * @throws AccessControlException If access is denied
697 * @throws FileAlreadyExistsException If directory <code>dir</code> already
698 * exists
699 * @throws FileNotFoundException If parent of <code>dir</code> does not exist
700 * and <code>createParent</code> is false
701 * @throws ParentNotDirectoryException If parent of <code>dir</code> is not a
702 * directory
703 * @throws UnsupportedFileSystemException If file system for <code>dir</code>
704 * is not supported
705 * @throws IOException If an I/O error occurred
706 *
707 * Exceptions applicable to file systems accessed over RPC:
708 * @throws RpcClientException If an exception occurred in the RPC client
709 * @throws UnexpectedServerException If server implementation throws
710 * undeclared exception to RPC server
711 *
712 * RuntimeExceptions:
713 * @throws InvalidPathException If path <code>dir</code> is not valid
714 */
715 public void mkdir(final Path dir, final FsPermission permission,
716 final boolean createParent) throws AccessControlException,
717 FileAlreadyExistsException, FileNotFoundException,
718 ParentNotDirectoryException, UnsupportedFileSystemException,
719 IOException {
720 final Path absDir = fixRelativePart(dir);
721 final FsPermission absFerms = (permission == null ?
722 FsPermission.getDirDefault() : permission).applyUMask(umask);
723 new FSLinkResolver<Void>() {
724 public Void next(final AbstractFileSystem fs, final Path p)
725 throws IOException, UnresolvedLinkException {
726 fs.mkdir(p, absFerms, createParent);
727 return null;
728 }
729 }.resolve(this, absDir);
730 }
731
732 /**
733 * Delete a file.
734 * @param f the path to delete.
735 * @param recursive if <code>f</code> is a directory and this is set to
736 * true, the directory is deleted; otherwise an exception is thrown. For
737 * a file, recursive may be set to either true or false.
738 *
739 * @throws AccessControlException If access is denied
740 * @throws FileNotFoundException If <code>f</code> does not exist
741 * @throws UnsupportedFileSystemException If file system for <code>f</code> is
742 * not supported
743 * @throws IOException If an I/O error occurred
744 *
745 * Exceptions applicable to file systems accessed over RPC:
746 * @throws RpcClientException If an exception occurred in the RPC client
747 * @throws RpcServerException If an exception occurred in the RPC server
748 * @throws UnexpectedServerException If server implementation throws
749 * undeclared exception to RPC server
750 *
751 * RuntimeExceptions:
752 * @throws InvalidPathException If path <code>f</code> is invalid
753 */
754 public boolean delete(final Path f, final boolean recursive)
755 throws AccessControlException, FileNotFoundException,
756 UnsupportedFileSystemException, IOException {
757 Path absF = fixRelativePart(f);
758 return new FSLinkResolver<Boolean>() {
759 public Boolean next(final AbstractFileSystem fs, final Path p)
760 throws IOException, UnresolvedLinkException {
761 return Boolean.valueOf(fs.delete(p, recursive));
762 }
763 }.resolve(this, absF);
764 }
765
766 /**
767 * Opens an FSDataInputStream at the indicated Path using
768 * default buffersize.
769 * @param f the file name to open
770 *
771 * @throws AccessControlException If access is denied
772 * @throws FileNotFoundException If file <code>f</code> does not exist
773 * @throws UnsupportedFileSystemException If file system for <code>f</code>
774 * is not supported
775 * @throws IOException If an I/O error occurred
776 *
777 * Exceptions applicable to file systems accessed over RPC:
778 * @throws RpcClientException If an exception occurred in the RPC client
779 * @throws RpcServerException If an exception occurred in the RPC server
780 * @throws UnexpectedServerException If server implementation throws
781 * undeclared exception to RPC server
782 */
783 public FSDataInputStream open(final Path f) throws AccessControlException,
784 FileNotFoundException, UnsupportedFileSystemException, IOException {
785 final Path absF = fixRelativePart(f);
786 return new FSLinkResolver<FSDataInputStream>() {
787 public FSDataInputStream next(final AbstractFileSystem fs, final Path p)
788 throws IOException, UnresolvedLinkException {
789 return fs.open(p);
790 }
791 }.resolve(this, absF);
792 }
793
794 /**
795 * Opens an FSDataInputStream at the indicated Path.
796 *
797 * @param f the file name to open
798 * @param bufferSize the size of the buffer to be used.
799 *
800 * @throws AccessControlException If access is denied
801 * @throws FileNotFoundException If file <code>f</code> does not exist
802 * @throws UnsupportedFileSystemException If file system for <code>f</code> is
803 * not supported
804 * @throws IOException If an I/O error occurred
805 *
806 * Exceptions applicable to file systems accessed over RPC:
807 * @throws RpcClientException If an exception occurred in the RPC client
808 * @throws RpcServerException If an exception occurred in the RPC server
809 * @throws UnexpectedServerException If server implementation throws
810 * undeclared exception to RPC server
811 */
812 public FSDataInputStream open(final Path f, final int bufferSize)
813 throws AccessControlException, FileNotFoundException,
814 UnsupportedFileSystemException, IOException {
815 final Path absF = fixRelativePart(f);
816 return new FSLinkResolver<FSDataInputStream>() {
817 public FSDataInputStream next(final AbstractFileSystem fs, final Path p)
818 throws IOException, UnresolvedLinkException {
819 return fs.open(p, bufferSize);
820 }
821 }.resolve(this, absF);
822 }
823
824 /**
825 * Set replication for an existing file.
826 *
827 * @param f file name
828 * @param replication new replication
829 *
830 * @return true if successful
831 *
832 * @throws AccessControlException If access is denied
833 * @throws FileNotFoundException If file <code>f</code> does not exist
834 * @throws IOException If an I/O error occurred
835 *
836 * Exceptions applicable to file systems accessed over RPC:
837 * @throws RpcClientException If an exception occurred in the RPC client
838 * @throws RpcServerException If an exception occurred in the RPC server
839 * @throws UnexpectedServerException If server implementation throws
840 * undeclared exception to RPC server
841 */
842 public boolean setReplication(final Path f, final short replication)
843 throws AccessControlException, FileNotFoundException,
844 IOException {
845 final Path absF = fixRelativePart(f);
846 return new FSLinkResolver<Boolean>() {
847 public Boolean next(final AbstractFileSystem fs, final Path p)
848 throws IOException, UnresolvedLinkException {
849 return Boolean.valueOf(fs.setReplication(p, replication));
850 }
851 }.resolve(this, absF);
852 }
853
854 /**
855 * Renames Path src to Path dst
856 * <ul>
857 * <li
858 * <li>Fails if src is a file and dst is a directory.
859 * <li>Fails if src is a directory and dst is a file.
860 * <li>Fails if the parent of dst does not exist or is a file.
861 * </ul>
862 * <p>
863 * If OVERWRITE option is not passed as an argument, rename fails if the dst
864 * already exists.
865 * <p>
866 * If OVERWRITE option is passed as an argument, rename overwrites the dst if
867 * it is a file or an empty directory. Rename fails if dst is a non-empty
868 * directory.
869 * <p>
870 * Note that atomicity of rename is dependent on the file system
871 * implementation. Please refer to the file system documentation for details
872 * <p>
873 *
874 * @param src path to be renamed
875 * @param dst new path after rename
876 *
877 * @throws AccessControlException If access is denied
878 * @throws FileAlreadyExistsException If <code>dst</code> already exists and
879 * <code>options</options> has {@link Options.Rename#OVERWRITE}
880 * option false.
881 * @throws FileNotFoundException If <code>src</code> does not exist
882 * @throws ParentNotDirectoryException If parent of <code>dst</code> is not a
883 * directory
884 * @throws UnsupportedFileSystemException If file system for <code>src</code>
885 * and <code>dst</code> is not supported
886 * @throws IOException If an I/O error occurred
887 *
888 * Exceptions applicable to file systems accessed over RPC:
889 * @throws RpcClientException If an exception occurred in the RPC client
890 * @throws RpcServerException If an exception occurred in the RPC server
891 * @throws UnexpectedServerException If server implementation throws
892 * undeclared exception to RPC server
893 */
894 public void rename(final Path src, final Path dst,
895 final Options.Rename... options) throws AccessControlException,
896 FileAlreadyExistsException, FileNotFoundException,
897 ParentNotDirectoryException, UnsupportedFileSystemException,
898 IOException {
899 final Path absSrc = fixRelativePart(src);
900 final Path absDst = fixRelativePart(dst);
901 AbstractFileSystem srcFS = getFSofPath(absSrc);
902 AbstractFileSystem dstFS = getFSofPath(absDst);
903 if(!srcFS.getUri().equals(dstFS.getUri())) {
904 throw new IOException("Renames across AbstractFileSystems not supported");
905 }
906 try {
907 srcFS.rename(absSrc, absDst, options);
908 } catch (UnresolvedLinkException e) {
909 /* We do not know whether the source or the destination path
910 * was unresolved. Resolve the source path up until the final
911 * path component, then fully resolve the destination.
912 */
913 final Path source = resolveIntermediate(absSrc);
914 new FSLinkResolver<Void>() {
915 public Void next(final AbstractFileSystem fs, final Path p)
916 throws IOException, UnresolvedLinkException {
917 fs.rename(source, p, options);
918 return null;
919 }
920 }.resolve(this, absDst);
921 }
922 }
923
924 /**
925 * Set permission of a path.
926 * @param f
927 * @param permission - the new absolute permission (umask is not applied)
928 *
929 * @throws AccessControlException If access is denied
930 * @throws FileNotFoundException If <code>f</code> does not exist
931 * @throws UnsupportedFileSystemException If file system for <code>f</code>
932 * is not supported
933 * @throws IOException If an I/O error occurred
934 *
935 * Exceptions applicable to file systems accessed over RPC:
936 * @throws RpcClientException If an exception occurred in the RPC client
937 * @throws RpcServerException If an exception occurred in the RPC server
938 * @throws UnexpectedServerException If server implementation throws
939 * undeclared exception to RPC server
940 */
941 public void setPermission(final Path f, final FsPermission permission)
942 throws AccessControlException, FileNotFoundException,
943 UnsupportedFileSystemException, IOException {
944 final Path absF = fixRelativePart(f);
945 new FSLinkResolver<Void>() {
946 public Void next(final AbstractFileSystem fs, final Path p)
947 throws IOException, UnresolvedLinkException {
948 fs.setPermission(p, permission);
949 return null;
950 }
951 }.resolve(this, absF);
952 }
953
954 /**
955 * Set owner of a path (i.e. a file or a directory). The parameters username
956 * and groupname cannot both be null.
957 *
958 * @param f The path
959 * @param username If it is null, the original username remains unchanged.
960 * @param groupname If it is null, the original groupname remains unchanged.
961 *
962 * @throws AccessControlException If access is denied
963 * @throws FileNotFoundException If <code>f</code> does not exist
964 * @throws UnsupportedFileSystemException If file system for <code>f</code> is
965 * not supported
966 * @throws IOException If an I/O error occurred
967 *
968 * Exceptions applicable to file systems accessed over RPC:
969 * @throws RpcClientException If an exception occurred in the RPC client
970 * @throws RpcServerException If an exception occurred in the RPC server
971 * @throws UnexpectedServerException If server implementation throws
972 * undeclared exception to RPC server
973 *
974 * RuntimeExceptions:
975 * @throws HadoopIllegalArgumentException If <code>username</code> or
976 * <code>groupname</code> is invalid.
977 */
978 public void setOwner(final Path f, final String username,
979 final String groupname) throws AccessControlException,
980 UnsupportedFileSystemException, FileNotFoundException,
981 IOException {
982 if ((username == null) && (groupname == null)) {
983 throw new HadoopIllegalArgumentException(
984 "username and groupname cannot both be null");
985 }
986 final Path absF = fixRelativePart(f);
987 new FSLinkResolver<Void>() {
988 public Void next(final AbstractFileSystem fs, final Path p)
989 throws IOException, UnresolvedLinkException {
990 fs.setOwner(p, username, groupname);
991 return null;
992 }
993 }.resolve(this, absF);
994 }
995
996 /**
997 * Set access time of a file.
998 * @param f The path
999 * @param mtime Set the modification time of this file.
1000 * The number of milliseconds since epoch (Jan 1, 1970).
1001 * A value of -1 means that this call should not set modification time.
1002 * @param atime Set the access time of this file.
1003 * The number of milliseconds since Jan 1, 1970.
1004 * A value of -1 means that this call should not set access time.
1005 *
1006 * @throws AccessControlException If access is denied
1007 * @throws FileNotFoundException If <code>f</code> does not exist
1008 * @throws UnsupportedFileSystemException If file system for <code>f</code> is
1009 * not supported
1010 * @throws IOException If an I/O error occurred
1011 *
1012 * Exceptions applicable to file systems accessed over RPC:
1013 * @throws RpcClientException If an exception occurred in the RPC client
1014 * @throws RpcServerException If an exception occurred in the RPC server
1015 * @throws UnexpectedServerException If server implementation throws
1016 * undeclared exception to RPC server
1017 */
1018 public void setTimes(final Path f, final long mtime, final long atime)
1019 throws AccessControlException, FileNotFoundException,
1020 UnsupportedFileSystemException, IOException {
1021 final Path absF = fixRelativePart(f);
1022 new FSLinkResolver<Void>() {
1023 public Void next(final AbstractFileSystem fs, final Path p)
1024 throws IOException, UnresolvedLinkException {
1025 fs.setTimes(p, mtime, atime);
1026 return null;
1027 }
1028 }.resolve(this, absF);
1029 }
1030
1031 /**
1032 * Get the checksum of a file.
1033 *
1034 * @param f file path
1035 *
1036 * @return The file checksum. The default return value is null,
1037 * which indicates that no checksum algorithm is implemented
1038 * in the corresponding FileSystem.
1039 *
1040 * @throws AccessControlException If access is denied
1041 * @throws FileNotFoundException If <code>f</code> does not exist
1042 * @throws IOException If an I/O error occurred
1043 *
1044 * Exceptions applicable to file systems accessed over RPC:
1045 * @throws RpcClientException If an exception occurred in the RPC client
1046 * @throws RpcServerException If an exception occurred in the RPC server
1047 * @throws UnexpectedServerException If server implementation throws
1048 * undeclared exception to RPC server
1049 */
1050 public FileChecksum getFileChecksum(final Path f)
1051 throws AccessControlException, FileNotFoundException,
1052 IOException {
1053 final Path absF = fixRelativePart(f);
1054 return new FSLinkResolver<FileChecksum>() {
1055 public FileChecksum next(final AbstractFileSystem fs, final Path p)
1056 throws IOException, UnresolvedLinkException {
1057 return fs.getFileChecksum(p);
1058 }
1059 }.resolve(this, absF);
1060 }
1061
1062 /**
1063 * Set the verify checksum flag for the file system denoted by the path.
1064 * This is only applicable if the
1065 * corresponding FileSystem supports checksum. By default doesn't do anything.
1066 * @param verifyChecksum
1067 * @param f set the verifyChecksum for the Filesystem containing this path
1068 *
1069 * @throws AccessControlException If access is denied
1070 * @throws FileNotFoundException If <code>f</code> does not exist
1071 * @throws UnsupportedFileSystemException If file system for <code>f</code> is
1072 * not supported
1073 * @throws IOException If an I/O error occurred
1074 *
1075 * Exceptions applicable to file systems accessed over RPC:
1076 * @throws RpcClientException If an exception occurred in the RPC client
1077 * @throws RpcServerException If an exception occurred in the RPC server
1078 * @throws UnexpectedServerException If server implementation throws
1079 * undeclared exception to RPC server
1080 */
1081 public void setVerifyChecksum(final boolean verifyChecksum, final Path f)
1082 throws AccessControlException, FileNotFoundException,
1083 UnsupportedFileSystemException, IOException {
1084 final Path absF = resolve(fixRelativePart(f));
1085 getFSofPath(absF).setVerifyChecksum(verifyChecksum);
1086 }
1087
1088 /**
1089 * Return a file status object that represents the path.
1090 * @param f The path we want information from
1091 *
1092 * @return a FileStatus object
1093 *
1094 * @throws AccessControlException If access is denied
1095 * @throws FileNotFoundException If <code>f</code> does not exist
1096 * @throws UnsupportedFileSystemException If file system for <code>f</code> is
1097 * not supported
1098 * @throws IOException If an I/O error occurred
1099 *
1100 * Exceptions applicable to file systems accessed over RPC:
1101 * @throws RpcClientException If an exception occurred in the RPC client
1102 * @throws RpcServerException If an exception occurred in the RPC server
1103 * @throws UnexpectedServerException If server implementation throws
1104 * undeclared exception to RPC server
1105 */
1106 public FileStatus getFileStatus(final Path f) throws AccessControlException,
1107 FileNotFoundException, UnsupportedFileSystemException, IOException {
1108 final Path absF = fixRelativePart(f);
1109 return new FSLinkResolver<FileStatus>() {
1110 public FileStatus next(final AbstractFileSystem fs, final Path p)
1111 throws IOException, UnresolvedLinkException {
1112 return fs.getFileStatus(p);
1113 }
1114 }.resolve(this, absF);
1115 }
1116
1117 /**
1118 * Return a fully qualified version of the given symlink target if it
1119 * has no scheme and authority. Partially and fully qualified paths
1120 * are returned unmodified.
1121 * @param pathFS The AbstractFileSystem of the path
1122 * @param pathWithLink Path that contains the symlink
1123 * @param target The symlink's absolute target
1124 * @return Fully qualified version of the target.
1125 */
1126 private Path qualifySymlinkTarget(final AbstractFileSystem pathFS,
1127 Path pathWithLink, Path target) {
1128 // NB: makeQualified uses the target's scheme and authority, if
1129 // specified, and the scheme and authority of pathFS, if not.
1130 final String scheme = target.toUri().getScheme();
1131 final String auth = target.toUri().getAuthority();
1132 return (scheme == null && auth == null)
1133 ? target.makeQualified(pathFS.getUri(), pathWithLink.getParent())
1134 : target;
1135 }
1136
1137 /**
1138 * Return a file status object that represents the path. If the path
1139 * refers to a symlink then the FileStatus of the symlink is returned.
1140 * The behavior is equivalent to #getFileStatus() if the underlying
1141 * file system does not support symbolic links.
1142 * @param f The path we want information from.
1143 * @return A FileStatus object
1144 *
1145 * @throws AccessControlException If access is denied
1146 * @throws FileNotFoundException If <code>f</code> does not exist
1147 * @throws UnsupportedFileSystemException If file system for <code>f</code> is
1148 * not supported
1149 * @throws IOException If an I/O error occurred
1150 */
1151 public FileStatus getFileLinkStatus(final Path f)
1152 throws AccessControlException, FileNotFoundException,
1153 UnsupportedFileSystemException, IOException {
1154 final Path absF = fixRelativePart(f);
1155 return new FSLinkResolver<FileStatus>() {
1156 public FileStatus next(final AbstractFileSystem fs, final Path p)
1157 throws IOException, UnresolvedLinkException {
1158 FileStatus fi = fs.getFileLinkStatus(p);
1159 if (fi.isSymlink()) {
1160 fi.setSymlink(qualifySymlinkTarget(fs, p, fi.getSymlink()));
1161 }
1162 return fi;
1163 }
1164 }.resolve(this, absF);
1165 }
1166
1167 /**
1168 * Returns the target of the given symbolic link as it was specified
1169 * when the link was created. Links in the path leading up to the
1170 * final path component are resolved transparently.
1171 *
1172 * @param f the path to return the target of
1173 * @return The un-interpreted target of the symbolic link.
1174 *
1175 * @throws AccessControlException If access is denied
1176 * @throws FileNotFoundException If path <code>f</code> does not exist
1177 * @throws UnsupportedFileSystemException If file system for <code>f</code> is
1178 * not supported
1179 * @throws IOException If the given path does not refer to a symlink
1180 * or an I/O error occurred
1181 */
1182 public Path getLinkTarget(final Path f) throws AccessControlException,
1183 FileNotFoundException, UnsupportedFileSystemException, IOException {
1184 final Path absF = fixRelativePart(f);
1185 return new FSLinkResolver<Path>() {
1186 public Path next(final AbstractFileSystem fs, final Path p)
1187 throws IOException, UnresolvedLinkException {
1188 FileStatus fi = fs.getFileLinkStatus(p);
1189 return fi.getSymlink();
1190 }
1191 }.resolve(this, absF);
1192 }
1193
1194 /**
1195 * Return blockLocation of the given file for the given offset and len.
1196 * For a nonexistent file or regions, null will be returned.
1197 *
1198 * This call is most helpful with DFS, where it returns
1199 * hostnames of machines that contain the given file.
1200 *
1201 * @param f - get blocklocations of this file
1202 * @param start position (byte offset)
1203 * @param len (in bytes)
1204 *
1205 * @return block locations for given file at specified offset of len
1206 *
1207 * @throws AccessControlException If access is denied
1208 * @throws FileNotFoundException If <code>f</code> does not exist
1209 * @throws UnsupportedFileSystemException If file system for <code>f</code> is
1210 * not supported
1211 * @throws IOException If an I/O error occurred
1212 *
1213 * Exceptions applicable to file systems accessed over RPC:
1214 * @throws RpcClientException If an exception occurred in the RPC client
1215 * @throws RpcServerException If an exception occurred in the RPC server
1216 * @throws UnexpectedServerException If server implementation throws
1217 * undeclared exception to RPC server
1218 *
1219 * RuntimeExceptions:
1220 * @throws InvalidPathException If path <code>f</code> is invalid
1221 */
1222 @InterfaceAudience.LimitedPrivate({"HDFS", "MapReduce"})
1223 @InterfaceStability.Evolving
1224 public BlockLocation[] getFileBlockLocations(final Path f, final long start,
1225 final long len) throws AccessControlException, FileNotFoundException,
1226 UnsupportedFileSystemException, IOException {
1227 final Path absF = fixRelativePart(f);
1228 return new FSLinkResolver<BlockLocation[]>() {
1229 public BlockLocation[] next(final AbstractFileSystem fs, final Path p)
1230 throws IOException, UnresolvedLinkException {
1231 return fs.getFileBlockLocations(p, start, len);
1232 }
1233 }.resolve(this, absF);
1234 }
1235
1236 /**
1237 * Returns a status object describing the use and capacity of the
1238 * file system denoted by the Parh argument p.
1239 * If the file system has multiple partitions, the
1240 * use and capacity of the partition pointed to by the specified
1241 * path is reflected.
1242 *
1243 * @param f Path for which status should be obtained. null means the
1244 * root partition of the default file system.
1245 *
1246 * @return a FsStatus object
1247 *
1248 * @throws AccessControlException If access is denied
1249 * @throws FileNotFoundException If <code>f</code> does not exist
1250 * @throws UnsupportedFileSystemException If file system for <code>f</code> is
1251 * not supported
1252 * @throws IOException If an I/O error occurred
1253 *
1254 * Exceptions applicable to file systems accessed over RPC:
1255 * @throws RpcClientException If an exception occurred in the RPC client
1256 * @throws RpcServerException If an exception occurred in the RPC server
1257 * @throws UnexpectedServerException If server implementation throws
1258 * undeclared exception to RPC server
1259 */
1260 public FsStatus getFsStatus(final Path f) throws AccessControlException,
1261 FileNotFoundException, UnsupportedFileSystemException, IOException {
1262 if (f == null) {
1263 return defaultFS.getFsStatus();
1264 }
1265 final Path absF = fixRelativePart(f);
1266 return new FSLinkResolver<FsStatus>() {
1267 public FsStatus next(final AbstractFileSystem fs, final Path p)
1268 throws IOException, UnresolvedLinkException {
1269 return fs.getFsStatus(p);
1270 }
1271 }.resolve(this, absF);
1272 }
1273
1274 /**
1275 * Creates a symbolic link to an existing file. An exception is thrown if
1276 * the symlink exits, the user does not have permission to create symlink,
1277 * or the underlying file system does not support symlinks.
1278 *
1279 * Symlink permissions are ignored, access to a symlink is determined by
1280 * the permissions of the symlink target.
1281 *
1282 * Symlinks in paths leading up to the final path component are resolved
1283 * transparently. If the final path component refers to a symlink some
1284 * functions operate on the symlink itself, these are:
1285 * - delete(f) and deleteOnExit(f) - Deletes the symlink.
1286 * - rename(src, dst) - If src refers to a symlink, the symlink is
1287 * renamed. If dst refers to a symlink, the symlink is over-written.
1288 * - getLinkTarget(f) - Returns the target of the symlink.
1289 * - getFileLinkStatus(f) - Returns a FileStatus object describing
1290 * the symlink.
1291 * Some functions, create() and mkdir(), expect the final path component
1292 * does not exist. If they are given a path that refers to a symlink that
1293 * does exist they behave as if the path referred to an existing file or
1294 * directory. All other functions fully resolve, ie follow, the symlink.
1295 * These are: open, setReplication, setOwner, setTimes, setWorkingDirectory,
1296 * setPermission, getFileChecksum, setVerifyChecksum, getFileBlockLocations,
1297 * getFsStatus, getFileStatus, exists, and listStatus.
1298 *
1299 * Symlink targets are stored as given to createSymlink, assuming the
1300 * underlying file system is capable of storing a fully qualified URI.
1301 * Dangling symlinks are permitted. FileContext supports four types of
1302 * symlink targets, and resolves them as follows
1303 * <pre>
1304 * Given a path referring to a symlink of form:
1305 *
1306 * <---X--->
1307 * fs://host/A/B/link
1308 * <-----Y----->
1309 *
1310 * In this path X is the scheme and authority that identify the file system,
1311 * and Y is the path leading up to the final path component "link". If Y is
1312 * a symlink itself then let Y' be the target of Y and X' be the scheme and
1313 * authority of Y'. Symlink targets may:
1314 *
1315 * 1. Fully qualified URIs
1316 *
1317 * fs://hostX/A/B/file Resolved according to the target file system.
1318 *
1319 * 2. Partially qualified URIs (eg scheme but no host)
1320 *
1321 * fs:///A/B/file Resolved according to the target file sytem. Eg resolving
1322 * a symlink to hdfs:///A results in an exception because
1323 * HDFS URIs must be fully qualified, while a symlink to
1324 * file:///A will not since Hadoop's local file systems
1325 * require partially qualified URIs.
1326 *
1327 * 3. Relative paths
1328 *
1329 * path Resolves to [Y'][path]. Eg if Y resolves to hdfs://host/A and path
1330 * is "../B/file" then [Y'][path] is hdfs://host/B/file
1331 *
1332 * 4. Absolute paths
1333 *
1334 * path Resolves to [X'][path]. Eg if Y resolves hdfs://host/A/B and path
1335 * is "/file" then [X][path] is hdfs://host/file
1336 * </pre>
1337 *
1338 * @param target the target of the symbolic link
1339 * @param link the path to be created that points to target
1340 * @param createParent if true then missing parent dirs are created if
1341 * false then parent must exist
1342 *
1343 *
1344 * @throws AccessControlException If access is denied
1345 * @throws FileAlreadyExistsException If file <code>linkcode> already exists
1346 * @throws FileNotFoundException If <code>target</code> does not exist
1347 * @throws ParentNotDirectoryException If parent of <code>link</code> is not a
1348 * directory.
1349 * @throws UnsupportedFileSystemException If file system for
1350 * <code>target</code> or <code>link</code> is not supported
1351 * @throws IOException If an I/O error occurred
1352 */
1353 public void createSymlink(final Path target, final Path link,
1354 final boolean createParent) throws AccessControlException,
1355 FileAlreadyExistsException, FileNotFoundException,
1356 ParentNotDirectoryException, UnsupportedFileSystemException,
1357 IOException {
1358 final Path nonRelLink = fixRelativePart(link);
1359 new FSLinkResolver<Void>() {
1360 public Void next(final AbstractFileSystem fs, final Path p)
1361 throws IOException, UnresolvedLinkException {
1362 fs.createSymlink(target, p, createParent);
1363 return null;
1364 }
1365 }.resolve(this, nonRelLink);
1366 }
1367
1368 /**
1369 * List the statuses of the files/directories in the given path if the path is
1370 * a directory.
1371 *
1372 * @param f is the path
1373 *
1374 * @return an iterator that traverses statuses of the files/directories
1375 * in the given path
1376 *
1377 * @throws AccessControlException If access is denied
1378 * @throws FileNotFoundException If <code>f</code> does not exist
1379 * @throws UnsupportedFileSystemException If file system for <code>f</code> is
1380 * not supported
1381 * @throws IOException If an I/O error occurred
1382 *
1383 * Exceptions applicable to file systems accessed over RPC:
1384 * @throws RpcClientException If an exception occurred in the RPC client
1385 * @throws RpcServerException If an exception occurred in the RPC server
1386 * @throws UnexpectedServerException If server implementation throws
1387 * undeclared exception to RPC server
1388 */
1389 public RemoteIterator<FileStatus> listStatus(final Path f) throws
1390 AccessControlException, FileNotFoundException,
1391 UnsupportedFileSystemException, IOException {
1392 final Path absF = fixRelativePart(f);
1393 return new FSLinkResolver<RemoteIterator<FileStatus>>() {
1394 public RemoteIterator<FileStatus> next(
1395 final AbstractFileSystem fs, final Path p)
1396 throws IOException, UnresolvedLinkException {
1397 return fs.listStatusIterator(p);
1398 }
1399 }.resolve(this, absF);
1400 }
1401
1402 /**
1403 * @return an iterator over the corrupt files under the given path
1404 * (may contain duplicates if a file has more than one corrupt block)
1405 * @throws IOException
1406 */
1407 public RemoteIterator<Path> listCorruptFileBlocks(Path path)
1408 throws IOException {
1409 final Path absF = fixRelativePart(path);
1410 return new FSLinkResolver<RemoteIterator<Path>>() {
1411 @Override
1412 public RemoteIterator<Path> next(final AbstractFileSystem fs,
1413 final Path p)
1414 throws IOException, UnresolvedLinkException {
1415 return fs.listCorruptFileBlocks(p);
1416 }
1417 }.resolve(this, absF);
1418 }
1419
1420 /**
1421 * List the statuses of the files/directories in the given path if the path is
1422 * a directory.
1423 * Return the file's status and block locations If the path is a file.
1424 *
1425 * If a returned status is a file, it contains the file's block locations.
1426 *
1427 * @param f is the path
1428 *
1429 * @return an iterator that traverses statuses of the files/directories
1430 * in the given path
1431 * If any IO exception (for example the input directory gets deleted while
1432 * listing is being executed), next() or hasNext() of the returned iterator
1433 * may throw a RuntimeException with the io exception as the cause.
1434 *
1435 * @throws AccessControlException If access is denied
1436 * @throws FileNotFoundException If <code>f</code> does not exist
1437 * @throws UnsupportedFileSystemException If file system for <code>f</code> is
1438 * not supported
1439 * @throws IOException If an I/O error occurred
1440 *
1441 * Exceptions applicable to file systems accessed over RPC:
1442 * @throws RpcClientException If an exception occurred in the RPC client
1443 * @throws RpcServerException If an exception occurred in the RPC server
1444 * @throws UnexpectedServerException If server implementation throws
1445 * undeclared exception to RPC server
1446 */
1447 public RemoteIterator<LocatedFileStatus> listLocatedStatus(
1448 final Path f) throws
1449 AccessControlException, FileNotFoundException,
1450 UnsupportedFileSystemException, IOException {
1451 final Path absF = fixRelativePart(f);
1452 return new FSLinkResolver<RemoteIterator<LocatedFileStatus>>() {
1453 public RemoteIterator<LocatedFileStatus> next(
1454 final AbstractFileSystem fs, final Path p)
1455 throws IOException, UnresolvedLinkException {
1456 return fs.listLocatedStatus(p);
1457 }
1458 }.resolve(this, absF);
1459 }
1460
1461 /**
1462 * Mark a path to be deleted on JVM shutdown.
1463 *
1464 * @param f the existing path to delete.
1465 *
1466 * @return true if deleteOnExit is successful, otherwise false.
1467 *
1468 * @throws AccessControlException If access is denied
1469 * @throws UnsupportedFileSystemException If file system for <code>f</code> is
1470 * not supported
1471 * @throws IOException If an I/O error occurred
1472 *
1473 * Exceptions applicable to file systems accessed over RPC:
1474 * @throws RpcClientException If an exception occurred in the RPC client
1475 * @throws RpcServerException If an exception occurred in the RPC server
1476 * @throws UnexpectedServerException If server implementation throws
1477 * undeclared exception to RPC server
1478 */
1479 public boolean deleteOnExit(Path f) throws AccessControlException,
1480 IOException {
1481 if (!this.util().exists(f)) {
1482 return false;
1483 }
1484 synchronized (DELETE_ON_EXIT) {
1485 if (DELETE_ON_EXIT.isEmpty()) {
1486 ShutdownHookManager.get().addShutdownHook(FINALIZER, SHUTDOWN_HOOK_PRIORITY);
1487 }
1488
1489 Set<Path> set = DELETE_ON_EXIT.get(this);
1490 if (set == null) {
1491 set = new TreeSet<Path>();
1492 DELETE_ON_EXIT.put(this, set);
1493 }
1494 set.add(f);
1495 }
1496 return true;
1497 }
1498
1499 private final Util util;
1500 public Util util() {
1501 return util;
1502 }
1503
1504
/**
 * Utility/library methods built over the basic FileContext methods.
 * Since these are library functions, the operations are not atomic
 * and some of them may partially complete if other threads are making
 * changes to the same part of the name space.
 */
1511 public class Util {
1512 /**
1513 * Does the file exist?
1514 * Note: Avoid using this method if you already have FileStatus in hand.
1515 * Instead reuse the FileStatus
1516 * @param f the file or dir to be checked
1517 *
1518 * @throws AccessControlException If access is denied
1519 * @throws IOException If an I/O error occurred
1520 * @throws UnsupportedFileSystemException If file system for <code>f</code> is
1521 * not supported
1522 *
1523 * Exceptions applicable to file systems accessed over RPC:
1524 * @throws RpcClientException If an exception occurred in the RPC client
1525 * @throws RpcServerException If an exception occurred in the RPC server
1526 * @throws UnexpectedServerException If server implementation throws
1527 * undeclared exception to RPC server
1528 */
1529 public boolean exists(final Path f) throws AccessControlException,
1530 UnsupportedFileSystemException, IOException {
1531 try {
1532 FileStatus fs = FileContext.this.getFileStatus(f);
1533 assert fs != null;
1534 return true;
1535 } catch (FileNotFoundException e) {
1536 return false;
1537 }
1538 }
1539
1540 /**
1541 * Return a list of file status objects that corresponds to supplied paths
1542 * excluding those non-existent paths.
1543 *
1544 * @param paths list of paths we want information from
1545 *
1546 * @return a list of FileStatus objects
1547 *
1548 * @throws AccessControlException If access is denied
1549 * @throws IOException If an I/O error occurred
1550 *
1551 * Exceptions applicable to file systems accessed over RPC:
1552 * @throws RpcClientException If an exception occurred in the RPC client
1553 * @throws RpcServerException If an exception occurred in the RPC server
1554 * @throws UnexpectedServerException If server implementation throws
1555 * undeclared exception to RPC server
1556 */
1557 private FileStatus[] getFileStatus(Path[] paths)
1558 throws AccessControlException, IOException {
1559 if (paths == null) {
1560 return null;
1561 }
1562 ArrayList<FileStatus> results = new ArrayList<FileStatus>(paths.length);
1563 for (int i = 0; i < paths.length; i++) {
1564 try {
1565 results.add(FileContext.this.getFileStatus(paths[i]));
1566 } catch (FileNotFoundException fnfe) {
1567 // ignoring
1568 }
1569 }
1570 return results.toArray(new FileStatus[results.size()]);
1571 }
1572
1573
1574 /**
1575 * Return the {@link ContentSummary} of path f.
1576 * @param f path
1577 *
1578 * @return the {@link ContentSummary} of path f.
1579 *
1580 * @throws AccessControlException If access is denied
1581 * @throws FileNotFoundException If <code>f</code> does not exist
1582 * @throws UnsupportedFileSystemException If file system for
1583 * <code>f</code> is not supported
1584 * @throws IOException If an I/O error occurred
1585 *
1586 * Exceptions applicable to file systems accessed over RPC:
1587 * @throws RpcClientException If an exception occurred in the RPC client
1588 * @throws RpcServerException If an exception occurred in the RPC server
1589 * @throws UnexpectedServerException If server implementation throws
1590 * undeclared exception to RPC server
1591 */
1592 public ContentSummary getContentSummary(Path f)
1593 throws AccessControlException, FileNotFoundException,
1594 UnsupportedFileSystemException, IOException {
1595 FileStatus status = FileContext.this.getFileStatus(f);
1596 if (status.isFile()) {
1597 return new ContentSummary(status.getLen(), 1, 0);
1598 }
1599 long[] summary = {0, 0, 1};
1600 RemoteIterator<FileStatus> statusIterator =
1601 FileContext.this.listStatus(f);
1602 while(statusIterator.hasNext()) {
1603 FileStatus s = statusIterator.next();
1604 ContentSummary c = s.isDirectory() ? getContentSummary(s.getPath()) :
1605 new ContentSummary(s.getLen(), 1, 0);
1606 summary[0] += c.getLength();
1607 summary[1] += c.getFileCount();
1608 summary[2] += c.getDirectoryCount();
1609 }
1610 return new ContentSummary(summary[0], summary[1], summary[2]);
1611 }
1612
1613 /**
1614 * See {@link #listStatus(Path[], PathFilter)}
1615 */
1616 public FileStatus[] listStatus(Path[] files) throws AccessControlException,
1617 FileNotFoundException, IOException {
1618 return listStatus(files, DEFAULT_FILTER);
1619 }
1620
1621 /**
1622 * Filter files/directories in the given path using the user-supplied path
1623 * filter.
1624 *
1625 * @param f is the path name
1626 * @param filter is the user-supplied path filter
1627 *
1628 * @return an array of FileStatus objects for the files under the given path
1629 * after applying the filter
1630 *
1631 * @throws AccessControlException If access is denied
1632 * @throws FileNotFoundException If <code>f</code> does not exist
1633 * @throws UnsupportedFileSystemException If file system for
1634 * <code>pathPattern</code> is not supported
1635 * @throws IOException If an I/O error occurred
1636 *
1637 * Exceptions applicable to file systems accessed over RPC:
1638 * @throws RpcClientException If an exception occurred in the RPC client
1639 * @throws RpcServerException If an exception occurred in the RPC server
1640 * @throws UnexpectedServerException If server implementation throws
1641 * undeclared exception to RPC server
1642 */
1643 public FileStatus[] listStatus(Path f, PathFilter filter)
1644 throws AccessControlException, FileNotFoundException,
1645 UnsupportedFileSystemException, IOException {
1646 ArrayList<FileStatus> results = new ArrayList<FileStatus>();
1647 listStatus(results, f, filter);
1648 return results.toArray(new FileStatus[results.size()]);
1649 }
1650
1651 /**
1652 * Filter files/directories in the given list of paths using user-supplied
1653 * path filter.
1654 *
1655 * @param files is a list of paths
1656 * @param filter is the filter
1657 *
1658 * @return a list of statuses for the files under the given paths after
1659 * applying the filter
1660 *
1661 * @throws AccessControlException If access is denied
1662 * @throws FileNotFoundException If a file in <code>files</code> does not
1663 * exist
1664 * @throws IOException If an I/O error occurred
1665 *
1666 * Exceptions applicable to file systems accessed over RPC:
1667 * @throws RpcClientException If an exception occurred in the RPC client
1668 * @throws RpcServerException If an exception occurred in the RPC server
1669 * @throws UnexpectedServerException If server implementation throws
1670 * undeclared exception to RPC server
1671 */
1672 public FileStatus[] listStatus(Path[] files, PathFilter filter)
1673 throws AccessControlException, FileNotFoundException, IOException {
1674 ArrayList<FileStatus> results = new ArrayList<FileStatus>();
1675 for (int i = 0; i < files.length; i++) {
1676 listStatus(results, files[i], filter);
1677 }
1678 return results.toArray(new FileStatus[results.size()]);
1679 }
1680
1681 /*
1682 * Filter files/directories in the given path using the user-supplied path
1683 * filter. Results are added to the given array <code>results</code>.
1684 */
1685 private void listStatus(ArrayList<FileStatus> results, Path f,
1686 PathFilter filter) throws AccessControlException,
1687 FileNotFoundException, IOException {
1688 FileStatus[] listing = listStatus(f);
1689 if (listing != null) {
1690 for (int i = 0; i < listing.length; i++) {
1691 if (filter.accept(listing[i].getPath())) {
1692 results.add(listing[i]);
1693 }
1694 }
1695 }
1696 }
1697
1698 /**
1699 * List the statuses of the files/directories in the given path
1700 * if the path is a directory.
1701 *
1702 * @param f is the path
1703 *
1704 * @return an array that contains statuses of the files/directories
1705 * in the given path
1706 *
1707 * @throws AccessControlException If access is denied
1708 * @throws FileNotFoundException If <code>f</code> does not exist
1709 * @throws UnsupportedFileSystemException If file system for <code>f</code> is
1710 * not supported
1711 * @throws IOException If an I/O error occurred
1712 *
1713 * Exceptions applicable to file systems accessed over RPC:
1714 * @throws RpcClientException If an exception occurred in the RPC client
1715 * @throws RpcServerException If an exception occurred in the RPC server
1716 * @throws UnexpectedServerException If server implementation throws
1717 * undeclared exception to RPC server
1718 */
1719 public FileStatus[] listStatus(final Path f) throws AccessControlException,
1720 FileNotFoundException, UnsupportedFileSystemException,
1721 IOException {
1722 final Path absF = fixRelativePart(f);
1723 return new FSLinkResolver<FileStatus[]>() {
1724 public FileStatus[] next(final AbstractFileSystem fs, final Path p)
1725 throws IOException, UnresolvedLinkException {
1726 return fs.listStatus(p);
1727 }
1728 }.resolve(FileContext.this, absF);
1729 }
1730
1731 /**
1732 * List the statuses and block locations of the files in the given path.
1733 *
1734 * If the path is a directory,
1735 * if recursive is false, returns files in the directory;
1736 * if recursive is true, return files in the subtree rooted at the path.
1737 * The subtree is traversed in the depth-first order.
1738 * If the path is a file, return the file's status and block locations.
1739 * Files across symbolic links are also returned.
1740 *
1741 * @param f is the path
1742 * @param recursive if the subdirectories need to be traversed recursively
1743 *
1744 * @return an iterator that traverses statuses of the files
1745 * If any IO exception (for example a sub-directory gets deleted while
1746 * listing is being executed), next() or hasNext() of the returned iterator
1747 * may throw a RuntimeException with the IO exception as the cause.
1748 *
1749 * @throws AccessControlException If access is denied
1750 * @throws FileNotFoundException If <code>f</code> does not exist
1751 * @throws UnsupportedFileSystemException If file system for <code>f</code>
1752 * is not supported
1753 * @throws IOException If an I/O error occurred
1754 *
1755 * Exceptions applicable to file systems accessed over RPC:
1756 * @throws RpcClientException If an exception occurred in the RPC client
1757 * @throws RpcServerException If an exception occurred in the RPC server
1758 * @throws UnexpectedServerException If server implementation throws
1759 * undeclared exception to RPC server
1760 */
1761 public RemoteIterator<LocatedFileStatus> listFiles(
1762 final Path f, final boolean recursive) throws AccessControlException,
1763 FileNotFoundException, UnsupportedFileSystemException,
1764 IOException {
1765 return new RemoteIterator<LocatedFileStatus>() {
1766 private Stack<RemoteIterator<LocatedFileStatus>> itors =
1767 new Stack<RemoteIterator<LocatedFileStatus>>();
1768 RemoteIterator<LocatedFileStatus> curItor = listLocatedStatus(f);
1769 LocatedFileStatus curFile;
1770
1771 /**
1772 * Returns <tt>true</tt> if the iterator has more files.
1773 *
1774 * @return <tt>true</tt> if the iterator has more files.
1775 * @throws AccessControlException if not allowed to access next
1776 * file's status or locations
1777 * @throws FileNotFoundException if next file does not exist any more
1778 * @throws UnsupportedFileSystemException if next file's
1779 * fs is unsupported
1780 * @throws IOException for all other IO errors
1781 * for example, NameNode is not avaialbe or
1782 * NameNode throws IOException due to an error
1783 * while getting the status or block locations
1784 */
1785 @Override
1786 public boolean hasNext() throws IOException {
1787 while (curFile == null) {
1788 if (curItor.hasNext()) {
1789 handleFileStat(curItor.next());
1790 } else if (!itors.empty()) {
1791 curItor = itors.pop();
1792 } else {
1793 return false;
1794 }
1795 }
1796 return true;
1797 }
1798
1799 /**
1800 * Process the input stat.
1801 * If it is a file, return the file stat.
1802 * If it is a directory, traverse the directory if recursive is true;
1803 * ignore it if recursive is false.
1804 * If it is a symlink, resolve the symlink first and then process it
1805 * depending on if it is a file or directory.
1806 * @param stat input status
1807 * @throws AccessControlException if access is denied
1808 * @throws FileNotFoundException if file is not found
1809 * @throws UnsupportedFileSystemException if fs is not supported
1810 * @throws IOException for all other IO errors
1811 */
1812 private void handleFileStat(LocatedFileStatus stat)
1813 throws IOException {
1814 if (stat.isFile()) { // file
1815 curFile = stat;
1816 } else if (stat.isSymlink()) { // symbolic link
1817 // resolve symbolic link
1818 FileStatus symstat = FileContext.this.getFileStatus(
1819 stat.getSymlink());
1820 if (symstat.isFile() || (recursive && symstat.isDirectory())) {
1821 itors.push(curItor);
1822 curItor = listLocatedStatus(stat.getPath());
1823 }
1824 } else if (recursive) { // directory
1825 itors.push(curItor);
1826 curItor = listLocatedStatus(stat.getPath());
1827 }
1828 }
1829
1830 /**
1831 * Returns the next file's status with its block locations
1832 *
1833 * @throws AccessControlException if not allowed to access next
1834 * file's status or locations
1835 * @throws FileNotFoundException if next file does not exist any more
1836 * @throws UnsupportedFileSystemException if next file's
1837 * fs is unsupported
1838 * @throws IOException for all other IO errors
1839 * for example, NameNode is not avaialbe or
1840 * NameNode throws IOException due to an error
1841 * while getting the status or block locations
1842 */
1843 @Override
1844 public LocatedFileStatus next() throws IOException {
1845 if (hasNext()) {
1846 LocatedFileStatus result = curFile;
1847 curFile = null;
1848 return result;
1849 }
1850 throw new java.util.NoSuchElementException("No more entry in " + f);
1851 }
1852 };
1853 }
1854
1855 /**
1856 * <p>Return all the files that match filePattern and are not checksum
1857 * files. Results are sorted by their names.
1858 *
1859 * <p>
1860 * A filename pattern is composed of <i>regular</i> characters and
1861 * <i>special pattern matching</i> characters, which are:
1862 *
1863 * <dl>
1864 * <dd>
1865 * <dl>
1866 * <p>
1867 * <dt> <tt> ? </tt>
1868 * <dd> Matches any single character.
1869 *
1870 * <p>
1871 * <dt> <tt> * </tt>
1872 * <dd> Matches zero or more characters.
1873 *
1874 * <p>
1875 * <dt> <tt> [<i>abc</i>] </tt>
1876 * <dd> Matches a single character from character set
1877 * <tt>{<i>a,b,c</i>}</tt>.
1878 *
1879 * <p>
1880 * <dt> <tt> [<i>a</i>-<i>b</i>] </tt>
1881 * <dd> Matches a single character from the character range
1882 * <tt>{<i>a...b</i>}</tt>. Note: character <tt><i>a</i></tt> must be
1883 * lexicographically less than or equal to character <tt><i>b</i></tt>.
1884 *
1885 * <p>
1886 * <dt> <tt> [^<i>a</i>] </tt>
1887 * <dd> Matches a single char that is not from character set or range
1888 * <tt>{<i>a</i>}</tt>. Note that the <tt>^</tt> character must occur
1889 * immediately to the right of the opening bracket.
1890 *
1891 * <p>
1892 * <dt> <tt> \<i>c</i> </tt>
1893 * <dd> Removes (escapes) any special meaning of character <i>c</i>.
1894 *
1895 * <p>
1896 * <dt> <tt> {ab,cd} </tt>
1897 * <dd> Matches a string from the string set <tt>{<i>ab, cd</i>} </tt>
1898 *
1899 * <p>
1900 * <dt> <tt> {ab,c{de,fh}} </tt>
1901 * <dd> Matches a string from string set <tt>{<i>ab, cde, cfh</i>}</tt>
1902 *
1903 * </dl>
1904 * </dd>
1905 * </dl>
1906 *
 * @param pathPattern a regular expression specifying a path pattern
1908 *
1909 * @return an array of paths that match the path pattern
1910 *
1911 * @throws AccessControlException If access is denied
1912 * @throws UnsupportedFileSystemException If file system for
1913 * <code>pathPattern</code> is not supported
1914 * @throws IOException If an I/O error occurred
1915 *
1916 * Exceptions applicable to file systems accessed over RPC:
1917 * @throws RpcClientException If an exception occurred in the RPC client
1918 * @throws RpcServerException If an exception occurred in the RPC server
1919 * @throws UnexpectedServerException If server implementation throws
1920 * undeclared exception to RPC server
1921 */
1922 public FileStatus[] globStatus(Path pathPattern)
1923 throws AccessControlException, UnsupportedFileSystemException,
1924 IOException {
1925 return globStatus(pathPattern, DEFAULT_FILTER);
1926 }
1927
1928 /**
1929 * Return an array of FileStatus objects whose path names match pathPattern
1930 * and is accepted by the user-supplied path filter. Results are sorted by
1931 * their path names.
1932 * Return null if pathPattern has no glob and the path does not exist.
1933 * Return an empty array if pathPattern has a glob and no path matches it.
1934 *
1935 * @param pathPattern regular expression specifying the path pattern
1936 * @param filter user-supplied path filter
1937 *
1938 * @return an array of FileStatus objects
1939 *
1940 * @throws AccessControlException If access is denied
1941 * @throws UnsupportedFileSystemException If file system for
1942 * <code>pathPattern</code> is not supported
1943 * @throws IOException If an I/O error occurred
1944 *
1945 * Exceptions applicable to file systems accessed over RPC:
1946 * @throws RpcClientException If an exception occurred in the RPC client
1947 * @throws RpcServerException If an exception occurred in the RPC server
1948 * @throws UnexpectedServerException If server implementation throws
1949 * undeclared exception to RPC server
1950 */
1951 public FileStatus[] globStatus(final Path pathPattern,
1952 final PathFilter filter) throws AccessControlException,
1953 UnsupportedFileSystemException, IOException {
1954 URI uri = getFSofPath(fixRelativePart(pathPattern)).getUri();
1955
1956 String filename = pathPattern.toUri().getPath();
1957
1958 List<String> filePatterns = GlobExpander.expand(filename);
1959 if (filePatterns.size() == 1) {
1960 Path absPathPattern = fixRelativePart(pathPattern);
1961 return globStatusInternal(uri, new Path(absPathPattern.toUri()
1962 .getPath()), filter);
1963 } else {
1964 List<FileStatus> results = new ArrayList<FileStatus>();
1965 for (String iFilePattern : filePatterns) {
1966 Path iAbsFilePattern = fixRelativePart(new Path(iFilePattern));
1967 FileStatus[] files = globStatusInternal(uri, iAbsFilePattern, filter);
1968 for (FileStatus file : files) {
1969 results.add(file);
1970 }
1971 }
1972 return results.toArray(new FileStatus[results.size()]);
1973 }
1974 }
1975
1976 /**
1977 *
1978 * @param uri for all the inPathPattern
1979 * @param inPathPattern - without the scheme & authority (take from uri)
1980 * @param filter
1981 *
1982 * @return an array of FileStatus objects
1983 *
1984 * @throws AccessControlException If access is denied
1985 * @throws IOException If an I/O error occurred
1986 */
1987 private FileStatus[] globStatusInternal(final URI uri,
1988 final Path inPathPattern, final PathFilter filter)
1989 throws AccessControlException, IOException
1990 {
1991 Path[] parents = new Path[1];
1992 int level = 0;
1993
1994 assert(inPathPattern.toUri().getScheme() == null &&
1995 inPathPattern.toUri().getAuthority() == null &&
1996 inPathPattern.isUriPathAbsolute());
1997
1998
1999 String filename = inPathPattern.toUri().getPath();
2000
2001 // path has only zero component
2002 if ("".equals(filename) || Path.SEPARATOR.equals(filename)) {
2003 Path p = inPathPattern.makeQualified(uri, null);
2004 return getFileStatus(new Path[]{p});
2005 }
2006
2007 // path has at least one component
2008 String[] components = filename.split(Path.SEPARATOR);
2009
2010 // Path is absolute, first component is "/" hence first component
2011 // is the uri root
2012 parents[0] = new Path(new Path(uri), new Path("/"));
2013 level = 1;
2014
2015 // glob the paths that match the parent path, ie. [0, components.length-1]
2016 boolean[] hasGlob = new boolean[]{false};
2017 Path[] relParentPaths =
2018 globPathsLevel(parents, components, level, hasGlob);
2019 FileStatus[] results;
2020
2021 if (relParentPaths == null || relParentPaths.length == 0) {
2022 results = null;
2023 } else {
2024 // fix the pathes to be abs
2025 Path[] parentPaths = new Path [relParentPaths.length];
2026 for(int i=0; i<relParentPaths.length; i++) {
2027 parentPaths[i] = relParentPaths[i].makeQualified(uri, null);
2028 }
2029
2030 // Now work on the last component of the path
2031 GlobFilter fp =
2032 new GlobFilter(components[components.length - 1], filter);
2033 if (fp.hasPattern()) { // last component has a pattern
2034 // list parent directories and then glob the results
2035 try {
2036 results = listStatus(parentPaths, fp);
2037 } catch (FileNotFoundException e) {
2038 results = null;
2039 }
2040 hasGlob[0] = true;
2041 } else { // last component does not have a pattern
2042 // get all the path names
2043 ArrayList<Path> filteredPaths =
2044 new ArrayList<Path>(parentPaths.length);
2045 for (int i = 0; i < parentPaths.length; i++) {
2046 parentPaths[i] = new Path(parentPaths[i],
2047 components[components.length - 1]);
2048 if (fp.accept(parentPaths[i])) {
2049 filteredPaths.add(parentPaths[i]);
2050 }
2051 }
2052 // get all their statuses
2053 results = getFileStatus(
2054 filteredPaths.toArray(new Path[filteredPaths.size()]));
2055 }
2056 }
2057
2058 // Decide if the pathPattern contains a glob or not
2059 if (results == null) {
2060 if (hasGlob[0]) {
2061 results = new FileStatus[0];
2062 }
2063 } else {
2064 if (results.length == 0) {
2065 if (!hasGlob[0]) {
2066 results = null;
2067 }
2068 } else {
2069 Arrays.sort(results);
2070 }
2071 }
2072 return results;
2073 }
2074
2075 /*
2076 * For a path of N components, return a list of paths that match the
2077 * components [<code>level</code>, <code>N-1</code>].
2078 */
2079 private Path[] globPathsLevel(Path[] parents, String[] filePattern,
2080 int level, boolean[] hasGlob) throws AccessControlException,
2081 FileNotFoundException, IOException {
2082 if (level == filePattern.length - 1) {
2083 return parents;
2084 }
2085 if (parents == null || parents.length == 0) {
2086 return null;
2087 }
2088 GlobFilter fp = new GlobFilter(filePattern[level]);
2089 if (fp.hasPattern()) {
2090 try {
2091 parents = FileUtil.stat2Paths(listStatus(parents, fp));
2092 } catch (FileNotFoundException e) {
2093 parents = null;
2094 }
2095 hasGlob[0] = true;
2096 } else {
2097 for (int i = 0; i < parents.length; i++) {
2098 parents[i] = new Path(parents[i], filePattern[level]);
2099 }
2100 }
2101 return globPathsLevel(parents, filePattern, level + 1, hasGlob);
2102 }
2103
2104 /**
2105 * Copy file from src to dest. See
2106 * {@link #copy(Path, Path, boolean, boolean)}
2107 */
2108 public boolean copy(final Path src, final Path dst)
2109 throws AccessControlException, FileAlreadyExistsException,
2110 FileNotFoundException, ParentNotDirectoryException,
2111 UnsupportedFileSystemException, IOException {
2112 return copy(src, dst, false, false);
2113 }
2114
2115 /**
2116 * Copy from src to dst, optionally deleting src and overwriting dst.
2117 * @param src
2118 * @param dst
2119 * @param deleteSource - delete src if true
2120 * @param overwrite overwrite dst if true; throw IOException if dst exists
2121 * and overwrite is false.
2122 *
2123 * @return true if copy is successful
2124 *
2125 * @throws AccessControlException If access is denied
2126 * @throws FileAlreadyExistsException If <code>dst</code> already exists
2127 * @throws FileNotFoundException If <code>src</code> does not exist
2128 * @throws ParentNotDirectoryException If parent of <code>dst</code> is not
2129 * a directory
2130 * @throws UnsupportedFileSystemException If file system for
2131 * <code>src</code> or <code>dst</code> is not supported
2132 * @throws IOException If an I/O error occurred
2133 *
2134 * Exceptions applicable to file systems accessed over RPC:
2135 * @throws RpcClientException If an exception occurred in the RPC client
2136 * @throws RpcServerException If an exception occurred in the RPC server
2137 * @throws UnexpectedServerException If server implementation throws
2138 * undeclared exception to RPC server
2139 *
2140 * RuntimeExceptions:
2141 * @throws InvalidPathException If path <code>dst</code> is invalid
2142 */
2143 public boolean copy(final Path src, final Path dst, boolean deleteSource,
2144 boolean overwrite) throws AccessControlException,
2145 FileAlreadyExistsException, FileNotFoundException,
2146 ParentNotDirectoryException, UnsupportedFileSystemException,
2147 IOException {
2148 checkNotSchemeWithRelative(src);
2149 checkNotSchemeWithRelative(dst);
2150 Path qSrc = makeQualified(src);
2151 Path qDst = makeQualified(dst);
2152 checkDest(qSrc.getName(), qDst, overwrite);
2153 FileStatus fs = FileContext.this.getFileStatus(qSrc);
2154 if (fs.isDirectory()) {
2155 checkDependencies(qSrc, qDst);
2156 mkdir(qDst, FsPermission.getDirDefault(), true);
2157 FileStatus[] contents = listStatus(qSrc);
2158 for (FileStatus content : contents) {
2159 copy(makeQualified(content.getPath()), makeQualified(new Path(qDst,
2160 content.getPath().getName())), deleteSource, overwrite);
2161 }
2162 } else {
2163 InputStream in=null;
2164 OutputStream out = null;
2165 try {
2166 in = open(qSrc);
2167 EnumSet<CreateFlag> createFlag = overwrite ? EnumSet.of(
2168 CreateFlag.CREATE, CreateFlag.OVERWRITE) :
2169 EnumSet.of(CreateFlag.CREATE);
2170 out = create(qDst, createFlag);
2171 IOUtils.copyBytes(in, out, conf, true);
2172 } catch (IOException e) {
2173 IOUtils.closeStream(out);
2174 IOUtils.closeStream(in);
2175 throw e;
2176 }
2177 }
2178 if (deleteSource) {
2179 return delete(qSrc, true);
2180 } else {
2181 return true;
2182 }
2183 }
2184 }
2185
2186 /**
2187 * Check if copying srcName to dst would overwrite an existing
2188 * file or directory.
2189 * @param srcName File or directory to be copied.
2190 * @param dst Destination to copy srcName to.
2191 * @param overwrite Whether it's ok to overwrite an existing file.
2192 * @throws AccessControlException If access is denied.
2193 * @throws IOException If dst is an existing directory, or dst is an
2194 * existing file and the overwrite option is not passed.
2195 */
2196 private void checkDest(String srcName, Path dst, boolean overwrite)
2197 throws AccessControlException, IOException {
2198 try {
2199 FileStatus dstFs = getFileStatus(dst);
2200 if (dstFs.isDirectory()) {
2201 if (null == srcName) {
2202 throw new IOException("Target " + dst + " is a directory");
2203 }
2204 // Recurse to check if dst/srcName exists.
2205 checkDest(null, new Path(dst, srcName), overwrite);
2206 } else if (!overwrite) {
2207 throw new IOException("Target " + new Path(dst, srcName)
2208 + " already exists");
2209 }
2210 } catch (FileNotFoundException e) {
2211 // dst does not exist - OK to copy.
2212 }
2213 }
2214
2215 //
2216 // If the destination is a subdirectory of the source, then
2217 // generate exception
2218 //
2219 private static void checkDependencies(Path qualSrc, Path qualDst)
2220 throws IOException {
2221 if (isSameFS(qualSrc, qualDst)) {
2222 String srcq = qualSrc.toString() + Path.SEPARATOR;
2223 String dstq = qualDst.toString() + Path.SEPARATOR;
2224 if (dstq.startsWith(srcq)) {
2225 if (srcq.length() == dstq.length()) {
2226 throw new IOException("Cannot copy " + qualSrc + " to itself.");
2227 } else {
2228 throw new IOException("Cannot copy " + qualSrc +
2229 " to its subdirectory " + qualDst);
2230 }
2231 }
2232 }
2233 }
2234
2235 /**
 * Are qualPath1 and qualPath2 of the same file system?
 * @param qualPath1 - fully qualified path
 * @param qualPath2 - fully qualified path
 * @return true if the two paths are considered to be on the same file system
2240 */
2241 private static boolean isSameFS(Path qualPath1, Path qualPath2) {
2242 URI srcUri = qualPath1.toUri();
2243 URI dstUri = qualPath2.toUri();
2244 return (srcUri.getScheme().equals(dstUri.getScheme()) &&
2245 !(srcUri.getAuthority() != null && dstUri.getAuthority() != null && srcUri
2246 .getAuthority().equals(dstUri.getAuthority())));
2247 }
2248
2249 /**
2250 * Deletes all the paths in deleteOnExit on JVM shutdown.
2251 */
2252 static class FileContextFinalizer implements Runnable {
2253 public synchronized void run() {
2254 processDeleteOnExit();
2255 }
2256 }
2257
2258 /**
2259 * Resolves all symbolic links in the specified path.
2260 * Returns the new path object.
2261 */
2262 protected Path resolve(final Path f) throws FileNotFoundException,
2263 UnresolvedLinkException, AccessControlException, IOException {
2264 return new FSLinkResolver<Path>() {
2265 public Path next(final AbstractFileSystem fs, final Path p)
2266 throws IOException, UnresolvedLinkException {
2267 return fs.resolvePath(p);
2268 }
2269 }.resolve(this, f);
2270 }
2271
2272 /**
2273 * Resolves all symbolic links in the specified path leading up
2274 * to, but not including the final path component.
2275 * @param f path to resolve
2276 * @return the new path object.
2277 */
2278 protected Path resolveIntermediate(final Path f) throws IOException {
2279 return new FSLinkResolver<FileStatus>() {
2280 public FileStatus next(final AbstractFileSystem fs, final Path p)
2281 throws IOException, UnresolvedLinkException {
2282 return fs.getFileLinkStatus(p);
2283 }
2284 }.resolve(this, f).getPath();
2285 }
2286
2287 /**
2288 * Returns the list of AbstractFileSystems accessed in the path. The list may
2289 * contain more than one AbstractFileSystems objects in case of symlinks.
2290 *
2291 * @param f
2292 * Path which needs to be resolved
2293 * @return List of AbstractFileSystems accessed in the path
2294 * @throws IOException
2295 */
2296 Set<AbstractFileSystem> resolveAbstractFileSystems(final Path f)
2297 throws IOException {
2298 final Path absF = fixRelativePart(f);
2299 final HashSet<AbstractFileSystem> result
2300 = new HashSet<AbstractFileSystem>();
2301 new FSLinkResolver<Void>() {
2302 public Void next(final AbstractFileSystem fs, final Path p)
2303 throws IOException, UnresolvedLinkException {
2304 result.add(fs);
2305 fs.getFileStatus(p);
2306 return null;
2307 }
2308 }.resolve(this, absF);
2309 return result;
2310 }
2311
2312 /**
2313 * Class used to perform an operation on and resolve symlinks in a
2314 * path. The operation may potentially span multiple file systems.
2315 */
2316 protected abstract class FSLinkResolver<T> {
2317 // The maximum number of symbolic link components in a path
2318 private static final int MAX_PATH_LINKS = 32;
2319
2320 /**
2321 * Generic helper function overridden on instantiation to perform a
2322 * specific operation on the given file system using the given path
2323 * which may result in an UnresolvedLinkException.
2324 * @param fs AbstractFileSystem to perform the operation on.
2325 * @param p Path given the file system.
2326 * @return Generic type determined by the specific implementation.
2327 * @throws UnresolvedLinkException If symbolic link <code>path</code> could
2328 * not be resolved
2329 * @throws IOException an I/O error occured
2330 */
2331 public abstract T next(final AbstractFileSystem fs, final Path p)
2332 throws IOException, UnresolvedLinkException;
2333
2334 /**
2335 * Performs the operation specified by the next function, calling it
2336 * repeatedly until all symlinks in the given path are resolved.
2337 * @param fc FileContext used to access file systems.
2338 * @param p The path to resolve symlinks in.
2339 * @return Generic type determined by the implementation of next.
2340 * @throws IOException
2341 */
2342 public T resolve(final FileContext fc, Path p) throws IOException {
2343 int count = 0;
2344 T in = null;
2345 Path first = p;
2346 // NB: More than one AbstractFileSystem can match a scheme, eg
2347 // "file" resolves to LocalFs but could have come by RawLocalFs.
2348 AbstractFileSystem fs = fc.getFSofPath(p);
2349
2350 // Loop until all symlinks are resolved or the limit is reached
2351 for (boolean isLink = true; isLink;) {
2352 try {
2353 in = next(fs, p);
2354 isLink = false;
2355 } catch (UnresolvedLinkException e) {
2356 if (count++ > MAX_PATH_LINKS) {
2357 throw new IOException("Possible cyclic loop while " +
2358 "following symbolic link " + first);
2359 }
2360 // Resolve the first unresolved path component
2361 p = qualifySymlinkTarget(fs, p, fs.getLinkTarget(p));
2362 fs = fc.getFSofPath(p);
2363 }
2364 }
2365 return in;
2366 }
2367 }
2368
2369 /**
2370 * Get the statistics for a particular file system
2371 *
2372 * @param uri
2373 * the uri to lookup the statistics. Only scheme and authority part
2374 * of the uri are used as the key to store and lookup.
2375 * @return a statistics object
2376 */
2377 public static Statistics getStatistics(URI uri) {
2378 return AbstractFileSystem.getStatistics(uri);
2379 }
2380
2381 /**
2382 * Clears all the statistics stored in AbstractFileSystem, for all the file
2383 * systems.
2384 */
2385 public static void clearStatistics() {
2386 AbstractFileSystem.clearStatistics();
2387 }
2388
2389 /**
2390 * Prints the statistics to standard output. File System is identified by the
2391 * scheme and authority.
2392 */
2393 public static void printStatistics() {
2394 AbstractFileSystem.printStatistics();
2395 }
2396
2397 /**
2398 * @return Map of uri and statistics for each filesystem instantiated. The uri
2399 * consists of scheme and authority for the filesystem.
2400 */
2401 public static Map<URI, Statistics> getAllStatistics() {
2402 return AbstractFileSystem.getAllStatistics();
2403 }
2404
2405 /**
2406 * Get delegation tokens for the file systems accessed for a given
2407 * path.
2408 * @param p Path for which delegations tokens are requested.
2409 * @param renewer the account name that is allowed to renew the token.
2410 * @return List of delegation tokens.
2411 * @throws IOException
2412 */
2413 @InterfaceAudience.LimitedPrivate( { "HDFS", "MapReduce" })
2414 public List<Token<?>> getDelegationTokens(
2415 Path p, String renewer) throws IOException {
2416 Set<AbstractFileSystem> afsSet = resolveAbstractFileSystems(p);
2417 List<Token<?>> tokenList =
2418 new ArrayList<Token<?>>();
2419 for (AbstractFileSystem afs : afsSet) {
2420 List<Token<?>> afsTokens = afs.getDelegationTokens(renewer);
2421 tokenList.addAll(afsTokens);
2422 }
2423 return tokenList;
2424 }
2425 }