001 /**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements. See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership. The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License. You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018 package org.apache.hadoop.fs;
019
020 import java.io.FileNotFoundException;
021 import java.io.IOException;
022 import java.io.InputStream;
023 import java.io.OutputStream;
024 import java.net.URI;
025 import java.security.PrivilegedExceptionAction;
026 import java.util.ArrayList;
027 import java.util.Arrays;
028 import java.util.EnumSet;
029 import java.util.HashSet;
030 import java.util.IdentityHashMap;
031 import java.util.List;
032 import java.util.Map;
033 import java.util.Set;
034 import java.util.Stack;
035 import java.util.TreeSet;
036 import java.util.Map.Entry;
037
038 import org.apache.commons.logging.Log;
039 import org.apache.commons.logging.LogFactory;
040 import org.apache.hadoop.HadoopIllegalArgumentException;
041 import org.apache.hadoop.classification.InterfaceAudience;
042 import org.apache.hadoop.classification.InterfaceStability;
043 import org.apache.hadoop.conf.Configuration;
044 import org.apache.hadoop.fs.FileSystem.Statistics;
045 import org.apache.hadoop.fs.Options.CreateOpts;
046 import org.apache.hadoop.fs.permission.FsPermission;
047 import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY;
048 import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_DEFAULT_NAME_DEFAULT;
049 import org.apache.hadoop.io.IOUtils;
050 import org.apache.hadoop.ipc.RpcClientException;
051 import org.apache.hadoop.ipc.RpcServerException;
052 import org.apache.hadoop.ipc.UnexpectedServerException;
053 import org.apache.hadoop.fs.InvalidPathException;
054 import org.apache.hadoop.security.AccessControlException;
055 import org.apache.hadoop.security.UserGroupInformation;
056 import org.apache.hadoop.security.token.Token;
057 import org.apache.hadoop.util.ShutdownHookManager;
058
059 /**
060 * The FileContext class provides an interface to the application writer for
061 * using the Hadoop file system.
062 * It provides a set of methods for the usual operation: create, open,
063 * list, etc
064 *
065 * <p>
066 * <b> *** Path Names *** </b>
067 * <p>
068 *
069 * The Hadoop file system supports a URI name space and URI names.
070 * It offers a forest of file systems that can be referenced using fully
071 * qualified URIs.
072 * Two common Hadoop file systems implementations are
073 * <ul>
074 * <li> the local file system: file:///path
075 * <li> the hdfs file system hdfs://nnAddress:nnPort/path
076 * </ul>
077 *
078 * While URI names are very flexible, it requires knowing the name or address
079 * of the server. For convenience one often wants to access the default system
080 * in one's environment without knowing its name/address. This has an
081 * additional benefit that it allows one to change one's default fs
082 * (e.g. admin moves application from cluster1 to cluster2).
083 * <p>
084 *
085 * To facilitate this, Hadoop supports a notion of a default file system.
086 * The user can set his default file system, although this is
087 * typically set up for you in your environment via your default config.
088 * A default file system implies a default scheme and authority; slash-relative
089 * names (such as /for/bar) are resolved relative to that default FS.
090 * Similarly a user can also have working-directory-relative names (i.e. names
091 * not starting with a slash). While the working directory is generally in the
092 * same default FS, the wd can be in a different FS.
093 * <p>
094 * Hence Hadoop path names can be one of:
095 * <ul>
096 * <li> fully qualified URI: scheme://authority/path
097 * <li> slash relative names: /path relative to the default file system
098 * <li> wd-relative names: path relative to the working dir
099 * </ul>
100 * Relative paths with scheme (scheme:foo/bar) are illegal.
101 *
102 * <p>
103 * <b>****The Role of the FileContext and configuration defaults****</b>
104 * <p>
105 * The FileContext provides file namespace context for resolving file names;
 * it also contains the umask for permissions. In that sense it is like the
 * per-process file-related state in a Unix system.
108 * These two properties
109 * <ul>
 * <li> default file system (i.e. your slash)
111 * <li> umask
112 * </ul>
 * are, in general, obtained from the default configuration file
 * in your environment (see {@link Configuration}).
115 *
116 * No other configuration parameters are obtained from the default config as
117 * far as the file context layer is concerned. All file system instances
118 * (i.e. deployments of file systems) have default properties; we call these
 * server side (SS) defaults. Operations like create allow one to select many
120 * properties: either pass them in as explicit parameters or use
121 * the SS properties.
122 * <p>
123 * The file system related SS defaults are
124 * <ul>
125 * <li> the home directory (default is "/user/userName")
126 * <li> the initial wd (only for local fs)
127 * <li> replication factor
128 * <li> block size
129 * <li> buffer size
130 * <li> encryptDataTransfer
131 * <li> checksum option. (checksumType and bytesPerChecksum)
132 * </ul>
133 *
134 * <p>
135 * <b> *** Usage Model for the FileContext class *** </b>
136 * <p>
137 * Example 1: use the default config read from the $HADOOP_CONFIG/core.xml.
138 * Unspecified values come from core-defaults.xml in the release jar.
139 * <ul>
140 * <li> myFContext = FileContext.getFileContext(); // uses the default config
141 * // which has your default FS
142 * <li> myFContext.create(path, ...);
143 * <li> myFContext.setWorkingDir(path)
144 * <li> myFContext.open (path, ...);
145 * </ul>
146 * Example 2: Get a FileContext with a specific URI as the default FS
147 * <ul>
148 * <li> myFContext = FileContext.getFileContext(URI)
149 * <li> myFContext.create(path, ...);
150 * ...
151 * </ul>
152 * Example 3: FileContext with local file system as the default
153 * <ul>
154 * <li> myFContext = FileContext.getLocalFSFileContext()
155 * <li> myFContext.create(path, ...);
156 * <li> ...
157 * </ul>
 * Example 4: Use a specific config, ignoring $HADOOP_CONFIG.
 * Generally you should not need to use a config unless you are doing
 * something like the following:
160 * <ul>
161 * <li> configX = someConfigSomeOnePassedToYou.
162 * <li> myFContext = getFileContext(configX); // configX is not changed,
163 * // is passed down
164 * <li> myFContext.create(path, ...);
165 * <li>...
166 * </ul>
167 *
168 */
169
170 @InterfaceAudience.Public
171 @InterfaceStability.Evolving /*Evolving for a release,to be changed to Stable */
172 public final class FileContext {
173
174 public static final Log LOG = LogFactory.getLog(FileContext.class);
175 public static final FsPermission DEFAULT_PERM = FsPermission.getDefault();
176
177 /**
178 * Priority of the FileContext shutdown hook.
179 */
180 public static final int SHUTDOWN_HOOK_PRIORITY = 20;
181
182 /**
183 * List of files that should be deleted on JVM shutdown.
184 */
185 static final Map<FileContext, Set<Path>> DELETE_ON_EXIT =
186 new IdentityHashMap<FileContext, Set<Path>>();
187
188 /** JVM shutdown hook thread. */
189 static final FileContextFinalizer FINALIZER =
190 new FileContextFinalizer();
191
192 private static final PathFilter DEFAULT_FILTER = new PathFilter() {
193 public boolean accept(final Path file) {
194 return true;
195 }
196 };
197
198 /**
199 * The FileContext is defined by.
200 * 1) defaultFS (slash)
201 * 2) wd
202 * 3) umask
203 */
204 private final AbstractFileSystem defaultFS; //default FS for this FileContext.
205 private Path workingDir; // Fully qualified
206 private FsPermission umask;
207 private final Configuration conf;
208 private final UserGroupInformation ugi;
209
210 private FileContext(final AbstractFileSystem defFs,
211 final FsPermission theUmask, final Configuration aConf) {
212 defaultFS = defFs;
213 umask = FsPermission.getUMask(aConf);
214 conf = aConf;
215 try {
216 ugi = UserGroupInformation.getCurrentUser();
217 } catch (IOException e) {
218 LOG.error("Exception in getCurrentUser: ",e);
219 throw new RuntimeException("Failed to get the current user " +
220 "while creating a FileContext", e);
221 }
222 /*
223 * Init the wd.
224 * WorkingDir is implemented at the FileContext layer
225 * NOT at the AbstractFileSystem layer.
226 * If the DefaultFS, such as localFilesystem has a notion of
227 * builtin WD, we use that as the initial WD.
228 * Otherwise the WD is initialized to the home directory.
229 */
230 workingDir = defaultFS.getInitialWorkingDirectory();
231 if (workingDir == null) {
232 workingDir = defaultFS.getHomeDirectory();
233 }
234 util = new Util(); // for the inner class
235 }
236
237 /*
238 * Remove relative part - return "absolute":
239 * If input is relative path ("foo/bar") add wd: ie "/<workingDir>/foo/bar"
240 * A fully qualified uri ("hdfs://nn:p/foo/bar") or a slash-relative path
241 * ("/foo/bar") are returned unchanged.
242 *
243 * Applications that use FileContext should use #makeQualified() since
244 * they really want a fully qualified URI.
245 * Hence this method is not called makeAbsolute() and
246 * has been deliberately declared private.
247 */
248 private Path fixRelativePart(Path p) {
249 if (p.isUriPathAbsolute()) {
250 return p;
251 } else {
252 return new Path(workingDir, p);
253 }
254 }
255
256 /**
257 * Delete all the paths that were marked as delete-on-exit.
258 */
259 static void processDeleteOnExit() {
260 synchronized (DELETE_ON_EXIT) {
261 Set<Entry<FileContext, Set<Path>>> set = DELETE_ON_EXIT.entrySet();
262 for (Entry<FileContext, Set<Path>> entry : set) {
263 FileContext fc = entry.getKey();
264 Set<Path> paths = entry.getValue();
265 for (Path path : paths) {
266 try {
267 fc.delete(path, true);
268 } catch (IOException e) {
269 LOG.warn("Ignoring failure to deleteOnExit for path " + path);
270 }
271 }
272 }
273 DELETE_ON_EXIT.clear();
274 }
275 }
276
277 /**
278 * Pathnames with scheme and relative path are illegal.
279 * @param path to be checked
280 */
281 private static void checkNotSchemeWithRelative(final Path path) {
282 if (path.toUri().isAbsolute() && !path.isUriPathAbsolute()) {
283 throw new HadoopIllegalArgumentException(
284 "Unsupported name: has scheme but relative path-part");
285 }
286 }
287
288 /**
289 * Get the file system of supplied path.
290 *
291 * @param absOrFqPath - absolute or fully qualified path
292 * @return the file system of the path
293 *
294 * @throws UnsupportedFileSystemException If the file system for
295 * <code>absOrFqPath</code> is not supported.
296 * @throws IOExcepton If the file system for <code>absOrFqPath</code> could
297 * not be instantiated.
298 */
299 private AbstractFileSystem getFSofPath(final Path absOrFqPath)
300 throws UnsupportedFileSystemException, IOException {
301 checkNotSchemeWithRelative(absOrFqPath);
302 if (!absOrFqPath.isAbsolute() && absOrFqPath.toUri().getScheme() == null) {
303 throw new HadoopIllegalArgumentException(
304 "FileContext Bug: path is relative");
305 }
306
307 try {
308 // Is it the default FS for this FileContext?
309 defaultFS.checkPath(absOrFqPath);
310 return defaultFS;
311 } catch (Exception e) { // it is different FileSystem
312 return getAbstractFileSystem(ugi, absOrFqPath.toUri(), conf);
313 }
314 }
315
316 private static AbstractFileSystem getAbstractFileSystem(
317 UserGroupInformation user, final URI uri, final Configuration conf)
318 throws UnsupportedFileSystemException, IOException {
319 try {
320 return user.doAs(new PrivilegedExceptionAction<AbstractFileSystem>() {
321 public AbstractFileSystem run() throws UnsupportedFileSystemException {
322 return AbstractFileSystem.get(uri, conf);
323 }
324 });
325 } catch (InterruptedException ex) {
326 LOG.error(ex);
327 throw new IOException("Failed to get the AbstractFileSystem for path: "
328 + uri, ex);
329 }
330 }
331
/**
 * Static factory methods for getting a FileContext that take an
 * AbstractFileSystem as input. To be used for testing.
 */
336
337 /**
338 * Create a FileContext with specified FS as default using the specified
339 * config.
340 *
341 * @param defFS
342 * @param aConf
343 * @return new FileContext with specifed FS as default.
344 */
345 public static FileContext getFileContext(final AbstractFileSystem defFS,
346 final Configuration aConf) {
347 return new FileContext(defFS, FsPermission.getUMask(aConf), aConf);
348 }
349
350 /**
351 * Create a FileContext for specified file system using the default config.
352 *
353 * @param defaultFS
354 * @return a FileContext with the specified AbstractFileSystem
355 * as the default FS.
356 */
357 protected static FileContext getFileContext(
358 final AbstractFileSystem defaultFS) {
359 return getFileContext(defaultFS, new Configuration());
360 }
361
362 /**
363 * Static Factory methods for getting a FileContext.
364 * Note new file contexts are created for each call.
365 * The only singleton is the local FS context using the default config.
366 *
367 * Methods that use the default config: the default config read from the
368 * $HADOOP_CONFIG/core.xml,
369 * Unspecified key-values for config are defaulted from core-defaults.xml
370 * in the release jar.
371 *
372 * The keys relevant to the FileContext layer are extracted at time of
 * construction. Changes to the config after the call are ignored
374 * by the FileContext layer.
375 * The conf is passed to lower layers like AbstractFileSystem and HDFS which
376 * pick up their own config variables.
377 */
378
379 /**
380 * Create a FileContext using the default config read from the
381 * $HADOOP_CONFIG/core.xml, Unspecified key-values for config are defaulted
382 * from core-defaults.xml in the release jar.
383 *
384 * @throws UnsupportedFileSystemException If the file system from the default
385 * configuration is not supported
386 */
387 public static FileContext getFileContext()
388 throws UnsupportedFileSystemException {
389 return getFileContext(new Configuration());
390 }
391
392 /**
393 * @return a FileContext for the local file system using the default config.
394 * @throws UnsupportedFileSystemException If the file system for
395 * {@link FsConstants#LOCAL_FS_URI} is not supported.
396 */
397 public static FileContext getLocalFSFileContext()
398 throws UnsupportedFileSystemException {
399 return getFileContext(FsConstants.LOCAL_FS_URI);
400 }
401
402 /**
403 * Create a FileContext for specified URI using the default config.
404 *
405 * @param defaultFsUri
406 * @return a FileContext with the specified URI as the default FS.
407 *
408 * @throws UnsupportedFileSystemException If the file system for
409 * <code>defaultFsUri</code> is not supported
410 */
411 public static FileContext getFileContext(final URI defaultFsUri)
412 throws UnsupportedFileSystemException {
413 return getFileContext(defaultFsUri, new Configuration());
414 }
415
416 /**
417 * Create a FileContext for specified default URI using the specified config.
418 *
419 * @param defaultFsUri
420 * @param aConf
421 * @return new FileContext for specified uri
422 * @throws UnsupportedFileSystemException If the file system with specified is
423 * not supported
424 * @throws RuntimeException If the file system specified is supported but
425 * could not be instantiated, or if login fails.
426 */
427 public static FileContext getFileContext(final URI defaultFsUri,
428 final Configuration aConf) throws UnsupportedFileSystemException {
429 UserGroupInformation currentUser = null;
430 AbstractFileSystem defaultAfs = null;
431 try {
432 currentUser = UserGroupInformation.getCurrentUser();
433 defaultAfs = getAbstractFileSystem(currentUser, defaultFsUri, aConf);
434 } catch (UnsupportedFileSystemException ex) {
435 throw ex;
436 } catch (IOException ex) {
437 LOG.error(ex);
438 throw new RuntimeException(ex);
439 }
440 return getFileContext(defaultAfs, aConf);
441 }
442
443 /**
444 * Create a FileContext using the passed config. Generally it is better to use
445 * {@link #getFileContext(URI, Configuration)} instead of this one.
446 *
447 *
448 * @param aConf
449 * @return new FileContext
450 * @throws UnsupportedFileSystemException If file system in the config
451 * is not supported
452 */
453 public static FileContext getFileContext(final Configuration aConf)
454 throws UnsupportedFileSystemException {
455 return getFileContext(
456 URI.create(aConf.get(FS_DEFAULT_NAME_KEY, FS_DEFAULT_NAME_DEFAULT)),
457 aConf);
458 }
459
460 /**
461 * @param aConf - from which the FileContext is configured
462 * @return a FileContext for the local file system using the specified config.
463 *
464 * @throws UnsupportedFileSystemException If default file system in the config
465 * is not supported
466 *
467 */
468 public static FileContext getLocalFSFileContext(final Configuration aConf)
469 throws UnsupportedFileSystemException {
470 return getFileContext(FsConstants.LOCAL_FS_URI, aConf);
471 }
472
473 /* This method is needed for tests. */
474 @InterfaceAudience.Private
475 @InterfaceStability.Unstable /* return type will change to AFS once
476 HADOOP-6223 is completed */
477 public AbstractFileSystem getDefaultFileSystem() {
478 return defaultFS;
479 }
480
481 /**
482 * Set the working directory for wd-relative names (such a "foo/bar"). Working
483 * directory feature is provided by simply prefixing relative names with the
484 * working dir. Note this is different from Unix where the wd is actually set
485 * to the inode. Hence setWorkingDir does not follow symlinks etc. This works
486 * better in a distributed environment that has multiple independent roots.
487 * {@link #getWorkingDirectory()} should return what setWorkingDir() set.
488 *
489 * @param newWDir new working directory
490 * @throws IOException
491 * <br>
492 * NewWdir can be one of:
493 * <ul>
494 * <li>relative path: "foo/bar";</li>
495 * <li>absolute without scheme: "/foo/bar"</li>
496 * <li>fully qualified with scheme: "xx://auth/foo/bar"</li>
497 * </ul>
498 * <br>
499 * Illegal WDs:
500 * <ul>
501 * <li>relative with scheme: "xx:foo/bar"</li>
502 * <li>non existent directory</li>
503 * </ul>
504 */
505 public void setWorkingDirectory(final Path newWDir) throws IOException {
506 checkNotSchemeWithRelative(newWDir);
507 /* wd is stored as a fully qualified path. We check if the given
508 * path is not relative first since resolve requires and returns
509 * an absolute path.
510 */
511 final Path newWorkingDir = new Path(workingDir, newWDir);
512 FileStatus status = getFileStatus(newWorkingDir);
513 if (status.isFile()) {
514 throw new FileNotFoundException("Cannot setWD to a file");
515 }
516 workingDir = newWorkingDir;
517 }
518
519 /**
520 * Gets the working directory for wd-relative names (such a "foo/bar").
521 */
522 public Path getWorkingDirectory() {
523 return workingDir;
524 }
525
526 /**
527 * Gets the ugi in the file-context
528 * @return UserGroupInformation
529 */
530 public UserGroupInformation getUgi() {
531 return ugi;
532 }
533
534 /**
535 * Return the current user's home directory in this file system.
536 * The default implementation returns "/user/$USER/".
537 * @return the home directory
538 */
539 public Path getHomeDirectory() {
540 return defaultFS.getHomeDirectory();
541 }
542
543 /**
544 *
545 * @return the umask of this FileContext
546 */
547 public FsPermission getUMask() {
548 return umask;
549 }
550
551 /**
552 * Set umask to the supplied parameter.
553 * @param newUmask the new umask
554 */
555 public void setUMask(final FsPermission newUmask) {
556 umask = newUmask;
557 }
558
559
560 /**
561 * Resolve the path following any symlinks or mount points
562 * @param f to be resolved
563 * @return fully qualified resolved path
564 *
565 * @throws FileNotFoundException If <code>f</code> does not exist
566 * @throws AccessControlException if access denied
567 * @throws IOException If an IO Error occurred
568 *
569 * Exceptions applicable to file systems accessed over RPC:
570 * @throws RpcClientException If an exception occurred in the RPC client
571 * @throws RpcServerException If an exception occurred in the RPC server
572 * @throws UnexpectedServerException If server implementation throws
573 * undeclared exception to RPC server
574 *
575 * RuntimeExceptions:
576 * @throws InvalidPathException If path <code>f</code> is not valid
577 */
578 public Path resolvePath(final Path f) throws FileNotFoundException,
579 UnresolvedLinkException, AccessControlException, IOException {
580 return resolve(f);
581 }
582
583 /**
584 * Make the path fully qualified if it is isn't.
585 * A Fully-qualified path has scheme and authority specified and an absolute
586 * path.
587 * Use the default file system and working dir in this FileContext to qualify.
588 * @param path
589 * @return qualified path
590 */
591 public Path makeQualified(final Path path) {
592 return path.makeQualified(defaultFS.getUri(), getWorkingDirectory());
593 }
594
595 /**
596 * Create or overwrite file on indicated path and returns an output stream for
597 * writing into the file.
598 *
599 * @param f the file name to open
600 * @param createFlag gives the semantics of create; see {@link CreateFlag}
601 * @param opts file creation options; see {@link Options.CreateOpts}.
602 * <ul>
603 * <li>Progress - to report progress on the operation - default null
604 * <li>Permission - umask is applied against permisssion: default is
605 * FsPermissions:getDefault()
606 *
607 * <li>CreateParent - create missing parent path; default is to not
608 * to create parents
609 * <li>The defaults for the following are SS defaults of the file
610 * server implementing the target path. Not all parameters make sense
611 * for all kinds of file system - eg. localFS ignores Blocksize,
612 * replication, checksum
613 * <ul>
614 * <li>BufferSize - buffersize used in FSDataOutputStream
615 * <li>Blocksize - block size for file blocks
616 * <li>ReplicationFactor - replication for blocks
617 * <li>ChecksumParam - Checksum parameters. server default is used
618 * if not specified.
619 * </ul>
620 * </ul>
621 *
622 * @return {@link FSDataOutputStream} for created file
623 *
624 * @throws AccessControlException If access is denied
625 * @throws FileAlreadyExistsException If file <code>f</code> already exists
626 * @throws FileNotFoundException If parent of <code>f</code> does not exist
627 * and <code>createParent</code> is false
628 * @throws ParentNotDirectoryException If parent of <code>f</code> is not a
629 * directory.
630 * @throws UnsupportedFileSystemException If file system for <code>f</code> is
631 * not supported
632 * @throws IOException If an I/O error occurred
633 *
634 * Exceptions applicable to file systems accessed over RPC:
635 * @throws RpcClientException If an exception occurred in the RPC client
636 * @throws RpcServerException If an exception occurred in the RPC server
637 * @throws UnexpectedServerException If server implementation throws
638 * undeclared exception to RPC server
639 *
640 * RuntimeExceptions:
641 * @throws InvalidPathException If path <code>f</code> is not valid
642 */
643 public FSDataOutputStream create(final Path f,
644 final EnumSet<CreateFlag> createFlag, Options.CreateOpts... opts)
645 throws AccessControlException, FileAlreadyExistsException,
646 FileNotFoundException, ParentNotDirectoryException,
647 UnsupportedFileSystemException, IOException {
648 Path absF = fixRelativePart(f);
649
650 // If one of the options is a permission, extract it & apply umask
651 // If not, add a default Perms and apply umask;
652 // AbstractFileSystem#create
653
654 CreateOpts.Perms permOpt =
655 (CreateOpts.Perms) CreateOpts.getOpt(CreateOpts.Perms.class, opts);
656 FsPermission permission = (permOpt != null) ? permOpt.getValue() :
657 FsPermission.getDefault();
658 permission = permission.applyUMask(umask);
659
660 final CreateOpts[] updatedOpts =
661 CreateOpts.setOpt(CreateOpts.perms(permission), opts);
662 return new FSLinkResolver<FSDataOutputStream>() {
663 public FSDataOutputStream next(final AbstractFileSystem fs, final Path p)
664 throws IOException {
665 return fs.create(p, createFlag, updatedOpts);
666 }
667 }.resolve(this, absF);
668 }
669
670 /**
671 * Make(create) a directory and all the non-existent parents.
672 *
673 * @param dir - the dir to make
674 * @param permission - permissions is set permission&~umask
675 * @param createParent - if true then missing parent dirs are created if false
676 * then parent must exist
677 *
678 * @throws AccessControlException If access is denied
679 * @throws FileAlreadyExistsException If directory <code>dir</code> already
680 * exists
681 * @throws FileNotFoundException If parent of <code>dir</code> does not exist
682 * and <code>createParent</code> is false
683 * @throws ParentNotDirectoryException If parent of <code>dir</code> is not a
684 * directory
685 * @throws UnsupportedFileSystemException If file system for <code>dir</code>
686 * is not supported
687 * @throws IOException If an I/O error occurred
688 *
689 * Exceptions applicable to file systems accessed over RPC:
690 * @throws RpcClientException If an exception occurred in the RPC client
691 * @throws UnexpectedServerException If server implementation throws
692 * undeclared exception to RPC server
693 *
694 * RuntimeExceptions:
695 * @throws InvalidPathException If path <code>dir</code> is not valid
696 */
697 public void mkdir(final Path dir, final FsPermission permission,
698 final boolean createParent) throws AccessControlException,
699 FileAlreadyExistsException, FileNotFoundException,
700 ParentNotDirectoryException, UnsupportedFileSystemException,
701 IOException {
702 final Path absDir = fixRelativePart(dir);
703 final FsPermission absFerms = (permission == null ?
704 FsPermission.getDefault() : permission).applyUMask(umask);
705 new FSLinkResolver<Void>() {
706 public Void next(final AbstractFileSystem fs, final Path p)
707 throws IOException, UnresolvedLinkException {
708 fs.mkdir(p, absFerms, createParent);
709 return null;
710 }
711 }.resolve(this, absDir);
712 }
713
714 /**
715 * Delete a file.
716 * @param f the path to delete.
717 * @param recursive if path is a directory and set to
718 * true, the directory is deleted else throws an exception. In
719 * case of a file the recursive can be set to either true or false.
720 *
721 * @throws AccessControlException If access is denied
722 * @throws FileNotFoundException If <code>f</code> does not exist
723 * @throws UnsupportedFileSystemException If file system for <code>f</code> is
724 * not supported
725 * @throws IOException If an I/O error occurred
726 *
727 * Exceptions applicable to file systems accessed over RPC:
728 * @throws RpcClientException If an exception occurred in the RPC client
729 * @throws RpcServerException If an exception occurred in the RPC server
730 * @throws UnexpectedServerException If server implementation throws
731 * undeclared exception to RPC server
732 *
733 * RuntimeExceptions:
734 * @throws InvalidPathException If path <code>f</code> is invalid
735 */
736 public boolean delete(final Path f, final boolean recursive)
737 throws AccessControlException, FileNotFoundException,
738 UnsupportedFileSystemException, IOException {
739 Path absF = fixRelativePart(f);
740 return new FSLinkResolver<Boolean>() {
741 public Boolean next(final AbstractFileSystem fs, final Path p)
742 throws IOException, UnresolvedLinkException {
743 return Boolean.valueOf(fs.delete(p, recursive));
744 }
745 }.resolve(this, absF);
746 }
747
748 /**
749 * Opens an FSDataInputStream at the indicated Path using
750 * default buffersize.
751 * @param f the file name to open
752 *
753 * @throws AccessControlException If access is denied
754 * @throws FileNotFoundException If file <code>f</code> does not exist
755 * @throws UnsupportedFileSystemException If file system for <code>f</code>
756 * is not supported
757 * @throws IOException If an I/O error occurred
758 *
759 * Exceptions applicable to file systems accessed over RPC:
760 * @throws RpcClientException If an exception occurred in the RPC client
761 * @throws RpcServerException If an exception occurred in the RPC server
762 * @throws UnexpectedServerException If server implementation throws
763 * undeclared exception to RPC server
764 */
765 public FSDataInputStream open(final Path f) throws AccessControlException,
766 FileNotFoundException, UnsupportedFileSystemException, IOException {
767 final Path absF = fixRelativePart(f);
768 return new FSLinkResolver<FSDataInputStream>() {
769 public FSDataInputStream next(final AbstractFileSystem fs, final Path p)
770 throws IOException, UnresolvedLinkException {
771 return fs.open(p);
772 }
773 }.resolve(this, absF);
774 }
775
776 /**
777 * Opens an FSDataInputStream at the indicated Path.
778 *
779 * @param f the file name to open
780 * @param bufferSize the size of the buffer to be used.
781 *
782 * @throws AccessControlException If access is denied
783 * @throws FileNotFoundException If file <code>f</code> does not exist
784 * @throws UnsupportedFileSystemException If file system for <code>f</code> is
785 * not supported
786 * @throws IOException If an I/O error occurred
787 *
788 * Exceptions applicable to file systems accessed over RPC:
789 * @throws RpcClientException If an exception occurred in the RPC client
790 * @throws RpcServerException If an exception occurred in the RPC server
791 * @throws UnexpectedServerException If server implementation throws
792 * undeclared exception to RPC server
793 */
794 public FSDataInputStream open(final Path f, final int bufferSize)
795 throws AccessControlException, FileNotFoundException,
796 UnsupportedFileSystemException, IOException {
797 final Path absF = fixRelativePart(f);
798 return new FSLinkResolver<FSDataInputStream>() {
799 public FSDataInputStream next(final AbstractFileSystem fs, final Path p)
800 throws IOException, UnresolvedLinkException {
801 return fs.open(p, bufferSize);
802 }
803 }.resolve(this, absF);
804 }
805
806 /**
807 * Set replication for an existing file.
808 *
809 * @param f file name
810 * @param replication new replication
811 *
812 * @return true if successful
813 *
814 * @throws AccessControlException If access is denied
815 * @throws FileNotFoundException If file <code>f</code> does not exist
816 * @throws IOException If an I/O error occurred
817 *
818 * Exceptions applicable to file systems accessed over RPC:
819 * @throws RpcClientException If an exception occurred in the RPC client
820 * @throws RpcServerException If an exception occurred in the RPC server
821 * @throws UnexpectedServerException If server implementation throws
822 * undeclared exception to RPC server
823 */
824 public boolean setReplication(final Path f, final short replication)
825 throws AccessControlException, FileNotFoundException,
826 IOException {
827 final Path absF = fixRelativePart(f);
828 return new FSLinkResolver<Boolean>() {
829 public Boolean next(final AbstractFileSystem fs, final Path p)
830 throws IOException, UnresolvedLinkException {
831 return Boolean.valueOf(fs.setReplication(p, replication));
832 }
833 }.resolve(this, absF);
834 }
835
836 /**
837 * Renames Path src to Path dst
838 * <ul>
839 * <li
840 * <li>Fails if src is a file and dst is a directory.
841 * <li>Fails if src is a directory and dst is a file.
842 * <li>Fails if the parent of dst does not exist or is a file.
843 * </ul>
844 * <p>
845 * If OVERWRITE option is not passed as an argument, rename fails if the dst
846 * already exists.
847 * <p>
848 * If OVERWRITE option is passed as an argument, rename overwrites the dst if
849 * it is a file or an empty directory. Rename fails if dst is a non-empty
850 * directory.
851 * <p>
852 * Note that atomicity of rename is dependent on the file system
853 * implementation. Please refer to the file system documentation for details
854 * <p>
855 *
856 * @param src path to be renamed
857 * @param dst new path after rename
858 *
859 * @throws AccessControlException If access is denied
860 * @throws FileAlreadyExistsException If <code>dst</code> already exists and
861 * <code>options</code> has {@link Options.Rename#OVERWRITE}
862 * option false.
863 * @throws FileNotFoundException If <code>src</code> does not exist
864 * @throws ParentNotDirectoryException If parent of <code>dst</code> is not a
865 * directory
866 * @throws UnsupportedFileSystemException If file system for <code>src</code>
867 * and <code>dst</code> is not supported
868 * @throws IOException If an I/O error occurred
869 *
870 * Exceptions applicable to file systems accessed over RPC:
871 * @throws RpcClientException If an exception occurred in the RPC client
872 * @throws RpcServerException If an exception occurred in the RPC server
873 * @throws UnexpectedServerException If server implementation throws
874 * undeclared exception to RPC server
875 */
876 public void rename(final Path src, final Path dst,
877 final Options.Rename... options) throws AccessControlException,
878 FileAlreadyExistsException, FileNotFoundException,
879 ParentNotDirectoryException, UnsupportedFileSystemException,
880 IOException {
881 final Path absSrc = fixRelativePart(src);
882 final Path absDst = fixRelativePart(dst);
883 AbstractFileSystem srcFS = getFSofPath(absSrc);
884 AbstractFileSystem dstFS = getFSofPath(absDst);
885 if(!srcFS.getUri().equals(dstFS.getUri())) {
886 throw new IOException("Renames across AbstractFileSystems not supported");
887 }
888 try {
889 srcFS.rename(absSrc, absDst, options);
890 } catch (UnresolvedLinkException e) {
891 /* We do not know whether the source or the destination path
892 * was unresolved. Resolve the source path up until the final
893 * path component, then fully resolve the destination.
894 */
895 final Path source = resolveIntermediate(absSrc);
896 new FSLinkResolver<Void>() {
897 public Void next(final AbstractFileSystem fs, final Path p)
898 throws IOException, UnresolvedLinkException {
899 fs.rename(source, p, options);
900 return null;
901 }
902 }.resolve(this, absDst);
903 }
904 }
905
906 /**
907 * Set permission of a path.
908 * @param f The path
909 * @param permission - the new absolute permission (umask is not applied)
910 *
911 * @throws AccessControlException If access is denied
912 * @throws FileNotFoundException If <code>f</code> does not exist
913 * @throws UnsupportedFileSystemException If file system for <code>f</code>
914 * is not supported
915 * @throws IOException If an I/O error occurred
916 *
917 * Exceptions applicable to file systems accessed over RPC:
918 * @throws RpcClientException If an exception occurred in the RPC client
919 * @throws RpcServerException If an exception occurred in the RPC server
920 * @throws UnexpectedServerException If server implementation throws
921 * undeclared exception to RPC server
922 */
923 public void setPermission(final Path f, final FsPermission permission)
924 throws AccessControlException, FileNotFoundException,
925 UnsupportedFileSystemException, IOException {
926 final Path absF = fixRelativePart(f);
927 new FSLinkResolver<Void>() {
928 public Void next(final AbstractFileSystem fs, final Path p)
929 throws IOException, UnresolvedLinkException {
930 fs.setPermission(p, permission);
931 return null;
932 }
933 }.resolve(this, absF);
934 }
935
936 /**
937 * Set owner of a path (i.e. a file or a directory). The parameters username
938 * and groupname cannot both be null.
939 *
940 * @param f The path
941 * @param username If it is null, the original username remains unchanged.
942 * @param groupname If it is null, the original groupname remains unchanged.
943 *
944 * @throws AccessControlException If access is denied
945 * @throws FileNotFoundException If <code>f</code> does not exist
946 * @throws UnsupportedFileSystemException If file system for <code>f</code> is
947 * not supported
948 * @throws IOException If an I/O error occurred
949 *
950 * Exceptions applicable to file systems accessed over RPC:
951 * @throws RpcClientException If an exception occurred in the RPC client
952 * @throws RpcServerException If an exception occurred in the RPC server
953 * @throws UnexpectedServerException If server implementation throws
954 * undeclared exception to RPC server
955 *
956 * RuntimeExceptions:
957 * @throws HadoopIllegalArgumentException If <code>username</code> or
958 * <code>groupname</code> is invalid.
959 */
960 public void setOwner(final Path f, final String username,
961 final String groupname) throws AccessControlException,
962 UnsupportedFileSystemException, FileNotFoundException,
963 IOException {
964 if ((username == null) && (groupname == null)) {
965 throw new HadoopIllegalArgumentException(
966 "username and groupname cannot both be null");
967 }
968 final Path absF = fixRelativePart(f);
969 new FSLinkResolver<Void>() {
970 public Void next(final AbstractFileSystem fs, final Path p)
971 throws IOException, UnresolvedLinkException {
972 fs.setOwner(p, username, groupname);
973 return null;
974 }
975 }.resolve(this, absF);
976 }
977
978 /**
979 * Set the modification time and access time of a file.
980 * @param f The path
981 * @param mtime Set the modification time of this file.
982 * The number of milliseconds since epoch (Jan 1, 1970).
983 * A value of -1 means that this call should not set modification time.
984 * @param atime Set the access time of this file.
985 * The number of milliseconds since Jan 1, 1970.
986 * A value of -1 means that this call should not set access time.
987 *
988 * @throws AccessControlException If access is denied
989 * @throws FileNotFoundException If <code>f</code> does not exist
990 * @throws UnsupportedFileSystemException If file system for <code>f</code> is
991 * not supported
992 * @throws IOException If an I/O error occurred
993 *
994 * Exceptions applicable to file systems accessed over RPC:
995 * @throws RpcClientException If an exception occurred in the RPC client
996 * @throws RpcServerException If an exception occurred in the RPC server
997 * @throws UnexpectedServerException If server implementation throws
998 * undeclared exception to RPC server
999 */
1000 public void setTimes(final Path f, final long mtime, final long atime)
1001 throws AccessControlException, FileNotFoundException,
1002 UnsupportedFileSystemException, IOException {
1003 final Path absF = fixRelativePart(f);
1004 new FSLinkResolver<Void>() {
1005 public Void next(final AbstractFileSystem fs, final Path p)
1006 throws IOException, UnresolvedLinkException {
1007 fs.setTimes(p, mtime, atime);
1008 return null;
1009 }
1010 }.resolve(this, absF);
1011 }
1012
1013 /**
1014 * Get the checksum of a file.
1015 *
1016 * @param f file path
1017 *
1018 * @return The file checksum. The default return value is null,
1019 * which indicates that no checksum algorithm is implemented
1020 * in the corresponding FileSystem.
1021 *
1022 * @throws AccessControlException If access is denied
1023 * @throws FileNotFoundException If <code>f</code> does not exist
1024 * @throws IOException If an I/O error occurred
1025 *
1026 * Exceptions applicable to file systems accessed over RPC:
1027 * @throws RpcClientException If an exception occurred in the RPC client
1028 * @throws RpcServerException If an exception occurred in the RPC server
1029 * @throws UnexpectedServerException If server implementation throws
1030 * undeclared exception to RPC server
1031 */
1032 public FileChecksum getFileChecksum(final Path f)
1033 throws AccessControlException, FileNotFoundException,
1034 IOException {
1035 final Path absF = fixRelativePart(f);
1036 return new FSLinkResolver<FileChecksum>() {
1037 public FileChecksum next(final AbstractFileSystem fs, final Path p)
1038 throws IOException, UnresolvedLinkException {
1039 return fs.getFileChecksum(p);
1040 }
1041 }.resolve(this, absF);
1042 }
1043
1044 /**
1045 * Set the verify checksum flag for the file system denoted by the path.
1046 * This is only applicable if the
1047 * corresponding FileSystem supports checksum. By default doesn't do anything.
1048 * @param verifyChecksum the value to set the verify checksum flag to
1049 * @param f set the verifyChecksum for the Filesystem containing this path
1050 *
1051 * @throws AccessControlException If access is denied
1052 * @throws FileNotFoundException If <code>f</code> does not exist
1053 * @throws UnsupportedFileSystemException If file system for <code>f</code> is
1054 * not supported
1055 * @throws IOException If an I/O error occurred
1056 *
1057 * Exceptions applicable to file systems accessed over RPC:
1058 * @throws RpcClientException If an exception occurred in the RPC client
1059 * @throws RpcServerException If an exception occurred in the RPC server
1060 * @throws UnexpectedServerException If server implementation throws
1061 * undeclared exception to RPC server
1062 */
1063 public void setVerifyChecksum(final boolean verifyChecksum, final Path f)
1064 throws AccessControlException, FileNotFoundException,
1065 UnsupportedFileSystemException, IOException {
1066 final Path absF = resolve(fixRelativePart(f));
1067 getFSofPath(absF).setVerifyChecksum(verifyChecksum);
1068 }
1069
1070 /**
1071 * Return a file status object that represents the path.
1072 * @param f The path we want information from
1073 *
1074 * @return a FileStatus object
1075 *
1076 * @throws AccessControlException If access is denied
1077 * @throws FileNotFoundException If <code>f</code> does not exist
1078 * @throws UnsupportedFileSystemException If file system for <code>f</code> is
1079 * not supported
1080 * @throws IOException If an I/O error occurred
1081 *
1082 * Exceptions applicable to file systems accessed over RPC:
1083 * @throws RpcClientException If an exception occurred in the RPC client
1084 * @throws RpcServerException If an exception occurred in the RPC server
1085 * @throws UnexpectedServerException If server implementation throws
1086 * undeclared exception to RPC server
1087 */
1088 public FileStatus getFileStatus(final Path f) throws AccessControlException,
1089 FileNotFoundException, UnsupportedFileSystemException, IOException {
1090 final Path absF = fixRelativePart(f);
1091 return new FSLinkResolver<FileStatus>() {
1092 public FileStatus next(final AbstractFileSystem fs, final Path p)
1093 throws IOException, UnresolvedLinkException {
1094 return fs.getFileStatus(p);
1095 }
1096 }.resolve(this, absF);
1097 }
1098
1099 /**
1100 * Return a fully qualified version of the given symlink target if it
1101 * has no scheme and authority. Partially and fully qualified paths
1102 * are returned unmodified.
1103 * @param pathFS The AbstractFileSystem of the path
1104 * @param pathWithLink Path that contains the symlink
1105 * @param target The symlink's absolute target
1106 * @return Fully qualified version of the target.
1107 */
1108 private Path qualifySymlinkTarget(final AbstractFileSystem pathFS,
1109 Path pathWithLink, Path target) {
1110 // NB: makeQualified uses the target's scheme and authority, if
1111 // specified, and the scheme and authority of pathFS, if not.
1112 final String scheme = target.toUri().getScheme();
1113 final String auth = target.toUri().getAuthority();
1114 return (scheme == null && auth == null)
1115 ? target.makeQualified(pathFS.getUri(), pathWithLink.getParent())
1116 : target;
1117 }
1118
1119 /**
1120 * Return a file status object that represents the path. If the path
1121 * refers to a symlink then the FileStatus of the symlink is returned.
1122 * The behavior is equivalent to #getFileStatus() if the underlying
1123 * file system does not support symbolic links.
1124 * @param f The path we want information from.
1125 * @return A FileStatus object
1126 *
1127 * @throws AccessControlException If access is denied
1128 * @throws FileNotFoundException If <code>f</code> does not exist
1129 * @throws UnsupportedFileSystemException If file system for <code>f</code> is
1130 * not supported
1131 * @throws IOException If an I/O error occurred
1132 */
1133 public FileStatus getFileLinkStatus(final Path f)
1134 throws AccessControlException, FileNotFoundException,
1135 UnsupportedFileSystemException, IOException {
1136 final Path absF = fixRelativePart(f);
1137 return new FSLinkResolver<FileStatus>() {
1138 public FileStatus next(final AbstractFileSystem fs, final Path p)
1139 throws IOException, UnresolvedLinkException {
1140 FileStatus fi = fs.getFileLinkStatus(p);
1141 if (fi.isSymlink()) {
1142 fi.setSymlink(qualifySymlinkTarget(fs, p, fi.getSymlink()));
1143 }
1144 return fi;
1145 }
1146 }.resolve(this, absF);
1147 }
1148
1149 /**
1150 * Returns the target of the given symbolic link as it was specified
1151 * when the link was created. Links in the path leading up to the
1152 * final path component are resolved transparently.
1153 *
1154 * @param f the path to return the target of
1155 * @return The un-interpreted target of the symbolic link.
1156 *
1157 * @throws AccessControlException If access is denied
1158 * @throws FileNotFoundException If path <code>f</code> does not exist
1159 * @throws UnsupportedFileSystemException If file system for <code>f</code> is
1160 * not supported
1161 * @throws IOException If the given path does not refer to a symlink
1162 * or an I/O error occurred
1163 */
1164 public Path getLinkTarget(final Path f) throws AccessControlException,
1165 FileNotFoundException, UnsupportedFileSystemException, IOException {
1166 final Path absF = fixRelativePart(f);
1167 return new FSLinkResolver<Path>() {
1168 public Path next(final AbstractFileSystem fs, final Path p)
1169 throws IOException, UnresolvedLinkException {
1170 FileStatus fi = fs.getFileLinkStatus(p);
1171 return fi.getSymlink();
1172 }
1173 }.resolve(this, absF);
1174 }
1175
1176 /**
1177 * Return blockLocation of the given file for the given offset and len.
1178 * For a nonexistent file or regions, null will be returned.
1179 *
1180 * This call is most helpful with DFS, where it returns
1181 * hostnames of machines that contain the given file.
1182 *
1183 * @param f - get blocklocations of this file
1184 * @param start position (byte offset)
1185 * @param len (in bytes)
1186 *
1187 * @return block locations for given file at specified offset of len
1188 *
1189 * @throws AccessControlException If access is denied
1190 * @throws FileNotFoundException If <code>f</code> does not exist
1191 * @throws UnsupportedFileSystemException If file system for <code>f</code> is
1192 * not supported
1193 * @throws IOException If an I/O error occurred
1194 *
1195 * Exceptions applicable to file systems accessed over RPC:
1196 * @throws RpcClientException If an exception occurred in the RPC client
1197 * @throws RpcServerException If an exception occurred in the RPC server
1198 * @throws UnexpectedServerException If server implementation throws
1199 * undeclared exception to RPC server
1200 *
1201 * RuntimeExceptions:
1202 * @throws InvalidPathException If path <code>f</code> is invalid
1203 */
1204 @InterfaceAudience.LimitedPrivate({"HDFS", "MapReduce"})
1205 @InterfaceStability.Evolving
1206 public BlockLocation[] getFileBlockLocations(final Path f, final long start,
1207 final long len) throws AccessControlException, FileNotFoundException,
1208 UnsupportedFileSystemException, IOException {
1209 final Path absF = fixRelativePart(f);
1210 return new FSLinkResolver<BlockLocation[]>() {
1211 public BlockLocation[] next(final AbstractFileSystem fs, final Path p)
1212 throws IOException, UnresolvedLinkException {
1213 return fs.getFileBlockLocations(p, start, len);
1214 }
1215 }.resolve(this, absF);
1216 }
1217
1218 /**
1219 * Returns a status object describing the use and capacity of the
1220 * file system denoted by the Path argument f.
1221 * If the file system has multiple partitions, the
1222 * use and capacity of the partition pointed to by the specified
1223 * path is reflected.
1224 *
1225 * @param f Path for which status should be obtained. null means the
1226 * root partition of the default file system.
1227 *
1228 * @return a FsStatus object
1229 *
1230 * @throws AccessControlException If access is denied
1231 * @throws FileNotFoundException If <code>f</code> does not exist
1232 * @throws UnsupportedFileSystemException If file system for <code>f</code> is
1233 * not supported
1234 * @throws IOException If an I/O error occurred
1235 *
1236 * Exceptions applicable to file systems accessed over RPC:
1237 * @throws RpcClientException If an exception occurred in the RPC client
1238 * @throws RpcServerException If an exception occurred in the RPC server
1239 * @throws UnexpectedServerException If server implementation throws
1240 * undeclared exception to RPC server
1241 */
1242 public FsStatus getFsStatus(final Path f) throws AccessControlException,
1243 FileNotFoundException, UnsupportedFileSystemException, IOException {
1244 if (f == null) {
1245 return defaultFS.getFsStatus();
1246 }
1247 final Path absF = fixRelativePart(f);
1248 return new FSLinkResolver<FsStatus>() {
1249 public FsStatus next(final AbstractFileSystem fs, final Path p)
1250 throws IOException, UnresolvedLinkException {
1251 return fs.getFsStatus(p);
1252 }
1253 }.resolve(this, absF);
1254 }
1255
1256 /**
1257 * Creates a symbolic link to an existing file. An exception is thrown if
1258 * the symlink exists, the user does not have permission to create symlink,
1259 * or the underlying file system does not support symlinks.
1260 *
1261 * Symlink permissions are ignored, access to a symlink is determined by
1262 * the permissions of the symlink target.
1263 *
1264 * Symlinks in paths leading up to the final path component are resolved
1265 * transparently. If the final path component refers to a symlink some
1266 * functions operate on the symlink itself, these are:
1267 * - delete(f) and deleteOnExit(f) - Deletes the symlink.
1268 * - rename(src, dst) - If src refers to a symlink, the symlink is
1269 * renamed. If dst refers to a symlink, the symlink is over-written.
1270 * - getLinkTarget(f) - Returns the target of the symlink.
1271 * - getFileLinkStatus(f) - Returns a FileStatus object describing
1272 * the symlink.
1273 * Some functions, create() and mkdir(), expect the final path component
1274 * does not exist. If they are given a path that refers to a symlink that
1275 * does exist they behave as if the path referred to an existing file or
1276 * directory. All other functions fully resolve, ie follow, the symlink.
1277 * These are: open, setReplication, setOwner, setTimes, setWorkingDirectory,
1278 * setPermission, getFileChecksum, setVerifyChecksum, getFileBlockLocations,
1279 * getFsStatus, getFileStatus, exists, and listStatus.
1280 *
1281 * Symlink targets are stored as given to createSymlink, assuming the
1282 * underlying file system is capable of storing a fully qualified URI.
1283 * Dangling symlinks are permitted. FileContext supports four types of
1284 * symlink targets, and resolves them as follows
1285 * <pre>
1286 * Given a path referring to a symlink of form:
1287 *
1288 * <---X--->
1289 * fs://host/A/B/link
1290 * <-----Y----->
1291 *
1292 * In this path X is the scheme and authority that identify the file system,
1293 * and Y is the path leading up to the final path component "link". If Y is
1294 * a symlink itself then let Y' be the target of Y and X' be the scheme and
1295 * authority of Y'. Symlink targets may be:
1296 *
1297 * 1. Fully qualified URIs
1298 *
1299 * fs://hostX/A/B/file Resolved according to the target file system.
1300 *
1301 * 2. Partially qualified URIs (eg scheme but no host)
1302 *
1303 * fs:///A/B/file Resolved according to the target file system. Eg resolving
1304 * a symlink to hdfs:///A results in an exception because
1305 * HDFS URIs must be fully qualified, while a symlink to
1306 * file:///A will not since Hadoop's local file systems
1307 * require partially qualified URIs.
1308 *
1309 * 3. Relative paths
1310 *
1311 * path Resolves to [Y'][path]. Eg if Y resolves to hdfs://host/A and path
1312 * is "../B/file" then [Y'][path] is hdfs://host/B/file
1313 *
1314 * 4. Absolute paths
1315 *
1316 * path Resolves to [X'][path]. Eg if Y resolves to hdfs://host/A/B and path
1317 * is "/file" then [X'][path] is hdfs://host/file
1318 * </pre>
1319 *
1320 * @param target the target of the symbolic link
1321 * @param link the path to be created that points to target
1322 * @param createParent if true then missing parent dirs are created if
1323 * false then parent must exist
1324 *
1325 *
1326 * @throws AccessControlException If access is denied
1327 * @throws FileAlreadyExistsException If file <code>link</code> already exists
1328 * @throws FileNotFoundException If <code>target</code> does not exist
1329 * @throws ParentNotDirectoryException If parent of <code>link</code> is not a
1330 * directory.
1331 * @throws UnsupportedFileSystemException If file system for
1332 * <code>target</code> or <code>link</code> is not supported
1333 * @throws IOException If an I/O error occurred
1334 */
1335 public void createSymlink(final Path target, final Path link,
1336 final boolean createParent) throws AccessControlException,
1337 FileAlreadyExistsException, FileNotFoundException,
1338 ParentNotDirectoryException, UnsupportedFileSystemException,
1339 IOException {
1340 final Path nonRelLink = fixRelativePart(link);
1341 new FSLinkResolver<Void>() {
1342 public Void next(final AbstractFileSystem fs, final Path p)
1343 throws IOException, UnresolvedLinkException {
1344 fs.createSymlink(target, p, createParent);
1345 return null;
1346 }
1347 }.resolve(this, nonRelLink);
1348 }
1349
1350 /**
1351 * List the statuses of the files/directories in the given path if the path is
1352 * a directory.
1353 *
1354 * @param f is the path
1355 *
1356 * @return an iterator that traverses statuses of the files/directories
1357 * in the given path
1358 *
1359 * @throws AccessControlException If access is denied
1360 * @throws FileNotFoundException If <code>f</code> does not exist
1361 * @throws UnsupportedFileSystemException If file system for <code>f</code> is
1362 * not supported
1363 * @throws IOException If an I/O error occurred
1364 *
1365 * Exceptions applicable to file systems accessed over RPC:
1366 * @throws RpcClientException If an exception occurred in the RPC client
1367 * @throws RpcServerException If an exception occurred in the RPC server
1368 * @throws UnexpectedServerException If server implementation throws
1369 * undeclared exception to RPC server
1370 */
1371 public RemoteIterator<FileStatus> listStatus(final Path f) throws
1372 AccessControlException, FileNotFoundException,
1373 UnsupportedFileSystemException, IOException {
1374 final Path absF = fixRelativePart(f);
1375 return new FSLinkResolver<RemoteIterator<FileStatus>>() {
1376 public RemoteIterator<FileStatus> next(
1377 final AbstractFileSystem fs, final Path p)
1378 throws IOException, UnresolvedLinkException {
1379 return fs.listStatusIterator(p);
1380 }
1381 }.resolve(this, absF);
1382 }
1383
1384 /**
1385 * @return an iterator over the corrupt files under the given path
1386 * (may contain duplicates if a file has more than one corrupt block)
1387 * @throws IOException If an I/O error occurred
1388 */
1389 public RemoteIterator<Path> listCorruptFileBlocks(Path path)
1390 throws IOException {
1391 final Path absF = fixRelativePart(path);
1392 return new FSLinkResolver<RemoteIterator<Path>>() {
1393 @Override
1394 public RemoteIterator<Path> next(final AbstractFileSystem fs,
1395 final Path p)
1396 throws IOException, UnresolvedLinkException {
1397 return fs.listCorruptFileBlocks(p);
1398 }
1399 }.resolve(this, absF);
1400 }
1401
1402 /**
1403 * List the statuses of the files/directories in the given path if the path is
1404 * a directory.
1405 * Return the file's status and block locations if the path is a file.
1406 *
1407 * If a returned status is a file, it contains the file's block locations.
1408 *
1409 * @param f is the path
1410 *
1411 * @return an iterator that traverses statuses of the files/directories
1412 * in the given path
1413 * If any IO exception (for example the input directory gets deleted while
1414 * listing is being executed), next() or hasNext() of the returned iterator
1415 * may throw a RuntimeException with the io exception as the cause.
1416 *
1417 * @throws AccessControlException If access is denied
1418 * @throws FileNotFoundException If <code>f</code> does not exist
1419 * @throws UnsupportedFileSystemException If file system for <code>f</code> is
1420 * not supported
1421 * @throws IOException If an I/O error occurred
1422 *
1423 * Exceptions applicable to file systems accessed over RPC:
1424 * @throws RpcClientException If an exception occurred in the RPC client
1425 * @throws RpcServerException If an exception occurred in the RPC server
1426 * @throws UnexpectedServerException If server implementation throws
1427 * undeclared exception to RPC server
1428 */
1429 public RemoteIterator<LocatedFileStatus> listLocatedStatus(
1430 final Path f) throws
1431 AccessControlException, FileNotFoundException,
1432 UnsupportedFileSystemException, IOException {
1433 final Path absF = fixRelativePart(f);
1434 return new FSLinkResolver<RemoteIterator<LocatedFileStatus>>() {
1435 public RemoteIterator<LocatedFileStatus> next(
1436 final AbstractFileSystem fs, final Path p)
1437 throws IOException, UnresolvedLinkException {
1438 return fs.listLocatedStatus(p);
1439 }
1440 }.resolve(this, absF);
1441 }
1442
1443 /**
1444 * Mark a path to be deleted on JVM shutdown.
1445 *
1446 * @param f the existing path to delete.
1447 *
1448 * @return true if deleteOnExit is successful, otherwise false.
1449 *
1450 * @throws AccessControlException If access is denied
1451 * @throws UnsupportedFileSystemException If file system for <code>f</code> is
1452 * not supported
1453 * @throws IOException If an I/O error occurred
1454 *
1455 * Exceptions applicable to file systems accessed over RPC:
1456 * @throws RpcClientException If an exception occurred in the RPC client
1457 * @throws RpcServerException If an exception occurred in the RPC server
1458 * @throws UnexpectedServerException If server implementation throws
1459 * undeclared exception to RPC server
1460 */
1461 public boolean deleteOnExit(Path f) throws AccessControlException,
1462 IOException {
1463 if (!this.util().exists(f)) {
1464 return false;
1465 }
1466 synchronized (DELETE_ON_EXIT) {
1467 if (DELETE_ON_EXIT.isEmpty()) {
1468 ShutdownHookManager.get().addShutdownHook(FINALIZER, SHUTDOWN_HOOK_PRIORITY);
1469 }
1470
1471 Set<Path> set = DELETE_ON_EXIT.get(this);
1472 if (set == null) {
1473 set = new TreeSet<Path>();
1474 DELETE_ON_EXIT.put(this, set);
1475 }
1476 set.add(f);
1477 }
1478 return true;
1479 }
1480
1481 private final Util util;
1482 public Util util() {
1483 return util;
1484 }
1485
1486
1487 /**
1488 * Utility/library methods built over the basic FileContext methods.
1489 * Since these are library functions, the operations are not atomic
1490 * and some of them may partially complete if other threads are making
1491 * changes to the same part of the name space.
1492 */
1493 public class Util {
1494 /**
1495 * Does the file exist?
1496 * Note: Avoid using this method if you already have FileStatus in hand.
1497 * Instead reuse the FileStatus
1498 * @param f the file or dir to be checked
1499 *
1500 * @throws AccessControlException If access is denied
1501 * @throws IOException If an I/O error occurred
1502 * @throws UnsupportedFileSystemException If file system for <code>f</code> is
1503 * not supported
1504 *
1505 * Exceptions applicable to file systems accessed over RPC:
1506 * @throws RpcClientException If an exception occurred in the RPC client
1507 * @throws RpcServerException If an exception occurred in the RPC server
1508 * @throws UnexpectedServerException If server implementation throws
1509 * undeclared exception to RPC server
1510 */
1511 public boolean exists(final Path f) throws AccessControlException,
1512 UnsupportedFileSystemException, IOException {
1513 try {
1514 FileStatus fs = FileContext.this.getFileStatus(f);
1515 assert fs != null;
1516 return true;
1517 } catch (FileNotFoundException e) {
1518 return false;
1519 }
1520 }
1521
1522 /**
1523 * Return a list of file status objects that corresponds to supplied paths
1524 * excluding those non-existent paths.
1525 *
1526 * @param paths list of paths we want information from
1527 *
1528 * @return a list of FileStatus objects
1529 *
1530 * @throws AccessControlException If access is denied
1531 * @throws IOException If an I/O error occurred
1532 *
1533 * Exceptions applicable to file systems accessed over RPC:
1534 * @throws RpcClientException If an exception occurred in the RPC client
1535 * @throws RpcServerException If an exception occurred in the RPC server
1536 * @throws UnexpectedServerException If server implementation throws
1537 * undeclared exception to RPC server
1538 */
1539 private FileStatus[] getFileStatus(Path[] paths)
1540 throws AccessControlException, IOException {
1541 if (paths == null) {
1542 return null;
1543 }
1544 ArrayList<FileStatus> results = new ArrayList<FileStatus>(paths.length);
1545 for (int i = 0; i < paths.length; i++) {
1546 try {
1547 results.add(FileContext.this.getFileStatus(paths[i]));
1548 } catch (FileNotFoundException fnfe) {
1549 // ignoring
1550 }
1551 }
1552 return results.toArray(new FileStatus[results.size()]);
1553 }
1554
1555
1556 /**
1557 * Return the {@link ContentSummary} of path f.
1558 * @param f path
1559 *
1560 * @return the {@link ContentSummary} of path f.
1561 *
1562 * @throws AccessControlException If access is denied
1563 * @throws FileNotFoundException If <code>f</code> does not exist
1564 * @throws UnsupportedFileSystemException If file system for
1565 * <code>f</code> is not supported
1566 * @throws IOException If an I/O error occurred
1567 *
1568 * Exceptions applicable to file systems accessed over RPC:
1569 * @throws RpcClientException If an exception occurred in the RPC client
1570 * @throws RpcServerException If an exception occurred in the RPC server
1571 * @throws UnexpectedServerException If server implementation throws
1572 * undeclared exception to RPC server
1573 */
1574 public ContentSummary getContentSummary(Path f)
1575 throws AccessControlException, FileNotFoundException,
1576 UnsupportedFileSystemException, IOException {
1577 FileStatus status = FileContext.this.getFileStatus(f);
1578 if (status.isFile()) {
1579 return new ContentSummary(status.getLen(), 1, 0);
1580 }
1581 long[] summary = {0, 0, 1};
1582 RemoteIterator<FileStatus> statusIterator =
1583 FileContext.this.listStatus(f);
1584 while(statusIterator.hasNext()) {
1585 FileStatus s = statusIterator.next();
1586 ContentSummary c = s.isDirectory() ? getContentSummary(s.getPath()) :
1587 new ContentSummary(s.getLen(), 1, 0);
1588 summary[0] += c.getLength();
1589 summary[1] += c.getFileCount();
1590 summary[2] += c.getDirectoryCount();
1591 }
1592 return new ContentSummary(summary[0], summary[1], summary[2]);
1593 }
1594
1595 /**
1596 * See {@link #listStatus(Path[], PathFilter)}
1597 */
1598 public FileStatus[] listStatus(Path[] files) throws AccessControlException,
1599 FileNotFoundException, IOException {
1600 return listStatus(files, DEFAULT_FILTER);
1601 }
1602
1603 /**
1604 * Filter files/directories in the given path using the user-supplied path
1605 * filter.
1606 *
1607 * @param f is the path name
1608 * @param filter is the user-supplied path filter
1609 *
1610 * @return an array of FileStatus objects for the files under the given path
1611 * after applying the filter
1612 *
1613 * @throws AccessControlException If access is denied
1614 * @throws FileNotFoundException If <code>f</code> does not exist
1615 * @throws UnsupportedFileSystemException If file system for
1616 * <code>pathPattern</code> is not supported
1617 * @throws IOException If an I/O error occurred
1618 *
1619 * Exceptions applicable to file systems accessed over RPC:
1620 * @throws RpcClientException If an exception occurred in the RPC client
1621 * @throws RpcServerException If an exception occurred in the RPC server
1622 * @throws UnexpectedServerException If server implementation throws
1623 * undeclared exception to RPC server
1624 */
1625 public FileStatus[] listStatus(Path f, PathFilter filter)
1626 throws AccessControlException, FileNotFoundException,
1627 UnsupportedFileSystemException, IOException {
1628 ArrayList<FileStatus> results = new ArrayList<FileStatus>();
1629 listStatus(results, f, filter);
1630 return results.toArray(new FileStatus[results.size()]);
1631 }
1632
1633 /**
1634 * Filter files/directories in the given list of paths using user-supplied
1635 * path filter.
1636 *
1637 * @param files is a list of paths
1638 * @param filter is the filter
1639 *
1640 * @return a list of statuses for the files under the given paths after
1641 * applying the filter
1642 *
1643 * @throws AccessControlException If access is denied
1644 * @throws FileNotFoundException If a file in <code>files</code> does not
1645 * exist
1646 * @throws IOException If an I/O error occurred
1647 *
1648 * Exceptions applicable to file systems accessed over RPC:
1649 * @throws RpcClientException If an exception occurred in the RPC client
1650 * @throws RpcServerException If an exception occurred in the RPC server
1651 * @throws UnexpectedServerException If server implementation throws
1652 * undeclared exception to RPC server
1653 */
1654 public FileStatus[] listStatus(Path[] files, PathFilter filter)
1655 throws AccessControlException, FileNotFoundException, IOException {
1656 ArrayList<FileStatus> results = new ArrayList<FileStatus>();
1657 for (int i = 0; i < files.length; i++) {
1658 listStatus(results, files[i], filter);
1659 }
1660 return results.toArray(new FileStatus[results.size()]);
1661 }
1662
1663 /*
1664 * Filter files/directories in the given path using the user-supplied path
1665 * filter. Results are added to the given array <code>results</code>.
1666 */
1667 private void listStatus(ArrayList<FileStatus> results, Path f,
1668 PathFilter filter) throws AccessControlException,
1669 FileNotFoundException, IOException {
1670 FileStatus[] listing = listStatus(f);
1671 if (listing != null) {
1672 for (int i = 0; i < listing.length; i++) {
1673 if (filter.accept(listing[i].getPath())) {
1674 results.add(listing[i]);
1675 }
1676 }
1677 }
1678 }
1679
1680 /**
1681 * List the statuses of the files/directories in the given path
1682 * if the path is a directory.
1683 *
1684 * @param f is the path
1685 *
1686 * @return an array that contains statuses of the files/directories
1687 * in the given path
1688 *
1689 * @throws AccessControlException If access is denied
1690 * @throws FileNotFoundException If <code>f</code> does not exist
1691 * @throws UnsupportedFileSystemException If file system for <code>f</code> is
1692 * not supported
1693 * @throws IOException If an I/O error occurred
1694 *
1695 * Exceptions applicable to file systems accessed over RPC:
1696 * @throws RpcClientException If an exception occurred in the RPC client
1697 * @throws RpcServerException If an exception occurred in the RPC server
1698 * @throws UnexpectedServerException If server implementation throws
1699 * undeclared exception to RPC server
1700 */
1701 public FileStatus[] listStatus(final Path f) throws AccessControlException,
1702 FileNotFoundException, UnsupportedFileSystemException,
1703 IOException {
1704 final Path absF = fixRelativePart(f);
1705 return new FSLinkResolver<FileStatus[]>() {
1706 public FileStatus[] next(final AbstractFileSystem fs, final Path p)
1707 throws IOException, UnresolvedLinkException {
1708 return fs.listStatus(p);
1709 }
1710 }.resolve(FileContext.this, absF);
1711 }
1712
1713 /**
1714 * List the statuses and block locations of the files in the given path.
1715 *
1716 * If the path is a directory,
1717 * if recursive is false, returns files in the directory;
1718 * if recursive is true, return files in the subtree rooted at the path.
1719 * The subtree is traversed in the depth-first order.
1720 * If the path is a file, return the file's status and block locations.
1721 * Files across symbolic links are also returned.
1722 *
1723 * @param f is the path
1724 * @param recursive if the subdirectories need to be traversed recursively
1725 *
1726 * @return an iterator that traverses statuses of the files
1727 * If any IO exception (for example a sub-directory gets deleted while
1728 * listing is being executed), next() or hasNext() of the returned iterator
1729 * may throw a RuntimeException with the IO exception as the cause.
1730 *
1731 * @throws AccessControlException If access is denied
1732 * @throws FileNotFoundException If <code>f</code> does not exist
1733 * @throws UnsupportedFileSystemException If file system for <code>f</code>
1734 * is not supported
1735 * @throws IOException If an I/O error occurred
1736 *
1737 * Exceptions applicable to file systems accessed over RPC:
1738 * @throws RpcClientException If an exception occurred in the RPC client
1739 * @throws RpcServerException If an exception occurred in the RPC server
1740 * @throws UnexpectedServerException If server implementation throws
1741 * undeclared exception to RPC server
1742 */
1743 public RemoteIterator<LocatedFileStatus> listFiles(
1744 final Path f, final boolean recursive) throws AccessControlException,
1745 FileNotFoundException, UnsupportedFileSystemException,
1746 IOException {
1747 return new RemoteIterator<LocatedFileStatus>() {
1748 private Stack<RemoteIterator<LocatedFileStatus>> itors =
1749 new Stack<RemoteIterator<LocatedFileStatus>>();
1750 RemoteIterator<LocatedFileStatus> curItor = listLocatedStatus(f);
1751 LocatedFileStatus curFile;
1752
1753 /**
1754 * Returns <tt>true</tt> if the iterator has more files.
1755 *
1756 * @return <tt>true</tt> if the iterator has more files.
1757 * @throws AccessControlException if not allowed to access next
1758 * file's status or locations
1759 * @throws FileNotFoundException if next file does not exist any more
1760 * @throws UnsupportedFileSystemException if next file's
1761 * fs is unsupported
1762 * @throws IOException for all other IO errors
1763 * for example, NameNode is not avaialbe or
1764 * NameNode throws IOException due to an error
1765 * while getting the status or block locations
1766 */
1767 @Override
1768 public boolean hasNext() throws IOException {
1769 while (curFile == null) {
1770 if (curItor.hasNext()) {
1771 handleFileStat(curItor.next());
1772 } else if (!itors.empty()) {
1773 curItor = itors.pop();
1774 } else {
1775 return false;
1776 }
1777 }
1778 return true;
1779 }
1780
1781 /**
1782 * Process the input stat.
1783 * If it is a file, return the file stat.
1784 * If it is a directory, traverse the directory if recursive is true;
1785 * ignore it if recursive is false.
1786 * If it is a symlink, resolve the symlink first and then process it
1787 * depending on if it is a file or directory.
1788 * @param stat input status
1789 * @throws AccessControlException if access is denied
1790 * @throws FileNotFoundException if file is not found
1791 * @throws UnsupportedFileSystemException if fs is not supported
1792 * @throws IOException for all other IO errors
1793 */
1794 private void handleFileStat(LocatedFileStatus stat)
1795 throws IOException {
1796 if (stat.isFile()) { // file
1797 curFile = stat;
1798 } else if (stat.isSymlink()) { // symbolic link
1799 // resolve symbolic link
1800 FileStatus symstat = FileContext.this.getFileStatus(
1801 stat.getSymlink());
1802 if (symstat.isFile() || (recursive && symstat.isDirectory())) {
1803 itors.push(curItor);
1804 curItor = listLocatedStatus(stat.getPath());
1805 }
1806 } else if (recursive) { // directory
1807 itors.push(curItor);
1808 curItor = listLocatedStatus(stat.getPath());
1809 }
1810 }
1811
1812 /**
1813 * Returns the next file's status with its block locations
1814 *
1815 * @throws AccessControlException if not allowed to access next
1816 * file's status or locations
1817 * @throws FileNotFoundException if next file does not exist any more
1818 * @throws UnsupportedFileSystemException if next file's
1819 * fs is unsupported
1820 * @throws IOException for all other IO errors
1821 * for example, NameNode is not avaialbe or
1822 * NameNode throws IOException due to an error
1823 * while getting the status or block locations
1824 */
1825 @Override
1826 public LocatedFileStatus next() throws IOException {
1827 if (hasNext()) {
1828 LocatedFileStatus result = curFile;
1829 curFile = null;
1830 return result;
1831 }
1832 throw new java.util.NoSuchElementException("No more entry in " + f);
1833 }
1834 };
1835 }
1836
1837 /**
1838 * <p>Return all the files that match filePattern and are not checksum
1839 * files. Results are sorted by their names.
1840 *
1841 * <p>
1842 * A filename pattern is composed of <i>regular</i> characters and
1843 * <i>special pattern matching</i> characters, which are:
1844 *
1845 * <dl>
1846 * <dd>
1847 * <dl>
1848 * <p>
1849 * <dt> <tt> ? </tt>
1850 * <dd> Matches any single character.
1851 *
1852 * <p>
1853 * <dt> <tt> * </tt>
1854 * <dd> Matches zero or more characters.
1855 *
1856 * <p>
1857 * <dt> <tt> [<i>abc</i>] </tt>
1858 * <dd> Matches a single character from character set
1859 * <tt>{<i>a,b,c</i>}</tt>.
1860 *
1861 * <p>
1862 * <dt> <tt> [<i>a</i>-<i>b</i>] </tt>
1863 * <dd> Matches a single character from the character range
1864 * <tt>{<i>a...b</i>}</tt>. Note: character <tt><i>a</i></tt> must be
1865 * lexicographically less than or equal to character <tt><i>b</i></tt>.
1866 *
1867 * <p>
1868 * <dt> <tt> [^<i>a</i>] </tt>
1869 * <dd> Matches a single char that is not from character set or range
1870 * <tt>{<i>a</i>}</tt>. Note that the <tt>^</tt> character must occur
1871 * immediately to the right of the opening bracket.
1872 *
1873 * <p>
1874 * <dt> <tt> \<i>c</i> </tt>
1875 * <dd> Removes (escapes) any special meaning of character <i>c</i>.
1876 *
1877 * <p>
1878 * <dt> <tt> {ab,cd} </tt>
1879 * <dd> Matches a string from the string set <tt>{<i>ab, cd</i>} </tt>
1880 *
1881 * <p>
1882 * <dt> <tt> {ab,c{de,fh}} </tt>
1883 * <dd> Matches a string from string set <tt>{<i>ab, cde, cfh</i>}</tt>
1884 *
1885 * </dl>
1886 * </dd>
1887 * </dl>
1888 *
1889 * @param pathPattern a regular expression specifying a path pattern
1890 *
1891 * @return an array of paths that match the path pattern
1892 *
1893 * @throws AccessControlException If access is denied
1894 * @throws UnsupportedFileSystemException If file system for
1895 * <code>pathPattern</code> is not supported
1896 * @throws IOException If an I/O error occurred
1897 *
1898 * Exceptions applicable to file systems accessed over RPC:
1899 * @throws RpcClientException If an exception occurred in the RPC client
1900 * @throws RpcServerException If an exception occurred in the RPC server
1901 * @throws UnexpectedServerException If server implementation throws
1902 * undeclared exception to RPC server
1903 */
1904 public FileStatus[] globStatus(Path pathPattern)
1905 throws AccessControlException, UnsupportedFileSystemException,
1906 IOException {
1907 return globStatus(pathPattern, DEFAULT_FILTER);
1908 }
1909
1910 /**
1911 * Return an array of FileStatus objects whose path names match pathPattern
1912 * and is accepted by the user-supplied path filter. Results are sorted by
1913 * their path names.
1914 * Return null if pathPattern has no glob and the path does not exist.
1915 * Return an empty array if pathPattern has a glob and no path matches it.
1916 *
1917 * @param pathPattern regular expression specifying the path pattern
1918 * @param filter user-supplied path filter
1919 *
1920 * @return an array of FileStatus objects
1921 *
1922 * @throws AccessControlException If access is denied
1923 * @throws UnsupportedFileSystemException If file system for
1924 * <code>pathPattern</code> is not supported
1925 * @throws IOException If an I/O error occurred
1926 *
1927 * Exceptions applicable to file systems accessed over RPC:
1928 * @throws RpcClientException If an exception occurred in the RPC client
1929 * @throws RpcServerException If an exception occurred in the RPC server
1930 * @throws UnexpectedServerException If server implementation throws
1931 * undeclared exception to RPC server
1932 */
1933 public FileStatus[] globStatus(final Path pathPattern,
1934 final PathFilter filter) throws AccessControlException,
1935 UnsupportedFileSystemException, IOException {
1936 URI uri = getFSofPath(fixRelativePart(pathPattern)).getUri();
1937
1938 String filename = pathPattern.toUri().getPath();
1939
1940 List<String> filePatterns = GlobExpander.expand(filename);
1941 if (filePatterns.size() == 1) {
1942 Path absPathPattern = fixRelativePart(pathPattern);
1943 return globStatusInternal(uri, new Path(absPathPattern.toUri()
1944 .getPath()), filter);
1945 } else {
1946 List<FileStatus> results = new ArrayList<FileStatus>();
1947 for (String iFilePattern : filePatterns) {
1948 Path iAbsFilePattern = fixRelativePart(new Path(iFilePattern));
1949 FileStatus[] files = globStatusInternal(uri, iAbsFilePattern, filter);
1950 for (FileStatus file : files) {
1951 results.add(file);
1952 }
1953 }
1954 return results.toArray(new FileStatus[results.size()]);
1955 }
1956 }
1957
1958 /**
1959 *
1960 * @param uri for all the inPathPattern
1961 * @param inPathPattern - without the scheme & authority (take from uri)
1962 * @param filter
1963 *
1964 * @return an array of FileStatus objects
1965 *
1966 * @throws AccessControlException If access is denied
1967 * @throws IOException If an I/O error occurred
1968 */
1969 private FileStatus[] globStatusInternal(final URI uri,
1970 final Path inPathPattern, final PathFilter filter)
1971 throws AccessControlException, IOException
1972 {
1973 Path[] parents = new Path[1];
1974 int level = 0;
1975
1976 assert(inPathPattern.toUri().getScheme() == null &&
1977 inPathPattern.toUri().getAuthority() == null &&
1978 inPathPattern.isUriPathAbsolute());
1979
1980
1981 String filename = inPathPattern.toUri().getPath();
1982
1983 // path has only zero component
1984 if ("".equals(filename) || Path.SEPARATOR.equals(filename)) {
1985 Path p = inPathPattern.makeQualified(uri, null);
1986 return getFileStatus(new Path[]{p});
1987 }
1988
1989 // path has at least one component
1990 String[] components = filename.split(Path.SEPARATOR);
1991
1992 // Path is absolute, first component is "/" hence first component
1993 // is the uri root
1994 parents[0] = new Path(new Path(uri), new Path("/"));
1995 level = 1;
1996
1997 // glob the paths that match the parent path, ie. [0, components.length-1]
1998 boolean[] hasGlob = new boolean[]{false};
1999 Path[] relParentPaths =
2000 globPathsLevel(parents, components, level, hasGlob);
2001 FileStatus[] results;
2002
2003 if (relParentPaths == null || relParentPaths.length == 0) {
2004 results = null;
2005 } else {
2006 // fix the pathes to be abs
2007 Path[] parentPaths = new Path [relParentPaths.length];
2008 for(int i=0; i<relParentPaths.length; i++) {
2009 parentPaths[i] = relParentPaths[i].makeQualified(uri, null);
2010 }
2011
2012 // Now work on the last component of the path
2013 GlobFilter fp =
2014 new GlobFilter(components[components.length - 1], filter);
2015 if (fp.hasPattern()) { // last component has a pattern
2016 // list parent directories and then glob the results
2017 try {
2018 results = listStatus(parentPaths, fp);
2019 } catch (FileNotFoundException e) {
2020 results = null;
2021 }
2022 hasGlob[0] = true;
2023 } else { // last component does not have a pattern
2024 // get all the path names
2025 ArrayList<Path> filteredPaths =
2026 new ArrayList<Path>(parentPaths.length);
2027 for (int i = 0; i < parentPaths.length; i++) {
2028 parentPaths[i] = new Path(parentPaths[i],
2029 components[components.length - 1]);
2030 if (fp.accept(parentPaths[i])) {
2031 filteredPaths.add(parentPaths[i]);
2032 }
2033 }
2034 // get all their statuses
2035 results = getFileStatus(
2036 filteredPaths.toArray(new Path[filteredPaths.size()]));
2037 }
2038 }
2039
2040 // Decide if the pathPattern contains a glob or not
2041 if (results == null) {
2042 if (hasGlob[0]) {
2043 results = new FileStatus[0];
2044 }
2045 } else {
2046 if (results.length == 0) {
2047 if (!hasGlob[0]) {
2048 results = null;
2049 }
2050 } else {
2051 Arrays.sort(results);
2052 }
2053 }
2054 return results;
2055 }
2056
2057 /*
2058 * For a path of N components, return a list of paths that match the
2059 * components [<code>level</code>, <code>N-1</code>].
2060 */
2061 private Path[] globPathsLevel(Path[] parents, String[] filePattern,
2062 int level, boolean[] hasGlob) throws AccessControlException,
2063 FileNotFoundException, IOException {
2064 if (level == filePattern.length - 1) {
2065 return parents;
2066 }
2067 if (parents == null || parents.length == 0) {
2068 return null;
2069 }
2070 GlobFilter fp = new GlobFilter(filePattern[level]);
2071 if (fp.hasPattern()) {
2072 try {
2073 parents = FileUtil.stat2Paths(listStatus(parents, fp));
2074 } catch (FileNotFoundException e) {
2075 parents = null;
2076 }
2077 hasGlob[0] = true;
2078 } else {
2079 for (int i = 0; i < parents.length; i++) {
2080 parents[i] = new Path(parents[i], filePattern[level]);
2081 }
2082 }
2083 return globPathsLevel(parents, filePattern, level + 1, hasGlob);
2084 }
2085
2086 /**
2087 * Copy file from src to dest. See
2088 * {@link #copy(Path, Path, boolean, boolean)}
2089 */
2090 public boolean copy(final Path src, final Path dst)
2091 throws AccessControlException, FileAlreadyExistsException,
2092 FileNotFoundException, ParentNotDirectoryException,
2093 UnsupportedFileSystemException, IOException {
2094 return copy(src, dst, false, false);
2095 }
2096
2097 /**
2098 * Copy from src to dst, optionally deleting src and overwriting dst.
2099 * @param src
2100 * @param dst
2101 * @param deleteSource - delete src if true
2102 * @param overwrite overwrite dst if true; throw IOException if dst exists
2103 * and overwrite is false.
2104 *
2105 * @return true if copy is successful
2106 *
2107 * @throws AccessControlException If access is denied
2108 * @throws FileAlreadyExistsException If <code>dst</code> already exists
2109 * @throws FileNotFoundException If <code>src</code> does not exist
2110 * @throws ParentNotDirectoryException If parent of <code>dst</code> is not
2111 * a directory
2112 * @throws UnsupportedFileSystemException If file system for
2113 * <code>src</code> or <code>dst</code> is not supported
2114 * @throws IOException If an I/O error occurred
2115 *
2116 * Exceptions applicable to file systems accessed over RPC:
2117 * @throws RpcClientException If an exception occurred in the RPC client
2118 * @throws RpcServerException If an exception occurred in the RPC server
2119 * @throws UnexpectedServerException If server implementation throws
2120 * undeclared exception to RPC server
2121 *
2122 * RuntimeExceptions:
2123 * @throws InvalidPathException If path <code>dst</code> is invalid
2124 */
2125 public boolean copy(final Path src, final Path dst, boolean deleteSource,
2126 boolean overwrite) throws AccessControlException,
2127 FileAlreadyExistsException, FileNotFoundException,
2128 ParentNotDirectoryException, UnsupportedFileSystemException,
2129 IOException {
2130 checkNotSchemeWithRelative(src);
2131 checkNotSchemeWithRelative(dst);
2132 Path qSrc = makeQualified(src);
2133 Path qDst = makeQualified(dst);
2134 checkDest(qSrc.getName(), qDst, overwrite);
2135 FileStatus fs = FileContext.this.getFileStatus(qSrc);
2136 if (fs.isDirectory()) {
2137 checkDependencies(qSrc, qDst);
2138 mkdir(qDst, FsPermission.getDefault(), true);
2139 FileStatus[] contents = listStatus(qSrc);
2140 for (FileStatus content : contents) {
2141 copy(makeQualified(content.getPath()), makeQualified(new Path(qDst,
2142 content.getPath().getName())), deleteSource, overwrite);
2143 }
2144 } else {
2145 InputStream in=null;
2146 OutputStream out = null;
2147 try {
2148 in = open(qSrc);
2149 EnumSet<CreateFlag> createFlag = overwrite ? EnumSet.of(
2150 CreateFlag.CREATE, CreateFlag.OVERWRITE) :
2151 EnumSet.of(CreateFlag.CREATE);
2152 out = create(qDst, createFlag);
2153 IOUtils.copyBytes(in, out, conf, true);
2154 } catch (IOException e) {
2155 IOUtils.closeStream(out);
2156 IOUtils.closeStream(in);
2157 throw e;
2158 }
2159 }
2160 if (deleteSource) {
2161 return delete(qSrc, true);
2162 } else {
2163 return true;
2164 }
2165 }
2166 }
2167
2168 /**
2169 * Check if copying srcName to dst would overwrite an existing
2170 * file or directory.
2171 * @param srcName File or directory to be copied.
2172 * @param dst Destination to copy srcName to.
2173 * @param overwrite Whether it's ok to overwrite an existing file.
2174 * @throws AccessControlException If access is denied.
2175 * @throws IOException If dst is an existing directory, or dst is an
2176 * existing file and the overwrite option is not passed.
2177 */
2178 private void checkDest(String srcName, Path dst, boolean overwrite)
2179 throws AccessControlException, IOException {
2180 try {
2181 FileStatus dstFs = getFileStatus(dst);
2182 if (dstFs.isDirectory()) {
2183 if (null == srcName) {
2184 throw new IOException("Target " + dst + " is a directory");
2185 }
2186 // Recurse to check if dst/srcName exists.
2187 checkDest(null, new Path(dst, srcName), overwrite);
2188 } else if (!overwrite) {
2189 throw new IOException("Target " + new Path(dst, srcName)
2190 + " already exists");
2191 }
2192 } catch (FileNotFoundException e) {
2193 // dst does not exist - OK to copy.
2194 }
2195 }
2196
2197 //
2198 // If the destination is a subdirectory of the source, then
2199 // generate exception
2200 //
2201 private static void checkDependencies(Path qualSrc, Path qualDst)
2202 throws IOException {
2203 if (isSameFS(qualSrc, qualDst)) {
2204 String srcq = qualSrc.toString() + Path.SEPARATOR;
2205 String dstq = qualDst.toString() + Path.SEPARATOR;
2206 if (dstq.startsWith(srcq)) {
2207 if (srcq.length() == dstq.length()) {
2208 throw new IOException("Cannot copy " + qualSrc + " to itself.");
2209 } else {
2210 throw new IOException("Cannot copy " + qualSrc +
2211 " to its subdirectory " + qualDst);
2212 }
2213 }
2214 }
2215 }
2216
2217 /**
2218 * Are qualSrc and qualDst of the same file system?
2219 * @param qualPath1 - fully qualified path
2220 * @param qualPath2 - fully qualified path
2221 * @return
2222 */
2223 private static boolean isSameFS(Path qualPath1, Path qualPath2) {
2224 URI srcUri = qualPath1.toUri();
2225 URI dstUri = qualPath2.toUri();
2226 return (srcUri.getScheme().equals(dstUri.getScheme()) &&
2227 !(srcUri.getAuthority() != null && dstUri.getAuthority() != null && srcUri
2228 .getAuthority().equals(dstUri.getAuthority())));
2229 }
2230
2231 /**
2232 * Deletes all the paths in deleteOnExit on JVM shutdown.
2233 */
2234 static class FileContextFinalizer implements Runnable {
2235 public synchronized void run() {
2236 processDeleteOnExit();
2237 }
2238 }
2239
2240 /**
2241 * Resolves all symbolic links in the specified path.
2242 * Returns the new path object.
2243 */
2244 protected Path resolve(final Path f) throws FileNotFoundException,
2245 UnresolvedLinkException, AccessControlException, IOException {
2246 return new FSLinkResolver<Path>() {
2247 public Path next(final AbstractFileSystem fs, final Path p)
2248 throws IOException, UnresolvedLinkException {
2249 return fs.resolvePath(p);
2250 }
2251 }.resolve(this, f);
2252 }
2253
2254 /**
2255 * Resolves all symbolic links in the specified path leading up
2256 * to, but not including the final path component.
2257 * @param f path to resolve
2258 * @return the new path object.
2259 */
2260 protected Path resolveIntermediate(final Path f) throws IOException {
2261 return new FSLinkResolver<FileStatus>() {
2262 public FileStatus next(final AbstractFileSystem fs, final Path p)
2263 throws IOException, UnresolvedLinkException {
2264 return fs.getFileLinkStatus(p);
2265 }
2266 }.resolve(this, f).getPath();
2267 }
2268
2269 /**
2270 * Returns the list of AbstractFileSystems accessed in the path. The list may
2271 * contain more than one AbstractFileSystems objects in case of symlinks.
2272 *
2273 * @param f
2274 * Path which needs to be resolved
2275 * @return List of AbstractFileSystems accessed in the path
2276 * @throws IOException
2277 */
2278 Set<AbstractFileSystem> resolveAbstractFileSystems(final Path f)
2279 throws IOException {
2280 final Path absF = fixRelativePart(f);
2281 final HashSet<AbstractFileSystem> result
2282 = new HashSet<AbstractFileSystem>();
2283 new FSLinkResolver<Void>() {
2284 public Void next(final AbstractFileSystem fs, final Path p)
2285 throws IOException, UnresolvedLinkException {
2286 result.add(fs);
2287 fs.getFileStatus(p);
2288 return null;
2289 }
2290 }.resolve(this, absF);
2291 return result;
2292 }
2293
2294 /**
2295 * Class used to perform an operation on and resolve symlinks in a
2296 * path. The operation may potentially span multiple file systems.
2297 */
2298 protected abstract class FSLinkResolver<T> {
2299 // The maximum number of symbolic link components in a path
2300 private static final int MAX_PATH_LINKS = 32;
2301
2302 /**
2303 * Generic helper function overridden on instantiation to perform a
2304 * specific operation on the given file system using the given path
2305 * which may result in an UnresolvedLinkException.
2306 * @param fs AbstractFileSystem to perform the operation on.
2307 * @param p Path given the file system.
2308 * @return Generic type determined by the specific implementation.
2309 * @throws UnresolvedLinkException If symbolic link <code>path</code> could
2310 * not be resolved
2311 * @throws IOException an I/O error occured
2312 */
2313 public abstract T next(final AbstractFileSystem fs, final Path p)
2314 throws IOException, UnresolvedLinkException;
2315
2316 /**
2317 * Performs the operation specified by the next function, calling it
2318 * repeatedly until all symlinks in the given path are resolved.
2319 * @param fc FileContext used to access file systems.
2320 * @param p The path to resolve symlinks in.
2321 * @return Generic type determined by the implementation of next.
2322 * @throws IOException
2323 */
2324 public T resolve(final FileContext fc, Path p) throws IOException {
2325 int count = 0;
2326 T in = null;
2327 Path first = p;
2328 // NB: More than one AbstractFileSystem can match a scheme, eg
2329 // "file" resolves to LocalFs but could have come by RawLocalFs.
2330 AbstractFileSystem fs = fc.getFSofPath(p);
2331
2332 // Loop until all symlinks are resolved or the limit is reached
2333 for (boolean isLink = true; isLink;) {
2334 try {
2335 in = next(fs, p);
2336 isLink = false;
2337 } catch (UnresolvedLinkException e) {
2338 if (count++ > MAX_PATH_LINKS) {
2339 throw new IOException("Possible cyclic loop while " +
2340 "following symbolic link " + first);
2341 }
2342 // Resolve the first unresolved path component
2343 p = qualifySymlinkTarget(fs, p, fs.getLinkTarget(p));
2344 fs = fc.getFSofPath(p);
2345 }
2346 }
2347 return in;
2348 }
2349 }
2350
2351 /**
2352 * Get the statistics for a particular file system
2353 *
2354 * @param uri
2355 * the uri to lookup the statistics. Only scheme and authority part
2356 * of the uri are used as the key to store and lookup.
2357 * @return a statistics object
2358 */
2359 public static Statistics getStatistics(URI uri) {
2360 return AbstractFileSystem.getStatistics(uri);
2361 }
2362
2363 /**
2364 * Clears all the statistics stored in AbstractFileSystem, for all the file
2365 * systems.
2366 */
2367 public static void clearStatistics() {
2368 AbstractFileSystem.clearStatistics();
2369 }
2370
2371 /**
2372 * Prints the statistics to standard output. File System is identified by the
2373 * scheme and authority.
2374 */
2375 public static void printStatistics() {
2376 AbstractFileSystem.printStatistics();
2377 }
2378
2379 /**
2380 * @return Map of uri and statistics for each filesystem instantiated. The uri
2381 * consists of scheme and authority for the filesystem.
2382 */
2383 public static Map<URI, Statistics> getAllStatistics() {
2384 return AbstractFileSystem.getAllStatistics();
2385 }
2386
2387 /**
2388 * Get delegation tokens for the file systems accessed for a given
2389 * path.
2390 * @param p Path for which delegations tokens are requested.
2391 * @param renewer the account name that is allowed to renew the token.
2392 * @return List of delegation tokens.
2393 * @throws IOException
2394 */
2395 @InterfaceAudience.LimitedPrivate( { "HDFS", "MapReduce" })
2396 public List<Token<?>> getDelegationTokens(
2397 Path p, String renewer) throws IOException {
2398 Set<AbstractFileSystem> afsSet = resolveAbstractFileSystems(p);
2399 List<Token<?>> tokenList =
2400 new ArrayList<Token<?>>();
2401 for (AbstractFileSystem afs : afsSet) {
2402 List<Token<?>> afsTokens = afs.getDelegationTokens(renewer);
2403 tokenList.addAll(afsTokens);
2404 }
2405 return tokenList;
2406 }
2407 }