001 /**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements. See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership. The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License. You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018 package org.apache.hadoop.fs;
019
020 import java.io.FileNotFoundException;
021 import java.io.IOException;
022 import java.io.InputStream;
023 import java.io.OutputStream;
024 import java.net.URI;
025 import java.security.PrivilegedExceptionAction;
026 import java.util.ArrayList;
027 import java.util.Arrays;
028 import java.util.EnumSet;
029 import java.util.HashSet;
030 import java.util.IdentityHashMap;
031 import java.util.List;
032 import java.util.Map;
033 import java.util.Set;
034 import java.util.Stack;
035 import java.util.TreeSet;
036 import java.util.Map.Entry;
037
038 import org.apache.commons.logging.Log;
039 import org.apache.commons.logging.LogFactory;
040 import org.apache.hadoop.HadoopIllegalArgumentException;
041 import org.apache.hadoop.classification.InterfaceAudience;
042 import org.apache.hadoop.classification.InterfaceStability;
043 import org.apache.hadoop.conf.Configuration;
044 import org.apache.hadoop.fs.FileSystem.Statistics;
045 import org.apache.hadoop.fs.Options.CreateOpts;
046 import org.apache.hadoop.fs.permission.FsPermission;
047 import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY;
048 import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_DEFAULT_NAME_DEFAULT;
049 import org.apache.hadoop.io.IOUtils;
050 import org.apache.hadoop.ipc.RpcClientException;
051 import org.apache.hadoop.ipc.RpcServerException;
052 import org.apache.hadoop.ipc.UnexpectedServerException;
053 import org.apache.hadoop.fs.InvalidPathException;
054 import org.apache.hadoop.security.AccessControlException;
055 import org.apache.hadoop.security.UserGroupInformation;
056 import org.apache.hadoop.security.token.Token;
057 import org.apache.hadoop.util.ShutdownHookManager;
058
059 /**
060 * The FileContext class provides an interface to the application writer for
061 * using the Hadoop file system.
062 * It provides a set of methods for the usual operation: create, open,
063 * list, etc
064 *
065 * <p>
066 * <b> *** Path Names *** </b>
067 * <p>
068 *
069 * The Hadoop file system supports a URI name space and URI names.
070 * It offers a forest of file systems that can be referenced using fully
071 * qualified URIs.
072 * Two common Hadoop file systems implementations are
073 * <ul>
074 * <li> the local file system: file:///path
075 * <li> the hdfs file system hdfs://nnAddress:nnPort/path
076 * </ul>
077 *
078 * While URI names are very flexible, it requires knowing the name or address
079 * of the server. For convenience one often wants to access the default system
080 * in one's environment without knowing its name/address. This has an
081 * additional benefit that it allows one to change one's default fs
082 * (e.g. admin moves application from cluster1 to cluster2).
083 * <p>
084 *
085 * To facilitate this, Hadoop supports a notion of a default file system.
086 * The user can set his default file system, although this is
087 * typically set up for you in your environment via your default config.
088 * A default file system implies a default scheme and authority; slash-relative
089 * names (such as /for/bar) are resolved relative to that default FS.
090 * Similarly a user can also have working-directory-relative names (i.e. names
091 * not starting with a slash). While the working directory is generally in the
092 * same default FS, the wd can be in a different FS.
093 * <p>
094 * Hence Hadoop path names can be one of:
095 * <ul>
096 * <li> fully qualified URI: scheme://authority/path
097 * <li> slash relative names: /path relative to the default file system
098 * <li> wd-relative names: path relative to the working dir
099 * </ul>
100 * Relative paths with scheme (scheme:foo/bar) are illegal.
101 *
102 * <p>
103 * <b>****The Role of the FileContext and configuration defaults****</b>
104 * <p>
 * The FileContext provides file namespace context for resolving file names;
 * it also contains the umask for permissions. In that sense it is like the
 * per-process file-related state in a Unix system.
108 * These two properties
109 * <ul>
 * <li> default file system (i.e. your slash)
111 * <li> umask
112 * </ul>
 * are, in general, obtained from the default configuration file
 * in your environment (see {@link Configuration}).
115 *
116 * No other configuration parameters are obtained from the default config as
117 * far as the file context layer is concerned. All file system instances
118 * (i.e. deployments of file systems) have default properties; we call these
119 * server side (SS) defaults. Operation like create allow one to select many
120 * properties: either pass them in as explicit parameters or use
121 * the SS properties.
122 * <p>
123 * The file system related SS defaults are
124 * <ul>
125 * <li> the home directory (default is "/user/userName")
126 * <li> the initial wd (only for local fs)
127 * <li> replication factor
128 * <li> block size
129 * <li> buffer size
130 * <li> encryptDataTransfer
131 * <li> checksum option. (checksumType and bytesPerChecksum)
132 * </ul>
133 *
134 * <p>
135 * <b> *** Usage Model for the FileContext class *** </b>
136 * <p>
137 * Example 1: use the default config read from the $HADOOP_CONFIG/core.xml.
138 * Unspecified values come from core-defaults.xml in the release jar.
139 * <ul>
140 * <li> myFContext = FileContext.getFileContext(); // uses the default config
141 * // which has your default FS
142 * <li> myFContext.create(path, ...);
143 * <li> myFContext.setWorkingDir(path)
144 * <li> myFContext.open (path, ...);
145 * </ul>
146 * Example 2: Get a FileContext with a specific URI as the default FS
147 * <ul>
148 * <li> myFContext = FileContext.getFileContext(URI)
149 * <li> myFContext.create(path, ...);
150 * ...
151 * </ul>
152 * Example 3: FileContext with local file system as the default
153 * <ul>
154 * <li> myFContext = FileContext.getLocalFSFileContext()
155 * <li> myFContext.create(path, ...);
156 * <li> ...
157 * </ul>
 * Example 4: Use a specific config, ignoring $HADOOP_CONFIG.
 * Generally you should not need to use a specific config unless you are
 * doing something out of the ordinary.
160 * <ul>
161 * <li> configX = someConfigSomeOnePassedToYou.
162 * <li> myFContext = getFileContext(configX); // configX is not changed,
163 * // is passed down
164 * <li> myFContext.create(path, ...);
165 * <li>...
166 * </ul>
167 *
168 */
169
170 @InterfaceAudience.Public
171 @InterfaceStability.Evolving /*Evolving for a release,to be changed to Stable */
172 public final class FileContext {
173
174 public static final Log LOG = LogFactory.getLog(FileContext.class);
175 public static final FsPermission DEFAULT_PERM = FsPermission.getDefault();
176
177 /**
178 * Priority of the FileContext shutdown hook.
179 */
180 public static final int SHUTDOWN_HOOK_PRIORITY = 20;
181
182 /**
183 * List of files that should be deleted on JVM shutdown.
184 */
185 static final Map<FileContext, Set<Path>> DELETE_ON_EXIT =
186 new IdentityHashMap<FileContext, Set<Path>>();
187
188 /** JVM shutdown hook thread. */
189 static final FileContextFinalizer FINALIZER =
190 new FileContextFinalizer();
191
192 private static final PathFilter DEFAULT_FILTER = new PathFilter() {
193 @Override
194 public boolean accept(final Path file) {
195 return true;
196 }
197 };
198
199 /**
200 * The FileContext is defined by.
201 * 1) defaultFS (slash)
202 * 2) wd
203 * 3) umask
204 */
205 private final AbstractFileSystem defaultFS; //default FS for this FileContext.
206 private Path workingDir; // Fully qualified
207 private FsPermission umask;
208 private final Configuration conf;
209 private final UserGroupInformation ugi;
210
/**
 * Creates a FileContext around the given default file system.
 *
 * @param defFs the file system that slash-relative names resolve against
 * @param theUmask the umask this context applies to permissions
 * @param aConf the configuration kept for later file system lookups
 * @throws RuntimeException if the current user cannot be determined
 */
private FileContext(final AbstractFileSystem defFs,
    final FsPermission theUmask, final Configuration aConf) {
  defaultFS = defFs;
  // Use the umask supplied by the caller. Previously this parameter was
  // silently ignored and the value was re-read from the configuration.
  umask = theUmask;
  conf = aConf;
  try {
    ugi = UserGroupInformation.getCurrentUser();
  } catch (IOException e) {
    LOG.error("Exception in getCurrentUser: ",e);
    throw new RuntimeException("Failed to get the current user " +
        "while creating a FileContext", e);
  }
  /*
   * Init the wd.
   * WorkingDir is implemented at the FileContext layer
   * NOT at the AbstractFileSystem layer.
   * If the DefaultFS, such as localFilesystem has a notion of
   * builtin WD, we use that as the initial WD.
   * Otherwise the WD is initialized to the home directory.
   */
  workingDir = defaultFS.getInitialWorkingDirectory();
  if (workingDir == null) {
    workingDir = defaultFS.getHomeDirectory();
  }
  util = new Util(); // for the inner class
}
237
238 /*
239 * Remove relative part - return "absolute":
240 * If input is relative path ("foo/bar") add wd: ie "/<workingDir>/foo/bar"
241 * A fully qualified uri ("hdfs://nn:p/foo/bar") or a slash-relative path
242 * ("/foo/bar") are returned unchanged.
243 *
244 * Applications that use FileContext should use #makeQualified() since
245 * they really want a fully qualified URI.
246 * Hence this method is not called makeAbsolute() and
247 * has been deliberately declared private.
248 */
249 private Path fixRelativePart(Path p) {
250 if (p.isUriPathAbsolute()) {
251 return p;
252 } else {
253 return new Path(workingDir, p);
254 }
255 }
256
257 /**
258 * Delete all the paths that were marked as delete-on-exit.
259 */
260 static void processDeleteOnExit() {
261 synchronized (DELETE_ON_EXIT) {
262 Set<Entry<FileContext, Set<Path>>> set = DELETE_ON_EXIT.entrySet();
263 for (Entry<FileContext, Set<Path>> entry : set) {
264 FileContext fc = entry.getKey();
265 Set<Path> paths = entry.getValue();
266 for (Path path : paths) {
267 try {
268 fc.delete(path, true);
269 } catch (IOException e) {
270 LOG.warn("Ignoring failure to deleteOnExit for path " + path);
271 }
272 }
273 }
274 DELETE_ON_EXIT.clear();
275 }
276 }
277
278 /**
279 * Pathnames with scheme and relative path are illegal.
280 * @param path to be checked
281 */
282 private static void checkNotSchemeWithRelative(final Path path) {
283 if (path.toUri().isAbsolute() && !path.isUriPathAbsolute()) {
284 throw new HadoopIllegalArgumentException(
285 "Unsupported name: has scheme but relative path-part");
286 }
287 }
288
289 /**
290 * Get the file system of supplied path.
291 *
292 * @param absOrFqPath - absolute or fully qualified path
293 * @return the file system of the path
294 *
295 * @throws UnsupportedFileSystemException If the file system for
296 * <code>absOrFqPath</code> is not supported.
297 * @throws IOExcepton If the file system for <code>absOrFqPath</code> could
298 * not be instantiated.
299 */
300 private AbstractFileSystem getFSofPath(final Path absOrFqPath)
301 throws UnsupportedFileSystemException, IOException {
302 checkNotSchemeWithRelative(absOrFqPath);
303 if (!absOrFqPath.isAbsolute() && absOrFqPath.toUri().getScheme() == null) {
304 throw new HadoopIllegalArgumentException(
305 "FileContext Bug: path is relative");
306 }
307
308 try {
309 // Is it the default FS for this FileContext?
310 defaultFS.checkPath(absOrFqPath);
311 return defaultFS;
312 } catch (Exception e) { // it is different FileSystem
313 return getAbstractFileSystem(ugi, absOrFqPath.toUri(), conf);
314 }
315 }
316
317 private static AbstractFileSystem getAbstractFileSystem(
318 UserGroupInformation user, final URI uri, final Configuration conf)
319 throws UnsupportedFileSystemException, IOException {
320 try {
321 return user.doAs(new PrivilegedExceptionAction<AbstractFileSystem>() {
322 @Override
323 public AbstractFileSystem run() throws UnsupportedFileSystemException {
324 return AbstractFileSystem.get(uri, conf);
325 }
326 });
327 } catch (InterruptedException ex) {
328 LOG.error(ex);
329 throw new IOException("Failed to get the AbstractFileSystem for path: "
330 + uri, ex);
331 }
332 }
333
334 /**
335 * Protected Static Factory methods for getting a FileContexts
336 * that take a AbstractFileSystem as input. To be used for testing.
337 */
338
339 /**
340 * Create a FileContext with specified FS as default using the specified
341 * config.
342 *
343 * @param defFS
344 * @param aConf
345 * @return new FileContext with specifed FS as default.
346 */
347 public static FileContext getFileContext(final AbstractFileSystem defFS,
348 final Configuration aConf) {
349 return new FileContext(defFS, FsPermission.getUMask(aConf), aConf);
350 }
351
352 /**
353 * Create a FileContext for specified file system using the default config.
354 *
355 * @param defaultFS
356 * @return a FileContext with the specified AbstractFileSystem
357 * as the default FS.
358 */
359 protected static FileContext getFileContext(
360 final AbstractFileSystem defaultFS) {
361 return getFileContext(defaultFS, new Configuration());
362 }
363
364 /**
365 * Static Factory methods for getting a FileContext.
366 * Note new file contexts are created for each call.
367 * The only singleton is the local FS context using the default config.
368 *
369 * Methods that use the default config: the default config read from the
370 * $HADOOP_CONFIG/core.xml,
371 * Unspecified key-values for config are defaulted from core-defaults.xml
372 * in the release jar.
373 *
 * The keys relevant to the FileContext layer are extracted at the time of
 * construction. Changes to the config after the call are ignored
 * by the FileContext layer.
377 * The conf is passed to lower layers like AbstractFileSystem and HDFS which
378 * pick up their own config variables.
379 */
380
381 /**
382 * Create a FileContext using the default config read from the
383 * $HADOOP_CONFIG/core.xml, Unspecified key-values for config are defaulted
384 * from core-defaults.xml in the release jar.
385 *
386 * @throws UnsupportedFileSystemException If the file system from the default
387 * configuration is not supported
388 */
389 public static FileContext getFileContext()
390 throws UnsupportedFileSystemException {
391 return getFileContext(new Configuration());
392 }
393
394 /**
395 * @return a FileContext for the local file system using the default config.
396 * @throws UnsupportedFileSystemException If the file system for
397 * {@link FsConstants#LOCAL_FS_URI} is not supported.
398 */
399 public static FileContext getLocalFSFileContext()
400 throws UnsupportedFileSystemException {
401 return getFileContext(FsConstants.LOCAL_FS_URI);
402 }
403
404 /**
405 * Create a FileContext for specified URI using the default config.
406 *
407 * @param defaultFsUri
408 * @return a FileContext with the specified URI as the default FS.
409 *
410 * @throws UnsupportedFileSystemException If the file system for
411 * <code>defaultFsUri</code> is not supported
412 */
413 public static FileContext getFileContext(final URI defaultFsUri)
414 throws UnsupportedFileSystemException {
415 return getFileContext(defaultFsUri, new Configuration());
416 }
417
418 /**
419 * Create a FileContext for specified default URI using the specified config.
420 *
421 * @param defaultFsUri
422 * @param aConf
423 * @return new FileContext for specified uri
424 * @throws UnsupportedFileSystemException If the file system with specified is
425 * not supported
426 * @throws RuntimeException If the file system specified is supported but
427 * could not be instantiated, or if login fails.
428 */
429 public static FileContext getFileContext(final URI defaultFsUri,
430 final Configuration aConf) throws UnsupportedFileSystemException {
431 UserGroupInformation currentUser = null;
432 AbstractFileSystem defaultAfs = null;
433 try {
434 currentUser = UserGroupInformation.getCurrentUser();
435 defaultAfs = getAbstractFileSystem(currentUser, defaultFsUri, aConf);
436 } catch (UnsupportedFileSystemException ex) {
437 throw ex;
438 } catch (IOException ex) {
439 LOG.error(ex);
440 throw new RuntimeException(ex);
441 }
442 return getFileContext(defaultAfs, aConf);
443 }
444
445 /**
446 * Create a FileContext using the passed config. Generally it is better to use
447 * {@link #getFileContext(URI, Configuration)} instead of this one.
448 *
449 *
450 * @param aConf
451 * @return new FileContext
452 * @throws UnsupportedFileSystemException If file system in the config
453 * is not supported
454 */
455 public static FileContext getFileContext(final Configuration aConf)
456 throws UnsupportedFileSystemException {
457 return getFileContext(
458 URI.create(aConf.get(FS_DEFAULT_NAME_KEY, FS_DEFAULT_NAME_DEFAULT)),
459 aConf);
460 }
461
462 /**
463 * @param aConf - from which the FileContext is configured
464 * @return a FileContext for the local file system using the specified config.
465 *
466 * @throws UnsupportedFileSystemException If default file system in the config
467 * is not supported
468 *
469 */
470 public static FileContext getLocalFSFileContext(final Configuration aConf)
471 throws UnsupportedFileSystemException {
472 return getFileContext(FsConstants.LOCAL_FS_URI, aConf);
473 }
474
475 /* This method is needed for tests. */
476 @InterfaceAudience.Private
477 @InterfaceStability.Unstable /* return type will change to AFS once
478 HADOOP-6223 is completed */
479 public AbstractFileSystem getDefaultFileSystem() {
480 return defaultFS;
481 }
482
483 /**
484 * Set the working directory for wd-relative names (such a "foo/bar"). Working
485 * directory feature is provided by simply prefixing relative names with the
486 * working dir. Note this is different from Unix where the wd is actually set
487 * to the inode. Hence setWorkingDir does not follow symlinks etc. This works
488 * better in a distributed environment that has multiple independent roots.
489 * {@link #getWorkingDirectory()} should return what setWorkingDir() set.
490 *
491 * @param newWDir new working directory
492 * @throws IOException
493 * <br>
494 * NewWdir can be one of:
495 * <ul>
496 * <li>relative path: "foo/bar";</li>
497 * <li>absolute without scheme: "/foo/bar"</li>
498 * <li>fully qualified with scheme: "xx://auth/foo/bar"</li>
499 * </ul>
500 * <br>
501 * Illegal WDs:
502 * <ul>
503 * <li>relative with scheme: "xx:foo/bar"</li>
504 * <li>non existent directory</li>
505 * </ul>
506 */
507 public void setWorkingDirectory(final Path newWDir) throws IOException {
508 checkNotSchemeWithRelative(newWDir);
509 /* wd is stored as a fully qualified path. We check if the given
510 * path is not relative first since resolve requires and returns
511 * an absolute path.
512 */
513 final Path newWorkingDir = new Path(workingDir, newWDir);
514 FileStatus status = getFileStatus(newWorkingDir);
515 if (status.isFile()) {
516 throw new FileNotFoundException("Cannot setWD to a file");
517 }
518 workingDir = newWorkingDir;
519 }
520
521 /**
522 * Gets the working directory for wd-relative names (such a "foo/bar").
523 */
524 public Path getWorkingDirectory() {
525 return workingDir;
526 }
527
528 /**
529 * Gets the ugi in the file-context
530 * @return UserGroupInformation
531 */
532 public UserGroupInformation getUgi() {
533 return ugi;
534 }
535
536 /**
537 * Return the current user's home directory in this file system.
538 * The default implementation returns "/user/$USER/".
539 * @return the home directory
540 */
541 public Path getHomeDirectory() {
542 return defaultFS.getHomeDirectory();
543 }
544
545 /**
546 *
547 * @return the umask of this FileContext
548 */
549 public FsPermission getUMask() {
550 return umask;
551 }
552
553 /**
554 * Set umask to the supplied parameter.
555 * @param newUmask the new umask
556 */
557 public void setUMask(final FsPermission newUmask) {
558 umask = newUmask;
559 }
560
561
562 /**
563 * Resolve the path following any symlinks or mount points
564 * @param f to be resolved
565 * @return fully qualified resolved path
566 *
567 * @throws FileNotFoundException If <code>f</code> does not exist
568 * @throws AccessControlException if access denied
569 * @throws IOException If an IO Error occurred
570 *
571 * Exceptions applicable to file systems accessed over RPC:
572 * @throws RpcClientException If an exception occurred in the RPC client
573 * @throws RpcServerException If an exception occurred in the RPC server
574 * @throws UnexpectedServerException If server implementation throws
575 * undeclared exception to RPC server
576 *
577 * RuntimeExceptions:
578 * @throws InvalidPathException If path <code>f</code> is not valid
579 */
580 public Path resolvePath(final Path f) throws FileNotFoundException,
581 UnresolvedLinkException, AccessControlException, IOException {
582 return resolve(f);
583 }
584
585 /**
586 * Make the path fully qualified if it is isn't.
587 * A Fully-qualified path has scheme and authority specified and an absolute
588 * path.
589 * Use the default file system and working dir in this FileContext to qualify.
590 * @param path
591 * @return qualified path
592 */
593 public Path makeQualified(final Path path) {
594 return path.makeQualified(defaultFS.getUri(), getWorkingDirectory());
595 }
596
597 /**
598 * Create or overwrite file on indicated path and returns an output stream for
599 * writing into the file.
600 *
601 * @param f the file name to open
602 * @param createFlag gives the semantics of create; see {@link CreateFlag}
603 * @param opts file creation options; see {@link Options.CreateOpts}.
604 * <ul>
605 * <li>Progress - to report progress on the operation - default null
606 * <li>Permission - umask is applied against permisssion: default is
607 * FsPermissions:getDefault()
608 *
609 * <li>CreateParent - create missing parent path; default is to not
610 * to create parents
611 * <li>The defaults for the following are SS defaults of the file
612 * server implementing the target path. Not all parameters make sense
613 * for all kinds of file system - eg. localFS ignores Blocksize,
614 * replication, checksum
615 * <ul>
616 * <li>BufferSize - buffersize used in FSDataOutputStream
617 * <li>Blocksize - block size for file blocks
618 * <li>ReplicationFactor - replication for blocks
619 * <li>ChecksumParam - Checksum parameters. server default is used
620 * if not specified.
621 * </ul>
622 * </ul>
623 *
624 * @return {@link FSDataOutputStream} for created file
625 *
626 * @throws AccessControlException If access is denied
627 * @throws FileAlreadyExistsException If file <code>f</code> already exists
628 * @throws FileNotFoundException If parent of <code>f</code> does not exist
629 * and <code>createParent</code> is false
630 * @throws ParentNotDirectoryException If parent of <code>f</code> is not a
631 * directory.
632 * @throws UnsupportedFileSystemException If file system for <code>f</code> is
633 * not supported
634 * @throws IOException If an I/O error occurred
635 *
636 * Exceptions applicable to file systems accessed over RPC:
637 * @throws RpcClientException If an exception occurred in the RPC client
638 * @throws RpcServerException If an exception occurred in the RPC server
639 * @throws UnexpectedServerException If server implementation throws
640 * undeclared exception to RPC server
641 *
642 * RuntimeExceptions:
643 * @throws InvalidPathException If path <code>f</code> is not valid
644 */
645 public FSDataOutputStream create(final Path f,
646 final EnumSet<CreateFlag> createFlag, Options.CreateOpts... opts)
647 throws AccessControlException, FileAlreadyExistsException,
648 FileNotFoundException, ParentNotDirectoryException,
649 UnsupportedFileSystemException, IOException {
650 Path absF = fixRelativePart(f);
651
652 // If one of the options is a permission, extract it & apply umask
653 // If not, add a default Perms and apply umask;
654 // AbstractFileSystem#create
655
656 CreateOpts.Perms permOpt =
657 (CreateOpts.Perms) CreateOpts.getOpt(CreateOpts.Perms.class, opts);
658 FsPermission permission = (permOpt != null) ? permOpt.getValue() :
659 FsPermission.getDefault();
660 permission = permission.applyUMask(umask);
661
662 final CreateOpts[] updatedOpts =
663 CreateOpts.setOpt(CreateOpts.perms(permission), opts);
664 return new FSLinkResolver<FSDataOutputStream>() {
665 @Override
666 public FSDataOutputStream next(final AbstractFileSystem fs, final Path p)
667 throws IOException {
668 return fs.create(p, createFlag, updatedOpts);
669 }
670 }.resolve(this, absF);
671 }
672
673 /**
674 * Make(create) a directory and all the non-existent parents.
675 *
676 * @param dir - the dir to make
677 * @param permission - permissions is set permission&~umask
678 * @param createParent - if true then missing parent dirs are created if false
679 * then parent must exist
680 *
681 * @throws AccessControlException If access is denied
682 * @throws FileAlreadyExistsException If directory <code>dir</code> already
683 * exists
684 * @throws FileNotFoundException If parent of <code>dir</code> does not exist
685 * and <code>createParent</code> is false
686 * @throws ParentNotDirectoryException If parent of <code>dir</code> is not a
687 * directory
688 * @throws UnsupportedFileSystemException If file system for <code>dir</code>
689 * is not supported
690 * @throws IOException If an I/O error occurred
691 *
692 * Exceptions applicable to file systems accessed over RPC:
693 * @throws RpcClientException If an exception occurred in the RPC client
694 * @throws UnexpectedServerException If server implementation throws
695 * undeclared exception to RPC server
696 *
697 * RuntimeExceptions:
698 * @throws InvalidPathException If path <code>dir</code> is not valid
699 */
700 public void mkdir(final Path dir, final FsPermission permission,
701 final boolean createParent) throws AccessControlException,
702 FileAlreadyExistsException, FileNotFoundException,
703 ParentNotDirectoryException, UnsupportedFileSystemException,
704 IOException {
705 final Path absDir = fixRelativePart(dir);
706 final FsPermission absFerms = (permission == null ?
707 FsPermission.getDefault() : permission).applyUMask(umask);
708 new FSLinkResolver<Void>() {
709 @Override
710 public Void next(final AbstractFileSystem fs, final Path p)
711 throws IOException, UnresolvedLinkException {
712 fs.mkdir(p, absFerms, createParent);
713 return null;
714 }
715 }.resolve(this, absDir);
716 }
717
718 /**
719 * Delete a file.
720 * @param f the path to delete.
721 * @param recursive if path is a directory and set to
722 * true, the directory is deleted else throws an exception. In
723 * case of a file the recursive can be set to either true or false.
724 *
725 * @throws AccessControlException If access is denied
726 * @throws FileNotFoundException If <code>f</code> does not exist
727 * @throws UnsupportedFileSystemException If file system for <code>f</code> is
728 * not supported
729 * @throws IOException If an I/O error occurred
730 *
731 * Exceptions applicable to file systems accessed over RPC:
732 * @throws RpcClientException If an exception occurred in the RPC client
733 * @throws RpcServerException If an exception occurred in the RPC server
734 * @throws UnexpectedServerException If server implementation throws
735 * undeclared exception to RPC server
736 *
737 * RuntimeExceptions:
738 * @throws InvalidPathException If path <code>f</code> is invalid
739 */
740 public boolean delete(final Path f, final boolean recursive)
741 throws AccessControlException, FileNotFoundException,
742 UnsupportedFileSystemException, IOException {
743 Path absF = fixRelativePart(f);
744 return new FSLinkResolver<Boolean>() {
745 @Override
746 public Boolean next(final AbstractFileSystem fs, final Path p)
747 throws IOException, UnresolvedLinkException {
748 return Boolean.valueOf(fs.delete(p, recursive));
749 }
750 }.resolve(this, absF);
751 }
752
753 /**
754 * Opens an FSDataInputStream at the indicated Path using
755 * default buffersize.
756 * @param f the file name to open
757 *
758 * @throws AccessControlException If access is denied
759 * @throws FileNotFoundException If file <code>f</code> does not exist
760 * @throws UnsupportedFileSystemException If file system for <code>f</code>
761 * is not supported
762 * @throws IOException If an I/O error occurred
763 *
764 * Exceptions applicable to file systems accessed over RPC:
765 * @throws RpcClientException If an exception occurred in the RPC client
766 * @throws RpcServerException If an exception occurred in the RPC server
767 * @throws UnexpectedServerException If server implementation throws
768 * undeclared exception to RPC server
769 */
770 public FSDataInputStream open(final Path f) throws AccessControlException,
771 FileNotFoundException, UnsupportedFileSystemException, IOException {
772 final Path absF = fixRelativePart(f);
773 return new FSLinkResolver<FSDataInputStream>() {
774 @Override
775 public FSDataInputStream next(final AbstractFileSystem fs, final Path p)
776 throws IOException, UnresolvedLinkException {
777 return fs.open(p);
778 }
779 }.resolve(this, absF);
780 }
781
782 /**
783 * Opens an FSDataInputStream at the indicated Path.
784 *
785 * @param f the file name to open
786 * @param bufferSize the size of the buffer to be used.
787 *
788 * @throws AccessControlException If access is denied
789 * @throws FileNotFoundException If file <code>f</code> does not exist
790 * @throws UnsupportedFileSystemException If file system for <code>f</code> is
791 * not supported
792 * @throws IOException If an I/O error occurred
793 *
794 * Exceptions applicable to file systems accessed over RPC:
795 * @throws RpcClientException If an exception occurred in the RPC client
796 * @throws RpcServerException If an exception occurred in the RPC server
797 * @throws UnexpectedServerException If server implementation throws
798 * undeclared exception to RPC server
799 */
800 public FSDataInputStream open(final Path f, final int bufferSize)
801 throws AccessControlException, FileNotFoundException,
802 UnsupportedFileSystemException, IOException {
803 final Path absF = fixRelativePart(f);
804 return new FSLinkResolver<FSDataInputStream>() {
805 @Override
806 public FSDataInputStream next(final AbstractFileSystem fs, final Path p)
807 throws IOException, UnresolvedLinkException {
808 return fs.open(p, bufferSize);
809 }
810 }.resolve(this, absF);
811 }
812
813 /**
814 * Set replication for an existing file.
815 *
816 * @param f file name
817 * @param replication new replication
818 *
819 * @return true if successful
820 *
821 * @throws AccessControlException If access is denied
822 * @throws FileNotFoundException If file <code>f</code> does not exist
823 * @throws IOException If an I/O error occurred
824 *
825 * Exceptions applicable to file systems accessed over RPC:
826 * @throws RpcClientException If an exception occurred in the RPC client
827 * @throws RpcServerException If an exception occurred in the RPC server
828 * @throws UnexpectedServerException If server implementation throws
829 * undeclared exception to RPC server
830 */
831 public boolean setReplication(final Path f, final short replication)
832 throws AccessControlException, FileNotFoundException,
833 IOException {
834 final Path absF = fixRelativePart(f);
835 return new FSLinkResolver<Boolean>() {
836 @Override
837 public Boolean next(final AbstractFileSystem fs, final Path p)
838 throws IOException, UnresolvedLinkException {
839 return Boolean.valueOf(fs.setReplication(p, replication));
840 }
841 }.resolve(this, absF);
842 }
843
844 /**
845 * Renames Path src to Path dst
846 * <ul>
 * <li>Fails if src is a file and dst is a directory.
849 * <li>Fails if src is a directory and dst is a file.
850 * <li>Fails if the parent of dst does not exist or is a file.
851 * </ul>
852 * <p>
853 * If OVERWRITE option is not passed as an argument, rename fails if the dst
854 * already exists.
855 * <p>
856 * If OVERWRITE option is passed as an argument, rename overwrites the dst if
857 * it is a file or an empty directory. Rename fails if dst is a non-empty
858 * directory.
859 * <p>
860 * Note that atomicity of rename is dependent on the file system
861 * implementation. Please refer to the file system documentation for details
862 * <p>
863 *
864 * @param src path to be renamed
865 * @param dst new path after rename
866 *
867 * @throws AccessControlException If access is denied
* @throws FileAlreadyExistsException If <code>dst</code> already exists and
* <code>options</code> does not include {@link Options.Rename#OVERWRITE}.
871 * @throws FileNotFoundException If <code>src</code> does not exist
872 * @throws ParentNotDirectoryException If parent of <code>dst</code> is not a
873 * directory
874 * @throws UnsupportedFileSystemException If file system for <code>src</code>
875 * and <code>dst</code> is not supported
876 * @throws IOException If an I/O error occurred
877 *
878 * Exceptions applicable to file systems accessed over RPC:
879 * @throws RpcClientException If an exception occurred in the RPC client
880 * @throws RpcServerException If an exception occurred in the RPC server
881 * @throws UnexpectedServerException If server implementation throws
882 * undeclared exception to RPC server
883 */
884 public void rename(final Path src, final Path dst,
885 final Options.Rename... options) throws AccessControlException,
886 FileAlreadyExistsException, FileNotFoundException,
887 ParentNotDirectoryException, UnsupportedFileSystemException,
888 IOException {
889 final Path absSrc = fixRelativePart(src);
890 final Path absDst = fixRelativePart(dst);
891 AbstractFileSystem srcFS = getFSofPath(absSrc);
892 AbstractFileSystem dstFS = getFSofPath(absDst);
893 if(!srcFS.getUri().equals(dstFS.getUri())) {
894 throw new IOException("Renames across AbstractFileSystems not supported");
895 }
896 try {
897 srcFS.rename(absSrc, absDst, options);
898 } catch (UnresolvedLinkException e) {
899 /* We do not know whether the source or the destination path
900 * was unresolved. Resolve the source path up until the final
901 * path component, then fully resolve the destination.
902 */
903 final Path source = resolveIntermediate(absSrc);
904 new FSLinkResolver<Void>() {
905 @Override
906 public Void next(final AbstractFileSystem fs, final Path p)
907 throws IOException, UnresolvedLinkException {
908 fs.rename(source, p, options);
909 return null;
910 }
911 }.resolve(this, absDst);
912 }
913 }
914
915 /**
916 * Set permission of a path.
* @param f the path whose permission is to be set
918 * @param permission - the new absolute permission (umask is not applied)
919 *
920 * @throws AccessControlException If access is denied
921 * @throws FileNotFoundException If <code>f</code> does not exist
922 * @throws UnsupportedFileSystemException If file system for <code>f</code>
923 * is not supported
924 * @throws IOException If an I/O error occurred
925 *
926 * Exceptions applicable to file systems accessed over RPC:
927 * @throws RpcClientException If an exception occurred in the RPC client
928 * @throws RpcServerException If an exception occurred in the RPC server
929 * @throws UnexpectedServerException If server implementation throws
930 * undeclared exception to RPC server
931 */
932 public void setPermission(final Path f, final FsPermission permission)
933 throws AccessControlException, FileNotFoundException,
934 UnsupportedFileSystemException, IOException {
935 final Path absF = fixRelativePart(f);
936 new FSLinkResolver<Void>() {
937 @Override
938 public Void next(final AbstractFileSystem fs, final Path p)
939 throws IOException, UnresolvedLinkException {
940 fs.setPermission(p, permission);
941 return null;
942 }
943 }.resolve(this, absF);
944 }
945
946 /**
947 * Set owner of a path (i.e. a file or a directory). The parameters username
948 * and groupname cannot both be null.
949 *
950 * @param f The path
951 * @param username If it is null, the original username remains unchanged.
952 * @param groupname If it is null, the original groupname remains unchanged.
953 *
954 * @throws AccessControlException If access is denied
955 * @throws FileNotFoundException If <code>f</code> does not exist
956 * @throws UnsupportedFileSystemException If file system for <code>f</code> is
957 * not supported
958 * @throws IOException If an I/O error occurred
959 *
960 * Exceptions applicable to file systems accessed over RPC:
961 * @throws RpcClientException If an exception occurred in the RPC client
962 * @throws RpcServerException If an exception occurred in the RPC server
963 * @throws UnexpectedServerException If server implementation throws
964 * undeclared exception to RPC server
965 *
966 * RuntimeExceptions:
967 * @throws HadoopIllegalArgumentException If <code>username</code> or
968 * <code>groupname</code> is invalid.
969 */
970 public void setOwner(final Path f, final String username,
971 final String groupname) throws AccessControlException,
972 UnsupportedFileSystemException, FileNotFoundException,
973 IOException {
974 if ((username == null) && (groupname == null)) {
975 throw new HadoopIllegalArgumentException(
976 "username and groupname cannot both be null");
977 }
978 final Path absF = fixRelativePart(f);
979 new FSLinkResolver<Void>() {
980 @Override
981 public Void next(final AbstractFileSystem fs, final Path p)
982 throws IOException, UnresolvedLinkException {
983 fs.setOwner(p, username, groupname);
984 return null;
985 }
986 }.resolve(this, absF);
987 }
988
989 /**
* Set the modification time and access time of a file.
991 * @param f The path
992 * @param mtime Set the modification time of this file.
993 * The number of milliseconds since epoch (Jan 1, 1970).
994 * A value of -1 means that this call should not set modification time.
995 * @param atime Set the access time of this file.
996 * The number of milliseconds since Jan 1, 1970.
997 * A value of -1 means that this call should not set access time.
998 *
999 * @throws AccessControlException If access is denied
1000 * @throws FileNotFoundException If <code>f</code> does not exist
1001 * @throws UnsupportedFileSystemException If file system for <code>f</code> is
1002 * not supported
1003 * @throws IOException If an I/O error occurred
1004 *
1005 * Exceptions applicable to file systems accessed over RPC:
1006 * @throws RpcClientException If an exception occurred in the RPC client
1007 * @throws RpcServerException If an exception occurred in the RPC server
1008 * @throws UnexpectedServerException If server implementation throws
1009 * undeclared exception to RPC server
1010 */
1011 public void setTimes(final Path f, final long mtime, final long atime)
1012 throws AccessControlException, FileNotFoundException,
1013 UnsupportedFileSystemException, IOException {
1014 final Path absF = fixRelativePart(f);
1015 new FSLinkResolver<Void>() {
1016 @Override
1017 public Void next(final AbstractFileSystem fs, final Path p)
1018 throws IOException, UnresolvedLinkException {
1019 fs.setTimes(p, mtime, atime);
1020 return null;
1021 }
1022 }.resolve(this, absF);
1023 }
1024
1025 /**
1026 * Get the checksum of a file.
1027 *
1028 * @param f file path
1029 *
1030 * @return The file checksum. The default return value is null,
1031 * which indicates that no checksum algorithm is implemented
1032 * in the corresponding FileSystem.
1033 *
1034 * @throws AccessControlException If access is denied
1035 * @throws FileNotFoundException If <code>f</code> does not exist
1036 * @throws IOException If an I/O error occurred
1037 *
1038 * Exceptions applicable to file systems accessed over RPC:
1039 * @throws RpcClientException If an exception occurred in the RPC client
1040 * @throws RpcServerException If an exception occurred in the RPC server
1041 * @throws UnexpectedServerException If server implementation throws
1042 * undeclared exception to RPC server
1043 */
1044 public FileChecksum getFileChecksum(final Path f)
1045 throws AccessControlException, FileNotFoundException,
1046 IOException {
1047 final Path absF = fixRelativePart(f);
1048 return new FSLinkResolver<FileChecksum>() {
1049 @Override
1050 public FileChecksum next(final AbstractFileSystem fs, final Path p)
1051 throws IOException, UnresolvedLinkException {
1052 return fs.getFileChecksum(p);
1053 }
1054 }.resolve(this, absF);
1055 }
1056
1057 /**
1058 * Set the verify checksum flag for the file system denoted by the path.
1059 * This is only applicable if the
1060 * corresponding FileSystem supports checksum. By default doesn't do anything.
* @param verifyChecksum the new value of the verify checksum flag
1062 * @param f set the verifyChecksum for the Filesystem containing this path
1063 *
1064 * @throws AccessControlException If access is denied
1065 * @throws FileNotFoundException If <code>f</code> does not exist
1066 * @throws UnsupportedFileSystemException If file system for <code>f</code> is
1067 * not supported
1068 * @throws IOException If an I/O error occurred
1069 *
1070 * Exceptions applicable to file systems accessed over RPC:
1071 * @throws RpcClientException If an exception occurred in the RPC client
1072 * @throws RpcServerException If an exception occurred in the RPC server
1073 * @throws UnexpectedServerException If server implementation throws
1074 * undeclared exception to RPC server
1075 */
1076 public void setVerifyChecksum(final boolean verifyChecksum, final Path f)
1077 throws AccessControlException, FileNotFoundException,
1078 UnsupportedFileSystemException, IOException {
1079 final Path absF = resolve(fixRelativePart(f));
1080 getFSofPath(absF).setVerifyChecksum(verifyChecksum);
1081 }
1082
1083 /**
1084 * Return a file status object that represents the path.
1085 * @param f The path we want information from
1086 *
1087 * @return a FileStatus object
1088 *
1089 * @throws AccessControlException If access is denied
1090 * @throws FileNotFoundException If <code>f</code> does not exist
1091 * @throws UnsupportedFileSystemException If file system for <code>f</code> is
1092 * not supported
1093 * @throws IOException If an I/O error occurred
1094 *
1095 * Exceptions applicable to file systems accessed over RPC:
1096 * @throws RpcClientException If an exception occurred in the RPC client
1097 * @throws RpcServerException If an exception occurred in the RPC server
1098 * @throws UnexpectedServerException If server implementation throws
1099 * undeclared exception to RPC server
1100 */
1101 public FileStatus getFileStatus(final Path f) throws AccessControlException,
1102 FileNotFoundException, UnsupportedFileSystemException, IOException {
1103 final Path absF = fixRelativePart(f);
1104 return new FSLinkResolver<FileStatus>() {
1105 @Override
1106 public FileStatus next(final AbstractFileSystem fs, final Path p)
1107 throws IOException, UnresolvedLinkException {
1108 return fs.getFileStatus(p);
1109 }
1110 }.resolve(this, absF);
1111 }
1112
1113 /**
1114 * Return a fully qualified version of the given symlink target if it
1115 * has no scheme and authority. Partially and fully qualified paths
1116 * are returned unmodified.
1117 * @param pathFS The AbstractFileSystem of the path
1118 * @param pathWithLink Path that contains the symlink
1119 * @param target The symlink's absolute target
1120 * @return Fully qualified version of the target.
1121 */
1122 private Path qualifySymlinkTarget(final AbstractFileSystem pathFS,
1123 Path pathWithLink, Path target) {
1124 // NB: makeQualified uses the target's scheme and authority, if
1125 // specified, and the scheme and authority of pathFS, if not.
1126 final String scheme = target.toUri().getScheme();
1127 final String auth = target.toUri().getAuthority();
1128 return (scheme == null && auth == null)
1129 ? target.makeQualified(pathFS.getUri(), pathWithLink.getParent())
1130 : target;
1131 }
1132
1133 /**
1134 * Return a file status object that represents the path. If the path
1135 * refers to a symlink then the FileStatus of the symlink is returned.
1136 * The behavior is equivalent to #getFileStatus() if the underlying
1137 * file system does not support symbolic links.
1138 * @param f The path we want information from.
1139 * @return A FileStatus object
1140 *
1141 * @throws AccessControlException If access is denied
1142 * @throws FileNotFoundException If <code>f</code> does not exist
1143 * @throws UnsupportedFileSystemException If file system for <code>f</code> is
1144 * not supported
1145 * @throws IOException If an I/O error occurred
1146 */
1147 public FileStatus getFileLinkStatus(final Path f)
1148 throws AccessControlException, FileNotFoundException,
1149 UnsupportedFileSystemException, IOException {
1150 final Path absF = fixRelativePart(f);
1151 return new FSLinkResolver<FileStatus>() {
1152 @Override
1153 public FileStatus next(final AbstractFileSystem fs, final Path p)
1154 throws IOException, UnresolvedLinkException {
1155 FileStatus fi = fs.getFileLinkStatus(p);
1156 if (fi.isSymlink()) {
1157 fi.setSymlink(qualifySymlinkTarget(fs, p, fi.getSymlink()));
1158 }
1159 return fi;
1160 }
1161 }.resolve(this, absF);
1162 }
1163
1164 /**
1165 * Returns the target of the given symbolic link as it was specified
1166 * when the link was created. Links in the path leading up to the
1167 * final path component are resolved transparently.
1168 *
1169 * @param f the path to return the target of
1170 * @return The un-interpreted target of the symbolic link.
1171 *
1172 * @throws AccessControlException If access is denied
1173 * @throws FileNotFoundException If path <code>f</code> does not exist
1174 * @throws UnsupportedFileSystemException If file system for <code>f</code> is
1175 * not supported
1176 * @throws IOException If the given path does not refer to a symlink
1177 * or an I/O error occurred
1178 */
1179 public Path getLinkTarget(final Path f) throws AccessControlException,
1180 FileNotFoundException, UnsupportedFileSystemException, IOException {
1181 final Path absF = fixRelativePart(f);
1182 return new FSLinkResolver<Path>() {
1183 @Override
1184 public Path next(final AbstractFileSystem fs, final Path p)
1185 throws IOException, UnresolvedLinkException {
1186 FileStatus fi = fs.getFileLinkStatus(p);
1187 return fi.getSymlink();
1188 }
1189 }.resolve(this, absF);
1190 }
1191
1192 /**
1193 * Return blockLocation of the given file for the given offset and len.
1194 * For a nonexistent file or regions, null will be returned.
1195 *
1196 * This call is most helpful with DFS, where it returns
1197 * hostnames of machines that contain the given file.
1198 *
1199 * @param f - get blocklocations of this file
1200 * @param start position (byte offset)
1201 * @param len (in bytes)
1202 *
1203 * @return block locations for given file at specified offset of len
1204 *
1205 * @throws AccessControlException If access is denied
1206 * @throws FileNotFoundException If <code>f</code> does not exist
1207 * @throws UnsupportedFileSystemException If file system for <code>f</code> is
1208 * not supported
1209 * @throws IOException If an I/O error occurred
1210 *
1211 * Exceptions applicable to file systems accessed over RPC:
1212 * @throws RpcClientException If an exception occurred in the RPC client
1213 * @throws RpcServerException If an exception occurred in the RPC server
1214 * @throws UnexpectedServerException If server implementation throws
1215 * undeclared exception to RPC server
1216 *
1217 * RuntimeExceptions:
1218 * @throws InvalidPathException If path <code>f</code> is invalid
1219 */
1220 @InterfaceAudience.LimitedPrivate({"HDFS", "MapReduce"})
1221 @InterfaceStability.Evolving
1222 public BlockLocation[] getFileBlockLocations(final Path f, final long start,
1223 final long len) throws AccessControlException, FileNotFoundException,
1224 UnsupportedFileSystemException, IOException {
1225 final Path absF = fixRelativePart(f);
1226 return new FSLinkResolver<BlockLocation[]>() {
1227 @Override
1228 public BlockLocation[] next(final AbstractFileSystem fs, final Path p)
1229 throws IOException, UnresolvedLinkException {
1230 return fs.getFileBlockLocations(p, start, len);
1231 }
1232 }.resolve(this, absF);
1233 }
1234
1235 /**
1236 * Returns a status object describing the use and capacity of the
* file system denoted by the Path argument f.
1238 * If the file system has multiple partitions, the
1239 * use and capacity of the partition pointed to by the specified
1240 * path is reflected.
1241 *
1242 * @param f Path for which status should be obtained. null means the
1243 * root partition of the default file system.
1244 *
1245 * @return a FsStatus object
1246 *
1247 * @throws AccessControlException If access is denied
1248 * @throws FileNotFoundException If <code>f</code> does not exist
1249 * @throws UnsupportedFileSystemException If file system for <code>f</code> is
1250 * not supported
1251 * @throws IOException If an I/O error occurred
1252 *
1253 * Exceptions applicable to file systems accessed over RPC:
1254 * @throws RpcClientException If an exception occurred in the RPC client
1255 * @throws RpcServerException If an exception occurred in the RPC server
1256 * @throws UnexpectedServerException If server implementation throws
1257 * undeclared exception to RPC server
1258 */
1259 public FsStatus getFsStatus(final Path f) throws AccessControlException,
1260 FileNotFoundException, UnsupportedFileSystemException, IOException {
1261 if (f == null) {
1262 return defaultFS.getFsStatus();
1263 }
1264 final Path absF = fixRelativePart(f);
1265 return new FSLinkResolver<FsStatus>() {
1266 @Override
1267 public FsStatus next(final AbstractFileSystem fs, final Path p)
1268 throws IOException, UnresolvedLinkException {
1269 return fs.getFsStatus(p);
1270 }
1271 }.resolve(this, absF);
1272 }
1273
1274 /**
1275 * Creates a symbolic link to an existing file. An exception is thrown if
* the symlink exists, the user does not have permission to create symlink,
1277 * or the underlying file system does not support symlinks.
1278 *
1279 * Symlink permissions are ignored, access to a symlink is determined by
1280 * the permissions of the symlink target.
1281 *
1282 * Symlinks in paths leading up to the final path component are resolved
1283 * transparently. If the final path component refers to a symlink some
1284 * functions operate on the symlink itself, these are:
1285 * - delete(f) and deleteOnExit(f) - Deletes the symlink.
1286 * - rename(src, dst) - If src refers to a symlink, the symlink is
1287 * renamed. If dst refers to a symlink, the symlink is over-written.
1288 * - getLinkTarget(f) - Returns the target of the symlink.
1289 * - getFileLinkStatus(f) - Returns a FileStatus object describing
1290 * the symlink.
1291 * Some functions, create() and mkdir(), expect the final path component
1292 * does not exist. If they are given a path that refers to a symlink that
1293 * does exist they behave as if the path referred to an existing file or
1294 * directory. All other functions fully resolve, ie follow, the symlink.
1295 * These are: open, setReplication, setOwner, setTimes, setWorkingDirectory,
1296 * setPermission, getFileChecksum, setVerifyChecksum, getFileBlockLocations,
1297 * getFsStatus, getFileStatus, exists, and listStatus.
1298 *
1299 * Symlink targets are stored as given to createSymlink, assuming the
1300 * underlying file system is capable of storing a fully qualified URI.
1301 * Dangling symlinks are permitted. FileContext supports four types of
1302 * symlink targets, and resolves them as follows
1303 * <pre>
1304 * Given a path referring to a symlink of form:
1305 *
1306 * <---X--->
1307 * fs://host/A/B/link
1308 * <-----Y----->
1309 *
1310 * In this path X is the scheme and authority that identify the file system,
1311 * and Y is the path leading up to the final path component "link". If Y is
1312 * a symlink itself then let Y' be the target of Y and X' be the scheme and
* authority of Y'. Symlink targets may be:
1314 *
1315 * 1. Fully qualified URIs
1316 *
1317 * fs://hostX/A/B/file Resolved according to the target file system.
1318 *
1319 * 2. Partially qualified URIs (eg scheme but no host)
1320 *
* fs:///A/B/file Resolved according to the target file system. Eg resolving
1322 * a symlink to hdfs:///A results in an exception because
1323 * HDFS URIs must be fully qualified, while a symlink to
1324 * file:///A will not since Hadoop's local file systems
1325 * require partially qualified URIs.
1326 *
1327 * 3. Relative paths
1328 *
1329 * path Resolves to [Y'][path]. Eg if Y resolves to hdfs://host/A and path
1330 * is "../B/file" then [Y'][path] is hdfs://host/B/file
1331 *
1332 * 4. Absolute paths
1333 *
* path Resolves to [X'][path]. Eg if Y resolves to hdfs://host/A/B and path
* is "/file" then [X'][path] is hdfs://host/file
1336 * </pre>
1337 *
1338 * @param target the target of the symbolic link
1339 * @param link the path to be created that points to target
1340 * @param createParent if true then missing parent dirs are created if
1341 * false then parent must exist
1342 *
1343 *
1344 * @throws AccessControlException If access is denied
* @throws FileAlreadyExistsException If file <code>link</code> already exists
1346 * @throws FileNotFoundException If <code>target</code> does not exist
1347 * @throws ParentNotDirectoryException If parent of <code>link</code> is not a
1348 * directory.
1349 * @throws UnsupportedFileSystemException If file system for
1350 * <code>target</code> or <code>link</code> is not supported
1351 * @throws IOException If an I/O error occurred
1352 */
1353 public void createSymlink(final Path target, final Path link,
1354 final boolean createParent) throws AccessControlException,
1355 FileAlreadyExistsException, FileNotFoundException,
1356 ParentNotDirectoryException, UnsupportedFileSystemException,
1357 IOException {
1358 final Path nonRelLink = fixRelativePart(link);
1359 new FSLinkResolver<Void>() {
1360 @Override
1361 public Void next(final AbstractFileSystem fs, final Path p)
1362 throws IOException, UnresolvedLinkException {
1363 fs.createSymlink(target, p, createParent);
1364 return null;
1365 }
1366 }.resolve(this, nonRelLink);
1367 }
1368
1369 /**
1370 * List the statuses of the files/directories in the given path if the path is
1371 * a directory.
1372 *
1373 * @param f is the path
1374 *
1375 * @return an iterator that traverses statuses of the files/directories
1376 * in the given path
1377 *
1378 * @throws AccessControlException If access is denied
1379 * @throws FileNotFoundException If <code>f</code> does not exist
1380 * @throws UnsupportedFileSystemException If file system for <code>f</code> is
1381 * not supported
1382 * @throws IOException If an I/O error occurred
1383 *
1384 * Exceptions applicable to file systems accessed over RPC:
1385 * @throws RpcClientException If an exception occurred in the RPC client
1386 * @throws RpcServerException If an exception occurred in the RPC server
1387 * @throws UnexpectedServerException If server implementation throws
1388 * undeclared exception to RPC server
1389 */
1390 public RemoteIterator<FileStatus> listStatus(final Path f) throws
1391 AccessControlException, FileNotFoundException,
1392 UnsupportedFileSystemException, IOException {
1393 final Path absF = fixRelativePart(f);
1394 return new FSLinkResolver<RemoteIterator<FileStatus>>() {
1395 @Override
1396 public RemoteIterator<FileStatus> next(
1397 final AbstractFileSystem fs, final Path p)
1398 throws IOException, UnresolvedLinkException {
1399 return fs.listStatusIterator(p);
1400 }
1401 }.resolve(this, absF);
1402 }
1403
1404 /**
1405 * @return an iterator over the corrupt files under the given path
1406 * (may contain duplicates if a file has more than one corrupt block)
1407 * @throws IOException
1408 */
1409 public RemoteIterator<Path> listCorruptFileBlocks(Path path)
1410 throws IOException {
1411 final Path absF = fixRelativePart(path);
1412 return new FSLinkResolver<RemoteIterator<Path>>() {
1413 @Override
1414 public RemoteIterator<Path> next(final AbstractFileSystem fs,
1415 final Path p)
1416 throws IOException, UnresolvedLinkException {
1417 return fs.listCorruptFileBlocks(p);
1418 }
1419 }.resolve(this, absF);
1420 }
1421
1422 /**
1423 * List the statuses of the files/directories in the given path if the path is
1424 * a directory.
* Return the file's status and block locations if the path is a file.
1426 *
1427 * If a returned status is a file, it contains the file's block locations.
1428 *
1429 * @param f is the path
1430 *
1431 * @return an iterator that traverses statuses of the files/directories
1432 * in the given path
1433 * If any IO exception (for example the input directory gets deleted while
1434 * listing is being executed), next() or hasNext() of the returned iterator
1435 * may throw a RuntimeException with the io exception as the cause.
1436 *
1437 * @throws AccessControlException If access is denied
1438 * @throws FileNotFoundException If <code>f</code> does not exist
1439 * @throws UnsupportedFileSystemException If file system for <code>f</code> is
1440 * not supported
1441 * @throws IOException If an I/O error occurred
1442 *
1443 * Exceptions applicable to file systems accessed over RPC:
1444 * @throws RpcClientException If an exception occurred in the RPC client
1445 * @throws RpcServerException If an exception occurred in the RPC server
1446 * @throws UnexpectedServerException If server implementation throws
1447 * undeclared exception to RPC server
1448 */
1449 public RemoteIterator<LocatedFileStatus> listLocatedStatus(
1450 final Path f) throws
1451 AccessControlException, FileNotFoundException,
1452 UnsupportedFileSystemException, IOException {
1453 final Path absF = fixRelativePart(f);
1454 return new FSLinkResolver<RemoteIterator<LocatedFileStatus>>() {
1455 @Override
1456 public RemoteIterator<LocatedFileStatus> next(
1457 final AbstractFileSystem fs, final Path p)
1458 throws IOException, UnresolvedLinkException {
1459 return fs.listLocatedStatus(p);
1460 }
1461 }.resolve(this, absF);
1462 }
1463
1464 /**
1465 * Mark a path to be deleted on JVM shutdown.
1466 *
1467 * @param f the existing path to delete.
1468 *
1469 * @return true if deleteOnExit is successful, otherwise false.
1470 *
1471 * @throws AccessControlException If access is denied
1472 * @throws UnsupportedFileSystemException If file system for <code>f</code> is
1473 * not supported
1474 * @throws IOException If an I/O error occurred
1475 *
1476 * Exceptions applicable to file systems accessed over RPC:
1477 * @throws RpcClientException If an exception occurred in the RPC client
1478 * @throws RpcServerException If an exception occurred in the RPC server
1479 * @throws UnexpectedServerException If server implementation throws
1480 * undeclared exception to RPC server
1481 */
1482 public boolean deleteOnExit(Path f) throws AccessControlException,
1483 IOException {
1484 if (!this.util().exists(f)) {
1485 return false;
1486 }
1487 synchronized (DELETE_ON_EXIT) {
1488 if (DELETE_ON_EXIT.isEmpty()) {
1489 ShutdownHookManager.get().addShutdownHook(FINALIZER, SHUTDOWN_HOOK_PRIORITY);
1490 }
1491
1492 Set<Path> set = DELETE_ON_EXIT.get(this);
1493 if (set == null) {
1494 set = new TreeSet<Path>();
1495 DELETE_ON_EXIT.put(this, set);
1496 }
1497 set.add(f);
1498 }
1499 return true;
1500 }
1501
1502 private final Util util;
1503 public Util util() {
1504 return util;
1505 }
1506
1507
1508 /**
1509 * Utility/library methods built over the basic FileContext methods.
* Since these are library functions, the operations are not atomic
1511 * and some of them may partially complete if other threads are making
1512 * changes to the same part of the name space.
1513 */
1514 public class Util {
1515 /**
1516 * Does the file exist?
1517 * Note: Avoid using this method if you already have FileStatus in hand.
1518 * Instead reuse the FileStatus
1519 * @param f the file or dir to be checked
1520 *
1521 * @throws AccessControlException If access is denied
1522 * @throws IOException If an I/O error occurred
1523 * @throws UnsupportedFileSystemException If file system for <code>f</code> is
1524 * not supported
1525 *
1526 * Exceptions applicable to file systems accessed over RPC:
1527 * @throws RpcClientException If an exception occurred in the RPC client
1528 * @throws RpcServerException If an exception occurred in the RPC server
1529 * @throws UnexpectedServerException If server implementation throws
1530 * undeclared exception to RPC server
1531 */
1532 public boolean exists(final Path f) throws AccessControlException,
1533 UnsupportedFileSystemException, IOException {
1534 try {
1535 FileStatus fs = FileContext.this.getFileStatus(f);
1536 assert fs != null;
1537 return true;
1538 } catch (FileNotFoundException e) {
1539 return false;
1540 }
1541 }
1542
1543 /**
1544 * Return a list of file status objects that corresponds to supplied paths
1545 * excluding those non-existent paths.
1546 *
1547 * @param paths list of paths we want information from
1548 *
1549 * @return a list of FileStatus objects
1550 *
1551 * @throws AccessControlException If access is denied
1552 * @throws IOException If an I/O error occurred
1553 *
1554 * Exceptions applicable to file systems accessed over RPC:
1555 * @throws RpcClientException If an exception occurred in the RPC client
1556 * @throws RpcServerException If an exception occurred in the RPC server
1557 * @throws UnexpectedServerException If server implementation throws
1558 * undeclared exception to RPC server
1559 */
1560 private FileStatus[] getFileStatus(Path[] paths)
1561 throws AccessControlException, IOException {
1562 if (paths == null) {
1563 return null;
1564 }
1565 ArrayList<FileStatus> results = new ArrayList<FileStatus>(paths.length);
1566 for (int i = 0; i < paths.length; i++) {
1567 try {
1568 results.add(FileContext.this.getFileStatus(paths[i]));
1569 } catch (FileNotFoundException fnfe) {
1570 // ignoring
1571 }
1572 }
1573 return results.toArray(new FileStatus[results.size()]);
1574 }
1575
1576
1577 /**
1578 * Return the {@link ContentSummary} of path f.
1579 * @param f path
1580 *
1581 * @return the {@link ContentSummary} of path f.
1582 *
1583 * @throws AccessControlException If access is denied
1584 * @throws FileNotFoundException If <code>f</code> does not exist
1585 * @throws UnsupportedFileSystemException If file system for
1586 * <code>f</code> is not supported
1587 * @throws IOException If an I/O error occurred
1588 *
1589 * Exceptions applicable to file systems accessed over RPC:
1590 * @throws RpcClientException If an exception occurred in the RPC client
1591 * @throws RpcServerException If an exception occurred in the RPC server
1592 * @throws UnexpectedServerException If server implementation throws
1593 * undeclared exception to RPC server
1594 */
1595 public ContentSummary getContentSummary(Path f)
1596 throws AccessControlException, FileNotFoundException,
1597 UnsupportedFileSystemException, IOException {
1598 FileStatus status = FileContext.this.getFileStatus(f);
1599 if (status.isFile()) {
1600 return new ContentSummary(status.getLen(), 1, 0);
1601 }
1602 long[] summary = {0, 0, 1};
1603 RemoteIterator<FileStatus> statusIterator =
1604 FileContext.this.listStatus(f);
1605 while(statusIterator.hasNext()) {
1606 FileStatus s = statusIterator.next();
1607 ContentSummary c = s.isDirectory() ? getContentSummary(s.getPath()) :
1608 new ContentSummary(s.getLen(), 1, 0);
1609 summary[0] += c.getLength();
1610 summary[1] += c.getFileCount();
1611 summary[2] += c.getDirectoryCount();
1612 }
1613 return new ContentSummary(summary[0], summary[1], summary[2]);
1614 }
1615
1616 /**
1617 * See {@link #listStatus(Path[], PathFilter)}
1618 */
1619 public FileStatus[] listStatus(Path[] files) throws AccessControlException,
1620 FileNotFoundException, IOException {
1621 return listStatus(files, DEFAULT_FILTER);
1622 }
1623
1624 /**
1625 * Filter files/directories in the given path using the user-supplied path
1626 * filter.
1627 *
1628 * @param f is the path name
1629 * @param filter is the user-supplied path filter
1630 *
1631 * @return an array of FileStatus objects for the files under the given path
1632 * after applying the filter
1633 *
1634 * @throws AccessControlException If access is denied
1635 * @throws FileNotFoundException If <code>f</code> does not exist
1636 * @throws UnsupportedFileSystemException If file system for
1637 * <code>pathPattern</code> is not supported
1638 * @throws IOException If an I/O error occurred
1639 *
1640 * Exceptions applicable to file systems accessed over RPC:
1641 * @throws RpcClientException If an exception occurred in the RPC client
1642 * @throws RpcServerException If an exception occurred in the RPC server
1643 * @throws UnexpectedServerException If server implementation throws
1644 * undeclared exception to RPC server
1645 */
1646 public FileStatus[] listStatus(Path f, PathFilter filter)
1647 throws AccessControlException, FileNotFoundException,
1648 UnsupportedFileSystemException, IOException {
1649 ArrayList<FileStatus> results = new ArrayList<FileStatus>();
1650 listStatus(results, f, filter);
1651 return results.toArray(new FileStatus[results.size()]);
1652 }
1653
1654 /**
1655 * Filter files/directories in the given list of paths using user-supplied
1656 * path filter.
1657 *
1658 * @param files is a list of paths
1659 * @param filter is the filter
1660 *
1661 * @return a list of statuses for the files under the given paths after
1662 * applying the filter
1663 *
1664 * @throws AccessControlException If access is denied
1665 * @throws FileNotFoundException If a file in <code>files</code> does not
1666 * exist
1667 * @throws IOException If an I/O error occurred
1668 *
1669 * Exceptions applicable to file systems accessed over RPC:
1670 * @throws RpcClientException If an exception occurred in the RPC client
1671 * @throws RpcServerException If an exception occurred in the RPC server
1672 * @throws UnexpectedServerException If server implementation throws
1673 * undeclared exception to RPC server
1674 */
1675 public FileStatus[] listStatus(Path[] files, PathFilter filter)
1676 throws AccessControlException, FileNotFoundException, IOException {
1677 ArrayList<FileStatus> results = new ArrayList<FileStatus>();
1678 for (int i = 0; i < files.length; i++) {
1679 listStatus(results, files[i], filter);
1680 }
1681 return results.toArray(new FileStatus[results.size()]);
1682 }
1683
1684 /*
1685 * Filter files/directories in the given path using the user-supplied path
1686 * filter. Results are added to the given array <code>results</code>.
1687 */
1688 private void listStatus(ArrayList<FileStatus> results, Path f,
1689 PathFilter filter) throws AccessControlException,
1690 FileNotFoundException, IOException {
1691 FileStatus[] listing = listStatus(f);
1692 if (listing != null) {
1693 for (int i = 0; i < listing.length; i++) {
1694 if (filter.accept(listing[i].getPath())) {
1695 results.add(listing[i]);
1696 }
1697 }
1698 }
1699 }
1700
1701 /**
1702 * List the statuses of the files/directories in the given path
1703 * if the path is a directory.
1704 *
1705 * @param f is the path
1706 *
1707 * @return an array that contains statuses of the files/directories
1708 * in the given path
1709 *
1710 * @throws AccessControlException If access is denied
1711 * @throws FileNotFoundException If <code>f</code> does not exist
1712 * @throws UnsupportedFileSystemException If file system for <code>f</code> is
1713 * not supported
1714 * @throws IOException If an I/O error occurred
1715 *
1716 * Exceptions applicable to file systems accessed over RPC:
1717 * @throws RpcClientException If an exception occurred in the RPC client
1718 * @throws RpcServerException If an exception occurred in the RPC server
1719 * @throws UnexpectedServerException If server implementation throws
1720 * undeclared exception to RPC server
1721 */
1722 public FileStatus[] listStatus(final Path f) throws AccessControlException,
1723 FileNotFoundException, UnsupportedFileSystemException,
1724 IOException {
1725 final Path absF = fixRelativePart(f);
1726 return new FSLinkResolver<FileStatus[]>() {
1727 @Override
1728 public FileStatus[] next(final AbstractFileSystem fs, final Path p)
1729 throws IOException, UnresolvedLinkException {
1730 return fs.listStatus(p);
1731 }
1732 }.resolve(FileContext.this, absF);
1733 }
1734
1735 /**
1736 * List the statuses and block locations of the files in the given path.
1737 *
1738 * If the path is a directory,
1739 * if recursive is false, returns files in the directory;
1740 * if recursive is true, return files in the subtree rooted at the path.
1741 * The subtree is traversed in the depth-first order.
1742 * If the path is a file, return the file's status and block locations.
1743 * Files across symbolic links are also returned.
1744 *
1745 * @param f is the path
1746 * @param recursive if the subdirectories need to be traversed recursively
1747 *
1748 * @return an iterator that traverses statuses of the files
1749 * If any IO exception (for example a sub-directory gets deleted while
1750 * listing is being executed), next() or hasNext() of the returned iterator
1751 * may throw a RuntimeException with the IO exception as the cause.
1752 *
1753 * @throws AccessControlException If access is denied
1754 * @throws FileNotFoundException If <code>f</code> does not exist
1755 * @throws UnsupportedFileSystemException If file system for <code>f</code>
1756 * is not supported
1757 * @throws IOException If an I/O error occurred
1758 *
1759 * Exceptions applicable to file systems accessed over RPC:
1760 * @throws RpcClientException If an exception occurred in the RPC client
1761 * @throws RpcServerException If an exception occurred in the RPC server
1762 * @throws UnexpectedServerException If server implementation throws
1763 * undeclared exception to RPC server
1764 */
1765 public RemoteIterator<LocatedFileStatus> listFiles(
1766 final Path f, final boolean recursive) throws AccessControlException,
1767 FileNotFoundException, UnsupportedFileSystemException,
1768 IOException {
1769 return new RemoteIterator<LocatedFileStatus>() {
1770 private Stack<RemoteIterator<LocatedFileStatus>> itors =
1771 new Stack<RemoteIterator<LocatedFileStatus>>();
1772 RemoteIterator<LocatedFileStatus> curItor = listLocatedStatus(f);
1773 LocatedFileStatus curFile;
1774
1775 /**
1776 * Returns <tt>true</tt> if the iterator has more files.
1777 *
1778 * @return <tt>true</tt> if the iterator has more files.
1779 * @throws AccessControlException if not allowed to access next
1780 * file's status or locations
1781 * @throws FileNotFoundException if next file does not exist any more
1782 * @throws UnsupportedFileSystemException if next file's
1783 * fs is unsupported
1784 * @throws IOException for all other IO errors
1785 * for example, NameNode is not avaialbe or
1786 * NameNode throws IOException due to an error
1787 * while getting the status or block locations
1788 */
1789 @Override
1790 public boolean hasNext() throws IOException {
1791 while (curFile == null) {
1792 if (curItor.hasNext()) {
1793 handleFileStat(curItor.next());
1794 } else if (!itors.empty()) {
1795 curItor = itors.pop();
1796 } else {
1797 return false;
1798 }
1799 }
1800 return true;
1801 }
1802
1803 /**
1804 * Process the input stat.
1805 * If it is a file, return the file stat.
1806 * If it is a directory, traverse the directory if recursive is true;
1807 * ignore it if recursive is false.
1808 * If it is a symlink, resolve the symlink first and then process it
1809 * depending on if it is a file or directory.
1810 * @param stat input status
1811 * @throws AccessControlException if access is denied
1812 * @throws FileNotFoundException if file is not found
1813 * @throws UnsupportedFileSystemException if fs is not supported
1814 * @throws IOException for all other IO errors
1815 */
1816 private void handleFileStat(LocatedFileStatus stat)
1817 throws IOException {
1818 if (stat.isFile()) { // file
1819 curFile = stat;
1820 } else if (stat.isSymlink()) { // symbolic link
1821 // resolve symbolic link
1822 FileStatus symstat = FileContext.this.getFileStatus(
1823 stat.getSymlink());
1824 if (symstat.isFile() || (recursive && symstat.isDirectory())) {
1825 itors.push(curItor);
1826 curItor = listLocatedStatus(stat.getPath());
1827 }
1828 } else if (recursive) { // directory
1829 itors.push(curItor);
1830 curItor = listLocatedStatus(stat.getPath());
1831 }
1832 }
1833
1834 /**
1835 * Returns the next file's status with its block locations
1836 *
1837 * @throws AccessControlException if not allowed to access next
1838 * file's status or locations
1839 * @throws FileNotFoundException if next file does not exist any more
1840 * @throws UnsupportedFileSystemException if next file's
1841 * fs is unsupported
1842 * @throws IOException for all other IO errors
1843 * for example, NameNode is not avaialbe or
1844 * NameNode throws IOException due to an error
1845 * while getting the status or block locations
1846 */
1847 @Override
1848 public LocatedFileStatus next() throws IOException {
1849 if (hasNext()) {
1850 LocatedFileStatus result = curFile;
1851 curFile = null;
1852 return result;
1853 }
1854 throw new java.util.NoSuchElementException("No more entry in " + f);
1855 }
1856 };
1857 }
1858
1859 /**
1860 * <p>Return all the files that match filePattern and are not checksum
1861 * files. Results are sorted by their names.
1862 *
1863 * <p>
1864 * A filename pattern is composed of <i>regular</i> characters and
1865 * <i>special pattern matching</i> characters, which are:
1866 *
1867 * <dl>
1868 * <dd>
1869 * <dl>
1870 * <p>
1871 * <dt> <tt> ? </tt>
1872 * <dd> Matches any single character.
1873 *
1874 * <p>
1875 * <dt> <tt> * </tt>
1876 * <dd> Matches zero or more characters.
1877 *
1878 * <p>
1879 * <dt> <tt> [<i>abc</i>] </tt>
1880 * <dd> Matches a single character from character set
1881 * <tt>{<i>a,b,c</i>}</tt>.
1882 *
1883 * <p>
1884 * <dt> <tt> [<i>a</i>-<i>b</i>] </tt>
1885 * <dd> Matches a single character from the character range
1886 * <tt>{<i>a...b</i>}</tt>. Note: character <tt><i>a</i></tt> must be
1887 * lexicographically less than or equal to character <tt><i>b</i></tt>.
1888 *
1889 * <p>
1890 * <dt> <tt> [^<i>a</i>] </tt>
1891 * <dd> Matches a single char that is not from character set or range
1892 * <tt>{<i>a</i>}</tt>. Note that the <tt>^</tt> character must occur
1893 * immediately to the right of the opening bracket.
1894 *
1895 * <p>
1896 * <dt> <tt> \<i>c</i> </tt>
1897 * <dd> Removes (escapes) any special meaning of character <i>c</i>.
1898 *
1899 * <p>
1900 * <dt> <tt> {ab,cd} </tt>
1901 * <dd> Matches a string from the string set <tt>{<i>ab, cd</i>} </tt>
1902 *
1903 * <p>
1904 * <dt> <tt> {ab,c{de,fh}} </tt>
1905 * <dd> Matches a string from string set <tt>{<i>ab, cde, cfh</i>}</tt>
1906 *
1907 * </dl>
1908 * </dd>
1909 * </dl>
1910 *
1911 * @param pathPattern a regular expression specifying a pth pattern
1912 *
1913 * @return an array of paths that match the path pattern
1914 *
1915 * @throws AccessControlException If access is denied
1916 * @throws UnsupportedFileSystemException If file system for
1917 * <code>pathPattern</code> is not supported
1918 * @throws IOException If an I/O error occurred
1919 *
1920 * Exceptions applicable to file systems accessed over RPC:
1921 * @throws RpcClientException If an exception occurred in the RPC client
1922 * @throws RpcServerException If an exception occurred in the RPC server
1923 * @throws UnexpectedServerException If server implementation throws
1924 * undeclared exception to RPC server
1925 */
1926 public FileStatus[] globStatus(Path pathPattern)
1927 throws AccessControlException, UnsupportedFileSystemException,
1928 IOException {
1929 return globStatus(pathPattern, DEFAULT_FILTER);
1930 }
1931
1932 /**
1933 * Return an array of FileStatus objects whose path names match pathPattern
1934 * and is accepted by the user-supplied path filter. Results are sorted by
1935 * their path names.
1936 * Return null if pathPattern has no glob and the path does not exist.
1937 * Return an empty array if pathPattern has a glob and no path matches it.
1938 *
1939 * @param pathPattern regular expression specifying the path pattern
1940 * @param filter user-supplied path filter
1941 *
1942 * @return an array of FileStatus objects
1943 *
1944 * @throws AccessControlException If access is denied
1945 * @throws UnsupportedFileSystemException If file system for
1946 * <code>pathPattern</code> is not supported
1947 * @throws IOException If an I/O error occurred
1948 *
1949 * Exceptions applicable to file systems accessed over RPC:
1950 * @throws RpcClientException If an exception occurred in the RPC client
1951 * @throws RpcServerException If an exception occurred in the RPC server
1952 * @throws UnexpectedServerException If server implementation throws
1953 * undeclared exception to RPC server
1954 */
1955 public FileStatus[] globStatus(final Path pathPattern,
1956 final PathFilter filter) throws AccessControlException,
1957 UnsupportedFileSystemException, IOException {
1958 URI uri = getFSofPath(fixRelativePart(pathPattern)).getUri();
1959
1960 String filename = pathPattern.toUri().getPath();
1961
1962 List<String> filePatterns = GlobExpander.expand(filename);
1963 if (filePatterns.size() == 1) {
1964 Path absPathPattern = fixRelativePart(pathPattern);
1965 return globStatusInternal(uri, new Path(absPathPattern.toUri()
1966 .getPath()), filter);
1967 } else {
1968 List<FileStatus> results = new ArrayList<FileStatus>();
1969 for (String iFilePattern : filePatterns) {
1970 Path iAbsFilePattern = fixRelativePart(new Path(iFilePattern));
1971 FileStatus[] files = globStatusInternal(uri, iAbsFilePattern, filter);
1972 for (FileStatus file : files) {
1973 results.add(file);
1974 }
1975 }
1976 return results.toArray(new FileStatus[results.size()]);
1977 }
1978 }
1979
1980 /**
1981 *
1982 * @param uri for all the inPathPattern
1983 * @param inPathPattern - without the scheme & authority (take from uri)
1984 * @param filter
1985 *
1986 * @return an array of FileStatus objects
1987 *
1988 * @throws AccessControlException If access is denied
1989 * @throws IOException If an I/O error occurred
1990 */
1991 private FileStatus[] globStatusInternal(final URI uri,
1992 final Path inPathPattern, final PathFilter filter)
1993 throws AccessControlException, IOException
1994 {
1995 Path[] parents = new Path[1];
1996 int level = 0;
1997
1998 assert(inPathPattern.toUri().getScheme() == null &&
1999 inPathPattern.toUri().getAuthority() == null &&
2000 inPathPattern.isUriPathAbsolute());
2001
2002
2003 String filename = inPathPattern.toUri().getPath();
2004
2005 // path has only zero component
2006 if ("".equals(filename) || Path.SEPARATOR.equals(filename)) {
2007 Path p = inPathPattern.makeQualified(uri, null);
2008 return getFileStatus(new Path[]{p});
2009 }
2010
2011 // path has at least one component
2012 String[] components = filename.split(Path.SEPARATOR);
2013
2014 // Path is absolute, first component is "/" hence first component
2015 // is the uri root
2016 parents[0] = new Path(new Path(uri), new Path("/"));
2017 level = 1;
2018
2019 // glob the paths that match the parent path, ie. [0, components.length-1]
2020 boolean[] hasGlob = new boolean[]{false};
2021 Path[] relParentPaths =
2022 globPathsLevel(parents, components, level, hasGlob);
2023 FileStatus[] results;
2024
2025 if (relParentPaths == null || relParentPaths.length == 0) {
2026 results = null;
2027 } else {
2028 // fix the pathes to be abs
2029 Path[] parentPaths = new Path [relParentPaths.length];
2030 for(int i=0; i<relParentPaths.length; i++) {
2031 parentPaths[i] = relParentPaths[i].makeQualified(uri, null);
2032 }
2033
2034 // Now work on the last component of the path
2035 GlobFilter fp =
2036 new GlobFilter(components[components.length - 1], filter);
2037 if (fp.hasPattern()) { // last component has a pattern
2038 // list parent directories and then glob the results
2039 try {
2040 results = listStatus(parentPaths, fp);
2041 } catch (FileNotFoundException e) {
2042 results = null;
2043 }
2044 hasGlob[0] = true;
2045 } else { // last component does not have a pattern
2046 // get all the path names
2047 ArrayList<Path> filteredPaths =
2048 new ArrayList<Path>(parentPaths.length);
2049 for (int i = 0; i < parentPaths.length; i++) {
2050 parentPaths[i] = new Path(parentPaths[i],
2051 components[components.length - 1]);
2052 if (fp.accept(parentPaths[i])) {
2053 filteredPaths.add(parentPaths[i]);
2054 }
2055 }
2056 // get all their statuses
2057 results = getFileStatus(
2058 filteredPaths.toArray(new Path[filteredPaths.size()]));
2059 }
2060 }
2061
2062 // Decide if the pathPattern contains a glob or not
2063 if (results == null) {
2064 if (hasGlob[0]) {
2065 results = new FileStatus[0];
2066 }
2067 } else {
2068 if (results.length == 0) {
2069 if (!hasGlob[0]) {
2070 results = null;
2071 }
2072 } else {
2073 Arrays.sort(results);
2074 }
2075 }
2076 return results;
2077 }
2078
2079 /*
2080 * For a path of N components, return a list of paths that match the
2081 * components [<code>level</code>, <code>N-1</code>].
2082 */
2083 private Path[] globPathsLevel(Path[] parents, String[] filePattern,
2084 int level, boolean[] hasGlob) throws AccessControlException,
2085 FileNotFoundException, IOException {
2086 if (level == filePattern.length - 1) {
2087 return parents;
2088 }
2089 if (parents == null || parents.length == 0) {
2090 return null;
2091 }
2092 GlobFilter fp = new GlobFilter(filePattern[level]);
2093 if (fp.hasPattern()) {
2094 try {
2095 parents = FileUtil.stat2Paths(listStatus(parents, fp));
2096 } catch (FileNotFoundException e) {
2097 parents = null;
2098 }
2099 hasGlob[0] = true;
2100 } else {
2101 for (int i = 0; i < parents.length; i++) {
2102 parents[i] = new Path(parents[i], filePattern[level]);
2103 }
2104 }
2105 return globPathsLevel(parents, filePattern, level + 1, hasGlob);
2106 }
2107
2108 /**
2109 * Copy file from src to dest. See
2110 * {@link #copy(Path, Path, boolean, boolean)}
2111 */
2112 public boolean copy(final Path src, final Path dst)
2113 throws AccessControlException, FileAlreadyExistsException,
2114 FileNotFoundException, ParentNotDirectoryException,
2115 UnsupportedFileSystemException, IOException {
2116 return copy(src, dst, false, false);
2117 }
2118
2119 /**
2120 * Copy from src to dst, optionally deleting src and overwriting dst.
2121 * @param src
2122 * @param dst
2123 * @param deleteSource - delete src if true
2124 * @param overwrite overwrite dst if true; throw IOException if dst exists
2125 * and overwrite is false.
2126 *
2127 * @return true if copy is successful
2128 *
2129 * @throws AccessControlException If access is denied
2130 * @throws FileAlreadyExistsException If <code>dst</code> already exists
2131 * @throws FileNotFoundException If <code>src</code> does not exist
2132 * @throws ParentNotDirectoryException If parent of <code>dst</code> is not
2133 * a directory
2134 * @throws UnsupportedFileSystemException If file system for
2135 * <code>src</code> or <code>dst</code> is not supported
2136 * @throws IOException If an I/O error occurred
2137 *
2138 * Exceptions applicable to file systems accessed over RPC:
2139 * @throws RpcClientException If an exception occurred in the RPC client
2140 * @throws RpcServerException If an exception occurred in the RPC server
2141 * @throws UnexpectedServerException If server implementation throws
2142 * undeclared exception to RPC server
2143 *
2144 * RuntimeExceptions:
2145 * @throws InvalidPathException If path <code>dst</code> is invalid
2146 */
2147 public boolean copy(final Path src, final Path dst, boolean deleteSource,
2148 boolean overwrite) throws AccessControlException,
2149 FileAlreadyExistsException, FileNotFoundException,
2150 ParentNotDirectoryException, UnsupportedFileSystemException,
2151 IOException {
2152 checkNotSchemeWithRelative(src);
2153 checkNotSchemeWithRelative(dst);
2154 Path qSrc = makeQualified(src);
2155 Path qDst = makeQualified(dst);
2156 checkDest(qSrc.getName(), qDst, overwrite);
2157 FileStatus fs = FileContext.this.getFileStatus(qSrc);
2158 if (fs.isDirectory()) {
2159 checkDependencies(qSrc, qDst);
2160 mkdir(qDst, FsPermission.getDefault(), true);
2161 FileStatus[] contents = listStatus(qSrc);
2162 for (FileStatus content : contents) {
2163 copy(makeQualified(content.getPath()), makeQualified(new Path(qDst,
2164 content.getPath().getName())), deleteSource, overwrite);
2165 }
2166 } else {
2167 InputStream in=null;
2168 OutputStream out = null;
2169 try {
2170 in = open(qSrc);
2171 EnumSet<CreateFlag> createFlag = overwrite ? EnumSet.of(
2172 CreateFlag.CREATE, CreateFlag.OVERWRITE) :
2173 EnumSet.of(CreateFlag.CREATE);
2174 out = create(qDst, createFlag);
2175 IOUtils.copyBytes(in, out, conf, true);
2176 } catch (IOException e) {
2177 IOUtils.closeStream(out);
2178 IOUtils.closeStream(in);
2179 throw e;
2180 }
2181 }
2182 if (deleteSource) {
2183 return delete(qSrc, true);
2184 } else {
2185 return true;
2186 }
2187 }
2188 }
2189
2190 /**
2191 * Check if copying srcName to dst would overwrite an existing
2192 * file or directory.
2193 * @param srcName File or directory to be copied.
2194 * @param dst Destination to copy srcName to.
2195 * @param overwrite Whether it's ok to overwrite an existing file.
2196 * @throws AccessControlException If access is denied.
2197 * @throws IOException If dst is an existing directory, or dst is an
2198 * existing file and the overwrite option is not passed.
2199 */
2200 private void checkDest(String srcName, Path dst, boolean overwrite)
2201 throws AccessControlException, IOException {
2202 try {
2203 FileStatus dstFs = getFileStatus(dst);
2204 if (dstFs.isDirectory()) {
2205 if (null == srcName) {
2206 throw new IOException("Target " + dst + " is a directory");
2207 }
2208 // Recurse to check if dst/srcName exists.
2209 checkDest(null, new Path(dst, srcName), overwrite);
2210 } else if (!overwrite) {
2211 throw new IOException("Target " + new Path(dst, srcName)
2212 + " already exists");
2213 }
2214 } catch (FileNotFoundException e) {
2215 // dst does not exist - OK to copy.
2216 }
2217 }
2218
2219 //
2220 // If the destination is a subdirectory of the source, then
2221 // generate exception
2222 //
2223 private static void checkDependencies(Path qualSrc, Path qualDst)
2224 throws IOException {
2225 if (isSameFS(qualSrc, qualDst)) {
2226 String srcq = qualSrc.toString() + Path.SEPARATOR;
2227 String dstq = qualDst.toString() + Path.SEPARATOR;
2228 if (dstq.startsWith(srcq)) {
2229 if (srcq.length() == dstq.length()) {
2230 throw new IOException("Cannot copy " + qualSrc + " to itself.");
2231 } else {
2232 throw new IOException("Cannot copy " + qualSrc +
2233 " to its subdirectory " + qualDst);
2234 }
2235 }
2236 }
2237 }
2238
2239 /**
2240 * Are qualSrc and qualDst of the same file system?
2241 * @param qualPath1 - fully qualified path
2242 * @param qualPath2 - fully qualified path
2243 * @return
2244 */
2245 private static boolean isSameFS(Path qualPath1, Path qualPath2) {
2246 URI srcUri = qualPath1.toUri();
2247 URI dstUri = qualPath2.toUri();
2248 return (srcUri.getScheme().equals(dstUri.getScheme()) &&
2249 !(srcUri.getAuthority() != null && dstUri.getAuthority() != null && srcUri
2250 .getAuthority().equals(dstUri.getAuthority())));
2251 }
2252
2253 /**
2254 * Deletes all the paths in deleteOnExit on JVM shutdown.
2255 */
2256 static class FileContextFinalizer implements Runnable {
2257 @Override
2258 public synchronized void run() {
2259 processDeleteOnExit();
2260 }
2261 }
2262
2263 /**
2264 * Resolves all symbolic links in the specified path.
2265 * Returns the new path object.
2266 */
2267 protected Path resolve(final Path f) throws FileNotFoundException,
2268 UnresolvedLinkException, AccessControlException, IOException {
2269 return new FSLinkResolver<Path>() {
2270 @Override
2271 public Path next(final AbstractFileSystem fs, final Path p)
2272 throws IOException, UnresolvedLinkException {
2273 return fs.resolvePath(p);
2274 }
2275 }.resolve(this, f);
2276 }
2277
2278 /**
2279 * Resolves all symbolic links in the specified path leading up
2280 * to, but not including the final path component.
2281 * @param f path to resolve
2282 * @return the new path object.
2283 */
2284 protected Path resolveIntermediate(final Path f) throws IOException {
2285 return new FSLinkResolver<FileStatus>() {
2286 @Override
2287 public FileStatus next(final AbstractFileSystem fs, final Path p)
2288 throws IOException, UnresolvedLinkException {
2289 return fs.getFileLinkStatus(p);
2290 }
2291 }.resolve(this, f).getPath();
2292 }
2293
2294 /**
2295 * Returns the list of AbstractFileSystems accessed in the path. The list may
2296 * contain more than one AbstractFileSystems objects in case of symlinks.
2297 *
2298 * @param f
2299 * Path which needs to be resolved
2300 * @return List of AbstractFileSystems accessed in the path
2301 * @throws IOException
2302 */
2303 Set<AbstractFileSystem> resolveAbstractFileSystems(final Path f)
2304 throws IOException {
2305 final Path absF = fixRelativePart(f);
2306 final HashSet<AbstractFileSystem> result
2307 = new HashSet<AbstractFileSystem>();
2308 new FSLinkResolver<Void>() {
2309 @Override
2310 public Void next(final AbstractFileSystem fs, final Path p)
2311 throws IOException, UnresolvedLinkException {
2312 result.add(fs);
2313 fs.getFileStatus(p);
2314 return null;
2315 }
2316 }.resolve(this, absF);
2317 return result;
2318 }
2319
2320 /**
2321 * Class used to perform an operation on and resolve symlinks in a
2322 * path. The operation may potentially span multiple file systems.
2323 */
2324 protected abstract class FSLinkResolver<T> {
2325 // The maximum number of symbolic link components in a path
2326 private static final int MAX_PATH_LINKS = 32;
2327
2328 /**
2329 * Generic helper function overridden on instantiation to perform a
2330 * specific operation on the given file system using the given path
2331 * which may result in an UnresolvedLinkException.
2332 * @param fs AbstractFileSystem to perform the operation on.
2333 * @param p Path given the file system.
2334 * @return Generic type determined by the specific implementation.
2335 * @throws UnresolvedLinkException If symbolic link <code>path</code> could
2336 * not be resolved
2337 * @throws IOException an I/O error occured
2338 */
2339 public abstract T next(final AbstractFileSystem fs, final Path p)
2340 throws IOException, UnresolvedLinkException;
2341
2342 /**
2343 * Performs the operation specified by the next function, calling it
2344 * repeatedly until all symlinks in the given path are resolved.
2345 * @param fc FileContext used to access file systems.
2346 * @param p The path to resolve symlinks in.
2347 * @return Generic type determined by the implementation of next.
2348 * @throws IOException
2349 */
2350 public T resolve(final FileContext fc, Path p) throws IOException {
2351 int count = 0;
2352 T in = null;
2353 Path first = p;
2354 // NB: More than one AbstractFileSystem can match a scheme, eg
2355 // "file" resolves to LocalFs but could have come by RawLocalFs.
2356 AbstractFileSystem fs = fc.getFSofPath(p);
2357
2358 // Loop until all symlinks are resolved or the limit is reached
2359 for (boolean isLink = true; isLink;) {
2360 try {
2361 in = next(fs, p);
2362 isLink = false;
2363 } catch (UnresolvedLinkException e) {
2364 if (count++ > MAX_PATH_LINKS) {
2365 throw new IOException("Possible cyclic loop while " +
2366 "following symbolic link " + first);
2367 }
2368 // Resolve the first unresolved path component
2369 p = qualifySymlinkTarget(fs, p, fs.getLinkTarget(p));
2370 fs = fc.getFSofPath(p);
2371 }
2372 }
2373 return in;
2374 }
2375 }
2376
2377 /**
2378 * Get the statistics for a particular file system
2379 *
2380 * @param uri
2381 * the uri to lookup the statistics. Only scheme and authority part
2382 * of the uri are used as the key to store and lookup.
2383 * @return a statistics object
2384 */
2385 public static Statistics getStatistics(URI uri) {
2386 return AbstractFileSystem.getStatistics(uri);
2387 }
2388
2389 /**
2390 * Clears all the statistics stored in AbstractFileSystem, for all the file
2391 * systems.
2392 */
2393 public static void clearStatistics() {
2394 AbstractFileSystem.clearStatistics();
2395 }
2396
2397 /**
2398 * Prints the statistics to standard output. File System is identified by the
2399 * scheme and authority.
2400 */
2401 public static void printStatistics() {
2402 AbstractFileSystem.printStatistics();
2403 }
2404
2405 /**
2406 * @return Map of uri and statistics for each filesystem instantiated. The uri
2407 * consists of scheme and authority for the filesystem.
2408 */
2409 public static Map<URI, Statistics> getAllStatistics() {
2410 return AbstractFileSystem.getAllStatistics();
2411 }
2412
2413 /**
2414 * Get delegation tokens for the file systems accessed for a given
2415 * path.
2416 * @param p Path for which delegations tokens are requested.
2417 * @param renewer the account name that is allowed to renew the token.
2418 * @return List of delegation tokens.
2419 * @throws IOException
2420 */
2421 @InterfaceAudience.LimitedPrivate( { "HDFS", "MapReduce" })
2422 public List<Token<?>> getDelegationTokens(
2423 Path p, String renewer) throws IOException {
2424 Set<AbstractFileSystem> afsSet = resolveAbstractFileSystems(p);
2425 List<Token<?>> tokenList =
2426 new ArrayList<Token<?>>();
2427 for (AbstractFileSystem afs : afsSet) {
2428 List<Token<?>> afsTokens = afs.getDelegationTokens(renewer);
2429 tokenList.addAll(afsTokens);
2430 }
2431 return tokenList;
2432 }
2433 }