001 /** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018 package org.apache.hadoop.fs; 019 020 import java.io.FileNotFoundException; 021 import java.io.IOException; 022 import java.io.InputStream; 023 import java.io.OutputStream; 024 import java.net.URI; 025 import java.security.PrivilegedExceptionAction; 026 import java.util.ArrayList; 027 import java.util.Arrays; 028 import java.util.EnumSet; 029 import java.util.HashSet; 030 import java.util.IdentityHashMap; 031 import java.util.List; 032 import java.util.Map; 033 import java.util.Set; 034 import java.util.Stack; 035 import java.util.TreeSet; 036 import java.util.Map.Entry; 037 038 import org.apache.commons.logging.Log; 039 import org.apache.commons.logging.LogFactory; 040 import org.apache.hadoop.HadoopIllegalArgumentException; 041 import org.apache.hadoop.classification.InterfaceAudience; 042 import org.apache.hadoop.classification.InterfaceStability; 043 import org.apache.hadoop.conf.Configuration; 044 import org.apache.hadoop.fs.FileSystem.Statistics; 045 import org.apache.hadoop.fs.Options.CreateOpts; 046 import org.apache.hadoop.fs.permission.FsPermission; 047 import static 
org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY; 048 import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_DEFAULT_NAME_DEFAULT; 049 import org.apache.hadoop.io.IOUtils; 050 import org.apache.hadoop.ipc.RpcClientException; 051 import org.apache.hadoop.ipc.RpcServerException; 052 import org.apache.hadoop.ipc.UnexpectedServerException; 053 import org.apache.hadoop.fs.InvalidPathException; 054 import org.apache.hadoop.security.AccessControlException; 055 import org.apache.hadoop.security.UserGroupInformation; 056 import org.apache.hadoop.security.token.Token; 057 import org.apache.hadoop.util.ShutdownHookManager; 058 059 /** 060 * The FileContext class provides an interface to the application writer for 061 * using the Hadoop file system. 062 * It provides a set of methods for the usual operation: create, open, 063 * list, etc 064 * 065 * <p> 066 * <b> *** Path Names *** </b> 067 * <p> 068 * 069 * The Hadoop file system supports a URI name space and URI names. 070 * It offers a forest of file systems that can be referenced using fully 071 * qualified URIs. 072 * Two common Hadoop file systems implementations are 073 * <ul> 074 * <li> the local file system: file:///path 075 * <li> the hdfs file system hdfs://nnAddress:nnPort/path 076 * </ul> 077 * 078 * While URI names are very flexible, it requires knowing the name or address 079 * of the server. For convenience one often wants to access the default system 080 * in one's environment without knowing its name/address. This has an 081 * additional benefit that it allows one to change one's default fs 082 * (e.g. admin moves application from cluster1 to cluster2). 083 * <p> 084 * 085 * To facilitate this, Hadoop supports a notion of a default file system. 086 * The user can set his default file system, although this is 087 * typically set up for you in your environment via your default config. 
* A default file system implies a default scheme and authority; slash-relative
 * names (such as /foo/bar) are resolved relative to that default FS.
 * Similarly a user can also have working-directory-relative names (i.e. names
 * not starting with a slash). While the working directory is generally in the
 * same default FS, the wd can be in a different FS.
 * <p>
 *  Hence Hadoop path names can be one of:
 *  <ul>
 *  <li> fully qualified URI: scheme://authority/path
 *  <li> slash relative names: /path relative to the default file system
 *  <li> wd-relative names: path relative to the working dir
 *  </ul>
 *  Relative paths with scheme (scheme:foo/bar) are illegal.
 *
 *  <p>
 *  <b>****The Role of the FileContext and configuration defaults****</b>
 *  <p>
 *  The FileContext provides file namespace context for resolving file names;
 *  it also contains the umask for permissions. In that sense it is like the
 *  per-process file-related state in a Unix system.
 *  These two properties
 *  <ul>
 *  <li> default file system (i.e. your slash)
 *  <li> umask
 *  </ul>
 *  in general, are obtained from the default configuration file
 *  in your environment (see {@link Configuration}).
 *
 *  No other configuration parameters are obtained from the default config as
 *  far as the file context layer is concerned. All file system instances
 *  (i.e. deployments of file systems) have default properties; we call these
 *  server side (SS) defaults. Operations like create allow one to select many
 *  properties: either pass them in as explicit parameters or use
 *  the SS properties.
 *  <p>
 *  The file system related SS defaults are
 *  <ul>
 *  <li> the home directory (default is "/user/userName")
 *  <li> the initial wd (only for local fs)
 *  <li> replication factor
 *  <li> block size
 *  <li> buffer size
 *  <li> bytesPerChecksum (if used).
131 * </ul> 132 * 133 * <p> 134 * <b> *** Usage Model for the FileContext class *** </b> 135 * <p> 136 * Example 1: use the default config read from the $HADOOP_CONFIG/core.xml. 137 * Unspecified values come from core-defaults.xml in the release jar. 138 * <ul> 139 * <li> myFContext = FileContext.getFileContext(); // uses the default config 140 * // which has your default FS 141 * <li> myFContext.create(path, ...); 142 * <li> myFContext.setWorkingDir(path) 143 * <li> myFContext.open (path, ...); 144 * </ul> 145 * Example 2: Get a FileContext with a specific URI as the default FS 146 * <ul> 147 * <li> myFContext = FileContext.getFileContext(URI) 148 * <li> myFContext.create(path, ...); 149 * ... 150 * </ul> 151 * Example 3: FileContext with local file system as the default 152 * <ul> 153 * <li> myFContext = FileContext.getLocalFSFileContext() 154 * <li> myFContext.create(path, ...); 155 * <li> ... 156 * </ul> 157 * Example 4: Use a specific config, ignoring $HADOOP_CONFIG 158 * Generally you should not need use a config unless you are doing 159 * <ul> 160 * <li> configX = someConfigSomeOnePassedToYou. 161 * <li> myFContext = getFileContext(configX); // configX is not changed, 162 * // is passed down 163 * <li> myFContext.create(path, ...); 164 * <li>... 165 * </ul> 166 * 167 */ 168 169 @InterfaceAudience.Public 170 @InterfaceStability.Evolving /*Evolving for a release,to be changed to Stable */ 171 public final class FileContext { 172 173 public static final Log LOG = LogFactory.getLog(FileContext.class); 174 public static final FsPermission DEFAULT_PERM = FsPermission.getDefault(); 175 176 /** 177 * Priority of the FileContext shutdown hook. 178 */ 179 public static final int SHUTDOWN_HOOK_PRIORITY = 20; 180 181 /** 182 * List of files that should be deleted on JVM shutdown. 183 */ 184 static final Map<FileContext, Set<Path>> DELETE_ON_EXIT = 185 new IdentityHashMap<FileContext, Set<Path>>(); 186 187 /** JVM shutdown hook thread. 
*/ 188 static final FileContextFinalizer FINALIZER = 189 new FileContextFinalizer(); 190 191 private static final PathFilter DEFAULT_FILTER = new PathFilter() { 192 public boolean accept(final Path file) { 193 return true; 194 } 195 }; 196 197 /** 198 * The FileContext is defined by. 199 * 1) defaultFS (slash) 200 * 2) wd 201 * 3) umask 202 */ 203 private final AbstractFileSystem defaultFS; //default FS for this FileContext. 204 private Path workingDir; // Fully qualified 205 private FsPermission umask; 206 private final Configuration conf; 207 private final UserGroupInformation ugi; 208 209 private FileContext(final AbstractFileSystem defFs, 210 final FsPermission theUmask, final Configuration aConf) { 211 defaultFS = defFs; 212 umask = FsPermission.getUMask(aConf); 213 conf = aConf; 214 try { 215 ugi = UserGroupInformation.getCurrentUser(); 216 } catch (IOException e) { 217 LOG.error("Exception in getCurrentUser: ",e); 218 throw new RuntimeException("Failed to get the current user " + 219 "while creating a FileContext", e); 220 } 221 /* 222 * Init the wd. 223 * WorkingDir is implemented at the FileContext layer 224 * NOT at the AbstractFileSystem layer. 225 * If the DefaultFS, such as localFilesystem has a notion of 226 * builtin WD, we use that as the initial WD. 227 * Otherwise the WD is initialized to the home directory. 228 */ 229 workingDir = defaultFS.getInitialWorkingDirectory(); 230 if (workingDir == null) { 231 workingDir = defaultFS.getHomeDirectory(); 232 } 233 util = new Util(); // for the inner class 234 } 235 236 /* 237 * Remove relative part - return "absolute": 238 * If input is relative path ("foo/bar") add wd: ie "/<workingDir>/foo/bar" 239 * A fully qualified uri ("hdfs://nn:p/foo/bar") or a slash-relative path 240 * ("/foo/bar") are returned unchanged. 241 * 242 * Applications that use FileContext should use #makeQualified() since 243 * they really want a fully qualified URI. 
244 * Hence this method is not called makeAbsolute() and 245 * has been deliberately declared private. 246 */ 247 private Path fixRelativePart(Path p) { 248 if (p.isUriPathAbsolute()) { 249 return p; 250 } else { 251 return new Path(workingDir, p); 252 } 253 } 254 255 /** 256 * Delete all the paths that were marked as delete-on-exit. 257 */ 258 static void processDeleteOnExit() { 259 synchronized (DELETE_ON_EXIT) { 260 Set<Entry<FileContext, Set<Path>>> set = DELETE_ON_EXIT.entrySet(); 261 for (Entry<FileContext, Set<Path>> entry : set) { 262 FileContext fc = entry.getKey(); 263 Set<Path> paths = entry.getValue(); 264 for (Path path : paths) { 265 try { 266 fc.delete(path, true); 267 } catch (IOException e) { 268 LOG.warn("Ignoring failure to deleteOnExit for path " + path); 269 } 270 } 271 } 272 DELETE_ON_EXIT.clear(); 273 } 274 } 275 276 /** 277 * Pathnames with scheme and relative path are illegal. 278 * @param path to be checked 279 */ 280 private static void checkNotSchemeWithRelative(final Path path) { 281 if (path.toUri().isAbsolute() && !path.isUriPathAbsolute()) { 282 throw new HadoopIllegalArgumentException( 283 "Unsupported name: has scheme but relative path-part"); 284 } 285 } 286 287 /** 288 * Get the file system of supplied path. 289 * 290 * @param absOrFqPath - absolute or fully qualified path 291 * @return the file system of the path 292 * 293 * @throws UnsupportedFileSystemException If the file system for 294 * <code>absOrFqPath</code> is not supported. 295 * @throws IOExcepton If the file system for <code>absOrFqPath</code> could 296 * not be instantiated. 
297 */ 298 private AbstractFileSystem getFSofPath(final Path absOrFqPath) 299 throws UnsupportedFileSystemException, IOException { 300 checkNotSchemeWithRelative(absOrFqPath); 301 if (!absOrFqPath.isAbsolute() && absOrFqPath.toUri().getScheme() == null) { 302 throw new HadoopIllegalArgumentException( 303 "FileContext Bug: path is relative"); 304 } 305 306 try { 307 // Is it the default FS for this FileContext? 308 defaultFS.checkPath(absOrFqPath); 309 return defaultFS; 310 } catch (Exception e) { // it is different FileSystem 311 return getAbstractFileSystem(ugi, absOrFqPath.toUri(), conf); 312 } 313 } 314 315 private static AbstractFileSystem getAbstractFileSystem( 316 UserGroupInformation user, final URI uri, final Configuration conf) 317 throws UnsupportedFileSystemException, IOException { 318 try { 319 return user.doAs(new PrivilegedExceptionAction<AbstractFileSystem>() { 320 public AbstractFileSystem run() throws UnsupportedFileSystemException { 321 return AbstractFileSystem.get(uri, conf); 322 } 323 }); 324 } catch (InterruptedException ex) { 325 LOG.error(ex); 326 throw new IOException("Failed to get the AbstractFileSystem for path: " 327 + uri, ex); 328 } 329 } 330 331 /** 332 * Protected Static Factory methods for getting a FileContexts 333 * that take a AbstractFileSystem as input. To be used for testing. 334 */ 335 336 /** 337 * Create a FileContext with specified FS as default using the specified 338 * config. 339 * 340 * @param defFS 341 * @param aConf 342 * @return new FileContext with specifed FS as default. 343 */ 344 public static FileContext getFileContext(final AbstractFileSystem defFS, 345 final Configuration aConf) { 346 return new FileContext(defFS, FsPermission.getUMask(aConf), aConf); 347 } 348 349 /** 350 * Create a FileContext for specified file system using the default config. 351 * 352 * @param defaultFS 353 * @return a FileContext with the specified AbstractFileSystem 354 * as the default FS. 
355 */ 356 protected static FileContext getFileContext( 357 final AbstractFileSystem defaultFS) { 358 return getFileContext(defaultFS, new Configuration()); 359 } 360 361 /** 362 * Static Factory methods for getting a FileContext. 363 * Note new file contexts are created for each call. 364 * The only singleton is the local FS context using the default config. 365 * 366 * Methods that use the default config: the default config read from the 367 * $HADOOP_CONFIG/core.xml, 368 * Unspecified key-values for config are defaulted from core-defaults.xml 369 * in the release jar. 370 * 371 * The keys relevant to the FileContext layer are extracted at time of 372 * construction. Changes to the config after the call are ignore 373 * by the FileContext layer. 374 * The conf is passed to lower layers like AbstractFileSystem and HDFS which 375 * pick up their own config variables. 376 */ 377 378 /** 379 * Create a FileContext using the default config read from the 380 * $HADOOP_CONFIG/core.xml, Unspecified key-values for config are defaulted 381 * from core-defaults.xml in the release jar. 382 * 383 * @throws UnsupportedFileSystemException If the file system from the default 384 * configuration is not supported 385 */ 386 public static FileContext getFileContext() 387 throws UnsupportedFileSystemException { 388 return getFileContext(new Configuration()); 389 } 390 391 /** 392 * @return a FileContext for the local file system using the default config. 393 * @throws UnsupportedFileSystemException If the file system for 394 * {@link FsConstants#LOCAL_FS_URI} is not supported. 395 */ 396 public static FileContext getLocalFSFileContext() 397 throws UnsupportedFileSystemException { 398 return getFileContext(FsConstants.LOCAL_FS_URI); 399 } 400 401 /** 402 * Create a FileContext for specified URI using the default config. 403 * 404 * @param defaultFsUri 405 * @return a FileContext with the specified URI as the default FS. 
406 * 407 * @throws UnsupportedFileSystemException If the file system for 408 * <code>defaultFsUri</code> is not supported 409 */ 410 public static FileContext getFileContext(final URI defaultFsUri) 411 throws UnsupportedFileSystemException { 412 return getFileContext(defaultFsUri, new Configuration()); 413 } 414 415 /** 416 * Create a FileContext for specified default URI using the specified config. 417 * 418 * @param defaultFsUri 419 * @param aConf 420 * @return new FileContext for specified uri 421 * @throws UnsupportedFileSystemException If the file system with specified is 422 * not supported 423 * @throws RuntimeException If the file system specified is supported but 424 * could not be instantiated, or if login fails. 425 */ 426 public static FileContext getFileContext(final URI defaultFsUri, 427 final Configuration aConf) throws UnsupportedFileSystemException { 428 UserGroupInformation currentUser = null; 429 AbstractFileSystem defaultAfs = null; 430 try { 431 currentUser = UserGroupInformation.getCurrentUser(); 432 defaultAfs = getAbstractFileSystem(currentUser, defaultFsUri, aConf); 433 } catch (UnsupportedFileSystemException ex) { 434 throw ex; 435 } catch (IOException ex) { 436 LOG.error(ex); 437 throw new RuntimeException(ex); 438 } 439 return getFileContext(defaultAfs, aConf); 440 } 441 442 /** 443 * Create a FileContext using the passed config. Generally it is better to use 444 * {@link #getFileContext(URI, Configuration)} instead of this one. 
445 * 446 * 447 * @param aConf 448 * @return new FileContext 449 * @throws UnsupportedFileSystemException If file system in the config 450 * is not supported 451 */ 452 public static FileContext getFileContext(final Configuration aConf) 453 throws UnsupportedFileSystemException { 454 return getFileContext( 455 URI.create(aConf.get(FS_DEFAULT_NAME_KEY, FS_DEFAULT_NAME_DEFAULT)), 456 aConf); 457 } 458 459 /** 460 * @param aConf - from which the FileContext is configured 461 * @return a FileContext for the local file system using the specified config. 462 * 463 * @throws UnsupportedFileSystemException If default file system in the config 464 * is not supported 465 * 466 */ 467 public static FileContext getLocalFSFileContext(final Configuration aConf) 468 throws UnsupportedFileSystemException { 469 return getFileContext(FsConstants.LOCAL_FS_URI, aConf); 470 } 471 472 /* This method is needed for tests. */ 473 @InterfaceAudience.Private 474 @InterfaceStability.Unstable /* return type will change to AFS once 475 HADOOP-6223 is completed */ 476 public AbstractFileSystem getDefaultFileSystem() { 477 return defaultFS; 478 } 479 480 /** 481 * Set the working directory for wd-relative names (such a "foo/bar"). Working 482 * directory feature is provided by simply prefixing relative names with the 483 * working dir. Note this is different from Unix where the wd is actually set 484 * to the inode. Hence setWorkingDir does not follow symlinks etc. This works 485 * better in a distributed environment that has multiple independent roots. 486 * {@link #getWorkingDirectory()} should return what setWorkingDir() set. 
487 * 488 * @param newWDir new working directory 489 * @throws IOException 490 * <br> 491 * NewWdir can be one of: 492 * <ul> 493 * <li>relative path: "foo/bar";</li> 494 * <li>absolute without scheme: "/foo/bar"</li> 495 * <li>fully qualified with scheme: "xx://auth/foo/bar"</li> 496 * </ul> 497 * <br> 498 * Illegal WDs: 499 * <ul> 500 * <li>relative with scheme: "xx:foo/bar"</li> 501 * <li>non existent directory</li> 502 * </ul> 503 */ 504 public void setWorkingDirectory(final Path newWDir) throws IOException { 505 checkNotSchemeWithRelative(newWDir); 506 /* wd is stored as a fully qualified path. We check if the given 507 * path is not relative first since resolve requires and returns 508 * an absolute path. 509 */ 510 final Path newWorkingDir = new Path(workingDir, newWDir); 511 FileStatus status = getFileStatus(newWorkingDir); 512 if (status.isFile()) { 513 throw new FileNotFoundException("Cannot setWD to a file"); 514 } 515 workingDir = newWorkingDir; 516 } 517 518 /** 519 * Gets the working directory for wd-relative names (such a "foo/bar"). 520 */ 521 public Path getWorkingDirectory() { 522 return workingDir; 523 } 524 525 /** 526 * Gets the ugi in the file-context 527 * @return UserGroupInformation 528 */ 529 public UserGroupInformation getUgi() { 530 return ugi; 531 } 532 533 /** 534 * Return the current user's home directory in this file system. 535 * The default implementation returns "/user/$USER/". 536 * @return the home directory 537 */ 538 public Path getHomeDirectory() { 539 return defaultFS.getHomeDirectory(); 540 } 541 542 /** 543 * 544 * @return the umask of this FileContext 545 */ 546 public FsPermission getUMask() { 547 return umask; 548 } 549 550 /** 551 * Set umask to the supplied parameter. 
552 * @param newUmask the new umask 553 */ 554 public void setUMask(final FsPermission newUmask) { 555 umask = newUmask; 556 } 557 558 559 /** 560 * Resolve the path following any symlinks or mount points 561 * @param f to be resolved 562 * @return fully qualified resolved path 563 * 564 * @throws FileNotFoundException If <code>f</code> does not exist 565 * @throws AccessControlException if access denied 566 * @throws IOException If an IO Error occurred 567 * 568 * Exceptions applicable to file systems accessed over RPC: 569 * @throws RpcClientException If an exception occurred in the RPC client 570 * @throws RpcServerException If an exception occurred in the RPC server 571 * @throws UnexpectedServerException If server implementation throws 572 * undeclared exception to RPC server 573 * 574 * RuntimeExceptions: 575 * @throws InvalidPathException If path <code>f</code> is not valid 576 */ 577 public Path resolvePath(final Path f) throws FileNotFoundException, 578 UnresolvedLinkException, AccessControlException, IOException { 579 return resolve(f); 580 } 581 582 /** 583 * Make the path fully qualified if it is isn't. 584 * A Fully-qualified path has scheme and authority specified and an absolute 585 * path. 586 * Use the default file system and working dir in this FileContext to qualify. 587 * @param path 588 * @return qualified path 589 */ 590 public Path makeQualified(final Path path) { 591 return path.makeQualified(defaultFS.getUri(), getWorkingDirectory()); 592 } 593 594 /** 595 * Create or overwrite file on indicated path and returns an output stream for 596 * writing into the file. 597 * 598 * @param f the file name to open 599 * @param createFlag gives the semantics of create; see {@link CreateFlag} 600 * @param opts file creation options; see {@link Options.CreateOpts}. 
601 * <ul> 602 * <li>Progress - to report progress on the operation - default null 603 * <li>Permission - umask is applied against permisssion: default is 604 * FsPermissions:getDefault() 605 * 606 * <li>CreateParent - create missing parent path; default is to not 607 * to create parents 608 * <li>The defaults for the following are SS defaults of the file 609 * server implementing the target path. Not all parameters make sense 610 * for all kinds of file system - eg. localFS ignores Blocksize, 611 * replication, checksum 612 * <ul> 613 * <li>BufferSize - buffersize used in FSDataOutputStream 614 * <li>Blocksize - block size for file blocks 615 * <li>ReplicationFactor - replication for blocks 616 * <li>BytesPerChecksum - bytes per checksum 617 * </ul> 618 * </ul> 619 * 620 * @return {@link FSDataOutputStream} for created file 621 * 622 * @throws AccessControlException If access is denied 623 * @throws FileAlreadyExistsException If file <code>f</code> already exists 624 * @throws FileNotFoundException If parent of <code>f</code> does not exist 625 * and <code>createParent</code> is false 626 * @throws ParentNotDirectoryException If parent of <code>f</code> is not a 627 * directory. 628 * @throws UnsupportedFileSystemException If file system for <code>f</code> is 629 * not supported 630 * @throws IOException If an I/O error occurred 631 * 632 * Exceptions applicable to file systems accessed over RPC: 633 * @throws RpcClientException If an exception occurred in the RPC client 634 * @throws RpcServerException If an exception occurred in the RPC server 635 * @throws UnexpectedServerException If server implementation throws 636 * undeclared exception to RPC server 637 * 638 * RuntimeExceptions: 639 * @throws InvalidPathException If path <code>f</code> is not valid 640 */ 641 public FSDataOutputStream create(final Path f, 642 final EnumSet<CreateFlag> createFlag, Options.CreateOpts... 
opts) 643 throws AccessControlException, FileAlreadyExistsException, 644 FileNotFoundException, ParentNotDirectoryException, 645 UnsupportedFileSystemException, IOException { 646 Path absF = fixRelativePart(f); 647 648 // If one of the options is a permission, extract it & apply umask 649 // If not, add a default Perms and apply umask; 650 // AbstractFileSystem#create 651 652 CreateOpts.Perms permOpt = 653 (CreateOpts.Perms) CreateOpts.getOpt(CreateOpts.Perms.class, opts); 654 FsPermission permission = (permOpt != null) ? permOpt.getValue() : 655 FsPermission.getDefault(); 656 permission = permission.applyUMask(umask); 657 658 final CreateOpts[] updatedOpts = 659 CreateOpts.setOpt(CreateOpts.perms(permission), opts); 660 return new FSLinkResolver<FSDataOutputStream>() { 661 public FSDataOutputStream next(final AbstractFileSystem fs, final Path p) 662 throws IOException { 663 return fs.create(p, createFlag, updatedOpts); 664 } 665 }.resolve(this, absF); 666 } 667 668 /** 669 * Make(create) a directory and all the non-existent parents. 
670 * 671 * @param dir - the dir to make 672 * @param permission - permissions is set permission&~umask 673 * @param createParent - if true then missing parent dirs are created if false 674 * then parent must exist 675 * 676 * @throws AccessControlException If access is denied 677 * @throws FileAlreadyExistsException If directory <code>dir</code> already 678 * exists 679 * @throws FileNotFoundException If parent of <code>dir</code> does not exist 680 * and <code>createParent</code> is false 681 * @throws ParentNotDirectoryException If parent of <code>dir</code> is not a 682 * directory 683 * @throws UnsupportedFileSystemException If file system for <code>dir</code> 684 * is not supported 685 * @throws IOException If an I/O error occurred 686 * 687 * Exceptions applicable to file systems accessed over RPC: 688 * @throws RpcClientException If an exception occurred in the RPC client 689 * @throws UnexpectedServerException If server implementation throws 690 * undeclared exception to RPC server 691 * 692 * RuntimeExceptions: 693 * @throws InvalidPathException If path <code>dir</code> is not valid 694 */ 695 public void mkdir(final Path dir, final FsPermission permission, 696 final boolean createParent) throws AccessControlException, 697 FileAlreadyExistsException, FileNotFoundException, 698 ParentNotDirectoryException, UnsupportedFileSystemException, 699 IOException { 700 final Path absDir = fixRelativePart(dir); 701 final FsPermission absFerms = (permission == null ? 702 FsPermission.getDefault() : permission).applyUMask(umask); 703 new FSLinkResolver<Void>() { 704 public Void next(final AbstractFileSystem fs, final Path p) 705 throws IOException, UnresolvedLinkException { 706 fs.mkdir(p, absFerms, createParent); 707 return null; 708 } 709 }.resolve(this, absDir); 710 } 711 712 /** 713 * Delete a file. 714 * @param f the path to delete. 715 * @param recursive if path is a directory and set to 716 * true, the directory is deleted else throws an exception. 
In 717 * case of a file the recursive can be set to either true or false. 718 * 719 * @throws AccessControlException If access is denied 720 * @throws FileNotFoundException If <code>f</code> does not exist 721 * @throws UnsupportedFileSystemException If file system for <code>f</code> is 722 * not supported 723 * @throws IOException If an I/O error occurred 724 * 725 * Exceptions applicable to file systems accessed over RPC: 726 * @throws RpcClientException If an exception occurred in the RPC client 727 * @throws RpcServerException If an exception occurred in the RPC server 728 * @throws UnexpectedServerException If server implementation throws 729 * undeclared exception to RPC server 730 * 731 * RuntimeExceptions: 732 * @throws InvalidPathException If path <code>f</code> is invalid 733 */ 734 public boolean delete(final Path f, final boolean recursive) 735 throws AccessControlException, FileNotFoundException, 736 UnsupportedFileSystemException, IOException { 737 Path absF = fixRelativePart(f); 738 return new FSLinkResolver<Boolean>() { 739 public Boolean next(final AbstractFileSystem fs, final Path p) 740 throws IOException, UnresolvedLinkException { 741 return Boolean.valueOf(fs.delete(p, recursive)); 742 } 743 }.resolve(this, absF); 744 } 745 746 /** 747 * Opens an FSDataInputStream at the indicated Path using 748 * default buffersize. 
749 * @param f the file name to open 750 * 751 * @throws AccessControlException If access is denied 752 * @throws FileNotFoundException If file <code>f</code> does not exist 753 * @throws UnsupportedFileSystemException If file system for <code>f</code> 754 * is not supported 755 * @throws IOException If an I/O error occurred 756 * 757 * Exceptions applicable to file systems accessed over RPC: 758 * @throws RpcClientException If an exception occurred in the RPC client 759 * @throws RpcServerException If an exception occurred in the RPC server 760 * @throws UnexpectedServerException If server implementation throws 761 * undeclared exception to RPC server 762 */ 763 public FSDataInputStream open(final Path f) throws AccessControlException, 764 FileNotFoundException, UnsupportedFileSystemException, IOException { 765 final Path absF = fixRelativePart(f); 766 return new FSLinkResolver<FSDataInputStream>() { 767 public FSDataInputStream next(final AbstractFileSystem fs, final Path p) 768 throws IOException, UnresolvedLinkException { 769 return fs.open(p); 770 } 771 }.resolve(this, absF); 772 } 773 774 /** 775 * Opens an FSDataInputStream at the indicated Path. 776 * 777 * @param f the file name to open 778 * @param bufferSize the size of the buffer to be used. 
779 * 780 * @throws AccessControlException If access is denied 781 * @throws FileNotFoundException If file <code>f</code> does not exist 782 * @throws UnsupportedFileSystemException If file system for <code>f</code> is 783 * not supported 784 * @throws IOException If an I/O error occurred 785 * 786 * Exceptions applicable to file systems accessed over RPC: 787 * @throws RpcClientException If an exception occurred in the RPC client 788 * @throws RpcServerException If an exception occurred in the RPC server 789 * @throws UnexpectedServerException If server implementation throws 790 * undeclared exception to RPC server 791 */ 792 public FSDataInputStream open(final Path f, final int bufferSize) 793 throws AccessControlException, FileNotFoundException, 794 UnsupportedFileSystemException, IOException { 795 final Path absF = fixRelativePart(f); 796 return new FSLinkResolver<FSDataInputStream>() { 797 public FSDataInputStream next(final AbstractFileSystem fs, final Path p) 798 throws IOException, UnresolvedLinkException { 799 return fs.open(p, bufferSize); 800 } 801 }.resolve(this, absF); 802 } 803 804 /** 805 * Set replication for an existing file. 
806 * 807 * @param f file name 808 * @param replication new replication 809 * 810 * @return true if successful 811 * 812 * @throws AccessControlException If access is denied 813 * @throws FileNotFoundException If file <code>f</code> does not exist 814 * @throws IOException If an I/O error occurred 815 * 816 * Exceptions applicable to file systems accessed over RPC: 817 * @throws RpcClientException If an exception occurred in the RPC client 818 * @throws RpcServerException If an exception occurred in the RPC server 819 * @throws UnexpectedServerException If server implementation throws 820 * undeclared exception to RPC server 821 */ 822 public boolean setReplication(final Path f, final short replication) 823 throws AccessControlException, FileNotFoundException, 824 IOException { 825 final Path absF = fixRelativePart(f); 826 return new FSLinkResolver<Boolean>() { 827 public Boolean next(final AbstractFileSystem fs, final Path p) 828 throws IOException, UnresolvedLinkException { 829 return Boolean.valueOf(fs.setReplication(p, replication)); 830 } 831 }.resolve(this, absF); 832 } 833 834 /** 835 * Renames Path src to Path dst 836 * <ul> 837 * <li 838 * <li>Fails if src is a file and dst is a directory. 839 * <li>Fails if src is a directory and dst is a file. 840 * <li>Fails if the parent of dst does not exist or is a file. 841 * </ul> 842 * <p> 843 * If OVERWRITE option is not passed as an argument, rename fails if the dst 844 * already exists. 845 * <p> 846 * If OVERWRITE option is passed as an argument, rename overwrites the dst if 847 * it is a file or an empty directory. Rename fails if dst is a non-empty 848 * directory. 849 * <p> 850 * Note that atomicity of rename is dependent on the file system 851 * implementation. 
Please refer to the file system documentation for details 852 * <p> 853 * 854 * @param src path to be renamed 855 * @param dst new path after rename 856 * 857 * @throws AccessControlException If access is denied 858 * @throws FileAlreadyExistsException If <code>dst</code> already exists and 859 * <code>options</options> has {@link Options.Rename#OVERWRITE} 860 * option false. 861 * @throws FileNotFoundException If <code>src</code> does not exist 862 * @throws ParentNotDirectoryException If parent of <code>dst</code> is not a 863 * directory 864 * @throws UnsupportedFileSystemException If file system for <code>src</code> 865 * and <code>dst</code> is not supported 866 * @throws IOException If an I/O error occurred 867 * 868 * Exceptions applicable to file systems accessed over RPC: 869 * @throws RpcClientException If an exception occurred in the RPC client 870 * @throws RpcServerException If an exception occurred in the RPC server 871 * @throws UnexpectedServerException If server implementation throws 872 * undeclared exception to RPC server 873 */ 874 public void rename(final Path src, final Path dst, 875 final Options.Rename... options) throws AccessControlException, 876 FileAlreadyExistsException, FileNotFoundException, 877 ParentNotDirectoryException, UnsupportedFileSystemException, 878 IOException { 879 final Path absSrc = fixRelativePart(src); 880 final Path absDst = fixRelativePart(dst); 881 AbstractFileSystem srcFS = getFSofPath(absSrc); 882 AbstractFileSystem dstFS = getFSofPath(absDst); 883 if(!srcFS.getUri().equals(dstFS.getUri())) { 884 throw new IOException("Renames across AbstractFileSystems not supported"); 885 } 886 try { 887 srcFS.rename(absSrc, absDst, options); 888 } catch (UnresolvedLinkException e) { 889 /* We do not know whether the source or the destination path 890 * was unresolved. Resolve the source path up until the final 891 * path component, then fully resolve the destination. 
892 */ 893 final Path source = resolveIntermediate(absSrc); 894 new FSLinkResolver<Void>() { 895 public Void next(final AbstractFileSystem fs, final Path p) 896 throws IOException, UnresolvedLinkException { 897 fs.rename(source, p, options); 898 return null; 899 } 900 }.resolve(this, absDst); 901 } 902 } 903 904 /** 905 * Set permission of a path. 906 * @param f 907 * @param permission - the new absolute permission (umask is not applied) 908 * 909 * @throws AccessControlException If access is denied 910 * @throws FileNotFoundException If <code>f</code> does not exist 911 * @throws UnsupportedFileSystemException If file system for <code>f</code> 912 * is not supported 913 * @throws IOException If an I/O error occurred 914 * 915 * Exceptions applicable to file systems accessed over RPC: 916 * @throws RpcClientException If an exception occurred in the RPC client 917 * @throws RpcServerException If an exception occurred in the RPC server 918 * @throws UnexpectedServerException If server implementation throws 919 * undeclared exception to RPC server 920 */ 921 public void setPermission(final Path f, final FsPermission permission) 922 throws AccessControlException, FileNotFoundException, 923 UnsupportedFileSystemException, IOException { 924 final Path absF = fixRelativePart(f); 925 new FSLinkResolver<Void>() { 926 public Void next(final AbstractFileSystem fs, final Path p) 927 throws IOException, UnresolvedLinkException { 928 fs.setPermission(p, permission); 929 return null; 930 } 931 }.resolve(this, absF); 932 } 933 934 /** 935 * Set owner of a path (i.e. a file or a directory). The parameters username 936 * and groupname cannot both be null. 937 * 938 * @param f The path 939 * @param username If it is null, the original username remains unchanged. 940 * @param groupname If it is null, the original groupname remains unchanged. 
941 * 942 * @throws AccessControlException If access is denied 943 * @throws FileNotFoundException If <code>f</code> does not exist 944 * @throws UnsupportedFileSystemException If file system for <code>f</code> is 945 * not supported 946 * @throws IOException If an I/O error occurred 947 * 948 * Exceptions applicable to file systems accessed over RPC: 949 * @throws RpcClientException If an exception occurred in the RPC client 950 * @throws RpcServerException If an exception occurred in the RPC server 951 * @throws UnexpectedServerException If server implementation throws 952 * undeclared exception to RPC server 953 * 954 * RuntimeExceptions: 955 * @throws HadoopIllegalArgumentException If <code>username</code> or 956 * <code>groupname</code> is invalid. 957 */ 958 public void setOwner(final Path f, final String username, 959 final String groupname) throws AccessControlException, 960 UnsupportedFileSystemException, FileNotFoundException, 961 IOException { 962 if ((username == null) && (groupname == null)) { 963 throw new HadoopIllegalArgumentException( 964 "username and groupname cannot both be null"); 965 } 966 final Path absF = fixRelativePart(f); 967 new FSLinkResolver<Void>() { 968 public Void next(final AbstractFileSystem fs, final Path p) 969 throws IOException, UnresolvedLinkException { 970 fs.setOwner(p, username, groupname); 971 return null; 972 } 973 }.resolve(this, absF); 974 } 975 976 /** 977 * Set access time of a file. 978 * @param f The path 979 * @param mtime Set the modification time of this file. 980 * The number of milliseconds since epoch (Jan 1, 1970). 981 * A value of -1 means that this call should not set modification time. 982 * @param atime Set the access time of this file. 983 * The number of milliseconds since Jan 1, 1970. 984 * A value of -1 means that this call should not set access time. 
985 * 986 * @throws AccessControlException If access is denied 987 * @throws FileNotFoundException If <code>f</code> does not exist 988 * @throws UnsupportedFileSystemException If file system for <code>f</code> is 989 * not supported 990 * @throws IOException If an I/O error occurred 991 * 992 * Exceptions applicable to file systems accessed over RPC: 993 * @throws RpcClientException If an exception occurred in the RPC client 994 * @throws RpcServerException If an exception occurred in the RPC server 995 * @throws UnexpectedServerException If server implementation throws 996 * undeclared exception to RPC server 997 */ 998 public void setTimes(final Path f, final long mtime, final long atime) 999 throws AccessControlException, FileNotFoundException, 1000 UnsupportedFileSystemException, IOException { 1001 final Path absF = fixRelativePart(f); 1002 new FSLinkResolver<Void>() { 1003 public Void next(final AbstractFileSystem fs, final Path p) 1004 throws IOException, UnresolvedLinkException { 1005 fs.setTimes(p, mtime, atime); 1006 return null; 1007 } 1008 }.resolve(this, absF); 1009 } 1010 1011 /** 1012 * Get the checksum of a file. 1013 * 1014 * @param f file path 1015 * 1016 * @return The file checksum. The default return value is null, 1017 * which indicates that no checksum algorithm is implemented 1018 * in the corresponding FileSystem. 
1019 * 1020 * @throws AccessControlException If access is denied 1021 * @throws FileNotFoundException If <code>f</code> does not exist 1022 * @throws IOException If an I/O error occurred 1023 * 1024 * Exceptions applicable to file systems accessed over RPC: 1025 * @throws RpcClientException If an exception occurred in the RPC client 1026 * @throws RpcServerException If an exception occurred in the RPC server 1027 * @throws UnexpectedServerException If server implementation throws 1028 * undeclared exception to RPC server 1029 */ 1030 public FileChecksum getFileChecksum(final Path f) 1031 throws AccessControlException, FileNotFoundException, 1032 IOException { 1033 final Path absF = fixRelativePart(f); 1034 return new FSLinkResolver<FileChecksum>() { 1035 public FileChecksum next(final AbstractFileSystem fs, final Path p) 1036 throws IOException, UnresolvedLinkException { 1037 return fs.getFileChecksum(p); 1038 } 1039 }.resolve(this, absF); 1040 } 1041 1042 /** 1043 * Set the verify checksum flag for the file system denoted by the path. 1044 * This is only applicable if the 1045 * corresponding FileSystem supports checksum. By default doesn't do anything. 
1046 * @param verifyChecksum 1047 * @param f set the verifyChecksum for the Filesystem containing this path 1048 * 1049 * @throws AccessControlException If access is denied 1050 * @throws FileNotFoundException If <code>f</code> does not exist 1051 * @throws UnsupportedFileSystemException If file system for <code>f</code> is 1052 * not supported 1053 * @throws IOException If an I/O error occurred 1054 * 1055 * Exceptions applicable to file systems accessed over RPC: 1056 * @throws RpcClientException If an exception occurred in the RPC client 1057 * @throws RpcServerException If an exception occurred in the RPC server 1058 * @throws UnexpectedServerException If server implementation throws 1059 * undeclared exception to RPC server 1060 */ 1061 public void setVerifyChecksum(final boolean verifyChecksum, final Path f) 1062 throws AccessControlException, FileNotFoundException, 1063 UnsupportedFileSystemException, IOException { 1064 final Path absF = resolve(fixRelativePart(f)); 1065 getFSofPath(absF).setVerifyChecksum(verifyChecksum); 1066 } 1067 1068 /** 1069 * Return a file status object that represents the path. 
1070 * @param f The path we want information from 1071 * 1072 * @return a FileStatus object 1073 * 1074 * @throws AccessControlException If access is denied 1075 * @throws FileNotFoundException If <code>f</code> does not exist 1076 * @throws UnsupportedFileSystemException If file system for <code>f</code> is 1077 * not supported 1078 * @throws IOException If an I/O error occurred 1079 * 1080 * Exceptions applicable to file systems accessed over RPC: 1081 * @throws RpcClientException If an exception occurred in the RPC client 1082 * @throws RpcServerException If an exception occurred in the RPC server 1083 * @throws UnexpectedServerException If server implementation throws 1084 * undeclared exception to RPC server 1085 */ 1086 public FileStatus getFileStatus(final Path f) throws AccessControlException, 1087 FileNotFoundException, UnsupportedFileSystemException, IOException { 1088 final Path absF = fixRelativePart(f); 1089 return new FSLinkResolver<FileStatus>() { 1090 public FileStatus next(final AbstractFileSystem fs, final Path p) 1091 throws IOException, UnresolvedLinkException { 1092 return fs.getFileStatus(p); 1093 } 1094 }.resolve(this, absF); 1095 } 1096 1097 /** 1098 * Return a fully qualified version of the given symlink target if it 1099 * has no scheme and authority. Partially and fully qualified paths 1100 * are returned unmodified. 1101 * @param pathFS The AbstractFileSystem of the path 1102 * @param pathWithLink Path that contains the symlink 1103 * @param target The symlink's absolute target 1104 * @return Fully qualified version of the target. 1105 */ 1106 private Path qualifySymlinkTarget(final AbstractFileSystem pathFS, 1107 Path pathWithLink, Path target) { 1108 // NB: makeQualified uses the target's scheme and authority, if 1109 // specified, and the scheme and authority of pathFS, if not. 
1110 final String scheme = target.toUri().getScheme(); 1111 final String auth = target.toUri().getAuthority(); 1112 return (scheme == null && auth == null) 1113 ? target.makeQualified(pathFS.getUri(), pathWithLink.getParent()) 1114 : target; 1115 } 1116 1117 /** 1118 * Return a file status object that represents the path. If the path 1119 * refers to a symlink then the FileStatus of the symlink is returned. 1120 * The behavior is equivalent to #getFileStatus() if the underlying 1121 * file system does not support symbolic links. 1122 * @param f The path we want information from. 1123 * @return A FileStatus object 1124 * 1125 * @throws AccessControlException If access is denied 1126 * @throws FileNotFoundException If <code>f</code> does not exist 1127 * @throws UnsupportedFileSystemException If file system for <code>f</code> is 1128 * not supported 1129 * @throws IOException If an I/O error occurred 1130 */ 1131 public FileStatus getFileLinkStatus(final Path f) 1132 throws AccessControlException, FileNotFoundException, 1133 UnsupportedFileSystemException, IOException { 1134 final Path absF = fixRelativePart(f); 1135 return new FSLinkResolver<FileStatus>() { 1136 public FileStatus next(final AbstractFileSystem fs, final Path p) 1137 throws IOException, UnresolvedLinkException { 1138 FileStatus fi = fs.getFileLinkStatus(p); 1139 if (fi.isSymlink()) { 1140 fi.setSymlink(qualifySymlinkTarget(fs, p, fi.getSymlink())); 1141 } 1142 return fi; 1143 } 1144 }.resolve(this, absF); 1145 } 1146 1147 /** 1148 * Returns the target of the given symbolic link as it was specified 1149 * when the link was created. Links in the path leading up to the 1150 * final path component are resolved transparently. 1151 * 1152 * @param f the path to return the target of 1153 * @return The un-interpreted target of the symbolic link. 
1154 * 1155 * @throws AccessControlException If access is denied 1156 * @throws FileNotFoundException If path <code>f</code> does not exist 1157 * @throws UnsupportedFileSystemException If file system for <code>f</code> is 1158 * not supported 1159 * @throws IOException If the given path does not refer to a symlink 1160 * or an I/O error occurred 1161 */ 1162 public Path getLinkTarget(final Path f) throws AccessControlException, 1163 FileNotFoundException, UnsupportedFileSystemException, IOException { 1164 final Path absF = fixRelativePart(f); 1165 return new FSLinkResolver<Path>() { 1166 public Path next(final AbstractFileSystem fs, final Path p) 1167 throws IOException, UnresolvedLinkException { 1168 FileStatus fi = fs.getFileLinkStatus(p); 1169 return fi.getSymlink(); 1170 } 1171 }.resolve(this, absF); 1172 } 1173 1174 /** 1175 * Return blockLocation of the given file for the given offset and len. 1176 * For a nonexistent file or regions, null will be returned. 1177 * 1178 * This call is most helpful with DFS, where it returns 1179 * hostnames of machines that contain the given file. 
1180 * 1181 * @param f - get blocklocations of this file 1182 * @param start position (byte offset) 1183 * @param len (in bytes) 1184 * 1185 * @return block locations for given file at specified offset of len 1186 * 1187 * @throws AccessControlException If access is denied 1188 * @throws FileNotFoundException If <code>f</code> does not exist 1189 * @throws UnsupportedFileSystemException If file system for <code>f</code> is 1190 * not supported 1191 * @throws IOException If an I/O error occurred 1192 * 1193 * Exceptions applicable to file systems accessed over RPC: 1194 * @throws RpcClientException If an exception occurred in the RPC client 1195 * @throws RpcServerException If an exception occurred in the RPC server 1196 * @throws UnexpectedServerException If server implementation throws 1197 * undeclared exception to RPC server 1198 * 1199 * RuntimeExceptions: 1200 * @throws InvalidPathException If path <code>f</code> is invalid 1201 */ 1202 @InterfaceAudience.LimitedPrivate({"HDFS", "MapReduce"}) 1203 @InterfaceStability.Evolving 1204 public BlockLocation[] getFileBlockLocations(final Path f, final long start, 1205 final long len) throws AccessControlException, FileNotFoundException, 1206 UnsupportedFileSystemException, IOException { 1207 final Path absF = fixRelativePart(f); 1208 return new FSLinkResolver<BlockLocation[]>() { 1209 public BlockLocation[] next(final AbstractFileSystem fs, final Path p) 1210 throws IOException, UnresolvedLinkException { 1211 return fs.getFileBlockLocations(p, start, len); 1212 } 1213 }.resolve(this, absF); 1214 } 1215 1216 /** 1217 * Returns a status object describing the use and capacity of the 1218 * file system denoted by the Parh argument p. 1219 * If the file system has multiple partitions, the 1220 * use and capacity of the partition pointed to by the specified 1221 * path is reflected. 1222 * 1223 * @param f Path for which status should be obtained. null means the 1224 * root partition of the default file system. 
1225 * 1226 * @return a FsStatus object 1227 * 1228 * @throws AccessControlException If access is denied 1229 * @throws FileNotFoundException If <code>f</code> does not exist 1230 * @throws UnsupportedFileSystemException If file system for <code>f</code> is 1231 * not supported 1232 * @throws IOException If an I/O error occurred 1233 * 1234 * Exceptions applicable to file systems accessed over RPC: 1235 * @throws RpcClientException If an exception occurred in the RPC client 1236 * @throws RpcServerException If an exception occurred in the RPC server 1237 * @throws UnexpectedServerException If server implementation throws 1238 * undeclared exception to RPC server 1239 */ 1240 public FsStatus getFsStatus(final Path f) throws AccessControlException, 1241 FileNotFoundException, UnsupportedFileSystemException, IOException { 1242 if (f == null) { 1243 return defaultFS.getFsStatus(); 1244 } 1245 final Path absF = fixRelativePart(f); 1246 return new FSLinkResolver<FsStatus>() { 1247 public FsStatus next(final AbstractFileSystem fs, final Path p) 1248 throws IOException, UnresolvedLinkException { 1249 return fs.getFsStatus(p); 1250 } 1251 }.resolve(this, absF); 1252 } 1253 1254 /** 1255 * Creates a symbolic link to an existing file. An exception is thrown if 1256 * the symlink exits, the user does not have permission to create symlink, 1257 * or the underlying file system does not support symlinks. 1258 * 1259 * Symlink permissions are ignored, access to a symlink is determined by 1260 * the permissions of the symlink target. 1261 * 1262 * Symlinks in paths leading up to the final path component are resolved 1263 * transparently. If the final path component refers to a symlink some 1264 * functions operate on the symlink itself, these are: 1265 * - delete(f) and deleteOnExit(f) - Deletes the symlink. 1266 * - rename(src, dst) - If src refers to a symlink, the symlink is 1267 * renamed. If dst refers to a symlink, the symlink is over-written. 
* - getLinkTarget(f) - Returns the target of the symlink.
   * - getFileLinkStatus(f) - Returns a FileStatus object describing
   *   the symlink.
   * Some functions, create() and mkdir(), expect that the final path component
   * does not exist. If they are given a path that refers to a symlink that
   * does exist they behave as if the path referred to an existing file or
   * directory. All other functions fully resolve, ie follow, the symlink.
   * These are: open, setReplication, setOwner, setTimes, setWorkingDirectory,
   * setPermission, getFileChecksum, setVerifyChecksum, getFileBlockLocations,
   * getFsStatus, getFileStatus, exists, and listStatus.
   *
   * Symlink targets are stored as given to createSymlink, assuming the
   * underlying file system is capable of storing a fully qualified URI.
   * Dangling symlinks are permitted. FileContext supports four types of
   * symlink targets, and resolves them as follows
   * <pre>
   * Given a path referring to a symlink of form:
   *
   *   <---X--->
   *   fs://host/A/B/link
   *   <-----Y----->
   *
   * In this path X is the scheme and authority that identify the file system,
   * and Y is the path leading up to the final path component "link". If Y is
   * a symlink itself then let Y' be the target of Y and X' be the scheme and
   * authority of Y'. Symlink targets may be:
   *
   * 1. Fully qualified URIs
   *
   *   fs://hostX/A/B/file  Resolved according to the target file system.
   *
   * 2. Partially qualified URIs (eg scheme but no host)
   *
   *   fs:///A/B/file  Resolved according to the target file system. Eg resolving
   *                   a symlink to hdfs:///A results in an exception because
   *                   HDFS URIs must be fully qualified, while a symlink to
   *                   file:///A will not since Hadoop's local file systems
   *                   require partially qualified URIs.
   *
   * 3. Relative paths
   *
   *   path  Resolves to [Y'][path].
Eg if Y resolves to hdfs://host/A and path 1310 * is "../B/file" then [Y'][path] is hdfs://host/B/file 1311 * 1312 * 4. Absolute paths 1313 * 1314 * path Resolves to [X'][path]. Eg if Y resolves hdfs://host/A/B and path 1315 * is "/file" then [X][path] is hdfs://host/file 1316 * </pre> 1317 * 1318 * @param target the target of the symbolic link 1319 * @param link the path to be created that points to target 1320 * @param createParent if true then missing parent dirs are created if 1321 * false then parent must exist 1322 * 1323 * 1324 * @throws AccessControlException If access is denied 1325 * @throws FileAlreadyExistsException If file <code>linkcode> already exists 1326 * @throws FileNotFoundException If <code>target</code> does not exist 1327 * @throws ParentNotDirectoryException If parent of <code>link</code> is not a 1328 * directory. 1329 * @throws UnsupportedFileSystemException If file system for 1330 * <code>target</code> or <code>link</code> is not supported 1331 * @throws IOException If an I/O error occurred 1332 */ 1333 public void createSymlink(final Path target, final Path link, 1334 final boolean createParent) throws AccessControlException, 1335 FileAlreadyExistsException, FileNotFoundException, 1336 ParentNotDirectoryException, UnsupportedFileSystemException, 1337 IOException { 1338 final Path nonRelLink = fixRelativePart(link); 1339 new FSLinkResolver<Void>() { 1340 public Void next(final AbstractFileSystem fs, final Path p) 1341 throws IOException, UnresolvedLinkException { 1342 fs.createSymlink(target, p, createParent); 1343 return null; 1344 } 1345 }.resolve(this, nonRelLink); 1346 } 1347 1348 /** 1349 * List the statuses of the files/directories in the given path if the path is 1350 * a directory. 
1351 * 1352 * @param f is the path 1353 * 1354 * @return an iterator that traverses statuses of the files/directories 1355 * in the given path 1356 * 1357 * @throws AccessControlException If access is denied 1358 * @throws FileNotFoundException If <code>f</code> does not exist 1359 * @throws UnsupportedFileSystemException If file system for <code>f</code> is 1360 * not supported 1361 * @throws IOException If an I/O error occurred 1362 * 1363 * Exceptions applicable to file systems accessed over RPC: 1364 * @throws RpcClientException If an exception occurred in the RPC client 1365 * @throws RpcServerException If an exception occurred in the RPC server 1366 * @throws UnexpectedServerException If server implementation throws 1367 * undeclared exception to RPC server 1368 */ 1369 public RemoteIterator<FileStatus> listStatus(final Path f) throws 1370 AccessControlException, FileNotFoundException, 1371 UnsupportedFileSystemException, IOException { 1372 final Path absF = fixRelativePart(f); 1373 return new FSLinkResolver<RemoteIterator<FileStatus>>() { 1374 public RemoteIterator<FileStatus> next( 1375 final AbstractFileSystem fs, final Path p) 1376 throws IOException, UnresolvedLinkException { 1377 return fs.listStatusIterator(p); 1378 } 1379 }.resolve(this, absF); 1380 } 1381 1382 /** 1383 * @return an iterator over the corrupt files under the given path 1384 * (may contain duplicates if a file has more than one corrupt block) 1385 * @throws IOException 1386 */ 1387 public RemoteIterator<Path> listCorruptFileBlocks(Path path) 1388 throws IOException { 1389 final Path absF = fixRelativePart(path); 1390 return new FSLinkResolver<RemoteIterator<Path>>() { 1391 @Override 1392 public RemoteIterator<Path> next(final AbstractFileSystem fs, 1393 final Path p) 1394 throws IOException, UnresolvedLinkException { 1395 return fs.listCorruptFileBlocks(p); 1396 } 1397 }.resolve(this, absF); 1398 } 1399 1400 /** 1401 * List the statuses of the files/directories in the given path if the 
path is 1402 * a directory. 1403 * Return the file's status and block locations If the path is a file. 1404 * 1405 * If a returned status is a file, it contains the file's block locations. 1406 * 1407 * @param f is the path 1408 * 1409 * @return an iterator that traverses statuses of the files/directories 1410 * in the given path 1411 * If any IO exception (for example the input directory gets deleted while 1412 * listing is being executed), next() or hasNext() of the returned iterator 1413 * may throw a RuntimeException with the io exception as the cause. 1414 * 1415 * @throws AccessControlException If access is denied 1416 * @throws FileNotFoundException If <code>f</code> does not exist 1417 * @throws UnsupportedFileSystemException If file system for <code>f</code> is 1418 * not supported 1419 * @throws IOException If an I/O error occurred 1420 * 1421 * Exceptions applicable to file systems accessed over RPC: 1422 * @throws RpcClientException If an exception occurred in the RPC client 1423 * @throws RpcServerException If an exception occurred in the RPC server 1424 * @throws UnexpectedServerException If server implementation throws 1425 * undeclared exception to RPC server 1426 */ 1427 public RemoteIterator<LocatedFileStatus> listLocatedStatus( 1428 final Path f) throws 1429 AccessControlException, FileNotFoundException, 1430 UnsupportedFileSystemException, IOException { 1431 final Path absF = fixRelativePart(f); 1432 return new FSLinkResolver<RemoteIterator<LocatedFileStatus>>() { 1433 public RemoteIterator<LocatedFileStatus> next( 1434 final AbstractFileSystem fs, final Path p) 1435 throws IOException, UnresolvedLinkException { 1436 return fs.listLocatedStatus(p); 1437 } 1438 }.resolve(this, absF); 1439 } 1440 1441 /** 1442 * Mark a path to be deleted on JVM shutdown. 1443 * 1444 * @param f the existing path to delete. 1445 * 1446 * @return true if deleteOnExit is successful, otherwise false. 
1447 * 1448 * @throws AccessControlException If access is denied 1449 * @throws UnsupportedFileSystemException If file system for <code>f</code> is 1450 * not supported 1451 * @throws IOException If an I/O error occurred 1452 * 1453 * Exceptions applicable to file systems accessed over RPC: 1454 * @throws RpcClientException If an exception occurred in the RPC client 1455 * @throws RpcServerException If an exception occurred in the RPC server 1456 * @throws UnexpectedServerException If server implementation throws 1457 * undeclared exception to RPC server 1458 */ 1459 public boolean deleteOnExit(Path f) throws AccessControlException, 1460 IOException { 1461 if (!this.util().exists(f)) { 1462 return false; 1463 } 1464 synchronized (DELETE_ON_EXIT) { 1465 if (DELETE_ON_EXIT.isEmpty()) { 1466 ShutdownHookManager.get().addShutdownHook(FINALIZER, SHUTDOWN_HOOK_PRIORITY); 1467 } 1468 1469 Set<Path> set = DELETE_ON_EXIT.get(this); 1470 if (set == null) { 1471 set = new TreeSet<Path>(); 1472 DELETE_ON_EXIT.put(this, set); 1473 } 1474 set.add(f); 1475 } 1476 return true; 1477 } 1478 1479 private final Util util; 1480 public Util util() { 1481 return util; 1482 } 1483 1484 1485 /** 1486 * Utility/library methods built over the basic FileContext methods. 1487 * Since this are library functions, the oprtation are not atomic 1488 * and some of them may partially complete if other threads are making 1489 * changes to the same part of the name space. 1490 */ 1491 public class Util { 1492 /** 1493 * Does the file exist? 1494 * Note: Avoid using this method if you already have FileStatus in hand. 
1495 * Instead reuse the FileStatus 1496 * @param f the file or dir to be checked 1497 * 1498 * @throws AccessControlException If access is denied 1499 * @throws IOException If an I/O error occurred 1500 * @throws UnsupportedFileSystemException If file system for <code>f</code> is 1501 * not supported 1502 * 1503 * Exceptions applicable to file systems accessed over RPC: 1504 * @throws RpcClientException If an exception occurred in the RPC client 1505 * @throws RpcServerException If an exception occurred in the RPC server 1506 * @throws UnexpectedServerException If server implementation throws 1507 * undeclared exception to RPC server 1508 */ 1509 public boolean exists(final Path f) throws AccessControlException, 1510 UnsupportedFileSystemException, IOException { 1511 try { 1512 FileStatus fs = FileContext.this.getFileStatus(f); 1513 assert fs != null; 1514 return true; 1515 } catch (FileNotFoundException e) { 1516 return false; 1517 } 1518 } 1519 1520 /** 1521 * Return a list of file status objects that corresponds to supplied paths 1522 * excluding those non-existent paths. 
1523 * 1524 * @param paths list of paths we want information from 1525 * 1526 * @return a list of FileStatus objects 1527 * 1528 * @throws AccessControlException If access is denied 1529 * @throws IOException If an I/O error occurred 1530 * 1531 * Exceptions applicable to file systems accessed over RPC: 1532 * @throws RpcClientException If an exception occurred in the RPC client 1533 * @throws RpcServerException If an exception occurred in the RPC server 1534 * @throws UnexpectedServerException If server implementation throws 1535 * undeclared exception to RPC server 1536 */ 1537 private FileStatus[] getFileStatus(Path[] paths) 1538 throws AccessControlException, IOException { 1539 if (paths == null) { 1540 return null; 1541 } 1542 ArrayList<FileStatus> results = new ArrayList<FileStatus>(paths.length); 1543 for (int i = 0; i < paths.length; i++) { 1544 try { 1545 results.add(FileContext.this.getFileStatus(paths[i])); 1546 } catch (FileNotFoundException fnfe) { 1547 // ignoring 1548 } 1549 } 1550 return results.toArray(new FileStatus[results.size()]); 1551 } 1552 1553 1554 /** 1555 * Return the {@link ContentSummary} of path f. 1556 * @param f path 1557 * 1558 * @return the {@link ContentSummary} of path f. 
1559 * 1560 * @throws AccessControlException If access is denied 1561 * @throws FileNotFoundException If <code>f</code> does not exist 1562 * @throws UnsupportedFileSystemException If file system for 1563 * <code>f</code> is not supported 1564 * @throws IOException If an I/O error occurred 1565 * 1566 * Exceptions applicable to file systems accessed over RPC: 1567 * @throws RpcClientException If an exception occurred in the RPC client 1568 * @throws RpcServerException If an exception occurred in the RPC server 1569 * @throws UnexpectedServerException If server implementation throws 1570 * undeclared exception to RPC server 1571 */ 1572 public ContentSummary getContentSummary(Path f) 1573 throws AccessControlException, FileNotFoundException, 1574 UnsupportedFileSystemException, IOException { 1575 FileStatus status = FileContext.this.getFileStatus(f); 1576 if (status.isFile()) { 1577 return new ContentSummary(status.getLen(), 1, 0); 1578 } 1579 long[] summary = {0, 0, 1}; 1580 RemoteIterator<FileStatus> statusIterator = 1581 FileContext.this.listStatus(f); 1582 while(statusIterator.hasNext()) { 1583 FileStatus s = statusIterator.next(); 1584 ContentSummary c = s.isDirectory() ? getContentSummary(s.getPath()) : 1585 new ContentSummary(s.getLen(), 1, 0); 1586 summary[0] += c.getLength(); 1587 summary[1] += c.getFileCount(); 1588 summary[2] += c.getDirectoryCount(); 1589 } 1590 return new ContentSummary(summary[0], summary[1], summary[2]); 1591 } 1592 1593 /** 1594 * See {@link #listStatus(Path[], PathFilter)} 1595 */ 1596 public FileStatus[] listStatus(Path[] files) throws AccessControlException, 1597 FileNotFoundException, IOException { 1598 return listStatus(files, DEFAULT_FILTER); 1599 } 1600 1601 /** 1602 * Filter files/directories in the given path using the user-supplied path 1603 * filter. 
1604 * 1605 * @param f is the path name 1606 * @param filter is the user-supplied path filter 1607 * 1608 * @return an array of FileStatus objects for the files under the given path 1609 * after applying the filter 1610 * 1611 * @throws AccessControlException If access is denied 1612 * @throws FileNotFoundException If <code>f</code> does not exist 1613 * @throws UnsupportedFileSystemException If file system for 1614 * <code>pathPattern</code> is not supported 1615 * @throws IOException If an I/O error occurred 1616 * 1617 * Exceptions applicable to file systems accessed over RPC: 1618 * @throws RpcClientException If an exception occurred in the RPC client 1619 * @throws RpcServerException If an exception occurred in the RPC server 1620 * @throws UnexpectedServerException If server implementation throws 1621 * undeclared exception to RPC server 1622 */ 1623 public FileStatus[] listStatus(Path f, PathFilter filter) 1624 throws AccessControlException, FileNotFoundException, 1625 UnsupportedFileSystemException, IOException { 1626 ArrayList<FileStatus> results = new ArrayList<FileStatus>(); 1627 listStatus(results, f, filter); 1628 return results.toArray(new FileStatus[results.size()]); 1629 } 1630 1631 /** 1632 * Filter files/directories in the given list of paths using user-supplied 1633 * path filter. 
1634 * 1635 * @param files is a list of paths 1636 * @param filter is the filter 1637 * 1638 * @return a list of statuses for the files under the given paths after 1639 * applying the filter 1640 * 1641 * @throws AccessControlException If access is denied 1642 * @throws FileNotFoundException If a file in <code>files</code> does not 1643 * exist 1644 * @throws IOException If an I/O error occurred 1645 * 1646 * Exceptions applicable to file systems accessed over RPC: 1647 * @throws RpcClientException If an exception occurred in the RPC client 1648 * @throws RpcServerException If an exception occurred in the RPC server 1649 * @throws UnexpectedServerException If server implementation throws 1650 * undeclared exception to RPC server 1651 */ 1652 public FileStatus[] listStatus(Path[] files, PathFilter filter) 1653 throws AccessControlException, FileNotFoundException, IOException { 1654 ArrayList<FileStatus> results = new ArrayList<FileStatus>(); 1655 for (int i = 0; i < files.length; i++) { 1656 listStatus(results, files[i], filter); 1657 } 1658 return results.toArray(new FileStatus[results.size()]); 1659 } 1660 1661 /* 1662 * Filter files/directories in the given path using the user-supplied path 1663 * filter. Results are added to the given array <code>results</code>. 1664 */ 1665 private void listStatus(ArrayList<FileStatus> results, Path f, 1666 PathFilter filter) throws AccessControlException, 1667 FileNotFoundException, IOException { 1668 FileStatus[] listing = listStatus(f); 1669 if (listing != null) { 1670 for (int i = 0; i < listing.length; i++) { 1671 if (filter.accept(listing[i].getPath())) { 1672 results.add(listing[i]); 1673 } 1674 } 1675 } 1676 } 1677 1678 /** 1679 * List the statuses of the files/directories in the given path 1680 * if the path is a directory. 
1681 * 1682 * @param f is the path 1683 * 1684 * @return an array that contains statuses of the files/directories 1685 * in the given path 1686 * 1687 * @throws AccessControlException If access is denied 1688 * @throws FileNotFoundException If <code>f</code> does not exist 1689 * @throws UnsupportedFileSystemException If file system for <code>f</code> is 1690 * not supported 1691 * @throws IOException If an I/O error occurred 1692 * 1693 * Exceptions applicable to file systems accessed over RPC: 1694 * @throws RpcClientException If an exception occurred in the RPC client 1695 * @throws RpcServerException If an exception occurred in the RPC server 1696 * @throws UnexpectedServerException If server implementation throws 1697 * undeclared exception to RPC server 1698 */ 1699 public FileStatus[] listStatus(final Path f) throws AccessControlException, 1700 FileNotFoundException, UnsupportedFileSystemException, 1701 IOException { 1702 final Path absF = fixRelativePart(f); 1703 return new FSLinkResolver<FileStatus[]>() { 1704 public FileStatus[] next(final AbstractFileSystem fs, final Path p) 1705 throws IOException, UnresolvedLinkException { 1706 return fs.listStatus(p); 1707 } 1708 }.resolve(FileContext.this, absF); 1709 } 1710 1711 /** 1712 * List the statuses and block locations of the files in the given path. 1713 * 1714 * If the path is a directory, 1715 * if recursive is false, returns files in the directory; 1716 * if recursive is true, return files in the subtree rooted at the path. 1717 * The subtree is traversed in the depth-first order. 1718 * If the path is a file, return the file's status and block locations. 1719 * Files across symbolic links are also returned. 
1720 * 1721 * @param f is the path 1722 * @param recursive if the subdirectories need to be traversed recursively 1723 * 1724 * @return an iterator that traverses statuses of the files 1725 * If any IO exception (for example a sub-directory gets deleted while 1726 * listing is being executed), next() or hasNext() of the returned iterator 1727 * may throw a RuntimeException with the IO exception as the cause. 1728 * 1729 * @throws AccessControlException If access is denied 1730 * @throws FileNotFoundException If <code>f</code> does not exist 1731 * @throws UnsupportedFileSystemException If file system for <code>f</code> 1732 * is not supported 1733 * @throws IOException If an I/O error occurred 1734 * 1735 * Exceptions applicable to file systems accessed over RPC: 1736 * @throws RpcClientException If an exception occurred in the RPC client 1737 * @throws RpcServerException If an exception occurred in the RPC server 1738 * @throws UnexpectedServerException If server implementation throws 1739 * undeclared exception to RPC server 1740 */ 1741 public RemoteIterator<LocatedFileStatus> listFiles( 1742 final Path f, final boolean recursive) throws AccessControlException, 1743 FileNotFoundException, UnsupportedFileSystemException, 1744 IOException { 1745 return new RemoteIterator<LocatedFileStatus>() { 1746 private Stack<RemoteIterator<LocatedFileStatus>> itors = 1747 new Stack<RemoteIterator<LocatedFileStatus>>(); 1748 RemoteIterator<LocatedFileStatus> curItor = listLocatedStatus(f); 1749 LocatedFileStatus curFile; 1750 1751 /** 1752 * Returns <tt>true</tt> if the iterator has more files. 1753 * 1754 * @return <tt>true</tt> if the iterator has more files. 
1755 * @throws AccessControlException if not allowed to access next 1756 * file's status or locations 1757 * @throws FileNotFoundException if next file does not exist any more 1758 * @throws UnsupportedFileSystemException if next file's 1759 * fs is unsupported 1760 * @throws IOException for all other IO errors 1761 * for example, NameNode is not avaialbe or 1762 * NameNode throws IOException due to an error 1763 * while getting the status or block locations 1764 */ 1765 @Override 1766 public boolean hasNext() throws IOException { 1767 while (curFile == null) { 1768 if (curItor.hasNext()) { 1769 handleFileStat(curItor.next()); 1770 } else if (!itors.empty()) { 1771 curItor = itors.pop(); 1772 } else { 1773 return false; 1774 } 1775 } 1776 return true; 1777 } 1778 1779 /** 1780 * Process the input stat. 1781 * If it is a file, return the file stat. 1782 * If it is a directory, traverse the directory if recursive is true; 1783 * ignore it if recursive is false. 1784 * If it is a symlink, resolve the symlink first and then process it 1785 * depending on if it is a file or directory. 
1786 * @param stat input status 1787 * @throws AccessControlException if access is denied 1788 * @throws FileNotFoundException if file is not found 1789 * @throws UnsupportedFileSystemException if fs is not supported 1790 * @throws IOException for all other IO errors 1791 */ 1792 private void handleFileStat(LocatedFileStatus stat) 1793 throws IOException { 1794 if (stat.isFile()) { // file 1795 curFile = stat; 1796 } else if (stat.isSymlink()) { // symbolic link 1797 // resolve symbolic link 1798 FileStatus symstat = FileContext.this.getFileStatus( 1799 stat.getSymlink()); 1800 if (symstat.isFile() || (recursive && symstat.isDirectory())) { 1801 itors.push(curItor); 1802 curItor = listLocatedStatus(stat.getPath()); 1803 } 1804 } else if (recursive) { // directory 1805 itors.push(curItor); 1806 curItor = listLocatedStatus(stat.getPath()); 1807 } 1808 } 1809 1810 /** 1811 * Returns the next file's status with its block locations 1812 * 1813 * @throws AccessControlException if not allowed to access next 1814 * file's status or locations 1815 * @throws FileNotFoundException if next file does not exist any more 1816 * @throws UnsupportedFileSystemException if next file's 1817 * fs is unsupported 1818 * @throws IOException for all other IO errors 1819 * for example, NameNode is not avaialbe or 1820 * NameNode throws IOException due to an error 1821 * while getting the status or block locations 1822 */ 1823 @Override 1824 public LocatedFileStatus next() throws IOException { 1825 if (hasNext()) { 1826 LocatedFileStatus result = curFile; 1827 curFile = null; 1828 return result; 1829 } 1830 throw new java.util.NoSuchElementException("No more entry in " + f); 1831 } 1832 }; 1833 } 1834 1835 /** 1836 * <p>Return all the files that match filePattern and are not checksum 1837 * files. Results are sorted by their names. 
1838 * 1839 * <p> 1840 * A filename pattern is composed of <i>regular</i> characters and 1841 * <i>special pattern matching</i> characters, which are: 1842 * 1843 * <dl> 1844 * <dd> 1845 * <dl> 1846 * <p> 1847 * <dt> <tt> ? </tt> 1848 * <dd> Matches any single character. 1849 * 1850 * <p> 1851 * <dt> <tt> * </tt> 1852 * <dd> Matches zero or more characters. 1853 * 1854 * <p> 1855 * <dt> <tt> [<i>abc</i>] </tt> 1856 * <dd> Matches a single character from character set 1857 * <tt>{<i>a,b,c</i>}</tt>. 1858 * 1859 * <p> 1860 * <dt> <tt> [<i>a</i>-<i>b</i>] </tt> 1861 * <dd> Matches a single character from the character range 1862 * <tt>{<i>a...b</i>}</tt>. Note: character <tt><i>a</i></tt> must be 1863 * lexicographically less than or equal to character <tt><i>b</i></tt>. 1864 * 1865 * <p> 1866 * <dt> <tt> [^<i>a</i>] </tt> 1867 * <dd> Matches a single char that is not from character set or range 1868 * <tt>{<i>a</i>}</tt>. Note that the <tt>^</tt> character must occur 1869 * immediately to the right of the opening bracket. 1870 * 1871 * <p> 1872 * <dt> <tt> \<i>c</i> </tt> 1873 * <dd> Removes (escapes) any special meaning of character <i>c</i>. 
1874 * 1875 * <p> 1876 * <dt> <tt> {ab,cd} </tt> 1877 * <dd> Matches a string from the string set <tt>{<i>ab, cd</i>} </tt> 1878 * 1879 * <p> 1880 * <dt> <tt> {ab,c{de,fh}} </tt> 1881 * <dd> Matches a string from string set <tt>{<i>ab, cde, cfh</i>}</tt> 1882 * 1883 * </dl> 1884 * </dd> 1885 * </dl> 1886 * 1887 * @param pathPattern a regular expression specifying a pth pattern 1888 * 1889 * @return an array of paths that match the path pattern 1890 * 1891 * @throws AccessControlException If access is denied 1892 * @throws UnsupportedFileSystemException If file system for 1893 * <code>pathPattern</code> is not supported 1894 * @throws IOException If an I/O error occurred 1895 * 1896 * Exceptions applicable to file systems accessed over RPC: 1897 * @throws RpcClientException If an exception occurred in the RPC client 1898 * @throws RpcServerException If an exception occurred in the RPC server 1899 * @throws UnexpectedServerException If server implementation throws 1900 * undeclared exception to RPC server 1901 */ 1902 public FileStatus[] globStatus(Path pathPattern) 1903 throws AccessControlException, UnsupportedFileSystemException, 1904 IOException { 1905 return globStatus(pathPattern, DEFAULT_FILTER); 1906 } 1907 1908 /** 1909 * Return an array of FileStatus objects whose path names match pathPattern 1910 * and is accepted by the user-supplied path filter. Results are sorted by 1911 * their path names. 1912 * Return null if pathPattern has no glob and the path does not exist. 1913 * Return an empty array if pathPattern has a glob and no path matches it. 
1914 * 1915 * @param pathPattern regular expression specifying the path pattern 1916 * @param filter user-supplied path filter 1917 * 1918 * @return an array of FileStatus objects 1919 * 1920 * @throws AccessControlException If access is denied 1921 * @throws UnsupportedFileSystemException If file system for 1922 * <code>pathPattern</code> is not supported 1923 * @throws IOException If an I/O error occurred 1924 * 1925 * Exceptions applicable to file systems accessed over RPC: 1926 * @throws RpcClientException If an exception occurred in the RPC client 1927 * @throws RpcServerException If an exception occurred in the RPC server 1928 * @throws UnexpectedServerException If server implementation throws 1929 * undeclared exception to RPC server 1930 */ 1931 public FileStatus[] globStatus(final Path pathPattern, 1932 final PathFilter filter) throws AccessControlException, 1933 UnsupportedFileSystemException, IOException { 1934 URI uri = getFSofPath(fixRelativePart(pathPattern)).getUri(); 1935 1936 String filename = pathPattern.toUri().getPath(); 1937 1938 List<String> filePatterns = GlobExpander.expand(filename); 1939 if (filePatterns.size() == 1) { 1940 Path absPathPattern = fixRelativePart(pathPattern); 1941 return globStatusInternal(uri, new Path(absPathPattern.toUri() 1942 .getPath()), filter); 1943 } else { 1944 List<FileStatus> results = new ArrayList<FileStatus>(); 1945 for (String iFilePattern : filePatterns) { 1946 Path iAbsFilePattern = fixRelativePart(new Path(iFilePattern)); 1947 FileStatus[] files = globStatusInternal(uri, iAbsFilePattern, filter); 1948 for (FileStatus file : files) { 1949 results.add(file); 1950 } 1951 } 1952 return results.toArray(new FileStatus[results.size()]); 1953 } 1954 } 1955 1956 /** 1957 * 1958 * @param uri for all the inPathPattern 1959 * @param inPathPattern - without the scheme & authority (take from uri) 1960 * @param filter 1961 * 1962 * @return an array of FileStatus objects 1963 * 1964 * @throws AccessControlException If 
access is denied 1965 * @throws IOException If an I/O error occurred 1966 */ 1967 private FileStatus[] globStatusInternal(final URI uri, 1968 final Path inPathPattern, final PathFilter filter) 1969 throws AccessControlException, IOException 1970 { 1971 Path[] parents = new Path[1]; 1972 int level = 0; 1973 1974 assert(inPathPattern.toUri().getScheme() == null && 1975 inPathPattern.toUri().getAuthority() == null && 1976 inPathPattern.isUriPathAbsolute()); 1977 1978 1979 String filename = inPathPattern.toUri().getPath(); 1980 1981 // path has only zero component 1982 if ("".equals(filename) || Path.SEPARATOR.equals(filename)) { 1983 Path p = inPathPattern.makeQualified(uri, null); 1984 return getFileStatus(new Path[]{p}); 1985 } 1986 1987 // path has at least one component 1988 String[] components = filename.split(Path.SEPARATOR); 1989 1990 // Path is absolute, first component is "/" hence first component 1991 // is the uri root 1992 parents[0] = new Path(new Path(uri), new Path("/")); 1993 level = 1; 1994 1995 // glob the paths that match the parent path, ie. 
[0, components.length-1] 1996 boolean[] hasGlob = new boolean[]{false}; 1997 Path[] relParentPaths = 1998 globPathsLevel(parents, components, level, hasGlob); 1999 FileStatus[] results; 2000 2001 if (relParentPaths == null || relParentPaths.length == 0) { 2002 results = null; 2003 } else { 2004 // fix the pathes to be abs 2005 Path[] parentPaths = new Path [relParentPaths.length]; 2006 for(int i=0; i<relParentPaths.length; i++) { 2007 parentPaths[i] = relParentPaths[i].makeQualified(uri, null); 2008 } 2009 2010 // Now work on the last component of the path 2011 GlobFilter fp = 2012 new GlobFilter(components[components.length - 1], filter); 2013 if (fp.hasPattern()) { // last component has a pattern 2014 // list parent directories and then glob the results 2015 results = listStatus(parentPaths, fp); 2016 hasGlob[0] = true; 2017 } else { // last component does not have a pattern 2018 // get all the path names 2019 ArrayList<Path> filteredPaths = 2020 new ArrayList<Path>(parentPaths.length); 2021 for (int i = 0; i < parentPaths.length; i++) { 2022 parentPaths[i] = new Path(parentPaths[i], 2023 components[components.length - 1]); 2024 if (fp.accept(parentPaths[i])) { 2025 filteredPaths.add(parentPaths[i]); 2026 } 2027 } 2028 // get all their statuses 2029 results = getFileStatus( 2030 filteredPaths.toArray(new Path[filteredPaths.size()])); 2031 } 2032 } 2033 2034 // Decide if the pathPattern contains a glob or not 2035 if (results == null) { 2036 if (hasGlob[0]) { 2037 results = new FileStatus[0]; 2038 } 2039 } else { 2040 if (results.length == 0) { 2041 if (!hasGlob[0]) { 2042 results = null; 2043 } 2044 } else { 2045 Arrays.sort(results); 2046 } 2047 } 2048 return results; 2049 } 2050 2051 /* 2052 * For a path of N components, return a list of paths that match the 2053 * components [<code>level</code>, <code>N-1</code>]. 
2054 */ 2055 private Path[] globPathsLevel(Path[] parents, String[] filePattern, 2056 int level, boolean[] hasGlob) throws AccessControlException, 2057 FileNotFoundException, IOException { 2058 if (level == filePattern.length - 1) { 2059 return parents; 2060 } 2061 if (parents == null || parents.length == 0) { 2062 return null; 2063 } 2064 GlobFilter fp = new GlobFilter(filePattern[level]); 2065 if (fp.hasPattern()) { 2066 parents = FileUtil.stat2Paths(listStatus(parents, fp)); 2067 hasGlob[0] = true; 2068 } else { 2069 for (int i = 0; i < parents.length; i++) { 2070 parents[i] = new Path(parents[i], filePattern[level]); 2071 } 2072 } 2073 return globPathsLevel(parents, filePattern, level + 1, hasGlob); 2074 } 2075 2076 /** 2077 * Copy file from src to dest. See 2078 * {@link #copy(Path, Path, boolean, boolean)} 2079 */ 2080 public boolean copy(final Path src, final Path dst) 2081 throws AccessControlException, FileAlreadyExistsException, 2082 FileNotFoundException, ParentNotDirectoryException, 2083 UnsupportedFileSystemException, IOException { 2084 return copy(src, dst, false, false); 2085 } 2086 2087 /** 2088 * Copy from src to dst, optionally deleting src and overwriting dst. 2089 * @param src 2090 * @param dst 2091 * @param deleteSource - delete src if true 2092 * @param overwrite overwrite dst if true; throw IOException if dst exists 2093 * and overwrite is false. 
2094 * 2095 * @return true if copy is successful 2096 * 2097 * @throws AccessControlException If access is denied 2098 * @throws FileAlreadyExistsException If <code>dst</code> already exists 2099 * @throws FileNotFoundException If <code>src</code> does not exist 2100 * @throws ParentNotDirectoryException If parent of <code>dst</code> is not 2101 * a directory 2102 * @throws UnsupportedFileSystemException If file system for 2103 * <code>src</code> or <code>dst</code> is not supported 2104 * @throws IOException If an I/O error occurred 2105 * 2106 * Exceptions applicable to file systems accessed over RPC: 2107 * @throws RpcClientException If an exception occurred in the RPC client 2108 * @throws RpcServerException If an exception occurred in the RPC server 2109 * @throws UnexpectedServerException If server implementation throws 2110 * undeclared exception to RPC server 2111 * 2112 * RuntimeExceptions: 2113 * @throws InvalidPathException If path <code>dst</code> is invalid 2114 */ 2115 public boolean copy(final Path src, final Path dst, boolean deleteSource, 2116 boolean overwrite) throws AccessControlException, 2117 FileAlreadyExistsException, FileNotFoundException, 2118 ParentNotDirectoryException, UnsupportedFileSystemException, 2119 IOException { 2120 checkNotSchemeWithRelative(src); 2121 checkNotSchemeWithRelative(dst); 2122 Path qSrc = makeQualified(src); 2123 Path qDst = makeQualified(dst); 2124 checkDest(qSrc.getName(), qDst, overwrite); 2125 FileStatus fs = FileContext.this.getFileStatus(qSrc); 2126 if (fs.isDirectory()) { 2127 checkDependencies(qSrc, qDst); 2128 mkdir(qDst, FsPermission.getDefault(), true); 2129 FileStatus[] contents = listStatus(qSrc); 2130 for (FileStatus content : contents) { 2131 copy(makeQualified(content.getPath()), makeQualified(new Path(qDst, 2132 content.getPath().getName())), deleteSource, overwrite); 2133 } 2134 } else { 2135 InputStream in=null; 2136 OutputStream out = null; 2137 try { 2138 in = open(qSrc); 2139 
EnumSet<CreateFlag> createFlag = overwrite ? EnumSet.of( 2140 CreateFlag.CREATE, CreateFlag.OVERWRITE) : 2141 EnumSet.of(CreateFlag.CREATE); 2142 out = create(qDst, createFlag); 2143 IOUtils.copyBytes(in, out, conf, true); 2144 } catch (IOException e) { 2145 IOUtils.closeStream(out); 2146 IOUtils.closeStream(in); 2147 throw e; 2148 } 2149 } 2150 if (deleteSource) { 2151 return delete(qSrc, true); 2152 } else { 2153 return true; 2154 } 2155 } 2156 } 2157 2158 /** 2159 * Check if copying srcName to dst would overwrite an existing 2160 * file or directory. 2161 * @param srcName File or directory to be copied. 2162 * @param dst Destination to copy srcName to. 2163 * @param overwrite Whether it's ok to overwrite an existing file. 2164 * @throws AccessControlException If access is denied. 2165 * @throws IOException If dst is an existing directory, or dst is an 2166 * existing file and the overwrite option is not passed. 2167 */ 2168 private void checkDest(String srcName, Path dst, boolean overwrite) 2169 throws AccessControlException, IOException { 2170 try { 2171 FileStatus dstFs = getFileStatus(dst); 2172 if (dstFs.isDirectory()) { 2173 if (null == srcName) { 2174 throw new IOException("Target " + dst + " is a directory"); 2175 } 2176 // Recurse to check if dst/srcName exists. 2177 checkDest(null, new Path(dst, srcName), overwrite); 2178 } else if (!overwrite) { 2179 throw new IOException("Target " + new Path(dst, srcName) 2180 + " already exists"); 2181 } 2182 } catch (FileNotFoundException e) { 2183 // dst does not exist - OK to copy. 
2184 } 2185 } 2186 2187 // 2188 // If the destination is a subdirectory of the source, then 2189 // generate exception 2190 // 2191 private static void checkDependencies(Path qualSrc, Path qualDst) 2192 throws IOException { 2193 if (isSameFS(qualSrc, qualDst)) { 2194 String srcq = qualSrc.toString() + Path.SEPARATOR; 2195 String dstq = qualDst.toString() + Path.SEPARATOR; 2196 if (dstq.startsWith(srcq)) { 2197 if (srcq.length() == dstq.length()) { 2198 throw new IOException("Cannot copy " + qualSrc + " to itself."); 2199 } else { 2200 throw new IOException("Cannot copy " + qualSrc + 2201 " to its subdirectory " + qualDst); 2202 } 2203 } 2204 } 2205 } 2206 2207 /** 2208 * Are qualSrc and qualDst of the same file system? 2209 * @param qualPath1 - fully qualified path 2210 * @param qualPath2 - fully qualified path 2211 * @return 2212 */ 2213 private static boolean isSameFS(Path qualPath1, Path qualPath2) { 2214 URI srcUri = qualPath1.toUri(); 2215 URI dstUri = qualPath2.toUri(); 2216 return (srcUri.getScheme().equals(dstUri.getScheme()) && 2217 !(srcUri.getAuthority() != null && dstUri.getAuthority() != null && srcUri 2218 .getAuthority().equals(dstUri.getAuthority()))); 2219 } 2220 2221 /** 2222 * Deletes all the paths in deleteOnExit on JVM shutdown. 2223 */ 2224 static class FileContextFinalizer implements Runnable { 2225 public synchronized void run() { 2226 processDeleteOnExit(); 2227 } 2228 } 2229 2230 /** 2231 * Resolves all symbolic links in the specified path. 2232 * Returns the new path object. 
2233 */ 2234 protected Path resolve(final Path f) throws FileNotFoundException, 2235 UnresolvedLinkException, AccessControlException, IOException { 2236 return new FSLinkResolver<Path>() { 2237 public Path next(final AbstractFileSystem fs, final Path p) 2238 throws IOException, UnresolvedLinkException { 2239 return fs.resolvePath(p); 2240 } 2241 }.resolve(this, f); 2242 } 2243 2244 /** 2245 * Resolves all symbolic links in the specified path leading up 2246 * to, but not including the final path component. 2247 * @param f path to resolve 2248 * @return the new path object. 2249 */ 2250 protected Path resolveIntermediate(final Path f) throws IOException { 2251 return new FSLinkResolver<FileStatus>() { 2252 public FileStatus next(final AbstractFileSystem fs, final Path p) 2253 throws IOException, UnresolvedLinkException { 2254 return fs.getFileLinkStatus(p); 2255 } 2256 }.resolve(this, f).getPath(); 2257 } 2258 2259 /** 2260 * Returns the list of AbstractFileSystems accessed in the path. The list may 2261 * contain more than one AbstractFileSystems objects in case of symlinks. 2262 * 2263 * @param f 2264 * Path which needs to be resolved 2265 * @return List of AbstractFileSystems accessed in the path 2266 * @throws IOException 2267 */ 2268 Set<AbstractFileSystem> resolveAbstractFileSystems(final Path f) 2269 throws IOException { 2270 final Path absF = fixRelativePart(f); 2271 final HashSet<AbstractFileSystem> result 2272 = new HashSet<AbstractFileSystem>(); 2273 new FSLinkResolver<Void>() { 2274 public Void next(final AbstractFileSystem fs, final Path p) 2275 throws IOException, UnresolvedLinkException { 2276 result.add(fs); 2277 fs.getFileStatus(p); 2278 return null; 2279 } 2280 }.resolve(this, absF); 2281 return result; 2282 } 2283 2284 /** 2285 * Class used to perform an operation on and resolve symlinks in a 2286 * path. The operation may potentially span multiple file systems. 
2287 */ 2288 protected abstract class FSLinkResolver<T> { 2289 // The maximum number of symbolic link components in a path 2290 private static final int MAX_PATH_LINKS = 32; 2291 2292 /** 2293 * Generic helper function overridden on instantiation to perform a 2294 * specific operation on the given file system using the given path 2295 * which may result in an UnresolvedLinkException. 2296 * @param fs AbstractFileSystem to perform the operation on. 2297 * @param p Path given the file system. 2298 * @return Generic type determined by the specific implementation. 2299 * @throws UnresolvedLinkException If symbolic link <code>path</code> could 2300 * not be resolved 2301 * @throws IOException an I/O error occured 2302 */ 2303 public abstract T next(final AbstractFileSystem fs, final Path p) 2304 throws IOException, UnresolvedLinkException; 2305 2306 /** 2307 * Performs the operation specified by the next function, calling it 2308 * repeatedly until all symlinks in the given path are resolved. 2309 * @param fc FileContext used to access file systems. 2310 * @param p The path to resolve symlinks in. 2311 * @return Generic type determined by the implementation of next. 2312 * @throws IOException 2313 */ 2314 public T resolve(final FileContext fc, Path p) throws IOException { 2315 int count = 0; 2316 T in = null; 2317 Path first = p; 2318 // NB: More than one AbstractFileSystem can match a scheme, eg 2319 // "file" resolves to LocalFs but could have come by RawLocalFs. 
2320 AbstractFileSystem fs = fc.getFSofPath(p); 2321 2322 // Loop until all symlinks are resolved or the limit is reached 2323 for (boolean isLink = true; isLink;) { 2324 try { 2325 in = next(fs, p); 2326 isLink = false; 2327 } catch (UnresolvedLinkException e) { 2328 if (count++ > MAX_PATH_LINKS) { 2329 throw new IOException("Possible cyclic loop while " + 2330 "following symbolic link " + first); 2331 } 2332 // Resolve the first unresolved path component 2333 p = qualifySymlinkTarget(fs, p, fs.getLinkTarget(p)); 2334 fs = fc.getFSofPath(p); 2335 } 2336 } 2337 return in; 2338 } 2339 } 2340 2341 /** 2342 * Get the statistics for a particular file system 2343 * 2344 * @param uri 2345 * the uri to lookup the statistics. Only scheme and authority part 2346 * of the uri are used as the key to store and lookup. 2347 * @return a statistics object 2348 */ 2349 public static Statistics getStatistics(URI uri) { 2350 return AbstractFileSystem.getStatistics(uri); 2351 } 2352 2353 /** 2354 * Clears all the statistics stored in AbstractFileSystem, for all the file 2355 * systems. 2356 */ 2357 public static void clearStatistics() { 2358 AbstractFileSystem.clearStatistics(); 2359 } 2360 2361 /** 2362 * Prints the statistics to standard output. File System is identified by the 2363 * scheme and authority. 2364 */ 2365 public static void printStatistics() { 2366 AbstractFileSystem.printStatistics(); 2367 } 2368 2369 /** 2370 * @return Map of uri and statistics for each filesystem instantiated. The uri 2371 * consists of scheme and authority for the filesystem. 2372 */ 2373 public static Map<URI, Statistics> getAllStatistics() { 2374 return AbstractFileSystem.getAllStatistics(); 2375 } 2376 2377 /** 2378 * Get delegation tokens for the file systems accessed for a given 2379 * path. 2380 * @param p Path for which delegations tokens are requested. 2381 * @param renewer the account name that is allowed to renew the token. 2382 * @return List of delegation tokens. 
2383 * @throws IOException 2384 */ 2385 @InterfaceAudience.LimitedPrivate( { "HDFS", "MapReduce" }) 2386 public List<Token<?>> getDelegationTokens( 2387 Path p, String renewer) throws IOException { 2388 Set<AbstractFileSystem> afsSet = resolveAbstractFileSystems(p); 2389 List<Token<?>> tokenList = 2390 new ArrayList<Token<?>>(); 2391 for (AbstractFileSystem afs : afsSet) { 2392 List<Token<?>> afsTokens = afs.getDelegationTokens(renewer); 2393 tokenList.addAll(afsTokens); 2394 } 2395 return tokenList; 2396 } 2397 }