001 /** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018 package org.apache.hadoop.fs; 019 020 import java.io.FileNotFoundException; 021 import java.io.IOException; 022 import java.io.InputStream; 023 import java.io.OutputStream; 024 import java.net.URI; 025 import java.security.PrivilegedExceptionAction; 026 import java.util.ArrayList; 027 import java.util.Arrays; 028 import java.util.EnumSet; 029 import java.util.HashSet; 030 import java.util.IdentityHashMap; 031 import java.util.List; 032 import java.util.Map; 033 import java.util.Set; 034 import java.util.Stack; 035 import java.util.TreeSet; 036 import java.util.Map.Entry; 037 038 import org.apache.commons.logging.Log; 039 import org.apache.commons.logging.LogFactory; 040 import org.apache.hadoop.HadoopIllegalArgumentException; 041 import org.apache.hadoop.classification.InterfaceAudience; 042 import org.apache.hadoop.classification.InterfaceStability; 043 import org.apache.hadoop.conf.Configuration; 044 import org.apache.hadoop.fs.FileSystem.Statistics; 045 import org.apache.hadoop.fs.Options.CreateOpts; 046 import org.apache.hadoop.fs.permission.FsPermission; 047 import static 
org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY; 048 import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_DEFAULT_NAME_DEFAULT; 049 import org.apache.hadoop.io.IOUtils; 050 import org.apache.hadoop.ipc.RpcClientException; 051 import org.apache.hadoop.ipc.RpcServerException; 052 import org.apache.hadoop.ipc.UnexpectedServerException; 053 import org.apache.hadoop.fs.InvalidPathException; 054 import org.apache.hadoop.security.AccessControlException; 055 import org.apache.hadoop.security.UserGroupInformation; 056 import org.apache.hadoop.security.token.Token; 057 import org.apache.hadoop.util.ShutdownHookManager; 058 059 /** 060 * The FileContext class provides an interface to the application writer for 061 * using the Hadoop file system. 062 * It provides a set of methods for the usual operation: create, open, 063 * list, etc 064 * 065 * <p> 066 * <b> *** Path Names *** </b> 067 * <p> 068 * 069 * The Hadoop file system supports a URI name space and URI names. 070 * It offers a forest of file systems that can be referenced using fully 071 * qualified URIs. 072 * Two common Hadoop file systems implementations are 073 * <ul> 074 * <li> the local file system: file:///path 075 * <li> the hdfs file system hdfs://nnAddress:nnPort/path 076 * </ul> 077 * 078 * While URI names are very flexible, it requires knowing the name or address 079 * of the server. For convenience one often wants to access the default system 080 * in one's environment without knowing its name/address. This has an 081 * additional benefit that it allows one to change one's default fs 082 * (e.g. admin moves application from cluster1 to cluster2). 083 * <p> 084 * 085 * To facilitate this, Hadoop supports a notion of a default file system. 086 * The user can set his default file system, although this is 087 * typically set up for you in your environment via your default config. 
* A default file system implies a default scheme and authority; slash-relative
 * names (such as /foo/bar) are resolved relative to that default FS.
 * Similarly a user can also have working-directory-relative names (i.e. names
 * not starting with a slash). While the working directory is generally in the
 * same default FS, the wd can be in a different FS.
 * <p>
 * Hence Hadoop path names can be one of:
 *  <ul>
 *  <li> fully qualified URI: scheme://authority/path
 *  <li> slash relative names: /path relative to the default file system
 *  <li> wd-relative names: path relative to the working dir
 *  </ul>
 *  Relative paths with scheme (scheme:foo/bar) are illegal.
 *
 *  <p>
 *  <b>****The Role of the FileContext and configuration defaults****</b>
 *  <p>
 *  The FileContext provides file namespace context for resolving file names;
 *  it also contains the umask for permissions. In that sense it is like the
 *  per-process file-related state in a Unix system.
 *  These two properties
 *  <ul>
 *  <li> default file system (i.e. your slash)
 *  <li> umask
 *  </ul>
 *  in general, are obtained from the default configuration file
 *  in your environment (see {@link Configuration}).
 *
 *  No other configuration parameters are obtained from the default config as
 *  far as the file context layer is concerned. All file system instances
 *  (i.e. deployments of file systems) have default properties; we call these
 *  server side (SS) defaults. Operations like create allow one to select many
 *  properties: either pass them in as explicit parameters or use
 *  the SS properties.
 *  <p>
 *  The file system related SS defaults are
 *  <ul>
 *  <li> the home directory (default is "/user/userName")
 *  <li> the initial wd (only for local fs)
 *  <li> replication factor
 *  <li> block size
 *  <li> buffer size
 *  <li> encryptDataTransfer
 *  <li> checksum option.
(checksumType and bytesPerChecksum)
 *  </ul>
 *
 * <p>
 * <b> *** Usage Model for the FileContext class *** </b>
 * <p>
 * Example 1: use the default config read from the $HADOOP_CONFIG/core.xml.
 *   Unspecified values come from core-defaults.xml in the release jar.
 *  <ul>
 *  <li> myFContext = FileContext.getFileContext(); // uses the default config
 *                                                // which has your default FS
 *  <li>  myFContext.create(path, ...);
 *  <li>  myFContext.setWorkingDirectory(path)
 *  <li>  myFContext.open (path, ...);
 *  </ul>
 * Example 2: Get a FileContext with a specific URI as the default FS
 *  <ul>
 *  <li> myFContext = FileContext.getFileContext(URI)
 *  <li> myFContext.create(path, ...);
 *   ...
 * </ul>
 * Example 3: FileContext with local file system as the default
 *  <ul>
 *  <li> myFContext = FileContext.getLocalFSFileContext()
 *  <li> myFContext.create(path, ...);
 *  <li> ...
 *  </ul>
 * Example 4: Use a specific config, ignoring $HADOOP_CONFIG
 *  Generally you should not need to use a specific config unless one has
 *  been passed to you.
 *  <ul>
 *  <li> configX = someConfigSomeOnePassedToYou.
 *  <li> myFContext = getFileContext(configX); // configX is not changed,
 *                                             // is passed down
 *  <li> myFContext.create(path, ...);
 *  <li>...
 *  </ul>
 *
 */

@InterfaceAudience.Public
@InterfaceStability.Evolving /*Evolving for a release,to be changed to Stable */
public final class FileContext {

  /** Logger shared by all FileContext instances. */
  public static final Log LOG = LogFactory.getLog(FileContext.class);
  /** The permission returned by {@link FsPermission#getDefault()}. */
  public static final FsPermission DEFAULT_PERM = FsPermission.getDefault();

  /**
   * Priority of the FileContext shutdown hook.
   */
  public static final int SHUTDOWN_HOOK_PRIORITY = 20;

  /**
   * Paths that should be deleted on JVM shutdown, grouped by the FileContext
   * that is used to delete them. The map is an {@link IdentityHashMap}, so
   * FileContext keys are compared by reference.
   */
  static final Map<FileContext, Set<Path>> DELETE_ON_EXIT =
    new IdentityHashMap<FileContext, Set<Path>>();

  /** JVM shutdown hook thread.
*/ 189 static final FileContextFinalizer FINALIZER = 190 new FileContextFinalizer(); 191 192 private static final PathFilter DEFAULT_FILTER = new PathFilter() { 193 @Override 194 public boolean accept(final Path file) { 195 return true; 196 } 197 }; 198 199 /** 200 * The FileContext is defined by. 201 * 1) defaultFS (slash) 202 * 2) wd 203 * 3) umask 204 */ 205 private final AbstractFileSystem defaultFS; //default FS for this FileContext. 206 private Path workingDir; // Fully qualified 207 private FsPermission umask; 208 private final Configuration conf; 209 private final UserGroupInformation ugi; 210 211 private FileContext(final AbstractFileSystem defFs, 212 final FsPermission theUmask, final Configuration aConf) { 213 defaultFS = defFs; 214 umask = FsPermission.getUMask(aConf); 215 conf = aConf; 216 try { 217 ugi = UserGroupInformation.getCurrentUser(); 218 } catch (IOException e) { 219 LOG.error("Exception in getCurrentUser: ",e); 220 throw new RuntimeException("Failed to get the current user " + 221 "while creating a FileContext", e); 222 } 223 /* 224 * Init the wd. 225 * WorkingDir is implemented at the FileContext layer 226 * NOT at the AbstractFileSystem layer. 227 * If the DefaultFS, such as localFilesystem has a notion of 228 * builtin WD, we use that as the initial WD. 229 * Otherwise the WD is initialized to the home directory. 230 */ 231 workingDir = defaultFS.getInitialWorkingDirectory(); 232 if (workingDir == null) { 233 workingDir = defaultFS.getHomeDirectory(); 234 } 235 util = new Util(); // for the inner class 236 } 237 238 /* 239 * Remove relative part - return "absolute": 240 * If input is relative path ("foo/bar") add wd: ie "/<workingDir>/foo/bar" 241 * A fully qualified uri ("hdfs://nn:p/foo/bar") or a slash-relative path 242 * ("/foo/bar") are returned unchanged. 243 * 244 * Applications that use FileContext should use #makeQualified() since 245 * they really want a fully qualified URI. 
246 * Hence this method is not called makeAbsolute() and 247 * has been deliberately declared private. 248 */ 249 private Path fixRelativePart(Path p) { 250 if (p.isUriPathAbsolute()) { 251 return p; 252 } else { 253 return new Path(workingDir, p); 254 } 255 } 256 257 /** 258 * Delete all the paths that were marked as delete-on-exit. 259 */ 260 static void processDeleteOnExit() { 261 synchronized (DELETE_ON_EXIT) { 262 Set<Entry<FileContext, Set<Path>>> set = DELETE_ON_EXIT.entrySet(); 263 for (Entry<FileContext, Set<Path>> entry : set) { 264 FileContext fc = entry.getKey(); 265 Set<Path> paths = entry.getValue(); 266 for (Path path : paths) { 267 try { 268 fc.delete(path, true); 269 } catch (IOException e) { 270 LOG.warn("Ignoring failure to deleteOnExit for path " + path); 271 } 272 } 273 } 274 DELETE_ON_EXIT.clear(); 275 } 276 } 277 278 /** 279 * Pathnames with scheme and relative path are illegal. 280 * @param path to be checked 281 */ 282 private static void checkNotSchemeWithRelative(final Path path) { 283 if (path.toUri().isAbsolute() && !path.isUriPathAbsolute()) { 284 throw new HadoopIllegalArgumentException( 285 "Unsupported name: has scheme but relative path-part"); 286 } 287 } 288 289 /** 290 * Get the file system of supplied path. 291 * 292 * @param absOrFqPath - absolute or fully qualified path 293 * @return the file system of the path 294 * 295 * @throws UnsupportedFileSystemException If the file system for 296 * <code>absOrFqPath</code> is not supported. 297 * @throws IOExcepton If the file system for <code>absOrFqPath</code> could 298 * not be instantiated. 
299 */ 300 private AbstractFileSystem getFSofPath(final Path absOrFqPath) 301 throws UnsupportedFileSystemException, IOException { 302 checkNotSchemeWithRelative(absOrFqPath); 303 if (!absOrFqPath.isAbsolute() && absOrFqPath.toUri().getScheme() == null) { 304 throw new HadoopIllegalArgumentException( 305 "FileContext Bug: path is relative"); 306 } 307 308 try { 309 // Is it the default FS for this FileContext? 310 defaultFS.checkPath(absOrFqPath); 311 return defaultFS; 312 } catch (Exception e) { // it is different FileSystem 313 return getAbstractFileSystem(ugi, absOrFqPath.toUri(), conf); 314 } 315 } 316 317 private static AbstractFileSystem getAbstractFileSystem( 318 UserGroupInformation user, final URI uri, final Configuration conf) 319 throws UnsupportedFileSystemException, IOException { 320 try { 321 return user.doAs(new PrivilegedExceptionAction<AbstractFileSystem>() { 322 @Override 323 public AbstractFileSystem run() throws UnsupportedFileSystemException { 324 return AbstractFileSystem.get(uri, conf); 325 } 326 }); 327 } catch (InterruptedException ex) { 328 LOG.error(ex); 329 throw new IOException("Failed to get the AbstractFileSystem for path: " 330 + uri, ex); 331 } 332 } 333 334 /** 335 * Protected Static Factory methods for getting a FileContexts 336 * that take a AbstractFileSystem as input. To be used for testing. 337 */ 338 339 /** 340 * Create a FileContext with specified FS as default using the specified 341 * config. 342 * 343 * @param defFS 344 * @param aConf 345 * @return new FileContext with specifed FS as default. 346 */ 347 public static FileContext getFileContext(final AbstractFileSystem defFS, 348 final Configuration aConf) { 349 return new FileContext(defFS, FsPermission.getUMask(aConf), aConf); 350 } 351 352 /** 353 * Create a FileContext for specified file system using the default config. 354 * 355 * @param defaultFS 356 * @return a FileContext with the specified AbstractFileSystem 357 * as the default FS. 
358 */ 359 protected static FileContext getFileContext( 360 final AbstractFileSystem defaultFS) { 361 return getFileContext(defaultFS, new Configuration()); 362 } 363 364 /** 365 * Static Factory methods for getting a FileContext. 366 * Note new file contexts are created for each call. 367 * The only singleton is the local FS context using the default config. 368 * 369 * Methods that use the default config: the default config read from the 370 * $HADOOP_CONFIG/core.xml, 371 * Unspecified key-values for config are defaulted from core-defaults.xml 372 * in the release jar. 373 * 374 * The keys relevant to the FileContext layer are extracted at time of 375 * construction. Changes to the config after the call are ignore 376 * by the FileContext layer. 377 * The conf is passed to lower layers like AbstractFileSystem and HDFS which 378 * pick up their own config variables. 379 */ 380 381 /** 382 * Create a FileContext using the default config read from the 383 * $HADOOP_CONFIG/core.xml, Unspecified key-values for config are defaulted 384 * from core-defaults.xml in the release jar. 385 * 386 * @throws UnsupportedFileSystemException If the file system from the default 387 * configuration is not supported 388 */ 389 public static FileContext getFileContext() 390 throws UnsupportedFileSystemException { 391 return getFileContext(new Configuration()); 392 } 393 394 /** 395 * @return a FileContext for the local file system using the default config. 396 * @throws UnsupportedFileSystemException If the file system for 397 * {@link FsConstants#LOCAL_FS_URI} is not supported. 398 */ 399 public static FileContext getLocalFSFileContext() 400 throws UnsupportedFileSystemException { 401 return getFileContext(FsConstants.LOCAL_FS_URI); 402 } 403 404 /** 405 * Create a FileContext for specified URI using the default config. 406 * 407 * @param defaultFsUri 408 * @return a FileContext with the specified URI as the default FS. 
409 * 410 * @throws UnsupportedFileSystemException If the file system for 411 * <code>defaultFsUri</code> is not supported 412 */ 413 public static FileContext getFileContext(final URI defaultFsUri) 414 throws UnsupportedFileSystemException { 415 return getFileContext(defaultFsUri, new Configuration()); 416 } 417 418 /** 419 * Create a FileContext for specified default URI using the specified config. 420 * 421 * @param defaultFsUri 422 * @param aConf 423 * @return new FileContext for specified uri 424 * @throws UnsupportedFileSystemException If the file system with specified is 425 * not supported 426 * @throws RuntimeException If the file system specified is supported but 427 * could not be instantiated, or if login fails. 428 */ 429 public static FileContext getFileContext(final URI defaultFsUri, 430 final Configuration aConf) throws UnsupportedFileSystemException { 431 UserGroupInformation currentUser = null; 432 AbstractFileSystem defaultAfs = null; 433 try { 434 currentUser = UserGroupInformation.getCurrentUser(); 435 defaultAfs = getAbstractFileSystem(currentUser, defaultFsUri, aConf); 436 } catch (UnsupportedFileSystemException ex) { 437 throw ex; 438 } catch (IOException ex) { 439 LOG.error(ex); 440 throw new RuntimeException(ex); 441 } 442 return getFileContext(defaultAfs, aConf); 443 } 444 445 /** 446 * Create a FileContext using the passed config. Generally it is better to use 447 * {@link #getFileContext(URI, Configuration)} instead of this one. 
448 * 449 * 450 * @param aConf 451 * @return new FileContext 452 * @throws UnsupportedFileSystemException If file system in the config 453 * is not supported 454 */ 455 public static FileContext getFileContext(final Configuration aConf) 456 throws UnsupportedFileSystemException { 457 return getFileContext( 458 URI.create(aConf.get(FS_DEFAULT_NAME_KEY, FS_DEFAULT_NAME_DEFAULT)), 459 aConf); 460 } 461 462 /** 463 * @param aConf - from which the FileContext is configured 464 * @return a FileContext for the local file system using the specified config. 465 * 466 * @throws UnsupportedFileSystemException If default file system in the config 467 * is not supported 468 * 469 */ 470 public static FileContext getLocalFSFileContext(final Configuration aConf) 471 throws UnsupportedFileSystemException { 472 return getFileContext(FsConstants.LOCAL_FS_URI, aConf); 473 } 474 475 /* This method is needed for tests. */ 476 @InterfaceAudience.Private 477 @InterfaceStability.Unstable /* return type will change to AFS once 478 HADOOP-6223 is completed */ 479 public AbstractFileSystem getDefaultFileSystem() { 480 return defaultFS; 481 } 482 483 /** 484 * Set the working directory for wd-relative names (such a "foo/bar"). Working 485 * directory feature is provided by simply prefixing relative names with the 486 * working dir. Note this is different from Unix where the wd is actually set 487 * to the inode. Hence setWorkingDir does not follow symlinks etc. This works 488 * better in a distributed environment that has multiple independent roots. 489 * {@link #getWorkingDirectory()} should return what setWorkingDir() set. 
490 * 491 * @param newWDir new working directory 492 * @throws IOException 493 * <br> 494 * NewWdir can be one of: 495 * <ul> 496 * <li>relative path: "foo/bar";</li> 497 * <li>absolute without scheme: "/foo/bar"</li> 498 * <li>fully qualified with scheme: "xx://auth/foo/bar"</li> 499 * </ul> 500 * <br> 501 * Illegal WDs: 502 * <ul> 503 * <li>relative with scheme: "xx:foo/bar"</li> 504 * <li>non existent directory</li> 505 * </ul> 506 */ 507 public void setWorkingDirectory(final Path newWDir) throws IOException { 508 checkNotSchemeWithRelative(newWDir); 509 /* wd is stored as a fully qualified path. We check if the given 510 * path is not relative first since resolve requires and returns 511 * an absolute path. 512 */ 513 final Path newWorkingDir = new Path(workingDir, newWDir); 514 FileStatus status = getFileStatus(newWorkingDir); 515 if (status.isFile()) { 516 throw new FileNotFoundException("Cannot setWD to a file"); 517 } 518 workingDir = newWorkingDir; 519 } 520 521 /** 522 * Gets the working directory for wd-relative names (such a "foo/bar"). 523 */ 524 public Path getWorkingDirectory() { 525 return workingDir; 526 } 527 528 /** 529 * Gets the ugi in the file-context 530 * @return UserGroupInformation 531 */ 532 public UserGroupInformation getUgi() { 533 return ugi; 534 } 535 536 /** 537 * Return the current user's home directory in this file system. 538 * The default implementation returns "/user/$USER/". 539 * @return the home directory 540 */ 541 public Path getHomeDirectory() { 542 return defaultFS.getHomeDirectory(); 543 } 544 545 /** 546 * 547 * @return the umask of this FileContext 548 */ 549 public FsPermission getUMask() { 550 return umask; 551 } 552 553 /** 554 * Set umask to the supplied parameter. 
555 * @param newUmask the new umask 556 */ 557 public void setUMask(final FsPermission newUmask) { 558 umask = newUmask; 559 } 560 561 562 /** 563 * Resolve the path following any symlinks or mount points 564 * @param f to be resolved 565 * @return fully qualified resolved path 566 * 567 * @throws FileNotFoundException If <code>f</code> does not exist 568 * @throws AccessControlException if access denied 569 * @throws IOException If an IO Error occurred 570 * 571 * Exceptions applicable to file systems accessed over RPC: 572 * @throws RpcClientException If an exception occurred in the RPC client 573 * @throws RpcServerException If an exception occurred in the RPC server 574 * @throws UnexpectedServerException If server implementation throws 575 * undeclared exception to RPC server 576 * 577 * RuntimeExceptions: 578 * @throws InvalidPathException If path <code>f</code> is not valid 579 */ 580 public Path resolvePath(final Path f) throws FileNotFoundException, 581 UnresolvedLinkException, AccessControlException, IOException { 582 return resolve(f); 583 } 584 585 /** 586 * Make the path fully qualified if it is isn't. 587 * A Fully-qualified path has scheme and authority specified and an absolute 588 * path. 589 * Use the default file system and working dir in this FileContext to qualify. 590 * @param path 591 * @return qualified path 592 */ 593 public Path makeQualified(final Path path) { 594 return path.makeQualified(defaultFS.getUri(), getWorkingDirectory()); 595 } 596 597 /** 598 * Create or overwrite file on indicated path and returns an output stream for 599 * writing into the file. 600 * 601 * @param f the file name to open 602 * @param createFlag gives the semantics of create; see {@link CreateFlag} 603 * @param opts file creation options; see {@link Options.CreateOpts}. 
604 * <ul> 605 * <li>Progress - to report progress on the operation - default null 606 * <li>Permission - umask is applied against permisssion: default is 607 * FsPermissions:getDefault() 608 * 609 * <li>CreateParent - create missing parent path; default is to not 610 * to create parents 611 * <li>The defaults for the following are SS defaults of the file 612 * server implementing the target path. Not all parameters make sense 613 * for all kinds of file system - eg. localFS ignores Blocksize, 614 * replication, checksum 615 * <ul> 616 * <li>BufferSize - buffersize used in FSDataOutputStream 617 * <li>Blocksize - block size for file blocks 618 * <li>ReplicationFactor - replication for blocks 619 * <li>ChecksumParam - Checksum parameters. server default is used 620 * if not specified. 621 * </ul> 622 * </ul> 623 * 624 * @return {@link FSDataOutputStream} for created file 625 * 626 * @throws AccessControlException If access is denied 627 * @throws FileAlreadyExistsException If file <code>f</code> already exists 628 * @throws FileNotFoundException If parent of <code>f</code> does not exist 629 * and <code>createParent</code> is false 630 * @throws ParentNotDirectoryException If parent of <code>f</code> is not a 631 * directory. 632 * @throws UnsupportedFileSystemException If file system for <code>f</code> is 633 * not supported 634 * @throws IOException If an I/O error occurred 635 * 636 * Exceptions applicable to file systems accessed over RPC: 637 * @throws RpcClientException If an exception occurred in the RPC client 638 * @throws RpcServerException If an exception occurred in the RPC server 639 * @throws UnexpectedServerException If server implementation throws 640 * undeclared exception to RPC server 641 * 642 * RuntimeExceptions: 643 * @throws InvalidPathException If path <code>f</code> is not valid 644 */ 645 public FSDataOutputStream create(final Path f, 646 final EnumSet<CreateFlag> createFlag, Options.CreateOpts... 
opts) 647 throws AccessControlException, FileAlreadyExistsException, 648 FileNotFoundException, ParentNotDirectoryException, 649 UnsupportedFileSystemException, IOException { 650 Path absF = fixRelativePart(f); 651 652 // If one of the options is a permission, extract it & apply umask 653 // If not, add a default Perms and apply umask; 654 // AbstractFileSystem#create 655 656 CreateOpts.Perms permOpt = 657 (CreateOpts.Perms) CreateOpts.getOpt(CreateOpts.Perms.class, opts); 658 FsPermission permission = (permOpt != null) ? permOpt.getValue() : 659 FsPermission.getDefault(); 660 permission = permission.applyUMask(umask); 661 662 final CreateOpts[] updatedOpts = 663 CreateOpts.setOpt(CreateOpts.perms(permission), opts); 664 return new FSLinkResolver<FSDataOutputStream>() { 665 @Override 666 public FSDataOutputStream next(final AbstractFileSystem fs, final Path p) 667 throws IOException { 668 return fs.create(p, createFlag, updatedOpts); 669 } 670 }.resolve(this, absF); 671 } 672 673 /** 674 * Make(create) a directory and all the non-existent parents. 
675 * 676 * @param dir - the dir to make 677 * @param permission - permissions is set permission&~umask 678 * @param createParent - if true then missing parent dirs are created if false 679 * then parent must exist 680 * 681 * @throws AccessControlException If access is denied 682 * @throws FileAlreadyExistsException If directory <code>dir</code> already 683 * exists 684 * @throws FileNotFoundException If parent of <code>dir</code> does not exist 685 * and <code>createParent</code> is false 686 * @throws ParentNotDirectoryException If parent of <code>dir</code> is not a 687 * directory 688 * @throws UnsupportedFileSystemException If file system for <code>dir</code> 689 * is not supported 690 * @throws IOException If an I/O error occurred 691 * 692 * Exceptions applicable to file systems accessed over RPC: 693 * @throws RpcClientException If an exception occurred in the RPC client 694 * @throws UnexpectedServerException If server implementation throws 695 * undeclared exception to RPC server 696 * 697 * RuntimeExceptions: 698 * @throws InvalidPathException If path <code>dir</code> is not valid 699 */ 700 public void mkdir(final Path dir, final FsPermission permission, 701 final boolean createParent) throws AccessControlException, 702 FileAlreadyExistsException, FileNotFoundException, 703 ParentNotDirectoryException, UnsupportedFileSystemException, 704 IOException { 705 final Path absDir = fixRelativePart(dir); 706 final FsPermission absFerms = (permission == null ? 707 FsPermission.getDefault() : permission).applyUMask(umask); 708 new FSLinkResolver<Void>() { 709 @Override 710 public Void next(final AbstractFileSystem fs, final Path p) 711 throws IOException, UnresolvedLinkException { 712 fs.mkdir(p, absFerms, createParent); 713 return null; 714 } 715 }.resolve(this, absDir); 716 } 717 718 /** 719 * Delete a file. 720 * @param f the path to delete. 721 * @param recursive if path is a directory and set to 722 * true, the directory is deleted else throws an exception. 
In 723 * case of a file the recursive can be set to either true or false. 724 * 725 * @throws AccessControlException If access is denied 726 * @throws FileNotFoundException If <code>f</code> does not exist 727 * @throws UnsupportedFileSystemException If file system for <code>f</code> is 728 * not supported 729 * @throws IOException If an I/O error occurred 730 * 731 * Exceptions applicable to file systems accessed over RPC: 732 * @throws RpcClientException If an exception occurred in the RPC client 733 * @throws RpcServerException If an exception occurred in the RPC server 734 * @throws UnexpectedServerException If server implementation throws 735 * undeclared exception to RPC server 736 * 737 * RuntimeExceptions: 738 * @throws InvalidPathException If path <code>f</code> is invalid 739 */ 740 public boolean delete(final Path f, final boolean recursive) 741 throws AccessControlException, FileNotFoundException, 742 UnsupportedFileSystemException, IOException { 743 Path absF = fixRelativePart(f); 744 return new FSLinkResolver<Boolean>() { 745 @Override 746 public Boolean next(final AbstractFileSystem fs, final Path p) 747 throws IOException, UnresolvedLinkException { 748 return Boolean.valueOf(fs.delete(p, recursive)); 749 } 750 }.resolve(this, absF); 751 } 752 753 /** 754 * Opens an FSDataInputStream at the indicated Path using 755 * default buffersize. 
756 * @param f the file name to open 757 * 758 * @throws AccessControlException If access is denied 759 * @throws FileNotFoundException If file <code>f</code> does not exist 760 * @throws UnsupportedFileSystemException If file system for <code>f</code> 761 * is not supported 762 * @throws IOException If an I/O error occurred 763 * 764 * Exceptions applicable to file systems accessed over RPC: 765 * @throws RpcClientException If an exception occurred in the RPC client 766 * @throws RpcServerException If an exception occurred in the RPC server 767 * @throws UnexpectedServerException If server implementation throws 768 * undeclared exception to RPC server 769 */ 770 public FSDataInputStream open(final Path f) throws AccessControlException, 771 FileNotFoundException, UnsupportedFileSystemException, IOException { 772 final Path absF = fixRelativePart(f); 773 return new FSLinkResolver<FSDataInputStream>() { 774 @Override 775 public FSDataInputStream next(final AbstractFileSystem fs, final Path p) 776 throws IOException, UnresolvedLinkException { 777 return fs.open(p); 778 } 779 }.resolve(this, absF); 780 } 781 782 /** 783 * Opens an FSDataInputStream at the indicated Path. 784 * 785 * @param f the file name to open 786 * @param bufferSize the size of the buffer to be used. 
787 * 788 * @throws AccessControlException If access is denied 789 * @throws FileNotFoundException If file <code>f</code> does not exist 790 * @throws UnsupportedFileSystemException If file system for <code>f</code> is 791 * not supported 792 * @throws IOException If an I/O error occurred 793 * 794 * Exceptions applicable to file systems accessed over RPC: 795 * @throws RpcClientException If an exception occurred in the RPC client 796 * @throws RpcServerException If an exception occurred in the RPC server 797 * @throws UnexpectedServerException If server implementation throws 798 * undeclared exception to RPC server 799 */ 800 public FSDataInputStream open(final Path f, final int bufferSize) 801 throws AccessControlException, FileNotFoundException, 802 UnsupportedFileSystemException, IOException { 803 final Path absF = fixRelativePart(f); 804 return new FSLinkResolver<FSDataInputStream>() { 805 @Override 806 public FSDataInputStream next(final AbstractFileSystem fs, final Path p) 807 throws IOException, UnresolvedLinkException { 808 return fs.open(p, bufferSize); 809 } 810 }.resolve(this, absF); 811 } 812 813 /** 814 * Set replication for an existing file. 
815 * 816 * @param f file name 817 * @param replication new replication 818 * 819 * @return true if successful 820 * 821 * @throws AccessControlException If access is denied 822 * @throws FileNotFoundException If file <code>f</code> does not exist 823 * @throws IOException If an I/O error occurred 824 * 825 * Exceptions applicable to file systems accessed over RPC: 826 * @throws RpcClientException If an exception occurred in the RPC client 827 * @throws RpcServerException If an exception occurred in the RPC server 828 * @throws UnexpectedServerException If server implementation throws 829 * undeclared exception to RPC server 830 */ 831 public boolean setReplication(final Path f, final short replication) 832 throws AccessControlException, FileNotFoundException, 833 IOException { 834 final Path absF = fixRelativePart(f); 835 return new FSLinkResolver<Boolean>() { 836 @Override 837 public Boolean next(final AbstractFileSystem fs, final Path p) 838 throws IOException, UnresolvedLinkException { 839 return Boolean.valueOf(fs.setReplication(p, replication)); 840 } 841 }.resolve(this, absF); 842 } 843 844 /** 845 * Renames Path src to Path dst 846 * <ul> 847 * <li 848 * <li>Fails if src is a file and dst is a directory. 849 * <li>Fails if src is a directory and dst is a file. 850 * <li>Fails if the parent of dst does not exist or is a file. 851 * </ul> 852 * <p> 853 * If OVERWRITE option is not passed as an argument, rename fails if the dst 854 * already exists. 855 * <p> 856 * If OVERWRITE option is passed as an argument, rename overwrites the dst if 857 * it is a file or an empty directory. Rename fails if dst is a non-empty 858 * directory. 859 * <p> 860 * Note that atomicity of rename is dependent on the file system 861 * implementation. 
Please refer to the file system documentation for details 862 * <p> 863 * 864 * @param src path to be renamed 865 * @param dst new path after rename 866 * 867 * @throws AccessControlException If access is denied 868 * @throws FileAlreadyExistsException If <code>dst</code> already exists and 869 * <code>options</options> has {@link Options.Rename#OVERWRITE} 870 * option false. 871 * @throws FileNotFoundException If <code>src</code> does not exist 872 * @throws ParentNotDirectoryException If parent of <code>dst</code> is not a 873 * directory 874 * @throws UnsupportedFileSystemException If file system for <code>src</code> 875 * and <code>dst</code> is not supported 876 * @throws IOException If an I/O error occurred 877 * 878 * Exceptions applicable to file systems accessed over RPC: 879 * @throws RpcClientException If an exception occurred in the RPC client 880 * @throws RpcServerException If an exception occurred in the RPC server 881 * @throws UnexpectedServerException If server implementation throws 882 * undeclared exception to RPC server 883 */ 884 public void rename(final Path src, final Path dst, 885 final Options.Rename... options) throws AccessControlException, 886 FileAlreadyExistsException, FileNotFoundException, 887 ParentNotDirectoryException, UnsupportedFileSystemException, 888 IOException { 889 final Path absSrc = fixRelativePart(src); 890 final Path absDst = fixRelativePart(dst); 891 AbstractFileSystem srcFS = getFSofPath(absSrc); 892 AbstractFileSystem dstFS = getFSofPath(absDst); 893 if(!srcFS.getUri().equals(dstFS.getUri())) { 894 throw new IOException("Renames across AbstractFileSystems not supported"); 895 } 896 try { 897 srcFS.rename(absSrc, absDst, options); 898 } catch (UnresolvedLinkException e) { 899 /* We do not know whether the source or the destination path 900 * was unresolved. Resolve the source path up until the final 901 * path component, then fully resolve the destination. 
902 */ 903 final Path source = resolveIntermediate(absSrc); 904 new FSLinkResolver<Void>() { 905 @Override 906 public Void next(final AbstractFileSystem fs, final Path p) 907 throws IOException, UnresolvedLinkException { 908 fs.rename(source, p, options); 909 return null; 910 } 911 }.resolve(this, absDst); 912 } 913 } 914 915 /** 916 * Set permission of a path. 917 * @param f 918 * @param permission - the new absolute permission (umask is not applied) 919 * 920 * @throws AccessControlException If access is denied 921 * @throws FileNotFoundException If <code>f</code> does not exist 922 * @throws UnsupportedFileSystemException If file system for <code>f</code> 923 * is not supported 924 * @throws IOException If an I/O error occurred 925 * 926 * Exceptions applicable to file systems accessed over RPC: 927 * @throws RpcClientException If an exception occurred in the RPC client 928 * @throws RpcServerException If an exception occurred in the RPC server 929 * @throws UnexpectedServerException If server implementation throws 930 * undeclared exception to RPC server 931 */ 932 public void setPermission(final Path f, final FsPermission permission) 933 throws AccessControlException, FileNotFoundException, 934 UnsupportedFileSystemException, IOException { 935 final Path absF = fixRelativePart(f); 936 new FSLinkResolver<Void>() { 937 @Override 938 public Void next(final AbstractFileSystem fs, final Path p) 939 throws IOException, UnresolvedLinkException { 940 fs.setPermission(p, permission); 941 return null; 942 } 943 }.resolve(this, absF); 944 } 945 946 /** 947 * Set owner of a path (i.e. a file or a directory). The parameters username 948 * and groupname cannot both be null. 949 * 950 * @param f The path 951 * @param username If it is null, the original username remains unchanged. 952 * @param groupname If it is null, the original groupname remains unchanged. 
953 * 954 * @throws AccessControlException If access is denied 955 * @throws FileNotFoundException If <code>f</code> does not exist 956 * @throws UnsupportedFileSystemException If file system for <code>f</code> is 957 * not supported 958 * @throws IOException If an I/O error occurred 959 * 960 * Exceptions applicable to file systems accessed over RPC: 961 * @throws RpcClientException If an exception occurred in the RPC client 962 * @throws RpcServerException If an exception occurred in the RPC server 963 * @throws UnexpectedServerException If server implementation throws 964 * undeclared exception to RPC server 965 * 966 * RuntimeExceptions: 967 * @throws HadoopIllegalArgumentException If <code>username</code> or 968 * <code>groupname</code> is invalid. 969 */ 970 public void setOwner(final Path f, final String username, 971 final String groupname) throws AccessControlException, 972 UnsupportedFileSystemException, FileNotFoundException, 973 IOException { 974 if ((username == null) && (groupname == null)) { 975 throw new HadoopIllegalArgumentException( 976 "username and groupname cannot both be null"); 977 } 978 final Path absF = fixRelativePart(f); 979 new FSLinkResolver<Void>() { 980 @Override 981 public Void next(final AbstractFileSystem fs, final Path p) 982 throws IOException, UnresolvedLinkException { 983 fs.setOwner(p, username, groupname); 984 return null; 985 } 986 }.resolve(this, absF); 987 } 988 989 /** 990 * Set access time of a file. 991 * @param f The path 992 * @param mtime Set the modification time of this file. 993 * The number of milliseconds since epoch (Jan 1, 1970). 994 * A value of -1 means that this call should not set modification time. 995 * @param atime Set the access time of this file. 996 * The number of milliseconds since Jan 1, 1970. 997 * A value of -1 means that this call should not set access time. 
998 * 999 * @throws AccessControlException If access is denied 1000 * @throws FileNotFoundException If <code>f</code> does not exist 1001 * @throws UnsupportedFileSystemException If file system for <code>f</code> is 1002 * not supported 1003 * @throws IOException If an I/O error occurred 1004 * 1005 * Exceptions applicable to file systems accessed over RPC: 1006 * @throws RpcClientException If an exception occurred in the RPC client 1007 * @throws RpcServerException If an exception occurred in the RPC server 1008 * @throws UnexpectedServerException If server implementation throws 1009 * undeclared exception to RPC server 1010 */ 1011 public void setTimes(final Path f, final long mtime, final long atime) 1012 throws AccessControlException, FileNotFoundException, 1013 UnsupportedFileSystemException, IOException { 1014 final Path absF = fixRelativePart(f); 1015 new FSLinkResolver<Void>() { 1016 @Override 1017 public Void next(final AbstractFileSystem fs, final Path p) 1018 throws IOException, UnresolvedLinkException { 1019 fs.setTimes(p, mtime, atime); 1020 return null; 1021 } 1022 }.resolve(this, absF); 1023 } 1024 1025 /** 1026 * Get the checksum of a file. 1027 * 1028 * @param f file path 1029 * 1030 * @return The file checksum. The default return value is null, 1031 * which indicates that no checksum algorithm is implemented 1032 * in the corresponding FileSystem. 
1033 * 1034 * @throws AccessControlException If access is denied 1035 * @throws FileNotFoundException If <code>f</code> does not exist 1036 * @throws IOException If an I/O error occurred 1037 * 1038 * Exceptions applicable to file systems accessed over RPC: 1039 * @throws RpcClientException If an exception occurred in the RPC client 1040 * @throws RpcServerException If an exception occurred in the RPC server 1041 * @throws UnexpectedServerException If server implementation throws 1042 * undeclared exception to RPC server 1043 */ 1044 public FileChecksum getFileChecksum(final Path f) 1045 throws AccessControlException, FileNotFoundException, 1046 IOException { 1047 final Path absF = fixRelativePart(f); 1048 return new FSLinkResolver<FileChecksum>() { 1049 @Override 1050 public FileChecksum next(final AbstractFileSystem fs, final Path p) 1051 throws IOException, UnresolvedLinkException { 1052 return fs.getFileChecksum(p); 1053 } 1054 }.resolve(this, absF); 1055 } 1056 1057 /** 1058 * Set the verify checksum flag for the file system denoted by the path. 1059 * This is only applicable if the 1060 * corresponding FileSystem supports checksum. By default doesn't do anything. 
1061 * @param verifyChecksum 1062 * @param f set the verifyChecksum for the Filesystem containing this path 1063 * 1064 * @throws AccessControlException If access is denied 1065 * @throws FileNotFoundException If <code>f</code> does not exist 1066 * @throws UnsupportedFileSystemException If file system for <code>f</code> is 1067 * not supported 1068 * @throws IOException If an I/O error occurred 1069 * 1070 * Exceptions applicable to file systems accessed over RPC: 1071 * @throws RpcClientException If an exception occurred in the RPC client 1072 * @throws RpcServerException If an exception occurred in the RPC server 1073 * @throws UnexpectedServerException If server implementation throws 1074 * undeclared exception to RPC server 1075 */ 1076 public void setVerifyChecksum(final boolean verifyChecksum, final Path f) 1077 throws AccessControlException, FileNotFoundException, 1078 UnsupportedFileSystemException, IOException { 1079 final Path absF = resolve(fixRelativePart(f)); 1080 getFSofPath(absF).setVerifyChecksum(verifyChecksum); 1081 } 1082 1083 /** 1084 * Return a file status object that represents the path. 
1085 * @param f The path we want information from 1086 * 1087 * @return a FileStatus object 1088 * 1089 * @throws AccessControlException If access is denied 1090 * @throws FileNotFoundException If <code>f</code> does not exist 1091 * @throws UnsupportedFileSystemException If file system for <code>f</code> is 1092 * not supported 1093 * @throws IOException If an I/O error occurred 1094 * 1095 * Exceptions applicable to file systems accessed over RPC: 1096 * @throws RpcClientException If an exception occurred in the RPC client 1097 * @throws RpcServerException If an exception occurred in the RPC server 1098 * @throws UnexpectedServerException If server implementation throws 1099 * undeclared exception to RPC server 1100 */ 1101 public FileStatus getFileStatus(final Path f) throws AccessControlException, 1102 FileNotFoundException, UnsupportedFileSystemException, IOException { 1103 final Path absF = fixRelativePart(f); 1104 return new FSLinkResolver<FileStatus>() { 1105 @Override 1106 public FileStatus next(final AbstractFileSystem fs, final Path p) 1107 throws IOException, UnresolvedLinkException { 1108 return fs.getFileStatus(p); 1109 } 1110 }.resolve(this, absF); 1111 } 1112 1113 /** 1114 * Return a fully qualified version of the given symlink target if it 1115 * has no scheme and authority. Partially and fully qualified paths 1116 * are returned unmodified. 1117 * @param pathFS The AbstractFileSystem of the path 1118 * @param pathWithLink Path that contains the symlink 1119 * @param target The symlink's absolute target 1120 * @return Fully qualified version of the target. 1121 */ 1122 private Path qualifySymlinkTarget(final AbstractFileSystem pathFS, 1123 Path pathWithLink, Path target) { 1124 // NB: makeQualified uses the target's scheme and authority, if 1125 // specified, and the scheme and authority of pathFS, if not. 
1126 final String scheme = target.toUri().getScheme(); 1127 final String auth = target.toUri().getAuthority(); 1128 return (scheme == null && auth == null) 1129 ? target.makeQualified(pathFS.getUri(), pathWithLink.getParent()) 1130 : target; 1131 } 1132 1133 /** 1134 * Return a file status object that represents the path. If the path 1135 * refers to a symlink then the FileStatus of the symlink is returned. 1136 * The behavior is equivalent to #getFileStatus() if the underlying 1137 * file system does not support symbolic links. 1138 * @param f The path we want information from. 1139 * @return A FileStatus object 1140 * 1141 * @throws AccessControlException If access is denied 1142 * @throws FileNotFoundException If <code>f</code> does not exist 1143 * @throws UnsupportedFileSystemException If file system for <code>f</code> is 1144 * not supported 1145 * @throws IOException If an I/O error occurred 1146 */ 1147 public FileStatus getFileLinkStatus(final Path f) 1148 throws AccessControlException, FileNotFoundException, 1149 UnsupportedFileSystemException, IOException { 1150 final Path absF = fixRelativePart(f); 1151 return new FSLinkResolver<FileStatus>() { 1152 @Override 1153 public FileStatus next(final AbstractFileSystem fs, final Path p) 1154 throws IOException, UnresolvedLinkException { 1155 FileStatus fi = fs.getFileLinkStatus(p); 1156 if (fi.isSymlink()) { 1157 fi.setSymlink(qualifySymlinkTarget(fs, p, fi.getSymlink())); 1158 } 1159 return fi; 1160 } 1161 }.resolve(this, absF); 1162 } 1163 1164 /** 1165 * Returns the target of the given symbolic link as it was specified 1166 * when the link was created. Links in the path leading up to the 1167 * final path component are resolved transparently. 1168 * 1169 * @param f the path to return the target of 1170 * @return The un-interpreted target of the symbolic link. 
1171 * 1172 * @throws AccessControlException If access is denied 1173 * @throws FileNotFoundException If path <code>f</code> does not exist 1174 * @throws UnsupportedFileSystemException If file system for <code>f</code> is 1175 * not supported 1176 * @throws IOException If the given path does not refer to a symlink 1177 * or an I/O error occurred 1178 */ 1179 public Path getLinkTarget(final Path f) throws AccessControlException, 1180 FileNotFoundException, UnsupportedFileSystemException, IOException { 1181 final Path absF = fixRelativePart(f); 1182 return new FSLinkResolver<Path>() { 1183 @Override 1184 public Path next(final AbstractFileSystem fs, final Path p) 1185 throws IOException, UnresolvedLinkException { 1186 FileStatus fi = fs.getFileLinkStatus(p); 1187 return fi.getSymlink(); 1188 } 1189 }.resolve(this, absF); 1190 } 1191 1192 /** 1193 * Return blockLocation of the given file for the given offset and len. 1194 * For a nonexistent file or regions, null will be returned. 1195 * 1196 * This call is most helpful with DFS, where it returns 1197 * hostnames of machines that contain the given file. 
1198 * 1199 * @param f - get blocklocations of this file 1200 * @param start position (byte offset) 1201 * @param len (in bytes) 1202 * 1203 * @return block locations for given file at specified offset of len 1204 * 1205 * @throws AccessControlException If access is denied 1206 * @throws FileNotFoundException If <code>f</code> does not exist 1207 * @throws UnsupportedFileSystemException If file system for <code>f</code> is 1208 * not supported 1209 * @throws IOException If an I/O error occurred 1210 * 1211 * Exceptions applicable to file systems accessed over RPC: 1212 * @throws RpcClientException If an exception occurred in the RPC client 1213 * @throws RpcServerException If an exception occurred in the RPC server 1214 * @throws UnexpectedServerException If server implementation throws 1215 * undeclared exception to RPC server 1216 * 1217 * RuntimeExceptions: 1218 * @throws InvalidPathException If path <code>f</code> is invalid 1219 */ 1220 @InterfaceAudience.LimitedPrivate({"HDFS", "MapReduce"}) 1221 @InterfaceStability.Evolving 1222 public BlockLocation[] getFileBlockLocations(final Path f, final long start, 1223 final long len) throws AccessControlException, FileNotFoundException, 1224 UnsupportedFileSystemException, IOException { 1225 final Path absF = fixRelativePart(f); 1226 return new FSLinkResolver<BlockLocation[]>() { 1227 @Override 1228 public BlockLocation[] next(final AbstractFileSystem fs, final Path p) 1229 throws IOException, UnresolvedLinkException { 1230 return fs.getFileBlockLocations(p, start, len); 1231 } 1232 }.resolve(this, absF); 1233 } 1234 1235 /** 1236 * Returns a status object describing the use and capacity of the 1237 * file system denoted by the Parh argument p. 1238 * If the file system has multiple partitions, the 1239 * use and capacity of the partition pointed to by the specified 1240 * path is reflected. 1241 * 1242 * @param f Path for which status should be obtained. 
null means the 1243 * root partition of the default file system. 1244 * 1245 * @return a FsStatus object 1246 * 1247 * @throws AccessControlException If access is denied 1248 * @throws FileNotFoundException If <code>f</code> does not exist 1249 * @throws UnsupportedFileSystemException If file system for <code>f</code> is 1250 * not supported 1251 * @throws IOException If an I/O error occurred 1252 * 1253 * Exceptions applicable to file systems accessed over RPC: 1254 * @throws RpcClientException If an exception occurred in the RPC client 1255 * @throws RpcServerException If an exception occurred in the RPC server 1256 * @throws UnexpectedServerException If server implementation throws 1257 * undeclared exception to RPC server 1258 */ 1259 public FsStatus getFsStatus(final Path f) throws AccessControlException, 1260 FileNotFoundException, UnsupportedFileSystemException, IOException { 1261 if (f == null) { 1262 return defaultFS.getFsStatus(); 1263 } 1264 final Path absF = fixRelativePart(f); 1265 return new FSLinkResolver<FsStatus>() { 1266 @Override 1267 public FsStatus next(final AbstractFileSystem fs, final Path p) 1268 throws IOException, UnresolvedLinkException { 1269 return fs.getFsStatus(p); 1270 } 1271 }.resolve(this, absF); 1272 } 1273 1274 /** 1275 * Creates a symbolic link to an existing file. An exception is thrown if 1276 * the symlink exits, the user does not have permission to create symlink, 1277 * or the underlying file system does not support symlinks. 1278 * 1279 * Symlink permissions are ignored, access to a symlink is determined by 1280 * the permissions of the symlink target. 1281 * 1282 * Symlinks in paths leading up to the final path component are resolved 1283 * transparently. If the final path component refers to a symlink some 1284 * functions operate on the symlink itself, these are: 1285 * - delete(f) and deleteOnExit(f) - Deletes the symlink. 1286 * - rename(src, dst) - If src refers to a symlink, the symlink is 1287 * renamed. 
If dst refers to a symlink, the symlink is over-written. 1288 * - getLinkTarget(f) - Returns the target of the symlink. 1289 * - getFileLinkStatus(f) - Returns a FileStatus object describing 1290 * the symlink. 1291 * Some functions, create() and mkdir(), expect the final path component 1292 * does not exist. If they are given a path that refers to a symlink that 1293 * does exist they behave as if the path referred to an existing file or 1294 * directory. All other functions fully resolve, ie follow, the symlink. 1295 * These are: open, setReplication, setOwner, setTimes, setWorkingDirectory, 1296 * setPermission, getFileChecksum, setVerifyChecksum, getFileBlockLocations, 1297 * getFsStatus, getFileStatus, exists, and listStatus. 1298 * 1299 * Symlink targets are stored as given to createSymlink, assuming the 1300 * underlying file system is capable of storing a fully qualified URI. 1301 * Dangling symlinks are permitted. FileContext supports four types of 1302 * symlink targets, and resolves them as follows 1303 * <pre> 1304 * Given a path referring to a symlink of form: 1305 * 1306 * <---X---> 1307 * fs://host/A/B/link 1308 * <-----Y-----> 1309 * 1310 * In this path X is the scheme and authority that identify the file system, 1311 * and Y is the path leading up to the final path component "link". If Y is 1312 * a symlink itself then let Y' be the target of Y and X' be the scheme and 1313 * authority of Y'. Symlink targets may: 1314 * 1315 * 1. Fully qualified URIs 1316 * 1317 * fs://hostX/A/B/file Resolved according to the target file system. 1318 * 1319 * 2. Partially qualified URIs (eg scheme but no host) 1320 * 1321 * fs:///A/B/file Resolved according to the target file sytem. Eg resolving 1322 * a symlink to hdfs:///A results in an exception because 1323 * HDFS URIs must be fully qualified, while a symlink to 1324 * file:///A will not since Hadoop's local file systems 1325 * require partially qualified URIs. 1326 * 1327 * 3. 
Relative paths 1328 * 1329 * path Resolves to [Y'][path]. Eg if Y resolves to hdfs://host/A and path 1330 * is "../B/file" then [Y'][path] is hdfs://host/B/file 1331 * 1332 * 4. Absolute paths 1333 * 1334 * path Resolves to [X'][path]. Eg if Y resolves hdfs://host/A/B and path 1335 * is "/file" then [X][path] is hdfs://host/file 1336 * </pre> 1337 * 1338 * @param target the target of the symbolic link 1339 * @param link the path to be created that points to target 1340 * @param createParent if true then missing parent dirs are created if 1341 * false then parent must exist 1342 * 1343 * 1344 * @throws AccessControlException If access is denied 1345 * @throws FileAlreadyExistsException If file <code>linkcode> already exists 1346 * @throws FileNotFoundException If <code>target</code> does not exist 1347 * @throws ParentNotDirectoryException If parent of <code>link</code> is not a 1348 * directory. 1349 * @throws UnsupportedFileSystemException If file system for 1350 * <code>target</code> or <code>link</code> is not supported 1351 * @throws IOException If an I/O error occurred 1352 */ 1353 public void createSymlink(final Path target, final Path link, 1354 final boolean createParent) throws AccessControlException, 1355 FileAlreadyExistsException, FileNotFoundException, 1356 ParentNotDirectoryException, UnsupportedFileSystemException, 1357 IOException { 1358 final Path nonRelLink = fixRelativePart(link); 1359 new FSLinkResolver<Void>() { 1360 @Override 1361 public Void next(final AbstractFileSystem fs, final Path p) 1362 throws IOException, UnresolvedLinkException { 1363 fs.createSymlink(target, p, createParent); 1364 return null; 1365 } 1366 }.resolve(this, nonRelLink); 1367 } 1368 1369 /** 1370 * List the statuses of the files/directories in the given path if the path is 1371 * a directory. 
1372 * 1373 * @param f is the path 1374 * 1375 * @return an iterator that traverses statuses of the files/directories 1376 * in the given path 1377 * 1378 * @throws AccessControlException If access is denied 1379 * @throws FileNotFoundException If <code>f</code> does not exist 1380 * @throws UnsupportedFileSystemException If file system for <code>f</code> is 1381 * not supported 1382 * @throws IOException If an I/O error occurred 1383 * 1384 * Exceptions applicable to file systems accessed over RPC: 1385 * @throws RpcClientException If an exception occurred in the RPC client 1386 * @throws RpcServerException If an exception occurred in the RPC server 1387 * @throws UnexpectedServerException If server implementation throws 1388 * undeclared exception to RPC server 1389 */ 1390 public RemoteIterator<FileStatus> listStatus(final Path f) throws 1391 AccessControlException, FileNotFoundException, 1392 UnsupportedFileSystemException, IOException { 1393 final Path absF = fixRelativePart(f); 1394 return new FSLinkResolver<RemoteIterator<FileStatus>>() { 1395 @Override 1396 public RemoteIterator<FileStatus> next( 1397 final AbstractFileSystem fs, final Path p) 1398 throws IOException, UnresolvedLinkException { 1399 return fs.listStatusIterator(p); 1400 } 1401 }.resolve(this, absF); 1402 } 1403 1404 /** 1405 * @return an iterator over the corrupt files under the given path 1406 * (may contain duplicates if a file has more than one corrupt block) 1407 * @throws IOException 1408 */ 1409 public RemoteIterator<Path> listCorruptFileBlocks(Path path) 1410 throws IOException { 1411 final Path absF = fixRelativePart(path); 1412 return new FSLinkResolver<RemoteIterator<Path>>() { 1413 @Override 1414 public RemoteIterator<Path> next(final AbstractFileSystem fs, 1415 final Path p) 1416 throws IOException, UnresolvedLinkException { 1417 return fs.listCorruptFileBlocks(p); 1418 } 1419 }.resolve(this, absF); 1420 } 1421 1422 /** 1423 * List the statuses of the files/directories in the 
given path if the path is 1424 * a directory. 1425 * Return the file's status and block locations If the path is a file. 1426 * 1427 * If a returned status is a file, it contains the file's block locations. 1428 * 1429 * @param f is the path 1430 * 1431 * @return an iterator that traverses statuses of the files/directories 1432 * in the given path 1433 * If any IO exception (for example the input directory gets deleted while 1434 * listing is being executed), next() or hasNext() of the returned iterator 1435 * may throw a RuntimeException with the io exception as the cause. 1436 * 1437 * @throws AccessControlException If access is denied 1438 * @throws FileNotFoundException If <code>f</code> does not exist 1439 * @throws UnsupportedFileSystemException If file system for <code>f</code> is 1440 * not supported 1441 * @throws IOException If an I/O error occurred 1442 * 1443 * Exceptions applicable to file systems accessed over RPC: 1444 * @throws RpcClientException If an exception occurred in the RPC client 1445 * @throws RpcServerException If an exception occurred in the RPC server 1446 * @throws UnexpectedServerException If server implementation throws 1447 * undeclared exception to RPC server 1448 */ 1449 public RemoteIterator<LocatedFileStatus> listLocatedStatus( 1450 final Path f) throws 1451 AccessControlException, FileNotFoundException, 1452 UnsupportedFileSystemException, IOException { 1453 final Path absF = fixRelativePart(f); 1454 return new FSLinkResolver<RemoteIterator<LocatedFileStatus>>() { 1455 @Override 1456 public RemoteIterator<LocatedFileStatus> next( 1457 final AbstractFileSystem fs, final Path p) 1458 throws IOException, UnresolvedLinkException { 1459 return fs.listLocatedStatus(p); 1460 } 1461 }.resolve(this, absF); 1462 } 1463 1464 /** 1465 * Mark a path to be deleted on JVM shutdown. 1466 * 1467 * @param f the existing path to delete. 1468 * 1469 * @return true if deleteOnExit is successful, otherwise false. 
1470 * 1471 * @throws AccessControlException If access is denied 1472 * @throws UnsupportedFileSystemException If file system for <code>f</code> is 1473 * not supported 1474 * @throws IOException If an I/O error occurred 1475 * 1476 * Exceptions applicable to file systems accessed over RPC: 1477 * @throws RpcClientException If an exception occurred in the RPC client 1478 * @throws RpcServerException If an exception occurred in the RPC server 1479 * @throws UnexpectedServerException If server implementation throws 1480 * undeclared exception to RPC server 1481 */ 1482 public boolean deleteOnExit(Path f) throws AccessControlException, 1483 IOException { 1484 if (!this.util().exists(f)) { 1485 return false; 1486 } 1487 synchronized (DELETE_ON_EXIT) { 1488 if (DELETE_ON_EXIT.isEmpty()) { 1489 ShutdownHookManager.get().addShutdownHook(FINALIZER, SHUTDOWN_HOOK_PRIORITY); 1490 } 1491 1492 Set<Path> set = DELETE_ON_EXIT.get(this); 1493 if (set == null) { 1494 set = new TreeSet<Path>(); 1495 DELETE_ON_EXIT.put(this, set); 1496 } 1497 set.add(f); 1498 } 1499 return true; 1500 } 1501 1502 private final Util util; 1503 public Util util() { 1504 return util; 1505 } 1506 1507 1508 /** 1509 * Utility/library methods built over the basic FileContext methods. 1510 * Since this are library functions, the oprtation are not atomic 1511 * and some of them may partially complete if other threads are making 1512 * changes to the same part of the name space. 1513 */ 1514 public class Util { 1515 /** 1516 * Does the file exist? 1517 * Note: Avoid using this method if you already have FileStatus in hand. 
1518 * Instead reuse the FileStatus 1519 * @param f the file or dir to be checked 1520 * 1521 * @throws AccessControlException If access is denied 1522 * @throws IOException If an I/O error occurred 1523 * @throws UnsupportedFileSystemException If file system for <code>f</code> is 1524 * not supported 1525 * 1526 * Exceptions applicable to file systems accessed over RPC: 1527 * @throws RpcClientException If an exception occurred in the RPC client 1528 * @throws RpcServerException If an exception occurred in the RPC server 1529 * @throws UnexpectedServerException If server implementation throws 1530 * undeclared exception to RPC server 1531 */ 1532 public boolean exists(final Path f) throws AccessControlException, 1533 UnsupportedFileSystemException, IOException { 1534 try { 1535 FileStatus fs = FileContext.this.getFileStatus(f); 1536 assert fs != null; 1537 return true; 1538 } catch (FileNotFoundException e) { 1539 return false; 1540 } 1541 } 1542 1543 /** 1544 * Return a list of file status objects that corresponds to supplied paths 1545 * excluding those non-existent paths. 
1546 * 1547 * @param paths list of paths we want information from 1548 * 1549 * @return a list of FileStatus objects 1550 * 1551 * @throws AccessControlException If access is denied 1552 * @throws IOException If an I/O error occurred 1553 * 1554 * Exceptions applicable to file systems accessed over RPC: 1555 * @throws RpcClientException If an exception occurred in the RPC client 1556 * @throws RpcServerException If an exception occurred in the RPC server 1557 * @throws UnexpectedServerException If server implementation throws 1558 * undeclared exception to RPC server 1559 */ 1560 private FileStatus[] getFileStatus(Path[] paths) 1561 throws AccessControlException, IOException { 1562 if (paths == null) { 1563 return null; 1564 } 1565 ArrayList<FileStatus> results = new ArrayList<FileStatus>(paths.length); 1566 for (int i = 0; i < paths.length; i++) { 1567 try { 1568 results.add(FileContext.this.getFileStatus(paths[i])); 1569 } catch (FileNotFoundException fnfe) { 1570 // ignoring 1571 } 1572 } 1573 return results.toArray(new FileStatus[results.size()]); 1574 } 1575 1576 1577 /** 1578 * Return the {@link ContentSummary} of path f. 1579 * @param f path 1580 * 1581 * @return the {@link ContentSummary} of path f. 
1582 * 1583 * @throws AccessControlException If access is denied 1584 * @throws FileNotFoundException If <code>f</code> does not exist 1585 * @throws UnsupportedFileSystemException If file system for 1586 * <code>f</code> is not supported 1587 * @throws IOException If an I/O error occurred 1588 * 1589 * Exceptions applicable to file systems accessed over RPC: 1590 * @throws RpcClientException If an exception occurred in the RPC client 1591 * @throws RpcServerException If an exception occurred in the RPC server 1592 * @throws UnexpectedServerException If server implementation throws 1593 * undeclared exception to RPC server 1594 */ 1595 public ContentSummary getContentSummary(Path f) 1596 throws AccessControlException, FileNotFoundException, 1597 UnsupportedFileSystemException, IOException { 1598 FileStatus status = FileContext.this.getFileStatus(f); 1599 if (status.isFile()) { 1600 return new ContentSummary(status.getLen(), 1, 0); 1601 } 1602 long[] summary = {0, 0, 1}; 1603 RemoteIterator<FileStatus> statusIterator = 1604 FileContext.this.listStatus(f); 1605 while(statusIterator.hasNext()) { 1606 FileStatus s = statusIterator.next(); 1607 ContentSummary c = s.isDirectory() ? getContentSummary(s.getPath()) : 1608 new ContentSummary(s.getLen(), 1, 0); 1609 summary[0] += c.getLength(); 1610 summary[1] += c.getFileCount(); 1611 summary[2] += c.getDirectoryCount(); 1612 } 1613 return new ContentSummary(summary[0], summary[1], summary[2]); 1614 } 1615 1616 /** 1617 * See {@link #listStatus(Path[], PathFilter)} 1618 */ 1619 public FileStatus[] listStatus(Path[] files) throws AccessControlException, 1620 FileNotFoundException, IOException { 1621 return listStatus(files, DEFAULT_FILTER); 1622 } 1623 1624 /** 1625 * Filter files/directories in the given path using the user-supplied path 1626 * filter. 
1627 * 1628 * @param f is the path name 1629 * @param filter is the user-supplied path filter 1630 * 1631 * @return an array of FileStatus objects for the files under the given path 1632 * after applying the filter 1633 * 1634 * @throws AccessControlException If access is denied 1635 * @throws FileNotFoundException If <code>f</code> does not exist 1636 * @throws UnsupportedFileSystemException If file system for 1637 * <code>pathPattern</code> is not supported 1638 * @throws IOException If an I/O error occurred 1639 * 1640 * Exceptions applicable to file systems accessed over RPC: 1641 * @throws RpcClientException If an exception occurred in the RPC client 1642 * @throws RpcServerException If an exception occurred in the RPC server 1643 * @throws UnexpectedServerException If server implementation throws 1644 * undeclared exception to RPC server 1645 */ 1646 public FileStatus[] listStatus(Path f, PathFilter filter) 1647 throws AccessControlException, FileNotFoundException, 1648 UnsupportedFileSystemException, IOException { 1649 ArrayList<FileStatus> results = new ArrayList<FileStatus>(); 1650 listStatus(results, f, filter); 1651 return results.toArray(new FileStatus[results.size()]); 1652 } 1653 1654 /** 1655 * Filter files/directories in the given list of paths using user-supplied 1656 * path filter. 
1657 * 1658 * @param files is a list of paths 1659 * @param filter is the filter 1660 * 1661 * @return a list of statuses for the files under the given paths after 1662 * applying the filter 1663 * 1664 * @throws AccessControlException If access is denied 1665 * @throws FileNotFoundException If a file in <code>files</code> does not 1666 * exist 1667 * @throws IOException If an I/O error occurred 1668 * 1669 * Exceptions applicable to file systems accessed over RPC: 1670 * @throws RpcClientException If an exception occurred in the RPC client 1671 * @throws RpcServerException If an exception occurred in the RPC server 1672 * @throws UnexpectedServerException If server implementation throws 1673 * undeclared exception to RPC server 1674 */ 1675 public FileStatus[] listStatus(Path[] files, PathFilter filter) 1676 throws AccessControlException, FileNotFoundException, IOException { 1677 ArrayList<FileStatus> results = new ArrayList<FileStatus>(); 1678 for (int i = 0; i < files.length; i++) { 1679 listStatus(results, files[i], filter); 1680 } 1681 return results.toArray(new FileStatus[results.size()]); 1682 } 1683 1684 /* 1685 * Filter files/directories in the given path using the user-supplied path 1686 * filter. Results are added to the given array <code>results</code>. 1687 */ 1688 private void listStatus(ArrayList<FileStatus> results, Path f, 1689 PathFilter filter) throws AccessControlException, 1690 FileNotFoundException, IOException { 1691 FileStatus[] listing = listStatus(f); 1692 if (listing != null) { 1693 for (int i = 0; i < listing.length; i++) { 1694 if (filter.accept(listing[i].getPath())) { 1695 results.add(listing[i]); 1696 } 1697 } 1698 } 1699 } 1700 1701 /** 1702 * List the statuses of the files/directories in the given path 1703 * if the path is a directory. 
1704 * 1705 * @param f is the path 1706 * 1707 * @return an array that contains statuses of the files/directories 1708 * in the given path 1709 * 1710 * @throws AccessControlException If access is denied 1711 * @throws FileNotFoundException If <code>f</code> does not exist 1712 * @throws UnsupportedFileSystemException If file system for <code>f</code> is 1713 * not supported 1714 * @throws IOException If an I/O error occurred 1715 * 1716 * Exceptions applicable to file systems accessed over RPC: 1717 * @throws RpcClientException If an exception occurred in the RPC client 1718 * @throws RpcServerException If an exception occurred in the RPC server 1719 * @throws UnexpectedServerException If server implementation throws 1720 * undeclared exception to RPC server 1721 */ 1722 public FileStatus[] listStatus(final Path f) throws AccessControlException, 1723 FileNotFoundException, UnsupportedFileSystemException, 1724 IOException { 1725 final Path absF = fixRelativePart(f); 1726 return new FSLinkResolver<FileStatus[]>() { 1727 @Override 1728 public FileStatus[] next(final AbstractFileSystem fs, final Path p) 1729 throws IOException, UnresolvedLinkException { 1730 return fs.listStatus(p); 1731 } 1732 }.resolve(FileContext.this, absF); 1733 } 1734 1735 /** 1736 * List the statuses and block locations of the files in the given path. 1737 * 1738 * If the path is a directory, 1739 * if recursive is false, returns files in the directory; 1740 * if recursive is true, return files in the subtree rooted at the path. 1741 * The subtree is traversed in the depth-first order. 1742 * If the path is a file, return the file's status and block locations. 1743 * Files across symbolic links are also returned. 
1744 * 1745 * @param f is the path 1746 * @param recursive if the subdirectories need to be traversed recursively 1747 * 1748 * @return an iterator that traverses statuses of the files 1749 * If any IO exception (for example a sub-directory gets deleted while 1750 * listing is being executed), next() or hasNext() of the returned iterator 1751 * may throw a RuntimeException with the IO exception as the cause. 1752 * 1753 * @throws AccessControlException If access is denied 1754 * @throws FileNotFoundException If <code>f</code> does not exist 1755 * @throws UnsupportedFileSystemException If file system for <code>f</code> 1756 * is not supported 1757 * @throws IOException If an I/O error occurred 1758 * 1759 * Exceptions applicable to file systems accessed over RPC: 1760 * @throws RpcClientException If an exception occurred in the RPC client 1761 * @throws RpcServerException If an exception occurred in the RPC server 1762 * @throws UnexpectedServerException If server implementation throws 1763 * undeclared exception to RPC server 1764 */ 1765 public RemoteIterator<LocatedFileStatus> listFiles( 1766 final Path f, final boolean recursive) throws AccessControlException, 1767 FileNotFoundException, UnsupportedFileSystemException, 1768 IOException { 1769 return new RemoteIterator<LocatedFileStatus>() { 1770 private Stack<RemoteIterator<LocatedFileStatus>> itors = 1771 new Stack<RemoteIterator<LocatedFileStatus>>(); 1772 RemoteIterator<LocatedFileStatus> curItor = listLocatedStatus(f); 1773 LocatedFileStatus curFile; 1774 1775 /** 1776 * Returns <tt>true</tt> if the iterator has more files. 1777 * 1778 * @return <tt>true</tt> if the iterator has more files. 
1779 * @throws AccessControlException if not allowed to access next 1780 * file's status or locations 1781 * @throws FileNotFoundException if next file does not exist any more 1782 * @throws UnsupportedFileSystemException if next file's 1783 * fs is unsupported 1784 * @throws IOException for all other IO errors 1785 * for example, NameNode is not avaialbe or 1786 * NameNode throws IOException due to an error 1787 * while getting the status or block locations 1788 */ 1789 @Override 1790 public boolean hasNext() throws IOException { 1791 while (curFile == null) { 1792 if (curItor.hasNext()) { 1793 handleFileStat(curItor.next()); 1794 } else if (!itors.empty()) { 1795 curItor = itors.pop(); 1796 } else { 1797 return false; 1798 } 1799 } 1800 return true; 1801 } 1802 1803 /** 1804 * Process the input stat. 1805 * If it is a file, return the file stat. 1806 * If it is a directory, traverse the directory if recursive is true; 1807 * ignore it if recursive is false. 1808 * If it is a symlink, resolve the symlink first and then process it 1809 * depending on if it is a file or directory. 
1810 * @param stat input status 1811 * @throws AccessControlException if access is denied 1812 * @throws FileNotFoundException if file is not found 1813 * @throws UnsupportedFileSystemException if fs is not supported 1814 * @throws IOException for all other IO errors 1815 */ 1816 private void handleFileStat(LocatedFileStatus stat) 1817 throws IOException { 1818 if (stat.isFile()) { // file 1819 curFile = stat; 1820 } else if (stat.isSymlink()) { // symbolic link 1821 // resolve symbolic link 1822 FileStatus symstat = FileContext.this.getFileStatus( 1823 stat.getSymlink()); 1824 if (symstat.isFile() || (recursive && symstat.isDirectory())) { 1825 itors.push(curItor); 1826 curItor = listLocatedStatus(stat.getPath()); 1827 } 1828 } else if (recursive) { // directory 1829 itors.push(curItor); 1830 curItor = listLocatedStatus(stat.getPath()); 1831 } 1832 } 1833 1834 /** 1835 * Returns the next file's status with its block locations 1836 * 1837 * @throws AccessControlException if not allowed to access next 1838 * file's status or locations 1839 * @throws FileNotFoundException if next file does not exist any more 1840 * @throws UnsupportedFileSystemException if next file's 1841 * fs is unsupported 1842 * @throws IOException for all other IO errors 1843 * for example, NameNode is not avaialbe or 1844 * NameNode throws IOException due to an error 1845 * while getting the status or block locations 1846 */ 1847 @Override 1848 public LocatedFileStatus next() throws IOException { 1849 if (hasNext()) { 1850 LocatedFileStatus result = curFile; 1851 curFile = null; 1852 return result; 1853 } 1854 throw new java.util.NoSuchElementException("No more entry in " + f); 1855 } 1856 }; 1857 } 1858 1859 /** 1860 * <p>Return all the files that match filePattern and are not checksum 1861 * files. Results are sorted by their names. 
*
     * <p>
     * A filename pattern is composed of <i>regular</i> characters and
     * <i>special pattern matching</i> characters, which are:
     *
     * <dl>
     *  <dd>
     *   <dl>
     *    <p>
     *    <dt> <tt> ? </tt>
     *    <dd> Matches any single character.
     *
     *    <p>
     *    <dt> <tt> * </tt>
     *    <dd> Matches zero or more characters.
     *
     *    <p>
     *    <dt> <tt> [<i>abc</i>] </tt>
     *    <dd> Matches a single character from character set
     *     <tt>{<i>a,b,c</i>}</tt>.
     *
     *    <p>
     *    <dt> <tt> [<i>a</i>-<i>b</i>] </tt>
     *    <dd> Matches a single character from the character range
     *     <tt>{<i>a...b</i>}</tt>. Note: character <tt><i>a</i></tt> must be
     *     lexicographically less than or equal to character <tt><i>b</i></tt>.
     *
     *    <p>
     *    <dt> <tt> [^<i>a</i>] </tt>
     *    <dd> Matches a single char that is not from character set or range
     *     <tt>{<i>a</i>}</tt>. Note that the <tt>^</tt> character must occur
     *     immediately to the right of the opening bracket.
     *
     *    <p>
     *    <dt> <tt> \<i>c</i> </tt>
     *    <dd> Removes (escapes) any special meaning of character <i>c</i>.
     *
     *    <p>
     *    <dt> <tt> {ab,cd} </tt>
     *    <dd> Matches a string from the string set <tt>{<i>ab, cd</i>} </tt>
     *
     *    <p>
     *    <dt> <tt> {ab,c{de,fh}} </tt>
     *    <dd> Matches a string from string set <tt>{<i>ab, cde, cfh</i>}</tt>
     *
     *   </dl>
     *  </dd>
     * </dl>
     *
     * @param pathPattern a glob pattern (see above) specifying a path pattern
     *
     * @return an array of FileStatus objects for the paths that match the
     *         path pattern
     *
     * @throws AccessControlException If access is denied
     * @throws UnsupportedFileSystemException If file system for
     *         <code>pathPattern</code> is not supported
     * @throws IOException If an I/O error occurred
     *
     * Exceptions applicable to file systems accessed over RPC:
     * @throws RpcClientException If an exception occurred in the RPC client
     * @throws RpcServerException If an exception occurred in the RPC server
     * @throws UnexpectedServerException If server implementation throws
     *           undeclared exception to RPC server
     */
    public FileStatus[] globStatus(Path pathPattern)
        throws AccessControlException, UnsupportedFileSystemException,
        IOException {
      // Delegates to the two-argument overload with the default filter.
      return globStatus(pathPattern, DEFAULT_FILTER);
    }

    /**
     * Return an array of FileStatus objects whose path names match pathPattern
     * and are accepted by the user-supplied path filter. Results are sorted by
     * their path names.
     * Return null if pathPattern has no glob and the path does not exist.
     * Return an empty array if pathPattern has a glob and no path matches it.
1938 * 1939 * @param pathPattern regular expression specifying the path pattern 1940 * @param filter user-supplied path filter 1941 * 1942 * @return an array of FileStatus objects 1943 * 1944 * @throws AccessControlException If access is denied 1945 * @throws UnsupportedFileSystemException If file system for 1946 * <code>pathPattern</code> is not supported 1947 * @throws IOException If an I/O error occurred 1948 * 1949 * Exceptions applicable to file systems accessed over RPC: 1950 * @throws RpcClientException If an exception occurred in the RPC client 1951 * @throws RpcServerException If an exception occurred in the RPC server 1952 * @throws UnexpectedServerException If server implementation throws 1953 * undeclared exception to RPC server 1954 */ 1955 public FileStatus[] globStatus(final Path pathPattern, 1956 final PathFilter filter) throws AccessControlException, 1957 UnsupportedFileSystemException, IOException { 1958 URI uri = getFSofPath(fixRelativePart(pathPattern)).getUri(); 1959 1960 String filename = pathPattern.toUri().getPath(); 1961 1962 List<String> filePatterns = GlobExpander.expand(filename); 1963 if (filePatterns.size() == 1) { 1964 Path absPathPattern = fixRelativePart(pathPattern); 1965 return globStatusInternal(uri, new Path(absPathPattern.toUri() 1966 .getPath()), filter); 1967 } else { 1968 List<FileStatus> results = new ArrayList<FileStatus>(); 1969 for (String iFilePattern : filePatterns) { 1970 Path iAbsFilePattern = fixRelativePart(new Path(iFilePattern)); 1971 FileStatus[] files = globStatusInternal(uri, iAbsFilePattern, filter); 1972 for (FileStatus file : files) { 1973 results.add(file); 1974 } 1975 } 1976 return results.toArray(new FileStatus[results.size()]); 1977 } 1978 } 1979 1980 /** 1981 * 1982 * @param uri for all the inPathPattern 1983 * @param inPathPattern - without the scheme & authority (take from uri) 1984 * @param filter 1985 * 1986 * @return an array of FileStatus objects 1987 * 1988 * @throws AccessControlException If 
access is denied 1989 * @throws IOException If an I/O error occurred 1990 */ 1991 private FileStatus[] globStatusInternal(final URI uri, 1992 final Path inPathPattern, final PathFilter filter) 1993 throws AccessControlException, IOException 1994 { 1995 Path[] parents = new Path[1]; 1996 int level = 0; 1997 1998 assert(inPathPattern.toUri().getScheme() == null && 1999 inPathPattern.toUri().getAuthority() == null && 2000 inPathPattern.isUriPathAbsolute()); 2001 2002 2003 String filename = inPathPattern.toUri().getPath(); 2004 2005 // path has only zero component 2006 if ("".equals(filename) || Path.SEPARATOR.equals(filename)) { 2007 Path p = inPathPattern.makeQualified(uri, null); 2008 return getFileStatus(new Path[]{p}); 2009 } 2010 2011 // path has at least one component 2012 String[] components = filename.split(Path.SEPARATOR); 2013 2014 // Path is absolute, first component is "/" hence first component 2015 // is the uri root 2016 parents[0] = new Path(new Path(uri), new Path("/")); 2017 level = 1; 2018 2019 // glob the paths that match the parent path, ie. 
[0, components.length-1] 2020 boolean[] hasGlob = new boolean[]{false}; 2021 Path[] relParentPaths = 2022 globPathsLevel(parents, components, level, hasGlob); 2023 FileStatus[] results; 2024 2025 if (relParentPaths == null || relParentPaths.length == 0) { 2026 results = null; 2027 } else { 2028 // fix the pathes to be abs 2029 Path[] parentPaths = new Path [relParentPaths.length]; 2030 for(int i=0; i<relParentPaths.length; i++) { 2031 parentPaths[i] = relParentPaths[i].makeQualified(uri, null); 2032 } 2033 2034 // Now work on the last component of the path 2035 GlobFilter fp = 2036 new GlobFilter(components[components.length - 1], filter); 2037 if (fp.hasPattern()) { // last component has a pattern 2038 // list parent directories and then glob the results 2039 try { 2040 results = listStatus(parentPaths, fp); 2041 } catch (FileNotFoundException e) { 2042 results = null; 2043 } 2044 hasGlob[0] = true; 2045 } else { // last component does not have a pattern 2046 // get all the path names 2047 ArrayList<Path> filteredPaths = 2048 new ArrayList<Path>(parentPaths.length); 2049 for (int i = 0; i < parentPaths.length; i++) { 2050 parentPaths[i] = new Path(parentPaths[i], 2051 components[components.length - 1]); 2052 if (fp.accept(parentPaths[i])) { 2053 filteredPaths.add(parentPaths[i]); 2054 } 2055 } 2056 // get all their statuses 2057 results = getFileStatus( 2058 filteredPaths.toArray(new Path[filteredPaths.size()])); 2059 } 2060 } 2061 2062 // Decide if the pathPattern contains a glob or not 2063 if (results == null) { 2064 if (hasGlob[0]) { 2065 results = new FileStatus[0]; 2066 } 2067 } else { 2068 if (results.length == 0) { 2069 if (!hasGlob[0]) { 2070 results = null; 2071 } 2072 } else { 2073 Arrays.sort(results); 2074 } 2075 } 2076 return results; 2077 } 2078 2079 /* 2080 * For a path of N components, return a list of paths that match the 2081 * components [<code>level</code>, <code>N-1</code>]. 
2082 */ 2083 private Path[] globPathsLevel(Path[] parents, String[] filePattern, 2084 int level, boolean[] hasGlob) throws AccessControlException, 2085 FileNotFoundException, IOException { 2086 if (level == filePattern.length - 1) { 2087 return parents; 2088 } 2089 if (parents == null || parents.length == 0) { 2090 return null; 2091 } 2092 GlobFilter fp = new GlobFilter(filePattern[level]); 2093 if (fp.hasPattern()) { 2094 try { 2095 parents = FileUtil.stat2Paths(listStatus(parents, fp)); 2096 } catch (FileNotFoundException e) { 2097 parents = null; 2098 } 2099 hasGlob[0] = true; 2100 } else { 2101 for (int i = 0; i < parents.length; i++) { 2102 parents[i] = new Path(parents[i], filePattern[level]); 2103 } 2104 } 2105 return globPathsLevel(parents, filePattern, level + 1, hasGlob); 2106 } 2107 2108 /** 2109 * Copy file from src to dest. See 2110 * {@link #copy(Path, Path, boolean, boolean)} 2111 */ 2112 public boolean copy(final Path src, final Path dst) 2113 throws AccessControlException, FileAlreadyExistsException, 2114 FileNotFoundException, ParentNotDirectoryException, 2115 UnsupportedFileSystemException, IOException { 2116 return copy(src, dst, false, false); 2117 } 2118 2119 /** 2120 * Copy from src to dst, optionally deleting src and overwriting dst. 2121 * @param src 2122 * @param dst 2123 * @param deleteSource - delete src if true 2124 * @param overwrite overwrite dst if true; throw IOException if dst exists 2125 * and overwrite is false. 
2126 * 2127 * @return true if copy is successful 2128 * 2129 * @throws AccessControlException If access is denied 2130 * @throws FileAlreadyExistsException If <code>dst</code> already exists 2131 * @throws FileNotFoundException If <code>src</code> does not exist 2132 * @throws ParentNotDirectoryException If parent of <code>dst</code> is not 2133 * a directory 2134 * @throws UnsupportedFileSystemException If file system for 2135 * <code>src</code> or <code>dst</code> is not supported 2136 * @throws IOException If an I/O error occurred 2137 * 2138 * Exceptions applicable to file systems accessed over RPC: 2139 * @throws RpcClientException If an exception occurred in the RPC client 2140 * @throws RpcServerException If an exception occurred in the RPC server 2141 * @throws UnexpectedServerException If server implementation throws 2142 * undeclared exception to RPC server 2143 * 2144 * RuntimeExceptions: 2145 * @throws InvalidPathException If path <code>dst</code> is invalid 2146 */ 2147 public boolean copy(final Path src, final Path dst, boolean deleteSource, 2148 boolean overwrite) throws AccessControlException, 2149 FileAlreadyExistsException, FileNotFoundException, 2150 ParentNotDirectoryException, UnsupportedFileSystemException, 2151 IOException { 2152 checkNotSchemeWithRelative(src); 2153 checkNotSchemeWithRelative(dst); 2154 Path qSrc = makeQualified(src); 2155 Path qDst = makeQualified(dst); 2156 checkDest(qSrc.getName(), qDst, overwrite); 2157 FileStatus fs = FileContext.this.getFileStatus(qSrc); 2158 if (fs.isDirectory()) { 2159 checkDependencies(qSrc, qDst); 2160 mkdir(qDst, FsPermission.getDefault(), true); 2161 FileStatus[] contents = listStatus(qSrc); 2162 for (FileStatus content : contents) { 2163 copy(makeQualified(content.getPath()), makeQualified(new Path(qDst, 2164 content.getPath().getName())), deleteSource, overwrite); 2165 } 2166 } else { 2167 InputStream in=null; 2168 OutputStream out = null; 2169 try { 2170 in = open(qSrc); 2171 
EnumSet<CreateFlag> createFlag = overwrite ? EnumSet.of( 2172 CreateFlag.CREATE, CreateFlag.OVERWRITE) : 2173 EnumSet.of(CreateFlag.CREATE); 2174 out = create(qDst, createFlag); 2175 IOUtils.copyBytes(in, out, conf, true); 2176 } catch (IOException e) { 2177 IOUtils.closeStream(out); 2178 IOUtils.closeStream(in); 2179 throw e; 2180 } 2181 } 2182 if (deleteSource) { 2183 return delete(qSrc, true); 2184 } else { 2185 return true; 2186 } 2187 } 2188 } 2189 2190 /** 2191 * Check if copying srcName to dst would overwrite an existing 2192 * file or directory. 2193 * @param srcName File or directory to be copied. 2194 * @param dst Destination to copy srcName to. 2195 * @param overwrite Whether it's ok to overwrite an existing file. 2196 * @throws AccessControlException If access is denied. 2197 * @throws IOException If dst is an existing directory, or dst is an 2198 * existing file and the overwrite option is not passed. 2199 */ 2200 private void checkDest(String srcName, Path dst, boolean overwrite) 2201 throws AccessControlException, IOException { 2202 try { 2203 FileStatus dstFs = getFileStatus(dst); 2204 if (dstFs.isDirectory()) { 2205 if (null == srcName) { 2206 throw new IOException("Target " + dst + " is a directory"); 2207 } 2208 // Recurse to check if dst/srcName exists. 2209 checkDest(null, new Path(dst, srcName), overwrite); 2210 } else if (!overwrite) { 2211 throw new IOException("Target " + new Path(dst, srcName) 2212 + " already exists"); 2213 } 2214 } catch (FileNotFoundException e) { 2215 // dst does not exist - OK to copy. 
2216 } 2217 } 2218 2219 // 2220 // If the destination is a subdirectory of the source, then 2221 // generate exception 2222 // 2223 private static void checkDependencies(Path qualSrc, Path qualDst) 2224 throws IOException { 2225 if (isSameFS(qualSrc, qualDst)) { 2226 String srcq = qualSrc.toString() + Path.SEPARATOR; 2227 String dstq = qualDst.toString() + Path.SEPARATOR; 2228 if (dstq.startsWith(srcq)) { 2229 if (srcq.length() == dstq.length()) { 2230 throw new IOException("Cannot copy " + qualSrc + " to itself."); 2231 } else { 2232 throw new IOException("Cannot copy " + qualSrc + 2233 " to its subdirectory " + qualDst); 2234 } 2235 } 2236 } 2237 } 2238 2239 /** 2240 * Are qualSrc and qualDst of the same file system? 2241 * @param qualPath1 - fully qualified path 2242 * @param qualPath2 - fully qualified path 2243 * @return 2244 */ 2245 private static boolean isSameFS(Path qualPath1, Path qualPath2) { 2246 URI srcUri = qualPath1.toUri(); 2247 URI dstUri = qualPath2.toUri(); 2248 return (srcUri.getScheme().equals(dstUri.getScheme()) && 2249 !(srcUri.getAuthority() != null && dstUri.getAuthority() != null && srcUri 2250 .getAuthority().equals(dstUri.getAuthority()))); 2251 } 2252 2253 /** 2254 * Deletes all the paths in deleteOnExit on JVM shutdown. 2255 */ 2256 static class FileContextFinalizer implements Runnable { 2257 @Override 2258 public synchronized void run() { 2259 processDeleteOnExit(); 2260 } 2261 } 2262 2263 /** 2264 * Resolves all symbolic links in the specified path. 2265 * Returns the new path object. 
2266 */ 2267 protected Path resolve(final Path f) throws FileNotFoundException, 2268 UnresolvedLinkException, AccessControlException, IOException { 2269 return new FSLinkResolver<Path>() { 2270 @Override 2271 public Path next(final AbstractFileSystem fs, final Path p) 2272 throws IOException, UnresolvedLinkException { 2273 return fs.resolvePath(p); 2274 } 2275 }.resolve(this, f); 2276 } 2277 2278 /** 2279 * Resolves all symbolic links in the specified path leading up 2280 * to, but not including the final path component. 2281 * @param f path to resolve 2282 * @return the new path object. 2283 */ 2284 protected Path resolveIntermediate(final Path f) throws IOException { 2285 return new FSLinkResolver<FileStatus>() { 2286 @Override 2287 public FileStatus next(final AbstractFileSystem fs, final Path p) 2288 throws IOException, UnresolvedLinkException { 2289 return fs.getFileLinkStatus(p); 2290 } 2291 }.resolve(this, f).getPath(); 2292 } 2293 2294 /** 2295 * Returns the list of AbstractFileSystems accessed in the path. The list may 2296 * contain more than one AbstractFileSystems objects in case of symlinks. 2297 * 2298 * @param f 2299 * Path which needs to be resolved 2300 * @return List of AbstractFileSystems accessed in the path 2301 * @throws IOException 2302 */ 2303 Set<AbstractFileSystem> resolveAbstractFileSystems(final Path f) 2304 throws IOException { 2305 final Path absF = fixRelativePart(f); 2306 final HashSet<AbstractFileSystem> result 2307 = new HashSet<AbstractFileSystem>(); 2308 new FSLinkResolver<Void>() { 2309 @Override 2310 public Void next(final AbstractFileSystem fs, final Path p) 2311 throws IOException, UnresolvedLinkException { 2312 result.add(fs); 2313 fs.getFileStatus(p); 2314 return null; 2315 } 2316 }.resolve(this, absF); 2317 return result; 2318 } 2319 2320 /** 2321 * Class used to perform an operation on and resolve symlinks in a 2322 * path. The operation may potentially span multiple file systems. 
2323 */ 2324 protected abstract class FSLinkResolver<T> { 2325 // The maximum number of symbolic link components in a path 2326 private static final int MAX_PATH_LINKS = 32; 2327 2328 /** 2329 * Generic helper function overridden on instantiation to perform a 2330 * specific operation on the given file system using the given path 2331 * which may result in an UnresolvedLinkException. 2332 * @param fs AbstractFileSystem to perform the operation on. 2333 * @param p Path given the file system. 2334 * @return Generic type determined by the specific implementation. 2335 * @throws UnresolvedLinkException If symbolic link <code>path</code> could 2336 * not be resolved 2337 * @throws IOException an I/O error occured 2338 */ 2339 public abstract T next(final AbstractFileSystem fs, final Path p) 2340 throws IOException, UnresolvedLinkException; 2341 2342 /** 2343 * Performs the operation specified by the next function, calling it 2344 * repeatedly until all symlinks in the given path are resolved. 2345 * @param fc FileContext used to access file systems. 2346 * @param p The path to resolve symlinks in. 2347 * @return Generic type determined by the implementation of next. 2348 * @throws IOException 2349 */ 2350 public T resolve(final FileContext fc, Path p) throws IOException { 2351 int count = 0; 2352 T in = null; 2353 Path first = p; 2354 // NB: More than one AbstractFileSystem can match a scheme, eg 2355 // "file" resolves to LocalFs but could have come by RawLocalFs. 
2356 AbstractFileSystem fs = fc.getFSofPath(p); 2357 2358 // Loop until all symlinks are resolved or the limit is reached 2359 for (boolean isLink = true; isLink;) { 2360 try { 2361 in = next(fs, p); 2362 isLink = false; 2363 } catch (UnresolvedLinkException e) { 2364 if (count++ > MAX_PATH_LINKS) { 2365 throw new IOException("Possible cyclic loop while " + 2366 "following symbolic link " + first); 2367 } 2368 // Resolve the first unresolved path component 2369 p = qualifySymlinkTarget(fs, p, fs.getLinkTarget(p)); 2370 fs = fc.getFSofPath(p); 2371 } 2372 } 2373 return in; 2374 } 2375 } 2376 2377 /** 2378 * Get the statistics for a particular file system 2379 * 2380 * @param uri 2381 * the uri to lookup the statistics. Only scheme and authority part 2382 * of the uri are used as the key to store and lookup. 2383 * @return a statistics object 2384 */ 2385 public static Statistics getStatistics(URI uri) { 2386 return AbstractFileSystem.getStatistics(uri); 2387 } 2388 2389 /** 2390 * Clears all the statistics stored in AbstractFileSystem, for all the file 2391 * systems. 2392 */ 2393 public static void clearStatistics() { 2394 AbstractFileSystem.clearStatistics(); 2395 } 2396 2397 /** 2398 * Prints the statistics to standard output. File System is identified by the 2399 * scheme and authority. 2400 */ 2401 public static void printStatistics() { 2402 AbstractFileSystem.printStatistics(); 2403 } 2404 2405 /** 2406 * @return Map of uri and statistics for each filesystem instantiated. The uri 2407 * consists of scheme and authority for the filesystem. 2408 */ 2409 public static Map<URI, Statistics> getAllStatistics() { 2410 return AbstractFileSystem.getAllStatistics(); 2411 } 2412 2413 /** 2414 * Get delegation tokens for the file systems accessed for a given 2415 * path. 2416 * @param p Path for which delegations tokens are requested. 2417 * @param renewer the account name that is allowed to renew the token. 2418 * @return List of delegation tokens. 
2419 * @throws IOException 2420 */ 2421 @InterfaceAudience.LimitedPrivate( { "HDFS", "MapReduce" }) 2422 public List<Token<?>> getDelegationTokens( 2423 Path p, String renewer) throws IOException { 2424 Set<AbstractFileSystem> afsSet = resolveAbstractFileSystems(p); 2425 List<Token<?>> tokenList = 2426 new ArrayList<Token<?>>(); 2427 for (AbstractFileSystem afs : afsSet) { 2428 List<Token<?>> afsTokens = afs.getDelegationTokens(renewer); 2429 tokenList.addAll(afsTokens); 2430 } 2431 return tokenList; 2432 } 2433 }