001 /** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018 package org.apache.hadoop.fs; 019 020 import java.io.FileNotFoundException; 021 import java.io.IOException; 022 import java.io.InputStream; 023 import java.io.OutputStream; 024 import java.net.URI; 025 import java.security.PrivilegedExceptionAction; 026 import java.util.ArrayList; 027 import java.util.Arrays; 028 import java.util.EnumSet; 029 import java.util.HashSet; 030 import java.util.IdentityHashMap; 031 import java.util.List; 032 import java.util.Map; 033 import java.util.Set; 034 import java.util.Stack; 035 import java.util.TreeSet; 036 import java.util.Map.Entry; 037 038 import org.apache.commons.logging.Log; 039 import org.apache.commons.logging.LogFactory; 040 import org.apache.hadoop.HadoopIllegalArgumentException; 041 import org.apache.hadoop.classification.InterfaceAudience; 042 import org.apache.hadoop.classification.InterfaceStability; 043 import org.apache.hadoop.conf.Configuration; 044 import org.apache.hadoop.fs.FileSystem.Statistics; 045 import org.apache.hadoop.fs.Options.CreateOpts; 046 import org.apache.hadoop.fs.permission.FsPermission; 047 import static 
org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY; 048 import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_DEFAULT_NAME_DEFAULT; 049 import org.apache.hadoop.io.IOUtils; 050 import org.apache.hadoop.ipc.RpcClientException; 051 import org.apache.hadoop.ipc.RpcServerException; 052 import org.apache.hadoop.ipc.UnexpectedServerException; 053 import org.apache.hadoop.fs.InvalidPathException; 054 import org.apache.hadoop.security.AccessControlException; 055 import org.apache.hadoop.security.UserGroupInformation; 056 import org.apache.hadoop.security.token.Token; 057 import org.apache.hadoop.util.ShutdownHookManager; 058 059 /** 060 * The FileContext class provides an interface to the application writer for 061 * using the Hadoop file system. 062 * It provides a set of methods for the usual operation: create, open, 063 * list, etc 064 * 065 * <p> 066 * <b> *** Path Names *** </b> 067 * <p> 068 * 069 * The Hadoop file system supports a URI name space and URI names. 070 * It offers a forest of file systems that can be referenced using fully 071 * qualified URIs. 072 * Two common Hadoop file systems implementations are 073 * <ul> 074 * <li> the local file system: file:///path 075 * <li> the hdfs file system hdfs://nnAddress:nnPort/path 076 * </ul> 077 * 078 * While URI names are very flexible, it requires knowing the name or address 079 * of the server. For convenience one often wants to access the default system 080 * in one's environment without knowing its name/address. This has an 081 * additional benefit that it allows one to change one's default fs 082 * (e.g. admin moves application from cluster1 to cluster2). 083 * <p> 084 * 085 * To facilitate this, Hadoop supports a notion of a default file system. 086 * The user can set his default file system, although this is 087 * typically set up for you in your environment via your default config. 
* A default file system implies a default scheme and authority; slash-relative
 * names (such as /for/bar) are resolved relative to that default FS.
 * Similarly a user can also have working-directory-relative names (i.e. names
 * not starting with a slash). While the working directory is generally in the
 * same default FS, the wd can be in a different FS.
 * <p>
 * Hence Hadoop path names can be one of:
 * <ul>
 * <li> fully qualified URI: scheme://authority/path
 * <li> slash relative names: /path relative to the default file system
 * <li> wd-relative names: path relative to the working dir
 * </ul>
 * Relative paths with scheme (scheme:foo/bar) are illegal.
 *
 * <p>
 * <b>****The Role of the FileContext and configuration defaults****</b>
 * <p>
 * The FileContext provides file namespace context for resolving file names;
 * it also contains the umask for permissions. In that sense it is like the
 * per-process file-related state in a Unix system.
 * These two properties
 * <ul>
 * <li> default file system (i.e. your slash)
 * <li> umask
 * </ul>
 * are, in general, obtained from the default configuration file
 * in your environment (see {@link Configuration}).
 *
 * No other configuration parameters are obtained from the default config as
 * far as the file context layer is concerned. All file system instances
 * (i.e. deployments of file systems) have default properties; we call these
 * server side (SS) defaults. Operations like create allow one to select many
 * properties: either pass them in as explicit parameters or use
 * the SS properties.
 * <p>
 * The file system related SS defaults are
 * <ul>
 * <li> the home directory (default is "/user/userName")
 * <li> the initial wd (only for local fs)
 * <li> replication factor
 * <li> block size
 * <li> buffer size
 * <li> encryptDataTransfer
 * <li> checksum option.
(checksumType and bytesPerChecksum) 132 * </ul> 133 * 134 * <p> 135 * <b> *** Usage Model for the FileContext class *** </b> 136 * <p> 137 * Example 1: use the default config read from the $HADOOP_CONFIG/core.xml. 138 * Unspecified values come from core-defaults.xml in the release jar. 139 * <ul> 140 * <li> myFContext = FileContext.getFileContext(); // uses the default config 141 * // which has your default FS 142 * <li> myFContext.create(path, ...); 143 * <li> myFContext.setWorkingDir(path) 144 * <li> myFContext.open (path, ...); 145 * </ul> 146 * Example 2: Get a FileContext with a specific URI as the default FS 147 * <ul> 148 * <li> myFContext = FileContext.getFileContext(URI) 149 * <li> myFContext.create(path, ...); 150 * ... 151 * </ul> 152 * Example 3: FileContext with local file system as the default 153 * <ul> 154 * <li> myFContext = FileContext.getLocalFSFileContext() 155 * <li> myFContext.create(path, ...); 156 * <li> ... 157 * </ul> 158 * Example 4: Use a specific config, ignoring $HADOOP_CONFIG 159 * Generally you should not need use a config unless you are doing 160 * <ul> 161 * <li> configX = someConfigSomeOnePassedToYou. 162 * <li> myFContext = getFileContext(configX); // configX is not changed, 163 * // is passed down 164 * <li> myFContext.create(path, ...); 165 * <li>... 166 * </ul> 167 * 168 */ 169 170 @InterfaceAudience.Public 171 @InterfaceStability.Evolving /*Evolving for a release,to be changed to Stable */ 172 public final class FileContext { 173 174 public static final Log LOG = LogFactory.getLog(FileContext.class); 175 /** 176 * Default permission for directory and symlink 177 * In previous versions, this default permission was also used to 178 * create files, so files created end up with ugo+x permission. 179 * See HADOOP-9155 for detail. 180 * Two new constants are added to solve this, please use 181 * {@link FileContext#DIR_DEFAULT_PERM} for directory, and use 182 * {@link FileContext#FILE_DEFAULT_PERM} for file. 
183 * This constant is kept for compatibility. 184 */ 185 public static final FsPermission DEFAULT_PERM = FsPermission.getDefault(); 186 /** 187 * Default permission for directory 188 */ 189 public static final FsPermission DIR_DEFAULT_PERM = FsPermission.getDirDefault(); 190 /** 191 * Default permission for file 192 */ 193 public static final FsPermission FILE_DEFAULT_PERM = FsPermission.getFileDefault(); 194 195 /** 196 * Priority of the FileContext shutdown hook. 197 */ 198 public static final int SHUTDOWN_HOOK_PRIORITY = 20; 199 200 /** 201 * List of files that should be deleted on JVM shutdown. 202 */ 203 static final Map<FileContext, Set<Path>> DELETE_ON_EXIT = 204 new IdentityHashMap<FileContext, Set<Path>>(); 205 206 /** JVM shutdown hook thread. */ 207 static final FileContextFinalizer FINALIZER = 208 new FileContextFinalizer(); 209 210 private static final PathFilter DEFAULT_FILTER = new PathFilter() { 211 @Override 212 public boolean accept(final Path file) { 213 return true; 214 } 215 }; 216 217 /** 218 * The FileContext is defined by. 219 * 1) defaultFS (slash) 220 * 2) wd 221 * 3) umask 222 */ 223 private final AbstractFileSystem defaultFS; //default FS for this FileContext. 224 private Path workingDir; // Fully qualified 225 private FsPermission umask; 226 private final Configuration conf; 227 private final UserGroupInformation ugi; 228 229 private FileContext(final AbstractFileSystem defFs, 230 final FsPermission theUmask, final Configuration aConf) { 231 defaultFS = defFs; 232 umask = FsPermission.getUMask(aConf); 233 conf = aConf; 234 try { 235 ugi = UserGroupInformation.getCurrentUser(); 236 } catch (IOException e) { 237 LOG.error("Exception in getCurrentUser: ",e); 238 throw new RuntimeException("Failed to get the current user " + 239 "while creating a FileContext", e); 240 } 241 /* 242 * Init the wd. 243 * WorkingDir is implemented at the FileContext layer 244 * NOT at the AbstractFileSystem layer. 
245 * If the DefaultFS, such as localFilesystem has a notion of 246 * builtin WD, we use that as the initial WD. 247 * Otherwise the WD is initialized to the home directory. 248 */ 249 workingDir = defaultFS.getInitialWorkingDirectory(); 250 if (workingDir == null) { 251 workingDir = defaultFS.getHomeDirectory(); 252 } 253 util = new Util(); // for the inner class 254 } 255 256 /* 257 * Remove relative part - return "absolute": 258 * If input is relative path ("foo/bar") add wd: ie "/<workingDir>/foo/bar" 259 * A fully qualified uri ("hdfs://nn:p/foo/bar") or a slash-relative path 260 * ("/foo/bar") are returned unchanged. 261 * 262 * Applications that use FileContext should use #makeQualified() since 263 * they really want a fully qualified URI. 264 * Hence this method is not called makeAbsolute() and 265 * has been deliberately declared private. 266 */ 267 private Path fixRelativePart(Path p) { 268 if (p.isUriPathAbsolute()) { 269 return p; 270 } else { 271 return new Path(workingDir, p); 272 } 273 } 274 275 /** 276 * Delete all the paths that were marked as delete-on-exit. 277 */ 278 static void processDeleteOnExit() { 279 synchronized (DELETE_ON_EXIT) { 280 Set<Entry<FileContext, Set<Path>>> set = DELETE_ON_EXIT.entrySet(); 281 for (Entry<FileContext, Set<Path>> entry : set) { 282 FileContext fc = entry.getKey(); 283 Set<Path> paths = entry.getValue(); 284 for (Path path : paths) { 285 try { 286 fc.delete(path, true); 287 } catch (IOException e) { 288 LOG.warn("Ignoring failure to deleteOnExit for path " + path); 289 } 290 } 291 } 292 DELETE_ON_EXIT.clear(); 293 } 294 } 295 296 /** 297 * Get the file system of supplied path. 298 * 299 * @param absOrFqPath - absolute or fully qualified path 300 * @return the file system of the path 301 * 302 * @throws UnsupportedFileSystemException If the file system for 303 * <code>absOrFqPath</code> is not supported. 304 * @throws IOExcepton If the file system for <code>absOrFqPath</code> could 305 * not be instantiated. 
306 */ 307 protected AbstractFileSystem getFSofPath(final Path absOrFqPath) 308 throws UnsupportedFileSystemException, IOException { 309 absOrFqPath.checkNotSchemeWithRelative(); 310 absOrFqPath.checkNotRelative(); 311 312 try { 313 // Is it the default FS for this FileContext? 314 defaultFS.checkPath(absOrFqPath); 315 return defaultFS; 316 } catch (Exception e) { // it is different FileSystem 317 return getAbstractFileSystem(ugi, absOrFqPath.toUri(), conf); 318 } 319 } 320 321 private static AbstractFileSystem getAbstractFileSystem( 322 UserGroupInformation user, final URI uri, final Configuration conf) 323 throws UnsupportedFileSystemException, IOException { 324 try { 325 return user.doAs(new PrivilegedExceptionAction<AbstractFileSystem>() { 326 @Override 327 public AbstractFileSystem run() throws UnsupportedFileSystemException { 328 return AbstractFileSystem.get(uri, conf); 329 } 330 }); 331 } catch (InterruptedException ex) { 332 LOG.error(ex); 333 throw new IOException("Failed to get the AbstractFileSystem for path: " 334 + uri, ex); 335 } 336 } 337 338 /** 339 * Protected Static Factory methods for getting a FileContexts 340 * that take a AbstractFileSystem as input. To be used for testing. 341 */ 342 343 /** 344 * Create a FileContext with specified FS as default using the specified 345 * config. 346 * 347 * @param defFS 348 * @param aConf 349 * @return new FileContext with specifed FS as default. 350 */ 351 public static FileContext getFileContext(final AbstractFileSystem defFS, 352 final Configuration aConf) { 353 return new FileContext(defFS, FsPermission.getUMask(aConf), aConf); 354 } 355 356 /** 357 * Create a FileContext for specified file system using the default config. 358 * 359 * @param defaultFS 360 * @return a FileContext with the specified AbstractFileSystem 361 * as the default FS. 
362 */ 363 protected static FileContext getFileContext( 364 final AbstractFileSystem defaultFS) { 365 return getFileContext(defaultFS, new Configuration()); 366 } 367 368 /** 369 * Static Factory methods for getting a FileContext. 370 * Note new file contexts are created for each call. 371 * The only singleton is the local FS context using the default config. 372 * 373 * Methods that use the default config: the default config read from the 374 * $HADOOP_CONFIG/core.xml, 375 * Unspecified key-values for config are defaulted from core-defaults.xml 376 * in the release jar. 377 * 378 * The keys relevant to the FileContext layer are extracted at time of 379 * construction. Changes to the config after the call are ignore 380 * by the FileContext layer. 381 * The conf is passed to lower layers like AbstractFileSystem and HDFS which 382 * pick up their own config variables. 383 */ 384 385 /** 386 * Create a FileContext using the default config read from the 387 * $HADOOP_CONFIG/core.xml, Unspecified key-values for config are defaulted 388 * from core-defaults.xml in the release jar. 389 * 390 * @throws UnsupportedFileSystemException If the file system from the default 391 * configuration is not supported 392 */ 393 public static FileContext getFileContext() 394 throws UnsupportedFileSystemException { 395 return getFileContext(new Configuration()); 396 } 397 398 /** 399 * @return a FileContext for the local file system using the default config. 400 * @throws UnsupportedFileSystemException If the file system for 401 * {@link FsConstants#LOCAL_FS_URI} is not supported. 402 */ 403 public static FileContext getLocalFSFileContext() 404 throws UnsupportedFileSystemException { 405 return getFileContext(FsConstants.LOCAL_FS_URI); 406 } 407 408 /** 409 * Create a FileContext for specified URI using the default config. 410 * 411 * @param defaultFsUri 412 * @return a FileContext with the specified URI as the default FS. 
413 * 414 * @throws UnsupportedFileSystemException If the file system for 415 * <code>defaultFsUri</code> is not supported 416 */ 417 public static FileContext getFileContext(final URI defaultFsUri) 418 throws UnsupportedFileSystemException { 419 return getFileContext(defaultFsUri, new Configuration()); 420 } 421 422 /** 423 * Create a FileContext for specified default URI using the specified config. 424 * 425 * @param defaultFsUri 426 * @param aConf 427 * @return new FileContext for specified uri 428 * @throws UnsupportedFileSystemException If the file system with specified is 429 * not supported 430 * @throws RuntimeException If the file system specified is supported but 431 * could not be instantiated, or if login fails. 432 */ 433 public static FileContext getFileContext(final URI defaultFsUri, 434 final Configuration aConf) throws UnsupportedFileSystemException { 435 UserGroupInformation currentUser = null; 436 AbstractFileSystem defaultAfs = null; 437 try { 438 currentUser = UserGroupInformation.getCurrentUser(); 439 defaultAfs = getAbstractFileSystem(currentUser, defaultFsUri, aConf); 440 } catch (UnsupportedFileSystemException ex) { 441 throw ex; 442 } catch (IOException ex) { 443 LOG.error(ex); 444 throw new RuntimeException(ex); 445 } 446 return getFileContext(defaultAfs, aConf); 447 } 448 449 /** 450 * Create a FileContext using the passed config. Generally it is better to use 451 * {@link #getFileContext(URI, Configuration)} instead of this one. 
452 * 453 * 454 * @param aConf 455 * @return new FileContext 456 * @throws UnsupportedFileSystemException If file system in the config 457 * is not supported 458 */ 459 public static FileContext getFileContext(final Configuration aConf) 460 throws UnsupportedFileSystemException { 461 return getFileContext( 462 URI.create(aConf.get(FS_DEFAULT_NAME_KEY, FS_DEFAULT_NAME_DEFAULT)), 463 aConf); 464 } 465 466 /** 467 * @param aConf - from which the FileContext is configured 468 * @return a FileContext for the local file system using the specified config. 469 * 470 * @throws UnsupportedFileSystemException If default file system in the config 471 * is not supported 472 * 473 */ 474 public static FileContext getLocalFSFileContext(final Configuration aConf) 475 throws UnsupportedFileSystemException { 476 return getFileContext(FsConstants.LOCAL_FS_URI, aConf); 477 } 478 479 /* This method is needed for tests. */ 480 @InterfaceAudience.Private 481 @InterfaceStability.Unstable /* return type will change to AFS once 482 HADOOP-6223 is completed */ 483 public AbstractFileSystem getDefaultFileSystem() { 484 return defaultFS; 485 } 486 487 /** 488 * Set the working directory for wd-relative names (such a "foo/bar"). Working 489 * directory feature is provided by simply prefixing relative names with the 490 * working dir. Note this is different from Unix where the wd is actually set 491 * to the inode. Hence setWorkingDir does not follow symlinks etc. This works 492 * better in a distributed environment that has multiple independent roots. 493 * {@link #getWorkingDirectory()} should return what setWorkingDir() set. 
494 * 495 * @param newWDir new working directory 496 * @throws IOException 497 * <br> 498 * NewWdir can be one of: 499 * <ul> 500 * <li>relative path: "foo/bar";</li> 501 * <li>absolute without scheme: "/foo/bar"</li> 502 * <li>fully qualified with scheme: "xx://auth/foo/bar"</li> 503 * </ul> 504 * <br> 505 * Illegal WDs: 506 * <ul> 507 * <li>relative with scheme: "xx:foo/bar"</li> 508 * <li>non existent directory</li> 509 * </ul> 510 */ 511 public void setWorkingDirectory(final Path newWDir) throws IOException { 512 newWDir.checkNotSchemeWithRelative(); 513 /* wd is stored as a fully qualified path. We check if the given 514 * path is not relative first since resolve requires and returns 515 * an absolute path. 516 */ 517 final Path newWorkingDir = new Path(workingDir, newWDir); 518 FileStatus status = getFileStatus(newWorkingDir); 519 if (status.isFile()) { 520 throw new FileNotFoundException("Cannot setWD to a file"); 521 } 522 workingDir = newWorkingDir; 523 } 524 525 /** 526 * Gets the working directory for wd-relative names (such a "foo/bar"). 527 */ 528 public Path getWorkingDirectory() { 529 return workingDir; 530 } 531 532 /** 533 * Gets the ugi in the file-context 534 * @return UserGroupInformation 535 */ 536 public UserGroupInformation getUgi() { 537 return ugi; 538 } 539 540 /** 541 * Return the current user's home directory in this file system. 542 * The default implementation returns "/user/$USER/". 543 * @return the home directory 544 */ 545 public Path getHomeDirectory() { 546 return defaultFS.getHomeDirectory(); 547 } 548 549 /** 550 * 551 * @return the umask of this FileContext 552 */ 553 public FsPermission getUMask() { 554 return umask; 555 } 556 557 /** 558 * Set umask to the supplied parameter. 
559 * @param newUmask the new umask 560 */ 561 public void setUMask(final FsPermission newUmask) { 562 umask = newUmask; 563 } 564 565 566 /** 567 * Resolve the path following any symlinks or mount points 568 * @param f to be resolved 569 * @return fully qualified resolved path 570 * 571 * @throws FileNotFoundException If <code>f</code> does not exist 572 * @throws AccessControlException if access denied 573 * @throws IOException If an IO Error occurred 574 * 575 * Exceptions applicable to file systems accessed over RPC: 576 * @throws RpcClientException If an exception occurred in the RPC client 577 * @throws RpcServerException If an exception occurred in the RPC server 578 * @throws UnexpectedServerException If server implementation throws 579 * undeclared exception to RPC server 580 * 581 * RuntimeExceptions: 582 * @throws InvalidPathException If path <code>f</code> is not valid 583 */ 584 public Path resolvePath(final Path f) throws FileNotFoundException, 585 UnresolvedLinkException, AccessControlException, IOException { 586 return resolve(f); 587 } 588 589 /** 590 * Make the path fully qualified if it is isn't. 591 * A Fully-qualified path has scheme and authority specified and an absolute 592 * path. 593 * Use the default file system and working dir in this FileContext to qualify. 594 * @param path 595 * @return qualified path 596 */ 597 public Path makeQualified(final Path path) { 598 return path.makeQualified(defaultFS.getUri(), getWorkingDirectory()); 599 } 600 601 /** 602 * Create or overwrite file on indicated path and returns an output stream for 603 * writing into the file. 604 * 605 * @param f the file name to open 606 * @param createFlag gives the semantics of create; see {@link CreateFlag} 607 * @param opts file creation options; see {@link Options.CreateOpts}. 
608 * <ul> 609 * <li>Progress - to report progress on the operation - default null 610 * <li>Permission - umask is applied against permisssion: default is 611 * FsPermissions:getDefault() 612 * 613 * <li>CreateParent - create missing parent path; default is to not 614 * to create parents 615 * <li>The defaults for the following are SS defaults of the file 616 * server implementing the target path. Not all parameters make sense 617 * for all kinds of file system - eg. localFS ignores Blocksize, 618 * replication, checksum 619 * <ul> 620 * <li>BufferSize - buffersize used in FSDataOutputStream 621 * <li>Blocksize - block size for file blocks 622 * <li>ReplicationFactor - replication for blocks 623 * <li>ChecksumParam - Checksum parameters. server default is used 624 * if not specified. 625 * </ul> 626 * </ul> 627 * 628 * @return {@link FSDataOutputStream} for created file 629 * 630 * @throws AccessControlException If access is denied 631 * @throws FileAlreadyExistsException If file <code>f</code> already exists 632 * @throws FileNotFoundException If parent of <code>f</code> does not exist 633 * and <code>createParent</code> is false 634 * @throws ParentNotDirectoryException If parent of <code>f</code> is not a 635 * directory. 636 * @throws UnsupportedFileSystemException If file system for <code>f</code> is 637 * not supported 638 * @throws IOException If an I/O error occurred 639 * 640 * Exceptions applicable to file systems accessed over RPC: 641 * @throws RpcClientException If an exception occurred in the RPC client 642 * @throws RpcServerException If an exception occurred in the RPC server 643 * @throws UnexpectedServerException If server implementation throws 644 * undeclared exception to RPC server 645 * 646 * RuntimeExceptions: 647 * @throws InvalidPathException If path <code>f</code> is not valid 648 */ 649 public FSDataOutputStream create(final Path f, 650 final EnumSet<CreateFlag> createFlag, Options.CreateOpts... 
opts) 651 throws AccessControlException, FileAlreadyExistsException, 652 FileNotFoundException, ParentNotDirectoryException, 653 UnsupportedFileSystemException, IOException { 654 Path absF = fixRelativePart(f); 655 656 // If one of the options is a permission, extract it & apply umask 657 // If not, add a default Perms and apply umask; 658 // AbstractFileSystem#create 659 660 CreateOpts.Perms permOpt = 661 (CreateOpts.Perms) CreateOpts.getOpt(CreateOpts.Perms.class, opts); 662 FsPermission permission = (permOpt != null) ? permOpt.getValue() : 663 FILE_DEFAULT_PERM; 664 permission = permission.applyUMask(umask); 665 666 final CreateOpts[] updatedOpts = 667 CreateOpts.setOpt(CreateOpts.perms(permission), opts); 668 return new FSLinkResolver<FSDataOutputStream>() { 669 @Override 670 public FSDataOutputStream next(final AbstractFileSystem fs, final Path p) 671 throws IOException { 672 return fs.create(p, createFlag, updatedOpts); 673 } 674 }.resolve(this, absF); 675 } 676 677 /** 678 * Make(create) a directory and all the non-existent parents. 
679 * 680 * @param dir - the dir to make 681 * @param permission - permissions is set permission&~umask 682 * @param createParent - if true then missing parent dirs are created if false 683 * then parent must exist 684 * 685 * @throws AccessControlException If access is denied 686 * @throws FileAlreadyExistsException If directory <code>dir</code> already 687 * exists 688 * @throws FileNotFoundException If parent of <code>dir</code> does not exist 689 * and <code>createParent</code> is false 690 * @throws ParentNotDirectoryException If parent of <code>dir</code> is not a 691 * directory 692 * @throws UnsupportedFileSystemException If file system for <code>dir</code> 693 * is not supported 694 * @throws IOException If an I/O error occurred 695 * 696 * Exceptions applicable to file systems accessed over RPC: 697 * @throws RpcClientException If an exception occurred in the RPC client 698 * @throws UnexpectedServerException If server implementation throws 699 * undeclared exception to RPC server 700 * 701 * RuntimeExceptions: 702 * @throws InvalidPathException If path <code>dir</code> is not valid 703 */ 704 public void mkdir(final Path dir, final FsPermission permission, 705 final boolean createParent) throws AccessControlException, 706 FileAlreadyExistsException, FileNotFoundException, 707 ParentNotDirectoryException, UnsupportedFileSystemException, 708 IOException { 709 final Path absDir = fixRelativePart(dir); 710 final FsPermission absFerms = (permission == null ? 711 FsPermission.getDirDefault() : permission).applyUMask(umask); 712 new FSLinkResolver<Void>() { 713 @Override 714 public Void next(final AbstractFileSystem fs, final Path p) 715 throws IOException, UnresolvedLinkException { 716 fs.mkdir(p, absFerms, createParent); 717 return null; 718 } 719 }.resolve(this, absDir); 720 } 721 722 /** 723 * Delete a file. 724 * @param f the path to delete. 
725 * @param recursive if path is a directory and set to 726 * true, the directory is deleted else throws an exception. In 727 * case of a file the recursive can be set to either true or false. 728 * 729 * @throws AccessControlException If access is denied 730 * @throws FileNotFoundException If <code>f</code> does not exist 731 * @throws UnsupportedFileSystemException If file system for <code>f</code> is 732 * not supported 733 * @throws IOException If an I/O error occurred 734 * 735 * Exceptions applicable to file systems accessed over RPC: 736 * @throws RpcClientException If an exception occurred in the RPC client 737 * @throws RpcServerException If an exception occurred in the RPC server 738 * @throws UnexpectedServerException If server implementation throws 739 * undeclared exception to RPC server 740 * 741 * RuntimeExceptions: 742 * @throws InvalidPathException If path <code>f</code> is invalid 743 */ 744 public boolean delete(final Path f, final boolean recursive) 745 throws AccessControlException, FileNotFoundException, 746 UnsupportedFileSystemException, IOException { 747 Path absF = fixRelativePart(f); 748 return new FSLinkResolver<Boolean>() { 749 @Override 750 public Boolean next(final AbstractFileSystem fs, final Path p) 751 throws IOException, UnresolvedLinkException { 752 return Boolean.valueOf(fs.delete(p, recursive)); 753 } 754 }.resolve(this, absF); 755 } 756 757 /** 758 * Opens an FSDataInputStream at the indicated Path using 759 * default buffersize. 
760 * @param f the file name to open 761 * 762 * @throws AccessControlException If access is denied 763 * @throws FileNotFoundException If file <code>f</code> does not exist 764 * @throws UnsupportedFileSystemException If file system for <code>f</code> 765 * is not supported 766 * @throws IOException If an I/O error occurred 767 * 768 * Exceptions applicable to file systems accessed over RPC: 769 * @throws RpcClientException If an exception occurred in the RPC client 770 * @throws RpcServerException If an exception occurred in the RPC server 771 * @throws UnexpectedServerException If server implementation throws 772 * undeclared exception to RPC server 773 */ 774 public FSDataInputStream open(final Path f) throws AccessControlException, 775 FileNotFoundException, UnsupportedFileSystemException, IOException { 776 final Path absF = fixRelativePart(f); 777 return new FSLinkResolver<FSDataInputStream>() { 778 @Override 779 public FSDataInputStream next(final AbstractFileSystem fs, final Path p) 780 throws IOException, UnresolvedLinkException { 781 return fs.open(p); 782 } 783 }.resolve(this, absF); 784 } 785 786 /** 787 * Opens an FSDataInputStream at the indicated Path. 788 * 789 * @param f the file name to open 790 * @param bufferSize the size of the buffer to be used. 
791 * 792 * @throws AccessControlException If access is denied 793 * @throws FileNotFoundException If file <code>f</code> does not exist 794 * @throws UnsupportedFileSystemException If file system for <code>f</code> is 795 * not supported 796 * @throws IOException If an I/O error occurred 797 * 798 * Exceptions applicable to file systems accessed over RPC: 799 * @throws RpcClientException If an exception occurred in the RPC client 800 * @throws RpcServerException If an exception occurred in the RPC server 801 * @throws UnexpectedServerException If server implementation throws 802 * undeclared exception to RPC server 803 */ 804 public FSDataInputStream open(final Path f, final int bufferSize) 805 throws AccessControlException, FileNotFoundException, 806 UnsupportedFileSystemException, IOException { 807 final Path absF = fixRelativePart(f); 808 return new FSLinkResolver<FSDataInputStream>() { 809 @Override 810 public FSDataInputStream next(final AbstractFileSystem fs, final Path p) 811 throws IOException, UnresolvedLinkException { 812 return fs.open(p, bufferSize); 813 } 814 }.resolve(this, absF); 815 } 816 817 /** 818 * Set replication for an existing file. 
819 * 820 * @param f file name 821 * @param replication new replication 822 * 823 * @return true if successful 824 * 825 * @throws AccessControlException If access is denied 826 * @throws FileNotFoundException If file <code>f</code> does not exist 827 * @throws IOException If an I/O error occurred 828 * 829 * Exceptions applicable to file systems accessed over RPC: 830 * @throws RpcClientException If an exception occurred in the RPC client 831 * @throws RpcServerException If an exception occurred in the RPC server 832 * @throws UnexpectedServerException If server implementation throws 833 * undeclared exception to RPC server 834 */ 835 public boolean setReplication(final Path f, final short replication) 836 throws AccessControlException, FileNotFoundException, 837 IOException { 838 final Path absF = fixRelativePart(f); 839 return new FSLinkResolver<Boolean>() { 840 @Override 841 public Boolean next(final AbstractFileSystem fs, final Path p) 842 throws IOException, UnresolvedLinkException { 843 return Boolean.valueOf(fs.setReplication(p, replication)); 844 } 845 }.resolve(this, absF); 846 } 847 848 /** 849 * Renames Path src to Path dst 850 * <ul> 851 * <li 852 * <li>Fails if src is a file and dst is a directory. 853 * <li>Fails if src is a directory and dst is a file. 854 * <li>Fails if the parent of dst does not exist or is a file. 855 * </ul> 856 * <p> 857 * If OVERWRITE option is not passed as an argument, rename fails if the dst 858 * already exists. 859 * <p> 860 * If OVERWRITE option is passed as an argument, rename overwrites the dst if 861 * it is a file or an empty directory. Rename fails if dst is a non-empty 862 * directory. 863 * <p> 864 * Note that atomicity of rename is dependent on the file system 865 * implementation. 
Please refer to the file system documentation for details 866 * <p> 867 * 868 * @param src path to be renamed 869 * @param dst new path after rename 870 * 871 * @throws AccessControlException If access is denied 872 * @throws FileAlreadyExistsException If <code>dst</code> already exists and 873 * <code>options</options> has {@link Options.Rename#OVERWRITE} 874 * option false. 875 * @throws FileNotFoundException If <code>src</code> does not exist 876 * @throws ParentNotDirectoryException If parent of <code>dst</code> is not a 877 * directory 878 * @throws UnsupportedFileSystemException If file system for <code>src</code> 879 * and <code>dst</code> is not supported 880 * @throws IOException If an I/O error occurred 881 * 882 * Exceptions applicable to file systems accessed over RPC: 883 * @throws RpcClientException If an exception occurred in the RPC client 884 * @throws RpcServerException If an exception occurred in the RPC server 885 * @throws UnexpectedServerException If server implementation throws 886 * undeclared exception to RPC server 887 */ 888 public void rename(final Path src, final Path dst, 889 final Options.Rename... options) throws AccessControlException, 890 FileAlreadyExistsException, FileNotFoundException, 891 ParentNotDirectoryException, UnsupportedFileSystemException, 892 IOException { 893 final Path absSrc = fixRelativePart(src); 894 final Path absDst = fixRelativePart(dst); 895 AbstractFileSystem srcFS = getFSofPath(absSrc); 896 AbstractFileSystem dstFS = getFSofPath(absDst); 897 if(!srcFS.getUri().equals(dstFS.getUri())) { 898 throw new IOException("Renames across AbstractFileSystems not supported"); 899 } 900 try { 901 srcFS.rename(absSrc, absDst, options); 902 } catch (UnresolvedLinkException e) { 903 /* We do not know whether the source or the destination path 904 * was unresolved. Resolve the source path up until the final 905 * path component, then fully resolve the destination. 
906 */ 907 final Path source = resolveIntermediate(absSrc); 908 new FSLinkResolver<Void>() { 909 @Override 910 public Void next(final AbstractFileSystem fs, final Path p) 911 throws IOException, UnresolvedLinkException { 912 fs.rename(source, p, options); 913 return null; 914 } 915 }.resolve(this, absDst); 916 } 917 } 918 919 /** 920 * Set permission of a path. 921 * @param f 922 * @param permission - the new absolute permission (umask is not applied) 923 * 924 * @throws AccessControlException If access is denied 925 * @throws FileNotFoundException If <code>f</code> does not exist 926 * @throws UnsupportedFileSystemException If file system for <code>f</code> 927 * is not supported 928 * @throws IOException If an I/O error occurred 929 * 930 * Exceptions applicable to file systems accessed over RPC: 931 * @throws RpcClientException If an exception occurred in the RPC client 932 * @throws RpcServerException If an exception occurred in the RPC server 933 * @throws UnexpectedServerException If server implementation throws 934 * undeclared exception to RPC server 935 */ 936 public void setPermission(final Path f, final FsPermission permission) 937 throws AccessControlException, FileNotFoundException, 938 UnsupportedFileSystemException, IOException { 939 final Path absF = fixRelativePart(f); 940 new FSLinkResolver<Void>() { 941 @Override 942 public Void next(final AbstractFileSystem fs, final Path p) 943 throws IOException, UnresolvedLinkException { 944 fs.setPermission(p, permission); 945 return null; 946 } 947 }.resolve(this, absF); 948 } 949 950 /** 951 * Set owner of a path (i.e. a file or a directory). The parameters username 952 * and groupname cannot both be null. 953 * 954 * @param f The path 955 * @param username If it is null, the original username remains unchanged. 956 * @param groupname If it is null, the original groupname remains unchanged. 
957 * 958 * @throws AccessControlException If access is denied 959 * @throws FileNotFoundException If <code>f</code> does not exist 960 * @throws UnsupportedFileSystemException If file system for <code>f</code> is 961 * not supported 962 * @throws IOException If an I/O error occurred 963 * 964 * Exceptions applicable to file systems accessed over RPC: 965 * @throws RpcClientException If an exception occurred in the RPC client 966 * @throws RpcServerException If an exception occurred in the RPC server 967 * @throws UnexpectedServerException If server implementation throws 968 * undeclared exception to RPC server 969 * 970 * RuntimeExceptions: 971 * @throws HadoopIllegalArgumentException If <code>username</code> or 972 * <code>groupname</code> is invalid. 973 */ 974 public void setOwner(final Path f, final String username, 975 final String groupname) throws AccessControlException, 976 UnsupportedFileSystemException, FileNotFoundException, 977 IOException { 978 if ((username == null) && (groupname == null)) { 979 throw new HadoopIllegalArgumentException( 980 "username and groupname cannot both be null"); 981 } 982 final Path absF = fixRelativePart(f); 983 new FSLinkResolver<Void>() { 984 @Override 985 public Void next(final AbstractFileSystem fs, final Path p) 986 throws IOException, UnresolvedLinkException { 987 fs.setOwner(p, username, groupname); 988 return null; 989 } 990 }.resolve(this, absF); 991 } 992 993 /** 994 * Set access time of a file. 995 * @param f The path 996 * @param mtime Set the modification time of this file. 997 * The number of milliseconds since epoch (Jan 1, 1970). 998 * A value of -1 means that this call should not set modification time. 999 * @param atime Set the access time of this file. 1000 * The number of milliseconds since Jan 1, 1970. 1001 * A value of -1 means that this call should not set access time. 
1002 * 1003 * @throws AccessControlException If access is denied 1004 * @throws FileNotFoundException If <code>f</code> does not exist 1005 * @throws UnsupportedFileSystemException If file system for <code>f</code> is 1006 * not supported 1007 * @throws IOException If an I/O error occurred 1008 * 1009 * Exceptions applicable to file systems accessed over RPC: 1010 * @throws RpcClientException If an exception occurred in the RPC client 1011 * @throws RpcServerException If an exception occurred in the RPC server 1012 * @throws UnexpectedServerException If server implementation throws 1013 * undeclared exception to RPC server 1014 */ 1015 public void setTimes(final Path f, final long mtime, final long atime) 1016 throws AccessControlException, FileNotFoundException, 1017 UnsupportedFileSystemException, IOException { 1018 final Path absF = fixRelativePart(f); 1019 new FSLinkResolver<Void>() { 1020 @Override 1021 public Void next(final AbstractFileSystem fs, final Path p) 1022 throws IOException, UnresolvedLinkException { 1023 fs.setTimes(p, mtime, atime); 1024 return null; 1025 } 1026 }.resolve(this, absF); 1027 } 1028 1029 /** 1030 * Get the checksum of a file. 1031 * 1032 * @param f file path 1033 * 1034 * @return The file checksum. The default return value is null, 1035 * which indicates that no checksum algorithm is implemented 1036 * in the corresponding FileSystem. 
1037 * 1038 * @throws AccessControlException If access is denied 1039 * @throws FileNotFoundException If <code>f</code> does not exist 1040 * @throws IOException If an I/O error occurred 1041 * 1042 * Exceptions applicable to file systems accessed over RPC: 1043 * @throws RpcClientException If an exception occurred in the RPC client 1044 * @throws RpcServerException If an exception occurred in the RPC server 1045 * @throws UnexpectedServerException If server implementation throws 1046 * undeclared exception to RPC server 1047 */ 1048 public FileChecksum getFileChecksum(final Path f) 1049 throws AccessControlException, FileNotFoundException, 1050 IOException { 1051 final Path absF = fixRelativePart(f); 1052 return new FSLinkResolver<FileChecksum>() { 1053 @Override 1054 public FileChecksum next(final AbstractFileSystem fs, final Path p) 1055 throws IOException, UnresolvedLinkException { 1056 return fs.getFileChecksum(p); 1057 } 1058 }.resolve(this, absF); 1059 } 1060 1061 /** 1062 * Set the verify checksum flag for the file system denoted by the path. 1063 * This is only applicable if the 1064 * corresponding FileSystem supports checksum. By default doesn't do anything. 
1065 * @param verifyChecksum 1066 * @param f set the verifyChecksum for the Filesystem containing this path 1067 * 1068 * @throws AccessControlException If access is denied 1069 * @throws FileNotFoundException If <code>f</code> does not exist 1070 * @throws UnsupportedFileSystemException If file system for <code>f</code> is 1071 * not supported 1072 * @throws IOException If an I/O error occurred 1073 * 1074 * Exceptions applicable to file systems accessed over RPC: 1075 * @throws RpcClientException If an exception occurred in the RPC client 1076 * @throws RpcServerException If an exception occurred in the RPC server 1077 * @throws UnexpectedServerException If server implementation throws 1078 * undeclared exception to RPC server 1079 */ 1080 public void setVerifyChecksum(final boolean verifyChecksum, final Path f) 1081 throws AccessControlException, FileNotFoundException, 1082 UnsupportedFileSystemException, IOException { 1083 final Path absF = resolve(fixRelativePart(f)); 1084 getFSofPath(absF).setVerifyChecksum(verifyChecksum); 1085 } 1086 1087 /** 1088 * Return a file status object that represents the path. 
1089 * @param f The path we want information from 1090 * 1091 * @return a FileStatus object 1092 * 1093 * @throws AccessControlException If access is denied 1094 * @throws FileNotFoundException If <code>f</code> does not exist 1095 * @throws UnsupportedFileSystemException If file system for <code>f</code> is 1096 * not supported 1097 * @throws IOException If an I/O error occurred 1098 * 1099 * Exceptions applicable to file systems accessed over RPC: 1100 * @throws RpcClientException If an exception occurred in the RPC client 1101 * @throws RpcServerException If an exception occurred in the RPC server 1102 * @throws UnexpectedServerException If server implementation throws 1103 * undeclared exception to RPC server 1104 */ 1105 public FileStatus getFileStatus(final Path f) throws AccessControlException, 1106 FileNotFoundException, UnsupportedFileSystemException, IOException { 1107 final Path absF = fixRelativePart(f); 1108 return new FSLinkResolver<FileStatus>() { 1109 @Override 1110 public FileStatus next(final AbstractFileSystem fs, final Path p) 1111 throws IOException, UnresolvedLinkException { 1112 return fs.getFileStatus(p); 1113 } 1114 }.resolve(this, absF); 1115 } 1116 1117 /** 1118 * Return a file status object that represents the path. If the path 1119 * refers to a symlink then the FileStatus of the symlink is returned. 1120 * The behavior is equivalent to #getFileStatus() if the underlying 1121 * file system does not support symbolic links. 1122 * @param f The path we want information from. 
1123 * @return A FileStatus object 1124 * 1125 * @throws AccessControlException If access is denied 1126 * @throws FileNotFoundException If <code>f</code> does not exist 1127 * @throws UnsupportedFileSystemException If file system for <code>f</code> is 1128 * not supported 1129 * @throws IOException If an I/O error occurred 1130 */ 1131 public FileStatus getFileLinkStatus(final Path f) 1132 throws AccessControlException, FileNotFoundException, 1133 UnsupportedFileSystemException, IOException { 1134 final Path absF = fixRelativePart(f); 1135 return new FSLinkResolver<FileStatus>() { 1136 @Override 1137 public FileStatus next(final AbstractFileSystem fs, final Path p) 1138 throws IOException, UnresolvedLinkException { 1139 FileStatus fi = fs.getFileLinkStatus(p); 1140 if (fi.isSymlink()) { 1141 fi.setSymlink(FSLinkResolver.qualifySymlinkTarget(fs.getUri(), p, 1142 fi.getSymlink())); 1143 } 1144 return fi; 1145 } 1146 }.resolve(this, absF); 1147 } 1148 1149 /** 1150 * Returns the target of the given symbolic link as it was specified 1151 * when the link was created. Links in the path leading up to the 1152 * final path component are resolved transparently. 1153 * 1154 * @param f the path to return the target of 1155 * @return The un-interpreted target of the symbolic link. 
1156 * 1157 * @throws AccessControlException If access is denied 1158 * @throws FileNotFoundException If path <code>f</code> does not exist 1159 * @throws UnsupportedFileSystemException If file system for <code>f</code> is 1160 * not supported 1161 * @throws IOException If the given path does not refer to a symlink 1162 * or an I/O error occurred 1163 */ 1164 public Path getLinkTarget(final Path f) throws AccessControlException, 1165 FileNotFoundException, UnsupportedFileSystemException, IOException { 1166 final Path absF = fixRelativePart(f); 1167 return new FSLinkResolver<Path>() { 1168 @Override 1169 public Path next(final AbstractFileSystem fs, final Path p) 1170 throws IOException, UnresolvedLinkException { 1171 FileStatus fi = fs.getFileLinkStatus(p); 1172 return fi.getSymlink(); 1173 } 1174 }.resolve(this, absF); 1175 } 1176 1177 /** 1178 * Return blockLocation of the given file for the given offset and len. 1179 * For a nonexistent file or regions, null will be returned. 1180 * 1181 * This call is most helpful with DFS, where it returns 1182 * hostnames of machines that contain the given file. 
1183 * 1184 * @param f - get blocklocations of this file 1185 * @param start position (byte offset) 1186 * @param len (in bytes) 1187 * 1188 * @return block locations for given file at specified offset of len 1189 * 1190 * @throws AccessControlException If access is denied 1191 * @throws FileNotFoundException If <code>f</code> does not exist 1192 * @throws UnsupportedFileSystemException If file system for <code>f</code> is 1193 * not supported 1194 * @throws IOException If an I/O error occurred 1195 * 1196 * Exceptions applicable to file systems accessed over RPC: 1197 * @throws RpcClientException If an exception occurred in the RPC client 1198 * @throws RpcServerException If an exception occurred in the RPC server 1199 * @throws UnexpectedServerException If server implementation throws 1200 * undeclared exception to RPC server 1201 * 1202 * RuntimeExceptions: 1203 * @throws InvalidPathException If path <code>f</code> is invalid 1204 */ 1205 @InterfaceAudience.LimitedPrivate({"HDFS", "MapReduce"}) 1206 @InterfaceStability.Evolving 1207 public BlockLocation[] getFileBlockLocations(final Path f, final long start, 1208 final long len) throws AccessControlException, FileNotFoundException, 1209 UnsupportedFileSystemException, IOException { 1210 final Path absF = fixRelativePart(f); 1211 return new FSLinkResolver<BlockLocation[]>() { 1212 @Override 1213 public BlockLocation[] next(final AbstractFileSystem fs, final Path p) 1214 throws IOException, UnresolvedLinkException { 1215 return fs.getFileBlockLocations(p, start, len); 1216 } 1217 }.resolve(this, absF); 1218 } 1219 1220 /** 1221 * Returns a status object describing the use and capacity of the 1222 * file system denoted by the Parh argument p. 1223 * If the file system has multiple partitions, the 1224 * use and capacity of the partition pointed to by the specified 1225 * path is reflected. 1226 * 1227 * @param f Path for which status should be obtained. 
null means the 1228 * root partition of the default file system. 1229 * 1230 * @return a FsStatus object 1231 * 1232 * @throws AccessControlException If access is denied 1233 * @throws FileNotFoundException If <code>f</code> does not exist 1234 * @throws UnsupportedFileSystemException If file system for <code>f</code> is 1235 * not supported 1236 * @throws IOException If an I/O error occurred 1237 * 1238 * Exceptions applicable to file systems accessed over RPC: 1239 * @throws RpcClientException If an exception occurred in the RPC client 1240 * @throws RpcServerException If an exception occurred in the RPC server 1241 * @throws UnexpectedServerException If server implementation throws 1242 * undeclared exception to RPC server 1243 */ 1244 public FsStatus getFsStatus(final Path f) throws AccessControlException, 1245 FileNotFoundException, UnsupportedFileSystemException, IOException { 1246 if (f == null) { 1247 return defaultFS.getFsStatus(); 1248 } 1249 final Path absF = fixRelativePart(f); 1250 return new FSLinkResolver<FsStatus>() { 1251 @Override 1252 public FsStatus next(final AbstractFileSystem fs, final Path p) 1253 throws IOException, UnresolvedLinkException { 1254 return fs.getFsStatus(p); 1255 } 1256 }.resolve(this, absF); 1257 } 1258 1259 /** 1260 * Creates a symbolic link to an existing file. An exception is thrown if 1261 * the symlink exits, the user does not have permission to create symlink, 1262 * or the underlying file system does not support symlinks. 1263 * 1264 * Symlink permissions are ignored, access to a symlink is determined by 1265 * the permissions of the symlink target. 1266 * 1267 * Symlinks in paths leading up to the final path component are resolved 1268 * transparently. If the final path component refers to a symlink some 1269 * functions operate on the symlink itself, these are: 1270 * - delete(f) and deleteOnExit(f) - Deletes the symlink. 1271 * - rename(src, dst) - If src refers to a symlink, the symlink is 1272 * renamed. 
If dst refers to a symlink, the symlink is over-written. 1273 * - getLinkTarget(f) - Returns the target of the symlink. 1274 * - getFileLinkStatus(f) - Returns a FileStatus object describing 1275 * the symlink. 1276 * Some functions, create() and mkdir(), expect the final path component 1277 * does not exist. If they are given a path that refers to a symlink that 1278 * does exist they behave as if the path referred to an existing file or 1279 * directory. All other functions fully resolve, ie follow, the symlink. 1280 * These are: open, setReplication, setOwner, setTimes, setWorkingDirectory, 1281 * setPermission, getFileChecksum, setVerifyChecksum, getFileBlockLocations, 1282 * getFsStatus, getFileStatus, exists, and listStatus. 1283 * 1284 * Symlink targets are stored as given to createSymlink, assuming the 1285 * underlying file system is capable of storing a fully qualified URI. 1286 * Dangling symlinks are permitted. FileContext supports four types of 1287 * symlink targets, and resolves them as follows 1288 * <pre> 1289 * Given a path referring to a symlink of form: 1290 * 1291 * <---X---> 1292 * fs://host/A/B/link 1293 * <-----Y-----> 1294 * 1295 * In this path X is the scheme and authority that identify the file system, 1296 * and Y is the path leading up to the final path component "link". If Y is 1297 * a symlink itself then let Y' be the target of Y and X' be the scheme and 1298 * authority of Y'. Symlink targets may: 1299 * 1300 * 1. Fully qualified URIs 1301 * 1302 * fs://hostX/A/B/file Resolved according to the target file system. 1303 * 1304 * 2. Partially qualified URIs (eg scheme but no host) 1305 * 1306 * fs:///A/B/file Resolved according to the target file system. Eg resolving 1307 * a symlink to hdfs:///A results in an exception because 1308 * HDFS URIs must be fully qualified, while a symlink to 1309 * file:///A will not since Hadoop's local file systems 1310 * require partially qualified URIs. 1311 * 1312 * 3. 
Relative paths 1313 * 1314 * path Resolves to [Y'][path]. Eg if Y resolves to hdfs://host/A and path 1315 * is "../B/file" then [Y'][path] is hdfs://host/B/file 1316 * 1317 * 4. Absolute paths 1318 * 1319 * path Resolves to [X'][path]. Eg if Y resolves hdfs://host/A/B and path 1320 * is "/file" then [X][path] is hdfs://host/file 1321 * </pre> 1322 * 1323 * @param target the target of the symbolic link 1324 * @param link the path to be created that points to target 1325 * @param createParent if true then missing parent dirs are created if 1326 * false then parent must exist 1327 * 1328 * 1329 * @throws AccessControlException If access is denied 1330 * @throws FileAlreadyExistsException If file <code>linkcode> already exists 1331 * @throws FileNotFoundException If <code>target</code> does not exist 1332 * @throws ParentNotDirectoryException If parent of <code>link</code> is not a 1333 * directory. 1334 * @throws UnsupportedFileSystemException If file system for 1335 * <code>target</code> or <code>link</code> is not supported 1336 * @throws IOException If an I/O error occurred 1337 */ 1338 public void createSymlink(final Path target, final Path link, 1339 final boolean createParent) throws AccessControlException, 1340 FileAlreadyExistsException, FileNotFoundException, 1341 ParentNotDirectoryException, UnsupportedFileSystemException, 1342 IOException { 1343 final Path nonRelLink = fixRelativePart(link); 1344 new FSLinkResolver<Void>() { 1345 @Override 1346 public Void next(final AbstractFileSystem fs, final Path p) 1347 throws IOException, UnresolvedLinkException { 1348 fs.createSymlink(target, p, createParent); 1349 return null; 1350 } 1351 }.resolve(this, nonRelLink); 1352 } 1353 1354 /** 1355 * List the statuses of the files/directories in the given path if the path is 1356 * a directory. 
1357 * 1358 * @param f is the path 1359 * 1360 * @return an iterator that traverses statuses of the files/directories 1361 * in the given path 1362 * 1363 * @throws AccessControlException If access is denied 1364 * @throws FileNotFoundException If <code>f</code> does not exist 1365 * @throws UnsupportedFileSystemException If file system for <code>f</code> is 1366 * not supported 1367 * @throws IOException If an I/O error occurred 1368 * 1369 * Exceptions applicable to file systems accessed over RPC: 1370 * @throws RpcClientException If an exception occurred in the RPC client 1371 * @throws RpcServerException If an exception occurred in the RPC server 1372 * @throws UnexpectedServerException If server implementation throws 1373 * undeclared exception to RPC server 1374 */ 1375 public RemoteIterator<FileStatus> listStatus(final Path f) throws 1376 AccessControlException, FileNotFoundException, 1377 UnsupportedFileSystemException, IOException { 1378 final Path absF = fixRelativePart(f); 1379 return new FSLinkResolver<RemoteIterator<FileStatus>>() { 1380 @Override 1381 public RemoteIterator<FileStatus> next( 1382 final AbstractFileSystem fs, final Path p) 1383 throws IOException, UnresolvedLinkException { 1384 return fs.listStatusIterator(p); 1385 } 1386 }.resolve(this, absF); 1387 } 1388 1389 /** 1390 * @return an iterator over the corrupt files under the given path 1391 * (may contain duplicates if a file has more than one corrupt block) 1392 * @throws IOException 1393 */ 1394 public RemoteIterator<Path> listCorruptFileBlocks(Path path) 1395 throws IOException { 1396 final Path absF = fixRelativePart(path); 1397 return new FSLinkResolver<RemoteIterator<Path>>() { 1398 @Override 1399 public RemoteIterator<Path> next(final AbstractFileSystem fs, 1400 final Path p) 1401 throws IOException, UnresolvedLinkException { 1402 return fs.listCorruptFileBlocks(p); 1403 } 1404 }.resolve(this, absF); 1405 } 1406 1407 /** 1408 * List the statuses of the files/directories in the 
given path if the path is 1409 * a directory. 1410 * Return the file's status and block locations If the path is a file. 1411 * 1412 * If a returned status is a file, it contains the file's block locations. 1413 * 1414 * @param f is the path 1415 * 1416 * @return an iterator that traverses statuses of the files/directories 1417 * in the given path 1418 * If any IO exception (for example the input directory gets deleted while 1419 * listing is being executed), next() or hasNext() of the returned iterator 1420 * may throw a RuntimeException with the io exception as the cause. 1421 * 1422 * @throws AccessControlException If access is denied 1423 * @throws FileNotFoundException If <code>f</code> does not exist 1424 * @throws UnsupportedFileSystemException If file system for <code>f</code> is 1425 * not supported 1426 * @throws IOException If an I/O error occurred 1427 * 1428 * Exceptions applicable to file systems accessed over RPC: 1429 * @throws RpcClientException If an exception occurred in the RPC client 1430 * @throws RpcServerException If an exception occurred in the RPC server 1431 * @throws UnexpectedServerException If server implementation throws 1432 * undeclared exception to RPC server 1433 */ 1434 public RemoteIterator<LocatedFileStatus> listLocatedStatus( 1435 final Path f) throws 1436 AccessControlException, FileNotFoundException, 1437 UnsupportedFileSystemException, IOException { 1438 final Path absF = fixRelativePart(f); 1439 return new FSLinkResolver<RemoteIterator<LocatedFileStatus>>() { 1440 @Override 1441 public RemoteIterator<LocatedFileStatus> next( 1442 final AbstractFileSystem fs, final Path p) 1443 throws IOException, UnresolvedLinkException { 1444 return fs.listLocatedStatus(p); 1445 } 1446 }.resolve(this, absF); 1447 } 1448 1449 /** 1450 * Mark a path to be deleted on JVM shutdown. 1451 * 1452 * @param f the existing path to delete. 1453 * 1454 * @return true if deleteOnExit is successful, otherwise false. 
1455 * 1456 * @throws AccessControlException If access is denied 1457 * @throws UnsupportedFileSystemException If file system for <code>f</code> is 1458 * not supported 1459 * @throws IOException If an I/O error occurred 1460 * 1461 * Exceptions applicable to file systems accessed over RPC: 1462 * @throws RpcClientException If an exception occurred in the RPC client 1463 * @throws RpcServerException If an exception occurred in the RPC server 1464 * @throws UnexpectedServerException If server implementation throws 1465 * undeclared exception to RPC server 1466 */ 1467 public boolean deleteOnExit(Path f) throws AccessControlException, 1468 IOException { 1469 if (!this.util().exists(f)) { 1470 return false; 1471 } 1472 synchronized (DELETE_ON_EXIT) { 1473 if (DELETE_ON_EXIT.isEmpty()) { 1474 ShutdownHookManager.get().addShutdownHook(FINALIZER, SHUTDOWN_HOOK_PRIORITY); 1475 } 1476 1477 Set<Path> set = DELETE_ON_EXIT.get(this); 1478 if (set == null) { 1479 set = new TreeSet<Path>(); 1480 DELETE_ON_EXIT.put(this, set); 1481 } 1482 set.add(f); 1483 } 1484 return true; 1485 } 1486 1487 private final Util util; 1488 public Util util() { 1489 return util; 1490 } 1491 1492 1493 /** 1494 * Utility/library methods built over the basic FileContext methods. 1495 * Since this are library functions, the oprtation are not atomic 1496 * and some of them may partially complete if other threads are making 1497 * changes to the same part of the name space. 1498 */ 1499 public class Util { 1500 /** 1501 * Does the file exist? 1502 * Note: Avoid using this method if you already have FileStatus in hand. 
1503 * Instead reuse the FileStatus 1504 * @param f the file or dir to be checked 1505 * 1506 * @throws AccessControlException If access is denied 1507 * @throws IOException If an I/O error occurred 1508 * @throws UnsupportedFileSystemException If file system for <code>f</code> is 1509 * not supported 1510 * 1511 * Exceptions applicable to file systems accessed over RPC: 1512 * @throws RpcClientException If an exception occurred in the RPC client 1513 * @throws RpcServerException If an exception occurred in the RPC server 1514 * @throws UnexpectedServerException If server implementation throws 1515 * undeclared exception to RPC server 1516 */ 1517 public boolean exists(final Path f) throws AccessControlException, 1518 UnsupportedFileSystemException, IOException { 1519 try { 1520 FileStatus fs = FileContext.this.getFileStatus(f); 1521 assert fs != null; 1522 return true; 1523 } catch (FileNotFoundException e) { 1524 return false; 1525 } 1526 } 1527 1528 /** 1529 * Return a list of file status objects that corresponds to supplied paths 1530 * excluding those non-existent paths. 
1531 * 1532 * @param paths list of paths we want information from 1533 * 1534 * @return a list of FileStatus objects 1535 * 1536 * @throws AccessControlException If access is denied 1537 * @throws IOException If an I/O error occurred 1538 * 1539 * Exceptions applicable to file systems accessed over RPC: 1540 * @throws RpcClientException If an exception occurred in the RPC client 1541 * @throws RpcServerException If an exception occurred in the RPC server 1542 * @throws UnexpectedServerException If server implementation throws 1543 * undeclared exception to RPC server 1544 */ 1545 private FileStatus[] getFileStatus(Path[] paths) 1546 throws AccessControlException, IOException { 1547 if (paths == null) { 1548 return null; 1549 } 1550 ArrayList<FileStatus> results = new ArrayList<FileStatus>(paths.length); 1551 for (int i = 0; i < paths.length; i++) { 1552 try { 1553 results.add(FileContext.this.getFileStatus(paths[i])); 1554 } catch (FileNotFoundException fnfe) { 1555 // ignoring 1556 } 1557 } 1558 return results.toArray(new FileStatus[results.size()]); 1559 } 1560 1561 1562 /** 1563 * Return the {@link ContentSummary} of path f. 1564 * @param f path 1565 * 1566 * @return the {@link ContentSummary} of path f. 
1567 * 1568 * @throws AccessControlException If access is denied 1569 * @throws FileNotFoundException If <code>f</code> does not exist 1570 * @throws UnsupportedFileSystemException If file system for 1571 * <code>f</code> is not supported 1572 * @throws IOException If an I/O error occurred 1573 * 1574 * Exceptions applicable to file systems accessed over RPC: 1575 * @throws RpcClientException If an exception occurred in the RPC client 1576 * @throws RpcServerException If an exception occurred in the RPC server 1577 * @throws UnexpectedServerException If server implementation throws 1578 * undeclared exception to RPC server 1579 */ 1580 public ContentSummary getContentSummary(Path f) 1581 throws AccessControlException, FileNotFoundException, 1582 UnsupportedFileSystemException, IOException { 1583 FileStatus status = FileContext.this.getFileStatus(f); 1584 if (status.isFile()) { 1585 return new ContentSummary(status.getLen(), 1, 0); 1586 } 1587 long[] summary = {0, 0, 1}; 1588 RemoteIterator<FileStatus> statusIterator = 1589 FileContext.this.listStatus(f); 1590 while(statusIterator.hasNext()) { 1591 FileStatus s = statusIterator.next(); 1592 ContentSummary c = s.isDirectory() ? getContentSummary(s.getPath()) : 1593 new ContentSummary(s.getLen(), 1, 0); 1594 summary[0] += c.getLength(); 1595 summary[1] += c.getFileCount(); 1596 summary[2] += c.getDirectoryCount(); 1597 } 1598 return new ContentSummary(summary[0], summary[1], summary[2]); 1599 } 1600 1601 /** 1602 * See {@link #listStatus(Path[], PathFilter)} 1603 */ 1604 public FileStatus[] listStatus(Path[] files) throws AccessControlException, 1605 FileNotFoundException, IOException { 1606 return listStatus(files, DEFAULT_FILTER); 1607 } 1608 1609 /** 1610 * Filter files/directories in the given path using the user-supplied path 1611 * filter. 
1612 * 1613 * @param f is the path name 1614 * @param filter is the user-supplied path filter 1615 * 1616 * @return an array of FileStatus objects for the files under the given path 1617 * after applying the filter 1618 * 1619 * @throws AccessControlException If access is denied 1620 * @throws FileNotFoundException If <code>f</code> does not exist 1621 * @throws UnsupportedFileSystemException If file system for 1622 * <code>pathPattern</code> is not supported 1623 * @throws IOException If an I/O error occurred 1624 * 1625 * Exceptions applicable to file systems accessed over RPC: 1626 * @throws RpcClientException If an exception occurred in the RPC client 1627 * @throws RpcServerException If an exception occurred in the RPC server 1628 * @throws UnexpectedServerException If server implementation throws 1629 * undeclared exception to RPC server 1630 */ 1631 public FileStatus[] listStatus(Path f, PathFilter filter) 1632 throws AccessControlException, FileNotFoundException, 1633 UnsupportedFileSystemException, IOException { 1634 ArrayList<FileStatus> results = new ArrayList<FileStatus>(); 1635 listStatus(results, f, filter); 1636 return results.toArray(new FileStatus[results.size()]); 1637 } 1638 1639 /** 1640 * Filter files/directories in the given list of paths using user-supplied 1641 * path filter. 
1642 * 1643 * @param files is a list of paths 1644 * @param filter is the filter 1645 * 1646 * @return a list of statuses for the files under the given paths after 1647 * applying the filter 1648 * 1649 * @throws AccessControlException If access is denied 1650 * @throws FileNotFoundException If a file in <code>files</code> does not 1651 * exist 1652 * @throws IOException If an I/O error occurred 1653 * 1654 * Exceptions applicable to file systems accessed over RPC: 1655 * @throws RpcClientException If an exception occurred in the RPC client 1656 * @throws RpcServerException If an exception occurred in the RPC server 1657 * @throws UnexpectedServerException If server implementation throws 1658 * undeclared exception to RPC server 1659 */ 1660 public FileStatus[] listStatus(Path[] files, PathFilter filter) 1661 throws AccessControlException, FileNotFoundException, IOException { 1662 ArrayList<FileStatus> results = new ArrayList<FileStatus>(); 1663 for (int i = 0; i < files.length; i++) { 1664 listStatus(results, files[i], filter); 1665 } 1666 return results.toArray(new FileStatus[results.size()]); 1667 } 1668 1669 /* 1670 * Filter files/directories in the given path using the user-supplied path 1671 * filter. Results are added to the given array <code>results</code>. 1672 */ 1673 private void listStatus(ArrayList<FileStatus> results, Path f, 1674 PathFilter filter) throws AccessControlException, 1675 FileNotFoundException, IOException { 1676 FileStatus[] listing = listStatus(f); 1677 if (listing != null) { 1678 for (int i = 0; i < listing.length; i++) { 1679 if (filter.accept(listing[i].getPath())) { 1680 results.add(listing[i]); 1681 } 1682 } 1683 } 1684 } 1685 1686 /** 1687 * List the statuses of the files/directories in the given path 1688 * if the path is a directory. 
1689 * 1690 * @param f is the path 1691 * 1692 * @return an array that contains statuses of the files/directories 1693 * in the given path 1694 * 1695 * @throws AccessControlException If access is denied 1696 * @throws FileNotFoundException If <code>f</code> does not exist 1697 * @throws UnsupportedFileSystemException If file system for <code>f</code> is 1698 * not supported 1699 * @throws IOException If an I/O error occurred 1700 * 1701 * Exceptions applicable to file systems accessed over RPC: 1702 * @throws RpcClientException If an exception occurred in the RPC client 1703 * @throws RpcServerException If an exception occurred in the RPC server 1704 * @throws UnexpectedServerException If server implementation throws 1705 * undeclared exception to RPC server 1706 */ 1707 public FileStatus[] listStatus(final Path f) throws AccessControlException, 1708 FileNotFoundException, UnsupportedFileSystemException, 1709 IOException { 1710 final Path absF = fixRelativePart(f); 1711 return new FSLinkResolver<FileStatus[]>() { 1712 @Override 1713 public FileStatus[] next(final AbstractFileSystem fs, final Path p) 1714 throws IOException, UnresolvedLinkException { 1715 return fs.listStatus(p); 1716 } 1717 }.resolve(FileContext.this, absF); 1718 } 1719 1720 /** 1721 * List the statuses and block locations of the files in the given path. 1722 * 1723 * If the path is a directory, 1724 * if recursive is false, returns files in the directory; 1725 * if recursive is true, return files in the subtree rooted at the path. 1726 * The subtree is traversed in the depth-first order. 1727 * If the path is a file, return the file's status and block locations. 1728 * Files across symbolic links are also returned. 
1729 * 1730 * @param f is the path 1731 * @param recursive if the subdirectories need to be traversed recursively 1732 * 1733 * @return an iterator that traverses statuses of the files 1734 * If any IO exception (for example a sub-directory gets deleted while 1735 * listing is being executed), next() or hasNext() of the returned iterator 1736 * may throw a RuntimeException with the IO exception as the cause. 1737 * 1738 * @throws AccessControlException If access is denied 1739 * @throws FileNotFoundException If <code>f</code> does not exist 1740 * @throws UnsupportedFileSystemException If file system for <code>f</code> 1741 * is not supported 1742 * @throws IOException If an I/O error occurred 1743 * 1744 * Exceptions applicable to file systems accessed over RPC: 1745 * @throws RpcClientException If an exception occurred in the RPC client 1746 * @throws RpcServerException If an exception occurred in the RPC server 1747 * @throws UnexpectedServerException If server implementation throws 1748 * undeclared exception to RPC server 1749 */ 1750 public RemoteIterator<LocatedFileStatus> listFiles( 1751 final Path f, final boolean recursive) throws AccessControlException, 1752 FileNotFoundException, UnsupportedFileSystemException, 1753 IOException { 1754 return new RemoteIterator<LocatedFileStatus>() { 1755 private Stack<RemoteIterator<LocatedFileStatus>> itors = 1756 new Stack<RemoteIterator<LocatedFileStatus>>(); 1757 RemoteIterator<LocatedFileStatus> curItor = listLocatedStatus(f); 1758 LocatedFileStatus curFile; 1759 1760 /** 1761 * Returns <tt>true</tt> if the iterator has more files. 1762 * 1763 * @return <tt>true</tt> if the iterator has more files. 
1764 * @throws AccessControlException if not allowed to access next 1765 * file's status or locations 1766 * @throws FileNotFoundException if next file does not exist any more 1767 * @throws UnsupportedFileSystemException if next file's 1768 * fs is unsupported 1769 * @throws IOException for all other IO errors 1770 * for example, NameNode is not avaialbe or 1771 * NameNode throws IOException due to an error 1772 * while getting the status or block locations 1773 */ 1774 @Override 1775 public boolean hasNext() throws IOException { 1776 while (curFile == null) { 1777 if (curItor.hasNext()) { 1778 handleFileStat(curItor.next()); 1779 } else if (!itors.empty()) { 1780 curItor = itors.pop(); 1781 } else { 1782 return false; 1783 } 1784 } 1785 return true; 1786 } 1787 1788 /** 1789 * Process the input stat. 1790 * If it is a file, return the file stat. 1791 * If it is a directory, traverse the directory if recursive is true; 1792 * ignore it if recursive is false. 1793 * If it is a symlink, resolve the symlink first and then process it 1794 * depending on if it is a file or directory. 
1795 * @param stat input status 1796 * @throws AccessControlException if access is denied 1797 * @throws FileNotFoundException if file is not found 1798 * @throws UnsupportedFileSystemException if fs is not supported 1799 * @throws IOException for all other IO errors 1800 */ 1801 private void handleFileStat(LocatedFileStatus stat) 1802 throws IOException { 1803 if (stat.isFile()) { // file 1804 curFile = stat; 1805 } else if (stat.isSymlink()) { // symbolic link 1806 // resolve symbolic link 1807 FileStatus symstat = FileContext.this.getFileStatus( 1808 stat.getSymlink()); 1809 if (symstat.isFile() || (recursive && symstat.isDirectory())) { 1810 itors.push(curItor); 1811 curItor = listLocatedStatus(stat.getPath()); 1812 } 1813 } else if (recursive) { // directory 1814 itors.push(curItor); 1815 curItor = listLocatedStatus(stat.getPath()); 1816 } 1817 } 1818 1819 /** 1820 * Returns the next file's status with its block locations 1821 * 1822 * @throws AccessControlException if not allowed to access next 1823 * file's status or locations 1824 * @throws FileNotFoundException if next file does not exist any more 1825 * @throws UnsupportedFileSystemException if next file's 1826 * fs is unsupported 1827 * @throws IOException for all other IO errors 1828 * for example, NameNode is not avaialbe or 1829 * NameNode throws IOException due to an error 1830 * while getting the status or block locations 1831 */ 1832 @Override 1833 public LocatedFileStatus next() throws IOException { 1834 if (hasNext()) { 1835 LocatedFileStatus result = curFile; 1836 curFile = null; 1837 return result; 1838 } 1839 throw new java.util.NoSuchElementException("No more entry in " + f); 1840 } 1841 }; 1842 } 1843 1844 /** 1845 * <p>Return all the files that match filePattern and are not checksum 1846 * files. Results are sorted by their names. 
1847 * 1848 * <p> 1849 * A filename pattern is composed of <i>regular</i> characters and 1850 * <i>special pattern matching</i> characters, which are: 1851 * 1852 * <dl> 1853 * <dd> 1854 * <dl> 1855 * <p> 1856 * <dt> <tt> ? </tt> 1857 * <dd> Matches any single character. 1858 * 1859 * <p> 1860 * <dt> <tt> * </tt> 1861 * <dd> Matches zero or more characters. 1862 * 1863 * <p> 1864 * <dt> <tt> [<i>abc</i>] </tt> 1865 * <dd> Matches a single character from character set 1866 * <tt>{<i>a,b,c</i>}</tt>. 1867 * 1868 * <p> 1869 * <dt> <tt> [<i>a</i>-<i>b</i>] </tt> 1870 * <dd> Matches a single character from the character range 1871 * <tt>{<i>a...b</i>}</tt>. Note: character <tt><i>a</i></tt> must be 1872 * lexicographically less than or equal to character <tt><i>b</i></tt>. 1873 * 1874 * <p> 1875 * <dt> <tt> [^<i>a</i>] </tt> 1876 * <dd> Matches a single char that is not from character set or range 1877 * <tt>{<i>a</i>}</tt>. Note that the <tt>^</tt> character must occur 1878 * immediately to the right of the opening bracket. 1879 * 1880 * <p> 1881 * <dt> <tt> \<i>c</i> </tt> 1882 * <dd> Removes (escapes) any special meaning of character <i>c</i>. 
1883 * 1884 * <p> 1885 * <dt> <tt> {ab,cd} </tt> 1886 * <dd> Matches a string from the string set <tt>{<i>ab, cd</i>} </tt> 1887 * 1888 * <p> 1889 * <dt> <tt> {ab,c{de,fh}} </tt> 1890 * <dd> Matches a string from string set <tt>{<i>ab, cde, cfh</i>}</tt> 1891 * 1892 * </dl> 1893 * </dd> 1894 * </dl> 1895 * 1896 * @param pathPattern a regular expression specifying a pth pattern 1897 * 1898 * @return an array of paths that match the path pattern 1899 * 1900 * @throws AccessControlException If access is denied 1901 * @throws UnsupportedFileSystemException If file system for 1902 * <code>pathPattern</code> is not supported 1903 * @throws IOException If an I/O error occurred 1904 * 1905 * Exceptions applicable to file systems accessed over RPC: 1906 * @throws RpcClientException If an exception occurred in the RPC client 1907 * @throws RpcServerException If an exception occurred in the RPC server 1908 * @throws UnexpectedServerException If server implementation throws 1909 * undeclared exception to RPC server 1910 */ 1911 public FileStatus[] globStatus(Path pathPattern) 1912 throws AccessControlException, UnsupportedFileSystemException, 1913 IOException { 1914 return globStatus(pathPattern, DEFAULT_FILTER); 1915 } 1916 1917 /** 1918 * Return an array of FileStatus objects whose path names match pathPattern 1919 * and is accepted by the user-supplied path filter. Results are sorted by 1920 * their path names. 1921 * Return null if pathPattern has no glob and the path does not exist. 1922 * Return an empty array if pathPattern has a glob and no path matches it. 
1923 * 1924 * @param pathPattern regular expression specifying the path pattern 1925 * @param filter user-supplied path filter 1926 * 1927 * @return an array of FileStatus objects 1928 * 1929 * @throws AccessControlException If access is denied 1930 * @throws UnsupportedFileSystemException If file system for 1931 * <code>pathPattern</code> is not supported 1932 * @throws IOException If an I/O error occurred 1933 * 1934 * Exceptions applicable to file systems accessed over RPC: 1935 * @throws RpcClientException If an exception occurred in the RPC client 1936 * @throws RpcServerException If an exception occurred in the RPC server 1937 * @throws UnexpectedServerException If server implementation throws 1938 * undeclared exception to RPC server 1939 */ 1940 public FileStatus[] globStatus(final Path pathPattern, 1941 final PathFilter filter) throws AccessControlException, 1942 UnsupportedFileSystemException, IOException { 1943 URI uri = getFSofPath(fixRelativePart(pathPattern)).getUri(); 1944 1945 String filename = pathPattern.toUri().getPath(); 1946 1947 List<String> filePatterns = GlobExpander.expand(filename); 1948 if (filePatterns.size() == 1) { 1949 Path absPathPattern = fixRelativePart(pathPattern); 1950 return globStatusInternal(uri, new Path(absPathPattern.toUri() 1951 .getPath()), filter); 1952 } else { 1953 List<FileStatus> results = new ArrayList<FileStatus>(); 1954 for (String iFilePattern : filePatterns) { 1955 Path iAbsFilePattern = fixRelativePart(new Path(iFilePattern)); 1956 FileStatus[] files = globStatusInternal(uri, iAbsFilePattern, filter); 1957 for (FileStatus file : files) { 1958 results.add(file); 1959 } 1960 } 1961 return results.toArray(new FileStatus[results.size()]); 1962 } 1963 } 1964 1965 /** 1966 * 1967 * @param uri for all the inPathPattern 1968 * @param inPathPattern - without the scheme & authority (take from uri) 1969 * @param filter 1970 * 1971 * @return an array of FileStatus objects 1972 * 1973 * @throws AccessControlException If 
access is denied 1974 * @throws IOException If an I/O error occurred 1975 */ 1976 private FileStatus[] globStatusInternal(final URI uri, 1977 final Path inPathPattern, final PathFilter filter) 1978 throws AccessControlException, IOException 1979 { 1980 Path[] parents = new Path[1]; 1981 int level = 0; 1982 1983 assert(inPathPattern.toUri().getScheme() == null && 1984 inPathPattern.toUri().getAuthority() == null && 1985 inPathPattern.isUriPathAbsolute()); 1986 1987 1988 String filename = inPathPattern.toUri().getPath(); 1989 1990 // path has only zero component 1991 if ("".equals(filename) || Path.SEPARATOR.equals(filename)) { 1992 Path p = inPathPattern.makeQualified(uri, null); 1993 return getFileStatus(new Path[]{p}); 1994 } 1995 1996 // path has at least one component 1997 String[] components = filename.split(Path.SEPARATOR); 1998 1999 // Path is absolute, first component is "/" hence first component 2000 // is the uri root 2001 parents[0] = new Path(new Path(uri), new Path("/")); 2002 level = 1; 2003 2004 // glob the paths that match the parent path, ie. 
[0, components.length-1] 2005 boolean[] hasGlob = new boolean[]{false}; 2006 Path[] relParentPaths = 2007 globPathsLevel(parents, components, level, hasGlob); 2008 FileStatus[] results; 2009 2010 if (relParentPaths == null || relParentPaths.length == 0) { 2011 results = null; 2012 } else { 2013 // fix the pathes to be abs 2014 Path[] parentPaths = new Path [relParentPaths.length]; 2015 for(int i=0; i<relParentPaths.length; i++) { 2016 parentPaths[i] = relParentPaths[i].makeQualified(uri, null); 2017 } 2018 2019 // Now work on the last component of the path 2020 GlobFilter fp = 2021 new GlobFilter(components[components.length - 1], filter); 2022 if (fp.hasPattern()) { // last component has a pattern 2023 // list parent directories and then glob the results 2024 try { 2025 results = listStatus(parentPaths, fp); 2026 } catch (FileNotFoundException e) { 2027 results = null; 2028 } 2029 hasGlob[0] = true; 2030 } else { // last component does not have a pattern 2031 // get all the path names 2032 ArrayList<Path> filteredPaths = 2033 new ArrayList<Path>(parentPaths.length); 2034 for (int i = 0; i < parentPaths.length; i++) { 2035 parentPaths[i] = new Path(parentPaths[i], 2036 components[components.length - 1]); 2037 if (fp.accept(parentPaths[i])) { 2038 filteredPaths.add(parentPaths[i]); 2039 } 2040 } 2041 // get all their statuses 2042 results = getFileStatus( 2043 filteredPaths.toArray(new Path[filteredPaths.size()])); 2044 } 2045 } 2046 2047 // Decide if the pathPattern contains a glob or not 2048 if (results == null) { 2049 if (hasGlob[0]) { 2050 results = new FileStatus[0]; 2051 } 2052 } else { 2053 if (results.length == 0) { 2054 if (!hasGlob[0]) { 2055 results = null; 2056 } 2057 } else { 2058 Arrays.sort(results); 2059 } 2060 } 2061 return results; 2062 } 2063 2064 /* 2065 * For a path of N components, return a list of paths that match the 2066 * components [<code>level</code>, <code>N-1</code>]. 
2067 */ 2068 private Path[] globPathsLevel(Path[] parents, String[] filePattern, 2069 int level, boolean[] hasGlob) throws AccessControlException, 2070 FileNotFoundException, IOException { 2071 if (level == filePattern.length - 1) { 2072 return parents; 2073 } 2074 if (parents == null || parents.length == 0) { 2075 return null; 2076 } 2077 GlobFilter fp = new GlobFilter(filePattern[level]); 2078 if (fp.hasPattern()) { 2079 try { 2080 parents = FileUtil.stat2Paths(listStatus(parents, fp)); 2081 } catch (FileNotFoundException e) { 2082 parents = null; 2083 } 2084 hasGlob[0] = true; 2085 } else { 2086 for (int i = 0; i < parents.length; i++) { 2087 parents[i] = new Path(parents[i], filePattern[level]); 2088 } 2089 } 2090 return globPathsLevel(parents, filePattern, level + 1, hasGlob); 2091 } 2092 2093 /** 2094 * Copy file from src to dest. See 2095 * {@link #copy(Path, Path, boolean, boolean)} 2096 */ 2097 public boolean copy(final Path src, final Path dst) 2098 throws AccessControlException, FileAlreadyExistsException, 2099 FileNotFoundException, ParentNotDirectoryException, 2100 UnsupportedFileSystemException, IOException { 2101 return copy(src, dst, false, false); 2102 } 2103 2104 /** 2105 * Copy from src to dst, optionally deleting src and overwriting dst. 2106 * @param src 2107 * @param dst 2108 * @param deleteSource - delete src if true 2109 * @param overwrite overwrite dst if true; throw IOException if dst exists 2110 * and overwrite is false. 
2111 * 2112 * @return true if copy is successful 2113 * 2114 * @throws AccessControlException If access is denied 2115 * @throws FileAlreadyExistsException If <code>dst</code> already exists 2116 * @throws FileNotFoundException If <code>src</code> does not exist 2117 * @throws ParentNotDirectoryException If parent of <code>dst</code> is not 2118 * a directory 2119 * @throws UnsupportedFileSystemException If file system for 2120 * <code>src</code> or <code>dst</code> is not supported 2121 * @throws IOException If an I/O error occurred 2122 * 2123 * Exceptions applicable to file systems accessed over RPC: 2124 * @throws RpcClientException If an exception occurred in the RPC client 2125 * @throws RpcServerException If an exception occurred in the RPC server 2126 * @throws UnexpectedServerException If server implementation throws 2127 * undeclared exception to RPC server 2128 * 2129 * RuntimeExceptions: 2130 * @throws InvalidPathException If path <code>dst</code> is invalid 2131 */ 2132 public boolean copy(final Path src, final Path dst, boolean deleteSource, 2133 boolean overwrite) throws AccessControlException, 2134 FileAlreadyExistsException, FileNotFoundException, 2135 ParentNotDirectoryException, UnsupportedFileSystemException, 2136 IOException { 2137 src.checkNotSchemeWithRelative(); 2138 dst.checkNotSchemeWithRelative(); 2139 Path qSrc = makeQualified(src); 2140 Path qDst = makeQualified(dst); 2141 checkDest(qSrc.getName(), qDst, overwrite); 2142 FileStatus fs = FileContext.this.getFileStatus(qSrc); 2143 if (fs.isDirectory()) { 2144 checkDependencies(qSrc, qDst); 2145 mkdir(qDst, FsPermission.getDirDefault(), true); 2146 FileStatus[] contents = listStatus(qSrc); 2147 for (FileStatus content : contents) { 2148 copy(makeQualified(content.getPath()), makeQualified(new Path(qDst, 2149 content.getPath().getName())), deleteSource, overwrite); 2150 } 2151 } else { 2152 InputStream in=null; 2153 OutputStream out = null; 2154 try { 2155 in = open(qSrc); 2156 
EnumSet<CreateFlag> createFlag = overwrite ? EnumSet.of( 2157 CreateFlag.CREATE, CreateFlag.OVERWRITE) : 2158 EnumSet.of(CreateFlag.CREATE); 2159 out = create(qDst, createFlag); 2160 IOUtils.copyBytes(in, out, conf, true); 2161 } catch (IOException e) { 2162 IOUtils.closeStream(out); 2163 IOUtils.closeStream(in); 2164 throw e; 2165 } 2166 } 2167 if (deleteSource) { 2168 return delete(qSrc, true); 2169 } else { 2170 return true; 2171 } 2172 } 2173 } 2174 2175 /** 2176 * Check if copying srcName to dst would overwrite an existing 2177 * file or directory. 2178 * @param srcName File or directory to be copied. 2179 * @param dst Destination to copy srcName to. 2180 * @param overwrite Whether it's ok to overwrite an existing file. 2181 * @throws AccessControlException If access is denied. 2182 * @throws IOException If dst is an existing directory, or dst is an 2183 * existing file and the overwrite option is not passed. 2184 */ 2185 private void checkDest(String srcName, Path dst, boolean overwrite) 2186 throws AccessControlException, IOException { 2187 try { 2188 FileStatus dstFs = getFileStatus(dst); 2189 if (dstFs.isDirectory()) { 2190 if (null == srcName) { 2191 throw new IOException("Target " + dst + " is a directory"); 2192 } 2193 // Recurse to check if dst/srcName exists. 2194 checkDest(null, new Path(dst, srcName), overwrite); 2195 } else if (!overwrite) { 2196 throw new IOException("Target " + new Path(dst, srcName) 2197 + " already exists"); 2198 } 2199 } catch (FileNotFoundException e) { 2200 // dst does not exist - OK to copy. 
2201 } 2202 } 2203 2204 // 2205 // If the destination is a subdirectory of the source, then 2206 // generate exception 2207 // 2208 private static void checkDependencies(Path qualSrc, Path qualDst) 2209 throws IOException { 2210 if (isSameFS(qualSrc, qualDst)) { 2211 String srcq = qualSrc.toString() + Path.SEPARATOR; 2212 String dstq = qualDst.toString() + Path.SEPARATOR; 2213 if (dstq.startsWith(srcq)) { 2214 if (srcq.length() == dstq.length()) { 2215 throw new IOException("Cannot copy " + qualSrc + " to itself."); 2216 } else { 2217 throw new IOException("Cannot copy " + qualSrc + 2218 " to its subdirectory " + qualDst); 2219 } 2220 } 2221 } 2222 } 2223 2224 /** 2225 * Are qualSrc and qualDst of the same file system? 2226 * @param qualPath1 - fully qualified path 2227 * @param qualPath2 - fully qualified path 2228 * @return 2229 */ 2230 private static boolean isSameFS(Path qualPath1, Path qualPath2) { 2231 URI srcUri = qualPath1.toUri(); 2232 URI dstUri = qualPath2.toUri(); 2233 return (srcUri.getScheme().equals(dstUri.getScheme()) && 2234 !(srcUri.getAuthority() != null && dstUri.getAuthority() != null && srcUri 2235 .getAuthority().equals(dstUri.getAuthority()))); 2236 } 2237 2238 /** 2239 * Deletes all the paths in deleteOnExit on JVM shutdown. 2240 */ 2241 static class FileContextFinalizer implements Runnable { 2242 @Override 2243 public synchronized void run() { 2244 processDeleteOnExit(); 2245 } 2246 } 2247 2248 /** 2249 * Resolves all symbolic links in the specified path. 2250 * Returns the new path object. 
2251 */ 2252 protected Path resolve(final Path f) throws FileNotFoundException, 2253 UnresolvedLinkException, AccessControlException, IOException { 2254 return new FSLinkResolver<Path>() { 2255 @Override 2256 public Path next(final AbstractFileSystem fs, final Path p) 2257 throws IOException, UnresolvedLinkException { 2258 return fs.resolvePath(p); 2259 } 2260 }.resolve(this, f); 2261 } 2262 2263 /** 2264 * Resolves all symbolic links in the specified path leading up 2265 * to, but not including the final path component. 2266 * @param f path to resolve 2267 * @return the new path object. 2268 */ 2269 protected Path resolveIntermediate(final Path f) throws IOException { 2270 return new FSLinkResolver<FileStatus>() { 2271 @Override 2272 public FileStatus next(final AbstractFileSystem fs, final Path p) 2273 throws IOException, UnresolvedLinkException { 2274 return fs.getFileLinkStatus(p); 2275 } 2276 }.resolve(this, f).getPath(); 2277 } 2278 2279 /** 2280 * Returns the list of AbstractFileSystems accessed in the path. The list may 2281 * contain more than one AbstractFileSystems objects in case of symlinks. 2282 * 2283 * @param f 2284 * Path which needs to be resolved 2285 * @return List of AbstractFileSystems accessed in the path 2286 * @throws IOException 2287 */ 2288 Set<AbstractFileSystem> resolveAbstractFileSystems(final Path f) 2289 throws IOException { 2290 final Path absF = fixRelativePart(f); 2291 final HashSet<AbstractFileSystem> result 2292 = new HashSet<AbstractFileSystem>(); 2293 new FSLinkResolver<Void>() { 2294 @Override 2295 public Void next(final AbstractFileSystem fs, final Path p) 2296 throws IOException, UnresolvedLinkException { 2297 result.add(fs); 2298 fs.getFileStatus(p); 2299 return null; 2300 } 2301 }.resolve(this, absF); 2302 return result; 2303 } 2304 2305 /** 2306 * Get the statistics for a particular file system 2307 * 2308 * @param uri 2309 * the uri to lookup the statistics. 
Only scheme and authority part
   *          of the uri are used as the key to store and lookup.
   * @return a statistics object
   */
  public static Statistics getStatistics(URI uri) {
    final Statistics statistics = AbstractFileSystem.getStatistics(uri);
    return statistics;
  }

  /**
   * Clears all the statistics stored in AbstractFileSystem, for all the file
   * systems.
   */
  public static void clearStatistics() {
    AbstractFileSystem.clearStatistics();
  }

  /**
   * Prints the statistics to standard output. File System is identified by the
   * scheme and authority.
   */
  public static void printStatistics() {
    AbstractFileSystem.printStatistics();
  }

  /**
   * @return Map of uri and statistics for each filesystem instantiated. The uri
   *         consists of scheme and authority for the filesystem.
   */
  public static Map<URI, Statistics> getAllStatistics() {
    final Map<URI, Statistics> allStatistics =
        AbstractFileSystem.getAllStatistics();
    return allStatistics;
  }

  /**
   * Get delegation tokens for the file systems accessed for a given
   * path.
   * @param p Path for which delegations tokens are requested.
   * @param renewer the account name that is allowed to renew the token.
   * @return List of delegation tokens.
   * @throws IOException
   */
  @InterfaceAudience.LimitedPrivate( { "HDFS", "MapReduce" })
  public List<Token<?>> getDelegationTokens(
      Path p, String renewer) throws IOException {
    final Set<AbstractFileSystem> fileSystems = resolveAbstractFileSystems(p);
    final List<Token<?>> tokens = new ArrayList<Token<?>>();
    // Concatenate the tokens of every file system touched while resolving p.
    for (AbstractFileSystem fileSystem : fileSystems) {
      tokens.addAll(fileSystem.getDelegationTokens(renewer));
    }
    return tokens;
  }
}