001 /** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018 package org.apache.hadoop.fs; 019 020 import java.io.FileNotFoundException; 021 import java.io.IOException; 022 import java.io.InputStream; 023 import java.io.OutputStream; 024 import java.net.URI; 025 import java.security.PrivilegedExceptionAction; 026 import java.util.ArrayList; 027 import java.util.Arrays; 028 import java.util.EnumSet; 029 import java.util.HashSet; 030 import java.util.IdentityHashMap; 031 import java.util.List; 032 import java.util.Map; 033 import java.util.Set; 034 import java.util.Stack; 035 import java.util.TreeSet; 036 import java.util.Map.Entry; 037 038 import org.apache.commons.logging.Log; 039 import org.apache.commons.logging.LogFactory; 040 import org.apache.hadoop.HadoopIllegalArgumentException; 041 import org.apache.hadoop.classification.InterfaceAudience; 042 import org.apache.hadoop.classification.InterfaceStability; 043 import org.apache.hadoop.conf.Configuration; 044 import org.apache.hadoop.fs.FileSystem.Statistics; 045 import org.apache.hadoop.fs.Options.CreateOpts; 046 import org.apache.hadoop.fs.permission.FsPermission; 047 import static 
org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY; 048 import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_DEFAULT_NAME_DEFAULT; 049 import org.apache.hadoop.io.IOUtils; 050 import org.apache.hadoop.ipc.RpcClientException; 051 import org.apache.hadoop.ipc.RpcServerException; 052 import org.apache.hadoop.ipc.UnexpectedServerException; 053 import org.apache.hadoop.fs.InvalidPathException; 054 import org.apache.hadoop.security.AccessControlException; 055 import org.apache.hadoop.security.UserGroupInformation; 056 import org.apache.hadoop.security.token.Token; 057 import org.apache.hadoop.util.ShutdownHookManager; 058 059 /** 060 * The FileContext class provides an interface to the application writer for 061 * using the Hadoop file system. 062 * It provides a set of methods for the usual operation: create, open, 063 * list, etc 064 * 065 * <p> 066 * <b> *** Path Names *** </b> 067 * <p> 068 * 069 * The Hadoop file system supports a URI name space and URI names. 070 * It offers a forest of file systems that can be referenced using fully 071 * qualified URIs. 072 * Two common Hadoop file systems implementations are 073 * <ul> 074 * <li> the local file system: file:///path 075 * <li> the hdfs file system hdfs://nnAddress:nnPort/path 076 * </ul> 077 * 078 * While URI names are very flexible, it requires knowing the name or address 079 * of the server. For convenience one often wants to access the default system 080 * in one's environment without knowing its name/address. This has an 081 * additional benefit that it allows one to change one's default fs 082 * (e.g. admin moves application from cluster1 to cluster2). 083 * <p> 084 * 085 * To facilitate this, Hadoop supports a notion of a default file system. 086 * The user can set his default file system, although this is 087 * typically set up for you in your environment via your default config. 
088 * A default file system implies a default scheme and authority; slash-relative 089 * names (such as /foo/bar) are resolved relative to that default FS. 090 * Similarly a user can also have working-directory-relative names (i.e. names 091 * not starting with a slash). While the working directory is generally in the 092 * same default FS, the wd can be in a different FS. 093 * <p> 094 * Hence Hadoop path names can be one of: 095 * <ul> 096 * <li> fully qualified URI: scheme://authority/path 097 * <li> slash relative names: /path relative to the default file system 098 * <li> wd-relative names: path relative to the working dir 099 * </ul> 100 * Relative paths with scheme (scheme:foo/bar) are illegal. 101 * 102 * <p> 103 * <b>****The Role of the FileContext and configuration defaults****</b> 104 * <p> 105 * The FileContext provides file namespace context for resolving file names; 106 * it also contains the umask for permissions. In that sense it is like the 107 * per-process file-related state in a Unix system. 108 * These two properties 109 * <ul> 110 * <li> default file system (i.e. your slash) 111 * <li> umask 112 * </ul> 113 * in general, are obtained from the default configuration file 114 * in your environment (see {@link Configuration}). 115 * 116 * No other configuration parameters are obtained from the default config as 117 * far as the file context layer is concerned. All file system instances 118 * (i.e. deployments of file systems) have default properties; we call these 119 * server side (SS) defaults. Operations like create allow one to select many 120 * properties: either pass them in as explicit parameters or use 121 * the SS properties. 122 * <p> 123 * The file system related SS defaults are 124 * <ul> 125 * <li> the home directory (default is "/user/userName") 126 * <li> the initial wd (only for local fs) 127 * <li> replication factor 128 * <li> block size 129 * <li> buffer size 130 * <li> encryptDataTransfer 131 * <li> checksum option. 
(checksumType and bytesPerChecksum) 132 * </ul> 133 * 134 * <p> 135 * <b> *** Usage Model for the FileContext class *** </b> 136 * <p> 137 * Example 1: use the default config read from the $HADOOP_CONFIG/core.xml. 138 * Unspecified values come from core-defaults.xml in the release jar. 139 * <ul> 140 * <li> myFContext = FileContext.getFileContext(); // uses the default config 141 * // which has your default FS 142 * <li> myFContext.create(path, ...); 143 * <li> myFContext.setWorkingDir(path) 144 * <li> myFContext.open (path, ...); 145 * </ul> 146 * Example 2: Get a FileContext with a specific URI as the default FS 147 * <ul> 148 * <li> myFContext = FileContext.getFileContext(URI) 149 * <li> myFContext.create(path, ...); 150 * ... 151 * </ul> 152 * Example 3: FileContext with local file system as the default 153 * <ul> 154 * <li> myFContext = FileContext.getLocalFSFileContext() 155 * <li> myFContext.create(path, ...); 156 * <li> ... 157 * </ul> 158 * Example 4: Use a specific config, ignoring $HADOOP_CONFIG 159 * Generally you should not need use a config unless you are doing 160 * <ul> 161 * <li> configX = someConfigSomeOnePassedToYou. 162 * <li> myFContext = getFileContext(configX); // configX is not changed, 163 * // is passed down 164 * <li> myFContext.create(path, ...); 165 * <li>... 166 * </ul> 167 * 168 */ 169 170 @InterfaceAudience.Public 171 @InterfaceStability.Evolving /*Evolving for a release,to be changed to Stable */ 172 public final class FileContext { 173 174 public static final Log LOG = LogFactory.getLog(FileContext.class); 175 public static final FsPermission DEFAULT_PERM = FsPermission.getDefault(); 176 177 /** 178 * Priority of the FileContext shutdown hook. 179 */ 180 public static final int SHUTDOWN_HOOK_PRIORITY = 20; 181 182 /** 183 * List of files that should be deleted on JVM shutdown. 184 */ 185 static final Map<FileContext, Set<Path>> DELETE_ON_EXIT = 186 new IdentityHashMap<FileContext, Set<Path>>(); 187 188 /** JVM shutdown hook thread. 
*/ 189 static final FileContextFinalizer FINALIZER = 190 new FileContextFinalizer(); 191 192 private static final PathFilter DEFAULT_FILTER = new PathFilter() { 193 public boolean accept(final Path file) { 194 return true; 195 } 196 }; 197 198 /** 199 * The FileContext is defined by. 200 * 1) defaultFS (slash) 201 * 2) wd 202 * 3) umask 203 */ 204 private final AbstractFileSystem defaultFS; //default FS for this FileContext. 205 private Path workingDir; // Fully qualified 206 private FsPermission umask; 207 private final Configuration conf; 208 private final UserGroupInformation ugi; 209 210 private FileContext(final AbstractFileSystem defFs, 211 final FsPermission theUmask, final Configuration aConf) { 212 defaultFS = defFs; 213 umask = FsPermission.getUMask(aConf); 214 conf = aConf; 215 try { 216 ugi = UserGroupInformation.getCurrentUser(); 217 } catch (IOException e) { 218 LOG.error("Exception in getCurrentUser: ",e); 219 throw new RuntimeException("Failed to get the current user " + 220 "while creating a FileContext", e); 221 } 222 /* 223 * Init the wd. 224 * WorkingDir is implemented at the FileContext layer 225 * NOT at the AbstractFileSystem layer. 226 * If the DefaultFS, such as localFilesystem has a notion of 227 * builtin WD, we use that as the initial WD. 228 * Otherwise the WD is initialized to the home directory. 229 */ 230 workingDir = defaultFS.getInitialWorkingDirectory(); 231 if (workingDir == null) { 232 workingDir = defaultFS.getHomeDirectory(); 233 } 234 util = new Util(); // for the inner class 235 } 236 237 /* 238 * Remove relative part - return "absolute": 239 * If input is relative path ("foo/bar") add wd: ie "/<workingDir>/foo/bar" 240 * A fully qualified uri ("hdfs://nn:p/foo/bar") or a slash-relative path 241 * ("/foo/bar") are returned unchanged. 242 * 243 * Applications that use FileContext should use #makeQualified() since 244 * they really want a fully qualified URI. 
245 * Hence this method is not called makeAbsolute() and 246 * has been deliberately declared private. 247 */ 248 private Path fixRelativePart(Path p) { 249 if (p.isUriPathAbsolute()) { 250 return p; 251 } else { 252 return new Path(workingDir, p); 253 } 254 } 255 256 /** 257 * Delete all the paths that were marked as delete-on-exit. 258 */ 259 static void processDeleteOnExit() { 260 synchronized (DELETE_ON_EXIT) { 261 Set<Entry<FileContext, Set<Path>>> set = DELETE_ON_EXIT.entrySet(); 262 for (Entry<FileContext, Set<Path>> entry : set) { 263 FileContext fc = entry.getKey(); 264 Set<Path> paths = entry.getValue(); 265 for (Path path : paths) { 266 try { 267 fc.delete(path, true); 268 } catch (IOException e) { 269 LOG.warn("Ignoring failure to deleteOnExit for path " + path); 270 } 271 } 272 } 273 DELETE_ON_EXIT.clear(); 274 } 275 } 276 277 /** 278 * Pathnames with scheme and relative path are illegal. 279 * @param path to be checked 280 */ 281 private static void checkNotSchemeWithRelative(final Path path) { 282 if (path.toUri().isAbsolute() && !path.isUriPathAbsolute()) { 283 throw new HadoopIllegalArgumentException( 284 "Unsupported name: has scheme but relative path-part"); 285 } 286 } 287 288 /** 289 * Get the file system of supplied path. 290 * 291 * @param absOrFqPath - absolute or fully qualified path 292 * @return the file system of the path 293 * 294 * @throws UnsupportedFileSystemException If the file system for 295 * <code>absOrFqPath</code> is not supported. 296 * @throws IOExcepton If the file system for <code>absOrFqPath</code> could 297 * not be instantiated. 
298 */ 299 private AbstractFileSystem getFSofPath(final Path absOrFqPath) 300 throws UnsupportedFileSystemException, IOException { 301 checkNotSchemeWithRelative(absOrFqPath); 302 if (!absOrFqPath.isAbsolute() && absOrFqPath.toUri().getScheme() == null) { 303 throw new HadoopIllegalArgumentException( 304 "FileContext Bug: path is relative"); 305 } 306 307 try { 308 // Is it the default FS for this FileContext? 309 defaultFS.checkPath(absOrFqPath); 310 return defaultFS; 311 } catch (Exception e) { // it is different FileSystem 312 return getAbstractFileSystem(ugi, absOrFqPath.toUri(), conf); 313 } 314 } 315 316 private static AbstractFileSystem getAbstractFileSystem( 317 UserGroupInformation user, final URI uri, final Configuration conf) 318 throws UnsupportedFileSystemException, IOException { 319 try { 320 return user.doAs(new PrivilegedExceptionAction<AbstractFileSystem>() { 321 public AbstractFileSystem run() throws UnsupportedFileSystemException { 322 return AbstractFileSystem.get(uri, conf); 323 } 324 }); 325 } catch (InterruptedException ex) { 326 LOG.error(ex); 327 throw new IOException("Failed to get the AbstractFileSystem for path: " 328 + uri, ex); 329 } 330 } 331 332 /** 333 * Protected Static Factory methods for getting a FileContexts 334 * that take a AbstractFileSystem as input. To be used for testing. 335 */ 336 337 /** 338 * Create a FileContext with specified FS as default using the specified 339 * config. 340 * 341 * @param defFS 342 * @param aConf 343 * @return new FileContext with specifed FS as default. 344 */ 345 public static FileContext getFileContext(final AbstractFileSystem defFS, 346 final Configuration aConf) { 347 return new FileContext(defFS, FsPermission.getUMask(aConf), aConf); 348 } 349 350 /** 351 * Create a FileContext for specified file system using the default config. 352 * 353 * @param defaultFS 354 * @return a FileContext with the specified AbstractFileSystem 355 * as the default FS. 
356 */ 357 protected static FileContext getFileContext( 358 final AbstractFileSystem defaultFS) { 359 return getFileContext(defaultFS, new Configuration()); 360 } 361 362 /** 363 * Static Factory methods for getting a FileContext. 364 * Note new file contexts are created for each call. 365 * The only singleton is the local FS context using the default config. 366 * 367 * Methods that use the default config: the default config read from the 368 * $HADOOP_CONFIG/core.xml, 369 * Unspecified key-values for config are defaulted from core-defaults.xml 370 * in the release jar. 371 * 372 * The keys relevant to the FileContext layer are extracted at time of 373 * construction. Changes to the config after the call are ignore 374 * by the FileContext layer. 375 * The conf is passed to lower layers like AbstractFileSystem and HDFS which 376 * pick up their own config variables. 377 */ 378 379 /** 380 * Create a FileContext using the default config read from the 381 * $HADOOP_CONFIG/core.xml, Unspecified key-values for config are defaulted 382 * from core-defaults.xml in the release jar. 383 * 384 * @throws UnsupportedFileSystemException If the file system from the default 385 * configuration is not supported 386 */ 387 public static FileContext getFileContext() 388 throws UnsupportedFileSystemException { 389 return getFileContext(new Configuration()); 390 } 391 392 /** 393 * @return a FileContext for the local file system using the default config. 394 * @throws UnsupportedFileSystemException If the file system for 395 * {@link FsConstants#LOCAL_FS_URI} is not supported. 396 */ 397 public static FileContext getLocalFSFileContext() 398 throws UnsupportedFileSystemException { 399 return getFileContext(FsConstants.LOCAL_FS_URI); 400 } 401 402 /** 403 * Create a FileContext for specified URI using the default config. 404 * 405 * @param defaultFsUri 406 * @return a FileContext with the specified URI as the default FS. 
407 * 408 * @throws UnsupportedFileSystemException If the file system for 409 * <code>defaultFsUri</code> is not supported 410 */ 411 public static FileContext getFileContext(final URI defaultFsUri) 412 throws UnsupportedFileSystemException { 413 return getFileContext(defaultFsUri, new Configuration()); 414 } 415 416 /** 417 * Create a FileContext for specified default URI using the specified config. 418 * 419 * @param defaultFsUri 420 * @param aConf 421 * @return new FileContext for specified uri 422 * @throws UnsupportedFileSystemException If the file system with specified is 423 * not supported 424 * @throws RuntimeException If the file system specified is supported but 425 * could not be instantiated, or if login fails. 426 */ 427 public static FileContext getFileContext(final URI defaultFsUri, 428 final Configuration aConf) throws UnsupportedFileSystemException { 429 UserGroupInformation currentUser = null; 430 AbstractFileSystem defaultAfs = null; 431 try { 432 currentUser = UserGroupInformation.getCurrentUser(); 433 defaultAfs = getAbstractFileSystem(currentUser, defaultFsUri, aConf); 434 } catch (UnsupportedFileSystemException ex) { 435 throw ex; 436 } catch (IOException ex) { 437 LOG.error(ex); 438 throw new RuntimeException(ex); 439 } 440 return getFileContext(defaultAfs, aConf); 441 } 442 443 /** 444 * Create a FileContext using the passed config. Generally it is better to use 445 * {@link #getFileContext(URI, Configuration)} instead of this one. 
446 * 447 * 448 * @param aConf 449 * @return new FileContext 450 * @throws UnsupportedFileSystemException If file system in the config 451 * is not supported 452 */ 453 public static FileContext getFileContext(final Configuration aConf) 454 throws UnsupportedFileSystemException { 455 return getFileContext( 456 URI.create(aConf.get(FS_DEFAULT_NAME_KEY, FS_DEFAULT_NAME_DEFAULT)), 457 aConf); 458 } 459 460 /** 461 * @param aConf - from which the FileContext is configured 462 * @return a FileContext for the local file system using the specified config. 463 * 464 * @throws UnsupportedFileSystemException If default file system in the config 465 * is not supported 466 * 467 */ 468 public static FileContext getLocalFSFileContext(final Configuration aConf) 469 throws UnsupportedFileSystemException { 470 return getFileContext(FsConstants.LOCAL_FS_URI, aConf); 471 } 472 473 /* This method is needed for tests. */ 474 @InterfaceAudience.Private 475 @InterfaceStability.Unstable /* return type will change to AFS once 476 HADOOP-6223 is completed */ 477 public AbstractFileSystem getDefaultFileSystem() { 478 return defaultFS; 479 } 480 481 /** 482 * Set the working directory for wd-relative names (such a "foo/bar"). Working 483 * directory feature is provided by simply prefixing relative names with the 484 * working dir. Note this is different from Unix where the wd is actually set 485 * to the inode. Hence setWorkingDir does not follow symlinks etc. This works 486 * better in a distributed environment that has multiple independent roots. 487 * {@link #getWorkingDirectory()} should return what setWorkingDir() set. 
488 * 489 * @param newWDir new working directory 490 * @throws IOException 491 * <br> 492 * NewWdir can be one of: 493 * <ul> 494 * <li>relative path: "foo/bar";</li> 495 * <li>absolute without scheme: "/foo/bar"</li> 496 * <li>fully qualified with scheme: "xx://auth/foo/bar"</li> 497 * </ul> 498 * <br> 499 * Illegal WDs: 500 * <ul> 501 * <li>relative with scheme: "xx:foo/bar"</li> 502 * <li>non existent directory</li> 503 * </ul> 504 */ 505 public void setWorkingDirectory(final Path newWDir) throws IOException { 506 checkNotSchemeWithRelative(newWDir); 507 /* wd is stored as a fully qualified path. We check if the given 508 * path is not relative first since resolve requires and returns 509 * an absolute path. 510 */ 511 final Path newWorkingDir = new Path(workingDir, newWDir); 512 FileStatus status = getFileStatus(newWorkingDir); 513 if (status.isFile()) { 514 throw new FileNotFoundException("Cannot setWD to a file"); 515 } 516 workingDir = newWorkingDir; 517 } 518 519 /** 520 * Gets the working directory for wd-relative names (such a "foo/bar"). 521 */ 522 public Path getWorkingDirectory() { 523 return workingDir; 524 } 525 526 /** 527 * Gets the ugi in the file-context 528 * @return UserGroupInformation 529 */ 530 public UserGroupInformation getUgi() { 531 return ugi; 532 } 533 534 /** 535 * Return the current user's home directory in this file system. 536 * The default implementation returns "/user/$USER/". 537 * @return the home directory 538 */ 539 public Path getHomeDirectory() { 540 return defaultFS.getHomeDirectory(); 541 } 542 543 /** 544 * 545 * @return the umask of this FileContext 546 */ 547 public FsPermission getUMask() { 548 return umask; 549 } 550 551 /** 552 * Set umask to the supplied parameter. 
553 * @param newUmask the new umask 554 */ 555 public void setUMask(final FsPermission newUmask) { 556 umask = newUmask; 557 } 558 559 560 /** 561 * Resolve the path following any symlinks or mount points 562 * @param f to be resolved 563 * @return fully qualified resolved path 564 * 565 * @throws FileNotFoundException If <code>f</code> does not exist 566 * @throws AccessControlException if access denied 567 * @throws IOException If an IO Error occurred 568 * 569 * Exceptions applicable to file systems accessed over RPC: 570 * @throws RpcClientException If an exception occurred in the RPC client 571 * @throws RpcServerException If an exception occurred in the RPC server 572 * @throws UnexpectedServerException If server implementation throws 573 * undeclared exception to RPC server 574 * 575 * RuntimeExceptions: 576 * @throws InvalidPathException If path <code>f</code> is not valid 577 */ 578 public Path resolvePath(final Path f) throws FileNotFoundException, 579 UnresolvedLinkException, AccessControlException, IOException { 580 return resolve(f); 581 } 582 583 /** 584 * Make the path fully qualified if it is isn't. 585 * A Fully-qualified path has scheme and authority specified and an absolute 586 * path. 587 * Use the default file system and working dir in this FileContext to qualify. 588 * @param path 589 * @return qualified path 590 */ 591 public Path makeQualified(final Path path) { 592 return path.makeQualified(defaultFS.getUri(), getWorkingDirectory()); 593 } 594 595 /** 596 * Create or overwrite file on indicated path and returns an output stream for 597 * writing into the file. 598 * 599 * @param f the file name to open 600 * @param createFlag gives the semantics of create; see {@link CreateFlag} 601 * @param opts file creation options; see {@link Options.CreateOpts}. 
602 * <ul> 603 * <li>Progress - to report progress on the operation - default null 604 * <li>Permission - umask is applied against permisssion: default is 605 * FsPermissions:getDefault() 606 * 607 * <li>CreateParent - create missing parent path; default is to not 608 * to create parents 609 * <li>The defaults for the following are SS defaults of the file 610 * server implementing the target path. Not all parameters make sense 611 * for all kinds of file system - eg. localFS ignores Blocksize, 612 * replication, checksum 613 * <ul> 614 * <li>BufferSize - buffersize used in FSDataOutputStream 615 * <li>Blocksize - block size for file blocks 616 * <li>ReplicationFactor - replication for blocks 617 * <li>ChecksumParam - Checksum parameters. server default is used 618 * if not specified. 619 * </ul> 620 * </ul> 621 * 622 * @return {@link FSDataOutputStream} for created file 623 * 624 * @throws AccessControlException If access is denied 625 * @throws FileAlreadyExistsException If file <code>f</code> already exists 626 * @throws FileNotFoundException If parent of <code>f</code> does not exist 627 * and <code>createParent</code> is false 628 * @throws ParentNotDirectoryException If parent of <code>f</code> is not a 629 * directory. 630 * @throws UnsupportedFileSystemException If file system for <code>f</code> is 631 * not supported 632 * @throws IOException If an I/O error occurred 633 * 634 * Exceptions applicable to file systems accessed over RPC: 635 * @throws RpcClientException If an exception occurred in the RPC client 636 * @throws RpcServerException If an exception occurred in the RPC server 637 * @throws UnexpectedServerException If server implementation throws 638 * undeclared exception to RPC server 639 * 640 * RuntimeExceptions: 641 * @throws InvalidPathException If path <code>f</code> is not valid 642 */ 643 public FSDataOutputStream create(final Path f, 644 final EnumSet<CreateFlag> createFlag, Options.CreateOpts... 
opts) 645 throws AccessControlException, FileAlreadyExistsException, 646 FileNotFoundException, ParentNotDirectoryException, 647 UnsupportedFileSystemException, IOException { 648 Path absF = fixRelativePart(f); 649 650 // If one of the options is a permission, extract it & apply umask 651 // If not, add a default Perms and apply umask; 652 // AbstractFileSystem#create 653 654 CreateOpts.Perms permOpt = 655 (CreateOpts.Perms) CreateOpts.getOpt(CreateOpts.Perms.class, opts); 656 FsPermission permission = (permOpt != null) ? permOpt.getValue() : 657 FsPermission.getDefault(); 658 permission = permission.applyUMask(umask); 659 660 final CreateOpts[] updatedOpts = 661 CreateOpts.setOpt(CreateOpts.perms(permission), opts); 662 return new FSLinkResolver<FSDataOutputStream>() { 663 public FSDataOutputStream next(final AbstractFileSystem fs, final Path p) 664 throws IOException { 665 return fs.create(p, createFlag, updatedOpts); 666 } 667 }.resolve(this, absF); 668 } 669 670 /** 671 * Make(create) a directory and all the non-existent parents. 
672 * 673 * @param dir - the dir to make 674 * @param permission - permissions is set permission&~umask 675 * @param createParent - if true then missing parent dirs are created if false 676 * then parent must exist 677 * 678 * @throws AccessControlException If access is denied 679 * @throws FileAlreadyExistsException If directory <code>dir</code> already 680 * exists 681 * @throws FileNotFoundException If parent of <code>dir</code> does not exist 682 * and <code>createParent</code> is false 683 * @throws ParentNotDirectoryException If parent of <code>dir</code> is not a 684 * directory 685 * @throws UnsupportedFileSystemException If file system for <code>dir</code> 686 * is not supported 687 * @throws IOException If an I/O error occurred 688 * 689 * Exceptions applicable to file systems accessed over RPC: 690 * @throws RpcClientException If an exception occurred in the RPC client 691 * @throws UnexpectedServerException If server implementation throws 692 * undeclared exception to RPC server 693 * 694 * RuntimeExceptions: 695 * @throws InvalidPathException If path <code>dir</code> is not valid 696 */ 697 public void mkdir(final Path dir, final FsPermission permission, 698 final boolean createParent) throws AccessControlException, 699 FileAlreadyExistsException, FileNotFoundException, 700 ParentNotDirectoryException, UnsupportedFileSystemException, 701 IOException { 702 final Path absDir = fixRelativePart(dir); 703 final FsPermission absFerms = (permission == null ? 704 FsPermission.getDefault() : permission).applyUMask(umask); 705 new FSLinkResolver<Void>() { 706 public Void next(final AbstractFileSystem fs, final Path p) 707 throws IOException, UnresolvedLinkException { 708 fs.mkdir(p, absFerms, createParent); 709 return null; 710 } 711 }.resolve(this, absDir); 712 } 713 714 /** 715 * Delete a file. 716 * @param f the path to delete. 717 * @param recursive if path is a directory and set to 718 * true, the directory is deleted else throws an exception. 
In 719 * case of a file the recursive can be set to either true or false. 720 * 721 * @throws AccessControlException If access is denied 722 * @throws FileNotFoundException If <code>f</code> does not exist 723 * @throws UnsupportedFileSystemException If file system for <code>f</code> is 724 * not supported 725 * @throws IOException If an I/O error occurred 726 * 727 * Exceptions applicable to file systems accessed over RPC: 728 * @throws RpcClientException If an exception occurred in the RPC client 729 * @throws RpcServerException If an exception occurred in the RPC server 730 * @throws UnexpectedServerException If server implementation throws 731 * undeclared exception to RPC server 732 * 733 * RuntimeExceptions: 734 * @throws InvalidPathException If path <code>f</code> is invalid 735 */ 736 public boolean delete(final Path f, final boolean recursive) 737 throws AccessControlException, FileNotFoundException, 738 UnsupportedFileSystemException, IOException { 739 Path absF = fixRelativePart(f); 740 return new FSLinkResolver<Boolean>() { 741 public Boolean next(final AbstractFileSystem fs, final Path p) 742 throws IOException, UnresolvedLinkException { 743 return Boolean.valueOf(fs.delete(p, recursive)); 744 } 745 }.resolve(this, absF); 746 } 747 748 /** 749 * Opens an FSDataInputStream at the indicated Path using 750 * default buffersize. 
751 * @param f the file name to open 752 * 753 * @throws AccessControlException If access is denied 754 * @throws FileNotFoundException If file <code>f</code> does not exist 755 * @throws UnsupportedFileSystemException If file system for <code>f</code> 756 * is not supported 757 * @throws IOException If an I/O error occurred 758 * 759 * Exceptions applicable to file systems accessed over RPC: 760 * @throws RpcClientException If an exception occurred in the RPC client 761 * @throws RpcServerException If an exception occurred in the RPC server 762 * @throws UnexpectedServerException If server implementation throws 763 * undeclared exception to RPC server 764 */ 765 public FSDataInputStream open(final Path f) throws AccessControlException, 766 FileNotFoundException, UnsupportedFileSystemException, IOException { 767 final Path absF = fixRelativePart(f); 768 return new FSLinkResolver<FSDataInputStream>() { 769 public FSDataInputStream next(final AbstractFileSystem fs, final Path p) 770 throws IOException, UnresolvedLinkException { 771 return fs.open(p); 772 } 773 }.resolve(this, absF); 774 } 775 776 /** 777 * Opens an FSDataInputStream at the indicated Path. 778 * 779 * @param f the file name to open 780 * @param bufferSize the size of the buffer to be used. 
781 * 782 * @throws AccessControlException If access is denied 783 * @throws FileNotFoundException If file <code>f</code> does not exist 784 * @throws UnsupportedFileSystemException If file system for <code>f</code> is 785 * not supported 786 * @throws IOException If an I/O error occurred 787 * 788 * Exceptions applicable to file systems accessed over RPC: 789 * @throws RpcClientException If an exception occurred in the RPC client 790 * @throws RpcServerException If an exception occurred in the RPC server 791 * @throws UnexpectedServerException If server implementation throws 792 * undeclared exception to RPC server 793 */ 794 public FSDataInputStream open(final Path f, final int bufferSize) 795 throws AccessControlException, FileNotFoundException, 796 UnsupportedFileSystemException, IOException { 797 final Path absF = fixRelativePart(f); 798 return new FSLinkResolver<FSDataInputStream>() { 799 public FSDataInputStream next(final AbstractFileSystem fs, final Path p) 800 throws IOException, UnresolvedLinkException { 801 return fs.open(p, bufferSize); 802 } 803 }.resolve(this, absF); 804 } 805 806 /** 807 * Set replication for an existing file. 
808 * 809 * @param f file name 810 * @param replication new replication 811 * 812 * @return true if successful 813 * 814 * @throws AccessControlException If access is denied 815 * @throws FileNotFoundException If file <code>f</code> does not exist 816 * @throws IOException If an I/O error occurred 817 * 818 * Exceptions applicable to file systems accessed over RPC: 819 * @throws RpcClientException If an exception occurred in the RPC client 820 * @throws RpcServerException If an exception occurred in the RPC server 821 * @throws UnexpectedServerException If server implementation throws 822 * undeclared exception to RPC server 823 */ 824 public boolean setReplication(final Path f, final short replication) 825 throws AccessControlException, FileNotFoundException, 826 IOException { 827 final Path absF = fixRelativePart(f); 828 return new FSLinkResolver<Boolean>() { 829 public Boolean next(final AbstractFileSystem fs, final Path p) 830 throws IOException, UnresolvedLinkException { 831 return Boolean.valueOf(fs.setReplication(p, replication)); 832 } 833 }.resolve(this, absF); 834 } 835 836 /** 837 * Renames Path src to Path dst 838 * <ul> 839 * <li 840 * <li>Fails if src is a file and dst is a directory. 841 * <li>Fails if src is a directory and dst is a file. 842 * <li>Fails if the parent of dst does not exist or is a file. 843 * </ul> 844 * <p> 845 * If OVERWRITE option is not passed as an argument, rename fails if the dst 846 * already exists. 847 * <p> 848 * If OVERWRITE option is passed as an argument, rename overwrites the dst if 849 * it is a file or an empty directory. Rename fails if dst is a non-empty 850 * directory. 851 * <p> 852 * Note that atomicity of rename is dependent on the file system 853 * implementation. 
Please refer to the file system documentation for details 854 * <p> 855 * 856 * @param src path to be renamed 857 * @param dst new path after rename 858 * 859 * @throws AccessControlException If access is denied 860 * @throws FileAlreadyExistsException If <code>dst</code> already exists and 861 * <code>options</options> has {@link Options.Rename#OVERWRITE} 862 * option false. 863 * @throws FileNotFoundException If <code>src</code> does not exist 864 * @throws ParentNotDirectoryException If parent of <code>dst</code> is not a 865 * directory 866 * @throws UnsupportedFileSystemException If file system for <code>src</code> 867 * and <code>dst</code> is not supported 868 * @throws IOException If an I/O error occurred 869 * 870 * Exceptions applicable to file systems accessed over RPC: 871 * @throws RpcClientException If an exception occurred in the RPC client 872 * @throws RpcServerException If an exception occurred in the RPC server 873 * @throws UnexpectedServerException If server implementation throws 874 * undeclared exception to RPC server 875 */ 876 public void rename(final Path src, final Path dst, 877 final Options.Rename... options) throws AccessControlException, 878 FileAlreadyExistsException, FileNotFoundException, 879 ParentNotDirectoryException, UnsupportedFileSystemException, 880 IOException { 881 final Path absSrc = fixRelativePart(src); 882 final Path absDst = fixRelativePart(dst); 883 AbstractFileSystem srcFS = getFSofPath(absSrc); 884 AbstractFileSystem dstFS = getFSofPath(absDst); 885 if(!srcFS.getUri().equals(dstFS.getUri())) { 886 throw new IOException("Renames across AbstractFileSystems not supported"); 887 } 888 try { 889 srcFS.rename(absSrc, absDst, options); 890 } catch (UnresolvedLinkException e) { 891 /* We do not know whether the source or the destination path 892 * was unresolved. Resolve the source path up until the final 893 * path component, then fully resolve the destination. 
894 */ 895 final Path source = resolveIntermediate(absSrc); 896 new FSLinkResolver<Void>() { 897 public Void next(final AbstractFileSystem fs, final Path p) 898 throws IOException, UnresolvedLinkException { 899 fs.rename(source, p, options); 900 return null; 901 } 902 }.resolve(this, absDst); 903 } 904 } 905 906 /** 907 * Set permission of a path. 908 * @param f 909 * @param permission - the new absolute permission (umask is not applied) 910 * 911 * @throws AccessControlException If access is denied 912 * @throws FileNotFoundException If <code>f</code> does not exist 913 * @throws UnsupportedFileSystemException If file system for <code>f</code> 914 * is not supported 915 * @throws IOException If an I/O error occurred 916 * 917 * Exceptions applicable to file systems accessed over RPC: 918 * @throws RpcClientException If an exception occurred in the RPC client 919 * @throws RpcServerException If an exception occurred in the RPC server 920 * @throws UnexpectedServerException If server implementation throws 921 * undeclared exception to RPC server 922 */ 923 public void setPermission(final Path f, final FsPermission permission) 924 throws AccessControlException, FileNotFoundException, 925 UnsupportedFileSystemException, IOException { 926 final Path absF = fixRelativePart(f); 927 new FSLinkResolver<Void>() { 928 public Void next(final AbstractFileSystem fs, final Path p) 929 throws IOException, UnresolvedLinkException { 930 fs.setPermission(p, permission); 931 return null; 932 } 933 }.resolve(this, absF); 934 } 935 936 /** 937 * Set owner of a path (i.e. a file or a directory). The parameters username 938 * and groupname cannot both be null. 939 * 940 * @param f The path 941 * @param username If it is null, the original username remains unchanged. 942 * @param groupname If it is null, the original groupname remains unchanged. 
943 * 944 * @throws AccessControlException If access is denied 945 * @throws FileNotFoundException If <code>f</code> does not exist 946 * @throws UnsupportedFileSystemException If file system for <code>f</code> is 947 * not supported 948 * @throws IOException If an I/O error occurred 949 * 950 * Exceptions applicable to file systems accessed over RPC: 951 * @throws RpcClientException If an exception occurred in the RPC client 952 * @throws RpcServerException If an exception occurred in the RPC server 953 * @throws UnexpectedServerException If server implementation throws 954 * undeclared exception to RPC server 955 * 956 * RuntimeExceptions: 957 * @throws HadoopIllegalArgumentException If <code>username</code> or 958 * <code>groupname</code> is invalid. 959 */ 960 public void setOwner(final Path f, final String username, 961 final String groupname) throws AccessControlException, 962 UnsupportedFileSystemException, FileNotFoundException, 963 IOException { 964 if ((username == null) && (groupname == null)) { 965 throw new HadoopIllegalArgumentException( 966 "username and groupname cannot both be null"); 967 } 968 final Path absF = fixRelativePart(f); 969 new FSLinkResolver<Void>() { 970 public Void next(final AbstractFileSystem fs, final Path p) 971 throws IOException, UnresolvedLinkException { 972 fs.setOwner(p, username, groupname); 973 return null; 974 } 975 }.resolve(this, absF); 976 } 977 978 /** 979 * Set access time of a file. 980 * @param f The path 981 * @param mtime Set the modification time of this file. 982 * The number of milliseconds since epoch (Jan 1, 1970). 983 * A value of -1 means that this call should not set modification time. 984 * @param atime Set the access time of this file. 985 * The number of milliseconds since Jan 1, 1970. 986 * A value of -1 means that this call should not set access time. 
987 * 988 * @throws AccessControlException If access is denied 989 * @throws FileNotFoundException If <code>f</code> does not exist 990 * @throws UnsupportedFileSystemException If file system for <code>f</code> is 991 * not supported 992 * @throws IOException If an I/O error occurred 993 * 994 * Exceptions applicable to file systems accessed over RPC: 995 * @throws RpcClientException If an exception occurred in the RPC client 996 * @throws RpcServerException If an exception occurred in the RPC server 997 * @throws UnexpectedServerException If server implementation throws 998 * undeclared exception to RPC server 999 */ 1000 public void setTimes(final Path f, final long mtime, final long atime) 1001 throws AccessControlException, FileNotFoundException, 1002 UnsupportedFileSystemException, IOException { 1003 final Path absF = fixRelativePart(f); 1004 new FSLinkResolver<Void>() { 1005 public Void next(final AbstractFileSystem fs, final Path p) 1006 throws IOException, UnresolvedLinkException { 1007 fs.setTimes(p, mtime, atime); 1008 return null; 1009 } 1010 }.resolve(this, absF); 1011 } 1012 1013 /** 1014 * Get the checksum of a file. 1015 * 1016 * @param f file path 1017 * 1018 * @return The file checksum. The default return value is null, 1019 * which indicates that no checksum algorithm is implemented 1020 * in the corresponding FileSystem. 
1021 * 1022 * @throws AccessControlException If access is denied 1023 * @throws FileNotFoundException If <code>f</code> does not exist 1024 * @throws IOException If an I/O error occurred 1025 * 1026 * Exceptions applicable to file systems accessed over RPC: 1027 * @throws RpcClientException If an exception occurred in the RPC client 1028 * @throws RpcServerException If an exception occurred in the RPC server 1029 * @throws UnexpectedServerException If server implementation throws 1030 * undeclared exception to RPC server 1031 */ 1032 public FileChecksum getFileChecksum(final Path f) 1033 throws AccessControlException, FileNotFoundException, 1034 IOException { 1035 final Path absF = fixRelativePart(f); 1036 return new FSLinkResolver<FileChecksum>() { 1037 public FileChecksum next(final AbstractFileSystem fs, final Path p) 1038 throws IOException, UnresolvedLinkException { 1039 return fs.getFileChecksum(p); 1040 } 1041 }.resolve(this, absF); 1042 } 1043 1044 /** 1045 * Set the verify checksum flag for the file system denoted by the path. 1046 * This is only applicable if the 1047 * corresponding FileSystem supports checksum. By default doesn't do anything. 
1048 * @param verifyChecksum 1049 * @param f set the verifyChecksum for the Filesystem containing this path 1050 * 1051 * @throws AccessControlException If access is denied 1052 * @throws FileNotFoundException If <code>f</code> does not exist 1053 * @throws UnsupportedFileSystemException If file system for <code>f</code> is 1054 * not supported 1055 * @throws IOException If an I/O error occurred 1056 * 1057 * Exceptions applicable to file systems accessed over RPC: 1058 * @throws RpcClientException If an exception occurred in the RPC client 1059 * @throws RpcServerException If an exception occurred in the RPC server 1060 * @throws UnexpectedServerException If server implementation throws 1061 * undeclared exception to RPC server 1062 */ 1063 public void setVerifyChecksum(final boolean verifyChecksum, final Path f) 1064 throws AccessControlException, FileNotFoundException, 1065 UnsupportedFileSystemException, IOException { 1066 final Path absF = resolve(fixRelativePart(f)); 1067 getFSofPath(absF).setVerifyChecksum(verifyChecksum); 1068 } 1069 1070 /** 1071 * Return a file status object that represents the path. 
1072 * @param f The path we want information from 1073 * 1074 * @return a FileStatus object 1075 * 1076 * @throws AccessControlException If access is denied 1077 * @throws FileNotFoundException If <code>f</code> does not exist 1078 * @throws UnsupportedFileSystemException If file system for <code>f</code> is 1079 * not supported 1080 * @throws IOException If an I/O error occurred 1081 * 1082 * Exceptions applicable to file systems accessed over RPC: 1083 * @throws RpcClientException If an exception occurred in the RPC client 1084 * @throws RpcServerException If an exception occurred in the RPC server 1085 * @throws UnexpectedServerException If server implementation throws 1086 * undeclared exception to RPC server 1087 */ 1088 public FileStatus getFileStatus(final Path f) throws AccessControlException, 1089 FileNotFoundException, UnsupportedFileSystemException, IOException { 1090 final Path absF = fixRelativePart(f); 1091 return new FSLinkResolver<FileStatus>() { 1092 public FileStatus next(final AbstractFileSystem fs, final Path p) 1093 throws IOException, UnresolvedLinkException { 1094 return fs.getFileStatus(p); 1095 } 1096 }.resolve(this, absF); 1097 } 1098 1099 /** 1100 * Return a fully qualified version of the given symlink target if it 1101 * has no scheme and authority. Partially and fully qualified paths 1102 * are returned unmodified. 1103 * @param pathFS The AbstractFileSystem of the path 1104 * @param pathWithLink Path that contains the symlink 1105 * @param target The symlink's absolute target 1106 * @return Fully qualified version of the target. 1107 */ 1108 private Path qualifySymlinkTarget(final AbstractFileSystem pathFS, 1109 Path pathWithLink, Path target) { 1110 // NB: makeQualified uses the target's scheme and authority, if 1111 // specified, and the scheme and authority of pathFS, if not. 
1112 final String scheme = target.toUri().getScheme(); 1113 final String auth = target.toUri().getAuthority(); 1114 return (scheme == null && auth == null) 1115 ? target.makeQualified(pathFS.getUri(), pathWithLink.getParent()) 1116 : target; 1117 } 1118 1119 /** 1120 * Return a file status object that represents the path. If the path 1121 * refers to a symlink then the FileStatus of the symlink is returned. 1122 * The behavior is equivalent to #getFileStatus() if the underlying 1123 * file system does not support symbolic links. 1124 * @param f The path we want information from. 1125 * @return A FileStatus object 1126 * 1127 * @throws AccessControlException If access is denied 1128 * @throws FileNotFoundException If <code>f</code> does not exist 1129 * @throws UnsupportedFileSystemException If file system for <code>f</code> is 1130 * not supported 1131 * @throws IOException If an I/O error occurred 1132 */ 1133 public FileStatus getFileLinkStatus(final Path f) 1134 throws AccessControlException, FileNotFoundException, 1135 UnsupportedFileSystemException, IOException { 1136 final Path absF = fixRelativePart(f); 1137 return new FSLinkResolver<FileStatus>() { 1138 public FileStatus next(final AbstractFileSystem fs, final Path p) 1139 throws IOException, UnresolvedLinkException { 1140 FileStatus fi = fs.getFileLinkStatus(p); 1141 if (fi.isSymlink()) { 1142 fi.setSymlink(qualifySymlinkTarget(fs, p, fi.getSymlink())); 1143 } 1144 return fi; 1145 } 1146 }.resolve(this, absF); 1147 } 1148 1149 /** 1150 * Returns the target of the given symbolic link as it was specified 1151 * when the link was created. Links in the path leading up to the 1152 * final path component are resolved transparently. 1153 * 1154 * @param f the path to return the target of 1155 * @return The un-interpreted target of the symbolic link. 
1156 * 1157 * @throws AccessControlException If access is denied 1158 * @throws FileNotFoundException If path <code>f</code> does not exist 1159 * @throws UnsupportedFileSystemException If file system for <code>f</code> is 1160 * not supported 1161 * @throws IOException If the given path does not refer to a symlink 1162 * or an I/O error occurred 1163 */ 1164 public Path getLinkTarget(final Path f) throws AccessControlException, 1165 FileNotFoundException, UnsupportedFileSystemException, IOException { 1166 final Path absF = fixRelativePart(f); 1167 return new FSLinkResolver<Path>() { 1168 public Path next(final AbstractFileSystem fs, final Path p) 1169 throws IOException, UnresolvedLinkException { 1170 FileStatus fi = fs.getFileLinkStatus(p); 1171 return fi.getSymlink(); 1172 } 1173 }.resolve(this, absF); 1174 } 1175 1176 /** 1177 * Return blockLocation of the given file for the given offset and len. 1178 * For a nonexistent file or regions, null will be returned. 1179 * 1180 * This call is most helpful with DFS, where it returns 1181 * hostnames of machines that contain the given file. 
1182 * 1183 * @param f - get blocklocations of this file 1184 * @param start position (byte offset) 1185 * @param len (in bytes) 1186 * 1187 * @return block locations for given file at specified offset of len 1188 * 1189 * @throws AccessControlException If access is denied 1190 * @throws FileNotFoundException If <code>f</code> does not exist 1191 * @throws UnsupportedFileSystemException If file system for <code>f</code> is 1192 * not supported 1193 * @throws IOException If an I/O error occurred 1194 * 1195 * Exceptions applicable to file systems accessed over RPC: 1196 * @throws RpcClientException If an exception occurred in the RPC client 1197 * @throws RpcServerException If an exception occurred in the RPC server 1198 * @throws UnexpectedServerException If server implementation throws 1199 * undeclared exception to RPC server 1200 * 1201 * RuntimeExceptions: 1202 * @throws InvalidPathException If path <code>f</code> is invalid 1203 */ 1204 @InterfaceAudience.LimitedPrivate({"HDFS", "MapReduce"}) 1205 @InterfaceStability.Evolving 1206 public BlockLocation[] getFileBlockLocations(final Path f, final long start, 1207 final long len) throws AccessControlException, FileNotFoundException, 1208 UnsupportedFileSystemException, IOException { 1209 final Path absF = fixRelativePart(f); 1210 return new FSLinkResolver<BlockLocation[]>() { 1211 public BlockLocation[] next(final AbstractFileSystem fs, final Path p) 1212 throws IOException, UnresolvedLinkException { 1213 return fs.getFileBlockLocations(p, start, len); 1214 } 1215 }.resolve(this, absF); 1216 } 1217 1218 /** 1219 * Returns a status object describing the use and capacity of the 1220 * file system denoted by the Parh argument p. 1221 * If the file system has multiple partitions, the 1222 * use and capacity of the partition pointed to by the specified 1223 * path is reflected. 1224 * 1225 * @param f Path for which status should be obtained. null means the 1226 * root partition of the default file system. 
1227 * 1228 * @return a FsStatus object 1229 * 1230 * @throws AccessControlException If access is denied 1231 * @throws FileNotFoundException If <code>f</code> does not exist 1232 * @throws UnsupportedFileSystemException If file system for <code>f</code> is 1233 * not supported 1234 * @throws IOException If an I/O error occurred 1235 * 1236 * Exceptions applicable to file systems accessed over RPC: 1237 * @throws RpcClientException If an exception occurred in the RPC client 1238 * @throws RpcServerException If an exception occurred in the RPC server 1239 * @throws UnexpectedServerException If server implementation throws 1240 * undeclared exception to RPC server 1241 */ 1242 public FsStatus getFsStatus(final Path f) throws AccessControlException, 1243 FileNotFoundException, UnsupportedFileSystemException, IOException { 1244 if (f == null) { 1245 return defaultFS.getFsStatus(); 1246 } 1247 final Path absF = fixRelativePart(f); 1248 return new FSLinkResolver<FsStatus>() { 1249 public FsStatus next(final AbstractFileSystem fs, final Path p) 1250 throws IOException, UnresolvedLinkException { 1251 return fs.getFsStatus(p); 1252 } 1253 }.resolve(this, absF); 1254 } 1255 1256 /** 1257 * Creates a symbolic link to an existing file. An exception is thrown if 1258 * the symlink exits, the user does not have permission to create symlink, 1259 * or the underlying file system does not support symlinks. 1260 * 1261 * Symlink permissions are ignored, access to a symlink is determined by 1262 * the permissions of the symlink target. 1263 * 1264 * Symlinks in paths leading up to the final path component are resolved 1265 * transparently. If the final path component refers to a symlink some 1266 * functions operate on the symlink itself, these are: 1267 * - delete(f) and deleteOnExit(f) - Deletes the symlink. 1268 * - rename(src, dst) - If src refers to a symlink, the symlink is 1269 * renamed. If dst refers to a symlink, the symlink is over-written. 
* - getLinkTarget(f) - Returns the target of the symlink.
   * - getFileLinkStatus(f) - Returns a FileStatus object describing
   *   the symlink.
   * Some functions, create() and mkdir(), expect the final path component
   * does not exist. If they are given a path that refers to a symlink that
   * does exist they behave as if the path referred to an existing file or
   * directory. All other functions fully resolve, ie follow, the symlink.
   * These are: open, setReplication, setOwner, setTimes, setWorkingDirectory,
   * setPermission, getFileChecksum, setVerifyChecksum, getFileBlockLocations,
   * getFsStatus, getFileStatus, exists, and listStatus.
   *
   * Symlink targets are stored as given to createSymlink, assuming the
   * underlying file system is capable of storing a fully qualified URI.
   * Dangling symlinks are permitted. FileContext supports four types of
   * symlink targets, and resolves them as follows:
   * <pre>
   * Given a path referring to a symlink of form:
   *
   *   <---X--->
   *   fs://host/A/B/link
   *   <-----Y----->
   *
   * In this path X is the scheme and authority that identify the file system,
   * and Y is the path leading up to the final path component "link". If Y is
   * a symlink itself then let Y' be the target of Y and X' be the scheme and
   * authority of Y'. Symlink targets may be:
   *
   * 1. Fully qualified URIs
   *
   * fs://hostX/A/B/file  Resolved according to the target file system.
   *
   * 2. Partially qualified URIs (eg scheme but no host)
   *
   * fs:///A/B/file  Resolved according to the target file system. Eg resolving
   *                 a symlink to hdfs:///A results in an exception because
   *                 HDFS URIs must be fully qualified, while a symlink to
   *                 file:///A will not since Hadoop's local file systems
   *                 require partially qualified URIs.
   *
   * 3. Relative paths
   *
   * path  Resolves to [Y'][path].
Eg if Y resolves to hdfs://host/A and path 1312 * is "../B/file" then [Y'][path] is hdfs://host/B/file 1313 * 1314 * 4. Absolute paths 1315 * 1316 * path Resolves to [X'][path]. Eg if Y resolves hdfs://host/A/B and path 1317 * is "/file" then [X][path] is hdfs://host/file 1318 * </pre> 1319 * 1320 * @param target the target of the symbolic link 1321 * @param link the path to be created that points to target 1322 * @param createParent if true then missing parent dirs are created if 1323 * false then parent must exist 1324 * 1325 * 1326 * @throws AccessControlException If access is denied 1327 * @throws FileAlreadyExistsException If file <code>linkcode> already exists 1328 * @throws FileNotFoundException If <code>target</code> does not exist 1329 * @throws ParentNotDirectoryException If parent of <code>link</code> is not a 1330 * directory. 1331 * @throws UnsupportedFileSystemException If file system for 1332 * <code>target</code> or <code>link</code> is not supported 1333 * @throws IOException If an I/O error occurred 1334 */ 1335 public void createSymlink(final Path target, final Path link, 1336 final boolean createParent) throws AccessControlException, 1337 FileAlreadyExistsException, FileNotFoundException, 1338 ParentNotDirectoryException, UnsupportedFileSystemException, 1339 IOException { 1340 final Path nonRelLink = fixRelativePart(link); 1341 new FSLinkResolver<Void>() { 1342 public Void next(final AbstractFileSystem fs, final Path p) 1343 throws IOException, UnresolvedLinkException { 1344 fs.createSymlink(target, p, createParent); 1345 return null; 1346 } 1347 }.resolve(this, nonRelLink); 1348 } 1349 1350 /** 1351 * List the statuses of the files/directories in the given path if the path is 1352 * a directory. 
1353 * 1354 * @param f is the path 1355 * 1356 * @return an iterator that traverses statuses of the files/directories 1357 * in the given path 1358 * 1359 * @throws AccessControlException If access is denied 1360 * @throws FileNotFoundException If <code>f</code> does not exist 1361 * @throws UnsupportedFileSystemException If file system for <code>f</code> is 1362 * not supported 1363 * @throws IOException If an I/O error occurred 1364 * 1365 * Exceptions applicable to file systems accessed over RPC: 1366 * @throws RpcClientException If an exception occurred in the RPC client 1367 * @throws RpcServerException If an exception occurred in the RPC server 1368 * @throws UnexpectedServerException If server implementation throws 1369 * undeclared exception to RPC server 1370 */ 1371 public RemoteIterator<FileStatus> listStatus(final Path f) throws 1372 AccessControlException, FileNotFoundException, 1373 UnsupportedFileSystemException, IOException { 1374 final Path absF = fixRelativePart(f); 1375 return new FSLinkResolver<RemoteIterator<FileStatus>>() { 1376 public RemoteIterator<FileStatus> next( 1377 final AbstractFileSystem fs, final Path p) 1378 throws IOException, UnresolvedLinkException { 1379 return fs.listStatusIterator(p); 1380 } 1381 }.resolve(this, absF); 1382 } 1383 1384 /** 1385 * @return an iterator over the corrupt files under the given path 1386 * (may contain duplicates if a file has more than one corrupt block) 1387 * @throws IOException 1388 */ 1389 public RemoteIterator<Path> listCorruptFileBlocks(Path path) 1390 throws IOException { 1391 final Path absF = fixRelativePart(path); 1392 return new FSLinkResolver<RemoteIterator<Path>>() { 1393 @Override 1394 public RemoteIterator<Path> next(final AbstractFileSystem fs, 1395 final Path p) 1396 throws IOException, UnresolvedLinkException { 1397 return fs.listCorruptFileBlocks(p); 1398 } 1399 }.resolve(this, absF); 1400 } 1401 1402 /** 1403 * List the statuses of the files/directories in the given path if the 
path is 1404 * a directory. 1405 * Return the file's status and block locations If the path is a file. 1406 * 1407 * If a returned status is a file, it contains the file's block locations. 1408 * 1409 * @param f is the path 1410 * 1411 * @return an iterator that traverses statuses of the files/directories 1412 * in the given path 1413 * If any IO exception (for example the input directory gets deleted while 1414 * listing is being executed), next() or hasNext() of the returned iterator 1415 * may throw a RuntimeException with the io exception as the cause. 1416 * 1417 * @throws AccessControlException If access is denied 1418 * @throws FileNotFoundException If <code>f</code> does not exist 1419 * @throws UnsupportedFileSystemException If file system for <code>f</code> is 1420 * not supported 1421 * @throws IOException If an I/O error occurred 1422 * 1423 * Exceptions applicable to file systems accessed over RPC: 1424 * @throws RpcClientException If an exception occurred in the RPC client 1425 * @throws RpcServerException If an exception occurred in the RPC server 1426 * @throws UnexpectedServerException If server implementation throws 1427 * undeclared exception to RPC server 1428 */ 1429 public RemoteIterator<LocatedFileStatus> listLocatedStatus( 1430 final Path f) throws 1431 AccessControlException, FileNotFoundException, 1432 UnsupportedFileSystemException, IOException { 1433 final Path absF = fixRelativePart(f); 1434 return new FSLinkResolver<RemoteIterator<LocatedFileStatus>>() { 1435 public RemoteIterator<LocatedFileStatus> next( 1436 final AbstractFileSystem fs, final Path p) 1437 throws IOException, UnresolvedLinkException { 1438 return fs.listLocatedStatus(p); 1439 } 1440 }.resolve(this, absF); 1441 } 1442 1443 /** 1444 * Mark a path to be deleted on JVM shutdown. 1445 * 1446 * @param f the existing path to delete. 1447 * 1448 * @return true if deleteOnExit is successful, otherwise false. 
1449 * 1450 * @throws AccessControlException If access is denied 1451 * @throws UnsupportedFileSystemException If file system for <code>f</code> is 1452 * not supported 1453 * @throws IOException If an I/O error occurred 1454 * 1455 * Exceptions applicable to file systems accessed over RPC: 1456 * @throws RpcClientException If an exception occurred in the RPC client 1457 * @throws RpcServerException If an exception occurred in the RPC server 1458 * @throws UnexpectedServerException If server implementation throws 1459 * undeclared exception to RPC server 1460 */ 1461 public boolean deleteOnExit(Path f) throws AccessControlException, 1462 IOException { 1463 if (!this.util().exists(f)) { 1464 return false; 1465 } 1466 synchronized (DELETE_ON_EXIT) { 1467 if (DELETE_ON_EXIT.isEmpty()) { 1468 ShutdownHookManager.get().addShutdownHook(FINALIZER, SHUTDOWN_HOOK_PRIORITY); 1469 } 1470 1471 Set<Path> set = DELETE_ON_EXIT.get(this); 1472 if (set == null) { 1473 set = new TreeSet<Path>(); 1474 DELETE_ON_EXIT.put(this, set); 1475 } 1476 set.add(f); 1477 } 1478 return true; 1479 } 1480 1481 private final Util util; 1482 public Util util() { 1483 return util; 1484 } 1485 1486 1487 /** 1488 * Utility/library methods built over the basic FileContext methods. 1489 * Since this are library functions, the oprtation are not atomic 1490 * and some of them may partially complete if other threads are making 1491 * changes to the same part of the name space. 1492 */ 1493 public class Util { 1494 /** 1495 * Does the file exist? 1496 * Note: Avoid using this method if you already have FileStatus in hand. 
1497 * Instead reuse the FileStatus 1498 * @param f the file or dir to be checked 1499 * 1500 * @throws AccessControlException If access is denied 1501 * @throws IOException If an I/O error occurred 1502 * @throws UnsupportedFileSystemException If file system for <code>f</code> is 1503 * not supported 1504 * 1505 * Exceptions applicable to file systems accessed over RPC: 1506 * @throws RpcClientException If an exception occurred in the RPC client 1507 * @throws RpcServerException If an exception occurred in the RPC server 1508 * @throws UnexpectedServerException If server implementation throws 1509 * undeclared exception to RPC server 1510 */ 1511 public boolean exists(final Path f) throws AccessControlException, 1512 UnsupportedFileSystemException, IOException { 1513 try { 1514 FileStatus fs = FileContext.this.getFileStatus(f); 1515 assert fs != null; 1516 return true; 1517 } catch (FileNotFoundException e) { 1518 return false; 1519 } 1520 } 1521 1522 /** 1523 * Return a list of file status objects that corresponds to supplied paths 1524 * excluding those non-existent paths. 
1525 * 1526 * @param paths list of paths we want information from 1527 * 1528 * @return a list of FileStatus objects 1529 * 1530 * @throws AccessControlException If access is denied 1531 * @throws IOException If an I/O error occurred 1532 * 1533 * Exceptions applicable to file systems accessed over RPC: 1534 * @throws RpcClientException If an exception occurred in the RPC client 1535 * @throws RpcServerException If an exception occurred in the RPC server 1536 * @throws UnexpectedServerException If server implementation throws 1537 * undeclared exception to RPC server 1538 */ 1539 private FileStatus[] getFileStatus(Path[] paths) 1540 throws AccessControlException, IOException { 1541 if (paths == null) { 1542 return null; 1543 } 1544 ArrayList<FileStatus> results = new ArrayList<FileStatus>(paths.length); 1545 for (int i = 0; i < paths.length; i++) { 1546 try { 1547 results.add(FileContext.this.getFileStatus(paths[i])); 1548 } catch (FileNotFoundException fnfe) { 1549 // ignoring 1550 } 1551 } 1552 return results.toArray(new FileStatus[results.size()]); 1553 } 1554 1555 1556 /** 1557 * Return the {@link ContentSummary} of path f. 1558 * @param f path 1559 * 1560 * @return the {@link ContentSummary} of path f. 
1561 * 1562 * @throws AccessControlException If access is denied 1563 * @throws FileNotFoundException If <code>f</code> does not exist 1564 * @throws UnsupportedFileSystemException If file system for 1565 * <code>f</code> is not supported 1566 * @throws IOException If an I/O error occurred 1567 * 1568 * Exceptions applicable to file systems accessed over RPC: 1569 * @throws RpcClientException If an exception occurred in the RPC client 1570 * @throws RpcServerException If an exception occurred in the RPC server 1571 * @throws UnexpectedServerException If server implementation throws 1572 * undeclared exception to RPC server 1573 */ 1574 public ContentSummary getContentSummary(Path f) 1575 throws AccessControlException, FileNotFoundException, 1576 UnsupportedFileSystemException, IOException { 1577 FileStatus status = FileContext.this.getFileStatus(f); 1578 if (status.isFile()) { 1579 return new ContentSummary(status.getLen(), 1, 0); 1580 } 1581 long[] summary = {0, 0, 1}; 1582 RemoteIterator<FileStatus> statusIterator = 1583 FileContext.this.listStatus(f); 1584 while(statusIterator.hasNext()) { 1585 FileStatus s = statusIterator.next(); 1586 ContentSummary c = s.isDirectory() ? getContentSummary(s.getPath()) : 1587 new ContentSummary(s.getLen(), 1, 0); 1588 summary[0] += c.getLength(); 1589 summary[1] += c.getFileCount(); 1590 summary[2] += c.getDirectoryCount(); 1591 } 1592 return new ContentSummary(summary[0], summary[1], summary[2]); 1593 } 1594 1595 /** 1596 * See {@link #listStatus(Path[], PathFilter)} 1597 */ 1598 public FileStatus[] listStatus(Path[] files) throws AccessControlException, 1599 FileNotFoundException, IOException { 1600 return listStatus(files, DEFAULT_FILTER); 1601 } 1602 1603 /** 1604 * Filter files/directories in the given path using the user-supplied path 1605 * filter. 
1606 * 1607 * @param f is the path name 1608 * @param filter is the user-supplied path filter 1609 * 1610 * @return an array of FileStatus objects for the files under the given path 1611 * after applying the filter 1612 * 1613 * @throws AccessControlException If access is denied 1614 * @throws FileNotFoundException If <code>f</code> does not exist 1615 * @throws UnsupportedFileSystemException If file system for 1616 * <code>pathPattern</code> is not supported 1617 * @throws IOException If an I/O error occurred 1618 * 1619 * Exceptions applicable to file systems accessed over RPC: 1620 * @throws RpcClientException If an exception occurred in the RPC client 1621 * @throws RpcServerException If an exception occurred in the RPC server 1622 * @throws UnexpectedServerException If server implementation throws 1623 * undeclared exception to RPC server 1624 */ 1625 public FileStatus[] listStatus(Path f, PathFilter filter) 1626 throws AccessControlException, FileNotFoundException, 1627 UnsupportedFileSystemException, IOException { 1628 ArrayList<FileStatus> results = new ArrayList<FileStatus>(); 1629 listStatus(results, f, filter); 1630 return results.toArray(new FileStatus[results.size()]); 1631 } 1632 1633 /** 1634 * Filter files/directories in the given list of paths using user-supplied 1635 * path filter. 
1636 * 1637 * @param files is a list of paths 1638 * @param filter is the filter 1639 * 1640 * @return a list of statuses for the files under the given paths after 1641 * applying the filter 1642 * 1643 * @throws AccessControlException If access is denied 1644 * @throws FileNotFoundException If a file in <code>files</code> does not 1645 * exist 1646 * @throws IOException If an I/O error occurred 1647 * 1648 * Exceptions applicable to file systems accessed over RPC: 1649 * @throws RpcClientException If an exception occurred in the RPC client 1650 * @throws RpcServerException If an exception occurred in the RPC server 1651 * @throws UnexpectedServerException If server implementation throws 1652 * undeclared exception to RPC server 1653 */ 1654 public FileStatus[] listStatus(Path[] files, PathFilter filter) 1655 throws AccessControlException, FileNotFoundException, IOException { 1656 ArrayList<FileStatus> results = new ArrayList<FileStatus>(); 1657 for (int i = 0; i < files.length; i++) { 1658 listStatus(results, files[i], filter); 1659 } 1660 return results.toArray(new FileStatus[results.size()]); 1661 } 1662 1663 /* 1664 * Filter files/directories in the given path using the user-supplied path 1665 * filter. Results are added to the given array <code>results</code>. 1666 */ 1667 private void listStatus(ArrayList<FileStatus> results, Path f, 1668 PathFilter filter) throws AccessControlException, 1669 FileNotFoundException, IOException { 1670 FileStatus[] listing = listStatus(f); 1671 if (listing != null) { 1672 for (int i = 0; i < listing.length; i++) { 1673 if (filter.accept(listing[i].getPath())) { 1674 results.add(listing[i]); 1675 } 1676 } 1677 } 1678 } 1679 1680 /** 1681 * List the statuses of the files/directories in the given path 1682 * if the path is a directory. 
1683 * 1684 * @param f is the path 1685 * 1686 * @return an array that contains statuses of the files/directories 1687 * in the given path 1688 * 1689 * @throws AccessControlException If access is denied 1690 * @throws FileNotFoundException If <code>f</code> does not exist 1691 * @throws UnsupportedFileSystemException If file system for <code>f</code> is 1692 * not supported 1693 * @throws IOException If an I/O error occurred 1694 * 1695 * Exceptions applicable to file systems accessed over RPC: 1696 * @throws RpcClientException If an exception occurred in the RPC client 1697 * @throws RpcServerException If an exception occurred in the RPC server 1698 * @throws UnexpectedServerException If server implementation throws 1699 * undeclared exception to RPC server 1700 */ 1701 public FileStatus[] listStatus(final Path f) throws AccessControlException, 1702 FileNotFoundException, UnsupportedFileSystemException, 1703 IOException { 1704 final Path absF = fixRelativePart(f); 1705 return new FSLinkResolver<FileStatus[]>() { 1706 public FileStatus[] next(final AbstractFileSystem fs, final Path p) 1707 throws IOException, UnresolvedLinkException { 1708 return fs.listStatus(p); 1709 } 1710 }.resolve(FileContext.this, absF); 1711 } 1712 1713 /** 1714 * List the statuses and block locations of the files in the given path. 1715 * 1716 * If the path is a directory, 1717 * if recursive is false, returns files in the directory; 1718 * if recursive is true, return files in the subtree rooted at the path. 1719 * The subtree is traversed in the depth-first order. 1720 * If the path is a file, return the file's status and block locations. 1721 * Files across symbolic links are also returned. 
1722 * 1723 * @param f is the path 1724 * @param recursive if the subdirectories need to be traversed recursively 1725 * 1726 * @return an iterator that traverses statuses of the files 1727 * If any IO exception (for example a sub-directory gets deleted while 1728 * listing is being executed), next() or hasNext() of the returned iterator 1729 * may throw a RuntimeException with the IO exception as the cause. 1730 * 1731 * @throws AccessControlException If access is denied 1732 * @throws FileNotFoundException If <code>f</code> does not exist 1733 * @throws UnsupportedFileSystemException If file system for <code>f</code> 1734 * is not supported 1735 * @throws IOException If an I/O error occurred 1736 * 1737 * Exceptions applicable to file systems accessed over RPC: 1738 * @throws RpcClientException If an exception occurred in the RPC client 1739 * @throws RpcServerException If an exception occurred in the RPC server 1740 * @throws UnexpectedServerException If server implementation throws 1741 * undeclared exception to RPC server 1742 */ 1743 public RemoteIterator<LocatedFileStatus> listFiles( 1744 final Path f, final boolean recursive) throws AccessControlException, 1745 FileNotFoundException, UnsupportedFileSystemException, 1746 IOException { 1747 return new RemoteIterator<LocatedFileStatus>() { 1748 private Stack<RemoteIterator<LocatedFileStatus>> itors = 1749 new Stack<RemoteIterator<LocatedFileStatus>>(); 1750 RemoteIterator<LocatedFileStatus> curItor = listLocatedStatus(f); 1751 LocatedFileStatus curFile; 1752 1753 /** 1754 * Returns <tt>true</tt> if the iterator has more files. 1755 * 1756 * @return <tt>true</tt> if the iterator has more files. 
1757 * @throws AccessControlException if not allowed to access next 1758 * file's status or locations 1759 * @throws FileNotFoundException if next file does not exist any more 1760 * @throws UnsupportedFileSystemException if next file's 1761 * fs is unsupported 1762 * @throws IOException for all other IO errors 1763 * for example, NameNode is not avaialbe or 1764 * NameNode throws IOException due to an error 1765 * while getting the status or block locations 1766 */ 1767 @Override 1768 public boolean hasNext() throws IOException { 1769 while (curFile == null) { 1770 if (curItor.hasNext()) { 1771 handleFileStat(curItor.next()); 1772 } else if (!itors.empty()) { 1773 curItor = itors.pop(); 1774 } else { 1775 return false; 1776 } 1777 } 1778 return true; 1779 } 1780 1781 /** 1782 * Process the input stat. 1783 * If it is a file, return the file stat. 1784 * If it is a directory, traverse the directory if recursive is true; 1785 * ignore it if recursive is false. 1786 * If it is a symlink, resolve the symlink first and then process it 1787 * depending on if it is a file or directory. 
1788 * @param stat input status 1789 * @throws AccessControlException if access is denied 1790 * @throws FileNotFoundException if file is not found 1791 * @throws UnsupportedFileSystemException if fs is not supported 1792 * @throws IOException for all other IO errors 1793 */ 1794 private void handleFileStat(LocatedFileStatus stat) 1795 throws IOException { 1796 if (stat.isFile()) { // file 1797 curFile = stat; 1798 } else if (stat.isSymlink()) { // symbolic link 1799 // resolve symbolic link 1800 FileStatus symstat = FileContext.this.getFileStatus( 1801 stat.getSymlink()); 1802 if (symstat.isFile() || (recursive && symstat.isDirectory())) { 1803 itors.push(curItor); 1804 curItor = listLocatedStatus(stat.getPath()); 1805 } 1806 } else if (recursive) { // directory 1807 itors.push(curItor); 1808 curItor = listLocatedStatus(stat.getPath()); 1809 } 1810 } 1811 1812 /** 1813 * Returns the next file's status with its block locations 1814 * 1815 * @throws AccessControlException if not allowed to access next 1816 * file's status or locations 1817 * @throws FileNotFoundException if next file does not exist any more 1818 * @throws UnsupportedFileSystemException if next file's 1819 * fs is unsupported 1820 * @throws IOException for all other IO errors 1821 * for example, NameNode is not avaialbe or 1822 * NameNode throws IOException due to an error 1823 * while getting the status or block locations 1824 */ 1825 @Override 1826 public LocatedFileStatus next() throws IOException { 1827 if (hasNext()) { 1828 LocatedFileStatus result = curFile; 1829 curFile = null; 1830 return result; 1831 } 1832 throw new java.util.NoSuchElementException("No more entry in " + f); 1833 } 1834 }; 1835 } 1836 1837 /** 1838 * <p>Return all the files that match filePattern and are not checksum 1839 * files. Results are sorted by their names. 
1840 * 1841 * <p> 1842 * A filename pattern is composed of <i>regular</i> characters and 1843 * <i>special pattern matching</i> characters, which are: 1844 * 1845 * <dl> 1846 * <dd> 1847 * <dl> 1848 * <p> 1849 * <dt> <tt> ? </tt> 1850 * <dd> Matches any single character. 1851 * 1852 * <p> 1853 * <dt> <tt> * </tt> 1854 * <dd> Matches zero or more characters. 1855 * 1856 * <p> 1857 * <dt> <tt> [<i>abc</i>] </tt> 1858 * <dd> Matches a single character from character set 1859 * <tt>{<i>a,b,c</i>}</tt>. 1860 * 1861 * <p> 1862 * <dt> <tt> [<i>a</i>-<i>b</i>] </tt> 1863 * <dd> Matches a single character from the character range 1864 * <tt>{<i>a...b</i>}</tt>. Note: character <tt><i>a</i></tt> must be 1865 * lexicographically less than or equal to character <tt><i>b</i></tt>. 1866 * 1867 * <p> 1868 * <dt> <tt> [^<i>a</i>] </tt> 1869 * <dd> Matches a single char that is not from character set or range 1870 * <tt>{<i>a</i>}</tt>. Note that the <tt>^</tt> character must occur 1871 * immediately to the right of the opening bracket. 1872 * 1873 * <p> 1874 * <dt> <tt> \<i>c</i> </tt> 1875 * <dd> Removes (escapes) any special meaning of character <i>c</i>. 
1876 * 1877 * <p> 1878 * <dt> <tt> {ab,cd} </tt> 1879 * <dd> Matches a string from the string set <tt>{<i>ab, cd</i>} </tt> 1880 * 1881 * <p> 1882 * <dt> <tt> {ab,c{de,fh}} </tt> 1883 * <dd> Matches a string from string set <tt>{<i>ab, cde, cfh</i>}</tt> 1884 * 1885 * </dl> 1886 * </dd> 1887 * </dl> 1888 * 1889 * @param pathPattern a regular expression specifying a pth pattern 1890 * 1891 * @return an array of paths that match the path pattern 1892 * 1893 * @throws AccessControlException If access is denied 1894 * @throws UnsupportedFileSystemException If file system for 1895 * <code>pathPattern</code> is not supported 1896 * @throws IOException If an I/O error occurred 1897 * 1898 * Exceptions applicable to file systems accessed over RPC: 1899 * @throws RpcClientException If an exception occurred in the RPC client 1900 * @throws RpcServerException If an exception occurred in the RPC server 1901 * @throws UnexpectedServerException If server implementation throws 1902 * undeclared exception to RPC server 1903 */ 1904 public FileStatus[] globStatus(Path pathPattern) 1905 throws AccessControlException, UnsupportedFileSystemException, 1906 IOException { 1907 return globStatus(pathPattern, DEFAULT_FILTER); 1908 } 1909 1910 /** 1911 * Return an array of FileStatus objects whose path names match pathPattern 1912 * and is accepted by the user-supplied path filter. Results are sorted by 1913 * their path names. 1914 * Return null if pathPattern has no glob and the path does not exist. 1915 * Return an empty array if pathPattern has a glob and no path matches it. 
1916 * 1917 * @param pathPattern regular expression specifying the path pattern 1918 * @param filter user-supplied path filter 1919 * 1920 * @return an array of FileStatus objects 1921 * 1922 * @throws AccessControlException If access is denied 1923 * @throws UnsupportedFileSystemException If file system for 1924 * <code>pathPattern</code> is not supported 1925 * @throws IOException If an I/O error occurred 1926 * 1927 * Exceptions applicable to file systems accessed over RPC: 1928 * @throws RpcClientException If an exception occurred in the RPC client 1929 * @throws RpcServerException If an exception occurred in the RPC server 1930 * @throws UnexpectedServerException If server implementation throws 1931 * undeclared exception to RPC server 1932 */ 1933 public FileStatus[] globStatus(final Path pathPattern, 1934 final PathFilter filter) throws AccessControlException, 1935 UnsupportedFileSystemException, IOException { 1936 URI uri = getFSofPath(fixRelativePart(pathPattern)).getUri(); 1937 1938 String filename = pathPattern.toUri().getPath(); 1939 1940 List<String> filePatterns = GlobExpander.expand(filename); 1941 if (filePatterns.size() == 1) { 1942 Path absPathPattern = fixRelativePart(pathPattern); 1943 return globStatusInternal(uri, new Path(absPathPattern.toUri() 1944 .getPath()), filter); 1945 } else { 1946 List<FileStatus> results = new ArrayList<FileStatus>(); 1947 for (String iFilePattern : filePatterns) { 1948 Path iAbsFilePattern = fixRelativePart(new Path(iFilePattern)); 1949 FileStatus[] files = globStatusInternal(uri, iAbsFilePattern, filter); 1950 for (FileStatus file : files) { 1951 results.add(file); 1952 } 1953 } 1954 return results.toArray(new FileStatus[results.size()]); 1955 } 1956 } 1957 1958 /** 1959 * 1960 * @param uri for all the inPathPattern 1961 * @param inPathPattern - without the scheme & authority (take from uri) 1962 * @param filter 1963 * 1964 * @return an array of FileStatus objects 1965 * 1966 * @throws AccessControlException If 
access is denied 1967 * @throws IOException If an I/O error occurred 1968 */ 1969 private FileStatus[] globStatusInternal(final URI uri, 1970 final Path inPathPattern, final PathFilter filter) 1971 throws AccessControlException, IOException 1972 { 1973 Path[] parents = new Path[1]; 1974 int level = 0; 1975 1976 assert(inPathPattern.toUri().getScheme() == null && 1977 inPathPattern.toUri().getAuthority() == null && 1978 inPathPattern.isUriPathAbsolute()); 1979 1980 1981 String filename = inPathPattern.toUri().getPath(); 1982 1983 // path has only zero component 1984 if ("".equals(filename) || Path.SEPARATOR.equals(filename)) { 1985 Path p = inPathPattern.makeQualified(uri, null); 1986 return getFileStatus(new Path[]{p}); 1987 } 1988 1989 // path has at least one component 1990 String[] components = filename.split(Path.SEPARATOR); 1991 1992 // Path is absolute, first component is "/" hence first component 1993 // is the uri root 1994 parents[0] = new Path(new Path(uri), new Path("/")); 1995 level = 1; 1996 1997 // glob the paths that match the parent path, ie. 
[0, components.length-1] 1998 boolean[] hasGlob = new boolean[]{false}; 1999 Path[] relParentPaths = 2000 globPathsLevel(parents, components, level, hasGlob); 2001 FileStatus[] results; 2002 2003 if (relParentPaths == null || relParentPaths.length == 0) { 2004 results = null; 2005 } else { 2006 // fix the pathes to be abs 2007 Path[] parentPaths = new Path [relParentPaths.length]; 2008 for(int i=0; i<relParentPaths.length; i++) { 2009 parentPaths[i] = relParentPaths[i].makeQualified(uri, null); 2010 } 2011 2012 // Now work on the last component of the path 2013 GlobFilter fp = 2014 new GlobFilter(components[components.length - 1], filter); 2015 if (fp.hasPattern()) { // last component has a pattern 2016 // list parent directories and then glob the results 2017 try { 2018 results = listStatus(parentPaths, fp); 2019 } catch (FileNotFoundException e) { 2020 results = null; 2021 } 2022 hasGlob[0] = true; 2023 } else { // last component does not have a pattern 2024 // get all the path names 2025 ArrayList<Path> filteredPaths = 2026 new ArrayList<Path>(parentPaths.length); 2027 for (int i = 0; i < parentPaths.length; i++) { 2028 parentPaths[i] = new Path(parentPaths[i], 2029 components[components.length - 1]); 2030 if (fp.accept(parentPaths[i])) { 2031 filteredPaths.add(parentPaths[i]); 2032 } 2033 } 2034 // get all their statuses 2035 results = getFileStatus( 2036 filteredPaths.toArray(new Path[filteredPaths.size()])); 2037 } 2038 } 2039 2040 // Decide if the pathPattern contains a glob or not 2041 if (results == null) { 2042 if (hasGlob[0]) { 2043 results = new FileStatus[0]; 2044 } 2045 } else { 2046 if (results.length == 0) { 2047 if (!hasGlob[0]) { 2048 results = null; 2049 } 2050 } else { 2051 Arrays.sort(results); 2052 } 2053 } 2054 return results; 2055 } 2056 2057 /* 2058 * For a path of N components, return a list of paths that match the 2059 * components [<code>level</code>, <code>N-1</code>]. 
2060 */ 2061 private Path[] globPathsLevel(Path[] parents, String[] filePattern, 2062 int level, boolean[] hasGlob) throws AccessControlException, 2063 FileNotFoundException, IOException { 2064 if (level == filePattern.length - 1) { 2065 return parents; 2066 } 2067 if (parents == null || parents.length == 0) { 2068 return null; 2069 } 2070 GlobFilter fp = new GlobFilter(filePattern[level]); 2071 if (fp.hasPattern()) { 2072 try { 2073 parents = FileUtil.stat2Paths(listStatus(parents, fp)); 2074 } catch (FileNotFoundException e) { 2075 parents = null; 2076 } 2077 hasGlob[0] = true; 2078 } else { 2079 for (int i = 0; i < parents.length; i++) { 2080 parents[i] = new Path(parents[i], filePattern[level]); 2081 } 2082 } 2083 return globPathsLevel(parents, filePattern, level + 1, hasGlob); 2084 } 2085 2086 /** 2087 * Copy file from src to dest. See 2088 * {@link #copy(Path, Path, boolean, boolean)} 2089 */ 2090 public boolean copy(final Path src, final Path dst) 2091 throws AccessControlException, FileAlreadyExistsException, 2092 FileNotFoundException, ParentNotDirectoryException, 2093 UnsupportedFileSystemException, IOException { 2094 return copy(src, dst, false, false); 2095 } 2096 2097 /** 2098 * Copy from src to dst, optionally deleting src and overwriting dst. 2099 * @param src 2100 * @param dst 2101 * @param deleteSource - delete src if true 2102 * @param overwrite overwrite dst if true; throw IOException if dst exists 2103 * and overwrite is false. 
2104 * 2105 * @return true if copy is successful 2106 * 2107 * @throws AccessControlException If access is denied 2108 * @throws FileAlreadyExistsException If <code>dst</code> already exists 2109 * @throws FileNotFoundException If <code>src</code> does not exist 2110 * @throws ParentNotDirectoryException If parent of <code>dst</code> is not 2111 * a directory 2112 * @throws UnsupportedFileSystemException If file system for 2113 * <code>src</code> or <code>dst</code> is not supported 2114 * @throws IOException If an I/O error occurred 2115 * 2116 * Exceptions applicable to file systems accessed over RPC: 2117 * @throws RpcClientException If an exception occurred in the RPC client 2118 * @throws RpcServerException If an exception occurred in the RPC server 2119 * @throws UnexpectedServerException If server implementation throws 2120 * undeclared exception to RPC server 2121 * 2122 * RuntimeExceptions: 2123 * @throws InvalidPathException If path <code>dst</code> is invalid 2124 */ 2125 public boolean copy(final Path src, final Path dst, boolean deleteSource, 2126 boolean overwrite) throws AccessControlException, 2127 FileAlreadyExistsException, FileNotFoundException, 2128 ParentNotDirectoryException, UnsupportedFileSystemException, 2129 IOException { 2130 checkNotSchemeWithRelative(src); 2131 checkNotSchemeWithRelative(dst); 2132 Path qSrc = makeQualified(src); 2133 Path qDst = makeQualified(dst); 2134 checkDest(qSrc.getName(), qDst, overwrite); 2135 FileStatus fs = FileContext.this.getFileStatus(qSrc); 2136 if (fs.isDirectory()) { 2137 checkDependencies(qSrc, qDst); 2138 mkdir(qDst, FsPermission.getDefault(), true); 2139 FileStatus[] contents = listStatus(qSrc); 2140 for (FileStatus content : contents) { 2141 copy(makeQualified(content.getPath()), makeQualified(new Path(qDst, 2142 content.getPath().getName())), deleteSource, overwrite); 2143 } 2144 } else { 2145 InputStream in=null; 2146 OutputStream out = null; 2147 try { 2148 in = open(qSrc); 2149 
EnumSet<CreateFlag> createFlag = overwrite ? EnumSet.of( 2150 CreateFlag.CREATE, CreateFlag.OVERWRITE) : 2151 EnumSet.of(CreateFlag.CREATE); 2152 out = create(qDst, createFlag); 2153 IOUtils.copyBytes(in, out, conf, true); 2154 } catch (IOException e) { 2155 IOUtils.closeStream(out); 2156 IOUtils.closeStream(in); 2157 throw e; 2158 } 2159 } 2160 if (deleteSource) { 2161 return delete(qSrc, true); 2162 } else { 2163 return true; 2164 } 2165 } 2166 } 2167 2168 /** 2169 * Check if copying srcName to dst would overwrite an existing 2170 * file or directory. 2171 * @param srcName File or directory to be copied. 2172 * @param dst Destination to copy srcName to. 2173 * @param overwrite Whether it's ok to overwrite an existing file. 2174 * @throws AccessControlException If access is denied. 2175 * @throws IOException If dst is an existing directory, or dst is an 2176 * existing file and the overwrite option is not passed. 2177 */ 2178 private void checkDest(String srcName, Path dst, boolean overwrite) 2179 throws AccessControlException, IOException { 2180 try { 2181 FileStatus dstFs = getFileStatus(dst); 2182 if (dstFs.isDirectory()) { 2183 if (null == srcName) { 2184 throw new IOException("Target " + dst + " is a directory"); 2185 } 2186 // Recurse to check if dst/srcName exists. 2187 checkDest(null, new Path(dst, srcName), overwrite); 2188 } else if (!overwrite) { 2189 throw new IOException("Target " + new Path(dst, srcName) 2190 + " already exists"); 2191 } 2192 } catch (FileNotFoundException e) { 2193 // dst does not exist - OK to copy. 
2194 } 2195 } 2196 2197 // 2198 // If the destination is a subdirectory of the source, then 2199 // generate exception 2200 // 2201 private static void checkDependencies(Path qualSrc, Path qualDst) 2202 throws IOException { 2203 if (isSameFS(qualSrc, qualDst)) { 2204 String srcq = qualSrc.toString() + Path.SEPARATOR; 2205 String dstq = qualDst.toString() + Path.SEPARATOR; 2206 if (dstq.startsWith(srcq)) { 2207 if (srcq.length() == dstq.length()) { 2208 throw new IOException("Cannot copy " + qualSrc + " to itself."); 2209 } else { 2210 throw new IOException("Cannot copy " + qualSrc + 2211 " to its subdirectory " + qualDst); 2212 } 2213 } 2214 } 2215 } 2216 2217 /** 2218 * Are qualSrc and qualDst of the same file system? 2219 * @param qualPath1 - fully qualified path 2220 * @param qualPath2 - fully qualified path 2221 * @return 2222 */ 2223 private static boolean isSameFS(Path qualPath1, Path qualPath2) { 2224 URI srcUri = qualPath1.toUri(); 2225 URI dstUri = qualPath2.toUri(); 2226 return (srcUri.getScheme().equals(dstUri.getScheme()) && 2227 !(srcUri.getAuthority() != null && dstUri.getAuthority() != null && srcUri 2228 .getAuthority().equals(dstUri.getAuthority()))); 2229 } 2230 2231 /** 2232 * Deletes all the paths in deleteOnExit on JVM shutdown. 2233 */ 2234 static class FileContextFinalizer implements Runnable { 2235 public synchronized void run() { 2236 processDeleteOnExit(); 2237 } 2238 } 2239 2240 /** 2241 * Resolves all symbolic links in the specified path. 2242 * Returns the new path object. 
2243 */ 2244 protected Path resolve(final Path f) throws FileNotFoundException, 2245 UnresolvedLinkException, AccessControlException, IOException { 2246 return new FSLinkResolver<Path>() { 2247 public Path next(final AbstractFileSystem fs, final Path p) 2248 throws IOException, UnresolvedLinkException { 2249 return fs.resolvePath(p); 2250 } 2251 }.resolve(this, f); 2252 } 2253 2254 /** 2255 * Resolves all symbolic links in the specified path leading up 2256 * to, but not including the final path component. 2257 * @param f path to resolve 2258 * @return the new path object. 2259 */ 2260 protected Path resolveIntermediate(final Path f) throws IOException { 2261 return new FSLinkResolver<FileStatus>() { 2262 public FileStatus next(final AbstractFileSystem fs, final Path p) 2263 throws IOException, UnresolvedLinkException { 2264 return fs.getFileLinkStatus(p); 2265 } 2266 }.resolve(this, f).getPath(); 2267 } 2268 2269 /** 2270 * Returns the list of AbstractFileSystems accessed in the path. The list may 2271 * contain more than one AbstractFileSystems objects in case of symlinks. 2272 * 2273 * @param f 2274 * Path which needs to be resolved 2275 * @return List of AbstractFileSystems accessed in the path 2276 * @throws IOException 2277 */ 2278 Set<AbstractFileSystem> resolveAbstractFileSystems(final Path f) 2279 throws IOException { 2280 final Path absF = fixRelativePart(f); 2281 final HashSet<AbstractFileSystem> result 2282 = new HashSet<AbstractFileSystem>(); 2283 new FSLinkResolver<Void>() { 2284 public Void next(final AbstractFileSystem fs, final Path p) 2285 throws IOException, UnresolvedLinkException { 2286 result.add(fs); 2287 fs.getFileStatus(p); 2288 return null; 2289 } 2290 }.resolve(this, absF); 2291 return result; 2292 } 2293 2294 /** 2295 * Class used to perform an operation on and resolve symlinks in a 2296 * path. The operation may potentially span multiple file systems. 
2297 */ 2298 protected abstract class FSLinkResolver<T> { 2299 // The maximum number of symbolic link components in a path 2300 private static final int MAX_PATH_LINKS = 32; 2301 2302 /** 2303 * Generic helper function overridden on instantiation to perform a 2304 * specific operation on the given file system using the given path 2305 * which may result in an UnresolvedLinkException. 2306 * @param fs AbstractFileSystem to perform the operation on. 2307 * @param p Path given the file system. 2308 * @return Generic type determined by the specific implementation. 2309 * @throws UnresolvedLinkException If symbolic link <code>path</code> could 2310 * not be resolved 2311 * @throws IOException an I/O error occured 2312 */ 2313 public abstract T next(final AbstractFileSystem fs, final Path p) 2314 throws IOException, UnresolvedLinkException; 2315 2316 /** 2317 * Performs the operation specified by the next function, calling it 2318 * repeatedly until all symlinks in the given path are resolved. 2319 * @param fc FileContext used to access file systems. 2320 * @param p The path to resolve symlinks in. 2321 * @return Generic type determined by the implementation of next. 2322 * @throws IOException 2323 */ 2324 public T resolve(final FileContext fc, Path p) throws IOException { 2325 int count = 0; 2326 T in = null; 2327 Path first = p; 2328 // NB: More than one AbstractFileSystem can match a scheme, eg 2329 // "file" resolves to LocalFs but could have come by RawLocalFs. 
2330 AbstractFileSystem fs = fc.getFSofPath(p); 2331 2332 // Loop until all symlinks are resolved or the limit is reached 2333 for (boolean isLink = true; isLink;) { 2334 try { 2335 in = next(fs, p); 2336 isLink = false; 2337 } catch (UnresolvedLinkException e) { 2338 if (count++ > MAX_PATH_LINKS) { 2339 throw new IOException("Possible cyclic loop while " + 2340 "following symbolic link " + first); 2341 } 2342 // Resolve the first unresolved path component 2343 p = qualifySymlinkTarget(fs, p, fs.getLinkTarget(p)); 2344 fs = fc.getFSofPath(p); 2345 } 2346 } 2347 return in; 2348 } 2349 } 2350 2351 /** 2352 * Get the statistics for a particular file system 2353 * 2354 * @param uri 2355 * the uri to lookup the statistics. Only scheme and authority part 2356 * of the uri are used as the key to store and lookup. 2357 * @return a statistics object 2358 */ 2359 public static Statistics getStatistics(URI uri) { 2360 return AbstractFileSystem.getStatistics(uri); 2361 } 2362 2363 /** 2364 * Clears all the statistics stored in AbstractFileSystem, for all the file 2365 * systems. 2366 */ 2367 public static void clearStatistics() { 2368 AbstractFileSystem.clearStatistics(); 2369 } 2370 2371 /** 2372 * Prints the statistics to standard output. File System is identified by the 2373 * scheme and authority. 2374 */ 2375 public static void printStatistics() { 2376 AbstractFileSystem.printStatistics(); 2377 } 2378 2379 /** 2380 * @return Map of uri and statistics for each filesystem instantiated. The uri 2381 * consists of scheme and authority for the filesystem. 2382 */ 2383 public static Map<URI, Statistics> getAllStatistics() { 2384 return AbstractFileSystem.getAllStatistics(); 2385 } 2386 2387 /** 2388 * Get delegation tokens for the file systems accessed for a given 2389 * path. 2390 * @param p Path for which delegations tokens are requested. 2391 * @param renewer the account name that is allowed to renew the token. 2392 * @return List of delegation tokens. 
2393 * @throws IOException 2394 */ 2395 @InterfaceAudience.LimitedPrivate( { "HDFS", "MapReduce" }) 2396 public List<Token<?>> getDelegationTokens( 2397 Path p, String renewer) throws IOException { 2398 Set<AbstractFileSystem> afsSet = resolveAbstractFileSystems(p); 2399 List<Token<?>> tokenList = 2400 new ArrayList<Token<?>>(); 2401 for (AbstractFileSystem afs : afsSet) { 2402 List<Token<?>> afsTokens = afs.getDelegationTokens(renewer); 2403 tokenList.addAll(afsTokens); 2404 } 2405 return tokenList; 2406 } 2407 }