001 /** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018 package org.apache.hadoop.fs; 019 020 import java.io.Closeable; 021 import java.io.FileNotFoundException; 022 import java.io.IOException; 023 import java.net.URI; 024 import java.net.URISyntaxException; 025 import java.security.PrivilegedExceptionAction; 026 import java.util.ArrayList; 027 import java.util.Arrays; 028 import java.util.Collections; 029 import java.util.EnumSet; 030 import java.util.HashMap; 031 import java.util.HashSet; 032 import java.util.IdentityHashMap; 033 import java.util.Iterator; 034 import java.util.List; 035 import java.util.Map; 036 import java.util.NoSuchElementException; 037 import java.util.ServiceLoader; 038 import java.util.Set; 039 import java.util.Stack; 040 import java.util.TreeSet; 041 import java.util.concurrent.atomic.AtomicInteger; 042 import java.util.concurrent.atomic.AtomicLong; 043 044 import org.apache.commons.logging.Log; 045 import org.apache.commons.logging.LogFactory; 046 import org.apache.hadoop.classification.InterfaceAudience; 047 import org.apache.hadoop.classification.InterfaceStability; 048 import org.apache.hadoop.conf.Configuration; 049 import org.apache.hadoop.conf.Configured; 050 import org.apache.hadoop.fs.Options.ChecksumOpt; 051 import org.apache.hadoop.fs.Options.Rename; 052 import org.apache.hadoop.fs.permission.FsPermission; 053 import org.apache.hadoop.io.MultipleIOException; 054 import org.apache.hadoop.io.Text; 055 import org.apache.hadoop.net.NetUtils; 056 import org.apache.hadoop.security.AccessControlException; 057 import org.apache.hadoop.security.Credentials; 058 import org.apache.hadoop.security.SecurityUtil; 059 import org.apache.hadoop.security.UserGroupInformation; 060 import org.apache.hadoop.security.token.Token; 061 import org.apache.hadoop.util.DataChecksum; 062 import org.apache.hadoop.util.Progressable; 063 import org.apache.hadoop.util.ReflectionUtils; 064 import org.apache.hadoop.util.ShutdownHookManager; 065 066 import com.google.common.annotations.VisibleForTesting; 067 068 /**************************************************************** 069 * An abstract base class for a fairly generic filesystem. It 070 * may be implemented as a distributed filesystem, or as a "local" 071 * one that reflects the locally-connected disk. The local version 072 * exists for small Hadoop instances and for testing. 073 * 074 * <p> 075 * 076 * All user code that may potentially use the Hadoop Distributed 077 * File System should be written to use a FileSystem object. The 078 * Hadoop DFS is a multi-machine system that appears as a single 079 * disk. It's useful because of its fault tolerance and potentially 080 * very large capacity. 081 * 082 * <p> 083 * The local implementation is {@link LocalFileSystem} and distributed 084 * implementation is DistributedFileSystem. 085 *****************************************************************/ 086 @InterfaceAudience.Public 087 @InterfaceStability.Stable 088 public abstract class FileSystem extends Configured implements Closeable { 089 public static final String FS_DEFAULT_NAME_KEY = 090 CommonConfigurationKeys.FS_DEFAULT_NAME_KEY; 091 public static final String DEFAULT_FS = 092 CommonConfigurationKeys.FS_DEFAULT_NAME_DEFAULT; 093 094 public static final Log LOG = LogFactory.getLog(FileSystem.class); 095 096 /** 097 * Priority of the FileSystem shutdown hook. 098 */ 099 public static final int SHUTDOWN_HOOK_PRIORITY = 10; 100 101 /** FileSystem cache */ 102 static final Cache CACHE = new Cache(); 103 104 /** The key this instance is stored under in the cache. */ 105 private Cache.Key key; 106 107 /** Recording statistics per a FileSystem class */ 108 private static final Map<Class<? extends FileSystem>, Statistics> 109 statisticsTable = 110 new IdentityHashMap<Class<? extends FileSystem>, Statistics>(); 111 112 /** 113 * The statistics for this file system. 114 */ 115 protected Statistics statistics; 116 117 /** 118 * A cache of files that should be deleted when filsystem is closed 119 * or the JVM is exited. 120 */ 121 private Set<Path> deleteOnExit = new TreeSet<Path>(); 122 123 boolean resolveSymlinks; 124 /** 125 * This method adds a file system for testing so that we can find it later. It 126 * is only for testing. 127 * @param uri the uri to store it under 128 * @param conf the configuration to store it under 129 * @param fs the file system to store 130 * @throws IOException 131 */ 132 static void addFileSystemForTesting(URI uri, Configuration conf, 133 FileSystem fs) throws IOException { 134 CACHE.map.put(new Cache.Key(uri, conf), fs); 135 } 136 137 /** 138 * Get a filesystem instance based on the uri, the passed 139 * configuration and the user 140 * @param uri of the filesystem 141 * @param conf the configuration to use 142 * @param user to perform the get as 143 * @return the filesystem instance 144 * @throws IOException 145 * @throws InterruptedException 146 */ 147 public static FileSystem get(final URI uri, final Configuration conf, 148 final String user) throws IOException, InterruptedException { 149 String ticketCachePath = 150 conf.get(CommonConfigurationKeys.KERBEROS_TICKET_CACHE_PATH); 151 UserGroupInformation ugi = 152 UserGroupInformation.getBestUGI(ticketCachePath, user); 153 return ugi.doAs(new PrivilegedExceptionAction<FileSystem>() { 154 @Override 155 public FileSystem run() throws IOException { 156 return get(uri, conf); 157 } 158 }); 159 } 160 161 /** 162 * Returns the configured filesystem implementation. 163 * @param conf the configuration to use 164 */ 165 public static FileSystem get(Configuration conf) throws IOException { 166 return get(getDefaultUri(conf), conf); 167 } 168 169 /** Get the default filesystem URI from a configuration. 170 * @param conf the configuration to use 171 * @return the uri of the default filesystem 172 */ 173 public static URI getDefaultUri(Configuration conf) { 174 return URI.create(fixName(conf.get(FS_DEFAULT_NAME_KEY, DEFAULT_FS))); 175 } 176 177 /** Set the default filesystem URI in a configuration. 178 * @param conf the configuration to alter 179 * @param uri the new default filesystem uri 180 */ 181 public static void setDefaultUri(Configuration conf, URI uri) { 182 conf.set(FS_DEFAULT_NAME_KEY, uri.toString()); 183 } 184 185 /** Set the default filesystem URI in a configuration. 186 * @param conf the configuration to alter 187 * @param uri the new default filesystem uri 188 */ 189 public static void setDefaultUri(Configuration conf, String uri) { 190 setDefaultUri(conf, URI.create(fixName(uri))); 191 } 192 193 /** Called after a new FileSystem instance is constructed. 194 * @param name a uri whose authority section names the host, port, etc. 195 * for this FileSystem 196 * @param conf the configuration 197 */ 198 public void initialize(URI name, Configuration conf) throws IOException { 199 statistics = getStatistics(name.getScheme(), getClass()); 200 resolveSymlinks = conf.getBoolean( 201 CommonConfigurationKeys.FS_CLIENT_RESOLVE_REMOTE_SYMLINKS_KEY, 202 CommonConfigurationKeys.FS_CLIENT_RESOLVE_REMOTE_SYMLINKS_DEFAULT); 203 } 204 205 /** 206 * Return the protocol scheme for the FileSystem. 207 * <p/> 208 * This implementation throws an <code>UnsupportedOperationException</code>. 209 * 210 * @return the protocol scheme for the FileSystem. 211 */ 212 public String getScheme() { 213 throw new UnsupportedOperationException("Not implemented by the " + getClass().getSimpleName() + " FileSystem implementation"); 214 } 215 216 /** Returns a URI whose scheme and authority identify this FileSystem.*/ 217 public abstract URI getUri(); 218 219 /** 220 * Return a canonicalized form of this FileSystem's URI. 221 * 222 * The default implementation simply calls {@link #canonicalizeUri(URI)} 223 * on the filesystem's own URI, so subclasses typically only need to 224 * implement that method. 225 * 226 * @see #canonicalizeUri(URI) 227 */ 228 protected URI getCanonicalUri() { 229 return canonicalizeUri(getUri()); 230 } 231 232 /** 233 * Canonicalize the given URI. 234 * 235 * This is filesystem-dependent, but may for example consist of 236 * canonicalizing the hostname using DNS and adding the default 237 * port if not specified. 238 * 239 * The default implementation simply fills in the default port if 240 * not specified and if the filesystem has a default port. 241 * 242 * @return URI 243 * @see NetUtils#getCanonicalUri(URI, int) 244 */ 245 protected URI canonicalizeUri(URI uri) { 246 if (uri.getPort() == -1 && getDefaultPort() > 0) { 247 // reconstruct the uri with the default port set 248 try { 249 uri = new URI(uri.getScheme(), uri.getUserInfo(), 250 uri.getHost(), getDefaultPort(), 251 uri.getPath(), uri.getQuery(), uri.getFragment()); 252 } catch (URISyntaxException e) { 253 // Should never happen! 254 throw new AssertionError("Valid URI became unparseable: " + 255 uri); 256 } 257 } 258 259 return uri; 260 } 261 262 /** 263 * Get the default port for this file system. 264 * @return the default port or 0 if there isn't one 265 */ 266 protected int getDefaultPort() { 267 return 0; 268 } 269 270 protected static FileSystem getFSofPath(final Path absOrFqPath, 271 final Configuration conf) 272 throws UnsupportedFileSystemException, IOException { 273 absOrFqPath.checkNotSchemeWithRelative(); 274 absOrFqPath.checkNotRelative(); 275 276 // Uses the default file system if not fully qualified 277 return get(absOrFqPath.toUri(), conf); 278 } 279 280 /** 281 * Get a canonical service name for this file system. The token cache is 282 * the only user of the canonical service name, and uses it to lookup this 283 * filesystem's service tokens. 284 * If file system provides a token of its own then it must have a canonical 285 * name, otherwise canonical name can be null. 286 * 287 * Default Impl: If the file system has child file systems 288 * (such as an embedded file system) then it is assumed that the fs has no 289 * tokens of its own and hence returns a null name; otherwise a service 290 * name is built using Uri and port. 291 * 292 * @return a service string that uniquely identifies this file system, null 293 * if the filesystem does not implement tokens 294 * @see SecurityUtil#buildDTServiceName(URI, int) 295 */ 296 @InterfaceAudience.LimitedPrivate({ "HDFS", "MapReduce" }) 297 public String getCanonicalServiceName() { 298 return (getChildFileSystems() == null) 299 ? SecurityUtil.buildDTServiceName(getUri(), getDefaultPort()) 300 : null; 301 } 302 303 /** @deprecated call #getUri() instead.*/ 304 @Deprecated 305 public String getName() { return getUri().toString(); } 306 307 /** @deprecated call #get(URI,Configuration) instead. */ 308 @Deprecated 309 public static FileSystem getNamed(String name, Configuration conf) 310 throws IOException { 311 return get(URI.create(fixName(name)), conf); 312 } 313 314 /** Update old-format filesystem names, for back-compatibility. This should 315 * eventually be replaced with a checkName() method that throws an exception 316 * for old-format names. */ 317 private static String fixName(String name) { 318 // convert old-format name to new-format name 319 if (name.equals("local")) { // "local" is now "file:///". 320 LOG.warn("\"local\" is a deprecated filesystem name." 321 +" Use \"file:///\" instead."); 322 name = "file:///"; 323 } else if (name.indexOf('/')==-1) { // unqualified is "hdfs://" 324 LOG.warn("\""+name+"\" is a deprecated filesystem name." 325 +" Use \"hdfs://"+name+"/\" instead."); 326 name = "hdfs://"+name; 327 } 328 return name; 329 } 330 331 /** 332 * Get the local file system. 333 * @param conf the configuration to configure the file system with 334 * @return a LocalFileSystem 335 */ 336 public static LocalFileSystem getLocal(Configuration conf) 337 throws IOException { 338 return (LocalFileSystem)get(LocalFileSystem.NAME, conf); 339 } 340 341 /** Returns the FileSystem for this URI's scheme and authority. The scheme 342 * of the URI determines a configuration property name, 343 * <tt>fs.<i>scheme</i>.class</tt> whose value names the FileSystem class. 344 * The entire URI is passed to the FileSystem instance's initialize method. 345 */ 346 public static FileSystem get(URI uri, Configuration conf) throws IOException { 347 String scheme = uri.getScheme(); 348 String authority = uri.getAuthority(); 349 350 if (scheme == null && authority == null) { // use default FS 351 return get(conf); 352 } 353 354 if (scheme != null && authority == null) { // no authority 355 URI defaultUri = getDefaultUri(conf); 356 if (scheme.equals(defaultUri.getScheme()) // if scheme matches default 357 && defaultUri.getAuthority() != null) { // & default has authority 358 return get(defaultUri, conf); // return default 359 } 360 } 361 362 String disableCacheName = String.format("fs.%s.impl.disable.cache", scheme); 363 if (conf.getBoolean(disableCacheName, false)) { 364 return createFileSystem(uri, conf); 365 } 366 367 return CACHE.get(uri, conf); 368 } 369 370 /** 371 * Returns the FileSystem for this URI's scheme and authority and the 372 * passed user. Internally invokes {@link #newInstance(URI, Configuration)} 373 * @param uri of the filesystem 374 * @param conf the configuration to use 375 * @param user to perform the get as 376 * @return filesystem instance 377 * @throws IOException 378 * @throws InterruptedException 379 */ 380 public static FileSystem newInstance(final URI uri, final Configuration conf, 381 final String user) throws IOException, InterruptedException { 382 String ticketCachePath = 383 conf.get(CommonConfigurationKeys.KERBEROS_TICKET_CACHE_PATH); 384 UserGroupInformation ugi = 385 UserGroupInformation.getBestUGI(ticketCachePath, user); 386 return ugi.doAs(new PrivilegedExceptionAction<FileSystem>() { 387 @Override 388 public FileSystem run() throws IOException { 389 return newInstance(uri,conf); 390 } 391 }); 392 } 393 /** Returns the FileSystem for this URI's scheme and authority. The scheme 394 * of the URI determines a configuration property name, 395 * <tt>fs.<i>scheme</i>.class</tt> whose value names the FileSystem class. 396 * The entire URI is passed to the FileSystem instance's initialize method. 397 * This always returns a new FileSystem object. 398 */ 399 public static FileSystem newInstance(URI uri, Configuration conf) throws IOException { 400 String scheme = uri.getScheme(); 401 String authority = uri.getAuthority(); 402 403 if (scheme == null) { // no scheme: use default FS 404 return newInstance(conf); 405 } 406 407 if (authority == null) { // no authority 408 URI defaultUri = getDefaultUri(conf); 409 if (scheme.equals(defaultUri.getScheme()) // if scheme matches default 410 && defaultUri.getAuthority() != null) { // & default has authority 411 return newInstance(defaultUri, conf); // return default 412 } 413 } 414 return CACHE.getUnique(uri, conf); 415 } 416 417 /** Returns a unique configured filesystem implementation. 418 * This always returns a new FileSystem object. 419 * @param conf the configuration to use 420 */ 421 public static FileSystem newInstance(Configuration conf) throws IOException { 422 return newInstance(getDefaultUri(conf), conf); 423 } 424 425 /** 426 * Get a unique local file system object 427 * @param conf the configuration to configure the file system with 428 * @return a LocalFileSystem 429 * This always returns a new FileSystem object. 430 */ 431 public static LocalFileSystem newInstanceLocal(Configuration conf) 432 throws IOException { 433 return (LocalFileSystem)newInstance(LocalFileSystem.NAME, conf); 434 } 435 436 /** 437 * Close all cached filesystems. Be sure those filesystems are not 438 * used anymore. 439 * 440 * @throws IOException 441 */ 442 public static void closeAll() throws IOException { 443 CACHE.closeAll(); 444 } 445 446 /** 447 * Close all cached filesystems for a given UGI. Be sure those filesystems 448 * are not used anymore. 449 * @param ugi user group info to close 450 * @throws IOException 451 */ 452 public static void closeAllForUGI(UserGroupInformation ugi) 453 throws IOException { 454 CACHE.closeAll(ugi); 455 } 456 457 /** 458 * Make sure that a path specifies a FileSystem. 459 * @param path to use 460 */ 461 public Path makeQualified(Path path) { 462 checkPath(path); 463 return path.makeQualified(this.getUri(), this.getWorkingDirectory()); 464 } 465 466 /** 467 * Get a new delegation token for this file system. 468 * This is an internal method that should have been declared protected 469 * but wasn't historically. 470 * Callers should use {@link #addDelegationTokens(String, Credentials)} 471 * 472 * @param renewer the account name that is allowed to renew the token. 473 * @return a new delegation token 474 * @throws IOException 475 */ 476 @InterfaceAudience.Private() 477 public Token<?> getDelegationToken(String renewer) throws IOException { 478 return null; 479 } 480 481 /** 482 * Obtain all delegation tokens used by this FileSystem that are not 483 * already present in the given Credentials. Existing tokens will neither 484 * be verified as valid nor having the given renewer. Missing tokens will 485 * be acquired and added to the given Credentials. 486 * 487 * Default Impl: works for simple fs with its own token 488 * and also for an embedded fs whose tokens are those of its 489 * children file system (i.e. the embedded fs has not tokens of its 490 * own). 491 * 492 * @param renewer the user allowed to renew the delegation tokens 493 * @param credentials cache in which to add new delegation tokens 494 * @return list of new delegation tokens 495 * @throws IOException 496 */ 497 @InterfaceAudience.LimitedPrivate({ "HDFS", "MapReduce" }) 498 public Token<?>[] addDelegationTokens( 499 final String renewer, Credentials credentials) throws IOException { 500 if (credentials == null) { 501 credentials = new Credentials(); 502 } 503 final List<Token<?>> tokens = new ArrayList<Token<?>>(); 504 collectDelegationTokens(renewer, credentials, tokens); 505 return tokens.toArray(new Token<?>[tokens.size()]); 506 } 507 508 /** 509 * Recursively obtain the tokens for this FileSystem and all descended 510 * FileSystems as determined by getChildFileSystems(). 511 * @param renewer the user allowed to renew the delegation tokens 512 * @param credentials cache in which to add the new delegation tokens 513 * @param tokens list in which to add acquired tokens 514 * @throws IOException 515 */ 516 private void collectDelegationTokens(final String renewer, 517 final Credentials credentials, 518 final List<Token<?>> tokens) 519 throws IOException { 520 final String serviceName = getCanonicalServiceName(); 521 // Collect token of the this filesystem and then of its embedded children 522 if (serviceName != null) { // fs has token, grab it 523 final Text service = new Text(serviceName); 524 Token<?> token = credentials.getToken(service); 525 if (token == null) { 526 token = getDelegationToken(renewer); 527 if (token != null) { 528 tokens.add(token); 529 credentials.addToken(service, token); 530 } 531 } 532 } 533 // Now collect the tokens from the children 534 final FileSystem[] children = getChildFileSystems(); 535 if (children != null) { 536 for (final FileSystem fs : children) { 537 fs.collectDelegationTokens(renewer, credentials, tokens); 538 } 539 } 540 } 541 542 /** 543 * Get all the immediate child FileSystems embedded in this FileSystem. 544 * It does not recurse and get grand children. If a FileSystem 545 * has multiple child FileSystems, then it should return a unique list 546 * of those FileSystems. Default is to return null to signify no children. 547 * 548 * @return FileSystems used by this FileSystem 549 */ 550 @InterfaceAudience.LimitedPrivate({ "HDFS" }) 551 @VisibleForTesting 552 public FileSystem[] getChildFileSystems() { 553 return null; 554 } 555 556 /** create a file with the provided permission 557 * The permission of the file is set to be the provided permission as in 558 * setPermission, not permission&~umask 559 * 560 * It is implemented using two RPCs. It is understood that it is inefficient, 561 * but the implementation is thread-safe. The other option is to change the 562 * value of umask in configuration to be 0, but it is not thread-safe. 563 * 564 * @param fs file system handle 565 * @param file the name of the file to be created 566 * @param permission the permission of the file 567 * @return an output stream 568 * @throws IOException 569 */ 570 public static FSDataOutputStream create(FileSystem fs, 571 Path file, FsPermission permission) throws IOException { 572 // create the file with default permission 573 FSDataOutputStream out = fs.create(file); 574 // set its permission to the supplied one 575 fs.setPermission(file, permission); 576 return out; 577 } 578 579 /** create a directory with the provided permission 580 * The permission of the directory is set to be the provided permission as in 581 * setPermission, not permission&~umask 582 * 583 * @see #create(FileSystem, Path, FsPermission) 584 * 585 * @param fs file system handle 586 * @param dir the name of the directory to be created 587 * @param permission the permission of the directory 588 * @return true if the directory creation succeeds; false otherwise 589 * @throws IOException 590 */ 591 public static boolean mkdirs(FileSystem fs, Path dir, FsPermission permission) 592 throws IOException { 593 // create the directory using the default permission 594 boolean result = fs.mkdirs(dir); 595 // set its permission to be the supplied one 596 fs.setPermission(dir, permission); 597 return result; 598 } 599 600 /////////////////////////////////////////////////////////////// 601 // FileSystem 602 /////////////////////////////////////////////////////////////// 603 604 protected FileSystem() { 605 super(null); 606 } 607 608 /** 609 * Check that a Path belongs to this FileSystem. 610 * @param path to check 611 */ 612 protected void checkPath(Path path) { 613 URI uri = path.toUri(); 614 String thatScheme = uri.getScheme(); 615 if (thatScheme == null) // fs is relative 616 return; 617 URI thisUri = getCanonicalUri(); 618 String thisScheme = thisUri.getScheme(); 619 //authority and scheme are not case sensitive 620 if (thisScheme.equalsIgnoreCase(thatScheme)) {// schemes match 621 String thisAuthority = thisUri.getAuthority(); 622 String thatAuthority = uri.getAuthority(); 623 if (thatAuthority == null && // path's authority is null 624 thisAuthority != null) { // fs has an authority 625 URI defaultUri = getDefaultUri(getConf()); 626 if (thisScheme.equalsIgnoreCase(defaultUri.getScheme())) { 627 uri = defaultUri; // schemes match, so use this uri instead 628 } else { 629 uri = null; // can't determine auth of the path 630 } 631 } 632 if (uri != null) { 633 // canonicalize uri before comparing with this fs 634 uri = canonicalizeUri(uri); 635 thatAuthority = uri.getAuthority(); 636 if (thisAuthority == thatAuthority || // authorities match 637 (thisAuthority != null && 638 thisAuthority.equalsIgnoreCase(thatAuthority))) 639 return; 640 } 641 } 642 throw new IllegalArgumentException("Wrong FS: "+path+ 643 ", expected: "+this.getUri()); 644 } 645 646 /** 647 * Return an array containing hostnames, offset and size of 648 * portions of the given file. For a nonexistent 649 * file or regions, null will be returned. 650 * 651 * This call is most helpful with DFS, where it returns 652 * hostnames of machines that contain the given file. 653 * 654 * The FileSystem will simply return an elt containing 'localhost'. 655 * 656 * @param file FilesStatus to get data from 657 * @param start offset into the given file 658 * @param len length for which to get locations for 659 */ 660 public BlockLocation[] getFileBlockLocations(FileStatus file, 661 long start, long len) throws IOException { 662 if (file == null) { 663 return null; 664 } 665 666 if (start < 0 || len < 0) { 667 throw new IllegalArgumentException("Invalid start or len parameter"); 668 } 669 670 if (file.getLen() <= start) { 671 return new BlockLocation[0]; 672 673 } 674 String[] name = { "localhost:50010" }; 675 String[] host = { "localhost" }; 676 return new BlockLocation[] { 677 new BlockLocation(name, host, 0, file.getLen()) }; 678 } 679 680 681 /** 682 * Return an array containing hostnames, offset and size of 683 * portions of the given file. For a nonexistent 684 * file or regions, null will be returned. 685 * 686 * This call is most helpful with DFS, where it returns 687 * hostnames of machines that contain the given file. 688 * 689 * The FileSystem will simply return an elt containing 'localhost'. 690 * 691 * @param p path is used to identify an FS since an FS could have 692 * another FS that it could be delegating the call to 693 * @param start offset into the given file 694 * @param len length for which to get locations for 695 */ 696 public BlockLocation[] getFileBlockLocations(Path p, 697 long start, long len) throws IOException { 698 if (p == null) { 699 throw new NullPointerException(); 700 } 701 FileStatus file = getFileStatus(p); 702 return getFileBlockLocations(file, start, len); 703 } 704 705 /** 706 * Return a set of server default configuration values 707 * @return server default configuration values 708 * @throws IOException 709 * @deprecated use {@link #getServerDefaults(Path)} instead 710 */ 711 @Deprecated 712 public FsServerDefaults getServerDefaults() throws IOException { 713 Configuration conf = getConf(); 714 // CRC32 is chosen as default as it is available in all 715 // releases that support checksum. 716 // The client trash configuration is ignored. 717 return new FsServerDefaults(getDefaultBlockSize(), 718 conf.getInt("io.bytes.per.checksum", 512), 719 64 * 1024, 720 getDefaultReplication(), 721 conf.getInt("io.file.buffer.size", 4096), 722 false, 723 CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_DEFAULT, 724 DataChecksum.Type.CRC32); 725 } 726 727 /** 728 * Return a set of server default configuration values 729 * @param p path is used to identify an FS since an FS could have 730 * another FS that it could be delegating the call to 731 * @return server default configuration values 732 * @throws IOException 733 */ 734 public FsServerDefaults getServerDefaults(Path p) throws IOException { 735 return getServerDefaults(); 736 } 737 738 /** 739 * Return the fully-qualified path of path f resolving the path 740 * through any symlinks or mount point 741 * @param p path to be resolved 742 * @return fully qualified path 743 * @throws FileNotFoundException 744 */ 745 public Path resolvePath(final Path p) throws IOException { 746 checkPath(p); 747 return getFileStatus(p).getPath(); 748 } 749 750 /** 751 * Opens an FSDataInputStream at the indicated Path. 752 * @param f the file name to open 753 * @param bufferSize the size of the buffer to be used. 754 */ 755 public abstract FSDataInputStream open(Path f, int bufferSize) 756 throws IOException; 757 758 /** 759 * Opens an FSDataInputStream at the indicated Path. 760 * @param f the file to open 761 */ 762 public FSDataInputStream open(Path f) throws IOException { 763 return open(f, getConf().getInt("io.file.buffer.size", 4096)); 764 } 765 766 /** 767 * Create an FSDataOutputStream at the indicated Path. 768 * Files are overwritten by default. 769 * @param f the file to create 770 */ 771 public FSDataOutputStream create(Path f) throws IOException { 772 return create(f, true); 773 } 774 775 /** 776 * Create an FSDataOutputStream at the indicated Path. 777 * @param f the file to create 778 * @param overwrite if a file with this name already exists, then if true, 779 * the file will be overwritten, and if false an exception will be thrown. 780 */ 781 public FSDataOutputStream create(Path f, boolean overwrite) 782 throws IOException { 783 return create(f, overwrite, 784 getConf().getInt("io.file.buffer.size", 4096), 785 getDefaultReplication(f), 786 getDefaultBlockSize(f)); 787 } 788 789 /** 790 * Create an FSDataOutputStream at the indicated Path with write-progress 791 * reporting. 792 * Files are overwritten by default. 793 * @param f the file to create 794 * @param progress to report progress 795 */ 796 public FSDataOutputStream create(Path f, Progressable progress) 797 throws IOException { 798 return create(f, true, 799 getConf().getInt("io.file.buffer.size", 4096), 800 getDefaultReplication(f), 801 getDefaultBlockSize(f), progress); 802 } 803 804 /** 805 * Create an FSDataOutputStream at the indicated Path. 806 * Files are overwritten by default. 807 * @param f the file to create 808 * @param replication the replication factor 809 */ 810 public FSDataOutputStream create(Path f, short replication) 811 throws IOException { 812 return create(f, true, 813 getConf().getInt("io.file.buffer.size", 4096), 814 replication, 815 getDefaultBlockSize(f)); 816 } 817 818 /** 819 * Create an FSDataOutputStream at the indicated Path with write-progress 820 * reporting. 821 * Files are overwritten by default. 822 * @param f the file to create 823 * @param replication the replication factor 824 * @param progress to report progress 825 */ 826 public FSDataOutputStream create(Path f, short replication, 827 Progressable progress) throws IOException { 828 return create(f, true, 829 getConf().getInt( 830 CommonConfigurationKeysPublic.IO_FILE_BUFFER_SIZE_KEY, 831 CommonConfigurationKeysPublic.IO_FILE_BUFFER_SIZE_DEFAULT), 832 replication, 833 getDefaultBlockSize(f), progress); 834 } 835 836 837 /** 838 * Create an FSDataOutputStream at the indicated Path. 839 * @param f the file name to create 840 * @param overwrite if a file with this name already exists, then if true, 841 * the file will be overwritten, and if false an error will be thrown. 842 * @param bufferSize the size of the buffer to be used. 843 */ 844 public FSDataOutputStream create(Path f, 845 boolean overwrite, 846 int bufferSize 847 ) throws IOException { 848 return create(f, overwrite, bufferSize, 849 getDefaultReplication(f), 850 getDefaultBlockSize(f)); 851 } 852 853 /** 854 * Create an FSDataOutputStream at the indicated Path with write-progress 855 * reporting. 856 * @param f the path of the file to open 857 * @param overwrite if a file with this name already exists, then if true, 858 * the file will be overwritten, and if false an error will be thrown. 859 * @param bufferSize the size of the buffer to be used. 860 */ 861 public FSDataOutputStream create(Path f, 862 boolean overwrite, 863 int bufferSize, 864 Progressable progress 865 ) throws IOException { 866 return create(f, overwrite, bufferSize, 867 getDefaultReplication(f), 868 getDefaultBlockSize(f), progress); 869 } 870 871 872 /** 873 * Create an FSDataOutputStream at the indicated Path. 874 * @param f the file name to open 875 * @param overwrite if a file with this name already exists, then if true, 876 * the file will be overwritten, and if false an error will be thrown. 877 * @param bufferSize the size of the buffer to be used. 878 * @param replication required block replication for the file. 879 */ 880 public FSDataOutputStream create(Path f, 881 boolean overwrite, 882 int bufferSize, 883 short replication, 884 long blockSize 885 ) throws IOException { 886 return create(f, overwrite, bufferSize, replication, blockSize, null); 887 } 888 889 /** 890 * Create an FSDataOutputStream at the indicated Path with write-progress 891 * reporting. 892 * @param f the file name to open 893 * @param overwrite if a file with this name already exists, then if true, 894 * the file will be overwritten, and if false an error will be thrown. 895 * @param bufferSize the size of the buffer to be used. 896 * @param replication required block replication for the file. 897 */ 898 public FSDataOutputStream create(Path f, 899 boolean overwrite, 900 int bufferSize, 901 short replication, 902 long blockSize, 903 Progressable progress 904 ) throws IOException { 905 return this.create(f, FsPermission.getFileDefault().applyUMask( 906 FsPermission.getUMask(getConf())), overwrite, bufferSize, 907 replication, blockSize, progress); 908 } 909 910 /** 911 * Create an FSDataOutputStream at the indicated Path with write-progress 912 * reporting. 913 * @param f the file name to open 914 * @param permission 915 * @param overwrite if a file with this name already exists, then if true, 916 * the file will be overwritten, and if false an error will be thrown. 917 * @param bufferSize the size of the buffer to be used. 918 * @param replication required block replication for the file. 919 * @param blockSize 920 * @param progress 921 * @throws IOException 922 * @see #setPermission(Path, FsPermission) 923 */ 924 public abstract FSDataOutputStream create(Path f, 925 FsPermission permission, 926 boolean overwrite, 927 int bufferSize, 928 short replication, 929 long blockSize, 930 Progressable progress) throws IOException; 931 932 /** 933 * Create an FSDataOutputStream at the indicated Path with write-progress 934 * reporting. 935 * @param f the file name to open 936 * @param permission 937 * @param flags {@link CreateFlag}s to use for this stream. 938 * @param bufferSize the size of the buffer to be used. 939 * @param replication required block replication for the file. 940 * @param blockSize 941 * @param progress 942 * @throws IOException 943 * @see #setPermission(Path, FsPermission) 944 */ 945 public FSDataOutputStream create(Path f, 946 FsPermission permission, 947 EnumSet<CreateFlag> flags, 948 int bufferSize, 949 short replication, 950 long blockSize, 951 Progressable progress) throws IOException { 952 return create(f, permission, flags, bufferSize, replication, 953 blockSize, progress, null); 954 } 955 956 /** 957 * Create an FSDataOutputStream at the indicated Path with a custom 958 * checksum option 959 * @param f the file name to open 960 * @param permission 961 * @param flags {@link CreateFlag}s to use for this stream. 962 * @param bufferSize the size of the buffer to be used. 963 * @param replication required block replication for the file. 964 * @param blockSize 965 * @param progress 966 * @param checksumOpt checksum parameter. If null, the values 967 * found in conf will be used. 968 * @throws IOException 969 * @see #setPermission(Path, FsPermission) 970 */ 971 public FSDataOutputStream create(Path f, 972 FsPermission permission, 973 EnumSet<CreateFlag> flags, 974 int bufferSize, 975 short replication, 976 long blockSize, 977 Progressable progress, 978 ChecksumOpt checksumOpt) throws IOException { 979 // Checksum options are ignored by default. The file systems that 980 // implement checksum need to override this method. The full 981 // support is currently only available in DFS. 982 return create(f, permission, flags.contains(CreateFlag.OVERWRITE), 983 bufferSize, replication, blockSize, progress); 984 } 985 986 /*. 987 * This create has been added to support the FileContext that processes 988 * the permission 989 * with umask before calling this method. 990 * This a temporary method added to support the transition from FileSystem 991 * to FileContext for user applications. 992 */ 993 @Deprecated 994 protected FSDataOutputStream primitiveCreate(Path f, 995 FsPermission absolutePermission, EnumSet<CreateFlag> flag, int bufferSize, 996 short replication, long blockSize, Progressable progress, 997 ChecksumOpt checksumOpt) throws IOException { 998 999 boolean pathExists = exists(f); 1000 CreateFlag.validate(f, pathExists, flag); 1001 1002 // Default impl assumes that permissions do not matter and 1003 // nor does the bytesPerChecksum hence 1004 // calling the regular create is good enough. 1005 // FSs that implement permissions should override this. 1006 1007 if (pathExists && flag.contains(CreateFlag.APPEND)) { 1008 return append(f, bufferSize, progress); 1009 } 1010 1011 return this.create(f, absolutePermission, 1012 flag.contains(CreateFlag.OVERWRITE), bufferSize, replication, 1013 blockSize, progress); 1014 } 1015 1016 /** 1017 * This version of the mkdirs method assumes that the permission is absolute. 1018 * It has been added to support the FileContext that processes the permission 1019 * with umask before calling this method. 1020 * This a temporary method added to support the transition from FileSystem 1021 * to FileContext for user applications. 1022 */ 1023 @Deprecated 1024 protected boolean primitiveMkdir(Path f, FsPermission absolutePermission) 1025 throws IOException { 1026 // Default impl is to assume that permissions do not matter and hence 1027 // calling the regular mkdirs is good enough. 1028 // FSs that implement permissions should override this. 1029 return this.mkdirs(f, absolutePermission); 1030 } 1031 1032 1033 /** 1034 * This version of the mkdirs method assumes that the permission is absolute. 1035 * It has been added to support the FileContext that processes the permission 1036 * with umask before calling this method. 1037 * This a temporary method added to support the transition from FileSystem 1038 * to FileContext for user applications. 1039 */ 1040 @Deprecated 1041 protected void primitiveMkdir(Path f, FsPermission absolutePermission, 1042 boolean createParent) 1043 throws IOException { 1044 1045 if (!createParent) { // parent must exist. 1046 // since the this.mkdirs makes parent dirs automatically 1047 // we must throw exception if parent does not exist. 1048 final FileStatus stat = getFileStatus(f.getParent()); 1049 if (stat == null) { 1050 throw new FileNotFoundException("Missing parent:" + f); 1051 } 1052 if (!stat.isDirectory()) { 1053 throw new ParentNotDirectoryException("parent is not a dir"); 1054 } 1055 // parent does exist - go ahead with mkdir of leaf 1056 } 1057 // Default impl is to assume that permissions do not matter and hence 1058 // calling the regular mkdirs is good enough. 1059 // FSs that implement permissions should override this. 1060 if (!this.mkdirs(f, absolutePermission)) { 1061 throw new IOException("mkdir of "+ f + " failed"); 1062 } 1063 } 1064 1065 /** 1066 * Opens an FSDataOutputStream at the indicated Path with write-progress 1067 * reporting. Same as create(), except fails if parent directory doesn't 1068 * already exist. 1069 * @param f the file name to open 1070 * @param overwrite if a file with this name already exists, then if true, 1071 * the file will be overwritten, and if false an error will be thrown. 1072 * @param bufferSize the size of the buffer to be used. 1073 * @param replication required block replication for the file. 1074 * @param blockSize 1075 * @param progress 1076 * @throws IOException 1077 * @see #setPermission(Path, FsPermission) 1078 * @deprecated API only for 0.20-append 1079 */ 1080 @Deprecated 1081 public FSDataOutputStream createNonRecursive(Path f, 1082 boolean overwrite, 1083 int bufferSize, short replication, long blockSize, 1084 Progressable progress) throws IOException { 1085 return this.createNonRecursive(f, FsPermission.getFileDefault(), 1086 overwrite, bufferSize, replication, blockSize, progress); 1087 } 1088 1089 /** 1090 * Opens an FSDataOutputStream at the indicated Path with write-progress 1091 * reporting. Same as create(), except fails if parent directory doesn't 1092 * already exist. 1093 * @param f the file name to open 1094 * @param permission 1095 * @param overwrite if a file with this name already exists, then if true, 1096 * the file will be overwritten, and if false an error will be thrown. 1097 * @param bufferSize the size of the buffer to be used. 1098 * @param replication required block replication for the file. 1099 * @param blockSize 1100 * @param progress 1101 * @throws IOException 1102 * @see #setPermission(Path, FsPermission) 1103 * @deprecated API only for 0.20-append 1104 */ 1105 @Deprecated 1106 public FSDataOutputStream createNonRecursive(Path f, FsPermission permission, 1107 boolean overwrite, int bufferSize, short replication, long blockSize, 1108 Progressable progress) throws IOException { 1109 return createNonRecursive(f, permission, 1110 overwrite ? EnumSet.of(CreateFlag.CREATE, CreateFlag.OVERWRITE) 1111 : EnumSet.of(CreateFlag.CREATE), bufferSize, 1112 replication, blockSize, progress); 1113 } 1114 1115 /** 1116 * Opens an FSDataOutputStream at the indicated Path with write-progress 1117 * reporting. Same as create(), except fails if parent directory doesn't 1118 * already exist. 1119 * @param f the file name to open 1120 * @param permission 1121 * @param flags {@link CreateFlag}s to use for this stream. 1122 * @param bufferSize the size of the buffer to be used. 1123 * @param replication required block replication for the file. 1124 * @param blockSize 1125 * @param progress 1126 * @throws IOException 1127 * @see #setPermission(Path, FsPermission) 1128 * @deprecated API only for 0.20-append 1129 */ 1130 @Deprecated 1131 public FSDataOutputStream createNonRecursive(Path f, FsPermission permission, 1132 EnumSet<CreateFlag> flags, int bufferSize, short replication, long blockSize, 1133 Progressable progress) throws IOException { 1134 throw new IOException("createNonRecursive unsupported for this filesystem " 1135 + this.getClass()); 1136 } 1137 1138 /** 1139 * Creates the given Path as a brand-new zero-length file. If 1140 * create fails, or if it already existed, return false. 1141 * 1142 * @param f path to use for create 1143 */ 1144 public boolean createNewFile(Path f) throws IOException { 1145 if (exists(f)) { 1146 return false; 1147 } else { 1148 create(f, false, getConf().getInt("io.file.buffer.size", 4096)).close(); 1149 return true; 1150 } 1151 } 1152 1153 /** 1154 * Append to an existing file (optional operation). 1155 * Same as append(f, getConf().getInt("io.file.buffer.size", 4096), null) 1156 * @param f the existing file to be appended. 1157 * @throws IOException 1158 */ 1159 public FSDataOutputStream append(Path f) throws IOException { 1160 return append(f, getConf().getInt("io.file.buffer.size", 4096), null); 1161 } 1162 /** 1163 * Append to an existing file (optional operation). 1164 * Same as append(f, bufferSize, null). 1165 * @param f the existing file to be appended. 1166 * @param bufferSize the size of the buffer to be used. 1167 * @throws IOException 1168 */ 1169 public FSDataOutputStream append(Path f, int bufferSize) throws IOException { 1170 return append(f, bufferSize, null); 1171 } 1172 1173 /** 1174 * Append to an existing file (optional operation). 1175 * @param f the existing file to be appended. 1176 * @param bufferSize the size of the buffer to be used. 1177 * @param progress for reporting progress if it is not null. 1178 * @throws IOException 1179 */ 1180 public abstract FSDataOutputStream append(Path f, int bufferSize, 1181 Progressable progress) throws IOException; 1182 1183 /** 1184 * Concat existing files together. 1185 * @param trg the path to the target destination. 1186 * @param psrcs the paths to the sources to use for the concatenation. 1187 * @throws IOException 1188 */ 1189 public void concat(final Path trg, final Path [] psrcs) throws IOException { 1190 throw new UnsupportedOperationException("Not implemented by the " + 1191 getClass().getSimpleName() + " FileSystem implementation"); 1192 } 1193 1194 /** 1195 * Get replication. 1196 * 1197 * @deprecated Use getFileStatus() instead 1198 * @param src file name 1199 * @return file replication 1200 * @throws IOException 1201 */ 1202 @Deprecated 1203 public short getReplication(Path src) throws IOException { 1204 return getFileStatus(src).getReplication(); 1205 } 1206 1207 /** 1208 * Set replication for an existing file. 1209 * 1210 * @param src file name 1211 * @param replication new replication 1212 * @throws IOException 1213 * @return true if successful; 1214 * false if file does not exist or is a directory 1215 */ 1216 public boolean setReplication(Path src, short replication) 1217 throws IOException { 1218 return true; 1219 } 1220 1221 /** 1222 * Renames Path src to Path dst. Can take place on local fs 1223 * or remote DFS. 1224 * @param src path to be renamed 1225 * @param dst new path after rename 1226 * @throws IOException on failure 1227 * @return true if rename is successful 1228 */ 1229 public abstract boolean rename(Path src, Path dst) throws IOException; 1230 1231 /** 1232 * Renames Path src to Path dst 1233 * <ul> 1234 * <li 1235 * <li>Fails if src is a file and dst is a directory. 1236 * <li>Fails if src is a directory and dst is a file. 1237 * <li>Fails if the parent of dst does not exist or is a file. 1238 * </ul> 1239 * <p> 1240 * If OVERWRITE option is not passed as an argument, rename fails 1241 * if the dst already exists. 1242 * <p> 1243 * If OVERWRITE option is passed as an argument, rename overwrites 1244 * the dst if it is a file or an empty directory. Rename fails if dst is 1245 * a non-empty directory. 1246 * <p> 1247 * Note that atomicity of rename is dependent on the file system 1248 * implementation. Please refer to the file system documentation for 1249 * details. This default implementation is non atomic. 1250 * <p> 1251 * This method is deprecated since it is a temporary method added to 1252 * support the transition from FileSystem to FileContext for user 1253 * applications. 1254 * 1255 * @param src path to be renamed 1256 * @param dst new path after rename 1257 * @throws IOException on failure 1258 */ 1259 @Deprecated 1260 protected void rename(final Path src, final Path dst, 1261 final Rename... options) throws IOException { 1262 // Default implementation 1263 final FileStatus srcStatus = getFileLinkStatus(src); 1264 if (srcStatus == null) { 1265 throw new FileNotFoundException("rename source " + src + " not found."); 1266 } 1267 1268 boolean overwrite = false; 1269 if (null != options) { 1270 for (Rename option : options) { 1271 if (option == Rename.OVERWRITE) { 1272 overwrite = true; 1273 } 1274 } 1275 } 1276 1277 FileStatus dstStatus; 1278 try { 1279 dstStatus = getFileLinkStatus(dst); 1280 } catch (IOException e) { 1281 dstStatus = null; 1282 } 1283 if (dstStatus != null) { 1284 if (srcStatus.isDirectory() != dstStatus.isDirectory()) { 1285 throw new IOException("Source " + src + " Destination " + dst 1286 + " both should be either file or directory"); 1287 } 1288 if (!overwrite) { 1289 throw new FileAlreadyExistsException("rename destination " + dst 1290 + " already exists."); 1291 } 1292 // Delete the destination that is a file or an empty directory 1293 if (dstStatus.isDirectory()) { 1294 FileStatus[] list = listStatus(dst); 1295 if (list != null && list.length != 0) { 1296 throw new IOException( 1297 "rename cannot overwrite non empty destination directory " + dst); 1298 } 1299 } 1300 delete(dst, false); 1301 } else { 1302 final Path parent = dst.getParent(); 1303 final FileStatus parentStatus = getFileStatus(parent); 1304 if (parentStatus == null) { 1305 throw new FileNotFoundException("rename destination parent " + parent 1306 + " not found."); 1307 } 1308 if (!parentStatus.isDirectory()) { 1309 throw new ParentNotDirectoryException("rename destination parent " + parent 1310 + " is a file."); 1311 } 1312 } 1313 if (!rename(src, dst)) { 1314 throw new IOException("rename from " + src + " to " + dst + " failed."); 1315 } 1316 } 1317 1318 /** 1319 * Delete a file 1320 * @deprecated Use {@link #delete(Path, boolean)} instead. 1321 */ 1322 @Deprecated 1323 public boolean delete(Path f) throws IOException { 1324 return delete(f, true); 1325 } 1326 1327 /** Delete a file. 1328 * 1329 * @param f the path to delete. 1330 * @param recursive if path is a directory and set to 1331 * true, the directory is deleted else throws an exception. In 1332 * case of a file the recursive can be set to either true or false. 1333 * @return true if delete is successful else false. 1334 * @throws IOException 1335 */ 1336 public abstract boolean delete(Path f, boolean recursive) throws IOException; 1337 1338 /** 1339 * Mark a path to be deleted when FileSystem is closed. 1340 * When the JVM shuts down, 1341 * all FileSystem objects will be closed automatically. 1342 * Then, 1343 * the marked path will be deleted as a result of closing the FileSystem. 1344 * 1345 * The path has to exist in the file system. 1346 * 1347 * @param f the path to delete. 1348 * @return true if deleteOnExit is successful, otherwise false. 1349 * @throws IOException 1350 */ 1351 public boolean deleteOnExit(Path f) throws IOException { 1352 if (!exists(f)) { 1353 return false; 1354 } 1355 synchronized (deleteOnExit) { 1356 deleteOnExit.add(f); 1357 } 1358 return true; 1359 } 1360 1361 /** 1362 * Cancel the deletion of the path when the FileSystem is closed 1363 * @param f the path to cancel deletion 1364 */ 1365 public boolean cancelDeleteOnExit(Path f) { 1366 synchronized (deleteOnExit) { 1367 return deleteOnExit.remove(f); 1368 } 1369 } 1370 1371 /** 1372 * Delete all files that were marked as delete-on-exit. This recursively 1373 * deletes all files in the specified paths. 1374 */ 1375 protected void processDeleteOnExit() { 1376 synchronized (deleteOnExit) { 1377 for (Iterator<Path> iter = deleteOnExit.iterator(); iter.hasNext();) { 1378 Path path = iter.next(); 1379 try { 1380 if (exists(path)) { 1381 delete(path, true); 1382 } 1383 } 1384 catch (IOException e) { 1385 LOG.info("Ignoring failure to deleteOnExit for path " + path); 1386 } 1387 iter.remove(); 1388 } 1389 } 1390 } 1391 1392 /** Check if exists. 1393 * @param f source file 1394 */ 1395 public boolean exists(Path f) throws IOException { 1396 try { 1397 return getFileStatus(f) != null; 1398 } catch (FileNotFoundException e) { 1399 return false; 1400 } 1401 } 1402 1403 /** True iff the named path is a directory. 1404 * Note: Avoid using this method. Instead reuse the FileStatus 1405 * returned by getFileStatus() or listStatus() methods. 1406 * @param f path to check 1407 */ 1408 public boolean isDirectory(Path f) throws IOException { 1409 try { 1410 return getFileStatus(f).isDirectory(); 1411 } catch (FileNotFoundException e) { 1412 return false; // f does not exist 1413 } 1414 } 1415 1416 /** True iff the named path is a regular file. 1417 * Note: Avoid using this method. Instead reuse the FileStatus 1418 * returned by getFileStatus() or listStatus() methods. 1419 * @param f path to check 1420 */ 1421 public boolean isFile(Path f) throws IOException { 1422 try { 1423 return getFileStatus(f).isFile(); 1424 } catch (FileNotFoundException e) { 1425 return false; // f does not exist 1426 } 1427 } 1428 1429 /** The number of bytes in a file. */ 1430 /** @deprecated Use getFileStatus() instead */ 1431 @Deprecated 1432 public long getLength(Path f) throws IOException { 1433 return getFileStatus(f).getLen(); 1434 } 1435 1436 /** Return the {@link ContentSummary} of a given {@link Path}. 1437 * @param f path to use 1438 */ 1439 public ContentSummary getContentSummary(Path f) throws IOException { 1440 FileStatus status = getFileStatus(f); 1441 if (status.isFile()) { 1442 // f is a file 1443 return new ContentSummary(status.getLen(), 1, 0); 1444 } 1445 // f is a directory 1446 long[] summary = {0, 0, 1}; 1447 for(FileStatus s : listStatus(f)) { 1448 ContentSummary c = s.isDirectory() ? getContentSummary(s.getPath()) : 1449 new ContentSummary(s.getLen(), 1, 0); 1450 summary[0] += c.getLength(); 1451 summary[1] += c.getFileCount(); 1452 summary[2] += c.getDirectoryCount(); 1453 } 1454 return new ContentSummary(summary[0], summary[1], summary[2]); 1455 } 1456 1457 final private static PathFilter DEFAULT_FILTER = new PathFilter() { 1458 @Override 1459 public boolean accept(Path file) { 1460 return true; 1461 } 1462 }; 1463 1464 /** 1465 * List the statuses of the files/directories in the given path if the path is 1466 * a directory. 1467 * 1468 * @param f given path 1469 * @return the statuses of the files/directories in the given patch 1470 * @throws FileNotFoundException when the path does not exist; 1471 * IOException see specific implementation 1472 */ 1473 public abstract FileStatus[] listStatus(Path f) throws FileNotFoundException, 1474 IOException; 1475 1476 /* 1477 * Filter files/directories in the given path using the user-supplied path 1478 * filter. Results are added to the given array <code>results</code>. 1479 */ 1480 private void listStatus(ArrayList<FileStatus> results, Path f, 1481 PathFilter filter) throws FileNotFoundException, IOException { 1482 FileStatus listing[] = listStatus(f); 1483 if (listing == null) { 1484 throw new IOException("Error accessing " + f); 1485 } 1486 1487 for (int i = 0; i < listing.length; i++) { 1488 if (filter.accept(listing[i].getPath())) { 1489 results.add(listing[i]); 1490 } 1491 } 1492 } 1493 1494 /** 1495 * @return an iterator over the corrupt files under the given path 1496 * (may contain duplicates if a file has more than one corrupt block) 1497 * @throws IOException 1498 */ 1499 public RemoteIterator<Path> listCorruptFileBlocks(Path path) 1500 throws IOException { 1501 throw new UnsupportedOperationException(getClass().getCanonicalName() + 1502 " does not support" + 1503 " listCorruptFileBlocks"); 1504 } 1505 1506 /** 1507 * Filter files/directories in the given path using the user-supplied path 1508 * filter. 1509 * 1510 * @param f 1511 * a path name 1512 * @param filter 1513 * the user-supplied path filter 1514 * @return an array of FileStatus objects for the files under the given path 1515 * after applying the filter 1516 * @throws FileNotFoundException when the path does not exist; 1517 * IOException see specific implementation 1518 */ 1519 public FileStatus[] listStatus(Path f, PathFilter filter) 1520 throws FileNotFoundException, IOException { 1521 ArrayList<FileStatus> results = new ArrayList<FileStatus>(); 1522 listStatus(results, f, filter); 1523 return results.toArray(new FileStatus[results.size()]); 1524 } 1525 1526 /** 1527 * Filter files/directories in the given list of paths using default 1528 * path filter. 1529 * 1530 * @param files 1531 * a list of paths 1532 * @return a list of statuses for the files under the given paths after 1533 * applying the filter default Path filter 1534 * @throws FileNotFoundException when the path does not exist; 1535 * IOException see specific implementation 1536 */ 1537 public FileStatus[] listStatus(Path[] files) 1538 throws FileNotFoundException, IOException { 1539 return listStatus(files, DEFAULT_FILTER); 1540 } 1541 1542 /** 1543 * Filter files/directories in the given list of paths using user-supplied 1544 * path filter. 1545 * 1546 * @param files 1547 * a list of paths 1548 * @param filter 1549 * the user-supplied path filter 1550 * @return a list of statuses for the files under the given paths after 1551 * applying the filter 1552 * @throws FileNotFoundException when the path does not exist; 1553 * IOException see specific implementation 1554 */ 1555 public FileStatus[] listStatus(Path[] files, PathFilter filter) 1556 throws FileNotFoundException, IOException { 1557 ArrayList<FileStatus> results = new ArrayList<FileStatus>(); 1558 for (int i = 0; i < files.length; i++) { 1559 listStatus(results, files[i], filter); 1560 } 1561 return results.toArray(new FileStatus[results.size()]); 1562 } 1563 1564 /** 1565 * <p>Return all the files that match filePattern and are not checksum 1566 * files. Results are sorted by their names. 1567 * 1568 * <p> 1569 * A filename pattern is composed of <i>regular</i> characters and 1570 * <i>special pattern matching</i> characters, which are: 1571 * 1572 * <dl> 1573 * <dd> 1574 * <dl> 1575 * <p> 1576 * <dt> <tt> ? </tt> 1577 * <dd> Matches any single character. 1578 * 1579 * <p> 1580 * <dt> <tt> * </tt> 1581 * <dd> Matches zero or more characters. 1582 * 1583 * <p> 1584 * <dt> <tt> [<i>abc</i>] </tt> 1585 * <dd> Matches a single character from character set 1586 * <tt>{<i>a,b,c</i>}</tt>. 1587 * 1588 * <p> 1589 * <dt> <tt> [<i>a</i>-<i>b</i>] </tt> 1590 * <dd> Matches a single character from the character range 1591 * <tt>{<i>a...b</i>}</tt>. Note that character <tt><i>a</i></tt> must be 1592 * lexicographically less than or equal to character <tt><i>b</i></tt>. 1593 * 1594 * <p> 1595 * <dt> <tt> [^<i>a</i>] </tt> 1596 * <dd> Matches a single character that is not from character set or range 1597 * <tt>{<i>a</i>}</tt>. Note that the <tt>^</tt> character must occur 1598 * immediately to the right of the opening bracket. 1599 * 1600 * <p> 1601 * <dt> <tt> \<i>c</i> </tt> 1602 * <dd> Removes (escapes) any special meaning of character <i>c</i>. 1603 * 1604 * <p> 1605 * <dt> <tt> {ab,cd} </tt> 1606 * <dd> Matches a string from the string set <tt>{<i>ab, cd</i>} </tt> 1607 * 1608 * <p> 1609 * <dt> <tt> {ab,c{de,fh}} </tt> 1610 * <dd> Matches a string from the string set <tt>{<i>ab, cde, cfh</i>}</tt> 1611 * 1612 * </dl> 1613 * </dd> 1614 * </dl> 1615 * 1616 * @param pathPattern a regular expression specifying a pth pattern 1617 1618 * @return an array of paths that match the path pattern 1619 * @throws IOException 1620 */ 1621 public FileStatus[] globStatus(Path pathPattern) throws IOException { 1622 return globStatus(pathPattern, DEFAULT_FILTER); 1623 } 1624 1625 /** 1626 * Return an array of FileStatus objects whose path names match pathPattern 1627 * and is accepted by the user-supplied path filter. Results are sorted by 1628 * their path names. 1629 * Return null if pathPattern has no glob and the path does not exist. 1630 * Return an empty array if pathPattern has a glob and no path matches it. 1631 * 1632 * @param pathPattern 1633 * a regular expression specifying the path pattern 1634 * @param filter 1635 * a user-supplied path filter 1636 * @return an array of FileStatus objects 1637 * @throws IOException if any I/O error occurs when fetching file status 1638 */ 1639 public FileStatus[] globStatus(Path pathPattern, PathFilter filter) 1640 throws IOException { 1641 String filename = pathPattern.toUri().getPath(); 1642 List<FileStatus> allMatches = null; 1643 1644 List<String> filePatterns = GlobExpander.expand(filename); 1645 for (String filePattern : filePatterns) { 1646 Path path = new Path(filePattern.isEmpty() ? Path.CUR_DIR : filePattern); 1647 List<FileStatus> matches = globStatusInternal(path, filter); 1648 if (matches != null) { 1649 if (allMatches == null) { 1650 allMatches = matches; 1651 } else { 1652 allMatches.addAll(matches); 1653 } 1654 } 1655 } 1656 1657 FileStatus[] results = null; 1658 if (allMatches != null) { 1659 results = allMatches.toArray(new FileStatus[allMatches.size()]); 1660 } else if (filePatterns.size() > 1) { 1661 // no matches with multiple expansions is a non-matching glob 1662 results = new FileStatus[0]; 1663 } 1664 return results; 1665 } 1666 1667 // sort gripes because FileStatus Comparable isn't parameterized... 1668 @SuppressWarnings("unchecked") 1669 private List<FileStatus> globStatusInternal(Path pathPattern, 1670 PathFilter filter) throws IOException { 1671 boolean patternHasGlob = false; // pathPattern has any globs 1672 List<FileStatus> matches = new ArrayList<FileStatus>(); 1673 1674 // determine starting point 1675 int level = 0; 1676 String baseDir = Path.CUR_DIR; 1677 if (pathPattern.isAbsolute()) { 1678 level = 1; // need to skip empty item at beginning of split list 1679 baseDir = Path.SEPARATOR; 1680 } 1681 1682 // parse components and determine if it's a glob 1683 String[] components = null; 1684 GlobFilter[] filters = null; 1685 String filename = pathPattern.toUri().getPath(); 1686 if (!filename.isEmpty() && !Path.SEPARATOR.equals(filename)) { 1687 components = filename.split(Path.SEPARATOR); 1688 filters = new GlobFilter[components.length]; 1689 for (int i=level; i < components.length; i++) { 1690 filters[i] = new GlobFilter(components[i]); 1691 patternHasGlob |= filters[i].hasPattern(); 1692 } 1693 if (!patternHasGlob) { 1694 baseDir = unquotePathComponent(filename); 1695 components = null; // short through to filter check 1696 } 1697 } 1698 1699 // seed the parent directory path, return if it doesn't exist 1700 try { 1701 matches.add(getFileStatus(new Path(baseDir))); 1702 } catch (FileNotFoundException e) { 1703 return patternHasGlob ? matches : null; 1704 } 1705 1706 // skip if there are no components other than the basedir 1707 if (components != null) { 1708 // iterate through each path component 1709 for (int i=level; (i < components.length) && !matches.isEmpty(); i++) { 1710 List<FileStatus> children = new ArrayList<FileStatus>(); 1711 for (FileStatus match : matches) { 1712 // don't look for children in a file matched by a glob 1713 if (!match.isDirectory()) { 1714 continue; 1715 } 1716 try { 1717 if (filters[i].hasPattern()) { 1718 // get all children matching the filter 1719 FileStatus[] statuses = listStatus(match.getPath(), filters[i]); 1720 children.addAll(Arrays.asList(statuses)); 1721 } else { 1722 // the component does not have a pattern 1723 String component = unquotePathComponent(components[i]); 1724 Path child = new Path(match.getPath(), component); 1725 children.add(getFileStatus(child)); 1726 } 1727 } catch (FileNotFoundException e) { 1728 // don't care 1729 } 1730 } 1731 matches = children; 1732 } 1733 } 1734 // remove anything that didn't match the filter 1735 if (!matches.isEmpty()) { 1736 Iterator<FileStatus> iter = matches.iterator(); 1737 while (iter.hasNext()) { 1738 if (!filter.accept(iter.next().getPath())) { 1739 iter.remove(); 1740 } 1741 } 1742 } 1743 // no final paths, if there were any globs return empty list 1744 if (matches.isEmpty()) { 1745 return patternHasGlob ? matches : null; 1746 } 1747 Collections.sort(matches); 1748 return matches; 1749 } 1750 1751 /** 1752 * The glob filter builds a regexp per path component. If the component 1753 * does not contain a shell metachar, then it falls back to appending the 1754 * raw string to the list of built up paths. This raw path needs to have 1755 * the quoting removed. Ie. convert all occurances of "\X" to "X" 1756 * @param name of the path component 1757 * @return the unquoted path component 1758 */ 1759 private String unquotePathComponent(String name) { 1760 return name.replaceAll("\\\\(.)", "$1"); 1761 } 1762 1763 /** 1764 * List the statuses of the files/directories in the given path if the path is 1765 * a directory. 1766 * Return the file's status and block locations If the path is a file. 1767 * 1768 * If a returned status is a file, it contains the file's block locations. 1769 * 1770 * @param f is the path 1771 * 1772 * @return an iterator that traverses statuses of the files/directories 1773 * in the given path 1774 * 1775 * @throws FileNotFoundException If <code>f</code> does not exist 1776 * @throws IOException If an I/O error occurred 1777 */ 1778 public RemoteIterator<LocatedFileStatus> listLocatedStatus(final Path f) 1779 throws FileNotFoundException, IOException { 1780 return listLocatedStatus(f, DEFAULT_FILTER); 1781 } 1782 1783 /** 1784 * Listing a directory 1785 * The returned results include its block location if it is a file 1786 * The results are filtered by the given path filter 1787 * @param f a path 1788 * @param filter a path filter 1789 * @return an iterator that traverses statuses of the files/directories 1790 * in the given path 1791 * @throws FileNotFoundException if <code>f</code> does not exist 1792 * @throws IOException if any I/O error occurred 1793 */ 1794 protected RemoteIterator<LocatedFileStatus> listLocatedStatus(final Path f, 1795 final PathFilter filter) 1796 throws FileNotFoundException, IOException { 1797 return new RemoteIterator<LocatedFileStatus>() { 1798 private final FileStatus[] stats = listStatus(f, filter); 1799 private int i = 0; 1800 1801 @Override 1802 public boolean hasNext() { 1803 return i<stats.length; 1804 } 1805 1806 @Override 1807 public LocatedFileStatus next() throws IOException { 1808 if (!hasNext()) { 1809 throw new NoSuchElementException("No more entry in " + f); 1810 } 1811 FileStatus result = stats[i++]; 1812 BlockLocation[] locs = result.isFile() ? 1813 getFileBlockLocations(result.getPath(), 0, result.getLen()) : 1814 null; 1815 return new LocatedFileStatus(result, locs); 1816 } 1817 }; 1818 } 1819 1820 /** 1821 * List the statuses and block locations of the files in the given path. 1822 * 1823 * If the path is a directory, 1824 * if recursive is false, returns files in the directory; 1825 * if recursive is true, return files in the subtree rooted at the path. 1826 * If the path is a file, return the file's status and block locations. 1827 * 1828 * @param f is the path 1829 * @param recursive if the subdirectories need to be traversed recursively 1830 * 1831 * @return an iterator that traverses statuses of the files 1832 * 1833 * @throws FileNotFoundException when the path does not exist; 1834 * IOException see specific implementation 1835 */ 1836 public RemoteIterator<LocatedFileStatus> listFiles( 1837 final Path f, final boolean recursive) 1838 throws FileNotFoundException, IOException { 1839 return new RemoteIterator<LocatedFileStatus>() { 1840 private Stack<RemoteIterator<LocatedFileStatus>> itors = 1841 new Stack<RemoteIterator<LocatedFileStatus>>(); 1842 private RemoteIterator<LocatedFileStatus> curItor = 1843 listLocatedStatus(f); 1844 private LocatedFileStatus curFile; 1845 1846 @Override 1847 public boolean hasNext() throws IOException { 1848 while (curFile == null) { 1849 if (curItor.hasNext()) { 1850 handleFileStat(curItor.next()); 1851 } else if (!itors.empty()) { 1852 curItor = itors.pop(); 1853 } else { 1854 return false; 1855 } 1856 } 1857 return true; 1858 } 1859 1860 /** 1861 * Process the input stat. 1862 * If it is a file, return the file stat. 1863 * If it is a directory, traverse the directory if recursive is true; 1864 * ignore it if recursive is false. 1865 * @param stat input status 1866 * @throws IOException if any IO error occurs 1867 */ 1868 private void handleFileStat(LocatedFileStatus stat) throws IOException { 1869 if (stat.isFile()) { // file 1870 curFile = stat; 1871 } else if (recursive) { // directory 1872 itors.push(curItor); 1873 curItor = listLocatedStatus(stat.getPath()); 1874 } 1875 } 1876 1877 @Override 1878 public LocatedFileStatus next() throws IOException { 1879 if (hasNext()) { 1880 LocatedFileStatus result = curFile; 1881 curFile = null; 1882 return result; 1883 } 1884 throw new java.util.NoSuchElementException("No more entry in " + f); 1885 } 1886 }; 1887 } 1888 1889 /** Return the current user's home directory in this filesystem. 1890 * The default implementation returns "/user/$USER/". 1891 */ 1892 public Path getHomeDirectory() { 1893 return this.makeQualified( 1894 new Path("/user/"+System.getProperty("user.name"))); 1895 } 1896 1897 1898 /** 1899 * Set the current working directory for the given file system. All relative 1900 * paths will be resolved relative to it. 1901 * 1902 * @param new_dir 1903 */ 1904 public abstract void setWorkingDirectory(Path new_dir); 1905 1906 /** 1907 * Get the current working directory for the given file system 1908 * @return the directory pathname 1909 */ 1910 public abstract Path getWorkingDirectory(); 1911 1912 1913 /** 1914 * Note: with the new FilesContext class, getWorkingDirectory() 1915 * will be removed. 1916 * The working directory is implemented in FilesContext. 1917 * 1918 * Some file systems like LocalFileSystem have an initial workingDir 1919 * that we use as the starting workingDir. For other file systems 1920 * like HDFS there is no built in notion of an initial workingDir. 1921 * 1922 * @return if there is built in notion of workingDir then it 1923 * is returned; else a null is returned. 1924 */ 1925 protected Path getInitialWorkingDirectory() { 1926 return null; 1927 } 1928 1929 /** 1930 * Call {@link #mkdirs(Path, FsPermission)} with default permission. 1931 */ 1932 public boolean mkdirs(Path f) throws IOException { 1933 return mkdirs(f, FsPermission.getDirDefault()); 1934 } 1935 1936 /** 1937 * Make the given file and all non-existent parents into 1938 * directories. Has the semantics of Unix 'mkdir -p'. 1939 * Existence of the directory hierarchy is not an error. 1940 * @param f path to create 1941 * @param permission to apply to f 1942 */ 1943 public abstract boolean mkdirs(Path f, FsPermission permission 1944 ) throws IOException; 1945 1946 /** 1947 * The src file is on the local disk. Add it to FS at 1948 * the given dst name and the source is kept intact afterwards 1949 * @param src path 1950 * @param dst path 1951 */ 1952 public void copyFromLocalFile(Path src, Path dst) 1953 throws IOException { 1954 copyFromLocalFile(false, src, dst); 1955 } 1956 1957 /** 1958 * The src files is on the local disk. Add it to FS at 1959 * the given dst name, removing the source afterwards. 1960 * @param srcs path 1961 * @param dst path 1962 */ 1963 public void moveFromLocalFile(Path[] srcs, Path dst) 1964 throws IOException { 1965 copyFromLocalFile(true, true, srcs, dst); 1966 } 1967 1968 /** 1969 * The src file is on the local disk. Add it to FS at 1970 * the given dst name, removing the source afterwards. 1971 * @param src path 1972 * @param dst path 1973 */ 1974 public void moveFromLocalFile(Path src, Path dst) 1975 throws IOException { 1976 copyFromLocalFile(true, src, dst); 1977 } 1978 1979 /** 1980 * The src file is on the local disk. Add it to FS at 1981 * the given dst name. 1982 * delSrc indicates if the source should be removed 1983 * @param delSrc whether to delete the src 1984 * @param src path 1985 * @param dst path 1986 */ 1987 public void copyFromLocalFile(boolean delSrc, Path src, Path dst) 1988 throws IOException { 1989 copyFromLocalFile(delSrc, true, src, dst); 1990 } 1991 1992 /** 1993 * The src files are on the local disk. Add it to FS at 1994 * the given dst name. 1995 * delSrc indicates if the source should be removed 1996 * @param delSrc whether to delete the src 1997 * @param overwrite whether to overwrite an existing file 1998 * @param srcs array of paths which are source 1999 * @param dst path 2000 */ 2001 public void copyFromLocalFile(boolean delSrc, boolean overwrite, 2002 Path[] srcs, Path dst) 2003 throws IOException { 2004 Configuration conf = getConf(); 2005 FileUtil.copy(getLocal(conf), srcs, this, dst, delSrc, overwrite, conf); 2006 } 2007 2008 /** 2009 * The src file is on the local disk. Add it to FS at 2010 * the given dst name. 2011 * delSrc indicates if the source should be removed 2012 * @param delSrc whether to delete the src 2013 * @param overwrite whether to overwrite an existing file 2014 * @param src path 2015 * @param dst path 2016 */ 2017 public void copyFromLocalFile(boolean delSrc, boolean overwrite, 2018 Path src, Path dst) 2019 throws IOException { 2020 Configuration conf = getConf(); 2021 FileUtil.copy(getLocal(conf), src, this, dst, delSrc, overwrite, conf); 2022 } 2023 2024 /** 2025 * The src file is under FS, and the dst is on the local disk. 2026 * Copy it from FS control to the local dst name. 2027 * @param src path 2028 * @param dst path 2029 */ 2030 public void copyToLocalFile(Path src, Path dst) throws IOException { 2031 copyToLocalFile(false, src, dst); 2032 } 2033 2034 /** 2035 * The src file is under FS, and the dst is on the local disk. 2036 * Copy it from FS control to the local dst name. 2037 * Remove the source afterwards 2038 * @param src path 2039 * @param dst path 2040 */ 2041 public void moveToLocalFile(Path src, Path dst) throws IOException { 2042 copyToLocalFile(true, src, dst); 2043 } 2044 2045 /** 2046 * The src file is under FS, and the dst is on the local disk. 2047 * Copy it from FS control to the local dst name. 2048 * delSrc indicates if the src will be removed or not. 2049 * @param delSrc whether to delete the src 2050 * @param src path 2051 * @param dst path 2052 */ 2053 public void copyToLocalFile(boolean delSrc, Path src, Path dst) 2054 throws IOException { 2055 copyToLocalFile(delSrc, src, dst, false); 2056 } 2057 2058 /** 2059 * The src file is under FS, and the dst is on the local disk. Copy it from FS 2060 * control to the local dst name. delSrc indicates if the src will be removed 2061 * or not. useRawLocalFileSystem indicates whether to use RawLocalFileSystem 2062 * as local file system or not. RawLocalFileSystem is non crc file system.So, 2063 * It will not create any crc files at local. 2064 * 2065 * @param delSrc 2066 * whether to delete the src 2067 * @param src 2068 * path 2069 * @param dst 2070 * path 2071 * @param useRawLocalFileSystem 2072 * whether to use RawLocalFileSystem as local file system or not. 2073 * 2074 * @throws IOException 2075 * - if any IO error 2076 */ 2077 public void copyToLocalFile(boolean delSrc, Path src, Path dst, 2078 boolean useRawLocalFileSystem) throws IOException { 2079 Configuration conf = getConf(); 2080 FileSystem local = null; 2081 if (useRawLocalFileSystem) { 2082 local = getLocal(conf).getRawFileSystem(); 2083 } else { 2084 local = getLocal(conf); 2085 } 2086 FileUtil.copy(this, src, local, dst, delSrc, conf); 2087 } 2088 2089 /** 2090 * Returns a local File that the user can write output to. The caller 2091 * provides both the eventual FS target name and the local working 2092 * file. If the FS is local, we write directly into the target. If 2093 * the FS is remote, we write into the tmp local area. 2094 * @param fsOutputFile path of output file 2095 * @param tmpLocalFile path of local tmp file 2096 */ 2097 public Path startLocalOutput(Path fsOutputFile, Path tmpLocalFile) 2098 throws IOException { 2099 return tmpLocalFile; 2100 } 2101 2102 /** 2103 * Called when we're all done writing to the target. A local FS will 2104 * do nothing, because we've written to exactly the right place. A remote 2105 * FS will copy the contents of tmpLocalFile to the correct target at 2106 * fsOutputFile. 2107 * @param fsOutputFile path of output file 2108 * @param tmpLocalFile path to local tmp file 2109 */ 2110 public void completeLocalOutput(Path fsOutputFile, Path tmpLocalFile) 2111 throws IOException { 2112 moveFromLocalFile(tmpLocalFile, fsOutputFile); 2113 } 2114 2115 /** 2116 * No more filesystem operations are needed. Will 2117 * release any held locks. 2118 */ 2119 @Override 2120 public void close() throws IOException { 2121 // delete all files that were marked as delete-on-exit. 2122 processDeleteOnExit(); 2123 CACHE.remove(this.key, this); 2124 } 2125 2126 /** Return the total size of all files in the filesystem.*/ 2127 public long getUsed() throws IOException{ 2128 long used = 0; 2129 FileStatus[] files = listStatus(new Path("/")); 2130 for(FileStatus file:files){ 2131 used += file.getLen(); 2132 } 2133 return used; 2134 } 2135 2136 /** 2137 * Get the block size for a particular file. 2138 * @param f the filename 2139 * @return the number of bytes in a block 2140 */ 2141 /** @deprecated Use getFileStatus() instead */ 2142 @Deprecated 2143 public long getBlockSize(Path f) throws IOException { 2144 return getFileStatus(f).getBlockSize(); 2145 } 2146 2147 /** 2148 * Return the number of bytes that large input files should be optimally 2149 * be split into to minimize i/o time. 2150 * @deprecated use {@link #getDefaultBlockSize(Path)} instead 2151 */ 2152 @Deprecated 2153 public long getDefaultBlockSize() { 2154 // default to 32MB: large enough to minimize the impact of seeks 2155 return getConf().getLong("fs.local.block.size", 32 * 1024 * 1024); 2156 } 2157 2158 /** Return the number of bytes that large input files should be optimally 2159 * be split into to minimize i/o time. The given path will be used to 2160 * locate the actual filesystem. The full path does not have to exist. 2161 * @param f path of file 2162 * @return the default block size for the path's filesystem 2163 */ 2164 public long getDefaultBlockSize(Path f) { 2165 return getDefaultBlockSize(); 2166 } 2167 2168 /** 2169 * Get the default replication. 2170 * @deprecated use {@link #getDefaultReplication(Path)} instead 2171 */ 2172 @Deprecated 2173 public short getDefaultReplication() { return 1; } 2174 2175 /** 2176 * Get the default replication for a path. The given path will be used to 2177 * locate the actual filesystem. The full path does not have to exist. 2178 * @param path of the file 2179 * @return default replication for the path's filesystem 2180 */ 2181 public short getDefaultReplication(Path path) { 2182 return getDefaultReplication(); 2183 } 2184 2185 /** 2186 * Return a file status object that represents the path. 2187 * @param f The path we want information from 2188 * @return a FileStatus object 2189 * @throws FileNotFoundException when the path does not exist; 2190 * IOException see specific implementation 2191 */ 2192 public abstract FileStatus getFileStatus(Path f) throws IOException; 2193 2194 /** 2195 * See {@link FileContext#fixRelativePart} 2196 */ 2197 protected Path fixRelativePart(Path p) { 2198 if (p.isUriPathAbsolute()) { 2199 return p; 2200 } else { 2201 return new Path(getWorkingDirectory(), p); 2202 } 2203 } 2204 2205 /** 2206 * See {@link FileContext#createSymlink(Path, Path, boolean)} 2207 */ 2208 public void createSymlink(final Path target, final Path link, 2209 final boolean createParent) throws AccessControlException, 2210 FileAlreadyExistsException, FileNotFoundException, 2211 ParentNotDirectoryException, UnsupportedFileSystemException, 2212 IOException { 2213 // Supporting filesystems should override this method 2214 throw new UnsupportedOperationException( 2215 "Filesystem does not support symlinks!"); 2216 } 2217 2218 /** 2219 * See {@link FileContext#getFileLinkStatus(Path)} 2220 */ 2221 public FileStatus getFileLinkStatus(final Path f) 2222 throws AccessControlException, FileNotFoundException, 2223 UnsupportedFileSystemException, IOException { 2224 // Supporting filesystems should override this method 2225 return getFileStatus(f); 2226 } 2227 2228 /** 2229 * See {@link AbstractFileSystem#supportsSymlinks()} 2230 */ 2231 public boolean supportsSymlinks() { 2232 return false; 2233 } 2234 2235 /** 2236 * See {@link FileContext#getLinkTarget(Path)} 2237 */ 2238 public Path getLinkTarget(Path f) throws IOException { 2239 // Supporting filesystems should override this method 2240 throw new UnsupportedOperationException( 2241 "Filesystem does not support symlinks!"); 2242 } 2243 2244 /** 2245 * See {@link AbstractFileSystem#getLinkTarget(Path)} 2246 */ 2247 protected Path resolveLink(Path f) throws IOException { 2248 // Supporting filesystems should override this method 2249 throw new UnsupportedOperationException( 2250 "Filesystem does not support symlinks!"); 2251 } 2252 2253 /** 2254 * Get the checksum of a file. 2255 * 2256 * @param f The file path 2257 * @return The file checksum. The default return value is null, 2258 * which indicates that no checksum algorithm is implemented 2259 * in the corresponding FileSystem. 2260 */ 2261 public FileChecksum getFileChecksum(Path f) throws IOException { 2262 return null; 2263 } 2264 2265 /** 2266 * Set the verify checksum flag. This is only applicable if the 2267 * corresponding FileSystem supports checksum. By default doesn't do anything. 2268 * @param verifyChecksum 2269 */ 2270 public void setVerifyChecksum(boolean verifyChecksum) { 2271 //doesn't do anything 2272 } 2273 2274 /** 2275 * Set the write checksum flag. This is only applicable if the 2276 * corresponding FileSystem supports checksum. By default doesn't do anything. 2277 * @param writeChecksum 2278 */ 2279 public void setWriteChecksum(boolean writeChecksum) { 2280 //doesn't do anything 2281 } 2282 2283 /** 2284 * Returns a status object describing the use and capacity of the 2285 * file system. If the file system has multiple partitions, the 2286 * use and capacity of the root partition is reflected. 2287 * 2288 * @return a FsStatus object 2289 * @throws IOException 2290 * see specific implementation 2291 */ 2292 public FsStatus getStatus() throws IOException { 2293 return getStatus(null); 2294 } 2295 2296 /** 2297 * Returns a status object describing the use and capacity of the 2298 * file system. If the file system has multiple partitions, the 2299 * use and capacity of the partition pointed to by the specified 2300 * path is reflected. 2301 * @param p Path for which status should be obtained. null means 2302 * the default partition. 2303 * @return a FsStatus object 2304 * @throws IOException 2305 * see specific implementation 2306 */ 2307 public FsStatus getStatus(Path p) throws IOException { 2308 return new FsStatus(Long.MAX_VALUE, 0, Long.MAX_VALUE); 2309 } 2310 2311 /** 2312 * Set permission of a path. 2313 * @param p 2314 * @param permission 2315 */ 2316 public void setPermission(Path p, FsPermission permission 2317 ) throws IOException { 2318 } 2319 2320 /** 2321 * Set owner of a path (i.e. a file or a directory). 2322 * The parameters username and groupname cannot both be null. 2323 * @param p The path 2324 * @param username If it is null, the original username remains unchanged. 2325 * @param groupname If it is null, the original groupname remains unchanged. 2326 */ 2327 public void setOwner(Path p, String username, String groupname 2328 ) throws IOException { 2329 } 2330 2331 /** 2332 * Set access time of a file 2333 * @param p The path 2334 * @param mtime Set the modification time of this file. 2335 * The number of milliseconds since Jan 1, 1970. 2336 * A value of -1 means that this call should not set modification time. 2337 * @param atime Set the access time of this file. 2338 * The number of milliseconds since Jan 1, 1970. 2339 * A value of -1 means that this call should not set access time. 2340 */ 2341 public void setTimes(Path p, long mtime, long atime 2342 ) throws IOException { 2343 } 2344 2345 /** 2346 * Create a snapshot with a default name. 2347 * @param path The directory where snapshots will be taken. 2348 * @return the snapshot path. 2349 */ 2350 public final Path createSnapshot(Path path) throws IOException { 2351 return createSnapshot(path, null); 2352 } 2353 2354 /** 2355 * Create a snapshot 2356 * @param path The directory where snapshots will be taken. 2357 * @param snapshotName The name of the snapshot 2358 * @return the snapshot path. 2359 */ 2360 public Path createSnapshot(Path path, String snapshotName) 2361 throws IOException { 2362 throw new UnsupportedOperationException(getClass().getSimpleName() 2363 + " doesn't support createSnapshot"); 2364 } 2365 2366 /** 2367 * Rename a snapshot 2368 * @param path The directory path where the snapshot was taken 2369 * @param snapshotOldName Old name of the snapshot 2370 * @param snapshotNewName New name of the snapshot 2371 * @throws IOException 2372 */ 2373 public void renameSnapshot(Path path, String snapshotOldName, 2374 String snapshotNewName) throws IOException { 2375 throw new UnsupportedOperationException(getClass().getSimpleName() 2376 + " doesn't support renameSnapshot"); 2377 } 2378 2379 /** 2380 * Delete a snapshot of a directory 2381 * @param path The directory that the to-be-deleted snapshot belongs to 2382 * @param snapshotName The name of the snapshot 2383 */ 2384 public void deleteSnapshot(Path path, String snapshotName) 2385 throws IOException { 2386 throw new UnsupportedOperationException(getClass().getSimpleName() 2387 + " doesn't support deleteSnapshot"); 2388 } 2389 2390 // making it volatile to be able to do a double checked locking 2391 private volatile static boolean FILE_SYSTEMS_LOADED = false; 2392 2393 private static final Map<String, Class<? extends FileSystem>> 2394 SERVICE_FILE_SYSTEMS = new HashMap<String, Class<? extends FileSystem>>(); 2395 2396 private static void loadFileSystems() { 2397 synchronized (FileSystem.class) { 2398 if (!FILE_SYSTEMS_LOADED) { 2399 ServiceLoader<FileSystem> serviceLoader = ServiceLoader.load(FileSystem.class); 2400 for (FileSystem fs : serviceLoader) { 2401 SERVICE_FILE_SYSTEMS.put(fs.getScheme(), fs.getClass()); 2402 } 2403 FILE_SYSTEMS_LOADED = true; 2404 } 2405 } 2406 } 2407 2408 public static Class<? extends FileSystem> getFileSystemClass(String scheme, 2409 Configuration conf) throws IOException { 2410 if (!FILE_SYSTEMS_LOADED) { 2411 loadFileSystems(); 2412 } 2413 Class<? extends FileSystem> clazz = null; 2414 if (conf != null) { 2415 clazz = (Class<? extends FileSystem>) conf.getClass("fs." + scheme + ".impl", null); 2416 } 2417 if (clazz == null) { 2418 clazz = SERVICE_FILE_SYSTEMS.get(scheme); 2419 } 2420 if (clazz == null) { 2421 throw new IOException("No FileSystem for scheme: " + scheme); 2422 } 2423 return clazz; 2424 } 2425 2426 private static FileSystem createFileSystem(URI uri, Configuration conf 2427 ) throws IOException { 2428 Class<?> clazz = getFileSystemClass(uri.getScheme(), conf); 2429 if (clazz == null) { 2430 throw new IOException("No FileSystem for scheme: " + uri.getScheme()); 2431 } 2432 FileSystem fs = (FileSystem)ReflectionUtils.newInstance(clazz, conf); 2433 fs.initialize(uri, conf); 2434 return fs; 2435 } 2436 2437 /** Caching FileSystem objects */ 2438 static class Cache { 2439 private final ClientFinalizer clientFinalizer = new ClientFinalizer(); 2440 2441 private final Map<Key, FileSystem> map = new HashMap<Key, FileSystem>(); 2442 private final Set<Key> toAutoClose = new HashSet<Key>(); 2443 2444 /** A variable that makes all objects in the cache unique */ 2445 private static AtomicLong unique = new AtomicLong(1); 2446 2447 FileSystem get(URI uri, Configuration conf) throws IOException{ 2448 Key key = new Key(uri, conf); 2449 return getInternal(uri, conf, key); 2450 } 2451 2452 /** The objects inserted into the cache using this method are all unique */ 2453 FileSystem getUnique(URI uri, Configuration conf) throws IOException{ 2454 Key key = new Key(uri, conf, unique.getAndIncrement()); 2455 return getInternal(uri, conf, key); 2456 } 2457 2458 private FileSystem getInternal(URI uri, Configuration conf, Key key) throws IOException{ 2459 FileSystem fs; 2460 synchronized (this) { 2461 fs = map.get(key); 2462 } 2463 if (fs != null) { 2464 return fs; 2465 } 2466 2467 fs = createFileSystem(uri, conf); 2468 synchronized (this) { // refetch the lock again 2469 FileSystem oldfs = map.get(key); 2470 if (oldfs != null) { // a file system is created while lock is releasing 2471 fs.close(); // close the new file system 2472 return oldfs; // return the old file system 2473 } 2474 2475 // now insert the new file system into the map 2476 if (map.isEmpty() 2477 && !ShutdownHookManager.get().isShutdownInProgress()) { 2478 ShutdownHookManager.get().addShutdownHook(clientFinalizer, SHUTDOWN_HOOK_PRIORITY); 2479 } 2480 fs.key = key; 2481 map.put(key, fs); 2482 if (conf.getBoolean("fs.automatic.close", true)) { 2483 toAutoClose.add(key); 2484 } 2485 return fs; 2486 } 2487 } 2488 2489 synchronized void remove(Key key, FileSystem fs) { 2490 if (map.containsKey(key) && fs == map.get(key)) { 2491 map.remove(key); 2492 toAutoClose.remove(key); 2493 } 2494 } 2495 2496 synchronized void closeAll() throws IOException { 2497 closeAll(false); 2498 } 2499 2500 /** 2501 * Close all FileSystem instances in the Cache. 2502 * @param onlyAutomatic only close those that are marked for automatic closing 2503 */ 2504 synchronized void closeAll(boolean onlyAutomatic) throws IOException { 2505 List<IOException> exceptions = new ArrayList<IOException>(); 2506 2507 // Make a copy of the keys in the map since we'll be modifying 2508 // the map while iterating over it, which isn't safe. 2509 List<Key> keys = new ArrayList<Key>(); 2510 keys.addAll(map.keySet()); 2511 2512 for (Key key : keys) { 2513 final FileSystem fs = map.get(key); 2514 2515 if (onlyAutomatic && !toAutoClose.contains(key)) { 2516 continue; 2517 } 2518 2519 //remove from cache 2520 remove(key, fs); 2521 2522 if (fs != null) { 2523 try { 2524 fs.close(); 2525 } 2526 catch(IOException ioe) { 2527 exceptions.add(ioe); 2528 } 2529 } 2530 } 2531 2532 if (!exceptions.isEmpty()) { 2533 throw MultipleIOException.createIOException(exceptions); 2534 } 2535 } 2536 2537 private class ClientFinalizer implements Runnable { 2538 @Override 2539 public synchronized void run() { 2540 try { 2541 closeAll(true); 2542 } catch (IOException e) { 2543 LOG.info("FileSystem.Cache.closeAll() threw an exception:\n" + e); 2544 } 2545 } 2546 } 2547 2548 synchronized void closeAll(UserGroupInformation ugi) throws IOException { 2549 List<FileSystem> targetFSList = new ArrayList<FileSystem>(); 2550 //Make a pass over the list and collect the filesystems to close 2551 //we cannot close inline since close() removes the entry from the Map 2552 for (Map.Entry<Key, FileSystem> entry : map.entrySet()) { 2553 final Key key = entry.getKey(); 2554 final FileSystem fs = entry.getValue(); 2555 if (ugi.equals(key.ugi) && fs != null) { 2556 targetFSList.add(fs); 2557 } 2558 } 2559 List<IOException> exceptions = new ArrayList<IOException>(); 2560 //now make a pass over the target list and close each 2561 for (FileSystem fs : targetFSList) { 2562 try { 2563 fs.close(); 2564 } 2565 catch(IOException ioe) { 2566 exceptions.add(ioe); 2567 } 2568 } 2569 if (!exceptions.isEmpty()) { 2570 throw MultipleIOException.createIOException(exceptions); 2571 } 2572 } 2573 2574 /** FileSystem.Cache.Key */ 2575 static class Key { 2576 final String scheme; 2577 final String authority; 2578 final UserGroupInformation ugi; 2579 final long unique; // an artificial way to make a key unique 2580 2581 Key(URI uri, Configuration conf) throws IOException { 2582 this(uri, conf, 0); 2583 } 2584 2585 Key(URI uri, Configuration conf, long unique) throws IOException { 2586 scheme = uri.getScheme()==null?"":uri.getScheme().toLowerCase(); 2587 authority = uri.getAuthority()==null?"":uri.getAuthority().toLowerCase(); 2588 this.unique = unique; 2589 2590 this.ugi = UserGroupInformation.getCurrentUser(); 2591 } 2592 2593 @Override 2594 public int hashCode() { 2595 return (scheme + authority).hashCode() + ugi.hashCode() + (int)unique; 2596 } 2597 2598 static boolean isEqual(Object a, Object b) { 2599 return a == b || (a != null && a.equals(b)); 2600 } 2601 2602 @Override 2603 public boolean equals(Object obj) { 2604 if (obj == this) { 2605 return true; 2606 } 2607 if (obj != null && obj instanceof Key) { 2608 Key that = (Key)obj; 2609 return isEqual(this.scheme, that.scheme) 2610 && isEqual(this.authority, that.authority) 2611 && isEqual(this.ugi, that.ugi) 2612 && (this.unique == that.unique); 2613 } 2614 return false; 2615 } 2616 2617 @Override 2618 public String toString() { 2619 return "("+ugi.toString() + ")@" + scheme + "://" + authority; 2620 } 2621 } 2622 } 2623 2624 public static final class Statistics { 2625 private final String scheme; 2626 private AtomicLong bytesRead = new AtomicLong(); 2627 private AtomicLong bytesWritten = new AtomicLong(); 2628 private AtomicInteger readOps = new AtomicInteger(); 2629 private AtomicInteger largeReadOps = new AtomicInteger(); 2630 private AtomicInteger writeOps = new AtomicInteger(); 2631 2632 public Statistics(String scheme) { 2633 this.scheme = scheme; 2634 } 2635 2636 /** 2637 * Copy constructor. 2638 * 2639 * @param st 2640 * The input Statistics object which is cloned. 2641 */ 2642 public Statistics(Statistics st) { 2643 this.scheme = st.scheme; 2644 this.bytesRead = new AtomicLong(st.bytesRead.longValue()); 2645 this.bytesWritten = new AtomicLong(st.bytesWritten.longValue()); 2646 } 2647 2648 /** 2649 * Increment the bytes read in the statistics 2650 * @param newBytes the additional bytes read 2651 */ 2652 public void incrementBytesRead(long newBytes) { 2653 bytesRead.getAndAdd(newBytes); 2654 } 2655 2656 /** 2657 * Increment the bytes written in the statistics 2658 * @param newBytes the additional bytes written 2659 */ 2660 public void incrementBytesWritten(long newBytes) { 2661 bytesWritten.getAndAdd(newBytes); 2662 } 2663 2664 /** 2665 * Increment the number of read operations 2666 * @param count number of read operations 2667 */ 2668 public void incrementReadOps(int count) { 2669 readOps.getAndAdd(count); 2670 } 2671 2672 /** 2673 * Increment the number of large read operations 2674 * @param count number of large read operations 2675 */ 2676 public void incrementLargeReadOps(int count) { 2677 largeReadOps.getAndAdd(count); 2678 } 2679 2680 /** 2681 * Increment the number of write operations 2682 * @param count number of write operations 2683 */ 2684 public void incrementWriteOps(int count) { 2685 writeOps.getAndAdd(count); 2686 } 2687 2688 /** 2689 * Get the total number of bytes read 2690 * @return the number of bytes 2691 */ 2692 public long getBytesRead() { 2693 return bytesRead.get(); 2694 } 2695 2696 /** 2697 * Get the total number of bytes written 2698 * @return the number of bytes 2699 */ 2700 public long getBytesWritten() { 2701 return bytesWritten.get(); 2702 } 2703 2704 /** 2705 * Get the number of file system read operations such as list files 2706 * @return number of read operations 2707 */ 2708 public int getReadOps() { 2709 return readOps.get() + largeReadOps.get(); 2710 } 2711 2712 /** 2713 * Get the number of large file system read operations such as list files 2714 * under a large directory 2715 * @return number of large read operations 2716 */ 2717 public int getLargeReadOps() { 2718 return largeReadOps.get(); 2719 } 2720 2721 /** 2722 * Get the number of file system write operations such as create, append 2723 * rename etc. 2724 * @return number of write operations 2725 */ 2726 public int getWriteOps() { 2727 return writeOps.get(); 2728 } 2729 2730 @Override 2731 public String toString() { 2732 return bytesRead + " bytes read, " + bytesWritten + " bytes written, " 2733 + readOps + " read ops, " + largeReadOps + " large read ops, " 2734 + writeOps + " write ops"; 2735 } 2736 2737 /** 2738 * Reset the counts of bytes to 0. 2739 */ 2740 public void reset() { 2741 bytesWritten.set(0); 2742 bytesRead.set(0); 2743 } 2744 2745 /** 2746 * Get the uri scheme associated with this statistics object. 2747 * @return the schema associated with this set of statistics 2748 */ 2749 public String getScheme() { 2750 return scheme; 2751 } 2752 } 2753 2754 /** 2755 * Get the Map of Statistics object indexed by URI Scheme. 2756 * @return a Map having a key as URI scheme and value as Statistics object 2757 * @deprecated use {@link #getAllStatistics} instead 2758 */ 2759 @Deprecated 2760 public static synchronized Map<String, Statistics> getStatistics() { 2761 Map<String, Statistics> result = new HashMap<String, Statistics>(); 2762 for(Statistics stat: statisticsTable.values()) { 2763 result.put(stat.getScheme(), stat); 2764 } 2765 return result; 2766 } 2767 2768 /** 2769 * Return the FileSystem classes that have Statistics 2770 */ 2771 public static synchronized List<Statistics> getAllStatistics() { 2772 return new ArrayList<Statistics>(statisticsTable.values()); 2773 } 2774 2775 /** 2776 * Get the statistics for a particular file system 2777 * @param cls the class to lookup 2778 * @return a statistics object 2779 */ 2780 public static synchronized 2781 Statistics getStatistics(String scheme, Class<? extends FileSystem> cls) { 2782 Statistics result = statisticsTable.get(cls); 2783 if (result == null) { 2784 result = new Statistics(scheme); 2785 statisticsTable.put(cls, result); 2786 } 2787 return result; 2788 } 2789 2790 /** 2791 * Reset all statistics for all file systems 2792 */ 2793 public static synchronized void clearStatistics() { 2794 for(Statistics stat: statisticsTable.values()) { 2795 stat.reset(); 2796 } 2797 } 2798 2799 /** 2800 * Print all statistics for all file systems 2801 */ 2802 public static synchronized 2803 void printStatistics() throws IOException { 2804 for (Map.Entry<Class<? extends FileSystem>, Statistics> pair: 2805 statisticsTable.entrySet()) { 2806 System.out.println(" FileSystem " + pair.getKey().getName() + 2807 ": " + pair.getValue()); 2808 } 2809 } 2810 2811 // Symlinks are temporarily disabled - see Hadoop-10020 2812 private static boolean symlinkEnabled = false; 2813 private static Configuration conf = null; 2814 2815 @Deprecated 2816 @VisibleForTesting 2817 public static boolean isSymlinksEnabled() { 2818 if (conf == null) { 2819 Configuration conf = new Configuration(); 2820 symlinkEnabled = conf.getBoolean("test.SymlinkEnabledForTesting", false); 2821 } 2822 return symlinkEnabled; 2823 } 2824 2825 @Deprecated 2826 @VisibleForTesting 2827 public static void enableSymlinks() { 2828 symlinkEnabled = true; 2829 } 2830 }