001 /** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018 package org.apache.hadoop.fs; 019 020 import java.io.Closeable; 021 import java.io.FileNotFoundException; 022 import java.io.IOException; 023 import java.net.URI; 024 import java.net.URISyntaxException; 025 import java.security.PrivilegedExceptionAction; 026 import java.util.ArrayList; 027 import java.util.Arrays; 028 import java.util.Collections; 029 import java.util.EnumSet; 030 import java.util.HashMap; 031 import java.util.HashSet; 032 import java.util.IdentityHashMap; 033 import java.util.Iterator; 034 import java.util.List; 035 import java.util.Map; 036 import java.util.NoSuchElementException; 037 import java.util.ServiceLoader; 038 import java.util.Set; 039 import java.util.Stack; 040 import java.util.TreeSet; 041 import java.util.concurrent.atomic.AtomicInteger; 042 import java.util.concurrent.atomic.AtomicLong; 043 044 import org.apache.commons.logging.Log; 045 import org.apache.commons.logging.LogFactory; 046 import org.apache.hadoop.classification.InterfaceAudience; 047 import org.apache.hadoop.classification.InterfaceStability; 048 import org.apache.hadoop.conf.Configuration; 049 import 
org.apache.hadoop.conf.Configured; 050 import org.apache.hadoop.fs.Options.ChecksumOpt; 051 import org.apache.hadoop.fs.Options.Rename; 052 import org.apache.hadoop.fs.permission.FsPermission; 053 import org.apache.hadoop.io.MultipleIOException; 054 import org.apache.hadoop.io.Text; 055 import org.apache.hadoop.net.NetUtils; 056 import org.apache.hadoop.security.AccessControlException; 057 import org.apache.hadoop.security.Credentials; 058 import org.apache.hadoop.security.SecurityUtil; 059 import org.apache.hadoop.security.UserGroupInformation; 060 import org.apache.hadoop.security.token.Token; 061 import org.apache.hadoop.util.DataChecksum; 062 import org.apache.hadoop.util.Progressable; 063 import org.apache.hadoop.util.ReflectionUtils; 064 import org.apache.hadoop.util.ShutdownHookManager; 065 066 import com.google.common.annotations.VisibleForTesting; 067 068 /**************************************************************** 069 * An abstract base class for a fairly generic filesystem. It 070 * may be implemented as a distributed filesystem, or as a "local" 071 * one that reflects the locally-connected disk. The local version 072 * exists for small Hadoop instances and for testing. 073 * 074 * <p> 075 * 076 * All user code that may potentially use the Hadoop Distributed 077 * File System should be written to use a FileSystem object. The 078 * Hadoop DFS is a multi-machine system that appears as a single 079 * disk. It's useful because of its fault tolerance and potentially 080 * very large capacity. 081 * 082 * <p> 083 * The local implementation is {@link LocalFileSystem} and distributed 084 * implementation is DistributedFileSystem. 
085 *****************************************************************/ 086 @InterfaceAudience.Public 087 @InterfaceStability.Stable 088 public abstract class FileSystem extends Configured implements Closeable { /** Configuration key under which the default filesystem URI is stored. */ 089 public static final String FS_DEFAULT_NAME_KEY = 090 CommonConfigurationKeys.FS_DEFAULT_NAME_KEY; /** Value used for the default filesystem when the key is not set. */ 091 public static final String DEFAULT_FS = 092 CommonConfigurationKeys.FS_DEFAULT_NAME_DEFAULT; 093 094 public static final Log LOG = LogFactory.getLog(FileSystem.class); 095 096 /** 097 * Priority of the FileSystem shutdown hook. 098 */ 099 public static final int SHUTDOWN_HOOK_PRIORITY = 10; 100 101 /** FileSystem cache. */ 102 static final Cache CACHE = new Cache(); 103 104 /** The key this instance is stored under in the cache. */ 105 private Cache.Key key; 106 107 /** Statistics recorded per FileSystem class. */ 108 private static final Map<Class<? extends FileSystem>, Statistics> 109 statisticsTable = 110 new IdentityHashMap<Class<? extends FileSystem>, Statistics>(); 111 112 /** 113 * The statistics for this file system. 114 */ 115 protected Statistics statistics; 116 117 /** 118 * A cache of files that should be deleted when the filesystem is closed 119 * or the JVM exits. 120 */ 121 private Set<Path> deleteOnExit = new TreeSet<Path>(); 122 123 /** 124 * This method adds a file system for testing so that we can find it later. It 125 * is only for testing. 
126 * @param uri the uri to store it under 127 * @param conf the configuration to store it under 128 * @param fs the file system to store 129 * @throws IOException 130 */ 131 static void addFileSystemForTesting(URI uri, Configuration conf, 132 FileSystem fs) throws IOException { 133 CACHE.map.put(new Cache.Key(uri, conf), fs); 134 } 135 136 /** 137 * Get a filesystem instance based on the uri, the passed 138 * configuration and the user 139 * @param uri of the filesystem 140 * @param conf the configuration to use 141 * @param user to perform the get as 142 * @return the filesystem instance 143 * @throws IOException 144 * @throws InterruptedException 145 */ 146 public static FileSystem get(final URI uri, final Configuration conf, 147 final String user) throws IOException, InterruptedException { 148 String ticketCachePath = 149 conf.get(CommonConfigurationKeys.KERBEROS_TICKET_CACHE_PATH); 150 UserGroupInformation ugi = 151 UserGroupInformation.getBestUGI(ticketCachePath, user); 152 return ugi.doAs(new PrivilegedExceptionAction<FileSystem>() { 153 @Override 154 public FileSystem run() throws IOException { 155 return get(uri, conf); 156 } 157 }); 158 } 159 160 /** 161 * Returns the configured filesystem implementation. 162 * @param conf the configuration to use 163 */ 164 public static FileSystem get(Configuration conf) throws IOException { 165 return get(getDefaultUri(conf), conf); 166 } 167 168 /** Get the default filesystem URI from a configuration. 169 * @param conf the configuration to use 170 * @return the uri of the default filesystem 171 */ 172 public static URI getDefaultUri(Configuration conf) { 173 return URI.create(fixName(conf.get(FS_DEFAULT_NAME_KEY, DEFAULT_FS))); 174 } 175 176 /** Set the default filesystem URI in a configuration. 
177 * @param conf the configuration to alter 178 * @param uri the new default filesystem uri 179 */ 180 public static void setDefaultUri(Configuration conf, URI uri) { 181 conf.set(FS_DEFAULT_NAME_KEY, uri.toString()); 182 } 183 184 /** Set the default filesystem URI in a configuration. 185 * @param conf the configuration to alter 186 * @param uri the new default filesystem uri 187 */ 188 public static void setDefaultUri(Configuration conf, String uri) { 189 setDefaultUri(conf, URI.create(fixName(uri))); 190 } 191 192 /** Called after a new FileSystem instance is constructed. 193 * @param name a uri whose authority section names the host, port, etc. 194 * for this FileSystem 195 * @param conf the configuration 196 */ 197 public void initialize(URI name, Configuration conf) throws IOException { 198 statistics = getStatistics(name.getScheme(), getClass()); 199 } 200 201 /** 202 * Return the protocol scheme for the FileSystem. 203 * <p/> 204 * This implementation throws an <code>UnsupportedOperationException</code>. 205 * 206 * @return the protocol scheme for the FileSystem. 207 */ 208 public String getScheme() { 209 throw new UnsupportedOperationException("Not implemented by the " + getClass().getSimpleName() + " FileSystem implementation"); 210 } 211 212 /** Returns a URI whose scheme and authority identify this FileSystem.*/ 213 public abstract URI getUri(); 214 215 /** 216 * Return a canonicalized form of this FileSystem's URI. 217 * 218 * The default implementation simply calls {@link #canonicalizeUri(URI)} 219 * on the filesystem's own URI, so subclasses typically only need to 220 * implement that method. 221 * 222 * @see #canonicalizeUri(URI) 223 */ 224 protected URI getCanonicalUri() { 225 return canonicalizeUri(getUri()); 226 } 227 228 /** 229 * Canonicalize the given URI. 230 * 231 * This is filesystem-dependent, but may for example consist of 232 * canonicalizing the hostname using DNS and adding the default 233 * port if not specified. 
234 * 235 * The default implementation simply fills in the default port if 236 * not specified and if the filesystem has a default port. 237 * 238 * @return URI 239 * @see NetUtils#getCanonicalUri(URI, int) 240 */ 241 protected URI canonicalizeUri(URI uri) { 242 if (uri.getPort() == -1 && getDefaultPort() > 0) { 243 // reconstruct the uri with the default port set 244 try { 245 uri = new URI(uri.getScheme(), uri.getUserInfo(), 246 uri.getHost(), getDefaultPort(), 247 uri.getPath(), uri.getQuery(), uri.getFragment()); 248 } catch (URISyntaxException e) { 249 // Should never happen! 250 throw new AssertionError("Valid URI became unparseable: " + 251 uri); 252 } 253 } 254 255 return uri; 256 } 257 258 /** 259 * Get the default port for this file system. 260 * @return the default port or 0 if there isn't one 261 */ 262 protected int getDefaultPort() { 263 return 0; 264 } 265 266 protected static FileSystem getFSofPath(final Path absOrFqPath, 267 final Configuration conf) 268 throws UnsupportedFileSystemException, IOException { 269 absOrFqPath.checkNotSchemeWithRelative(); 270 absOrFqPath.checkNotRelative(); 271 272 // Uses the default file system if not fully qualified 273 return get(absOrFqPath.toUri(), conf); 274 } 275 276 /** 277 * Get a canonical service name for this file system. The token cache is 278 * the only user of the canonical service name, and uses it to lookup this 279 * filesystem's service tokens. 280 * If file system provides a token of its own then it must have a canonical 281 * name, otherwise canonical name can be null. 282 * 283 * Default Impl: If the file system has child file systems 284 * (such as an embedded file system) then it is assumed that the fs has no 285 * tokens of its own and hence returns a null name; otherwise a service 286 * name is built using Uri and port. 
287 * 288 * @return a service string that uniquely identifies this file system, null 289 * if the filesystem does not implement tokens 290 * @see SecurityUtil#buildDTServiceName(URI, int) 291 */ 292 @InterfaceAudience.LimitedPrivate({ "HDFS", "MapReduce" }) 293 public String getCanonicalServiceName() { 294 return (getChildFileSystems() == null) 295 ? SecurityUtil.buildDTServiceName(getUri(), getDefaultPort()) 296 : null; 297 } 298 299 /** @deprecated call #getUri() instead.*/ 300 @Deprecated 301 public String getName() { return getUri().toString(); } 302 303 /** @deprecated call #get(URI,Configuration) instead. */ 304 @Deprecated 305 public static FileSystem getNamed(String name, Configuration conf) 306 throws IOException { 307 return get(URI.create(fixName(name)), conf); 308 } 309 310 /** Update old-format filesystem names, for back-compatibility. This should 311 * eventually be replaced with a checkName() method that throws an exception 312 * for old-format names. */ 313 private static String fixName(String name) { 314 // convert old-format name to new-format name 315 if (name.equals("local")) { // "local" is now "file:///". 316 LOG.warn("\"local\" is a deprecated filesystem name." 317 +" Use \"file:///\" instead."); 318 name = "file:///"; 319 } else if (name.indexOf('/')==-1) { // unqualified is "hdfs://" 320 LOG.warn("\""+name+"\" is a deprecated filesystem name." 321 +" Use \"hdfs://"+name+"/\" instead."); 322 name = "hdfs://"+name; 323 } 324 return name; 325 } 326 327 /** 328 * Get the local file system. 329 * @param conf the configuration to configure the file system with 330 * @return a LocalFileSystem 331 */ 332 public static LocalFileSystem getLocal(Configuration conf) 333 throws IOException { 334 return (LocalFileSystem)get(LocalFileSystem.NAME, conf); 335 } 336 337 /** Returns the FileSystem for this URI's scheme and authority. 
The scheme 338 * of the URI determines a configuration property name, 339 * <tt>fs.<i>scheme</i>.class</tt> whose value names the FileSystem class. 340 * The entire URI is passed to the FileSystem instance's initialize method. 341 */ 342 public static FileSystem get(URI uri, Configuration conf) throws IOException { 343 String scheme = uri.getScheme(); 344 String authority = uri.getAuthority(); 345 346 if (scheme == null && authority == null) { // use default FS 347 return get(conf); 348 } 349 350 if (scheme != null && authority == null) { // no authority 351 URI defaultUri = getDefaultUri(conf); 352 if (scheme.equals(defaultUri.getScheme()) // if scheme matches default 353 && defaultUri.getAuthority() != null) { // & default has authority 354 return get(defaultUri, conf); // return default 355 } 356 } 357 358 String disableCacheName = String.format("fs.%s.impl.disable.cache", scheme); 359 if (conf.getBoolean(disableCacheName, false)) { 360 return createFileSystem(uri, conf); 361 } 362 363 return CACHE.get(uri, conf); 364 } 365 366 /** 367 * Returns the FileSystem for this URI's scheme and authority and the 368 * passed user. 
Internally invokes {@link #newInstance(URI, Configuration)} 369 * @param uri of the filesystem 370 * @param conf the configuration to use 371 * @param user to perform the get as 372 * @return filesystem instance 373 * @throws IOException 374 * @throws InterruptedException 375 */ 376 public static FileSystem newInstance(final URI uri, final Configuration conf, 377 final String user) throws IOException, InterruptedException { 378 String ticketCachePath = 379 conf.get(CommonConfigurationKeys.KERBEROS_TICKET_CACHE_PATH); 380 UserGroupInformation ugi = 381 UserGroupInformation.getBestUGI(ticketCachePath, user); 382 return ugi.doAs(new PrivilegedExceptionAction<FileSystem>() { 383 @Override 384 public FileSystem run() throws IOException { 385 return newInstance(uri,conf); 386 } 387 }); 388 } 389 /** Returns the FileSystem for this URI's scheme and authority. The scheme 390 * of the URI determines a configuration property name, 391 * <tt>fs.<i>scheme</i>.class</tt> whose value names the FileSystem class. 392 * The entire URI is passed to the FileSystem instance's initialize method. 393 * This always returns a new FileSystem object. 394 */ 395 public static FileSystem newInstance(URI uri, Configuration conf) throws IOException { 396 String scheme = uri.getScheme(); 397 String authority = uri.getAuthority(); 398 399 if (scheme == null) { // no scheme: use default FS 400 return newInstance(conf); 401 } 402 403 if (authority == null) { // no authority 404 URI defaultUri = getDefaultUri(conf); 405 if (scheme.equals(defaultUri.getScheme()) // if scheme matches default 406 && defaultUri.getAuthority() != null) { // & default has authority 407 return newInstance(defaultUri, conf); // return default 408 } 409 } 410 return CACHE.getUnique(uri, conf); 411 } 412 413 /** Returns a unique configured filesystem implementation. 414 * This always returns a new FileSystem object. 
415 * @param conf the configuration to use 416 */ 417 public static FileSystem newInstance(Configuration conf) throws IOException { 418 return newInstance(getDefaultUri(conf), conf); 419 } 420 421 /** 422 * Get a unique local file system object 423 * @param conf the configuration to configure the file system with 424 * @return a LocalFileSystem 425 * This always returns a new FileSystem object. 426 */ 427 public static LocalFileSystem newInstanceLocal(Configuration conf) 428 throws IOException { 429 return (LocalFileSystem)newInstance(LocalFileSystem.NAME, conf); 430 } 431 432 /** 433 * Close all cached filesystems. Be sure those filesystems are not 434 * used anymore. 435 * 436 * @throws IOException 437 */ 438 public static void closeAll() throws IOException { 439 CACHE.closeAll(); 440 } 441 442 /** 443 * Close all cached filesystems for a given UGI. Be sure those filesystems 444 * are not used anymore. 445 * @param ugi user group info to close 446 * @throws IOException 447 */ 448 public static void closeAllForUGI(UserGroupInformation ugi) 449 throws IOException { 450 CACHE.closeAll(ugi); 451 } 452 453 /** 454 * Make sure that a path specifies a FileSystem. 455 * @param path to use 456 */ 457 public Path makeQualified(Path path) { 458 checkPath(path); 459 return path.makeQualified(this.getUri(), this.getWorkingDirectory()); 460 } 461 462 /** 463 * Get a new delegation token for this file system. 464 * This is an internal method that should have been declared protected 465 * but wasn't historically. 466 * Callers should use {@link #addDelegationTokens(String, Credentials)} 467 * 468 * @param renewer the account name that is allowed to renew the token. 
469 * @return a new delegation token 470 * @throws IOException 471 */ 472 @InterfaceAudience.Private() 473 public Token<?> getDelegationToken(String renewer) throws IOException { 474 return null; 475 } 476 477 /** 478 * Obtain all delegation tokens used by this FileSystem that are not 479 * already present in the given Credentials. Existing tokens will neither 480 * be verified as valid nor having the given renewer. Missing tokens will 481 * be acquired and added to the given Credentials. 482 * 483 * Default Impl: works for simple fs with its own token 484 * and also for an embedded fs whose tokens are those of its 485 * children file system (i.e. the embedded fs has not tokens of its 486 * own). 487 * 488 * @param renewer the user allowed to renew the delegation tokens 489 * @param credentials cache in which to add new delegation tokens 490 * @return list of new delegation tokens 491 * @throws IOException 492 */ 493 @InterfaceAudience.LimitedPrivate({ "HDFS", "MapReduce" }) 494 public Token<?>[] addDelegationTokens( 495 final String renewer, Credentials credentials) throws IOException { 496 if (credentials == null) { 497 credentials = new Credentials(); 498 } 499 final List<Token<?>> tokens = new ArrayList<Token<?>>(); 500 collectDelegationTokens(renewer, credentials, tokens); 501 return tokens.toArray(new Token<?>[tokens.size()]); 502 } 503 504 /** 505 * Recursively obtain the tokens for this FileSystem and all descended 506 * FileSystems as determined by getChildFileSystems(). 
507 * @param renewer the user allowed to renew the delegation tokens 508 * @param credentials cache in which to add the new delegation tokens 509 * @param tokens list in which to add acquired tokens 510 * @throws IOException 511 */ 512 private void collectDelegationTokens(final String renewer, 513 final Credentials credentials, 514 final List<Token<?>> tokens) 515 throws IOException { 516 final String serviceName = getCanonicalServiceName(); 517 // Collect token of the this filesystem and then of its embedded children 518 if (serviceName != null) { // fs has token, grab it 519 final Text service = new Text(serviceName); 520 Token<?> token = credentials.getToken(service); 521 if (token == null) { 522 token = getDelegationToken(renewer); 523 if (token != null) { 524 tokens.add(token); 525 credentials.addToken(service, token); 526 } 527 } 528 } 529 // Now collect the tokens from the children 530 final FileSystem[] children = getChildFileSystems(); 531 if (children != null) { 532 for (final FileSystem fs : children) { 533 fs.collectDelegationTokens(renewer, credentials, tokens); 534 } 535 } 536 } 537 538 /** 539 * Get all the immediate child FileSystems embedded in this FileSystem. 540 * It does not recurse and get grand children. If a FileSystem 541 * has multiple child FileSystems, then it should return a unique list 542 * of those FileSystems. Default is to return null to signify no children. 543 * 544 * @return FileSystems used by this FileSystem 545 */ 546 @InterfaceAudience.LimitedPrivate({ "HDFS" }) 547 @VisibleForTesting 548 public FileSystem[] getChildFileSystems() { 549 return null; 550 } 551 552 /** create a file with the provided permission 553 * The permission of the file is set to be the provided permission as in 554 * setPermission, not permission&~umask 555 * 556 * It is implemented using two RPCs. It is understood that it is inefficient, 557 * but the implementation is thread-safe. 
The other option is to change the 558 * value of umask in configuration to be 0, but it is not thread-safe. 559 * 560 * @param fs file system handle 561 * @param file the name of the file to be created 562 * @param permission the permission of the file 563 * @return an output stream 564 * @throws IOException 565 */ 566 public static FSDataOutputStream create(FileSystem fs, 567 Path file, FsPermission permission) throws IOException { 568 // create the file with default permission 569 FSDataOutputStream out = fs.create(file); 570 // set its permission to the supplied one 571 fs.setPermission(file, permission); 572 return out; 573 } 574 575 /** create a directory with the provided permission 576 * The permission of the directory is set to be the provided permission as in 577 * setPermission, not permission&~umask 578 * 579 * @see #create(FileSystem, Path, FsPermission) 580 * 581 * @param fs file system handle 582 * @param dir the name of the directory to be created 583 * @param permission the permission of the directory 584 * @return true if the directory creation succeeds; false otherwise 585 * @throws IOException 586 */ 587 public static boolean mkdirs(FileSystem fs, Path dir, FsPermission permission) 588 throws IOException { 589 // create the directory using the default permission 590 boolean result = fs.mkdirs(dir); 591 // set its permission to be the supplied one 592 fs.setPermission(dir, permission); 593 return result; 594 } 595 596 /////////////////////////////////////////////////////////////// 597 // FileSystem 598 /////////////////////////////////////////////////////////////// 599 600 protected FileSystem() { 601 super(null); 602 } 603 604 /** 605 * Check that a Path belongs to this FileSystem. 
606 * @param path to check 607 */ 608 protected void checkPath(Path path) { 609 URI uri = path.toUri(); 610 String thatScheme = uri.getScheme(); 611 if (thatScheme == null) // fs is relative 612 return; 613 URI thisUri = getCanonicalUri(); 614 String thisScheme = thisUri.getScheme(); 615 //authority and scheme are not case sensitive 616 if (thisScheme.equalsIgnoreCase(thatScheme)) {// schemes match 617 String thisAuthority = thisUri.getAuthority(); 618 String thatAuthority = uri.getAuthority(); 619 if (thatAuthority == null && // path's authority is null 620 thisAuthority != null) { // fs has an authority 621 URI defaultUri = getDefaultUri(getConf()); 622 if (thisScheme.equalsIgnoreCase(defaultUri.getScheme())) { 623 uri = defaultUri; // schemes match, so use this uri instead 624 } else { 625 uri = null; // can't determine auth of the path 626 } 627 } 628 if (uri != null) { 629 // canonicalize uri before comparing with this fs 630 uri = canonicalizeUri(uri); 631 thatAuthority = uri.getAuthority(); 632 if (thisAuthority == thatAuthority || // authorities match 633 (thisAuthority != null && 634 thisAuthority.equalsIgnoreCase(thatAuthority))) 635 return; 636 } 637 } 638 throw new IllegalArgumentException("Wrong FS: "+path+ 639 ", expected: "+this.getUri()); 640 } 641 642 /** 643 * Return an array containing hostnames, offset and size of 644 * portions of the given file. For a nonexistent 645 * file or regions, null will be returned. 646 * 647 * This call is most helpful with DFS, where it returns 648 * hostnames of machines that contain the given file. 649 * 650 * The FileSystem will simply return an elt containing 'localhost'. 
651 * 652 * @param file FilesStatus to get data from 653 * @param start offset into the given file 654 * @param len length for which to get locations for 655 */ 656 public BlockLocation[] getFileBlockLocations(FileStatus file, 657 long start, long len) throws IOException { 658 if (file == null) { 659 return null; 660 } 661 662 if (start < 0 || len < 0) { 663 throw new IllegalArgumentException("Invalid start or len parameter"); 664 } 665 666 if (file.getLen() <= start) { 667 return new BlockLocation[0]; 668 669 } 670 String[] name = { "localhost:50010" }; 671 String[] host = { "localhost" }; 672 return new BlockLocation[] { 673 new BlockLocation(name, host, 0, file.getLen()) }; 674 } 675 676 677 /** 678 * Return an array containing hostnames, offset and size of 679 * portions of the given file. For a nonexistent 680 * file or regions, null will be returned. 681 * 682 * This call is most helpful with DFS, where it returns 683 * hostnames of machines that contain the given file. 684 * 685 * The FileSystem will simply return an elt containing 'localhost'. 
686 * 687 * @param p path is used to identify an FS since an FS could have 688 * another FS that it could be delegating the call to 689 * @param start offset into the given file 690 * @param len length for which to get locations for 691 */ 692 public BlockLocation[] getFileBlockLocations(Path p, 693 long start, long len) throws IOException { 694 if (p == null) { 695 throw new NullPointerException(); 696 } 697 FileStatus file = getFileStatus(p); 698 return getFileBlockLocations(file, start, len); 699 } 700 701 /** 702 * Return a set of server default configuration values 703 * @return server default configuration values 704 * @throws IOException 705 * @deprecated use {@link #getServerDefaults(Path)} instead 706 */ 707 @Deprecated 708 public FsServerDefaults getServerDefaults() throws IOException { 709 Configuration conf = getConf(); 710 // CRC32 is chosen as default as it is available in all 711 // releases that support checksum. 712 // The client trash configuration is ignored. 713 return new FsServerDefaults(getDefaultBlockSize(), 714 conf.getInt("io.bytes.per.checksum", 512), 715 64 * 1024, 716 getDefaultReplication(), 717 conf.getInt("io.file.buffer.size", 4096), 718 false, 719 CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_DEFAULT, 720 DataChecksum.Type.CRC32); 721 } 722 723 /** 724 * Return a set of server default configuration values 725 * @param p path is used to identify an FS since an FS could have 726 * another FS that it could be delegating the call to 727 * @return server default configuration values 728 * @throws IOException 729 */ 730 public FsServerDefaults getServerDefaults(Path p) throws IOException { 731 return getServerDefaults(); 732 } 733 734 /** 735 * Return the fully-qualified path of path f resolving the path 736 * through any symlinks or mount point 737 * @param p path to be resolved 738 * @return fully qualified path 739 * @throws FileNotFoundException 740 */ 741 public Path resolvePath(final Path p) throws IOException { 742 checkPath(p); 
743 return getFileStatus(p).getPath(); 744 } 745 746 /** 747 * Opens an FSDataInputStream at the indicated Path. 748 * @param f the file name to open 749 * @param bufferSize the size of the buffer to be used. 750 */ 751 public abstract FSDataInputStream open(Path f, int bufferSize) 752 throws IOException; 753 754 /** 755 * Opens an FSDataInputStream at the indicated Path. 756 * @param f the file to open 757 */ 758 public FSDataInputStream open(Path f) throws IOException { 759 return open(f, getConf().getInt("io.file.buffer.size", 4096)); 760 } 761 762 /** 763 * Create an FSDataOutputStream at the indicated Path. 764 * Files are overwritten by default. 765 * @param f the file to create 766 */ 767 public FSDataOutputStream create(Path f) throws IOException { 768 return create(f, true); 769 } 770 771 /** 772 * Create an FSDataOutputStream at the indicated Path. 773 * @param f the file to create 774 * @param overwrite if a file with this name already exists, then if true, 775 * the file will be overwritten, and if false an exception will be thrown. 776 */ 777 public FSDataOutputStream create(Path f, boolean overwrite) 778 throws IOException { 779 return create(f, overwrite, 780 getConf().getInt("io.file.buffer.size", 4096), 781 getDefaultReplication(f), 782 getDefaultBlockSize(f)); 783 } 784 785 /** 786 * Create an FSDataOutputStream at the indicated Path with write-progress 787 * reporting. 788 * Files are overwritten by default. 789 * @param f the file to create 790 * @param progress to report progress 791 */ 792 public FSDataOutputStream create(Path f, Progressable progress) 793 throws IOException { 794 return create(f, true, 795 getConf().getInt("io.file.buffer.size", 4096), 796 getDefaultReplication(f), 797 getDefaultBlockSize(f), progress); 798 } 799 800 /** 801 * Create an FSDataOutputStream at the indicated Path. 802 * Files are overwritten by default. 
803 * @param f the file to create 804 * @param replication the replication factor 805 */ 806 public FSDataOutputStream create(Path f, short replication) 807 throws IOException { 808 return create(f, true, 809 getConf().getInt("io.file.buffer.size", 4096), 810 replication, 811 getDefaultBlockSize(f)); 812 } 813 814 /** 815 * Create an FSDataOutputStream at the indicated Path with write-progress 816 * reporting. 817 * Files are overwritten by default. 818 * @param f the file to create 819 * @param replication the replication factor 820 * @param progress to report progress 821 */ 822 public FSDataOutputStream create(Path f, short replication, 823 Progressable progress) throws IOException { 824 return create(f, true, 825 getConf().getInt( 826 CommonConfigurationKeysPublic.IO_FILE_BUFFER_SIZE_KEY, 827 CommonConfigurationKeysPublic.IO_FILE_BUFFER_SIZE_DEFAULT), 828 replication, 829 getDefaultBlockSize(f), progress); 830 } 831 832 833 /** 834 * Create an FSDataOutputStream at the indicated Path. 835 * @param f the file name to create 836 * @param overwrite if a file with this name already exists, then if true, 837 * the file will be overwritten, and if false an error will be thrown. 838 * @param bufferSize the size of the buffer to be used. 839 */ 840 public FSDataOutputStream create(Path f, 841 boolean overwrite, 842 int bufferSize 843 ) throws IOException { 844 return create(f, overwrite, bufferSize, 845 getDefaultReplication(f), 846 getDefaultBlockSize(f)); 847 } 848 849 /** 850 * Create an FSDataOutputStream at the indicated Path with write-progress 851 * reporting. 852 * @param f the path of the file to open 853 * @param overwrite if a file with this name already exists, then if true, 854 * the file will be overwritten, and if false an error will be thrown. 855 * @param bufferSize the size of the buffer to be used. 
856 */ 857 public FSDataOutputStream create(Path f, 858 boolean overwrite, 859 int bufferSize, 860 Progressable progress 861 ) throws IOException { 862 return create(f, overwrite, bufferSize, 863 getDefaultReplication(f), 864 getDefaultBlockSize(f), progress); 865 } 866 867 868 /** 869 * Create an FSDataOutputStream at the indicated Path. 870 * @param f the file name to open 871 * @param overwrite if a file with this name already exists, then if true, 872 * the file will be overwritten, and if false an error will be thrown. 873 * @param bufferSize the size of the buffer to be used. 874 * @param replication required block replication for the file. 875 */ 876 public FSDataOutputStream create(Path f, 877 boolean overwrite, 878 int bufferSize, 879 short replication, 880 long blockSize 881 ) throws IOException { 882 return create(f, overwrite, bufferSize, replication, blockSize, null); 883 } 884 885 /** 886 * Create an FSDataOutputStream at the indicated Path with write-progress 887 * reporting. 888 * @param f the file name to open 889 * @param overwrite if a file with this name already exists, then if true, 890 * the file will be overwritten, and if false an error will be thrown. 891 * @param bufferSize the size of the buffer to be used. 892 * @param replication required block replication for the file. 893 */ 894 public FSDataOutputStream create(Path f, 895 boolean overwrite, 896 int bufferSize, 897 short replication, 898 long blockSize, 899 Progressable progress 900 ) throws IOException { 901 return this.create(f, FsPermission.getFileDefault().applyUMask( 902 FsPermission.getUMask(getConf())), overwrite, bufferSize, 903 replication, blockSize, progress); 904 } 905 906 /** 907 * Create an FSDataOutputStream at the indicated Path with write-progress 908 * reporting. 
909 * @param f the file name to open 910 * @param permission 911 * @param overwrite if a file with this name already exists, then if true, 912 * the file will be overwritten, and if false an error will be thrown. 913 * @param bufferSize the size of the buffer to be used. 914 * @param replication required block replication for the file. 915 * @param blockSize 916 * @param progress 917 * @throws IOException 918 * @see #setPermission(Path, FsPermission) 919 */ 920 public abstract FSDataOutputStream create(Path f, 921 FsPermission permission, 922 boolean overwrite, 923 int bufferSize, 924 short replication, 925 long blockSize, 926 Progressable progress) throws IOException; 927 928 /** 929 * Create an FSDataOutputStream at the indicated Path with write-progress 930 * reporting. 931 * @param f the file name to open 932 * @param permission 933 * @param flags {@link CreateFlag}s to use for this stream. 934 * @param bufferSize the size of the buffer to be used. 935 * @param replication required block replication for the file. 936 * @param blockSize 937 * @param progress 938 * @throws IOException 939 * @see #setPermission(Path, FsPermission) 940 */ 941 public FSDataOutputStream create(Path f, 942 FsPermission permission, 943 EnumSet<CreateFlag> flags, 944 int bufferSize, 945 short replication, 946 long blockSize, 947 Progressable progress) throws IOException { 948 return create(f, permission, flags, bufferSize, replication, 949 blockSize, progress, null); 950 } 951 952 /** 953 * Create an FSDataOutputStream at the indicated Path with a custom 954 * checksum option 955 * @param f the file name to open 956 * @param permission 957 * @param flags {@link CreateFlag}s to use for this stream. 958 * @param bufferSize the size of the buffer to be used. 959 * @param replication required block replication for the file. 960 * @param blockSize 961 * @param progress 962 * @param checksumOpt checksum parameter. If null, the values 963 * found in conf will be used. 
964 * @throws IOException 965 * @see #setPermission(Path, FsPermission) 966 */ 967 public FSDataOutputStream create(Path f, 968 FsPermission permission, 969 EnumSet<CreateFlag> flags, 970 int bufferSize, 971 short replication, 972 long blockSize, 973 Progressable progress, 974 ChecksumOpt checksumOpt) throws IOException { 975 // Checksum options are ignored by default. The file systems that 976 // implement checksum need to override this method. The full 977 // support is currently only available in DFS. 978 return create(f, permission, flags.contains(CreateFlag.OVERWRITE), 979 bufferSize, replication, blockSize, progress); 980 } 981 982 /*. 983 * This create has been added to support the FileContext that processes 984 * the permission 985 * with umask before calling this method. 986 * This a temporary method added to support the transition from FileSystem 987 * to FileContext for user applications. 988 */ 989 @Deprecated 990 protected FSDataOutputStream primitiveCreate(Path f, 991 FsPermission absolutePermission, EnumSet<CreateFlag> flag, int bufferSize, 992 short replication, long blockSize, Progressable progress, 993 ChecksumOpt checksumOpt) throws IOException { 994 995 boolean pathExists = exists(f); 996 CreateFlag.validate(f, pathExists, flag); 997 998 // Default impl assumes that permissions do not matter and 999 // nor does the bytesPerChecksum hence 1000 // calling the regular create is good enough. 1001 // FSs that implement permissions should override this. 1002 1003 if (pathExists && flag.contains(CreateFlag.APPEND)) { 1004 return append(f, bufferSize, progress); 1005 } 1006 1007 return this.create(f, absolutePermission, 1008 flag.contains(CreateFlag.OVERWRITE), bufferSize, replication, 1009 blockSize, progress); 1010 } 1011 1012 /** 1013 * This version of the mkdirs method assumes that the permission is absolute. 1014 * It has been added to support the FileContext that processes the permission 1015 * with umask before calling this method. 
1016 * This a temporary method added to support the transition from FileSystem 1017 * to FileContext for user applications. 1018 */ 1019 @Deprecated 1020 protected boolean primitiveMkdir(Path f, FsPermission absolutePermission) 1021 throws IOException { 1022 // Default impl is to assume that permissions do not matter and hence 1023 // calling the regular mkdirs is good enough. 1024 // FSs that implement permissions should override this. 1025 return this.mkdirs(f, absolutePermission); 1026 } 1027 1028 1029 /** 1030 * This version of the mkdirs method assumes that the permission is absolute. 1031 * It has been added to support the FileContext that processes the permission 1032 * with umask before calling this method. 1033 * This a temporary method added to support the transition from FileSystem 1034 * to FileContext for user applications. 1035 */ 1036 @Deprecated 1037 protected void primitiveMkdir(Path f, FsPermission absolutePermission, 1038 boolean createParent) 1039 throws IOException { 1040 1041 if (!createParent) { // parent must exist. 1042 // since the this.mkdirs makes parent dirs automatically 1043 // we must throw exception if parent does not exist. 1044 final FileStatus stat = getFileStatus(f.getParent()); 1045 if (stat == null) { 1046 throw new FileNotFoundException("Missing parent:" + f); 1047 } 1048 if (!stat.isDirectory()) { 1049 throw new ParentNotDirectoryException("parent is not a dir"); 1050 } 1051 // parent does exist - go ahead with mkdir of leaf 1052 } 1053 // Default impl is to assume that permissions do not matter and hence 1054 // calling the regular mkdirs is good enough. 1055 // FSs that implement permissions should override this. 1056 if (!this.mkdirs(f, absolutePermission)) { 1057 throw new IOException("mkdir of "+ f + " failed"); 1058 } 1059 } 1060 1061 /** 1062 * Opens an FSDataOutputStream at the indicated Path with write-progress 1063 * reporting. Same as create(), except fails if parent directory doesn't 1064 * already exist. 
1065 * @param f the file name to open 1066 * @param overwrite if a file with this name already exists, then if true, 1067 * the file will be overwritten, and if false an error will be thrown. 1068 * @param bufferSize the size of the buffer to be used. 1069 * @param replication required block replication for the file. 1070 * @param blockSize 1071 * @param progress 1072 * @throws IOException 1073 * @see #setPermission(Path, FsPermission) 1074 * @deprecated API only for 0.20-append 1075 */ 1076 @Deprecated 1077 public FSDataOutputStream createNonRecursive(Path f, 1078 boolean overwrite, 1079 int bufferSize, short replication, long blockSize, 1080 Progressable progress) throws IOException { 1081 return this.createNonRecursive(f, FsPermission.getFileDefault(), 1082 overwrite, bufferSize, replication, blockSize, progress); 1083 } 1084 1085 /** 1086 * Opens an FSDataOutputStream at the indicated Path with write-progress 1087 * reporting. Same as create(), except fails if parent directory doesn't 1088 * already exist. 1089 * @param f the file name to open 1090 * @param permission 1091 * @param overwrite if a file with this name already exists, then if true, 1092 * the file will be overwritten, and if false an error will be thrown. 1093 * @param bufferSize the size of the buffer to be used. 1094 * @param replication required block replication for the file. 1095 * @param blockSize 1096 * @param progress 1097 * @throws IOException 1098 * @see #setPermission(Path, FsPermission) 1099 * @deprecated API only for 0.20-append 1100 */ 1101 @Deprecated 1102 public FSDataOutputStream createNonRecursive(Path f, FsPermission permission, 1103 boolean overwrite, int bufferSize, short replication, long blockSize, 1104 Progressable progress) throws IOException { 1105 return createNonRecursive(f, permission, 1106 overwrite ? 
EnumSet.of(CreateFlag.CREATE, CreateFlag.OVERWRITE) 1107 : EnumSet.of(CreateFlag.CREATE), bufferSize, 1108 replication, blockSize, progress); 1109 } 1110 1111 /** 1112 * Opens an FSDataOutputStream at the indicated Path with write-progress 1113 * reporting. Same as create(), except fails if parent directory doesn't 1114 * already exist. 1115 * @param f the file name to open 1116 * @param permission 1117 * @param flags {@link CreateFlag}s to use for this stream. 1118 * @param bufferSize the size of the buffer to be used. 1119 * @param replication required block replication for the file. 1120 * @param blockSize 1121 * @param progress 1122 * @throws IOException 1123 * @see #setPermission(Path, FsPermission) 1124 * @deprecated API only for 0.20-append 1125 */ 1126 @Deprecated 1127 public FSDataOutputStream createNonRecursive(Path f, FsPermission permission, 1128 EnumSet<CreateFlag> flags, int bufferSize, short replication, long blockSize, 1129 Progressable progress) throws IOException { 1130 throw new IOException("createNonRecursive unsupported for this filesystem " 1131 + this.getClass()); 1132 } 1133 1134 /** 1135 * Creates the given Path as a brand-new zero-length file. If 1136 * create fails, or if it already existed, return false. 1137 * 1138 * @param f path to use for create 1139 */ 1140 public boolean createNewFile(Path f) throws IOException { 1141 if (exists(f)) { 1142 return false; 1143 } else { 1144 create(f, false, getConf().getInt("io.file.buffer.size", 4096)).close(); 1145 return true; 1146 } 1147 } 1148 1149 /** 1150 * Append to an existing file (optional operation). 1151 * Same as append(f, getConf().getInt("io.file.buffer.size", 4096), null) 1152 * @param f the existing file to be appended. 1153 * @throws IOException 1154 */ 1155 public FSDataOutputStream append(Path f) throws IOException { 1156 return append(f, getConf().getInt("io.file.buffer.size", 4096), null); 1157 } 1158 /** 1159 * Append to an existing file (optional operation). 
1160 * Same as append(f, bufferSize, null). 1161 * @param f the existing file to be appended. 1162 * @param bufferSize the size of the buffer to be used. 1163 * @throws IOException 1164 */ 1165 public FSDataOutputStream append(Path f, int bufferSize) throws IOException { 1166 return append(f, bufferSize, null); 1167 } 1168 1169 /** 1170 * Append to an existing file (optional operation). 1171 * @param f the existing file to be appended. 1172 * @param bufferSize the size of the buffer to be used. 1173 * @param progress for reporting progress if it is not null. 1174 * @throws IOException 1175 */ 1176 public abstract FSDataOutputStream append(Path f, int bufferSize, 1177 Progressable progress) throws IOException; 1178 1179 /** 1180 * Concat existing files together. 1181 * @param trg the path to the target destination. 1182 * @param psrcs the paths to the sources to use for the concatenation. 1183 * @throws IOException 1184 */ 1185 public void concat(final Path trg, final Path [] psrcs) throws IOException { 1186 throw new UnsupportedOperationException("Not implemented by the " + 1187 getClass().getSimpleName() + " FileSystem implementation"); 1188 } 1189 1190 /** 1191 * Get replication. 1192 * 1193 * @deprecated Use getFileStatus() instead 1194 * @param src file name 1195 * @return file replication 1196 * @throws IOException 1197 */ 1198 @Deprecated 1199 public short getReplication(Path src) throws IOException { 1200 return getFileStatus(src).getReplication(); 1201 } 1202 1203 /** 1204 * Set replication for an existing file. 1205 * 1206 * @param src file name 1207 * @param replication new replication 1208 * @throws IOException 1209 * @return true if successful; 1210 * false if file does not exist or is a directory 1211 */ 1212 public boolean setReplication(Path src, short replication) 1213 throws IOException { 1214 return true; 1215 } 1216 1217 /** 1218 * Renames Path src to Path dst. Can take place on local fs 1219 * or remote DFS. 
1220 * @param src path to be renamed 1221 * @param dst new path after rename 1222 * @throws IOException on failure 1223 * @return true if rename is successful 1224 */ 1225 public abstract boolean rename(Path src, Path dst) throws IOException; 1226 1227 /** 1228 * Renames Path src to Path dst 1229 * <ul> 1230 * <li 1231 * <li>Fails if src is a file and dst is a directory. 1232 * <li>Fails if src is a directory and dst is a file. 1233 * <li>Fails if the parent of dst does not exist or is a file. 1234 * </ul> 1235 * <p> 1236 * If OVERWRITE option is not passed as an argument, rename fails 1237 * if the dst already exists. 1238 * <p> 1239 * If OVERWRITE option is passed as an argument, rename overwrites 1240 * the dst if it is a file or an empty directory. Rename fails if dst is 1241 * a non-empty directory. 1242 * <p> 1243 * Note that atomicity of rename is dependent on the file system 1244 * implementation. Please refer to the file system documentation for 1245 * details. This default implementation is non atomic. 1246 * <p> 1247 * This method is deprecated since it is a temporary method added to 1248 * support the transition from FileSystem to FileContext for user 1249 * applications. 1250 * 1251 * @param src path to be renamed 1252 * @param dst new path after rename 1253 * @throws IOException on failure 1254 */ 1255 @Deprecated 1256 protected void rename(final Path src, final Path dst, 1257 final Rename... 
options) throws IOException { 1258 // Default implementation 1259 final FileStatus srcStatus = getFileLinkStatus(src); 1260 if (srcStatus == null) { 1261 throw new FileNotFoundException("rename source " + src + " not found."); 1262 } 1263 1264 boolean overwrite = false; 1265 if (null != options) { 1266 for (Rename option : options) { 1267 if (option == Rename.OVERWRITE) { 1268 overwrite = true; 1269 } 1270 } 1271 } 1272 1273 FileStatus dstStatus; 1274 try { 1275 dstStatus = getFileLinkStatus(dst); 1276 } catch (IOException e) { 1277 dstStatus = null; 1278 } 1279 if (dstStatus != null) { 1280 if (srcStatus.isDirectory() != dstStatus.isDirectory()) { 1281 throw new IOException("Source " + src + " Destination " + dst 1282 + " both should be either file or directory"); 1283 } 1284 if (!overwrite) { 1285 throw new FileAlreadyExistsException("rename destination " + dst 1286 + " already exists."); 1287 } 1288 // Delete the destination that is a file or an empty directory 1289 if (dstStatus.isDirectory()) { 1290 FileStatus[] list = listStatus(dst); 1291 if (list != null && list.length != 0) { 1292 throw new IOException( 1293 "rename cannot overwrite non empty destination directory " + dst); 1294 } 1295 } 1296 delete(dst, false); 1297 } else { 1298 final Path parent = dst.getParent(); 1299 final FileStatus parentStatus = getFileStatus(parent); 1300 if (parentStatus == null) { 1301 throw new FileNotFoundException("rename destination parent " + parent 1302 + " not found."); 1303 } 1304 if (!parentStatus.isDirectory()) { 1305 throw new ParentNotDirectoryException("rename destination parent " + parent 1306 + " is a file."); 1307 } 1308 } 1309 if (!rename(src, dst)) { 1310 throw new IOException("rename from " + src + " to " + dst + " failed."); 1311 } 1312 } 1313 1314 /** 1315 * Delete a file 1316 * @deprecated Use {@link #delete(Path, boolean)} instead. 
1317 */ 1318 @Deprecated 1319 public boolean delete(Path f) throws IOException { 1320 return delete(f, true); 1321 } 1322 1323 /** Delete a file. 1324 * 1325 * @param f the path to delete. 1326 * @param recursive if path is a directory and set to 1327 * true, the directory is deleted else throws an exception. In 1328 * case of a file the recursive can be set to either true or false. 1329 * @return true if delete is successful else false. 1330 * @throws IOException 1331 */ 1332 public abstract boolean delete(Path f, boolean recursive) throws IOException; 1333 1334 /** 1335 * Mark a path to be deleted when FileSystem is closed. 1336 * When the JVM shuts down, 1337 * all FileSystem objects will be closed automatically. 1338 * Then, 1339 * the marked path will be deleted as a result of closing the FileSystem. 1340 * 1341 * The path has to exist in the file system. 1342 * 1343 * @param f the path to delete. 1344 * @return true if deleteOnExit is successful, otherwise false. 1345 * @throws IOException 1346 */ 1347 public boolean deleteOnExit(Path f) throws IOException { 1348 if (!exists(f)) { 1349 return false; 1350 } 1351 synchronized (deleteOnExit) { 1352 deleteOnExit.add(f); 1353 } 1354 return true; 1355 } 1356 1357 /** 1358 * Cancel the deletion of the path when the FileSystem is closed 1359 * @param f the path to cancel deletion 1360 */ 1361 public boolean cancelDeleteOnExit(Path f) { 1362 synchronized (deleteOnExit) { 1363 return deleteOnExit.remove(f); 1364 } 1365 } 1366 1367 /** 1368 * Delete all files that were marked as delete-on-exit. This recursively 1369 * deletes all files in the specified paths. 
1370 */ 1371 protected void processDeleteOnExit() { 1372 synchronized (deleteOnExit) { 1373 for (Iterator<Path> iter = deleteOnExit.iterator(); iter.hasNext();) { 1374 Path path = iter.next(); 1375 try { 1376 if (exists(path)) { 1377 delete(path, true); 1378 } 1379 } 1380 catch (IOException e) { 1381 LOG.info("Ignoring failure to deleteOnExit for path " + path); 1382 } 1383 iter.remove(); 1384 } 1385 } 1386 } 1387 1388 /** Check if exists. 1389 * @param f source file 1390 */ 1391 public boolean exists(Path f) throws IOException { 1392 try { 1393 return getFileStatus(f) != null; 1394 } catch (FileNotFoundException e) { 1395 return false; 1396 } 1397 } 1398 1399 /** True iff the named path is a directory. 1400 * Note: Avoid using this method. Instead reuse the FileStatus 1401 * returned by getFileStatus() or listStatus() methods. 1402 * @param f path to check 1403 */ 1404 public boolean isDirectory(Path f) throws IOException { 1405 try { 1406 return getFileStatus(f).isDirectory(); 1407 } catch (FileNotFoundException e) { 1408 return false; // f does not exist 1409 } 1410 } 1411 1412 /** True iff the named path is a regular file. 1413 * Note: Avoid using this method. Instead reuse the FileStatus 1414 * returned by getFileStatus() or listStatus() methods. 1415 * @param f path to check 1416 */ 1417 public boolean isFile(Path f) throws IOException { 1418 try { 1419 return getFileStatus(f).isFile(); 1420 } catch (FileNotFoundException e) { 1421 return false; // f does not exist 1422 } 1423 } 1424 1425 /** The number of bytes in a file. */ 1426 /** @deprecated Use getFileStatus() instead */ 1427 @Deprecated 1428 public long getLength(Path f) throws IOException { 1429 return getFileStatus(f).getLen(); 1430 } 1431 1432 /** Return the {@link ContentSummary} of a given {@link Path}. 
1433 * @param f path to use 1434 */ 1435 public ContentSummary getContentSummary(Path f) throws IOException { 1436 FileStatus status = getFileStatus(f); 1437 if (status.isFile()) { 1438 // f is a file 1439 return new ContentSummary(status.getLen(), 1, 0); 1440 } 1441 // f is a directory 1442 long[] summary = {0, 0, 1}; 1443 for(FileStatus s : listStatus(f)) { 1444 ContentSummary c = s.isDirectory() ? getContentSummary(s.getPath()) : 1445 new ContentSummary(s.getLen(), 1, 0); 1446 summary[0] += c.getLength(); 1447 summary[1] += c.getFileCount(); 1448 summary[2] += c.getDirectoryCount(); 1449 } 1450 return new ContentSummary(summary[0], summary[1], summary[2]); 1451 } 1452 1453 final private static PathFilter DEFAULT_FILTER = new PathFilter() { 1454 @Override 1455 public boolean accept(Path file) { 1456 return true; 1457 } 1458 }; 1459 1460 /** 1461 * List the statuses of the files/directories in the given path if the path is 1462 * a directory. 1463 * 1464 * @param f given path 1465 * @return the statuses of the files/directories in the given patch 1466 * @throws FileNotFoundException when the path does not exist; 1467 * IOException see specific implementation 1468 */ 1469 public abstract FileStatus[] listStatus(Path f) throws FileNotFoundException, 1470 IOException; 1471 1472 /* 1473 * Filter files/directories in the given path using the user-supplied path 1474 * filter. Results are added to the given array <code>results</code>. 
1475 */ 1476 private void listStatus(ArrayList<FileStatus> results, Path f, 1477 PathFilter filter) throws FileNotFoundException, IOException { 1478 FileStatus listing[] = listStatus(f); 1479 if (listing == null) { 1480 throw new IOException("Error accessing " + f); 1481 } 1482 1483 for (int i = 0; i < listing.length; i++) { 1484 if (filter.accept(listing[i].getPath())) { 1485 results.add(listing[i]); 1486 } 1487 } 1488 } 1489 1490 /** 1491 * @return an iterator over the corrupt files under the given path 1492 * (may contain duplicates if a file has more than one corrupt block) 1493 * @throws IOException 1494 */ 1495 public RemoteIterator<Path> listCorruptFileBlocks(Path path) 1496 throws IOException { 1497 throw new UnsupportedOperationException(getClass().getCanonicalName() + 1498 " does not support" + 1499 " listCorruptFileBlocks"); 1500 } 1501 1502 /** 1503 * Filter files/directories in the given path using the user-supplied path 1504 * filter. 1505 * 1506 * @param f 1507 * a path name 1508 * @param filter 1509 * the user-supplied path filter 1510 * @return an array of FileStatus objects for the files under the given path 1511 * after applying the filter 1512 * @throws FileNotFoundException when the path does not exist; 1513 * IOException see specific implementation 1514 */ 1515 public FileStatus[] listStatus(Path f, PathFilter filter) 1516 throws FileNotFoundException, IOException { 1517 ArrayList<FileStatus> results = new ArrayList<FileStatus>(); 1518 listStatus(results, f, filter); 1519 return results.toArray(new FileStatus[results.size()]); 1520 } 1521 1522 /** 1523 * Filter files/directories in the given list of paths using default 1524 * path filter. 
1525 * 1526 * @param files 1527 * a list of paths 1528 * @return a list of statuses for the files under the given paths after 1529 * applying the filter default Path filter 1530 * @throws FileNotFoundException when the path does not exist; 1531 * IOException see specific implementation 1532 */ 1533 public FileStatus[] listStatus(Path[] files) 1534 throws FileNotFoundException, IOException { 1535 return listStatus(files, DEFAULT_FILTER); 1536 } 1537 1538 /** 1539 * Filter files/directories in the given list of paths using user-supplied 1540 * path filter. 1541 * 1542 * @param files 1543 * a list of paths 1544 * @param filter 1545 * the user-supplied path filter 1546 * @return a list of statuses for the files under the given paths after 1547 * applying the filter 1548 * @throws FileNotFoundException when the path does not exist; 1549 * IOException see specific implementation 1550 */ 1551 public FileStatus[] listStatus(Path[] files, PathFilter filter) 1552 throws FileNotFoundException, IOException { 1553 ArrayList<FileStatus> results = new ArrayList<FileStatus>(); 1554 for (int i = 0; i < files.length; i++) { 1555 listStatus(results, files[i], filter); 1556 } 1557 return results.toArray(new FileStatus[results.size()]); 1558 } 1559 1560 /** 1561 * <p>Return all the files that match filePattern and are not checksum 1562 * files. Results are sorted by their names. 1563 * 1564 * <p> 1565 * A filename pattern is composed of <i>regular</i> characters and 1566 * <i>special pattern matching</i> characters, which are: 1567 * 1568 * <dl> 1569 * <dd> 1570 * <dl> 1571 * <p> 1572 * <dt> <tt> ? </tt> 1573 * <dd> Matches any single character. 1574 * 1575 * <p> 1576 * <dt> <tt> * </tt> 1577 * <dd> Matches zero or more characters. 1578 * 1579 * <p> 1580 * <dt> <tt> [<i>abc</i>] </tt> 1581 * <dd> Matches a single character from character set 1582 * <tt>{<i>a,b,c</i>}</tt>. 
1583 * 1584 * <p> 1585 * <dt> <tt> [<i>a</i>-<i>b</i>] </tt> 1586 * <dd> Matches a single character from the character range 1587 * <tt>{<i>a...b</i>}</tt>. Note that character <tt><i>a</i></tt> must be 1588 * lexicographically less than or equal to character <tt><i>b</i></tt>. 1589 * 1590 * <p> 1591 * <dt> <tt> [^<i>a</i>] </tt> 1592 * <dd> Matches a single character that is not from character set or range 1593 * <tt>{<i>a</i>}</tt>. Note that the <tt>^</tt> character must occur 1594 * immediately to the right of the opening bracket. 1595 * 1596 * <p> 1597 * <dt> <tt> \<i>c</i> </tt> 1598 * <dd> Removes (escapes) any special meaning of character <i>c</i>. 1599 * 1600 * <p> 1601 * <dt> <tt> {ab,cd} </tt> 1602 * <dd> Matches a string from the string set <tt>{<i>ab, cd</i>} </tt> 1603 * 1604 * <p> 1605 * <dt> <tt> {ab,c{de,fh}} </tt> 1606 * <dd> Matches a string from the string set <tt>{<i>ab, cde, cfh</i>}</tt> 1607 * 1608 * </dl> 1609 * </dd> 1610 * </dl> 1611 * 1612 * @param pathPattern a regular expression specifying a pth pattern 1613 1614 * @return an array of paths that match the path pattern 1615 * @throws IOException 1616 */ 1617 public FileStatus[] globStatus(Path pathPattern) throws IOException { 1618 return globStatus(pathPattern, DEFAULT_FILTER); 1619 } 1620 1621 /** 1622 * Return an array of FileStatus objects whose path names match pathPattern 1623 * and is accepted by the user-supplied path filter. Results are sorted by 1624 * their path names. 1625 * Return null if pathPattern has no glob and the path does not exist. 1626 * Return an empty array if pathPattern has a glob and no path matches it. 
1627 * 1628 * @param pathPattern 1629 * a regular expression specifying the path pattern 1630 * @param filter 1631 * a user-supplied path filter 1632 * @return an array of FileStatus objects 1633 * @throws IOException if any I/O error occurs when fetching file status 1634 */ 1635 public FileStatus[] globStatus(Path pathPattern, PathFilter filter) 1636 throws IOException { 1637 String filename = pathPattern.toUri().getPath(); 1638 List<FileStatus> allMatches = null; 1639 1640 List<String> filePatterns = GlobExpander.expand(filename); 1641 for (String filePattern : filePatterns) { 1642 Path path = new Path(filePattern.isEmpty() ? Path.CUR_DIR : filePattern); 1643 List<FileStatus> matches = globStatusInternal(path, filter); 1644 if (matches != null) { 1645 if (allMatches == null) { 1646 allMatches = matches; 1647 } else { 1648 allMatches.addAll(matches); 1649 } 1650 } 1651 } 1652 1653 FileStatus[] results = null; 1654 if (allMatches != null) { 1655 results = allMatches.toArray(new FileStatus[allMatches.size()]); 1656 } else if (filePatterns.size() > 1) { 1657 // no matches with multiple expansions is a non-matching glob 1658 results = new FileStatus[0]; 1659 } 1660 return results; 1661 } 1662 1663 // sort gripes because FileStatus Comparable isn't parameterized... 
1664 @SuppressWarnings("unchecked") 1665 private List<FileStatus> globStatusInternal(Path pathPattern, 1666 PathFilter filter) throws IOException { 1667 boolean patternHasGlob = false; // pathPattern has any globs 1668 List<FileStatus> matches = new ArrayList<FileStatus>(); 1669 1670 // determine starting point 1671 int level = 0; 1672 String baseDir = Path.CUR_DIR; 1673 if (pathPattern.isAbsolute()) { 1674 level = 1; // need to skip empty item at beginning of split list 1675 baseDir = Path.SEPARATOR; 1676 } 1677 1678 // parse components and determine if it's a glob 1679 String[] components = null; 1680 GlobFilter[] filters = null; 1681 String filename = pathPattern.toUri().getPath(); 1682 if (!filename.isEmpty() && !Path.SEPARATOR.equals(filename)) { 1683 components = filename.split(Path.SEPARATOR); 1684 filters = new GlobFilter[components.length]; 1685 for (int i=level; i < components.length; i++) { 1686 filters[i] = new GlobFilter(components[i]); 1687 patternHasGlob |= filters[i].hasPattern(); 1688 } 1689 if (!patternHasGlob) { 1690 baseDir = unquotePathComponent(filename); 1691 components = null; // short through to filter check 1692 } 1693 } 1694 1695 // seed the parent directory path, return if it doesn't exist 1696 try { 1697 matches.add(getFileStatus(new Path(baseDir))); 1698 } catch (FileNotFoundException e) { 1699 return patternHasGlob ? 
matches : null; 1700 } 1701 1702 // skip if there are no components other than the basedir 1703 if (components != null) { 1704 // iterate through each path component 1705 for (int i=level; (i < components.length) && !matches.isEmpty(); i++) { 1706 List<FileStatus> children = new ArrayList<FileStatus>(); 1707 for (FileStatus match : matches) { 1708 // don't look for children in a file matched by a glob 1709 if (!match.isDirectory()) { 1710 continue; 1711 } 1712 try { 1713 if (filters[i].hasPattern()) { 1714 // get all children matching the filter 1715 FileStatus[] statuses = listStatus(match.getPath(), filters[i]); 1716 children.addAll(Arrays.asList(statuses)); 1717 } else { 1718 // the component does not have a pattern 1719 String component = unquotePathComponent(components[i]); 1720 Path child = new Path(match.getPath(), component); 1721 children.add(getFileStatus(child)); 1722 } 1723 } catch (FileNotFoundException e) { 1724 // don't care 1725 } 1726 } 1727 matches = children; 1728 } 1729 } 1730 // remove anything that didn't match the filter 1731 if (!matches.isEmpty()) { 1732 Iterator<FileStatus> iter = matches.iterator(); 1733 while (iter.hasNext()) { 1734 if (!filter.accept(iter.next().getPath())) { 1735 iter.remove(); 1736 } 1737 } 1738 } 1739 // no final paths, if there were any globs return empty list 1740 if (matches.isEmpty()) { 1741 return patternHasGlob ? matches : null; 1742 } 1743 Collections.sort(matches); 1744 return matches; 1745 } 1746 1747 /** 1748 * The glob filter builds a regexp per path component. If the component 1749 * does not contain a shell metachar, then it falls back to appending the 1750 * raw string to the list of built up paths. This raw path needs to have 1751 * the quoting removed. Ie. 
convert all occurances of "\X" to "X" 1752 * @param name of the path component 1753 * @return the unquoted path component 1754 */ 1755 private String unquotePathComponent(String name) { 1756 return name.replaceAll("\\\\(.)", "$1"); 1757 } 1758 1759 /** 1760 * List the statuses of the files/directories in the given path if the path is 1761 * a directory. 1762 * Return the file's status and block locations If the path is a file. 1763 * 1764 * If a returned status is a file, it contains the file's block locations. 1765 * 1766 * @param f is the path 1767 * 1768 * @return an iterator that traverses statuses of the files/directories 1769 * in the given path 1770 * 1771 * @throws FileNotFoundException If <code>f</code> does not exist 1772 * @throws IOException If an I/O error occurred 1773 */ 1774 public RemoteIterator<LocatedFileStatus> listLocatedStatus(final Path f) 1775 throws FileNotFoundException, IOException { 1776 return listLocatedStatus(f, DEFAULT_FILTER); 1777 } 1778 1779 /** 1780 * Listing a directory 1781 * The returned results include its block location if it is a file 1782 * The results are filtered by the given path filter 1783 * @param f a path 1784 * @param filter a path filter 1785 * @return an iterator that traverses statuses of the files/directories 1786 * in the given path 1787 * @throws FileNotFoundException if <code>f</code> does not exist 1788 * @throws IOException if any I/O error occurred 1789 */ 1790 protected RemoteIterator<LocatedFileStatus> listLocatedStatus(final Path f, 1791 final PathFilter filter) 1792 throws FileNotFoundException, IOException { 1793 return new RemoteIterator<LocatedFileStatus>() { 1794 private final FileStatus[] stats = listStatus(f, filter); 1795 private int i = 0; 1796 1797 @Override 1798 public boolean hasNext() { 1799 return i<stats.length; 1800 } 1801 1802 @Override 1803 public LocatedFileStatus next() throws IOException { 1804 if (!hasNext()) { 1805 throw new NoSuchElementException("No more entry in " + f); 1806 } 
1807 FileStatus result = stats[i++]; 1808 BlockLocation[] locs = result.isFile() ? 1809 getFileBlockLocations(result.getPath(), 0, result.getLen()) : 1810 null; 1811 return new LocatedFileStatus(result, locs); 1812 } 1813 }; 1814 } 1815 1816 /** 1817 * List the statuses and block locations of the files in the given path. 1818 * 1819 * If the path is a directory, 1820 * if recursive is false, returns files in the directory; 1821 * if recursive is true, return files in the subtree rooted at the path. 1822 * If the path is a file, return the file's status and block locations. 1823 * 1824 * @param f is the path 1825 * @param recursive if the subdirectories need to be traversed recursively 1826 * 1827 * @return an iterator that traverses statuses of the files 1828 * 1829 * @throws FileNotFoundException when the path does not exist; 1830 * IOException see specific implementation 1831 */ 1832 public RemoteIterator<LocatedFileStatus> listFiles( 1833 final Path f, final boolean recursive) 1834 throws FileNotFoundException, IOException { 1835 return new RemoteIterator<LocatedFileStatus>() { 1836 private Stack<RemoteIterator<LocatedFileStatus>> itors = 1837 new Stack<RemoteIterator<LocatedFileStatus>>(); 1838 private RemoteIterator<LocatedFileStatus> curItor = 1839 listLocatedStatus(f); 1840 private LocatedFileStatus curFile; 1841 1842 @Override 1843 public boolean hasNext() throws IOException { 1844 while (curFile == null) { 1845 if (curItor.hasNext()) { 1846 handleFileStat(curItor.next()); 1847 } else if (!itors.empty()) { 1848 curItor = itors.pop(); 1849 } else { 1850 return false; 1851 } 1852 } 1853 return true; 1854 } 1855 1856 /** 1857 * Process the input stat. 1858 * If it is a file, return the file stat. 1859 * If it is a directory, traverse the directory if recursive is true; 1860 * ignore it if recursive is false. 
1861 * @param stat input status 1862 * @throws IOException if any IO error occurs 1863 */ 1864 private void handleFileStat(LocatedFileStatus stat) throws IOException { 1865 if (stat.isFile()) { // file 1866 curFile = stat; 1867 } else if (recursive) { // directory 1868 itors.push(curItor); 1869 curItor = listLocatedStatus(stat.getPath()); 1870 } 1871 } 1872 1873 @Override 1874 public LocatedFileStatus next() throws IOException { 1875 if (hasNext()) { 1876 LocatedFileStatus result = curFile; 1877 curFile = null; 1878 return result; 1879 } 1880 throw new java.util.NoSuchElementException("No more entry in " + f); 1881 } 1882 }; 1883 } 1884 1885 /** Return the current user's home directory in this filesystem. 1886 * The default implementation returns "/user/$USER/". 1887 */ 1888 public Path getHomeDirectory() { 1889 return this.makeQualified( 1890 new Path("/user/"+System.getProperty("user.name"))); 1891 } 1892 1893 1894 /** 1895 * Set the current working directory for the given file system. All relative 1896 * paths will be resolved relative to it. 1897 * 1898 * @param new_dir 1899 */ 1900 public abstract void setWorkingDirectory(Path new_dir); 1901 1902 /** 1903 * Get the current working directory for the given file system 1904 * @return the directory pathname 1905 */ 1906 public abstract Path getWorkingDirectory(); 1907 1908 1909 /** 1910 * Note: with the new FilesContext class, getWorkingDirectory() 1911 * will be removed. 1912 * The working directory is implemented in FilesContext. 1913 * 1914 * Some file systems like LocalFileSystem have an initial workingDir 1915 * that we use as the starting workingDir. For other file systems 1916 * like HDFS there is no built in notion of an initial workingDir. 1917 * 1918 * @return if there is built in notion of workingDir then it 1919 * is returned; else a null is returned. 
1920 */ 1921 protected Path getInitialWorkingDirectory() { 1922 return null; 1923 } 1924 1925 /** 1926 * Call {@link #mkdirs(Path, FsPermission)} with default permission. 1927 */ 1928 public boolean mkdirs(Path f) throws IOException { 1929 return mkdirs(f, FsPermission.getDirDefault()); 1930 } 1931 1932 /** 1933 * Make the given file and all non-existent parents into 1934 * directories. Has the semantics of Unix 'mkdir -p'. 1935 * Existence of the directory hierarchy is not an error. 1936 * @param f path to create 1937 * @param permission to apply to f 1938 */ 1939 public abstract boolean mkdirs(Path f, FsPermission permission 1940 ) throws IOException; 1941 1942 /** 1943 * The src file is on the local disk. Add it to FS at 1944 * the given dst name and the source is kept intact afterwards 1945 * @param src path 1946 * @param dst path 1947 */ 1948 public void copyFromLocalFile(Path src, Path dst) 1949 throws IOException { 1950 copyFromLocalFile(false, src, dst); 1951 } 1952 1953 /** 1954 * The src files is on the local disk. Add it to FS at 1955 * the given dst name, removing the source afterwards. 1956 * @param srcs path 1957 * @param dst path 1958 */ 1959 public void moveFromLocalFile(Path[] srcs, Path dst) 1960 throws IOException { 1961 copyFromLocalFile(true, true, srcs, dst); 1962 } 1963 1964 /** 1965 * The src file is on the local disk. Add it to FS at 1966 * the given dst name, removing the source afterwards. 1967 * @param src path 1968 * @param dst path 1969 */ 1970 public void moveFromLocalFile(Path src, Path dst) 1971 throws IOException { 1972 copyFromLocalFile(true, src, dst); 1973 } 1974 1975 /** 1976 * The src file is on the local disk. Add it to FS at 1977 * the given dst name. 
1978 * delSrc indicates if the source should be removed 1979 * @param delSrc whether to delete the src 1980 * @param src path 1981 * @param dst path 1982 */ 1983 public void copyFromLocalFile(boolean delSrc, Path src, Path dst) 1984 throws IOException { 1985 copyFromLocalFile(delSrc, true, src, dst); 1986 } 1987 1988 /** 1989 * The src files are on the local disk. Add it to FS at 1990 * the given dst name. 1991 * delSrc indicates if the source should be removed 1992 * @param delSrc whether to delete the src 1993 * @param overwrite whether to overwrite an existing file 1994 * @param srcs array of paths which are source 1995 * @param dst path 1996 */ 1997 public void copyFromLocalFile(boolean delSrc, boolean overwrite, 1998 Path[] srcs, Path dst) 1999 throws IOException { 2000 Configuration conf = getConf(); 2001 FileUtil.copy(getLocal(conf), srcs, this, dst, delSrc, overwrite, conf); 2002 } 2003 2004 /** 2005 * The src file is on the local disk. Add it to FS at 2006 * the given dst name. 2007 * delSrc indicates if the source should be removed 2008 * @param delSrc whether to delete the src 2009 * @param overwrite whether to overwrite an existing file 2010 * @param src path 2011 * @param dst path 2012 */ 2013 public void copyFromLocalFile(boolean delSrc, boolean overwrite, 2014 Path src, Path dst) 2015 throws IOException { 2016 Configuration conf = getConf(); 2017 FileUtil.copy(getLocal(conf), src, this, dst, delSrc, overwrite, conf); 2018 } 2019 2020 /** 2021 * The src file is under FS, and the dst is on the local disk. 2022 * Copy it from FS control to the local dst name. 2023 * @param src path 2024 * @param dst path 2025 */ 2026 public void copyToLocalFile(Path src, Path dst) throws IOException { 2027 copyToLocalFile(false, src, dst); 2028 } 2029 2030 /** 2031 * The src file is under FS, and the dst is on the local disk. 2032 * Copy it from FS control to the local dst name. 
2033 * Remove the source afterwards 2034 * @param src path 2035 * @param dst path 2036 */ 2037 public void moveToLocalFile(Path src, Path dst) throws IOException { 2038 copyToLocalFile(true, src, dst); 2039 } 2040 2041 /** 2042 * The src file is under FS, and the dst is on the local disk. 2043 * Copy it from FS control to the local dst name. 2044 * delSrc indicates if the src will be removed or not. 2045 * @param delSrc whether to delete the src 2046 * @param src path 2047 * @param dst path 2048 */ 2049 public void copyToLocalFile(boolean delSrc, Path src, Path dst) 2050 throws IOException { 2051 copyToLocalFile(delSrc, src, dst, false); 2052 } 2053 2054 /** 2055 * The src file is under FS, and the dst is on the local disk. Copy it from FS 2056 * control to the local dst name. delSrc indicates if the src will be removed 2057 * or not. useRawLocalFileSystem indicates whether to use RawLocalFileSystem 2058 * as local file system or not. RawLocalFileSystem is non crc file system.So, 2059 * It will not create any crc files at local. 2060 * 2061 * @param delSrc 2062 * whether to delete the src 2063 * @param src 2064 * path 2065 * @param dst 2066 * path 2067 * @param useRawLocalFileSystem 2068 * whether to use RawLocalFileSystem as local file system or not. 2069 * 2070 * @throws IOException 2071 * - if any IO error 2072 */ 2073 public void copyToLocalFile(boolean delSrc, Path src, Path dst, 2074 boolean useRawLocalFileSystem) throws IOException { 2075 Configuration conf = getConf(); 2076 FileSystem local = null; 2077 if (useRawLocalFileSystem) { 2078 local = getLocal(conf).getRawFileSystem(); 2079 } else { 2080 local = getLocal(conf); 2081 } 2082 FileUtil.copy(this, src, local, dst, delSrc, conf); 2083 } 2084 2085 /** 2086 * Returns a local File that the user can write output to. The caller 2087 * provides both the eventual FS target name and the local working 2088 * file. If the FS is local, we write directly into the target. 
If 2089 * the FS is remote, we write into the tmp local area. 2090 * @param fsOutputFile path of output file 2091 * @param tmpLocalFile path of local tmp file 2092 */ 2093 public Path startLocalOutput(Path fsOutputFile, Path tmpLocalFile) 2094 throws IOException { 2095 return tmpLocalFile; 2096 } 2097 2098 /** 2099 * Called when we're all done writing to the target. A local FS will 2100 * do nothing, because we've written to exactly the right place. A remote 2101 * FS will copy the contents of tmpLocalFile to the correct target at 2102 * fsOutputFile. 2103 * @param fsOutputFile path of output file 2104 * @param tmpLocalFile path to local tmp file 2105 */ 2106 public void completeLocalOutput(Path fsOutputFile, Path tmpLocalFile) 2107 throws IOException { 2108 moveFromLocalFile(tmpLocalFile, fsOutputFile); 2109 } 2110 2111 /** 2112 * No more filesystem operations are needed. Will 2113 * release any held locks. 2114 */ 2115 @Override 2116 public void close() throws IOException { 2117 // delete all files that were marked as delete-on-exit. 2118 processDeleteOnExit(); 2119 CACHE.remove(this.key, this); 2120 } 2121 2122 /** Return the total size of all files in the filesystem.*/ 2123 public long getUsed() throws IOException{ 2124 long used = 0; 2125 FileStatus[] files = listStatus(new Path("/")); 2126 for(FileStatus file:files){ 2127 used += file.getLen(); 2128 } 2129 return used; 2130 } 2131 2132 /** 2133 * Get the block size for a particular file. 2134 * @param f the filename 2135 * @return the number of bytes in a block 2136 */ 2137 /** @deprecated Use getFileStatus() instead */ 2138 @Deprecated 2139 public long getBlockSize(Path f) throws IOException { 2140 return getFileStatus(f).getBlockSize(); 2141 } 2142 2143 /** 2144 * Return the number of bytes that large input files should be optimally 2145 * be split into to minimize i/o time. 
2146 * @deprecated use {@link #getDefaultBlockSize(Path)} instead 2147 */ 2148 @Deprecated 2149 public long getDefaultBlockSize() { 2150 // default to 32MB: large enough to minimize the impact of seeks 2151 return getConf().getLong("fs.local.block.size", 32 * 1024 * 1024); 2152 } 2153 2154 /** Return the number of bytes that large input files should be optimally 2155 * be split into to minimize i/o time. The given path will be used to 2156 * locate the actual filesystem. The full path does not have to exist. 2157 * @param f path of file 2158 * @return the default block size for the path's filesystem 2159 */ 2160 public long getDefaultBlockSize(Path f) { 2161 return getDefaultBlockSize(); 2162 } 2163 2164 /** 2165 * Get the default replication. 2166 * @deprecated use {@link #getDefaultReplication(Path)} instead 2167 */ 2168 @Deprecated 2169 public short getDefaultReplication() { return 1; } 2170 2171 /** 2172 * Get the default replication for a path. The given path will be used to 2173 * locate the actual filesystem. The full path does not have to exist. 2174 * @param path of the file 2175 * @return default replication for the path's filesystem 2176 */ 2177 public short getDefaultReplication(Path path) { 2178 return getDefaultReplication(); 2179 } 2180 2181 /** 2182 * Return a file status object that represents the path. 
2183 * @param f The path we want information from 2184 * @return a FileStatus object 2185 * @throws FileNotFoundException when the path does not exist; 2186 * IOException see specific implementation 2187 */ 2188 public abstract FileStatus getFileStatus(Path f) throws IOException; 2189 2190 /** 2191 * See {@link FileContext#fixRelativePart} 2192 */ 2193 protected Path fixRelativePart(Path p) { 2194 if (p.isUriPathAbsolute()) { 2195 return p; 2196 } else { 2197 return new Path(getWorkingDirectory(), p); 2198 } 2199 } 2200 2201 /** 2202 * See {@link FileContext#createSymlink(Path, Path, boolean)} 2203 */ 2204 public void createSymlink(final Path target, final Path link, 2205 final boolean createParent) throws AccessControlException, 2206 FileAlreadyExistsException, FileNotFoundException, 2207 ParentNotDirectoryException, UnsupportedFileSystemException, 2208 IOException { 2209 // Supporting filesystems should override this method 2210 throw new UnsupportedOperationException( 2211 "Filesystem does not support symlinks!"); 2212 } 2213 2214 /** 2215 * See {@link FileContext#getFileLinkStatus(Path)} 2216 */ 2217 public FileStatus getFileLinkStatus(final Path f) 2218 throws AccessControlException, FileNotFoundException, 2219 UnsupportedFileSystemException, IOException { 2220 // Supporting filesystems should override this method 2221 return getFileStatus(f); 2222 } 2223 2224 /** 2225 * See {@link AbstractFileSystem#supportsSymlinks()} 2226 */ 2227 public boolean supportsSymlinks() { 2228 return false; 2229 } 2230 2231 /** 2232 * See {@link FileContext#getLinkTarget(Path)} 2233 */ 2234 public Path getLinkTarget(Path f) throws IOException { 2235 // Supporting filesystems should override this method 2236 throw new UnsupportedOperationException( 2237 "Filesystem does not support symlinks!"); 2238 } 2239 2240 /** 2241 * See {@link AbstractFileSystem#getLinkTarget(Path)} 2242 */ 2243 protected Path resolveLink(Path f) throws IOException { 2244 // Supporting filesystems should 
override this method 2245 throw new UnsupportedOperationException( 2246 "Filesystem does not support symlinks!"); 2247 } 2248 2249 /** 2250 * Get the checksum of a file. 2251 * 2252 * @param f The file path 2253 * @return The file checksum. The default return value is null, 2254 * which indicates that no checksum algorithm is implemented 2255 * in the corresponding FileSystem. 2256 */ 2257 public FileChecksum getFileChecksum(Path f) throws IOException { 2258 return null; 2259 } 2260 2261 /** 2262 * Set the verify checksum flag. This is only applicable if the 2263 * corresponding FileSystem supports checksum. By default doesn't do anything. 2264 * @param verifyChecksum 2265 */ 2266 public void setVerifyChecksum(boolean verifyChecksum) { 2267 //doesn't do anything 2268 } 2269 2270 /** 2271 * Set the write checksum flag. This is only applicable if the 2272 * corresponding FileSystem supports checksum. By default doesn't do anything. 2273 * @param writeChecksum 2274 */ 2275 public void setWriteChecksum(boolean writeChecksum) { 2276 //doesn't do anything 2277 } 2278 2279 /** 2280 * Returns a status object describing the use and capacity of the 2281 * file system. If the file system has multiple partitions, the 2282 * use and capacity of the root partition is reflected. 2283 * 2284 * @return a FsStatus object 2285 * @throws IOException 2286 * see specific implementation 2287 */ 2288 public FsStatus getStatus() throws IOException { 2289 return getStatus(null); 2290 } 2291 2292 /** 2293 * Returns a status object describing the use and capacity of the 2294 * file system. If the file system has multiple partitions, the 2295 * use and capacity of the partition pointed to by the specified 2296 * path is reflected. 2297 * @param p Path for which status should be obtained. null means 2298 * the default partition. 
2299 * @return a FsStatus object 2300 * @throws IOException 2301 * see specific implementation 2302 */ 2303 public FsStatus getStatus(Path p) throws IOException { 2304 return new FsStatus(Long.MAX_VALUE, 0, Long.MAX_VALUE); 2305 } 2306 2307 /** 2308 * Set permission of a path. 2309 * @param p 2310 * @param permission 2311 */ 2312 public void setPermission(Path p, FsPermission permission 2313 ) throws IOException { 2314 } 2315 2316 /** 2317 * Set owner of a path (i.e. a file or a directory). 2318 * The parameters username and groupname cannot both be null. 2319 * @param p The path 2320 * @param username If it is null, the original username remains unchanged. 2321 * @param groupname If it is null, the original groupname remains unchanged. 2322 */ 2323 public void setOwner(Path p, String username, String groupname 2324 ) throws IOException { 2325 } 2326 2327 /** 2328 * Set access time of a file 2329 * @param p The path 2330 * @param mtime Set the modification time of this file. 2331 * The number of milliseconds since Jan 1, 1970. 2332 * A value of -1 means that this call should not set modification time. 2333 * @param atime Set the access time of this file. 2334 * The number of milliseconds since Jan 1, 1970. 2335 * A value of -1 means that this call should not set access time. 2336 */ 2337 public void setTimes(Path p, long mtime, long atime 2338 ) throws IOException { 2339 } 2340 2341 /** 2342 * Create a snapshot with a default name. 2343 * @param path The directory where snapshots will be taken. 2344 * @return the snapshot path. 2345 */ 2346 public final Path createSnapshot(Path path) throws IOException { 2347 return createSnapshot(path, null); 2348 } 2349 2350 /** 2351 * Create a snapshot 2352 * @param path The directory where snapshots will be taken. 2353 * @param snapshotName The name of the snapshot 2354 * @return the snapshot path. 
2355 */ 2356 public Path createSnapshot(Path path, String snapshotName) 2357 throws IOException { 2358 throw new UnsupportedOperationException(getClass().getSimpleName() 2359 + " doesn't support createSnapshot"); 2360 } 2361 2362 /** 2363 * Rename a snapshot 2364 * @param path The directory path where the snapshot was taken 2365 * @param snapshotOldName Old name of the snapshot 2366 * @param snapshotNewName New name of the snapshot 2367 * @throws IOException 2368 */ 2369 public void renameSnapshot(Path path, String snapshotOldName, 2370 String snapshotNewName) throws IOException { 2371 throw new UnsupportedOperationException(getClass().getSimpleName() 2372 + " doesn't support renameSnapshot"); 2373 } 2374 2375 /** 2376 * Delete a snapshot of a directory 2377 * @param path The directory that the to-be-deleted snapshot belongs to 2378 * @param snapshotName The name of the snapshot 2379 */ 2380 public void deleteSnapshot(Path path, String snapshotName) 2381 throws IOException { 2382 throw new UnsupportedOperationException(getClass().getSimpleName() 2383 + " doesn't support deleteSnapshot"); 2384 } 2385 2386 // making it volatile to be able to do a double checked locking 2387 private volatile static boolean FILE_SYSTEMS_LOADED = false; 2388 2389 private static final Map<String, Class<? extends FileSystem>> 2390 SERVICE_FILE_SYSTEMS = new HashMap<String, Class<? extends FileSystem>>(); 2391 2392 private static void loadFileSystems() { 2393 synchronized (FileSystem.class) { 2394 if (!FILE_SYSTEMS_LOADED) { 2395 ServiceLoader<FileSystem> serviceLoader = ServiceLoader.load(FileSystem.class); 2396 for (FileSystem fs : serviceLoader) { 2397 SERVICE_FILE_SYSTEMS.put(fs.getScheme(), fs.getClass()); 2398 } 2399 FILE_SYSTEMS_LOADED = true; 2400 } 2401 } 2402 } 2403 2404 public static Class<? extends FileSystem> getFileSystemClass(String scheme, 2405 Configuration conf) throws IOException { 2406 if (!FILE_SYSTEMS_LOADED) { 2407 loadFileSystems(); 2408 } 2409 Class<? 
extends FileSystem> clazz = null; 2410 if (conf != null) { 2411 clazz = (Class<? extends FileSystem>) conf.getClass("fs." + scheme + ".impl", null); 2412 } 2413 if (clazz == null) { 2414 clazz = SERVICE_FILE_SYSTEMS.get(scheme); 2415 } 2416 if (clazz == null) { 2417 throw new IOException("No FileSystem for scheme: " + scheme); 2418 } 2419 return clazz; 2420 } 2421 2422 private static FileSystem createFileSystem(URI uri, Configuration conf 2423 ) throws IOException { 2424 Class<?> clazz = getFileSystemClass(uri.getScheme(), conf); 2425 if (clazz == null) { 2426 throw new IOException("No FileSystem for scheme: " + uri.getScheme()); 2427 } 2428 FileSystem fs = (FileSystem)ReflectionUtils.newInstance(clazz, conf); 2429 fs.initialize(uri, conf); 2430 return fs; 2431 } 2432 2433 /** Caching FileSystem objects */ 2434 static class Cache { 2435 private final ClientFinalizer clientFinalizer = new ClientFinalizer(); 2436 2437 private final Map<Key, FileSystem> map = new HashMap<Key, FileSystem>(); 2438 private final Set<Key> toAutoClose = new HashSet<Key>(); 2439 2440 /** A variable that makes all objects in the cache unique */ 2441 private static AtomicLong unique = new AtomicLong(1); 2442 2443 FileSystem get(URI uri, Configuration conf) throws IOException{ 2444 Key key = new Key(uri, conf); 2445 return getInternal(uri, conf, key); 2446 } 2447 2448 /** The objects inserted into the cache using this method are all unique */ 2449 FileSystem getUnique(URI uri, Configuration conf) throws IOException{ 2450 Key key = new Key(uri, conf, unique.getAndIncrement()); 2451 return getInternal(uri, conf, key); 2452 } 2453 2454 private FileSystem getInternal(URI uri, Configuration conf, Key key) throws IOException{ 2455 FileSystem fs; 2456 synchronized (this) { 2457 fs = map.get(key); 2458 } 2459 if (fs != null) { 2460 return fs; 2461 } 2462 2463 fs = createFileSystem(uri, conf); 2464 synchronized (this) { // refetch the lock again 2465 FileSystem oldfs = map.get(key); 2466 if (oldfs != 
null) { // a file system is created while lock is releasing 2467 fs.close(); // close the new file system 2468 return oldfs; // return the old file system 2469 } 2470 2471 // now insert the new file system into the map 2472 if (map.isEmpty() 2473 && !ShutdownHookManager.get().isShutdownInProgress()) { 2474 ShutdownHookManager.get().addShutdownHook(clientFinalizer, SHUTDOWN_HOOK_PRIORITY); 2475 } 2476 fs.key = key; 2477 map.put(key, fs); 2478 if (conf.getBoolean("fs.automatic.close", true)) { 2479 toAutoClose.add(key); 2480 } 2481 return fs; 2482 } 2483 } 2484 2485 synchronized void remove(Key key, FileSystem fs) { 2486 if (map.containsKey(key) && fs == map.get(key)) { 2487 map.remove(key); 2488 toAutoClose.remove(key); 2489 } 2490 } 2491 2492 synchronized void closeAll() throws IOException { 2493 closeAll(false); 2494 } 2495 2496 /** 2497 * Close all FileSystem instances in the Cache. 2498 * @param onlyAutomatic only close those that are marked for automatic closing 2499 */ 2500 synchronized void closeAll(boolean onlyAutomatic) throws IOException { 2501 List<IOException> exceptions = new ArrayList<IOException>(); 2502 2503 // Make a copy of the keys in the map since we'll be modifying 2504 // the map while iterating over it, which isn't safe. 
2505 List<Key> keys = new ArrayList<Key>(); 2506 keys.addAll(map.keySet()); 2507 2508 for (Key key : keys) { 2509 final FileSystem fs = map.get(key); 2510 2511 if (onlyAutomatic && !toAutoClose.contains(key)) { 2512 continue; 2513 } 2514 2515 //remove from cache 2516 remove(key, fs); 2517 2518 if (fs != null) { 2519 try { 2520 fs.close(); 2521 } 2522 catch(IOException ioe) { 2523 exceptions.add(ioe); 2524 } 2525 } 2526 } 2527 2528 if (!exceptions.isEmpty()) { 2529 throw MultipleIOException.createIOException(exceptions); 2530 } 2531 } 2532 2533 private class ClientFinalizer implements Runnable { 2534 @Override 2535 public synchronized void run() { 2536 try { 2537 closeAll(true); 2538 } catch (IOException e) { 2539 LOG.info("FileSystem.Cache.closeAll() threw an exception:\n" + e); 2540 } 2541 } 2542 } 2543 2544 synchronized void closeAll(UserGroupInformation ugi) throws IOException { 2545 List<FileSystem> targetFSList = new ArrayList<FileSystem>(); 2546 //Make a pass over the list and collect the filesystems to close 2547 //we cannot close inline since close() removes the entry from the Map 2548 for (Map.Entry<Key, FileSystem> entry : map.entrySet()) { 2549 final Key key = entry.getKey(); 2550 final FileSystem fs = entry.getValue(); 2551 if (ugi.equals(key.ugi) && fs != null) { 2552 targetFSList.add(fs); 2553 } 2554 } 2555 List<IOException> exceptions = new ArrayList<IOException>(); 2556 //now make a pass over the target list and close each 2557 for (FileSystem fs : targetFSList) { 2558 try { 2559 fs.close(); 2560 } 2561 catch(IOException ioe) { 2562 exceptions.add(ioe); 2563 } 2564 } 2565 if (!exceptions.isEmpty()) { 2566 throw MultipleIOException.createIOException(exceptions); 2567 } 2568 } 2569 2570 /** FileSystem.Cache.Key */ 2571 static class Key { 2572 final String scheme; 2573 final String authority; 2574 final UserGroupInformation ugi; 2575 final long unique; // an artificial way to make a key unique 2576 2577 Key(URI uri, Configuration conf) throws 
IOException { 2578 this(uri, conf, 0); 2579 } 2580 2581 Key(URI uri, Configuration conf, long unique) throws IOException { 2582 scheme = uri.getScheme()==null?"":uri.getScheme().toLowerCase(); 2583 authority = uri.getAuthority()==null?"":uri.getAuthority().toLowerCase(); 2584 this.unique = unique; 2585 2586 this.ugi = UserGroupInformation.getCurrentUser(); 2587 } 2588 2589 @Override 2590 public int hashCode() { 2591 return (scheme + authority).hashCode() + ugi.hashCode() + (int)unique; 2592 } 2593 2594 static boolean isEqual(Object a, Object b) { 2595 return a == b || (a != null && a.equals(b)); 2596 } 2597 2598 @Override 2599 public boolean equals(Object obj) { 2600 if (obj == this) { 2601 return true; 2602 } 2603 if (obj != null && obj instanceof Key) { 2604 Key that = (Key)obj; 2605 return isEqual(this.scheme, that.scheme) 2606 && isEqual(this.authority, that.authority) 2607 && isEqual(this.ugi, that.ugi) 2608 && (this.unique == that.unique); 2609 } 2610 return false; 2611 } 2612 2613 @Override 2614 public String toString() { 2615 return "("+ugi.toString() + ")@" + scheme + "://" + authority; 2616 } 2617 } 2618 } 2619 2620 public static final class Statistics { 2621 private final String scheme; 2622 private AtomicLong bytesRead = new AtomicLong(); 2623 private AtomicLong bytesWritten = new AtomicLong(); 2624 private AtomicInteger readOps = new AtomicInteger(); 2625 private AtomicInteger largeReadOps = new AtomicInteger(); 2626 private AtomicInteger writeOps = new AtomicInteger(); 2627 2628 public Statistics(String scheme) { 2629 this.scheme = scheme; 2630 } 2631 2632 /** 2633 * Copy constructor. 2634 * 2635 * @param st 2636 * The input Statistics object which is cloned. 
2637 */ 2638 public Statistics(Statistics st) { 2639 this.scheme = st.scheme; 2640 this.bytesRead = new AtomicLong(st.bytesRead.longValue()); 2641 this.bytesWritten = new AtomicLong(st.bytesWritten.longValue()); 2642 } 2643 2644 /** 2645 * Increment the bytes read in the statistics 2646 * @param newBytes the additional bytes read 2647 */ 2648 public void incrementBytesRead(long newBytes) { 2649 bytesRead.getAndAdd(newBytes); 2650 } 2651 2652 /** 2653 * Increment the bytes written in the statistics 2654 * @param newBytes the additional bytes written 2655 */ 2656 public void incrementBytesWritten(long newBytes) { 2657 bytesWritten.getAndAdd(newBytes); 2658 } 2659 2660 /** 2661 * Increment the number of read operations 2662 * @param count number of read operations 2663 */ 2664 public void incrementReadOps(int count) { 2665 readOps.getAndAdd(count); 2666 } 2667 2668 /** 2669 * Increment the number of large read operations 2670 * @param count number of large read operations 2671 */ 2672 public void incrementLargeReadOps(int count) { 2673 largeReadOps.getAndAdd(count); 2674 } 2675 2676 /** 2677 * Increment the number of write operations 2678 * @param count number of write operations 2679 */ 2680 public void incrementWriteOps(int count) { 2681 writeOps.getAndAdd(count); 2682 } 2683 2684 /** 2685 * Get the total number of bytes read 2686 * @return the number of bytes 2687 */ 2688 public long getBytesRead() { 2689 return bytesRead.get(); 2690 } 2691 2692 /** 2693 * Get the total number of bytes written 2694 * @return the number of bytes 2695 */ 2696 public long getBytesWritten() { 2697 return bytesWritten.get(); 2698 } 2699 2700 /** 2701 * Get the number of file system read operations such as list files 2702 * @return number of read operations 2703 */ 2704 public int getReadOps() { 2705 return readOps.get() + largeReadOps.get(); 2706 } 2707 2708 /** 2709 * Get the number of large file system read operations such as list files 2710 * under a large directory 2711 * @return 
number of large read operations
     */
    public int getLargeReadOps() {
      return largeReadOps.get();
    }

    /**
     * Get the number of file system write operations such as create, append
     * rename etc.
     * @return number of write operations
     */
    public int getWriteOps() {
      return writeOps.get();
    }

    /**
     * Renders all five counters as one human-readable line. The read-ops
     * figure shown is the plain read counter, without the large read
     * operations.
     */
    @Override
    public String toString() {
      final StringBuilder text = new StringBuilder();
      text.append(bytesRead).append(" bytes read, ");
      text.append(bytesWritten).append(" bytes written, ");
      text.append(readOps).append(" read ops, ");
      text.append(largeReadOps).append(" large read ops, ");
      text.append(writeOps).append(" write ops");
      return text.toString();
    }

    /**
     * Reset the counts of bytes to 0. The operation counters are left
     * untouched.
     */
    public void reset() {
      bytesWritten.set(0);
      bytesRead.set(0);
    }

    /**
     * Get the uri scheme associated with this statistics object.
     * @return the scheme associated with this set of statistics
     */
    public String getScheme() {
      return scheme;
    }
  }

  /**
   * Get the Map of Statistics object indexed by URI Scheme.
   * The returned map is a fresh copy; if several entries of the
   * statistics table report the same scheme, only one of them is kept.
   * @return a Map having a key as URI scheme and value as Statistics object
   * @deprecated use {@link #getAllStatistics} instead
   */
  @Deprecated
  public static synchronized Map<String, Statistics> getStatistics() {
    final Map<String, Statistics> byScheme = new HashMap<String, Statistics>();
    for (Statistics entry : statisticsTable.values()) {
      byScheme.put(entry.getScheme(), entry);
    }
    return byScheme;
  }

  /**
   * Return the Statistics objects of all FileSystem classes that have
   * one, as a fresh list.
   */
  public static synchronized List<Statistics> getAllStatistics() {
    final List<Statistics> all = new ArrayList<Statistics>();
    all.addAll(statisticsTable.values());
    return all;
  }

  /**
   * Get the statistics for a particular file system, creating and
   * registering them on first use.
   * @param scheme the scheme given to a newly created statistics object
   * @param cls the class to lookup
   * @return a statistics object
   */
  public static synchronized
  Statistics getStatistics(String scheme, Class<? extends FileSystem> cls) {
    final Statistics known = statisticsTable.get(cls);
    if (known != null) {
      return known;
    }
    final Statistics created = new Statistics(scheme);
    statisticsTable.put(cls, created);
    return created;
  }

  /**
   * Reset all statistics for all file systems, by calling
   * {@link Statistics#reset()} on each of them.
   */
  public static synchronized void clearStatistics() {
    for (Statistics entry : statisticsTable.values()) {
      entry.reset();
    }
  }

  /**
   * Print all statistics for all file systems to standard output, one
   * line per FileSystem class.
   */
  public static synchronized
  void printStatistics() throws IOException {
    for (Map.Entry<Class<? extends FileSystem>, Statistics> entry:
            statisticsTable.entrySet()) {
      final String className = entry.getKey().getName();
      System.out.println(" FileSystem " + className +
                         ": " + entry.getValue());
    }
  }
}