001 /** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018 package org.apache.hadoop.fs; 019 020 import java.io.Closeable; 021 import java.io.FileNotFoundException; 022 import java.io.IOException; 023 import java.net.URI; 024 import java.security.PrivilegedExceptionAction; 025 import java.util.ArrayList; 026 import java.util.Arrays; 027 import java.util.EnumSet; 028 import java.util.HashMap; 029 import java.util.HashSet; 030 import java.util.IdentityHashMap; 031 import java.util.Iterator; 032 import java.util.List; 033 import java.util.Map; 034 import java.util.NoSuchElementException; 035 import java.util.ServiceLoader; 036 import java.util.Set; 037 import java.util.Stack; 038 import java.util.TreeSet; 039 import java.util.concurrent.atomic.AtomicInteger; 040 import java.util.concurrent.atomic.AtomicLong; 041 042 import org.apache.commons.logging.Log; 043 import org.apache.commons.logging.LogFactory; 044 import org.apache.hadoop.classification.InterfaceAudience; 045 import org.apache.hadoop.classification.InterfaceStability; 046 import org.apache.hadoop.conf.Configuration; 047 import org.apache.hadoop.conf.Configured; 048 import org.apache.hadoop.fs.Options.ChecksumOpt; 049 import 
org.apache.hadoop.fs.Options.Rename; 050 import org.apache.hadoop.fs.permission.FsPermission; 051 import org.apache.hadoop.io.MultipleIOException; 052 import org.apache.hadoop.io.Text; 053 import org.apache.hadoop.net.NetUtils; 054 import org.apache.hadoop.security.Credentials; 055 import org.apache.hadoop.security.SecurityUtil; 056 import org.apache.hadoop.security.UserGroupInformation; 057 import org.apache.hadoop.security.token.Token; 058 import org.apache.hadoop.util.DataChecksum; 059 import org.apache.hadoop.util.Progressable; 060 import org.apache.hadoop.util.ReflectionUtils; 061 import org.apache.hadoop.util.ShutdownHookManager; 062 063 import com.google.common.annotations.VisibleForTesting; 064 065 /**************************************************************** 066 * An abstract base class for a fairly generic filesystem. It 067 * may be implemented as a distributed filesystem, or as a "local" 068 * one that reflects the locally-connected disk. The local version 069 * exists for small Hadoop instances and for testing. 070 * 071 * <p> 072 * 073 * All user code that may potentially use the Hadoop Distributed 074 * File System should be written to use a FileSystem object. The 075 * Hadoop DFS is a multi-machine system that appears as a single 076 * disk. It's useful because of its fault tolerance and potentially 077 * very large capacity. 078 * 079 * <p> 080 * The local implementation is {@link LocalFileSystem} and distributed 081 * implementation is DistributedFileSystem. 
*****************************************************************/
@InterfaceAudience.Public
@InterfaceStability.Stable
public abstract class FileSystem extends Configured implements Closeable {
  /** Configuration key under which the default filesystem URI is stored. */
  public static final String FS_DEFAULT_NAME_KEY = 
                   CommonConfigurationKeys.FS_DEFAULT_NAME_KEY;
  /** Filesystem name used when {@link #FS_DEFAULT_NAME_KEY} is not set. */
  public static final String DEFAULT_FS = 
                   CommonConfigurationKeys.FS_DEFAULT_NAME_DEFAULT;

  /** Logger for this class. */
  public static final Log LOG = LogFactory.getLog(FileSystem.class);

  /**
   * Priority of the FileSystem shutdown hook.
   */
  public static final int SHUTDOWN_HOOK_PRIORITY = 10;

  /** FileSystem cache */
  static final Cache CACHE = new Cache();

  /** The key this instance is stored under in the cache. */
  private Cache.Key key;

  /** Statistics recorded per FileSystem class (keyed by class identity). */
  private static final Map<Class<? extends FileSystem>, Statistics> 
    statisticsTable =
      new IdentityHashMap<Class<? extends FileSystem>, Statistics>();
  
  /**
   * The statistics for this file system.
   */
  protected Statistics statistics;

  /**
   * A cache of files that should be deleted when the filesystem is closed
   * or the JVM is exited.
   */
  private Set<Path> deleteOnExit = new TreeSet<Path>();
  
  /**
   * This method adds a file system for testing so that we can find it later. It
   * is only for testing.
123 * @param uri the uri to store it under 124 * @param conf the configuration to store it under 125 * @param fs the file system to store 126 * @throws IOException 127 */ 128 static void addFileSystemForTesting(URI uri, Configuration conf, 129 FileSystem fs) throws IOException { 130 CACHE.map.put(new Cache.Key(uri, conf), fs); 131 } 132 133 /** 134 * Get a filesystem instance based on the uri, the passed 135 * configuration and the user 136 * @param uri of the filesystem 137 * @param conf the configuration to use 138 * @param user to perform the get as 139 * @return the filesystem instance 140 * @throws IOException 141 * @throws InterruptedException 142 */ 143 public static FileSystem get(final URI uri, final Configuration conf, 144 final String user) throws IOException, InterruptedException { 145 String ticketCachePath = 146 conf.get(CommonConfigurationKeys.KERBEROS_TICKET_CACHE_PATH); 147 UserGroupInformation ugi = 148 UserGroupInformation.getBestUGI(ticketCachePath, user); 149 return ugi.doAs(new PrivilegedExceptionAction<FileSystem>() { 150 @Override 151 public FileSystem run() throws IOException { 152 return get(uri, conf); 153 } 154 }); 155 } 156 157 /** 158 * Returns the configured filesystem implementation. 159 * @param conf the configuration to use 160 */ 161 public static FileSystem get(Configuration conf) throws IOException { 162 return get(getDefaultUri(conf), conf); 163 } 164 165 /** Get the default filesystem URI from a configuration. 166 * @param conf the configuration to use 167 * @return the uri of the default filesystem 168 */ 169 public static URI getDefaultUri(Configuration conf) { 170 return URI.create(fixName(conf.get(FS_DEFAULT_NAME_KEY, DEFAULT_FS))); 171 } 172 173 /** Set the default filesystem URI in a configuration. 
174 * @param conf the configuration to alter 175 * @param uri the new default filesystem uri 176 */ 177 public static void setDefaultUri(Configuration conf, URI uri) { 178 conf.set(FS_DEFAULT_NAME_KEY, uri.toString()); 179 } 180 181 /** Set the default filesystem URI in a configuration. 182 * @param conf the configuration to alter 183 * @param uri the new default filesystem uri 184 */ 185 public static void setDefaultUri(Configuration conf, String uri) { 186 setDefaultUri(conf, URI.create(fixName(uri))); 187 } 188 189 /** Called after a new FileSystem instance is constructed. 190 * @param name a uri whose authority section names the host, port, etc. 191 * for this FileSystem 192 * @param conf the configuration 193 */ 194 public void initialize(URI name, Configuration conf) throws IOException { 195 statistics = getStatistics(name.getScheme(), getClass()); 196 } 197 198 /** 199 * Return the protocol scheme for the FileSystem. 200 * <p/> 201 * This implementation throws an <code>UnsupportedOperationException</code>. 202 * 203 * @return the protocol scheme for the FileSystem. 204 */ 205 public String getScheme() { 206 throw new UnsupportedOperationException("Not implemented by the " + getClass().getSimpleName() + " FileSystem implementation"); 207 } 208 209 /** Returns a URI whose scheme and authority identify this FileSystem.*/ 210 public abstract URI getUri(); 211 212 /** 213 * Resolve the uri's hostname and add the default port if not in the uri 214 * @return URI 215 * @see NetUtils#getCanonicalUri(URI, int) 216 */ 217 protected URI getCanonicalUri() { 218 return NetUtils.getCanonicalUri(getUri(), getDefaultPort()); 219 } 220 221 /** 222 * Get the default port for this file system. 223 * @return the default port or 0 if there isn't one 224 */ 225 protected int getDefaultPort() { 226 return 0; 227 } 228 229 /** 230 * Get a canonical service name for this file system. 
The token cache is 231 * the only user of the canonical service name, and uses it to lookup this 232 * filesystem's service tokens. 233 * If file system provides a token of its own then it must have a canonical 234 * name, otherwise canonical name can be null. 235 * 236 * Default Impl: If the file system has child file systems 237 * (such as an embedded file system) then it is assumed that the fs has no 238 * tokens of its own and hence returns a null name; otherwise a service 239 * name is built using Uri and port. 240 * 241 * @return a service string that uniquely identifies this file system, null 242 * if the filesystem does not implement tokens 243 * @see SecurityUtil#buildDTServiceName(URI, int) 244 */ 245 @InterfaceAudience.LimitedPrivate({ "HDFS", "MapReduce" }) 246 public String getCanonicalServiceName() { 247 return (getChildFileSystems() == null) 248 ? SecurityUtil.buildDTServiceName(getUri(), getDefaultPort()) 249 : null; 250 } 251 252 /** @deprecated call #getUri() instead.*/ 253 @Deprecated 254 public String getName() { return getUri().toString(); } 255 256 /** @deprecated call #get(URI,Configuration) instead. */ 257 @Deprecated 258 public static FileSystem getNamed(String name, Configuration conf) 259 throws IOException { 260 return get(URI.create(fixName(name)), conf); 261 } 262 263 /** Update old-format filesystem names, for back-compatibility. This should 264 * eventually be replaced with a checkName() method that throws an exception 265 * for old-format names. */ 266 private static String fixName(String name) { 267 // convert old-format name to new-format name 268 if (name.equals("local")) { // "local" is now "file:///". 269 LOG.warn("\"local\" is a deprecated filesystem name." 270 +" Use \"file:///\" instead."); 271 name = "file:///"; 272 } else if (name.indexOf('/')==-1) { // unqualified is "hdfs://" 273 LOG.warn("\""+name+"\" is a deprecated filesystem name." 
274 +" Use \"hdfs://"+name+"/\" instead."); 275 name = "hdfs://"+name; 276 } 277 return name; 278 } 279 280 /** 281 * Get the local file system. 282 * @param conf the configuration to configure the file system with 283 * @return a LocalFileSystem 284 */ 285 public static LocalFileSystem getLocal(Configuration conf) 286 throws IOException { 287 return (LocalFileSystem)get(LocalFileSystem.NAME, conf); 288 } 289 290 /** Returns the FileSystem for this URI's scheme and authority. The scheme 291 * of the URI determines a configuration property name, 292 * <tt>fs.<i>scheme</i>.class</tt> whose value names the FileSystem class. 293 * The entire URI is passed to the FileSystem instance's initialize method. 294 */ 295 public static FileSystem get(URI uri, Configuration conf) throws IOException { 296 String scheme = uri.getScheme(); 297 String authority = uri.getAuthority(); 298 299 if (scheme == null && authority == null) { // use default FS 300 return get(conf); 301 } 302 303 if (scheme != null && authority == null) { // no authority 304 URI defaultUri = getDefaultUri(conf); 305 if (scheme.equals(defaultUri.getScheme()) // if scheme matches default 306 && defaultUri.getAuthority() != null) { // & default has authority 307 return get(defaultUri, conf); // return default 308 } 309 } 310 311 String disableCacheName = String.format("fs.%s.impl.disable.cache", scheme); 312 if (conf.getBoolean(disableCacheName, false)) { 313 return createFileSystem(uri, conf); 314 } 315 316 return CACHE.get(uri, conf); 317 } 318 319 /** 320 * Returns the FileSystem for this URI's scheme and authority and the 321 * passed user. 
Internally invokes {@link #newInstance(URI, Configuration)} 322 * @param uri of the filesystem 323 * @param conf the configuration to use 324 * @param user to perform the get as 325 * @return filesystem instance 326 * @throws IOException 327 * @throws InterruptedException 328 */ 329 public static FileSystem newInstance(final URI uri, final Configuration conf, 330 final String user) throws IOException, InterruptedException { 331 String ticketCachePath = 332 conf.get(CommonConfigurationKeys.KERBEROS_TICKET_CACHE_PATH); 333 UserGroupInformation ugi = 334 UserGroupInformation.getBestUGI(ticketCachePath, user); 335 return ugi.doAs(new PrivilegedExceptionAction<FileSystem>() { 336 @Override 337 public FileSystem run() throws IOException { 338 return newInstance(uri,conf); 339 } 340 }); 341 } 342 /** Returns the FileSystem for this URI's scheme and authority. The scheme 343 * of the URI determines a configuration property name, 344 * <tt>fs.<i>scheme</i>.class</tt> whose value names the FileSystem class. 345 * The entire URI is passed to the FileSystem instance's initialize method. 346 * This always returns a new FileSystem object. 347 */ 348 public static FileSystem newInstance(URI uri, Configuration conf) throws IOException { 349 String scheme = uri.getScheme(); 350 String authority = uri.getAuthority(); 351 352 if (scheme == null) { // no scheme: use default FS 353 return newInstance(conf); 354 } 355 356 if (authority == null) { // no authority 357 URI defaultUri = getDefaultUri(conf); 358 if (scheme.equals(defaultUri.getScheme()) // if scheme matches default 359 && defaultUri.getAuthority() != null) { // & default has authority 360 return newInstance(defaultUri, conf); // return default 361 } 362 } 363 return CACHE.getUnique(uri, conf); 364 } 365 366 /** Returns a unique configured filesystem implementation. 367 * This always returns a new FileSystem object. 
368 * @param conf the configuration to use 369 */ 370 public static FileSystem newInstance(Configuration conf) throws IOException { 371 return newInstance(getDefaultUri(conf), conf); 372 } 373 374 /** 375 * Get a unique local file system object 376 * @param conf the configuration to configure the file system with 377 * @return a LocalFileSystem 378 * This always returns a new FileSystem object. 379 */ 380 public static LocalFileSystem newInstanceLocal(Configuration conf) 381 throws IOException { 382 return (LocalFileSystem)newInstance(LocalFileSystem.NAME, conf); 383 } 384 385 /** 386 * Close all cached filesystems. Be sure those filesystems are not 387 * used anymore. 388 * 389 * @throws IOException 390 */ 391 public static void closeAll() throws IOException { 392 CACHE.closeAll(); 393 } 394 395 /** 396 * Close all cached filesystems for a given UGI. Be sure those filesystems 397 * are not used anymore. 398 * @param ugi user group info to close 399 * @throws IOException 400 */ 401 public static void closeAllForUGI(UserGroupInformation ugi) 402 throws IOException { 403 CACHE.closeAll(ugi); 404 } 405 406 /** 407 * Make sure that a path specifies a FileSystem. 408 * @param path to use 409 */ 410 public Path makeQualified(Path path) { 411 checkPath(path); 412 return path.makeQualified(this.getUri(), this.getWorkingDirectory()); 413 } 414 415 /** 416 * Get a new delegation token for this file system. 417 * This is an internal method that should have been declared protected 418 * but wasn't historically. 419 * Callers should use {@link #addDelegationTokens(String, Credentials)} 420 * 421 * @param renewer the account name that is allowed to renew the token. 
422 * @return a new delegation token 423 * @throws IOException 424 */ 425 @InterfaceAudience.Private() 426 public Token<?> getDelegationToken(String renewer) throws IOException { 427 return null; 428 } 429 430 /** 431 * Obtain all delegation tokens used by this FileSystem that are not 432 * already present in the given Credentials. Existing tokens will neither 433 * be verified as valid nor having the given renewer. Missing tokens will 434 * be acquired and added to the given Credentials. 435 * 436 * Default Impl: works for simple fs with its own token 437 * and also for an embedded fs whose tokens are those of its 438 * children file system (i.e. the embedded fs has not tokens of its 439 * own). 440 * 441 * @param renewer the user allowed to renew the delegation tokens 442 * @param credentials cache in which to add new delegation tokens 443 * @return list of new delegation tokens 444 * @throws IOException 445 */ 446 @InterfaceAudience.LimitedPrivate({ "HDFS", "MapReduce" }) 447 public Token<?>[] addDelegationTokens( 448 final String renewer, Credentials credentials) throws IOException { 449 if (credentials == null) { 450 credentials = new Credentials(); 451 } 452 final List<Token<?>> tokens = new ArrayList<Token<?>>(); 453 collectDelegationTokens(renewer, credentials, tokens); 454 return tokens.toArray(new Token<?>[tokens.size()]); 455 } 456 457 /** 458 * Recursively obtain the tokens for this FileSystem and all descended 459 * FileSystems as determined by getChildFileSystems(). 
460 * @param renewer the user allowed to renew the delegation tokens 461 * @param credentials cache in which to add the new delegation tokens 462 * @param tokens list in which to add acquired tokens 463 * @throws IOException 464 */ 465 private void collectDelegationTokens(final String renewer, 466 final Credentials credentials, 467 final List<Token<?>> tokens) 468 throws IOException { 469 final String serviceName = getCanonicalServiceName(); 470 // Collect token of the this filesystem and then of its embedded children 471 if (serviceName != null) { // fs has token, grab it 472 final Text service = new Text(serviceName); 473 Token<?> token = credentials.getToken(service); 474 if (token == null) { 475 token = getDelegationToken(renewer); 476 if (token != null) { 477 tokens.add(token); 478 credentials.addToken(service, token); 479 } 480 } 481 } 482 // Now collect the tokens from the children 483 final FileSystem[] children = getChildFileSystems(); 484 if (children != null) { 485 for (final FileSystem fs : children) { 486 fs.collectDelegationTokens(renewer, credentials, tokens); 487 } 488 } 489 } 490 491 /** 492 * Get all the immediate child FileSystems embedded in this FileSystem. 493 * It does not recurse and get grand children. If a FileSystem 494 * has multiple child FileSystems, then it should return a unique list 495 * of those FileSystems. Default is to return null to signify no children. 496 * 497 * @return FileSystems used by this FileSystem 498 */ 499 @InterfaceAudience.LimitedPrivate({ "HDFS" }) 500 @VisibleForTesting 501 public FileSystem[] getChildFileSystems() { 502 return null; 503 } 504 505 /** create a file with the provided permission 506 * The permission of the file is set to be the provided permission as in 507 * setPermission, not permission&~umask 508 * 509 * It is implemented using two RPCs. It is understood that it is inefficient, 510 * but the implementation is thread-safe. 
The other option is to change the 511 * value of umask in configuration to be 0, but it is not thread-safe. 512 * 513 * @param fs file system handle 514 * @param file the name of the file to be created 515 * @param permission the permission of the file 516 * @return an output stream 517 * @throws IOException 518 */ 519 public static FSDataOutputStream create(FileSystem fs, 520 Path file, FsPermission permission) throws IOException { 521 // create the file with default permission 522 FSDataOutputStream out = fs.create(file); 523 // set its permission to the supplied one 524 fs.setPermission(file, permission); 525 return out; 526 } 527 528 /** create a directory with the provided permission 529 * The permission of the directory is set to be the provided permission as in 530 * setPermission, not permission&~umask 531 * 532 * @see #create(FileSystem, Path, FsPermission) 533 * 534 * @param fs file system handle 535 * @param dir the name of the directory to be created 536 * @param permission the permission of the directory 537 * @return true if the directory creation succeeds; false otherwise 538 * @throws IOException 539 */ 540 public static boolean mkdirs(FileSystem fs, Path dir, FsPermission permission) 541 throws IOException { 542 // create the directory using the default permission 543 boolean result = fs.mkdirs(dir); 544 // set its permission to be the supplied one 545 fs.setPermission(dir, permission); 546 return result; 547 } 548 549 /////////////////////////////////////////////////////////////// 550 // FileSystem 551 /////////////////////////////////////////////////////////////// 552 553 protected FileSystem() { 554 super(null); 555 } 556 557 /** 558 * Check that a Path belongs to this FileSystem. 
* A path without a scheme is always accepted. Otherwise its scheme must
   * match this filesystem's canonical uri (ignoring case) and, after
   * canonicalization, so must its authority.
   * @param path to check
   * @throws IllegalArgumentException if the path names a different filesystem
   */
  protected void checkPath(Path path) {
    URI uri = path.toUri();
    String thatScheme = uri.getScheme();
    if (thatScheme == null)                // fs is relative
      return;
    URI thisUri = getCanonicalUri();
    String thisScheme = thisUri.getScheme();
    //authority and scheme are not case sensitive
    if (thisScheme.equalsIgnoreCase(thatScheme)) {// schemes match
      String thisAuthority = thisUri.getAuthority();
      String thatAuthority = uri.getAuthority();
      if (thatAuthority == null &&                // path's authority is null
          thisAuthority != null) {                // fs has an authority
        // fall back on the authority of the configured default filesystem
        URI defaultUri = getDefaultUri(getConf());
        if (thisScheme.equalsIgnoreCase(defaultUri.getScheme())) {
          uri = defaultUri; // schemes match, so use this uri instead
        } else {
          uri = null; // can't determine auth of the path
        }
      }
      if (uri != null) {
        // canonicalize uri before comparing with this fs
        uri = NetUtils.getCanonicalUri(uri, getDefaultPort());
        thatAuthority = uri.getAuthority();
        // the reference comparison covers the case where both are null;
        // non-null authorities are compared ignoring case
        if (thisAuthority == thatAuthority ||       // authorities match
            (thisAuthority != null &&
             thisAuthority.equalsIgnoreCase(thatAuthority)))
          return;
      }
    }
    throw new IllegalArgumentException("Wrong FS: "+path+
                                       ", expected: "+this.getUri());
  }

  /**
   * Return an array containing hostnames, offset and size of 
   * portions of the given file.  For a nonexistent 
   * file or regions, null will be returned.
   *
   * This call is most helpful with DFS, where it returns 
   * hostnames of machines that contain the given file.
   *
   * The FileSystem will simply return an elt containing 'localhost'.
604 * 605 * @param file FilesStatus to get data from 606 * @param start offset into the given file 607 * @param len length for which to get locations for 608 */ 609 public BlockLocation[] getFileBlockLocations(FileStatus file, 610 long start, long len) throws IOException { 611 if (file == null) { 612 return null; 613 } 614 615 if (start < 0 || len < 0) { 616 throw new IllegalArgumentException("Invalid start or len parameter"); 617 } 618 619 if (file.getLen() <= start) { 620 return new BlockLocation[0]; 621 622 } 623 String[] name = { "localhost:50010" }; 624 String[] host = { "localhost" }; 625 return new BlockLocation[] { 626 new BlockLocation(name, host, 0, file.getLen()) }; 627 } 628 629 630 /** 631 * Return an array containing hostnames, offset and size of 632 * portions of the given file. For a nonexistent 633 * file or regions, null will be returned. 634 * 635 * This call is most helpful with DFS, where it returns 636 * hostnames of machines that contain the given file. 637 * 638 * The FileSystem will simply return an elt containing 'localhost'. 
639 * 640 * @param p path is used to identify an FS since an FS could have 641 * another FS that it could be delegating the call to 642 * @param start offset into the given file 643 * @param len length for which to get locations for 644 */ 645 public BlockLocation[] getFileBlockLocations(Path p, 646 long start, long len) throws IOException { 647 if (p == null) { 648 throw new NullPointerException(); 649 } 650 FileStatus file = getFileStatus(p); 651 return getFileBlockLocations(file, start, len); 652 } 653 654 /** 655 * Return a set of server default configuration values 656 * @return server default configuration values 657 * @throws IOException 658 * @deprecated use {@link #getServerDefaults(Path)} instead 659 */ 660 @Deprecated 661 public FsServerDefaults getServerDefaults() throws IOException { 662 Configuration conf = getConf(); 663 // CRC32 is chosen as default as it is available in all 664 // releases that support checksum. 665 // The client trash configuration is ignored. 666 return new FsServerDefaults(getDefaultBlockSize(), 667 conf.getInt("io.bytes.per.checksum", 512), 668 64 * 1024, 669 getDefaultReplication(), 670 conf.getInt("io.file.buffer.size", 4096), 671 false, 672 CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_DEFAULT, 673 DataChecksum.Type.CRC32); 674 } 675 676 /** 677 * Return a set of server default configuration values 678 * @param p path is used to identify an FS since an FS could have 679 * another FS that it could be delegating the call to 680 * @return server default configuration values 681 * @throws IOException 682 */ 683 public FsServerDefaults getServerDefaults(Path p) throws IOException { 684 return getServerDefaults(); 685 } 686 687 /** 688 * Return the fully-qualified path of path f resolving the path 689 * through any symlinks or mount point 690 * @param p path to be resolved 691 * @return fully qualified path 692 * @throws FileNotFoundException 693 */ 694 public Path resolvePath(final Path p) throws IOException { 695 checkPath(p); 
696 return getFileStatus(p).getPath(); 697 } 698 699 /** 700 * Opens an FSDataInputStream at the indicated Path. 701 * @param f the file name to open 702 * @param bufferSize the size of the buffer to be used. 703 */ 704 public abstract FSDataInputStream open(Path f, int bufferSize) 705 throws IOException; 706 707 /** 708 * Opens an FSDataInputStream at the indicated Path. 709 * @param f the file to open 710 */ 711 public FSDataInputStream open(Path f) throws IOException { 712 return open(f, getConf().getInt("io.file.buffer.size", 4096)); 713 } 714 715 /** 716 * Create an FSDataOutputStream at the indicated Path. 717 * Files are overwritten by default. 718 * @param f the file to create 719 */ 720 public FSDataOutputStream create(Path f) throws IOException { 721 return create(f, true); 722 } 723 724 /** 725 * Create an FSDataOutputStream at the indicated Path. 726 * @param f the file to create 727 * @param overwrite if a file with this name already exists, then if true, 728 * the file will be overwritten, and if false an exception will be thrown. 729 */ 730 public FSDataOutputStream create(Path f, boolean overwrite) 731 throws IOException { 732 return create(f, overwrite, 733 getConf().getInt("io.file.buffer.size", 4096), 734 getDefaultReplication(f), 735 getDefaultBlockSize(f)); 736 } 737 738 /** 739 * Create an FSDataOutputStream at the indicated Path with write-progress 740 * reporting. 741 * Files are overwritten by default. 742 * @param f the file to create 743 * @param progress to report progress 744 */ 745 public FSDataOutputStream create(Path f, Progressable progress) 746 throws IOException { 747 return create(f, true, 748 getConf().getInt("io.file.buffer.size", 4096), 749 getDefaultReplication(f), 750 getDefaultBlockSize(f), progress); 751 } 752 753 /** 754 * Create an FSDataOutputStream at the indicated Path. 755 * Files are overwritten by default. 
756 * @param f the file to create 757 * @param replication the replication factor 758 */ 759 public FSDataOutputStream create(Path f, short replication) 760 throws IOException { 761 return create(f, true, 762 getConf().getInt("io.file.buffer.size", 4096), 763 replication, 764 getDefaultBlockSize(f)); 765 } 766 767 /** 768 * Create an FSDataOutputStream at the indicated Path with write-progress 769 * reporting. 770 * Files are overwritten by default. 771 * @param f the file to create 772 * @param replication the replication factor 773 * @param progress to report progress 774 */ 775 public FSDataOutputStream create(Path f, short replication, 776 Progressable progress) throws IOException { 777 return create(f, true, 778 getConf().getInt("io.file.buffer.size", 4096), 779 replication, 780 getDefaultBlockSize(f), progress); 781 } 782 783 784 /** 785 * Create an FSDataOutputStream at the indicated Path. 786 * @param f the file name to create 787 * @param overwrite if a file with this name already exists, then if true, 788 * the file will be overwritten, and if false an error will be thrown. 789 * @param bufferSize the size of the buffer to be used. 790 */ 791 public FSDataOutputStream create(Path f, 792 boolean overwrite, 793 int bufferSize 794 ) throws IOException { 795 return create(f, overwrite, bufferSize, 796 getDefaultReplication(f), 797 getDefaultBlockSize(f)); 798 } 799 800 /** 801 * Create an FSDataOutputStream at the indicated Path with write-progress 802 * reporting. 803 * @param f the path of the file to open 804 * @param overwrite if a file with this name already exists, then if true, 805 * the file will be overwritten, and if false an error will be thrown. 806 * @param bufferSize the size of the buffer to be used. 
807 */ 808 public FSDataOutputStream create(Path f, 809 boolean overwrite, 810 int bufferSize, 811 Progressable progress 812 ) throws IOException { 813 return create(f, overwrite, bufferSize, 814 getDefaultReplication(f), 815 getDefaultBlockSize(f), progress); 816 } 817 818 819 /** 820 * Create an FSDataOutputStream at the indicated Path. 821 * @param f the file name to open 822 * @param overwrite if a file with this name already exists, then if true, 823 * the file will be overwritten, and if false an error will be thrown. 824 * @param bufferSize the size of the buffer to be used. 825 * @param replication required block replication for the file. 826 */ 827 public FSDataOutputStream create(Path f, 828 boolean overwrite, 829 int bufferSize, 830 short replication, 831 long blockSize 832 ) throws IOException { 833 return create(f, overwrite, bufferSize, replication, blockSize, null); 834 } 835 836 /** 837 * Create an FSDataOutputStream at the indicated Path with write-progress 838 * reporting. 839 * @param f the file name to open 840 * @param overwrite if a file with this name already exists, then if true, 841 * the file will be overwritten, and if false an error will be thrown. 842 * @param bufferSize the size of the buffer to be used. 843 * @param replication required block replication for the file. 844 */ 845 public FSDataOutputStream create(Path f, 846 boolean overwrite, 847 int bufferSize, 848 short replication, 849 long blockSize, 850 Progressable progress 851 ) throws IOException { 852 return this.create(f, FsPermission.getDefault().applyUMask( 853 FsPermission.getUMask(getConf())), overwrite, bufferSize, 854 replication, blockSize, progress); 855 } 856 857 /** 858 * Create an FSDataOutputStream at the indicated Path with write-progress 859 * reporting. 
* @param f the file name to open
   * @param permission the permission of the file
   * @param overwrite if a file with this name already exists, then if true,
   *   the file will be overwritten, and if false an error will be thrown.
   * @param bufferSize the size of the buffer to be used.
   * @param replication required block replication for the file.
   * @param blockSize the block size for the file
   * @param progress to report progress
   * @throws IOException
   * @see #setPermission(Path, FsPermission)
   */
  public abstract FSDataOutputStream create(Path f,
      FsPermission permission,
      boolean overwrite,
      int bufferSize,
      short replication,
      long blockSize,
      Progressable progress) throws IOException;
  
  /**
   * Create an FSDataOutputStream at the indicated Path with write-progress
   * reporting. Delegates to the overload taking a checksum option, passing
   * null for that option.
   * @param f the file name to open
   * @param permission the permission of the file
   * @param flags {@link CreateFlag}s to use for this stream.
   * @param bufferSize the size of the buffer to be used.
   * @param replication required block replication for the file.
   * @param blockSize the block size for the file
   * @param progress to report progress
   * @throws IOException
   * @see #setPermission(Path, FsPermission)
   */
   public FSDataOutputStream create(Path f,
       FsPermission permission,
       EnumSet<CreateFlag> flags,
       int bufferSize,
       short replication,
       long blockSize,
       Progressable progress) throws IOException {
     return create(f, permission, flags, bufferSize, replication,
         blockSize, progress, null);
   }
  
  /**
   * Create an FSDataOutputStream at the indicated Path with a custom
   * checksum option
   * @param f the file name to open
   * @param permission
   * @param flags {@link CreateFlag}s to use for this stream.
   * @param bufferSize the size of the buffer to be used.
   * @param replication required block replication for the file.
   * @param blockSize
   * @param progress
   * @param checksumOpt checksum parameter. If null, the values
   *        found in conf will be used.
915 * @throws IOException 916 * @see #setPermission(Path, FsPermission) 917 */ 918 public FSDataOutputStream create(Path f, 919 FsPermission permission, 920 EnumSet<CreateFlag> flags, 921 int bufferSize, 922 short replication, 923 long blockSize, 924 Progressable progress, 925 ChecksumOpt checksumOpt) throws IOException { 926 // Checksum options are ignored by default. The file systems that 927 // implement checksum need to override this method. The full 928 // support is currently only available in DFS. 929 return create(f, permission, flags.contains(CreateFlag.OVERWRITE), 930 bufferSize, replication, blockSize, progress); 931 } 932 933 /*. 934 * This create has been added to support the FileContext that processes 935 * the permission 936 * with umask before calling this method. 937 * This a temporary method added to support the transition from FileSystem 938 * to FileContext for user applications. 939 */ 940 @Deprecated 941 protected FSDataOutputStream primitiveCreate(Path f, 942 FsPermission absolutePermission, EnumSet<CreateFlag> flag, int bufferSize, 943 short replication, long blockSize, Progressable progress, 944 ChecksumOpt checksumOpt) throws IOException { 945 946 boolean pathExists = exists(f); 947 CreateFlag.validate(f, pathExists, flag); 948 949 // Default impl assumes that permissions do not matter and 950 // nor does the bytesPerChecksum hence 951 // calling the regular create is good enough. 952 // FSs that implement permissions should override this. 953 954 if (pathExists && flag.contains(CreateFlag.APPEND)) { 955 return append(f, bufferSize, progress); 956 } 957 958 return this.create(f, absolutePermission, 959 flag.contains(CreateFlag.OVERWRITE), bufferSize, replication, 960 blockSize, progress); 961 } 962 963 /** 964 * This version of the mkdirs method assumes that the permission is absolute. 965 * It has been added to support the FileContext that processes the permission 966 * with umask before calling this method. 
967 * This a temporary method added to support the transition from FileSystem 968 * to FileContext for user applications. 969 */ 970 @Deprecated 971 protected boolean primitiveMkdir(Path f, FsPermission absolutePermission) 972 throws IOException { 973 // Default impl is to assume that permissions do not matter and hence 974 // calling the regular mkdirs is good enough. 975 // FSs that implement permissions should override this. 976 return this.mkdirs(f, absolutePermission); 977 } 978 979 980 /** 981 * This version of the mkdirs method assumes that the permission is absolute. 982 * It has been added to support the FileContext that processes the permission 983 * with umask before calling this method. 984 * This a temporary method added to support the transition from FileSystem 985 * to FileContext for user applications. 986 */ 987 @Deprecated 988 protected void primitiveMkdir(Path f, FsPermission absolutePermission, 989 boolean createParent) 990 throws IOException { 991 992 if (!createParent) { // parent must exist. 993 // since the this.mkdirs makes parent dirs automatically 994 // we must throw exception if parent does not exist. 995 final FileStatus stat = getFileStatus(f.getParent()); 996 if (stat == null) { 997 throw new FileNotFoundException("Missing parent:" + f); 998 } 999 if (!stat.isDirectory()) { 1000 throw new ParentNotDirectoryException("parent is not a dir"); 1001 } 1002 // parent does exist - go ahead with mkdir of leaf 1003 } 1004 // Default impl is to assume that permissions do not matter and hence 1005 // calling the regular mkdirs is good enough. 1006 // FSs that implement permissions should override this. 1007 if (!this.mkdirs(f, absolutePermission)) { 1008 throw new IOException("mkdir of "+ f + " failed"); 1009 } 1010 } 1011 1012 /** 1013 * Opens an FSDataOutputStream at the indicated Path with write-progress 1014 * reporting. Same as create(), except fails if parent directory doesn't 1015 * already exist. 
1016 * @param f the file name to open 1017 * @param overwrite if a file with this name already exists, then if true, 1018 * the file will be overwritten, and if false an error will be thrown. 1019 * @param bufferSize the size of the buffer to be used. 1020 * @param replication required block replication for the file. 1021 * @param blockSize 1022 * @param progress 1023 * @throws IOException 1024 * @see #setPermission(Path, FsPermission) 1025 * @deprecated API only for 0.20-append 1026 */ 1027 @Deprecated 1028 public FSDataOutputStream createNonRecursive(Path f, 1029 boolean overwrite, 1030 int bufferSize, short replication, long blockSize, 1031 Progressable progress) throws IOException { 1032 return this.createNonRecursive(f, FsPermission.getDefault(), 1033 overwrite, bufferSize, replication, blockSize, progress); 1034 } 1035 1036 /** 1037 * Opens an FSDataOutputStream at the indicated Path with write-progress 1038 * reporting. Same as create(), except fails if parent directory doesn't 1039 * already exist. 1040 * @param f the file name to open 1041 * @param permission 1042 * @param overwrite if a file with this name already exists, then if true, 1043 * the file will be overwritten, and if false an error will be thrown. 1044 * @param bufferSize the size of the buffer to be used. 1045 * @param replication required block replication for the file. 1046 * @param blockSize 1047 * @param progress 1048 * @throws IOException 1049 * @see #setPermission(Path, FsPermission) 1050 * @deprecated API only for 0.20-append 1051 */ 1052 @Deprecated 1053 public FSDataOutputStream createNonRecursive(Path f, FsPermission permission, 1054 boolean overwrite, int bufferSize, short replication, long blockSize, 1055 Progressable progress) throws IOException { 1056 return createNonRecursive(f, permission, 1057 overwrite ? 
EnumSet.of(CreateFlag.CREATE, CreateFlag.OVERWRITE) 1058 : EnumSet.of(CreateFlag.CREATE), bufferSize, 1059 replication, blockSize, progress); 1060 } 1061 1062 /** 1063 * Opens an FSDataOutputStream at the indicated Path with write-progress 1064 * reporting. Same as create(), except fails if parent directory doesn't 1065 * already exist. 1066 * @param f the file name to open 1067 * @param permission 1068 * @param flags {@link CreateFlag}s to use for this stream. 1069 * @param bufferSize the size of the buffer to be used. 1070 * @param replication required block replication for the file. 1071 * @param blockSize 1072 * @param progress 1073 * @throws IOException 1074 * @see #setPermission(Path, FsPermission) 1075 * @deprecated API only for 0.20-append 1076 */ 1077 @Deprecated 1078 public FSDataOutputStream createNonRecursive(Path f, FsPermission permission, 1079 EnumSet<CreateFlag> flags, int bufferSize, short replication, long blockSize, 1080 Progressable progress) throws IOException { 1081 throw new IOException("createNonRecursive unsupported for this filesystem " 1082 + this.getClass()); 1083 } 1084 1085 /** 1086 * Creates the given Path as a brand-new zero-length file. If 1087 * create fails, or if it already existed, return false. 1088 * 1089 * @param f path to use for create 1090 */ 1091 public boolean createNewFile(Path f) throws IOException { 1092 if (exists(f)) { 1093 return false; 1094 } else { 1095 create(f, false, getConf().getInt("io.file.buffer.size", 4096)).close(); 1096 return true; 1097 } 1098 } 1099 1100 /** 1101 * Append to an existing file (optional operation). 1102 * Same as append(f, getConf().getInt("io.file.buffer.size", 4096), null) 1103 * @param f the existing file to be appended. 1104 * @throws IOException 1105 */ 1106 public FSDataOutputStream append(Path f) throws IOException { 1107 return append(f, getConf().getInt("io.file.buffer.size", 4096), null); 1108 } 1109 /** 1110 * Append to an existing file (optional operation). 
1111 * Same as append(f, bufferSize, null). 1112 * @param f the existing file to be appended. 1113 * @param bufferSize the size of the buffer to be used. 1114 * @throws IOException 1115 */ 1116 public FSDataOutputStream append(Path f, int bufferSize) throws IOException { 1117 return append(f, bufferSize, null); 1118 } 1119 1120 /** 1121 * Append to an existing file (optional operation). 1122 * @param f the existing file to be appended. 1123 * @param bufferSize the size of the buffer to be used. 1124 * @param progress for reporting progress if it is not null. 1125 * @throws IOException 1126 */ 1127 public abstract FSDataOutputStream append(Path f, int bufferSize, 1128 Progressable progress) throws IOException; 1129 1130 /** 1131 * Get replication. 1132 * 1133 * @deprecated Use getFileStatus() instead 1134 * @param src file name 1135 * @return file replication 1136 * @throws IOException 1137 */ 1138 @Deprecated 1139 public short getReplication(Path src) throws IOException { 1140 return getFileStatus(src).getReplication(); 1141 } 1142 1143 /** 1144 * Set replication for an existing file. 1145 * 1146 * @param src file name 1147 * @param replication new replication 1148 * @throws IOException 1149 * @return true if successful; 1150 * false if file does not exist or is a directory 1151 */ 1152 public boolean setReplication(Path src, short replication) 1153 throws IOException { 1154 return true; 1155 } 1156 1157 /** 1158 * Renames Path src to Path dst. Can take place on local fs 1159 * or remote DFS. 1160 * @param src path to be renamed 1161 * @param dst new path after rename 1162 * @throws IOException on failure 1163 * @return true if rename is successful 1164 */ 1165 public abstract boolean rename(Path src, Path dst) throws IOException; 1166 1167 /** 1168 * Renames Path src to Path dst 1169 * <ul> 1170 * <li 1171 * <li>Fails if src is a file and dst is a directory. 1172 * <li>Fails if src is a directory and dst is a file. 
1173 * <li>Fails if the parent of dst does not exist or is a file. 1174 * </ul> 1175 * <p> 1176 * If OVERWRITE option is not passed as an argument, rename fails 1177 * if the dst already exists. 1178 * <p> 1179 * If OVERWRITE option is passed as an argument, rename overwrites 1180 * the dst if it is a file or an empty directory. Rename fails if dst is 1181 * a non-empty directory. 1182 * <p> 1183 * Note that atomicity of rename is dependent on the file system 1184 * implementation. Please refer to the file system documentation for 1185 * details. This default implementation is non atomic. 1186 * <p> 1187 * This method is deprecated since it is a temporary method added to 1188 * support the transition from FileSystem to FileContext for user 1189 * applications. 1190 * 1191 * @param src path to be renamed 1192 * @param dst new path after rename 1193 * @throws IOException on failure 1194 */ 1195 @Deprecated 1196 protected void rename(final Path src, final Path dst, 1197 final Rename... options) throws IOException { 1198 // Default implementation 1199 final FileStatus srcStatus = getFileStatus(src); 1200 if (srcStatus == null) { 1201 throw new FileNotFoundException("rename source " + src + " not found."); 1202 } 1203 1204 boolean overwrite = false; 1205 if (null != options) { 1206 for (Rename option : options) { 1207 if (option == Rename.OVERWRITE) { 1208 overwrite = true; 1209 } 1210 } 1211 } 1212 1213 FileStatus dstStatus; 1214 try { 1215 dstStatus = getFileStatus(dst); 1216 } catch (IOException e) { 1217 dstStatus = null; 1218 } 1219 if (dstStatus != null) { 1220 if (srcStatus.isDirectory() != dstStatus.isDirectory()) { 1221 throw new IOException("Source " + src + " Destination " + dst 1222 + " both should be either file or directory"); 1223 } 1224 if (!overwrite) { 1225 throw new FileAlreadyExistsException("rename destination " + dst 1226 + " already exists."); 1227 } 1228 // Delete the destination that is a file or an empty directory 1229 if 
(dstStatus.isDirectory()) { 1230 FileStatus[] list = listStatus(dst); 1231 if (list != null && list.length != 0) { 1232 throw new IOException( 1233 "rename cannot overwrite non empty destination directory " + dst); 1234 } 1235 } 1236 delete(dst, false); 1237 } else { 1238 final Path parent = dst.getParent(); 1239 final FileStatus parentStatus = getFileStatus(parent); 1240 if (parentStatus == null) { 1241 throw new FileNotFoundException("rename destination parent " + parent 1242 + " not found."); 1243 } 1244 if (!parentStatus.isDirectory()) { 1245 throw new ParentNotDirectoryException("rename destination parent " + parent 1246 + " is a file."); 1247 } 1248 } 1249 if (!rename(src, dst)) { 1250 throw new IOException("rename from " + src + " to " + dst + " failed."); 1251 } 1252 } 1253 1254 /** 1255 * Delete a file 1256 * @deprecated Use {@link #delete(Path, boolean)} instead. 1257 */ 1258 @Deprecated 1259 public boolean delete(Path f) throws IOException { 1260 return delete(f, true); 1261 } 1262 1263 /** Delete a file. 1264 * 1265 * @param f the path to delete. 1266 * @param recursive if path is a directory and set to 1267 * true, the directory is deleted else throws an exception. In 1268 * case of a file the recursive can be set to either true or false. 1269 * @return true if delete is successful else false. 1270 * @throws IOException 1271 */ 1272 public abstract boolean delete(Path f, boolean recursive) throws IOException; 1273 1274 /** 1275 * Mark a path to be deleted when FileSystem is closed. 1276 * When the JVM shuts down, 1277 * all FileSystem objects will be closed automatically. 1278 * Then, 1279 * the marked path will be deleted as a result of closing the FileSystem. 1280 * 1281 * The path has to exist in the file system. 1282 * 1283 * @param f the path to delete. 1284 * @return true if deleteOnExit is successful, otherwise false. 
1285 * @throws IOException 1286 */ 1287 public boolean deleteOnExit(Path f) throws IOException { 1288 if (!exists(f)) { 1289 return false; 1290 } 1291 synchronized (deleteOnExit) { 1292 deleteOnExit.add(f); 1293 } 1294 return true; 1295 } 1296 1297 /** 1298 * Cancel the deletion of the path when the FileSystem is closed 1299 * @param f the path to cancel deletion 1300 */ 1301 public boolean cancelDeleteOnExit(Path f) { 1302 synchronized (deleteOnExit) { 1303 return deleteOnExit.remove(f); 1304 } 1305 } 1306 1307 /** 1308 * Delete all files that were marked as delete-on-exit. This recursively 1309 * deletes all files in the specified paths. 1310 */ 1311 protected void processDeleteOnExit() { 1312 synchronized (deleteOnExit) { 1313 for (Iterator<Path> iter = deleteOnExit.iterator(); iter.hasNext();) { 1314 Path path = iter.next(); 1315 try { 1316 if (exists(path)) { 1317 delete(path, true); 1318 } 1319 } 1320 catch (IOException e) { 1321 LOG.info("Ignoring failure to deleteOnExit for path " + path); 1322 } 1323 iter.remove(); 1324 } 1325 } 1326 } 1327 1328 /** Check if exists. 1329 * @param f source file 1330 */ 1331 public boolean exists(Path f) throws IOException { 1332 try { 1333 return getFileStatus(f) != null; 1334 } catch (FileNotFoundException e) { 1335 return false; 1336 } 1337 } 1338 1339 /** True iff the named path is a directory. 1340 * Note: Avoid using this method. Instead reuse the FileStatus 1341 * returned by getFileStatus() or listStatus() methods. 1342 * @param f path to check 1343 */ 1344 public boolean isDirectory(Path f) throws IOException { 1345 try { 1346 return getFileStatus(f).isDirectory(); 1347 } catch (FileNotFoundException e) { 1348 return false; // f does not exist 1349 } 1350 } 1351 1352 /** True iff the named path is a regular file. 1353 * Note: Avoid using this method. Instead reuse the FileStatus 1354 * returned by getFileStatus() or listStatus() methods. 
1355 * @param f path to check 1356 */ 1357 public boolean isFile(Path f) throws IOException { 1358 try { 1359 return getFileStatus(f).isFile(); 1360 } catch (FileNotFoundException e) { 1361 return false; // f does not exist 1362 } 1363 } 1364 1365 /** The number of bytes in a file. */ 1366 /** @deprecated Use getFileStatus() instead */ 1367 @Deprecated 1368 public long getLength(Path f) throws IOException { 1369 return getFileStatus(f).getLen(); 1370 } 1371 1372 /** Return the {@link ContentSummary} of a given {@link Path}. 1373 * @param f path to use 1374 */ 1375 public ContentSummary getContentSummary(Path f) throws IOException { 1376 FileStatus status = getFileStatus(f); 1377 if (status.isFile()) { 1378 // f is a file 1379 return new ContentSummary(status.getLen(), 1, 0); 1380 } 1381 // f is a directory 1382 long[] summary = {0, 0, 1}; 1383 for(FileStatus s : listStatus(f)) { 1384 ContentSummary c = s.isDirectory() ? getContentSummary(s.getPath()) : 1385 new ContentSummary(s.getLen(), 1, 0); 1386 summary[0] += c.getLength(); 1387 summary[1] += c.getFileCount(); 1388 summary[2] += c.getDirectoryCount(); 1389 } 1390 return new ContentSummary(summary[0], summary[1], summary[2]); 1391 } 1392 1393 final private static PathFilter DEFAULT_FILTER = new PathFilter() { 1394 @Override 1395 public boolean accept(Path file) { 1396 return true; 1397 } 1398 }; 1399 1400 /** 1401 * List the statuses of the files/directories in the given path if the path is 1402 * a directory. 1403 * 1404 * @param f given path 1405 * @return the statuses of the files/directories in the given patch 1406 * @throws FileNotFoundException when the path does not exist; 1407 * IOException see specific implementation 1408 */ 1409 public abstract FileStatus[] listStatus(Path f) throws FileNotFoundException, 1410 IOException; 1411 1412 /* 1413 * Filter files/directories in the given path using the user-supplied path 1414 * filter. Results are added to the given array <code>results</code>. 
1415 */ 1416 private void listStatus(ArrayList<FileStatus> results, Path f, 1417 PathFilter filter) throws FileNotFoundException, IOException { 1418 FileStatus listing[] = listStatus(f); 1419 if (listing == null) { 1420 throw new IOException("Error accessing " + f); 1421 } 1422 1423 for (int i = 0; i < listing.length; i++) { 1424 if (filter.accept(listing[i].getPath())) { 1425 results.add(listing[i]); 1426 } 1427 } 1428 } 1429 1430 /** 1431 * @return an iterator over the corrupt files under the given path 1432 * (may contain duplicates if a file has more than one corrupt block) 1433 * @throws IOException 1434 */ 1435 public RemoteIterator<Path> listCorruptFileBlocks(Path path) 1436 throws IOException { 1437 throw new UnsupportedOperationException(getClass().getCanonicalName() + 1438 " does not support" + 1439 " listCorruptFileBlocks"); 1440 } 1441 1442 /** 1443 * Filter files/directories in the given path using the user-supplied path 1444 * filter. 1445 * 1446 * @param f 1447 * a path name 1448 * @param filter 1449 * the user-supplied path filter 1450 * @return an array of FileStatus objects for the files under the given path 1451 * after applying the filter 1452 * @throws FileNotFoundException when the path does not exist; 1453 * IOException see specific implementation 1454 */ 1455 public FileStatus[] listStatus(Path f, PathFilter filter) 1456 throws FileNotFoundException, IOException { 1457 ArrayList<FileStatus> results = new ArrayList<FileStatus>(); 1458 listStatus(results, f, filter); 1459 return results.toArray(new FileStatus[results.size()]); 1460 } 1461 1462 /** 1463 * Filter files/directories in the given list of paths using default 1464 * path filter. 
1465 * 1466 * @param files 1467 * a list of paths 1468 * @return a list of statuses for the files under the given paths after 1469 * applying the filter default Path filter 1470 * @throws FileNotFoundException when the path does not exist; 1471 * IOException see specific implementation 1472 */ 1473 public FileStatus[] listStatus(Path[] files) 1474 throws FileNotFoundException, IOException { 1475 return listStatus(files, DEFAULT_FILTER); 1476 } 1477 1478 /** 1479 * Filter files/directories in the given list of paths using user-supplied 1480 * path filter. 1481 * 1482 * @param files 1483 * a list of paths 1484 * @param filter 1485 * the user-supplied path filter 1486 * @return a list of statuses for the files under the given paths after 1487 * applying the filter 1488 * @throws FileNotFoundException when the path does not exist; 1489 * IOException see specific implementation 1490 */ 1491 public FileStatus[] listStatus(Path[] files, PathFilter filter) 1492 throws FileNotFoundException, IOException { 1493 ArrayList<FileStatus> results = new ArrayList<FileStatus>(); 1494 for (int i = 0; i < files.length; i++) { 1495 listStatus(results, files[i], filter); 1496 } 1497 return results.toArray(new FileStatus[results.size()]); 1498 } 1499 1500 /** 1501 * <p>Return all the files that match filePattern and are not checksum 1502 * files. Results are sorted by their names. 1503 * 1504 * <p> 1505 * A filename pattern is composed of <i>regular</i> characters and 1506 * <i>special pattern matching</i> characters, which are: 1507 * 1508 * <dl> 1509 * <dd> 1510 * <dl> 1511 * <p> 1512 * <dt> <tt> ? </tt> 1513 * <dd> Matches any single character. 1514 * 1515 * <p> 1516 * <dt> <tt> * </tt> 1517 * <dd> Matches zero or more characters. 1518 * 1519 * <p> 1520 * <dt> <tt> [<i>abc</i>] </tt> 1521 * <dd> Matches a single character from character set 1522 * <tt>{<i>a,b,c</i>}</tt>. 
1523 * 1524 * <p> 1525 * <dt> <tt> [<i>a</i>-<i>b</i>] </tt> 1526 * <dd> Matches a single character from the character range 1527 * <tt>{<i>a...b</i>}</tt>. Note that character <tt><i>a</i></tt> must be 1528 * lexicographically less than or equal to character <tt><i>b</i></tt>. 1529 * 1530 * <p> 1531 * <dt> <tt> [^<i>a</i>] </tt> 1532 * <dd> Matches a single character that is not from character set or range 1533 * <tt>{<i>a</i>}</tt>. Note that the <tt>^</tt> character must occur 1534 * immediately to the right of the opening bracket. 1535 * 1536 * <p> 1537 * <dt> <tt> \<i>c</i> </tt> 1538 * <dd> Removes (escapes) any special meaning of character <i>c</i>. 1539 * 1540 * <p> 1541 * <dt> <tt> {ab,cd} </tt> 1542 * <dd> Matches a string from the string set <tt>{<i>ab, cd</i>} </tt> 1543 * 1544 * <p> 1545 * <dt> <tt> {ab,c{de,fh}} </tt> 1546 * <dd> Matches a string from the string set <tt>{<i>ab, cde, cfh</i>}</tt> 1547 * 1548 * </dl> 1549 * </dd> 1550 * </dl> 1551 * 1552 * @param pathPattern a regular expression specifying a pth pattern 1553 1554 * @return an array of paths that match the path pattern 1555 * @throws IOException 1556 */ 1557 public FileStatus[] globStatus(Path pathPattern) throws IOException { 1558 return globStatus(pathPattern, DEFAULT_FILTER); 1559 } 1560 1561 /** 1562 * Return an array of FileStatus objects whose path names match pathPattern 1563 * and is accepted by the user-supplied path filter. Results are sorted by 1564 * their path names. 1565 * Return null if pathPattern has no glob and the path does not exist. 1566 * Return an empty array if pathPattern has a glob and no path matches it. 
1567 * 1568 * @param pathPattern 1569 * a regular expression specifying the path pattern 1570 * @param filter 1571 * a user-supplied path filter 1572 * @return an array of FileStatus objects 1573 * @throws IOException if any I/O error occurs when fetching file status 1574 */ 1575 public FileStatus[] globStatus(Path pathPattern, PathFilter filter) 1576 throws IOException { 1577 String filename = pathPattern.toUri().getPath(); 1578 List<String> filePatterns = GlobExpander.expand(filename); 1579 if (filePatterns.size() == 1) { 1580 return globStatusInternal(pathPattern, filter); 1581 } else { 1582 List<FileStatus> results = new ArrayList<FileStatus>(); 1583 for (String filePattern : filePatterns) { 1584 FileStatus[] files = globStatusInternal(new Path(filePattern), filter); 1585 for (FileStatus file : files) { 1586 results.add(file); 1587 } 1588 } 1589 return results.toArray(new FileStatus[results.size()]); 1590 } 1591 } 1592 1593 private FileStatus[] globStatusInternal(Path pathPattern, PathFilter filter) 1594 throws IOException { 1595 Path[] parents = new Path[1]; 1596 int level = 0; 1597 String filename = pathPattern.toUri().getPath(); 1598 1599 // path has only zero component 1600 if ("".equals(filename) || Path.SEPARATOR.equals(filename)) { 1601 return getFileStatus(new Path[]{pathPattern}); 1602 } 1603 1604 // path has at least one component 1605 String[] components = filename.split(Path.SEPARATOR); 1606 // get the first component 1607 if (pathPattern.isAbsolute()) { 1608 parents[0] = new Path(Path.SEPARATOR); 1609 level = 1; 1610 } else { 1611 parents[0] = new Path(Path.CUR_DIR); 1612 } 1613 1614 // glob the paths that match the parent path, i.e., [0, components.length-1] 1615 boolean[] hasGlob = new boolean[]{false}; 1616 Path[] parentPaths = globPathsLevel(parents, components, level, hasGlob); 1617 FileStatus[] results; 1618 if (parentPaths == null || parentPaths.length == 0) { 1619 results = null; 1620 } else { 1621 // Now work on the last component of the 
path 1622 GlobFilter fp = new GlobFilter(components[components.length - 1], filter); 1623 if (fp.hasPattern()) { // last component has a pattern 1624 // list parent directories and then glob the results 1625 try { 1626 results = listStatus(parentPaths, fp); 1627 } catch (FileNotFoundException e) { 1628 results = null; 1629 } 1630 hasGlob[0] = true; 1631 } else { // last component does not have a pattern 1632 // remove the quoting of metachars in a non-regexp expansion 1633 String name = unquotePathComponent(components[components.length - 1]); 1634 // get all the path names 1635 ArrayList<Path> filteredPaths = new ArrayList<Path>(parentPaths.length); 1636 for (int i = 0; i < parentPaths.length; i++) { 1637 parentPaths[i] = new Path(parentPaths[i], name); 1638 if (fp.accept(parentPaths[i])) { 1639 filteredPaths.add(parentPaths[i]); 1640 } 1641 } 1642 // get all their statuses 1643 results = getFileStatus( 1644 filteredPaths.toArray(new Path[filteredPaths.size()])); 1645 } 1646 } 1647 1648 // Decide if the pathPattern contains a glob or not 1649 if (results == null) { 1650 if (hasGlob[0]) { 1651 results = new FileStatus[0]; 1652 } 1653 } else { 1654 if (results.length == 0 ) { 1655 if (!hasGlob[0]) { 1656 results = null; 1657 } 1658 } else { 1659 Arrays.sort(results); 1660 } 1661 } 1662 return results; 1663 } 1664 1665 /* 1666 * For a path of N components, return a list of paths that match the 1667 * components [<code>level</code>, <code>N-1</code>]. 
1668 */ 1669 private Path[] globPathsLevel(Path[] parents, String[] filePattern, 1670 int level, boolean[] hasGlob) throws IOException { 1671 if (level == filePattern.length - 1) 1672 return parents; 1673 if (parents == null || parents.length == 0) { 1674 return null; 1675 } 1676 GlobFilter fp = new GlobFilter(filePattern[level]); 1677 if (fp.hasPattern()) { 1678 try { 1679 parents = FileUtil.stat2Paths(listStatus(parents, fp)); 1680 } catch (FileNotFoundException e) { 1681 parents = null; 1682 } 1683 hasGlob[0] = true; 1684 } else { // the component does not have a pattern 1685 // remove the quoting of metachars in a non-regexp expansion 1686 String name = unquotePathComponent(filePattern[level]); 1687 for (int i = 0; i < parents.length; i++) { 1688 parents[i] = new Path(parents[i], name); 1689 } 1690 } 1691 return globPathsLevel(parents, filePattern, level + 1, hasGlob); 1692 } 1693 1694 /** 1695 * The glob filter builds a regexp per path component. If the component 1696 * does not contain a shell metachar, then it falls back to appending the 1697 * raw string to the list of built up paths. This raw path needs to have 1698 * the quoting removed. Ie. convert all occurances of "\X" to "X" 1699 * @param name of the path component 1700 * @return the unquoted path component 1701 */ 1702 private String unquotePathComponent(String name) { 1703 return name.replaceAll("\\\\(.)", "$1"); 1704 } 1705 1706 /** 1707 * List the statuses of the files/directories in the given path if the path is 1708 * a directory. 1709 * Return the file's status and block locations If the path is a file. 1710 * 1711 * If a returned status is a file, it contains the file's block locations. 
1712 * 1713 * @param f is the path 1714 * 1715 * @return an iterator that traverses statuses of the files/directories 1716 * in the given path 1717 * 1718 * @throws FileNotFoundException If <code>f</code> does not exist 1719 * @throws IOException If an I/O error occurred 1720 */ 1721 public RemoteIterator<LocatedFileStatus> listLocatedStatus(final Path f) 1722 throws FileNotFoundException, IOException { 1723 return listLocatedStatus(f, DEFAULT_FILTER); 1724 } 1725 1726 /** 1727 * Listing a directory 1728 * The returned results include its block location if it is a file 1729 * The results are filtered by the given path filter 1730 * @param f a path 1731 * @param filter a path filter 1732 * @return an iterator that traverses statuses of the files/directories 1733 * in the given path 1734 * @throws FileNotFoundException if <code>f</code> does not exist 1735 * @throws IOException if any I/O error occurred 1736 */ 1737 protected RemoteIterator<LocatedFileStatus> listLocatedStatus(final Path f, 1738 final PathFilter filter) 1739 throws FileNotFoundException, IOException { 1740 return new RemoteIterator<LocatedFileStatus>() { 1741 private final FileStatus[] stats = listStatus(f, filter); 1742 private int i = 0; 1743 1744 @Override 1745 public boolean hasNext() { 1746 return i<stats.length; 1747 } 1748 1749 @Override 1750 public LocatedFileStatus next() throws IOException { 1751 if (!hasNext()) { 1752 throw new NoSuchElementException("No more entry in " + f); 1753 } 1754 FileStatus result = stats[i++]; 1755 BlockLocation[] locs = result.isFile() ? 1756 getFileBlockLocations(result.getPath(), 0, result.getLen()) : 1757 null; 1758 return new LocatedFileStatus(result, locs); 1759 } 1760 }; 1761 } 1762 1763 /** 1764 * List the statuses and block locations of the files in the given path. 1765 * 1766 * If the path is a directory, 1767 * if recursive is false, returns files in the directory; 1768 * if recursive is true, return files in the subtree rooted at the path. 
1769 * If the path is a file, return the file's status and block locations. 1770 * 1771 * @param f is the path 1772 * @param recursive if the subdirectories need to be traversed recursively 1773 * 1774 * @return an iterator that traverses statuses of the files 1775 * 1776 * @throws FileNotFoundException when the path does not exist; 1777 * IOException see specific implementation 1778 */ 1779 public RemoteIterator<LocatedFileStatus> listFiles( 1780 final Path f, final boolean recursive) 1781 throws FileNotFoundException, IOException { 1782 return new RemoteIterator<LocatedFileStatus>() { 1783 private Stack<RemoteIterator<LocatedFileStatus>> itors = 1784 new Stack<RemoteIterator<LocatedFileStatus>>(); 1785 private RemoteIterator<LocatedFileStatus> curItor = 1786 listLocatedStatus(f); 1787 private LocatedFileStatus curFile; 1788 1789 @Override 1790 public boolean hasNext() throws IOException { 1791 while (curFile == null) { 1792 if (curItor.hasNext()) { 1793 handleFileStat(curItor.next()); 1794 } else if (!itors.empty()) { 1795 curItor = itors.pop(); 1796 } else { 1797 return false; 1798 } 1799 } 1800 return true; 1801 } 1802 1803 /** 1804 * Process the input stat. 1805 * If it is a file, return the file stat. 1806 * If it is a directory, traverse the directory if recursive is true; 1807 * ignore it if recursive is false. 
1808 * @param stat input status 1809 * @throws IOException if any IO error occurs 1810 */ 1811 private void handleFileStat(LocatedFileStatus stat) throws IOException { 1812 if (stat.isFile()) { // file 1813 curFile = stat; 1814 } else if (recursive) { // directory 1815 itors.push(curItor); 1816 curItor = listLocatedStatus(stat.getPath()); 1817 } 1818 } 1819 1820 @Override 1821 public LocatedFileStatus next() throws IOException { 1822 if (hasNext()) { 1823 LocatedFileStatus result = curFile; 1824 curFile = null; 1825 return result; 1826 } 1827 throw new java.util.NoSuchElementException("No more entry in " + f); 1828 } 1829 }; 1830 } 1831 1832 /** Return the current user's home directory in this filesystem. 1833 * The default implementation returns "/user/$USER/". 1834 */ 1835 public Path getHomeDirectory() { 1836 return this.makeQualified( 1837 new Path("/user/"+System.getProperty("user.name"))); 1838 } 1839 1840 1841 /** 1842 * Set the current working directory for the given file system. All relative 1843 * paths will be resolved relative to it. 1844 * 1845 * @param new_dir 1846 */ 1847 public abstract void setWorkingDirectory(Path new_dir); 1848 1849 /** 1850 * Get the current working directory for the given file system 1851 * @return the directory pathname 1852 */ 1853 public abstract Path getWorkingDirectory(); 1854 1855 1856 /** 1857 * Note: with the new FilesContext class, getWorkingDirectory() 1858 * will be removed. 1859 * The working directory is implemented in FilesContext. 1860 * 1861 * Some file systems like LocalFileSystem have an initial workingDir 1862 * that we use as the starting workingDir. For other file systems 1863 * like HDFS there is no built in notion of an inital workingDir. 1864 * 1865 * @return if there is built in notion of workingDir then it 1866 * is returned; else a null is returned. 
1867 */ 1868 protected Path getInitialWorkingDirectory() { 1869 return null; 1870 } 1871 1872 /** 1873 * Call {@link #mkdirs(Path, FsPermission)} with default permission. 1874 */ 1875 public boolean mkdirs(Path f) throws IOException { 1876 return mkdirs(f, FsPermission.getDefault()); 1877 } 1878 1879 /** 1880 * Make the given file and all non-existent parents into 1881 * directories. Has the semantics of Unix 'mkdir -p'. 1882 * Existence of the directory hierarchy is not an error. 1883 * @param f path to create 1884 * @param permission to apply to f 1885 */ 1886 public abstract boolean mkdirs(Path f, FsPermission permission 1887 ) throws IOException; 1888 1889 /** 1890 * The src file is on the local disk. Add it to FS at 1891 * the given dst name and the source is kept intact afterwards 1892 * @param src path 1893 * @param dst path 1894 */ 1895 public void copyFromLocalFile(Path src, Path dst) 1896 throws IOException { 1897 copyFromLocalFile(false, src, dst); 1898 } 1899 1900 /** 1901 * The src files is on the local disk. Add it to FS at 1902 * the given dst name, removing the source afterwards. 1903 * @param srcs path 1904 * @param dst path 1905 */ 1906 public void moveFromLocalFile(Path[] srcs, Path dst) 1907 throws IOException { 1908 copyFromLocalFile(true, true, srcs, dst); 1909 } 1910 1911 /** 1912 * The src file is on the local disk. Add it to FS at 1913 * the given dst name, removing the source afterwards. 1914 * @param src path 1915 * @param dst path 1916 */ 1917 public void moveFromLocalFile(Path src, Path dst) 1918 throws IOException { 1919 copyFromLocalFile(true, src, dst); 1920 } 1921 1922 /** 1923 * The src file is on the local disk. Add it to FS at 1924 * the given dst name. 
1925 * delSrc indicates if the source should be removed 1926 * @param delSrc whether to delete the src 1927 * @param src path 1928 * @param dst path 1929 */ 1930 public void copyFromLocalFile(boolean delSrc, Path src, Path dst) 1931 throws IOException { 1932 copyFromLocalFile(delSrc, true, src, dst); 1933 } 1934 1935 /** 1936 * The src files are on the local disk. Add it to FS at 1937 * the given dst name. 1938 * delSrc indicates if the source should be removed 1939 * @param delSrc whether to delete the src 1940 * @param overwrite whether to overwrite an existing file 1941 * @param srcs array of paths which are source 1942 * @param dst path 1943 */ 1944 public void copyFromLocalFile(boolean delSrc, boolean overwrite, 1945 Path[] srcs, Path dst) 1946 throws IOException { 1947 Configuration conf = getConf(); 1948 FileUtil.copy(getLocal(conf), srcs, this, dst, delSrc, overwrite, conf); 1949 } 1950 1951 /** 1952 * The src file is on the local disk. Add it to FS at 1953 * the given dst name. 1954 * delSrc indicates if the source should be removed 1955 * @param delSrc whether to delete the src 1956 * @param overwrite whether to overwrite an existing file 1957 * @param src path 1958 * @param dst path 1959 */ 1960 public void copyFromLocalFile(boolean delSrc, boolean overwrite, 1961 Path src, Path dst) 1962 throws IOException { 1963 Configuration conf = getConf(); 1964 FileUtil.copy(getLocal(conf), src, this, dst, delSrc, overwrite, conf); 1965 } 1966 1967 /** 1968 * The src file is under FS, and the dst is on the local disk. 1969 * Copy it from FS control to the local dst name. 1970 * @param src path 1971 * @param dst path 1972 */ 1973 public void copyToLocalFile(Path src, Path dst) throws IOException { 1974 copyToLocalFile(false, src, dst); 1975 } 1976 1977 /** 1978 * The src file is under FS, and the dst is on the local disk. 1979 * Copy it from FS control to the local dst name. 
1980 * Remove the source afterwards 1981 * @param src path 1982 * @param dst path 1983 */ 1984 public void moveToLocalFile(Path src, Path dst) throws IOException { 1985 copyToLocalFile(true, src, dst); 1986 } 1987 1988 /** 1989 * The src file is under FS, and the dst is on the local disk. 1990 * Copy it from FS control to the local dst name. 1991 * delSrc indicates if the src will be removed or not. 1992 * @param delSrc whether to delete the src 1993 * @param src path 1994 * @param dst path 1995 */ 1996 public void copyToLocalFile(boolean delSrc, Path src, Path dst) 1997 throws IOException { 1998 copyToLocalFile(delSrc, src, dst, false); 1999 } 2000 2001 /** 2002 * The src file is under FS, and the dst is on the local disk. Copy it from FS 2003 * control to the local dst name. delSrc indicates if the src will be removed 2004 * or not. useRawLocalFileSystem indicates whether to use RawLocalFileSystem 2005 * as local file system or not. RawLocalFileSystem is non crc file system.So, 2006 * It will not create any crc files at local. 2007 * 2008 * @param delSrc 2009 * whether to delete the src 2010 * @param src 2011 * path 2012 * @param dst 2013 * path 2014 * @param useRawLocalFileSystem 2015 * whether to use RawLocalFileSystem as local file system or not. 2016 * 2017 * @throws IOException 2018 * - if any IO error 2019 */ 2020 public void copyToLocalFile(boolean delSrc, Path src, Path dst, 2021 boolean useRawLocalFileSystem) throws IOException { 2022 Configuration conf = getConf(); 2023 FileSystem local = null; 2024 if (useRawLocalFileSystem) { 2025 local = getLocal(conf).getRawFileSystem(); 2026 } else { 2027 local = getLocal(conf); 2028 } 2029 FileUtil.copy(this, src, local, dst, delSrc, conf); 2030 } 2031 2032 /** 2033 * Returns a local File that the user can write output to. The caller 2034 * provides both the eventual FS target name and the local working 2035 * file. If the FS is local, we write directly into the target. 
If 2036 * the FS is remote, we write into the tmp local area. 2037 * @param fsOutputFile path of output file 2038 * @param tmpLocalFile path of local tmp file 2039 */ 2040 public Path startLocalOutput(Path fsOutputFile, Path tmpLocalFile) 2041 throws IOException { 2042 return tmpLocalFile; 2043 } 2044 2045 /** 2046 * Called when we're all done writing to the target. A local FS will 2047 * do nothing, because we've written to exactly the right place. A remote 2048 * FS will copy the contents of tmpLocalFile to the correct target at 2049 * fsOutputFile. 2050 * @param fsOutputFile path of output file 2051 * @param tmpLocalFile path to local tmp file 2052 */ 2053 public void completeLocalOutput(Path fsOutputFile, Path tmpLocalFile) 2054 throws IOException { 2055 moveFromLocalFile(tmpLocalFile, fsOutputFile); 2056 } 2057 2058 /** 2059 * No more filesystem operations are needed. Will 2060 * release any held locks. 2061 */ 2062 @Override 2063 public void close() throws IOException { 2064 // delete all files that were marked as delete-on-exit. 2065 processDeleteOnExit(); 2066 CACHE.remove(this.key, this); 2067 } 2068 2069 /** Return the total size of all files in the filesystem.*/ 2070 public long getUsed() throws IOException{ 2071 long used = 0; 2072 FileStatus[] files = listStatus(new Path("/")); 2073 for(FileStatus file:files){ 2074 used += file.getLen(); 2075 } 2076 return used; 2077 } 2078 2079 /** 2080 * Get the block size for a particular file. 2081 * @param f the filename 2082 * @return the number of bytes in a block 2083 */ 2084 /** @deprecated Use getFileStatus() instead */ 2085 @Deprecated 2086 public long getBlockSize(Path f) throws IOException { 2087 return getFileStatus(f).getBlockSize(); 2088 } 2089 2090 /** 2091 * Return the number of bytes that large input files should be optimally 2092 * be split into to minimize i/o time. 
2093 * @deprecated use {@link #getDefaultBlockSize(Path)} instead 2094 */ 2095 @Deprecated 2096 public long getDefaultBlockSize() { 2097 // default to 32MB: large enough to minimize the impact of seeks 2098 return getConf().getLong("fs.local.block.size", 32 * 1024 * 1024); 2099 } 2100 2101 /** Return the number of bytes that large input files should be optimally 2102 * be split into to minimize i/o time. The given path will be used to 2103 * locate the actual filesystem. The full path does not have to exist. 2104 * @param f path of file 2105 * @return the default block size for the path's filesystem 2106 */ 2107 public long getDefaultBlockSize(Path f) { 2108 return getDefaultBlockSize(); 2109 } 2110 2111 /** 2112 * Get the default replication. 2113 * @deprecated use {@link #getDefaultReplication(Path)} instead 2114 */ 2115 @Deprecated 2116 public short getDefaultReplication() { return 1; } 2117 2118 /** 2119 * Get the default replication for a path. The given path will be used to 2120 * locate the actual filesystem. The full path does not have to exist. 2121 * @param path of the file 2122 * @return default replication for the path's filesystem 2123 */ 2124 public short getDefaultReplication(Path path) { 2125 return getDefaultReplication(); 2126 } 2127 2128 /** 2129 * Return a file status object that represents the path. 2130 * @param f The path we want information from 2131 * @return a FileStatus object 2132 * @throws FileNotFoundException when the path does not exist; 2133 * IOException see specific implementation 2134 */ 2135 public abstract FileStatus getFileStatus(Path f) throws IOException; 2136 2137 /** 2138 * Get the checksum of a file. 2139 * 2140 * @param f The file path 2141 * @return The file checksum. The default return value is null, 2142 * which indicates that no checksum algorithm is implemented 2143 * in the corresponding FileSystem. 
2144 */ 2145 public FileChecksum getFileChecksum(Path f) throws IOException { 2146 return null; 2147 } 2148 2149 /** 2150 * Set the verify checksum flag. This is only applicable if the 2151 * corresponding FileSystem supports checksum. By default doesn't do anything. 2152 * @param verifyChecksum 2153 */ 2154 public void setVerifyChecksum(boolean verifyChecksum) { 2155 //doesn't do anything 2156 } 2157 2158 /** 2159 * Set the write checksum flag. This is only applicable if the 2160 * corresponding FileSystem supports checksum. By default doesn't do anything. 2161 * @param writeChecksum 2162 */ 2163 public void setWriteChecksum(boolean writeChecksum) { 2164 //doesn't do anything 2165 } 2166 2167 /** 2168 * Return a list of file status objects that corresponds to the list of paths 2169 * excluding those non-existent paths. 2170 * 2171 * @param paths 2172 * the list of paths we want information from 2173 * @return a list of FileStatus objects 2174 * @throws IOException 2175 * see specific implementation 2176 */ 2177 private FileStatus[] getFileStatus(Path[] paths) throws IOException { 2178 if (paths == null) { 2179 return null; 2180 } 2181 ArrayList<FileStatus> results = new ArrayList<FileStatus>(paths.length); 2182 for (int i = 0; i < paths.length; i++) { 2183 try { 2184 results.add(getFileStatus(paths[i])); 2185 } catch (FileNotFoundException e) { // do nothing 2186 } 2187 } 2188 return results.toArray(new FileStatus[results.size()]); 2189 } 2190 2191 /** 2192 * Returns a status object describing the use and capacity of the 2193 * file system. If the file system has multiple partitions, the 2194 * use and capacity of the root partition is reflected. 2195 * 2196 * @return a FsStatus object 2197 * @throws IOException 2198 * see specific implementation 2199 */ 2200 public FsStatus getStatus() throws IOException { 2201 return getStatus(null); 2202 } 2203 2204 /** 2205 * Returns a status object describing the use and capacity of the 2206 * file system. 
If the file system has multiple partitions, the 2207 * use and capacity of the partition pointed to by the specified 2208 * path is reflected. 2209 * @param p Path for which status should be obtained. null means 2210 * the default partition. 2211 * @return a FsStatus object 2212 * @throws IOException 2213 * see specific implementation 2214 */ 2215 public FsStatus getStatus(Path p) throws IOException { 2216 return new FsStatus(Long.MAX_VALUE, 0, Long.MAX_VALUE); 2217 } 2218 2219 /** 2220 * Set permission of a path. 2221 * @param p 2222 * @param permission 2223 */ 2224 public void setPermission(Path p, FsPermission permission 2225 ) throws IOException { 2226 } 2227 2228 /** 2229 * Set owner of a path (i.e. a file or a directory). 2230 * The parameters username and groupname cannot both be null. 2231 * @param p The path 2232 * @param username If it is null, the original username remains unchanged. 2233 * @param groupname If it is null, the original groupname remains unchanged. 2234 */ 2235 public void setOwner(Path p, String username, String groupname 2236 ) throws IOException { 2237 } 2238 2239 /** 2240 * Set access time of a file 2241 * @param p The path 2242 * @param mtime Set the modification time of this file. 2243 * The number of milliseconds since Jan 1, 1970. 2244 * A value of -1 means that this call should not set modification time. 2245 * @param atime Set the access time of this file. 2246 * The number of milliseconds since Jan 1, 1970. 2247 * A value of -1 means that this call should not set access time. 2248 */ 2249 public void setTimes(Path p, long mtime, long atime 2250 ) throws IOException { 2251 } 2252 2253 // making it volatile to be able to do a double checked locking 2254 private volatile static boolean FILE_SYSTEMS_LOADED = false; 2255 2256 private static final Map<String, Class<? extends FileSystem>> 2257 SERVICE_FILE_SYSTEMS = new HashMap<String, Class<? 
extends FileSystem>>(); 2258 2259 private static void loadFileSystems() { 2260 synchronized (FileSystem.class) { 2261 if (!FILE_SYSTEMS_LOADED) { 2262 ServiceLoader<FileSystem> serviceLoader = ServiceLoader.load(FileSystem.class); 2263 for (FileSystem fs : serviceLoader) { 2264 SERVICE_FILE_SYSTEMS.put(fs.getScheme(), fs.getClass()); 2265 } 2266 FILE_SYSTEMS_LOADED = true; 2267 } 2268 } 2269 } 2270 2271 public static Class<? extends FileSystem> getFileSystemClass(String scheme, 2272 Configuration conf) throws IOException { 2273 if (!FILE_SYSTEMS_LOADED) { 2274 loadFileSystems(); 2275 } 2276 Class<? extends FileSystem> clazz = null; 2277 if (conf != null) { 2278 clazz = (Class<? extends FileSystem>) conf.getClass("fs." + scheme + ".impl", null); 2279 } 2280 if (clazz == null) { 2281 clazz = SERVICE_FILE_SYSTEMS.get(scheme); 2282 } 2283 if (clazz == null) { 2284 throw new IOException("No FileSystem for scheme: " + scheme); 2285 } 2286 return clazz; 2287 } 2288 2289 private static FileSystem createFileSystem(URI uri, Configuration conf 2290 ) throws IOException { 2291 Class<?> clazz = getFileSystemClass(uri.getScheme(), conf); 2292 if (clazz == null) { 2293 throw new IOException("No FileSystem for scheme: " + uri.getScheme()); 2294 } 2295 FileSystem fs = (FileSystem)ReflectionUtils.newInstance(clazz, conf); 2296 fs.initialize(uri, conf); 2297 return fs; 2298 } 2299 2300 /** Caching FileSystem objects */ 2301 static class Cache { 2302 private final ClientFinalizer clientFinalizer = new ClientFinalizer(); 2303 2304 private final Map<Key, FileSystem> map = new HashMap<Key, FileSystem>(); 2305 private final Set<Key> toAutoClose = new HashSet<Key>(); 2306 2307 /** A variable that makes all objects in the cache unique */ 2308 private static AtomicLong unique = new AtomicLong(1); 2309 2310 FileSystem get(URI uri, Configuration conf) throws IOException{ 2311 Key key = new Key(uri, conf); 2312 return getInternal(uri, conf, key); 2313 } 2314 2315 /** The objects inserted into 
the cache using this method are all unique */ 2316 FileSystem getUnique(URI uri, Configuration conf) throws IOException{ 2317 Key key = new Key(uri, conf, unique.getAndIncrement()); 2318 return getInternal(uri, conf, key); 2319 } 2320 2321 private FileSystem getInternal(URI uri, Configuration conf, Key key) throws IOException{ 2322 FileSystem fs; 2323 synchronized (this) { 2324 fs = map.get(key); 2325 } 2326 if (fs != null) { 2327 return fs; 2328 } 2329 2330 fs = createFileSystem(uri, conf); 2331 synchronized (this) { // refetch the lock again 2332 FileSystem oldfs = map.get(key); 2333 if (oldfs != null) { // a file system is created while lock is releasing 2334 fs.close(); // close the new file system 2335 return oldfs; // return the old file system 2336 } 2337 2338 // now insert the new file system into the map 2339 if (map.isEmpty() ) { 2340 ShutdownHookManager.get().addShutdownHook(clientFinalizer, SHUTDOWN_HOOK_PRIORITY); 2341 } 2342 fs.key = key; 2343 map.put(key, fs); 2344 if (conf.getBoolean("fs.automatic.close", true)) { 2345 toAutoClose.add(key); 2346 } 2347 return fs; 2348 } 2349 } 2350 2351 synchronized void remove(Key key, FileSystem fs) { 2352 if (map.containsKey(key) && fs == map.get(key)) { 2353 map.remove(key); 2354 toAutoClose.remove(key); 2355 } 2356 } 2357 2358 synchronized void closeAll() throws IOException { 2359 closeAll(false); 2360 } 2361 2362 /** 2363 * Close all FileSystem instances in the Cache. 2364 * @param onlyAutomatic only close those that are marked for automatic closing 2365 */ 2366 synchronized void closeAll(boolean onlyAutomatic) throws IOException { 2367 List<IOException> exceptions = new ArrayList<IOException>(); 2368 2369 // Make a copy of the keys in the map since we'll be modifying 2370 // the map while iterating over it, which isn't safe. 
2371 List<Key> keys = new ArrayList<Key>(); 2372 keys.addAll(map.keySet()); 2373 2374 for (Key key : keys) { 2375 final FileSystem fs = map.get(key); 2376 2377 if (onlyAutomatic && !toAutoClose.contains(key)) { 2378 continue; 2379 } 2380 2381 //remove from cache 2382 remove(key, fs); 2383 2384 if (fs != null) { 2385 try { 2386 fs.close(); 2387 } 2388 catch(IOException ioe) { 2389 exceptions.add(ioe); 2390 } 2391 } 2392 } 2393 2394 if (!exceptions.isEmpty()) { 2395 throw MultipleIOException.createIOException(exceptions); 2396 } 2397 } 2398 2399 private class ClientFinalizer implements Runnable { 2400 @Override 2401 public synchronized void run() { 2402 try { 2403 closeAll(true); 2404 } catch (IOException e) { 2405 LOG.info("FileSystem.Cache.closeAll() threw an exception:\n" + e); 2406 } 2407 } 2408 } 2409 2410 synchronized void closeAll(UserGroupInformation ugi) throws IOException { 2411 List<FileSystem> targetFSList = new ArrayList<FileSystem>(); 2412 //Make a pass over the list and collect the filesystems to close 2413 //we cannot close inline since close() removes the entry from the Map 2414 for (Map.Entry<Key, FileSystem> entry : map.entrySet()) { 2415 final Key key = entry.getKey(); 2416 final FileSystem fs = entry.getValue(); 2417 if (ugi.equals(key.ugi) && fs != null) { 2418 targetFSList.add(fs); 2419 } 2420 } 2421 List<IOException> exceptions = new ArrayList<IOException>(); 2422 //now make a pass over the target list and close each 2423 for (FileSystem fs : targetFSList) { 2424 try { 2425 fs.close(); 2426 } 2427 catch(IOException ioe) { 2428 exceptions.add(ioe); 2429 } 2430 } 2431 if (!exceptions.isEmpty()) { 2432 throw MultipleIOException.createIOException(exceptions); 2433 } 2434 } 2435 2436 /** FileSystem.Cache.Key */ 2437 static class Key { 2438 final String scheme; 2439 final String authority; 2440 final UserGroupInformation ugi; 2441 final long unique; // an artificial way to make a key unique 2442 2443 Key(URI uri, Configuration conf) throws 
IOException { 2444 this(uri, conf, 0); 2445 } 2446 2447 Key(URI uri, Configuration conf, long unique) throws IOException { 2448 scheme = uri.getScheme()==null?"":uri.getScheme().toLowerCase(); 2449 authority = uri.getAuthority()==null?"":uri.getAuthority().toLowerCase(); 2450 this.unique = unique; 2451 2452 this.ugi = UserGroupInformation.getCurrentUser(); 2453 } 2454 2455 @Override 2456 public int hashCode() { 2457 return (scheme + authority).hashCode() + ugi.hashCode() + (int)unique; 2458 } 2459 2460 static boolean isEqual(Object a, Object b) { 2461 return a == b || (a != null && a.equals(b)); 2462 } 2463 2464 @Override 2465 public boolean equals(Object obj) { 2466 if (obj == this) { 2467 return true; 2468 } 2469 if (obj != null && obj instanceof Key) { 2470 Key that = (Key)obj; 2471 return isEqual(this.scheme, that.scheme) 2472 && isEqual(this.authority, that.authority) 2473 && isEqual(this.ugi, that.ugi) 2474 && (this.unique == that.unique); 2475 } 2476 return false; 2477 } 2478 2479 @Override 2480 public String toString() { 2481 return "("+ugi.toString() + ")@" + scheme + "://" + authority; 2482 } 2483 } 2484 } 2485 2486 public static final class Statistics { 2487 private final String scheme; 2488 private AtomicLong bytesRead = new AtomicLong(); 2489 private AtomicLong bytesWritten = new AtomicLong(); 2490 private AtomicInteger readOps = new AtomicInteger(); 2491 private AtomicInteger largeReadOps = new AtomicInteger(); 2492 private AtomicInteger writeOps = new AtomicInteger(); 2493 2494 public Statistics(String scheme) { 2495 this.scheme = scheme; 2496 } 2497 2498 /** 2499 * Copy constructor. 2500 * 2501 * @param st 2502 * The input Statistics object which is cloned. 
2503 */ 2504 public Statistics(Statistics st) { 2505 this.scheme = st.scheme; 2506 this.bytesRead = new AtomicLong(st.bytesRead.longValue()); 2507 this.bytesWritten = new AtomicLong(st.bytesWritten.longValue()); 2508 } 2509 2510 /** 2511 * Increment the bytes read in the statistics 2512 * @param newBytes the additional bytes read 2513 */ 2514 public void incrementBytesRead(long newBytes) { 2515 bytesRead.getAndAdd(newBytes); 2516 } 2517 2518 /** 2519 * Increment the bytes written in the statistics 2520 * @param newBytes the additional bytes written 2521 */ 2522 public void incrementBytesWritten(long newBytes) { 2523 bytesWritten.getAndAdd(newBytes); 2524 } 2525 2526 /** 2527 * Increment the number of read operations 2528 * @param count number of read operations 2529 */ 2530 public void incrementReadOps(int count) { 2531 readOps.getAndAdd(count); 2532 } 2533 2534 /** 2535 * Increment the number of large read operations 2536 * @param count number of large read operations 2537 */ 2538 public void incrementLargeReadOps(int count) { 2539 largeReadOps.getAndAdd(count); 2540 } 2541 2542 /** 2543 * Increment the number of write operations 2544 * @param count number of write operations 2545 */ 2546 public void incrementWriteOps(int count) { 2547 writeOps.getAndAdd(count); 2548 } 2549 2550 /** 2551 * Get the total number of bytes read 2552 * @return the number of bytes 2553 */ 2554 public long getBytesRead() { 2555 return bytesRead.get(); 2556 } 2557 2558 /** 2559 * Get the total number of bytes written 2560 * @return the number of bytes 2561 */ 2562 public long getBytesWritten() { 2563 return bytesWritten.get(); 2564 } 2565 2566 /** 2567 * Get the number of file system read operations such as list files 2568 * @return number of read operations 2569 */ 2570 public int getReadOps() { 2571 return readOps.get() + largeReadOps.get(); 2572 } 2573 2574 /** 2575 * Get the number of large file system read operations such as list files 2576 * under a large directory 2577 * @return 
number of large read operations 2578 */ 2579 public int getLargeReadOps() { 2580 return largeReadOps.get(); 2581 } 2582 2583 /** 2584 * Get the number of file system write operations such as create, append 2585 * rename etc. 2586 * @return number of write operations 2587 */ 2588 public int getWriteOps() { 2589 return writeOps.get(); 2590 } 2591 2592 @Override 2593 public String toString() { 2594 return bytesRead + " bytes read, " + bytesWritten + " bytes written, " 2595 + readOps + " read ops, " + largeReadOps + " large read ops, " 2596 + writeOps + " write ops"; 2597 } 2598 2599 /** 2600 * Reset the counts of bytes to 0. 2601 */ 2602 public void reset() { 2603 bytesWritten.set(0); 2604 bytesRead.set(0); 2605 } 2606 2607 /** 2608 * Get the uri scheme associated with this statistics object. 2609 * @return the schema associated with this set of statistics 2610 */ 2611 public String getScheme() { 2612 return scheme; 2613 } 2614 } 2615 2616 /** 2617 * Get the Map of Statistics object indexed by URI Scheme. 2618 * @return a Map having a key as URI scheme and value as Statistics object 2619 * @deprecated use {@link #getAllStatistics} instead 2620 */ 2621 @Deprecated 2622 public static synchronized Map<String, Statistics> getStatistics() { 2623 Map<String, Statistics> result = new HashMap<String, Statistics>(); 2624 for(Statistics stat: statisticsTable.values()) { 2625 result.put(stat.getScheme(), stat); 2626 } 2627 return result; 2628 } 2629 2630 /** 2631 * Return the FileSystem classes that have Statistics 2632 */ 2633 public static synchronized List<Statistics> getAllStatistics() { 2634 return new ArrayList<Statistics>(statisticsTable.values()); 2635 } 2636 2637 /** 2638 * Get the statistics for a particular file system 2639 * @param cls the class to lookup 2640 * @return a statistics object 2641 */ 2642 public static synchronized 2643 Statistics getStatistics(String scheme, Class<? 
extends FileSystem> cls) { 2644 Statistics result = statisticsTable.get(cls); 2645 if (result == null) { 2646 result = new Statistics(scheme); 2647 statisticsTable.put(cls, result); 2648 } 2649 return result; 2650 } 2651 2652 /** 2653 * Reset all statistics for all file systems 2654 */ 2655 public static synchronized void clearStatistics() { 2656 for(Statistics stat: statisticsTable.values()) { 2657 stat.reset(); 2658 } 2659 } 2660 2661 /** 2662 * Print all statistics for all file systems 2663 */ 2664 public static synchronized 2665 void printStatistics() throws IOException { 2666 for (Map.Entry<Class<? extends FileSystem>, Statistics> pair: 2667 statisticsTable.entrySet()) { 2668 System.out.println(" FileSystem " + pair.getKey().getName() + 2669 ": " + pair.getValue()); 2670 } 2671 } 2672 }