001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.fs; 019 020import java.io.Closeable; 021import java.io.FileNotFoundException; 022import java.io.IOException; 023import java.net.URI; 024import java.security.PrivilegedExceptionAction; 025import java.util.ArrayList; 026import java.util.Arrays; 027import java.util.Collections; 028import java.util.EnumSet; 029import java.util.HashMap; 030import java.util.HashSet; 031import java.util.IdentityHashMap; 032import java.util.Iterator; 033import java.util.List; 034import java.util.Map; 035import java.util.NoSuchElementException; 036import java.util.ServiceLoader; 037import java.util.Set; 038import java.util.Stack; 039import java.util.TreeSet; 040import java.util.concurrent.atomic.AtomicInteger; 041import java.util.concurrent.atomic.AtomicLong; 042 043import org.apache.commons.logging.Log; 044import org.apache.commons.logging.LogFactory; 045import org.apache.hadoop.classification.InterfaceAudience; 046import org.apache.hadoop.classification.InterfaceStability; 047import org.apache.hadoop.conf.Configuration; 048import org.apache.hadoop.conf.Configured; 049import org.apache.hadoop.fs.Options.ChecksumOpt; 050import 
org.apache.hadoop.fs.Options.Rename; 051import org.apache.hadoop.fs.permission.FsPermission; 052import org.apache.hadoop.io.MultipleIOException; 053import org.apache.hadoop.io.Text; 054import org.apache.hadoop.net.NetUtils; 055import org.apache.hadoop.security.Credentials; 056import org.apache.hadoop.security.SecurityUtil; 057import org.apache.hadoop.security.UserGroupInformation; 058import org.apache.hadoop.security.token.Token; 059import org.apache.hadoop.util.DataChecksum; 060import org.apache.hadoop.util.Progressable; 061import org.apache.hadoop.util.ReflectionUtils; 062import org.apache.hadoop.util.ShutdownHookManager; 063 064import com.google.common.annotations.VisibleForTesting; 065 066/**************************************************************** 067 * An abstract base class for a fairly generic filesystem. It 068 * may be implemented as a distributed filesystem, or as a "local" 069 * one that reflects the locally-connected disk. The local version 070 * exists for small Hadoop instances and for testing. 071 * 072 * <p> 073 * 074 * All user code that may potentially use the Hadoop Distributed 075 * File System should be written to use a FileSystem object. The 076 * Hadoop DFS is a multi-machine system that appears as a single 077 * disk. It's useful because of its fault tolerance and potentially 078 * very large capacity. 079 * 080 * <p> 081 * The local implementation is {@link LocalFileSystem} and distributed 082 * implementation is DistributedFileSystem. 
*****************************************************************/
@InterfaceAudience.Public
@InterfaceStability.Stable
public abstract class FileSystem extends Configured implements Closeable {
  /**
   * Configuration key for the default filesystem; an alias of
   * {@link CommonConfigurationKeys#FS_DEFAULT_NAME_KEY}.
   */
  public static final String FS_DEFAULT_NAME_KEY = 
                   CommonConfigurationKeys.FS_DEFAULT_NAME_KEY;
  /**
   * Value used when {@link #FS_DEFAULT_NAME_KEY} is not set; an alias of
   * {@link CommonConfigurationKeys#FS_DEFAULT_NAME_DEFAULT}.
   */
  public static final String DEFAULT_FS = 
                   CommonConfigurationKeys.FS_DEFAULT_NAME_DEFAULT;

  /** Logger shared by the FileSystem class. */
  public static final Log LOG = LogFactory.getLog(FileSystem.class);

  /**
   * Priority of the FileSystem shutdown hook.
   */
  public static final int SHUTDOWN_HOOK_PRIORITY = 10;

  /** FileSystem cache */
  static final Cache CACHE = new Cache();

  /** The key this instance is stored under in the cache. */
  private Cache.Key key;

  /** Statistics recorded per FileSystem class, keyed by class identity. */
  private static final Map<Class<? extends FileSystem>, Statistics> 
    statisticsTable =
      new IdentityHashMap<Class<? extends FileSystem>, Statistics>();
  
  /**
   * The statistics for this file system.
   */
  protected Statistics statistics;

  /**
   * A cache of files that should be deleted when the filesystem is closed
   * or the JVM is exited.
   */
  private Set<Path> deleteOnExit = new TreeSet<Path>();
  
  /**
   * This method adds a file system for testing so that we can find it later. It
   * is only for testing.
124 * @param uri the uri to store it under 125 * @param conf the configuration to store it under 126 * @param fs the file system to store 127 * @throws IOException 128 */ 129 static void addFileSystemForTesting(URI uri, Configuration conf, 130 FileSystem fs) throws IOException { 131 CACHE.map.put(new Cache.Key(uri, conf), fs); 132 } 133 134 /** 135 * Get a filesystem instance based on the uri, the passed 136 * configuration and the user 137 * @param uri of the filesystem 138 * @param conf the configuration to use 139 * @param user to perform the get as 140 * @return the filesystem instance 141 * @throws IOException 142 * @throws InterruptedException 143 */ 144 public static FileSystem get(final URI uri, final Configuration conf, 145 final String user) throws IOException, InterruptedException { 146 String ticketCachePath = 147 conf.get(CommonConfigurationKeys.KERBEROS_TICKET_CACHE_PATH); 148 UserGroupInformation ugi = 149 UserGroupInformation.getBestUGI(ticketCachePath, user); 150 return ugi.doAs(new PrivilegedExceptionAction<FileSystem>() { 151 @Override 152 public FileSystem run() throws IOException { 153 return get(uri, conf); 154 } 155 }); 156 } 157 158 /** 159 * Returns the configured filesystem implementation. 160 * @param conf the configuration to use 161 */ 162 public static FileSystem get(Configuration conf) throws IOException { 163 return get(getDefaultUri(conf), conf); 164 } 165 166 /** Get the default filesystem URI from a configuration. 167 * @param conf the configuration to use 168 * @return the uri of the default filesystem 169 */ 170 public static URI getDefaultUri(Configuration conf) { 171 return URI.create(fixName(conf.get(FS_DEFAULT_NAME_KEY, DEFAULT_FS))); 172 } 173 174 /** Set the default filesystem URI in a configuration. 
175 * @param conf the configuration to alter 176 * @param uri the new default filesystem uri 177 */ 178 public static void setDefaultUri(Configuration conf, URI uri) { 179 conf.set(FS_DEFAULT_NAME_KEY, uri.toString()); 180 } 181 182 /** Set the default filesystem URI in a configuration. 183 * @param conf the configuration to alter 184 * @param uri the new default filesystem uri 185 */ 186 public static void setDefaultUri(Configuration conf, String uri) { 187 setDefaultUri(conf, URI.create(fixName(uri))); 188 } 189 190 /** Called after a new FileSystem instance is constructed. 191 * @param name a uri whose authority section names the host, port, etc. 192 * for this FileSystem 193 * @param conf the configuration 194 */ 195 public void initialize(URI name, Configuration conf) throws IOException { 196 statistics = getStatistics(name.getScheme(), getClass()); 197 } 198 199 /** 200 * Return the protocol scheme for the FileSystem. 201 * <p/> 202 * This implementation throws an <code>UnsupportedOperationException</code>. 203 * 204 * @return the protocol scheme for the FileSystem. 205 */ 206 public String getScheme() { 207 throw new UnsupportedOperationException("Not implemented by the " + getClass().getSimpleName() + " FileSystem implementation"); 208 } 209 210 /** Returns a URI whose scheme and authority identify this FileSystem.*/ 211 public abstract URI getUri(); 212 213 /** 214 * Resolve the uri's hostname and add the default port if not in the uri 215 * @return URI 216 * @see NetUtils#getCanonicalUri(URI, int) 217 */ 218 protected URI getCanonicalUri() { 219 return NetUtils.getCanonicalUri(getUri(), getDefaultPort()); 220 } 221 222 /** 223 * Get the default port for this file system. 224 * @return the default port or 0 if there isn't one 225 */ 226 protected int getDefaultPort() { 227 return 0; 228 } 229 230 /** 231 * Get a canonical service name for this file system. 
The token cache is 232 * the only user of the canonical service name, and uses it to lookup this 233 * filesystem's service tokens. 234 * If file system provides a token of its own then it must have a canonical 235 * name, otherwise canonical name can be null. 236 * 237 * Default Impl: If the file system has child file systems 238 * (such as an embedded file system) then it is assumed that the fs has no 239 * tokens of its own and hence returns a null name; otherwise a service 240 * name is built using Uri and port. 241 * 242 * @return a service string that uniquely identifies this file system, null 243 * if the filesystem does not implement tokens 244 * @see SecurityUtil#buildDTServiceName(URI, int) 245 */ 246 @InterfaceAudience.LimitedPrivate({ "HDFS", "MapReduce" }) 247 public String getCanonicalServiceName() { 248 return (getChildFileSystems() == null) 249 ? SecurityUtil.buildDTServiceName(getUri(), getDefaultPort()) 250 : null; 251 } 252 253 /** @deprecated call #getUri() instead.*/ 254 @Deprecated 255 public String getName() { return getUri().toString(); } 256 257 /** @deprecated call #get(URI,Configuration) instead. */ 258 @Deprecated 259 public static FileSystem getNamed(String name, Configuration conf) 260 throws IOException { 261 return get(URI.create(fixName(name)), conf); 262 } 263 264 /** Update old-format filesystem names, for back-compatibility. This should 265 * eventually be replaced with a checkName() method that throws an exception 266 * for old-format names. */ 267 private static String fixName(String name) { 268 // convert old-format name to new-format name 269 if (name.equals("local")) { // "local" is now "file:///". 270 LOG.warn("\"local\" is a deprecated filesystem name." 271 +" Use \"file:///\" instead."); 272 name = "file:///"; 273 } else if (name.indexOf('/')==-1) { // unqualified is "hdfs://" 274 LOG.warn("\""+name+"\" is a deprecated filesystem name." 
275 +" Use \"hdfs://"+name+"/\" instead."); 276 name = "hdfs://"+name; 277 } 278 return name; 279 } 280 281 /** 282 * Get the local file system. 283 * @param conf the configuration to configure the file system with 284 * @return a LocalFileSystem 285 */ 286 public static LocalFileSystem getLocal(Configuration conf) 287 throws IOException { 288 return (LocalFileSystem)get(LocalFileSystem.NAME, conf); 289 } 290 291 /** Returns the FileSystem for this URI's scheme and authority. The scheme 292 * of the URI determines a configuration property name, 293 * <tt>fs.<i>scheme</i>.class</tt> whose value names the FileSystem class. 294 * The entire URI is passed to the FileSystem instance's initialize method. 295 */ 296 public static FileSystem get(URI uri, Configuration conf) throws IOException { 297 String scheme = uri.getScheme(); 298 String authority = uri.getAuthority(); 299 300 if (scheme == null && authority == null) { // use default FS 301 return get(conf); 302 } 303 304 if (scheme != null && authority == null) { // no authority 305 URI defaultUri = getDefaultUri(conf); 306 if (scheme.equals(defaultUri.getScheme()) // if scheme matches default 307 && defaultUri.getAuthority() != null) { // & default has authority 308 return get(defaultUri, conf); // return default 309 } 310 } 311 312 String disableCacheName = String.format("fs.%s.impl.disable.cache", scheme); 313 if (conf.getBoolean(disableCacheName, false)) { 314 return createFileSystem(uri, conf); 315 } 316 317 return CACHE.get(uri, conf); 318 } 319 320 /** 321 * Returns the FileSystem for this URI's scheme and authority and the 322 * passed user. 
Internally invokes {@link #newInstance(URI, Configuration)} 323 * @param uri of the filesystem 324 * @param conf the configuration to use 325 * @param user to perform the get as 326 * @return filesystem instance 327 * @throws IOException 328 * @throws InterruptedException 329 */ 330 public static FileSystem newInstance(final URI uri, final Configuration conf, 331 final String user) throws IOException, InterruptedException { 332 String ticketCachePath = 333 conf.get(CommonConfigurationKeys.KERBEROS_TICKET_CACHE_PATH); 334 UserGroupInformation ugi = 335 UserGroupInformation.getBestUGI(ticketCachePath, user); 336 return ugi.doAs(new PrivilegedExceptionAction<FileSystem>() { 337 @Override 338 public FileSystem run() throws IOException { 339 return newInstance(uri,conf); 340 } 341 }); 342 } 343 /** Returns the FileSystem for this URI's scheme and authority. The scheme 344 * of the URI determines a configuration property name, 345 * <tt>fs.<i>scheme</i>.class</tt> whose value names the FileSystem class. 346 * The entire URI is passed to the FileSystem instance's initialize method. 347 * This always returns a new FileSystem object. 348 */ 349 public static FileSystem newInstance(URI uri, Configuration conf) throws IOException { 350 String scheme = uri.getScheme(); 351 String authority = uri.getAuthority(); 352 353 if (scheme == null) { // no scheme: use default FS 354 return newInstance(conf); 355 } 356 357 if (authority == null) { // no authority 358 URI defaultUri = getDefaultUri(conf); 359 if (scheme.equals(defaultUri.getScheme()) // if scheme matches default 360 && defaultUri.getAuthority() != null) { // & default has authority 361 return newInstance(defaultUri, conf); // return default 362 } 363 } 364 return CACHE.getUnique(uri, conf); 365 } 366 367 /** Returns a unique configured filesystem implementation. 368 * This always returns a new FileSystem object. 
369 * @param conf the configuration to use 370 */ 371 public static FileSystem newInstance(Configuration conf) throws IOException { 372 return newInstance(getDefaultUri(conf), conf); 373 } 374 375 /** 376 * Get a unique local file system object 377 * @param conf the configuration to configure the file system with 378 * @return a LocalFileSystem 379 * This always returns a new FileSystem object. 380 */ 381 public static LocalFileSystem newInstanceLocal(Configuration conf) 382 throws IOException { 383 return (LocalFileSystem)newInstance(LocalFileSystem.NAME, conf); 384 } 385 386 /** 387 * Close all cached filesystems. Be sure those filesystems are not 388 * used anymore. 389 * 390 * @throws IOException 391 */ 392 public static void closeAll() throws IOException { 393 CACHE.closeAll(); 394 } 395 396 /** 397 * Close all cached filesystems for a given UGI. Be sure those filesystems 398 * are not used anymore. 399 * @param ugi user group info to close 400 * @throws IOException 401 */ 402 public static void closeAllForUGI(UserGroupInformation ugi) 403 throws IOException { 404 CACHE.closeAll(ugi); 405 } 406 407 /** 408 * Make sure that a path specifies a FileSystem. 409 * @param path to use 410 */ 411 public Path makeQualified(Path path) { 412 checkPath(path); 413 return path.makeQualified(this.getUri(), this.getWorkingDirectory()); 414 } 415 416 /** 417 * Get a new delegation token for this file system. 418 * This is an internal method that should have been declared protected 419 * but wasn't historically. 420 * Callers should use {@link #addDelegationTokens(String, Credentials)} 421 * 422 * @param renewer the account name that is allowed to renew the token. 
423 * @return a new delegation token 424 * @throws IOException 425 */ 426 @InterfaceAudience.Private() 427 public Token<?> getDelegationToken(String renewer) throws IOException { 428 return null; 429 } 430 431 /** 432 * Obtain all delegation tokens used by this FileSystem that are not 433 * already present in the given Credentials. Existing tokens will neither 434 * be verified as valid nor having the given renewer. Missing tokens will 435 * be acquired and added to the given Credentials. 436 * 437 * Default Impl: works for simple fs with its own token 438 * and also for an embedded fs whose tokens are those of its 439 * children file system (i.e. the embedded fs has not tokens of its 440 * own). 441 * 442 * @param renewer the user allowed to renew the delegation tokens 443 * @param credentials cache in which to add new delegation tokens 444 * @return list of new delegation tokens 445 * @throws IOException 446 */ 447 @InterfaceAudience.LimitedPrivate({ "HDFS", "MapReduce" }) 448 public Token<?>[] addDelegationTokens( 449 final String renewer, Credentials credentials) throws IOException { 450 if (credentials == null) { 451 credentials = new Credentials(); 452 } 453 final List<Token<?>> tokens = new ArrayList<Token<?>>(); 454 collectDelegationTokens(renewer, credentials, tokens); 455 return tokens.toArray(new Token<?>[tokens.size()]); 456 } 457 458 /** 459 * Recursively obtain the tokens for this FileSystem and all descended 460 * FileSystems as determined by getChildFileSystems(). 
461 * @param renewer the user allowed to renew the delegation tokens 462 * @param credentials cache in which to add the new delegation tokens 463 * @param tokens list in which to add acquired tokens 464 * @throws IOException 465 */ 466 private void collectDelegationTokens(final String renewer, 467 final Credentials credentials, 468 final List<Token<?>> tokens) 469 throws IOException { 470 final String serviceName = getCanonicalServiceName(); 471 // Collect token of the this filesystem and then of its embedded children 472 if (serviceName != null) { // fs has token, grab it 473 final Text service = new Text(serviceName); 474 Token<?> token = credentials.getToken(service); 475 if (token == null) { 476 token = getDelegationToken(renewer); 477 if (token != null) { 478 tokens.add(token); 479 credentials.addToken(service, token); 480 } 481 } 482 } 483 // Now collect the tokens from the children 484 final FileSystem[] children = getChildFileSystems(); 485 if (children != null) { 486 for (final FileSystem fs : children) { 487 fs.collectDelegationTokens(renewer, credentials, tokens); 488 } 489 } 490 } 491 492 /** 493 * Get all the immediate child FileSystems embedded in this FileSystem. 494 * It does not recurse and get grand children. If a FileSystem 495 * has multiple child FileSystems, then it should return a unique list 496 * of those FileSystems. Default is to return null to signify no children. 497 * 498 * @return FileSystems used by this FileSystem 499 */ 500 @InterfaceAudience.LimitedPrivate({ "HDFS" }) 501 @VisibleForTesting 502 public FileSystem[] getChildFileSystems() { 503 return null; 504 } 505 506 /** create a file with the provided permission 507 * The permission of the file is set to be the provided permission as in 508 * setPermission, not permission&~umask 509 * 510 * It is implemented using two RPCs. It is understood that it is inefficient, 511 * but the implementation is thread-safe. 
The other option is to change the 512 * value of umask in configuration to be 0, but it is not thread-safe. 513 * 514 * @param fs file system handle 515 * @param file the name of the file to be created 516 * @param permission the permission of the file 517 * @return an output stream 518 * @throws IOException 519 */ 520 public static FSDataOutputStream create(FileSystem fs, 521 Path file, FsPermission permission) throws IOException { 522 // create the file with default permission 523 FSDataOutputStream out = fs.create(file); 524 // set its permission to the supplied one 525 fs.setPermission(file, permission); 526 return out; 527 } 528 529 /** create a directory with the provided permission 530 * The permission of the directory is set to be the provided permission as in 531 * setPermission, not permission&~umask 532 * 533 * @see #create(FileSystem, Path, FsPermission) 534 * 535 * @param fs file system handle 536 * @param dir the name of the directory to be created 537 * @param permission the permission of the directory 538 * @return true if the directory creation succeeds; false otherwise 539 * @throws IOException 540 */ 541 public static boolean mkdirs(FileSystem fs, Path dir, FsPermission permission) 542 throws IOException { 543 // create the directory using the default permission 544 boolean result = fs.mkdirs(dir); 545 // set its permission to be the supplied one 546 fs.setPermission(dir, permission); 547 return result; 548 } 549 550 /////////////////////////////////////////////////////////////// 551 // FileSystem 552 /////////////////////////////////////////////////////////////// 553 554 protected FileSystem() { 555 super(null); 556 } 557 558 /** 559 * Check that a Path belongs to this FileSystem. 
560 * @param path to check 561 */ 562 protected void checkPath(Path path) { 563 URI uri = path.toUri(); 564 String thatScheme = uri.getScheme(); 565 if (thatScheme == null) // fs is relative 566 return; 567 URI thisUri = getCanonicalUri(); 568 String thisScheme = thisUri.getScheme(); 569 //authority and scheme are not case sensitive 570 if (thisScheme.equalsIgnoreCase(thatScheme)) {// schemes match 571 String thisAuthority = thisUri.getAuthority(); 572 String thatAuthority = uri.getAuthority(); 573 if (thatAuthority == null && // path's authority is null 574 thisAuthority != null) { // fs has an authority 575 URI defaultUri = getDefaultUri(getConf()); 576 if (thisScheme.equalsIgnoreCase(defaultUri.getScheme())) { 577 uri = defaultUri; // schemes match, so use this uri instead 578 } else { 579 uri = null; // can't determine auth of the path 580 } 581 } 582 if (uri != null) { 583 // canonicalize uri before comparing with this fs 584 uri = NetUtils.getCanonicalUri(uri, getDefaultPort()); 585 thatAuthority = uri.getAuthority(); 586 if (thisAuthority == thatAuthority || // authorities match 587 (thisAuthority != null && 588 thisAuthority.equalsIgnoreCase(thatAuthority))) 589 return; 590 } 591 } 592 throw new IllegalArgumentException("Wrong FS: "+path+ 593 ", expected: "+this.getUri()); 594 } 595 596 /** 597 * Return an array containing hostnames, offset and size of 598 * portions of the given file. For a nonexistent 599 * file or regions, null will be returned. 600 * 601 * This call is most helpful with DFS, where it returns 602 * hostnames of machines that contain the given file. 603 * 604 * The FileSystem will simply return an elt containing 'localhost'. 
605 * 606 * @param file FilesStatus to get data from 607 * @param start offset into the given file 608 * @param len length for which to get locations for 609 */ 610 public BlockLocation[] getFileBlockLocations(FileStatus file, 611 long start, long len) throws IOException { 612 if (file == null) { 613 return null; 614 } 615 616 if (start < 0 || len < 0) { 617 throw new IllegalArgumentException("Invalid start or len parameter"); 618 } 619 620 if (file.getLen() <= start) { 621 return new BlockLocation[0]; 622 623 } 624 String[] name = { "localhost:50010" }; 625 String[] host = { "localhost" }; 626 return new BlockLocation[] { 627 new BlockLocation(name, host, 0, file.getLen()) }; 628 } 629 630 631 /** 632 * Return an array containing hostnames, offset and size of 633 * portions of the given file. For a nonexistent 634 * file or regions, null will be returned. 635 * 636 * This call is most helpful with DFS, where it returns 637 * hostnames of machines that contain the given file. 638 * 639 * The FileSystem will simply return an elt containing 'localhost'. 
640 * 641 * @param p path is used to identify an FS since an FS could have 642 * another FS that it could be delegating the call to 643 * @param start offset into the given file 644 * @param len length for which to get locations for 645 */ 646 public BlockLocation[] getFileBlockLocations(Path p, 647 long start, long len) throws IOException { 648 if (p == null) { 649 throw new NullPointerException(); 650 } 651 FileStatus file = getFileStatus(p); 652 return getFileBlockLocations(file, start, len); 653 } 654 655 /** 656 * Return a set of server default configuration values 657 * @return server default configuration values 658 * @throws IOException 659 * @deprecated use {@link #getServerDefaults(Path)} instead 660 */ 661 @Deprecated 662 public FsServerDefaults getServerDefaults() throws IOException { 663 Configuration conf = getConf(); 664 // CRC32 is chosen as default as it is available in all 665 // releases that support checksum. 666 // The client trash configuration is ignored. 667 return new FsServerDefaults(getDefaultBlockSize(), 668 conf.getInt("io.bytes.per.checksum", 512), 669 64 * 1024, 670 getDefaultReplication(), 671 conf.getInt("io.file.buffer.size", 4096), 672 false, 673 CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_DEFAULT, 674 DataChecksum.Type.CRC32); 675 } 676 677 /** 678 * Return a set of server default configuration values 679 * @param p path is used to identify an FS since an FS could have 680 * another FS that it could be delegating the call to 681 * @return server default configuration values 682 * @throws IOException 683 */ 684 public FsServerDefaults getServerDefaults(Path p) throws IOException { 685 return getServerDefaults(); 686 } 687 688 /** 689 * Return the fully-qualified path of path f resolving the path 690 * through any symlinks or mount point 691 * @param p path to be resolved 692 * @return fully qualified path 693 * @throws FileNotFoundException 694 */ 695 public Path resolvePath(final Path p) throws IOException { 696 checkPath(p); 
697 return getFileStatus(p).getPath(); 698 } 699 700 /** 701 * Opens an FSDataInputStream at the indicated Path. 702 * @param f the file name to open 703 * @param bufferSize the size of the buffer to be used. 704 */ 705 public abstract FSDataInputStream open(Path f, int bufferSize) 706 throws IOException; 707 708 /** 709 * Opens an FSDataInputStream at the indicated Path. 710 * @param f the file to open 711 */ 712 public FSDataInputStream open(Path f) throws IOException { 713 return open(f, getConf().getInt("io.file.buffer.size", 4096)); 714 } 715 716 /** 717 * Create an FSDataOutputStream at the indicated Path. 718 * Files are overwritten by default. 719 * @param f the file to create 720 */ 721 public FSDataOutputStream create(Path f) throws IOException { 722 return create(f, true); 723 } 724 725 /** 726 * Create an FSDataOutputStream at the indicated Path. 727 * @param f the file to create 728 * @param overwrite if a file with this name already exists, then if true, 729 * the file will be overwritten, and if false an exception will be thrown. 730 */ 731 public FSDataOutputStream create(Path f, boolean overwrite) 732 throws IOException { 733 return create(f, overwrite, 734 getConf().getInt("io.file.buffer.size", 4096), 735 getDefaultReplication(f), 736 getDefaultBlockSize(f)); 737 } 738 739 /** 740 * Create an FSDataOutputStream at the indicated Path with write-progress 741 * reporting. 742 * Files are overwritten by default. 743 * @param f the file to create 744 * @param progress to report progress 745 */ 746 public FSDataOutputStream create(Path f, Progressable progress) 747 throws IOException { 748 return create(f, true, 749 getConf().getInt("io.file.buffer.size", 4096), 750 getDefaultReplication(f), 751 getDefaultBlockSize(f), progress); 752 } 753 754 /** 755 * Create an FSDataOutputStream at the indicated Path. 756 * Files are overwritten by default. 
757 * @param f the file to create 758 * @param replication the replication factor 759 */ 760 public FSDataOutputStream create(Path f, short replication) 761 throws IOException { 762 return create(f, true, 763 getConf().getInt("io.file.buffer.size", 4096), 764 replication, 765 getDefaultBlockSize(f)); 766 } 767 768 /** 769 * Create an FSDataOutputStream at the indicated Path with write-progress 770 * reporting. 771 * Files are overwritten by default. 772 * @param f the file to create 773 * @param replication the replication factor 774 * @param progress to report progress 775 */ 776 public FSDataOutputStream create(Path f, short replication, 777 Progressable progress) throws IOException { 778 return create(f, true, 779 getConf().getInt("io.file.buffer.size", 4096), 780 replication, 781 getDefaultBlockSize(f), progress); 782 } 783 784 785 /** 786 * Create an FSDataOutputStream at the indicated Path. 787 * @param f the file name to create 788 * @param overwrite if a file with this name already exists, then if true, 789 * the file will be overwritten, and if false an error will be thrown. 790 * @param bufferSize the size of the buffer to be used. 791 */ 792 public FSDataOutputStream create(Path f, 793 boolean overwrite, 794 int bufferSize 795 ) throws IOException { 796 return create(f, overwrite, bufferSize, 797 getDefaultReplication(f), 798 getDefaultBlockSize(f)); 799 } 800 801 /** 802 * Create an FSDataOutputStream at the indicated Path with write-progress 803 * reporting. 804 * @param f the path of the file to open 805 * @param overwrite if a file with this name already exists, then if true, 806 * the file will be overwritten, and if false an error will be thrown. 807 * @param bufferSize the size of the buffer to be used. 
808 */ 809 public FSDataOutputStream create(Path f, 810 boolean overwrite, 811 int bufferSize, 812 Progressable progress 813 ) throws IOException { 814 return create(f, overwrite, bufferSize, 815 getDefaultReplication(f), 816 getDefaultBlockSize(f), progress); 817 } 818 819 820 /** 821 * Create an FSDataOutputStream at the indicated Path. 822 * @param f the file name to open 823 * @param overwrite if a file with this name already exists, then if true, 824 * the file will be overwritten, and if false an error will be thrown. 825 * @param bufferSize the size of the buffer to be used. 826 * @param replication required block replication for the file. 827 */ 828 public FSDataOutputStream create(Path f, 829 boolean overwrite, 830 int bufferSize, 831 short replication, 832 long blockSize 833 ) throws IOException { 834 return create(f, overwrite, bufferSize, replication, blockSize, null); 835 } 836 837 /** 838 * Create an FSDataOutputStream at the indicated Path with write-progress 839 * reporting. 840 * @param f the file name to open 841 * @param overwrite if a file with this name already exists, then if true, 842 * the file will be overwritten, and if false an error will be thrown. 843 * @param bufferSize the size of the buffer to be used. 844 * @param replication required block replication for the file. 845 */ 846 public FSDataOutputStream create(Path f, 847 boolean overwrite, 848 int bufferSize, 849 short replication, 850 long blockSize, 851 Progressable progress 852 ) throws IOException { 853 return this.create(f, FsPermission.getFileDefault().applyUMask( 854 FsPermission.getUMask(getConf())), overwrite, bufferSize, 855 replication, blockSize, progress); 856 } 857 858 /** 859 * Create an FSDataOutputStream at the indicated Path with write-progress 860 * reporting. 
861 * @param f the file name to open 862 * @param permission 863 * @param overwrite if a file with this name already exists, then if true, 864 * the file will be overwritten, and if false an error will be thrown. 865 * @param bufferSize the size of the buffer to be used. 866 * @param replication required block replication for the file. 867 * @param blockSize 868 * @param progress 869 * @throws IOException 870 * @see #setPermission(Path, FsPermission) 871 */ 872 public abstract FSDataOutputStream create(Path f, 873 FsPermission permission, 874 boolean overwrite, 875 int bufferSize, 876 short replication, 877 long blockSize, 878 Progressable progress) throws IOException; 879 880 /** 881 * Create an FSDataOutputStream at the indicated Path with write-progress 882 * reporting. 883 * @param f the file name to open 884 * @param permission 885 * @param flags {@link CreateFlag}s to use for this stream. 886 * @param bufferSize the size of the buffer to be used. 887 * @param replication required block replication for the file. 888 * @param blockSize 889 * @param progress 890 * @throws IOException 891 * @see #setPermission(Path, FsPermission) 892 */ 893 public FSDataOutputStream create(Path f, 894 FsPermission permission, 895 EnumSet<CreateFlag> flags, 896 int bufferSize, 897 short replication, 898 long blockSize, 899 Progressable progress) throws IOException { 900 return create(f, permission, flags, bufferSize, replication, 901 blockSize, progress, null); 902 } 903 904 /** 905 * Create an FSDataOutputStream at the indicated Path with a custom 906 * checksum option 907 * @param f the file name to open 908 * @param permission 909 * @param flags {@link CreateFlag}s to use for this stream. 910 * @param bufferSize the size of the buffer to be used. 911 * @param replication required block replication for the file. 912 * @param blockSize 913 * @param progress 914 * @param checksumOpt checksum parameter. If null, the values 915 * found in conf will be used. 
916 * @throws IOException 917 * @see #setPermission(Path, FsPermission) 918 */ 919 public FSDataOutputStream create(Path f, 920 FsPermission permission, 921 EnumSet<CreateFlag> flags, 922 int bufferSize, 923 short replication, 924 long blockSize, 925 Progressable progress, 926 ChecksumOpt checksumOpt) throws IOException { 927 // Checksum options are ignored by default. The file systems that 928 // implement checksum need to override this method. The full 929 // support is currently only available in DFS. 930 return create(f, permission, flags.contains(CreateFlag.OVERWRITE), 931 bufferSize, replication, blockSize, progress); 932 } 933 934 /*. 935 * This create has been added to support the FileContext that processes 936 * the permission 937 * with umask before calling this method. 938 * This a temporary method added to support the transition from FileSystem 939 * to FileContext for user applications. 940 */ 941 @Deprecated 942 protected FSDataOutputStream primitiveCreate(Path f, 943 FsPermission absolutePermission, EnumSet<CreateFlag> flag, int bufferSize, 944 short replication, long blockSize, Progressable progress, 945 ChecksumOpt checksumOpt) throws IOException { 946 947 boolean pathExists = exists(f); 948 CreateFlag.validate(f, pathExists, flag); 949 950 // Default impl assumes that permissions do not matter and 951 // nor does the bytesPerChecksum hence 952 // calling the regular create is good enough. 953 // FSs that implement permissions should override this. 954 955 if (pathExists && flag.contains(CreateFlag.APPEND)) { 956 return append(f, bufferSize, progress); 957 } 958 959 return this.create(f, absolutePermission, 960 flag.contains(CreateFlag.OVERWRITE), bufferSize, replication, 961 blockSize, progress); 962 } 963 964 /** 965 * This version of the mkdirs method assumes that the permission is absolute. 966 * It has been added to support the FileContext that processes the permission 967 * with umask before calling this method. 
968 * This a temporary method added to support the transition from FileSystem 969 * to FileContext for user applications. 970 */ 971 @Deprecated 972 protected boolean primitiveMkdir(Path f, FsPermission absolutePermission) 973 throws IOException { 974 // Default impl is to assume that permissions do not matter and hence 975 // calling the regular mkdirs is good enough. 976 // FSs that implement permissions should override this. 977 return this.mkdirs(f, absolutePermission); 978 } 979 980 981 /** 982 * This version of the mkdirs method assumes that the permission is absolute. 983 * It has been added to support the FileContext that processes the permission 984 * with umask before calling this method. 985 * This a temporary method added to support the transition from FileSystem 986 * to FileContext for user applications. 987 */ 988 @Deprecated 989 protected void primitiveMkdir(Path f, FsPermission absolutePermission, 990 boolean createParent) 991 throws IOException { 992 993 if (!createParent) { // parent must exist. 994 // since the this.mkdirs makes parent dirs automatically 995 // we must throw exception if parent does not exist. 996 final FileStatus stat = getFileStatus(f.getParent()); 997 if (stat == null) { 998 throw new FileNotFoundException("Missing parent:" + f); 999 } 1000 if (!stat.isDirectory()) { 1001 throw new ParentNotDirectoryException("parent is not a dir"); 1002 } 1003 // parent does exist - go ahead with mkdir of leaf 1004 } 1005 // Default impl is to assume that permissions do not matter and hence 1006 // calling the regular mkdirs is good enough. 1007 // FSs that implement permissions should override this. 1008 if (!this.mkdirs(f, absolutePermission)) { 1009 throw new IOException("mkdir of "+ f + " failed"); 1010 } 1011 } 1012 1013 /** 1014 * Opens an FSDataOutputStream at the indicated Path with write-progress 1015 * reporting. Same as create(), except fails if parent directory doesn't 1016 * already exist. 
1017 * @param f the file name to open 1018 * @param overwrite if a file with this name already exists, then if true, 1019 * the file will be overwritten, and if false an error will be thrown. 1020 * @param bufferSize the size of the buffer to be used. 1021 * @param replication required block replication for the file. 1022 * @param blockSize 1023 * @param progress 1024 * @throws IOException 1025 * @see #setPermission(Path, FsPermission) 1026 * @deprecated API only for 0.20-append 1027 */ 1028 @Deprecated 1029 public FSDataOutputStream createNonRecursive(Path f, 1030 boolean overwrite, 1031 int bufferSize, short replication, long blockSize, 1032 Progressable progress) throws IOException { 1033 return this.createNonRecursive(f, FsPermission.getFileDefault(), 1034 overwrite, bufferSize, replication, blockSize, progress); 1035 } 1036 1037 /** 1038 * Opens an FSDataOutputStream at the indicated Path with write-progress 1039 * reporting. Same as create(), except fails if parent directory doesn't 1040 * already exist. 1041 * @param f the file name to open 1042 * @param permission 1043 * @param overwrite if a file with this name already exists, then if true, 1044 * the file will be overwritten, and if false an error will be thrown. 1045 * @param bufferSize the size of the buffer to be used. 1046 * @param replication required block replication for the file. 1047 * @param blockSize 1048 * @param progress 1049 * @throws IOException 1050 * @see #setPermission(Path, FsPermission) 1051 * @deprecated API only for 0.20-append 1052 */ 1053 @Deprecated 1054 public FSDataOutputStream createNonRecursive(Path f, FsPermission permission, 1055 boolean overwrite, int bufferSize, short replication, long blockSize, 1056 Progressable progress) throws IOException { 1057 return createNonRecursive(f, permission, 1058 overwrite ? 
EnumSet.of(CreateFlag.CREATE, CreateFlag.OVERWRITE) 1059 : EnumSet.of(CreateFlag.CREATE), bufferSize, 1060 replication, blockSize, progress); 1061 } 1062 1063 /** 1064 * Opens an FSDataOutputStream at the indicated Path with write-progress 1065 * reporting. Same as create(), except fails if parent directory doesn't 1066 * already exist. 1067 * @param f the file name to open 1068 * @param permission 1069 * @param flags {@link CreateFlag}s to use for this stream. 1070 * @param bufferSize the size of the buffer to be used. 1071 * @param replication required block replication for the file. 1072 * @param blockSize 1073 * @param progress 1074 * @throws IOException 1075 * @see #setPermission(Path, FsPermission) 1076 * @deprecated API only for 0.20-append 1077 */ 1078 @Deprecated 1079 public FSDataOutputStream createNonRecursive(Path f, FsPermission permission, 1080 EnumSet<CreateFlag> flags, int bufferSize, short replication, long blockSize, 1081 Progressable progress) throws IOException { 1082 throw new IOException("createNonRecursive unsupported for this filesystem " 1083 + this.getClass()); 1084 } 1085 1086 /** 1087 * Creates the given Path as a brand-new zero-length file. If 1088 * create fails, or if it already existed, return false. 1089 * 1090 * @param f path to use for create 1091 */ 1092 public boolean createNewFile(Path f) throws IOException { 1093 if (exists(f)) { 1094 return false; 1095 } else { 1096 create(f, false, getConf().getInt("io.file.buffer.size", 4096)).close(); 1097 return true; 1098 } 1099 } 1100 1101 /** 1102 * Append to an existing file (optional operation). 1103 * Same as append(f, getConf().getInt("io.file.buffer.size", 4096), null) 1104 * @param f the existing file to be appended. 1105 * @throws IOException 1106 */ 1107 public FSDataOutputStream append(Path f) throws IOException { 1108 return append(f, getConf().getInt("io.file.buffer.size", 4096), null); 1109 } 1110 /** 1111 * Append to an existing file (optional operation). 
1112 * Same as append(f, bufferSize, null). 1113 * @param f the existing file to be appended. 1114 * @param bufferSize the size of the buffer to be used. 1115 * @throws IOException 1116 */ 1117 public FSDataOutputStream append(Path f, int bufferSize) throws IOException { 1118 return append(f, bufferSize, null); 1119 } 1120 1121 /** 1122 * Append to an existing file (optional operation). 1123 * @param f the existing file to be appended. 1124 * @param bufferSize the size of the buffer to be used. 1125 * @param progress for reporting progress if it is not null. 1126 * @throws IOException 1127 */ 1128 public abstract FSDataOutputStream append(Path f, int bufferSize, 1129 Progressable progress) throws IOException; 1130 1131 /** 1132 * Concat existing files together. 1133 * @param trg the path to the target destination. 1134 * @param psrcs the paths to the sources to use for the concatenation. 1135 * @throws IOException 1136 */ 1137 public void concat(final Path trg, final Path [] psrcs) throws IOException { 1138 throw new UnsupportedOperationException("Not implemented by the " + 1139 getClass().getSimpleName() + " FileSystem implementation"); 1140 } 1141 1142 /** 1143 * Get replication. 1144 * 1145 * @deprecated Use getFileStatus() instead 1146 * @param src file name 1147 * @return file replication 1148 * @throws IOException 1149 */ 1150 @Deprecated 1151 public short getReplication(Path src) throws IOException { 1152 return getFileStatus(src).getReplication(); 1153 } 1154 1155 /** 1156 * Set replication for an existing file. 1157 * 1158 * @param src file name 1159 * @param replication new replication 1160 * @throws IOException 1161 * @return true if successful; 1162 * false if file does not exist or is a directory 1163 */ 1164 public boolean setReplication(Path src, short replication) 1165 throws IOException { 1166 return true; 1167 } 1168 1169 /** 1170 * Renames Path src to Path dst. Can take place on local fs 1171 * or remote DFS. 
1172 * @param src path to be renamed 1173 * @param dst new path after rename 1174 * @throws IOException on failure 1175 * @return true if rename is successful 1176 */ 1177 public abstract boolean rename(Path src, Path dst) throws IOException; 1178 1179 /** 1180 * Renames Path src to Path dst 1181 * <ul> 1182 * <li 1183 * <li>Fails if src is a file and dst is a directory. 1184 * <li>Fails if src is a directory and dst is a file. 1185 * <li>Fails if the parent of dst does not exist or is a file. 1186 * </ul> 1187 * <p> 1188 * If OVERWRITE option is not passed as an argument, rename fails 1189 * if the dst already exists. 1190 * <p> 1191 * If OVERWRITE option is passed as an argument, rename overwrites 1192 * the dst if it is a file or an empty directory. Rename fails if dst is 1193 * a non-empty directory. 1194 * <p> 1195 * Note that atomicity of rename is dependent on the file system 1196 * implementation. Please refer to the file system documentation for 1197 * details. This default implementation is non atomic. 1198 * <p> 1199 * This method is deprecated since it is a temporary method added to 1200 * support the transition from FileSystem to FileContext for user 1201 * applications. 1202 * 1203 * @param src path to be renamed 1204 * @param dst new path after rename 1205 * @throws IOException on failure 1206 */ 1207 @Deprecated 1208 protected void rename(final Path src, final Path dst, 1209 final Rename... 
options) throws IOException { 1210 // Default implementation 1211 final FileStatus srcStatus = getFileStatus(src); 1212 if (srcStatus == null) { 1213 throw new FileNotFoundException("rename source " + src + " not found."); 1214 } 1215 1216 boolean overwrite = false; 1217 if (null != options) { 1218 for (Rename option : options) { 1219 if (option == Rename.OVERWRITE) { 1220 overwrite = true; 1221 } 1222 } 1223 } 1224 1225 FileStatus dstStatus; 1226 try { 1227 dstStatus = getFileStatus(dst); 1228 } catch (IOException e) { 1229 dstStatus = null; 1230 } 1231 if (dstStatus != null) { 1232 if (srcStatus.isDirectory() != dstStatus.isDirectory()) { 1233 throw new IOException("Source " + src + " Destination " + dst 1234 + " both should be either file or directory"); 1235 } 1236 if (!overwrite) { 1237 throw new FileAlreadyExistsException("rename destination " + dst 1238 + " already exists."); 1239 } 1240 // Delete the destination that is a file or an empty directory 1241 if (dstStatus.isDirectory()) { 1242 FileStatus[] list = listStatus(dst); 1243 if (list != null && list.length != 0) { 1244 throw new IOException( 1245 "rename cannot overwrite non empty destination directory " + dst); 1246 } 1247 } 1248 delete(dst, false); 1249 } else { 1250 final Path parent = dst.getParent(); 1251 final FileStatus parentStatus = getFileStatus(parent); 1252 if (parentStatus == null) { 1253 throw new FileNotFoundException("rename destination parent " + parent 1254 + " not found."); 1255 } 1256 if (!parentStatus.isDirectory()) { 1257 throw new ParentNotDirectoryException("rename destination parent " + parent 1258 + " is a file."); 1259 } 1260 } 1261 if (!rename(src, dst)) { 1262 throw new IOException("rename from " + src + " to " + dst + " failed."); 1263 } 1264 } 1265 1266 /** 1267 * Delete a file 1268 * @deprecated Use {@link #delete(Path, boolean)} instead. 
1269 */ 1270 @Deprecated 1271 public boolean delete(Path f) throws IOException { 1272 return delete(f, true); 1273 } 1274 1275 /** Delete a file. 1276 * 1277 * @param f the path to delete. 1278 * @param recursive if path is a directory and set to 1279 * true, the directory is deleted else throws an exception. In 1280 * case of a file the recursive can be set to either true or false. 1281 * @return true if delete is successful else false. 1282 * @throws IOException 1283 */ 1284 public abstract boolean delete(Path f, boolean recursive) throws IOException; 1285 1286 /** 1287 * Mark a path to be deleted when FileSystem is closed. 1288 * When the JVM shuts down, 1289 * all FileSystem objects will be closed automatically. 1290 * Then, 1291 * the marked path will be deleted as a result of closing the FileSystem. 1292 * 1293 * The path has to exist in the file system. 1294 * 1295 * @param f the path to delete. 1296 * @return true if deleteOnExit is successful, otherwise false. 1297 * @throws IOException 1298 */ 1299 public boolean deleteOnExit(Path f) throws IOException { 1300 if (!exists(f)) { 1301 return false; 1302 } 1303 synchronized (deleteOnExit) { 1304 deleteOnExit.add(f); 1305 } 1306 return true; 1307 } 1308 1309 /** 1310 * Cancel the deletion of the path when the FileSystem is closed 1311 * @param f the path to cancel deletion 1312 */ 1313 public boolean cancelDeleteOnExit(Path f) { 1314 synchronized (deleteOnExit) { 1315 return deleteOnExit.remove(f); 1316 } 1317 } 1318 1319 /** 1320 * Delete all files that were marked as delete-on-exit. This recursively 1321 * deletes all files in the specified paths. 
1322 */ 1323 protected void processDeleteOnExit() { 1324 synchronized (deleteOnExit) { 1325 for (Iterator<Path> iter = deleteOnExit.iterator(); iter.hasNext();) { 1326 Path path = iter.next(); 1327 try { 1328 if (exists(path)) { 1329 delete(path, true); 1330 } 1331 } 1332 catch (IOException e) { 1333 LOG.info("Ignoring failure to deleteOnExit for path " + path); 1334 } 1335 iter.remove(); 1336 } 1337 } 1338 } 1339 1340 /** Check if exists. 1341 * @param f source file 1342 */ 1343 public boolean exists(Path f) throws IOException { 1344 try { 1345 return getFileStatus(f) != null; 1346 } catch (FileNotFoundException e) { 1347 return false; 1348 } 1349 } 1350 1351 /** True iff the named path is a directory. 1352 * Note: Avoid using this method. Instead reuse the FileStatus 1353 * returned by getFileStatus() or listStatus() methods. 1354 * @param f path to check 1355 */ 1356 public boolean isDirectory(Path f) throws IOException { 1357 try { 1358 return getFileStatus(f).isDirectory(); 1359 } catch (FileNotFoundException e) { 1360 return false; // f does not exist 1361 } 1362 } 1363 1364 /** True iff the named path is a regular file. 1365 * Note: Avoid using this method. Instead reuse the FileStatus 1366 * returned by getFileStatus() or listStatus() methods. 1367 * @param f path to check 1368 */ 1369 public boolean isFile(Path f) throws IOException { 1370 try { 1371 return getFileStatus(f).isFile(); 1372 } catch (FileNotFoundException e) { 1373 return false; // f does not exist 1374 } 1375 } 1376 1377 /** The number of bytes in a file. */ 1378 /** @deprecated Use getFileStatus() instead */ 1379 @Deprecated 1380 public long getLength(Path f) throws IOException { 1381 return getFileStatus(f).getLen(); 1382 } 1383 1384 /** Return the {@link ContentSummary} of a given {@link Path}. 
1385 * @param f path to use 1386 */ 1387 public ContentSummary getContentSummary(Path f) throws IOException { 1388 FileStatus status = getFileStatus(f); 1389 if (status.isFile()) { 1390 // f is a file 1391 return new ContentSummary(status.getLen(), 1, 0); 1392 } 1393 // f is a directory 1394 long[] summary = {0, 0, 1}; 1395 for(FileStatus s : listStatus(f)) { 1396 ContentSummary c = s.isDirectory() ? getContentSummary(s.getPath()) : 1397 new ContentSummary(s.getLen(), 1, 0); 1398 summary[0] += c.getLength(); 1399 summary[1] += c.getFileCount(); 1400 summary[2] += c.getDirectoryCount(); 1401 } 1402 return new ContentSummary(summary[0], summary[1], summary[2]); 1403 } 1404 1405 final private static PathFilter DEFAULT_FILTER = new PathFilter() { 1406 @Override 1407 public boolean accept(Path file) { 1408 return true; 1409 } 1410 }; 1411 1412 /** 1413 * List the statuses of the files/directories in the given path if the path is 1414 * a directory. 1415 * 1416 * @param f given path 1417 * @return the statuses of the files/directories in the given patch 1418 * @throws FileNotFoundException when the path does not exist; 1419 * IOException see specific implementation 1420 */ 1421 public abstract FileStatus[] listStatus(Path f) throws FileNotFoundException, 1422 IOException; 1423 1424 /* 1425 * Filter files/directories in the given path using the user-supplied path 1426 * filter. Results are added to the given array <code>results</code>. 
1427 */ 1428 private void listStatus(ArrayList<FileStatus> results, Path f, 1429 PathFilter filter) throws FileNotFoundException, IOException { 1430 FileStatus listing[] = listStatus(f); 1431 if (listing == null) { 1432 throw new IOException("Error accessing " + f); 1433 } 1434 1435 for (int i = 0; i < listing.length; i++) { 1436 if (filter.accept(listing[i].getPath())) { 1437 results.add(listing[i]); 1438 } 1439 } 1440 } 1441 1442 /** 1443 * @return an iterator over the corrupt files under the given path 1444 * (may contain duplicates if a file has more than one corrupt block) 1445 * @throws IOException 1446 */ 1447 public RemoteIterator<Path> listCorruptFileBlocks(Path path) 1448 throws IOException { 1449 throw new UnsupportedOperationException(getClass().getCanonicalName() + 1450 " does not support" + 1451 " listCorruptFileBlocks"); 1452 } 1453 1454 /** 1455 * Filter files/directories in the given path using the user-supplied path 1456 * filter. 1457 * 1458 * @param f 1459 * a path name 1460 * @param filter 1461 * the user-supplied path filter 1462 * @return an array of FileStatus objects for the files under the given path 1463 * after applying the filter 1464 * @throws FileNotFoundException when the path does not exist; 1465 * IOException see specific implementation 1466 */ 1467 public FileStatus[] listStatus(Path f, PathFilter filter) 1468 throws FileNotFoundException, IOException { 1469 ArrayList<FileStatus> results = new ArrayList<FileStatus>(); 1470 listStatus(results, f, filter); 1471 return results.toArray(new FileStatus[results.size()]); 1472 } 1473 1474 /** 1475 * Filter files/directories in the given list of paths using default 1476 * path filter. 
1477 * 1478 * @param files 1479 * a list of paths 1480 * @return a list of statuses for the files under the given paths after 1481 * applying the filter default Path filter 1482 * @throws FileNotFoundException when the path does not exist; 1483 * IOException see specific implementation 1484 */ 1485 public FileStatus[] listStatus(Path[] files) 1486 throws FileNotFoundException, IOException { 1487 return listStatus(files, DEFAULT_FILTER); 1488 } 1489 1490 /** 1491 * Filter files/directories in the given list of paths using user-supplied 1492 * path filter. 1493 * 1494 * @param files 1495 * a list of paths 1496 * @param filter 1497 * the user-supplied path filter 1498 * @return a list of statuses for the files under the given paths after 1499 * applying the filter 1500 * @throws FileNotFoundException when the path does not exist; 1501 * IOException see specific implementation 1502 */ 1503 public FileStatus[] listStatus(Path[] files, PathFilter filter) 1504 throws FileNotFoundException, IOException { 1505 ArrayList<FileStatus> results = new ArrayList<FileStatus>(); 1506 for (int i = 0; i < files.length; i++) { 1507 listStatus(results, files[i], filter); 1508 } 1509 return results.toArray(new FileStatus[results.size()]); 1510 } 1511 1512 /** 1513 * <p>Return all the files that match filePattern and are not checksum 1514 * files. Results are sorted by their names. 1515 * 1516 * <p> 1517 * A filename pattern is composed of <i>regular</i> characters and 1518 * <i>special pattern matching</i> characters, which are: 1519 * 1520 * <dl> 1521 * <dd> 1522 * <dl> 1523 * <p> 1524 * <dt> <tt> ? </tt> 1525 * <dd> Matches any single character. 1526 * 1527 * <p> 1528 * <dt> <tt> * </tt> 1529 * <dd> Matches zero or more characters. 1530 * 1531 * <p> 1532 * <dt> <tt> [<i>abc</i>] </tt> 1533 * <dd> Matches a single character from character set 1534 * <tt>{<i>a,b,c</i>}</tt>. 
1535 * 1536 * <p> 1537 * <dt> <tt> [<i>a</i>-<i>b</i>] </tt> 1538 * <dd> Matches a single character from the character range 1539 * <tt>{<i>a...b</i>}</tt>. Note that character <tt><i>a</i></tt> must be 1540 * lexicographically less than or equal to character <tt><i>b</i></tt>. 1541 * 1542 * <p> 1543 * <dt> <tt> [^<i>a</i>] </tt> 1544 * <dd> Matches a single character that is not from character set or range 1545 * <tt>{<i>a</i>}</tt>. Note that the <tt>^</tt> character must occur 1546 * immediately to the right of the opening bracket. 1547 * 1548 * <p> 1549 * <dt> <tt> \<i>c</i> </tt> 1550 * <dd> Removes (escapes) any special meaning of character <i>c</i>. 1551 * 1552 * <p> 1553 * <dt> <tt> {ab,cd} </tt> 1554 * <dd> Matches a string from the string set <tt>{<i>ab, cd</i>} </tt> 1555 * 1556 * <p> 1557 * <dt> <tt> {ab,c{de,fh}} </tt> 1558 * <dd> Matches a string from the string set <tt>{<i>ab, cde, cfh</i>}</tt> 1559 * 1560 * </dl> 1561 * </dd> 1562 * </dl> 1563 * 1564 * @param pathPattern a regular expression specifying a pth pattern 1565 1566 * @return an array of paths that match the path pattern 1567 * @throws IOException 1568 */ 1569 public FileStatus[] globStatus(Path pathPattern) throws IOException { 1570 return globStatus(pathPattern, DEFAULT_FILTER); 1571 } 1572 1573 /** 1574 * Return an array of FileStatus objects whose path names match pathPattern 1575 * and is accepted by the user-supplied path filter. Results are sorted by 1576 * their path names. 1577 * Return null if pathPattern has no glob and the path does not exist. 1578 * Return an empty array if pathPattern has a glob and no path matches it. 
1579 * 1580 * @param pathPattern 1581 * a regular expression specifying the path pattern 1582 * @param filter 1583 * a user-supplied path filter 1584 * @return an array of FileStatus objects 1585 * @throws IOException if any I/O error occurs when fetching file status 1586 */ 1587 public FileStatus[] globStatus(Path pathPattern, PathFilter filter) 1588 throws IOException { 1589 String filename = pathPattern.toUri().getPath(); 1590 List<FileStatus> allMatches = null; 1591 1592 List<String> filePatterns = GlobExpander.expand(filename); 1593 for (String filePattern : filePatterns) { 1594 Path path = new Path(filePattern.isEmpty() ? Path.CUR_DIR : filePattern); 1595 List<FileStatus> matches = globStatusInternal(path, filter); 1596 if (matches != null) { 1597 if (allMatches == null) { 1598 allMatches = matches; 1599 } else { 1600 allMatches.addAll(matches); 1601 } 1602 } 1603 } 1604 1605 FileStatus[] results = null; 1606 if (allMatches != null) { 1607 results = allMatches.toArray(new FileStatus[allMatches.size()]); 1608 } else if (filePatterns.size() > 1) { 1609 // no matches with multiple expansions is a non-matching glob 1610 results = new FileStatus[0]; 1611 } 1612 return results; 1613 } 1614 1615 // sort gripes because FileStatus Comparable isn't parameterized... 
1616 @SuppressWarnings("unchecked") 1617 private List<FileStatus> globStatusInternal(Path pathPattern, 1618 PathFilter filter) throws IOException { 1619 boolean patternHasGlob = false; // pathPattern has any globs 1620 List<FileStatus> matches = new ArrayList<FileStatus>(); 1621 1622 // determine starting point 1623 int level = 0; 1624 String baseDir = Path.CUR_DIR; 1625 if (pathPattern.isAbsolute()) { 1626 level = 1; // need to skip empty item at beginning of split list 1627 baseDir = Path.SEPARATOR; 1628 } 1629 1630 // parse components and determine if it's a glob 1631 String[] components = null; 1632 GlobFilter[] filters = null; 1633 String filename = pathPattern.toUri().getPath(); 1634 if (!filename.isEmpty() && !Path.SEPARATOR.equals(filename)) { 1635 components = filename.split(Path.SEPARATOR); 1636 filters = new GlobFilter[components.length]; 1637 for (int i=level; i < components.length; i++) { 1638 filters[i] = new GlobFilter(components[i]); 1639 patternHasGlob |= filters[i].hasPattern(); 1640 } 1641 if (!patternHasGlob) { 1642 baseDir = unquotePathComponent(filename); 1643 components = null; // short through to filter check 1644 } 1645 } 1646 1647 // seed the parent directory path, return if it doesn't exist 1648 try { 1649 matches.add(getFileStatus(new Path(baseDir))); 1650 } catch (FileNotFoundException e) { 1651 return patternHasGlob ? 
matches : null; 1652 } 1653 1654 // skip if there are no components other than the basedir 1655 if (components != null) { 1656 // iterate through each path component 1657 for (int i=level; (i < components.length) && !matches.isEmpty(); i++) { 1658 List<FileStatus> children = new ArrayList<FileStatus>(); 1659 for (FileStatus match : matches) { 1660 // don't look for children in a file matched by a glob 1661 if (!match.isDirectory()) { 1662 continue; 1663 } 1664 try { 1665 if (filters[i].hasPattern()) { 1666 // get all children matching the filter 1667 FileStatus[] statuses = listStatus(match.getPath(), filters[i]); 1668 children.addAll(Arrays.asList(statuses)); 1669 } else { 1670 // the component does not have a pattern 1671 String component = unquotePathComponent(components[i]); 1672 Path child = new Path(match.getPath(), component); 1673 children.add(getFileStatus(child)); 1674 } 1675 } catch (FileNotFoundException e) { 1676 // don't care 1677 } 1678 } 1679 matches = children; 1680 } 1681 } 1682 // remove anything that didn't match the filter 1683 if (!matches.isEmpty()) { 1684 Iterator<FileStatus> iter = matches.iterator(); 1685 while (iter.hasNext()) { 1686 if (!filter.accept(iter.next().getPath())) { 1687 iter.remove(); 1688 } 1689 } 1690 } 1691 // no final paths, if there were any globs return empty list 1692 if (matches.isEmpty()) { 1693 return patternHasGlob ? matches : null; 1694 } 1695 Collections.sort(matches); 1696 return matches; 1697 } 1698 1699 /** 1700 * The glob filter builds a regexp per path component. If the component 1701 * does not contain a shell metachar, then it falls back to appending the 1702 * raw string to the list of built up paths. This raw path needs to have 1703 * the quoting removed. Ie. 
convert all occurances of "\X" to "X" 1704 * @param name of the path component 1705 * @return the unquoted path component 1706 */ 1707 private String unquotePathComponent(String name) { 1708 return name.replaceAll("\\\\(.)", "$1"); 1709 } 1710 1711 /** 1712 * List the statuses of the files/directories in the given path if the path is 1713 * a directory. 1714 * Return the file's status and block locations If the path is a file. 1715 * 1716 * If a returned status is a file, it contains the file's block locations. 1717 * 1718 * @param f is the path 1719 * 1720 * @return an iterator that traverses statuses of the files/directories 1721 * in the given path 1722 * 1723 * @throws FileNotFoundException If <code>f</code> does not exist 1724 * @throws IOException If an I/O error occurred 1725 */ 1726 public RemoteIterator<LocatedFileStatus> listLocatedStatus(final Path f) 1727 throws FileNotFoundException, IOException { 1728 return listLocatedStatus(f, DEFAULT_FILTER); 1729 } 1730 1731 /** 1732 * Listing a directory 1733 * The returned results include its block location if it is a file 1734 * The results are filtered by the given path filter 1735 * @param f a path 1736 * @param filter a path filter 1737 * @return an iterator that traverses statuses of the files/directories 1738 * in the given path 1739 * @throws FileNotFoundException if <code>f</code> does not exist 1740 * @throws IOException if any I/O error occurred 1741 */ 1742 protected RemoteIterator<LocatedFileStatus> listLocatedStatus(final Path f, 1743 final PathFilter filter) 1744 throws FileNotFoundException, IOException { 1745 return new RemoteIterator<LocatedFileStatus>() { 1746 private final FileStatus[] stats = listStatus(f, filter); 1747 private int i = 0; 1748 1749 @Override 1750 public boolean hasNext() { 1751 return i<stats.length; 1752 } 1753 1754 @Override 1755 public LocatedFileStatus next() throws IOException { 1756 if (!hasNext()) { 1757 throw new NoSuchElementException("No more entry in " + f); 1758 } 
1759 FileStatus result = stats[i++]; 1760 BlockLocation[] locs = result.isFile() ? 1761 getFileBlockLocations(result.getPath(), 0, result.getLen()) : 1762 null; 1763 return new LocatedFileStatus(result, locs); 1764 } 1765 }; 1766 } 1767 1768 /** 1769 * List the statuses and block locations of the files in the given path. 1770 * 1771 * If the path is a directory, 1772 * if recursive is false, returns files in the directory; 1773 * if recursive is true, return files in the subtree rooted at the path. 1774 * If the path is a file, return the file's status and block locations. 1775 * 1776 * @param f is the path 1777 * @param recursive if the subdirectories need to be traversed recursively 1778 * 1779 * @return an iterator that traverses statuses of the files 1780 * 1781 * @throws FileNotFoundException when the path does not exist; 1782 * IOException see specific implementation 1783 */ 1784 public RemoteIterator<LocatedFileStatus> listFiles( 1785 final Path f, final boolean recursive) 1786 throws FileNotFoundException, IOException { 1787 return new RemoteIterator<LocatedFileStatus>() { 1788 private Stack<RemoteIterator<LocatedFileStatus>> itors = 1789 new Stack<RemoteIterator<LocatedFileStatus>>(); 1790 private RemoteIterator<LocatedFileStatus> curItor = 1791 listLocatedStatus(f); 1792 private LocatedFileStatus curFile; 1793 1794 @Override 1795 public boolean hasNext() throws IOException { 1796 while (curFile == null) { 1797 if (curItor.hasNext()) { 1798 handleFileStat(curItor.next()); 1799 } else if (!itors.empty()) { 1800 curItor = itors.pop(); 1801 } else { 1802 return false; 1803 } 1804 } 1805 return true; 1806 } 1807 1808 /** 1809 * Process the input stat. 1810 * If it is a file, return the file stat. 1811 * If it is a directory, traverse the directory if recursive is true; 1812 * ignore it if recursive is false. 
1813 * @param stat input status 1814 * @throws IOException if any IO error occurs 1815 */ 1816 private void handleFileStat(LocatedFileStatus stat) throws IOException { 1817 if (stat.isFile()) { // file 1818 curFile = stat; 1819 } else if (recursive) { // directory 1820 itors.push(curItor); 1821 curItor = listLocatedStatus(stat.getPath()); 1822 } 1823 } 1824 1825 @Override 1826 public LocatedFileStatus next() throws IOException { 1827 if (hasNext()) { 1828 LocatedFileStatus result = curFile; 1829 curFile = null; 1830 return result; 1831 } 1832 throw new java.util.NoSuchElementException("No more entry in " + f); 1833 } 1834 }; 1835 } 1836 1837 /** Return the current user's home directory in this filesystem. 1838 * The default implementation returns "/user/$USER/". 1839 */ 1840 public Path getHomeDirectory() { 1841 return this.makeQualified( 1842 new Path("/user/"+System.getProperty("user.name"))); 1843 } 1844 1845 1846 /** 1847 * Set the current working directory for the given file system. All relative 1848 * paths will be resolved relative to it. 1849 * 1850 * @param new_dir 1851 */ 1852 public abstract void setWorkingDirectory(Path new_dir); 1853 1854 /** 1855 * Get the current working directory for the given file system 1856 * @return the directory pathname 1857 */ 1858 public abstract Path getWorkingDirectory(); 1859 1860 1861 /** 1862 * Note: with the new FilesContext class, getWorkingDirectory() 1863 * will be removed. 1864 * The working directory is implemented in FilesContext. 1865 * 1866 * Some file systems like LocalFileSystem have an initial workingDir 1867 * that we use as the starting workingDir. For other file systems 1868 * like HDFS there is no built in notion of an initial workingDir. 1869 * 1870 * @return if there is built in notion of workingDir then it 1871 * is returned; else a null is returned. 
1872 */ 1873 protected Path getInitialWorkingDirectory() { 1874 return null; 1875 } 1876 1877 /** 1878 * Call {@link #mkdirs(Path, FsPermission)} with default permission. 1879 */ 1880 public boolean mkdirs(Path f) throws IOException { 1881 return mkdirs(f, FsPermission.getDirDefault()); 1882 } 1883 1884 /** 1885 * Make the given file and all non-existent parents into 1886 * directories. Has the semantics of Unix 'mkdir -p'. 1887 * Existence of the directory hierarchy is not an error. 1888 * @param f path to create 1889 * @param permission to apply to f 1890 */ 1891 public abstract boolean mkdirs(Path f, FsPermission permission 1892 ) throws IOException; 1893 1894 /** 1895 * The src file is on the local disk. Add it to FS at 1896 * the given dst name and the source is kept intact afterwards 1897 * @param src path 1898 * @param dst path 1899 */ 1900 public void copyFromLocalFile(Path src, Path dst) 1901 throws IOException { 1902 copyFromLocalFile(false, src, dst); 1903 } 1904 1905 /** 1906 * The src files is on the local disk. Add it to FS at 1907 * the given dst name, removing the source afterwards. 1908 * @param srcs path 1909 * @param dst path 1910 */ 1911 public void moveFromLocalFile(Path[] srcs, Path dst) 1912 throws IOException { 1913 copyFromLocalFile(true, true, srcs, dst); 1914 } 1915 1916 /** 1917 * The src file is on the local disk. Add it to FS at 1918 * the given dst name, removing the source afterwards. 1919 * @param src path 1920 * @param dst path 1921 */ 1922 public void moveFromLocalFile(Path src, Path dst) 1923 throws IOException { 1924 copyFromLocalFile(true, src, dst); 1925 } 1926 1927 /** 1928 * The src file is on the local disk. Add it to FS at 1929 * the given dst name. 
1930 * delSrc indicates if the source should be removed 1931 * @param delSrc whether to delete the src 1932 * @param src path 1933 * @param dst path 1934 */ 1935 public void copyFromLocalFile(boolean delSrc, Path src, Path dst) 1936 throws IOException { 1937 copyFromLocalFile(delSrc, true, src, dst); 1938 } 1939 1940 /** 1941 * The src files are on the local disk. Add it to FS at 1942 * the given dst name. 1943 * delSrc indicates if the source should be removed 1944 * @param delSrc whether to delete the src 1945 * @param overwrite whether to overwrite an existing file 1946 * @param srcs array of paths which are source 1947 * @param dst path 1948 */ 1949 public void copyFromLocalFile(boolean delSrc, boolean overwrite, 1950 Path[] srcs, Path dst) 1951 throws IOException { 1952 Configuration conf = getConf(); 1953 FileUtil.copy(getLocal(conf), srcs, this, dst, delSrc, overwrite, conf); 1954 } 1955 1956 /** 1957 * The src file is on the local disk. Add it to FS at 1958 * the given dst name. 1959 * delSrc indicates if the source should be removed 1960 * @param delSrc whether to delete the src 1961 * @param overwrite whether to overwrite an existing file 1962 * @param src path 1963 * @param dst path 1964 */ 1965 public void copyFromLocalFile(boolean delSrc, boolean overwrite, 1966 Path src, Path dst) 1967 throws IOException { 1968 Configuration conf = getConf(); 1969 FileUtil.copy(getLocal(conf), src, this, dst, delSrc, overwrite, conf); 1970 } 1971 1972 /** 1973 * The src file is under FS, and the dst is on the local disk. 1974 * Copy it from FS control to the local dst name. 1975 * @param src path 1976 * @param dst path 1977 */ 1978 public void copyToLocalFile(Path src, Path dst) throws IOException { 1979 copyToLocalFile(false, src, dst); 1980 } 1981 1982 /** 1983 * The src file is under FS, and the dst is on the local disk. 1984 * Copy it from FS control to the local dst name. 
1985 * Remove the source afterwards 1986 * @param src path 1987 * @param dst path 1988 */ 1989 public void moveToLocalFile(Path src, Path dst) throws IOException { 1990 copyToLocalFile(true, src, dst); 1991 } 1992 1993 /** 1994 * The src file is under FS, and the dst is on the local disk. 1995 * Copy it from FS control to the local dst name. 1996 * delSrc indicates if the src will be removed or not. 1997 * @param delSrc whether to delete the src 1998 * @param src path 1999 * @param dst path 2000 */ 2001 public void copyToLocalFile(boolean delSrc, Path src, Path dst) 2002 throws IOException { 2003 copyToLocalFile(delSrc, src, dst, false); 2004 } 2005 2006 /** 2007 * The src file is under FS, and the dst is on the local disk. Copy it from FS 2008 * control to the local dst name. delSrc indicates if the src will be removed 2009 * or not. useRawLocalFileSystem indicates whether to use RawLocalFileSystem 2010 * as local file system or not. RawLocalFileSystem is non crc file system.So, 2011 * It will not create any crc files at local. 2012 * 2013 * @param delSrc 2014 * whether to delete the src 2015 * @param src 2016 * path 2017 * @param dst 2018 * path 2019 * @param useRawLocalFileSystem 2020 * whether to use RawLocalFileSystem as local file system or not. 2021 * 2022 * @throws IOException 2023 * - if any IO error 2024 */ 2025 public void copyToLocalFile(boolean delSrc, Path src, Path dst, 2026 boolean useRawLocalFileSystem) throws IOException { 2027 Configuration conf = getConf(); 2028 FileSystem local = null; 2029 if (useRawLocalFileSystem) { 2030 local = getLocal(conf).getRawFileSystem(); 2031 } else { 2032 local = getLocal(conf); 2033 } 2034 FileUtil.copy(this, src, local, dst, delSrc, conf); 2035 } 2036 2037 /** 2038 * Returns a local File that the user can write output to. The caller 2039 * provides both the eventual FS target name and the local working 2040 * file. If the FS is local, we write directly into the target. 
If 2041 * the FS is remote, we write into the tmp local area. 2042 * @param fsOutputFile path of output file 2043 * @param tmpLocalFile path of local tmp file 2044 */ 2045 public Path startLocalOutput(Path fsOutputFile, Path tmpLocalFile) 2046 throws IOException { 2047 return tmpLocalFile; 2048 } 2049 2050 /** 2051 * Called when we're all done writing to the target. A local FS will 2052 * do nothing, because we've written to exactly the right place. A remote 2053 * FS will copy the contents of tmpLocalFile to the correct target at 2054 * fsOutputFile. 2055 * @param fsOutputFile path of output file 2056 * @param tmpLocalFile path to local tmp file 2057 */ 2058 public void completeLocalOutput(Path fsOutputFile, Path tmpLocalFile) 2059 throws IOException { 2060 moveFromLocalFile(tmpLocalFile, fsOutputFile); 2061 } 2062 2063 /** 2064 * No more filesystem operations are needed. Will 2065 * release any held locks. 2066 */ 2067 @Override 2068 public void close() throws IOException { 2069 // delete all files that were marked as delete-on-exit. 2070 processDeleteOnExit(); 2071 CACHE.remove(this.key, this); 2072 } 2073 2074 /** Return the total size of all files in the filesystem.*/ 2075 public long getUsed() throws IOException{ 2076 long used = 0; 2077 FileStatus[] files = listStatus(new Path("/")); 2078 for(FileStatus file:files){ 2079 used += file.getLen(); 2080 } 2081 return used; 2082 } 2083 2084 /** 2085 * Get the block size for a particular file. 2086 * @param f the filename 2087 * @return the number of bytes in a block 2088 */ 2089 /** @deprecated Use getFileStatus() instead */ 2090 @Deprecated 2091 public long getBlockSize(Path f) throws IOException { 2092 return getFileStatus(f).getBlockSize(); 2093 } 2094 2095 /** 2096 * Return the number of bytes that large input files should be optimally 2097 * be split into to minimize i/o time. 
2098 * @deprecated use {@link #getDefaultBlockSize(Path)} instead 2099 */ 2100 @Deprecated 2101 public long getDefaultBlockSize() { 2102 // default to 32MB: large enough to minimize the impact of seeks 2103 return getConf().getLong("fs.local.block.size", 32 * 1024 * 1024); 2104 } 2105 2106 /** Return the number of bytes that large input files should be optimally 2107 * be split into to minimize i/o time. The given path will be used to 2108 * locate the actual filesystem. The full path does not have to exist. 2109 * @param f path of file 2110 * @return the default block size for the path's filesystem 2111 */ 2112 public long getDefaultBlockSize(Path f) { 2113 return getDefaultBlockSize(); 2114 } 2115 2116 /** 2117 * Get the default replication. 2118 * @deprecated use {@link #getDefaultReplication(Path)} instead 2119 */ 2120 @Deprecated 2121 public short getDefaultReplication() { return 1; } 2122 2123 /** 2124 * Get the default replication for a path. The given path will be used to 2125 * locate the actual filesystem. The full path does not have to exist. 2126 * @param path of the file 2127 * @return default replication for the path's filesystem 2128 */ 2129 public short getDefaultReplication(Path path) { 2130 return getDefaultReplication(); 2131 } 2132 2133 /** 2134 * Return a file status object that represents the path. 2135 * @param f The path we want information from 2136 * @return a FileStatus object 2137 * @throws FileNotFoundException when the path does not exist; 2138 * IOException see specific implementation 2139 */ 2140 public abstract FileStatus getFileStatus(Path f) throws IOException; 2141 2142 /** 2143 * Get the checksum of a file. 2144 * 2145 * @param f The file path 2146 * @return The file checksum. The default return value is null, 2147 * which indicates that no checksum algorithm is implemented 2148 * in the corresponding FileSystem. 
2149 */ 2150 public FileChecksum getFileChecksum(Path f) throws IOException { 2151 return null; 2152 } 2153 2154 /** 2155 * Set the verify checksum flag. This is only applicable if the 2156 * corresponding FileSystem supports checksum. By default doesn't do anything. 2157 * @param verifyChecksum 2158 */ 2159 public void setVerifyChecksum(boolean verifyChecksum) { 2160 //doesn't do anything 2161 } 2162 2163 /** 2164 * Set the write checksum flag. This is only applicable if the 2165 * corresponding FileSystem supports checksum. By default doesn't do anything. 2166 * @param writeChecksum 2167 */ 2168 public void setWriteChecksum(boolean writeChecksum) { 2169 //doesn't do anything 2170 } 2171 2172 /** 2173 * Returns a status object describing the use and capacity of the 2174 * file system. If the file system has multiple partitions, the 2175 * use and capacity of the root partition is reflected. 2176 * 2177 * @return a FsStatus object 2178 * @throws IOException 2179 * see specific implementation 2180 */ 2181 public FsStatus getStatus() throws IOException { 2182 return getStatus(null); 2183 } 2184 2185 /** 2186 * Returns a status object describing the use and capacity of the 2187 * file system. If the file system has multiple partitions, the 2188 * use and capacity of the partition pointed to by the specified 2189 * path is reflected. 2190 * @param p Path for which status should be obtained. null means 2191 * the default partition. 2192 * @return a FsStatus object 2193 * @throws IOException 2194 * see specific implementation 2195 */ 2196 public FsStatus getStatus(Path p) throws IOException { 2197 return new FsStatus(Long.MAX_VALUE, 0, Long.MAX_VALUE); 2198 } 2199 2200 /** 2201 * Set permission of a path. 2202 * @param p 2203 * @param permission 2204 */ 2205 public void setPermission(Path p, FsPermission permission 2206 ) throws IOException { 2207 } 2208 2209 /** 2210 * Set owner of a path (i.e. a file or a directory). 
2211 * The parameters username and groupname cannot both be null. 2212 * @param p The path 2213 * @param username If it is null, the original username remains unchanged. 2214 * @param groupname If it is null, the original groupname remains unchanged. 2215 */ 2216 public void setOwner(Path p, String username, String groupname 2217 ) throws IOException { 2218 } 2219 2220 /** 2221 * Set access time of a file 2222 * @param p The path 2223 * @param mtime Set the modification time of this file. 2224 * The number of milliseconds since Jan 1, 1970. 2225 * A value of -1 means that this call should not set modification time. 2226 * @param atime Set the access time of this file. 2227 * The number of milliseconds since Jan 1, 1970. 2228 * A value of -1 means that this call should not set access time. 2229 */ 2230 public void setTimes(Path p, long mtime, long atime 2231 ) throws IOException { 2232 } 2233 2234 // making it volatile to be able to do a double checked locking 2235 private volatile static boolean FILE_SYSTEMS_LOADED = false; 2236 2237 private static final Map<String, Class<? extends FileSystem>> 2238 SERVICE_FILE_SYSTEMS = new HashMap<String, Class<? extends FileSystem>>(); 2239 2240 private static void loadFileSystems() { 2241 synchronized (FileSystem.class) { 2242 if (!FILE_SYSTEMS_LOADED) { 2243 ServiceLoader<FileSystem> serviceLoader = ServiceLoader.load(FileSystem.class); 2244 for (FileSystem fs : serviceLoader) { 2245 SERVICE_FILE_SYSTEMS.put(fs.getScheme(), fs.getClass()); 2246 } 2247 FILE_SYSTEMS_LOADED = true; 2248 } 2249 } 2250 } 2251 2252 public static Class<? extends FileSystem> getFileSystemClass(String scheme, 2253 Configuration conf) throws IOException { 2254 if (!FILE_SYSTEMS_LOADED) { 2255 loadFileSystems(); 2256 } 2257 Class<? extends FileSystem> clazz = null; 2258 if (conf != null) { 2259 clazz = (Class<? extends FileSystem>) conf.getClass("fs." 
+ scheme + ".impl", null); 2260 } 2261 if (clazz == null) { 2262 clazz = SERVICE_FILE_SYSTEMS.get(scheme); 2263 } 2264 if (clazz == null) { 2265 throw new IOException("No FileSystem for scheme: " + scheme); 2266 } 2267 return clazz; 2268 } 2269 2270 private static FileSystem createFileSystem(URI uri, Configuration conf 2271 ) throws IOException { 2272 Class<?> clazz = getFileSystemClass(uri.getScheme(), conf); 2273 if (clazz == null) { 2274 throw new IOException("No FileSystem for scheme: " + uri.getScheme()); 2275 } 2276 FileSystem fs = (FileSystem)ReflectionUtils.newInstance(clazz, conf); 2277 fs.initialize(uri, conf); 2278 return fs; 2279 } 2280 2281 /** Caching FileSystem objects */ 2282 static class Cache { 2283 private final ClientFinalizer clientFinalizer = new ClientFinalizer(); 2284 2285 private final Map<Key, FileSystem> map = new HashMap<Key, FileSystem>(); 2286 private final Set<Key> toAutoClose = new HashSet<Key>(); 2287 2288 /** A variable that makes all objects in the cache unique */ 2289 private static AtomicLong unique = new AtomicLong(1); 2290 2291 FileSystem get(URI uri, Configuration conf) throws IOException{ 2292 Key key = new Key(uri, conf); 2293 return getInternal(uri, conf, key); 2294 } 2295 2296 /** The objects inserted into the cache using this method are all unique */ 2297 FileSystem getUnique(URI uri, Configuration conf) throws IOException{ 2298 Key key = new Key(uri, conf, unique.getAndIncrement()); 2299 return getInternal(uri, conf, key); 2300 } 2301 2302 private FileSystem getInternal(URI uri, Configuration conf, Key key) throws IOException{ 2303 FileSystem fs; 2304 synchronized (this) { 2305 fs = map.get(key); 2306 } 2307 if (fs != null) { 2308 return fs; 2309 } 2310 2311 fs = createFileSystem(uri, conf); 2312 synchronized (this) { // refetch the lock again 2313 FileSystem oldfs = map.get(key); 2314 if (oldfs != null) { // a file system is created while lock is releasing 2315 fs.close(); // close the new file system 2316 return 
oldfs; // return the old file system 2317 } 2318 2319 // now insert the new file system into the map 2320 if (map.isEmpty() ) { 2321 ShutdownHookManager.get().addShutdownHook(clientFinalizer, SHUTDOWN_HOOK_PRIORITY); 2322 } 2323 fs.key = key; 2324 map.put(key, fs); 2325 if (conf.getBoolean("fs.automatic.close", true)) { 2326 toAutoClose.add(key); 2327 } 2328 return fs; 2329 } 2330 } 2331 2332 synchronized void remove(Key key, FileSystem fs) { 2333 if (map.containsKey(key) && fs == map.get(key)) { 2334 map.remove(key); 2335 toAutoClose.remove(key); 2336 } 2337 } 2338 2339 synchronized void closeAll() throws IOException { 2340 closeAll(false); 2341 } 2342 2343 /** 2344 * Close all FileSystem instances in the Cache. 2345 * @param onlyAutomatic only close those that are marked for automatic closing 2346 */ 2347 synchronized void closeAll(boolean onlyAutomatic) throws IOException { 2348 List<IOException> exceptions = new ArrayList<IOException>(); 2349 2350 // Make a copy of the keys in the map since we'll be modifying 2351 // the map while iterating over it, which isn't safe. 
2352 List<Key> keys = new ArrayList<Key>(); 2353 keys.addAll(map.keySet()); 2354 2355 for (Key key : keys) { 2356 final FileSystem fs = map.get(key); 2357 2358 if (onlyAutomatic && !toAutoClose.contains(key)) { 2359 continue; 2360 } 2361 2362 //remove from cache 2363 remove(key, fs); 2364 2365 if (fs != null) { 2366 try { 2367 fs.close(); 2368 } 2369 catch(IOException ioe) { 2370 exceptions.add(ioe); 2371 } 2372 } 2373 } 2374 2375 if (!exceptions.isEmpty()) { 2376 throw MultipleIOException.createIOException(exceptions); 2377 } 2378 } 2379 2380 private class ClientFinalizer implements Runnable { 2381 @Override 2382 public synchronized void run() { 2383 try { 2384 closeAll(true); 2385 } catch (IOException e) { 2386 LOG.info("FileSystem.Cache.closeAll() threw an exception:\n" + e); 2387 } 2388 } 2389 } 2390 2391 synchronized void closeAll(UserGroupInformation ugi) throws IOException { 2392 List<FileSystem> targetFSList = new ArrayList<FileSystem>(); 2393 //Make a pass over the list and collect the filesystems to close 2394 //we cannot close inline since close() removes the entry from the Map 2395 for (Map.Entry<Key, FileSystem> entry : map.entrySet()) { 2396 final Key key = entry.getKey(); 2397 final FileSystem fs = entry.getValue(); 2398 if (ugi.equals(key.ugi) && fs != null) { 2399 targetFSList.add(fs); 2400 } 2401 } 2402 List<IOException> exceptions = new ArrayList<IOException>(); 2403 //now make a pass over the target list and close each 2404 for (FileSystem fs : targetFSList) { 2405 try { 2406 fs.close(); 2407 } 2408 catch(IOException ioe) { 2409 exceptions.add(ioe); 2410 } 2411 } 2412 if (!exceptions.isEmpty()) { 2413 throw MultipleIOException.createIOException(exceptions); 2414 } 2415 } 2416 2417 /** FileSystem.Cache.Key */ 2418 static class Key { 2419 final String scheme; 2420 final String authority; 2421 final UserGroupInformation ugi; 2422 final long unique; // an artificial way to make a key unique 2423 2424 Key(URI uri, Configuration conf) throws 
IOException { 2425 this(uri, conf, 0); 2426 } 2427 2428 Key(URI uri, Configuration conf, long unique) throws IOException { 2429 scheme = uri.getScheme()==null?"":uri.getScheme().toLowerCase(); 2430 authority = uri.getAuthority()==null?"":uri.getAuthority().toLowerCase(); 2431 this.unique = unique; 2432 2433 this.ugi = UserGroupInformation.getCurrentUser(); 2434 } 2435 2436 @Override 2437 public int hashCode() { 2438 return (scheme + authority).hashCode() + ugi.hashCode() + (int)unique; 2439 } 2440 2441 static boolean isEqual(Object a, Object b) { 2442 return a == b || (a != null && a.equals(b)); 2443 } 2444 2445 @Override 2446 public boolean equals(Object obj) { 2447 if (obj == this) { 2448 return true; 2449 } 2450 if (obj != null && obj instanceof Key) { 2451 Key that = (Key)obj; 2452 return isEqual(this.scheme, that.scheme) 2453 && isEqual(this.authority, that.authority) 2454 && isEqual(this.ugi, that.ugi) 2455 && (this.unique == that.unique); 2456 } 2457 return false; 2458 } 2459 2460 @Override 2461 public String toString() { 2462 return "("+ugi.toString() + ")@" + scheme + "://" + authority; 2463 } 2464 } 2465 } 2466 2467 public static final class Statistics { 2468 private final String scheme; 2469 private AtomicLong bytesRead = new AtomicLong(); 2470 private AtomicLong bytesWritten = new AtomicLong(); 2471 private AtomicInteger readOps = new AtomicInteger(); 2472 private AtomicInteger largeReadOps = new AtomicInteger(); 2473 private AtomicInteger writeOps = new AtomicInteger(); 2474 2475 public Statistics(String scheme) { 2476 this.scheme = scheme; 2477 } 2478 2479 /** 2480 * Copy constructor. 2481 * 2482 * @param st 2483 * The input Statistics object which is cloned. 
2484 */ 2485 public Statistics(Statistics st) { 2486 this.scheme = st.scheme; 2487 this.bytesRead = new AtomicLong(st.bytesRead.longValue()); 2488 this.bytesWritten = new AtomicLong(st.bytesWritten.longValue()); 2489 } 2490 2491 /** 2492 * Increment the bytes read in the statistics 2493 * @param newBytes the additional bytes read 2494 */ 2495 public void incrementBytesRead(long newBytes) { 2496 bytesRead.getAndAdd(newBytes); 2497 } 2498 2499 /** 2500 * Increment the bytes written in the statistics 2501 * @param newBytes the additional bytes written 2502 */ 2503 public void incrementBytesWritten(long newBytes) { 2504 bytesWritten.getAndAdd(newBytes); 2505 } 2506 2507 /** 2508 * Increment the number of read operations 2509 * @param count number of read operations 2510 */ 2511 public void incrementReadOps(int count) { 2512 readOps.getAndAdd(count); 2513 } 2514 2515 /** 2516 * Increment the number of large read operations 2517 * @param count number of large read operations 2518 */ 2519 public void incrementLargeReadOps(int count) { 2520 largeReadOps.getAndAdd(count); 2521 } 2522 2523 /** 2524 * Increment the number of write operations 2525 * @param count number of write operations 2526 */ 2527 public void incrementWriteOps(int count) { 2528 writeOps.getAndAdd(count); 2529 } 2530 2531 /** 2532 * Get the total number of bytes read 2533 * @return the number of bytes 2534 */ 2535 public long getBytesRead() { 2536 return bytesRead.get(); 2537 } 2538 2539 /** 2540 * Get the total number of bytes written 2541 * @return the number of bytes 2542 */ 2543 public long getBytesWritten() { 2544 return bytesWritten.get(); 2545 } 2546 2547 /** 2548 * Get the number of file system read operations such as list files 2549 * @return number of read operations 2550 */ 2551 public int getReadOps() { 2552 return readOps.get() + largeReadOps.get(); 2553 } 2554 2555 /** 2556 * Get the number of large file system read operations such as list files 2557 * under a large directory 2558 * @return 
* number of large read operations
     */
    public int getLargeReadOps() {
      return largeReadOps.get();
    }

    /**
     * Get the number of file system write operations such as create, append
     * rename etc.
     * @return number of write operations
     */
    public int getWriteOps() {
      return writeOps.get();
    }

    /**
     * Render the five counters as one human-readable line. The read-ops
     * figure shown is the regular read counter on its own, not the combined
     * value returned by getReadOps().
     */
    @Override
    public String toString() {
      final StringBuilder text = new StringBuilder();
      text.append(bytesRead).append(" bytes read, ");
      text.append(bytesWritten).append(" bytes written, ");
      text.append(readOps).append(" read ops, ");
      text.append(largeReadOps).append(" large read ops, ");
      text.append(writeOps).append(" write ops");
      return text.toString();
    }

    /**
     * Reset the counts of bytes to 0. The operation counters are left
     * untouched.
     */
    public void reset() {
      bytesRead.set(0);
      bytesWritten.set(0);
    }

    /**
     * Get the uri scheme associated with this statistics object.
     * @return the schema associated with this set of statistics
     */
    public String getScheme() {
      return scheme;
    }
  }

  /**
   * Get the Map of Statistics object indexed by URI Scheme.
   * When several entries of the statistics table share a scheme, the one
   * visited last wins.
   * @return a Map having a key as URI scheme and value as Statistics object
   * @deprecated use {@link #getAllStatistics} instead
   */
  @Deprecated
  public static synchronized Map<String, Statistics> getStatistics() {
    final Map<String, Statistics> byScheme = new HashMap<String, Statistics>();
    for (Statistics entry : statisticsTable.values()) {
      final String scheme = entry.getScheme();
      byScheme.put(scheme, entry);
    }
    return byScheme;
  }

  /**
   * Return a snapshot list of the Statistics objects currently held in the
   * statistics table.
   */
  public static synchronized List<Statistics> getAllStatistics() {
    final List<Statistics> snapshot = new ArrayList<Statistics>();
    snapshot.addAll(statisticsTable.values());
    return snapshot;
  }

  /**
   * Get the statistics for a particular file system, creating and
   * registering a fresh object on first request.
   * @param scheme the URI scheme given to a newly created statistics object
   * @param cls the class to lookup
   * @return a statistics object
   */
  public static synchronized
  Statistics getStatistics(String scheme, Class<? extends FileSystem> cls) {
    final Statistics known = statisticsTable.get(cls);
    if (known != null) {
      return known;
    }
    final Statistics created = new Statistics(scheme);
    statisticsTable.put(cls, created);
    return created;
  }

  /**
   * Call reset() on the statistics of every file system in the table.
   */
  public static synchronized void clearStatistics() {
    final java.util.Iterator<Statistics> it =
        statisticsTable.values().iterator();
    while (it.hasNext()) {
      it.next().reset();
    }
  }

  /**
   * Print all statistics for all file systems to standard output, one line
   * per registered class.
   */
  public static synchronized
  void printStatistics() throws IOException {
    for (Map.Entry<Class<? extends FileSystem>, Statistics> entry :
        statisticsTable.entrySet()) {
      final String className = entry.getKey().getName();
      final Statistics stats = entry.getValue();
      System.out.println(" FileSystem " + className + ": " + stats);
    }
  }
}