001 /** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018 package org.apache.hadoop.fs; 019 020 import java.io.Closeable; 021 import java.io.FileNotFoundException; 022 import java.io.IOException; 023 import java.net.URI; 024 import java.security.PrivilegedExceptionAction; 025 import java.util.ArrayList; 026 import java.util.Arrays; 027 import java.util.EnumSet; 028 import java.util.HashMap; 029 import java.util.HashSet; 030 import java.util.IdentityHashMap; 031 import java.util.Iterator; 032 import java.util.List; 033 import java.util.Map; 034 import java.util.NoSuchElementException; 035 import java.util.Set; 036 import java.util.Stack; 037 import java.util.TreeSet; 038 import java.util.concurrent.atomic.AtomicInteger; 039 import java.util.concurrent.atomic.AtomicLong; 040 041 import org.apache.commons.logging.Log; 042 import org.apache.commons.logging.LogFactory; 043 import org.apache.hadoop.classification.InterfaceAudience; 044 import org.apache.hadoop.classification.InterfaceStability; 045 import org.apache.hadoop.conf.Configuration; 046 import org.apache.hadoop.conf.Configured; 047 import org.apache.hadoop.fs.Options.ChecksumOpt; 048 import org.apache.hadoop.fs.Options.Rename; 049 import org.apache.hadoop.fs.permission.FsPermission; 050 import org.apache.hadoop.io.MultipleIOException; 051 import org.apache.hadoop.io.Text; 052 import org.apache.hadoop.net.NetUtils; 053 import org.apache.hadoop.security.Credentials; 054 import org.apache.hadoop.security.SecurityUtil; 055 import org.apache.hadoop.security.UserGroupInformation; 056 import org.apache.hadoop.security.token.Token; 057 import org.apache.hadoop.util.DataChecksum; 058 import org.apache.hadoop.util.Progressable; 059 import org.apache.hadoop.util.ReflectionUtils; 060 import org.apache.hadoop.util.ShutdownHookManager; 061 062 import com.google.common.annotations.VisibleForTesting; 063 064 /**************************************************************** 065 * An abstract base class for a fairly generic filesystem. It 066 * may be implemented as a distributed filesystem, or as a "local" 067 * one that reflects the locally-connected disk. The local version 068 * exists for small Hadoop instances and for testing. 069 * 070 * <p> 071 * 072 * All user code that may potentially use the Hadoop Distributed 073 * File System should be written to use a FileSystem object. The 074 * Hadoop DFS is a multi-machine system that appears as a single 075 * disk. It's useful because of its fault tolerance and potentially 076 * very large capacity. 077 * 078 * <p> 079 * The local implementation is {@link LocalFileSystem} and distributed 080 * implementation is DistributedFileSystem. 081 *****************************************************************/ 082 @InterfaceAudience.Public 083 @InterfaceStability.Stable 084 public abstract class FileSystem extends Configured implements Closeable { 085 public static final String FS_DEFAULT_NAME_KEY = 086 CommonConfigurationKeys.FS_DEFAULT_NAME_KEY; 087 public static final String DEFAULT_FS = 088 CommonConfigurationKeys.FS_DEFAULT_NAME_DEFAULT; 089 090 public static final Log LOG = LogFactory.getLog(FileSystem.class); 091 092 /** 093 * Priority of the FileSystem shutdown hook. 094 */ 095 public static final int SHUTDOWN_HOOK_PRIORITY = 10; 096 097 /** FileSystem cache */ 098 static final Cache CACHE = new Cache(); 099 100 /** The key this instance is stored under in the cache. */ 101 private Cache.Key key; 102 103 /** Recording statistics per a FileSystem class */ 104 private static final Map<Class<? extends FileSystem>, Statistics> 105 statisticsTable = 106 new IdentityHashMap<Class<? extends FileSystem>, Statistics>(); 107 108 /** 109 * The statistics for this file system. 110 */ 111 protected Statistics statistics; 112 113 /** 114 * A cache of files that should be deleted when filsystem is closed 115 * or the JVM is exited. 116 */ 117 private Set<Path> deleteOnExit = new TreeSet<Path>(); 118 119 /** 120 * This method adds a file system for testing so that we can find it later. It 121 * is only for testing. 122 * @param uri the uri to store it under 123 * @param conf the configuration to store it under 124 * @param fs the file system to store 125 * @throws IOException 126 */ 127 static void addFileSystemForTesting(URI uri, Configuration conf, 128 FileSystem fs) throws IOException { 129 CACHE.map.put(new Cache.Key(uri, conf), fs); 130 } 131 132 /** 133 * Get a filesystem instance based on the uri, the passed 134 * configuration and the user 135 * @param uri of the filesystem 136 * @param conf the configuration to use 137 * @param user to perform the get as 138 * @return the filesystem instance 139 * @throws IOException 140 * @throws InterruptedException 141 */ 142 public static FileSystem get(final URI uri, final Configuration conf, 143 final String user) throws IOException, InterruptedException { 144 UserGroupInformation ugi; 145 if (user == null) { 146 ugi = UserGroupInformation.getCurrentUser(); 147 } else { 148 ugi = UserGroupInformation.createRemoteUser(user); 149 } 150 return ugi.doAs(new PrivilegedExceptionAction<FileSystem>() { 151 public FileSystem run() throws IOException { 152 return get(uri, conf); 153 } 154 }); 155 } 156 157 /** 158 * Returns the configured filesystem implementation. 159 * @param conf the configuration to use 160 */ 161 public static FileSystem get(Configuration conf) throws IOException { 162 return get(getDefaultUri(conf), conf); 163 } 164 165 /** Get the default filesystem URI from a configuration. 166 * @param conf the configuration to use 167 * @return the uri of the default filesystem 168 */ 169 public static URI getDefaultUri(Configuration conf) { 170 return URI.create(fixName(conf.get(FS_DEFAULT_NAME_KEY, DEFAULT_FS))); 171 } 172 173 /** Set the default filesystem URI in a configuration. 174 * @param conf the configuration to alter 175 * @param uri the new default filesystem uri 176 */ 177 public static void setDefaultUri(Configuration conf, URI uri) { 178 conf.set(FS_DEFAULT_NAME_KEY, uri.toString()); 179 } 180 181 /** Set the default filesystem URI in a configuration. 182 * @param conf the configuration to alter 183 * @param uri the new default filesystem uri 184 */ 185 public static void setDefaultUri(Configuration conf, String uri) { 186 setDefaultUri(conf, URI.create(fixName(uri))); 187 } 188 189 /** Called after a new FileSystem instance is constructed. 190 * @param name a uri whose authority section names the host, port, etc. 191 * for this FileSystem 192 * @param conf the configuration 193 */ 194 public void initialize(URI name, Configuration conf) throws IOException { 195 statistics = getStatistics(name.getScheme(), getClass()); 196 } 197 198 /** Returns a URI whose scheme and authority identify this FileSystem.*/ 199 public abstract URI getUri(); 200 201 /** 202 * Resolve the uri's hostname and add the default port if not in the uri 203 * @return URI 204 * @see NetUtils#getCanonicalUri(URI, int) 205 */ 206 protected URI getCanonicalUri() { 207 return NetUtils.getCanonicalUri(getUri(), getDefaultPort()); 208 } 209 210 /** 211 * Get the default port for this file system. 212 * @return the default port or 0 if there isn't one 213 */ 214 protected int getDefaultPort() { 215 return 0; 216 } 217 218 /** 219 * Get a canonical service name for this file system. The token cache is 220 * the only user of the canonical service name, and uses it to lookup this 221 * filesystem's service tokens. 222 * If file system provides a token of its own then it must have a canonical 223 * name, otherwise canonical name can be null. 224 * 225 * Default Impl: If the file system has child file systems 226 * (such as an embedded file system) then it is assumed that the fs has no 227 * tokens of its own and hence returns a null name; otherwise a service 228 * name is built using Uri and port. 229 * 230 * @return a service string that uniquely identifies this file system, null 231 * if the filesystem does not implement tokens 232 * @see SecurityUtil#buildDTServiceName(URI, int) 233 */ 234 @InterfaceAudience.LimitedPrivate({ "HDFS", "MapReduce" }) 235 public String getCanonicalServiceName() { 236 return (getChildFileSystems() == null) 237 ? SecurityUtil.buildDTServiceName(getUri(), getDefaultPort()) 238 : null; 239 } 240 241 /** @deprecated call #getUri() instead.*/ 242 @Deprecated 243 public String getName() { return getUri().toString(); } 244 245 /** @deprecated call #get(URI,Configuration) instead. */ 246 @Deprecated 247 public static FileSystem getNamed(String name, Configuration conf) 248 throws IOException { 249 return get(URI.create(fixName(name)), conf); 250 } 251 252 /** Update old-format filesystem names, for back-compatibility. This should 253 * eventually be replaced with a checkName() method that throws an exception 254 * for old-format names. */ 255 private static String fixName(String name) { 256 // convert old-format name to new-format name 257 if (name.equals("local")) { // "local" is now "file:///". 258 LOG.warn("\"local\" is a deprecated filesystem name." 259 +" Use \"file:///\" instead."); 260 name = "file:///"; 261 } else if (name.indexOf('/')==-1) { // unqualified is "hdfs://" 262 LOG.warn("\""+name+"\" is a deprecated filesystem name." 263 +" Use \"hdfs://"+name+"/\" instead."); 264 name = "hdfs://"+name; 265 } 266 return name; 267 } 268 269 /** 270 * Get the local file system. 271 * @param conf the configuration to configure the file system with 272 * @return a LocalFileSystem 273 */ 274 public static LocalFileSystem getLocal(Configuration conf) 275 throws IOException { 276 return (LocalFileSystem)get(LocalFileSystem.NAME, conf); 277 } 278 279 /** Returns the FileSystem for this URI's scheme and authority. The scheme 280 * of the URI determines a configuration property name, 281 * <tt>fs.<i>scheme</i>.class</tt> whose value names the FileSystem class. 282 * The entire URI is passed to the FileSystem instance's initialize method. 283 */ 284 public static FileSystem get(URI uri, Configuration conf) throws IOException { 285 String scheme = uri.getScheme(); 286 String authority = uri.getAuthority(); 287 288 if (scheme == null && authority == null) { // use default FS 289 return get(conf); 290 } 291 292 if (scheme != null && authority == null) { // no authority 293 URI defaultUri = getDefaultUri(conf); 294 if (scheme.equals(defaultUri.getScheme()) // if scheme matches default 295 && defaultUri.getAuthority() != null) { // & default has authority 296 return get(defaultUri, conf); // return default 297 } 298 } 299 300 String disableCacheName = String.format("fs.%s.impl.disable.cache", scheme); 301 if (conf.getBoolean(disableCacheName, false)) { 302 return createFileSystem(uri, conf); 303 } 304 305 return CACHE.get(uri, conf); 306 } 307 308 /** 309 * Returns the FileSystem for this URI's scheme and authority and the 310 * passed user. Internally invokes {@link #newInstance(URI, Configuration)} 311 * @param uri of the filesystem 312 * @param conf the configuration to use 313 * @param user to perform the get as 314 * @return filesystem instance 315 * @throws IOException 316 * @throws InterruptedException 317 */ 318 public static FileSystem newInstance(final URI uri, final Configuration conf, 319 final String user) throws IOException, InterruptedException { 320 UserGroupInformation ugi; 321 if (user == null) { 322 ugi = UserGroupInformation.getCurrentUser(); 323 } else { 324 ugi = UserGroupInformation.createRemoteUser(user); 325 } 326 return ugi.doAs(new PrivilegedExceptionAction<FileSystem>() { 327 public FileSystem run() throws IOException { 328 return newInstance(uri,conf); 329 } 330 }); 331 } 332 /** Returns the FileSystem for this URI's scheme and authority. The scheme 333 * of the URI determines a configuration property name, 334 * <tt>fs.<i>scheme</i>.class</tt> whose value names the FileSystem class. 335 * The entire URI is passed to the FileSystem instance's initialize method. 336 * This always returns a new FileSystem object. 337 */ 338 public static FileSystem newInstance(URI uri, Configuration conf) throws IOException { 339 String scheme = uri.getScheme(); 340 String authority = uri.getAuthority(); 341 342 if (scheme == null) { // no scheme: use default FS 343 return newInstance(conf); 344 } 345 346 if (authority == null) { // no authority 347 URI defaultUri = getDefaultUri(conf); 348 if (scheme.equals(defaultUri.getScheme()) // if scheme matches default 349 && defaultUri.getAuthority() != null) { // & default has authority 350 return newInstance(defaultUri, conf); // return default 351 } 352 } 353 return CACHE.getUnique(uri, conf); 354 } 355 356 /** Returns a unique configured filesystem implementation. 357 * This always returns a new FileSystem object. 358 * @param conf the configuration to use 359 */ 360 public static FileSystem newInstance(Configuration conf) throws IOException { 361 return newInstance(getDefaultUri(conf), conf); 362 } 363 364 /** 365 * Get a unique local file system object 366 * @param conf the configuration to configure the file system with 367 * @return a LocalFileSystem 368 * This always returns a new FileSystem object. 369 */ 370 public static LocalFileSystem newInstanceLocal(Configuration conf) 371 throws IOException { 372 return (LocalFileSystem)newInstance(LocalFileSystem.NAME, conf); 373 } 374 375 /** 376 * Close all cached filesystems. Be sure those filesystems are not 377 * used anymore. 378 * 379 * @throws IOException 380 */ 381 public static void closeAll() throws IOException { 382 CACHE.closeAll(); 383 } 384 385 /** 386 * Close all cached filesystems for a given UGI. Be sure those filesystems 387 * are not used anymore. 388 * @param ugi user group info to close 389 * @throws IOException 390 */ 391 public static void closeAllForUGI(UserGroupInformation ugi) 392 throws IOException { 393 CACHE.closeAll(ugi); 394 } 395 396 /** 397 * Make sure that a path specifies a FileSystem. 398 * @param path to use 399 */ 400 public Path makeQualified(Path path) { 401 checkPath(path); 402 return path.makeQualified(this.getUri(), this.getWorkingDirectory()); 403 } 404 405 /** 406 * Get a new delegation token for this file system. 407 * This is an internal method that should have been declared protected 408 * but wasn't historically. 409 * Callers should use {@link #addDelegationTokens(String, Credentials)} 410 * 411 * @param renewer the account name that is allowed to renew the token. 412 * @return a new delegation token 413 * @throws IOException 414 */ 415 @InterfaceAudience.Private() 416 public Token<?> getDelegationToken(String renewer) throws IOException { 417 return null; 418 } 419 420 /** 421 * Obtain all delegation tokens used by this FileSystem that are not 422 * already present in the given Credentials. Existing tokens will neither 423 * be verified as valid nor having the given renewer. Missing tokens will 424 * be acquired and added to the given Credentials. 425 * 426 * Default Impl: works for simple fs with its own token 427 * and also for an embedded fs whose tokens are those of its 428 * children file system (i.e. the embedded fs has not tokens of its 429 * own). 430 * 431 * @param renewer the user allowed to renew the delegation tokens 432 * @param credentials cache in which to add new delegation tokens 433 * @return list of new delegation tokens 434 * @throws IOException 435 */ 436 @InterfaceAudience.LimitedPrivate({ "HDFS", "MapReduce" }) 437 public Token<?>[] addDelegationTokens( 438 final String renewer, Credentials credentials) throws IOException { 439 if (credentials == null) { 440 credentials = new Credentials(); 441 } 442 final List<Token<?>> tokens = new ArrayList<Token<?>>(); 443 collectDelegationTokens(renewer, credentials, tokens); 444 return tokens.toArray(new Token<?>[tokens.size()]); 445 } 446 447 /** 448 * Recursively obtain the tokens for this FileSystem and all descended 449 * FileSystems as determined by getChildFileSystems(). 450 * @param renewer the user allowed to renew the delegation tokens 451 * @param credentials cache in which to add the new delegation tokens 452 * @param tokens list in which to add acquired tokens 453 * @throws IOException 454 */ 455 private void collectDelegationTokens(final String renewer, 456 final Credentials credentials, 457 final List<Token<?>> tokens) 458 throws IOException { 459 final String serviceName = getCanonicalServiceName(); 460 // Collect token of the this filesystem and then of its embedded children 461 if (serviceName != null) { // fs has token, grab it 462 final Text service = new Text(serviceName); 463 Token<?> token = credentials.getToken(service); 464 if (token == null) { 465 token = getDelegationToken(renewer); 466 if (token != null) { 467 tokens.add(token); 468 credentials.addToken(service, token); 469 } 470 } 471 } 472 // Now collect the tokens from the children 473 final FileSystem[] children = getChildFileSystems(); 474 if (children != null) { 475 for (final FileSystem fs : children) { 476 fs.collectDelegationTokens(renewer, credentials, tokens); 477 } 478 } 479 } 480 481 /** 482 * Get all the immediate child FileSystems embedded in this FileSystem. 483 * It does not recurse and get grand children. If a FileSystem 484 * has multiple child FileSystems, then it should return a unique list 485 * of those FileSystems. Default is to return null to signify no children. 486 * 487 * @return FileSystems used by this FileSystem 488 */ 489 @InterfaceAudience.LimitedPrivate({ "HDFS" }) 490 @VisibleForTesting 491 public FileSystem[] getChildFileSystems() { 492 return null; 493 } 494 495 /** create a file with the provided permission 496 * The permission of the file is set to be the provided permission as in 497 * setPermission, not permission&~umask 498 * 499 * It is implemented using two RPCs. It is understood that it is inefficient, 500 * but the implementation is thread-safe. The other option is to change the 501 * value of umask in configuration to be 0, but it is not thread-safe. 502 * 503 * @param fs file system handle 504 * @param file the name of the file to be created 505 * @param permission the permission of the file 506 * @return an output stream 507 * @throws IOException 508 */ 509 public static FSDataOutputStream create(FileSystem fs, 510 Path file, FsPermission permission) throws IOException { 511 // create the file with default permission 512 FSDataOutputStream out = fs.create(file); 513 // set its permission to the supplied one 514 fs.setPermission(file, permission); 515 return out; 516 } 517 518 /** create a directory with the provided permission 519 * The permission of the directory is set to be the provided permission as in 520 * setPermission, not permission&~umask 521 * 522 * @see #create(FileSystem, Path, FsPermission) 523 * 524 * @param fs file system handle 525 * @param dir the name of the directory to be created 526 * @param permission the permission of the directory 527 * @return true if the directory creation succeeds; false otherwise 528 * @throws IOException 529 */ 530 public static boolean mkdirs(FileSystem fs, Path dir, FsPermission permission) 531 throws IOException { 532 // create the directory using the default permission 533 boolean result = fs.mkdirs(dir); 534 // set its permission to be the supplied one 535 fs.setPermission(dir, permission); 536 return result; 537 } 538 539 /////////////////////////////////////////////////////////////// 540 // FileSystem 541 /////////////////////////////////////////////////////////////// 542 543 protected FileSystem() { 544 super(null); 545 } 546 547 /** 548 * Check that a Path belongs to this FileSystem. 549 * @param path to check 550 */ 551 protected void checkPath(Path path) { 552 URI uri = path.toUri(); 553 String thatScheme = uri.getScheme(); 554 if (thatScheme == null) // fs is relative 555 return; 556 URI thisUri = getCanonicalUri(); 557 String thisScheme = thisUri.getScheme(); 558 //authority and scheme are not case sensitive 559 if (thisScheme.equalsIgnoreCase(thatScheme)) {// schemes match 560 String thisAuthority = thisUri.getAuthority(); 561 String thatAuthority = uri.getAuthority(); 562 if (thatAuthority == null && // path's authority is null 563 thisAuthority != null) { // fs has an authority 564 URI defaultUri = getDefaultUri(getConf()); 565 if (thisScheme.equalsIgnoreCase(defaultUri.getScheme())) { 566 uri = defaultUri; // schemes match, so use this uri instead 567 } else { 568 uri = null; // can't determine auth of the path 569 } 570 } 571 if (uri != null) { 572 // canonicalize uri before comparing with this fs 573 uri = NetUtils.getCanonicalUri(uri, getDefaultPort()); 574 thatAuthority = uri.getAuthority(); 575 if (thisAuthority == thatAuthority || // authorities match 576 (thisAuthority != null && 577 thisAuthority.equalsIgnoreCase(thatAuthority))) 578 return; 579 } 580 } 581 throw new IllegalArgumentException("Wrong FS: "+path+ 582 ", expected: "+this.getUri()); 583 } 584 585 /** 586 * Return an array containing hostnames, offset and size of 587 * portions of the given file. For a nonexistent 588 * file or regions, null will be returned. 589 * 590 * This call is most helpful with DFS, where it returns 591 * hostnames of machines that contain the given file. 592 * 593 * The FileSystem will simply return an elt containing 'localhost'. 594 * 595 * @param file FilesStatus to get data from 596 * @param start offset into the given file 597 * @param len length for which to get locations for 598 */ 599 public BlockLocation[] getFileBlockLocations(FileStatus file, 600 long start, long len) throws IOException { 601 if (file == null) { 602 return null; 603 } 604 605 if (start < 0 || len < 0) { 606 throw new IllegalArgumentException("Invalid start or len parameter"); 607 } 608 609 if (file.getLen() <= start) { 610 return new BlockLocation[0]; 611 612 } 613 String[] name = { "localhost:50010" }; 614 String[] host = { "localhost" }; 615 return new BlockLocation[] { 616 new BlockLocation(name, host, 0, file.getLen()) }; 617 } 618 619 620 /** 621 * Return an array containing hostnames, offset and size of 622 * portions of the given file. For a nonexistent 623 * file or regions, null will be returned. 624 * 625 * This call is most helpful with DFS, where it returns 626 * hostnames of machines that contain the given file. 627 * 628 * The FileSystem will simply return an elt containing 'localhost'. 629 * 630 * @param p path is used to identify an FS since an FS could have 631 * another FS that it could be delegating the call to 632 * @param start offset into the given file 633 * @param len length for which to get locations for 634 */ 635 public BlockLocation[] getFileBlockLocations(Path p, 636 long start, long len) throws IOException { 637 if (p == null) { 638 throw new NullPointerException(); 639 } 640 FileStatus file = getFileStatus(p); 641 return getFileBlockLocations(file, start, len); 642 } 643 644 /** 645 * Return a set of server default configuration values 646 * @return server default configuration values 647 * @throws IOException 648 */ 649 public FsServerDefaults getServerDefaults() throws IOException { 650 Configuration conf = getConf(); 651 // CRC32 is chosen as default as it is available in all 652 // releases that support checksum. 653 return new FsServerDefaults(getDefaultBlockSize(), 654 conf.getInt("io.bytes.per.checksum", 512), 655 64 * 1024, 656 getDefaultReplication(), 657 conf.getInt("io.file.buffer.size", 4096), 658 DataChecksum.Type.CRC32); 659 } 660 661 /** 662 * Return a set of server default configuration values 663 * @param p path is used to identify an FS since an FS could have 664 * another FS that it could be delegating the call to 665 * @return server default configuration values 666 * @throws IOException 667 */ 668 public FsServerDefaults getServerDefaults(Path p) throws IOException { 669 return getServerDefaults(); 670 } 671 672 /** 673 * Return the fully-qualified path of path f resolving the path 674 * through any symlinks or mount point 675 * @param p path to be resolved 676 * @return fully qualified path 677 * @throws FileNotFoundException 678 */ 679 public Path resolvePath(final Path p) throws IOException { 680 checkPath(p); 681 return getFileStatus(p).getPath(); 682 } 683 684 /** 685 * Opens an FSDataInputStream at the indicated Path. 686 * @param f the file name to open 687 * @param bufferSize the size of the buffer to be used. 688 */ 689 public abstract FSDataInputStream open(Path f, int bufferSize) 690 throws IOException; 691 692 /** 693 * Opens an FSDataInputStream at the indicated Path. 694 * @param f the file to open 695 */ 696 public FSDataInputStream open(Path f) throws IOException { 697 return open(f, getConf().getInt("io.file.buffer.size", 4096)); 698 } 699 700 /** 701 * Create an FSDataOutputStream at the indicated Path. 702 * Files are overwritten by default. 703 * @param f the file to create 704 */ 705 public FSDataOutputStream create(Path f) throws IOException { 706 return create(f, true); 707 } 708 709 /** 710 * Create an FSDataOutputStream at the indicated Path. 711 * @param f the file to create 712 * @param overwrite if a file with this name already exists, then if true, 713 * the file will be overwritten, and if false an exception will be thrown. 714 */ 715 public FSDataOutputStream create(Path f, boolean overwrite) 716 throws IOException { 717 return create(f, overwrite, 718 getConf().getInt("io.file.buffer.size", 4096), 719 getDefaultReplication(f), 720 getDefaultBlockSize(f)); 721 } 722 723 /** 724 * Create an FSDataOutputStream at the indicated Path with write-progress 725 * reporting. 726 * Files are overwritten by default. 727 * @param f the file to create 728 * @param progress to report progress 729 */ 730 public FSDataOutputStream create(Path f, Progressable progress) 731 throws IOException { 732 return create(f, true, 733 getConf().getInt("io.file.buffer.size", 4096), 734 getDefaultReplication(f), 735 getDefaultBlockSize(f), progress); 736 } 737 738 /** 739 * Create an FSDataOutputStream at the indicated Path. 740 * Files are overwritten by default. 741 * @param f the file to create 742 * @param replication the replication factor 743 */ 744 public FSDataOutputStream create(Path f, short replication) 745 throws IOException { 746 return create(f, true, 747 getConf().getInt("io.file.buffer.size", 4096), 748 replication, 749 getDefaultBlockSize(f)); 750 } 751 752 /** 753 * Create an FSDataOutputStream at the indicated Path with write-progress 754 * reporting. 755 * Files are overwritten by default. 756 * @param f the file to create 757 * @param replication the replication factor 758 * @param progress to report progress 759 */ 760 public FSDataOutputStream create(Path f, short replication, 761 Progressable progress) throws IOException { 762 return create(f, true, 763 getConf().getInt("io.file.buffer.size", 4096), 764 replication, 765 getDefaultBlockSize(f), progress); 766 } 767 768 769 /** 770 * Create an FSDataOutputStream at the indicated Path. 771 * @param f the file name to create 772 * @param overwrite if a file with this name already exists, then if true, 773 * the file will be overwritten, and if false an error will be thrown. 774 * @param bufferSize the size of the buffer to be used. 775 */ 776 public FSDataOutputStream create(Path f, 777 boolean overwrite, 778 int bufferSize 779 ) throws IOException { 780 return create(f, overwrite, bufferSize, 781 getDefaultReplication(f), 782 getDefaultBlockSize(f)); 783 } 784 785 /** 786 * Create an FSDataOutputStream at the indicated Path with write-progress 787 * reporting. 788 * @param f the path of the file to open 789 * @param overwrite if a file with this name already exists, then if true, 790 * the file will be overwritten, and if false an error will be thrown. 791 * @param bufferSize the size of the buffer to be used. 792 */ 793 public FSDataOutputStream create(Path f, 794 boolean overwrite, 795 int bufferSize, 796 Progressable progress 797 ) throws IOException { 798 return create(f, overwrite, bufferSize, 799 getDefaultReplication(f), 800 getDefaultBlockSize(f), progress); 801 } 802 803 804 /** 805 * Create an FSDataOutputStream at the indicated Path. 806 * @param f the file name to open 807 * @param overwrite if a file with this name already exists, then if true, 808 * the file will be overwritten, and if false an error will be thrown. 809 * @param bufferSize the size of the buffer to be used. 810 * @param replication required block replication for the file. 811 */ 812 public FSDataOutputStream create(Path f, 813 boolean overwrite, 814 int bufferSize, 815 short replication, 816 long blockSize 817 ) throws IOException { 818 return create(f, overwrite, bufferSize, replication, blockSize, null); 819 } 820 821 /** 822 * Create an FSDataOutputStream at the indicated Path with write-progress 823 * reporting. 824 * @param f the file name to open 825 * @param overwrite if a file with this name already exists, then if true, 826 * the file will be overwritten, and if false an error will be thrown. 827 * @param bufferSize the size of the buffer to be used. 828 * @param replication required block replication for the file. 829 */ 830 public FSDataOutputStream create(Path f, 831 boolean overwrite, 832 int bufferSize, 833 short replication, 834 long blockSize, 835 Progressable progress 836 ) throws IOException { 837 return this.create(f, FsPermission.getDefault().applyUMask( 838 FsPermission.getUMask(getConf())), overwrite, bufferSize, 839 replication, blockSize, progress); 840 } 841 842 /** 843 * Create an FSDataOutputStream at the indicated Path with write-progress 844 * reporting. 845 * @param f the file name to open 846 * @param permission 847 * @param overwrite if a file with this name already exists, then if true, 848 * the file will be overwritten, and if false an error will be thrown. 849 * @param bufferSize the size of the buffer to be used. 850 * @param replication required block replication for the file. 851 * @param blockSize 852 * @param progress 853 * @throws IOException 854 * @see #setPermission(Path, FsPermission) 855 */ 856 public abstract FSDataOutputStream create(Path f, 857 FsPermission permission, 858 boolean overwrite, 859 int bufferSize, 860 short replication, 861 long blockSize, 862 Progressable progress) throws IOException; 863 864 /** 865 * Create an FSDataOutputStream at the indicated Path with a custom 866 * checksum option. This create method is the common method to be 867 * used to specify ChecksumOpt in both 0.23.x and 2.x. 868 * 869 * @param f the file name to open 870 * @param permission 871 * @param flags {@link CreateFlag}s to use for this stream. 872 * @param bufferSize the size of the buffer to be used. 873 * @param replication required block replication for the file. 874 * @param blockSize 875 * @param progress 876 * @param checksumOpt checksum parameter. If null, the values 877 * found in conf will be used. 878 * @throws IOException 879 * @see #setPermission(Path, FsPermission) 880 */ 881 public FSDataOutputStream create(Path f, 882 FsPermission permission, 883 EnumSet<CreateFlag> flags, 884 int bufferSize, 885 short replication, 886 long blockSize, 887 Progressable progress, 888 ChecksumOpt checksumOpt) throws IOException { 889 // Checksum options are ignored by default. The file systems that 890 // implement checksum need to override this method. The full 891 // support is currently only available in DFS. 892 return create(f, permission, flags.contains(CreateFlag.OVERWRITE), 893 bufferSize, replication, blockSize, progress); 894 } 895 896 /*. 897 * This create has been added to support the FileContext that processes 898 * the permission 899 * with umask before calling this method. 900 * This a temporary method added to support the transition from FileSystem 901 * to FileContext for user applications. 902 */ 903 @Deprecated 904 protected FSDataOutputStream primitiveCreate(Path f, 905 FsPermission absolutePermission, EnumSet<CreateFlag> flag, int bufferSize, 906 short replication, long blockSize, Progressable progress, 907 ChecksumOpt checksumOpt) throws IOException { 908 909 boolean pathExists = exists(f); 910 CreateFlag.validate(f, pathExists, flag); 911 912 // Default impl assumes that permissions do not matter and 913 // nor does the bytesPerChecksum hence 914 // calling the regular create is good enough. 915 // FSs that implement permissions should override this. 916 917 if (pathExists && flag.contains(CreateFlag.APPEND)) { 918 return append(f, bufferSize, progress); 919 } 920 921 return this.create(f, absolutePermission, 922 flag.contains(CreateFlag.OVERWRITE), bufferSize, replication, 923 blockSize, progress); 924 } 925 926 /** 927 * This version of the mkdirs method assumes that the permission is absolute. 928 * It has been added to support the FileContext that processes the permission 929 * with umask before calling this method. 930 * This a temporary method added to support the transition from FileSystem 931 * to FileContext for user applications. 932 */ 933 @Deprecated 934 protected boolean primitiveMkdir(Path f, FsPermission absolutePermission) 935 throws IOException { 936 // Default impl is to assume that permissions do not matter and hence 937 // calling the regular mkdirs is good enough. 938 // FSs that implement permissions should override this. 939 return this.mkdirs(f, absolutePermission); 940 } 941 942 943 /** 944 * This version of the mkdirs method assumes that the permission is absolute. 945 * It has been added to support the FileContext that processes the permission 946 * with umask before calling this method. 947 * This a temporary method added to support the transition from FileSystem 948 * to FileContext for user applications. 949 */ 950 @Deprecated 951 protected void primitiveMkdir(Path f, FsPermission absolutePermission, 952 boolean createParent) 953 throws IOException { 954 955 if (!createParent) { // parent must exist. 956 // since the this.mkdirs makes parent dirs automatically 957 // we must throw exception if parent does not exist. 958 final FileStatus stat = getFileStatus(f.getParent()); 959 if (stat == null) { 960 throw new FileNotFoundException("Missing parent:" + f); 961 } 962 if (!stat.isDirectory()) { 963 throw new ParentNotDirectoryException("parent is not a dir"); 964 } 965 // parent does exist - go ahead with mkdir of leaf 966 } 967 // Default impl is to assume that permissions do not matter and hence 968 // calling the regular mkdirs is good enough. 969 // FSs that implement permissions should override this. 970 if (!this.mkdirs(f, absolutePermission)) { 971 throw new IOException("mkdir of "+ f + " failed"); 972 } 973 } 974 975 /** 976 * Opens an FSDataOutputStream at the indicated Path with write-progress 977 * reporting. Same as create(), except fails if parent directory doesn't 978 * already exist. 979 * @param f the file name to open 980 * @param overwrite if a file with this name already exists, then if true, 981 * the file will be overwritten, and if false an error will be thrown. 982 * @param bufferSize the size of the buffer to be used. 983 * @param replication required block replication for the file. 984 * @param blockSize 985 * @param progress 986 * @throws IOException 987 * @see #setPermission(Path, FsPermission) 988 * @deprecated API only for 0.20-append 989 */ 990 @Deprecated 991 public FSDataOutputStream createNonRecursive(Path f, 992 boolean overwrite, 993 int bufferSize, short replication, long blockSize, 994 Progressable progress) throws IOException { 995 return this.createNonRecursive(f, FsPermission.getDefault(), 996 overwrite, bufferSize, replication, blockSize, progress); 997 } 998 999 /** 1000 * Opens an FSDataOutputStream at the indicated Path with write-progress 1001 * reporting. Same as create(), except fails if parent directory doesn't 1002 * already exist. 1003 * @param f the file name to open 1004 * @param permission 1005 * @param overwrite if a file with this name already exists, then if true, 1006 * the file will be overwritten, and if false an error will be thrown. 1007 * @param bufferSize the size of the buffer to be used. 1008 * @param replication required block replication for the file. 1009 * @param blockSize 1010 * @param progress 1011 * @throws IOException 1012 * @see #setPermission(Path, FsPermission) 1013 * @deprecated API only for 0.20-append 1014 */ 1015 @Deprecated 1016 public FSDataOutputStream createNonRecursive(Path f, FsPermission permission, 1017 boolean overwrite, int bufferSize, short replication, long blockSize, 1018 Progressable progress) throws IOException { 1019 throw new IOException("createNonRecursive unsupported for this filesystem " 1020 + this.getClass()); 1021 } 1022 1023 /** 1024 * Creates the given Path as a brand-new zero-length file. If 1025 * create fails, or if it already existed, return false. 1026 * 1027 * @param f path to use for create 1028 */ 1029 public boolean createNewFile(Path f) throws IOException { 1030 if (exists(f)) { 1031 return false; 1032 } else { 1033 create(f, false, getConf().getInt("io.file.buffer.size", 4096)).close(); 1034 return true; 1035 } 1036 } 1037 1038 /** 1039 * Append to an existing file (optional operation). 1040 * Same as append(f, getConf().getInt("io.file.buffer.size", 4096), null) 1041 * @param f the existing file to be appended. 1042 * @throws IOException 1043 */ 1044 public FSDataOutputStream append(Path f) throws IOException { 1045 return append(f, getConf().getInt("io.file.buffer.size", 4096), null); 1046 } 1047 /** 1048 * Append to an existing file (optional operation). 1049 * Same as append(f, bufferSize, null). 1050 * @param f the existing file to be appended. 1051 * @param bufferSize the size of the buffer to be used. 1052 * @throws IOException 1053 */ 1054 public FSDataOutputStream append(Path f, int bufferSize) throws IOException { 1055 return append(f, bufferSize, null); 1056 } 1057 1058 /** 1059 * Append to an existing file (optional operation). 1060 * @param f the existing file to be appended. 1061 * @param bufferSize the size of the buffer to be used. 1062 * @param progress for reporting progress if it is not null. 1063 * @throws IOException 1064 */ 1065 public abstract FSDataOutputStream append(Path f, int bufferSize, 1066 Progressable progress) throws IOException; 1067 1068 /** 1069 * Get replication. 1070 * 1071 * @deprecated Use getFileStatus() instead 1072 * @param src file name 1073 * @return file replication 1074 * @throws IOException 1075 */ 1076 @Deprecated 1077 public short getReplication(Path src) throws IOException { 1078 return getFileStatus(src).getReplication(); 1079 } 1080 1081 /** 1082 * Set replication for an existing file. 1083 * 1084 * @param src file name 1085 * @param replication new replication 1086 * @throws IOException 1087 * @return true if successful; 1088 * false if file does not exist or is a directory 1089 */ 1090 public boolean setReplication(Path src, short replication) 1091 throws IOException { 1092 return true; 1093 } 1094 1095 /** 1096 * Renames Path src to Path dst. Can take place on local fs 1097 * or remote DFS. 1098 * @param src path to be renamed 1099 * @param dst new path after rename 1100 * @throws IOException on failure 1101 * @return true if rename is successful 1102 */ 1103 public abstract boolean rename(Path src, Path dst) throws IOException; 1104 1105 /** 1106 * Renames Path src to Path dst 1107 * <ul> 1108 * <li 1109 * <li>Fails if src is a file and dst is a directory. 1110 * <li>Fails if src is a directory and dst is a file. 1111 * <li>Fails if the parent of dst does not exist or is a file. 1112 * </ul> 1113 * <p> 1114 * If OVERWRITE option is not passed as an argument, rename fails 1115 * if the dst already exists. 1116 * <p> 1117 * If OVERWRITE option is passed as an argument, rename overwrites 1118 * the dst if it is a file or an empty directory. Rename fails if dst is 1119 * a non-empty directory. 1120 * <p> 1121 * Note that atomicity of rename is dependent on the file system 1122 * implementation. Please refer to the file system documentation for 1123 * details. This default implementation is non atomic. 1124 * <p> 1125 * This method is deprecated since it is a temporary method added to 1126 * support the transition from FileSystem to FileContext for user 1127 * applications. 1128 * 1129 * @param src path to be renamed 1130 * @param dst new path after rename 1131 * @throws IOException on failure 1132 */ 1133 @Deprecated 1134 protected void rename(final Path src, final Path dst, 1135 final Rename... options) throws IOException { 1136 // Default implementation 1137 final FileStatus srcStatus = getFileStatus(src); 1138 if (srcStatus == null) { 1139 throw new FileNotFoundException("rename source " + src + " not found."); 1140 } 1141 1142 boolean overwrite = false; 1143 if (null != options) { 1144 for (Rename option : options) { 1145 if (option == Rename.OVERWRITE) { 1146 overwrite = true; 1147 } 1148 } 1149 } 1150 1151 FileStatus dstStatus; 1152 try { 1153 dstStatus = getFileStatus(dst); 1154 } catch (IOException e) { 1155 dstStatus = null; 1156 } 1157 if (dstStatus != null) { 1158 if (srcStatus.isDirectory() != dstStatus.isDirectory()) { 1159 throw new IOException("Source " + src + " Destination " + dst 1160 + " both should be either file or directory"); 1161 } 1162 if (!overwrite) { 1163 throw new FileAlreadyExistsException("rename destination " + dst 1164 + " already exists."); 1165 } 1166 // Delete the destination that is a file or an empty directory 1167 if (dstStatus.isDirectory()) { 1168 FileStatus[] list = listStatus(dst); 1169 if (list != null && list.length != 0) { 1170 throw new IOException( 1171 "rename cannot overwrite non empty destination directory " + dst); 1172 } 1173 } 1174 delete(dst, false); 1175 } else { 1176 final Path parent = dst.getParent(); 1177 final FileStatus parentStatus = getFileStatus(parent); 1178 if (parentStatus == null) { 1179 throw new FileNotFoundException("rename destination parent " + parent 1180 + " not found."); 1181 } 1182 if (!parentStatus.isDirectory()) { 1183 throw new ParentNotDirectoryException("rename destination parent " + parent 1184 + " is a file."); 1185 } 1186 } 1187 if (!rename(src, dst)) { 1188 throw new IOException("rename from " + src + " to " + dst + " failed."); 1189 } 1190 } 1191 1192 /** 1193 * Delete a file 1194 * @deprecated Use {@link #delete(Path, boolean)} instead. 1195 */ 1196 @Deprecated 1197 public boolean delete(Path f) throws IOException { 1198 return delete(f, true); 1199 } 1200 1201 /** Delete a file. 1202 * 1203 * @param f the path to delete. 1204 * @param recursive if path is a directory and set to 1205 * true, the directory is deleted else throws an exception. In 1206 * case of a file the recursive can be set to either true or false. 1207 * @return true if delete is successful else false. 1208 * @throws IOException 1209 */ 1210 public abstract boolean delete(Path f, boolean recursive) throws IOException; 1211 1212 /** 1213 * Mark a path to be deleted when FileSystem is closed. 1214 * When the JVM shuts down, 1215 * all FileSystem objects will be closed automatically. 1216 * Then, 1217 * the marked path will be deleted as a result of closing the FileSystem. 1218 * 1219 * The path has to exist in the file system. 1220 * 1221 * @param f the path to delete. 1222 * @return true if deleteOnExit is successful, otherwise false. 1223 * @throws IOException 1224 */ 1225 public boolean deleteOnExit(Path f) throws IOException { 1226 if (!exists(f)) { 1227 return false; 1228 } 1229 synchronized (deleteOnExit) { 1230 deleteOnExit.add(f); 1231 } 1232 return true; 1233 } 1234 1235 /** 1236 * Cancel the deletion of the path when the FileSystem is closed 1237 * @param f the path to cancel deletion 1238 */ 1239 public boolean cancelDeleteOnExit(Path f) { 1240 synchronized (deleteOnExit) { 1241 return deleteOnExit.remove(f); 1242 } 1243 } 1244 1245 /** 1246 * Delete all files that were marked as delete-on-exit. This recursively 1247 * deletes all files in the specified paths. 1248 */ 1249 protected void processDeleteOnExit() { 1250 synchronized (deleteOnExit) { 1251 for (Iterator<Path> iter = deleteOnExit.iterator(); iter.hasNext();) { 1252 Path path = iter.next(); 1253 try { 1254 if (exists(path)) { 1255 delete(path, true); 1256 } 1257 } 1258 catch (IOException e) { 1259 LOG.info("Ignoring failure to deleteOnExit for path " + path); 1260 } 1261 iter.remove(); 1262 } 1263 } 1264 } 1265 1266 /** Check if exists. 1267 * @param f source file 1268 */ 1269 public boolean exists(Path f) throws IOException { 1270 try { 1271 return getFileStatus(f) != null; 1272 } catch (FileNotFoundException e) { 1273 return false; 1274 } 1275 } 1276 1277 /** True iff the named path is a directory. 1278 * Note: Avoid using this method. Instead reuse the FileStatus 1279 * returned by getFileStatus() or listStatus() methods. 1280 * @param f path to check 1281 */ 1282 public boolean isDirectory(Path f) throws IOException { 1283 try { 1284 return getFileStatus(f).isDirectory(); 1285 } catch (FileNotFoundException e) { 1286 return false; // f does not exist 1287 } 1288 } 1289 1290 /** True iff the named path is a regular file. 1291 * Note: Avoid using this method. Instead reuse the FileStatus 1292 * returned by getFileStatus() or listStatus() methods. 1293 * @param f path to check 1294 */ 1295 public boolean isFile(Path f) throws IOException { 1296 try { 1297 return getFileStatus(f).isFile(); 1298 } catch (FileNotFoundException e) { 1299 return false; // f does not exist 1300 } 1301 } 1302 1303 /** The number of bytes in a file. */ 1304 /** @deprecated Use getFileStatus() instead */ 1305 @Deprecated 1306 public long getLength(Path f) throws IOException { 1307 return getFileStatus(f).getLen(); 1308 } 1309 1310 /** Return the {@link ContentSummary} of a given {@link Path}. 1311 * @param f path to use 1312 */ 1313 public ContentSummary getContentSummary(Path f) throws IOException { 1314 FileStatus status = getFileStatus(f); 1315 if (status.isFile()) { 1316 // f is a file 1317 return new ContentSummary(status.getLen(), 1, 0); 1318 } 1319 // f is a directory 1320 long[] summary = {0, 0, 1}; 1321 for(FileStatus s : listStatus(f)) { 1322 ContentSummary c = s.isDirectory() ? getContentSummary(s.getPath()) : 1323 new ContentSummary(s.getLen(), 1, 0); 1324 summary[0] += c.getLength(); 1325 summary[1] += c.getFileCount(); 1326 summary[2] += c.getDirectoryCount(); 1327 } 1328 return new ContentSummary(summary[0], summary[1], summary[2]); 1329 } 1330 1331 final private static PathFilter DEFAULT_FILTER = new PathFilter() { 1332 public boolean accept(Path file) { 1333 return true; 1334 } 1335 }; 1336 1337 /** 1338 * List the statuses of the files/directories in the given path if the path is 1339 * a directory. 1340 * 1341 * @param f given path 1342 * @return the statuses of the files/directories in the given patch 1343 * @throws FileNotFoundException when the path does not exist; 1344 * IOException see specific implementation 1345 */ 1346 public abstract FileStatus[] listStatus(Path f) throws FileNotFoundException, 1347 IOException; 1348 1349 /* 1350 * Filter files/directories in the given path using the user-supplied path 1351 * filter. Results are added to the given array <code>results</code>. 1352 */ 1353 private void listStatus(ArrayList<FileStatus> results, Path f, 1354 PathFilter filter) throws FileNotFoundException, IOException { 1355 FileStatus listing[] = listStatus(f); 1356 if (listing == null) { 1357 throw new IOException("Error accessing " + f); 1358 } 1359 1360 for (int i = 0; i < listing.length; i++) { 1361 if (filter.accept(listing[i].getPath())) { 1362 results.add(listing[i]); 1363 } 1364 } 1365 } 1366 1367 /** 1368 * @return an iterator over the corrupt files under the given path 1369 * (may contain duplicates if a file has more than one corrupt block) 1370 * @throws IOException 1371 */ 1372 public RemoteIterator<Path> listCorruptFileBlocks(Path path) 1373 throws IOException { 1374 throw new UnsupportedOperationException(getClass().getCanonicalName() + 1375 " does not support" + 1376 " listCorruptFileBlocks"); 1377 } 1378 1379 /** 1380 * Filter files/directories in the given path using the user-supplied path 1381 * filter. 1382 * 1383 * @param f 1384 * a path name 1385 * @param filter 1386 * the user-supplied path filter 1387 * @return an array of FileStatus objects for the files under the given path 1388 * after applying the filter 1389 * @throws FileNotFoundException when the path does not exist; 1390 * IOException see specific implementation 1391 */ 1392 public FileStatus[] listStatus(Path f, PathFilter filter) 1393 throws FileNotFoundException, IOException { 1394 ArrayList<FileStatus> results = new ArrayList<FileStatus>(); 1395 listStatus(results, f, filter); 1396 return results.toArray(new FileStatus[results.size()]); 1397 } 1398 1399 /** 1400 * Filter files/directories in the given list of paths using default 1401 * path filter. 1402 * 1403 * @param files 1404 * a list of paths 1405 * @return a list of statuses for the files under the given paths after 1406 * applying the filter default Path filter 1407 * @throws FileNotFoundException when the path does not exist; 1408 * IOException see specific implementation 1409 */ 1410 public FileStatus[] listStatus(Path[] files) 1411 throws FileNotFoundException, IOException { 1412 return listStatus(files, DEFAULT_FILTER); 1413 } 1414 1415 /** 1416 * Filter files/directories in the given list of paths using user-supplied 1417 * path filter. 1418 * 1419 * @param files 1420 * a list of paths 1421 * @param filter 1422 * the user-supplied path filter 1423 * @return a list of statuses for the files under the given paths after 1424 * applying the filter 1425 * @throws FileNotFoundException when the path does not exist; 1426 * IOException see specific implementation 1427 */ 1428 public FileStatus[] listStatus(Path[] files, PathFilter filter) 1429 throws FileNotFoundException, IOException { 1430 ArrayList<FileStatus> results = new ArrayList<FileStatus>(); 1431 for (int i = 0; i < files.length; i++) { 1432 listStatus(results, files[i], filter); 1433 } 1434 return results.toArray(new FileStatus[results.size()]); 1435 } 1436 1437 /** 1438 * <p>Return all the files that match filePattern and are not checksum 1439 * files. Results are sorted by their names. 1440 * 1441 * <p> 1442 * A filename pattern is composed of <i>regular</i> characters and 1443 * <i>special pattern matching</i> characters, which are: 1444 * 1445 * <dl> 1446 * <dd> 1447 * <dl> 1448 * <p> 1449 * <dt> <tt> ? </tt> 1450 * <dd> Matches any single character. 1451 * 1452 * <p> 1453 * <dt> <tt> * </tt> 1454 * <dd> Matches zero or more characters. 1455 * 1456 * <p> 1457 * <dt> <tt> [<i>abc</i>] </tt> 1458 * <dd> Matches a single character from character set 1459 * <tt>{<i>a,b,c</i>}</tt>. 1460 * 1461 * <p> 1462 * <dt> <tt> [<i>a</i>-<i>b</i>] </tt> 1463 * <dd> Matches a single character from the character range 1464 * <tt>{<i>a...b</i>}</tt>. Note that character <tt><i>a</i></tt> must be 1465 * lexicographically less than or equal to character <tt><i>b</i></tt>. 1466 * 1467 * <p> 1468 * <dt> <tt> [^<i>a</i>] </tt> 1469 * <dd> Matches a single character that is not from character set or range 1470 * <tt>{<i>a</i>}</tt>. Note that the <tt>^</tt> character must occur 1471 * immediately to the right of the opening bracket. 1472 * 1473 * <p> 1474 * <dt> <tt> \<i>c</i> </tt> 1475 * <dd> Removes (escapes) any special meaning of character <i>c</i>. 1476 * 1477 * <p> 1478 * <dt> <tt> {ab,cd} </tt> 1479 * <dd> Matches a string from the string set <tt>{<i>ab, cd</i>} </tt> 1480 * 1481 * <p> 1482 * <dt> <tt> {ab,c{de,fh}} </tt> 1483 * <dd> Matches a string from the string set <tt>{<i>ab, cde, cfh</i>}</tt> 1484 * 1485 * </dl> 1486 * </dd> 1487 * </dl> 1488 * 1489 * @param pathPattern a regular expression specifying a pth pattern 1490 1491 * @return an array of paths that match the path pattern 1492 * @throws IOException 1493 */ 1494 public FileStatus[] globStatus(Path pathPattern) throws IOException { 1495 return globStatus(pathPattern, DEFAULT_FILTER); 1496 } 1497 1498 /** 1499 * Return an array of FileStatus objects whose path names match pathPattern 1500 * and is accepted by the user-supplied path filter. Results are sorted by 1501 * their path names. 1502 * Return null if pathPattern has no glob and the path does not exist. 1503 * Return an empty array if pathPattern has a glob and no path matches it. 1504 * 1505 * @param pathPattern 1506 * a regular expression specifying the path pattern 1507 * @param filter 1508 * a user-supplied path filter 1509 * @return an array of FileStatus objects 1510 * @throws IOException if any I/O error occurs when fetching file status 1511 */ 1512 public FileStatus[] globStatus(Path pathPattern, PathFilter filter) 1513 throws IOException { 1514 String filename = pathPattern.toUri().getPath(); 1515 List<String> filePatterns = GlobExpander.expand(filename); 1516 if (filePatterns.size() == 1) { 1517 return globStatusInternal(pathPattern, filter); 1518 } else { 1519 List<FileStatus> results = new ArrayList<FileStatus>(); 1520 for (String filePattern : filePatterns) { 1521 FileStatus[] files = globStatusInternal(new Path(filePattern), filter); 1522 for (FileStatus file : files) { 1523 results.add(file); 1524 } 1525 } 1526 return results.toArray(new FileStatus[results.size()]); 1527 } 1528 } 1529 1530 private FileStatus[] globStatusInternal(Path pathPattern, PathFilter filter) 1531 throws IOException { 1532 Path[] parents = new Path[1]; 1533 int level = 0; 1534 String filename = pathPattern.toUri().getPath(); 1535 1536 // path has only zero component 1537 if ("".equals(filename) || Path.SEPARATOR.equals(filename)) { 1538 return getFileStatus(new Path[]{pathPattern}); 1539 } 1540 1541 // path has at least one component 1542 String[] components = filename.split(Path.SEPARATOR); 1543 // get the first component 1544 if (pathPattern.isAbsolute()) { 1545 parents[0] = new Path(Path.SEPARATOR); 1546 level = 1; 1547 } else { 1548 parents[0] = new Path(Path.CUR_DIR); 1549 } 1550 1551 // glob the paths that match the parent path, i.e., [0, components.length-1] 1552 boolean[] hasGlob = new boolean[]{false}; 1553 Path[] parentPaths = globPathsLevel(parents, components, level, hasGlob); 1554 FileStatus[] results; 1555 if (parentPaths == null || parentPaths.length == 0) { 1556 results = null; 1557 } else { 1558 // Now work on the last component of the path 1559 GlobFilter fp = new GlobFilter(components[components.length - 1], filter); 1560 if (fp.hasPattern()) { // last component has a pattern 1561 // list parent directories and then glob the results 1562 try { 1563 results = listStatus(parentPaths, fp); 1564 } catch (FileNotFoundException e) { 1565 results = null; 1566 } 1567 hasGlob[0] = true; 1568 } else { // last component does not have a pattern 1569 // remove the quoting of metachars in a non-regexp expansion 1570 String name = unquotePathComponent(components[components.length - 1]); 1571 // get all the path names 1572 ArrayList<Path> filteredPaths = new ArrayList<Path>(parentPaths.length); 1573 for (int i = 0; i < parentPaths.length; i++) { 1574 parentPaths[i] = new Path(parentPaths[i], name); 1575 if (fp.accept(parentPaths[i])) { 1576 filteredPaths.add(parentPaths[i]); 1577 } 1578 } 1579 // get all their statuses 1580 results = getFileStatus( 1581 filteredPaths.toArray(new Path[filteredPaths.size()])); 1582 } 1583 } 1584 1585 // Decide if the pathPattern contains a glob or not 1586 if (results == null) { 1587 if (hasGlob[0]) { 1588 results = new FileStatus[0]; 1589 } 1590 } else { 1591 if (results.length == 0 ) { 1592 if (!hasGlob[0]) { 1593 results = null; 1594 } 1595 } else { 1596 Arrays.sort(results); 1597 } 1598 } 1599 return results; 1600 } 1601 1602 /* 1603 * For a path of N components, return a list of paths that match the 1604 * components [<code>level</code>, <code>N-1</code>]. 1605 */ 1606 private Path[] globPathsLevel(Path[] parents, String[] filePattern, 1607 int level, boolean[] hasGlob) throws IOException { 1608 if (level == filePattern.length - 1) 1609 return parents; 1610 if (parents == null || parents.length == 0) { 1611 return null; 1612 } 1613 GlobFilter fp = new GlobFilter(filePattern[level]); 1614 if (fp.hasPattern()) { 1615 try { 1616 parents = FileUtil.stat2Paths(listStatus(parents, fp)); 1617 } catch (FileNotFoundException e) { 1618 parents = null; 1619 } 1620 hasGlob[0] = true; 1621 } else { // the component does not have a pattern 1622 // remove the quoting of metachars in a non-regexp expansion 1623 String name = unquotePathComponent(filePattern[level]); 1624 for (int i = 0; i < parents.length; i++) { 1625 parents[i] = new Path(parents[i], name); 1626 } 1627 } 1628 return globPathsLevel(parents, filePattern, level + 1, hasGlob); 1629 } 1630 1631 /** 1632 * The glob filter builds a regexp per path component. If the component 1633 * does not contain a shell metachar, then it falls back to appending the 1634 * raw string to the list of built up paths. This raw path needs to have 1635 * the quoting removed. Ie. convert all occurances of "\X" to "X" 1636 * @param name of the path component 1637 * @return the unquoted path component 1638 */ 1639 private String unquotePathComponent(String name) { 1640 return name.replaceAll("\\\\(.)", "$1"); 1641 } 1642 1643 /** 1644 * List the statuses of the files/directories in the given path if the path is 1645 * a directory. 1646 * Return the file's status and block locations If the path is a file. 1647 * 1648 * If a returned status is a file, it contains the file's block locations. 1649 * 1650 * @param f is the path 1651 * 1652 * @return an iterator that traverses statuses of the files/directories 1653 * in the given path 1654 * 1655 * @throws FileNotFoundException If <code>f</code> does not exist 1656 * @throws IOException If an I/O error occurred 1657 */ 1658 public RemoteIterator<LocatedFileStatus> listLocatedStatus(final Path f) 1659 throws FileNotFoundException, IOException { 1660 return listLocatedStatus(f, DEFAULT_FILTER); 1661 } 1662 1663 /** 1664 * Listing a directory 1665 * The returned results include its block location if it is a file 1666 * The results are filtered by the given path filter 1667 * @param f a path 1668 * @param filter a path filter 1669 * @return an iterator that traverses statuses of the files/directories 1670 * in the given path 1671 * @throws FileNotFoundException if <code>f</code> does not exist 1672 * @throws IOException if any I/O error occurred 1673 */ 1674 protected RemoteIterator<LocatedFileStatus> listLocatedStatus(final Path f, 1675 final PathFilter filter) 1676 throws FileNotFoundException, IOException { 1677 return new RemoteIterator<LocatedFileStatus>() { 1678 private final FileStatus[] stats = listStatus(f, filter); 1679 private int i = 0; 1680 1681 @Override 1682 public boolean hasNext() { 1683 return i<stats.length; 1684 } 1685 1686 @Override 1687 public LocatedFileStatus next() throws IOException { 1688 if (!hasNext()) { 1689 throw new NoSuchElementException("No more entry in " + f); 1690 } 1691 FileStatus result = stats[i++]; 1692 BlockLocation[] locs = result.isFile() ? 1693 getFileBlockLocations(result.getPath(), 0, result.getLen()) : 1694 null; 1695 return new LocatedFileStatus(result, locs); 1696 } 1697 }; 1698 } 1699 1700 /** 1701 * List the statuses and block locations of the files in the given path. 1702 * 1703 * If the path is a directory, 1704 * if recursive is false, returns files in the directory; 1705 * if recursive is true, return files in the subtree rooted at the path. 1706 * If the path is a file, return the file's status and block locations. 1707 * 1708 * @param f is the path 1709 * @param recursive if the subdirectories need to be traversed recursively 1710 * 1711 * @return an iterator that traverses statuses of the files 1712 * 1713 * @throws FileNotFoundException when the path does not exist; 1714 * IOException see specific implementation 1715 */ 1716 public RemoteIterator<LocatedFileStatus> listFiles( 1717 final Path f, final boolean recursive) 1718 throws FileNotFoundException, IOException { 1719 return new RemoteIterator<LocatedFileStatus>() { 1720 private Stack<RemoteIterator<LocatedFileStatus>> itors = 1721 new Stack<RemoteIterator<LocatedFileStatus>>(); 1722 private RemoteIterator<LocatedFileStatus> curItor = 1723 listLocatedStatus(f); 1724 private LocatedFileStatus curFile; 1725 1726 @Override 1727 public boolean hasNext() throws IOException { 1728 while (curFile == null) { 1729 if (curItor.hasNext()) { 1730 handleFileStat(curItor.next()); 1731 } else if (!itors.empty()) { 1732 curItor = itors.pop(); 1733 } else { 1734 return false; 1735 } 1736 } 1737 return true; 1738 } 1739 1740 /** 1741 * Process the input stat. 1742 * If it is a file, return the file stat. 1743 * If it is a directory, traverse the directory if recursive is true; 1744 * ignore it if recursive is false. 1745 * @param stat input status 1746 * @throws IOException if any IO error occurs 1747 */ 1748 private void handleFileStat(LocatedFileStatus stat) throws IOException { 1749 if (stat.isFile()) { // file 1750 curFile = stat; 1751 } else if (recursive) { // directory 1752 itors.push(curItor); 1753 curItor = listLocatedStatus(stat.getPath()); 1754 } 1755 } 1756 1757 @Override 1758 public LocatedFileStatus next() throws IOException { 1759 if (hasNext()) { 1760 LocatedFileStatus result = curFile; 1761 curFile = null; 1762 return result; 1763 } 1764 throw new java.util.NoSuchElementException("No more entry in " + f); 1765 } 1766 }; 1767 } 1768 1769 /** Return the current user's home directory in this filesystem. 1770 * The default implementation returns "/user/$USER/". 1771 */ 1772 public Path getHomeDirectory() { 1773 return this.makeQualified( 1774 new Path("/user/"+System.getProperty("user.name"))); 1775 } 1776 1777 1778 /** 1779 * Set the current working directory for the given file system. All relative 1780 * paths will be resolved relative to it. 1781 * 1782 * @param new_dir 1783 */ 1784 public abstract void setWorkingDirectory(Path new_dir); 1785 1786 /** 1787 * Get the current working directory for the given file system 1788 * @return the directory pathname 1789 */ 1790 public abstract Path getWorkingDirectory(); 1791 1792 1793 /** 1794 * Note: with the new FilesContext class, getWorkingDirectory() 1795 * will be removed. 1796 * The working directory is implemented in FilesContext. 1797 * 1798 * Some file systems like LocalFileSystem have an initial workingDir 1799 * that we use as the starting workingDir. For other file systems 1800 * like HDFS there is no built in notion of an inital workingDir. 1801 * 1802 * @return if there is built in notion of workingDir then it 1803 * is returned; else a null is returned. 1804 */ 1805 protected Path getInitialWorkingDirectory() { 1806 return null; 1807 } 1808 1809 /** 1810 * Call {@link #mkdirs(Path, FsPermission)} with default permission. 1811 */ 1812 public boolean mkdirs(Path f) throws IOException { 1813 return mkdirs(f, FsPermission.getDefault()); 1814 } 1815 1816 /** 1817 * Make the given file and all non-existent parents into 1818 * directories. Has the semantics of Unix 'mkdir -p'. 1819 * Existence of the directory hierarchy is not an error. 1820 * @param f path to create 1821 * @param permission to apply to f 1822 */ 1823 public abstract boolean mkdirs(Path f, FsPermission permission 1824 ) throws IOException; 1825 1826 /** 1827 * The src file is on the local disk. Add it to FS at 1828 * the given dst name and the source is kept intact afterwards 1829 * @param src path 1830 * @param dst path 1831 */ 1832 public void copyFromLocalFile(Path src, Path dst) 1833 throws IOException { 1834 copyFromLocalFile(false, src, dst); 1835 } 1836 1837 /** 1838 * The src files is on the local disk. Add it to FS at 1839 * the given dst name, removing the source afterwards. 1840 * @param srcs path 1841 * @param dst path 1842 */ 1843 public void moveFromLocalFile(Path[] srcs, Path dst) 1844 throws IOException { 1845 copyFromLocalFile(true, true, srcs, dst); 1846 } 1847 1848 /** 1849 * The src file is on the local disk. Add it to FS at 1850 * the given dst name, removing the source afterwards. 1851 * @param src path 1852 * @param dst path 1853 */ 1854 public void moveFromLocalFile(Path src, Path dst) 1855 throws IOException { 1856 copyFromLocalFile(true, src, dst); 1857 } 1858 1859 /** 1860 * The src file is on the local disk. Add it to FS at 1861 * the given dst name. 1862 * delSrc indicates if the source should be removed 1863 * @param delSrc whether to delete the src 1864 * @param src path 1865 * @param dst path 1866 */ 1867 public void copyFromLocalFile(boolean delSrc, Path src, Path dst) 1868 throws IOException { 1869 copyFromLocalFile(delSrc, true, src, dst); 1870 } 1871 1872 /** 1873 * The src files are on the local disk. Add it to FS at 1874 * the given dst name. 1875 * delSrc indicates if the source should be removed 1876 * @param delSrc whether to delete the src 1877 * @param overwrite whether to overwrite an existing file 1878 * @param srcs array of paths which are source 1879 * @param dst path 1880 */ 1881 public void copyFromLocalFile(boolean delSrc, boolean overwrite, 1882 Path[] srcs, Path dst) 1883 throws IOException { 1884 Configuration conf = getConf(); 1885 FileUtil.copy(getLocal(conf), srcs, this, dst, delSrc, overwrite, conf); 1886 } 1887 1888 /** 1889 * The src file is on the local disk. Add it to FS at 1890 * the given dst name. 1891 * delSrc indicates if the source should be removed 1892 * @param delSrc whether to delete the src 1893 * @param overwrite whether to overwrite an existing file 1894 * @param src path 1895 * @param dst path 1896 */ 1897 public void copyFromLocalFile(boolean delSrc, boolean overwrite, 1898 Path src, Path dst) 1899 throws IOException { 1900 Configuration conf = getConf(); 1901 FileUtil.copy(getLocal(conf), src, this, dst, delSrc, overwrite, conf); 1902 } 1903 1904 /** 1905 * The src file is under FS, and the dst is on the local disk. 1906 * Copy it from FS control to the local dst name. 1907 * @param src path 1908 * @param dst path 1909 */ 1910 public void copyToLocalFile(Path src, Path dst) throws IOException { 1911 copyToLocalFile(false, src, dst); 1912 } 1913 1914 /** 1915 * The src file is under FS, and the dst is on the local disk. 1916 * Copy it from FS control to the local dst name. 1917 * Remove the source afterwards 1918 * @param src path 1919 * @param dst path 1920 */ 1921 public void moveToLocalFile(Path src, Path dst) throws IOException { 1922 copyToLocalFile(true, src, dst); 1923 } 1924 1925 /** 1926 * The src file is under FS, and the dst is on the local disk. 1927 * Copy it from FS control to the local dst name. 1928 * delSrc indicates if the src will be removed or not. 1929 * @param delSrc whether to delete the src 1930 * @param src path 1931 * @param dst path 1932 */ 1933 public void copyToLocalFile(boolean delSrc, Path src, Path dst) 1934 throws IOException { 1935 copyToLocalFile(delSrc, src, dst, false); 1936 } 1937 1938 /** 1939 * The src file is under FS, and the dst is on the local disk. Copy it from FS 1940 * control to the local dst name. delSrc indicates if the src will be removed 1941 * or not. useRawLocalFileSystem indicates whether to use RawLocalFileSystem 1942 * as local file system or not. RawLocalFileSystem is non crc file system.So, 1943 * It will not create any crc files at local. 1944 * 1945 * @param delSrc 1946 * whether to delete the src 1947 * @param src 1948 * path 1949 * @param dst 1950 * path 1951 * @param useRawLocalFileSystem 1952 * whether to use RawLocalFileSystem as local file system or not. 1953 * 1954 * @throws IOException 1955 * - if any IO error 1956 */ 1957 public void copyToLocalFile(boolean delSrc, Path src, Path dst, 1958 boolean useRawLocalFileSystem) throws IOException { 1959 Configuration conf = getConf(); 1960 FileSystem local = null; 1961 if (useRawLocalFileSystem) { 1962 local = getLocal(conf).getRawFileSystem(); 1963 } else { 1964 local = getLocal(conf); 1965 } 1966 FileUtil.copy(this, src, local, dst, delSrc, conf); 1967 } 1968 1969 /** 1970 * Returns a local File that the user can write output to. The caller 1971 * provides both the eventual FS target name and the local working 1972 * file. If the FS is local, we write directly into the target. If 1973 * the FS is remote, we write into the tmp local area. 1974 * @param fsOutputFile path of output file 1975 * @param tmpLocalFile path of local tmp file 1976 */ 1977 public Path startLocalOutput(Path fsOutputFile, Path tmpLocalFile) 1978 throws IOException { 1979 return tmpLocalFile; 1980 } 1981 1982 /** 1983 * Called when we're all done writing to the target. A local FS will 1984 * do nothing, because we've written to exactly the right place. A remote 1985 * FS will copy the contents of tmpLocalFile to the correct target at 1986 * fsOutputFile. 1987 * @param fsOutputFile path of output file 1988 * @param tmpLocalFile path to local tmp file 1989 */ 1990 public void completeLocalOutput(Path fsOutputFile, Path tmpLocalFile) 1991 throws IOException { 1992 moveFromLocalFile(tmpLocalFile, fsOutputFile); 1993 } 1994 1995 /** 1996 * No more filesystem operations are needed. Will 1997 * release any held locks. 1998 */ 1999 public void close() throws IOException { 2000 // delete all files that were marked as delete-on-exit. 2001 processDeleteOnExit(); 2002 CACHE.remove(this.key, this); 2003 } 2004 2005 /** Return the total size of all files in the filesystem.*/ 2006 public long getUsed() throws IOException{ 2007 long used = 0; 2008 FileStatus[] files = listStatus(new Path("/")); 2009 for(FileStatus file:files){ 2010 used += file.getLen(); 2011 } 2012 return used; 2013 } 2014 2015 /** 2016 * Get the block size for a particular file. 2017 * @param f the filename 2018 * @return the number of bytes in a block 2019 */ 2020 /** @deprecated Use getFileStatus() instead */ 2021 @Deprecated 2022 public long getBlockSize(Path f) throws IOException { 2023 return getFileStatus(f).getBlockSize(); 2024 } 2025 2026 /** Return the number of bytes that large input files should be optimally 2027 * be split into to minimize i/o time. */ 2028 public long getDefaultBlockSize() { 2029 // default to 32MB: large enough to minimize the impact of seeks 2030 return getConf().getLong("fs.local.block.size", 32 * 1024 * 1024); 2031 } 2032 2033 /** Return the number of bytes that large input files should be optimally 2034 * be split into to minimize i/o time. The given path will be used to 2035 * locate the actual filesystem. The full path does not have to exist. 2036 * @param f path of file 2037 * @return the default block size for the path's filesystem 2038 */ 2039 public long getDefaultBlockSize(Path f) { 2040 return getDefaultBlockSize(); 2041 } 2042 2043 /** 2044 * Get the default replication. 2045 */ 2046 public short getDefaultReplication() { return 1; } 2047 2048 /** 2049 * Get the default replication for a path. The given path will be used to 2050 * locate the actual filesystem. The full path does not have to exist. 2051 * @param path of the file 2052 * @return default replication for the path's filesystem 2053 */ 2054 public short getDefaultReplication(Path path) { 2055 return getDefaultReplication(); 2056 } 2057 2058 /** 2059 * Return a file status object that represents the path. 2060 * @param f The path we want information from 2061 * @return a FileStatus object 2062 * @throws FileNotFoundException when the path does not exist; 2063 * IOException see specific implementation 2064 */ 2065 public abstract FileStatus getFileStatus(Path f) throws IOException; 2066 2067 /** 2068 * Get the checksum of a file. 2069 * 2070 * @param f The file path 2071 * @return The file checksum. The default return value is null, 2072 * which indicates that no checksum algorithm is implemented 2073 * in the corresponding FileSystem. 2074 */ 2075 public FileChecksum getFileChecksum(Path f) throws IOException { 2076 return null; 2077 } 2078 2079 /** 2080 * Set the verify checksum flag. This is only applicable if the 2081 * corresponding FileSystem supports checksum. By default doesn't do anything. 2082 * @param verifyChecksum 2083 */ 2084 public void setVerifyChecksum(boolean verifyChecksum) { 2085 //doesn't do anything 2086 } 2087 2088 /** 2089 * Set the write checksum flag. This is only applicable if the 2090 * corresponding FileSystem supports checksum. By default doesn't do anything. 2091 * @param writeChecksum 2092 */ 2093 public void setWriteChecksum(boolean writeChecksum) { 2094 //doesn't do anything 2095 } 2096 2097 /** 2098 * Return a list of file status objects that corresponds to the list of paths 2099 * excluding those non-existent paths. 2100 * 2101 * @param paths 2102 * the list of paths we want information from 2103 * @return a list of FileStatus objects 2104 * @throws IOException 2105 * see specific implementation 2106 */ 2107 private FileStatus[] getFileStatus(Path[] paths) throws IOException { 2108 if (paths == null) { 2109 return null; 2110 } 2111 ArrayList<FileStatus> results = new ArrayList<FileStatus>(paths.length); 2112 for (int i = 0; i < paths.length; i++) { 2113 try { 2114 results.add(getFileStatus(paths[i])); 2115 } catch (FileNotFoundException e) { // do nothing 2116 } 2117 } 2118 return results.toArray(new FileStatus[results.size()]); 2119 } 2120 2121 /** 2122 * Returns a status object describing the use and capacity of the 2123 * file system. If the file system has multiple partitions, the 2124 * use and capacity of the root partition is reflected. 2125 * 2126 * @return a FsStatus object 2127 * @throws IOException 2128 * see specific implementation 2129 */ 2130 public FsStatus getStatus() throws IOException { 2131 return getStatus(null); 2132 } 2133 2134 /** 2135 * Returns a status object describing the use and capacity of the 2136 * file system. If the file system has multiple partitions, the 2137 * use and capacity of the partition pointed to by the specified 2138 * path is reflected. 2139 * @param p Path for which status should be obtained. null means 2140 * the default partition. 2141 * @return a FsStatus object 2142 * @throws IOException 2143 * see specific implementation 2144 */ 2145 public FsStatus getStatus(Path p) throws IOException { 2146 return new FsStatus(Long.MAX_VALUE, 0, Long.MAX_VALUE); 2147 } 2148 2149 /** 2150 * Set permission of a path. 2151 * @param p 2152 * @param permission 2153 */ 2154 public void setPermission(Path p, FsPermission permission 2155 ) throws IOException { 2156 } 2157 2158 /** 2159 * Set owner of a path (i.e. a file or a directory). 2160 * The parameters username and groupname cannot both be null. 2161 * @param p The path 2162 * @param username If it is null, the original username remains unchanged. 2163 * @param groupname If it is null, the original groupname remains unchanged. 2164 */ 2165 public void setOwner(Path p, String username, String groupname 2166 ) throws IOException { 2167 } 2168 2169 /** 2170 * Set access time of a file 2171 * @param p The path 2172 * @param mtime Set the modification time of this file. 2173 * The number of milliseconds since Jan 1, 1970. 2174 * A value of -1 means that this call should not set modification time. 2175 * @param atime Set the access time of this file. 2176 * The number of milliseconds since Jan 1, 1970. 2177 * A value of -1 means that this call should not set access time. 2178 */ 2179 public void setTimes(Path p, long mtime, long atime 2180 ) throws IOException { 2181 } 2182 2183 private static FileSystem createFileSystem(URI uri, Configuration conf 2184 ) throws IOException { 2185 Class<?> clazz = conf.getClass("fs." + uri.getScheme() + ".impl", null); 2186 if (clazz == null) { 2187 throw new IOException("No FileSystem for scheme: " + uri.getScheme()); 2188 } 2189 FileSystem fs = (FileSystem)ReflectionUtils.newInstance(clazz, conf); 2190 fs.initialize(uri, conf); 2191 return fs; 2192 } 2193 2194 /** Caching FileSystem objects */ 2195 static class Cache { 2196 private final ClientFinalizer clientFinalizer = new ClientFinalizer(); 2197 2198 private final Map<Key, FileSystem> map = new HashMap<Key, FileSystem>(); 2199 private final Set<Key> toAutoClose = new HashSet<Key>(); 2200 2201 /** A variable that makes all objects in the cache unique */ 2202 private static AtomicLong unique = new AtomicLong(1); 2203 2204 FileSystem get(URI uri, Configuration conf) throws IOException{ 2205 Key key = new Key(uri, conf); 2206 return getInternal(uri, conf, key); 2207 } 2208 2209 /** The objects inserted into the cache using this method are all unique */ 2210 FileSystem getUnique(URI uri, Configuration conf) throws IOException{ 2211 Key key = new Key(uri, conf, unique.getAndIncrement()); 2212 return getInternal(uri, conf, key); 2213 } 2214 2215 private FileSystem getInternal(URI uri, Configuration conf, Key key) throws IOException{ 2216 FileSystem fs; 2217 synchronized (this) { 2218 fs = map.get(key); 2219 } 2220 if (fs != null) { 2221 return fs; 2222 } 2223 2224 fs = createFileSystem(uri, conf); 2225 synchronized (this) { // refetch the lock again 2226 FileSystem oldfs = map.get(key); 2227 if (oldfs != null) { // a file system is created while lock is releasing 2228 fs.close(); // close the new file system 2229 return oldfs; // return the old file system 2230 } 2231 2232 // now insert the new file system into the map 2233 if (map.isEmpty() ) { 2234 ShutdownHookManager.get().addShutdownHook(clientFinalizer, SHUTDOWN_HOOK_PRIORITY); 2235 } 2236 fs.key = key; 2237 map.put(key, fs); 2238 if (conf.getBoolean("fs.automatic.close", true)) { 2239 toAutoClose.add(key); 2240 } 2241 return fs; 2242 } 2243 } 2244 2245 synchronized void remove(Key key, FileSystem fs) { 2246 if (map.containsKey(key) && fs == map.get(key)) { 2247 map.remove(key); 2248 toAutoClose.remove(key); 2249 } 2250 } 2251 2252 synchronized void closeAll() throws IOException { 2253 closeAll(false); 2254 } 2255 2256 /** 2257 * Close all FileSystem instances in the Cache. 2258 * @param onlyAutomatic only close those that are marked for automatic closing 2259 */ 2260 synchronized void closeAll(boolean onlyAutomatic) throws IOException { 2261 List<IOException> exceptions = new ArrayList<IOException>(); 2262 2263 // Make a copy of the keys in the map since we'll be modifying 2264 // the map while iterating over it, which isn't safe. 2265 List<Key> keys = new ArrayList<Key>(); 2266 keys.addAll(map.keySet()); 2267 2268 for (Key key : keys) { 2269 final FileSystem fs = map.get(key); 2270 2271 if (onlyAutomatic && !toAutoClose.contains(key)) { 2272 continue; 2273 } 2274 2275 //remove from cache 2276 remove(key, fs); 2277 2278 if (fs != null) { 2279 try { 2280 fs.close(); 2281 } 2282 catch(IOException ioe) { 2283 exceptions.add(ioe); 2284 } 2285 } 2286 } 2287 2288 if (!exceptions.isEmpty()) { 2289 throw MultipleIOException.createIOException(exceptions); 2290 } 2291 } 2292 2293 private class ClientFinalizer implements Runnable { 2294 public synchronized void run() { 2295 try { 2296 closeAll(true); 2297 } catch (IOException e) { 2298 LOG.info("FileSystem.Cache.closeAll() threw an exception:\n" + e); 2299 } 2300 } 2301 } 2302 2303 synchronized void closeAll(UserGroupInformation ugi) throws IOException { 2304 List<FileSystem> targetFSList = new ArrayList<FileSystem>(); 2305 //Make a pass over the list and collect the filesystems to close 2306 //we cannot close inline since close() removes the entry from the Map 2307 for (Map.Entry<Key, FileSystem> entry : map.entrySet()) { 2308 final Key key = entry.getKey(); 2309 final FileSystem fs = entry.getValue(); 2310 if (ugi.equals(key.ugi) && fs != null) { 2311 targetFSList.add(fs); 2312 } 2313 } 2314 List<IOException> exceptions = new ArrayList<IOException>(); 2315 //now make a pass over the target list and close each 2316 for (FileSystem fs : targetFSList) { 2317 try { 2318 fs.close(); 2319 } 2320 catch(IOException ioe) { 2321 exceptions.add(ioe); 2322 } 2323 } 2324 if (!exceptions.isEmpty()) { 2325 throw MultipleIOException.createIOException(exceptions); 2326 } 2327 } 2328 2329 /** FileSystem.Cache.Key */ 2330 static class Key { 2331 final String scheme; 2332 final String authority; 2333 final UserGroupInformation ugi; 2334 final long unique; // an artificial way to make a key unique 2335 2336 Key(URI uri, Configuration conf) throws IOException { 2337 this(uri, conf, 0); 2338 } 2339 2340 Key(URI uri, Configuration conf, long unique) throws IOException { 2341 scheme = uri.getScheme()==null?"":uri.getScheme().toLowerCase(); 2342 authority = uri.getAuthority()==null?"":uri.getAuthority().toLowerCase(); 2343 this.unique = unique; 2344 2345 this.ugi = UserGroupInformation.getCurrentUser(); 2346 } 2347 2348 /** {@inheritDoc} */ 2349 public int hashCode() { 2350 return (scheme + authority).hashCode() + ugi.hashCode() + (int)unique; 2351 } 2352 2353 static boolean isEqual(Object a, Object b) { 2354 return a == b || (a != null && a.equals(b)); 2355 } 2356 2357 /** {@inheritDoc} */ 2358 public boolean equals(Object obj) { 2359 if (obj == this) { 2360 return true; 2361 } 2362 if (obj != null && obj instanceof Key) { 2363 Key that = (Key)obj; 2364 return isEqual(this.scheme, that.scheme) 2365 && isEqual(this.authority, that.authority) 2366 && isEqual(this.ugi, that.ugi) 2367 && (this.unique == that.unique); 2368 } 2369 return false; 2370 } 2371 2372 /** {@inheritDoc} */ 2373 public String toString() { 2374 return "("+ugi.toString() + ")@" + scheme + "://" + authority; 2375 } 2376 } 2377 } 2378 2379 public static final class Statistics { 2380 private final String scheme; 2381 private AtomicLong bytesRead = new AtomicLong(); 2382 private AtomicLong bytesWritten = new AtomicLong(); 2383 private AtomicInteger readOps = new AtomicInteger(); 2384 private AtomicInteger largeReadOps = new AtomicInteger(); 2385 private AtomicInteger writeOps = new AtomicInteger(); 2386 2387 public Statistics(String scheme) { 2388 this.scheme = scheme; 2389 } 2390 2391 /** 2392 * Copy constructor. 2393 * 2394 * @param st 2395 * The input Statistics object which is cloned. 2396 */ 2397 public Statistics(Statistics st) { 2398 this.scheme = st.scheme; 2399 this.bytesRead = new AtomicLong(st.bytesRead.longValue()); 2400 this.bytesWritten = new AtomicLong(st.bytesWritten.longValue()); 2401 } 2402 2403 /** 2404 * Increment the bytes read in the statistics 2405 * @param newBytes the additional bytes read 2406 */ 2407 public void incrementBytesRead(long newBytes) { 2408 bytesRead.getAndAdd(newBytes); 2409 } 2410 2411 /** 2412 * Increment the bytes written in the statistics 2413 * @param newBytes the additional bytes written 2414 */ 2415 public void incrementBytesWritten(long newBytes) { 2416 bytesWritten.getAndAdd(newBytes); 2417 } 2418 2419 /** 2420 * Increment the number of read operations 2421 * @param count number of read operations 2422 */ 2423 public void incrementReadOps(int count) { 2424 readOps.getAndAdd(count); 2425 } 2426 2427 /** 2428 * Increment the number of large read operations 2429 * @param count number of large read operations 2430 */ 2431 public void incrementLargeReadOps(int count) { 2432 largeReadOps.getAndAdd(count); 2433 } 2434 2435 /** 2436 * Increment the number of write operations 2437 * @param count number of write operations 2438 */ 2439 public void incrementWriteOps(int count) { 2440 writeOps.getAndAdd(count); 2441 } 2442 2443 /** 2444 * Get the total number of bytes read 2445 * @return the number of bytes 2446 */ 2447 public long getBytesRead() { 2448 return bytesRead.get(); 2449 } 2450 2451 /** 2452 * Get the total number of bytes written 2453 * @return the number of bytes 2454 */ 2455 public long getBytesWritten() { 2456 return bytesWritten.get(); 2457 } 2458 2459 /** 2460 * Get the number of file system read operations such as list files 2461 * @return number of read operations 2462 */ 2463 public int getReadOps() { 2464 return readOps.get() + largeReadOps.get(); 2465 } 2466 2467 /** 2468 * Get the number of large file system read operations such as list files 2469 * under a large directory 2470 * @return number of large read operations 2471 */ 2472 public int getLargeReadOps() { 2473 return largeReadOps.get(); 2474 } 2475 2476 /** 2477 * Get the number of file system write operations such as create, append 2478 * rename etc. 2479 * @return number of write operations 2480 */ 2481 public int getWriteOps() { 2482 return writeOps.get(); 2483 } 2484 2485 public String toString() { 2486 return bytesRead + " bytes read, " + bytesWritten + " bytes written, " 2487 + readOps + " read ops, " + largeReadOps + " large read ops, " 2488 + writeOps + " write ops"; 2489 } 2490 2491 /** 2492 * Reset the counts of bytes to 0. 2493 */ 2494 public void reset() { 2495 bytesWritten.set(0); 2496 bytesRead.set(0); 2497 } 2498 2499 /** 2500 * Get the uri scheme associated with this statistics object. 2501 * @return the schema associated with this set of statistics 2502 */ 2503 public String getScheme() { 2504 return scheme; 2505 } 2506 } 2507 2508 /** 2509 * Get the Map of Statistics object indexed by URI Scheme. 2510 * @return a Map having a key as URI scheme and value as Statistics object 2511 * @deprecated use {@link #getAllStatistics} instead 2512 */ 2513 @Deprecated 2514 public static synchronized Map<String, Statistics> getStatistics() { 2515 Map<String, Statistics> result = new HashMap<String, Statistics>(); 2516 for(Statistics stat: statisticsTable.values()) { 2517 result.put(stat.getScheme(), stat); 2518 } 2519 return result; 2520 } 2521 2522 /** 2523 * Return the FileSystem classes that have Statistics 2524 */ 2525 public static synchronized List<Statistics> getAllStatistics() { 2526 return new ArrayList<Statistics>(statisticsTable.values()); 2527 } 2528 2529 /** 2530 * Get the statistics for a particular file system 2531 * @param cls the class to lookup 2532 * @return a statistics object 2533 */ 2534 public static synchronized 2535 Statistics getStatistics(String scheme, Class<? extends FileSystem> cls) { 2536 Statistics result = statisticsTable.get(cls); 2537 if (result == null) { 2538 result = new Statistics(scheme); 2539 statisticsTable.put(cls, result); 2540 } 2541 return result; 2542 } 2543 2544 /** 2545 * Reset all statistics for all file systems 2546 */ 2547 public static synchronized void clearStatistics() { 2548 for(Statistics stat: statisticsTable.values()) { 2549 stat.reset(); 2550 } 2551 } 2552 2553 /** 2554 * Print all statistics for all file systems 2555 */ 2556 public static synchronized 2557 void printStatistics() throws IOException { 2558 for (Map.Entry<Class<? extends FileSystem>, Statistics> pair: 2559 statisticsTable.entrySet()) { 2560 System.out.println(" FileSystem " + pair.getKey().getName() + 2561 ": " + pair.getValue()); 2562 } 2563 } 2564 }