/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.fs;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.regex.Pattern;

import org.apache.avro.reflect.Stringable;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.HadoopIllegalArgumentException;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;

/** Names a file or directory in a {@link FileSystem}.
 * Path strings use slash as the directory separator. A path string is
 * absolute if it begins with a slash.
 */
@Stringable
@InterfaceAudience.Public
@InterfaceStability.Stable
public class Path implements Comparable {

  /** The directory separator, a slash. */
  public static final String SEPARATOR = "/";
  /** The directory separator as a single character. */
  public static final char SEPARATOR_CHAR = '/';

  /** The path string that denotes the current directory. */
  public static final String CUR_DIR = ".";

  /** True if the "os.name" system property starts with "Windows". */
  public static final boolean WINDOWS
    = System.getProperty("os.name").startsWith("Windows");

  /**
   * Pre-compiled regular expressions to detect path formats.
   */
  // The '-' is placed last in the character class so that it is a literal
  // hyphen; written as "+-." it would be the range '+'..'.', which also
  // matches ','.
  private static final Pattern hasUriScheme =
      Pattern.compile("^[a-zA-Z][a-zA-Z0-9+.-]+:");
  private static final Pattern hasDriveLetterSpecifier =
      Pattern.compile("^/?[a-zA-Z]:");
  // Matches a run of two or more consecutive forward slashes.
  private static final Pattern multipleSlashes =
      Pattern.compile("/{2,}");

  private URI uri; // a hierarchical uri

  /**
   * Pathnames with scheme and relative path are illegal.
   *
   * @throws HadoopIllegalArgumentException if this path's URI has a scheme
   *         but its path component is not absolute
   */
  void checkNotSchemeWithRelative() {
    if (toUri().isAbsolute() && !isUriPathAbsolute()) {
      throw new HadoopIllegalArgumentException(
          "Unsupported name: has scheme but relative path-part");
    }
  }

  /**
   * Rejects paths that are relative and carry no scheme.
   *
   * @throws HadoopIllegalArgumentException if {@link #isAbsolute()} is false
   *         and the URI has no scheme
   */
  void checkNotRelative() {
    if (!isAbsolute() && toUri().getScheme() == null) {
      throw new HadoopIllegalArgumentException("Path is relative");
    }
  }

  /**
   * Returns a path that keeps only the path component of the given path.
   * If the URI path of {@code path} is not absolute, {@code path} itself is
   * returned unchanged.
   *
   * @param path the path to strip
   * @return a path with null scheme and authority, or {@code path} itself
   */
  public static Path getPathWithoutSchemeAndAuthority(Path path) {
    // This code depends on Path.toString() to remove the leading slash before
    // the drive specification on Windows.
    Path newPath = path.isUriPathAbsolute() ?
      new Path(null, null, path.toUri().getPath()) :
      path;
    return newPath;
  }

  /** Resolve a child path against a parent path. */
  public Path(String parent, String child) {
    this(new Path(parent), new Path(child));
  }

  /** Resolve a child path against a parent path. */
  public Path(Path parent, String child) {
    this(parent, new Path(child));
  }

  /** Resolve a child path against a parent path. */
  public Path(String parent, Path child) {
    this(new Path(parent), child);
  }

  /** Resolve a child path against a parent path. */
  public Path(Path parent, Path child) {
    // Add a slash to parent's path so resolution is compatible with URI's
    URI parentUri = parent.uri;
    String parentPath = parentUri.getPath();
    if (!(parentPath.equals("/") || parentPath.isEmpty())) {
      try {
        parentUri = new URI(parentUri.getScheme(), parentUri.getAuthority(),
                      parentUri.getPath()+"/", null, parentUri.getFragment());
      } catch (URISyntaxException e) {
        throw new IllegalArgumentException(e);
      }
    }
    URI resolved = parentUri.resolve(child.uri);
    initialize(resolved.getScheme(), resolved.getAuthority(),
               resolved.getPath(), resolved.getFragment());
  }

  /**
   * Validates a path string argument.
   *
   * @param path the string to check
   * @throws IllegalArgumentException if {@code path} is null or empty
   */
  private void checkPathArg( String path ) throws IllegalArgumentException {
    // disallow construction of a Path from an empty string
    if ( path == null ) {
      throw new IllegalArgumentException(
          "Can not create a Path from a null string");
    }
    if( path.length() == 0 ) {
       throw new IllegalArgumentException(
           "Can not create a Path from an empty string");
    }
  }

  /** Construct a path from a String. Path strings are URIs, but with
   * unescaped elements and some additional normalization.
   *
   * @param pathString the path string to parse
   * @throws IllegalArgumentException if {@code pathString} is null or empty,
   *         or if the parsed components do not form a valid URI
   */
  public Path(String pathString) throws IllegalArgumentException {
    checkPathArg( pathString );

    // We can't use 'new URI(String)' directly, since it assumes things are
    // escaped, which we don't require of Paths.

    // add a slash in front of paths with Windows drive letters
    if (hasWindowsDrive(pathString) && pathString.charAt(0) != '/') {
      pathString = "/" + pathString;
    }

    // parse uri components
    String scheme = null;
    String authority = null;

    int start = 0;

    // parse uri scheme, if any
    int colon = pathString.indexOf(':');
    int slash = pathString.indexOf('/');
    if ((colon != -1) &&
        ((slash == -1) || (colon < slash))) {     // has a scheme
      scheme = pathString.substring(0, colon);
      start = colon+1;
    }

    // parse uri authority, if any
    if (pathString.startsWith("//", start) &&
        (pathString.length()-start > 2)) {       // has authority
      int nextSlash = pathString.indexOf('/', start+2);
      int authEnd = nextSlash > 0 ? nextSlash : pathString.length();
      authority = pathString.substring(start+2, authEnd);
      start = authEnd;
    }

    // uri path is the rest of the string -- query & fragment not supported
    String path = pathString.substring(start, pathString.length());

    initialize(scheme, authority, path, null);
  }

  /**
   * Construct a path from a URI. The URI is stored after
   * {@link URI#normalize()}; no other normalization is applied here.
   *
   * @param aUri the URI to wrap
   */
  public Path(URI aUri) {
    uri = aUri.normalize();
  }

  /** Construct a Path from components.
   *
   * @param scheme the URI scheme, may be null
   * @param authority the URI authority, may be null
   * @param path the URI path; must be neither null nor empty
   * @throws IllegalArgumentException if {@code path} is null or empty, or if
   *         the components do not form a valid URI
   */
  public Path(String scheme, String authority, String path) {
    checkPathArg( path );

    // add a slash in front of paths with Windows drive letters
    if (hasWindowsDrive(path) && path.charAt(0) != '/') {
      path = "/" + path;
    }

    // add "./" in front of Linux relative paths so that a path containing
    // a colon e.g. "a:b" will not be interpreted as scheme "a".
    if (!WINDOWS && path.charAt(0) != '/') {
      path = "./" + path;
    }

    initialize(scheme, authority, path, null);
  }

  /**
   * Builds and stores the normalized URI for this path from its components.
   * The path component is passed through
   * {@link #normalizePath(String, String)} first; the query is always null.
   *
   * @throws IllegalArgumentException wrapping the {@link URISyntaxException}
   *         if the components do not form a valid URI
   */
  private void initialize(String scheme, String authority, String path,
      String fragment) {
    try {
      this.uri = new URI(scheme, authority, normalizePath(scheme, path),
                         null, fragment)
        .normalize();
    } catch (URISyntaxException e) {
      throw new IllegalArgumentException(e);
    }
  }

  /**
   * Merge 2 paths such that the second path is appended relative to the first.
   * The returned path has the scheme and authority of the first path. On
   * Windows, the drive specification in the second path is discarded.
   *
   * @param path1 Path first path
   * @param path2 Path second path, to be appended relative to path1
   * @return Path merged path
   */
  public static Path mergePaths(Path path1, Path path2) {
    String path2Str = path2.toUri().getPath();
    path2Str = path2Str.substring(startPositionWithoutWindowsDrive(path2Str));
    // Add path components explicitly, because simply concatenating two path
    // string is not safe, for example:
    // "/" + "/foo" yields "//foo", which will be parsed as authority in Path
    return new Path(path1.toUri().getScheme(),
        path1.toUri().getAuthority(),
        path1.toUri().getPath() + path2Str);
  }

  /**
   * Normalize a path string to use non-duplicated forward slashes as
   * the path separator and remove any trailing path separators.
   * @param scheme Supplies the URI scheme. Used to deduce whether we
   * should replace backslashes or not.
   * @param path Supplies the scheme-specific part
   * @return Normalized path string.
   */
  private static String normalizePath(String scheme, String path) {
    // Collapse every run of forward slashes into a single slash. A plain
    // replace of "//" with "/" only handles non-overlapping pairs, so
    // "///a" would still become "//a", which URI then reads as an authority.
    path = multipleSlashes.matcher(path).replaceAll(SEPARATOR);

    // Remove backslashes if this looks like a Windows path. Avoid
    // the substitution if it looks like a non-local URI.
    if (WINDOWS &&
        (hasWindowsDrive(path) ||
         (scheme == null) ||
         (scheme.isEmpty()) ||
         (scheme.equals("file")))) {
      path = StringUtils.replace(path, "\\", "/");
    }

    // trim trailing slash from non-root path (ignoring windows drive)
    int minLength = startPositionWithoutWindowsDrive(path) + 1;
    if (path.length() > minLength && path.endsWith(SEPARATOR)) {
      path = path.substring(0, path.length()-1);
    }

    return path;
  }

  /**
   * True only when running on Windows and the string starts with an
   * optional slash followed by a drive letter and a colon.
   */
  private static boolean hasWindowsDrive(String path) {
    return (WINDOWS && hasDriveLetterSpecifier.matcher(path).find());
  }

  /**
   * Returns the index just past the Windows drive specifier: 3 for a
   * slash-prefixed drive ("/C:"), 2 for a bare drive ("C:"), and 0 when
   * {@link #hasWindowsDrive(String)} is false.
   */
  private static int startPositionWithoutWindowsDrive(String path) {
    if (hasWindowsDrive(path)) {
      return path.charAt(0) == SEPARATOR_CHAR ? 3 : 2;
    } else {
      return 0;
    }
  }

  /**
   * Determine whether a given path string represents an absolute path on
   * Windows. e.g. "C:/a/b" is an absolute path. "C:a/b" is not.
   * Always returns false when not running on Windows, because no drive
   * specifier is recognized there.
   *
   * @param pathString Supplies the path string to evaluate.
   * @param slashed true if the given path is prefixed with "/". Note that
   * this implementation does not read the parameter; a leading slash is
   * detected from {@code pathString} itself.
   * @return true if the supplied path looks like an absolute path with a Windows
   * drive-specifier.
   */
  public static boolean isWindowsAbsolutePath(final String pathString,
                                              final boolean slashed) {
    int start = startPositionWithoutWindowsDrive(pathString);
    return start > 0
        && pathString.length() > start
        && ((pathString.charAt(start) == SEPARATOR_CHAR) ||
            (pathString.charAt(start) == '\\'));
  }

  /** Convert this to a URI. */
  public URI toUri() { return uri; }

  /** Return the FileSystem that owns this Path. */
  public FileSystem getFileSystem(Configuration conf) throws IOException {
    return FileSystem.get(this.toUri(), conf);
  }

  /**
   * Is an absolute path (ie a slash relative path part)
   * AND a scheme is null AND authority is null.
   */
  public boolean isAbsoluteAndSchemeAuthorityNull() {
    return (isUriPathAbsolute() &&
        uri.getScheme() == null && uri.getAuthority() == null);
  }

  /**
   * True if the path component (i.e. directory) of this URI is absolute.
   * Any Windows drive specifier is skipped before the leading slash is
   * looked for.
   */
  public boolean isUriPathAbsolute() {
    int start = startPositionWithoutWindowsDrive(uri.getPath());
    return uri.getPath().startsWith(SEPARATOR, start);
  }

  /**
   * True if the path component of this URI is absolute.
   *
   * There is some ambiguity here. An absolute path is a slash
   * relative name without a scheme or an authority.
   * So either this method was incorrectly named or its
   * implementation is incorrect. This method returns true
   * even if there is a scheme and authority.
   */
  public boolean isAbsolute() {
    return isUriPathAbsolute();
  }

  /**
   * @return true if and only if this path represents the root of a file system
   */
  public boolean isRoot() {
    return getParent() == null;
  }

  /** Returns the final component of this path.*/
  public String getName() {
    String path = uri.getPath();
    int slash = path.lastIndexOf(SEPARATOR);
    return path.substring(slash+1);
  }

  /** Returns the parent of a path or null if at root. */
  public Path getParent() {
    String path = uri.getPath();
    int lastSlash = path.lastIndexOf('/');
    int start = startPositionWithoutWindowsDrive(path);
    if ((path.length() == start) ||               // empty path
        (lastSlash == start && path.length() == start+1)) { // at root
      return null;
    }
    String parent;
    if (lastSlash==-1) {
      // no separator at all: the parent is the current directory
      parent = CUR_DIR;
    } else {
      // keep the root slash when the only separator is the leading one
      parent = path.substring(0, lastSlash==start?start+1:lastSlash);
    }
    return new Path(uri.getScheme(), uri.getAuthority(), parent);
  }

  /** Adds a suffix to the final name in the path.
   *
   * @param suffix the text to append to {@link #getName()}
   * @return a new path with the same parent and the suffixed name
   * @throws NullPointerException if {@link #getParent()} returns null
   *         (for example at the root), because the parent is dereferenced
   *         when the new path is resolved
   */
  public Path suffix(String suffix) {
    return new Path(getParent(), getName()+suffix);
  }

  @Override
  public String toString() {
    // we can't use uri.toString(), which escapes everything, because we want
    // illegal characters unescaped in the string, for glob processing, etc.
    StringBuilder buffer = new StringBuilder();
    if (uri.getScheme() != null) {
      buffer.append(uri.getScheme());
      buffer.append(":");
    }
    if (uri.getAuthority() != null) {
      buffer.append("//");
      buffer.append(uri.getAuthority());
    }
    if (uri.getPath() != null) {
      String path = uri.getPath();
      if (path.indexOf('/')==0 &&
          hasWindowsDrive(path) &&                // has windows drive
          uri.getScheme() == null &&              // but no scheme
          uri.getAuthority() == null) {           // or authority
        path = path.substring(1);                 // remove slash before drive
      }
      buffer.append(path);
    }
    if (uri.getFragment() != null) {
      buffer.append("#");
      buffer.append(uri.getFragment());
    }
    return buffer.toString();
  }

  /** Two paths are equal when their underlying URIs are equal. */
  @Override
  public boolean equals(Object o) {
    if (!(o instanceof Path)) {
      return false;
    }
    Path that = (Path)o;
    return this.uri.equals(that.uri);
  }

  @Override
  public int hashCode() {
    return uri.hashCode();
  }

  /**
   * Orders paths by comparing their underlying URIs.
   *
   * @throws ClassCastException if {@code o} is not a {@code Path}
   */
  @Override
  public int compareTo(Object o) {
    Path that = (Path)o;
    return this.uri.compareTo(that.uri);
  }

  /** Return the number of elements in this path. */
  public int depth() {
    String path = uri.getPath();
    int depth = 0;
    // the root path "/" has depth 0, so the counting loop is skipped for it
    int slash = path.length()==1 && path.charAt(0)=='/' ? -1 : 0;
    while (slash != -1) {
      depth++;
      slash = path.indexOf(SEPARATOR, slash+1);
    }
    return depth;
  }

  /**
   * Returns a qualified path object.
   *
   * @deprecated use {@link #makeQualified(URI, Path)}
   */
  @Deprecated
  public Path makeQualified(FileSystem fs) {
    return makeQualified(fs.getUri(), fs.getWorkingDirectory());
  }

  /** Returns a qualified path object.
   *
   * @param defaultUri supplies the scheme and authority used when this path
   *        lacks them
   * @param workingDir the directory a non-absolute path is resolved against
   * @return this path (or its resolved form) if it already has a scheme and
   *         no authority needs to be filled in, otherwise a new path with
   *         the missing scheme and/or authority taken from {@code defaultUri}
   * @throws IllegalArgumentException if the qualified components do not form
   *         a valid URI
   */
  @InterfaceAudience.LimitedPrivate({"HDFS", "MapReduce"})
  public Path makeQualified(URI defaultUri, Path workingDir ) {
    Path path = this;
    if (!isAbsolute()) {
      path = new Path(workingDir, this);
    }

    URI pathUri = path.toUri();

    String scheme = pathUri.getScheme();
    String authority = pathUri.getAuthority();
    String fragment = pathUri.getFragment();

    // already qualified: has a scheme, and either has an authority or the
    // default URI has none to contribute
    if (scheme != null &&
        (authority != null || defaultUri.getAuthority() == null)) {
      return path;
    }

    if (scheme == null) {
      scheme = defaultUri.getScheme();
    }

    if (authority == null) {
      authority = defaultUri.getAuthority();
      if (authority == null) {
        authority = "";
      }
    }

    URI newUri = null;
    try {
      newUri = new URI(scheme, authority ,
        normalizePath(scheme, pathUri.getPath()), null, fragment);
    } catch (URISyntaxException e) {
      throw new IllegalArgumentException(e);
    }
    return new Path(newUri);
  }
}