/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.fs;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.regex.Pattern;

import org.apache.avro.reflect.Stringable;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.HadoopIllegalArgumentException;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;

/** Names a file or directory in a {@link FileSystem}.
 * Path strings use slash as the directory separator.  A path string is
 * absolute if it begins with a slash.
 *
 * <p>Internally a Path wraps a hierarchical {@link URI}; the constructors
 * parse unescaped path strings into URI components and normalize them.
 */
@Stringable
@InterfaceAudience.Public
@InterfaceStability.Stable
public class Path implements Comparable {

  /** The directory separator, a slash. */
  public static final String SEPARATOR = "/";
  /** The directory separator, a slash, as a character. */
  public static final char SEPARATOR_CHAR = '/';

  /** The path string that denotes the current directory. */
  public static final String CUR_DIR = ".";

  /** True if the "os.name" system property starts with "Windows". */
  public static final boolean WINDOWS
    = System.getProperty("os.name").startsWith("Windows");

  /**
   *  Pre-compiled regular expressions to detect path formats.
   */
  // Note: '-' is placed last in the character class so that it is a literal
  // hyphen rather than forming the range '+'..'.' (which would also match ',').
  private static final Pattern hasUriScheme =
      Pattern.compile("^[a-zA-Z][a-zA-Z0-9+.-]+:");
  private static final Pattern hasDriveLetterSpecifier =
      Pattern.compile("^/?[a-zA-Z]:");
  // Matches any run of two or more consecutive forward slashes.
  private static final Pattern duplicateSlashes =
      Pattern.compile("/{2,}");

  private URI uri;                                // a hierarchical uri

  /**
   * Pathnames with scheme and relative path are illegal.
   *
   * @throws HadoopIllegalArgumentException if this path has a scheme but its
   *         path component is not absolute
   */
  void checkNotSchemeWithRelative() {
    if (toUri().isAbsolute() && !isUriPathAbsolute()) {
      throw new HadoopIllegalArgumentException(
          "Unsupported name: has scheme but relative path-part");
    }
  }

  /**
   * Rejects paths that are relative and have no scheme.
   *
   * @throws HadoopIllegalArgumentException if this path is not absolute and
   *         its URI has no scheme
   */
  void checkNotRelative() {
    if (!isAbsolute() && toUri().getScheme() == null) {
      throw new HadoopIllegalArgumentException("Path is relative");
    }
  }

  /**
   * Returns a path made of only the path component of the given path, with
   * scheme and authority dropped. A path whose path component is not
   * absolute is returned unchanged.
   *
   * @param path the path to strip
   * @return the path without scheme and authority, or {@code path} itself
   *         if its path component is relative
   */
  public static Path getPathWithoutSchemeAndAuthority(Path path) {
    // This code depends on Path.toString() to remove the leading slash before
    // the drive specification on Windows.
    Path newPath = path.isUriPathAbsolute() ?
      new Path(null, null, path.toUri().getPath()) :
      path;
    return newPath;
  }

  /** Resolve a child path against a parent path. */
  public Path(String parent, String child) {
    this(new Path(parent), new Path(child));
  }

  /** Resolve a child path against a parent path. */
  public Path(Path parent, String child) {
    this(parent, new Path(child));
  }

  /** Resolve a child path against a parent path. */
  public Path(String parent, Path child) {
    this(new Path(parent), child);
  }

  /**
   * Resolve a child path against a parent path.
   *
   * @param parent the path to resolve against
   * @param child the path to resolve; resolution follows
   *        {@link URI#resolve(URI)}
   * @throws IllegalArgumentException if the parent URI cannot be rebuilt
   *         with a trailing slash
   */
  public Path(Path parent, Path child) {
    // Add a slash to parent's path so resolution is compatible with URI's
    URI parentUri = parent.uri;
    String parentPath = parentUri.getPath();
    if (!(parentPath.equals("/") || parentPath.isEmpty())) {
      try {
        parentUri = new URI(parentUri.getScheme(), parentUri.getAuthority(),
                      parentUri.getPath()+"/", null, parentUri.getFragment());
      } catch (URISyntaxException e) {
        throw new IllegalArgumentException(e);
      }
    }
    URI resolved = parentUri.resolve(child.uri);
    initialize(resolved.getScheme(), resolved.getAuthority(),
               resolved.getPath(), resolved.getFragment());
  }

  /**
   * Validates a path string argument.
   *
   * @param path the string to check
   * @throws IllegalArgumentException if {@code path} is null or empty
   */
  private void checkPathArg( String path ) throws IllegalArgumentException {
    // disallow construction of a Path from an empty string
    if ( path == null ) {
      throw new IllegalArgumentException(
          "Can not create a Path from a null string");
    }
    if( path.length() == 0 ) {
       throw new IllegalArgumentException(
           "Can not create a Path from an empty string");
    }
  }

  /** Construct a path from a String.  Path strings are URIs, but with
   * unescaped elements and some additional normalization.
   *
   * @param pathString the string to parse into scheme, authority and path
   * @throws IllegalArgumentException if {@code pathString} is null or empty,
   *         or if the parsed components do not form a valid URI
   */
  public Path(String pathString) throws IllegalArgumentException {
    checkPathArg( pathString );

    // We can't use 'new URI(String)' directly, since it assumes things are
    // escaped, which we don't require of Paths.

    // add a slash in front of paths with Windows drive letters
    if (hasWindowsDrive(pathString) && pathString.charAt(0) != '/') {
      pathString = "/" + pathString;
    }

    // parse uri components
    String scheme = null;
    String authority = null;

    int start = 0;

    // parse uri scheme, if any
    int colon = pathString.indexOf(':');
    int slash = pathString.indexOf('/');
    if ((colon != -1) &&
        ((slash == -1) || (colon < slash))) {     // has a scheme
      scheme = pathString.substring(0, colon);
      start = colon+1;
    }

    // parse uri authority, if any
    if (pathString.startsWith("//", start) &&
        (pathString.length()-start > 2)) {       // has authority
      int nextSlash = pathString.indexOf('/', start+2);
      int authEnd = nextSlash > 0 ? nextSlash : pathString.length();
      authority = pathString.substring(start+2, authEnd);
      start = authEnd;
    }

    // uri path is the rest of the string -- query & fragment not supported
    String path = pathString.substring(start, pathString.length());

    initialize(scheme, authority, path, null);
  }

  /**
   * Construct a path from a URI. The URI is stored in normalized form
   * (see {@link URI#normalize()}).
   *
   * @param aUri the URI to wrap
   */
  public Path(URI aUri) {
    uri = aUri.normalize();
  }

  /**
   * Construct a Path from components.
   *
   * @param scheme the URI scheme, may be null
   * @param authority the URI authority, may be null
   * @param path the path component; must not be null or empty
   * @throws IllegalArgumentException if {@code path} is null or empty, or if
   *         the components do not form a valid URI
   */
  public Path(String scheme, String authority, String path) {
    checkPathArg( path );

    // add a slash in front of paths with Windows drive letters
    if (hasWindowsDrive(path) && path.charAt(0) != '/') {
      path = "/" + path;
    }

    // add "./" in front of Linux relative paths so that a path containing
    // a colon e.q. "a:b" will not be interpreted as scheme "a".
    if (!WINDOWS && path.charAt(0) != '/') {
      path = "./" + path;
    }

    initialize(scheme, authority, path, null);
  }

  /**
   * Builds and stores the normalized URI for this path from its components.
   *
   * @throws IllegalArgumentException wrapping the {@link URISyntaxException}
   *         if the components do not form a valid URI
   */
  private void initialize(String scheme, String authority, String path,
      String fragment) {
    try {
      this.uri = new URI(scheme, authority, normalizePath(scheme, path), null, fragment)
        .normalize();
    } catch (URISyntaxException e) {
      throw new IllegalArgumentException(e);
    }
  }

  /**
   * Merge 2 paths such that the second path is appended relative to the first.
   * The returned path has the scheme and authority of the first path.  On
   * Windows, the drive specification in the second path is discarded.
   *
   * @param path1 Path first path
   * @param path2 Path second path, to be appended relative to path1
   * @return Path merged path
   */
  public static Path mergePaths(Path path1, Path path2) {
    String path2Str = path2.toUri().getPath();
    if(hasWindowsDrive(path2Str)) {
      path2Str = path2Str.substring(path2Str.indexOf(':')+1);
    }

    URI uri1 = path1.toUri();
    String mergedPath = uri1.getPath() + path2Str;
    if (mergedPath.isEmpty()) {
      // Neither path has a path component; the component constructor rejects
      // an empty path, so keep the string-based construction for this case.
      return new Path(path1.toString());
    }

    // Add path components explicitly, because simply concatenating two path
    // strings is not safe, for example:
    // "/" + "/foo" yields "//foo", which would be parsed as an authority.
    return new Path(uri1.getScheme(), uri1.getAuthority(), mergedPath);
  }

  /**
   * Normalize a path string to use non-duplicated forward slashes as
   * the path separator and remove any trailing path separators.
   * @param scheme Supplies the URI scheme. Used to deduce whether we
   *               should replace backslashes or not.
   * @param path Supplies the scheme-specific part
   * @return Normalized path string.
   */
  private static String normalizePath(String scheme, String path) {
    // Collapse every run of forward slashes into one. A single "//" -> "/"
    // pass would leave "///" as "//", which the URI constructor can then
    // misread as the start of an authority.
    path = duplicateSlashes.matcher(path).replaceAll("/");

    // Remove backslashes if this looks like a Windows path. Avoid
    // the substitution if it looks like a non-local URI.
    if (WINDOWS &&
        (hasWindowsDrive(path) ||
         (scheme == null) ||
         (scheme.isEmpty()) ||
         (scheme.equals("file")))) {
      path = StringUtils.replace(path, "\\", "/");
    }

    // trim trailing slash from non-root path (ignoring windows drive)
    int minLength = hasWindowsDrive(path) ? 4 : 1;
    if (path.length() > minLength && path.endsWith("/")) {
      path = path.substring(0, path.length()-1);
    }

    return path;
  }

  /**
   * True only on Windows, and only if the path string starts with an
   * optional slash followed by a drive letter and a colon.
   */
  private static boolean hasWindowsDrive(String path) {
    return (WINDOWS && hasDriveLetterSpecifier.matcher(path).find());
  }

  /**
   * Determine whether a given path string represents an absolute path on
   * Windows. e.g. "C:/a/b" is an absolute path. "C:a/b" is not.
   *
   * @param pathString Supplies the path string to evaluate.
   * @param slashed true if the given path is prefixed with "/".
   * @return true if the supplied path looks like an absolute path with a Windows
   * drive-specifier.
   */
  public static boolean isWindowsAbsolutePath(final String pathString,
                                              final boolean slashed) {
    // Index of the drive letter: skip the leading slash if there is one.
    int start = (slashed ? 1 : 0);

    return
        hasWindowsDrive(pathString) &&
        pathString.length() >= (start + 3) &&
        ((pathString.charAt(start + 2) == SEPARATOR_CHAR) ||
          (pathString.charAt(start + 2) == '\\'));
  }

  /** Convert this to a URI. */
  public URI toUri() { return uri; }

  /**
   * Return the FileSystem that owns this Path.
   *
   * @param conf the configuration passed to
   *        {@code FileSystem.get(URI, Configuration)}
   * @throws IOException if the FileSystem lookup fails
   */
  public FileSystem getFileSystem(Configuration conf) throws IOException {
    return FileSystem.get(this.toUri(), conf);
  }

  /**
   * Is an absolute path (ie a slash relative path part)
   *  AND  a scheme is null AND  authority is null.
   */
  public boolean isAbsoluteAndSchemeAuthorityNull() {
    return  (isUriPathAbsolute() &&
        uri.getScheme() == null && uri.getAuthority() == null);
  }

  /**
   *  True if the path component (i.e. directory) of this URI is absolute.
   */
  public boolean isUriPathAbsolute() {
    // Skip over a "/C:" style drive prefix before looking for the slash.
    int start = hasWindowsDrive(uri.getPath()) ? 3 : 0;
    return uri.getPath().startsWith(SEPARATOR, start);
   }

  /**
   * True if the path component of this URI is absolute.
   *
   * There is some ambiguity here. An absolute path is a slash
   * relative name without a scheme or an authority.
   * So either this method was incorrectly named or its
   * implementation is incorrect. This method returns true
   * even if there is a scheme and authority.
   */
  public boolean isAbsolute() {
     return isUriPathAbsolute();
  }

  /**
   * @return true if and only if this path represents the root of a file system
   */
  public boolean isRoot() {
    return getParent() == null;
  }

  /** Returns the final component of this path.*/
  public String getName() {
    String path = uri.getPath();
    int slash = path.lastIndexOf(SEPARATOR);
    return path.substring(slash+1);
  }

  /** Returns the parent of a path or null if at root. */
  public Path getParent() {
    String path = uri.getPath();
    int lastSlash = path.lastIndexOf('/');
    int start = hasWindowsDrive(path) ? 3 : 0;
    if ((path.length() == start) ||               // empty path
        (lastSlash == start && path.length() == start+1)) { // at root
      return null;
    }
    String parent;
    if (lastSlash==-1) {
      // A single relative component: its parent is the current directory.
      parent = CUR_DIR;
    } else {
      int end = hasWindowsDrive(path) ? 3 : 0;
      // Keep the root slash itself when the parent is the root.
      parent = path.substring(0, lastSlash==end?end+1:lastSlash);
    }
    return new Path(uri.getScheme(), uri.getAuthority(), parent);
  }

  /**
   * Adds a suffix to the final name in the path.
   *
   * Note: the result of {@link #getParent()} is passed on unchecked, and
   * that method returns null when this path is a root.
   */
  public Path suffix(String suffix) {
    return new Path(getParent(), getName()+suffix);
  }

  @Override
  public String toString() {
    // we can't use uri.toString(), which escapes everything, because we want
    // illegal characters unescaped in the string, for glob processing, etc.
    StringBuilder buffer = new StringBuilder();
    if (uri.getScheme() != null) {
      buffer.append(uri.getScheme());
      buffer.append(":");
    }
    if (uri.getAuthority() != null) {
      buffer.append("//");
      buffer.append(uri.getAuthority());
    }
    if (uri.getPath() != null) {
      String path = uri.getPath();
      if (path.indexOf('/')==0 &&
          hasWindowsDrive(path) &&                // has windows drive
          uri.getScheme() == null &&              // but no scheme
          uri.getAuthority() == null) {           // or authority
        path = path.substring(1);                 // remove slash before drive
      }
      buffer.append(path);
    }
    if (uri.getFragment() != null) {
      buffer.append("#");
      buffer.append(uri.getFragment());
    }
    return buffer.toString();
  }

  /** Two paths are equal if and only if their underlying URIs are equal. */
  @Override
  public boolean equals(Object o) {
    if (!(o instanceof Path)) {
      return false;
    }
    Path that = (Path)o;
    return this.uri.equals(that.uri);
  }

  @Override
  public int hashCode() {
    return uri.hashCode();
  }

  /**
   * Orders paths by their underlying URIs.
   *
   * @throws ClassCastException if {@code o} is not a Path
   */
  @Override
  public int compareTo(Object o) {
    Path that = (Path)o;
    return this.uri.compareTo(that.uri);
  }

  /** Return the number of elements in this path. */
  public int depth() {
    String path = uri.getPath();
    int depth = 0;
    // The root path "/" has depth 0; otherwise count one element per
    // separator found, plus one for the first element.
    int slash = path.length()==1 && path.charAt(0)=='/' ? -1 : 0;
    while (slash != -1) {
      depth++;
      slash = path.indexOf(SEPARATOR, slash+1);
    }
    return depth;
  }

  /**
   *  Returns a qualified path object.
   *
   *  Deprecated - use {@link #makeQualified(URI, Path)}
   */
  @Deprecated
  public Path makeQualified(FileSystem fs) {
    return makeQualified(fs.getUri(), fs.getWorkingDirectory());
  }

  /**
   * Returns a qualified path object.
   *
   * A relative path is first resolved against {@code workingDir}. A missing
   * scheme or authority is then filled in from {@code defaultUri}; a missing
   * authority that {@code defaultUri} cannot supply becomes the empty string.
   *
   * @param defaultUri supplies the scheme and authority to fill in
   * @param workingDir the directory that relative paths are resolved against
   * @return this path (or its resolved form) if it already has a scheme and
   *         needs no authority, otherwise a new qualified path
   * @throws IllegalArgumentException if the qualified components do not form
   *         a valid URI
   */
  @InterfaceAudience.LimitedPrivate({"HDFS", "MapReduce"})
  public Path makeQualified(URI defaultUri, Path workingDir ) {
    Path path = this;
    if (!isAbsolute()) {
      path = new Path(workingDir, this);
    }

    URI pathUri = path.toUri();

    String scheme = pathUri.getScheme();
    String authority = pathUri.getAuthority();
    String fragment = pathUri.getFragment();

    // Already qualified: has a scheme, and either has an authority or there
    // is no default authority to add.
    if (scheme != null &&
        (authority != null || defaultUri.getAuthority() == null)) {
      return path;
    }

    if (scheme == null) {
      scheme = defaultUri.getScheme();
    }

    if (authority == null) {
      authority = defaultUri.getAuthority();
      if (authority == null) {
        authority = "";
      }
    }

    URI newUri = null;
    try {
      newUri = new URI(scheme, authority ,
        normalizePath(scheme, pathUri.getPath()), null, fragment);
    } catch (URISyntaxException e) {
      throw new IllegalArgumentException(e);
    }
    return new Path(newUri);
  }
}