001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018 019package org.apache.hadoop.fs; 020 021import java.io.BufferedReader; 022import java.io.File; 023import java.io.FileNotFoundException; 024import java.io.IOException; 025import java.io.InputStreamReader; 026import java.util.Arrays; 027 028/** 029 * Class for creating hardlinks. 030 * Supports Unix/Linux, WinXP/2003/Vista via Cygwin, and Mac OS X. 031 * 032 * The HardLink class was formerly a static inner class of FSUtil, 033 * and the methods provided were blatantly non-thread-safe. 034 * To enable volume-parallel Update snapshots, we now provide static 035 * threadsafe methods that allocate new buffer string arrays 036 * upon each call. We also provide an API to hardlink all files in a 037 * directory with a single command, which is up to 128 times more 038 * efficient - and minimizes the impact of the extra buffer creations. 039 */ 040public class HardLink { 041 042 public enum OSType { 043 OS_TYPE_UNIX, 044 OS_TYPE_WINXP, 045 OS_TYPE_SOLARIS, 046 OS_TYPE_MAC 047 } 048 049 public static OSType osType; 050 private static HardLinkCommandGetter getHardLinkCommand; 051 052 public final LinkStats linkStats; //not static 053 054 //initialize the command "getters" statically, so can use their 055 //methods without instantiating the HardLink object 056 static { 057 osType = getOSType(); 058 if (osType == OSType.OS_TYPE_WINXP) { 059 // Windows 060 getHardLinkCommand = new HardLinkCGWin(); 061 } else { 062 // Unix 063 getHardLinkCommand = new HardLinkCGUnix(); 064 //override getLinkCountCommand for the particular Unix variant 065 //Linux is already set as the default - {"stat","-c%h", null} 066 if (osType == OSType.OS_TYPE_MAC) { 067 String[] linkCountCmdTemplate = {"/usr/bin/stat","-f%l", null}; 068 HardLinkCGUnix.setLinkCountCmdTemplate(linkCountCmdTemplate); 069 } else if (osType == OSType.OS_TYPE_SOLARIS) { 070 String[] linkCountCmdTemplate = {"ls","-l", null}; 071 HardLinkCGUnix.setLinkCountCmdTemplate(linkCountCmdTemplate); 072 } 073 } 074 } 075 076 public HardLink() { 077 linkStats = new LinkStats(); 078 } 079 080 static private OSType getOSType() { 081 String osName = System.getProperty("os.name"); 082 if (osName.contains("Windows") && 083 (osName.contains("XP") 084 || osName.contains("2003") 085 || osName.contains("Vista") 086 || osName.contains("Windows_7") 087 || osName.contains("Windows 7") 088 || osName.contains("Windows7"))) { 089 return OSType.OS_TYPE_WINXP; 090 } 091 else if (osName.contains("SunOS") 092 || osName.contains("Solaris")) { 093 return OSType.OS_TYPE_SOLARIS; 094 } 095 else if (osName.contains("Mac")) { 096 return OSType.OS_TYPE_MAC; 097 } 098 else { 099 return OSType.OS_TYPE_UNIX; 100 } 101 } 102 103 /** 104 * This abstract class bridges the OS-dependent implementations of the 105 * needed functionality for creating hardlinks and querying link counts. 106 * The particular implementation class is chosen during 107 * static initialization phase of the HardLink class. 108 * The "getter" methods construct shell command strings for various purposes. 109 */ 110 private static abstract class HardLinkCommandGetter { 111 112 /** 113 * Get the command string needed to hardlink a bunch of files from 114 * a single source directory into a target directory. The source directory 115 * is not specified here, but the command will be executed using the source 116 * directory as the "current working directory" of the shell invocation. 117 * 118 * @param fileBaseNames - array of path-less file names, relative 119 * to the source directory 120 * @param linkDir - target directory where the hardlinks will be put 121 * @return - an array of Strings suitable for use as a single shell command 122 * with {@link Runtime.exec()} 123 * @throws IOException - if any of the file or path names misbehave 124 */ 125 abstract String[] linkMult(String[] fileBaseNames, File linkDir) 126 throws IOException; 127 128 /** 129 * Get the command string needed to hardlink a single file 130 */ 131 abstract String[] linkOne(File file, File linkName) throws IOException; 132 133 /** 134 * Get the command string to query the hardlink count of a file 135 */ 136 abstract String[] linkCount(File file) throws IOException; 137 138 /** 139 * Calculate the total string length of the shell command 140 * resulting from execution of linkMult, plus the length of the 141 * source directory name (which will also be provided to the shell) 142 * 143 * @param fileDir - source directory, parent of fileBaseNames 144 * @param fileBaseNames - array of path-less file names, relative 145 * to the source directory 146 * @param linkDir - target directory where the hardlinks will be put 147 * @return - total data length (must not exceed maxAllowedCmdArgLength) 148 * @throws IOException 149 */ 150 abstract int getLinkMultArgLength( 151 File fileDir, String[] fileBaseNames, File linkDir) 152 throws IOException; 153 154 /** 155 * Get the maximum allowed string length of a shell command on this OS, 156 * which is just the documented minimum guaranteed supported command 157 * length - aprx. 32KB for Unix, and 8KB for Windows. 158 */ 159 abstract int getMaxAllowedCmdArgLength(); 160 } 161 162 /** 163 * Implementation of HardLinkCommandGetter class for Unix 164 */ 165 static class HardLinkCGUnix extends HardLinkCommandGetter { 166 private static String[] hardLinkCommand = {"ln", null, null}; 167 private static String[] hardLinkMultPrefix = {"ln"}; 168 private static String[] hardLinkMultSuffix = {null}; 169 private static String[] getLinkCountCommand = {"stat","-c%h", null}; 170 //Unix guarantees at least 32K bytes cmd length. 171 //Subtract another 64b to allow for Java 'exec' overhead 172 private static final int maxAllowedCmdArgLength = 32*1024 - 65; 173 174 private static synchronized 175 void setLinkCountCmdTemplate(String[] template) { 176 //May update this for specific unix variants, 177 //after static initialization phase 178 getLinkCountCommand = template; 179 } 180 181 /* 182 * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkOne(java.io.File, java.io.File) 183 */ 184 @Override 185 String[] linkOne(File file, File linkName) 186 throws IOException { 187 String[] buf = new String[hardLinkCommand.length]; 188 System.arraycopy(hardLinkCommand, 0, buf, 0, hardLinkCommand.length); 189 //unix wants argument order: "ln <existing> <new>" 190 buf[1] = FileUtil.makeShellPath(file, true); 191 buf[2] = FileUtil.makeShellPath(linkName, true); 192 return buf; 193 } 194 195 /* 196 * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkMult(java.lang.String[], java.io.File) 197 */ 198 @Override 199 String[] linkMult(String[] fileBaseNames, File linkDir) 200 throws IOException { 201 String[] buf = new String[fileBaseNames.length 202 + hardLinkMultPrefix.length 203 + hardLinkMultSuffix.length]; 204 int mark=0; 205 System.arraycopy(hardLinkMultPrefix, 0, buf, mark, 206 hardLinkMultPrefix.length); 207 mark += hardLinkMultPrefix.length; 208 System.arraycopy(fileBaseNames, 0, buf, mark, fileBaseNames.length); 209 mark += fileBaseNames.length; 210 buf[mark] = FileUtil.makeShellPath(linkDir, true); 211 return buf; 212 } 213 214 /* 215 * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkCount(java.io.File) 216 */ 217 @Override 218 String[] linkCount(File file) 219 throws IOException { 220 String[] buf = new String[getLinkCountCommand.length]; 221 System.arraycopy(getLinkCountCommand, 0, buf, 0, 222 getLinkCountCommand.length); 223 buf[getLinkCountCommand.length - 1] = FileUtil.makeShellPath(file, true); 224 return buf; 225 } 226 227 /* 228 * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#getLinkMultArgLength(java.io.File, java.lang.String[], java.io.File) 229 */ 230 @Override 231 int getLinkMultArgLength(File fileDir, String[] fileBaseNames, File linkDir) 232 throws IOException{ 233 int sum = 0; 234 for (String x : fileBaseNames) { 235 // add 1 to account for terminal null or delimiter space 236 sum += 1 + ((x == null) ? 0 : x.length()); 237 } 238 sum += 2 + FileUtil.makeShellPath(fileDir, true).length() 239 + FileUtil.makeShellPath(linkDir, true).length(); 240 //add the fixed overhead of the hardLinkMult prefix and suffix 241 sum += 3; //length("ln") + 1 242 return sum; 243 } 244 245 /* 246 * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#getMaxAllowedCmdArgLength() 247 */ 248 @Override 249 int getMaxAllowedCmdArgLength() { 250 return maxAllowedCmdArgLength; 251 } 252 } 253 254 255 /** 256 * Implementation of HardLinkCommandGetter class for Windows 257 * 258 * Note that the linkCount shell command for Windows is actually 259 * a Cygwin shell command, and depends on ${cygwin}/bin 260 * being in the Windows PATH environment variable, so 261 * stat.exe can be found. 262 */ 263 static class HardLinkCGWin extends HardLinkCommandGetter { 264 //The Windows command getter impl class and its member fields are 265 //package-private ("default") access instead of "private" to assist 266 //unit testing (sort of) on non-Win servers 267 268 static String[] hardLinkCommand = { 269 "fsutil","hardlink","create", null, null}; 270 static String[] hardLinkMultPrefix = { 271 "cmd","/q","/c","for", "%f", "in", "("}; 272 static String hardLinkMultDir = "\\%f"; 273 static String[] hardLinkMultSuffix = { 274 ")", "do", "fsutil", "hardlink", "create", null, 275 "%f", "1>NUL"}; 276 static String[] getLinkCountCommand = {"stat","-c%h", null}; 277 //Windows guarantees only 8K - 1 bytes cmd length. 278 //Subtract another 64b to allow for Java 'exec' overhead 279 static final int maxAllowedCmdArgLength = 8*1024 - 65; 280 281 /* 282 * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkOne(java.io.File, java.io.File) 283 */ 284 @Override 285 String[] linkOne(File file, File linkName) 286 throws IOException { 287 String[] buf = new String[hardLinkCommand.length]; 288 System.arraycopy(hardLinkCommand, 0, buf, 0, hardLinkCommand.length); 289 //windows wants argument order: "create <new> <existing>" 290 buf[4] = file.getCanonicalPath(); 291 buf[3] = linkName.getCanonicalPath(); 292 return buf; 293 } 294 295 /* 296 * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkMult(java.lang.String[], java.io.File) 297 */ 298 @Override 299 String[] linkMult(String[] fileBaseNames, File linkDir) 300 throws IOException { 301 String[] buf = new String[fileBaseNames.length 302 + hardLinkMultPrefix.length 303 + hardLinkMultSuffix.length]; 304 String td = linkDir.getCanonicalPath() + hardLinkMultDir; 305 int mark=0; 306 System.arraycopy(hardLinkMultPrefix, 0, buf, mark, 307 hardLinkMultPrefix.length); 308 mark += hardLinkMultPrefix.length; 309 System.arraycopy(fileBaseNames, 0, buf, mark, fileBaseNames.length); 310 mark += fileBaseNames.length; 311 System.arraycopy(hardLinkMultSuffix, 0, buf, mark, 312 hardLinkMultSuffix.length); 313 mark += hardLinkMultSuffix.length; 314 buf[mark - 3] = td; 315 return buf; 316 } 317 318 /* 319 * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkCount(java.io.File) 320 */ 321 @Override 322 String[] linkCount(File file) 323 throws IOException { 324 String[] buf = new String[getLinkCountCommand.length]; 325 System.arraycopy(getLinkCountCommand, 0, buf, 0, 326 getLinkCountCommand.length); 327 //The linkCount command is actually a Cygwin shell command, 328 //not a Windows shell command, so we should use "makeShellPath()" 329 //instead of "getCanonicalPath()". However, that causes another 330 //shell exec to "cygpath.exe", and "stat.exe" actually can handle 331 //DOS-style paths (it just prints a couple hundred bytes of warning 332 //to stderr), so we use the more efficient "getCanonicalPath()". 333 buf[getLinkCountCommand.length - 1] = file.getCanonicalPath(); 334 return buf; 335 } 336 337 /* 338 * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#getLinkMultArgLength(java.io.File, java.lang.String[], java.io.File) 339 */ 340 @Override 341 int getLinkMultArgLength(File fileDir, String[] fileBaseNames, File linkDir) 342 throws IOException { 343 int sum = 0; 344 for (String x : fileBaseNames) { 345 // add 1 to account for terminal null or delimiter space 346 sum += 1 + ((x == null) ? 0 : x.length()); 347 } 348 sum += 2 + fileDir.getCanonicalPath().length() + 349 linkDir.getCanonicalPath().length(); 350 //add the fixed overhead of the hardLinkMult command 351 //(prefix, suffix, and Dir suffix) 352 sum += ("cmd.exe /q /c for %f in ( ) do " 353 + "fsutil hardlink create \\%f %f 1>NUL ").length(); 354 return sum; 355 } 356 357 /* 358 * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#getMaxAllowedCmdArgLength() 359 */ 360 @Override 361 int getMaxAllowedCmdArgLength() { 362 return maxAllowedCmdArgLength; 363 } 364 } 365 366 367 /** 368 * Calculate the nominal length of all contributors to the total 369 * commandstring length, including fixed overhead of the OS-dependent 370 * command. It's protected rather than private, to assist unit testing, 371 * but real clients are not expected to need it -- see the way 372 * createHardLinkMult() uses it internally so the user doesn't need to worry 373 * about it. 374 * 375 * @param fileDir - source directory, parent of fileBaseNames 376 * @param fileBaseNames - array of path-less file names, relative 377 * to the source directory 378 * @param linkDir - target directory where the hardlinks will be put 379 * @return - total data length (must not exceed maxAllowedCmdArgLength) 380 * @throws IOException 381 */ 382 protected static int getLinkMultArgLength( 383 File fileDir, String[] fileBaseNames, File linkDir) 384 throws IOException { 385 return getHardLinkCommand.getLinkMultArgLength(fileDir, 386 fileBaseNames, linkDir); 387 } 388 389 /** 390 * Return this private value for use by unit tests. 391 * Shell commands are not allowed to have a total string length 392 * exceeding this size. 393 */ 394 protected static int getMaxAllowedCmdArgLength() { 395 return getHardLinkCommand.getMaxAllowedCmdArgLength(); 396 } 397 398 /* 399 * **************************************************** 400 * Complexity is above. User-visible functionality is below 401 * **************************************************** 402 */ 403 404 /** 405 * Creates a hardlink 406 * @param file - existing source file 407 * @param linkName - desired target link file 408 */ 409 public static void createHardLink(File file, File linkName) 410 throws IOException { 411 if (file == null) { 412 throw new IOException( 413 "invalid arguments to createHardLink: source file is null"); 414 } 415 if (linkName == null) { 416 throw new IOException( 417 "invalid arguments to createHardLink: link name is null"); 418 } 419 // construct and execute shell command 420 String[] hardLinkCommand = getHardLinkCommand.linkOne(file, linkName); 421 Process process = Runtime.getRuntime().exec(hardLinkCommand); 422 try { 423 if (process.waitFor() != 0) { 424 String errMsg = new BufferedReader(new InputStreamReader( 425 process.getInputStream())).readLine(); 426 if (errMsg == null) errMsg = ""; 427 String inpMsg = new BufferedReader(new InputStreamReader( 428 process.getErrorStream())).readLine(); 429 if (inpMsg == null) inpMsg = ""; 430 throw new IOException(errMsg + inpMsg); 431 } 432 } catch (InterruptedException e) { 433 throw new IOException(e); 434 } finally { 435 process.destroy(); 436 } 437 } 438 439 /** 440 * Creates hardlinks from multiple existing files within one parent 441 * directory, into one target directory. 442 * @param parentDir - directory containing source files 443 * @param fileBaseNames - list of path-less file names, as returned by 444 * parentDir.list() 445 * @param linkDir - where the hardlinks should be put. It must already exist. 446 * 447 * If the list of files is too long (overflows maxAllowedCmdArgLength), 448 * we will automatically split it into multiple invocations of the 449 * underlying method. 450 */ 451 public static void createHardLinkMult(File parentDir, String[] fileBaseNames, 452 File linkDir) throws IOException { 453 //This is the public method all non-test clients are expected to use. 454 //Normal case - allow up to maxAllowedCmdArgLength characters in the cmd 455 createHardLinkMult(parentDir, fileBaseNames, linkDir, 456 getHardLinkCommand.getMaxAllowedCmdArgLength()); 457 } 458 459 /* 460 * Implements {@link createHardLinkMult} with added variable "maxLength", 461 * to ease unit testing of the auto-splitting feature for long lists. 462 * Likewise why it returns "callCount", the number of sub-arrays that 463 * the file list had to be split into. 464 * Non-test clients are expected to call the public method instead. 465 */ 466 protected static int createHardLinkMult(File parentDir, 467 String[] fileBaseNames, File linkDir, int maxLength) 468 throws IOException { 469 if (parentDir == null) { 470 throw new IOException( 471 "invalid arguments to createHardLinkMult: parent directory is null"); 472 } 473 if (linkDir == null) { 474 throw new IOException( 475 "invalid arguments to createHardLinkMult: link directory is null"); 476 } 477 if (fileBaseNames == null) { 478 throw new IOException( 479 "invalid arguments to createHardLinkMult: " 480 + "filename list can be empty but not null"); 481 } 482 if (fileBaseNames.length == 0) { 483 //the OS cmds can't handle empty list of filenames, 484 //but it's legal, so just return. 485 return 0; 486 } 487 if (!linkDir.exists()) { 488 throw new FileNotFoundException(linkDir + " not found."); 489 } 490 491 //if the list is too long, split into multiple invocations 492 int callCount = 0; 493 if (getLinkMultArgLength(parentDir, fileBaseNames, linkDir) > maxLength 494 && fileBaseNames.length > 1) { 495 String[] list1 = Arrays.copyOf(fileBaseNames, fileBaseNames.length/2); 496 callCount += createHardLinkMult(parentDir, list1, linkDir, maxLength); 497 String[] list2 = Arrays.copyOfRange(fileBaseNames, fileBaseNames.length/2, 498 fileBaseNames.length); 499 callCount += createHardLinkMult(parentDir, list2, linkDir, maxLength); 500 return callCount; 501 } else { 502 callCount = 1; 503 } 504 505 // construct and execute shell command 506 String[] hardLinkCommand = getHardLinkCommand.linkMult(fileBaseNames, 507 linkDir); 508 Process process = Runtime.getRuntime().exec(hardLinkCommand, null, 509 parentDir); 510 try { 511 if (process.waitFor() != 0) { 512 String errMsg = new BufferedReader(new InputStreamReader( 513 process.getInputStream())).readLine(); 514 if (errMsg == null) errMsg = ""; 515 String inpMsg = new BufferedReader(new InputStreamReader( 516 process.getErrorStream())).readLine(); 517 if (inpMsg == null) inpMsg = ""; 518 throw new IOException(errMsg + inpMsg); 519 } 520 } catch (InterruptedException e) { 521 throw new IOException(e); 522 } finally { 523 process.destroy(); 524 } 525 return callCount; 526 } 527 528 /** 529 * Retrieves the number of links to the specified file. 530 */ 531 public static int getLinkCount(File fileName) throws IOException { 532 if (fileName == null) { 533 throw new IOException( 534 "invalid argument to getLinkCount: file name is null"); 535 } 536 if (!fileName.exists()) { 537 throw new FileNotFoundException(fileName + " not found."); 538 } 539 540 // construct and execute shell command 541 String[] cmd = getHardLinkCommand.linkCount(fileName); 542 String inpMsg = null; 543 String errMsg = null; 544 int exitValue = -1; 545 BufferedReader in = null; 546 BufferedReader err = null; 547 548 Process process = Runtime.getRuntime().exec(cmd); 549 try { 550 exitValue = process.waitFor(); 551 in = new BufferedReader(new InputStreamReader( 552 process.getInputStream())); 553 inpMsg = in.readLine(); 554 err = new BufferedReader(new InputStreamReader( 555 process.getErrorStream())); 556 errMsg = err.readLine(); 557 if (inpMsg == null || exitValue != 0) { 558 throw createIOException(fileName, inpMsg, errMsg, exitValue, null); 559 } 560 if (osType == OSType.OS_TYPE_SOLARIS) { 561 String[] result = inpMsg.split("\\s+"); 562 return Integer.parseInt(result[1]); 563 } else { 564 return Integer.parseInt(inpMsg); 565 } 566 } catch (NumberFormatException e) { 567 throw createIOException(fileName, inpMsg, errMsg, exitValue, e); 568 } catch (InterruptedException e) { 569 throw createIOException(fileName, inpMsg, errMsg, exitValue, e); 570 } finally { 571 process.destroy(); 572 if (in != null) in.close(); 573 if (err != null) err.close(); 574 } 575 } 576 577 /* Create an IOException for failing to get link count. */ 578 private static IOException createIOException(File f, String message, 579 String error, int exitvalue, Exception cause) { 580 581 final String winErrMsg = "; Windows errors in getLinkCount are often due " 582 + "to Cygwin misconfiguration"; 583 584 final String s = "Failed to get link count on file " + f 585 + ": message=" + message 586 + "; error=" + error 587 + ((osType == OSType.OS_TYPE_WINXP) ? winErrMsg : "") 588 + "; exit value=" + exitvalue; 589 return (cause == null) ? new IOException(s) : new IOException(s, cause); 590 } 591 592 593 /** 594 * HardLink statistics counters and methods. 595 * Not multi-thread safe, obviously. 596 * Init is called during HardLink instantiation, above. 597 * 598 * These are intended for use by knowledgeable clients, not internally, 599 * because many of the internal methods are static and can't update these 600 * per-instance counters. 601 */ 602 public static class LinkStats { 603 public int countDirs = 0; 604 public int countSingleLinks = 0; 605 public int countMultLinks = 0; 606 public int countFilesMultLinks = 0; 607 public int countEmptyDirs = 0; 608 public int countPhysicalFileCopies = 0; 609 610 public void clear() { 611 countDirs = 0; 612 countSingleLinks = 0; 613 countMultLinks = 0; 614 countFilesMultLinks = 0; 615 countEmptyDirs = 0; 616 countPhysicalFileCopies = 0; 617 } 618 619 public String report() { 620 return "HardLinkStats: " + countDirs + " Directories, including " 621 + countEmptyDirs + " Empty Directories, " 622 + countSingleLinks 623 + " single Link operations, " + countMultLinks 624 + " multi-Link operations, linking " + countFilesMultLinks 625 + " files, total " + (countSingleLinks + countFilesMultLinks) 626 + " linkable files. Also physically copied " 627 + countPhysicalFileCopies + " other files."; 628 } 629 } 630} 631