/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.fs;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Arrays;

/**
 * Class for creating hardlinks.
 * Supports Unix/Linux, WinXP/2003/Vista via Cygwin, and Mac OS X.
 *
 * The HardLink class was formerly a static inner class of FSUtil,
 * and the methods provided were blatantly non-thread-safe.
 * To enable volume-parallel Update snapshots, we now provide static
 * threadsafe methods that allocate new buffer string arrays
 * upon each call.  We also provide an API to hardlink all files in a
 * directory with a single command, which is up to 128 times more
 * efficient - and minimizes the impact of the extra buffer creations.
 */
public class HardLink {

  /**
   * Operating system families distinguished by this class. The value
   * selects which shell commands are used to create links and to query
   * link counts.
   */
  public enum OSType {
    OS_TYPE_UNIX,
    OS_TYPE_WINXP,
    OS_TYPE_SOLARIS,
    OS_TYPE_MAC,
    OS_TYPE_FREEBSD
  }

  /** OS family detected from the "os.name" system property at class load. */
  public static OSType osType;

  /** OS-specific builder of the shell commands, chosen at class load. */
  private static HardLinkCommandGetter getHardLinkCommand;

  /** Per-instance statistics counters; see {@link LinkStats}. */
  public final LinkStats linkStats; //not static

  //initialize the command "getters" statically, so can use their
  //methods without instantiating the HardLink object
  static {
    osType = getOSType();
    if (osType == OSType.OS_TYPE_WINXP) {
      // Windows
      getHardLinkCommand = new HardLinkCGWin();
    } else {
      // Unix
      getHardLinkCommand = new HardLinkCGUnix();
      //override getLinkCountCommand for the particular Unix variant
      //Linux is already set as the default - {"stat","-c%h", null}
      if (osType == OSType.OS_TYPE_MAC || osType == OSType.OS_TYPE_FREEBSD) {
        String[] linkCountCmdTemplate = {"/usr/bin/stat","-f%l", null};
        HardLinkCGUnix.setLinkCountCmdTemplate(linkCountCmdTemplate);
      } else if (osType == OSType.OS_TYPE_SOLARIS) {
        String[] linkCountCmdTemplate = {"ls","-l", null};
        HardLinkCGUnix.setLinkCountCmdTemplate(linkCountCmdTemplate);
      }
    }
  }

  /**
   * Creates a HardLink instance whose only state is a fresh set of
   * {@link LinkStats} counters.
   */
  public HardLink() {
    linkStats = new LinkStats();
  }

  /**
   * Maps the "os.name" system property onto an {@link OSType}.
   * Anything that is not recognized as one of the listed Windows versions,
   * Solaris/SunOS, Mac or FreeBSD is treated as generic Unix.
   */
  private static OSType getOSType() {
    String osName = System.getProperty("os.name");
    if (osName.contains("Windows") &&
            (osName.contains("XP")
            || osName.contains("2003")
            || osName.contains("Vista")
            || osName.contains("Windows_7")
            || osName.contains("Windows 7")
            || osName.contains("Windows7"))) {
      return OSType.OS_TYPE_WINXP;
    }
    else if (osName.contains("SunOS")
            || osName.contains("Solaris")) {
      return OSType.OS_TYPE_SOLARIS;
    }
    else if (osName.contains("Mac")) {
      return OSType.OS_TYPE_MAC;
    }
    else if (osName.contains("FreeBSD")) {
      return OSType.OS_TYPE_FREEBSD;
    }
    else {
      return OSType.OS_TYPE_UNIX;
    }
  }

  /**
   * This abstract class bridges the OS-dependent implementations of the
   * needed functionality for creating hardlinks and querying link counts.
   * The particular implementation class is chosen during
   * static initialization phase of the HardLink class.
   * The "getter" methods construct shell command strings for various purposes.
   */
  private static abstract class HardLinkCommandGetter {

    /**
     * Get the command string needed to hardlink a bunch of files from
     * a single source directory into a target directory.  The source directory
     * is not specified here, but the command will be executed using the source
     * directory as the "current working directory" of the shell invocation.
     *
     * @param fileBaseNames - array of path-less file names, relative
     *            to the source directory
     * @param linkDir - target directory where the hardlinks will be put
     * @return - an array of Strings suitable for use as a single shell command
     *            with {@link Runtime#exec(String[])}
     * @throws IOException - if any of the file or path names misbehave
     */
    abstract String[] linkMult(String[] fileBaseNames, File linkDir)
        throws IOException;

    /**
     * Get the command string needed to hardlink a single file
     */
    abstract String[] linkOne(File file, File linkName) throws IOException;

    /**
     * Get the command string to query the hardlink count of a file
     */
    abstract String[] linkCount(File file) throws IOException;

    /**
     * Calculate the total string length of the shell command
     * resulting from execution of linkMult, plus the length of the
     * source directory name (which will also be provided to the shell)
     *
     * @param fileDir - source directory, parent of fileBaseNames
     * @param fileBaseNames - array of path-less file names, relative
     *            to the source directory
     * @param linkDir - target directory where the hardlinks will be put
     * @return - total data length (must not exceed maxAllowedCmdArgLength)
     * @throws IOException
     */
    abstract int getLinkMultArgLength(
        File fileDir, String[] fileBaseNames, File linkDir)
        throws IOException;

    /**
     * Get the maximum allowed string length of a shell command on this OS,
     * which is just the documented minimum guaranteed supported command
     * length - approximately 32KB for Unix, and 8KB for Windows.
     */
    abstract int getMaxAllowedCmdArgLength();
  }

  /**
   * Implementation of HardLinkCommandGetter class for Unix
   */
  static class HardLinkCGUnix extends HardLinkCommandGetter {
    //Templates are never modified in place: every call copies them into a
    //fresh array and fills in the null slots.
    private static final String[] hardLinkCommand = {"ln", null, null};
    private static final String[] hardLinkMultPrefix = {"ln"};
    private static final String[] hardLinkMultSuffix = {null};
    //Not final: replaced during HardLink static initialization for
    //Unix variants whose link-count command differs from Linux.
    private static String[] getLinkCountCommand = {"stat","-c%h", null};
    //Unix guarantees at least 32K bytes cmd length.
    //Subtract another 64b to allow for Java 'exec' overhead
    private static final int maxAllowedCmdArgLength = 32*1024 - 65;

    /**
     * Replaces the link-count command template. The last element of the
     * template must be a placeholder; linkCount() overwrites it with the
     * file path.
     */
    private static synchronized
    void setLinkCountCmdTemplate(String[] template) {
      //May update this for specific unix variants,
      //after static initialization phase
      getLinkCountCommand = template;
    }

    /*
     * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkOne(java.io.File, java.io.File)
     */
    @Override
    String[] linkOne(File file, File linkName)
        throws IOException {
      String[] buf = new String[hardLinkCommand.length];
      System.arraycopy(hardLinkCommand, 0, buf, 0, hardLinkCommand.length);
      //unix wants argument order: "ln <existing> <new>"
      buf[1] = FileUtil.makeShellPath(file, true);
      buf[2] = FileUtil.makeShellPath(linkName, true);
      return buf;
    }

    /*
     * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkMult(java.lang.String[], java.io.File)
     */
    @Override
    String[] linkMult(String[] fileBaseNames, File linkDir)
        throws IOException {
      String[] buf = new String[fileBaseNames.length
                                + hardLinkMultPrefix.length
                                + hardLinkMultSuffix.length];
      int mark=0;
      System.arraycopy(hardLinkMultPrefix, 0, buf, mark,
                       hardLinkMultPrefix.length);
      mark += hardLinkMultPrefix.length;
      System.arraycopy(fileBaseNames, 0, buf, mark, fileBaseNames.length);
      mark += fileBaseNames.length;
      //the single suffix slot is the target directory
      buf[mark] = FileUtil.makeShellPath(linkDir, true);
      return buf;
    }

    /*
     * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkCount(java.io.File)
     */
    @Override
    String[] linkCount(File file)
        throws IOException {
      String[] buf = new String[getLinkCountCommand.length];
      System.arraycopy(getLinkCountCommand, 0, buf, 0,
                       getLinkCountCommand.length);
      buf[getLinkCountCommand.length - 1] = FileUtil.makeShellPath(file, true);
      return buf;
    }

    /*
     * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#getLinkMultArgLength(java.io.File, java.lang.String[], java.io.File)
     */
    @Override
    int getLinkMultArgLength(File fileDir, String[] fileBaseNames, File linkDir)
        throws IOException{
      int sum = 0;
      for (String x : fileBaseNames) {
        // add 1 to account for terminal null or delimiter space
        sum += 1 + ((x == null) ? 0 : x.length());
      }
      sum += 2 + FileUtil.makeShellPath(fileDir, true).length()
             + FileUtil.makeShellPath(linkDir, true).length();
      //add the fixed overhead of the hardLinkMult prefix and suffix
      sum += 3; //length("ln") + 1
      return sum;
    }

    /*
     * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#getMaxAllowedCmdArgLength()
     */
    @Override
    int getMaxAllowedCmdArgLength() {
      return maxAllowedCmdArgLength;
    }
  }


  /**
   * Implementation of HardLinkCommandGetter class for Windows
   *
   * Note that the linkCount shell command for Windows is actually
   * a Cygwin shell command, and depends on ${cygwin}/bin
   * being in the Windows PATH environment variable, so
   * stat.exe can be found.
   */
  static class HardLinkCGWin extends HardLinkCommandGetter {
    //The Windows command getter impl class and its member fields are
    //package-private ("default") access instead of "private" to assist
    //unit testing (sort of) on non-Win servers

    static String[] hardLinkCommand = {
                        "fsutil","hardlink","create", null, null};
    static String[] hardLinkMultPrefix = {
                        "cmd","/q","/c","for", "%f", "in", "("};
    static String   hardLinkMultDir = "\\%f";
    static String[] hardLinkMultSuffix = {
                        ")", "do", "fsutil", "hardlink", "create", null,
                        "%f", "1>NUL"};
    static String[] getLinkCountCommand = {"stat","-c%h", null};
    //Windows guarantees only 8K - 1 bytes cmd length.
    //Subtract another 64b to allow for Java 'exec' overhead
    static final int maxAllowedCmdArgLength = 8*1024 - 65;

    /*
     * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkOne(java.io.File, java.io.File)
     */
    @Override
    String[] linkOne(File file, File linkName)
        throws IOException {
      String[] buf = new String[hardLinkCommand.length];
      System.arraycopy(hardLinkCommand, 0, buf, 0, hardLinkCommand.length);
      //windows wants argument order: "create <new> <existing>"
      buf[4] = file.getCanonicalPath();
      buf[3] = linkName.getCanonicalPath();
      return buf;
    }

    /*
     * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkMult(java.lang.String[], java.io.File)
     */
    @Override
    String[] linkMult(String[] fileBaseNames, File linkDir)
        throws IOException {
      String[] buf = new String[fileBaseNames.length
                                + hardLinkMultPrefix.length
                                + hardLinkMultSuffix.length];
      String td = linkDir.getCanonicalPath() + hardLinkMultDir;
      int mark=0;
      System.arraycopy(hardLinkMultPrefix, 0, buf, mark,
                       hardLinkMultPrefix.length);
      mark += hardLinkMultPrefix.length;
      System.arraycopy(fileBaseNames, 0, buf, mark, fileBaseNames.length);
      mark += fileBaseNames.length;
      System.arraycopy(hardLinkMultSuffix, 0, buf, mark,
                       hardLinkMultSuffix.length);
      mark += hardLinkMultSuffix.length;
      //the null placeholder sits third from the end of the suffix
      buf[mark - 3] = td;
      return buf;
    }

    /*
     * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkCount(java.io.File)
     */
    @Override
    String[] linkCount(File file)
        throws IOException {
      String[] buf = new String[getLinkCountCommand.length];
      System.arraycopy(getLinkCountCommand, 0, buf, 0,
                       getLinkCountCommand.length);
      //The linkCount command is actually a Cygwin shell command,
      //not a Windows shell command, so we should use "makeShellPath()"
      //instead of "getCanonicalPath()".  However, that causes another
      //shell exec to "cygpath.exe", and "stat.exe" actually can handle
      //DOS-style paths (it just prints a couple hundred bytes of warning
      //to stderr), so we use the more efficient "getCanonicalPath()".
      buf[getLinkCountCommand.length - 1] = file.getCanonicalPath();
      return buf;
    }

    /*
     * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#getLinkMultArgLength(java.io.File, java.lang.String[], java.io.File)
     */
    @Override
    int getLinkMultArgLength(File fileDir, String[] fileBaseNames, File linkDir)
        throws IOException {
      int sum = 0;
      for (String x : fileBaseNames) {
        // add 1 to account for terminal null or delimiter space
        sum += 1 + ((x == null) ? 0 : x.length());
      }
      sum += 2 + fileDir.getCanonicalPath().length() +
               linkDir.getCanonicalPath().length();
      //add the fixed overhead of the hardLinkMult command
      //(prefix, suffix, and Dir suffix)
      sum += ("cmd.exe /q /c for %f in ( ) do "
              + "fsutil hardlink create \\%f %f 1>NUL ").length();
      return sum;
    }

    /*
     * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#getMaxAllowedCmdArgLength()
     */
    @Override
    int getMaxAllowedCmdArgLength() {
      return maxAllowedCmdArgLength;
    }
  }


  /**
   * Calculate the nominal length of all contributors to the total
   * commandstring length, including fixed overhead of the OS-dependent
   * command.  It's protected rather than private, to assist unit testing,
   * but real clients are not expected to need it -- see the way
   * createHardLinkMult() uses it internally so the user doesn't need to worry
   * about it.
   *
   * @param fileDir - source directory, parent of fileBaseNames
   * @param fileBaseNames - array of path-less file names, relative
   *            to the source directory
   * @param linkDir - target directory where the hardlinks will be put
   * @return - total data length (must not exceed maxAllowedCmdArgLength)
   * @throws IOException
   */
  protected static int getLinkMultArgLength(
          File fileDir, String[] fileBaseNames, File linkDir)
  throws IOException {
    return getHardLinkCommand.getLinkMultArgLength(fileDir,
          fileBaseNames, linkDir);
  }

  /**
   * Return this private value for use by unit tests.
   * Shell commands are not allowed to have a total string length
   * exceeding this size.
   */
  protected static int getMaxAllowedCmdArgLength() {
    return getHardLinkCommand.getMaxAllowedCmdArgLength();
  }

  /*
   * ****************************************************
   * Complexity is above.  User-visible functionality is below
   * ****************************************************
   */

  /**
   * Creates a hardlink
   * @param file - existing source file
   * @param linkName - desired target link file
   * @throws IOException - if either argument is null, if the link command
   *            exits with a non-zero status, or if the calling thread is
   *            interrupted while waiting for the command
   */
  public static void createHardLink(File file, File linkName)
  throws IOException {
    if (file == null) {
      throw new IOException(
          "invalid arguments to createHardLink: source file is null");
    }
    if (linkName == null) {
      throw new IOException(
          "invalid arguments to createHardLink: link name is null");
    }
    // construct and execute shell command
    String[] hardLinkCommand = getHardLinkCommand.linkOne(file, linkName);
    awaitLinkCommand(Runtime.getRuntime().exec(hardLinkCommand));
  }

  /**
   * Creates hardlinks from multiple existing files within one parent
   * directory, into one target directory.
   * @param parentDir - directory containing source files
   * @param fileBaseNames - list of path-less file names, as returned by
   *                        parentDir.list()
   * @param linkDir - where the hardlinks should be put.  It must already exist.
   *
   * If the list of files is too long (overflows maxAllowedCmdArgLength),
   * we will automatically split it into multiple invocations of the
   * underlying method.
   */
  public static void createHardLinkMult(File parentDir, String[] fileBaseNames,
      File linkDir) throws IOException {
    //This is the public method all non-test clients are expected to use.
    //Normal case - allow up to maxAllowedCmdArgLength characters in the cmd
    createHardLinkMult(parentDir, fileBaseNames, linkDir,
                       getHardLinkCommand.getMaxAllowedCmdArgLength());
  }

  /**
   * Implements the public createHardLinkMult with added variable "maxLength",
   * to ease unit testing of the auto-splitting feature for long lists.
   * Likewise why it returns "callCount", the number of sub-arrays that
   * the file list had to be split into.
   * Non-test clients are expected to call the public method instead.
   *
   * @param parentDir - directory containing source files
   * @param fileBaseNames - path-less file names; may be empty but not null
   * @param linkDir - existing directory where the hardlinks are put
   * @param maxLength - maximum nominal command length for one invocation
   * @return the number of shell commands that were executed
   * @throws IOException - on null arguments, a missing link directory
   *            (FileNotFoundException), a failing link command, or
   *            interruption while waiting for the command
   */
  protected static int createHardLinkMult(File parentDir,
      String[] fileBaseNames, File linkDir, int maxLength)
  throws IOException {
    if (parentDir == null) {
      throw new IOException(
          "invalid arguments to createHardLinkMult: parent directory is null");
    }
    if (linkDir == null) {
      throw new IOException(
          "invalid arguments to createHardLinkMult: link directory is null");
    }
    if (fileBaseNames == null) {
      throw new IOException(
          "invalid arguments to createHardLinkMult: "
          + "filename list can be empty but not null");
    }
    if (fileBaseNames.length == 0) {
      //the OS cmds can't handle empty list of filenames,
      //but it's legal, so just return.
      return 0;
    }
    if (!linkDir.exists()) {
      throw new FileNotFoundException(linkDir + " not found.");
    }

    //if the list is too long, split into multiple invocations
    //(a single over-long name cannot be split further, so it is attempted)
    if (getLinkMultArgLength(parentDir, fileBaseNames, linkDir) > maxLength
        && fileBaseNames.length > 1) {
      int half = fileBaseNames.length/2;
      String[] list1 = Arrays.copyOf(fileBaseNames, half);
      int callCount = createHardLinkMult(parentDir, list1, linkDir, maxLength);
      String[] list2 = Arrays.copyOfRange(fileBaseNames, half,
          fileBaseNames.length);
      callCount += createHardLinkMult(parentDir, list2, linkDir, maxLength);
      return callCount;
    }

    // construct and execute shell command, using the source directory
    // as the working directory so the base names resolve
    String[] hardLinkCommand = getHardLinkCommand.linkMult(fileBaseNames,
        linkDir);
    awaitLinkCommand(Runtime.getRuntime().exec(hardLinkCommand, null,
        parentDir));
    return 1;
  }

  /**
   * Waits for a link-creating process to finish and always destroys it.
   * A non-zero exit status is reported as an IOException whose message is
   * the first line of the process's standard output followed by the first
   * line of its standard error (each empty if absent).
   */
  private static void awaitLinkCommand(Process process) throws IOException {
    try {
      if (process.waitFor() != 0) {
        String outMsg = firstLineOrEmpty(new BufferedReader(
            new InputStreamReader(process.getInputStream())));
        String errMsg = firstLineOrEmpty(new BufferedReader(
            new InputStreamReader(process.getErrorStream())));
        throw new IOException(outMsg + errMsg);
      }
    } catch (InterruptedException e) {
      //keep the interruption visible to callers further up the stack
      Thread.currentThread().interrupt();
      throw new IOException(e);
    } finally {
      process.destroy();
    }
  }

  /** Reads one line from the reader, closes it, and maps null to "". */
  private static String firstLineOrEmpty(BufferedReader reader)
  throws IOException {
    try {
      String line = reader.readLine();
      return (line == null) ? "" : line;
    } finally {
      closeQuietly(reader);
    }
  }

  /** Closes the reader if non-null, discarding any failure to close. */
  private static void closeQuietly(BufferedReader reader) {
    if (reader == null) {
      return;
    }
    try {
      reader.close();
    } catch (IOException ignored) {
      //Nothing useful can be done about a failed close here, and it must
      //not mask the result or exception of the main operation.
    }
  }

  /**
   * Retrieves the number of links to the specified file.
   *
   * @param fileName - existing file to query
   * @return the link count reported by the OS-specific command
   * @throws IOException - if the argument is null, the file does not exist
   *            (FileNotFoundException), the command fails or prints
   *            unparseable output, or the calling thread is interrupted
   */
  public static int getLinkCount(File fileName) throws IOException {
    if (fileName == null) {
      throw new IOException(
          "invalid argument to getLinkCount: file name is null");
    }
    if (!fileName.exists()) {
      throw new FileNotFoundException(fileName + " not found.");
    }

    // construct and execute shell command
    String[] cmd = getHardLinkCommand.linkCount(fileName);
    String inpMsg = null;
    String errMsg = null;
    int exitValue = -1;
    BufferedReader in = null;
    BufferedReader err = null;

    Process process = Runtime.getRuntime().exec(cmd);
    try {
      exitValue = process.waitFor();
      in = new BufferedReader(new InputStreamReader(
                                  process.getInputStream()));
      inpMsg = in.readLine();
      err = new BufferedReader(new InputStreamReader(
                                   process.getErrorStream()));
      errMsg = err.readLine();
      if (inpMsg == null || exitValue != 0) {
        throw createIOException(fileName, inpMsg, errMsg, exitValue, null);
      }
      if (osType == OSType.OS_TYPE_SOLARIS) {
        //"ls -l" output: the link count is the second whitespace-separated
        //field; anything shorter is malformed and reported as an IOException
        String[] result = inpMsg.split("\\s+");
        if (result.length < 2) {
          throw createIOException(fileName, inpMsg, errMsg, exitValue, null);
        }
        return Integer.parseInt(result[1]);
      } else {
        return Integer.parseInt(inpMsg);
      }
    } catch (NumberFormatException e) {
      throw createIOException(fileName, inpMsg, errMsg, exitValue, e);
    } catch (InterruptedException e) {
      //keep the interruption visible to callers further up the stack
      Thread.currentThread().interrupt();
      throw createIOException(fileName, inpMsg, errMsg, exitValue, e);
    } finally {
      process.destroy();
      closeQuietly(in);
      closeQuietly(err);
    }
  }

  /* Create an IOException for failing to get link count. */
  private static IOException createIOException(File f, String message,
      String error, int exitvalue, Exception cause) {

    final String winErrMsg = "; Windows errors in getLinkCount are often due "
         + "to Cygwin misconfiguration";

    final String s = "Failed to get link count on file " + f
        + ": message=" + message
        + "; error=" + error
        + ((osType == OSType.OS_TYPE_WINXP) ? winErrMsg : "")
        + "; exit value=" + exitvalue;
    return (cause == null) ? new IOException(s) : new IOException(s, cause);
  }


  /**
   * HardLink statistics counters and methods.
   * Not multi-thread safe, obviously.
   * Init is called during HardLink instantiation, above.
   *
   * These are intended for use by knowledgeable clients, not internally,
   * because many of the internal methods are static and can't update these
   * per-instance counters.
   */
  public static class LinkStats {
    public int countDirs = 0;
    public int countSingleLinks = 0;
    public int countMultLinks = 0;
    public int countFilesMultLinks = 0;
    public int countEmptyDirs = 0;
    public int countPhysicalFileCopies = 0;

    /** Resets every counter to zero. */
    public void clear() {
      countDirs = 0;
      countSingleLinks = 0;
      countMultLinks = 0;
      countFilesMultLinks = 0;
      countEmptyDirs = 0;
      countPhysicalFileCopies = 0;
    }

    /** Returns a one-line, human-readable summary of all counters. */
    public String report() {
      return "HardLinkStats: " + countDirs + " Directories, including "
      + countEmptyDirs + " Empty Directories, "
      + countSingleLinks
      + " single Link operations, " + countMultLinks
      + " multi-Link operations, linking " + countFilesMultLinks
      + " files, total " + (countSingleLinks + countFilesMultLinks)
      + " linkable files.  Also physically copied "
      + countPhysicalFileCopies + " other files.";
    }
  }
}