/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.fs;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Arrays;

import org.apache.hadoop.util.Shell;

/**
 * Class for creating hardlinks.
 * Supports Unix/Linux, Windows via winutils, and Mac OS X.
 *
 * The HardLink class was formerly a static inner class of FSUtil,
 * and the methods provided were blatantly non-thread-safe.
 * To enable volume-parallel Update snapshots, we now provide static
 * threadsafe methods that allocate new buffer string arrays
 * upon each call.  We also provide an API to hardlink all files in a
 * directory with a single command, which is up to 128 times more
 * efficient - and minimizes the impact of the extra buffer creations.
 */
public class HardLink {

  /** Operating system families for which a command set is selected. */
  public enum OSType {
    OS_TYPE_UNIX,
    OS_TYPE_WIN,
    OS_TYPE_SOLARIS,
    OS_TYPE_MAC,
    OS_TYPE_FREEBSD
  }

  /** The OS family detected during static initialization. */
  public static OSType osType;

  /** OS-specific builder of the shell commands used by the static methods. */
  private static HardLinkCommandGetter getHardLinkCommand;

  /** Per-instance statistics counters; not static. */
  public final LinkStats linkStats;

  // Initialize the command "getters" statically, so their methods can be
  // used without instantiating a HardLink object.
  static {
    osType = getOSType();
    if (osType == OSType.OS_TYPE_WIN) {
      // Windows
      getHardLinkCommand = new HardLinkCGWin();
    } else {
      // Unix
      getHardLinkCommand = new HardLinkCGUnix();
      // Override the link-count command for the particular Unix variant.
      // Linux is already set as the default - {"stat","-c%h", null}
      if (osType == OSType.OS_TYPE_MAC || osType == OSType.OS_TYPE_FREEBSD) {
        String[] linkCountCmdTemplate = {"/usr/bin/stat", "-f%l", null};
        HardLinkCGUnix.setLinkCountCmdTemplate(linkCountCmdTemplate);
      } else if (osType == OSType.OS_TYPE_SOLARIS) {
        String[] linkCountCmdTemplate = {"ls", "-l", null};
        HardLinkCGUnix.setLinkCountCmdTemplate(linkCountCmdTemplate);
      }
    }
  }

  /** Creates a HardLink object holding a fresh set of statistics counters. */
  public HardLink() {
    linkStats = new LinkStats();
  }

  /**
   * Determine the OS family from {@link Shell#WINDOWS} and the
   * "os.name" system property.
   *
   * @return the detected OS type; anything unrecognized is treated as Unix
   */
  static private OSType getOSType() {
    String osName = System.getProperty("os.name");
    if (Shell.WINDOWS) {
      return OSType.OS_TYPE_WIN;
    }
    if (osName.contains("SunOS") || osName.contains("Solaris")) {
      return OSType.OS_TYPE_SOLARIS;
    }
    if (osName.contains("Mac")) {
      return OSType.OS_TYPE_MAC;
    }
    if (osName.contains("FreeBSD")) {
      return OSType.OS_TYPE_FREEBSD;
    }
    return OSType.OS_TYPE_UNIX;
  }

  /**
   * This abstract class bridges the OS-dependent implementations of the
   * needed functionality for creating hardlinks and querying link counts.
   * The particular implementation class is chosen during
   * static initialization phase of the HardLink class.
   * The "getter" methods construct shell command strings for various purposes.
   */
  private static abstract class HardLinkCommandGetter {

    /**
     * Get the command string needed to hardlink a bunch of files from
     * a single source directory into a target directory.  The source directory
     * is not specified here, but the command will be executed using the source
     * directory as the "current working directory" of the shell invocation.
     *
     * @param fileBaseNames - array of path-less file names, relative
     *            to the source directory
     * @param linkDir - target directory where the hardlinks will be put
     * @return - an array of Strings suitable for use as a single shell command
     *            with {@link Runtime#exec(String[])}
     * @throws IOException - if any of the file or path names misbehave
     */
    abstract String[] linkMult(String[] fileBaseNames, File linkDir)
        throws IOException;

    /**
     * Get the command string needed to hardlink a single file
     */
    abstract String[] linkOne(File file, File linkName) throws IOException;

    /**
     * Get the command string to query the hardlink count of a file
     */
    abstract String[] linkCount(File file) throws IOException;

    /**
     * Calculate the total string length of the shell command
     * resulting from execution of linkMult, plus the length of the
     * source directory name (which will also be provided to the shell)
     *
     * @param fileDir - source directory, parent of fileBaseNames
     * @param fileBaseNames - array of path-less file names, relative
     *            to the source directory
     * @param linkDir - target directory where the hardlinks will be put
     * @return - total data length (must not exceed maxAllowedCmdArgLength)
     * @throws IOException
     */
    abstract int getLinkMultArgLength(
        File fileDir, String[] fileBaseNames, File linkDir)
        throws IOException;

    /**
     * Get the maximum allowed string length of a shell command on this OS,
     * which is just the documented minimum guaranteed supported command
     * length - aprx. 32KB for Unix, and 8KB for Windows.
     */
    abstract int getMaxAllowedCmdArgLength();
  }

  /**
   * Implementation of HardLinkCommandGetter class for Unix
   */
  static class HardLinkCGUnix extends HardLinkCommandGetter {
    // Command templates; null slots are filled in on a per-call copy.
    private static String[] hardLinkCommand = {"ln", null, null};
    private static String[] hardLinkMultPrefix = {"ln"};
    private static String[] hardLinkMultSuffix = {null};
    private static String[] getLinkCountCommand = {"stat", "-c%h", null};
    //Unix guarantees at least 32K bytes cmd length.
    //Subtract another 64b to allow for Java 'exec' overhead
    private static final int maxAllowedCmdArgLength = 32*1024 - 65;

    /**
     * Replace the link-count command template.  The last element of the
     * template is the slot that linkCount() overwrites with the file path.
     */
    private static synchronized
    void setLinkCountCmdTemplate(String[] template) {
      //May update this for specific unix variants,
      //after static initialization phase
      getLinkCountCommand = template;
    }

    /*
     * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkOne(java.io.File, java.io.File)
     */
    @Override
    String[] linkOne(File file, File linkName)
        throws IOException {
      String[] buf = new String[hardLinkCommand.length];
      System.arraycopy(hardLinkCommand, 0, buf, 0, hardLinkCommand.length);
      //unix wants argument order: "ln <existing> <new>"
      buf[1] = FileUtil.makeShellPath(file, true);
      buf[2] = FileUtil.makeShellPath(linkName, true);
      return buf;
    }

    /*
     * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkMult(java.lang.String[], java.io.File)
     */
    @Override
    String[] linkMult(String[] fileBaseNames, File linkDir)
        throws IOException {
      String[] buf = new String[fileBaseNames.length
                                + hardLinkMultPrefix.length
                                + hardLinkMultSuffix.length];
      int mark = 0;
      System.arraycopy(hardLinkMultPrefix, 0, buf, mark,
                       hardLinkMultPrefix.length);
      mark += hardLinkMultPrefix.length;
      System.arraycopy(fileBaseNames, 0, buf, mark, fileBaseNames.length);
      mark += fileBaseNames.length;
      // the single suffix slot holds the target directory
      buf[mark] = FileUtil.makeShellPath(linkDir, true);
      return buf;
    }

    /*
     * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkCount(java.io.File)
     */
    @Override
    String[] linkCount(File file)
        throws IOException {
      String[] buf = new String[getLinkCountCommand.length];
      System.arraycopy(getLinkCountCommand, 0, buf, 0,
                       getLinkCountCommand.length);
      buf[getLinkCountCommand.length - 1] = FileUtil.makeShellPath(file, true);
      return buf;
    }

    /*
     * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#getLinkMultArgLength(java.io.File, java.lang.String[], java.io.File)
     */
    @Override
    int getLinkMultArgLength(File fileDir, String[] fileBaseNames, File linkDir)
        throws IOException {
      int sum = 0;
      for (String x : fileBaseNames) {
        // add 1 to account for terminal null or delimiter space
        sum += 1 + ((x == null) ? 0 : x.length());
      }
      sum += 2 + FileUtil.makeShellPath(fileDir, true).length()
             + FileUtil.makeShellPath(linkDir, true).length();
      //add the fixed overhead of the hardLinkMult prefix and suffix
      sum += 3; //length("ln") + 1
      return sum;
    }

    /*
     * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#getMaxAllowedCmdArgLength()
     */
    @Override
    int getMaxAllowedCmdArgLength() {
      return maxAllowedCmdArgLength;
    }
  }


  /**
   * Implementation of HardLinkCommandGetter class for Windows
   */
  static class HardLinkCGWin extends HardLinkCommandGetter {
    //The Windows command getter impl class and its member fields are
    //package-private ("default") access instead of "private" to assist
    //unit testing (sort of) on non-Win servers

    static String[] hardLinkCommand = {
        Shell.WINUTILS, "hardlink", "create", null, null};
    static String[] hardLinkMultPrefix = {
        "cmd", "/q", "/c", "for", "%f", "in", "("};
    static String hardLinkMultDir = "\\%f";
    static String[] hardLinkMultSuffix = {
        ")", "do", Shell.WINUTILS, "hardlink", "create", null,
        "%f", "1>NUL"};
    static String[] getLinkCountCommand = {
        Shell.WINUTILS, "hardlink",
        "stat", null};
    //Windows guarantees only 8K - 1 bytes cmd length.
    //Subtract another 64b to allow for Java 'exec' overhead
    static final int maxAllowedCmdArgLength = 8*1024 - 65;

    /*
     * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkOne(java.io.File, java.io.File)
     */
    @Override
    String[] linkOne(File file, File linkName)
        throws IOException {
      String[] buf = new String[hardLinkCommand.length];
      System.arraycopy(hardLinkCommand, 0, buf, 0, hardLinkCommand.length);
      //windows wants argument order: "create <new> <existing>"
      buf[4] = file.getCanonicalPath();
      buf[3] = linkName.getCanonicalPath();
      return buf;
    }

    /*
     * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkMult(java.lang.String[], java.io.File)
     */
    @Override
    String[] linkMult(String[] fileBaseNames, File linkDir)
        throws IOException {
      String[] buf = new String[fileBaseNames.length
                                + hardLinkMultPrefix.length
                                + hardLinkMultSuffix.length];
      String td = linkDir.getCanonicalPath() + hardLinkMultDir;
      int mark = 0;
      System.arraycopy(hardLinkMultPrefix, 0, buf, mark,
                       hardLinkMultPrefix.length);
      mark += hardLinkMultPrefix.length;
      System.arraycopy(fileBaseNames, 0, buf, mark, fileBaseNames.length);
      mark += fileBaseNames.length;
      System.arraycopy(hardLinkMultSuffix, 0, buf, mark,
                       hardLinkMultSuffix.length);
      mark += hardLinkMultSuffix.length;
      // the null placeholder sits third from the end of the suffix
      buf[mark - 3] = td;
      return buf;
    }

    /*
     * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkCount(java.io.File)
     */
    @Override
    String[] linkCount(File file)
        throws IOException {
      String[] buf = new String[getLinkCountCommand.length];
      System.arraycopy(getLinkCountCommand, 0, buf, 0,
                       getLinkCountCommand.length);
      buf[getLinkCountCommand.length - 1] = file.getCanonicalPath();
      return buf;
    }

    /*
     * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#getLinkMultArgLength(java.io.File, java.lang.String[], java.io.File)
     */
    @Override
    int getLinkMultArgLength(File fileDir, String[] fileBaseNames, File linkDir)
        throws IOException {
      int sum = 0;
      for (String x : fileBaseNames) {
        // add 1 to account for terminal null or delimiter space
        sum += 1 + ((x == null) ? 0 : x.length());
      }
      sum += 2 + fileDir.getCanonicalPath().length() +
             linkDir.getCanonicalPath().length();
      //add the fixed overhead of the hardLinkMult command
      //(prefix, suffix, and Dir suffix)
      sum += ("cmd.exe /q /c for %f in ( ) do "
              + Shell.WINUTILS + " hardlink create \\%f %f 1>NUL ").length();
      return sum;
    }

    /*
     * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#getMaxAllowedCmdArgLength()
     */
    @Override
    int getMaxAllowedCmdArgLength() {
      return maxAllowedCmdArgLength;
    }
  }


  /**
   * Calculate the nominal length of all contributors to the total
   * commandstring length, including fixed overhead of the OS-dependent
   * command.  It's protected rather than private, to assist unit testing,
   * but real clients are not expected to need it -- see the way
   * createHardLinkMult() uses it internally so the user doesn't need to worry
   * about it.
   *
   * @param fileDir - source directory, parent of fileBaseNames
   * @param fileBaseNames - array of path-less file names, relative
   *            to the source directory
   * @param linkDir - target directory where the hardlinks will be put
   * @return - total data length (must not exceed maxAllowedCmdArgLength)
   * @throws IOException
   */
  protected static int getLinkMultArgLength(
      File fileDir, String[] fileBaseNames, File linkDir)
      throws IOException {
    return getHardLinkCommand.getLinkMultArgLength(fileDir,
        fileBaseNames, linkDir);
  }

  /**
   * Return this private value for use by unit tests.
   * Shell commands are not allowed to have a total string length
   * exceeding this size.
   */
  protected static int getMaxAllowedCmdArgLength() {
    return getHardLinkCommand.getMaxAllowedCmdArgLength();
  }

  /**
   * Read the first line of a process output stream and close the reader.
   *
   * @param stream - stdout or stderr of a child process
   * @return the first line, or null if the stream produced no output
   * @throws IOException - if reading the stream fails
   */
  private static String readFirstLine(java.io.InputStream stream)
      throws IOException {
    BufferedReader reader = new BufferedReader(new InputStreamReader(stream));
    try {
      return reader.readLine();
    } finally {
      try {
        reader.close();
      } catch (IOException ignored) {
        // Best effort: the line (or the read failure) is what matters to
        // the caller; a close failure must not replace it.
      }
    }
  }

  /**
   * Wait for a hardlink command to finish and fail if it exited non-zero.
   * The process is always destroyed before this method returns.
   *
   * @param process - the running hardlink command
   * @throws IOException - if the command exits with a non-zero status (the
   *            message is the first line of stdout followed by the first
   *            line of stderr), or if the wait is interrupted
   */
  private static void waitForSuccess(Process process) throws IOException {
    try {
      if (process.waitFor() != 0) {
        String outMsg = readFirstLine(process.getInputStream());
        String errMsg = readFirstLine(process.getErrorStream());
        throw new IOException(((outMsg == null) ? "" : outMsg)
            + ((errMsg == null) ? "" : errMsg));
      }
    } catch (InterruptedException e) {
      // keep the interrupt visible to callers further up the stack
      Thread.currentThread().interrupt();
      throw new IOException(e);
    } finally {
      process.destroy();
    }
  }

  /*
   * ****************************************************
   * Complexity is above.  User-visible functionality is below
   * ****************************************************
   */

  /**
   * Creates a hardlink
   * @param file - existing source file
   * @param linkName - desired target link file
   * @throws IOException - if either argument is null, the link command
   *            fails, or the calling thread is interrupted while waiting
   */
  public static void createHardLink(File file, File linkName)
      throws IOException {
    if (file == null) {
      throw new IOException(
          "invalid arguments to createHardLink: source file is null");
    }
    if (linkName == null) {
      throw new IOException(
          "invalid arguments to createHardLink: link name is null");
    }
    // construct and execute shell command
    String[] hardLinkCommand = getHardLinkCommand.linkOne(file, linkName);
    waitForSuccess(Runtime.getRuntime().exec(hardLinkCommand));
  }

  /**
   * Creates hardlinks from multiple existing files within one parent
   * directory, into one target directory.
   * @param parentDir - directory containing source files
   * @param fileBaseNames - list of path-less file names, as returned by
   *                        parentDir.list()
   * @param linkDir - where the hardlinks should be put.  It must already exist.
   *
   * If the list of files is too long (overflows maxAllowedCmdArgLength),
   * we will automatically split it into multiple invocations of the
   * underlying method.
   */
  public static void createHardLinkMult(File parentDir, String[] fileBaseNames,
      File linkDir) throws IOException {
    //This is the public method all non-test clients are expected to use.
    //Normal case - allow up to maxAllowedCmdArgLength characters in the cmd
    createHardLinkMult(parentDir, fileBaseNames, linkDir,
                       getHardLinkCommand.getMaxAllowedCmdArgLength());
  }

  /*
   * Implements {@link createHardLinkMult} with added variable  "maxLength",
   * to ease unit testing of the auto-splitting feature for long lists.
   * Likewise why it returns "callCount", the number of sub-arrays that
   * the file list had to be split into.
   * Non-test clients are expected to call the public method instead.
   */
  protected static int createHardLinkMult(File parentDir,
      String[] fileBaseNames, File linkDir, int maxLength)
      throws IOException {
    if (parentDir == null) {
      throw new IOException(
          "invalid arguments to createHardLinkMult: parent directory is null");
    }
    if (linkDir == null) {
      throw new IOException(
          "invalid arguments to createHardLinkMult: link directory is null");
    }
    if (fileBaseNames == null) {
      throw new IOException(
          "invalid arguments to createHardLinkMult: "
          + "filename list can be empty but not null");
    }
    if (fileBaseNames.length == 0) {
      //the OS cmds can't handle empty list of filenames,
      //but it's legal, so just return.
      return 0;
    }
    if (!linkDir.exists()) {
      throw new FileNotFoundException(linkDir + " not found.");
    }

    //if the list is too long, split into multiple invocations
    if (getLinkMultArgLength(parentDir, fileBaseNames, linkDir) > maxLength
        && fileBaseNames.length > 1) {
      int half = fileBaseNames.length / 2;
      String[] list1 = Arrays.copyOf(fileBaseNames, half);
      int callCount = createHardLinkMult(parentDir, list1, linkDir, maxLength);
      String[] list2 = Arrays.copyOfRange(fileBaseNames, half,
          fileBaseNames.length);
      callCount += createHardLinkMult(parentDir, list2, linkDir, maxLength);
      return callCount;
    }

    // construct and execute shell command, using the source directory
    // as the working directory of the child process
    String[] hardLinkCommand = getHardLinkCommand.linkMult(fileBaseNames,
        linkDir);
    waitForSuccess(Runtime.getRuntime().exec(hardLinkCommand, null,
        parentDir));
    return 1;
  }

  /**
   * Retrieves the number of links to the specified file.
   *
   * @param fileName - the file to query
   * @return the link count reported by the OS-specific command
   * @throws IOException - if the argument is null, the file does not exist
   *            (as FileNotFoundException), the command fails or produces
   *            unparseable output, or the calling thread is interrupted
   */
  public static int getLinkCount(File fileName) throws IOException {
    if (fileName == null) {
      throw new IOException(
          "invalid argument to getLinkCount: file name is null");
    }
    if (!fileName.exists()) {
      throw new FileNotFoundException(fileName + " not found.");
    }

    // construct and execute shell command
    String[] cmd = getHardLinkCommand.linkCount(fileName);
    String inpMsg = null;
    String errMsg = null;
    int exitValue = -1;

    Process process = Runtime.getRuntime().exec(cmd);
    try {
      exitValue = process.waitFor();
      inpMsg = readFirstLine(process.getInputStream());
      errMsg = readFirstLine(process.getErrorStream());
      if (inpMsg == null || exitValue != 0) {
        throw createIOException(fileName, inpMsg, errMsg, exitValue, null);
      }
      if (osType == OSType.OS_TYPE_SOLARIS) {
        // the Solaris template is "ls -l"; the count is the second field
        String[] result = inpMsg.split("\\s+");
        if (result.length < 2) {
          throw createIOException(fileName, inpMsg, errMsg, exitValue, null);
        }
        return Integer.parseInt(result[1]);
      }
      return Integer.parseInt(inpMsg);
    } catch (NumberFormatException e) {
      throw createIOException(fileName, inpMsg, errMsg, exitValue, e);
    } catch (InterruptedException e) {
      // keep the interrupt visible to callers further up the stack
      Thread.currentThread().interrupt();
      throw createIOException(fileName, inpMsg, errMsg, exitValue, e);
    } finally {
      process.destroy();
    }
  }

  /* Create an IOException for failing to get link count. */
  private static IOException createIOException(File f, String message,
      String error, int exitvalue, Exception cause) {

    final String s = "Failed to get link count on file " + f
        + ": message=" + message
        + "; error=" + error
        + "; exit value=" + exitvalue;
    return (cause == null) ? new IOException(s) : new IOException(s, cause);
  }


  /**
   * HardLink statistics counters and methods.
   * Not multi-thread safe, obviously.
   * Init is called during HardLink instantiation, above.
   *
   * These are intended for use by knowledgeable clients, not internally,
   * because many of the internal methods are static and can't update these
   * per-instance counters.
   */
  public static class LinkStats {
    public int countDirs = 0;
    public int countSingleLinks = 0;
    public int countMultLinks = 0;
    public int countFilesMultLinks = 0;
    public int countEmptyDirs = 0;
    public int countPhysicalFileCopies = 0;

    /** Reset every counter to zero. */
    public void clear() {
      countDirs = 0;
      countSingleLinks = 0;
      countMultLinks = 0;
      countFilesMultLinks = 0;
      countEmptyDirs = 0;
      countPhysicalFileCopies = 0;
    }

    /** Build a one-line human-readable summary of the counters. */
    public String report() {
      return "HardLinkStats: " + countDirs + " Directories, including "
          + countEmptyDirs + " Empty Directories, "
          + countSingleLinks
          + " single Link operations, " + countMultLinks
          + " multi-Link operations, linking " + countFilesMultLinks
          + " files, total " + (countSingleLinks + countFilesMultLinks)
          + " linkable files.  Also physically copied "
          + countPhysicalFileCopies + " other files.";
    }
  }
}