/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.fs;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Arrays;

import org.apache.hadoop.util.Shell;

/**
 * Class for creating hardlinks.
 * Supports Unix/Linux, Windows via winutils, and Mac OS X.
 *
 * The HardLink class was formerly a static inner class of FSUtil,
 * and the methods provided were blatantly non-thread-safe.
 * To enable volume-parallel Update snapshots, we now provide static
 * threadsafe methods that allocate new buffer string arrays
 * upon each call.  We also provide an API to hardlink all files in a
 * directory with a single command, which is up to 128 times more
 * efficient - and minimizes the impact of the extra buffer creations.
 */
public class HardLink {

  /**
   * OS-specific builder of shell commands; chosen once, during static
   * initialization, and never replaced afterwards.
   */
  private static final HardLinkCommandGetter getHardLinkCommand;

  public final LinkStats linkStats; //not static

  //initialize the command "getters" statically, so can use their
  //methods without instantiating the HardLink object
  static {
    if (Shell.WINDOWS) {
      // Windows
      getHardLinkCommand = new HardLinkCGWin();
    } else {
      // Unix or Linux
      getHardLinkCommand = new HardLinkCGUnix();
      //override getLinkCountCommand for the particular Unix variant
      //Linux is already set as the default - {"stat","-c%h", null}
      if (Shell.MAC || Shell.FREEBSD) {
        String[] linkCountCmdTemplate = {"/usr/bin/stat","-f%l", null};
        HardLinkCGUnix.setLinkCountCmdTemplate(linkCountCmdTemplate);
      } else if (Shell.SOLARIS) {
        // NOTE(review): "ls -l" on a directory lists its contents rather
        // than the directory itself; confirm callers only pass plain files.
        String[] linkCountCmdTemplate = {"ls","-l", null};
        HardLinkCGUnix.setLinkCountCmdTemplate(linkCountCmdTemplate);
      }
    }
  }

  public HardLink() {
    linkStats = new LinkStats();
  }

  /**
   * This abstract class bridges the OS-dependent implementations of the
   * needed functionality for creating hardlinks and querying link counts.
   * The particular implementation class is chosen during
   * static initialization phase of the HardLink class.
   * The "getter" methods construct shell command strings for various purposes.
   */
  private static abstract class HardLinkCommandGetter {

    /**
     * Get the command string needed to hardlink a bunch of files from
     * a single source directory into a target directory.  The source directory
     * is not specified here, but the command will be executed using the source
     * directory as the "current working directory" of the shell invocation.
     *
     * @param fileBaseNames - array of path-less file names, relative
     *            to the source directory
     * @param linkDir - target directory where the hardlinks will be put
     * @return - an array of Strings suitable for use as a single shell command
     *            with {@link Runtime#exec(String[])}
     * @throws IOException - if any of the file or path names misbehave
     */
    abstract String[] linkMult(String[] fileBaseNames, File linkDir)
        throws IOException;

    /**
     * Get the command string needed to hardlink a single file
     */
    abstract String[] linkOne(File file, File linkName) throws IOException;

    /**
     * Get the command string to query the hardlink count of a file
     */
    abstract String[] linkCount(File file) throws IOException;

    /**
     * Calculate the total string length of the shell command
     * resulting from execution of linkMult, plus the length of the
     * source directory name (which will also be provided to the shell)
     *
     * @param fileDir - source directory, parent of fileBaseNames
     * @param fileBaseNames - array of path-less file names, relative
     *            to the source directory
     * @param linkDir - target directory where the hardlinks will be put
     * @return - total data length (must not exceed maxAllowedCmdArgLength)
     * @throws IOException
     */
    abstract int getLinkMultArgLength(
        File fileDir, String[] fileBaseNames, File linkDir)
        throws IOException;

    /**
     * Get the maximum allowed string length of a shell command on this OS,
     * which is just the documented minimum guaranteed supported command
     * length - aprx. 32KB for Unix, and 8KB for Windows.
     */
    abstract int getMaxAllowedCmdArgLength();
  }

  /**
   * Implementation of HardLinkCommandGetter class for Unix
   */
  static class HardLinkCGUnix extends HardLinkCommandGetter {
    // Command templates; null slots are filled in on a fresh copy per call,
    // so the templates themselves are never modified.
    private static final String[] hardLinkCommand = {"ln", null, null};
    private static final String[] hardLinkMultPrefix = {"ln"};
    private static final String[] hardLinkMultSuffix = {null};
    // Not final: replaced for non-Linux Unix variants via
    // setLinkCountCmdTemplate().
    private static String[] getLinkCountCommand = {"stat","-c%h", null};
    //Unix guarantees at least 32K bytes cmd length.
    //Stay 65 bytes below that to allow for Java 'exec' overhead
    private static final int maxAllowedCmdArgLength = 32*1024 - 65;

    private static synchronized
    void setLinkCountCmdTemplate(String[] template) {
      //May update this for specific unix variants,
      //after static initialization phase
      getLinkCountCommand = template;
    }

    /*
     * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkOne(java.io.File, java.io.File)
     */
    @Override
    String[] linkOne(File file, File linkName)
        throws IOException {
      String[] buf = new String[hardLinkCommand.length];
      System.arraycopy(hardLinkCommand, 0, buf, 0, hardLinkCommand.length);
      //unix wants argument order: "ln <existing> <new>"
      buf[1] = FileUtil.makeShellPath(file, true);
      buf[2] = FileUtil.makeShellPath(linkName, true);
      return buf;
    }

    /*
     * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkMult(java.lang.String[], java.io.File)
     */
    @Override
    String[] linkMult(String[] fileBaseNames, File linkDir)
        throws IOException {
      String[] buf = new String[fileBaseNames.length
                                + hardLinkMultPrefix.length
                                + hardLinkMultSuffix.length];
      int mark=0;
      System.arraycopy(hardLinkMultPrefix, 0, buf, mark,
                       hardLinkMultPrefix.length);
      mark += hardLinkMultPrefix.length;
      System.arraycopy(fileBaseNames, 0, buf, mark, fileBaseNames.length);
      mark += fileBaseNames.length;
      // the single suffix slot is the target directory
      buf[mark] = FileUtil.makeShellPath(linkDir, true);
      return buf;
    }

    /*
     * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkCount(java.io.File)
     */
    @Override
    String[] linkCount(File file)
        throws IOException {
      String[] buf = new String[getLinkCountCommand.length];
      System.arraycopy(getLinkCountCommand, 0, buf, 0,
                       getLinkCountCommand.length);
      buf[getLinkCountCommand.length - 1] = FileUtil.makeShellPath(file, true);
      return buf;
    }

    /*
     * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#getLinkMultArgLength(java.io.File, java.lang.String[], java.io.File)
     */
    @Override
    int getLinkMultArgLength(File fileDir, String[] fileBaseNames, File linkDir)
        throws IOException{
      int sum = 0;
      for (String x : fileBaseNames) {
        // add 1 to account for terminal null or delimiter space
        sum += 1 + ((x == null) ? 0 : x.length());
      }
      sum += 2 + FileUtil.makeShellPath(fileDir, true).length()
             + FileUtil.makeShellPath(linkDir, true).length();
      //add the fixed overhead of the hardLinkMult prefix and suffix
      sum += 3; //length("ln") + 1
      return sum;
    }

    /*
     * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#getMaxAllowedCmdArgLength()
     */
    @Override
    int getMaxAllowedCmdArgLength() {
      return maxAllowedCmdArgLength;
    }
  }


  /**
   * Implementation of HardLinkCommandGetter class for Windows
   */
  static class HardLinkCGWin extends HardLinkCommandGetter {
    //The Windows command getter impl class and its member fields are
    //package-private ("default") access instead of "private" to assist
    //unit testing (sort of) on non-Win servers

    static String[] hardLinkCommand = {
                        Shell.WINUTILS,"hardlink","create", null, null};
    static String[] hardLinkMultPrefix = {
                        "cmd","/q","/c","for", "%f", "in", "("};
    static String   hardLinkMultDir = "\\%f";
    static String[] hardLinkMultSuffix = {
                        ")", "do", Shell.WINUTILS, "hardlink", "create", null,
                        "%f", "1>NUL"};
    static String[] getLinkCountCommand = {
                        Shell.WINUTILS, "hardlink",
                        "stat", null};
    //Windows guarantees only 8K - 1 bytes cmd length.
    //Subtract another 64b to allow for Java 'exec' overhead
    static final int maxAllowedCmdArgLength = 8*1024 - 65;

    /*
     * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkOne(java.io.File, java.io.File)
     */
    @Override
    String[] linkOne(File file, File linkName)
        throws IOException {
      String[] buf = new String[hardLinkCommand.length];
      System.arraycopy(hardLinkCommand, 0, buf, 0, hardLinkCommand.length);
      //windows wants argument order: "create <new> <existing>"
      buf[4] = file.getCanonicalPath();
      buf[3] = linkName.getCanonicalPath();
      return buf;
    }

    /*
     * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkMult(java.lang.String[], java.io.File)
     */
    @Override
    String[] linkMult(String[] fileBaseNames, File linkDir)
        throws IOException {
      String[] buf = new String[fileBaseNames.length
                                + hardLinkMultPrefix.length
                                + hardLinkMultSuffix.length];
      String td = linkDir.getCanonicalPath() + hardLinkMultDir;
      int mark=0;
      System.arraycopy(hardLinkMultPrefix, 0, buf, mark,
                       hardLinkMultPrefix.length);
      mark += hardLinkMultPrefix.length;
      System.arraycopy(fileBaseNames, 0, buf, mark, fileBaseNames.length);
      mark += fileBaseNames.length;
      System.arraycopy(hardLinkMultSuffix, 0, buf, mark,
                       hardLinkMultSuffix.length);
      mark += hardLinkMultSuffix.length;
      // the null placeholder sits third from the end of the suffix template
      buf[mark - 3] = td;
      return buf;
    }

    /*
     * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkCount(java.io.File)
     */
    @Override
    String[] linkCount(File file)
        throws IOException {
      String[] buf = new String[getLinkCountCommand.length];
      System.arraycopy(getLinkCountCommand, 0, buf, 0,
                       getLinkCountCommand.length);
      buf[getLinkCountCommand.length - 1] = file.getCanonicalPath();
      return buf;
    }

    /*
     * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#getLinkMultArgLength(java.io.File, java.lang.String[], java.io.File)
     */
    @Override
    int getLinkMultArgLength(File fileDir, String[] fileBaseNames, File linkDir)
        throws IOException {
      int sum = 0;
      for (String x : fileBaseNames) {
        // add 1 to account for terminal null or delimiter space
        sum += 1 + ((x == null) ? 0 : x.length());
      }
      sum += 2 + fileDir.getCanonicalPath().length() +
                 linkDir.getCanonicalPath().length();
      //add the fixed overhead of the hardLinkMult command
      //(prefix, suffix, and Dir suffix)
      sum += ("cmd.exe /q /c for %f in ( ) do "
              + Shell.WINUTILS + " hardlink create \\%f %f 1>NUL ").length();
      return sum;
    }

    /*
     * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#getMaxAllowedCmdArgLength()
     */
    @Override
    int getMaxAllowedCmdArgLength() {
      return maxAllowedCmdArgLength;
    }
  }


  /**
   * Calculate the nominal length of all contributors to the total
   * commandstring length, including fixed overhead of the OS-dependent
   * command.  It's protected rather than private, to assist unit testing,
   * but real clients are not expected to need it -- see the way
   * createHardLinkMult() uses it internally so the user doesn't need to worry
   * about it.
   *
   * @param fileDir - source directory, parent of fileBaseNames
   * @param fileBaseNames - array of path-less file names, relative
   *            to the source directory
   * @param linkDir - target directory where the hardlinks will be put
   * @return - total data length (must not exceed maxAllowedCmdArgLength)
   * @throws IOException
   */
  protected static int getLinkMultArgLength(
      File fileDir, String[] fileBaseNames, File linkDir)
      throws IOException {
    return getHardLinkCommand.getLinkMultArgLength(fileDir,
        fileBaseNames, linkDir);
  }

  /**
   * Return this private value for use by unit tests.
   * Shell commands are not allowed to have a total string length
   * exceeding this size.
   */
  protected static int getMaxAllowedCmdArgLength() {
    return getHardLinkCommand.getMaxAllowedCmdArgLength();
  }

  /*
   * ****************************************************
   * Shell execution helpers (private)
   * ****************************************************
   */

  /**
   * Outcome of one shell command: its exit code and the first line
   * (possibly null) that it wrote to each of stdout and stderr.
   */
  private static final class CommandResult {
    final int exitValue;
    final String stdoutLine;
    final String stderrLine;

    CommandResult(int exitValue, String stdoutLine, String stderrLine) {
      this.exitValue = exitValue;
      this.stdoutLine = stdoutLine;
      this.stderrLine = stderrLine;
    }
  }

  /**
   * Helper thread that consumes a subprocess stream to end-of-stream while
   * remembering only its first line.  The results are plain fields and must
   * only be read after {@link Thread#join()} has returned.
   */
  private static final class FirstLineDrainer extends Thread {
    private final BufferedReader reader;
    private String firstLine;
    private IOException failure;

    FirstLineDrainer(BufferedReader reader) {
      super("HardLink-stream-drainer");
      this.reader = reader;
      // never keep the JVM alive just to drain a child's output
      setDaemon(true);
    }

    @Override
    public void run() {
      try {
        firstLine = readFirstLineAndDrain(reader);
      } catch (IOException e) {
        failure = e;
      } finally {
        // this thread owns the reader, so it is the one to close it
        closeQuietly(reader);
      }
    }
  }

  /**
   * Read the first line from the reader, then discard everything else up to
   * end-of-stream so that the process writing to it can never block on a
   * full pipe.
   *
   * @param reader - stream to consume
   * @return the first line, or null if the stream was empty
   * @throws IOException if reading fails
   */
  private static String readFirstLineAndDrain(BufferedReader reader)
      throws IOException {
    String first = reader.readLine();
    if (first != null) {
      char[] sink = new char[4096];
      while (reader.read(sink) != -1) {
        // discard
      }
    }
    return first;
  }

  /**
   * Close the reader, ignoring any failure: by the time this is called the
   * command's result (or its primary exception) is already determined and
   * must not be masked by a secondary close error.
   */
  private static void closeQuietly(BufferedReader reader) {
    if (reader == null) {
      return;
    }
    try {
      reader.close();
    } catch (IOException ignored) {
      // best-effort cleanup only
    }
  }

  /**
   * Run a shell command to completion.  Both output streams are consumed
   * while the process is running (stderr on a helper thread, stdout on the
   * calling thread); waiting for the exit code first could deadlock once the
   * child fills a pipe buffer.
   *
   * @param cmd - command and arguments
   * @param workingDir - working directory for the command, or null to
   *            inherit the working directory of the current process
   * @return exit code plus the first line of stdout and of stderr
   * @throws IOException if the command cannot be started or its output
   *            cannot be read
   * @throws InterruptedException if the calling thread is interrupted while
   *            waiting; the process is destroyed before this propagates
   */
  private static CommandResult runCommand(String[] cmd, File workingDir)
      throws IOException, InterruptedException {
    Process process = Runtime.getRuntime().exec(cmd, null, workingDir);
    BufferedReader out = new BufferedReader(new InputStreamReader(
        process.getInputStream()));
    FirstLineDrainer errDrainer = new FirstLineDrainer(new BufferedReader(
        new InputStreamReader(process.getErrorStream())));
    try {
      errDrainer.start();
      String stdoutLine = readFirstLineAndDrain(out);
      int exitValue = process.waitFor();
      errDrainer.join();
      if (errDrainer.failure != null) {
        throw errDrainer.failure;
      }
      return new CommandResult(exitValue, stdoutLine, errDrainer.firstLine);
    } finally {
      process.destroy();
      closeQuietly(out);
    }
  }

  /**
   * Run a link-creating command and convert a non-zero exit code into an
   * IOException whose message is the first stdout line followed by the
   * first stderr line (either may be empty).
   *
   * @param cmd - command and arguments
   * @param workingDir - working directory for the command, or null
   * @throws IOException if the command fails, cannot be run, or the calling
   *            thread is interrupted (the interrupt status is restored)
   */
  private static void execLinkCommand(String[] cmd, File workingDir)
      throws IOException {
    CommandResult result;
    try {
      result = runCommand(cmd, workingDir);
    } catch (InterruptedException e) {
      // keep the interruption visible to callers further up the stack
      Thread.currentThread().interrupt();
      throw new IOException(e);
    }
    if (result.exitValue != 0) {
      String outMsg = (result.stdoutLine == null) ? "" : result.stdoutLine;
      String errMsg = (result.stderrLine == null) ? "" : result.stderrLine;
      throw new IOException(outMsg + errMsg);
    }
  }

  /*
   * ****************************************************
   * Complexity is above.  User-visible functionality is below
   * ****************************************************
   */

  /**
   * Creates a hardlink
   * @param file - existing source file
   * @param linkName - desired target link file
   * @throws IOException if either argument is null, or if the link command
   *            fails or is interrupted
   */
  public static void createHardLink(File file, File linkName)
      throws IOException {
    if (file == null) {
      throw new IOException(
          "invalid arguments to createHardLink: source file is null");
    }
    if (linkName == null) {
      throw new IOException(
          "invalid arguments to createHardLink: link name is null");
    }
    // construct and execute shell command
    String[] hardLinkCommand = getHardLinkCommand.linkOne(file, linkName);
    execLinkCommand(hardLinkCommand, null);
  }

  /**
   * Creates hardlinks from multiple existing files within one parent
   * directory, into one target directory.
   * @param parentDir - directory containing source files
   * @param fileBaseNames - list of path-less file names, as returned by
   *                        parentDir.list()
   * @param linkDir - where the hardlinks should be put.  It must already exist.
   *
   * If the list of files is too long (overflows maxAllowedCmdArgLength),
   * we will automatically split it into multiple invocations of the
   * underlying method.
   */
  public static void createHardLinkMult(File parentDir, String[] fileBaseNames,
      File linkDir) throws IOException {
    //This is the public method all non-test clients are expected to use.
    //Normal case - allow up to maxAllowedCmdArgLength characters in the cmd
    createHardLinkMult(parentDir, fileBaseNames, linkDir,
                       getHardLinkCommand.getMaxAllowedCmdArgLength());
  }

  /**
   * Implements {@link #createHardLinkMult(File, String[], File)} with added
   * variable "maxLength", to ease unit testing of the auto-splitting feature
   * for long lists.  Likewise why it returns "callCount", the number of
   * sub-arrays that the file list had to be split into.
   * Non-test clients are expected to call the public method instead.
   *
   * @param parentDir - directory containing source files
   * @param fileBaseNames - list of path-less file names
   * @param linkDir - where the hardlinks should be put.  It must already exist.
   * @param maxLength - maximum command length allowed for one invocation
   * @return the number of shell commands that were executed
   * @throws IOException if an argument is null, linkDir does not exist, or
   *            a link command fails or is interrupted
   */
  protected static int createHardLinkMult(File parentDir,
      String[] fileBaseNames, File linkDir, int maxLength)
      throws IOException {
    if (parentDir == null) {
      throw new IOException(
          "invalid arguments to createHardLinkMult: parent directory is null");
    }
    if (linkDir == null) {
      throw new IOException(
          "invalid arguments to createHardLinkMult: link directory is null");
    }
    if (fileBaseNames == null) {
      throw new IOException(
          "invalid arguments to createHardLinkMult: "
          + "filename list can be empty but not null");
    }
    if (fileBaseNames.length == 0) {
      //the OS cmds can't handle empty list of filenames,
      //but it's legal, so just return.
      return 0;
    }
    if (!linkDir.exists()) {
      throw new FileNotFoundException(linkDir + " not found.");
    }

    //if the list is too long, split into multiple invocations
    //(a single over-long name cannot be split further, so it is attempted)
    if (getLinkMultArgLength(parentDir, fileBaseNames, linkDir) > maxLength
        && fileBaseNames.length > 1) {
      int half = fileBaseNames.length / 2;
      String[] list1 = Arrays.copyOf(fileBaseNames, half);
      int callCount = createHardLinkMult(parentDir, list1, linkDir, maxLength);
      String[] list2 = Arrays.copyOfRange(fileBaseNames, half,
          fileBaseNames.length);
      callCount += createHardLinkMult(parentDir, list2, linkDir, maxLength);
      return callCount;
    }

    // construct and execute shell command, with the source directory as
    // the working directory so the bare file names resolve
    String[] hardLinkCommand = getHardLinkCommand.linkMult(fileBaseNames,
        linkDir);
    execLinkCommand(hardLinkCommand, parentDir);
    return 1;
  }

  /**
   * Retrieves the number of links to the specified file.
   *
   * @param fileName - file to query
   * @return the link count reported by the OS-specific command
   * @throws IOException if fileName is null or does not exist, or if the
   *            command fails, is interrupted, or prints unparseable output
   */
  public static int getLinkCount(File fileName) throws IOException {
    if (fileName == null) {
      throw new IOException(
          "invalid argument to getLinkCount: file name is null");
    }
    if (!fileName.exists()) {
      throw new FileNotFoundException(fileName + " not found.");
    }

    // construct and execute shell command
    String[] cmd = getHardLinkCommand.linkCount(fileName);
    String inpMsg = null;
    String errMsg = null;
    int exitValue = -1;

    try {
      CommandResult outcome = runCommand(cmd, null);
      exitValue = outcome.exitValue;
      inpMsg = outcome.stdoutLine;
      errMsg = outcome.stderrLine;
      if (inpMsg == null || exitValue != 0) {
        throw createIOException(fileName, inpMsg, errMsg, exitValue, null);
      }
      if (Shell.SOLARIS) {
        // "ls -l" output: the link count is the second column
        String[] fields = inpMsg.split("\\s+");
        if (fields.length < 2) {
          throw createIOException(fileName, inpMsg, errMsg, exitValue, null);
        }
        return Integer.parseInt(fields[1]);
      } else {
        return Integer.parseInt(inpMsg);
      }
    } catch (NumberFormatException e) {
      throw createIOException(fileName, inpMsg, errMsg, exitValue, e);
    } catch (InterruptedException e) {
      // keep the interruption visible to callers further up the stack
      Thread.currentThread().interrupt();
      throw createIOException(fileName, inpMsg, errMsg, exitValue, e);
    }
  }

  /* Create an IOException for failing to get link count. */
  private static IOException createIOException(File f, String message,
      String error, int exitvalue, Exception cause) {

    final String s = "Failed to get link count on file " + f
        + ": message=" + message
        + "; error=" + error
        + "; exit value=" + exitvalue;
    return (cause == null) ? new IOException(s) : new IOException(s, cause);
  }


  /**
   * HardLink statistics counters and methods.
   * Not multi-thread safe, obviously.
   * Init is called during HardLink instantiation, above.
   *
   * These are intended for use by knowledgeable clients, not internally,
   * because many of the internal methods are static and can't update these
   * per-instance counters.
   */
  public static class LinkStats {
    public int countDirs = 0;
    public int countSingleLinks = 0;
    public int countMultLinks = 0;
    public int countFilesMultLinks = 0;
    public int countEmptyDirs = 0;
    public int countPhysicalFileCopies = 0;

    /** Reset every counter to zero. */
    public void clear() {
      countDirs = 0;
      countSingleLinks = 0;
      countMultLinks = 0;
      countFilesMultLinks = 0;
      countEmptyDirs = 0;
      countPhysicalFileCopies = 0;
    }

    /** @return a one-line, human-readable summary of all counters */
    public String report() {
      return "HardLinkStats: " + countDirs + " Directories, including "
      + countEmptyDirs + " Empty Directories, "
      + countSingleLinks
      + " single Link operations, " + countMultLinks
      + " multi-Link operations, linking " + countFilesMultLinks
      + " files, total " + (countSingleLinks + countFilesMultLinks)
      + " linkable files.  Also physically copied "
      + countPhysicalFileCopies + " other files.";
    }
  }
}