001 /** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018 019 package org.apache.hadoop.fs; 020 021 import java.io.BufferedReader; 022 import java.io.File; 023 import java.io.FileNotFoundException; 024 import java.io.IOException; 025 import java.io.StringReader; 026 import java.util.Arrays; 027 028 import org.apache.hadoop.io.IOUtils; 029 import org.apache.hadoop.util.Shell; 030 import org.apache.hadoop.util.Shell.ExitCodeException; 031 import org.apache.hadoop.util.Shell.ShellCommandExecutor; 032 033 /** 034 * Class for creating hardlinks. 035 * Supports Unix/Linux, Windows via winutils , and Mac OS X. 036 * 037 * The HardLink class was formerly a static inner class of FSUtil, 038 * and the methods provided were blatantly non-thread-safe. 039 * To enable volume-parallel Update snapshots, we now provide static 040 * threadsafe methods that allocate new buffer string arrays 041 * upon each call. We also provide an API to hardlink all files in a 042 * directory with a single command, which is up to 128 times more 043 * efficient - and minimizes the impact of the extra buffer creations. 044 */ 045 public class HardLink { 046 047 private static HardLinkCommandGetter getHardLinkCommand; 048 049 public final LinkStats linkStats; //not static 050 051 //initialize the command "getters" statically, so can use their 052 //methods without instantiating the HardLink object 053 static { 054 if (Shell.WINDOWS) { 055 // Windows 056 getHardLinkCommand = new HardLinkCGWin(); 057 } else { 058 // Unix or Linux 059 getHardLinkCommand = new HardLinkCGUnix(); 060 //override getLinkCountCommand for the particular Unix variant 061 //Linux is already set as the default - {"stat","-c%h", null} 062 if (Shell.MAC || Shell.FREEBSD) { 063 String[] linkCountCmdTemplate = {"/usr/bin/stat","-f%l", null}; 064 HardLinkCGUnix.setLinkCountCmdTemplate(linkCountCmdTemplate); 065 } else if (Shell.SOLARIS) { 066 String[] linkCountCmdTemplate = {"ls","-l", null}; 067 HardLinkCGUnix.setLinkCountCmdTemplate(linkCountCmdTemplate); 068 } 069 } 070 } 071 072 public HardLink() { 073 linkStats = new LinkStats(); 074 } 075 076 /** 077 * This abstract class bridges the OS-dependent implementations of the 078 * needed functionality for creating hardlinks and querying link counts. 079 * The particular implementation class is chosen during 080 * static initialization phase of the HardLink class. 081 * The "getter" methods construct shell command strings for various purposes. 082 */ 083 private static abstract class HardLinkCommandGetter { 084 085 /** 086 * Get the command string needed to hardlink a bunch of files from 087 * a single source directory into a target directory. The source directory 088 * is not specified here, but the command will be executed using the source 089 * directory as the "current working directory" of the shell invocation. 090 * 091 * @param fileBaseNames - array of path-less file names, relative 092 * to the source directory 093 * @param linkDir - target directory where the hardlinks will be put 094 * @return - an array of Strings suitable for use as a single shell command 095 * @throws IOException - if any of the file or path names misbehave 096 */ 097 abstract String[] linkMult(String[] fileBaseNames, File linkDir) 098 throws IOException; 099 100 /** 101 * Get the command string needed to hardlink a single file 102 */ 103 abstract String[] linkOne(File file, File linkName) throws IOException; 104 105 /** 106 * Get the command string to query the hardlink count of a file 107 */ 108 abstract String[] linkCount(File file) throws IOException; 109 110 /** 111 * Calculate the total string length of the shell command 112 * resulting from execution of linkMult, plus the length of the 113 * source directory name (which will also be provided to the shell) 114 * 115 * @param fileDir - source directory, parent of fileBaseNames 116 * @param fileBaseNames - array of path-less file names, relative 117 * to the source directory 118 * @param linkDir - target directory where the hardlinks will be put 119 * @return - total data length (must not exceed maxAllowedCmdArgLength) 120 * @throws IOException 121 */ 122 abstract int getLinkMultArgLength( 123 File fileDir, String[] fileBaseNames, File linkDir) 124 throws IOException; 125 126 /** 127 * Get the maximum allowed string length of a shell command on this OS, 128 * which is just the documented minimum guaranteed supported command 129 * length - aprx. 32KB for Unix, and 8KB for Windows. 130 */ 131 abstract int getMaxAllowedCmdArgLength(); 132 } 133 134 /** 135 * Implementation of HardLinkCommandGetter class for Unix 136 */ 137 static class HardLinkCGUnix extends HardLinkCommandGetter { 138 private static String[] hardLinkCommand = {"ln", null, null}; 139 private static String[] hardLinkMultPrefix = {"ln"}; 140 private static String[] hardLinkMultSuffix = {null}; 141 private static String[] getLinkCountCommand = {"stat","-c%h", null}; 142 //Unix guarantees at least 32K bytes cmd length. 143 //Subtract another 64b to allow for Java 'exec' overhead 144 private static final int maxAllowedCmdArgLength = 32*1024 - 65; 145 146 private static synchronized 147 void setLinkCountCmdTemplate(String[] template) { 148 //May update this for specific unix variants, 149 //after static initialization phase 150 getLinkCountCommand = template; 151 } 152 153 /* 154 * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkOne(java.io.File, java.io.File) 155 */ 156 @Override 157 String[] linkOne(File file, File linkName) 158 throws IOException { 159 String[] buf = new String[hardLinkCommand.length]; 160 System.arraycopy(hardLinkCommand, 0, buf, 0, hardLinkCommand.length); 161 //unix wants argument order: "ln <existing> <new>" 162 buf[1] = FileUtil.makeShellPath(file, true); 163 buf[2] = FileUtil.makeShellPath(linkName, true); 164 return buf; 165 } 166 167 /* 168 * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkMult(java.lang.String[], java.io.File) 169 */ 170 @Override 171 String[] linkMult(String[] fileBaseNames, File linkDir) 172 throws IOException { 173 String[] buf = new String[fileBaseNames.length 174 + hardLinkMultPrefix.length 175 + hardLinkMultSuffix.length]; 176 int mark=0; 177 System.arraycopy(hardLinkMultPrefix, 0, buf, mark, 178 hardLinkMultPrefix.length); 179 mark += hardLinkMultPrefix.length; 180 System.arraycopy(fileBaseNames, 0, buf, mark, fileBaseNames.length); 181 mark += fileBaseNames.length; 182 buf[mark] = FileUtil.makeShellPath(linkDir, true); 183 return buf; 184 } 185 186 /* 187 * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkCount(java.io.File) 188 */ 189 @Override 190 String[] linkCount(File file) 191 throws IOException { 192 String[] buf = new String[getLinkCountCommand.length]; 193 System.arraycopy(getLinkCountCommand, 0, buf, 0, 194 getLinkCountCommand.length); 195 buf[getLinkCountCommand.length - 1] = FileUtil.makeShellPath(file, true); 196 return buf; 197 } 198 199 /* 200 * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#getLinkMultArgLength(java.io.File, java.lang.String[], java.io.File) 201 */ 202 @Override 203 int getLinkMultArgLength(File fileDir, String[] fileBaseNames, File linkDir) 204 throws IOException{ 205 int sum = 0; 206 for (String x : fileBaseNames) { 207 // add 1 to account for terminal null or delimiter space 208 sum += 1 + ((x == null) ? 0 : x.length()); 209 } 210 sum += 2 + FileUtil.makeShellPath(fileDir, true).length() 211 + FileUtil.makeShellPath(linkDir, true).length(); 212 //add the fixed overhead of the hardLinkMult prefix and suffix 213 sum += 3; //length("ln") + 1 214 return sum; 215 } 216 217 /* 218 * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#getMaxAllowedCmdArgLength() 219 */ 220 @Override 221 int getMaxAllowedCmdArgLength() { 222 return maxAllowedCmdArgLength; 223 } 224 } 225 226 227 /** 228 * Implementation of HardLinkCommandGetter class for Windows 229 */ 230 static class HardLinkCGWin extends HardLinkCommandGetter { 231 //The Windows command getter impl class and its member fields are 232 //package-private ("default") access instead of "private" to assist 233 //unit testing (sort of) on non-Win servers 234 235 static String CMD_EXE = "cmd.exe"; 236 static String[] hardLinkCommand = { 237 Shell.WINUTILS,"hardlink","create", null, null}; 238 static String[] hardLinkMultPrefix = { 239 CMD_EXE, "/q", "/c", "for", "%f", "in", "("}; 240 static String hardLinkMultDir = "\\%f"; 241 static String[] hardLinkMultSuffix = { 242 ")", "do", Shell.WINUTILS, "hardlink", "create", null, 243 "%f"}; 244 static String[] getLinkCountCommand = { 245 Shell.WINUTILS, "hardlink", "stat", null}; 246 //Windows guarantees only 8K - 1 bytes cmd length. 247 //Subtract another 64b to allow for Java 'exec' overhead 248 static final int maxAllowedCmdArgLength = 8*1024 - 65; 249 250 /* 251 * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkOne(java.io.File, java.io.File) 252 */ 253 @Override 254 String[] linkOne(File file, File linkName) 255 throws IOException { 256 String[] buf = new String[hardLinkCommand.length]; 257 System.arraycopy(hardLinkCommand, 0, buf, 0, hardLinkCommand.length); 258 //windows wants argument order: "create <new> <existing>" 259 buf[4] = file.getCanonicalPath(); 260 buf[3] = linkName.getCanonicalPath(); 261 return buf; 262 } 263 264 /* 265 * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkMult(java.lang.String[], java.io.File) 266 */ 267 @Override 268 String[] linkMult(String[] fileBaseNames, File linkDir) 269 throws IOException { 270 String[] buf = new String[fileBaseNames.length 271 + hardLinkMultPrefix.length 272 + hardLinkMultSuffix.length]; 273 String td = linkDir.getCanonicalPath() + hardLinkMultDir; 274 int mark=0; 275 System.arraycopy(hardLinkMultPrefix, 0, buf, mark, 276 hardLinkMultPrefix.length); 277 mark += hardLinkMultPrefix.length; 278 System.arraycopy(fileBaseNames, 0, buf, mark, fileBaseNames.length); 279 mark += fileBaseNames.length; 280 System.arraycopy(hardLinkMultSuffix, 0, buf, mark, 281 hardLinkMultSuffix.length); 282 mark += hardLinkMultSuffix.length; 283 buf[mark - 2] = td; 284 return buf; 285 } 286 287 /* 288 * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkCount(java.io.File) 289 */ 290 @Override 291 String[] linkCount(File file) 292 throws IOException { 293 String[] buf = new String[getLinkCountCommand.length]; 294 System.arraycopy(getLinkCountCommand, 0, buf, 0, 295 getLinkCountCommand.length); 296 buf[getLinkCountCommand.length - 1] = file.getCanonicalPath(); 297 return buf; 298 } 299 300 /* 301 * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#getLinkMultArgLength(java.io.File, java.lang.String[], java.io.File) 302 */ 303 @Override 304 int getLinkMultArgLength(File fileDir, String[] fileBaseNames, File linkDir) 305 throws IOException { 306 int sum = 0; 307 for (String x : fileBaseNames) { 308 // add 1 to account for terminal null or delimiter space 309 sum += 1 + ((x == null) ? 0 : x.length()); 310 } 311 sum += 2 + fileDir.getCanonicalPath().length() + 312 linkDir.getCanonicalPath().length(); 313 //add the fixed overhead of the hardLinkMult command 314 //(prefix, suffix, and Dir suffix) 315 sum += (CMD_EXE + " /q /c for %f in ( ) do " 316 + Shell.WINUTILS + " hardlink create \\%f %f").length(); 317 return sum; 318 } 319 320 /* 321 * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#getMaxAllowedCmdArgLength() 322 */ 323 @Override 324 int getMaxAllowedCmdArgLength() { 325 return maxAllowedCmdArgLength; 326 } 327 } 328 329 330 /** 331 * Calculate the nominal length of all contributors to the total 332 * commandstring length, including fixed overhead of the OS-dependent 333 * command. It's protected rather than private, to assist unit testing, 334 * but real clients are not expected to need it -- see the way 335 * createHardLinkMult() uses it internally so the user doesn't need to worry 336 * about it. 337 * 338 * @param fileDir - source directory, parent of fileBaseNames 339 * @param fileBaseNames - array of path-less file names, relative 340 * to the source directory 341 * @param linkDir - target directory where the hardlinks will be put 342 * @return - total data length (must not exceed maxAllowedCmdArgLength) 343 * @throws IOException 344 */ 345 protected static int getLinkMultArgLength( 346 File fileDir, String[] fileBaseNames, File linkDir) 347 throws IOException { 348 return getHardLinkCommand.getLinkMultArgLength(fileDir, 349 fileBaseNames, linkDir); 350 } 351 352 /** 353 * Return this private value for use by unit tests. 354 * Shell commands are not allowed to have a total string length 355 * exceeding this size. 356 */ 357 protected static int getMaxAllowedCmdArgLength() { 358 return getHardLinkCommand.getMaxAllowedCmdArgLength(); 359 } 360 361 /* 362 * **************************************************** 363 * Complexity is above. User-visible functionality is below 364 * **************************************************** 365 */ 366 367 /** 368 * Creates a hardlink 369 * @param file - existing source file 370 * @param linkName - desired target link file 371 */ 372 public static void createHardLink(File file, File linkName) 373 throws IOException { 374 if (file == null) { 375 throw new IOException( 376 "invalid arguments to createHardLink: source file is null"); 377 } 378 if (linkName == null) { 379 throw new IOException( 380 "invalid arguments to createHardLink: link name is null"); 381 } 382 // construct and execute shell command 383 String[] hardLinkCommand = getHardLinkCommand.linkOne(file, linkName); 384 ShellCommandExecutor shexec = new ShellCommandExecutor(hardLinkCommand); 385 try { 386 shexec.execute(); 387 } catch (ExitCodeException e) { 388 throw new IOException("Failed to execute command " + 389 Arrays.toString(hardLinkCommand) + 390 "; command output: \"" + shexec.getOutput() + "\"" + 391 "; WrappedException: \"" + e.getMessage() + "\""); 392 } 393 } 394 395 /** 396 * Creates hardlinks from multiple existing files within one parent 397 * directory, into one target directory. 398 * @param parentDir - directory containing source files 399 * @param fileBaseNames - list of path-less file names, as returned by 400 * parentDir.list() 401 * @param linkDir - where the hardlinks should be put. It must already exist. 402 * 403 * If the list of files is too long (overflows maxAllowedCmdArgLength), 404 * we will automatically split it into multiple invocations of the 405 * underlying method. 406 */ 407 public static void createHardLinkMult(File parentDir, String[] fileBaseNames, 408 File linkDir) throws IOException { 409 //This is the public method all non-test clients are expected to use. 410 //Normal case - allow up to maxAllowedCmdArgLength characters in the cmd 411 createHardLinkMult(parentDir, fileBaseNames, linkDir, 412 getHardLinkCommand.getMaxAllowedCmdArgLength()); 413 } 414 415 /* 416 * Implements {@link createHardLinkMult} with added variable "maxLength", 417 * to ease unit testing of the auto-splitting feature for long lists. 418 * Likewise why it returns "callCount", the number of sub-arrays that 419 * the file list had to be split into. 420 * Non-test clients are expected to call the public method instead. 421 */ 422 protected static int createHardLinkMult(File parentDir, 423 String[] fileBaseNames, File linkDir, int maxLength) 424 throws IOException { 425 if (parentDir == null) { 426 throw new IOException( 427 "invalid arguments to createHardLinkMult: parent directory is null"); 428 } 429 if (linkDir == null) { 430 throw new IOException( 431 "invalid arguments to createHardLinkMult: link directory is null"); 432 } 433 if (fileBaseNames == null) { 434 throw new IOException( 435 "invalid arguments to createHardLinkMult: " 436 + "filename list can be empty but not null"); 437 } 438 if (fileBaseNames.length == 0) { 439 //the OS cmds can't handle empty list of filenames, 440 //but it's legal, so just return. 441 return 0; 442 } 443 if (!linkDir.exists()) { 444 throw new FileNotFoundException(linkDir + " not found."); 445 } 446 447 //if the list is too long, split into multiple invocations 448 int callCount = 0; 449 if (getLinkMultArgLength(parentDir, fileBaseNames, linkDir) > maxLength 450 && fileBaseNames.length > 1) { 451 String[] list1 = Arrays.copyOf(fileBaseNames, fileBaseNames.length/2); 452 callCount += createHardLinkMult(parentDir, list1, linkDir, maxLength); 453 String[] list2 = Arrays.copyOfRange(fileBaseNames, fileBaseNames.length/2, 454 fileBaseNames.length); 455 callCount += createHardLinkMult(parentDir, list2, linkDir, maxLength); 456 return callCount; 457 } else { 458 callCount = 1; 459 } 460 461 // construct and execute shell command 462 String[] hardLinkCommand = getHardLinkCommand.linkMult(fileBaseNames, 463 linkDir); 464 ShellCommandExecutor shexec = new ShellCommandExecutor(hardLinkCommand, 465 parentDir, null, 0L); 466 try { 467 shexec.execute(); 468 } catch (ExitCodeException e) { 469 throw new IOException(shexec.getOutput() + e.getMessage()); 470 } 471 return callCount; 472 } 473 474 /** 475 * Retrieves the number of links to the specified file. 476 */ 477 public static int getLinkCount(File fileName) throws IOException { 478 if (fileName == null) { 479 throw new IOException( 480 "invalid argument to getLinkCount: file name is null"); 481 } 482 if (!fileName.exists()) { 483 throw new FileNotFoundException(fileName + " not found."); 484 } 485 486 // construct and execute shell command 487 String[] cmd = getHardLinkCommand.linkCount(fileName); 488 String inpMsg = null; 489 String errMsg = null; 490 int exitValue = -1; 491 BufferedReader in = null; 492 493 ShellCommandExecutor shexec = new ShellCommandExecutor(cmd); 494 try { 495 shexec.execute(); 496 in = new BufferedReader(new StringReader(shexec.getOutput())); 497 inpMsg = in.readLine(); 498 exitValue = shexec.getExitCode(); 499 if (inpMsg == null || exitValue != 0) { 500 throw createIOException(fileName, inpMsg, errMsg, exitValue, null); 501 } 502 if (Shell.SOLARIS) { 503 String[] result = inpMsg.split("\\s+"); 504 return Integer.parseInt(result[1]); 505 } else { 506 return Integer.parseInt(inpMsg); 507 } 508 } catch (ExitCodeException e) { 509 inpMsg = shexec.getOutput(); 510 errMsg = e.getMessage(); 511 exitValue = e.getExitCode(); 512 throw createIOException(fileName, inpMsg, errMsg, exitValue, e); 513 } catch (NumberFormatException e) { 514 throw createIOException(fileName, inpMsg, errMsg, exitValue, e); 515 } finally { 516 IOUtils.closeStream(in); 517 } 518 } 519 520 /* Create an IOException for failing to get link count. */ 521 private static IOException createIOException(File f, String message, 522 String error, int exitvalue, Exception cause) { 523 524 final String s = "Failed to get link count on file " + f 525 + ": message=" + message 526 + "; error=" + error 527 + "; exit value=" + exitvalue; 528 return (cause == null) ? new IOException(s) : new IOException(s, cause); 529 } 530 531 532 /** 533 * HardLink statistics counters and methods. 534 * Not multi-thread safe, obviously. 535 * Init is called during HardLink instantiation, above. 536 * 537 * These are intended for use by knowledgeable clients, not internally, 538 * because many of the internal methods are static and can't update these 539 * per-instance counters. 540 */ 541 public static class LinkStats { 542 public int countDirs = 0; 543 public int countSingleLinks = 0; 544 public int countMultLinks = 0; 545 public int countFilesMultLinks = 0; 546 public int countEmptyDirs = 0; 547 public int countPhysicalFileCopies = 0; 548 549 public void clear() { 550 countDirs = 0; 551 countSingleLinks = 0; 552 countMultLinks = 0; 553 countFilesMultLinks = 0; 554 countEmptyDirs = 0; 555 countPhysicalFileCopies = 0; 556 } 557 558 public String report() { 559 return "HardLinkStats: " + countDirs + " Directories, including " 560 + countEmptyDirs + " Empty Directories, " 561 + countSingleLinks 562 + " single Link operations, " + countMultLinks 563 + " multi-Link operations, linking " + countFilesMultLinks 564 + " files, total " + (countSingleLinks + countFilesMultLinks) 565 + " linkable files. Also physically copied " 566 + countPhysicalFileCopies + " other files."; 567 } 568 } 569 } 570