001 /**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements. See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership. The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License. You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018
019 package org.apache.hadoop.fs;
020
021 import java.io.BufferedReader;
022 import java.io.File;
023 import java.io.FileNotFoundException;
024 import java.io.IOException;
025 import java.io.InputStreamReader;
026 import java.util.Arrays;
027
028 /**
029 * Class for creating hardlinks.
 * Supports Unix/Linux, Solaris, Mac OS X, and WinXP/2003/Vista/Windows 7
 * via Cygwin.
031 *
032 * The HardLink class was formerly a static inner class of FSUtil,
033 * and the methods provided were blatantly non-thread-safe.
034 * To enable volume-parallel Update snapshots, we now provide static
035 * threadsafe methods that allocate new buffer string arrays
036 * upon each call. We also provide an API to hardlink all files in a
037 * directory with a single command, which is up to 128 times more
038 * efficient - and minimizes the impact of the extra buffer creations.
039 */
040 public class HardLink {
041
/**
 * Operating-system families that HardLink distinguishes when choosing
 * which shell commands to build. The value for the running JVM is derived
 * by getOSType() from the "os.name" system property.
 */
public enum OSType {
  /** Fallback for any os.name that matches none of the other families. */
  OS_TYPE_UNIX,
  /**
   * os.name contains "Windows" together with one of "XP", "2003", "Vista"
   * or a Windows 7 spelling.
   */
  OS_TYPE_WINXP,
  /** os.name contains "SunOS" or "Solaris". */
  OS_TYPE_SOLARIS,
  /** os.name contains "Mac". */
  OS_TYPE_MAC
}
048
/**
 * OS family of the running JVM, determined once during static
 * initialization of this class.
 */
public static OSType osType;

/** OS-specific builder of the shell commands used by the static methods. */
private static HardLinkCommandGetter getHardLinkCommand;

/** Per-instance statistics counters (deliberately not static). */
public final LinkStats linkStats;

// Initialize the command "getter" statically, so that its methods can be
// used without instantiating a HardLink object.
static {
  osType = getOSType();
  if (osType == OSType.OS_TYPE_WINXP) {
    // Windows
    getHardLinkCommand = new HardLinkCGWin();
  } else {
    // Unix
    getHardLinkCommand = new HardLinkCGUnix();
    // Override the link-count command for the particular Unix variant.
    // Linux is already set as the default - {"stat","-c%h", null}
    if (osType == OSType.OS_TYPE_MAC) {
      String[] linkCountCmdTemplate = {"stat", "-f%l", null};
      HardLinkCGUnix.setLinkCountCmdTemplate(linkCountCmdTemplate);
    } else if (osType == OSType.OS_TYPE_SOLARIS) {
      // "-d" makes ls describe a directory operand itself instead of
      // listing its contents; without it the first output line for a
      // directory is "total N" and N would be parsed as the link count.
      // For non-directory operands the output is unchanged.
      String[] linkCountCmdTemplate = {"ls", "-ld", null};
      HardLinkCGUnix.setLinkCountCmdTemplate(linkCountCmdTemplate);
    }
  }
}
075
/**
 * Creates a HardLink instance whose only state is a fresh set of
 * statistics counters.
 */
public HardLink() {
  this.linkStats = new LinkStats();
}
079
080 static private OSType getOSType() {
081 String osName = System.getProperty("os.name");
082 if (osName.contains("Windows") &&
083 (osName.contains("XP")
084 || osName.contains("2003")
085 || osName.contains("Vista")
086 || osName.contains("Windows_7")
087 || osName.contains("Windows 7")
088 || osName.contains("Windows7"))) {
089 return OSType.OS_TYPE_WINXP;
090 }
091 else if (osName.contains("SunOS")
092 || osName.contains("Solaris")) {
093 return OSType.OS_TYPE_SOLARIS;
094 }
095 else if (osName.contains("Mac")) {
096 return OSType.OS_TYPE_MAC;
097 }
098 else {
099 return OSType.OS_TYPE_UNIX;
100 }
101 }
102
/**
 * This abstract class bridges the OS-dependent implementations of the
 * needed functionality for creating hardlinks and querying link counts.
 * The particular implementation class is chosen during the
 * static initialization phase of the HardLink class.
 * The "getter" methods construct shell commands (as argument arrays)
 * for various purposes; they do not execute anything themselves.
 */
private static abstract class HardLinkCommandGetter {

  /**
   * Get the command needed to hardlink a bunch of files from
   * a single source directory into a target directory. The source directory
   * is not specified here, but the command will be executed using the source
   * directory as the "current working directory" of the shell invocation.
   *
   * @param fileBaseNames - array of path-less file names, relative
   *            to the source directory
   * @param linkDir - target directory where the hardlinks will be put
   * @return - an array of Strings suitable for use as a single shell command
   *            with {@link Runtime#exec(String[])}
   * @throws IOException - if any of the file or path names misbehave
   */
  abstract String[] linkMult(String[] fileBaseNames, File linkDir)
      throws IOException;

  /**
   * Get the command needed to hardlink a single file.
   *
   * @param file - existing source file
   * @param linkName - link file to be created
   * @return - the command as an array of Strings
   * @throws IOException - if a path cannot be converted for the command
   */
  abstract String[] linkOne(File file, File linkName) throws IOException;

  /**
   * Get the command to query the hardlink count of a file.
   *
   * @param file - file whose link count is wanted
   * @return - the command as an array of Strings
   * @throws IOException - if the path cannot be converted for the command
   */
  abstract String[] linkCount(File file) throws IOException;

  /**
   * Calculate the total string length of the shell command
   * resulting from execution of linkMult, plus the length of the
   * source directory name (which will also be provided to the shell).
   *
   * @param fileDir - source directory, parent of fileBaseNames
   * @param fileBaseNames - array of path-less file names, relative
   *            to the source directory
   * @param linkDir - target directory where the hardlinks will be put
   * @return - total data length (must not exceed maxAllowedCmdArgLength)
   * @throws IOException - if a path cannot be converted for the command
   */
  abstract int getLinkMultArgLength(
      File fileDir, String[] fileBaseNames, File linkDir)
      throws IOException;

  /**
   * Get the maximum allowed string length of a shell command on this OS,
   * which is just the documented minimum guaranteed supported command
   * length - approximately 32KB for Unix, and 8KB for Windows.
   *
   * @return - the maximum command length, in characters
   */
  abstract int getMaxAllowedCmdArgLength();
}
161
162 /**
163 * Implementation of HardLinkCommandGetter class for Unix
164 */
165 static class HardLinkCGUnix extends HardLinkCommandGetter {
166 private static String[] hardLinkCommand = {"ln", null, null};
167 private static String[] hardLinkMultPrefix = {"ln"};
168 private static String[] hardLinkMultSuffix = {null};
169 private static String[] getLinkCountCommand = {"stat","-c%h", null};
170 //Unix guarantees at least 32K bytes cmd length.
171 //Subtract another 64b to allow for Java 'exec' overhead
172 private static final int maxAllowedCmdArgLength = 32*1024 - 65;
173
174 private static synchronized
175 void setLinkCountCmdTemplate(String[] template) {
176 //May update this for specific unix variants,
177 //after static initialization phase
178 getLinkCountCommand = template;
179 }
180
181 /*
182 * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkOne(java.io.File, java.io.File)
183 */
184 @Override
185 String[] linkOne(File file, File linkName)
186 throws IOException {
187 String[] buf = new String[hardLinkCommand.length];
188 System.arraycopy(hardLinkCommand, 0, buf, 0, hardLinkCommand.length);
189 //unix wants argument order: "ln <existing> <new>"
190 buf[1] = FileUtil.makeShellPath(file, true);
191 buf[2] = FileUtil.makeShellPath(linkName, true);
192 return buf;
193 }
194
195 /*
196 * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkMult(java.lang.String[], java.io.File)
197 */
198 @Override
199 String[] linkMult(String[] fileBaseNames, File linkDir)
200 throws IOException {
201 String[] buf = new String[fileBaseNames.length
202 + hardLinkMultPrefix.length
203 + hardLinkMultSuffix.length];
204 int mark=0;
205 System.arraycopy(hardLinkMultPrefix, 0, buf, mark,
206 hardLinkMultPrefix.length);
207 mark += hardLinkMultPrefix.length;
208 System.arraycopy(fileBaseNames, 0, buf, mark, fileBaseNames.length);
209 mark += fileBaseNames.length;
210 buf[mark] = FileUtil.makeShellPath(linkDir, true);
211 return buf;
212 }
213
214 /*
215 * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkCount(java.io.File)
216 */
217 @Override
218 String[] linkCount(File file)
219 throws IOException {
220 String[] buf = new String[getLinkCountCommand.length];
221 System.arraycopy(getLinkCountCommand, 0, buf, 0,
222 getLinkCountCommand.length);
223 buf[getLinkCountCommand.length - 1] = FileUtil.makeShellPath(file, true);
224 return buf;
225 }
226
227 /*
228 * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#getLinkMultArgLength(java.io.File, java.lang.String[], java.io.File)
229 */
230 @Override
231 int getLinkMultArgLength(File fileDir, String[] fileBaseNames, File linkDir)
232 throws IOException{
233 int sum = 0;
234 for (String x : fileBaseNames) {
235 // add 1 to account for terminal null or delimiter space
236 sum += 1 + ((x == null) ? 0 : x.length());
237 }
238 sum += 2 + FileUtil.makeShellPath(fileDir, true).length()
239 + FileUtil.makeShellPath(linkDir, true).length();
240 //add the fixed overhead of the hardLinkMult prefix and suffix
241 sum += 3; //length("ln") + 1
242 return sum;
243 }
244
245 /*
246 * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#getMaxAllowedCmdArgLength()
247 */
248 @Override
249 int getMaxAllowedCmdArgLength() {
250 return maxAllowedCmdArgLength;
251 }
252 }
253
254
255 /**
256 * Implementation of HardLinkCommandGetter class for Windows
257 *
258 * Note that the linkCount shell command for Windows is actually
259 * a Cygwin shell command, and depends on ${cygwin}/bin
260 * being in the Windows PATH environment variable, so
261 * stat.exe can be found.
262 */
263 static class HardLinkCGWin extends HardLinkCommandGetter {
264 //The Windows command getter impl class and its member fields are
265 //package-private ("default") access instead of "private" to assist
266 //unit testing (sort of) on non-Win servers
267
268 static String[] hardLinkCommand = {
269 "fsutil","hardlink","create", null, null};
270 static String[] hardLinkMultPrefix = {
271 "cmd","/q","/c","for", "%f", "in", "("};
272 static String hardLinkMultDir = "\\%f";
273 static String[] hardLinkMultSuffix = {
274 ")", "do", "fsutil", "hardlink", "create", null,
275 "%f", "1>NUL"};
276 static String[] getLinkCountCommand = {"stat","-c%h", null};
277 //Windows guarantees only 8K - 1 bytes cmd length.
278 //Subtract another 64b to allow for Java 'exec' overhead
279 static final int maxAllowedCmdArgLength = 8*1024 - 65;
280
281 /*
282 * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkOne(java.io.File, java.io.File)
283 */
284 @Override
285 String[] linkOne(File file, File linkName)
286 throws IOException {
287 String[] buf = new String[hardLinkCommand.length];
288 System.arraycopy(hardLinkCommand, 0, buf, 0, hardLinkCommand.length);
289 //windows wants argument order: "create <new> <existing>"
290 buf[4] = file.getCanonicalPath();
291 buf[3] = linkName.getCanonicalPath();
292 return buf;
293 }
294
295 /*
296 * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkMult(java.lang.String[], java.io.File)
297 */
298 @Override
299 String[] linkMult(String[] fileBaseNames, File linkDir)
300 throws IOException {
301 String[] buf = new String[fileBaseNames.length
302 + hardLinkMultPrefix.length
303 + hardLinkMultSuffix.length];
304 String td = linkDir.getCanonicalPath() + hardLinkMultDir;
305 int mark=0;
306 System.arraycopy(hardLinkMultPrefix, 0, buf, mark,
307 hardLinkMultPrefix.length);
308 mark += hardLinkMultPrefix.length;
309 System.arraycopy(fileBaseNames, 0, buf, mark, fileBaseNames.length);
310 mark += fileBaseNames.length;
311 System.arraycopy(hardLinkMultSuffix, 0, buf, mark,
312 hardLinkMultSuffix.length);
313 mark += hardLinkMultSuffix.length;
314 buf[mark - 3] = td;
315 return buf;
316 }
317
318 /*
319 * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkCount(java.io.File)
320 */
321 @Override
322 String[] linkCount(File file)
323 throws IOException {
324 String[] buf = new String[getLinkCountCommand.length];
325 System.arraycopy(getLinkCountCommand, 0, buf, 0,
326 getLinkCountCommand.length);
327 //The linkCount command is actually a Cygwin shell command,
328 //not a Windows shell command, so we should use "makeShellPath()"
329 //instead of "getCanonicalPath()". However, that causes another
330 //shell exec to "cygpath.exe", and "stat.exe" actually can handle
331 //DOS-style paths (it just prints a couple hundred bytes of warning
332 //to stderr), so we use the more efficient "getCanonicalPath()".
333 buf[getLinkCountCommand.length - 1] = file.getCanonicalPath();
334 return buf;
335 }
336
337 /*
338 * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#getLinkMultArgLength(java.io.File, java.lang.String[], java.io.File)
339 */
340 @Override
341 int getLinkMultArgLength(File fileDir, String[] fileBaseNames, File linkDir)
342 throws IOException {
343 int sum = 0;
344 for (String x : fileBaseNames) {
345 // add 1 to account for terminal null or delimiter space
346 sum += 1 + ((x == null) ? 0 : x.length());
347 }
348 sum += 2 + fileDir.getCanonicalPath().length() +
349 linkDir.getCanonicalPath().length();
350 //add the fixed overhead of the hardLinkMult command
351 //(prefix, suffix, and Dir suffix)
352 sum += ("cmd.exe /q /c for %f in ( ) do "
353 + "fsutil hardlink create \\%f %f 1>NUL ").length();
354 return sum;
355 }
356
357 /*
358 * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#getMaxAllowedCmdArgLength()
359 */
360 @Override
361 int getMaxAllowedCmdArgLength() {
362 return maxAllowedCmdArgLength;
363 }
364 }
365
366
367 /**
368 * Calculate the nominal length of all contributors to the total
369 * commandstring length, including fixed overhead of the OS-dependent
370 * command. It's protected rather than private, to assist unit testing,
371 * but real clients are not expected to need it -- see the way
372 * createHardLinkMult() uses it internally so the user doesn't need to worry
373 * about it.
374 *
375 * @param fileDir - source directory, parent of fileBaseNames
376 * @param fileBaseNames - array of path-less file names, relative
377 * to the source directory
378 * @param linkDir - target directory where the hardlinks will be put
379 * @return - total data length (must not exceed maxAllowedCmdArgLength)
380 * @throws IOException
381 */
382 protected static int getLinkMultArgLength(
383 File fileDir, String[] fileBaseNames, File linkDir)
384 throws IOException {
385 return getHardLinkCommand.getLinkMultArgLength(fileDir,
386 fileBaseNames, linkDir);
387 }
388
389 /**
390 * Return this private value for use by unit tests.
391 * Shell commands are not allowed to have a total string length
392 * exceeding this size.
393 */
394 protected static int getMaxAllowedCmdArgLength() {
395 return getHardLinkCommand.getMaxAllowedCmdArgLength();
396 }
397
398 /*
399 * ****************************************************
400 * Complexity is above. User-visible functionality is below
401 * ****************************************************
402 */
403
404 /**
405 * Creates a hardlink
406 * @param file - existing source file
407 * @param linkName - desired target link file
408 */
409 public static void createHardLink(File file, File linkName)
410 throws IOException {
411 if (file == null) {
412 throw new IOException(
413 "invalid arguments to createHardLink: source file is null");
414 }
415 if (linkName == null) {
416 throw new IOException(
417 "invalid arguments to createHardLink: link name is null");
418 }
419 // construct and execute shell command
420 String[] hardLinkCommand = getHardLinkCommand.linkOne(file, linkName);
421 Process process = Runtime.getRuntime().exec(hardLinkCommand);
422 try {
423 if (process.waitFor() != 0) {
424 String errMsg = new BufferedReader(new InputStreamReader(
425 process.getInputStream())).readLine();
426 if (errMsg == null) errMsg = "";
427 String inpMsg = new BufferedReader(new InputStreamReader(
428 process.getErrorStream())).readLine();
429 if (inpMsg == null) inpMsg = "";
430 throw new IOException(errMsg + inpMsg);
431 }
432 } catch (InterruptedException e) {
433 throw new IOException(e);
434 } finally {
435 process.destroy();
436 }
437 }
438
439 /**
440 * Creates hardlinks from multiple existing files within one parent
441 * directory, into one target directory.
442 * @param parentDir - directory containing source files
443 * @param fileBaseNames - list of path-less file names, as returned by
444 * parentDir.list()
445 * @param linkDir - where the hardlinks should be put. It must already exist.
446 *
447 * If the list of files is too long (overflows maxAllowedCmdArgLength),
448 * we will automatically split it into multiple invocations of the
449 * underlying method.
450 */
451 public static void createHardLinkMult(File parentDir, String[] fileBaseNames,
452 File linkDir) throws IOException {
453 //This is the public method all non-test clients are expected to use.
454 //Normal case - allow up to maxAllowedCmdArgLength characters in the cmd
455 createHardLinkMult(parentDir, fileBaseNames, linkDir,
456 getHardLinkCommand.getMaxAllowedCmdArgLength());
457 }
458
459 /*
460 * Implements {@link createHardLinkMult} with added variable "maxLength",
461 * to ease unit testing of the auto-splitting feature for long lists.
462 * Likewise why it returns "callCount", the number of sub-arrays that
463 * the file list had to be split into.
464 * Non-test clients are expected to call the public method instead.
465 */
466 protected static int createHardLinkMult(File parentDir,
467 String[] fileBaseNames, File linkDir, int maxLength)
468 throws IOException {
469 if (parentDir == null) {
470 throw new IOException(
471 "invalid arguments to createHardLinkMult: parent directory is null");
472 }
473 if (linkDir == null) {
474 throw new IOException(
475 "invalid arguments to createHardLinkMult: link directory is null");
476 }
477 if (fileBaseNames == null) {
478 throw new IOException(
479 "invalid arguments to createHardLinkMult: "
480 + "filename list can be empty but not null");
481 }
482 if (fileBaseNames.length == 0) {
483 //the OS cmds can't handle empty list of filenames,
484 //but it's legal, so just return.
485 return 0;
486 }
487 if (!linkDir.exists()) {
488 throw new FileNotFoundException(linkDir + " not found.");
489 }
490
491 //if the list is too long, split into multiple invocations
492 int callCount = 0;
493 if (getLinkMultArgLength(parentDir, fileBaseNames, linkDir) > maxLength
494 && fileBaseNames.length > 1) {
495 String[] list1 = Arrays.copyOf(fileBaseNames, fileBaseNames.length/2);
496 callCount += createHardLinkMult(parentDir, list1, linkDir, maxLength);
497 String[] list2 = Arrays.copyOfRange(fileBaseNames, fileBaseNames.length/2,
498 fileBaseNames.length);
499 callCount += createHardLinkMult(parentDir, list2, linkDir, maxLength);
500 return callCount;
501 } else {
502 callCount = 1;
503 }
504
505 // construct and execute shell command
506 String[] hardLinkCommand = getHardLinkCommand.linkMult(fileBaseNames,
507 linkDir);
508 Process process = Runtime.getRuntime().exec(hardLinkCommand, null,
509 parentDir);
510 try {
511 if (process.waitFor() != 0) {
512 String errMsg = new BufferedReader(new InputStreamReader(
513 process.getInputStream())).readLine();
514 if (errMsg == null) errMsg = "";
515 String inpMsg = new BufferedReader(new InputStreamReader(
516 process.getErrorStream())).readLine();
517 if (inpMsg == null) inpMsg = "";
518 throw new IOException(errMsg + inpMsg);
519 }
520 } catch (InterruptedException e) {
521 throw new IOException(e);
522 } finally {
523 process.destroy();
524 }
525 return callCount;
526 }
527
528 /**
529 * Retrieves the number of links to the specified file.
530 */
531 public static int getLinkCount(File fileName) throws IOException {
532 if (fileName == null) {
533 throw new IOException(
534 "invalid argument to getLinkCount: file name is null");
535 }
536 if (!fileName.exists()) {
537 throw new FileNotFoundException(fileName + " not found.");
538 }
539
540 // construct and execute shell command
541 String[] cmd = getHardLinkCommand.linkCount(fileName);
542 String inpMsg = null;
543 String errMsg = null;
544 int exitValue = -1;
545 BufferedReader in = null;
546 BufferedReader err = null;
547
548 Process process = Runtime.getRuntime().exec(cmd);
549 try {
550 exitValue = process.waitFor();
551 in = new BufferedReader(new InputStreamReader(
552 process.getInputStream()));
553 inpMsg = in.readLine();
554 err = new BufferedReader(new InputStreamReader(
555 process.getErrorStream()));
556 errMsg = err.readLine();
557 if (inpMsg == null || exitValue != 0) {
558 throw createIOException(fileName, inpMsg, errMsg, exitValue, null);
559 }
560 if (osType == OSType.OS_TYPE_SOLARIS) {
561 String[] result = inpMsg.split("\\s+");
562 return Integer.parseInt(result[1]);
563 } else {
564 return Integer.parseInt(inpMsg);
565 }
566 } catch (NumberFormatException e) {
567 throw createIOException(fileName, inpMsg, errMsg, exitValue, e);
568 } catch (InterruptedException e) {
569 throw createIOException(fileName, inpMsg, errMsg, exitValue, e);
570 } finally {
571 process.destroy();
572 if (in != null) in.close();
573 if (err != null) err.close();
574 }
575 }
576
577 /* Create an IOException for failing to get link count. */
578 private static IOException createIOException(File f, String message,
579 String error, int exitvalue, Exception cause) {
580
581 final String winErrMsg = "; Windows errors in getLinkCount are often due "
582 + "to Cygwin misconfiguration";
583
584 final String s = "Failed to get link count on file " + f
585 + ": message=" + message
586 + "; error=" + error
587 + ((osType == OSType.OS_TYPE_WINXP) ? winErrMsg : "")
588 + "; exit value=" + exitvalue;
589 return (cause == null) ? new IOException(s) : new IOException(s, cause);
590 }
591
592
593 /**
594 * HardLink statistics counters and methods.
595 * Not multi-thread safe, obviously.
596 * Init is called during HardLink instantiation, above.
597 *
598 * These are intended for use by knowledgeable clients, not internally,
599 * because many of the internal methods are static and can't update these
600 * per-instance counters.
601 */
602 public static class LinkStats {
603 public int countDirs = 0;
604 public int countSingleLinks = 0;
605 public int countMultLinks = 0;
606 public int countFilesMultLinks = 0;
607 public int countEmptyDirs = 0;
608 public int countPhysicalFileCopies = 0;
609
610 public void clear() {
611 countDirs = 0;
612 countSingleLinks = 0;
613 countMultLinks = 0;
614 countFilesMultLinks = 0;
615 countEmptyDirs = 0;
616 countPhysicalFileCopies = 0;
617 }
618
619 public String report() {
620 return "HardLinkStats: " + countDirs + " Directories, including "
621 + countEmptyDirs + " Empty Directories, "
622 + countSingleLinks
623 + " single Link operations, " + countMultLinks
624 + " multi-Link operations, linking " + countFilesMultLinks
625 + " files, total " + (countSingleLinks + countFilesMultLinks)
626 + " linkable files. Also physically copied "
627 + countPhysicalFileCopies + " other files.";
628 }
629 }
630 }
631