001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018
019package org.apache.hadoop.fs;
020
021import java.io.BufferedReader;
022import java.io.File;
023import java.io.FileNotFoundException;
024import java.io.IOException;
025import java.io.InputStreamReader;
026import java.util.Arrays;
027
/**
 * Class for creating hardlinks.
 * Supports Unix/Linux, Solaris, Mac OS X, and Windows XP/2003/Vista/7
 * via Cygwin.
 * 
 * The HardLink class was formerly a static inner class of FSUtil,
 * and the methods provided were blatantly non-thread-safe.
 * To enable volume-parallel Update snapshots, we now provide static 
 * thread-safe methods that allocate new buffer string arrays
 * upon each call.  We also provide an API to hardlink all files in a
 * directory with a single command, which is up to 128 times more 
 * efficient - and minimizes the impact of the extra buffer creations.
 */
040public class HardLink { 
041
042  public enum OSType {
043    OS_TYPE_UNIX,
044    OS_TYPE_WINXP,
045    OS_TYPE_SOLARIS,
046    OS_TYPE_MAC
047  }
048  
049  public static OSType osType;
050  private static HardLinkCommandGetter getHardLinkCommand;
051  
052  public final LinkStats linkStats; //not static
053  
054  //initialize the command "getters" statically, so can use their 
055  //methods without instantiating the HardLink object
056  static { 
057    osType = getOSType();
058    if (osType == OSType.OS_TYPE_WINXP) {
059      // Windows
060      getHardLinkCommand = new HardLinkCGWin();
061    } else {
062      // Unix
063      getHardLinkCommand = new HardLinkCGUnix();
064      //override getLinkCountCommand for the particular Unix variant
065      //Linux is already set as the default - {"stat","-c%h", null}
066      if (osType == OSType.OS_TYPE_MAC) {
067        String[] linkCountCmdTemplate = {"/usr/bin/stat","-f%l", null};
068        HardLinkCGUnix.setLinkCountCmdTemplate(linkCountCmdTemplate);
069      } else if (osType == OSType.OS_TYPE_SOLARIS) {
070        String[] linkCountCmdTemplate = {"ls","-l", null};
071        HardLinkCGUnix.setLinkCountCmdTemplate(linkCountCmdTemplate);        
072      }
073    }
074  }
075
076  public HardLink() {
077    linkStats = new LinkStats();
078  }
079  
080  static private OSType getOSType() {
081    String osName = System.getProperty("os.name");
082    if (osName.contains("Windows") &&
083            (osName.contains("XP") 
084            || osName.contains("2003") 
085            || osName.contains("Vista")
086            || osName.contains("Windows_7")
087            || osName.contains("Windows 7") 
088            || osName.contains("Windows7"))) {
089      return OSType.OS_TYPE_WINXP;
090    }
091    else if (osName.contains("SunOS") 
092            || osName.contains("Solaris")) {
093       return OSType.OS_TYPE_SOLARIS;
094    }
095    else if (osName.contains("Mac")) {
096       return OSType.OS_TYPE_MAC;
097    }
098    else {
099      return OSType.OS_TYPE_UNIX;
100    }
101  }
102  
103  /**
104   * This abstract class bridges the OS-dependent implementations of the 
105   * needed functionality for creating hardlinks and querying link counts.
106   * The particular implementation class is chosen during 
107   * static initialization phase of the HardLink class.
108   * The "getter" methods construct shell command strings for various purposes.
109   */
110  private static abstract class HardLinkCommandGetter {
111
112    /**
113     * Get the command string needed to hardlink a bunch of files from
114     * a single source directory into a target directory.  The source directory
115     * is not specified here, but the command will be executed using the source
116     * directory as the "current working directory" of the shell invocation.
117     * 
118     * @param fileBaseNames - array of path-less file names, relative
119     *            to the source directory
120     * @param linkDir - target directory where the hardlinks will be put
121     * @return - an array of Strings suitable for use as a single shell command
122     *            with {@link Runtime.exec()}
123     * @throws IOException - if any of the file or path names misbehave
124     */
125    abstract String[] linkMult(String[] fileBaseNames, File linkDir) 
126                          throws IOException;
127    
128    /**
129     * Get the command string needed to hardlink a single file
130     */
131    abstract String[] linkOne(File file, File linkName) throws IOException;
132    
133    /**
134     * Get the command string to query the hardlink count of a file
135     */
136    abstract String[] linkCount(File file) throws IOException;
137    
138    /**
139     * Calculate the total string length of the shell command
140     * resulting from execution of linkMult, plus the length of the
141     * source directory name (which will also be provided to the shell)
142     * 
143     * @param fileDir - source directory, parent of fileBaseNames
144     * @param fileBaseNames - array of path-less file names, relative
145     *            to the source directory
146     * @param linkDir - target directory where the hardlinks will be put
147     * @return - total data length (must not exceed maxAllowedCmdArgLength)
148     * @throws IOException
149     */
150    abstract int getLinkMultArgLength(
151                     File fileDir, String[] fileBaseNames, File linkDir) 
152                     throws IOException;
153    
154    /**
155     * Get the maximum allowed string length of a shell command on this OS,
156     * which is just the documented minimum guaranteed supported command
157     * length - aprx. 32KB for Unix, and 8KB for Windows.
158     */
159    abstract int getMaxAllowedCmdArgLength(); 
160  }
161  
162  /**
163   * Implementation of HardLinkCommandGetter class for Unix
164   */
165  static class HardLinkCGUnix extends HardLinkCommandGetter {
166    private static String[] hardLinkCommand = {"ln", null, null};
167    private static String[] hardLinkMultPrefix = {"ln"};
168    private static String[] hardLinkMultSuffix = {null};
169    private static String[] getLinkCountCommand = {"stat","-c%h", null};
170    //Unix guarantees at least 32K bytes cmd length.
171    //Subtract another 64b to allow for Java 'exec' overhead
172    private static final int maxAllowedCmdArgLength = 32*1024 - 65;
173    
174    private static synchronized 
175    void setLinkCountCmdTemplate(String[] template) {
176      //May update this for specific unix variants, 
177      //after static initialization phase
178      getLinkCountCommand = template;
179    }
180    
181    /*
182     * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkOne(java.io.File, java.io.File)
183     */
184    @Override
185    String[] linkOne(File file, File linkName) 
186    throws IOException {
187      String[] buf = new String[hardLinkCommand.length];
188      System.arraycopy(hardLinkCommand, 0, buf, 0, hardLinkCommand.length);
189      //unix wants argument order: "ln <existing> <new>"
190      buf[1] = FileUtil.makeShellPath(file, true); 
191      buf[2] = FileUtil.makeShellPath(linkName, true);
192      return buf;
193    }
194    
195    /*
196     * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkMult(java.lang.String[], java.io.File)
197     */
198    @Override
199    String[] linkMult(String[] fileBaseNames, File linkDir) 
200    throws IOException {
201      String[] buf = new String[fileBaseNames.length 
202                                + hardLinkMultPrefix.length 
203                                + hardLinkMultSuffix.length];
204      int mark=0;
205      System.arraycopy(hardLinkMultPrefix, 0, buf, mark, 
206                       hardLinkMultPrefix.length);
207      mark += hardLinkMultPrefix.length;
208      System.arraycopy(fileBaseNames, 0, buf, mark, fileBaseNames.length);
209      mark += fileBaseNames.length;
210      buf[mark] = FileUtil.makeShellPath(linkDir, true);
211      return buf;
212    }
213    
214    /*
215     * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkCount(java.io.File)
216     */
217    @Override
218    String[] linkCount(File file) 
219    throws IOException {
220      String[] buf = new String[getLinkCountCommand.length];
221      System.arraycopy(getLinkCountCommand, 0, buf, 0, 
222                       getLinkCountCommand.length);
223      buf[getLinkCountCommand.length - 1] = FileUtil.makeShellPath(file, true);
224      return buf;
225    }
226    
227    /*
228     * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#getLinkMultArgLength(java.io.File, java.lang.String[], java.io.File)
229     */
230    @Override
231    int getLinkMultArgLength(File fileDir, String[] fileBaseNames, File linkDir) 
232    throws IOException{
233      int sum = 0;
234      for (String x : fileBaseNames) {
235        // add 1 to account for terminal null or delimiter space
236        sum += 1 + ((x == null) ? 0 : x.length());
237      }
238      sum += 2 + FileUtil.makeShellPath(fileDir, true).length()
239             + FileUtil.makeShellPath(linkDir, true).length();
240      //add the fixed overhead of the hardLinkMult prefix and suffix
241      sum += 3; //length("ln") + 1
242      return sum;
243    }
244    
245    /*
246     * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#getMaxAllowedCmdArgLength()
247     */
248    @Override
249    int getMaxAllowedCmdArgLength() {
250      return maxAllowedCmdArgLength;
251    }
252  }
253  
254  
255  /**
256   * Implementation of HardLinkCommandGetter class for Windows
257   * 
258   * Note that the linkCount shell command for Windows is actually
259   * a Cygwin shell command, and depends on ${cygwin}/bin
260   * being in the Windows PATH environment variable, so
261   * stat.exe can be found.
262   */
263  static class HardLinkCGWin extends HardLinkCommandGetter {
264    //The Windows command getter impl class and its member fields are
265    //package-private ("default") access instead of "private" to assist 
266    //unit testing (sort of) on non-Win servers
267
268    static String[] hardLinkCommand = {
269                        "fsutil","hardlink","create", null, null};
270    static String[] hardLinkMultPrefix = {
271                        "cmd","/q","/c","for", "%f", "in", "("};
272    static String   hardLinkMultDir = "\\%f";
273    static String[] hardLinkMultSuffix = {
274                        ")", "do", "fsutil", "hardlink", "create", null, 
275                        "%f", "1>NUL"};
276    static String[] getLinkCountCommand = {"stat","-c%h", null};
277    //Windows guarantees only 8K - 1 bytes cmd length.
278    //Subtract another 64b to allow for Java 'exec' overhead
279    static final int maxAllowedCmdArgLength = 8*1024 - 65;
280
281    /*
282     * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkOne(java.io.File, java.io.File)
283     */
284    @Override
285    String[] linkOne(File file, File linkName) 
286    throws IOException {
287      String[] buf = new String[hardLinkCommand.length];
288      System.arraycopy(hardLinkCommand, 0, buf, 0, hardLinkCommand.length);
289      //windows wants argument order: "create <new> <existing>"
290      buf[4] = file.getCanonicalPath(); 
291      buf[3] = linkName.getCanonicalPath();
292      return buf;
293    }
294    
295    /*
296     * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkMult(java.lang.String[], java.io.File)
297     */
298    @Override
299    String[] linkMult(String[] fileBaseNames, File linkDir) 
300    throws IOException {
301      String[] buf = new String[fileBaseNames.length 
302                                + hardLinkMultPrefix.length 
303                                + hardLinkMultSuffix.length];
304      String td = linkDir.getCanonicalPath() + hardLinkMultDir;
305      int mark=0;
306      System.arraycopy(hardLinkMultPrefix, 0, buf, mark, 
307                       hardLinkMultPrefix.length);
308      mark += hardLinkMultPrefix.length;
309      System.arraycopy(fileBaseNames, 0, buf, mark, fileBaseNames.length);
310      mark += fileBaseNames.length;
311      System.arraycopy(hardLinkMultSuffix, 0, buf, mark, 
312                       hardLinkMultSuffix.length);
313      mark += hardLinkMultSuffix.length;
314      buf[mark - 3] = td;
315      return buf;
316    }
317    
318    /*
319     * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkCount(java.io.File)
320     */
321    @Override
322    String[] linkCount(File file) 
323    throws IOException {
324      String[] buf = new String[getLinkCountCommand.length];
325      System.arraycopy(getLinkCountCommand, 0, buf, 0, 
326                       getLinkCountCommand.length);
327      //The linkCount command is actually a Cygwin shell command,
328      //not a Windows shell command, so we should use "makeShellPath()"
329      //instead of "getCanonicalPath()".  However, that causes another
330      //shell exec to "cygpath.exe", and "stat.exe" actually can handle
331      //DOS-style paths (it just prints a couple hundred bytes of warning
332      //to stderr), so we use the more efficient "getCanonicalPath()".
333      buf[getLinkCountCommand.length - 1] = file.getCanonicalPath();
334      return buf;
335    }
336    
337    /*
338     * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#getLinkMultArgLength(java.io.File, java.lang.String[], java.io.File)
339     */
340    @Override
341    int getLinkMultArgLength(File fileDir, String[] fileBaseNames, File linkDir) 
342    throws IOException {
343      int sum = 0;
344      for (String x : fileBaseNames) {
345        // add 1 to account for terminal null or delimiter space
346        sum += 1 + ((x == null) ? 0 : x.length());
347      }
348      sum += 2 + fileDir.getCanonicalPath().length() +
349               linkDir.getCanonicalPath().length();
350      //add the fixed overhead of the hardLinkMult command 
351      //(prefix, suffix, and Dir suffix)
352      sum += ("cmd.exe /q /c for %f in ( ) do "
353              + "fsutil hardlink create \\%f %f 1>NUL ").length();
354      return sum;
355    }
356    
357    /*
358     * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#getMaxAllowedCmdArgLength()
359     */
360    @Override
361    int getMaxAllowedCmdArgLength() {
362      return maxAllowedCmdArgLength;
363    }
364  }
365  
366  
367  /**
368   * Calculate the nominal length of all contributors to the total 
369   * commandstring length, including fixed overhead of the OS-dependent 
370   * command.  It's protected rather than private, to assist unit testing,
371   * but real clients are not expected to need it -- see the way 
372   * createHardLinkMult() uses it internally so the user doesn't need to worry
373   * about it.
374   * 
375   * @param fileDir - source directory, parent of fileBaseNames
376   * @param fileBaseNames - array of path-less file names, relative
377   *            to the source directory
378   * @param linkDir - target directory where the hardlinks will be put
379   * @return - total data length (must not exceed maxAllowedCmdArgLength)
380   * @throws IOException
381   */
382  protected static int getLinkMultArgLength(
383          File fileDir, String[] fileBaseNames, File linkDir) 
384  throws IOException {
385    return getHardLinkCommand.getLinkMultArgLength(fileDir, 
386          fileBaseNames, linkDir);
387  }
388  
389  /**
390   * Return this private value for use by unit tests.
391   * Shell commands are not allowed to have a total string length
392   * exceeding this size.
393   */
394  protected static int getMaxAllowedCmdArgLength() {
395    return getHardLinkCommand.getMaxAllowedCmdArgLength();
396  }
397  
398  /*
399   * ****************************************************
400   * Complexity is above.  User-visible functionality is below
401   * ****************************************************
402   */
403
404  /**
405   * Creates a hardlink 
406   * @param file - existing source file
407   * @param linkName - desired target link file
408   */
409  public static void createHardLink(File file, File linkName) 
410  throws IOException {
411    if (file == null) {
412      throw new IOException(
413          "invalid arguments to createHardLink: source file is null");
414    }
415    if (linkName == null) {
416      throw new IOException(
417          "invalid arguments to createHardLink: link name is null");
418    }
419          // construct and execute shell command
420    String[] hardLinkCommand = getHardLinkCommand.linkOne(file, linkName);
421    Process process = Runtime.getRuntime().exec(hardLinkCommand);
422    try {
423      if (process.waitFor() != 0) {
424        String errMsg = new BufferedReader(new InputStreamReader(
425            process.getInputStream())).readLine();
426        if (errMsg == null)  errMsg = "";
427        String inpMsg = new BufferedReader(new InputStreamReader(
428            process.getErrorStream())).readLine();
429        if (inpMsg == null)  inpMsg = "";
430        throw new IOException(errMsg + inpMsg);
431      }
432    } catch (InterruptedException e) {
433      throw new IOException(e);
434    } finally {
435      process.destroy();
436    }
437  }
438
439  /**
440   * Creates hardlinks from multiple existing files within one parent
441   * directory, into one target directory.
442   * @param parentDir - directory containing source files
443   * @param fileBaseNames - list of path-less file names, as returned by 
444   *                        parentDir.list()
445   * @param linkDir - where the hardlinks should be put.  It must already exist.
446   * 
447   * If the list of files is too long (overflows maxAllowedCmdArgLength),
448   * we will automatically split it into multiple invocations of the
449   * underlying method.
450   */
451  public static void createHardLinkMult(File parentDir, String[] fileBaseNames, 
452      File linkDir) throws IOException {
453    //This is the public method all non-test clients are expected to use.
454    //Normal case - allow up to maxAllowedCmdArgLength characters in the cmd
455    createHardLinkMult(parentDir, fileBaseNames, linkDir, 
456                       getHardLinkCommand.getMaxAllowedCmdArgLength());
457  }
458
459  /*
460   * Implements {@link createHardLinkMult} with added variable  "maxLength",
461   * to ease unit testing of the auto-splitting feature for long lists.
462   * Likewise why it returns "callCount", the number of sub-arrays that
463   * the file list had to be split into.
464   * Non-test clients are expected to call the public method instead.
465   */
466  protected static int createHardLinkMult(File parentDir, 
467      String[] fileBaseNames, File linkDir, int maxLength) 
468  throws IOException {
469    if (parentDir == null) {
470      throw new IOException(
471          "invalid arguments to createHardLinkMult: parent directory is null");
472    }
473    if (linkDir == null) {
474      throw new IOException(
475          "invalid arguments to createHardLinkMult: link directory is null");
476    }
477    if (fileBaseNames == null) {
478      throw new IOException(
479          "invalid arguments to createHardLinkMult: "
480          + "filename list can be empty but not null");
481    }
482    if (fileBaseNames.length == 0) {
483      //the OS cmds can't handle empty list of filenames, 
484      //but it's legal, so just return.
485      return 0; 
486    }
487    if (!linkDir.exists()) {
488      throw new FileNotFoundException(linkDir + " not found.");
489    }
490
491    //if the list is too long, split into multiple invocations
492    int callCount = 0;
493    if (getLinkMultArgLength(parentDir, fileBaseNames, linkDir) > maxLength
494          && fileBaseNames.length > 1) {
495      String[] list1 = Arrays.copyOf(fileBaseNames, fileBaseNames.length/2);
496      callCount += createHardLinkMult(parentDir, list1, linkDir, maxLength);
497      String[] list2 = Arrays.copyOfRange(fileBaseNames, fileBaseNames.length/2,
498          fileBaseNames.length);
499      callCount += createHardLinkMult(parentDir, list2, linkDir, maxLength);  
500      return callCount;
501    } else {
502      callCount = 1;
503    }
504    
505    // construct and execute shell command
506    String[] hardLinkCommand = getHardLinkCommand.linkMult(fileBaseNames, 
507        linkDir);
508    Process process = Runtime.getRuntime().exec(hardLinkCommand, null, 
509        parentDir);
510    try {
511      if (process.waitFor() != 0) {
512        String errMsg = new BufferedReader(new InputStreamReader(
513            process.getInputStream())).readLine();
514        if (errMsg == null)  errMsg = "";
515        String inpMsg = new BufferedReader(new InputStreamReader(
516            process.getErrorStream())).readLine();
517        if (inpMsg == null)  inpMsg = "";
518        throw new IOException(errMsg + inpMsg);
519      }
520    } catch (InterruptedException e) {
521      throw new IOException(e);
522    } finally {
523      process.destroy();
524    }
525    return callCount;
526  }
527
528   /**
529   * Retrieves the number of links to the specified file.
530   */
531  public static int getLinkCount(File fileName) throws IOException {
532    if (fileName == null) {
533      throw new IOException(
534          "invalid argument to getLinkCount: file name is null");
535    }
536    if (!fileName.exists()) {
537      throw new FileNotFoundException(fileName + " not found.");
538    }
539
540    // construct and execute shell command
541    String[] cmd = getHardLinkCommand.linkCount(fileName);
542    String inpMsg = null;
543    String errMsg = null;
544    int exitValue = -1;
545    BufferedReader in = null;
546    BufferedReader err = null;
547
548    Process process = Runtime.getRuntime().exec(cmd);
549    try {
550      exitValue = process.waitFor();
551      in = new BufferedReader(new InputStreamReader(
552                                  process.getInputStream()));
553      inpMsg = in.readLine();
554      err = new BufferedReader(new InputStreamReader(
555                                   process.getErrorStream()));
556      errMsg = err.readLine();
557      if (inpMsg == null || exitValue != 0) {
558        throw createIOException(fileName, inpMsg, errMsg, exitValue, null);
559      }
560      if (osType == OSType.OS_TYPE_SOLARIS) {
561        String[] result = inpMsg.split("\\s+");
562        return Integer.parseInt(result[1]);
563      } else {
564        return Integer.parseInt(inpMsg);
565      }
566    } catch (NumberFormatException e) {
567      throw createIOException(fileName, inpMsg, errMsg, exitValue, e);
568    } catch (InterruptedException e) {
569      throw createIOException(fileName, inpMsg, errMsg, exitValue, e);
570    } finally {
571      process.destroy();
572      if (in != null) in.close();
573      if (err != null) err.close();
574    }
575  }
576  
577  /* Create an IOException for failing to get link count. */
578  private static IOException createIOException(File f, String message,
579      String error, int exitvalue, Exception cause) {
580    
581    final String winErrMsg = "; Windows errors in getLinkCount are often due "
582         + "to Cygwin misconfiguration";
583
584    final String s = "Failed to get link count on file " + f
585        + ": message=" + message
586        + "; error=" + error
587        + ((osType == OSType.OS_TYPE_WINXP) ? winErrMsg : "")
588        + "; exit value=" + exitvalue;
589    return (cause == null) ? new IOException(s) : new IOException(s, cause);
590  }
591  
592  
593  /**
594   * HardLink statistics counters and methods.
595   * Not multi-thread safe, obviously.
596   * Init is called during HardLink instantiation, above.
597   * 
598   * These are intended for use by knowledgeable clients, not internally, 
599   * because many of the internal methods are static and can't update these
600   * per-instance counters.
601   */
602  public static class LinkStats {
603    public int countDirs = 0; 
604    public int countSingleLinks = 0; 
605    public int countMultLinks = 0; 
606    public int countFilesMultLinks = 0; 
607    public int countEmptyDirs = 0; 
608    public int countPhysicalFileCopies = 0;
609  
610    public void clear() {
611      countDirs = 0; 
612      countSingleLinks = 0; 
613      countMultLinks = 0; 
614      countFilesMultLinks = 0; 
615      countEmptyDirs = 0; 
616      countPhysicalFileCopies = 0;
617    }
618    
619    public String report() {
620      return "HardLinkStats: " + countDirs + " Directories, including " 
621      + countEmptyDirs + " Empty Directories, " 
622      + countSingleLinks 
623      + " single Link operations, " + countMultLinks 
624      + " multi-Link operations, linking " + countFilesMultLinks 
625      + " files, total " + (countSingleLinks + countFilesMultLinks) 
626      + " linkable files.  Also physically copied " 
627      + countPhysicalFileCopies + " other files.";
628    }
629  }
630}
631