001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018
019package org.apache.hadoop.fs;
020
021import java.io.BufferedReader;
022import java.io.File;
023import java.io.FileNotFoundException;
024import java.io.IOException;
025import java.io.StringReader;
026import java.util.Arrays;
027
028import org.apache.hadoop.io.IOUtils;
029import org.apache.hadoop.util.Shell;
030import org.apache.hadoop.util.Shell.ExitCodeException;
031import org.apache.hadoop.util.Shell.ShellCommandExecutor;
032
033/**
034 * Class for creating hardlinks.
 * Supports Unix/Linux, Windows via winutils, and Mac OS X.
036 * 
037 * The HardLink class was formerly a static inner class of FSUtil,
038 * and the methods provided were blatantly non-thread-safe.
039 * To enable volume-parallel Update snapshots, we now provide static 
040 * threadsafe methods that allocate new buffer string arrays
041 * upon each call.  We also provide an API to hardlink all files in a
042 * directory with a single command, which is up to 128 times more 
043 * efficient - and minimizes the impact of the extra buffer creations.
044 */
045public class HardLink { 
046
047  private static HardLinkCommandGetter getHardLinkCommand;
048  
049  public final LinkStats linkStats; //not static
050  
051  //initialize the command "getters" statically, so can use their 
052  //methods without instantiating the HardLink object
053  static { 
054    if (Shell.WINDOWS) {
055      // Windows
056      getHardLinkCommand = new HardLinkCGWin();
057    } else {
058      // Unix or Linux
059      getHardLinkCommand = new HardLinkCGUnix();
060      //override getLinkCountCommand for the particular Unix variant
061      //Linux is already set as the default - {"stat","-c%h", null}
062      if (Shell.MAC || Shell.FREEBSD) {
063        String[] linkCountCmdTemplate = {"/usr/bin/stat","-f%l", null};
064        HardLinkCGUnix.setLinkCountCmdTemplate(linkCountCmdTemplate);
065      } else if (Shell.SOLARIS) {
066        String[] linkCountCmdTemplate = {"ls","-l", null};
067        HardLinkCGUnix.setLinkCountCmdTemplate(linkCountCmdTemplate);        
068      }
069    }
070  }
071
072  public HardLink() {
073    linkStats = new LinkStats();
074  }
075  
076  /**
077   * This abstract class bridges the OS-dependent implementations of the 
078   * needed functionality for creating hardlinks and querying link counts.
079   * The particular implementation class is chosen during 
080   * static initialization phase of the HardLink class.
081   * The "getter" methods construct shell command strings for various purposes.
082   */
083  private static abstract class HardLinkCommandGetter {
084
085    /**
086     * Get the command string needed to hardlink a bunch of files from
087     * a single source directory into a target directory.  The source directory
088     * is not specified here, but the command will be executed using the source
089     * directory as the "current working directory" of the shell invocation.
090     * 
091     * @param fileBaseNames - array of path-less file names, relative
092     *            to the source directory
093     * @param linkDir - target directory where the hardlinks will be put
094     * @return - an array of Strings suitable for use as a single shell command
095     * @throws IOException - if any of the file or path names misbehave
096     */
097    abstract String[] linkMult(String[] fileBaseNames, File linkDir) 
098                          throws IOException;
099    
100    /**
101     * Get the command string needed to hardlink a single file
102     */
103    abstract String[] linkOne(File file, File linkName) throws IOException;
104    
105    /**
106     * Get the command string to query the hardlink count of a file
107     */
108    abstract String[] linkCount(File file) throws IOException;
109    
110    /**
111     * Calculate the total string length of the shell command
112     * resulting from execution of linkMult, plus the length of the
113     * source directory name (which will also be provided to the shell)
114     * 
115     * @param fileDir - source directory, parent of fileBaseNames
116     * @param fileBaseNames - array of path-less file names, relative
117     *            to the source directory
118     * @param linkDir - target directory where the hardlinks will be put
119     * @return - total data length (must not exceed maxAllowedCmdArgLength)
120     * @throws IOException
121     */
122    abstract int getLinkMultArgLength(
123                     File fileDir, String[] fileBaseNames, File linkDir) 
124                     throws IOException;
125    
126    /**
127     * Get the maximum allowed string length of a shell command on this OS,
128     * which is just the documented minimum guaranteed supported command
129     * length - aprx. 32KB for Unix, and 8KB for Windows.
130     */
131    abstract int getMaxAllowedCmdArgLength(); 
132  }
133  
134  /**
135   * Implementation of HardLinkCommandGetter class for Unix
136   */
137  static class HardLinkCGUnix extends HardLinkCommandGetter {
138    private static String[] hardLinkCommand = {"ln", null, null};
139    private static String[] hardLinkMultPrefix = {"ln"};
140    private static String[] hardLinkMultSuffix = {null};
141    private static String[] getLinkCountCommand = {"stat","-c%h", null};
142    //Unix guarantees at least 32K bytes cmd length.
143    //Subtract another 64b to allow for Java 'exec' overhead
144    private static final int maxAllowedCmdArgLength = 32*1024 - 65;
145    
146    private static synchronized 
147    void setLinkCountCmdTemplate(String[] template) {
148      //May update this for specific unix variants, 
149      //after static initialization phase
150      getLinkCountCommand = template;
151    }
152    
153    /*
154     * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkOne(java.io.File, java.io.File)
155     */
156    @Override
157    String[] linkOne(File file, File linkName) 
158    throws IOException {
159      String[] buf = new String[hardLinkCommand.length];
160      System.arraycopy(hardLinkCommand, 0, buf, 0, hardLinkCommand.length);
161      //unix wants argument order: "ln <existing> <new>"
162      buf[1] = FileUtil.makeShellPath(file, true); 
163      buf[2] = FileUtil.makeShellPath(linkName, true);
164      return buf;
165    }
166    
167    /*
168     * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkMult(java.lang.String[], java.io.File)
169     */
170    @Override
171    String[] linkMult(String[] fileBaseNames, File linkDir) 
172    throws IOException {
173      String[] buf = new String[fileBaseNames.length 
174                                + hardLinkMultPrefix.length 
175                                + hardLinkMultSuffix.length];
176      int mark=0;
177      System.arraycopy(hardLinkMultPrefix, 0, buf, mark, 
178                       hardLinkMultPrefix.length);
179      mark += hardLinkMultPrefix.length;
180      System.arraycopy(fileBaseNames, 0, buf, mark, fileBaseNames.length);
181      mark += fileBaseNames.length;
182      buf[mark] = FileUtil.makeShellPath(linkDir, true);
183      return buf;
184    }
185    
186    /*
187     * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkCount(java.io.File)
188     */
189    @Override
190    String[] linkCount(File file) 
191    throws IOException {
192      String[] buf = new String[getLinkCountCommand.length];
193      System.arraycopy(getLinkCountCommand, 0, buf, 0, 
194                       getLinkCountCommand.length);
195      buf[getLinkCountCommand.length - 1] = FileUtil.makeShellPath(file, true);
196      return buf;
197    }
198    
199    /*
200     * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#getLinkMultArgLength(java.io.File, java.lang.String[], java.io.File)
201     */
202    @Override
203    int getLinkMultArgLength(File fileDir, String[] fileBaseNames, File linkDir) 
204    throws IOException{
205      int sum = 0;
206      for (String x : fileBaseNames) {
207        // add 1 to account for terminal null or delimiter space
208        sum += 1 + ((x == null) ? 0 : x.length());
209      }
210      sum += 2 + FileUtil.makeShellPath(fileDir, true).length()
211             + FileUtil.makeShellPath(linkDir, true).length();
212      //add the fixed overhead of the hardLinkMult prefix and suffix
213      sum += 3; //length("ln") + 1
214      return sum;
215    }
216    
217    /*
218     * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#getMaxAllowedCmdArgLength()
219     */
220    @Override
221    int getMaxAllowedCmdArgLength() {
222      return maxAllowedCmdArgLength;
223    }
224  }
225  
226  
227  /**
228   * Implementation of HardLinkCommandGetter class for Windows
229   */
230  static class HardLinkCGWin extends HardLinkCommandGetter {
231    //The Windows command getter impl class and its member fields are
232    //package-private ("default") access instead of "private" to assist 
233    //unit testing (sort of) on non-Win servers
234
235    static String CMD_EXE = "cmd.exe";
236    static String[] hardLinkCommand = {
237                        Shell.WINUTILS,"hardlink","create", null, null};
238    static String[] hardLinkMultPrefix = {
239        CMD_EXE, "/q", "/c", "for", "%f", "in", "("};
240    static String   hardLinkMultDir = "\\%f";
241    static String[] hardLinkMultSuffix = {
242        ")", "do", Shell.WINUTILS, "hardlink", "create", null,
243        "%f"};
244    static String[] getLinkCountCommand = {
245        Shell.WINUTILS, "hardlink", "stat", null};
246    //Windows guarantees only 8K - 1 bytes cmd length.
247    //Subtract another 64b to allow for Java 'exec' overhead
248    static final int maxAllowedCmdArgLength = 8*1024 - 65;
249
250    /*
251     * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkOne(java.io.File, java.io.File)
252     */
253    @Override
254    String[] linkOne(File file, File linkName) 
255    throws IOException {
256      String[] buf = new String[hardLinkCommand.length];
257      System.arraycopy(hardLinkCommand, 0, buf, 0, hardLinkCommand.length);
258      //windows wants argument order: "create <new> <existing>"
259      buf[4] = file.getCanonicalPath(); 
260      buf[3] = linkName.getCanonicalPath();
261      return buf;
262    }
263    
264    /*
265     * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkMult(java.lang.String[], java.io.File)
266     */
267    @Override
268    String[] linkMult(String[] fileBaseNames, File linkDir) 
269    throws IOException {
270      String[] buf = new String[fileBaseNames.length 
271                                + hardLinkMultPrefix.length 
272                                + hardLinkMultSuffix.length];
273      String td = linkDir.getCanonicalPath() + hardLinkMultDir;
274      int mark=0;
275      System.arraycopy(hardLinkMultPrefix, 0, buf, mark, 
276                       hardLinkMultPrefix.length);
277      mark += hardLinkMultPrefix.length;
278      System.arraycopy(fileBaseNames, 0, buf, mark, fileBaseNames.length);
279      mark += fileBaseNames.length;
280      System.arraycopy(hardLinkMultSuffix, 0, buf, mark, 
281                       hardLinkMultSuffix.length);
282      mark += hardLinkMultSuffix.length;
283      buf[mark - 2] = td;
284      return buf;
285    }
286    
287    /*
288     * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkCount(java.io.File)
289     */
290    @Override
291    String[] linkCount(File file) 
292    throws IOException {
293      String[] buf = new String[getLinkCountCommand.length];
294      System.arraycopy(getLinkCountCommand, 0, buf, 0, 
295                       getLinkCountCommand.length);
296      buf[getLinkCountCommand.length - 1] = file.getCanonicalPath();
297      return buf;
298    }
299    
300    /*
301     * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#getLinkMultArgLength(java.io.File, java.lang.String[], java.io.File)
302     */
303    @Override
304    int getLinkMultArgLength(File fileDir, String[] fileBaseNames, File linkDir) 
305    throws IOException {
306      int sum = 0;
307      for (String x : fileBaseNames) {
308        // add 1 to account for terminal null or delimiter space
309        sum += 1 + ((x == null) ? 0 : x.length());
310      }
311      sum += 2 + fileDir.getCanonicalPath().length() +
312               linkDir.getCanonicalPath().length();
313      //add the fixed overhead of the hardLinkMult command 
314      //(prefix, suffix, and Dir suffix)
315      sum += (CMD_EXE + " /q /c for %f in ( ) do "
316              + Shell.WINUTILS + " hardlink create \\%f %f").length();
317      return sum;
318    }
319    
320    /*
321     * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#getMaxAllowedCmdArgLength()
322     */
323    @Override
324    int getMaxAllowedCmdArgLength() {
325      return maxAllowedCmdArgLength;
326    }
327  }
328  
329  
330  /**
331   * Calculate the nominal length of all contributors to the total 
332   * commandstring length, including fixed overhead of the OS-dependent 
333   * command.  It's protected rather than private, to assist unit testing,
334   * but real clients are not expected to need it -- see the way 
335   * createHardLinkMult() uses it internally so the user doesn't need to worry
336   * about it.
337   * 
338   * @param fileDir - source directory, parent of fileBaseNames
339   * @param fileBaseNames - array of path-less file names, relative
340   *            to the source directory
341   * @param linkDir - target directory where the hardlinks will be put
342   * @return - total data length (must not exceed maxAllowedCmdArgLength)
343   * @throws IOException
344   */
345  protected static int getLinkMultArgLength(
346          File fileDir, String[] fileBaseNames, File linkDir) 
347  throws IOException {
348    return getHardLinkCommand.getLinkMultArgLength(fileDir, 
349          fileBaseNames, linkDir);
350  }
351  
352  /**
353   * Return this private value for use by unit tests.
354   * Shell commands are not allowed to have a total string length
355   * exceeding this size.
356   */
357  protected static int getMaxAllowedCmdArgLength() {
358    return getHardLinkCommand.getMaxAllowedCmdArgLength();
359  }
360  
361  /*
362   * ****************************************************
363   * Complexity is above.  User-visible functionality is below
364   * ****************************************************
365   */
366
367  /**
368   * Creates a hardlink 
369   * @param file - existing source file
370   * @param linkName - desired target link file
371   */
372  public static void createHardLink(File file, File linkName) 
373  throws IOException {
374    if (file == null) {
375      throw new IOException(
376          "invalid arguments to createHardLink: source file is null");
377    }
378    if (linkName == null) {
379      throw new IOException(
380          "invalid arguments to createHardLink: link name is null");
381    }
382          // construct and execute shell command
383    String[] hardLinkCommand = getHardLinkCommand.linkOne(file, linkName);
384    ShellCommandExecutor shexec = new ShellCommandExecutor(hardLinkCommand);
385    try {
386      shexec.execute();
387    } catch (ExitCodeException e) {
388      throw new IOException("Failed to execute command " +
389          Arrays.toString(hardLinkCommand) +
390          "; command output: \"" + shexec.getOutput() + "\"" +
391          "; WrappedException: \"" + e.getMessage() + "\"");
392    }
393  }
394
395  /**
396   * Creates hardlinks from multiple existing files within one parent
397   * directory, into one target directory.
398   * @param parentDir - directory containing source files
399   * @param fileBaseNames - list of path-less file names, as returned by 
400   *                        parentDir.list()
401   * @param linkDir - where the hardlinks should be put.  It must already exist.
402   * 
403   * If the list of files is too long (overflows maxAllowedCmdArgLength),
404   * we will automatically split it into multiple invocations of the
405   * underlying method.
406   */
407  public static void createHardLinkMult(File parentDir, String[] fileBaseNames, 
408      File linkDir) throws IOException {
409    //This is the public method all non-test clients are expected to use.
410    //Normal case - allow up to maxAllowedCmdArgLength characters in the cmd
411    createHardLinkMult(parentDir, fileBaseNames, linkDir, 
412                       getHardLinkCommand.getMaxAllowedCmdArgLength());
413  }
414
415  /*
416   * Implements {@link createHardLinkMult} with added variable  "maxLength",
417   * to ease unit testing of the auto-splitting feature for long lists.
418   * Likewise why it returns "callCount", the number of sub-arrays that
419   * the file list had to be split into.
420   * Non-test clients are expected to call the public method instead.
421   */
422  protected static int createHardLinkMult(File parentDir, 
423      String[] fileBaseNames, File linkDir, int maxLength) 
424  throws IOException {
425    if (parentDir == null) {
426      throw new IOException(
427          "invalid arguments to createHardLinkMult: parent directory is null");
428    }
429    if (linkDir == null) {
430      throw new IOException(
431          "invalid arguments to createHardLinkMult: link directory is null");
432    }
433    if (fileBaseNames == null) {
434      throw new IOException(
435          "invalid arguments to createHardLinkMult: "
436          + "filename list can be empty but not null");
437    }
438    if (fileBaseNames.length == 0) {
439      //the OS cmds can't handle empty list of filenames, 
440      //but it's legal, so just return.
441      return 0; 
442    }
443    if (!linkDir.exists()) {
444      throw new FileNotFoundException(linkDir + " not found.");
445    }
446
447    //if the list is too long, split into multiple invocations
448    int callCount = 0;
449    if (getLinkMultArgLength(parentDir, fileBaseNames, linkDir) > maxLength
450          && fileBaseNames.length > 1) {
451      String[] list1 = Arrays.copyOf(fileBaseNames, fileBaseNames.length/2);
452      callCount += createHardLinkMult(parentDir, list1, linkDir, maxLength);
453      String[] list2 = Arrays.copyOfRange(fileBaseNames, fileBaseNames.length/2,
454          fileBaseNames.length);
455      callCount += createHardLinkMult(parentDir, list2, linkDir, maxLength);  
456      return callCount;
457    } else {
458      callCount = 1;
459    }
460    
461    // construct and execute shell command
462    String[] hardLinkCommand = getHardLinkCommand.linkMult(fileBaseNames, 
463        linkDir);
464    ShellCommandExecutor shexec = new ShellCommandExecutor(hardLinkCommand,
465      parentDir, null, 0L);
466    try {
467      shexec.execute();
468    } catch (ExitCodeException e) {
469      throw new IOException(shexec.getOutput() + e.getMessage());
470    }
471    return callCount;
472  }
473
474   /**
475   * Retrieves the number of links to the specified file.
476   */
477  public static int getLinkCount(File fileName) throws IOException {
478    if (fileName == null) {
479      throw new IOException(
480          "invalid argument to getLinkCount: file name is null");
481    }
482    if (!fileName.exists()) {
483      throw new FileNotFoundException(fileName + " not found.");
484    }
485
486    // construct and execute shell command
487    String[] cmd = getHardLinkCommand.linkCount(fileName);
488    String inpMsg = null;
489    String errMsg = null;
490    int exitValue = -1;
491    BufferedReader in = null;
492
493    ShellCommandExecutor shexec = new ShellCommandExecutor(cmd);
494    try {
495      shexec.execute();
496      in = new BufferedReader(new StringReader(shexec.getOutput()));
497      inpMsg = in.readLine();
498      exitValue = shexec.getExitCode();
499      if (inpMsg == null || exitValue != 0) {
500        throw createIOException(fileName, inpMsg, errMsg, exitValue, null);
501      }
502      if (Shell.SOLARIS) {
503        String[] result = inpMsg.split("\\s+");
504        return Integer.parseInt(result[1]);
505      } else {
506        return Integer.parseInt(inpMsg);
507      }
508    } catch (ExitCodeException e) {
509      inpMsg = shexec.getOutput();
510      errMsg = e.getMessage();
511      exitValue = e.getExitCode();
512      throw createIOException(fileName, inpMsg, errMsg, exitValue, e);
513    } catch (NumberFormatException e) {
514      throw createIOException(fileName, inpMsg, errMsg, exitValue, e);
515    } finally {
516      IOUtils.closeStream(in);
517    }
518  }
519  
520  /* Create an IOException for failing to get link count. */
521  private static IOException createIOException(File f, String message,
522      String error, int exitvalue, Exception cause) {
523
524    final String s = "Failed to get link count on file " + f
525        + ": message=" + message
526        + "; error=" + error
527        + "; exit value=" + exitvalue;
528    return (cause == null) ? new IOException(s) : new IOException(s, cause);
529  }
530  
531  
532  /**
533   * HardLink statistics counters and methods.
534   * Not multi-thread safe, obviously.
535   * Init is called during HardLink instantiation, above.
536   * 
537   * These are intended for use by knowledgeable clients, not internally, 
538   * because many of the internal methods are static and can't update these
539   * per-instance counters.
540   */
541  public static class LinkStats {
542    public int countDirs = 0; 
543    public int countSingleLinks = 0; 
544    public int countMultLinks = 0; 
545    public int countFilesMultLinks = 0; 
546    public int countEmptyDirs = 0; 
547    public int countPhysicalFileCopies = 0;
548  
549    public void clear() {
550      countDirs = 0; 
551      countSingleLinks = 0; 
552      countMultLinks = 0; 
553      countFilesMultLinks = 0; 
554      countEmptyDirs = 0; 
555      countPhysicalFileCopies = 0;
556    }
557    
558    public String report() {
559      return "HardLinkStats: " + countDirs + " Directories, including " 
560      + countEmptyDirs + " Empty Directories, " 
561      + countSingleLinks 
562      + " single Link operations, " + countMultLinks 
563      + " multi-Link operations, linking " + countFilesMultLinks 
564      + " files, total " + (countSingleLinks + countFilesMultLinks) 
565      + " linkable files.  Also physically copied " 
566      + countPhysicalFileCopies + " other files.";
567    }
568  }
569}
570