001    /**
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements.  See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership.  The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the
007     * "License"); you may not use this file except in compliance
008     * with the License.  You may obtain a copy of the License at
009     *
010     *     http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing, software
013     * distributed under the License is distributed on an "AS IS" BASIS,
014     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015     * See the License for the specific language governing permissions and
016     * limitations under the License.
017     */
018    
019    package org.apache.hadoop.fs;
020    
021    import java.io.BufferedReader;
022    import java.io.File;
023    import java.io.FileNotFoundException;
024    import java.io.IOException;
025    import java.io.StringReader;
026    import java.util.Arrays;
027    
028    import org.apache.hadoop.io.IOUtils;
029    import org.apache.hadoop.util.Shell;
030    import org.apache.hadoop.util.Shell.ExitCodeException;
031    import org.apache.hadoop.util.Shell.ShellCommandExecutor;
032    
033    /**
034     * Class for creating hardlinks.
035     * Supports Unix/Linux, Windows via winutils , and Mac OS X.
036     * 
037     * The HardLink class was formerly a static inner class of FSUtil,
038     * and the methods provided were blatantly non-thread-safe.
039     * To enable volume-parallel Update snapshots, we now provide static 
040     * threadsafe methods that allocate new buffer string arrays
041     * upon each call.  We also provide an API to hardlink all files in a
042     * directory with a single command, which is up to 128 times more 
043     * efficient - and minimizes the impact of the extra buffer creations.
044     */
045    public class HardLink { 
046    
047      private static HardLinkCommandGetter getHardLinkCommand;
048      
049      public final LinkStats linkStats; //not static
050      
051      //initialize the command "getters" statically, so can use their 
052      //methods without instantiating the HardLink object
053      static { 
054        if (Shell.WINDOWS) {
055          // Windows
056          getHardLinkCommand = new HardLinkCGWin();
057        } else {
058          // Unix or Linux
059          getHardLinkCommand = new HardLinkCGUnix();
060          //override getLinkCountCommand for the particular Unix variant
061          //Linux is already set as the default - {"stat","-c%h", null}
062          if (Shell.MAC || Shell.FREEBSD) {
063            String[] linkCountCmdTemplate = {"/usr/bin/stat","-f%l", null};
064            HardLinkCGUnix.setLinkCountCmdTemplate(linkCountCmdTemplate);
065          } else if (Shell.SOLARIS) {
066            String[] linkCountCmdTemplate = {"ls","-l", null};
067            HardLinkCGUnix.setLinkCountCmdTemplate(linkCountCmdTemplate);        
068          }
069        }
070      }
071    
072      public HardLink() {
073        linkStats = new LinkStats();
074      }
075      
076      /**
077       * This abstract class bridges the OS-dependent implementations of the 
078       * needed functionality for creating hardlinks and querying link counts.
079       * The particular implementation class is chosen during 
080       * static initialization phase of the HardLink class.
081       * The "getter" methods construct shell command strings for various purposes.
082       */
083      private static abstract class HardLinkCommandGetter {
084    
085        /**
086         * Get the command string needed to hardlink a bunch of files from
087         * a single source directory into a target directory.  The source directory
088         * is not specified here, but the command will be executed using the source
089         * directory as the "current working directory" of the shell invocation.
090         * 
091         * @param fileBaseNames - array of path-less file names, relative
092         *            to the source directory
093         * @param linkDir - target directory where the hardlinks will be put
094         * @return - an array of Strings suitable for use as a single shell command
095         * @throws IOException - if any of the file or path names misbehave
096         */
097        abstract String[] linkMult(String[] fileBaseNames, File linkDir) 
098                              throws IOException;
099        
100        /**
101         * Get the command string needed to hardlink a single file
102         */
103        abstract String[] linkOne(File file, File linkName) throws IOException;
104        
105        /**
106         * Get the command string to query the hardlink count of a file
107         */
108        abstract String[] linkCount(File file) throws IOException;
109        
110        /**
111         * Calculate the total string length of the shell command
112         * resulting from execution of linkMult, plus the length of the
113         * source directory name (which will also be provided to the shell)
114         * 
115         * @param fileDir - source directory, parent of fileBaseNames
116         * @param fileBaseNames - array of path-less file names, relative
117         *            to the source directory
118         * @param linkDir - target directory where the hardlinks will be put
119         * @return - total data length (must not exceed maxAllowedCmdArgLength)
120         * @throws IOException
121         */
122        abstract int getLinkMultArgLength(
123                         File fileDir, String[] fileBaseNames, File linkDir) 
124                         throws IOException;
125        
126        /**
127         * Get the maximum allowed string length of a shell command on this OS,
128         * which is just the documented minimum guaranteed supported command
129         * length - aprx. 32KB for Unix, and 8KB for Windows.
130         */
131        abstract int getMaxAllowedCmdArgLength(); 
132      }
133      
134      /**
135       * Implementation of HardLinkCommandGetter class for Unix
136       */
137      static class HardLinkCGUnix extends HardLinkCommandGetter {
138        private static String[] hardLinkCommand = {"ln", null, null};
139        private static String[] hardLinkMultPrefix = {"ln"};
140        private static String[] hardLinkMultSuffix = {null};
141        private static String[] getLinkCountCommand = {"stat","-c%h", null};
142        //Unix guarantees at least 32K bytes cmd length.
143        //Subtract another 64b to allow for Java 'exec' overhead
144        private static final int maxAllowedCmdArgLength = 32*1024 - 65;
145        
146        private static synchronized 
147        void setLinkCountCmdTemplate(String[] template) {
148          //May update this for specific unix variants, 
149          //after static initialization phase
150          getLinkCountCommand = template;
151        }
152        
153        /*
154         * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkOne(java.io.File, java.io.File)
155         */
156        @Override
157        String[] linkOne(File file, File linkName) 
158        throws IOException {
159          String[] buf = new String[hardLinkCommand.length];
160          System.arraycopy(hardLinkCommand, 0, buf, 0, hardLinkCommand.length);
161          //unix wants argument order: "ln <existing> <new>"
162          buf[1] = FileUtil.makeShellPath(file, true); 
163          buf[2] = FileUtil.makeShellPath(linkName, true);
164          return buf;
165        }
166        
167        /*
168         * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkMult(java.lang.String[], java.io.File)
169         */
170        @Override
171        String[] linkMult(String[] fileBaseNames, File linkDir) 
172        throws IOException {
173          String[] buf = new String[fileBaseNames.length 
174                                    + hardLinkMultPrefix.length 
175                                    + hardLinkMultSuffix.length];
176          int mark=0;
177          System.arraycopy(hardLinkMultPrefix, 0, buf, mark, 
178                           hardLinkMultPrefix.length);
179          mark += hardLinkMultPrefix.length;
180          System.arraycopy(fileBaseNames, 0, buf, mark, fileBaseNames.length);
181          mark += fileBaseNames.length;
182          buf[mark] = FileUtil.makeShellPath(linkDir, true);
183          return buf;
184        }
185        
186        /*
187         * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkCount(java.io.File)
188         */
189        @Override
190        String[] linkCount(File file) 
191        throws IOException {
192          String[] buf = new String[getLinkCountCommand.length];
193          System.arraycopy(getLinkCountCommand, 0, buf, 0, 
194                           getLinkCountCommand.length);
195          buf[getLinkCountCommand.length - 1] = FileUtil.makeShellPath(file, true);
196          return buf;
197        }
198        
199        /*
200         * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#getLinkMultArgLength(java.io.File, java.lang.String[], java.io.File)
201         */
202        @Override
203        int getLinkMultArgLength(File fileDir, String[] fileBaseNames, File linkDir) 
204        throws IOException{
205          int sum = 0;
206          for (String x : fileBaseNames) {
207            // add 1 to account for terminal null or delimiter space
208            sum += 1 + ((x == null) ? 0 : x.length());
209          }
210          sum += 2 + FileUtil.makeShellPath(fileDir, true).length()
211                 + FileUtil.makeShellPath(linkDir, true).length();
212          //add the fixed overhead of the hardLinkMult prefix and suffix
213          sum += 3; //length("ln") + 1
214          return sum;
215        }
216        
217        /*
218         * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#getMaxAllowedCmdArgLength()
219         */
220        @Override
221        int getMaxAllowedCmdArgLength() {
222          return maxAllowedCmdArgLength;
223        }
224      }
225      
226      
227      /**
228       * Implementation of HardLinkCommandGetter class for Windows
229       */
230      static class HardLinkCGWin extends HardLinkCommandGetter {
231        //The Windows command getter impl class and its member fields are
232        //package-private ("default") access instead of "private" to assist 
233        //unit testing (sort of) on non-Win servers
234    
235        static String CMD_EXE = "cmd.exe";
236        static String[] hardLinkCommand = {
237                            Shell.WINUTILS,"hardlink","create", null, null};
238        static String[] hardLinkMultPrefix = {
239            CMD_EXE, "/q", "/c", "for", "%f", "in", "("};
240        static String   hardLinkMultDir = "\\%f";
241        static String[] hardLinkMultSuffix = {
242            ")", "do", Shell.WINUTILS, "hardlink", "create", null,
243            "%f"};
244        static String[] getLinkCountCommand = {
245            Shell.WINUTILS, "hardlink", "stat", null};
246        //Windows guarantees only 8K - 1 bytes cmd length.
247        //Subtract another 64b to allow for Java 'exec' overhead
248        static final int maxAllowedCmdArgLength = 8*1024 - 65;
249    
250        /*
251         * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkOne(java.io.File, java.io.File)
252         */
253        @Override
254        String[] linkOne(File file, File linkName) 
255        throws IOException {
256          String[] buf = new String[hardLinkCommand.length];
257          System.arraycopy(hardLinkCommand, 0, buf, 0, hardLinkCommand.length);
258          //windows wants argument order: "create <new> <existing>"
259          buf[4] = file.getCanonicalPath(); 
260          buf[3] = linkName.getCanonicalPath();
261          return buf;
262        }
263        
264        /*
265         * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkMult(java.lang.String[], java.io.File)
266         */
267        @Override
268        String[] linkMult(String[] fileBaseNames, File linkDir) 
269        throws IOException {
270          String[] buf = new String[fileBaseNames.length 
271                                    + hardLinkMultPrefix.length 
272                                    + hardLinkMultSuffix.length];
273          String td = linkDir.getCanonicalPath() + hardLinkMultDir;
274          int mark=0;
275          System.arraycopy(hardLinkMultPrefix, 0, buf, mark, 
276                           hardLinkMultPrefix.length);
277          mark += hardLinkMultPrefix.length;
278          System.arraycopy(fileBaseNames, 0, buf, mark, fileBaseNames.length);
279          mark += fileBaseNames.length;
280          System.arraycopy(hardLinkMultSuffix, 0, buf, mark, 
281                           hardLinkMultSuffix.length);
282          mark += hardLinkMultSuffix.length;
283          buf[mark - 2] = td;
284          return buf;
285        }
286        
287        /*
288         * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkCount(java.io.File)
289         */
290        @Override
291        String[] linkCount(File file) 
292        throws IOException {
293          String[] buf = new String[getLinkCountCommand.length];
294          System.arraycopy(getLinkCountCommand, 0, buf, 0, 
295                           getLinkCountCommand.length);
296          buf[getLinkCountCommand.length - 1] = file.getCanonicalPath();
297          return buf;
298        }
299        
300        /*
301         * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#getLinkMultArgLength(java.io.File, java.lang.String[], java.io.File)
302         */
303        @Override
304        int getLinkMultArgLength(File fileDir, String[] fileBaseNames, File linkDir) 
305        throws IOException {
306          int sum = 0;
307          for (String x : fileBaseNames) {
308            // add 1 to account for terminal null or delimiter space
309            sum += 1 + ((x == null) ? 0 : x.length());
310          }
311          sum += 2 + fileDir.getCanonicalPath().length() +
312                   linkDir.getCanonicalPath().length();
313          //add the fixed overhead of the hardLinkMult command 
314          //(prefix, suffix, and Dir suffix)
315          sum += (CMD_EXE + " /q /c for %f in ( ) do "
316                  + Shell.WINUTILS + " hardlink create \\%f %f").length();
317          return sum;
318        }
319        
320        /*
321         * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#getMaxAllowedCmdArgLength()
322         */
323        @Override
324        int getMaxAllowedCmdArgLength() {
325          return maxAllowedCmdArgLength;
326        }
327      }
328      
329      
330      /**
331       * Calculate the nominal length of all contributors to the total 
332       * commandstring length, including fixed overhead of the OS-dependent 
333       * command.  It's protected rather than private, to assist unit testing,
334       * but real clients are not expected to need it -- see the way 
335       * createHardLinkMult() uses it internally so the user doesn't need to worry
336       * about it.
337       * 
338       * @param fileDir - source directory, parent of fileBaseNames
339       * @param fileBaseNames - array of path-less file names, relative
340       *            to the source directory
341       * @param linkDir - target directory where the hardlinks will be put
342       * @return - total data length (must not exceed maxAllowedCmdArgLength)
343       * @throws IOException
344       */
345      protected static int getLinkMultArgLength(
346              File fileDir, String[] fileBaseNames, File linkDir) 
347      throws IOException {
348        return getHardLinkCommand.getLinkMultArgLength(fileDir, 
349              fileBaseNames, linkDir);
350      }
351      
352      /**
353       * Return this private value for use by unit tests.
354       * Shell commands are not allowed to have a total string length
355       * exceeding this size.
356       */
357      protected static int getMaxAllowedCmdArgLength() {
358        return getHardLinkCommand.getMaxAllowedCmdArgLength();
359      }
360      
361      /*
362       * ****************************************************
363       * Complexity is above.  User-visible functionality is below
364       * ****************************************************
365       */
366    
367      /**
368       * Creates a hardlink 
369       * @param file - existing source file
370       * @param linkName - desired target link file
371       */
372      public static void createHardLink(File file, File linkName) 
373      throws IOException {
374        if (file == null) {
375          throw new IOException(
376              "invalid arguments to createHardLink: source file is null");
377        }
378        if (linkName == null) {
379          throw new IOException(
380              "invalid arguments to createHardLink: link name is null");
381        }
382              // construct and execute shell command
383        String[] hardLinkCommand = getHardLinkCommand.linkOne(file, linkName);
384        ShellCommandExecutor shexec = new ShellCommandExecutor(hardLinkCommand);
385        try {
386          shexec.execute();
387        } catch (ExitCodeException e) {
388          throw new IOException("Failed to execute command " +
389              Arrays.toString(hardLinkCommand) +
390              "; command output: \"" + shexec.getOutput() + "\"" +
391              "; WrappedException: \"" + e.getMessage() + "\"");
392        }
393      }
394    
395      /**
396       * Creates hardlinks from multiple existing files within one parent
397       * directory, into one target directory.
398       * @param parentDir - directory containing source files
399       * @param fileBaseNames - list of path-less file names, as returned by 
400       *                        parentDir.list()
401       * @param linkDir - where the hardlinks should be put.  It must already exist.
402       * 
403       * If the list of files is too long (overflows maxAllowedCmdArgLength),
404       * we will automatically split it into multiple invocations of the
405       * underlying method.
406       */
407      public static void createHardLinkMult(File parentDir, String[] fileBaseNames, 
408          File linkDir) throws IOException {
409        //This is the public method all non-test clients are expected to use.
410        //Normal case - allow up to maxAllowedCmdArgLength characters in the cmd
411        createHardLinkMult(parentDir, fileBaseNames, linkDir, 
412                           getHardLinkCommand.getMaxAllowedCmdArgLength());
413      }
414    
415      /*
416       * Implements {@link createHardLinkMult} with added variable  "maxLength",
417       * to ease unit testing of the auto-splitting feature for long lists.
418       * Likewise why it returns "callCount", the number of sub-arrays that
419       * the file list had to be split into.
420       * Non-test clients are expected to call the public method instead.
421       */
422      protected static int createHardLinkMult(File parentDir, 
423          String[] fileBaseNames, File linkDir, int maxLength) 
424      throws IOException {
425        if (parentDir == null) {
426          throw new IOException(
427              "invalid arguments to createHardLinkMult: parent directory is null");
428        }
429        if (linkDir == null) {
430          throw new IOException(
431              "invalid arguments to createHardLinkMult: link directory is null");
432        }
433        if (fileBaseNames == null) {
434          throw new IOException(
435              "invalid arguments to createHardLinkMult: "
436              + "filename list can be empty but not null");
437        }
438        if (fileBaseNames.length == 0) {
439          //the OS cmds can't handle empty list of filenames, 
440          //but it's legal, so just return.
441          return 0; 
442        }
443        if (!linkDir.exists()) {
444          throw new FileNotFoundException(linkDir + " not found.");
445        }
446    
447        //if the list is too long, split into multiple invocations
448        int callCount = 0;
449        if (getLinkMultArgLength(parentDir, fileBaseNames, linkDir) > maxLength
450              && fileBaseNames.length > 1) {
451          String[] list1 = Arrays.copyOf(fileBaseNames, fileBaseNames.length/2);
452          callCount += createHardLinkMult(parentDir, list1, linkDir, maxLength);
453          String[] list2 = Arrays.copyOfRange(fileBaseNames, fileBaseNames.length/2,
454              fileBaseNames.length);
455          callCount += createHardLinkMult(parentDir, list2, linkDir, maxLength);  
456          return callCount;
457        } else {
458          callCount = 1;
459        }
460        
461        // construct and execute shell command
462        String[] hardLinkCommand = getHardLinkCommand.linkMult(fileBaseNames, 
463            linkDir);
464        ShellCommandExecutor shexec = new ShellCommandExecutor(hardLinkCommand,
465          parentDir, null, 0L);
466        try {
467          shexec.execute();
468        } catch (ExitCodeException e) {
469          throw new IOException(shexec.getOutput() + e.getMessage());
470        }
471        return callCount;
472      }
473    
474       /**
475       * Retrieves the number of links to the specified file.
476       */
477      public static int getLinkCount(File fileName) throws IOException {
478        if (fileName == null) {
479          throw new IOException(
480              "invalid argument to getLinkCount: file name is null");
481        }
482        if (!fileName.exists()) {
483          throw new FileNotFoundException(fileName + " not found.");
484        }
485    
486        // construct and execute shell command
487        String[] cmd = getHardLinkCommand.linkCount(fileName);
488        String inpMsg = null;
489        String errMsg = null;
490        int exitValue = -1;
491        BufferedReader in = null;
492    
493        ShellCommandExecutor shexec = new ShellCommandExecutor(cmd);
494        try {
495          shexec.execute();
496          in = new BufferedReader(new StringReader(shexec.getOutput()));
497          inpMsg = in.readLine();
498          exitValue = shexec.getExitCode();
499          if (inpMsg == null || exitValue != 0) {
500            throw createIOException(fileName, inpMsg, errMsg, exitValue, null);
501          }
502          if (Shell.SOLARIS) {
503            String[] result = inpMsg.split("\\s+");
504            return Integer.parseInt(result[1]);
505          } else {
506            return Integer.parseInt(inpMsg);
507          }
508        } catch (ExitCodeException e) {
509          inpMsg = shexec.getOutput();
510          errMsg = e.getMessage();
511          exitValue = e.getExitCode();
512          throw createIOException(fileName, inpMsg, errMsg, exitValue, e);
513        } catch (NumberFormatException e) {
514          throw createIOException(fileName, inpMsg, errMsg, exitValue, e);
515        } finally {
516          IOUtils.closeStream(in);
517        }
518      }
519      
520      /* Create an IOException for failing to get link count. */
521      private static IOException createIOException(File f, String message,
522          String error, int exitvalue, Exception cause) {
523    
524        final String s = "Failed to get link count on file " + f
525            + ": message=" + message
526            + "; error=" + error
527            + "; exit value=" + exitvalue;
528        return (cause == null) ? new IOException(s) : new IOException(s, cause);
529      }
530      
531      
532      /**
533       * HardLink statistics counters and methods.
534       * Not multi-thread safe, obviously.
535       * Init is called during HardLink instantiation, above.
536       * 
537       * These are intended for use by knowledgeable clients, not internally, 
538       * because many of the internal methods are static and can't update these
539       * per-instance counters.
540       */
541      public static class LinkStats {
542        public int countDirs = 0; 
543        public int countSingleLinks = 0; 
544        public int countMultLinks = 0; 
545        public int countFilesMultLinks = 0; 
546        public int countEmptyDirs = 0; 
547        public int countPhysicalFileCopies = 0;
548      
549        public void clear() {
550          countDirs = 0; 
551          countSingleLinks = 0; 
552          countMultLinks = 0; 
553          countFilesMultLinks = 0; 
554          countEmptyDirs = 0; 
555          countPhysicalFileCopies = 0;
556        }
557        
558        public String report() {
559          return "HardLinkStats: " + countDirs + " Directories, including " 
560          + countEmptyDirs + " Empty Directories, " 
561          + countSingleLinks 
562          + " single Link operations, " + countMultLinks 
563          + " multi-Link operations, linking " + countFilesMultLinks 
564          + " files, total " + (countSingleLinks + countFilesMultLinks) 
565          + " linkable files.  Also physically copied " 
566          + countPhysicalFileCopies + " other files.";
567        }
568      }
569    }
570