001    /**
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements.  See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership.  The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the
007     * "License"); you may not use this file except in compliance
008     * with the License.  You may obtain a copy of the License at
009     *
010     *     http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing, software
013     * distributed under the License is distributed on an "AS IS" BASIS,
014     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015     * See the License for the specific language governing permissions and
016     * limitations under the License.
017     */
018    
019    package org.apache.hadoop.fs;
020    
021    import java.io.BufferedReader;
022    import java.io.File;
023    import java.io.FileNotFoundException;
024    import java.io.IOException;
025    import java.io.InputStreamReader;
026    import java.util.Arrays;
027    
028    import org.apache.hadoop.util.Shell;
029    
030    /**
031     * Class for creating hardlinks.
032     * Supports Unix/Linux, Windows via winutils , and Mac OS X.
033     * 
034     * The HardLink class was formerly a static inner class of FSUtil,
035     * and the methods provided were blatantly non-thread-safe.
036     * To enable volume-parallel Update snapshots, we now provide static 
037     * threadsafe methods that allocate new buffer string arrays
038     * upon each call.  We also provide an API to hardlink all files in a
039     * directory with a single command, which is up to 128 times more 
040     * efficient - and minimizes the impact of the extra buffer creations.
041     */
042    public class HardLink { 
043    
044      private static HardLinkCommandGetter getHardLinkCommand;
045      
046      public final LinkStats linkStats; //not static
047      
048      //initialize the command "getters" statically, so can use their 
049      //methods without instantiating the HardLink object
050      static { 
051        if (Shell.WINDOWS) {
052          // Windows
053          getHardLinkCommand = new HardLinkCGWin();
054        } else {
055          // Unix or Linux
056          getHardLinkCommand = new HardLinkCGUnix();
057          //override getLinkCountCommand for the particular Unix variant
058          //Linux is already set as the default - {"stat","-c%h", null}
059          if (Shell.MAC || Shell.FREEBSD) {
060            String[] linkCountCmdTemplate = {"/usr/bin/stat","-f%l", null};
061            HardLinkCGUnix.setLinkCountCmdTemplate(linkCountCmdTemplate);
062          } else if (Shell.SOLARIS) {
063            String[] linkCountCmdTemplate = {"ls","-l", null};
064            HardLinkCGUnix.setLinkCountCmdTemplate(linkCountCmdTemplate);        
065          }
066        }
067      }
068    
069      public HardLink() {
070        linkStats = new LinkStats();
071      }
072      
  /**
   * This abstract class bridges the OS-dependent implementations of the 
   * needed functionality for creating hardlinks and querying link counts.
   * The particular implementation class is chosen during the
   * static initialization phase of the HardLink class.
   * The "getter" methods construct shell command argument arrays for
   * various purposes; they do not execute anything themselves.
   */
  private static abstract class HardLinkCommandGetter {

    /**
     * Get the command string needed to hardlink a bunch of files from
     * a single source directory into a target directory.  The source directory
     * is not specified here, but the command will be executed using the source
     * directory as the "current working directory" of the shell invocation.
     * 
     * @param fileBaseNames - array of path-less file names, relative
     *            to the source directory
     * @param linkDir - target directory where the hardlinks will be put
     * @return - an array of Strings suitable for use as a single shell command
     *            with {@link Runtime#exec(String[], String[], File)}
     * @throws IOException - if any of the file or path names misbehave
     */
    abstract String[] linkMult(String[] fileBaseNames, File linkDir) 
                          throws IOException;
    
    /**
     * Get the command string needed to hardlink a single file.
     * 
     * @param file - existing source file
     * @param linkName - desired target link file
     * @return - an array of Strings suitable for use as a single shell command
     *            with {@link Runtime#exec(String[])}
     * @throws IOException - if either path cannot be converted for the command
     */
    abstract String[] linkOne(File file, File linkName) throws IOException;
    
    /**
     * Get the command string to query the hardlink count of a file.
     * 
     * @param file - file whose link count is wanted
     * @return - an array of Strings suitable for use as a single shell command
     *            with {@link Runtime#exec(String[])}
     * @throws IOException - if the path cannot be converted for the command
     */
    abstract String[] linkCount(File file) throws IOException;
    
    /**
     * Calculate the total string length of the shell command
     * resulting from execution of linkMult, plus the length of the
     * source directory name (which will also be provided to the shell)
     * 
     * @param fileDir - source directory, parent of fileBaseNames
     * @param fileBaseNames - array of path-less file names, relative
     *            to the source directory
     * @param linkDir - target directory where the hardlinks will be put
     * @return - total data length (must not exceed maxAllowedCmdArgLength)
     * @throws IOException - if any of the file or path names misbehave
     */
    abstract int getLinkMultArgLength(
                     File fileDir, String[] fileBaseNames, File linkDir) 
                     throws IOException;
    
    /**
     * Get the maximum allowed string length of a shell command on this OS,
     * which is just the documented minimum guaranteed supported command
     * length - approximately 32KB for Unix, and 8KB for Windows.
     */
    abstract int getMaxAllowedCmdArgLength(); 
  }
131      
132      /**
133       * Implementation of HardLinkCommandGetter class for Unix
134       */
135      static class HardLinkCGUnix extends HardLinkCommandGetter {
136        private static String[] hardLinkCommand = {"ln", null, null};
137        private static String[] hardLinkMultPrefix = {"ln"};
138        private static String[] hardLinkMultSuffix = {null};
139        private static String[] getLinkCountCommand = {"stat","-c%h", null};
140        //Unix guarantees at least 32K bytes cmd length.
141        //Subtract another 64b to allow for Java 'exec' overhead
142        private static final int maxAllowedCmdArgLength = 32*1024 - 65;
143        
144        private static synchronized 
145        void setLinkCountCmdTemplate(String[] template) {
146          //May update this for specific unix variants, 
147          //after static initialization phase
148          getLinkCountCommand = template;
149        }
150        
151        /*
152         * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkOne(java.io.File, java.io.File)
153         */
154        @Override
155        String[] linkOne(File file, File linkName) 
156        throws IOException {
157          String[] buf = new String[hardLinkCommand.length];
158          System.arraycopy(hardLinkCommand, 0, buf, 0, hardLinkCommand.length);
159          //unix wants argument order: "ln <existing> <new>"
160          buf[1] = FileUtil.makeShellPath(file, true); 
161          buf[2] = FileUtil.makeShellPath(linkName, true);
162          return buf;
163        }
164        
165        /*
166         * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkMult(java.lang.String[], java.io.File)
167         */
168        @Override
169        String[] linkMult(String[] fileBaseNames, File linkDir) 
170        throws IOException {
171          String[] buf = new String[fileBaseNames.length 
172                                    + hardLinkMultPrefix.length 
173                                    + hardLinkMultSuffix.length];
174          int mark=0;
175          System.arraycopy(hardLinkMultPrefix, 0, buf, mark, 
176                           hardLinkMultPrefix.length);
177          mark += hardLinkMultPrefix.length;
178          System.arraycopy(fileBaseNames, 0, buf, mark, fileBaseNames.length);
179          mark += fileBaseNames.length;
180          buf[mark] = FileUtil.makeShellPath(linkDir, true);
181          return buf;
182        }
183        
184        /*
185         * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkCount(java.io.File)
186         */
187        @Override
188        String[] linkCount(File file) 
189        throws IOException {
190          String[] buf = new String[getLinkCountCommand.length];
191          System.arraycopy(getLinkCountCommand, 0, buf, 0, 
192                           getLinkCountCommand.length);
193          buf[getLinkCountCommand.length - 1] = FileUtil.makeShellPath(file, true);
194          return buf;
195        }
196        
197        /*
198         * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#getLinkMultArgLength(java.io.File, java.lang.String[], java.io.File)
199         */
200        @Override
201        int getLinkMultArgLength(File fileDir, String[] fileBaseNames, File linkDir) 
202        throws IOException{
203          int sum = 0;
204          for (String x : fileBaseNames) {
205            // add 1 to account for terminal null or delimiter space
206            sum += 1 + ((x == null) ? 0 : x.length());
207          }
208          sum += 2 + FileUtil.makeShellPath(fileDir, true).length()
209                 + FileUtil.makeShellPath(linkDir, true).length();
210          //add the fixed overhead of the hardLinkMult prefix and suffix
211          sum += 3; //length("ln") + 1
212          return sum;
213        }
214        
215        /*
216         * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#getMaxAllowedCmdArgLength()
217         */
218        @Override
219        int getMaxAllowedCmdArgLength() {
220          return maxAllowedCmdArgLength;
221        }
222      }
223      
224      
225      /**
226       * Implementation of HardLinkCommandGetter class for Windows
227       */
228      static class HardLinkCGWin extends HardLinkCommandGetter {
229        //The Windows command getter impl class and its member fields are
230        //package-private ("default") access instead of "private" to assist 
231        //unit testing (sort of) on non-Win servers
232    
233        static String[] hardLinkCommand = {
234                            Shell.WINUTILS,"hardlink","create", null, null};
235        static String[] hardLinkMultPrefix = {
236                            "cmd","/q","/c","for", "%f", "in", "("};
237        static String   hardLinkMultDir = "\\%f";
238        static String[] hardLinkMultSuffix = {
239                            ")", "do", Shell.WINUTILS, "hardlink", "create", null,
240                            "%f", "1>NUL"};
241        static String[] getLinkCountCommand = {
242                            Shell.WINUTILS, "hardlink",
243                            "stat", null};
244        //Windows guarantees only 8K - 1 bytes cmd length.
245        //Subtract another 64b to allow for Java 'exec' overhead
246        static final int maxAllowedCmdArgLength = 8*1024 - 65;
247    
248        /*
249         * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkOne(java.io.File, java.io.File)
250         */
251        @Override
252        String[] linkOne(File file, File linkName) 
253        throws IOException {
254          String[] buf = new String[hardLinkCommand.length];
255          System.arraycopy(hardLinkCommand, 0, buf, 0, hardLinkCommand.length);
256          //windows wants argument order: "create <new> <existing>"
257          buf[4] = file.getCanonicalPath(); 
258          buf[3] = linkName.getCanonicalPath();
259          return buf;
260        }
261        
262        /*
263         * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkMult(java.lang.String[], java.io.File)
264         */
265        @Override
266        String[] linkMult(String[] fileBaseNames, File linkDir) 
267        throws IOException {
268          String[] buf = new String[fileBaseNames.length 
269                                    + hardLinkMultPrefix.length 
270                                    + hardLinkMultSuffix.length];
271          String td = linkDir.getCanonicalPath() + hardLinkMultDir;
272          int mark=0;
273          System.arraycopy(hardLinkMultPrefix, 0, buf, mark, 
274                           hardLinkMultPrefix.length);
275          mark += hardLinkMultPrefix.length;
276          System.arraycopy(fileBaseNames, 0, buf, mark, fileBaseNames.length);
277          mark += fileBaseNames.length;
278          System.arraycopy(hardLinkMultSuffix, 0, buf, mark, 
279                           hardLinkMultSuffix.length);
280          mark += hardLinkMultSuffix.length;
281          buf[mark - 3] = td;
282          return buf;
283        }
284        
285        /*
286         * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkCount(java.io.File)
287         */
288        @Override
289        String[] linkCount(File file) 
290        throws IOException {
291          String[] buf = new String[getLinkCountCommand.length];
292          System.arraycopy(getLinkCountCommand, 0, buf, 0, 
293                           getLinkCountCommand.length);
294          buf[getLinkCountCommand.length - 1] = file.getCanonicalPath();
295          return buf;
296        }
297        
298        /*
299         * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#getLinkMultArgLength(java.io.File, java.lang.String[], java.io.File)
300         */
301        @Override
302        int getLinkMultArgLength(File fileDir, String[] fileBaseNames, File linkDir) 
303        throws IOException {
304          int sum = 0;
305          for (String x : fileBaseNames) {
306            // add 1 to account for terminal null or delimiter space
307            sum += 1 + ((x == null) ? 0 : x.length());
308          }
309          sum += 2 + fileDir.getCanonicalPath().length() +
310                   linkDir.getCanonicalPath().length();
311          //add the fixed overhead of the hardLinkMult command 
312          //(prefix, suffix, and Dir suffix)
313          sum += ("cmd.exe /q /c for %f in ( ) do "
314                  + Shell.WINUTILS + " hardlink create \\%f %f 1>NUL ").length();
315          return sum;
316        }
317        
318        /*
319         * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#getMaxAllowedCmdArgLength()
320         */
321        @Override
322        int getMaxAllowedCmdArgLength() {
323          return maxAllowedCmdArgLength;
324        }
325      }
326      
327      
328      /**
329       * Calculate the nominal length of all contributors to the total 
330       * commandstring length, including fixed overhead of the OS-dependent 
331       * command.  It's protected rather than private, to assist unit testing,
332       * but real clients are not expected to need it -- see the way 
333       * createHardLinkMult() uses it internally so the user doesn't need to worry
334       * about it.
335       * 
336       * @param fileDir - source directory, parent of fileBaseNames
337       * @param fileBaseNames - array of path-less file names, relative
338       *            to the source directory
339       * @param linkDir - target directory where the hardlinks will be put
340       * @return - total data length (must not exceed maxAllowedCmdArgLength)
341       * @throws IOException
342       */
343      protected static int getLinkMultArgLength(
344              File fileDir, String[] fileBaseNames, File linkDir) 
345      throws IOException {
346        return getHardLinkCommand.getLinkMultArgLength(fileDir, 
347              fileBaseNames, linkDir);
348      }
349      
350      /**
351       * Return this private value for use by unit tests.
352       * Shell commands are not allowed to have a total string length
353       * exceeding this size.
354       */
355      protected static int getMaxAllowedCmdArgLength() {
356        return getHardLinkCommand.getMaxAllowedCmdArgLength();
357      }
358      
359      /*
360       * ****************************************************
361       * Complexity is above.  User-visible functionality is below
362       * ****************************************************
363       */
364    
365      /**
366       * Creates a hardlink 
367       * @param file - existing source file
368       * @param linkName - desired target link file
369       */
370      public static void createHardLink(File file, File linkName) 
371      throws IOException {
372        if (file == null) {
373          throw new IOException(
374              "invalid arguments to createHardLink: source file is null");
375        }
376        if (linkName == null) {
377          throw new IOException(
378              "invalid arguments to createHardLink: link name is null");
379        }
380              // construct and execute shell command
381        String[] hardLinkCommand = getHardLinkCommand.linkOne(file, linkName);
382        Process process = Runtime.getRuntime().exec(hardLinkCommand);
383        try {
384          if (process.waitFor() != 0) {
385            String errMsg = new BufferedReader(new InputStreamReader(
386                process.getInputStream())).readLine();
387            if (errMsg == null)  errMsg = "";
388            String inpMsg = new BufferedReader(new InputStreamReader(
389                process.getErrorStream())).readLine();
390            if (inpMsg == null)  inpMsg = "";
391            throw new IOException(errMsg + inpMsg);
392          }
393        } catch (InterruptedException e) {
394          throw new IOException(e);
395        } finally {
396          process.destroy();
397        }
398      }
399    
400      /**
401       * Creates hardlinks from multiple existing files within one parent
402       * directory, into one target directory.
403       * @param parentDir - directory containing source files
404       * @param fileBaseNames - list of path-less file names, as returned by 
405       *                        parentDir.list()
406       * @param linkDir - where the hardlinks should be put.  It must already exist.
407       * 
408       * If the list of files is too long (overflows maxAllowedCmdArgLength),
409       * we will automatically split it into multiple invocations of the
410       * underlying method.
411       */
412      public static void createHardLinkMult(File parentDir, String[] fileBaseNames, 
413          File linkDir) throws IOException {
414        //This is the public method all non-test clients are expected to use.
415        //Normal case - allow up to maxAllowedCmdArgLength characters in the cmd
416        createHardLinkMult(parentDir, fileBaseNames, linkDir, 
417                           getHardLinkCommand.getMaxAllowedCmdArgLength());
418      }
419    
420      /*
421       * Implements {@link createHardLinkMult} with added variable  "maxLength",
422       * to ease unit testing of the auto-splitting feature for long lists.
423       * Likewise why it returns "callCount", the number of sub-arrays that
424       * the file list had to be split into.
425       * Non-test clients are expected to call the public method instead.
426       */
427      protected static int createHardLinkMult(File parentDir, 
428          String[] fileBaseNames, File linkDir, int maxLength) 
429      throws IOException {
430        if (parentDir == null) {
431          throw new IOException(
432              "invalid arguments to createHardLinkMult: parent directory is null");
433        }
434        if (linkDir == null) {
435          throw new IOException(
436              "invalid arguments to createHardLinkMult: link directory is null");
437        }
438        if (fileBaseNames == null) {
439          throw new IOException(
440              "invalid arguments to createHardLinkMult: "
441              + "filename list can be empty but not null");
442        }
443        if (fileBaseNames.length == 0) {
444          //the OS cmds can't handle empty list of filenames, 
445          //but it's legal, so just return.
446          return 0; 
447        }
448        if (!linkDir.exists()) {
449          throw new FileNotFoundException(linkDir + " not found.");
450        }
451    
452        //if the list is too long, split into multiple invocations
453        int callCount = 0;
454        if (getLinkMultArgLength(parentDir, fileBaseNames, linkDir) > maxLength
455              && fileBaseNames.length > 1) {
456          String[] list1 = Arrays.copyOf(fileBaseNames, fileBaseNames.length/2);
457          callCount += createHardLinkMult(parentDir, list1, linkDir, maxLength);
458          String[] list2 = Arrays.copyOfRange(fileBaseNames, fileBaseNames.length/2,
459              fileBaseNames.length);
460          callCount += createHardLinkMult(parentDir, list2, linkDir, maxLength);  
461          return callCount;
462        } else {
463          callCount = 1;
464        }
465        
466        // construct and execute shell command
467        String[] hardLinkCommand = getHardLinkCommand.linkMult(fileBaseNames, 
468            linkDir);
469        Process process = Runtime.getRuntime().exec(hardLinkCommand, null, 
470            parentDir);
471        try {
472          if (process.waitFor() != 0) {
473            String errMsg = new BufferedReader(new InputStreamReader(
474                process.getInputStream())).readLine();
475            if (errMsg == null)  errMsg = "";
476            String inpMsg = new BufferedReader(new InputStreamReader(
477                process.getErrorStream())).readLine();
478            if (inpMsg == null)  inpMsg = "";
479            throw new IOException(errMsg + inpMsg);
480          }
481        } catch (InterruptedException e) {
482          throw new IOException(e);
483        } finally {
484          process.destroy();
485        }
486        return callCount;
487      }
488    
489       /**
490       * Retrieves the number of links to the specified file.
491       */
492      public static int getLinkCount(File fileName) throws IOException {
493        if (fileName == null) {
494          throw new IOException(
495              "invalid argument to getLinkCount: file name is null");
496        }
497        if (!fileName.exists()) {
498          throw new FileNotFoundException(fileName + " not found.");
499        }
500    
501        // construct and execute shell command
502        String[] cmd = getHardLinkCommand.linkCount(fileName);
503        String inpMsg = null;
504        String errMsg = null;
505        int exitValue = -1;
506        BufferedReader in = null;
507        BufferedReader err = null;
508    
509        Process process = Runtime.getRuntime().exec(cmd);
510        try {
511          exitValue = process.waitFor();
512          in = new BufferedReader(new InputStreamReader(
513                                      process.getInputStream()));
514          inpMsg = in.readLine();
515          err = new BufferedReader(new InputStreamReader(
516                                       process.getErrorStream()));
517          errMsg = err.readLine();
518          if (inpMsg == null || exitValue != 0) {
519            throw createIOException(fileName, inpMsg, errMsg, exitValue, null);
520          }
521          if (Shell.SOLARIS) {
522            String[] result = inpMsg.split("\\s+");
523            return Integer.parseInt(result[1]);
524          } else {
525            return Integer.parseInt(inpMsg);
526          }
527        } catch (NumberFormatException e) {
528          throw createIOException(fileName, inpMsg, errMsg, exitValue, e);
529        } catch (InterruptedException e) {
530          throw createIOException(fileName, inpMsg, errMsg, exitValue, e);
531        } finally {
532          process.destroy();
533          if (in != null) in.close();
534          if (err != null) err.close();
535        }
536      }
537      
538      /* Create an IOException for failing to get link count. */
539      private static IOException createIOException(File f, String message,
540          String error, int exitvalue, Exception cause) {
541    
542        final String s = "Failed to get link count on file " + f
543            + ": message=" + message
544            + "; error=" + error
545            + "; exit value=" + exitvalue;
546        return (cause == null) ? new IOException(s) : new IOException(s, cause);
547      }
548      
549      
550      /**
551       * HardLink statistics counters and methods.
552       * Not multi-thread safe, obviously.
553       * Init is called during HardLink instantiation, above.
554       * 
555       * These are intended for use by knowledgeable clients, not internally, 
556       * because many of the internal methods are static and can't update these
557       * per-instance counters.
558       */
559      public static class LinkStats {
560        public int countDirs = 0; 
561        public int countSingleLinks = 0; 
562        public int countMultLinks = 0; 
563        public int countFilesMultLinks = 0; 
564        public int countEmptyDirs = 0; 
565        public int countPhysicalFileCopies = 0;
566      
567        public void clear() {
568          countDirs = 0; 
569          countSingleLinks = 0; 
570          countMultLinks = 0; 
571          countFilesMultLinks = 0; 
572          countEmptyDirs = 0; 
573          countPhysicalFileCopies = 0;
574        }
575        
576        public String report() {
577          return "HardLinkStats: " + countDirs + " Directories, including " 
578          + countEmptyDirs + " Empty Directories, " 
579          + countSingleLinks 
580          + " single Link operations, " + countMultLinks 
581          + " multi-Link operations, linking " + countFilesMultLinks 
582          + " files, total " + (countSingleLinks + countFilesMultLinks) 
583          + " linkable files.  Also physically copied " 
584          + countPhysicalFileCopies + " other files.";
585        }
586      }
587    }
588