001    /**
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements.  See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership.  The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the
007     * "License"); you may not use this file except in compliance
008     * with the License.  You may obtain a copy of the License at
009     *
010     *     http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing, software
013     * distributed under the License is distributed on an "AS IS" BASIS,
014     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015     * See the License for the specific language governing permissions and
016     * limitations under the License.
017     */
018    
019    package org.apache.hadoop.fs;
020    
021    import java.io.BufferedReader;
022    import java.io.File;
023    import java.io.FileNotFoundException;
024    import java.io.IOException;
025    import java.io.InputStreamReader;
026    import java.util.Arrays;
027    
028    /**
029     * Class for creating hardlinks.
030     * Supports Unix/Linux, WinXP/2003/Vista via Cygwin, and Mac OS X.
031     * 
032     * The HardLink class was formerly a static inner class of FSUtil,
033     * and the methods provided were blatantly non-thread-safe.
034     * To enable volume-parallel Update snapshots, we now provide static 
035     * threadsafe methods that allocate new buffer string arrays
036     * upon each call.  We also provide an API to hardlink all files in a
037     * directory with a single command, which is up to 128 times more 
038     * efficient - and minimizes the impact of the extra buffer creations.
039     */
040    public class HardLink { 
041    
042      public enum OSType {
043        OS_TYPE_UNIX,
044        OS_TYPE_WINXP,
045        OS_TYPE_SOLARIS,
046        OS_TYPE_MAC
047      }
048      
049      public static OSType osType;
050      private static HardLinkCommandGetter getHardLinkCommand;
051      
052      public final LinkStats linkStats; //not static
053      
054      //initialize the command "getters" statically, so can use their 
055      //methods without instantiating the HardLink object
056      static { 
057        osType = getOSType();
058        if (osType == OSType.OS_TYPE_WINXP) {
059          // Windows
060          getHardLinkCommand = new HardLinkCGWin();
061        } else {
062          // Unix
063          getHardLinkCommand = new HardLinkCGUnix();
064          //override getLinkCountCommand for the particular Unix variant
065          //Linux is already set as the default - {"stat","-c%h", null}
066          if (osType == OSType.OS_TYPE_MAC) {
067            String[] linkCountCmdTemplate = {"stat","-f%l", null};
068            HardLinkCGUnix.setLinkCountCmdTemplate(linkCountCmdTemplate);
069          } else if (osType == OSType.OS_TYPE_SOLARIS) {
070            String[] linkCountCmdTemplate = {"ls","-l", null};
071            HardLinkCGUnix.setLinkCountCmdTemplate(linkCountCmdTemplate);        
072          }
073        }
074      }
075    
076      public HardLink() {
077        linkStats = new LinkStats();
078      }
079      
080      static private OSType getOSType() {
081        String osName = System.getProperty("os.name");
082        if (osName.contains("Windows") &&
083                (osName.contains("XP") 
084                || osName.contains("2003") 
085                || osName.contains("Vista")
086                || osName.contains("Windows_7")
087                || osName.contains("Windows 7") 
088                || osName.contains("Windows7"))) {
089          return OSType.OS_TYPE_WINXP;
090        }
091        else if (osName.contains("SunOS") 
092                || osName.contains("Solaris")) {
093           return OSType.OS_TYPE_SOLARIS;
094        }
095        else if (osName.contains("Mac")) {
096           return OSType.OS_TYPE_MAC;
097        }
098        else {
099          return OSType.OS_TYPE_UNIX;
100        }
101      }
102      
103      /**
104       * This abstract class bridges the OS-dependent implementations of the 
105       * needed functionality for creating hardlinks and querying link counts.
106       * The particular implementation class is chosen during 
107       * static initialization phase of the HardLink class.
108       * The "getter" methods construct shell command strings for various purposes.
109       */
110      private static abstract class HardLinkCommandGetter {
111    
112        /**
113         * Get the command string needed to hardlink a bunch of files from
114         * a single source directory into a target directory.  The source directory
115         * is not specified here, but the command will be executed using the source
116         * directory as the "current working directory" of the shell invocation.
117         * 
118         * @param fileBaseNames - array of path-less file names, relative
119         *            to the source directory
120         * @param linkDir - target directory where the hardlinks will be put
121         * @return - an array of Strings suitable for use as a single shell command
122         *            with {@link Runtime.exec()}
123         * @throws IOException - if any of the file or path names misbehave
124         */
125        abstract String[] linkMult(String[] fileBaseNames, File linkDir) 
126                              throws IOException;
127        
128        /**
129         * Get the command string needed to hardlink a single file
130         */
131        abstract String[] linkOne(File file, File linkName) throws IOException;
132        
133        /**
134         * Get the command string to query the hardlink count of a file
135         */
136        abstract String[] linkCount(File file) throws IOException;
137        
138        /**
139         * Calculate the total string length of the shell command
140         * resulting from execution of linkMult, plus the length of the
141         * source directory name (which will also be provided to the shell)
142         * 
143         * @param fileDir - source directory, parent of fileBaseNames
144         * @param fileBaseNames - array of path-less file names, relative
145         *            to the source directory
146         * @param linkDir - target directory where the hardlinks will be put
147         * @return - total data length (must not exceed maxAllowedCmdArgLength)
148         * @throws IOException
149         */
150        abstract int getLinkMultArgLength(
151                         File fileDir, String[] fileBaseNames, File linkDir) 
152                         throws IOException;
153        
154        /**
155         * Get the maximum allowed string length of a shell command on this OS,
156         * which is just the documented minimum guaranteed supported command
157         * length - aprx. 32KB for Unix, and 8KB for Windows.
158         */
159        abstract int getMaxAllowedCmdArgLength(); 
160      }
161      
162      /**
163       * Implementation of HardLinkCommandGetter class for Unix
164       */
165      static class HardLinkCGUnix extends HardLinkCommandGetter {
166        private static String[] hardLinkCommand = {"ln", null, null};
167        private static String[] hardLinkMultPrefix = {"ln"};
168        private static String[] hardLinkMultSuffix = {null};
169        private static String[] getLinkCountCommand = {"stat","-c%h", null};
170        //Unix guarantees at least 32K bytes cmd length.
171        //Subtract another 64b to allow for Java 'exec' overhead
172        private static final int maxAllowedCmdArgLength = 32*1024 - 65;
173        
174        private static synchronized 
175        void setLinkCountCmdTemplate(String[] template) {
176          //May update this for specific unix variants, 
177          //after static initialization phase
178          getLinkCountCommand = template;
179        }
180        
181        /*
182         * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkOne(java.io.File, java.io.File)
183         */
184        @Override
185        String[] linkOne(File file, File linkName) 
186        throws IOException {
187          String[] buf = new String[hardLinkCommand.length];
188          System.arraycopy(hardLinkCommand, 0, buf, 0, hardLinkCommand.length);
189          //unix wants argument order: "ln <existing> <new>"
190          buf[1] = FileUtil.makeShellPath(file, true); 
191          buf[2] = FileUtil.makeShellPath(linkName, true);
192          return buf;
193        }
194        
195        /*
196         * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkMult(java.lang.String[], java.io.File)
197         */
198        @Override
199        String[] linkMult(String[] fileBaseNames, File linkDir) 
200        throws IOException {
201          String[] buf = new String[fileBaseNames.length 
202                                    + hardLinkMultPrefix.length 
203                                    + hardLinkMultSuffix.length];
204          int mark=0;
205          System.arraycopy(hardLinkMultPrefix, 0, buf, mark, 
206                           hardLinkMultPrefix.length);
207          mark += hardLinkMultPrefix.length;
208          System.arraycopy(fileBaseNames, 0, buf, mark, fileBaseNames.length);
209          mark += fileBaseNames.length;
210          buf[mark] = FileUtil.makeShellPath(linkDir, true);
211          return buf;
212        }
213        
214        /*
215         * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkCount(java.io.File)
216         */
217        @Override
218        String[] linkCount(File file) 
219        throws IOException {
220          String[] buf = new String[getLinkCountCommand.length];
221          System.arraycopy(getLinkCountCommand, 0, buf, 0, 
222                           getLinkCountCommand.length);
223          buf[getLinkCountCommand.length - 1] = FileUtil.makeShellPath(file, true);
224          return buf;
225        }
226        
227        /*
228         * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#getLinkMultArgLength(java.io.File, java.lang.String[], java.io.File)
229         */
230        @Override
231        int getLinkMultArgLength(File fileDir, String[] fileBaseNames, File linkDir) 
232        throws IOException{
233          int sum = 0;
234          for (String x : fileBaseNames) {
235            // add 1 to account for terminal null or delimiter space
236            sum += 1 + ((x == null) ? 0 : x.length());
237          }
238          sum += 2 + FileUtil.makeShellPath(fileDir, true).length()
239                 + FileUtil.makeShellPath(linkDir, true).length();
240          //add the fixed overhead of the hardLinkMult prefix and suffix
241          sum += 3; //length("ln") + 1
242          return sum;
243        }
244        
245        /*
246         * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#getMaxAllowedCmdArgLength()
247         */
248        @Override
249        int getMaxAllowedCmdArgLength() {
250          return maxAllowedCmdArgLength;
251        }
252      }
253      
254      
255      /**
256       * Implementation of HardLinkCommandGetter class for Windows
257       * 
258       * Note that the linkCount shell command for Windows is actually
259       * a Cygwin shell command, and depends on ${cygwin}/bin
260       * being in the Windows PATH environment variable, so
261       * stat.exe can be found.
262       */
263      static class HardLinkCGWin extends HardLinkCommandGetter {
264        //The Windows command getter impl class and its member fields are
265        //package-private ("default") access instead of "private" to assist 
266        //unit testing (sort of) on non-Win servers
267    
268        static String[] hardLinkCommand = {
269                            "fsutil","hardlink","create", null, null};
270        static String[] hardLinkMultPrefix = {
271                            "cmd","/q","/c","for", "%f", "in", "("};
272        static String   hardLinkMultDir = "\\%f";
273        static String[] hardLinkMultSuffix = {
274                            ")", "do", "fsutil", "hardlink", "create", null, 
275                            "%f", "1>NUL"};
276        static String[] getLinkCountCommand = {"stat","-c%h", null};
277        //Windows guarantees only 8K - 1 bytes cmd length.
278        //Subtract another 64b to allow for Java 'exec' overhead
279        static final int maxAllowedCmdArgLength = 8*1024 - 65;
280    
281        /*
282         * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkOne(java.io.File, java.io.File)
283         */
284        @Override
285        String[] linkOne(File file, File linkName) 
286        throws IOException {
287          String[] buf = new String[hardLinkCommand.length];
288          System.arraycopy(hardLinkCommand, 0, buf, 0, hardLinkCommand.length);
289          //windows wants argument order: "create <new> <existing>"
290          buf[4] = file.getCanonicalPath(); 
291          buf[3] = linkName.getCanonicalPath();
292          return buf;
293        }
294        
295        /*
296         * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkMult(java.lang.String[], java.io.File)
297         */
298        @Override
299        String[] linkMult(String[] fileBaseNames, File linkDir) 
300        throws IOException {
301          String[] buf = new String[fileBaseNames.length 
302                                    + hardLinkMultPrefix.length 
303                                    + hardLinkMultSuffix.length];
304          String td = linkDir.getCanonicalPath() + hardLinkMultDir;
305          int mark=0;
306          System.arraycopy(hardLinkMultPrefix, 0, buf, mark, 
307                           hardLinkMultPrefix.length);
308          mark += hardLinkMultPrefix.length;
309          System.arraycopy(fileBaseNames, 0, buf, mark, fileBaseNames.length);
310          mark += fileBaseNames.length;
311          System.arraycopy(hardLinkMultSuffix, 0, buf, mark, 
312                           hardLinkMultSuffix.length);
313          mark += hardLinkMultSuffix.length;
314          buf[mark - 3] = td;
315          return buf;
316        }
317        
318        /*
319         * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkCount(java.io.File)
320         */
321        @Override
322        String[] linkCount(File file) 
323        throws IOException {
324          String[] buf = new String[getLinkCountCommand.length];
325          System.arraycopy(getLinkCountCommand, 0, buf, 0, 
326                           getLinkCountCommand.length);
327          //The linkCount command is actually a Cygwin shell command,
328          //not a Windows shell command, so we should use "makeShellPath()"
329          //instead of "getCanonicalPath()".  However, that causes another
330          //shell exec to "cygpath.exe", and "stat.exe" actually can handle
331          //DOS-style paths (it just prints a couple hundred bytes of warning
332          //to stderr), so we use the more efficient "getCanonicalPath()".
333          buf[getLinkCountCommand.length - 1] = file.getCanonicalPath();
334          return buf;
335        }
336        
337        /*
338         * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#getLinkMultArgLength(java.io.File, java.lang.String[], java.io.File)
339         */
340        @Override
341        int getLinkMultArgLength(File fileDir, String[] fileBaseNames, File linkDir) 
342        throws IOException {
343          int sum = 0;
344          for (String x : fileBaseNames) {
345            // add 1 to account for terminal null or delimiter space
346            sum += 1 + ((x == null) ? 0 : x.length());
347          }
348          sum += 2 + fileDir.getCanonicalPath().length() +
349                   linkDir.getCanonicalPath().length();
350          //add the fixed overhead of the hardLinkMult command 
351          //(prefix, suffix, and Dir suffix)
352          sum += ("cmd.exe /q /c for %f in ( ) do "
353                  + "fsutil hardlink create \\%f %f 1>NUL ").length();
354          return sum;
355        }
356        
357        /*
358         * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#getMaxAllowedCmdArgLength()
359         */
360        @Override
361        int getMaxAllowedCmdArgLength() {
362          return maxAllowedCmdArgLength;
363        }
364      }
365      
366      
367      /**
368       * Calculate the nominal length of all contributors to the total 
369       * commandstring length, including fixed overhead of the OS-dependent 
370       * command.  It's protected rather than private, to assist unit testing,
371       * but real clients are not expected to need it -- see the way 
372       * createHardLinkMult() uses it internally so the user doesn't need to worry
373       * about it.
374       * 
375       * @param fileDir - source directory, parent of fileBaseNames
376       * @param fileBaseNames - array of path-less file names, relative
377       *            to the source directory
378       * @param linkDir - target directory where the hardlinks will be put
379       * @return - total data length (must not exceed maxAllowedCmdArgLength)
380       * @throws IOException
381       */
382      protected static int getLinkMultArgLength(
383              File fileDir, String[] fileBaseNames, File linkDir) 
384      throws IOException {
385        return getHardLinkCommand.getLinkMultArgLength(fileDir, 
386              fileBaseNames, linkDir);
387      }
388      
389      /**
390       * Return this private value for use by unit tests.
391       * Shell commands are not allowed to have a total string length
392       * exceeding this size.
393       */
394      protected static int getMaxAllowedCmdArgLength() {
395        return getHardLinkCommand.getMaxAllowedCmdArgLength();
396      }
397      
398      /*
399       * ****************************************************
400       * Complexity is above.  User-visible functionality is below
401       * ****************************************************
402       */
403    
404      /**
405       * Creates a hardlink 
406       * @param file - existing source file
407       * @param linkName - desired target link file
408       */
409      public static void createHardLink(File file, File linkName) 
410      throws IOException {
411        if (file == null) {
412          throw new IOException(
413              "invalid arguments to createHardLink: source file is null");
414        }
415        if (linkName == null) {
416          throw new IOException(
417              "invalid arguments to createHardLink: link name is null");
418        }
419              // construct and execute shell command
420        String[] hardLinkCommand = getHardLinkCommand.linkOne(file, linkName);
421        Process process = Runtime.getRuntime().exec(hardLinkCommand);
422        try {
423          if (process.waitFor() != 0) {
424            String errMsg = new BufferedReader(new InputStreamReader(
425                process.getInputStream())).readLine();
426            if (errMsg == null)  errMsg = "";
427            String inpMsg = new BufferedReader(new InputStreamReader(
428                process.getErrorStream())).readLine();
429            if (inpMsg == null)  inpMsg = "";
430            throw new IOException(errMsg + inpMsg);
431          }
432        } catch (InterruptedException e) {
433          throw new IOException(e);
434        } finally {
435          process.destroy();
436        }
437      }
438    
439      /**
440       * Creates hardlinks from multiple existing files within one parent
441       * directory, into one target directory.
442       * @param parentDir - directory containing source files
443       * @param fileBaseNames - list of path-less file names, as returned by 
444       *                        parentDir.list()
445       * @param linkDir - where the hardlinks should be put.  It must already exist.
446       * 
447       * If the list of files is too long (overflows maxAllowedCmdArgLength),
448       * we will automatically split it into multiple invocations of the
449       * underlying method.
450       */
451      public static void createHardLinkMult(File parentDir, String[] fileBaseNames, 
452          File linkDir) throws IOException {
453        //This is the public method all non-test clients are expected to use.
454        //Normal case - allow up to maxAllowedCmdArgLength characters in the cmd
455        createHardLinkMult(parentDir, fileBaseNames, linkDir, 
456                           getHardLinkCommand.getMaxAllowedCmdArgLength());
457      }
458    
459      /*
460       * Implements {@link createHardLinkMult} with added variable  "maxLength",
461       * to ease unit testing of the auto-splitting feature for long lists.
462       * Likewise why it returns "callCount", the number of sub-arrays that
463       * the file list had to be split into.
464       * Non-test clients are expected to call the public method instead.
465       */
466      protected static int createHardLinkMult(File parentDir, 
467          String[] fileBaseNames, File linkDir, int maxLength) 
468      throws IOException {
469        if (parentDir == null) {
470          throw new IOException(
471              "invalid arguments to createHardLinkMult: parent directory is null");
472        }
473        if (linkDir == null) {
474          throw new IOException(
475              "invalid arguments to createHardLinkMult: link directory is null");
476        }
477        if (fileBaseNames == null) {
478          throw new IOException(
479              "invalid arguments to createHardLinkMult: "
480              + "filename list can be empty but not null");
481        }
482        if (fileBaseNames.length == 0) {
483          //the OS cmds can't handle empty list of filenames, 
484          //but it's legal, so just return.
485          return 0; 
486        }
487        if (!linkDir.exists()) {
488          throw new FileNotFoundException(linkDir + " not found.");
489        }
490    
491        //if the list is too long, split into multiple invocations
492        int callCount = 0;
493        if (getLinkMultArgLength(parentDir, fileBaseNames, linkDir) > maxLength
494              && fileBaseNames.length > 1) {
495          String[] list1 = Arrays.copyOf(fileBaseNames, fileBaseNames.length/2);
496          callCount += createHardLinkMult(parentDir, list1, linkDir, maxLength);
497          String[] list2 = Arrays.copyOfRange(fileBaseNames, fileBaseNames.length/2,
498              fileBaseNames.length);
499          callCount += createHardLinkMult(parentDir, list2, linkDir, maxLength);  
500          return callCount;
501        } else {
502          callCount = 1;
503        }
504        
505        // construct and execute shell command
506        String[] hardLinkCommand = getHardLinkCommand.linkMult(fileBaseNames, 
507            linkDir);
508        Process process = Runtime.getRuntime().exec(hardLinkCommand, null, 
509            parentDir);
510        try {
511          if (process.waitFor() != 0) {
512            String errMsg = new BufferedReader(new InputStreamReader(
513                process.getInputStream())).readLine();
514            if (errMsg == null)  errMsg = "";
515            String inpMsg = new BufferedReader(new InputStreamReader(
516                process.getErrorStream())).readLine();
517            if (inpMsg == null)  inpMsg = "";
518            throw new IOException(errMsg + inpMsg);
519          }
520        } catch (InterruptedException e) {
521          throw new IOException(e);
522        } finally {
523          process.destroy();
524        }
525        return callCount;
526      }
527    
528       /**
529       * Retrieves the number of links to the specified file.
530       */
531      public static int getLinkCount(File fileName) throws IOException {
532        if (fileName == null) {
533          throw new IOException(
534              "invalid argument to getLinkCount: file name is null");
535        }
536        if (!fileName.exists()) {
537          throw new FileNotFoundException(fileName + " not found.");
538        }
539    
540        // construct and execute shell command
541        String[] cmd = getHardLinkCommand.linkCount(fileName);
542        String inpMsg = null;
543        String errMsg = null;
544        int exitValue = -1;
545        BufferedReader in = null;
546        BufferedReader err = null;
547    
548        Process process = Runtime.getRuntime().exec(cmd);
549        try {
550          exitValue = process.waitFor();
551          in = new BufferedReader(new InputStreamReader(
552                                      process.getInputStream()));
553          inpMsg = in.readLine();
554          err = new BufferedReader(new InputStreamReader(
555                                       process.getErrorStream()));
556          errMsg = err.readLine();
557          if (inpMsg == null || exitValue != 0) {
558            throw createIOException(fileName, inpMsg, errMsg, exitValue, null);
559          }
560          if (osType == OSType.OS_TYPE_SOLARIS) {
561            String[] result = inpMsg.split("\\s+");
562            return Integer.parseInt(result[1]);
563          } else {
564            return Integer.parseInt(inpMsg);
565          }
566        } catch (NumberFormatException e) {
567          throw createIOException(fileName, inpMsg, errMsg, exitValue, e);
568        } catch (InterruptedException e) {
569          throw createIOException(fileName, inpMsg, errMsg, exitValue, e);
570        } finally {
571          process.destroy();
572          if (in != null) in.close();
573          if (err != null) err.close();
574        }
575      }
576      
577      /* Create an IOException for failing to get link count. */
578      private static IOException createIOException(File f, String message,
579          String error, int exitvalue, Exception cause) {
580        
581        final String winErrMsg = "; Windows errors in getLinkCount are often due "
582             + "to Cygwin misconfiguration";
583    
584        final String s = "Failed to get link count on file " + f
585            + ": message=" + message
586            + "; error=" + error
587            + ((osType == OSType.OS_TYPE_WINXP) ? winErrMsg : "")
588            + "; exit value=" + exitvalue;
589        return (cause == null) ? new IOException(s) : new IOException(s, cause);
590      }
591      
592      
593      /**
594       * HardLink statistics counters and methods.
595       * Not multi-thread safe, obviously.
596       * Init is called during HardLink instantiation, above.
597       * 
598       * These are intended for use by knowledgeable clients, not internally, 
599       * because many of the internal methods are static and can't update these
600       * per-instance counters.
601       */
602      public static class LinkStats {
603        public int countDirs = 0; 
604        public int countSingleLinks = 0; 
605        public int countMultLinks = 0; 
606        public int countFilesMultLinks = 0; 
607        public int countEmptyDirs = 0; 
608        public int countPhysicalFileCopies = 0;
609      
610        public void clear() {
611          countDirs = 0; 
612          countSingleLinks = 0; 
613          countMultLinks = 0; 
614          countFilesMultLinks = 0; 
615          countEmptyDirs = 0; 
616          countPhysicalFileCopies = 0;
617        }
618        
619        public String report() {
620          return "HardLinkStats: " + countDirs + " Directories, including " 
621          + countEmptyDirs + " Empty Directories, " 
622          + countSingleLinks 
623          + " single Link operations, " + countMultLinks 
624          + " multi-Link operations, linking " + countFilesMultLinks 
625          + " files, total " + (countSingleLinks + countFilesMultLinks) 
626          + " linkable files.  Also physically copied " 
627          + countPhysicalFileCopies + " other files.";
628        }
629      }
630    }
631