001    /**
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements.  See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership.  The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the
007     * "License"); you may not use this file except in compliance
008     * with the License.  You may obtain a copy of the License at
009     *
010     *     http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing, software
013     * distributed under the License is distributed on an "AS IS" BASIS,
014     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015     * See the License for the specific language governing permissions and
016     * limitations under the License.
017     */
018    
019    package org.apache.hadoop.fs;
020    
021    import java.io.BufferedReader;
022    import java.io.File;
023    import java.io.FileNotFoundException;
024    import java.io.IOException;
025    import java.io.InputStreamReader;
026    import java.util.Arrays;
027    
028    /**
029     * Class for creating hardlinks.
030     * Supports Unix/Linux, WinXP/2003/Vista via Cygwin, and Mac OS X.
031     * 
032     * The HardLink class was formerly a static inner class of FSUtil,
033     * and the methods provided were blatantly non-thread-safe.
034     * To enable volume-parallel Update snapshots, we now provide static 
035     * threadsafe methods that allocate new buffer string arrays
036     * upon each call.  We also provide an API to hardlink all files in a
037     * directory with a single command, which is up to 128 times more 
038     * efficient - and minimizes the impact of the extra buffer creations.
039     */
040    public class HardLink { 
041    
042      public enum OSType {
043        OS_TYPE_UNIX,
044        OS_TYPE_WINXP,
045        OS_TYPE_SOLARIS,
046        OS_TYPE_MAC,
047        OS_TYPE_FREEBSD
048      }
049      
050      public static OSType osType;
051      private static HardLinkCommandGetter getHardLinkCommand;
052      
053      public final LinkStats linkStats; //not static
054      
055      //initialize the command "getters" statically, so can use their 
056      //methods without instantiating the HardLink object
057      static { 
058        osType = getOSType();
059        if (osType == OSType.OS_TYPE_WINXP) {
060          // Windows
061          getHardLinkCommand = new HardLinkCGWin();
062        } else {
063          // Unix
064          getHardLinkCommand = new HardLinkCGUnix();
065          //override getLinkCountCommand for the particular Unix variant
066          //Linux is already set as the default - {"stat","-c%h", null}
067          if (osType == OSType.OS_TYPE_MAC || osType == OSType.OS_TYPE_FREEBSD) {
068            String[] linkCountCmdTemplate = {"/usr/bin/stat","-f%l", null};
069            HardLinkCGUnix.setLinkCountCmdTemplate(linkCountCmdTemplate);
070          } else if (osType == OSType.OS_TYPE_SOLARIS) {
071            String[] linkCountCmdTemplate = {"ls","-l", null};
072            HardLinkCGUnix.setLinkCountCmdTemplate(linkCountCmdTemplate);        
073          }
074        }
075      }
076    
077      public HardLink() {
078        linkStats = new LinkStats();
079      }
080      
081      static private OSType getOSType() {
082        String osName = System.getProperty("os.name");
083        if (osName.contains("Windows") &&
084                (osName.contains("XP") 
085                || osName.contains("2003") 
086                || osName.contains("Vista")
087                || osName.contains("Windows_7")
088                || osName.contains("Windows 7") 
089                || osName.contains("Windows7"))) {
090          return OSType.OS_TYPE_WINXP;
091        }
092        else if (osName.contains("SunOS") 
093                || osName.contains("Solaris")) {
094           return OSType.OS_TYPE_SOLARIS;
095        }
096        else if (osName.contains("Mac")) {
097           return OSType.OS_TYPE_MAC;
098        }
099        else if (osName.contains("FreeBSD")) {
100           return OSType.OS_TYPE_FREEBSD;
101        }
102        else {
103          return OSType.OS_TYPE_UNIX;
104        }
105      }
106      
107      /**
108       * This abstract class bridges the OS-dependent implementations of the 
109       * needed functionality for creating hardlinks and querying link counts.
110       * The particular implementation class is chosen during 
111       * static initialization phase of the HardLink class.
112       * The "getter" methods construct shell command strings for various purposes.
113       */
114      private static abstract class HardLinkCommandGetter {
115    
116        /**
117         * Get the command string needed to hardlink a bunch of files from
118         * a single source directory into a target directory.  The source directory
119         * is not specified here, but the command will be executed using the source
120         * directory as the "current working directory" of the shell invocation.
121         * 
122         * @param fileBaseNames - array of path-less file names, relative
123         *            to the source directory
124         * @param linkDir - target directory where the hardlinks will be put
125         * @return - an array of Strings suitable for use as a single shell command
126         *            with {@link Runtime.exec()}
127         * @throws IOException - if any of the file or path names misbehave
128         */
129        abstract String[] linkMult(String[] fileBaseNames, File linkDir) 
130                              throws IOException;
131        
132        /**
133         * Get the command string needed to hardlink a single file
134         */
135        abstract String[] linkOne(File file, File linkName) throws IOException;
136        
137        /**
138         * Get the command string to query the hardlink count of a file
139         */
140        abstract String[] linkCount(File file) throws IOException;
141        
142        /**
143         * Calculate the total string length of the shell command
144         * resulting from execution of linkMult, plus the length of the
145         * source directory name (which will also be provided to the shell)
146         * 
147         * @param fileDir - source directory, parent of fileBaseNames
148         * @param fileBaseNames - array of path-less file names, relative
149         *            to the source directory
150         * @param linkDir - target directory where the hardlinks will be put
151         * @return - total data length (must not exceed maxAllowedCmdArgLength)
152         * @throws IOException
153         */
154        abstract int getLinkMultArgLength(
155                         File fileDir, String[] fileBaseNames, File linkDir) 
156                         throws IOException;
157        
158        /**
159         * Get the maximum allowed string length of a shell command on this OS,
160         * which is just the documented minimum guaranteed supported command
161         * length - aprx. 32KB for Unix, and 8KB for Windows.
162         */
163        abstract int getMaxAllowedCmdArgLength(); 
164      }
165      
166      /**
167       * Implementation of HardLinkCommandGetter class for Unix
168       */
169      static class HardLinkCGUnix extends HardLinkCommandGetter {
170        private static String[] hardLinkCommand = {"ln", null, null};
171        private static String[] hardLinkMultPrefix = {"ln"};
172        private static String[] hardLinkMultSuffix = {null};
173        private static String[] getLinkCountCommand = {"stat","-c%h", null};
174        //Unix guarantees at least 32K bytes cmd length.
175        //Subtract another 64b to allow for Java 'exec' overhead
176        private static final int maxAllowedCmdArgLength = 32*1024 - 65;
177        
178        private static synchronized 
179        void setLinkCountCmdTemplate(String[] template) {
180          //May update this for specific unix variants, 
181          //after static initialization phase
182          getLinkCountCommand = template;
183        }
184        
185        /*
186         * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkOne(java.io.File, java.io.File)
187         */
188        @Override
189        String[] linkOne(File file, File linkName) 
190        throws IOException {
191          String[] buf = new String[hardLinkCommand.length];
192          System.arraycopy(hardLinkCommand, 0, buf, 0, hardLinkCommand.length);
193          //unix wants argument order: "ln <existing> <new>"
194          buf[1] = FileUtil.makeShellPath(file, true); 
195          buf[2] = FileUtil.makeShellPath(linkName, true);
196          return buf;
197        }
198        
199        /*
200         * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkMult(java.lang.String[], java.io.File)
201         */
202        @Override
203        String[] linkMult(String[] fileBaseNames, File linkDir) 
204        throws IOException {
205          String[] buf = new String[fileBaseNames.length 
206                                    + hardLinkMultPrefix.length 
207                                    + hardLinkMultSuffix.length];
208          int mark=0;
209          System.arraycopy(hardLinkMultPrefix, 0, buf, mark, 
210                           hardLinkMultPrefix.length);
211          mark += hardLinkMultPrefix.length;
212          System.arraycopy(fileBaseNames, 0, buf, mark, fileBaseNames.length);
213          mark += fileBaseNames.length;
214          buf[mark] = FileUtil.makeShellPath(linkDir, true);
215          return buf;
216        }
217        
218        /*
219         * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkCount(java.io.File)
220         */
221        @Override
222        String[] linkCount(File file) 
223        throws IOException {
224          String[] buf = new String[getLinkCountCommand.length];
225          System.arraycopy(getLinkCountCommand, 0, buf, 0, 
226                           getLinkCountCommand.length);
227          buf[getLinkCountCommand.length - 1] = FileUtil.makeShellPath(file, true);
228          return buf;
229        }
230        
231        /*
232         * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#getLinkMultArgLength(java.io.File, java.lang.String[], java.io.File)
233         */
234        @Override
235        int getLinkMultArgLength(File fileDir, String[] fileBaseNames, File linkDir) 
236        throws IOException{
237          int sum = 0;
238          for (String x : fileBaseNames) {
239            // add 1 to account for terminal null or delimiter space
240            sum += 1 + ((x == null) ? 0 : x.length());
241          }
242          sum += 2 + FileUtil.makeShellPath(fileDir, true).length()
243                 + FileUtil.makeShellPath(linkDir, true).length();
244          //add the fixed overhead of the hardLinkMult prefix and suffix
245          sum += 3; //length("ln") + 1
246          return sum;
247        }
248        
249        /*
250         * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#getMaxAllowedCmdArgLength()
251         */
252        @Override
253        int getMaxAllowedCmdArgLength() {
254          return maxAllowedCmdArgLength;
255        }
256      }
257      
258      
259      /**
260       * Implementation of HardLinkCommandGetter class for Windows
261       * 
262       * Note that the linkCount shell command for Windows is actually
263       * a Cygwin shell command, and depends on ${cygwin}/bin
264       * being in the Windows PATH environment variable, so
265       * stat.exe can be found.
266       */
267      static class HardLinkCGWin extends HardLinkCommandGetter {
268        //The Windows command getter impl class and its member fields are
269        //package-private ("default") access instead of "private" to assist 
270        //unit testing (sort of) on non-Win servers
271    
272        static String[] hardLinkCommand = {
273                            "fsutil","hardlink","create", null, null};
274        static String[] hardLinkMultPrefix = {
275                            "cmd","/q","/c","for", "%f", "in", "("};
276        static String   hardLinkMultDir = "\\%f";
277        static String[] hardLinkMultSuffix = {
278                            ")", "do", "fsutil", "hardlink", "create", null, 
279                            "%f", "1>NUL"};
280        static String[] getLinkCountCommand = {"stat","-c%h", null};
281        //Windows guarantees only 8K - 1 bytes cmd length.
282        //Subtract another 64b to allow for Java 'exec' overhead
283        static final int maxAllowedCmdArgLength = 8*1024 - 65;
284    
285        /*
286         * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkOne(java.io.File, java.io.File)
287         */
288        @Override
289        String[] linkOne(File file, File linkName) 
290        throws IOException {
291          String[] buf = new String[hardLinkCommand.length];
292          System.arraycopy(hardLinkCommand, 0, buf, 0, hardLinkCommand.length);
293          //windows wants argument order: "create <new> <existing>"
294          buf[4] = file.getCanonicalPath(); 
295          buf[3] = linkName.getCanonicalPath();
296          return buf;
297        }
298        
299        /*
300         * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkMult(java.lang.String[], java.io.File)
301         */
302        @Override
303        String[] linkMult(String[] fileBaseNames, File linkDir) 
304        throws IOException {
305          String[] buf = new String[fileBaseNames.length 
306                                    + hardLinkMultPrefix.length 
307                                    + hardLinkMultSuffix.length];
308          String td = linkDir.getCanonicalPath() + hardLinkMultDir;
309          int mark=0;
310          System.arraycopy(hardLinkMultPrefix, 0, buf, mark, 
311                           hardLinkMultPrefix.length);
312          mark += hardLinkMultPrefix.length;
313          System.arraycopy(fileBaseNames, 0, buf, mark, fileBaseNames.length);
314          mark += fileBaseNames.length;
315          System.arraycopy(hardLinkMultSuffix, 0, buf, mark, 
316                           hardLinkMultSuffix.length);
317          mark += hardLinkMultSuffix.length;
318          buf[mark - 3] = td;
319          return buf;
320        }
321        
322        /*
323         * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkCount(java.io.File)
324         */
325        @Override
326        String[] linkCount(File file) 
327        throws IOException {
328          String[] buf = new String[getLinkCountCommand.length];
329          System.arraycopy(getLinkCountCommand, 0, buf, 0, 
330                           getLinkCountCommand.length);
331          //The linkCount command is actually a Cygwin shell command,
332          //not a Windows shell command, so we should use "makeShellPath()"
333          //instead of "getCanonicalPath()".  However, that causes another
334          //shell exec to "cygpath.exe", and "stat.exe" actually can handle
335          //DOS-style paths (it just prints a couple hundred bytes of warning
336          //to stderr), so we use the more efficient "getCanonicalPath()".
337          buf[getLinkCountCommand.length - 1] = file.getCanonicalPath();
338          return buf;
339        }
340        
341        /*
342         * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#getLinkMultArgLength(java.io.File, java.lang.String[], java.io.File)
343         */
344        @Override
345        int getLinkMultArgLength(File fileDir, String[] fileBaseNames, File linkDir) 
346        throws IOException {
347          int sum = 0;
348          for (String x : fileBaseNames) {
349            // add 1 to account for terminal null or delimiter space
350            sum += 1 + ((x == null) ? 0 : x.length());
351          }
352          sum += 2 + fileDir.getCanonicalPath().length() +
353                   linkDir.getCanonicalPath().length();
354          //add the fixed overhead of the hardLinkMult command 
355          //(prefix, suffix, and Dir suffix)
356          sum += ("cmd.exe /q /c for %f in ( ) do "
357                  + "fsutil hardlink create \\%f %f 1>NUL ").length();
358          return sum;
359        }
360        
361        /*
362         * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#getMaxAllowedCmdArgLength()
363         */
364        @Override
365        int getMaxAllowedCmdArgLength() {
366          return maxAllowedCmdArgLength;
367        }
368      }
369      
370      
371      /**
372       * Calculate the nominal length of all contributors to the total 
373       * commandstring length, including fixed overhead of the OS-dependent 
374       * command.  It's protected rather than private, to assist unit testing,
375       * but real clients are not expected to need it -- see the way 
376       * createHardLinkMult() uses it internally so the user doesn't need to worry
377       * about it.
378       * 
379       * @param fileDir - source directory, parent of fileBaseNames
380       * @param fileBaseNames - array of path-less file names, relative
381       *            to the source directory
382       * @param linkDir - target directory where the hardlinks will be put
383       * @return - total data length (must not exceed maxAllowedCmdArgLength)
384       * @throws IOException
385       */
386      protected static int getLinkMultArgLength(
387              File fileDir, String[] fileBaseNames, File linkDir) 
388      throws IOException {
389        return getHardLinkCommand.getLinkMultArgLength(fileDir, 
390              fileBaseNames, linkDir);
391      }
392      
393      /**
394       * Return this private value for use by unit tests.
395       * Shell commands are not allowed to have a total string length
396       * exceeding this size.
397       */
398      protected static int getMaxAllowedCmdArgLength() {
399        return getHardLinkCommand.getMaxAllowedCmdArgLength();
400      }
401      
402      /*
403       * ****************************************************
404       * Complexity is above.  User-visible functionality is below
405       * ****************************************************
406       */
407    
408      /**
409       * Creates a hardlink 
410       * @param file - existing source file
411       * @param linkName - desired target link file
412       */
413      public static void createHardLink(File file, File linkName) 
414      throws IOException {
415        if (file == null) {
416          throw new IOException(
417              "invalid arguments to createHardLink: source file is null");
418        }
419        if (linkName == null) {
420          throw new IOException(
421              "invalid arguments to createHardLink: link name is null");
422        }
423              // construct and execute shell command
424        String[] hardLinkCommand = getHardLinkCommand.linkOne(file, linkName);
425        Process process = Runtime.getRuntime().exec(hardLinkCommand);
426        try {
427          if (process.waitFor() != 0) {
428            String errMsg = new BufferedReader(new InputStreamReader(
429                process.getInputStream())).readLine();
430            if (errMsg == null)  errMsg = "";
431            String inpMsg = new BufferedReader(new InputStreamReader(
432                process.getErrorStream())).readLine();
433            if (inpMsg == null)  inpMsg = "";
434            throw new IOException(errMsg + inpMsg);
435          }
436        } catch (InterruptedException e) {
437          throw new IOException(e);
438        } finally {
439          process.destroy();
440        }
441      }
442    
443      /**
444       * Creates hardlinks from multiple existing files within one parent
445       * directory, into one target directory.
446       * @param parentDir - directory containing source files
447       * @param fileBaseNames - list of path-less file names, as returned by 
448       *                        parentDir.list()
449       * @param linkDir - where the hardlinks should be put.  It must already exist.
450       * 
451       * If the list of files is too long (overflows maxAllowedCmdArgLength),
452       * we will automatically split it into multiple invocations of the
453       * underlying method.
454       */
455      public static void createHardLinkMult(File parentDir, String[] fileBaseNames, 
456          File linkDir) throws IOException {
457        //This is the public method all non-test clients are expected to use.
458        //Normal case - allow up to maxAllowedCmdArgLength characters in the cmd
459        createHardLinkMult(parentDir, fileBaseNames, linkDir, 
460                           getHardLinkCommand.getMaxAllowedCmdArgLength());
461      }
462    
463      /*
464       * Implements {@link createHardLinkMult} with added variable  "maxLength",
465       * to ease unit testing of the auto-splitting feature for long lists.
466       * Likewise why it returns "callCount", the number of sub-arrays that
467       * the file list had to be split into.
468       * Non-test clients are expected to call the public method instead.
469       */
470      protected static int createHardLinkMult(File parentDir, 
471          String[] fileBaseNames, File linkDir, int maxLength) 
472      throws IOException {
473        if (parentDir == null) {
474          throw new IOException(
475              "invalid arguments to createHardLinkMult: parent directory is null");
476        }
477        if (linkDir == null) {
478          throw new IOException(
479              "invalid arguments to createHardLinkMult: link directory is null");
480        }
481        if (fileBaseNames == null) {
482          throw new IOException(
483              "invalid arguments to createHardLinkMult: "
484              + "filename list can be empty but not null");
485        }
486        if (fileBaseNames.length == 0) {
487          //the OS cmds can't handle empty list of filenames, 
488          //but it's legal, so just return.
489          return 0; 
490        }
491        if (!linkDir.exists()) {
492          throw new FileNotFoundException(linkDir + " not found.");
493        }
494    
495        //if the list is too long, split into multiple invocations
496        int callCount = 0;
497        if (getLinkMultArgLength(parentDir, fileBaseNames, linkDir) > maxLength
498              && fileBaseNames.length > 1) {
499          String[] list1 = Arrays.copyOf(fileBaseNames, fileBaseNames.length/2);
500          callCount += createHardLinkMult(parentDir, list1, linkDir, maxLength);
501          String[] list2 = Arrays.copyOfRange(fileBaseNames, fileBaseNames.length/2,
502              fileBaseNames.length);
503          callCount += createHardLinkMult(parentDir, list2, linkDir, maxLength);  
504          return callCount;
505        } else {
506          callCount = 1;
507        }
508        
509        // construct and execute shell command
510        String[] hardLinkCommand = getHardLinkCommand.linkMult(fileBaseNames, 
511            linkDir);
512        Process process = Runtime.getRuntime().exec(hardLinkCommand, null, 
513            parentDir);
514        try {
515          if (process.waitFor() != 0) {
516            String errMsg = new BufferedReader(new InputStreamReader(
517                process.getInputStream())).readLine();
518            if (errMsg == null)  errMsg = "";
519            String inpMsg = new BufferedReader(new InputStreamReader(
520                process.getErrorStream())).readLine();
521            if (inpMsg == null)  inpMsg = "";
522            throw new IOException(errMsg + inpMsg);
523          }
524        } catch (InterruptedException e) {
525          throw new IOException(e);
526        } finally {
527          process.destroy();
528        }
529        return callCount;
530      }
531    
532       /**
533       * Retrieves the number of links to the specified file.
534       */
535      public static int getLinkCount(File fileName) throws IOException {
536        if (fileName == null) {
537          throw new IOException(
538              "invalid argument to getLinkCount: file name is null");
539        }
540        if (!fileName.exists()) {
541          throw new FileNotFoundException(fileName + " not found.");
542        }
543    
544        // construct and execute shell command
545        String[] cmd = getHardLinkCommand.linkCount(fileName);
546        String inpMsg = null;
547        String errMsg = null;
548        int exitValue = -1;
549        BufferedReader in = null;
550        BufferedReader err = null;
551    
552        Process process = Runtime.getRuntime().exec(cmd);
553        try {
554          exitValue = process.waitFor();
555          in = new BufferedReader(new InputStreamReader(
556                                      process.getInputStream()));
557          inpMsg = in.readLine();
558          err = new BufferedReader(new InputStreamReader(
559                                       process.getErrorStream()));
560          errMsg = err.readLine();
561          if (inpMsg == null || exitValue != 0) {
562            throw createIOException(fileName, inpMsg, errMsg, exitValue, null);
563          }
564          if (osType == OSType.OS_TYPE_SOLARIS) {
565            String[] result = inpMsg.split("\\s+");
566            return Integer.parseInt(result[1]);
567          } else {
568            return Integer.parseInt(inpMsg);
569          }
570        } catch (NumberFormatException e) {
571          throw createIOException(fileName, inpMsg, errMsg, exitValue, e);
572        } catch (InterruptedException e) {
573          throw createIOException(fileName, inpMsg, errMsg, exitValue, e);
574        } finally {
575          process.destroy();
576          if (in != null) in.close();
577          if (err != null) err.close();
578        }
579      }
580      
581      /* Create an IOException for failing to get link count. */
582      private static IOException createIOException(File f, String message,
583          String error, int exitvalue, Exception cause) {
584        
585        final String winErrMsg = "; Windows errors in getLinkCount are often due "
586             + "to Cygwin misconfiguration";
587    
588        final String s = "Failed to get link count on file " + f
589            + ": message=" + message
590            + "; error=" + error
591            + ((osType == OSType.OS_TYPE_WINXP) ? winErrMsg : "")
592            + "; exit value=" + exitvalue;
593        return (cause == null) ? new IOException(s) : new IOException(s, cause);
594      }
595      
596      
597      /**
598       * HardLink statistics counters and methods.
599       * Not multi-thread safe, obviously.
600       * Init is called during HardLink instantiation, above.
601       * 
602       * These are intended for use by knowledgeable clients, not internally, 
603       * because many of the internal methods are static and can't update these
604       * per-instance counters.
605       */
606      public static class LinkStats {
607        public int countDirs = 0; 
608        public int countSingleLinks = 0; 
609        public int countMultLinks = 0; 
610        public int countFilesMultLinks = 0; 
611        public int countEmptyDirs = 0; 
612        public int countPhysicalFileCopies = 0;
613      
614        public void clear() {
615          countDirs = 0; 
616          countSingleLinks = 0; 
617          countMultLinks = 0; 
618          countFilesMultLinks = 0; 
619          countEmptyDirs = 0; 
620          countPhysicalFileCopies = 0;
621        }
622        
623        public String report() {
624          return "HardLinkStats: " + countDirs + " Directories, including " 
625          + countEmptyDirs + " Empty Directories, " 
626          + countSingleLinks 
627          + " single Link operations, " + countMultLinks 
628          + " multi-Link operations, linking " + countFilesMultLinks 
629          + " files, total " + (countSingleLinks + countFilesMultLinks) 
630          + " linkable files.  Also physically copied " 
631          + countPhysicalFileCopies + " other files.";
632        }
633      }
634    }
635