001    /**
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements.  See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership.  The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the
007     * "License"); you may not use this file except in compliance
008     * with the License.  You may obtain a copy of the License at
009     *
010     *     http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing, software
013     * distributed under the License is distributed on an "AS IS" BASIS,
014     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015     * See the License for the specific language governing permissions and
016     * limitations under the License.
017     */
018    
019    package org.apache.hadoop.fs;
020    
021    import java.io.BufferedReader;
022    import java.io.File;
023    import java.io.FileNotFoundException;
024    import java.io.IOException;
025    import java.io.InputStreamReader;
026    import java.util.Arrays;
027    
028    import org.apache.hadoop.util.Shell;
029    
030    /**
031     * Class for creating hardlinks.
032     * Supports Unix/Linux, Windows via winutils , and Mac OS X.
033     * 
034     * The HardLink class was formerly a static inner class of FSUtil,
035     * and the methods provided were blatantly non-thread-safe.
036     * To enable volume-parallel Update snapshots, we now provide static 
037     * threadsafe methods that allocate new buffer string arrays
038     * upon each call.  We also provide an API to hardlink all files in a
039     * directory with a single command, which is up to 128 times more 
040     * efficient - and minimizes the impact of the extra buffer creations.
041     */
042    public class HardLink { 
043    
044      public enum OSType {
045        OS_TYPE_UNIX,
046        OS_TYPE_WIN,
047        OS_TYPE_SOLARIS,
048        OS_TYPE_MAC,
049        OS_TYPE_FREEBSD
050      }
051      
052      public static OSType osType;
053      private static HardLinkCommandGetter getHardLinkCommand;
054      
055      public final LinkStats linkStats; //not static
056      
057      //initialize the command "getters" statically, so can use their 
058      //methods without instantiating the HardLink object
059      static { 
060        osType = getOSType();
061        if (osType == OSType.OS_TYPE_WIN) {
062          // Windows
063          getHardLinkCommand = new HardLinkCGWin();
064        } else {
065          // Unix
066          getHardLinkCommand = new HardLinkCGUnix();
067          //override getLinkCountCommand for the particular Unix variant
068          //Linux is already set as the default - {"stat","-c%h", null}
069          if (osType == OSType.OS_TYPE_MAC || osType == OSType.OS_TYPE_FREEBSD) {
070            String[] linkCountCmdTemplate = {"/usr/bin/stat","-f%l", null};
071            HardLinkCGUnix.setLinkCountCmdTemplate(linkCountCmdTemplate);
072          } else if (osType == OSType.OS_TYPE_SOLARIS) {
073            String[] linkCountCmdTemplate = {"ls","-l", null};
074            HardLinkCGUnix.setLinkCountCmdTemplate(linkCountCmdTemplate);        
075          }
076        }
077      }
078    
079      public HardLink() {
080        linkStats = new LinkStats();
081      }
082      
083      static private OSType getOSType() {
084        String osName = System.getProperty("os.name");
085        if (Shell.WINDOWS) {
086          return OSType.OS_TYPE_WIN;
087        }
088        else if (osName.contains("SunOS") 
089                || osName.contains("Solaris")) {
090           return OSType.OS_TYPE_SOLARIS;
091        }
092        else if (osName.contains("Mac")) {
093           return OSType.OS_TYPE_MAC;
094        }
095        else if (osName.contains("FreeBSD")) {
096           return OSType.OS_TYPE_FREEBSD;
097        }
098        else {
099          return OSType.OS_TYPE_UNIX;
100        }
101      }
102      
103      /**
104       * This abstract class bridges the OS-dependent implementations of the 
105       * needed functionality for creating hardlinks and querying link counts.
106       * The particular implementation class is chosen during 
107       * static initialization phase of the HardLink class.
108       * The "getter" methods construct shell command strings for various purposes.
109       */
110      private static abstract class HardLinkCommandGetter {
111    
112        /**
113         * Get the command string needed to hardlink a bunch of files from
114         * a single source directory into a target directory.  The source directory
115         * is not specified here, but the command will be executed using the source
116         * directory as the "current working directory" of the shell invocation.
117         * 
118         * @param fileBaseNames - array of path-less file names, relative
119         *            to the source directory
120         * @param linkDir - target directory where the hardlinks will be put
121         * @return - an array of Strings suitable for use as a single shell command
122         *            with {@link Runtime.exec()}
123         * @throws IOException - if any of the file or path names misbehave
124         */
125        abstract String[] linkMult(String[] fileBaseNames, File linkDir) 
126                              throws IOException;
127        
128        /**
129         * Get the command string needed to hardlink a single file
130         */
131        abstract String[] linkOne(File file, File linkName) throws IOException;
132        
133        /**
134         * Get the command string to query the hardlink count of a file
135         */
136        abstract String[] linkCount(File file) throws IOException;
137        
138        /**
139         * Calculate the total string length of the shell command
140         * resulting from execution of linkMult, plus the length of the
141         * source directory name (which will also be provided to the shell)
142         * 
143         * @param fileDir - source directory, parent of fileBaseNames
144         * @param fileBaseNames - array of path-less file names, relative
145         *            to the source directory
146         * @param linkDir - target directory where the hardlinks will be put
147         * @return - total data length (must not exceed maxAllowedCmdArgLength)
148         * @throws IOException
149         */
150        abstract int getLinkMultArgLength(
151                         File fileDir, String[] fileBaseNames, File linkDir) 
152                         throws IOException;
153        
154        /**
155         * Get the maximum allowed string length of a shell command on this OS,
156         * which is just the documented minimum guaranteed supported command
157         * length - aprx. 32KB for Unix, and 8KB for Windows.
158         */
159        abstract int getMaxAllowedCmdArgLength(); 
160      }
161      
162      /**
163       * Implementation of HardLinkCommandGetter class for Unix
164       */
165      static class HardLinkCGUnix extends HardLinkCommandGetter {
166        private static String[] hardLinkCommand = {"ln", null, null};
167        private static String[] hardLinkMultPrefix = {"ln"};
168        private static String[] hardLinkMultSuffix = {null};
169        private static String[] getLinkCountCommand = {"stat","-c%h", null};
170        //Unix guarantees at least 32K bytes cmd length.
171        //Subtract another 64b to allow for Java 'exec' overhead
172        private static final int maxAllowedCmdArgLength = 32*1024 - 65;
173        
174        private static synchronized 
175        void setLinkCountCmdTemplate(String[] template) {
176          //May update this for specific unix variants, 
177          //after static initialization phase
178          getLinkCountCommand = template;
179        }
180        
181        /*
182         * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkOne(java.io.File, java.io.File)
183         */
184        @Override
185        String[] linkOne(File file, File linkName) 
186        throws IOException {
187          String[] buf = new String[hardLinkCommand.length];
188          System.arraycopy(hardLinkCommand, 0, buf, 0, hardLinkCommand.length);
189          //unix wants argument order: "ln <existing> <new>"
190          buf[1] = FileUtil.makeShellPath(file, true); 
191          buf[2] = FileUtil.makeShellPath(linkName, true);
192          return buf;
193        }
194        
195        /*
196         * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkMult(java.lang.String[], java.io.File)
197         */
198        @Override
199        String[] linkMult(String[] fileBaseNames, File linkDir) 
200        throws IOException {
201          String[] buf = new String[fileBaseNames.length 
202                                    + hardLinkMultPrefix.length 
203                                    + hardLinkMultSuffix.length];
204          int mark=0;
205          System.arraycopy(hardLinkMultPrefix, 0, buf, mark, 
206                           hardLinkMultPrefix.length);
207          mark += hardLinkMultPrefix.length;
208          System.arraycopy(fileBaseNames, 0, buf, mark, fileBaseNames.length);
209          mark += fileBaseNames.length;
210          buf[mark] = FileUtil.makeShellPath(linkDir, true);
211          return buf;
212        }
213        
214        /*
215         * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkCount(java.io.File)
216         */
217        @Override
218        String[] linkCount(File file) 
219        throws IOException {
220          String[] buf = new String[getLinkCountCommand.length];
221          System.arraycopy(getLinkCountCommand, 0, buf, 0, 
222                           getLinkCountCommand.length);
223          buf[getLinkCountCommand.length - 1] = FileUtil.makeShellPath(file, true);
224          return buf;
225        }
226        
227        /*
228         * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#getLinkMultArgLength(java.io.File, java.lang.String[], java.io.File)
229         */
230        @Override
231        int getLinkMultArgLength(File fileDir, String[] fileBaseNames, File linkDir) 
232        throws IOException{
233          int sum = 0;
234          for (String x : fileBaseNames) {
235            // add 1 to account for terminal null or delimiter space
236            sum += 1 + ((x == null) ? 0 : x.length());
237          }
238          sum += 2 + FileUtil.makeShellPath(fileDir, true).length()
239                 + FileUtil.makeShellPath(linkDir, true).length();
240          //add the fixed overhead of the hardLinkMult prefix and suffix
241          sum += 3; //length("ln") + 1
242          return sum;
243        }
244        
245        /*
246         * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#getMaxAllowedCmdArgLength()
247         */
248        @Override
249        int getMaxAllowedCmdArgLength() {
250          return maxAllowedCmdArgLength;
251        }
252      }
253      
254      
255      /**
256       * Implementation of HardLinkCommandGetter class for Windows
257       */
258      static class HardLinkCGWin extends HardLinkCommandGetter {
259        //The Windows command getter impl class and its member fields are
260        //package-private ("default") access instead of "private" to assist 
261        //unit testing (sort of) on non-Win servers
262    
263        static String[] hardLinkCommand = {
264                            Shell.WINUTILS,"hardlink","create", null, null};
265        static String[] hardLinkMultPrefix = {
266                            "cmd","/q","/c","for", "%f", "in", "("};
267        static String   hardLinkMultDir = "\\%f";
268        static String[] hardLinkMultSuffix = {
269                            ")", "do", Shell.WINUTILS, "hardlink", "create", null,
270                            "%f", "1>NUL"};
271        static String[] getLinkCountCommand = {
272                            Shell.WINUTILS, "hardlink",
273                            "stat", null};
274        //Windows guarantees only 8K - 1 bytes cmd length.
275        //Subtract another 64b to allow for Java 'exec' overhead
276        static final int maxAllowedCmdArgLength = 8*1024 - 65;
277    
278        /*
279         * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkOne(java.io.File, java.io.File)
280         */
281        @Override
282        String[] linkOne(File file, File linkName) 
283        throws IOException {
284          String[] buf = new String[hardLinkCommand.length];
285          System.arraycopy(hardLinkCommand, 0, buf, 0, hardLinkCommand.length);
286          //windows wants argument order: "create <new> <existing>"
287          buf[4] = file.getCanonicalPath(); 
288          buf[3] = linkName.getCanonicalPath();
289          return buf;
290        }
291        
292        /*
293         * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkMult(java.lang.String[], java.io.File)
294         */
295        @Override
296        String[] linkMult(String[] fileBaseNames, File linkDir) 
297        throws IOException {
298          String[] buf = new String[fileBaseNames.length 
299                                    + hardLinkMultPrefix.length 
300                                    + hardLinkMultSuffix.length];
301          String td = linkDir.getCanonicalPath() + hardLinkMultDir;
302          int mark=0;
303          System.arraycopy(hardLinkMultPrefix, 0, buf, mark, 
304                           hardLinkMultPrefix.length);
305          mark += hardLinkMultPrefix.length;
306          System.arraycopy(fileBaseNames, 0, buf, mark, fileBaseNames.length);
307          mark += fileBaseNames.length;
308          System.arraycopy(hardLinkMultSuffix, 0, buf, mark, 
309                           hardLinkMultSuffix.length);
310          mark += hardLinkMultSuffix.length;
311          buf[mark - 3] = td;
312          return buf;
313        }
314        
315        /*
316         * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkCount(java.io.File)
317         */
318        @Override
319        String[] linkCount(File file) 
320        throws IOException {
321          String[] buf = new String[getLinkCountCommand.length];
322          System.arraycopy(getLinkCountCommand, 0, buf, 0, 
323                           getLinkCountCommand.length);
324          buf[getLinkCountCommand.length - 1] = file.getCanonicalPath();
325          return buf;
326        }
327        
328        /*
329         * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#getLinkMultArgLength(java.io.File, java.lang.String[], java.io.File)
330         */
331        @Override
332        int getLinkMultArgLength(File fileDir, String[] fileBaseNames, File linkDir) 
333        throws IOException {
334          int sum = 0;
335          for (String x : fileBaseNames) {
336            // add 1 to account for terminal null or delimiter space
337            sum += 1 + ((x == null) ? 0 : x.length());
338          }
339          sum += 2 + fileDir.getCanonicalPath().length() +
340                   linkDir.getCanonicalPath().length();
341          //add the fixed overhead of the hardLinkMult command 
342          //(prefix, suffix, and Dir suffix)
343          sum += ("cmd.exe /q /c for %f in ( ) do "
344                  + Shell.WINUTILS + " hardlink create \\%f %f 1>NUL ").length();
345          return sum;
346        }
347        
348        /*
349         * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#getMaxAllowedCmdArgLength()
350         */
351        @Override
352        int getMaxAllowedCmdArgLength() {
353          return maxAllowedCmdArgLength;
354        }
355      }
356      
357      
358      /**
359       * Calculate the nominal length of all contributors to the total 
360       * commandstring length, including fixed overhead of the OS-dependent 
361       * command.  It's protected rather than private, to assist unit testing,
362       * but real clients are not expected to need it -- see the way 
363       * createHardLinkMult() uses it internally so the user doesn't need to worry
364       * about it.
365       * 
366       * @param fileDir - source directory, parent of fileBaseNames
367       * @param fileBaseNames - array of path-less file names, relative
368       *            to the source directory
369       * @param linkDir - target directory where the hardlinks will be put
370       * @return - total data length (must not exceed maxAllowedCmdArgLength)
371       * @throws IOException
372       */
373      protected static int getLinkMultArgLength(
374              File fileDir, String[] fileBaseNames, File linkDir) 
375      throws IOException {
376        return getHardLinkCommand.getLinkMultArgLength(fileDir, 
377              fileBaseNames, linkDir);
378      }
379      
380      /**
381       * Return this private value for use by unit tests.
382       * Shell commands are not allowed to have a total string length
383       * exceeding this size.
384       */
385      protected static int getMaxAllowedCmdArgLength() {
386        return getHardLinkCommand.getMaxAllowedCmdArgLength();
387      }
388      
389      /*
390       * ****************************************************
391       * Complexity is above.  User-visible functionality is below
392       * ****************************************************
393       */
394    
395      /**
396       * Creates a hardlink 
397       * @param file - existing source file
398       * @param linkName - desired target link file
399       */
400      public static void createHardLink(File file, File linkName) 
401      throws IOException {
402        if (file == null) {
403          throw new IOException(
404              "invalid arguments to createHardLink: source file is null");
405        }
406        if (linkName == null) {
407          throw new IOException(
408              "invalid arguments to createHardLink: link name is null");
409        }
410              // construct and execute shell command
411        String[] hardLinkCommand = getHardLinkCommand.linkOne(file, linkName);
412        Process process = Runtime.getRuntime().exec(hardLinkCommand);
413        try {
414          if (process.waitFor() != 0) {
415            String errMsg = new BufferedReader(new InputStreamReader(
416                process.getInputStream())).readLine();
417            if (errMsg == null)  errMsg = "";
418            String inpMsg = new BufferedReader(new InputStreamReader(
419                process.getErrorStream())).readLine();
420            if (inpMsg == null)  inpMsg = "";
421            throw new IOException(errMsg + inpMsg);
422          }
423        } catch (InterruptedException e) {
424          throw new IOException(e);
425        } finally {
426          process.destroy();
427        }
428      }
429    
430      /**
431       * Creates hardlinks from multiple existing files within one parent
432       * directory, into one target directory.
433       * @param parentDir - directory containing source files
434       * @param fileBaseNames - list of path-less file names, as returned by 
435       *                        parentDir.list()
436       * @param linkDir - where the hardlinks should be put.  It must already exist.
437       * 
438       * If the list of files is too long (overflows maxAllowedCmdArgLength),
439       * we will automatically split it into multiple invocations of the
440       * underlying method.
441       */
442      public static void createHardLinkMult(File parentDir, String[] fileBaseNames, 
443          File linkDir) throws IOException {
444        //This is the public method all non-test clients are expected to use.
445        //Normal case - allow up to maxAllowedCmdArgLength characters in the cmd
446        createHardLinkMult(parentDir, fileBaseNames, linkDir, 
447                           getHardLinkCommand.getMaxAllowedCmdArgLength());
448      }
449    
450      /*
451       * Implements {@link createHardLinkMult} with added variable  "maxLength",
452       * to ease unit testing of the auto-splitting feature for long lists.
453       * Likewise why it returns "callCount", the number of sub-arrays that
454       * the file list had to be split into.
455       * Non-test clients are expected to call the public method instead.
456       */
457      protected static int createHardLinkMult(File parentDir, 
458          String[] fileBaseNames, File linkDir, int maxLength) 
459      throws IOException {
460        if (parentDir == null) {
461          throw new IOException(
462              "invalid arguments to createHardLinkMult: parent directory is null");
463        }
464        if (linkDir == null) {
465          throw new IOException(
466              "invalid arguments to createHardLinkMult: link directory is null");
467        }
468        if (fileBaseNames == null) {
469          throw new IOException(
470              "invalid arguments to createHardLinkMult: "
471              + "filename list can be empty but not null");
472        }
473        if (fileBaseNames.length == 0) {
474          //the OS cmds can't handle empty list of filenames, 
475          //but it's legal, so just return.
476          return 0; 
477        }
478        if (!linkDir.exists()) {
479          throw new FileNotFoundException(linkDir + " not found.");
480        }
481    
482        //if the list is too long, split into multiple invocations
483        int callCount = 0;
484        if (getLinkMultArgLength(parentDir, fileBaseNames, linkDir) > maxLength
485              && fileBaseNames.length > 1) {
486          String[] list1 = Arrays.copyOf(fileBaseNames, fileBaseNames.length/2);
487          callCount += createHardLinkMult(parentDir, list1, linkDir, maxLength);
488          String[] list2 = Arrays.copyOfRange(fileBaseNames, fileBaseNames.length/2,
489              fileBaseNames.length);
490          callCount += createHardLinkMult(parentDir, list2, linkDir, maxLength);  
491          return callCount;
492        } else {
493          callCount = 1;
494        }
495        
496        // construct and execute shell command
497        String[] hardLinkCommand = getHardLinkCommand.linkMult(fileBaseNames, 
498            linkDir);
499        Process process = Runtime.getRuntime().exec(hardLinkCommand, null, 
500            parentDir);
501        try {
502          if (process.waitFor() != 0) {
503            String errMsg = new BufferedReader(new InputStreamReader(
504                process.getInputStream())).readLine();
505            if (errMsg == null)  errMsg = "";
506            String inpMsg = new BufferedReader(new InputStreamReader(
507                process.getErrorStream())).readLine();
508            if (inpMsg == null)  inpMsg = "";
509            throw new IOException(errMsg + inpMsg);
510          }
511        } catch (InterruptedException e) {
512          throw new IOException(e);
513        } finally {
514          process.destroy();
515        }
516        return callCount;
517      }
518    
519       /**
520       * Retrieves the number of links to the specified file.
521       */
522      public static int getLinkCount(File fileName) throws IOException {
523        if (fileName == null) {
524          throw new IOException(
525              "invalid argument to getLinkCount: file name is null");
526        }
527        if (!fileName.exists()) {
528          throw new FileNotFoundException(fileName + " not found.");
529        }
530    
531        // construct and execute shell command
532        String[] cmd = getHardLinkCommand.linkCount(fileName);
533        String inpMsg = null;
534        String errMsg = null;
535        int exitValue = -1;
536        BufferedReader in = null;
537        BufferedReader err = null;
538    
539        Process process = Runtime.getRuntime().exec(cmd);
540        try {
541          exitValue = process.waitFor();
542          in = new BufferedReader(new InputStreamReader(
543                                      process.getInputStream()));
544          inpMsg = in.readLine();
545          err = new BufferedReader(new InputStreamReader(
546                                       process.getErrorStream()));
547          errMsg = err.readLine();
548          if (inpMsg == null || exitValue != 0) {
549            throw createIOException(fileName, inpMsg, errMsg, exitValue, null);
550          }
551          if (osType == OSType.OS_TYPE_SOLARIS) {
552            String[] result = inpMsg.split("\\s+");
553            return Integer.parseInt(result[1]);
554          } else {
555            return Integer.parseInt(inpMsg);
556          }
557        } catch (NumberFormatException e) {
558          throw createIOException(fileName, inpMsg, errMsg, exitValue, e);
559        } catch (InterruptedException e) {
560          throw createIOException(fileName, inpMsg, errMsg, exitValue, e);
561        } finally {
562          process.destroy();
563          if (in != null) in.close();
564          if (err != null) err.close();
565        }
566      }
567      
568      /* Create an IOException for failing to get link count. */
569      private static IOException createIOException(File f, String message,
570          String error, int exitvalue, Exception cause) {
571    
572        final String s = "Failed to get link count on file " + f
573            + ": message=" + message
574            + "; error=" + error
575            + "; exit value=" + exitvalue;
576        return (cause == null) ? new IOException(s) : new IOException(s, cause);
577      }
578      
579      
580      /**
581       * HardLink statistics counters and methods.
582       * Not multi-thread safe, obviously.
583       * Init is called during HardLink instantiation, above.
584       * 
585       * These are intended for use by knowledgeable clients, not internally, 
586       * because many of the internal methods are static and can't update these
587       * per-instance counters.
588       */
589      public static class LinkStats {
590        public int countDirs = 0; 
591        public int countSingleLinks = 0; 
592        public int countMultLinks = 0; 
593        public int countFilesMultLinks = 0; 
594        public int countEmptyDirs = 0; 
595        public int countPhysicalFileCopies = 0;
596      
597        public void clear() {
598          countDirs = 0; 
599          countSingleLinks = 0; 
600          countMultLinks = 0; 
601          countFilesMultLinks = 0; 
602          countEmptyDirs = 0; 
603          countPhysicalFileCopies = 0;
604        }
605        
606        public String report() {
607          return "HardLinkStats: " + countDirs + " Directories, including " 
608          + countEmptyDirs + " Empty Directories, " 
609          + countSingleLinks 
610          + " single Link operations, " + countMultLinks 
611          + " multi-Link operations, linking " + countFilesMultLinks 
612          + " files, total " + (countSingleLinks + countFilesMultLinks) 
613          + " linkable files.  Also physically copied " 
614          + countPhysicalFileCopies + " other files.";
615        }
616      }
617    }
618