001 /**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements. See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership. The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License. You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018
019 package org.apache.hadoop.fs;
020
021 import java.io.*;
022 import java.util.Arrays;
023 import java.util.Enumeration;
024 import java.util.zip.ZipEntry;
025 import java.util.zip.ZipFile;
026
027 import org.apache.hadoop.classification.InterfaceAudience;
028 import org.apache.hadoop.classification.InterfaceStability;
029 import org.apache.hadoop.conf.Configuration;
030 import org.apache.hadoop.io.IOUtils;
031 import org.apache.hadoop.util.Shell;
032 import org.apache.hadoop.util.Shell.ShellCommandExecutor;
033
034 import org.apache.commons.logging.Log;
035 import org.apache.commons.logging.LogFactory;
036
037 /**
038 * A collection of file-processing util methods
039 */
040 @InterfaceAudience.Public
041 @InterfaceStability.Evolving
042 public class FileUtil {
043
044 private static final Log LOG = LogFactory.getLog(FileUtil.class);
045
046 /**
047 * convert an array of FileStatus to an array of Path
048 *
049 * @param stats
050 * an array of FileStatus objects
051 * @return an array of paths corresponding to the input
052 */
053 public static Path[] stat2Paths(FileStatus[] stats) {
054 if (stats == null)
055 return null;
056 Path[] ret = new Path[stats.length];
057 for (int i = 0; i < stats.length; ++i) {
058 ret[i] = stats[i].getPath();
059 }
060 return ret;
061 }
062
063 /**
064 * convert an array of FileStatus to an array of Path.
065 * If stats if null, return path
066 * @param stats
067 * an array of FileStatus objects
068 * @param path
069 * default path to return in stats is null
070 * @return an array of paths corresponding to the input
071 */
072 public static Path[] stat2Paths(FileStatus[] stats, Path path) {
073 if (stats == null)
074 return new Path[]{path};
075 else
076 return stat2Paths(stats);
077 }
078
079 /**
080 * Delete a directory and all its contents. If
081 * we return false, the directory may be partially-deleted.
082 * (1) If dir is symlink to a file, the symlink is deleted. The file pointed
083 * to by the symlink is not deleted.
084 * (2) If dir is symlink to a directory, symlink is deleted. The directory
085 * pointed to by symlink is not deleted.
086 * (3) If dir is a normal file, it is deleted.
087 * (4) If dir is a normal directory, then dir and all its contents recursively
088 * are deleted.
089 */
090 public static boolean fullyDelete(File dir) {
091 if (dir.delete()) {
092 // dir is (a) normal file, (b) symlink to a file, (c) empty directory or
093 // (d) symlink to a directory
094 return true;
095 }
096
097 // handle nonempty directory deletion
098 if (!fullyDeleteContents(dir)) {
099 return false;
100 }
101 return dir.delete();
102 }
103
104 /**
105 * Delete the contents of a directory, not the directory itself. If
106 * we return false, the directory may be partially-deleted.
107 * If dir is a symlink to a directory, all the contents of the actual
108 * directory pointed to by dir will be deleted.
109 */
110 public static boolean fullyDeleteContents(File dir) {
111 boolean deletionSucceeded = true;
112 File contents[] = dir.listFiles();
113 if (contents != null) {
114 for (int i = 0; i < contents.length; i++) {
115 if (contents[i].isFile()) {
116 if (!contents[i].delete()) {// normal file or symlink to another file
117 deletionSucceeded = false;
118 continue; // continue deletion of other files/dirs under dir
119 }
120 } else {
121 // Either directory or symlink to another directory.
122 // Try deleting the directory as this might be a symlink
123 boolean b = false;
124 b = contents[i].delete();
125 if (b){
126 //this was indeed a symlink or an empty directory
127 continue;
128 }
129 // if not an empty directory or symlink let
130 // fullydelete handle it.
131 if (!fullyDelete(contents[i])) {
132 deletionSucceeded = false;
133 continue; // continue deletion of other files/dirs under dir
134 }
135 }
136 }
137 }
138 return deletionSucceeded;
139 }
140
141 /**
142 * Recursively delete a directory.
143 *
144 * @param fs {@link FileSystem} on which the path is present
145 * @param dir directory to recursively delete
146 * @throws IOException
147 * @deprecated Use {@link FileSystem#delete(Path, boolean)}
148 */
149 @Deprecated
150 public static void fullyDelete(FileSystem fs, Path dir)
151 throws IOException {
152 fs.delete(dir, true);
153 }
154
155 //
156 // If the destination is a subdirectory of the source, then
157 // generate exception
158 //
159 private static void checkDependencies(FileSystem srcFS,
160 Path src,
161 FileSystem dstFS,
162 Path dst)
163 throws IOException {
164 if (srcFS == dstFS) {
165 String srcq = src.makeQualified(srcFS).toString() + Path.SEPARATOR;
166 String dstq = dst.makeQualified(dstFS).toString() + Path.SEPARATOR;
167 if (dstq.startsWith(srcq)) {
168 if (srcq.length() == dstq.length()) {
169 throw new IOException("Cannot copy " + src + " to itself.");
170 } else {
171 throw new IOException("Cannot copy " + src + " to its subdirectory " +
172 dst);
173 }
174 }
175 }
176 }
177
178 /** Copy files between FileSystems. */
179 public static boolean copy(FileSystem srcFS, Path src,
180 FileSystem dstFS, Path dst,
181 boolean deleteSource,
182 Configuration conf) throws IOException {
183 return copy(srcFS, src, dstFS, dst, deleteSource, true, conf);
184 }
185
186 public static boolean copy(FileSystem srcFS, Path[] srcs,
187 FileSystem dstFS, Path dst,
188 boolean deleteSource,
189 boolean overwrite, Configuration conf)
190 throws IOException {
191 boolean gotException = false;
192 boolean returnVal = true;
193 StringBuilder exceptions = new StringBuilder();
194
195 if (srcs.length == 1)
196 return copy(srcFS, srcs[0], dstFS, dst, deleteSource, overwrite, conf);
197
198 // Check if dest is directory
199 if (!dstFS.exists(dst)) {
200 throw new IOException("`" + dst +"': specified destination directory " +
201 "doest not exist");
202 } else {
203 FileStatus sdst = dstFS.getFileStatus(dst);
204 if (!sdst.isDirectory())
205 throw new IOException("copying multiple files, but last argument `" +
206 dst + "' is not a directory");
207 }
208
209 for (Path src : srcs) {
210 try {
211 if (!copy(srcFS, src, dstFS, dst, deleteSource, overwrite, conf))
212 returnVal = false;
213 } catch (IOException e) {
214 gotException = true;
215 exceptions.append(e.getMessage());
216 exceptions.append("\n");
217 }
218 }
219 if (gotException) {
220 throw new IOException(exceptions.toString());
221 }
222 return returnVal;
223 }
224
225 /** Copy files between FileSystems. */
226 public static boolean copy(FileSystem srcFS, Path src,
227 FileSystem dstFS, Path dst,
228 boolean deleteSource,
229 boolean overwrite,
230 Configuration conf) throws IOException {
231 FileStatus fileStatus = srcFS.getFileStatus(src);
232 return copy(srcFS, fileStatus, dstFS, dst, deleteSource, overwrite, conf);
233 }
234
235 /** Copy files between FileSystems. */
236 private static boolean copy(FileSystem srcFS, FileStatus srcStatus,
237 FileSystem dstFS, Path dst,
238 boolean deleteSource,
239 boolean overwrite,
240 Configuration conf) throws IOException {
241 Path src = srcStatus.getPath();
242 dst = checkDest(src.getName(), dstFS, dst, overwrite);
243 if (srcStatus.isDirectory()) {
244 checkDependencies(srcFS, src, dstFS, dst);
245 if (!dstFS.mkdirs(dst)) {
246 return false;
247 }
248 FileStatus contents[] = srcFS.listStatus(src);
249 for (int i = 0; i < contents.length; i++) {
250 copy(srcFS, contents[i], dstFS,
251 new Path(dst, contents[i].getPath().getName()),
252 deleteSource, overwrite, conf);
253 }
254 } else {
255 InputStream in=null;
256 OutputStream out = null;
257 try {
258 in = srcFS.open(src);
259 out = dstFS.create(dst, overwrite);
260 IOUtils.copyBytes(in, out, conf, true);
261 } catch (IOException e) {
262 IOUtils.closeStream(out);
263 IOUtils.closeStream(in);
264 throw e;
265 }
266 }
267 if (deleteSource) {
268 return srcFS.delete(src, true);
269 } else {
270 return true;
271 }
272
273 }
274
275 /** Copy all files in a directory to one output file (merge). */
276 public static boolean copyMerge(FileSystem srcFS, Path srcDir,
277 FileSystem dstFS, Path dstFile,
278 boolean deleteSource,
279 Configuration conf, String addString) throws IOException {
280 dstFile = checkDest(srcDir.getName(), dstFS, dstFile, false);
281
282 if (!srcFS.getFileStatus(srcDir).isDirectory())
283 return false;
284
285 OutputStream out = dstFS.create(dstFile);
286
287 try {
288 FileStatus contents[] = srcFS.listStatus(srcDir);
289 Arrays.sort(contents);
290 for (int i = 0; i < contents.length; i++) {
291 if (contents[i].isFile()) {
292 InputStream in = srcFS.open(contents[i].getPath());
293 try {
294 IOUtils.copyBytes(in, out, conf, false);
295 if (addString!=null)
296 out.write(addString.getBytes("UTF-8"));
297
298 } finally {
299 in.close();
300 }
301 }
302 }
303 } finally {
304 out.close();
305 }
306
307
308 if (deleteSource) {
309 return srcFS.delete(srcDir, true);
310 } else {
311 return true;
312 }
313 }
314
315 /** Copy local files to a FileSystem. */
316 public static boolean copy(File src,
317 FileSystem dstFS, Path dst,
318 boolean deleteSource,
319 Configuration conf) throws IOException {
320 dst = checkDest(src.getName(), dstFS, dst, false);
321
322 if (src.isDirectory()) {
323 if (!dstFS.mkdirs(dst)) {
324 return false;
325 }
326 File contents[] = listFiles(src);
327 for (int i = 0; i < contents.length; i++) {
328 copy(contents[i], dstFS, new Path(dst, contents[i].getName()),
329 deleteSource, conf);
330 }
331 } else if (src.isFile()) {
332 InputStream in = null;
333 OutputStream out =null;
334 try {
335 in = new FileInputStream(src);
336 out = dstFS.create(dst);
337 IOUtils.copyBytes(in, out, conf);
338 } catch (IOException e) {
339 IOUtils.closeStream( out );
340 IOUtils.closeStream( in );
341 throw e;
342 }
343 } else {
344 throw new IOException(src.toString() +
345 ": No such file or directory");
346 }
347 if (deleteSource) {
348 return FileUtil.fullyDelete(src);
349 } else {
350 return true;
351 }
352 }
353
354 /** Copy FileSystem files to local files. */
355 public static boolean copy(FileSystem srcFS, Path src,
356 File dst, boolean deleteSource,
357 Configuration conf) throws IOException {
358 FileStatus filestatus = srcFS.getFileStatus(src);
359 return copy(srcFS, filestatus, dst, deleteSource, conf);
360 }
361
362 /** Copy FileSystem files to local files. */
363 private static boolean copy(FileSystem srcFS, FileStatus srcStatus,
364 File dst, boolean deleteSource,
365 Configuration conf) throws IOException {
366 Path src = srcStatus.getPath();
367 if (srcStatus.isDirectory()) {
368 if (!dst.mkdirs()) {
369 return false;
370 }
371 FileStatus contents[] = srcFS.listStatus(src);
372 for (int i = 0; i < contents.length; i++) {
373 copy(srcFS, contents[i],
374 new File(dst, contents[i].getPath().getName()),
375 deleteSource, conf);
376 }
377 } else {
378 InputStream in = srcFS.open(src);
379 IOUtils.copyBytes(in, new FileOutputStream(dst), conf);
380 }
381 if (deleteSource) {
382 return srcFS.delete(src, true);
383 } else {
384 return true;
385 }
386 }
387
388 private static Path checkDest(String srcName, FileSystem dstFS, Path dst,
389 boolean overwrite) throws IOException {
390 if (dstFS.exists(dst)) {
391 FileStatus sdst = dstFS.getFileStatus(dst);
392 if (sdst.isDirectory()) {
393 if (null == srcName) {
394 throw new IOException("Target " + dst + " is a directory");
395 }
396 return checkDest(null, dstFS, new Path(dst, srcName), overwrite);
397 } else if (!overwrite) {
398 throw new IOException("Target " + dst + " already exists");
399 }
400 }
401 return dst;
402 }
403
404 /**
405 * This class is only used on windows to invoke the cygpath command.
406 */
407 private static class CygPathCommand extends Shell {
408 String[] command;
409 String result;
410 CygPathCommand(String path) throws IOException {
411 command = new String[]{"cygpath", "-u", path};
412 run();
413 }
414 String getResult() throws IOException {
415 return result;
416 }
417 @Override
418 protected String[] getExecString() {
419 return command;
420 }
421 @Override
422 protected void parseExecResult(BufferedReader lines) throws IOException {
423 String line = lines.readLine();
424 if (line == null) {
425 throw new IOException("Can't convert '" + command[2] +
426 " to a cygwin path");
427 }
428 result = line;
429 }
430 }
431
432 /**
433 * Convert a os-native filename to a path that works for the shell.
434 * @param filename The filename to convert
435 * @return The unix pathname
436 * @throws IOException on windows, there can be problems with the subprocess
437 */
438 public static String makeShellPath(String filename) throws IOException {
439 if (Path.WINDOWS) {
440 return new CygPathCommand(filename).getResult();
441 } else {
442 return filename;
443 }
444 }
445
446 /**
447 * Convert a os-native filename to a path that works for the shell.
448 * @param file The filename to convert
449 * @return The unix pathname
450 * @throws IOException on windows, there can be problems with the subprocess
451 */
452 public static String makeShellPath(File file) throws IOException {
453 return makeShellPath(file, false);
454 }
455
456 /**
457 * Convert a os-native filename to a path that works for the shell.
458 * @param file The filename to convert
459 * @param makeCanonicalPath
460 * Whether to make canonical path for the file passed
461 * @return The unix pathname
462 * @throws IOException on windows, there can be problems with the subprocess
463 */
464 public static String makeShellPath(File file, boolean makeCanonicalPath)
465 throws IOException {
466 if (makeCanonicalPath) {
467 return makeShellPath(file.getCanonicalPath());
468 } else {
469 return makeShellPath(file.toString());
470 }
471 }
472
473 /**
474 * Takes an input dir and returns the du on that local directory. Very basic
475 * implementation.
476 *
477 * @param dir
478 * The input dir to get the disk space of this local dir
479 * @return The total disk space of the input local directory
480 */
481 public static long getDU(File dir) {
482 long size = 0;
483 if (!dir.exists())
484 return 0;
485 if (!dir.isDirectory()) {
486 return dir.length();
487 } else {
488 File[] allFiles = dir.listFiles();
489 if(allFiles != null) {
490 for (int i = 0; i < allFiles.length; i++) {
491 boolean isSymLink;
492 try {
493 isSymLink = org.apache.commons.io.FileUtils.isSymlink(allFiles[i]);
494 } catch(IOException ioe) {
495 isSymLink = true;
496 }
497 if(!isSymLink) {
498 size += getDU(allFiles[i]);
499 }
500 }
501 }
502 return size;
503 }
504 }
505
506 /**
507 * Given a File input it will unzip the file in a the unzip directory
508 * passed as the second parameter
509 * @param inFile The zip file as input
510 * @param unzipDir The unzip directory where to unzip the zip file.
511 * @throws IOException
512 */
513 public static void unZip(File inFile, File unzipDir) throws IOException {
514 Enumeration<? extends ZipEntry> entries;
515 ZipFile zipFile = new ZipFile(inFile);
516
517 try {
518 entries = zipFile.entries();
519 while (entries.hasMoreElements()) {
520 ZipEntry entry = entries.nextElement();
521 if (!entry.isDirectory()) {
522 InputStream in = zipFile.getInputStream(entry);
523 try {
524 File file = new File(unzipDir, entry.getName());
525 if (!file.getParentFile().mkdirs()) {
526 if (!file.getParentFile().isDirectory()) {
527 throw new IOException("Mkdirs failed to create " +
528 file.getParentFile().toString());
529 }
530 }
531 OutputStream out = new FileOutputStream(file);
532 try {
533 byte[] buffer = new byte[8192];
534 int i;
535 while ((i = in.read(buffer)) != -1) {
536 out.write(buffer, 0, i);
537 }
538 } finally {
539 out.close();
540 }
541 } finally {
542 in.close();
543 }
544 }
545 }
546 } finally {
547 zipFile.close();
548 }
549 }
550
551 /**
552 * Given a Tar File as input it will untar the file in a the untar directory
553 * passed as the second parameter
554 *
555 * This utility will untar ".tar" files and ".tar.gz","tgz" files.
556 *
557 * @param inFile The tar file as input.
558 * @param untarDir The untar directory where to untar the tar file.
559 * @throws IOException
560 */
561 public static void unTar(File inFile, File untarDir) throws IOException {
562 if (!untarDir.mkdirs()) {
563 if (!untarDir.isDirectory()) {
564 throw new IOException("Mkdirs failed to create " + untarDir);
565 }
566 }
567
568 StringBuilder untarCommand = new StringBuilder();
569 boolean gzipped = inFile.toString().endsWith("gz");
570 if (gzipped) {
571 untarCommand.append(" gzip -dc '");
572 untarCommand.append(FileUtil.makeShellPath(inFile));
573 untarCommand.append("' | (");
574 }
575 untarCommand.append("cd '");
576 untarCommand.append(FileUtil.makeShellPath(untarDir));
577 untarCommand.append("' ; ");
578 untarCommand.append("tar -xf ");
579
580 if (gzipped) {
581 untarCommand.append(" -)");
582 } else {
583 untarCommand.append(FileUtil.makeShellPath(inFile));
584 }
585 String[] shellCmd = { "bash", "-c", untarCommand.toString() };
586 ShellCommandExecutor shexec = new ShellCommandExecutor(shellCmd);
587 shexec.execute();
588 int exitcode = shexec.getExitCode();
589 if (exitcode != 0) {
590 throw new IOException("Error untarring file " + inFile +
591 ". Tar process exited with exit code " + exitcode);
592 }
593 }
594
595 /**
596 * Class for creating hardlinks.
597 * Supports Unix, Cygwin, WindXP.
598 * @deprecated Use {@link org.apache.hadoop.fs.HardLink}
599 */
600 @Deprecated
601 public static class HardLink extends org.apache.hadoop.fs.HardLink {
602 // This is a stub to assist with coordinated change between
603 // COMMON and HDFS projects. It will be removed after the
604 // corresponding change is committed to HDFS.
605 }
606
607 /**
608 * Create a soft link between a src and destination
609 * only on a local disk. HDFS does not support this
610 * @param target the target for symlink
611 * @param linkname the symlink
612 * @return value returned by the command
613 */
614 public static int symLink(String target, String linkname) throws IOException{
615 String cmd = "ln -s " + target + " " + linkname;
616 Process p = Runtime.getRuntime().exec(cmd, null);
617 int returnVal = -1;
618 try{
619 returnVal = p.waitFor();
620 } catch(InterruptedException e){
621 //do nothing as of yet
622 }
623 return returnVal;
624 }
625
626 /**
627 * Change the permissions on a filename.
628 * @param filename the name of the file to change
629 * @param perm the permission string
630 * @return the exit code from the command
631 * @throws IOException
632 * @throws InterruptedException
633 */
634 public static int chmod(String filename, String perm
635 ) throws IOException, InterruptedException {
636 return chmod(filename, perm, false);
637 }
638
639 /**
640 * Change the permissions on a file / directory, recursively, if
641 * needed.
642 * @param filename name of the file whose permissions are to change
643 * @param perm permission string
644 * @param recursive true, if permissions should be changed recursively
645 * @return the exit code from the command.
646 * @throws IOException
647 * @throws InterruptedException
648 */
649 public static int chmod(String filename, String perm, boolean recursive)
650 throws IOException, InterruptedException {
651 StringBuilder cmdBuf = new StringBuilder();
652 cmdBuf.append("chmod ");
653 if (recursive) {
654 cmdBuf.append("-R ");
655 }
656 cmdBuf.append(perm).append(" ");
657 cmdBuf.append(filename);
658 String[] shellCmd = {"bash", "-c" ,cmdBuf.toString()};
659 ShellCommandExecutor shExec = new ShellCommandExecutor(shellCmd);
660 try {
661 shExec.execute();
662 }catch(Exception e) {
663 if (LOG.isDebugEnabled()) {
664 LOG.debug("Error while changing permission : " + filename
665 + " Exception: ", e);
666 }
667 }
668 return shExec.getExitCode();
669 }
670
671 /**
672 * Create a tmp file for a base file.
673 * @param basefile the base file of the tmp
674 * @param prefix file name prefix of tmp
675 * @param isDeleteOnExit if true, the tmp will be deleted when the VM exits
676 * @return a newly created tmp file
677 * @exception IOException If a tmp file cannot created
678 * @see java.io.File#createTempFile(String, String, File)
679 * @see java.io.File#deleteOnExit()
680 */
681 public static final File createLocalTempFile(final File basefile,
682 final String prefix,
683 final boolean isDeleteOnExit)
684 throws IOException {
685 File tmp = File.createTempFile(prefix + basefile.getName(),
686 "", basefile.getParentFile());
687 if (isDeleteOnExit) {
688 tmp.deleteOnExit();
689 }
690 return tmp;
691 }
692
693 /**
694 * Move the src file to the name specified by target.
695 * @param src the source file
696 * @param target the target file
697 * @exception IOException If this operation fails
698 */
699 public static void replaceFile(File src, File target) throws IOException {
700 /* renameTo() has two limitations on Windows platform.
701 * src.renameTo(target) fails if
702 * 1) If target already exists OR
703 * 2) If target is already open for reading/writing.
704 */
705 if (!src.renameTo(target)) {
706 int retries = 5;
707 while (target.exists() && !target.delete() && retries-- >= 0) {
708 try {
709 Thread.sleep(1000);
710 } catch (InterruptedException e) {
711 throw new IOException("replaceFile interrupted.");
712 }
713 }
714 if (!src.renameTo(target)) {
715 throw new IOException("Unable to rename " + src +
716 " to " + target);
717 }
718 }
719 }
720
721 /**
722 * A wrapper for {@link File#listFiles()}. This java.io API returns null
723 * when a dir is not a directory or for any I/O error. Instead of having
724 * null check everywhere File#listFiles() is used, we will add utility API
725 * to get around this problem. For the majority of cases where we prefer
726 * an IOException to be thrown.
727 * @param dir directory for which listing should be performed
728 * @return list of files or empty list
729 * @exception IOException for invalid directory or for a bad disk.
730 */
731 public static File[] listFiles(File dir) throws IOException {
732 File[] files = dir.listFiles();
733 if(files == null) {
734 throw new IOException("Invalid directory or I/O error occurred for dir: "
735 + dir.toString());
736 }
737 return files;
738 }
739
740 /**
741 * A wrapper for {@link File#list()}. This java.io API returns null
742 * when a dir is not a directory or for any I/O error. Instead of having
743 * null check everywhere File#list() is used, we will add utility API
744 * to get around this problem. For the majority of cases where we prefer
745 * an IOException to be thrown.
746 * @param dir directory for which listing should be performed
747 * @return list of file names or empty string list
748 * @exception IOException for invalid directory or for a bad disk.
749 */
750 public static String[] list(File dir) throws IOException {
751 String[] fileNames = dir.list();
752 if(fileNames == null) {
753 throw new IOException("Invalid directory or I/O error occurred for dir: "
754 + dir.toString());
755 }
756 return fileNames;
757 }
758 }