001 /** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018 019 package org.apache.hadoop.fs; 020 021 import java.io.*; 022 import java.util.Arrays; 023 import java.util.Enumeration; 024 import java.util.zip.ZipEntry; 025 import java.util.zip.ZipFile; 026 027 import org.apache.hadoop.classification.InterfaceAudience; 028 import org.apache.hadoop.classification.InterfaceStability; 029 import org.apache.hadoop.conf.Configuration; 030 import org.apache.hadoop.io.IOUtils; 031 import org.apache.hadoop.util.Shell; 032 import org.apache.hadoop.util.Shell.ShellCommandExecutor; 033 034 import org.apache.commons.logging.Log; 035 import org.apache.commons.logging.LogFactory; 036 037 /** 038 * A collection of file-processing util methods 039 */ 040 @InterfaceAudience.Public 041 @InterfaceStability.Evolving 042 public class FileUtil { 043 044 private static final Log LOG = LogFactory.getLog(FileUtil.class); 045 046 /** 047 * convert an array of FileStatus to an array of Path 048 * 049 * @param stats 050 * an array of FileStatus objects 051 * @return an array of paths corresponding to the input 052 */ 053 public static Path[] stat2Paths(FileStatus[] stats) { 054 if (stats == null) 055 return null; 056 Path[] ret = new Path[stats.length]; 057 for (int i = 0; i < stats.length; ++i) { 058 ret[i] = stats[i].getPath(); 059 } 060 return ret; 061 } 062 063 /** 064 * convert an array of FileStatus to an array of Path. 065 * If stats if null, return path 066 * @param stats 067 * an array of FileStatus objects 068 * @param path 069 * default path to return in stats is null 070 * @return an array of paths corresponding to the input 071 */ 072 public static Path[] stat2Paths(FileStatus[] stats, Path path) { 073 if (stats == null) 074 return new Path[]{path}; 075 else 076 return stat2Paths(stats); 077 } 078 079 /** 080 * Delete a directory and all its contents. If 081 * we return false, the directory may be partially-deleted. 082 * (1) If dir is symlink to a file, the symlink is deleted. The file pointed 083 * to by the symlink is not deleted. 084 * (2) If dir is symlink to a directory, symlink is deleted. The directory 085 * pointed to by symlink is not deleted. 086 * (3) If dir is a normal file, it is deleted. 087 * (4) If dir is a normal directory, then dir and all its contents recursively 088 * are deleted. 089 */ 090 public static boolean fullyDelete(File dir) { 091 if (dir.delete()) { 092 // dir is (a) normal file, (b) symlink to a file, (c) empty directory or 093 // (d) symlink to a directory 094 return true; 095 } 096 097 // handle nonempty directory deletion 098 if (!fullyDeleteContents(dir)) { 099 return false; 100 } 101 return dir.delete(); 102 } 103 104 /** 105 * Delete the contents of a directory, not the directory itself. If 106 * we return false, the directory may be partially-deleted. 107 * If dir is a symlink to a directory, all the contents of the actual 108 * directory pointed to by dir will be deleted. 109 */ 110 public static boolean fullyDeleteContents(File dir) { 111 boolean deletionSucceeded = true; 112 File contents[] = dir.listFiles(); 113 if (contents != null) { 114 for (int i = 0; i < contents.length; i++) { 115 if (contents[i].isFile()) { 116 if (!contents[i].delete()) {// normal file or symlink to another file 117 deletionSucceeded = false; 118 continue; // continue deletion of other files/dirs under dir 119 } 120 } else { 121 // Either directory or symlink to another directory. 122 // Try deleting the directory as this might be a symlink 123 boolean b = false; 124 b = contents[i].delete(); 125 if (b){ 126 //this was indeed a symlink or an empty directory 127 continue; 128 } 129 // if not an empty directory or symlink let 130 // fullydelete handle it. 131 if (!fullyDelete(contents[i])) { 132 deletionSucceeded = false; 133 continue; // continue deletion of other files/dirs under dir 134 } 135 } 136 } 137 } 138 return deletionSucceeded; 139 } 140 141 /** 142 * Recursively delete a directory. 143 * 144 * @param fs {@link FileSystem} on which the path is present 145 * @param dir directory to recursively delete 146 * @throws IOException 147 * @deprecated Use {@link FileSystem#delete(Path, boolean)} 148 */ 149 @Deprecated 150 public static void fullyDelete(FileSystem fs, Path dir) 151 throws IOException { 152 fs.delete(dir, true); 153 } 154 155 // 156 // If the destination is a subdirectory of the source, then 157 // generate exception 158 // 159 private static void checkDependencies(FileSystem srcFS, 160 Path src, 161 FileSystem dstFS, 162 Path dst) 163 throws IOException { 164 if (srcFS == dstFS) { 165 String srcq = src.makeQualified(srcFS).toString() + Path.SEPARATOR; 166 String dstq = dst.makeQualified(dstFS).toString() + Path.SEPARATOR; 167 if (dstq.startsWith(srcq)) { 168 if (srcq.length() == dstq.length()) { 169 throw new IOException("Cannot copy " + src + " to itself."); 170 } else { 171 throw new IOException("Cannot copy " + src + " to its subdirectory " + 172 dst); 173 } 174 } 175 } 176 } 177 178 /** Copy files between FileSystems. */ 179 public static boolean copy(FileSystem srcFS, Path src, 180 FileSystem dstFS, Path dst, 181 boolean deleteSource, 182 Configuration conf) throws IOException { 183 return copy(srcFS, src, dstFS, dst, deleteSource, true, conf); 184 } 185 186 public static boolean copy(FileSystem srcFS, Path[] srcs, 187 FileSystem dstFS, Path dst, 188 boolean deleteSource, 189 boolean overwrite, Configuration conf) 190 throws IOException { 191 boolean gotException = false; 192 boolean returnVal = true; 193 StringBuilder exceptions = new StringBuilder(); 194 195 if (srcs.length == 1) 196 return copy(srcFS, srcs[0], dstFS, dst, deleteSource, overwrite, conf); 197 198 // Check if dest is directory 199 if (!dstFS.exists(dst)) { 200 throw new IOException("`" + dst +"': specified destination directory " + 201 "doest not exist"); 202 } else { 203 FileStatus sdst = dstFS.getFileStatus(dst); 204 if (!sdst.isDirectory()) 205 throw new IOException("copying multiple files, but last argument `" + 206 dst + "' is not a directory"); 207 } 208 209 for (Path src : srcs) { 210 try { 211 if (!copy(srcFS, src, dstFS, dst, deleteSource, overwrite, conf)) 212 returnVal = false; 213 } catch (IOException e) { 214 gotException = true; 215 exceptions.append(e.getMessage()); 216 exceptions.append("\n"); 217 } 218 } 219 if (gotException) { 220 throw new IOException(exceptions.toString()); 221 } 222 return returnVal; 223 } 224 225 /** Copy files between FileSystems. */ 226 public static boolean copy(FileSystem srcFS, Path src, 227 FileSystem dstFS, Path dst, 228 boolean deleteSource, 229 boolean overwrite, 230 Configuration conf) throws IOException { 231 FileStatus fileStatus = srcFS.getFileStatus(src); 232 return copy(srcFS, fileStatus, dstFS, dst, deleteSource, overwrite, conf); 233 } 234 235 /** Copy files between FileSystems. */ 236 private static boolean copy(FileSystem srcFS, FileStatus srcStatus, 237 FileSystem dstFS, Path dst, 238 boolean deleteSource, 239 boolean overwrite, 240 Configuration conf) throws IOException { 241 Path src = srcStatus.getPath(); 242 dst = checkDest(src.getName(), dstFS, dst, overwrite); 243 if (srcStatus.isDirectory()) { 244 checkDependencies(srcFS, src, dstFS, dst); 245 if (!dstFS.mkdirs(dst)) { 246 return false; 247 } 248 FileStatus contents[] = srcFS.listStatus(src); 249 for (int i = 0; i < contents.length; i++) { 250 copy(srcFS, contents[i], dstFS, 251 new Path(dst, contents[i].getPath().getName()), 252 deleteSource, overwrite, conf); 253 } 254 } else { 255 InputStream in=null; 256 OutputStream out = null; 257 try { 258 in = srcFS.open(src); 259 out = dstFS.create(dst, overwrite); 260 IOUtils.copyBytes(in, out, conf, true); 261 } catch (IOException e) { 262 IOUtils.closeStream(out); 263 IOUtils.closeStream(in); 264 throw e; 265 } 266 } 267 if (deleteSource) { 268 return srcFS.delete(src, true); 269 } else { 270 return true; 271 } 272 273 } 274 275 /** Copy all files in a directory to one output file (merge). */ 276 public static boolean copyMerge(FileSystem srcFS, Path srcDir, 277 FileSystem dstFS, Path dstFile, 278 boolean deleteSource, 279 Configuration conf, String addString) throws IOException { 280 dstFile = checkDest(srcDir.getName(), dstFS, dstFile, false); 281 282 if (!srcFS.getFileStatus(srcDir).isDirectory()) 283 return false; 284 285 OutputStream out = dstFS.create(dstFile); 286 287 try { 288 FileStatus contents[] = srcFS.listStatus(srcDir); 289 Arrays.sort(contents); 290 for (int i = 0; i < contents.length; i++) { 291 if (contents[i].isFile()) { 292 InputStream in = srcFS.open(contents[i].getPath()); 293 try { 294 IOUtils.copyBytes(in, out, conf, false); 295 if (addString!=null) 296 out.write(addString.getBytes("UTF-8")); 297 298 } finally { 299 in.close(); 300 } 301 } 302 } 303 } finally { 304 out.close(); 305 } 306 307 308 if (deleteSource) { 309 return srcFS.delete(srcDir, true); 310 } else { 311 return true; 312 } 313 } 314 315 /** Copy local files to a FileSystem. */ 316 public static boolean copy(File src, 317 FileSystem dstFS, Path dst, 318 boolean deleteSource, 319 Configuration conf) throws IOException { 320 dst = checkDest(src.getName(), dstFS, dst, false); 321 322 if (src.isDirectory()) { 323 if (!dstFS.mkdirs(dst)) { 324 return false; 325 } 326 File contents[] = listFiles(src); 327 for (int i = 0; i < contents.length; i++) { 328 copy(contents[i], dstFS, new Path(dst, contents[i].getName()), 329 deleteSource, conf); 330 } 331 } else if (src.isFile()) { 332 InputStream in = null; 333 OutputStream out =null; 334 try { 335 in = new FileInputStream(src); 336 out = dstFS.create(dst); 337 IOUtils.copyBytes(in, out, conf); 338 } catch (IOException e) { 339 IOUtils.closeStream( out ); 340 IOUtils.closeStream( in ); 341 throw e; 342 } 343 } else { 344 throw new IOException(src.toString() + 345 ": No such file or directory"); 346 } 347 if (deleteSource) { 348 return FileUtil.fullyDelete(src); 349 } else { 350 return true; 351 } 352 } 353 354 /** Copy FileSystem files to local files. */ 355 public static boolean copy(FileSystem srcFS, Path src, 356 File dst, boolean deleteSource, 357 Configuration conf) throws IOException { 358 FileStatus filestatus = srcFS.getFileStatus(src); 359 return copy(srcFS, filestatus, dst, deleteSource, conf); 360 } 361 362 /** Copy FileSystem files to local files. */ 363 private static boolean copy(FileSystem srcFS, FileStatus srcStatus, 364 File dst, boolean deleteSource, 365 Configuration conf) throws IOException { 366 Path src = srcStatus.getPath(); 367 if (srcStatus.isDirectory()) { 368 if (!dst.mkdirs()) { 369 return false; 370 } 371 FileStatus contents[] = srcFS.listStatus(src); 372 for (int i = 0; i < contents.length; i++) { 373 copy(srcFS, contents[i], 374 new File(dst, contents[i].getPath().getName()), 375 deleteSource, conf); 376 } 377 } else { 378 InputStream in = srcFS.open(src); 379 IOUtils.copyBytes(in, new FileOutputStream(dst), conf); 380 } 381 if (deleteSource) { 382 return srcFS.delete(src, true); 383 } else { 384 return true; 385 } 386 } 387 388 private static Path checkDest(String srcName, FileSystem dstFS, Path dst, 389 boolean overwrite) throws IOException { 390 if (dstFS.exists(dst)) { 391 FileStatus sdst = dstFS.getFileStatus(dst); 392 if (sdst.isDirectory()) { 393 if (null == srcName) { 394 throw new IOException("Target " + dst + " is a directory"); 395 } 396 return checkDest(null, dstFS, new Path(dst, srcName), overwrite); 397 } else if (!overwrite) { 398 throw new IOException("Target " + dst + " already exists"); 399 } 400 } 401 return dst; 402 } 403 404 /** 405 * This class is only used on windows to invoke the cygpath command. 406 */ 407 private static class CygPathCommand extends Shell { 408 String[] command; 409 String result; 410 CygPathCommand(String path) throws IOException { 411 command = new String[]{"cygpath", "-u", path}; 412 run(); 413 } 414 String getResult() throws IOException { 415 return result; 416 } 417 @Override 418 protected String[] getExecString() { 419 return command; 420 } 421 @Override 422 protected void parseExecResult(BufferedReader lines) throws IOException { 423 String line = lines.readLine(); 424 if (line == null) { 425 throw new IOException("Can't convert '" + command[2] + 426 " to a cygwin path"); 427 } 428 result = line; 429 } 430 } 431 432 /** 433 * Convert a os-native filename to a path that works for the shell. 434 * @param filename The filename to convert 435 * @return The unix pathname 436 * @throws IOException on windows, there can be problems with the subprocess 437 */ 438 public static String makeShellPath(String filename) throws IOException { 439 if (Path.WINDOWS) { 440 return new CygPathCommand(filename).getResult(); 441 } else { 442 return filename; 443 } 444 } 445 446 /** 447 * Convert a os-native filename to a path that works for the shell. 448 * @param file The filename to convert 449 * @return The unix pathname 450 * @throws IOException on windows, there can be problems with the subprocess 451 */ 452 public static String makeShellPath(File file) throws IOException { 453 return makeShellPath(file, false); 454 } 455 456 /** 457 * Convert a os-native filename to a path that works for the shell. 458 * @param file The filename to convert 459 * @param makeCanonicalPath 460 * Whether to make canonical path for the file passed 461 * @return The unix pathname 462 * @throws IOException on windows, there can be problems with the subprocess 463 */ 464 public static String makeShellPath(File file, boolean makeCanonicalPath) 465 throws IOException { 466 if (makeCanonicalPath) { 467 return makeShellPath(file.getCanonicalPath()); 468 } else { 469 return makeShellPath(file.toString()); 470 } 471 } 472 473 /** 474 * Takes an input dir and returns the du on that local directory. Very basic 475 * implementation. 476 * 477 * @param dir 478 * The input dir to get the disk space of this local dir 479 * @return The total disk space of the input local directory 480 */ 481 public static long getDU(File dir) { 482 long size = 0; 483 if (!dir.exists()) 484 return 0; 485 if (!dir.isDirectory()) { 486 return dir.length(); 487 } else { 488 File[] allFiles = dir.listFiles(); 489 if(allFiles != null) { 490 for (int i = 0; i < allFiles.length; i++) { 491 boolean isSymLink; 492 try { 493 isSymLink = org.apache.commons.io.FileUtils.isSymlink(allFiles[i]); 494 } catch(IOException ioe) { 495 isSymLink = true; 496 } 497 if(!isSymLink) { 498 size += getDU(allFiles[i]); 499 } 500 } 501 } 502 return size; 503 } 504 } 505 506 /** 507 * Given a File input it will unzip the file in a the unzip directory 508 * passed as the second parameter 509 * @param inFile The zip file as input 510 * @param unzipDir The unzip directory where to unzip the zip file. 511 * @throws IOException 512 */ 513 public static void unZip(File inFile, File unzipDir) throws IOException { 514 Enumeration<? extends ZipEntry> entries; 515 ZipFile zipFile = new ZipFile(inFile); 516 517 try { 518 entries = zipFile.entries(); 519 while (entries.hasMoreElements()) { 520 ZipEntry entry = entries.nextElement(); 521 if (!entry.isDirectory()) { 522 InputStream in = zipFile.getInputStream(entry); 523 try { 524 File file = new File(unzipDir, entry.getName()); 525 if (!file.getParentFile().mkdirs()) { 526 if (!file.getParentFile().isDirectory()) { 527 throw new IOException("Mkdirs failed to create " + 528 file.getParentFile().toString()); 529 } 530 } 531 OutputStream out = new FileOutputStream(file); 532 try { 533 byte[] buffer = new byte[8192]; 534 int i; 535 while ((i = in.read(buffer)) != -1) { 536 out.write(buffer, 0, i); 537 } 538 } finally { 539 out.close(); 540 } 541 } finally { 542 in.close(); 543 } 544 } 545 } 546 } finally { 547 zipFile.close(); 548 } 549 } 550 551 /** 552 * Given a Tar File as input it will untar the file in a the untar directory 553 * passed as the second parameter 554 * 555 * This utility will untar ".tar" files and ".tar.gz","tgz" files. 556 * 557 * @param inFile The tar file as input. 558 * @param untarDir The untar directory where to untar the tar file. 559 * @throws IOException 560 */ 561 public static void unTar(File inFile, File untarDir) throws IOException { 562 if (!untarDir.mkdirs()) { 563 if (!untarDir.isDirectory()) { 564 throw new IOException("Mkdirs failed to create " + untarDir); 565 } 566 } 567 568 StringBuilder untarCommand = new StringBuilder(); 569 boolean gzipped = inFile.toString().endsWith("gz"); 570 if (gzipped) { 571 untarCommand.append(" gzip -dc '"); 572 untarCommand.append(FileUtil.makeShellPath(inFile)); 573 untarCommand.append("' | ("); 574 } 575 untarCommand.append("cd '"); 576 untarCommand.append(FileUtil.makeShellPath(untarDir)); 577 untarCommand.append("' ; "); 578 untarCommand.append("tar -xf "); 579 580 if (gzipped) { 581 untarCommand.append(" -)"); 582 } else { 583 untarCommand.append(FileUtil.makeShellPath(inFile)); 584 } 585 String[] shellCmd = { "bash", "-c", untarCommand.toString() }; 586 ShellCommandExecutor shexec = new ShellCommandExecutor(shellCmd); 587 shexec.execute(); 588 int exitcode = shexec.getExitCode(); 589 if (exitcode != 0) { 590 throw new IOException("Error untarring file " + inFile + 591 ". Tar process exited with exit code " + exitcode); 592 } 593 } 594 595 /** 596 * Class for creating hardlinks. 597 * Supports Unix, Cygwin, WindXP. 598 * @deprecated Use {@link org.apache.hadoop.fs.HardLink} 599 */ 600 @Deprecated 601 public static class HardLink extends org.apache.hadoop.fs.HardLink { 602 // This is a stub to assist with coordinated change between 603 // COMMON and HDFS projects. It will be removed after the 604 // corresponding change is committed to HDFS. 605 } 606 607 /** 608 * Create a soft link between a src and destination 609 * only on a local disk. HDFS does not support this 610 * @param target the target for symlink 611 * @param linkname the symlink 612 * @return value returned by the command 613 */ 614 public static int symLink(String target, String linkname) throws IOException{ 615 String cmd = "ln -s " + target + " " + linkname; 616 Process p = Runtime.getRuntime().exec(cmd, null); 617 int returnVal = -1; 618 try{ 619 returnVal = p.waitFor(); 620 } catch(InterruptedException e){ 621 //do nothing as of yet 622 } 623 return returnVal; 624 } 625 626 /** 627 * Change the permissions on a filename. 628 * @param filename the name of the file to change 629 * @param perm the permission string 630 * @return the exit code from the command 631 * @throws IOException 632 * @throws InterruptedException 633 */ 634 public static int chmod(String filename, String perm 635 ) throws IOException, InterruptedException { 636 return chmod(filename, perm, false); 637 } 638 639 /** 640 * Change the permissions on a file / directory, recursively, if 641 * needed. 642 * @param filename name of the file whose permissions are to change 643 * @param perm permission string 644 * @param recursive true, if permissions should be changed recursively 645 * @return the exit code from the command. 646 * @throws IOException 647 * @throws InterruptedException 648 */ 649 public static int chmod(String filename, String perm, boolean recursive) 650 throws IOException, InterruptedException { 651 StringBuilder cmdBuf = new StringBuilder(); 652 cmdBuf.append("chmod "); 653 if (recursive) { 654 cmdBuf.append("-R "); 655 } 656 cmdBuf.append(perm).append(" "); 657 cmdBuf.append(filename); 658 String[] shellCmd = {"bash", "-c" ,cmdBuf.toString()}; 659 ShellCommandExecutor shExec = new ShellCommandExecutor(shellCmd); 660 try { 661 shExec.execute(); 662 }catch(Exception e) { 663 if (LOG.isDebugEnabled()) { 664 LOG.debug("Error while changing permission : " + filename 665 + " Exception: ", e); 666 } 667 } 668 return shExec.getExitCode(); 669 } 670 671 /** 672 * Create a tmp file for a base file. 673 * @param basefile the base file of the tmp 674 * @param prefix file name prefix of tmp 675 * @param isDeleteOnExit if true, the tmp will be deleted when the VM exits 676 * @return a newly created tmp file 677 * @exception IOException If a tmp file cannot created 678 * @see java.io.File#createTempFile(String, String, File) 679 * @see java.io.File#deleteOnExit() 680 */ 681 public static final File createLocalTempFile(final File basefile, 682 final String prefix, 683 final boolean isDeleteOnExit) 684 throws IOException { 685 File tmp = File.createTempFile(prefix + basefile.getName(), 686 "", basefile.getParentFile()); 687 if (isDeleteOnExit) { 688 tmp.deleteOnExit(); 689 } 690 return tmp; 691 } 692 693 /** 694 * Move the src file to the name specified by target. 695 * @param src the source file 696 * @param target the target file 697 * @exception IOException If this operation fails 698 */ 699 public static void replaceFile(File src, File target) throws IOException { 700 /* renameTo() has two limitations on Windows platform. 701 * src.renameTo(target) fails if 702 * 1) If target already exists OR 703 * 2) If target is already open for reading/writing. 704 */ 705 if (!src.renameTo(target)) { 706 int retries = 5; 707 while (target.exists() && !target.delete() && retries-- >= 0) { 708 try { 709 Thread.sleep(1000); 710 } catch (InterruptedException e) { 711 throw new IOException("replaceFile interrupted."); 712 } 713 } 714 if (!src.renameTo(target)) { 715 throw new IOException("Unable to rename " + src + 716 " to " + target); 717 } 718 } 719 } 720 721 /** 722 * A wrapper for {@link File#listFiles()}. This java.io API returns null 723 * when a dir is not a directory or for any I/O error. Instead of having 724 * null check everywhere File#listFiles() is used, we will add utility API 725 * to get around this problem. For the majority of cases where we prefer 726 * an IOException to be thrown. 727 * @param dir directory for which listing should be performed 728 * @return list of files or empty list 729 * @exception IOException for invalid directory or for a bad disk. 730 */ 731 public static File[] listFiles(File dir) throws IOException { 732 File[] files = dir.listFiles(); 733 if(files == null) { 734 throw new IOException("Invalid directory or I/O error occurred for dir: " 735 + dir.toString()); 736 } 737 return files; 738 } 739 740 /** 741 * A wrapper for {@link File#list()}. This java.io API returns null 742 * when a dir is not a directory or for any I/O error. Instead of having 743 * null check everywhere File#list() is used, we will add utility API 744 * to get around this problem. For the majority of cases where we prefer 745 * an IOException to be thrown. 746 * @param dir directory for which listing should be performed 747 * @return list of file names or empty string list 748 * @exception IOException for invalid directory or for a bad disk. 749 */ 750 public static String[] list(File dir) throws IOException { 751 String[] fileNames = dir.list(); 752 if(fileNames == null) { 753 throw new IOException("Invalid directory or I/O error occurred for dir: " 754 + dir.toString()); 755 } 756 return fileNames; 757 } 758 }