/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.fs;

import java.io.*;
import java.util.Arrays;
import java.util.Enumeration;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;

import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.util.Shell;
import org.apache.hadoop.util.Shell.ShellCommandExecutor;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

/**
 * A collection of file-processing util methods.
 */
@InterfaceAudience.Public
@InterfaceStability.Evolving
public class FileUtil {

  private static final Log LOG = LogFactory.getLog(FileUtil.class);

  /**
   * Convert an array of FileStatus to an array of Path.
   *
   * @param stats
   *          an array of FileStatus objects
   * @return an array of paths corresponding to the input, or null if
   *         <code>stats</code> is null
   */
  public static Path[] stat2Paths(FileStatus[] stats) {
    if (stats == null)
      return null;
    Path[] ret = new Path[stats.length];
    for (int i = 0; i < stats.length; ++i) {
      ret[i] = stats[i].getPath();
    }
    return ret;
  }

  /**
   * Convert an array of FileStatus to an array of Path.
   * If stats is null, return a single-element array holding path.
   *
   * @param stats
   *          an array of FileStatus objects
   * @param path
   *          default path to return if stats is null
   * @return an array of paths corresponding to the input
   */
  public static Path[] stat2Paths(FileStatus[] stats, Path path) {
    if (stats == null)
      return new Path[]{path};
    else
      return stat2Paths(stats);
  }

  /**
   * Delete a directory and all its contents.  If
   * we return false, the directory may be partially-deleted.
   * (1) If dir is symlink to a file, the symlink is deleted. The file pointed
   *     to by the symlink is not deleted.
   * (2) If dir is symlink to a directory, symlink is deleted. The directory
   *     pointed to by symlink is not deleted.
   * (3) If dir is a normal file, it is deleted.
   * (4) If dir is a normal directory, then dir and all its contents recursively
   *     are deleted.
   *
   * @param dir the file or directory to delete
   * @return true if dir was deleted, false otherwise
   */
  public static boolean fullyDelete(File dir) {
    if (dir.delete()) {
      // dir is (a) normal file, (b) symlink to a file, (c) empty directory or
      // (d) symlink to a directory
      return true;
    }

    // handle nonempty directory deletion
    if (!fullyDeleteContents(dir)) {
      return false;
    }
    return dir.delete();
  }

  /**
   * Delete the contents of a directory, not the directory itself.  If
   * we return false, the directory may be partially-deleted.
   * If dir is a symlink to a directory, all the contents of the actual
   * directory pointed to by dir will be deleted.
   *
   * @param dir the directory whose contents are to be deleted
   * @return true if every entry was deleted (or dir could not be listed),
   *         false if at least one entry could not be deleted
   */
  public static boolean fullyDeleteContents(File dir) {
    boolean deletionSucceeded = true;
    File contents[] = dir.listFiles();
    if (contents != null) {
      for (int i = 0; i < contents.length; i++) {
        if (contents[i].isFile()) {
          if (!contents[i].delete()) {// normal file or symlink to another file
            deletionSucceeded = false;
            continue; // continue deletion of other files/dirs under dir
          }
        } else {
          // Either directory or symlink to another directory.
          // Try deleting the directory as this might be a symlink
          if (contents[i].delete()) {
            // this was indeed a symlink or an empty directory
            continue;
          }
          // if not an empty directory or symlink let
          // fullyDelete handle it.
          if (!fullyDelete(contents[i])) {
            deletionSucceeded = false;
            continue; // continue deletion of other files/dirs under dir
          }
        }
      }
    }
    return deletionSucceeded;
  }

  /**
   * Recursively delete a directory.
   *
   * @param fs {@link FileSystem} on which the path is present
   * @param dir directory to recursively delete
   * @throws IOException
   * @deprecated Use {@link FileSystem#delete(Path, boolean)}
   */
  @Deprecated
  public static void fullyDelete(FileSystem fs, Path dir)
  throws IOException {
    fs.delete(dir, true);
  }

  //
  // If the destination is a subdirectory of the source, then
  // generate exception
  //
  private static void checkDependencies(FileSystem srcFS,
                                        Path src,
                                        FileSystem dstFS,
                                        Path dst)
                                        throws IOException {
    // Only meaningful when both paths live on the very same FileSystem
    // instance, hence the reference comparison.
    if (srcFS == dstFS) {
      // A trailing separator keeps "/a/b" from matching "/a/bc".
      String srcq = src.makeQualified(srcFS).toString() + Path.SEPARATOR;
      String dstq = dst.makeQualified(dstFS).toString() + Path.SEPARATOR;
      if (dstq.startsWith(srcq)) {
        if (srcq.length() == dstq.length()) {
          throw new IOException("Cannot copy " + src + " to itself.");
        } else {
          throw new IOException("Cannot copy " + src + " to its subdirectory " +
                                dst);
        }
      }
    }
  }

  /**
   * Copy files between FileSystems, overwriting an existing destination file.
   */
  public static boolean copy(FileSystem srcFS, Path src,
                             FileSystem dstFS, Path dst,
                             boolean deleteSource,
                             Configuration conf) throws IOException {
    return copy(srcFS, src, dstFS, dst, deleteSource, true, conf);
  }

  /**
   * Copy several source paths into a destination directory.
   * With exactly one source this behaves like the single-path copy.
   * Otherwise dst must be an existing directory; every source is attempted
   * and the messages of all IOExceptions raised are collected into a single
   * IOException thrown at the end.
   *
   * @return false if any individual copy returned false, true otherwise
   * @throws IOException if dst is missing or not a directory, or if any
   *         individual copy failed
   */
  public static boolean copy(FileSystem srcFS, Path[] srcs,
                             FileSystem dstFS, Path dst,
                             boolean deleteSource,
                             boolean overwrite, Configuration conf)
                             throws IOException {
    boolean gotException = false;
    boolean returnVal = true;
    StringBuilder exceptions = new StringBuilder();

    if (srcs.length == 1)
      return copy(srcFS, srcs[0], dstFS, dst, deleteSource, overwrite, conf);

    // Check if dest is directory
    if (!dstFS.exists(dst)) {
      throw new IOException("`" + dst +"': specified destination directory " +
                            "does not exist");
    } else {
      FileStatus sdst = dstFS.getFileStatus(dst);
      if (!sdst.isDirectory())
        throw new IOException("copying multiple files, but last argument `" +
                              dst + "' is not a directory");
    }

    for (Path src : srcs) {
      try {
        if (!copy(srcFS, src, dstFS, dst, deleteSource, overwrite, conf))
          returnVal = false;
      } catch (IOException e) {
        gotException = true;
        exceptions.append(e.getMessage());
        exceptions.append("\n");
      }
    }
    if (gotException) {
      throw new IOException(exceptions.toString());
    }
    return returnVal;
  }

  /** Copy files between FileSystems. */
  public static boolean copy(FileSystem srcFS, Path src,
                             FileSystem dstFS, Path dst,
                             boolean deleteSource,
                             boolean overwrite,
                             Configuration conf) throws IOException {
    FileStatus fileStatus = srcFS.getFileStatus(src);
    return copy(srcFS, fileStatus, dstFS, dst, deleteSource, overwrite, conf);
  }

  /**
   * Copy files between FileSystems, recursing into directories.
   *
   * @return false if the destination directory could not be created or the
   *         source could not be deleted when requested; true otherwise
   */
  private static boolean copy(FileSystem srcFS, FileStatus srcStatus,
                              FileSystem dstFS, Path dst,
                              boolean deleteSource,
                              boolean overwrite,
                              Configuration conf) throws IOException {
    Path src = srcStatus.getPath();
    dst = checkDest(src.getName(), dstFS, dst, overwrite);
    if (srcStatus.isDirectory()) {
      checkDependencies(srcFS, src, dstFS, dst);
      if (!dstFS.mkdirs(dst)) {
        return false;
      }
      FileStatus contents[] = srcFS.listStatus(src);
      for (int i = 0; i < contents.length; i++) {
        // NOTE(review): the result of the child copy is ignored, so a child
        // that returns false does not stop the source from being deleted
        // below -- confirm this is intended.
        copy(srcFS, contents[i], dstFS,
             new Path(dst, contents[i].getPath().getName()),
             deleteSource, overwrite, conf);
      }
    } else {
      InputStream in=null;
      OutputStream out = null;
      try {
        in = srcFS.open(src);
        out = dstFS.create(dst, overwrite);
        // last argument asks copyBytes to close both streams itself
        IOUtils.copyBytes(in, out, conf, true);
      } catch (IOException e) {
        IOUtils.closeStream(out);
        IOUtils.closeStream(in);
        throw e;
      }
    }
    if (deleteSource) {
      return srcFS.delete(src, true);
    } else {
      return true;
    }

  }

  /**
   * Copy all files in a directory to one output file (merge).
   * Only the regular files directly under srcDir are merged, in sorted
   * order; if addString is non-null it is appended (UTF-8 encoded) after
   * each file.
   *
   * @return false if srcDir is not a directory or the source could not be
   *         deleted when requested; true otherwise
   */
  public static boolean copyMerge(FileSystem srcFS, Path srcDir,
                                  FileSystem dstFS, Path dstFile,
                                  boolean deleteSource,
                                  Configuration conf, String addString) throws IOException {
    dstFile = checkDest(srcDir.getName(), dstFS, dstFile, false);

    if (!srcFS.getFileStatus(srcDir).isDirectory())
      return false;

    OutputStream out = dstFS.create(dstFile);

    try {
      FileStatus contents[] = srcFS.listStatus(srcDir);
      Arrays.sort(contents);
      for (int i = 0; i < contents.length; i++) {
        if (contents[i].isFile()) {
          InputStream in = srcFS.open(contents[i].getPath());
          try {
            // keep the shared output stream open across files
            IOUtils.copyBytes(in, out, conf, false);
            if (addString!=null)
              out.write(addString.getBytes("UTF-8"));

          } finally {
            in.close();
          }
        }
      }
    } finally {
      out.close();
    }


    if (deleteSource) {
      return srcFS.delete(srcDir, true);
    } else {
      return true;
    }
  }

  /**
   * Copy local files to a FileSystem. An existing destination file is
   * never overwritten.
   *
   * @throws IOException if src is neither a file nor a directory, or on
   *         any I/O failure
   */
  public static boolean copy(File src,
                             FileSystem dstFS, Path dst,
                             boolean deleteSource,
                             Configuration conf) throws IOException {
    dst = checkDest(src.getName(), dstFS, dst, false);

    if (src.isDirectory()) {
      if (!dstFS.mkdirs(dst)) {
        return false;
      }
      File contents[] = listFiles(src);
      for (int i = 0; i < contents.length; i++) {
        copy(contents[i], dstFS, new Path(dst, contents[i].getName()),
             deleteSource, conf);
      }
    } else if (src.isFile()) {
      InputStream in = null;
      OutputStream out =null;
      try {
        in = new FileInputStream(src);
        out = dstFS.create(dst);
        // presumably this overload closes both streams when done -- the
        // catch below only cleans up after a failure
        IOUtils.copyBytes(in, out, conf);
      } catch (IOException e) {
        IOUtils.closeStream( out );
        IOUtils.closeStream( in );
        throw e;
      }
    } else {
      throw new IOException(src.toString() +
                            ": No such file or directory");
    }
    if (deleteSource) {
      return FileUtil.fullyDelete(src);
    } else {
      return true;
    }
  }

  /** Copy FileSystem files to local files. */
  public static boolean copy(FileSystem srcFS, Path src,
                             File dst, boolean deleteSource,
                             Configuration conf) throws IOException {
    FileStatus filestatus = srcFS.getFileStatus(src);
    return copy(srcFS, filestatus, dst, deleteSource, conf);
  }

  /**
   * Copy FileSystem files to local files, recursing into directories.
   *
   * @return false if a local directory could not be created or the source
   *         could not be deleted when requested; true otherwise
   */
  private static boolean copy(FileSystem srcFS, FileStatus srcStatus,
                              File dst, boolean deleteSource,
                              Configuration conf) throws IOException {
    Path src = srcStatus.getPath();
    if (srcStatus.isDirectory()) {
      if (!dst.mkdirs()) {
        return false;
      }
      FileStatus contents[] = srcFS.listStatus(src);
      for (int i = 0; i < contents.length; i++) {
        copy(srcFS, contents[i],
             new File(dst, contents[i].getPath().getName()),
             deleteSource, conf);
      }
    } else {
      InputStream in = null;
      OutputStream out = null;
      try {
        in = srcFS.open(src);
        // Opening the local file can fail (e.g. missing parent, permissions);
        // do it inside the try so the source stream is not leaked.
        out = new FileOutputStream(dst);
        IOUtils.copyBytes(in, out, conf);
      } catch (IOException e) {
        IOUtils.closeStream(out);
        IOUtils.closeStream(in);
        throw e;
      }
    }
    if (deleteSource) {
      return srcFS.delete(src, true);
    } else {
      return true;
    }
  }

  /**
   * Resolve the effective destination path of a copy.
   * If dst is an existing directory, the destination becomes
   * <code>dst/srcName</code> (checked once more, with a null srcName so that
   * a directory at that location is rejected). If dst is an existing file
   * and overwrite is false, the copy is refused.
   */
  private static Path checkDest(String srcName, FileSystem dstFS, Path dst,
      boolean overwrite) throws IOException {
    if (dstFS.exists(dst)) {
      FileStatus sdst = dstFS.getFileStatus(dst);
      if (sdst.isDirectory()) {
        if (null == srcName) {
          throw new IOException("Target " + dst + " is a directory");
        }
        return checkDest(null, dstFS, new Path(dst, srcName), overwrite);
      } else if (!overwrite) {
        throw new IOException("Target " + dst + " already exists");
      }
    }
    return dst;
  }

  /**
   * This class is only used on windows to invoke the cygpath command.
   */
  private static class CygPathCommand extends Shell {
    String[] command;
    String result;
    CygPathCommand(String path) throws IOException {
      command = new String[]{"cygpath", "-u", path};
      run();
    }
    String getResult() throws IOException {
      return result;
    }
    protected String[] getExecString() {
      return command;
    }
    protected void parseExecResult(BufferedReader lines) throws IOException {
      // only the first line of output is used as the converted path
      String line = lines.readLine();
      if (line == null) {
        throw new IOException("Can't convert '" + command[2] +
                              "' to a cygwin path");
      }
      result = line;
    }
  }

  /**
   * Convert a os-native filename to a path that works for the shell.
   * @param filename The filename to convert
   * @return The unix pathname
   * @throws IOException on windows, there can be problems with the subprocess
   */
  public static String makeShellPath(String filename) throws IOException {
    if (Path.WINDOWS) {
      return new CygPathCommand(filename).getResult();
    } else {
      return filename;
    }
  }

  /**
   * Convert a os-native filename to a path that works for the shell.
   * @param file The filename to convert
   * @return The unix pathname
   * @throws IOException on windows, there can be problems with the subprocess
   */
  public static String makeShellPath(File file) throws IOException {
    return makeShellPath(file, false);
  }

  /**
   * Convert a os-native filename to a path that works for the shell.
   * @param file The filename to convert
   * @param makeCanonicalPath
   *          Whether to make canonical path for the file passed
   * @return The unix pathname
   * @throws IOException on windows, there can be problems with the subprocess
   */
  public static String makeShellPath(File file, boolean makeCanonicalPath)
  throws IOException {
    if (makeCanonicalPath) {
      return makeShellPath(file.getCanonicalPath());
    } else {
      return makeShellPath(file.toString());
    }
  }

  /**
   * Takes an input dir and returns the du on that local directory. Very basic
   * implementation.
   *
   * @param dir
   *          The input dir to get the disk space of this local dir
   * @return The total disk space of the input local directory, or 0 if it
   *         does not exist
   */
  public static long getDU(File dir) {
    long size = 0;
    if (!dir.exists())
      return 0;
    if (!dir.isDirectory()) {
      return dir.length();
    } else {
      // count the directory entry itself, then everything below it
      size = dir.length();
      File[] allFiles = dir.listFiles();
      if(allFiles != null) {
        for (int i = 0; i < allFiles.length; i++) {
          size = size + getDU(allFiles[i]);
        }
      }
      return size;
    }
  }

  /**
   * Given a File input it will unzip the file in the unzip directory
   * passed as the second parameter. Directory entries are skipped; parent
   * directories of file entries are created as needed.
   *
   * @param inFile The zip file as input
   * @param unzipDir The unzip directory where to unzip the zip file.
   * @throws IOException if a directory cannot be created, an entry would be
   *         written outside of unzipDir, or on any other I/O failure
   */
  public static void unZip(File inFile, File unzipDir) throws IOException {
    Enumeration<? extends ZipEntry> entries;
    ZipFile zipFile = new ZipFile(inFile);

    try {
      // Trailing separator so that "/a/b" does not match "/a/bc/..."
      String targetDirPath = unzipDir.getCanonicalPath() + File.separator;
      entries = zipFile.entries();
      while (entries.hasMoreElements()) {
        ZipEntry entry = entries.nextElement();
        if (!entry.isDirectory()) {
          File file = new File(unzipDir, entry.getName());
          // Guard against "zip slip": an entry name containing ".." must not
          // be allowed to escape the extraction directory.
          if (!file.getCanonicalPath().startsWith(targetDirPath)) {
            throw new IOException("expanding " + entry.getName() +
                                  " would create file outside of " + unzipDir);
          }
          InputStream in = zipFile.getInputStream(entry);
          try {
            if (!file.getParentFile().mkdirs()) {
              // mkdirs() also returns false when the directory already exists
              if (!file.getParentFile().isDirectory()) {
                throw new IOException("Mkdirs failed to create " +
                                      file.getParentFile().toString());
              }
            }
            OutputStream out = new FileOutputStream(file);
            try {
              byte[] buffer = new byte[8192];
              int i;
              while ((i = in.read(buffer)) != -1) {
                out.write(buffer, 0, i);
              }
            } finally {
              out.close();
            }
          } finally {
            in.close();
          }
        }
      }
    } finally {
      zipFile.close();
    }
  }

  /**
   * Given a Tar File as input it will untar the file in the untar directory
   * passed as the second parameter
   *
   * This utility will untar ".tar" files and ".tar.gz","tgz" files.
   * Any input whose name ends in "gz" is piped through <code>gzip -dc</code>.
   * The work is delegated to <code>bash</code>, <code>gzip</code> and
   * <code>tar</code>.
   *
   * @param inFile The tar file as input.
   * @param untarDir The untar directory where to untar the tar file.
   * @throws IOException if untarDir cannot be created or the tar process
   *         exits with a non-zero code
   */
  public static void unTar(File inFile, File untarDir) throws IOException {
    if (!untarDir.mkdirs()) {
      if (!untarDir.isDirectory()) {
        throw new IOException("Mkdirs failed to create " + untarDir);
      }
    }

    StringBuilder untarCommand = new StringBuilder();
    boolean gzipped = inFile.toString().endsWith("gz");
    if (gzipped) {
      untarCommand.append(" gzip -dc '");
      untarCommand.append(FileUtil.makeShellPath(inFile));
      untarCommand.append("' | (");
    }
    untarCommand.append("cd '");
    untarCommand.append(FileUtil.makeShellPath(untarDir));
    untarCommand.append("' ; ");
    untarCommand.append("tar -xf ");

    if (gzipped) {
      untarCommand.append(" -)");
    } else {
      // Quote the archive path like the other paths in this command so that
      // names containing spaces survive the shell.
      untarCommand.append("'");
      untarCommand.append(FileUtil.makeShellPath(inFile));
      untarCommand.append("'");
    }
    String[] shellCmd = { "bash", "-c", untarCommand.toString() };
    ShellCommandExecutor shexec = new ShellCommandExecutor(shellCmd);
    shexec.execute();
    int exitcode = shexec.getExitCode();
    if (exitcode != 0) {
      throw new IOException("Error untarring file " + inFile +
                  ". Tar process exited with exit code " + exitcode);
    }
  }

  /**
   * Class for creating hardlinks.
   * Supports Unix, Cygwin, WindXP.
   * @deprecated Use {@link org.apache.hadoop.fs.HardLink}
   */
  @Deprecated
  public static class HardLink extends org.apache.hadoop.fs.HardLink {
    // This is a stub to assist with coordinated change between
    // COMMON and HDFS projects.  It will be removed after the
    // corresponding change is committed to HDFS.
  }

  /**
   * Create a soft link between a src and destination
   * only on a local disk. HDFS does not support this
   * @param target the target for symlink
   * @param linkname the symlink
   * @return value returned by the command, or -1 if the wait for the
   *         command was interrupted
   * @throws IOException if the <code>ln</code> process cannot be started
   */
  public static int symLink(String target, String linkname) throws IOException{
    // Pass the arguments as an array: the single-string form of exec()
    // splits on whitespace and breaks paths that contain spaces.
    String[] cmd = {"ln", "-s", target, linkname};
    Process p = Runtime.getRuntime().exec(cmd);
    int returnVal = -1;
    try{
      returnVal = p.waitFor();
    } catch(InterruptedException e){
      // Keep the historical "-1" result but do not lose the interrupt.
      Thread.currentThread().interrupt();
    }
    return returnVal;
  }

  /**
   * Change the permissions on a filename.
   * @param filename the name of the file to change
   * @param perm the permission string
   * @return the exit code from the command
   * @throws IOException
   * @throws InterruptedException
   */
  public static int chmod(String filename, String perm
                          ) throws IOException, InterruptedException {
    return chmod(filename, perm, false);
  }

  /**
   * Change the permissions on a file / directory, recursively, if
   * needed.
   * @param filename name of the file whose permissions are to change
   * @param perm permission string
   * @param recursive true, if permissions should be changed recursively
   * @return the exit code from the command.
   * @throws IOException
   * @throws InterruptedException
   */
  public static int chmod(String filename, String perm, boolean recursive)
                            throws IOException, InterruptedException {
    StringBuilder cmdBuf = new StringBuilder();
    cmdBuf.append("chmod ");
    if (recursive) {
      cmdBuf.append("-R ");
    }
    cmdBuf.append(perm).append(" ");
    // NOTE(review): filename is handed to "bash -c" unquoted, so names with
    // spaces or shell metacharacters are interpreted by the shell. Left as is
    // because callers may rely on shell expansion -- confirm before changing.
    cmdBuf.append(filename);
    String[] shellCmd = {"bash", "-c" ,cmdBuf.toString()};
    ShellCommandExecutor shExec = new ShellCommandExecutor(shellCmd);
    try {
      shExec.execute();
    }catch(Exception e) {
      // Deliberately best-effort: failures surface through the exit code.
      if (LOG.isDebugEnabled()) {
        LOG.debug("Error while changing permission : " + filename
            + " Exception: ", e);
      }
    }
    return shExec.getExitCode();
  }

  /**
   * Create a tmp file for a base file. The tmp file is created in the
   * parent directory of the base file and named after it with the given
   * prefix.
   * @param basefile the base file of the tmp
   * @param prefix file name prefix of tmp
   * @param isDeleteOnExit if true, the tmp will be deleted when the VM exits
   * @return a newly created tmp file
   * @exception IOException If a tmp file cannot be created
   * @see java.io.File#createTempFile(String, String, File)
   * @see java.io.File#deleteOnExit()
   */
  public static final File createLocalTempFile(final File basefile,
                                               final String prefix,
                                               final boolean isDeleteOnExit)
    throws IOException {
    File tmp = File.createTempFile(prefix + basefile.getName(),
                                   "", basefile.getParentFile());
    if (isDeleteOnExit) {
      tmp.deleteOnExit();
    }
    return tmp;
  }

  /**
   * Move the src file to the name specified by target.
   * @param src the source file
   * @param target the target file
   * @exception IOException If this operation fails or the calling thread is
   *            interrupted while waiting to retry
   */
  public static void replaceFile(File src, File target) throws IOException {
    /* renameTo() has two limitations on Windows platform.
     * src.renameTo(target) fails if
     * 1) If target already exists OR
     * 2) If target is already open for reading/writing.
     */
    if (!src.renameTo(target)) {
      int retries = 5;
      // Keep trying to remove the existing target, pausing between attempts.
      while (target.exists() && !target.delete() && retries-- >= 0) {
        try {
          Thread.sleep(1000);
        } catch (InterruptedException e) {
          // Restore the interrupt flag and keep the cause for diagnosis.
          Thread.currentThread().interrupt();
          throw new IOException("replaceFile interrupted.", e);
        }
      }
      if (!src.renameTo(target)) {
        throw new IOException("Unable to rename " + src +
                              " to " + target);
      }
    }
  }

  /**
   * A wrapper for {@link File#listFiles()}. This java.io API returns null
   * when a dir is not a directory or for any I/O error. Instead of having
   * null check everywhere File#listFiles() is used, we will add utility API
   * to get around this problem. For the majority of cases where we prefer
   * an IOException to be thrown.
   * @param dir directory for which listing should be performed
   * @return list of files; never null
   * @exception IOException for invalid directory or for a bad disk.
   */
  public static File[] listFiles(File dir) throws IOException {
    File[] files = dir.listFiles();
    if(files == null) {
      throw new IOException("Invalid directory or I/O error occurred for dir: "
                + dir.toString());
    }
    return files;
  }

  /**
   * A wrapper for {@link File#list()}. This java.io API returns null
   * when a dir is not a directory or for any I/O error. Instead of having
   * null check everywhere File#list() is used, we will add utility API
   * to get around this problem. For the majority of cases where we prefer
   * an IOException to be thrown.
   * @param dir directory for which listing should be performed
   * @return list of file names; never null
   * @exception IOException for invalid directory or for a bad disk.
   */
  public static String[] list(File dir) throws IOException {
    String[] fileNames = dir.list();
    if(fileNames == null) {
      throw new IOException("Invalid directory or I/O error occurred for dir: "
                + dir.toString());
    }
    return fileNames;
  }
}