001 /** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018 019 package org.apache.hadoop.fs; 020 021 import java.io.*; 022 import java.util.Arrays; 023 import java.util.Enumeration; 024 import java.util.zip.ZipEntry; 025 import java.util.zip.ZipFile; 026 027 import org.apache.hadoop.classification.InterfaceAudience; 028 import org.apache.hadoop.classification.InterfaceStability; 029 import org.apache.hadoop.conf.Configuration; 030 import org.apache.hadoop.io.IOUtils; 031 import org.apache.hadoop.util.Shell; 032 import org.apache.hadoop.util.Shell.ShellCommandExecutor; 033 034 import org.apache.commons.logging.Log; 035 import org.apache.commons.logging.LogFactory; 036 037 /** 038 * A collection of file-processing util methods 039 */ 040 @InterfaceAudience.Public 041 @InterfaceStability.Evolving 042 public class FileUtil { 043 044 private static final Log LOG = LogFactory.getLog(FileUtil.class); 045 046 /** 047 * convert an array of FileStatus to an array of Path 048 * 049 * @param stats 050 * an array of FileStatus objects 051 * @return an array of paths corresponding to the input 052 */ 053 public static Path[] stat2Paths(FileStatus[] stats) { 054 if (stats == null) 055 return null; 056 Path[] ret = new Path[stats.length]; 057 for (int i = 0; i < stats.length; ++i) { 058 ret[i] = stats[i].getPath(); 059 } 060 return ret; 061 } 062 063 /** 064 * convert an array of FileStatus to an array of Path. 065 * If stats if null, return path 066 * @param stats 067 * an array of FileStatus objects 068 * @param path 069 * default path to return in stats is null 070 * @return an array of paths corresponding to the input 071 */ 072 public static Path[] stat2Paths(FileStatus[] stats, Path path) { 073 if (stats == null) 074 return new Path[]{path}; 075 else 076 return stat2Paths(stats); 077 } 078 079 /** 080 * Delete a directory and all its contents. If 081 * we return false, the directory may be partially-deleted. 082 * (1) If dir is symlink to a file, the symlink is deleted. The file pointed 083 * to by the symlink is not deleted. 084 * (2) If dir is symlink to a directory, symlink is deleted. The directory 085 * pointed to by symlink is not deleted. 086 * (3) If dir is a normal file, it is deleted. 087 * (4) If dir is a normal directory, then dir and all its contents recursively 088 * are deleted. 089 */ 090 public static boolean fullyDelete(File dir) { 091 if (dir.delete()) { 092 // dir is (a) normal file, (b) symlink to a file, (c) empty directory or 093 // (d) symlink to a directory 094 return true; 095 } 096 097 // handle nonempty directory deletion 098 if (!fullyDeleteContents(dir)) { 099 return false; 100 } 101 return dir.delete(); 102 } 103 104 /** 105 * Delete the contents of a directory, not the directory itself. If 106 * we return false, the directory may be partially-deleted. 107 * If dir is a symlink to a directory, all the contents of the actual 108 * directory pointed to by dir will be deleted. 109 */ 110 public static boolean fullyDeleteContents(File dir) { 111 boolean deletionSucceeded = true; 112 File contents[] = dir.listFiles(); 113 if (contents != null) { 114 for (int i = 0; i < contents.length; i++) { 115 if (contents[i].isFile()) { 116 if (!contents[i].delete()) {// normal file or symlink to another file 117 deletionSucceeded = false; 118 continue; // continue deletion of other files/dirs under dir 119 } 120 } else { 121 // Either directory or symlink to another directory. 122 // Try deleting the directory as this might be a symlink 123 boolean b = false; 124 b = contents[i].delete(); 125 if (b){ 126 //this was indeed a symlink or an empty directory 127 continue; 128 } 129 // if not an empty directory or symlink let 130 // fullydelete handle it. 131 if (!fullyDelete(contents[i])) { 132 deletionSucceeded = false; 133 continue; // continue deletion of other files/dirs under dir 134 } 135 } 136 } 137 } 138 return deletionSucceeded; 139 } 140 141 /** 142 * Recursively delete a directory. 143 * 144 * @param fs {@link FileSystem} on which the path is present 145 * @param dir directory to recursively delete 146 * @throws IOException 147 * @deprecated Use {@link FileSystem#delete(Path, boolean)} 148 */ 149 @Deprecated 150 public static void fullyDelete(FileSystem fs, Path dir) 151 throws IOException { 152 fs.delete(dir, true); 153 } 154 155 // 156 // If the destination is a subdirectory of the source, then 157 // generate exception 158 // 159 private static void checkDependencies(FileSystem srcFS, 160 Path src, 161 FileSystem dstFS, 162 Path dst) 163 throws IOException { 164 if (srcFS == dstFS) { 165 String srcq = src.makeQualified(srcFS).toString() + Path.SEPARATOR; 166 String dstq = dst.makeQualified(dstFS).toString() + Path.SEPARATOR; 167 if (dstq.startsWith(srcq)) { 168 if (srcq.length() == dstq.length()) { 169 throw new IOException("Cannot copy " + src + " to itself."); 170 } else { 171 throw new IOException("Cannot copy " + src + " to its subdirectory " + 172 dst); 173 } 174 } 175 } 176 } 177 178 /** Copy files between FileSystems. */ 179 public static boolean copy(FileSystem srcFS, Path src, 180 FileSystem dstFS, Path dst, 181 boolean deleteSource, 182 Configuration conf) throws IOException { 183 return copy(srcFS, src, dstFS, dst, deleteSource, true, conf); 184 } 185 186 public static boolean copy(FileSystem srcFS, Path[] srcs, 187 FileSystem dstFS, Path dst, 188 boolean deleteSource, 189 boolean overwrite, Configuration conf) 190 throws IOException { 191 boolean gotException = false; 192 boolean returnVal = true; 193 StringBuilder exceptions = new StringBuilder(); 194 195 if (srcs.length == 1) 196 return copy(srcFS, srcs[0], dstFS, dst, deleteSource, overwrite, conf); 197 198 // Check if dest is directory 199 if (!dstFS.exists(dst)) { 200 throw new IOException("`" + dst +"': specified destination directory " + 201 "doest not exist"); 202 } else { 203 FileStatus sdst = dstFS.getFileStatus(dst); 204 if (!sdst.isDirectory()) 205 throw new IOException("copying multiple files, but last argument `" + 206 dst + "' is not a directory"); 207 } 208 209 for (Path src : srcs) { 210 try { 211 if (!copy(srcFS, src, dstFS, dst, deleteSource, overwrite, conf)) 212 returnVal = false; 213 } catch (IOException e) { 214 gotException = true; 215 exceptions.append(e.getMessage()); 216 exceptions.append("\n"); 217 } 218 } 219 if (gotException) { 220 throw new IOException(exceptions.toString()); 221 } 222 return returnVal; 223 } 224 225 /** Copy files between FileSystems. */ 226 public static boolean copy(FileSystem srcFS, Path src, 227 FileSystem dstFS, Path dst, 228 boolean deleteSource, 229 boolean overwrite, 230 Configuration conf) throws IOException { 231 FileStatus fileStatus = srcFS.getFileStatus(src); 232 return copy(srcFS, fileStatus, dstFS, dst, deleteSource, overwrite, conf); 233 } 234 235 /** Copy files between FileSystems. */ 236 private static boolean copy(FileSystem srcFS, FileStatus srcStatus, 237 FileSystem dstFS, Path dst, 238 boolean deleteSource, 239 boolean overwrite, 240 Configuration conf) throws IOException { 241 Path src = srcStatus.getPath(); 242 dst = checkDest(src.getName(), dstFS, dst, overwrite); 243 if (srcStatus.isDirectory()) { 244 checkDependencies(srcFS, src, dstFS, dst); 245 if (!dstFS.mkdirs(dst)) { 246 return false; 247 } 248 FileStatus contents[] = srcFS.listStatus(src); 249 for (int i = 0; i < contents.length; i++) { 250 copy(srcFS, contents[i], dstFS, 251 new Path(dst, contents[i].getPath().getName()), 252 deleteSource, overwrite, conf); 253 } 254 } else { 255 InputStream in=null; 256 OutputStream out = null; 257 try { 258 in = srcFS.open(src); 259 out = dstFS.create(dst, overwrite); 260 IOUtils.copyBytes(in, out, conf, true); 261 } catch (IOException e) { 262 IOUtils.closeStream(out); 263 IOUtils.closeStream(in); 264 throw e; 265 } 266 } 267 if (deleteSource) { 268 return srcFS.delete(src, true); 269 } else { 270 return true; 271 } 272 273 } 274 275 /** Copy all files in a directory to one output file (merge). */ 276 public static boolean copyMerge(FileSystem srcFS, Path srcDir, 277 FileSystem dstFS, Path dstFile, 278 boolean deleteSource, 279 Configuration conf, String addString) throws IOException { 280 dstFile = checkDest(srcDir.getName(), dstFS, dstFile, false); 281 282 if (!srcFS.getFileStatus(srcDir).isDirectory()) 283 return false; 284 285 OutputStream out = dstFS.create(dstFile); 286 287 try { 288 FileStatus contents[] = srcFS.listStatus(srcDir); 289 Arrays.sort(contents); 290 for (int i = 0; i < contents.length; i++) { 291 if (contents[i].isFile()) { 292 InputStream in = srcFS.open(contents[i].getPath()); 293 try { 294 IOUtils.copyBytes(in, out, conf, false); 295 if (addString!=null) 296 out.write(addString.getBytes("UTF-8")); 297 298 } finally { 299 in.close(); 300 } 301 } 302 } 303 } finally { 304 out.close(); 305 } 306 307 308 if (deleteSource) { 309 return srcFS.delete(srcDir, true); 310 } else { 311 return true; 312 } 313 } 314 315 /** Copy local files to a FileSystem. */ 316 public static boolean copy(File src, 317 FileSystem dstFS, Path dst, 318 boolean deleteSource, 319 Configuration conf) throws IOException { 320 dst = checkDest(src.getName(), dstFS, dst, false); 321 322 if (src.isDirectory()) { 323 if (!dstFS.mkdirs(dst)) { 324 return false; 325 } 326 File contents[] = listFiles(src); 327 for (int i = 0; i < contents.length; i++) { 328 copy(contents[i], dstFS, new Path(dst, contents[i].getName()), 329 deleteSource, conf); 330 } 331 } else if (src.isFile()) { 332 InputStream in = null; 333 OutputStream out =null; 334 try { 335 in = new FileInputStream(src); 336 out = dstFS.create(dst); 337 IOUtils.copyBytes(in, out, conf); 338 } catch (IOException e) { 339 IOUtils.closeStream( out ); 340 IOUtils.closeStream( in ); 341 throw e; 342 } 343 } else { 344 throw new IOException(src.toString() + 345 ": No such file or directory"); 346 } 347 if (deleteSource) { 348 return FileUtil.fullyDelete(src); 349 } else { 350 return true; 351 } 352 } 353 354 /** Copy FileSystem files to local files. */ 355 public static boolean copy(FileSystem srcFS, Path src, 356 File dst, boolean deleteSource, 357 Configuration conf) throws IOException { 358 FileStatus filestatus = srcFS.getFileStatus(src); 359 return copy(srcFS, filestatus, dst, deleteSource, conf); 360 } 361 362 /** Copy FileSystem files to local files. */ 363 private static boolean copy(FileSystem srcFS, FileStatus srcStatus, 364 File dst, boolean deleteSource, 365 Configuration conf) throws IOException { 366 Path src = srcStatus.getPath(); 367 if (srcStatus.isDirectory()) { 368 if (!dst.mkdirs()) { 369 return false; 370 } 371 FileStatus contents[] = srcFS.listStatus(src); 372 for (int i = 0; i < contents.length; i++) { 373 copy(srcFS, contents[i], 374 new File(dst, contents[i].getPath().getName()), 375 deleteSource, conf); 376 } 377 } else { 378 InputStream in = srcFS.open(src); 379 IOUtils.copyBytes(in, new FileOutputStream(dst), conf); 380 } 381 if (deleteSource) { 382 return srcFS.delete(src, true); 383 } else { 384 return true; 385 } 386 } 387 388 private static Path checkDest(String srcName, FileSystem dstFS, Path dst, 389 boolean overwrite) throws IOException { 390 if (dstFS.exists(dst)) { 391 FileStatus sdst = dstFS.getFileStatus(dst); 392 if (sdst.isDirectory()) { 393 if (null == srcName) { 394 throw new IOException("Target " + dst + " is a directory"); 395 } 396 return checkDest(null, dstFS, new Path(dst, srcName), overwrite); 397 } else if (!overwrite) { 398 throw new IOException("Target " + dst + " already exists"); 399 } 400 } 401 return dst; 402 } 403 404 /** 405 * This class is only used on windows to invoke the cygpath command. 406 */ 407 private static class CygPathCommand extends Shell { 408 String[] command; 409 String result; 410 CygPathCommand(String path) throws IOException { 411 command = new String[]{"cygpath", "-u", path}; 412 run(); 413 } 414 String getResult() throws IOException { 415 return result; 416 } 417 protected String[] getExecString() { 418 return command; 419 } 420 protected void parseExecResult(BufferedReader lines) throws IOException { 421 String line = lines.readLine(); 422 if (line == null) { 423 throw new IOException("Can't convert '" + command[2] + 424 " to a cygwin path"); 425 } 426 result = line; 427 } 428 } 429 430 /** 431 * Convert a os-native filename to a path that works for the shell. 432 * @param filename The filename to convert 433 * @return The unix pathname 434 * @throws IOException on windows, there can be problems with the subprocess 435 */ 436 public static String makeShellPath(String filename) throws IOException { 437 if (Path.WINDOWS) { 438 return new CygPathCommand(filename).getResult(); 439 } else { 440 return filename; 441 } 442 } 443 444 /** 445 * Convert a os-native filename to a path that works for the shell. 446 * @param file The filename to convert 447 * @return The unix pathname 448 * @throws IOException on windows, there can be problems with the subprocess 449 */ 450 public static String makeShellPath(File file) throws IOException { 451 return makeShellPath(file, false); 452 } 453 454 /** 455 * Convert a os-native filename to a path that works for the shell. 456 * @param file The filename to convert 457 * @param makeCanonicalPath 458 * Whether to make canonical path for the file passed 459 * @return The unix pathname 460 * @throws IOException on windows, there can be problems with the subprocess 461 */ 462 public static String makeShellPath(File file, boolean makeCanonicalPath) 463 throws IOException { 464 if (makeCanonicalPath) { 465 return makeShellPath(file.getCanonicalPath()); 466 } else { 467 return makeShellPath(file.toString()); 468 } 469 } 470 471 /** 472 * Takes an input dir and returns the du on that local directory. Very basic 473 * implementation. 474 * 475 * @param dir 476 * The input dir to get the disk space of this local dir 477 * @return The total disk space of the input local directory 478 */ 479 public static long getDU(File dir) { 480 long size = 0; 481 if (!dir.exists()) 482 return 0; 483 if (!dir.isDirectory()) { 484 return dir.length(); 485 } else { 486 File[] allFiles = dir.listFiles(); 487 if(allFiles != null) { 488 for (int i = 0; i < allFiles.length; i++) { 489 boolean isSymLink; 490 try { 491 isSymLink = org.apache.commons.io.FileUtils.isSymlink(allFiles[i]); 492 } catch(IOException ioe) { 493 isSymLink = true; 494 } 495 if(!isSymLink) { 496 size += getDU(allFiles[i]); 497 } 498 } 499 } 500 return size; 501 } 502 } 503 504 /** 505 * Given a File input it will unzip the file in a the unzip directory 506 * passed as the second parameter 507 * @param inFile The zip file as input 508 * @param unzipDir The unzip directory where to unzip the zip file. 509 * @throws IOException 510 */ 511 public static void unZip(File inFile, File unzipDir) throws IOException { 512 Enumeration<? extends ZipEntry> entries; 513 ZipFile zipFile = new ZipFile(inFile); 514 515 try { 516 entries = zipFile.entries(); 517 while (entries.hasMoreElements()) { 518 ZipEntry entry = entries.nextElement(); 519 if (!entry.isDirectory()) { 520 InputStream in = zipFile.getInputStream(entry); 521 try { 522 File file = new File(unzipDir, entry.getName()); 523 if (!file.getParentFile().mkdirs()) { 524 if (!file.getParentFile().isDirectory()) { 525 throw new IOException("Mkdirs failed to create " + 526 file.getParentFile().toString()); 527 } 528 } 529 OutputStream out = new FileOutputStream(file); 530 try { 531 byte[] buffer = new byte[8192]; 532 int i; 533 while ((i = in.read(buffer)) != -1) { 534 out.write(buffer, 0, i); 535 } 536 } finally { 537 out.close(); 538 } 539 } finally { 540 in.close(); 541 } 542 } 543 } 544 } finally { 545 zipFile.close(); 546 } 547 } 548 549 /** 550 * Given a Tar File as input it will untar the file in a the untar directory 551 * passed as the second parameter 552 * 553 * This utility will untar ".tar" files and ".tar.gz","tgz" files. 554 * 555 * @param inFile The tar file as input. 556 * @param untarDir The untar directory where to untar the tar file. 557 * @throws IOException 558 */ 559 public static void unTar(File inFile, File untarDir) throws IOException { 560 if (!untarDir.mkdirs()) { 561 if (!untarDir.isDirectory()) { 562 throw new IOException("Mkdirs failed to create " + untarDir); 563 } 564 } 565 566 StringBuilder untarCommand = new StringBuilder(); 567 boolean gzipped = inFile.toString().endsWith("gz"); 568 if (gzipped) { 569 untarCommand.append(" gzip -dc '"); 570 untarCommand.append(FileUtil.makeShellPath(inFile)); 571 untarCommand.append("' | ("); 572 } 573 untarCommand.append("cd '"); 574 untarCommand.append(FileUtil.makeShellPath(untarDir)); 575 untarCommand.append("' ; "); 576 untarCommand.append("tar -xf "); 577 578 if (gzipped) { 579 untarCommand.append(" -)"); 580 } else { 581 untarCommand.append(FileUtil.makeShellPath(inFile)); 582 } 583 String[] shellCmd = { "bash", "-c", untarCommand.toString() }; 584 ShellCommandExecutor shexec = new ShellCommandExecutor(shellCmd); 585 shexec.execute(); 586 int exitcode = shexec.getExitCode(); 587 if (exitcode != 0) { 588 throw new IOException("Error untarring file " + inFile + 589 ". Tar process exited with exit code " + exitcode); 590 } 591 } 592 593 /** 594 * Class for creating hardlinks. 595 * Supports Unix, Cygwin, WindXP. 596 * @deprecated Use {@link org.apache.hadoop.fs.HardLink} 597 */ 598 @Deprecated 599 public static class HardLink extends org.apache.hadoop.fs.HardLink { 600 // This is a stub to assist with coordinated change between 601 // COMMON and HDFS projects. It will be removed after the 602 // corresponding change is committed to HDFS. 603 } 604 605 /** 606 * Create a soft link between a src and destination 607 * only on a local disk. HDFS does not support this 608 * @param target the target for symlink 609 * @param linkname the symlink 610 * @return value returned by the command 611 */ 612 public static int symLink(String target, String linkname) throws IOException{ 613 String cmd = "ln -s " + target + " " + linkname; 614 Process p = Runtime.getRuntime().exec(cmd, null); 615 int returnVal = -1; 616 try{ 617 returnVal = p.waitFor(); 618 } catch(InterruptedException e){ 619 //do nothing as of yet 620 } 621 return returnVal; 622 } 623 624 /** 625 * Change the permissions on a filename. 626 * @param filename the name of the file to change 627 * @param perm the permission string 628 * @return the exit code from the command 629 * @throws IOException 630 * @throws InterruptedException 631 */ 632 public static int chmod(String filename, String perm 633 ) throws IOException, InterruptedException { 634 return chmod(filename, perm, false); 635 } 636 637 /** 638 * Change the permissions on a file / directory, recursively, if 639 * needed. 640 * @param filename name of the file whose permissions are to change 641 * @param perm permission string 642 * @param recursive true, if permissions should be changed recursively 643 * @return the exit code from the command. 644 * @throws IOException 645 * @throws InterruptedException 646 */ 647 public static int chmod(String filename, String perm, boolean recursive) 648 throws IOException, InterruptedException { 649 StringBuilder cmdBuf = new StringBuilder(); 650 cmdBuf.append("chmod "); 651 if (recursive) { 652 cmdBuf.append("-R "); 653 } 654 cmdBuf.append(perm).append(" "); 655 cmdBuf.append(filename); 656 String[] shellCmd = {"bash", "-c" ,cmdBuf.toString()}; 657 ShellCommandExecutor shExec = new ShellCommandExecutor(shellCmd); 658 try { 659 shExec.execute(); 660 }catch(Exception e) { 661 if (LOG.isDebugEnabled()) { 662 LOG.debug("Error while changing permission : " + filename 663 + " Exception: ", e); 664 } 665 } 666 return shExec.getExitCode(); 667 } 668 669 /** 670 * Create a tmp file for a base file. 671 * @param basefile the base file of the tmp 672 * @param prefix file name prefix of tmp 673 * @param isDeleteOnExit if true, the tmp will be deleted when the VM exits 674 * @return a newly created tmp file 675 * @exception IOException If a tmp file cannot created 676 * @see java.io.File#createTempFile(String, String, File) 677 * @see java.io.File#deleteOnExit() 678 */ 679 public static final File createLocalTempFile(final File basefile, 680 final String prefix, 681 final boolean isDeleteOnExit) 682 throws IOException { 683 File tmp = File.createTempFile(prefix + basefile.getName(), 684 "", basefile.getParentFile()); 685 if (isDeleteOnExit) { 686 tmp.deleteOnExit(); 687 } 688 return tmp; 689 } 690 691 /** 692 * Move the src file to the name specified by target. 693 * @param src the source file 694 * @param target the target file 695 * @exception IOException If this operation fails 696 */ 697 public static void replaceFile(File src, File target) throws IOException { 698 /* renameTo() has two limitations on Windows platform. 699 * src.renameTo(target) fails if 700 * 1) If target already exists OR 701 * 2) If target is already open for reading/writing. 702 */ 703 if (!src.renameTo(target)) { 704 int retries = 5; 705 while (target.exists() && !target.delete() && retries-- >= 0) { 706 try { 707 Thread.sleep(1000); 708 } catch (InterruptedException e) { 709 throw new IOException("replaceFile interrupted."); 710 } 711 } 712 if (!src.renameTo(target)) { 713 throw new IOException("Unable to rename " + src + 714 " to " + target); 715 } 716 } 717 } 718 719 /** 720 * A wrapper for {@link File#listFiles()}. This java.io API returns null 721 * when a dir is not a directory or for any I/O error. Instead of having 722 * null check everywhere File#listFiles() is used, we will add utility API 723 * to get around this problem. For the majority of cases where we prefer 724 * an IOException to be thrown. 725 * @param dir directory for which listing should be performed 726 * @return list of files or empty list 727 * @exception IOException for invalid directory or for a bad disk. 728 */ 729 public static File[] listFiles(File dir) throws IOException { 730 File[] files = dir.listFiles(); 731 if(files == null) { 732 throw new IOException("Invalid directory or I/O error occurred for dir: " 733 + dir.toString()); 734 } 735 return files; 736 } 737 738 /** 739 * A wrapper for {@link File#list()}. This java.io API returns null 740 * when a dir is not a directory or for any I/O error. Instead of having 741 * null check everywhere File#list() is used, we will add utility API 742 * to get around this problem. For the majority of cases where we prefer 743 * an IOException to be thrown. 744 * @param dir directory for which listing should be performed 745 * @return list of file names or empty string list 746 * @exception IOException for invalid directory or for a bad disk. 747 */ 748 public static String[] list(File dir) throws IOException { 749 String[] fileNames = dir.list(); 750 if(fileNames == null) { 751 throw new IOException("Invalid directory or I/O error occurred for dir: " 752 + dir.toString()); 753 } 754 return fileNames; 755 } 756 }