/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.fs;

import java.io.*;
import java.util.Arrays;

import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.util.Progressable;
import org.apache.hadoop.util.PureJavaCrc32;

/****************************************************************
 * Abstract Checksummed FileSystem.
 * It provides a basic implementation of a checksummed FileSystem,
 * which creates a checksum file for each raw file.
 * It generates and verifies checksums at the client side.
 *
 * <p>The checksum file of <code>dir/name</code> is
 * <code>dir/.name.crc</code>. It starts with an 8-byte header (the 4-byte
 * version marker followed by the 4-byte bytes-per-checksum value) and then
 * holds one 4-byte checksum per data chunk.
 *****************************************************************/
@InterfaceAudience.Public
@InterfaceStability.Stable
public abstract class ChecksumFileSystem extends FilterFileSystem {
  /** Version marker written at the start of every checksum file. */
  private static final byte[] CHECKSUM_VERSION = new byte[] {'c', 'r', 'c', 0};
  /** Number of data bytes covered by one checksum; overridden in setConf. */
  private int bytesPerChecksum = 512;
  private boolean verifyChecksum = true;
  private boolean writeChecksum = true;

  /**
   * Return a rough estimate of the checksum data size for a file.
   * @param size the length of the data in bytes
   * @return <code>size</code> scaled by a fixed fraction (1%)
   */
  public static double getApproxChkSumLength(long size) {
    return ChecksumFSOutputSummer.CHKSUM_AS_FRACTION * size;
  }

  /**
   * Wrap the given raw file system.
   * @param fs the underlying file system that stores data and checksum files
   */
  public ChecksumFileSystem(FileSystem fs) {
    super(fs);
  }

  /**
   * Set the configuration and, when it is not null, refresh the number of
   * bytes per checksum from it.
   */
  @Override
  public void setConf(Configuration conf) {
    super.setConf(conf);
    if (conf != null) {
      bytesPerChecksum = conf.getInt(LocalFileSystemConfigKeys.LOCAL_FS_BYTES_PER_CHECKSUM_KEY,
                                     LocalFileSystemConfigKeys.LOCAL_FS_BYTES_PER_CHECKSUM_DEFAULT);
    }
  }

  /**
   * Set whether to verify checksum.
   */
  @Override
  public void setVerifyChecksum(boolean verifyChecksum) {
    this.verifyChecksum = verifyChecksum;
  }

  /**
   * Set whether to write a checksum file when creating a file.
   */
  @Override
  public void setWriteChecksum(boolean writeChecksum) {
    this.writeChecksum = writeChecksum;
  }

  /** get the raw file system */
  @Override
  public FileSystem getRawFileSystem() {
    return fs;
  }

  /** Return the name of the checksum file associated with a file.*/
  public Path getChecksumFile(Path file) {
    return new Path(file.getParent(), "." + file.getName() + ".crc");
  }

  /** Return true iff file is a checksum file name.*/
  public static boolean isChecksumFile(Path file) {
    String name = file.getName();
    return name.startsWith(".") && name.endsWith(".crc");
  }

  /** Return the length of the checksum file given the size of the
   * actual file.
   **/
  public long getChecksumFileLength(Path file, long fileSize) {
    return getChecksumLength(fileSize, getBytesPerSum());
  }

  /** Return the bytes Per Checksum */
  public int getBytesPerSum() {
    return bytesPerChecksum;
  }

  /**
   * Choose the buffer size for a checksum stream: the largest of
   * <code>bytesPerSum</code>, the data buffer size divided by
   * <code>bytesPerSum</code>, and the configured stream buffer size.
   */
  private int getSumBufferSize(int bytesPerSum, int bufferSize) {
    int defaultBufferSize = getConf().getInt(
                       LocalFileSystemConfigKeys.LOCAL_FS_STREAM_BUFFER_SIZE_KEY,
                       LocalFileSystemConfigKeys.LOCAL_FS_STREAM_BUFFER_SIZE_DEFAULT);
    int proportionalBufferSize = bufferSize / bytesPerSum;
    return Math.max(bytesPerSum,
                    Math.max(proportionalBufferSize, defaultBufferSize));
  }

  /*******************************************************
   * For open()'s FSInputStream
   * It verifies that data matches checksums.
   *******************************************************/
  private static class ChecksumFSInputChecker extends FSInputChecker {
    private ChecksumFileSystem fs;
    private FSDataInputStream datas;
    /** Checksum stream; stays null when the checksum file could not be opened. */
    private FSDataInputStream sums;

    /** Size of the checksum file header: version marker plus an int. */
    private static final int HEADER_LENGTH = 8;

    private int bytesPerSum = 1;

    /**
     * Open <code>file</code> using the configured stream buffer size.
     */
    public ChecksumFSInputChecker(ChecksumFileSystem fs, Path file)
      throws IOException {
      this(fs, file, fs.getConf().getInt(
                       LocalFileSystemConfigKeys.LOCAL_FS_STREAM_BUFFER_SIZE_KEY,
                       LocalFileSystemConfigKeys.LOCAL_FS_STREAM_BUFFER_SIZE_DEFAULT));
    }

    /**
     * Open <code>file</code> and its checksum file. A missing checksum file
     * is ignored silently; any other problem reading the checksum header is
     * logged and ignored. In both cases no checksum is configured for the
     * stream.
     */
    public ChecksumFSInputChecker(ChecksumFileSystem fs, Path file, int bufferSize)
      throws IOException {
      super( file, fs.getFileStatus(file).getReplication() );
      this.datas = fs.getRawFileSystem().open(file, bufferSize);
      this.fs = fs;
      Path sumFile = fs.getChecksumFile(file);
      try {
        int sumBufferSize = fs.getSumBufferSize(fs.getBytesPerSum(), bufferSize);
        sums = fs.getRawFileSystem().open(sumFile, sumBufferSize);

        byte[] version = new byte[CHECKSUM_VERSION.length];
        sums.readFully(version);
        if (!Arrays.equals(version, CHECKSUM_VERSION))
          throw new IOException("Not a checksum file: "+sumFile);
        this.bytesPerSum = sums.readInt();
        set(fs.verifyChecksum, new PureJavaCrc32(), bytesPerSum, 4);
      } catch (FileNotFoundException e) {         // quietly ignore
        set(fs.verifyChecksum, null, 1, 0);
      } catch (IOException e) {                   // loudly ignore
        LOG.warn("Problem opening checksum file: "+ file +
                 ".  Ignoring exception: " , e);
        set(fs.verifyChecksum, null, 1, 0);
      }
    }

    /** Map a data offset to the offset of its checksum in the checksum file. */
    private long getChecksumFilePos( long dataPos ) {
      return HEADER_LENGTH + 4*(dataPos/bytesPerSum);
    }

    /** Round a data offset down to the start of its checksum chunk. */
    @Override
    protected long getChunkPosition( long dataPos ) {
      return dataPos/bytesPerSum*bytesPerSum;
    }

    @Override
    public int available() throws IOException {
      return datas.available() + super.available();
    }

    /**
     * Positional read. It is served by a temporary checker opened on the
     * same file, so this stream's own position is not touched.
     *
     * @param position offset in the file to read from; must not be negative
     * @param b destination buffer
     * @param off start offset in <code>b</code>
     * @param len maximum number of bytes to read
     * @return the number of bytes read as reported by the temporary checker,
     *         or 0 when <code>len</code> is 0
     * @throws IndexOutOfBoundsException if off/len do not fit in b
     * @throws IllegalArgumentException if position is negative
     * @throws IOException if an I/O error occurs
     */
    @Override
    public int read(long position, byte[] b, int off, int len)
      throws IOException {
      // parameter check
      if ((off | len | (off + len) | (b.length - (off + len))) < 0) {
        throw new IndexOutOfBoundsException();
      } else if (len == 0) {
        return 0;
      }
      if( position<0 ) {
        throw new IllegalArgumentException(
            "Parameter position can not to be negative");
      }

      ChecksumFSInputChecker checker = new ChecksumFSInputChecker(fs, file);
      try {
        checker.seek(position);
        return checker.read(b, off, len);
      } finally {
        // always release the temporary streams, even if seek/read failed
        checker.close();
      }
    }

    /**
     * Close the data stream and, if it was opened, the checksum stream.
     * The checksum stream is closed even when closing the data stream fails.
     */
    @Override
    public void close() throws IOException {
      try {
        datas.close();
      } finally {
        if( sums != null ) {
          sums.close();
        }
      }
      set(fs.verifyChecksum, null, 1, 0);
    }


    /**
     * Report a checksum failure at <code>targetPos</code> to the file system
     * and ask both the data and the checksum stream to seek to a new source.
     *
     * @return true if either stream reported a new source
     */
    @Override
    public boolean seekToNewSource(long targetPos) throws IOException {
      long sumsPos = getChecksumFilePos(targetPos);
      fs.reportChecksumFailure(file, datas, targetPos, sums, sumsPos);
      boolean newDataSource = datas.seekToNewSource(targetPos);
      return sums.seekToNewSource(sumsPos) || newDataSource;
    }

    /**
     * Read data starting at <code>pos</code> into <code>buf</code> and, when
     * checksums are needed, the matching checksums into
     * <code>checksum</code>.
     *
     * @return the number of data bytes read
     * @throws ChecksumException if the checksum file length is not a multiple
     *         of the checksum size, or if data is present past the end of the
     *         checksum file
     */
    @Override
    protected int readChunk(long pos, byte[] buf, int offset, int len,
        byte[] checksum) throws IOException {

      boolean eof = false;
      if (needChecksum()) {
        assert checksum != null; // we have a checksum buffer
        assert checksum.length % CHECKSUM_SIZE == 0; // it is sane length
        assert len >= bytesPerSum; // we must read at least one chunk

        final int checksumsToRead = Math.min(
          len/bytesPerSum, // number of checksums based on len to read
          checksum.length / CHECKSUM_SIZE); // size of checksum buffer
        long checksumPos = getChecksumFilePos(pos);
        if(checksumPos != sums.getPos()) {
          sums.seek(checksumPos);
        }

        int sumLenRead = sums.read(checksum, 0, CHECKSUM_SIZE * checksumsToRead);
        if (sumLenRead >= 0 && sumLenRead % CHECKSUM_SIZE != 0) {
          throw new ChecksumException(
            "Checksum file not a length multiple of checksum size " +
            "in " + file + " at " + pos + " checksumpos: " + checksumPos +
            " sumLenread: " + sumLenRead,
            pos);
        }
        if (sumLenRead <= 0) { // we're at the end of the file
          eof = true;
        } else {
          // Adjust amount of data to read based on how many checksum chunks we read
          len = Math.min(len, bytesPerSum * (sumLenRead / CHECKSUM_SIZE));
        }
      }
      if(pos != datas.getPos()) {
        datas.seek(pos);
      }
      int nread = readFully(datas, buf, offset, len);
      // data exists for which the checksum file has no checksum
      if (eof && nread > 0) {
        throw new ChecksumException("Checksum error: "+file+" at "+pos, pos);
      }
      return nread;
    }
  }

  /**
   * An input stream that refuses to seek or skip past the end of the file.
   * The file length is looked up once, on first use, and then cached.
   */
  private static class FSDataBoundedInputStream extends FSDataInputStream {
    private FileSystem fs;
    private Path file;
    /** Cached file length; -1 until first looked up. */
    private long fileLen = -1L;

    FSDataBoundedInputStream(FileSystem fs, Path file, InputStream in)
      throws IOException {
      super(in);
      this.fs = fs;
      this.file = file;
    }

    @Override
    public boolean markSupported() {
      return false;
    }

    /* Return the file length */
    private long getFileLength() throws IOException {
      if( fileLen==-1L ) {
        fileLen = fs.getContentSummary(file).getLength();
      }
      return fileLen;
    }

    /**
     * Skips over and discards <code>n</code> bytes of data from the
     * input stream.
     *
     * The <code>skip</code> method skips over some smaller number of bytes
     * when reaching end of file before <code>n</code> bytes have been skipped.
     * The actual number of bytes skipped is returned.  If <code>n</code> is
     * negative, no bytes are skipped.
     *
     * @param      n   the number of bytes to be skipped.
     * @return     the actual number of bytes skipped.
     * @exception  IOException  if an I/O error occurs.
     *             ChecksumException if the chunk to skip to is corrupted
     */
    @Override
    public synchronized long skip(long n) throws IOException {
      long curPos = getPos();
      long fileLength = getFileLength();
      // compare against the remaining bytes rather than computing n+curPos,
      // which overflows for very large n and would bypass the clamp
      long remaining = fileLength - curPos;
      if( n > remaining ) {
        n = remaining;
      }
      return super.skip(n);
    }

    /**
     * Seek to the given position in the stream.
     * The next read() will be from that position.
     *
     * <p>This method does not allow seek past the end of the file.
     * This produces IOException.
     *
     * @param      pos   the position to seek to.
     * @exception  IOException  if an I/O error occurs or seeks after EOF
     *             ChecksumException if the chunk to seek to is corrupted
     */

    @Override
    public synchronized void seek(long pos) throws IOException {
      if(pos>getFileLength()) {
        throw new IOException("Cannot seek after EOF");
      }
      super.seek(pos);
    }

  }

  /**
   * Opens an FSDataInputStream at the indicated Path.
   * When checksum verification is enabled the stream verifies data against
   * the checksum file; otherwise the raw file system's stream is used.
   * @param f the file name to open
   * @param bufferSize the size of the buffer to be used.
   */
  @Override
  public FSDataInputStream open(Path f, int bufferSize) throws IOException {
    FileSystem fs;
    InputStream in;
    if (verifyChecksum) {
      fs = this;
      in = new ChecksumFSInputChecker(this, f, bufferSize);
    } else {
      fs = getRawFileSystem();
      in = fs.open(f, bufferSize);
    }
    return new FSDataBoundedInputStream(fs, f, in);
  }

  /**
   * Append is not supported by this file system.
   * @throws IOException always
   */
  @Override
  public FSDataOutputStream append(Path f, int bufferSize,
      Progressable progress) throws IOException {
    throw new IOException("Not supported");
  }

  /**
   * Calculate the length of the checksum file in bytes.
   * @param size the length of the data file in bytes
   * @param bytesPerSum the number of bytes in a checksum block
   * @return the number of bytes in the checksum file
   */
  public static long getChecksumLength(long size, int bytesPerSum) {
    //the checksum length is equal to size passed divided by bytesPerSum +
    //bytes written in the beginning of the checksum file.
    return ((size + bytesPerSum - 1) / bytesPerSum) * 4 +
             CHECKSUM_VERSION.length + 4;
  }

  /** This class provides an output stream for a checksummed file.
   * It generates checksums for data. */
  private static class ChecksumFSOutputSummer extends FSOutputSummer {
    private FSDataOutputStream datas;
    private FSDataOutputStream sums;
    private static final float CHKSUM_AS_FRACTION = 0.01f;

    /**
     * Create the data file and its checksum file on the raw file system and
     * write the checksum file header (version marker, then bytes per
     * checksum). The checksum file is always overwritten.
     */
    public ChecksumFSOutputSummer(ChecksumFileSystem fs,
                          Path file,
                          boolean overwrite,
                          int bufferSize,
                          short replication,
                          long blockSize,
                          Progressable progress)
      throws IOException {
      super(new PureJavaCrc32(), fs.getBytesPerSum(), 4);
      int bytesPerSum = fs.getBytesPerSum();
      this.datas = fs.getRawFileSystem().create(file, overwrite, bufferSize,
                                         replication, blockSize, progress);
      int sumBufferSize = fs.getSumBufferSize(bytesPerSum, bufferSize);
      this.sums = fs.getRawFileSystem().create(fs.getChecksumFile(file), true,
                                               sumBufferSize, replication,
                                               blockSize);
      sums.write(CHECKSUM_VERSION, 0, CHECKSUM_VERSION.length);
      sums.writeInt(bytesPerSum);
    }

    /**
     * Flush buffered data, then close the checksum stream and the data
     * stream. Both streams are closed even if an earlier step fails.
     */
    @Override
    public void close() throws IOException {
      try {
        flushBuffer();
      } finally {
        try {
          sums.close();
        } finally {
          datas.close();
        }
      }
    }

    /** Write one chunk of data and its checksum to their respective streams. */
    @Override
    protected void writeChunk(byte[] b, int offset, int len, byte[] checksum)
    throws IOException {
      datas.write(b, offset, len);
      sums.write(checksum);
    }
  }

  /**
   * Create a file, creating missing parent directories as needed.
   */
  @Override
  public FSDataOutputStream create(Path f, FsPermission permission,
      boolean overwrite, int bufferSize, short replication, long blockSize,
      Progressable progress) throws IOException {
    return create(f, permission, overwrite, true, bufferSize,
        replication, blockSize, progress);
  }

  /**
   * Shared implementation of create and createNonRecursive.
   *
   * @param createParent when false, a missing parent directory is an error
   *        instead of being created
   * @throws FileNotFoundException if createParent is false and the parent
   *         does not exist
   * @throws IOException if the parent directory could not be created or an
   *         I/O error occurs
   */
  private FSDataOutputStream create(Path f, FsPermission permission,
      boolean overwrite, boolean createParent, int bufferSize,
      short replication, long blockSize,
      Progressable progress) throws IOException {
    Path parent = f.getParent();
    if (parent != null) {
      if (!createParent && !exists(parent)) {
        throw new FileNotFoundException("Parent directory doesn't exist: "
            + parent);
      } else if (!mkdirs(parent)) {
        throw new IOException("Mkdirs failed to create " + parent);
      }
    }
    final FSDataOutputStream out;
    if (writeChecksum) {
      out = new FSDataOutputStream(
          new ChecksumFSOutputSummer(this, f, overwrite, bufferSize, replication,
              blockSize, progress), null);
    } else {
      out = fs.create(f, permission, overwrite, bufferSize, replication,
          blockSize, progress);
      // remove the checksum file since we aren't writing one
      Path checkFile = getChecksumFile(f);
      if (fs.exists(checkFile)) {
        fs.delete(checkFile, true);
      }
    }
    if (permission != null) {
      setPermission(f, permission);
    }
    return out;
  }

  /**
   * Create a file without creating missing parent directories.
   */
  @Override
  public FSDataOutputStream createNonRecursive(Path f, FsPermission permission,
      boolean overwrite, int bufferSize, short replication, long blockSize,
      Progressable progress) throws IOException {
    return create(f, permission, overwrite, false, bufferSize, replication,
        blockSize, progress);
  }

  /**
   * Set replication for an existing file.
   * Implement the abstract <tt>setReplication</tt> of <tt>FileSystem</tt>.
   * The checksum file, if it exists, gets the same replication.
   * @param src file name
   * @param replication new replication
   * @throws IOException
   * @return true if successful;
   *         false if file does not exist or is a directory
   */
  @Override
  public boolean setReplication(Path src, short replication) throws IOException {
    boolean value = fs.setReplication(src, replication);
    if (!value)
      return false;

    Path checkFile = getChecksumFile(src);
    if (exists(checkFile))
      fs.setReplication(checkFile, replication);

    return true;
  }

  /**
   * Rename files/dirs.
   * For a file, its checksum file is renamed along with it; if the source
   * has no checksum file, a checksum file already at the destination is
   * deleted.
   */
  @Override
  public boolean rename(Path src, Path dst) throws IOException {
    if (fs.isDirectory(src)) {
      return fs.rename(src, dst);
    } else {
      if (fs.isDirectory(dst)) {
        dst = new Path(dst, src.getName());
      }

      boolean value = fs.rename(src, dst);
      if (!value)
        return false;

      Path srcCheckFile = getChecksumFile(src);
      Path dstCheckFile = getChecksumFile(dst);
      if (fs.exists(srcCheckFile)) { //try to rename checksum
        value = fs.rename(srcCheckFile, dstCheckFile);
      } else if (fs.exists(dstCheckFile)) {
        // no src checksum, so remove dst checksum
        value = fs.delete(dstCheckFile, true);
      }

      return value;
    }
  }

  /**
   * Implement the delete(Path, boolean) in checksum
   * file system.
   * @return false if the path does not exist; otherwise the result of
   *         deleting it on the underlying file system
   */
  @Override
  public boolean delete(Path f, boolean recursive) throws IOException{
    FileStatus fstatus = null;
    try {
      fstatus = fs.getFileStatus(f);
    } catch(FileNotFoundException e) {
      return false;
    }
    if (fstatus.isDirectory()) {
      //this works since the crcs are in the same
      //directories and the files. so we just delete
      //everything in the underlying filesystem
      return fs.delete(f, recursive);
    } else {
      Path checkFile = getChecksumFile(f);
      if (fs.exists(checkFile)) {
        fs.delete(checkFile, true);
      }
      return fs.delete(f, true);
    }
  }

  /** Filter that hides checksum files from directory listings. */
  final private static PathFilter DEFAULT_FILTER = new PathFilter() {
    @Override
    public boolean accept(Path file) {
      return !isChecksumFile(file);
    }
  };

  /**
   * List the statuses of the files/directories in the given path if the path is
   * a directory. Checksum files are left out of the result.
   *
   * @param f
   *          given path
   * @return the statuses of the files/directories in the given path
   * @throws IOException
   */
  @Override
  public FileStatus[] listStatus(Path f) throws IOException {
    return fs.listStatus(f, DEFAULT_FILTER);
  }

  /**
   * List the statuses of the files/directories in the given path if the path is
   * a directory. Checksum files are left out of the result.
   *
   * @param f
   *          given path
   * @return the statuses of the files/directories in the given path
   * @throws IOException
   */
  @Override
  public RemoteIterator<LocatedFileStatus> listLocatedStatus(Path f)
  throws IOException {
    return fs.listLocatedStatus(f, DEFAULT_FILTER);
  }

  @Override
  public boolean mkdirs(Path f) throws IOException {
    return fs.mkdirs(f);
  }

  /**
   * Copy src from the local file system to dst on this file system.
   */
  @Override
  public void copyFromLocalFile(boolean delSrc, Path src, Path dst)
    throws IOException {
    Configuration conf = getConf();
    FileUtil.copy(getLocal(conf), src, this, dst, delSrc, conf);
  }

  /**
   * The src file is under FS, and the dst is on the local disk.
   * Copy it from FS control to the local dst name.
   */
  @Override
  public void copyToLocalFile(boolean delSrc, Path src, Path dst)
    throws IOException {
    Configuration conf = getConf();
    FileUtil.copy(this, src, getLocal(conf), dst, delSrc, conf);
  }

  /**
   * The src file is under FS, and the dst is on the local disk.
   * Copy it from FS control to the local dst name.
   * If src and dst are directories, the copyCrc parameter
   * determines whether to copy CRC files.
   */
  public void copyToLocalFile(Path src, Path dst, boolean copyCrc)
    throws IOException {
    if (!fs.isDirectory(src)) { // source is a file
      fs.copyToLocalFile(src, dst);
      FileSystem localFs = getLocal(getConf()).getRawFileSystem();
      if (localFs.isDirectory(dst)) {
        dst = new Path(dst, src.getName());
      }
      dst = getChecksumFile(dst);
      if (localFs.exists(dst)) { //remove old local checksum file
        localFs.delete(dst, true);
      }
      Path checksumFile = getChecksumFile(src);
      if (copyCrc && fs.exists(checksumFile)) { //copy checksum file
        fs.copyToLocalFile(checksumFile, dst);
      }
    } else {
      FileStatus[] srcs = listStatus(src);
      for (FileStatus srcFile : srcs) {
        copyToLocalFile(srcFile.getPath(),
                        new Path(dst, srcFile.getPath().getName()), copyCrc);
      }
    }
  }

  /**
   * Return the local file to write output to; here simply
   * <code>tmpLocalFile</code>.
   */
  @Override
  public Path startLocalOutput(Path fsOutputFile, Path tmpLocalFile)
    throws IOException {
    return tmpLocalFile;
  }

  /**
   * Move the finished local file to its target on this file system.
   */
  @Override
  public void completeLocalOutput(Path fsOutputFile, Path tmpLocalFile)
    throws IOException {
    moveFromLocalFile(tmpLocalFile, fsOutputFile);
  }

  /**
   * Report a checksum error to the file system.
   * This implementation does nothing and returns false.
   * @param f the file name containing the error
   * @param in the stream open on the file
   * @param inPos the position of the beginning of the bad data in the file
   * @param sums the stream open on the checksum file
   * @param sumsPos the position of the beginning of the bad data in the checksum file
   * @return if retry is necessary
   */
  public boolean reportChecksumFailure(Path f, FSDataInputStream in,
                                       long inPos, FSDataInputStream sums, long sumsPos) {
    return false;
  }
}