001 /**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements. See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership. The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License. You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018
019 package org.apache.hadoop.fs;
020
021 import java.io.*;
022 import java.util.Arrays;
023
024 import org.apache.hadoop.classification.InterfaceAudience;
025 import org.apache.hadoop.classification.InterfaceStability;
026 import org.apache.hadoop.conf.Configuration;
027 import org.apache.hadoop.fs.permission.FsPermission;
028 import org.apache.hadoop.util.Progressable;
029 import org.apache.hadoop.util.PureJavaCrc32;
030
/****************************************************************
 * Abstract checksummed FileSystem.
 * It provides a basic implementation of a checksummed FileSystem,
 * which creates a checksum file for each raw file.
 * It generates and verifies checksums on the client side.
 *
 *****************************************************************/
038 @InterfaceAudience.Public
039 @InterfaceStability.Stable
040 public abstract class ChecksumFileSystem extends FilterFileSystem {
041 private static final byte[] CHECKSUM_VERSION = new byte[] {'c', 'r', 'c', 0};
042 private int bytesPerChecksum = 512;
043 private boolean verifyChecksum = true;
044 private boolean writeChecksum = true;
045
046 public static double getApproxChkSumLength(long size) {
047 return ChecksumFSOutputSummer.CHKSUM_AS_FRACTION * size;
048 }
049
/**
 * Creates a checksummed file system on top of the given file system.
 *
 * @param fs the file system handed to the FilterFileSystem constructor
 */
public ChecksumFileSystem(FileSystem fs) {
  super(fs);
}
053
054 @Override
055 public void setConf(Configuration conf) {
056 super.setConf(conf);
057 if (conf != null) {
058 bytesPerChecksum = conf.getInt(LocalFileSystemConfigKeys.LOCAL_FS_BYTES_PER_CHECKSUM_KEY,
059 LocalFileSystemConfigKeys.LOCAL_FS_BYTES_PER_CHECKSUM_DEFAULT);
060 }
061 }
062
063 /**
064 * Set whether to verify checksum.
065 */
066 @Override
067 public void setVerifyChecksum(boolean verifyChecksum) {
068 this.verifyChecksum = verifyChecksum;
069 }
070
071 @Override
072 public void setWriteChecksum(boolean writeChecksum) {
073 this.writeChecksum = writeChecksum;
074 }
075
076 /** get the raw file system */
077 @Override
078 public FileSystem getRawFileSystem() {
079 return fs;
080 }
081
082 /** Return the name of the checksum file associated with a file.*/
083 public Path getChecksumFile(Path file) {
084 return new Path(file.getParent(), "." + file.getName() + ".crc");
085 }
086
087 /** Return true iff file is a checksum file name.*/
088 public static boolean isChecksumFile(Path file) {
089 String name = file.getName();
090 return name.startsWith(".") && name.endsWith(".crc");
091 }
092
093 /** Return the length of the checksum file given the size of the
094 * actual file.
095 **/
096 public long getChecksumFileLength(Path file, long fileSize) {
097 return getChecksumLength(fileSize, getBytesPerSum());
098 }
099
100 /** Return the bytes Per Checksum */
101 public int getBytesPerSum() {
102 return bytesPerChecksum;
103 }
104
105 private int getSumBufferSize(int bytesPerSum, int bufferSize) {
106 int defaultBufferSize = getConf().getInt(
107 LocalFileSystemConfigKeys.LOCAL_FS_STREAM_BUFFER_SIZE_KEY,
108 LocalFileSystemConfigKeys.LOCAL_FS_STREAM_BUFFER_SIZE_DEFAULT);
109 int proportionalBufferSize = bufferSize / bytesPerSum;
110 return Math.max(bytesPerSum,
111 Math.max(proportionalBufferSize, defaultBufferSize));
112 }
113
114 /*******************************************************
115 * For open()'s FSInputStream
116 * It verifies that data matches checksums.
117 *******************************************************/
118 private static class ChecksumFSInputChecker extends FSInputChecker {
119 private ChecksumFileSystem fs;
120 private FSDataInputStream datas;
121 private FSDataInputStream sums;
122
123 private static final int HEADER_LENGTH = 8;
124
125 private int bytesPerSum = 1;
126
127 public ChecksumFSInputChecker(ChecksumFileSystem fs, Path file)
128 throws IOException {
129 this(fs, file, fs.getConf().getInt(
130 LocalFileSystemConfigKeys.LOCAL_FS_STREAM_BUFFER_SIZE_KEY,
131 LocalFileSystemConfigKeys.LOCAL_FS_STREAM_BUFFER_SIZE_DEFAULT));
132 }
133
134 public ChecksumFSInputChecker(ChecksumFileSystem fs, Path file, int bufferSize)
135 throws IOException {
136 super( file, fs.getFileStatus(file).getReplication() );
137 this.datas = fs.getRawFileSystem().open(file, bufferSize);
138 this.fs = fs;
139 Path sumFile = fs.getChecksumFile(file);
140 try {
141 int sumBufferSize = fs.getSumBufferSize(fs.getBytesPerSum(), bufferSize);
142 sums = fs.getRawFileSystem().open(sumFile, sumBufferSize);
143
144 byte[] version = new byte[CHECKSUM_VERSION.length];
145 sums.readFully(version);
146 if (!Arrays.equals(version, CHECKSUM_VERSION))
147 throw new IOException("Not a checksum file: "+sumFile);
148 this.bytesPerSum = sums.readInt();
149 set(fs.verifyChecksum, new PureJavaCrc32(), bytesPerSum, 4);
150 } catch (FileNotFoundException e) { // quietly ignore
151 set(fs.verifyChecksum, null, 1, 0);
152 } catch (IOException e) { // loudly ignore
153 LOG.warn("Problem opening checksum file: "+ file +
154 ". Ignoring exception: " , e);
155 set(fs.verifyChecksum, null, 1, 0);
156 }
157 }
158
159 private long getChecksumFilePos( long dataPos ) {
160 return HEADER_LENGTH + 4*(dataPos/bytesPerSum);
161 }
162
163 @Override
164 protected long getChunkPosition( long dataPos ) {
165 return dataPos/bytesPerSum*bytesPerSum;
166 }
167
168 @Override
169 public int available() throws IOException {
170 return datas.available() + super.available();
171 }
172
173 @Override
174 public int read(long position, byte[] b, int off, int len)
175 throws IOException {
176 // parameter check
177 if ((off | len | (off + len) | (b.length - (off + len))) < 0) {
178 throw new IndexOutOfBoundsException();
179 } else if (len == 0) {
180 return 0;
181 }
182 if( position<0 ) {
183 throw new IllegalArgumentException(
184 "Parameter position can not to be negative");
185 }
186
187 ChecksumFSInputChecker checker = new ChecksumFSInputChecker(fs, file);
188 checker.seek(position);
189 int nread = checker.read(b, off, len);
190 checker.close();
191 return nread;
192 }
193
194 @Override
195 public void close() throws IOException {
196 datas.close();
197 if( sums != null ) {
198 sums.close();
199 }
200 set(fs.verifyChecksum, null, 1, 0);
201 }
202
203
204 @Override
205 public boolean seekToNewSource(long targetPos) throws IOException {
206 long sumsPos = getChecksumFilePos(targetPos);
207 fs.reportChecksumFailure(file, datas, targetPos, sums, sumsPos);
208 boolean newDataSource = datas.seekToNewSource(targetPos);
209 return sums.seekToNewSource(sumsPos) || newDataSource;
210 }
211
212 @Override
213 protected int readChunk(long pos, byte[] buf, int offset, int len,
214 byte[] checksum) throws IOException {
215
216 boolean eof = false;
217 if (needChecksum()) {
218 assert checksum != null; // we have a checksum buffer
219 assert checksum.length % CHECKSUM_SIZE == 0; // it is sane length
220 assert len >= bytesPerSum; // we must read at least one chunk
221
222 final int checksumsToRead = Math.min(
223 len/bytesPerSum, // number of checksums based on len to read
224 checksum.length / CHECKSUM_SIZE); // size of checksum buffer
225 long checksumPos = getChecksumFilePos(pos);
226 if(checksumPos != sums.getPos()) {
227 sums.seek(checksumPos);
228 }
229
230 int sumLenRead = sums.read(checksum, 0, CHECKSUM_SIZE * checksumsToRead);
231 if (sumLenRead >= 0 && sumLenRead % CHECKSUM_SIZE != 0) {
232 throw new ChecksumException(
233 "Checksum file not a length multiple of checksum size " +
234 "in " + file + " at " + pos + " checksumpos: " + checksumPos +
235 " sumLenread: " + sumLenRead,
236 pos);
237 }
238 if (sumLenRead <= 0) { // we're at the end of the file
239 eof = true;
240 } else {
241 // Adjust amount of data to read based on how many checksum chunks we read
242 len = Math.min(len, bytesPerSum * (sumLenRead / CHECKSUM_SIZE));
243 }
244 }
245 if(pos != datas.getPos()) {
246 datas.seek(pos);
247 }
248 int nread = readFully(datas, buf, offset, len);
249 if (eof && nread > 0) {
250 throw new ChecksumException("Checksum error: "+file+" at "+pos, pos);
251 }
252 return nread;
253 }
254 }
255
256 private static class FSDataBoundedInputStream extends FSDataInputStream {
257 private FileSystem fs;
258 private Path file;
259 private long fileLen = -1L;
260
261 FSDataBoundedInputStream(FileSystem fs, Path file, InputStream in)
262 throws IOException {
263 super(in);
264 this.fs = fs;
265 this.file = file;
266 }
267
268 @Override
269 public boolean markSupported() {
270 return false;
271 }
272
273 /* Return the file length */
274 private long getFileLength() throws IOException {
275 if( fileLen==-1L ) {
276 fileLen = fs.getContentSummary(file).getLength();
277 }
278 return fileLen;
279 }
280
281 /**
282 * Skips over and discards <code>n</code> bytes of data from the
283 * input stream.
284 *
285 *The <code>skip</code> method skips over some smaller number of bytes
286 * when reaching end of file before <code>n</code> bytes have been skipped.
287 * The actual number of bytes skipped is returned. If <code>n</code> is
288 * negative, no bytes are skipped.
289 *
290 * @param n the number of bytes to be skipped.
291 * @return the actual number of bytes skipped.
292 * @exception IOException if an I/O error occurs.
293 * ChecksumException if the chunk to skip to is corrupted
294 */
295 @Override
296 public synchronized long skip(long n) throws IOException {
297 long curPos = getPos();
298 long fileLength = getFileLength();
299 if( n+curPos > fileLength ) {
300 n = fileLength - curPos;
301 }
302 return super.skip(n);
303 }
304
305 /**
306 * Seek to the given position in the stream.
307 * The next read() will be from that position.
308 *
309 * <p>This method does not allow seek past the end of the file.
310 * This produces IOException.
311 *
312 * @param pos the postion to seek to.
313 * @exception IOException if an I/O error occurs or seeks after EOF
314 * ChecksumException if the chunk to seek to is corrupted
315 */
316
317 @Override
318 public synchronized void seek(long pos) throws IOException {
319 if(pos>getFileLength()) {
320 throw new IOException("Cannot seek after EOF");
321 }
322 super.seek(pos);
323 }
324
325 }
326
327 /**
328 * Opens an FSDataInputStream at the indicated Path.
329 * @param f the file name to open
330 * @param bufferSize the size of the buffer to be used.
331 */
332 @Override
333 public FSDataInputStream open(Path f, int bufferSize) throws IOException {
334 FileSystem fs;
335 InputStream in;
336 if (verifyChecksum) {
337 fs = this;
338 in = new ChecksumFSInputChecker(this, f, bufferSize);
339 } else {
340 fs = getRawFileSystem();
341 in = fs.open(f, bufferSize);
342 }
343 return new FSDataBoundedInputStream(fs, f, in);
344 }
345
346 @Override
347 public FSDataOutputStream append(Path f, int bufferSize,
348 Progressable progress) throws IOException {
349 throw new IOException("Not supported");
350 }
351
352 /**
353 * Calculated the length of the checksum file in bytes.
354 * @param size the length of the data file in bytes
355 * @param bytesPerSum the number of bytes in a checksum block
356 * @return the number of bytes in the checksum file
357 */
358 public static long getChecksumLength(long size, int bytesPerSum) {
359 //the checksum length is equal to size passed divided by bytesPerSum +
360 //bytes written in the beginning of the checksum file.
361 return ((size + bytesPerSum - 1) / bytesPerSum) * 4 +
362 CHECKSUM_VERSION.length + 4;
363 }
364
365 /** This class provides an output stream for a checksummed file.
366 * It generates checksums for data. */
367 private static class ChecksumFSOutputSummer extends FSOutputSummer {
368 private FSDataOutputStream datas;
369 private FSDataOutputStream sums;
370 private static final float CHKSUM_AS_FRACTION = 0.01f;
371
372 public ChecksumFSOutputSummer(ChecksumFileSystem fs,
373 Path file,
374 boolean overwrite,
375 int bufferSize,
376 short replication,
377 long blockSize,
378 Progressable progress)
379 throws IOException {
380 super(new PureJavaCrc32(), fs.getBytesPerSum(), 4);
381 int bytesPerSum = fs.getBytesPerSum();
382 this.datas = fs.getRawFileSystem().create(file, overwrite, bufferSize,
383 replication, blockSize, progress);
384 int sumBufferSize = fs.getSumBufferSize(bytesPerSum, bufferSize);
385 this.sums = fs.getRawFileSystem().create(fs.getChecksumFile(file), true,
386 sumBufferSize, replication,
387 blockSize);
388 sums.write(CHECKSUM_VERSION, 0, CHECKSUM_VERSION.length);
389 sums.writeInt(bytesPerSum);
390 }
391
392 @Override
393 public void close() throws IOException {
394 flushBuffer();
395 sums.close();
396 datas.close();
397 }
398
399 @Override
400 protected void writeChunk(byte[] b, int offset, int len, byte[] checksum)
401 throws IOException {
402 datas.write(b, offset, len);
403 sums.write(checksum);
404 }
405 }
406
407 @Override
408 public FSDataOutputStream create(Path f, FsPermission permission,
409 boolean overwrite, int bufferSize, short replication, long blockSize,
410 Progressable progress) throws IOException {
411 return create(f, permission, overwrite, true, bufferSize,
412 replication, blockSize, progress);
413 }
414
415 private FSDataOutputStream create(Path f, FsPermission permission,
416 boolean overwrite, boolean createParent, int bufferSize,
417 short replication, long blockSize,
418 Progressable progress) throws IOException {
419 Path parent = f.getParent();
420 if (parent != null) {
421 if (!createParent && !exists(parent)) {
422 throw new FileNotFoundException("Parent directory doesn't exist: "
423 + parent);
424 } else if (!mkdirs(parent)) {
425 throw new IOException("Mkdirs failed to create " + parent);
426 }
427 }
428 final FSDataOutputStream out;
429 if (writeChecksum) {
430 out = new FSDataOutputStream(
431 new ChecksumFSOutputSummer(this, f, overwrite, bufferSize, replication,
432 blockSize, progress), null);
433 } else {
434 out = fs.create(f, permission, overwrite, bufferSize, replication,
435 blockSize, progress);
436 // remove the checksum file since we aren't writing one
437 Path checkFile = getChecksumFile(f);
438 if (fs.exists(checkFile)) {
439 fs.delete(checkFile, true);
440 }
441 }
442 if (permission != null) {
443 setPermission(f, permission);
444 }
445 return out;
446 }
447
448 @Override
449 public FSDataOutputStream createNonRecursive(Path f, FsPermission permission,
450 boolean overwrite, int bufferSize, short replication, long blockSize,
451 Progressable progress) throws IOException {
452 return create(f, permission, overwrite, false, bufferSize, replication,
453 blockSize, progress);
454 }
455
456 /**
457 * Set replication for an existing file.
458 * Implement the abstract <tt>setReplication</tt> of <tt>FileSystem</tt>
459 * @param src file name
460 * @param replication new replication
461 * @throws IOException
462 * @return true if successful;
463 * false if file does not exist or is a directory
464 */
465 @Override
466 public boolean setReplication(Path src, short replication) throws IOException {
467 boolean value = fs.setReplication(src, replication);
468 if (!value)
469 return false;
470
471 Path checkFile = getChecksumFile(src);
472 if (exists(checkFile))
473 fs.setReplication(checkFile, replication);
474
475 return true;
476 }
477
478 /**
479 * Rename files/dirs
480 */
481 @Override
482 public boolean rename(Path src, Path dst) throws IOException {
483 if (fs.isDirectory(src)) {
484 return fs.rename(src, dst);
485 } else {
486 if (fs.isDirectory(dst)) {
487 dst = new Path(dst, src.getName());
488 }
489
490 boolean value = fs.rename(src, dst);
491 if (!value)
492 return false;
493
494 Path srcCheckFile = getChecksumFile(src);
495 Path dstCheckFile = getChecksumFile(dst);
496 if (fs.exists(srcCheckFile)) { //try to rename checksum
497 value = fs.rename(srcCheckFile, dstCheckFile);
498 } else if (fs.exists(dstCheckFile)) {
499 // no src checksum, so remove dst checksum
500 value = fs.delete(dstCheckFile, true);
501 }
502
503 return value;
504 }
505 }
506
507 /**
508 * Implement the delete(Path, boolean) in checksum
509 * file system.
510 */
511 @Override
512 public boolean delete(Path f, boolean recursive) throws IOException{
513 FileStatus fstatus = null;
514 try {
515 fstatus = fs.getFileStatus(f);
516 } catch(FileNotFoundException e) {
517 return false;
518 }
519 if (fstatus.isDirectory()) {
520 //this works since the crcs are in the same
521 //directories and the files. so we just delete
522 //everything in the underlying filesystem
523 return fs.delete(f, recursive);
524 } else {
525 Path checkFile = getChecksumFile(f);
526 if (fs.exists(checkFile)) {
527 fs.delete(checkFile, true);
528 }
529 return fs.delete(f, true);
530 }
531 }
532
533 final private static PathFilter DEFAULT_FILTER = new PathFilter() {
534 @Override
535 public boolean accept(Path file) {
536 return !isChecksumFile(file);
537 }
538 };
539
540 /**
541 * List the statuses of the files/directories in the given path if the path is
542 * a directory.
543 *
544 * @param f
545 * given path
546 * @return the statuses of the files/directories in the given patch
547 * @throws IOException
548 */
549 @Override
550 public FileStatus[] listStatus(Path f) throws IOException {
551 return fs.listStatus(f, DEFAULT_FILTER);
552 }
553
554 /**
555 * List the statuses of the files/directories in the given path if the path is
556 * a directory.
557 *
558 * @param f
559 * given path
560 * @return the statuses of the files/directories in the given patch
561 * @throws IOException
562 */
563 @Override
564 public RemoteIterator<LocatedFileStatus> listLocatedStatus(Path f)
565 throws IOException {
566 return fs.listLocatedStatus(f, DEFAULT_FILTER);
567 }
568
569 @Override
570 public boolean mkdirs(Path f) throws IOException {
571 return fs.mkdirs(f);
572 }
573
574 @Override
575 public void copyFromLocalFile(boolean delSrc, Path src, Path dst)
576 throws IOException {
577 Configuration conf = getConf();
578 FileUtil.copy(getLocal(conf), src, this, dst, delSrc, conf);
579 }
580
581 /**
582 * The src file is under FS, and the dst is on the local disk.
583 * Copy it from FS control to the local dst name.
584 */
585 @Override
586 public void copyToLocalFile(boolean delSrc, Path src, Path dst)
587 throws IOException {
588 Configuration conf = getConf();
589 FileUtil.copy(this, src, getLocal(conf), dst, delSrc, conf);
590 }
591
592 /**
593 * The src file is under FS, and the dst is on the local disk.
594 * Copy it from FS control to the local dst name.
595 * If src and dst are directories, the copyCrc parameter
596 * determines whether to copy CRC files.
597 */
598 public void copyToLocalFile(Path src, Path dst, boolean copyCrc)
599 throws IOException {
600 if (!fs.isDirectory(src)) { // source is a file
601 fs.copyToLocalFile(src, dst);
602 FileSystem localFs = getLocal(getConf()).getRawFileSystem();
603 if (localFs.isDirectory(dst)) {
604 dst = new Path(dst, src.getName());
605 }
606 dst = getChecksumFile(dst);
607 if (localFs.exists(dst)) { //remove old local checksum file
608 localFs.delete(dst, true);
609 }
610 Path checksumFile = getChecksumFile(src);
611 if (copyCrc && fs.exists(checksumFile)) { //copy checksum file
612 fs.copyToLocalFile(checksumFile, dst);
613 }
614 } else {
615 FileStatus[] srcs = listStatus(src);
616 for (FileStatus srcFile : srcs) {
617 copyToLocalFile(srcFile.getPath(),
618 new Path(dst, srcFile.getPath().getName()), copyCrc);
619 }
620 }
621 }
622
623 @Override
624 public Path startLocalOutput(Path fsOutputFile, Path tmpLocalFile)
625 throws IOException {
626 return tmpLocalFile;
627 }
628
629 @Override
630 public void completeLocalOutput(Path fsOutputFile, Path tmpLocalFile)
631 throws IOException {
632 moveFromLocalFile(tmpLocalFile, fsOutputFile);
633 }
634
635 /**
636 * Report a checksum error to the file system.
637 * @param f the file name containing the error
638 * @param in the stream open on the file
639 * @param inPos the position of the beginning of the bad data in the file
640 * @param sums the stream open on the checksum file
641 * @param sumsPos the position of the beginning of the bad data in the checksum file
642 * @return if retry is neccessary
643 */
644 public boolean reportChecksumFailure(Path f, FSDataInputStream in,
645 long inPos, FSDataInputStream sums, long sumsPos) {
646 return false;
647 }
648 }