001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018
019package org.apache.hadoop.fs;
020
021import java.io.*;
022import java.util.Arrays;
023
024import org.apache.hadoop.classification.InterfaceAudience;
025import org.apache.hadoop.classification.InterfaceStability;
026import org.apache.hadoop.conf.Configuration;
027import org.apache.hadoop.fs.permission.FsPermission;
028import org.apache.hadoop.util.Progressable;
029import org.apache.hadoop.util.PureJavaCrc32;
030
/****************************************************************
 * Abstract checksummed FileSystem.
 * It provides a basic implementation of a checksummed FileSystem,
 * which creates a checksum file for each raw file.
 * It generates and verifies checksums on the client side.
 *
 *****************************************************************/
038@InterfaceAudience.Public
039@InterfaceStability.Stable
040public abstract class ChecksumFileSystem extends FilterFileSystem {
041  private static final byte[] CHECKSUM_VERSION = new byte[] {'c', 'r', 'c', 0};
042  private int bytesPerChecksum = 512;
043  private boolean verifyChecksum = true;
044  private boolean writeChecksum = true;
045
046  public static double getApproxChkSumLength(long size) {
047    return ChecksumFSOutputSummer.CHKSUM_AS_FRACTION * size;
048  }
049  
050  public ChecksumFileSystem(FileSystem fs) {
051    super(fs);
052  }
053
054  @Override
055  public void setConf(Configuration conf) {
056    super.setConf(conf);
057    if (conf != null) {
058      bytesPerChecksum = conf.getInt(LocalFileSystemConfigKeys.LOCAL_FS_BYTES_PER_CHECKSUM_KEY,
059                                     LocalFileSystemConfigKeys.LOCAL_FS_BYTES_PER_CHECKSUM_DEFAULT);
060    }
061  }
062  
063  /**
064   * Set whether to verify checksum.
065   */
066  @Override
067  public void setVerifyChecksum(boolean verifyChecksum) {
068    this.verifyChecksum = verifyChecksum;
069  }
070
071  @Override
072  public void setWriteChecksum(boolean writeChecksum) {
073    this.writeChecksum = writeChecksum;
074  }
075  
076  /** get the raw file system */
077  @Override
078  public FileSystem getRawFileSystem() {
079    return fs;
080  }
081
082  /** Return the name of the checksum file associated with a file.*/
083  public Path getChecksumFile(Path file) {
084    return new Path(file.getParent(), "." + file.getName() + ".crc");
085  }
086
087  /** Return true iff file is a checksum file name.*/
088  public static boolean isChecksumFile(Path file) {
089    String name = file.getName();
090    return name.startsWith(".") && name.endsWith(".crc");
091  }
092
093  /** Return the length of the checksum file given the size of the 
094   * actual file.
095   **/
096  public long getChecksumFileLength(Path file, long fileSize) {
097    return getChecksumLength(fileSize, getBytesPerSum());
098  }
099
100  /** Return the bytes Per Checksum */
101  public int getBytesPerSum() {
102    return bytesPerChecksum;
103  }
104
105  private int getSumBufferSize(int bytesPerSum, int bufferSize) {
106    int defaultBufferSize = getConf().getInt(
107                       LocalFileSystemConfigKeys.LOCAL_FS_STREAM_BUFFER_SIZE_KEY,
108                       LocalFileSystemConfigKeys.LOCAL_FS_STREAM_BUFFER_SIZE_DEFAULT);
109    int proportionalBufferSize = bufferSize / bytesPerSum;
110    return Math.max(bytesPerSum,
111                    Math.max(proportionalBufferSize, defaultBufferSize));
112  }
113
114  /*******************************************************
115   * For open()'s FSInputStream
116   * It verifies that data matches checksums.
117   *******************************************************/
118  private static class ChecksumFSInputChecker extends FSInputChecker {
119    private ChecksumFileSystem fs;
120    private FSDataInputStream datas;
121    private FSDataInputStream sums;
122    
123    private static final int HEADER_LENGTH = 8;
124    
125    private int bytesPerSum = 1;
126    
127    public ChecksumFSInputChecker(ChecksumFileSystem fs, Path file)
128      throws IOException {
129      this(fs, file, fs.getConf().getInt(
130                       LocalFileSystemConfigKeys.LOCAL_FS_STREAM_BUFFER_SIZE_KEY, 
131                       LocalFileSystemConfigKeys.LOCAL_FS_STREAM_BUFFER_SIZE_DEFAULT));
132    }
133    
134    public ChecksumFSInputChecker(ChecksumFileSystem fs, Path file, int bufferSize)
135      throws IOException {
136      super( file, fs.getFileStatus(file).getReplication() );
137      this.datas = fs.getRawFileSystem().open(file, bufferSize);
138      this.fs = fs;
139      Path sumFile = fs.getChecksumFile(file);
140      try {
141        int sumBufferSize = fs.getSumBufferSize(fs.getBytesPerSum(), bufferSize);
142        sums = fs.getRawFileSystem().open(sumFile, sumBufferSize);
143
144        byte[] version = new byte[CHECKSUM_VERSION.length];
145        sums.readFully(version);
146        if (!Arrays.equals(version, CHECKSUM_VERSION))
147          throw new IOException("Not a checksum file: "+sumFile);
148        this.bytesPerSum = sums.readInt();
149        set(fs.verifyChecksum, new PureJavaCrc32(), bytesPerSum, 4);
150      } catch (FileNotFoundException e) {         // quietly ignore
151        set(fs.verifyChecksum, null, 1, 0);
152      } catch (IOException e) {                   // loudly ignore
153        LOG.warn("Problem opening checksum file: "+ file + 
154                 ".  Ignoring exception: " , e); 
155        set(fs.verifyChecksum, null, 1, 0);
156      }
157    }
158    
159    private long getChecksumFilePos( long dataPos ) {
160      return HEADER_LENGTH + 4*(dataPos/bytesPerSum);
161    }
162    
163    @Override
164    protected long getChunkPosition( long dataPos ) {
165      return dataPos/bytesPerSum*bytesPerSum;
166    }
167    
168    @Override
169    public int available() throws IOException {
170      return datas.available() + super.available();
171    }
172    
173    @Override
174    public int read(long position, byte[] b, int off, int len)
175      throws IOException {
176      // parameter check
177      if ((off | len | (off + len) | (b.length - (off + len))) < 0) {
178        throw new IndexOutOfBoundsException();
179      } else if (len == 0) {
180        return 0;
181      }
182      if( position<0 ) {
183        throw new IllegalArgumentException(
184            "Parameter position can not to be negative");
185      }
186
187      ChecksumFSInputChecker checker = new ChecksumFSInputChecker(fs, file);
188      checker.seek(position);
189      int nread = checker.read(b, off, len);
190      checker.close();
191      return nread;
192    }
193    
194    @Override
195    public void close() throws IOException {
196      datas.close();
197      if( sums != null ) {
198        sums.close();
199      }
200      set(fs.verifyChecksum, null, 1, 0);
201    }
202    
203
204    @Override
205    public boolean seekToNewSource(long targetPos) throws IOException {
206      long sumsPos = getChecksumFilePos(targetPos);
207      fs.reportChecksumFailure(file, datas, targetPos, sums, sumsPos);
208      boolean newDataSource = datas.seekToNewSource(targetPos);
209      return sums.seekToNewSource(sumsPos) || newDataSource;
210    }
211
212    @Override
213    protected int readChunk(long pos, byte[] buf, int offset, int len,
214        byte[] checksum) throws IOException {
215
216      boolean eof = false;
217      if (needChecksum()) {
218        assert checksum != null; // we have a checksum buffer
219        assert checksum.length % CHECKSUM_SIZE == 0; // it is sane length
220        assert len >= bytesPerSum; // we must read at least one chunk
221
222        final int checksumsToRead = Math.min(
223          len/bytesPerSum, // number of checksums based on len to read
224          checksum.length / CHECKSUM_SIZE); // size of checksum buffer
225        long checksumPos = getChecksumFilePos(pos); 
226        if(checksumPos != sums.getPos()) {
227          sums.seek(checksumPos);
228        }
229
230        int sumLenRead = sums.read(checksum, 0, CHECKSUM_SIZE * checksumsToRead);
231        if (sumLenRead >= 0 && sumLenRead % CHECKSUM_SIZE != 0) {
232          throw new ChecksumException(
233            "Checksum file not a length multiple of checksum size " +
234            "in " + file + " at " + pos + " checksumpos: " + checksumPos +
235            " sumLenread: " + sumLenRead,
236            pos);
237        }
238        if (sumLenRead <= 0) { // we're at the end of the file
239          eof = true;
240        } else {
241          // Adjust amount of data to read based on how many checksum chunks we read
242          len = Math.min(len, bytesPerSum * (sumLenRead / CHECKSUM_SIZE));
243        }
244      }
245      if(pos != datas.getPos()) {
246        datas.seek(pos);
247      }
248      int nread = readFully(datas, buf, offset, len);
249      if (eof && nread > 0) {
250        throw new ChecksumException("Checksum error: "+file+" at "+pos, pos);
251      }
252      return nread;
253    }
254  }
255  
256  private static class FSDataBoundedInputStream extends FSDataInputStream {
257    private FileSystem fs;
258    private Path file;
259    private long fileLen = -1L;
260
261    FSDataBoundedInputStream(FileSystem fs, Path file, InputStream in)
262        throws IOException {
263      super(in);
264      this.fs = fs;
265      this.file = file;
266    }
267    
268    @Override
269    public boolean markSupported() {
270      return false;
271    }
272    
273    /* Return the file length */
274    private long getFileLength() throws IOException {
275      if( fileLen==-1L ) {
276        fileLen = fs.getContentSummary(file).getLength();
277      }
278      return fileLen;
279    }
280    
281    /**
282     * Skips over and discards <code>n</code> bytes of data from the
283     * input stream.
284     *
285     *The <code>skip</code> method skips over some smaller number of bytes
286     * when reaching end of file before <code>n</code> bytes have been skipped.
287     * The actual number of bytes skipped is returned.  If <code>n</code> is
288     * negative, no bytes are skipped.
289     *
290     * @param      n   the number of bytes to be skipped.
291     * @return     the actual number of bytes skipped.
292     * @exception  IOException  if an I/O error occurs.
293     *             ChecksumException if the chunk to skip to is corrupted
294     */
295    @Override
296    public synchronized long skip(long n) throws IOException {
297      long curPos = getPos();
298      long fileLength = getFileLength();
299      if( n+curPos > fileLength ) {
300        n = fileLength - curPos;
301      }
302      return super.skip(n);
303    }
304    
305    /**
306     * Seek to the given position in the stream.
307     * The next read() will be from that position.
308     * 
309     * <p>This method does not allow seek past the end of the file.
310     * This produces IOException.
311     *
312     * @param      pos   the postion to seek to.
313     * @exception  IOException  if an I/O error occurs or seeks after EOF
314     *             ChecksumException if the chunk to seek to is corrupted
315     */
316
317    @Override
318    public synchronized void seek(long pos) throws IOException {
319      if(pos>getFileLength()) {
320        throw new IOException("Cannot seek after EOF");
321      }
322      super.seek(pos);
323    }
324
325  }
326
327  /**
328   * Opens an FSDataInputStream at the indicated Path.
329   * @param f the file name to open
330   * @param bufferSize the size of the buffer to be used.
331   */
332  @Override
333  public FSDataInputStream open(Path f, int bufferSize) throws IOException {
334    FileSystem fs;
335    InputStream in;
336    if (verifyChecksum) {
337      fs = this;
338      in = new ChecksumFSInputChecker(this, f, bufferSize);
339    } else {
340      fs = getRawFileSystem();
341      in = fs.open(f, bufferSize);
342    }
343    return new FSDataBoundedInputStream(fs, f, in);
344  }
345
346  @Override
347  public FSDataOutputStream append(Path f, int bufferSize,
348      Progressable progress) throws IOException {
349    throw new IOException("Not supported");
350  }
351
352  /**
353   * Calculated the length of the checksum file in bytes.
354   * @param size the length of the data file in bytes
355   * @param bytesPerSum the number of bytes in a checksum block
356   * @return the number of bytes in the checksum file
357   */
358  public static long getChecksumLength(long size, int bytesPerSum) {
359    //the checksum length is equal to size passed divided by bytesPerSum +
360    //bytes written in the beginning of the checksum file.  
361    return ((size + bytesPerSum - 1) / bytesPerSum) * 4 +
362             CHECKSUM_VERSION.length + 4;  
363  }
364
365  /** This class provides an output stream for a checksummed file.
366   * It generates checksums for data. */
367  private static class ChecksumFSOutputSummer extends FSOutputSummer {
368    private FSDataOutputStream datas;    
369    private FSDataOutputStream sums;
370    private static final float CHKSUM_AS_FRACTION = 0.01f;
371    
372    public ChecksumFSOutputSummer(ChecksumFileSystem fs, 
373                          Path file, 
374                          boolean overwrite,
375                          int bufferSize,
376                          short replication,
377                          long blockSize,
378                          Progressable progress)
379      throws IOException {
380      super(new PureJavaCrc32(), fs.getBytesPerSum(), 4);
381      int bytesPerSum = fs.getBytesPerSum();
382      this.datas = fs.getRawFileSystem().create(file, overwrite, bufferSize, 
383                                         replication, blockSize, progress);
384      int sumBufferSize = fs.getSumBufferSize(bytesPerSum, bufferSize);
385      this.sums = fs.getRawFileSystem().create(fs.getChecksumFile(file), true, 
386                                               sumBufferSize, replication,
387                                               blockSize);
388      sums.write(CHECKSUM_VERSION, 0, CHECKSUM_VERSION.length);
389      sums.writeInt(bytesPerSum);
390    }
391    
392    @Override
393    public void close() throws IOException {
394      flushBuffer();
395      sums.close();
396      datas.close();
397    }
398    
399    @Override
400    protected void writeChunk(byte[] b, int offset, int len, byte[] checksum)
401    throws IOException {
402      datas.write(b, offset, len);
403      sums.write(checksum);
404    }
405  }
406
407  @Override
408  public FSDataOutputStream create(Path f, FsPermission permission,
409      boolean overwrite, int bufferSize, short replication, long blockSize,
410      Progressable progress) throws IOException {
411    return create(f, permission, overwrite, true, bufferSize,
412        replication, blockSize, progress);
413  }
414
415  private FSDataOutputStream create(Path f, FsPermission permission,
416      boolean overwrite, boolean createParent, int bufferSize,
417      short replication, long blockSize,
418      Progressable progress) throws IOException {
419    Path parent = f.getParent();
420    if (parent != null) {
421      if (!createParent && !exists(parent)) {
422        throw new FileNotFoundException("Parent directory doesn't exist: "
423            + parent);
424      } else if (!mkdirs(parent)) {
425        throw new IOException("Mkdirs failed to create " + parent);
426      }
427    }
428    final FSDataOutputStream out;
429    if (writeChecksum) {
430      out = new FSDataOutputStream(
431          new ChecksumFSOutputSummer(this, f, overwrite, bufferSize, replication,
432              blockSize, progress), null);
433    } else {
434      out = fs.create(f, permission, overwrite, bufferSize, replication,
435          blockSize, progress);
436      // remove the checksum file since we aren't writing one
437      Path checkFile = getChecksumFile(f);
438      if (fs.exists(checkFile)) {
439        fs.delete(checkFile, true);
440      }
441    }
442    if (permission != null) {
443      setPermission(f, permission);
444    }
445    return out;
446  }
447
448  @Override
449  public FSDataOutputStream createNonRecursive(Path f, FsPermission permission,
450      boolean overwrite, int bufferSize, short replication, long blockSize,
451      Progressable progress) throws IOException {
452    return create(f, permission, overwrite, false, bufferSize, replication,
453        blockSize, progress);
454  }
455
456  /**
457   * Set replication for an existing file.
458   * Implement the abstract <tt>setReplication</tt> of <tt>FileSystem</tt>
459   * @param src file name
460   * @param replication new replication
461   * @throws IOException
462   * @return true if successful;
463   *         false if file does not exist or is a directory
464   */
465  @Override
466  public boolean setReplication(Path src, short replication) throws IOException {
467    boolean value = fs.setReplication(src, replication);
468    if (!value)
469      return false;
470
471    Path checkFile = getChecksumFile(src);
472    if (exists(checkFile))
473      fs.setReplication(checkFile, replication);
474
475    return true;
476  }
477
478  /**
479   * Rename files/dirs
480   */
481  @Override
482  public boolean rename(Path src, Path dst) throws IOException {
483    if (fs.isDirectory(src)) {
484      return fs.rename(src, dst);
485    } else {
486      if (fs.isDirectory(dst)) {
487        dst = new Path(dst, src.getName());
488      }
489
490      boolean value = fs.rename(src, dst);
491      if (!value)
492        return false;
493
494      Path srcCheckFile = getChecksumFile(src);
495      Path dstCheckFile = getChecksumFile(dst);
496      if (fs.exists(srcCheckFile)) { //try to rename checksum
497        value = fs.rename(srcCheckFile, dstCheckFile);
498      } else if (fs.exists(dstCheckFile)) {
499        // no src checksum, so remove dst checksum
500        value = fs.delete(dstCheckFile, true); 
501      }
502
503      return value;
504    }
505  }
506
507  /**
508   * Implement the delete(Path, boolean) in checksum
509   * file system.
510   */
511  @Override
512  public boolean delete(Path f, boolean recursive) throws IOException{
513    FileStatus fstatus = null;
514    try {
515      fstatus = fs.getFileStatus(f);
516    } catch(FileNotFoundException e) {
517      return false;
518    }
519    if (fstatus.isDirectory()) {
520      //this works since the crcs are in the same
521      //directories and the files. so we just delete
522      //everything in the underlying filesystem
523      return fs.delete(f, recursive);
524    } else {
525      Path checkFile = getChecksumFile(f);
526      if (fs.exists(checkFile)) {
527        fs.delete(checkFile, true);
528      }
529      return fs.delete(f, true);
530    }
531  }
532    
533  final private static PathFilter DEFAULT_FILTER = new PathFilter() {
534    @Override
535    public boolean accept(Path file) {
536      return !isChecksumFile(file);
537    }
538  };
539
540  /**
541   * List the statuses of the files/directories in the given path if the path is
542   * a directory.
543   * 
544   * @param f
545   *          given path
546   * @return the statuses of the files/directories in the given patch
547   * @throws IOException
548   */
549  @Override
550  public FileStatus[] listStatus(Path f) throws IOException {
551    return fs.listStatus(f, DEFAULT_FILTER);
552  }
553  
554  /**
555   * List the statuses of the files/directories in the given path if the path is
556   * a directory.
557   * 
558   * @param f
559   *          given path
560   * @return the statuses of the files/directories in the given patch
561   * @throws IOException
562   */
563  @Override
564  public RemoteIterator<LocatedFileStatus> listLocatedStatus(Path f)
565  throws IOException {
566    return fs.listLocatedStatus(f, DEFAULT_FILTER);
567  }
568  
569  @Override
570  public boolean mkdirs(Path f) throws IOException {
571    return fs.mkdirs(f);
572  }
573
574  @Override
575  public void copyFromLocalFile(boolean delSrc, Path src, Path dst)
576    throws IOException {
577    Configuration conf = getConf();
578    FileUtil.copy(getLocal(conf), src, this, dst, delSrc, conf);
579  }
580
581  /**
582   * The src file is under FS, and the dst is on the local disk.
583   * Copy it from FS control to the local dst name.
584   */
585  @Override
586  public void copyToLocalFile(boolean delSrc, Path src, Path dst)
587    throws IOException {
588    Configuration conf = getConf();
589    FileUtil.copy(this, src, getLocal(conf), dst, delSrc, conf);
590  }
591
592  /**
593   * The src file is under FS, and the dst is on the local disk.
594   * Copy it from FS control to the local dst name.
595   * If src and dst are directories, the copyCrc parameter
596   * determines whether to copy CRC files.
597   */
598  public void copyToLocalFile(Path src, Path dst, boolean copyCrc)
599    throws IOException {
600    if (!fs.isDirectory(src)) { // source is a file
601      fs.copyToLocalFile(src, dst);
602      FileSystem localFs = getLocal(getConf()).getRawFileSystem();
603      if (localFs.isDirectory(dst)) {
604        dst = new Path(dst, src.getName());
605      }
606      dst = getChecksumFile(dst);
607      if (localFs.exists(dst)) { //remove old local checksum file
608        localFs.delete(dst, true);
609      }
610      Path checksumFile = getChecksumFile(src);
611      if (copyCrc && fs.exists(checksumFile)) { //copy checksum file
612        fs.copyToLocalFile(checksumFile, dst);
613      }
614    } else {
615      FileStatus[] srcs = listStatus(src);
616      for (FileStatus srcFile : srcs) {
617        copyToLocalFile(srcFile.getPath(), 
618                        new Path(dst, srcFile.getPath().getName()), copyCrc);
619      }
620    }
621  }
622
623  @Override
624  public Path startLocalOutput(Path fsOutputFile, Path tmpLocalFile)
625    throws IOException {
626    return tmpLocalFile;
627  }
628
629  @Override
630  public void completeLocalOutput(Path fsOutputFile, Path tmpLocalFile)
631    throws IOException {
632    moveFromLocalFile(tmpLocalFile, fsOutputFile);
633  }
634
635  /**
636   * Report a checksum error to the file system.
637   * @param f the file name containing the error
638   * @param in the stream open on the file
639   * @param inPos the position of the beginning of the bad data in the file
640   * @param sums the stream open on the checksum file
641   * @param sumsPos the position of the beginning of the bad data in the checksum file
642   * @return if retry is neccessary
643   */
644  public boolean reportChecksumFailure(Path f, FSDataInputStream in,
645                                       long inPos, FSDataInputStream sums, long sumsPos) {
646    return false;
647  }
648}